def main():
  inFile = plausi()
  fo = open(inFile)
  for line in fo:
    o = OrthoMCLCluster(line.rstrip())
    name = o.get_name()
    geneHash = o.get_gene_hash()
    for geneid, species in geneHash.iteritems(): print geneid + "\t" + name
def main():
    inFile = plausi()
    fo = open(inFile)
    for line in fo:
        o = OrthoMCLCluster(line.rstrip())
        name = o.get_name()
        geneHash = o.get_gene_hash()
        for geneid, species in geneHash.iteritems():
            print geneid + "\t" + name
Exemple #3
0
def main():
    args = plausi()
    in_orthomcl = args[0]
    EVALUE = float('1e-20')
    IDENTITY = 30.0
    if len(args) == 4:
        in_fasta, in_gg, in_blast = args[1:4]
        gene2species, speciesArray = read_gg(in_gg)
        gene2length = get_seq_lengths(in_fasta)
        dbmfile = in_blast + ".add.dbm"
        dbm = anydbm.open(dbmfile, "c")
        fo = open(in_blast)
        for line in fo:
            line = line.rstrip()
            cols = line.split("\t")
            qid, hid, evalue, identity = cols[0], cols[1], float(
                cols[10]), float(cols[2])
            # ignore self-hits and between-species hits, check e-value threshold
            if qid == hid: continue
            if gene2species[qid] != gene2species[hid]: continue
            if evalue > EVALUE: continue
            if identity < IDENTITY: continue
            # check that blast alignment spans at least 75% of the longer sequence
            alnlength, qlength, hlength = int(
                cols[3]), gene2length[qid], gene2length[hid]
            lengthcutoff = 0.80 * max([qlength, hlength])
            if alnlength < lengthcutoff: continue
            if not dbm.has_key(qid): dbm[qid] = ""
            else: dbm[qid] += " "
            dbm[qid] += hid
        fo.close()
        dbm.close()
    else:
        dbmfile = args[1]
    dbm = anydbm.open(dbmfile)

    fo = open(in_orthomcl)
    for line in fo:
        o = OrthoMCLCluster(line.rstrip())
        oldsize = o.get_count()
        additions = []
        for geneid, species in o.get_gene_hash().iteritems():
            if not dbm.has_key(geneid): continue
            [additions.append([x, species]) for x in dbm[geneid].split()]

        for x, species in additions:
            o.add_gene(x, species)
        o.to_s()
        newsize = o.get_count()
        print >> sys.stderr, "%s\t%s\t%s" % (o.get_name(), oldsize, newsize)
def main():
  args = plausi()
  in_orthomcl = args[0]
  EVALUE = float('1e-20')
  IDENTITY = 30.0
  if len(args) == 4:
    in_fasta, in_gg, in_blast = args[1:4]
    gene2species, speciesArray = read_gg(in_gg)
    gene2length = get_seq_lengths(in_fasta)
    dbmfile = in_blast + ".add.dbm"
    dbm = anydbm.open(dbmfile, "c")
    fo = open(in_blast)
    for line in fo: 
      line = line.rstrip()
      cols = line.split("\t")
      qid, hid, evalue, identity = cols[0], cols[1], float(cols[10]), float(cols[2])
      # ignore self-hits and between-species hits, check e-value threshold
      if qid == hid: continue
      if gene2species[qid] != gene2species[hid]: continue
      if evalue > EVALUE: continue
      if identity < IDENTITY: continue
      # check that blast alignment spans at least 75% of the longer sequence
      alnlength, qlength, hlength = int(cols[3]), gene2length[qid], gene2length[hid]
      lengthcutoff = 0.80 * max([qlength, hlength])
      if alnlength < lengthcutoff: continue
      if not dbm.has_key(qid): dbm[qid] = ""
      else: dbm[qid] += " "
      dbm[qid] += hid
    fo.close()
    dbm.close()
  else: dbmfile = args[1]
  dbm = anydbm.open(dbmfile)

  fo = open(in_orthomcl)
  for line in fo:
    o = OrthoMCLCluster(line.rstrip())
    oldsize = o.get_count()
    additions = []
    for geneid, species in o.get_gene_hash().iteritems():
      if not dbm.has_key(geneid): continue
      [additions.append([x, species]) for x in dbm[geneid].split()]

    for x, species in additions: o.add_gene(x,species)
    o.to_s()
    newsize = o.get_count()
    print >> sys.stderr, "%s\t%s\t%s" %(o.get_name(), oldsize, newsize)
def main():
  inFile = plausi()
  fo = open(inFile)
  for line in fo:
    o = OrthoMCLCluster(line.rstrip())
    print o.get_name() + "\t" + o.get_species_hash()['Arath'][0]
def main():
    inFile = plausi()
    fo = open(inFile)
    for line in fo:
        o = OrthoMCLCluster(line.rstrip())
        print o.get_name() + "\t" + o.get_species_hash()['Arath'][0]