def store_paralogues(cursor_species, gene_id, orthos):
    """Write one 'paralogue' row per entry of orthos for the gene gene_id.

    cursor_species -- cursor used both to resolve stable ids (stable2gene)
                      and to write the rows (store_or_update)
    gene_id        -- id of the gene whose paralogues are being stored
    orthos         -- iterable of 3-item entries
                      (stable id, species, cognate genome db id);
                      the species item is unpacked but not used

    Returns None.
    """
    for para_stable_id, _species, genome_db_id in orthos:
        para_gene_id = stable2gene(cursor_species, para_stable_id)
        # presumably the first dict identifies the row and the second holds
        # the values to write -- confirm against store_or_update
        key_columns = {
            'gene_id': gene_id,
            'cognate_genome_db_id': genome_db_id,
            'cognate_gene_id': para_gene_id,
        }
        store_or_update(cursor_species, 'paralogue', key_columns,
                        {'source': 'ensembl'})
def store_seq_filenames(cursor, name, file_names):
    """Store file_names under the entry called name in the 'seq_region' table.

    The 'name' column is passed to store_or_update as the fixed field and
    'file_name' as the field to update.

    Returns whatever store_or_update returns.
    """
    return store_or_update(cursor, "seq_region",
                           {'name': name}, {'file_name': file_names})
def store_orthologues(cursor_human, ortho_table, cursor, all_species,
                      ensembl_db_name, gene_id, orthos):
    """Write the orthologues of gene_id into the table named ortho_table.

    cursor_human    -- cursor the rows are written through
    ortho_table     -- name of the destination table
    cursor          -- cursor used to resolve stable ids (stable2gene)
    all_species     -- container of species names; entries of orthos whose
                       species is not in it are skipped
    ensembl_db_name -- mapping from species name to the value handed to
                       stable2gene as its third argument
    gene_id         -- id of the gene whose orthologues are being stored
    orthos          -- iterable of 3-item entries
                       (stable id, species, cognate genome db id)

    For the 'orthologue' table the cognate gene id is passed among the update
    fields; for any other table it is passed among the fixed fields.

    Returns None.
    """
    for ortho_stable_id, species_name, genome_db_id in orthos:
        if species_name not in all_species:
            continue

        cognate_gene_id = stable2gene(cursor, ortho_stable_id,
                                      ensembl_db_name[species_name])

        key_columns = {'gene_id': gene_id,
                       'cognate_genome_db_id': genome_db_id}
        value_columns = {'source': 'ensembl'}

        # the cognate gene id goes with the update fields for 'orthologue',
        # and with the fixed fields for every other table
        if ortho_table == 'orthologue':
            destination = value_columns
        else:
            destination = key_columns
        destination['cognate_gene_id'] = cognate_gene_id

        store_or_update(cursor_human, ortho_table, key_columns, value_columns)
def store(cursor, infile): inf = erropen(infile, "r") total = 0 id_not_found = 0 for line in inf: line.rstrip() total += 1 if not total%1000: print "\t", total if ( len(line.split()) != 2 or not 'ENS' in line): continue [stable_id1, stable_id2] = line.split() fixed_fields = {} update_fields = {} fixed_fields['gene_id1'] = stable_id1 fixed_fields['gene_id2'] = stable_id2 store_or_update (cursor, 'paralog', fixed_fields, update_fields) print "done with ", infile, "total ", total inf.close ()
def store(cursor, infile): inf = erropen(infile, "r") total = 0 id_not_found = 0 for line in inf: line.rstrip() total += 1 if not total % 1000: print "\t", total if (len(line.split()) != 2 or not 'ENS' in line): continue [stable_id1, stable_id2] = line.split() fixed_fields = {} update_fields = {} fixed_fields['gene_id1'] = stable_id1 fixed_fields['gene_id2'] = stable_id2 store_or_update(cursor, 'paralog', fixed_fields, update_fields) print "done with ", infile, "total ", total inf.close()
def main(): parameter = {} # in case I ever have to handle multiple versions of ensembl # (but for now I don't have enough space) # note though that there are functions in el_utils/mysql.py that assume # that whatever ensembl stuff is available to the mysql server corresponds to the same release release_number = '76' parameter['ensembl_release_number'] = release_number parameter['blastp_e_value'] = "1.e-10" # it will be used as a string when fmting the blastp cmd parameter['min_accptbl_exon_sim'] = 0.33333 #minimum acceptable exon similarity dir_path = {} dir_path['ensembl_fasta'] = '/mnt/ensembl-mirror/release-'+release_number+'/fasta' # local juggling of data from one database base to the other dir_path['afs_dumps'] = '/afs/bii.a-star.edu.sg/dept/biomodel_design/Group/ivana/' dir_path['afs_dumps'] += 'ExoLocator/results/dumpster' dir_path['resources'] = '/afs/bii.a-star.edu.sg/dept/biomodel_design/Group/ivana/' dir_path['resources'] += 'pypeworks/exolocator/resources' dir_path['scratch'] = '/tmp' dir_path['maxentscan'] = '/afs/bii.a-star.edu.sg/dept/biomodel_design/Group/ivana/' dir_path['maxentscan'] += 'pypeworks/exolocator/pl_utils/maxentscan' util_path = {} util_path['mafft'] = '/usr/bin/mafft' util_path['blastall'] = '/usr/bin/blastall' util_path['fastacmd'] = '/usr/bin/fastacmd' util_path['sw#'] = '/usr/bin/swsharp' util_path['usearch'] = '/usr/bin/usearch' util_path['score3'] = dir_path['maxentscan'] + '/score3.pl' util_path['score5'] = dir_path['maxentscan'] + '/score5.pl' if 1: # check if the paths are functioning (at this point at least) for util in util_path.values(): if (not os.path.exists(util)): print util, " not found " sys.exit (1) for dir in dir_path.values(): if (not os.path.exists(dir)): print dir, " not found " sys.exit (1) if (not os.path.isdir (dir)): print dir, " is not a directory " sys.exit (1) db = connect_to_mysql() cursor = db.cursor() ####################################################### # check if the config db exists -- if not, 
make it db_name = "exolocator_config" qry = "show databases like'%s'" % db_name rows = search_db (cursor, qry) if (not rows): print db_name, "database not found" qry = "create database %s " % db_name rows = search_db (cursor, qry) if (rows): print "some problem creating the database ..." rows = search_db (cursor, qry, verbose = True) else: print db_name, "database found" qry = "use %s " % db_name search_db (cursor, qry) # make tables for table in ['util_path', 'dir_path', 'parameter']: if ( check_table_exists (cursor, db_name, table)): print table, " found in ", db_name else: print table, " not found in ", db_name make_table (cursor, table) # fill util, dir and path tables fixed_fields = {} update_fields = {} for [name, path] in util_path.iteritems(): fixed_fields['name'] = name update_fields['path'] = path store_or_update (cursor, 'util_path', fixed_fields, update_fields) fixed_fields = {} update_fields = {} for [name, path] in dir_path.iteritems(): fixed_fields['name'] = name update_fields['path'] = path store_or_update (cursor, 'dir_path', fixed_fields, update_fields) fixed_fields = {} update_fields = {} for [name, value] in parameter.iteritems(): fixed_fields['name'] = name update_fields['value'] = value store_or_update (cursor, 'parameter', fixed_fields, update_fields) ####################################################### # add trivial names to ncbi_taxonomy.names [all_species, ensembl_db_name] = get_species (cursor) feed_trivial_names (cursor, all_species) ####################################################### # add species shorthands (used in ENS* names formation) # though we will not needed unit the paralogue alignment reconstruction point) feed_name_shorthands (cursor, all_species) cursor.close() db.close()
def feed_name_shorthands (cursor, all_species): short = {} short['ailuropoda_melanoleuca'] = 'AME' short['anas_platyrhynchos'] = 'APL' short['anolis_carolinensis'] = 'ACA' short['astyanax_mexicanus'] = 'AMX' short['bos_taurus'] = 'BTA' short['callithrix_jacchus'] = 'CJA' short['canis_familiaris'] = 'CAF' short['cavia_porcellus'] = 'CPO' short['choloepus_hoffmanni'] = 'CHO' short['danio_rerio'] = 'DAR' short['dasypus_novemcinctus'] = 'DNO' short['dipodomys_ordii'] = 'DOR' short['echinops_telfairi'] = 'ETE' short['equus_caballus'] = 'ECA' short['erinaceus_europaeus'] = 'EEU' short['felis_catus'] = 'FCA' short['ficedula_albicollis'] = 'FAL' short['gadus_morhua'] = 'GMO' short['gallus_gallus'] = 'GAL' short['gasterosteus_aculeatus'] = 'GAC' short['gorilla_gorilla'] = 'GGO' short['homo_sapiens'] = '' short['ictidomys_tridecemlineatus'] = 'STO' short['latimeria_chalumnae'] = 'LAC' short['lepisosteus_oculatus'] = 'LOC' short['loxodonta_africana'] = 'LAF' short['macaca_mulatta'] = 'MMU' short['macropus_eugenii'] = 'MEU' short['meleagris_gallopavo'] = 'MGA' short['microcebus_murinus'] = 'MIC' short['monodelphis_domestica'] = 'MOD' short['mus_musculus'] = 'MUS' short['mustela_putorius_furo'] = 'MPU' short['myotis_lucifugus'] = 'MLU' short['nomascus_leucogenys'] = 'NLE' short['ochotona_princeps'] = 'OPR' short['oreochromis_niloticus'] = 'ONI' short['ornithorhynchus_anatinus'] = 'OAN' short['oryctolagus_cuniculus'] = 'OCU' short['oryzias_latipes'] = 'ORL' short['ovis_aries'] = 'OAR' short['otolemur_garnettii'] = 'OGA' short['pan_troglodytes'] = 'PTR' short['papio_anubis'] = 'PAN' short['poecilia_formosa'] = 'PFO' short['pelodiscus_sinensis'] = 'PSI' short['petromyzon_marinus'] = 'PMA' short['pongo_abelii'] = 'PPY' short['procavia_capensis'] = 'PCA' short['pteropus_vampyrus'] = 'PVA' short['rattus_norvegicus'] = 'RNO' short['sarcophilus_harrisii'] = 'SHA' short['sorex_araneus'] = 'SAR' short['sus_scrofa'] = 'SSC' short['taeniopygia_guttata'] = 'TGU' short['takifugu_rubripes'] = 
'TRU' short['tarsius_syrichta'] = 'TSY' short['tetraodon_nigroviridis'] = 'TNI' short['tupaia_belangeri'] = 'TBE' short['tursiops_truncatus'] = 'TTR' short['vicugna_pacos'] = 'VPA' short['xenopus_tropicalis'] = 'XET' short['xiphophorus_maculatus'] = 'XMA' db_name = get_compara_name (cursor) qry = "use %s " % db_name search_db (cursor, qry) table = 'species_name_shorthands' # if the table does not exist, make it if not check_table_exists (cursor, db_name, table): qry = "CREATE TABLE " + table + " (id INT(10) PRIMARY KEY AUTO_INCREMENT)" rows = search_db (cursor, qry) if (rows): return False qry = "ALTER TABLE %s ADD %s VARCHAR(100)" % (table, 'species') rows = search_db (cursor, qry) if (rows): return False qry = "ALTER TABLE %s ADD %s VARCHAR(10)" % (table, 'shorthand') rows = search_db (cursor, qry) if (rows): return False for species in all_species: if short.has_key(species): fixed_fields = {} update_fields = {} fixed_fields ['species'] = species update_fields ['shorthand'] = short[species] store_or_update (cursor, table, fixed_fields, update_fields) else: print "short for ", species, " not found " short[species] = ""
def feed_trivial_names (cursor, all_species): tax_id = {} trivial = {} trivial['ailuropoda_melanoleuca'] = 'panda' trivial['anas_platyrhynchos'] = 'duck' trivial['anolis_carolinensis'] = 'anole_lizard' trivial['astyanax_mexicanus'] = 'blind_cavefish' trivial['bos_taurus'] = 'cow' trivial['callithrix_jacchus'] = 'marmoset' trivial['canis_familiaris'] = 'dog' trivial['cavia_porcellus'] = 'guinea_pig' trivial['choloepus_hoffmanni'] = 'sloth' trivial['danio_rerio'] = 'zebrafish' trivial['dasypus_novemcinctus'] = 'armadillo' trivial['dipodomys_ordii'] = 'kangaroo_rat' trivial['echinops_telfairi'] = 'madagascar_hedgehog' trivial['equus_caballus'] = 'horse' trivial['erinaceus_europaeus'] = 'european_hedgehog' trivial['felis_catus'] = 'cat' trivial['ficedula_albicollis'] = 'flycatcher' trivial['gadus_morhua'] = 'cod' trivial['gallus_gallus'] = 'chicken' trivial['gasterosteus_aculeatus'] = 'stickleback' trivial['gorilla_gorilla'] = 'gorilla' trivial['homo_sapiens'] = 'human' trivial['ictidomys_tridecemlineatus'] = 'squirrel' trivial['latimeria_chalumnae'] = 'coelacanth' trivial['lepisosteus_oculatus'] = 'spotted_gar' trivial['loxodonta_africana'] = 'elephant' trivial['macaca_mulatta'] = 'macaque' trivial['macropus_eugenii'] = 'wallaby' trivial['meleagris_gallopavo'] = 'turkey' trivial['microcebus_murinus'] = 'lemur' trivial['monodelphis_domestica'] = 'opossum' trivial['mus_musculus'] = 'mouse' trivial['mustela_putorius_furo'] = 'ferret' trivial['myotis_lucifugus'] = 'bat' trivial['nomascus_leucogenys'] = 'gibbon' trivial['ochotona_princeps'] = 'pika' trivial['oreochromis_niloticus'] = 'tilapia' trivial['ornithorhynchus_anatinus'] = 'platypus' trivial['oryctolagus_cuniculus'] = 'rabbit' trivial['oryzias_latipes'] = 'medaka' trivial['otolemur_garnettii'] = 'galago_lemur' trivial['ovis_aries'] = 'sheep' trivial['pan_troglodytes'] = 'chimpanzee' trivial['papio_anubis'] = 'baboon' trivial['pelodiscus_sinensis'] = 'turtle' trivial['petromyzon_marinus'] = 'lamprey' 
trivial['poecilia_formosa'] = 'amazon_molly' trivial['pongo_abelii'] = 'orangutan' trivial['procavia_capensis'] = 'hyrax' trivial['pteropus_vampyrus'] = 'flying_fox' trivial['rattus_norvegicus'] = 'rat' trivial['sarcophilus_harrisii'] = 'tasmanian_devil' trivial['sorex_araneus'] = 'european_shrew' trivial['sus_scrofa'] = 'pig' trivial['taeniopygia_guttata'] = 'zebra_finch' trivial['takifugu_rubripes'] = 'fugu' trivial['tarsius_syrichta'] = 'tarsier' trivial['tetraodon_nigroviridis'] = 'pufferfish' trivial['tupaia_belangeri'] = 'tree_shrew' trivial['tursiops_truncatus'] = 'dolphin' trivial['vicugna_pacos'] = 'alpaca' trivial['xenopus_tropicalis'] = 'xenopus' trivial['xiphophorus_maculatus'] = 'platyfish' db_name = get_compara_name (cursor) if (not db_name): print "compara db not found" exit(1) qry = "use %s " % db_name search_db (cursor, qry) for species in all_species: tax_id[species] = species2taxid (cursor, species) # switch to ncbi taxonomy database db_name = get_ncbi_tax_name (cursor) if (not db_name): print "ncbi taxonomy db not found" exit(1) qry = "use %s " % db_name search_db (cursor, qry) for species in all_species: if trivial.has_key(species): fixed_fields = {} update_fields = {} fixed_fields ['tax_id'] = tax_id[species] fixed_fields ['name_class'] = 'trivial' update_fields['name_txt'] = trivial[species] store_or_update (cursor, 'names', fixed_fields, update_fields) else: print "trivial for ", species, " not found " trivial[species] = "" return True
def multiple_exon_alnmt(gene_list, db_info): print "process pid: %d, length of gene list: %d" % ( get_process_id(), len(gene_list)) [local_db, ensembl_db_name] = db_info db = connect_to_mysql() cfg = ConfigurationReader() acg = AlignmentCommandGenerator() cursor = db.cursor() # find db ids adn common names for each species db [all_species, ensembl_db_name] = get_species (cursor) species = 'homo_sapiens' switch_to_db (cursor, ensembl_db_name[species]) gene_ids = get_gene_ids (cursor, biotype='protein_coding', is_known=1) # for each human gene gene_ct = 0 tot = 0 ok = 0 no_maps = 0 no_pepseq = 0 no_orthologues = 0 min_similarity = cfg.get_value('min_accptbl_exon_sim') #gene_list.reverse() for gene_id in gene_list: start = time() gene_ct += 1 if not gene_ct%10: print gene_ct, "genes out of", len(gene_list) switch_to_db (cursor, ensembl_db_name['homo_sapiens']) print gene_ct, len(gene_ids), gene_id, gene2stable(cursor, gene_id), get_description (cursor, gene_id) human_exons = filter (lambda e: e.is_known==1 and e.is_coding and e.covering_exon<0, gene2exon_list(cursor, gene_id)) human_exons.sort(key=lambda exon: exon.start_in_gene) ################################################################## for human_exon in human_exons: tot += 1 # find all orthologous exons the human exon maps to maps = get_maps(cursor, ensembl_db_name, human_exon.exon_id, human_exon.is_known) if verbose: print "\texon no.", tot, " id", human_exon.exon_id, if not maps: print " no maps" print human_exon print if not maps: no_maps += 1 continue # human sequence to fasta: seqname = "{0}:{1}:{2}".format('homo_sapiens', human_exon.exon_id, human_exon.is_known) switch_to_db (cursor, ensembl_db_name['homo_sapiens']) [exon_seq_id, pepseq, pepseq_transl_start, pepseq_transl_end, left_flank, right_flank, dna_seq] = get_exon_seqs (cursor, human_exon.exon_id, human_exon.is_known) if (not pepseq): if verbose and human_exon.is_coding and human_exon.covering_exon <0: # this should be a master exon print "no 
pep seq for", human_exon.exon_id, "coding ", human_exon.is_coding, print "canonical: ", human_exon.is_canonical print "length of dna ", len(dna_seq) no_pepseq += 1 continue # collect seq from all maps, and output them in fasta format hassw = False headers = [] sequences = {} exons_per_species = {} for map in maps: switch_to_db (cursor, ensembl_db_name[map.species_2]) if map.similarity < min_similarity: continue exon = map2exon(cursor, ensembl_db_name, map) pepseq = get_exon_pepseq (cursor,exon) if (not pepseq): continue if map.source == 'sw_sharp': exon_known_code = 2 hassw = True elif map.source == 'usearch': exon_known_code = 3 hassw = True else: exon_known_code = map.exon_known_2 seqname = "{0}:{1}:{2}".format(map.species_2, map.exon_id_2, exon_known_code) headers.append(seqname) sequences[seqname] = pepseq # for split exon concatenation (see below) if not map.species_2 in exons_per_species.keys(): exons_per_species[map.species_2] = [] exons_per_species[map.species_2].append ([ map.exon_id_2, exon_known_code]); if (len(headers) <=1 ): if verbose: print "single species in the alignment" no_orthologues += 1 continue # concatenate exons from the same gene - the alignment program might go wrong otherwise concatenated = concatenate_exons (cursor, ensembl_db_name, sequences, exons_per_species) fasta_fnm = "{0}/{1}.fa".format( cfg.dir_path['scratch'], human_exon.exon_id) output_fasta (fasta_fnm, sequences.keys(), sequences) # align afa_fnm = "{0}/{1}.afa".format( cfg.dir_path['scratch'], human_exon.exon_id) mafftcmd = acg.generate_mafft_command (fasta_fnm, afa_fnm) ret = commands.getoutput(mafftcmd) if (verbose): print 'almt to', afa_fnm # read in the alignment inf = erropen(afa_fnm, "r") aligned_seqs = {} for record in SeqIO.parse(inf, "fasta"): aligned_seqs[record.id] = str(record.seq) inf.close() # split back the concatenated exons if concatenated: split_concatenated_exons (aligned_seqs, concatenated) human_seq_seen = False for seq_name, sequence in 
aligned_seqs.iteritems(): # if this is one of the concatenated seqs, split them back to two ### store the alignment as bitstring # Generate the bitmap bs = Bits(bin='0b' + re.sub("[^0]","1", sequence.replace('-','0'))) # The returned value of tobytes() will be padded at the end # with between zero and seven 0 bits to make it byte aligned. # I will end up with something that looks like extra alignment gaps, that I'll have to return msa_bitmap = bs.tobytes() # Retrieve information on the cognate cognate_species, cognate_exon_id, cognate_exon_known = seq_name.split(':') if cognate_exon_known == '2': source = 'sw_sharp' elif cognate_exon_known == '3': source = 'usearch' else: source = 'ensembl' if (cognate_species == 'homo_sapiens'): human_seq_seen = True cognate_genome_db_id = species2genome_db_id(cursor, cognate_species) # moves the cursor switch_to_db(cursor, ensembl_db_name['homo_sapiens']) # so move it back to h**o sapiens # Write the bitmap to the database #if (cognate_species == 'homo_sapiens'): if verbose: # and (source=='sw_sharp' or source=='usearch'): print "storing" print human_exon.exon_id, human_exon.is_known print cognate_species, cognate_genome_db_id, cognate_exon_id, cognate_exon_known, source print sequence if not msa_bitmap: print "no msa_bitmap" continue store_or_update(cursor, "exon_map", {"cognate_genome_db_id":cognate_genome_db_id, "cognate_exon_id":cognate_exon_id ,"cognate_exon_known" :cognate_exon_known, "source": source, "exon_id" :human_exon.exon_id, "exon_known":human_exon.is_known}, {"msa_bitstring":MySQLdb.escape_string(msa_bitmap)}) ok += 1 commands.getoutput("rm "+afa_fnm+" "+fasta_fnm) if verbose: print " time: %8.3f\n" % (time()-start); print "tot: ", tot, "ok: ", ok print "no maps ", no_pepseq print "no pepseq ", no_pepseq print "no orthologues ", no_orthologues print
def multiple_exon_alnmt(species_list, db_info): [local_db, ensembl_db_name] = db_info verbose = False db = connect_to_mysql() cfg = ConfigurationReader() acg = AlignmentCommandGenerator() cursor = db.cursor() for species in species_list: print print "############################" print species switch_to_db (cursor, ensembl_db_name[species]) gene_ids = get_gene_ids (cursor, biotype='protein_coding') #gene_ids = get_theme_ids(cursor, cfg, 'wnt_pathway') if not gene_ids: print "no gene_ids" continue gene_ct = 0 tot = 0 ok = 0 no_maps = 0 no_pepseq = 0 no_paralogues = 0 for gene_id in gene_ids: if verbose: start = time() gene_ct += 1 if not gene_ct%100: print species, gene_ct, "genes out of", len(gene_ids) if verbose: print print gene_id, gene2stable(cursor, gene_id), get_description (cursor, gene_id) # get the paralogues - only the representative for the family will have this paralogues = get_paras (cursor, gene_id) if not paralogues: if verbose: print "\t not a template or no paralogues" continue if verbose: print "paralogues: ", paralogues # get _all_ exons template_exons = gene2exon_list(cursor, gene_id) if (not template_exons): if verbose: print 'no exons for ', gene_id continue # find all template exons we are tracking in the database for template_exon in template_exons: if verbose: print template_exon.exon_id maps = get_maps(cursor, ensembl_db_name, template_exon.exon_id, template_exon.is_known, species=species, table='para_exon_map') if not maps: no_maps += 1 continue # output to fasta: seqname = "{0}:{1}:{2}".format('template', template_exon.exon_id, template_exon.is_known) exon_seqs_info = get_exon_seqs (cursor, template_exon.exon_id, template_exon.is_known) if not exon_seqs_info: continue [exon_seq_id, pepseq, pepseq_transl_start, pepseq_transl_end, left_flank, right_flank, dna_seq] = exon_seqs_info if (not pepseq): if ( template_exon.is_coding and template_exon.covering_exon <0): # this should be a master exon print "no pep seq for", template_exon.exon_id, 
"coding ", template_exon.is_coding, print "canonical: ", template_exon.is_canonical print "length of dna ", len(dna_seq) no_pepseq += 1 continue tot += 1 sequences = {seqname:pepseq} headers = [seqname] for map in maps: exon = map2exon(cursor, ensembl_db_name, map, paralogue=True) pepseq = get_exon_pepseq (cursor,exon) if (not pepseq): continue seqname = "{0}:{1}:{2}".format('para', map.exon_id_2, map.exon_known_2) headers.append(seqname) sequences[seqname] = pepseq fasta_fnm = "{0}/{1}_{2}_{3}.fa".format( cfg.dir_path['scratch'], species, template_exon.exon_id, template_exon.is_known) output_fasta (fasta_fnm, headers, sequences) if (len(headers) <=1 ): print "single species in the alignment (?)" no_paralogues += 1 continue # align afa_fnm = "{0}/{1}_{2}_{3}.afa".format( cfg.dir_path['scratch'], species, template_exon.exon_id, template_exon.is_known) mafftcmd = acg.generate_mafft_command (fasta_fnm, afa_fnm) ret = commands.getoutput(mafftcmd) # read in the alignment inf = erropen(afa_fnm, "r") if not inf: print gene_id continue template_seq_seen = False for record in SeqIO.parse(inf, "fasta"): ### store the alignment as bitstring # Generate the bitmap bs = Bits(bin='0b' + re.sub("[^0]","1", str(record.seq).replace('-','0'))) msa_bitmap = bs.tobytes() # Retrieve information on the cognate label, cognate_exon_id, cognate_exon_known = record.id.split(':') if (label == 'template'): template_seq_seen = True # Write the bitmap to the database #print "updating: ", template_exon.exon_id store_or_update(cursor, "para_exon_map", {"cognate_exon_id" :cognate_exon_id, "cognate_exon_known" :cognate_exon_known, "exon_id" :template_exon.exon_id, "exon_known" :template_exon.is_known}, {"msa_bitstring":MySQLdb.escape_string(msa_bitmap)}) inf.close() ok += 1 commands.getoutput("rm "+afa_fnm+" "+fasta_fnm) if verbose: print " time: %8.3f\n" % (time()-start); outstr = species + " done \n" outstr += "tot: %d ok: %d \n" % (tot, ok) outstr += "no maps %d \n" % no_pepseq outstr += "no 
pepseq %d \n" % no_pepseq outstr += "no paralogues %d \n" % no_paralogues outstr += "\n" print outstr