# Echo the message prepared before this point to stdout and to the log file.
# print(msg) with a single argument behaves the same as the Python 2
# statement form `print msg`.
print(msg)
LOG.write(msg)

# The best hit is a space-separated record; fields 0, 1, 2 and 4 are kept
# (field 3 is not used here).
split_info = orig_search_ids[0].split(" ")
orig_protein, orig_gene_name, orig_transcript, orig_gene_location = (
    split_info[0], split_info[1], split_info[2], split_info[4])

msg = "%s id in %s: %s\n" % (seqfile, orig_genome, orig_protein)
print(msg)
LOG.write(msg)

msg = "gene name in %s: %s\n\n" % (orig_genome, orig_gene_name)
print(msg)
LOG.write(msg)

# NOTE(review): presumably extract_first_seq pulls the orig_protein entry out
# of `outfile` and stores it in orig_seq_file -- the helper is defined
# elsewhere, confirm.
orig_seq_file = "orig_seq.fasta"
extract_first_seq(outfile, orig_protein, orig_seq_file)

##################################################################
# blast the query sequence against all of the remaining genomes
#
not_found = []
for genome in genomes:
    # forward search
    msg = "%s\n" % genome
    print(msg)
    LOG.write(msg)

    # per-genome state, reset on every iteration
    found = 0
    found_in_known = 0
    found_in_abinitio = 0
    outfile = "tmp.fasta"
output_gene_location = ""
output_protein = ""

###############################
# find the query sequence in the "original" species
# (together with the gene/protein/transcript entry it belongs to)

# FIXME(review): in the source as received this statement read
# `known_proteome_f = ensembldb + <missing> orig_search_ids = ...`, i.e. the
# right-hand operand of the `+` was lost and two assignments were fused into
# one (a SyntaxError). The missing path suffix must be restored here before
# the script is run; it is NOT reconstructed in this edit.
known_proteome_f = ensembldb

# The database path handed to find_by_blasting is built as
# <known_proteome_f><name>/<name>.
orig_search_ids = find_by_blasting(
    "%s%s/%s" % (known_proteome_f, orig_genome, orig_genome),
    seqfile, working_results_f)

# NOTE(review): there is no emptiness check here, unlike for forward_ids in
# the loop below; an empty result makes the [0] lookup raise IndexError.
closest_match = orig_search_ids[0]
logger("the closest match to %s in %s is %s\n"
       % (seqfile, orig_genome, closest_match))

# The best hit is a space-separated record; fields 0, 1, 2 and 4 are kept
# (field 3 is not used here).
split_info = closest_match.split(" ")
orig_protein, orig_gene_name, orig_transcript, orig_gene_location = (
    split_info[0], split_info[1], split_info[2], split_info[4])

logger("%s id in %s: %s\n" % (seqfile, orig_genome, orig_protein))
logger("gene name in %s: %s\n\n" % (orig_genome, orig_gene_name))

# NOTE(review): presumably extract_first_seq pulls the orig_protein entry out
# of working_results_f and stores it in orig_seq_f -- the helper is defined
# elsewhere, confirm.
orig_seq_f = "orig_seq.fasta"
extract_first_seq(working_results_f, orig_protein, orig_seq_f)

###############################
# blast the query sequence against all of the remaining proteomes
#
not_found = []
for species in species_list:
    ### forward search ###
    logger("%s\n" % species)

    # per-species state, reset on every iteration
    found = 0
    found_in_known = 0
    found_in_abinitio = 0

    forward_ids = find_by_blasting(
        "%s%s/%s" % (known_proteome_f, species, species),
        orig_seq_f, working_results_f)
    if not forward_ids:
        logger("%s not found in %s, \"known\" sequences\n\n"
               % (seqfile, species))