################################################
# dictionaries -- all three start out empty
#
forward_ids_dict = {}
ab_init_forward_ids_dict = {}
seen = {}

################################################
# other initialization -- output fields start as empty strings
#
output_gene_location = ""
output_protein = ""

##################################################################
# find the query sequence in the "original" genome (together
# with the gene/protein/transcript entry it belongs to)
#
outfile = "tmp.fasta"
orig_search_ids = find_by_blasting(
    "%s/%s/%s" % (genome_path, orig_genome, orig_genome),
    seqfile,
    blast,
    outfile
)

# Report the first returned id as the closest match, on stdout and in the log.
# print(msg) with a single argument prints exactly what the Python 2
# statement form "print msg" prints.
msg = "the closest match to %s in %s is %s\n" % (
    seqfile, orig_genome, orig_search_ids[0])
print(msg)
LOG.write(msg)

# The first id is a space-separated record; fields 0, 1, 2 and 4 are used
# (field 3 is not read here).  All four are picked in one tuple assignment
# so that a short record leaves none of the names assigned.
split_info = orig_search_ids[0].split(" ")
orig_protein, orig_gene_name, orig_transcript, orig_gene_location = (
    split_info[0], split_info[1], split_info[2], split_info[4])

# Echo the protein id and the gene name, again to both stdout and the log.
for msg in (
    "%s id in %s: %s\n" % (seqfile, orig_genome, orig_protein),
    "gene name in %s: %s\n\n" % (orig_genome, orig_gene_name),
):
    print(msg)
    LOG.write(msg)
# Open the forward-search fasta output and the log file, both for writing
# (any existing content is truncated by mode 'w').
forward_fasta_f = open(forward_fasta, 'w')
LOG = open(log_path, "w")
###############################
# dictionaries (all start out empty)
forward_ids_dict = dict()
ab_init_forward_ids_dict = dict()
seen = dict()
###############################
# other initialization (output fields start as empty strings)
output_gene_location = ""
output_protein = ""
###############################
# find the query sequence in the "original" species
# (together with the gene/protein/transcript entry it belongs to)
# NOTE(review): the right-hand operand of the concatenation below is missing
# in the visible source, so this statement is incomplete as it stands.
# The "%s%s/%s" format that follows puts no separator after
# known_proteome_f, so the missing piece presumably ends in "/" -- TODO
# restore it from the original file before running.
known_proteome_f = ensembldb +
orig_search_ids = find_by_blasting("%s%s/%s" % (known_proteome_f, orig_genome, orig_genome), seqfile, working_results_f)
# the first returned id is reported as the closest match
logger("the closest match to %s in %s is %s\n" % (seqfile, orig_genome, orig_search_ids[0]))
# the first id is split on single spaces; fields 0, 1, 2 and 4 are used
# (field 3 is not read here)
split_info = re.split(" ", orig_search_ids[0])
[orig_protein, orig_gene_name, orig_transcript, orig_gene_location] = [split_info[0], split_info[1], split_info[2], split_info[4]]
logger("%s id in %s: %s\n" % (seqfile, orig_genome, orig_protein))
logger("gene name in %s: %s\n\n" % (orig_genome, orig_gene_name))
# presumably extracts the entry for orig_protein from working_results_f
# into orig_seq_f -- confirm against extract_first_seq
orig_seq_f = "orig_seq.fasta"
extract_first_seq(working_results_f, orig_protein, orig_seq_f)
###############################
# blast the query sequence against all of the remaining proteomes
#
not_found = []
# the body of this loop lies outside the visible part of the file
for species in species_list: