コード例 #1
0
#  lookup tables (all start out empty)
#
forward_ids_dict = {}
ab_init_forward_ids_dict = {}
seen = {}
################################################
#  other initialization: both output fields start as empty strings
#
output_gene_location = output_protein = ""
##################################################################
#  Locate the query sequence in the "original" genome, together
#  with the gene/protein/transcript entry it belongs to.
#
outfile = "tmp.fasta"
orig_db_path = "%s/%s/%s" % (genome_path, orig_genome, orig_genome)
orig_search_ids = find_by_blasting(orig_db_path, seqfile, blast, outfile)

#  The first id returned is the one reported as the closest match.
best_hit = orig_search_ids[0]
msg = "the closest match to %s in  %s is %s\n" % (seqfile, orig_genome, best_hit)
#  print(msg) with a single argument prints the same text as the
#  Python 2 statement form `print msg`.
print(msg)
LOG.write(msg)

#  The hit is split on single spaces; fields 0, 1, 2 and 4 are kept
#  (field 3 is not used here).
split_info = re.split(" ", best_hit)
orig_protein, orig_gene_name, orig_transcript, orig_gene_location = (
    split_info[0], split_info[1], split_info[2], split_info[4])

#  Report the protein id and the gene name, each one printed and then
#  written to LOG, in that order.
for msg in (
        "%s id in %s: %s\n" % (seqfile, orig_genome, orig_protein),
        "gene name in %s:   %s\n\n" % (orig_genome, orig_gene_name)):
    print(msg)
    LOG.write(msg)
コード例 #2
0
# Open the forward-search fasta output and the log file for writing.
# NOTE(review): neither handle is closed within this excerpt -- confirm
# that the rest of the script closes them.
forward_fasta_f = open(forward_fasta, 'w')
LOG = open(log_path, "w")
###############################
#  dictionaries (all start out empty)
forward_ids_dict = dict()
ab_init_forward_ids_dict = dict()
seen = dict()
###############################
#  other initialization: both output fields start as empty strings
output_gene_location = ""
output_protein = ""
###############################
#  find the query sequence in the "original" species
#  (together with the gene/protein/transcript entry it belongs to)
# NOTE(review): the next statement is truncated in this excerpt -- the
# right-hand operand of `+` is missing, so it is a syntax error as shown.
# Restore the missing path component from the original source.
known_proteome_f = ensembldb + 
# The path passed to find_by_blasting is known_proteome_f followed by
# "<orig_genome>/<orig_genome>"; note there is no separator between
# known_proteome_f and orig_genome in the format string.
orig_search_ids = find_by_blasting("%s%s/%s" % (known_proteome_f, orig_genome, orig_genome), seqfile, working_results_f)
# The first id returned is the one reported as the closest match.
logger("the closest match to %s in  %s is %s\n" % (seqfile, orig_genome, orig_search_ids[0]))

# The hit is split on single spaces; fields 0, 1, 2 and 4 are kept
# (field 3 is not used here).
split_info = re.split(" ", orig_search_ids[0])
[orig_protein, orig_gene_name, orig_transcript, orig_gene_location] = [split_info[0], split_info[1], split_info[2], split_info[4]]
logger("%s id in %s: %s\n" % (seqfile, orig_genome, orig_protein))

logger("gene name in %s:   %s\n\n" % (orig_genome, orig_gene_name))
# presumably extract_first_seq pulls the orig_protein entry out of
# working_results_f and writes it to orig_seq_f -- verify against the helper
orig_seq_f = "orig_seq.fasta"
extract_first_seq(working_results_f, orig_protein, orig_seq_f)
###############################
#  blast the query sequence against all of the remaining proteomes
#
not_found = []

for species in species_list: