# NOTE(review): this name is defined more than once in this file; the last
# definition wins at import time.
def last_against_itself(last_executable, seq_subset_file, last_table_out):
    """Run the LAST executable on a sequence subset against its own database.

    The command line is
    ``<last_executable> -o <last_table_out> -f 0 <dir>/subset_db <seq file>``
    where ``<dir>`` is the directory of ``seq_subset_file.name`` and the
    separator is the module-level ``PATHDELIM``.

    Args:
        last_executable: Path (or name) of the executable to run.
        seq_subset_file: Object exposing a ``name`` attribute holding the
            path of the sequence file (e.g. an open file object).
        last_table_out: Path passed to ``-o``.

    Returns:
        The ``(status, output)`` tuple from ``getstatusoutput`` so callers
        can detect a failed run.  Earlier versions discarded it and
        returned ``None``.
    """
    seq_path = seq_subset_file.name
    db_prefix = os.path.dirname(seq_path) + PATHDELIM + 'subset_db'
    cmd = '%s -o %s -f 0 %s %s' % (
        last_executable, last_table_out, db_prefix, seq_path)
    return getstatusoutput(cmd)
# NOTE(review): this name is defined more than once in this file; the last
# definition wins at import time.
def format_db_last(formatdb_executable, seq_subset_file):
    """Build the ``subset_db`` database next to the sequence subset file.

    The command line is
    ``<formatdb_executable> -p -c <dir>/subset_db <seq file>`` where
    ``<dir>`` is the directory of ``seq_subset_file.name`` and the separator
    is the module-level ``PATHDELIM``.

    Args:
        formatdb_executable: Path (or name) of the executable to run.
        seq_subset_file: Object exposing a ``name`` attribute holding the
            path of the sequence file.

    Returns:
        The ``(status, output)`` tuple from ``getstatusoutput`` so callers
        can detect a failed run.  Earlier versions discarded it and
        returned ``None``.
    """
    seq_path = seq_subset_file.name
    db_prefix = os.path.dirname(seq_path) + PATHDELIM + 'subset_db'
    cmd = '%s -p -c %s %s' % (formatdb_executable, db_prefix, seq_path)
    return getstatusoutput(cmd)
# NOTE(review): this name is defined more than once in this file; the last
# definition wins at import time.
def blast_against_itself(blast_executable, seq_subset_file, blast_table_out):
    """Run the BLAST executable using the subset file as both db and query.

    The command line is
    ``<blast_executable> -outfmt 6 -db <seq file> -query <seq file>
    -out <blast_table_out>``.

    Args:
        blast_executable: Path (or name) of the executable to run.
        seq_subset_file: Object exposing a ``name`` attribute holding the
            path of the sequence file; it is passed to both ``-db`` and
            ``-query``.
        blast_table_out: Path passed to ``-out``.

    Returns:
        The ``(status, output)`` tuple from ``getstatusoutput`` so callers
        can detect a failed run.  Earlier versions discarded it and
        returned ``None``.
    """
    seq_path = seq_subset_file.name
    cmd = '%s -outfmt 6 -db %s -query %s -out %s' % (
        blast_executable, seq_path, seq_path, blast_table_out)
    return getstatusoutput(cmd)
# NOTE(review): this name is defined more than once in this file; the last
# definition wins at import time.
def format_db_blast(formatdb_executable, seq_subset_file):
    """Format the sequence subset file as a protein database.

    The command line is
    ``<formatdb_executable> -dbtype prot -in <seq file>``.

    Args:
        formatdb_executable: Path (or name) of the executable to run.
        seq_subset_file: Object exposing a ``name`` attribute holding the
            path of the sequence file.

    Returns:
        The ``(status, output)`` tuple from ``getstatusoutput`` so callers
        can detect a failed run.  Earlier versions discarded it and
        returned ``None``.
    """
    cmd = '%s -dbtype prot -in %s' % (
        formatdb_executable, seq_subset_file.name)
    return getstatusoutput(cmd)
# NOTE(review): this name is defined more than once in this file; the last
# definition wins at import time.
def format_db_blast(formatdb_executable, seq_subset_file):
    """Run the database formatter on the sequence subset (protein mode).

    Executes ``<formatdb_executable> -dbtype prot -in <seq file>`` through
    ``getstatusoutput``.

    Args:
        formatdb_executable: Path (or name) of the executable to run.
        seq_subset_file: Object whose ``name`` attribute is the path of the
            sequence file.

    Returns:
        The ``(status, output)`` tuple from ``getstatusoutput``.  It used to
        be discarded (the function returned ``None``), which hid failures.
    """
    command = '%s -dbtype prot -in %s' % (
        formatdb_executable, seq_subset_file.name)
    return getstatusoutput(command)
# NOTE(review): this name is defined more than once in this file; the last
# definition wins at import time.
def last_against_itself(last_executable, seq_subset_file, last_table_out):
    """Search the sequence subset against the ``subset_db`` built beside it.

    Executes
    ``<last_executable> -o <last_table_out> -f 0 <dir>/subset_db <seq file>``
    through ``getstatusoutput``; ``<dir>`` is the directory part of
    ``seq_subset_file.name`` joined with the module-level ``PATHDELIM``.

    Args:
        last_executable: Path (or name) of the executable to run.
        seq_subset_file: Object whose ``name`` attribute is the path of the
            sequence file.
        last_table_out: Path passed to ``-o``.

    Returns:
        The ``(status, output)`` tuple from ``getstatusoutput``.  It used to
        be discarded (the function returned ``None``), which hid failures.
    """
    query_path = seq_subset_file.name
    database = os.path.dirname(query_path) + PATHDELIM + 'subset_db'
    command = '%s -o %s -f 0 %s %s' % (
        last_executable, last_table_out, database, query_path)
    return getstatusoutput(command)
# NOTE(review): this name is defined more than once in this file; the last
# definition wins at import time.
def blast_against_itself(blast_executable, seq_subset_file, blast_table_out):
    """Search the sequence subset against itself with the BLAST executable.

    Executes ``<blast_executable> -outfmt 6 -db <seq file> -query <seq file>
    -out <blast_table_out>`` through ``getstatusoutput``.

    Args:
        blast_executable: Path (or name) of the executable to run.
        seq_subset_file: Object whose ``name`` attribute is the path of the
            sequence file; used for both ``-db`` and ``-query``.
        blast_table_out: Path passed to ``-out``.

    Returns:
        The ``(status, output)`` tuple from ``getstatusoutput``.  It used to
        be discarded (the function returned ``None``), which hid failures.
    """
    query_path = seq_subset_file.name
    command = '%s -outfmt 6 -db %s -query %s -out %s' % (
        blast_executable, query_path, query_path, blast_table_out)
    return getstatusoutput(command)
# NOTE(review): this name is defined more than once in this file; the last
# definition wins at import time.
def format_db_last(formatdb_executable, seq_subset_file):
    """Create the ``subset_db`` database in the sequence file's directory.

    Executes ``<formatdb_executable> -p -c <dir>/subset_db <seq file>``
    through ``getstatusoutput``; ``<dir>`` is the directory part of
    ``seq_subset_file.name`` joined with the module-level ``PATHDELIM``.

    Args:
        formatdb_executable: Path (or name) of the executable to run.
        seq_subset_file: Object whose ``name`` attribute is the path of the
            sequence file.

    Returns:
        The ``(status, output)`` tuple from ``getstatusoutput``.  It used to
        be discarded (the function returned ``None``), which hid failures.
    """
    source_path = seq_subset_file.name
    database = os.path.dirname(source_path) + PATHDELIM + 'subset_db'
    command = '%s -p -c %s %s' % (formatdb_executable, database, source_path)
    return getstatusoutput(command)
def start_pathway_tools_api_mode(pathway_tools_exe):
    """Run the Pathway Tools executable with the ``-api`` flag.

    The call goes through ``getstatusoutput`` and therefore blocks until the
    launched process exits.

    Args:
        pathway_tools_exe: Path (or name) of the executable; `` -api`` is
            appended to it verbatim.

    Returns:
        The ``(status, output)`` tuple from ``getstatusoutput`` so callers
        can detect a failed launch.  Earlier versions discarded it and
        returned ``None``.
    """
    command = pathway_tools_exe + " -api"
    return getstatusoutput(command)
def _sequin_gene_location(attrib):
    """Return the tab-separated location columns for one ORF, or None.

    None is returned when ``attrib`` has no 'start' or no 'end'.  A missing
    'strand' is treated like any non-'-' strand (start first).
    """
    if 'start' not in attrib or 'end' not in attrib:
        return None

    # 'partial' flags: first digit marks an open start, second an open end.
    partial = attrib['partial'] if 'partial' in attrib else None
    start_prefix = '<' if partial in ('10', '11') else ''
    end_prefix = '>' if partial in ('01', '11') else ''

    start = str(attrib['start'])
    end = str(attrib['end'])
    if 'strand' in attrib and attrib['strand'] == '-':
        # Minus strand: end coordinate is written first.
        return end + end_prefix + '\t' + start_prefix + start
    return start_prefix + start + '\t' + end + end_prefix


def _write_feature_table(tbl_handle, contig_dict, nucleotide_seq_dict):
    """Write the '>Feature' blocks for every contig to ``tbl_handle``."""
    for contig_id in contig_dict:
        rows = []
        header_written = False
        for attrib in contig_dict[contig_id]:
            if not header_written:
                # The header is emitted only for contigs with >= 1 ORF.
                header_written = True
                try:
                    span = "1\t" + str(len(nucleotide_seq_dict[contig_id]))
                except (KeyError, TypeError):
                    # Sequence unavailable: keep the historic "0..0" span.
                    span = "0\t0"
                rows.append(">Feature %s\n" % (contig_id,))
                rows.append(span + '\t' + "REFERENCE" + '\n')

            gene_loc = _sequin_gene_location(attrib)
            if gene_loc is None:
                # No coordinates: nothing meaningful can be written.
                continue

            rows.append(gene_loc + '\t' + "gene" + '\n')
            if 'locus_tag' in attrib:
                rows.append('\t\t\t' + "gene" + '\t' + attrib['locus_tag'] + '\n')
            rows.append(gene_loc + '\t' + "CDS" + '\n')
            if 'product' in attrib:
                rows.append('\t\t\t' + "product" + '\t' + attrib['product'] + '\n')
        tbl_handle.write(''.join(rows))


def write_sequin_file(tbl_file_name, contig_dict, sample_name,
                      nucleotide_seq_dict, protein_seq_dict,
                      sequin_input_files):
    """Write the tbl2asn inputs for a sample and run tbl2asn on them.

    Steps, in order:
      1. Derive the ``.src``, ``.fasta`` and ``.sbt`` file names from
         ``tbl_file_name`` by replacing its trailing ``tbl``.
      2. Copy ``sequin_input_files['sequin_fasta']`` and
         ``sequin_input_files['sequin_sbt_file']`` to those names.
      3. Write the feature table to ``tbl_file_name``.
      4. Write the ``.src`` table: one header line, then one line per contig.
         Values come from the 'SequinHeader' section of the parameter file
         ``sequin_input_files['sequin_params']``; a missing value falls back
         to ``__<header>__``.
      5. Check that every required file exists, then run
         ``sequin_input_files['sequin_tbl2asn']`` with
         ``-t <sbt> -i <fasta> -a s -V v``.

    Args:
        tbl_file_name: Path of the feature table to write.
        contig_dict: Maps contig id to an iterable of ORF attribute mappings
            (keys read: 'start', 'end', 'strand', 'partial', 'locus_tag',
            'product').
        sample_name: Unused; kept for interface compatibility.
        nucleotide_seq_dict: Maps contig id to its nucleotide sequence; only
            the length is used.
        protein_seq_dict: Unused; kept for interface compatibility.
        sequin_input_files: Mapping with the keys 'sequin_fasta',
            'sequin_sbt_file', 'sequin_tbl2asn' and 'sequin_params'.

    Changes from the previous version: the ``.src`` header line no longer
    has a leftover contig id prefixed to it (which also raised NameError for
    an empty ``contig_dict``); ORFs without coordinates are skipped instead
    of reusing a stale location; files are closed even on error.
    """
    sequin_src_filename = re.sub(r'tbl$', 'src', tbl_file_name)
    sequin_output_fasta = re.sub(r'tbl$', 'fasta', tbl_file_name)
    sequin_output_sbt = re.sub(r'tbl$', 'sbt', tbl_file_name)

    shutil.copy(sequin_input_files['sequin_fasta'], sequin_output_fasta)
    shutil.copy(sequin_input_files['sequin_sbt_file'], sequin_output_sbt)

    sequin_required_files = {
        'fasta': sequin_output_fasta,
        'tbl': tbl_file_name,
        'src': sequin_src_filename,
        'tbl2asn': sequin_input_files['sequin_tbl2asn'],
        'sbt': sequin_output_sbt,
    }

    with open(tbl_file_name, 'w') as tbl_handle:
        _write_feature_table(tbl_handle, contig_dict, nucleotide_seq_dict)

    ncbi_sequin_params = parse_parameter_file(
        sequin_input_files['sequin_params'])
    headers = ['Collection_date', 'Country', 'isolation_source', 'Lat_Lon',
               'Organism', 'environmental_sample']
    header_line = '\t'.join(['Sequence_ID'] + headers)
    value_suffix = ''.join(
        '\t' + get_parameter(ncbi_sequin_params, 'SequinHeader', header_name,
                             default='__' + header_name + '__')
        for header_name in headers)

    with open(sequin_src_filename, 'w') as src_handle:
        src_handle.write(header_line + '\n')
        for contig_id in contig_dict:
            src_handle.write(contig_id + value_suffix + '\n')

    # tbl2asn needs every one of these; stop before invoking it otherwise.
    for label in sequin_required_files:
        if not path.exists(sequin_required_files[label]):
            print("Could not find file : " + sequin_required_files[label])
            print("Make sure all of the following files are present :")
            for required in sequin_required_files:
                print(required)
            # NOTE(review): exits with status 0 although this is a failure;
            # kept as-is because callers may rely on it -- confirm.
            sys.exit(0)

    args = [sequin_required_files['tbl2asn'],
            '-t', sequin_required_files['sbt'],
            '-i', sequin_required_files['fasta'],
            '-a', 's', '-V', 'v']
    result = getstatusoutput(' '.join(args))
    if result[0] == 0:
        print("Successfully created the SEQUIN file")