def last_against_itself(last_executable, seq_subset_file, last_table_out):
    """Run ``last_executable`` with the subset file against ``subset_db``.

    The database prefix is ``subset_db`` inside the directory that contains
    ``seq_subset_file.name``; the same file is passed as the query and the
    result table goes to ``last_table_out`` (``-o``).  The exit status and
    output of the command are not inspected, and nothing is returned.

    NOTE(review): this name is defined a second time further down the file;
    the later definition is the one that stays bound.
    """
    query_path = seq_subset_file.name
    db_prefix = os.path.dirname(query_path) + PATHDELIM + 'subset_db'
    getstatusoutput('%s -o %s -f 0 %s %s' % (
        last_executable, last_table_out, db_prefix, query_path))
def format_db_last(formatdb_executable, seq_subset_file):
    """Run ``formatdb_executable -p -c`` to build ``subset_db`` from the subset.

    The database prefix is ``subset_db`` in the directory of
    ``seq_subset_file.name``.  The command's exit status and output are
    discarded; the function returns nothing.

    NOTE(review): this name is defined a second time further down the file;
    the later definition is the one that stays bound.
    """
    fasta_path = seq_subset_file.name
    cmd_args = (
        formatdb_executable,
        os.path.dirname(fasta_path) + PATHDELIM + 'subset_db',
        fasta_path,
    )
    getstatusoutput('%s -p -c %s  %s' % cmd_args)
def blast_against_itself(blast_executable, seq_subset_file, blast_table_out):
    """Run ``blast_executable`` using the subset file as both db and query.

    ``seq_subset_file.name`` is passed to ``-db`` and ``-query``; results are
    requested with ``-outfmt 6`` and written to ``blast_table_out``.  The
    command's exit status and output are discarded; nothing is returned.

    NOTE(review): this name is defined a second time further down the file;
    the later definition is the one that stays bound.
    """
    subset_path = seq_subset_file.name
    cmd_args = (blast_executable, subset_path, subset_path, blast_table_out)
    getstatusoutput('%s -outfmt 6 -db  %s -query %s -out  %s' % cmd_args)
def format_db_blast(formatdb_executable, seq_subset_file):
    """Run ``formatdb_executable -dbtype prot -in <subset file>``.

    ``seq_subset_file`` only needs a ``name`` attribute holding the path.
    The command's exit status and output are discarded; nothing is returned.

    NOTE(review): this name is defined again immediately below; the later
    definition is the one that stays bound.
    """
    getstatusoutput(
        '%s -dbtype prot -in %s' % (formatdb_executable, seq_subset_file.name)
    )
def format_db_blast(formatdb_executable, seq_subset_file):
    """Invoke the database formatter on the subset file with ``-dbtype prot``.

    The path is read from ``seq_subset_file.name`` and passed via ``-in``.
    Neither the exit status nor the output of the command is checked, and
    the function returns nothing.
    """
    fasta_path = seq_subset_file.name
    command = '%s -dbtype prot -in %s' % (formatdb_executable, fasta_path)
    getstatusoutput(command)
def last_against_itself(last_executable, seq_subset_file, last_table_out):
    """Align the subset file against the ``subset_db`` database next to it.

    Builds ``<last_executable> -o <last_table_out> -f 0 <db> <query>`` where
    ``<query>`` is ``seq_subset_file.name`` and ``<db>`` is ``subset_db`` in
    the same directory, then runs it through ``getstatusoutput``.  The
    returned status/output pair is ignored and nothing is returned.
    """
    subset_path = seq_subset_file.name
    database = os.path.dirname(subset_path) + PATHDELIM + 'subset_db'
    cmd_args = (last_executable, last_table_out, database, subset_path)
    getstatusoutput('%s -o %s -f 0 %s %s' % cmd_args)
def blast_against_itself(blast_executable, seq_subset_file, blast_table_out):
    """Search the subset file against itself with ``blast_executable``.

    The path in ``seq_subset_file.name`` is used for both ``-db`` and
    ``-query``; output is requested as ``-outfmt 6`` into
    ``blast_table_out``.  The status/output pair from ``getstatusoutput``
    is ignored and nothing is returned.
    """
    fasta_path = seq_subset_file.name
    command = '%s -outfmt 6 -db  %s -query %s -out  %s' % (
        blast_executable, fasta_path, fasta_path, blast_table_out)
    getstatusoutput(command)
def format_db_last(formatdb_executable, seq_subset_file):
    """Format the subset file into the ``subset_db`` database beside it.

    Runs ``<formatdb_executable> -p -c <db> <subset file>`` where ``<db>`` is
    ``subset_db`` in the directory of ``seq_subset_file.name``.  The
    status/output pair from ``getstatusoutput`` is ignored and nothing is
    returned.
    """
    subset_path = seq_subset_file.name
    database = os.path.dirname(subset_path) + PATHDELIM + 'subset_db'
    getstatusoutput(
        '%s -p -c %s  %s' % (formatdb_executable, database, subset_path)
    )
Beispiel #9
0
def start_pathway_tools_api_mode(pathway_tools_exe):
    """Run ``pathway_tools_exe`` with the ``-api`` flag appended.

    The call goes through ``getstatusoutput``, so it blocks until the
    command returns; its exit status and output are discarded and nothing
    is returned.
    """
    getstatusoutput(pathway_tools_exe + " -api")
def _sequin_gene_location(attrib):
    """Return the tab-separated coordinate pair for one feature, or None.

    None is returned when the feature lacks 'start' or 'end'.  A 'partial'
    value of '10', '01' or '11' adds '<' to the start and/or '>' to the end
    coordinate.  For strand '-' the pair is written end-first; any other (or
    missing) strand is written start-first.
    """
    if 'start' not in attrib or 'end' not in attrib:
        return None

    partial_marks = {'10': ('<', ''), '01': ('', '>'), '11': ('<', '>')}
    start_prefix, end_prefix = partial_marks.get(attrib.get('partial'), ('', ''))

    if attrib.get('strand') == '-':
        return str(attrib['end']) + end_prefix + '\t' + start_prefix + str(attrib['start'])
    return start_prefix + str(attrib['start']) + '\t' + str(attrib['end']) + end_prefix


def write_sequin_file(tbl_file_name, contig_dict, sample_name, nucleotide_seq_dict, protein_seq_dict, sequin_input_files):
     """Write the feature table and source table, then run tbl2asn on them.

     Steps, in order:
       1. Derive the sibling '.src', '.fasta' and '.sbt' paths by replacing a
          trailing 'tbl' in ``tbl_file_name``, and copy the input FASTA and
          template files to the latter two.
       2. Write the feature table to ``tbl_file_name``: one '>Feature' header
          and REFERENCE line per contig that has features, then gene / CDS
          lines (with optional locus_tag and product qualifiers) per feature.
       3. Write the '.src' table: a header row, then one row per contig with
          the values read from the 'SequinHeader' section of the parameter
          file.
       4. Check that every required file exists, then run the tbl2asn
          executable through ``getstatusoutput``.

     Parameters:
       tbl_file_name       -- path of the feature table to create.
       contig_dict         -- maps contig id to an iterable of feature dicts;
                              keys read are 'start', 'end', 'strand',
                              'partial', 'locus_tag' and 'product'.
       sample_name         -- not used by this function; kept for callers.
       nucleotide_seq_dict -- maps contig id to its sequence; only the length
                              is used (for the REFERENCE span).
       protein_seq_dict    -- not used by this function; kept for callers.
       sequin_input_files  -- dict with keys 'sequin_fasta',
                              'sequin_sbt_file', 'sequin_tbl2asn' and
                              'sequin_params'.

     Features without both 'start' and 'end' are skipped, because no
     coordinate line can be written for them.  The process exits through
     ``sys.exit(0)`` if any required file is missing.
     """
     sequin_src_filename = re.sub(r'tbl$', 'src', tbl_file_name)
     sequin_output_fasta = re.sub(r'tbl$', 'fasta', tbl_file_name)
     sequin_output_sbt = re.sub(r'tbl$', 'sbt', tbl_file_name)

     shutil.copy(sequin_input_files['sequin_fasta'], sequin_output_fasta)
     shutil.copy(sequin_input_files['sequin_sbt_file'], sequin_output_sbt)
     sequin_required_files = {
         'fasta': sequin_output_fasta,
         'tbl': tbl_file_name,
         'src': sequin_src_filename,
         'tbl2asn': sequin_input_files['sequin_tbl2asn'],
         'sbt': sequin_output_sbt,
     }

     # Feature table: rely on the file object's own buffering instead of
     # accumulating one large string.
     with open(tbl_file_name, 'w') as tbl_file:
         for key in contig_dict:
             header_written = False
             for attrib in contig_dict[key]:
                 if not header_written:
                     # The contig header is emitted together with its first
                     # feature, so contigs without features produce no output.
                     header_written = True
                     try:
                         reference_span = "1\t" + str(len(nucleotide_seq_dict[key]))
                     except (LookupError, TypeError):
                         # Sequence unavailable for this contig.
                         reference_span = "0\t0"
                     tbl_file.write(">Feature %s\n" % (key))
                     tbl_file.write(reference_span + '\t' + "REFERENCE" + '\n')

                 gene_loc = _sequin_gene_location(attrib)
                 if gene_loc is None:
                     continue

                 tbl_file.write(gene_loc + '\t' + "gene" + '\n')
                 if 'locus_tag' in attrib:
                     tbl_file.write('\t\t\t' + "gene" + '\t' + attrib['locus_tag'] + '\n')

                 tbl_file.write(gene_loc + '\t' + "CDS" + '\n')
                 if 'product' in attrib:
                     tbl_file.write('\t\t\t' + "product" + '\t' + attrib['product'] + '\n')

     # Source table: every contig gets the same column values.
     ncbi_sequin_params = parse_parameter_file(sequin_input_files['sequin_params'])
     headers = ['Collection_date', 'Country', 'isolation_source', 'Lat_Lon', 'Organism', 'environmental_sample']

     header_line = 'Sequence_ID'
     value_suffix = ""
     for header_name in headers:
         header_line += '\t' + header_name
         # A '__<name>__' placeholder marks values missing from the parameter file.
         value_suffix += "\t" + get_parameter(ncbi_sequin_params, 'SequinHeader', header_name, default='__' + header_name + '__')

     with open(sequin_src_filename, 'w') as src_file:
         fprintf(src_file, "%s\n", header_line)
         for key in contig_dict:
             fprintf(src_file, "%s\n", key + value_suffix)

     # All inputs must be on disk before tbl2asn is launched.
     for label in sequin_required_files:
         if not path.exists(sequin_required_files[label]):
             print("Could not find file : " + sequin_required_files[label])
             print("Make sure all of the following files are present :")
             for required_label in sequin_required_files:
                 print(required_label)
             # NOTE(review): exits with status 0 although this is a failure
             # path; left unchanged in case callers rely on it -- confirm.
             sys.exit(0)

     args = [sequin_required_files['tbl2asn'], '-t', sequin_required_files['sbt'], '-i', sequin_required_files['fasta'], '-a', 's', '-V', 'v']
     command = ' '.join(args)
     result = getstatusoutput(command)
     if result[0] == 0:
         print("Successfully created the SEQUIN file")