Beispiel #1
0
 def _main(self):
     """Design discriminating 16S primer pairs for the BA2/SunS alignment.

     Workflow:
       1. load the initial alignment, or the previously saved
          '.with_additions.fasta' variant if that file already exists;
       2. otherwise fetch the sequences listed in `add` from the SILVA
          database file, realign and reload the result;
       3. remove excluded records, trim, check that every outgroup is
          present and move the outgroups to the end of the alignment;
       4. search for forward and reverse primers, compile pairs and save
          both the trimmed alignment and the alignment with primers added.

     Returns:
         None on success, otherwise a non-zero error code:
         1 -- the initial alignment could not be loaded;
         2 -- the realigned alignment could not be loaded;
         3 -- an outgroup is missing from the alignment, or no suitable
              primer pairs were found (the code is shared by both cases).
     """
     min_prod = 400
     silva_db = '/home/allis/Documents/INMI/SILVA-DB/SILVA_123_SSURef_Nr99_tax_silva.fasta'
     alifile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.fasta'
     add_filename = FilenameParser.strip_ext(alifile) + '.with_additions.fasta'
     outgroups = [
         'Thermococcus_chitonophagus',
         'SMTZ1-55',
         'contig72135_1581_sunspring_meta',
     ]
     add = ['KF836721.1.1270', 'EU635905.1.1323']
     # nothing is excluded currently; ids kept here for reference:
     # ['Thermococcus_chitonophagus', 'SMTZ1-55', 'BA1-16S', 'contig72135_1581_sunspring_meta']
     exclude = []
     # load alignment; if the file with additions already exists, use it
     # directly and clear add_filename to skip the fetch/realign step
     if os.path.isfile(add_filename):
         alifile = add_filename
         add_filename = ''
     with user_message('Loadding initial alignment...', '\n'):
         orig_ali = AlignmentUtils.load_first(alifile)
         if not orig_ali: return 1
     # load homologs
     if add_filename:
         add_seqs = []
         with user_message('Loadding additional sequences...', '\n'):
             db = SeqView()
             if db.load(silva_db):
                 for sid in add:
                     seq = db.get(sid)
                     if seq: add_seqs.append(seq)
                     else: print('%s not found in %s' % (sid, silva_db))
         # realign data if needed; alifile is unchanged in this branch, so
         # add_filename still names the '.with_additions.fasta' target
         if add_seqs:
             with user_message('Realigning data...', '\n'):
                 AlignmentUtils.align(list(orig_ali) + add_seqs, add_filename)
                 orig_ali = AlignmentUtils.load_first(add_filename)
                 if not orig_ali: return 2
     # process the alignment
     ali = orig_ali.remove(*exclude).trim()
     for out in outgroups:
         # A bare truthiness test would also reject a record found at
         # position 0, so compare against None explicitly.
         # NOTE(review): assumes index() returns None for a missing id -- confirm
         if ali.index(out) is None:
             print('%s not found in the alignment' % out)
             return 3
     # the constant key sorts outgroups after all other ids, so they end up
     # as the last len(outgroups) records
     ali.sort(key=lambda r: 'zzzzzzzz' if r.id in outgroups else r.id)
     AlignmentUtils.save(
         ali,
         '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.trimmed.fasta'
     )
     # the same search parameters are used for both directions
     args = dict(plen=(20, 40),
                 max_mismatches=8,
                 min_match_mismatches=1,
                 first_match_mismatches=1,
                 first_may_match=1,
                 AT_first=True,
                 outgroup=len(outgroups))
     fprimers = PrimerFinder.find_discriminating_primers(ali, **args)
     rprimers = PrimerFinder.find_discriminating_primers(ali,
                                                         reverse=True,
                                                         **args)
     pairs = PrimerFinder.compile_pairs(fprimers, rprimers, min_prod, 'SSBa')
     if not pairs:
         print('\nNo suitable primer pairs found')
         return 3
     PrimerFinder.print_pairs(pairs)
     # primers are added to the original (untrimmed) alignment
     orig_ali = PrimerFinder.add_pairs_to_alignment(pairs, orig_ali)
     AlignmentUtils.save(
         orig_ali,
         '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.with_primers.aln.fasta'
     )
     print('Done')
Beispiel #2
0
 def _main(self):
     """Design discriminating 16S primer pairs for the BA2/SunS alignment.

     Workflow:
       1. load the initial alignment, or the previously saved
          '.with_additions.fasta' variant if that file already exists;
       2. otherwise fetch the sequences listed in `add` from the SILVA
          database file, realign and reload the result;
       3. remove excluded records, trim, check that every outgroup is
          present and move the outgroups to the end of the alignment;
       4. find primers on the alignment and on its reverse complement,
          combine them into pairs whose product is longer than `min_prod`,
          print the pairs, append the primers to the original alignment,
          realign it and save it.

     Returns:
         None on success, otherwise a non-zero error code:
         1 -- the initial alignment could not be loaded;
         2 -- the realigned alignment could not be loaded;
         3 -- an outgroup is missing from the alignment, or no suitable
              primer pairs were found (the code is shared by both cases).
     """
     min_prod = 400
     silva_db = '/home/allis/Documents/INMI/SILVA-DB/SILVA_123_SSURef_Nr99_tax_silva.fasta'
     alifile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.fasta'
     add_filename = FilenameParser.strip_ext(alifile) + '.with_additions.fasta'
     outgroups = ['Thermococcus_chitonophagus', 'SMTZ1-55', 'contig72135_1581_sunspring_meta']
     add = ['KF836721.1.1270', 'EU635905.1.1323']
     # nothing is excluded currently; ids kept here for reference:
     # ['Thermococcus_chitonophagus', 'SMTZ1-55', 'BA1-16S', 'contig72135_1581_sunspring_meta']
     exclude = []
     # load alignment; if the file with additions already exists, use it
     # directly and clear add_filename to skip the fetch/realign step
     if os.path.isfile(add_filename):
         alifile = add_filename
         add_filename = ''
     with user_message('Loadding initial alignment...', '\n'):
         orig_ali = AlignmentUtils.load_first(alifile)
         if not orig_ali: return 1
     # load homologs
     if add_filename:
         add_seqs = []
         with user_message('Loadding additional sequences...', '\n'):
             db = SeqView()
             if db.load(silva_db):
                 for sid in add:
                     seq = db.get(sid)
                     if seq: add_seqs.append(seq)
                     else: print('%s not found in %s' % (sid, silva_db))
         # realign data if needed; alifile is unchanged in this branch, so
         # add_filename still names the '.with_additions.fasta' target
         if add_seqs:
             with user_message('Realigning data...', '\n'):
                 AlignmentUtils.align(list(orig_ali) + add_seqs, add_filename)
                 orig_ali = AlignmentUtils.load_first(add_filename)
                 if not orig_ali: return 2
     # process the alignment
     ali = orig_ali.remove(*exclude).trim()
     for out in outgroups:
         # A bare truthiness test would also reject a record found at
         # position 0, so compare against None explicitly.
         # NOTE(review): assumes index() returns None for a missing id -- confirm
         if ali.index(out) is None:
             print('%s not found in the alignment' % out)
             return 3
     # the constant key sorts outgroups after all other ids, so they end up
     # as the last len(outgroups) records
     ali.sort(key=lambda r: 'zzzzzzzz' if r.id in outgroups else r.id)
     ali_len = ali.get_alignment_length()
     AlignmentUtils.save(ali, '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.trimmed.fasta')
     # the same search parameters are used for both directions
     args = dict(plen=(20, 40),
                 max_mismatches=8,
                 min_match_mismatches=1,
                 first_match_mismatches=1,
                 first_may_match=1,
                 AT_first=True,
                 outgroup=len(outgroups))
     # Materialize both results: rprimers is iterated once per forward
     # primer below, which would silently yield nothing after the first
     # pass if _find_primers returned a one-shot iterator.
     fprimers = list(self._find_primers(ali, **args))
     rprimers = list(self._find_primers(ali.reverse_complement(), **args))
     pairs = []
     for fs, fp in fprimers:
         # NOTE(review): the last element of each found primer is dropped
         # here ([:-1]); its meaning is defined by _find_primers -- confirm
         fprimer = Primer.from_sequences(fp[:-1], 1, 'SSBaF%d' % fs)
         for rs, rp in rprimers:
             # rs is measured on the reverse-complemented alignment, so
             # ali_len - rs converts it back to forward coordinates
             end = ali_len - rs
             if end - fs <= min_prod: continue
             rprimer = Primer.from_sequences(rp[:-1], 1, 'SSBaR%d' % (end + 1))
             pairs.append((fprimer, rprimer))
     if not pairs:
         print('\nNo suitable primer pairs found')
         return 3
     # print every pair, but append each distinct primer (by id) to the
     # original alignment only once
     added = set()
     for num, (fp, rp) in enumerate(pairs, 1):
         print('\npair %d' % num)
         print('%s: %s' % (fp.id, fp))
         print('%s: %s' % (rp.id, rp))
         if fp.id not in added:
             # pad with gaps up to the alignment length
             orig_ali.append(fp.master_sequence + '-' * (orig_ali.get_alignment_length() - len(fp)))
             added.add(fp.id)
         if rp.id not in added:
             # the reverse primer is added as its reverse complement, with
             # the attributes of the master sequence copied over
             rc_seq = copy_attrs(rp.master_sequence,
                                 rp.master_sequence.reverse_complement())
             orig_ali.append(rc_seq + '-' * (orig_ali.get_alignment_length() - len(rp)))
             added.add(rp.id)
     print('')
     # realign so that the appended primers are placed at their targets
     orig_ali = AlignmentUtils.align(orig_ali)
     AlignmentUtils.save(orig_ali, '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.with_primers.aln.fasta')
     print('Done')
Beispiel #3
0
    def _main(self):
        """Find primers flanking the cooS cluster of Thermococcus genomes.

        The method works in two modes, selected by the presence of the
        'CO-clusters.gb' file in the current directory:

        * If the file is missing, the `gene` feature is extracted from the
          genome, BLASTed (or previously saved results are read from
          'blast.results.xml'), the hits are filtered by HSP length,
          the corresponding segments are fetched and written to
          'CO-clusters.gb', and the method returns 0.
        * If the file exists, it is loaded together with several local
          genomes, primers are searched for the 'transF' and 'cooS'
          features, printed, added to the alignments, and the alignments
          are saved to 'transF.aln' and 'cooS.aln'.

        Returns:
            0 after the segments file has been created;
            1 if no forward or no reverse primers were found;
            None after the alignments with primers have been saved.

        Raises:
            RuntimeError: if the genome cannot be loaded, the gene is not
                found, or BLAST produced no results.
        """
        email = '*****@*****.**'
        genome_dir = '/home/allis/Dropbox/Science/Микра/Thermococcus/sequence/GenBank/Thermococcales/Thermococcus/'
        genome = 'Thermococcus_barophilus_Ch5.gb'
        gene = 'TBCH5v1_1369'  #cooS
        database = 'nr'
        # offsets passed to BlastCLI.Query as start_offset/end_offset
        segment = [3200, 12000]

        seq = SeqLoader.load_file(os.path.join(genome_dir, genome))
        if not seq: raise RuntimeError('No genome loaded')
        seq = seq[0]

        index = get_indexes_of_genes(seq, gene)
        if not index: raise RuntimeError('No gene found')

        feature = seq.features[index[0]]
        query = feature.extract(seq)

        segments_file = 'CO-clusters.gb'
        #get cluster variants if needed
        if not os.path.isfile(segments_file):
            blast_file = 'blast.results.xml'
            if os.path.isfile(blast_file):
                # reuse saved results; the records are read completely
                # before the file is closed
                with open(blast_file) as results:
                    blast = list(parse(results))
            else:
                blast = BlastCLI.blast_seq(query,
                                           database,
                                           100,
                                           remote=True,
                                           task='blastn',
                                           parse_results=True,
                                           save_results_to=blast_file)
            if not blast: raise RuntimeError('Blast returned no results')
            # keep only HSPs with alignment length above 700
            flt = BlastFilter(lambda hsp, r: hsp.align_length > 700,
                              filter_hsps=True)
            flt(blast)
            queries = []
            for ali in BlastCLI.iter_alignments(blast):
                q = BlastCLI.Query(ali,
                                   'hsp',
                                   start_offset=segment[0],
                                   end_offset=segment[1])
                # report only the queries that are actually kept; printing
                # queries[-1] unconditionally failed on an empty list and
                # repeated the previous query for rejected ones
                if not q: continue
                queries.append(q)
                print(q)

            segments = BlastWWW.fetch_queries(email, queries)
            safe_write(segments, segments_file)
            for r in segments:
                print('[%s] %s: %dbp' % (r.id, pretty_rec_name(r), len(r)))
            return 0

        #find primers in alignments of the selected features
        local_names = ('Thermococcus_barophilus_DT4-complete-genome.gb',
                       'Thermococcus_ST-423.gb',
                       'Thermococcus_CH1-complete.gb')
        local_files = [os.path.join(genome_dir, f) for f in local_names]
        loader = SeqLoader(self.abort_event)
        segments = loader.load_files([segments_file] + local_files)
        fprimers, transF_ali = find_primers(
            segments, 'transF',
            dict(plen=(20, 30),
                 max_mismatches=5,
                 min_first_matches=3,
                 AT_first=True))
        rprimers, cooS_ali = find_primers(segments,
                                          'cooS',
                                          dict(plen=(20, 30),
                                               max_mismatches=4,
                                               min_first_matches=3,
                                               AT_first=True),
                                          reverse=True)
        if not fprimers:
            print('\nNo forward primers found')
            return 1
        if not rprimers:
            print('\nNo reverse primers found')
            return 1
        print('\nForward primers:')
        for p in fprimers:
            print('%s: %s' % (p.id, p))
        print('\nReverse primers:')
        for p in rprimers:
            print('%s: %s' % (p.id, p))
        print()
        #add primers to alignments and save them
        transF_ali = PrimerFinder.add_primers_to_alignment(
            fprimers, transF_ali)
        cooS_ali = PrimerFinder.add_primers_to_alignment(rprimers,
                                                         cooS_ali,
                                                         reverse=True)
        AlignmentUtils.save(transF_ali, 'transF.aln')
        AlignmentUtils.save(cooS_ali, 'cooS.aln')