def per_hit_header(self, hit):
    """Build the report header describing how the reaction ended for ``hit``.

    Returns a fixed "no products" notice when ``self._nonzero`` is false.
    Otherwise the header lists the cycle count and the formatted dNTP
    concentration stored in ``self._reaction_ends[hit]``, names the depleted
    reagent if the reaction stopped before ``self._num_cycles`` cycles, and
    lists the periods stored under the ``'poly'`` key, if any.
    """
    if not self._nonzero:
        return '\nNo PCR products have been found.\n'
    outcome = self._reaction_ends[hit]
    parts = [
        'Reaction ended in: %d cycles.\n' % outcome['cycles'],
        'C(dNTP) after reaction: ~%s\n'
        % tdf.format_concentration(outcome['dNTP']),
    ]
    if outcome['cycles'] < self._num_cycles:
        # an early stop is attributed to dNTP when none is left,
        # otherwise to the primers
        if outcome['dNTP'] == 0:
            parts.append('dNTP have been depleted.\n')
        else:
            parts.append('Primers have been depleted.\n')
    if outcome['poly']:
        parts.append(wrap_text('Polymerase activity was '
                               'insufficient in these cycles:\n'))
        # a period whose two bounds coincide is printed as a single number
        periods = ', '.join(
            str(period[0]) if period[0] == period[1]
            else '%d-%d' % tuple(period)
            for period in outcome['poly'])
        parts.append(wrap_text(periods + '\n'))
    parts.append('\n')
    return ''.join(parts)
Beispiel #2
0
 def per_hit_header(self, hit):
     """Return the textual summary of the reaction outcome for ``hit``.

     When ``self._nonzero`` is false a fixed "no products" message is
     returned. Otherwise the summary is assembled from the record kept in
     ``self._reaction_ends[hit]``: number of cycles, formatted dNTP
     concentration, which reagent was depleted (only if the reaction ended
     before ``self._num_cycles`` cycles) and the periods listed under the
     ``'poly'`` key.
     """
     if not self._nonzero:
         return '\nNo PCR products have been found.\n'
     ends = self._reaction_ends[hit]
     summary = 'Reaction ended in: %d cycles.\n' % ends['cycles']
     summary += ('C(dNTP) after reaction: ~%s\n'
                 % tdf.format_concentration(ends['dNTP']))
     if ends['cycles'] < self._num_cycles:
         summary += ('dNTP have been depleted.\n' if ends['dNTP'] == 0
                     else 'Primers have been depleted.\n')
     if ends['poly']:
         summary += wrap_text('Polymerase activity was '
                              'insufficient in these cycles:\n')
         labels = []
         for period in ends['poly']:
             if period[0] != period[1]:
                 labels.append('%d-%d' % tuple(period))
             else:
                 # both bounds are the same: show it once
                 labels.append(str(period[0]))
         summary += wrap_text(', '.join(labels) + '\n')
     return summary + '\n'
Beispiel #3
0
 def _format_header(self):
     """Compose the report header.

     The header starts with the optimization verdict (including
     ``self._exit_status`` on failure) and an explanatory note, followed by
     the header produced by ``iPCR_Base._format_header`` and the report
     header of ``self._PCR_Simulation``.
     """
     if self._optimized:
         verdict = 'Optimization was successful.\n'
         note = wrap_text('Results of PCR simulation with optimized values of the parameters '
                          'are given below.\n\n')
     else:
         verdict = 'Optimization was unsuccessful: %s\n' % self._exit_status
         note = wrap_text('Results of PCR simulation with the values of the parameters '
                          'from the last iteration of the optimization are given below. These '
                          'values, nevertheless, usually give better simulation results than '
                          'the initial ones.\n\n')
     return ''.join((verdict,
                     note,
                     iPCR_Base._format_header(self),
                     self._PCR_Simulation.format_report_header()))
Beispiel #4
0
    def format_products_report(self):
        """Return the full text of the PCR products report.

        The report starts with a wrapped legend and continues with one
        section per key of ``self._products``. The sections are produced by
        ``worker`` through ``self.parallelize_work``. If that call returns
        nothing, a short failure message is returned instead.
        """
        legend = wrap_text('For each target sequence a list of possible '
                           'PCR products which were predicted by the '
                           'simulation is given. '
                           'Information about each product includes:\n'
                           '-start and end positions on a target sequence.\n'
                           '   *the first nucleotide of a sequence is at the position 1\n'
                           '   *primers are not included, so their 3\'-ends are located at '
                           'start-1 and end+1 positions.\n'
                           '-length in base pairs.\n'
                           '   *the length too do not include primers.\n'
                           '-concentration which was calculated during PCR simulation.\n'
                           '-number of PCR cycles during which the product has been generated.\n'
                           '   *if this number is lower than the number of simulated reaction cycles '
                           'it means that primers producing the product were depleted.\n'
                           '-lists of forward and reverse primers which produced the product'
                           '\n\n\n')

        def worker(hit):
            # format the section for a single hit: title, count, products
            found = self._products[hit]
            chunks = [hr(' %s ' % hit, '*'),
                      hr(' %d products have been found ' % len(found), '*')]
            # products are listed in the order of their start positions
            ordered = sorted(found.values(), key=lambda prod: prod.start)
            for number, product in enumerate(ordered, 1):
                chunks.append(hr(' product %d ' % number, '='))
                chunks.append(product.pretty_print(with_name=False,
                                                   include_fwd_3_mismatch=self._with_exonuclease))
                chunks.append(hr('', '='))
            chunks.append(hr('', '*'))
            return ''.join(chunks)

        sections = self.parallelize_work(1, worker, self._products.keys())
        if sections:
            return legend+''.join(sections)+'\n'
        return 'Failed to compile products report. See the log for errors.'
#end class
 def _format_primers_report_header(self):
     """Return the header of the primers report.

     The header consists of a timestamp rule, an explanatory paragraph, the
     PCR conditions, the ``repr`` of every primer in ``self._primers`` and,
     when the ``Tm_min`` values of the first two primers differ by 5 or
     more, a warning.
     """
     sections = [
         time_hr(),
         wrap_text('For each degenerate primer provided, a set '
                   'of unambiguous primers is generated. '
                   'For each such set the minimum, maximum and '
                   'mean melting temperatures are calculated. '
                   'For each primer in each set stable self-'
                   'dimers and hairpins are predicted. '
                   'For every possible combination of two '
                   'unambiguous primers cross-dimers are also '
                   'predicted. If an unambiguous primer is '
                   'provided, it is treated as a set with a '
                   'single element.\n\n'),
         hr(' PCR conditions '),
         TD_Functions.format_PCR_conditions(self._primers)+'\n',
         hr(' primers and their melting temperatures '),
     ]
     sections.extend(repr(primer) + '\n' for primer in self._primers)
     #warning
     if (len(self._primers) > 1
             and abs(self._primers[0].Tm_min - self._primers[1].Tm_min) >= 5):
         sections.append('\nWarning: lowest melting temperatures of sense and antisense primes \n')
         sections.append('         differ more then by 5C\n')
     sections.append('\n')
     return ''.join(sections)
Beispiel #6
0
 def _format_primers_report_header(self):
     """Build the introductory part of the primers report.

     Output order: timestamp rule, description of the analysis, PCR
     conditions for ``self._primers``, one ``repr`` line per primer, and a
     warning if the ``Tm_min`` of the first two primers differ by at least
     5, closed by an empty line.
     """
     description = wrap_text(
         'For each degenerate primer provided, a set '
         'of unambiguous primers is generated. '
         'For each such set the minimum, maximum and '
         'mean melting temperatures are calculated. '
         'For each primer in each set stable self-'
         'dimers and hairpins are predicted. '
         'For every possible combination of two '
         'unambiguous primers cross-dimers are also '
         'predicted. If an unambiguous primer is '
         'provided, it is treated as a set with a '
         'single element.\n\n')
     report = time_hr() + description
     report += hr(' PCR conditions ')
     report += TD_Functions.format_PCR_conditions(self._primers) + '\n'
     report += hr(' primers and their melting temperatures ')
     report += ''.join(repr(primer) + '\n' for primer in self._primers)
     # the warning only makes sense when there is a pair of primers
     tm_gap_too_wide = (
         len(self._primers) > 1
         and abs(self._primers[0].Tm_min - self._primers[1].Tm_min) >= 5)
     if tm_gap_too_wide:
         report += ('\nWarning: lowest melting temperatures of sense and antisense primes \n'
                    '         differ more then by 5C\n')
     return report + '\n'
Beispiel #7
0
 def _format_header(self):
     """Return the report header for an optimization run.

     The first part states whether the optimization succeeded (on failure
     ``self._exit_status`` is included) and which parameter values the
     simulation results correspond to. It is followed by the header from
     ``iPCR_Base._format_header`` and the report header of
     ``self._PCR_Simulation``.
     """
     if not self._optimized:
         outcome = 'Optimization was unsuccessful: %s\n' % self._exit_status
         outcome += wrap_text(
             'Results of PCR simulation with the values of the parameters '
             'from the last iteration of the optimization are given below. These '
             'values, nevertheless, usually give better simulation results than '
             'the initial ones.\n\n')
     else:
         outcome = 'Optimization was successful.\n'
         outcome += wrap_text(
             'Results of PCR simulation with optimized values of the parameters '
             'are given below.\n\n')
     base_header = iPCR_Base._format_header(self)
     simulation_header = self._PCR_Simulation.format_report_header()
     return outcome + base_header + simulation_header
Beispiel #8
0
 def format_quantity_explanation(self):
     """Return the section explaining how product concentrations were estimated.

     The text reports ``self._max_objective_value`` in exponent notation and
     the display threshold derived from ``self._min_quantity_factor``
     (shown as a percentage).
     """
     pieces = [
         hr(' estimation of PCR products concentrations '),
         # only the second literal contains a format specifier, so
         # formatting the joined literal gives the same text
         'The value of the objective function of at the solution '
         '(the lower the better):\n   %e\n' % self._max_objective_value,
         wrap_text('This value shows "distance" to the solution of '
                   'the system of equilibrium equations which were used '
                   'to calculate concentrations of PCR products.\n\n'),
         wrap_text(('Products with concentration less than %.2f%% '
                    'of the concentration of the most abundant '
                    'product or less than initial DNA concentration '
                    'are not shown.'
                    '\n\n') % (self._min_quantity_factor*100)),
         wrap_text('Boundaries of a product and it\'s length do '
                   'not include primers.\n\n'),
         '\n',
     ]
     return ''.join(pieces)
 def format_quantity_explanation(self):
     """Describe the estimation of PCR product concentrations.

     Includes the value of ``self._max_objective_value`` and the cut-off
     percentage computed as ``self._min_quantity_factor * 100``.
     """
     title = hr(' estimation of PCR products concentrations ')
     objective_line = ('The value of the objective function of at the solution '
                       '(the lower the better):\n   %e\n'
                       % self._max_objective_value)
     objective_note = wrap_text(
         'This value shows "distance" to the solution of '
         'the system of equilibrium equations which were used '
         'to calculate concentrations of PCR products.\n\n')
     cutoff_percent = self._min_quantity_factor * 100
     cutoff_note = wrap_text(
         'Products with concentration less than %.2f%% '
         'of the concentration of the most abundant '
         'product or less than initial DNA concentration '
         'are not shown.'
         '\n\n' % cutoff_percent)
     boundaries_note = wrap_text('Boundaries of a product and it\'s length do '
                                 'not include primers.\n\n')
     return (title + objective_line + objective_note
             + cutoff_note + boundaries_note + '\n')
Beispiel #10
0
 def _format_header(self):
     """Return the report header.

     A wrapped description of the filtering and quantification steps is
     followed by the header from ``iPCR_Base._format_header`` and the
     report header of ``self._PCR_Simulation``.
     """
     description = wrap_text('All possible PCR products are '
                             'filtered by amplicon size.\n'
                             'If --no-exonuclease option was '
                             'provided, products which may be formed by '
                             'primers with '
                             "mismatches on 3'-end are ignored.\n"
                             'Quantities of the remaining products '
                             'are estimated using equilibrium equations '
                             'and current PCR parameters.\n\n')
     return ''.join((description,
                     iPCR_Base._format_header(self),
                     self._PCR_Simulation.format_report_header()))
Beispiel #11
0
 def _format_header(self):
     """Compose the header: method description, base header, simulation header."""
     intro = wrap_text(
         'All possible PCR products are '
         'filtered by amplicon size.\n'
         'If --no-exonuclease option was '
         'provided, products which may be formed by '
         'primers with '
         "mismatches on 3'-end are ignored.\n"
         'Quantities of the remaining products '
         'are estimated using equilibrium equations '
         'and current PCR parameters.\n\n')
     base_part = iPCR_Base._format_header(self)
     simulation_part = self._PCR_Simulation.format_report_header()
     return intro + base_part + simulation_part
Beispiel #12
0
 def _format_header(self):
     """Return the report header.

     A wrapped description of the method is followed by the report header
     and the quantity explanation taken from the first element of
     ``self._PCR_Simulations.values()``.
     """
     description = wrap_text('For each hit all alignments are sorted '
                             'into "forward" and "reverse" groups. '
                             'Pairs of forward and reverse '
                             'alignments comprise possible PCR products.\n'
                             'If --no-exonuclease option was '
                             'provided, alignments with mismatches on '
                             "3'-end are ignored.\n"
                             'Quantities of products are estimated '
                             'using equilibrium equations and '
                             'current PCR parameters.\n')
     # both remaining parts come from the same (first) simulation
     first_simulation = self._PCR_Simulations.values()[0]
     return (description
             + first_simulation.format_report_header()
             + first_simulation.format_quantity_explanation())
    def format_products_report(self):
        """Compile the report listing the predicted PCR products per target.

        A wrapped legend is followed by one section for every key of
        ``self._products``. Sections are generated by ``worker`` via
        ``self.parallelize_work``. A failure message is returned when that
        call yields nothing.
        """
        legend = wrap_text(
            'For each target sequence a list of possible '
            'PCR products which were predicted by the '
            'simulation is given. '
            'Information about each product includes:\n'
            '-start and end positions on a target sequence.\n'
            '   *the first nucleotide of a sequence is at the position 1\n'
            '   *primers are not included, so their 3\'-ends are located at '
            'start-1 and end+1 positions.\n'
            '-length in base pairs.\n'
            '   *the length too do not include primers.\n'
            '-concentration which was calculated during PCR simulation.\n'
            '-number of PCR cycles during which the product has been generated.\n'
            '   *if this number is lower than the number of simulated reaction cycles '
            'it means that primers producing the product were depleted.\n'
            '-lists of forward and reverse primers which produced the product'
            '\n\n\n')

        def worker(hit):
            # section title and the number of products of this hit
            hit_products = self._products[hit]
            title = hr(' %s ' % hit, '*')
            title += hr(' %d products have been found ' % len(hit_products),
                        '*')
            # one framed entry per product, ordered by start position
            by_start = sorted(hit_products.values(),
                              key=lambda product: product.start)
            entries = ''.join(
                hr(' product %d ' % (index + 1), '=')
                + product.pretty_print(
                    with_name=False,
                    include_fwd_3_mismatch=self._with_exonuclease)
                + hr('', '=')
                for index, product in enumerate(by_start))
            return title + entries + hr('', '*')

        sections = self.parallelize_work(1, worker, self._products.keys())
        if sections:
            return legend + ''.join(sections) + '\n'
        return 'Failed to compile products report. See the log for errors.'


#end class
Beispiel #14
0
 def formatted_desc(self):
     """Return ``self.desc`` with ``%%`` collapsed to ``%``, passed through
     ``wrap_text`` and stripped."""
     unescaped = self.desc.replace('%%', '%')
     wrapped = wrap_text(unescaped)
     return wrapped.strip()
Beispiel #15
0
'''
Created on 2016-01-14

@author: Allis Tauri <*****@*****.**>
'''

import  cProfile
from BioUtils.Tools.Text import wrap_text, line_by_line

if __name__ == '__main__':
    # Manual check and profiling of the text helpers.
    # NOTE: `txt` must stay a module-level name of the script: the profiled
    # statement at the bottom refers to it (and to `wrap_text`) by name.
    txt = '''If true, TextWrapper attempts to detect sentence endings and ensure 
    that sentences are always separated by exactly two spaces. This is generally 
    desired for text in a monospaced font. However, the sentence detection    
    algorithm is imperfect: it assumes that a sentence ending consists of a 
    lowercase letter followed by one of '.', '!', or '?', possibly followed by 
    one of '"' or "'", followed by a space. One problem with this is algorithm 
    is that it is unable to detect the difference between “Dr.” in'''
    print(wrap_text(txt))
    print('='*80)

    # three sample texts, each paired with its own width
    sample_texts = [
        'asdfasd  sreydstnsr mywyy    eratg AG RADFG SDFGA lkjoiuguivuasdfhpwoiefjahgaiohghouygmuimjiuh',
        'qasf; a[r uq[ewjrfasdhfuiah [WERJ AOSF BA;We werqt',
        'wreyqeuqy a  poiertqprohg aspoei  toeaprt a',
    ]
    sample_widths = [20,30,30]
    print(line_by_line(sample_texts, sample_widths))

    # profile repeated wrapping; statistics go to the 'word_wrap.profile' file
    profiled_statement = '''for i in xrange(10000): 
    wrap_text(txt)'''
    cProfile.run(profiled_statement, 'word_wrap.profile')
    print('Done')
Beispiel #16
0
'''
Created on 2016-01-14

@author: Allis Tauri <*****@*****.**>
'''

import cProfile
from BioUtils.Tools.Text import wrap_text, line_by_line

if __name__ == '__main__':
    # Smoke test of wrap_text/line_by_line followed by a profiling run.
    # NOTE: the name `txt` is used inside the profiled statement below, so
    # it has to remain a global of this script.
    txt = '''If true, TextWrapper attempts to detect sentence endings and ensure 
    that sentences are always separated by exactly two spaces. This is generally 
    desired for text in a monospaced font. However, the sentence detection    
    algorithm is imperfect: it assumes that a sentence ending consists of a 
    lowercase letter followed by one of '.', '!', or '?', possibly followed by 
    one of '"' or "'", followed by a space. One problem with this is algorithm 
    is that it is unable to detect the difference between “Dr.” in'''
    wrapped = wrap_text(txt)
    print(wrapped)
    separator = '=' * 80
    print(separator)

    columns = (
        'asdfasd  sreydstnsr mywyy    eratg AG RADFG SDFGA lkjoiuguivuasdfhpwoiefjahgaiohghouygmuimjiuh',
        'qasf; a[r uq[ewjrfasdhfuiah [WERJ AOSF BA;We werqt',
        'wreyqeuqy a  poiertqprohg aspoei  toeaprt a',
    )
    texts = list(columns)
    widths = [20, 30, 30]
    print(line_by_line(texts, widths))

    # the statement is profiled and its statistics are saved to a file
    statement = '''for i in xrange(10000): 
    wrap_text(txt)'''
    stats_file = 'word_wrap.profile'
    cProfile.run(statement, stats_file)
    print('Done')
Beispiel #17
0
 def formatted_desc(self):
     """Return the description prepared for display: doubled percent signs
     in ``self.desc`` are replaced by single ones, the text is passed
     through ``wrap_text`` and the result is stripped."""
     description = self.desc.replace('%%', '%')
     return wrap_text(description).strip()
Beispiel #18
0
 def write_hits_report(self):
     """Write the report on filtered BLAST hits and register it.

     Nothing is done when ``self._have_blast_results`` is false or when
     ``self._open_report`` does not return a usable report object.

     For every record in ``self._blast_results`` the HSPs of each hit are
     filtered: an HSP is dropped if ``self._duplex_from_hsp`` yields nothing,
     if it has 3' mismatches while ``self._with_exonuclease`` is false, or
     if no primer in ``self._primers`` contains the forward sequence of its
     duplex. Hits left without HSPs are omitted. The remaining hits are
     sorted by the lowest dG of their HSPs and written first as a short
     numbered list and then in full.

     The report object is closed even if formatting fails half-way. On
     success the report file name is printed and passed to
     ``self._add_report``.
     """
     if not self._have_blast_results: return
     blast_report = self._open_report('blast hits report', self._hits_report_filename)
     if not blast_report: return
     try:
         #header
         blast_report.write(time_hr())
         blast_report.write(wrap_text('All hits are filtered by dG of the annealing '
                                      'of alignments and, if --no-exonuclease '
                                      'option was provided, hits with mismatches on '
                                      "3'-end are ignored.\n"))
         blast_report.write('\n')
         #filter parameters
         blast_report.write(hr(' filtration parameters '))
         if self._with_exonuclease:
             blast_report.write("DNA polymerase HAS 3'-5'-exonuclease activity\n")
         else:
             blast_report.write("DNA polymerase doesn't have 3'-5'-exonuclease activity\n")
         blast_report.write('Maximum dG of an alignment: %.2f kcal/mol\n' % max_dimer_dG)
         blast_report.write('\n')
         blast_report.write(hr(''))
         blast_report.write('\n\n')
         #print records
         last_record = len(self._blast_results)-1
         for r, blast_record in enumerate(self._blast_results):
             blast_report.write(hr(' query ID: %s '  % blast_record.query, symbol='#'))
             #filter hits by alignments and format report text for each hit
             hits = []
             for h, hit in enumerate(blast_record.alignments):
                 desc = blast_record.descriptions[h]
                 #check and format hsps
                 hsps = []
                 for hsp in hit.hsps:
                     hsp_duplex = self._duplex_from_hsp(hsp)
                     #check if any stable dimers are formed by this duplex
                     if not hsp_duplex: continue
                     #check 3' mismatch
                     if not self._with_exonuclease \
                     and not hsp_duplex.have_3_matches: continue
                     #get primer concentration
                     for primer in self._primers:
                         if primer.has_subsequence(hsp_duplex.fwd_seq):
                             hsp_primer_concentration = primer.concentration
                             break
                     else:
                         #no primer matches this duplex: without a concentration
                         #the conversion degree cannot be computed, and reusing
                         #the value left from a previous HSP would be wrong
                         continue
                     #format hsps representation
                     hsp_text  = ('score: %d; bits: %d; E-value: %.2e;\n\n'
                                  % (hsp.score, hsp.bits, hsp.expect))
                     hsp_text += hsp_duplex.print_most_stable(include_fwd_3_mismatch=self._with_exonuclease)
                     hsp_text += 'Conversion degree = %.2f%%\n\n' \
                         % (tdf.primer_DNA_conversion_degree(hsp_primer_concentration, hsp_duplex.K)*100)
                     hsp_text += 'Template strand: '
                     if hsp.frame[1] == 1:
                         hsp_text += 'antisense\n'
                         hsp_text += 'Position on template: %d ==> %d\n' \
                             % (hsp.sbjct_start, hsp.sbjct_end)
                     elif hsp.frame[1] == -1:
                         hsp_text += 'sense\n'
                         hsp_text += 'Position on template: %d <== %d\n' \
                             % (hsp.sbjct_start, hsp.sbjct_end)
                     hsp_text += hr('', '.')
                     hsps.append((hsp_duplex.dG, hsp_text))
                 #no need to include weak hits to the report
                 if not hsps: continue
                 #sort hsps by minimum dG
                 hsps.sort(key=lambda x: x[0])
                 #format hit
                 hit_str  = wrap_text(hit.title)+'\n'
                 hit_str += 'Length:     %d\n' % hit.length
                 hit_str += 'Max bits:   %d\n' % desc.bits
                 hit_str += 'Alignments: %d\n' % len(hit.hsps)
                 hit_str += '\n'
                 hit_str += hr(' %d alignments after filtration ' % len(hsps))
                 #the trailing newline of the last hsp text is cut off
                 hit_str += ''.join(_hsp[1] for _hsp in hsps)[:-1]
                 hits.append((hsps[0][0], desc.title, desc.score, desc.bits, desc.e, len(hsps), hit_str))
             if not hits:
                 blast_report.write(wrap_text('All hits were filtered out.\n'))
             #write report to the file
             else:
                 #print the short list of all hits
                 num_hits     = len(hits)
                 num_hits_len = len(str(num_hits))
                 #sort hits by minimum dG
                 hits.sort(key=lambda x: x[0])
                 #print header
                 blast_report.write(hr(' %d hits ' % num_hits,  symbol='#'))
                 for h, hit in enumerate(hits):
                     #pad by the width of the number that is actually printed
                     spacer    = ' '*(num_hits_len-len(str(h+1)))
                     blast_report.write(wrap_text('%d.%s %s\n'
                                                  % (h+1, spacer,
                                                     (hit[1].split('|')[-1]).strip())))
                     blast_report.write(('%s  min dG: %.2f; score: %d; bits: %d; '
                                         'E-value: %.2e; alignments: %d\n\n')
                                        % (' '*num_hits_len, hit[0], hit[2],
                                           hit[3], hit[4], hit[5]))
                 blast_report.write('\n')
                 #print each formatted hit
                 for h, hit in enumerate(hits):
                     blast_report.write(hr(' Hit #%d ' % (h+1), symbol='='))
                     blast_report.write(hit[6])
                     blast_report.write(hr('', symbol='='))
                     if h < num_hits-1: blast_report.write('\n\n')
             blast_report.write(hr('', symbol='#'))
             if r < last_record: blast_report.write('\n\n')
     finally:
         #release the report even if formatting of some record failed
         blast_report.close()
     print('\nTop hits with top HSPs from BLAST results were written to:\n   ' +
           self._hits_report_filename)
     self._add_report('BLAST hits', self._hits_report_filename)