Exemple #1
0
  def check_blat_indel(self, br):

    '''
    Check whether a blat result looks like an indel and, if it passes the
    indel filters, store it as an sv_event on self.se.

    A result is an indel candidate when it spans the query, or when it is
    the only blat result and lies in the target. A candidate is kept only
    if it is valid, has mean coverage below 2, is in the target, has a
    largest indel event of at least the 'indel_size' option, and has
    neither entry of br.rep_man.breakpoint_in_rep set. It must then also
    not be flagged by utils.filter_by_feature (first return value), have
    breakpoint counts at or above 'indel_sr_thresh', and have every
    flanking match cover at least 10% of the query size.

    Args:
      br: blat result to evaluate.

    Returns:
      True if the result is an indel candidate (whether or not it survived
      the filters), False otherwise.
    '''

    params = self.meta_dict['params']
    indel_size_thresh = int(params.opts['indel_size'])
    utils.log(self.logging_name, 'info', 'Checking if blat result contains an indel variant')

    spans_query = br.spans_query()
    single_result = len(self.blat_results) == 1
    if not (spans_query or (single_result and br.in_target)):
      return False
    utils.log(self.logging_name, 'info', 'Blat result spans query (%r) or only one blat result (%r) and blat result in target (%r)' % (spans_query, single_result, br.in_target))

    keep_br = br.valid and br.mean_cov < 2 and br.in_target and (br.indel_maxevent_size[0] >= indel_size_thresh) and (not br.rep_man.breakpoint_in_rep[0] and not br.rep_man.breakpoint_in_rep[1])
    utils.log(self.logging_name, 'debug', 'Keep blat result %r' % keep_br)
    if not keep_br:
      utils.log(self.logging_name, 'debug', 'Indel failed checking criteria: in annotated gene: %r, mean query coverage < 2: %r, in target: %r, in repeat: %r, indel size < %d: %r' % (br.valid, br.mean_cov, br.in_target, ",".join([str(x) for x in br.rep_man.breakpoint_in_rep]), indel_size_thresh, br.indel_maxevent_size[0] < indel_size_thresh))
      # Still an indel candidate, it just failed the basic criteria.
      return True

    contig_counts = self.meta_dict['contig_vals'][1]
    brkpt_cov = [contig_counts.get_counts(x, x, 'indel') for x in br.query_brkpts]
    low_cov = min(brkpt_cov) < params.get_param('indel_sr_thresh')

    # Every flank must match at least 10% of the query size.
    flank_match_thresh = True
    for fm in br.indel_flank_match:
      fm_perc = round((float(fm) / float(br.get_size('query'))) * 100, 2)
      if fm_perc < 10.0:
        flank_match_thresh = False
      # The second value is a percentage, so label it with a literal '%'.
      utils.log(self.logging_name, 'info', 'Indel result has matching flanking sequence of largest indel event of %d (%d%% of query)' % (fm, fm_perc))
    utils.log(self.logging_name, 'info', 'Indel result has matching flanking sequence of largest indel event (10 perc of query) on both sides (%r)' % flank_match_thresh)

    # Only the first flag returned by the feature filter is used here.
    in_ff, _span_ff = utils.filter_by_feature(br.get_brkpt_locs(), self.meta_dict['query_region'], params.opts['keep_intron_vars'])
    if not in_ff and not low_cov and flank_match_thresh:
      self.se = sv_event(br, self.meta_dict['query_region'], self.meta_dict['contig_vals'], self.meta_dict['sbam'])
      utils.log(self.logging_name, 'debug', 'Top hit contains whole query sequence, indel variant')
    else:
      utils.log(self.logging_name, 'debug', 'Indel in intron (%r) or low coverage at breakpoints (%r) or minimum segment size < 20 (%r), filtering out.' % (in_ff, low_cov, min(br.query_blocksizes)))
    return True
Exemple #2
0
 def filter_rearr(self, query_region, params, brkpts, brkpt_counts, brkpt_kmers, rearr_type, disc_read_count):
   '''
   Evaluate the filter conditions for a rearrangement call.

   The result is truthy when at least one of these holds:
     - the smallest value in brkpt_counts['n'] is below 'rearr_sr_thresh'
     - self.br_sorted[0][1] is below 'rearr_minseg_len'
     - both flags returned by utils.filter_by_feature are truthy
     - disc_read_count is below 1
     - rearr_type equals 'rearrangement'
     - the smallest value in brkpt_kmers is 0

   Args:
     query_region: passed through to utils.filter_by_feature.
     params: parameter object providing get_param() and opts.
     brkpts: breakpoints passed to utils.filter_by_feature.
     brkpt_counts: mapping whose 'n' entry holds per-breakpoint counts.
     brkpt_kmers: per-breakpoint kmer values.
     rearr_type: rearrangement type label.
     disc_read_count: number of discordant read pairs.

   Returns:
     The value of the or-chain over the conditions above.
   '''
   # Diagnostic dumps go through the logger instead of stdout.
   utils.log(self.logging_name, 'debug', 'Breakpoint counts %s' % (brkpt_counts,))
   utils.log(self.logging_name, 'debug', 'Parameter options %s' % (params.opts,))

   in_ff, span_ff = utils.filter_by_feature(brkpts, query_region, params.get_param('keep_intron_vars'))
   min_split_reads = min(brkpt_counts['n'])
   sr_thresh = params.get_param('rearr_sr_thresh')
   min_seg_len = self.br_sorted[0][1]
   minseg_thresh = params.get_param('rearr_minseg_len')

   # min(brkpt_kmers) stays inline so it is only evaluated when reached.
   filter_result = (min_split_reads < sr_thresh) or min_seg_len < minseg_thresh or (in_ff and span_ff) or (disc_read_count < 1) or (rearr_type == 'rearrangement') or (min(brkpt_kmers) == 0)

   utils.log(self.logging_name, 'info' ,'Check filter for rearrangement')
   utils.log(self.logging_name, 'info', 'Filter by feature for being in exon (%r) or spanning exon (%r)' % (in_ff, span_ff))
   # Report the split-read threshold first and the observed count second,
   # matching the wording of the message.
   utils.log(self.logging_name, 'info', 'Split read threshold %d, breakpoint read counts %d' % (sr_thresh, min_split_reads))
   utils.log(self.logging_name, 'info', 'Minimum segment length observed (%d) meets threshold (%d)' % (min_seg_len, minseg_thresh))
   utils.log(self.logging_name, 'info', 'Minimum discordant read pairs for rearrangement (%d)' % (disc_read_count))
   return filter_result
Exemple #3
0
    def filter_rearr(self, sv_result):

        '''
        Evaluate the filter conditions for a rearrangement result.

        The result is truthy when at least one of these holds:
          - the smallest breakpoint count in
            sv_result.breakpoint_values['counts']['n'] is below
            'rearr_sr_thresh'
          - the lowest get_nmatch_total() among the realignments is below
            'rearr_minseg_len'
          - both flags returned by utils.filter_by_feature are truthy
          - sv_result.values['disc_read_count'] is below 1
          - sv_result.values['sv_subtype'] equals 'NA'
          - the smallest value in sv_result.breakpoint_values['kmers'] is 0

        Args:
            sv_result: result object providing breakpoint_values,
                query_region, sv_event.realignments and values.

        Returns:
            The value of the or-chain over the conditions above.
        '''

        in_ff, span_ff = utils.filter_by_feature(sv_result.breakpoint_values['ref_pos'], sv_result.query_region, self.params.get_param('keep_intron_vars'))

        # min() yields the first realignment with the lowest match total,
        # the same element a stable sort would place at index 0.
        weakest_segment = min(sv_result.sv_event.realignments, key=lambda seg: seg.get_nmatch_total())
        min_seg_len = weakest_segment.get_nmatch_total()
        min_split_reads = min(sv_result.breakpoint_values['counts']['n'])
        sr_thresh = self.params.get_param('rearr_sr_thresh')
        minseg_thresh = self.params.get_param('rearr_minseg_len')
        disc_read_count = sv_result.values['disc_read_count']

        too_few_split_reads = min_split_reads < sr_thresh
        segment_too_short = min_seg_len < minseg_thresh
        in_and_spans_feature = (in_ff and span_ff)
        no_disc_reads = disc_read_count < 1
        no_subtype = sv_result.values['sv_subtype'] == 'NA'
        zero_kmers = min(sv_result.breakpoint_values['kmers']) == 0
        filter_result = too_few_split_reads or segment_too_short or in_and_spans_feature or no_disc_reads or no_subtype or zero_kmers

        utils.log(self.logging_name, 'info', 'Check filter for rearrangement')
        utils.log(self.logging_name, 'info', 'Filter by feature for being in exon (%r) or spanning exon (%r)' % (in_ff, span_ff))
        # Report the split-read threshold first and the observed count
        # second, matching the wording of the message.
        utils.log(self.logging_name, 'info', 'Split read threshold %d, breakpoint read counts %d' % (sr_thresh, min_split_reads))
        utils.log(self.logging_name, 'info', 'Minimum segment length observed (%d) meets threshold (%d)' % (min_seg_len, minseg_thresh))
        utils.log(self.logging_name, 'info', 'Minimum discordant read pairs for rearrangement (%d)' % (disc_read_count))
        return filter_result
Exemple #4
0
    def filter_rearr(self, sv_result):

        '''
        Evaluate the filter conditions for a rearrangement result.

        The result is truthy when at least one of these holds:
          - the smallest breakpoint count in
            sv_result.breakpoint_values['counts']['n'] is below
            'rearr_sr_thresh'
          - the lowest get_nmatch_total() among the realignments is below
            'rearr_minseg_len'
          - both flags returned by utils.filter_by_feature are truthy
          - sv_result.values['disc_read_count'] is below 1
          - sv_result.values['sv_subtype'] equals 'NA'
          - the smallest value in sv_result.breakpoint_values['kmers'] is 0

        Args:
            sv_result: result object providing breakpoint_values,
                query_region, sv_event.realignments and values.

        Returns:
            The value of the or-chain over the conditions above.
        '''

        in_ff, span_ff = utils.filter_by_feature(sv_result.breakpoint_values['ref_pos'], sv_result.query_region, self.params.get_param('keep_intron_vars'))

        # min() yields the first realignment with the lowest match total,
        # the same element a stable sort would place at index 0.
        weakest_segment = min(sv_result.sv_event.realignments, key=lambda seg: seg.get_nmatch_total())
        min_seg_len = weakest_segment.get_nmatch_total()
        min_split_reads = min(sv_result.breakpoint_values['counts']['n'])
        sr_thresh = self.params.get_param('rearr_sr_thresh')
        minseg_thresh = self.params.get_param('rearr_minseg_len')
        disc_read_count = sv_result.values['disc_read_count']

        too_few_split_reads = min_split_reads < sr_thresh
        segment_too_short = min_seg_len < minseg_thresh
        in_and_spans_feature = (in_ff and span_ff)
        no_disc_reads = disc_read_count < 1
        no_subtype = sv_result.values['sv_subtype'] == 'NA'
        zero_kmers = min(sv_result.breakpoint_values['kmers']) == 0
        filter_result = too_few_split_reads or segment_too_short or in_and_spans_feature or no_disc_reads or no_subtype or zero_kmers

        utils.log(self.logging_name, 'info', 'Check filter for rearrangement')
        utils.log(self.logging_name, 'info', 'Filter by feature for being in exon (%r) or spanning exon (%r)' % (in_ff, span_ff))
        # Report the split-read threshold first and the observed count
        # second, matching the wording of the message.
        utils.log(self.logging_name, 'info', 'Split read threshold %d, breakpoint read counts %d' % (sr_thresh, min_split_reads))
        utils.log(self.logging_name, 'info', 'Minimum segment length observed (%d) meets threshold (%d)' % (min_seg_len, minseg_thresh))
        utils.log(self.logging_name, 'info', 'Minimum discordant read pairs for rearrangement (%d)' % (disc_read_count))
        return filter_result
Exemple #5
0
    def filter_indel(self, sv_result):

        '''
        Decide whether an indel result should be filtered.

        The first realignment of sv_result.sv_event is examined. The result
        is filtered (True) when the segment is not valid, has mean coverage
        of 2 or more, is not in the target, or has a largest indel event
        smaller than the 'indel_size' parameter. Otherwise it is filtered
        when utils.filter_by_feature flags it (first return value), when the
        lowest breakpoint count is below 'indel_sr_thresh', or when any
        flanking match covers less than 10% of the query size.

        Args:
            sv_result: result object providing sv_event.realignments,
                query_region and contig.

        Returns:
            True if the result should be filtered, False if it is kept.
        '''

        indel_size_thresh = int(self.params.get_param('indel_size'))
        indel_segment = sv_result.sv_event.realignments[0]
        utils.log(self.logging_name, 'info', 'Blat result spans query (%r) or only one blat result (%r) and blat result in target (%r)' % (indel_segment.spans_query(), (len(sv_result.sv_event.realignments) == 1), indel_segment.in_target))

        # The repeat-overlap check is not applied here, which is why the
        # 'in repeat' field of the failure message below is always False.
        keep_result = indel_segment.valid and (indel_segment.mean_cov < 2) and indel_segment.in_target and (indel_segment.indel_maxevent_size[0] >= indel_size_thresh)
        utils.log(self.logging_name, 'debug', 'Keep blat result %r' % keep_result)

        if not keep_result:
            utils.log(self.logging_name, 'debug', 'Indel failed checking criteria: in annotated gene: %r, mean query coverage < 2: %r, in target: %r, in repeat: %r, indel size < %d: %r' % (indel_segment.valid, indel_segment.mean_cov, indel_segment.in_target, False, indel_size_thresh, indel_segment.indel_maxevent_size[0] < indel_size_thresh))
            return True

        # Every flank must match at least 10% of the query size.
        flank_match_thresh = True
        for flank_match in indel_segment.indel_flank_match:
            fm_perc = round((float(flank_match) / float(indel_segment.get_size('query'))) * 100, 2)
            if fm_perc < 10.0:
                flank_match_thresh = False
            # The second value is a percentage, so label it with a literal '%'.
            utils.log(self.logging_name, 'info', 'Indel result has matching flanking sequence of largest indel event of %d (%d%% of query)' % (flank_match, fm_perc))

        utils.log(self.logging_name, 'info', 'Indel result has matching flanking sequence of largest indel event (10 perc of query) on both sides (%r)' % flank_match_thresh)
        # Only the first flag returned by the feature filter is used here.
        in_ff, _span_ff = utils.filter_by_feature(indel_segment.get_brkpt_locs(), sv_result.query_region, self.params.get_param('keep_intron_vars'))

        brkpt_cov = [sv_result.contig.get_contig_counts().get_counts(x, x, 'indel') for x in indel_segment.query_brkpts]
        low_cov = min(brkpt_cov) < self.params.get_param('indel_sr_thresh')
        if not in_ff and not low_cov and flank_match_thresh:
            utils.log(self.logging_name, 'debug', 'Top hit contains whole query sequence, indel variant')
            return False

        utils.log(self.logging_name, 'debug', 'Indel in intron (%r) or low coverage at breakpoints (%r) or minimum segment size < 20 (%r), filtering out.' % (in_ff, low_cov, min(indel_segment.query_blocksizes)))
        return True
Exemple #6
0
    def filter_indel(self, sv_result):

        '''
        Report whether the indel carried by sv_result should be filtered.

        Looks at the first realignment of sv_result.sv_event. Returns True
        (filter) as soon as the basic criteria fail: the segment must be
        valid, have mean coverage under 2, be in the target, and have a
        largest indel event of at least the 'indel_size' parameter. A
        segment that passes is still filtered if utils.filter_by_feature
        flags it (first return value), if the lowest breakpoint count is
        under 'indel_sr_thresh', or if any flanking match is under 10% of
        the query size. Returns False when nothing triggers.
        '''

        min_indel_size = int(self.params.get_param('indel_size'))
        segment = sv_result.sv_event.realignments[0]
        is_only_realignment = (len(sv_result.sv_event.realignments) == 1)
        utils.log(
            self.logging_name,
            'info',
            'Blat result spans query (%r) or only one blat result (%r) and blat result in target (%r)' % (segment.spans_query(), is_only_realignment, segment.in_target),
        )

        passes_basic_criteria = (
            segment.valid
            and (segment.mean_cov < 2)
            and segment.in_target
            and (segment.indel_maxevent_size[0] >= min_indel_size)
        )
        utils.log(self.logging_name, 'debug', 'Keep blat result %r' % passes_basic_criteria)

        # Guard clause: a failed basic criterion filters the result at once.
        if not passes_basic_criteria:
            failure_details = (
                segment.valid,
                segment.mean_cov,
                segment.in_target,
                False,
                min_indel_size,
                segment.indel_maxevent_size[0] < min_indel_size,
            )
            utils.log(self.logging_name, 'debug', 'Indel failed checking criteria: in annotated gene: %r, mean query coverage < 2: %r, in target: %r, in repeat: %r, indel size < %d: %r' % failure_details)
            return True

        # A single flank below 10% of the query size fails the flank check.
        flanks_ok = True
        for flank_len in segment.indel_flank_match:
            flank_pct = round((float(flank_len) / float(segment.get_size('query'))) * 100, 2)
            if flank_pct < 10.0:
                flanks_ok = False
            utils.log(self.logging_name, 'info', 'Indel result has matching flanking sequence of largest indel event of %d (%d%% of query)' % (flank_len, flank_pct))

        utils.log(self.logging_name, 'info', 'Indel result has matching flanking sequence of largest indel event (10 %% of query) on both sides (%r)' % flanks_ok)
        in_ff, _ = utils.filter_by_feature(segment.get_brkpt_locs(), sv_result.query_region, self.params.get_param('keep_intron_vars'))

        coverage_at_brkpts = [sv_result.contig.get_contig_counts().get_counts(pos, pos, 'indel') for pos in segment.query_brkpts]
        low_cov = min(coverage_at_brkpts) < self.params.get_param('indel_sr_thresh')

        if in_ff or low_cov or not flanks_ok:
            utils.log(self.logging_name, 'debug', 'Indel in intron (%r) or low coverage at breakpoints (%r) or minimum segment size < 20 (%r), filtering out.' % (in_ff, low_cov, min(segment.query_blocksizes)))
            return True

        utils.log(self.logging_name, 'debug', 'Top hit contains whole query sequence, indel variant')
        return False