コード例 #1
0
def is_acceptable(scol, acceptable_values, lval):
    """Decide whether <lval> is an acceptable value for the column <scol>.

    An exact member of <acceptable_values> is always accepted. Otherwise, if
    the module-level <args.any_allele> is set and the column name contains
    '_gene', <lval> is also accepted when utils.are_alleles() pairs it with
    any of the acceptable values.
    """
    if lval in acceptable_values:  # exact match: no need to look at alleles
        return True
    if not (args.any_allele and '_gene' in scol):
        return False
    for allowed in acceptable_values:
        if utils.are_alleles(allowed, lval):
            return True
    return False
コード例 #2
0
 def set_bool_column(self, true_line, inf_line, column, overall_mute_freq):
     """Tally <column> as 'right' or 'wrong' and fill the matching histogram.

     The inferred value counts as right when utils.are_alleles() accepts the
     true/inferred pair. <overall_mute_freq> is filled into either the
     '<column>_right_vs_mute_freq' or the '<column>_wrong_vs_mute_freq' hist.
     """
     # NOTE this doesn't require allele to be correct, but set_per_gene_support() does
     if utils.are_alleles(true_line[column], inf_line[column]):
         outcome, hist_suffix = 'right', '_right_vs_mute_freq'
     else:
         outcome, hist_suffix = 'wrong', '_wrong_vs_mute_freq'
     self.values[column][outcome] += 1
     # NOTE this'll toss a KeyError if you add bool columns that aren't [vdj]_gene
     self.hists[column + hist_suffix].fill(overall_mute_freq)
コード例 #3
0
 def set_bool_column(self, true_line, inf_line, column, overall_mute_freq):
     """Record whether the inferred <column> agrees with the true one.

     Agreement is decided by utils.are_alleles(); the 'right'/'wrong' counter
     in self.values[column] is bumped, and <overall_mute_freq> is filled into
     the corresponding '<column>_{right,wrong}_vs_mute_freq' histogram.
     """
     # NOTE this doesn't require allele to be correct, but set_per_gene_support() does
     is_match = utils.are_alleles(true_line[column], inf_line[column])
     tally_key = 'right' if is_match else 'wrong'
     hist_name = column + ('_right_vs_mute_freq' if is_match else '_wrong_vs_mute_freq')
     self.values[column][tally_key] += 1
     # NOTE this'll toss a KeyError if you add bool columns that aren't [vdj]_gene
     self.hists[hist_name].fill(overall_mute_freq)
コード例 #4
0
    def evaluate(self, true_line, inf_line):
        """Compare an inferred annotation with the true one and update the tallies.

        For each column in self.values:
          - columns in <bool_columns> get their 'right' or 'wrong' counter bumped,
            according to utils.are_alleles() on the true/inferred values
          - every other column gets the counter for (inferred - true) bumped, where the
            compared quantity is the insertion length, the hamming distance to the true
            naive sequence, or the integer value of the column
        Afterwards every histogram in self.hists is filled with the difference between
        the inferred and the true mutation rate.
        """
        for column in self.values:
            if column in bool_columns:
                # NOTE you have to change this above as well!
                matched = utils.are_alleles(true_line[column], inf_line[column])
                self.values[column]['right' if matched else 'wrong'] += 1
                continue

            if column[2:] == '_insertion':  # insertion length
                trueval, guessval = len(true_line[column]), len(inf_line[column])
            elif 'hamming_to_true_naive' in column:
                # NOTE this is a kind of weird way to do it, since diff ends up as really just the guessval, but it does the job
                trueval = 0
                # if first char in <column> is not an 'h', restrict to that region
                region_only = column[0].replace('h', '')
                want_normalized = '_norm' in column
                guessval = self.hamming_distance_to_true_naive(true_line, inf_line, inf_line['unique_id'], restrict_to_region=region_only, normalize=want_normalized)
            else:
                trueval, guessval = int(true_line[column]), int(inf_line[column])

            diff = guessval - trueval
            if diff in self.values[column]:
                self.values[column][diff] += 1
            else:
                self.values[column][diff] = 1

        for hist_name in self.hists:
            true_rate = utils.get_mutation_rate(self.germlines, true_line)
            inferred_rate = utils.get_mutation_rate(self.germlines, inf_line)
            self.hists[hist_name].fill(inferred_rate - true_rate)
コード例 #5
0
ファイル: joinparser.py プロジェクト: Irrationone/partis
def figure_out_which_damn_gene(germline_seqs, gene_name, seq, debug=False):
    region = utils.get_region(gene_name)
    seq = seq.replace(' ', '')
    if gene_name in germline_seqs[region]:  # already have it, but maybe when we added it before it was a shorter match, so substitute with the new longer match
        if len(seq) > len(germline_seqs[region][gene_name]):
            print '      gl match longer than gl!'
            print '       ', seq
            print '       ', germline_seqs[region][gene_name]
            germline_seqs[region][gene_name] = seq
        return gene_name
    candidates = []

    # if it doesn't specify an allele, see if any of the alleles we've got have the same sequence in the match region
    if gene_name.find('*') == -1:
        for candidate_gene in germline_seqs[region]:
            if candidate_gene.find(gene_name) == 0:
                if seq in germline_seqs[region][candidate_gene]:
                     candidates.append(candidate_gene)

    # if it *does* specify an allele, see if any of the other allele have the same sequence in the match region
    if len(candidates) == 0:  # didn't find anything... try other alleles
        for candidate_gene in germline_seqs[region]:
            if utils.are_alleles(candidate_gene, gene_name):
                if seq in germline_seqs[region][candidate_gene]:
                    candidates.append(candidate_gene)

    # sometimes it's 3-9, but sometimes 3-09. *grrrrrr*.
    if len(candidates) == 0:
        for candidate_gene in germline_seqs[region]:
            if gene_name.replace('-0', '-') == candidate_gene:
                if seq in germline_seqs[region][candidate_gene]:
                    candidates.append(candidate_gene)

    # try adding _F and _P to the end of j names
    if len(candidates) == 0:
        for candidate_gene in germline_seqs[region]:
            if gene_name + '_F' == candidate_gene or gene_name + '_P' == candidate_gene:
                if seq[ : len(germline_seqs[region][candidate_gene])] in germline_seqs[region][candidate_gene]:  # shorten <seq> to account for extra bases on right of imgt j versions
                    candidates.append(candidate_gene)

    # try removing the darn R at the end (and remove the zero). I hope it doesn't mean anything important
    if len(candidates) == 0:
        for candidate_gene in germline_seqs[region]:
            if gene_name.replace('R', '').replace('-0', '-') == candidate_gene:
                if seq in germline_seqs[region][candidate_gene]:
                    candidates.append(candidate_gene)
        
    if len(candidates) == 0:
        print '    ERROR didn\'t find jack for', gene_name, seq
        assert False
    # elif len(candidates) > 1:
    #     print 'NOTE found',len(candidates),'candidates, just using the first one'

    if debug:
        print '     swapping', gene_name, '-->', candidates[0]

    return candidates[0]
コード例 #6
0
 def add_partial_fail(self, true_line, line):
     """Score the bool columns of a partially failed annotation.

     A column in <bool_columns> counts as 'right' only if it is present in
     <line> and utils.are_alleles() accepts the true/inferred pair; otherwise
     it counts as 'wrong'. All other columns in self.values are ignored.
     """
     for column in self.values:
         if column not in bool_columns:
             continue
         # NOTE you have to change this below as well!
         matched = column in line and utils.are_alleles(true_line[column], line[column])
         self.values[column]['right' if matched else 'wrong'] += 1
コード例 #7
0
    def add_partial_fail(self, true_line, line):
        """Score the bool columns of a partially failed annotation.

        A column in <bool_columns> counts as 'right' only if it is present in <line> and
        utils.are_alleles() accepts the true/inferred pair; otherwise it counts as 'wrong'.
        The true line's mutation rate is filled into the matching
        '<column>_{right,wrong}_vs_mute_freq' histogram. Other columns are ignored.
        """
        overall_mute_freq = utils.get_mutation_rate(self.germlines, true_line)  # true value

        for column in self.values:
            if column not in bool_columns:
                continue
            # NOTE you have to change this below as well!
            if column in line and utils.are_alleles(true_line[column], line[column]):
                outcome, hist_suffix = 'right', '_right_vs_mute_freq'
            else:
                outcome, hist_suffix = 'wrong', '_wrong_vs_mute_freq'
            self.values[column][outcome] += 1
            # NOTE this'll toss a KeyError if you add bool columns that aren't [vdj]_gene
            self.hists[column + hist_suffix].fill(overall_mute_freq)
コード例 #8
0
    def add_partial_fail(self, true_line, line):
        """Update the right/wrong tallies for an annotation that only partly succeeded.

        Only columns in <bool_columns> are scored. A column that is missing from <line>, or
        whose value utils.are_alleles() rejects against the true value, is counted as 'wrong'.
        Each scored column also fills its '<column>_{right,wrong}_vs_mute_freq' histogram
        with the mutation rate of <true_line>.
        """
        overall_mute_freq = utils.get_mutation_rate(self.germlines, true_line)  # true value

        for column in self.values:
            if column not in bool_columns:
                continue
            # NOTE you have to change this below as well!
            is_match = column in line and utils.are_alleles(true_line[column], line[column])
            tally_key = 'right' if is_match else 'wrong'
            hist_name = column + ('_right_vs_mute_freq' if is_match else '_wrong_vs_mute_freq')
            self.values[column][tally_key] += 1
            # NOTE this'll toss a KeyError if you add bool columns that aren't [vdj]_gene
            self.hists[hist_name].fill(overall_mute_freq)
コード例 #9
0
    def evaluate(self, true_line, inf_line, padfo=None):
        """Compare an inferred annotation with the true one and update the tallies.

        For each column in self.values (only those in <bool_columns> if
        self.only_correct_gene_fractions is set):
          - bool columns bump their 'right'/'wrong' counter according to utils.are_alleles(),
            and fill the matching '<column>_{right,wrong}_vs_mute_freq' histogram with the
            true line's mutation rate
          - insertion columns and 'hamming_to_true_naive' columns bump the counter for
            (inferred - true)
          - any other column makes the method return immediately (MIXCR change, see below)
        If the loop completes, every histogram in self.hists that is not a '_vs_mute_freq'
        one is filled with (inferred - true) mutation rate, restricted to a region when the
        histogram name contains exactly one of 'v_', 'd_' or 'j_'.

        <padfo> is passed through to self.hamming_distance_to_true_naive().
        """
        # CHANGES FOR MIXCR: the true mutation rate used to be computed unconditionally here.
        # It is still needed by the bool-column branch (leaving it undefined raised a
        # NameError there), so it is now computed on first use instead.
        overall_mute_freq = None

        for column in self.values:
            if self.only_correct_gene_fractions and column not in bool_columns:
                continue
            if column in bool_columns:
                if overall_mute_freq is None:
                    overall_mute_freq = utils.get_mutation_rate(self.germlines, true_line)  # true value
                if utils.are_alleles(true_line[column], inf_line[column]):  # NOTE you have to change this above as well!
                    self.values[column]['right'] += 1
                    self.hists[column + '_right_vs_mute_freq'].fill(overall_mute_freq)  # NOTE this'll toss a KeyError if you add bool column that aren't [vdj]_gene
                else:
                    self.values[column]['wrong'] += 1
                    self.hists[column + '_wrong_vs_mute_freq'].fill(overall_mute_freq)
            else:
                trueval, guessval = 0, 0
                if column[2:] == '_insertion':  # insertion length
                    trueval = len(true_line[column])
                    guessval = len(inf_line[column])
                elif 'hamming_to_true_naive' in column:
                    trueval = 0  # NOTE this is a kind of weird way to do it, since diff ends up as really just the guessval, but it does the job
                    restrict_to_region = column[0].replace('h', '')  # if first char in <column> is not an 'h', restrict to that region
                    normalize = '_norm' in column
                    guessval = self.hamming_distance_to_true_naive(true_line, inf_line, inf_line['unique_id'], restrict_to_region=restrict_to_region, normalize=normalize, padfo=padfo)
                else:
                    # CHANGES FOR MIXCR: the plain integer comparison (int(true) vs int(inferred)) is disabled.
                    # NOTE(review): this returns from the whole method, so any remaining columns and
                    # all of the histograms below are skipped -- confirm that 'continue' was not intended.
                    return

                diff = guessval - trueval
                if diff not in self.values[column]:
                    self.values[column][diff] = 0
                self.values[column][diff] += 1

        for column in self.hists:
            if '_vs_mute_freq' in column:  # fill these above
                continue
            region_tags = re.findall('[vdj]_', column)
            region = region_tags[0][0] if len(region_tags) == 1 else ''
            trueval = utils.get_mutation_rate(self.germlines, true_line, restrict_to_region=region)
            guessval = utils.get_mutation_rate(self.germlines, inf_line, restrict_to_region=region)
            self.hists[column].fill(guessval - trueval)
コード例 #10
0
ファイル: ihhhmmmparser.py プロジェクト: stevenweaver/partis
    def parse_detail(self, fk, unique_id):
        """Parse one 'Details' block from <fk> into an annotation for <unique_id> and score it.

        <fk> is a cursor over the output being parsed: this method reads fk.line, fk.lines,
        fk.iline and fk.eof, and advances it with fk.increment().
        NOTE(review): fk.line is indexed and searched like a list of tokens -- presumably the
        current line split on whitespace; confirm against the class that provides <fk>.

        On success the annotation is passed to self.perfplotter.evaluate(), stored in
        self.details[unique_id], and <unique_id> is removed from self.sim_need. On the
        recoverable failures below the method returns early after skipping ahead to the next
        'Details' line. A missing required field or a sequence mismatch calls sys.exit().
        Always returns None.
        """
        assert fk.iline < len(fk.lines)

        # advance to the start of the next Details block (give up at end of file)
        while fk.line[1] != "Details":
            fk.increment()
            if fk.eof:
                return

        fk.increment()
        info = {}
        info["unique_id"] = unique_id
        # <line_order> lists the fields expected in the block, in order: the text the line must
        # start with, the column to store it under, the token index to read, whether the field
        # is required, and the default to use when an optional field is absent
        for begin_line, column, index, required, default in line_order:
            if fk.line[0].find(begin_line) != 0:
                if required:
                    print "oop", begin_line, fk.line
                    sys.exit()
                else:
                    # optional field is missing: use the default and do *not* advance, so the
                    # current line is tested against the next expected field
                    info[column] = default
                    continue
            if column != "":
                info[column] = clean_value(column, fk.line[index])
                # if '[' in info[column]:
                #     print 'added', column, clean_value(column, fk.line[index])
                if column.find("_gene") == 1:  # i.e. the column is '<one char>_gene'
                    region = column[0]
                    info[region + "_5p_del"] = (
                        int(fk.line[fk.line.index("start:") + 1]) - 1
                    )  # NOTE their indices are 1-based
                    gl_length = int(fk.line[fk.line.index("gene:") + 1]) - 1
                    match_end = int(fk.line[fk.line.index("end:") + 1]) - 1
                    assert gl_length >= match_end
                    info[region + "_3p_del"] = gl_length - match_end

            fk.increment()

        # nothing more to do for ids that aren't in self.sim_need
        if unique_id not in self.sim_need:
            while not fk.eof and fk.line[1] != "Details":  # skip stuff until start of next Detail block
                fk.increment()
            return

        info["fv_insertion"] = ""
        info["jf_insertion"] = ""
        # rebuild the full query sequence from its pieces
        info["seq"] = (
            info["v_qr_seq"] + info["vd_insertion"] + info["d_qr_seq"] + info["dj_insertion"] + info["j_qr_seq"]
        )

        if "-" in info["seq"]:
            print "ERROR found a dash in %s, returning failure" % unique_id
            while not fk.eof and fk.line[1] != "Details":  # skip stuff until start of next Detail block
                fk.increment()
            return

        if (
            info["seq"] not in self.siminfo[unique_id]["seq"]
        ):  # arg. I can't do != because it tacks on v left and j right deletions
            print "ERROR didn't find the right sequence for %s" % unique_id
            print "  ", info["seq"]
            print "  ", self.siminfo[unique_id]["seq"]
            sys.exit()

        if self.args.debug:
            print unique_id
            # region label is blue when inferred and true genes are alleles of each other, red otherwise
            for region in utils.regions:
                infer_gene = info[region + "_gene"]
                true_gene = self.siminfo[unique_id][region + "_gene"]
                if utils.are_alleles(infer_gene, true_gene):
                    regionstr = utils.color("bold", utils.color("blue", region))
                    truestr = ""  #'(originally %s)' % match_name
                else:
                    regionstr = utils.color("bold", utils.color("red", region))
                    truestr = "(true: %s)" % utils.color_gene(true_gene).replace(region, "")
                print "  %s %s %s" % (regionstr, utils.color_gene(infer_gene).replace(region, ""), truestr)

            utils.print_reco_event(self.germline_seqs, self.siminfo[unique_id], label="true:", extra_str="    ")
            utils.print_reco_event(self.germline_seqs, info, label="inferred:", extra_str="    ")

        # check each inferred gene and its germline match against our germline set
        for region in utils.regions:
            if info[region + "_gene"] not in self.germline_seqs[region]:
                print "ERROR %s not in germlines" % info[region + "_gene"]
                assert False

            gl_seq = info[region + "_gl_seq"]
            if "[" in gl_seq:  # ambiguous
                # try each nucleotide in place of the '[' until the result is found in the
                # germline; the first replace() removes every '[', so later iterations are
                # no-ops and the first nucleotide's substitution is what gets stored
                for nuke in utils.nukes:
                    gl_seq = gl_seq.replace("[", nuke)
                    if gl_seq in self.germline_seqs[region][info[region + "_gene"]]:
                        print "  replaced [ with %s" % nuke
                        break
                info[region + "_gl_seq"] = gl_seq

            if info[region + "_gl_seq"] not in self.germline_seqs[region][info[region + "_gene"]]:
                print "ERROR gl match not found for %s in %s" % (info[region + "_gene"], unique_id)
                print "  ", info[region + "_gl_seq"]
                print "  ", self.germline_seqs[region][info[region + "_gene"]]
                self.perfplotter.add_partial_fail(self.siminfo[unique_id], info)
                while not fk.eof and fk.line[1] != "Details":  # skip stuff until start of next Detail block
                    fk.increment()
                return

        # everything checked out: score it and remember it
        self.perfplotter.evaluate(self.siminfo[unique_id], info)
        self.details[unique_id] = info
        self.sim_need.remove(unique_id)

        while not fk.eof and fk.line[1] != "Details":  # skip stuff until start of next Detail block
            fk.increment()
コード例 #11
0
    def process_query(self, qr_info, query_name, query_lines):
        # split query_lines up into blocks
        blocks = []
        for line in query_lines:
            if line.find('Query_') == 0:
                blocks.append([])
            if len(line) == 0:
                continue
            if len(re.findall('<a name=#_[0-9][0-9]*_IGH', line)) == 0 and line.find('Query_') != 0:
                continue
            if len(blocks) == 0:
                print 'wtf? %s' % query_name  # it's probably kicking a reverse match
                self.perfplotter.add_partial_fail(self.seqinfo[query_name], qr_info)  # NOTE that's really a total failure
                self.n_partially_failed += 1
                return
            blocks[-1].append(line)

        # then process each block
        for block in blocks:
            self.process_single_block(block, query_name, qr_info)
            if 'skip_gene' in qr_info:
                self.n_skipped += 1
                return
            if 'fail' in qr_info:
                self.perfplotter.add_partial_fail(self.seqinfo[query_name], qr_info)
                self.n_partially_failed += 1
                return

        for region in utils.regions:
            if region + '_gene' not in qr_info:
                print '    %d: no %s match' % (query_name, region)
                self.perfplotter.add_partial_fail(self.seqinfo[query_name], qr_info)
                self.n_partially_failed += 1
                return

        # expand v match to left end and j match to right end
        qr_info['v_5p_del'] = 0
        qr_info['fv_insertion'] = ''
        if qr_info['match_start'] > 0:
            if self.args.debug:
                print '    add to v left:', self.seqinfo[query_name]['seq'][ : qr_info['match_start']]
            qr_info['seq'] = self.seqinfo[query_name]['seq'][ : qr_info['match_start']] + qr_info['seq']

        qr_info['j_3p_del'] = 0
        qr_info['jf_insertion'] = ''
        if len(self.seqinfo[query_name]['seq']) > qr_info['match_end']:
            if self.args.debug:
                print '    add to j right:', self.seqinfo[query_name]['seq'][ qr_info['match_end'] - len(self.seqinfo[query_name]['seq']) : ]
            qr_info['seq'] = qr_info['seq'] + self.seqinfo[query_name]['seq'][ qr_info['match_end'] - len(self.seqinfo[query_name]['seq']) : ]

        for boundary in utils.boundaries:
            start = qr_info[boundary[0] + '_qr_bounds'][1]
            end = qr_info[boundary[1] + '_qr_bounds'][0]
            qr_info[boundary + '_insertion'] = qr_info['seq'][start : end]

        for region in utils.regions:
            start = qr_info[region + '_qr_bounds'][0]
            end = qr_info[region + '_qr_bounds'][1]
            qr_info[region + '_qr_seq'] = qr_info['seq'][start : end]

        try:
            resolve_overlapping_matches(qr_info, self.args.debug, self.germline_seqs)
        except AssertionError:
            print '    %s: apportionment failed' % query_name
            self.perfplotter.add_partial_fail(self.seqinfo[query_name], qr_info)
            self.n_partially_failed += 1
            return

        if self.args.debug:
            print '  query seq:', qr_info['seq']
            for region in utils.regions:
                true_gene = self.seqinfo[query_name][region + '_gene']
                infer_gene = qr_info[region + '_gene']
                if utils.are_alleles(infer_gene, true_gene):
                    regionstr = utils.color('bold', utils.color('blue', region))
                    truestr = ''  #'(originally %s)' % match_name
                else:
                    regionstr = utils.color('bold', utils.color('red', region))
                    truestr = '(true: %s)' % utils.color_gene(true_gene).replace(region, '')
                # print '  %s %s %s' % (regionstr, utils.color_gene(infer_gene).replace(region, ''), truestr)

                print '    %s %3d %3d %s %s %s' % (regionstr, qr_info[region + '_qr_bounds'][0], qr_info[region + '_qr_bounds'][1], utils.color_gene(infer_gene).replace(region, ''), truestr, qr_info[region + '_gl_seq'])
        for boundary in utils.boundaries:
            start = qr_info[boundary[0] + '_qr_bounds'][1]
            end = qr_info[boundary[1] + '_qr_bounds'][0]
            qr_info[boundary + '_insertion'] = qr_info['seq'][start : end]
            if self.args.debug:
                print '   ', boundary, qr_info[boundary + '_insertion']

        self.perfplotter.evaluate(self.seqinfo[query_name], qr_info)
        # for key, val in qr_info.items():
        #     print key, val
        if self.args.debug:
            utils.print_reco_event(self.germline_seqs, self.seqinfo[query_name], label='true:', extra_str='  ')
            utils.print_reco_event(self.germline_seqs, qr_info, extra_str=' ')
コード例 #12
0
def figure_out_which_damn_gene(germline_seqs, gene_name, seq, debug=False):
    region = utils.get_region(gene_name)
    seq = seq.replace(' ', '')
    if gene_name in germline_seqs[
            region]:  # already have it, but maybe when we added it before it was a shorter match, so substitute with the new longer match
        if len(seq) > len(germline_seqs[region][gene_name]):
            print '      gl match longer than gl!'
            print '       ', seq
            print '       ', germline_seqs[region][gene_name]
            germline_seqs[region][gene_name] = seq
        return gene_name
    candidates = []

    # if it doesn't specify an allele, see if any of the alleles we've got have the same sequence in the match region
    if gene_name.find('*') == -1:
        for candidate_gene in germline_seqs[region]:
            if candidate_gene.find(gene_name) == 0:
                if seq in germline_seqs[region][candidate_gene]:
                    candidates.append(candidate_gene)

    # if it *does* specify an allele, see if any of the other allele have the same sequence in the match region
    if len(candidates) == 0:  # didn't find anything... try other alleles
        for candidate_gene in germline_seqs[region]:
            if utils.are_alleles(candidate_gene, gene_name):
                if seq in germline_seqs[region][candidate_gene]:
                    candidates.append(candidate_gene)

    # sometimes it's 3-9, but sometimes 3-09. *grrrrrr*.
    if len(candidates) == 0:
        for candidate_gene in germline_seqs[region]:
            if gene_name.replace('-0', '-') == candidate_gene:
                if seq in germline_seqs[region][candidate_gene]:
                    candidates.append(candidate_gene)

    # try adding _F and _P to the end of j names
    if len(candidates) == 0:
        for candidate_gene in germline_seqs[region]:
            if gene_name + '_F' == candidate_gene or gene_name + '_P' == candidate_gene:
                if seq[:len(
                        germline_seqs[region][candidate_gene]
                )] in germline_seqs[region][
                        candidate_gene]:  # shorten <seq> to account for extra bases on right of imgt j versions
                    candidates.append(candidate_gene)

    # try removing the darn R at the end (and remove the zero). I hope it doesn't mean anything important
    if len(candidates) == 0:
        for candidate_gene in germline_seqs[region]:
            if gene_name.replace('R', '').replace('-0', '-') == candidate_gene:
                if seq in germline_seqs[region][candidate_gene]:
                    candidates.append(candidate_gene)

    if len(candidates) == 0:
        print '    ERROR didn\'t find jack for', gene_name, seq
        assert False
    # elif len(candidates) > 1:
    #     print 'NOTE found',len(candidates),'candidates, just using the first one'

    if debug:
        print '     swapping', gene_name, '-->', candidates[0]

    return candidates[0]
コード例 #13
0
    def process_query(self, qr_info, query_name, query_lines):
        # split query_lines up into blocks
        blocks = []
        for line in query_lines:
            if line.find('Query_') == 0:
                blocks.append([])
            if len(line) == 0:
                continue
            if len(re.findall('<a name=#_[0-9][0-9]*_IGH',
                              line)) == 0 and line.find('Query_') != 0:
                continue
            if len(blocks) == 0:
                print 'wtf? %s' % query_name  # it's probably kicking a reverse match
                self.perfplotter.add_partial_fail(
                    self.seqinfo[query_name],
                    qr_info)  # NOTE that's really a total failure
                self.n_partially_failed += 1
                return
            blocks[-1].append(line)

        # then process each block
        for block in blocks:
            self.process_single_block(block, query_name, qr_info)
            if 'skip_gene' in qr_info:
                self.n_skipped += 1
                return
            if 'fail' in qr_info:
                self.perfplotter.add_partial_fail(self.seqinfo[query_name],
                                                  qr_info)
                self.n_partially_failed += 1
                return

        for region in utils.regions:
            if region + '_gene' not in qr_info:
                print '    %d: no %s match' % (query_name, region)
                self.perfplotter.add_partial_fail(self.seqinfo[query_name],
                                                  qr_info)
                self.n_partially_failed += 1
                return

        # expand v match to left end and j match to right end
        qr_info['v_5p_del'] = 0
        qr_info['fv_insertion'] = ''
        if qr_info['match_start'] > 0:
            if self.args.debug:
                print '    add to v left:', self.seqinfo[query_name][
                    'seq'][:qr_info['match_start']]
            qr_info['seq'] = self.seqinfo[query_name][
                'seq'][:qr_info['match_start']] + qr_info['seq']

        qr_info['j_3p_del'] = 0
        qr_info['jf_insertion'] = ''
        if len(self.seqinfo[query_name]['seq']) > qr_info['match_end']:
            if self.args.debug:
                print '    add to j right:', self.seqinfo[query_name][
                    'seq'][qr_info['match_end'] -
                           len(self.seqinfo[query_name]['seq']):]
            qr_info['seq'] = qr_info['seq'] + self.seqinfo[query_name]['seq'][
                qr_info['match_end'] - len(self.seqinfo[query_name]['seq']):]

        for boundary in utils.boundaries:
            start = qr_info[boundary[0] + '_qr_bounds'][1]
            end = qr_info[boundary[1] + '_qr_bounds'][0]
            qr_info[boundary + '_insertion'] = qr_info['seq'][start:end]

        for region in utils.regions:
            start = qr_info[region + '_qr_bounds'][0]
            end = qr_info[region + '_qr_bounds'][1]
            qr_info[region + '_qr_seq'] = qr_info['seq'][start:end]

        try:
            resolve_overlapping_matches(qr_info, self.args.debug,
                                        self.germline_seqs)
        except AssertionError:
            print '    %s: apportionment failed' % query_name
            self.perfplotter.add_partial_fail(self.seqinfo[query_name],
                                              qr_info)
            self.n_partially_failed += 1
            return

        if self.args.debug:
            print '  query seq:', qr_info['seq']
            for region in utils.regions:
                true_gene = self.seqinfo[query_name][region + '_gene']
                infer_gene = qr_info[region + '_gene']
                if utils.are_alleles(infer_gene, true_gene):
                    regionstr = utils.color('bold',
                                            utils.color('blue', region))
                    truestr = ''  #'(originally %s)' % match_name
                else:
                    regionstr = utils.color('bold', utils.color('red', region))
                    truestr = '(true: %s)' % utils.color_gene(
                        true_gene).replace(region, '')
                # print '  %s %s %s' % (regionstr, utils.color_gene(infer_gene).replace(region, ''), truestr)

                print '    %s %3d %3d %s %s %s' % (
                    regionstr, qr_info[region + '_qr_bounds'][0],
                    qr_info[region + '_qr_bounds'][1],
                    utils.color_gene(infer_gene).replace(
                        region, ''), truestr, qr_info[region + '_gl_seq'])
        for boundary in utils.boundaries:
            start = qr_info[boundary[0] + '_qr_bounds'][1]
            end = qr_info[boundary[1] + '_qr_bounds'][0]
            qr_info[boundary + '_insertion'] = qr_info['seq'][start:end]
            if self.args.debug:
                print '   ', boundary, qr_info[boundary + '_insertion']

        self.perfplotter.evaluate(self.seqinfo[query_name], qr_info)
        # for key, val in qr_info.items():
        #     print key, val
        if self.args.debug:
            utils.print_reco_event(self.germline_seqs,
                                   self.seqinfo[query_name],
                                   label='true:',
                                   extra_str='  ')
            utils.print_reco_event(self.germline_seqs, qr_info, extra_str=' ')
コード例 #14
0
    def parse_query_text(self, unique_id, query_info):
        if len(query_info
               ) == 0:  # one for the query sequence, then one for v, d, and j
            print 'no info for', unique_id
            return {}
        elif len(query_info) < 4:
            regions_ok = ''
            for info in query_info:
                for region in utils.regions:
                    if 'IGH' + region.upper() in info:
                        regions_ok += region
            for region in utils.regions:
                if region not in regions_ok:
                    print '    ERROR no %s matches' % region
                    return {}
            assert False  # shouldn't get here
        elif len(query_info) != 4:
            print 'info for', unique_id, 'all messed up'
            for info in query_info:
                print info
            sys.exit()

        full_qr_seq = query_info[0].replace('>', '').replace(
            unique_id, '')  # strip off the unique id
        full_qr_seq = ''.join(full_qr_seq.split()).upper(
        )  # strip off white space and uppercase it
        assert full_qr_seq == self.seqinfo[unique_id]['seq']

        line = {}
        line['unique_id'] = unique_id
        line['seq'] = full_qr_seq
        for ireg in range(len(utils.regions)):
            region = utils.regions[ireg]
            info = query_info[ireg + 1].splitlines()
            while unique_id not in info[
                    0]:  # remove the line marking cdr3 and framework regions
                info.pop(0)
            if len(info) <= 1:
                print info
            assert len(info) > 1
            assert len(info[0].split()) == 2
            qr_seq = info[0].split()[1].upper(
            )  # this line should be '<unique_id> .............<query_seq>'

            true_gene = self.seqinfo[unique_id][region + '_gene']
            imatch = 1  # which match to take
            match_name = str(info[imatch].split()[2])
            while match_name in just_always_friggin_skip and len(
                    info) > imatch + 1 and len(info[imatch + 1].split()) > 2:
                imatch += 1
                old_one = match_name
                match_name = str(info[imatch].split()[2])
                if self.args.debug:
                    print '    %s: taking next match: %s --> %s)' % (
                        unique_id, utils.color_gene(old_one),
                        utils.color_gene(match_name))

            infer_gene = match_name
            for gset in equivalent_genes:
                if match_name in gset and true_gene in gset and match_name != true_gene:  # if the true gene and the inferred gene are in the same equivalence set, treat it as correct, i.e. just pretend it inferred the right name
                    if self.args.debug:
                        print '   %s: replacing name %s with true name %s' % (
                            unique_id, match_name, true_gene)
                    infer_gene = true_gene

            # ----------------------------------------------------------------------------------------
            # skipping bullshit
            def skip_gene(gene):
                print '    %s in list of genes to skip' % utils.color_gene(
                    gene)
                if gene not in genes_actually_skipped:
                    genes_actually_skipped[gene] = 0
                genes_actually_skipped[gene] += 1
                line['skip_gene'] = True

            if infer_gene not in self.germline_seqs[region]:
                print '    couldn\'t find %s in germlines (skipping)' % infer_gene
                skip_gene(infer_gene)
                return line

            if infer_gene in just_always_friggin_skip:
                skip_gene(infer_gene)
                return line
            if true_gene in just_always_friggin_skip:
                skip_gene(true)
                return line

            if not self.args.dont_skip_or15_genes and '/OR1' in true_gene:
                skip_gene(true_gene)
                return line

            if self.args.skip_missing_genes:
                if infer_gene in genes_to_skip:
                    skip_gene(infer_gene)
                    return line
                if true_gene in genes_to_skip:
                    skip_gene(true_gene)
                    return line

            gl_seq = info[imatch].split()[4].upper()
            if qr_seq.replace('.', '') not in self.seqinfo[unique_id]['seq']:
                # if self.args.debug:
                print '    qr_seq not found in seqinfo'
                line['failed'] = True
                return line

            if self.args.debug:
                if utils.are_alleles(infer_gene, true_gene):
                    regionstr = utils.color('bold',
                                            utils.color('blue', region))
                    truestr = '(originally %s)' % match_name
                else:
                    regionstr = utils.color('bold', utils.color('red', region))
                    truestr = '(true: %s)' % utils.color_gene(
                        true_gene).replace(region, '')
                print '  %s %s %s' % (regionstr,
                                      utils.color_gene(infer_gene).replace(
                                          region, ''), truestr)
                print '    gl', gl_seq
                print '      ', qr_seq

            # replace the dots (gaps) in the gl match
            new_qr_seq, new_gl_seq = [], []
            for inuke in range(min(len(qr_seq), len(gl_seq))):
                if gl_seq[inuke] == '.':
                    pass
                else:
                    new_qr_seq.append(
                        qr_seq[inuke]
                    )  # this should only be out of range if the v match extends through the whole query sequence, i.e. friggin never
                    new_gl_seq.append(gl_seq[inuke])
            for inuke in range(len(gl_seq), len(qr_seq)):
                new_qr_seq.append(qr_seq[inuke])
            for inuke in range(len(qr_seq), len(gl_seq)):
                new_gl_seq.append(gl_seq[inuke])
            qr_seq = ''.join(new_qr_seq)
            gl_seq = ''.join(new_gl_seq)

            # work out the erosions
            qr_ldots = qr_seq.rfind(
                '.') + 1  # first strip off any dots on the left of query seq
            qr_seq = qr_seq[qr_ldots:]
            gl_seq = gl_seq[qr_ldots:]
            gl_ldots = gl_seq.rfind(
                '.') + 1  # then remove dots on the left of the germline seq
            qr_seq = qr_seq[gl_ldots:]
            gl_seq = gl_seq[gl_ldots:]
            del_5p = qr_ldots + gl_ldots
            jf_insertion = ''
            if region == 'j':
                jf_insertion = qr_seq[len(gl_seq):]
            qr_seq = qr_seq[:len(
                gl_seq
            )]  # then strip the right-hand portion of the query sequence that isn't aligned to the germline
            del_3p = len(gl_seq) - len(
                qr_seq
            )  # then do the same for the germline overhanging on the right of the query
            gl_seq = gl_seq[:len(qr_seq)]
            assert len(gl_seq) == len(qr_seq)

            new_gl_seq = []
            for inuke in range(len(gl_seq)):  # replace dashes (matched bases)
                assert gl_seq[inuke] != '.'  # hoping there's no gaps in here
                if gl_seq[inuke] == '-':
                    new_gl_seq.append(qr_seq[inuke])
                else:
                    new_gl_seq.append(gl_seq[inuke])
            gl_seq = ''.join(new_gl_seq)

            if self.germline_seqs[region][infer_gene].find(
                    gl_seq
            ) != del_5p:  # why the *@*!! can't they make this consistent?
                if self.germline_seqs[region][infer_gene].find(gl_seq) < 0:
                    print 'whooooaa'
                    print self.germline_seqs[region][infer_gene]
                    print gl_seq
                    line['failed'] = True
                    return line
                del_5p += self.germline_seqs[region][infer_gene].find(gl_seq)

            try:
                assert del_5p + len(gl_seq) + del_3p + len(
                    jf_insertion) == len(
                        self.germline_seqs[region][infer_gene])
            except:
                print '    ERROR lengths failed for %s' % unique_id
                # print del_5p, len(gl_seq), del_3p, del_5p + len(gl_seq) + del_3p , len(self.germline_seqs[region][infer_gene])
                # print gl_seq
                # print self.germline_seqs[region][infer_gene]
                line['failed'] = True
                return line
                # assert False

            if self.args.debug:
                utils.color_mutants(gl_seq,
                                    qr_seq,
                                    ref_label='gl ',
                                    extra_str='    ',
                                    print_result=True,
                                    post_str='    del: %d %d' %
                                    (del_5p, del_3p))

            # try:
            #     infer_gene = joinparser.figure_out_which_damn_gene(self.germline_seqs, infer_gene, gl_seq, debug=self.args.debug)
            # except:
            #     print 'ERROR couldn\'t figure out the gene for %s' % infer_gene
            #     return {}

            line[region + '_gene'] = infer_gene
            line[region + '_qr_seq'] = qr_seq
            line[region + '_gl_seq'] = gl_seq
            line[region + '_5p_del'] = del_5p
            line[region + '_3p_del'] = del_3p
            if region == 'j':
                line['jf_insertion'] = jf_insertion

        return line
コード例 #15
0
ファイル: ihhhmmmparser.py プロジェクト: Irrationone/partis
    def parse_detail(self, fk, unique_id):
        assert fk.iline < len(fk.lines)

        while fk.line[1] != 'Details':
            fk.increment()
            if fk.eof:
                return

        fk.increment()
        info = {}
        info['unique_id'] = unique_id
        for begin_line, column, index, required, default in line_order:
            if fk.line[0].find(begin_line) != 0:
                if required:
                    print 'oop', begin_line, fk.line
                    sys.exit()
                else:
                    info[column] = default
                    continue
            if column != '':
                info[column] = clean_value(column, fk.line[index])
                # if '[' in info[column]:
                #     print 'added', column, clean_value(column, fk.line[index])
                if column.find('_gene') == 1:
                    region = column[0]
                    info[region + '_5p_del'] = int(fk.line[fk.line.index('start:') + 1]) - 1  # NOTE their indices are 1-based
                    gl_length = int(fk.line[fk.line.index('gene:') + 1]) - 1
                    match_end = int(fk.line[fk.line.index('end:') + 1]) - 1
                    assert gl_length >= match_end
                    info[region + '_3p_del'] = gl_length - match_end

            fk.increment()

        if unique_id not in self.sim_need:
            while not fk.eof and fk.line[1] != 'Details':  # skip stuff until start of next Detail block
                fk.increment()
            return

        info['fv_insertion'] = ''
        info['jf_insertion'] = ''
        info['seq'] = info['v_qr_seq'] + info['vd_insertion'] + info['d_qr_seq'] + info['dj_insertion'] + info['j_qr_seq']

        if '-' in info['seq']:
            print 'ERROR found a dash in %s, returning failure' % unique_id
            while not fk.eof and fk.line[1] != 'Details':  # skip stuff until start of next Detail block
                fk.increment()
            return

        if info['seq'] not in self.siminfo[unique_id]['seq']:  # arg. I can't do != because it tacks on v left and j right deletions
            print 'ERROR didn\'t find the right sequence for %s' % unique_id
            print '  ', info['seq']
            print '  ', self.siminfo[unique_id]['seq']
            sys.exit()

        if self.args.debug:
            print unique_id
            for region in utils.regions:
                infer_gene = info[region + '_gene']
                true_gene = self.siminfo[unique_id][region + '_gene']
                if utils.are_alleles(infer_gene, true_gene):
                    regionstr = utils.color('bold', utils.color('blue', region))
                    truestr = ''  #'(originally %s)' % match_name
                else:
                    regionstr = utils.color('bold', utils.color('red', region))
                    truestr = '(true: %s)' % utils.color_gene(true_gene).replace(region, '')
                print '  %s %s %s' % (regionstr, utils.color_gene(infer_gene).replace(region, ''), truestr)

            utils.print_reco_event(self.germline_seqs, self.siminfo[unique_id], label='true:', extra_str='    ')
            utils.print_reco_event(self.germline_seqs, info, label='inferred:', extra_str='    ')

        for region in utils.regions:
            if info[region + '_gene'] not in self.germline_seqs[region]:
                print 'ERROR %s not in germlines' % info[region + '_gene']
                assert False

            gl_seq = info[region + '_gl_seq']
            if '[' in gl_seq:  # ambiguous
                for nuke in utils.nukes:
                    gl_seq = gl_seq.replace('[', nuke)
                    if gl_seq in self.germline_seqs[region][info[region + '_gene']]:
                        print '  replaced [ with %s' % nuke
                        break
                info[region + '_gl_seq'] = gl_seq

            if info[region + '_gl_seq'] not in self.germline_seqs[region][info[region + '_gene']]:
                print 'ERROR gl match not found for %s in %s' % (info[region + '_gene'], unique_id)
                print '  ', info[region + '_gl_seq']
                print '  ', self.germline_seqs[region][info[region + '_gene']]                
                self.perfplotter.add_partial_fail(self.siminfo[unique_id], info)
                while not fk.eof and fk.line[1] != 'Details':  # skip stuff until start of next Detail block
                    fk.increment()
                return

        self.perfplotter.evaluate(self.siminfo[unique_id], info)
        self.details[unique_id] = info
        self.sim_need.remove(unique_id)

        while not fk.eof and fk.line[1] != 'Details':  # skip stuff until start of next Detail block
            fk.increment()
コード例 #16
0
ファイル: imgtparser.py プロジェクト: stevenweaver/partis
    def parse_query_text(self, unique_id, query_info):
        if len(query_info) == 0:  # one for the query sequence, then one for v, d, and j
            print 'no info for',unique_id
            return {}
        elif len(query_info) < 4:
            regions_ok = ''
            for info in query_info:
                for region in utils.regions:
                    if 'IGH' + region.upper() in info:
                        regions_ok += region
            for region in utils.regions:
                if region not in regions_ok:
                    print '    ERROR no %s matches' % region
                    return {}
            assert False  # shouldn't get here
        elif len(query_info) != 4:
            print 'info for', unique_id, 'all messed up'
            for info in query_info:
                print info
            sys.exit()

        full_qr_seq = query_info[0].replace('>', '').replace(unique_id, '')  # strip off the unique id
        full_qr_seq = ''.join(full_qr_seq.split()).upper()  # strip off white space and uppercase it
        assert full_qr_seq == self.seqinfo[unique_id]['seq']

        line = {}
        line['unique_id'] = unique_id
        line['seq'] = full_qr_seq
        for ireg in range(len(utils.regions)):
            region = utils.regions[ireg]
            info = query_info[ireg + 1].splitlines()
            while unique_id not in info[0]:  # remove the line marking cdr3 and framework regions
                info.pop(0)
            if len(info) <= 1:
                print info
            assert len(info) > 1
            assert len(info[0].split()) == 2
            qr_seq = info[0].split()[1].upper()  # this line should be '<unique_id> .............<query_seq>'

            true_gene = self.seqinfo[unique_id][region + '_gene']
            imatch = 1  # which match to take
            match_name = str(info[imatch].split()[2])
            while match_name in just_always_friggin_skip and len(info) > imatch+1 and len(info[imatch+1].split()) > 2:
                imatch += 1
                old_one = match_name
                match_name = str(info[imatch].split()[2])
                if self.args.debug:
                    print '    %s: taking next match: %s --> %s)' % (unique_id, utils.color_gene(old_one), utils.color_gene(match_name))

            infer_gene = match_name
            for gset in equivalent_genes:
                if match_name in gset and true_gene in gset and match_name != true_gene:  # if the true gene and the inferred gene are in the same equivalence set, treat it as correct, i.e. just pretend it inferred the right name
                    if self.args.debug:
                        print '   %s: replacing name %s with true name %s' % (unique_id, match_name, true_gene)
                    infer_gene = true_gene

            # ----------------------------------------------------------------------------------------
            # skipping bullshit
            def skip_gene(gene):
                print '    %s in list of genes to skip' % utils.color_gene(gene)
                if gene not in genes_actually_skipped:
                    genes_actually_skipped[gene] = 0
                genes_actually_skipped[gene] += 1
                line['skip_gene'] = True

            if infer_gene not in self.germline_seqs[region]:
                print '    couldn\'t find %s in germlines (skipping)' % infer_gene
                skip_gene(infer_gene)
                return line

            if infer_gene in just_always_friggin_skip:
                skip_gene(infer_gene)
                return line
            if true_gene in just_always_friggin_skip:
                skip_gene(true)
                return line

            if not self.args.dont_skip_or15_genes and '/OR1' in true_gene:
                skip_gene(true_gene)
                return line

            if self.args.skip_missing_genes:
                if infer_gene in genes_to_skip:
                    skip_gene(infer_gene)
                    return line
                if true_gene in genes_to_skip:
                    skip_gene(true_gene)
                    return line

            gl_seq = info[imatch].split()[4].upper()
            if qr_seq.replace('.', '') not in self.seqinfo[unique_id]['seq']:
                # if self.args.debug:
                print '    qr_seq not found in seqinfo'
                line['failed'] = True
                return line

            if self.args.debug:
                if utils.are_alleles(infer_gene, true_gene):
                    regionstr = utils.color('bold', utils.color('blue', region))
                    truestr = '(originally %s)' % match_name
                else:
                    regionstr = utils.color('bold', utils.color('red', region))
                    truestr = '(true: %s)' % utils.color_gene(true_gene).replace(region, '')
                print '  %s %s %s' % (regionstr, utils.color_gene(infer_gene).replace(region, ''), truestr)
                print '    gl', gl_seq
                print '      ', qr_seq

            # replace the dots (gaps) in the gl match
            new_qr_seq, new_gl_seq = [], []
            for inuke in range(min(len(qr_seq), len(gl_seq))):
                if gl_seq[inuke] == '.':
                    pass
                else:
                    new_qr_seq.append(qr_seq[inuke])  # this should only be out of range if the v match extends through the whole query sequence, i.e. friggin never
                    new_gl_seq.append(gl_seq[inuke])
            for inuke in range(len(gl_seq), len(qr_seq)):
                new_qr_seq.append(qr_seq[inuke])
            for inuke in range(len(qr_seq), len(gl_seq)):
                new_gl_seq.append(gl_seq[inuke])
            qr_seq = ''.join(new_qr_seq)
            gl_seq = ''.join(new_gl_seq)

            # work out the erosions
            qr_ldots = qr_seq.rfind('.') + 1  # first strip off any dots on the left of query seq
            qr_seq = qr_seq[qr_ldots : ]
            gl_seq = gl_seq[qr_ldots : ]
            gl_ldots = gl_seq.rfind('.') + 1  # then remove dots on the left of the germline seq
            qr_seq = qr_seq[gl_ldots : ]
            gl_seq = gl_seq[gl_ldots : ]
            del_5p = qr_ldots + gl_ldots
            jf_insertion = ''
            if region == 'j':
                jf_insertion = qr_seq[len(gl_seq) : ]
            qr_seq = qr_seq[ : len(gl_seq)]  # then strip the right-hand portion of the query sequence that isn't aligned to the germline
            del_3p = len(gl_seq) - len(qr_seq)  # then do the same for the germline overhanging on the right of the query
            gl_seq = gl_seq[ : len(qr_seq)]
            assert len(gl_seq) == len(qr_seq)

            new_gl_seq = []
            for inuke in range(len(gl_seq)):  # replace dashes (matched bases)
                assert gl_seq[inuke] != '.'  # hoping there's no gaps in here
                if gl_seq[inuke] == '-':
                    new_gl_seq.append(qr_seq[inuke])
                else:
                    new_gl_seq.append(gl_seq[inuke])
            gl_seq = ''.join(new_gl_seq)

            if self.germline_seqs[region][infer_gene].find(gl_seq) != del_5p:  # why the *@*!! can't they make this consistent?
                if self.germline_seqs[region][infer_gene].find(gl_seq) < 0:
                    print 'whooooaa'
                    print self.germline_seqs[region][infer_gene]
                    print gl_seq
                    line['failed'] = True
                    return line
                del_5p += self.germline_seqs[region][infer_gene].find(gl_seq)

            try:
                assert del_5p + len(gl_seq) + del_3p + len(jf_insertion) == len(self.germline_seqs[region][infer_gene])
            except:
                print '    ERROR lengths failed for %s' % unique_id
                # print del_5p, len(gl_seq), del_3p, del_5p + len(gl_seq) + del_3p , len(self.germline_seqs[region][infer_gene])
                # print gl_seq
                # print self.germline_seqs[region][infer_gene]
                line['failed'] = True
                return line
                # assert False

            if self.args.debug:
                utils.color_mutants(gl_seq, qr_seq, ref_label='gl ', extra_str='    ', print_result=True, post_str='    del: %d %d' % (del_5p, del_3p))

            # try:
            #     infer_gene = joinparser.figure_out_which_damn_gene(self.germline_seqs, infer_gene, gl_seq, debug=self.args.debug)
            # except:
            #     print 'ERROR couldn\'t figure out the gene for %s' % infer_gene
            #     return {}

            line[region + '_gene'] = infer_gene
            line[region + '_qr_seq'] = qr_seq
            line[region + '_gl_seq'] = gl_seq
            line[region + '_5p_del'] = del_5p
            line[region + '_3p_del'] = del_3p
            if region == 'j':
                line['jf_insertion'] = jf_insertion
            
        return line
コード例 #17
0
    def parse_detail(self, fk, unique_id):
        assert fk.iline < len(fk.lines)

        while fk.line[1] != 'Details':
            fk.increment()
            if fk.eof:
                return

        fk.increment()
        info = {}
        info['unique_id'] = unique_id
        for begin_line, column, index, required, default in line_order:
            if fk.line[0].find(begin_line) != 0:
                if required:
                    print 'oop', begin_line, fk.line
                    sys.exit()
                else:
                    info[column] = default
                    continue
            if column != '':
                info[column] = clean_value(column, fk.line[index])
                # if '[' in info[column]:
                #     print 'added', column, clean_value(column, fk.line[index])
                if column.find('_gene') == 1:
                    region = column[0]
                    info[region + '_5p_del'] = int(
                        fk.line[fk.line.index('start:') +
                                1]) - 1  # NOTE their indices are 1-based
                    gl_length = int(fk.line[fk.line.index('gene:') + 1]) - 1
                    match_end = int(fk.line[fk.line.index('end:') + 1]) - 1
                    assert gl_length >= match_end
                    info[region + '_3p_del'] = gl_length - match_end

            fk.increment()

        if unique_id not in self.sim_need:
            while not fk.eof and fk.line[
                    1] != 'Details':  # skip stuff until start of next Detail block
                fk.increment()
            return

        info['fv_insertion'] = ''
        info['jf_insertion'] = ''
        info['seq'] = info['v_qr_seq'] + info['vd_insertion'] + info[
            'd_qr_seq'] + info['dj_insertion'] + info['j_qr_seq']

        if '-' in info['seq']:
            print 'ERROR found a dash in %s, returning failure' % unique_id
            while not fk.eof and fk.line[
                    1] != 'Details':  # skip stuff until start of next Detail block
                fk.increment()
            return

        if info['seq'] not in self.siminfo[unique_id][
                'seq']:  # arg. I can't do != because it tacks on v left and j right deletions
            print 'ERROR didn\'t find the right sequence for %s' % unique_id
            print '  ', info['seq']
            print '  ', self.siminfo[unique_id]['seq']
            sys.exit()

        if self.args.debug:
            print unique_id
            for region in utils.regions:
                infer_gene = info[region + '_gene']
                true_gene = self.siminfo[unique_id][region + '_gene']
                if utils.are_alleles(infer_gene, true_gene):
                    regionstr = utils.color('bold',
                                            utils.color('blue', region))
                    truestr = ''  #'(originally %s)' % match_name
                else:
                    regionstr = utils.color('bold', utils.color('red', region))
                    truestr = '(true: %s)' % utils.color_gene(
                        true_gene).replace(region, '')
                print '  %s %s %s' % (regionstr,
                                      utils.color_gene(infer_gene).replace(
                                          region, ''), truestr)

            utils.print_reco_event(self.germline_seqs,
                                   self.siminfo[unique_id],
                                   label='true:',
                                   extra_str='    ')
            utils.print_reco_event(self.germline_seqs,
                                   info,
                                   label='inferred:',
                                   extra_str='    ')

        for region in utils.regions:
            if info[region + '_gene'] not in self.germline_seqs[region]:
                print 'ERROR %s not in germlines' % info[region + '_gene']
                assert False

            gl_seq = info[region + '_gl_seq']
            if '[' in gl_seq:  # ambiguous
                for nuke in utils.nukes:
                    gl_seq = gl_seq.replace('[', nuke)
                    if gl_seq in self.germline_seqs[region][info[region +
                                                                 '_gene']]:
                        print '  replaced [ with %s' % nuke
                        break
                info[region + '_gl_seq'] = gl_seq

            if info[region + '_gl_seq'] not in self.germline_seqs[region][info[
                    region + '_gene']]:
                print 'ERROR gl match not found for %s in %s' % (
                    info[region + '_gene'], unique_id)
                print '  ', info[region + '_gl_seq']
                print '  ', self.germline_seqs[region][info[region + '_gene']]
                self.perfplotter.add_partial_fail(self.siminfo[unique_id],
                                                  info)
                while not fk.eof and fk.line[
                        1] != 'Details':  # skip stuff until start of next Detail block
                    fk.increment()
                return

        self.perfplotter.evaluate(self.siminfo[unique_id], info)
        self.details[unique_id] = info
        self.sim_need.remove(unique_id)

        while not fk.eof and fk.line[
                1] != 'Details':  # skip stuff until start of next Detail block
            fk.increment()