Example #1
0
def test_pagb():
    """Check the filter produced by ``set_filter_fq_pagb`` on a four-variant group."""
    records = [
        _Record(number, 0, str(number), '', '', '', '', '', '', '')
        for number in range(1, 5)
    ]
    group = vg.VariantGroup('1', records)

    # Replace the count matrices with uniformly filled ones.  The size is
    # re-read from the coverage array before each assignment.
    uniform_fills = (
        ('coverage_array', 400),
        ('existence_array', 100),
        ('a_not_b_array', 25),
    )
    for attribute, fill_value in uniform_fills:
        setattr(group, attribute,
                populate_array(len(group.coverage_array), fill_value))

    # Individual cells that deviate from the uniform fill.
    cell_overrides = (
        ('a_not_b_array', 1, 0, 100),
        ('a_not_b_array', 2, 0, 25),
        ('a_not_b_array', 3, 1, 100),
        ('b_not_a_array', 3, 0, 25),
        ('b_not_a_array', 2, 1, 100),
        ('b_not_a_array', 3, 1, 100),
    )
    for attribute, row, col, count in cell_overrides:
        getattr(group, attribute)[row][col] = count

    group.set_filter_fq_pagb(50, True)

    # Force the diagonal and everything above it to True; at least one cell
    # below the diagonal must then still be False ...
    size = len(group.filter)
    for row in range(size):
        for col in range(row, size):
            group.filter[row][col] = True
    assert_true(not group.filter.all())

    # ... and switching on cell [3][1] must make the whole filter True.
    group.filter[3][1] = True
    assert_true(group.filter.all())
Example #2
0
def test_add_min_read_filter():
    """Exercise ``add_filter_min_reads`` with two different thresholds."""
    records = [
        _Record(number, 0, str(number), '', '', '', '', '', '', '')
        for number in range(1, 5)
    ]
    group = vg.VariantGroup('1', records)
    size = len(records)

    # Read counts placed in the existence matrix, as (row, column, count).
    group.existence_array = np.zeros((size, size))
    read_counts = (
        (0, 1, 30),
        (0, 2, 40),
        (0, 3, 50),
        (1, 2, 5),
        (1, 3, 10),
        (2, 3, 1),
    )
    for row, col, reads in read_counts:
        group.existence_array[row][col] = reads

    # Cells expected to be flagged: key '1' for threshold 30, key '2' for
    # threshold 5.  In both cases these are the cells whose count is
    # strictly above the threshold.
    flagged_cells = {
        '1': ((0, 2), (0, 3)),
        '2': ((0, 1), (0, 2), (0, 3), (1, 3)),
    }
    expected = {}
    for key, cells in flagged_cells.items():
        mask = np.zeros((size, size))
        for row, col in cells:
            mask[row][col] = 1
        expected[key] = mask > 0

    group.add_filter_min_reads(30)
    assert_true((expected['1'] == group.filter).all())

    group.reset_filter()
    group.add_filter_min_reads(5)
    assert_true((expected['2'] == group.filter).all())
Example #3
0
 def test_coordinates_for_insert_and_snp(self):
     """The expected coordinates are the same for both orderings of the two ALT alleles."""
     alt_orderings = (
         ('id6', ('GTA', 'G')),
         ('id7', ('G', 'GTA')),
     )
     for identifier, sequences in alt_orderings:
         alts = [model._Substitution(sequence) for sequence in sequences]
         record = model._Record(
             '1', 10, identifier, 'C', alts,
             None, None, {}, None, {}, None)
         self.assert_has_expected_coordinates(record, (9, 10), (9, 10))
Example #4
0
 def test_coordinates_for_snp_and_deletion(self):
     """The expected coordinates are the same for both orderings of the two ALT alleles."""
     cases = (
         ('id8', ('C', 'CTG')),
         ('id9', ('CTG', 'C')),
     )
     for identifier, alt_sequences in cases:
         alts = [model._Substitution(sequence) for sequence in alt_sequences]
         record = model._Record(
             '1', 10, identifier, 'CTA', alts,
             None, None, {}, None, {}, None)
         self.assert_has_expected_coordinates(record, (9, 12), (10, 12))
Example #5
0
 def test_coordinates_for_insertion_and_deletion(self):
     """The expected coordinates are the same for both orderings of the two ALT alleles."""
     cases = (
         ('id10', ('CA', 'CTT')),
         ('id11', ('CTT', 'CA')),
     )
     for identifier, alt_sequences in cases:
         alts = [model._Substitution(sequence) for sequence in alt_sequences]
         record = model._Record(
             '1', 10, identifier, 'CT', alts,
             None, None, {}, None, {}, None)
         self.assert_has_expected_coordinates(record, (9, 11), (10, 11))
Example #6
0
 def test_coordinates_for_insertion_and_deletion(self):
     """Both ALT orderings of the 'CT' record must give the same coordinates."""

     def make_record(identifier, *alt_sequences):
         # Only the identifier and the ALT order differ between the records.
         alts = [model._Substitution(sequence) for sequence in alt_sequences]
         return model._Record('1', 10, identifier, 'CT', alts,
                              None, None, {}, None, {}, None)

     first = make_record('id10', 'CA', 'CTT')
     self.assert_has_expected_coordinates(first, (9, 11), (10, 11))

     second = make_record('id11', 'CTT', 'CA')
     self.assert_has_expected_coordinates(second, (9, 11), (10, 11))
Example #7
0
 def test_coordinates_for_snp_and_deletion(self):
     """Both ALT orderings of the 'CTA' record must give the same coordinates."""

     def make_record(identifier, *alt_sequences):
         # Only the identifier and the ALT order differ between the records.
         alts = [model._Substitution(sequence) for sequence in alt_sequences]
         return model._Record('1', 10, identifier, 'CTA', alts,
                              None, None, {}, None, {}, None)

     first = make_record('id8', 'C', 'CTG')
     self.assert_has_expected_coordinates(first, (9, 12), (10, 12))

     second = make_record('id9', 'CTG', 'C')
     self.assert_has_expected_coordinates(second, (9, 12), (10, 12))
Example #8
0
 def test_coordinates_for_insert_and_snp(self):
     """Both ALT orderings of the 'C' record must give the same coordinates."""

     def make_record(identifier, *alt_sequences):
         # Only the identifier and the ALT order differ between the records.
         alts = [model._Substitution(sequence) for sequence in alt_sequences]
         return model._Record('1', 10, identifier, 'C', alts,
                              None, None, {}, None, {}, None)

     first = make_record('id6', 'GTA', 'G')
     self.assert_has_expected_coordinates(first, (9, 10), (9, 10))

     second = make_record('id7', 'G', 'GTA')
     self.assert_has_expected_coordinates(second, (9, 10), (9, 10))
Example #9
0
 def test_coordinates_for_multiple_snps(self):
     """Check the coordinates of a record with three single-base ALT alleles."""
     record_fields = (
         '1', 10, 'id5', 'C',
         [model._Substitution(base) for base in ('A', 'G', 'T')],
         None, None, {}, None, {}, None,
     )
     record = model._Record(*record_fields)
     self.assert_has_expected_coordinates(record, (9, 10), (9, 10))
Example #10
0
    def _get_record_for_indel(self, bpm_record_group):
        """
        Build the VCF record describing an indel locus

        Args:
            bpm_record_group (list(BPMRecord)) : BPM records for the group (must all be indels)

        Returns:
            vcf._Record : The new VCF record definition
        """
        defaults = VcfRecordFactory._get_record_defaults()
        qual, filt, info, sample_indexes = defaults

        identifier = self._get_identifier(bpm_record_group)

        # every record in the group must agree with the first one on
        # whether the locus is a deletion
        first_record = bpm_record_group[0]
        for candidate in bpm_record_group:
            assert candidate.is_deletion == first_record.is_deletion

        _, indel_sequence, _ = first_record.get_indel_source_sequences(
            RefStrand.Plus)
        locus_index = first_record.pos - 1
        chrom = first_record.chromosome
        if chrom in ("XX", "XY"):
            chrom = "X"

        # a deletion takes its single reference base one position earlier
        # than an insertion does
        is_deletion = first_record.is_deletion
        base_start = locus_index - 1 if is_deletion else locus_index
        reference_base = self._genome_reader.get_reference_bases(
            chrom, base_start, base_start + 1)

        # the longer allele is the reference for a deletion and the
        # alternate for an insertion
        extended_allele = reference_base + indel_sequence
        if is_deletion:
            reference_allele, alternate_allele = extended_allele, reference_base
        else:
            reference_allele, alternate_allele = reference_base, extended_allele

        return _Record(chrom, base_start + 1, identifier, reference_allele,
                       [_Substitution(alternate_allele)], qual, filt, info,
                       self._format_factory.get_format_id_string(),
                       sample_indexes)
Example #11
0
def _grid_item_to_vcf_record(info_dict, obj, sample_ids, sample_names):
    """Build a ``_Record`` from one grid row.

    Args:
        info_dict: mapping of INFO id -> dict holding a
            ``'column__variant_column'`` entry that names the key of
            ``obj`` to copy into INFO.
        obj: mapping with the row values (locus, alt and per-sample keys).
        sample_ids: ids used to build the per-sample keys of ``obj``; when
            falsy, no FORMAT string and no calls are produced.
        sample_names: names paired position-wise with ``sample_ids``.

    Returns:
        The record; when ``sample_ids`` is truthy its ``samples`` attribute
        holds one ``_Call`` per sample.
    """
    chrom = obj.get("locus__contig__name", ".")
    pos = obj.get("locus__position", ".")
    rs_id = obj.get("variantannotation__dbsnp_rs_id")
    ref = obj.get("locus__ref__seq", ".")
    alt = obj.get("alt__seq", ".")

    # Only truthy column values make it into INFO.
    info = {}
    for info_id, column_data in info_dict.items():
        value = obj.get(column_data['column__variant_column'])
        if not value:
            continue
        info[info_id] = value

    format_keys = ['GT', 'AD', 'AF', 'PL', 'DP', 'GQ']
    call_data_factory = make_calldata_tuple(format_keys)
    format_string = ':'.join(format_keys) if sample_ids else None

    # The record keeps a reference to this dict, which is filled in below.
    sample_indexes = {}
    record = _Record(chrom, pos, rs_id, ref, [_Substitution(alt)], '.', None,
                     info, format_string, sample_indexes)

    if not sample_ids:
        return record

    calls = []
    for index, (sample_id, sample_name) in enumerate(zip(sample_ids, sample_names)):
        allele_depth = obj[f"{sample_id}_samples_allele_depth"]
        zygosity = obj[f"{sample_id}_samples_zygosity"]
        genotype = Zygosity.get_genotype_from_expanded_zygosity(zygosity)
        read_depth = obj[f"{sample_id}_samples_read_depth"]
        allele_frequency = obj[f"{sample_id}_samples_allele_frequency"]
        # GQ/PL are optional and fall back to "."
        # TODO: Ideally, we'd not write them out
        phred_likelihood = obj.get(f"{sample_id}_samples_phred_likelihood", ".")
        genotype_quality = obj.get(f"{sample_id}_samples_genotype_quality", ".")

        # TODO: Need to grab information for reference base to be able to
        # properly fill in this data (the leading '.' placeholders).
        call_data = call_data_factory(
            AD=['.', allele_depth],
            GT=genotype,
            PL=['.', phred_likelihood],
            DP=['.', read_depth],
            GQ=['.', genotype_quality],
            AF=['.', allele_frequency],
        )
        calls.append(_Call(record, sample_name, call_data))
        sample_indexes[sample_name] = index

    record.samples = calls
    return record
Example #12
0
def tab_to_vcf(input_file, output_file, reference_file, columns, info_fields, convert_iupac=False):
    """
    Write the rows of a tab-delimited file as VCF records.

    The fixed VCF fields are supported: #CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO

    PyVCF's _Record class is called with these positional arguments:

    CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, sample_indexes

    columns (dict) : For each name in VCF_COLUMN_ORDER, the input column to read;
        missing values become ".".
    info_fields (dict) : VCF INFO key -> input column name; only columns present
        in the row are copied into INFO.
    convert_iupac (bool) : When present, convert IUPAC codes to the non-reference allele.
        This is only possible for when the reference and IUPAC-determined alternates share
        at least one allele. Tri-allelic conversion is not supported and will emit a warning.
        IUPAC codes: http://www.bioinformatics.org/sms/iupac.html
    """
    reference_dict = FastaHack(reference_file)

    with open(input_file, "r") as input_fh, open(TEMPLATE_VCF_FILE, "r") as template_fh:
        reader = csv.DictReader(input_fh, delimiter="\t")
        vcf_reader = vcf.Reader(template_fh)

        with open(output_file, "w") as output_fh:
            vcf_writer = vcf.Writer(output_fh, vcf_reader, lineterminator='\n')

            for row in reader:
                fields = [row.get(columns.get(name, None), ".") for name in VCF_COLUMN_ORDER]

                # The position is written as an integer.
                fields[POSITION_INDEX] = int(fields[POSITION_INDEX])

                # GATK-style indels ("+"/"-" prefix, no "/") are rewritten in VCF form.
                alt_value = fields[ALT_INDEX]
                if alt_value.startswith(("+", "-")) and "/" not in alt_value:
                    fields = gatk_indel_to_vcf(fields, reference_dict)

                # Optional IUPAC conversion.
                if convert_iupac:
                    fields = _convert_iupac(fields)

                # ALT is passed on as a one-element list.
                fields[ALT_INDEX] = [fields[ALT_INDEX]]

                # INFO only receives the configured fields found in this row.
                if info_fields:
                    info = {
                        vcf_field: row[tab_field]
                        for vcf_field, tab_field in info_fields.items()
                        if tab_field in row
                    }
                else:
                    info = {}

                # Append INFO plus placeholder FORMAT and sample_indexes.
                fields.extend([info, ".", []])

                vcf_writer.write_record(_Record(*fields))
Example #13
0
def test_split_and_trim():
    """Check which variants ``split_and_trim`` keeps and rejects for two filter thresholds."""
    records = [
        _Record(number, 0, str(number), '', '', '', '', '', '', '')
        for number in range(1, 5)
    ]
    groups = {
        '1': vg.VariantGroup('1', records),
        '2': vg.VariantGroup('1', records),
    }
    strict, lenient = groups['1'], groups['2']

    # Both groups share the same counts; only the filter threshold differs.
    size = len(records)
    strict.coverage_array = np.zeros((size, size))
    strict.existence_array = np.zeros((size, size))
    strict.coverage_array[-1][0] = 100.
    strict.existence_array[-1][0] = 50.
    strict.coverage_array[-2][1] = 1000.
    strict.existence_array[-2][1] = 1.
    lenient.coverage_array = strict.coverage_array.copy()
    lenient.existence_array = strict.existence_array.copy()

    strict.set_filter_fq_pab(25)
    lenient.set_filter_fq_pab(0)

    expected_kept = {'1': ['1', '4'], '2': ['1', '2', '3', '4']}
    expected_rejected = {'1': ['2', '3'], '2': []}

    kept, rejected = {}, {}
    for key in ('1', '2'):
        kept[key], rejected[key] = groups[key].split_and_trim()

    # Group '1' is judged by its first kept group alone; for group '2' the
    # remaining kept groups are folded back into the first one.
    kept_strict = kept['1'][0]
    kept_lenient = kept['2'][0]
    kept_lenient.unsplit(kept['2'][1:])

    observed_rejected = {
        key: [variant.ID for variant in rejected[key]]
        for key in ('1', '2')
    }
    observed_kept = {
        '1': sorted(variant.ID for variant in kept_strict.variant_list),
        '2': sorted(variant.ID for variant in kept_lenient.variant_list),
    }
    assert_dict_equal(expected_kept, observed_kept)
    assert_dict_equal(expected_rejected, observed_rejected)
Example #14
0
def tab_to_vcf(input_file, output_file, reference_file, convert_iupac=False, info_fields=None):
    """
    Write the rows of a tab-delimited file as VCF records.

    The fixed VCF fields are supported: #CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO

    PyVCF's _Record class is called with these positional arguments:

    CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, sample_indexes

    convert_iupac (bool) : When present, convert IUPAC codes to the non-reference allele.
        This is only possible for when the reference and IUPAC-determined alternates share
        at least one allele. Tri-allelic conversion is not supported and will emit a warning.
        IUPAC codes: http://www.bioinformatics.org/sms/iupac.html
    info_fields (dict) : Input column name -> VCF INFO key; only columns present
        in the row are copied into INFO.
    """
    reference_dict = FastaHack(reference_file)

    with open(input_file, "r") as input_fh, open(TEMPLATE_VCF_FILE, "r") as template_fh:
        reader = csv.DictReader(input_fh, delimiter="\t")
        vcf_reader = vcf.Reader(template_fh)

        with open(output_file, "w") as output_fh:
            vcf_writer = vcf.Writer(output_fh, vcf_reader, lineterminator="\n")

            for row in reader:
                fields = [row.get(tab_field, ".") for _vcf_field, tab_field in VCF_TO_FIELDS]

                # The position is written as an integer.
                fields[POSITION_INDEX] = int(fields[POSITION_INDEX])

                # GATK-style indels ("+"/"-" prefix, no "/") are rewritten in VCF form.
                alt_value = fields[ALT_INDEX]
                if alt_value.startswith(("+", "-")) and "/" not in alt_value:
                    fields = gatk_indel_to_vcf(fields, reference_dict)

                # Optional IUPAC conversion.
                if convert_iupac:
                    fields = _convert_iupac(fields)

                # ALT is passed on as a one-element list.
                fields[ALT_INDEX] = [fields[ALT_INDEX]]

                # INFO only receives the configured columns found in this row.
                if info_fields:
                    info = {
                        info_key: row[tab_field]
                        for tab_field, info_key in info_fields.items()
                        if tab_field in row
                    }
                else:
                    info = {}

                # Append INFO plus placeholder FORMAT and sample_indexes.
                fields.extend([info, ".", []])

                vcf_writer.write_record(_Record(*fields))
Example #15
0
 def test_coordinates_for_breakend(self):
     """Check the coordinates of a record whose only ALT is a breakend."""
     breakend = model._Breakend('1', 500, False, True, 'GGTC', True)
     record = model._Record('1', 10, 'id12', 'CTA', [breakend],
                            None, None, {}, None, {}, None)
     self.assert_has_expected_coordinates(record, (9, 12), (9, 12))
Example #16
0
 def test_coordinates_for_None_alt(self):
     """Check the coordinates of a record whose ALT list is ``[None]``."""
     no_alt = [None]
     record = model._Record('1', 10, 'id4', 'C', no_alt,
                            None, None, {}, None, {}, None)
     self.assert_has_expected_coordinates(record, (9, 10), (9, 10))
Example #17
0
 def test_is_snp_for_n_alt(self):
     """A record with REF 'C' and the single ALT 'N' must report ``is_snp``."""
     n_allele = model._Substitution('N')
     record = model._Record('1', 10, 'id1', 'C', [n_allele],
                            None, None, {}, None, {}, None)
     self.assertTrue(record.is_snp)
Example #18
0
 def test_coordinates_for_insertion(self):
     """Check the coordinates of a record with REF 'C' and the single ALT 'CTA'."""
     inserted = model._Substitution('CTA')
     record = model._Record('1', 10, 'id2', 'C', [inserted],
                            None, None, {}, None, {}, None)
     self.assert_has_expected_coordinates(record, (9, 10), (10, 10))
Example #19
0
    def __next__(self):
        """Parse the next line of the underlying reader into a record."""
        fields = self._row_pattern.split(next(self.reader).rstrip())
        column_count = len(fields)

        chrom = 'chr' + fields[0] if self._prepend_chr else fields[0]
        pos = int(fields[1])
        ID = fields[2] if fields[2] != '.' else None
        ref = fields[3]
        alt = self._map(self._parse_alt, fields[4].split(','))

        # QUAL: an int when possible, otherwise a float, otherwise None
        # (also None when the column is missing altogether).
        try:
            raw_qual = fields[5]
        except IndexError:
            qual = None
        else:
            try:
                qual = int(raw_qual)
            except ValueError:
                try:
                    qual = float(raw_qual)
                except ValueError:
                    qual = None

        # FILTER, INFO and FORMAT are optional trailing columns; these are
        # the defaults used when a column is absent.
        filt = self._parse_filter(fields[6]) if column_count > 6 else None
        info = self._parse_info(fields[7]) if column_count > 7 else {}
        fmt = fields[8] if column_count > 8 else None
        if fmt == '.':
            fmt = None

        record = _Record(chrom, pos, ID, ref, alt, qual, filt,
                         info, fmt, self._sample_indexes)

        # Sample columns are only parsed when a FORMAT is given.
        if fmt is not None:
            record.samples = self._parse_samples(fields[9:], fmt, record)

        return record
Example #20
0
    def _get_record_for_auxiliary(self, bpm_record_group, auxiliary_record):
        """
        Build the VCF record for an auxiliary locus
        NOTE: the BPM record's plus_strand_alleles are overwritten as a side effect

        Args:
            bpm_record_group (list(BPMRecord)) : BPM records for the group (must be length 1)
            auxiliary_record (vcf._Record) : Auxiliary locus definition

        Returns:
            vcf._Record : The new VCF record definition
        """
        identifier = self._get_identifier(bpm_record_group)

        assert len(bpm_record_group) == 1
        bpm_record = bpm_record_group[0]
        defaults = VcfRecordFactory._get_record_defaults()
        qual, filt, info, sample_indexes = defaults

        # The plus-strand alleles keep their order; each one is swapped for
        # the first auxiliary allele that shares its first base (plus
        # strand) or its last base (minus strand).
        current_alleles = bpm_record.plus_strand_alleles
        if bpm_record.ref_strand == RefStrand.Plus:
            compare_at = 0
        else:
            assert bpm_record.ref_strand == RefStrand.Minus
            compare_at = -1

        replacement_alleles = []
        for current in current_alleles:
            match = next(
                (str(candidate) for candidate in auxiliary_record.alleles
                 if str(candidate)[compare_at] == current[compare_at]),
                None)
            if match is not None:
                replacement_alleles.append(match)

        # exactly two distinct replacement alleles are required
        assert len(replacement_alleles) == 2
        assert replacement_alleles[0] != replacement_alleles[1]
        bpm_record.plus_strand_alleles = replacement_alleles

        # the VCF record itself carries the auxiliary record's alleles
        return _Record(auxiliary_record.CHROM, auxiliary_record.POS,
                       identifier, auxiliary_record.REF, auxiliary_record.ALT,
                       qual, filt, info,
                       self._format_factory.get_format_id_string(),
                       sample_indexes)
Example #21
0
def tab_to_vcf(input_file, output_file, reference_file):
    """
    Write the rows of a tab-delimited file as VCF records.

    The fixed VCF fields are supported: #CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO

    PyVCF's _Record class is called with these positional arguments:

    CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, sample_indexes
    """
    reference_dict = FastaHack(reference_file)

    def row_to_record(row):
        """Turn one parsed input row into a ``_Record``."""
        fields = [
            row.get(tab_field, ".")
            for _vcf_field, tab_field in VCF_TO_FIELDS
        ]

        # The position is written as an integer.
        fields[POSITION_INDEX] = int(fields[POSITION_INDEX])

        # GATK-style indels ("+"/"-" prefix, no "/") are rewritten in VCF form.
        alt_value = fields[ALT_INDEX]
        if alt_value.startswith(("+", "-")) and "/" not in alt_value:
            fields = gatk_indel_to_vcf(fields, reference_dict)

        # ALT is passed on as a one-element list.
        fields[ALT_INDEX] = [fields[ALT_INDEX]]

        # Append empty INFO plus placeholder FORMAT and sample_indexes.
        fields.extend([{}, ".", []])
        return _Record(*fields)

    with open(input_file, "r") as input_fh:
        reader = csv.DictReader(input_fh, delimiter="\t")

        with open(TEMPLATE_VCF_FILE, "r") as template_fh:
            vcf_reader = vcf.Reader(template_fh)

            with open(output_file, "w") as output_fh:
                vcf_writer = vcf.Writer(output_fh, vcf_reader,
                                        lineterminator='\n')
                for row in reader:
                    vcf_writer.write_record(row_to_record(row))
Example #22
0
 def test_coordinates_for_multiple_snps(self):
     """Check the coordinates of a record with three single-base ALT alleles."""
     alts = [model._Substitution(base) for base in ('A', 'G', 'T')]
     record = model._Record('1', 10, 'id5', 'C', alts,
                            None, None, {}, None, {}, None)
     self.assert_has_expected_coordinates(record, (9, 10), (9, 10))
Example #23
0
def tab_to_vcf(input_file, output_file, reference_file):
    """
    Write the rows of a tab-delimited file as VCF records.

    The fixed VCF fields are supported: #CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO

    PyVCF's _Record class is called with these positional arguments:

    CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, sample_indexes
    """
    reference_dict = FastaHack(reference_file)

    with open(input_file, "r") as input_fh, open(TEMPLATE_VCF_FILE, "r") as template_fh:
        reader = csv.DictReader(input_fh, delimiter="\t")
        vcf_reader = vcf.Reader(template_fh)

        with open(output_file, "w") as output_fh:
            vcf_writer = vcf.Writer(output_fh, vcf_reader, lineterminator='\n')

            for row in reader:
                fields = [row.get(tab_field, ".")
                          for _vcf_field, tab_field in VCF_TO_FIELDS]

                # The position is written as an integer.
                fields[POSITION_INDEX] = int(fields[POSITION_INDEX])

                # GATK-style indels ("+"/"-" prefix, no "/") are rewritten in VCF form.
                alt_value = fields[ALT_INDEX]
                if alt_value.startswith(("+", "-")) and "/" not in alt_value:
                    fields = gatk_indel_to_vcf(fields, reference_dict)

                # ALT is passed on as a one-element list.
                fields[ALT_INDEX] = [fields[ALT_INDEX]]

                # Append empty INFO plus placeholder FORMAT and sample_indexes.
                fields.extend([{}, ".", []])

                vcf_writer.write_record(_Record(*fields))
Example #24
0
    def _get_record_for_snv(self, bpm_record_group):
        """
        Create a new VCF record for an SNV

        The reference base is read from the genome reader at the position of
        the first BPM record; every plus-strand allele in the group that
        differs from it becomes an alternate allele (each listed once, in
        order of first appearance).

        Args:
            bpm_record_group (list(BPMRecord)) : BPM records for the group (must all be SNV records)

        Returns:
            vcf._Record : The new VCF record definition
        """
        # only the first record of the group is checked here
        assert not bpm_record_group[0].is_indel()
        identifier = self._get_identifier(bpm_record_group)
        bpm_record = bpm_record_group[0]
        (qual, filt, info,
         sample_indexes) = VcfRecordFactory._get_record_defaults()

        start_index = bpm_record.pos - 1
        chrom = bpm_record.chromosome
        if chrom in ("XX", "XY"):
            chrom = "X"

        reference_base = self._genome_reader.get_reference_bases(
            chrom, start_index, start_index + 1)
        if not check_reference_allele(reference_base, bpm_record_group):
            # Logger.warn() is a deprecated alias that was removed in
            # Python 3.13; warning() is the supported spelling.
            self._logger.warning("Reference allele is not queried for locus: " +
                                 identifier)

        alts = []
        for record in bpm_record_group:
            for nucleotide in record.plus_strand_alleles:
                if nucleotide != reference_base:
                    substitution = _Substitution(nucleotide)
                    if substitution not in alts:
                        # reuse the object built for the membership test
                        alts.append(substitution)

        return _Record(chrom, bpm_record.pos, identifier, reference_base, alts,
                       qual, filt, info,
                       self._format_factory.get_format_id_string(),
                       sample_indexes)
 def vcf_record(self, *args, **kwargs):
     """Build a ``_Record``, forwarding all arguments unchanged."""
     record = _Record(*args, **kwargs)
     return record
Example #26
0
 def vcf_record(self, *args, **kwargs):
     """Build a ``_Record``, forwarding all arguments unchanged."""
     record = _Record(*args, **kwargs)
     return record
Example #27
0
 def test_is_snp_for_n_alt(self):
     """A record with REF 'C' and the single ALT 'N' must report ``is_snp``."""
     record_fields = (
         '1', 10, 'id1', 'C',
         [model._Substitution('N')],
         None, None, {}, None, {}, None,
     )
     record = model._Record(*record_fields)
     self.assertTrue(record.is_snp)
Example #28
0
 def test_coordinates_for_insertion(self):
     """Check the coordinates of a record with REF 'C' and the single ALT 'CTA'."""
     record_fields = (
         '1', 10, 'id2', 'C',
         [model._Substitution('CTA')],
         None, None, {}, None, {}, None,
     )
     record = model._Record(*record_fields)
     self.assert_has_expected_coordinates(record, (9, 10), (10, 10))
Example #29
0
 def test_coordinates_for_None_alt(self):
     """Check the coordinates of a record whose ALT list is ``[None]``."""
     record_fields = (
         '1', 10, 'id4', 'C',
         [None],
         None, None, {}, None, {}, None,
     )
     record = model._Record(*record_fields)
     self.assert_has_expected_coordinates(record, (9, 10), (9, 10))
Example #30
0
 def test_coordinates_for_breakend(self):
     """Check the coordinates of a record whose only ALT is a breakend."""
     record_fields = (
         '1', 10, 'id12', 'CTA',
         [model._Breakend('1', 500, False, True, 'GGTC', True)],
         None, None, {}, None, {}, None,
     )
     record = model._Record(*record_fields)
     self.assert_has_expected_coordinates(record, (9, 12), (9, 12))