Esempio n. 1
0
 def test_coordinates_for_insertion_and_deletion(self):
     """Check coordinates of a 'CT' record with alternates 'CA' and 'CTT'.

     The record is built once per ordering of the two alternates; both
     orderings must satisfy the same expected coordinate tuples.
     """
     alt_orderings = (
         ('id10', ('CA', 'CTT')),
         ('id11', ('CTT', 'CA')),
     )
     for identifier, sequences in alt_orderings:
         alts = [model._Substitution(sequence) for sequence in sequences]
         record = model._Record(
             '1', 10, identifier, 'CT', alts,
             None, None, {}, None, {}, None
         )
         self.assert_has_expected_coordinates(record, (9, 11), (10, 11))
Esempio n. 2
0
 def test_coordinates_for_insert_and_snp(self):
     """Check coordinates of a 'C' record with alternates 'GTA' and 'G'.

     Each ordering of the two alternates gets its own record, and both
     must match the same expected coordinate tuples.
     """
     alt_orderings = (
         ('id6', ('GTA', 'G')),
         ('id7', ('G', 'GTA')),
     )
     for identifier, sequences in alt_orderings:
         alts = [model._Substitution(sequence) for sequence in sequences]
         record = model._Record(
             '1', 10, identifier, 'C', alts,
             None, None, {}, None, {}, None
         )
         self.assert_has_expected_coordinates(record, (9, 10), (9, 10))
Esempio n. 3
0
 def test_coordinates_for_snp_and_deletion(self):
     """Check coordinates of a 'CTA' record with alternates 'C' and 'CTG'.

     Each ordering of the two alternates gets its own record, and both
     must match the same expected coordinate tuples.
     """
     alt_orderings = (
         ('id8', ('C', 'CTG')),
         ('id9', ('CTG', 'C')),
     )
     for identifier, sequences in alt_orderings:
         alts = [model._Substitution(sequence) for sequence in sequences]
         record = model._Record(
             '1', 10, identifier, 'CTA', alts,
             None, None, {}, None, {}, None
         )
         self.assert_has_expected_coordinates(record, (9, 12), (10, 12))
Esempio n. 4
0
 def test_coordinates_for_multiple_snps(self):
     """Check coordinates of a 'C' record with alternates 'A', 'G' and 'T'."""
     alts = [model._Substitution(base) for base in ('A', 'G', 'T')]
     record = model._Record(
         '1', 10, 'id5', 'C', alts,
         None, None, {}, None, {}, None
     )
     self.assert_has_expected_coordinates(record, (9, 10), (9, 10))
Esempio n. 5
0
 def test_coordinates_for_insertion_and_deletion(self):
     """Check coordinates of a 'CT' record with alternates 'CA' and 'CTT'.

     The same expectation is asserted for both orderings of the alternates.
     """
     def build_record(identifier, *alt_sequences):
         # Only the identifier and the ALT order differ between the cases.
         alts = [model._Substitution(sequence) for sequence in alt_sequences]
         return model._Record('1', 10, identifier, 'CT', alts,
                              None, None, {}, None, {}, None)

     first = build_record('id10', 'CA', 'CTT')
     self.assert_has_expected_coordinates(first, (9, 11), (10, 11))

     swapped = build_record('id11', 'CTT', 'CA')
     self.assert_has_expected_coordinates(swapped, (9, 11), (10, 11))
Esempio n. 6
0
 def test_coordinates_for_snp_and_deletion(self):
     """Check coordinates of a 'CTA' record with alternates 'C' and 'CTG'.

     The same expectation is asserted for both orderings of the alternates.
     """
     def build_record(identifier, *alt_sequences):
         # Only the identifier and the ALT order differ between the cases.
         alts = [model._Substitution(sequence) for sequence in alt_sequences]
         return model._Record('1', 10, identifier, 'CTA', alts,
                              None, None, {}, None, {}, None)

     first = build_record('id8', 'C', 'CTG')
     self.assert_has_expected_coordinates(first, (9, 12), (10, 12))

     swapped = build_record('id9', 'CTG', 'C')
     self.assert_has_expected_coordinates(swapped, (9, 12), (10, 12))
Esempio n. 7
0
 def test_coordinates_for_insert_and_snp(self):
     """Check coordinates of a 'C' record with alternates 'GTA' and 'G'.

     The same expectation is asserted for both orderings of the alternates.
     """
     def build_record(identifier, *alt_sequences):
         # Only the identifier and the ALT order differ between the cases.
         alts = [model._Substitution(sequence) for sequence in alt_sequences]
         return model._Record('1', 10, identifier, 'C', alts,
                              None, None, {}, None, {}, None)

     first = build_record('id6', 'GTA', 'G')
     self.assert_has_expected_coordinates(first, (9, 10), (9, 10))

     swapped = build_record('id7', 'G', 'GTA')
     self.assert_has_expected_coordinates(swapped, (9, 10), (9, 10))
Esempio n. 8
0
 def _parse_alt(self, str):
     """Turn one ALT allele string into the matching allele object.

     The checks are tried in this order:

     * ``self._alt_pattern`` matches: the string is treated as a paired
       breakend and a ``_Breakend`` is returned.
     * leading ``.`` (and more than one character): ``_SingleBreakend``
       built with ``True`` and the text after the dot.
     * trailing ``.`` (and more than one character): ``_SingleBreakend``
       built with ``False`` and the text before the dot.
     * wrapped in ``<`` ... ``>``: ``_SV`` built from the enclosed text.
     * anything else: ``_Substitution`` built from the whole string.

     The parameter keeps its historical name ``str`` (shadowing the
     builtin inside this method) so keyword callers keep working.

     NOTE(review): an empty string is not handled here; unless
     ``self._alt_pattern`` matches it, ``str[0]`` raises IndexError.
     Callers are presumably expected to filter empty values first.
     """
     if self._alt_pattern.search(str) is not None:
         # Paired breakend
         items = self._alt_pattern.split(str)
         # The second split item carries the mate location as "chrom:pos".
         remote_coords = items[1].split(':')
         remote_chrom = remote_coords[0]
         if remote_chrom[0] == '<':
             # Angle brackets are stripped from the contig name and the
             # breakend is flagged as outside the main assembly.
             remote_chrom = remote_chrom[1:-1]
             within_main_assembly = False
         else:
             within_main_assembly = True
         remote_pos = remote_coords[1]
         orientation = str[0] in ('[', ']')
         # Plain substring test; equivalent to the former regex search for
         # a literal '[' but without the invalid '\[' escape sequence.
         remote_orientation = '[' in str
         # The connecting sequence sits after the bracketed part when the
         # string starts with a bracket, before it otherwise.
         connecting_sequence = items[2] if orientation else items[0]
         return _Breakend(remote_chrom, remote_pos, orientation,
                          remote_orientation, connecting_sequence,
                          within_main_assembly)
     elif str[0] == '.' and len(str) > 1:
         return _SingleBreakend(True, str[1:])
     elif str[-1] == '.' and len(str) > 1:
         return _SingleBreakend(False, str[:-1])
     elif str[0] == "<" and str[-1] == ">":
         return _SV(str[1:-1])
     else:
         return _Substitution(str)
Esempio n. 9
0
    def _get_record_for_indel(self, bpm_record_group):
        """
        Build the VCF record for an insertion/deletion locus

        The first record of the group supplies the position, chromosome and
        indel sequence. Chromosome names "XX" and "XY" are reported as "X".

        Args:
            bpm_record_group (list(BPMRecord)) : BPM records for the group; all of them
                must agree on is_deletion (checked with assert)

        Returns:
            vcf._Record : The new VCF record definition, carrying a single alternate allele
        """
        qual, filt, info, sample_indexes = \
            VcfRecordFactory._get_record_defaults()
        identifier = self._get_identifier(bpm_record_group)

        lead_record = bpm_record_group[0]
        for member in bpm_record_group:
            assert member.is_deletion == lead_record.is_deletion

        # Only the middle element of the returned triple is used here.
        _unused_head, indel_sequence, _unused_tail = \
            lead_record.get_indel_source_sequences(RefStrand.Plus)

        start_index = lead_record.pos - 1
        chrom = lead_record.chromosome
        if chrom in ("XX", "XY"):
            chrom = "X"

        if lead_record.is_deletion:
            # Deletion: anchor on the base before start_index; the reference
            # allele is anchor + deleted sequence, the alternate is the anchor.
            position = start_index
            anchor_base = self._genome_reader.get_reference_bases(
                chrom, start_index - 1, start_index)
            reference_allele = anchor_base + indel_sequence
            alternate_allele = anchor_base
        else:
            # Insertion: anchor on the base at start_index; the alternate
            # allele is anchor + inserted sequence.
            position = start_index + 1
            anchor_base = self._genome_reader.get_reference_bases(
                chrom, start_index, start_index + 1)
            reference_allele = anchor_base
            alternate_allele = anchor_base + indel_sequence

        return _Record(chrom, position, identifier, reference_allele,
                       [_Substitution(alternate_allele)], qual, filt, info,
                       self._format_factory.get_format_id_string(),
                       sample_indexes)
Esempio n. 10
0
def _grid_item_to_vcf_record(info_dict, obj, sample_ids, sample_names):
    """Build a VCF ``_Record`` from one grid row.

    Args:
        info_dict: Mapping of INFO id to a dict whose
            ``'column__variant_column'`` entry names the key of ``obj`` to
            read for that INFO field.
        obj: Mapping holding the row values (locus, alt, annotation and
            per-sample ``"<sample_id>_samples_*"`` entries).
        sample_ids: Sample ids used to build the per-sample keys of ``obj``.
            When falsy, the record has no FORMAT and no calls are attached.
        sample_names: Names paired positionally with ``sample_ids``; used as
            the call's sample and as the key in the record's sample index.

    Returns:
        The ``_Record``. QUAL is always '.', FILTER is always None, and INFO
        only contains entries whose row value is truthy.
    """
    chrom = obj.get("locus__contig__name", ".")
    pos = obj.get("locus__position", ".")
    variant_id = obj.get("variantannotation__dbsnp_rs_id")
    ref = obj.get("locus__ref__seq", ".")
    alt_seq = obj.get("alt__seq", ".")

    info = {}
    for info_id, info_data in info_dict.items():
        value = obj.get(info_data['column__variant_column'])
        if value:
            info[info_id] = value

    format_keys = ['GT', 'AD', 'AF', 'PL', 'DP', 'GQ']
    CallData = make_calldata_tuple(format_keys)
    # Shared with the record: filled in below as calls are created.
    sample_indexes = {}
    format_string = ':'.join(format_keys) if sample_ids else None

    record = _Record(chrom, pos, variant_id, ref, [_Substitution(alt_seq)],
                     '.', None, info, format_string, sample_indexes)
    if not sample_ids:
        return record

    calls = []
    paired_samples = zip(sample_ids, sample_names)
    for index, (sample_id, sample_name) in enumerate(paired_samples):
        allele_depth = obj[f"{sample_id}_samples_allele_depth"]
        zygosity = obj[f"{sample_id}_samples_zygosity"]
        genotype = Zygosity.get_genotype_from_expanded_zygosity(zygosity)
        read_depth = obj[f"{sample_id}_samples_read_depth"]
        allele_frequency = obj[f"{sample_id}_samples_allele_frequency"]
        # GQ/PL are optional and fall back to '.'.
        # TODO: Ideally, we'd not write them out
        phred_likelihood = obj.get(f"{sample_id}_samples_phred_likelihood", ".")
        genotype_quality = obj.get(f"{sample_id}_samples_genotype_quality", ".")

        # TODO: Need to grab information for reference base to be able to
        # properly fill in this data; until then the first slot is '.'.
        call_data = CallData(
            GT=genotype,
            AD=['.', allele_depth],
            AF=['.', allele_frequency],
            PL=['.', phred_likelihood],
            DP=['.', read_depth],
            GQ=['.', genotype_quality],
        )
        calls.append(_Call(record, sample_name, call_data))
        sample_indexes[sample_name] = index

    record.samples = calls
    return record
Esempio n. 11
0
 def test_coordinates_for_multiple_snps(self):
     """Check coordinates of a 'C' record with alternates 'A', 'G' and 'T'."""
     # Iterating the string yields the single-base alternates in order.
     alts = list(map(model._Substitution, 'AGT'))
     record = model._Record('1', 10, 'id5', 'C', alts,
                            None, None, {}, None, {}, None)
     expected = ((9, 10), (9, 10))
     self.assert_has_expected_coordinates(record, *expected)
Esempio n. 12
0
    def _get_record_for_snv(self, bpm_record_group):
        """
        Create a new VCF record for an SNV

        The first record of the group supplies the position and chromosome.
        Chromosome names "XX" and "XY" are reported as "X". Alternate alleles
        are the plus-strand alleles of every record in the group that differ
        from the reference base, de-duplicated in first-seen order.

        Args:
            bpm_record_group (list(BPMRecord)) : BPM records for the group (must all be SNV records;
                only the first record is checked, via assert)

        Returns:
            vcf._Record : The new VCF record definition
        """
        assert not bpm_record_group[0].is_indel()
        identifier = self._get_identifier(bpm_record_group)
        bpm_record = bpm_record_group[0]
        (qual, filt, info,
         sample_indexes) = VcfRecordFactory._get_record_defaults()

        start_index = bpm_record.pos - 1
        chrom = bpm_record.chromosome
        if chrom == "XX" or chrom == "XY":
            chrom = "X"

        reference_base = self._genome_reader.get_reference_bases(
            chrom, start_index, start_index + 1)
        if not check_reference_allele(reference_base, bpm_record_group):
            # Logger.warn is a deprecated alias; warning() is the supported
            # spelling and logs the same message.
            self._logger.warning("Reference allele is not queried for locus: " +
                                 identifier)

        alts = []
        for record in bpm_record_group:
            for nucleotide in record.plus_strand_alleles:
                if nucleotide != reference_base:
                    substitution = _Substitution(nucleotide)
                    # Membership relies on _Substitution equality; reuse the
                    # instance just built instead of constructing a second one.
                    if substitution not in alts:
                        alts.append(substitution)

        return _Record(chrom, bpm_record.pos, identifier, reference_base, alts,
                       qual, filt, info,
                       self._format_factory.get_format_id_string(),
                       sample_indexes)
Esempio n. 13
0
 def test_coordinates_for_insertion(self):
     """Check coordinates of a 'C' record whose only alternate is 'CTA'."""
     inserted_allele = model._Substitution('CTA')
     record = model._Record('1', 10, 'id2', 'C', [inserted_allele],
                            None, None, {}, None, {}, None)
     self.assert_has_expected_coordinates(record, (9, 10), (10, 10))
Esempio n. 14
0
 def test_is_snp_for_n_alt(self):
     """A 'C' record whose only alternate is 'N' must report is_snp as true."""
     n_allele = model._Substitution('N')
     record = model._Record('1', 10, 'id1', 'C', [n_allele],
                            None, None, {}, None, {}, None)
     self.assertTrue(record.is_snp)
Esempio n. 15
0
 def test_coordinates_for_insertion(self):
     """Check coordinates of a 'C' record whose only alternate is 'CTA'."""
     record_fields = (
         '1', 10, 'id2', 'C',
         [model._Substitution('CTA')],
         None, None, {}, None, {}, None,
     )
     record = model._Record(*record_fields)
     expected = ((9, 10), (10, 10))
     self.assert_has_expected_coordinates(record, *expected)
Esempio n. 16
0
 def test_is_snp_for_n_alt(self):
     """A 'C' record whose only alternate is 'N' must report is_snp as true."""
     record_fields = (
         '1', 10, 'id1', 'C',
         [model._Substitution('N')],
         None, None, {}, None, {}, None,
     )
     record = model._Record(*record_fields)
     self.assertTrue(record.is_snp)