Esempio n. 1
0
    def maker_clinvar() -> pandas.DataFrame:
        """
        Build a dataframe from a ClinVar VCF.

        Opens the ClinVar source via ``clinvar_open``, wraps the open handle
        in ``ReadVCF`` and returns whatever ``clinvar_to_df`` produces for it.

        ``which`` is a free variable here; it is expected to be supplied by
        the enclosing scope (not visible in this snippet).
        """
        from idiva.db import clinvar_open
        from idiva.io import ReadVCF
        from idiva.db.clinvar import clinvar_to_df

        with clinvar_open(which=which) as handle:
            # The conversion happens while the handle is still open.
            reader = ReadVCF(handle)
            return clinvar_to_df(reader)
Esempio n. 2
0
 def test_open_read_vcf_meta(self):
     """
     Open the 'vcf_37' ClinVar file, check the reader exposes no
     ``sample_ids`` attribute and print its header.
     """
     from idiva.db import clinvar_open
     from idiva.io import ReadVCF
     with clinvar_open(which='vcf_37') as stream:
         reader = ReadVCF(stream)
         has_sample_ids = hasattr(reader, "sample_ids")
         assert not has_sample_ids
         print(reader.header)
     # NOTE(review): this unconditional raise makes the test always error;
     # presumably a placeholder for checks that are still to be written.
     raise NotImplementedError
Esempio n. 3
0
 def test_howto(self):
     """
     Usage walkthrough: open ClinVar with default arguments, read the
     ``meta`` attribute of the VCF reader and iterate over every dataline.

     Nothing is asserted; the test only checks that these steps run.
     """
     from idiva.db import clinvar_open
     from idiva.io.vcf import ReadVCF
     with clinvar_open() as stream:
         reader = ReadVCF(stream)
         # Touch the metadata attribute; the value itself is not inspected.
         _ = reader.meta
         # Exhaust the reader; each dataline is discarded.
         for _record in reader:
             pass
Esempio n. 4
0
 def test_open_read_manual(self):
     """
     Read the first three lines of the default ClinVar file by hand and
     compare them (whitespace-stripped) with the expected VCF header lines.
     """
     from idiva.db import clinvar_open
     expected_head = [
         '##fileformat=VCFv4.1',
         '##fileDate=2020-11-07',
         '##source=ClinVar',
     ]
     with clinvar_open() as stream:
         self.assertIsInstance(stream, io.TextIOBase)
         # One readline() per expected header line.
         observed_head = []
         for _ in expected_head:
             observed_head.append(stream.readline().strip())
         self.assertListEqual(expected_head, observed_head)
Esempio n. 5
0
 def maker_clinvar() -> pd.DataFrame:
     """
     Create the clinvar dataframe.

     Logs an info message, opens the ClinVar source named by
     ``clinvar_file`` (a name defined outside this function), wraps the
     handle in ``ReadVCF`` and returns the result of ``clinvar_to_df``.
     """
     from idiva.db import clinvar_open
     from idiva.io import ReadVCF
     from idiva.db.clinvar import clinvar_to_df
     log.info('Making clinvar df.')
     with clinvar_open(which=clinvar_file) as handle:
         # Convert while the handle is still open.
         reader = ReadVCF(handle)
         return clinvar_to_df(reader)
Esempio n. 6
0
 def test_clinvar_df(self):
     """
     Convert the 'vcf_37' ClinVar file to a dataframe and check its shape
     and contents: the row count against ``REF_LENGTHS['clinvar_df']``,
     that every ``CLNVC`` value is 'single_nucleotide_variant', that
     ``CLNVC`` has no nulls, and that an ``OMIM_id`` column exists.
     """
     from idiva.db import clinvar_open
     from idiva.io import ReadVCF
     from idiva.db.clinvar import clinvar_to_df
     with clinvar_open(which='vcf_37') as fd:
         df = clinvar_to_df(ReadVCF(fd))
     self.assertEqual(len(df), REF_LENGTHS['clinvar_df'])
     # Compare the column element-wise and reduce with Series.all().
     # The previous form, all(df.loc[mask]), iterated over the frame's
     # column labels (non-empty strings), so it was always true and
     # verified nothing about the rows.
     self.assertTrue(
         (df['CLNVC'] == 'single_nucleotide_variant').all())
     self.assertFalse(df['CLNVC'].isnull().values.any())
     self.assertIn('OMIM_id', df.columns)
Esempio n. 7
0
    def test_length_clinvar(self):
        """
        Iterate over all datalines of the 'vcf_37' ClinVar file (with a tqdm
        progress display) and compare the index of the last dataline with
        ``REF_LENGTHS['clinvar_csv']``.
        """
        from idiva.db import clinvar_open
        from idiva.io import ReadVCF
        from tqdm import tqdm
        # Start below the first index so that an empty file yields an
        # ordinary assertion failure rather than a NameError on an unbound
        # loop variable.
        idx = -1
        with clinvar_open(which='vcf_37') as fd:
            vcf = ReadVCF(fd)
            for idx, _ in tqdm(enumerate(vcf.datalines),
                               postfix='reading clinvar file'):
                pass

        # idx is the zero-based index of the last dataline (count - 1).
        self.assertEqual(idx, REF_LENGTHS['clinvar_csv'])
Esempio n. 8
0
    def test_open_read_vcf_datalines(self):
        """
        Read the first datalines of the 'vcf_37' ClinVar file through
        ``ReadVCF`` and check that they are ``RawDataline`` objects, that
        their string form matches the expected raw lines, and that their
        ``ref`` attributes are as expected.
        """
        from idiva.db import clinvar_open
        from idiva.io import ReadVCF
        with clinvar_open(which='vcf_37') as stream:
            reader = ReadVCF(stream)

            # Expected raw (tab-separated) text of the first three datalines.
            expected_lines = [
                "1	865568	846933	G	A	.	.	ALLELEID=824438;CLNDISDB=MedGen:CN517202;CLNDN=not_provided;CLNHGVS=NC_000001.10:g.865568G>A;CLNREVSTAT=criteria_provided,_single_submitter;CLNSIG=Uncertain_significance;CLNVC=single_nucleotide_variant;CLNVCSO=SO:0001483;GENEINFO=SAMD11:148398;MC=SO:0001583|missense_variant;ORIGIN=1",
                "1	865583	972363	C	T	.	.	ALLELEID=959431;CLNDISDB=MedGen:CN517202;CLNDN=not_provided;CLNHGVS=NC_000001.10:g.865583C>T;CLNREVSTAT=criteria_provided,_single_submitter;CLNSIG=Uncertain_significance;CLNVC=single_nucleotide_variant;CLNVCSO=SO:0001483;GENEINFO=SAMD11:148398;MC=SO:0001583|missense_variant;ORIGIN=1",
                "1	865628	789256	G	A	.	.	AF_ESP=0.00347;AF_EXAC=0.00622;AF_TGP=0.00280;ALLELEID=707587;CLNDISDB=MedGen:CN517202;CLNDN=not_provided;CLNHGVS=NC_000001.10:g.865628G>A;CLNREVSTAT=criteria_provided,_single_submitter;CLNSIG=Likely_benign;CLNVC=single_nucleotide_variant;CLNVCSO=SO:0001483;GENEINFO=SAMD11:148398;MC=SO:0001583|missense_variant;ORIGIN=1;RS=41285790",
            ]

            from idiva.io.vcf import RawDataline
            # Take only as many datalines as there are expected lines.
            records: typing.List[RawDataline] = list(
                at_most_n(reader, n=len(expected_lines)))

            self.assertIsInstance(records[0], RawDataline)

            rendered = [str(record) for record in records]
            self.assertListEqual(expected_lines, rendered)

            # Reference alleles of the three datalines, in order.
            for position, expected_ref in enumerate(('G', 'C', 'G')):
                self.assertEqual(records[position].ref, expected_ref)
Esempio n. 9
0
 def test_open(self):
     """
     Check that ``clinvar_open`` with default arguments yields a text
     stream (an ``io.TextIOBase`` instance) when used as a context manager.
     """
     from idiva.db import clinvar_open
     expected_base = io.TextIOBase
     with clinvar_open() as stream:
         self.assertIsInstance(stream, expected_base)