def test_makes_df(self):
        """
        Check that `v0_df` yields as many rows as `ReadVCF` yields items.

        Every file in `PATHS` is read twice from the same open stream: once
        to count the items produced by `ReadVCF`, once to build the dataframe.
        """
        from idiva.utils import seek_then_rewind
        from idiva.io import ReadVCF
        from idiva.clf.df import v0_df

        for path in PATHS.values():
            with path.open(mode='r') as stream:
                assert isinstance(stream, io.TextIOBase)

                # NOTE(review): `seek_then_rewind` presumably restores the
                # stream position on exit, so both passes start at the same
                # place -- confirm against idiva.utils.
                with seek_then_rewind(stream):
                    n_datalines = len(list(ReadVCF(stream)))

                with seek_then_rewind(stream):
                    frame = v0_df(ReadVCF(stream))

                self.assertEqual(n_datalines, len(frame))
 def maker_clinvar() -> pd.DataFrame:
     """
     Build the clinvar dataframe.

     Opens `clinvar_file` through `clinvar_open`, wraps the stream in
     `ReadVCF` and hands it to `clinvar_to_df`.

     :return: whatever `clinvar_to_df` produces for that VCF.
     """
     from idiva.db import clinvar_open
     from idiva.db.clinvar import clinvar_to_df
     from idiva.io import ReadVCF

     log.info('Making clinvar df.')

     with clinvar_open(which=clinvar_file) as stream:
         vcf = ReadVCF(stream)
         clinvar_df = clinvar_to_df(vcf)

     return clinvar_df
    def test_fisher_large_head(self):
        """
        Smoke test: run `vcf_to_fisher` on the case/ctrl files listed in
        `PATHS_LARGE_HEAD` and print the resulting dataframe.

        Nothing is asserted; the test passes if no exception is raised.
        """
        from idiva.io import ReadVCF, open_maybe_gz
        from idiva.stat.vcf_to_fisher import vcf_to_fisher
        from idiva.utils.testing import whatsmyname

        case_path = PATHS_LARGE_HEAD['case']
        ctrl_path = PATHS_LARGE_HEAD['ctrl']

        with open_maybe_gz(case_path, mode='r') as case, open_maybe_gz(ctrl_path, mode='r') as ctrl:
            # `.df` is read while both files are still open
            df = vcf_to_fisher(case=ReadVCF(case), ctrl=ReadVCF(ctrl)).df

        # NOTE(review): `out_dir` is computed but not used below
        out_dir = MY_SPACE / whatsmyname()
        print(df)
    def test_join_does_something(self):
        """
        Smoke test: build a `v0_df` dataframe for every file in `PATHS`
        and `join` the 'case' and 'ctrl' ones.

        Nothing is asserted; the test passes if no exception is raised.
        """
        from idiva.clf.df import join, v0_df
        from idiva.io import ReadVCF

        def load(path):
            # Read one VCF file into a dataframe
            with path.open(mode='r') as stream:
                assert isinstance(stream, io.TextIOBase)
                return v0_df(ReadVCF(stream))

        frames = {group: load(path) for (group, path) in PATHS.items()}

        joined = join(case=frames['case'], ctrl=frames['ctrl'])
    def test_poc_head(self):
        """
        Proof-of-concept: a VCF printed back from `ReadVCF` matches its source.

        For every file in `PATHS_LARGE_HEAD`, the meta lines, the header and
        the datalines are printed into a ".log" file under `MY_SPACE`.
        That candidate file is then compared line by line with the reference.
        Reference lines starting with "##fileDate" or "##source" are exempt,
        because those meta values are replaced on output.
        """
        from idiva.io import Oneliner, ReadVCF, open_maybe_gz

        # Name of this test method; used as the prefix of the candidate files
        this_test = sys._getframe().f_code.co_name

        for (label, ref_file) in PATHS_LARGE_HEAD.items():
            can_file = (MY_SPACE / F"{this_test}__{label}").with_suffix(".log")

            with open_maybe_gz(ref_file, mode='r') as fd_ref:
                assert isinstance(fd_ref, io.TextIOBase)
                vcf = ReadVCF(fd_ref).preload_all()

            # Everything printed below goes to the candidate file
            with open(can_file, mode='w') as fd_can, redirect_stdout(fd_can):
                for (key, value) in vcf.meta.items():
                    if isinstance(value, str):
                        # Simple "##key=value" meta line
                        key_lower = str(key).lower()
                        if key_lower == "filedate":
                            from idiva.io.out import fileDate
                            value = fileDate
                        elif key_lower == "source":
                            from idiva.io.out import source
                            value = source
                        print(F"##{key}={value}")
                        continue

                    if not isinstance(value, dict):
                        # Meta values of any other type are not printed
                        continue

                    # Structured "##key=<ID=...,field=value,...>" meta lines
                    for (ident, fields) in value.items():
                        assert isinstance(fields, dict)
                        assert fields
                        rendered = ','.join(
                            F"{name}={'.' if (content is None) else content}"
                            for (name, content) in fields.items()
                        )
                        print(F"##{key}=<ID={ident},{rendered}>")

                print(F"#{SEP.join(vcf.header)}")

                for dataline in vcf:
                    print(str(dataline))

            with open_maybe_gz(ref_file, mode='r') as fd_ref, open_maybe_gz(can_file, mode='r') as fd_can:
                assert isinstance(fd_ref, io.TextIOBase)
                assert isinstance(fd_can, io.TextIOBase)

                lines_ref = list(Oneliner(fd_ref))
                lines_can = list(Oneliner(fd_can))

                for (ref, can) in zip(lines_ref, lines_can):
                    if ref.startswith("##fileDate"):
                        continue
                    if ref.startswith("##source"):
                        continue
                    self.assertEqual(can, ref)

                # `zip` stops at the shorter list, so check the lengths too
                self.assertEqual(len(lines_can), len(lines_ref))
    def test_combine(self):
        """
        Regression test: the case/ctrl `join` must equal the stored reference.

        The dataframes built from the files in `PATHS` are joined, and the
        result is compared with the table in `MY_SPACE / "reference.txt"`.
        The candidate is written to CSV and read back first, so that both
        tables pass through the same `read_csv` conversion.
        """
        import tempfile
        from pathlib import Path

        from idiva.clf.df import join, v0_df
        from idiva.io import ReadVCF
        from idiva.io.vcf import SEP

        dfs = {}

        for k in PATHS:
            with PATHS[k].open(mode='r') as fd:
                assert isinstance(fd, io.TextIOBase)
                dfs[k] = v0_df(ReadVCF(fd))

        candidate = join(case=dfs['case'], ctrl=dfs['ctrl'])

        def read_csv(file) -> pd.DataFrame:
            """Read a `SEP`-separated table and cast the compared columns to fixed dtypes."""
            return pd.read_csv(file, sep=SEP).astype(
                {
                    'CHROM': str,
                    'POS': int,
                    'ID': str,
                    'ALT0_case': float, 'ALT1_case': float, 'ALT2_case': float,
                    'ALT0_ctrl': float, 'ALT1_ctrl': float, 'ALT2_ctrl': float,
                }
            )

        ref_file = MY_SPACE / "reference.txt"

        # Hack to create the reference
        # candidate.to_csv(ref_file, sep=SEP, index=True)

        reference = read_csv(ref_file)

        # Write and read `candidate` to make comparable to `reference`.
        # The file is written by path (as the reference was) into a temporary
        # directory, and read back once the writer has closed it: reopening a
        # `NamedTemporaryFile` by name while it is still open is not portable
        # (it fails on Windows).
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_file = Path(tmp_dir) / "candidate.txt"
            candidate.to_csv(tmp_file, sep=SEP, index=True)
            candidate = read_csv(tmp_file)

        self.assertTrue(reference.equals(candidate))