class TestReportPy(unittest.TestCase):
    """ test the Report class
    """
    
    def setUp(self):
        """ define a family and variant, and start the Allosomal class
        """
        
        # generate a test family
        child_gender = "F"
        mom_aff = "1"
        dad_aff = "1"
        
        self.trio = self.create_family(child_gender, mom_aff, dad_aff)
        
        # generate a test variant
        child_var = self.create_snv(child_gender, "0/1")
        mom_var = self.create_snv("F", "0/0")
        dad_var = self.create_snv("M", "0/0")
        
        var = TrioGenotypes(child_var)
        var.add_mother_variant(mom_var)
        var.add_father_variant(dad_var)
        self.variants = [var]
        
        self.report = Report(None, None, None, None)
        self.report.family = self.trio
        # self.report.tags_dict = tags
    
    def create_snv(self, gender, genotype):
        """ create a default variant
        """
        
        chrom = "X"
        pos = "15000000"
        snp_id = "."
        ref = "A"
        alt = "G"
        qual = "50"
        filt = "PASS"
        
        # set up a SNV object, since SNV inherits VcfInfo
        var = SNV(chrom, pos, snp_id, ref, alt, filt)
        
        info = "HGNC=TEST;CQ=missense_variant;random_tag;EUR_AF=0.0005"
        format_keys = "GT:DP"
        sample_values = genotype + ":50"
        
        var.vcf_line = [chrom, pos, snp_id, ref, alt, qual, filt, info, format_keys, sample_values]
        
        var.add_info(info)
        var.add_format(format_keys, sample_values)
        var.set_gender(gender)
        var.set_genotype()
        
        return var
    
    def create_family(self, child_gender, mom_aff, dad_aff):
        """ create a default family, with optional gender and parental statuses
        """
        
        fam = Family("test")
        fam.add_child("child", "child_vcf", "2", child_gender)
        fam.add_mother("mother", "mother_vcf", mom_aff, "2")
        fam.add_father("father", "father_vcf", dad_aff, "1")
        fam.set_child()
        
        return fam
    
    def test__get_provenance(self):
        """ check that _get_provenance() works correctly
        """
        
        prov = ["checksum", "sample.calls.date.vcf.gz", "2014-01-01"]
        member = "proband"
        
        self.assertEqual(self.report._get_provenance(prov, member), \
            ["##UberVCF_proband_Id=sample\n", \
            "##UberVCF_proband_Checksum=checksum\n", \
            "##UberVCF_proband_Basename=sample.calls.date.vcf.gz\n", \
            "##UberVCF_proband_Date=2014-01-01\n"])
    
    def test__get_vcf_export_path(self):
        """ check that _get_vcf_export_path() works correctly
        """
        
        # use a folder to place the VCFG file in, which means we join the
        # proband ID to get a full path
        self.report.export_vcf = os.getcwd()
        self.assertEqual(self.report._get_vcf_export_path(), os.path.join(os.getcwd(), "child.vcf.gz"))
        
        # define an un-uable directory, to raise an error
        self.report.export_vcf = os.getcwd() + "asjhfgasjhfg"
        self.assertRaises(ValueError, self.report._get_vcf_export_path)
        
        # define a specific path for a VCF file, which is returned directly
        self.report.export_vcf = os.path.join(os.getcwd(), "sample_id.vcf.gz")
        self.assertEqual(self.report._get_vcf_export_path(), self.report.export_vcf)
    
    def test__make_vcf_header(self):
        """ check that _make_vcf_header() works correctly
        """
        
        # define the intial header lines
        header = ["####fileformat=VCFv4.1\n",
            "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n",
            "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\sample_id\n"]
        
        # define the VCF provenances
        provenance = [("checksum", "proband.calls.date.vcf.gz", "2014-01-01"),
            ("checksum", "mother.calls.date.vcf.gz", "2014-01-02"),
            ("checksum", "father.calls.date.vcf.gz", "2014-01-03")]
        
        processed_header = ["####fileformat=VCFv4.1\n",
           "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n",
           '##INFO=<ID=ClinicalFilterType,Number=.,Type=String,Description="The type of clinical filter that passed this variant.">\n',
           '##INFO=<ID=ClinicalFilterGeneInheritance,Number=.,Type=String,Description="The inheritance mode (Monoallelic, Biallelic etc) under which the variant was found.">\n',
           '##INFO=<ID=ClinicalFilterReportableHGNC,Number=.,Type=String,Description="The HGNC symbol which the variant was identified as being reportable for.">\n',
           '##FORMAT=<ID=INHERITANCE_GENOTYPE,Number=.,Type=String,Description="The 012 coded genotypes for a trio (child, mother, father).">\n',
           '##FORMAT=<ID=INHERITANCE,Number=.,Type=String,Description="The inheritance of the variant in the trio (biparental, paternal, maternal, deNovo).">\n',
           "##ClinicalFilterRunDate={0}\n".format(datetime.date.today()),
           "##ClinicalFilterVersion=XXX\n",
           "##ClinicalFilterHistory=single_variant,compound_het\n",
           "##UberVCF_proband_Id=proband\n",
           "##UberVCF_proband_Checksum=checksum\n",
           "##UberVCF_proband_Basename=proband.calls.date.vcf.gz\n",
           "##UberVCF_proband_Date=2014-01-01\n",
           "##UberVCF_maternal_Id=mother\n",
           "##UberVCF_maternal_Checksum=checksum\n",
           "##UberVCF_maternal_Basename=mother.calls.date.vcf.gz\n",
           "##UberVCF_maternal_Date=2014-01-02\n",
           "##UberVCF_paternal_Id=father\n",
           "##UberVCF_paternal_Checksum=checksum\n",
           "##UberVCF_paternal_Basename=father.calls.date.vcf.gz\n",
           "##UberVCF_paternal_Date=2014-01-03\n",
           "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\sample_id\n"]
        
        # check that the standard function returns the expected value. Note that
        # I haven't checked the output if self.known_genes_date is not None, nor
        # have I checked if the _clinicalFilterVersion is available
        self.assertEqual(self.report._make_vcf_header(header, provenance),
           processed_header)
    
    def test__get_parental_inheritance(self):
        """ check that _get_parental_inheritance() works correctly
        """
        
        var = self.variants[0]
        
        # check for the default genotypes
        self.assertEqual(self.report._get_parental_inheritance(var), "deNovo")
        
        # check when only the mother is non-ref
        var.mother.genotype = 1
        self.assertEqual(self.report._get_parental_inheritance(var), "maternal")
        
        # check when both parents are non-ref
        var.father.genotype = 1
        self.assertEqual(self.report._get_parental_inheritance(var), "biparental")
        
        # check when only the father is non-ref
        var.mother.genotype = 0
        self.assertEqual(self.report._get_parental_inheritance(var), "paternal")
        
        # check when the proband lacks parental information
        self.report.family.father = None
        self.report.family.mother = None
        self.assertEqual(self.report._get_parental_inheritance(var), "unknown")
    
    def test__get_vcf_lines(self):
        """ check that _get_vcf_lines() works correctly
        """
        
         # define the intial header lines
        header = ["####fileformat=VCFv4.1\n",
            "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n",
            "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\sample_id\n"]
        
        # define the VCF provenances
        provenance = [("checksum", "proband.calls.date.vcf.gz", "2014-01-01"),
            ("checksum", "mother.calls.date.vcf.gz", "2014-01-02"),
            ("checksum", "father.calls.date.vcf.gz", "2014-01-03")]
        
        # define what the header will become
        vcf_lines = ["####fileformat=VCFv4.1\n",
           "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n",
           '##INFO=<ID=ClinicalFilterType,Number=.,Type=String,Description="The type of clinical filter that passed this variant.">\n',
           '##INFO=<ID=ClinicalFilterGeneInheritance,Number=.,Type=String,Description="The inheritance mode (Monoallelic, Biallelic etc) under which the variant was found.">\n',
           '##INFO=<ID=ClinicalFilterReportableHGNC,Number=.,Type=String,Description="The HGNC symbol which the variant was identified as being reportable for.">\n',
           '##FORMAT=<ID=INHERITANCE_GENOTYPE,Number=.,Type=String,Description="The 012 coded genotypes for a trio (child, mother, father).">\n',
           '##FORMAT=<ID=INHERITANCE,Number=.,Type=String,Description="The inheritance of the variant in the trio (biparental, paternal, maternal, deNovo).">\n',
           "##ClinicalFilterRunDate={0}\n".format(datetime.date.today()),
           "##ClinicalFilterVersion=XXX\n",
           "##ClinicalFilterHistory=single_variant,compound_het\n",
           "##UberVCF_proband_Id=proband\n",
           "##UberVCF_proband_Checksum=checksum\n",
           "##UberVCF_proband_Basename=proband.calls.date.vcf.gz\n",
           "##UberVCF_proband_Date=2014-01-01\n",
           "##UberVCF_maternal_Id=mother\n",
           "##UberVCF_maternal_Checksum=checksum\n",
           "##UberVCF_maternal_Basename=mother.calls.date.vcf.gz\n",
           "##UberVCF_maternal_Date=2014-01-02\n",
           "##UberVCF_paternal_Id=father\n",
           "##UberVCF_paternal_Checksum=checksum\n",
           "##UberVCF_paternal_Basename=father.calls.date.vcf.gz\n",
           "##UberVCF_paternal_Date=2014-01-03\n",
           "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\sample_id\n"]
        
        # define what the default variant vcf line will become
        line = ["X\t15000000\t.\tA\tG\t50\tPASS\tHGNC=TEST;CQ=missense_variant;random_tag;EUR_AF=0.0005;ClinicalFilterGeneInheritance=Monoallelic;ClinicalFilterType=single_variant;ClinicalFilterReportableHGNC=TEST\tGT:DP:INHERITANCE:INHERITANCE_GENOTYPE\t0/1:50:deNovo:1,0,0\n"]
        
        # check that a list of one variant produces the correct VCF output. Note
        # that we haven't checked against CNVs, which can change the
        # INHERITANCE_GENOTYPE flag, nor have we tested a larger list of variants
        var = (self.variants[0], ["single_variant"], ["Monoallelic"], ["TEST"])
        self.assertEqual(self.report._get_vcf_lines([var], header, provenance), vcf_lines + line)
    
    def test__get_output_line(self):
        """ check that _get_output_line() works correctly
        """
        
        var = (self.variants[0], ["single_variant"], ["Monoallelic"], ["TEST"])
        dad_aff = "0"
        mom_aff = "1"
        alt_id = "test_id"
        
        # check the output for the default variant
        expected = "child\ttest_id\tF\tX\t15000000\tTEST\tNA\tNA\tmissense_variant\tA/G\t0.0005\tMonoallelic\t1/0/0\t1\t0\tsingle_variant\tNA\tNA\n"
        self.assertEqual(self.report._get_output_line(var, dad_aff, mom_aff, alt_id), expected)
        
        # introduce additional info for the output line parsing, check the line
        # that is returned is expected
        var[0].child.info["PolyPhen"] = "probably_damaging(0.99)"
        var[0].child.info["SIFT"] = "deleterious(0)"
        var[0].child.info["ENST"] = "ENST00X"
        expected = "child\ttest_id\tF\tX\t15000000\tTEST\tNA\tENST00X\tmissense_variant,PolyPhen=probably_damaging(0.99),SIFT=deleterious(0)\tA/G\t0.0005\tMonoallelic\t1/0/0\t1\t0\tsingle_variant\tNA\tNA\n"
        self.assertEqual(self.report._get_output_line(var, dad_aff, mom_aff, alt_id), expected)
class TestReportPy(unittest.TestCase):
    """ test the Report class
    """
    def setUp(self):
        """ define a family and variant, and start the Allosomal class
        """

        # generate a test family
        child_gender = "F"
        mom_aff = "1"
        dad_aff = "1"

        self.trio = self.create_family(child_gender, mom_aff, dad_aff)

        # generate a test variant
        child_var = self.create_snv(child_gender, "0/1")
        mom_var = self.create_snv("F", "0/0")
        dad_var = self.create_snv("M", "0/0")

        var = TrioGenotypes(child_var)
        var.add_mother_variant(mom_var)
        var.add_father_variant(dad_var)
        self.variants = [var]

        self.report = Report(None, None, None, None)
        self.report.family = self.trio
        # self.report.tags_dict = tags

    def create_snv(self, gender, genotype):
        """ create a default variant
        """

        chrom = "X"
        pos = "15000000"
        snp_id = "."
        ref = "A"
        alt = "G"
        qual = "50"
        filt = "PASS"

        # set up a SNV object, since SNV inherits VcfInfo
        var = SNV(chrom, pos, snp_id, ref, alt, filt)

        info = "HGNC=TEST;CQ=missense_variant;random_tag;EUR_AF=0.0005"
        format_keys = "GT:DP"
        sample_values = genotype + ":50"

        var.vcf_line = [
            chrom, pos, snp_id, ref, alt, qual, filt, info, format_keys,
            sample_values
        ]

        var.add_info(info)
        var.add_format(format_keys, sample_values)
        var.set_gender(gender)
        var.set_genotype()

        return var

    def create_family(self, child_gender, mom_aff, dad_aff):
        """ create a default family, with optional gender and parental statuses
        """

        fam = Family("test")
        fam.add_child("child", "child_vcf", "2", child_gender)
        fam.add_mother("mother", "mother_vcf", mom_aff, "2")
        fam.add_father("father", "father_vcf", dad_aff, "1")
        fam.set_child()

        return fam

    def test__get_provenance(self):
        """ check that _get_provenance() works correctly
        """

        prov = ["checksum", "sample.calls.date.vcf.gz", "2014-01-01"]
        member = "proband"

        self.assertEqual(self.report._get_provenance(prov, member), \
            ["##UberVCF_proband_Id=sample\n", \
            "##UberVCF_proband_Checksum=checksum\n", \
            "##UberVCF_proband_Basename=sample.calls.date.vcf.gz\n", \
            "##UberVCF_proband_Date=2014-01-01\n"])

    def test__get_vcf_export_path(self):
        """ check that _get_vcf_export_path() works correctly
        """

        # use a folder to place the VCFG file in, which means we join the
        # proband ID to get a full path
        self.report.export_vcf = os.getcwd()
        self.assertEqual(self.report._get_vcf_export_path(),
                         os.path.join(os.getcwd(), "child.vcf.gz"))

        # define an un-uable directory, to raise an error
        self.report.export_vcf = os.getcwd() + "asjhfgasjhfg"
        self.assertRaises(ValueError, self.report._get_vcf_export_path)

        # define a specific path for a VCF file, which is returned directly
        self.report.export_vcf = os.path.join(os.getcwd(), "sample_id.vcf.gz")
        self.assertEqual(self.report._get_vcf_export_path(),
                         self.report.export_vcf)

    def test__make_vcf_header(self):
        """ check that _make_vcf_header() works correctly
        """

        # define the intial header lines
        header = [
            "####fileformat=VCFv4.1\n",
            "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n",
            "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\sample_id\n"
        ]

        # define the VCF provenances
        provenance = [("checksum", "proband.calls.date.vcf.gz", "2014-01-01"),
                      ("checksum", "mother.calls.date.vcf.gz", "2014-01-02"),
                      ("checksum", "father.calls.date.vcf.gz", "2014-01-03")]

        processed_header = [
            "####fileformat=VCFv4.1\n",
            "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n",
            '##INFO=<ID=ClinicalFilterType,Number=.,Type=String,Description="The type of clinical filter that passed this variant.">\n',
            '##INFO=<ID=ClinicalFilterGeneInheritance,Number=.,Type=String,Description="The inheritance mode (Monoallelic, Biallelic etc) under which the variant was found.">\n',
            '##FORMAT=<ID=INHERITANCE_GENOTYPE,Number=.,Type=String,Description="The 012 coded genotypes for a trio (child, mother, father).">\n',
            '##FORMAT=<ID=INHERITANCE,Number=.,Type=String,Description="The inheritance of the variant in the trio (biparental, paternal, maternal, deNovo).">\n',
            "##ClinicalFilterRunDate={0}\n".format(
                datetime.date.today()), "##ClinicalFilterVersion=XXX\n",
            "##ClinicalFilterHistory=single_variant,compound_het\n",
            "##UberVCF_proband_Id=proband\n",
            "##UberVCF_proband_Checksum=checksum\n",
            "##UberVCF_proband_Basename=proband.calls.date.vcf.gz\n",
            "##UberVCF_proband_Date=2014-01-01\n",
            "##UberVCF_maternal_Id=mother\n",
            "##UberVCF_maternal_Checksum=checksum\n",
            "##UberVCF_maternal_Basename=mother.calls.date.vcf.gz\n",
            "##UberVCF_maternal_Date=2014-01-02\n",
            "##UberVCF_paternal_Id=father\n",
            "##UberVCF_paternal_Checksum=checksum\n",
            "##UberVCF_paternal_Basename=father.calls.date.vcf.gz\n",
            "##UberVCF_paternal_Date=2014-01-03\n",
            "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\sample_id\n"
        ]

        # check that the standard function returns the expected value. Note that
        # I haven't checked the output if self.known_genes_date is not None, nor
        # have I checked if the _clinicalFilterVersion is available
        self.assertEqual(self.report._make_vcf_header(header, provenance),
                         processed_header)

    def test__get_parental_inheritance(self):
        """ check that _get_parental_inheritance() works correctly
        """

        var = self.variants[0]

        # check for the default genotypes
        self.assertEqual(self.report._get_parental_inheritance(var), "deNovo")

        # check when only the mother is non-ref
        var.mother.genotype = 1
        self.assertEqual(self.report._get_parental_inheritance(var),
                         "maternal")

        # check when both parents are non-ref
        var.father.genotype = 1
        self.assertEqual(self.report._get_parental_inheritance(var),
                         "biparental")

        # check when only the father is non-ref
        var.mother.genotype = 0
        self.assertEqual(self.report._get_parental_inheritance(var),
                         "paternal")

        # check when the proband lacks parental information
        self.report.family.father = None
        self.report.family.mother = None
        self.assertEqual(self.report._get_parental_inheritance(var), "unknown")

    def test__get_vcf_lines(self):
        """ check that _get_vcf_lines() works correctly
        """

        # define the intial header lines
        header = [
            "####fileformat=VCFv4.1\n",
            "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n",
            "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\sample_id\n"
        ]

        # define the VCF provenances
        provenance = [("checksum", "proband.calls.date.vcf.gz", "2014-01-01"),
                      ("checksum", "mother.calls.date.vcf.gz", "2014-01-02"),
                      ("checksum", "father.calls.date.vcf.gz", "2014-01-03")]

        # define what the header will become
        vcf_lines = [
            "####fileformat=VCFv4.1\n",
            "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n",
            '##INFO=<ID=ClinicalFilterType,Number=.,Type=String,Description="The type of clinical filter that passed this variant.">\n',
            '##INFO=<ID=ClinicalFilterGeneInheritance,Number=.,Type=String,Description="The inheritance mode (Monoallelic, Biallelic etc) under which the variant was found.">\n',
            '##FORMAT=<ID=INHERITANCE_GENOTYPE,Number=.,Type=String,Description="The 012 coded genotypes for a trio (child, mother, father).">\n',
            '##FORMAT=<ID=INHERITANCE,Number=.,Type=String,Description="The inheritance of the variant in the trio (biparental, paternal, maternal, deNovo).">\n',
            "##ClinicalFilterRunDate={0}\n".format(
                datetime.date.today()), "##ClinicalFilterVersion=XXX\n",
            "##ClinicalFilterHistory=single_variant,compound_het\n",
            "##UberVCF_proband_Id=proband\n",
            "##UberVCF_proband_Checksum=checksum\n",
            "##UberVCF_proband_Basename=proband.calls.date.vcf.gz\n",
            "##UberVCF_proband_Date=2014-01-01\n",
            "##UberVCF_maternal_Id=mother\n",
            "##UberVCF_maternal_Checksum=checksum\n",
            "##UberVCF_maternal_Basename=mother.calls.date.vcf.gz\n",
            "##UberVCF_maternal_Date=2014-01-02\n",
            "##UberVCF_paternal_Id=father\n",
            "##UberVCF_paternal_Checksum=checksum\n",
            "##UberVCF_paternal_Basename=father.calls.date.vcf.gz\n",
            "##UberVCF_paternal_Date=2014-01-03\n",
            "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\sample_id\n"
        ]

        # define what the default variant vcf line will become
        line = [
            "X\t15000000\t.\tA\tG\t50\tPASS\tHGNC=TEST;CQ=missense_variant;random_tag;EUR_AF=0.0005;ClinicalFilterGeneInheritance=Monoallelic;ClinicalFilterType=single_variant\tGT:DP:INHERITANCE:INHERITANCE_GENOTYPE\t0/1:50:deNovo:1,0,0\n"
        ]

        # check that a list of one variant produces the correct VCF output. Note
        # that we haven't checked against CNVs, which can change the
        # INHERITANCE_GENOTYPE flag, nor have we tested a larger list of variants
        var = (self.variants[0], "single_variant", "Monoallelic")
        self.assertEqual(self.report._get_vcf_lines([var], header, provenance),
                         vcf_lines + line)

    def test__get_output_line(self):
        """ check that _get_output_line() works correctly
        """

        var = (self.variants[0], "single_variant", "Monoallelic")
        dad_aff = "0"
        mom_aff = "1"
        alt_id = "test_id"

        # check the output for the default variant
        expected = "child\ttest_id\tF\tX\t15000000\tTEST\tNA\tNA\tmissense_variant\tA/G\t0.0005\tMonoallelic\t1/0/0\t1\t0\tsingle_variant\n"
        self.assertEqual(
            self.report._get_output_line(var, dad_aff, mom_aff, alt_id),
            expected)

        # introduce additional info for the output line parsing, check the line
        # that is returned is expected
        var[0].child.info["PolyPhen"] = "probably_damaging(0.99)"
        var[0].child.info["SIFT"] = "deleterious(0)"
        var[0].child.info["ENST"] = "ENST00X"
        expected = "child\ttest_id\tF\tX\t15000000\tTEST\tNA\tENST00X\tmissense_variant,PolyPhen=probably_damaging(0.99),SIFT=deleterious(0)\tA/G\t0.0005\tMonoallelic\t1/0/0\t1\t0\tsingle_variant\n"
        self.assertEqual(
            self.report._get_output_line(var, dad_aff, mom_aff, alt_id),
            expected)