Esempio n. 1
0
    def testResultsFilterAnnAndVar(self):
        """Run the tool with variant AND annotation filters, then compare kept identifiers.

        The identifiers read from the output file (each record id followed by
        the ids of its annotations, in file order) must equal the identifiers
        of the entries of ``self.variants`` flagged ``is_filtered == 0``.
        """
        # Base command completed with the two filter files
        full_cmd = list(self.cmd)
        full_cmd += ["--input-filters-variants", self.tmp_var_filters]
        full_cmd += ["--input-filters-annotations", self.tmp_annot_filters]
        subprocess.check_call(full_cmd, stderr=subprocess.DEVNULL)

        # Identifiers expected in the output: unfiltered records and, within
        # them, unfiltered annotations only
        kept_ids = []
        for variant in self.variants:
            if variant.info["is_filtered"] != 0:
                continue
            kept_ids.append(variant.id)
            if "ANN" in variant.info:
                kept_ids.extend(
                    annot["id"] for annot in variant.info["ANN"]
                    if annot["is_filtered"] == 0)

        # Identifiers really present in the output
        found_ids = []
        with AnnotVCFIO(self.tmp_output) as reader:
            for out_record in reader:
                found_ids.append(out_record.id)
                if "ANN" in out_record.info:
                    found_ids.extend(
                        annot["id"] for annot in out_record.info["ANN"])

        self.assertEqual(kept_ids, found_ids)
    def testResultsAnnotRemove(self):
        """Check the annotations kept by filterVCFByAnnot.py in ``remove`` mode.

        Every ``record_id:annotation_index:filter`` triplet found in the output
        must match the triplets built from the annotations expected to be
        ``PASS`` inside the records expected to be ``PASS``.
        """
        # Execute command
        subprocess.check_call(
            [
                "filterVCFByAnnot.py",
                "--mode", "remove",
                "--input-selected-RNA", self.tmp_selected_rna,
                "--input-variants", self.tmp_variants,
                "--output-variants", self.tmp_output
            ],
            stderr=subprocess.DEVNULL)

        # Expected triplets: the index is the position of the annotation in
        # the fixture record
        expected = []
        for variant in self.variants:
            if variant.info["expected_filter"] != ["PASS"]:
                continue
            for rank, annot in enumerate(variant.info["ANN"]):
                if annot["expected_filter"] == "PASS":
                    expected.append("{}:{}:PASS".format(variant.id, rank))

        # Observed triplets: one per filter tag of each annotation in the output
        observed = []
        with AnnotVCFIO(self.tmp_output) as reader:
            for out_record in reader:
                for rank, annot in enumerate(out_record.info["ANN"]):
                    tags = annot["FILTER"].split("&")
                    tags.sort()
                    observed.extend(
                        "{}:{}:{}".format(out_record.id, rank, tag)
                        for tag in tags)

        self.assertEqual(expected, observed)
    def testResultsRecordTag(self):
        """Check the record-level filters set by filterVCFByAnnot.py in ``tag`` mode.

        Each output record contributes one ``record_id:filter`` entry per
        filter tag (tags sorted); the result must equal the same entries built
        from the ``expected_filter`` lists of ``self.variants``.
        """
        # Execute command
        subprocess.check_call(
            [
                "filterVCFByAnnot.py",
                "--mode", "tag",
                "--input-selected-RNA", self.tmp_selected_rna,
                "--input-variants", self.tmp_variants,
                "--output-variants", self.tmp_output
            ],
            stderr=subprocess.DEVNULL)

        # Expected entries from the fixture
        expected = []
        for variant in self.variants:
            expected.extend(
                "{}:{}".format(variant.id, tag)
                for tag in sorted(variant.info["expected_filter"]))

        # Observed entries from the output file
        observed = []
        with AnnotVCFIO(self.tmp_output) as reader:
            for out_record in reader:
                observed.extend(
                    "{}:{}".format(out_record.id, tag)
                    for tag in sorted(out_record.filter))

        self.assertEqual(expected, observed)
    def testResultsRecordRemove(self):
        """Check which records survive filterVCFByAnnot.py in ``remove`` mode.

        The ids of ALL the records found in the output must equal, in order,
        the ids of the fixture records whose ``expected_filter`` is
        ``["PASS"]``.
        """
        cmd = [
            "filterVCFByAnnot.py", "--mode", "remove", "--input-selected-RNA",
            self.tmp_selected_rna, "--input-variants", self.tmp_variants,
            "--output-variants", self.tmp_output
        ]

        # Execute command
        subprocess.check_call(cmd, stderr=subprocess.DEVNULL)

        # Validate results
        expected = [
            record.id for record in self.variants
            if record.info["expected_filter"] == ["PASS"]
        ]
        # Every output record is collected, without re-applying the
        # expected_filter predicate: filtering the observed side with the
        # expected labels would hide records that the tool failed to remove.
        with AnnotVCFIO(self.tmp_output) as FH_results:
            observed = [record.id for record in FH_results]
        self.assertEqual(expected, observed)
Esempio n. 5
0
    group_output.add_argument('-o',
                              '--output-fusions',
                              required=True,
                              help='Path to the output file (format: VCF).')
    args = parser.parse_args()

    # Logger
    logging.basicConfig(
        format=
        '%(asctime)s -- [%(filename)s][pid:%(process)d][%(levelname)s] -- %(message)s'
    )
    log = logging.getLogger()
    log.setLevel(logging.INFO)
    log.info("Command: " + " ".join(sys.argv))
    log.info("Version: " + str(__version__))

    # Process
    with AnnotVCFIO(args.output_fusions, "w", args.annotation_field) as writer:
        with FusionFileReader.factory(args.input_fusions, "r",
                                      args.annotation_field,
                                      args.sample_name) as reader:
            # Header
            reader.__class__.setVCFHeader(writer, args.annotation_field)
            writer.samples = [args.sample_name]
            writer.writeHeader()
            # Records
            for first_bnd, second_bnd in reader:
                writer.write(first_bnd)
                writer.write(second_bnd)
    log.info("End of job")
Esempio n. 6
0
        required=True,
        help='The path to the file outputted file (format: JSON).')
    args = parser.parse_args()

    # Logger
    logging.basicConfig(
        format=
        '%(asctime)s -- [%(filename)s][pid:%(process)d][%(levelname)s] -- %(message)s'
    )
    log = logging.getLogger(os.path.basename(__file__))
    log.setLevel(logging.INFO)
    log.info("Command: " + " ".join(sys.argv))

    # Convert VCF in python dict
    json_data = list()
    with AnnotVCFIO(args.input_variants, "r", args.annotation_field) as FH_vcf:
        # Get sources IDs for VCF coming from merged sources
        id_by_src = None
        if args.merged_sources:
            SRC_id_desc = FH_vcf.info["SRC"].description.split(
                "Possible values: ")[1].replace("'", '"')
            id_by_src = json.loads(SRC_id_desc)
        # Records
        for record in FH_vcf:
            for idx_alt, alt in enumerate(record.alt):
                allele_record = getAlleleRecord(FH_vcf, record, idx_alt)
                allele_record.normalizeSingleAllele()
                curr_json = dict()
                # Coord information
                curr_json["coord"] = {
                    "region":
Esempio n. 7
0
    log.info("Command: " + " ".join(sys.argv))
    log.info("Version: " + str(__version__))

    # Get pathways
    pathways_by_gene = {}
    with open(args.input_pathways) as FH_pathways:
        for line in FH_pathways:
            fields = [elt.strip() for elt in line.split("\t")]
            for gene in fields[3:]:
                if gene not in pathways_by_gene:
                    pathways_by_gene[gene] = set()
                pathways_by_gene[gene].add(fields[1])

    # Write output
    with AnnotVCFIO(args.output_variants,
                    "w",
                    annot_field=args.annotation_field) as FH_out:
        with AnnotVCFIO(args.input_variants,
                        annot_field=args.annotation_field) as FH_in:
            # Header
            FH_out.copyHeader(FH_in)
            FH_out.ANN_titles.append("Pathways")
            FH_out.writeHeader()
            # Records
            for record in FH_in:
                for annot in record.info[FH_in.annot_field]:
                    if annot[args.gene_field] is not None and annot[
                            args.gene_field] != "":
                        pathways = set()
                        for gene in annot[args.gene_field].split(","):
                            if gene in pathways_by_gene:
    def setUp(self):
        """Create the temporary inputs used by the tests.

        Three paths are reserved in the system temporary folder: the selected
        RNA list, the input VCF and the output VCF. The RNA list and the
        annotated VCF are written here; the output path is only reserved.
        The written records are kept in ``self.variants``: their
        ``expected_filter`` values, at record level (INFO) and at annotation
        level (ANN), hold the result the tests compare against.
        """
        tmp_folder = tempfile.gettempdir()
        unique_id = str(uuid.uuid1())

        # Temporary files
        self.tmp_selected_rna = os.path.join(tmp_folder,
                                             unique_id + "_rna.tsv")
        self.tmp_variants = os.path.join(tmp_folder, unique_id + ".vcf")
        self.tmp_output = os.path.join(tmp_folder, unique_id + "_out.vcf")

        # Create RNA ref
        with open(self.tmp_selected_rna, "w") as FH_rna:
            FH_rna.write("#Gene\tTranscript\n")
            FH_rna.write("Gene_1\tENST_selected1\n")
            FH_rna.write("Gene_1\tENST_selected2\n")

        def annot(allele, expected, consequence="missense_variant",
                  feature="ENST_selected1", eur_af="0.001&0.001",
                  gnomad_af=0.001):
            """Return a new annotation dict; defaults are the values shared by most annotations."""
            return {
                "Allele": allele,
                "Consequence": consequence,
                "Feature": feature,
                "EUR_AF": eur_af,
                "gnomAD_AF": gnomad_af,
                "expected_filter": expected
            }

        def variant(name, expected, annots=None, pos=14, ref="G", alt="T"):
            """Return a record on artificial_chr1; the ANN key is omitted when annots is None."""
            info = {}
            if annots is not None:
                info["ANN"] = annots
            info["expected_filter"] = expected
            return VCFRecord("artificial_chr1", pos, name, ref, [alt], None,
                             None, info)

        # Create VCF
        ann_titles = [
            "Allele", "Consequence", "Feature", "EUR_AF", "gnomAD_AF",
            "expected_filter"
        ]
        with AnnotVCFIO(self.tmp_variants, "w") as FH_var:
            FH_var.ANN_titles = ann_titles
            FH_var.info = {
                "ANN":
                HeaderInfoAttr(
                    "ANN",
                    # The format is derived from the titles so that the header
                    # description cannot drift from the fields really written
                    # (the previous literal omitted EUR_AF).
                    "Consequence annotations from Ensembl VEP. Format: {}.".
                    format("|".join(ann_titles)),
                    type="String",
                    number="."),
                "expected_filter":
                HeaderInfoAttr("expected_filter",
                               "The expected filters.",
                               type="String",
                               number=".")
            }
            FH_var.writeHeader()
            # NOTE(review): the id "alt_15" is used by two records below; it
            # is kept as-is because other tests may rely on these identifiers.
            self.variants = [
                # Single annotation (or none)
                variant("alt_00", ["PASS"], [annot("T", "PASS")]),
                variant("alt_01", ["CSQ"]),
                variant("alt_02", ["CSQ"], [
                    annot("T", "ANN.CSQ", consequence="synonymous_variant")
                ]),
                variant("alt_03", ["popAF"], [
                    annot("T", "ANN.popAF", gnomad_af=0.01)
                ]),
                variant("alt_04", ["CSQ"], [
                    annot("T", "ANN.RNA", feature="other")
                ]),
                variant("alt_05", ["CSQ"], [annot("G", "ANN.COLLOC")]),
                # Several annotations, one of them on another allele
                variant("alt_06", ["PASS"], [
                    annot("T", "PASS"),
                    annot("C", "ANN.COLLOC")
                ]),
                variant("alt_07", ["popAF"], [
                    annot("T", "ANN.popAF", gnomad_af=0.01),
                    annot("C", "ANN.COLLOC")
                ]),
                variant("alt_08", ["CSQ"], [
                    annot("T", "ANN.CSQ", consequence="synonymous_variant"),
                    annot("C", "ANN.COLLOC")
                ]),
                variant("alt_09", ["CSQ"], [
                    annot("T", "ANN.RNA", feature="other"),
                    annot("C", "ANN.COLLOC")
                ]),
                variant("alt_10", ["CSQ", "popAF"], [
                    annot("T",
                          "ANN.CSQ&ANN.RNA&ANN.popAF",
                          consequence="synonymous_variant",
                          feature="other",
                          gnomad_af=0.01),
                    annot("C", "ANN.COLLOC")
                ]),
                variant("alt_11", ["CSQ", "popAF"], [
                    annot("T",
                          "ANN.CSQ&ANN.popAF",
                          consequence="synonymous_variant",
                          gnomad_af=0.01),
                    annot("C", "ANN.COLLOC")
                ]),
                variant("alt_12", ["CSQ", "popAF"], [
                    annot("T",
                          "ANN.CSQ&ANN.popAF",
                          consequence="synonymous_variant",
                          gnomad_af=0.01),
                    annot("T",
                          "ANN.RNA&ANN.popAF",
                          feature="other",
                          gnomad_af=0.01),
                    annot("C", "ANN.COLLOC")
                ]),
                # Multi-valued EUR_AF different from the default
                variant("alt_13", ["CSQ", "popAF"], [
                    annot("T",
                          "ANN.CSQ&ANN.popAF",
                          consequence="synonymous_variant",
                          eur_af="0.01&0.01"),
                    annot("C", "ANN.COLLOC&ANN.popAF", eur_af="0.05&0.05")
                ]),
                # Insertions and deletions in several representations
                variant("alt_14", ["popAF"], [
                    annot("GT", "ANN.popAF", eur_af="0.01&0.01"),
                    annot("C", "ANN.COLLOC&ANN.popAF", eur_af="0.05&0.05"),
                    annot("T", "ANN.COLLOC&ANN.popAF", eur_af="0.05&0.05")
                ],
                        alt="GT"),
                variant("alt_15", ["popAF"], [
                    annot("GT", "ANN.COLLOC&ANN.popAF", eur_af="0.01&0.01"),
                    annot("T", "ANN.popAF", eur_af="0.01&0.01"),
                    annot("C", "ANN.COLLOC&ANN.popAF", eur_af="0.05&0.05")
                ],
                        pos=15,
                        ref="-"),
                variant("alt_15", ["popAF"], [
                    annot("-", "ANN.popAF", eur_af="0.01&0.01"),
                    annot("G", "ANN.COLLOC&ANN.popAF", eur_af="0.01&0.01"),
                    annot("C", "ANN.COLLOC&ANN.popAF", eur_af="0.05&0.05")
                ],
                        alt="-"),
                variant("alt_16", ["popAF"], [
                    annot("-", "ANN.COLLOC&ANN.popAF", eur_af="0.01&0.01"),
                    annot("G", "ANN.popAF", eur_af="0.01&0.01"),
                    annot("C", "ANN.COLLOC&ANN.popAF", eur_af="0.05&0.05")
                ],
                        ref="GG",
                        alt="G")
            ]
            for curr_var in self.variants:
                FH_var.write(curr_var)
Esempio n. 9
0
    def setUp(self):
        tmp_folder = tempfile.gettempdir()
        unique_id = str(uuid.uuid1())

        # Temporary files
        self.tmp_var_filters = os.path.join(tmp_folder,
                                            unique_id + "_varFilters.json")
        self.tmp_annot_filters = os.path.join(tmp_folder,
                                              unique_id + "_annFilters.json")
        self.tmp_variants = os.path.join(tmp_folder, unique_id + ".vcf")
        self.tmp_output = os.path.join(tmp_folder, unique_id + "_out.vcf")

        # Command
        self.cmd = [
            "filterAnnotVCF.py", "--input-variants", self.tmp_variants,
            "--output-variants", self.tmp_output
        ]

        # Create filters
        with open(self.tmp_var_filters, "w") as FH_filter:
            FH_filter.write("""{
    "class": "FiltersCombiner",
    "operator": "or",
    "filters": [
        {
            "class": "Filter",
            "getter": "filter",
            "action": "select",
            "aggregator": "ratio:1",
            "operator": "!=",
            "values": "CSQ"
        }, {
            "class": "Filter",
            "getter": "chrom",
            "action": "select",
            "aggregator": "nb:1",
            "operator": "==",
            "values": "artificial_chr2"
        }
    ]
}""")
        with open(self.tmp_annot_filters, "w") as FH_filter:
            FH_filter.write("""{
    "class": "Filter",
    "getter": "FILTER",
    "action": "select",
    "aggregator": "ratio:1",
    "operator": "==",
    "values": "PASS"
}""")

        # Create VCF
        with AnnotVCFIO(self.tmp_variants, "w") as FH_var:
            FH_var.ANN_titles = ["Allele", "id", "is_filtered", "FILTER"]
            FH_var.info = {
                "ANN":
                HeaderInfoAttr(
                    "ANN",
                    "Consequence annotations from Ensembl VEP. Format: Allele|id|is_filtered|FILTER.",
                    type="String",
                    number="."),
                "is_filtered":
                HeaderInfoAttr("is_filtered",
                               "The expected result.",
                               type="Integer",
                               number="1")
            }
            FH_var.writeHeader()
            self.variants = [
                VCFRecord("artificial_chr1", 10, "alt_00", "G", ["T"], None,
                          ["PASS"], {"is_filtered": 0}),
                VCFRecord("artificial_chr1", 10, "alt_01", "G", ["T"], None,
                          ["CSQ"], {"is_filtered": 1}),
                VCFRecord(
                    "artificial_chr2",
                    10,
                    "alt_02",
                    "G",
                    ["T"],
                    None,
                    ["CSQ"],
                    {
                        "is_filtered": 0,  # Proctected
                    }),
                VCFRecord(
                    "artificial_chr1", 10, "alt_03", "G", ["T"], None,
                    ["PASS"], {
                        "ANN": [{
                            "Allele": "T",
                            "id": "ann_00",
                            "FILTER": "PASS",
                            "is_filtered": 0
                        }],
                        "is_filtered":
                        0
                    }),
                VCFRecord(
                    "artificial_chr1", 10, "alt_04", "G", ["T"], None,
                    ["PASS"], {
                        "ANN": [{
                            "Allele": "C",
                            "id": "ann_01",
                            "FILTER": "ANN.COLLOC",
                            "is_filtered": 1
                        }],
                        "is_filtered":
                        0
                    }),
                VCFRecord(
                    "artificial_chr1", 10, "alt_05", "G", ["T"], None, ["CSQ"],
                    {
                        "ANN": [{
                            "Allele": "C",
                            "id": "ann_02",
                            "FILTER": "ANN.COLLOC",
                            "is_filtered": 1
                        }],
                        "is_filtered":
                        1
                    }),
                VCFRecord(
                    "artificial_chr1", 10, "alt_06", "G", ["T"], None, ["CSQ"],
                    {
                        "ANN": [{
                            "Allele": "T",
                            "id": "ann_03",
                            "FILTER": "PASS",
                            "is_filtered": 0
                        }],
                        "is_filtered":
                        1
                    }),
                VCFRecord(
                    "artificial_chr1", 10, "alt_07", "G", ["T"], None,
                    ["PASS"], {
                        "ANN": [
                            {
                                "Allele": "T",
                                "id": "ann_04",
                                "FILTER": "PASS",
                                "is_filtered": 0
                            },
                            {
                                "Allele": "C",
                                "id": "ann_05",
                                "FILTER": "ANN.COLLOC",
                                "is_filtered": 1
                            },
                        ],
                        "is_filtered":
                        0
                    }),
                VCFRecord(
                    "artificial_chr1", 10, "alt_08", "G", ["T"], None,
                    ["PASS"], {
                        "ANN": [
                            {
                                "Allele": "T",
                                "id": "ann_06",
                                "FILTER": "ANN.popAF",
                                "is_filtered": 1
                            },
                            {
                                "Allele": "C",
                                "id": "ann_07",
                                "FILTER": "ANN.COLLOC&ANN.popAF",
                                "is_filtered": 1
                            },
                        ],
                        "is_filtered":
                        0
                    }),
                VCFRecord(
                    "artificial_chr2",
                    10,
                    "alt_09",
                    "G",
                    ["T"],
                    None,
                    ["CSQ"],
                    {
                        "ANN": [
                            {
                                "Allele": "T",
                                "id": "ann_08",
                                "FILTER": "ANN.popAF",
                                "is_filtered": 1
                            },
                            {
                                "Allele": "C",
                                "id": "ann_09",
                                "FILTER": "ANN.COLLOC&ANN.popAF",
                                "is_filtered": 1
                            },
                        ],
                        "is_filtered":
                        0  # Protected
                    }),
                VCFRecord(
                    "artificial_chr2",
                    10,
                    "alt_10",
                    "G",
                    ["T"],
                    None,
                    ["CSQ"],
                    {
                        "ANN": [
                            {
                                "Allele": "T",
                                "id": "ann_10",
                                "FILTER": "PASS",
                                "is_filtered": 0
                            },
                            {
                                "Allele": "C",
                                "id": "ann_11",
                                "FILTER": "ANN.COLLOC&ANN.popAF",
                                "is_filtered": 1
                            },
                        ],
                        "is_filtered":
                        0  # Protected
                    })
            ]
            for idx, curr_var in enumerate(self.variants):
                FH_var.write(curr_var)
Esempio n. 10
0
    with MAFIO(args.input_variants) as FH_in:
        for record in FH_in:
            nb_records += 1
            samples.add(record["Tumor_Sample_Barcode"])
            variants_id = getName(record)
            if variants_id not in occur_by_id:
                occur_by_id[variants_id] = {"nb_expec": 0, "data": {}}
            occur_by_id[variants_id]["nb_expec"] += 1
    samples = sorted(samples)
    log.info("{} samples, {} variants and {} records.".format(
        len(samples), len(occur_by_id), nb_records))

    # Convert
    log.info("Convert to VCF.")
    with MAFIO(args.input_variants) as FH_in:
        with AnnotVCFIO(args.output_variants, "w") as FH_out:
            # Header
            FH_out.samples = samples
            FH_out.ANN_titles = [
                "Allele", "Consequence", "SYMBOL", "Feature_type", "Feature",
                "HGVSc", "HGVSp", "RefSeq"
            ]
            FH_out.info = {
                "SC":
                HeaderInfoAttr("SC",
                               "Mutated sample count",
                               type="Integer",
                               number="1"),
                "AD":
                HeaderInfoAttr("AD",
                               "Allele depth in tumor",