コード例 #1
0
 def test_overlapping_deletion(self):
     """Add a deletion and an SNV that both touch position 4 of one alt node.

     After empty nodes are removed, the graph must consist of exactly the
     five named nodes below, with the deletion represented by a direct
     edge from the left flank to the right flank, and the SNV reachable
     only through the deleted segment's reference node.
     """
     container = graphContainer.GraphContainer()
     source = container.add_altNode("chr", 10, 17, "ATCGATCG")
     deletion = {"start": 2, "end": 4, "alt": ""}
     snv = {"start": 4, "end": 4, "alt": "C"}
     variants.add_variants(container, {source['name']: [deletion, snv]})
     graphUtils.remove_empty_nodes(container)

     expected_names = [
         "chr:10-11:AT",
         "chr:12-13:CG",
         "chr:14-14:A",
         "chr:14-14:C",
         "chr:15-17:TCG",
     ]
     observed_names = [node['name'] for node in container.nodes.values()]
     self.assertCountEqual(observed_names, expected_names)

     left = container.nodes["chr:10-11:AT"]
     right = container.nodes["chr:15-17:TCG"]
     alt = container.nodes["chr:14-14:C"]
     ref = container.nodes["chr:14-14:A"]
     # Reference bases 12-13, i.e. the part of the deleted span before the SNV.
     ins = container.nodes["chr:12-13:CG"]

     present = (
         (left, right),
         (left, ins),
         (ins, ref),
         (ref, right),
         (ins, alt),
         (alt, right),
     )
     for src, dst in present:
         self.assertTrue(container.has_edge(src, dst))

     absent = (
         (left, ref),
         (left, alt),
         (ref, ins),
     )
     for src, dst in absent:
         self.assertFalse(container.has_edge(src, dst))
コード例 #2
0
 def test_ins_end(self):
     """Add an insertion located right after the last base of an alt node.

     The alt node keeps its full sequence, a new "CCC" node is placed
     between it and the following reference node, and the "foo" sequence
     label of the original edge is expected on both the original edge and
     the edge leaving the inserted node.
     """
     container = graphContainer.GraphContainer()
     ref_node = container.add_refNode("chr", 18, 20)
     alt_node = container.add_altNode("chr", 10, 17, "ATCGATCG")
     container.add_edge(alt_node, ref_node, ["foo"])
     # start == end + 1 describes an insertion; 8 is one past the last offset.
     insertion = {"start": 8, "end": 7, "alt": "CCC"}
     variants.add_variants(container, {alt_node['name']: [insertion]})
     graphUtils.remove_empty_nodes(container)

     left = container.nodes["chr:10-17:ATCGATCG"]
     right = container.nodes["ref-chr:18-20"]
     alt = container.nodes["chr:18-17:CCC"]
     self.assertEqual(len(container.nodes), 3)

     for src, dst in ((left, right), (left, alt), (alt, right)):
         self.assertTrue(container.has_edge(src, dst))
     for src, dst in ((alt, left), (right, alt)):
         self.assertFalse(container.has_edge(src, dst))

     direct_edge = container.get_edge(left['name'], right['name'])
     self.assertCountEqual(direct_edge['sequences'], ["foo"])
     alt_edge = container.get_edge(alt['name'], right['name'])
     self.assertCountEqual(alt_edge['sequences'], ["foo"])
コード例 #3
0
ファイル: addVariants.py プロジェクト: zhishanchen/paragraph
def run(args):
    """
    Load a graph JSON, add variants to it and write the resulting graph as JSON.

    Variants are taken from the file given in ``args.variants`` when that is
    set; otherwise from the "variants" entry of the graph JSON itself.

    :param args: parsed command line arguments; this function reads
                 ``verbose``, ``graph``, ``variants`` and ``output``
    :raises Exception: when a variant file is given but has no "variants" key
    """
    levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    # Clamp the verbosity so that values beyond the last level map to DEBUG.
    logging.basicConfig(format='%(message)s',
                        level=levels[min(args.verbose,
                                         len(levels) - 1)])

    graphDict = load_json(args.graph)
    graph = graphUtils.load_json(graphDict)
    if args.variants:
        varJson = load_json(args.variants)
        if "variants" not in varJson:
            raise Exception("No variants in variant JSON")
        varDict = varJson["variants"]
    else:
        varDict = graphDict.get("variants", {})
        if not varDict:
            # Only warn: processing continues with no variants to add.
            # Nothing is printed here; varDict is empty in this branch, so
            # there are no keys to show, and calling .keys() would fail if
            # the JSON held a non-dict empty value such as null or [].
            logging.warning("No variants in graph")
    variants.add_variants(graph, varDict)
    graphUtils.remove_empty_nodes(graph)
    json.dump(graph.json_dict(), args.output, sort_keys=True)
コード例 #4
0
 def test_var_begin(self):
     """Add an SNV on the very first base of an alt node.

     The first base is split off into a reference node ("A") and an
     alternate node ("G"); the incoming edge from the preceding reference
     node must be redirected to both of them, each carrying the original
     "foo" sequence label, and must no longer reach the remainder directly.
     """
     container = graphContainer.GraphContainer()
     ref_node = container.add_refNode("chr", 1, 9)
     alt_node = container.add_altNode("chr", 10, 17, "ATCGATCG")
     container.add_edge(ref_node, alt_node, ["foo"])
     snv = {"start": 0, "end": 0, "alt": "G"}
     variants.add_variants(container, {alt_node['name']: [snv]})
     graphUtils.remove_empty_nodes(container)

     left = container.nodes["ref-chr:1-9"]
     right = container.nodes["chr:11-17:TCGATCG"]
     ref = container.nodes["chr:10-10:A"]
     alt = container.nodes["chr:10-10:G"]
     self.assertEqual(len(container.nodes), 4)

     for src, dst in ((left, ref), (left, alt), (ref, right), (alt, right)):
         self.assertTrue(container.has_edge(src, dst))
     for src, dst in ((left, right), (ref, alt)):
         self.assertFalse(container.has_edge(src, dst))

     ref_edge = container.get_edge(left['name'], ref['name'])
     self.assertCountEqual(ref_edge['sequences'], ["foo"])
     alt_edge = container.get_edge(left['name'], alt['name'])
     self.assertCountEqual(alt_edge['sequences'], ["foo"])
コード例 #5
0
ファイル: __init__.py プロジェクト: pkrusche/paragraph
def convert_vcf(vcf,
                ref,
                target_regions=None,
                ref_node_padding=150,
                ref_node_max_length=1000,
                allele_graph=False,
                simplify=True,
                alt_paths=False,
                alt_splitting=False):
    """
    Build a graph dictionary from one VCF file

    The VCF is first copied into a compressed, indexed temporary file. One
    sub-graph is then built per target region (or a single one for the whole
    file when no regions are given) and merged into the final graph.

    :param vcf: file name of the VCF file
    :param ref: reference FASTA file name
    :param target_regions: target region list
    :param ref_node_padding: padding / read length
    :param ref_node_max_length: maximum length before splitting a reference node
    :param allele_graph: add edges between any compatible allele pair, not just haplotypes from input
    :param simplify: simplify the graph
    :param alt_paths: Add all possible non-reference paths to the graph
    :param alt_splitting: also split long alt nodes (e.g. long insertions)
    :return: dictionary containing JSON graph
    """
    graph = GraphContainer("Graph from %s" % vcf)
    indexed_vcf = tempfile.NamedTemporaryFile(delete=False, suffix=".vcf.gz")
    try:
        # Only the path is needed: bcftools writes to the file by name.
        indexed_vcf.close()
        # noinspection PyUnresolvedReferences
        pysam.bcftools.view(vcf, "-o", indexed_vcf.name, "-O", "z", catch_stdout=False)  # pylint: disable=no-member
        # noinspection PyUnresolvedReferences
        pysam.bcftools.index(indexed_vcf.name)  # pylint: disable=no-member

        if target_regions:
            # Regions are parsed lazily, one at a time, as the loop advances.
            regions = map(parse_region, target_regions)
        else:
            # A single all-None entry stands for "no region restriction".
            regions = [(None, None, None)]

        for (chrom, start, end) in regions:
            if chrom is not None:
                logging.info(f"Starting work on region: {chrom}:{start}-{end}")
            try:
                vcfGraph = VCFGraph.create_from_vcf(
                    ref, indexed_vcf.name, chrom, start, end, ref_node_padding, allele_graph)
            except NoVCFRecordsException:
                logging.info(f"Region {chrom}:{start}-{end} has no VCF records, skipping.")
                continue
            logging.info(f"CONSTRUCTED VCF GRAPH:\n{str(vcfGraph)}")

            region_graph = vcfGraph.get_graph(allele_graph)
            if ref_node_max_length:
                graphUtils.split_ref_nodes(region_graph, ref_node_max_length, ref_node_padding)
            # Alt nodes are only split when reference splitting is enabled too.
            if ref_node_max_length and alt_splitting:
                graphUtils.split_alt_nodes(region_graph, ref_node_max_length, ref_node_padding)

            if simplify:
                graphUtils.remove_empty_nodes(region_graph)
                graphUtils.combine_nodes(region_graph)
                # Edge label simplification (remove_redundant_edge_labels) is
                # deliberately not run for now; a node-label short-cut may be
                # used later instead.
            region_graph.check()

            graphUtils.add_graph(graph, region_graph)
    finally:
        # The temporary file was created with delete=False, so remove it here.
        os.remove(indexed_vcf.name)

    if target_regions:
        graph.target_regions = target_regions
    else:
        graph.target_regions = graph.get_reference_regions()
    graphUtils.add_source_sink(graph)
    graphUtils.add_ref_path(graph)
    if alt_paths:
        graphUtils.add_alt_paths(graph)
    graph.check()
    return graph.json_dict()