Esempio n. 1
0
def load_json(json) -> GraphContainer:
    """
    Build a graph object from its parsed JSON representation.

    Nodes with a "reference" region are added through ``add_refNode``,
    nodes with a "position" region through ``add_altNode``; any other node
    dictionary is stored unchanged in ``graph.nodes`` under its name. Edges
    are then added between the named nodes, the model name, paths and
    target regions are copied over, and ``graph.check()`` is called.

    :param json: Dictionary of JSON file contents. "nodes", "edges" and
                 "model_name" are required; "paths" and "target_regions"
                 default to empty lists.
    :return: the populated graph
    """
    container = GraphContainer()

    for entry in json["nodes"]:
        labels = entry.get("sequences", ())
        if "reference" in entry:
            contig, first, last = parse_region(entry["reference"])
            container.add_refNode(contig, first, last, labels, entry["name"])
            continue
        if "position" in entry:
            contig, first, last = parse_region(entry["position"])
            container.add_altNode(contig, first, last, entry["sequence"],
                                  labels, entry["name"])
            continue
        # Neither a reference nor a positioned node: keep the raw
        # dictionary as the node so edges can still refer to it by name.
        container.nodes[entry["name"]] = entry

    for link in json["edges"]:
        labels = link.get("sequences", ())
        source = container.nodes[link["from"]]
        target = container.nodes[link["to"]]
        container.add_edge(source, target, labels)

    container.name = json["model_name"]
    container.paths = json.get("paths", [])
    container.target_regions = json.get("target_regions", [])
    container.check()
    return container
Esempio n. 2
0
def add_reference_information(paragraph_dict, reference_fasta):
    """
    Add reference sequence information to reference nodes.

    Every node in ``paragraph_dict["nodes"]`` that has a "reference" region
    receives a "reference_sequence" entry holding the upper-cased sequence
    fetched from the FASTA file for that region. The dictionary is
    modified in place; nothing is returned.

    :param paragraph_dict: graph dictionary containing a "nodes" list
    :param reference_fasta: FASTA file name passed to ``pysam.FastaFile``
    """
    # Open the FASTA as a context manager so the handle is closed even when
    # parsing a region or fetching a sequence raises.
    with pysam.FastaFile(reference_fasta) as fasta:
        for node in paragraph_dict["nodes"]:
            if "reference" not in node:
                continue
            chrom, start, end = parse_region(node["reference"])
            # fetch() takes a 0-based, half-open interval; the "- 1"
            # presumably converts a 1-based inclusive start returned by
            # parse_region -- confirm against parse_region.
            node["reference_sequence"] = fasta.fetch(
                chrom, start - 1, end).upper()
Esempio n. 3
0
 def _parse_breakend(self, alt):
     """ Parse remote breakend info from a BND ALT allele

     Only ALTs of the form ``<bases>[<position>[`` whose position is a single
     coordinate on ``self.chrom`` are accepted.

     :param alt: Alt allele from VCF record (BND)
     :return: Inserted sequence and position of first base after remote breakend
     :raises Exception: if the ALT does not match the expected pattern, uses
                        ``]`` brackets, gives a range instead of a single
                        position, or points to a different chromosome
     """
     # We only support forward strand breakends.
     be_match = re.match(r'([ACGTNXacgtnx]+)([\[\]])([^\[\]]+)([\[\]])',
                         alt)
     if not be_match:
         raise Exception("Unsupported breakend ALT: %s" % alt)
     ins_sequence, be_direction1, be_pos, be_direction2 = be_match.groups()
     be_chrom, be_start, be_end = parse_region(be_pos)
     if be_direction1 != "[" or be_direction2 != "[":
         raise Exception("Reverse-comp breakends are not supported.")
     # Explicit check instead of an assert: the ALT comes from the VCF, and
     # asserts are stripped when Python runs with -O.
     if be_end:
         raise Exception(
             "Breakend position must be a single coordinate: %s" % be_pos)
     if be_chrom != self.chrom:
         raise Exception("Breakends across chromosomes are not supported.")
     return ins_sequence, be_start
Esempio n. 4
0
def _vcf2paragraph_extent(graph):
    """
    Return (chrom, start, end) spanning all regions of a converted graph.

    Regions come from ``graph["vcf_records"]`` ("chrom"/"pos"/"end" keys)
    when that key is present, otherwise from parsing each entry of
    ``graph["target_regions"]``. All regions must share one chromosome.
    """
    if "vcf_records" in graph:
        regions = ((r["chrom"], r["pos"], r["end"])
                   for r in graph["vcf_records"])
    else:
        regions = (parse_region(tr) for tr in graph["target_regions"])

    chrom = start = end = None
    for c, s, e in regions:
        if chrom is None:
            chrom = c
        else:
            assert chrom == c
        # Accumulate item by item (rather than min()/max() over the whole
        # list) so that None values are skipped over instead of compared.
        start = s if start is None else min(start, s)
        end = e if end is None else max(end, e)

    # Internal invariants on the converted graph: at least one region with
    # a known chromosome, start and end must have been seen.
    assert chrom is not None
    assert start is not None
    assert end is not None
    return chrom, start, end


def run_vcf2paragraph(event_and_args):
    """
    Run vcf2paragraph for one single variant.

    :param event_and_args: sequence whose first item is the event to convert
                           and whose second item is a parameter dictionary
                           with the keys "reference", "read_length",
                           "max_ref_node_length", "graph_type",
                           "alt_splitting", "alt_paths" and
                           "retrieve_reference_sequence"
    :return: dictionary with the converted "graph" and the "chrom", "start"
             and "end" of the region it covers
    :raises Exception: any error raised during conversion is logged and
                       re-raised unchanged
    """
    event = event_and_args[0]
    params = event_and_args[1]
    result = {}

    try:
        logging.debug("Converting: %s", str(event))

        result["graph"] = convert_vcf(
            event,
            params["reference"],
            None,
            ref_node_padding=params["read_length"],
            ref_node_max_length=params["max_ref_node_length"],
            allele_graph=params["graph_type"] == "alleles",
            alt_splitting=params["alt_splitting"],
            alt_paths=params["alt_paths"])

        chrom, start, end = _vcf2paragraph_extent(result["graph"])
        result["chrom"] = chrom
        result["start"] = start
        result["end"] = end
    except Exception:  # pylint: disable=broad-except
        logging.error("Exception when running vcf2paragraph on %s", str(event))
        traceback.print_exc(file=LoggingWriter(logging.ERROR))
        raise

    if params["retrieve_reference_sequence"]:
        add_reference_information(result["graph"], params["reference"])

    return result