def load_json(json) -> GraphContainer:
    """
    Build a GraphContainer from the dictionary form of a graph JSON file

    Each entry of json["nodes"] is handled according to the keys it carries:
    an entry with "reference" is added through add_refNode, an entry with
    "position" through add_altNode, and any other entry is stored unchanged
    in the container's node table under its "name". Edges are added afterwards
    by looking up their "from" / "to" names in that table, and the finished
    container is validated with check() before it is returned.

    :param json: Dictionary of JSON file contents; "nodes", "edges" and
                 "model_name" are required, "paths" and "target_regions"
                 default to empty lists
    :return: the populated GraphContainer
    """
    container = GraphContainer()

    for entry in json["nodes"]:
        labels = entry.get("sequences", ())
        if "reference" in entry:
            chrom, start, end = parse_region(entry["reference"])
            container.add_refNode(chrom, start, end, labels, entry["name"])
            continue
        if "position" in entry:
            chrom, start, end = parse_region(entry["position"])
            container.add_altNode(chrom, start, end, entry["sequence"], labels, entry["name"])
            continue
        # Neither a reference nor an alt node: keep the raw dictionary as the node.
        container.nodes[entry["name"]] = entry

    for link in json["edges"]:
        labels = link.get("sequences", ())
        source = container.nodes[link["from"]]
        target = container.nodes[link["to"]]
        container.add_edge(source, target, labels)

    container.name = json["model_name"]
    container.paths = json.get("paths", [])
    container.target_regions = json.get("target_regions", [])
    container.check()
    return container
def add_reference_information(paragraph_dict, reference_fasta):
    """
    Adds reference sequence information to reference nodes

    Every entry of paragraph_dict["nodes"] that has a "reference" key gets a
    "reference_sequence" key holding the upper-cased sequence fetched from the
    FASTA file. The dictionary is modified in place; nothing is returned.

    :param paragraph_dict: graph dictionary with a "nodes" list
    :param reference_fasta: reference FASTA file, opened with pysam.FastaFile
    """
    fasta = pysam.FastaFile(reference_fasta)
    try:
        for n in paragraph_dict["nodes"]:
            if "reference" in n:
                chrom, start, end = parse_region(n["reference"])
                # pysam's fetch() takes 0-based, half-open coordinates; the "- 1"
                # presumably converts a 1-based inclusive start from parse_region
                # (TODO confirm against parse_region).
                n["reference_sequence"] = fasta.fetch(chrom, start - 1, end).upper()
    finally:
        # Release the FASTA/index handles even if a region fails to parse or fetch.
        fasta.close()
def _parse_breakend(self, alt): """ Parse remote breakend info from BND VCF record :param record: VCF record :param alt: Alt allele from VCF record (BND) :return Inserted sequence and position of first base after remote breakend """ # We only support forward strand breakends. be_match = re.match(r'([ACGTNXacgtnx]+)([\[\]])([^\[\]]+)([\[\]])', alt) if not be_match: raise Exception("Unsupported breakend ALT: %s" % alt) ins_sequence = be_match.group(1) be_direction1 = be_match.group(2) be_pos = be_match.group(3) be_direction2 = be_match.group(4) be_chrom, be_start, be_end = parse_region(be_pos) if be_direction1 != "[" or be_direction2 != "[": raise Exception("Reverse-comp breakends are not supported.") assert not be_end if be_chrom != self.chrom: raise Exception("Breakends across chromosomes are not supported.") return ins_sequence, be_start
def _enclosing_interval(intervals):
    """
    Fold (chrom, start, end) triples into the single interval that covers them all

    :param intervals: iterable of (chrom, start, end) triples
    :return: (chrom, smallest start, largest end)
    :raises AssertionError: if the triples name more than one chromosome, or if
                            no chromosome / start / end could be determined
    """
    chrom = start = end = None
    for c, s, e in intervals:
        if chrom is None:
            chrom = c
        elif chrom != c:
            raise AssertionError("Event spans multiple chromosomes: %s and %s" % (chrom, c))
        start = s if start is None else min(start, s)
        end = e if end is None else max(end, e)
    # Explicit raises instead of `assert` so the checks survive `python -O`.
    if chrom is None or start is None or end is None:
        raise AssertionError("Unable to determine chrom / start / end for event")
    return chrom, start, end


def run_vcf2paragraph(event_and_args):
    """
    run vcf2paragraph for one single variant

    Converts the event with convert_vcf and records the genomic interval the
    resulting graph covers. The interval is taken from the graph's
    "vcf_records" when that key is present, and from its "target_regions"
    (parsed with parse_region) otherwise.

    :param event_and_args: sequence whose first item is the event handed to
                           convert_vcf and whose second item is a dictionary
                           of parameters; keys read are "reference",
                           "read_length", "max_ref_node_length", "graph_type",
                           "alt_splitting", "alt_paths" and
                           "retrieve_reference_sequence"
    :return: dictionary with keys "graph", "chrom", "start" and "end"
    :raises AssertionError: if the covered interval cannot be determined or
                            spans more than one chromosome
    """
    event = event_and_args[0]
    params = event_and_args[1]
    # Nothing in this function registers temporary files at present; the list
    # and its cleanup below are kept so any that get added are removed.
    tempfiles = []
    result = {}
    try:
        logging.debug("Converting: %s", str(event))
        result["graph"] = convert_vcf(
            event,
            params["reference"],
            None,
            ref_node_padding=params["read_length"],
            ref_node_max_length=params["max_ref_node_length"],
            allele_graph=params["graph_type"] == "alleles",
            alt_splitting=params["alt_splitting"],
            alt_paths=params["alt_paths"])

        graph = result["graph"]
        if "vcf_records" in graph:
            intervals = ((r["chrom"], r["pos"], r["end"]) for r in graph["vcf_records"])
        else:
            intervals = (parse_region(tr) for tr in graph["target_regions"])
        result["chrom"], result["start"], result["end"] = _enclosing_interval(intervals)
    except Exception:  # pylint: disable=broad-except
        # Log with traceback here, then let the caller see the failure.
        logging.error("Exception when running vcf2paragraph on %s", str(event))
        traceback.print_exc(file=LoggingWriter(logging.ERROR))
        raise
    finally:
        for x in tempfiles:
            try:
                os.remove(x)
            except OSError:
                # Best-effort cleanup: a file that cannot be removed is not fatal,
                # but KeyboardInterrupt / SystemExit must not be swallowed.
                pass

    if params["retrieve_reference_sequence"]:
        add_reference_information(result["graph"], params["reference"])
    return result