def main():
    """Stream a ClinVar XML file and process each current ClinVarSet record.

    Command line:
        clinVarXmlFilename  path of the uncompressed ClinVar XML file.
        -a / --assembly     value forwarded to processSubmission()
                            (default "GRCh38").

    The file is scanned line by line rather than parsed as a whole: the
    lines from one containing "<ClinVarSet" through the next one containing
    "</ClinVarSet>" are collected and parsed as a standalone XML element.
    Records for which clinvar.isCurrent() is true are wrapped with
    clinvar.clinVarSet() and handed to processSubmission().

    Raises whatever ET.fromstring() raises when the collected text is not
    well-formed XML (for example a closing tag with no matching opening tag).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("clinVarXmlFilename")
    parser.add_argument('-a', "--assembly", default="GRCh38")
    args = parser.parse_args()

    printHeader()
    # Lines of the record being collected.  They are joined once per record
    # so the growing text is not re-copied on every appended line.
    recordLines = []
    inClinVarSet = False
    with open(args.clinVarXmlFilename) as inputFile:
        for line in inputFile:
            if "<ClinVarSet" in line:
                # Opening tag: start a fresh record.
                recordLines = [line]
                inClinVarSet = True
            elif "</ClinVarSet>" in line:
                # Closing tag: the record is complete, parse it on its own.
                recordLines.append(line)
                inClinVarSet = False
                cvs = ET.fromstring("".join(recordLines))
                # Always reset to an empty list so a stray closing tag is
                # reported by the XML parser instead of failing on the buffer.
                recordLines = []
                if clinvar.isCurrent(cvs):
                    submissionSet = clinvar.clinVarSet(cvs)
                    processSubmission(submissionSet, args.assembly)
            elif inClinVarSet:
                recordLines.append(line)
def _clinVarSetTexts(lines):
    """Yield the text of each ClinVarSet element found in *lines*.

    A record starts at a line containing "<ClinVarSet" and ends at the next
    line containing "</ClinVarSet>"; other lines outside a record are
    ignored.  A closing line met outside a record is yielded on its own so
    that the caller's XML parser rejects it rather than it being dropped
    silently.
    """
    recordLines = []
    collecting = False
    for line in lines:
        if "<ClinVarSet" in line:
            recordLines = [line]
            collecting = True
        elif "</ClinVarSet>" in line:
            recordLines.append(line)
            collecting = False
            # One join per record instead of repeated string concatenation.
            yield "".join(recordLines)
            recordLines = []
        elif collecting:
            recordLines.append(line)


def main():
    """Process every current ClinVarSet record of a ClinVar XML file.

    Command line:
        clinVarXmlFilename  path of the uncompressed ClinVar XML file.
        -a / --assembly     value forwarded to processSubmission()
                            (default "GRCh38").

    printHeader() is called first.  The file is then read line by line; each
    ClinVarSet element is parsed on its own with ET.fromstring(), and those
    accepted by clinvar.isCurrent() are wrapped with clinvar.clinVarSet()
    and passed to processSubmission() together with the assembly.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("clinVarXmlFilename")
    parser.add_argument('-a', "--assembly", default="GRCh38")
    args = parser.parse_args()

    printHeader()
    with open(args.clinVarXmlFilename) as inputFile:
        for recordText in _clinVarSetTexts(inputFile):
            cvs = ET.fromstring(recordText)
            if clinvar.isCurrent(cvs):
                processSubmission(clinvar.clinVarSet(cvs), args.assembly)
def main():
    """Filter a gzipped ClinVar XML stream down to BRCA1/BRCA2 records.

    Command line:
        clinVarXmlFilename  path of the gzip-compressed ClinVar XML file.

    The file is read line by line:
      * lines outside any ClinVarSet element are echoed to stdout with
        trailing whitespace removed (lines of at most one character are
        skipped);
      * the lines of each ClinVarSet element are collected and parsed on
        their own; the collected text is printed unchanged when
        clinvar.isCurrent() accepts the element and the reference
        assertion's variant has gene symbol "BRCA1" or "BRCA2".

    NOTE(review): the substring tests assume the gzip stream yields text
    lines, as it does under Python 2 -- confirm before running on Python 3.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("clinVarXmlFilename")
    args = parser.parse_args()

    # Lines of the record being collected; joined once when it is complete.
    recordLines = []
    inClinVarSet = False
    with gzip.open(args.clinVarXmlFilename) as inputFile:
        for line in inputFile:
            if "<ClinVarSet" in line:
                recordLines = [line]
                inClinVarSet = True
            elif "</ClinVarSet>" in line:
                recordLines.append(line)
                inClinVarSet = False
                recordText = "".join(recordLines)
                recordLines = []
                cvs = ET.fromstring(recordText)
                if clinvar.isCurrent(cvs):
                    submissionSet = clinvar.clinVarSet(cvs)
                    variant = submissionSet.referenceAssertion.variant
                    if (variant is not None
                            and variant.geneSymbol in ("BRCA1", "BRCA2")):
                        # Single-argument call form prints the same text
                        # under Python 2 and Python 3.
                        print(recordText)
            elif inClinVarSet:
                recordLines.append(line)
            elif len(line) > 1:
                # Pass the surrounding document (header/footer) through.
                print(line.rstrip())
def main():
    """Print one tab-separated row per eligible ClinVar submission.

    Command line:
        clinVarXmlFilename  path of the ClinVar XML file (parsed in full).
        -a / --assembly     key used to look up genomic coordinates in
                            variant.coordinates (default "GRCh38").

    After printHeader(), every ClinVarSet child of the document root that
    clinvar.isCurrent() accepts is wrapped with clinvar.clinVarSet().  Each
    of its otherAssertions produces a row unless:
      * its method is "literature only" and its submitter is not "Counsyl";
      * its origin is "somatic";
      * its clinical significance is "none provided" or "not provided";
      * the variant name, with the "(BRCA1)"/"(BRCA2)" suffix removed,
        starts with "NP".

    Columns: HGVS name, submitter, clinical significance, date last updated,
    accession, origin, method, genomic coordinate ("chr<chrom>:<start>:
    <ref>><alt>", all "None" when the assembly is absent), gene symbol and
    protein change.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("clinVarXmlFilename")
    parser.add_argument('-a', "--assembly", default="GRCh38")
    args = parser.parse_args()

    printHeader()
    # "[12]" rather than "[1|2]": inside a character class "|" is a literal,
    # so the old pattern also matched "(BRCA|)".  Compiled once, raw string.
    geneSuffix = re.compile(r"\(BRCA[12]\)")
    root = ET.parse(args.clinVarXmlFilename).getroot()
    for cvs in root.findall("ClinVarSet"):
        if not clinvar.isCurrent(cvs):
            continue
        submissionSet = clinvar.clinVarSet(cvs)
        ra = submissionSet.referenceAssertion
        for oa in submissionSet.otherAssertions.values():
            # Literature-only submissions are kept only for Counsyl.
            if oa.method == "literature only" and oa.submitter != "Counsyl":
                continue
            if oa.origin == "somatic":
                continue
            if oa.clinicalSignificance in ("none provided", "not provided"):
                continue

            variant = ra.variant
            hgvs = geneSuffix.sub("", variant.name.split()[0])
            # Names starting with "NP" are not reported.
            if hgvs.startswith("NP"):
                continue
            proteinChange = variant.attribute.get("HGVS, protein, RefSeq")

            genomicCoordinate = "chrNone:None:None>None"
            if args.assembly in variant.coordinates:
                genomicData = variant.coordinates[args.assembly]
                genomicCoordinate = "chr%s:%s:%s>%s" % (
                    genomicData.chrom,
                    genomicData.start,
                    genomicData.referenceAllele,
                    genomicData.alternateAllele,
                )

            print("\t".join((
                str(hgvs),
                str(oa.submitter),
                str(oa.clinicalSignificance),
                str(oa.dateLastUpdated),
                str(oa.accession),
                str(oa.origin),
                str(oa.method),
                genomicCoordinate,
                str(variant.geneSymbol),
                str(proteinChange),
            )))
def main():
    """Copy the current BRCA1/BRCA2 ClinVarSet records into a new document.

    Command line:
        clinVarXmlFilename  path of the ClinVar XML file (parsed in full).

    A new root element is created with the same tag and attributes as the
    input root.  Every ClinVarSet child that clinvar.isCurrent() accepts and
    whose reference assertion has a variant with gene symbol "BRCA1" or
    "BRCA2" is appended to it.  The result is passed through prettify() and
    printed to stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("clinVarXmlFilename")
    args = parser.parse_args()

    root = ET.parse(args.clinVarXmlFilename).getroot()
    newRoot = ET.Element(root.tag, attrib=root.attrib)
    for cvs in root.findall("ClinVarSet"):
        if not clinvar.isCurrent(cvs):
            continue
        variant = clinvar.clinVarSet(cvs).referenceAssertion.variant
        # Identity test for None; records without a variant are skipped.
        if variant is not None and variant.geneSymbol in ("BRCA1", "BRCA2"):
            newRoot.append(cvs)
    print(prettify(newRoot))
def main():
    """Filter a gzipped ClinVar XML stream down to BRCA1/BRCA2 records.

    Command line:
        clinVarXmlFilename   path of the gzip-compressed ClinVar XML file.
        -a / --artifacts_dir directory in which "clinvarbrcapy.log" is
                             written (truncated on each run); the current
                             directory is used when the option is omitted.

    The file is read line by line:
      * lines outside any ClinVarSet element are echoed to stdout with
        trailing whitespace removed (lines of at most one character are
        skipped);
      * the lines of each ClinVarSet element are collected and parsed on
        their own; the collected text is printed unchanged when
        clinvar.isCurrent() accepts the element and the reference
        assertion's variant has gene symbol "BRCA1" or "BRCA2".

    An AttributeError raised by clinvar.clinVarSet() is logged at DEBUG
    level and the record is skipped.

    NOTE(review): the substring tests assume the gzip stream yields text
    lines, as it does under Python 2 -- confirm before running on Python 3.
    """
    import os  # local import: only needed to build the log file path

    parser = argparse.ArgumentParser()
    parser.add_argument("clinVarXmlFilename")
    parser.add_argument(
        '-a', "--artifacts_dir",
        help='Artifacts directory with pipeline artifact files.')
    args = parser.parse_args()

    # os.path.join supplies the separator when the directory is given
    # without a trailing one; plain concatenation produced a sibling file
    # name in that case and a TypeError when the option was omitted.
    log_file_path = os.path.join(args.artifacts_dir or "", "clinvarbrcapy.log")
    logging.basicConfig(filename=log_file_path, filemode="w",
                        level=logging.DEBUG)

    # Lines of the record being collected; joined once when it is complete.
    recordLines = []
    inClinVarSet = False
    with gzip.open(args.clinVarXmlFilename) as inputFile:
        for line in inputFile:
            if "<ClinVarSet" in line:
                recordLines = [line]
                inClinVarSet = True
            elif "</ClinVarSet>" in line:
                recordLines.append(line)
                inClinVarSet = False
                inputBuffer = "".join(recordLines)
                recordLines = []
                cvs = ET.fromstring(inputBuffer)
                if not clinvar.isCurrent(cvs):
                    continue
                try:
                    submissionSet = clinvar.clinVarSet(cvs)
                except AttributeError:
                    # TODO: this is a hack to avoid errors, figure out how to
                    # correctly handle these reports.
                    logging.debug(
                        "AttributeError running clinvar.clinVarSet(cvs), inputBuffer: %s, cvs: %s",
                        inputBuffer, cvs)
                    continue
                variant = submissionSet.referenceAssertion.variant
                if (variant is not None
                        and variant.geneSymbol in ("BRCA1", "BRCA2")):
                    # Single-argument call form prints the same text under
                    # Python 2 and Python 3.
                    print(inputBuffer)
            elif inClinVarSet:
                recordLines.append(line)
            elif len(line) > 1:
                # Pass the surrounding document (header/footer) through.
                print(line.rstrip())
def main():
    """Print one tab-separated row per submission of each current record.

    Command line:
        clinVarXmlFilename  path of the ClinVar XML file (parsed in full).
        -a / --assembly     key used to look up genomic coordinates in
                            variant.coordinates (default "GRCh38").

    After printHeader(), every ClinVarSet child of the document root that
    clinvar.isCurrent() accepts is wrapped with clinvar.clinVarSet(), and
    one row is printed for each of its otherAssertions (no filtering).

    Columns: HGVS name (first word of the variant name with the "(BRCA1)"/
    "(BRCA2)" suffix removed), submitter (UTF-8 encoded), clinical
    significance, date last updated, accession, id, origin, method, genomic
    coordinate ("chr<chrom>:<start>:<ref>><alt>", all "None" when the
    assembly is absent), gene symbol and protein change.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("clinVarXmlFilename")
    parser.add_argument('-a', "--assembly", default="GRCh38")
    args = parser.parse_args()

    # Raw string, and "[12]" rather than "[1|2]": inside a character class
    # "|" is a literal, so the old pattern also matched "(BRCA|)".
    geneSuffixPattern = r"\(BRCA[12]\)"
    proteinKey = "HGVS, protein, RefSeq"

    printHeader()
    tree = ET.parse(args.clinVarXmlFilename)
    for cvs in tree.getroot().findall("ClinVarSet"):
        if not clinvar.isCurrent(cvs):
            continue
        submissionSet = clinvar.clinVarSet(cvs)
        ra = submissionSet.referenceAssertion
        for oa in submissionSet.otherAssertions.values():
            variant = ra.variant
            hgvs = re.sub(geneSuffixPattern, "", variant.name.split()[0])

            # Membership test replaces the deprecated dict.has_key().
            if proteinKey in variant.attribute:
                proteinChange = variant.attribute[proteinKey]
            else:
                proteinChange = None

            if args.assembly in variant.coordinates:
                genomicData = variant.coordinates[args.assembly]
                alleleFields = (genomicData.chrom, genomicData.start,
                                genomicData.referenceAllele,
                                genomicData.alternateAllele)
            else:
                # Formats as "chrNone:None:None>None".
                alleleFields = (None, None, None, None)
            genomicCoordinate = "chr%s:%s:%s>%s" % alleleFields

            row = (
                str(hgvs),
                oa.submitter.encode('utf-8'),
                str(oa.clinicalSignificance),
                str(oa.dateLastUpdated),
                str(oa.accession),
                str(oa.id),
                str(oa.origin),
                str(oa.method),
                genomicCoordinate,
                str(variant.geneSymbol),
                str(proteinChange),
            )
            print("\t".join(row))