def __init__(self, config): Gel2Decipher._sanity_checks(config) self.gel_user = config['gel_user'] self.gel_password = config['gel_password'] self.cipapi_url = config['cipapi_url'] self.cipapi = CipApiClient(self.cipapi_url, user=self.gel_user, password=self.gel_password) self.cva_url = config['cva_url'] self.cva = CvaClient(self.cva_url, user=self.gel_user, password=self.gel_password) self.report_events_client = self.cva.report_events() self.decipher_system_key = config['decipher_system_key'] self.decipher_user_key = config['decipher_user_key'] self.decipher_url = config['decipher_url'] self.send_absent_phenotypes = config['send_absent_phenotypes'] self.decipher = DecipherClient(self.decipher_url, self.decipher_system_key, self.decipher_user_key)
def setUpClass(cls): logging.basicConfig(level=logging.DEBUG) if cls.GEL_PASSWORD is None: cls.GEL_PASSWORD = "" if not cls.CVA_URL_BASE or not cls.GEL_USER: logging.error("Please set the configuration environment variables: CVA_URL, GEL_USER, GEL_PASSWORD") raise ValueError("Missing config") logging.info("Running tests against {}".format(cls.CVA_URL_BASE)) cls.cva = CvaClient(cls.CVA_URL_BASE, user=cls.GEL_USER, password=cls.GEL_PASSWORD, retries=10) cls.report_events = cls.cva.report_events() cls.entities = cls.cva.entities() cls.cases = cls.cva.cases() cls.variants = cls.cva.variants() cls.data_intake = cls.cva.data_intake() # fetch 50 cases to run tests on cls.random_cases = list(cls.cases.get_cases( max_results=50, program=Program.rare_disease, assembly=Assembly.GRCh38, filter='countInterpretationServices.exomiser gt 0 and countTiered gt 100', hasClinicalData=True)) cls.random_closed_cases = list(cls.cases.get_cases( max_results=50, program=Program.rare_disease, assembly=Assembly.GRCh38, filter='countInterpretationServices.exomiser gt 0 and countTiered gt 100', hasClinicalData=True, hasPositiveDx=True))
from pyark.cva_client import CvaClient from protocols.reports_5_0_0 import Tier, Program, Assembly import getpass from itertools import islice cva = CvaClient('https://bio-prod-cva.gel.zone', user='******', password=getpass.getpass()) # report_events = client.report_events().get_report_events(panelName='cakut') # [re.caseId for re in islice(report_events, 5)] # report_events = client.report_events().get_report_events(caseId='2915') # sum(1 for _ in report_events) # report_events = client.report_events().get_report_events(caseId='2915') # tiers = set([re.reportEvent.tier for re in report_events]) tiers # client.cases().get_variants_by_panel('cakut', hasTiered=True, tiers=["TIER1"]) report_events = cva.report_events() entities = cva.entities() cases = cva.cases() variants = cva.variants() from pyark.cva_client import CvaClient from protocols.reports_6_0_0 import Program, Assembly import pandas as pd import itertools
def test_returns_empty_if_no_results(self, post, get): self._mock_panels_to_return(get, post, 200) self.assertEqual(0, CvaClient("https://nowhere.invalid", user='******', password='******').entities().get_all_panels().size)
def test_errors_if_5xx(self, post, get): self._mock_panels_to_return(get, post, 500) self.assertRaises( CvaServerError, lambda: CvaClient("https://nowhere.invalid", user='******', password='******').entities().get_all_panels() )
def test_errors_if_cva_down(self): self.assertRaises( ConnectionError, lambda: CvaClient("https://nowhere.invalid", user='******', password='******', retries=2).entities().get_all_panels() )
# sets logging logger = logging.getLogger() formatter = logging.Formatter('%(message)s') logger.setLevel(logging.INFO) consoleHandler = logging.StreamHandler() consoleHandler.setFormatter(formatter) logger.addHandler(consoleHandler) cva_url = os.environ.get("CVA_URL_BASE", "http://localhost:8090") gel_user = os.environ.get("CVA_USER", None) if not gel_user: gel_user = getpass.getpass("User:"******"CVA_PASSWORD", None) if not gel_password: gel_password = getpass.getpass("Password:"******"pyark version {}".format(pyark.VERSION)) def get_size(obj, seen=None): """Recursively finds size of objects""" size = sys.getsizeof(obj) if seen is None: seen = set() obj_id = id(obj) if obj_id in seen:
class Gel2Decipher(object): def __init__(self, config): Gel2Decipher._sanity_checks(config) self.gel_user = config['gel_user'] self.gel_password = config['gel_password'] self.cipapi_url = config['cipapi_url'] self.cipapi = CipApiClient(self.cipapi_url, user=self.gel_user, password=self.gel_password) self.cva_url = config['cva_url'] self.cva = CvaClient(self.cva_url, user=self.gel_user, password=self.gel_password) self.report_events_client = self.cva.report_events() self.decipher_system_key = config['decipher_system_key'] self.decipher_user_key = config['decipher_user_key'] self.decipher_url = config['decipher_url'] self.send_absent_phenotypes = config['send_absent_phenotypes'] self.decipher = DecipherClient(self.decipher_url, self.decipher_system_key, self.decipher_user_key) @staticmethod def _sanity_checks(config): assert config is not None, "Empty config!" # TODO! def _get_pedigree(self, case_id, case_version): interpretation_request_json = self.cipapi.get_interpretation_request( case_id, case_version) pedigree = self.cipapi.get_pedigree(interpretation_request_json) return pedigree @staticmethod def _get_person_id_by_relation(persons, relation): person_id = None for person in persons: if person['relation'] == relation: person_id = person['person_id'] return person_id @staticmethod def _get_proband_observed_variant(observed_variants, proband_id, obfustcated=True): """ :type observed_variants: list :type proband_id: str :rtype: ObservedVariant """ proband = None if obfustcated: proband_id = gel2decipher.hash_id(proband_id) for observed_variant in observed_variants: # type: ObservedVariant variant_call = observed_variant.variantCall # type: VariantCall ov_participant_id = variant_call.participantId if obfustcated: ov_participant_id = gel2decipher.hash_id(ov_participant_id) if proband_id == ov_participant_id: proband = observed_variant break return proband @staticmethod def _get_variant_representation_grch37(observed_variant): """ :type observed_variant: ObservedVariant :rtype: VariantAvro """ grch37_variant = None for variant_representation in observed_variant.variant.variants: # type: VariantRepresentation if variant_representation.assembly == Assembly.GRCh37: grch37_variant = variant_representation.variant # type: VariantAvro break return grch37_variant @staticmethod def _select_consequence_type(so_terms, gene_symbols, tier): """ :type so_terms: list :type gene_symbols: list :type tier: Tier :rtype: ConsequenceType """ # filter consequence types by the provided list of gene symbols filtered_cts = [ct for ct in so_terms if ct.geneName in gene_symbols] if len(filtered_cts) == 0: filtered_cts = so_terms # filter consequence types by SO terms so_terms = { Tier.TIER1: [ "SO:0001893", "SO:0001574", "SO:0001575", "SO:0001587", "SO:0001589", "SO:0001578", "SO:0001582" ], Tier.TIER2: [ "SO:0001889", "SO:0001821", "SO:0001822", "SO:0001583", "SO:0001630", "SO:0001626" ] } filtered_cts_by_so = [] for ct in filtered_cts: # type: ConsequenceType sos = set([so.accession for so in ct.sequenceOntologyTerms ]) # type: SequenceOntologyTerm matching_sos = sos.intersection(so_terms[tier]) if len(matching_sos) > 0: filtered_cts_by_so.append(ct) # filter consequence types by biotypes biotypes = [ "IG_C_gene", "IG_D_gene", " IG_J_gene", "IG_V_gene", "IG_V_gene", "protein_coding", "nonsense_mediated_decay", "non_stop_decay", "TR_C_gene", "TR_D_gene", "TR_J_gene", "TR_V_gene" ] filtered_cts_by_bt = [ ct for ct in filtered_cts_by_so if ct.biotype in biotypes ] # type: ConsequenceType # filter consequence types by flags transcript_flags = set(["basic"]) filtered_cts_by_flag = [ ct for ct in filtered_cts_by_bt if len(transcript_flags.intersection(ct.transcriptAnnotationFlags)) > 0 ] # type: ConsequenceType # sort by transcript so the selection is deterministic filtered_cts_by_flag.sort(key=lambda x: x.ensemblTranscriptId) consequence_type = filtered_cts_by_flag[0] # type: ConsequenceType logging.info("The selected transcript is {} at gene {}".format( consequence_type.ensemblTranscriptId, consequence_type.geneName)) return consequence_type def _send_pedigree_member_phenotypes(self, pedigree_member, decipher_person_id): """ :type pedigree_member: PedigreeMember :type decipher_person_id: str :return: """ accepted_phenotypes = [] rejected_phenotypes = [] decipher_phenotype_ids = [] for phenotype in pedigree_member.hpoTermList: # type: HpoTerm # avoid sending unknown presence phenotypes if phenotype.termPresence == TernaryOption.unknown: logging.warn("Skipping phenotype {}".format(phenotype.term)) continue # optionally send absent phenotypes elif not self.send_absent_phenotypes and phenotype.termPresence == TernaryOption.no: logging.warn("Skipping phenotype {}".format(phenotype.term)) continue else: dec_phenotype = gel2decipher.map_phenotype( phenotype, decipher_person_id) try: phenotype_id = self.decipher.create_phenotypes( [dec_phenotype], decipher_person_id)[0] decipher_phenotype_ids.append(phenotype_id) accepted_phenotypes.append(phenotype) except HTTPError: logging.warning("Rejected phenotype: {}".format( phenotype.toJsonString())) rejected_phenotypes.append(phenotype) return accepted_phenotypes, rejected_phenotypes, decipher_phenotype_ids def send_case(self, case_id, case_version): # fetch pedigree and get proband case = self.cipapi.get_case(case_id, case_version) pedigree = case.get_pedigree() proband = pedigree.get_proband() father = pedigree.get_father(proband) # type: GelRDParticipant mother = pedigree.get_mother(proband) # type: GelRDParticipant logging.info("The proband is {}".format(proband.participantId)) # fetch variants from CVA report_events_iterator = self.report_events_client.get_report_events({ 'parent_id': case_id, 'parent_version': case_version, 're_type': 'tiered', 'tier': 'TIER1,TIER2', 'vcf_format': True, 'full_populate': True }) # filters down the variants accepted_variants = [] for report_event in report_events_iterator: # type: ReportEventEntry # selects the observed variant for the proband proband_ov = Gel2Decipher._get_proband_observed_variant( report_event.observedVariants, proband_id=proband.participantId) # type: ObservedVariant if proband_ov is None: raise UnacceptableCase( "There is a report event with no observed variant for the proband" ) variant_call = proband_ov.variantCall # type: VariantCall # selects the variant representation for assembly GRCh37 grch37_variant = Gel2Decipher._get_variant_representation_grch37( proband_ov) if grch37_variant is None: logging.warning( "The report event does not have coordinates in GRCh37") continue else: accepted_variants.append( (grch37_variant, report_event, variant_call)) if len(accepted_variants) == 0: message = "The case id={} and version={} has no variants".format( case_id, case_version) logging.warning(message) raise UnacceptableCase(message) # create patient for proband in Decipher try: patient_id = self.decipher.create_patients([ gel2decipher.map_pedigree_member_to_patient( proband, self.decipher.project_id, self.decipher.user_id) ])[0]['patient_id'] except HTTPError: # the patient already exists?? raise UnacceptableCase( "Patient registration failed, don't know how to continue") # create phenotypes dec_persons = self.decipher.get_persons_by_patient(patient_id) logging.info("The persons: " + str(dec_persons)) dec_proband = Gel2Decipher._get_person_id_by_relation( dec_persons, 'patient') dec_mother = Gel2Decipher._get_person_id_by_relation( dec_persons, 'mother') dec_father = Gel2Decipher._get_person_id_by_relation( dec_persons, 'father') self._send_pedigree_member_phenotypes(proband, dec_proband) # updates mother and father affection status because they are created automatically if mother is not None: self.decipher.update_person( gel2decipher.map_affection_status(mother.affectionStatus), dec_mother) self._send_pedigree_member_phenotypes(mother, dec_mother) if father is not None: self.decipher.update_person( gel2decipher.map_affection_status(father.affectionStatus), dec_father) self._send_pedigree_member_phenotypes(father, dec_father) for member in pedigree.members: # type: GelRDParticipant logging.info("Member of family: {}".format(member.pedigreeId)) if member.pedigreeId not in [ proband.pedigreeId, father.pedigreeId, mother.pedigreeId ]: dec_person = self.decipher.create_persons([ gel2decipher.map_pedigree_member_to_person( member, patient_id, pedigree.get_relationship(member.pedigreeId, proband.pedigreeId)) ], patient_id)[0] self._send_pedigree_member_phenotypes(member, dec_person) # push the variants to Decipher unique_variants = {} for variant, report_event, variant_call in accepted_variants: # type: (VariantAvro, ReportEventEntry) annotation = variant.annotation # type: VariantAnnotation gene_symbols = [ x.geneSymbol for x in report_event.reportEvent.genomicEntities ] consequence_type = Gel2Decipher._select_consequence_type( annotation.consequenceTypes, gene_symbols, report_event.reportEvent.tier) # type: ConsequenceType # builds the variant in decipher model dec_variant = gel2decipher.map_report_event( report_event.reportEvent, variant, variant_call, consequence_type, patient_id) uid = "{}:{}:{}:{}".format(dec_variant.chr, dec_variant.start, dec_variant.ref_allele, dec_variant.alt_allele) if uid not in unique_variants: unique_variants[uid] = dec_variant # NOTE: this removes the duplicated variants from composite heterozygous report events self.decipher.create_snvs(unique_variants.values(), patient_id) return patient_id