def write_output(self, dir_out):
    """Write every report file into ``dir_out``.

    Files are produced in this order: the NSV list, the unmapped-trait counts,
    the unavailable-EFO names, the evidence strings (one JSON document per
    line) and finally the ZOOMA file, which is delegated to
    ``write_zooma_file``.
    """

    def _path(file_name):
        # Paths are built by plain concatenation with a '/' separator.
        return dir_out + '/' + file_name

    write_string_list_to_file(self.nsv_list, _path(config.NSV_LIST_FILE))

    # Traits that have no mapping in Gary's xls, together with their counts
    with utilities.open_file(_path(config.UNMAPPED_TRAITS_FILE_NAME), 'wt') as unmapped_fh:
        unmapped_fh.write('Trait\tCount\n')
        for trait in self.unmapped_traits:
            count = self.unmapped_traits[trait]
            unmapped_fh.write('\t'.join((str(trait), str(count))) + '\n')

    # URLs provided by Gary which are not yet included within EFO
    # NOTE(review): the header announces two columns but each row only holds the name.
    with utilities.open_file(_path(config.UNAVAILABLE_EFO_FILE_NAME), 'wt') as unavailable_fh:
        unavailable_fh.write('Trait\tCount\n')
        for clinvar_name in self.unavailable_efo:
            unavailable_fh.write(clinvar_name + "\n")

    with utilities.open_file(_path(config.EVIDENCE_STRINGS_FILE_NAME), 'wt') as evidence_fh:
        for evidence_string in self.evidence_string_list:
            serialized = json.dumps(evidence_string)
            evidence_fh.write(serialized + '\n')

    self.write_zooma_file(dir_out)
def test_existing(self):
    """Copy directory A onto directory B when B already holds a ``test.txt``.

    After the copy, B's ``test.txt`` is expected to contain the string that
    was written into A, not the placeholder text seeded here.
    """
    destination_file = os.path.join(self.test_dir_b, "test.txt")

    # Seed the destination with different content so a replacement is observable.
    with utilities.open_file(destination_file, "wt") as seed_handle:
        seed_handle.write("hello world")

    utilities.copy_dir(self.test_dir_a, self.test_dir_b)

    with utilities.open_file(destination_file, "rt") as result_handle:
        copied_text = result_handle.read()
    self.assertEqual(copied_text, self.test_string)
def test_nonexisting(self):
    """Copy directory A into ``test_dir_c`` and check the copied file's content.

    ``test_dir_c`` is removed again once the test finishes. The removal is
    registered as a cleanup *before* anything is copied, so the directory is
    deleted even when the assertion fails; previously a failing assertion
    skipped the ``rmtree`` call and left the directory behind for later runs.
    """
    # ignore_errors: nothing to remove if copy_dir itself failed to create the directory.
    self.addCleanup(shutil.rmtree, self.test_dir_c, ignore_errors=True)

    utilities.copy_dir(self.test_dir_a, self.test_dir_c)

    copied_file = os.path.join(self.test_dir_c, "test.txt")
    with utilities.open_file(copied_file, "rt") as f:
        contents = f.read()
    self.assertEqual(contents, self.test_string)
def process_consequence_type_file_tsv(snp_2_gene_filepath):
    """Read a tab-separated SNP-to-gene file and register each usable row.

    Rows with fewer than six columns, or whose gene ID column is ``'NA'``,
    are logged and skipped. For every other row the variant ID (column 0),
    Ensembl gene ID (column 2) and SO term (column 4) are handed to
    ``process_gene`` together with the accumulating dictionary.

    Returns a ``(consequence_type_dict, one_rs_multiple_genes)`` tuple; the
    second element is created here as an empty set and is not modified by
    this function.
    """
    consequence_type_dict = defaultdict(list)
    one_rs_multiple_genes = set()

    with utilities.open_file(snp_2_gene_filepath, "rt") as snp_2_gene_file:
        for raw_line in snp_2_gene_file:
            record = raw_line.rstrip()
            columns = record.split("\t")

            if len(columns) < 6:
                logger.warning('Skip invalid line in snp_2_gene file: {}'.format(record))
                continue

            variant_id, ensembl_gene_id, so_term = columns[0], columns[2], columns[4]

            if ensembl_gene_id == 'NA':
                logger.warning('Skip line with missing gene ID: {}'.format(record))
                continue

            process_gene(consequence_type_dict, variant_id, ensembl_gene_id, so_term)

    return consequence_type_dict, one_rs_multiple_genes
def get_test_record():
    """Build a ``clinvar.ClinvarRecord`` from ``resources/test_clinvar_record.json``.

    The resource is looked up relative to the directory containing this file.
    """
    resources_dir = os.path.join(os.path.dirname(__file__), 'resources')
    record_path = os.path.join(resources_dir, 'test_clinvar_record.json')

    with utilities.open_file(record_path, "rt") as record_file:
        record_dict = json.load(record_file)

    return clinvar.ClinvarRecord(record_dict)
def load_efo_mapping(efo_mapping_file):
    """Load trait-to-ontology mappings from a tab-separated file.

    Blank lines and lines starting with ``#`` are ignored. Column 0 is the
    trait name (lower-cased), column 1 a ``|``-separated list of ontology IDs
    and the optional column 2 a ``|``-separated list of labels; when column 2
    is absent every label is ``None``.

    Returns a ``defaultdict(list)`` mapping each trait name to a list of
    ``(ontology_id, ontology_label)`` tuples.

    Raises ``ValueError`` for a line that has a trait name but no mapping column.
    """
    trait_2_efo = defaultdict(list)
    n_efo_mappings = 0

    with utilities.open_file(efo_mapping_file, "rt") as mapping_file:
        for raw_line in mapping_file:
            stripped = raw_line.rstrip()
            if not stripped or stripped.startswith("#"):
                continue

            fields = stripped.split("\t")
            clinvar_name = fields[0].lower()
            if len(fields) <= 1:
                raise ValueError('No mapping provided for trait: {}'.format(clinvar_name))

            ontology_ids = fields[1].split("|")
            if len(fields) > 2:
                ontology_labels = fields[2].split("|")
            else:
                ontology_labels = [None] * len(ontology_ids)

            # zip stops at the shorter list, so surplus IDs or labels are dropped.
            for ontology_pair in zip(ontology_ids, ontology_labels):
                trait_2_efo[clinvar_name].append(ontology_pair)
                n_efo_mappings += 1

    logger.info('{} EFO mappings loaded'.format(n_efo_mappings))
    return trait_2_efo
def load_efo_mapping(efo_mapping_file):
    """Load trait-to-ontology mappings from a tab-separated file.

    Lines starting with ``#`` and blank (or whitespace-only) lines are
    ignored. Column 0 is the trait name (lower-cased), column 1 a
    ``|``-separated list of ontology IDs and the optional column 2 a
    ``|``-separated list of labels; when column 2 is absent every label is
    ``None``. A line with only a trait name is recorded as unavailable.

    Returns a ``(trait_2_efo, unavailable_efo)`` tuple: a ``defaultdict(list)``
    mapping each trait name to ``(ontology_id, ontology_label)`` tuples, and
    the set of trait names that had no mapping column.
    """
    trait_2_efo = defaultdict(list)
    unavailable_efo = set()
    n_efo_mappings = 0
    with utilities.open_file(efo_mapping_file, "rt") as f:
        for line in f:
            line = line.rstrip()
            # Blank lines used to fall through and register '' as an unavailable trait.
            if not line or line.startswith("#"):
                continue
            line_list = line.split("\t")
            clinvar_name = line_list[0].lower()
            if len(line_list) > 1:
                ontology_id_list = line_list[1].split("|")
                if len(line_list) > 2:
                    ontology_label_list = line_list[2].split("|")
                else:
                    ontology_label_list = [None] * len(ontology_id_list)
                # zip stops at the shorter list, so surplus IDs or labels are dropped.
                for ontology_id, ontology_label in zip(ontology_id_list, ontology_label_list):
                    trait_2_efo[clinvar_name].append((ontology_id, ontology_label))
                    n_efo_mappings += 1
            else:
                unavailable_efo.add(clinvar_name)
    print(str(n_efo_mappings) + ' EFO mappings loaded')
    print(str(len(unavailable_efo)) + ' urls without an actual valid EFO mapping')
    return trait_2_efo, unavailable_efo
def get_terms_from_file(terms_file_path):
    """Return the lines of ``terms_file_path`` with trailing whitespace removed.

    Args:
        terms_file_path: path of the terms file, or ``None``.

    Returns:
        A list with one entry per line of the file, or an empty list when
        ``terms_file_path`` is ``None`` (in which case no file is opened).
    """
    if terms_file_path is None:
        return []

    print('Loading list of terms...')
    with utilities.open_file(terms_file_path, 'rt') as terms_file:
        terms_list = [line.rstrip() for line in terms_file]
    # Report how many terms were read; the old message printed the length of the path string.
    print(str(len(terms_list)) + ' terms found at ' + terms_file_path)
    return terms_list
def setUp(self):
    """Prepare the fixture directories under ``resources`` next to this file.

    ``test_tmp_a`` and ``test_tmp_b`` are created on disk; the path for
    ``test_tmp_c`` is only recorded, the directory itself is not created.
    A ``test.txt`` holding ``self.test_string`` is written into ``test_tmp_a``.
    """
    resources_dir = os.path.join(os.path.dirname(__file__), "resources")
    self.test_dir_a = os.path.join(resources_dir, "test_tmp_a")
    self.test_dir_b = os.path.join(resources_dir, "test_tmp_b")
    self.test_dir_c = os.path.join(resources_dir, "test_tmp_c")

    for fixture_dir in (self.test_dir_a, self.test_dir_b):
        os.makedirs(fixture_dir)

    self.test_file_a = os.path.join(self.test_dir_a, "test.txt")
    self.test_string = "this is a test string"
    with utilities.open_file(self.test_file_a, "wt") as fixture_file:
        fixture_file.write(self.test_string)
def write_zooma_file(self, dir_out):
    """Write the ZOOMA file into ``dir_out``.

    The file starts with a tab-separated header row. One record is then
    written for each entry of ``self.evidence_list`` and one for each item of
    ``self.trait_mappings``; all of them share a single timestamp taken (in
    GMT) right after the header is written.
    """
    zooma_path = os.path.join(dir_out, config.ZOOMA_FILE_NAME)
    with utilities.open_file(zooma_path, "wt") as zooma_fh:
        zooma_fh.write("STUDY\tBIOENTITY\tPROPERTY_TYPE\tPROPERTY_VALUE\tSEMANTIC_TAG\tANNOTATOR\tANNOTATION_DATE\n")

        annotation_date = strftime("%d/%m/%y %H:%M", gmtime())

        for evidence_record in self.evidence_list:
            self.write_zooma_record_to_zooma_file(evidence_record, zooma_fh, annotation_date)

        for trait_name, ontology_tuple_list in self.trait_mappings.items():
            self.write_extra_trait_to_zooma_file(
                ontology_tuple_list, trait_name, annotation_date, zooma_fh)
def write_output(self, dir_out):
    """Write every report file into ``dir_out``.

    Files are produced in this order: the NSV list, the unmapped-trait
    counts, the evidence strings (one JSON document per line) and finally
    the ZOOMA file, which is delegated to ``write_zooma_file``.
    """

    def _path(file_name):
        # Paths are built by plain concatenation with a '/' separator.
        return dir_out + '/' + file_name

    write_string_list_to_file(self.nsv_list, _path(config.NSV_LIST_FILE))

    # Traits that have no mapping in Gary's xls, together with their counts
    with utilities.open_file(_path(config.UNMAPPED_TRAITS_FILE_NAME), 'wt') as unmapped_fh:
        unmapped_fh.write('Trait\tCount\n')
        for trait in self.unmapped_traits:
            count = self.unmapped_traits[trait]
            unmapped_fh.write('\t'.join((str(trait), str(count))) + '\n')

    with utilities.open_file(_path(config.EVIDENCE_STRINGS_FILE_NAME), 'wt') as evidence_fh:
        for evidence_string in self.evidence_string_list:
            serialized = json.dumps(evidence_string)
            evidence_fh.write(serialized + '\n')

    self.write_zooma_file(dir_out)
def write_output(self, dir_out):
    """Write every report file into ``dir_out``.

    Files are produced in this order: the NSV list, the unmapped-trait counts,
    the unavailable-EFO names, the evidence strings (one JSON document per
    line) and finally the ZOOMA file, which is delegated to
    ``write_zooma_file``.
    """

    def _path(file_name):
        # Paths are built by plain concatenation with a '/' separator.
        return dir_out + '/' + file_name

    write_string_list_to_file(self.nsv_list, _path(config.NSV_LIST_FILE))

    # Traits that have no mapping in Gary's xls, together with their counts
    with utilities.open_file(_path(config.UNMAPPED_TRAITS_FILE_NAME), 'wt') as unmapped_fh:
        unmapped_fh.write('Trait\tCount\n')
        for trait in self.unmapped_traits:
            count = self.unmapped_traits[trait]
            unmapped_fh.write('\t'.join((str(trait), str(count))) + '\n')

    # URLs provided by Gary which are not yet included within EFO
    # NOTE(review): the header announces two columns but each row only holds the name.
    with utilities.open_file(_path(config.UNAVAILABLE_EFO_FILE_NAME), 'wt') as unavailable_fh:
        unavailable_fh.write('Trait\tCount\n')
        for clinvar_name in self.unavailable_efo:
            unavailable_fh.write(clinvar_name + "\n")

    with utilities.open_file(_path(config.EVIDENCE_STRINGS_FILE_NAME), 'wt') as evidence_fh:
        for evidence_string in self.evidence_string_list:
            serialized = json.dumps(evidence_string)
            evidence_fh.write(serialized + '\n')

    self.write_zooma_file(dir_out)
def setUp(self):
    """Prepare the fixture directories under ``resources`` next to this file.

    ``test_tmp_a`` and ``test_tmp_b`` are created on disk, and a ``test.txt``
    holding ``self.test_string`` is written into ``test_tmp_a``.
    """
    resources_dir = os.path.join(os.path.dirname(__file__), "resources")
    self.test_dir_a = os.path.join(resources_dir, "test_tmp_a")
    self.test_dir_b = os.path.join(resources_dir, "test_tmp_b")
    # The third directory is deliberately not set up here (disabled in the original):
    # self.test_dir_c = os.path.join(os.path.dirname(__file__), "resources", "test_tmp_c")

    for fixture_dir in (self.test_dir_a, self.test_dir_b):
        os.makedirs(fixture_dir)

    self.test_file_a = os.path.join(self.test_dir_a, "test.txt")
    self.test_string = "this is a test string"
    with utilities.open_file(self.test_file_a, "wt") as fixture_file:
        fixture_file.write(self.test_string)
def write_zooma_file(self, dir_out):
    """Write the ZOOMA file into ``dir_out``.

    The file starts with a tab-separated header row. One record is then
    written for each entry of ``self.evidence_list`` and one for each item of
    ``self.trait_mappings``; all of them share a single timestamp taken (in
    GMT) right after the header is written.
    """
    zooma_path = os.path.join(dir_out, config.ZOOMA_FILE_NAME)
    with utilities.open_file(zooma_path, "wt") as zooma_fh:
        zooma_fh.write("STUDY\tBIOENTITY\tPROPERTY_TYPE\tPROPERTY_VALUE\tSEMANTIC_TAG\tANNOTATOR\tANNOTATION_DATE\n")

        annotation_date = strftime("%d/%m/%y %H:%M", gmtime())

        for evidence_record in self.evidence_list:
            self.write_zooma_record_to_zooma_file(evidence_record, zooma_fh, annotation_date)

        for trait_name, ontology_tuple_list in self.trait_mappings.items():
            self.write_extra_trait_to_zooma_file(
                ontology_tuple_list, trait_name, annotation_date, zooma_fh)
def process_consequence_type_file_tsv(snp_2_gene_filepath):
    """Read a tab-separated SNP-to-gene file and register each usable row.

    Rows with fewer than six columns are silently skipped. For every other
    row the variant ID (column 0), Ensembl gene ID (column 2) and SO term
    (column 4) are handed to ``process_gene`` together with the accumulating
    dictionary.

    Returns a ``(consequence_type_dict, one_rs_multiple_genes)`` tuple; the
    second element is created here as an empty set and is not modified by
    this function.
    """
    consequence_type_dict = defaultdict(list)
    one_rs_multiple_genes = set()

    with utilities.open_file(snp_2_gene_filepath, "rt") as snp_2_gene_file:
        for raw_line in snp_2_gene_file:
            columns = raw_line.rstrip().split("\t")
            if len(columns) >= 6:
                variant_id, ensembl_gene_id, so_term = columns[0], columns[2], columns[4]
                process_gene(consequence_type_dict, variant_id, ensembl_gene_id, so_term)

    return consequence_type_dict, one_rs_multiple_genes
def load_efo_mapping(efo_mapping_file):
    """Load trait-to-ontology mappings from a tab-separated file.

    Lines starting with ``#`` and blank (or whitespace-only) lines are
    ignored. Column 0 is the trait name (lower-cased), column 1 a
    ``|``-separated list of ontology IDs and the optional column 2 a
    ``|``-separated list of labels; when column 2 is absent every label is
    ``None``. A line with only a trait name is recorded as unavailable.

    Returns a ``(trait_2_efo, unavailable_efo)`` tuple: a ``defaultdict(list)``
    mapping each trait name to ``(ontology_id, ontology_label)`` tuples, and
    the set of trait names that had no mapping column.
    """
    trait_2_efo = defaultdict(list)
    unavailable_efo = set()
    n_efo_mappings = 0
    with utilities.open_file(efo_mapping_file, "rt") as f:
        for line in f:
            line = line.rstrip()
            # Blank lines used to fall through and register '' as an unavailable trait.
            if not line or line.startswith("#"):
                continue
            line_list = line.split("\t")
            clinvar_name = line_list[0].lower()
            if len(line_list) > 1:
                ontology_id_list = line_list[1].split("|")
                if len(line_list) > 2:
                    ontology_label_list = line_list[2].split("|")
                else:
                    ontology_label_list = [None] * len(ontology_id_list)
                # zip stops at the shorter list, so surplus IDs or labels are dropped.
                for ontology_id, ontology_label in zip(ontology_id_list, ontology_label_list):
                    trait_2_efo[clinvar_name].append((ontology_id, ontology_label))
                    n_efo_mappings += 1
            else:
                unavailable_efo.add(clinvar_name)
    print(str(n_efo_mappings) + ' EFO mappings loaded')
    print(str(len(unavailable_efo)) + ' urls without an actual valid EFO mapping')
    return trait_2_efo, unavailable_efo
def __iter__(self):
    """Yield one decoded JSON document per line of ``self.json_file``.

    The file is opened when iteration starts and stays open until the
    generator is exhausted or closed.
    """
    with utilities.open_file(self.json_file, "rt") as json_lines:
        for raw_line in json_lines:
            document = json.loads(raw_line.rstrip())
            yield document
def clinvar_to_evidence_strings(allowed_clinical_significance, mappings, json_file, ot_schema,
                                output_evidence_strings):
    """Convert ClinVar records into evidence strings, writing them out as they are built.

    For every record read from ``json_file``, every measure of that record and
    every combination of consequence type, trait and converted allele origin,
    an evidence string is built, validated against the schema and written as
    one JSON document per line to ``output_evidence_strings``. Combinations
    for which ``skip_record`` returns true are not written.

    Args:
        allowed_clinical_significance: forwarded to ``skip_record``.
        mappings: object providing ``trait_2_efo`` and ``consequence_type_dict``.
        json_file: input passed to ``cellbase_records.CellbaseRecords``.
        ot_schema: path of the JSON schema file used for validation.
        output_evidence_strings: path of the output file.

    Returns:
        The ``Report`` holding the counters and lists accumulated during the run.

    Raises:
        AssertionError: if an allele origin is neither ``'germline'`` nor ``'somatic'``.
    """
    report = Report(trait_mappings=mappings.trait_2_efo)
    cell_recs = cellbase_records.CellbaseRecords(json_file=json_file)

    # Read the schema inside a context manager so its handle is closed right away.
    with open(ot_schema) as ot_schema_file:
        ot_schema_contents = json.load(ot_schema_file)

    # The context manager closes the output file even when a record raises part-way through.
    with utilities.open_file(output_evidence_strings, 'wt') as output_evidence_strings_file:
        for cellbase_record in cell_recs:
            report.counters["record_counter"] += 1
            if report.counters["record_counter"] % 1000 == 0:
                logger.info("{} records processed".format(report.counters["record_counter"]))
            n_ev_strings_per_record = 0
            clinvar_record = clinvar.ClinvarRecord(cellbase_record['clinvarSet'])

            for clinvar_record_measure in clinvar_record.measures:
                report.counters["n_nsvs"] += (clinvar_record_measure.nsv_id is not None)
                append_nsv(report.nsv_list, clinvar_record_measure)
                report.counters["n_multiple_allele_origin"] += (
                    len(clinvar_record.allele_origins) > 1)
                traits = create_traits(clinvar_record.traits, mappings.trait_2_efo, report)
                converted_allele_origins = convert_allele_origins(clinvar_record.allele_origins)

                for consequence_type, trait, allele_origin in itertools.product(
                        get_consequence_types(clinvar_record_measure,
                                              mappings.consequence_type_dict),
                        traits,
                        converted_allele_origins):

                    if skip_record(clinvar_record, clinvar_record_measure, consequence_type,
                                   allele_origin, allowed_clinical_significance, report):
                        continue

                    if allele_origin == 'germline':
                        evidence_string = evidence_strings.CTTVGeneticsEvidenceString(
                            clinvar_record, clinvar_record_measure, report, trait,
                            consequence_type)
                    elif allele_origin == 'somatic':
                        evidence_string = evidence_strings.CTTVSomaticEvidenceString(
                            clinvar_record, clinvar_record_measure, report, trait,
                            consequence_type)
                    else:
                        raise AssertionError(
                            'Unknown allele_origin present in the data: {}'.format(
                                allele_origin))

                    # Validate and immediately output the evidence string
                    # (not keeping everything in memory)
                    validate_evidence_string(evidence_string, clinvar_record, trait,
                                             consequence_type.ensembl_gene_id,
                                             ot_schema_contents)
                    output_evidence_strings_file.write(json.dumps(evidence_string) + '\n')

                    report.evidence_string_count += 1
                    report.evidence_list.append([
                        clinvar_record.accession,
                        clinvar_record_measure.rs_id,
                        trait.clinvar_name,
                        trait.ontology_id,
                    ])
                    report.counters["n_valid_rs_and_nsv"] += (
                        clinvar_record_measure.nsv_id is not None)
                    report.traits.add(trait.ontology_id)
                    report.remove_trait_mapping(trait.clinvar_name)
                    report.ensembl_gene_id_uris.add(
                        evidence_strings.get_ensembl_gene_id_uri(
                            consequence_type.ensembl_gene_id))

                    n_ev_strings_per_record += 1

                # NOTE(review): these checks are assumed to run once per measure (inside the
                # measure loop); confirm the nesting against the original file layout.
                if n_ev_strings_per_record > 0:
                    report.counters["n_processed_clinvar_records"] += 1
                    if n_ev_strings_per_record > 1:
                        report.counters["n_multiple_evidence_strings"] += 1

    return report
def write_string_list_to_file(string_list, filename):
    """Write the strings to ``filename``, separated by newlines.

    No newline is appended after the last string.
    """
    with utilities.open_file(filename, 'wt') as out_file:
        joined_text = '\n'.join(string_list)
        out_file.write(joined_text)
class CTTVGeneticsEvidenceString(CTTVEvidenceString):
    """
    Evidence string for the genetics (germline) case.

    Starts from the JSON template named by ``config.GEN_EV_STRING_JSON`` and fills it in
    from a ClinVar record, one of its measures, a trait and a consequence type. Most
    values live in two sections, ``evidence.gene2variant`` and ``evidence.variant2disease``;
    the properties below write both sections and, on read, insist that they agree.
    """

    # The template is read once, while the class body executes; instances deep-copy it.
    with utilities.open_file(
            utilities.get_resource_file(__package__, config.GEN_EV_STRING_JSON),
            "rt") as gen_json_file:
        base_json = json.load(gen_json_file)

    def __init__(self, clinvar_record, clinvar_record_measure, report, trait, consequence_type):
        evidence_template = copy.deepcopy(self.base_json)

        # De-duplicated, sorted references from the trait, the observations and the measure.
        references = sorted(set(clinvar_record.trait_refs_list[trait.trait_counter]
                                + clinvar_record.observed_refs_list
                                + clinvar_record_measure.refs_list))

        super().__init__(evidence_template, clinvar_record, references,
                         consequence_type.ensembl_gene_id, report, trait)

        variant_type = get_cttv_variant_type(clinvar_record_measure)

        self.add_unique_association_field('alleleOrigin', 'germline')

        # Identifier preference: rs ID, then nsv ID, then the ClinVar accession.
        if clinvar_record_measure.rs_id:
            variant_id = clinvar_record_measure.rs_id
            variant_uri = 'http://identifiers.org/dbsnp/' + variant_id
        elif clinvar_record_measure.nsv_id:
            variant_id = clinvar_record_measure.nsv_id
            variant_uri = 'http://identifiers.org/dbsnp/' + variant_id
        else:
            variant_id = clinvar_record.accession
            variant_uri = 'http://www.ncbi.nlm.nih.gov/clinvar/' + variant_id
        self.set_variant(variant_uri, variant_type)
        self.add_unique_association_field('variant_id', variant_id)

        self.date = clinvar_record.date
        self.db_xref_url = 'http://identifiers.org/clinvar.record/' + clinvar_record.accession
        self.url = 'http://www.ncbi.nlm.nih.gov/clinvar/' + clinvar_record.accession

        not_associated = ('non-pathogenic', 'probable-non-pathogenic', 'likely benign', 'benign')
        self.association = clinvar_record.clinical_significance not in not_associated

        self.gene_2_var_ev_codes = ['http://identifiers.org/eco/cttv_mapping_pipeline']

        so_term = consequence_type.so_term
        if so_term.accession is None:
            consequence_uri = 'http://targetvalidation.org/sequence/' + so_term.so_name
        else:
            consequence_uri = 'http://purl.obolibrary.org/obo/' + \
                so_term.accession.replace(':', '_')
        self.gene_2_var_func_consequence = consequence_uri

        if len(references) > 0:
            self.set_var_2_disease_literature(references)
            # Arbitrarily select only one reference among all
            self.unique_reference = references[0]

        if clinvar_record.clinical_significance:
            self.clinical_significance = clinvar_record.clinical_significance

    @property
    def db_xref_url(self):
        """Database cross-reference URL; raises if the two sections disagree."""
        gene_side = self['evidence']['gene2variant']['provenance_type']['database']['dbxref']['url']
        disease_side = \
            self['evidence']['variant2disease']['provenance_type']['database']['dbxref']['url']
        if gene_side == disease_side:
            return disease_side
        raise Exception("db_xref_url attributes different")

    @db_xref_url.setter
    def db_xref_url(self, url):
        for section in ('gene2variant', 'variant2disease'):
            self['evidence'][section]['provenance_type']['database']['dbxref']['url'] = url

    @property
    def url(self):
        """First URL entry; raises if the two sections disagree."""
        gene_side = self['evidence']['gene2variant']['urls'][0]['url']
        disease_side = self['evidence']['variant2disease']['urls'][0]['url']
        if gene_side == disease_side:
            return gene_side
        raise Exception("url attributes different")

    @url.setter
    def url(self, url):
        for section in ('gene2variant', 'variant2disease'):
            self['evidence'][section]['urls'][0]['url'] = url

    @property
    def gene_2_var_ev_codes(self):
        return self['evidence']['gene2variant']['evidence_codes']

    @gene_2_var_ev_codes.setter
    def gene_2_var_ev_codes(self, gene_2_var_ev_codes):
        self['evidence']['gene2variant']['evidence_codes'] = gene_2_var_ev_codes

    @property
    def gene_2_var_func_consequence(self):
        return self['evidence']['gene2variant']['functional_consequence']

    @gene_2_var_func_consequence.setter
    def gene_2_var_func_consequence(self, so_term):
        self['evidence']['gene2variant']['functional_consequence'] = so_term

    def set_var_2_disease_literature(self, ref_list):
        """Store ``ref_list`` as the variant2disease literature references."""
        literature = {'references': [{'lit_id': reference} for reference in ref_list]}
        self['evidence']['variant2disease']['provenance_type']['literature'] = literature

    @property
    def association(self):
        """``is_associated`` flag; raises if the two sections disagree."""
        gene_side = self['evidence']['gene2variant']['is_associated']
        disease_side = self['evidence']['variant2disease']['is_associated']
        if gene_side == disease_side:
            return gene_side
        raise Exception("association attributes different")

    @association.setter
    def association(self, is_associated):
        for section in ('gene2variant', 'variant2disease'):
            self['evidence'][section]['is_associated'] = is_associated

    def _clear_variant(self):
        self['variant']['id'] = []
        self['variant']['type'] = []

    def set_variant(self, var_id, var_type):
        self['variant']['id'] = var_id
        self['variant']['type'] = var_type

    @property
    def unique_reference(self):
        return self['evidence']['variant2disease']['unique_experiment_reference']

    @unique_reference.setter
    def unique_reference(self, reference):
        self['evidence']['variant2disease']['unique_experiment_reference'] = reference

    @property
    def date(self):
        """``date_asserted`` value; raises if the two sections disagree."""
        gene_side = self['evidence']['gene2variant']['date_asserted']
        disease_side = self['evidence']['variant2disease']['date_asserted']
        if gene_side == disease_side:
            return gene_side
        raise Exception("date attributes have different values")

    @date.setter
    def date(self, date_string):
        for section in ('gene2variant', 'variant2disease'):
            self['evidence'][section]['date_asserted'] = date_string

    @property
    def clinical_significance(self):
        return self['evidence']['variant2disease']['clinical_significance']

    @clinical_significance.setter
    def clinical_significance(self, clinical_significance):
        self['evidence']['variant2disease']['clinical_significance'] = clinical_significance
class CTTVSomaticEvidenceString(CTTVEvidenceString):
    """
    Evidence string for the somatic case.

    Starts from the JSON template named by ``config.SOM_EV_STRING_JSON`` and fills it in
    from a ClinVar record, one of its measures, a trait and a consequence type. All values
    managed here live under the single ``evidence`` section.
    """

    # The template is read once, while the class body executes; instances deep-copy it.
    with utilities.open_file(
            utilities.get_resource_file(__package__, config.SOM_EV_STRING_JSON),
            "rt") as som_json_file:
        base_json = json.load(som_json_file)

    def __init__(self, clinvar_record, clinvar_record_measure, report, trait, consequence_type):
        evidence_template = copy.deepcopy(self.base_json)

        # De-duplicated, sorted references from the trait, the observations and the measure.
        references = sorted(set(clinvar_record.trait_refs_list[trait.trait_counter]
                                + clinvar_record.observed_refs_list
                                + clinvar_record_measure.refs_list))

        super().__init__(evidence_template, clinvar_record, references,
                         consequence_type.ensembl_gene_id, report, trait)

        self.add_unique_association_field('alleleOrigin', 'somatic')

        # Identifier preference: rs ID, then nsv ID, then the ClinVar accession.
        if clinvar_record_measure.rs_id:
            variant_id = clinvar_record_measure.rs_id
        elif clinvar_record_measure.nsv_id:
            variant_id = clinvar_record_measure.nsv_id
        else:
            variant_id = clinvar_record.accession
        self.add_unique_association_field('variant_id', variant_id)

        self.date = clinvar_record.date
        self.last_evaluated_date = clinvar_record.last_evaluated_date
        self.db_xref_url = 'http://identifiers.org/clinvar.record/' + clinvar_record.accession
        self.url = 'http://www.ncbi.nlm.nih.gov/clinvar/' + clinvar_record.accession
        # See https://github.com/opentargets/platform/issues/1139#issuecomment-682592678
        self.association = True

        self.set_known_mutations(consequence_type.so_term)

        if len(references) > 0:
            self.evidence_literature = references

        if clinvar_record.clinical_significance:
            self.clinical_significance = process_clinical_significance(
                clinvar_record.clinical_significance)

        # Populate star rating and review status
        star_rating, review_status = clinvar_record.score
        self.clinvar_rating = (star_rating, review_status)

        # Populate mode of inheritance (if present)
        self.mode_of_inheritance = clinvar_record.mode_of_inheritance

    @property
    def db_xref_url(self):
        return self['evidence']['provenance_type']['database']['dbxref']['url']

    @db_xref_url.setter
    def db_xref_url(self, url):
        self['evidence']['provenance_type']['database']['dbxref']['url'] = url

    @property
    def url(self):
        return self['evidence']['urls'][0]['url']

    @url.setter
    def url(self, url):
        self['evidence']['urls'][0]['url'] = url

    @property
    def evidence_literature(self):
        return self['evidence']['provenance_type']['literature']['references']

    @evidence_literature.setter
    def evidence_literature(self, ref_list):
        literature = {'references': [{'lit_id': reference} for reference in ref_list]}
        self['evidence']['provenance_type']['literature'] = literature

    @property
    def association(self):
        return self['evidence']['is_associated']

    @association.setter
    def association(self, is_associated):
        self['evidence']['is_associated'] = is_associated

    @property
    def date(self):
        return self['evidence']['date_asserted']

    @date.setter
    def date(self, date_string):
        self['evidence']['date_asserted'] = date_string

    @property
    def last_evaluated_date(self):
        return self['evidence']['last_evaluated_date']

    @last_evaluated_date.setter
    def last_evaluated_date(self, clinvar_last_evaluated_date):
        # A falsy date leaves the field untouched rather than storing an empty value.
        if not clinvar_last_evaluated_date:
            return
        self['evidence']['last_evaluated_date'] = clinvar_last_evaluated_date

    def _clear_known_mutations(self):
        self['evidence']['known_mutations'] = []

    def add_known_mutation(self, new_functional_consequence, so_name):
        """Append one functional-consequence entry to ``evidence.known_mutations``."""
        self['evidence']['known_mutations'].append({
            'functional_consequence': new_functional_consequence,
            'preferred_name': so_name,
        })

    def set_known_mutations(self, so_term):
        """Add a known mutation whose URI is derived from the SO term's accession or name."""
        if so_term.accession:
            consequence_uri = \
                "http://purl.obolibrary.org/obo/" + so_term.accession.replace(':', '_')
        else:
            consequence_uri = 'http://targetvalidation.org/sequence/' + so_term.so_name
        self.add_known_mutation(consequence_uri, so_term.so_name)

    @property
    def clinical_significance(self):
        return self['evidence']['clinical_significance']

    @clinical_significance.setter
    def clinical_significance(self, clinical_significance):
        self['evidence']['clinical_significance'] = clinical_significance

    @property
    def clinvar_rating(self):
        return self['evidence']['clinvar_rating']

    @clinvar_rating.setter
    def clinvar_rating(self, clinvar_rating_data):
        # Expects a two-item iterable: (star_rating, review_status).
        star_rating, review_status = clinvar_rating_data
        rating = {
            'star_rating': star_rating,
            'review_status': review_status,
        }
        self['evidence']['clinvar_rating'] = rating

    @property
    def mode_of_inheritance(self):
        return self['evidence'].get('mode_of_inheritance')

    @mode_of_inheritance.setter
    def mode_of_inheritance(self, mode_of_inheritance):
        # A falsy value leaves the field absent.
        if not mode_of_inheritance:
            return
        self['evidence']['mode_of_inheritance'] = mode_of_inheritance
class CTTVSomaticEvidenceString(CTTVEvidenceString):
    """
    Evidence string for the somatic case.

    Starts from the JSON template named by ``config.SOM_EV_STRING_JSON`` and fills it in
    from a ClinVar record, one of its measures, a trait and a consequence type. All values
    managed here live under the single ``evidence`` section.
    """

    # The template is read once, while the class body executes; instances deep-copy it.
    with utilities.open_file(
            utilities.get_resource_file(__package__, config.SOM_EV_STRING_JSON),
            "rt") as som_json_file:
        base_json = json.load(som_json_file)

    def __init__(self, clinvar_record, clinvar_record_measure, report, trait, consequence_type):
        evidence_template = copy.deepcopy(self.base_json)

        # De-duplicated, sorted references from the trait, the observations and the measure.
        references = sorted(set(clinvar_record.trait_refs_list[trait.trait_counter]
                                + clinvar_record.observed_refs_list
                                + clinvar_record_measure.refs_list))

        super().__init__(evidence_template, clinvar_record, references,
                         consequence_type.ensembl_gene_id, report, trait)

        self.add_unique_association_field('alleleOrigin', 'somatic')

        # Identifier preference: rs ID, then nsv ID, then the ClinVar accession.
        if clinvar_record_measure.rs_id:
            variant_id = clinvar_record_measure.rs_id
        elif clinvar_record_measure.nsv_id:
            variant_id = clinvar_record_measure.nsv_id
        else:
            variant_id = clinvar_record.accession
        self.add_unique_association_field('variant_id', variant_id)

        self.date = clinvar_record.date
        self.db_xref_url = 'http://identifiers.org/clinvar.record/' + clinvar_record.accession
        self.url = 'http://www.ncbi.nlm.nih.gov/clinvar/' + clinvar_record.accession

        not_associated = ('non-pathogenic', 'probable-non-pathogenic', 'likely benign', 'benign')
        self.association = clinvar_record.clinical_significance not in not_associated

        self.set_known_mutations(consequence_type.so_term)

        if len(references) > 0:
            self.evidence_literature = references

        if clinvar_record.clinical_significance:
            self.clinical_significance = clinvar_record.clinical_significance

    @property
    def db_xref_url(self):
        return self['evidence']['provenance_type']['database']['dbxref']['url']

    @db_xref_url.setter
    def db_xref_url(self, url):
        self['evidence']['provenance_type']['database']['dbxref']['url'] = url

    @property
    def url(self):
        return self['evidence']['urls'][0]['url']

    @url.setter
    def url(self, url):
        self['evidence']['urls'][0]['url'] = url

    @property
    def evidence_literature(self):
        return self['evidence']['provenance_type']['literature']['references']

    @evidence_literature.setter
    def evidence_literature(self, ref_list):
        literature = {'references': [{'lit_id': reference} for reference in ref_list]}
        self['evidence']['provenance_type']['literature'] = literature

    @property
    def association(self):
        return self['evidence']['is_associated']

    @association.setter
    def association(self, is_associated):
        self['evidence']['is_associated'] = is_associated

    @property
    def date(self):
        return self['evidence']['date_asserted']

    @date.setter
    def date(self, date_string):
        self['evidence']['date_asserted'] = date_string

    def _clear_known_mutations(self):
        self['evidence']['known_mutations'] = []

    def add_known_mutation(self, new_functional_consequence, so_name):
        """Append one functional-consequence entry to ``evidence.known_mutations``."""
        self['evidence']['known_mutations'].append({
            'functional_consequence': new_functional_consequence,
            'preferred_name': so_name,
        })

    def set_known_mutations(self, so_term):
        """Add a known mutation whose URI is derived from the SO term's accession or name."""
        if so_term.accession:
            consequence_uri = \
                "http://purl.obolibrary.org/obo/" + so_term.accession.replace(':', '_')
        else:
            consequence_uri = 'http://targetvalidation.org/sequence/' + so_term.so_name
        self.add_known_mutation(consequence_uri, so_term.so_name)

    @property
    def clinical_significance(self):
        return self['evidence']['clinical_significance']

    @clinical_significance.setter
    def clinical_significance(self, clinical_significance):
        self['evidence']['clinical_significance'] = clinical_significance