コード例 #1
0
    def write_output(self, dir_out):
        """Write all report files into ``dir_out``.

        Produces, in order: the NSV list, the unmapped-traits table, the
        unavailable-EFO list, the evidence strings (one JSON document per
        line) and finally the zooma file via ``write_zooma_file``.

        :param dir_out: output directory; file names from ``config`` are
            appended to it after a ``'/'`` separator.
        """
        nsv_path = dir_out + '/' + config.NSV_LIST_FILE
        write_string_list_to_file(self.nsv_list, nsv_path)

        # Traits that had no mapping in Gary's xls, one "<trait>\t<count>" row each.
        unmapped_path = dir_out + '/' + config.UNMAPPED_TRAITS_FILE_NAME
        with utilities.open_file(unmapped_path, 'wt') as unmapped_fh:
            unmapped_fh.write('Trait\tCount\n')
            for trait_key in self.unmapped_traits:
                trait_count = self.unmapped_traits[trait_key]
                row = '\t'.join((str(trait_key), str(trait_count)))
                unmapped_fh.write(row + '\n')

        # URLs provided by Gary which are not yet included within EFO.
        # Only the name is written per row, under the same header as above.
        unavailable_path = dir_out + '/' + config.UNAVAILABLE_EFO_FILE_NAME
        with utilities.open_file(unavailable_path, 'wt') as unavailable_fh:
            unavailable_fh.write('Trait\tCount\n')
            for efo_name in self.unavailable_efo:
                unavailable_fh.write(efo_name + "\n")

        # Evidence strings are serialised as JSON lines.
        evidence_path = dir_out + '/' + config.EVIDENCE_STRINGS_FILE_NAME
        with utilities.open_file(evidence_path, 'wt') as evidence_fh:
            for ev_string in self.evidence_string_list:
                serialised = json.dumps(ev_string)
                evidence_fh.write(serialised + '\n')

        self.write_zooma_file(dir_out)
コード例 #2
0
 def test_existing(self):
     # Seed the destination directory with a file holding different text,
     # run copy_dir from test_dir_a into test_dir_b, then check that the
     # destination file now holds self.test_string.
     destination_file = os.path.join(self.test_dir_b, "test.txt")
     with utilities.open_file(destination_file, "wt") as seed_fh:
         seed_fh.write("hello world")

     utilities.copy_dir(self.test_dir_a, self.test_dir_b)

     with utilities.open_file(destination_file, "rt") as result_fh:
         copied_text = result_fh.read()
     self.assertEqual(copied_text, self.test_string)
コード例 #3
0
 def test_existing(self):
     # The destination file is first written with unrelated text; after
     # copy_dir(test_dir_a, test_dir_b) it must read back as self.test_string.
     path_in_b = os.path.join(self.test_dir_b, "test.txt")
     with utilities.open_file(path_in_b, "wt") as writer:
         writer.write("hello world")

     utilities.copy_dir(self.test_dir_a, self.test_dir_b)

     with utilities.open_file(path_in_b, "rt") as reader:
         text_after_copy = reader.read()
     self.assertEqual(text_after_copy, self.test_string)
コード例 #4
0
 def test_nonexisting(self):
     # Copy test_dir_a into test_dir_c and check that the copied test.txt
     # holds self.test_string.
     utilities.copy_dir(self.test_dir_a, self.test_dir_c)
     # Remove the copied directory even when the read or the assertion
     # fails, so a failing run does not leave test_dir_c behind.
     try:
         with utilities.open_file(os.path.join(self.test_dir_c, "test.txt"),
                                  "rt") as f:
             contents = f.read()
         self.assertEqual(contents, self.test_string)
     finally:
         shutil.rmtree(self.test_dir_c)
コード例 #5
0
def process_consequence_type_file_tsv(snp_2_gene_filepath):
    """Read a tab-separated snp-to-gene file and feed each valid row to ``process_gene``.

    Rows with fewer than six columns, or whose gene ID column is ``'NA'``,
    are logged as warnings and skipped. For every other row, columns 0, 2 and
    4 (variant ID, Ensembl gene ID, SO term) are passed to ``process_gene``
    along with the accumulating dictionary.

    :param snp_2_gene_filepath: path opened through ``utilities.open_file``.
    :return: tuple of (the ``defaultdict(list)`` handed to ``process_gene``,
        a set that this function creates but never adds to).
    """
    consequences_by_variant = defaultdict(list)
    # Created and returned as-is; nothing in this function populates it.
    multi_gene_variants = set()

    with utilities.open_file(snp_2_gene_filepath, "rt") as tsv_handle:
        for raw_row in tsv_handle:
            row = raw_row.rstrip()
            columns = row.split("\t")

            if len(columns) < 6:
                logger.warning(
                    'Skip invalid line in snp_2_gene file: {}'.format(row))
                continue

            variant_id, ensembl_gene_id, so_term = (columns[0], columns[2],
                                                    columns[4])

            if ensembl_gene_id == 'NA':
                logger.warning(
                    'Skip line with missing gene ID: {}'.format(row))
                continue

            process_gene(consequences_by_variant, variant_id,
                         ensembl_gene_id, so_term)

    return consequences_by_variant, multi_gene_variants
コード例 #6
0
def get_test_record():
    """Build a ``clinvar.ClinvarRecord`` from the bundled JSON fixture.

    The fixture is ``resources/test_clinvar_record.json`` next to this module.
    """
    module_dir = os.path.dirname(__file__)
    fixture_path = os.path.join(module_dir, 'resources',
                                'test_clinvar_record.json')
    with utilities.open_file(fixture_path, "rt") as fixture_fh:
        record_payload = json.load(fixture_fh)
    return clinvar.ClinvarRecord(record_payload)
コード例 #7
0
def load_efo_mapping(efo_mapping_file):
    """Load trait-name to ontology mappings from a tab-separated file.

    Blank lines and lines starting with ``#`` are ignored. Column 0 is the
    trait name (lower-cased), column 1 a ``|``-separated list of ontology IDs
    and the optional column 2 a ``|``-separated list of labels. IDs and
    labels are paired positionally; when the label column is absent every ID
    is paired with ``None``.

    :param efo_mapping_file: path opened through ``utilities.open_file``.
    :return: ``defaultdict(list)`` mapping trait name to a list of
        ``(ontology_id, ontology_label)`` tuples.
    :raises ValueError: if a data line has only the trait-name column.
    """
    mappings_by_trait = defaultdict(list)
    loaded_lines = 0

    with utilities.open_file(efo_mapping_file, "rt") as mapping_fh:
        for raw_line in mapping_fh:
            stripped = raw_line.rstrip()
            if not stripped or stripped.startswith("#"):
                continue

            columns = stripped.split("\t")
            trait_name = columns[0].lower()
            if len(columns) <= 1:
                raise ValueError(
                    'No mapping provided for trait: {}'.format(trait_name))

            ontology_ids = columns[1].split("|")
            if len(columns) > 2:
                ontology_labels = columns[2].split("|")
            else:
                ontology_labels = [None] * len(ontology_ids)

            # zip() yields the same (id, label) tuples the per-item append built.
            mappings_by_trait[trait_name].extend(
                zip(ontology_ids, ontology_labels))
            loaded_lines += 1

    logger.info('{} EFO mappings loaded'.format(loaded_lines))
    return mappings_by_trait
コード例 #8
0
def load_efo_mapping(efo_mapping_file):
    """Load trait-name to ontology mappings, collecting names that have none.

    Lines starting with ``#`` are ignored. Column 0 is the trait name
    (lower-cased), column 1 a ``|``-separated list of ontology IDs and the
    optional column 2 a ``|``-separated list of labels (``None`` is used for
    every label when the column is absent). A line with only the first column
    has its name added to the "unavailable" set instead.

    :param efo_mapping_file: path opened through ``utilities.open_file``.
    :return: tuple ``(trait_2_efo, unavailable_efo)``: a ``defaultdict(list)``
        of ``(ontology_id, ontology_label)`` tuples per trait name, and the
        set of names without a mapping column.
    """
    mappings_by_trait = defaultdict(list)
    names_without_mapping = set()
    mapped_line_count = 0

    with utilities.open_file(efo_mapping_file, "rt") as mapping_fh:
        for raw_line in mapping_fh:
            if raw_line.startswith("#"):
                continue

            columns = raw_line.rstrip().split("\t")
            trait_name = columns[0].lower()
            if len(columns) <= 1:
                names_without_mapping.add(trait_name)
                continue

            ontology_ids = columns[1].split("|")
            if len(columns) > 2:
                ontology_labels = columns[2].split("|")
            else:
                ontology_labels = [None] * len(ontology_ids)

            mappings_by_trait[trait_name].extend(
                zip(ontology_ids, ontology_labels))
            mapped_line_count += 1

    print(str(mapped_line_count) + ' EFO mappings loaded')
    print(
        str(len(names_without_mapping)) +
        ' urls without an actual valid EFO mapping')

    return mappings_by_trait, names_without_mapping
コード例 #9
0
def get_terms_from_file(terms_file_path):
    """Read a list of terms, one per line, from ``terms_file_path``.

    :param terms_file_path: path opened through ``utilities.open_file``, or
        ``None`` to request an empty list.
    :return: list of lines with trailing whitespace stripped; ``[]`` when
        ``terms_file_path`` is ``None``.
    """
    if terms_file_path is None:
        return []

    print('Loading list of terms...')
    with utilities.open_file(terms_file_path, 'rt') as terms_file:
        terms_list = [line.rstrip() for line in terms_file]
    # Report the number of terms read, not the length of the path string.
    print(str(len(terms_list)) + ' terms found at ' + terms_file_path)

    return terms_list
コード例 #10
0
 def setUp(self):
     # Define three directory paths under ./resources, create the first two
     # on disk, and write the reference string into test.txt inside
     # test_dir_a. test_dir_c is only a path here; it is not created.
     resources_dir = os.path.join(os.path.dirname(__file__), "resources")
     self.test_dir_a = os.path.join(resources_dir, "test_tmp_a")
     self.test_dir_b = os.path.join(resources_dir, "test_tmp_b")
     self.test_dir_c = os.path.join(resources_dir, "test_tmp_c")

     for directory in (self.test_dir_a, self.test_dir_b):
         os.makedirs(directory)

     self.test_file_a = os.path.join(self.test_dir_a, "test.txt")
     self.test_string = "this is a test string"
     with utilities.open_file(self.test_file_a, "wt") as fixture_fh:
         fixture_fh.write(self.test_string)
コード例 #11
0
def get_terms_from_file(terms_file_path):
    """Read a list of terms, one per line, from ``terms_file_path``.

    :param terms_file_path: path opened through ``utilities.open_file``, or
        ``None`` to request an empty list.
    :return: list of lines with trailing whitespace stripped; ``[]`` when
        ``terms_file_path`` is ``None``.
    """
    if terms_file_path is None:
        return []

    print('Loading list of terms...')
    with utilities.open_file(terms_file_path, 'rt') as terms_file:
        terms_list = [line.rstrip() for line in terms_file]
    # Report the number of terms read, not the length of the path string.
    print(str(len(terms_list)) + ' terms found at ' + terms_file_path)

    return terms_list
コード例 #12
0
    def write_zooma_file(self, dir_out):
        """Write the zooma file: a header row, then evidence and trait rows.

        Each entry of ``self.evidence_list`` is written through
        ``write_zooma_record_to_zooma_file`` and each item of
        ``self.trait_mappings`` through ``write_extra_trait_to_zooma_file``.
        All rows share one timestamp taken once per call (GMT).

        :param dir_out: directory in which ``config.ZOOMA_FILE_NAME`` is created.
        """
        zooma_path = os.path.join(dir_out, config.ZOOMA_FILE_NAME)
        with utilities.open_file(zooma_path, "wt") as zooma_out:
            zooma_out.write("STUDY\tBIOENTITY\tPROPERTY_TYPE\tPROPERTY_VALUE\tSEMANTIC_TAG\tANNOTATOR\tANNOTATION_DATE\n")

            annotation_date = strftime("%d/%m/%y %H:%M", gmtime())

            for record in self.evidence_list:
                self.write_zooma_record_to_zooma_file(record, zooma_out, annotation_date)

            for name, ontology_tuples in self.trait_mappings.items():
                self.write_extra_trait_to_zooma_file(ontology_tuples, name, annotation_date, zooma_out)
コード例 #13
0
    def write_output(self, dir_out):
        """Write all report files into ``dir_out``.

        Produces, in order: the NSV list, the unmapped-traits table, the
        evidence strings (one JSON document per line) and finally the zooma
        file via ``write_zooma_file``.

        :param dir_out: output directory; file names from ``config`` are
            appended to it after a ``'/'`` separator.
        """
        nsv_path = dir_out + '/' + config.NSV_LIST_FILE
        write_string_list_to_file(self.nsv_list, nsv_path)

        # Traits that had no mapping in Gary's xls, one "<trait>\t<count>" row each.
        unmapped_path = dir_out + '/' + config.UNMAPPED_TRAITS_FILE_NAME
        with utilities.open_file(unmapped_path, 'wt') as unmapped_fh:
            unmapped_fh.write('Trait\tCount\n')
            for trait_key in self.unmapped_traits:
                trait_count = self.unmapped_traits[trait_key]
                row = '\t'.join((str(trait_key), str(trait_count)))
                unmapped_fh.write(row + '\n')

        # Evidence strings are serialised as JSON lines.
        evidence_path = dir_out + '/' + config.EVIDENCE_STRINGS_FILE_NAME
        with utilities.open_file(evidence_path, 'wt') as evidence_fh:
            for ev_string in self.evidence_string_list:
                serialised = json.dumps(ev_string)
                evidence_fh.write(serialised + '\n')

        self.write_zooma_file(dir_out)
コード例 #14
0
    def write_output(self, dir_out):
        """Write all report files into ``dir_out``.

        Files are written in this order: NSV list, unmapped traits,
        unavailable EFO names, evidence strings (JSON lines), then the zooma
        file through ``write_zooma_file``.

        :param dir_out: output directory; each ``config`` file name is joined
            to it with a literal ``'/'``.
        """
        nsv_target = dir_out + '/' + config.NSV_LIST_FILE
        write_string_list_to_file(self.nsv_list, nsv_target)

        # Traits without a mapping in Gary's xls: "<trait>\t<count>" per row.
        unmapped_target = dir_out + '/' + config.UNMAPPED_TRAITS_FILE_NAME
        with utilities.open_file(unmapped_target, 'wt') as out_fh:
            out_fh.write('Trait\tCount\n')
            for trait_key in self.unmapped_traits:
                count_text = str(self.unmapped_traits[trait_key])
                out_fh.write(str(trait_key) + '\t' + count_text + '\n')

        # URLs provided by Gary which are not yet included within EFO;
        # only the name is written on each row.
        unavailable_target = dir_out + '/' + config.UNAVAILABLE_EFO_FILE_NAME
        with utilities.open_file(unavailable_target, 'wt') as out_fh:
            out_fh.write('Trait\tCount\n')
            for efo_name in self.unavailable_efo:
                out_fh.write(efo_name + "\n")

        evidence_target = dir_out + '/' + config.EVIDENCE_STRINGS_FILE_NAME
        with utilities.open_file(evidence_target, 'wt') as out_fh:
            for ev_string in self.evidence_string_list:
                out_fh.write(json.dumps(ev_string) + '\n')

        self.write_zooma_file(dir_out)
コード例 #15
0
 def setUp(self):
     # Create two scratch directories under ./resources and write the
     # reference string into test.txt inside the first one.
     # NOTE: self.test_dir_c is not assigned by this setUp.
     resources_dir = os.path.join(os.path.dirname(__file__), "resources")
     self.test_dir_a = os.path.join(resources_dir, "test_tmp_a")
     self.test_dir_b = os.path.join(resources_dir, "test_tmp_b")

     for directory in (self.test_dir_a, self.test_dir_b):
         os.makedirs(directory)

     self.test_file_a = os.path.join(self.test_dir_a, "test.txt")
     self.test_string = "this is a test string"
     with utilities.open_file(self.test_file_a, "wt") as fixture_fh:
         fixture_fh.write(self.test_string)
コード例 #16
0
    def write_zooma_file(self, dir_out):
        """Write the zooma file: a header row, then evidence and trait rows.

        Evidence rows come from ``self.evidence_list`` and extra trait rows
        from ``self.trait_mappings``; every row receives the same timestamp,
        taken once per call in GMT.

        :param dir_out: directory in which ``config.ZOOMA_FILE_NAME`` is created.
        """
        target_path = os.path.join(dir_out, config.ZOOMA_FILE_NAME)
        with utilities.open_file(target_path, "wt") as handle:
            handle.write(
                "STUDY\tBIOENTITY\tPROPERTY_TYPE\tPROPERTY_VALUE\tSEMANTIC_TAG\tANNOTATOR\tANNOTATION_DATE\n"
            )

            timestamp = strftime("%d/%m/%y %H:%M", gmtime())

            for record in self.evidence_list:
                self.write_zooma_record_to_zooma_file(record, handle,
                                                      timestamp)

            for name, ontology_tuples in self.trait_mappings.items():
                self.write_extra_trait_to_zooma_file(ontology_tuples, name,
                                                     timestamp, handle)
コード例 #17
0
def process_consequence_type_file_tsv(snp_2_gene_filepath):
    """Read a tab-separated snp-to-gene file and feed each row to ``process_gene``.

    Rows with fewer than six columns are silently skipped. For every other
    row, columns 0, 2 and 4 (variant ID, Ensembl gene ID, SO term) are passed
    to ``process_gene`` along with the accumulating dictionary.

    :param snp_2_gene_filepath: path opened through ``utilities.open_file``.
    :return: tuple of (the ``defaultdict(list)`` handed to ``process_gene``,
        a set that this function creates but never adds to).
    """
    consequences_by_variant = defaultdict(list)
    # Created and returned as-is; nothing in this function populates it.
    multi_gene_variants = set()

    with utilities.open_file(snp_2_gene_filepath, "rt") as tsv_handle:
        for raw_row in tsv_handle:
            columns = raw_row.rstrip().split("\t")

            if len(columns) < 6:
                continue

            variant_id, ensembl_gene_id, so_term = (columns[0], columns[2],
                                                    columns[4])
            process_gene(consequences_by_variant, variant_id, ensembl_gene_id, so_term)

    return consequences_by_variant, multi_gene_variants
コード例 #18
0
def process_consequence_type_file_tsv(snp_2_gene_filepath):
    """Parse a tab-separated snp-to-gene file, delegating each row to ``process_gene``.

    A row needs at least six columns to be used; shorter rows are dropped
    without any message. The variant ID, Ensembl gene ID and SO term are
    taken from columns 0, 2 and 4 respectively.

    :param snp_2_gene_filepath: path opened through ``utilities.open_file``.
    :return: ``(consequence_type_dict, one_rs_multiple_genes)``. The first is
        the ``defaultdict(list)`` passed to ``process_gene``; the second is a
        set this function never adds to.
    """
    by_variant = defaultdict(list)
    # Kept only for the return shape; no code here adds to it.
    variants_with_several_genes = set()

    with utilities.open_file(snp_2_gene_filepath, "rt") as source:
        for text_line in source:
            fields = text_line.rstrip().split("\t")

            if len(fields) >= 6:
                variant_id = fields[0]
                ensembl_gene_id = fields[2]
                so_term = fields[4]
                process_gene(by_variant, variant_id, ensembl_gene_id,
                             so_term)

    return by_variant, variants_with_several_genes
コード例 #19
0
def load_efo_mapping(efo_mapping_file):
    """Load trait-name to ontology mappings, collecting names that have none.

    Lines starting with ``#`` are skipped. Each remaining line is split on
    tabs: column 0 is the trait name (lower-cased), column 1 holds
    ``|``-separated ontology IDs and the optional column 2 holds
    ``|``-separated labels, paired positionally with the IDs (``None`` labels
    are used when column 2 is missing). Single-column lines are recorded in
    the second returned collection.

    :param efo_mapping_file: path opened through ``utilities.open_file``.
    :return: ``(trait_2_efo, unavailable_efo)``: a ``defaultdict(list)`` of
        ``(ontology_id, ontology_label)`` tuples and a set of trait names.
    """
    efo_by_trait = defaultdict(list)
    unmapped_names = set()
    n_mapped = 0

    with utilities.open_file(efo_mapping_file, "rt") as source:
        for text_line in source:
            if text_line.startswith("#"):
                continue

            fields = text_line.rstrip().split("\t")
            name = fields[0].lower()
            if len(fields) <= 1:
                unmapped_names.add(name)
                continue

            ids = fields[1].split("|")
            labels = fields[2].split("|") if len(fields) > 2 else [None] * len(ids)
            efo_by_trait[name].extend(zip(ids, labels))
            n_mapped += 1

    print(str(n_mapped) + ' EFO mappings loaded')
    print(str(len(unmapped_names)) + ' urls without an actual valid EFO mapping')

    return efo_by_trait, unmapped_names
コード例 #20
0
 def __iter__(self):
     """Yield one parsed JSON value per line of ``self.json_file``."""
     with utilities.open_file(self.json_file, "rt") as records_fh:
         for raw_line in records_fh:
             # Trailing whitespace (including the newline) is dropped before parsing.
             stripped_line = raw_line.rstrip()
             yield json.loads(stripped_line)
コード例 #21
0
def clinvar_to_evidence_strings(allowed_clinical_significance, mappings,
                                json_file, ot_schema, output_evidence_strings):
    """Convert ClinVar records into evidence strings, written one JSON document per line.

    For every record, each combination of (consequence type, trait, converted
    allele origin) that ``skip_record`` does not reject is turned into a
    germline or somatic evidence string, validated with
    ``validate_evidence_string`` and written straight to the output file.

    :param allowed_clinical_significance: passed through to ``skip_record``.
    :param mappings: object providing ``trait_2_efo`` and
        ``consequence_type_dict``.
    :param json_file: passed to ``cellbase_records.CellbaseRecords``.
    :param ot_schema: path of the JSON schema file, passed after parsing to
        ``validate_evidence_string``.
    :param output_evidence_strings: path of the output file, opened with
        ``utilities.open_file`` in ``'wt'`` mode.
    :return: the ``Report`` instance holding counters and collected lists.
    :raises AssertionError: if a converted allele origin is neither
        ``'germline'`` nor ``'somatic'``.
    """
    report = Report(trait_mappings=mappings.trait_2_efo)
    cell_recs = cellbase_records.CellbaseRecords(json_file=json_file)

    # Context manager so the schema handle is closed right after parsing.
    with open(ot_schema) as ot_schema_file:
        ot_schema_contents = json.load(ot_schema_file)

    # Context manager so the output is flushed and closed even when a record
    # raises part-way through the run.
    with utilities.open_file(output_evidence_strings,
                             'wt') as output_evidence_strings_file:
        for cellbase_record in cell_recs:
            report.counters["record_counter"] += 1
            if report.counters["record_counter"] % 1000 == 0:
                logger.info("{} records processed".format(
                    report.counters["record_counter"]))

            n_ev_strings_per_record = 0
            clinvar_record = clinvar.ClinvarRecord(
                cellbase_record['clinvarSet'])

            for clinvar_record_measure in clinvar_record.measures:
                report.counters["n_nsvs"] += (clinvar_record_measure.nsv_id
                                              is not None)
                append_nsv(report.nsv_list, clinvar_record_measure)
                report.counters["n_multiple_allele_origin"] += (len(
                    clinvar_record.allele_origins) > 1)
                # Deliberately evaluated per measure, as before: create_traits
                # receives the report, so it is not hoisted out of the loop.
                traits = create_traits(clinvar_record.traits,
                                       mappings.trait_2_efo, report)
                converted_allele_origins = convert_allele_origins(
                    clinvar_record.allele_origins)

                for consequence_type, trait, allele_origin in itertools.product(
                        get_consequence_types(clinvar_record_measure,
                                              mappings.consequence_type_dict),
                        traits, converted_allele_origins):

                    if skip_record(clinvar_record, clinvar_record_measure,
                                   consequence_type, allele_origin,
                                   allowed_clinical_significance, report):
                        continue

                    if allele_origin == 'germline':
                        evidence_string = evidence_strings.CTTVGeneticsEvidenceString(
                            clinvar_record, clinvar_record_measure, report,
                            trait, consequence_type)
                    elif allele_origin == 'somatic':
                        evidence_string = evidence_strings.CTTVSomaticEvidenceString(
                            clinvar_record, clinvar_record_measure, report,
                            trait, consequence_type)
                    else:
                        raise AssertionError(
                            'Unknown allele_origin present in the data: {}'.format(
                                allele_origin))

                    # Validate and immediately output the evidence string
                    # (not keeping everything in memory).
                    validate_evidence_string(evidence_string, clinvar_record,
                                             trait,
                                             consequence_type.ensembl_gene_id,
                                             ot_schema_contents)
                    output_evidence_strings_file.write(
                        json.dumps(evidence_string) + '\n')
                    report.evidence_string_count += 1

                    report.evidence_list.append([
                        clinvar_record.accession, clinvar_record_measure.rs_id,
                        trait.clinvar_name, trait.ontology_id
                    ])
                    report.counters["n_valid_rs_and_nsv"] += (
                        clinvar_record_measure.nsv_id is not None)
                    report.traits.add(trait.ontology_id)
                    report.remove_trait_mapping(trait.clinvar_name)
                    report.ensembl_gene_id_uris.add(
                        evidence_strings.get_ensembl_gene_id_uri(
                            consequence_type.ensembl_gene_id))

                    n_ev_strings_per_record += 1

                if n_ev_strings_per_record > 0:
                    report.counters["n_processed_clinvar_records"] += 1
                    if n_ev_strings_per_record > 1:
                        report.counters["n_multiple_evidence_strings"] += 1

    return report
コード例 #22
0
def write_string_list_to_file(string_list, filename):
    """Write the strings to ``filename`` separated by newlines, with no trailing newline.

    :param string_list: iterable of strings.
    :param filename: path opened through ``utilities.open_file`` in ``'wt'`` mode.
    """
    with utilities.open_file(filename, 'wt') as sink:
        joined_text = '\n'.join(string_list)
        sink.write(joined_text)
コード例 #23
0
def write_string_list_to_file(string_list, filename):
    """Join ``string_list`` with newlines and write the result to ``filename``.

    No newline is added after the last string. The file is opened through
    ``utilities.open_file`` in ``'wt'`` mode.
    """
    with utilities.open_file(filename, 'wt') as destination:
        payload = '\n'.join(string_list)
        destination.write(payload)
コード例 #24
0
class CTTVGeneticsEvidenceString(CTTVEvidenceString):
    """
    Evidence string for genetics evidence (allele origin ``'germline'``).
    Holds information required for Open Target's evidence strings for genetic information.

    Most fields exist twice in the underlying document, once under
    ``evidence.gene2variant`` and once under ``evidence.variant2disease``.
    The properties below write both copies and, on read, raise if the two
    copies disagree.
    """

    # Template parsed once, when the class body is executed; every instance
    # starts from a deep copy of it.
    with utilities.open_file(
            utilities.get_resource_file(__package__,
                                        config.GEN_EV_STRING_JSON),
            "rt") as gen_json_file:
        base_json = json.load(gen_json_file)

    def __init__(self, clinvar_record, clinvar_record_measure, report, trait,
                 consequence_type):
        """Populate a genetics evidence string from one record/measure/trait/consequence.

        :param clinvar_record: source of references, accession, date and
            clinical significance.
        :param clinvar_record_measure: source of ``rs_id`` / ``nsv_id`` and
            measure references.
        :param report: forwarded to the parent constructor.
        :param trait: forwarded to the parent constructor; its
            ``trait_counter`` selects the trait reference list.
        :param consequence_type: provides ``ensembl_gene_id`` and ``so_term``.
        """
        template_copy = copy.deepcopy(self.base_json)

        # De-duplicated, sorted union of trait, observation and measure references.
        ref_list = sorted(
            set(clinvar_record.trait_refs_list[trait.trait_counter] +
                clinvar_record.observed_refs_list +
                clinvar_record_measure.refs_list))

        super().__init__(template_copy, clinvar_record, ref_list,
                         consequence_type.ensembl_gene_id, report, trait)

        variant_type = get_cttv_variant_type(clinvar_record_measure)

        self.add_unique_association_field('alleleOrigin', 'germline')

        # Identifier preference: rs ID, then nsv ID, then the record accession.
        if clinvar_record_measure.rs_id:
            variant_id = clinvar_record_measure.rs_id
            variant_uri = 'http://identifiers.org/dbsnp/' + variant_id
        elif clinvar_record_measure.nsv_id:
            variant_id = clinvar_record_measure.nsv_id
            variant_uri = 'http://identifiers.org/dbsnp/' + variant_id
        else:
            variant_id = clinvar_record.accession
            variant_uri = 'http://www.ncbi.nlm.nih.gov/clinvar/' + variant_id
        self.set_variant(variant_uri, variant_type)
        self.add_unique_association_field('variant_id', variant_id)

        self.date = clinvar_record.date
        self.db_xref_url = 'http://identifiers.org/clinvar.record/' + clinvar_record.accession
        self.url = 'http://www.ncbi.nlm.nih.gov/clinvar/' + clinvar_record.accession

        # Associated unless the significance is one of these four values.
        not_associated_values = ('non-pathogenic', 'probable-non-pathogenic',
                                 'likely benign', 'benign')
        self.association = clinvar_record.clinical_significance not in not_associated_values

        self.gene_2_var_ev_codes = [
            'http://identifiers.org/eco/cttv_mapping_pipeline'
        ]

        so_term = consequence_type.so_term
        if so_term.accession is not None:
            self.gene_2_var_func_consequence = \
                'http://purl.obolibrary.org/obo/' + so_term.accession.replace(':', '_')
        else:
            # No accession available: fall back to a URI built from the SO name.
            self.gene_2_var_func_consequence = \
                'http://targetvalidation.org/sequence/' + so_term.so_name

        if ref_list:
            self.set_var_2_disease_literature(ref_list)
            # Arbitrarily select only one reference among all
            self.unique_reference = ref_list[0]

        if clinvar_record.clinical_significance:
            self.clinical_significance = clinvar_record.clinical_significance

    @property
    def db_xref_url(self):
        """dbxref URL shared by both evidence halves; raises if they differ."""
        evidence = self['evidence']
        gene_side = evidence['gene2variant']['provenance_type']['database']['dbxref']['url']
        disease_side = evidence['variant2disease']['provenance_type']['database']['dbxref']['url']
        if gene_side == disease_side:
            return disease_side
        raise Exception("db_xref_url attributes different")

    @db_xref_url.setter
    def db_xref_url(self, url):
        evidence = self['evidence']
        for half in ('gene2variant', 'variant2disease'):
            evidence[half]['provenance_type']['database']['dbxref']['url'] = url

    @property
    def url(self):
        """First URL entry shared by both evidence halves; raises if they differ."""
        evidence = self['evidence']
        gene_side = evidence['gene2variant']['urls'][0]['url']
        disease_side = evidence['variant2disease']['urls'][0]['url']
        if gene_side == disease_side:
            return gene_side
        raise Exception("url attributes different")

    @url.setter
    def url(self, url):
        evidence = self['evidence']
        for half in ('gene2variant', 'variant2disease'):
            evidence[half]['urls'][0]['url'] = url

    @property
    def gene_2_var_ev_codes(self):
        """Evidence codes stored under ``evidence.gene2variant``."""
        gene_to_variant = self['evidence']['gene2variant']
        return gene_to_variant['evidence_codes']

    @gene_2_var_ev_codes.setter
    def gene_2_var_ev_codes(self, gene_2_var_ev_codes):
        gene_to_variant = self['evidence']['gene2variant']
        gene_to_variant['evidence_codes'] = gene_2_var_ev_codes

    @property
    def gene_2_var_func_consequence(self):
        """Functional consequence URI stored under ``evidence.gene2variant``."""
        gene_to_variant = self['evidence']['gene2variant']
        return gene_to_variant['functional_consequence']

    @gene_2_var_func_consequence.setter
    def gene_2_var_func_consequence(self, so_term):
        gene_to_variant = self['evidence']['gene2variant']
        gene_to_variant['functional_consequence'] = so_term

    def set_var_2_disease_literature(self, ref_list):
        """Store ``ref_list`` as ``lit_id`` entries under variant2disease literature."""
        references = [{'lit_id': reference} for reference in ref_list]
        provenance = self['evidence']['variant2disease']['provenance_type']
        provenance['literature'] = {'references': references}

    @property
    def association(self):
        """``is_associated`` flag shared by both evidence halves; raises if they differ."""
        evidence = self['evidence']
        gene_side = evidence['gene2variant']['is_associated']
        disease_side = evidence['variant2disease']['is_associated']
        if gene_side == disease_side:
            return gene_side
        raise Exception("association attributes different")

    @association.setter
    def association(self, is_associated):
        evidence = self['evidence']
        for half in ('gene2variant', 'variant2disease'):
            evidence[half]['is_associated'] = is_associated

    def _clear_variant(self):
        """Reset the variant id and type to empty lists."""
        variant = self['variant']
        variant['id'] = []
        variant['type'] = []

    def set_variant(self, var_id, var_type):
        """Set the variant id and type."""
        variant = self['variant']
        variant['id'] = var_id
        variant['type'] = var_type

    @property
    def unique_reference(self):
        """``unique_experiment_reference`` stored under ``evidence.variant2disease``."""
        variant_to_disease = self['evidence']['variant2disease']
        return variant_to_disease['unique_experiment_reference']

    @unique_reference.setter
    def unique_reference(self, reference):
        variant_to_disease = self['evidence']['variant2disease']
        variant_to_disease['unique_experiment_reference'] = reference

    @property
    def date(self):
        """``date_asserted`` shared by both evidence halves; raises if they differ."""
        evidence = self['evidence']
        gene_side = evidence['gene2variant']['date_asserted']
        disease_side = evidence['variant2disease']['date_asserted']
        if gene_side == disease_side:
            return gene_side
        raise Exception("date attributes have different values")

    @date.setter
    def date(self, date_string):
        evidence = self['evidence']
        for half in ('gene2variant', 'variant2disease'):
            evidence[half]['date_asserted'] = date_string

    @property
    def clinical_significance(self):
        """Clinical significance stored under ``evidence.variant2disease``."""
        variant_to_disease = self['evidence']['variant2disease']
        return variant_to_disease['clinical_significance']

    @clinical_significance.setter
    def clinical_significance(self, clinical_significance):
        variant_to_disease = self['evidence']['variant2disease']
        variant_to_disease['clinical_significance'] = clinical_significance
コード例 #25
0
 def test_nonexisting(self):
     # Copy test_dir_a into test_dir_c and check that the copied test.txt
     # holds self.test_string.
     utilities.copy_dir(self.test_dir_a, self.test_dir_c)
     # Remove the copied directory even when the read or the assertion
     # fails, so a failing run does not leave test_dir_c behind.
     try:
         with utilities.open_file(os.path.join(self.test_dir_c, "test.txt"), "rt") as f:
             contents = f.read()
         self.assertEqual(contents, self.test_string)
     finally:
         shutil.rmtree(self.test_dir_c)
コード例 #26
0
class CTTVSomaticEvidenceString(CTTVEvidenceString):
    """
    Evidence string for somatic evidence (allele origin ``'somatic'``).
    Holds information required for Open Target's evidence strings for somatic information.

    All fields handled here live directly under the ``evidence`` key of the
    underlying document.
    """

    # Template parsed once, when the class body is executed; every instance
    # starts from a deep copy of it.
    with utilities.open_file(
            utilities.get_resource_file(__package__,
                                        config.SOM_EV_STRING_JSON),
            "rt") as som_json_file:
        base_json = json.load(som_json_file)

    def __init__(self, clinvar_record, clinvar_record_measure, report, trait,
                 consequence_type):
        """Populate a somatic evidence string from one record/measure/trait/consequence.

        :param clinvar_record: source of references, accession, dates,
            clinical significance, score and mode of inheritance.
        :param clinvar_record_measure: source of ``rs_id`` / ``nsv_id`` and
            measure references.
        :param report: forwarded to the parent constructor.
        :param trait: forwarded to the parent constructor; its
            ``trait_counter`` selects the trait reference list.
        :param consequence_type: provides ``ensembl_gene_id`` and ``so_term``.
        """
        template_copy = copy.deepcopy(self.base_json)

        # De-duplicated, sorted union of trait, observation and measure references.
        ref_list = sorted(
            set(clinvar_record.trait_refs_list[trait.trait_counter] +
                clinvar_record.observed_refs_list +
                clinvar_record_measure.refs_list))

        super().__init__(template_copy, clinvar_record, ref_list,
                         consequence_type.ensembl_gene_id, report, trait)

        self.add_unique_association_field('alleleOrigin', 'somatic')

        # Identifier preference: rs ID, then nsv ID, then the record accession.
        variant_id = (clinvar_record_measure.rs_id
                      or clinvar_record_measure.nsv_id
                      or clinvar_record.accession)
        self.add_unique_association_field('variant_id', variant_id)

        self.date = clinvar_record.date
        self.last_evaluated_date = clinvar_record.last_evaluated_date
        self.db_xref_url = 'http://identifiers.org/clinvar.record/' + clinvar_record.accession
        self.url = 'http://www.ncbi.nlm.nih.gov/clinvar/' + clinvar_record.accession
        # See https://github.com/opentargets/platform/issues/1139#issuecomment-682592678
        self.association = True

        self.set_known_mutations(consequence_type.so_term)

        if ref_list:
            self.evidence_literature = ref_list

        if clinvar_record.clinical_significance:
            self.clinical_significance = process_clinical_significance(
                clinvar_record.clinical_significance)

        # Star rating and review status come as a pair from the record score.
        star_rating, review_status = clinvar_record.score
        self.clinvar_rating = (star_rating, review_status)

        # Mode of inheritance is stored only when present (see the setter).
        self.mode_of_inheritance = clinvar_record.mode_of_inheritance

    @property
    def db_xref_url(self):
        """dbxref URL stored under ``evidence.provenance_type.database``."""
        dbxref = self['evidence']['provenance_type']['database']['dbxref']
        return dbxref['url']

    @db_xref_url.setter
    def db_xref_url(self, url):
        dbxref = self['evidence']['provenance_type']['database']['dbxref']
        dbxref['url'] = url

    @property
    def url(self):
        """URL of the first entry in ``evidence.urls``."""
        first_entry = self['evidence']['urls'][0]
        return first_entry['url']

    @url.setter
    def url(self, url):
        first_entry = self['evidence']['urls'][0]
        first_entry['url'] = url

    @property
    def evidence_literature(self):
        """List of reference entries under ``evidence.provenance_type.literature``."""
        literature = self['evidence']['provenance_type']['literature']
        return literature['references']

    @evidence_literature.setter
    def evidence_literature(self, ref_list):
        references = [{'lit_id': reference} for reference in ref_list]
        provenance = self['evidence']['provenance_type']
        provenance['literature'] = {'references': references}

    @property
    def association(self):
        """``is_associated`` flag of the evidence."""
        evidence = self['evidence']
        return evidence['is_associated']

    @association.setter
    def association(self, is_associated):
        evidence = self['evidence']
        evidence['is_associated'] = is_associated

    @property
    def date(self):
        """``date_asserted`` of the evidence."""
        evidence = self['evidence']
        return evidence['date_asserted']

    @date.setter
    def date(self, date_string):
        evidence = self['evidence']
        evidence['date_asserted'] = date_string

    @property
    def last_evaluated_date(self):
        """``last_evaluated_date`` of the evidence."""
        evidence = self['evidence']
        return evidence['last_evaluated_date']

    @last_evaluated_date.setter
    def last_evaluated_date(self, clinvar_last_evaluated_date):
        # Falsy values are ignored, leaving the document untouched.
        if not clinvar_last_evaluated_date:
            return
        self['evidence']['last_evaluated_date'] = clinvar_last_evaluated_date

    def _clear_known_mutations(self):
        """Reset ``evidence.known_mutations`` to an empty list."""
        evidence = self['evidence']
        evidence['known_mutations'] = []

    def add_known_mutation(self, new_functional_consequence, so_name):
        """Append one functional-consequence / preferred-name entry to the known mutations."""
        entry = {
            'functional_consequence': new_functional_consequence,
            'preferred_name': so_name,
        }
        self['evidence']['known_mutations'].append(entry)

    def set_known_mutations(self, so_term):
        """Add a known mutation whose URI is built from the SO accession, or the SO name if absent."""
        if not so_term.accession:
            consequence_uri = \
                'http://targetvalidation.org/sequence/' + so_term.so_name
        else:
            consequence_uri = \
                "http://purl.obolibrary.org/obo/" + so_term.accession.replace(':', '_')
        self.add_known_mutation(consequence_uri, so_term.so_name)

    @property
    def clinical_significance(self):
        """Clinical significance of the evidence."""
        evidence = self['evidence']
        return evidence['clinical_significance']

    @clinical_significance.setter
    def clinical_significance(self, clinical_significance):
        evidence = self['evidence']
        evidence['clinical_significance'] = clinical_significance

    @property
    def clinvar_rating(self):
        """Mapping with ``star_rating`` and ``review_status`` keys."""
        evidence = self['evidence']
        return evidence['clinvar_rating']

    @clinvar_rating.setter
    def clinvar_rating(self, clinvar_rating_data):
        stars, status = clinvar_rating_data
        rating = {
            'star_rating': stars,
            'review_status': status,
        }
        self['evidence']['clinvar_rating'] = rating

    @property
    def mode_of_inheritance(self):
        """Mode of inheritance, or ``None`` when the key is not set."""
        evidence = self['evidence']
        return evidence.get('mode_of_inheritance')

    @mode_of_inheritance.setter
    def mode_of_inheritance(self, mode_of_inheritance):
        # Falsy values are ignored, leaving the key absent.
        if not mode_of_inheritance:
            return
        self['evidence']['mode_of_inheritance'] = mode_of_inheritance
コード例 #27
0
class CTTVSomaticEvidenceString(CTTVEvidenceString):
    """
    Evidence string specialised for somatic records.
    Holds the information required for Open Targets' evidence strings for somatic information.

    The JSON template is read once, while the class body executes, and every instance starts
    from a deep copy of it.
    """

    # Load the somatic template a single time; instances deep-copy it in __init__.
    with utilities.open_file(
            utilities.get_resource_file(__package__, config.SOM_EV_STRING_JSON),
            "rt") as som_json_file:
        base_json = json.load(som_json_file)

    def __init__(self, clinvar_record, clinvar_record_measure, report, trait,
                 consequence_type):
        """Build a somatic evidence string from a ClinVar record.

        :param clinvar_record: supplies ``accession``, ``date``, ``clinical_significance``,
            ``trait_refs_list`` and ``observed_refs_list``
        :param clinvar_record_measure: supplies ``rs_id``, ``nsv_id`` and ``refs_list``
        :param report: forwarded unchanged to the parent constructor
        :param trait: its ``trait_counter`` indexes ``trait_refs_list``; also forwarded to the
            parent constructor
        :param consequence_type: supplies ``ensembl_gene_id`` and ``so_term``
        """
        template = copy.deepcopy(self.base_json)

        # De-duplicated, sorted union of the trait, observation and measure references.
        ref_list = sorted(set(
            clinvar_record.trait_refs_list[trait.trait_counter]
            + clinvar_record.observed_refs_list
            + clinvar_record_measure.refs_list))

        super().__init__(template, clinvar_record, ref_list,
                         consequence_type.ensembl_gene_id, report, trait)

        self.add_unique_association_field('alleleOrigin', 'somatic')

        # Variant identifier preference: rs id, then nsv id, then the record accession.
        variant_id = (clinvar_record_measure.rs_id
                      or clinvar_record_measure.nsv_id
                      or clinvar_record.accession)
        self.add_unique_association_field('variant_id', variant_id)

        accession = clinvar_record.accession
        significance = clinvar_record.clinical_significance
        not_associated = ('non-pathogenic', 'probable-non-pathogenic', 'likely benign', 'benign')

        self.date = clinvar_record.date
        self.db_xref_url = 'http://identifiers.org/clinvar.record/' + accession
        self.url = 'http://www.ncbi.nlm.nih.gov/clinvar/' + accession
        self.association = significance not in not_associated

        self.set_known_mutations(consequence_type.so_term)

        # The literature section is only written when there is something to cite.
        if ref_list:
            self.evidence_literature = ref_list

        if significance:
            self.clinical_significance = significance

    @property
    def db_xref_url(self):
        """URL stored under evidence -> provenance_type -> database -> dbxref."""
        dbxref = self['evidence']['provenance_type']['database']['dbxref']
        return dbxref['url']

    @db_xref_url.setter
    def db_xref_url(self, url):
        dbxref = self['evidence']['provenance_type']['database']['dbxref']
        dbxref['url'] = url

    @property
    def url(self):
        """URL of the first entry in ``evidence['urls']``."""
        first_url = self['evidence']['urls'][0]
        return first_url['url']

    @url.setter
    def url(self, url):
        first_url = self['evidence']['urls'][0]
        first_url['url'] = url

    @property
    def evidence_literature(self):
        """List of reference entries stored under provenance_type -> literature."""
        literature = self['evidence']['provenance_type']['literature']
        return literature['references']

    @evidence_literature.setter
    def evidence_literature(self, ref_list):
        # Each reference is wrapped as {'lit_id': <reference>}.
        references = [{'lit_id': reference} for reference in ref_list]
        provenance = self['evidence']['provenance_type']
        provenance['literature'] = {'references': references}

    @property
    def association(self):
        """Value stored under ``evidence['is_associated']``."""
        evidence = self['evidence']
        return evidence['is_associated']

    @association.setter
    def association(self, is_associated):
        evidence = self['evidence']
        evidence['is_associated'] = is_associated

    @property
    def date(self):
        """Value stored under ``evidence['date_asserted']``."""
        evidence = self['evidence']
        return evidence['date_asserted']

    @date.setter
    def date(self, date_string):
        evidence = self['evidence']
        evidence['date_asserted'] = date_string

    def _clear_known_mutations(self):
        """Replace ``evidence['known_mutations']`` with a new empty list."""
        evidence = self['evidence']
        evidence['known_mutations'] = list()

    def add_known_mutation(self, new_functional_consequence, so_name):
        """Append one functional-consequence entry to ``evidence['known_mutations']``."""
        mutations = self['evidence']['known_mutations']
        mutations.append({
            'functional_consequence': new_functional_consequence,
            'preferred_name': so_name,
        })

    def set_known_mutations(self, so_term):
        """Register ``so_term`` as a known mutation.

        The URI is built from the term's accession (':' replaced by '_') when one is present,
        otherwise from its ``so_name``.
        """
        accession = so_term.accession
        if accession:
            consequence_uri = "http://purl.obolibrary.org/obo/" + accession.replace(':', '_')
        else:
            consequence_uri = 'http://targetvalidation.org/sequence/' + so_term.so_name
        self.add_known_mutation(consequence_uri, so_term.so_name)

    @property
    def clinical_significance(self):
        """Value stored under ``evidence['clinical_significance']``."""
        evidence = self['evidence']
        return evidence['clinical_significance']

    @clinical_significance.setter
    def clinical_significance(self, clinical_significance):
        evidence = self['evidence']
        evidence['clinical_significance'] = clinical_significance
コード例 #28
0
 def __iter__(self):
     """Yield one decoded JSON value for each line of ``self.json_file``.

     The file is opened in text mode on first iteration and stays open
     until the generator is exhausted or closed.
     """
     with utilities.open_file(self.json_file, "rt") as handle:
         for raw_line in handle:
             # Trailing whitespace (including the newline) is dropped before decoding.
             record = json.loads(raw_line.rstrip())
             yield record