Esempio n. 1
0
def generate_targets_file(disease_id, outpath, anno_type: str = 'entrezgene') -> None:
    """Write the known-drug targets of a disease to a file, one per line.

    The associations of ``disease_id`` are requested from OpenTargets and
    filtered with ``datatype='known_drug'``. The target ids of the remaining
    associations are passed to ``mygene.MyGeneInfo.getgenes`` and, for every
    returned mapping that contains ``anno_type``, that value is written on
    its own line. Mappings without ``anno_type`` are skipped.

    :param disease_id: EFO code from the disease.
    :param outpath: Path of the file to write; an existing file is truncated.
    :param anno_type: `entrezgene` for Entrez Id or `symbol` for Gene symbol.
    :return: None
    """
    ot = OpenTargetsClient()
    # NOTE(review): 'association_scoredatatypes' has no separator, unlike
    # 'target.id' -- confirm the field name against the OpenTargets API.
    assoc = ot.get_associations_for_disease(
        disease_id,
        fields=['association_scoredatatypes', 'target.id']
    ).filter(
        datatype='known_drug'
    )
    ensembl_list = [a['target']['id'] for a in assoc]

    # TODO use the converters.get_converter_to_entrez
    mg = mygene.MyGeneInfo()
    id_mappings = mg.getgenes(ensembl_list, fields=anno_type)

    # The file is only written, never read back, so plain 'w' is enough.
    with open(outpath, 'w') as outfile:
        for mapping in id_mappings:
            if anno_type in mapping:
                # Format instead of writing the raw value: write() only
                # accepts str and the identifier may come back as a number.
                outfile.write(f'{mapping[anno_type]}\n')
Esempio n. 2
0
def generate_disease_gene_association_file(disease_id,
                                           outpath,
                                           anno_type: str = 'entrezgene'):
    """Dump the OpenTargets association scores of a disease to a file.

    Each output line holds the converted gene identifier and the overall
    association score, separated by a tab. Targets that are missing from
    the mapping returned by ``get_converter_to_entrez`` are left out.

    :param disease_id: The EFO code to the disease.
    :param outpath: The path to the file to be created.
    :param anno_type: Part of the signature, but not read by the body.
    :return:
    """
    client = OpenTargetsClient()
    associations = client.get_associations_for_disease(
        disease_id, fields=['association_scoreoverall', 'target.id'])

    # (target id, overall score) pairs, in the order the API returned them
    scored_targets = [
        (entry['target']['id'], entry['association_score']['overall'])
        for entry in associations
    ]

    # Mapping from the target ids to the identifiers written to the file
    converter = get_converter_to_entrez(
        [target_id for target_id, _ in scored_targets])

    # Resolve every row before the file is opened
    rows = [(converter[target_id], score)
            for target_id, score in scored_targets
            if target_id in converter]

    with open(outpath, 'w+') as outfile:
        for mapped_id, score in rows:
            outfile.write(f'{mapped_id}\t{score}\n')
def search_disease(x):
    """Fetch the OpenTargets associations for a disease in tabular form.

    :param x: Disease identifier passed to
        ``OpenTargetsClient.get_associations_for_disease``.
    :return: Whatever ``to_dataframe()`` returns for the response
        (presumably a pandas DataFrame -- confirm against the client docs).
    """
    # Imported locally, as in the original, so the module loads without
    # the opentargets package being installed.
    from opentargets import OpenTargetsClient

    client = OpenTargetsClient()
    response = client.get_associations_for_disease(x)
    return response.to_dataframe()
def download_for_disease(disease_id, outpath):
    """Write the Entrez ids of a disease's known-drug targets to a file.

    The associations of ``disease_id`` are requested from OpenTargets and
    filtered with ``datatype='known_drug'``. Their target ids are passed to
    ``mygene.MyGeneInfo.getgenes`` and every mapping that carries an
    ``entrezgene`` value is written on its own line. Mappings without one
    are skipped.

    :param disease_id: Disease identifier handed to the OpenTargets client.
    :param outpath: Path of the file to write; an existing file is truncated.
    :return: None
    """
    ot = OpenTargetsClient()
    # NOTE(review): 'associationscore.datatypes' lacks the underscore used by
    # the 'association_score' key elsewhere -- confirm the field name.
    assoc = ot.get_associations_for_disease(
        disease_id, fields=['associationscore.datatypes',
                            'target.id']).filter(datatype='known_drug')
    ensembl_list = [a['target']['id'] for a in assoc]

    mg = mygene.MyGeneInfo()
    id_mappings = mg.getgenes(ensembl_list, fields="entrezgene")

    # The file is only written, never read back, so plain 'w' is enough.
    with open(outpath, 'w') as outfile:
        for mapping in id_mappings:
            if 'entrezgene' in mapping:
                # Format instead of writing the raw value: write() only
                # accepts str and the identifier may come back as a number.
                outfile.write(f"{mapping['entrezgene']}\n")
Esempio n. 5
0
        target_as = ot.get_associations_for_target(target_id)
        print(target_as)
        l = []
        print('Target id associations:\n')
        for a in target_as:
            print(a['id'], a['association_score']['overall'])
            l.append(a['association_score']['overall'])
        print('Maximum target id association:', np.max(l))
        print('Minimum target id association:', np.min(l))
        print('Mean target id association:', np.mean(l))
        print('Standard deviation of target id association:', np.std(l))

    except ValueError:
        print('Target id not found')

# Report the association scores when a disease id was given as input
if disease_id is not None:
    try:
        scores = []
        for record in ot.get_associations_for_disease(disease_id):
            record_id = record['id']
            overall = record['association_score']['overall']
            print(record_id, overall)
            scores.append(overall)

        # Evaluated and printed one at a time, in this order; the numpy
        # reductions raise ValueError on an empty score list, which lands
        # in the handler below.
        summaries = (
            ('Maximum disease id association:', np.max),
            ('Minimum disease id association:', np.min),
            ('Mean disease id association:', np.mean),
            ('Standard deviation of disease id association:', np.std),
        )
        for caption, reducer in summaries:
            print(caption, reducer(scores))

    except ValueError:
        print('Disease id not found')
Esempio n. 6
0
class OpenTargetClientTest(unittest.TestCase):
    """Tests for ``OpenTargetsClient`` and its ``Connection``.

    ``setUp`` builds three clients: a default one, one created with
    ``use_http2=True`` and one created with application credentials.
    The tests send real queries through them (presumably against the live
    OpenTargets API -- confirm), so they need that service to be reachable.
    """
    _AUTO_GET_TOKEN = 'auto'

    def setUp(self):
        self.client = OpenTargetsClient()
        self.http2_client = OpenTargetsClient(use_http2=True)
        self.auth_client = OpenTargetsClient(
            auth_app_name='test',
            auth_secret='test',
        )

    def tearDown(self):
        # Close every client created in setUp, not only the default one.
        for client in (self.client, self.http2_client, self.auth_client):
            client.close()

    def _remove_on_cleanup(self, filename):
        # Delete an output file once the test ends, even when an assertion
        # fails before the end of the test body is reached.
        def _remove():
            if os.path.isfile(filename):
                os.remove(filename)

        self.addCleanup(_remove)

    def testSearchTargetCorrectResult(self):
        # The first hit for a gene symbol must be the matching target.
        target_symbol = 'BRAF'
        response = self.client.search(target_symbol)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['type'], 'search-object-target')
        self.assertEqual(result['id'], 'ENSG00000157764')
        self.assertEqual(result['data']['approved_symbol'], target_symbol)

    def testSearchTargetFetchAllResults(self):
        # Iterating the response must yield as many items as len() reports.
        target_symbol = 'BRAF'
        response = self.client.search(target_symbol)
        total_results = len(response)
        self.assertGreater(total_results, 0)
        fetched = sum(1 for _ in response)
        self.assertEqual(total_results, fetched)

    def testSearchTargetFetchAllResultsAuth(self):
        # Same check as above through the client built with credentials.
        target_symbol = 'BRAF'
        response = self.auth_client.search(target_symbol)
        total_results = len(response)
        self.assertGreater(total_results, 0)
        fetched = sum(1 for _ in response)
        self.assertEqual(total_results, fetched)

    def testSearchTargetCorrectResultHTTP2(self):
        target_symbol = 'BRAF'
        response = self.http2_client.search(target_symbol)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['type'], 'search-object-target')
        self.assertEqual(result['id'], 'ENSG00000157764')
        self.assertEqual(result['data']['approved_symbol'], target_symbol)

    def testSearchTargetFetchAllResultsHTTP2(self):
        target_symbol = 'BRAF'
        response = self.http2_client.search(target_symbol)
        total_results = len(response)
        self.assertGreater(total_results, 0)
        fetched = sum(1 for _ in response)
        self.assertEqual(total_results, fetched)

    def testSearchDiseaseCorrectResult(self):
        # The first hit for a disease label must be the matching disease.
        disease_label = 'cancer'
        response = self.client.search(disease_label)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['type'], 'search-object-disease')
        self.assertEqual(result['id'], 'EFO_0000311')

    # #this takes a lot to run
    # def testSearchDiseaseFetchAllResults(self):
    #     disease_label = 'cancer'
    #     response = self.client.search(disease_label, size = 100)
    #     total_results = len(response)
    #     self.assertGreater(total_results,0)
    #     c=0
    #     for i in response:
    #         c+=1
    #     self.assertEqual(total_results, c)
    #     print(total_results, c)

    def testGetAssociation(self):
        association_id = "ENSG00000157764-EFO_0005803"
        response = self.client.get_association(association_id)
        self.assertEqual(len(response), 1)
        self.assertEqual(association_id, response[0]['id'])

    def testFilterAssociations(self):
        # Each additional filter must shrink the result set.
        response = self.client.filter_associations()
        self.assertGreater(len(response), 0)
        total = response.info.total
        response.filter(target='ENSG00000157764')
        self.assertLess(len(response), total)
        total = response.info.total
        response.filter(direct=True)
        self.assertLess(len(response), total)
        total = response.info.total
        response.filter(scorevalue_min=0.2)
        self.assertLess(len(response), total)
        total = response.info.total
        response.filter(therapeutic_area='efo_0000701')
        self.assertLess(len(response), total)
        results = []
        for i, r in enumerate(response):
            print(i, r['id'], r['association_score']['overall'],
                  r['disease']['efo_info']['label'])
            results.append(r)
        # Passing all filters at once must give the same associations ...
        response_multi = self.client.filter_associations(
            target='ENSG00000157764',
            direct=True,
            scorevalue_min=0.2,
            therapeutic_area='efo_0000701')
        self.assertEqual(len(response_multi), response.info.total)
        for i, r in enumerate(response_multi):
            self.assertEqual(results[i]['id'], r['id'])
        # ... and so must chaining the filter() calls.
        response_chained = self.client.filter_associations().filter(
            target='ENSG00000157764').filter(direct=True).filter(
                therapeutic_area='efo_0000701').filter(scorevalue_min=0.2)
        self.assertEqual(len(response_chained), response.info.total)
        for i, r in enumerate(response_chained):
            self.assertEqual(results[i]['id'], r['id'])

    def testGetAssociationsForTarget(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(target_symbol,
                                                           size=30)
        self.assertGreater(len(response), 0)
        for i, result in enumerate(response):
            self.assertEqual(result['target']['gene_info']['symbol'],
                             target_symbol)
            # Checking the first results is enough; stop early.
            if i > 90:
                break

    def testGetAssociationsForDisease(self):
        disease_label = 'cancer'
        response = self.client.get_associations_for_disease(disease_label)
        self.assertGreater(len(response), 0)
        for result in response:
            self.assertEqual(result['disease']['efo_info']['label'],
                             disease_label)

    @unittest.expectedFailure
    def testGetEvidence(self):
        evidence_id = "03fba0599655b9040012b29cf0de8060"
        response = self.client.get_evidence(evidence_id)
        self.assertEqual(len(response), 1)
        self.assertEqual(evidence_id, response[0]['id'])

    def testFilterEvidence(self):
        response = self.client.filter_evidence()
        self.assertGreater(len(response), 0)

    def testGetEvidenceForTarget(self):
        target_symbol = 'BRAF'
        response = self.client.get_evidence_for_target(target_symbol,
                                                       size=1000)
        self.assertGreater(len(response), 0)
        for i, result in enumerate(response):
            self.assertEqual(result['target']['gene_info']['symbol'],
                             target_symbol)
            # Checking the first results is enough; stop early.
            if i > 100:
                break

    def testGetSimilarTargets(self):
        target_symbol = 'BRAF'
        response = self.client.get_similar_target(target_symbol)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['subject']['label'], target_symbol)
        self.assertEqual(result['object']['label'], 'KRAS')

    def testGetSimilarDisease(self):
        disease_label = 'ulcerative colitis'
        response = self.client.get_similar_disease(disease_label)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['subject']['label'], disease_label)
        self.assertEqual(result['object']['label'], "Crohn's disease")

    def testGetEvidenceForDisease(self):
        disease_label = 'medulloblastoma'
        response = self.client.get_evidence_for_disease(disease_label)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['disease']['efo_info']['label'], disease_label)

    def testSerialiseToJson(self):
        target_symbol = 'BRAF'
        # Iterable version: one JSON document per result.
        response = self.client.get_associations_for_target(target_symbol)
        items = len(response)
        self.assertGreater(len(response), 0)
        json_output = response.to_json()
        parsed_json = [json.loads(i) for i in json_output]
        self.assertEqual(items, len(parsed_json))
        # Non-iterable version: a single JSON document holding all results.
        response = self.client.get_associations_for_target(target_symbol)
        items = len(response)
        self.assertGreater(len(response), 0)
        json_output = response.to_json(iterable=False)
        parsed_json = json.loads(json_output)
        self.assertEqual(items, len(parsed_json))

    def testResultToPandasDataFrame(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(target_symbol)
        items = len(response)
        self.assertGreater(len(response), 0)
        dataframe = response.to_dataframe()
        self.assertEqual(len(dataframe), items)

    def testResultToPandasCSV(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(
            target_symbol,
            fields=[
                'association_score.*', 'target.gene_info.symbol',
                'disease.efo_info.*'
            ])
        items = len(response)
        self.assertGreater(len(response), 0)
        csv = response.to_csv()
        filename = 'braf_associations.csv'
        self._remove_on_cleanup(filename)
        # Context manager so the handle is flushed and closed right away.
        with open(filename, 'wb') as csv_file:
            csv_file.write(csv.encode('utf-8'))
        # items + 2 pieces when splitting on newlines is what the original
        # test expects (presumably a header row plus a trailing newline).
        self.assertEqual(len(csv.split('\n')), items + 2)
        self.assertTrue(os.path.isfile(filename))

    def testResultToPandasExcel(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(
            target_symbol,
            fields=[
                'association_score.*', 'target.gene_info.symbol',
                'disease.efo_info.*'
            ])
        self.assertGreater(len(response), 0)
        filename = 'braf_associations.xls'
        self._remove_on_cleanup(filename)
        response.to_excel(filename)
        self.assertTrue(os.path.isfile(filename))

    def testResultToFile(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(
            target_symbol,
            fields=[
                'association_score.*', 'target.gene_info.symbol',
                'disease.efo_info.*'
            ])
        self.assertGreater(len(response), 0)
        filename = 'braf_associations.json.gz'
        self._remove_on_cleanup(filename)
        response.to_file(filename)
        self.assertTrue(os.path.isfile(filename))

    def testSerialiseToObject(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(target_symbol)
        items = len(response)
        self.assertGreater(len(response), 0)
        obj_output = list(response.to_object())
        for result in obj_output:
            self.assertIsNotNone(result.target.id)
        # Compare against the list length rather than a loop index that
        # would be unbound if no object had been produced.
        self.assertEqual(items, len(obj_output))

    def testGetStats(self):
        response = self.client.get_stats()
        self.assertEqual(len(response), 0)

    def testAutodetectPost(self):
        # One target id stays a GET; the five-id list is detected as a POST.
        self.assertFalse(
            Connection._auto_detect_post({'target': ['ENSG00000157764']}))
        self.assertTrue(
            Connection._auto_detect_post({
                'target': [
                    'ENSG00000157764',
                    'ENSG00000171862',
                    'ENSG00000136997',
                    'ENSG00000012048',
                    'ENSG00000139618',
                ]
            }))

    def testGetToPost(self):
        # Same five-id parameter set as testAutodetectPost, sent via get().
        response = self.client.conn.get('/platform/public/association/filter',
                                        params={
                                            'target': [
                                                'ENSG00000157764',
                                                'ENSG00000171862',
                                                'ENSG00000136997',
                                                'ENSG00000012048',
                                                'ENSG00000139618',
                                            ]
                                        })
        self.assertGreater(len(response), 0)

    def testCustomScore(self):
        def score_with_datatype_subset(datatypes, results):
            # Yield (score, disease id, per-datatype scores) for every result
            # whose harmonic sum over the chosen datatypes is truthy.
            for r in results:
                datatype_scores = r['association_score']['datatypes']
                filtered_scores = [datatype_scores[dt] for dt in datatypes]
                custom_score = HarmonicSumScorer.harmonic_sum(filtered_scores)
                if custom_score:
                    yield (round(custom_score, 3), r['disease']['id'],
                           dict(zip(datatypes, filtered_scores)))

        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(target_symbol)
        self.assertGreater(len(response), 0)
        scored = 0
        for filtered_data in score_with_datatype_subset(
                ['genetic_association', 'known_drug', 'somatic_mutation'],
                response):
            self.assertGreater(filtered_data[0], 0.)
            scored += 1

        # At most one tuple per association. An explicit counter replaces the
        # loop index, which was unbound when nothing was yielded.
        self.assertLessEqual(scored, len(response))

    def testGetAvailableEndpoints(self):
        endpoints = self.client.conn.get_api_endpoints()
        self.assertTrue('/platform/public/search' in endpoints)

    def testGetEndpointDocs(self):
        docs = self.client.conn.api_endpoint_docs('/platform/public/search')
        self.assertGreater(len(docs['get']['parameters']), 0)

    def testPing(self):
        response = self.client.conn.ping()
        # ping() may return a bool or another value; accept either form.
        if isinstance(response, bool):
            self.assertTrue(response)
        else:
            self.assertIsNotNone(response)
Esempio n. 7
0
def main(t, d):
    """Print the OpenTargets association scores for one target or disease.

    Exactly one of ``t`` and ``d`` must be a non-empty string. Every
    association returned for it is printed as target id, disease id and
    overall score, followed by summary statistics of the overall scores.

    The process exits with status 1 when neither or both arguments are
    given, or when the identifier reported back in the response's query
    info is not exactly the requested one.

    :param t: Target identifier, or ``''`` when querying a disease.
    :param d: Disease identifier, or ``''`` when querying a target.
    :return: None
    """
    # Local import, like the other imports of this function, so the
    # function does not depend on a module-level ``import sys``.
    import sys

    # Quick check - arguments: exactly one of the two must be supplied
    if (t == '') == (d == ''):
        print('provide either a target or a disease, not both',
              file=sys.stderr)
        sys.exit(1)

    # Load Func re opentargets querying ( pip install opentargets )
    # https://opentargets.readthedocs.io/en/stable/index.html
    from opentargets import OpenTargetsClient
    ot = OpenTargetsClient()

    if t != '':
        # Run analysis re TARGET (t)
        search_id = t
        associations = ot.get_associations_for_target(search_id)
        queried = associations.info['query']['target']
        # Quick check - correct 'target' queried
        if queried != [search_id]:
            print("please check best match (exited) -", [search_id], " vs ", queried)
            sys.exit(1)
    else:
        # Run analysis re DISEASE (d)
        search_id = d
        associations = ot.get_associations_for_disease(search_id)
        queried = associations.info['query']['disease']
        # Quick check - correct 'disease' queried
        if queried != [search_id]:
            print("please check best match (exited) - ", [search_id], " vs ", queried)
            sys.exit(1)

    _report_association_scores(associations)


def _report_association_scores(associations):
    # Print one line per association, then the statistics of their scores.
    search_score = []
    for a in associations:
        overall = a['association_score']['overall']
        search_score.append(overall)
        print(a['target']['id'], a['disease']['id'], overall)

    _print_score_stats(search_score)


def _print_score_stats(search_score):
    # Print max, min, mean and sample standard deviation of the scores.
    import statistics

    print('-----')
    if not search_score:
        # max(), min() and statistics.mean() all raise on empty input.
        print('No association scores returned')
        return

    print('Max:', max(search_score))
    print('Min:', min(search_score))
    print('Ave:', statistics.mean(search_score))
    if len(search_score) > 1:
        print('SD:', statistics.stdev(search_score))
    else:
        # statistics.stdev() needs at least two data points.
        print('SD: n/a (fewer than two scores)')