def make_edge_attribute_from_curies(self,
                                        source_curie,
                                        target_curie,
                                        source_name="",
                                        target_name="",
                                        default=0.,
                                        name=""):
        """
        Generic function to make an edge attribute
        :source_curie: CURIE of the source node for the edge under consideration
        :target_curie: CURIE of the target node for the edge under consideration
        :source_name: text name of the source node (in case the KP doesn't understand the CURIE)
        :target_name: text name of the target node (in case the KP doesn't understand the CURIE)
        :default: default value of the edge attribute
        :name: name of the KP functionality you want to apply
        :return: an EdgeAttribute whose value is the stringified result when
            the selected knowledge provider is 'COHD'; None when another (or
            no) knowledge provider is selected, or when an exception was
            caught (in which case it is reported through self.response.error)
        """
        # Bound before the ``try`` so the error handler below can always
        # reference it, even when the failure happens before KP selection.
        KP_to_use = None
        try:
            # edge attributes
            attribute_type = "data:0951"
            url = "http://cohd.smart-api.info/"
            value = default

            node_curie_to_type = self.node_curie_to_type
            source_type = node_curie_to_type[source_curie]
            target_type = node_curie_to_type[target_curie]
            # figure out which knowledge provider to use  # TODO: should handle this in a more structured fashion, does there exist a standardized KP API format?
            # NOTE: no early exit, so when several KPs qualify the last one iterated wins
            for KP in self.who_knows_about_what:
                known_types = self.who_knows_about_what[KP]
                # see which KP's can label both sources of information
                if self.in_common(source_type, known_types) and self.in_common(
                        target_type, known_types):
                    KP_to_use = KP
            if KP_to_use != 'COHD':
                return None

            def omop_ids_for(curie, label):
                """Return the unique OMOP concept ids (as strings) for one node."""
                # FIXME: Super hacky way to get around the fact that COHD can't map CHEMBL drugs
                if curie.split('.')[0] == 'CHEMBL':
                    concepts = COHD.find_concept_ids(
                        label, domain="Drug", dataset_id=3)
                    id_key = 'concept_id'
                else:
                    # convert CURIE to OMOP identifiers
                    concepts = COHD.get_xref_to_OMOP(curie, 1)
                    id_key = 'omop_standard_concept_id'
                # uniquify everything
                return list({str(concept[id_key]) for concept in concepts})

            source_OMOPs = omop_ids_for(source_curie, source_name)
            target_OMOPs = omop_ids_for(target_curie, target_name)
            omop_pairs = itertools.product(source_OMOPs, target_OMOPs)

            # Decide how to handle the response from the KP
            if name == 'paired_concept_frequency':
                # sum up all frequencies  #TODO check with COHD people to see if this is kosher
                for omop1, omop2 in omop_pairs:
                    freq_data = COHD.get_paired_concept_freq(
                        omop1, omop2, 3)  # use the hierarchical dataset
                    if freq_data and 'concept_frequency' in freq_data:
                        value += freq_data['concept_frequency']
            elif name == 'observed_expected_ratio':
                # should probably take the largest obs/exp ratio  # TODO: check with COHD people to see if this is kosher
                # FIXME: the ln_ratio can be negative, so I should probably account for this, but the object model doesn't like -np.inf
                # Stays at -inf (and is emitted as "-inf") when no pair yields a ratio.
                value = float("-inf")
                for omop1, omop2 in omop_pairs:
                    response = COHD.get_obs_exp_ratio(
                        omop1, concept_id_2=omop2, domain="",
                        dataset_id=3)  # use the hierarchical dataset
                    # response is a list, since this function is overloaded and can omit concept_id_2, take the first element
                    if response and 'ln_ratio' in response[0]:
                        value = max(value, response[0]['ln_ratio'])
            elif name == 'chi_square':
                # Stays at inf (and is emitted as "inf") when no pair yields a p-value.
                value = float("inf")
                for omop1, omop2 in omop_pairs:
                    response = COHD.get_chi_square(
                        omop1, concept_id_2=omop2, domain="",
                        dataset_id=3)  # use the hierarchical dataset
                    # response is a list, since this function is overloaded and can omit concept_id_2, take the first element
                    if response and 'p-value' in response[0]:
                        # looking at p-values, so lower is better
                        value = min(value, response[0]['p-value'])

            # create and populate the edge attribute
            # FIXME: unclear in object model if attribute type dictates value type, or if value always needs to be a string
            return EdgeAttribute(
                type=attribute_type, name=name, value=str(value), url=url)
        except Exception as error:
            # Best effort: report the failure on the response object and return
            # None rather than propagating the exception to the caller.
            tb = traceback.format_exc()
            self.response.error(tb, error_code=type(error).__name__)
            self.response.error(
                f"Something went wrong when adding the edge attribute from {KP_to_use}."
            )
            return None
Beispiel #2
0
    def test_get_chi_square(self):
        """
        Check QueryCOHD.get_chi_square for fully specified concept pairs,
        for calls that omit concept_id_2, and for invalid arguments.

        The expected statistics and row counts are hard-coded values;
        presumably they track the data COHD serves -- confirm before
        treating a mismatch as a code regression.
        """
        first_id = "192855"
        second_id = "2008271"

        dataset_1_row = {
            'chi_square': 306.2816108187519,
            'concept_id_1': 192855,
            'concept_id_2': 2008271,
            'dataset_id': 1,
            'p-value': 1.4101531778039801e-68,
        }
        dataset_2_row = {
            'chi_square': 7065.7865572100745,
            'concept_id_1': 192855,
            'concept_id_2': 2008271,
            'dataset_id': 2,
            'p-value': 0.0,
        }

        # Both concept ids given: exactly one row with the known statistics
        single_row_cases = (
            ((first_id, second_id, "Condition"), dataset_1_row),     # default dataset_id
            ((first_id, second_id, "Condition", 2), dataset_2_row),  # dataset_id == 2
            ((first_id, second_id), dataset_1_row),                  # default domain and dataset_id
        )
        for call_args, expected_row in single_row_cases:
            rows = QueryCOHD.get_chi_square(*call_args)
            self.assertIsNotNone(rows)
            self.assertEqual(len(rows), 1)
            self.assertEqual(rows, [expected_row])

        # No concept_id_2: only the number of returned rows is checked
        row_count_cases = (
            ((first_id,), 768),                     # default domain and dataset_id
            ((first_id, "", "Condition"), 226),     # default dataset_id
            ((first_id, "", "Condition", 2), 991),  # dataset_id == 2
            ((first_id, "", "", 2), 2735),          # dataset_id == 2, default domain
        )
        for call_args, expected_count in row_count_cases:
            rows = QueryCOHD.get_chi_square(*call_args)
            self.assertIsNotNone(rows)
            self.assertEqual(len(rows), expected_count)

        # Invalid arguments: an empty list is expected
        invalid_cases = (
            (192855, "", "", 1),                     # invalid concept_id_1 type
            (first_id, 2008271, "", 1),              # invalid concept_id_2 type
            (first_id, second_id, "condition", 10),  # invalid dataset_id value
        )
        for call_args in invalid_cases:
            rows = QueryCOHD.get_chi_square(*call_args)
            self.assertEqual(rows, [])