コード例 #1
0
    def checkForEmptyResponseBody(self):
        """
        Just log if the response body is empty, helpful for the users
        :return: None
        """
        if self.responseBody is None:
            self.logs.append(clsLogEvent(
                identifier=self.name,
                level="ERROR",
                code="Error",
                message=f"Knowledge Provider {self.url} did not return a message"
            ))
            return True

        knowledge_graph = self.responseBody['message']['knowledge_graph']
        if knowledge_graph is None:
            # todo, this is a bug, some of the knowledge providers return a None, which seems to valid by TRAPI v1.1
            knowledge_graph = {'edges': {}, 'nodes': {}}
            self.responseBody['message']['knowledge_graph'] = knowledge_graph
        if len(knowledge_graph['edges']) == 0 or len(knowledge_graph['nodes']) == 0:
            self.logs.append(clsLogEvent(
                identifier=self.name,
                level="WARNING",
                code="KPEmptyResponse",
                message=f"Knowledge Provider {self.url} returned an empty knowledge graph"
            ))

            return True
        else:
            return False
コード例 #2
0
    def dispatch(self):
        if self.dispatchList is not None:
            self.logs.append(
                clsLogEvent(
                    identifier="",
                    level="DEBUG",
                    code="",
                    message=
                    f"Executing {len(self.dispatchList)} predicate searches."))

            query_dispatch(objects=self.dispatchList,
                           method=self.dispatchMode,
                           parentId=self.dispatchId,
                           abort_time=int(self.started_time +
                                          self.processing_timeout))

            # all solution managers have identified case solutions to use, now we aggregate and dispatch those
            self.caseMultiHops = []
            for case_solution_manager in self.dispatchList:
                # also merge any logs
                self.logs += case_solution_manager.logs

                if case_solution_manager.dispatchList:
                    self.caseMultiHops += case_solution_manager.dispatchList
            # self.caseMultiHops = self.caseMultiHops[:5]
            # test_solutions = [c for c in self.caseSolutions if c.caseId == 'Q000391' and c.id == 2065]  # Solution ID '2065' for Case ID 'Q000391'
            # self.caseMultiHops = [c for c in self.caseMultiHops if c.caseId != 'Q001884'][:10] + test_solutions
            # self.caseMultiHops = test_solutions

            # self.caseSolutions = self.caseSolutions[:200]

            # self.logs.append(clsLogEvent(
            #     identifier="",
            #     level="DEBUG",
            #     code="",
            #     message=f"Matched {len(self.caseSolutions)} Case Solutions."
            # ))

            query_dispatch(
                objects=self.caseMultiHops,
                method=self.dispatchMode,
                parentId=self.dispatchId,
                abort_time=int(self.started_time + self.processing_timeout),
                # abort_time=10,
                pool_size=5)

            self.logs.append(
                clsLogEvent(
                    identifier="",
                    level="DEBUG",
                    code="",
                    message=
                    f"Fully ran {len([s for s in self.caseMultiHops if s.finished])} Case Solutions."
                ))

            return
コード例 #3
0
    def execute(self):
        """
        Sends a query to the Knowledge Provider and sets the responseBody property
        :return: None
        """
        self.validateRequestBody()
        try:
            if self.url is None:
                raise ValueError(f"No URL identified for Knowledge Provider '{self.name}'")

            self.logs.append(clsLogEvent(
                identifier=self.name,
                level="DEBUG",
                code="",
                message=f"Sending request to Knowledge Provider {self.url}..."
            ))
            response = request_with_global_timeout(
                method="post",
                url=self.url,
                # global_timeout=None,
                global_timeout=self.timeoutSeconds,
                json=self.requestBody
            )
            response.raise_for_status()
        except requests.exceptions.Timeout:
            self.logs.append(clsLogEvent(
                identifier=self.name,
                level="ERROR",
                code="KPTimeout",
                message=f"Knowledge Provider {self.url} timed out after {self.timeoutSeconds} seconds"
            ))
            raise
        except requests.exceptions.HTTPError:
            self.logs.append(clsLogEvent(
                identifier=self.name,
                level="ERROR",
                code="KPNotAvailable",
                message=f"Knowledge Provider {self.url} returned HTTP error code {str(response.status_code)}"
            ))
            raise
        self.logs.append(clsLogEvent(
            identifier=self.name,
            level="DEBUG",
            code="",
            message=f"KP 200 response in {response.elapsed}. Content size: {len(response.content)}."
        ))
        self.responseBody = response.json()
        self.validateResponseBody()
        self.checkForEmptyResponseBody()
コード例 #4
0
def runner(object):
    startTime = time.time()
    status = "Started"

    try:
        logging.debug(printer(object=object, status=status))
        object.execute()
        status = "Completed"

    except Exception as tb:
        status = "Failed"
        print(traceback.format_exc())
        logging.error(str(tb))
        object.logs.append(
            clsLogEvent(
                level="ERROR",
                code="",
                message=
                f"Unexpected error in object with dispatchId {object.dispatchId}: {tb}"
            ))
        # raise clsDelayedException(tb)

    finally:
        endTime = time.time()
        object.dispatchRunTimeSeconds = endTime - startTime
        message = printer(object=object, status=status)
        if status == "Completed":
            logging.debug(message)
        elif status == "Failed":
            logging.error(message)
        logging.debug(
            f"{findObjectType(object)} with dispatchId {object.dispatchId} took {object.dispatchRunTimeSeconds} seconds"
        )

        return object
コード例 #5
0
 def execute(self, no_results=False):
     self.logs.append(clsLogEvent(
         identifier=self.id,
         level="DEBUG",
         code="",
         message=f"Executing Solution ID '{self.id}' for Case ID '{self.caseId}'"
     ))
     try:
         if len(self.paths) == 1:
             self.generateKnowledgeGraphForOnePath()
             self.generateQueryGraphForOnePath()
             # if no_results is False:
             #     self.generateResultsForOnePath()
             # else:
             #     self.results = []
         elif len(self.paths) == 2:
             self.createCaseSolutions()
             self.generateKnowledgeGraphForTwoPaths()
             self.generateQueryGraphForTwoPaths()
             # if no_results is False:
             #     self.generateResultsForTwoPaths()
             # else:
             #     self.results = []
         else:
             raise AttributeError("Number of knowledge provider paths supported is either 1 or 2")
         self.updateKnowledgeProviderLogs()
         self.logs.append(clsLogEvent(
             identifier=self.id,
             level="DEBUG",
             code="",
             message=f"Case executed successfully."
         ))
         # if no_results is False and len(self.knowledge_graph["edges"]) > 0 and len(self.results) == 0:
         #     raise AssertionError(f"Results were not generated for solution ID {self.id}.")
         self.successful = True
     except Exception as e:
         print(traceback.format_exc())
         self.logs.append(clsLogEvent(
             identifier=self.id,
             level="ERROR",
             code="Error",
             message=f"Unhandled {type(e).__name__} Exception: {e}"
         ))
         # self.logs.append("Exception: {}".format(e,))
     finally:
         self.finished = True
コード例 #6
0
    def userResponseBodyValidation(self):
        """
        A function to evaluate whether the JSON body sent to the client conforms to the proper input standard.
        :return: Boolean: True meaning the body is valid, False meaning the body is not valid
        """
        validity_status = {"isValid": True, "error": None}

        # ensure all edge attribute lists end with the Explanatory Agent provenance attribute
        try:
            if self.userResponseBody['message'][
                    'knowledge_graph'] and "edges" in self.userResponseBody[
                        'message']['knowledge_graph']:
                for edge_id, edge in self.userResponseBody['message'][
                        'knowledge_graph']['edges'].items():
                    last_attribute = edge['attributes'][-1]
                    assert last_attribute[
                        "attribute_type_id"] == "biolink:aggregator_knowledge_source", "Edge missing xARA provenance data"
                    assert last_attribute[
                        "attribute_source"] == "infores:explanatory-agent", "Edge missing xARA provenance data"
        except AssertionError as e:
            self.logs.append(
                clsLogEvent(
                    identifier="",
                    level="DEBUG",
                    code="",
                    message=f"Provenance assertion failure: {e}").dict())
            # validity_status = {"isValid": False, "error": e}
        except Exception as e:
            self.logs.append(
                clsLogEvent(
                    identifier="",
                    level="DEBUG",
                    code="",
                    message=f"Provenance assertion failure: {e}").dict())

        try:
            reasoner_validator.validate(self.userResponseBody, "Response",
                                        trapi_version)
        except ValidationError as e:
            validity_status = {"isValid": False, "error": e}

        return validity_status
コード例 #7
0
 def execute(self):
     try:
         self.execute_hops()
         self.generate_results()
         # self.merge_query_graphs()  # not used anymore since the QG must remain constant (except for derived queries)
         self.merge_knowledge_graphs()
         self.merge_results()
         # self.merge_logs()
         self.successful = True
     except AbortMultihopException as e:
         self.logs.append(
             clsLogEvent(identifier=self.dispatchId,
                         level="DEBUG",
                         code="",
                         message=f"Aborting Multihop execution: {e}"))
     except Exception as e:
         self.logs.append(
             clsLogEvent(
                 identifier=self.dispatchId,
                 level="ERROR",
                 code="Error",
                 message=f"Unhandled {type(e).__name__} Exception: {e}"))
     self.finished = True
コード例 #8
0
    def generateResultsForOnePath(self):
        self.explanation_case_solution = self.explanation_solution_finder.search(self.similarSubject, self.similarObject, self.similarPredicate,
                                                                                 self.knowledge_graph,
                                                                                 self.paths[0].knowledgeProvider.name,
                                                                                 similarity_threshold=self.explanation_similarity)
        self.explanation_case_solution.logs = self.logs
        self.logs.append(clsLogEvent(
            identifier=self.id,
            level="DEBUG",
            code="",
            message=f"Executing explanation case '{self.explanation_case_solution.case_id}'."
        ))

        self.results = self.explanation_case_solution.create_results_and_explain(self)
コード例 #9
0
 def validateResponseBody(self):
     """
     Verify its a valid TRAPI response
     :return: None
     :raises: Will raise InvalidSchema if not valid
     """
     try:
         # reasoner_validator.validate_Response(self.responseBody)
         reasoner_validator.validate(self.responseBody, "Response", trapi_version)
     except ValidationError as e:
         self.logs.append(clsLogEvent(
             identifier=self.name,
             level="ERROR",
             code="KPMalformedResponse",
             message=f"Knowledge Provider {self.url} did not return a valid TRAPI v1.2 response"
         ))
         raise requests.exceptions.InvalidSchema  # force raise a request.exception to know its the KP's fault
コード例 #10
0
    def findMostSimilarCases(self):
        self.triplets_case_ids = []
        for i, triplet in enumerate(self.query_path.triplets):
            source_category = triplet.source_node.data["categories"][0]
            target_category = triplet.target_node.data["categories"][0]
            predicate = triplet.predicate.data["predicates"][0]
            logging.debug(f"Finding similar cases for {triplet}")
            origins = self.one_hop_origin if self.is_one_hop else self.multiple_hop_origin
            triplet_case_ids = self.case_problem_searcher.get_global_sim_triplets(source_category, target_category, predicate, origins)

            self.logs.append(clsLogEvent(
                identifier=f"Hop {i+1} of {len(self.query_path.triplets)} - {triplet}",
                level="DEBUG",
                code="",
                message=f"Identified {len(self.case_problem_searcher.selected_case_ids_and_similarities)} Case Problems: (Case ID, Similarity) '{self.case_problem_searcher.selected_case_ids_and_similarities}'"
            ))
            logging.debug(f"Identified {len(self.case_problem_searcher.selected_case_ids_and_similarities)} Case Problems: (Case ID, Similarity) '{self.case_problem_searcher.selected_case_ids_and_similarities}'")
            self.triplets_case_ids.append(triplet_case_ids)
コード例 #11
0
    def execute_hops(self):
        for i, case_solution in enumerate(self.case_solutions):
            # if we are on a second+ hop, then try to populate its unknown node ids
            if i > 0:
                previous_solution = self.case_solutions[i - 1]
                response_body = previous_solution.paths[
                    -1].knowledgeProvider.responseBody
                if response_body is None or len(
                        response_body["message"]["results"]) <= 0:
                    raise AbortMultihopException(
                        f"Previous hop KP did not return results: '{previous_solution.paths[-1].knowledgeProvider.url}'"
                    )

                # build a list of all ids in the previous case's response for each node to be used in the current case
                response_node_ids = {}
                for result in response_body["message"]["results"]:
                    for node_id, node_bindings in result[
                            "node_bindings"].items():
                        if node_id not in response_node_ids:
                            response_node_ids[node_id] = set()
                        for node_binding in node_bindings:
                            if "id" in node_binding:
                                response_node_ids[node_id].add(
                                    node_binding["id"])

                # for each current case node see if it does not have any ids set already. If it doesn't set them.
                if case_solution.subjectCurieIds is None:
                    if case_solution.subject_query_graph_node_id in response_node_ids:
                        subject_new_ids = sorted(
                            list(response_node_ids[
                                case_solution.subject_query_graph_node_id]))
                        if len(subject_new_ids) > 50:
                            self.logs.append(
                                clsLogEvent(
                                    identifier=self.dispatchId,
                                    level="INFO",
                                    code="Info",
                                    message=
                                    f"Too many previous hop subject ids to provide {len(subject_new_ids)}, reducing to 50."
                                ))
                            subject_new_ids = subject_new_ids[:50]
                        case_solution.subjectCurieIds = subject_new_ids

                if case_solution.objectCurieIds is None:
                    if case_solution.object_query_graph_node_id in response_node_ids:
                        object_new_ids = sorted(
                            list(response_node_ids[
                                case_solution.object_query_graph_node_id]))
                        if len(object_new_ids) > 50:
                            self.logs.append(
                                clsLogEvent(
                                    identifier=self.dispatchId,
                                    level="INFO",
                                    code="Info",
                                    message=
                                    f"Too many previous hop object ids to provide {len(object_new_ids)}, reducing to 50."
                                ))
                            object_new_ids = object_new_ids[:50]
                        case_solution.objectCurieIds = object_new_ids

            case_solution.execute(no_results=False)
コード例 #12
0
    def create_results_and_explain(self, case_solution):
        query_graph = case_solution.query_graph
        knowledge_graph = case_solution.knowledge_graph

        e00_id = list(query_graph["edges"].keys())[0]
        node_ids = sorted(list(query_graph["nodes"].keys()))
        n00_id = node_ids[0]
        n01_id = node_ids[1]

        results = []
        score_attribute_lower = self.score_attribute.lower()
        # MolePro changed their attribute value to "CMAP:similarity score" (no underscores) so test for that, as well.
        score_attribute_lower_no_underscores = score_attribute_lower.replace("_", " ")

        # first pass on edges to get all scores
        similarity_scores = []
        for edge_id, edge in knowledge_graph['edges'].items():
            for attribute in edge["attributes"]:
                attribute_type_id_lower = attribute['attribute_type_id'].lower()
                if attribute_type_id_lower == score_attribute_lower or attribute_type_id_lower == score_attribute_lower_no_underscores:
                    similarity_scores.append(float(attribute['value']))

        if len(similarity_scores) > 0:
            min_similarity_score = min(similarity_scores)
            max_similarity_score = max(similarity_scores)
        else:
            min_similarity_score = -1
            max_similarity_score = -1

        # sorting for deterministic results. This can go away after index isn't used for rank.
        for index, (edgeId, edge) in enumerate(sorted(list(knowledge_graph['edges'].items()), key=lambda i: i[0])):
            if index == 0:
                self.logs.append(clsLogEvent(
                    identifier=self.case_id,
                    level="DEBUG",
                    code="",
                    message=f"Starting explanations for {len(knowledge_graph['edges'])} edges."
                ))
            if index % 50 == 0 and index > 0:
                logging.debug(f"{self.case_id} edge {index} of {len(knowledge_graph['edges'])}")
                self.logs.append(clsLogEvent(
                    identifier=self.case_id,
                    level="DEBUG",
                    code="",
                    message=f"Explained {index} of {len(knowledge_graph['edges'])} edges."
                ))

            result = OrderedDict()
            result['edge_bindings'] = {
                e00_id: [{
                    "id": edgeId,
                }]
            }
            result['node_bindings'] = {
                n00_id: [{"id": edge['subject']}],
                n01_id: [{"id": edge['object']}],
            }

            similarity_score = None
            for attribute in edge["attributes"]:
                attribute_type_id_lower = attribute['attribute_type_id'].lower()
                if attribute_type_id_lower == score_attribute_lower or attribute_type_id_lower == score_attribute_lower_no_underscores:
                    similarity_score = float(attribute['value'])

            if similarity_score is None:
                similarity_score = -1

            result['score'] = similarity_score / max_similarity_score
            result['attributes'] = [
                {
                    "original_attribute_name": "Explanation Rationale",
                    "attribute_type_id": "biolink:description",
                    "value": "The score was obtained based on the similarity score produced by CMAP (portals.broadinstitute.org/cmap).",
                    "description": "Describes to user the Rationale for explaining the ranking"
                },
                {
                    "original_attribute_name": "Explanation Text",
                    "attribute_type_id": "biolink:has_evidence",
                    "value": f"This result was given a score of {result['score']} based on the range (i.e, {min_similarity_score} to {max_similarity_score}) observed in the current set of results for the attribute {self.score_attribute} supplied by {self.kp_name}.",
                    "description": "Describes the reason this specific edge receives a score w.r.t Rationale"
                }
            ]

            results.append(result)

        self.logs.append(clsLogEvent(
            identifier=self.case_id,
            level="DEBUG",
            code="",
            message=f"Explained all {len(knowledge_graph['edges'])} edges."
        ))

        return results
コード例 #13
0
    def create_results_and_explain(self, case_solution):
        query_graph = case_solution.query_graph
        knowledge_graph = case_solution.knowledge_graph

        e00_id = list(query_graph["edges"].keys())[0]
        node_ids = sorted(list(query_graph["nodes"].keys()))
        n00_id = node_ids[0]
        n01_id = node_ids[1]

        results = []

        # sorting for deterministic results. This can go away after index isn't used for rank.
        for index, (edgeId, edge) in enumerate(sorted(list(knowledge_graph['edges'].items()), key=lambda i: i[0])):
            if index == 0:
                self.logs.append(clsLogEvent(
                    identifier=self.case_id,
                    level="DEBUG",
                    code="",
                    message=f"Starting explanations for {len(knowledge_graph['edges'])} edges."
                ))
            if index % 50 == 0 and index > 0:
                logging.debug(f"{self.case_id} edge {index} of {len(knowledge_graph['edges'])}")
                self.logs.append(clsLogEvent(
                    identifier=self.case_id,
                    level="DEBUG",
                    code="",
                    message=f"Explained {index} of {len(knowledge_graph['edges'])} edges."
                ))

            explanation, score = self.identify_edge(edgeId)

            result = OrderedDict()
            result['edge_bindings'] = {
                e00_id: [{
                    "id": edgeId,
                }]
            }
            result['node_bindings'] = {
                n00_id: [{"id": edge['subject']}],
                n01_id: [{"id": edge['object']}],
            }
            result['score'] = score
            result['attributes'] = [
                {
                    "original_attribute_name": "Explanation Rationale",
                    "attribute_type_id": "biolink:description",
                    "value": self.rationale,
                    "description": "Describes to user the Rationale for explaining the ranking"
                },
                {
                    "original_attribute_name": "Explanation Text",
                    "attribute_type_id": "biolink:has_evidence",
                    "value": explanation,
                    "description": "Describes the reason this specific edge receives a score w.r.t Rationale"
                }
            ]

            results.append(result)

        self.logs.append(clsLogEvent(
            identifier=self.case_id,
            level="DEBUG",
            code="",
            message=f"Explained all {len(knowledge_graph['edges'])} edges."
        ))

        return results
コード例 #14
0
    def findCaseSolutions(self):
        case_solution_sorter = clsCaseSolutionRetrival()

        # if isNullOrEmptyList(self.caseIds): return

        all_case_ids = set()
        for case_ids in self.triplets_case_ids:
            all_case_ids |= set(case_ids)

        case_id_to_solutions = {}
        with self.app.app_context():
            results = db.session.execute(
                statement=self.sqlFindCaseSolutions,
                params={
                    "caseIds": list(all_case_ids)
                }
            ).fetchall()
            for result in results:
                case_id = result[1]
                if case_id not in case_id_to_solutions:
                    case_id_to_solutions[case_id] = []
                case_id_to_solutions[case_id].append(result)


        caseSolutionDispatchId = 1

        triplets_case_solutions = []
        for i, triplet in enumerate(self.query_path.triplets):
            case_ids = self.triplets_case_ids[i]
            triplet_case_solutions = []

            for caseId in case_ids:
                results = case_id_to_solutions.get(caseId, [])

                # logging.debug(f"Getting case solutions for Case ID {caseId}")
                if isNullOrEmptyList(results):
                    logging.error(f"No case solutions retrieved for Case ID '{caseId}")
                    continue

                sorted_results = case_solution_sorter.retrieveRows_xARA_CaseSolutions(results)

                for result in sorted_results:
                    triplet_case_solutions.append(
                        self.buildCaseSolutionFromResult(
                            triplet,
                            caseSolutionDispatchId=self.dispatchId + caseSolutionDispatchId,
                            result=self.populateResultKPInfo(result)
                        )
                    )
                    caseSolutionDispatchId += 1

            triplets_case_solutions.append(triplet_case_solutions)

            # source_category = triplet.given_node.data["categories"][0]
            # opposite_category = triplet.opposite_node.data["categories"][0]
            # predicate = triplet.predicate.data["predicates"][0]
            self.logs.append(clsLogEvent(
                identifier=f"Hop {i+1} of {len(self.query_path.triplets)} - {triplet}",
                level="DEBUG",
                code="",
                message=f"Identified {len(triplet_case_solutions)} Case Solutions (first 50): '{triplet_case_solutions[:50]}...'"
            ))
            logging.debug(f"Identified {len(triplet_case_solutions)} Case Solutions (first 50): '{triplet_case_solutions[:50]}...'")
        self.triplets_case_solutions = triplets_case_solutions

        # trim the solutions down to the smallest set size so they can be iterated with no missing steps
        smallest_solutions = min([len(s) for s in self.triplets_case_solutions])
        logging.debug(f"Reducing triplet solutions length to {smallest_solutions}")
        for i in range(len(self.triplets_case_solutions)):
            self.triplets_case_solutions[i] = self.triplets_case_solutions[i][:smallest_solutions]

        self.dispatchList = []
        for solution_index in range(len(self.triplets_case_solutions[0])):
            path_solutions = []
            for triplet_index, triplet in enumerate(self.query_path.triplets):
                triplet_case_solution = self.triplets_case_solutions[triplet_index][solution_index]
                path_solutions.append(triplet_case_solution)
            multi_hop = clsMultiHop(self.dispatchId + 10000 * (solution_index+1) + 1000 * (triplet_index + 1), "", path_solutions)
            self.dispatchList.append(multi_hop)
コード例 #15
0
    def create_results_and_explain(self, case_solution):
        query_graph = case_solution.query_graph
        knowledge_graph = case_solution.knowledge_graph

        e00_id = list(query_graph["edges"].keys())[0]
        node_ids = sorted(list(query_graph["nodes"].keys()))
        n00_id = node_ids[0]
        n01_id = node_ids[1]

        results = []

        start_time = time.time()

        # sorting edge IDs for deterministic results.
        edges = sorted(list(knowledge_graph['edges'].items()), key=lambda i: i[0])

        # bulk get all canonical identifiers for all node ids
        node_identifiers = set()
        for index, (edgeId, edge) in enumerate(edges):
            edge_subject = edge["subject"]
            edge_object = edge["object"]
            node_identifiers.add(edge_subject)
            node_identifiers.add(edge_object)
        logging.debug(f"Getting node identifiers")
        self.logs.append(clsLogEvent(
            identifier=self.case_id,
            level="DEBUG",
            code="",
            message=f"Getting all ({len(node_identifiers)}) node identifiers."
        ))
        node_normalizer_provider = clsNodeNormalizerProvider(sorted(list(node_identifiers)))
        node_normalizer_provider.get_identifiers()
        identifiers = node_normalizer_provider.identifiers

        # bulk get all synonyms
        logging.debug(f"Getting node synonyms")
        self.logs.append(clsLogEvent(
            identifier=self.case_id,
            level="DEBUG",
            code="",
            message=f"Getting all node synonyms."
        ))
        name_resolution_provider = clsNameResolutionProvider(sorted(list(identifiers.values())))
        name_resolution_provider.get_synonyms()

        # collect all edges that have acceptable sentences to bulk predict later
        predictable_edges = []

        for index, (edgeId, edge) in enumerate(edges):
            # # TODO: Remove! Debugging only!
            # if index > 10:
            #     break
            if index == 0:
                self.logs.append(clsLogEvent(
                    identifier=self.case_id,
                    level="DEBUG",
                    code="",
                    message=f"Starting explanations for {len(knowledge_graph['edges'])} edges."
                ))
            if index % 50 == 0 and index > 0:
                logging.debug(f"{self.case_id} edge {index} of {len(knowledge_graph['edges'])}")
                self.logs.append(clsLogEvent(
                    identifier=self.case_id,
                    level="DEBUG",
                    code="",
                    message=f"Explained {index} of {len(knowledge_graph['edges'])} edges."
                ))
            if time.time() > start_time + 3.0 * 60:
                self.logs.append(clsLogEvent(
                    identifier=self.case_id,
                    level="ERROR",
                    code="",
                    message=f"Explanation timeout."
                ))
                # break
                raise TimeoutError("Explanation execution ran too long")

            result = OrderedDict()
            result['edge_bindings'] = {
                e00_id: [{
                    "id": edgeId,
                }]
            }

            # TODO: Some KGs may have reversed edge subjects and objects compared to the result node bindings! Should use result node bindings instead.
            result['node_bindings'] = {
                n00_id: [{"id": edge['subject']}],
                n01_id: [{"id": edge['object']}],
            }

            edge_subject = edge["subject"]
            subject_categories = knowledge_graph['nodes'][edge_subject]['categories']
            subject_categories_lower = [s.lower() for s in subject_categories]
            edge_object = edge["object"]
            object_categories = knowledge_graph['nodes'][edge_object]['categories']
            object_categories_lower = [o.lower() for o in object_categories]

            self.logs.append(clsLogEvent(
                identifier=self.case_id,
                level="DEBUG",
                code="",
                message=f"Explaining edge {index+1}: {edgeId} Predicate {edge['predicate']}, Subject: {edge_subject} {subject_categories}, Object: {edge_object} {object_categories}."
            ))

            # if either the subject's or object's categories don't have tables representing them, stop.
            if len(set(subject_categories_lower) & self.valid_categories) == 0 or len(set(object_categories_lower) & self.valid_categories) == 0:
                result['score'] = 0.0
                result['attributes'] = ExplanationX00002.no_found_references_attributes
                results.append(result)
                continue

            subject_synonyms = name_resolution_provider.synonyms[identifiers[edge_subject]]

            logging.debug(f"{self.case_id} edge {index+1} of {len(knowledge_graph['edges'])} getting subject article references")
            self.logs.append(clsLogEvent(
                identifier=self.case_id,
                level="DEBUG",
                code="",
                message=f"Edge {edgeId} getting subject article references."
            ))
            subject_references = self.find_references(subject_synonyms, subject_categories)
            self.logs.append(clsLogEvent(
                identifier=self.case_id,
                level="DEBUG",
                code="",
                message=f"Edge {edgeId} Subject Synonyms: {subject_synonyms}, References: {len(subject_references)}."
            ))

            if len(subject_references) <= 0:
                result['score'] = 0.0
                result['attributes'] = ExplanationX00002.no_found_references_attributes
                results.append(result)
                continue

            object_synonyms = name_resolution_provider.synonyms[identifiers[edge_object]]
            logging.debug(f"{self.case_id} edge {index + 1} of {len(knowledge_graph['edges'])} getting object article references")
            self.logs.append(clsLogEvent(
                identifier=self.case_id,
                level="DEBUG",
                code="",
                message=f"Edge {edgeId} getting object article references."
            ))
            object_references = self.find_references(object_synonyms, object_categories)

            self.logs.append(clsLogEvent(
                identifier=self.case_id,
                level="DEBUG",
                code="",
                message=f"Edge {edgeId} Object Synonyms: {object_synonyms}, References: {len(object_references)}."
            ))

            # object_references = [
            #     {'ID': 261595, 'Article_Number': '1336', 'Article_ID': '288e7bf5d16ac18731d2f1ee7db6d6ed2a720cc2', 'SENT_ID': 85,
            #      'SENT_TEXT': 'On the other hand, AP2 and SP1 are known to activate epidermal growth factor receptor some chemical.',
            #      'TOKS': 'some chemical'}
            # ]

            if len(object_references) <= 0:
                self.logs.append(clsLogEvent(
                    identifier=self.case_id,
                    level="DEBUG",
                    code="",
                    message=f"Edge {edgeId} No object references found, stopping."
                ))
                result['score'] = 0.0
                result['attributes'] = ExplanationX00002.no_found_references_attributes
                results.append(result)
                continue

            # find matching article ids between the two lists of subject and object results
            matching_reference_pairs_by_article = {}
            for subject_reference in subject_references:
                for object_reference in object_references:
                    if subject_reference["Article_ID"] == object_reference["Article_ID"]:
                        if subject_reference["Article_ID"] not in matching_reference_pairs_by_article:
                            matching_reference_pairs_by_article[subject_reference["Article_ID"]] = []
                        matching_reference_pairs_by_article[subject_reference["Article_ID"]].append((subject_reference, object_reference))

            self.logs.append(clsLogEvent(
                identifier=self.case_id,
                level="DEBUG",
                code="",
                message=f"Edge {edgeId} Matched subject/object articles: {len(matching_reference_pairs_by_article)}."
            ))

            if len(matching_reference_pairs_by_article) <= 0:
                result['score'] = 0.0
                attributes = copy.deepcopy(ExplanationX00002.no_matching_reference_pairs_attributes)
                # get all unique PMIDs to return to the user
                pmids = set()
                for subject_reference in subject_references:
                    pmids.add(subject_reference['Article_ID'])
                for object_reference in object_references:
                    pmids.add(object_reference['Article_ID'])

                attributes[1]["value"] += ", ".join(pmids)
                result['attributes'] = attributes
                results.append(result)
                self.logs.append(clsLogEvent(
                    identifier=self.case_id,
                    level="DEBUG",
                    code="",
                    message=f"Edge {edgeId} No matched articles found, stopping."
                ))
                continue

            article_information = self.get_article_information(list(matching_reference_pairs_by_article.keys()))

            result['attributes'] = []
            score = 0.5

            pairs_to_predict = []
            for article_id, reference_pairs in matching_reference_pairs_by_article.items():
                if time.time() > start_time + 4.5 * 60:
                    self.logs.append(clsLogEvent(
                        identifier=self.case_id,
                        level="ERROR",
                        code="",
                        message=f"Explanation timeout."
                    ))
                    raise TimeoutError("Explanation execution ran too long")

                pair_distances = []
                for reference_pair in reference_pairs:
                    distance = abs(reference_pair[0]["SENT_ID"] - reference_pair[1]["SENT_ID"])
                    pair_distances.append((reference_pair, distance, reference_pair[0]["Article_Number"]))

                pair_distances.sort(key=lambda x: (x[1], x[2]))
                closest_reference_pair_data = pair_distances[0]
                closest_reference_pair = closest_reference_pair_data[0]

                self.logs.append(clsLogEvent(
                    identifier=self.case_id,
                    level="DEBUG",
                    code="",
                    message=f"Edge {edgeId} Article {article_id} closest pair distance {closest_reference_pair_data[1]}."
                ))

                if closest_reference_pair_data[1] > 9:
                    self.logs.append(clsLogEvent(
                        identifier=self.case_id,
                        level="DEBUG",
                        code="",
                        message=f"Edge {edgeId} Sentence distance too great: {closest_reference_pair_data[1]}, stopping."
                    ))
                # if False:
                    attributes = copy.deepcopy(ExplanationX00002.reference_distance_too_large_attributes)
                    attributes[1]["value"] = attributes[1]["value"].format(closest_reference_pair[0]["TOKS"], closest_reference_pair[1]["TOKS"], article_id)
                    result['attributes'] += attributes
                    result['score'] = score
                    # result['attributes'] = attributes
                    # results.append(result)
                    continue

                pairs_to_predict.append((article_id, closest_reference_pair))

                # # combine two sentences and wrap the matched text in special characters
                # subject_special_sentence = closest_reference_pair[0]['SENT_TEXT'].replace(closest_reference_pair[0]['TOKS'], f"<<{closest_reference_pair[0]['TOKS']}>>")
                # object_special_sentence = closest_reference_pair[1]['SENT_TEXT'].replace(closest_reference_pair[1]['TOKS'], f"[[{closest_reference_pair[1]['TOKS']}]]")
                # instance = f"{subject_special_sentence} {object_special_sentence}"
                # # data_try = 'Agonistic activity of << ICI 182 780 >> on activation of GSK 3β/[[ AKT ]] pathway in the rat uterus during the estrous cycle.'
                # self.logs.append(clsLogEvent(
                #     identifier=self.case_id,
                #     level="DEBUG",
                #     code="",
                #     message=f"Edge {edgeId} Beginning relationship classification..."
                # ))
                #
                # relation_label = relationship_classification(instance)
                # self.logs.append(clsLogEvent(
                #     identifier=self.case_id,
                #     level="DEBUG",
                #     code="",
                #     message=f"Edge {edgeId} Relationship classification finished {relation_label}"
                # ))
                # # import random
                # # random.seed(1234)
                # # relation_label = random.choice(["UPREGULATOR|ACTIVATOR|INDIRECT_UPREGULATOR", "DOWNREGULATOR|INHIBITOR|INDIRECT_DOWNREGULATOR", "AGONIST|AGONIST‐ACTIVATOR|AGONIST‐INHIBITOR", "ANTAGONIST", "SUBSTRATE|PRODUCT_OF|SUBSTRATE_PRODUCT_OF" "No RELATION"])
                #
                # if relation_label == "No RELATION":
                #     attributes = copy.deepcopy(ExplanationX00002.reference_model_fail_attributes)
                #     attributes[1]["value"] = attributes[1]["value"].format(closest_reference_pair[0]["TOKS"], closest_reference_pair[1]["TOKS"], article_id)
                #     result['attributes'] += attributes
                # else:
                #     article = article_information.get(article_id, {"Title": "Article Not Found", "Author": "Article Not Found"})
                #     attributes = copy.deepcopy(ExplanationX00002.reference_model_pass_attributes)
                #     attributes[1]["value"] = attributes[1]["value"].format(subject_name=closest_reference_pair[0]["TOKS"],
                #                                                            object_name=closest_reference_pair[1]["TOKS"],
                #                                                            relation_label=relation_label, article_title=article["Title"], article_author=article["Author"])
                #     result['attributes'] += attributes
                #     score = 1.0
                #
                # result['score'] = score

            if len(pairs_to_predict) > 0:
                self.logs.append(clsLogEvent(
                    identifier=self.case_id,
                    level="DEBUG",
                    code="",
                    message=f"Edge {edgeId} explanation queued."
                ))

                predictable_edges.append((edge, result, article_information, pairs_to_predict))
            else:
                results.append(result)

        if len(predictable_edges) > 0:
            prediction_sentences = []
            for edge, result, article_information, pairs_to_predict in predictable_edges:
                for article_id, closest_reference_pair in pairs_to_predict:
                    # combine two sentences and wrap the matched text in special characters
                    subject_pattern = re.compile(re.escape(str(closest_reference_pair[0]['TOKS'])), re.IGNORECASE)
                    subject_special_sentence = subject_pattern.sub(f"<<{closest_reference_pair[0]['TOKS']}>>", closest_reference_pair[0]['SENT_TEXT'])
                    # subject_special_sentence = closest_reference_pair[0]['SENT_TEXT'].replace(closest_reference_pair[0]['TOKS'], f"<<{closest_reference_pair[0]['TOKS']}>>")

                    object_pattern = re.compile(re.escape(str(closest_reference_pair[1]['TOKS'])), re.IGNORECASE)
                    object_special_sentence = object_pattern.sub(f"<<{closest_reference_pair[1]['TOKS']}>>", closest_reference_pair[1]['SENT_TEXT'])
                    # object_special_sentence = closest_reference_pair[1]['SENT_TEXT'].replace(closest_reference_pair[1]['TOKS'], f"[[{closest_reference_pair[1]['TOKS']}]]")
                    instance = f"{subject_special_sentence} {object_special_sentence}"
                    prediction_sentences.append(instance)

            t0 = time.time()
            relation_labels = relationship_classification(prediction_sentences)
            t1 = time.time()
            logging.debug(f"Predicting {len(prediction_sentences)} sentences took {t1-t0} seconds.")
            self.logs.append(clsLogEvent(
                identifier=self.case_id,
                level="DEBUG",
                code="",
                message=f"Predicting {len(prediction_sentences)} sentences took {t1-t0} seconds."
            ))

            # for i, (edge, article_id, closest_reference_pair, result) in enumerate(predictable_edges):
            prediction_index = 0
            for i, (edge, result, article_information, pairs_to_predict) in enumerate(predictable_edges):
                for j, (article_id, closest_reference_pair) in enumerate(pairs_to_predict):
                    relation_label = relation_labels[prediction_index]
                    self.logs.append(clsLogEvent(
                        identifier=self.case_id,
                        level="DEBUG",
                        code="",
                        message=f"Edge {edgeId} Beginning relationship classification..."
                    ))

                    # relation_label = relationship_classification(instance)
                    self.logs.append(clsLogEvent(
                        identifier=self.case_id,
                        level="DEBUG",
                        code="",
                        message=f"Edge {edgeId} Relationship classification finished {relation_label}"
                    ))
                    # import random
                    # random.seed(1234)
                    # relation_label = random.choice(["UPREGULATOR|ACTIVATOR|INDIRECT_UPREGULATOR", "DOWNREGULATOR|INHIBITOR|INDIRECT_DOWNREGULATOR", "AGONIST|AGONIST‐ACTIVATOR|AGONIST‐INHIBITOR", "ANTAGONIST", "SUBSTRATE|PRODUCT_OF|SUBSTRATE_PRODUCT_OF" "No RELATION"])

                    if relation_label == "No RELATION":
                        attributes = copy.deepcopy(ExplanationX00002.reference_model_fail_attributes)
                        attributes[1]["value"] = attributes[1]["value"].format(closest_reference_pair[0]["TOKS"], closest_reference_pair[1]["TOKS"], article_id)
                        result['attributes'] += attributes
                    else:
                        article = article_information.get(article_id, {"Title": "Article Not Found", "Author": "Article Not Found"})
                        attributes = copy.deepcopy(ExplanationX00002.reference_model_pass_attributes)
                        attributes[1]["value"] = attributes[1]["value"].format(subject_name=closest_reference_pair[0]["TOKS"],
                                                                               object_name=closest_reference_pair[1]["TOKS"],
                                                                               relation_label=relation_label, article_title=article["Title"], article_author=article["Author"])
                        result['attributes'] += attributes
                        score = 1.0

                    result['score'] = score

                    prediction_index += 1

                results.append(result)

        self.logs.append(clsLogEvent(
            identifier=self.case_id,
            level="DEBUG",
            code="",
            message=f"Explained all {len(knowledge_graph['edges'])} edges."
        ))

        return results
コード例 #16
0
    def create_results_and_explain(self, case_solution):
        query_graph = case_solution.query_graph
        knowledge_graph = case_solution.knowledge_graph

        e00_id = list(query_graph["edges"].keys())[0]
        node_ids = sorted(list(query_graph["nodes"].keys()))
        n00_id = node_ids[0]
        n01_id = node_ids[1]

        results = []

        # sorting for deterministic results. This can go away after index isn't used for rank.
        for index, (edgeId, edge) in enumerate(
                sorted(list(knowledge_graph['edges'].items()),
                       key=lambda i: i[0])):
            if index == 0:
                self.logs.append(
                    clsLogEvent(
                        identifier=self.case_id,
                        level="DEBUG",
                        code="",
                        message=
                        f"Starting explanations for {len(knowledge_graph['edges'])} edges."
                    ))
            if index % 50 == 0 and index > 0:
                logging.debug(
                    f"{self.case_id} edge {index} of {len(knowledge_graph['edges'])}"
                )
                self.logs.append(
                    clsLogEvent(
                        identifier=self.case_id,
                        level="DEBUG",
                        code="",
                        message=
                        f"Explained {index} of {len(knowledge_graph['edges'])} edges."
                    ))

            result = OrderedDict()
            result['edge_bindings'] = {
                e00_id: [{
                    "id": edgeId,
                }]
            }
            result['node_bindings'] = {
                n00_id: [{
                    "id": edge['subject']
                }],
                n01_id: [{
                    "id": edge['object']
                }],
            }
            result['score'] = 0.0
            result['attributes'] = []

            results.append(result)

        self.logs.append(
            clsLogEvent(
                identifier=self.case_id,
                level="DEBUG",
                code="",
                message=f"Explained all {len(knowledge_graph['edges'])} edges."
            ))

        return results