Beispiel #1
0
 def consume_next(self, pred_pack: MultiPack, _):
     """Record one (doc id, passage id, rank) triple per query result.

     The query pack is fetched from ``pred_pack`` under the name
     ``self.configs.pack_name`` and its first ``Query`` entry supplies
     the results. Ranks start at 1 and follow the iteration order of
     ``query.results``; no re-sorting happens here.

     Args:
         pred_pack: The multi-pack that holds the query pack.
         _: Unused.

     Raises:
         ProcessExecutionException: If the query pack has no pack name
             while there is at least one result to record.
     """
     query_pack: DataPack = pred_pack.get_pack(self.configs.pack_name)
     first_query = list(query_pack.get(Query))[0]

     ranked_items = enumerate(first_query.results.items(), start=1)
     for position, (pid, _score) in ranked_items:
         # The pack name doubles as the document id of the query.
         doc_id: Optional[str] = query_pack.pack_name
         if doc_id is None:
             raise ProcessExecutionException(
                 'Doc ID of the query pack is not set, '
                 'please double check the reader.')
         self.predicted_results.append((doc_id, pid, str(position)))
Beispiel #2
0
    def get_pack_index(self, pack_id: int) -> int:
        """
        Translate a global pack id into its index within this multi-pack.

        Args:
            pack_id: The global pack id to find.

        Returns:
            The value stored for ``pack_id`` in ``self._inverse_pack_ref``.

        Raises:
            ProcessExecutionException: If ``pack_id`` is not a key of
                ``self._inverse_pack_ref``; the original ``KeyError`` is
                chained as the cause.
        """
        try:
            pack_index = self._inverse_pack_ref[pack_id]
        except KeyError as lookup_error:
            raise ProcessExecutionException(
                f"Pack {pack_id} is not in this multi-pack."
            ) from lookup_error
        return pack_index
    def _process(self, input_pack: DataPack):
        """Run the SpaCy pipeline on the pack text and record its output.

        Args:
            input_pack: The data pack whose ``text`` is analysed. It is
                also handed to ``_process_ner`` and ``_process_parser``
                together with the pipeline output.

        Raises:
            ProcessExecutionException: If ``self.nlp`` is ``None``.
        """
        text = input_pack.text

        # The pipeline must have been set up before any pack is processed.
        pipeline = self.nlp
        if pipeline is None:
            raise ProcessExecutionException(
                "The SpaCy pipeline is not initialized, maybe you "
                "haven't called the initialization function.")
        analysis = pipeline(text)

        # Named entities first, then the per-sentence parses.
        self._process_ner(analysis, input_pack)
        self._process_parser(analysis.sents, input_pack)
    def consume_next(self, pred_pack: MultiPack, _):
        """Record query results as ranked (doc id, passage id, rank) triples.

        The query pack is fetched from ``pred_pack`` under the name
        ``self.configs.pack_name`` and its first ``Query`` entry supplies
        the results. Results are ordered by their value in
        ``query.results`` (presumably a relevance score), largest first,
        and ranked starting at 1. Each triple is appended to
        ``self.predicted_results`` with the rank as a string.

        Args:
            pred_pack: The multi-pack that holds the query pack.
            _: Unused.

        Raises:
            ProcessExecutionException: If the query pack has no pack name
                while there is at least one result to record.
        """
        query_pack: DataPack = pred_pack.get_pack(self.configs.pack_name)
        query = list(query_pack.get(Query))[0]

        ranked_results = sorted(
            query.results.items(), key=lambda item: item[1], reverse=True)
        if not ranked_results:
            # Nothing to record; a missing pack name is only an error when
            # there is a result that needs a doc id.
            return

        # The pack name doubles as the document id and is the same for
        # every result, so validate it once instead of per iteration.
        doc_id: Optional[str] = query_pack.pack_name
        if doc_id is None:
            raise ProcessExecutionException(
                'Doc ID of the query pack is not set, '
                'please double check the reader.')

        for rank, (pid, _score) in enumerate(ranked_results, start=1):
            self.predicted_results.append((doc_id, pid, str(rank)))
Beispiel #5
0
 def __del__(self):
     """Complain if entries are still pending when the object is finalized.

     Raises:
         ProcessExecutionException: If ``self._pending_entries`` is not
             empty.
     """
     # NOTE(review): Python does not propagate exceptions raised inside
     # ``__del__``; they are reported on stderr instead. Confirm that this
     # is the intended way to surface the problem.
     if len(self._pending_entries) == 0:
         return
     raise ProcessExecutionException(
         f"There are {len(self._pending_entries)} "
         f"entries not added to the index correctly.")