예제 #1
0
    def _suggest(self, text, project, params):
        """Request suggestions for ``text`` from the configured HTTP endpoint.

        The text (plus ``params['project']`` when that key is present) is
        POSTed as form data to ``params['endpoint']``. The decoded JSON reply
        is either the list of hits itself or a mapping that holds the list
        under the ``'results'`` key. Each hit must provide ``'uri'``,
        ``'label'`` and ``'score'``; only hits with a score above 0.0 are kept.

        HTTP failures, undecodable JSON and malformed hit data are reported
        through ``self.warning`` and produce an empty result instead of an
        exception.
        """
        data = {'text': text}
        if 'project' in params:
            data['project'] = params['project']

        try:
            req = requests.post(params['endpoint'], data=data)
            req.raise_for_status()
        except requests.exceptions.RequestException as err:
            self.warning("HTTP request failed: {}".format(err))
            return ListSuggestionResult([], project.subjects)

        try:
            response = req.json()
        except ValueError as err:
            self.warning("JSON decode failed: {}".format(err))
            return ListSuggestionResult([], project.subjects)

        # Only a mapping can wrap the hits under 'results'. Any other JSON
        # value is passed on as-is so that a malformed payload is reported by
        # the handler below instead of raising here.
        if isinstance(response, dict) and 'results' in response:
            results = response['results']
        else:
            results = response

        try:
            return ListSuggestionResult([SubjectSuggestion(uri=h['uri'],
                                                           label=h['label'],
                                                           score=h['score'])
                                         for h in results
                                         if h['score'] > 0.0],
                                        project.subjects)
        except (KeyError, TypeError, ValueError) as err:
            # KeyError covers hits that lack one of the expected fields.
            self.warning("Problem interpreting JSON data: {}".format(err))
            return ListSuggestionResult([], project.subjects)
예제 #2
0
def test_list_suggestions_vector_enforce_score_range(subject_index):
    """Check the score range of the vector produced by as_vector().

    The input scores run from 1.5 down to -0.5; the vector is expected to
    hold 1.0 for 'sinetit', 0.0 for 'viikinkiaika' and to sum to 2.5.
    """
    scored_subjects = [
        ('http://www.yso.fi/onto/yso/p7141', 'sinetit', 1.5),
        ('http://www.yso.fi/onto/yso/p6479', 'viikingit', 1.0),
        ('http://www.yso.fi/onto/yso/p14173', 'kaivaukset', 0.5),
        ('http://www.yso.fi/onto/yso/p14588', 'riimukivet', 0.0),
        ('http://www.yso.fi/onto/yso/p12738', 'viikinkiaika', -0.5),
    ]
    suggestions = ListSuggestionResult([
        SubjectSuggestion(uri=uri, label=label, notation=None, score=score)
        for uri, label, score in scored_subjects
    ])

    vector = suggestions.as_vector(subject_index)
    assert vector.sum() == 2.5

    # Labels with a single acceptable score; every other subject may hold
    # any of the remaining in-range values.
    pinned_scores = {'sinetit': (1.0,), 'viikinkiaika': (0.0,)}
    for subject_id, score in enumerate(vector):
        label = subject_index[subject_id][1]
        assert score in pinned_scores.get(label, (1.0, 0.5, 0.0))
예제 #3
0
파일: maui.py 프로젝트: mo-fu/Annif
 def _suggest(self, text, params):
     """Return suggestions for ``text``, or an empty result.

     An empty result is returned without issuing a request when the text is
     blank after stripping, and also when the request yields a falsy
     response.
     """
     if not text.strip():
         return ListSuggestionResult([])
     response = self._suggest_request(text, params)
     if not response:
         return ListSuggestionResult([])
     return self._response_to_result(response)
예제 #4
0
def test_list_suggestion_result_vector_notfound(subject_index):
    """A suggestion for 'http://example.com/notfound' must yield a zero sum.

    The URI is presumably absent from the ``subject_index`` fixture.
    """
    unknown_hit = SubjectSuggestion(uri='http://example.com/notfound',
                                    label='not found',
                                    notation=None,
                                    score=1.0)
    result = ListSuggestionResult([unknown_hit])
    vector = result.as_vector(subject_index)
    assert vector.sum() == 0
예제 #5
0
 def _response_to_result(self, response):
     """Convert a decoded JSON response into a suggestion result.

     Hits are read from ``response['topics']``; each hit must provide
     ``'id'``, ``'label'`` and ``'probability'``. Only hits with a
     probability above 0.0 are kept.

     Malformed data (wrong types or missing keys) is reported through
     ``self.warning`` and produces an empty result instead of an exception.
     """
     try:
         return ListSuggestionResult([
             SubjectSuggestion(
                 uri=h['id'], label=h['label'], score=h['probability'])
             for h in response['topics'] if h['probability'] > 0.0
         ], self.project.subjects)
     except (KeyError, TypeError, ValueError) as err:
         # KeyError covers a missing 'topics' list or a hit lacking a field.
         self.warning("Problem interpreting JSON data: {}".format(err))
         return ListSuggestionResult([], self.project.subjects)
예제 #6
0
def test_list_suggestion_result_vector_destination(subject_index):
    """as_vector() must hand back the very array given as ``destination``."""
    hits = [
        SubjectSuggestion(uri='http://www.yso.fi/onto/yso/p7141',
                          label='sinetit',
                          notation=None,
                          score=1.0),
        SubjectSuggestion(uri='http://www.yso.fi/onto/yso/p6479',
                          label='viikingit',
                          notation=None,
                          score=0.5),
    ]
    suggestions = ListSuggestionResult(hits)
    destination = np.zeros(len(subject_index), dtype=np.float32)
    returned = suggestions.as_vector(subject_index, destination=destination)
    # Identity, not equality: the caller-supplied buffer itself is expected.
    assert returned is destination
예제 #7
0
    def _response_to_result(self, response):
        """Convert a decoded JSON response into a suggestion result.

        Hits are read from ``response['topics']``; each hit must provide
        ``'id'`` and ``'probability'``. Only hits with a probability above
        0.0 are kept. Suggestions are created without label or notation and
        then passed to ``ListSuggestionResult.create_from_index`` together
        with the project's subjects.

        Malformed data (wrong types or missing keys) is reported through
        ``self.warning`` and produces an empty result instead of an exception.
        """
        try:
            subject_suggestions = [SubjectSuggestion(
                uri=hit['id'],
                label=None,
                notation=None,
                score=hit['probability'])
                for hit in response['topics'] if hit['probability'] > 0.0]
        except (KeyError, TypeError, ValueError) as err:
            # KeyError covers a missing 'topics' list or a hit lacking a field.
            self.warning("Problem interpreting JSON data: {}".format(err))
            return ListSuggestionResult([], self.project.subjects)

        return ListSuggestionResult.create_from_index(subject_suggestions,
                                                      self.project.subjects)
예제 #8
0
def test_hitfilter_zero_score(subject_index):
    """A default SuggestionFilter must drop a suggestion scored 0.0."""
    zero_hit = SubjectSuggestion(uri='uri', label='label', score=0.0)
    unfiltered = ListSuggestionResult([zero_hit], subject_index)
    hit_filter = SuggestionFilter()
    filtered = hit_filter(unfiltered)
    assert isinstance(filtered, SuggestionResult)
    assert len(filtered) == 0
예제 #9
0
 def _suggest(self, text, params):
     """Return a one-item result built from this backend's own uri and label.

     ``params['score']`` (default 1.0, converted to float) and
     ``params['notation']`` (default None) supply the remaining fields.
     ``text`` is not used.
     """
     fixed_score = float(params.get('score', 1.0))
     fixed_notation = params.get('notation')
     only_hit = SubjectSuggestion(uri=self.uri,
                                  label=self.label,
                                  notation=fixed_notation,
                                  score=fixed_score)
     return ListSuggestionResult([only_hit], self.project.subjects)
예제 #10
0
def test_list_suggestions_vector_notfound(document_corpus, subject_index):
    """A suggestion for 'http://example.com/notfound' must yield a zero sum.

    The URI is presumably absent from the ``subject_index`` fixture.
    """
    unknown_hit = SubjectSuggestion(uri='http://example.com/notfound',
                                    label='not found',
                                    notation=None,
                                    score=1.0)
    result = ListSuggestionResult([unknown_hit], subject_index)
    assert result.vector.sum() == 0
예제 #11
0
def generate_suggestions(n, subject_index):
    """Build a result holding ``n`` synthetic suggestions.

    Suggestion ``i`` (counting from 0) gets the URI
    'http://example.org/<i>', the label 'hit <i>' and the score 1 / (i + 1).
    """
    return ListSuggestionResult(
        [SubjectSuggestion(uri='http://example.org/{}'.format(rank),
                           label='hit {}'.format(rank),
                           score=1.0 / (rank + 1))
         for rank in range(n)],
        subject_index)
예제 #12
0
파일: omikuji.py 프로젝트: olivierSDM/Annif
 def _suggest(self, text, params):
     """Vectorize ``text`` and turn the model's predictions into suggestions.

     An empty result is returned when the vectorized text has no stored
     entries. Otherwise the (column, value) pairs of the vector are passed
     to the model, asking for at most ``params['limit']`` predictions, and
     each predicted subject ID is looked up in the project's subjects.
     """
     self.debug('Suggesting subjects for text "{}..." (len={})'.format(
         text[:20], len(text)))
     vector = self.vectorizer.transform([text])
     if vector.nnz == 0:
         # Nothing stored in the vector, so there is nothing to predict from.
         return ListSuggestionResult([])

     features = [(col, vector[row, col])
                 for row, col in zip(*vector.nonzero())]
     top_k = int(params['limit'])
     predictions = self._model.predict(features, top_k=top_k)

     # Lazily pair each predicted score with its subject entry.
     scored_subjects = ((self.project.subjects[subj_id], score)
                        for subj_id, score in predictions)
     return ListSuggestionResult([
         SubjectSuggestion(uri=subject[0],
                           label=subject[1],
                           notation=subject[2],
                           score=score)
         for subject, score in scored_subjects
     ])
예제 #13
0
def test_list_suggestion_result_vector(subject_index):
    """as_vector() must give an index-sized array holding the hit scores."""
    hits = [
        SubjectSuggestion(uri='http://www.yso.fi/onto/yso/p7141',
                          label='sinetit',
                          notation=None,
                          score=1.0),
        SubjectSuggestion(uri='http://www.yso.fi/onto/yso/p6479',
                          label='viikingit',
                          notation=None,
                          score=0.5),
    ]
    vector = ListSuggestionResult(hits).as_vector(subject_index)

    assert isinstance(vector, np.ndarray)
    assert len(vector) == len(subject_index)
    assert vector.sum() == 1.5

    # Every subject other than the two suggested ones must score zero.
    expected_scores = {'sinetit': 1.0, 'viikingit': 0.5}
    for subject_id, score in enumerate(vector):
        label = subject_index[subject_id][1]
        assert score == expected_scores.get(label, 0.0)
예제 #14
0
 def _suggest(self, text, params):
     """Split ``text`` into sentence chunks and suggest subjects for them.

     Sentences come from the project's analyzer and are joined with spaces
     into groups of ``params['chunksize']`` sentences. An empty result is
     returned when this produces no chunks; otherwise the chunks are passed
     to ``self._suggest_chunks``.
     """
     self.debug('Suggesting subjects for text "{}..." (len={})'.format(
         text[:20], len(text)))
     sentences = self.project.analyzer.tokenize_sentences(text)
     self.debug('Found {} sentences'.format(len(sentences)))

     step = int(params['chunksize'])
     chunktexts = [' '.join(sentences[start:start + step])
                   for start in range(0, len(sentences), step)]
     self.debug('Split sentences into {} chunks'.format(len(chunktexts)))

     if chunktexts:
         return self._suggest_chunks(chunktexts, params)
     # No input, so the result is empty.
     return ListSuggestionResult([])
예제 #15
0
 def _suggest_chunks(self, chunktexts, project):
     """Predict on each chunk and average the converted results.

     Chunks for which ``self._inputs_to_exampletext`` gives a falsy value
     are skipped. When no chunk is left, an empty list result is returned;
     otherwise the converted per-chunk results are averaged along axis 0.
     """
     converted = []
     for chunk in chunktexts:
         exampletext = self._inputs_to_exampletext(project, chunk)
         if exampletext:
             prediction = self._model.predict(' {}'.format(exampletext))
             converted.append(self._convert_result(prediction, project))

     if converted:
         return VectorSuggestionResult(
             np.array(converted).mean(axis=0), project.subjects)
     # Nothing could be predicted, so the result is empty.
     return ListSuggestionResult(hits=[],
                                 subject_index=project.subjects)
예제 #16
0
 def _suggest(self, text, params):
     """Suggest subjects for ``text`` from the model's (uri, score) pairs.

     The model is queried with a single-item batch and its first result is
     used. Each URI is looked up in the project's subjects to find its
     label; a URI without a subject ID gets the label None. Suggestions
     are created without a notation.
     """
     self.debug(
         f'Suggesting subjects for text "{text[:20]}..." (len={len(text)})')
     result = self._model.suggest_proba([text])[0]
     suggestions = []
     for uri, score in result:
         subject_id = self.project.subjects.by_uri(uri)
         # Compare with None explicitly: subject ID 0 is falsy but is still
         # a valid position in the subject index.
         # NOTE(review): assumes by_uri signals an unknown URI with None;
         # confirm against the subject index implementation.
         if subject_id is not None:
             label = self.project.subjects[subject_id][1]
         else:
             label = None
         suggestions.append(SubjectSuggestion(uri, label, None, score))
     return ListSuggestionResult(suggestions)
예제 #17
0
    def _suggest_chunks(self, chunktexts, params):
        """Predict on each chunk and average the converted results.

        Chunks for which ``self._inputs_to_exampletext`` gives a falsy value
        are skipped. When no chunk is left, an empty list result is returned;
        otherwise the converted per-chunk results are stacked as float32 and
        averaged along axis 0.
        """
        converted = []
        for chunk in chunktexts:
            exampletext = self._inputs_to_exampletext(chunk)
            if exampletext:
                prediction = self._model.predict(' {}'.format(exampletext))
                converted.append(self._convert_result(prediction))

        if converted:
            return VectorSuggestionResult(
                np.array(converted, dtype=np.float32).mean(axis=0))
        # Nothing could be predicted, so the result is empty.
        return ListSuggestionResult([])
예제 #18
0
def test_hitfilter_list_suggestion_results_with_deprecated_subjects(
        subject_index):
    """Filtering must drop the suggestion for the label-less subject.

    A subject with neither label nor notation is appended to the index
    first; of the three suggestions only the first two are expected to
    survive the filter, unchanged and in their original order.
    """
    subject_index.append('http://example.org/deprecated', None, None)
    hits = [
        SubjectSuggestion(uri='http://www.yso.fi/onto/yso/p7141',
                          label='sinetit',
                          notation=None,
                          score=1.0),
        SubjectSuggestion(uri='http://www.yso.fi/onto/yso/p6479',
                          label='viikingit',
                          notation=None,
                          score=0.5),
        SubjectSuggestion(uri='http://example.org/deprecated',
                          label=None,
                          notation=None,
                          score=0.5),
    ]
    suggestions = ListSuggestionResult(hits)
    filtered_suggestions = SuggestionFilter(subject_index)(suggestions)

    assert isinstance(filtered_suggestions, SuggestionResult)
    assert len(filtered_suggestions) == 2
    for position in (0, 1):
        kept = filtered_suggestions.as_list(subject_index)[position]
        assert kept == suggestions.as_list(subject_index)[position]
예제 #19
0
파일: fasttext.py 프로젝트: MarkWh1te/Annif
    def _suggest_chunks(self, chunktexts, params):
        """Sum per-chunk label scores and return the best-scoring subjects.

        Scores predicted for the same label in different chunks are added
        up. The labels are then ranked by (total score, label) in descending
        order, cut to ``params['limit']`` entries, and each total is divided
        by the number of chunks to give the suggestion score.
        """
        limit = int(params['limit'])
        chunklabels, chunkscores = self._predict_chunks(chunktexts, limit)

        totals = collections.defaultdict(float)
        for labels, scores in zip(chunklabels, chunkscores):
            for label, score in zip(labels, scores):
                totals[label] += score

        ranked = sorted(((total, label) for label, total in totals.items()),
                        reverse=True)

        # Lazily pair each of the top labels with its subject entry.
        top_subjects = ((self._label_to_subject(label), total)
                        for total, label in ranked[:limit])
        hits = [SubjectSuggestion(uri=subject[0],
                                  label=subject[1],
                                  score=total / len(chunktexts))
                for subject, total in top_subjects]
        return ListSuggestionResult(hits, self.project.subjects)
예제 #20
0
    def _suggest(self, text, params):
        """Extract keyphrases from ``text`` with YAKE and suggest subjects.

        A new keyword extractor is configured from ``params`` on every call
        and stored on ``self._kw_extractor``. The extracted keyphrases are
        mapped to (uri, score) pairs by ``self._keyphrases2suggestions``; the
        first ``params['limit']`` pairs with a score above 0.0 become
        suggestions without label or notation.
        """
        self.debug(
            f'Suggesting subjects for text "{text[:20]}..." (len={len(text)})')
        max_hits = int(params['limit'])

        # NOTE(review): 'features' is read from self.params while every other
        # option comes from the params argument; confirm this is intended.
        self._kw_extractor = yake.KeywordExtractor(
            lan=params['language'],
            n=int(params['max_ngram_size']),
            dedupLim=float(params['deduplication_threshold']),
            dedupFunc=params['deduplication_algo'],
            windowsSize=int(params['window_size']),
            top=int(params['num_keywords']),
            features=self.params['features'])

        scored_uris = self._keyphrases2suggestions(
            self._kw_extractor.extract_keywords(text))

        positive_hits = []
        for uri, score in scored_uris[:max_hits]:
            if score > 0.0:
                positive_hits.append(SubjectSuggestion(
                    uri=uri, label=None, notation=None, score=score))
        return ListSuggestionResult.create_from_index(positive_hits,
                                                      self.project.subjects)
예제 #21
0
def test_list_suggestions_vector(document_corpus):
    """The ``vector`` attribute must be an index-sized array of hit scores."""
    subjects = SubjectIndex(document_corpus)
    hits = [
        SubjectSuggestion(uri='http://www.yso.fi/onto/yso/p7141',
                          label='sinetit',
                          score=1.0),
        SubjectSuggestion(uri='http://www.yso.fi/onto/yso/p6479',
                          label='viikingit',
                          score=0.5),
    ]
    suggestions = ListSuggestionResult(hits, subjects)

    assert isinstance(suggestions.vector, np.ndarray)
    assert len(suggestions.vector) == len(subjects)
    assert suggestions.vector.sum() == 1.5

    # Every subject other than the two suggested ones must score zero.
    expected_scores = {'sinetit': 1.0, 'viikingit': 0.5}
    for subject_id, score in enumerate(suggestions.vector):
        label = subjects[subject_id][1]
        assert score == expected_scores.get(label, 0.0)
예제 #22
0
 def _suggest(self, text, project, params):
     """Return a one-item result built from this backend's own uri and label.

     The score comes from ``params['score']`` (default 1.0, converted to
     float). ``text`` is not used.
     """
     fixed_score = float(params.get('score', 1.0))
     only_hit = SubjectSuggestion(
         uri=self.uri, label=self.label, score=fixed_score)
     return ListSuggestionResult([only_hit], project.subjects)
예제 #23
0
 def _suggest(self, text, params):
     """Request suggestions for ``text`` and convert the response.

     A falsy response produces an empty result bound to the project's
     subjects.
     """
     response = self._suggest_request(text)
     if not response:
         return ListSuggestionResult([], self.project.subjects)
     return self._response_to_result(response)