コード例 #1
0
    def score(self, key, tagged_gram, test_file_path, wv_dict, **kwargs):
        """Score *key* using only the test contexts WMD-similar to the example
        contexts of *tagged_gram*.

        tagged_gram example: [['esafetyworld', 'comp'], ['inc', 'end']]

        For each context seen around the example occurrence of the gram, the
        single most similar context from the test file is selected via Word
        Mover's Distance; *wv_dict* is then restricted to those contexts and
        the final scoring is delegated to OneShotTestDoc2Vec.score.
        """
        example_contexts = self.example_tagged_words_contexts_dict[
            tagged_words_to_str(tagged_gram)]
        test_contexts = list(self.context_sized_test_wv_dict.keys())
        # num_best=1: keep only the single best-matching test context per
        # example context.
        wmd_instance = WmdSimilarity(test_contexts,
                                     self.context_vec_model,
                                     num_best=1)
        # sims is [(index, similarity)]; index points into test_contexts.
        similar_contexts = [
            test_contexts[wmd_instance[example_context][0][0]]
            for example_context in example_contexts
        ]
        logging.info('similar contexts:')
        # Was print(); route through logging like the rest of the method.
        logging.info(similar_contexts)
        context_wv_dict = util.subset_dict_by_list2(wv_dict, similar_contexts)
        logging.info('context_wv_dict:')
        logging.info(len(context_wv_dict))
        gram = util.sentence_from_tagged_ngram(tagged_gram)
        return OneShotTestDoc2Vec.score(self, key, gram, test_file_path,
                                        context_wv_dict)
コード例 #2
0
 def test_file_processing(self, test_file_path):
     """Build the context-sized test word-vector dict for *test_file_path*.

     Runs the base-class processing first, then extracts tagged context-sized
     ngrams from the file and vectorizes their plain-word sentences.
     """
     OneShotTestDoc2Vec.test_file_processing(self, test_file_path)
     tagged_ngrams = ex_parsing.ngrams_from_file(test_file_path,
                                                 self.context_size,
                                                 tagged=True)
     # Strip the tags: keep only the word portion of each tagged ngram.
     context_sentences = [
         util.sentence_from_tagged_ngram(ngram) for ngram in tagged_ngrams
     ]
     self.context_sized_test_wv_dict = self.context_vector_to_dict_by_list(
         self.context_vec_model, context_sentences)
コード例 #3
0
 def test_file_processing(self, test_file_path):
     """Build the context-sized test word-vector dict for *test_file_path*.

     Runs the superclass processing first, then extracts tagged context-sized
     ngrams from the file and vectorizes their plain-word sentences.
     """
     super().test_file_processing(test_file_path)
     ngrams = ex_parsing.ngrams_from_file(test_file_path,
                                          self.context_size,
                                          tagged=True)
     sentences = [util.sentence_from_tagged_ngram(t) for t in ngrams]
     # Lazy %-args: the message is only formatted if INFO is enabled.
     logging.info("sentences: %d", len(sentences))
     self.context_sized_test_wv_dict = self.doc_vector_to_dict_by_list(
         self.context_vec_model, sentences)
コード例 #4
0
ファイル: example_parsing.py プロジェクト: ling60/coies
def entity_dict_from_tagged_tokens(tagged_tokens):
    """Map each entity to the plain-word sentences of its tagged occurrences.

    Builds the entity -> tagged-words mapping from *tagged_tokens*, then
    converts every tagged-words sequence to its untagged sentence form.

    Returns:
        dict: entity -> list of sentences (one per tagged occurrence).
    """
    entity_tagged_words_dict = entity_tagged_words_dict_from_tagged_tokens(
        tagged_tokens)
    # Dict comprehension replaces the append loop; also drops the ambiguous
    # single-letter accumulator name `l`.
    entity_dict = {
        entity: [utils.sentence_from_tagged_ngram(words)
                 for words in tagged_words]
        for entity, tagged_words in entity_tagged_words_dict.items()
    }
    logging.info(entity_dict)
    return entity_dict
コード例 #5
0
    def score(self, key, tagged_gram, test_file_path, wv_dict, **kwargs):
        """Score *key* against contexts similar to the example occurrence of
        *tagged_gram*, delegating final scoring to OneShotTestDoc2Vec.score.

        tagged_gram example: [['esafetyworld', 'comp'], ['inc', 'end']]
        """
        # Context vectors recorded for the example occurrence of this gram.
        example_vecs = self.example_tagged_words_contexts_dict[
            tagged_words_to_str(tagged_gram)]

        # Restrict wv_dict to test contexts close enough to the example ones.
        context_wv_dict, context_similarity_dict = make_context_dict(
            example_vecs,
            self.context_sized_test_wv_dict,
            wv_dict,
            self.topn * 2,
            self.context_threshold)

        plain_gram = util.sentence_from_tagged_ngram(tagged_gram)
        return OneShotTestDoc2Vec.score(self, key, plain_gram, test_file_path,
                                        context_wv_dict)
コード例 #6
0
def find_ngrams_by_tagged_words(tagged_ngrams, tagged_words, window_size=None):
    """Return sentences of the ngrams that contain *tagged_words*.

    Each candidate ngram is flattened (optionally trimmed by *window_size*
    tokens at both ends first) and kept if the flattened *tagged_words*
    appear in it as a sublist.

    Returns:
        list: untagged sentence form of every matching ngram.
    """
    # Hoist loop-invariant work: neither the int() conversion nor the
    # flattened query words depend on the current ngram.
    if window_size:
        window_size = int(window_size)
    # NOTE(review): materialized as a list (original passed a one-shot filter
    # iterator) — assumes utils.is_sublist_of accepts any iterable; confirm.
    w = list(filter(None, utils.flatten_list(tagged_words)))
    ngrams = []
    for tagged_ngram in tagged_ngrams:
        if window_size:
            t = filter(
                None,
                utils.flatten_list(tagged_ngram[window_size:-window_size]))
        else:
            t = filter(None, utils.flatten_list(tagged_ngram))
        if utils.is_sublist_of(w, t):
            ngrams.append(utils.sentence_from_tagged_ngram(tagged_ngram))
    return ngrams
コード例 #7
0
ファイル: example_parsing.py プロジェクト: ling60/coies
def str_1_to_n_grams_from_file(file_path, n=5, tagged=False):
    """Collect 1- through n-gram strings from *file_path*.

    Returns:
        list: one inner list of ngram strings per size, from 1 up to *n*.
    """
    grams = []
    for size in range(1, n + 1):
        sequenced_ngrams = sequenced_ngrams_from_file(file_path,
                                                      size,
                                                      tagged=tagged)
        if tagged:
            # Strip the tags in place: keep only the word portion of each
            # tagged ngram, as a tuple.
            sequenced_ngrams[:] = [
                tuple(utils.sentence_from_tagged_ngram(ngram))
                for ngram in sequenced_ngrams
            ]
        grams.append(
            [utils.iter_to_string(ngram) for ngram in sequenced_ngrams])
    return grams
コード例 #8
0
ファイル: example_parsing.py プロジェクト: ling60/coies
def m_to_n_grams_from_file(file_path, m=1, n=5, tagged=False):
    """Collect every m- through n-gram from *file_path* into one flat list."""
    assert n >= m > 0
    grams = []
    for size in range(m, n + 1):
        sized_ngrams = sequenced_ngrams_from_file(file_path,
                                                  size,
                                                  tagged=tagged)
        if tagged:
            # Strip the tags in place: keep only the word portion of each
            # tagged ngram, as a tuple.
            sized_ngrams[:] = [
                tuple(utils.sentence_from_tagged_ngram(ngram))
                for ngram in sized_ngrams
            ]
        grams.extend(sized_ngrams)
    return grams
コード例 #9
0
    def score(self, key, tagged_gram, test_file_path, wv_dict, **kwargs):
        """Score *key* using contexts found near the example entities in the
        test file, delegating final scoring to the superclass.

        tagged_gram example: [['esafetyworld', 'comp'], ['inc', 'end']]
        """
        # Contexts in the test file resembling the example occurrences,
        # keyed by context with a similarity value each.
        sim_dict = self.find_example_contexts(
            self.example_entity_dict, self.example_ngrams, test_file_path,
            self.context_size)
        # Restrict the word-vector dict to just those contexts.
        context_wv_dict = util.subset_dict_by_list2(wv_dict, sim_dict.keys())

        plain_gram = util.sentence_from_tagged_ngram(tagged_gram)
        return super().score(key,
                             plain_gram,
                             test_file_path,
                             context_wv_dict,
                             context_sim_dict=sim_dict)