Example #1
0
 def reset_lexicon(self, load_from=None, save_to=None):
     """Replace ``self.lexicon`` and optionally persist the result.

     :param load_from: path of a pickled lexicon. When truthy, the lexicon
         is unpickled from this file; otherwise a fresh ``Lexicon`` is
         created and filled by the private ``__add_definitions`` and
         ``__add_constructions`` methods.
     :param save_to: path to pickle the resulting lexicon to. Nothing is
         written when this is falsy.
     """
     # File modes are kept exactly as before (text mode) so that pickles
     # written by earlier versions still load.
     # NOTE(review): the pickle docs recommend binary mode ('rb'/'wb');
     # if that is adopted, switch the read and write side together.
     if load_from:
         # NOTE(review): unpickling can execute arbitrary code -- only
         # pass paths to files from a trusted source.
         with open(load_from) as lexicon_file:
             self.lexicon = cPickle.load(lexicon_file)
     else:
         self.lexicon = Lexicon()
         self.__add_definitions()
         self.__add_constructions()
     if save_to:
         # The context manager guarantees the dump is flushed and the
         # handle closed even if pickling raises.
         with open(save_to, 'w') as lexicon_file:
             cPickle.dump(self.lexicon, lexicon_file)
Example #2
0
    def get_machines_from_deps_and_corefs(self, dep_lists, corefs):
        """Build machines from dependency lists, unifying coreferent words.

        :param dep_lists: one list of dependencies per sentence. Each
            dependency unpacks as ``(dep, (word1, id1), (word2, id2))``.
        :param corefs: iterable of ``((word, sen_no), mentions)`` pairs,
            where ``mentions`` is an iterable of ``(m_word, m_sen_no)``.
            Every mention is replaced by ``word`` before lemmatization.
        :returns: dict mapping each lemma to the machine that
            ``self._add_dependency`` returned for it; a later dependency
            overwrites an earlier entry with the same lemma.
        :raises Exception: ``"adding dependencies failed"`` when handling
            any dependency raises; the original error is logged and its
            traceback printed first.
        """
        # mention word -> {sentence index -> canonical word}. Mention
        # sentence numbers are shifted by one so they line up with the
        # 0-based index produced by enumerate() below. The lookup is by
        # word form and sentence only, not by token id.
        coref_index = defaultdict(dict)
        for (word, _sen_no), mentions in corefs:
            for m_word, m_sen_no in mentions:
                coref_index[m_word][m_sen_no - 1] = word

        def canonical_lemma(word, sen_idx):
            """Return the lemma used as the machine name for ``word``."""
            c_word = coref_index[word].get(sen_idx, word)
            # Fall back to the (canonical) word itself when the
            # lemmatizer yields nothing.
            lemma = self.lemmatizer.lemmatize(c_word) or c_word
            # TODO (carried over from the original code): revisit this
            # substitution. Presumably '/' is not allowed in machine
            # names -- confirm.
            return lemma.replace('/', '_PER_')

        lexicon = Lexicon()
        word2machine = {}

        for i, deps in enumerate(dep_lists):
            # Pre-bind the names used by the error message so the handler
            # cannot itself fail with a NameError when `deps` is not
            # iterable or its first entry is malformed.
            dep = word1 = word2 = None
            try:
                for entry in deps:
                    # Reset per entry so a malformed entry is not reported
                    # under the previous dependency's name.
                    dep = word1 = word2 = None
                    dep, (word1, id1), (word2, id2) = entry

                    lemma1 = canonical_lemma(word1, i)
                    lemma2 = canonical_lemma(word2, i)

                    machine1, machine2 = self._add_dependency(
                        dep, (lemma1, id1), (lemma2, id2), lexicon)

                    word2machine[lemma1] = machine1
                    word2machine[lemma2] = machine2
            except Exception:
                # `except Exception` (not a bare except) lets
                # KeyboardInterrupt / SystemExit propagate untouched.
                logging.error("failure on dep: {0}({1}, {2})".format(
                    dep, word1, word2))
                traceback.print_exc()
                raise Exception("adding dependencies failed")

        return word2machine