Ejemplo n.º 1
0
def abstract():
    """Flask-style endpoint: build 4lang graphs for a premise/hypothesis pair.

    Expects a JSON body with 'hyp' and 'prem' keys; writes each sentence
    to a .sens file, runs textto4lang over it and returns both graphs as
    a JSON dict keyed by 'hyp' and 'prem'.
    """
    my_server.textto4lang.abstract = True
    my_server.textto4lang.expand = True
    my_server.textto4lang.dep_to_4lang.lexicon.lexicon = {}
    r = request.json
    data_dir = '/home/adaamko/projects/4lang/data/'
    return_sentences = {}
    # the hyp and prem branches were duplicated verbatim; one loop does both
    for key, base_fn, proc_name in (('hyp', 'abs_hypothesis', 'abs_hyp'),
                                    ('prem', 'abs_premise', 'abs_prem')):
        sens_fn = data_dir + base_fn + '.sens'
        preprocessed = my_server.textto4lang.preprocess_text(r[key])
        with open(sens_fn, 'w') as f:
            f.write(preprocessed.encode("utf8"))
        my_server.textto4lang.input_sens = sens_fn
        machines = my_server.textto4lang.process_file(sens_fn, proc_name)
        graph = MachineGraph.create_from_machines(machines[0].values())
        return_sentences[key] = graph.to_dict()
    return jsonify(return_sentences)
Ejemplo n.º 2
0
    def __init__(self, machine1, machine2, max_depth):
        """Collect the shared 3-node subgraphs of the two machines."""
        graph1 = MachineGraph.create_from_machines(
            [machine1], max_depth=max_depth)
        graph2 = MachineGraph.create_from_machines(
            [machine2], max_depth=max_depth)
        root1, root2 = machine1.printname(), machine2.printname()

        self.subgraph_dict = {}
        # NOTE: the _get_subgraph_N and _get_subgraph_N_X_N variants are
        # intentionally disabled in this version
        self.subgraph_dict.update(
            self._get_subgraph_3_nodes(graph1.G, graph2.G, root1, root2))
Ejemplo n.º 3
0
    def __init__(self, machine1, machine2, max_depth):
        """Collect the shared N-X-N subgraphs of the two machines."""
        graph1 = MachineGraph.create_from_machines(
            [machine1], max_depth=max_depth)
        graph2 = MachineGraph.create_from_machines(
            [machine2], max_depth=max_depth)
        root1, root2 = machine1.printname(), machine2.printname()

        self.subgraph_dict = {}
        # NOTE: the _get_subgraph_N variant is intentionally disabled here
        self.subgraph_dict.update(
            self._get_subgraph_N_X_N(graph1.G, graph2.G, root1, root2))
Ejemplo n.º 4
0
 def draw_single_graph(self, word, path):
     """Write one <word>_<i>.dot file per definition machine of *word*."""
     base_name = Machine.d_clean(word)
     for idx, machine in enumerate(self.definitions[word]):
         dot_src = MachineGraph.create_from_machines([machine]).to_dot()
         out_fn = os.path.join(path, '{0}_{1}.dot'.format(base_name, idx))
         with open(out_fn, 'w') as out:
             out.write(dot_src.encode('utf-8'))
Ejemplo n.º 5
0
    def run(self, sentence):
        """Parses a sentence, runs the spreading activation and returns the
        messages that have to be sent to the active plugins."""
        try:
            sp = SentenceParser()
            sa = SpreadingActivation(self.lexicon)
            machines = sp.parse(sentence)
            logging.debug('machines: {}'.format(machines))
            logging.debug('machines: {}'.format(
                [m for m in machines]))
            for machine_list in machines:
                for machine in machine_list:
                    if machine.control.kr['CAT'] == 'VERB':
                        logging.debug('adding verb construction for {}'.format(
                            machine))
                        self.lexicon.add_construction(VerbConstruction(
                            machine.printname(), self.lexicon, self.supp_dict))
            logging.info('constructions: {}'.format(
                self.lexicon.constructions))

            # results is a list of (url, data) tuples
            results = sa.activation_loop(machines)
            print 'results:', results
            print 'machines:', machines

            graph = MachineGraph.create_from_machines(
                [m[0] for m in machines], max_depth=1)
            f = open('machines.dot', 'w')
            f.write(graph.to_dot().encode('utf-8'))

            self.lexicon.clear_active()
        except Exception, e:
            import traceback
            traceback.print_exc(e)
            raise(e)
Ejemplo n.º 6
0
    def add_def_graph(self,
                      word,
                      word_machine,
                      dumped_def_graph,
                      allow_new_base=False,
                      allow_new_ext=False):
        """Rebuild a dumped definition graph as a network of machines.

        Every graph node (named "<printname>_<counter>") is mapped to a
        machine: the node whose printname equals *word* maps to
        *word_machine*, any other node to a machine obtained via
        get_machine(..., new_machine=True).  Each edge is then replayed
        as machine1.append(machine2, partition), using the edge's
        'color' attribute as the partition index.

        NOTE(review): allow_new_base / allow_new_ext are not read in this
        body — presumably consumed elsewhere; confirm before relying on them.
        """
        node2machine = {}
        graph = MachineGraph.from_dict(dumped_def_graph)
        for node in graph.nodes_iter():
            # node ids look like "<printname>_<counter>"; drop the counter
            pn = "_".join(node.split('_')[:-1])
            if pn == word:
                node2machine[node] = word_machine
            else:
                if not pn:
                    logging.warning(u"empty pn in node: {0}, word: {1}".format(
                        node, word))
                node2machine[node] = self.get_machine(pn, new_machine=True)

        for node1, adjacency in graph.adjacency_iter():
            machine1 = node2machine[node1]
            for node2, edges in adjacency.iteritems():
                machine2 = node2machine[node2]
                for i, attributes in edges.iteritems():
                    # 'color' encodes the partition this edge belongs to
                    part_index = attributes['color']
                    machine1.append(machine2, part_index)
Ejemplo n.º 7
0
    def lemma_similarity(self, lemma1, lemma2, sim_type):
        """Return the best machine-pair similarity between two lemmas.

        Results are cached symmetrically; identical lemmas score 1 and
        negative similarities are clamped to 0.
        """
        if (lemma1, lemma2) in self.lemma_sim_cache:
            return self.lemma_sim_cache[(lemma1, lemma2)]
        elif lemma1 == lemma2:
            return 1
        self.log(u'lemma1: {0}, lemma2: {1}'.format(lemma1, lemma2))

        machines1 = self.wrapper.definitions[lemma1]
        machines2 = self.wrapper.definitions[lemma2]

        # best-scoring pair of definition machines across the two lemmas
        pairs_by_sim = sorted(
            [(self.machine_similarity(machine1, machine2, sim_type),
              (machine1, machine2)) for machine1 in machines1
             for machine2 in machines2],
            reverse=True)

        sim, (machine1, machine2) = pairs_by_sim[0]

        draw_graphs = True  # use with caution
        if draw_graphs and not self.wrapper.batch:
            graph = MachineGraph.create_from_machines(
                [machine1, machine2])  # , max_depth=1)
            # with-block closes the dot file (the original leaked it)
            with open('graphs/{0}_{1}.dot'.format(lemma1, lemma2), 'w') as f:
                f.write(graph.to_dot().encode('utf-8'))

        sim = max(sim, 0)
        self.lemma_sim_cache[(lemma1, lemma2)] = sim
        self.lemma_sim_cache[(lemma2, lemma1)] = sim
        return sim
Ejemplo n.º 8
0
    def run(self, sentence):
        """Parses a sentence, runs the spreading activation and returns the
        messages that have to be sent to the active plugins."""
        try:
            sp = SentenceParser()
            sa = SpreadingActivation(self.lexicon)
            machines = sp.parse(sentence)
            logging.debug('machines: {}'.format(machines))
            logging.debug('machines: {}'.format([m for m in machines]))
            for machine_list in machines:
                for machine in machine_list:
                    if machine.control.kr['CAT'] == 'VERB':
                        logging.debug(
                            'adding verb construction for {}'.format(machine))
                        self.lexicon.add_construction(
                            VerbConstruction(machine.printname(), self.lexicon,
                                             self.supp_dict))
            logging.info('constructions: {}'.format(
                self.lexicon.constructions))

            # results is a list of (url, data) tuples
            results = sa.activation_loop(machines)
            print 'results:', results
            print 'machines:', machines

            graph = MachineGraph.create_from_machines([m[0] for m in machines],
                                                      max_depth=1)
            f = open('machines.dot', 'w')
            f.write(graph.to_dot().encode('utf-8'))

            self.lexicon.clear_active()
        except Exception, e:
            import traceback
            traceback.print_exc(e)
            raise (e)
Ejemplo n.º 9
0
 def draw_single_graph(self, word, path):
     """Write one <word>_<i>.dot file per definition machine of *word*."""
     base_name = Machine.d_clean(word)
     for idx, machine in enumerate(self.definitions[word]):
         dot_src = MachineGraph.create_from_machines([machine]).to_dot()
         out_fn = os.path.join(path, '{0}_{1}.dot'.format(base_name, idx))
         with open(out_fn, 'w') as out:
             out.write(dot_src.encode('utf-8'))
Ejemplo n.º 10
0
def print_4lang_graph(word, machine, graph_dir, max_depth=None):
    """Dump the 4lang graph of *machine* to <graph_dir>/<word>.dot.

    A None machine is silently ignored.
    """
    if machine is None:
        return
    dot_src = MachineGraph.create_from_machines(
        [machine], max_depth=max_depth).to_dot()
    out_fn = os.path.join(graph_dir, u"{0}.dot".format(word)).encode('utf-8')
    with open(out_fn, 'w') as dot_obj:
        dot_obj.write(dot_src.encode('utf-8'))
Ejemplo n.º 11
0
    def lemma_similarity(self, lemma1, lemma2, sim_type):
        """Return the best machine-pair similarity between two lemmas.

        Results are cached symmetrically; identical lemmas score 1 and
        negative similarities are clamped to 0.
        """
        if (lemma1, lemma2) in self.lemma_sim_cache:
            return self.lemma_sim_cache[(lemma1, lemma2)]
        elif lemma1 == lemma2:
            return 1
        self.log(u'lemma1: {0}, lemma2: {1}'.format(lemma1, lemma2))

        machines1 = self.wrapper.definitions[lemma1]
        machines2 = self.wrapper.definitions[lemma2]

        # best-scoring pair of definition machines across the two lemmas
        pairs_by_sim = sorted([
            (self.machine_similarity(machine1, machine2, sim_type),
             (machine1, machine2))
            for machine1 in machines1 for machine2 in machines2], reverse=True)

        sim, (machine1, machine2) = pairs_by_sim[0]

        draw_graphs = True  # use with caution
        if draw_graphs and not self.wrapper.batch:
            graph = MachineGraph.create_from_machines(
                [machine1, machine2])  # , max_depth=1)
            # with-block closes the dot file (the original leaked it)
            with open('graphs/{0}_{1}.dot'.format(lemma1, lemma2), 'w') as f:
                f.write(graph.to_dot().encode('utf-8'))

        sim = max(sim, 0)
        self.lemma_sim_cache[(lemma1, lemma2)] = sim
        self.lemma_sim_cache[(lemma2, lemma1)] = sim
        return sim
Ejemplo n.º 12
0
def print_text_graph(words_to_machines, graph_dir, fn='text'):
    """Write the joint graph of all machines to <graph_dir>/<fn>.dot.

    Returns the path of the written file.
    """
    dot_src = MachineGraph.create_from_machines(
        words_to_machines.values()).to_dot()
    out_fn = os.path.join(graph_dir, '{0}.dot'.format(fn))
    with open(out_fn, 'w') as f:
        f.write(dot_src.encode('utf-8'))
    return out_fn
Ejemplo n.º 13
0
def draw_text_graph(words_to_machines, out_dir, fn='text', orig_machines=None):
    """Render the joint machine graph to a png via graphviz.

    Returns the path of the rendered picture.  *orig_machines* previously
    defaulted to a shared mutable list ([]); the None sentinel avoids the
    mutable-default pitfall without changing behavior.
    """
    if orig_machines is None:
        orig_machines = []
    graph = MachineGraph.create_from_machines(words_to_machines.values(),
                                              orig_machines=orig_machines)
    src_str = graph.to_dot().encode('utf-8')
    src = graphviz.Source(src_str, format='png')
    pic_path = src.render(filename=fn, directory=out_dir)
    return pic_path
Ejemplo n.º 14
0
def draw_text_graph(
        words_to_machines, out_dir, fn='text', orig_machines=None):
    """Render the joint machine graph to a png via graphviz.

    Returns the path of the rendered picture.  *orig_machines* previously
    defaulted to a shared mutable list ([]); the None sentinel avoids the
    mutable-default pitfall without changing behavior.
    """
    if orig_machines is None:
        orig_machines = []
    graph = MachineGraph.create_from_machines(
        words_to_machines.values(), orig_machines=orig_machines)
    src_str = graph.to_dot().encode('utf-8')
    src = graphviz.Source(src_str, format='png')
    pic_path = src.render(filename=fn, directory=out_dir)
    return pic_path
Ejemplo n.º 15
0
 def process(self, text):
     preproc = TextTo4lang.preprocess_text(text)
     deps, corefs, parse_trees = self.parser_wrapper.parse_text(preproc)
     machines = self.dep_to_4lang.get_machines_from_deps_and_corefs(
         deps, corefs)
     # print machines
     self.dep_to_4lang.lexicon.expand(machines)
     graph = MachineGraph.create_from_machines(machines.values())
     print graph.to_dot()
Ejemplo n.º 16
0
def main():
    lex_fn, word = sys.argv[1:3]
    lex = Lexicon.load_from_binary(lex_fn)
    machines = lex.lexicon.get(word, lex.ext_lexicon.get(word))
    if machines is None:
        print '404 :('
    else:
        graph = MachineGraph.create_from_machines(machines)
        sys.stdout.write(graph.to_dot().encode('utf-8'))
Ejemplo n.º 17
0
def main():
    lex_fn, word = sys.argv[1:3]
    lex = Lexicon.load_from_binary(lex_fn)
    machines = lex.lexicon.get(word, lex.ext_lexicon.get(word))
    if machines is None:
        print '404 :('
    else:
        graph = MachineGraph.create_from_machines(machines)
        sys.stdout.write(graph.to_dot().encode('utf-8'))
Ejemplo n.º 18
0
def test_dep():
    print 'building wrapper...'
    w = Wrapper(sys.argv[1])
    for line in sys.stdin:
        w.add_dependency(line)

    active_machines = w.lexicon.active_machines()
    logging.debug('active machines: {}'.format(active_machines))
    graph = MachineGraph.create_from_machines(active_machines)
    f = open('machines.dot', 'w')
    f.write(graph.to_dot().encode('utf-8'))
Ejemplo n.º 19
0
def test_dep():
    print 'building wrapper...'
    w = Wrapper(sys.argv[1])
    for line in sys.stdin:
        w.add_dependency(line)

    active_machines = w.lexicon.active_machines()
    logging.debug('active machines: {}'.format(active_machines))
    graph = MachineGraph.create_from_machines(active_machines)
    f = open('machines.dot', 'w')
    f.write(graph.to_dot().encode('utf-8'))
Ejemplo n.º 20
0
 def draw_word_graphs(self):
     """Write one dot file per definition machine under graphs/words/."""
     ensure_dir('graphs/words')
     for c, (word, machines) in enumerate(self.definitions.iteritems()):
         if c % 1000 == 0:
             logging.info("{0}...".format(c))
         for i, machine in enumerate(machines):
             graph = MachineGraph.create_from_machines([machine])
             clean_word = Machine.d_clean(word)
             # d_clean may prefix names with 'X'; strip it for the filename
             if clean_word[0] == 'X':
                 clean_word = clean_word[1:]
             # with-block closes each dot file (the original leaked handles)
             with open('graphs/words/{0}_{1}.dot'.format(clean_word, i),
                       'w') as f:
                 f.write(graph.to_dot().encode('utf-8'))
Ejemplo n.º 21
0
 def draw_word_graphs(self):
     """Write one dot file per definition machine under graphs/words/."""
     ensure_dir('graphs/words')
     for c, (word, machines) in enumerate(self.definitions.iteritems()):
         if c % 1000 == 0:
             logging.info("{0}...".format(c))
         for i, machine in enumerate(machines):
             graph = MachineGraph.create_from_machines([machine])
             clean_word = Machine.d_clean(word)
             # d_clean may prefix names with 'X'; strip it for the filename
             if clean_word[0] == 'X':
                 clean_word = clean_word[1:]
             # with-block closes each dot file (the original leaked handles)
             with open('graphs/words/{0}_{1}.dot'.format(clean_word, i),
                       'w') as f:
                 f.write(graph.to_dot().encode('utf-8'))
Ejemplo n.º 22
0
 def run(self):
     logging.info('running QA...')
     input_file = self.cfg.get('qa', 'input_file')
     for entry in QAParser.parse_file(input_file):
         logging.info('processing text...')
         all_text = "\n".join([doc['text'] for doc in entry['docs']])
         model = self.text_to_4lang.process(
             all_text, dep_dir=self.dep_dir, fn='text')
         print_text_graph(model, self.graph_dir)
         model_graph = MachineGraph.create_from_machines(model.values())
         for question in entry['questions']:
             answer = self.answer_question(question, model, model_graph)
             print answer['text']
Ejemplo n.º 23
0
 def run(self):
     logging.info('running QA...')
     input_file = self.cfg.get('qa', 'input_file')
     for entry in QAParser.parse_file(input_file):
         logging.info('processing text...')
         all_text = "\n".join([doc['text'] for doc in entry['docs']])
         model = self.text_to_4lang.process(all_text,
                                            dep_dir=self.dep_dir,
                                            fn='text')
         print_text_graph(model, self.graph_dir)
         model_graph = MachineGraph.create_from_machines(model.values())
         for question in entry['questions']:
             answer = self.answer_question(question, model, model_graph)
             print answer['text']
Ejemplo n.º 24
0
def wikidata():
    """Endpoint: build the 4lang graph for one word POSTed as JSON ('word')."""
    my_server.textto4lang.abstract = False
    my_server.textto4lang.expand = True
    #    my_server.textto4lang.dep_to_4lang.lexicon.lexicon = {}
    payload = request.json
    sens_fn = '/home/adaamko/AACS18/4lang/data/' + 'wikidata' + '.sens'
    raw_word = payload['word']
    response = {}
    preprocessed = my_server.textto4lang.preprocess_text(raw_word)
    with open(sens_fn, 'w+') as f:
        f.write(preprocessed.encode("utf8"))
    my_server.textto4lang.input_sens = sens_fn
    machines = my_server.textto4lang.process_file(sens_fn, 'wikidata')
    response['word'] = MachineGraph.create_from_machines(
        machines[0].values()).to_dict()
    # NOTE(review): get_lem_machine receives the .sens file path here,
    # exactly as in the original — confirm that is intended
    response['lem'] = my_server.textto4lang.get_lem_machine(sens_fn)
    return jsonify(response)
Ejemplo n.º 25
0
    def add_def_graph(self, word, word_machine, dumped_def_graph,
                      allow_new_base=False, allow_new_ext=False):
        """Rebuild a dumped definition graph as a network of machines.

        Every graph node (named "<printname>_<counter>") is mapped to a
        machine: the node whose printname equals *word* maps to
        *word_machine*, any other node to a machine obtained via
        get_machine(..., new_machine=True).  Each edge is then replayed
        as machine1.append(machine2, partition), using the edge's
        'color' attribute as the partition index.

        NOTE(review): allow_new_base / allow_new_ext are not read in this
        body — presumably consumed elsewhere; confirm before relying on them.
        """
        node2machine = {}
        graph = MachineGraph.from_dict(dumped_def_graph)
        for node in graph.nodes_iter():
            # node ids look like "<printname>_<counter>"; drop the counter
            pn = "_".join(node.split('_')[:-1])
            if pn == word:
                node2machine[node] = word_machine
            else:
                if not pn:
                    logging.warning(u"empty pn in node: {0}, word: {1}".format(
                        node, word))
                node2machine[node] = self.get_machine(pn, new_machine=True)

        for node1, adjacency in graph.adjacency_iter():
            machine1 = node2machine[node1]
            for node2, edges in adjacency.iteritems():
                machine2 = node2machine[node2]
                for i, attributes in edges.iteritems():
                    # 'color' encodes the partition this edge belongs to
                    part_index = attributes['color']
                    machine1.append(machine2, part_index)
Ejemplo n.º 26
0
    def add_edges(self, word2machine):
        """Register every 4lang edge of the given machines in this model.

        0-colored edges become direct edges; 1- and 2-colored edges of a
        binary are collected as (subject, object) index sets and every
        subj/obj pair is added as an edge labeled with the binary's index.
        """
        g = MachineGraph.create_from_machines(word2machine.values())
        g.do_closure()
        # binary word index -> [set of subject indices, set of object indices]
        binaries = defaultdict(lambda: [set(), set()])
        for n1, n2, edata in g.G.edges(data=True):
            n1_index = self.get_w_index(n1.split('_')[0])
            n2_index = self.get_w_index(n2.split('_')[0])
            if edata['color'] == 0:
                self.add_edge(0, n1_index, n2_index)
            else:
                self.add_binary(n1.split('_')[0])
                if edata['color'] == 1:
                    binaries[n1_index][0].add(n2_index)
                elif edata['color'] == 2:
                    binaries[n1_index][1].add(n2_index)
                else:
                    # was `assert False`, which python -O silently strips;
                    # raise explicitly so bad data never passes unnoticed
                    raise ValueError(
                        'unexpected edge color: {0!r}'.format(edata['color']))

        for bin_index, (subjs, objs) in binaries.iteritems():
            for subj_index in subjs:
                for obj_index in objs:
                    self.add_edge(bin_index, subj_index, obj_index)
Ejemplo n.º 27
0
 def score_answer(self, answer, model, model_graph):
     """Score *answer* in place against *model_graph*.

     Stores the supported-score and its evidence on the answer dict.
     """
     candidate_graph = MachineGraph.create_from_machines(
         answer['machines'].values())
     score, evidence = GraphSimilarity.supported_score(
         candidate_graph, model_graph)
     answer['score'], answer['evidence'] = score, evidence
Ejemplo n.º 28
0
    def get_full_graph(self, fullgraph_options):
        """Build (and cache on self) one undirected-merged MultiDiGraph
        over the definition graphs of every known word.

        Words listed in the frequency file (up to the freq_cnt / freq_val
        cutoffs) are removed from the final graph.  Subsequent calls
        return the cached self.full_graph.
        """
        if self.full_graph is not None:
            return self.full_graph
        allwords = set()
        allwords.update(self.lexicon.keys(), self.ext_lexicon.keys(),
                        self.oov_lexicon.keys())
        self.full_graph = nx.MultiDiGraph()

        excluded_words = set()

        # get excluded words set
        with open(fullgraph_options.freq_file) as f:
            for line_no, line in enumerate(f):
                # freq file format: "<freq>\t<word>" per line
                fields = line.strip().decode('utf-8').split('\t')
                freq = int(fields[0])
                word = fields[1]
                if line_no > fullgraph_options.freq_cnt and (
                        fullgraph_options.freq_val == 0
                        or fullgraph_options.freq_val > freq):
                    break
                excluded_words.add(word)

        machinegraph_options = MachineGraphOptions(
            fullgraph_options=fullgraph_options)

        # TODO: only for debugging
        # until = 10
        for i, word in enumerate(allwords):
            # TODO: only for debugging
            # if word not in ['dumb', 'intelligent', 'stupid']:
            #     continue
            # if i > until:
            #     break

            machine = self.get_machine(word)
            MG = MachineGraph.create_from_machines(
                [machine], machinegraph_options=machinegraph_options)
            # TODO: maybe directed is better
            G = MG.G.to_undirected()

            # TODO: to print out all graphs
            # try:
            #     fn = os.path.join(
            #   '/home/eszter/projects/4lang/data/graphs/allwords',
            #   u"{0}.dot".format(word)).encode('utf-8')
            #     with open(fn, 'w') as dot_obj:
            #         dot_obj.write(MG.to_dot_str_graph().encode('utf-8'))
            # except:
            #     print "EXCEPTION: " + word

            # TODO: words to test have nodes
            # if 'other' in G.nodes() and 'car' in G.nodes():
            #     print word
            #
            # if word == 'merry-go-round' or word == 'Klaxon':
            #     print G.edges()

            # merge this word's (undirected) edges into the shared graph
            self.full_graph.add_edges_from(G.edges(data=True))

            # TODO: only for debugging
            # MG.G = self.full_graph
            # fn = os.path.join(
            #   '/home/eszter/projects/4lang/test/graphs/full_graph',
            #   u"{0}.dot".format(i)).encode('utf-8')
            # with open(fn, 'w') as dot_obj:
            #     dot_obj.write(MG.to_dot_str_graph().encode('utf-8'))

        for word in excluded_words:
            if self.full_graph.has_node(word):
                self.full_graph.remove_node(word)

        return self.full_graph
Ejemplo n.º 29
0
 def dump_definition_graph(machine, seen=None):
     """Return *machine*'s definition graph as a plain dict.

     *seen* is accepted for backward compatibility but is unused in this
     body; it previously defaulted to a shared mutable set(), which is
     the classic mutable-default pitfall.
     """
     graph = MachineGraph.create_from_machines([machine])
     return graph.to_dict()
Ejemplo n.º 30
0
def print_4lang_graph(word, machine, graph_dir, max_depth=None):
    """Dump the 4lang graph of *machine* to <graph_dir>/<word>.dot.

    A None machine is now silently ignored, consistent with the sibling
    definition of this function that guards against it.
    """
    if machine is None:
        return
    graph = MachineGraph.create_from_machines([machine], max_depth=max_depth)
    fn = os.path.join(graph_dir, u"{0}.dot".format(word)).encode('utf-8')
    with open(fn, 'w') as dot_obj:
        dot_obj.write(graph.to_dot().encode('utf-8'))
Ejemplo n.º 31
0
def print_text_graph(words_to_machines, graph_dir, fn='text'):
    """Write the joint graph of all machines to <graph_dir>/<fn>.dot.

    Returns the path of the written file.
    """
    dot_src = MachineGraph.create_from_machines(
        words_to_machines.values()).to_dot()
    out_fn = os.path.join(graph_dir, '{0}.dot'.format(fn))
    with open(out_fn, 'w') as f:
        f.write(dot_src.encode('utf-8'))
    return out_fn
Ejemplo n.º 32
0
    def fullgraph(self, name1, name2, machine1, machine2):
        """Return {"shortest_path": length} between two words.

        When self.calc_path is set, the path is searched either on an
        on-the-fly graph built from the two machines (expanding their
        definitions up to 5 times until a path appears, when
        self.expand_path is set) or on the precomputed self.UG;
        otherwise the lexicon's cached shortest path is used.
        A length of 0 is returned whenever no path can be found.
        """
        ####################
        # Only for calculating shortest path
        ####################
        if self.calc_path:
            logging.debug('name1 = {0}, name2 = {1}'.format(name1, name2))

            length = 0
            active_graph = None
            # NOTE(review): unified_machine is never used in this body
            unified_machine = None
            if self.expand_path:
                logging.debug("calc active graph")
                # merge the undirected graphs of both machines
                active_graph = MachineGraph.create_from_machines(
                    [machine1], machinegraph_options=self.machinegraph_options
                ).G.to_undirected()
                G2 = MachineGraph.create_from_machines(
                    [machine2], machinegraph_options=self.machinegraph_options
                ).G.to_undirected()
                active_graph.add_edges_from(G2.edges(data=True))

                # TODO: e.g. "take" is empty
                if name1 not in active_graph.nodes() or name2 not in G2.nodes(
                ):
                    return {"shortest_path": length}

                # expand definitions until a path appears (at most 6 rounds)
                i = 0
                while not nx.has_path(active_graph, name1, name2):
                    if i > 5:
                        return {"shortest_path": length}
                    self.lexicon.expand_definition(machine1)
                    self.lexicon.expand_definition(machine2)
                    active_graph = MachineGraph.create_from_machines(
                        [machine1],
                        machinegraph_options=self.machinegraph_options
                    ).G.to_undirected()
                    G2 = MachineGraph.create_from_machines(
                        [machine2],
                        machinegraph_options=self.machinegraph_options
                    ).G.to_undirected()
                    active_graph.add_edges_from(G2.edges(data=True))
                    i += 1
            else:
                active_graph = self.UG

            if name1 not in active_graph.nodes(
            ) or name2 not in active_graph.nodes():
                return {"shortest_path": length}
            if nx.has_path(active_graph, name1, name2):
                path = nx.shortest_path(active_graph,
                                        name1,
                                        name2,
                                        weight='weight')
                # weighted mode reports path weight, otherwise node count
                if self.fullgraph_options.weighted == True:
                    length = nx.shortest_path_length(active_graph,
                                                     name1,
                                                     name2,
                                                     weight='weight')
                else:
                    length = len(path)
                print "PATH: " + name1 + " " + name2
                print path
                print length
                self.shortest_path_res.write("\t".join(path))
                self.shortest_path_res.write("\n")
            else:
                logging.info("path does not exist between {0} and {1}".format(
                    name1, name2))
                self.no_path_cnt += 1
        else:
            length = self.lexicon.get_shortest_path(
                name1, name2, self.shortest_path_file_name)
        return {"shortest_path": length}
Ejemplo n.º 33
0
    def get_full_graph(self, fullgraph_options):
        """Build (and cache on self) one undirected-merged MultiDiGraph
        over the definition graphs of every known word.

        Words listed in the frequency file (up to the freq_cnt / freq_val
        cutoffs) are removed from the final graph.  Subsequent calls
        return the cached self.full_graph.
        """
        if self.full_graph is not None:
            return self.full_graph
        allwords = set()
        allwords.update(
            self.lexicon.keys(), self.ext_lexicon.keys(),
            self.oov_lexicon.keys())
        self.full_graph = nx.MultiDiGraph()

        excluded_words = set()

        # get excluded words set
        with open(fullgraph_options.freq_file) as f:
            for line_no, line in enumerate(f):
                # freq file format: "<freq>\t<word>" per line
                fields = line.strip().decode('utf-8').split('\t')
                freq = int(fields[0])
                word = fields[1]
                if line_no > fullgraph_options.freq_cnt and (
                        fullgraph_options.freq_val == 0 or
                        fullgraph_options.freq_val > freq):
                    break
                excluded_words.add(word)

        machinegraph_options = MachineGraphOptions(
            fullgraph_options=fullgraph_options)

        # TODO: only for debugging
        # until = 10
        for i, word in enumerate(allwords):
            # TODO: only for debugging
            # if word not in ['dumb', 'intelligent', 'stupid']:
            #     continue
            # if i > until:
            #     break

            machine = self.get_machine(word)
            MG = MachineGraph.create_from_machines(
                [machine], machinegraph_options=machinegraph_options)
            # TODO: maybe directed is better
            G = MG.G.to_undirected()

            # TODO: to print out all graphs
            # try:
            #     fn = os.path.join(
            #   '/home/eszter/projects/4lang/data/graphs/allwords',
            #   u"{0}.dot".format(word)).encode('utf-8')
            #     with open(fn, 'w') as dot_obj:
            #         dot_obj.write(MG.to_dot_str_graph().encode('utf-8'))
            # except:
            #     print "EXCEPTION: " + word

            # TODO: words to test have nodes
            # if 'other' in G.nodes() and 'car' in G.nodes():
            #     print word
            #
            # if word == 'merry-go-round' or word == 'Klaxon':
            #     print G.edges()

            # merge this word's (undirected) edges into the shared graph
            self.full_graph.add_edges_from(G.edges(data=True))

            # TODO: only for debugging
            # MG.G = self.full_graph
            # fn = os.path.join(
            #   '/home/eszter/projects/4lang/test/graphs/full_graph',
            #   u"{0}.dot".format(i)).encode('utf-8')
            # with open(fn, 'w') as dot_obj:
            #     dot_obj.write(MG.to_dot_str_graph().encode('utf-8'))

        for word in excluded_words:
            if self.full_graph.has_node(word):
                self.full_graph.remove_node(word)

        return self.full_graph
Ejemplo n.º 34
0
import sys

from pymachine.utils import MachineGraph

from fourlang.lexicon import Lexicon

lexicon = Lexicon.load_from_binary(sys.argv[1])
total = 0
total_size = 0
smallest = 999
largest = 0
for word, machines in lexicon.ext_lexicon.iteritems():
    machine = next(iter(machines))
    graph = MachineGraph.create_from_machines([machine])
    size = len(graph.G) - 1
    if size < 1:
        continue
    total += 1
    total_size += size
    smallest = min(smallest, size)
    largest = max(largest, size)

print 'processed {0} graphs'.format(total)
print 'average size: {0} nodes'.format(total_size/float(total))
print 'smallest: {0}, largest: {1}'.format(smallest, largest)
Ejemplo n.º 35
0
 def dump_definition_graph(machine, seen=None):
     """Return *machine*'s definition graph as a plain dict.

     *seen* is accepted for backward compatibility but is unused in this
     body; it previously defaulted to a shared mutable set(), which is
     the classic mutable-default pitfall.
     """
     graph = MachineGraph.create_from_machines([machine])
     return graph.to_dict()
Ejemplo n.º 36
0
 def score_answer(self, answer, model, model_graph):
     """Score *answer* in place against *model_graph*.

     Stores the supported-score and its evidence on the answer dict.
     """
     candidate_graph = MachineGraph.create_from_machines(
         answer['machines'].values())
     score, evidence = GraphSimilarity.supported_score(
         candidate_graph, model_graph)
     answer['score'], answer['evidence'] = score, evidence
Ejemplo n.º 37
0
def print_4lang_graph(word, machine, graph_dir):
    """Write the 4lang graph of *machine* to <graph_dir>/<word>.dot.

    Both the file path and the dot source are encoded as utf-8 bytes.
    """
    dot_source = MachineGraph.create_from_machines([machine]).to_dot()
    dot_path = os.path.join(graph_dir, u"{0}.dot".format(word)).encode('utf-8')
    with open(dot_path, 'w') as out_file:
        out_file.write(dot_source.encode('utf-8'))
Ejemplo n.º 38
0
    def expand(self,
               words_to_machines,
               stopwords=None,
               cached=False,
               abstract=False):
        """Unify each known word's machine with a copy of its definition.

        words_to_machines: dict mapping lemmas to their Machine objects;
            modified in place (replacement machines may be added under
            new keys at the end).
        stopwords: lemmas never to expand; falls back to self.stopwords
            when empty or None (the old default was a mutable [] literal,
            a known Python pitfall -- behavior is unchanged).
        cached: if True, skip lemmas already recorded in self.expanded.
        abstract: if True, try a structural merge of partitions 1/2 and a
            node replacement instead of plain unification.
        """
        if not stopwords:
            stopwords = self.stopwords
        machines_to_append = []
        for lemma, machine in words_to_machines.iteritems():
            if ((not cached or lemma not in self.expanded)
                    and lemma in self.known_words()
                    and lemma not in stopwords):

                # deepcopy so that the version in the lexicon keeps its links
                definition = self.get_machine(lemma)

                copied_def = copy.deepcopy(definition)
                print("machine: " + str(machine))
                print("defintion: " + str(definition))
                if abstract is True:
                    part_one = False
                    part_two = False
                    # merge the definition's partition-1 machines (children
                    # on all three partitions, plus their parents) into each
                    # partition-1 machine of the word
                    if len(copied_def.partitions[1]) > 0:
                        if len(machine.partitions[1]) > 0:
                            part_one = True
                            print("machine partitions 1:")
                            for i in machine.partitions[1]:
                                print(i)
                                for j in copied_def.partitions[1]:
                                    for k in range(0, 3):
                                        for m in j.partitions[k]:
                                            i.append(m, k)
                                    for p in j.parents:
                                        i.append(p[0], p[1])

                    # same merge for partition 2
                    if len(copied_def.partitions[2]) > 0:
                        if len(machine.partitions[2]) > 0:
                            part_two = True
                            print("machine partitions 2:")
                            for i in machine.partitions[2]:
                                for j in copied_def.partitions[2]:
                                    print(j)
                                    for k in range(0, 3):
                                        for m in j.partitions[k]:
                                            i.append(m, k)
                                    for p in j.parents:
                                        i.append(p[0], p[1])

                    machine_for_replace = None
                    def_parents = [
                        parent for parent in copied_def.parents
                        if parent[1] == 0
                    ]

                    # choose the node that will stand in for the word itself:
                    # first child on partition 0 if any, else the first
                    # partition-0 parent of the definition
                    if len(copied_def.partitions[0]) > 0:
                        machine_for_replace = copied_def.partitions[0][0]
                    elif len(def_parents) > 0:
                        machine_for_replace = def_parents[0][0]
                    if machine_for_replace is not None:
                        # drop parent links whose name starts with the lemma
                        # (presumably to avoid re-linking the replacement
                        # back to the word -- TODO confirm)
                        for m in machine_for_replace.parents.copy():
                            if m[0].printname().startswith(lemma):
                                machine_for_replace.parents.remove(m)

                        # rewire the word's parents to the replacement node
                        for i in machine.parents.copy():
                            i[0].remove(machine, i[1])
                            i[0].append(machine_for_replace, i[1])

                        # move all of the word's children over as well
                        for i in range(0, 3):
                            for m in machine.partitions[i]:
                                try:
                                    machine.remove(m, i)
                                except KeyError:
                                    pass
                                machine_for_replace.append(m, i)
                        machines_to_append.append(machine_for_replace)
                    if machine_for_replace is None and part_one is False and part_two is False:
                        # fallback: no structural merge was possible; dump
                        # both graphs for debugging, then unify as usual.
                        # NOTE(review): a leftover pdb.set_trace() was
                        # removed here -- it blocked execution waiting for
                        # an interactive debugger.
                        machine_graph = [
                            m for m in MachineTraverser.get_nodes(
                                machine, names_only=False, keep_upper=True)
                        ]
                        def_graph = [
                            m for m in MachineTraverser.get_nodes(
                                copied_def, names_only=False, keep_upper=True)
                        ]
                        g1 = MachineGraph.create_from_machines(machine_graph)
                        g2 = MachineGraph.create_from_machines(def_graph)
                        print("rossz machine: " + str(machine))
                        print("Definicio: " + str(copied_def))
                        print("Machine")
                        print(g1.to_dot())
                        print("Definicio")
                        print(g2.to_dot())
                        machine.unify(copied_def, exclude_0_case=True)
                else:
                    machine.unify(copied_def, exclude_0_case=True)

                # unify case machines (=AGT / =PAT) from the definition with
                # the word's first partition-1 / partition-2 argument
                case_machines = [
                    m for m in MachineTraverser.get_nodes(
                        copied_def, names_only=False, keep_upper=True)
                    if m.printname().startswith('=')
                ]

                for cm in case_machines:
                    if cm.printname() == "=AGT":
                        if machine.partitions[1]:
                            machine.partitions[1][0].unify(cm)
                    if cm.printname() == "=PAT":
                        if machine.partitions[2]:
                            machine.partitions[2][0].unify(cm)
                self.expanded.add(lemma)
        for m in machines_to_append:
            words_to_machines[m.printname()] = m
Ejemplo n.º 39
0
    def fullgraph(self, name1, name2, machine1, machine2):
        """Return {"shortest_path": length} between name1 and name2.

        When self.calc_path is set, the length is computed on an
        undirected networkx graph built from the two machines (optionally
        expanding their definitions until the nodes are connected);
        otherwise it is looked up via self.lexicon.get_shortest_path.
        Returns length 0 when either node is missing or no path exists.
        """
        ####################
        # Only for calculating shortest path
        ####################
        if self.calc_path:
            logging.debug('name1 = {0}, name2 = {1}'.format(name1, name2))

            length = 0
            active_graph = None
            unified_machine = None
            if self.expand_path:
                logging.debug("calc active graph")
                # merge the undirected graphs of both machines into one,
                # then prune excluded words (but never the endpoints)
                active_graph = MachineGraph.create_from_machines(
                    [machine1], machinegraph_options=self.machinegraph_options).G.to_undirected()
                G2 = MachineGraph.create_from_machines(
                    [machine2], machinegraph_options=self.machinegraph_options).G.to_undirected()
                active_graph.add_edges_from(G2.edges(data=True))
                for word in self.excluded_words:
                    if active_graph.has_node(word) and name1 != word and name2 != word:
                        active_graph.remove_node(word)

                # TODO: e.g. "take" is empty
                if name1 not in active_graph.nodes() or name2 not in G2.nodes():
                    return {"shortest_path": length}

                i = 0
                if self.debug_graph:
                    filename = 'test/temp_graphs/{0}_{1}_{2}.dot'.format(name1, name2, i)
                    nx.drawing.nx_agraph.write_dot(active_graph, filename)

                # expand both definitions (at most 6 rounds) and rebuild the
                # merged graph until a path between the endpoints appears
                while not nx.has_path(active_graph, name1, name2):
                    if i > 5:
                        return {"shortest_path": length}
                    self.lexicon.expand_definition(machine1, self.stopwords)
                    self.lexicon.expand_definition(machine2, self.stopwords)
                    active_graph = MachineGraph.create_from_machines(
                        [machine1], machinegraph_options=self.machinegraph_options).G.to_undirected()
                    G2 = MachineGraph.create_from_machines(
                        [machine2], machinegraph_options=self.machinegraph_options).G.to_undirected()
                    active_graph.add_edges_from(G2.edges(data=True))
                    for word in self.excluded_words:
                        if active_graph.has_node(word) and name1 != word and name2 != word:
                            active_graph.remove_node(word)
                    i += 1
                    if self.debug_graph:
                        filename = 'test/temp_graphs/{0}_{1}_{2}.dot'.format(name1, name2, i)
                        nx.drawing.nx_agraph.write_dot(active_graph, filename)

            else:
                # no expansion: use the precomputed full undirected graph
                active_graph = self.UG

            if name1 not in active_graph.nodes() or name2 not in active_graph.nodes():
                return {"shortest_path" : length}
            if nx.has_path(active_graph, name1, name2):
                if self.node_weights:
                    # move node weights onto edges so networkx's weighted
                    # shortest path can use them
                    old_graph = active_graph
                    active_graph = self._transform_node_weights_to_edge_weights(old_graph)
                path = nx.shortest_path(active_graph, name1, name2, weight='weight')
                # three length modes: embedding-weighted edge sum, summed
                # node frequencies (endpoints excluded), or plain hop count
                if self.fullgraph_options.embedding_weighted:
                    length = nx.shortest_path_length(active_graph, name1, name2, weight='weight')
                elif self.node_weights:
                    for w in path:
                        length += self.node_freqs[w]
                    length = length - self.node_freqs[name1] - self.node_freqs[name2]
                else:
                    length = len(path)
                print "PATH: " + name1 + " " + name2
                print path
                print length
                self.shortest_path_res.write("\t".join(path))
                self.shortest_path_res.write("\n")
            else:
                logging.info("path does not exist between {0} and {1}".format(name1, name2))
                self.no_path_cnt += 1
        else:
            # delegate to a precomputed shortest-path lookup file
            length = self.lexicon.get_shortest_path(name1, name2, self.shortest_path_file_name)
        # if length != 0:
        #     length = 1.0 / length
        # else:
        #     length = 1.0
        return {"shortest_path" : length}