def abstract():
    """Flask endpoint: parse a premise/hypothesis pair into 4lang graphs.

    Expects a JSON body with 'hyp' and 'prem' fields; returns the dict
    representation of both graphs as JSON.
    """
    # switch the shared service into abstract+expand mode with a fresh lexicon
    my_server.textto4lang.abstract = True
    my_server.textto4lang.expand = True
    my_server.textto4lang.dep_to_4lang.lexicon.lexicon = {}
    payload = request.json
    hyp_path = '/home/adaamko/projects/4lang/data/' + 'abs_hypothesis' + '.sens'
    prem_path = '/home/adaamko/projects/4lang/data/' + 'abs_premise' + '.sens'
    hyp_clean = my_server.textto4lang.preprocess_text(payload['hyp'])
    prem_clean = my_server.textto4lang.preprocess_text(payload['prem'])
    with open(hyp_path, 'w') as out_file:
        out_file.write(hyp_clean.encode("utf8"))
    with open(prem_path, 'w') as out_file:
        out_file.write(prem_clean.encode("utf8"))
    response = {}
    my_server.textto4lang.input_sens = hyp_path
    hyp_machines = my_server.textto4lang.process_file(hyp_path, 'abs_hyp')
    response['hyp'] = MachineGraph.create_from_machines(
        hyp_machines[0].values()).to_dict()
    my_server.textto4lang.input_sens = prem_path
    prem_machines = my_server.textto4lang.process_file(prem_path, 'abs_prem')
    response['prem'] = MachineGraph.create_from_machines(
        prem_machines[0].values()).to_dict()
    return jsonify(response)
def __init__(self, machine1, machine2, max_depth):
    """Collect 3-node subgraph features from the definition graphs of two
    machines (built up to max_depth)."""
    graph1 = MachineGraph.create_from_machines([machine1], max_depth=max_depth)
    graph2 = MachineGraph.create_from_machines([machine2], max_depth=max_depth)
    printname1 = machine1.printname()
    printname2 = machine2.printname()
    self.subgraph_dict = {}
    # alternative extractors (_get_subgraph_N, _get_subgraph_N_X_N)
    # are intentionally disabled in this variant
    self.subgraph_dict.update(
        self._get_subgraph_3_nodes(graph1.G, graph2.G, printname1, printname2))
def __init__(self, machine1, machine2, max_depth):
    """Collect N_X_N subgraph features from the definition graphs of two
    machines (built up to max_depth)."""
    graph1 = MachineGraph.create_from_machines([machine1], max_depth=max_depth)
    graph2 = MachineGraph.create_from_machines([machine2], max_depth=max_depth)
    printname1 = machine1.printname()
    printname2 = machine2.printname()
    self.subgraph_dict = {}
    # the plain _get_subgraph_N extractor is intentionally disabled here
    self.subgraph_dict.update(
        self._get_subgraph_N_X_N(graph1.G, graph2.G, printname1, printname2))
def draw_single_graph(self, word, path):
    """Write one .dot file per definition machine of the given word into
    the directory at path (files named <cleaned_word>_<index>.dot)."""
    clean_word = Machine.d_clean(word)
    for index, machine in enumerate(self.definitions[word]):
        dot_source = MachineGraph.create_from_machines([machine]).to_dot()
        out_path = os.path.join(path, '{0}_{1}.dot'.format(clean_word, index))
        with open(out_path, 'w') as out_file:
            out_file.write(dot_source.encode('utf-8'))
def run(self, sentence): """Parses a sentence, runs the spreading activation and returns the messages that have to be sent to the active plugins.""" try: sp = SentenceParser() sa = SpreadingActivation(self.lexicon) machines = sp.parse(sentence) logging.debug('machines: {}'.format(machines)) logging.debug('machines: {}'.format( [m for m in machines])) for machine_list in machines: for machine in machine_list: if machine.control.kr['CAT'] == 'VERB': logging.debug('adding verb construction for {}'.format( machine)) self.lexicon.add_construction(VerbConstruction( machine.printname(), self.lexicon, self.supp_dict)) logging.info('constructions: {}'.format( self.lexicon.constructions)) # results is a list of (url, data) tuples results = sa.activation_loop(machines) print 'results:', results print 'machines:', machines graph = MachineGraph.create_from_machines( [m[0] for m in machines], max_depth=1) f = open('machines.dot', 'w') f.write(graph.to_dot().encode('utf-8')) self.lexicon.clear_active() except Exception, e: import traceback traceback.print_exc(e) raise(e)
def add_def_graph(self, word, word_machine, dumped_def_graph,
                  allow_new_base=False, allow_new_ext=False):
    # Rebuild a definition graph previously dumped via MachineGraph.to_dict
    # and wire its machines together, substituting word_machine for the
    # headword's own node.
    # NOTE(review): allow_new_base/allow_new_ext are unused in this body --
    # presumably kept for interface compatibility with callers; verify.
    node2machine = {}
    graph = MachineGraph.from_dict(dumped_def_graph)
    for node in graph.nodes_iter():
        # node names look like "<printname>_<id>"; strip the trailing id
        pn = "_".join(node.split('_')[:-1])
        if pn == word:
            # the headword's node maps to the machine supplied by the caller
            node2machine[node] = word_machine
        else:
            if not pn:
                logging.warning(u"empty pn in node: {0}, word: {1}".format(
                    node, word))
            node2machine[node] = self.get_machine(pn, new_machine=True)
    # recreate every edge; the edge 'color' attribute encodes the
    # partition index used when appending one machine to another
    for node1, adjacency in graph.adjacency_iter():
        machine1 = node2machine[node1]
        for node2, edges in adjacency.iteritems():
            machine2 = node2machine[node2]
            for i, attributes in edges.iteritems():
                part_index = attributes['color']
                machine1.append(machine2, part_index)
def lemma_similarity(self, lemma1, lemma2, sim_type):
    """Return the best pairwise similarity between the definition machines
    of two lemmas, caching the (symmetric, non-negative) result.

    Identical lemmas score 1; negative similarities are clamped to 0.
    """
    if (lemma1, lemma2) in self.lemma_sim_cache:
        return self.lemma_sim_cache[(lemma1, lemma2)]
    elif lemma1 == lemma2:
        return 1
    self.log(u'lemma1: {0}, lemma2: {1}'.format(lemma1, lemma2))
    machines1 = self.wrapper.definitions[lemma1]
    machines2 = self.wrapper.definitions[lemma2]
    # score every machine pair and keep the best-scoring one
    pairs_by_sim = sorted(
        [(self.machine_similarity(machine1, machine2, sim_type),
          (machine1, machine2))
         for machine1 in machines1 for machine2 in machines2],
        reverse=True)
    sim, (machine1, machine2) = pairs_by_sim[0]
    draw_graphs = True  # use with caution
    if draw_graphs and not self.wrapper.batch:
        graph = MachineGraph.create_from_machines(
            [machine1, machine2])  # , max_depth=1)
        # BUG FIX: the dot file handle was opened and never closed; a
        # context manager guarantees it is flushed and released.
        with open('graphs/{0}_{1}.dot'.format(lemma1, lemma2), 'w') as f:
            f.write(graph.to_dot().encode('utf-8'))
    sim = sim if sim >= 0 else 0
    # cache in both directions since similarity is symmetric
    self.lemma_sim_cache[(lemma1, lemma2)] = sim
    self.lemma_sim_cache[(lemma2, lemma1)] = sim
    return sim
def run(self, sentence): """Parses a sentence, runs the spreading activation and returns the messages that have to be sent to the active plugins.""" try: sp = SentenceParser() sa = SpreadingActivation(self.lexicon) machines = sp.parse(sentence) logging.debug('machines: {}'.format(machines)) logging.debug('machines: {}'.format([m for m in machines])) for machine_list in machines: for machine in machine_list: if machine.control.kr['CAT'] == 'VERB': logging.debug( 'adding verb construction for {}'.format(machine)) self.lexicon.add_construction( VerbConstruction(machine.printname(), self.lexicon, self.supp_dict)) logging.info('constructions: {}'.format( self.lexicon.constructions)) # results is a list of (url, data) tuples results = sa.activation_loop(machines) print 'results:', results print 'machines:', machines graph = MachineGraph.create_from_machines([m[0] for m in machines], max_depth=1) f = open('machines.dot', 'w') f.write(graph.to_dot().encode('utf-8')) self.lexicon.clear_active() except Exception, e: import traceback traceback.print_exc(e) raise (e)
def print_4lang_graph(word, machine, graph_dir, max_depth=None):
    """Dump the 4lang definition graph of a word to <graph_dir>/<word>.dot.

    Silently does nothing when the word has no machine (machine is None).
    """
    if machine is None:
        return
    dot_source = MachineGraph.create_from_machines(
        [machine], max_depth=max_depth).to_dot()
    out_path = os.path.join(graph_dir, u"{0}.dot".format(word)).encode('utf-8')
    with open(out_path, 'w') as dot_obj:
        dot_obj.write(dot_source.encode('utf-8'))
def lemma_similarity(self, lemma1, lemma2, sim_type):
    """Return the best pairwise similarity between the definition machines
    of two lemmas, caching the (symmetric, non-negative) result.

    Identical lemmas score 1; negative similarities are clamped to 0.
    """
    if (lemma1, lemma2) in self.lemma_sim_cache:
        return self.lemma_sim_cache[(lemma1, lemma2)]
    elif lemma1 == lemma2:
        return 1
    self.log(u'lemma1: {0}, lemma2: {1}'.format(lemma1, lemma2))
    machines1 = self.wrapper.definitions[lemma1]
    machines2 = self.wrapper.definitions[lemma2]
    # score every machine pair and keep the best-scoring one
    pairs_by_sim = sorted([
        (self.machine_similarity(machine1, machine2, sim_type),
         (machine1, machine2))
        for machine1 in machines1 for machine2 in machines2], reverse=True)
    sim, (machine1, machine2) = pairs_by_sim[0]
    draw_graphs = True  # use with caution
    if draw_graphs and not self.wrapper.batch:
        graph = MachineGraph.create_from_machines(
            [machine1, machine2])  # , max_depth=1)
        # BUG FIX: the dot file handle was opened and never closed; a
        # context manager guarantees it is flushed and released.
        with open('graphs/{0}_{1}.dot'.format(lemma1, lemma2), 'w') as f:
            f.write(graph.to_dot().encode('utf-8'))
    sim = sim if sim >= 0 else 0
    # cache in both directions since similarity is symmetric
    self.lemma_sim_cache[(lemma1, lemma2)] = sim
    self.lemma_sim_cache[(lemma2, lemma1)] = sim
    return sim
def print_text_graph(words_to_machines, graph_dir, fn='text'):
    """Write the graph of all text machines to <graph_dir>/<fn>.dot and
    return the path of the written file."""
    dot_source = MachineGraph.create_from_machines(
        words_to_machines.values()).to_dot()
    out_path = os.path.join(graph_dir, '{0}.dot'.format(fn))
    with open(out_path, 'w') as out_file:
        out_file.write(dot_source.encode('utf-8'))
    return out_path
def draw_text_graph(words_to_machines, out_dir, fn='text', orig_machines=None):
    """Render the graph of all text machines as a png via graphviz and
    return the path of the rendered picture.

    orig_machines defaults to an empty list (forwarded to the graph
    builder unchanged).
    """
    # BUG FIX: the default was a mutable list shared across calls; use
    # None as the sentinel and create a fresh list per call instead.
    if orig_machines is None:
        orig_machines = []
    graph = MachineGraph.create_from_machines(words_to_machines.values(),
                                              orig_machines=orig_machines)
    src_str = graph.to_dot().encode('utf-8')
    src = graphviz.Source(src_str, format='png')
    pic_path = src.render(filename=fn, directory=out_dir)
    return pic_path
def draw_text_graph(
        words_to_machines, out_dir, fn='text', orig_machines=None):
    """Render the graph of all text machines as a png via graphviz and
    return the path of the rendered picture.

    orig_machines defaults to an empty list (forwarded to the graph
    builder unchanged).
    """
    # BUG FIX: the default was a mutable list shared across calls; use
    # None as the sentinel and create a fresh list per call instead.
    if orig_machines is None:
        orig_machines = []
    graph = MachineGraph.create_from_machines(
        words_to_machines.values(), orig_machines=orig_machines)
    src_str = graph.to_dot().encode('utf-8')
    src = graphviz.Source(src_str, format='png')
    pic_path = src.render(filename=fn, directory=out_dir)
    return pic_path
def process(self, text): preproc = TextTo4lang.preprocess_text(text) deps, corefs, parse_trees = self.parser_wrapper.parse_text(preproc) machines = self.dep_to_4lang.get_machines_from_deps_and_corefs( deps, corefs) # print machines self.dep_to_4lang.lexicon.expand(machines) graph = MachineGraph.create_from_machines(machines.values()) print graph.to_dot()
def main(): lex_fn, word = sys.argv[1:3] lex = Lexicon.load_from_binary(lex_fn) machines = lex.lexicon.get(word, lex.ext_lexicon.get(word)) if machines is None: print '404 :(' else: graph = MachineGraph.create_from_machines(machines) sys.stdout.write(graph.to_dot().encode('utf-8'))
def test_dep(): print 'building wrapper...' w = Wrapper(sys.argv[1]) for line in sys.stdin: w.add_dependency(line) active_machines = w.lexicon.active_machines() logging.debug('active machines: {}'.format(active_machines)) graph = MachineGraph.create_from_machines(active_machines) f = open('machines.dot', 'w') f.write(graph.to_dot().encode('utf-8'))
def draw_word_graphs(self):
    """Write one .dot graph per definition machine of every word in
    self.definitions under graphs/words/, logging progress every 1000
    words."""
    ensure_dir('graphs/words')
    for c, (word, machines) in enumerate(self.definitions.iteritems()):
        if c % 1000 == 0:
            logging.info("{0}...".format(c))
        for i, machine in enumerate(machines):
            graph = MachineGraph.create_from_machines([machine])
            clean_word = Machine.d_clean(word)
            # d_clean may prefix names with 'X'; strip it for the file name
            if clean_word[0] == 'X':
                clean_word = clean_word[1:]
            # BUG FIX: a file handle was opened per graph and never
            # closed, leaking handles; use a context manager instead.
            with open('graphs/words/{0}_{1}.dot'.format(clean_word, i),
                      'w') as f:
                f.write(graph.to_dot().encode('utf-8'))
def run(self): logging.info('running QA...') input_file = self.cfg.get('qa', 'input_file') for entry in QAParser.parse_file(input_file): logging.info('processing text...') all_text = "\n".join([doc['text'] for doc in entry['docs']]) model = self.text_to_4lang.process( all_text, dep_dir=self.dep_dir, fn='text') print_text_graph(model, self.graph_dir) model_graph = MachineGraph.create_from_machines(model.values()) for question in entry['questions']: answer = self.answer_question(question, model, model_graph) print answer['text']
def run(self): logging.info('running QA...') input_file = self.cfg.get('qa', 'input_file') for entry in QAParser.parse_file(input_file): logging.info('processing text...') all_text = "\n".join([doc['text'] for doc in entry['docs']]) model = self.text_to_4lang.process(all_text, dep_dir=self.dep_dir, fn='text') print_text_graph(model, self.graph_dir) model_graph = MachineGraph.create_from_machines(model.values()) for question in entry['questions']: answer = self.answer_question(question, model, model_graph) print answer['text']
def wikidata():
    """Flask endpoint: build the 4lang graph (and lemma machine) of the
    text in the JSON body's 'word' field and return both as JSON."""
    # non-abstract, expanding mode; the lexicon reset is deliberately off
    my_server.textto4lang.abstract = False
    my_server.textto4lang.expand = True
    # my_server.textto4lang.dep_to_4lang.lexicon.lexicon = {}
    payload = request.json
    sens_path = '/home/adaamko/AACS18/4lang/data/' + 'wikidata' + '.sens'
    preprocessed = my_server.textto4lang.preprocess_text(payload['word'])
    with open(sens_path, 'w+') as out_file:
        out_file.write(preprocessed.encode("utf8"))
    my_server.textto4lang.input_sens = sens_path
    machines = my_server.textto4lang.process_file(sens_path, 'wikidata')
    graph = MachineGraph.create_from_machines(machines[0].values())
    response = {
        'word': graph.to_dict(),
        'lem': my_server.textto4lang.get_lem_machine(sens_path),
    }
    return jsonify(response)
def add_edges(self, word2machine):
    # Derive edges from the closed 4lang graph of all machines and record
    # them via self.add_edge / self.add_binary.
    g = MachineGraph.create_from_machines(word2machine.values())
    g.do_closure()
    # binaries[b] = [set of subject indices, set of object indices]
    binaries = defaultdict(lambda: [set(), set()])
    for n1, n2, edata in g.G.edges(data=True):
        # node ids look like "<word>_<id>"; index by the bare word
        n1_index = self.get_w_index(n1.split('_')[0])
        n2_index = self.get_w_index(n2.split('_')[0])
        if edata['color'] == 0:
            # 0-edge: plain attribution between the two words
            self.add_edge(0, n1_index, n2_index)
        else:
            # 1/2-edges mark n1 as a binary relation; collect its
            # subject (color 1) and object (color 2) arguments
            self.add_binary(n1.split('_')[0])
            if edata['color'] == 1:
                binaries[n1_index][0].add(n2_index)
            elif edata['color'] == 2:
                binaries[n1_index][1].add(n2_index)
            else:
                # colors other than 0/1/2 are not expected here
                assert False
    # connect every subject of each binary to every one of its objects
    for bin_index, (subjs, objs) in binaries.iteritems():
        for subj_index in subjs:
            for obj_index in objs:
                self.add_edge(bin_index, subj_index, obj_index)
def score_answer(self, answer, model, model_graph):
    """Score an answer candidate against the model graph, storing the
    score and its supporting evidence on the answer dict in place."""
    candidate_graph = MachineGraph.create_from_machines(
        answer['machines'].values())
    score, evidence = GraphSimilarity.supported_score(
        candidate_graph, model_graph)
    answer['score'] = score
    answer['evidence'] = evidence
def get_full_graph(self, fullgraph_options):
    # Build (and memoize in self.full_graph) one large MultiDiGraph over
    # the definition graph of every known word, then drop low-frequency
    # excluded words read from fullgraph_options.freq_file.
    if self.full_graph is not None:
        return self.full_graph
    allwords = set()
    allwords.update(self.lexicon.keys(), self.ext_lexicon.keys(),
                    self.oov_lexicon.keys())
    self.full_graph = nx.MultiDiGraph()
    excluded_words = set()
    # get excluded words set: collect words from the tab-separated
    # "<freq>\t<word>" file until line_no exceeds freq_cnt and the
    # frequency drops below freq_val (or freq_val is 0)
    with open(fullgraph_options.freq_file) as f:
        for line_no, line in enumerate(f):
            fields = line.strip().decode('utf-8').split('\t')
            freq = int(fields[0])
            word = fields[1]
            if line_no > fullgraph_options.freq_cnt and (
                    fullgraph_options.freq_val == 0 or
                    fullgraph_options.freq_val > freq):
                break
            excluded_words.add(word)
    machinegraph_options = MachineGraphOptions(
        fullgraph_options=fullgraph_options)
    # TODO: only for debugging
    # until = 10
    for i, word in enumerate(allwords):
        # TODO: only for debugging
        # if word not in ['dumb', 'intelligent', 'stupid']:
        #     continue
        # if i > until:
        #     break
        machine = self.get_machine(word)
        MG = MachineGraph.create_from_machines(
            [machine], machinegraph_options=machinegraph_options)
        # TODO: maybe directed is better
        G = MG.G.to_undirected()
        # TODO: to print out all graphs
        # try:
        #     fn = os.path.join(
        #         '/home/eszter/projects/4lang/data/graphs/allwords',
        #         u"{0}.dot".format(word)).encode('utf-8')
        #     with open(fn, 'w') as dot_obj:
        #         dot_obj.write(MG.to_dot_str_graph().encode('utf-8'))
        # except:
        #     print "EXCEPTION: " + word
        # TODO: words to test have nodes
        # if 'other' in G.nodes() and 'car' in G.nodes():
        #     print word
        #
        # if word == 'merry-go-round' or word == 'Klaxon':
        #     print G.edges()
        self.full_graph.add_edges_from(G.edges(data=True))
        # TODO: only for debugging
        # MG.G = self.full_graph
        # fn = os.path.join(
        #     '/home/eszter/projects/4lang/test/graphs/full_graph',
        #     u"{0}.dot".format(i)).encode('utf-8')
        # with open(fn, 'w') as dot_obj:
        #     dot_obj.write(MG.to_dot_str_graph().encode('utf-8'))
    # finally prune every excluded word that made it into the graph
    for word in excluded_words:
        if self.full_graph.has_node(word):
            self.full_graph.remove_node(word)
    return self.full_graph
def dump_definition_graph(machine, seen=None):
    """Return the dict representation of a machine's definition graph.

    seen is accepted for interface compatibility but unused in this body
    -- presumably a leftover from a recursive traversal; verify callers.
    """
    # BUG FIX: the default was a mutable set() shared across all calls;
    # since the argument is never read here, None is a safe default.
    graph = MachineGraph.create_from_machines([machine])
    return graph.to_dict()
def print_4lang_graph(word, machine, graph_dir, max_depth=None):
    """Dump the 4lang definition graph of a word to <graph_dir>/<word>.dot.

    Does nothing when machine is None (words with no definition).
    """
    # BUG FIX: this copy crashed on words with no machine; the sibling
    # copy of this helper guards against machine being None, so match it.
    if machine is None:
        return
    graph = MachineGraph.create_from_machines([machine], max_depth=max_depth)
    fn = os.path.join(graph_dir, u"{0}.dot".format(word)).encode('utf-8')
    with open(fn, 'w') as dot_obj:
        dot_obj.write(graph.to_dot().encode('utf-8'))
def print_text_graph(words_to_machines, graph_dir, fn='text'):
    """Write the graph built from all text machines to
    <graph_dir>/<fn>.dot and return the written file's path."""
    graph = MachineGraph.create_from_machines(words_to_machines.values())
    out_path = os.path.join(graph_dir, '{0}.dot'.format(fn))
    with open(out_path, 'w') as out_file:
        out_file.write(graph.to_dot().encode('utf-8'))
    return out_path
def fullgraph(self, name1, name2, machine1, machine2):
    ####################
    # Only for calculating shortest path
    ####################
    # Returns {"shortest_path": length} where length is 0 when no path
    # could be found (or when either word is missing from the graph).
    if self.calc_path:
        logging.debug('name1 = {0}, name2 = {1}'.format(name1, name2))
        length = 0
        active_graph = None
        # NOTE(review): unified_machine is assigned but never used here
        unified_machine = None
        if self.expand_path:
            # build a joint undirected graph from both machines' graphs
            logging.debug("calc active graph")
            active_graph = MachineGraph.create_from_machines(
                [machine1],
                machinegraph_options=self.machinegraph_options
            ).G.to_undirected()
            G2 = MachineGraph.create_from_machines(
                [machine2],
                machinegraph_options=self.machinegraph_options
            ).G.to_undirected()
            active_graph.add_edges_from(G2.edges(data=True))
            # TODO: e.g. "take" is empty
            if name1 not in active_graph.nodes() or name2 not in G2.nodes(
                    ):
                return {"shortest_path": length}
            i = 0
            # expand both definitions (at most 6 rounds) until the two
            # words become connected in the joint graph
            while not nx.has_path(active_graph, name1, name2):
                if i > 5:
                    return {"shortest_path": length}
                self.lexicon.expand_definition(machine1)
                self.lexicon.expand_definition(machine2)
                active_graph = MachineGraph.create_from_machines(
                    [machine1],
                    machinegraph_options=self.machinegraph_options
                ).G.to_undirected()
                G2 = MachineGraph.create_from_machines(
                    [machine2],
                    machinegraph_options=self.machinegraph_options
                ).G.to_undirected()
                active_graph.add_edges_from(G2.edges(data=True))
                i += 1
        else:
            # use the precomputed full graph instead of expanding
            active_graph = self.UG
            if name1 not in active_graph.nodes(
                    ) or name2 not in active_graph.nodes():
                return {"shortest_path": length}
        if nx.has_path(active_graph, name1, name2):
            path = nx.shortest_path(active_graph, name1, name2,
                                    weight='weight')
            # weighted mode: sum edge weights; otherwise node count
            if self.fullgraph_options.weighted == True:
                length = nx.shortest_path_length(active_graph, name1,
                                                 name2, weight='weight')
            else:
                length = len(path)
            print "PATH: " + name1 + " " + name2
            print path
            print length
            self.shortest_path_res.write("\t".join(path))
            self.shortest_path_res.write("\n")
        else:
            logging.info("path does not exist between {0} and {1}".format(
                name1, name2))
            self.no_path_cnt += 1
    else:
        # delegate to a precomputed shortest-path lookup
        length = self.lexicon.get_shortest_path(
            name1, name2, self.shortest_path_file_name)
    return {"shortest_path": length}
def get_full_graph(self, fullgraph_options):
    # Build (and memoize in self.full_graph) one large MultiDiGraph over
    # the definition graph of every known word, then drop low-frequency
    # excluded words read from fullgraph_options.freq_file.
    if self.full_graph is not None:
        return self.full_graph
    allwords = set()
    allwords.update(
        self.lexicon.keys(), self.ext_lexicon.keys(),
        self.oov_lexicon.keys())
    self.full_graph = nx.MultiDiGraph()
    excluded_words = set()
    # get excluded words set: collect words from the tab-separated
    # "<freq>\t<word>" file until line_no exceeds freq_cnt and the
    # frequency drops below freq_val (or freq_val is 0)
    with open(fullgraph_options.freq_file) as f:
        for line_no, line in enumerate(f):
            fields = line.strip().decode('utf-8').split('\t')
            freq = int(fields[0])
            word = fields[1]
            if line_no > fullgraph_options.freq_cnt and (
                    fullgraph_options.freq_val == 0 or
                    fullgraph_options.freq_val > freq):
                break
            excluded_words.add(word)
    machinegraph_options = MachineGraphOptions(
        fullgraph_options=fullgraph_options)
    # TODO: only for debugging
    # until = 10
    for i, word in enumerate(allwords):
        # TODO: only for debugging
        # if word not in ['dumb', 'intelligent', 'stupid']:
        #     continue
        # if i > until:
        #     break
        machine = self.get_machine(word)
        MG = MachineGraph.create_from_machines(
            [machine], machinegraph_options=machinegraph_options)
        # TODO: maybe directed is better
        G = MG.G.to_undirected()
        # TODO: to print out all graphs
        # try:
        #     fn = os.path.join(
        #         '/home/eszter/projects/4lang/data/graphs/allwords',
        #         u"{0}.dot".format(word)).encode('utf-8')
        #     with open(fn, 'w') as dot_obj:
        #         dot_obj.write(MG.to_dot_str_graph().encode('utf-8'))
        # except:
        #     print "EXCEPTION: " + word
        # TODO: words to test have nodes
        # if 'other' in G.nodes() and 'car' in G.nodes():
        #     print word
        #
        # if word == 'merry-go-round' or word == 'Klaxon':
        #     print G.edges()
        self.full_graph.add_edges_from(G.edges(data=True))
        # TODO: only for debugging
        # MG.G = self.full_graph
        # fn = os.path.join(
        #     '/home/eszter/projects/4lang/test/graphs/full_graph',
        #     u"{0}.dot".format(i)).encode('utf-8')
        # with open(fn, 'w') as dot_obj:
        #     dot_obj.write(MG.to_dot_str_graph().encode('utf-8'))
    # finally prune every excluded word that made it into the graph
    for word in excluded_words:
        if self.full_graph.has_node(word):
            self.full_graph.remove_node(word)
    return self.full_graph
import sys from pymachine.utils import MachineGraph from fourlang.lexicon import Lexicon lexicon = Lexicon.load_from_binary(sys.argv[1]) total = 0 total_size = 0 smallest = 999 largest = 0 for word, machines in lexicon.ext_lexicon.iteritems(): machine = next(iter(machines)) graph = MachineGraph.create_from_machines([machine]) size = len(graph.G) - 1 if size < 1: continue total += 1 total_size += size smallest = min(smallest, size) largest = max(largest, size) print 'processed {0} graphs'.format(total) print 'average size: {0} nodes'.format(total_size/float(total)) print 'smallest: {0}, largest: {1}'.format(smallest, largest)
def print_4lang_graph(word, machine, graph_dir, max_depth=None):
    """Dump the 4lang definition graph of a word to <graph_dir>/<word>.dot.

    Does nothing when machine is None. max_depth (new, default None i.e.
    unlimited) is forwarded to the graph builder, making this copy
    consistent with the other variants of this helper in the project.
    """
    # BUG FIX: the sibling copies guard against machine being None; this
    # one crashed on words with no machine, so add the same guard.
    if machine is None:
        return
    graph = MachineGraph.create_from_machines([machine], max_depth=max_depth)
    fn = os.path.join(graph_dir, u"{0}.dot".format(word)).encode('utf-8')
    with open(fn, 'w') as dot_obj:
        dot_obj.write(graph.to_dot().encode('utf-8'))
def expand(self, words_to_machines, stopwords=[],
           cached=False, abstract=False):
    # Expand every known, non-stopword machine in words_to_machines with a
    # deep copy of its lexicon definition. In abstract mode, definition
    # partitions/parents are grafted onto the machine (or the machine is
    # replaced wholesale); otherwise the machine is unified with the copy.
    # NOTE(review): stopwords=[] is a mutable default argument; harmless
    # here since it is only read, but worth replacing with None.
    # NOTE(review): the print() calls and pdb.set_trace() below look like
    # leftover debugging aids -- confirm before shipping.
    if len(stopwords) == 0:
        stopwords = self.stopwords
    machines_to_append = []
    for lemma, machine in words_to_machines.iteritems():
        if ((not cached or lemma not in self.expanded) and
                lemma in self.known_words() and
                lemma not in stopwords):
            # deepcopy so that the version in the lexicon keeps its links
            definition = self.get_machine(lemma)
            copied_def = copy.deepcopy(definition)
            print("machine: " + str(machine))
            print("defintion: " + str(definition))
            if abstract is True:
                part_one = False
                part_two = False
                # graft the definition's partition-1 machines (children on
                # all three partitions plus parents) onto each of the
                # machine's own partition-1 members
                if len(copied_def.partitions[1]) > 0:
                    if len(machine.partitions[1]) > 0:
                        part_one = True
                        print("machine partitions 1:")
                        for i in machine.partitions[1]:
                            print(i)
                            for j in copied_def.partitions[1]:
                                for k in range(0, 3):
                                    for m in j.partitions[k]:
                                        i.append(m, k)
                                for p in j.parents:
                                    i.append(p[0], p[1])
                # same grafting for partition 2
                if len(copied_def.partitions[2]) > 0:
                    if len(machine.partitions[2]) > 0:
                        part_two = True
                        print("machine partitions 2:")
                        for i in machine.partitions[2]:
                            for j in copied_def.partitions[2]:
                                print(j)
                                for k in range(0, 3):
                                    for m in j.partitions[k]:
                                        i.append(m, k)
                                for p in j.parents:
                                    i.append(p[0], p[1])
                # pick a replacement machine: the definition's first
                # 0-partition child, or failing that its first 0-parent
                machine_for_replace = None
                def_parents = [
                    parent for parent in copied_def.parents
                    if parent[1] == 0
                ]
                if len(copied_def.partitions[0]) > 0:
                    machine_for_replace = copied_def.partitions[0][0]
                elif len(def_parents) > 0:
                    machine_for_replace = def_parents[0][0]
                if machine_for_replace is not None:
                    # detach the replacement from its own headword parents
                    for m in machine_for_replace.parents.copy():
                        if m[0].printname().startswith(lemma):
                            machine_for_replace.parents.remove(m)
                    # re-point every parent of the original machine to the
                    # replacement machine
                    for i in machine.parents.copy():
                        i[0].remove(machine, i[1])
                        i[0].append(machine_for_replace, i[1])
                    # move all children of the machine to the replacement
                    for i in range(0, 3):
                        for m in machine.partitions[i]:
                            try:
                                machine.remove(m, i)
                            except KeyError:
                                pass
                            machine_for_replace.append(m, i)
                    machines_to_append.append(machine_for_replace)
                if machine_for_replace is None and part_one is False and part_two is False:
                    # nothing could be grafted or replaced -- drop into
                    # the debugger, dump both graphs, then fall back to
                    # plain unification
                    pdb.set_trace()
                    machine_graph = [
                        m for m in MachineTraverser.get_nodes(
                            machine, names_only=False, keep_upper=True)
                    ]
                    def_graph = [
                        m for m in MachineTraverser.get_nodes(
                            copied_def, names_only=False, keep_upper=True)
                    ]
                    g1 = MachineGraph.create_from_machines(machine_graph)
                    g2 = MachineGraph.create_from_machines(def_graph)
                    print("rossz machine: " + str(machine))
                    print("Definicio: " + str(copied_def))
                    print("Machine")
                    print(g1.to_dot())
                    print("Definicio")
                    print(g2.to_dot())
                    machine.unify(copied_def, exclude_0_case=True)
            else:
                # non-abstract mode: plain unification with the copy
                machine.unify(copied_def, exclude_0_case=True)
            #machine_for_replace.parents.remove((machine, 0))
            '''
            print("machine for replace childs")
            for i in range(0,3):
                for m in machine_for_replace.partitions[i]:
                    print(m)
                    print(i)
            '''
            '''
            helpmachine = [
                m for m in MachineTraverser.get_nodes(
                    copied_def, names_only=False, keep_upper=True)
            ]
            '''
            """
            for parent, i in list(definition.parents):
                copied_parent = copy.deepcopy(parent)
                for m in list(copied_parent.partitions[i]):
                    if m.printname() == lemma:
                        copied_parent.remove(m, i)
                        break
                else:
                    raise Exception()
                    # "can't find {0} in partition {1} of {2}: {3}".format(
                    # ))
                copied_parent.append(copied_def, i)
            """
            # unify =AGT/=PAT case machines from the definition with the
            # machine's subject (partition 1) / object (partition 2) heads
            case_machines = [
                m for m in MachineTraverser.get_nodes(
                    copied_def, names_only=False, keep_upper=True)
                if m.printname().startswith('=')
            ]
            #machine.unify(copied_def, exclude_0_case=True)
            for cm in case_machines:
                if cm.printname() == "=AGT":
                    if machine.partitions[1]:
                        machine.partitions[1][0].unify(cm)
                if cm.printname() == "=PAT":
                    if machine.partitions[2]:
                        machine.partitions[2][0].unify(cm)
            #for j in machine_for_replace.parents:
            #    print(j)
            self.expanded.add(lemma)
    # expose the replacement machines under their own printnames
    for m in machines_to_append:
        words_to_machines[m.printname()] = m
def fullgraph(self, name1, name2, machine1, machine2):
    ####################
    # Only for calculating shortest path
    ####################
    # Weighted variant: supports excluded-word pruning, debug dot dumps,
    # node-frequency weighting and embedding-based edge weights. Returns
    # {"shortest_path": length}, 0 when no path is found.
    if self.calc_path:
        logging.debug('name1 = {0}, name2 = {1}'.format(name1, name2))
        length = 0
        active_graph = None
        # NOTE(review): unified_machine is assigned but never used here
        unified_machine = None
        if self.expand_path:
            # build a joint undirected graph from both machines' graphs
            logging.debug("calc active graph")
            active_graph = MachineGraph.create_from_machines(
                [machine1],
                machinegraph_options=self.machinegraph_options
            ).G.to_undirected()
            G2 = MachineGraph.create_from_machines(
                [machine2],
                machinegraph_options=self.machinegraph_options
            ).G.to_undirected()
            active_graph.add_edges_from(G2.edges(data=True))
            # prune excluded words, keeping the two query words
            for word in self.excluded_words:
                if active_graph.has_node(word) and name1 != word and name2 != word:
                    active_graph.remove_node(word)
            # TODO: e.g. "take" is empty
            if name1 not in active_graph.nodes() or name2 not in G2.nodes():
                return {"shortest_path": length}
            i = 0
            if self.debug_graph:
                filename = 'test/temp_graphs/{0}_{1}_{2}.dot'.format(
                    name1, name2, i)
                nx.drawing.nx_agraph.write_dot(active_graph, filename)
            # expand both definitions (at most 6 rounds) until the two
            # words become connected in the joint graph
            while not nx.has_path(active_graph, name1, name2):
                if i > 5:
                    return {"shortest_path": length}
                self.lexicon.expand_definition(machine1, self.stopwords)
                self.lexicon.expand_definition(machine2, self.stopwords)
                active_graph = MachineGraph.create_from_machines(
                    [machine1],
                    machinegraph_options=self.machinegraph_options
                ).G.to_undirected()
                G2 = MachineGraph.create_from_machines(
                    [machine2],
                    machinegraph_options=self.machinegraph_options
                ).G.to_undirected()
                active_graph.add_edges_from(G2.edges(data=True))
                for word in self.excluded_words:
                    if active_graph.has_node(word) and name1 != word and name2 != word:
                        active_graph.remove_node(word)
                i += 1
                if self.debug_graph:
                    filename = 'test/temp_graphs/{0}_{1}_{2}.dot'.format(
                        name1, name2, i)
                    nx.drawing.nx_agraph.write_dot(active_graph, filename)
        else:
            # use the precomputed full graph instead of expanding
            active_graph = self.UG
            if name1 not in active_graph.nodes() or name2 not in active_graph.nodes():
                return {"shortest_path" : length}
        if nx.has_path(active_graph, name1, name2):
            if self.node_weights:
                # convert node weights to edge weights so that
                # nx.shortest_path can honour them
                old_graph = active_graph
                active_graph = self._transform_node_weights_to_edge_weights(
                    old_graph)
            path = nx.shortest_path(active_graph, name1, name2,
                                    weight='weight')
            if self.fullgraph_options.embedding_weighted:
                length = nx.shortest_path_length(active_graph, name1, name2,
                                                 weight='weight')
            elif self.node_weights:
                # sum node frequencies along the path, excluding endpoints
                for w in path:
                    length += self.node_freqs[w]
                length = length - self.node_freqs[name1] - self.node_freqs[name2]
            else:
                length = len(path)
            print "PATH: " + name1 + " " + name2
            print path
            print length
            self.shortest_path_res.write("\t".join(path))
            self.shortest_path_res.write("\n")
        else:
            logging.info("path does not exist between {0} and {1}".format(
                name1, name2))
            self.no_path_cnt += 1
    else:
        # delegate to a precomputed shortest-path lookup
        length = self.lexicon.get_shortest_path(
            name1, name2, self.shortest_path_file_name)
        # if length != 0:
        #     length = 1.0 / length
        # else:
        #     length = 1.0
    return {"shortest_path" : length}