def evaluateModel(model, modelName, reader):
    """Evaluate ``model`` on the examples yielded by ``reader`` and report accuracy.

    The data-set label used in the report is "dev" when ``reader`` equals
    ``sick_dev_reader``, "test" when it equals ``sick_test_reader`` and
    "train" otherwise.

    Args:
        model: Object exposing ``train()`` and ``predictAll(reader)`` when
            ``modelName`` is "NB", or ``predict(s1, s2)`` otherwise.
        modelName: Model identifier; only the value "NB" selects the
            batch-prediction path.
        reader: Zero-argument callable yielding ``(label, t1, t2)`` triples.

    Returns:
        The value returned by ``accuracy_score`` for the collected
        predictions and gold labels (it is also printed).
    """
    if reader == sick_dev_reader:
        dataSet = "dev"
    elif reader == sick_test_reader:
        dataSet = "test"
    else:
        dataSet = "train"

    if modelName == "NB":
        # The NB model is trained here and predicts the whole reader in one call.
        model.train()
        goldLabels = [label for label, _t1, _t2 in reader()]
        predictions = model.predictAll(reader)
    else:
        goldLabels = []
        predictions = []
        for count, (label, t1, t2) in enumerate(reader()):
            if count % 10 == 0:
                # Progress report every 10 examples, emitted before processing.
                print("Processed %d examples" % count)
            goldLabels.append(label)
            s1 = " ".join(leaves(t1))
            s2 = " ".join(leaves(t2))
            predictions.append(model.predict(s1, s2))

    # NOTE(review): argument order kept as in the original; if this is
    # scikit-learn's accuracy_score its documented order is (y_true, y_pred)
    # -- confirm whether the order matters for the implementation in use.
    accuracy = accuracy_score(predictions, goldLabels)
    # Single-argument parenthesized print works on both Python 2 and Python 3.
    print("Accuracy on SICK %s set: %f" % (dataSet, accuracy))
    return accuracy
def evaluateModel(model, modelName, reader):
    """Evaluate ``model`` on the examples yielded by ``reader`` and report accuracy.

    The data-set label used in the report is "dev" when ``reader`` equals
    ``sick_dev_reader``, "test" when it equals ``sick_test_reader`` and
    "train" otherwise.

    Args:
        model: Object exposing ``train()`` and ``predictAll(reader)`` when
            ``modelName`` is "NB", or ``predict(s1, s2)`` otherwise.
        modelName: Model identifier; only the value "NB" selects the
            batch-prediction path.
        reader: Zero-argument callable yielding ``(label, t1, t2)`` triples.

    Returns:
        The value returned by ``accuracy_score`` for the collected
        predictions and gold labels (it is also printed).
    """
    if reader == sick_dev_reader:
        dataSet = "dev"
    elif reader == sick_test_reader:
        dataSet = "test"
    else:
        dataSet = "train"

    if modelName == "NB":
        # The NB model is trained here and predicts the whole reader in one call.
        model.train()
        goldLabels = [label for label, _t1, _t2 in reader()]
        predictions = model.predictAll(reader)
    else:
        goldLabels = []
        predictions = []
        for count, (label, t1, t2) in enumerate(reader()):
            if count % 10 == 0:
                # Progress report every 10 examples, emitted before processing.
                print("Processed %d examples" % count)
            goldLabels.append(label)
            s1 = " ".join(leaves(t1))
            s2 = " ".join(leaves(t2))
            predictions.append(model.predict(s1, s2))

    # NOTE(review): argument order kept as in the original; if this is
    # scikit-learn's accuracy_score its documented order is (y_true, y_pred)
    # -- confirm whether the order matters for the implementation in use.
    accuracy = accuracy_score(predictions, goldLabels)
    # Single-argument parenthesized print works on both Python 2 and Python 3.
    print("Accuracy on SICK %s set: %f" % (dataSet, accuracy))
    return accuracy
def keyword_overlap_feature(s1, s2, cioClient):
    """Build a feature counter from the keywords two sentences have in common.

    Each of ``s1`` and ``s2`` is flattened with ``leaves`` and joined with
    single spaces, then handed to ``cioClient.extractKeywords``. The result is
    a ``collections.Counter`` built from the set of keywords present in both
    extractions, so every shared keyword has a count of 1.
    """
    # Flatten both inputs first, then run the two extractions in order.
    texts = [" ".join(leaves(tree)) for tree in (s1, s2)]
    extracted = [cioClient.extractKeywords(text) for text in texts]

    first_keywords = set(extracted[0])
    second_keywords = set(extracted[1])
    shared_keywords = first_keywords & second_keywords
    return collections.Counter(shared_keywords)
def get_modules_to_remove(self):
    """
    Return the sorted list of modules to remove.

    The graph is first restricted to ``States.TO_REMOVE`` through
    ``_sub_graph_from_states``; the leaves of that sub-graph are then reduced
    by ``_lowest_common_ancestors`` using the same state filter.
    """
    removal_states = [States.TO_REMOVE]
    removal_graph = self._sub_graph_from_states(self._graph, states=removal_states)
    leaf_modules = leaves(removal_graph)
    ancestors = self._lowest_common_ancestors(
        removal_graph, leaf_modules, states=removal_states
    )
    # Copy before sorting so the list returned by the helper is left untouched.
    ordered = list(ancestors)
    ordered.sort()
    return ordered
def leaves(self):
    """
    Return the leaf modules of this object's graph.

    Delegates to the module-level ``leaves`` helper with ``self._graph``;
    per the original documentation these are the modules that are not a
    dependency of any other module.
    @:returns list<string> (A module names list)
    """
    # Inside the method body ``leaves`` resolves to the module-level function,
    # not to this method.
    return leaves(self._graph)
def _lowest_common_ancestors(self, graph, names=None, states=None, index=0):
    """
    Recursively reduce a module names list towards its lowest common ancestors

    The returned value is either ``names`` (possibly with entries deleted in
    place) or ``["base"]`` when a computed ancestor equals "base".

    @:parameter graph <pygraphviz.AGraph> (the recursive call passes the
                networkx DiGraph built from it instead)
    @:parameter names list<string> (A module names list; defaults to the
                leaves of ``graph`` when empty or None)
    @:parameter states list<string> (A state list to filter; no filtering
                when empty or None)
    @:parameter index int (start offset into ``names`` for the candidates)
    @:returns list<string>
    """
    if states is None:
        states = []
    # networkx view of the graph, used for the ancestor queries below.
    nx_digraph = nx.DiGraph(graph)
    if not names:
        names = leaves(graph)
    # NOTE(review): if ``names`` is still empty at this point, indexing it
    # fails -- confirm callers never reach this with a leafless graph.
    lca = names[0]
    # The set() conversion discards the ordering produced by sorted().
    sorted_modules = set(sorted(names[index:]))
    if states:
        # Keep only the modules whose current state is one of ``states``.
        sorted_modules = filter(lambda m: self.get_state(m) in states, sorted_modules)
    # ``next`` is the enumerate position (it shadows the builtin); ``name`` is
    # not used in the loop body.
    for next, name in enumerate(sorted_modules):
        # NOTE(review): ``next in names`` tests whether the integer position is
        # an *element* of ``names``. With a list of module-name strings this
        # looks like it is always False, which would return ``names`` on the
        # first iteration -- possibly a bounds check was intended; confirm.
        next_name = names[next] if next in names else None
        if not next_name:
            return names
        lca = nx.algorithms.lowest_common_ancestor(nx_digraph, lca, next_name)
        if lca:
            if lca == "base":
                return [lca]
            # Mutates ``names`` in place, which may be the caller's list.
            del names[next]
        else:
            # No common ancestor found: recurse starting from the next offset.
            names = self._lowest_common_ancestors(nx_digraph, names, states, next + 1)
    return names
def get_modules_to_install(self):
    """
    Return the sorted list of modules to install.

    The graph is restricted to ``States.TO_INSTALL`` through
    ``_sub_graph_from_states`` and the leaves of that sub-graph are returned
    in sorted order.
    """
    install_states = [States.TO_INSTALL]
    install_graph = self._sub_graph_from_states(self._graph, states=install_states)
    # Copy before sorting so the list produced by ``leaves`` is left untouched.
    ordered = list(leaves(install_graph))
    ordered.sort()
    return ordered