def dump(self): list = self._vector.items() list = chew.sort(list, lambda x: x[1]) list.reverse() for (term, count) in list: print term.encode("utf-8"), count print "Terms:", len(self._vector)
def print_report(self): print "TOTAL COUNT:", self._total items = chew.sort(self._words.items(), lambda x: x[1]) items.reverse() for (term, count) in items[:20]: print "%30s %s" % (term.encode("utf-8"), count) print "..." for (term, count) in items[-20:]: print "%30s %s" % (term.encode("utf-8"), count)
def print_report(self): print "TOTAL COUNT:", self._total items = chew.sort(self._words.items(), lambda x: x[1]) items.reverse() for (term, count) in items[ : 20]: print "%30s %s" % (term.encode("utf-8"), count) print "..." for (term, count) in items[-20 : ]: print "%30s %s" % (term.encode("utf-8"), count)
def display_comparison(self, other, showall=0): terms = {} for (term, count) in self.get_pairs(): terms[term] = count * other.get_count(term) termlist = chew.sort(terms.items(), lambda x: x[1]) termlist.reverse() for (term, score) in termlist: if score: print(term, score) if showall: list = [] for (term, count) in self.get_pairs(): if not terms.get(term, 0): list.append(term.encode("utf-8")) print "+", string.join(list, ", ") list = [] for (term, count) in other.get_pairs(): if not terms.get(term, 0): list.append(term.encode("utf-8")) print "-", string.join(list, ", ")
def display_comparison(self, other, showall = 0): terms = {} for (term, count) in self.get_pairs(): terms[term] = count * other.get_count(term) termlist = chew.sort(terms.items(), lambda x: x[1]) termlist.reverse() for (term, score) in termlist: if score: print (term, score) if showall: list = [] for (term, count) in self.get_pairs(): if not terms.get(term, 0): list.append(term.encode("utf-8")) print "+", string.join(list, ", ") list = [] for (term, count) in other.get_pairs(): if not terms.get(term, 0): list.append(term.encode("utf-8")) print "-", string.join(list, ", ")
def k_nearest_neighbours_2(objects): """Makes clusters of objects. All objects must implement object.compare(other), object.get_key(), and object.get_name().""" pairs = [] for ix in range(len(objects)): for i in range(ix+1, len(objects)): pairs.append((objects[ix], objects[i], objects[ix].compare(objects[i]))) pairs = chew.sort(pairs, lambda x: x[2]) pairs.reverse() clusters = [] clustermap = {} for (t1, t2, score) in pairs: if not score or (clustermap.has_key(t1.get_key()) and clustermap.has_key(t2.get_key())): if score: pass #print "NOT USING:", (t1, t2, score) continue print (t1.get_name(), t2.get_name(), score) #compare(get_matrix(t1, terms), get_matrix(t2, terms)) if clustermap.has_key(t1.get_key()): c = clustermap[t1.get_key()] elif clustermap.has_key(t2.get_key()): c = clustermap[t2.get_key()] else: c = Cluster() clusters.append(c) for t in (t1, t2): if not clustermap.has_key(t.get_key()): c.add(t) clustermap[t.get_key()] = c return clusters
def k_nearest_neighbours_2(objects): """Makes clusters of objects. All objects must implement object.compare(other), object.get_key(), and object.get_name().""" pairs = [] for ix in range(len(objects)): for i in range(ix + 1, len(objects)): pairs.append( (objects[ix], objects[i], objects[ix].compare(objects[i]))) pairs = chew.sort(pairs, lambda x: x[2]) pairs.reverse() clusters = [] clustermap = {} for (t1, t2, score) in pairs: if not score or (clustermap.has_key(t1.get_key()) and clustermap.has_key(t2.get_key())): if score: pass #print "NOT USING:", (t1, t2, score) continue print(t1.get_name(), t2.get_name(), score) #compare(get_matrix(t1, terms), get_matrix(t2, terms)) if clustermap.has_key(t1.get_key()): c = clustermap[t1.get_key()] elif clustermap.has_key(t2.get_key()): c = clustermap[t2.get_key()] else: c = Cluster() clusters.append(c) for t in (t1, t2): if not clustermap.has_key(t.get_key()): c.add(t) clustermap[t.get_key()] = c return clusters