Example #1
0
 def dump(self):
     list = self._vector.items()
     list = chew.sort(list, lambda x: x[1])
     list.reverse()
     for (term, count) in list:
         print term.encode("utf-8"), count
     print "Terms:", len(self._vector)
Example #2
0
 def dump(self):
     list = self._vector.items()
     list = chew.sort(list, lambda x: x[1])
     list.reverse()
     for (term, count) in list:
         print term.encode("utf-8"), count
     print "Terms:", len(self._vector)
Example #3
0
 def print_report(self):
     print "TOTAL COUNT:", self._total
     items = chew.sort(self._words.items(), lambda x: x[1])
     items.reverse()
     for (term, count) in items[:20]:
         print "%30s %s" % (term.encode("utf-8"), count)
     print "..."
     for (term, count) in items[-20:]:
         print "%30s %s" % (term.encode("utf-8"), count)
Example #4
0
 def print_report(self):
     print "TOTAL COUNT:", self._total
     items = chew.sort(self._words.items(), lambda x: x[1])
     items.reverse()
     for (term, count) in items[ : 20]:
         print "%30s %s" % (term.encode("utf-8"), count)
     print "..."
     for (term, count) in items[-20 : ]:
         print "%30s %s" % (term.encode("utf-8"), count)
Example #5
0
 def display_comparison(self, other, showall=0):
     terms = {}
     for (term, count) in self.get_pairs():
         terms[term] = count * other.get_count(term)
     termlist = chew.sort(terms.items(), lambda x: x[1])
     termlist.reverse()
     for (term, score) in termlist:
         if score:
             print(term, score)
     if showall:
         list = []
         for (term, count) in self.get_pairs():
             if not terms.get(term, 0):
                 list.append(term.encode("utf-8"))
         print "+", string.join(list, ", ")
         list = []
         for (term, count) in other.get_pairs():
             if not terms.get(term, 0):
                 list.append(term.encode("utf-8"))
         print "-", string.join(list, ", ")
Example #6
0
 def display_comparison(self, other, showall = 0):
     terms = {}
     for (term, count) in self.get_pairs():
         terms[term] = count * other.get_count(term)
     termlist = chew.sort(terms.items(), lambda x: x[1])
     termlist.reverse()
     for (term, score) in termlist:
         if score:
             print (term, score)
     if showall:
         list = []
         for (term, count) in self.get_pairs():
             if not terms.get(term, 0):
                 list.append(term.encode("utf-8"))
         print "+", string.join(list, ", ")
         list = []
         for (term, count) in other.get_pairs():
             if not terms.get(term, 0):
                 list.append(term.encode("utf-8"))
         print "-", string.join(list, ", ")
Example #7
0
def k_nearest_neighbours_2(objects):
    """Greedily group objects into clusters using pairwise scores.

    Every unordered pair (a, b) of ``objects`` is scored with
    a.compare(b). The pairs are ordered by chew.sort on that score and
    reversed (presumably best score first -- confirm against chew.sort),
    then visited in turn:

    * a pair with a falsy score is skipped;
    * a pair whose members are both already clustered is skipped
      (existing clusters are never merged);
    * otherwise the unclustered member(s) join the cluster of the member
      that is already clustered, or a newly created Cluster if neither is.

    Each accepted pair is printed with its names and score.

    All objects must implement object.compare(other), object.get_key(),
    and object.get_name(). ``objects`` must support len() and indexing.

    Returns the list of Cluster objects in the order they were created.
    """
    total = len(objects)
    pairs = []
    for ix in range(total):
        first = objects[ix]
        for i in range(ix + 1, total):
            second = objects[i]
            pairs.append((first, second, first.compare(second)))

    pairs = chew.sort(pairs, lambda x: x[2])
    pairs.reverse()

    clusters = []
    clustermap = {}  # object key -> the Cluster that object was added to
    for (t1, t2, score) in pairs:
        if not score:
            continue
        # Keys are looked up once per pair; assumes get_key() is stable
        # for an object during this call -- TODO confirm.
        key1 = t1.get_key()
        key2 = t2.get_key()
        if key1 in clustermap and key2 in clustermap:
            continue
        print (t1.get_name(), t2.get_name(), score)
        if key1 in clustermap:
            c = clustermap[key1]
        elif key2 in clustermap:
            c = clustermap[key2]
        else:
            c = Cluster()
            clusters.append(c)

        # Re-check membership per member: t1 and t2 may share a key, in
        # which case only the first is added.
        for (t, key) in ((t1, key1), (t2, key2)):
            if key not in clustermap:
                c.add(t)
                clustermap[key] = c

    return clusters
Example #8
0
def k_nearest_neighbours_2(objects):
    """Greedily group objects into clusters using pairwise scores.

    Every unordered pair (a, b) of ``objects`` is scored with
    a.compare(b). The pairs are ordered by chew.sort on that score and
    reversed (presumably best score first -- confirm against chew.sort),
    then visited in turn:

    * a pair with a falsy score is skipped;
    * a pair whose members are both already clustered is skipped
      (existing clusters are never merged);
    * otherwise the unclustered member(s) join the cluster of the member
      that is already clustered, or a newly created Cluster if neither is.

    Each accepted pair is printed with its names and score.

    All objects must implement object.compare(other), object.get_key(),
    and object.get_name(). ``objects`` must support len() and indexing.

    Returns the list of Cluster objects in the order they were created.
    """
    total = len(objects)
    pairs = []
    for ix in range(total):
        first = objects[ix]
        for i in range(ix + 1, total):
            second = objects[i]
            pairs.append((first, second, first.compare(second)))

    pairs = chew.sort(pairs, lambda x: x[2])
    pairs.reverse()

    clusters = []
    clustermap = {}  # object key -> the Cluster that object was added to
    for (t1, t2, score) in pairs:
        if not score:
            continue
        # Keys are looked up once per pair; assumes get_key() is stable
        # for an object during this call -- TODO confirm.
        key1 = t1.get_key()
        key2 = t2.get_key()
        if key1 in clustermap and key2 in clustermap:
            continue
        print(t1.get_name(), t2.get_name(), score)
        if key1 in clustermap:
            c = clustermap[key1]
        elif key2 in clustermap:
            c = clustermap[key2]
        else:
            c = Cluster()
            clusters.append(c)

        # Re-check membership per member: t1 and t2 may share a key, in
        # which case only the first is added.
        for (t, key) in ((t1, key1), (t2, key2)):
            if key not in clustermap:
                c.add(t)
                clustermap[key] = c

    return clusters