def topic_pair_report(res, lc, max_tweets=100):
    """Print a pairwise tweet report for every topic in ``res.topics``.

    For each topic a blank line and a coloured header containing
    ``repr(topic.ngram)`` are printed.  Topics holding more than
    *max_tweets* tweets are reported as "skipping"; every other topic is
    handed to ``topic_xp``.

    Args:
        res: object exposing an iterable ``topics`` attribute; each topic
            needs ``ngram`` and ``tweets``.
        lc: passed through unchanged to ``topic_xp``.
        max_tweets: largest topic size that still gets a pairwise report
            (default 100, the previously hard-coded limit).
    """
    import ansi

    for topic in res.topics:
        # Single-argument print calls behave the same on Python 2 and 3.
        print("")
        header = ansi.color("*** Topic: ", 'blue')
        label = ansi.color(repr(topic.ngram), 'bold')
        print("%s %s" % (header, label))
        if len(topic.tweets) > max_tweets:
            print("skipping")
        else:
            topic_xp(topic, lc)
def topic_pair_report(res, lc, max_tweets=100):
    """Walk ``res.topics`` and print a tweet-pair report for each topic.

    Every topic gets a blank separator line and a coloured
    "*** Topic: " header followed by ``repr(topic.ngram)``.  A topic with
    more than *max_tweets* tweets only prints "skipping"; otherwise the
    topic is passed to ``topic_xp`` together with *lc*.

    Args:
        res: object with an iterable ``topics`` attribute.
        lc: forwarded untouched to ``topic_xp``.
        max_tweets: size limit above which a topic is skipped (default
            100, matching the former hard-coded value).
    """
    import ansi

    for topic in res.topics:
        # print(<one string>) is valid and equivalent on Python 2 and 3.
        print("")
        print("%s %s" % (ansi.color("*** Topic: ", 'blue'),
                         ansi.color(repr(topic.ngram), 'bold')))
        if len(topic.tweets) > max_tweets:
            print("skipping")
            continue
        topic_xp(topic, lc)
def merge_topics(topic1, topic2, use_jaccard=True, threshold=0.9):
    """Decide whether two topics should be merged, based on their group ids.

    Args:
        topic1: object with ``group_ids`` and ``ngram`` attributes.
        topic2: object with ``group_ids`` and ``ngram`` attributes.
        use_jaccard: when true, merge if ``jaccard`` of the two
            ``group_ids`` collections is strictly greater than
            *threshold*.  When false, merge only if the two ``group_ids``
            compare equal; in that case a coloured notice naming both
            ngrams is printed.
        threshold: Jaccard cut-off used when *use_jaccard* is true
            (default 0.9, the previously hard-coded value).

    Returns:
        The truth value of the merge decision.
    """
    # NOTE(review): group_ids are presumably sets -- confirm with callers.
    if use_jaccard:
        return jaccard(topic1.group_ids, topic2.group_ids) > threshold

    merge = topic1.group_ids == topic2.group_ids
    if merge:
        # Single-argument print call: identical output on Python 2 and 3.
        print(ansi.color(
            "group-equivalent topics %s %s" % (topic1.ngram, topic2.ngram),
            'blue'))
    return merge
def merge_topics(topic1, topic2, use_jaccard=True, threshold=0.9):
    """Return whether *topic1* and *topic2* should be merged.

    Two strategies are available:

    * ``use_jaccard`` true: merge when ``jaccard(topic1.group_ids,
      topic2.group_ids)`` is strictly above *threshold*.
    * ``use_jaccard`` false: merge only when the two ``group_ids``
      compare equal, and print a coloured "group-equivalent topics"
      notice with both ngrams when they do.

    Args:
        topic1: object exposing ``group_ids`` and ``ngram``.
        topic2: object exposing ``group_ids`` and ``ngram``.
        use_jaccard: selects the strategy described above.
        threshold: Jaccard cut-off (default 0.9, formerly hard-coded).

    Returns:
        The truth value of the merge decision.
    """
    ids1 = topic1.group_ids
    ids2 = topic2.group_ids

    if use_jaccard:
        merge = jaccard(ids1, ids2) > threshold
    else:
        merge = ids1 == ids2
        if merge:
            notice = "group-equivalent topics %s %s" % (topic1.ngram,
                                                        topic2.ngram)
            # print(<one string>) works the same on Python 2 and 3.
            print(ansi.color(notice, 'blue'))
    return merge
def topic_xp(topic, lc):
    """Print every pair of tweets in *topic*, highest ``dice`` score first.

    A tweet's feature set is the union of its 'bigrams' and 'unigrams'
    entries.  Each pair is printed as two lines: the score (3 decimals)
    with the first tweet, then the second tweet under a blank score
    column.  Tweets are shown as the coloured user name, padded, followed
    by the whitespace-squeezed text.

    Args:
        topic: object with a ``tweets`` sequence of dicts providing 'id',
            'bigrams' and 'unigrams'.
        lc: object whose ``tweets_by_id`` maps a tweet id to a dict with
            'from_user' and 'text'.
    """
    from itertools import combinations

    tweets = topic.tweets
    if len(tweets) < 2:
        # Fewer than two tweets means no pairs, hence nothing to print.
        return

    import ansi

    # Keyed by id pair: if the same id pair occurs again, the later
    # feature sets overwrite the earlier ones.
    pairs = {}
    for t1, t2 in combinations(tweets, 2):
        pairs[t1['id'], t2['id']] = (t1['bigrams'] | t1['unigrams'],
                                     t2['bigrams'] | t2['unigrams'])

    # Score each pair once, then sort descending.  The sort is stable, so
    # tied pairs keep the dict's iteration order.
    scored = [(dice(x, y), ids) for ids, (x, y) in pairs.items()]
    scored.sort(key=lambda entry: -entry[0])

    squeeze = twokenize.squeeze_whitespace
    for score, (id1, id2) in scored:
        rows = []
        for tweet in (lc.tweets_by_id[id1], lc.tweets_by_id[id2]):
            user = tweet['from_user']
            # Padding is derived from the raw user name length, not from
            # the coloured string.
            padded = ansi.color(user, 'green') + " " * (15 - len(user))
            rows.append("%s %s" % (padded, squeeze(tweet['text'])))
        # Single-argument print call: same output on Python 2 and 3.
        print("%-8s %s\n%-8s %s" % ("%.3f" % score, rows[0], " ", rows[1]))
def topic_xp(topic, lc):
    """Report all tweet pairs of *topic*, ordered by descending ``dice``.

    For every unordered pair of tweets the union of 'bigrams' and
    'unigrams' is taken per tweet and the two unions are compared with
    ``dice``.  Output per pair is two lines: score (formatted "%.3f")
    plus first tweet, then an empty score column plus second tweet.

    Args:
        topic: object whose ``tweets`` is a sequence of dicts with 'id',
            'bigrams' and 'unigrams'.
        lc: object with a ``tweets_by_id`` mapping from tweet id to a
            dict containing 'from_user' and 'text'.
    """
    from itertools import combinations

    if len(topic.tweets) < 2:
        # No pairs can be formed, so there is nothing to report.
        return

    import ansi

    # A dict keyed by (id, id) collapses repeated id pairs to the last one.
    feature_pairs = {}
    for first, second in combinations(topic.tweets, 2):
        key = (first['id'], second['id'])
        feature_pairs[key] = (first['bigrams'] | first['unigrams'],
                              second['bigrams'] | second['unigrams'])

    # Compute each score exactly once, then apply a stable descending sort.
    ranked = [(dice(left, right), key)
              for key, (left, right) in feature_pairs.items()]
    ranked.sort(key=lambda entry: -entry[0])

    def render(tweet):
        # Pad using the plain user name length, then append squeezed text.
        user = tweet['from_user']
        name_column = ansi.color(user, 'green') + " " * (15 - len(user))
        return "%s %s" % (name_column,
                          twokenize.squeeze_whitespace(tweet['text']))

    for score, (id1, id2) in ranked:
        line1 = render(lc.tweets_by_id[id1])
        line2 = render(lc.tweets_by_id[id2])
        # print(<one string>) is equivalent on Python 2 and 3.
        print("%-8s %s\n%-8s %s" % ("%.3f" % score, line1, " ", line2))
def __call__(self, *s, **kw):
    """Pass the positional arguments to ``output`` with ``color('green')``.

    The positional arguments are forwarded as one tuple.  Keyword
    arguments are accepted but not used.
    """
    green = color('green')
    output(green, s)
def error(self, *s):
    """Pass the positional arguments to ``output`` in red with ``bold``.

    The colour prefix is ``color('red', styles=(bold,))``; the positional
    arguments are forwarded as one tuple.
    """
    prefix = color('red', styles=(bold,))
    output(prefix, s)
def note(self, *s):
    """Pass the positional arguments to ``output`` in green with ``bold``.

    The colour prefix is ``color('green', styles=(bold,))``; the
    positional arguments are forwarded as one tuple.
    """
    prefix = color('green', styles=(bold,))
    output(prefix, s)
def doit(*s, **kw):
    """Call ``output`` with a bold colour prefix and return its result.

    The prefix is ``color(n, **kw) + escape(bold)``, where ``n`` comes
    from the enclosing scope.  The positional arguments are forwarded as
    one tuple.
    """
    prefix = color(n, **kw) + escape(bold)
    return output(prefix, s)
import os,io from ansi import color,reset,fg,bg,escape,bold import colored_traceback colored_traceback.add_hook(always=True) import sys white = color('white',styles=(bold,)) def decode(o): if hasattr(o,'value'): return repr(o.value) return str(o) if os.isatty(sys.stdout.fileno()) or 'debug' in os.environ: import sys,time out = sys.stderr.buffer modules = set() here = os.path.dirname(__file__) always = not 'notalways' in os.environ def setroot(where): global here here = os.path.dirname(where) if hasattr(sys,'_getframe'): def getframe(): return sys._getframe(3) else: def getframe():
def __getattr__(self, n):
    """Return a callable that sends its arguments to ``output`` in colour *n*.

    The returned callable takes any positional and keyword arguments.
    Keyword arguments go to ``color(n, ...)``; positional arguments are
    forwarded to ``output`` as one tuple, and its result is returned.
    """
    def emit(*s, **kw):
        return output(color(n, **kw), s)

    return emit
def note(self, *s):
    """Pass the positional arguments to ``output`` in bold green.

    The colour prefix is ``color('green', bold=True)``; the positional
    arguments are forwarded as one tuple.
    """
    prefix = color('green', bold=True)
    output(prefix, s)
import os,io from ansi import color,reset white = color('white',bold=True) def decode(o): if isinstance(o,Exception): return repr(o.value) return str(o) if 'debug' in os.environ: import sys,time out = sys.stderr.buffer modules = set() here = os.path.dirname(__file__) always = 'always' in os.environ def setroot(where): global here here = os.path.dirname(where) if hasattr(sys,'_getframe'): def getframe(): return sys._getframe(3) else: def getframe(): tb = sys.exc_info()[2] if not tb: try: raise Exception except Exception as e: tb = e.__traceback__ while tb.tb_next: tb = tb.tb_next
def color_a_match(m):
    """Return group ``group`` of match *m*, coloured via ``ansi.color``.

    ``group`` is read from the enclosing scope.  The text is passed to
    ``ansi.color`` with the 'backblack' and 'lgray' arguments.
    """
    matched_text = m[group]
    return ansi.color(matched_text, 'backblack', 'lgray')
def color_a_match(m):
    """Colour the ``group`` entry of *m* with 'backblack' and 'lgray'.

    ``m[group]`` is looked up with ``group`` taken from the enclosing
    scope; the result of ``ansi.color`` is returned.
    """
    selected = m[group]
    highlighted = ansi.color(selected, 'backblack', 'lgray')
    return highlighted
def alarm(self, *s):
    """Pass the positional arguments to ``output`` in bold red.

    The colour prefix is ``color('red', bold=True)``; the positional
    arguments are forwarded as one tuple.
    """
    prefix = color('red', bold=True)
    output(prefix, s)