def get_trending_topics_summary(n=10):
    """Print and return up to *n* summaries for currently trending words.

    A word counts as "trending" when its recent frequency is positive and
    more than triple its baseline frequency (see the filter below).
    Candidates are ranked by a smoothed log-ratio of recent to baseline
    counts, and each one seeds a summary via its best starting bigram.

    Args:
        n: maximum number of summaries to produce.

    Returns:
        list of summaries produced by build_summary (may be shorter than
        *n* if the candidate list is exhausted first).
    """
    words = frequency.word_frequency.keys()
    # fl is presumably a (baseline_count, recent_count) pair — TODO confirm
    # against frequency.get_wf.
    counts = {w: frequency.get_wf(w, g.ts) for w in words}
    # Keep only words whose recent count is positive and more than
    # triple the baseline count.
    counts = {w: fl for w, fl in counts.items()
              if fl[1] > 0 and fl[1] > 3 * fl[0]}
    # Score by smoothed log-ratio; 0.003 avoids log(0), max(...,1) avoids
    # division blow-up for unseen baseline words.
    mcounts = [(w, log((fl[1] + 0.003) / (max(fl[0], 1) + 0.003)))
               for w, fl in counts.items()]
    mcounts.sort(key=lambda x: -x[1])
    summaries = []
    g.penalty = defaultdict(lambda: 0)
    i = 0
    while len(summaries) < n:
        # BUG FIX: stop when the candidate list is exhausted; the original
        # indexed past the end of mcounts and raised IndexError whenever
        # fewer than n topics qualified.
        if i >= len(mcounts):
            break
        keyword = mcounts[i][0]
        #keywords = get_expanded_keywords([keyword])
        keywords = set([keyword])
        print(keywords)
        # Select top starting bigrams that contain one of the keywords
        # to use as seeds for the sentences.
        bigrams = [b for b in g.nw.items()
                   if b[0][0] in keywords or b[0][1] in keywords]
        # BUG FIX: skip keywords with no matching bigram; the original
        # passed an empty sequence to max() and raised ValueError.
        if not bigrams:
            i += 1
            continue
        start = max(
            bigrams,
            key=lambda x: x[1]
                          - 10 * g.penalty[x[0][0]]
                          - 10 * g.penalty[x[0][1]]
        )[0]
        summary = build_summary(start)
        if summary:
            summaries.append(summary)
            show_summaries([summary], keywords=start)
        i += 1
    return summaries
def summarize_partial(start, n=3):
    """Print and return up to *n* summaries seeded from a given bigram.

    Resets the global penalty table, then repeatedly asks build_summary
    for a summary starting at *start*; repeated attempts rely on
    g.penalty being updated elsewhere to diversify results — TODO confirm.

    Args:
        start: the seed bigram (any sequence; coerced to a tuple).
        n: maximum number of summaries to produce.

    Returns:
        list of the summaries produced (may be shorter than *n*).
    """
    summaries = []
    g.penalty = defaultdict(lambda: 0)
    start = tuple(start)
    # BUG FIX: the original looped forever when build_summary kept
    # returning a falsy result — nothing in the loop body guaranteed
    # progress. Cap the total number of attempts.
    attempts = 0
    max_attempts = 10 * n
    while len(summaries) < n and attempts < max_attempts:
        attempts += 1
        summary = build_summary(start)
        if summary:
            summaries.append(summary)
            show_summaries([summary], keywords=start)
    return summaries
def summarize_top(n=10):
    """Print and return up to *n* summaries seeded from the top bigrams.

    Each iteration picks the highest-weighted bigram in g.nw, penalized
    by 10x the accumulated per-word penalty (so already-used words are
    demoted), and uses it as the seed for build_summary.

    Args:
        n: maximum number of summaries to produce.

    Returns:
        list of the summaries produced (may be shorter than *n*).
    """
    summaries = []
    g.penalty = defaultdict(lambda: 0)
    # BUG FIX: the original looped forever when build_summary kept
    # returning a falsy result; cap consecutive overall failures.
    failures = 0
    max_failures = 10 * n
    while len(summaries) < n and failures < max_failures:
        # BUG FIX: guard against an empty bigram table; the original
        # passed an empty sequence to max() and raised ValueError.
        if not g.nw:
            break
        # Select the top starting bigram to use as a seed for the sentence.
        start = max(
            g.nw.items(),
            key=lambda x: x[1]
                          - 10 * g.penalty[x[0][0]]
                          - 10 * g.penalty[x[0][1]]
        )[0]
        print("start: %s" % list(start))
        summary = build_summary(start)
        if summary:
            summaries.append(summary)
            show_summaries([summary], keywords=start)
        else:
            failures += 1
    return summaries
while len(summaries) < n: # select top starting bigrams that contain one of the keywords # to use as seeds for the sentences # put bigrams containing '_S' or '_E' further down the list bigrams = [b for b in g.nw.items() if b[0][0] in keywords or b[0][1] in keywords] start = max(bigrams, key=lambda x: \ x[1] - 10 * g.penalty[x[0][0]] - 10 * g.penalty[x[0][1]] - (0 if x[0][0] != '_S' and x[0][1] != '_E' else 100)) start = start[0] summary = build_summary(start, keywords) if summary: summaries.append(summary) show_summaries([summary], keywords=start) def get_trending_topics_summary(n=10): words = frequency.word_frequency.keys() counts = {w: frequency.get_wf(w, g.ts) for w in words} counts = {w: fl for w, fl in counts.iteritems() if fl[1] > 0 and fl[1] > 3 * fl[0]} mcounts = [(w, log((fl[1] + 0.003) / (max(fl[0],1) + 0.003))) for w, fl in counts.iteritems()] mcounts.sort(key = lambda x: -x[1]) summaries = [] g.penalty = defaultdict(lambda: 0) i = 0 while len(summaries) < n: keyword = mcounts[i][0] #keywords = get_expanded_keywords([keyword])