def get_news_sentence(self, answer):
    """Return a summary of a recent news article about ``answer``.

    Builds a throwaway SQLite corpus of news articles found for ``answer``,
    reads the article bodies back, deletes the corpus database, and returns
    the first non-empty result of ``summarize``.

    Args:
        answer: Search term passed to the Google News search.

    Returns:
        The first non-empty summary, or ``''`` when no article yields one.
    """
    db_path = 'temp_news_corpus/corpus.db'

    # Create a database of news articles about the subject of the question.
    cg = NewsCorpusGenerator('temp_news_corpus', 'sqlite')
    links = cg.google_news_search(answer, 'Standard', 5)
    cg.generate_corpus(links)

    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute('SELECT body FROM articles').fetchall()
    finally:
        # Close the connection *before* deleting the file (removing a file
        # that is still open fails on some platforms), and always clean up
        # the temporary database even if the query raised.
        conn.close()
        os.remove(db_path)

    # Each row is a 1-tuple; take the column value itself instead of the
    # tuple's repr, so no "(u'...',)" wrapper ends up in the summarized text.
    # Non-ASCII characters are dropped, as before.
    # NOTE(review): assumes bodies come back as unicode text (the sqlite3
    # default) -- confirm no custom text_factory is configured.
    news_strings = [
        body.encode('ascii', 'ignore')
        for (body,) in rows
        if body is not None
    ]

    # NOTE(review): the first article is skipped, exactly as the previous
    # implementation did; the reason is not visible here -- confirm.
    for article_text in news_strings[1:]:
        summary = summarize(article_text)
        if summary != u"" and summary != []:
            return summary
    return ''
# Script: build a news corpus for every (subcategory, category) pair in
# ``iab_tier2`` by searching Google News and saving the retrieved articles
# under the ``news_data`` directory next to this file.
import os

from news_corpus_builder import NewsCorpusGenerator
from iab_cat_load import iab_tier2

# Location to save generated corpus
news_corpus_dir = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'news_data')

# Save results to sqlite or files per article
ex = NewsCorpusGenerator(news_corpus_dir)

for subcategory, category in iab_tier2.iteritems():
    print('Getting search result for [' + subcategory + '] in [' + category + ']')

    # Search news for the subcategory and assign its category to the
    # retrieved links; 100 is passed as the link-count argument.
    links = ex.google_news_search(subcategory, category, 100)

    print('saving...')
    # Generate and save corpus. This stays best-effort: a failure for one
    # subcategory is reported and the loop moves on. Only ``Exception`` is
    # caught so that Ctrl-C / SystemExit still stop the script.
    try:
        ex.generate_corpus(links)
    except Exception as err:
        print('failed to generate corpus for [' + subcategory + ']: ' + repr(err))
# Script: build a news corpus for every (subcategory, category) pair in
# ``iab_tier2`` by searching Google News and saving the retrieved articles
# under the ``news_data`` directory next to this file.
import os

from news_corpus_builder import NewsCorpusGenerator
from iab_cat_load import iab_tier2

# Location to save generated corpus
news_corpus_dir = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'news_data')

# Save results to sqlite or files per article
ex = NewsCorpusGenerator(news_corpus_dir)

for subcategory, category in iab_tier2.iteritems():
    print('Getting search result for [' + subcategory + '] in [' + category + ']')

    # Search news for the subcategory and assign its category to the
    # retrieved links; 100 is passed as the link-count argument.
    links = ex.google_news_search(subcategory, category, 100)

    print('saving...')
    # Generate and save corpus. This stays best-effort: a failure for one
    # subcategory is reported and the loop moves on. Only ``Exception`` is
    # caught so that Ctrl-C / SystemExit still stop the script.
    try:
        ex.generate_corpus(links)
    except Exception as err:
        print('failed to generate corpus for [' + subcategory + ']: ' + repr(err))