Ejemplo n.º 1
0
 def get_news_sentence(self, answer):
     """Return a summary of a news article about ``answer``.

     A temporary SQLite corpus is generated under ``temp_news_corpus`` from
     a Google News search for ``answer``; the article bodies are read back,
     the database file is deleted, and the first non-empty result of
     ``summarize`` is returned.

     Args:
         answer: Search term handed to ``google_news_search``.

     Returns:
         The first non-empty summary, with a leading ``(u"`` removed when
         present, or ``''`` when no article yields a summary.
     """
     # Create a database of news articles about the subject of the question.
     # NOTE(review): 'Standard' is presumably a category label and 5 the
     # number of links requested -- confirm against news_corpus_builder.
     cg = NewsCorpusGenerator('temp_news_corpus', 'sqlite')
     links = cg.google_news_search(answer, 'Standard', 5)
     cg.generate_corpus(links)

     db_path = 'temp_news_corpus/corpus.db'
     conn = sqlite3.connect(db_path)
     try:
         # Each row is a 1-tuple; str(row) therefore yields the tuple's repr,
         # which is why a '(u"' prefix may have to be stripped further down.
         news_strings = [
             str(row).decode('unicode_escape').encode('ascii', 'ignore')
             for row in conn.execute('SELECT body FROM articles')
         ]
     finally:
         # Close before deleting so the file is not removed while still open,
         # and always delete so a failed run cannot leak stale articles into
         # the next call.
         conn.close()
         os.remove(db_path)  # Remove the database

     # NOTE(review): the first article is skipped, as in the original code;
     # the reason is not visible here -- confirm it is intentional.
     for n in news_strings[1:]:
         summary = summarize(n)
         if summary != u"" and summary != []:
             if summary[0:3] == '(u"':
                 return summary[3:]
             return summary
     return ''
Ejemplo n.º 2
0
import os
from news_corpus_builder import NewsCorpusGenerator
from iab_cat_load import iab_tier2


# Directory (next to this script) where the generated corpus is saved
news_corpus_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'news_data')

# No datastore type is passed here, so the library default applies.
# NOTE(review): confirm whether that default is sqlite or one file per article.
ex = NewsCorpusGenerator(news_corpus_dir)


for subcategory, category in iab_tier2.iteritems():
    # Single-argument print(...) behaves the same as the Python 2 statement.
    print('Getting search result for [' + subcategory + '] in [' + category + ']')
    # Search news for the subcategory, passing its parent category and 100
    # (presumably the number of links to retrieve -- confirm with the library).
    links = ex.google_news_search(subcategory, category, 100)
    print('saving...')
    # Generate and save corpus. Stay best-effort (one failing subcategory must
    # not stop the crawl) but report the failure, and let KeyboardInterrupt /
    # SystemExit propagate so the script can still be stopped.
    try:
        ex.generate_corpus(links)
    except Exception as exc:
        print('failed to save corpus for [' + subcategory + ']: ' + repr(exc))
Ejemplo n.º 3
0
import os
from news_corpus_builder import NewsCorpusGenerator
from iab_cat_load import iab_tier2

# Directory (next to this script) where the generated corpus is saved
news_corpus_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               'news_data')

# The datastore type is not specified, so the library default is used.
# NOTE(review): confirm whether that default is sqlite or one file per article.
ex = NewsCorpusGenerator(news_corpus_dir)

for subcategory, category in iab_tier2.iteritems():
    # print(...) with one argument is equivalent to the Python 2 statement.
    print('Getting search result for [' + subcategory + '] in [' + category + ']')
    # Query news for this subcategory together with its parent category; 100 is
    # presumably the number of links requested (confirm with the library).
    links = ex.google_news_search(subcategory, category, 100)
    print('saving...')
    # Generate and save corpus. A failure for one subcategory is reported and
    # skipped rather than silently swallowed; only Exception is caught so that
    # Ctrl-C and SystemExit still terminate the run.
    try:
        ex.generate_corpus(links)
    except Exception as exc:
        print('failed to save corpus for [' + subcategory + ']: ' + repr(exc))