Ejemplo n.º 1
0
 def load_several_clustered_news(self):
     """Load the clustered document ids and resolve them to news ids.

     Reads ``.conf/clustered.txt`` through ``read_lines``, converts every
     entry with ``int`` and stores two attributes on the instance:

     * ``self.several_doc_ids`` -- ``set`` of the parsed document ids;
     * ``self.several_news_ids`` -- ``list`` holding
       ``self.news_by_docs[doc_id]`` for every parsed id, in file order
       (an id repeated in the file yields a repeated entry).

     The original note on this method reads "get 704 news"; presumably
     that is the size of the clustered set -- TODO confirm.

     Raises:
         MissedValueError: if ``self.news_by_docs`` is falsy. The second
             argument presumably names the method that fills it --
             TODO confirm.
         Exception: if ``read_lines`` returned a falsy value for the file.
     """
     if not self.news_by_docs:
         raise MissedValueError('news_by_docs',
                                'build_docs_news_dependencies')
     several_doc_ids = read_lines('.conf/clustered.txt')
     if not several_doc_ids:
         raise Exception(u'Ошибка загрузки файла clustered.txt')
     # Materialise a real list: on Python 3 ``map`` is a one-shot iterator,
     # so building the set below would exhaust it and the second pass would
     # silently produce no news ids.
     several_doc_ids = list(map(int, several_doc_ids))
     self.several_doc_ids = set(several_doc_ids)
     self.several_news_ids = [self.news_by_docs[doc_id]
                              for doc_id in several_doc_ids]
Ejemplo n.º 2
0
 def load_several_clustered_news(self):
     """Load the clustered document ids and resolve them to news ids.

     Reads ``.conf/clustered.txt`` through ``read_lines``, converts every
     entry with ``int`` and stores two attributes on the instance:

     * ``self.several_doc_ids`` -- ``set`` of the parsed document ids;
     * ``self.several_news_ids`` -- ``list`` holding
       ``self.news_by_docs[doc_id]`` for every parsed id, in file order
       (an id repeated in the file yields a repeated entry).

     The original note on this method reads "get 704 news"; presumably
     that is the size of the clustered set -- TODO confirm.

     Raises:
         MissedValueError: if ``self.news_by_docs`` is falsy. The second
             argument presumably names the method that fills it --
             TODO confirm.
         Exception: if ``read_lines`` returned a falsy value for the file.
     """
     if not self.news_by_docs:
         raise MissedValueError('news_by_docs',
                                'build_docs_news_dependencies')
     several_doc_ids = read_lines('.conf/clustered.txt')
     if not several_doc_ids:
         raise Exception(u'Ошибка загрузки файла clustered.txt')
     # Materialise a real list: on Python 3 ``map`` is a one-shot iterator,
     # so building the set below would exhaust it and the second pass would
     # silently produce no news ids.
     several_doc_ids = list(map(int, several_doc_ids))
     self.several_doc_ids = set(several_doc_ids)
     self.several_news_ids = [self.news_by_docs[doc_id]
                              for doc_id in several_doc_ids]
Ejemplo n.º 3
0
 def load_stop_words(self):
     """Read the stop-word list and keep it in ``self.stop_words``.

     The list comes from ``.conf/stop_words.txt``. ``'cp1251'`` is passed
     to ``read_lines`` as its second argument (presumably the file
     encoding -- TODO confirm against ``read_lines``).
     """
     stop_words = read_lines('.conf/stop_words.txt', 'cp1251')
     self.stop_words = stop_words
Ejemplo n.º 4
0
 def load_stop_words(self):
     """Read the stop-word list and keep it in ``self.stop_words``.

     The list comes from ``.conf/stop_words.txt``. ``'cp1251'`` is passed
     to ``read_lines`` as its second argument (presumably the file
     encoding -- TODO confirm against ``read_lines``).
     """
     stop_words = read_lines('.conf/stop_words.txt', 'cp1251')
     self.stop_words = stop_words
Ejemplo n.º 5
0
## create NewsKeyword and ParagraphKeyword
# NewsStemmed.objects.create_keywords(stop_words, angry_mode=True)
# ParagraphStemmed.objects.create_keywords(stop_words, angry_mode=True)

# create NewsStats and ParagraphStats
# NewsKeywords.objects.create_stats()
# ParagraphKeywords.objects.create_stats()

# NOTE: the triple-quoted string below is report-generation code that has
# been switched off by quoting it; it is a bare string expression and is
# never executed.
'''
## gen_reports
coefficients =  [(0, 100)]
for alpha, beta in coefficients:
    print dt(), 'alpha=%.2f, beta=%.2f' % (alpha, beta)
    NewsKeywords.objects.create_keyword_items(alpha, beta, gen_report=True)
'''
# Load the clustered document ids (the original note calls this step
# "get 704 news").
several_doc_ids = read_lines('.conf/clustered.txt')
# Wrap ``map`` in ``list``: on Python 3 a bare ``map`` object is always
# truthy, which would make the emptiness check further down meaningless.
several_doc_ids = list(map(int, several_doc_ids))

# Two lookup tables built from the News rows:
#   docs:         news primary key -> doc_id
#   news_by_docs: doc_id           -> news primary key
docs = dict()
news_by_docs = dict()
for news in News.objects.only('doc_id'):
    docs[news.pk] = news.doc_id
    news_by_docs[news.doc_id] = news.pk

# Resolve every distinct clustered doc_id to its news primary key.
# A doc_id that has no News row raises KeyError here.
several_news_ids = []
if several_doc_ids and news_by_docs:
    several_doc_ids = set(several_doc_ids)
    for doc_id in several_doc_ids:
        several_news_ids.append(news_by_docs[doc_id])
# Single-argument call form prints the same text on Python 2 and Python 3.
print('loaded clustered ids')

# Paragraphs that belong to the selected news.
items = NewsParagraph.objects.filter(news__in=several_news_ids).only('news')
Ejemplo n.º 6
0
## create NewsKeyword and ParagraphKeyword
# NewsStemmed.objects.create_keywords(stop_words, angry_mode=True)
# ParagraphStemmed.objects.create_keywords(stop_words, angry_mode=True)

# create NewsStats and ParagraphStats
# NewsKeywords.objects.create_stats()
# ParagraphKeywords.objects.create_stats()

# NOTE: the triple-quoted string below is report-generation code that has
# been switched off by quoting it; it is a bare string expression and is
# never executed.
'''
## gen_reports
coefficients =  [(0, 100)]
for alpha, beta in coefficients:
    print dt(), 'alpha=%.2f, beta=%.2f' % (alpha, beta)
    NewsKeywords.objects.create_keyword_items(alpha, beta, gen_report=True)
'''
# Load the clustered document ids (the original note calls this step
# "get 704 news").
several_doc_ids = read_lines(
    '.conf/clustered.txt')
# Wrap ``map`` in ``list``: on Python 3 a bare ``map`` object is always
# truthy, which would make the emptiness check further down meaningless.
several_doc_ids = list(map(int, several_doc_ids))

# Two lookup tables built from the News rows:
#   docs:         news primary key -> doc_id
#   news_by_docs: doc_id           -> news primary key
docs = dict()
news_by_docs = dict()
for news in News.objects.only('doc_id'):
    docs[news.pk] = news.doc_id
    news_by_docs[news.doc_id] = news.pk

# Resolve every distinct clustered doc_id to its news primary key.
# A doc_id that has no News row raises KeyError here.
several_news_ids = []
if several_doc_ids and news_by_docs:
    several_doc_ids = set(several_doc_ids)
    for doc_id in several_doc_ids:
        several_news_ids.append(news_by_docs[doc_id])
# Single-argument call form prints the same text on Python 2 and Python 3.
print('loaded clustered ids')

# Paragraphs that belong to the selected news.
items = NewsParagraph.objects.filter(news__in=several_news_ids).only('news')