Ejemplo n.º 1
0
def view_entry(date, dir_name):
    """Render the news page for one entry directory.

    Every file under ``NEWSDIR/<date>/<dir_name>`` whose name ends in
    ``json`` is parsed. The parsed documents are indexed by their
    ``source`` field, and the comments returned by ``GetComments`` for each
    document are collected and sorted by their ``time`` field.

    :param date: name of the date sub-directory under ``NEWSDIR``
    :param dir_name: name of the entry directory inside the date directory
    :return: the rendered ``view_news.html`` template, or a redirect to
        ``error_page`` when ``IsDirectory`` rejects the resolved path
    """
    dir_path = os.path.join(NEWSDIR, date, dir_name)
    if not IsDirectory(dir_path):
        return redirect(url_for('error_page', errcode='No this directory'))

    # Collect news contents and news comments.
    news = {}
    comments = []
    for file_name in os.listdir(dir_path):
        file_path = os.path.join(dir_path, file_name)
        if not (IsFile(file_path) and file_name.endswith('json')):
            continue
        # The context manager closes the handle even if parsing fails.
        with codecs.open(file_path, 'r', 'utf-8') as f:
            js = json.load(f)
        news[js['source']] = js
        comments.extend(GetComments(js))

    comments.sort(key=lambda x: x['time'])
    # The newline-joined comment bodies are handed to GetPassageAbstract.
    comment_abstract = GetPassageAbstract(
        '\n'.join([comment['content'] for comment in comments]), 0.5, 0.1, '|')
    return render_template('view_news.html',
                           news=news,
                           comments=comments,
                           comment_abstract=comment_abstract)
Ejemplo n.º 2
0
def GetTermFreqFromFile(tags, file_path):
    """Compute term frequencies for ``tags`` over the passage stored in a file.

    The file is parsed as JSON and its ``contents.passage`` value is passed,
    together with ``tags``, to ``GetTermFreqFromContent``.

    :param tags: forwarded unchanged to ``GetTermFreqFromContent``
    :param file_path: path of the JSON file to read
    :return: the result of ``GetTermFreqFromContent``, or ``None`` when
        ``IsFile`` rejects ``file_path``
    """
    if not IsFile(file_path):
        print(file_path + " not exists or not a file, can't get TF")
        return None
    # The context manager closes the handle even if parsing or the key
    # lookup raises.
    with open(file_path, 'r') as f:
        passage = json.load(f)['contents']['passage']
    return GetTermFreqFromContent(tags, passage)
Ejemplo n.º 3
0
def ExtractTagsFromFile(file_path, num_of_tags):
    """Extract tags from the passage stored in a UTF-8 JSON file.

    The file is parsed as JSON and its ``contents.passage`` value is passed
    to ``ExtractTagsFromContent``.

    :param file_path: path of the JSON file to read
    :param num_of_tags: forwarded unchanged to ``ExtractTagsFromContent``
    :return: whatever ``ExtractTagsFromContent`` returns

    Exits the process with status 2 when ``IsFile`` rejects ``file_path``.
    """
    if not IsFile(file_path):
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("Path not exists or not a file")
        sys.exit(2)
    # Hold the file open only while parsing; the context manager closes it
    # even if json.load or the key lookup raises.
    with codecs.open(file_path, 'r', 'utf-8') as f:
        content = json.load(f)['contents']['passage']
    return ExtractTagsFromContent(content, num_of_tags)
Ejemplo n.º 4
0
 def init_object(self):
     """Fill ``title``, ``file_paths`` and ``sources`` from the JSON files.

     Does nothing when ``self.title`` is already non-empty. Otherwise every
     file in ``parent_dir/dir_name`` whose name ends in ``json`` is parsed:
     its path is appended to ``self.file_paths``, its ``source`` field to
     ``self.sources``, and the first non-empty ``contents.title`` found
     becomes ``self.title``.
     """
     if self.title:
         return
     current_path = os.path.join(self.parent_dir, self.dir_name)
     for file_name in os.listdir(current_path):
         file_path = os.path.join(current_path, file_name)
         if not (IsFile(file_path) and file_name.endswith('json')):
             continue
         self.file_paths.append(file_path)
         # The context manager closes the handle even if parsing fails.
         with codecs.open(file_path, 'r', 'utf-8') as f:
             js = json.load(f)
         if not self.title:
             self.title = js['contents']['title']
         self.sources.append(js['source'])
Ejemplo n.º 5
0
def ExtractTagsFromFile(file_path, num_of_tags):
    """Extract keywords from the passage stored in a JSON file.

    The file is parsed as JSON and its ``contents.passage`` value is passed
    to ``ExtractTagsFromContent``.

    :param file_path: path of the input article
    :param num_of_tags: number of keywords requested
    :return: the keywords of the article

    Exits the process with status 2 when ``IsFile`` rejects ``file_path``.
    """
    if not IsFile(file_path):
        print("Path not exists or not a file")
        sys.exit(2)
    # NOTE(review): the file is opened with the platform default text
    # encoding; confirm whether the articles are always UTF-8.
    # The context manager closes the handle even if json.load or the key
    # lookup raises.
    with open(file_path, 'r') as f:
        content = json.load(f)['contents']['passage']
    return ExtractTagsFromContent(content, num_of_tags)