from collections import defaultdict
import itertools

from base import Summarizer as BaseSummarizer
from log_conf import Logger
from util.tokenization import WordTokenizer, SentTokenizer

logger = Logger('.'.join(__file__.split('/')[-2:-1])).logger


class Summarizer(BaseSummarizer):

    '''
    classdocs
    '''

    def __init__(self, args, opts):
        '''
        Constructor
        '''

    def summarize(self, extracted_refs, facet_results, max_length=250):
        '''
        Summarizes the extracted references based on the facet results.
        Chooses from facets naively.

        Args:
            extracted_refs(list) -- results of the method.run (e.g. simple.py)
            facet_results(dict) -- facets for each extracted reference;
                see data/task1b_results1.json for the expected shape
        '''
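
# A minimal sketch of the naive facet-based selection that the docstring
# above describes, assuming facet_results maps a reference id to a facet
# label and each entry in extracted_refs carries 'id' and 'sentence' keys;
# these field names are illustrative assumptions, not the project's
# confirmed schema.
def _naive_facet_summary(extracted_refs, facet_results, max_length=250):
    # Bucket candidate sentences by their assigned facet.
    by_facet = defaultdict(list)
    for ref in extracted_refs:
        by_facet[facet_results.get(ref['id'], 'none')].append(ref['sentence'])
    # Round-robin across facets until the word budget is exhausted.
    summary, words = [], 0
    for sent in itertools.chain.from_iterable(
            itertools.zip_longest(*by_facet.values())):
        if sent is None:  # padding from zip_longest for shorter buckets
            continue
        n = len(sent.split())
        if words + n > max_length:
            break
        summary.append(sent)
        words += n
    return summary

# Usage (hypothetical data):
#   _naive_facet_summary(
#       [{'id': 'r1', 'sentence': 'Methods were compared.'}],
#       {'r1': 'method'})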
import os
import gzip
import itertools
from collections import defaultdict
from copy import deepcopy
from random import randint

import constants  # provides get_path/join_path used below; exact import path assumed
from log_conf import Logger
from summarizer.mmr_summarizer import MMR
from util.rouge.PythonROUGE.rouge_wrapper import calc_rouge
from util.aritmatic_operations import mean_conf
from util.tokenization import WordTokenizer
from util.common import write_json_as_csv, hash_obj, hash_dict

w_t = WordTokenizer(stem=False)
logger = Logger(__file__.split('/')[-1]).logger

path = constants.get_path()
result_outpath = 'tmp/tmpres/'
_ANNS_DIR = path['ann']
_ANNS_PATH = path['ann_json']
CACHE = path['cache']
valid_topics = ['all']
# doc_mod = DocumentsModel(_ANNS_DIR)

CACHE_FILE = constants.join_path(CACHE, 'umls.json')
if os.path.isfile(CACHE_FILE):
    try: