def main():
    if len(sys.argv) != 3:
        raise Exception(
            "Usage: python summarize.py <timestamp_file> <transcript_file>")

    _, timestamp_path, transcript_path = sys.argv

    transcript_json = parse.get_transcript_json(transcript_path)
    # print(len(transcript_json))

    selected_texts = ht.text_generator(timestamp_path, transcript_json)
    # print(len(selected_texts))

    key_points = []
    for selected_text in selected_texts:
        key_point = [
            summary.get_summary(selected_text),
            summary.get_keywords(selected_text)
        ]
        key_points.append(key_point)
    # print(key_points)

    with open("output.txt", "w") as f:
        for i in range(len(selected_texts)):
            f.write("Selected Texts:\n")
            f.write(selected_texts[i])
            f.write("\n\n")
            f.write("Summary:\n")
            f.write(key_points[i][0])
            f.write("\nKeywords:\n")
            f.write(key_points[i][1])
            f.write("\n\n")
def index():
    if request.method == 'POST':
        text = request.form['text']
        summaries = get_summary(text)
        return render_template("summary.html", summaries=summaries)
    return render_template("index.html")
def content():
    link = request.args.get('url')
    title = request.args.get('title')
    updated = request.args.get('updated')

    html = requests.get(link).text
    soup = BeautifulSoup(html, 'html.parser')  # specify a parser explicitly
    text = ''.join(soup.find_all(string=True))

    summary_text = ''
    try:
        summary_text = summary.get_summary(text.encode('utf-8'))
        getEntityGraph.kindamain(text.encode('utf-8'))
    except Exception:
        pass

    info, entity = analyze_policy.analyze(text)
    return render_template('index.html', info=info, entity=entity,
                           sum=summary_text, title=title, updated=updated)
def init(): """Inits the bot.""" reddit = praw.Reddit(client_id=config.APP_ID, client_secret=config.APP_SECRET, user_agent=config.USER_AGENT, username=config.REDDIT_USERNAME, password=config.REDDIT_PASSWORD) processed_posts = load_log() whitelist = load_whitelist() for subreddit in config.SUBREDDITS: for submission in reddit.subreddit(subreddit).new(): if submission.id not in processed_posts: clean_url = submission.url.replace("amp.", "") ext = tldextract.extract(clean_url) domain = "{}.{}".format(ext.domain, ext.suffix) if domain in whitelist: try: article, title = extract_article_from_url(clean_url) summary_dict = summary.get_summary(article, title) except Exception as e: log_error("{},{}".format(clean_url, e)) update_log(submission.id) print("Failed:", submission.id) continue # To reduce low quality submissions, we only process those that made a meaningful summary. if summary_dict["reduction"] >= MINIMUM_REDUCTION_THRESHOLD and summary_dict["reduction"] <= MAXIMUM_REDUCTION_THRESHOLD: # Create a wordcloud, upload it to Imgur and get back the url. image_url = cloud.generate_word_cloud( summary_dict["article_words"]) # We start creating the comment body. post_body = "" for sentence in summary_dict["top_sentences"]: post_body += """> {}\n\n""".format(sentence) top_words = "" for index, word in enumerate(summary_dict["top_words"]): top_words += "{}^#{} ".format(word, index+1) post_message = HEADER.format( summary_dict["title"], submission.url, summary_dict["reduction"]) + post_body + FOOTER.format(image_url, top_words) reddit.submission(submission).reply(post_message) update_log(submission.id) print("Replied to:", submission.id) else: update_log(submission.id) print("Skipped:", submission.id)
def summary():
    json_data = get_summary(connect=conn, my_log=LOG)
    module_list = json_data['module_list']
    host_list = json_data['host_list']

    return render_template('Summary.html',
                           data=json_data,
                           host_list=host_list,
                           module_list=module_list)
async def articlePost(req):
    start = time.time()

    article = req.json
    article_text = pdf_to_string_process(article['path'])

    article['text'] = article_text
    article['tokentree'] = preprocessing_article(article_text)
    article['summary'] = get_summary(article_text, 10)

    end = time.time()
    print(f"Total time in python create new article: {end - start:.2f} s")

    return res.json(article)
def get_families(**kwargs):
    base = {
        "title": "Family",
        "families": db.execute('''
            SELECT family, COUNT(DISTINCT genus)
            FROM fgs
            GROUP BY family
        ''').fetchall(),
        "f": "",
        "genera": [],
        "g": "",
        "species": [],
        "s": ""
    }
    base.update(kwargs)
    base.update({"summary": get_summary(base["f"], base["g"], base["s"])})
    return render_template("display.html", **base)
def sumry():
    if request.method == 'POST':
        text = request.form['content']
        title = request.form['title']
        updated = request.form['updated']
        summary_text = summary.get_summary(text)
        return json.dumps({
            "modal": [{
                "title": "Summary",
                "content": summary_text,
                "type": "text"
            }],
            "title": title,
            "updated": updated,
            "status": "success"
        })
    return '{"status": "error", "message": "invalid request method"}'
def infer(database: sqlite3.Connection,
          table: schema.Node,
          column: schema.Node) -> str:
    tname = schema.get_attributes(table, 'name')
    cname, type_ = schema.get_attributes(column, 'name', 'type')

    if not type_:
        return UNKNOWN

    type_ = type_.lower()

    if _is_boolean(type_) or _is_single_char(type_):
        return BAR_CHART

    if _is_characters(type_):
        col_summary = summary.get_summary(database,
                                          schema.get_name(table),
                                          schema.get_name(column))
        if col_summary['distinct'] <= 100 and col_summary['max'] <= 100:
            return HORIZONTAL_BAR_CHART
        return SUMMARY

    if _is_date(type_) or _is_numeric(type_):
        return HISTOGRAM

    return SUMMARY
def init(): """Inits the bot.""" reddit = praw.Reddit(client_id=config.APP_ID, client_secret=config.APP_SECRET, user_agent=config.USER_AGENT, username=config.REDDIT_USERNAME, password=config.REDDIT_PASSWORD) processed_posts = load_log() whitelist = load_whitelist() for subreddit in config.SUBREDDITS: for submission in reddit.subreddit(subreddit).new(limit=50): if submission.id not in processed_posts: clean_url = submission.url.replace("amp.", "") ext = tldextract.extract(clean_url) domain = "{}.{}".format(ext.domain, ext.suffix) if domain in whitelist: try: with requests.get(clean_url, headers=HEADERS, timeout=10) as response: # Sometimes Requests makes an incorrect guess, we force it to use utf-8 if response.encoding == "ISO-8859-1": response.encoding = "utf-8" html_source = response.text article_title, article_date, article_body = scraper.scrape_html( html_source) summary_dict = summary.get_summary(article_body) except Exception as e: log_error("{},{}".format(clean_url, e)) update_log(submission.id) print("Failed:", submission.id) continue # To reduce low quality submissions, we only process those that made a meaningful summary. if summary_dict["reduction"] >= MINIMUM_REDUCTION_THRESHOLD and summary_dict["reduction"] <= MAXIMUM_REDUCTION_THRESHOLD: # Create a wordcloud, upload it to Imgur and get back the url. image_url = cloud.generate_word_cloud( summary_dict["article_words"]) # We start creating the comment body. post_body = "\n\n".join( ["> " + item for item in summary_dict["top_sentences"]]) top_words = "" for index, word in enumerate(summary_dict["top_words"]): top_words += "{}^#{} ".format(word, index+1) post_message = TEMPLATE.format( article_title, clean_url, summary_dict["reduction"], article_date, post_body, image_url, top_words) reddit.submission(submission).reply(post_message) update_log(submission.id) print("Replied to:", submission.id) else: update_log(submission.id) print("Skipped:", submission.id)
import os

import summary as sum
# import same_doc_tf_idf_summary as sum
# import retuers_corpus_tf_idf_summary as sum

article_text = ""
with open("in.txt", "r") as lines:
    for line in lines:
        article_text = article_text + line

summary_text = sum.get_summary(article_text)

with open("out.txt", "w") as f:
    f.write(summary_text)
def get_summary():
    check_auth(request.headers)
    summary_data = request.json
    summary_text = summary.get_summary(summary_data["text"])
    return jsonify({"summary": summary_text})
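# A minimal client-side sketch of calling the endpoint above. It assumes the
# view is served by a Flask app at http://localhost:5000/summary and that
# check_auth reads an Authorization header; the URL, port, and header are
# assumptions for illustration, not part of the original code.
import requests

resp = requests.post(
    "http://localhost:5000/summary",
    json={"text": "Long article text to be condensed..."},
    headers={"Authorization": "Bearer <token>"},
    timeout=10,
)
print(resp.json()["summary"])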
def train(gpu, args):
    # Initialize workers
    # NOTE : the worker with gpu=0 will do logging
    dist.init_process_group(backend='nccl', init_method='env://',
                            world_size=args.num_gpus, rank=gpu)
    torch.cuda.set_device(gpu)

    # Prepare dataset
    data = get_data(args)

    data_train = data(args, 'train')
    data_val = data(args, 'val')

    sampler_train = DistributedSampler(
        data_train, num_replicas=args.num_gpus, rank=gpu)
    sampler_val = DistributedSampler(
        data_val, num_replicas=args.num_gpus, rank=gpu)

    batch_size = args.batch_size // args.num_gpus

    loader_train = DataLoader(
        dataset=data_train, batch_size=batch_size, shuffle=False,
        num_workers=args.num_threads, pin_memory=True,
        sampler=sampler_train, drop_last=True)
    loader_val = DataLoader(
        dataset=data_val, batch_size=1, shuffle=False,
        num_workers=args.num_threads, pin_memory=True,
        sampler=sampler_val, drop_last=False)

    # Network
    model = get_model(args)
    net = model(args)
    net.cuda(gpu)

    if gpu == 0:
        if args.pretrain is not None:
            assert os.path.exists(args.pretrain), \
                "file not found: {}".format(args.pretrain)

            checkpoint = torch.load(args.pretrain)
            net.load_state_dict(checkpoint['net'])

            print('Load network parameters from : {}'.format(args.pretrain))

    # Loss
    loss = get_loss(args)
    loss = loss(args)
    loss.cuda(gpu)

    # Optimizer
    optimizer, scheduler = utility.make_optimizer_scheduler(args, net)

    net = apex.parallel.convert_syncbn_model(net)
    net, optimizer = amp.initialize(net, optimizer, opt_level=args.opt_level,
                                    verbosity=0)

    if gpu == 0:
        if args.pretrain is not None:
            if args.resume:
                try:
                    optimizer.load_state_dict(checkpoint['optimizer'])
                    scheduler.load_state_dict(checkpoint['scheduler'])
                    amp.load_state_dict(checkpoint['amp'])

                    print('Resume optimizer, scheduler and amp '
                          'from : {}'.format(args.pretrain))
                except KeyError:
                    print('State dicts for resume are not saved. '
                          'Use --save_full argument')

            del checkpoint

    net = DDP(net)

    metric = get_metric(args)
    metric = metric(args)
    summary = get_summary(args)

    if gpu == 0:
        utility.backup_source_code(args.save_dir + '/code')
        try:
            os.makedirs(args.save_dir, exist_ok=True)
            os.makedirs(args.save_dir + '/train', exist_ok=True)
            os.makedirs(args.save_dir + '/val', exist_ok=True)
        except OSError:
            pass

    if gpu == 0:
        writer_train = summary(args.save_dir, 'train', args,
                               loss.loss_name, metric.metric_name)
        writer_val = summary(args.save_dir, 'val', args,
                             loss.loss_name, metric.metric_name)

        with open(args.save_dir + '/args.json', 'w') as args_json:
            json.dump(args.__dict__, args_json, indent=4)

    if args.warm_up:
        warm_up_cnt = 0.0
        warm_up_max_cnt = len(loader_train) + 1.0

    for epoch in range(1, args.epochs + 1):
        # Train
        net.train()

        sampler_train.set_epoch(epoch)

        if gpu == 0:
            current_time = time.strftime('%y%m%d@%H:%M:%S')

            list_lr = []
            for g in optimizer.param_groups:
                list_lr.append(g['lr'])

            print('=== Epoch {:5d} / {:5d} | Lr : {} | {} | {} ==='.format(
                epoch, args.epochs, list_lr, current_time, args.save_dir))

        num_sample = len(loader_train) * loader_train.batch_size * args.num_gpus

        if gpu == 0:
            pbar = tqdm(total=num_sample)
            log_cnt = 0.0
            log_loss = 0.0

        for batch, sample in enumerate(loader_train):
            sample = {key: val.cuda(gpu) for key, val in sample.items()
                      if val is not None}

            if epoch == 1 and args.warm_up:
                warm_up_cnt += 1

                for param_group in optimizer.param_groups:
                    lr_warm_up = param_group['initial_lr'] \
                        * warm_up_cnt / warm_up_max_cnt
                    param_group['lr'] = lr_warm_up

            optimizer.zero_grad()

            output = net(sample)

            loss_sum, loss_val = loss(sample, output)

            # Divide by batch size
            loss_sum = loss_sum / loader_train.batch_size
            loss_val = loss_val / loader_train.batch_size

            with amp.scale_loss(loss_sum, optimizer) as scaled_loss:
                scaled_loss.backward()

            optimizer.step()

            if gpu == 0:
                metric_val = metric.evaluate(sample, output, 'train')
                writer_train.add(loss_val, metric_val)

                log_cnt += 1
                log_loss += loss_sum.item()

                current_time = time.strftime('%y%m%d@%H:%M:%S')
                error_str = '{:<10s}| {} | Loss = {:.4f}'.format(
                    'Train', current_time, log_loss / log_cnt)

                if epoch == 1 and args.warm_up:
                    list_lr = []
                    for g in optimizer.param_groups:
                        list_lr.append(round(g['lr'], 6))

                    error_str = '{} | Lr Warm Up : {}'.format(
                        error_str, list_lr)

                pbar.set_description(error_str)
                pbar.update(loader_train.batch_size * args.num_gpus)

        if gpu == 0:
            pbar.close()

            writer_train.update(epoch, sample, output)

            if args.save_full or epoch == args.epochs:
                state = {
                    'net': net.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                    'amp': amp.state_dict(),
                    'args': args
                }
            else:
                state = {
                    'net': net.module.state_dict(),
                    'args': args
                }

            torch.save(state, '{}/model_{:05d}.pt'.format(args.save_dir, epoch))

        # Val
        torch.set_grad_enabled(False)
        net.eval()

        num_sample = len(loader_val) * loader_val.batch_size * args.num_gpus

        if gpu == 0:
            pbar = tqdm(total=num_sample)
            log_cnt = 0.0
            log_loss = 0.0

        for batch, sample in enumerate(loader_val):
            sample = {key: val.cuda(gpu) for key, val in sample.items()
                      if val is not None}

            output = net(sample)

            loss_sum, loss_val = loss(sample, output)

            # Divide by batch size
            loss_sum = loss_sum / loader_val.batch_size
            loss_val = loss_val / loader_val.batch_size

            if gpu == 0:
                metric_val = metric.evaluate(sample, output, 'train')
                writer_val.add(loss_val, metric_val)

                log_cnt += 1
                log_loss += loss_sum.item()

                current_time = time.strftime('%y%m%d@%H:%M:%S')
                error_str = '{:<10s}| {} | Loss = {:.4f}'.format(
                    'Val', current_time, log_loss / log_cnt)
                pbar.set_description(error_str)
                pbar.update(loader_val.batch_size * args.num_gpus)

        if gpu == 0:
            pbar.close()

            writer_val.update(epoch, sample, output)
            print('')

            writer_val.save(epoch, batch, sample, output)

        torch.set_grad_enabled(True)

        scheduler.step()
def test(args):
    # Prepare dataset
    data = get_data(args)

    data_test = data(args, 'test')

    loader_test = DataLoader(dataset=data_test, batch_size=1,
                             shuffle=False, num_workers=args.num_threads)

    # Network
    model = get_model(args)
    net = model(args)
    net.cuda()

    if args.pretrain is not None:
        assert os.path.exists(args.pretrain), \
            "file not found: {}".format(args.pretrain)

        checkpoint = torch.load(args.pretrain)
        key_m, key_u = net.load_state_dict(checkpoint['net'], strict=False)

        if key_u:
            print('Unexpected keys :')
            print(key_u)

        if key_m:
            print('Missing keys :')
            print(key_m)
            raise KeyError

    net = nn.DataParallel(net)

    metric = get_metric(args)
    metric = metric(args)
    summary = get_summary(args)

    try:
        os.makedirs(args.save_dir, exist_ok=True)
        os.makedirs(args.save_dir + '/test', exist_ok=True)
    except OSError:
        pass

    writer_test = summary(args.save_dir, 'test', args, None,
                          metric.metric_name)

    net.eval()

    num_sample = len(loader_test) * loader_test.batch_size

    pbar = tqdm(total=num_sample)

    t_total = 0

    for batch, sample in enumerate(loader_test):
        sample = {key: val.cuda() for key, val in sample.items()
                  if val is not None}

        t0 = time.time()
        output = net(sample)
        t1 = time.time()

        t_total += (t1 - t0)

        metric_val = metric.evaluate(sample, output, 'train')
        writer_test.add(None, metric_val)

        # Save data for analysis
        if args.save_image:
            writer_test.save(args.epochs, batch, sample, output)

        current_time = time.strftime('%y%m%d@%H:%M:%S')
        error_str = '{} | Test'.format(current_time)
        pbar.set_description(error_str)
        pbar.update(loader_test.batch_size)

    pbar.close()

    writer_test.update(args.epochs, sample, output)

    t_avg = t_total / num_sample
    print('Elapsed time : {} sec, '
          'Average processing time : {} sec'.format(t_total, t_avg))
if step == 3:
    system, step = get_matrixproperties(system, materials, step)
if step == 4:
    system, step = get_sorptionproperties(system, step)
if step == 5:
    system, step = get_layerproperties(system, step)
if step == 6:
    system, step = get_reactionproperties(system, step)
if step == 7:
    system, step = get_reactioncoefficients(system, step)
if step == 8:
    system, step = get_systemproperties(system, step)
if step == 9:
    system, step = get_layerconditions(system, step)
if step == 10:
    system, step = get_solidlayerconditions(system, step)
if step == 11:
    system, step = get_solveroptions(system, step)
if step == 12:
    system, step = get_inputoptions(system, step)
if step == 13:
    while True:
        # show the summary window
        system = get_summary(system, database, materials)
        # run the simulation
        if system is not None:
            output, main = solve_system(system)
            # postprocess
            if output is not None:
                main = postprocess_data(system, output)
            if main == 1:
                break
        else:
            break

# Loads an existing cpsm file
if option == 1:
    cpsmfile = open(filename, 'rb')  # pickle files must be opened in binary mode
    system = pickle.load(cpsmfile)
    cpsmfile.close()
def run(self):
    if callable(self.dataset):
        X, y = self.dataset()
        loader = self.dataset
    else:
        X, y = self.dataset
        loader = lambda: (X, y)

    for name, estimator, grid in self.estimators:
        print(name)
        cache_dir = '%s/%s/' % (self.dir, name)

        if hasattr(self.cv, '__len__'):
            cv = list(self.cv)
        elif callable(self.cv):
            cv = list(self.cv(y))
        else:
            raise NotImplementedError()

        meta = {
            'X_shape': X.shape,
            'y_unique': np.unique(y),
            'cv': cv,
            'name': name,
            'estimator': estimator,
            'grid': grid,
            'search': self.search,
            'search_kwargs': self.search_kwargs
        }

        old_meta = None
        meta_filename = cache_dir + 'meta.pkl'
        if os.path.exists(meta_filename):
            try:
                with open(meta_filename, 'rb') as f:
                    old_meta = dill.load(f)
            except Exception:
                pass

        if old_meta:
            validate_cache(meta, old_meta)

        cacher = MultipleFilesCacher(cache_dir, flush_every_n=5)
        callback = TqdmCallback()

        def record_metadata(index, fit_arguments):
            meta_cacher = RemoteMultipleFilesCacher(
                cache_dir, flush_every_n=1,
                file_name_source=lambda key: '%d_meta.pkl' % key)

            X = fit_arguments.pop('X')
            y = fit_arguments.pop('y')
            estimator = fit_arguments.pop('estimator')

            test = fit_arguments['test']
            y_pred = estimator.predict(X[test])

            fit_arguments['y_pred'] = y_pred
            fit_arguments['y_true'] = y[test]

            meta_cacher[index] = fit_arguments

        search = self.search(estimator, grid, scoring=self.scorer, cv=cv,
                             callback=callback, cacher=cacher, loader=loader,
                             mapper=self.mapper, fit_callback=record_metadata,
                             **self.search_kwargs)

        try:
            with open(meta_filename, 'wb') as f:
                dill.dump(meta, f, -1)

            search.fit(X, y)
            cacher.save()

            print(name, search.best_score_)
            summary = get_summary(self.experiment_name, X, y, cv,
                                  estimator, search.grid_scores_)
            print(summary)

            meta['best_score'] = search.best_score_
            meta['grid_scores'] = search.grid_scores_
            meta['summary'] = summary

            self.results[name] = meta

            with open(meta_filename, 'wb') as f:
                dill.dump(meta, f, -1)
        except Exception:
            e_type, e_value, e_tb = sys.exc_info()
            tb = ''.join(traceback.format_tb(e_tb))
            print(e_type, e_value)
            print(tb)

    summary = '\n'.join(m['summary'] for m in self.results.values())
    with open(self.dir + 'summary.txt', 'w') as f:
        f.write(summary)
def summariser():
    text = request.form['text']
    if len(text) > 0:
        print("got it")
        text, text_summary = get_summary(text)
        return render_template('output.html', text=text,
                               text_summary=text_summary)
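# Several of the view functions above (index, sumry, summariser) are shown
# without their route decorators. A minimal sketch of how such a view is
# typically wired into a Flask application; the app object, route paths, and
# the placeholder get_summary below are assumptions for illustration, not the
# original project's code.
from flask import Flask, render_template, request

app = Flask(__name__)


def get_summary(text):
    # Placeholder summarizer; the real projects import a dedicated
    # summary module instead.
    return text[:200]


@app.route('/', methods=['GET', 'POST'])
def index():
    if request.method == 'POST':
        summaries = get_summary(request.form['text'])
        return render_template("summary.html", summaries=summaries)
    return render_template("index.html")


if __name__ == '__main__':
    app.run(debug=True)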