def _eval_pas(self, arguments_set, dataset: PASDataset, corpus: str, suffix: str = '') -> Dict[str, ScoreResult]:
    prediction_output_dir = self.save_dir / f'{corpus}_out{suffix}'
    prediction_writer = PredictionKNPWriter(dataset, self.logger, use_knp_overt=(not self.predict_overt))
    documents_pred = prediction_writer.write(arguments_set, prediction_output_dir, add_pas_tag=False)
    log = {}
    for pas_target in self.pas_targets:
        scorer = Scorer(documents_pred, dataset.gold_documents,
                        target_cases=dataset.target_cases,
                        target_exophors=dataset.target_exophors,
                        coreference=dataset.coreference,
                        bridging=dataset.bridging,
                        pas_target=pas_target)
        result = scorer.run()
        target = corpus + (f'_{pas_target}' if pas_target else '') + suffix
        scorer.write_html(self.save_dir / f'{target}.html')
        result.export_txt(self.save_dir / f'{target}.txt')
        result.export_csv(self.save_dir / f'{target}.csv')
        log[pas_target] = result
    return log
def main(config, args):
    analyzer = Analyzer(config, remote_knp=args.remote_knp)

    # Input source: an inline string, a directory of parsed KNP files, or stdin.
    if args.input is not None:
        source = args.input
    elif args.knp_dir is not None:
        source = Path(args.knp_dir)
    else:
        source = ''.join(sys.stdin.readlines())

    arguments_set, dataset = analyzer.analyze(source)

    prediction_writer = PredictionKNPWriter(dataset, logger)
    # Output destination: an export directory, stdout (--tab), or none (trees only).
    if args.export_dir is not None:
        destination = Path(args.export_dir)
    elif args.tab is True:
        destination = sys.stdout
    else:
        destination = None
    documents_pred: List[Document] = prediction_writer.write(arguments_set,
                                                             destination,
                                                             skip_untagged=args.skip_untagged,
                                                             add_pas_tag=(not args.rel_only))
    if args.tab is False:
        for document_pred in documents_pred:
            for sid in document_pred.sid2sentence.keys():
                draw_tree(document_pred, sid, dataset.target_cases, dataset.bridging, dataset.coreference,
                          sys.stdout)
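# --- Hypothetical CLI wiring (not part of the original source): a minimal
# argparse sketch of how main() above could be invoked from the command line.
# The option names mirror the attributes the function reads (args.input,
# args.knp_dir, args.export_dir, args.tab, args.skip_untagged, args.rel_only,
# args.remote_knp); defaults, help strings, and the placeholder config are
# assumptions.
def _example_cli():
    import argparse
    parser = argparse.ArgumentParser(description='PAS analysis CLI (sketch)')
    parser.add_argument('--input', default=None, help='raw text to analyze')
    parser.add_argument('--knp-dir', default=None, help='directory of parsed KNP files to analyze')
    parser.add_argument('--export-dir', default=None, help='directory to write tagged KNP files to')
    parser.add_argument('--tab', action='store_true', help='write KNP output to stdout instead of drawing trees')
    parser.add_argument('--skip-untagged', action='store_true', help='skip documents that could not be tagged')
    parser.add_argument('--rel-only', action='store_true', help='write rel tags only (skip PAS tags)')
    parser.add_argument('--remote-knp', action='store_true', help='send parsing to a remote KNP server')
    args = parser.parse_args()
    config = {}  # placeholder: the real config object is constructed elsewhere
    main(config, args)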
def _eval_pas(self, arguments_set, dataset: PASDataset, corpus: str, suffix: str = ''):
    prediction_output_dir = self.save_dir / f'{corpus}_out{suffix}'
    prediction_writer = PredictionKNPWriter(dataset, self.logger, use_knp_overt=(not self.predict_overt))
    documents_pred = prediction_writer.write(arguments_set, prediction_output_dir)
    documents_gold = dataset.joined_documents if corpus == 'kc' else dataset.documents

    result = {}
    for pas_target in self.pas_targets:
        scorer = Scorer(documents_pred, documents_gold,
                        target_cases=dataset.target_cases,
                        target_exophors=dataset.target_exophors,
                        coreference=dataset.coreference,
                        bridging=dataset.bridging,
                        pas_target=pas_target)
        stem = corpus
        if pas_target:
            stem += f'_{pas_target}'
        stem += suffix
        if self.target != 'test':
            scorer.write_html(self.save_dir / f'{stem}.html')
        scorer.export_txt(self.save_dir / f'{stem}.txt')
        scorer.export_csv(self.save_dir / f'{stem}.csv')

        metrics = self._eval_metrics(scorer.result_dict())
        for met, value in zip(self.metrics, metrics):
            met_name = met.__name__
            # Disambiguate case-analysis / zero-anaphora metrics per PAS target.
            if pas_target and ('case_analysis' in met_name or 'zero_anaphora' in met_name):
                met_name = f'{pas_target}_{met_name}'
            result[met_name] = value
    return result
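# --- Hypothetical usage sketch (not part of the original source): calling
# _eval_pas above once per test corpus and merging the per-metric dicts it
# returns. The 'predictions' mapping of corpus name -> (arguments_set, dataset)
# is an assumption for illustration.
def _example_eval_all(tester, predictions):
    log = {}
    for corpus, (arguments_set, dataset) in predictions.items():
        log.update(tester._eval_pas(arguments_set, dataset, corpus))
    return log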
def _valid_epoch(self, data_loader, corpus):
    """Validate after training for one epoch.

    :return: A log dict with the mean validation 'loss', plus the scoring
        'result' for PAS corpora or 'f1' for the commonsense corpus.
    """
    self.model.eval()
    total_loss = 0
    arguments_set: List[List[List[int]]] = []
    # Contingency predictions for the commonsense task; this variant never
    # fills it, so the commonsense branch below evaluates an empty list.
    contingency_set: List[int] = []
    with torch.no_grad():
        for step, batch in enumerate(data_loader):
            batch = {label: t.to(self.device, non_blocking=True) for label, t in batch.items()}
            loss, *output = self.model(**batch)
            if len(loss.size()) > 0:
                loss = loss.mean()  # reduce a per-replica loss vector (e.g. from DataParallel)
            pas_scores = output[0]  # (b, seq, case, seq)
            if corpus != 'commonsense':
                arguments_set += torch.argmax(pas_scores, dim=3).tolist()  # (b, seq, case)
            total_loss += loss.item() * pas_scores.size(0)
            if step % self.log_step == 0:
                self.logger.info('Validation [{}/{} ({:.0f}%)] Time: {}'.format(
                    step * data_loader.batch_size,
                    len(data_loader.dataset),
                    100.0 * step / len(data_loader),
                    datetime.datetime.now().strftime('%H:%M:%S')))
    log = {'loss': total_loss / len(data_loader.dataset)}
    self.writer.add_scalar(f'loss/{corpus}', log['loss'])

    if corpus != 'commonsense':
        dataset = data_loader.dataset
        prediction_writer = PredictionKNPWriter(dataset, self.logger)
        documents_pred = prediction_writer.write(arguments_set, None, add_pas_tag=False)
        targets2label = {(): '', ('pred',): 'pred', ('noun',): 'noun', ('pred', 'noun'): 'all'}
        scorer = Scorer(documents_pred, dataset.gold_documents,
                        target_cases=dataset.target_cases,
                        target_exophors=dataset.target_exophors,
                        coreference=dataset.coreference,
                        bridging=dataset.bridging,
                        pas_target=targets2label[tuple(dataset.pas_targets)])
        result = scorer.run()
        log['result'] = result
    else:
        log['f1'] = self._eval_commonsense(contingency_set)
    return log
def analyze_raw_data_from_client(knp_result: str):
    log_dir = Path('log') / datetime.now().strftime(r'%Y%m%d_%H%M%S')
    arguments_set, dataset = analyzer.analyze_from_knp(knp_result, knp_dir=log_dir)
    prediction_writer = PredictionKNPWriter(dataset, logger)
    with io.StringIO() as string:
        _ = prediction_writer.write(arguments_set, string, skip_untagged=False)
        knp_result = string.getvalue()
    with log_dir.joinpath('pas.knp').open('wt') as f:
        f.write(knp_result)
    return knp_result
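# --- Hypothetical usage sketch (not part of the original source): reading an
# already-parsed KNP document from disk and tagging it via the handler above.
# The file name is an assumption; any KNP-format string works.
def _example_tag_knp_file(path='input.knp'):
    with open(path, encoding='utf-8') as f:
        knp_text = f.read()
    return analyze_raw_data_from_client(knp_text)  # same document, now with predicted PAS tags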
def api():
    input_string = request.args['input']
    log_dir = Path('log') / datetime.now().strftime(r'%Y%m%d_%H%M%S')
    arguments_set, dataset = analyzer.analyze(input_string, knp_dir=log_dir)
    prediction_writer = PredictionKNPWriter(dataset, logger)
    with io.StringIO() as string:
        document: Document = prediction_writer.write(arguments_set, string, skip_untagged=False)[0]
        knp_result: str = string.getvalue()
    with log_dir.joinpath('pas.knp').open('wt') as f:
        f.write(knp_result)

    html_string = textwrap.dedent('''
        <style type="text/css">
        pre {
            font-family: "MS ゴシック", "Osaka-Mono", "Osaka-等幅", "さざなみゴシック", "Sazanami Gothic", sans-serif;
            white-space: pre;
        }
        </style>
    ''')
    html_string += '<pre>\n'
    for sid in document.sid2sentence.keys():
        with io.StringIO() as string:
            draw_tree(document, sid, dataset.target_cases, dataset.bridging, dataset.coreference, string,
                      html=True)
            tree_string = string.getvalue()
        logger.info('output:\n' + tree_string)
        html_string += tree_string
    html_string += '</pre>\n'

    return make_response(jsonify({
        'input': analyzer.sanitize_string(input_string),
        'output': [
            {'result': html_string},
            {'results in a KNP format': html.escape(knp_result).replace('\n', '<br>')},
        ],
    }))
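# --- Hypothetical Flask wiring (not part of the original source): a minimal
# sketch of how the api() view above might be registered and served. The
# route path '/api' and the port are assumptions.
def _example_run_server(port=5000):
    from flask import Flask
    app = Flask(__name__)
    app.add_url_rule('/api', 'api', api)  # handles GET /api?input=...
    app.run(port=port)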
def _valid_epoch(self, data_loader, corpus):
    """Validate after training for one epoch.

    :return: A log dict with the mean validation 'loss', plus one entry per
        configured metric for PAS corpora or 'f1' for the commonsense corpus.
    """
    self.model.eval()
    total_loss = 0
    arguments_set: List[List[List[int]]] = []
    contingency_set: List[int] = []
    with torch.no_grad():
        for step, batch in enumerate(data_loader):
            batch = {label: t.to(self.device, non_blocking=True) for label, t in batch.items()}
            loss, *output = self.model(**batch)
            if len(loss.size()) > 0:
                loss = loss.mean()  # reduce a per-replica loss vector (e.g. from DataParallel)
            # Pick the PAS score tensor according to the model architecture.
            if re.match(r'.*(CaseInteraction|Refinement|Duplicate).*Model', self.config['arch']['type']):
                pas_scores = output[-1]  # (b, seq, case, seq)
            elif self.config['arch']['type'] == 'CommonsenseModel':
                pas_scores = output[0]  # (b, seq, case, seq)
                contingency_set += output[1].gt(0.5).int().tolist()
            else:
                pas_scores = output[0]  # (b, seq, case, seq)
            if corpus != 'commonsense':
                arguments_set += torch.argmax(pas_scores, dim=3).tolist()  # (b, seq, case)
            total_loss += loss.item() * pas_scores.size(0)
            if step % self.log_step == 0:
                self.logger.info('Validation [{}/{} ({:.0f}%)] Time: {}'.format(
                    step * data_loader.batch_size,
                    len(data_loader.dataset),
                    100.0 * step / len(data_loader),
                    datetime.datetime.now().strftime('%H:%M:%S')))
    log = {'loss': total_loss / len(data_loader.dataset)}
    self.writer.add_scalar(f'loss/{corpus}', log['loss'])

    if corpus != 'commonsense':
        dataset = data_loader.dataset
        prediction_writer = PredictionKNPWriter(dataset, self.logger)
        documents_pred = prediction_writer.write(arguments_set, None)
        documents_gold = dataset.joined_documents if corpus == 'kc' else dataset.documents
        targets2label = {(): '', ('pred',): 'pred', ('noun',): 'noun', ('pred', 'noun'): 'all'}
        scorer = Scorer(documents_pred, documents_gold,
                        target_cases=dataset.target_cases,
                        target_exophors=dataset.target_exophors,
                        coreference=dataset.coreference,
                        bridging=dataset.bridging,
                        pas_target=targets2label[tuple(dataset.pas_targets)])
        val_metrics = self._eval_metrics(scorer.result_dict(), corpus)
        log.update(dict(zip([met.__name__ for met in self.metrics], val_metrics)))
    else:
        log['f1'] = self._eval_commonsense(contingency_set)
    return log
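# --- Hypothetical usage sketch (not part of the original source): running
# _valid_epoch above over every validation corpus and namespacing the returned
# keys. The corpus-name -> DataLoader mapping is an assumption.
def _example_validate_all(trainer, valid_data_loaders):
    logs = {}
    for corpus, data_loader in valid_data_loaders.items():
        log = trainer._valid_epoch(data_loader, corpus)
        logs.update({f'val_{corpus}_{k}': v for k, v in log.items()})
    return logs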