def __init__(self):
    self.model = self.model_cls()
    if has_gpus():
        self.model.cuda()
    logging.info(self.model)

    task = self.task_cls()
    self.dl_reg = DataLoaderRegistry()
    self.dl_reg.register_data_loader(task, g.data_path)
    self.evaluator = LMEvaluator(self.model, self.dl_reg[task])
    self.trainer = self.trainer_cls(self.model, [task], [1.0], 'total_step',
                                    evaluator=self.evaluator,
                                    check_interval=g.check_interval,
                                    eval_interval=g.eval_interval)
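
# `has_gpus` is not defined in this snippet. Below is a minimal sketch of what it is
# assumed to do (report whether a CUDA device is visible via PyTorch); the real helper
# in the repo may differ.
import torch


def has_gpus() -> bool:
    # Assumption: "has GPUs" simply means at least one CUDA device is available.
    return torch.cuda.is_available()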
def convert_to_dense(feat_matrix: LT) -> DenseFeatureMatrix:
    names = feat_matrix.names
    bs = feat_matrix.size('batch')
    ml = feat_matrix.size('length')
    # Look up per-category feature indices through the _g2f table, keeping named dims.
    fm = _g2f[feat_matrix.rename(None)].refine_names(*names)
    dfms = dict()
    for cat in Category:
        e = get_enum_by_cat(cat)
        dfm_idx = fm[..., cat.value]
        # One-hot encode the category-local indices along a new feature axis.
        dfm = get_zeros(bs, ml, len(e), cpu=True)
        dfm = dfm.scatter(2, dfm_idx.rename(None).unsqueeze(dim=-1), 1.0)
        dfms[cat] = dfm.refine_names('batch', 'length', f'{cat.name}_feat')
    if has_gpus():
        dfms = {k: v.cuda() for k, v in dfms.items()}
    return dfms
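
# Standalone sketch of the scatter-based one-hot expansion used in convert_to_dense,
# written with plain PyTorch only. The shapes and the feature count are invented for
# illustration; the real code derives them from Category / get_enum_by_cat.
import torch

batch_size, max_length, num_feat_values = 2, 4, 5
# Hypothetical category-local feature indices, one per token position.
idx = torch.randint(num_feat_values, (batch_size, max_length))
dense = torch.zeros(batch_size, max_length, num_feat_values)
# Place a 1.0 at each index along the last dimension, i.e. one-hot encoding.
dense = dense.scatter(2, idx.unsqueeze(-1), 1.0)
assert dense.sum(dim=-1).eq(1).all()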
def translate_kernel(ipa_tokens: str, tgt_lang: str):
    global manager
    # Build a source-only batch from the raw IPA tokens and the target language id.
    tgt_lang_id = manager.lang2id[tgt_lang]
    batch = SourceOnlyBatch.from_ipa_tokens(ipa_tokens, manager.src_abc, tgt_lang_id)
    if has_gpus():
        batch.cuda()

    # Run prediction without tracking gradients.
    manager.model.eval()
    with torch.no_grad():
        hyps = manager.model.predict(batch)
    preds, pred_lengths, ended = hyps.translate(manager.tgt_abc)
    preds = preds[0]
    # NOTE(j_luo) Add back EOT if needed.
    pred_lengths = pred_lengths[0] + ended[0]

    def get_pred_unit(pred: List[str], ind: int) -> str:
        if ind < len(pred):
            return pred[ind]
        if ind != len(pred):
            raise RuntimeError('Miscalculation of prediction length?')
        return EOT

    # Take the top beam and slice out its source-target alignment matrix.
    beam_id = 0
    pred = preds[beam_id]
    src_units = batch.src_seqs.units[0]
    src_l = len(src_units)
    tgt_l = pred_lengths[beam_id]
    almt = hyps.almt[0, beam_id].t().detach().cpu().numpy()[:src_l, :tgt_l]

    # Flatten the alignment into a long-format data frame for Altair.
    x, y = np.meshgrid(range(src_l), range(tgt_l), indexing='ij')
    df = pd.DataFrame({'x': x.ravel(), 'y': y.ravel(), 'almt': almt.ravel()})
    df['x_text'] = df['x'].apply(lambda i: src_units[i])
    df['y_text'] = df['y'].apply(lambda i: get_pred_unit(pred, i))
    df['x_ou'] = df[['x', 'x_text']].apply(lambda item: f'{item[0]}-{item[1]}', axis=1)
    df['y_ou'] = df[['y', 'y_text']].apply(lambda item: f'{item[0]}-{item[1]}', axis=1)

    # Render the alignment as a heatmap and save it to disk.
    heatmap = alt.Chart(df).mark_rect().encode(
        x=alt.X('x_ou', type='nominal',
                sort=sorted(df['x_ou'].unique(), key=lambda s: int(s.split('-')[0]))),
        y=alt.Y('y_ou:O', sort=alt.EncodingSortField('y', order='descending')),
        color='almt:Q',
        tooltip=['x_text', 'y_text', 'almt']
    ).configure_view(step=100).configure_axis(
        labelFontSize=20, titleFontSize=20
    ).configure_axisBottom(labelAngle=0)
    heatmap.save('chart.html')
    return heatmap
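
# Toy sketch of the alignment-heatmap construction above, with random numbers in place
# of model attention so it runs without the manager or a trained model. The token
# strings and the output file name are invented for illustration only.
import altair as alt
import numpy as np
import pandas as pd

src_units = ['t', 'a', 'k']          # hypothetical source IPA units
pred = ['d', 'a', 'g', '<EOT>']      # hypothetical prediction, EOT included
almt = np.random.rand(len(src_units), len(pred))  # fake alignment weights

x, y = np.meshgrid(range(len(src_units)), range(len(pred)), indexing='ij')
df = pd.DataFrame({'x': x.ravel(), 'y': y.ravel(), 'almt': almt.ravel()})
# Combine position and text into a single label per axis, e.g. "0-t".
df['x_ou'] = [f'{i}-{src_units[i]}' for i in df['x']]
df['y_ou'] = [f'{j}-{pred[j]}' for j in df['y']]
chart = alt.Chart(df).mark_rect().encode(
    x=alt.X('x_ou:N', sort=sorted(df['x_ou'].unique(), key=lambda s: int(s.split('-')[0]))),
    y=alt.Y('y_ou:O', sort=alt.EncodingSortField('y', order='descending')),
    color='almt:Q')
chart.save('toy_chart.html')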
def __init__(self):
    self.model = DecipherModel()
    if has_gpus():
        self.model.cuda()

    # Core train/dev tasks, plus optional in-domain dev and auxiliary train tasks.
    train_task = DecipherTask('train')
    dev_task = DecipherTask('dev')
    self.dl_reg = DataLoaderRegistry()
    eval_tasks = [train_task, dev_task]
    if g.in_domain_dev_data_path:
        in_domain_dev_task = DecipherTask('in_domain_dev')
        self.dl_reg.register_data_loader(in_domain_dev_task, g.in_domain_dev_data_path)
        eval_tasks.append(in_domain_dev_task)
    train_tasks = [train_task]
    if g.aux_train_data_path:
        aux_train_task = DecipherTask('aux_train')
        self.dl_reg.register_data_loader(aux_train_task, g.aux_train_data_path)
        train_tasks.append(aux_train_task)
    self.dl_reg.register_data_loader(train_task, g.data_path)
    self.dl_reg.register_data_loader(dev_task, g.dev_data_path)

    self.evaluator = DecipherEvaluator(self.model, self.dl_reg, eval_tasks)
    self.trainer = DecipherTrainer(self.model, train_tasks, [1.0] * len(train_tasks), 'total_step',
                                   evaluator=self.evaluator,
                                   check_interval=g.check_interval,
                                   eval_interval=g.eval_interval)

    # Optionally freeze submodules according to the phi-related flags.
    if g.train_phi:
        freeze(self.model.self_attn_layers)
        freeze(self.model.positional_embedding)
        freeze(self.model.emb_for_label)
        freeze(self.model.label_predictor)
    if g.saved_model_path:
        self.trainer.load(g.saved_model_path, load_phi_scorer=True)
    if g.fix_phi:
        freeze(self.model.phi_scorer)
        # freeze(self.model.self_attn_layers)
        # freeze(self.model.positional_embedding)
        # freeze(self.model.emb_for_label)
        # freeze(self.model.label_predictor)
    self.trainer.set_optimizer()
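
# `freeze` is not shown in this snippet. A plausible minimal version is sketched below,
# assuming it simply disables gradient tracking for a module's parameters; the actual
# helper in the repo may do more.
import torch.nn as nn


def freeze(module: nn.Module) -> None:
    # Exclude the module from optimization by turning off gradients.
    for param in module.parameters():
        param.requires_grad = False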
def __init__(self):
    task = ExtractTask()
    self.dl_reg = DataLoaderRegistry()
    self.dl_reg.register_data_loader(task, g.data_path)

    # In text mode, the model needs the unit vocabulary size from the dataset.
    lu_size = None
    if g.input_format == 'text':
        lu_size = self.dl_reg[task].dataset.unit_vocab_size
    self.model = ExtractModel(lu_size=lu_size)
    if has_gpus():
        self.model.cuda()

    self.evaluator = ExtractEvaluator(self.model, self.dl_reg[task])
    self.trainer = ExtractTrainer(self.model, [task], [1.0], 'total_step',
                                  stage_tnames=['round', 'total_step'],
                                  evaluator=self.evaluator,
                                  check_interval=g.check_interval,
                                  eval_interval=g.eval_interval,
                                  save_interval=g.save_interval)
    if g.saved_model_path:
        self.trainer.load(g.saved_model_path)