import os

import tensorflow as tf

# Repo-level helpers (MODELS, TASKS, get_model_params, get_task_params,
# get_train_params, Trainer, constants, WordSvAgreementLM, evaluate_vp,
# compute_and_print_acc_stats, get_model, get_student_model,
# get_teacher_model) are assumed to be imported from this repository; their
# exact module paths are not shown in the source.


def create_and_load_models():
  teacher_model = MODELS[hparams.teacher_model](hparams=get_model_params(
      task, hparams.teacher_model, hparams.teacher_config))
  student_model = MODELS[hparams.student_model](hparams=get_model_params(
      task, hparams.student_model, hparams.student_config))

  teacher_log_dir = os.path.join(
      hparams.logdir, task.name, '_'.join([
          hparams.distill_mode, hparams.distill_config, "teacher",
          teacher_model.model_name, hparams.teacher_config,
          hparams.teacher_exp_name
      ]))
  teacher_ckpt_dir = os.path.join(
      hparams.chkpt_dir, task.name, '_'.join([
          teacher_model.model_name, hparams.teacher_config,
          hparams.teacher_exp_name
      ]))
  student_log_dir = os.path.join(
      hparams.logdir, task.name, '_'.join([
          hparams.distill_mode, hparams.distill_config, "teacher",
          teacher_model.model_name, str(hparams.teacher_config),
          hparams.teacher_exp_name, "student", student_model.model_name,
          str(hparams.student_config), hparams.student_exp_name
      ]))
  student_ckpt_dir = os.path.join(
      hparams.chkpt_dir, task.name, '_'.join([
          hparams.distill_mode, hparams.distill_config, "teacher",
          teacher_model.model_name, str(hparams.teacher_config),
          hparams.teacher_exp_name, "student", student_model.model_name,
          str(hparams.student_config), hparams.student_exp_name
      ]))

  return (teacher_model, student_model, teacher_log_dir, teacher_ckpt_dir,
          student_log_dir, student_ckpt_dir)
def main(argv):
  task = WordSvAgreementLM(task_params=get_task_params(), data_dir='data')

  # Create the model.
  model_params = get_model_params(task, hparams.model_name,
                                  hparams.model_config)
  print("model_params: ", model_params.__dict__)
  cl_token = task.databuilder.sentence_encoder().encode(constants.bos)
  model = MODELS[hparams.model_name](hparams=model_params, cl_token=cl_token)

  trainer_params = get_train_params(hparams.train_config)

  if len(hparams.prefix) > 0:
    hparams.prefix = hparams.prefix + "_"
  log_dir = os.path.join(
      hparams.logdir, task.name,
      hparams.prefix + model.model_name + "_" + str(hparams.model_config) +
      "_" + str(trainer_params.learning_rate) + "_" + hparams.exp_name)
  ckpt_dir = os.path.join(
      hparams.chkpt_dir, task.name,
      hparams.prefix + model.model_name + "_" + str(hparams.model_config) +
      "_" + ((str(trainer_params.learning_rate) + "_") if hparams.withlr
             else '') + hparams.exp_name)
  print(ckpt_dir)

  trainer = Trainer(task=task,
                    model=model,
                    train_params=trainer_params,
                    log_dir=log_dir,
                    ckpt_dir=ckpt_dir)
  trainer.restore()

  distance_hits, distance_total, diff_hits, diff_total = evaluate_vp(
      trainer.model, trainer.task, hparams.split)
  compute_and_print_acc_stats(distance_hits, distance_total, diff_hits,
                              diff_total)
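# A hypothetical command line for this evaluation entry point, assuming the
# usual absl-style flags behind `hparams`; the flag names mirror the
# attributes referenced above and are not confirmed by the source:
#
#   python3 eval_sv_agreement.py \
#     --model_name=lm_lstm_shared_emb \
#     --model_config=lstm_drop31_v2 \
#     --train_config=<train_config> \
#     --exp_name=<exp_name> \
#     --split=test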
def run():
  # Memory growth must be configured identically for all GPUs, before any
  # of them is initialized.
  gpus = tf.config.experimental.list_physical_devices('GPU')
  if gpus:
    try:
      for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
      print(e)

  strategy = tf.distribute.MirroredStrategy()

  log_dir = "logs"
  chkpt_dir = "tf_ckpts"

  with strategy.scope():
    # Create the task and the model.
    task = TASKS[hparams.task](get_task_params())
    model_params = get_model_params(task, hparams.model, hparams.model_config)
    print("model_params: ", model_params.__dict__)
    model = MODELS[hparams.model](hparams=model_params)

    trainer_params = get_train_params(hparams.train_config)

    log_dir = os.path.join(
        log_dir, task.name,
        model.model_name + "_" + str(hparams.model_config) + "_" +
        str(trainer_params.learning_rate) + "_" + hparams.exp_name)
    ckpt_dir = os.path.join(
        chkpt_dir, task.name,
        model.model_name + "_" + str(hparams.model_config) + "_" +
        str(trainer_params.learning_rate) + "_" + hparams.exp_name)

    # Create the trainer.
    trainer = Trainer(hparams,
                      strategy=strategy,
                      task=task,
                      model=model,
                      train_params=trainer_params,
                      log_dir=log_dir,
                      ckpt_dir=ckpt_dir)

    trainer.restore()
    trainer.train()
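# A minimal sketch (an assumption, not shown in the source) of how `run`
# could be wired up as a script entry point, with the global `hparams`
# backed by absl command-line flags matching the attributes accessed above.
# The default values below are illustrative only.

from absl import app, flags

flags.DEFINE_string('task', 'word_sv_agreement_lm', 'Key into TASKS.')
flags.DEFINE_string('model', 'lm_lstm_shared_emb', 'Key into MODELS.')
flags.DEFINE_string('model_config', 'lstm_drop31_v2', 'Model config name.')
flags.DEFINE_string('train_config', 'default_train', 'Training config name.')
flags.DEFINE_string('exp_name', 'trial1', 'Experiment name suffix.')

hparams = flags.FLAGS


def main(argv):
  del argv  # Unused.
  run()


if __name__ == '__main__':
  app.run(main)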
def create_and_load_models():
  if hasattr(task.databuilder, 'sentence_encoder'):
    cl_token = task.databuilder.sentence_encoder().encode(constants.bos)
  else:
    cl_token = 0
  teacher_model = MODELS[hparams.teacher_model](hparams=get_model_params(
      task, hparams.teacher_model, hparams.teacher_config),
      cl_token=cl_token)
  student_model = MODELS[hparams.student_model](hparams=get_model_params(
      task, hparams.student_model, hparams.student_config),
      cl_token=cl_token)

  teacher_log_dir = os.path.join(
      hparams.logdir, task.name, '_'.join([
          hparams.distill_mode, hparams.distill_config, "teacher",
          teacher_model.model_name, hparams.teacher_config,
          hparams.teacher_exp_name
      ]))
  teacher_ckpt_dir = os.path.join(
      hparams.chkpt_dir, task.name, '_'.join([
          teacher_model.model_name, hparams.teacher_config,
          hparams.teacher_exp_name
      ]))
  student_log_dir = os.path.join(
      hparams.logdir, task.name, '_'.join([
          hparams.distill_mode, hparams.distill_config, "teacher",
          teacher_model.model_name, str(hparams.teacher_config),
          hparams.teacher_exp_name, "student", student_model.model_name,
          str(hparams.student_config), hparams.student_exp_name
      ]))
  student_ckpt_dir = os.path.join(
      hparams.chkpt_dir, task.name, '_'.join([
          hparams.distill_mode, hparams.distill_config, "teacher",
          teacher_model.model_name, str(hparams.teacher_config),
          hparams.teacher_exp_name, "student", student_model.model_name,
          str(hparams.student_config), hparams.student_exp_name
      ]))

  return (teacher_model, student_model, teacher_log_dir, teacher_ckpt_dir,
          student_log_dir, student_ckpt_dir)
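# A minimal usage sketch (an assumption): restore the teacher's weights from
# `teacher_ckpt_dir` with the standard tf.train checkpoint utilities. The
# repo's Trainer/Distiller classes presumably handle this internally.

(teacher_model, student_model, teacher_log_dir, teacher_ckpt_dir,
 student_log_dir, student_ckpt_dir) = create_and_load_models()

teacher_ckpt = tf.train.Checkpoint(net=teacher_model)
teacher_manager = tf.train.CheckpointManager(teacher_ckpt, teacher_ckpt_dir,
                                             max_to_keep=2)
if teacher_manager.latest_checkpoint:
  # Restores whatever variables in `teacher_model` match the checkpoint.
  teacher_ckpt.restore(teacher_manager.latest_checkpoint)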
modelz = {}
ckptz = {}

config = {
    'student_exp_name': 'gc_f_std124',
    'teacher_exp_name': 'gc_o_tchr124',
    'task_name': 'word_sv_agreement_vp',
    'teacher_model': 'cl_lstm',
    'student_model': 'cl_lstm',
    'teacher_config': 'small_lstm_v4',
    'student_config': 'small_lstm_v4',
    'distill_config': 'pure_dstl_4_crs_slw',
    'distill_mode': 'offline',
    'chkpt_dir': '../tf_ckpts',
}

std_hparams = get_model_params(task, config['student_model'],
                               config['student_config'])
model, ckpt = get_student_model(config, task, std_hparams, cl_token)
modelz['l2l_std124'] = model
ckptz['l2l_std124'] = ckpt

tchr_hparams = get_model_params(task, config['teacher_model'],
                                config['teacher_config'])
teacher_model, teacher_ckpt = get_teacher_model(config, task, tchr_hparams,
                                                cl_token)
modelz['l2l_tchr124'] = teacher_model
ckptz['l2l_tchr124'] = teacher_ckpt

config = {
    'student_exp_name': 'gc_f_std125',
task = TASKS['word_sv_agreement_lm'](task_params=get_task_params(),
                                     data_dir='../data')
cl_token = task.databuilder.sentence_encoder().encode(constants.bos)

modelz = {}
ckptz = {}

config = {
    'model_name': 'lm_lstm_shared_emb',
    'model_config': 'lstm_drop31_v2',
    'learning_rate': 0.001,
    'exp_name': 'lisa_crs_fst_offlineteacher_v23',
    'chkpt_dir': '../tf_ckpts'
}
hparams = get_model_params(task, config['model_name'], config['model_config'])
hparams.output_attentions = True
hparams.output_embeddings = True
lstm1, lstm_ckpt1 = get_model(config, task, hparams, cl_token)
modelz['lstm1'] = lstm1
ckptz['lstm1'] = lstm_ckpt1

config = {
    'model_name': 'lm_lstm_shared_emb',
    'model_config': 'lstm_drop31_v2',
    'learning_rate': 0.001,
    'exp_name': 'lisa_crs_fst_offlineteacher_v24',
    'chkpt_dir': '../tf_ckpts'
}
hparams = get_model_params(task, config['model_name'], config['model_config'])
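# `get_model` is not shown above; the following is a plausible sketch of the
# helper used in the cells above, assuming it builds the model from MODELS
# and restores the latest checkpoint from the directory convention used
# elsewhere in this file. Names and layout here are assumptions, not the
# repo's confirmed implementation.

def get_model(config, task, hparams, cl_token):
  model = MODELS[config['model_name']](hparams=hparams, cl_token=cl_token)
  ckpt_dir = os.path.join(
      config['chkpt_dir'], task.name,
      '_'.join([model.model_name, config['model_config'],
                str(config['learning_rate']), config['exp_name']]))
  ckpt = tf.train.Checkpoint(net=model)
  manager = tf.train.CheckpointManager(ckpt, ckpt_dir, max_to_keep=2)
  if manager.latest_checkpoint:
    # expect_partial() silences warnings about optimizer slots that exist
    # in the checkpoint but not in this inference-only restore.
    ckpt.restore(manager.latest_checkpoint).expect_partial()
  return model, ckpt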