def music_model_learner(data: DataBunch, arch=MusicTransformerXL, config: dict = None,
                        drop_mult: float = 1., pretrained_path: PathOrStr = None,
                        **learn_kwargs) -> 'LanguageLearner':
    "Create a `Learner` with a language model from `data` and `arch`."
    meta = _model_meta[arch]

    if pretrained_path:
        state = torch.load(pretrained_path, map_location='cpu')
        if config is None:
            config = state['config']

    model = get_language_model(arch, len(data.vocab.itos), config=config, drop_mult=drop_mult)
    learn = MusicLearner(data, model, split_func=meta['split_lm'], **learn_kwargs)

    if pretrained_path:
        get_model(model).load_state_dict(state['model'], strict=False)
        if not hasattr(learn, 'opt'):
            learn.create_opt(defaults.lr, learn.wd)
        try:
            learn.opt.load_state_dict(state['opt'])
        except Exception:
            # The optimizer state may not match the new parameter layout; skip it.
            pass
        del state
        gc.collect()

    return learn

def music_model_learner(data: DataBunch, config: dict = None, drop_mult: float = 1.,
                        pretrained: bool = False, pretrained_fnames: OptStrTuple = None,
                        **learn_kwargs) -> 'LanguageLearner':
    "Create a `Learner` with a MusicTransformerXL language model from `data`."
    # Reuse the TransformerXL metadata (layer-group split) for the music variant.
    _model_meta[MusicTransformerXL] = _model_meta[TransformerXL]
    meta = _model_meta[TransformerXL]

    model = get_language_model(MusicTransformerXL, len(data.vocab.itos),
                               config=config, drop_mult=drop_mult)
    learn = MusicLearner(data, model, config=config, split_func=meta['split_lm'], **learn_kwargs)
    return learn

def language_model_learner(data, arch, config=None, drop_mult=1., pretrained=True,
                           pretrained_fnames=None, **learn_kwargs):
    "Create a `Learner` with a language model from `data` and `arch`."
    model = get_language_model(arch, len(data.vocab.itos), config=config, drop_mult=drop_mult)
    meta = _model_meta[arch]
    learn = LanguageLearner(data, model, split_func=meta['split_lm'], **learn_kwargs)
    # url = 'url_bwd' if data.backwards else 'url'
    if pretrained_fnames is not None:
        # Load locally stored weights (.pth) and vocab (.pkl) instead of downloading them.
        fnames = [PRE_TRAINED_FILES/f'{fn}.{ext}' for fn, ext in zip(pretrained_fnames, ['pth', 'pkl'])]
        learn.load_pretrained(*fnames)
        learn.freeze()
    return learn

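# Minimal usage sketch for language_model_learner (assumptions: `data` is an already-built
# text DataBunch and the weight/vocab files 'fwd_wt103.pth' / 'itos_wt103.pkl' sit under
# PRE_TRAINED_FILES; the file stems and learning rate below are illustrative only).
learn = language_model_learner(data, AWD_LSTM,
                               pretrained_fnames=('fwd_wt103', 'itos_wt103'))
learn.fit_one_cycle(1, 1e-2)
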
def music_model_learner(data: DataBunch, arch=MusicTransformerXL, config: dict = None,
                        drop_mult: float = 1., pretrained_path: PathOrStr = None,
                        **learn_kwargs) -> 'LanguageLearner':
    "Create a `Learner` with a language model from `data` and `arch`."
    meta = _model_meta[arch]
    model = get_language_model(arch, len(data.vocab.itos), config=config, drop_mult=drop_mult)
    learn = MusicLearner(data, model, split_func=meta['split_lm'], **learn_kwargs)

    if pretrained_path:
        # Load pretrained weights onto CPU first; missing/extra keys are tolerated.
        state = torch.load(pretrained_path, map_location='cpu')
        get_model(model).load_state_dict(state['model'], strict=False)

    return learn

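# Minimal usage sketch for the pretrained-checkpoint path (the data pickle name,
# checkpoint path, and training schedule are illustrative assumptions, not values
# taken from the original code):
data = load_data(Path('data/midi'), 'musicitem_data_save.pkl')
learn = music_model_learner(data, pretrained_path='models/pretrained_checkpoint.pth')
learn.fit_one_cycle(4, 1e-3)
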
def run_baseline(output_dir='/notebook/touche/output_tira/', input_dir='/notebook/touche/',
                 input_file='topics-task-2-only-titles.xml'):
    # Deduplicate the ChatNoir answers so documents are not scored twice.
    my_response_list = create_list_of_unigue_answers(input_dir=input_dir, input_file=input_file)
    print("len my responses list", len(my_response_list))
    list_of_tuples = read_xml(input_dir + input_file)  # (qid, query title) for every topic
    common_list = []

    # ULMFiT language-model backbone used to embed queries and answers.
    awd_lstm_lm_config_custom = dict(emb_sz=768, n_hid=1152, n_layers=3, pad_token=1, qrnn=False,
                                     bidir=False, output_p=0.1, hidden_p=0.15, input_p=0.25,
                                     embed_p=0.02, weight_p=0.2, tie_weights=True, out_bias=True)
    m = get_language_model(arch=AWD_LSTM, vocab_sz=60004, config=awd_lstm_lm_config_custom)
    my_rnn = over_AWD_LSTM(m, 768)

    scores_llist = []
    print("lstm ulm cam obj", flush=True)
    # Precomputed per-query score lists: ULMFiT-LSTM relevance, CAM comparativeness, objectivity.
    scores_lstm_list = load_obj("lstm_scores")
    scores_cam_list = load_obj("cam_scores")
    scores_obj_list = load_obj("obj_scores")

    for ind_q, elem in enumerate(list_of_tuples):
        qid = elem[0]
        Q0 = 'Q0'
        query = elem[1]
        tag = 'ULMFIT_LSTM_CAM_OBJ'
        responses = list(zip(*my_response_list[str(ind_q + 1)]))
        scores0 = responses[0]
        print(ind_q)
        docs = responses[1]
        titles = responses[2]
        answers_bodies = responses[3]

        scores = scores_lstm_list[ind_q]  # precomputed; originally make_scores_lstm(my_rnn, query, answers_bodies)
        scores_llist.append(scores)
        qids = [qid] * len(scores)
        Q0s = [Q0 for elem in scores]
        queries = [query] * len(scores)
        tags = [tag for elem in scores]

        # Fuse the three signals: (0.5 * CAM + objectivity) scales the LSTM relevance score.
        scores_obj = scores_obj_list[ind_q]
        scores_cam = scores_cam_list[ind_q]
        multiplicat = [0.5 * s_cam + scores_obj[ind] for ind, s_cam in enumerate(scores_cam)]
        new_scores = [multiplicat[ind] * score for ind, score in enumerate(scores[:20])]

        part_of_commom_list = list(zip(qids, Q0s, docs, new_scores, tags))
        part_of_commom_list = sorted(part_of_commom_list, key=lambda x: x[3], reverse=True)
        qids, Q0s, docs, new_scores, tags = zip(*part_of_commom_list)
        ranks = range(1, len(new_scores) + 1)
        part_of_commom_list = list(zip(qids, Q0s, docs, ranks, new_scores, tags))
        common_list = common_list + part_of_commom_list

    save_obj(scores_llist, "lstm_scores0")
    with open(output_dir + 'run_lstm_cam_obj.txt', 'w') as fp:
        fp.write('\n'.join('%s %s %s %s %s %s' % x for x in common_list))

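# Toy illustration of the score fusion above (all numbers are made up):
# a document with LSTM relevance 0.9, CAM score 0.6 and objectivity 0.4 ends up with
# (0.5 * 0.6 + 0.4) * 0.9 = 0.63.
lstm_score, cam_score, obj_score = 0.9, 0.6, 0.4
fused = (0.5 * cam_score + obj_score) * lstm_score
print(round(fused, 2))  # 0.63
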
    # Tail of the comparative-sentence counting function (its signature is not shown here):
    # score each candidate sentence with CAM and keep those that are clearly comparative.
    dframe = prepare_sentence_DF(sentences, obj1, obj2)
    answ = classify_sentences(dframe, 'infersent')
    filt = (answ["BETTER"] >= 0.2) | (answ["WORSE"] >= 0.2)
    new_answ_df = answ.where(filt)
    new_answ_df = new_answ_df.dropna()
    number_of_comparative_sentences.append(len(new_answ_df))
    return number_of_comparative_sentences

import sys
sys.path.append("./cam/src/Backend/")
from ml_approach.classify import classify_sentences
# Assumed imports for the code below (fastai v1 and PyTorch APIs):
from torch import nn
from fastai.text import AWD_LSTM, get_language_model

awd_lstm_lm_config_custom = dict(emb_sz=768, n_hid=1152, n_layers=3, pad_token=1, qrnn=False,
                                 bidir=False, output_p=0.1, hidden_p=0.15, input_p=0.25,
                                 embed_p=0.02, weight_p=0.2, tie_weights=True, out_bias=True)
m = get_language_model(arch=AWD_LSTM, vocab_sz=60004, config=awd_lstm_lm_config_custom)
# state = torch.load('./wt103/fwd_wt103.h5')

import pickle

def save_obj(obj, name):
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)

class over_AWD_LSTM(nn.Module):
    def __init__(self, extra_model, emb_size, ulm_fit_emb_size=400):
        super(over_AWD_LSTM, self).__init__()
        self.modules = [module for module in extra_model.modules()]

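# Round-trip example for the pickle helpers above (object and file stem are illustrative);
# this is how precomputed score lists such as "lstm_scores" are stored and reloaded.
example_scores = [0.1, 0.5, 0.3]
save_obj(example_scores, 'example_scores')
assert load_obj('example_scores') == example_scores
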
def run_baseline(output_dir='/notebook/touche/output/', input_dir='/notebook/touche/',
                 input_file='topics-task-2-only-titles.xml'):
    # Load the (deduplicated) ChatNoir responses for all queries.
    my_response_list = create_list_of_unigue_answers(input_dir=input_dir, input_file=input_file)
    list_of_tuples = read_xml(input_dir + input_file)
    common_list = []

    awd_lstm_lm_config_custom = dict(emb_sz=768, n_hid=1152, n_layers=3, pad_token=1, qrnn=False,
                                     bidir=False, output_p=0.1, hidden_p=0.15, input_p=0.25,
                                     embed_p=0.02, weight_p=0.2, tie_weights=True, out_bias=True)
    m = get_language_model(arch=AWD_LSTM, vocab_sz=60004, config=awd_lstm_lm_config_custom)
    my_rnn = over_AWD_LSTM(m, 768)
    print("lstm ulm", flush=True)

    for ind_q, elem in enumerate(list_of_tuples[:5]):  # only the first 5 topics
        qid = elem[0]
        Q0 = 'Q0'
        query = elem[1]
        tag = 'ULMFIT_LSTM'
        responses = list(zip(*my_response_list[str(ind_q + 1)]))
        scores0 = responses[0]
        docs = responses[1]
        titles = responses[2]
        answers_bodies = responses[3]

        scores = make_scores_lstm(my_rnn, query, answers_bodies)
        qids = [qid] * len(scores)
        Q0s = [Q0 for elem in scores]
        queries = [query] * len(scores)
        tags = [tag for elem in scores]

        part_of_commom_list = list(zip(qids, Q0s, docs, scores, tags))
        part_of_commom_list = sorted(part_of_commom_list, key=lambda x: x[3], reverse=True)
        qids, Q0s, docs, scores, tags = zip(*part_of_commom_list)
        ranks = range(1, len(scores) + 1)
        part_of_commom_list = list(zip(qids, Q0s, docs, ranks, scores, tags))
        common_list = common_list + part_of_commom_list

    with open(output_dir + 'run_example_ulm1.txt', 'w') as fp:
        fp.write('\n'.join('%s %s %s %s %s %s' % x for x in common_list))

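# The run files written above use the space-separated layout "qid Q0 docid rank score tag".
# A minimal sketch for reading one back (the helper name is an assumption, not part of the
# original code):
def read_run_file(path):
    rows = []
    with open(path) as fp:
        for line in fp:
            qid, q0, doc, rank, score, tag = line.split()
            rows.append((qid, q0, doc, int(rank), float(score), tag))
    return rows
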