def test(encdec):
    # Loads vocab.
    src_vocab = make_vocab(SRC_TRAIN_FILE, SRC_VOCAB_SIZE)
    trg_vocab = make_vocab(TRG_TRAIN_FILE, TRG_VOCAB_SIZE)
    inv_trg_vocab = make_inv_vocab(trg_vocab)

    for line in sys.stdin:
        trg_ids = test_batch(
            encdec, src_vocab, trg_vocab,
            [line_to_sent(line.strip(), src_vocab)])[0]
        # Prints the result.
        print(" ".join(inv_trg_vocab[wid] for wid in trg_ids))
def test(encdec, args):
    # Loads vocab.
    src_vocab = make_vocab(SRC_TRAIN_FILE, args.src_vocab)
    trg_vocab = make_vocab(TRG_TRAIN_FILE, args.trg_vocab)
    inv_trg_vocab = make_inv_vocab(trg_vocab)

    for line in sys.stdin:
        sent = [line_to_sent(line.strip(), src_vocab)]
        trg_ids = test_batch(encdec, src_vocab, trg_vocab, sent,
                             args.generation_limit)[0]
        # Prints the result.
        print(" ".join(inv_trg_vocab[wid] for wid in trg_ids))
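# Both test() variants above rely on a frequency-cutoff make_vocab(path, size)
# and its inverse mapping. A minimal sketch, assuming whitespace-tokenized
# training text and reserved <unk>/<s> entries (the reserved symbols and their
# ordering are assumptions, not taken from the source):
from collections import Counter

def make_vocab(path, size):
    counts = Counter()
    with open(path, encoding="utf-8") as f:
        for line in f:
            counts.update(line.split())
    vocab = {"<unk>": 0, "<s>": 1}
    for word, _ in counts.most_common(size - len(vocab)):
        vocab[word] = len(vocab)
    return vocab

def make_inv_vocab(vocab):
    # Maps word IDs back to surface strings for printing translations.
    return {wid: word for word, wid in vocab.items()}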
def __init__(self, applyDict=False, **kwargs):  # TODO: document this
    """
    :param applyDict:
    :param kwargs: 'train_path' 'dev_path' 'test_path', 'dict_path', 'applyDict'
    """
    self.debug = False
    # if 'debug' in kwargs.keys():
    #     self.debug = kwargs['debug']
    self.keywords_dist = {}
    emotion_special_processing = kwargs['emotion_special_processing']
    emotion_type = kwargs['emotion_type']
    use_emotion_supervision = kwargs['use_emotion_supervision']

    if applyDict:
        print("[CORPUS]: Loading dictionary from provided path : ", kwargs['dict_path'])
        self.dictionary = load_pickle(kwargs['dict_path'])  # a previously saved pickle of a Dictionary
        print('[dictionary]: len(dictionary.word2idx) = ', len(self.dictionary.word2idx))
        if 'train_path' in kwargs.keys():
            self.train = self.tokenize(kwargs['train_path'], applyDict=applyDict,
                                       emotion_special_processing=emotion_special_processing,
                                       emotion_type=emotion_type,
                                       use_emotion_supervision=use_emotion_supervision)
        if 'dev_path' in kwargs.keys():
            self.valid = self.tokenize(kwargs['dev_path'], applyDict=applyDict,
                                       emotion_special_processing=emotion_special_processing,
                                       emotion_type=emotion_type,
                                       use_emotion_supervision=use_emotion_supervision)
        if 'test_path' in kwargs.keys():
            self.test = self.tokenize(kwargs['test_path'], applyDict=applyDict,
                                      emotion_special_processing=emotion_special_processing,
                                      emotion_type=emotion_type,
                                      use_emotion_supervision=use_emotion_supervision)
    else:
        self.dictionary = Dictionary()
        if 'train_path' in kwargs.keys():
            self.train = self.tokenize(kwargs['train_path'],
                                       emotion_special_processing=emotion_special_processing,
                                       emotion_type=emotion_type,
                                       use_emotion_supervision=use_emotion_supervision)
        if 'dev_path' in kwargs.keys():
            self.valid = self.tokenize(kwargs['dev_path'],
                                       emotion_special_processing=emotion_special_processing,
                                       emotion_type=emotion_type,
                                       use_emotion_supervision=use_emotion_supervision)
        if 'test_path' in kwargs.keys():
            self.test = self.tokenize(kwargs['test_path'],
                                      emotion_special_processing=emotion_special_processing,
                                      emotion_type=emotion_type,
                                      use_emotion_supervision=use_emotion_supervision)
        # save file when done
        make_vocab(self.dictionary, kwargs['output'])
def __init__(self, dataset_path: str, max_length: int):
    """
    :param dataset_path: dataset root path
    :param max_length: maximum length of a sentence string
    """
    self.dataset_path = dataset_path
    with open(os.path.join(dataset_path, 'sample_data'), 'r', encoding='utf8') as f:
        self.train_sentences, self.train_labels = get_data(f)
    with open(os.path.join(dataset_path, 'test_data'), 'r', encoding='utf8') as f:
        self.test_sentences, self.test_labels = get_data(f)
    print('data loading complete!')

    if os.path.isfile('./data/vocab.txt'):
        self.vocab = read_vocab()
    else:
        self.vocab = make_vocab(self.train_sentences)
    print('make vocab complete! vocab size = {}'.format(len(self.vocab)))

    self.sentences = preprocess(self.vocab, self.train_sentences, max_length)
    self.labels = [np.float32(x) for x in self.train_labels]
    print('training sentences :', len(self.sentences))
def preprocess(data_dir="./data"): print("begin to preprocess...") train_data_path = os.path.join(data_dir, "train.csv") new_train_data_path = os.path.join(data_dir, "train_prcssd.csv") test_data_path = os.path.join(data_dir, "test.csv") new_test_data_path = os.path.join(data_dir, "test_prcssd.csv") vocab_path = os.path.join(data_dir, "vocab.txt") # 读数据 logging.info("loading data...") train_data = pd.read_csv(train_data_path) test_data = pd.read_csv(test_data_path) # 预处理 train_data["tag"] = "train" test_data["tag"] = "test" data = train_data.append(test_data) logging.info("replacing bad words...") data["comment_text"] = data.apply(lambda d : my_utils.replace(d["comment_text"]), axis=1) logging.info("tokenizing...") data["tokens"] = data.apply(lambda d: my_utils.tokenize(d["comment_text"]), axis=1) logging.info("making vocabulary...") vocab = my_utils.make_vocab(data["tokens"]) data["tokens"] = data.apply(lambda d: " ".join(d["tokens"])) train_data = data[data.tag == "train"] test_data = data[data.tag == "test"] #保存 logging.info("saving...") train_data.to_csv(new_train_data_path) test_data.to_csv(new_test_data_path) my_utils.dump_vocab(vocab, vocab_path) logging.info("preprocess finished!") return train_data, test_data
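# my_utils.make_vocab above is the repository's own helper; a minimal sketch of
# what it might do, assuming it counts tokens across the "tokens" column and
# keeps every token seen at least min_count times (the signature and reserved
# indices here are assumptions, not the original code):
from collections import Counter

def make_vocab(token_series, min_count=1):
    counts = Counter()
    for tokens in token_series:
        counts.update(tokens)
    # Reserve index 0 for padding and 1 for out-of-vocabulary tokens.
    vocab = {"<pad>": 0, "<unk>": 1}
    for token, count in counts.most_common():
        if count >= min_count:
            vocab[token] = len(vocab)
    return vocab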
def main(args):
    # create data batcher, vocabulary
    # batcher
    with open(join(DATA_DIR, 'vocab_cnt.pkl'), 'rb') as f:
        wc = pkl.load(f)
    word2id = make_vocab(wc, args.vsize)
    train_batcher, val_batcher = build_batchers(word2id, args.cuda, args.debug)

    # make net
    print('vocab size:', len(word2id))
    ids = [id for word, id in word2id.items()]
    print(max(ids))
    print(list(sorted(ids))[0])
    net, net_args = configure_net(len(word2id), args.emb_dim, args.n_hidden,
                                  args.bi, args.n_layer, args.load_from)

    # configure training setting
    criterion, train_params = configure_training('adam', args.lr, args.clip,
                                                 args.decay, args.batch)

    # save experiment setting
    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)
    meta = {}
    meta['net'] = 'base_abstractor'
    meta['net_args'] = net_args
    meta['traing_params'] = train_params
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    # prepare trainer
    if args.cuda:
        net = net.cuda()
    val_fn = basic_validate(net, criterion)
    grad_fn = get_basic_grad_fn(net, args.clip)
    optimizer = optim.AdamW(net.parameters(), **train_params['optimizer'][1])
    # optimizer = optim.Adagrad(net.parameters(), **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True,
                                  factor=args.decay, min_lr=0,
                                  patience=args.lr_p)

    pipeline = BasicPipeline(meta['net'], net,
                             train_batcher, val_batcher, args.batch, val_fn,
                             criterion, optimizer, grad_fn)
    trainer = BasicTrainer(pipeline, args.path,
                           args.ckpt_freq, args.patience, scheduler)

    print('start training with the following hyper-parameters:')
    trainer.train()
def main(args):
    assert args.net_type in ['ff', 'rnn']
    # create data batcher, vocabulary
    # batcher
    with open(join(DATA_DIR, 'vocab_cnt.pkl'), 'rb') as f:
        wc = pkl.load(f)
    word2id = make_vocab(wc, args.vsize)
    train_batcher, val_batcher = build_batchers(args.net_type, word2id,
                                                args.cuda, args.debug)

    # make net
    net, net_args = configure_net(args.net_type, len(word2id), args.emb_dim,
                                  args.conv_hidden, args.lstm_hidden,
                                  args.lstm_layer, args.bi)
    if args.w2v:
        # NOTE: the pretrained embedding having the same dimension
        #       as args.emb_dim should already be trained
        embedding, _ = make_embedding({i: w for w, i in word2id.items()},
                                      args.w2v)
        net.set_embedding(embedding)

    # configure training setting
    criterion, train_params = configure_training(args.net_type, 'adam',
                                                 args.lr, args.clip,
                                                 args.decay, args.batch)

    # save experiment setting
    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)
    meta = {}
    meta['net'] = 'ml_{}_extractor'.format(args.net_type)
    meta['net_args'] = net_args
    meta['traing_params'] = train_params
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    # prepare trainer
    val_fn = basic_validate(net, criterion)
    grad_fn = get_basic_grad_fn(net, args.clip)
    optimizer = optim.Adam(net.parameters(), **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True,
                                  factor=args.decay, min_lr=0,
                                  patience=args.lr_p)

    if args.cuda:
        net = net.cuda()
    pipeline = BasicPipeline(meta['net'], net,
                             train_batcher, val_batcher, args.batch, val_fn,
                             criterion, optimizer, grad_fn)
    trainer = BasicTrainer(pipeline, args.path,
                           args.ckpt_freq, args.patience, scheduler)

    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
def __init__(self, applyDict=False, **kwargs):  # TODO: document this
    """
    :param applyDict:
    :param kwargs: 'train_path' 'dev_path' 'test_path', 'dict_path', 'applyDict'
    """
    if applyDict:
        self.dictionary = load_pickle(kwargs['dict_path'])  # a previously saved pickle of a Dictionary
    else:
        self.dictionary = Dictionary()
    if 'train_path' in kwargs.keys():
        self.train = self.tokenize(kwargs['train_path'])
    if 'dev_path' in kwargs.keys():
        self.valid = self.tokenize(kwargs['dev_path'])
    if 'test_path' in kwargs.keys():
        self.test = self.tokenize(kwargs['test_path'])
    # save file when done
    make_vocab(self.dictionary, kwargs['output'])
def __init__(self, applyDict=False, **kwargs):
    """
    :param applyDict: whether to create a corpus with an already made dictionary
    :param kwargs: 'train_path', 'dev_path', 'test_path', 'dict_path', 'output'.
        For most uses you need all types of path, though you could make a Corpus
        without a train-dev-test split. dict_path is only accessed if applyDict
        is true.
    """
    if applyDict:
        self.dictionary = load_pickle(kwargs['dict_path'])  # a previously saved pickle of a Dictionary
    else:
        self.dictionary = Dictionary()
    if 'train_path' in kwargs.keys():
        self.train = self.tokenize(kwargs['train_path'])
    if 'dev_path' in kwargs.keys():
        self.valid = self.tokenize(kwargs['dev_path'])
    if 'test_path' in kwargs.keys():
        self.test = self.tokenize(kwargs['test_path'])
    # save file when done
    make_vocab(self.dictionary, kwargs['output'])
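# A hedged usage sketch for the Corpus constructor above; the file paths are
# illustrative, and passing 'output' in both cases follows the constructor as
# written, since make_vocab() runs unconditionally at the end:
corpus = Corpus(
    train_path='data/train.txt',
    dev_path='data/valid.txt',
    test_path='data/test.txt',
    output='data/vocab.pkl',     # where the newly built dictionary is saved
)

# Later, reuse the saved dictionary, e.g. to tokenize only a held-out test set.
test_corpus = Corpus(
    applyDict=True,
    dict_path='data/vocab.pkl',
    test_path='data/test.txt',
    output='data/vocab.pkl',
)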
def __init__(self, options, session):
    self._options = options
    self._session = session
    word_freq, word_id, id_word, phrase_ids = utils.make_vocab(
        vocabfile=self._options.vocab,
        corpus=self._options.train_data,
        phrase_ids_file=self._options.phrase_data,
        phrase_reverse=self._options.reverse)
    self._word_freq = word_freq
    self._word_id = word_id
    self._id_word = id_word
    self._phrase_ids = phrase_ids
    self.save_setting()
    self.freq_table = self.make_freq_table(self._id_word, self._word_freq)
    phrase_max_size = max([len(word_seq) for word_seq in phrase_ids.values()] + [0])
    self.build_graph(phrase_max_size, self._options.composition_function,
                     self._options.dim, self._options.batch_size,
                     self._options.neg, self._options.learning_rate,
                     self._id_word, self.freq_table,
                     self._options.init_word_data,
                     self._options.init_context_data,
                     self._options.epoch_num,
                     not self._options.not_embedding_train)
def main(args):
    with open(join(DATA_DIR, 'vocab_cnt.pkl'), 'rb') as f:
        wc = pkl.load(f)
    word2id = make_vocab(wc, args.vsize)

    abs_args = SimpleNamespace(
        **vars(args),
        path='./uni_pretrained_abstractor',
        w2v='./word_vectors/word2vec.128d.226k.bin',
        n_layer=1,
        n_hidden=256,
        max_art=100,
        max_abs=30,
    )
    abs_trainer, abs_net = abs_prep_trainer(abs_args, word2id=word2id)

    exs_args = SimpleNamespace(
        **vars(args),
        path='./uni_pretrained_extractor',
        w2v=None,  # no embedding since reuse abs's encoder
        net_type='rnn',
        lstm_layer=1,
        lstm_hidden=256,
        max_word=100,
        max_sent=60
    )
    exs_trainer, _ = exs_prep_trainer(exs_args, word2id=word2id,
                                      encoder=abs_net.encoder)

    # training generator
    exs_train_gen = exs_trainer.train_gen('extractor')
    abs_train_gen = abs_trainer.train_gen('abstractor')
    for exs_end, abs_end in zip(exs_train_gen, abs_train_gen):
        if exs_end and abs_end:
            print('Uni Training End')
            break
    [en, cn] = line.strip('\n').split('\t')
    outputs.append(cn[:-1])  # strip the sentence-final punctuation from the Chinese target
    # In-sentence commas already have a trailing space; adding a space before each
    # comma makes it split as its own token. Also strip the sentence-final
    # punctuation and lowercase.
    inputs.append(en.replace(',', ' ,')[:-1].lower())

# print('before segmentation:', inputs[:10])
# print('before segmentation:', outputs[:10])
inputs = cn_segment(inputs)
outputs = en_segment(outputs)
# print('after segmentation:', inputs[:10])
# print('after segmentation:', outputs[:10])
# print(outputs)

encoder_vocab, decoder_vocab = make_vocab(inputs, outputs)
print('\n-----------vocab has been made-----------')
encoder_inputs, decoder_inputs, decoder_targets = data_format(
    inputs, outputs, encoder_vocab, decoder_vocab)

arg = create_hparams()
arg.input_vocab_size = len(encoder_vocab)
arg.label_vocab_size = len(decoder_vocab)
arg.epochs = epoch
arg.batch_size = batch_size

g = Graph(arg)
saver = tf.train.Saver()
with tf.Session() as sess:
def main(args):
    # create data batcher, vocabulary
    # batcher
    with open(join(args.data_path, 'vocab_cnt.pkl'), 'rb') as f:
        wc = pkl.load(f)
    word2id = make_vocab(wc, args.vsize, args.max_target_sent)  # a word-level dictionary
    train_batcher, val_batcher = build_batchers(word2id, args.cuda, args.debug)

    # make net
    if args.w2v:
        # NOTE: the pretrained embedding having the same dimension
        #       as args.emb_dim should already be trained
        embedding, _ = make_embedding({i: w for w, i in word2id.items()},
                                      args.w2v)  # provides an embedding matrix
        net, net_args = configure_net(len(word2id), args.emb_dim, args.n_hidden,
                                      args.bi, args.n_layer,
                                      args.sampling_teaching_force,
                                      args.self_attn, args.hi_encoder,
                                      embedding)
    else:
        print("please provide pretrain_w2v")
        return

    # configure training setting
    criterion, train_params = configure_training('adam', args.lr, args.clip,
                                                 args.decay, args.batch)

    # save experiment setting
    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)
    net_args_backup = net_args.copy()
    del net_args_backup["embedding"]
    meta = {}
    meta['net'] = 'base_abstractor'
    meta['net_args'] = net_args_backup
    meta['traing_params'] = train_params
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    # prepare trainer
    val_fn = basic_validate(net, criterion)
    grad_fn = get_basic_grad_fn(net, args.clip)
    optimizer = optim.Adam(net.parameters(), **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True,
                                  factor=args.decay, min_lr=0,
                                  patience=args.lr_p)

    if args.cuda:
        net = net.cuda()
    pipeline = BasicPipeline(meta['net'], net,
                             train_batcher, val_batcher, args.batch, val_fn,
                             criterion, optimizer, grad_fn)
    trainer = BasicTrainer(pipeline, args.path,
                           args.ckpt_freq, args.patience, scheduler)

    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
from sklearn.utils import shuffle
from utils import make_vocab, load_data_cnn
from wordCNN import *

init()  # colorama init

# embedding_dim = 200
# golveFileName = os.path.join("data", "twitter_hate_off_word_vectors.txt")
# saveFileName = os.path.join("data", "twitter_hate_off_word_vectors" + str(embedding_dim) + ".npy")

embedding_dim = 100
golveFileName = os.path.join("data", "glove.twitter.27B." + str(embedding_dim) + "d.txt")
saveFileName = os.path.join("data", "filteredGlove" + str(embedding_dim) + ".npy")

vocab_size = make_vocab(file=golveFileName, save_name=saveFileName,
                        embedding_dim=embedding_dim)
wordVecs = np.load(saveFileName).astype(np.float32)

train_x, train_y, dev_x, dev_y, test_x, test_y, doc_emb_train, doc_emb_test, doc_emb_dev = load_data_cnn()

n_epochs = 20
train_instances = len(train_x)
batch_size = 128
train_batches = train_instances // batch_size

use_gcn = True
path1 = "./saved/use_gcn_cnn"
path2 = "./saved/no_use_gcn_cnn"
if use_gcn:
def mlp(tr_data, te_data, eng_para, col_name, grid_size, \
        optimizer, batch_size, hidden_size, mlp_feature, \
        nb_epoch, prediction, model_name, is_train):
    # Load the dataset
    print 'Loading dataset ...'
    tr_feature, tr_label, tr_ids = mlp_feature(tr_data, eng_para, True, col_name)
    te_feature, te_label, te_ids = mlp_feature(te_data, eng_para, True, col_name)

    rg = RoadGrid(np.vstack((tr_label, te_label)), grid_size)
    tr_label = rg.transform(tr_label)
    # te_label = rg.transform(te_label)

    ## !!! maybe here need to ensure train data are the same shape as test data
    train_size, n_con = tr_feature.shape
    test_size, n_con = te_feature.shape
    n_dis = len(tr_ids)

    # Create neural network model
    print 'Preprocessing data ...'
    # Standardize continuous input
    # tr_feature, te_feature = preprocess(tr_feature, te_feature)
    tr_feature, te_feature = preprocess(tr_feature, te_feature)
    # te_feature = preprocess(te_feature)
    tr_input = {'con_input': tr_feature, 'output': tr_label}
    te_input = {'con_input': te_feature}

    # Prepare embedding input
    dis_dims, vocab_sizes = [], []
    for ii, tr_ids_, te_ids_ in zip(range(n_dis), tr_ids, te_ids):
        # make sure tr_ids contain several different discrete features
        vocab_size, vocab_dict = make_vocab(tr_ids_, te_ids_)
        tr_id_idx_, te_id_idx_ = [], []
        dis_dim = len(tr_ids_)
        for i in range(dis_dim):
            tr_id_idx_ += map(lambda x: vocab_dict[x], tr_ids_[i])
            te_id_idx_ += map(lambda x: vocab_dict[x], te_ids_[i])
        tr_ids = np.array(tr_id_idx_, dtype=np.int32).reshape(dis_dim, train_size).transpose()
        te_ids = np.array(te_id_idx_, dtype=np.int32).reshape(dis_dim, test_size).transpose()
        ## Add discrete feature to dict
        tr_input['emb_input%d' % ii] = tr_ids
        te_input['emb_input%d' % ii] = te_ids
        dis_dims.append(dis_dim)
        vocab_sizes.append(vocab_size)

    print 'Building model and compiling functions ...'
    # Define network structure
    grid_info = rg.grid_center
    network = build_mlp(n_con, n_dis, dis_dims, vocab_sizes, len(grid_info), hidden_size)
    # network.compile(loss={'output': 'categorical_crossentropy'}, optimizer=SGD(lr=1e-2, momentum=0.9, nesterov=True))
    network.compile(loss={'output': 'categorical_crossentropy'}, optimizer=optimizer)

    # Build network
    # pickle_name = 'MLP-softmax-0.4.pickle'
    pickle_name = model_name
    if is_train:
        history = network.fit(tr_input, nb_epoch=nb_epoch, batch_size=batch_size, verbose=1)
        # Dump Network
        with open('model/' + pickle_name, 'wb') as f:
            pickle.dump(network, f, -1)
    else:
        # Load Network
        f = open('model/' + pickle_name)
        network = pickle.load(f)

    # Make prediction
    ## 1. weighted
    if prediction == 'weighted':
        te_pred = np.asarray(network.predict(te_input)['output'])
        te_pred = te_pred.dot(grid_info)
        # Generate report
        # gen_report(te_label, te_pred, pickle_name, [type(optimizer), batch_size, hidden_size, 'Weighted'])
    elif prediction == 'argmax':
        ## 2. argmax
        te_pred = np.asarray(network.predict(te_input)['output'])
        te_pred = np.argmax(te_pred, axis=1)
        te_pred = [grid_info[idx] for idx in te_pred]
        # Generate report
        # gen_report(te_label, te_pred, pickle_name, [type(optimizer), batch_size, hidden_size, 'Argmax'])
    else:
        te_pred = None

    return te_pred
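# make_vocab(tr_ids_, te_ids_) above is expected to return (vocab_size, vocab_dict),
# indexing every distinct discrete ID seen in either split. A minimal sketch of
# that contract (an assumption, not the repository's helper):
def make_vocab(tr_ids_, te_ids_):
    vocab_dict = {}
    for id_list in list(tr_ids_) + list(te_ids_):
        for value in id_list:
            if value not in vocab_dict:
                vocab_dict[value] = len(vocab_dict)
    return len(vocab_dict), vocab_dict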
import numpy as np
import os
from sklearn import metrics
from sklearn.utils import shuffle
from utils import make_vocab, load_data_cnn
from wordCNN import *

init()  # colorama init

# embedding_dim = 200
# golveFileName = os.path.join("data", "twitter_hate_off_word_vectors.txt")
# saveFileName = os.path.join("data", "twitter_hate_off_word_vectors" + str(embedding_dim) + ".npy")

embedding_dim = 100
golveFileName = os.path.join("data", "glove.twitter.27B." + str(embedding_dim) + "d.txt")
saveFileName = os.path.join("data", "filteredGlove" + str(embedding_dim) + ".npy")

vocab_size = make_vocab(data="twitter_hate_off", file=golveFileName,
                        save_name=saveFileName, embedding_dim=embedding_dim)
print(vocab_size)
wordVecs = np.load(saveFileName).astype(np.float32)

train_x, train_y, dev_x, dev_y, test_x, test_y, doc_emb_train, doc_emb_test, doc_emb_dev = load_data_cnn(data="twitter_hate_off")

n_epochs = 5
train_instances = len(train_x)
batch_size = 128
train_batches = train_instances // batch_size

use_gcn = False
path1 = "./saved/use_gcn_cnn"
path2 = "./saved/no_use_gcn_cnn"
if use_gcn:
    path = path1
                    default=1,
                    help="num epochs for training")
parser.add_argument('--batch_size',
                    type=int,
                    default=64,
                    help="batch size for training")
parser.add_argument('--site_path',
                    type=str,
                    default='nodejs_brows/static',
                    help='path to your site for storing model')
args = parser.parse_args()

train_data = load_data(os.path.join(args.data, 'train.txt'))
valid_data = load_data(os.path.join(args.data, 'valid.txt'))

words_vocab = make_vocab(train_data['words'])
tags_vocab = make_vocab(train_data['tags'])

train_data['words_sequences'] = make_sequences(train_data['words'], words_vocab)
valid_data['words_sequences'] = make_sequences(valid_data['words'], words_vocab)
train_data['tags_sequences'] = make_sequences(train_data['tags'], tags_vocab)
valid_data['tags_sequences'] = make_sequences(valid_data['tags'], tags_vocab)

train_X = pad_sequences(train_data['words_sequences'],
                        maxlen=MAX_SEQUENCE_LENGTH,
                        value=PAD_ID,
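# make_vocab and make_sequences above are the repository's own helpers. A hedged
# sketch of the behaviour the calls rely on; the reserved '<pad>'/'<unk>' entries
# and the fallback to '<unk>' for unseen validation tokens are assumptions:
def make_vocab(token_lists):
    vocab = {'<pad>': 0, '<unk>': 1}
    for tokens in token_lists:
        for token in tokens:
            if token not in vocab:
                vocab[token] = len(vocab)
    return vocab

def make_sequences(token_lists, vocab):
    unk = vocab['<unk>']
    return [[vocab.get(token, unk) for token in tokens] for tokens in token_lists]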
def main(num_epochs=500):
    # Load the dataset
    print 'Loading dataset ...'
    eng_para = pd.read_csv('data/2g_gongcan.csv')
    # eng_para = eng_para.loc[:, ['LAC', 'CI', 'Angle', 'Longitude', 'Latitude', 'Power', 'GSM Neighbor Count', 'TD Neighbor Count']]
    tr_feature, tr_label, tr_ids = load_dataset('data/forward_recovered.csv', eng_para, True)
    te_feature, te_label, te_ids = load_dataset('data/backward_recovered.csv', eng_para, False)

    ## !!! maybe here need to ensure train data are the same shape as test data
    train_size, n_con = tr_feature.shape
    test_size, n_con = te_feature.shape
    n_dis = len(tr_ids)

    # Create neural network model
    print 'Preprocessing data ...'
    # Standardize continuous input
    tr_feature, te_feature = preprocess(tr_feature, te_feature)
    tr_input = {'con_input': tr_feature}
    te_input = {'con_input': te_feature}

    # Prepare embedding input
    dis_dims, vocab_sizes = [], []
    for ii, tr_ids_, te_ids_ in zip(range(n_dis), tr_ids, te_ids):
        # make sure tr_ids contain several different discrete features
        vocab_size, vocab_dict = make_vocab(tr_ids_, te_ids_)
        tr_id_idx_, te_id_idx_ = [], []
        dis_dim = len(tr_ids_)
        for i in range(dis_dim):
            tr_id_idx_ += map(lambda x: vocab_dict[x], tr_ids_[i])
            te_id_idx_ += map(lambda x: vocab_dict[x], te_ids_[i])
        tr_ids = np.array(tr_id_idx_, dtype=np.int32).reshape(dis_dim, train_size).transpose()
        te_ids = np.array(te_id_idx_, dtype=np.int32).reshape(dis_dim, test_size).transpose()
        ## Add discrete feature to dict
        tr_input['emb_input%d' % ii] = tr_ids
        te_input['emb_input%d' % ii] = te_ids
        dis_dims.append(dis_dim)
        vocab_sizes.append(vocab_size)

    print 'Building model and compiling functions ...'
    # Define network structure
    l_output = build_mlp(n_con, n_dis, dis_dims, vocab_sizes)
    # Set batch size
    bi = BatchIterator(batch_size=10)
    # Build network
    network = NeuralNet(l_output,
                        regression=True,
                        update_learning_rate=1e-5,
                        update=nesterov_momentum,
                        update_momentum=0.9,
                        train_split=TrainSplit(eval_size=0.05),
                        verbose=1,
                        batch_iterator_train=bi,
                        objective_loss_function=lasagne.objectives.squared_error,
                        max_epochs=5000)

    pickle_name = 'MLP-0.10.pickle'
    mul_val = 10000.
    lon_offset = np.mean(tr_label[:, 0])
    lon_std = np.mean(tr_label[:, 0])
    lat_offset = np.mean(tr_label[:, 1])
    lat_std = np.mean(tr_label[:, 1])

    ######## Change Target
    tr_label[:, 0] = (tr_label[:, 0] - lon_offset) * mul_val
    tr_label[:, 1] = (tr_label[:, 1] - lat_offset) * mul_val
    tr_label = tr_label.astype(np.float32)
    print tr_label

    is_train = True
    if is_train:
        network.fit(tr_input, tr_label)
        # Dump Network
        with open('model/' + pickle_name, 'wb') as f:
            pickle.dump(network, f, -1)
    else:
        # Load Network
        f = open('model/' + pickle_name)
        network = pickle.load(f)

    # Make prediction
    te_pred = network.predict(te_input)
    te_pred[:, 0] = te_pred[:, 0] / mul_val + lon_offset
    te_pred[:, 1] = te_pred[:, 1] / mul_val + lat_offset

    f_out = open('pred.csv', 'w')
    for pred_pt, true_pt in zip(te_pred, te_label):
        f_out.write('%f,%f,%f,%f\n' % (pred_pt[0], pred_pt[1], true_pt[0], true_pt[1]))

    # Generate report
    gen_report(te_label, te_pred, pickle_name)
def train(args):
    assert args.encoder in ['BiLSTM', 'DeepLSTM', 'Transformer']
    assert args.decoder in ['SL', 'PN']
    assert args.emb_type in ['W2V', 'BERT']

    # create data batcher, vocabulary
    # batcher
    with open(join(DATA_DIR, 'vocab_cnt.pkl'), 'rb') as f:
        wc = pkl.load(f)
    word2id = make_vocab(wc, args.vsize)
    train_batcher, val_batcher = build_batchers(args.decoder, args.emb_type,
                                                word2id, args.cuda, args.debug)

    # make model
    model, model_args = configure_net(args.encoder, args.decoder, args.emb_type,
                                      len(word2id), args.emb_dim,
                                      args.conv_hidden, args.encoder_hidden,
                                      args.encoder_layer)
    if args.emb_type == 'W2V':
        # NOTE: the pretrained embedding having the same dimension
        #       as args.emb_dim should already be trained
        w2v_path = './CNNDM/word2vec/word2vec.128d.226k.bin'
        embedding, _ = make_embedding({i: w for w, i in word2id.items()},
                                      w2v_path)
        model.set_embedding(embedding)

    # configure training setting
    criterion, train_params = configure_training(args.decoder, 'adam', args.lr,
                                                 args.clip, args.decay,
                                                 args.batch)

    # save experiment setting
    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)
    meta = {}
    meta['model_args'] = model_args
    meta['traing_params'] = train_params
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    # prepare trainer
    val_fn = basic_validate(model, criterion, args.decoder)
    grad_fn = get_basic_grad_fn(model, args.clip)
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                           **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True,
                                  factor=args.decay, min_lr=2e-5,
                                  patience=args.lr_p)

    if args.cuda:
        model = model.cuda()
    pipeline = BasicPipeline(model, args.decoder,
                             train_batcher, val_batcher, args.batch, val_fn,
                             criterion, optimizer, grad_fn)
    trainer = BasicTrainer(pipeline, args.path,
                           args.ckpt_freq, args.patience, scheduler)

    # for name, para in net.named_parameters():
    #     if para.requires_grad:
    #         print(name)

    print('Start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
def main(args):
    # create data batcher, vocabulary
    # batcher
    if args.bert:
        import logging
        logging.basicConfig(level=logging.ERROR)
    if not args.bert:
        with open(join(DATA_DIR, 'vocab_cnt.pkl'), 'rb') as f:
            wc = pkl.load(f)
        word2id = make_vocab(wc, args.vsize)
    if not args.gat:
        if args.bert:
            train_batcher, val_batcher, word2id = build_batchers_bert(
                args.cuda, args.debug, args.bertmodel)
        else:
            train_batcher, val_batcher = build_batchers(
                word2id, args.cuda, args.debug)
    else:
        if args.bert:
            train_batcher, val_batcher, word2id = build_batchers_gat_bert(
                args.cuda, args.debug, args.gold_key, args.adj_type,
                args.mask_type, args.topic_flow_model,
                num_worker=args.num_worker, bert_model=args.bertmodel)
        else:
            train_batcher, val_batcher = build_batchers_gat(
                word2id, args.cuda, args.debug, args.gold_key, args.adj_type,
                args.mask_type, args.topic_flow_model,
                num_worker=args.num_worker)

    # make net
    if args.gat:
        _args = {}
        _args['rtoks'] = 1
        _args['graph_hsz'] = args.n_hidden
        _args['blockdrop'] = 0.1
        _args['sparse'] = False
        _args['graph_model'] = 'transformer'
        _args['adj_type'] = args.adj_type
        net, net_args = configure_net_gat(
            len(word2id), args.emb_dim, args.n_hidden, args.bi, args.n_layer,
            args.load_from, gat_args=_args, adj_type=args.adj_type,
            mask_type=args.mask_type, feed_gold=False,
            graph_layer_num=args.graph_layer, feature=args.feat,
            subgraph=args.topic_flow_model,
            hierarchical_attn=args.topic_flow_model,
            bert=args.bert, bert_length=args.max_art)
    else:
        net, net_args = configure_net(len(word2id), args.emb_dim, args.n_hidden,
                                      args.bi, args.n_layer, args.load_from,
                                      args.bert, args.max_art)
    if args.w2v:
        assert not args.bert
        # NOTE: the pretrained embedding having the same dimension
        #       as args.emb_dim should already be trained
        embedding, _ = make_embedding({i: w for w, i in word2id.items()},
                                      args.w2v)
        net.set_embedding(embedding)

    # configure training setting
    if 'soft' in args.mask_type and args.gat:
        criterion, train_params = configure_training_multitask(
            'adam', args.lr, args.clip, args.decay, args.batch, args.mask_type,
            args.bert)
    else:
        criterion, train_params = configure_training(
            'adam', args.lr, args.clip, args.decay, args.batch, args.bert)

    # save experiment setting
    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)
    meta = {}
    meta['net'] = 'base_abstractor'
    meta['net_args'] = net_args
    meta['traing_params'] = train_params
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    # prepare trainer
    if args.cuda:
        net = net.cuda()
    if 'soft' in args.mask_type and args.gat:
        val_fn = multitask_validate(net, criterion)
    else:
        val_fn = basic_validate(net, criterion)
    grad_fn = get_basic_grad_fn(net, args.clip)
    print(net._embedding.weight.requires_grad)
    optimizer = optim.AdamW(net.parameters(), **train_params['optimizer'][1])
    # optimizer = optim.Adagrad(net.parameters(), **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True,
                                  factor=args.decay, min_lr=0,
                                  patience=args.lr_p)

    # pipeline = BasicPipeline(meta['net'], net,
    #                          train_batcher, val_batcher, args.batch, val_fn,
    #                          criterion, optimizer, grad_fn)
    # trainer = BasicTrainer(pipeline, args.path,
    #                        args.ckpt_freq, args.patience, scheduler)
    if 'soft' in args.mask_type and args.gat:
        pipeline = MultiTaskPipeline(meta['net'], net,
                                     train_batcher, val_batcher, args.batch,
                                     val_fn, criterion, optimizer, grad_fn)
        trainer = MultiTaskTrainer(pipeline, args.path,
                                   args.ckpt_freq, args.patience, scheduler)
    else:
        pipeline = BasicPipeline(meta['net'], net,
                                 train_batcher, val_batcher, args.batch,
                                 val_fn, criterion, optimizer, grad_fn)
        trainer = BasicTrainer(pipeline, args.path,
                               args.ckpt_freq, args.patience, scheduler)

    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
                    type=str)
parser.add_argument('vocab_file',
                    help='a newline-delimited list of vocabulary words for which '
                         'to generate embeddings',
                    type=str)
parser.add_argument('output_file',
                    help='path and filename where embeddings should be written',
                    type=str)
parser.add_argument('--count_file',
                    help='optional path and filename for a file where counts of the number '
                         'of context sentences per vocabulary word should be written',
                    default=None,
                    type=str)
args = parser.parse_args()

tokenizer = RobertaTokenizer.from_pretrained(args.model_path)
model = RobertaForMaskedLM.from_pretrained(args.model_path)
model.eval()

vocab = utils.make_vocab(args.vocab_file)

FEATURE_COUNT = 768  # Change this value to 1024 for the large RoBERTa model.
MAX_LINES = 2000     # Maximum number of context lines to average per vocabulary embedding.

if __name__ == "__main__":
    # Process vocabulary words in the outer loop.
    for v in vocab:
        with open(args.context_file, 'r') as lines:
            v_sum = torch.zeros([1, FEATURE_COUNT])
            v_tokens = utils.tokenize_text(v, tokenizer)
            utils.print_tokenized_text(v_tokens, tokenizer)
            count_sentence = 0
            count_tensor = 0
            # Process all lines in the context file in the inner loop.
            for line in lines:
def main():
    # Loads vocab.
    vocab = make_vocab("data/ptb.train.txt")
    print("#vocab:", len(vocab))  # maybe 10000
    eos_id = vocab["<s>"]

    # Loads all corpus.
    train_corpus = load_corpus("data/ptb.train.txt", vocab)
    valid_corpus = load_corpus("data/ptb.valid.txt", vocab)
    num_train_sents = len(train_corpus)
    num_valid_sents = len(valid_corpus)
    num_train_labels = count_labels(train_corpus)
    num_valid_labels = count_labels(valid_corpus)
    print("train:", num_train_sents, "sentences,", num_train_labels, "labels")
    print("valid:", num_valid_sents, "sentences,", num_valid_labels, "labels")

    # Device and computation graph.
    dev = D.CUDA(0)
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)

    # Our LM.
    lm = RNNLM(len(vocab), eos_id)

    # Optimizer.
    optimizer = O.SGD(1)
    # optimizer.set_weight_decay(1e-6)
    optimizer.set_gradient_clipping(5)
    optimizer.add(lm)

    # Sentence IDs.
    train_ids = list(range(num_train_sents))
    valid_ids = list(range(num_valid_sents))

    best_valid_ppl = 1e10

    # Train/valid loop.
    for epoch in range(MAX_EPOCH):
        print("epoch", epoch + 1, "/", MAX_EPOCH, ":")

        # Shuffles train sentence IDs.
        random.shuffle(train_ids)

        # Training.
        train_loss = 0
        for ofs in range(0, num_train_sents, BATCH_SIZE):
            batch_ids = train_ids[ofs:min(ofs + BATCH_SIZE, num_train_sents)]
            batch = make_batch(train_corpus, batch_ids, eos_id)

            g.clear()
            outputs = lm.forward(batch, True)
            loss = lm.loss(outputs, batch)
            train_loss += loss.to_float() * len(batch_ids)

            optimizer.reset_gradients()
            loss.backward()
            optimizer.update()

            print("%d" % ofs, end="\r")
            sys.stdout.flush()

        train_ppl = math.exp(train_loss / num_train_labels)
        print(" train ppl =", train_ppl)

        # Validation.
        valid_loss = 0
        for ofs in range(0, num_valid_sents, BATCH_SIZE):
            batch_ids = valid_ids[ofs:min(ofs + BATCH_SIZE, num_valid_sents)]
            batch = make_batch(valid_corpus, batch_ids, eos_id)

            g.clear()
            outputs = lm.forward(batch, False)
            loss = lm.loss(outputs, batch)
            valid_loss += loss.to_float() * len(batch_ids)

            print("%d" % ofs, end="\r")
            sys.stdout.flush()

        valid_ppl = math.exp(valid_loss / num_valid_labels)
        print(" valid ppl =", valid_ppl)

        if valid_ppl < best_valid_ppl:
            best_valid_ppl = valid_ppl
            print(" BEST")
        else:
            old_lr = optimizer.get_learning_rate_scaling()
            new_lr = 0.5 * old_lr
            optimizer.set_learning_rate_scaling(new_lr)
            print(" learning rate scaled:", old_lr, "->", new_lr)
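# The PTB example above assumes a few data helpers. A minimal sketch under the
# assumption of whitespace-tokenized PTB lines framed by "<s>" markers; the
# details are guesses, not the original utilities:
def load_corpus(path, vocab):
    # Each sentence becomes a list of word IDs framed by the "<s>" symbol.
    corpus = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            words = ["<s>"] + line.split() + ["<s>"]
            corpus.append([vocab.get(w, vocab["<unk>"]) for w in words])
    return corpus

def count_labels(corpus):
    # One prediction per token after the initial "<s>".
    return sum(len(sent) - 1 for sent in corpus)

def make_batch(corpus, sent_ids, eos_id):
    # Transposes a batch of sentences into per-timestep word ID lists,
    # padding shorter sentences with eos_id.
    sents = [corpus[sid] for sid in sent_ids]
    max_len = max(len(s) for s in sents)
    return [[s[t] if t < len(s) else eos_id for s in sents]
            for t in range(max_len)]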
if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "--make-vocab", action="store_true", help="Set this flag if you want to make vocab from train data.") parser.add_argument("--do-train", action="store_true", help="Whether to run training.") parser.add_argument("--do-predict", action="store_true", help="Whether to run prediction.") parser.add_argument("--epoch-idx", type=int, default=EPOCHS, help="Choose which model to predict.") args = parser.parse_args() logger = config_log() if args.make_vocab: make_vocab(train_file=TRAIN_FILE, do_lower_case=DO_LOWER_CASE, result_dir=RESULT_DIR, text_col_name=TEXT_COL_NAME) if args.do_train: train(logger=logger) if args.do_predict: predict(args.epoch_idx, logger=logger)
def main(task_config, n=21, k=2, device=0, d=100, epochs=100):
    # Global parameters
    debug_mode = True
    verbose = True
    save = True
    freeze_word_embeddings = True
    over_population_threshold = 100
    relative_over_population = True
    data_augmentation = True
    if debug_mode:
        data_augmentation = False
        over_population_threshold = None

    logging.info("Task name: {}".format(task_config['name']))
    logging.info("Debug mode: {}".format(debug_mode))
    logging.info("Verbose: {}".format(verbose))
    logging.info("Freeze word embeddings: {}".format(freeze_word_embeddings))
    logging.info("Over population threshold: {}".format(over_population_threshold))
    logging.info("Relative over population: {}".format(relative_over_population))
    logging.info("Data augmentation: {}".format(data_augmentation))

    use_gpu = torch.cuda.is_available()
    # use_gpu = False
    if use_gpu:
        cuda_device = device
        torch.cuda.set_device(cuda_device)
        logging.info('Using GPU')

    # Load dataset
    dataset = task_config['dataset'](debug_mode, relative_path='./data/')
    all_sentences = dataset.get_train_sentences + dataset.get_valid_sentences + dataset.get_test_sentences

    word_embeddings = load_embeddings('./data/glove_embeddings/glove.6B.{}d.txt'.format(d))
    chars_embeddings = load_embeddings('./predicted_char_embeddings/char_mimick_glove_d100_c20')

    # Prepare vectorizer
    word_to_idx, char_to_idx = make_vocab(all_sentences)
    vectorizer = WordsInContextVectorizer(word_to_idx, char_to_idx)
    vectorizer = vectorizer

    # Initialize training parameters
    model_name = '{}_n{}_k{}_d{}_e{}'.format(task_config['name'], n, k, d, epochs)
    lr = 0.001
    if debug_mode:
        model_name = 'testing_' + model_name
        save = False
        epochs = 3

    # Create the model
    net = LRComick(
        characters_vocabulary=char_to_idx,
        words_vocabulary=word_to_idx,
        characters_embedding_dimension=20,
        # characters_embeddings=chars_embeddings,
        word_embeddings_dimension=d,
        words_embeddings=word_embeddings,
        # context_dropout_p=0.5,
        # fc_dropout_p=0.5,
        freeze_word_embeddings=freeze_word_embeddings)
    model_name = "{}_{}_v{}".format(model_name, net.__class__.__name__.lower(), net.version)
    handler = logging.FileHandler('{}.log'.format(model_name))
    logger.addHandler(handler)

    model = Model(
        model=net,
        optimizer=Adam(net.parameters(), lr=lr),
        loss_function=square_distance,
        metrics=[cosine_sim],
    )
    if use_gpu:
        model.cuda()

    # Prepare examples
    train_loader, valid_loader, test_loader, oov_loader = prepare_data(
        dataset=dataset,
        embeddings=word_embeddings,
        vectorizer=vectorizer,
        n=n,
        use_gpu=use_gpu,
        k=k,
        over_population_threshold=over_population_threshold,
        relative_over_population=relative_over_population,
        data_augmentation=data_augmentation,
        debug_mode=debug_mode,
        verbose=verbose,
    )

    # Set up the callbacks and train
    train(
        model,
        model_name,
        train_loader=train_loader,
        valid_loader=valid_loader,
        epochs=epochs,
    )

    test_embeddings = evaluate(model,
                               test_loader=test_loader,
                               test_embeddings=word_embeddings,
                               save=save,
                               model_name=model_name + '.txt')

    predicted_oov_embeddings = predict_mean_embeddings(model, oov_loader)

    # Override embeddings with the training ones
    # Make sure we only have embeddings from the corpus data
    logging.info("Evaluating embeddings...")
    predicted_oov_embeddings.update(word_embeddings)

    for task in task_config['tasks']:
        logging.info("Using predicted embeddings on {} task...".format(task['name']))
        task['script'](predicted_oov_embeddings, task['name'] + "_" + model_name,
                       device, debug_mode)

    logger.removeHandler(handler)
def train(encdec, optimizer, prefix, best_valid_ppl):
    # Registers all parameters to the optimizer.
    optimizer.add_model(encdec)

    # Loads vocab.
    src_vocab = make_vocab(SRC_TRAIN_FILE, SRC_VOCAB_SIZE)
    trg_vocab = make_vocab(TRG_TRAIN_FILE, TRG_VOCAB_SIZE)
    inv_trg_vocab = make_inv_vocab(trg_vocab)
    print("#src_vocab:", len(src_vocab))
    print("#trg_vocab:", len(trg_vocab))

    # Loads all corpus
    train_src_corpus = load_corpus(SRC_TRAIN_FILE, src_vocab)
    train_trg_corpus = load_corpus(TRG_TRAIN_FILE, trg_vocab)
    valid_src_corpus = load_corpus(SRC_VALID_FILE, src_vocab)
    valid_trg_corpus = load_corpus(TRG_VALID_FILE, trg_vocab)
    test_src_corpus = load_corpus(SRC_TEST_FILE, src_vocab)
    test_ref_corpus = load_corpus_ref(REF_TEST_FILE, trg_vocab)
    num_train_sents = len(train_trg_corpus)
    num_valid_sents = len(valid_trg_corpus)
    num_test_sents = len(test_ref_corpus)
    num_train_labels = count_labels(train_trg_corpus)
    num_valid_labels = count_labels(valid_trg_corpus)
    print("train:", num_train_sents, "sentences,", num_train_labels, "labels")
    print("valid:", num_valid_sents, "sentences,", num_valid_labels, "labels")

    # Sentence IDs
    train_ids = list(range(num_train_sents))
    valid_ids = list(range(num_valid_sents))

    # Train/valid loop.
    for epoch in range(MAX_EPOCH):
        # Computation graph.
        g = Graph()
        Graph.set_default(g)

        print("epoch %d/%d:" % (epoch + 1, MAX_EPOCH))
        print(" learning rate scale = %.4e" % optimizer.get_learning_rate_scaling())

        # Shuffles train sentence IDs.
        random.shuffle(train_ids)

        # Training.
        train_loss = 0.
        for ofs in range(0, num_train_sents, BATCH_SIZE):
            print("%d" % ofs, end="\r")
            sys.stdout.flush()

            batch_ids = train_ids[ofs:min(ofs + BATCH_SIZE, num_train_sents)]
            src_batch = make_batch(train_src_corpus, batch_ids, src_vocab)
            trg_batch = make_batch(train_trg_corpus, batch_ids, trg_vocab)

            g.clear()
            encdec.encode(src_batch, True)
            loss = encdec.loss(trg_batch, True)
            train_loss += loss.to_float() * len(batch_ids)

            optimizer.reset_gradients()
            loss.backward()
            optimizer.update()

        train_ppl = math.exp(train_loss / num_train_labels)
        print(" train PPL = %.4f" % train_ppl)

        # Validation.
        valid_loss = 0.
        for ofs in range(0, num_valid_sents, BATCH_SIZE):
            print("%d" % ofs, end="\r")
            sys.stdout.flush()

            batch_ids = valid_ids[ofs:min(ofs + BATCH_SIZE, num_valid_sents)]
            src_batch = make_batch(valid_src_corpus, batch_ids, src_vocab)
            trg_batch = make_batch(valid_trg_corpus, batch_ids, trg_vocab)

            g.clear()
            encdec.encode(src_batch, False)
            loss = encdec.loss(trg_batch, False)
            valid_loss += loss.to_float() * len(batch_ids)

        valid_ppl = math.exp(valid_loss / num_valid_labels)
        print(" valid PPL = %.4f" % valid_ppl)

        # Calculates test BLEU.
        stats = defaultdict(int)
        for ofs in range(0, num_test_sents, BATCH_SIZE):
            print("%d" % ofs, end="\r")
            sys.stdout.flush()

            src_batch = test_src_corpus[ofs:min(ofs + BATCH_SIZE, num_test_sents)]
            ref_batch = test_ref_corpus[ofs:min(ofs + BATCH_SIZE, num_test_sents)]

            hyp_ids = test_batch(encdec, src_vocab, trg_vocab, src_batch)
            for hyp_line, ref_line in zip(hyp_ids, ref_batch):
                for k, v in get_bleu_stats(ref_line[1:-1], hyp_line).items():
                    stats[k] += v

        bleu = calculate_bleu(stats)
        print(" test BLEU = %.2f" % (100 * bleu))

        # Saves best model/optimizer.
        if valid_ppl < best_valid_ppl:
            best_valid_ppl = valid_ppl
            print(" saving model/optimizer ... ", end="")
            sys.stdout.flush()
            encdec.save(prefix + ".model")
            optimizer.save(prefix + ".optimizer")
            save_ppl(prefix + ".valid_ppl", best_valid_ppl)
            print("done.")
        else:
            # Learning rate decay by 1/sqrt(2)
            new_scale = .7071 * optimizer.get_learning_rate_scaling()
            optimizer.set_learning_rate_scaling(new_scale)
def main(args):
    # create data batcher, vocabulary
    # batcher
    with open(join(DATA_DIR, 'vocab_cnt.pkl'), 'rb') as f:
        wc = pkl.load(f)
    word2id = make_vocab(wc, args.vsize)
    train_batcher, val_batcher = build_batchers(word2id, args.cuda, args.debug)

    # make net
    net, net_args = configure_net(len(word2id), args.emb_dim, args.n_hidden,
                                  args.bi, args.n_layer)
    if args.w2v:
        # NOTE: the pretrained embedding having the same dimension
        #       as args.emb_dim should already be trained
        embedding, oov = make_embedding({i: w for w, i in word2id.items()},
                                        args.w2v)
        net.set_embedding(embedding)

    # configure training setting
    criterion, train_params = configure_training('adam', args.lr, args.clip,
                                                 args.decay, args.batch)

    # save experiment setting
    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)
    meta = {}
    meta['net'] = 'base_abstractor'
    meta['net_args'] = net_args
    meta['traing_params'] = train_params
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    # prepare trainer
    val_fn = basic_validate(net, criterion)
    grad_fn = get_basic_grad_fn(net, args.clip)
    optimizer = optim.Adam(net.parameters(), **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True,
                                  factor=args.decay, min_lr=0,
                                  patience=args.lr_p)

    if args.cuda:
        net = net.cuda()
    pipeline = BasicPipeline(meta['net'], net,
                             train_batcher, val_batcher, args.batch, val_fn,
                             criterion, optimizer, grad_fn)
    trainer = BasicTrainer(pipeline, args.path,
                           args.ckpt_freq, args.patience, scheduler)

    print('start training with the following hyper-parameters:')
    print(meta)

    # # Print model's state_dict
    # print("Model's state_dict:")
    # for param_tensor in net.state_dict():
    #     print(param_tensor, "\t", net.state_dict()[param_tensor].size())
    #
    # # Print optimizer's state_dict
    # print("Optimizer's state_dict:")
    # for var_name in optimizer.state_dict():
    #     print(var_name, "\t", optimizer.state_dict()[var_name])

    # # IMPORT PRETRAINED MODEL PARAMETERS
    # net.load_state_dict(torch.load(
    #     'pretrained_eng_model/abstractor/ckpt/ckpt-0-0')['state_dict'])
    # net.eval()  # do I need that or not?

    # copy net
    # from copy import deepcopy
    # net_copy = deepcopy(net)
    # net_copy.load_state_dict(torch.load('pretrained_eng_model/abstractor/ckpt/ckpt-0-0', map_location='cpu')['state_dict'])
    # for key in net_copy.state_dict():
    #     print('key: ', key)
    #     param = net_copy.state_dict()[key]
    #     print('param.shape: ', param.shape)
    #     print('param.requires_grad: ', param.requires_grad)
    #     print('param.shape, param.requires_grad: ', param.shape, param.requires_grad)
    #     print('isinstance(param, nn.Module) ', isinstance(param, nn.Module))
    #     print('isinstance(param, nn.Parameter) ', isinstance(param, nn.Parameter))
    #     print('isinstance(param, torch.Tensor): ', isinstance(param, torch.Tensor))
    #     print('=====')

    # save current state dict
    model_dict = net.state_dict()

    # save some parameters for testing purposes if the dict was loaded successfully
    p1 = net._embedding.weight[0][0].detach().cpu().numpy()
    p2 = net._enc_lstm.weight_hh_l0[0][0].detach().cpu().numpy()
    p3 = net._attn_wm.data[0][0].detach().cpu().numpy()
    # print(p1)
    # print(p2)
    # print(p3)

    # load dict from pretrained net
    ABS_DIR = os.environ['ABS']
    print(ABS_DIR)
    # uncomment for gpu
    # pretrained_dict = torch.load(ABS_DIR)['state_dict']
    pretrained_dict = torch.load(ABS_DIR)['state_dict']

    # skip embedding weights
    pretrained_dict = {
        k: v for k, v in pretrained_dict.items() if k != '_embedding.weight'
    }
    # overwrite entries in the existing state dict
    model_dict.update(pretrained_dict)
    print('Model will be trained on device:')
    print(model_dict['_embedding.weight'].device)
    # load the new state dict
    net.load_state_dict(model_dict)

    # check if the update was correct
    pn1 = net._embedding.weight[0][0].detach().cpu().numpy()
    pn2 = net._enc_lstm.weight_hh_l0[0][0].detach().cpu().numpy()
    pn3 = net._attn_wm.data[0][0].detach().cpu().numpy()
    # print(pn1)
    # print(pn2)
    # print(pn3)
    assert p1 == pn1  # embedding layer has to be the same
    assert p2 != pn2
    assert p3 != pn3
    print('Embedding layer has not been overwritten')

    # set updating of the parameters
    for name, param in net.named_parameters():
        # param.requires_grad = True
        print(name, param.requires_grad)

    trainer.train()