def train_model(data_folder, data_name, level, model_name, is_aspect_term=True):
    """Configure the global ``config``, train a ``SentimentModel`` if needed, and score it.

    Training only runs when no ``<exp_name>.hdf5`` checkpoint exists under
    ``config.checkpoint_dir/<data_folder>``. In either case ``model.load()``
    is called afterwards and the model is scored on the test split.

    Args:
        data_folder: Dataset folder name. Stored on ``config``, passed to the
            data loaders and used as the checkpoint sub-directory.
        data_name: Stored on ``config.data_name``.
        level: Stored on ``config.level``, passed to ``load_input_data`` and
            embedded in the experiment name.
        model_name: Stored on ``config.model_name`` and embedded in the
            experiment name.
        is_aspect_term: Stored on ``config.is_aspect_term``.

    Returns:
        None. The function mutates the module-level ``config``, may create the
        checkpoint directory, and prints progress to stdout.
    """
    config.data_folder = data_folder
    config.data_name = data_name
    # exist_ok=True replaces a separate os.path.exists() check, which is racy
    # when several runs start at the same time.
    os.makedirs(os.path.join(config.checkpoint_dir, data_folder), exist_ok=True)
    config.level = level
    config.model_name = model_name
    config.is_aspect_term = is_aspect_term
    config.init_input()

    # The experiment name encodes the settings and doubles as the checkpoint
    # file stem.
    exp_name = '{}_{}_wv_{}'.format(model_name, level, config.word_embed_type)
    exp_name += '_update' if config.word_embed_trainable else '_fix'
    if config.use_aspect_input:
        exp_name += '_aspv_{}'.format(config.aspect_embed_type)
        exp_name += '_update' if config.aspect_embed_trainable else '_fix'
    if config.use_elmo:
        exp_name += '_elmo_alone_{}_mode_{}_{}'.format(
            config.use_elmo_alone, config.elmo_output_mode,
            'update' if config.elmo_trainable else 'fix')
    config.exp_name = exp_name
    print(config.exp_name)

    def _load_inputs(split):
        # Load one data split using the input flags currently set on config.
        return load_input_data(
            data_folder, split, level,
            config.use_text_input, config.use_text_input_l,
            config.use_text_input_r, config.use_text_input_r_with_pad,
            config.use_aspect_input, config.use_aspect_text_input,
            config.use_loc_input, config.use_offset_input, config.use_mask)

    model = SentimentModel(config)

    test_input = _load_inputs('test')
    test_label = load_label(data_folder, 'test')

    checkpoint_path = os.path.join(
        config.checkpoint_dir, '%s/%s.hdf5' % (data_folder, config.exp_name))
    if not os.path.exists(checkpoint_path):
        start_time = time.time()

        train_input = _load_inputs('train')
        train_label = load_label(data_folder, 'train')
        valid_input = _load_inputs('valid')
        valid_label = load_label(data_folder, 'valid')

        # Merge the train and valid splits, one input slot at a time.
        # Both lists come from the same loader call with the same flags, so
        # they are presumably the same length and order -- TODO confirm.
        train_combine_valid_input = [
            train_part + valid_part
            for train_part, valid_part in zip(train_input, valid_input)
        ]
        train_combine_valid_label = train_label + valid_label

        # NOTE(review): the test split is passed as the 3rd/4th arguments of
        # model.train. If those act as validation data for checkpoint
        # selection, test data leaks into model selection -- confirm this is
        # intended.
        model.train(train_combine_valid_input, train_combine_valid_label,
                    test_input, test_label)

        elapsed_time = time.time() - start_time
        print('training time:', time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))

    # load the best model
    model.load()

    print('score over test data...')
    model.score(test_input, test_label)
def train_model(data_folder, data_name, level, model_name, is_aspect_term=True):
    """Train (if no checkpoint exists), score, and save test predictions.

    Training only runs when no ``<exp_name>.hdf5`` checkpoint exists under
    ``config.checkpoint_dir/<data_folder>``. Afterwards the model is loaded,
    scored on the test split, and its test predictions are handed to
    ``concat`` together with the true labels.

    Args:
        data_folder: Dataset folder name. Stored on ``config``, passed to the
            data loaders and used as the checkpoint sub-directory.
        data_name: Stored on ``config.data_name``.
        level: Stored on ``config.level``, passed to ``load_input_data`` and
            embedded in the experiment name (the original note mentions
            ``char`` for Chinese text).
        model_name: Stored on ``config.model_name``, embedded in the
            experiment name and passed to ``concat`` (the original note
            mentions ``atae_lstm`` or ``tsa``).
        is_aspect_term: Stored on ``config.is_aspect_term``.

    Returns:
        None. The function mutates the module-level ``config``, may create the
        checkpoint directory, and prints progress to stdout.
    """
    config.data_folder = data_folder
    config.data_name = data_name
    # Create the checkpoint directory. exist_ok=True replaces a separate
    # os.path.exists() check, which is racy when runs start concurrently.
    os.makedirs(os.path.join(config.checkpoint_dir, data_folder), exist_ok=True)
    config.level = level
    config.model_name = model_name
    config.is_aspect_term = is_aspect_term
    config.init_input()

    # Name under which the model is saved.
    exp_name = '{}_{}_wv_{}'.format(model_name, level, config.word_embed_type)
    # Records whether the word embeddings are updated during training.
    exp_name += '_update' if config.word_embed_trainable else '_fix'
    if config.use_aspect_input:
        exp_name += '_aspv_{}'.format(config.aspect_embed_type)
        exp_name += '_update' if config.aspect_embed_trainable else '_fix'
    # The ELMo suffix is deliberately not appended here. The original author
    # noted that tensorflow_hub problems were otherwise hard to resolve.
    config.exp_name = exp_name
    print(config.exp_name)

    def _load_inputs(split):
        # Load one data split using the input flags currently set on config.
        return load_input_data(data_folder, split, level, config.use_text_input,
                               config.use_aspect_input, config.use_aspect_text_input)

    # Build the model.
    model = SentimentModel(config)

    test_input = _load_inputs('test')
    test_label = load_label(data_folder, 'test')
    print(test_input)

    # No dev/valid split is loaded: there is no dev data for the laptop
    # dataset.

    # No existing checkpoint: start training.
    checkpoint_path = os.path.join(
        config.checkpoint_dir, '%s/%s.hdf5' % (data_folder, config.exp_name))
    if not os.path.exists(checkpoint_path):
        start_time = time.time()

        train_input = _load_inputs('train')
        train_label = load_label(data_folder, 'train')

        # NOTE(review): the test split is passed as the 3rd/4th arguments of
        # model.train. If those act as validation data, test data influences
        # model selection -- confirm this is intended.
        model.train(train_input, train_label, test_input, test_label)

        elapsed_time = time.time() - start_time
        print('training time:', time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))

    # load the best model
    model.load()

    print("start to score ...")
    print('score over test data...')
    model.score(test_input, test_label)
    print("score done!")

    print('start to predict and save the results...')
    print('predict over test data...')
    result2 = model.predict(test_input)
    # The message used to say "dev" although the test predictions are saved.
    print("save prediction and actual labels of test...")
    # The 4th argument presumably tags the split: the disabled dev call
    # passed 1, so 2 likely means test -- TODO confirm against concat.
    concat(result2, test_label, model_name, 2, config.word_embed_type)
    print('predict and save the results done!')

    print('totally done!')
def train_model(data_folder, data_name, level, model_name, is_aspect_term=True,
                classWeights=None, imBalanced=False):
    """Configure the global ``config``, train a ``SentimentModel`` if needed, and score it.

    Training only runs when no ``<exp_name>.hdf5`` checkpoint exists under
    ``config.checkpoint_dir/<data_folder>``. In either case ``model.load()``
    is called afterwards and the model is scored on the test split.

    Args:
        data_folder: Dataset folder name. Stored on ``config``, passed to the
            data loaders and used as the checkpoint sub-directory.
        data_name: Stored on ``config.data_name``.
        level: Stored on ``config.level``, passed to ``load_input_data`` and
            embedded in the experiment name.
        model_name: Stored on ``config.model_name`` and embedded in the
            experiment name.
        is_aspect_term: Stored on ``config.is_aspect_term``.
        classWeights: Forwarded unchanged to ``model.train`` as its fifth
            positional argument.
        imBalanced: Forwarded unchanged to ``model.train`` as its sixth
            positional argument.

    Returns:
        None. The function mutates the module-level ``config``, may create the
        checkpoint directory, and prints progress to stdout.
    """
    config.data_folder = data_folder
    config.data_name = data_name
    # exist_ok=True replaces a separate os.path.exists() check, which is racy
    # when several runs start at the same time.
    os.makedirs(os.path.join(config.checkpoint_dir, data_folder), exist_ok=True)
    config.level = level
    config.model_name = model_name
    config.is_aspect_term = is_aspect_term
    config.init_input()

    # The experiment name encodes the settings and doubles as the checkpoint
    # file stem.
    exp_name = '{}_{}_wv_{}'.format(model_name, level, config.word_embed_type)
    exp_name += '_update' if config.word_embed_trainable else '_fix'
    if config.use_aspect_input:
        exp_name += '_aspv_{}'.format(config.aspect_embed_type)
        exp_name += '_update' if config.aspect_embed_trainable else '_fix'
    if config.use_elmo:
        exp_name += '_elmo_alone_{}_mode_{}_{}'.format(
            config.use_elmo_alone, config.elmo_output_mode,
            'update' if config.elmo_trainable else 'fix')
    config.exp_name = exp_name
    print(config.exp_name)

    def _load_inputs(split):
        # Load one data split using the input flags currently set on config.
        return load_input_data(
            data_folder, split, level,
            config.use_text_input, config.use_text_input_l,
            config.use_text_input_r, config.use_text_input_r_with_pad,
            config.use_aspect_input, config.use_aspect_text_input,
            config.use_loc_input, config.use_offset_input, config.use_mask)

    model = SentimentModel(config)

    test_input = _load_inputs('test')
    test_label = load_label(data_folder, 'test')

    checkpoint_path = os.path.join(
        config.checkpoint_dir, '%s/%s.hdf5' % (data_folder, config.exp_name))
    if not os.path.exists(checkpoint_path):
        start_time = time.time()

        train_input = _load_inputs('train')
        train_label = load_label(data_folder, 'train')

        # Note: the test data serves as the validation input to model.train.
        # The original author wanted to explore how well the model can do on
        # the test data: Keras's ModelCheckpoint callback saves the model that
        # performs best on the validation data, which here is the test data.
        # Generally we would not do that, because test data will not (and
        # should not) be accessible during training.
        #
        # An earlier variant also merged the 'valid' split into the training
        # data. That is disabled, so the 'valid' split is no longer loaded.
        model.train(train_input, train_label, test_input, test_label,
                    classWeights, imBalanced)

        elapsed_time = time.time() - start_time
        print('training time:', time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))

    # load the best model
    model.load()

    print('score over test data...')
    model.score(test_input, test_label)
# Load our config and set the ELMo / embedding flags for this prediction run.
config = Config()
config.use_elmo = config.use_elmo_alone = config.elmo_trainable = True
config.word_embed_trainable = False
config.aspect_embed_trainable = True

# Pick which model to load and test (original note: td_lstm).
model = loadModel('alta2', 'twitter', 'word', modelName)

# Input-selection flags, in the positional order load_input_data is called with.
_input_flags = (
    config.use_text_input,
    config.use_text_input_l,  # temp workaround (original author's note)
    config.use_text_input_r,
    config.use_text_input_r_with_pad,
    config.use_aspect_input,
    config.use_aspect_text_input,
    config.use_loc_input,
    config.use_offset_input,
    config.use_mask,
)
predict_input = load_input_data('output', 'test', config.level, *_input_flags)

# Read the stored sentence array, predict, and save the labels beside it.
_sentence_path = saveFolder + "/totalsentence.npy"
_prediction_path = saveFolder + "/predictedval.npy"
documentVec = np.load(_sentence_path)
labels = getPredictedValue(model, documentVec, predict_input)
np.save(_prediction_path, labels)