예제 #1
0
def train_model(data_folder,
                data_name,
                level,
                model_name,
                is_aspect_term=True):
    """Train a ``SentimentModel`` if no checkpoint exists, then score it.

    The module-level ``config`` object is filled in from the arguments, an
    experiment name is derived from the embedding settings, and the file
    ``<config.checkpoint_dir>/<data_folder>/<exp_name>.hdf5`` is looked up.
    When that file is missing, the model is trained on the train and valid
    splits joined together, with the test split handed to ``model.train`` as
    its third and fourth arguments.  In every case ``model.load()`` is called
    afterwards and the model is scored on the test split.

    Args:
        data_folder: Name of the dataset folder; passed to the data loaders
            and used as the sub-directory of ``config.checkpoint_dir``.
        data_name: Stored on ``config.data_name``.
        level: Stored on ``config.level``, passed to ``load_input_data`` and
            embedded in the experiment name.
        model_name: Stored on ``config.model_name`` and embedded in the
            experiment name.
        is_aspect_term: Stored on ``config.is_aspect_term``.

    Returns:
        None.  Results are reported through ``print`` and ``model.score``.
    """

    def _load_split(split):
        # Every split is loaded with the same feature switches; the switches
        # are read from ``config`` at call time.
        return load_input_data(
            data_folder, split, level, config.use_text_input,
            config.use_text_input_l, config.use_text_input_r,
            config.use_text_input_r_with_pad, config.use_aspect_input,
            config.use_aspect_text_input, config.use_loc_input,
            config.use_offset_input, config.use_mask)

    config.data_folder = data_folder
    config.data_name = data_name
    # exist_ok=True avoids the race between a separate existence check and
    # the directory creation.
    os.makedirs(os.path.join(config.checkpoint_dir, data_folder),
                exist_ok=True)
    config.level = level
    config.model_name = model_name
    config.is_aspect_term = is_aspect_term
    config.init_input()

    # The experiment name encodes the model, the input level and which
    # embeddings are trainable; it doubles as the checkpoint file stem.
    config.exp_name = '{}_{}_wv_{}'.format(model_name, level,
                                           config.word_embed_type)
    config.exp_name += '_update' if config.word_embed_trainable else '_fix'
    if config.use_aspect_input:
        config.exp_name += '_aspv_{}'.format(config.aspect_embed_type)
        config.exp_name += ('_update'
                            if config.aspect_embed_trainable else '_fix')
    if config.use_elmo:
        config.exp_name += '_elmo_alone_{}_mode_{}_{}'.format(
            config.use_elmo_alone, config.elmo_output_mode,
            'update' if config.elmo_trainable else 'fix')

    print(config.exp_name)
    model = SentimentModel(config)

    test_input = _load_split('test')
    test_label = load_label(data_folder, 'test')

    checkpoint_path = os.path.join(config.checkpoint_dir, data_folder,
                                   '{}.hdf5'.format(config.exp_name))
    if not os.path.exists(checkpoint_path):
        start_time = time.time()

        train_input = _load_split('train')
        train_label = load_label(data_folder, 'train')
        valid_input = _load_split('valid')
        valid_label = load_label(data_folder, 'valid')

        # Join train and valid feature-by-feature.  Both come from the same
        # loader call with identical switches, so they are presumably of
        # equal length -- confirm against load_input_data.
        train_combine_valid_input = [
            train_part + valid_part
            for train_part, valid_part in zip(train_input, valid_input)
        ]
        train_combine_valid_label = train_label + valid_label

        # NOTE: the test split is what model.train receives as its
        # validation arguments here.
        model.train(train_combine_valid_input, train_combine_valid_label,
                    test_input, test_label)

        elapsed_time = time.time() - start_time
        print('training time:',
              time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))

    # load the best model
    model.load()

    print('score over test data...')
    model.score(test_input, test_label)
예제 #2
0
def train_model(data_folder,
                data_name,
                level,
                model_name,
                is_aspect_term=True):
    """Train a ``SentimentModel`` if needed, score it and save predictions.

    The module-level ``config`` object is filled in from the arguments and an
    experiment name is derived from the embedding settings.  If
    ``<config.checkpoint_dir>/<data_folder>/<exp_name>.hdf5`` does not exist,
    the model is trained on the train split, with the test split handed to
    ``model.train`` as its third and fourth arguments.  Afterwards
    ``model.load()`` is called, the model is scored on the test split, and
    its test predictions are passed to ``concat`` together with the labels.

    No dev/valid split is used here (the original author notes that the
    laptop dataset has none).

    Args:
        data_folder: Name of the dataset folder; passed to the data loaders
            and used as the sub-directory of ``config.checkpoint_dir``.
        data_name: Stored on ``config.data_name``.
        level: Stored on ``config.level``, passed to ``load_input_data`` and
            embedded in the experiment name (the author's note mentions
            ``char`` for Chinese text).
        model_name: Stored on ``config.model_name`` and embedded in the
            experiment name (the author's note mentions ``atae_lstm`` or
            ``tsa``).
        is_aspect_term: Stored on ``config.is_aspect_term``.

    Returns:
        None.  Results are reported through ``print``, ``model.score`` and
        ``concat``.
    """

    def _load_split(split):
        # Every split is loaded with the same feature switches; the switches
        # are read from ``config`` at call time.
        return load_input_data(data_folder, split, level,
                               config.use_text_input,
                               config.use_aspect_input,
                               config.use_aspect_text_input)

    config.data_folder = data_folder
    config.data_name = data_name
    # Create the checkpoint directory.  exist_ok=True avoids the race
    # between a separate existence check and the directory creation.
    os.makedirs(os.path.join(config.checkpoint_dir, data_folder),
                exist_ok=True)
    config.level = level
    config.model_name = model_name
    config.is_aspect_term = is_aspect_term
    config.init_input()

    # Name used when saving the model: model, level, embedding type, then
    # whether each embedding is trainable ("update") or frozen ("fix").
    config.exp_name = '{}_{}_wv_{}'.format(model_name, level,
                                           config.word_embed_type)
    config.exp_name += '_update' if config.word_embed_trainable else '_fix'
    if config.use_aspect_input:
        config.exp_name += '_aspv_{}'.format(config.aspect_embed_type)
        config.exp_name += ('_update'
                            if config.aspect_embed_trainable else '_fix')
    # No ELMo suffix is appended in this variant: the original author left
    # it disabled, citing hard-to-solve tensorflow_hub problems.

    print(config.exp_name)

    # Build the model.
    model = SentimentModel(config)

    test_input = _load_split('test')
    test_label = load_label(data_folder, 'test')

    # Dumps the whole loaded test input to stdout.
    print(test_input)

    # No existing checkpoint: train from scratch.
    checkpoint_path = os.path.join(config.checkpoint_dir, data_folder,
                                   '{}.hdf5'.format(config.exp_name))
    if not os.path.exists(checkpoint_path):
        start_time = time.time()

        train_input = _load_split('train')
        train_label = load_label(data_folder, 'train')

        # NOTE: the test split is what model.train receives as its
        # validation arguments here.
        model.train(train_input, train_label, test_input, test_label)

        elapsed_time = time.time() - start_time
        print('training time:',
              time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))

    # load the best model
    model.load()

    print("start to score ...")
    print('score over test data...')
    model.score(test_input, test_label)
    print("score done!")

    print('start to predict and save the results...')
    print('predict over test data...')
    result2 = model.predict(test_input)
    # The message used to say "dev" although the test predictions are saved.
    print("save prediction and actual labels of test...")
    # The literal 2 is presumably a split tag (the disabled dev path passed
    # 1) -- confirm against concat.
    concat(result2, test_label, model_name, 2, config.word_embed_type)
    print('predict and save the results done!')
    print('totally done!')
예제 #3
0
def train_model(data_folder,
                data_name,
                level,
                model_name,
                is_aspect_term=True,
                classWeights=None,
                imBalanced=False):
    """Train a ``SentimentModel`` if no checkpoint exists, then score it.

    The module-level ``config`` object is filled in from the arguments, an
    experiment name is derived from the embedding settings, and the file
    ``<config.checkpoint_dir>/<data_folder>/<exp_name>.hdf5`` is looked up.
    When that file is missing, the model is trained on the train split only,
    with the test split handed to ``model.train`` as its third and fourth
    arguments.  In every case ``model.load()`` is called afterwards and the
    model is scored on the test split.

    Args:
        data_folder: Name of the dataset folder; passed to the data loaders
            and used as the sub-directory of ``config.checkpoint_dir``.
        data_name: Stored on ``config.data_name``.
        level: Stored on ``config.level``, passed to ``load_input_data`` and
            embedded in the experiment name.
        model_name: Stored on ``config.model_name`` and embedded in the
            experiment name.
        is_aspect_term: Stored on ``config.is_aspect_term``.
        classWeights: Forwarded unchanged to ``model.train`` as its fifth
            positional argument.
        imBalanced: Forwarded unchanged to ``model.train`` as its sixth
            positional argument.

    Returns:
        None.  Results are reported through ``print`` and ``model.score``.
    """

    def _load_split(split):
        # Every split is loaded with the same feature switches; the switches
        # are read from ``config`` at call time.
        return load_input_data(
            data_folder, split, level, config.use_text_input,
            config.use_text_input_l, config.use_text_input_r,
            config.use_text_input_r_with_pad, config.use_aspect_input,
            config.use_aspect_text_input, config.use_loc_input,
            config.use_offset_input, config.use_mask)

    config.data_folder = data_folder
    config.data_name = data_name
    # exist_ok=True avoids the race between a separate existence check and
    # the directory creation.
    os.makedirs(os.path.join(config.checkpoint_dir, data_folder),
                exist_ok=True)
    config.level = level
    config.model_name = model_name
    config.is_aspect_term = is_aspect_term
    config.init_input()

    # The experiment name encodes the model, the input level and which
    # embeddings are trainable; it doubles as the checkpoint file stem.
    config.exp_name = '{}_{}_wv_{}'.format(model_name, level,
                                           config.word_embed_type)
    config.exp_name += '_update' if config.word_embed_trainable else '_fix'
    if config.use_aspect_input:
        config.exp_name += '_aspv_{}'.format(config.aspect_embed_type)
        config.exp_name += ('_update'
                            if config.aspect_embed_trainable else '_fix')
    if config.use_elmo:
        config.exp_name += '_elmo_alone_{}_mode_{}_{}'.format(
            config.use_elmo_alone, config.elmo_output_mode,
            'update' if config.elmo_trainable else 'fix')

    print(config.exp_name)
    model = SentimentModel(config)

    test_input = _load_split('test')
    test_label = load_label(data_folder, 'test')

    checkpoint_path = os.path.join(config.checkpoint_dir, data_folder,
                                   '{}.hdf5'.format(config.exp_name))
    if not os.path.exists(checkpoint_path):
        start_time = time.time()

        train_input = _load_split('train')
        train_label = load_label(data_folder, 'train')

        # NOTE: only the train split is fitted, and the test split is what
        # model.train receives as its validation arguments.  According to
        # the original author's note this is deliberate: Keras's
        # ModelCheckpoint callback then keeps the model that performs best
        # on the test data, to explore how well the model can do on it.
        # Generally test data should not be accessible during training.
        # The valid split is not loaded because nothing here consumes it.
        model.train(train_input, train_label, test_input, test_label,
                    classWeights, imBalanced)

        elapsed_time = time.time() - start_time
        print('training time:',
              time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))

    # load the best model
    model.load()

    print('score over test data...')
    model.score(test_input, test_label)
예제 #4
0
 # Prediction fragment: build a Config, obtain a model via loadModel, load
 # the 'test' split of the 'output' folder, compute labels with
 # getPredictedValue and save them as a .npy file.
 # `modelName` and `saveFolder` are defined outside this fragment.
 config = Config()  # fresh configuration object
 # Switch ELMo on (alone, trainable), freeze the word embeddings and keep
 # the aspect embeddings trainable.
 # NOTE(review): these five flags are not read again in this fragment and
 # `config` is not passed to loadModel -- confirm where they take effect.
 config.use_elmo = True
 config.use_elmo_alone = True
 config.elmo_trainable = True
 config.word_embed_trainable = False
 config.aspect_embed_trainable = True
 model = loadModel(
     'alta2', 'twitter', 'word',
     modelName)  # modelName selects which model to load and test (e.g. td_lstm)
 # Same positional flag order as the other load_input_data calls that take
 # nine feature switches.
 predict_input = load_input_data(
     'output',
     'test',
     config.level,
     config.use_text_input,
     config.use_text_input_l,  # marked "temp workaround" by the original author
     config.use_text_input_r,
     config.use_text_input_r_with_pad,
     config.use_aspect_input,
     config.use_aspect_text_input,
     config.use_loc_input,
     config.use_offset_input,
     config.use_mask)
 # Array previously stored under saveFolder; presumably one entry per
 # sentence/document -- TODO confirm against the code that writes it.
 documentVec = np.load(saveFolder + "/totalsentence.npy")
 labels = getPredictedValue(model, documentVec, predict_input)
 np.save(saveFolder + "/predictedval.npy",
         labels)  # persist the predicted labels next to the input array
 # Earlier debugging snippets, left disabled:
 # predictValue(model,[26,31],predict_input)
 # element = model.predict(predict_input)
 # print(element[0:25])
 # tester = element[0:26]
 # # print(element)