def main(unused_argv):
    if len(unused_argv) != 1:  # prints a message if you've entered flags incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)

    tf.logging.set_verbosity(tf.logging.INFO)  # choose what level of logging you want
    tf.logging.info('Starting to run in %s mode...', FLAGS.mode)

    # Change log_root to FLAGS.log_root/FLAGS.exp_name and create the dir if necessary
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode == "train":
            os.makedirs(FLAGS.log_root)
        else:
            raise Exception("Logdir %s doesn't exist. Run in train mode to create it." % FLAGS.log_root)

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)  # create a vocabulary

    tf.set_random_seed(6)  # a seed value for randomness

    # CNN hyperparameters. `config` was not defined in this function originally;
    # the dict below mirrors the one passed to CNN() elsewhere in this file.
    config = {
        'n_epochs': 5,
        'kernel_sizes': [3, 4, 5],
        'dropout_rate': 0.5,
        'val_split': 0.4,
        'edim': 300,
        'n_words': None,  # leave as None; set below
        'std_dev': 0.05,
        'sentence_len': 50,
        'n_filters': 100,
        'batch_size': 50
    }
    config['n_words'] = 50000

    cnn_classifier = CNN(config)
    #cnn_batcher = ClaBatcher(hps_discriminator, vocab)
    cnn_batcher = ClaBatcher(FLAGS, vocab)
    sess_cnn, saver_cnn, train_dir_cnn = setup_training_classifier(cnn_classifier)
    run_train_cnn_classifier(cnn_classifier, cnn_batcher, 15, sess_cnn, saver_cnn, train_dir_cnn)
    #util.load_ckpt(saver_cnn, sess_cnn, ckpt_dir="train-classifier")
    acc = run_test_classification(cnn_classifier, cnn_batcher, sess_cnn, saver_cnn, str('last'))
    print("The accuracy of the sentiment classifier is {:.3f}".format(acc))
    generate_confident_examples(cnn_classifier, cnn_batcher, sess_cnn)  # writes train_conf

    print("Start training the emotional words detection model...")
    model_class = Classification(FLAGS, vocab)
    cla_batcher = AttenBatcher(FLAGS, vocab)  # read from train_conf
    sess_cls, saver_cls, train_dir_cls = setup_training_attention_classification(model_class)
    run_train_attention_classification(model_class, cla_batcher, 15, sess_cls, saver_cls, train_dir_cls)
    #util.load_ckpt(saver_cls, sess_cls, ckpt_dir="train-classification")
    acc = run_test_classification(model_class, cla_batcher, sess_cls, saver_cls, str("final_acc"))
    print("The sentiment classification accuracy of the emotional words detection model is {:.3f}".format(acc))

    generated = Generate_training_sample(model_class, vocab, cla_batcher, sess_cls)
    print("Generating training examples......")
    generated.generate_training_example("train_filtered")  # write train
    generated.generator_valid_test_example("valid_test_filtered")

    model = Seq2seq_AE(FLAGS, vocab)
    # Create a batcher object that will create minibatches of data
    batcher = GenBatcher(vocab, FLAGS)  # read from train
    sess_ge, saver_ge, train_dir_ge = setup_training_generator(model)
    generated = Generated_sample(model, vocab, batcher, sess_ge)
    print("Start training generator......")
    run_train_auto_encoder(model, batcher, 15, sess_ge, saver_ge, train_dir_ge, generated,
                           cnn_classifier, sess_cnn, cla_batcher)

def main(unused_argv):
    if len(unused_argv) != 1:  # prints a message if you've entered flags incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)

    tf.logging.set_verbosity(tf.logging.INFO)  # choose what level of logging you want
    tf.logging.info('Starting to run in %s mode...', FLAGS.mode)

    # Change log_root to FLAGS.log_root/FLAGS.exp_name and create the dir if necessary
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode == "train":
            os.makedirs(FLAGS.log_root)
        else:
            raise Exception("Logdir %s doesn't exist. Run in train mode to create it." % FLAGS.log_root)

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)  # create a vocabulary

    # Make a namedtuple hps, containing the values of the hyperparameters that the model needs
    hparam_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag', 'trunc_norm_init_std',
        'max_grad_norm', 'hidden_dim', 'emb_dim', 'batch_size', 'max_dec_steps', 'max_enc_steps'
    ]
    hps_dict = {}
    for key, val in FLAGS.__flags.items():  # for each flag
        if key in hparam_list:  # if it's in the list
            hps_dict[key] = val  # add it to the dict
    hps_generator = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    hparam_list = [
        'lr', 'adagrad_init_acc', 'rand_unif_init_mag', 'trunc_norm_init_std',
        'max_grad_norm', 'hidden_dim', 'emb_dim', 'batch_size', 'max_dec_steps'
    ]
    hps_dict = {}
    for key, val in FLAGS.__flags.items():  # for each flag
        if key in hparam_list:  # if it's in the list
            hps_dict[key] = val  # add it to the dict
    hps_discriminator = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    tf.set_random_seed(111)  # a seed value for randomness

    # Modes: train-classifier / train-sentimentor / test / train-generator
    if FLAGS.mode == "train-classifier":
        #print("Start pre-training......")
        model_class = Classification(hps_discriminator, vocab)
        cla_batcher = ClaBatcher(hps_discriminator, vocab)
        sess_cls, saver_cls, train_dir_cls = setup_training_classification(model_class)
        print("Start pre-training classification......")
        run_pre_train_classification(model_class, cla_batcher, 1, sess_cls, saver_cls, train_dir_cls)  # 10
        generated = Generate_training_sample(model_class, vocab, cla_batcher, sess_cls)
        print("Generating training examples......")
        generated.generate_training_example("train")
        generated.generate_test_example("test")

    elif FLAGS.mode == "train-sentimentor":
        model_class = Classification(hps_discriminator, vocab)
        cla_batcher = ClaBatcher(hps_discriminator, vocab)
        sess_cls, saver_cls, train_dir_cls = setup_training_classification(model_class)
        print("Start pre-training sentimentor......")
        model_sentiment = Sentimentor(hps_generator, vocab)
        sentiment_batcher = SenBatcher(hps_generator, vocab)
        sess_sen, saver_sen, train_dir_sen = setup_training_sentimentor(model_sentiment)
        util.load_ckpt(saver_cls, sess_cls, ckpt_dir="train-classification")
        run_pre_train_sentimentor(model_sentiment, sentiment_batcher, 1, sess_sen, saver_sen, train_dir_sen)  # 1

    elif FLAGS.mode == "test":
        config = {
            'n_epochs': 5,
            'kernel_sizes': [3, 4, 5],
            'dropout_rate': 0.5,
            'val_split': 0.4,
            'edim': 300,
            'n_words': None,  # leave as None; set below
            'std_dev': 0.05,
            'sentence_len': 50,
            'n_filters': 100,
            'batch_size': 50
        }
        config['n_words'] = 50000

        cla_cnn_batcher = CNN_ClaBatcher(hps_discriminator, vocab)
        cnn_classifier = CNN(config)
        sess_cnn_cls, saver_cnn_cls, train_dir_cnn_cls = setup_training_cnnclassifier(cnn_classifier)
        #util.load_ckpt(saver_cnn_cls, sess_cnn_cls, ckpt_dir="train-cnnclassification")
        run_train_cnn_classifier(cnn_classifier, cla_cnn_batcher, 1, sess_cnn_cls, saver_cnn_cls, train_dir_cnn_cls)  # 1
        files = os.listdir("test-generate-transfer/")
        for file_ in files:
            run_test_our_method(cla_cnn_batcher, cnn_classifier, sess_cnn_cls,
                                "test-generate-transfer/" + file_ + "/*")
        #elif FLAGS.mode == "test":

    elif FLAGS.mode == "train-generator":
        model_class = Classification(hps_discriminator, vocab)
        cla_batcher = ClaBatcher(hps_discriminator, vocab)
        sess_cls, saver_cls, train_dir_cls = setup_training_classification(model_class)
        model_sentiment = Sentimentor(hps_generator, vocab)
        sentiment_batcher = SenBatcher(hps_generator, vocab)
        sess_sen, saver_sen, train_dir_sen = setup_training_sentimentor(model_sentiment)

        config = {
            'n_epochs': 5,
            'kernel_sizes': [3, 4, 5],
            'dropout_rate': 0.5,
            'val_split': 0.4,
            'edim': 300,
            'n_words': None,  # leave as None; set below
            'std_dev': 0.05,
            'sentence_len': 50,
            'n_filters': 100,
            'batch_size': 50
        }
        config['n_words'] = 50000

        cla_cnn_batcher = CNN_ClaBatcher(hps_discriminator, vocab)
        cnn_classifier = CNN(config)
        sess_cnn_cls, saver_cnn_cls, train_dir_cnn_cls = setup_training_cnnclassifier(cnn_classifier)

        model = Generator(hps_generator, vocab)
        batcher = GenBatcher(vocab, hps_generator)
        sess_ge, saver_ge, train_dir_ge = setup_training_generator(model)

        util.load_ckpt(saver_cnn_cls, sess_cnn_cls, ckpt_dir="train-cnnclassification")
        util.load_ckpt(saver_sen, sess_sen, ckpt_dir="train-sentimentor")

        generated = Generated_sample(model, vocab, batcher, sess_ge)
        print("Start pre-training generator......")
        run_pre_train_generator(model, batcher, 1, sess_ge, saver_ge, train_dir_ge, generated,
                                cla_cnn_batcher, cnn_classifier, sess_cnn_cls)  # 4
        generated.generate_test_negetive_example("temp_negetive", batcher)  # batcher, model_class, sess_cls, cla_batcher
        generated.generate_test_positive_example("temp_positive", batcher)
        #run_test_our_method(cla_cnn_batcher, cnn_classifier, sess_cnn_cls,
        #                    "temp_negetive" + "/*")

        loss_window = 0
        t0 = time.time()
        print("begin reinforcement learning:")
        for epoch in range(30):
            batches = batcher.get_batches(mode='train')
            for i in range(len(batches)):
                current_batch = copy.deepcopy(batches[i])
                sentiment_batch = batch_sentiment_batch(current_batch, sentiment_batcher)
                result = model_sentiment.max_generator(sess_sen, sentiment_batch)
                weight = result['generated']
                current_batch.weight = weight
                sentiment_batch.weight = weight

                cla_batch = batch_classification_batch(current_batch, batcher, cla_batcher)
                result = model_class.run_ypred_auc(sess_cls, cla_batch)

                cc = SmoothingFunction()
                reward_sentiment = 1 - np.abs(0.5 - result['y_pred_auc'])
                reward_BLEU = []
                for k in range(FLAGS.batch_size):
                    reward_BLEU.append(
                        sentence_bleu([current_batch.original_reviews[k].split()],
                                      cla_batch.original_reviews[k].split(),
                                      smoothing_function=cc.method1))
                reward_BLEU = np.array(reward_BLEU)
                # harmonic mean of the sentiment and BLEU rewards
                reward_de = 2 / (1.0 / (1e-6 + reward_sentiment) + 1.0 / (1e-6 + reward_BLEU))

                result = model.run_train_step(sess_ge, current_batch)
                train_step = result['global_step']  # we need this to update our running average loss
                loss = result['loss']
                loss_window += loss
                if train_step % 100 == 0:
                    t1 = time.time()
                    tf.logging.info('seconds for %d training generator step: %.3f', train_step, (t1 - t0) / 100)
                    t0 = time.time()
                    tf.logging.info('loss: %f', loss_window / 100)  # print the loss to screen
                    loss_window = 0.0
                if train_step % 10000 == 0:
                    generated.generate_test_negetive_example(
                        "test-generate-transfer/" + str(epoch) + "epoch_step" + str(train_step) + "_temp_positive",
                        batcher)
                    generated.generate_test_positive_example(
                        "test-generate/" + str(epoch) + "epoch_step" + str(train_step) + "_temp_positive",
                        batcher)
                    #saver_ge.save(sess, train_dir + "/model", global_step=train_step)
                    #run_test_our_method(cla_cnn_batcher, cnn_classifier, sess_cnn_cls,
                    #                    "test-generate-transfer/" + str(epoch) + "epoch_step" + str(train_step) + "_temp_positive" + "/*")

                cla_batch, bleu = output_to_classification_batch(result['generated'], current_batch, batcher, cla_batcher, cc)
                result = model_class.run_ypred_auc(sess_cls, cla_batch)
                reward_result_sentiment = result['y_pred_auc']
                reward_result_bleu = np.array(bleu)
                reward_result = 2 / (1.0 / (1e-6 + reward_result_sentiment) + 1.0 / (1e-6 + reward_result_bleu))

                current_batch.score = 1 - current_batch.score

                result = model.max_generator(sess_ge, current_batch)
                cla_batch, bleu = output_to_classification_batch(result['generated'], current_batch, batcher, cla_batcher, cc)
                result = model_class.run_ypred_auc(sess_cls, cla_batch)
                reward_result_transfer_sentiment = result['y_pred_auc']
                reward_result_transfer_bleu = np.array(bleu)
                reward_result_transfer = 2 / (1.0 / (1e-6 + reward_result_transfer_sentiment) + 1.0 / (1e-6 + reward_result_transfer_bleu))

                #tf.logging.info("reward_nonsentiment: " + str(reward_sentiment) + " output_original_sentiment: " + str(reward_result_sentiment) + " output_original_bleu: " + str(reward_result_bleu))

                reward = reward_result_transfer  #reward_de + reward_result_sentiment +
                #tf.logging.info("reward_de: " + str(reward_de))

                model_sentiment.run_train_step(sess_sen, sentiment_batch, reward)
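
# The reinforcement-learning loop above repeatedly collapses a sentiment score and a
# BLEU score into a single reward via their harmonic mean, e.g.
# reward = 2 / (1 / (1e-6 + sentiment) + 1 / (1e-6 + bleu)).
# Below is a minimal, self-contained sketch of that combination; the helper name
# `harmonic_reward` is illustrative and not part of the original code.
def harmonic_reward(sentiment_score, bleu_score, eps=1e-6):
    """Harmonic mean of two reward components, with eps for numerical stability."""
    return 2.0 / (1.0 / (eps + sentiment_score) + 1.0 / (eps + bleu_score))

# Example: a strongly transferred sentence (sentiment 0.9) that preserves only moderate
# content (BLEU 0.5) gets harmonic_reward(0.9, 0.5) ~= 0.64, i.e. the weaker component
# dominates, which discourages sacrificing one objective for the other.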

def main(unused_argv):
    if len(unused_argv) != 1:  # prints a message if you've entered flags incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)

    tf.logging.set_verbosity(tf.logging.INFO)  # choose what level of logging you want
    tf.logging.info('Starting to run in %s mode...', FLAGS.mode)

    # Change log_root to FLAGS.log_root/FLAGS.exp_name and create the dir if necessary
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode == "train":
            os.makedirs(FLAGS.log_root)
        else:
            raise Exception("Logdir %s doesn't exist. Run in train mode to create it." % FLAGS.log_root)

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)  # create a vocabulary

    # Make a namedtuple hps, containing the values of the hyperparameters that the model needs
    hparam_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag', 'trunc_norm_init_std',
        'max_grad_norm', 'hidden_dim', 'emb_dim', 'batch_size', 'max_dec_steps', 'max_enc_steps'
    ]
    hps_dict = {}
    for key, val in FLAGS.__flags.items():  # for each flag
        if key in hparam_list:  # if it's in the list
            hps_dict[key] = val  # add it to the dict
    hps_generator = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    hparam_list = [
        'lr', 'adagrad_init_acc', 'rand_unif_init_mag', 'trunc_norm_init_std',
        'max_grad_norm', 'hidden_dim', 'emb_dim', 'batch_size', 'max_dec_steps'
    ]
    hps_dict = {}
    for key, val in FLAGS.__flags.items():  # for each flag
        if key in hparam_list:  # if it's in the list
            hps_dict[key] = val  # add it to the dict
    hps_discriminator = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    tf.set_random_seed(6)  # a seed value for randomness

    # CNN hyperparameters. `config` was not defined in this function originally;
    # the dict below mirrors the one passed to CNN() elsewhere in this file.
    config = {
        'n_epochs': 5,
        'kernel_sizes': [3, 4, 5],
        'dropout_rate': 0.5,
        'val_split': 0.4,
        'edim': 300,
        'n_words': None,  # leave as None; set below
        'std_dev': 0.05,
        'sentence_len': 50,
        'n_filters': 100,
        'batch_size': 50
    }
    config['n_words'] = 50000

    cnn_classifier = CNN(config)
    cnn_batcher = ClaBatcher(hps_discriminator, vocab)
    sess_cnn, saver_cnn, train_dir_cnn = setup_training_classifier(cnn_classifier)
    run_train_cnn_classifier(cnn_classifier, cnn_batcher, 0, sess_cnn, saver_cnn, train_dir_cnn)
    #util.load_ckpt(saver_cnn, sess_cnn, ckpt_dir="train-classifier")
    acc = run_test_classification(cnn_classifier, cnn_batcher, sess_cnn, saver_cnn, str('last'))
    print("the last stored cnn model acc = ", acc)
    generate_confident_examples(cnn_classifier, cnn_batcher, sess_cnn)  # writes train_conf

    print("Start pre-training attention classification......")
    model_class = Classification(hps_discriminator, vocab)
    cla_batcher = AttenBatcher(hps_discriminator, vocab)  # read from train_conf
    sess_cls, saver_cls, train_dir_cls = setup_training_classification(model_class)
    run_pre_train_classification(model_class, cla_batcher, 0, sess_cls, saver_cls, train_dir_cls)
    #util.load_ckpt(saver_cls, sess_cls, ckpt_dir="train-classification")
    acc = run_test_classification(model_class, cla_batcher, sess_cls, saver_cls, str("final_acc"))
    print("the last stored attention model acc = ", acc)
    acc = run_test_classification(cnn_classifier, cla_batcher, sess_cnn, saver_cnn, str("final_acc"))
    print("the last stored classifier model acc = ", acc)

    generated = Generate_training_sample(model_class, vocab, cla_batcher, sess_cls)
    print("Generating training examples......")
    #generated.generate_training_example("train_filtered")  # write train
    #generated.generator_validation_example("valid_filtered")
    #generated.generator_test_example("test_filtered")

    model = Seq2seq_AE(hps_generator, vocab)
    # Create a batcher object that will create minibatches of data
    batcher = GenBatcher(vocab, hps_generator)  # read from train
    sess_ge, saver_ge, train_dir_ge = setup_training_generator(model)
    generated = Generated_sample(model, vocab, batcher, sess_ge)
    print("Start pre-training generator......")
    run_pre_train_auto_encoder(model, batcher, 0, sess_ge, saver_ge, train_dir_ge, generated,
                               cnn_classifier, sess_cnn, cla_batcher)