def main(_): print("Parameters: ") for k, v in FLAGS.__flags.items(): print("{} = {}".format(k, v)) if not os.path.exists("./prepro/"): os.makedirs("./prepro/") if FLAGS.prepro: img_feat, tags_idx, a_tags_idx, vocab_processor = data_utils.load_train_data(FLAGS.train_dir, FLAGS.tag_path, FLAGS.prepro_dir, FLAGS.vocab) else: img_feat = cPickle.load(open(os.path.join(FLAGS.prepro_dir, "img_feat.dat"), 'rb')) tags_idx = cPickle.load(open(os.path.join(FLAGS.prepro_dir, "tag_ids.dat"), 'rb')) a_tags_idx = cPickle.load(open(os.path.join(FLAGS.prepro_dir, "a_tag_ids.dat"), 'rb')) vocab_processor = VocabularyProcessor.restore(FLAGS.vocab) img_feat = np.array(img_feat, dtype='float32')/127.5 - 1. test_tags_idx = data_utils.load_test(FLAGS.test_path, vocab_processor) print("Image feature shape: {}".format(img_feat.shape)) print("Tags index shape: {}".format(tags_idx.shape)) print("Attribute Tags index shape: {}".format(a_tags_idx.shape)) print("Vocab size: {}".format(len(vocab_processor._reverse_mapping))) print("Vocab max length: {}".format(vocab_processor.max_document_length)) data = Data(img_feat, tags_idx, a_tags_idx, test_tags_idx, FLAGS.z_dim, vocab_processor) Model = getattr(sys.modules[__name__], FLAGS.model) print(Model) model = Model(data, vocab_processor, FLAGS) model.build_model() model.train()
def main(_): print("Parameters: ") log_writeln(outf, "Parameters: ") for k, v in FLAGS.__flags.items(): print("{} = {}".format(k, v)) log_writeln(outf, "{} = {}".format(k, v)) if not os.path.exists("./prepro/"): os.makedirs("./prepro/") if FLAGS.prepro: img_feat, tags_idx, a_tags_idx, vocab_processor = data_utils.load_train_data( FLAGS.train_dir, FLAGS.tag_path, FLAGS.prepro_dir, FLAGS.vocab) else: img_feat = cPickle.load( open(os.path.join(FLAGS.prepro_dir, "img_feat.dat"), 'rb')) tags_idx = cPickle.load( open(os.path.join(FLAGS.prepro_dir, "tag_ids.dat"), 'rb')) a_tags_idx = cPickle.load( open(os.path.join(FLAGS.prepro_dir, "a_tag_ids.dat"), 'rb')) vocab_processor = VocabularyProcessor.restore(FLAGS.vocab) img_feat = np.array(img_feat, dtype='float32') / 127.5 - 1. test_tags_idx = data_utils.load_test(FLAGS.test_path, vocab_processor) log_writeln(outf, "Image feature shape: {}".format(img_feat.shape)) log_writeln(outf, "Tags index shape: {}".format(tags_idx.shape)) log_writeln(outf, "Attribute Tags index shape: {}".format(a_tags_idx.shape)) log_writeln(outf, "Vocab size: {}".format(len(vocab_processor._reverse_mapping))) log_writeln( outf, "Vocab max length: {}".format(vocab_processor.max_document_length)) data = Data(img_feat, tags_idx, a_tags_idx, test_tags_idx, FLAGS.z_dim, vocab_processor) Model = getattr(sys.modules[__name__], FLAGS.model) print(Model) log_writeln(outf, Model) model = Model(data, vocab_processor, FLAGS) model.build_model() model.train()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 4 11:40:46 2018

@author: yangyang
"""
import data_utils as utils

# NOTE: FLAGS must be defined by the surrounding script; it is not set up here.
test = utils.load_test([
    '../data/dataset-E2E-goal-oriented-test-v1.0/tst1/dialog-task2REFINE-kb1_atmosphere-test1.json'
], FLAGS)
def main(_): print("Parameter:") for k, v in FLAGS.__flags.items(): print("{} = {}".format(k, v)) if not os.path.exists("./prepro"): os.makedirs("./prepro") if FLAGS.eval: print("Evaluation...") threshold = 0.5 Aspects = ['服務', '環境', '價格', '交通', '餐廳'] vocab_processor = VocabularyProcessor.restore(FLAGS.vocab) test_dict = data_utils.load_test(vocab_processor, FLAGS.test_data) graph = tf.Graph() config = tf.ConfigProto(allow_soft_placement=True) config.gpu_options.allow_growth = True f = open('out.txt', 'w') r_words, r_words_dict = data_utils.load_new_aspect('new_aspect.txt') print(r_words) print(r_words_dict) with graph.as_default(), tf.Session(config=config) as sess: model = load_model(graph, sess, FLAGS.checkpoint_file) for k, v in sorted(test_dict.items(), key=lambda x: x[0]): attns, preds = sess.run( [model["attn_w"], model["pred"]], feed_dict={model["seq_in"]: v["test_x"]}) t_asp = {'服務': 0, '環境': 0, '價格': 0, '交通': 0, '餐廳': 0} t_asp_s = {'服務': 0, '環境': 0, '價格': 0, '交通': 0, '餐廳': 0} for idx, p in enumerate(preds): label = 1 if p == 1 else -1 for word in r_words: if v['parsed_sents'][idx].find(word) >= 0: t_asp_s[r_words_dict[word]] = 1 t_asp[r_words_dict[word]] = label # for a_idx, a in enumerate(attns[idx]): # if a >= threshold and a > t_asp_s[Aspects[a_idx]]: # t_asp_s[Aspects[a_idx]] = a # t_asp[Aspects[a_idx]] = label test_dict[k]["aspect"] = t_asp f.write(test_dict[k]['raw_context'] + '\n') f.write(str(t_asp)) f.write("\n") ans = [] with open(FLAGS.query, 'r') as f: for idx, row in enumerate(csv.reader(f)): if idx != 0: ans.append(test_dict[int(row[1])]["aspect"][row[2]]) print(len(ans)) with open(FLAGS.output, 'w') as f: f.write("Id,Label\n") for idx, p in enumerate(ans): f.write("{},{}\n".format(idx + 1, p)) else: if FLAGS.prepro: print("Start preprocessing data...") train_x, labels, vocab_processor, g_train_x, ( g_sent_dist, g_pos_label) = data_utils.load_data( FLAGS.polarity_data, FLAGS.aspect_data, FLAGS.prepro_train, FLAGS.prepro_labels, FLAGS.vocab) (use_Pdist, use_Ndist, neg_label, pos_label, sent_Pdist, sent_Ndist, polarity, aspect_idx) = labels print("Start loading pre-train word embedding...") w2v_W = data_utils.build_w2v_matrix(vocab_processor, FLAGS.w2v_data, FLAGS.vector_file, FLAGS.embedding_size) else: train_x = cPickle.load(open(FLAGS.prepro_train, 'rb')) labels = cPickle.load(open(FLAGS.prepro_labels, 'rb')) g_train_x = cPickle.load(open("./prepro/g_train.dat", 'rb')) (g_sent_dist, g_pos_label) = cPickle.load(open("./prepro/g_label.dat", 'rb')) (use_Pdist, use_Ndist, neg_label, pos_label, sent_Pdist, sent_Ndist, polarity, aspect_idx) = labels vocab_processor = VocabularyProcessor.restore(FLAGS.vocab) w2v_W = cPickle.load(open(FLAGS.w2v_data, 'rb')) print(len(vocab_processor._reverse_mapping)) print(len(train_x)) print(len(use_Pdist)) print(len(w2v_W)) data = Data(train_x, (use_Pdist, use_Ndist, neg_label, pos_label, sent_Pdist, sent_Ndist, polarity), aspect_idx, FLAGS.dev_size, g_train_x, (g_sent_dist, g_pos_label)) model = Model(data, w2v_W, vocab_processor) model.build_model() model.train()
if not os.path.exists("./prepro/"): os.makedirs("./prepro/") if args.pre_parameter == True: img_feat = cPickle.load(open(os.path.join(args.prepro_dir, "img_feat.dat"), 'rb')) tags_idx = cPickle.load(open(os.path.join(args.prepro_dir, "tag_ids.dat"), 'rb')) a_tags_idx = cPickle.load(open(os.path.join(args.prepro_dir, "a_tag_ids.dat"), 'rb')) k_tmp_vocab = cPickle.load(open(os.path.join(args.prepro_dir, "k_tmp_vocab_ids.dat"), 'rb')) vocab_processor = Vocab_Operator.restore(args.vocab) else: img_feat, tags_idx, a_tags_idx, vocab_processor, k_tmp_vocab = data_utils.load_train_data(args.train_dir, args.tag_path, args.prepro_dir, args.vocab) img_feat = np.array(img_feat, dtype='float32')/127.5 - 1. test_tags_idx = data_utils.load_test(args.test_path, vocab_processor, k_tmp_vocab) print("Image feature shape: {}".format(img_feat.shape)) print("Tags index shape: {}".format(tags_idx.shape)) print("Attribute Tags index shape: {}".format(a_tags_idx.shape)) print("Test Tags index shape: {}".format(test_tags_idx.shape)) data = Data(img_feat, tags_idx, a_tags_idx, test_tags_idx, args.z_dim, vocab_processor) dcgan = dcgan.DCGAN(model_options, training_options, data, args.mode, args.resume, args.model_dir) input_tensors, variables, loss, outputs, checks = dcgan.build_model() if args.mode == 0:
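# ---------------------------------------------------------------------------
# Aside: the x / 127.5 - 1 step above maps 8-bit pixel features from [0, 255]
# into [-1, 1], matching a tanh generator output. Saving generated samples
# needs the inverse mapping; a minimal sketch (the helper name denormalize is
# ours, not the original's):
import numpy as np

def denormalize(img_feat):
    # Map [-1, 1] back to 8-bit pixel values in [0, 255].
    return np.clip((img_feat + 1.) * 127.5, 0, 255).astype('uint8')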
def main():
    def _print_config(config):
        import pprint
        pp = pprint.PrettyPrinter(indent=4)
        pp.pprint(vars(config))

    ## Load the configuration.
    args = build_parser()
    _print_config(args)

    ## Validate the arguments.
    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and not args.overwrite_output_dir):
        raise ValueError(
            "The output directory ({}) already contains files. To save into it "
            "anyway, set overwrite_output_dir to true.".format(args.output_dir))
    if not os.path.isdir(args.output_dir):
        os.mkdir(args.output_dir)
    if args.do_train:
        if args.train_file is None:
            raise ValueError("Please provide a file to train on.")
    if args.do_eval:
        if args.test_file is None:
            raise ValueError("Please provide a file to evaluate on.")
    if args.do_train is None and args.do_eval:
        if args.pre_trained_dir is None:
            raise ValueError("To run evaluation only, provide a trained model.")
    if args.do_train is None and args.do_summary:
        if args.pre_trained_dir is None:
            raise ValueError("To run summary only, provide a trained model.")
    if args.do_summary:
        if args.summary_dir is None or args.test_file is None:
            raise ValueError("To build a summary, provide a directory to save "
                             "it in and a file to evaluate.")
        if not os.path.isdir(args.summary_dir):
            os.mkdir(args.summary_dir)

    # Set up logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )

    # Set seed
    set_seed(args)

    ########################### Load Model ##########################
    logger.info("Load model %s", args.model_type)
    model = build_model(args)

    results = {}
    writer = ResultWriter(args.experiments_dir)

    ## Training
    if args.do_train:
        logger.info("train model %s", args.model_type)

        ## Load the training data
        train_dataset, valid_dataset = load_train_valid(
            file_path=args.train_file,
            valid_portion=args.valid_portion,
            shuffle=True,
            window_size=args.window_size,
        )

        ## Train the model and time the run
        now = datetime.now()
        model.fit(train_dataset.get_data(), train_dataset.get_label())
        elapsed = datetime.now() - now
        time_taken = str(timedelta(seconds=elapsed.total_seconds()))

        ## Run inference on the validation and training splits
        val_predicted_label = model.predict(valid_dataset.get_data())
        val_true_label = valid_dataset.get_label()
        val_mse = mean_squared_error(val_true_label, val_predicted_label)
        val_mae = mean_absolute_error(val_true_label, val_predicted_label)

        train_predicted_label = model.predict(train_dataset.get_data())
        train_true_label = train_dataset.get_label()
        train_mse = mean_squared_error(train_true_label, train_predicted_label)
        train_mae = mean_absolute_error(train_true_label, train_predicted_label)

        ## Record the metrics for the summary file
        results.update({
            'train_mae': train_mae,
            'train_mse': train_mse,
            'val_mae': val_mae,
            'val_mse': val_mse,
            'training time': time_taken,
        })

        ## Save the model
        save_model(args.output_dir, model)
        ## Save the config
        save_config(args.output_dir, args)
        args.pre_trained_dir = args.output_dir

    # Evaluation
    if args.do_eval:
        ## Load the trained model's configuration
        saved_config = load_saved_config(args.pre_trained_dir)
        ## Load the trained model
        model = load_saved_model(args.pre_trained_dir)
        logger.info("test model %s", saved_config.model_type)

        ## Load the evaluation data
        test_dataset = load_train_valid(
            file_path=saved_config.test_file,
            window_size=saved_config.window_size,
        )

        ## Run inference
        test_predicted_label = model.predict(test_dataset.get_data())
        test_true_label = test_dataset.get_label()
        test_mse = mean_squared_error(test_true_label, test_predicted_label)
        test_mae = mean_absolute_error(test_true_label, test_predicted_label)

        ## Record the metrics for the summary file
        results.update({
            'test_mse': test_mse,
            'test_mae': test_mae,
        })
        writer.update(args, **results)

    if args.do_summary:
        ## Load the trained model's configuration
        saved_config = load_saved_config(args.pre_trained_dir)
        ## Load the trained model
        model = load_saved_model(args.pre_trained_dir)
        logger.info("make summary of model %s", saved_config.model_type)

        ## Load the evaluation data
        test_dataset = load_test(
            file_path=saved_config.test_file,
            window_size=saved_config.window_size,
        )

        ## Run inference
        test_predicted_label = model.predict(test_dataset.get_data())
        test_true_label = test_dataset.get_label()
        test_mse = mean_squared_error(test_true_label, test_predicted_label)
        test_mae = mean_absolute_error(test_true_label, test_predicted_label)

        ## Build the summary plot
        summary_df = test_dataset.df
        summary_df['predicted_label'] = test_predicted_label
        name_dict = {
            "var_name": "abnormal_score",
            "confidence_name": "abnormal class",
            'title': "MSE:[{}] MAE:[{}]".format(test_mse, test_mae),
            "var_color": 'black',
            "confidence_color": 'red',
            "var_plot": 'line',
            "confidence_plot": 'line',
        }

        ## Render the plot
        save_name = "abnormal_score.html"
        temp_save_path = os.path.join(args.summary_dir, save_name)
        # plot_picture(summary_df['date'].values, None, summary_df['count'].values,
        #              summary_df['predicted_label'].values, temp_save_path, name_dict)
        plot_picture_ver2(summary_df['date'].values, summary_df['count'].values,
                          summary_df['predicted_label'].values, temp_save_path,
                          name_dict)

        if args.with_csv:
            save_name = "summary.csv"
            temp_save_path = os.path.join(args.summary_dir, save_name)
            summary_df.to_csv(temp_save_path, index=False)
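# Typical invocations, assuming this script is saved as main.py and that
# build_parser exposes flags matching the attribute names used above (the
# paths and flag spellings here are placeholders, not confirmed by the source):
#
#   python main.py --do_train --train_file data/train.csv \
#       --test_file data/test.csv --output_dir runs/exp1
#   python main.py --do_eval --pre_trained_dir runs/exp1
#   python main.py --do_summary --pre_trained_dir runs/exp1 \
#       --summary_dir runs/exp1/summary --with_csv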