def test(config):
    """Evaluate a model on the dev set, optionally restoring a snapshot first.

    Builds ``Model(config)`` on ``config.device``; when ``args.snapshot`` is
    set, the saved state dict is loaded into it before the dev loader is
    handed to the evaluation routine.

    Args:
        config: run configuration; this function reads ``device`` and
            ``dev_path`` and forwards the object to ``Model``,
            ``load_dataset`` and ``eval``.
    """
    model = Model(config).to(config.device)
    if args.snapshot:
        print('\nLoading model from {}...\n'.format(args.snapshot))
        # map_location remaps the stored tensors onto the target device, so a
        # snapshot written on a GPU machine can still be restored on CPU.
        state_dict = torch.load(args.snapshot, map_location=config.device)
        model.load_state_dict(state_dict)
    dev_loader = load_dataset(config.dev_path, config)
    # NOTE(review): `eval` is presumably the project's evaluation helper that
    # shadows the builtin -- confirm it is defined/imported in this module.
    eval(dev_loader, model, config)
def train(config):
    """Fine-tune ``Model`` on the training set, checkpointing the best dev accuracy.

    For every batch the texts are tokenized with ``config.tokenizer``, a
    forward/backward pass is run with cross-entropy loss, and a running
    progress line is written to stdout.  Every 50 steps the dev set is
    evaluated and the model is saved when the dev accuracy improves.

    Args:
        config: run configuration; this function reads ``train_path``,
            ``dev_path``, ``device``, ``lr``, ``num_epoches``, ``tokenizer``
            and ``model_path``.
    """
    train_loader = load_dataset(config.train_path, config)
    dev_loader = load_dataset(config.dev_path, config)
    model = Model(config).to(config.device)
    optimizer = BertAdam(model.parameters(),
                         lr=config.lr,
                         warmup=0.05,
                         t_total=len(train_loader) * config.num_epoches)
    loss_func = torch.nn.CrossEntropyLoss()
    best_acc = 0

    model.train()
    for epoch in range(config.num_epoches):
        for step, (batch_texts, batch_span) in enumerate(train_loader):
            # NOTE(review): max_len counts characters of the raw text, not
            # tokens; with add_special_tokens=True the longest sample may be
            # truncated -- confirm this is intended.
            max_len = max(len(text) for text in batch_texts)
            x = config.tokenizer.batch_encode_plus(batch_texts,
                                                   add_special_tokens=True,
                                                   return_tensors="pt",
                                                   max_length=max_len,
                                                   pad_to_max_length=True)
            input_ids = x["input_ids"].to(config.device)
            attention_mask = x["attention_mask"].to(config.device)
            token_type_ids = x["token_type_ids"].to(config.device)
            batch_span = batch_span.to(config.device)

            out = model(input_ids=input_ids,
                        attention_mask=attention_mask,
                        token_type_ids=token_type_ids)
            optimizer.zero_grad()
            loss = loss_func(out, batch_span)
            loss.backward()
            optimizer.step()

            # Use the real batch size: the final batch of an epoch can be
            # smaller than config.batch_size, which skewed the accuracy.
            batch_count = batch_span.size(0)
            predicted = torch.max(out, 1)[1].view(batch_span.size())
            corrects = (predicted == batch_span).sum().item()
            train_acc = 100.0 * corrects / batch_count
            sys.stdout.write(
                '\rBatch[{}] - loss: {:.6f} acc: {:.4f}%({}/{})'.format(
                    step, loss.item(), train_acc, corrects, batch_count))

            if step % 50 == 0:
                dev_acc = eval(dev_loader, model, config)
                # Evaluation helpers commonly switch the model to eval mode;
                # re-assert train mode so dropout etc. stay active.
                model.train()
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    print('Saving best model, acc: {:.4f}%\n'.format(best_acc))
                    save(model, config.model_path, 'best', step)
def main(_):
    """Build an ``InferModel`` from the trimmed GloVe vectors and train it.

    Loads the dataset and vocabulary from ``FLAGS.data_dir``, constructs the
    model, then opens a session pinned to ``/gpu:<FLAGS.gpu_id>`` (with soft
    placement) and runs ``model.train``.

    Args:
        _: unused positional argument supplied by the app runner.
    """
    dataset, max_q_len, max_c_len = load_dataset(
        FLAGS.data_dir,
        FLAGS.data_size,
        FLAGS.max_question_length,
        FLAGS.max_context_length,
    )

    glove_file = join("data", "squad", "glove.trimmed.100.npz")
    vocab_file = join(FLAGS.data_dir, "vocab.dat")
    vocab, rev_vocab = initialize_vocab(vocab_file)
    embeddings = load_glove_embeddings(glove_file)

    model = InferModel(FLAGS, embeddings, vocab)

    gpu_device = "/gpu:{}".format(FLAGS.gpu_id)
    with tf.device(gpu_device):
        session_config = tf.ConfigProto()
        session_config.allow_soft_placement = True
        session_config.gpu_options.per_process_gpu_memory_fraction = (
            FLAGS.gpu_fraction)

        with tf.Session(config=session_config) as sess:
            logging.info("Created model with fresh parameters.")
            sess.run(tf.global_variables_initializer())

            param_count = 0
            for variable in tf.trainable_variables():
                param_count += variable.get_shape().num_elements()
            logging.info('Num params: %d', param_count)

            model.train(sess, dataset)
def main(_):
    """Restore a trained QA model and write its dev-set predictions to JSON.

    Steps:
      1. Overlay the JSON config at ``FLAGS.config_path`` onto ``FLAGS``
         (``eval_on_train``, ``dev_path`` and ``train_dir`` keep their
         command-line values).
      2. Set up file logging under ``FLAGS.log_dir`` and dump the flags.
      3. Build the model selected by ``FLAGS.model``.
      4. Either answer questions read directly from the dev JSON
         (``FLAGS.load_from_json``) or evaluate on pre-converted files, and
         write the answers to a JSON file in the working directory.

    Args:
        _: unused positional argument supplied by the app runner.

    Raises:
        ValueError: if ``FLAGS.model`` is not a known model name.
    """
    config_fname = FLAGS.config_path
    assert os.path.exists(config_fname), "config file does not exist"
    with open(config_fname, "rb") as fp:
        json_flag = json.load(fp)
    logging.info("Loaded configs from: " + config_fname)

    print(vars(FLAGS))
    # These flags describe the current run, not the training run that
    # produced the config file, so they are never overridden.
    protected_keys = ("eval_on_train", "dev_path", "train_dir")
    for key, value in json_flag.iteritems():
        if key in protected_keys:
            continue
        FLAGS.__setattr__(key, value)
    print(vars(FLAGS))

    assert os.path.exists(FLAGS.train_dir), "train dir does not exist"

    vocab, rev_vocab = initialize_vocab(FLAGS.vocab_path)

    if not os.path.exists(FLAGS.log_dir):
        os.makedirs(FLAGS.log_dir)
    file_handler = logging.FileHandler(pjoin(FLAGS.log_dir, "log.txt"))
    logging.getLogger().addHandler(file_handler)

    with open(os.path.join(FLAGS.log_dir, "flags.json"), 'w') as fout:
        json.dump(FLAGS.__flags, fout)

    # ========= Model-specific =========
    # You must change the following code to adjust to your model
    embed_path = FLAGS.embed_path or pjoin(
        "data", "squad",
        "glove.trimmed.{}.npz".format(FLAGS.embedding_size))
    embeddings = load_glove_embeddings(embed_path)

    if FLAGS.model == 'baseline':
        encoder = Encoder(size=FLAGS.state_size,
                          vocab_dim=FLAGS.embedding_size)
        qa = QASystem(encoder, FLAGS, embeddings, 1)
    elif FLAGS.model == 'matchLSTM':
        qa = QASystemMatchLSTM(FLAGS, embeddings, 1)
    else:
        # Fail here rather than with a NameError on `qa` further down.
        raise ValueError("unknown model: {!r}".format(FLAGS.model))

    print('\n\nrand_unknown is set to be ' + str(FLAGS.rand_unknown))

    if FLAGS.load_from_json:
        dev_dirname = os.path.dirname(os.path.abspath(FLAGS.dev_path))
        dev_filename = os.path.basename(FLAGS.dev_path)
        dataset = prepare_dev(dev_dirname, dev_filename, vocab)
        # The dataset carries no answer labels in this mode.
        (context_tokens_data, context_data, question_tokens_data,
         question_data, question_uuid_data) = dataset

        # Log the first example as a sanity check of the preprocessing.
        logging.debug('context')
        logging.debug(' '.join(context_tokens_data[0]))
        logging.debug('context_data')
        logging.debug(context_data[0])
        logging.debug('question')
        logging.debug(' '.join(question_tokens_data[0]))
        logging.debug('question_data')
        logging.debug(question_data[0])
        logging.debug('uuid_data')
        logging.debug(question_uuid_data[0])

        with tf.Session() as sess:
            train_dir = FLAGS.train_dir
            initialize_model(sess, qa, train_dir)
            print('About to start generate_answers')
            print(FLAGS.eval_on_train)
            answers = generate_answers(sess, qa, dataset)

            # write to json file to root dir
            with io.open('dev-prediction.json', 'w', encoding='utf-8') as f:
                f.write(unicode(json.dumps(answers, ensure_ascii=False)))
    else:
        # load from files converted from json
        FLAGS.data_dir = os.path.join("data", "squad", "qa_answer")
        # NOTE(review): the "/home/el" check looks machine-specific; kept
        # as-is -- confirm whether it is still needed.
        if (not os.path.isdir("/home/el")) or not os.listdir(FLAGS.data_dir):
            process_dev_json_to_files()
        else:
            # Two placeholders for two arguments; the original format string
            # had only one and raised TypeError.
            print('Data directory %s is not empty: %s' %
                  (FLAGS.data_dir, str(os.listdir(FLAGS.data_dir))))

        dataset, max_q_len, max_c_len = load_dataset(
            FLAGS.data_dir, FLAGS.data_size, FLAGS.max_question_length,
            FLAGS.max_context_length, ['dev'])
        dev_set = dataset['dev']
        print('Start running evaluate_answer on %d of data' % len(dev_set))
        dev_examples = qa.preprocess_question_answer(dev_set)
        dev_raw = dataset['dev_raw']
        dev_uuid = dataset['dev_uuid']
        dev_dataset = [dev_examples, dev_raw, dev_uuid]

        with tf.Session() as sess:
            train_dir = FLAGS.train_dir
            initialize_model(sess, qa, train_dir)
            answers_model = qa.evaluate_answer(session=sess,
                                               dataset=dev_dataset,
                                               sample=len(dev_set),
                                               return_answer_dict=True)

            # write to json file to root dir
            with io.open('dev-prediction-model.json', 'w',
                         encoding='utf-8') as f:
                f.write(unicode(json.dumps(answers_model,
                                           ensure_ascii=False)))
def main(): train_vids, test_vids = data_util.load_dataset(args) iters = args.iters best_model = args.best_model # will pick last model prefix = ("sto" + "_h=" + str(args.image_size_h) + "_w=" + str(args.image_size_w) + "_K=" + str(args.K) + "_T=" + str(args.T) + "_B=" + str(args.B) + "_batch_size=" + str(32) + "_beta1=" + str(args.beta1) + "_alpha=" + str(args.alpha) + "_gamma=" + str(args.gamma) + "_lr=" + str(args.lr) + "_mode=" + str(args.mode) + "_space_aware=" + str(space_aware) + "_z_channel=" + str(args.z_channel) + "_p_loss=" + str(args.pixel_loss) + "_cell_type=" + str(args.cell_type) + "_norm=" + str(not args.no_normalized) + "_mask_w=" + str(args.mask_weight) + "_res_type=" + str(args.res_type) + "_neg_noise=" + str(not args.no_negative_noise) + "_res_ref=" + str(not args.no_res_ref) + "_pic_norm=" + str(not args.no_pic_norm) + "_start_perc=" + str(args.start_percentage)) checkpoint_dir = "../../models/stochastic/" \ + args.dataset + '/' + prefix + "/" # if args.best_model!="": # prefix+="_" + args.best_model device_string = "" if args.cpu: os.environ["CUDA_VISIBLE_DEVICES"] = "0" device_string = "/cpu:0" elif args.gpu: device_string = "/gpu:%d" % args.gpu[0] os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu[0]) gt_dir = "../../results/images/stochastic/" + args.dataset + '/' + str( args.T) + "/gt/" c_dim = args.color_channel_num flipable = False if args.dataset == "kth": flipable = True if args.save_gt: write2gt(gt_dir, test_vids, flipable, c_dim=c_dim) return with tf.device(device_string): if args.mode == "bi_sto": model = stochastic_bi_net( [args.image_size_h, args.image_size_w], batch_size=args.batch_size, c_dim=args.color_channel_num, K=args.K, T=args.T, B=args.B, debug=False, pixel_loss=args.pixel_loss, convlstm_kernel=[3, 3], mode=args.mode, space_aware=space_aware, cell_type=args.cell_type, z_channel=args.z_channel, normalize=not args.no_normalized, weight=args.mask_weight, res_type=args.res_type, negative_noise=not args.no_negative_noise, 
res_ref=not args.no_res_ref, pic_norm=not args.no_pic_norm) elif args.mode == "learned_prior": model = stochastic_learned_prior( [args.image_size_h, args.image_size_w], batch_size=args.batch_size, c_dim=args.color_channel_num, K=args.K, T=args.T, B=args.B, debug=False, pixel_loss=args.pixel_loss, convlstm_kernel=[3, 3], mode=args.mode, space_aware=space_aware, cell_type=args.cell_type, z_channel=args.z_channel, normalize=not args.no_normalized, weight=args.mask_weight, res_type=args.res_type, negative_noise=not args.no_negative_noise, res_ref=not args.no_res_ref, pic_norm=not args.no_pic_norm) elif args.mode == "deter_flexible": model = deter_flexible([args.image_size_h, args.image_size_w], batch_size=args.batch_size, c_dim=args.color_channel_num, K=args.K, T=args.T, B=args.B, debug=False, pixel_loss=args.pixel_loss, convlstm_kernel=[3, 3], mode=args.mode, space_aware=space_aware, cell_type=args.cell_type, z_channel=args.z_channel, normalize=not args.no_normalized, weight=args.mask_weight, res_type=args.res_type, negative_noise=not args.no_negative_noise, res_ref=not args.no_res_ref, pic_norm=not args.no_pic_norm) # global_step = tf.Variable(0, trainable=False) # global_rate = tf.train.exponential_decay(args.lr, global_step, # args.decay_step, args.decay_rate, staircase=True) # update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) # with tf.control_dependencies(update_ops): # g_full = model.L_train_p + args.alpha * model.L_train_kl # if args.gamma != 0: # g_full += args.gamma * model.L_train_kl_exlusive # g_optim = tf.train.AdamOptimizer(global_rate, beta1=args.beta1).minimize( # g_full, var_list=model.trainable_variables, global_step=global_step # ) config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: tf.global_variables_initializer().run() print("checkpoint_dir:", checkpoint_dir) loaded, model_name = ops.load(model, sess, checkpoint_dir, best_model) gen_dir 
= "../../results/images/stochastic/" + args.dataset + '/' + str(args.T) + "/" \ + prefix + "/generated/" + model_name + "/" quant_dir = "../../results/quantitative/stochastic/" + args.dataset + '/' + str( args.T) + "/" + prefix + "/quant/" + model_name + "/" if args.save_gt: check_create_dir(gt_dir, clean=True) if not args.save_gt and args.qualitative: check_create_dir(gen_dir, clean=False) if not args.save_gt and args.quantitative: check_create_dir(quant_dir, clean=True) save_path = quant_dir + "results_model=" + model_name + ".npz" save_path_post = quant_dir + "results_model=" + model_name + "_post.npz" save_path_one = quant_dir + "results_model=" + model_name + "_one.npz" p_loss_percentage = 1.0 psnr_err = np.zeros((0, args.T)) flow_err = np.zeros((0, 1)) ssim_err = np.zeros((0, args.T)) psnr_err_post = np.zeros((0, args.T)) flow_err_post = np.zeros((0, 1)) ssim_err_post = np.zeros((0, args.T)) for img_dir in subdir(gt_dir): gensub_dir = gen_dir + img_dir.split('/')[-1] + "/" check_create_dir(gensub_dir, clean=(not args.testtrain)) inf_batch = np.zeros((1, args.K + args.T + 1, args.image_size_h, args.image_size_w, c_dim), dtype="float32") ref_batch = np.zeros( (1, args.B + 1, args.image_size_h, args.image_size_w, c_dim), dtype="float32") for t in range(args.B + 1): img = cv2.imread(img_dir + "/ref_" + "{0:04d}".format(t + 1) + ".png") if c_dim == 1: img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) if c_dim == 1: ref_batch[0, t, ...] = transform(img[:, :, None]) else: ref_batch[0, t, ...] = transform(img[:, :, :]) for t in range(args.K + args.T + 1): img = cv2.imread(img_dir + "/gt_" + "{0:04d}".format(t + 1) + ".png") if c_dim == 1: img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) if c_dim == 1: inf_batch[0, t, ...] = transform(img[:, :, None]) else: inf_batch[0, t, ...] 
= transform(img[:, :, :]) true_data = inf_batch.copy() pred_data = np.zeros((1, args.K + args.T + 1, args.image_size_h, args.image_size_w, c_dim), dtype="float32") pred_data_post = np.zeros( (1, args.K + args.T + 1, args.image_size_h, args.image_size_w, c_dim), dtype="float32") G_test, G_test_post, test_mask_binary = sess.run( [model.G_real, model.G_post_real, model.mask_binary], feed_dict={ model.ref_seq: ref_batch, model.inf_seq: inf_batch, model.is_train: args.testtrain, model.p_loss_percentage: p_loss_percentage }) print G_test.shape, inf_batch[:, 0, ...].shape pred_data[0] = np.concatenate( (np.expand_dims(inf_batch[:, 0, ...], axis=1), G_test, np.expand_dims(inf_batch[:, -1, ...], axis=1)), axis=1) pred_data_post[0] = np.concatenate( (np.expand_dims(inf_batch[:, 0, ...], axis=1), G_test_post, np.expand_dims(inf_batch[:, -1, ...], axis=1)), axis=1) true_data_unit = normalized2uint(true_data) pred_data_unit = normalized2uint(pred_data) pred_data_post_unit = normalized2uint(pred_data_post) cpsnr = np.zeros((args.T)) cssim = np.zeros((args.T)) cpsnr_post = np.zeros((args.T)) cssim_post = np.zeros((args.T)) flow_l2 = np.zeros((1, 1)) flow_l2_post = np.zeros((1, 1)) if args.quantitative: for t in xrange(args.T): pred = pred_data_unit[0, t + args.K, ...] pred_post = pred_data_post_unit[0, t + args.K, ...] target = true_data_unit[0, t + args.K, ...] 
cpsnr[t] = measure.compare_psnr(target, pred) cssim[t] = ssim.compute_ssim( Image.fromarray( cv2.cvtColor( target, cv2.COLOR_GRAY2BGR if c_dim == 1 else cv2.COLOR_RGB2BGR)), Image.fromarray( cv2.cvtColor( pred, cv2.COLOR_GRAY2BGR if c_dim == 1 else cv2.COLOR_RGB2BGR))) cpsnr_post[t] = measure.compare_psnr(target, pred_post) cssim_post[t] = ssim.compute_ssim( Image.fromarray( cv2.cvtColor( target, cv2.COLOR_GRAY2BGR if c_dim == 1 else cv2.COLOR_RGB2BGR)), Image.fromarray( cv2.cvtColor( pred_post, cv2.COLOR_GRAY2BGR if c_dim == 1 else cv2.COLOR_RGB2BGR))) flow_target = cv2.calcOpticalFlowFarneback( true_data_unit[0, args.T + args.K - 1, ...] if c_dim == 1 else cv2.cvtColor(true_data_unit[0, args.T + args.K - 1, ...], cv2.COLOR_RGB2GRAY), true_data_unit[0, args.T + args.K, ...] if c_dim == 1 else cv2.cvtColor(true_data_unit[0, args.T + args.K, ...], cv2.COLOR_RGB2GRAY), 0.5, 3, 15, 3, 5, 1.2, 0) flow_pred = cv2.calcOpticalFlowFarneback( pred_data_unit[0, args.T + args.K - 1, ...] if c_dim == 1 else cv2.cvtColor(pred_data_unit[0, args.T + args.K - 1, ...], cv2.COLOR_RGB2GRAY), pred_data_unit[0, args.T + args.K, ...] if c_dim == 1 else cv2.cvtColor(pred_data_unit[0, args.T + args.K, ...], cv2.COLOR_RGB2GRAY), 0.5, 3, 15, 3, 5, 1.2, 0) flow_pred_post = cv2.calcOpticalFlowFarneback( pred_data_post_unit[0, args.T + args.K - 1, ...] if c_dim == 1 else cv2.cvtColor( pred_data_post_unit[0, args.T + args.K - 1, ...], cv2.COLOR_RGB2GRAY), pred_data_post_unit[0, args.T + args.K, ...] 
if c_dim == 1 else cv2.cvtColor(pred_data_post_unit[0, args.T + args.K, ...], cv2.COLOR_RGB2GRAY), 0.5, 3, 15, 3, 5, 1.2, 0) flow_l2[0, 0] = np.mean(np.square(flow_target - flow_pred)) flow_l2_post[0, 0] = np.mean( np.square(flow_target - flow_pred_post)) if args.qualitative: for t in xrange(args.K * 2 + args.T): pred_frame = draw_frame( cv2.cvtColor(pred_data_unit[0, t, ...], cv2.COLOR_GRAY2BGR) if c_dim == 1 else pred_data_unit[0, t, ...], t % (args.T + args.K) < args.K) pred_post_frame = draw_frame( cv2.cvtColor(pred_data_post_unit[0, t, ...], cv2.COLOR_GRAY2BGR) if c_dim == 1 else pred_data_post_unit[0, t, ...], t % (args.T + args.K) < args.K) if args.testtrain: cv2.imwrite( gensub_dir + "predone_" + "{0:04d}".format(t) + ".png", pred_data_unit[0, t, ...]) cv2.imwrite( gensub_dir + "predoneframe_" + "{0:04d}".format(t) + ".png", pred_frame) else: cv2.imwrite( gensub_dir + "pred_" + "{0:04d}".format(t) + ".png", pred_data_unit[0, t, ...]) cv2.imwrite( gensub_dir + "predframe_" + "{0:04d}".format(t) + ".png", pred_frame) cv2.imwrite( gensub_dir + "predpost_" + "{0:04d}".format(t) + ".png", pred_data_post_unit[0, t, ...]) cv2.imwrite( gensub_dir + "predpostframe_" + "{0:04d}".format(t) + ".png", pred_post_frame) # blank = (inverse_transform(inf_batch[0, t, :, :]) * 255).astype("uint8") # cv2.imwrite(savedir + "/blk_gt_" + "{0:04d}".format(t) + ".png", blank) if args.qualitative: cmd1 = "rm " + gensub_dir + "predframe.gif" cmd4 = "rm " + gensub_dir + "predpostframe.gif" cmd7 = "rm " + gensub_dir + "predoneframe.gif" cmd2 = ("ffmpeg -f image2 -framerate 7 -i " + gensub_dir + "predframe_%04d.png " + gensub_dir + "predframe.gif") cmd5 = ("ffmpeg -f image2 -framerate 7 -i " + gensub_dir + "predpostframe_%04d.png " + gensub_dir + "predpostframe.gif") cmd8 = ("ffmpeg -f image2 -framerate 7 -i " + gensub_dir + "predoneframe_%04d.png " + gensub_dir + "predoneframe.gif") cmd3 = "rm " + gensub_dir + "predframe*.png" cmd6 = "rm " + gensub_dir + "predpostframe*.png" cmd9 = "rm 
" + gensub_dir + "predoneframe*.png" # Comment out "system(cmd3)" if you want to keep the output images # Otherwise only the gifs will be kept if args.testtrain: system(cmd7) system(cmd8) system(cmd9) else: system(cmd1) system(cmd2) system(cmd3) system(cmd4) system(cmd5) system(cmd6) if args.quantitative: print psnr_err.shape, cpsnr.shape print ssim_err.shape, cssim.shape print "ssim_err of this sequence", np.mean(cssim) print "ssim_err_post of this sequence", np.mean(cssim_post) print "psnr_err of this sequence", np.mean(cpsnr) print "psnr_err_post of this sequence", np.mean(cpsnr_post) psnr_err = np.concatenate((psnr_err, cpsnr[None, :]), axis=0) ssim_err = np.concatenate((ssim_err, cssim[None, :]), axis=0) flow_err = np.concatenate((flow_err, flow_l2[:]), axis=0) psnr_err_post = np.concatenate( (psnr_err_post, cpsnr_post[None, :]), axis=0) ssim_err_post = np.concatenate( (ssim_err_post, cssim_post[None, :]), axis=0) flow_err_post = np.concatenate( (flow_err_post, flow_l2_post[:]), axis=0) if args.quantitative: if args.testtrain: np.savez(save_path_one, psnr=psnr_err, ssim=ssim_err, flow=flow_err) else: np.savez(save_path, psnr=psnr_err, ssim=ssim_err, flow=flow_err) np.savez(save_path_post, psnr=psnr_err_post, ssim=ssim_err_post, flow=flow_err_post) if args.testtrain: print("PriorOne Results saved to " + save_path) print "PriorOne PSNR per frame:", np.mean(psnr_err, axis=0) print "PriorOne SSIM per frame:", np.mean(ssim_err, axis=0) print "PriorOne PSNR overall average:", np.mean(psnr_err), "PriorOne SSIM overall average", \ np.mean(ssim_err), "PriorOne flow_err average", np.mean(flow_err) else: print("Prior Results saved to " + save_path) print("Post Results saved to " + save_path_post) print "Prior PSNR per frame:", np.mean(psnr_err, axis=0) print "Prior SSIM per frame:", np.mean(ssim_err, axis=0) print "Prior PSNR overall average:", np.mean(psnr_err), "Prior SSIM overall average", \ np.mean(ssim_err), "Prior flow_err average", np.mean(flow_err) print "Post 
PSNR per frame:", np.mean(psnr_err_post, axis=0) print "Post SSIM per frame:", np.mean(ssim_err_post, axis=0) print "Post PSNR overall average:", np.mean(psnr_err_post), "Post SSIM overall average",\ np.mean(ssim_err_post), "Post flow_err average", np.mean(flow_err_post) print("Done.")
def translate(sentence):
    """Greedily decode a translation of ``sentence`` with the reloaded model.

    The sentence is preprocessed, mapped to ids with the input tokenizer,
    padded to the training input length and encoded.  The decoder is then
    stepped for at most the training target length, feeding each arg-max
    prediction back in, and stops early once ``<end>`` is produced.

    Args:
        sentence: raw input sentence.

    Returns:
        ``(result, sentence)``: the decoded words, each followed by a single
        space, and the preprocessed input sentence.
    """
    (sp_input_tensor, en_target_tensor, sp_input_language_tokenizer,
     en_target_language_tokenizer) = data_util.load_dataset(resource_data_file)
    encoder, decoder = reload_model()

    sentence = data_util.preprocess_sentence(sentence)

    token_ids = []
    for word in sentence.split(' '):
        token_ids.append(sp_input_language_tokenizer.word_index[word])

    # padded --> [1, input_max_length]
    padded = tf.keras.preprocessing.sequence.pad_sequences(
        [token_ids], maxlen=sp_input_tensor.shape[1], padding='post')
    inputs = tf.convert_to_tensor(padded)

    initial_state = tf.zeros((1, config['enc_hidden_size']))
    enc_output, dec_hidden = encoder(inputs, initial_state)

    # dec_input --> [1, 1]
    start_id = en_target_language_tokenizer.word_index['<start>']
    dec_input = tf.expand_dims([start_id], 0)

    decoded_words = []
    max_steps = en_target_tensor.shape[1]
    for _step in range(max_steps):
        # predictions --> [1, dec_vocab_size]
        predictions, dec_hidden, _ = decoder(dec_input, dec_hidden,
                                             enc_output)
        best_id = tf.argmax(predictions[0]).numpy()
        word = en_target_language_tokenizer.index_word[best_id]
        decoded_words.append(word)
        if word == '<end>':
            break
        # Feed the predicted id back in as the next decoder input.
        dec_input = tf.expand_dims([best_id], 0)

    result = "".join(word + " " for word in decoded_words)
    return result, sentence
def train():
    """Train the seq2seq model, saving a checkpoint every second epoch.

    Loads the parallel corpus, holds out 20% for validation, and runs
    ``config['epochs']`` passes over shuffled, fixed-size batches, printing
    the batch loss every 100 batches and the mean loss and wall time per
    epoch.
    """
    # Build the training / validation tensors.
    (sp_input_tensor, en_target_tensor, sp_input_language_tokenizer,
     en_target_language_tokenizer) = data_util.load_dataset(resource_data_file)
    # Split the tensors that were just loaded; the original passed the
    # undefined names `en_input_tensor` / `sp_target_tensor` here.
    (sp_input_tensor_train, sp_input_tensor_val, en_target_tensor_train,
     en_target_tensor_val) = data_util.train_test_split(
         sp_input_tensor, en_target_tensor, test_size=0.2)

    batch_size = config['batch_size']
    dataset = tf.data.Dataset.from_tensor_slices(
        (sp_input_tensor_train,
         en_target_tensor_train)).shuffle(len(sp_input_tensor_train))
    dataset = dataset.batch(batch_size, drop_remainder=True)

    # drop_remainder=True means only full batches are seen; guard against a
    # zero divisor when the training set is smaller than one batch.
    steps_per_epoch = max(len(sp_input_tensor_train) // batch_size, 1)

    for epoch in range(config['epochs']):
        start = time.time()
        enc_hidden = tf.zeros((batch_size, config["enc_hidden_size"]))
        total_loss = 0

        for batch, (input_lang, target_lang) in enumerate(dataset):
            # input_lang --> [batch_size, max_input_length]
            # target_lang --> [batch_size, max_target_length]
            batch_loss = seq2seqModel.train_step(input_lang, target_lang,
                                                 en_target_language_tokenizer,
                                                 enc_hidden)
            total_loss += batch_loss

            if batch % 100 == 0:
                print('Epoch %d, Batch %d, Loss %.4f' %
                      (epoch + 1, batch, batch_loss.numpy()))

        # Save a checkpoint every 2 epochs.
        if (epoch + 1) % 2 == 0:
            seq2seqModel.checkpoint.save(file_prefix=config['model_dir'])

        print("Epoch %d, Loss %.4f" %
              (epoch + 1, total_loss / steps_per_epoch))
        print("Time taken for one epoch %f sec" % (time.time() - start))
def _write_roc_csv(path, answers, predictions):
    """Write one ``(y_actual, y_pred)`` row per answered question to ``path``."""
    # newline='' lets the csv module control line endings (avoids blank
    # rows on platforms that translate '\n').
    with open(path, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['y_actual', 'y_pred'])
        writer.writeheader()
        for student_idx, student_answers in enumerate(answers):
            student = predictions[student_idx][0]
            for step, step_preds in enumerate(student):
                question = student_answers[step]
                skill = question[0]
                y = question[1]
                writer.writerow({'y_pred': step_preds[skill], 'y_actual': y})


def run(args):
    """Train and evaluate LSTM and RNN DKT models for 1..15 knowledge components.

    For each number of knowledge components the dataset is reloaded and
    split, both models are compiled, fitted and evaluated, and their training
    histories are collected.  After the final configuration the per-question
    predictions of both models are written to ``lstm_roc.csv`` and
    ``rnn_roc.csv``, and the collected histories are passed to
    ``write_accuracy``.

    Args:
        args: parsed command-line arguments; this function reads ``f``,
            ``batch_size``, ``test_split``, ``hidden_units``, ``epochs``
            and ``v``.
    """
    max_kc = 15
    all_lstm_history = dict()
    all_rnn_history = dict()

    for num_kc in range(1, max_kc + 1):
        dataset, length, nb_features, nb_skills = data_util.load_dataset(
            fn=args.f, batch_size=args.batch_size, shuffle=False,
            num_kc=num_kc)
        train_set, test_set = data_util.split_dataset(
            dataset=dataset, total_size=length,
            test_fraction=args.test_split)

        print("\n[----- COMPILING ------]")
        lstm = deepkt.DKTModel(nb_features=nb_features,
                               nb_skills=nb_skills,
                               hidden_units=args.hidden_units,
                               LSTM=True)
        lstm.compile(optimizer='adam',
                     metrics=[
                         metrics.BinaryAccuracy(),
                         metrics.AUC(),
                         metrics.Precision(),
                         metrics.Recall()
                     ])
        rnn = deepkt.DKTModel(nb_features=nb_features,
                              nb_skills=nb_skills,
                              hidden_units=args.hidden_units)
        rnn.compile(optimizer='adam',
                    metrics=[
                        metrics.BinaryAccuracy(),
                        metrics.AUC(),
                        metrics.Precision(),
                        metrics.Recall()
                    ])
        print(lstm.summary())
        print(rnn.summary())
        print("\n[-- COMPILING DONE  --]")

        print("\n[----- TRAINING ------]")
        lstm_history = lstm.fit(dataset=train_set,
                                epochs=args.epochs,
                                verbose=args.v)
        rnn_history = rnn.fit(dataset=train_set,
                              epochs=args.epochs,
                              verbose=args.v)
        print("\n[--- TRAINING DONE ---]")

        print("\n[----- TESTING ------]")
        print("Number of KCs: ", num_kc)
        lstm.evaluate(dataset=test_set, verbose=args.v)
        rnn.evaluate(dataset=test_set, verbose=args.v)
        print("\n[--- TESTING DONE  ---]")

        all_lstm_history[num_kc] = lstm_history.history
        all_rnn_history[num_kc] = rnn_history.history

        # ROC data is only exported for the final (largest) configuration.
        if num_kc == max_kc:
            answers = data_util.get_answers(args.f)
            lstm_preds = lstm.get_predictions(test_set)
            rnn_preds = rnn.get_predictions(test_set)
            _write_roc_csv("lstm_roc.csv", answers, lstm_preds)
            _write_roc_csv("rnn_roc.csv", answers, rnn_preds)

    write_accuracy(all_lstm_history, all_rnn_history)
def load_data(self,
              path='./dataset/mt_en2ch.txt',
              num_examples=20000,
              batch_size=64):
    """Load the parallel corpus and prepare the batched training dataset.

    Stores the language indexes, maximum sequence lengths, the 80/20
    train/validation tensors, both vocabulary sizes, the shuffle buffer size
    and the shuffled, batched ``tf.data`` dataset on ``self``.

    Args:
        path: path of the corpus file passed to ``load_dataset``.
        num_examples: number of examples passed to ``load_dataset``.
        batch_size: batch size of the resulting dataset (incomplete final
            batches are dropped).
    """
    print(">>>>>正在加载训练数据")

    (input_tensor, target_tensor, inp_lang, targ_lang, max_length_inp,
     max_length_targ) = load_dataset(path, num_examples)
    self.inp_lang = inp_lang
    self.targ_lang = targ_lang
    self.max_length_inp = max_length_inp
    self.max_length_targ = max_length_targ

    # 80-20 split into training and validation sets.
    splits = train_test_split(input_tensor, target_tensor, test_size=0.2)
    (self.input_tensor_train, self.input_tensor_val,
     self.target_tensor_train, self.target_tensor_val) = splits

    self.vocab_inp_size = len(self.inp_lang.word2idx)
    self.vocab_tar_size = len(self.targ_lang.word2idx)
    self.BUFFER_SIZE = len(self.input_tensor_train)

    train_pairs = tf.data.Dataset.from_tensor_slices(
        (self.input_tensor_train, self.target_tensor_train))
    shuffled = train_pairs.shuffle(self.BUFFER_SIZE)
    self.dataset = shuffled.batch(batch_size, drop_remainder=True)

    print(">>>>>加载完毕")
def main(_):
    """Train the QA model selected by ``FLAGS.model`` on the SQuAD dataset.

    Loads the dataset, vocabulary and GloVe embeddings, seeds NumPy and
    TensorFlow, builds the model on ``/gpu:<FLAGS.gpu_id>``, sets up file
    logging under ``FLAGS.log_dir``, then restores or initializes the model
    in ``FLAGS.train_dir`` and trains it.

    Args:
        _: unused positional argument supplied by the app runner.

    Raises:
        ValueError: if ``FLAGS.model`` is not a known model name.
    """
    print(vars(FLAGS))

    dataset, max_q_len, max_c_len = load_dataset(FLAGS.data_dir,
                                                 FLAGS.data_size,
                                                 FLAGS.max_question_length,
                                                 FLAGS.max_context_length)

    embed_path = FLAGS.embed_path or pjoin(
        "data", "squad",
        "glove.trimmed.{}.npz".format(FLAGS.embedding_size))
    vocab_path = FLAGS.vocab_path or pjoin(FLAGS.data_dir, "vocab.dat")
    vocab, rev_vocab = initialize_vocab(vocab_path)
    embeddings = load_glove_embeddings(embed_path)

    # Seed before the graph is built: ops take their seed from the
    # graph-level seed at creation time, so seeding after model construction
    # (as the original did) leaves the model's initializers unseeded.
    np.random.seed(1234)
    tf.set_random_seed(1234)

    with tf.device("/gpu:{}".format(FLAGS.gpu_id)):
        # TODO VERY HACKY since this could be inconsistent with what
        # qa.train uses
        num_per_epoch = len(dataset['training'])
        print("num_per_epoch: {}".format(num_per_epoch))
        if FLAGS.model == 'baseline':
            encoder = Encoder(size=FLAGS.state_size,
                              vocab_dim=FLAGS.embedding_size)
            qa = QASystem(encoder, FLAGS, embeddings, num_per_epoch)
        elif FLAGS.model == 'matchLSTM':
            qa = QASystemMatchLSTM(FLAGS, embeddings, num_per_epoch)
        else:
            # Fail here rather than with a NameError on `qa` further down.
            raise ValueError("unknown model: {!r}".format(FLAGS.model))

    if not os.path.exists(FLAGS.log_dir):
        os.makedirs(FLAGS.log_dir)
    file_handler = logging.FileHandler(pjoin(FLAGS.log_dir, "log.txt"))
    logging.getLogger().addHandler(file_handler)

    print(vars(FLAGS))
    with open(os.path.join(FLAGS.log_dir, "flags.json"), 'w') as fout:
        json.dump(FLAGS.__flags, fout)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = FLAGS.gpu_fraction
    config.allow_soft_placement = True

    with tf.Session(config=config) as sess:
        # The same directory is used to restore from and to save to.
        load_train_dir = os.path.abspath(FLAGS.train_dir)
        initialize_model(sess, qa, load_train_dir)

        save_train_dir = os.path.abspath(FLAGS.train_dir)
        qa.train(sess, dataset, save_train_dir)
def main():
    """Train one of the stochastic video models and periodically validate it.

    Reads its configuration from the module-level ``args`` (and the free name
    ``space_aware``), builds the model selected by ``args.mode``, and runs an
    Adam training loop until ``iters`` exceeds ``args.num_iter``. Every 2500
    iterations it evaluates fixed train/test batches, computes PSNR/SSIM via
    ``metrics.cal_seq`` and writes image and metric summaries; every 10000
    iterations (except iteration 0) it saves a checkpoint unless
    ``args.no_store`` is set.

    NOTE(review): this function uses Python 2 syntax (``print`` statements,
    ``range(...) + range(...)``, ``/`` on ints used as indices). The block
    nesting below was reconstructed from a whitespace-collapsed source and
    should be confirmed against the original file.
    """
    train_vids, test_vids = data_util.load_dataset(args)
    iters = args.iters
    # Run name: encodes the hyper-parameters and is reused as the leaf
    # directory for checkpoints, samples and summaries.
    prefix = ("sto"
              + "_h=" + str(args.image_size_h)
              + "_w=" + str(args.image_size_w)
              + "_K=" + str(args.K)
              + "_T=" + str(args.T)
              + "_B=" + str(args.B)
              + "_batch_size=" + str(args.batch_size)
              + "_beta1=" + str(args.beta1)
              + "_alpha=" + str(args.alpha)
              + "_gamma=" + str(args.gamma)
              + "_lr=" + str(args.lr)
              + "_mode=" + str(args.mode)
              + "_space_aware=" + str(space_aware)
              + "_z_channel=" + str(args.z_channel)
              + "_p_loss=" + str(args.pixel_loss)
              + "_cell_type=" + str(args.cell_type)
              + "_norm=" + str(not args.no_normalized)
              + "_mask_w=" + str(args.mask_weight)
              + "_res_type=" + str(args.res_type)
              + "_neg_noise=" + str(not args.no_negative_noise)
              + "_res_ref=" + str(not args.no_res_ref)
              + "_pic_norm=" + str(not args.no_pic_norm)
              + "_start_perc=" + str(args.start_percentage)
              )
    print("\n" + prefix + "\n")
    checkpoint_dir = "../../models/stochastic/" + args.dataset + '/' + prefix + "/"
    samples_dir = "../../samples/stochastic/" + args.dataset + '/' + prefix + "/"
    summary_dir = "../../logs/stochastic/" + args.dataset + '/' + prefix + "/"
    if not exists(checkpoint_dir):
        makedirs(checkpoint_dir)
    # save synthesized frame sample
    if not exists(samples_dir):
        makedirs(samples_dir)
    if not exists(summary_dir):
        makedirs(summary_dir)
    device_string = ""
    if args.cpu:
        # NOTE(review): the CPU branch still exposes GPU 0 through
        # CUDA_VISIBLE_DEVICES; only the tf.device string selects the CPU.
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
        device_string = "/cpu:0"
    elif args.gpu:
        device_string = "/gpu:%d" % args.gpu[0]
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu[0])
    with tf.device(device_string):
        # The three branches pass identical arguments; only the constructor
        # differs. NOTE(review): if args.mode matches none of them, `model`
        # stays unbound and the first use below raises a NameError.
        if args.mode == "bi_sto":
            model = stochastic_bi_net([args.image_size_h, args.image_size_w], batch_size = args.batch_size,
                                      c_dim = args.color_channel_num, K=args.K, T=args.T, B=args.B, debug = False,
                                      pixel_loss = args.pixel_loss, convlstm_kernel = [3, 3], mode = args.mode,
                                      space_aware = space_aware, cell_type=args.cell_type, z_channel = args.z_channel,
                                      normalize = not args.no_normalized, weight=args.mask_weight, res_type=args.res_type,
                                      negative_noise = not args.no_negative_noise, res_ref = not args.no_res_ref,
                                      pic_norm = not args.no_pic_norm)
        elif args.mode == "learned_prior":
            model = stochastic_learned_prior([args.image_size_h, args.image_size_w], batch_size = args.batch_size,
                                      c_dim = args.color_channel_num, K=args.K, T=args.T, B=args.B, debug = False,
                                      pixel_loss = args.pixel_loss, convlstm_kernel = [3, 3], mode = args.mode,
                                      space_aware = space_aware, cell_type=args.cell_type, z_channel = args.z_channel,
                                      normalize = not args.no_normalized, weight=args.mask_weight, res_type=args.res_type,
                                      negative_noise = not args.no_negative_noise, res_ref = not args.no_res_ref,
                                      pic_norm = not args.no_pic_norm)
        elif args.mode == "deter_flexible":
            model = deter_flexible([args.image_size_h, args.image_size_w], batch_size = args.batch_size,
                                      c_dim = args.color_channel_num, K=args.K, T=args.T, B=args.B, debug = False,
                                      pixel_loss = args.pixel_loss, convlstm_kernel = [3, 3], mode = args.mode,
                                      space_aware = space_aware, cell_type=args.cell_type, z_channel = args.z_channel,
                                      normalize = not args.no_normalized, weight=args.mask_weight, res_type=args.res_type,
                                      negative_noise = not args.no_negative_noise, res_ref = not args.no_res_ref,
                                      pic_norm = not args.no_pic_norm)
        global_step = tf.Variable(0, trainable=False)
        # Staircase exponential decay of the learning rate, driven by global_step.
        global_rate = tf.train.exponential_decay(args.lr, global_step,
                                                 args.decay_step, args.decay_rate, staircase=True)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # Make the optimizer step depend on the ops collected under UPDATE_OPS.
        with tf.control_dependencies(update_ops):
            # Total objective: pixel loss + alpha * KL (+ gamma * exclusive KL).
            g_full = model.L_train_p + args.alpha * model.L_train_kl
            if args.gamma != 0:
                g_full += args.gamma * model.L_train_kl_exlusive
            g_optim = tf.train.AdamOptimizer(global_rate, beta1=args.beta1).minimize(
                g_full, var_list=model.trainable_variables, global_step=global_step
            )
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        if args.load_pretrain:
            if ops.load(model, sess, checkpoint_dir):
                print(" [*] Load SUCCESS")
            else:
                print(" [!] Load failed...")
        train_sum = tf.summary.merge([model.L_train_p_sum, model.L_train_kl_sum, model.L_train_p_l1_diff_sum,
                                      model.L_trainTest_p_l1_diff_sum])
        test_sum = tf.summary.merge([model.L_test_p_sum, model.L_test_p_l1_diff_sum,
                                     model.L_testTrain_p_l1_diff_sum])
        # NOTE(review): the writer is never explicitly closed in this function.
        writer = tf.summary.FileWriter(summary_dir, sess.graph)
        start_time = time.time()
        # `full_train` and `blank` are assigned but not read anywhere below.
        full_train = True
        # if (len(args.pretrain_model) > 0):
        #     # Create a saver. include gen_vars and encoder_vars
        #     model.saver.restore(sess, args.pretrain_model)
        blank = None
        p_loss_percentage = 1.0
        flipable = False
        if args.dataset=="kth":
            flipable = True
        # blank1/blank2 are created lazily at the first validation pass.
        blank1 = None
        while iters <= args.num_iter:
            mini_batches = get_minibatches_idx(len(train_vids), args.batch_size, shuffle=True)
            for _, batchidx in mini_batches:
                # Schedule for p_loss_percentage: fixed 0.5 when
                # start_percentage is 0; otherwise it keeps its previous value
                # until iters reaches num_iter * start_percentage, then moves
                # linearly from 1.0 towards 0.4, and is 0.4 from
                # num_iter * (1 - start_percentage) onwards.
                if args.start_percentage == 0.0:
                    p_loss_percentage = 0.5
                else:
                    if iters >= (args.num_iter * args.start_percentage):
                        if iters < args.num_iter * (1 - args.start_percentage):
                            p_loss_percentage = 1 - 0.6 * (
                                (1.0 * iters / args.num_iter - args.start_percentage)
                                / (1.0 - 2 * args.start_percentage))
                        else:
                            p_loss_percentage = 0.4
                if iters > args.num_iter:
                    break
                # Incomplete trailing mini-batches are skipped.
                if len(batchidx) == args.batch_size:
                    sess.run(tf.get_collection('update_dup'))
                    # batch, time, height, width, color
                    ref_batch, inf_batch = load_stochastic_data_from_list(train_vids, batchidx,
                                                args.image_size_h, args.image_size_w, args.K, args.T,
                                                args.B, flipable=flipable, channel=args.color_channel_num)
                    if args.debug:
                        print ref_batch.shape, inf_batch.shape
                    # One optimisation step; also fetches the scalar losses for logging.
                    _, summary_str , L_train_p, L_train_kl\
                        = sess.run([g_optim, train_sum, model.L_train_p, model.L_train_kl],
                                    feed_dict={model.ref_seq: ref_batch, model.inf_seq: inf_batch,
                                               model.is_train: True,
                                               model.p_loss_percentage: p_loss_percentage})
                    if not args.no_store:
                        writer.add_summary(summary_str, iters)
                    print(
                        "Iters: [%2d] time: %4.4f, L_train_p: %.8f, L_train_kl: %.8f"
                        % (iters, time.time() - start_time, L_train_p, L_train_kl)
                    )
                    if np.mod(iters, 2500) == 0:
                        print("validation at iters:", iters)
                        # Fixed index sets (starting at 3 and at 60), so the
                        # same videos are evaluated at every validation pass.
                        # `range + range` and the `/` index arithmetic rely on
                        # Python 2 semantics.
                        ref_batch_train, inf_batch_train = load_stochastic_data_from_list(train_vids,
                                     range(3, 3 + args.batch_size/2)+range(60, 60 + args.batch_size/2),
                                     args.image_size_h, args.image_size_w, args.K, args.T, args.B,
                                     flipable=flipable, channel=args.color_channel_num)
                        ref_batch_test, inf_batch_test = load_stochastic_data_from_list(test_vids,
                                     range(3, 3 + args.batch_size/2)+range(60, 60 + args.batch_size/2),
                                     args.image_size_h, args.image_size_w, args.K, args.T, args.B,
                                     flipable=flipable, channel=args.color_channel_num)
                        if blank1 is None:
                            # Zero padding blocks shaped like the leading and
                            # trailing slices of one reference sequence.
                            blank1 = np.zeros_like(ref_batch_train[0, :args.B // 2 + 1, ...])
                            blank2 = np.zeros_like(ref_batch_train[0, args.B//2+1: , ...])
                        # NOTE(review): the "test" losses are fetched from
                        # model.L_train_p / model.L_train_kl, with the test
                        # batch fed and is_train=False.
                        summary_test, L_test_p, L_test_kl, \
                            G_test, G_test_post, test_mask_binary, last_frame_test = sess.run(
                            [test_sum, model.L_train_p, model.L_train_kl, model.G_real,
                             model.G_post_real, model.mask_binary, model.last_frame],
                            feed_dict={model.ref_seq: ref_batch_test,
                                       model.inf_seq: inf_batch_test,
                                       model.is_train: False,
                                       model.p_loss_percentage: p_loss_percentage})
                        # Only the last two values returned by cal_seq are kept.
                        _, _, _, _, _, mean_batch_psnr_test_post, mean_batch_ssim_test_post\
                            = metrics.cal_seq(inf_batch_test[:, 1:-1, ...], G_test_post)
                        _, _, _, _, _, mean_batch_psnr_test, mean_batch_ssim_test \
                            = metrics.cal_seq(inf_batch_test[:, 1:-1, ...], G_test)
                        # NOTE(review): unlike the other add_summary calls,
                        # this one is not gated on args.no_store.
                        writer.add_summary(summary_test, iters)
                        print(
                            "Iters: [%2d] time: %4.4f, L_test_p: %.8f, L_test_kl: %.8f"
                            % (iters, time.time() - start_time, L_test_p, L_test_kl)
                        )
                        print("ref_batch_test.min, ref_batch_test.max",
                              np.min(ref_batch_test), np.max(ref_batch_test))
                        print("mean_batch_psnr_test_post, mean_batch_ssim_test_post",
                              mean_batch_psnr_test_post, mean_batch_ssim_test_post)
                        print("mean_batch_psnr_test, mean_batch_ssim_test",
                              mean_batch_psnr_test, mean_batch_ssim_test)
                        print "test G_test.shape", G_test.shape
                        # Same fetches on the fixed training batch (is_train=True).
                        summary_train, L_train_p, L_train_kl, G_train, \
                            G_train_post, train_mask_binary, last_frame_train = sess.run(
                            [train_sum, model.L_train_p, model.L_train_kl, model.G_real,
                             model.G_post_real, model.mask_binary, model.last_frame],
                            feed_dict={model.ref_seq: ref_batch_train,
                                       model.inf_seq: inf_batch_train,
                                       model.is_train: True,
                                       model.p_loss_percentage: p_loss_percentage})
                        _, _, _, _, _, mean_batch_psnr_train_post, mean_batch_ssim_train_post \
                            = metrics.cal_seq(inf_batch_train[:, 1:-1, ...], G_train_post)
                        _, _, _, _, _, mean_batch_psnr_train, mean_batch_ssim_train \
                            = metrics.cal_seq(inf_batch_train[:, 1:-1, ...], G_train)
                        print("mean_batch_psnr_train_post, mean_batch_ssim_train_post",
                              mean_batch_psnr_train_post, mean_batch_ssim_train_post)
                        print("mean_batch_psnr_train, mean_batch_ssim_train",
                              mean_batch_psnr_train, mean_batch_ssim_train)
                        # Build one tall image per split from three batch
                        # entries; for each entry four strips are stacked:
                        # ground truth, mask, posterior output, output.
                        # The first entry (i == 1) starts the stack.
                        for i in [1, args.batch_size/2 ,args.batch_size - 1]:
                            sample_train = depth_to_width(np.concatenate(
                                (ref_batch_train[i,:args.B//2,...],
                                 inf_batch_train[i,...], ref_batch_train[i,args.B//2+2:,...]), axis=0))
                            gen_train_mask = depth_to_width(np.concatenate(
                                (blank1, train_mask_binary[i, ...], blank2),axis=0))
                            gen_train_post = depth_to_width(np.concatenate(
                                (blank1, G_train_post[i, ...], blank2), axis=0))
                            gen_train = depth_to_width(np.concatenate(
                                (blank1, G_train[i, ...], blank2),axis=0))
                            sample_test = depth_to_width(np.concatenate(
                                (ref_batch_test[i,:args.B//2,...],
                                 inf_batch_test[i,...], ref_batch_test[i,args.B//2+2:,...]),axis=0))
                            gen_test_mask = depth_to_width(np.concatenate(
                                (blank1, test_mask_binary[i, ...], blank2), axis=0))
                            gen_test_post = depth_to_width(np.concatenate(
                                (blank1, G_test_post[i, ...], blank2), axis=0))
                            gen_test = depth_to_width(np.concatenate(
                                (blank1, G_test[i, ...], blank2),axis=0))
                            if i == 1:
                                # NOTE(review): sample_train.shape is printed twice here.
                                print sample_train.shape, gen_train.shape, sample_train.shape
                                sample_train_cat = np.concatenate((sample_train, gen_train_mask, gen_train_post, gen_train), axis=0)
                                sample_test_cat = np.concatenate((sample_test, gen_test_mask, gen_test_post, gen_test), axis=0)
                            else:
                                sample_train_cat = np.concatenate(
                                    (sample_train_cat, sample_train, gen_train_mask, gen_train_post, gen_train), axis=0)
                                sample_test_cat = np.concatenate(
                                    (sample_test_cat, sample_test, gen_test_mask, gen_test_post, gen_test), axis=0)
                        # Under Python 2 this is a print statement with two
                        # items: the parenthesised string and `iters`.
                        print("Saving sample at iter"), iters
                        img_summary = sess.run(model.summary_merge_seq_img, feed_dict={
                            model.train_seq_img: np.expand_dims(image_clipping(sample_train_cat), axis=0),
                            model.test_seq_img: np.expand_dims(image_clipping(sample_test_cat), axis=0)
                        })
                        # Feed the Python-side metric values back into the
                        # graph to obtain a merged metrics summary.
                        metrics_summary = sess.run(
                            model.summary_merge_metrics, feed_dict={
                                model.mean_batch_psnr_test_post: mean_batch_psnr_test_post,
                                model.mean_batch_psnr_test: mean_batch_psnr_test,
                                model.mean_batch_psnr_train_post: mean_batch_psnr_train_post,
                                model.mean_batch_psnr_train: mean_batch_psnr_train,
                                model.mean_batch_ssim_test_post: mean_batch_ssim_test_post,
                                model.mean_batch_ssim_test: mean_batch_ssim_test,
                                model.mean_batch_ssim_train_post: mean_batch_ssim_train_post,
                                model.mean_batch_ssim_train: mean_batch_ssim_train
                            }
                        )
                        if not args.no_store:
                            writer.add_summary(img_summary, iters)
                            writer.add_summary(metrics_summary, iters)
                # NOTE(review): the checkpoint check and the counter increment
                # are assumed to sit at mini-batch level, outside the
                # full-batch guard -- confirm against the original indentation.
                if np.mod(iters, 10000) == 0 and iters != 0 and not args.no_store:
                    ops.save(model, sess, checkpoint_dir, iters)
                iters += 1
    print "finish Training"
def train():
    """Train an ``InferModel`` on the dataset named by ``FLAGS.dataset``.

    Supported values are ``'dontknow'`` and ``'squad'``; for any other value a
    hint is printed and the function returns without doing anything else.
    After loading data, vocabulary and GloVe embeddings, the function builds
    the model and trainer, writes the validation context/question pairs to
    ``log.txt``, runs ``FLAGS.num_epochs`` rounds of training followed by
    validation, and saves a checkpoint under
    ``./<FLAGS.logdir>/<FLAGS.dataset>/model.ckpt``.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source (in particular the separator writes in ``log.txt`` and the position
    of the final save) -- confirm against the original file.
    """
    which = FLAGS.dataset
    if which == 'dontknow':
        dataset = load_dontknow_dataset(
            FLAGS.data_size,
            FLAGS.max_question_length,
            FLAGS.max_context_length)
    elif which == 'squad':
        dataset = load_dataset(
            FLAGS.data_dir,
            FLAGS.data_size,
            FLAGS.max_question_length,
            FLAGS.max_context_length)
    else:
        print("enter either squad or dontknow for dataset flag")
        return

    # Both supported datasets read embeddings and vocabulary from the same
    # files under FLAGS.data_dir.
    embed_path = join(FLAGS.data_dir, "glove.trimmed.100.npz")
    vocab_path = join(FLAGS.data_dir, "vocab.dat")
    vocab, rev_vocab = initialize_vocab(vocab_path)
    embeddings = load_glove_embeddings(embed_path)

    FLAGS.dataset_size = len(dataset['training'])
    model = InferModel(FLAGS, embeddings, vocab)
    trainer = Trainer(model, FLAGS)
    saver = tf.train.Saver()
    validation_scores = []

    with tf.device("/gpu:{}".format(FLAGS.gpu_id)):
        session_config = tf.ConfigProto()
        session_config.allow_soft_placement = True
        session_config.gpu_options.per_process_gpu_memory_fraction = FLAGS.gpu_fraction
        session_config.gpu_options.allow_growth = True

        with tf.Session(config=session_config) as sess:
            logging.info("Created a new model")
            sess.run(tf.global_variables_initializer())
            param_count = sum(
                v.get_shape().num_elements() for v in tf.trainable_variables())
            logging.info('Num params: %d' % param_count)

            train_set, train_raw = dataset['training'], dataset['training_raw']
            valid_set, valid_raw = dataset['validation'], dataset['validation_raw']

            with open('log.txt', 'w') as log_file:
                # The training pass only joins the fields; nothing is written
                # per training example.
                for record in train_raw:
                    question = ' '.join(record[0])
                    context = ' '.join(record[1])
                    answer = ' '.join(record[2])
                log_file.write("-" * 5)
                log_file.write(" -- VALID- - -")
                for record in valid_raw:
                    question = ' '.join(record[0])
                    context = ' '.join(record[1])
                    answer = ' '.join(record[2])
                    log_file.write(context + "- - - " + question + '\n')
                log_file.write("-" * 5)

            for epoch in range(FLAGS.num_epochs):
                logging.info('-' * 5 + "TRAINING-EPOCH-" + str(epoch) + '-' * 5)
                score = trainer.run_epoch(sess, train_set, train_raw, epoch)
                logging.info('-' * 5 + "-VALIDATE-" + str(epoch) + '-' * 5)
                val_score = trainer.validate(sess, valid_set, valid_raw, epoch)
                validation_scores.append(val_score)
                #TODO early stopping

            print("Saving Model")
            checkpoint_target = "./{}/{}/model.ckpt".format(
                FLAGS.logdir, FLAGS.dataset)
            save_path = saver.save(sess, checkpoint_target)
import tensorflow as tf import dpkt import data_util import metrics file = 'skill_builder_data.csv' optimizer = 'adam' CSV_Log = "./logs/train.log" model_path = "./weights/bestmodel" log_dir = "logs" dataset, length, nb_features, nb_skills = data_util.load_dataset(file, batch_size=32, shuffle=True) train_set, test_set, val_set = data_util.split_dataset(dataset=dataset, total_size=length, test_fraction=0.2, val_fraction=0.2) print('-------compiling---------') model = dpkt.DKTModel(nb_features=nb_features, nb_skills=nb_skills, hidden_units=128, dropout_rate=0.3) model.compile(optimizer=optimizer, metrics=[ metrics.BinaryAccuracy(), metrics.AUC(), metrics.Precision(),
import data_util # print(__doc__) # Generating the sample data from make_blobs # This particular setting has one distinct cluster and 3 clusters placed close # together. # X, y = make_blobs(n_samples=500, # n_features=2, # centers=4, # cluster_std=1, # center_box=(-10.0, 10.0), # shuffle=True, # random_state=1) # For reproducibility dataset = 'normalized.csv' X, y_true, feature_names = data_util.load_dataset( dataset, exclusive_all_did_wrong=False) all_did_wrong = data_util.outlier_exp(tolerate=0) X = X[all_did_wrong] y = y_true[all_did_wrong] range_n_clusters = [2, 3, 4, 5, 6] for n_clusters in range_n_clusters: # Create a subplot with 1 row and 2 columns fig, (ax1, ax2) = plt.subplots(1, 2) fig.set_size_inches(18, 7) # The 1st subplot is the silhouette plot # The silhouette coefficient can range from -1, 1 but in this example all # lie within [-0.1, 1] ax1.set_xlim([-0.1, 1]) # The (n_clusters+1)*10 is for inserting blank space between silhouette