def decode():
    """Decode the test set with a trained model and write generated summaries."""
    # Load vocabularies.
    print(os.getcwd())
    doc_dict = data_util.load_dict(FLAGS.data_dir + "/doc_dict.txt")
    sum_dict = data_util.load_dict(FLAGS.data_dir + "/sum_dict.txt")
    if doc_dict is None or sum_dict is None:
        logging.warning("Dict not found.")
    print("Loading testing data")
    data = data_util.load_test_data(FLAGS.test_file, doc_dict)

    with tf.Session() as sess:
        # Create model and load parameters.
        logging.info("Creating %d layers of %d units." %
                     (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, True)

        result = []
        for idx, token_ids in enumerate(data):
            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, encoder_len, decoder_len = \
                model.get_batch(
                    {0: [(token_ids, [data_util.ID_GO, data_util.ID_EOS])]}, 0)

            if FLAGS.batch_size == 1 and FLAGS.geneos:
                loss, outputs = model.step(sess, encoder_inputs,
                                           decoder_inputs, encoder_len,
                                           decoder_len, True)
                outputs = [np.argmax(item) for item in outputs[0]]
            else:
                outputs = model.step_beam(sess, encoder_inputs, encoder_len,
                                          geneos=FLAGS.geneos)

            # If there is an EOS symbol in outputs, cut them at that point.
            if data_util.ID_EOS in outputs:
                outputs = outputs[:outputs.index(data_util.ID_EOS)]

            gen_sum = " ".join(data_util.sen_map2tok(outputs, sum_dict[1]))
            gen_sum = data_util.sen_postprocess(gen_sum)
            result.append(gen_sum)
            logging.info("Finish {} samples. :: {}".format(idx, gen_sum[:75]))

    with open(FLAGS.test_output, "w") as f:
        for item in result:
            print(item, file=f)
def decode():
    # Load vocabularies.
    doc_dict = data_util.load_dict(FLAGS.data_dir + "/doc_dict.txt")
    sum_dict = data_util.load_dict(FLAGS.data_dir + "/sum_dict.txt")
    if doc_dict is None or sum_dict is None:
        logging.warning("Dict not found.")
    data = data_util.load_test_data(FLAGS.data_dir + "/" + FLAGS.test_file,
                                    doc_dict)

    with tf.Session() as sess:
        # Create model and load parameters.
        logging.info("Creating %d layers of %d units." %
                     (FLAGS.num_layers, FLAGS.size))
        # Create a reverse lookup table that maps output ids back to words.
        reverse_table = tf.contrib.lookup.index_to_string_table_from_file(
            vocabulary_file=FLAGS.data_dir + "/sum_ordered_words.txt",
            default_value="<UNK>")
        reverse_table.init.run()
        model = create_model(sess, reverse_table, is_training=False)

        result = []
        for idx, token_ids in enumerate(data):
            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, encoder_len, decoder_len = \
                model.get_batch(
                    {0: [(token_ids, [data_util.ID_GO, data_util.ID_EOS])]}, 0)

            # Tile the single example so it fills a full batch.
            if encoder_inputs.shape[0] == 1:
                encoder_inputs = np.repeat(encoder_inputs, FLAGS.batch_size,
                                           axis=0)
                encoder_len = np.repeat(encoder_len, FLAGS.batch_size, axis=0)

            # outputs: [batch_size, length]
            step, outputs = model.inference(sess, encoder_inputs, encoder_len)

            # Take the first element per position and decode bytes to strings.
            target_output = [item[0].decode() for item in outputs]
            # If there is an EOS symbol in outputs, cut them at that point.
            if data_util.MARK_EOS in target_output:
                target_output = target_output[:target_output.index(
                    data_util.MARK_EOS)]

            gen_sum = " ".join(target_output)
            result.append(gen_sum)
            logging.info("Finish {} samples. :: {}".format(idx, gen_sum[:75]))

    with open(FLAGS.test_output, "w") as f:
        for item in result:
            print(item, file=f)
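# The decode() above maps decoder output ids back to words with a TF 1.x
# lookup table and then calls .decode() because string tensors come back from
# sess.run() as bytes. Below is a minimal sketch of that behaviour, assuming
# a small, hypothetical one-token-per-line vocabulary file ("toy_vocab.txt")
# that is not part of the original code.
import tensorflow as tf  # TF 1.x

# Hypothetical vocabulary file: line number == token id.
with open("toy_vocab.txt", "w") as f:
    f.write("<PAD>\n<GO>\n<EOS>\nfix\nbug\n")

with tf.Session() as sess:
    table = tf.contrib.lookup.index_to_string_table_from_file(
        vocabulary_file="toy_vocab.txt", default_value="<UNK>")
    ids = tf.constant([3, 4, 99], dtype=tf.int64)  # 99 is out of vocabulary
    words = table.lookup(ids)
    tf.tables_initializer().run()
    print(sess.run(words))                        # [b'fix' b'bug' b'<UNK>']
    print([w.decode() for w in sess.run(words)])  # ['fix', 'bug', '<UNK>']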
zoneout = 0.0
filter_width = 3
embedding_size = 300
num_layers = 1
summary_len = 100
attention_hidden_size = 100
beam_depth = 5
state_size = 120
mode = "test"
doc_file = "data/test_article.txt"
sum_file = "data/test_abstract.txt"
vocab_file = "data/vocab"
checkpoint_dir = "./save/baseline/checkpoints"
checkpoint_prefix = os.path.join(checkpoint_dir, "baseline")
vocab = data_util.Vocab("data/vocab", max_vocab_size)
docs = data_util.load_test_data(doc_file, vocab, max_num_tokens)
summary_file = "result/summaries.txt"

with tf.Graph().as_default():
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.9
    sess = tf.Session(config=config)
    log_writer = tf.summary.FileWriter(checkpoint_dir, graph=sess.graph)
    model = DenseQuasiGRU(vocab_size=max_vocab_size,
                          embedding_size=embedding_size,
                          num_layers=num_layers,
                          state_size=state_size,
                          decoder_vocab_size=max_vocab_size,
                          filter_width=filter_width,
                          zoneout=zoneout,
                          attention_hidden_size=attention_hidden_size,
def decode():
    # Load vocabularies.
    doc_dict = data_util.load_dict(FLAGS.data_dir + "/doc_dict.txt")
    sum_dict = data_util.load_dict(FLAGS.data_dir + "/sum_dict.txt")
    en_dict = data_util.load_dict(FLAGS.data_dir + "/en_dict.txt")
    if doc_dict is None or sum_dict is None:
        logging.warning("Dict not found.")
    data, en_data = data_util.load_test_data(
        FLAGS.test_file, doc_dict,
        FLAGS.data_dir + "/test.entity.txt", en_dict)

    with tf.Session() as sess:
        # Create model and load parameters.
        logging.info("Creating %d layers of %d units." %
                     (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, True, None, None, None)

        result = []
        for idx, token_ids in enumerate(data):
            en_ids = en_data[idx]
            if len(en_ids) == 0:
                en_ids = [data_util.ID_PAD]

            # Get a 1-element batch to feed the sentence to the model.
            # The entity ids are padded with three PAD slots on each side.
            (encoder_inputs, decoder_inputs, encoder_len, decoder_len,
             entity_inputs, entity_len) = model.get_batch(
                {0: [(token_ids,
                      [data_util.ID_GO, data_util.ID_EOS],
                      [data_util.ID_PAD] * 3 + en_ids + [data_util.ID_PAD] * 3)]},
                0)
            K = min(FLAGS.K, np.amax(entity_len) - 6)

            if FLAGS.batch_size == 1 and FLAGS.geneos:
                loss, outputs, att, t = model.step(sess, encoder_inputs,
                                                   decoder_inputs,
                                                   entity_inputs, encoder_len,
                                                   decoder_len, entity_len, K,
                                                   True)
                # outputs = [np.argmax(item) for item in outputs[0]]
                # Write per-entity disambiguation and attention scores
                # (only available from model.step, not from beam search).
                with open(FLAGS.test_output + '.disambig', 'a') as f2:
                    f2.write(' '.join(
                        str(y) + ":" + str(x.mean())
                        for x, y in zip(t[0], entity_inputs[0][3:])) + '\n')
                with open(FLAGS.test_output + '.attention', 'a') as f2:
                    f2.write(' '.join(
                        str(y) + ":" + str(x)
                        for x, y in zip(att[0], entity_inputs[0][3:])) + '\n')
            else:
                outputs = model.step_beam(sess, encoder_inputs, encoder_len,
                                          entity_inputs, entity_len, K,
                                          geneos=FLAGS.geneos)

            # If there is an EOS symbol in outputs, cut them at that point.
            outputs = list(outputs[0])
            if data_util.ID_EOS in outputs:
                outputs = outputs[:outputs.index(data_util.ID_EOS)]

            gen_sum = " ".join(data_util.sen_map2tok(outputs, sum_dict[1]))
            gen_sum = data_util.sen_postprocess(gen_sum)
            result.append(gen_sum)
            logging.info("Finish {} samples. :: {}".format(idx, gen_sum[:75]))

    with open(FLAGS.test_output, "w") as f:
        for item in result:
            print(item, file=f)
def main(_):
    # 1. Load vocabularies and test data.
    vocab_cn, vocab_en = load_vocab_as_dict(FLAGS.vocabulary_cn_path,
                                            FLAGS.vocabulary_en_path)
    flag_data_en_test_processed_path = os.path.exists(
        FLAGS.data_en_test_processed_path)
    print("processed English source file exists:",
          flag_data_en_test_processed_path)
    if not flag_data_en_test_processed_path:
        preprocess_english_file(FLAGS.data_en_test_path,
                                FLAGS.data_en_test_processed_path)
    test = load_test_data(FLAGS.data_en_test_processed_path, vocab_en,
                          FLAGS.decoder_sent_length)
    print("test[0:10]:", test[0:10])
    # Pad every source sentence to the maximum sequence length.
    test = pad_sequences(test, maxlen=FLAGS.sequence_length, value=0.)
    sequence_length_batch = [FLAGS.sequence_length] * FLAGS.batch_size

    # 2. Create session and model, then feed data to make predictions.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = seq2seq_attention_model(
            len(vocab_cn), FLAGS.learning_rate, FLAGS.batch_size,
            FLAGS.decay_steps, FLAGS.decay_rate, FLAGS.sequence_length,
            len(vocab_en), FLAGS.embed_size, FLAGS.hidden_size,
            sequence_length_batch, FLAGS.is_training,
            decoder_sent_length=FLAGS.decoder_sent_length,
            l2_lambda=FLAGS.l2_lambda,
            use_beam_search=FLAGS.use_beam_search)
        saver = tf.train.Saver()
        if os.path.exists(FLAGS.ckpt_dir + "checkpoint"):
            print("Restoring Variables from Checkpoint")
            saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        else:
            print("Can't find the checkpoint. Going to stop.")
            return

        # Feed data to get predictions.
        number_of_test_data = len(test)
        print("number_of_test_data:", number_of_test_data)
        predict_target_file_f = codecs.open(FLAGS.predict_target_file, 'a',
                                            'utf8')
        # The decoder input starts with _GO and is padded to the target length.
        decoder_input = np.array(
            [[vocab_cn[_GO]] + [vocab_cn[_PAD]] *
             (FLAGS.decoder_sent_length - 1)] * FLAGS.batch_size)
        print("decoder_input:", decoder_input.shape)
        decoder_input = np.reshape(decoder_input,
                                   [-1, FLAGS.decoder_sent_length])
        print("decoder_input:", decoder_input.shape)
        vocab_cn_index2word = dict(
            [val, key] for key, val in vocab_cn.items())

        for start, end in zip(
                range(0, number_of_test_data, FLAGS.batch_size),
                range(FLAGS.batch_size, number_of_test_data + 1,
                      FLAGS.batch_size)):
            # predictions: [batch_size, decoder_sent_length]
            predictions = sess.run(
                model.predictions,
                feed_dict={
                    model.input_x: test[start:end],
                    model.decoder_input: decoder_input,
                    model.dropout_keep_prob: 1
                })
            # Map predicted ids back to words.
            output_sentence_list = get_label_using_logits(
                predictions, vocab_cn_index2word, vocab_cn)
            # Write the generated sentences to the target file.
            for sentence in output_sentence_list:
                predict_target_file_f.write(sentence + "\n")
        predict_target_file_f.close()
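# Note on the batching in main() above: zip(range(0, N, B), range(B, N + 1, B))
# only yields full batches, so trailing examples that do not fill a batch are
# silently skipped. A minimal sketch with hypothetical sizes (not from the
# original code) illustrating that behaviour:
def batch_bounds(num_examples, batch_size):
    """Return (start, end) index pairs for full batches only."""
    return list(zip(range(0, num_examples, batch_size),
                    range(batch_size, num_examples + 1, batch_size)))

print(batch_bounds(10, 4))  # [(0, 4), (4, 8)] -- examples 8 and 9 are skipped
print(batch_bounds(8, 4))   # [(0, 4), (4, 8)] -- exact fit, nothing dropped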
def decode():
    # Load vocabularies.
    doc_dict = data_util.load_dict(FLAGS.data_dir + "/doc_dict.txt")
    sum_dict = data_util.load_dict(FLAGS.data_dir + "/sum_dict.txt")
    if doc_dict is None or sum_dict is None:
        logging.warning("Dict not found.")
    docs, data = data_util.load_test_data(FLAGS.test_file, doc_dict)

    with tf.Session() as sess:
        # Create model and load parameters.
        logging.info("Creating %d layers of %d units." %
                     (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, True)
        class_model = create_class_model(sess, True)

        result = []
        for idx, token_ids in enumerate(data):
            # Get a 1-element batch to feed the sentence to the model.
            (encoder_inputs, decoder_inputs, encoder_len, decoder_len,
             class_output, class_len) = data_util.get_batch(
                {0: [(token_ids, [data_util.ID_GO, data_util.ID_EOS], [0, 0])]},
                _buckets, 0, FLAGS.batch_size, False, 0)

            if FLAGS.batch_size == 1 and FLAGS.geneos:
                loss, outputs = model.step(sess, encoder_inputs,
                                           decoder_inputs, encoder_len,
                                           decoder_len, True)
                outputs = [np.argmax(item) for item in outputs[0]]
            else:
                outputs = model.step_beam(sess, encoder_inputs, encoder_len,
                                          geneos=FLAGS.geneos)

            # If there is an EOS symbol in outputs, cut them at that point.
            if data_util.ID_EOS in outputs:
                outputs = outputs[:outputs.index(data_util.ID_EOS)]

            gen_sum = " ".join(data_util.sen_map2tok(outputs, sum_dict[1]))
            gen_sum = data_util.sen_postprocess(gen_sum)
            result.append(gen_sum)
            logging.info("Finish {} samples. :: {}".format(idx, gen_sum[:75]))

        # Get encoder outputs.
        batchidx = 0
        final_inputs = []
        final_outputs = []
        final_len = []
        while batchidx + FLAGS.batch_size <= len(data):
            (encoder_inputs, decoder_inputs, encoder_len, decoder_len,
             class_output, class_len) = data_util.get_batch(
                {0: [(token_ids, [data_util.ID_GO, data_util.ID_EOS], [0, 0])]},
                _buckets, 0, FLAGS.batch_size, False, 0)
            _, _, enc_outputs = model.step(sess, encoder_inputs,
                                           decoder_inputs, encoder_len,
                                           decoder_len, True)
            enc_outputs = data_util.add_pad_for_hidden(enc_outputs,
                                                       _buckets[0][0])
            final_inputs.append(enc_outputs)
            final_outputs.append(class_output)
            final_len.append(class_len)
            batchidx += FLAGS.batch_size

        final_inputs = np.concatenate(np.asarray(final_inputs), 0)
        final_outputs = np.concatenate(np.asarray(final_outputs), 0)
        final_len = np.concatenate(np.asarray(final_len), 0)
        print(final_inputs.shape, final_outputs.shape, final_len.shape)

        # Hidden classifier.
        step_loss, output = class_model.step(sess, final_inputs[:],
                                             final_outputs[:], final_len[:],
                                             True)
        clipped = np.array(output > 0.5, dtype=np.int)
        # label = data_util.hidden_label_gen(FLAGS.test_file,
        #                                    "data/test.1981.msg.txt")
        # Build a confusion matrix to get precision:
        # tn, fp, fn, tp = confusion_matrix(label.flatten(),
        #                                   clipped.flatten()).ravel()
        # print("Test precision : ", tp / (tp + fp))

    with open(FLAGS.test_output, "w") as f:
        for idx, item in enumerate(result):
            print(item, file=f)
            for j in range(len(docs[idx])):
                if clipped[idx][j] == 1:
                    print("Recommended identifier: " + docs[idx][j] + " ",
                          file=f)
            print("\n", file=f)
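# The hidden classifier above (and in train() below) thresholds its sigmoid
# outputs at 0.5 and then reads precision off a confusion matrix. A minimal
# sketch of that pattern with made-up scores and binary 0/1 labels; note that
# for a 2x2 matrix, sklearn's confusion_matrix(...).ravel() unpacks in the
# order tn, fp, fn, tp.
import numpy as np
from sklearn.metrics import confusion_matrix

scores = np.array([0.9, 0.2, 0.7, 0.4])      # classifier outputs (hypothetical)
labels = np.array([1, 0, 0, 1])              # ground-truth labels
clipped = np.array(scores > 0.5, dtype=int)  # -> [1, 0, 1, 0]
tn, fp, fn, tp = confusion_matrix(labels, clipped).ravel()
print(tn, fp, fn, tp)                        # 1 1 1 1
print("precision:", tp / (tp + fp + 0.1))    # same 0.1 smoothing as the code above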
def train():
    logging.info("Preparing summarization data.")
    docid, sumid, doc_dict, sum_dict, hidden_label = \
        data_util.load_data(
            FLAGS.data_dir + "/train.48615.diff",
            FLAGS.data_dir + "/train.48615.msg",
            FLAGS.data_dir + "/doc_dict.txt",
            FLAGS.data_dir + "/sum_dict.txt",
            FLAGS.doc_vocab_size, FLAGS.sum_vocab_size)
    val_docid, val_sumid, val_hidd_label = \
        data_util.load_valid_data(
            FLAGS.data_dir + "/valid.3000.diff",
            FLAGS.data_dir + "/valid.3000.msg",
            doc_dict, sum_dict)

    with tf.Session() as sess:
        # Create model.
        logging.info("Creating %d layers of %d units." %
                     (FLAGS.num_layers, FLAGS.size))
        train_writer = tf.summary.FileWriter(FLAGS.tfboard, sess.graph)
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        logging.info("Create buckets.")
        dev_set = create_bucket(val_docid, val_sumid, val_hidd_label)
        train_set = create_bucket(docid, sumid, hidden_label)

        train_bucket_sizes = [len(train_set[b]) for b in range(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in range(len(train_bucket_sizes))
        ]
        for (s_size, t_size, _), nsample in zip(_buckets, train_bucket_sizes):
            logging.info("Train set bucket ({}, {}) has {} samples.".format(
                s_size, t_size, nsample))

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = sess.run(model.global_step)
        while current_step < FLAGS.max_iter:
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in range(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            (encoder_inputs, decoder_inputs, encoder_len, decoder_len,
             class_output, class_len) = data_util.get_batch(
                train_set, _buckets, bucket_id, FLAGS.batch_size, False, 0)
            step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                      encoder_len, decoder_len, False,
                                      train_writer)
            step_time += (time.time() - start_time) / \
                FLAGS.steps_per_validation
            loss += step_loss * FLAGS.batch_size / np.sum(decoder_len) \
                / FLAGS.steps_per_validation
            current_step += 1

            # Once in a while, we save checkpoint.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                model.saver.save(sess, checkpoint_path,
                                 global_step=model.global_step)

            # Once in a while, we print statistics and run evals.
            if current_step % FLAGS.steps_per_validation == 0:
                # Print statistics for the previous epoch.
                perplexity = np.exp(float(loss))
                logging.info("global step %d step-time %.2f ppl %.2f" %
                             (model.global_step.eval(), step_time, perplexity))
                step_time, loss = 0.0, 0.0

                # Run evals on development set and print their perplexity.
                for bucket_id in range(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        logging.info("  eval: empty bucket %d" % (bucket_id))
                        continue
                    (encoder_inputs, decoder_inputs, encoder_len, decoder_len,
                     class_output, class_len) = data_util.get_batch(
                        dev_set, _buckets, bucket_id, FLAGS.batch_size,
                        False, 0)
                    # cl_eval_loss, _ = class_model.step(sess, class_input,
                    #                                    class_output,
                    #                                    class_len, True)
                    eval_loss, _, _ = model.step(sess, encoder_inputs,
                                                 decoder_inputs, encoder_len,
                                                 decoder_len, True)
                    eval_loss = eval_loss * FLAGS.batch_size \
                        / np.sum(decoder_len)
                    eval_ppx = np.exp(float(eval_loss))
                    logging.info("  eval: bucket %d ppl %.2f" %
                                 (bucket_id, eval_ppx))
                sys.stdout.flush()

        # Get encoder outputs.
        batchidx = 0
        final_inputs = []
        final_outputs = []
        final_len = []
        while batchidx + FLAGS.batch_size <= train_bucket_sizes[0]:
            (encoder_inputs, decoder_inputs, encoder_len, decoder_len,
             class_output, class_len) = data_util.get_batch(
                train_set, _buckets, bucket_id, FLAGS.batch_size, True,
                batchidx)
            _, _, enc_outputs = model.step(sess, encoder_inputs,
                                           decoder_inputs, encoder_len,
                                           decoder_len, True)
            enc_outputs = data_util.add_pad_for_hidden(enc_outputs,
                                                       _buckets[0][0])
            final_inputs.append(enc_outputs)
            final_outputs.append(class_output)
            final_len.append(class_len)
            batchidx += FLAGS.batch_size

        final_inputs = np.concatenate(np.asarray(final_inputs), 0)
        final_outputs = np.concatenate(np.asarray(final_outputs), 0)
        final_len = np.concatenate(np.asarray(final_len), 0)
        print(final_inputs.shape, final_outputs.shape, final_len.shape)

        # Hidden classifier.
        class_model = create_class_model(sess, False)
        classification_curr_step = sess.run(class_model.global_step)
        i = 0
        while classification_curr_step <= FLAGS.class_max_iter:
            _, step_loss, output = class_model.step(sess,
                                                    final_inputs[i:(i + 160)],
                                                    final_outputs[i:(i + 160)],
                                                    final_len[i:(i + 160)],
                                                    False)
            classification_curr_step += 1
            clipped = np.array(output > 0.5, dtype=np.int)
            # print("i", i)
            # print("clfcurrstep", classification_curr_step)
            # print("clipped", clipped.flatten())
            # print("final_outputs", final_outputs[i:(i + 160)].flatten())
            tn, fp, fn, tp = confusion_matrix(
                final_outputs[i:(i + 160)].flatten(),
                clipped.flatten()).ravel()
            if classification_curr_step % 40 == 0:
                print("Train Precision", tp / (tp + fp + 0.1))
                print("Train Accuracy", (tp + tn) / (tp + fp + tn + fn))
            if i + 160 == len(final_len):
                i = 0
            else:
                i += 160

            # Once in a while, we save checkpoint.
            if classification_curr_step % FLAGS.steps_per_checkpoint == 0:
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.class_train_dir,
                                               "class_model.ckpt")
                class_model.saver.save(sess, checkpoint_path,
                                       global_step=class_model.global_step)

        print("test_file", FLAGS.test_file)
        docs, data = data_util.load_test_data(FLAGS.test_file, doc_dict)

        # Test: create model and load parameters.
        '''
        logging.info("Creating %d layers of %d units." %
                     (FLAGS.num_layers, FLAGS.size))
        result = []
        for idx, token_ids in enumerate(data):
            # Get a 1-element batch to feed the sentence to the model.
            (encoder_inputs, decoder_inputs, encoder_len, decoder_len,
             class_output, class_len) = data_util.get_batch(
                {0: [(token_ids, [data_util.ID_GO, data_util.ID_EOS], [0, 0])]},
                _buckets, 0, FLAGS.batch_size, False, 0)
            if FLAGS.batch_size == 1 and FLAGS.geneos:
                loss, outputs = model.step(sess, encoder_inputs,
                                           decoder_inputs, encoder_len,
                                           decoder_len, False)
                outputs = [np.argmax(item) for item in outputs[0]]
            else:
                outputs = model.step_beam(sess, encoder_inputs, encoder_len,
                                          geneos=FLAGS.geneos)
            # If there is an EOS symbol in outputs, cut them at that point.
            if data_util.ID_EOS in outputs:
                outputs = outputs[:outputs.index(data_util.ID_EOS)]
            gen_sum = " ".join(data_util.sen_map2tok(outputs, sum_dict[1]))
            gen_sum = data_util.sen_postprocess(gen_sum)
            result.append(gen_sum)
            logging.info("Finish {} samples. :: {}".format(idx, gen_sum[:75]))
        '''

        # Get encoder outputs on the test set.
        docid, sumid, doc_dict, sum_dict, hidden_label = \
            data_util.load_data(
                FLAGS.data_dir + "/test.1981.diff.txt",
                FLAGS.data_dir + "/test.1981.msg.txt",
                FLAGS.data_dir + "/doc_dict.txt",
                FLAGS.data_dir + "/sum_dict.txt",
                FLAGS.doc_vocab_size, FLAGS.sum_vocab_size)
        test_set = create_bucket(docid, sumid, hidden_label)
        test_bucket_sizes = [len(test_set[b]) for b in range(len(_buckets))]
        test_total_size = float(sum(test_bucket_sizes))
        test_buckets_scale = [
            sum(test_bucket_sizes[:i + 1]) / test_total_size
            for i in range(len(test_bucket_sizes))
        ]

        batchidx = 0
        final_inputs = []
        final_outputs = []
        final_len = []
        # data.shape == (1, 158, 3), so the batch size is fixed accordingly.
        FLAGS.batch_size = 158
        while batchidx + FLAGS.batch_size <= len(data):
            # bucket_id = (i for i in range(len(test_buckets_scale))
            (encoder_inputs, decoder_inputs, encoder_len, decoder_len,
             class_output, class_len) = data_util.get_batch(
                test_set, _buckets, bucket_id, FLAGS.batch_size, True,
                batchidx)
            _, _, enc_outputs = model.step(sess, encoder_inputs,
                                           decoder_inputs, encoder_len,
                                           decoder_len, True)
            enc_outputs = data_util.add_pad_for_hidden(enc_outputs,
                                                       _buckets[0][0])
            final_inputs.append(enc_outputs)
            final_outputs.append(class_output)
            final_len.append(class_len)
            batchidx += 1

        final_inputs = np.concatenate(np.asarray(final_inputs), 0)
        final_outputs = np.concatenate(np.asarray(final_outputs), 0)
        final_len = np.concatenate(np.asarray(final_len), 0)
        print(final_inputs.shape, final_outputs.shape, final_len.shape)

        # Hidden classifier evaluation on the test set.
        step_loss, output = class_model.step(sess, final_inputs[:],
                                             final_outputs[:], final_len[:],
                                             True)
        clipped = np.array(output > 0.5, dtype=np.int)
        tn, fp, fn, tp = confusion_matrix(final_outputs[:].flatten(),
                                          clipped.flatten()).ravel()
        # with open('data/test.1981.msg.txt') as reader:
        #     testmsg = []
        #     for i in range(1981):
        #         testmsg.append(reader.readline())
        # sums = list(map(lambda x: x.split(), testmsg))
        # labels = data_util.hidden_label_gen(FLAGS.test_file, sums)
        # tn, fp, fn, tp = confusion_matrix(labels.flatten(), clipped.flatten())
        print("Test Precision : ", tp / (tp + fp + 0.1))
        print("Test Accuracy", (tp + tn) / (tp + fp + tn + fn))

    with open(FLAGS.test_output, "w") as f:
        for idx in range(1981):
            for j in range(len(docs[idx])):
                if clipped[idx][j] == 1:
                    print("Recommended identifier: " + docs[idx][j] + " ",
                          file=f)
            print("\n", file=f)
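# A minimal sketch (with hypothetical bucket sizes, not from the code above)
# of the bucket-sampling step in the training loop of train(): the list
# train_buckets_scale holds the cumulative fraction of samples up to each
# bucket, and a uniform random draw then picks buckets in proportion to size.
import numpy as np

train_bucket_sizes = [300, 100, 600]  # hypothetical bucket sizes
total = float(sum(train_bucket_sizes))
train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / total
                       for i in range(len(train_bucket_sizes))]
print(train_buckets_scale)            # [0.3, 0.4, 1.0]

random_number_01 = np.random.random_sample()
bucket_id = min([i for i in range(len(train_buckets_scale))
                 if train_buckets_scale[i] > random_number_01])
# e.g. a draw of 0.35 selects bucket 1; any draw above 0.4 selects bucket 2,
# so bucket 2 is chosen about 60% of the time, matching its share of samples.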
def ATL_DGP():
    num_inducing_points = 10      # Number of inducing points
    num_hidden_units_source = 2   # Hidden units per class for the source task
    num_hidden_units_target = 2   # Hidden units per class for the target task
    max_iteration_count = 1       # Maximum number of iterations allowed
    learning_rate_start = 0.001   # Starting learning rate
    inducing_kernel = RBFKernel(np.sqrt(num_hidden_units_source))

    Xtrain_source, ytrain_source, Xtrain_target, ytrain_target = \
        load_train_data()
    Xtrain_source = Xtrain_source[:1000, :]
    ytrain_source = ytrain_source[:1000]
    Xtrain_target = Xtrain_target[:1000, :]
    ytrain_target = ytrain_target[:1000]
    print('Xtrain_source.shape:', Xtrain_source.shape)
    print('ytrain_source.shape:', ytrain_source.shape)
    Nsrc = Xtrain_source.shape[0]
    print('Xtrain_target.shape:', Xtrain_target.shape)
    print('ytrain_target.shape:', ytrain_target.shape)

    # Concatenate source and target data sets.
    Data = np.concatenate((Xtrain_source, Xtrain_target))
    labels = np.concatenate((ytrain_source, ytrain_target))
    print('Data.shape:', Data.shape)
    print('labels.shape:', labels.shape)

    # Construct the source-target info map:
    #   0: data point belongs to the source task
    #   1: data point belongs to the target task
    source_target_info = np.ones([Data.shape[0], 1])
    source_target_info[0:Nsrc] = 0

    # Construct kernel lists.
    kernels_source = []
    for rr in range(num_hidden_units_source):
        length_scale = Data.shape[1]
        kernels_source.append(RBFKernel(length_scale))
    kernels_target = []
    for rr in range(num_hidden_units_target):
        length_scale = Data.shape[1]
        kernels_target.append(RBFKernel(length_scale))

    # Uncomment the lines below to try out the symmetric classifier:
    # common_dimensions = 2
    # model = DSGPSymmetricTransferClassifier(
    #     inducing_kernel, kernels_source, kernels_target, 2,
    #     num_inducing=num_inducing_points,
    #     max_iter=max_iteration_count,
    #     learning_rate_start=learning_rate_start)

    # Create the class object for the asymmetric classifier.
    model = DSGPAsymmetricTransferClassifier(
        inducing_kernel, kernels_source, kernels_target,
        num_inducing_points, max_iteration_count,
        learning_rate_start=learning_rate_start)

    print('model training ...')
    # Train the model.
    model.train(Data, labels, source_target_info)
    print('model train done.')

    Xtest_target, ytest_target = load_test_data()
    print('Xtest_target.shape:', Xtest_target.shape)
    print('ytest_target.shape:', ytest_target.shape)

    print('predicting ...')
    # Predict on test data and report accuracy.
    predictions = model.predict(Xtest_target)
    print("Accuracy: %.2f %%" %
          (np.mean(predictions.predictions == ytest_target) * 100))
    calculate_metrics(ytest_target[0], predictions.predictions)