def main():
    holdout_test = True
    fine_tune_task = config["task"]

    logger.info("init model...")
    pretrained_name = get_pretrained_name(config["model_name"])
    if fine_tune_task == "paws":
        num_labels = 2
        train_loader, dev_loader = load_paws(config["PAWS-QQP"],
                                             balanced=config["balanced"])
    elif fine_tune_task == "standsent":
        num_labels = 5
        train_loader, dev_loader, test_loader = load_stanford(
            config["STANFORD_LOC"],
            phrase_len=config["phrase_len"],
            reserve_test=holdout_test)
    else:
        logger.error("unsupported fine-tune task: {}".format(fine_tune_task))
        return

    model, tokenizer = load_model(pretrained_name=pretrained_name,
                                  load_tuned=False,
                                  num_labels=num_labels)
    optimizer = AdamW(model.parameters(), lr=1e-5)

    logger.info("training...")
    model.train()
    init_prec, init_loss, best_prec, best_loss, best_model = train_iter(
        model, tokenizer, optimizer, train_loader, dev_loader,
        task=fine_tune_task,
        early_stopping=True,
        max_epochs=config["n_epochs"],
        print_every=config["print_every"],
        evaluate_every=config["evaluate_every"])
    logger.info("done training.")

    training_info_str = \
        """
        training summary:
        training loss {} -> {}
        test precision {} -> {}
        """.format(init_loss, best_loss, init_prec, best_prec)
    logger.info(training_info_str)

    if holdout_test and (fine_tune_task == "standsent"):
        # evaluate on holdout test set (with phrases of various lengths)
        test_prec = evaluate(best_model, tokenizer, test_loader, fine_tune_task)
        logger.info("precision on holdout test: {}".format(test_prec))
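# A minimal sketch of the module-level `config` dict that main() reads. Only the
# keys referenced above are listed; every value is a hypothetical example, not a
# setting taken from the project.
example_config = {
    "task": "paws",                        # or "standsent"
    "model_name": "bert",
    "PAWS-QQP": "/path/to/paws_qqp",       # hypothetical dataset location
    "STANFORD_LOC": "/path/to/stanford_sentiment",
    "balanced": True,
    "phrase_len": 5,                       # hypothetical value
    "n_epochs": 3,
    "print_every": 100,
    "evaluate_every": 500,
}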
def generate_predictions(dump_out_loc):
    # pretrained_name = get_pretrained_name("bert")
    paws_location = ""
    models_dir = ""
    model_locs = {
        "bert": os.path.join(models_dir, "fine_tuned_bert_balanced_paws.pt"),
        "roberta": os.path.join(models_dir, "fine_tuned_roberta_balanced_paws.pt"),
        "distillbert": os.path.join(models_dir, "fine_tuned_distillbert_balanced_paws.pt"),
        "xlmroberta": os.path.join(models_dir, "fine_tuned_xlmroberta_balanced_paws.pt"),
        "xlnet": os.path.join(models_dir, "fine_tuned_xlnet_balanced_paws.pt"),
    }

    train_loader, dev_loader = load_paws(paws_location, balanced=True)
    for model_name, model_path in model_locs.items():
        pretrained_name = get_pretrained_name(model_name)
        model, tokenizer = load_model(model_loc=model_path,
                                      load_tuned=True,
                                      pretrained_name=pretrained_name)
        pickle_out_loc = os.path.join(dump_out_loc, model_name)
        generate_model_prediction(model, tokenizer, dev_loader, pickle_out_loc)
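# Hypothetical usage sketch: paws_location and models_dir above must first be
# filled in with the local PAWS-QQP path and the directory holding the
# fine-tuned checkpoints. The output folder name below is illustrative; dev-set
# predictions are then dumped to <dump_out_loc>/<model_name> for each model.
if __name__ == "__main__":
    generate_predictions("./out/predictions")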
def main():
    random_seed = eval_config["rand_seed"]
    logger.info("current random seed: {}".format(random_seed))
    model_name = eval_config["model_name"]

    logger.info("preprocessing input...")
    if eval_config["workload"] == "bird":
        input_filename, score_dic, score_range, phrase_pos, phrase_text = bird_preprocess(
            eval_config["BIRD_LOC"],
            random_seed,
            eval_config["sample_size"],
            normalize=eval_config["normalize"],
            out_folder="./out")
        phrase_dic = score_dic
    elif eval_config["workload"] == "ppdb":
        input_filename, score_dic, score_range, phrase_pos, phrase_text, samples_dic = \
            ppdb_preprocess(eval_config["PPDB_LOC"],
                            random_seed,
                            eval_config["sample_size"],
                            negative_sampling_mode=eval_config["negative_sample_mode"],
                            overlap_threshold=eval_config["overlap_threshold"],
                            out_folder="./out/")
        phrase_dic = score_dic
    elif eval_config["workload"] == "ppdb_exact":
        input_filename, exact_label_dic, phrase_pos, phrase_text = ppdb_exact_preprocess(
            eval_config["PPDB_LOC"],
            random_seed,
            eval_config["sample_size"],
            out_folder="./out")
        phrase_dic = exact_label_dic
    elif eval_config["workload"] == "stanford_sent":
        input_filename, phrase_pos, phrase_text, phrase_labels, phrase_scores = stanfordsent_preprocess(
            random_seed, eval_config["sample_size"])
        # TODO: embedding phrases in sentences is not supported for this workload
        phrase_dic = None
    elif eval_config["workload"] == "kintsch":
        input_filename, landmark_samples, inference_samples, phrase_pos, phrase_text = kintsch_preprocess(
            random_seed)
        # TODO: embedding phrases in sentences is not supported for this workload
        phrase_dic = None
    else:
        print("unsupported workload " + eval_config["workload"])
        exit(1)

    logger.info("current eval_configuration: {}".format(eval_config))

    if eval_config["embed_in_sent"]:
        logger.info("Embedding phrase in wiki text")
        if eval_config["workload"] == "ppdb_exact":
            logger.info("Before truncating: {}".format(len(phrase_text)))
            sentence_texts, phrase_text, exact_label_dic = embed_phrase_and_truncate(
                phrase_dic, phrase_text, eval_config["TEXT_CORPUS"])
            logger.info("After truncating: {}".format(len(sentence_texts)))
        else:
            sentence_texts = embed_phrase_transformer(
                phrase_dic, phrase_text, eval_config["TEXT_CORPUS"])
        sents_loc = "out/embedded_sents_{}.txt".format(random_seed)
        sent_out = open(sents_loc, "w")
        for sentence in sentence_texts:
            sent_out.write(sentence)
        sent_out.close()

    logger.info("loading model...")
    pretrained_name = get_pretrained_name(eval_config["model_name"])
    model, tokenizer = load_model(model_loc=eval_config["trained_model_loc"],
                                  load_tuned=True,
                                  pretrained_name=pretrained_name)
    model = remove_clf_head(model)
    if eval_config["compare_model"]:
        base_model, base_tokenizer = init_base_model(model_name)
    logger.info("model being evaluated: {}".format(model.config))
    model_config = model.config
    n_layers, n_heads = model_config.num_hidden_layers, model_config.num_attention_heads

    logger.info("encoding input...")
    if eval_config["embed_in_sent"]:
        eval_text_dataset = EvalDataset(sentence_texts)
    else:
        eval_text_dataset = EvalDataset(phrase_text)
    # shuffling has to be turned off: the order is needed to adjust phrase positions etc.
    eval_text_loader = DataLoader(dataset=eval_text_dataset,
                                  shuffle=False,
                                  batch_size=eval_config["batch_size"])
    # input_id_list, attention_mask_list, phrase_length_list = encode_input(tokenizer, eval_text_loader, eval_config["model_name"])
    input_id_list, attention_mask_list, input_sequence_length_list = encode_input(
        tokenizer, eval_text_loader, eval_config["model_name"])

    logger.info("adjusting phrase position & generating label dic")
    if (model_name in ['roberta']) and (eval_config["embed_in_sent"] is True):
        # the tokenizer is space sensitive: 'access' has a different id than ' access'
        add_space_before_phrase = True
    else:
        add_space_before_phrase = False
    phrase_pos = adjust_transformer_range(phrase_text,
                                          input_id_list,
                                          tokenizer,
                                          model_name,
                                          space_before_phrase=add_space_before_phrase)
    if eval_config["classification"] and (eval_config["workload"] in ["bird", "ppdb"]):
        # generate label dic for classification task
        if eval_config["negative_sample_mode"] is None:
            label_dic = nontrivial_score_to_label(score_dic, score_range)
        else:
            label_dic = trivial_score_to_label(score_dic)

    # ----------------------------- evaluation -------------------------------#
    logger.info("evaluating model")
    model.eval()
    dump_filename = "{}-dump-{}.npy".format(model_name, random_seed)
    dump_path = os.path.join(eval_config["dump_path"], dump_filename)
    batch_size = eval_config["batch_size"]
    eval_data = TensorDataset(input_id_list, attention_mask_list)
    data_loader = DataLoader(eval_data, batch_size=batch_size, shuffle=False)
    eval_and_dump_embeddings(model, model_name, data_loader, dump_path)
    if eval_config["compare_model"]:
        base_model.eval()
        base_dump_filename = "{}-dump-{}-base.npy".format(model_name, random_seed)
        base_dump_path = os.path.join(eval_config["dump_path"], base_dump_filename)
        eval_and_dump_embeddings(base_model, model_name, data_loader, base_dump_path)
    logger.info("dumping segment size: {} samples per segment".format(
        batch_size * eval_config["dump_every"]))

    logger.info("working on downstream task")
    analyzer = TransformerAnalyzer(dump_path, n_layers, phrase_text,
                                   phrase_text, input_sequence_length_list,
                                   model_name, eval_config["include_input_emb"])
    if eval_config["compare_model"]:
        base_analyzer = TransformerAnalyzer(base_dump_path, n_layers,
                                            phrase_text, phrase_text,
                                            input_sequence_length_list,
                                            model_name,
                                            eval_config["include_input_emb"])
        embedding_sims = modelwise_compare([analyzer, base_analyzer],
                                           phrase_text, phrase_pos, model_name)
        analyze_embedding_dic(embedding_sims)
        analyze_max_changes(embedding_sims)
        analyzer.reset_handler()
        base_analyzer.reset_handler()
        max_change_pairs_by_layer = modelwise_phrase_pair_analysis(
            [analyzer, base_analyzer], score_dic, phrase_pos, phrase_text)
        # finished comparison; no need to run other tasks
        return

    if eval_config["workload"] == "kintsch":
        logger.info("writing out kintsch embeddings")
        out_embedding_dir = os.path.join(eval_config["EMBEDDING_OUT_LOC"], model_name)
        if os.path.exists(out_embedding_dir) is False:
            os.mkdir(out_embedding_dir)
        dump_read_handler = open(dump_path, "rb")
        generate_kintsch_embeddings_transformer(dump_read_handler,
                                                out_embedding_dir, phrase_pos,
                                                input_sequence_length_list,
                                                landmark_samples,
                                                inference_samples, n_layers,
                                                eval_config["include_input_emb"])
        dump_read_handler.close()
        logger.info("evaluating kintsch embeddings")
        evaluate_kintsch_embeddings(os.path.join(out_embedding_dir, "kintsch"),
                                    landmark_samples, inference_samples,
                                    n_layers, eval_config["include_input_emb"])
    elif eval_config["workload"] in ["bird", "ppdb"]:
        if eval_config["correlation"]:
            logger.info("analyzing correlation...")
            coe_by_layer, cos_sim_by_layer, target_score = analyze_correlation_by_layer(
                analyzer, score_dic, phrase_pos, eval_config["include_input_emb"])
            print_stats_by_layer(coe_by_layer,
                                 is_list=False,
                                 stat_type="cor",
                                 out_folder="./out")
            analyzer.reset_handler()
        if eval_config["classification"]:
            logger.info("generating classification workloads...")
            generate_classifier_workloads(analyzer, eval_config, random_seed,
                                          phrase_text, label_dic, phrase_pos,
                                          eval_config["include_input_emb"])
    elif eval_config["workload"] == "ppdb_exact":
        generate_classifier_workloads(analyzer, eval_config, random_seed,
                                      phrase_text, exact_label_dic, phrase_pos,
                                      eval_config["include_input_emb"])
    elif eval_config["workload"] == "stanford_sent":
        generate_stanford_classifier_workloads(analyzer, eval_config,
                                               random_seed, phrase_text,
                                               phrase_labels, phrase_pos,
                                               eval_config["include_input_emb"])
    else:
        logger.error("unsupported task {}".format(eval_config["workload"]))

    # ------------- training classifiers (if workload is classification) -------------#
    if eval_config["classification"]:
        logger.info("training classifiers on embeddings...")
        n_layers = eval_config["n_layers"]
        working_dir = os.path.join(eval_config["EMBEDDING_OUT_LOC"],
                                   str(eval_config["rand_seed"]))
        verify_embeddings(n_layers, working_dir)
        label_handler = open(os.path.join(working_dir, "label.txt"), "r")
        configure_handler = open(os.path.join(working_dir, "config.txt"), "r")
        labels = []
        core_count = mp.cpu_count()
        pool = mp.Pool(core_count)
        logger.info("Using {} cores".format(core_count))
        logger.info("Current configurations:")
        text = configure_handler.readlines()
        logger.info(text)
        for line in label_handler:
            line = line.strip()
            labels.append(line)
        # logger.info("classification by layer...")
        # classify_by_layer(n_layers, labels, pool, working_dir)
        logger.info("classification by token...")
        classify_by_token(n_layers, labels, pool, working_dir)
        label_handler.close()
        configure_handler.close()
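# A minimal sketch of the `eval_config` dict this script reads. Only the keys
# referenced above are listed; all values are hypothetical examples.
example_eval_config = {
    "rand_seed": 42,
    "model_name": "bert",
    "workload": "ppdb",                  # "bird" | "ppdb" | "ppdb_exact" | "stanford_sent" | "kintsch"
    "BIRD_LOC": "/path/to/bird",         # hypothetical paths
    "PPDB_LOC": "/path/to/ppdb",
    "TEXT_CORPUS": "/path/to/wiki_text",
    "trained_model_loc": "/path/to/fine_tuned_model.pt",
    "EMBEDDING_OUT_LOC": "./out/embeddings",
    "dump_path": "./out/dumps",
    "sample_size": 1000,
    "normalize": True,
    "negative_sample_mode": None,
    "overlap_threshold": 0.5,
    "embed_in_sent": True,
    "compare_model": False,
    "batch_size": 32,
    "classification": True,
    "correlation": True,
    "dump_every": 10,
    "include_input_emb": False,
    "n_layers": 12,
}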
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="model name", choices=['cifar', 'lenet'])
    parser.add_argument("model_file", help="model file")
    parser.add_argument('layer', help='layer name to get image output')
    parser.add_argument('imageID', help='ID of image for input', type=int)
    parser.add_argument('-d', '--dataset',
                        choices=['train', 'val', 'test'],
                        default='test')
    parser.add_argument('--no-separate',
                        help='do not split the data into two halves',
                        action='store_true')
    parser.add_argument('--first-part',
                        help='take first part of data instead of the second',
                        action='store_true')
    parser.add_argument('-i', '--input',
                        help='only get input image',
                        action='store_true')
    parser.add_argument('-w', '--draw-weights',
                        help='only draw the layer weights',
                        action='store_true')
    args = parser.parse_args()

    model = args.model
    batch_size = 1
    separate = not args.no_separate
    model_file = args.model_file
    layer_name = args.layer
    chosen_set = args.dataset
    load_first_part = args.first_part
    imageID = args.imageID
    only_input = args.input
    only_weights = args.draw_weights

    if not only_weights:
        filename = str(imageID) + '_' + model + '_' + layer_name + '_output.png'
    else:
        filename = 'weight_' + model + '_' + layer_name + '_output.png'

    print('--Parameters--')
    print(' model : ', model)
    print(' layer name : ', layer_name)
    print(' batch_size : ', batch_size)
    print(' model_file : ', model_file)
    print(' middle output images will be saved to : ', filename)
    print(' separate data :', separate)
    if separate:
        print(' take first or second part of data :',
              'first' if load_first_part else 'second')
    print('batch_size=', batch_size)

    if separate:
        nOutput = 5
    else:
        nOutput = 10

    # Load the dataset
    print("Loading data...")
    if not only_weights:
        if only_input:
            X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(
                model, separate, load_first_part, substract_mean=False)
        else:
            X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(
                model, separate, load_first_part)
        print(len(X_train), 'train images')
        print(len(X_val), 'val images')
        print(len(X_test), 'test images')

        print('getting from ' + chosen_set)
        if chosen_set == 'train':
            X_set = X_train
            y_set = y_train
        elif chosen_set == 'val':
            X_set = X_val
            y_set = y_val
        else:
            X_set = X_test
            y_set = y_test

        if only_input:
            image_data = X_set[imageID]
            if model == 'cifar':
                image_data = image_data.reshape((3, 32, 32))
                image_data = np.rollaxis(image_data, 0, 3)  # 3x32x32 to 32x32x3
            else:
                image_data = image_data.reshape((28, 28))
            image_data *= 255
            image_data = image_data.astype('uint8')
            image = Image.fromarray(image_data)
            image.save(filename)
            print('image saved to :', filename)
            exit()

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    net, net_output = model_io.load_model(model, model_file, nOutput, input_var)

    if not only_weights:
        print("Getting middle output...")
        output = lasagne.layers.get_output(net[layer_name])
        get_output_image = theano.function([input_var], output.flatten(3))
        output_shape = np.array(lasagne.layers.get_output_shape(net[layer_name]))
        foo, nKernel, h, w = output_shape
        print('layer ' + layer_name + ' shape :', output_shape)
        batch_output = get_output_image(np.array([X_set[imageID]]))
        images_output = batch_output[0]
        prediction = lasagne.layers.get_output(net_output)
        get_pred = theano.function([input_var], prediction)
        pred = get_pred(np.array([X_set[imageID]]))
    else:
        if model == 'cifar':
            weights = net[layer_name].W.get_value()
            print('weights shape :', weights.shape)
            nKernel, foo, h, w = weights.shape
            assert foo == 3
            flatten_w = net[layer_name].W.flatten(3)
            images_output = flatten_w.eval()
            images_output = np.rollaxis(images_output, 1, 0)  # nKernel x 3 x w*h to 3 x nKernel x w*h
            print('flatten weights shape :', images_output.shape)
        else:
            weights = net[layer_name].W.get_value()
            print('weights shape :', weights.shape)
            nKernel, foo, h, w = weights.shape
            assert foo == 1
            flatten_w = net[layer_name].W.flatten(2)
            images_output = flatten_w.eval()
            print('flatten weights shape :', images_output.shape)

    # lay the feature maps (or kernels) out on a near-square grid
    width = 1
    while width * width < nKernel:
        width += 1
    if width * width > nKernel:
        # pad with blank tiles so the grid is complete
        if images_output.ndim == 2:
            images_output = np.concatenate(
                (images_output, np.zeros((width * width - nKernel, w * h))),
                axis=0)
        elif images_output.ndim == 3:
            images_output = np.concatenate(
                (images_output, np.zeros((3, width * width - nKernel, w * h))),
                axis=1)
        else:
            assert False

    image = Image.fromarray(
        tile_raster_images(X=images_output,  # chose batch 0
                           img_shape=(h, w),
                           tile_shape=(width, width),
                           tile_spacing=(1, 1)))
    image.save(filename)
    print('image saved to :', filename)
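# Hypothetical invocation sketch (the script and checkpoint names are
# illustrative; the positional arguments are: model, model_file, layer, imageID):
#
#   python visualize_output.py cifar firsthalf_cifar_model19.npz conv1 7 -d test
#   python visualize_output.py lenet lenet_model19.npz conv1 0 -w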
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="model name", choices=['cifar', 'lenet'])
    parser.add_argument('-n', '--num-epochs', type=int, default=20)
    parser.add_argument('-f', '--model-file', help="model file")
    parser.add_argument('--no-separate',
                        help='do not split the data into two halves',
                        action='store_true')
    parser.add_argument('--second-part',
                        help='take second part of data instead of the first',
                        action='store_true')
    parser.add_argument('-b', '--batch-size', type=int, default=64)
    parser.add_argument('-l', '--learning-rate', type=float, default=0.01)
    parser.add_argument('-t', '--test-only', action='store_true')
    parser.add_argument(
        '-T', '--train-from-layer',
        help='only train on this layer and those layers after it, '
             'don\'t update weights of layers before this layer')
    parser.add_argument(
        '-p', '--prefix',
        help='prefix to add at the beginning of model save file')
    args = parser.parse_args()

    model = args.model
    batch_size = args.batch_size
    separate = not args.no_separate
    model_file = args.model_file
    num_epochs = args.num_epochs
    learning_rate = args.learning_rate
    save_file_name = model + '_model'
    test_only = args.test_only
    load_first_part = not args.second_part
    train_from_layer = args.train_from_layer
    prefix = args.prefix

    if test_only and not model_file:
        print('you need to specify a model file to test')
        exit()

    if separate:
        if load_first_part:
            save_file_name = 'firsthalf_' + save_file_name
        else:
            save_file_name = 'secondhalf_' + save_file_name
        nOutput = 5
    else:
        nOutput = 10
    if train_from_layer:
        save_file_name = 'from_' + train_from_layer + save_file_name
    if prefix:
        save_file_name = prefix + save_file_name
    else:
        save_file_name = str(random.randint(10000, 99999)) + '_' + save_file_name
    logfile = save_file_name + '_log.txt'
    log_print = functools.partial(log_and_print, logfile=logfile)

    log_print('--Parameter--')
    log_print(' model={}'.format(model))
    log_print(' batch_size={}'.format(batch_size))
    log_print(' num_epochs={}'.format(num_epochs))
    log_print(' learning_rate={}'.format(learning_rate))
    log_print(' separate data :{}'.format(separate))
    if separate:
        s = ' take first or second part of data :' + (
            'first' if load_first_part else 'second')
        log_print(s)
    log_print(' model_file :{}'.format(model_file))
    log_print(' nOutput = {}'.format(nOutput))
    log_print(' model will be saved to : {}'.format(save_file_name + '*.npz'))
    log_print(' log will be saved to : {}'.format(logfile))
    log_print(' test only :{}'.format(test_only))
    log_print(' only train from this layer : {}'.format(train_from_layer))
    log_print(' prefix to save file : {}'.format(prefix))
    log_print('')

    log_print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(
        model, separate, load_first_part)
    log_print('{} train images'.format(len(X_train)))
    log_print('{} val images'.format(len(X_val)))
    log_print('{} test images'.format(len(X_test)))

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    log_print("Building model and compiling functions...")
    net, net_output = model_io.load_model(model, model_file, nOutput, input_var)

    prediction = lasagne.layers.get_output(net_output)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    if train_from_layer:
        # only update parameters of the requested layer and the layers after it
        layers_to_train = lasagne.layers.get_all_layers(
            net_output, treat_as_input=[net[train_from_layer]])
        params = get_all_params_from_layers(layers_to_train, trainable=True)
    else:
        params = lasagne.layers.get_all_params(net_output, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params,
                                                learning_rate=learning_rate,
                                                momentum=0.9)

    test_prediction = lasagne.layers.get_output(net_output, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    if not test_only:
        log_print("Starting training...")
        for epoch in range(num_epochs):
            train_err = 0
            train_batches = 0
            start_time = time.time()
            print("Training stage:")
            for batch in load_data.iterate_minibatches(X_train, y_train,
                                                       batch_size, shuffle=True):
                time_batch = time.time()
                inputs, targets = batch
                this_train_err = train_fn(inputs, targets)
                train_err += this_train_err
                train_batches += 1
                print('train batch', train_batches, 'err+=', this_train_err,
                      '{:.2f}'.format(time.time() - time_batch), 'seconds')

            val_err = 0
            val_acc = 0
            val_batches = 0
            print("Validation stage ..")
            for batch in load_data.iterate_minibatches(X_val, y_val,
                                                       batch_size, shuffle=False):
                inputs, targets = batch
                err, acc = val_fn(inputs, targets)
                val_err += err
                val_acc += acc
                val_batches += 1

            # Then we print the results for this epoch:
            log1 = "Epoch {} of {} took {:.3f}m".format(
                epoch + 1, num_epochs, (time.time() - start_time) / 60.)
            log2 = " training loss:\t\t{:.6f}".format(train_err / train_batches)
            log3 = " validation loss:\t\t{:.6f}".format(val_err / val_batches)
            log4 = " validation accuracy:\t\t{:.2f} %".format(
                val_acc / val_batches * 100)
            log_print(log1)
            log_print(log2)
            log_print(log3)
            log_print(log4)

            # Optionally, you could now dump the network weights to a file like this:
            model_file = save_file_name + str(epoch) + '.npz'
            log_print('model saved to ' + model_file)
            model_io.save_model(model_file, net_output)

    log_print('testing network ...')
    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in load_data.iterate_minibatches(X_test, y_test, batch_size,
                                               shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
    log_print("Final results:")
    log_print(" test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    log_print(" test accuracy:\t\t{:.2f} %".format(test_acc / test_batches * 100))
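# The training script above relies on two helpers defined elsewhere in the
# project. These are minimal sketches of what the call sites assume (a message
# logger used via functools.partial, and a parameter collector for a subset of
# layers); they are not the project's own implementations.
def log_and_print(message, logfile):
    """Print a message and append it to the given log file."""
    print(message)
    with open(logfile, 'a') as f:
        f.write(str(message) + '\n')


def get_all_params_from_layers(layers, trainable=True):
    """Collect the trainable parameters of the given Lasagne layers, without duplicates."""
    params = []
    for layer in layers:
        for p in layer.get_params(trainable=trainable):
            if p not in params:
                params.append(p)
    return params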
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="model name", choices=['cifar', 'lenet'])
    parser.add_argument("model_file", help="model file")
    parser.add_argument('layer', help='layer name to get output')
    parser.add_argument('--no-separate',
                        help='do not split the data into two halves',
                        action='store_true')
    parser.add_argument('--first-part',
                        help='take first part of data instead of the second',
                        action='store_true')
    parser.add_argument('-b', '--batch-size', type=int, default=64)
    parser.add_argument('-n', '--data-num', type=int)
    args = parser.parse_args()

    model = args.model
    batch_size = args.batch_size
    separate = not args.no_separate
    model_file = args.model_file
    layer_name = args.layer
    load_first_part = args.first_part
    data_num = args.data_num
    filename = model + '_' + layer_name + '_output.save'

    print('--Parameters--')
    print(' model : ', model)
    print(' layer name : ', layer_name)
    print(' batch_size : ', batch_size)
    print(' model_file : ', model_file)
    print(' middle output will be saved to : ', filename)
    print(' separate data :', separate)
    if separate:
        print(' take first or second part of data :',
              'first' if load_first_part else 'second')
    print('batch_size=', batch_size)

    if separate:
        nOutput = 5
    else:
        nOutput = 10

    # Load the dataset
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(
        model, separate, load_first_part)
    if data_num:
        # optionally truncate every split to the first data_num samples
        X_train = X_train[:data_num]
        y_train = y_train[:data_num]
        X_val = X_val[:data_num]
        y_val = y_val[:data_num]
        X_test = X_test[:data_num]
        y_test = y_test[:data_num]
    print(len(X_train), 'train images')
    print(len(X_val), 'val images')
    print(len(X_test), 'test images')

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    net, net_output = model_io.load_model(model, model_file, nOutput, input_var)

    # middle_output = theano.function([input_var], net[layer_name].output)
    print("Getting middle output...")
    output = lasagne.layers.get_output(net[layer_name])
    get_output = theano.function([input_var], output.flatten(2))
    output_shape = np.array(lasagne.layers.get_output_shape(net[layer_name]))
    print('layer ' + layer_name + ' shape :', output_shape)

    all_train_output = []
    all_train_y = []
    all_test_output = []
    all_test_y = []

    print('getting from train')
    for batch in load_data.iterate_minibatches(X_train, y_train, batch_size,
                                               shuffle=False):
        print('.', end='', flush=True)
        inputs, targets = batch
        batch_output = get_output(inputs)  # a numpy ndarray
        all_train_output.extend(batch_output.tolist())
        all_train_y.extend(targets.tolist())
    print()

    print('getting from test')
    for batch in load_data.iterate_minibatches(X_test, y_test, batch_size,
                                               shuffle=False):
        print('.', end='', flush=True)
        inputs, targets = batch
        batch_output = get_output(inputs)  # a numpy ndarray
        all_test_output.extend(batch_output.tolist())
        all_test_y.extend(targets.tolist())
    print()

    print("train output shape : ", np.array(all_train_output).shape)
    print("train y shape : ", np.array(all_train_y).shape)
    print("test output shape : ", np.array(all_test_output).shape)
    print("test y shape : ", np.array(all_test_y).shape)

    with open(filename, 'wb') as f:
        pickle.dump([all_train_output, all_train_y, all_test_output, all_test_y],
                    f, protocol=pickle.HIGHEST_PROTOCOL)
    print('... saved to ', filename)
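# A small sketch of how the pickled dump written above could be read back for a
# downstream experiment. The file name is whatever `filename` was at save time
# (e.g. 'cifar_conv1_output.save'); the path used by a caller is up to them.
import pickle
import numpy as np


def load_middle_output(path):
    """Load the [train_output, train_y, test_output, test_y] lists saved above."""
    with open(path, 'rb') as f:
        train_out, train_y, test_out, test_y = pickle.load(f)
    return (np.array(train_out), np.array(train_y),
            np.array(test_out), np.array(test_y))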
def _test():
    from model_io import load_model, save_pickle_model, save_tf_learn_model
    import model_io

    def get_here_path(relative_path):
        cur_dir_path = os.path.dirname(__file__)
        return os.path.join(cur_dir_path, relative_path)

    save_dir = get_here_path("saved")
    pkl_bin_type = "pickle"
    x, y = np.random.randn(20, 3), np.random.randint(2, size=20)

    # test trans
    trans = get_fitted_transformer("min_max", {"feature_range": (-1, 1)}, x)
    fitted_x = exec_transformer(trans, x)
    print(fitted_x)
    save_pickle_model(trans, "min_max_1.pkl", save_dir)
    trans = load_model(get_here_path("saved/min_max_1.pkl"), model_io.bin_type.PICKLE)
    print(exec_transformer(trans, x))

    # test sklearn model
    lr = get_fitted_sklearn_model("lr", {"penalty": 'l1'}, x, y)
    save_pickle_model(lr, "lr_1.pkl", save_dir)
    lr = load_model(get_here_path("saved/lr_1.pkl"), model_io.bin_type.PICKLE)
    print(lr)
    print(exec_sklearn_model(lr, x))
    rf = get_fitted_sklearn_model("rf", {}, x, y)
    print(rf)
    print(exec_sklearn_model(rf, x))

    # test xgb model
    para_dict = {}
    para_dict["params"] = {
        'max_depth': 2,
        'eta': 1,
        'silent': 1,
        'objective': 'binary:logistic'
    }
    para_dict["num_boost_round"] = 2
    xgb = get_fitted_xgb_model("xgb_1", para_dict, x, y)
    save_pickle_model(xgb, "xgb_1.pkl", save_dir)
    xgb = load_model(get_here_path("saved/xgb_1.pkl"), model_io.bin_type.PICKLE)
    print(str(xgb))
    print(exec_xgb_model(xgb, x))

    # test tf dnn classifier
    para_dict = {}
    para_dict["hidden_units"] = [10, 20, 10]
    para_dict["n_classes"] = 2
    dnn, dnn2 = get_fitted_tfdnn_model("dnn_1", para_dict,
                                       x.astype(np.float32),
                                       y.astype(np.float32))
    save_pickle_model(dnn2, "dnn_1.pkl", save_dir)
    save_tf_learn_model(
        dnn, "dnn_1", save_dir,
        learn.infer_real_valued_columns_from_input(x.astype(np.float32)),
    )
    print(list(dnn.predict(x.astype(np.float32), as_iterable=False)))
    print("begin to load dnn...")
    m = model_io.load_model("saved/dnn_1.pkl", model_io.bin_type.TF_LEARN)
    print(list(m.predict(x.astype(np.float32))))