def main():
    holdout_test = True
    fine_tune_task = config["task"]
    logger.info("init model...")
    pretrained_name = get_pretrained_name(config["model_name"])
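    # pick the dataset loaders and number of labels for the requested fine-tuning task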
    if fine_tune_task == "paws":
        num_labels = 2
        train_loader, dev_loader = load_paws(config["PAWS-QQP"],
                                             balanced=config["balanced"])
    elif fine_tune_task == "standsent":
        num_labels = 5
        train_loader, dev_loader, test_loader = load_stanford(
            config["STANFORD_LOC"],
            phrase_len=config["phrase_len"],
            reserve_test=holdout_test)
    else:
        logger.error("unsupported fine-tune task: {}".format(fine_tune_task))
        return

    model, tokenizer = load_model(pretrained_name=pretrained_name,
                                  load_tuned=False,
                                  num_labels=num_labels)
    optimizer = AdamW(model.parameters(), lr=1e-5)

    logger.info("training...")
    model.train()
    init_prec, init_loss, best_prec, best_loss, best_model = train_iter(
        model,
        tokenizer,
        optimizer,
        train_loader,
        dev_loader,
        task=fine_tune_task,
        early_stopping=True,
        max_epochs=config["n_epochs"],
        print_every=config["print_every"],
        evaluate_every=config["evaluate_every"])
    logger.info("done training.")

    training_info_str = (
        "training summary:\n"
        "    training loss {} -> {}\n"
        "    dev precision {} -> {}").format(init_loss, best_loss,
                                             init_prec, best_prec)
    logger.info(training_info_str)

    if holdout_test and (fine_tune_task == "standsent"):
        # evaluate on holdout test set (with phrases of various lengths)
        test_prec = evaluate(best_model, tokenizer, test_loader,
                             fine_tune_task)
        logger.info("precision on holdout test: {}".format(test_prec))
def generate_predictions(dump_out_loc):
    # pretrained_name = get_pretrained_name("bert")
    paws_location = ""
    models_dir = ""

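    # fine-tuned checkpoint paths, one per backbone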
    model_locs = {
        "bert": os.path.join(models_dir, "fine_tuned_bert_balanced_paws.pt"),
        "roberta": os.path.join(models_dir, "fine_tuned_roberta_balanced_paws.pt"),
        "distillbert": os.path.join(models_dir, "fine_tuned_distillbert_balanced_paws.pt"),
        "xlmroberta": os.path.join(models_dir, "fine_tuned_xlmroberta_balanced_paws.pt"),
        "xlnet": os.path.join(models_dir, "fine_tuned_xlnet_balanced_paws.pt"),
    }
    train_loader, dev_loader = load_paws(paws_location, balanced=True)

    for model_name, model_path in model_locs.items():
        pretrained_name = get_pretrained_name(model_name)
        model, tokenizer = load_model(model_loc=model_path, load_tuned=True, pretrained_name=pretrained_name)
        pickle_out_loc = os.path.join(dump_out_loc, model_name)
        generate_model_prediction(model, tokenizer, dev_loader, pickle_out_loc)
def main():
    random_seed = eval_config["rand_seed"]
    logger.info("current random seed: {}".format(random_seed))
    model_name = eval_config["model_name"]
    logger.info("preprocessing input...")
    if eval_config["workload"] == "bird":
        input_filename, score_dic, score_range, phrase_pos, phrase_text = bird_preprocess(
            eval_config["BIRD_LOC"],
            random_seed,
            eval_config["sample_size"],
            normalize=eval_config["normalize"],
            out_folder="./out")
        phrase_dic = score_dic
    elif eval_config["workload"] == "ppdb":
        input_filename, score_dic, score_range, phrase_pos, phrase_text, samples_dic = \
            ppdb_preprocess(eval_config["PPDB_LOC"], random_seed, eval_config["sample_size"],
                            negative_sampling_mode=eval_config["negative_sample_mode"],
                            overlap_threshold=eval_config["overlap_threshold"], out_folder="./out/")
        phrase_dic = score_dic
    elif eval_config["workload"] == "ppdb_exact":
        input_filename, exact_label_dic, phrase_pos, phrase_text = ppdb_exact_preprocess(
            eval_config["PPDB_LOC"],
            random_seed,
            eval_config["sample_size"],
            out_folder="./out")
        phrase_dic = exact_label_dic
    elif eval_config["workload"] == "stanford_sent":
        input_filename, phrase_pos, phrase_text, phrase_labels, phrase_scores = stanfordsent_preprocess(
            random_seed, eval_config["sample_size"])
        # TODO: embedding phrases in sentences is not supported for this workload
        phrase_dic = None
    elif eval_config["workload"] == "kintsch":
        input_filename, landmark_samples, inference_samples, phrase_pos, phrase_text = kintsch_preprocess(
            random_seed)
        # TODO: embedding phrases in sentences is not supported for this workload
        phrase_dic = None
    else:
        logger.error("unsupported workload: {}".format(eval_config["workload"]))
        exit(1)

    logger.info("current eval_configuration: {}".format(eval_config))

    if eval_config["embed_in_sent"]:
        logger.info("Embedding phrase in wiki text")
        if eval_config["workload"] == "ppdb_exact":
            logger.info("Before truncating: {}".format(len(phrase_text)))
            sentence_texts, phrase_text, exact_label_dic = embed_phrase_and_truncate(
                phrase_dic, phrase_text, eval_config["TEXT_CORPUS"])
            logger.info("After truncating: {}".format(len(sentence_texts)))
        else:
            sentence_texts = embed_phrase_transformer(
                phrase_dic, phrase_text, eval_config["TEXT_CORPUS"])

        sents_loc = "out/embedded_sents_{}.txt".format(random_seed)
        with open(sents_loc, "w") as sent_out:
            for sentence in sentence_texts:
                sent_out.write(sentence)

    logger.info("loading model...")
    pretrained_name = get_pretrained_name(eval_config["model_name"])
    model, tokenizer = load_model(model_loc=eval_config["trained_model_loc"],
                                  load_tuned=True,
                                  pretrained_name=pretrained_name)
    model = remove_clf_head(model)
    if eval_config["compare_model"]:
        base_model, base_tokenizer = init_base_model(model_name)

    logger.info("model being evaluated: {}".format(model.config))

    model_config = model.config
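    # n_layers from the model config sizes the per-layer embedding analysis below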
    n_layers, n_heads = model_config.num_hidden_layers, model_config.num_attention_heads

    logger.info("encoding input...")
    if eval_config["embed_in_sent"]:
        eval_text_dataset = EvalDataset(sentence_texts)
    else:
        eval_text_dataset = EvalDataset(phrase_text)

    # shuffling has to be turned off. need to keep the order to adjust phrase position etc.
    eval_text_loader = DataLoader(dataset=eval_text_dataset,
                                  shuffle=False,
                                  batch_size=eval_config["batch_size"])
    # input_id_list, attention_mask_list, phrase_length_list = encode_input(tokenizer, eval_text_loader, eval_config["model_name"])
    input_id_list, attention_mask_list, input_sequence_length_list = encode_input(
        tokenizer, eval_text_loader, eval_config["model_name"])

    logger.info("adjusting phrase positions & generating label dict")
    if (model_name in ['roberta']) and (eval_config["embed_in_sent"] is True):
        # tokenizer is space sensitive. 'access' has different id than ' access'
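        # e.g. with a byte-level BPE tokenizer (assuming the usual Hugging Face
        # tokenizer API), tokenizer.encode("access", add_special_tokens=False)
        # and tokenizer.encode(" access", add_special_tokens=False) return
        # different token ids.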
        add_space_before_phrase = True
    else:
        add_space_before_phrase = False

    phrase_pos = adjust_transformer_range(
        phrase_text,
        input_id_list,
        tokenizer,
        model_name,
        space_before_phrase=add_space_before_phrase)

    if eval_config["classification"] and (eval_config["workload"]
                                          in ["bird", "ppdb"]):
        # generate label dic for classification task
        if eval_config["negative_sample_mode"] is None:
            label_dic = nontrivial_score_to_label(score_dic, score_range)
        else:
            label_dic = trivial_score_to_label(score_dic)

    #----------------------------- evaluation -------------------------------#
    logger.info("evaluating model")
    model.eval()
    dump_filename = "{}-dump-{}.npy".format(model_name, random_seed)
    dump_path = os.path.join(eval_config["dump_path"], dump_filename)
    batch_size = eval_config["batch_size"]

    eval_data = TensorDataset(input_id_list, attention_mask_list)
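    # shuffle=False keeps the dumped embeddings aligned with phrase_text / phrase_pos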
    data_loader = DataLoader(eval_data, batch_size=batch_size, shuffle=False)

    eval_and_dump_embeddings(model, model_name, data_loader, dump_path)

    if eval_config["compare_model"]:
        base_model.eval()
        base_dump_filename = "{}-dump-{}-base.npy".format(
            model_name, random_seed)
        base_dump_path = os.path.join(eval_config["dump_path"],
                                      base_dump_filename)
        eval_and_dump_embeddings(base_model, model_name, data_loader,
                                 base_dump_path)

    logger.info("dumping segment size: {} samples per segment".format(
        batch_size * eval_config["dump_every"]))

    logger.info("working on downstream task")
    analyzer = TransformerAnalyzer(dump_path, n_layers, phrase_text,
                                   phrase_text, input_sequence_length_list,
                                   model_name,
                                   eval_config["include_input_emb"])
    if eval_config["compare_model"]:
        base_analyzer = TransformerAnalyzer(base_dump_path, n_layers,
                                            phrase_text, phrase_text,
                                            input_sequence_length_list,
                                            model_name,
                                            eval_config["include_input_emb"])
        embedding_sims = modelwise_compare([analyzer, base_analyzer],
                                           phrase_text, phrase_pos, model_name)
        analyze_embedding_dic(embedding_sims)
        analyze_max_changes(embedding_sims)

        analyzer.reset_handler()
        base_analyzer.reset_handler()

        max_change_pairs_by_layer = modelwise_phrase_pair_analysis(
            [analyzer, base_analyzer], score_dic, phrase_pos, phrase_text)

        # finished comparison. no need to run other tasks
        return

    if eval_config["workload"] == "kintsch":
        logger.info("writing out kintsch embeddings")
        out_embedding_dir = os.path.join(eval_config["EMBEDDING_OUT_LOC"],
                                         model_name)
        if not os.path.exists(out_embedding_dir):
            os.mkdir(out_embedding_dir)

        dump_read_handler = open(dump_path, "rb")
        generate_kintsch_embeddings_transformer(
            dump_read_handler, out_embedding_dir, phrase_pos,
            input_sequence_length_list, landmark_samples, inference_samples,
            n_layers, eval_config["include_input_emb"])
        dump_read_handler.close()

        logger.info("evaluating kintsch embeddings")
        evaluate_kintsch_embeddings(os.path.join(out_embedding_dir, "kintsch"),
                                    landmark_samples, inference_samples,
                                    n_layers, eval_config["include_input_emb"])
    elif eval_config["workload"] in ["bird", "ppdb"]:
        if eval_config["correlation"]:
            logger.info("analyzing correlation...")
            coe_by_layer, cos_sim_by_layer, target_score = analyze_correlation_by_layer(
                analyzer, score_dic, phrase_pos,
                eval_config["include_input_emb"])
            print_stats_by_layer(coe_by_layer,
                                 is_list=False,
                                 stat_type="cor",
                                 out_folder="./out")
            analyzer.reset_handler()

        if eval_config["classification"]:
            logger.info("generating classification workloads...")
            generate_classifier_workloads(analyzer, eval_config, random_seed,
                                          phrase_text, label_dic, phrase_pos,
                                          eval_config["include_input_emb"])
    elif eval_config["workload"] == "ppdb_exact":
        generate_classifier_workloads(analyzer, eval_config, random_seed,
                                      phrase_text, exact_label_dic, phrase_pos,
                                      eval_config["include_input_emb"])
    elif eval_config["workload"] == "stanford_sent":
        generate_stanford_classifier_workloads(
            analyzer, eval_config, random_seed, phrase_text, phrase_labels,
            phrase_pos, eval_config["include_input_emb"])
    else:
        logger.error("unsupported workload {}".format(eval_config["workload"]))

    #----------------------------- training classifiers (if workload is classification) -------------------------------#
    if eval_config["classification"]:
        logger.info("training classifiers on embeddings...")
        n_layers = eval_config["n_layers"]

        working_dir = os.path.join(eval_config["EMBEDDING_OUT_LOC"],
                                   str(eval_config["rand_seed"]))

        verify_embeddings(n_layers, working_dir)
        label_handler = open(os.path.join(working_dir, "label.txt"), "r")
        configure_handler = open(os.path.join(working_dir, "config.txt"), "r")
        labels = []
        core_count = mp.cpu_count()
        pool = mp.Pool(core_count)
        logger.info("Using {} cores".format(core_count))
        logger.info("Current configurations:")
        text = configure_handler.readlines()
        logger.info(text)

        for line in label_handler:
            line = line.strip()
            labels.append(line)

        # logger.info("classification by layer...")
        # classify_by_layer(n_layers, labels, pool, working_dir)
        logger.info("classification by token...")
        classify_by_token(n_layers, labels, pool, working_dir)

        label_handler.close()
        configure_handler.close()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="model name", choices=['cifar', 'lenet'])
    parser.add_argument("model_file", help="model file")
    parser.add_argument('layer', help='layer name to get image output')
    parser.add_argument('imageID', help='ID of image for input', type=int)
    parser.add_argument('-d', '--dataset', choices=['train', 'val', 'test'], default='test')
    parser.add_argument('--no-separate', help='do not split the data into two halves', action='store_true')
    parser.add_argument('--first-part', help='take first part of data instead of the second', action='store_true')
    parser.add_argument('-i', '--input', help='only get input image', action='store_true')
    parser.add_argument('-w', '--draw-weights', help='only draw the layer weights', action='store_true')

    args = parser.parse_args()

    model = args.model
    batch_size = 1
    separate = not args.no_separate
    model_file = args.model_file
    layer_name = args.layer
    chosen_set = args.dataset
    load_first_part = args.first_part
    imageID = args.imageID
    only_input = args.input
    only_weights = args.draw_weights
    if not only_weights:
        filename = str(imageID) + '_' + model + '_' + layer_name + '_output.png'
    else:
        filename = 'weight_' + model + '_' + layer_name + '_output.png'
    print('--Parameters--')
    print('  model         : ', model)
    print('  layer name    : ', layer_name)
    print('  batch_size    : ', batch_size)
    print('  model_file    : ', model_file)
    print('  middle output images will be saved to : ', filename)
    print('  separate data :', separate)
    if separate:
        print('    take first or second part of data :', 'first' if load_first_part else 'second')
    print('batch_size=', batch_size)

    if separate:
        nOutput = 5
    else:
        nOutput = 10

    # Load the dataset
    print("Loading data...")
    if not only_weights:
        if only_input:
            X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(model, separate, load_first_part,
                                                                                    substract_mean=False)
        else:
            X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(model, separate, load_first_part)

        print(len(X_train), 'train images')
        print(len(X_val), 'val images')
        print(len(X_test), 'test images')

        print('getting from ' + chosen_set)
        if chosen_set == 'train':
            X_set = X_train
            y_set = y_train
        elif chosen_set == 'val':
            X_set = X_val
            y_set = y_val
        else:
            X_set = X_test
            y_set = y_test

        if only_input:
            image_data = X_set[imageID]
            if model == 'cifar':
                image_data = image_data.reshape((3, 32, 32))
                image_data = np.rollaxis(image_data, 0, 3) # 3 32 32 to 32 32 3
            else:
                image_data = image_data.reshape((28, 28))
            image_data *= 255
            image_data = image_data.astype('uint8')
            image = Image.fromarray(image_data)
            image.save(filename)
            print('image saved to :', filename)
            exit()

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    net, net_output = model_io.load_model(model, model_file, nOutput, input_var)

    if not only_weights:
        print("Getting middle output...")

        output = lasagne.layers.get_output(net[layer_name])
        get_output_image = theano.function([input_var], output.flatten(3))

        output_shape = np.array(lasagne.layers.get_output_shape(net[layer_name]))
        foo, nKernel, h, w = output_shape
        print('layer ' + layer_name + ' shape :', output_shape)

        batch_output = get_output_image(np.array([X_set[imageID]]))
        images_output = batch_output[0]
        prediction = lasagne.layers.get_output(net_output)

        get_pred = theano.function([input_var], prediction)
        pred = get_pred(np.array([X_set[imageID]]))
    else:
        if model == 'cifar':
            weights = net[layer_name].W.get_value()
            print('weights shape :', weights.shape)
            nKernel, foo, h, w = weights.shape
            assert foo == 3
            flatten_w = net[layer_name].W.flatten(3)
            images_output = flatten_w.eval()
            images_output = np.rollaxis(images_output, 1, 0)  # nKernel 3 w*h to 3 nKernel w*h
            print('flatten weights shape :', images_output.shape)
        else:
            weights = net[layer_name].W.get_value()
            print('weights shape :', weights.shape)
            nKernel, foo, h, w = weights.shape
            assert foo == 1
            flatten_w = net[layer_name].W.flatten(2)
            images_output = flatten_w.eval()
            print('flatten weights shape :', images_output.shape)
    # choose the smallest square grid (width x width) that can hold all nKernel tiles
    width = 1
    while width * width < nKernel:
        width += 1

    if width * width > nKernel:
        if images_output.ndim == 2:
            images_output = np.concatenate((images_output, np.zeros((width * width - nKernel, w * h))), axis=0)
        elif images_output.ndim == 3:
            images_output = np.concatenate((images_output, np.zeros((3, width * width - nKernel, w * h))), axis=1)
        else:
            assert False

    image = Image.fromarray(tile_raster_images(
        X=images_output,  # chose batch 0
        img_shape=(h, w), tile_shape=(width, width),
        tile_spacing=(1, 1)))
    image.save(filename)
    print('image saved to :', filename)
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="model name", choices=['cifar', 'lenet'])
    parser.add_argument('-n', '--num-epochs', type=int, default=20)
    parser.add_argument('-f', '--model-file', help="model file")
    parser.add_argument('--no-separate',
                        help='do not split the data into two halves',
                        action='store_true')
    parser.add_argument('--second-part',
                        help='take second part of data instead of the first',
                        action='store_true')
    parser.add_argument('-b', '--batch-size', type=int, default=64)
    parser.add_argument('-l', '--learning-rate', type=float, default=0.01)
    parser.add_argument('-t', '--test-only', action='store_true')
    parser.add_argument(
        '-T',
        '--train-from-layer',
        help='only train this layer and the layers after it; '
        'do not update weights of layers before it')
    parser.add_argument(
        '-p',
        '--prefix',
        help='prefix to add at the beginning of model save file')

    args = parser.parse_args()

    model = args.model
    batch_size = args.batch_size
    separate = not args.no_separate
    model_file = args.model_file
    num_epochs = args.num_epochs
    learning_rate = args.learning_rate
    save_file_name = model + '_model'
    test_only = args.test_only
    load_first_part = not args.second_part
    train_from_layer = args.train_from_layer
    prefix = args.prefix

    if test_only and not model_file:
        print('you need to specify a model file to test')
        exit()

    if separate:
        if load_first_part:
            save_file_name = 'firsthalf_' + save_file_name
        else:
            save_file_name = 'secondhalf_' + save_file_name
        nOutput = 5
    else:
        nOutput = 10

    if train_from_layer:
        save_file_name = 'from_' + train_from_layer + save_file_name

    if prefix:
        save_file_name = prefix + save_file_name
    else:
        save_file_name = str(random.randint(10000,
                                            99999)) + '_' + save_file_name

    logfile = save_file_name + '_log.txt'
    log_print = functools.partial(log_and_print, logfile=logfile)
    log_print('--Parameter--')
    log_print('  model={}'.format(model))
    log_print('  batch_size={}'.format(batch_size))
    log_print('  num_epochs={}'.format(num_epochs))
    log_print('  learning_rate={}'.format(learning_rate))
    log_print('  separate data :{}'.format(separate))
    if separate:
        s = '    take first or second part of data :' + (
            'first' if load_first_part else 'second')
        log_print(s)
    log_print('  model_file :{}'.format(model_file))
    log_print('  nOutput = {}'.format(nOutput))
    log_print('  model will be saved to : {}'.format(save_file_name + '*.npz'))
    log_print('  log will be saved to : {}'.format(logfile))
    log_print('  test only :{}'.format(test_only))
    log_print('  only train from this layer : {}'.format(train_from_layer))
    log_print('  prefix to save file : {}'.format(prefix))

    log_print('')

    log_print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(
        model, separate, load_first_part)

    log_print('{} train images'.format(len(X_train)))
    log_print('{} val images'.format(len(X_val)))
    log_print('{} test images'.format(len(X_test)))

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    log_print("Building model and compiling functions...")
    net, net_output = model_io.load_model(model, model_file, nOutput,
                                          input_var)

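    # training objective: mean categorical cross-entropy between predictions and integer class targets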
    prediction = lasagne.layers.get_output(net_output)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

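    # if --train-from-layer is given, collect trainable parameters only from that layer onward (earlier layers stay frozen)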
    if train_from_layer:
        layers_to_train = lasagne.layers.get_all_layers(
            net_output, treat_as_input=[net[train_from_layer]])
        params = get_all_params_from_layers(layers_to_train, trainable=True)
    else:
        params = lasagne.layers.get_all_params(net_output, trainable=True)

    updates = lasagne.updates.nesterov_momentum(loss,
                                                params,
                                                learning_rate=learning_rate,
                                                momentum=0.9)

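    # deterministic=True turns off stochastic layers such as dropout for evaluation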
    test_prediction = lasagne.layers.get_output(net_output, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var)
    test_loss = test_loss.mean()

    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    if not test_only:
        log_print("Starting training...")

        for epoch in range(num_epochs):

            train_err = 0
            train_batches = 0
            start_time = time.time()
            print("Training stage:")
            for batch in load_data.iterate_minibatches(X_train,
                                                       y_train,
                                                       batch_size,
                                                       shuffle=True):
                time_batch = time.time()
                inputs, targets = batch
                this_train_err = train_fn(inputs, targets)
                train_err += this_train_err
                train_batches += 1
                print('train batch', train_batches, 'err+=', this_train_err,
                      '{:.2f}'.format(time.time() - time_batch), 'seconds')

            val_err = 0
            val_acc = 0
            val_batches = 0
            print("Validation stage ..")
            for batch in load_data.iterate_minibatches(X_val,
                                                       y_val,
                                                       batch_size,
                                                       shuffle=False):
                inputs, targets = batch
                err, acc = val_fn(inputs, targets)
                val_err += err
                val_acc += acc
                val_batches += 1

            # Then we print the results for this epoch:
            log1 = "Epoch {} of {} took {:.3f}m".format(
                epoch + 1, num_epochs, (time.time() - start_time) / 60.)
            log2 = "  training loss:\t\t{:.6f}".format(train_err /
                                                       train_batches)
            log3 = "  validation loss:\t\t{:.6f}".format(val_err / val_batches)
            log4 = "  validation accuracy:\t\t{:.2f} %".format(
                val_acc / val_batches * 100)
            log_print(log1)
            log_print(log2)
            log_print(log3)
            log_print(log4)

            # Save the network weights to a file after every epoch:
            model_file = save_file_name + str(epoch) + '.npz'
            model_io.save_model(model_file, net_output)
            log_print('model saved to ' + model_file)

    log_print('testing network ...')
    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in load_data.iterate_minibatches(X_test,
                                               y_test,
                                               batch_size,
                                               shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
    log_print("Final results:")
    log_print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    log_print("  test accuracy:\t\t{:.2f} %".format(test_acc / test_batches *
                                                    100))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="model name", choices=['cifar', 'lenet'])
    parser.add_argument("model_file", help="model file")
    parser.add_argument('layer', help='layer name to get output')
    parser.add_argument('--no-separate', help='do not split the data into two halves', action='store_true')
    parser.add_argument('--first-part', help='take first part of data instead of the second', action='store_true')
    parser.add_argument('-b', '--batch-size', type=int, default=64)
    parser.add_argument('-n', '--data-num', type=int)

    args = parser.parse_args()

    model = args.model
    batch_size = args.batch_size
    separate = not args.no_separate
    model_file = args.model_file
    layer_name = args.layer
    load_first_part = args.first_part
    data_num = args.data_num

    filename = model + '_' + layer_name + '_output.save'
    print('--Parameters--')
    print('  model         : ', model)
    print('  layer name    : ', layer_name)
    print('  batch_size    : ', batch_size)
    print('  model_file    : ', model_file)
    print('  middle output will be saved to : ', filename)
    print('  separate data :', separate)
    if separate:
        print('    take first or second part of data :', 'first' if load_first_part else 'second')
    print('batch_size=', batch_size)

    if separate:
        nOutput = 5
    else:
        nOutput = 10

    # Load the dataset
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(model, separate, load_first_part)
    if data_num:
        X_train = X_train[:data_num]
        y_train = y_train[:data_num]
        X_val = X_val[:data_num]
        y_val = y_val[:data_num]
        X_test = X_test[:data_num]
        y_test = y_test[:data_num]

    print(len(X_train), 'train images')
    print(len(X_val), 'val images')
    print(len(X_test), 'test images')

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    net, net_output = model_io.load_model(model, model_file, nOutput, input_var)

    # middle_output = theano.function([input_var], net[layer_name].output)
    print("Getting middle output...")

    output = lasagne.layers.get_output(net[layer_name])
    get_output = theano.function([input_var], output.flatten(2))

    output_shape = np.array(lasagne.layers.get_output_shape(net[layer_name]))
    print('layer ' + layer_name + ' shape :', output_shape)

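    # collect flattened layer outputs and labels for the train and test splits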
    all_train_output = []
    all_train_y = []
    all_test_output = []
    all_test_y = []
    print('getting from train')
    for batch in load_data.iterate_minibatches(X_train, y_train, batch_size, shuffle=False):
        print('.', end='', flush=True)
        inputs, targets = batch
        batch_output = get_output(inputs)  # a numpy ndarray
        all_train_output.extend(batch_output.tolist())
        all_train_y.extend(targets.tolist())
    print()
    print('getting from test')
    for batch in load_data.iterate_minibatches(X_test, y_test, batch_size, shuffle=False):
        print('.', end='', flush=True)
        inputs, targets = batch
        batch_output = get_output(inputs)  # a numpy ndarray
        all_test_output.extend(batch_output.tolist())
        all_test_y.extend(targets.tolist())
    print()

    print("train output shape : ", np.array(all_train_output).shape)
    print("train y shape : ", np.array(all_train_y).shape)
    print("test output shape : ", np.array(all_test_output).shape)
    print("test y shape : ", np.array(all_test_y).shape)

    with open(filename, 'wb') as f:
        pickle.dump([all_train_output, all_train_y, all_test_output, all_test_y], f, protocol=pickle.HIGHEST_PROTOCOL)
    print('... saved to ', filename)
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="model name", choices=['cifar', 'lenet'])
    parser.add_argument('-n', '--num-epochs', type=int, default=20)
    parser.add_argument('-f', '--model-file', help="model file")
    parser.add_argument('--no-separate', help='do not split the data into two halves', action='store_true')
    parser.add_argument('--second-part', help='take second part of data instead of the first', action='store_true')
    parser.add_argument('-b', '--batch-size', type=int, default=64)
    parser.add_argument('-l', '--learning-rate', type=float, default=0.01)
    parser.add_argument('-t', '--test-only', action='store_true')
    parser.add_argument('-T', '--train-from-layer',
                        help='only train this layer and the layers after it; '
                             'do not update weights of layers before it')
    parser.add_argument('-p', '--prefix', help='prefix to add at the beginning of model save file')

    args = parser.parse_args()

    model = args.model
    batch_size = args.batch_size
    separate = not args.no_separate
    model_file = args.model_file
    num_epochs = args.num_epochs
    learning_rate = args.learning_rate
    save_file_name = model + '_model'
    test_only = args.test_only
    load_first_part = not args.second_part
    train_from_layer = args.train_from_layer
    prefix = args.prefix

    if test_only and not model_file:
        print('you need to specify a model file to test')
        exit()

    if separate:
        if load_first_part:
            save_file_name = 'firsthalf_' + save_file_name
        else:
            save_file_name = 'secondhalf_' + save_file_name
        nOutput = 5
    else:
        nOutput = 10

    if train_from_layer:
        save_file_name = 'from_' + train_from_layer + save_file_name

    if prefix:
        save_file_name = prefix + save_file_name
    else:
        save_file_name = str(random.randint(10000, 99999)) + '_' + save_file_name

    logfile = save_file_name + '_log.txt'
    log_print = functools.partial(log_and_print, logfile=logfile)
    log_print('--Parameter--')
    log_print('  model={}'.format(model))
    log_print('  batch_size={}'.format(batch_size))
    log_print('  num_epochs={}'.format(num_epochs))
    log_print('  learning_rate={}'.format(learning_rate))
    log_print('  separate data :{}'.format(separate))
    if separate:
        s = '    take first or second part of data :' + ('first' if load_first_part else 'second')
        log_print(s)
    log_print('  model_file :{}'.format(model_file))
    log_print('  nOutput = {}'.format(nOutput))
    log_print('  model will be saved to : {}'.format(save_file_name + '*.npz'))
    log_print('  log will be saved to : {}'.format(logfile))
    log_print('  test only :{}'.format(test_only))
    log_print('  only train from this layer : {}'.format(train_from_layer))
    log_print('  prefix to save file : {}'.format(prefix))

    log_print('')

    log_print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(model, separate, load_first_part)

    log_print('{} train images'.format(len(X_train)))
    log_print('{} val images'.format(len(X_val)))
    log_print('{} test images'.format(len(X_test)))

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    log_print("Building model and compiling functions...")
    net, net_output = model_io.load_model(model, model_file, nOutput, input_var)

    prediction = lasagne.layers.get_output(net_output)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    if train_from_layer:
        layers_to_train = lasagne.layers.get_all_layers(net_output, treat_as_input=[net[train_from_layer]])
        params = get_all_params_from_layers(layers_to_train, trainable=True)
    else:
        params = lasagne.layers.get_all_params(net_output, trainable=True)

    updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=learning_rate, momentum=0.9)

    test_prediction = lasagne.layers.get_output(net_output, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean()

    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    if not test_only:
        log_print("Starting training...")

        for epoch in range(num_epochs):

            train_err = 0
            train_batches = 0
            start_time = time.time()
            print("Training stage:")
            for batch in load_data.iterate_minibatches(X_train, y_train, batch_size, shuffle=True):
                time_batch = time.time()
                inputs, targets = batch
                this_train_err = train_fn(inputs, targets)
                train_err += this_train_err
                train_batches += 1
                print('train batch', train_batches, 'err+=', this_train_err,
                      '{:.2f}'.format(time.time() - time_batch), 'seconds')

            val_err = 0
            val_acc = 0
            val_batches = 0
            print("Validation stage ..")
            for batch in load_data.iterate_minibatches(X_val, y_val, batch_size, shuffle=False):
                inputs, targets = batch
                err, acc = val_fn(inputs, targets)
                val_err += err
                val_acc += acc
                val_batches += 1

            # Then we print the results for this epoch:
            log1 = "Epoch {} of {} took {:.3f}m".format(epoch + 1, num_epochs, (time.time() - start_time) / 60.)
            log2 = "  training loss:\t\t{:.6f}".format(train_err / train_batches)
            log3 = "  validation loss:\t\t{:.6f}".format(val_err / val_batches)
            log4 = "  validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100)
            log_print(log1)
            log_print(log2)
            log_print(log3)
            log_print(log4)

            # Save the network weights to a file after every epoch:
            model_file = save_file_name + str(epoch) + '.npz'
            model_io.save_model(model_file, net_output)
            log_print('model saved to ' + model_file)

    log_print('testing network ...')
    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in load_data.iterate_minibatches(X_test, y_test, batch_size, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
    log_print("Final results:")
    log_print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    log_print("  test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="model name", choices=['cifar', 'lenet'])
    parser.add_argument("model_file", help="model file")
    parser.add_argument('layer', help='layer name to get image output')
    parser.add_argument('imageID', help='ID of image for input', type=int)
    parser.add_argument('-d',
                        '--dataset',
                        choices=['train', 'val', 'test'],
                        default='test')
    parser.add_argument('--no-separate',
                        help='do not split the data into two halves',
                        action='store_true')
    parser.add_argument('--first-part',
                        help='take first part of data instead of the second',
                        action='store_true')
    parser.add_argument('-i',
                        '--input',
                        help='only get input image',
                        action='store_true')
    parser.add_argument('-w',
                        '--draw-weights',
                        help='only draw the layer weights',
                        action='store_true')

    args = parser.parse_args()

    model = args.model
    batch_size = 1
    separate = not args.no_separate
    model_file = args.model_file
    layer_name = args.layer
    chosen_set = args.dataset
    load_first_part = args.first_part
    imageID = args.imageID
    only_input = args.input
    only_weights = args.draw_weights
    if not only_weights:
        filename = str(
            imageID) + '_' + model + '_' + layer_name + '_output.png'
    else:
        filename = 'weight_' + model + '_' + layer_name + '_output.png'
    print('--Parameters--')
    print('  model         : ', model)
    print('  layer name    : ', layer_name)
    print('  batch_size    : ', batch_size)
    print('  model_file    : ', model_file)
    print('  middle output images will be saved to : ', filename)
    print('  separate data :', separate)
    if separate:
        print('    take first or second part of data :',
              'first' if load_first_part else 'second')
    print('batch_size=', batch_size)

    if separate:
        nOutput = 5
    else:
        nOutput = 10

    # Load the dataset
    print("Loading data...")
    if not only_weights:
        if only_input:
            X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(
                model, separate, load_first_part, substract_mean=False)
        else:
            X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(
                model, separate, load_first_part)

        print(len(X_train), 'train images')
        print(len(X_val), 'val images')
        print(len(X_test), 'test images')

        print('getting from ' + chosen_set)
        if chosen_set == 'train':
            X_set = X_train
            y_set = y_train
        elif chosen_set == 'val':
            X_set = X_val
            y_set = y_val
        else:
            X_set = X_test
            y_set = y_test

        if only_input:
            image_data = X_set[imageID]
            if model == 'cifar':
                image_data = image_data.reshape((3, 32, 32))
                image_data = np.rollaxis(image_data, 0,
                                         3)  # 3 32 32 to 32 32 3
            else:
                image_data = image_data.reshape((28, 28))
            image_data *= 255
            image_data = image_data.astype('uint8')
            image = Image.fromarray(image_data)
            image.save(filename)
            print('image saved to :', filename)
            exit()

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    net, net_output = model_io.load_model(model, model_file, nOutput,
                                          input_var)

    if not only_weights:
        print("Getting middle output...")

        output = lasagne.layers.get_output(net[layer_name])
        get_output_image = theano.function([input_var], output.flatten(3))

        output_shape = np.array(
            lasagne.layers.get_output_shape(net[layer_name]))
        foo, nKernel, h, w = output_shape
        print('layer ' + layer_name + ' shape :', output_shape)

        batch_output = get_output_image(np.array([X_set[imageID]]))
        images_output = batch_output[0]
        prediction = lasagne.layers.get_output(net_output)

        get_pred = theano.function([input_var], prediction)
        pred = get_pred(np.array([X_set[imageID]]))
    else:
        if model == 'cifar':
            weights = net[layer_name].W.get_value()
            print('weights shape :', weights.shape)
            nKernel, foo, h, w = weights.shape
            assert foo == 3
            flatten_w = net[layer_name].W.flatten(3)
            images_output = flatten_w.eval()
            images_output = np.rollaxis(images_output, 1,
                                        0)  # nKernel 3 w*h to 3 nKernel w*h
            print('flatten weights shape :', images_output.shape)
        else:
            weights = net[layer_name].W.get_value()
            print('weights shape :', weights.shape)
            nKernel, foo, h, w = weights.shape
            assert foo == 1
            flatten_w = net[layer_name].W.flatten(2)
            images_output = flatten_w.eval()
            print('flatten weights shape :', images_output.shape)

    # choose the smallest square grid (width x width) that can hold all nKernel tiles
    width = 1
    while width * width < nKernel:
        width += 1

    if width * width > nKernel:
        if images_output.ndim == 2:
            images_output = np.concatenate(
                (images_output, np.zeros((width * width - nKernel, w * h))),
                axis=0)
        elif images_output.ndim == 3:
            images_output = np.concatenate(
                (images_output, np.zeros((3, width * width - nKernel, w * h))),
                axis=1)
        else:
            assert False

    image = Image.fromarray(
        tile_raster_images(
            X=images_output,  # chose batch 0
            img_shape=(h, w),
            tile_shape=(width, width),
            tile_spacing=(1, 1)))
    image.save(filename)
    print('image saved to :', filename)
Example #9
def _test():
    from model_io import load_model, save_pickle_model, save_tf_learn_model
    import model_io

    def get_here_path(relative_path):
        cur_dir_path = os.path.dirname(__file__)
        return os.path.join(cur_dir_path, relative_path)

    save_dir = get_here_path("saved")

    pkl_bin_type = "pickle"
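    # synthetic toy data: 20 samples, 3 features, binary labels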
    x, y = np.random.randn(20, 3), np.random.randint(2, size=20)

    # test trans
    trans = get_fitted_transformer("min_max", {"feature_range": (-1, 1)}, x)
    fitted_x = exec_transformer(trans, x)
    print(fitted_x)
    save_pickle_model(trans, "min_max_1.pkl", save_dir)

    trans = load_model(get_here_path("saved/min_max_1.pkl"),
                       model_io.bin_type.PICKLE)
    print(exec_transformer(trans, x))

    # test sklearn model
    lr = get_fitted_sklearn_model("lr", {"penalty": 'l1'}, x, y)
    save_pickle_model(lr, "lr_1.pkl", save_dir)

    lr = load_model(get_here_path("saved/lr_1.pkl"), model_io.bin_type.PICKLE)
    print(lr)
    print(exec_sklearn_model(lr, x))

    rf = get_fitted_sklearn_model("rf", {}, x, y)
    print(rf)
    print(exec_sklearn_model(rf, x))

    # test xgb model
    para_dict = {}
    para_dict["params"] = {
        'max_depth': 2,
        'eta': 1,
        'silent': 1,
        'objective': 'binary:logistic'
    }
    para_dict["num_boost_round"] = 2

    xgb = get_fitted_xgb_model("xgb_1", para_dict, x, y)
    save_pickle_model(xgb, "xgb_1.pkl", save_dir)

    xgb = load_model(get_here_path("saved/xgb_1.pkl"),
                     model_io.bin_type.PICKLE)
    print(str(xgb))
    print(exec_xgb_model(xgb, x))

    # test tf dnn classifier
    para_dict = {}
    para_dict["hidden_units"] = [10, 20, 10]
    para_dict["n_classes"] = 2
    dnn, dnn2 = get_fitted_tfdnn_model("dnn_1", para_dict,
                                       x.astype(np.float32),
                                       y.astype(np.float32))
    save_pickle_model(dnn2, "dnn_1.pkl", save_dir)
    save_tf_learn_model(
        dnn,
        "dnn_1",
        save_dir,
        learn.infer_real_valued_columns_from_input(x.astype(np.float32)),
    )
    print(list(dnn.predict(x.astype(np.float32), as_iterable=False)))

    print("begin to load dnn...")
    m = model_io.load_model("saved/dnn_1.pkl", model_io.bin_type.TF_LEARN)
    print(list(m.predict(x.astype(np.float32))))
Example #10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="model name", choices=['cifar', 'lenet'])
    parser.add_argument("model_file", help="model file")
    parser.add_argument('layer', help='layer name to get output')
    parser.add_argument('--no-separate',
                        help='do not split the data into two halves',
                        action='store_true')
    parser.add_argument('--first-part',
                        help='take first part of data instead of the second',
                        action='store_true')
    parser.add_argument('-b', '--batch-size', type=int, default=64)
    parser.add_argument('-n', '--data-num', type=int)

    args = parser.parse_args()

    model = args.model
    batch_size = args.batch_size
    separate = not args.no_separate
    model_file = args.model_file
    layer_name = args.layer
    load_first_part = args.first_part
    data_num = args.data_num

    filename = model + '_' + layer_name + '_output.save'
    print('--Parameters--')
    print('  model         : ', model)
    print('  layer name    : ', layer_name)
    print('  batch_size    : ', batch_size)
    print('  model_file    : ', model_file)
    print('  middle output will be saved to : ', filename)
    print('  separate data :', separate)
    if separate:
        print('    take first or second part of data :',
              'first' if load_first_part else 'second')
    print('batch_size=', batch_size)

    if separate:
        nOutput = 5
    else:
        nOutput = 10

    # Load the dataset
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(
        model, separate, load_first_part)
    if data_num:
        X_train = X_train[:data_num]
        y_train = y_train[:data_num]
        X_val = X_val[:data_num]
        y_val = y_val[:data_num]
        X_test = X_test[:data_num]
        y_test = y_test[:data_num]

    print(len(X_train), 'train images')
    print(len(X_val), 'val images')
    print(len(X_test), 'test images')

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    net, net_output = model_io.load_model(model, model_file, nOutput,
                                          input_var)

    # middle_output = theano.function([input_var], net[layer_name].output)
    print("Getting middle output...")

    output = lasagne.layers.get_output(net[layer_name])
    get_output = theano.function([input_var], output.flatten(2))

    output_shape = np.array(lasagne.layers.get_output_shape(net[layer_name]))
    print('layer ' + layer_name + ' shape :', output_shape)

    all_train_output = []
    all_train_y = []
    all_test_output = []
    all_test_y = []
    print('getting from train')
    for batch in load_data.iterate_minibatches(X_train,
                                               y_train,
                                               batch_size,
                                               shuffle=False):
        print('.', end='', flush=True)
        inputs, targets = batch
        batch_output = get_output(inputs)  # a numpy ndarray
        all_train_output.extend(batch_output.tolist())
        all_train_y.extend(targets.tolist())
    print()
    print('getting from test')
    for batch in load_data.iterate_minibatches(X_test,
                                               y_test,
                                               batch_size,
                                               shuffle=False):
        print('.', end='', flush=True)
        inputs, targets = batch
        batch_output = get_output(inputs)  # a numpy ndarray
        all_test_output.extend(batch_output.tolist())
        all_test_y.extend(targets.tolist())
    print()

    print("train output shape : ", np.array(all_train_output).shape)
    print("train y shape : ", np.array(all_train_y).shape)
    print("test output shape : ", np.array(all_test_output).shape)
    print("test y shape : ", np.array(all_test_y).shape)

    with open(filename, 'wb') as f:
        pickle.dump(
            [all_train_output, all_train_y, all_test_output, all_test_y],
            f,
            protocol=pickle.HIGHEST_PROTOCOL)
    print('... saved to ', filename)