Example #1
    def _build_loader(self):
        logging.info("Loading data...")
        self.train_data, self.test_data = get_dataset(self.args.path,
                                                      self.args.scale,
                                                      train=True)
        if self.args.evaluate:
            self.eval_data = get_dataset(self.args.path,
                                         self.args.scale,
                                         train=False)
def visual_dataset():

    data_args = args[DATASET]['data_args']

    config = {
        'CIFAR10' : {'class_name': ['Airplane', 'Automobile', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 'Horse', 'Ship', 'Truck'],
                    'transform': transforms.Compose(
                            [transforms.ToTensor(),
                            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))])
                    },
        'PLANKTON10': {'class_name': ['Trichodesmium puff', 'Protist', 'Acantharia protist', 'Appendicularian s-shape', \
                                        'Hydromedusae solmaris', 'Trichodesmium bowtie', 'Chaetognath sagitta', 'Copepod cyclopoid oithona eggs', \
                                        'Detritus', 'Echinoderm larva seastar brachiolaria'],
                        'transform': transforms.Compose(
                            [transforms.ToTensor(),
                            transforms.Normalize(mean=(0.95,), std=(0.2,))])
                        },
        'AILARON': {'class_name': ['fish_egg', 'copepod', 'diatom_chain', 'other', 'faecal_pellets', 'bubble'],
                    'transform': transforms.Compose(
                            [transforms.ToTensor(),
                            transforms.Normalize(mean=(0.95,), std=(0.2,))
                            ])
                    },
        'PASTORE': {'class_name': ['volvox', 'spirostomum_ambigum', 'blepharisma_americanum', 'actinosphaerium_nucleofilum', 'euplotes_eurystomus', 'stentor_coeruleus', \
                    'dileptus', 'didinum_nasutum', 'paramecium_bursaria', 'arcella_vulgaris'],
                    'transform': transforms.Compose(
                            [transforms.ToTensor(),
                            transforms.Normalize(mean=(0.95,), std=(0.2,))])
                    }
        }
    class_names = config[DATASET]['class_name']
    transform = config[DATASET]['transform']

    X_tr, Y_tr, _, _, _, _ = get_dataset(DATASET, data_args)

    data_handler = DataHandler(X_tr, Y_tr, transform)
    data_loader = DataLoader(data_handler, batch_size=100, num_workers=4)

    indices_plotted = [0 for _ in range(len(class_names))]
    print(len(indices_plotted))
    images, indices, _ = next(iter(data_loader))
    example_rows = 2
    example_cols = int(len(class_names) // 2)
    # Show a grid of example images
    fig, axes = plt.subplots(example_rows, example_cols,
                             figsize=(11, 5))  #  sharex=True, sharey=True)
    axes = axes.flatten()
    for image, index in zip(images, indices):
        if (indices_plotted[index] == 1):
            continue
        else:
            ax = axes[index]
            ax.imshow(denormalise(image))
            ax.set_axis_off()
            ax.set_title(class_names[index], fontsize=7)
            indices_plotted[index] = 1

    fig.subplots_adjust(wspace=0.06, hspace=1)
    #fig.suptitle(DATASET, fontsize=20)
    plt.savefig(f'./dataset_plots/{DATASET}.png')
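The denormalise helper called in visual_dataset above is not part of the snippet. A minimal sketch, assuming it simply inverts the transforms.Normalize step and returns an HxWxC array for imshow (the default mean/std values here are assumptions taken from the plankton transforms above):

import numpy as np

def denormalise(image, mean=(0.95,), std=(0.2,)):
    # image: CxHxW tensor produced by ToTensor + Normalize (assumed helper)
    img = image.numpy().transpose(1, 2, 0)        # CxHxW -> HxWxC
    img = img * np.array(std) + np.array(mean)    # invert Normalize
    return np.clip(img, 0.0, 1.0)                 # keep values displayable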
Example #3
def main():
    batch_size_for_capture = 300
    x_train, y_train = get_dataset(batch_size_for_capture)
    model = create_model()
    model = train_model(model, x_train, y_train)
    save_model(model)
    return model
Example #4
def main():
    from get_dataset import get_dataset
    X, X_test, Y, Y_test = get_dataset()
    #print('Step 1 done')
    #print(X.shape)
    #print(X_test.shape)
    #print(Y.shape)
    #print(Y_test.shape)
    X=X.reshape(X.shape[0],X.shape[1],X.shape[2],1)
    X_test=X_test.reshape(X_test.shape[0],X_test.shape[1],X_test.shape[2],1)
    #X=X.reshape(1,X.shape[0],X.shape[1],X.shape[2])
    #Y=Y.reshape(Y.shape[0],1,1,1)
    #Y=Y.reshape(Y.shape[0],1)
    
    #print(X.shape)
    #print(Y.shape)
    
    from makensave_model import get_model
    model = get_model(len(Y[0]))
    
    print('Step 2 done')
    model = train_my_model(model, X, X_test, Y, Y_test)
    
    
    from makensave_model import save_model
    save_model(model)
    
    print('Step 3 done')
    return model
Example #5
def main():

    net = Net()
    trainset, testset = get_dataset()
    optimizer = optim.Adam(net.parameters(), lr=1e-3)

    # training
    print('Starting training...\n')
    EPOCHS = 3
    for epoch in range(EPOCHS):
        for data in tqdm(trainset):  # data is a batch of feature sets & labels
            X, y = data
            net.zero_grad()  # reset gradients
            output = net(X.view(-1, 28 * 28))  # -1: (batch-)size not known

            loss = F.nll_loss(output, y)
            loss.backward()  # magical
            optimizer.step()  # adjust weights

    # evaluation
    correct, total = 0, 0
    with torch.no_grad():  # don't train on out_of_sample data
        for data in testset:
            X, y = data
            output = net(X.view(-1, 28 * 28))
            for idx, i in enumerate(output):
                if np.argmax(i) == y[idx]:
                    correct += 1
                total += 1

    print('Accuracy:', round(correct / total, 3))  # ~0.97, great accuracy!
    torch.save(
        net, os.path.join(PATH_TO_SAVE_NETWORK,
                          'handwritten_digit_classifier'))
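Example #5 assumes a Net class that is not shown. A minimal sketch consistent with net(X.view(-1, 28 * 28)) and F.nll_loss above (layer widths are assumptions):

import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    # hypothetical MLP: flattened 28x28 input, log-probabilities for nll_loss
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return F.log_softmax(self.fc3(x), dim=1)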
def visual_augmentation():

    learning_args = args[DATASET]['learning_args']
    data_args = args[DATASET]['data_args']
    transform = learning_args['transform']
    X_tr, Y_tr, _, _, _, _ = get_dataset(DATASET, data_args)

    transform_no_aug = transforms.Compose([
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.95, ), std=(0.2, ))
    ])
    data_handler = DataHandler(X_tr, Y_tr, transform)
    data_handler_no_aug = DataHandler(X_tr, Y_tr, transform_no_aug)
    data_loader = DataLoader(data_handler, batch_size=8, num_workers=4)
    data_loader_no_aug = DataLoader(data_handler_no_aug,
                                    batch_size=8,
                                    num_workers=4)

    # Get a batch of training data
    inputs, _, _ = next(iter(data_loader))
    inputs_no_aug, _, _ = next(iter(data_loader_no_aug))

    # Make a grid from batch
    out = torchvision.utils.make_grid(inputs)
    out_no_aug = torchvision.utils.make_grid(inputs_no_aug)

    imshow(out_no_aug, out, img_name='comparison')
def run_final_setup(lr_list,
                    decay_list,
                    save_list,
                    num_steps=20000,
                    steps_per_eval=50):
    """Gets the results for the final setup.

    Args:
        lr_list: List of learning rate values.
        decay_list: List of decay values.
        save_list: List of save filenames.
        num_steps: Number of training steps.
        steps_per_eval: Number of steps between evaluations.
    """
    # Pretrain
    dataset = get_dataset('mnist')
    data_list = [
        dataset.next_batch(100) for step in six.moves.xrange(num_steps)
    ]
    data_list_eval = data_list[:600]
    dataset_test = get_dataset('mnist', test=True)
    data_list_test = [
        dataset_test.next_batch(100) for step in six.moves.xrange(100)
    ]

    for ii, (lr, decay, save) in enumerate(
            zip(lr_list, decay_list, save_list)):
        print('-' * 80)
        log.info('Running lr = {:.3e} decay = {:.3e}'.format(lr, decay))
        with tf.Graph().as_default():
            results = train_mnist_mlp_with_test(
                init_lr=lr,
                num_steps=num_steps,
                decay_const=decay,
                steps_per_eval=steps_per_eval,
                inverse_decay=True,
                pretrain_ckpt=PRETRAIN_FILE,
                print_step=False,
                data_list=data_list,
                data_list_eval=data_list_eval,
                data_list_test=data_list_test)
        log.info(
            'Final Train Cost {:.3e} Train Acc {:.3f} Test Cost {:.3e} Test Acc {:.3f}'.
            format(results.train_xent[-1], results.train_acc[-1],
                   results.test_xent[-1], results.test_acc[-1]))
        print(results.train_xent)
        print('Train Cost', results.train_xent[-1])
        save_results(save, results)
Example #8
def main():
    from get_dataset import get_dataset
    X, X_test, Y, Y_test = get_dataset()
    from get_model import get_model, save_model
    model = get_model(len(Y[0]))
    import numpy
    model = train_model(model, X, X_test, Y, Y_test)
    save_model(model)
    return model
Example #9
    def _build_loader(self):
        print("Loading data...")

        TEXT = Field(batch_first=True, fix_length=self.args.max_words)
        LABEL = LabelField(sequential=False, batch_first=True, use_vocab=False)
        field = [('text', TEXT), ('label', LABEL)]

        train = get_dataset("train", field)
        test = get_dataset("test", field)
        evl = get_dataset("eval", field)
        TEXT.build_vocab(train, test, evl, min_freq=3)

        self.vocab = TEXT
        self.train_iter, self.test_iter, self.evl_iter = BucketIterator.splits(
            (train, test, evl),
            batch_sizes=(self.args.batch_size, self.args.batch_size,
                         self.args.batch_size),
            device=self.device,
            shuffle=True,
            sort=False,
            repeat=False,
        )
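A short usage sketch (not part of the original example) for the iterators built above, assuming the classic torchtext Field/BucketIterator API where each batch exposes attributes named after the declared fields:

def train_epoch(model, train_iter, criterion, optimizer):
    # hypothetical training loop over a BucketIterator such as self.train_iter above
    model.train()
    for batch in train_iter:
        text, label = batch.text, batch.label    # attribute names come from the fields
        optimizer.zero_grad()
        logits = model(text)
        loss = criterion(logits, label)
        loss.backward()
        optimizer.step()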
Example #10
def main():

    # setup net
    net = Net()
    optimizer = optim.Adam(net.parameters(), lr=1e-3)
    loss_function = nn.MSELoss()

    # prepare data
    X, y = get_dataset()
    # separate dataset into training and testing
    VAL_PCT = 0.1
    val_size = int(len(X) * VAL_PCT)
    train_X = X[:-val_size]
    train_y = y[:-val_size]
    test_X = X[-val_size:]
    test_y = y[-val_size:]

    # train
    print('Starting training...\n')
    BATCH_SIZE, EPOCHS = 100, 10  # if memory issues -> decrease batch size
    for epoch in range(EPOCHS):
        for i in tqdm(range(0, len(train_X), BATCH_SIZE)):
            batch_X = train_X[i:i + BATCH_SIZE].view(-1, 1, 50, 50)
            batch_y = train_y[i:i + BATCH_SIZE]

            net.zero_grad()
            outputs = net(batch_X)
            loss = loss_function(outputs, batch_y)
            loss.backward()
            optimizer.step()
        print()

    # evaluate
    correct, total = 0, 0
    with torch.no_grad():
        for i in tqdm(range(len(test_X))):
            real_class = torch.argmax(test_y[i])
            net_out = net(test_X[i].view(-1, 1, 50, 50))[0]
            predicted_class = torch.argmax(net_out)
            if predicted_class == real_class:
                correct += 1
            total += 1

    print('Accuracy:', round(correct / total, 3))
    torch.save(net, './cat_dog_classifier')
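Here, too, Net is assumed rather than defined. A minimal sketch consistent with the 1x50x50 input and the MSELoss-on-one-hot setup used above (filter counts and layer sizes are assumptions):

import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    # hypothetical conv net for 1x50x50 inputs and 2 one-hot outputs
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.fc1 = nn.Linear(64 * 9 * 9, 128)
        self.fc2 = nn.Linear(128, 2)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)   # 50 -> 46 -> 23
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)   # 23 -> 19 -> 9
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return torch.softmax(self.fc2(x), dim=1)     # probabilities for MSELoss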
Example #11
def main(_):

    # get experiment folder and create dir for plots
    exp_folder = os.path.join(FLAGS.exp_root, FLAGS.exp_name,
                              'exp{}'.format(FLAGS.exp_nr))
    test_folder = os.path.join(exp_folder, 'test')
    tf.io.gfile.mkdir(test_folder)

    # get experiment FLAGS
    TRAINING_FLAGS = yaml.safe_load(
        tf.io.gfile.GFile(os.path.join(exp_folder, 'FLAGS.yml'), 'r'))

    # get dataset
    test_set, test_labels = get_dataset(
        '.',
        TRAINING_FLAGS['num_feat'],
        TRAINING_FLAGS['slice_length'],
        type='test',
        return_sequences=TRAINING_FLAGS['return_sequences'])

    sequence_length = test_set.shape[1]
    feature_dim = test_set.shape[2]
    if TRAINING_FLAGS['model'] == 'tcn':

        model = get_tcn(
            sequence_length,
            feature_dim,
            nb_filters=TRAINING_FLAGS['num_filters'],
            nb_stacks=TRAINING_FLAGS['num_stacks'],
            use_skip_connections=TRAINING_FLAGS['use_skip_connections'],
            use_batch_norm=TRAINING_FLAGS['bn'],
            return_sequences=False,  #TRAINING_FLAGS['return_sequences'],
            dilation_stages=TRAINING_FLAGS['dilation_stages'])
    elif TRAINING_FLAGS['model'] == 'cnn':
        model = get_cnn((sequence_length, feature_dim))

    else:
        assert False, 'Unknown model!'

    model(tf.zeros((1, sequence_length, feature_dim)))
    model.load_weights(os.path.join(exp_folder, 'model.h5'))
    model.compile()
    model.summary()

    # If TCN, we have to cut off the model above the strided slice since it is not supported in NNTool; we perform the last Dense layer as a matrix product.
    if TRAINING_FLAGS['model'] == 'tcn':
        model = tf.keras.Model(
            inputs=[model.input],
            outputs=[model.get_layer(name='reshape_1').output])

    model.summary()
    converter = tf.lite.TFLiteConverter.from_keras_model(model)

    # Convert the model to the TensorFlow Lite format with quantization
    tflite_model_name = 'quant_model'
    quantize = True
    if (quantize):

        def representative_dataset():
            for i in range(100):
                yield [test_set[i].reshape(1, sequence_length, feature_dim)]

        # Set the optimization flag.
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        # Enforce full-int8 quantization
        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS_INT8
        ]
        converter.inference_input_type = tf.uint8  # or tf.uint8
        converter.inference_output_type = tf.uint8  # or tf.uint8
        # Provide a representative dataset to ensure we quantize correctly.
        converter.representative_dataset = representative_dataset
    tflite_model = converter.convert()
    model_path = os.path.join(exp_folder, tflite_model_name + '.tflite')
    open(model_path, 'wb').write(tflite_model)
Example #12
class CrossEntropy(Loss):
    def compute_loss(self, inputs):
        y_true, y_pred = inputs
        loss = K.categorical_crossentropy(y_true, K.softmax(y_pred))
        return K.mean(loss)

if __name__ == '__main__':
    num_classes = 4
    vocab_size = 33106
    max_length = 181
    hidden_dim = 64
    train_batch_size = 128
    val_batch_size = 500

    (X_train, Y_train), (X_val, Y_val) = get_dataset()
    dictionary = corpora.Dictionary(pd.concat([X_train, X_val]))

    X_train = [str2id(x, dictionary.token2id) for x in X_train]
    X_val = [str2id(x, dictionary.token2id) for x in X_val]

    X_train = sequence_padding(X_train, max_length=max_length)
    Y_train = np.array(Y_train, dtype='int32')
    X_val = sequence_padding(X_val, max_length=max_length)
    Y_val = np.array(Y_val, dtype='int32')

    train_dataset = Dataset(X_train, Y_train, label_transform=ToOneHot(num_classes))
    val_dataset = Dataset(X_val, Y_val, label_transform=ToOneHot(num_classes))
    train_generator = generator(train_dataset, batch_size=train_batch_size, shuffle=True)
    val_generator = generator(val_dataset, batch_size=val_batch_size, shuffle=False)
Example #13
def main():
    x, x_test, y, y_test = get_dataset()
    model = get_model()
    model = train_model(model, x, x_test, y, y_test)
    save_model(model)
    return model
Example #14
def main(online_boost):
    print("------------------------")
    print("Running Online Boosting = {:s}".format(str(online_boost)))
    print("------------------------")
    from textmenu import textmenu
    # ------------- Dataset -------------
    datasets = get_dataset.all_names()
    indx = textmenu(datasets)
    if indx is None:
        return
    dataset = datasets[indx]
    x_tra, y_tra, x_val, y_val = get_dataset.get_dataset(dataset)
    model_name_suffix = dataset

    # default params:
    lr_gamma = 0.3
    max_epoch = 200
    batch_weak_learner_max_epoch = 40

    if dataset == 'arun_1d':
        n_nodes = [20, 10, 1]
        n_lvls = len(n_nodes)
        mean_types = [sigmoid_clf_mean for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [square_loss_eltws for lvl in range(n_lvls - 1)]
        #loss_types = [logistic_loss_eltws for lvl in range(n_lvls-1) ]
        loss_types.append(square_loss_eltws)
        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = None

        weak_learner_params = {
            'type': 'res',
            'res_inter_dim': 1,
            'res_add_linear': False
        }
        weak_classification = False

        lr_boost_adam = 0.3 * 1e-2
        lr_leaf_adam = 0.3 * 1e-1
        lr_decay_step = x_tra.shape[0] * 10
        ps_ws_val = 1.0
        reg_lambda = 0.0

    elif dataset == 'mnist':
        n_nodes = [10, 1]
        batch_weak_learner_max_epoch = 24
        n_lvls = len(n_nodes)
        mean_types = [tf.nn.relu for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [square_loss_eltws for lvl in range(n_lvls - 1)]
        #loss_types = [logistic_loss_eltws for lvl in range(n_lvls - 1)]
        loss_types.append(tf.nn.softmax_cross_entropy_with_logits)

        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = multi_clf_err

        weak_learner_params = {
            'type': 'conv',
            'filter_size': [5, 5, 1, 5],
            'stride': [2, 2]
        }
        weak_classification = True

        # mnist lr
        lr_boost_adam = 1e-8
        lr_leaf_adam = 5e-4
        lr_decay_step = x_tra.shape[0] * 100
        ps_ws_val = 1.0
        reg_lambda = 0.0

    elif dataset == 'grasp_hog':
        n_nodes = [32, 1]
        n_lvls = len(n_nodes)
        mean_types = [sigmoid_clf_mean for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [logistic_loss_eltws for lvl in range(n_lvls - 1)]
        loss_types.append(logistic_loss_eltws_masked)

        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = multi_clf_err_masked
        weak_learner_params = {'type': 'res', 'res_inter_dim': x_tra.shape[1]}
        weak_classification = True

        lr_boost_adam = 1e-3
        lr_leaf_adam = 1e-2
        lr_decay_step = x_tra.shape[0] * 3
        ps_ws_val = 1.0
        reg_lambda = 0.0

    elif dataset == 'cifar':
        n_nodes = [50, 1]
        n_lvls = len(n_nodes)
        mean_types = [tf.sin for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [square_loss_eltws for lvl in range(n_lvls - 1)]
        loss_types.append(tf.nn.softmax_cross_entropy_with_logits)

        #opt_types =  [ tf.train.GradientDescentOptimizer for lvl in range(n_lvls) ]
        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = multi_clf_err

        # Use all train and test:
        x_tra = x_all
        y_tra = y_all
        yp_tra = yp_all
        x_val = x_test
        y_val = y_test
        yp_val = yp_test

        train_set = list(range(x_tra.shape[0]))

        weak_learner_params = {'type': 'linear'}
        weak_classification = True

        # cifar lr
        lr_boost_adam = 1e-3
        lr_leaf_adam = 1e-2
        lr_decay_step = x_tra.shape[0] * 3
        ps_ws_val = 0.5
        reg_lambda = 0

    elif dataset == 'a9a':
        n_nodes = [4, 1]
        batch_weak_learner_max_epoch = 5
        max_epoch = 50
        n_lvls = len(n_nodes)
        mean_types = [tf.nn.sigmoid for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [square_loss_eltws for lvl in range(n_lvls - 1)]
        loss_types.append(square_loss_eltws)

        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = logit_binary_clf_err

        weak_learner_params = {'type': 'res', 'res_inter_dim': 1}
        weak_classification = False

        # mnist lr
        lr_boost_adam = 1e-8
        lr_leaf_adam = 1e-2  #1e-1 for online. 1e-2 for batch
        lr_decay_step = x_tra.shape[0] * 5
        ps_ws_val = 1.0
        reg_lambda = 0.0

    elif dataset == 'slice':
        n_nodes = [7, 1]
        batch_weak_learner_max_epoch = 25
        max_epoch = 100
        n_lvls = len(n_nodes)
        mean_types = [
            lambda x: tf.maximum(0.3 * x, x) for lvl in range(n_lvls - 1)
        ]
        mean_types.append(lambda x: x)
        loss_types = [square_loss_eltws for lvl in range(n_lvls - 1)]
        #loss_types = [logistic_loss_eltws for lvl in range(n_lvls-1) ]
        loss_types.append(square_loss_eltws)
        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = None

        weak_learner_params = {'type': 'res', 'res_inter_dim': 10}
        weak_classification = False

        lr_boost_adam = 1e-3
        lr_leaf_adam = 1e-2
        lr_decay_step = x_tra.shape[0] * 4
        ps_ws_val = 1.0
        reg_lambda = 0.0
        lr_gamma = 0.5

    elif dataset == 'year':
        n_nodes = [10, 1]
        batch_weak_learner_max_epoch = 10
        n_lvls = len(n_nodes)
        mean_types = [tf.nn.relu for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [square_loss_eltws for lvl in range(n_lvls - 1)]
        loss_types.append(square_loss_eltws)
        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = None

        weak_learner_params = {'type': 'res', 'res_inter_dim': 10}
        weak_classification = False

        lr_boost_adam = 1e-3
        lr_leaf_adam = 1e-3
        lr_decay_step = x_tra.shape[0] * 200
        ps_ws_val = 1.0
        reg_lambda = 0.0
        lr_gamma = 0.5

    elif dataset == 'abalone':
        n_nodes = [3, 1]
        batch_weak_learner_max_epoch = 25
        max_epoch = 100
        n_lvls = len(n_nodes)
        mean_types = [tf.nn.sigmoid for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [square_loss_eltws for lvl in range(n_lvls - 1)]
        loss_types.append(square_loss_eltws)
        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = None

        weak_learner_params = {
            'type': 'res',
            'res_inter_dim': 1,
            'res_add_linear': False
        }
        weak_classification = False

        lr_boost_adam = 1e-3
        lr_leaf_adam = 1e-2
        lr_decay_step = x_tra.shape[0] * 1000
        ps_ws_val = 1.0
        reg_lambda = 0.0
        lr_gamma = 0.5

    else:
        raise Exception('Did not recognize dataset: {}'.format(dataset))

    # modify the default tensorflow graph.
    train_set = list(range(x_tra.shape[0]))

    input_dim = len(x_val[0].ravel())
    output_dim = len(y_val[0].ravel())

    dims = [output_dim for _ in range(n_lvls + 2)]
    dims[0] = input_dim

    lr_boost = lr_boost_adam
    lr_leaf = lr_leaf_adam
    lr_global_step = 0

    dbg = TFDeepBoostGraph(dims, n_nodes, weak_classification, mean_types,
                           loss_types, opt_types, weak_learner_params,
                           eval_type)

    init = tf.initialize_all_variables()
    #sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.20)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    print('Initializing...')
    sess.run(init)
    print('Initialization done')

    t = 0
    # As we can waste an epoch with the line search, max_epoch will be incremented when a line search
    # is done. However, to prevent infinite epochs, we set an ultimatum on the number of epochs
    # (max_epoch_ult) that stops this.
    epoch = -1
    max_epoch_ult = max_epoch * 2
    batch_size = 64
    val_interval = batch_size * 10

    # if line search, these will shrink learning rate until result improves.
    do_line_search = False
    min_non_ls_epochs = 4  # min. number of epochs in the beginning where we don't do line search
    # linesearch variables
    worsen_cnt = 0
    best_avg_loss = np.Inf
    restore_threshold = len(train_set) / val_interval

    # Model saving paths.
    model_dir = '../model/'
    if not os.path.isdir(model_dir):
        os.mkdir(model_dir)
    best_model_fname = 'best_model_{}.ckpt'.format(model_name_suffix)
    init_model_fname = 'initial_model_{}.ckpt'.format(model_name_suffix)
    best_model_path = os.path.join(model_dir, best_model_fname)
    init_model_path = os.path.join(model_dir, init_model_fname)
    dbg.saver.save(sess, init_model_path)

    tf.train.SummaryWriter(logdir='../log/', graph=tf.get_default_graph())

    stop_program = False

    # Total number of samples
    global_step = 0
    num_preds = 0
    tra_err = []
    val_err = []

    if online_boost:
        while not stop_program and epoch < max_epoch and epoch < max_epoch_ult:
            epoch += 1
            print("-----Epoch {:d}-----".format(epoch))
            np.random.shuffle(train_set)
            for si in range(0, len(train_set), batch_size):
                # print 'train epoch={}, start={}'.format(epoch, si)
                si_end = min(si + batch_size, len(train_set))
                x = x_tra[train_set[si:si_end]]
                y = y_tra[train_set[si:si_end]]

                if dbg.sigint_capture == True:
                    # don't do any work this iteration, restart all computation with the next
                    break
                n_applies = len(dbg.training_update())
                sess.run(dbg.training(),
                         feed_dict=dbg.fill_feed_dict(x, y, lr_boost, lr_leaf,
                                                      ps_ws_val, reg_lambda))

                # Evaluate
                t += si_end - si
                if si_end - si < batch_size:
                    t = 0
                lr_global_step += si_end - si
                global_step += si_end - si
                num_preds += (si_end - si) * n_nodes[0]
                if lr_global_step > lr_decay_step:
                    lr_global_step -= lr_decay_step
                    lr_boost *= lr_gamma
                    lr_leaf *= lr_gamma
                    print("----------------------")
                    print('Decayed step size: lr_boost={:.3g}, lr_leaf={:.3g}'.
                          format(lr_boost, lr_leaf))
                    print("----------------------")
                if t % val_interval == 0:
                    preds_tra, avg_loss_tra, avg_tgt_loss_tra =\
                        sess.run([dbg.inference(), dbg.evaluation(), dbg.evaluation(loss=True)],
                                 feed_dict=dbg.fill_feed_dict(x_tra[:5000], y_tra[:5000],
                                                              lr_boost, lr_leaf, ps_ws_val, reg_lambda))
                    assert (not np.isnan(avg_loss_tra))
                    preds, avg_loss, avg_tgt_loss = sess.run(
                        [
                            dbg.inference(),
                            dbg.evaluation(),
                            dbg.evaluation(loss=True)
                        ],
                        feed_dict=dbg.fill_feed_dict(x_val, y_val, lr_boost,
                                                     lr_leaf, ps_ws_val,
                                                     reg_lambda))
                    assert (not np.isnan(avg_loss))

                    tra_err.append((global_step, avg_loss_tra,
                                    avg_tgt_loss_tra, num_preds))
                    val_err.append(
                        (global_step, avg_loss, avg_tgt_loss, num_preds))

                    # Plotting the fit.
                    #if dataset == 'arun_1d':
                    #    weak_predictions = sess.run(dbg.weak_learner_inference(),
                    #                                feed_dict=dbg.fill_feed_dict(x_val, y_val,
                    #                                                             lr_boost, lr_leaf, ps_ws_val, reg_lambda))
                    #    tgts = sess.run(dbg.ll_nodes[-1][0].children_tgts,
                    #                    feed_dict=dbg.fill_feed_dict(x_val, y_val,
                    #                                                 lr_boost, lr_leaf, ps_ws_val, reg_lambda))
                    #    plt.figure(1)
                    #    plt.clf()
                    #    plt.plot(x_val, y_val, lw=3, color='green', label='GT')
                    #    for wi, wpreds in enumerate(weak_predictions):
                    #        plt.plot(x_val, -wpreds, label='w' + str(wi))
                    #    # for wi, tgt in enumerate(tgts):
                    #    #  plt.plot(x_val, -tgt, label='t'+str(wi))
                    #    plt.plot(x_val, preds, lw=3, color='blue', label='Yhat')
                    #    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
                    #    plt.title('deepboost')
                    #    plt.tight_layout()
                    #    plt.draw()
                    #    plt.show(block=False)
                    print('epoch={},t={} \n avg_loss={} avg_tgt_loss={} \n loss_tra={} tgt_loss_tra={}'.format(
                        epoch, t, avg_loss, avg_tgt_loss, avg_loss_tra,
                        avg_tgt_loss_tra))

                    #if epoch < min_non_ls_epochs:
                    #    continue

                    #if do_line_search:
                    #    # restores if is worse than the best multiple times
                    #    if avg_loss > best_avg_loss:
                    #        worsen_cnt += 1
                    #        if worsen_cnt > restore_threshold:
                    #            print 'Restore to previous best loss: {}'.format(best_avg_loss)
                    #            dbg.saver.restore(sess, best_model_path)
                    #            worsen_cnt = 0
                    #            max_epoch += 1
                    #            lr_boost *= gamma_boost
                    #            lr_leaf *= gamma_leaf
                    #    else:
                    #        worsen_cnt = 0
                    #        lr_boost = lr_boost_adam
                    #        lr_leaf = lr_leaf_adam
                    #        dbg.saver.save(sess, best_model_path)
                    #        best_avg_loss = avg_loss
            # endfor
            # end of epoch, so save out the results so far
            np.savez('../log/err_vs_gstep_{:s}_{:d}.npz'.format(
                model_name_suffix, n_nodes[0]),
                     tra_err=np.asarray(tra_err),
                     val_err=np.asarray(val_err))
            if dbg.sigint_capture == True:
                print("----------------------")
                print(
                    "Paused. Set parameters before loading the initial model again..."
                )
                print("----------------------")
                # helper functions
                save_model = lambda fname: dbg.saver.save(sess, fname)
                save_best = partial(save_model, best_model_path)
                save_init = partial(save_model, init_model_path)
                pdb.set_trace()
                dbg.saver.restore(sess, init_model_path)
                epoch = -1
                t = 0
                dbg.sigint_capture = False
        # endwhile
        np.savez('../log/err_vs_gstep_{:s}_{:d}.npz'.format(
            model_name_suffix, n_nodes[0]),
                 tra_err=np.asarray(tra_err),
                 val_err=np.asarray(val_err))

    #### Batch boost####
    else:

        for learneri in range(1, n_nodes[0] + 1):
            max_epoch = batch_weak_learner_max_epoch  #12
            epoch = -1
            t = 0
            print("---------------------")
            print(" Weak learner: {:d}".format(learneri))
            # for a new weak learner, reset the learning rates
            lr_global_step = 0
            lr_boost = lr_boost_adam
            lr_leaf = lr_leaf_adam
            while not stop_program and epoch < max_epoch:
                epoch += 1
                print("-----Epoch {:d}-----".format(epoch))
                np.random.shuffle(train_set)
                for si in range(0, len(train_set), batch_size):
                    # print 'train epoch={}, start={}'.format(epoch, si)
                    si_end = min(si + batch_size, len(train_set))
                    x = x_tra[train_set[si:si_end]]
                    y = y_tra[train_set[si:si_end]]

                    if dbg.sigint_capture == True:
                        # don't do any work this iteration, restart all computation with the next
                        break
                    n_applies = len(dbg.training_update())

                    if learneri == 0:  # bias
                        # ll_train_ops is a list of list of 3-tuples of (grads, apply_ops, child_tgts)
                        # Each element of the 3-tuple is a list.
                        #
                        # Get the last node (boostnode a.k.a. root), and access its first gradient and first
                        # apply ops, which are for the global bias.
                        # NVM
                        # NVM ... when convert_y == weak_classification == False, ps_w and ps_b are not learned so this is
                        # empty.
                        train_op = [
                            dbg.ll_train_ops[-1][0][0][0],
                            dbg.ll_train_ops[-1][0][1][0]
                        ]
                    else:
                        # For each learneri = 1... ,n_nodes[0]+1,
                        # we access the associated leaf node to get its gradients and apply_ops
                        train_op = dbg.ll_train_ops[0][
                            learneri - 1][0] + dbg.ll_train_ops[0][learneri -
                                                                   1][1]
                    sess.run(train_op,
                             feed_dict=dbg.fill_feed_dict(
                                 x, y, lr_boost, lr_leaf, ps_ws_val,
                                 reg_lambda))

                    # Evaluate
                    t += si_end - si
                    if si_end - si < batch_size:
                        t = 0
                    lr_global_step += si_end - si
                    global_step += si_end - si
                    num_preds += learneri * (si_end - si)
                    if lr_global_step > lr_decay_step:
                        lr_global_step -= lr_decay_step
                        lr_boost *= lr_gamma
                        lr_leaf *= lr_gamma
                        print("----------------------")
                        print(
                            'Decayed step size: lr_boost={:.3g}, lr_leaf={:.3g}'
                            .format(lr_boost, lr_leaf))
                        print("----------------------")
                    if t % val_interval == 0:
                        prediction_tensor = dbg.ll_nodes[-1][0].psums[learneri]
                        tgt_loss_tensor = dbg.ll_nodes[-1][0].losses[learneri]
                        preds_tra, avg_loss_tra, avg_tgt_loss_tra =\
                            sess.run([prediction_tensor, dbg.evaluation(False, prediction_tensor), tgt_loss_tensor],
                                     feed_dict=dbg.fill_feed_dict(x_tra[:5000], y_tra[:5000],
                                                                  lr_boost, lr_leaf, ps_ws_val, reg_lambda))
                        preds, avg_loss, avg_tgt_loss = \
                            sess.run([prediction_tensor, dbg.evaluation(False, prediction_tensor), tgt_loss_tensor],
                                     feed_dict=dbg.fill_feed_dict(x_val, y_val,
                                                                  lr_boost, lr_leaf, ps_ws_val, reg_lambda))

                        tra_err.append((global_step, avg_loss_tra,
                                        avg_tgt_loss_tra, num_preds))
                        val_err.append(
                            (global_step, avg_loss, avg_tgt_loss, num_preds))

                        assert (not np.isnan(avg_loss))
                        # Plotting the fit.
                        if dataset == 'arun_1d':
                            # weak_predictions = sess.run(dbg.weak_learner_inference(),
                            #  feed_dict=dbg.fill_feed_dict(x_val, y_val,
                            #                               lr_boost, lr_leaf, ps_ws_val, reg_lambda))
                            # tgts = sess.run(dbg.ll_nodes[-1][0].children_tgts[2:],
                            #  feed_dict=dbg.fill_feed_dict(x_val, y_val,
                            #                               lr_boost, lr_leaf, ps_ws_val, reg_lambda))
                            plt.figure(1)
                            plt.clf()
                            plt.plot(x_val,
                                     y_val,
                                     lw=3,
                                     color='green',
                                     label='Ground Truth')
                            # for wi, wpreds in enumerate(weak_predictions):
                            #  if wi==0:
                            #    # recall the first one learns y directly.
                            #    plt.plot(x_val, wpreds, label=str(wi))
                            #  else:
                            #    plt.plot(x_val, -wpreds, label=str(wi))
                            # for wi, tgt in enumerate(tgts):
                            #  plt.plot(x_val, tgt, label=str(wi))
                            # plt.legend(loc=4)
                            plt.plot(x_val,
                                     preds,
                                     lw=3,
                                     color='blue',
                                     label='Prediction')
                            plt.draw()
                            plt.show(block=False)
                        print('learner={},epoch={},t={} \n avg_loss={} avg_tgt_loss={} \n loss_tra={} tgt_loss_tra={}'.format(
                            learneri, epoch, t, avg_loss, avg_tgt_loss,
                            avg_loss_tra, avg_tgt_loss_tra))

                # endfor
                save_fname = '../log/batch_err_vs_gstep_{:s}.npz'.format(
                    model_name_suffix)
                np.savez(save_fname,
                         tra_err=np.asarray(tra_err),
                         val_err=np.asarray(val_err),
                         learners=learneri)
                print('Saved error rates to {}'.format(save_fname))
                if dbg.sigint_capture == True:
                    print("----------------------")
                    print(
                        "Paused. Set parameters before loading the initial model again..."
                    )
                    print("----------------------")
                    # helper functions
                    save_model = lambda fname: dbg.saver.save(sess, fname)
                    save_best = partial(save_model, best_model_path)
                    save_init = partial(save_model, init_model_path)
                    pdb.set_trace()
                    dbg.saver.restore(sess, init_model_path)
                    epoch = -1
                    t = 0
                    dbg.sigint_capture = False
            # endfor
        # endfor
        np.savez(
            '../log/batch_err_vs_gstep_{:s}.npz'.format(model_name_suffix),
            tra_err=np.asarray(tra_err),
            val_err=np.asarray(val_err))
    # endif

    print("Program Finished")

    if online_boost:
        save_fname = '../log/err_vs_gstep_{:s}.npz'.format(model_name_suffix)
    else:
        save_fname = '../log/batch_err_vs_gstep_{:s}.npz'.format(
            model_name_suffix)
    np.savez(save_fname,
             tra_err=np.asarray(tra_err),
             val_err=np.asarray(val_err))
    print('Saved results to: {}'.format(save_fname))
    pdb.set_trace()
Example #15
]

#define number of samples (to select from pool or seed set) per round
samples = 2000
trainset_size = 50000

#store the accuracy and losses in a dataframe
recordmodel1 = pd.DataFrame(columns=('TrainDS', 'Seedset', 'Train_Accuracy',
                                     'Train_Loss', 'Val_Accuracy', 'Val_Loss',
                                     'Test_Accuracy', 'Test_Loss'))
recordmodel2 = pd.DataFrame(columns=('TrainDS', 'Seedset', 'Train_Accuracy',
                                     'Train_Loss', 'Val_Accuracy', 'Val_Loss',
                                     'Test_Accuracy', 'Test_Loss'))

#get cifar dataset
X_train_full, y_train_full, X_test, y_test = get_dataset()

#get initial 2000 samples
permutation, X_train, y_train = get_k_random_samples(X_train_full.shape[0],
                                                     samples, X_train_full,
                                                     y_train_full)

print("Train set size X :", X_train.shape)
print(y_train.shape)

#define the seedset or pool
X_seedset = np.array([])
y_seedset = np.array([])
X_seedset = np.copy(X_train_full)
X_seedset = np.delete(X_seedset, permutation, axis=0)
y_seedset = np.copy(y_train_full)
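The get_k_random_samples helper above is not shown. A minimal sketch based only on how it is called here and on the later np.delete(..., permutation, axis=0) step (the function name and signature come from the snippet; the body is an assumption):

import numpy as np

def get_k_random_samples(pool_size, k, X_full, y_full):
    # hypothetical helper: pick k random indices and return them with the slices
    permutation = np.random.choice(pool_size, size=k, replace=False)
    return permutation, X_full[permutation], y_full[permutation]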
Example #16
    d = np.load(fname)
    d_n_preds = deepboost_n_samples_to_n_preds(N, d[traval][:, 0])
    Kd = d[traval].shape[0] // Kdb
    d_select_indices = np.arange(0, d_n_preds.shape[0], Kd) + Kd - 1
    if d_select_indices[-1] > d_n_preds.shape[0] - 1:
        d_select_indices[-1] = d_n_preds.shape[0] - 1
    return d_n_preds[d_select_indices], average_end_at(d[traval][:, col],
                                                       d_select_indices, 5)


datasets = get_dataset.all_names()
indx = textmenu(datasets)
if indx is None:
    exit(0)
dataset = datasets[indx]
x_tra, y_tra, x_val, y_val = get_dataset.get_dataset(dataset)
model_name_suffix = dataset

Kdb = None
if dataset == 'a9a':
    n_nodes = 8
    col = 1
elif dataset == 'mnist':
    n_nodes = 5
    cost_multiplier = 1
    col = 1
elif dataset == 'slice':
    n_nodes = 7
    col = 2
    Kdb = 25
elif dataset == 'year':
Example #17
def main():
    np.random.seed(1)
    random.seed(1)
    data_names = ["grid_stability_c", "skin", "HTRU2"]
    corr_list = np.zeros((len(data_names)))

    C_list = [100, 0.001, 1]
    fontsize = 24
    fontsize_corr = 40
    batch_size = 1

    for i, data_name in enumerate(data_names):
        save_dir = "result/BLR/"
        os.makedirs(save_dir, exist_ok=True)
        [X, y] = get_dataset.get_dataset(data_name)
        [whole_size, dim] = X.shape
        y = y[:, 0].astype(int)
        test_size = 5000
        train_size = whole_size - test_size
        init_sample_size = 100
        sample_size = min([3000, train_size - init_sample_size])
        X_test = X[:test_size]
        y_test = y[:test_size]
        X_train = X[test_size:test_size + train_size]
        y_train = y[test_size:test_size + train_size]

        pool_indecies = set(range(train_size))
        sampled_indecies = set(random.sample(pool_indecies, init_sample_size))
        pool_indecies = list(pool_indecies - sampled_indecies)
        sampled_indecies = list(sampled_indecies)

        X_sampled = X_train[sampled_indecies]
        y_sampled = y_train[sampled_indecies]

        basis_size = 5
        x_range = [X_train.min(), X_train.max()]
        blr = active_BLR(basis_size=basis_size,
                         x_range=x_range,
                         C=C_list[i],
                         solver="newton-cg")

        validate_size = 10
        threshold = 0.2
        error_stability1 = error_stability_criterion(threshold, validate_size)
        threshold = 0.15
        error_stability2 = error_stability_criterion(threshold, validate_size)
        threshold = 0.1
        error_stability3 = error_stability_criterion(threshold, validate_size)
        criteria = [error_stability1, error_stability2, error_stability3]

        test_error = np.empty(0, float)
        blr.fit(X_sampled, y_sampled)
        color = {
            error_stability1.criterion_name: "r",
            error_stability2.criterion_name: "g",
            error_stability3.criterion_name: "b"
        }
        for e in tqdm(range(sample_size)):
            new_data_index = blr.data_acquire(X_train, pool_indecies)
            sampled_indecies.append(new_data_index)
            pool_indecies.remove(new_data_index)

            X_sampled = X_train[sampled_indecies]
            y_sampled = y_train[sampled_indecies]

            pos_old = blr.get_pos()
            blr.fit(X_sampled, y_sampled, blr.coef_[0])
            pos_new = blr.get_pos()
            KL_pq = utils.calcKL_gauss(pos_old, pos_new)
            KL_qp = utils.calcKL_gauss(pos_new, pos_old)
            error = utils.calc_cross_entropy(y_test,
                                             blr.predict_proba(X_test)[:, 1])
            test_error = np.append(test_error, error)

            error_stability1.check_threshold(KL_pq, KL_qp, e)
            error_stability2.check_threshold(KL_pq, KL_qp, e)
            error_stability3.check_threshold(KL_pq, KL_qp, e)

        draw_result.draw_gene_error(test_error, criteria, init_sample_size,
                                    batch_size, color, fontsize)
        plt.tight_layout()
        plt.savefig(save_dir + "BLR_gene_error_" + data_name + ".pdf")
        draw_result.draw_correlation(
            test_error[validate_size:],
            error_stability1.error_ratio[validate_size:], "BLR", "b",
            fontsize_corr)
        plt.tight_layout()
        plt.savefig(save_dir + "BLR_correlation_" + data_name + ".pdf")
        draw_result.draw_epsilon(criteria, init_sample_size, batch_size, color,
                                 fontsize)
        plt.tight_layout()
        plt.savefig(save_dir + "BLR_criterion_" + data_name + ".pdf")

        indecies = utils.calc_min_list(
            error_stability1.error_ratio[validate_size:])
        corr_list[i] = np.corrcoef(
            test_error[validate_size:][indecies],
            error_stability1.error_ratio[validate_size:][indecies])[1, 0]

    np.savetxt(save_dir + "corr_list.txt", corr_list)
    np.savetxt(save_dir + "loss.txt", np.array(test_error))
    np.savetxt(save_dir + "lambda.txt", error_stability1.error_ratio)
from data_visualization.view_plays_freq import view_plays_freq 
from data_visualization.view_metrics import view_metrics


fakeDataset = True
datasetPath = './fake_dataset/' if fakeDataset else './dataset/'
resultsPath = './fake_results/' if fakeDataset else './results/'

kk = [5, 10, 100, 200, 500]

metrics = {'map': True, 'diversity': True, 'ndcg': True, 'mrr': True, 'rnd': True, 'ub': True}
methods = {'cf': True, 'cb': True, 'hb': True}



if not fakeDataset: get_dataset()


split_data(datasetPath)

if args.visualize:
    view_plays_freq(datasetPath,True)
    view_plays_freq(datasetPath)

scale_data(datasetPath)
read_bios(datasetPath)
generate_model(datasetPath)
generate_tfIdfRecommender(datasetPath)

print('evaluate!')
evaluate(datasetPath,resultsPath,kk=kk, metrics=metrics, methods=methods)
Example #19
def train_mnist_mlp_with_test(init_lr=0.1,
                              momentum=0.9,
                              num_steps=50000,
                              middle_decay=False,
                              inverse_decay=False,
                              decay_const=0.0,
                              time_const=5000.0,
                              steps_per_eval=100,
                              batch_size=100,
                              pretrain_ckpt=None,
                              save_ckpt=None,
                              print_step=False,
                              data_list=None,
                              data_list_eval=None,
                              data_list_test=None):
    """Train an MLP for MNIST.

    Args:
        init_lr: Initial learning rate.
        momentum: Momentum coefficient.
        num_steps: Number of training steps.
        middle_decay: Whether to decay the learning rate exponentially after the midpoint.
        pretrain_ckpt: Optional checkpoint to restore pretrained weights from.

    Returns:
        results: Results tuple object.
    """
    if data_list is None:
        dataset = get_dataset('mnist')
    if data_list_eval is None:
        dataset_train = get_dataset('mnist')
    if data_list_test is None:
        dataset_test = get_dataset('mnist', test=True)
    x = tf.placeholder(tf.float32, [None, 28, 28, 1], name="x")
    y = tf.placeholder(tf.int64, [None], name="y")
    config = get_mnist_mlp_config(init_lr, momentum)
    with tf.name_scope('Train'):
        with tf.variable_scope('Model'):
            m = get_mnist_mlp_model(config, x, y, training=True)
    with tf.name_scope('Test'):
        with tf.variable_scope('Model', reuse=True):
            mtest = get_mnist_mlp_model(config, x, y, training=False)

    final_lr = 1e-4
    midpoint = num_steps // 2

    if True:
        num_train = 60000
        num_test = 10000
    lr_ = init_lr
    bsize = batch_size
    steps_per_epoch = num_train // bsize
    steps_test_per_epoch = num_test // bsize
    tau = (num_steps - midpoint) / np.log(init_lr / final_lr)

    train_xent_list = []
    train_cost_list = []
    train_acc_list = []
    test_xent_list = []
    test_cost_list = []
    test_acc_list = []
    lr_list = []
    step_list = []
    var_to_restore = list(
        filter(lambda x: 'momentum' not in x.name.lower(),
               tf.global_variables()))
    var_to_restore = list(
        filter(lambda x: 'global_step' not in x.name.lower(), var_to_restore))
    var_to_restore = list(
        filter(lambda x: 'lr' not in x.name.lower(), var_to_restore))
    var_to_restore = list(
        filter(lambda x: 'mom' not in x.name.lower(), var_to_restore))
    var_to_restore = list(
        filter(lambda x: 'decay' not in x.name.lower(), var_to_restore))
    var_to_init = list(
        filter(lambda x: x not in var_to_restore, tf.global_variables()))
    restorer = tf.train.Saver(var_to_restore)
    if inverse_decay:
        log.info(
            'Applying inverse decay with time constant = {:.3e} and decay constant = {:.3e}'.
            format(time_const, decay_const))
    if middle_decay:
        log.info(
            'Applying decay at midpoint with final learning rate = {:.3e}'.
            format(final_lr))
    assert not (
        inverse_decay and middle_decay
    ), 'Inverse decay and middle decay cannot be applied at the same time.'

    with tf.Session() as sess:
        if pretrain_ckpt is None:
            sess.run(tf.global_variables_initializer())
        else:
            sess.run(tf.variables_initializer(var_to_init))
            restorer.restore(sess, pretrain_ckpt)
        # Assign initial learning rate.
        m.optimizer.assign_hyperparam(sess, 'lr', lr_)
        train_iter = six.moves.xrange(num_steps)
        if not print_step:
            train_iter = tqdm(train_iter, ncols=0)

        for ii in train_iter:
            if data_list is None:
                xd, yd = dataset.next_batch(bsize)
            else:
                xd, yd = data_list[ii]
            if lr_ > 1e-6:
                cost_, _ = sess.run(
                    [m.cost, m.train_op], feed_dict={
                        x: xd,
                        y: yd
                    })
            test_acc = 0.0
            test_xent = 0.0
            train_acc = 0.0
            train_xent = 0.0
            epoch = ii // steps_per_epoch

            if inverse_decay:
                lr_ = init_lr / ((1.0 + ii / time_const)**decay_const)

            if middle_decay and ii > midpoint:
                lr_ = np.exp(-(ii - midpoint) / tau) * init_lr

            m.optimizer.assign_hyperparam(sess, 'lr', lr_)

            # Evaluate every certain number of steps.
            if ii == 0 or (ii + 1) % steps_per_eval == 0:
                for jj in six.moves.xrange(steps_per_epoch):
                    if data_list_eval is None:
                        xd, yd = dataset_train.next_batch(bsize)
                    else:
                        xd, yd = data_list_eval[jj]
                    xent_, acc_ = sess.run(
                        [m.cost, m.acc], feed_dict={
                            x: xd,
                            y: yd
                        })
                    train_xent += xent_ / float(steps_per_epoch)
                    train_acc += acc_ / float(steps_per_epoch)
                step_list.append(ii + 1)
                train_xent_list.append(train_xent)
                train_acc_list.append(train_acc)

                if data_list_eval is None:
                    dataset_train.reset()

                for jj in six.moves.xrange(steps_test_per_epoch):
                    if data_list_test is None:
                        xd, yd = dataset_test.next_batch(bsize)
                    else:
                        xd, yd = data_list_test[jj]
                    xent_, acc_ = sess.run(
                        [mtest.cost, mtest.acc], feed_dict={
                            x: xd,
                            y: yd
                        })
                    test_xent += xent_ / float(steps_test_per_epoch)
                    test_acc += acc_ / float(steps_test_per_epoch)
                test_xent_list.append(test_xent)
                test_acc_list.append(test_acc)

                if data_list_test is None:
                    dataset_test.reset()

                lr_list.append(lr_)
                if print_step:
                    log.info((
                        'Steps {:d} T Xent {:.3e} T Acc {:.3f} V Xent {:.3e} V Acc {:.3f} '
                        'LR {:.3e}').format(ii + 1, train_xent,
                                            train_acc * 100.0, test_xent,
                                            test_acc * 100.0, lr_))
        if save_ckpt is not None:
            saver = tf.train.Saver()
            saver.save(sess, save_ckpt)

    return Results(
        step=np.array(step_list),
        train_xent=np.array(train_xent_list),
        train_acc=np.array(train_acc_list),
        test_xent=np.array(test_xent_list),
        test_acc=np.array(test_acc_list),
        lr=np.array(lr_list),
        decay=decay_const)
def run_random_search(num_steps,
                      lr_limit,
                      decay_limit,
                      num_samples,
                      ckpt,
                      output,
                      seed=0):
    """Random search hyperparameters to plot the surface.

    Args:
        num_steps: Int. Number of look ahead steps.
        lr_limit: Tuple. Two floats denoting the lower and upper search bounds.
        decay_limit: Tuple. Two floats denoting the lower and upper search bounds.
        num_samples: Int. Number of samples to try.
        ckpt: String. Pretrain checkpoint name.
        output: String. Output CSV results file name.

    Returns:
    """
    bsize = BATCH_SIZE
    log.info('Writing output to {}'.format(output))
    log_folder = os.path.dirname(output)
    if not os.path.exists(log_folder):
        os.makedirs(log_folder)

    with tf.Graph().as_default(), tf.Session() as sess:
        dataset = get_dataset('mnist')
        config = get_mnist_mlp_config(0.0, MOMENTUM)
        x = tf.placeholder(tf.float32, [None, 28, 28, 1], name="x")
        y = tf.placeholder(tf.int64, [None], name="y")
        with tf.name_scope('Train'):
            with tf.variable_scope('Model'):
                m = get_mnist_mlp_model(config, x, y, training=True)
        var_to_restore = list(
            filter(lambda x: 'Momentum' not in x.name, tf.global_variables()))
        saver = tf.train.Saver(var_to_restore)
        # 200 points in the learning rate list, and 100 points in the decay list.
        # random sample 1000.
        rnd = np.random.RandomState(seed)
        # Get a list of stochastic batches first.
        data_list = [
            dataset.next_batch(bsize) for step in six.moves.xrange(num_steps)
        ]
        settings = []
        for run in tqdm(
                six.moves.xrange(num_samples),
                ncols=0,
                desc='{} steps'.format(num_steps)):
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, ckpt)
            lr = np.random.rand() * (lr_limit[1] - lr_limit[0]) + lr_limit[0]
            lr = np.exp(lr * np.log(10))
            decay = rnd.uniform(0, 1) * (
                decay_limit[1] - decay_limit[0]) + decay_limit[0]
            decay = np.exp(decay * np.log(10))
            m.optimizer.assign_hyperparam(sess, 'lr', lr)
            loss, final_loss = train_steps(
                sess, m, data_list, init_lr=lr, decay_const=decay)
            settings.append([lr, decay, final_loss])
        settings = np.array(settings)
        np.savetxt(output, settings, delimiter=',', header='lr,decay,loss')
        loss = settings[:, 2]
        sort_idx = np.argsort(loss)
        sorted_settings = settings[sort_idx]
        print('======')
        print('Best 10 settings')
        for ii in six.moves.xrange(10):
            aa = sorted_settings[ii, 0]
            decay = sorted_settings[ii, 1]
            loss = sorted_settings[ii, 2]
            print('Alpha', aa, 'Decay', decay, 'Loss', loss)
    return sorted_settings[0, 0], sorted_settings[0, 1], sorted_settings[0, 2]
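Note on the sampling above: lr_limit and decay_limit are consumed as log10 exponents, since a uniform draw u is mapped through np.exp(u * np.log(10)) == 10**u, i.e. a log-uniform sample. A minimal standalone sketch of that sampling (the names below are illustrative, not from the repository):

import numpy as np

def sample_log_uniform(rng, limit):
    """Draw one value log-uniformly; `limit` is (low, high) in log10 exponents."""
    u = rng.uniform(limit[0], limit[1])
    return 10.0 ** u

rng = np.random.RandomState(0)
lr = sample_log_uniform(rng, (-3.0, 0.0))      # learning rate between 1e-3 and 1e0
decay = sample_log_uniform(rng, (-4.0, -1.0))  # decay constant between 1e-4 and 1e-1

Sampling in log space spreads the trials evenly across orders of magnitude, which is usually what a learning-rate search wants.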
Beispiel #21
0
def main(_):

    # get experiment folder and create dir for plots
    exp_folder = os.path.join(FLAGS.exp_root, FLAGS.exp_name,
                              'exp{}'.format(FLAGS.exp_nr))
    test_folder = os.path.join(exp_folder, 'test')
    tf.io.gfile.mkdir(test_folder)

    # get experiment FLAGS
    TRAINING_FLAGS = yaml.safe_load(
        tf.io.gfile.GFile(os.path.join(exp_folder, 'FLAGS.yml'), 'r'))

    # get dataset
    test_set, test_labels = get_dataset(
        '.',
        TRAINING_FLAGS['num_feat'],
        TRAINING_FLAGS['slice_length'],
        type='test',
        return_sequences=TRAINING_FLAGS['return_sequences'])

    sequence_length = test_set.shape[1]
    feature_dim = test_set.shape[2]
    if TRAINING_FLAGS['model'] == 'tcn':

        model = get_tcn(
            sequence_length,
            feature_dim,
            nb_filters=TRAINING_FLAGS['num_filters'],
            nb_stacks=TRAINING_FLAGS['num_stacks'],
            use_skip_connections=TRAINING_FLAGS['use_skip_connections'],
            use_batch_norm=TRAINING_FLAGS['bn'],
            return_sequences=TRAINING_FLAGS['return_sequences'],
            dilation_stages=TRAINING_FLAGS['dilation_stages'])
    elif TRAINING_FLAGS['model'] == 'cnn':
        model = get_cnn((sequence_length, feature_dim))

    else:
        assert False, 'Unknown model!'

    model(tf.zeros((1, sequence_length, feature_dim)))
    model.load_weights(os.path.join(exp_folder, 'model.h5'))
    model.compile()
    model.summary()
    # print(model.count_params())
    # weights = model.get_layer('dense').get_weights()
    # kernel = weights[0]
    # bias = weights[1]
    # scale = 128 / max(kernel.min(), kernel.max(), bias.min(), bias.max())
    # kernel_scaled = (kernel * scale).astype('int8')
    # bias_scaled = (bias * scale).astype('int8')

    converter = tf.lite.TFLiteConverter.from_keras_model(model)

    # Convert the model to the TensorFlow Lite format with quantization
    tflite_model_name = 'quant_model'
    quantize = True
    if (quantize):

        def representative_dataset():
            for i in range(100):
                yield [test_set[i].reshape(1, sequence_length, feature_dim)]

        # Set the optimization flag.
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        # Enforce full-int8 quantization
        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS_INT8
        ]
        converter.inference_input_type = tf.uint8  # or tf.int8
        converter.inference_output_type = tf.uint8  # or tf.int8
        # Provide a representative dataset to ensure we quantize correctly.
        converter.representative_dataset = representative_dataset
    tflite_model = converter.convert()
    model_path = os.path.join('/tmp', tflite_model_name + '.tflite')
    with open(model_path, 'wb') as f:
        f.write(tflite_model)

    tflite_interpreter = tf.lite.Interpreter(model_path=model_path)
    tflite_interpreter.allocate_tensors()
    input_details = tflite_interpreter.get_input_details()
    output_details = tflite_interpreter.get_output_details()

    predictions = []
    for i in range(len(test_set)):
        val_batch = test_set[i]
        val_batch = np.expand_dims(val_batch,
                                   axis=0).astype(input_details[0]["dtype"])
        tflite_interpreter.set_tensor(input_details[0]['index'], val_batch)
        tflite_interpreter.allocate_tensors()
        tflite_interpreter.invoke()
        output = tflite_interpreter.get_tensor(output_details[0]['index'])
        predictions += [output]

    METRICS = [
        tf.keras.metrics.TruePositives(name='tp'),
        tf.keras.metrics.FalsePositives(name='fp'),
        tf.keras.metrics.TrueNegatives(name='tn'),
        tf.keras.metrics.FalseNegatives(name='fn'),
        tf.keras.metrics.BinaryAccuracy(name='accuracy'),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.AUC(name='auc'),
    ]
    wrapped_metrics = list(map(lambda m: MetricWrapper(m, dims=2), METRICS))
    predictions = np.stack(predictions).squeeze()
    res = {}
    for m in wrapped_metrics:
        m.update_state(y_true=test_labels, y_pred=predictions)
        res[m.name] = m.result().numpy()

    with open(os.path.join(exp_folder, 'test/quant_metrics.p'),
              'wb') as handle:
        pickle.dump(res, handle, protocol=pickle.HIGHEST_PROTOCOL)
    pprint.pprint(res)
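One caveat on the full-int8 conversion above: with inference_input_type = tf.uint8 the interpreter expects inputs already mapped onto the quantized grid, so the plain astype cast in the prediction loop only works when the float features already lie in that range. A minimal sketch of explicit input quantization using the scale and zero point the interpreter reports (it assumes the input_details from above; the helper name is illustrative):

import numpy as np

def quantize_input(x_float, input_detail):
    """Map float features onto the uint8 grid of a fully quantized TFLite model."""
    scale, zero_point = input_detail['quantization']  # per-tensor quantization params
    q = np.round(x_float / scale + zero_point)
    return np.clip(q, 0, 255).astype(np.uint8)

# e.g. val_batch = quantize_input(np.expand_dims(test_set[i], axis=0), input_details[0])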
Beispiel #22
0
def online_smd(dataset_name='mnist',
               init_lr=1e-1,
               momentum=0.001,
               num_steps=20000,
               middle_decay=False,
               steps_per_update=10,
               smd=True,
               steps_look_ahead=5,
               num_meta_steps=10,
               steps_per_eval=100,
               batch_size=100,
               meta_lr=1e-2,
               print_step=False,
               effective_lr=True,
               negative_momentum=True,
               optimizer='momentum',
               stochastic=True,
               exp_folder='.'):
    """Train an MLP for MNIST.

    Args:
        dataset_name: String. Name of the dataset.
        init_lr: Float. Initial learning rate, default 0.1.
        momentum: Float. Initial momentum, default 0.9.
        num_steps: Int. Total number of steps, default 20000.
        middle_decay: Whether applying manual learning rate decay to 1e-4 from the middle, default False.
        steps_per_update: Int. Number of steps per update, default 10.
        smd: Bool. Whether run SMD.
        steps_look_ahead: Int. Number of steps to look ahead, default 5.
        num_meta_steps: Int. Number of meta steps, default 10.
        steps_per_eval: Int. Number of training steps per evaluation, default 100.
        batch_size: Int. Mini-batch size, default 100.
        meta_lr: Float. Meta learning rate, default 1e-2.
        print_step: Bool. Whether to print loss during training, default True.
        effective_lr: Bool. Whether to re-parameterize learning rate as lr / (1 - momentum), default True.
        negative_momentum: Bool. Whether to re-parameterize momentum as (1 - momentum), default True.
        optimizer: String. Name of the optimizer. Options: `momentum`, `adam, default `momentum`.
        stochastic: Bool. Whether to do stochastic or deterministic look ahead, default True.

    Returns:
        results: Results tuple object.
    """
    dataset = get_dataset(dataset_name)
    dataset_train = get_dataset(
        dataset_name)  # For evaluate training progress (full epoch).
    dataset_test = get_dataset(
        dataset_name, test=True)  # For evaluate test progress (full epoch).

    if dataset_name == 'mnist':
        input_shape = [None, 28, 28, 1]
    elif dataset_name.startswith('cifar'):
        input_shape = [None, 32, 32, 3]

    x = tf.placeholder(tf.float32, input_shape, name="x")
    y = tf.placeholder(tf.int64, [None], name="y")

    if effective_lr:
        init_lr_ = init_lr / (1.0 - momentum)
    else:
        init_lr_ = init_lr

    if negative_momentum:
        init_mom_ = 1.0 - momentum
    else:
        init_mom_ = momentum
    if dataset_name == 'mnist':
        config = get_mnist_mlp_config(
            init_lr_,
            init_mom_,
            effective_lr=effective_lr,
            negative_momentum=negative_momentum)
    elif dataset_name == 'cifar-10':
        config = get_cifar_cnn_config(
            init_lr_,
            init_mom_,
            effective_lr=effective_lr,
            negative_momentum=negative_momentum)
    else:
        raise NotImplementedError('Unsupported dataset: {}'.format(dataset_name))
    with tf.name_scope('Train'):
        with tf.variable_scope('Model'):
            if dataset_name == 'mnist':
                m = get_mnist_mlp_model(
                    config, x, y, optimizer=optimizer, training=True)
                model = m
            elif dataset_name == 'cifar-10':
                m = get_cifar_cnn_model(
                    config, x, y, optimizer=optimizer, training=True)
                model = m
    with tf.name_scope('Test'):
        with tf.variable_scope('Model', reuse=True):
            if dataset_name == 'mnist':
                mtest = get_mnist_mlp_model(config, x, y, training=False)
            elif dataset_name == 'cifar-10':
                mtest = get_cifar_cnn_model(config, x, y, training=False)

    final_lr = 1e-4
    midpoint = num_steps // 2

    if dataset_name == 'mnist':
        num_train = 60000
        num_test = 10000
    elif dataset_name.startswith('cifar'):
        num_train = 50000
        num_test = 10000

    lr_ = init_lr_
    mom_ = init_mom_
    bsize = batch_size
    steps_per_epoch = num_train // bsize
    steps_test_per_epoch = num_test // bsize

    train_xent_list = []
    train_acc_list = []
    test_xent_list = []
    test_acc_list = []
    lr_list = []
    mom_list = []
    step_list = []
    log.info(
        'Applying decay at midpoint with final learning rate = {:.3e}'.format(
            final_lr))

    if 'momentum' in optimizer:
        mom_name = 'mom'
    elif 'adam' in optimizer:
        mom_name = 'beta1'
    else:
        raise ValueError('Unknown optimizer')
    hp_dict = {'lr': init_lr} #, mom_name: momentum}
    hp_names = hp_dict.keys()
    hyperparams = dict([(hp_name, model.optimizer.hyperparams[hp_name])
                        for hp_name in hp_names])
    grads = model.optimizer.grads
    accumulators = model.optimizer.accumulators
    new_accumulators = model.optimizer.new_accumulators
    loss = model.cost

    # Build look ahead graph.
    look_ahead_ops, hp_grad_ops, zero_out_ops = look_ahead_grads(
        hyperparams, grads, accumulators, new_accumulators, loss)

    # Meta optimizer, use Adam on the log space.
    meta_opt = LogOptimizer(tf.train.AdamOptimizer(meta_lr))
    hp = [model.optimizer.hyperparams[hp_name] for hp_name in hp_names]
    hp_grads_dict = {
        'lr': tf.placeholder(tf.float32, [], name='lr_grad'),
        # mom_name: tf.placeholder(
        #     tf.float32, [], name='{}_grad'.format(mom_name))
    }
    hp_grads_plh = [hp_grads_dict[hp_name] for hp_name in hp_names]
    hp_grads_and_vars = list(zip(hp_grads_plh, hp))
    cgrad = {'lr': (-1e1, 1e1)} #, mom_name: (-1e1, 1e1)}
    cval = {'lr': (1e-4, 1e1)} #, mom_name: (1e-4, 1e0)}
    cgrad_ = [cgrad[hp_name] for hp_name in hp_names]
    cval_ = [cval[hp_name] for hp_name in hp_names]
    meta_train_op = meta_opt.apply_gradients(
        hp_grads_and_vars, clip_gradients=cgrad_, clip_values=cval_)

    var_list = tf.global_variables()
    ckpt = build_checkpoint(tf.global_variables())
    write_op = write_checkpoint(ckpt, var_list)
    read_op = read_checkpoint(ckpt, var_list)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        exp_logger = _get_exp_logger(sess, exp_folder)

        def log_hp(hp_dict):
            lr_ = hp_dict['lr']
            mom_ = hp_dict['mom']
            # Log current learning rate and momentum.
            if negative_momentum:
                exp_logger.log(ii, 'mom', 1.0 - mom_)
                exp_logger.log(ii, 'log neg mom', np.log10(mom_))
                mom__ = 1.0 - mom_
            else:
                exp_logger.log(ii, 'mom', mom_)
                exp_logger.log(ii, 'log neg mom', np.log10(1.0 - mom_))
                mom__ = mom_

            if effective_lr:
                lr__ = lr_ * (1.0 - mom__)
                eflr_ = lr_
            else:
                lr__ = lr_
                eflr_ = lr_ / (1.0 - mom__)
            exp_logger.log(ii, 'eff lr', eflr_)
            exp_logger.log(ii, 'log eff lr', np.log10(eflr_))
            exp_logger.log(ii, 'lr', lr__)
            exp_logger.log(ii, 'log lr', np.log10(lr__))
            exp_logger.flush()
            return lr__, mom__

        # Assign initial learning rate and momentum.
        m.optimizer.assign_hyperparam(sess, 'lr', lr_)
        m.optimizer.assign_hyperparam(sess, 'mom', mom_)
        train_iter = six.moves.xrange(num_steps)
        if not print_step:
            train_iter = tqdm(train_iter, ncols=0)
        for ii in train_iter:
            # Meta-optimization loop.
            if ii == 0 or ii % steps_per_update == 0:
                if ii < midpoint and smd:
                    if stochastic:
                        data_list = [
                            dataset.next_batch(bsize)
                            for step in six.moves.xrange(steps_look_ahead)
                        ]
                        # Take next few batches for last step evaluation.
                        eval_data_list = [
                            dataset.next_batch(bsize)
                            for step in six.moves.xrange(steps_look_ahead)
                        ]
                    else:
                        data_entry = dataset.next_batch(bsize)
                        data_list = [data_entry] * steps_look_ahead
                        # Use the deterministic batch for last step evaluation.
                        eval_data_list = [data_list[0]]
                    sess.run(write_op)
                    for ms in six.moves.xrange(num_meta_steps):
                        cost, hp_dict = meta_step(sess, model, data_list,
                                                  look_ahead_ops, hp_grad_ops,
                                                  hp_grads_plh, meta_train_op,
                                                  eval_data_list)
                        sess.run(read_op)
                        for hpname, hpval in hp_dict.items():
                            model.optimizer.assign_hyperparam(
                                sess, hpname, hpval)
                    lr_ = hp_dict['lr']
                    # mom_ = hp_dict['mom']
                else:
                    hp_dict = sess.run(model.optimizer.hyperparams)
                lr_log, mom_log = log_hp(hp_dict)
                lr_list.append(lr_log)
                mom_list.append(mom_log)

            if ii == midpoint // 2:
                m.optimizer.assign_hyperparam(sess, 'mom', 1 - 0.9)

            if ii == midpoint:
                lr_before_mid = hp_dict['lr']
                tau = (num_steps - midpoint) / np.log(lr_before_mid / final_lr)
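                # With this tau, the exponential decay below reaches exactly
                # final_lr at step num_steps.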

            if ii > midpoint:
                lr_ = np.exp(-(ii - midpoint) / tau) * lr_before_mid
                m.optimizer.assign_hyperparam(sess, 'lr', lr_)

            # Run regular training.
            if lr_ > 1e-6:
                # Use CBL for the first half of training (reuse the
                # deterministic look-ahead batch when not stochastic).
                if smd and not stochastic and ii < midpoint:
                    xd, yd = data_entry
                else:
                    xd, yd = dataset.next_batch(bsize)
                cost_, _ = sess.run(
                    [m.cost, m.train_op], feed_dict={
                        m.x: xd,
                        m.y: yd
                    })
                if ii < midpoint:
                    sess.run(m._retrieve_ema_op)

            # Evaluate every certain number of steps.
            if ii == 0 or (ii + 1) % steps_per_eval == 0:
                test_acc = 0.0
                test_xent = 0.0
                train_acc = 0.0
                train_xent = 0.0

                # Report full epoch training loss.
                for jj in six.moves.xrange(steps_per_epoch):
                    xd, yd = dataset_train.next_batch(bsize)
                    xent_, acc_ = sess.run(
                        [m.cost, m.acc], feed_dict={
                            x: xd,
                            y: yd
                        })
                    train_xent += xent_ / float(steps_per_epoch)
                    train_acc += acc_ / float(steps_per_epoch)
                step_list.append(ii + 1)
                train_xent_list.append(train_xent)
                train_acc_list.append(train_acc)
                dataset_train.reset()

                # Report full epoch validation loss.
                for jj in six.moves.xrange(steps_test_per_epoch):
                    xd, yd = dataset_test.next_batch(bsize)
                    xent_, acc_ = sess.run(
                        [mtest.cost, mtest.acc], feed_dict={
                            x: xd,
                            y: yd
                        })
                    test_xent += xent_ / float(steps_test_per_epoch)
                    test_acc += acc_ / float(steps_test_per_epoch)
                test_xent_list.append(test_xent)
                test_acc_list.append(test_acc)
                dataset_test.reset()

                # Log training progress.
                exp_logger.log(ii, 'train loss', train_xent)
                exp_logger.log(ii, 'log train loss', np.log10(train_xent))
                exp_logger.log(ii, 'test loss', test_xent)
                exp_logger.log(ii, 'log test loss', np.log10(test_xent))
                exp_logger.log(ii, 'train acc', train_acc)
                exp_logger.log(ii, 'test acc', test_acc)
                exp_logger.flush()

                if print_step:
                    log.info((
                        'Steps {:d} T Xent {:.3e} T Acc {:.3f} V Xent {:.3e} V Acc {:.3f} '
                        'LR {:.3e}').format(ii + 1, train_xent,
                                            train_acc * 100.0, test_xent,
                                            test_acc * 100.0, lr_))

    return Results(
        step=np.array(step_list),
        train_xent=np.array(train_xent_list),
        train_acc=np.array(train_acc_list),
        test_xent=np.array(test_xent_list),
        test_acc=np.array(test_acc_list),
        lr=np.array(lr_list),
        momentum=np.array(mom_list))
def test_get_dataset(capsys):
    get_dataset.get_dataset(PROJECT_ID, DATASET_ID)
    out, _ = capsys.readouterr()
    assert "Dataset name: " in out
Beispiel #24
0
    if querys_id_irrelevant.get(query_id, []) == []:
        querys_id_irrelevant[query_id] = []

    querys_id_irrelevant[query_id].append(file_id)


if __name__ == "__main__":
    n = 10

    dataset_name = ''
    while dataset_name not in ['CISI', 'CRAN']:
        dataset_name = input(
            'Escriba el nombre del dataset a utilizar (CISI o CRAN): ')

    files, querys, rel = get_dataset(dataset_name)

    proc_type = -1
    while proc_type not in [0, 1]:
        proc_type = int(
            input(
                'Si desea solo tener en cuenta sustantivos y verbos en el procesamiento de la informacion escriba 1, en otro caso escriba 0: '
            ))

    write_query = -1
    while write_query not in [0, 1]:
        write_query = int(
            input(
                'Si desea realizar una query escriba 1 y si desea realizar una evaluacion del sistema escriba 0: '
            ))
def main():
    X, X_test, Y, Y_test = get_dataset()
    model = get_model()
    model = train_model(model, X, X_test, Y, Y_test)
    save_model(model)
    return model
def load_images(args):
    data_args = args[DATASET]['data_args']
    X_tr, Y_tr, _, _, _, _ = get_dataset(DATASET, data_args)
    return X_tr[:10], Y_tr[:10]
    epochs = range(1, C.EPOCHS + 1)

    plt.plot(epochs, training_loss, label='Training loss')
    plt.plot(epochs, validation_loss, label='Validation loss')
    plt.title('Training and validation loss for ' + str(C.EPOCHS) + ' epochs')
    plt.gca().set_xlabel('Epochs')
    plt.gca().set_ylabel('Loss')
    plt.legend()
    plt.tight_layout()

    plt.gcf().savefig('loss.png')


if __name__ == '__main__':
    # Load the dataset and split them into training and test sets
    X_train, X_test, Y_train, Y_test = get_dataset()

    # Create the model and compile it
    model = create_model()
    compile_model(model)

    print(model.summary())
    print()

    print('Training model...')
    training_history = fit_model(model, X_train, Y_train)
    print()

    print('Evaluating model...')
    metrics = evaluate_model(model, X_test, Y_test)
    print()
def main():
    current_datetime = '{}'.format(datetime.datetime.today())

    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='mnist')
    parser.add_argument('--dataset_path', type=str)
    parser.add_argument('--model', default='cnn')
    parser.add_argument('--merge_label', action='store_true')
    parser.add_argument('--train_size', type=float, default=1)
    parser.add_argument('--seed', type=int, default=None)
    parser.add_argument('--batchsize', '-b', type=int, default=128)
    parser.add_argument('--epoch', '-e', type=int, default=100)
    parser.add_argument('--pretrained_model', default=None)
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--out', '-o', default='result')
    # for cnn
    parser.add_argument('--out_dims', type=str, default='16 16 16')
    parser.add_argument('--filter_sizes', type=str, default='3 3 3')
    parser.add_argument('--pool_sizes', type=str, default='2 2 2')
    parser.add_argument('--dropout', type=float, default=0.1)
    # for lstm
    parser.add_argument('--n_layers', type=int, default=2)
    parser.add_argument('--n_emb', type=int, default=8)
    parser.add_argument('--n_dim', type=int, default=16)
    # for nlp
    parser.add_argument('--min_vocab', type=int, default=5)

    args = parser.parse_args()

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()

    random.seed(args.seed)
    np.random.seed(args.seed)
    if args.gpu >= 0:
        # Only seed CuPy when the GPU is used (CuPy may be unavailable otherwise).
        chainer.cuda.cupy.random.seed(args.seed)

    train, dev, test, label_dic, vocab = get_dataset.get_dataset(
        args.dataset,
        args.train_size,
        merge=args.merge_label,
        model=args.model,
        path=args.dataset_path,
        threshold=args.min_vocab)
    convert = get_convertor.get_convertor(args.model, args.dataset)

    if not os.path.isdir(args.out):
        os.makedirs(args.out)
    current = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(current, args.out, 'best_model.npz')
    model_setup = args.__dict__.copy()
    model_setup['model_path'] = model_path
    model_setup['datetime'] = current_datetime

    with open(os.path.join(args.out, 'setting.json'), 'w') as f:
        json.dump(model_setup, f, ensure_ascii=False)

    if vocab is not None:
        model_setup['vocab_path'] = os.path.join(args.out, 'vocab.json')
        with open(os.path.join(args.out, 'vocab.json'), 'w') as f:
            json.dump(vocab, f, ensure_ascii=False)

    model_setup['label_path'] = os.path.join(args.out, 'label.json')
    with open(os.path.join(args.out, 'label.json'), 'w') as f:
        json.dump(label_dic, f, ensure_ascii=False)

    model_setup['data_path'] = os.path.join(args.out, 'encoded_data.pkl')
    import pickle
    all_data = {'train': train, 'dev': dev, 'test': test}
    with open(model_setup['data_path'], mode='wb') as f:
        pickle.dump(all_data, f)

    get_model_fn = get_model_fns[args.dataset]
    setting = args.__dict__.copy()
    if args.merge_label:
        model = get_model_fn(args.model, setting, args.out, num_class=2)
    else:
        model = get_model_fn(args.model, setting, args.out)

    if args.gpu >= 0:
        model.to_gpu(args.gpu)

    if args.pretrained_model is not None:
        serializers.load_npz(args.pretrained_model, model)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    # optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(0.005))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       converter=convert,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss', 'main/acc',
        'validation/main/acc', 'elapsed_time'
    ]),
                   trigger=(1, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, filename='model_epoch_{.updater.epoch}.npz'),
                   trigger=(10, 'epoch'))
    trainer.extend(extensions.ProgressBar(update_interval=1))

    eval_model = model.copy()
    val_iter = chainer.iterators.SerialIterator(dev,
                                                args.batchsize,
                                                repeat=False,
                                                shuffle=False)
    trainer.extend(
        extensions.Evaluator(val_iter,
                             eval_model,
                             converter=convert,
                             device=args.gpu))
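    # Snapshot 'best_model.npz' whenever validation accuracy reaches a new maximum.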
    record_trigger = training.triggers.MaxValueTrigger('validation/main/acc',
                                                       (1, 'epoch'))
    trainer.extend(extensions.snapshot_object(model, 'best_model.npz'),
                   trigger=record_trigger)

    print('start training')
    trainer.run()
def run_offline_smd(num_steps,
                    init_lr,
                    init_decay,
                    meta_lr,
                    num_meta_steps,
                    momentum=MOMENTUM,
                    effective_lr=False,
                    negative_momentum=False,
                    pretrain_ckpt=None,
                    output_fname=None,
                    seed=0):
    """Run offline SMD experiments.

    Args:
        init_lr: Initial learning rate.
        init_decay: Initial decay constant.
        data_list: List of tuples of inputs and labels.
        meta_lr: Float. Meta descent learning rate.
        num_meta_steps: Int. Number of meta descent steps.
        momentum: Float. Momentum.
        effective_lr: Bool. Whether to optimize in the effective LR space.
        negative_momentum: Bool. Whether to optimize in the negative momentum space.
    """
    bsize = BATCH_SIZE
    if output_fname is not None:
        log_folder = os.path.dirname(output_fname)
    else:
        log_folder = os.path.join('results', 'mnist', 'offline', 'smd')
        log_folder = os.path.join(log_folder, _get_run_number(log_folder))
    if not os.path.exists(log_folder):
        os.makedirs(log_folder)
    with tf.Graph().as_default(), tf.Session() as sess:
        dataset = get_dataset('mnist')
        exp_logger = _get_exp_logger(sess, log_folder)
        if effective_lr:
            init_lr_ = init_lr / float(1.0 - momentum)
        else:
            init_lr_ = init_lr

        if negative_momentum:
            init_mom_ = 1.0 - momentum
        else:
            init_mom_ = momentum

        config = get_mnist_mlp_config(
            init_lr_,
            init_mom_,
            decay=init_decay,
            effective_lr=effective_lr,
            negative_momentum=negative_momentum)
        x = tf.placeholder(tf.float32, [None, 28, 28, 1], name="x")
        y = tf.placeholder(tf.int64, [None], name="y")
        with tf.name_scope('Train'):
            with tf.variable_scope('Model'):
                model = get_mnist_mlp_model(
                    config,
                    x,
                    y,
                    optimizer='momentum_inv_decay',
                    training=True)
        all_vars = tf.global_variables()
        var_to_restore = list(
            filter(lambda x: 'momentum' not in x.name.lower(), all_vars))
        var_to_restore = list(
            filter(lambda x: 'global_step' not in x.name.lower(),
                   var_to_restore))
        var_to_restore = list(
            filter(lambda x: 'lr' not in x.name.lower(), var_to_restore))
        var_to_restore = list(
            filter(lambda x: 'mom' not in x.name.lower(), var_to_restore))
        var_to_restore = list(
            filter(lambda x: 'decay' not in x.name.lower(), var_to_restore))
        saver = tf.train.Saver(var_to_restore)
        rnd = np.random.RandomState(seed)

        hp_dict = {'lr': init_lr, 'decay': init_decay}
        hp_names = hp_dict.keys()
        hyperparams = dict([(hp_name, model.optimizer.hyperparams[hp_name])
                            for hp_name in hp_names])
        grads = model.optimizer.grads
        accumulators = model.optimizer.accumulators
        new_accumulators = model.optimizer.new_accumulators
        loss = model.cost

        # Build look ahead graph.
        look_ahead_ops, hp_grad_ops, zero_out_ops = look_ahead_grads(
            hyperparams, grads, accumulators, new_accumulators, loss)

        # Meta optimizer: take hyperparameter steps in log space
        # (momentum here; the Adam variant is left commented out).
        # meta_opt = LogOptimizer(tf.train.AdamOptimizer(meta_lr))
        meta_opt = LogOptimizer(tf.train.MomentumOptimizer(meta_lr, 0.9))
        hp = [model.optimizer.hyperparams[hp_name] for hp_name in hp_names]
        hp_grads_dict = {
            'lr': tf.placeholder(tf.float32, [], name='lr_grad'),
            'decay': tf.placeholder(tf.float32, [], name='decay_grad')
        }
        hp_grads_plh = [hp_grads_dict[hp_name] for hp_name in hp_names]
        hp_grads_and_vars = list(zip(hp_grads_plh, hp))
        cgrad = {'lr': (-1e1, 1e1), 'decay': (-1e1, 1e1)}
        cval = {'lr': (1e-4, 1e1), 'decay': (1e-4, 1e3)}
        cgrad_ = [cgrad[hp_name] for hp_name in hp_names]
        cval_ = [cval[hp_name] for hp_name in hp_names]
        meta_train_op = meta_opt.apply_gradients(
            hp_grads_and_vars, clip_gradients=cgrad_, clip_values=cval_)

        if output_fname is not None:
            msg = '{} exists, please remove previous experiment data.'.format(
                output_fname)
            assert not os.path.exists(output_fname), msg
            log.info('Writing to {}'.format(output_fname))
            with open(output_fname, 'w') as f:
                f.write('Step,LR,Mom,Decay,Loss\n')

        # Initialize all variables.
        sess.run(tf.global_variables_initializer())
        var_list = tf.global_variables()
        if pretrain_ckpt is not None:
            saver.restore(sess, pretrain_ckpt)
        ckpt = build_checkpoint(var_list)
        write_op = write_checkpoint(ckpt, var_list)
        read_op = read_checkpoint(ckpt, var_list)
        sess.run(write_op)

        # Progress bar.
        it = tqdm(
            six.moves.xrange(num_meta_steps),
            ncols=0,
            desc='look_{}_ilr_{:.0e}_decay_{:.0e}'.format(
                num_steps, init_lr, init_decay))

        for run in it:
            # Stochastic data list makes the SMD converge faster.
            data_list = [
                dataset.next_batch(bsize)
                for step in six.moves.xrange(num_steps)
            ]
            eval_data_list = [
                dataset.next_batch(bsize)
                for step in six.moves.xrange(NUM_TRAIN // bsize)
            ]
            # Run meta descent step.
            cost, hp_dict = meta_step(sess, model, data_list, look_ahead_ops,
                                      hp_grad_ops, hp_grads_plh, meta_train_op,
                                      eval_data_list)

            # Early stop if hits NaN.
            if np.isnan(cost):
                break

            # Restore parameters.
            sess.run(read_op)
            for hpname, hpval in hp_dict.items():
                model.optimizer.assign_hyperparam(sess, hpname, hpval)

            # Read out hyperparameters in normal parameterization.
            if negative_momentum:
                mom = 1 - hp_dict['mom']
            else:
                mom = hp_dict['mom']
            if effective_lr:
                lr = hp_dict['lr'] * (1 - mom)
            else:
                lr = hp_dict['lr']

            # Write to logs.
            if output_fname is not None:
                with open(output_fname, 'a') as f:
                    f.write('{:d},{:f},{:f},{:f},{:f}\n'.format(
                        run, lr, hp_dict['mom'], hp_dict['decay'], cost))
            # Log to TensorBoard.
            exp_logger.log(run, 'lr', lr)
            exp_logger.log(run, 'decay', hp_dict['decay'])
            exp_logger.log(run, 'log loss', np.log10(cost))
            exp_logger.flush()

            # Update progress bar.
            it.set_postfix(
                lr='{:.3e}'.format(lr),
                decay='{:.3e}'.format(hp_dict['decay']),
                loss='{:.3e}'.format(cost))

        exp_logger.close()
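The LogOptimizer wrapper above means the meta step acts on log(hyperparameter), so lr and decay stay positive and the updates are multiplicative. A minimal numpy illustration of that idea (illustrative only; the exact update is defined by the repository's LogOptimizer):

import numpy as np

def log_space_step(hp, grad_hp, meta_lr):
    """One gradient step on theta = log(hp); the result stays positive."""
    theta = np.log(hp)
    theta -= meta_lr * grad_hp * hp   # chain rule: dL/dtheta = dL/dhp * hp
    return np.exp(theta)

lr = log_space_step(0.1, grad_hp=-2.0, meta_lr=1e-2)   # a multiplicative increase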
Beispiel #30
0
# The snippet is truncated; the imports and class/constructor header below are
# inferred from how the objects are used further down.
import torch
from torch.optim import AdamW
from torch.utils.data import DataLoader
from transformers import BertForSequenceClassification


class LeetcodeDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {
            key: torch.tensor(val[idx])
            for key, val in self.encodings.items()
        }
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


train_encodings, train_labels, val_encodings, val_labels = get_dataset()

train_dataset = LeetcodeDataset(train_encodings, train_labels)
val_dataset = LeetcodeDataset(val_encodings, val_labels)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device(
    'cpu')

model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
model.to(device)
model.train()

train_loader = DataLoader(train_dataset, batch_size=5, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=True)

optim = AdamW(model.parameters(), lr=5e-5)
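
The example stops before the training loop. A minimal sketch of a fine-tuning loop over the objects defined above (model, train_loader, optim, device), assuming a transformers version whose forward pass returns an output object with a .loss when 'labels' are present; the epoch count is a placeholder:

for epoch in range(3):  # placeholder epoch count
    for batch in train_loader:
        optim.zero_grad()
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)       # the 'labels' key makes the model return a loss
        outputs.loss.backward()
        optim.step()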