Ejemplo n.º 1
0
    # train model and save weights
    train(model, {
        "x": comp_trainX,
        "y": comp_trainY
    }, {
        "x": comp_valX,
        "y": comp_valY
    },
          weight_file,
          epochs=EPOCHS)

    # load weights from file
    model.load_weights(weight_file)

    # test competition data
    comp_eval = test_model(model, comp_testX, comp_testY)

    # cross validate transfer learning
    print(f'fold {i+1}')
    pilot_fold_avg = {'acc': 0, 'bal': 0, 'kap': 0}
    for i, (train_index, test_index) in enumerate(skf.split(_pilotX, _pilotY)):
        # reset weights
        model.load_weights(weight_file)

        # stratified train-val-test split for pilot data
        pilot_trainX, pilot_testX = _pilotX[train_index], _pilotX[test_index]
        pilot_trainY, pilot_testY = _pilotY[train_index], _pilotY[test_index]
        pilot_trainX, pilot_valX, pilot_trainY, pilot_valY = train_test_split(
            pilot_trainX,
            pilot_trainY,
            test_size=1 / (TRANSFER_FOLDS - 1),
Ejemplo n.º 2
0
                   F2=16,
                   dropoutType='Dropout')

    # compile model
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    # train model and save weights
    # train(model, {"x": comp_trainX, "y": comp_trainY}, {"x": comp_valX, "y": comp_valY}, weight_file, epochs=EPOCHS)

    # load weights from file
    model.load_weights(weight_file)

    # test competition data
    comp_eval = test_model(model, comp_testX, comp_testY)

    # cross validate transfer learning
    print(f'fold {i+1}')
    pilot_fold_avg = {'acc': 0, 'bal': 0, 'kap': 0}
    for i, (train_index, test_index) in enumerate(skf.split(_pilotX, _pilotY)):
        # reset weights
        model.load_weights(weight_file)

        # stratified train-val-test split for pilot data
        pilot_trainX, pilot_testX = _pilotX[train_index], _pilotX[test_index]
        pilot_trainY, pilot_testY = _pilotY[train_index], _pilotY[test_index]
        pilot_trainX, pilot_valX, pilot_trainY, pilot_valY = train_test_split(
            pilot_trainX,
            pilot_trainY,
            test_size=1 / (TRANSFER_FOLDS - 1),
Ejemplo n.º 3
0
        if model_options['save']:
            saver = tf.train.Saver(var_list=variables, max_to_keep=100)

        if not os.path.exists('checkpoints/' + model_options['name']):
            os.system('mkdir -p checkpoints/' + model_options['name'])

        save_name = model_options['name']
        # -name is also used as the path to checkpoint file for testing

        if model_options['mode'] == 'test':
            saver = tf.train.Saver(var_list=variables, max_to_keep=100)
            model_options['start_from'] = 0
            saver.restore(sess, save_name)
            print 'Restored -', save_name
            test_model(model_options, sess, model_vars)

        if model_options['mode'] == 'save':
            saver.restore(sess, save_name)
            print 'Restored -', save_name
            saver.save(sess, save_name + '.eval_model')
            print 'Saved'

        if model_options['mode'] == 'train':
            save_name = './checkpoints/' + model_options[
                'name'] + '/' + model_options['name']
            minibatch_idx = -1
            epoch_idx = 0
            step = -1
            model_options['start_from'] = 0
            ######## UNCOMMENT AND CHANGE WHILE RESUMING ##############
            sys.argv[0], USAGE, USAGE_HINT)
        exit(-1)


if __name__ == '__main__':
    # Script entry point: load click/buy/test data, train the selected
    # recommender model, generate recommendations for the test sessions, and
    # report timing plus precision/recall.
    # NOTE: this block uses Python 2 print statements.
    trainSet_clicks, trainSet_buys, testSet, model, num_recommended_items = getArgs(
    )
    clicks, buys, test = utils.load_data(trainSet_clicks, trainSet_buys,
                                         testSet)
    test_count = utils.parse_test(test)
    # The 'Session ID' entry of the parsed test data is what gets passed to
    # recommend_items() as the user identifier(s).
    user_id = test_count['Session ID']
    source_items_map, test_items_map = utils.split_test(test_count)

    # NOTE(review): if MODELS is a plain dict, .get() yields None for an
    # unknown model name and the next call fails with AttributeError --
    # confirm that getArgs() validates the name.
    mod = MODELS.get(model)

    # Time the training phase.
    # NOTE(review): price_threshold is not assigned in this block; presumably
    # a module-level global set elsewhere (e.g. by getArgs) -- confirm.
    start_time = time.time()
    mod.learn(clicks, buys, price_threshold)
    print 'Training time: %s seconds' % (time.time() - start_time)

    # NOTE(review): neighborhood_size is likewise not assigned here -- confirm
    # where it is defined.
    model_data = mod.load_model(neighborhood_size)
    # Time the recommendation phase; throughput is the number of entries in
    # the recommendation map divided by the elapsed seconds.
    recommend_time = time.time()
    user_recommend_map = mod.recommend_items(model_data, user_id,
                                             source_items_map,
                                             num_recommended_items)
    print 'Average throughput: {} users per second'.\
        format(len(user_recommend_map) / (time.time() - recommend_time))
    # Score the recommendations against the held-out test items.
    precision, recall = test_model(user_recommend_map, test_items_map)

    print 'Precision for clicks items: {}'.format(precision)
    print 'Recall for clicks items: {}'.format(recall)
Ejemplo n.º 5
0
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])

        # train model and save weights
        train(model, {
            "x": pilot_trainX,
            "y": pilot_trainY
        }, {
            "x": pilot_valX,
            "y": pilot_valY
        },
              epochs=EPOCHS)

        # test pilot data
        pilot_eval = test_model(model, pilot_testX, pilot_testY)
        pilot_fold_avg = {
            k: pilot_fold_avg.get(k, 0) + pilot_eval.get(k, 0)
            for k in set(pilot_fold_avg) & set(pilot_eval)
        }

        print(f"\tpilot fold {i+1}:", sorted(pilot_eval.items()))

    pilot_avg = {
        k: pilot_avg.get(k, 0) + pilot_fold_avg.get(k, 0)
        for k in set(pilot_avg) & set(pilot_fold_avg)
    }

    print('avg from folds')
    print(
        'pilot:      ',
Ejemplo n.º 6
0
def crunch(surf_file, net, w, s, d, dataloader, loss_key, acc_key, comm, rank,
           args):
    """
    Fill in the loss and accuracy values of the surface stored in surf_file.

    For every coordinate that the scheduler reports as not yet computed, the
    network is moved to that coordinate (along the directions in ``d``),
    evaluated on ``dataloader``, and the result is stored under ``loss_key``
    and ``acc_key`` in the HDF5 file.

    The MPI reduce step of the original design is currently bypassed, so each
    process only records the values it computed itself, and only rank 0
    writes them to the file.

    Args:
        surf_file: path of the HDF5 surface file; must contain 'xcoordinates'
            and may contain 'ycoordinates'.
        net: the model to evaluate; it is moved to the GPU with net.cuda().
        w: reference weights, used when args.dir_type == 'weights' and for the
            per-iteration reset.
        s: reference states, used when args.dir_type == 'states'.
        d: direction(s) passed through to net_plotter.
        dataloader: data passed to evaluation.test_model.
        loss_key: dataset name for the loss values in surf_file.
        acc_key: dataset name for the accuracy values in surf_file.
        comm: communicator handed to scheduler.get_job_indices.
        rank: rank of this process; only rank 0 opens the file for writing.
        args: options object; reads args.loss_name and args.dir_type.
    """
    # Only rank 0 needs write access. The context manager guarantees the file
    # is closed even if an evaluation step raises.
    with h5py.File(surf_file, 'r+' if rank == 0 else 'r') as f:
        xcoordinates = f['xcoordinates'][:]
        ycoordinates = f['ycoordinates'][:] if 'ycoordinates' in f else None

        if loss_key not in f:
            # First run for this key: -1 marks a value as not yet computed.
            shape = xcoordinates.shape if ycoordinates is None else (
                len(xcoordinates), len(ycoordinates))
            losses = -np.ones(shape=shape)
            accuracies = -np.ones(shape=shape)
            if rank == 0:
                f[loss_key] = losses
                f[acc_key] = accuracies
        else:
            # Resume from whatever has already been stored.
            losses = f[loss_key][:]
            accuracies = f[acc_key][:]

        # Indices of 'losses' that still need to be filled in, together with
        # the coordinates of each one with respect to the directions in 'd'.
        # The third return value was only needed by the (bypassed) MPI
        # padding loop.
        inds, coords, _inds_nums = scheduler.get_job_indices(
            losses, xcoordinates, ycoordinates, comm)

        print('Computing %d values for rank %d' % (len(inds), rank))
        start_time = time.time()
        total_sync = 0.0

        if args.loss_name == 'mse':
            criterion = nn.MSELoss()
        else:
            criterion = nn.CrossEntropyLoss()

        net.cuda()
        # Loop over all uncalculated loss values
        for count, ind in enumerate(inds):
            # Coordinates of the loss value being calculated
            coord = coords[count]

            # Load the weights/states corresponding to those coordinates
            if args.dir_type == 'weights':
                net_plotter.set_weights(net, w, d, coord)
            elif args.dir_type == 'states':
                net_plotter.set_states(net, s, d, coord)

            # Time the evaluation itself
            loss_start = time.time()
            loss, acc = evaluation.test_model(net, criterion, dataloader)
            loss_compute_time = time.time() - loss_start

            # Record the result in the local arrays (flat index into the grid)
            losses.ravel()[ind] = loss
            accuracies.ravel()[ind] = acc

            # The MPI reduce_max of losses/accuracies is bypassed here (it
            # returns the array unchanged when comm is None), so the measured
            # sync time only covers this empty section.
            syc_start = time.time()
            syc_time = time.time() - syc_start
            total_sync += syc_time

            # Only the master node writes to the file - this avoids write
            # conflicts. Flushing every step keeps partial results on disk.
            if rank == 0:
                f[loss_key][:] = losses
                f[acc_key][:] = accuracies
                f.flush()

            print(
                'Evaluating rank %d  %d/%d  (%.1f%%)  coord=%s \t%s= %.3f \t%s=%.2f \ttime=%.2f \tsync=%.2f'
                % (rank, count, len(inds), 100.0 * count / len(inds),
                   str(coord), loss_key, loss, acc_key, acc,
                   loss_compute_time, syc_time))

            # Reset weights before the next coordinate.
            # NOTE(review): this always goes through set_weights with a 2-D
            # zero coordinate, even for dir_type == 'states' or a 1-D
            # surface -- confirm that is intended.
            net_plotter.set_weights(net, w, d, np.array([0., 0.]))

        total_time = time.time() - start_time
        print('Rank %d done!  Total time: %.2f Sync: %.2f' %
              (rank, total_time, total_sync))
Ejemplo n.º 7
0
    # download CIFAR10 if it does not exist
    if rank == 0 and args.dataset == 'cifar10':
        torchvision.datasets.CIFAR10(root=args.dataset + '/data',
                                     train=True,
                                     download=True)

    # mpi.barrier(comm)
    time.sleep(.05)

    trainloader, testloader = dataloader.load_dataset(
        args.dataset, args.datapath, args.batch_size, args.threads,
        args.raw_data, args.data_split, args.split_idx, args.trainloader,
        args.testloader)
    criterion = nn.CrossEntropyLoss()

    evaluation.test_model(net, criterion, testloader, printing=True)
    # exit()
    #--------------------------------------------------------------------------
    # Start the computation
    #--------------------------------------------------------------------------
    crunch(surf_file, net, w, s, d, trainloader, 'train_loss', 'train_acc',
           comm, rank, args)
    # crunch(surf_file, net, w, s, d, testloader, 'test_loss', 'test_acc', comm, rank, args)

    #--------------------------------------------------------------------------
    # Plot figures
    #--------------------------------------------------------------------------
    if args.plot and rank == 0:
        if args.y and args.proj_file:
            plot_2D.plot_contour_trajectory(surf_file, dir_file,
                                            args.proj_file, 'train_loss',
Ejemplo n.º 8
0
            (skf_comp_trainX, skf_comp_valX, skf_comp_testX), kernels=KERNELS)
        skf_comp_trainY, skf_comp_valY, skf_comp_testY = onehot(
            (skf_comp_trainY, skf_comp_valY, skf_comp_testY))

        # transfer learn on comp data
        train(model, {
            "x": skf_comp_trainX,
            "y": skf_comp_trainY
        }, {
            "x": skf_comp_valX,
            "y": skf_comp_valY
        },
              epochs=TRANSFER_EPOCHS)

        # test comp data
        comp_eval = test_model(model, skf_comp_testX, skf_comp_testY)
        comp_fold_avg = {
            k: comp_fold_avg.get(k, 0) + comp_eval.get(k, 0)
            for k in set(comp_fold_avg) & set(comp_eval)
        }

        print(f"\tcomp fold {i+1}:", sorted(comp_eval.items()))

    comp_fold_avg = {
        k: comp_fold_avg.get(k, 0) / TRANSFER_FOLDS
        for k in set(comp_fold_avg)
    }

    ### cross validate pilot transfer learning
    pilot_fold_avg = {'acc': 0, 'bal': 0, 'kap': 0}
    print()
    results = []

    for PAI in TEST_MATERIALS:

        netD = DISCRIMINATOR().to(DEVICE)
        netG = GENERATOR().to(DEVICE)

        print("[Dataset] - " + DATASET + " -> Material number " + str(PAI))
        
        train_loader, valid_loader, test_loader = get_data_loaders(IMG_PATH, DATASET, test_material = PAI, img_size = IMG_SIZE, batch_size = BATCH_SIZE, croped=True, unseen_attack=UNSEEN_ATTACK)

        #netD, train_history = fit((netD, netG), DATASET, PAI, (train_loader, valid_loader), EPOCHS, EPOCHS_WITH_MATCHER, DEVICE, with_generator = USE_GENERATOR)

        netD, train_history = fit((netD, netG), DATASET, PAI, (train_loader, valid_loader), EPOCHS, EPOCHS_WITH_MATCHER, DEVICE, with_generator = USE_GENERATOR, just_train_classifier = True)

        test_loss, test_acc, test_apcer, test_bpcer, test_eer, test_bpcer_apcer1, test_bpcer_apcer5, test_bpcer_apcer10, test_apcer1, test_apcer5, test_apcer10 = test_model(netD, test_loader, DEVICE)

        results.append((test_loss.item(), test_acc, test_apcer, test_bpcer, test_eer, test_bpcer_apcer1, test_bpcer_apcer5, test_bpcer_apcer10, test_apcer1, test_apcer5, test_apcer10))

    #PRINTS -------------------------------------------------------------------------------------

    # Compute average and std
    acc_array = np.array([i[1] for i in results])
    apcer_array = np.array([i[2] for i in results])
    bpcer_array = np.array([i[3] for i in results])
    eer_array = np.array([i[4] for i in results])
    bpcer_apcer1_array = np.array([i[5] for i in results])
    bpcer_apcer5_array = np.array([i[6] for i in results])
    bpcer_apcer10_array = np.array([i[7] for i in results])
    apcer1_array = np.array([i[8] for i in results])
    apcer5_array = np.array([i[9] for i in results])