Example #1
0
 def compute_fold(ARG):
     """Train one cross-validation fold.

     ARG is a (train_indices, val_indices) pair. Relies on surrounding-scope
     names (data_stuff, N_inputs, input_shape, config, logger).

     Returns (fit history dict, metric name).
     """
     fold_train_idx, fold_val_idx = ARG
     fold_train = slice_data(data_stuff, fold_train_idx, N_inputs)
     fold_val = slice_data(data_stuff, fold_val_idx, N_inputs)
     model, metric = make_model(input_shape, data_stuff[1].shape,
                                config['model_params'])
     logger.info('Model build')
     fit_result = model.fit(fold_train[0],
                            fold_train[1],
                            sample_weight=fold_train[2],
                            validation_data=fold_val[:2],
                            **config['training_cfg'])
     return fit_result.history, metric
Example #2
0
def make_cv_on_current_set(data_stuff, indices_to_use, config):
    '''On current set, perform cv and extract model, score and best iteration.

    Runs one model fit per CV split (serially or via a process pool,
    depending on args.nproc), averages the per-epoch metric curves across
    folds, then trains a final model on all of `indices_to_use` and restores
    its weights to the epoch with the best mean validation metric.

    Returns a dict:
        'train': (mean, std) per-epoch training-metric curves across folds
        'val':   (mean, std) per-epoch validation-metric curves
        'it':    epoch index of the best mean validation metric
        'model': final model, weights restored to that epoch
    '''
    N_inputs, input_shape = get_input_shapes(data_stuff, config)

    def compute_fold(ARG):
        # Train one fold; returns (fit history dict, metric name).
        cv_train_idx, cv_val_idx = ARG
        train = slice_data(data_stuff, cv_train_idx, N_inputs)
        val = slice_data(data_stuff, cv_val_idx, N_inputs)
        model, metric = make_model(input_shape, data_stuff[1].shape,
                                   config['model_params'])
        logger.info('Model build')
        result = model.fit(train[0],
                           train[1],
                           sample_weight=train[2],
                           validation_data=val[:2],
                           **config['training_cfg'])
        return result.history, metric

    splits = pu_cv_splitter(indices_to_use, data_stuff[1])
    if args.nproc <= 1:
        history = [compute_fold(ARG) for ARG in splits]
    else:
        # Use the pool as a context manager so workers are reliably shut
        # down on exit (the previous `del p` could leak worker processes).
        # NOTE(review): compute_fold is a closure, so this relies on a
        # fork-based multiprocessing start method — confirm spawn is not used.
        with Pool(args.nproc) as p:
            history = p.map(compute_fold, splits)

    # Every fold reports the same metric name; keep it once and strip the
    # per-fold tuples down to the history dicts.
    metric = history[0][1]
    history = [ARG[0] for ARG in history]

    #====== Average ===============
    cv_av, cv_std = make_av_std(history, metric)
    cv_val_av, cv_val_std = make_av_std(history, 'val_%s' % metric)
    best_cv_idx = cv_val_av.argmax()  # epoch with best mean validation score

    #====== test ===========
    logger.info('Making Final model')
    train_stuff = slice_data(data_stuff, indices_to_use, N_inputs)
    model, metric = make_model(input_shape, data_stuff[1].shape,
                               config['model_params'])
    logger.info('Model build')
    # SaveSelected snapshots weights so they can be restored to the chosen
    # epoch after the final fit.
    saver = SaveSelected(best_cv_idx)
    result = model.fit(train_stuff[0],
                       train_stuff[1],
                       sample_weight=train_stuff[2],
                       callbacks=[saver],
                       **config['training_cfg'])
    saver.reset()

    return {
        'train': (cv_av, cv_std),
        'val': (cv_val_av, cv_val_std),
        'it': best_cv_idx,
        'model': model
    }
Example #3
0
def extract_reliable_negatives_fuselier(model,
                                        data_stuff,
                                        negative_idx,
                                        th=0.5,
                                        yshape=2,
                                        N_inputs=1):
    """Keep only the negatives the model scores below `th` (PU-learning step).

    Parameters
    ----------
    model : fitted classifier exposing `predict`
    data_stuff : project data bundle accepted by `slice_data`
    negative_idx : array of indices currently labelled negative
    th : score threshold; samples scoring below it remain negative
    yshape : 2 when `predict` returns two columns, in which case column 1
             (positive-class score) is used
    N_inputs : number of model inputs, forwarded to `slice_data`

    Returns
    -------
    tuple
        (reliable negative indices, stats of the score vector,
         count of samples scoring above 0.5)
    """
    negative_stuff = slice_data(data_stuff, negative_idx, N_inputs)
    drugability = model.predict(negative_stuff[0])
    if yshape == 2:
        # Two-column output: take the positive-class score.
        drugability = drugability[:, 1]
    below_th = np.where(drugability < th)[0]
    new_reliable_negatives = negative_idx[below_th]

    # NOTE(review): this count uses a hard-coded 0.5 rather than `th` —
    # confirm that is intentional when callers pass th != 0.5.
    n_scored_positive = (drugability > 0.5).sum()
    return new_reliable_negatives, make_stats_from_vector(
        drugability), n_scored_positive
Example #4
0
batch_size = 100  # TODO: should this come from config instead?
epochs = args.epochs
history = []

# Single-input models get one feature dimension; multi-input models get
# one shape per input array.
N_inputs = config['model_params'].get('num_inputs', 1)
input_shape = x.shape[1] if N_inputs == 1 else [arr.shape for arr in x]

# An explicit 'data_shapes' config entry overrides the inferred shapes.
data_shapes = config.get('data_shapes', 'none')
if data_shapes != 'none':
    input_shape = data_shapes
for cv_train_idx, cv_val_idx in cv_splits:
    train = slice_data(data_stuff, cv_train_idx, N_inputs)
    val = slice_data(data_stuff, cv_val_idx, N_inputs)
    if is_multitask:
        logger.info('Train Y: %s' % str(np.sum(train[1], axis=1)))
        for i, vy in enumerate(train[1]):
            key = 'out%i' % i
            train[2][key] = balance_masked_weights(vy, train[2][key])
            val[2][key] = balance_masked_weights(val[1][i], val[2][key])
    else:
        train[2] = scale_weights(train[1], train[2], args.scale_positive)
        val[2] = scale_weights(val[1], val[2], args.scale_positive)
        logger.info('Train Y: %s' % str(np.sum(train[1], axis=0)))
    model, metric = make_model(input_shape, np.shape(y),
                               config['model_params'])
    logger.info('Model build')
    result = model.fit(train[0],