def main():

    print('Available GPUs', get_available_gpus())

    tf.logging.set_verbosity(tf.logging.DEBUG)
    loader = p1b3.DataLoader(val_split=VAL_SPLIT,
                             test_cell_split=TEST_CELL_SPLIT,
                             cell_features=['expression'],
                             drug_features=['descriptors'],
                             feature_subsample=FEATURE_SUBSAMPLE,
                             scaling=SCALING,
                             scramble=False,
                             min_logconc=MIN_LOGCONC,
                             max_logconc=MAX_LOGCONC,
                             subsample='naive_balancing',
                             category_cutoffs=CATEGORY_CUTOFFS)

    tf.logging.info('Loader input dim: {}'.format(loader.input_dim))
    gen_shape = None

    train_gen = p1b3.DataGenerator(loader,
                                   batch_size=BATCH_SIZE,
                                   shape=gen_shape,
                                   name='train_gen').flow()
    val_gen = p1b3.DataGenerator(loader,
                                 partition='val',
                                 batch_size=BATCH_SIZE,
                                 shape=gen_shape,
                                 name='val_gen').flow()
    val_gen2 = p1b3.DataGenerator(loader,
                                  partition='val',
                                  batch_size=BATCH_SIZE,
                                  shape=gen_shape,
                                  name='val_gen2').flow()
    test_gen = p1b3.DataGenerator(loader,
                                  partition='test',
                                  batch_size=BATCH_SIZE,
                                  shape=gen_shape,
                                  name='test_gen').flow()

    # Prepare for distribution using the mirrored strategy
    devices = [
        "/device:GPU:0", "/device:GPU:1", "/device:GPU:2", "/device:GPU:3"
    ]
    distribution = tf.contrib.distribute.MirroredStrategy(
        devices)  # alternatively, specify num_gpus
    config = tf.estimator.RunConfig(train_distribute=distribution)

    # Create the Estimator
    p1b3_regressor = tf.estimator.Estimator(
        model_fn=fc_model_fn,
        model_dir="/tmp/fc_regression_model",
        config=config)

    # Train & eval
    train_spec = tf.estimator.TrainSpec(input_fn=lambda: input_fn(train_gen))
    eval_spec = tf.estimator.EvalSpec(input_fn=lambda: input_fn(val_gen))
    tf.estimator.train_and_evaluate(p1b3_regressor, train_spec, eval_spec)
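# The Estimator example above relies on input_fn and fc_model_fn, which are
# defined elsewhere in the original module. A minimal sketch of what such
# functions could look like, assuming the p1b3 generator yields
# (features, target) numpy batches (illustrative only; the real input_fn is
# called as input_fn(train_gen), so it presumably captures the feature
# dimension itself):
import tensorflow as tf


def input_fn(gen, input_dim):
    # wrap the Python generator in a tf.data pipeline the Estimator can consume
    dataset = tf.data.Dataset.from_generator(
        lambda: gen,
        output_types=(tf.float32, tf.float32),
        output_shapes=(tf.TensorShape([None, input_dim]),
                       tf.TensorShape([None])))
    return dataset


def fc_model_fn(features, labels, mode):
    # simple fully connected regressor; layer sizes are illustrative
    hidden = tf.layers.dense(features, 1000, activation=tf.nn.relu)
    hidden = tf.layers.dense(hidden, 500, activation=tf.nn.relu)
    predictions = tf.squeeze(tf.layers.dense(hidden, 1), axis=-1)
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    loss = tf.losses.mean_squared_error(labels=labels, predictions=predictions)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(0.001)
        train_op = optimizer.minimize(
            loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
    return tf.estimator.EstimatorSpec(mode, loss=loss)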
Example #2
 def __init__(self,
              data_loader,
              partition='train',
              ndata=None,
              lshape=None,
              datatype=np.float32):
     """
     During initialization, the input data will be converted to backend tensor objects
     (e.g. CPUTensor or GPUTensor). If the backend uses the GPU, the data is copied over to the
     device.
     """
     super(ConcatDataIter, self).__init__()
     self.data = data_loader
     self.gen = p1b3.DataGenerator(data_loader,
                                   partition=partition,
                                   batch_size=self.be.bsz,
                                   concat=True)
     self.ndata = ndata or self.gen.num_data
     assert self.ndata >= self.be.bsz
     self.datatype = datatype
     self.gen = self.gen.flow()
     self.start = 0
     self.ybuf = None
     self.shape = lshape or data_loader.input_dim
     self.lshape = lshape
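# Hypothetical usage sketch (assumed wiring, not shown in this listing): the
# iterator reads its batch size from the shared neon backend (self.be.bsz),
# so gen_backend must be called before constructing it. Argument values are
# illustrative only.
import numpy as np
import p1b3
from neon.backends import gen_backend

be = gen_backend(backend='cpu', batch_size=100)   # sets the shared backend object
loader = p1b3.DataLoader(val_split=0.2)
train_iter = ConcatDataIter(loader, partition='train', datatype=np.float32)
val_iter = ConcatDataIter(loader, partition='val')
# the iterators can then be handed to a neon Model's fit()/eval() loop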
Example #3
def main():
    tf.logging.set_verbosity(tf.logging.DEBUG)
    loader = p1b3.DataLoader(val_split=VAL_SPLIT,
                             test_cell_split=TEST_CELL_SPLIT,
                             cell_features=['expression'],
                             drug_features=['descriptors'],
                             feature_subsample=FEATURE_SUBSAMPLE,
                             scaling=SCALING,
                             scramble=False,
                             min_logconc=MIN_LOGCONC,
                             max_logconc=MAX_LOGCONC,
                             subsample='naive_balancing',
                             category_cutoffs=CATEGORY_CUTOFFS)

    tf.logging.info('Loader input dim: {}'.format(loader.input_dim))
    gen_shape = None

    train_gen = p1b3.DataGenerator(loader,
                                   batch_size=BATCH_SIZE,
                                   shape=gen_shape,
                                   name='train_gen').flow()
    val_gen = p1b3.DataGenerator(loader,
                                 partition='val',
                                 batch_size=BATCH_SIZE,
                                 shape=gen_shape,
                                 name='val_gen').flow()
    val_gen2 = p1b3.DataGenerator(loader,
                                  partition='val',
                                  batch_size=BATCH_SIZE,
                                  shape=gen_shape,
                                  name='val_gen2').flow()
    test_gen = p1b3.DataGenerator(loader,
                                  partition='test',
                                  batch_size=BATCH_SIZE,
                                  shape=gen_shape,
                                  name='test_gen').flow()

    # Create the Estimator
    p1b3_regressor = tf.estimator.Estimator(
        model_fn=fc_model_fn, model_dir="/tmp/fc_regression_model")

    train_spec = tf.estimator.TrainSpec(input_fn=lambda: input_fn(train_gen))
    eval_spec = tf.estimator.EvalSpec(input_fn=lambda: input_fn(val_gen))
    tf.estimator.train_and_evaluate(p1b3_regressor, train_spec, eval_spec)
 def __init__(self,
              data_loader,
              partition='train',
              batch_size=32,
              num_data=None,
              shape=None):
     super(ConcatDataIter, self).__init__()
     self.data = data_loader
     self.batch_size = batch_size
     self.gen = p1b3.DataGenerator(data_loader,
                                   partition=partition,
                                   batch_size=batch_size,
                                   shape=shape,
                                   concat=True)
     self.num_data = num_data or self.gen.num_data
     self.cursor = 0
     self.gen = self.gen.flow()
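 # Hypothetical companion methods (assumed, not shown in this listing): one way
 # the cursor could bound iteration over the otherwise endless generator.
 def __iter__(self):
     self.cursor = 0
     return self

 def __next__(self):
     if self.cursor >= self.num_data:
         raise StopIteration
     X, y = next(self.gen)  # concatenated (features, target) batch
     self.cursor += self.batch_size
     return X, y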
def main():
    parser = get_parser()
    args = parser.parse_args()
    print('Args:', args)

    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    ext = extension_from_parameters(args)

    loader = p1b3.DataLoader(feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             drug_features=args.drug_features,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    gen_shape = None
    out_dim = 1

    model = Sequential()
    if args.convolution and args.convolution[0]:
        gen_shape = 'add_1d'
        layer_list = list(range(0, len(args.convolution), 3))
        for l, i in enumerate(layer_list):
            nb_filter = args.convolution[i]
            filter_len = args.convolution[i + 1]
            stride = args.convolution[i + 2]
            if nb_filter <= 0 or filter_len <= 0 or stride <= 0:
                break
            if args.locally_connected:
                model.add(
                    LocallyConnected1D(nb_filter,
                                       filter_len,
                                       subsample_length=stride,
                                       input_shape=(loader.input_dim, 1),
                                       activation=args.activation))
            else:
                model.add(
                    Convolution1D(nb_filter,
                                  filter_len,
                                  subsample_length=stride,
                                  input_shape=(loader.input_dim, 1),
                                  activation=args.activation))
            if args.pool:
                model.add(MaxPooling1D(pool_length=args.pool))
        model.add(Flatten())

    for layer in args.dense:
        if layer:
            model.add(
                Dense(layer,
                      input_dim=loader.input_dim,
                      activation=args.activation))
            if args.drop:
                model.add(Dropout(args.drop))
    model.add(Dense(out_dim))

    model.summary()
    model.compile(loss=args.loss, optimizer=args.optimizer)

    train_gen = p1b3.DataGenerator(loader,
                                   batch_size=args.batch_size,
                                   shape=gen_shape).flow()
    val_gen = p1b3.DataGenerator(loader,
                                 partition='val',
                                 batch_size=args.batch_size,
                                 shape=gen_shape).flow()
    val_gen2 = p1b3.DataGenerator(loader,
                                  partition='val',
                                  batch_size=args.batch_size,
                                  shape=gen_shape).flow()
    test_gen = p1b3.DataGenerator(loader,
                                  partition='test',
                                  batch_size=args.batch_size,
                                  shape=gen_shape).flow()

    train_samples = int(loader.n_train / args.batch_size) * args.batch_size
    val_samples = int(loader.n_val / args.batch_size) * args.batch_size
    test_samples = int(loader.n_test / args.batch_size) * args.batch_size

    train_samples = args.train_samples if args.train_samples else train_samples
    val_samples = args.val_samples if args.val_samples else val_samples

    checkpointer = ModelCheckpoint(filepath=args.save + '.model' + ext + '.h5',
                                   save_best_only=True)
    progbar = MyProgbarLogger()
    history = MyLossHistory(progbar=progbar,
                            val_gen=val_gen2,
                            test_gen=test_gen,
                            val_samples=val_samples,
                            test_samples=test_samples,
                            metric=args.loss,
                            category_cutoffs=args.category_cutoffs,
                            ext=ext,
                            pre=args.save)

    model.fit_generator(train_gen,
                        train_samples,
                        nb_epoch=args.epochs,
                        validation_data=val_gen,
                        nb_val_samples=val_samples,
                        verbose=0,
                        callbacks=[checkpointer, history, progbar],
                        pickle_safe=True,
                        nb_worker=args.workers)
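# The example above targets the Keras 1 API (Convolution1D with
# subsample_length, nb_epoch, nb_val_samples, pickle_safe, nb_worker). Under
# Keras 2 the equivalent calls would look like the sketch below, assuming the
# objects built in the example above (model, loader, args, generators,
# callbacks) are in scope; this is an API mapping, not part of the original:
from keras.layers import Conv1D, MaxPooling1D

model.add(Conv1D(nb_filter, filter_len, strides=stride,
                 input_shape=(loader.input_dim, 1),
                 activation=args.activation))
model.add(MaxPooling1D(pool_size=args.pool))

model.fit_generator(train_gen,
                    steps_per_epoch=train_samples // args.batch_size,
                    epochs=args.epochs,
                    validation_data=val_gen,
                    validation_steps=val_samples // args.batch_size,
                    verbose=0,
                    callbacks=[checkpointer, history, progbar],
                    use_multiprocessing=True,
                    workers=args.workers)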
Example #6
def main():
    parser = get_parser()
    args = parser.parse_args()

    ext = extension_from_parameters(args)

    logfile = args.logfile if args.logfile else os.path.join(
        args.out_dir, args.save)+ext+'.log'

    fh = logging.FileHandler(logfile)
    fh.setFormatter(logging.Formatter(
        "[%(asctime)s %(process)d] %(message)s", datefmt="%Y-%m-%d %H:%M:%S"))
    fh.setLevel(logging.DEBUG)

    sh = logging.StreamHandler()
    sh.setFormatter(logging.Formatter(''))
    sh.setLevel(logging.DEBUG if args.verbose else logging.INFO)

    logger.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    logger.addHandler(sh)

    logger.info('Args: {}'.format(args))

    loader = p1b3.DataLoader(val_split=args.val_split,
                             test_cell_split=args.test_cell_split,
                             cell_features=args.cell_features,
                             drug_features=args.drug_features,
                             feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    print('Loader input dim', loader.input_dim)

    gen_shape = None
    out_dim = 1

    model = Sequential()
    if args.conv and args.conv[0]:
        gen_shape = 'add_1d'
        layer_list = list(range(0, len(args.conv), 3))
        for l, i in enumerate(layer_list):
            filters = args.conv[i]
            filter_len = args.conv[i+1]
            stride = args.conv[i+2]
            if filters <= 0 or filter_len <= 0 or stride <= 0:
                break
            if args.locally_connected:
                model.add(LocallyConnected1D(filters, filter_len,
                                             strides=stride, input_shape=(loader.input_dim, 1)))
            else:
                model.add(Conv1D(filters, filter_len, strides=stride,
                                 input_shape=(loader.input_dim, 1)))
            if args.batch_normalization:
                model.add(BatchNormalization())
            model.add(Activation(args.activation))
            if args.pool:
                model.add(MaxPooling1D(pool_size=args.pool))
        model.add(Flatten())

    for layer in args.dense:
        if layer:
            model.add(Dense(layer, input_dim=loader.input_dim))
            if args.batch_normalization:
                model.add(BatchNormalization())
            model.add(Activation(args.activation))
            if args.drop:
                model.add(Dropout(args.drop))
    model.add(Dense(out_dim))

    model.summary()
    logger.debug('Model: {}'.format(model.to_json()))

    parallel_model = multi_gpu_model(model, gpus=4, cpu_merge=False)
    parallel_model.compile(loss=args.loss, optimizer=args.optimizer)

    train_gen = p1b3.DataGenerator(
        loader, batch_size=args.batch_size, shape=gen_shape, name='train_gen').flow()
    val_gen = p1b3.DataGenerator(
        loader, partition='val', batch_size=args.batch_size, shape=gen_shape, name='val_gen').flow()
    val_gen2 = p1b3.DataGenerator(
        loader, partition='val', batch_size=args.batch_size, shape=gen_shape, name='val_gen2').flow()
    test_gen = p1b3.DataGenerator(
        loader, partition='test', batch_size=args.batch_size, shape=gen_shape, name='test_gen').flow()

    train_steps = int(loader.n_train/args.batch_size)
    val_steps = int(loader.n_val/args.batch_size)
    test_steps = int(loader.n_test/args.batch_size)

    train_steps = args.train_steps if args.train_steps else train_steps
    val_steps = args.val_steps if args.val_steps else val_steps
    test_steps = args.test_steps if args.test_steps else test_steps

    checkpointer = ModelCheckpoint(filepath=os.path.join(
        args.out_dir, args.save)+'.model'+ext+'.h5', save_best_only=True)
    progbar = MyProgbarLogger(train_steps * args.batch_size)
    history = MyLossHistory(progbar=progbar, val_gen=val_gen2, test_gen=test_gen,
                            val_steps=val_steps, test_steps=test_steps,
                            metric=args.loss, category_cutoffs=args.category_cutoffs,
                            ext=ext, pre=os.path.join(args.out_dir, args.save))

    tensorboard = TensorBoard(
        log_dir=os.path.join(args.out_dir, str(time())))
    parallel_model.fit_generator(train_gen, train_steps,
                                 epochs=args.epochs,
                                 validation_data=val_gen,
                                 validation_steps=val_steps,
                                 verbose=0,
                                 callbacks=[checkpointer, history,
                                            progbar, tensorboard],
                                 use_multiprocessing=True,
                                 workers=args.workers)
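# Note: the ModelCheckpoint above is attached to parallel_model, so it saves the
# multi-GPU wrapper. A common workaround (a sketch, not part of the original) is
# to checkpoint the single-GPU template model instead, so the weights reload on
# any device count. Assumes standalone Keras; use tensorflow.keras.callbacks if
# that is what the module imports.
from keras.callbacks import Callback


class TemplateModelCheckpoint(Callback):
    """Save the template (single-GPU) model whenever val_loss improves."""

    def __init__(self, template_model, filepath):
        super(TemplateModelCheckpoint, self).__init__()
        self.template_model = template_model
        self.filepath = filepath
        self.best = float('inf')

    def on_epoch_end(self, epoch, logs=None):
        val_loss = (logs or {}).get('val_loss')
        if val_loss is not None and val_loss < self.best:
            self.best = val_loss
            self.template_model.save(self.filepath)

# usage: add TemplateModelCheckpoint(model, checkpoint_path) to the callbacks
# list passed to parallel_model.fit_generator above.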
def main():
    parser = get_parser()
    args = parser.parse_args()

    ext = extension_from_parameters(args)

    logfile = args.logfile if args.logfile else os.path.join(
        args.out_dir, args.save)+ext+'.log'

    fh = logging.FileHandler(logfile)
    fh.setFormatter(logging.Formatter(
        "[%(asctime)s %(process)d] %(message)s", datefmt="%Y-%m-%d %H:%M:%S"))
    fh.setLevel(logging.DEBUG)

    sh = logging.StreamHandler()
    sh.setFormatter(logging.Formatter(''))
    sh.setLevel(logging.DEBUG if args.verbose else logging.INFO)

    logger.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    logger.addHandler(sh)

    logger.info('Args: {}'.format(args))

    loader = p1b3.DataLoader(val_split=args.val_split,
                             test_cell_split=args.test_cell_split,
                             cell_features=args.cell_features,
                             drug_features=args.drug_features,
                             feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    print('Loader input dim', loader.input_dim)

    gen_shape = None
    out_dim = 1

    X = tf.placeholder(tf.float32, [None, loader.input_dim])
    Y_ = tf.placeholder(tf.float32, [None, 1])
    Y = model(X, loader.input_dim)

    # set_trace()  # leftover debugging breakpoint; uncomment to drop into pdb here

    train_gen = p1b3.DataGenerator(loader, batch_size=args.batch_size,
                                   shape=gen_shape, name='train_gen').flow()
    val_gen = p1b3.DataGenerator(loader, partition='val', batch_size=args.batch_size,
                                 shape=gen_shape, name='val_gen').flow()
    val_gen2 = p1b3.DataGenerator(loader, partition='val', batch_size=args.batch_size,
                                  shape=gen_shape, name='val_gen2').flow()
    test_gen = p1b3.DataGenerator(loader, partition='test', batch_size=args.batch_size,
                                  shape=gen_shape, name='test_gen').flow()

    # objective = tf.reduce_mean(tf.square(Y - Y_))
    # train = tf.train.GradientDescentOptimizer(0.001).minimize(objective)
    mse = tf.losses.mean_squared_error(Y_, Y)  # the loss function
    train = tf.train.GradientDescentOptimizer(0.001).minimize(mse)
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for i, (X_batch, y_batch) in enumerate(train_gen):
            feed_dict = {X: X_batch.reshape(args.batch_size, loader.input_dim),
                         Y_: y_batch.reshape(args.batch_size, 1)}
            # cost, _ = sess.run([objective, train], feed_dict)
            cost, _ = sess.run([mse, train], feed_dict)
            if i % 50 == 0:
                print('Batch :', i, 'Cost :', cost)

    train_steps = int(loader.n_train/args.batch_size)
    val_steps = int(loader.n_val/args.batch_size)
    test_steps = int(loader.n_test/args.batch_size)

    train_steps = args.train_steps if args.train_steps else train_steps
    val_steps = args.val_steps if args.val_steps else val_steps
    test_steps = args.test_steps if args.test_steps else test_steps

    checkpointer = ModelCheckpoint(filepath=os.path.join(
        args.out_dir, args.save)+'.model'+ext+'.h5', save_best_only=True)
    progbar = MyProgbarLogger(train_steps * args.batch_size)
    history = MyLossHistory(progbar=progbar, val_gen=val_gen2, test_gen=test_gen,
                            val_steps=val_steps, test_steps=test_steps,
                            metric=args.loss, category_cutoffs=args.category_cutoffs,
                            ext=ext, pre=os.path.join(args.out_dir, args.save))
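    # The session loop above iterates over train_gen, which (like the other
    # p1b3 generators) cycles indefinitely, so it never terminates on its own.
    # A bounded version with a validation pass could look like the sketch
    # below (uses the tensors and generators defined above; 'epochs' is an
    # assumed argument, not part of the original listing):
    epochs = args.epochs if hasattr(args, 'epochs') else 1

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for step in range(train_steps * epochs):
            X_batch, y_batch = next(train_gen)
            feed_dict = {X: X_batch.reshape(-1, loader.input_dim),
                         Y_: y_batch.reshape(-1, 1)}
            cost, _ = sess.run([mse, train], feed_dict)
            if step % 50 == 0:
                print('Batch:', step, 'Cost:', cost)

        # single validation pass over val_gen
        val_cost = 0.0
        for _ in range(val_steps):
            X_batch, y_batch = next(val_gen)
            val_cost += sess.run(mse, {X: X_batch.reshape(-1, loader.input_dim),
                                       Y_: y_batch.reshape(-1, 1)})
        print('Validation MSE:', val_cost / max(val_steps, 1))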
Example #8
def run(gParameters):
    """
    Runs the model using the specified set of parameters

    Args:
       gParameters: a Python dictionary containing the parameters
       (e.g. number of epochs) to run the model with.
    """
    if 'dense' in gParameters:
        dval = gParameters['dense']
        if not isinstance(dval, list):
            # accept a whitespace-separated string (e.g. "1000 500 100") or a single value
            if isinstance(dval, str):
                gParameters['dense'] = [int(x) for x in dval.split()]
            else:
                gParameters['dense'] = [dval]
        print(gParameters['dense'])

    if 'conv' in gParameters:
        print('Conv input', gParameters['conv'])

    # Construct extension to save model
    ext = benchmark.extension_from_parameters(gParameters, '.keras')
    logfile = gParameters['logfile'] if gParameters[
        'logfile'] else gParameters['output_dir'] + ext + '.log'

    fh = logging.FileHandler(logfile)
    fh.setFormatter(
        logging.Formatter("[%(asctime)s %(process)d] %(message)s",
                          datefmt="%Y-%m-%d %H:%M:%S"))
    fh.setLevel(logging.DEBUG)

    sh = logging.StreamHandler()
    sh.setFormatter(logging.Formatter(''))
    sh.setLevel(logging.DEBUG if gParameters['verbose'] else logging.INFO)

    benchmark.logger.setLevel(logging.DEBUG)
    benchmark.logger.addHandler(fh)
    benchmark.logger.addHandler(sh)
    benchmark.logger.info('Params: {}'.format(gParameters))

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = candle.keras_default_config()
    seed = gParameters['rng_seed']

    # Build dataset loader object
    loader = benchmark.DataLoader(
        seed=seed,
        dtype=gParameters['data_type'],
        val_split=gParameters['val_split'],
        test_cell_split=gParameters['test_cell_split'],
        cell_features=gParameters['cell_features'],
        drug_features=gParameters['drug_features'],
        feature_subsample=gParameters['feature_subsample'],
        scaling=gParameters['scaling'],
        scramble=gParameters['scramble'],
        min_logconc=gParameters['min_logconc'],
        max_logconc=gParameters['max_logconc'],
        subsample=gParameters['subsample'],
        category_cutoffs=gParameters['category_cutoffs'])

    # Initialize weights and learning rule
    initializer_weights = candle.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = candle.build_initializer('constant', kerasDefaults, 0.)

    activation = gParameters['activation']

    # Define model architecture
    gen_shape = None
    out_dim = 1

    model = Sequential()
    if 'dense' in gParameters:  # Build dense layers
        for layer in gParameters['dense']:
            if layer:
                model.add(
                    Dense(layer,
                          input_dim=loader.input_dim,
                          kernel_initializer=initializer_weights,
                          bias_initializer=initializer_bias))
                if gParameters['batch_normalization']:
                    model.add(BatchNormalization())
                model.add(Activation(gParameters['activation']))
                if gParameters['dropout']:
                    model.add(Dropout(gParameters['dropout']))
    else:  # Build convolutional layers
        gen_shape = 'add_1d'
        lc_flag = 'locally_connected' in gParameters

        for i in range(len(gParameters['conv'])):
            if i == 0:
                add_conv_layer(model,
                               gParameters['conv'][i],
                               input_dim=loader.input_dim,
                               locally_connected=lc_flag)
            else:
                add_conv_layer(model,
                               gParameters['conv'][i],
                               locally_connected=lc_flag)
            if gParameters['batch_normalization']:
                model.add(BatchNormalization())
            model.add(Activation(gParameters['activation']))
            if gParameters['pool']:
                model.add(MaxPooling1D(pool_size=gParameters['pool']))
        model.add(Flatten())

    model.add(Dense(out_dim))

    # Define optimizer
    optimizer = candle.build_optimizer(gParameters['optimizer'],
                                       gParameters['learning_rate'],
                                       kerasDefaults)

    # Compile and display model
    model.compile(loss=gParameters['loss'], optimizer=optimizer)
    model.summary()
    benchmark.logger.debug('Model: {}'.format(model.to_json()))

    train_gen = benchmark.DataGenerator(
        loader,
        batch_size=gParameters['batch_size'],
        shape=gen_shape,
        name='train_gen',
        cell_noise_sigma=gParameters['cell_noise_sigma']).flow()
    val_gen = benchmark.DataGenerator(loader,
                                      partition='val',
                                      batch_size=gParameters['batch_size'],
                                      shape=gen_shape,
                                      name='val_gen').flow()
    val_gen2 = benchmark.DataGenerator(loader,
                                       partition='val',
                                       batch_size=gParameters['batch_size'],
                                       shape=gen_shape,
                                       name='val_gen2').flow()
    test_gen = benchmark.DataGenerator(loader,
                                       partition='test',
                                       batch_size=gParameters['batch_size'],
                                       shape=gen_shape,
                                       name='test_gen').flow()

    train_steps = int(loader.n_train / gParameters['batch_size'])
    val_steps = int(loader.n_val / gParameters['batch_size'])
    test_steps = int(loader.n_test / gParameters['batch_size'])

    if 'train_steps' in gParameters:
        train_steps = gParameters['train_steps']
    if 'val_steps' in gParameters:
        val_steps = gParameters['val_steps']
    if 'test_steps' in gParameters:
        test_steps = gParameters['test_steps']

    checkpointer = ModelCheckpoint(filepath=gParameters['output_dir'] +
                                   '.model' + ext + '.h5',
                                   save_best_only=True)
    progbar = MyProgbarLogger(train_steps * gParameters['batch_size'])
    loss_history = MyLossHistory(
        progbar=progbar,
        val_gen=val_gen2,
        test_gen=test_gen,
        val_steps=val_steps,
        test_steps=test_steps,
        metric=gParameters['loss'],
        category_cutoffs=gParameters['category_cutoffs'],
        ext=ext,
        pre=gParameters['output_dir'])

    # Seed random generator for training
    np.random.seed(seed)

    candleRemoteMonitor = candle.CandleRemoteMonitor(params=gParameters)

    history = model.fit_generator(
        train_gen,
        train_steps,
        epochs=gParameters['epochs'],
        validation_data=val_gen,
        validation_steps=val_steps,
        verbose=0,
        callbacks=[checkpointer, loss_history, progbar, candleRemoteMonitor],
    )

    benchmark.logger.removeHandler(fh)
    benchmark.logger.removeHandler(sh)

    return history
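# Hypothetical driver (assumed, not shown in this listing): CANDLE-style
# benchmarks are typically launched by building the parameter dictionary and
# handing it to run(); initialize_parameters is assumed to be defined elsewhere.
def main():
    gParameters = initialize_parameters()   # assumed helper building the dict
    run(gParameters)


if __name__ == '__main__':
    main()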