def main():

    print('Available GPUs', get_available_gpus())

    tf.logging.set_verbosity(tf.logging.DEBUG)
    loader = p1b3.DataLoader(val_split=VAL_SPLIT,
                             test_cell_split=TEST_CELL_SPLIT,
                             cell_features=['expression'],
                             drug_features=['descriptors'],
                             feature_subsample=FEATURE_SUBSAMPLE,
                             scaling=SCALING,
                             scramble=False,
                             min_logconc=MIN_LOGCONC,
                             max_logconc=MAX_LOGCONC,
                             subsample='naive_balancing',
                             category_cutoffs=CATEGORY_CUTOFFS)

    tf.logging.info('Loader input dim: {}'.format(loader.input_dim))
    gen_shape = None

    train_gen = p1b3.DataGenerator(loader,
                                   batch_size=BATCH_SIZE,
                                   shape=gen_shape,
                                   name='train_gen').flow()
    val_gen = p1b3.DataGenerator(loader,
                                 partition='val',
                                 batch_size=BATCH_SIZE,
                                 shape=gen_shape,
                                 name='val_gen').flow()
    val_gen2 = p1b3.DataGenerator(loader,
                                  partition='val',
                                  batch_size=BATCH_SIZE,
                                  shape=gen_shape,
                                  name='val_gen2').flow()
    test_gen = p1b3.DataGenerator(loader,
                                  partition='test',
                                  batch_size=BATCH_SIZE,
                                  shape=gen_shape,
                                  name='test_gen').flow()

    # Prep for distribution using a mirrored strategy
    devices = [
        "/device:GPU:0", "/device:GPU:1", "/device:GPU:2", "/device:GPU:3"
    ]
    distribution = tf.contrib.distribute.MirroredStrategy(
        devices)  # alternatively, specify num_gpus
    config = tf.estimator.RunConfig(train_distribute=distribution)

    # Create the Estimator
    p1b3_regressor = tf.estimator.Estimator(
        model_fn=fc_model_fn,
        model_dir="/tmp/fc_regression_model",
        config=config)

    # Train & eval
    train_spec = tf.estimator.TrainSpec(input_fn=lambda: input_fn(train_gen))
    eval_spec = tf.estimator.EvalSpec(input_fn=lambda: input_fn(val_gen))
    tf.estimator.train_and_evaluate(p1b3_regressor, train_spec, eval_spec)
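
# The input_fn referenced above is defined elsewhere in this script. A minimal sketch of
# one possible implementation is shown below, assuming each item yielded by the
# DataGenerator flow is a (features, growth) pair of NumPy batch arrays; the dtypes and
# shapes here are illustrative assumptions, not values from the source.
def input_fn(gen):
    dataset = tf.data.Dataset.from_generator(
        lambda: gen,
        output_types=(tf.float32, tf.float32),
        output_shapes=(tf.TensorShape([None, None]),   # (batch_size, input_dim)
                       tf.TensorShape([None, 1])))     # (batch_size, 1) growth targets
    return dataset
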
Example #2
def main():
    parser = get_parser()
    args = parser.parse_args()
    print('Args:', args)

    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    ext = extension_from_parameters(args)

    loader = p1b3.DataLoader(feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             drug_features=args.drug_features,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    # initializer = Gaussian(loc=0.0, scale=0.01)
    initializer = GlorotUniform()
    activation = get_function(args.activation)()

    layers = []
    reshape = None

    if args.convolution and args.convolution[0]:
        reshape = (1, loader.input_dim, 1)
        layer_list = list(range(0, len(args.convolution), 3))
        for l, i in enumerate(layer_list):
            nb_filter = args.convolution[i]
            filter_len = args.convolution[i+1]
            stride = args.convolution[i+2]
            # print(nb_filter, filter_len, stride)
            # fshape: (height, width, num_filters).
            layers.append(Conv((1, filter_len, nb_filter),
                               strides={'str_h': 1, 'str_w': stride},
                               init=initializer,
                               activation=activation))
            if args.pool:
                layers.append(Pooling((1, args.pool)))

    for layer in args.dense:
        if layer:
            layers.append(Affine(nout=layer, init=initializer, activation=activation))
        if args.drop:
            layers.append(Dropout(keep=(1-args.drop)))
    layers.append(Affine(nout=1, init=initializer, activation=neon.transforms.Identity()))

    model = Model(layers=layers)

    train_iter = ConcatDataIter(loader, ndata=args.train_samples, lshape=reshape, datatype=args.datatype)
    val_iter = ConcatDataIter(loader, partition='val', ndata=args.val_samples, lshape=reshape, datatype=args.datatype)

    cost = GeneralizedCost(get_function(args.loss)())
    optimizer = get_function(args.optimizer)()
    callbacks = Callbacks(model, eval_set=val_iter, **args.callback_args)

    model.fit(train_iter, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
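
# The get_function helper above maps a configured name onto a neon class (activation,
# loss or optimizer). Its real implementation lives in the common benchmark code; the
# getattr-based lookup below is only a hedged sketch and assumes the configured name
# matches a class exported by one of these modules (e.g. 'Rectlin', 'MeanSquared', 'Adam').
import neon.transforms
import neon.optimizers

def get_function(name):
    for module in (neon.transforms, neon.optimizers):
        if hasattr(module, name):
            return getattr(module, name)
    raise ValueError('Unknown function name: {}'.format(name))
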
Example #3
def main():
    tf.logging.set_verbosity(tf.logging.DEBUG)
    loader = p1b3.DataLoader(val_split=VAL_SPLIT,
                             test_cell_split=TEST_CELL_SPLIT,
                             cell_features=['expression'],
                             drug_features=['descriptors'],
                             feature_subsample=FEATURE_SUBSAMPLE,
                             scaling=SCALING,
                             scramble=False,
                             min_logconc=MIN_LOGCONC,
                             max_logconc=MAX_LOGCONC,
                             subsample='naive_balancing',
                             category_cutoffs=CATEGORY_CUTOFFS)

    tf.logging.info('Loader input dim: {}'.format(loader.input_dim))
    gen_shape = None

    train_gen = p1b3.DataGenerator(loader,
                                   batch_size=BATCH_SIZE,
                                   shape=gen_shape,
                                   name='train_gen').flow()
    val_gen = p1b3.DataGenerator(loader,
                                 partition='val',
                                 batch_size=BATCH_SIZE,
                                 shape=gen_shape,
                                 name='val_gen').flow()
    val_gen2 = p1b3.DataGenerator(loader,
                                  partition='val',
                                  batch_size=BATCH_SIZE,
                                  shape=gen_shape,
                                  name='val_gen2').flow()
    test_gen = p1b3.DataGenerator(loader,
                                  partition='test',
                                  batch_size=BATCH_SIZE,
                                  shape=gen_shape,
                                  name='test_gen').flow()

    # Create the Estimator
    p1b3_regressor = tf.estimator.Estimator(
        model_fn=fc_model_fn, model_dir="/tmp/fc_regression_model")

    train_spec = tf.estimator.TrainSpec(input_fn=lambda: input_fn(train_gen))
    eval_spec = tf.estimator.EvalSpec(input_fn=lambda: input_fn(val_gen))
    tf.estimator.train_and_evaluate(p1b3_regressor, train_spec, eval_spec)
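
# fc_model_fn is defined elsewhere in this script. The sketch below is a minimal
# Estimator model_fn compatible with the calls above: a small fully connected regressor.
# The layer sizes and learning rate are illustrative assumptions, not source values.
def fc_model_fn(features, labels, mode):
    net = tf.layers.dense(features, units=1000, activation=tf.nn.relu)
    net = tf.layers.dense(net, units=500, activation=tf.nn.relu)
    predictions = tf.layers.dense(net, units=1)
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    loss = tf.losses.mean_squared_error(labels, predictions)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
    return tf.estimator.EstimatorSpec(mode, loss=loss)
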
Example #4
def main():
    parser = get_parser()
    args = parser.parse_args()
    print('Args:', args)

    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    ext = extension_from_parameters(args)

    loader = p1b3.DataLoader(feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             drug_features=args.drug_features,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    gen_shape = None
    out_dim = 1

    model = Sequential()
    if args.convolution and args.convolution[0]:
        gen_shape = 'add_1d'
        layer_list = list(range(0, len(args.convolution), 3))
        for l, i in enumerate(layer_list):
            nb_filter = args.convolution[i]
            filter_len = args.convolution[i + 1]
            stride = args.convolution[i + 2]
            if nb_filter <= 0 or filter_len <= 0 or stride <= 0:
                break
            if args.locally_connected:
                model.add(
                    LocallyConnected1D(nb_filter,
                                       filter_len,
                                       subsample_length=stride,
                                       input_shape=(loader.input_dim, 1),
                                       activation=args.activation))
            else:
                model.add(
                    Convolution1D(nb_filter,
                                  filter_len,
                                  subsample_length=stride,
                                  input_shape=(loader.input_dim, 1),
                                  activation=args.activation))
            if args.pool:
                model.add(MaxPooling1D(pool_length=args.pool))
        model.add(Flatten())

    for layer in args.dense:
        if layer:
            model.add(
                Dense(layer,
                      input_dim=loader.input_dim,
                      activation=args.activation))
            if args.drop:
                model.add(Dropout(args.drop))
    model.add(Dense(out_dim))

    model.summary()
    model.compile(loss=args.loss, optimizer=args.optimizer)

    train_gen = p1b3.DataGenerator(loader,
                                   batch_size=args.batch_size,
                                   shape=gen_shape).flow()
    val_gen = p1b3.DataGenerator(loader,
                                 partition='val',
                                 batch_size=args.batch_size,
                                 shape=gen_shape).flow()
    val_gen2 = p1b3.DataGenerator(loader,
                                  partition='val',
                                  batch_size=args.batch_size,
                                  shape=gen_shape).flow()
    test_gen = p1b3.DataGenerator(loader,
                                  partition='test',
                                  batch_size=args.batch_size,
                                  shape=gen_shape).flow()

    train_samples = int(loader.n_train / args.batch_size) * args.batch_size
    val_samples = int(loader.n_val / args.batch_size) * args.batch_size
    test_samples = int(loader.n_test / args.batch_size) * args.batch_size

    train_samples = args.train_samples if args.train_samples else train_samples
    val_samples = args.val_samples if args.val_samples else val_samples

    checkpointer = ModelCheckpoint(filepath=args.save + '.model' + ext + '.h5',
                                   save_best_only=True)
    progbar = MyProgbarLogger()
    history = MyLossHistory(progbar=progbar,
                            val_gen=val_gen2,
                            test_gen=test_gen,
                            val_samples=val_samples,
                            test_samples=test_samples,
                            metric=args.loss,
                            category_cutoffs=args.category_cutoffs,
                            ext=ext,
                            pre=args.save)

    model.fit_generator(train_gen,
                        train_samples,
                        nb_epoch=args.epochs,
                        validation_data=val_gen,
                        nb_val_samples=val_samples,
                        verbose=0,
                        callbacks=[checkpointer, history, progbar],
                        pickle_safe=True,
                        nb_worker=args.workers)
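
# extension_from_parameters builds a filename suffix that encodes the hyperparameters
# used for the run (it is defined elsewhere in the benchmark). The sketch below only
# illustrates the idea; the exact fields and their formatting are assumptions.
def extension_from_parameters(args):
    ext = ''
    ext += '.A={}'.format(args.activation)
    ext += '.B={}'.format(args.batch_size)
    ext += '.E={}'.format(args.epochs)
    if args.drop:
        ext += '.DR={}'.format(args.drop)
    return ext
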
Example #5
def main():
    parser = get_parser()
    args = parser.parse_args()
    print('Args:', args)

    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    ext = extension_from_parameters(args)

    loader = p1b3.DataLoader(feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             drug_features=args.drug_features,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    net = mx.sym.Variable('concat_features')
    out = mx.sym.Variable('growth')

    if args.convolution and args.convolution[0]:
        net = mx.sym.Reshape(data=net,
                             shape=(args.batch_size, 1, loader.input_dim, 1))
        layer_list = list(range(0, len(args.convolution), 3))
        for l, i in enumerate(layer_list):
            nb_filter = args.convolution[i]
            filter_len = args.convolution[i + 1]
            stride = args.convolution[i + 2]
            if nb_filter <= 0 or filter_len <= 0 or stride <= 0:
                break
            net = mx.sym.Convolution(data=net,
                                     num_filter=nb_filter,
                                     kernel=(filter_len, 1),
                                     stride=(stride, 1))
            net = mx.sym.Activation(data=net, act_type=args.activation)
            if args.pool:
                net = mx.sym.Pooling(data=net,
                                     pool_type="max",
                                     kernel=(args.pool, 1),
                                     stride=(1, 1))
        net = mx.sym.Flatten(data=net)

    for layer in args.dense:
        if layer:
            net = mx.sym.FullyConnected(data=net, num_hidden=layer)
            net = mx.sym.Activation(data=net, act_type=args.activation)
        if args.drop:
            net = mx.sym.Dropout(data=net, p=args.drop)
    net = mx.sym.FullyConnected(data=net, num_hidden=1)
    net = mx.symbol.LinearRegressionOutput(data=net, label=out)

    plot_network(net, 'net' + ext)

    train_iter = ConcatDataIter(loader,
                                batch_size=args.batch_size,
                                num_data=args.train_samples)
    val_iter = ConcatDataIter(loader,
                              partition='val',
                              batch_size=args.batch_size,
                              num_data=args.val_samples)

    devices = mx.cpu()
    if args.gpus:
        devices = [mx.gpu(i) for i in args.gpus]

    mod = mx.mod.Module(net,
                        data_names=('concat_features', ),
                        label_names=('growth', ),
                        context=devices)

    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    mod.fit(train_iter,
            eval_data=val_iter,
            eval_metric=args.loss,
            optimizer=args.optimizer,
            num_epoch=args.epochs,
            initializer=initializer,
            batch_end_callback=mx.callback.Speedometer(args.batch_size, 20))
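
    # After fitting, predictions can be pulled from the trained module; the two lines
    # below are a usage sketch added here, not part of the original example.
    preds = mod.predict(val_iter)             # NDArray of predicted growth values
    print('First predictions:', preds.asnumpy()[:5].ravel())
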
Example #6
def main():
    parser = get_parser()
    args = parser.parse_args()

    ext = extension_from_parameters(args)

    logfile = args.logfile if args.logfile else os.path.join(
        args.out_dir, args.save)+ext+'.log'

    fh = logging.FileHandler(logfile)
    fh.setFormatter(logging.Formatter(
        "[%(asctime)s %(process)d] %(message)s", datefmt="%Y-%m-%d %H:%M:%S"))
    fh.setLevel(logging.DEBUG)

    sh = logging.StreamHandler()
    sh.setFormatter(logging.Formatter(''))
    sh.setLevel(logging.DEBUG if args.verbose else logging.INFO)

    logger.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    logger.addHandler(sh)

    logger.info('Args: {}'.format(args))

    loader = p1b3.DataLoader(val_split=args.val_split,
                             test_cell_split=args.test_cell_split,
                             cell_features=args.cell_features,
                             drug_features=args.drug_features,
                             feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    print('Loader input dim', loader.input_dim)

    gen_shape = None
    out_dim = 1

    model = Sequential()
    if args.conv and args.conv[0]:
        gen_shape = 'add_1d'
        layer_list = list(range(0, len(args.conv), 3))
        for l, i in enumerate(layer_list):
            filters = args.conv[i]
            filter_len = args.conv[i+1]
            stride = args.conv[i+2]
            if filters <= 0 or filter_len <= 0 or stride <= 0:
                break
            if args.locally_connected:
                model.add(LocallyConnected1D(filters, filter_len,
                                             strides=stride, input_shape=(loader.input_dim, 1)))
            else:
                model.add(Conv1D(filters, filter_len, strides=stride,
                                 input_shape=(loader.input_dim, 1)))
            if args.batch_normalization:
                model.add(BatchNormalization())
            model.add(Activation(args.activation))
            if args.pool:
                model.add(MaxPooling1D(pool_size=args.pool))
        model.add(Flatten())

    for layer in args.dense:
        if layer:
            model.add(Dense(layer, input_dim=loader.input_dim))
            if args.batch_normalization:
                model.add(BatchNormalization())
            model.add(Activation(args.activation))
            if args.drop:
                model.add(Dropout(args.drop))
    model.add(Dense(out_dim))

    model.summary()
    logger.debug('Model: {}'.format(model.to_json()))

    parallel_model = multi_gpu_model(model, gpus=4, cpu_merge=False)
    parallel_model.compile(loss=args.loss, optimizer=args.optimizer)

    train_gen = p1b3.DataGenerator(
        loader, batch_size=args.batch_size, shape=gen_shape, name='train_gen').flow()
    val_gen = p1b3.DataGenerator(
        loader, partition='val', batch_size=args.batch_size, shape=gen_shape, name='val_gen').flow()
    val_gen2 = p1b3.DataGenerator(
        loader, partition='val', batch_size=args.batch_size, shape=gen_shape, name='val_gen2').flow()
    test_gen = p1b3.DataGenerator(
        loader, partition='test', batch_size=args.batch_size, shape=gen_shape, name='test_gen').flow()

    train_steps = int(loader.n_train/args.batch_size)
    val_steps = int(loader.n_val/args.batch_size)
    test_steps = int(loader.n_test/args.batch_size)

    train_steps = args.train_steps if args.train_steps else train_steps
    val_steps = args.val_steps if args.val_steps else val_steps
    test_steps = args.test_steps if args.test_steps else test_steps

    checkpointer = ModelCheckpoint(filepath=os.path.join(
        args.out_dir, args.save)+'.model'+ext+'.h5', save_best_only=True)
    progbar = MyProgbarLogger(train_steps * args.batch_size)
    history = MyLossHistory(progbar=progbar, val_gen=val_gen2, test_gen=test_gen,
                            val_steps=val_steps, test_steps=test_steps,
                            metric=args.loss, category_cutoffs=args.category_cutoffs,
                            ext=ext, pre=os.path.join(args.out_dir, args.save))

    tensorboard = TensorBoard(
        log_dir=os.path.join(args.out_dir, str(time())))
    parallel_model.fit_generator(train_gen, train_steps,
                                 epochs=args.epochs,
                                 validation_data=val_gen,
                                 validation_steps=val_steps,
                                 verbose=0,
                                 callbacks=[checkpointer, history,
                                            progbar, tensorboard],
                                 use_multiprocessing=True,
                                 workers=args.workers)
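
# ModelCheckpoint above saves the multi-GPU wrapper. A common alternative is to
# checkpoint the underlying single-device template model so the weights can be reloaded
# without the wrapper; the callback below is a sketch of that pattern, not source code.
from keras.callbacks import Callback

class TemplateCheckpoint(Callback):
    def __init__(self, template, filepath):
        super(TemplateCheckpoint, self).__init__()
        self.template = template    # the un-parallelized model built above
        self.filepath = filepath

    def on_epoch_end(self, epoch, logs=None):
        self.template.save(self.filepath)
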
Example #7
def main():
    parser = get_parser()
    args = parser.parse_args()

    ext = extension_from_parameters(args)

    logfile = args.logfile if args.logfile else os.path.join(
        args.out_dir, args.save)+ext+'.log'

    fh = logging.FileHandler(logfile)
    fh.setFormatter(logging.Formatter(
        "[%(asctime)s %(process)d] %(message)s", datefmt="%Y-%m-%d %H:%M:%S"))
    fh.setLevel(logging.DEBUG)

    sh = logging.StreamHandler()
    sh.setFormatter(logging.Formatter(''))
    sh.setLevel(logging.DEBUG if args.verbose else logging.INFO)

    logger.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    logger.addHandler(sh)

    logger.info('Args: {}'.format(args))

    loader = p1b3.DataLoader(val_split=args.val_split,
                             test_cell_split=args.test_cell_split,
                             cell_features=args.cell_features,
                             drug_features=args.drug_features,
                             feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    print('Loader input dim', loader.input_dim)

    gen_shape = None
    out_dim = 1

    X = tf.placeholder(tf.float32, [None, loader.input_dim])
    Y_ = tf.placeholder(tf.float32, [None, 1])
    Y = model(X, loader.input_dim)

    train_gen = p1b3.DataGenerator(loader, batch_size=args.batch_size,
                                   shape=gen_shape, name='train_gen').flow()
    val_gen = p1b3.DataGenerator(loader, partition='val', batch_size=args.batch_size,
                                 shape=gen_shape, name='val_gen').flow()
    val_gen2 = p1b3.DataGenerator(loader, partition='val', batch_size=args.batch_size,
                                  shape=gen_shape, name='val_gen2').flow()
    test_gen = p1b3.DataGenerator(loader, partition='test', batch_size=args.batch_size,
                                  shape=gen_shape, name='test_gen').flow()

    # objective = tf.reduce_mean(tf.square(Y - Y_))
    # train = tf.train.GradientDescentOptimizer(0.001).minimize(objective)
    mse = tf.losses.mean_squared_error(Y_, Y)  # the loss function
    train = tf.train.GradientDescentOptimizer(0.001).minimize(mse)
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for i, (X_batch, y_batch) in enumerate(train_gen):
            feed_dict = {X: X_batch.reshape(args.batch_size, loader.input_dim),
                         Y_: y_batch.reshape(args.batch_size, 1)}
            # cost, _ = sess.run([objective, train], feed_dict)
            cost, _ = sess.run([mse, train], feed_dict)
            if i % 50 == 0:
                print('Batch :', i, 'Cost :', cost)

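
# The model() function called above is defined elsewhere. A hedged sketch of a small
# fully connected regressor with the same signature is shown below; the layer sizes
# are illustrative assumptions.
def model(x, input_dim):
    h = tf.layers.dense(x, units=1000, activation=tf.nn.relu)
    h = tf.layers.dense(h, units=500, activation=tf.nn.relu)
    return tf.layers.dense(h, units=1)
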
Example #8
def main():
    # Get command-line parameters
    parser = get_p1b3_parser()
    args = parser.parse_args()
    #print('Args:', args)
    # Get parameters from configuration file
    fileParameters = p1b3.read_config_file(args.config_file)
    #print ('Params:', fileParameters)

    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')
    # Construct extension to save model
    ext = p1b3.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Build dataset loader object
    loader = p1b3.DataLoader(
        seed=seed,
        dtype=gParameters['datatype'],
        val_split=gParameters['validation_split'],
        test_cell_split=gParameters['test_cell_split'],
        cell_features=gParameters['cell_features'],
        drug_features=gParameters['drug_features'],
        feature_subsample=gParameters['feature_subsample'],
        scaling=gParameters['scaling'],
        scramble=gParameters['scramble'],
        min_logconc=gParameters['min_logconc'],
        max_logconc=gParameters['max_logconc'],
        subsample=gParameters['subsample'],
        category_cutoffs=gParameters['category_cutoffs'])

    net = mx.sym.Variable('concat_features')
    out = mx.sym.Variable('growth')

    # Initialize weights and learning rule
    initializer_weights = p1_common_mxnet.build_initializer(
        gParameters['initialization'], kerasDefaults)
    initializer_bias = p1_common_mxnet.build_initializer(
        'constant', kerasDefaults, 0.)
    init = mx.initializer.Mixed(['bias', '.*'],
                                [initializer_bias, initializer_weights])

    activation = gParameters['activation']

    # Define model architecture
    layers = []
    reshape = None

    if 'dense' in gParameters:  # Build dense layers
        for layer in gParameters['dense']:
            if layer:
                net = mx.sym.FullyConnected(data=net, num_hidden=layer)
                net = mx.sym.Activation(data=net, act_type=activation)
            if gParameters['drop']:
                net = mx.sym.Dropout(data=net, p=gParameters['drop'])
    else:  # Build convolutional layers
        net = mx.sym.Reshape(data=net,
                             shape=(gParameters['batch_size'], 1,
                                    loader.input_dim, 1))
        layer_list = list(range(0, len(gParameters['conv']), 3))
        for l, i in enumerate(layer_list):
            nb_filter = gParameters['conv'][i]
            filter_len = gParameters['conv'][i + 1]
            stride = gParameters['conv'][i + 2]
            if nb_filter <= 0 or filter_len <= 0 or stride <= 0:
                break
            net = mx.sym.Convolution(data=net,
                                     num_filter=nb_filter,
                                     kernel=(filter_len, 1),
                                     stride=(stride, 1))
            net = mx.sym.Activation(data=net, act_type=activation)
            if gParameters['pool']:
                net = mx.sym.Pooling(data=net,
                                     pool_type="max",
                                     kernel=(gParameters['pool'], 1),
                                     stride=(1, 1))
        net = mx.sym.Flatten(data=net)

    net = mx.sym.FullyConnected(data=net, num_hidden=1)
    net = mx.symbol.LinearRegressionOutput(data=net, label=out)

    # Display model
    p1_common_mxnet.plot_network(net, 'net' + ext)

    # Define mxnet data iterators
    train_samples = int(loader.n_train)
    val_samples = int(loader.n_val)

    if 'train_samples' in gParameters:
        train_samples = gParameters['train_samples']
    if 'val_samples' in gParameters:
        val_samples = gParameters['val_samples']

    train_iter = ConcatDataIter(loader,
                                batch_size=gParameters['batch_size'],
                                num_data=train_samples)
    val_iter = ConcatDataIter(loader,
                              partition='val',
                              batch_size=gParameters['batch_size'],
                              num_data=val_samples)

    devices = mx.cpu()
    if gParameters['gpus']:
        devices = [mx.gpu(i) for i in gParameters['gpus']]

    mod = mx.mod.Module(net,
                        data_names=('concat_features', ),
                        label_names=('growth', ),
                        context=devices)

    # Define optimizer
    optimizer = p1_common_mxnet.build_optimizer(gParameters['optimizer'],
                                                gParameters['learning_rate'],
                                                kerasDefaults)

    # Seed random generator for training
    mx.random.seed(seed)

    freq_log = 1

    #initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    mod.fit(train_iter,
            eval_data=val_iter,
            eval_metric=gParameters['loss'],
            optimizer=optimizer,
            num_epoch=gParameters['epochs'],
            initializer=init,
            batch_end_callback=mx.callback.Speedometer(
                gParameters['batch_size'], 20))
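
# args_overwrite_config (used near the top of this example) consolidates the two
# parameter sources: it starts from the file configuration and lets any command-line
# value override it. The function below is a simplified sketch of that behavior, not
# the p1_common implementation.
def args_overwrite_config(args, file_params):
    params = dict(file_params)
    for key, value in vars(args).items():
        if value is not None:
            params[key] = value
    return params
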
Example #9
def run(gParameters):
    """
    Runs the model using the specified set of parameters

    Args:
       gParameters: a python dictionary containing the parameters (e.g. epoch)
       to run the model with.
    """
    if 'dense' in gParameters:
        dval = gParameters['dense']
        if not isinstance(dval, list):
            # Accept a scalar or a space-separated string (e.g. '1000 500 100')
            # and normalize it to a list of layer sizes
            if isinstance(dval, str):
                res = [int(x) for x in dval.split()]
            else:
                res = [dval]
            gParameters['dense'] = res
        print(gParameters['dense'])

    if 'conv' in gParameters:
        #conv_list = p1_common.parse_conv_list(gParameters['conv'])
        #cval = gParameters['conv']
        #try:
        #is_str = isinstance(cval, basestring)
        #except NameError:
        #is_str = isinstance(cval, str)
        #if is_str:
        #res = str2lst(cval)
        #gParameters['conv'] = res
        print('Conv input', gParameters['conv'])
    # print('Params:', gParameters)
    # Construct extension to save model
    ext = benchmark.extension_from_parameters(gParameters, '.keras')
    logfile = gParameters['logfile'] if gParameters[
        'logfile'] else gParameters['output_dir'] + ext + '.log'

    fh = logging.FileHandler(logfile)
    fh.setFormatter(
        logging.Formatter("[%(asctime)s %(process)d] %(message)s",
                          datefmt="%Y-%m-%d %H:%M:%S"))
    fh.setLevel(logging.DEBUG)

    sh = logging.StreamHandler()
    sh.setFormatter(logging.Formatter(''))
    sh.setLevel(logging.DEBUG if gParameters['verbose'] else logging.INFO)

    benchmark.logger.setLevel(logging.DEBUG)
    benchmark.logger.addHandler(fh)
    benchmark.logger.addHandler(sh)
    benchmark.logger.info('Params: {}'.format(gParameters))

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = candle.keras_default_config()
    seed = gParameters['rng_seed']

    # Build dataset loader object
    loader = benchmark.DataLoader(
        seed=seed,
        dtype=gParameters['data_type'],
        val_split=gParameters['val_split'],
        test_cell_split=gParameters['test_cell_split'],
        cell_features=gParameters['cell_features'],
        drug_features=gParameters['drug_features'],
        feature_subsample=gParameters['feature_subsample'],
        scaling=gParameters['scaling'],
        scramble=gParameters['scramble'],
        min_logconc=gParameters['min_logconc'],
        max_logconc=gParameters['max_logconc'],
        subsample=gParameters['subsample'],
        category_cutoffs=gParameters['category_cutoffs'])

    # Initialize weights and learning rule
    initializer_weights = candle.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = candle.build_initializer('constant', kerasDefaults, 0.)

    activation = gParameters['activation']

    # Define model architecture
    gen_shape = None
    out_dim = 1

    model = Sequential()
    if 'dense' in gParameters:  # Build dense layers
        for layer in gParameters['dense']:
            if layer:
                model.add(
                    Dense(layer,
                          input_dim=loader.input_dim,
                          kernel_initializer=initializer_weights,
                          bias_initializer=initializer_bias))
                if gParameters['batch_normalization']:
                    model.add(BatchNormalization())
                model.add(Activation(gParameters['activation']))
                if gParameters['dropout']:
                    model.add(Dropout(gParameters['dropout']))
    else:  # Build convolutional layers
        gen_shape = 'add_1d'
        layer_list = list(range(0, len(gParameters['conv'])))
        lc_flag = False
        if 'locally_connected' in gParameters:
            lc_flag = True

        for l, i in enumerate(layer_list):
            if i == 0:
                add_conv_layer(model,
                               gParameters['conv'][i],
                               input_dim=loader.input_dim,
                               locally_connected=lc_flag)
            else:
                add_conv_layer(model,
                               gParameters['conv'][i],
                               locally_connected=lc_flag)
            if gParameters['batch_normalization']:
                model.add(BatchNormalization())
            model.add(Activation(gParameters['activation']))
            if gParameters['pool']:
                model.add(MaxPooling1D(pool_size=gParameters['pool']))
        model.add(Flatten())

    model.add(Dense(out_dim))

    # Define optimizer
    optimizer = candle.build_optimizer(gParameters['optimizer'],
                                       gParameters['learning_rate'],
                                       kerasDefaults)

    # Compile and display model
    model.compile(loss=gParameters['loss'], optimizer=optimizer)
    model.summary()
    benchmark.logger.debug('Model: {}'.format(model.to_json()))

    train_gen = benchmark.DataGenerator(
        loader,
        batch_size=gParameters['batch_size'],
        shape=gen_shape,
        name='train_gen',
        cell_noise_sigma=gParameters['cell_noise_sigma']).flow()
    val_gen = benchmark.DataGenerator(loader,
                                      partition='val',
                                      batch_size=gParameters['batch_size'],
                                      shape=gen_shape,
                                      name='val_gen').flow()
    val_gen2 = benchmark.DataGenerator(loader,
                                       partition='val',
                                       batch_size=gParameters['batch_size'],
                                       shape=gen_shape,
                                       name='val_gen2').flow()
    test_gen = benchmark.DataGenerator(loader,
                                       partition='test',
                                       batch_size=gParameters['batch_size'],
                                       shape=gen_shape,
                                       name='test_gen').flow()

    train_steps = int(loader.n_train / gParameters['batch_size'])
    val_steps = int(loader.n_val / gParameters['batch_size'])
    test_steps = int(loader.n_test / gParameters['batch_size'])

    if 'train_steps' in gParameters:
        train_steps = gParameters['train_steps']
    if 'val_steps' in gParameters:
        val_steps = gParameters['val_steps']
    if 'test_steps' in gParameters:
        test_steps = gParameters['test_steps']

    checkpointer = ModelCheckpoint(filepath=gParameters['output_dir'] +
                                   '.model' + ext + '.h5',
                                   save_best_only=True)
    progbar = MyProgbarLogger(train_steps * gParameters['batch_size'])
    loss_history = MyLossHistory(
        progbar=progbar,
        val_gen=val_gen2,
        test_gen=test_gen,
        val_steps=val_steps,
        test_steps=test_steps,
        metric=gParameters['loss'],
        category_cutoffs=gParameters['category_cutoffs'],
        ext=ext,
        pre=gParameters['output_dir'])

    # Seed random generator for training
    np.random.seed(seed)

    candleRemoteMonitor = candle.CandleRemoteMonitor(params=gParameters)

    history = model.fit_generator(
        train_gen,
        train_steps,
        epochs=gParameters['epochs'],
        validation_data=val_gen,
        validation_steps=val_steps,
        verbose=0,
        callbacks=[checkpointer, loss_history, progbar, candleRemoteMonitor],
    )

    benchmark.logger.removeHandler(fh)
    benchmark.logger.removeHandler(sh)

    return history
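
# run() is typically driven by a small launcher that first assembles the consolidated
# parameter dictionary and then calls it. The sketch below assumes a hypothetical
# initialize_parameters() helper that returns the gParameters dict; it is not part of
# the example above.
if __name__ == '__main__':
    gParameters = initialize_parameters()   # hypothetical helper, not shown here
    run(gParameters)
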
Example #10
def main():
    # Get command-line parameters
    parser = get_p1b3_parser()
    args = parser.parse_args()
    #print('Args:', args)
    # Get parameters from configuration file
    fileParameters = p1b3.read_config_file(args.config_file)
    #print ('Params:', fileParameters)

    # Correct for arguments set by default by the neon parser
    # (i.e., fall back to the config file instead of the neon parser default,
    # but if the value was actually given on the command line, use that value).
    # This applies to the conflicting parameters: batch_size, epochs and rng_seed.
    if not any("--batch_size" in ag or "-z" in ag for ag in sys.argv):
        args.batch_size = fileParameters['batch_size']
    if not any("--epochs" in ag or "-e" in ag for ag in sys.argv):
        args.epochs = fileParameters['epochs']
    if not any("--rng_seed" in ag or "-r" in ag for ag in sys.argv):
        args.rng_seed = fileParameters['rng_seed']

    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')
    # Construct extension to save model
    ext = p1b3.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Build dataset loader object
    loader = p1b3.DataLoader(
        seed=seed,
        dtype=gParameters['datatype'],
        val_split=gParameters['validation_split'],
        test_cell_split=gParameters['test_cell_split'],
        cell_features=gParameters['cell_features'],
        drug_features=gParameters['drug_features'],
        feature_subsample=gParameters['feature_subsample'],
        scaling=gParameters['scaling'],
        scramble=gParameters['scramble'],
        min_logconc=gParameters['min_logconc'],
        max_logconc=gParameters['max_logconc'],
        subsample=gParameters['subsample'],
        category_cutoffs=gParameters['category_cutoffs'])

    # Re-generate the backend after consolidating parsing and file config
    gen_backend(backend=args.backend,
                rng_seed=seed,
                device_id=args.device_id,
                batch_size=gParameters['batch_size'],
                datatype=gParameters['datatype'],
                max_devices=args.max_devices,
                compat_mode=args.compat_mode)

    # Initialize weights and learning rule
    initializer_weights = p1_common_neon.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = p1_common_neon.build_initializer(
        'constant', kerasDefaults, 0.)

    activation = p1_common_neon.get_function(gParameters['activation'])()

    # Define model architecture
    layers = []
    reshape = None

    if 'dense' in gParameters:  # Build dense layers
        for layer in gParameters['dense']:
            if layer:
                layers.append(
                    Affine(nout=layer,
                           init=initializer_weights,
                           bias=initializer_bias,
                           activation=activation))
            if gParameters['drop']:
                layers.append(Dropout(keep=(1 - gParameters['drop'])))
    else:  # Build convolutional layers
        reshape = (1, loader.input_dim, 1)
        layer_list = list(range(0, len(gParameters['conv']), 3))
        for l, i in enumerate(layer_list):
            nb_filter = gParameters['conv'][i]
            filter_len = gParameters['conv'][i + 1]
            stride = gParameters['conv'][i + 2]
            # print(nb_filter, filter_len, stride)
            # fshape: (height, width, num_filters).
            layers.append(
                Conv((1, filter_len, nb_filter),
                     strides={
                         'str_h': 1,
                         'str_w': stride
                     },
                     init=initializer_weights,
                     activation=activation))
            if gParameters['pool']:
                layers.append(Pooling((1, gParameters['pool'])))

    layers.append(
        Affine(nout=1,
               init=initializer_weights,
               bias=initializer_bias,
               activation=neon.transforms.Identity()))

    # Build model
    model = Model(layers=layers)

    # Define neon data iterators
    train_samples = int(loader.n_train)
    val_samples = int(loader.n_val)

    if 'train_samples' in gParameters:
        train_samples = gParameters['train_samples']
    if 'val_samples' in gParameters:
        val_samples = gParameters['val_samples']

    train_iter = ConcatDataIter(loader,
                                ndata=train_samples,
                                lshape=reshape,
                                datatype=gParameters['datatype'])
    val_iter = ConcatDataIter(loader,
                              partition='val',
                              ndata=val_samples,
                              lshape=reshape,
                              datatype=gParameters['datatype'])

    # Define cost and optimizer
    cost = GeneralizedCost(p1_common_neon.get_function(gParameters['loss'])())
    optimizer = p1_common_neon.build_optimizer(gParameters['optimizer'],
                                               gParameters['learning_rate'],
                                               kerasDefaults)

    callbacks = Callbacks(model, eval_set=val_iter,
                          eval_freq=1)  #**args.callback_args)

    model.fit(train_iter,
              optimizer=optimizer,
              num_epochs=gParameters['epochs'],
              cost=cost,
              callbacks=callbacks)
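
    # Once fitting finishes, predictions on the validation iterator can be extracted
    # from the neon model; the two lines below are a usage sketch added here, not part
    # of the original example.
    val_outputs = model.get_outputs(val_iter)    # numpy array of predicted growth values
    print('First predicted growth values:', val_outputs[:5].ravel())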