def main():
    """Train the fully-connected P1B3 regression Estimator, distributing
    training across four GPUs with TensorFlow's MirroredStrategy."""
    print('Available GPUs', get_available_gpus())
    tf.logging.set_verbosity(tf.logging.DEBUG)

    # Build the benchmark data loader from module-level hyperparameter constants.
    loader = p1b3.DataLoader(val_split=VAL_SPLIT,
                             test_cell_split=TEST_CELL_SPLIT,
                             cell_features=['expression'],
                             drug_features=['descriptors'],
                             feature_subsample=FEATURE_SUBSAMPLE,
                             scaling=SCALING,
                             scramble=False,
                             min_logconc=MIN_LOGCONC,
                             max_logconc=MAX_LOGCONC,
                             subsample='naive_balancing',
                             category_cutoffs=CATEGORY_CUTOFFS)
    tf.logging.info('Loader input dim: {}'.format(loader.input_dim))

    gen_shape = None
    train_gen = p1b3.DataGenerator(loader, batch_size=BATCH_SIZE,
                                   shape=gen_shape, name='train_gen').flow()
    val_gen = p1b3.DataGenerator(loader, partition='val',
                                 batch_size=BATCH_SIZE,
                                 shape=gen_shape, name='val_gen').flow()
    # NOTE(review): val_gen2 and test_gen are created but never used below.
    val_gen2 = p1b3.DataGenerator(loader, partition='val',
                                  batch_size=BATCH_SIZE,
                                  shape=gen_shape, name='val_gen2').flow()
    test_gen = p1b3.DataGenerator(loader, partition='test',
                                  batch_size=BATCH_SIZE,
                                  shape=gen_shape, name='test_gen').flow()

    # Prep for distribution using mirrored strategy (hard-coded to 4 GPUs).
    devices = ["/device:GPU:0", "/device:GPU:1",
               "/device:GPU:2", "/device:GPU:3"]
    distribution = tf.contrib.distribute.MirroredStrategy(
        devices)  # alternately specify num_gpus
    config = tf.estimator.RunConfig(train_distribute=distribution)

    # Create the Estimator
    p1b3_regressor = tf.estimator.Estimator(
        model_fn=fc_model_fn, model_dir="/tmp/fc_regression_model",
        config=config)

    # Train & eval
    train_spec = tf.estimator.TrainSpec(input_fn=lambda: input_fn(train_gen))
    eval_spec = tf.estimator.EvalSpec(input_fn=lambda: input_fn(val_gen))
    tf.estimator.train_and_evaluate(p1b3_regressor, train_spec, eval_spec)
def main():
    """Build and train a neon model (conv front end and/or dense stack)
    on the P1B3 drug-response data, configured from command-line args."""
    parser = get_parser()
    args = parser.parse_args()
    print('Args:', args)

    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    # Filename extension encoding the hyperparameters (used by saved artifacts).
    ext = extension_from_parameters(args)

    loader = p1b3.DataLoader(feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             drug_features=args.drug_features,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    # initializer = Gaussian(loc=0.0, scale=0.01)
    initializer = GlorotUniform()
    activation = get_function(args.activation)()

    layers = []
    reshape = None

    # Optional conv front end: args.convolution is a flat list of
    # (nb_filter, filter_len, stride) triples.
    if args.convolution and args.convolution[0]:
        reshape = (1, loader.input_dim, 1)
        layer_list = list(range(0, len(args.convolution), 3))
        for l, i in enumerate(layer_list):
            nb_filter = args.convolution[i]
            filter_len = args.convolution[i+1]
            stride = args.convolution[i+2]
            # print(nb_filter, filter_len, stride)
            # fshape: (height, width, num_filters).
            layers.append(Conv((1, filter_len, nb_filter),
                               strides={'str_h':1, 'str_w':stride},
                               init=initializer,
                               activation=activation))
            if args.pool:
                layers.append(Pooling((1, args.pool)))

    # Dense stack with optional dropout after each hidden layer.
    for layer in args.dense:
        if layer:
            layers.append(Affine(nout=layer,
                                 init=initializer,
                                 activation=activation))
            if args.drop:
                layers.append(Dropout(keep=(1-args.drop)))
    # Single linear output unit for growth regression.
    layers.append(Affine(nout=1, init=initializer,
                         activation=neon.transforms.Identity()))

    model = Model(layers=layers)

    train_iter = ConcatDataIter(loader, ndata=args.train_samples,
                                lshape=reshape, datatype=args.datatype)
    val_iter = ConcatDataIter(loader, partition='val',
                              ndata=args.val_samples,
                              lshape=reshape, datatype=args.datatype)

    cost = GeneralizedCost(get_function(args.loss)())
    optimizer = get_function(args.optimizer)()
    callbacks = Callbacks(model, eval_set=val_iter, **args.callback_args)

    model.fit(train_iter, optimizer=optimizer, num_epochs=args.epochs,
              cost=cost, callbacks=callbacks)
def main():
    """Train the fully-connected P1B3 regression Estimator on a single
    device (no distribution strategy)."""
    tf.logging.set_verbosity(tf.logging.DEBUG)

    # Build the benchmark data loader from module-level hyperparameter constants.
    loader = p1b3.DataLoader(val_split=VAL_SPLIT,
                             test_cell_split=TEST_CELL_SPLIT,
                             cell_features=['expression'],
                             drug_features=['descriptors'],
                             feature_subsample=FEATURE_SUBSAMPLE,
                             scaling=SCALING,
                             scramble=False,
                             min_logconc=MIN_LOGCONC,
                             max_logconc=MAX_LOGCONC,
                             subsample='naive_balancing',
                             category_cutoffs=CATEGORY_CUTOFFS)
    tf.logging.info('Loader input dim: {}'.format(loader.input_dim))

    gen_shape = None
    train_gen = p1b3.DataGenerator(loader, batch_size=BATCH_SIZE,
                                   shape=gen_shape, name='train_gen').flow()
    val_gen = p1b3.DataGenerator(loader, partition='val',
                                 batch_size=BATCH_SIZE,
                                 shape=gen_shape, name='val_gen').flow()
    # NOTE(review): val_gen2 and test_gen are created but never used below.
    val_gen2 = p1b3.DataGenerator(loader, partition='val',
                                  batch_size=BATCH_SIZE,
                                  shape=gen_shape, name='val_gen2').flow()
    test_gen = p1b3.DataGenerator(loader, partition='test',
                                  batch_size=BATCH_SIZE,
                                  shape=gen_shape, name='test_gen').flow()

    # Create the Estimator
    p1b3_regressor = tf.estimator.Estimator(
        model_fn=fc_model_fn, model_dir="/tmp/fc_regression_model")

    train_spec = tf.estimator.TrainSpec(input_fn=lambda: input_fn(train_gen))
    eval_spec = tf.estimator.EvalSpec(input_fn=lambda: input_fn(val_gen))
    tf.estimator.train_and_evaluate(p1b3_regressor, train_spec, eval_spec)
def main():
    """Build and train a Keras (v1 API) model on P1B3, configured from
    command-line args."""
    parser = get_parser()
    args = parser.parse_args()
    print('Args:', args)

    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    # Filename extension encoding the hyperparameters (used by saved artifacts).
    ext = extension_from_parameters(args)

    loader = p1b3.DataLoader(feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             drug_features=args.drug_features,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    gen_shape = None
    out_dim = 1

    model = Sequential()
    # Optional conv front end: args.convolution is a flat list of
    # (nb_filter, filter_len, stride) triples; a non-positive value ends the stack.
    if args.convolution and args.convolution[0]:
        gen_shape = 'add_1d'
        layer_list = list(range(0, len(args.convolution), 3))
        for l, i in enumerate(layer_list):
            nb_filter = args.convolution[i]
            filter_len = args.convolution[i + 1]
            stride = args.convolution[i + 2]
            if nb_filter <= 0 or filter_len <= 0 or stride <= 0:
                break
            if args.locally_connected:
                model.add(LocallyConnected1D(nb_filter, filter_len,
                                             subsample_length=stride,
                                             input_shape=(loader.input_dim, 1),
                                             activation=args.activation))
            else:
                model.add(Convolution1D(nb_filter, filter_len,
                                        subsample_length=stride,
                                        input_shape=(loader.input_dim, 1),
                                        activation=args.activation))
            if args.pool:
                model.add(MaxPooling1D(pool_length=args.pool))
        model.add(Flatten())

    # Dense stack with optional dropout after each hidden layer.
    for layer in args.dense:
        if layer:
            model.add(Dense(layer, input_dim=loader.input_dim,
                            activation=args.activation))
            if args.drop:
                model.add(Dropout(args.drop))
    # Single linear output unit for growth regression.
    model.add(Dense(out_dim))

    model.summary()
    model.compile(loss=args.loss, optimizer=args.optimizer)

    train_gen = p1b3.DataGenerator(loader, batch_size=args.batch_size,
                                   shape=gen_shape).flow()
    val_gen = p1b3.DataGenerator(loader, partition='val',
                                 batch_size=args.batch_size,
                                 shape=gen_shape).flow()
    val_gen2 = p1b3.DataGenerator(loader, partition='val',
                                  batch_size=args.batch_size,
                                  shape=gen_shape).flow()
    test_gen = p1b3.DataGenerator(loader, partition='test',
                                  batch_size=args.batch_size,
                                  shape=gen_shape).flow()

    # Round sample counts down to whole batches.
    train_samples = int(loader.n_train / args.batch_size) * args.batch_size
    val_samples = int(loader.n_val / args.batch_size) * args.batch_size
    test_samples = int(loader.n_test / args.batch_size) * args.batch_size

    # Command-line overrides, if given.
    train_samples = args.train_samples if args.train_samples else train_samples
    val_samples = args.val_samples if args.val_samples else val_samples

    checkpointer = ModelCheckpoint(filepath=args.save + '.model' + ext + '.h5',
                                   save_best_only=True)
    progbar = MyProgbarLogger()
    history = MyLossHistory(progbar=progbar, val_gen=val_gen2,
                            test_gen=test_gen,
                            val_samples=val_samples,
                            test_samples=test_samples,
                            metric=args.loss,
                            category_cutoffs=args.category_cutoffs,
                            ext=ext, pre=args.save)

    model.fit_generator(train_gen, train_samples,
                        nb_epoch=args.epochs,
                        validation_data=val_gen,
                        nb_val_samples=val_samples,
                        verbose=0,
                        callbacks=[checkpointer, history, progbar],
                        pickle_safe=True,
                        nb_worker=args.workers)
def main():
    """Build and train an mxnet symbolic model on P1B3, configured from
    command-line args."""
    parser = get_parser()
    args = parser.parse_args()
    print('Args:', args)

    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    # Filename extension encoding the hyperparameters (used by saved artifacts).
    ext = extension_from_parameters(args)

    loader = p1b3.DataLoader(feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             drug_features=args.drug_features,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    # Symbolic graph: features in, growth value as regression label.
    net = mx.sym.Variable('concat_features')
    out = mx.sym.Variable('growth')

    # Optional conv front end: args.convolution is a flat list of
    # (nb_filter, filter_len, stride) triples; a non-positive value ends the stack.
    if args.convolution and args.convolution[0]:
        net = mx.sym.Reshape(data=net,
                             shape=(args.batch_size, 1, loader.input_dim, 1))
        layer_list = list(range(0, len(args.convolution), 3))
        for l, i in enumerate(layer_list):
            nb_filter = args.convolution[i]
            filter_len = args.convolution[i + 1]
            stride = args.convolution[i + 2]
            if nb_filter <= 0 or filter_len <= 0 or stride <= 0:
                break
            net = mx.sym.Convolution(data=net, num_filter=nb_filter,
                                     kernel=(filter_len, 1),
                                     stride=(stride, 1))
            net = mx.sym.Activation(data=net, act_type=args.activation)
            if args.pool:
                net = mx.sym.Pooling(data=net, pool_type="max",
                                     kernel=(args.pool, 1), stride=(1, 1))
        net = mx.sym.Flatten(data=net)

    # Dense stack with optional dropout after each hidden layer.
    for layer in args.dense:
        if layer:
            net = mx.sym.FullyConnected(data=net, num_hidden=layer)
            net = mx.sym.Activation(data=net, act_type=args.activation)
            if args.drop:
                net = mx.sym.Dropout(data=net, p=args.drop)
    # Single linear output unit for growth regression.
    net = mx.sym.FullyConnected(data=net, num_hidden=1)
    net = mx.symbol.LinearRegressionOutput(data=net, label=out)

    plot_network(net, 'net' + ext)

    train_iter = ConcatDataIter(loader, batch_size=args.batch_size,
                                num_data=args.train_samples)
    val_iter = ConcatDataIter(loader, partition='val',
                              batch_size=args.batch_size,
                              num_data=args.val_samples)

    # Default to CPU; use the listed GPU ids when provided.
    devices = mx.cpu()
    if args.gpus:
        devices = [mx.gpu(i) for i in args.gpus]

    mod = mx.mod.Module(net,
                        data_names=('concat_features', ),
                        label_names=('growth', ),
                        context=devices)
    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    mod.fit(train_iter, eval_data=val_iter,
            eval_metric=args.loss,
            optimizer=args.optimizer,
            num_epoch=args.epochs,
            initializer=initializer,
            batch_end_callback=mx.callback.Speedometer(args.batch_size, 20))
def main():
    """Build and train a Keras (v2 API) model on P1B3, replicated across
    GPUs with multi_gpu_model, configured from command-line args."""
    parser = get_parser()
    args = parser.parse_args()

    # Filename extension encoding the hyperparameters (used by saved artifacts).
    ext = extension_from_parameters(args)

    # Log to both a run-specific file (DEBUG) and the console (INFO unless -v).
    logfile = args.logfile if args.logfile else os.path.join(
        args.out_dir, args.save)+ext+'.log'
    fh = logging.FileHandler(logfile)
    fh.setFormatter(logging.Formatter(
        "[%(asctime)s %(process)d] %(message)s", datefmt="%Y-%m-%d %H:%M:%S"))
    fh.setLevel(logging.DEBUG)
    sh = logging.StreamHandler()
    sh.setFormatter(logging.Formatter(''))
    sh.setLevel(logging.DEBUG if args.verbose else logging.INFO)
    logger.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    logger.addHandler(sh)
    logger.info('Args: {}'.format(args))

    loader = p1b3.DataLoader(val_split=args.val_split,
                             test_cell_split=args.test_cell_split,
                             cell_features=args.cell_features,
                             drug_features=args.drug_features,
                             feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)
    print('Loader input dim', loader.input_dim)

    gen_shape = None
    out_dim = 1

    model = Sequential()
    # Optional conv front end: args.conv is a flat list of
    # (filters, filter_len, stride) triples; a non-positive value ends the stack.
    if args.conv and args.conv[0]:
        gen_shape = 'add_1d'
        layer_list = list(range(0, len(args.conv), 3))
        for l, i in enumerate(layer_list):
            filters = args.conv[i]
            filter_len = args.conv[i+1]
            stride = args.conv[i+2]
            if filters <= 0 or filter_len <= 0 or stride <= 0:
                break
            if args.locally_connected:
                model.add(LocallyConnected1D(filters, filter_len,
                                             strides=stride,
                                             input_shape=(loader.input_dim, 1)))
            else:
                model.add(Conv1D(filters, filter_len, strides=stride,
                                 input_shape=(loader.input_dim, 1)))
            if args.batch_normalization:
                model.add(BatchNormalization())
            model.add(Activation(args.activation))
            if args.pool:
                model.add(MaxPooling1D(pool_size=args.pool))
        model.add(Flatten())

    # Dense stack with optional batch norm and dropout per hidden layer.
    for layer in args.dense:
        if layer:
            model.add(Dense(layer, input_dim=loader.input_dim))
            if args.batch_normalization:
                model.add(BatchNormalization())
            model.add(Activation(args.activation))
            if args.drop:
                model.add(Dropout(args.drop))
    # Single linear output unit for growth regression.
    model.add(Dense(out_dim))

    model.summary()
    logger.debug('Model: {}'.format(model.to_json()))

    # NOTE(review): gpus=4 is hard-coded here — confirm against deployment.
    parallel_model = multi_gpu_model(model, gpus=4, cpu_merge=False)
    parallel_model.compile(loss=args.loss, optimizer=args.optimizer)

    train_gen = p1b3.DataGenerator(
        loader, batch_size=args.batch_size,
        shape=gen_shape, name='train_gen').flow()
    val_gen = p1b3.DataGenerator(
        loader, partition='val', batch_size=args.batch_size,
        shape=gen_shape, name='val_gen').flow()
    val_gen2 = p1b3.DataGenerator(
        loader, partition='val', batch_size=args.batch_size,
        shape=gen_shape, name='val_gen2').flow()
    test_gen = p1b3.DataGenerator(
        loader, partition='test', batch_size=args.batch_size,
        shape=gen_shape, name='test_gen').flow()

    # Whole-batch step counts, with optional command-line overrides.
    train_steps = int(loader.n_train/args.batch_size)
    val_steps = int(loader.n_val/args.batch_size)
    test_steps = int(loader.n_test/args.batch_size)
    train_steps = args.train_steps if args.train_steps else train_steps
    val_steps = args.val_steps if args.val_steps else val_steps
    test_steps = args.test_steps if args.test_steps else test_steps

    checkpointer = ModelCheckpoint(filepath=os.path.join(
        args.out_dir, args.save)+'.model'+ext+'.h5', save_best_only=True)
    progbar = MyProgbarLogger(train_steps * args.batch_size)
    history = MyLossHistory(progbar=progbar, val_gen=val_gen2,
                            test_gen=test_gen,
                            val_steps=val_steps, test_steps=test_steps,
                            metric=args.loss,
                            category_cutoffs=args.category_cutoffs,
                            ext=ext, pre=os.path.join(args.out_dir, args.save))
    tensorboard = TensorBoard(
        log_dir="{}/{}".format(os.path.join(args.out_dir), time()))

    # Fixed: the call uses the Keras 2 API (epochs=, validation_steps=,
    # workers=), so the Keras 1 legacy kwarg pickle_safe=True is replaced by
    # its Keras 2 equivalent use_multiprocessing=True.
    parallel_model.fit_generator(train_gen, train_steps,
                                 epochs=args.epochs,
                                 validation_data=val_gen,
                                 validation_steps=val_steps,
                                 verbose=0,
                                 callbacks=[checkpointer, history,
                                            progbar, tensorboard],
                                 use_multiprocessing=True,
                                 workers=args.workers)
def main():
    """Train a plain-TensorFlow (session-based) regression model on P1B3,
    configured from command-line args."""
    parser = get_parser()
    args = parser.parse_args()

    # Filename extension encoding the hyperparameters (used by saved artifacts).
    ext = extension_from_parameters(args)

    # Log to both a run-specific file (DEBUG) and the console (INFO unless -v).
    logfile = args.logfile if args.logfile else os.path.join(
        args.out_dir, args.save)+ext+'.log'
    fh = logging.FileHandler(logfile)
    fh.setFormatter(logging.Formatter(
        "[%(asctime)s %(process)d] %(message)s", datefmt="%Y-%m-%d %H:%M:%S"))
    fh.setLevel(logging.DEBUG)
    sh = logging.StreamHandler()
    sh.setFormatter(logging.Formatter(''))
    sh.setLevel(logging.DEBUG if args.verbose else logging.INFO)
    logger.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    logger.addHandler(sh)
    logger.info('Args: {}'.format(args))

    loader = p1b3.DataLoader(val_split=args.val_split,
                             test_cell_split=args.test_cell_split,
                             cell_features=args.cell_features,
                             drug_features=args.drug_features,
                             feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)
    print('Loader input dim', loader.input_dim)

    gen_shape = None

    # Placeholders: feature batch in, scalar growth target out.
    X = tf.placeholder(tf.float32, [None, loader.input_dim])
    Y_ = tf.placeholder(tf.float32, [None, 1])
    Y = model(X, loader.input_dim)
    # Fixed: removed a leftover set_trace() debugger breakpoint that halted
    # every run at an interactive pdb prompt.

    train_gen = p1b3.DataGenerator(loader, batch_size=args.batch_size,
                                   shape=gen_shape, name='train_gen').flow()
    val_gen = p1b3.DataGenerator(loader, partition='val',
                                 batch_size=args.batch_size,
                                 shape=gen_shape, name='val_gen').flow()
    val_gen2 = p1b3.DataGenerator(loader, partition='val',
                                  batch_size=args.batch_size,
                                  shape=gen_shape, name='val_gen2').flow()
    test_gen = p1b3.DataGenerator(loader, partition='test',
                                  batch_size=args.batch_size,
                                  shape=gen_shape, name='test_gen').flow()

    # objective = tf.reduce_mean(tf.square(Y - Y_))
    # train = tf.train.GradientDescentOptimizer(0.001).minimize(objective)
    mse = tf.losses.mean_squared_error(Y_, Y)  # the loss function
    train = tf.train.GradientDescentOptimizer(0.001).minimize(mse)

    with tf.Session() as sess:
        # Fixed: tf.initialize_all_variables() was deprecated in TF 0.12 and
        # later removed; tf.global_variables_initializer() is the replacement.
        sess.run(tf.global_variables_initializer())
        # NOTE(review): the data generator appears to loop indefinitely, so
        # this loop has no termination condition — confirm intended.
        for i, (X_batch, y_batch) in enumerate(train_gen):
            feed_dict = {X: X_batch.reshape(args.batch_size, loader.input_dim),
                         Y_: y_batch.reshape(args.batch_size, 1)}
            # cost, _ = sess.run([objective, train], feed_dict)
            cost, _ = sess.run([mse, train], feed_dict)
            if i % 50 == 0:
                print('Batch :', i, 'Cost :', cost)

    # NOTE(review): everything below builds Keras callbacks that are never
    # used with the raw TF session above — looks like dead code carried over
    # from the Keras variant; confirm before relying on it.
    train_steps = int(loader.n_train/args.batch_size)
    val_steps = int(loader.n_val/args.batch_size)
    test_steps = int(loader.n_test/args.batch_size)
    train_steps = args.train_steps if args.train_steps else train_steps
    val_steps = args.val_steps if args.val_steps else val_steps
    test_steps = args.test_steps if args.test_steps else test_steps

    checkpointer = ModelCheckpoint(filepath=os.path.join(
        args.out_dir, args.save)+'.model'+ext+'.h5', save_best_only=True)
    progbar = MyProgbarLogger(train_steps * args.batch_size)
    history = MyLossHistory(progbar=progbar, val_gen=val_gen2,
                            test_gen=test_gen,
                            val_steps=val_steps, test_steps=test_steps,
                            metric=args.loss,
                            category_cutoffs=args.category_cutoffs,
                            ext=ext, pre=os.path.join(args.out_dir, args.save))
def main():
    """Train the P1B3 mxnet benchmark driven by a consolidated CANDLE
    parameter dictionary (config file overridden by command line)."""
    # Get command-line parameters
    parser = get_p1b3_parser()
    args = parser.parse_args()
    # Get parameters from configuration file
    fileParameters = p1b3.read_config_file(args.config_file)
    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    # Construct extension to save model
    # NOTE(review): the tag says '.neon' although this is the mxnet variant —
    # confirm the intended artifact naming.
    ext = p1b3.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Build dataset loader object
    loader = p1b3.DataLoader(
        seed=seed,
        dtype=gParameters['datatype'],
        val_split=gParameters['validation_split'],
        test_cell_split=gParameters['test_cell_split'],
        cell_features=gParameters['cell_features'],
        drug_features=gParameters['drug_features'],
        feature_subsample=gParameters['feature_subsample'],
        scaling=gParameters['scaling'],
        scramble=gParameters['scramble'],
        min_logconc=gParameters['min_logconc'],
        max_logconc=gParameters['max_logconc'],
        subsample=gParameters['subsample'],
        category_cutoffs=gParameters['category_cutoffs'])

    # Symbolic graph: features in, growth value as regression label.
    net = mx.sym.Variable('concat_features')
    out = mx.sym.Variable('growth')

    # Initialize weights and learning rule
    initializer_weights = p1_common_mxnet.build_initializer(
        gParameters['initialization'], kerasDefaults)
    initializer_bias = p1_common_mxnet.build_initializer(
        'constant', kerasDefaults, 0.)
    init = mx.initializer.Mixed(['bias', '.*'],
                                [initializer_bias, initializer_weights])

    activation = gParameters['activation']

    # Define model architecture
    if 'dense' in gParameters:
        # Build dense layers with optional dropout after each hidden layer.
        for layer in gParameters['dense']:
            if layer:
                net = mx.sym.FullyConnected(data=net, num_hidden=layer)
                net = mx.sym.Activation(data=net, act_type=activation)
                if gParameters['drop']:
                    net = mx.sym.Dropout(data=net, p=gParameters['drop'])
    else:
        # Build convolutional layers; gParameters['conv'] is a flat list of
        # (nb_filter, filter_len, stride) triples.
        net = mx.sym.Reshape(data=net,
                             shape=(gParameters['batch_size'], 1,
                                    loader.input_dim, 1))
        # Fixed: iterate over gParameters['conv'] — the previous code indexed
        # args.convolution, which does not exist on the consolidated
        # parameter set and crashed the conv path.
        layer_list = list(range(0, len(gParameters['conv']), 3))
        for l, i in enumerate(layer_list):
            nb_filter = gParameters['conv'][i]
            filter_len = gParameters['conv'][i + 1]
            stride = gParameters['conv'][i + 2]
            if nb_filter <= 0 or filter_len <= 0 or stride <= 0:
                break
            net = mx.sym.Convolution(data=net, num_filter=nb_filter,
                                     kernel=(filter_len, 1),
                                     stride=(stride, 1))
            net = mx.sym.Activation(data=net, act_type=activation)
            if gParameters['pool']:
                net = mx.sym.Pooling(data=net, pool_type="max",
                                     kernel=(gParameters['pool'], 1),
                                     stride=(1, 1))
        net = mx.sym.Flatten(data=net)
    # (Removed a dead, duplicated neon-style `layers` construction — Conv/
    # Pooling objects appended to a list that was never used by this script.)

    # Single linear output unit for growth regression.
    net = mx.sym.FullyConnected(data=net, num_hidden=1)
    net = mx.symbol.LinearRegressionOutput(data=net, label=out)

    # Display model
    p1_common_mxnet.plot_network(net, 'net' + ext)

    # Define mxnet data iterators
    train_samples = int(loader.n_train)
    val_samples = int(loader.n_val)
    if 'train_samples' in gParameters:
        train_samples = gParameters['train_samples']
    if 'val_samples' in gParameters:
        val_samples = gParameters['val_samples']
    train_iter = ConcatDataIter(loader,
                                batch_size=gParameters['batch_size'],
                                num_data=train_samples)
    val_iter = ConcatDataIter(loader, partition='val',
                              batch_size=gParameters['batch_size'],
                              num_data=val_samples)

    # Default to CPU; use the listed GPU ids when provided.
    devices = mx.cpu()
    if gParameters['gpus']:
        devices = [mx.gpu(i) for i in gParameters['gpus']]

    mod = mx.mod.Module(net,
                        data_names=('concat_features', ),
                        label_names=('growth', ),
                        context=devices)

    # Define optimizer
    optimizer = p1_common_mxnet.build_optimizer(gParameters['optimizer'],
                                                gParameters['learning_rate'],
                                                kerasDefaults)

    # Seed random generator for training
    mx.random.seed(seed)

    # initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    mod.fit(train_iter,
            eval_data=val_iter,
            eval_metric=gParameters['loss'],
            optimizer=optimizer,
            num_epoch=gParameters['epochs'],
            initializer=init,
            # Fixed: Speedometer reports per-batch throughput and is
            # registered as batch_end_callback in the plain mxnet variant of
            # this script; it was wired to epoch_end_callback here.
            batch_end_callback=mx.callback.Speedometer(
                gParameters['batch_size'], 20))
def run(gParameters):
    """
    Runs the model using the specified set of parameters

    Args:
       gParameters: a python dictionary containing the parameters (e.g. epoch)
       to run the model with.
    """
    # Normalize the 'dense' parameter to a list of layer sizes.
    if 'dense' in gParameters:
        dval = gParameters['dense']
        if type(dval) != list:
            # Fixed: a scalar layer size (e.g. dense=1000) is not iterable,
            # so list(dval) raised TypeError; wrap scalars in a list and keep
            # list() for other iterables (e.g. tuples).
            try:
                res = list(dval)
            except TypeError:
                res = [dval]
            gParameters['dense'] = res
        print(gParameters['dense'])

    if 'conv' in gParameters:
        print('Conv input', gParameters['conv'])

    # Construct extension to save model
    ext = benchmark.extension_from_parameters(gParameters, '.keras')

    # Log to both a run-specific file (DEBUG) and the console (INFO unless verbose).
    logfile = gParameters['logfile'] if gParameters[
        'logfile'] else gParameters['output_dir'] + ext + '.log'
    fh = logging.FileHandler(logfile)
    fh.setFormatter(
        logging.Formatter("[%(asctime)s %(process)d] %(message)s",
                          datefmt="%Y-%m-%d %H:%M:%S"))
    fh.setLevel(logging.DEBUG)
    sh = logging.StreamHandler()
    sh.setFormatter(logging.Formatter(''))
    sh.setLevel(logging.DEBUG if gParameters['verbose'] else logging.INFO)
    benchmark.logger.setLevel(logging.DEBUG)
    benchmark.logger.addHandler(fh)
    benchmark.logger.addHandler(sh)
    benchmark.logger.info('Params: {}'.format(gParameters))

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = candle.keras_default_config()
    seed = gParameters['rng_seed']

    # Build dataset loader object
    loader = benchmark.DataLoader(
        seed=seed,
        dtype=gParameters['data_type'],
        val_split=gParameters['val_split'],
        test_cell_split=gParameters['test_cell_split'],
        cell_features=gParameters['cell_features'],
        drug_features=gParameters['drug_features'],
        feature_subsample=gParameters['feature_subsample'],
        scaling=gParameters['scaling'],
        scramble=gParameters['scramble'],
        min_logconc=gParameters['min_logconc'],
        max_logconc=gParameters['max_logconc'],
        subsample=gParameters['subsample'],
        category_cutoffs=gParameters['category_cutoffs'])

    # Initialize weights and learning rule
    initializer_weights = candle.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = candle.build_initializer('constant', kerasDefaults, 0.)

    # Define model architecture
    gen_shape = None
    out_dim = 1

    model = Sequential()
    if 'dense' in gParameters:
        # Build dense layers with optional batch norm and dropout.
        for layer in gParameters['dense']:
            if layer:
                model.add(
                    Dense(layer,
                          input_dim=loader.input_dim,
                          kernel_initializer=initializer_weights,
                          bias_initializer=initializer_bias))
                if gParameters['batch_normalization']:
                    model.add(BatchNormalization())
                model.add(Activation(gParameters['activation']))
                if gParameters['dropout']:
                    model.add(Dropout(gParameters['dropout']))
    else:
        # Build convolutional layers; each entry of gParameters['conv'] is
        # one conv-layer spec handed to add_conv_layer.
        gen_shape = 'add_1d'
        layer_list = list(range(0, len(gParameters['conv'])))
        lc_flag = False
        if 'locally_connected' in gParameters:
            lc_flag = True
        for l, i in enumerate(layer_list):
            if i == 0:
                # Only the first layer needs the input dimension.
                add_conv_layer(model, gParameters['conv'][i],
                               input_dim=loader.input_dim,
                               locally_connected=lc_flag)
            else:
                add_conv_layer(model, gParameters['conv'][i],
                               locally_connected=lc_flag)
            if gParameters['batch_normalization']:
                model.add(BatchNormalization())
            model.add(Activation(gParameters['activation']))
            if gParameters['pool']:
                model.add(MaxPooling1D(pool_size=gParameters['pool']))
        model.add(Flatten())
    # Single linear output unit for growth regression.
    model.add(Dense(out_dim))

    # Define optimizer
    optimizer = candle.build_optimizer(gParameters['optimizer'],
                                       gParameters['learning_rate'],
                                       kerasDefaults)

    # Compile and display model
    model.compile(loss=gParameters['loss'], optimizer=optimizer)
    model.summary()
    benchmark.logger.debug('Model: {}'.format(model.to_json()))

    train_gen = benchmark.DataGenerator(
        loader, batch_size=gParameters['batch_size'], shape=gen_shape,
        name='train_gen',
        cell_noise_sigma=gParameters['cell_noise_sigma']).flow()
    val_gen = benchmark.DataGenerator(
        loader, partition='val', batch_size=gParameters['batch_size'],
        shape=gen_shape, name='val_gen').flow()
    val_gen2 = benchmark.DataGenerator(
        loader, partition='val', batch_size=gParameters['batch_size'],
        shape=gen_shape, name='val_gen2').flow()
    test_gen = benchmark.DataGenerator(
        loader, partition='test', batch_size=gParameters['batch_size'],
        shape=gen_shape, name='test_gen').flow()

    # Whole-batch step counts, with optional parameter-dictionary overrides.
    train_steps = int(loader.n_train / gParameters['batch_size'])
    val_steps = int(loader.n_val / gParameters['batch_size'])
    test_steps = int(loader.n_test / gParameters['batch_size'])
    if 'train_steps' in gParameters:
        train_steps = gParameters['train_steps']
    if 'val_steps' in gParameters:
        val_steps = gParameters['val_steps']
    if 'test_steps' in gParameters:
        test_steps = gParameters['test_steps']

    checkpointer = ModelCheckpoint(filepath=gParameters['output_dir'] +
                                   '.model' + ext + '.h5',
                                   save_best_only=True)
    progbar = MyProgbarLogger(train_steps * gParameters['batch_size'])
    loss_history = MyLossHistory(
        progbar=progbar,
        val_gen=val_gen2,
        test_gen=test_gen,
        val_steps=val_steps,
        test_steps=test_steps,
        metric=gParameters['loss'],
        category_cutoffs=gParameters['category_cutoffs'],
        ext=ext,
        pre=gParameters['output_dir'])

    # Seed random generator for training
    np.random.seed(seed)

    candleRemoteMonitor = candle.CandleRemoteMonitor(params=gParameters)

    history = model.fit_generator(
        train_gen,
        train_steps,
        epochs=gParameters['epochs'],
        validation_data=val_gen,
        validation_steps=val_steps,
        verbose=0,
        callbacks=[checkpointer, loss_history, progbar, candleRemoteMonitor],
    )

    # Detach the run-specific handlers so repeated runs don't duplicate logs.
    benchmark.logger.removeHandler(fh)
    benchmark.logger.removeHandler(sh)

    return history
def main():
    """Train the P1B3 neon benchmark driven by a consolidated CANDLE
    parameter dictionary (config file overridden by command line)."""
    # Get command-line parameters
    parser = get_p1b3_parser()
    args = parser.parse_args()
    #print('Args:', args)
    # Get parameters from configuration file
    fileParameters = p1b3.read_config_file(args.config_file)
    #print ('Params:', fileParameters)

    # Correct for arguments set by default by neon parser
    # (i.e. instead of taking the neon parser default value fall back to the config file,
    # if effectively the command-line was used, then use the command-line value)
    # This applies to conflictive parameters: batch_size, epochs and rng_seed
    if not any("--batch_size" in ag or "-z" in ag for ag in sys.argv):
        args.batch_size = fileParameters['batch_size']
    if not any("--epochs" in ag or "-e" in ag for ag in sys.argv):
        args.epochs = fileParameters['epochs']
    if not any("--rng_seed" in ag or "-r" in ag for ag in sys.argv):
        args.rng_seed = fileParameters['rng_seed']

    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    # Construct extension to save model
    ext = p1b3.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Build dataset loader object
    loader = p1b3.DataLoader(
        seed=seed,
        dtype=gParameters['datatype'],
        val_split=gParameters['validation_split'],
        test_cell_split=gParameters['test_cell_split'],
        cell_features=gParameters['cell_features'],
        drug_features=gParameters['drug_features'],
        feature_subsample=gParameters['feature_subsample'],
        scaling=gParameters['scaling'],
        scramble=gParameters['scramble'],
        min_logconc=gParameters['min_logconc'],
        max_logconc=gParameters['max_logconc'],
        subsample=gParameters['subsample'],
        category_cutoffs=gParameters['category_cutoffs'])

    # Re-generate the backend after consolidating parsing and file config
    gen_backend(backend=args.backend,
                rng_seed=seed,
                device_id=args.device_id,
                batch_size=gParameters['batch_size'],
                datatype=gParameters['datatype'],
                max_devices=args.max_devices,
                compat_mode=args.compat_mode)

    # Initialize weights and learning rule
    initializer_weights = p1_common_neon.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = p1_common_neon.build_initializer(
        'constant', kerasDefaults, 0.)
    activation = p1_common_neon.get_function(gParameters['activation'])()

    # Define model architecture
    layers = []
    reshape = None

    if 'dense' in gParameters:
        # Build dense layers with optional dropout after each hidden layer.
        for layer in gParameters['dense']:
            if layer:
                layers.append(
                    Affine(nout=layer,
                           init=initializer_weights,
                           bias=initializer_bias,
                           activation=activation))
                if gParameters['drop']:
                    layers.append(Dropout(keep=(1 - gParameters['drop'])))
    else:
        # Build convolutional layers; gParameters['conv'] is a flat list of
        # (nb_filter, filter_len, stride) triples.
        reshape = (1, loader.input_dim, 1)
        layer_list = list(range(0, len(gParameters['conv']), 3))
        for l, i in enumerate(layer_list):
            nb_filter = gParameters['conv'][i]
            filter_len = gParameters['conv'][i + 1]
            stride = gParameters['conv'][i + 2]
            # print(nb_filter, filter_len, stride)
            # fshape: (height, width, num_filters).
            layers.append(
                Conv((1, filter_len, nb_filter),
                     strides={
                         'str_h': 1,
                         'str_w': stride
                     },
                     init=initializer_weights,
                     activation=activation))
            if gParameters['pool']:
                layers.append(Pooling((1, gParameters['pool'])))

    # Single linear output unit for growth regression.
    layers.append(
        Affine(nout=1,
               init=initializer_weights,
               bias=initializer_bias,
               activation=neon.transforms.Identity()))

    # Build model
    model = Model(layers=layers)

    # Define neon data iterators
    train_samples = int(loader.n_train)
    val_samples = int(loader.n_val)
    if 'train_samples' in gParameters:
        train_samples = gParameters['train_samples']
    if 'val_samples' in gParameters:
        val_samples = gParameters['val_samples']
    train_iter = ConcatDataIter(loader,
                                ndata=train_samples,
                                lshape=reshape,
                                datatype=gParameters['datatype'])
    val_iter = ConcatDataIter(loader,
                              partition='val',
                              ndata=val_samples,
                              lshape=reshape,
                              datatype=gParameters['datatype'])

    # Define cost and optimizer
    cost = GeneralizedCost(p1_common_neon.get_function(gParameters['loss'])())
    optimizer = p1_common_neon.build_optimizer(gParameters['optimizer'],
                                               gParameters['learning_rate'],
                                               kerasDefaults)

    callbacks = Callbacks(model, eval_set=val_iter,
                          eval_freq=1)  #**args.callback_args)

    model.fit(train_iter,
              optimizer=optimizer,
              num_epochs=gParameters['epochs'],
              cost=cost,
              callbacks=callbacks)