def main():
    print('Available GPUs', get_available_gpus())
    tf.logging.set_verbosity(tf.logging.DEBUG)

    loader = p1b3.DataLoader(val_split=VAL_SPLIT,
                             test_cell_split=TEST_CELL_SPLIT,
                             cell_features=['expression'],
                             drug_features=['descriptors'],
                             feature_subsample=FEATURE_SUBSAMPLE,
                             scaling=SCALING,
                             scramble=False,
                             min_logconc=MIN_LOGCONC,
                             max_logconc=MAX_LOGCONC,
                             subsample='naive_balancing',
                             category_cutoffs=CATEGORY_CUTOFFS)
    tf.logging.info('Loader input dim: {}'.format(loader.input_dim))

    gen_shape = None
    train_gen = p1b3.DataGenerator(loader, batch_size=BATCH_SIZE,
                                   shape=gen_shape, name='train_gen').flow()
    val_gen = p1b3.DataGenerator(loader, partition='val', batch_size=BATCH_SIZE,
                                 shape=gen_shape, name='val_gen').flow()
    val_gen2 = p1b3.DataGenerator(loader, partition='val', batch_size=BATCH_SIZE,
                                  shape=gen_shape, name='val_gen2').flow()
    test_gen = p1b3.DataGenerator(loader, partition='test', batch_size=BATCH_SIZE,
                                  shape=gen_shape, name='test_gen').flow()

    # Prep for distribution using the mirrored strategy
    devices = ["/device:GPU:0", "/device:GPU:1", "/device:GPU:2", "/device:GPU:3"]
    distribution = tf.contrib.distribute.MirroredStrategy(devices)  # alternately specify num_gpus
    config = tf.estimator.RunConfig(train_distribute=distribution)

    # Create the Estimator
    p1b3_regressor = tf.estimator.Estimator(
        model_fn=fc_model_fn, model_dir="/tmp/fc_regression_model", config=config)

    # Train & eval
    train_spec = tf.estimator.TrainSpec(input_fn=lambda: input_fn(train_gen))
    eval_spec = tf.estimator.EvalSpec(input_fn=lambda: input_fn(val_gen))
    tf.estimator.train_and_evaluate(p1b3_regressor, train_spec, eval_spec)
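# The input_fn referenced above is not shown in this listing. A minimal
# sketch of one way to write it, assuming each draw from the p1b3 generator
# yields an already-batched (features, labels) pair of numpy arrays; the
# 'x' feature key is an assumption, not the benchmark's actual code.
import tensorflow as tf

def input_fn(gen):
    dataset = tf.data.Dataset.from_generator(
        lambda: gen,                              # gen already yields batches
        output_types=(tf.float32, tf.float32))
    # Returning the Dataset itself (rather than tensors) keeps the input_fn
    # compatible with tf.contrib.distribute.MirroredStrategy.
    return dataset.map(lambda x, y: ({'x': x}, y))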
def __init__(self, data_loader, partition='train', ndata=None, lshape=None,
             datatype=np.float32):
    """
    During initialization, the input data will be converted to backend
    tensor objects (e.g. CPUTensor or GPUTensor). If the backend uses
    the GPU, the data is copied over to the device.
    """
    super(ConcatDataIter, self).__init__()
    self.data = data_loader
    self.gen = p1b3.DataGenerator(data_loader, partition=partition,
                                  batch_size=self.be.bsz, concat=True)
    self.ndata = ndata or self.gen.num_data
    assert self.ndata >= self.be.bsz
    self.datatype = datatype
    self.gen = self.gen.flow()
    self.start = 0
    self.ybuf = None
    self.shape = lshape or data_loader.input_dim
    self.lshape = lshape
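# A minimal sketch (not the original source) of the iteration protocol a
# neon-style iterator like this would also need: an nbatches property plus
# an __iter__ that copies each numpy batch from the p1b3 generator into
# backend device buffers. The xbuf name and the transposed
# (features, batch) buffer layout follow neon's iobuf convention and are
# assumptions here.
@property
def nbatches(self):
    return self.ndata // self.be.bsz

def __iter__(self):
    xbuf = self.be.iobuf(self.shape)    # device buffer, shape (features, bsz)
    if self.ybuf is None:
        self.ybuf = self.be.iobuf(1)    # device buffer, shape (1, bsz)
    for _ in range(self.nbatches):
        x, y = next(self.gen)           # host numpy arrays, (bsz, features)
        xbuf[:] = x.T.astype(self.datatype)
        self.ybuf[:] = y.reshape(1, -1).astype(self.datatype)
        yield xbuf, self.ybuf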
def main():
    tf.logging.set_verbosity(tf.logging.DEBUG)

    loader = p1b3.DataLoader(val_split=VAL_SPLIT,
                             test_cell_split=TEST_CELL_SPLIT,
                             cell_features=['expression'],
                             drug_features=['descriptors'],
                             feature_subsample=FEATURE_SUBSAMPLE,
                             scaling=SCALING,
                             scramble=False,
                             min_logconc=MIN_LOGCONC,
                             max_logconc=MAX_LOGCONC,
                             subsample='naive_balancing',
                             category_cutoffs=CATEGORY_CUTOFFS)
    tf.logging.info('Loader input dim: {}'.format(loader.input_dim))

    gen_shape = None
    train_gen = p1b3.DataGenerator(loader, batch_size=BATCH_SIZE,
                                   shape=gen_shape, name='train_gen').flow()
    val_gen = p1b3.DataGenerator(loader, partition='val', batch_size=BATCH_SIZE,
                                 shape=gen_shape, name='val_gen').flow()
    val_gen2 = p1b3.DataGenerator(loader, partition='val', batch_size=BATCH_SIZE,
                                  shape=gen_shape, name='val_gen2').flow()
    test_gen = p1b3.DataGenerator(loader, partition='test', batch_size=BATCH_SIZE,
                                  shape=gen_shape, name='test_gen').flow()

    # Create the Estimator
    p1b3_regressor = tf.estimator.Estimator(
        model_fn=fc_model_fn, model_dir="/tmp/fc_regression_model")

    # Train & eval
    train_spec = tf.estimator.TrainSpec(input_fn=lambda: input_fn(train_gen))
    eval_spec = tf.estimator.EvalSpec(input_fn=lambda: input_fn(val_gen))
    tf.estimator.train_and_evaluate(p1b3_regressor, train_spec, eval_spec)
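# The fc_model_fn passed to both Estimator variants above is not shown. A
# minimal sketch of a compatible model_fn, assuming features arrive as a
# dict with key 'x' of shape (batch, input_dim); the hidden layer sizes and
# learning rate are illustrative assumptions, not the benchmark's values.
def fc_model_fn(features, labels, mode):
    net = features['x']
    for units in [1000, 500, 100, 50]:        # illustrative architecture
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
    predictions = tf.layers.dense(net, units=1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    loss = tf.losses.mean_squared_error(labels=tf.reshape(labels, [-1, 1]),
                                        predictions=predictions)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(loss,
                                      global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

    return tf.estimator.EstimatorSpec(mode, loss=loss)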
def __init__(self, data_loader, partition='train', batch_size=32,
             num_data=None, shape=None):
    super(ConcatDataIter, self).__init__()
    self.data = data_loader
    self.batch_size = batch_size
    self.gen = p1b3.DataGenerator(data_loader, partition=partition,
                                  batch_size=batch_size, shape=shape,
                                  concat=True)
    self.num_data = num_data or self.gen.num_data
    self.cursor = 0
    self.gen = self.gen.flow()
def main():
    parser = get_parser()
    args = parser.parse_args()
    print('Args:', args)

    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    ext = extension_from_parameters(args)

    loader = p1b3.DataLoader(feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             drug_features=args.drug_features,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    gen_shape = None
    out_dim = 1

    model = Sequential()
    if args.convolution and args.convolution[0]:
        gen_shape = 'add_1d'
        layer_list = list(range(0, len(args.convolution), 3))
        for l, i in enumerate(layer_list):
            nb_filter = args.convolution[i]
            filter_len = args.convolution[i + 1]
            stride = args.convolution[i + 2]
            if nb_filter <= 0 or filter_len <= 0 or stride <= 0:
                break
            if args.locally_connected:
                model.add(LocallyConnected1D(nb_filter, filter_len,
                                             subsample_length=stride,
                                             input_shape=(loader.input_dim, 1),
                                             activation=args.activation))
            else:
                model.add(Convolution1D(nb_filter, filter_len,
                                        subsample_length=stride,
                                        input_shape=(loader.input_dim, 1),
                                        activation=args.activation))
            if args.pool:
                model.add(MaxPooling1D(pool_length=args.pool))
        model.add(Flatten())

    for layer in args.dense:
        if layer:
            model.add(Dense(layer, input_dim=loader.input_dim,
                            activation=args.activation))
            if args.drop:
                model.add(Dropout(args.drop))
    model.add(Dense(out_dim))

    model.summary()
    model.compile(loss=args.loss, optimizer=args.optimizer)

    train_gen = p1b3.DataGenerator(loader, batch_size=args.batch_size,
                                   shape=gen_shape).flow()
    val_gen = p1b3.DataGenerator(loader, partition='val',
                                 batch_size=args.batch_size,
                                 shape=gen_shape).flow()
    val_gen2 = p1b3.DataGenerator(loader, partition='val',
                                  batch_size=args.batch_size,
                                  shape=gen_shape).flow()
    test_gen = p1b3.DataGenerator(loader, partition='test',
                                  batch_size=args.batch_size,
                                  shape=gen_shape).flow()

    train_samples = int(loader.n_train / args.batch_size) * args.batch_size
    val_samples = int(loader.n_val / args.batch_size) * args.batch_size
    test_samples = int(loader.n_test / args.batch_size) * args.batch_size
    train_samples = args.train_samples if args.train_samples else train_samples
    val_samples = args.val_samples if args.val_samples else val_samples

    checkpointer = ModelCheckpoint(filepath=args.save + '.model' + ext + '.h5',
                                   save_best_only=True)
    progbar = MyProgbarLogger()
    history = MyLossHistory(progbar=progbar, val_gen=val_gen2,
                            test_gen=test_gen, val_samples=val_samples,
                            test_samples=test_samples, metric=args.loss,
                            category_cutoffs=args.category_cutoffs,
                            ext=ext, pre=args.save)

    model.fit_generator(train_gen, train_samples,
                        nb_epoch=args.epochs,
                        validation_data=val_gen,
                        nb_val_samples=val_samples,
                        verbose=0,
                        callbacks=[checkpointer, history, progbar],
                        pickle_safe=True,
                        nb_worker=args.workers)
def main():
    parser = get_parser()
    args = parser.parse_args()

    ext = extension_from_parameters(args)
    logfile = args.logfile if args.logfile else os.path.join(args.out_dir, args.save) + ext + '.log'

    fh = logging.FileHandler(logfile)
    fh.setFormatter(logging.Formatter("[%(asctime)s %(process)d] %(message)s",
                                      datefmt="%Y-%m-%d %H:%M:%S"))
    fh.setLevel(logging.DEBUG)

    sh = logging.StreamHandler()
    sh.setFormatter(logging.Formatter(''))
    sh.setLevel(logging.DEBUG if args.verbose else logging.INFO)

    logger.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    logger.addHandler(sh)
    logger.info('Args: {}'.format(args))

    loader = p1b3.DataLoader(val_split=args.val_split,
                             test_cell_split=args.test_cell_split,
                             cell_features=args.cell_features,
                             drug_features=args.drug_features,
                             feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)
    print('Loader input dim', loader.input_dim)

    gen_shape = None
    out_dim = 1

    model = Sequential()
    if args.conv and args.conv[0]:
        gen_shape = 'add_1d'
        layer_list = list(range(0, len(args.conv), 3))
        for l, i in enumerate(layer_list):
            filters = args.conv[i]
            filter_len = args.conv[i + 1]
            stride = args.conv[i + 2]
            if filters <= 0 or filter_len <= 0 or stride <= 0:
                break
            if args.locally_connected:
                model.add(LocallyConnected1D(filters, filter_len, strides=stride,
                                             input_shape=(loader.input_dim, 1)))
            else:
                model.add(Conv1D(filters, filter_len, strides=stride,
                                 input_shape=(loader.input_dim, 1)))
            if args.batch_normalization:
                model.add(BatchNormalization())
            model.add(Activation(args.activation))
            if args.pool:
                model.add(MaxPooling1D(pool_size=args.pool))
        model.add(Flatten())

    for layer in args.dense:
        if layer:
            model.add(Dense(layer, input_dim=loader.input_dim))
            if args.batch_normalization:
                model.add(BatchNormalization())
            model.add(Activation(args.activation))
            if args.drop:
                model.add(Dropout(args.drop))
    model.add(Dense(out_dim))

    model.summary()
    logger.debug('Model: {}'.format(model.to_json()))

    parallel_model = multi_gpu_model(model, gpus=4, cpu_merge=False)
    parallel_model.compile(loss=args.loss, optimizer=args.optimizer)

    train_gen = p1b3.DataGenerator(loader, batch_size=args.batch_size,
                                   shape=gen_shape, name='train_gen').flow()
    val_gen = p1b3.DataGenerator(loader, partition='val',
                                 batch_size=args.batch_size,
                                 shape=gen_shape, name='val_gen').flow()
    val_gen2 = p1b3.DataGenerator(loader, partition='val',
                                  batch_size=args.batch_size,
                                  shape=gen_shape, name='val_gen2').flow()
    test_gen = p1b3.DataGenerator(loader, partition='test',
                                  batch_size=args.batch_size,
                                  shape=gen_shape, name='test_gen').flow()

    train_steps = int(loader.n_train / args.batch_size)
    val_steps = int(loader.n_val / args.batch_size)
    test_steps = int(loader.n_test / args.batch_size)
    train_steps = args.train_steps if args.train_steps else train_steps
    val_steps = args.val_steps if args.val_steps else val_steps
    test_steps = args.test_steps if args.test_steps else test_steps

    checkpointer = ModelCheckpoint(
        filepath=os.path.join(args.out_dir, args.save) + '.model' + ext + '.h5',
        save_best_only=True)
    progbar = MyProgbarLogger(train_steps * args.batch_size)
    history = MyLossHistory(progbar=progbar, val_gen=val_gen2,
                            test_gen=test_gen, val_steps=val_steps,
                            test_steps=test_steps, metric=args.loss,
                            category_cutoffs=args.category_cutoffs,
                            ext=ext, pre=os.path.join(args.out_dir, args.save))
    tensorboard = TensorBoard(log_dir="{}/{}".format(args.out_dir, time()))

    parallel_model.fit_generator(train_gen, train_steps,
                                 epochs=args.epochs,
                                 validation_data=val_gen,
                                 validation_steps=val_steps,
                                 verbose=0,
                                 callbacks=[checkpointer, history, progbar, tensorboard],
                                 use_multiprocessing=True,  # Keras 2 name for pickle_safe
                                 workers=args.workers)
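# Note: as wired above, ModelCheckpoint serializes the multi_gpu_model
# wrapper rather than the underlying single-GPU graph. A common workaround
# is a small callback that saves the template model instead; the sketch
# below is illustrative (TemplateCheckpoint is a hypothetical name, not part
# of this code) and tracks val_loss to save `model` rather than
# `parallel_model`.
from keras.callbacks import Callback
import numpy as np

class TemplateCheckpoint(Callback):
    def __init__(self, template_model, filepath):
        super(TemplateCheckpoint, self).__init__()
        self.template = template_model
        self.filepath = filepath
        self.best = np.inf

    def on_epoch_end(self, epoch, logs=None):
        val_loss = (logs or {}).get('val_loss')
        if val_loss is not None and val_loss < self.best:
            self.best = val_loss
            self.template.save(self.filepath)  # single-GPU weights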
def main():
    parser = get_parser()
    args = parser.parse_args()

    ext = extension_from_parameters(args)
    logfile = args.logfile if args.logfile else os.path.join(args.out_dir, args.save) + ext + '.log'

    fh = logging.FileHandler(logfile)
    fh.setFormatter(logging.Formatter("[%(asctime)s %(process)d] %(message)s",
                                      datefmt="%Y-%m-%d %H:%M:%S"))
    fh.setLevel(logging.DEBUG)

    sh = logging.StreamHandler()
    sh.setFormatter(logging.Formatter(''))
    sh.setLevel(logging.DEBUG if args.verbose else logging.INFO)

    logger.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    logger.addHandler(sh)
    logger.info('Args: {}'.format(args))

    loader = p1b3.DataLoader(val_split=args.val_split,
                             test_cell_split=args.test_cell_split,
                             cell_features=args.cell_features,
                             drug_features=args.drug_features,
                             feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)
    print('Loader input dim', loader.input_dim)

    gen_shape = None
    out_dim = 1

    X = tf.placeholder(tf.float32, [None, loader.input_dim])
    Y_ = tf.placeholder(tf.float32, [None, 1])
    Y = model(X, loader.input_dim)

    train_gen = p1b3.DataGenerator(loader, batch_size=args.batch_size,
                                   shape=gen_shape, name='train_gen').flow()
    val_gen = p1b3.DataGenerator(loader, partition='val',
                                 batch_size=args.batch_size,
                                 shape=gen_shape, name='val_gen').flow()
    val_gen2 = p1b3.DataGenerator(loader, partition='val',
                                  batch_size=args.batch_size,
                                  shape=gen_shape, name='val_gen2').flow()
    test_gen = p1b3.DataGenerator(loader, partition='test',
                                  batch_size=args.batch_size,
                                  shape=gen_shape, name='test_gen').flow()

    mse = tf.losses.mean_squared_error(Y_, Y)  # the loss function
    train = tf.train.GradientDescentOptimizer(0.001).minimize(mse)

    train_steps = int(loader.n_train / args.batch_size)
    val_steps = int(loader.n_val / args.batch_size)
    test_steps = int(loader.n_test / args.batch_size)
    train_steps = args.train_steps if args.train_steps else train_steps
    val_steps = args.val_steps if args.val_steps else val_steps
    test_steps = args.test_steps if args.test_steps else test_steps

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())  # initialize_all_variables is deprecated
        # The generator cycles indefinitely, so bound the loop by train_steps
        for i in range(train_steps):
            X_batch, y_batch = next(train_gen)
            feed_dict = {X: X_batch.reshape(args.batch_size, loader.input_dim),
                         Y_: y_batch.reshape(args.batch_size, 1)}
            cost, _ = sess.run([mse, train], feed_dict)
            if i % 50 == 0:
                print('Batch :', i, 'Cost :', cost)

    checkpointer = ModelCheckpoint(
        filepath=os.path.join(args.out_dir, args.save) + '.model' + ext + '.h5',
        save_best_only=True)
    progbar = MyProgbarLogger(train_steps * args.batch_size)
    history = MyLossHistory(progbar=progbar, val_gen=val_gen2,
                            test_gen=test_gen, val_steps=val_steps,
                            test_steps=test_steps, metric=args.loss,
                            category_cutoffs=args.category_cutoffs,
                            ext=ext, pre=os.path.join(args.out_dir, args.save))
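# The model() helper called above is not shown in this listing. A minimal
# sketch of what it might look like: a fully-connected regressor built with
# tf.layers. The hidden sizes are illustrative assumptions, not the
# benchmark's actual architecture; input_dim is kept for signature
# compatibility even though X already carries the shape.
def model(X, input_dim):
    net = X
    for units in [1000, 500, 100, 50]:
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
    return tf.layers.dense(net, units=1)  # single regression output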
def run(gParameters):
    """
    Runs the model using the specified set of parameters

    Args:
        gParameters: a python dictionary containing the parameters
        (e.g. epoch) to run the model with.
    """
    if 'dense' in gParameters:
        dval = gParameters['dense']
        if not isinstance(dval, list):
            # 'dense' may arrive as a string of layer sizes (e.g. from a
            # parameter file); parse it into a list of ints rather than
            # splitting it into characters
            try:
                is_str = isinstance(dval, basestring)
            except NameError:
                is_str = isinstance(dval, str)
            res = str2lst(dval) if is_str else list(dval)
            gParameters['dense'] = res
        print(gParameters['dense'])

    if 'conv' in gParameters:
        # 'conv' may similarly arrive as a string; see str2lst above
        print('Conv input', gParameters['conv'])

    # Construct extension to save model
    ext = benchmark.extension_from_parameters(gParameters, '.keras')
    logfile = gParameters['logfile'] if gParameters['logfile'] \
        else gParameters['output_dir'] + ext + '.log'

    fh = logging.FileHandler(logfile)
    fh.setFormatter(logging.Formatter("[%(asctime)s %(process)d] %(message)s",
                                      datefmt="%Y-%m-%d %H:%M:%S"))
    fh.setLevel(logging.DEBUG)

    sh = logging.StreamHandler()
    sh.setFormatter(logging.Formatter(''))
    sh.setLevel(logging.DEBUG if gParameters['verbose'] else logging.INFO)

    benchmark.logger.setLevel(logging.DEBUG)
    benchmark.logger.addHandler(fh)
    benchmark.logger.addHandler(sh)
    benchmark.logger.info('Params: {}'.format(gParameters))

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = candle.keras_default_config()
    seed = gParameters['rng_seed']

    # Build dataset loader object
    loader = benchmark.DataLoader(
        seed=seed,
        dtype=gParameters['data_type'],
        val_split=gParameters['val_split'],
        test_cell_split=gParameters['test_cell_split'],
        cell_features=gParameters['cell_features'],
        drug_features=gParameters['drug_features'],
        feature_subsample=gParameters['feature_subsample'],
        scaling=gParameters['scaling'],
        scramble=gParameters['scramble'],
        min_logconc=gParameters['min_logconc'],
        max_logconc=gParameters['max_logconc'],
        subsample=gParameters['subsample'],
        category_cutoffs=gParameters['category_cutoffs'])

    # Initialize weights and learning rule
    initializer_weights = candle.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = candle.build_initializer('constant', kerasDefaults, 0.)
    activation = gParameters['activation']

    # Define model architecture
    gen_shape = None
    out_dim = 1

    model = Sequential()
    if 'dense' in gParameters:  # Build dense layers
        for layer in gParameters['dense']:
            if layer:
                model.add(Dense(layer, input_dim=loader.input_dim,
                                kernel_initializer=initializer_weights,
                                bias_initializer=initializer_bias))
                if gParameters['batch_normalization']:
                    model.add(BatchNormalization())
                model.add(Activation(activation))
                if gParameters['dropout']:
                    model.add(Dropout(gParameters['dropout']))
    else:  # Build convolutional layers
        gen_shape = 'add_1d'
        lc_flag = 'locally_connected' in gParameters
        for i, conv_layer in enumerate(gParameters['conv']):
            if i == 0:
                add_conv_layer(model, conv_layer,
                               input_dim=loader.input_dim,
                               locally_connected=lc_flag)
            else:
                add_conv_layer(model, conv_layer, locally_connected=lc_flag)
            if gParameters['batch_normalization']:
                model.add(BatchNormalization())
            model.add(Activation(activation))
            if gParameters['pool']:
                model.add(MaxPooling1D(pool_size=gParameters['pool']))
        model.add(Flatten())

    model.add(Dense(out_dim))

    # Define optimizer
    optimizer = candle.build_optimizer(gParameters['optimizer'],
                                       gParameters['learning_rate'],
                                       kerasDefaults)

    # Compile and display model
    model.compile(loss=gParameters['loss'], optimizer=optimizer)
    model.summary()
    benchmark.logger.debug('Model: {}'.format(model.to_json()))

    train_gen = benchmark.DataGenerator(
        loader, batch_size=gParameters['batch_size'], shape=gen_shape,
        name='train_gen', cell_noise_sigma=gParameters['cell_noise_sigma']).flow()
    val_gen = benchmark.DataGenerator(
        loader, partition='val', batch_size=gParameters['batch_size'],
        shape=gen_shape, name='val_gen').flow()
    val_gen2 = benchmark.DataGenerator(
        loader, partition='val', batch_size=gParameters['batch_size'],
        shape=gen_shape, name='val_gen2').flow()
    test_gen = benchmark.DataGenerator(
        loader, partition='test', batch_size=gParameters['batch_size'],
        shape=gen_shape, name='test_gen').flow()

    train_steps = int(loader.n_train / gParameters['batch_size'])
    val_steps = int(loader.n_val / gParameters['batch_size'])
    test_steps = int(loader.n_test / gParameters['batch_size'])
    if 'train_steps' in gParameters:
        train_steps = gParameters['train_steps']
    if 'val_steps' in gParameters:
        val_steps = gParameters['val_steps']
    if 'test_steps' in gParameters:
        test_steps = gParameters['test_steps']

    checkpointer = ModelCheckpoint(
        filepath=gParameters['output_dir'] + '.model' + ext + '.h5',
        save_best_only=True)
    progbar = MyProgbarLogger(train_steps * gParameters['batch_size'])
    loss_history = MyLossHistory(
        progbar=progbar, val_gen=val_gen2, test_gen=test_gen,
        val_steps=val_steps, test_steps=test_steps,
        metric=gParameters['loss'],
        category_cutoffs=gParameters['category_cutoffs'],
        ext=ext, pre=gParameters['output_dir'])

    # Seed random generator for training
    np.random.seed(seed)

    candleRemoteMonitor = candle.CandleRemoteMonitor(params=gParameters)

    history = model.fit_generator(
        train_gen, train_steps,
        epochs=gParameters['epochs'],
        validation_data=val_gen,
        validation_steps=val_steps,
        verbose=0,
        callbacks=[checkpointer, loss_history, progbar, candleRemoteMonitor])

    benchmark.logger.removeHandler(fh)
    benchmark.logger.removeHandler(sh)

    return history