def run(gParameters):
    """Train a dense classifier, save it, and re-evaluate the reloaded copies.

    The train/test split is read through ``load_data`` from a hard-coded CSV
    path. A ``Sequential`` model is assembled from ``gParameters['dense']``
    (one Dense + Activation, optionally followed by Dropout, per non-zero
    entry), trained with the test split used as validation data, and then
    written to ``gParameters['save']`` as JSON, YAML and HDF5. Both
    serialized architectures are reloaded, given the saved weights and
    evaluated again; the YAML model's second metric is written to
    ``<model_name>_accuracy.txt`` as a percentage.

    Args:
        gParameters: dict of hyper-parameters. Keys read here: 'dense',
            'activation', 'drop', 'classes', 'out_act', 'optimizer',
            'learning_rate', 'loss', 'metrics', 'save', 'model_name',
            'batch_size' and 'epochs'. The dict is updated in place with
            the result of ``compute_trainable_params(model)``.

    Returns:
        The object returned by ``model.fit``.
    """
    print('Params:', gParameters)

    # NOTE(review): the dataset location is hard-coded relative to the
    # current working directory.
    data_dir = '../data-05-31-2018/'
    full_data_file = 'formatted_full_data.csv'
    X_train, Y_train, X_test, Y_test = load_data(data_dir + full_data_file,
                                                 gParameters)

    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    print('Y_train shape:', Y_train.shape)
    print('Y_test shape:', Y_test.shape)

    x_train_len = X_train.shape[1]

    model = Sequential()

    # Zero/falsy entries in 'dense' are skipped.
    for layer in gParameters['dense']:
        if layer:
            model.add(Dense(layer, input_shape=(x_train_len,)))
            model.add(Activation(gParameters['activation']))
            if gParameters['drop']:
                model.add(Dropout(gParameters['drop']))
    model.add(Dense(gParameters['classes']))
    model.add(Activation(gParameters['out_act']))

    kerasDefaults = p1_common.keras_default_config()

    # Define optimizer
    optimizer = p1_common_keras.build_optimizer(gParameters['optimizer'],
                                                gParameters['learning_rate'],
                                                kerasDefaults)

    model.summary()
    model.compile(loss=gParameters['loss'],
                  optimizer=optimizer,
                  metrics=[gParameters['metrics']])

    output_dir = gParameters['save']

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # calculate trainable and non-trainable params
    gParameters.update(compute_trainable_params(model))

    # Callbacks used during training (no model checkpointing is set up).
    model_name = gParameters['model_name']
    csv_logger = CSVLogger('{}/training.log'.format(output_dir))
    # NOTE(review): newer Keras releases call the `epsilon` argument
    # `min_delta` -- confirm against the installed version.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10,
                                  verbose=1, mode='auto', epsilon=0.0001,
                                  cooldown=0, min_lr=0)
    candleRemoteMonitor = CandleRemoteMonitor(params=gParameters)
    timeoutMonitor = TerminateOnTimeOut(TIMEOUT)
    history = model.fit(X_train, Y_train,
                        batch_size=gParameters['batch_size'],
                        epochs=gParameters['epochs'],
                        verbose=1,
                        validation_data=(X_test, Y_test),
                        callbacks=[csv_logger, reduce_lr,
                                   candleRemoteMonitor, timeoutMonitor])

    score = model.evaluate(X_test, Y_test, verbose=0)

    print('Test score:', score[0])
    print('Test accuracy:', score[1])

    json_path = '{}/{}.model.json'.format(output_dir, model_name)
    yaml_path = '{}/{}.model.yaml'.format(output_dir, model_name)

    # serialize model to JSON
    with open(json_path, 'w') as json_file:
        json_file.write(model.to_json())

    # serialize model to YAML
    with open(yaml_path, 'w') as yaml_file:
        yaml_file.write(model.to_yaml())

    # serialize model to HDF5
    # NOTE(review): `i` is not defined inside this function; it must exist
    # at module level when run() is called, otherwise this line raises
    # NameError. Confirm where the network index is meant to come from.
    h5_path = '{}/{}_network{}.h5'.format(output_dir, model_name, i)
    model.save(h5_path)
    print("Saved model to disk")

    # Rebuild the architecture from each serialized form.
    with open(json_path, 'r') as json_file:
        loaded_model_json = model_from_json(json_file.read())

    with open(yaml_path, 'r') as yaml_file:
        loaded_model_yaml = model_from_yaml(yaml_file.read())

    # load weights into the JSON-built model
    loaded_model_json.load_weights(h5_path)
    print("Loaded json model from disk")

    # evaluate json loaded model on test data
    loaded_model_json.compile(loss=gParameters['loss'],
                              optimizer=gParameters['optimizer'],
                              metrics=[gParameters['metrics']])
    score_json = loaded_model_json.evaluate(X_test, Y_test, verbose=0)

    print('json Test score:', score_json[0])
    print('json Test accuracy:', score_json[1])

    print("json %s: %.2f%%" % (loaded_model_json.metrics_names[1],
                               score_json[1] * 100))

    # load weights into the YAML-built model
    loaded_model_yaml.load_weights(h5_path)
    print("Loaded yaml model from disk")

    # evaluate yaml loaded model on test data
    loaded_model_yaml.compile(loss=gParameters['loss'],
                              optimizer=gParameters['optimizer'],
                              metrics=[gParameters['metrics']])
    score_yaml = loaded_model_yaml.evaluate(X_test, Y_test, verbose=0)

    print('yaml Test score:', score_yaml[0])
    print('yaml Test accuracy:', score_yaml[1])

    print("yaml %s: %.2f%%" % (loaded_model_yaml.metrics_names[1],
                               score_yaml[1] * 100))

    # Scale to a percentage first and round afterwards: rounding before the
    # multiplication can leave artefacts such as 91.22999999999999.
    acc_path = '{}/{}_accuracy.txt'.format(output_dir, model_name)
    with open(acc_path, 'w') as acc_file:
        acc_file.write(str(round(score_yaml[1] * 100, 2)))
    return history
Example #2
0
def run(gParameters):
    """Train the 1-D convolutional classifier and export a frozen TF graph.

    Downloads (or reuses cached copies of) the train and test files, builds
    a ``Sequential`` model from the 'conv', 'pool' and 'dense' settings,
    trains it with the test split used as validation data, and then writes
    a TensorFlow checkpoint, graph definition and frozen graph to the paths
    in the local ``config`` dict.

    Args:
        gParameters: dict of hyper-parameters. Keys read here: 'train_data',
            'test_data', 'data_url', 'conv' (flat sequence of
            filters, kernel length, stride triples), 'pool', 'activation',
            'dense', 'classes', 'out_act', 'optimizer', 'learning_rate',
            'loss', 'metrics', 'save', 'model_name' and 'batch_size'. The
            mere presence of a 'locally_connected' key selects
            LocallyConnected1D layers. The dict is updated in place with
            the result of ``compute_trainable_params(model)``.

    Returns:
        The object returned by ``model.fit``.
    """
    print('Params:', gParameters)

    file_train = gParameters['train_data']
    file_test = gParameters['test_data']
    url = gParameters['data_url']

    train_file = data_utils.get_file(file_train,
                                     url + file_train,
                                     cache_subdir='Pilot1')
    test_file = data_utils.get_file(file_test,
                                    url + file_test,
                                    cache_subdir='Pilot1')

    X_train, Y_train, X_test, Y_test = load_data(train_file, test_file,
                                                 gParameters)

    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    print('Y_train shape:', Y_train.shape)
    print('Y_test shape:', Y_test.shape)

    x_train_len = X_train.shape[1]

    # this reshaping is critical for the Conv1D to work
    X_train = np.expand_dims(X_train, axis=2)
    X_test = np.expand_dims(X_test, axis=2)

    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    model = Sequential()

    conv_spec = gParameters['conv']

    # Normalise the pooling sizes to a list once, up front. A bare scalar is
    # wrapped in a list instead of being passed to list(), which would raise
    # TypeError for an int.
    pool_list = gParameters['pool']
    if pool_list and not isinstance(pool_list, list):
        try:
            pool_list = list(pool_list)
        except TypeError:
            pool_list = [pool_list]

    # 'conv' is consumed three values at a time: filters, length, stride.
    for layer_idx, i in enumerate(range(0, len(conv_spec), 3)):
        filters = conv_spec[i]
        filter_len = conv_spec[i + 1]
        stride = conv_spec[i + 2]
        print(layer_idx, filters, filter_len, stride)

        # A non-positive entry ends the convolutional stack.
        if filters <= 0 or filter_len <= 0 or stride <= 0:
            break
        # NOTE(review): this tests for the key's presence, not its value, so
        # 'locally_connected': False still selects LocallyConnected1D.
        if 'locally_connected' in gParameters:
            model.add(
                LocallyConnected1D(filters,
                                   filter_len,
                                   strides=stride,
                                   padding='valid',
                                   input_shape=(x_train_len, 1)))
        elif i == 0:
            # input layer
            model.add(
                Conv1D(filters=filters,
                       kernel_size=filter_len,
                       strides=stride,
                       padding='valid',
                       input_shape=(x_train_len, 1)))
        else:
            model.add(
                Conv1D(filters=filters,
                       kernel_size=filter_len,
                       strides=stride,
                       padding='valid'))
        model.add(Activation(gParameters['activation']))
        if pool_list:
            model.add(MaxPooling1D(pool_size=pool_list[layer_idx]))

    model.add(Flatten())

    # Dropout is deliberately not added here: per the original author it had
    # to be disabled for TensorRT because it produced an error.
    for layer in gParameters['dense']:
        if layer:
            model.add(Dense(layer))
            model.add(Activation(gParameters['activation']))

    # The output activation is fused into the Dense layer and the layer is
    # named explicitly; config['out_layer'] below refers to this name.
    model.add(
        Dense(gParameters['classes'],
              activation=gParameters['out_act'],
              name='activation_5'))
    #Reference case
    #model.add(Conv1D(filters=128, kernel_size=20, strides=1, padding='valid', input_shape=(P, 1)))
    #model.add(Activation('relu'))
    #model.add(MaxPooling1D(pool_size=1))
    #model.add(Conv1D(filters=128, kernel_size=10, strides=1, padding='valid'))
    #model.add(Activation('relu'))
    #model.add(MaxPooling1D(pool_size=10))
    #model.add(Flatten())
    #model.add(Dense(200))
    #model.add(Activation('relu'))
    #model.add(Dropout(0.1))
    #model.add(Dense(20))
    #model.add(Activation('relu'))
    #model.add(Dropout(0.1))
    #model.add(Dense(CLASSES))
    #model.add(Activation('softmax'))

    kerasDefaults = p1_common.keras_default_config()

    # Define optimizer
    optimizer = p1_common_keras.build_optimizer(gParameters['optimizer'],
                                                gParameters['learning_rate'],
                                                kerasDefaults)

    model.summary()
    # Print layer and output op names; they are needed to fill in
    # 'input_layer' / 'out_layer' in the TensorRT config below.
    for layer in model.layers:
        print(layer.name)

    print([x.op.name for x in model.outputs])

    model.compile(loss=gParameters['loss'],
                  optimizer=optimizer,
                  metrics=[gParameters['metrics']])

    output_dir = gParameters['save']

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # calculate trainable and non-trainable params
    gParameters.update(compute_trainable_params(model))

    # Callbacks used during training (no model checkpointing is set up).
    model_name = gParameters['model_name']
    csv_logger = CSVLogger('{}/training.log'.format(output_dir))
    # NOTE(review): newer Keras releases call the `epsilon` argument
    # `min_delta` -- confirm against the installed version.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.1,
                                  patience=10,
                                  verbose=1,
                                  mode='auto',
                                  epsilon=0.0001,
                                  cooldown=0,
                                  min_lr=0)
    candleRemoteMonitor = CandleRemoteMonitor(params=gParameters)
    timeoutMonitor = TerminateOnTimeOut(TIMEOUT)
    # NOTE(review): the epoch count is hard-coded to 2 and
    # gParameters['epochs'] is ignored -- confirm this is intentional.
    history = model.fit(
        X_train,
        Y_train,
        batch_size=gParameters['batch_size'],
        epochs=2,  #gParameters['epochs'],
        verbose=1,
        validation_data=(X_test, Y_test),
        callbacks=[csv_logger, reduce_lr, candleRemoteMonitor, timeoutMonitor])

    score = model.evaluate(X_test, Y_test, verbose=0)

    #Begin tensorrt code
    # NOTE(review): the paths below are absolute and specific to one user's
    # file system.
    config = {
        # Where to save models (Tensorflow + TensorRT)
        "graphdef_file":
        "/gpfs/jlse-fs0/users/pbalapra/tensorrt/Benchmarks/Pilot1/NT3/nt3.pb",
        "frozen_model_file":
        "/gpfs/jlse-fs0/users/pbalapra/tensorrt/Benchmarks/Pilot1/NT3/nt3_frozen_model.pb",
        "snapshot_dir":
        "/gpfs/jlse-fs0/users/pbalapra/tensorrt/Benchmarks/Pilot1/NT3/snapshot",
        "engine_save_dir":
        "/gpfs/jlse-fs0/users/pbalapra/tensorrt/Benchmarks/Pilot1/NT3",

        # Needed for TensorRT
        "inference_batch_size": 1,  # inference batch size
        "input_layer":
        "conv1d_1",  # name of the input tensor in the TF computational graph
        "out_layer":
        "activation_5/Softmax",  # name of the output tensor in the TF computational graph
        "output_size": 2,  # number of classes in output
        "precision":
        "fp32"  # desired precision (fp32, fp16)
    }

    # Now, let's use the Tensorflow backend to get the TF graphdef and frozen graph
    K.set_learning_phase(0)
    sess = K.get_session()
    saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

    # save model weights in TF checkpoint
    checkpoint_path = saver.save(sess,
                                 config['snapshot_dir'],
                                 global_step=0,
                                 latest_filename='checkpoint_state')

    # remove nodes not needed for inference from graph def
    train_graph = sess.graph
    inference_graph = tf.graph_util.remove_training_nodes(
        train_graph.as_graph_def())

    # write the graph definition to a file.
    # You can view this file to see your network structure and
    # to determine the names of your network's input/output layers.
    graph_io.write_graph(inference_graph, '.', config['graphdef_file'])

    # specify which layer is the output layer for your graph.
    # In this case, we want to specify the softmax layer after our
    # last dense (fully connected) layer.
    out_names = config['out_layer']

    # freeze your inference graph and save it for later! (Tensorflow)
    freeze_graph.freeze_graph(config['graphdef_file'], '', False,
                              checkpoint_path, out_names, "save/restore_all",
                              "save/Const:0", config['frozen_model_file'],
                              False, "")

    # The JSON/YAML serialization round-trip check below is disabled.
    if False:
        print('Test score:', score[0])
        print('Test accuracy:', score[1])

        json_path = '{}/{}.model.json'.format(output_dir, model_name)
        yaml_path = '{}/{}.model.yaml'.format(output_dir, model_name)
        weights_path = '{}/{}.weights.h5'.format(output_dir, model_name)

        # serialize model to JSON
        with open(json_path, 'w') as json_file:
            json_file.write(model.to_json())

        # serialize model to YAML
        with open(yaml_path, 'w') as yaml_file:
            yaml_file.write(model.to_yaml())

        # serialize weights to HDF5
        model.save_weights(weights_path)
        print("Saved model to disk")

        # load json and create model
        with open(json_path, 'r') as json_file:
            loaded_model_json = model_from_json(json_file.read())

        # load yaml and create model
        with open(yaml_path, 'r') as yaml_file:
            loaded_model_yaml = model_from_yaml(yaml_file.read())

        # load weights into new model
        loaded_model_json.load_weights(weights_path)
        print("Loaded json model from disk")

        # evaluate json loaded model on test data
        loaded_model_json.compile(loss=gParameters['loss'],
                                  optimizer=gParameters['optimizer'],
                                  metrics=[gParameters['metrics']])
        score_json = loaded_model_json.evaluate(X_test, Y_test, verbose=0)

        print('json Test score:', score_json[0])
        print('json Test accuracy:', score_json[1])

        print("json %s: %.2f%%" %
              (loaded_model_json.metrics_names[1], score_json[1] * 100))

        # load weights into new model
        loaded_model_yaml.load_weights(weights_path)
        print("Loaded yaml model from disk")

        # evaluate loaded model on test data
        loaded_model_yaml.compile(loss=gParameters['loss'],
                                  optimizer=gParameters['optimizer'],
                                  metrics=[gParameters['metrics']])
        score_yaml = loaded_model_yaml.evaluate(X_test, Y_test, verbose=0)

        print('yaml Test score:', score_yaml[0])
        print('yaml Test accuracy:', score_yaml[1])

        print("yaml %s: %.2f%%" %
              (loaded_model_yaml.metrics_names[1], score_yaml[1] * 100))

    return history
def _collect_reconstructions(net, loader):
    """Run *net* on every batch of *loader*; return (inputs, outputs) arrays."""
    inputs = []
    outputs = []
    for batch_in, _ in loader:
        batch_in = Variable(batch_in)
        outputs.append(net(batch_in).data.numpy())
        inputs.append(batch_in.data.numpy())
    # One concatenation at the end instead of re-copying the accumulated
    # arrays with np.append on every batch.
    return np.concatenate(inputs, axis=0), np.concatenate(outputs, axis=0)


def main():
    """Train the P1B1 autoencoder with PyTorch and report its scores.

    Parameters come from the command line merged over the configuration
    file. The network is a stack of Linear + activation layers whose widths
    follow ``gParameters['dense']`` (encoder) and then the same widths in
    reverse (decoder), ending in a Linear layer back to the input width.
    Inputs are used as their own targets. After every epoch the model is
    scored on the validation set with ``p1b1.evaluate_autoencoder``; after
    training it is scored on the test set and a histogram of the
    reconstruction errors is saved to 'histogram_mx.pdf'.
    """
    # Get command-line parameters
    parser = get_p1b1_parser()
    args = parser.parse_args()
    # Get parameters from configuration file
    fileParameters = p1b1.read_config_file(args.config_file)
    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Construct extension to save model
    # NOTE(review): `logfile` is computed but not used in this function.
    ext = p1b1.extension_from_parameters(gParameters, '.pt')
    logfile = args.logfile if args.logfile else args.save + ext + '.log'
    p1b1.logger.info('Params: {}'.format(gParameters))

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Load dataset
    X_train, X_val, X_test = p1b1.load_data(gParameters, seed)

    print("Shape X_train: ", X_train.shape)
    print("Shape X_val: ", X_val.shape)
    print("Shape X_test: ", X_test.shape)

    print("Range X_train --> Min: ", np.min(X_train), ", max: ",
          np.max(X_train))
    print("Range X_val --> Min: ", np.min(X_val), ", max: ", np.max(X_val))
    print("Range X_test --> Min: ", np.min(X_test), ", max: ", np.max(X_test))

    # Set input and target to X_train
    train_data = torch.from_numpy(X_train)
    train_tensor = data.TensorDataset(train_data, train_data)
    train_iter = data.DataLoader(train_tensor,
                                 batch_size=gParameters['batch_size'],
                                 shuffle=gParameters['shuffle'])

    # Validation set
    val_data = torch.from_numpy(X_val)
    val_tensor = torch.utils.data.TensorDataset(val_data, val_data)
    val_iter = torch.utils.data.DataLoader(
        val_tensor,
        batch_size=gParameters['batch_size'],
        shuffle=gParameters['shuffle'])

    # Test set
    test_data = torch.from_numpy(X_test)
    test_tensor = torch.utils.data.TensorDataset(test_data, test_data)
    test_iter = torch.utils.data.DataLoader(
        test_tensor,
        batch_size=gParameters['batch_size'],
        shuffle=gParameters['shuffle'])

    input_dim = X_train.shape[1]
    output_dim = input_dim

    # Define Autoencoder architecture
    layers = gParameters['dense']
    activation = p1_common_pytorch.build_activation(gParameters['activation'])
    loss_fn = p1_common_pytorch.get_function(gParameters['loss'])

    net = nn.Sequential()

    # Width feeding the next Linear layer. Starting from the input width
    # keeps the output layer well-defined when 'dense' is None or empty.
    insize = input_dim

    if layers is not None:
        if not isinstance(layers, list):
            try:
                layers = list(layers)
            except TypeError:
                # A bare scalar means a single hidden layer.
                layers = [layers]
        # Encoder Part
        for i, width in enumerate(layers):
            if i == 0:
                net.add_module('in_dense', nn.Linear(input_dim, width))
                net.add_module('in_act', activation)
            else:
                net.add_module('en_dense%d' % i, nn.Linear(insize, width))
                net.add_module('en_act%d' % i, activation)
            insize = width

        # Decoder Part: mirror the encoder widths, skipping the innermost
        # one (it is already the current width).
        for i in range(len(layers) - 2, -1, -1):
            net.add_module('de_dense%d' % i, nn.Linear(insize, layers[i]))
            net.add_module('de_act%d' % i, activation)
            insize = layers[i]

    net.add_module('out_dense', nn.Linear(insize, output_dim))
    net.add_module('out_act', activation)

    # Initialize weights
    for m in net.modules():
        if isinstance(m, nn.Linear):
            p1_common_pytorch.build_initializer(m.weight,
                                                gParameters['initialization'],
                                                kerasDefaults)
            p1_common_pytorch.build_initializer(m.bias, 'constant',
                                                kerasDefaults, 0.0)

    # Display model
    print(net)

    # Define optimizer
    optimizer = p1_common_pytorch.build_optimizer(net,
                                                  gParameters['optimizer'],
                                                  gParameters['learning_rate'],
                                                  kerasDefaults)

    # Seed random generator for training
    torch.manual_seed(seed)

    # GPU use is switched off; note that only the inputs, not the network,
    # would be moved to the GPU by the code below.
    #use_gpu = torch.cuda.is_available()
    use_gpu = 0

    freq_log = 1
    for epoch in range(gParameters['epochs']):
        # Reset per epoch so the reported average covers this epoch only.
        train_loss = 0
        for batch, (in_train, _) in enumerate(train_iter):
            in_train = Variable(in_train)
            if use_gpu:
                in_train = in_train.cuda()
            optimizer.zero_grad()
            output = net(in_train)

            loss = loss_fn(output, in_train)
            loss.backward()
            # .item() extracts the Python scalar; indexing a 0-dim loss
            # tensor with [0] raises in current PyTorch.
            batch_loss = loss.item()
            train_loss += batch_loss
            optimizer.step()
            if batch % freq_log == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch * len(in_train), len(train_iter.dataset),
                    100. * batch / len(train_iter),
                    batch_loss))
        print('====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(train_iter.dataset)))

        # Evaluate model on validation set
        in_all, out_all = _collect_reconstructions(net, val_iter)

        scores = p1b1.evaluate_autoencoder(in_all, out_all)
        print('Evaluation on validation data:', scores)

    # Evaluate model on test set
    in_all, out_all = _collect_reconstructions(net, test_iter)

    scores = p1b1.evaluate_autoencoder(in_all, out_all)
    print('Evaluation on test data:', scores)

    diff = in_all - out_all
    plt.hist(diff.ravel(), bins='auto')
    plt.title("Histogram of Errors with 'auto' bins")
    plt.savefig('histogram_mx.pdf')
Example #4
0
def run(gParameters):
    """Build, train and evaluate the 1-D convolutional classifier.

    Downloads (or reuses cached copies of) the train and test files, builds
    a ``Sequential`` model from the 'conv', 'pool' and 'dense' settings and
    trains it with the test split used as validation data.

    Args:
        gParameters: dict of hyper-parameters. Keys read here: 'train_data',
            'test_data', 'data_url', 'conv' (flat sequence of
            filters, kernel length, stride triples), 'pool', 'activation',
            'dense', 'drop', 'classes', 'out_act', 'optimizer',
            'learning_rate', 'loss', 'metrics', 'save', 'model_name',
            'batch_size' and 'epochs'. The mere presence of a
            'locally_connected' key selects LocallyConnected1D layers. The
            dict is updated in place with the result of
            ``compute_trainable_params(model)``.

    Returns:
        The object returned by ``model.fit``.
    """
    print('Params:', gParameters)

    file_train = gParameters['train_data']
    file_test = gParameters['test_data']
    url = gParameters['data_url']

    train_file = data_utils.get_file(file_train,
                                     url + file_train,
                                     cache_subdir='Pilot1')
    test_file = data_utils.get_file(file_test,
                                    url + file_test,
                                    cache_subdir='Pilot1')

    X_train, Y_train, X_test, Y_test = load_data(train_file, test_file,
                                                 gParameters)

    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    print('Y_train shape:', Y_train.shape)
    print('Y_test shape:', Y_test.shape)

    x_train_len = X_train.shape[1]

    # this reshaping is critical for the Conv1D to work
    X_train = np.expand_dims(X_train, axis=2)
    X_test = np.expand_dims(X_test, axis=2)

    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    model = Sequential()

    conv_spec = gParameters['conv']

    # Normalise the pooling sizes to a list once, up front. A bare scalar is
    # wrapped in a list instead of being passed to list(), which would raise
    # TypeError for an int.
    pool_list = gParameters['pool']
    if pool_list and not isinstance(pool_list, list):
        try:
            pool_list = list(pool_list)
        except TypeError:
            pool_list = [pool_list]

    # 'conv' is consumed three values at a time: filters, length, stride.
    for layer_idx, i in enumerate(range(0, len(conv_spec), 3)):
        filters = conv_spec[i]
        filter_len = conv_spec[i + 1]
        stride = conv_spec[i + 2]
        print(layer_idx, filters, filter_len, stride)

        # A non-positive entry ends the convolutional stack.
        if filters <= 0 or filter_len <= 0 or stride <= 0:
            break
        # NOTE(review): this tests for the key's presence, not its value, so
        # 'locally_connected': False still selects LocallyConnected1D.
        if 'locally_connected' in gParameters:
            model.add(
                LocallyConnected1D(filters,
                                   filter_len,
                                   strides=stride,
                                   padding='valid',
                                   input_shape=(x_train_len, 1)))
        elif i == 0:
            # input layer
            model.add(
                Conv1D(filters=filters,
                       kernel_size=filter_len,
                       strides=stride,
                       padding='valid',
                       input_shape=(x_train_len, 1)))
        else:
            model.add(
                Conv1D(filters=filters,
                       kernel_size=filter_len,
                       strides=stride,
                       padding='valid'))
        model.add(Activation(gParameters['activation']))
        if pool_list:
            model.add(MaxPooling1D(pool_size=pool_list[layer_idx]))

    model.add(Flatten())

    # Zero/falsy entries in 'dense' are skipped.
    for layer in gParameters['dense']:
        if layer:
            model.add(Dense(layer))
            model.add(Activation(gParameters['activation']))
            if gParameters['drop']:
                model.add(Dropout(gParameters['drop']))
    model.add(Dense(gParameters['classes']))
    model.add(Activation(gParameters['out_act']))

    #Reference case
    #model.add(Conv1D(filters=128, kernel_size=20, strides=1, padding='valid', input_shape=(P, 1)))
    #model.add(Activation('relu'))
    #model.add(MaxPooling1D(pool_size=1))
    #model.add(Conv1D(filters=128, kernel_size=10, strides=1, padding='valid'))
    #model.add(Activation('relu'))
    #model.add(MaxPooling1D(pool_size=10))
    #model.add(Flatten())
    #model.add(Dense(200))
    #model.add(Activation('relu'))
    #model.add(Dropout(0.1))
    #model.add(Dense(20))
    #model.add(Activation('relu'))
    #model.add(Dropout(0.1))
    #model.add(Dense(CLASSES))
    #model.add(Activation('softmax'))

    kerasDefaults = p1_common.keras_default_config()

    # Define optimizer
    optimizer = p1_common_keras.build_optimizer(gParameters['optimizer'],
                                                gParameters['learning_rate'],
                                                kerasDefaults)

    model.summary()
    model.compile(loss=gParameters['loss'],
                  optimizer=optimizer,
                  metrics=[gParameters['metrics']])

    output_dir = gParameters['save']

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # calculate trainable and non-trainable params
    gParameters.update(compute_trainable_params(model))

    # Callbacks used during training (no model checkpointing is set up).
    model_name = gParameters['model_name']
    csv_logger = CSVLogger('{}/training.log'.format(output_dir))
    # NOTE(review): newer Keras releases call the `epsilon` argument
    # `min_delta` -- confirm against the installed version.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.1,
                                  patience=10,
                                  verbose=1,
                                  mode='auto',
                                  epsilon=0.0001,
                                  cooldown=0,
                                  min_lr=0)
    candleRemoteMonitor = CandleRemoteMonitor(params=gParameters)
    timeoutMonitor = TerminateOnTimeOut(TIMEOUT)
    history = model.fit(
        X_train,
        Y_train,
        batch_size=gParameters['batch_size'],
        epochs=gParameters['epochs'],
        verbose=1,
        validation_data=(X_test, Y_test),
        callbacks=[csv_logger, reduce_lr, candleRemoteMonitor, timeoutMonitor])

    score = model.evaluate(X_test, Y_test, verbose=0)

    # The JSON/YAML serialization round-trip check below is disabled.
    if False:
        print('Test score:', score[0])
        print('Test accuracy:', score[1])

        json_path = '{}/{}.model.json'.format(output_dir, model_name)
        yaml_path = '{}/{}.model.yaml'.format(output_dir, model_name)
        weights_path = '{}/{}.weights.h5'.format(output_dir, model_name)

        # serialize model to JSON
        with open(json_path, 'w') as json_file:
            json_file.write(model.to_json())

        # serialize model to YAML
        with open(yaml_path, 'w') as yaml_file:
            yaml_file.write(model.to_yaml())

        # serialize weights to HDF5
        model.save_weights(weights_path)
        print("Saved model to disk")

        # load json and create model
        with open(json_path, 'r') as json_file:
            loaded_model_json = model_from_json(json_file.read())

        # load yaml and create model
        with open(yaml_path, 'r') as yaml_file:
            loaded_model_yaml = model_from_yaml(yaml_file.read())

        # load weights into new model
        loaded_model_json.load_weights(weights_path)
        print("Loaded json model from disk")

        # evaluate json loaded model on test data
        loaded_model_json.compile(loss=gParameters['loss'],
                                  optimizer=gParameters['optimizer'],
                                  metrics=[gParameters['metrics']])
        score_json = loaded_model_json.evaluate(X_test, Y_test, verbose=0)

        print('json Test score:', score_json[0])
        print('json Test accuracy:', score_json[1])

        print("json %s: %.2f%%" %
              (loaded_model_json.metrics_names[1], score_json[1] * 100))

        # load weights into new model
        loaded_model_yaml.load_weights(weights_path)
        print("Loaded yaml model from disk")

        # evaluate loaded model on test data
        loaded_model_yaml.compile(loss=gParameters['loss'],
                                  optimizer=gParameters['optimizer'],
                                  metrics=[gParameters['metrics']])
        score_yaml = loaded_model_yaml.evaluate(X_test, Y_test, verbose=0)

        print('yaml Test score:', score_yaml[0])
        print('yaml Test accuracy:', score_yaml[1])

        print("yaml %s: %.2f%%" %
              (loaded_model_yaml.metrics_names[1], score_yaml[1] * 100))

    return history
Example #5
0
def main():
    """Train and evaluate the P1B2 multilayer perceptron with MXNet.

    Parameters are read from the command line and from the configuration
    file (command-line values take precedence). The data returned by
    ``p1b2.load_data_one_hot`` is wrapped in ``NDArrayIter`` objects, a
    fully connected network with a softmax output is fitted on the training
    split, and the score computed by ``p1b2.evaluate_accuracy_one_hot`` on
    the test split is printed.
    """
    # Get command-line parameters
    parser = get_p1b2_parser()
    args = parser.parse_args()
    # Get parameters from configuration file
    fileParameters = p1b2.read_config_file(args.config_file)
    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Construct extension to save model
    ext = p1b2.extension_from_parameters(gParameters, '.mx')
    # NOTE: logfile is computed here but not used further in this function.
    logfile = args.logfile if args.logfile else args.save + ext + '.log'
    p1b2.logger.info('Params: {}'.format(gParameters))

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Load dataset
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = \
        p1b2.load_data_one_hot(gParameters, seed)

    # Report shape and value range of every split
    named_arrays = (('X_train', X_train), ('X_val', X_val),
                    ('X_test', X_test), ('y_train', y_train),
                    ('y_val', y_val), ('y_test', y_test))
    for name, array in named_arrays:
        print("Shape {}: ".format(name), array.shape)
    for name, array in named_arrays:
        print("Range {} --> Min: ".format(name), np.min(array), ", max: ",
              np.max(array))

    # Wrap the arrays in MXNet data iterators
    train_iter = mx.io.NDArrayIter(X_train,
                                   y_train,
                                   gParameters['batch_size'],
                                   shuffle=gParameters['shuffle'])
    val_iter = mx.io.NDArrayIter(X_val, y_val, gParameters['batch_size'])
    test_iter = mx.io.NDArrayIter(X_test, y_test, gParameters['batch_size'])

    net = mx.sym.Variable('data')
    out = mx.sym.Variable('softmax_label')
    num_classes = y_train.shape[1]

    # Initialize weights and learning rule: parameters matching 'bias' get
    # the constant initializer, everything else the configured one.
    initializer_weights = p1_common_mxnet.build_initializer(
        gParameters['initialization'], kerasDefaults)
    initializer_bias = p1_common_mxnet.build_initializer(
        'constant', kerasDefaults, 0.)
    init = mx.initializer.Mixed(['bias', '.*'],
                                [initializer_bias, initializer_weights])

    activation = gParameters['activation']

    # Define MLP architecture
    layers = gParameters['dense']

    if layers is not None:
        if not isinstance(layers, list):
            layers = list(layers)
        for num_hidden in layers:
            net = mx.sym.FullyConnected(data=net, num_hidden=num_hidden)
            net = mx.sym.Activation(data=net, act_type=activation)
            if gParameters['drop']:
                net = mx.sym.Dropout(data=net, p=gParameters['drop'])

    # Output layer: one unit per class followed by softmax
    net = mx.sym.FullyConnected(data=net, num_hidden=num_classes)
    net = mx.symbol.SoftmaxOutput(data=net, label=out)

    # Display model
    p1_common_mxnet.plot_network(net, 'net' + ext)

    # Run on the CPU unless GPU ids were configured
    devices = mx.cpu()
    if gParameters['gpus']:
        devices = [mx.gpu(i) for i in gParameters['gpus']]

    # Build MLP model
    mlp = mx.mod.Module(symbol=net, context=devices)

    # Define optimizer
    optimizer = p1_common_mxnet.build_optimizer(gParameters['optimizer'],
                                                gParameters['learning_rate'],
                                                kerasDefaults)

    # NOTE: metric is built but currently unused; fit() below runs with the
    # default evaluation metric.
    metric = p1_common_mxnet.get_function(gParameters['loss'])()

    # Seed random generator for training
    mx.random.seed(seed)

    mlp.fit(train_iter,
            eval_data=val_iter,
            optimizer=optimizer,
            num_epoch=gParameters['epochs'],
            initializer=init)

    # Evaluate model on test set
    y_pred = mlp.predict(test_iter).asnumpy()
    scores = p1b2.evaluate_accuracy_one_hot(y_pred, y_test)
    print('Evaluation on test data:', scores)
def main():
    """Train the P1B3 regression network with MXNet.

    Parameters are read from the command line and from the configuration
    file (command-line values take precedence). When the consolidated
    parameters contain a ``'dense'`` entry a fully connected network is
    built; otherwise a convolutional network is built from the flat
    ``'conv'`` list, read as consecutive ``(filters, filter_len, stride)``
    triples. The network ends in a single linear regression output bound to
    the ``'growth'`` label and is fitted on batches served by
    ``ConcatDataIter``.
    """
    # Get command-line parameters
    parser = get_p1b3_parser()
    args = parser.parse_args()
    # Get parameters from configuration file
    fileParameters = p1b3.read_config_file(args.config_file)

    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')
    # Construct extension to save model
    ext = p1b3.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Build dataset loader object
    loader = p1b3.DataLoader(
        seed=seed,
        dtype=gParameters['datatype'],
        val_split=gParameters['validation_split'],
        test_cell_split=gParameters['test_cell_split'],
        cell_features=gParameters['cell_features'],
        drug_features=gParameters['drug_features'],
        feature_subsample=gParameters['feature_subsample'],
        scaling=gParameters['scaling'],
        scramble=gParameters['scramble'],
        min_logconc=gParameters['min_logconc'],
        max_logconc=gParameters['max_logconc'],
        subsample=gParameters['subsample'],
        category_cutoffs=gParameters['category_cutoffs'])

    net = mx.sym.Variable('concat_features')
    out = mx.sym.Variable('growth')

    # Initialize weights and learning rule: parameters matching 'bias' get
    # the constant initializer, everything else the configured one.
    initializer_weights = p1_common_mxnet.build_initializer(
        gParameters['initialization'], kerasDefaults)
    initializer_bias = p1_common_mxnet.build_initializer(
        'constant', kerasDefaults, 0.)
    init = mx.initializer.Mixed(['bias', '.*'],
                                [initializer_bias, initializer_weights])

    activation = gParameters['activation']

    # Define model architecture
    if 'dense' in gParameters:  # Build dense layers
        for layer in gParameters['dense']:
            if layer:
                net = mx.sym.FullyConnected(data=net, num_hidden=layer)
                net = mx.sym.Activation(data=net, act_type=activation)
            # Dropout is added once per entry of the 'dense' list, whether
            # or not that entry produced a layer (kept as in the original).
            if gParameters['drop']:
                net = mx.sym.Dropout(data=net, p=gParameters['drop'])
    else:  # Build convolutional layers
        net = mx.sym.Reshape(data=net,
                             shape=(gParameters['batch_size'], 1,
                                    loader.input_dim, 1))
        # The same list is used for the loop bound and for indexing, so the
        # two can never disagree.
        conv_params = gParameters['conv']
        for i in range(0, len(conv_params), 3):
            nb_filter = conv_params[i]
            filter_len = conv_params[i + 1]
            stride = conv_params[i + 2]
            # A non-positive entry terminates the convolutional stack
            if nb_filter <= 0 or filter_len <= 0 or stride <= 0:
                break
            net = mx.sym.Convolution(data=net,
                                     num_filter=nb_filter,
                                     kernel=(filter_len, 1),
                                     stride=(stride, 1))
            net = mx.sym.Activation(data=net, act_type=activation)
            if gParameters['pool']:
                net = mx.sym.Pooling(data=net,
                                     pool_type="max",
                                     kernel=(gParameters['pool'], 1),
                                     stride=(1, 1))
        net = mx.sym.Flatten(data=net)

    # Single-unit regression output
    net = mx.sym.FullyConnected(data=net, num_hidden=1)
    net = mx.symbol.LinearRegressionOutput(data=net, label=out)

    # Display model
    p1_common_mxnet.plot_network(net, 'net' + ext)

    # Define mxnet data iterators; explicit sample counts in the parameters
    # override the sizes reported by the loader.
    train_samples = int(loader.n_train)
    val_samples = int(loader.n_val)

    if 'train_samples' in gParameters:
        train_samples = gParameters['train_samples']
    if 'val_samples' in gParameters:
        val_samples = gParameters['val_samples']

    train_iter = ConcatDataIter(loader,
                                batch_size=gParameters['batch_size'],
                                num_data=train_samples)
    val_iter = ConcatDataIter(loader,
                              partition='val',
                              batch_size=gParameters['batch_size'],
                              num_data=val_samples)

    # Run on the CPU unless GPU ids were configured
    devices = mx.cpu()
    if gParameters['gpus']:
        devices = [mx.gpu(i) for i in gParameters['gpus']]

    mod = mx.mod.Module(net,
                        data_names=('concat_features', ),
                        label_names=('growth', ),
                        context=devices)

    # Define optimizer
    optimizer = p1_common_mxnet.build_optimizer(gParameters['optimizer'],
                                                gParameters['learning_rate'],
                                                kerasDefaults)

    # Seed random generator for training
    mx.random.seed(seed)

    # Speedometer is a per-batch callback (it takes a single batch-end
    # parameter object), so it is registered as batch_end_callback; epoch-end
    # callbacks are invoked with a different argument list.
    mod.fit(train_iter,
            eval_data=val_iter,
            eval_metric=gParameters['loss'],
            optimizer=optimizer,
            num_epoch=gParameters['epochs'],
            initializer=init,
            batch_end_callback=mx.callback.Speedometer(
                gParameters['batch_size'], 20))
def main():
    """Train and evaluate the P1B2 multilayer perceptron with Keras.

    Parameters are read from the command line and from the configuration
    file (command-line values take precedence). A stack of regularized
    ``Dense`` layers (optionally followed by ``Dropout``) is built from the
    ``'dense'`` parameter, fitted on the training split with the validation
    split as ``validation_data``, and the score computed by
    ``p1b2.evaluate_accuracy_one_hot`` on the test split is printed.

    A ``'dense'`` value of ``None`` or an empty sequence yields a model made
    of the output layer only.
    """
    # Get command-line parameters
    parser = get_p1b2_parser()
    args = parser.parse_args()
    # Get parameters from configuration file
    fileParameters = p1b2.read_config_file(args.config_file)
    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Construct extension to save model
    ext = p1b2.extension_from_parameters(gParameters, '.keras')
    # NOTE: logfile is computed here but not used further in this function.
    logfile = args.logfile if args.logfile else args.save + ext + '.log'
    p1b2.logger.info('Params: {}'.format(gParameters))

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Load dataset
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = \
        p1b2.load_data_one_hot(gParameters, seed)

    # Report shape and value range of every split
    named_arrays = (('X_train', X_train), ('X_val', X_val),
                    ('X_test', X_test), ('y_train', y_train),
                    ('y_val', y_val), ('y_test', y_test))
    for name, array in named_arrays:
        print("Shape {}: ".format(name), array.shape)
    for name, array in named_arrays:
        print("Range {} --> Min: ".format(name), np.min(array), ", max: ",
              np.max(array))

    input_dim = X_train.shape[1]
    input_vector = Input(shape=(input_dim, ))
    output_dim = y_train.shape[1]

    # Initialize weights and learning rule
    initializer_weights = p1_common_keras.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = p1_common_keras.build_initializer(
        'constant', kerasDefaults, 0.)

    activation = gParameters['activation']

    # Define MLP architecture
    layers = gParameters['dense']

    # Start from the input tensor so that the output layer below can always
    # be attached, even when no hidden layer is configured.
    x = input_vector
    if layers is not None:
        if not isinstance(layers, list):
            layers = list(layers)
        for units in layers:
            x = Dense(units,
                      activation=activation,
                      kernel_initializer=initializer_weights,
                      bias_initializer=initializer_bias,
                      kernel_regularizer=l2(gParameters['penalty']),
                      activity_regularizer=l2(gParameters['penalty']))(x)
            if gParameters['drop']:
                x = Dropout(gParameters['drop'])(x)

    # Output layer (uses the same activation as the hidden layers)
    output = Dense(output_dim,
                   activation=activation,
                   kernel_initializer=initializer_weights,
                   bias_initializer=initializer_bias)(x)

    # Build MLP model
    mlp = Model(outputs=output, inputs=input_vector)
    p1b2.logger.debug('Model: {}'.format(mlp.to_json()))

    # Define optimizer
    optimizer = p1_common_keras.build_optimizer(gParameters['optimizer'],
                                                gParameters['learning_rate'],
                                                kerasDefaults)

    # Compile and display model
    mlp.compile(loss=gParameters['loss'],
                optimizer=optimizer,
                metrics=['accuracy'])
    mlp.summary()

    # Seed random generator for training
    np.random.seed(seed)

    mlp.fit(X_train,
            y_train,
            batch_size=gParameters['batch_size'],
            epochs=gParameters['epochs'],
            validation_data=(X_val, y_val))

    # Evaluate model on test set
    y_pred = mlp.predict(X_test)
    scores = p1b2.evaluate_accuracy_one_hot(y_pred, y_test)
    print('Evaluation on test data:', scores)
def run(params):
    """Build, train and evaluate a P1B1 autoencoder with Keras.

    The architecture is selected by ``params['model']``: ``'ae'`` builds a
    plain autoencoder, ``'vae'`` a variational autoencoder and ``'cvae'`` a
    variational autoencoder whose encoder and decoder additionally receive
    the condition input (the ``y_*`` arrays returned by ``p1b1.load_data``).

    Args:
        params: Mapping of run parameters. It is read throughout the
            function (model type, layer sizes, optimizer, callbacks, ...)
            and updated in place with the result of
            ``compute_trainable_params(model)``.

    Returns:
        A tuple ``(history, scores, elapsed)``: the object returned by
        ``model.fit``, the result of ``p1b1.evaluate_autoencoder`` on the
        test predictions, and the wall-clock seconds elapsed since the data
        finished loading.
    """
    # Construct extension to save model
    ext = p1b1.extension_from_parameters(params, '.keras')
    prefix = '{}{}'.format(params['save'], ext)
    logfile = params['logfile'] if params['logfile'] else prefix + '.log'

    verify_path(logfile)
    logger = set_up_logger(logfile, params['verbose'])

    logger.info('Params: {}'.format(params))

    # Get default parameters for initialization and optimizer functions
    keras_defaults = p1_common.keras_default_config()
    seed = params['rng_seed']
    set_seed(seed)

    # Load dataset
    x_train, y_train, x_val, y_val, x_test, y_test, x_labels, y_labels = p1b1.load_data(
        params, seed)

    # The timer starts after data loading, so `elapsed` excludes load time.
    start = time.time()
    # cache_file = 'data_l1000_cache.h5'
    # save_cache(cache_file, x_train, y_train, x_val, y_val, x_test, y_test, x_labels, y_labels)
    # x_train, y_train, x_val, y_val, x_test, y_test, x_labels, y_labels = load_cache(cache_file)

    logger.info("Shape x_train: {}".format(x_train.shape))
    logger.info("Shape x_val:   {}".format(x_val.shape))
    logger.info("Shape x_test:  {}".format(x_test.shape))

    logger.info("Range x_train: [{:.3g}, {:.3g}]".format(
        np.min(x_train), np.max(x_train)))
    logger.info("Range x_val:   [{:.3g}, {:.3g}]".format(
        np.min(x_val), np.max(x_val)))
    logger.info("Range x_test:  [{:.3g}, {:.3g}]".format(
        np.min(x_test), np.max(x_test)))

    logger.debug('Class labels')
    for i, label in enumerate(y_labels):
        logger.debug('  {}: {}'.format(i, label))

    # clf = build_type_classifier(x_train, y_train, x_val, y_val)

    # NOTE: n_classes is computed but not used further in this function.
    n_classes = len(y_labels)
    # The label arrays serve as the condition inputs of the 'cvae' model.
    cond_train = y_train
    cond_val = y_val
    cond_test = y_test

    input_dim = x_train.shape[1]
    cond_dim = cond_train.shape[1]
    latent_dim = params['latent_dim']

    activation = params['activation']
    dropout = params['drop']
    dense_layers = params['dense']
    # Pick the dropout layer class once; it is instantiated per layer below.
    dropout_layer = keras.layers.noise.AlphaDropout if params[
        'alpha_dropout'] else Dropout

    # Initialize weights and learning rule
    initializer_weights = p1_common_keras.build_initializer(
        params['initialization'], keras_defaults, seed)
    initializer_bias = p1_common_keras.build_initializer(
        'constant', keras_defaults, 0.)

    # Normalize the layer specification to a list (empty when not given)
    if dense_layers is not None:
        if type(dense_layers) != list:
            dense_layers = list(dense_layers)
    else:
        dense_layers = []

    # Encoder Part
    x_input = Input(shape=(input_dim, ))
    cond_input = Input(shape=(cond_dim, ))
    h = x_input
    if params['model'] == 'cvae':
        h = keras.layers.concatenate([x_input, cond_input])

    for i, layer in enumerate(dense_layers):
        # Non-positive entries are skipped
        if layer > 0:
            x = h
            h = Dense(layer,
                      activation=activation,
                      kernel_initializer=initializer_weights,
                      bias_initializer=initializer_bias)(h)
            if params['residual']:
                # Best effort: the skip connection is dropped when add()
                # raises ValueError (presumably because the layer changes
                # the width -- TODO confirm).
                try:
                    h = keras.layers.add([h, x])
                except ValueError:
                    pass
            if params['batch_normalization']:
                h = BatchNormalization()(h)
            if dropout > 0:
                h = dropout_layer(dropout)(h)

    if params['model'] == 'ae':
        encoded = Dense(latent_dim,
                        activation=activation,
                        kernel_initializer=initializer_weights,
                        bias_initializer=initializer_bias)(h)
    else:
        # Any model type other than 'ae' gets the variational latent layer
        epsilon_std = params['epsilon_std']
        z_mean = Dense(latent_dim, name='z_mean')(h)
        z_log_var = Dense(latent_dim, name='z_log_var')(h)
        encoded = z_mean

        def vae_loss(x, x_decoded_mean):
            """Return the mean of the binary cross-entropy plus the KL term
            (computed from z_mean / z_log_var) divided by input_dim."""
            xent_loss = binary_crossentropy(x, x_decoded_mean)
            kl_loss = -0.5 * K.sum(
                1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
            return K.mean(xent_loss + kl_loss / input_dim)

        def sampling(params):
            """Sample z = z_mean + exp(z_log_var / 2) * epsilon, with epsilon
            drawn from a normal distribution of stddev epsilon_std.

            The argument is the [z_mean, z_log_var] pair passed by the
            Lambda layer; it shadows the outer ``params`` mapping here.
            """
            z_mean_, z_log_var_ = params
            batch_size = K.shape(z_mean_)[0]
            epsilon = K.random_normal(shape=(batch_size, latent_dim),
                                      mean=0.,
                                      stddev=epsilon_std)
            return z_mean_ + K.exp(z_log_var_ / 2) * epsilon

        z = Lambda(sampling, output_shape=(latent_dim, ))([z_mean, z_log_var])
        if params['model'] == 'cvae':
            # NOTE: z_cond is built but not used further in this function.
            z_cond = keras.layers.concatenate([z, cond_input])

    # Decoder Part
    decoder_input = Input(shape=(latent_dim, ))
    h = decoder_input
    if params['model'] == 'cvae':
        h = keras.layers.concatenate([decoder_input, cond_input])

    # Mirror the encoder: same layer sizes, in reverse order
    for i, layer in reversed(list(enumerate(dense_layers))):
        if layer > 0:
            x = h
            h = Dense(layer,
                      activation=activation,
                      kernel_initializer=initializer_weights,
                      bias_initializer=initializer_bias)(h)
            if params['residual']:
                # Same best-effort skip connection as in the encoder
                try:
                    h = keras.layers.add([h, x])
                except ValueError:
                    pass
            if params['batch_normalization']:
                h = BatchNormalization()(h)
            if dropout > 0:
                h = dropout_layer(dropout)(h)

    decoded = Dense(input_dim,
                    activation='sigmoid',
                    kernel_initializer=initializer_weights,
                    bias_initializer=initializer_bias)(h)

    # Build autoencoder model
    if params['model'] == 'cvae':
        encoder = Model([x_input, cond_input], encoded)
        decoder = Model([decoder_input, cond_input], decoded)
        model = Model([x_input, cond_input], decoder([z, cond_input]))
        loss = vae_loss
        metrics = [xent, corr, mse]
    elif params['model'] == 'vae':
        encoder = Model(x_input, encoded)
        decoder = Model(decoder_input, decoded)
        model = Model(x_input, decoder(z))
        loss = vae_loss
        metrics = [xent, corr, mse]
    else:
        # Plain autoencoder: decode the encoding directly and use the
        # configured loss.
        encoder = Model(x_input, encoded)
        decoder = Model(decoder_input, decoded)
        model = Model(x_input, decoder(encoded))
        loss = params['loss']
        metrics = [xent, corr]

    model.summary()
    decoder.summary()

    # Save the model architecture as JSON when checkpointing is enabled
    if params['cp']:
        model_json = model.to_json()
        with open(prefix + '.model.json', 'w') as f:
            print(model_json, file=f)

    # Define optimizer
    # optimizer = p1_common_keras.build_optimizer(params['optimizer'],
    #                                             params['learning_rate'],
    #                                             keras_defaults)
    # Instantiate the optimizer by class name with its default configuration
    optimizer = optimizers.deserialize({
        'class_name': params['optimizer'],
        'config': {}
    })
    # base_lr falls back to the optimizer's own default learning rate
    base_lr = params['base_lr'] or K.get_value(optimizer.lr)
    if params['learning_rate']:
        K.set_value(optimizer.lr, params['learning_rate'])

    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

    # calculate trainable and non-trainable params
    params.update(compute_trainable_params(model))

    def warmup_scheduler(epoch):
        """Interpolate the learning rate linearly from base_lr (epoch 0) to
        the target rate (epoch 5) and return the optimizer's current rate.

        The target is params['learning_rate'] when set, otherwise
        base_lr * batch_size / 100. After epoch 5 the rate is left as is.
        """
        lr = params['learning_rate'] or base_lr * params['batch_size'] / 100
        if epoch <= 5:
            K.set_value(model.optimizer.lr,
                        (base_lr * (5 - epoch) + lr * epoch) / 5)
        logger.debug('Epoch {}: lr={}'.format(epoch,
                                              K.get_value(model.optimizer.lr)))
        return K.get_value(model.optimizer.lr)

    # Callbacks are all constructed here; the optional ones are only
    # registered below when the matching parameter is set.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.5,
                                  patience=5,
                                  min_lr=0.00001)
    warmup_lr = LearningRateScheduler(warmup_scheduler)
    checkpointer = ModelCheckpoint(params['save'] + ext + '.weights.h5',
                                   save_best_only=True,
                                   save_weights_only=True)
    tensorboard = TensorBoard(log_dir="tb/tb{}".format(ext))
    candle_monitor = CandleRemoteMonitor(params=params)
    timeout_monitor = TerminateOnTimeOut(params['timeout'])
    history_logger = LoggingCallback(logger.debug)

    callbacks = [candle_monitor, timeout_monitor, history_logger]
    if params['reduce_lr']:
        callbacks.append(reduce_lr)
    if params['warmup_lr']:
        callbacks.append(warmup_lr)
    if params['cp']:
        callbacks.append(checkpointer)
    if params['tb']:
        callbacks.append(tensorboard)

    # Reference score: compare the validation samples with a shuffled copy
    # of themselves before any training takes place.
    x_val2 = np.copy(x_val)
    np.random.shuffle(x_val2)
    start_scores = p1b1.evaluate_autoencoder(x_val, x_val2)
    logger.info('\nBetween random pairs of validation samples: {}'.format(
        start_scores))

    # The conditional model takes the condition array as a second input
    if params['model'] == 'cvae':
        inputs = [x_train, cond_train]
        val_inputs = [x_val, cond_val]
        test_inputs = [x_test, cond_test]
    else:
        inputs = x_train
        val_inputs = x_val
        test_inputs = x_test

    # Autoencoder targets are the inputs themselves.
    # NOTE: test_outputs is assigned but not used further in this function.
    outputs = x_train
    val_outputs = x_val
    test_outputs = x_test

    history = model.fit(inputs,
                        outputs,
                        verbose=2,
                        batch_size=params['batch_size'],
                        epochs=params['epochs'],
                        callbacks=callbacks,
                        validation_data=(val_inputs, val_outputs))

    # Disabled: saving of the encoder and decoder models
    if False and params['cp']:
        encoder.save(prefix + '.encoder.h5')
        decoder.save(prefix + '.decoder.h5')

    # Disabled: training history plots
    if False:
        plot_history(prefix, history, 'loss')
        plot_history(prefix, history, 'corr', 'streaming pearson correlation')

    # Evaluate model on test set
    x_pred = model.predict(test_inputs)
    scores = p1b1.evaluate_autoencoder(x_pred, x_test)
    logger.info('\nEvaluation on test data: {}'.format(scores))

    # Disabled: scatter plot of the encoded test samples
    if False:
        x_test_encoded = encoder.predict(test_inputs,
                                         batch_size=params['batch_size'])
        y_test_classes = np.argmax(y_test, axis=1)
        plot_scatter(x_test_encoded, y_test_classes, prefix + '.latent')

    # Disabled: t-SNE plot; it relies on x_test_encoded and y_test_classes
    # from the disabled block above.
    if False and params['tsne']:
        tsne = TSNE(n_components=2, random_state=seed)
        x_test_encoded_tsne = tsne.fit_transform(x_test_encoded)
        plot_scatter(x_test_encoded_tsne, y_test_classes,
                     prefix + '.latent.tsne')

    # Detach all handlers from the logger (presumably so repeated calls to
    # run() do not duplicate log output -- TODO confirm).
    logger.handlers = []

    elapsed = time.time() - start

    return history, scores, elapsed
# Example #9
# 0
def main():
    """Train and evaluate the P1B1 autoencoder with neon.

    Parameters are read from the command line and from the configuration
    file. ``batch_size``, ``epochs`` and ``rng_seed`` always carry a default
    from the neon parser, so they are taken from the configuration file
    unless the corresponding option was actually given on the command line.
    A symmetric stack of ``Affine`` layers is built from the ``'dense'``
    parameter, fitted on the training split, scored on the test split with
    ``p1b1.evaluate_autoencoder``, and a histogram of the reconstruction
    errors is written to ``histogram_neon.png``.
    """

    def _on_command_line(long_opt, short_opt):
        # True when an argument token starts with either spelling of the
        # option (this covers '--opt', '--opt=value', '-o' and '-ovalue').
        # A prefix test on the arguments only avoids false positives from
        # unrelated tokens that merely contain the short flag, such as
        # '--eval_freq' or a path containing '-e'.
        return any(
            ag.startswith((long_opt, short_opt)) for ag in sys.argv[1:])

    # Get command-line parameters
    parser = get_p1b1_parser()
    args = parser.parse_args()
    # Get parameters from configuration file
    fileParameters = p1b1.read_config_file(args.config_file)

    # Correct for arguments set by default by neon parser
    # (i.e. instead of taking the neon parser default value fall back to the config file,
    # if effectively the command-line was used, then use the command-line value)
    # This applies to conflictive parameters: batch_size, epochs and rng_seed
    if not _on_command_line("--batch_size", "-z"):
        args.batch_size = fileParameters['batch_size']
    if not _on_command_line("--epochs", "-e"):
        args.epochs = fileParameters['epochs']
    if not _on_command_line("--rng_seed", "-r"):
        args.rng_seed = fileParameters['rng_seed']

    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')
    # Construct extension to save model
    # NOTE: ext is computed but not used further in this function.
    ext = p1b1.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Load dataset
    X_train, X_val, X_test = p1b1.load_data(gParameters, seed)

    # Report shape and value range of every split
    named_arrays = (('X_train', X_train), ('X_val', X_val),
                    ('X_test', X_test))
    for name, array in named_arrays:
        print("Shape {}: ".format(name), array.shape)
    for name, array in named_arrays:
        print("Range {} --> Min: ".format(name), np.min(array), ", max: ",
              np.max(array))

    # The autoencoder reconstructs its input, so both dimensions agree
    input_dim = X_train.shape[1]
    output_dim = input_dim

    # Re-generate the backend after consolidating parsing and file config
    gen_backend(backend=args.backend,
                rng_seed=seed,
                device_id=args.device_id,
                batch_size=gParameters['batch_size'],
                datatype=gParameters['datatype'],
                max_devices=args.max_devices,
                compat_mode=args.compat_mode)

    # Set input and target to X_train
    train = ArrayIterator(X_train)
    val = ArrayIterator(X_val)
    test = ArrayIterator(X_test)

    # Initialize weights and learning rule
    initializer_weights = p1_common_neon.build_initializer(
        gParameters['initialization'], kerasDefaults)
    initializer_bias = p1_common_neon.build_initializer(
        'constant', kerasDefaults, 0.)

    activation = p1_common_neon.get_function(gParameters['activation'])()

    # Define Autoencoder architecture
    layers = []
    layers_params = gParameters['dense']

    if layers_params is not None:
        if not isinstance(layers_params, list):
            layers_params = list(layers_params)
        # Encoder Part
        for nout in layers_params:
            layers.append(
                Affine(nout=nout,
                       init=initializer_weights,
                       bias=initializer_bias,
                       activation=activation))
        # Decoder Part: mirror the encoder, skipping its last (innermost)
        # layer so that it is not duplicated.
        for nout in reversed(layers_params[:-1]):
            layers.append(
                Affine(nout=nout,
                       init=initializer_weights,
                       bias=initializer_bias,
                       activation=activation))

    # Output layer restores the input dimensionality
    layers.append(
        Affine(nout=output_dim,
               init=initializer_weights,
               bias=initializer_bias,
               activation=activation))

    # Build Autoencoder model
    ae = Model(layers=layers)

    # Define cost and optimizer
    cost = GeneralizedCost(p1_common_neon.get_function(gParameters['loss'])())
    optimizer = p1_common_neon.build_optimizer(gParameters['optimizer'],
                                               gParameters['learning_rate'],
                                               kerasDefaults)

    callbacks = Callbacks(ae, eval_set=val, eval_freq=1)

    # Seed random generator for training
    np.random.seed(seed)

    ae.fit(train,
           optimizer=optimizer,
           num_epochs=gParameters['epochs'],
           cost=cost,
           callbacks=callbacks)

    # Compute errors
    X_pred = ae.get_outputs(test)
    scores = p1b1.evaluate_autoencoder(X_pred, X_test)
    print('Evaluation on test data:', scores)

    diff = X_pred - X_test
    # Plot histogram of errors comparing input and output of autoencoder
    plt.hist(diff.ravel(), bins='auto')
    plt.title("Histogram of Errors with 'auto' bins")
    plt.savefig('histogram_neon.png')
# Example #10
# 0
def main():
    """Train and evaluate the P1B2 MLP classifier with the neon backend.

    Parameters are read from the command line and from the configuration
    file named by ``args.config_file`` and merged with
    ``p1_common.args_overwrite_config``. The function then loads the
    train / validation / test splits, stacks one ``Affine`` layer per entry
    of ``gParameters['dense']``, fits the model and prints the accuracy
    measured on the test split.
    """

    def given_on_cli(long_flag, short_flag):
        """Return True if the option was explicitly passed on the command line.

        Tokens are matched by option name instead of by substring, and the
        script path in ``sys.argv[0]`` is not inspected. A substring test
        such as ``"-e" in arg`` also fires for unrelated tokens
        (``--eval_freq``, a hyphenated path, ...), which would wrongly
        suppress the configuration-file value.
        """
        for token in sys.argv[1:]:
            name = token.split('=', 1)[0]
            if name.startswith('--'):
                # Long spelling; argparse also accepts unambiguous prefixes
                # of a long option by default, so '--epoch' counts too.
                if len(name) > 2 and long_flag.startswith(name):
                    return True
            elif token.startswith(short_flag):
                # Short spelling, alone ('-z 32') or glued ('-z32').
                return True
        return False

    # Get command-line parameters
    parser = get_p1b2_parser()
    args = parser.parse_args()
    # Get parameters from configuration file
    fileParameters = p1b2.read_config_file(args.config_file)

    # Correct for arguments set by default by neon parser
    # (i.e. instead of taking the neon parser default value fall back to the config file,
    # if effectively the command-line was used, then use the command-line value)
    # This applies to conflictive parameters: batch_size, epochs and rng_seed
    for key, long_flag, short_flag in (('batch_size', '--batch_size', '-z'),
                                       ('epochs', '--epochs', '-e'),
                                       ('rng_seed', '--rng_seed', '-r')):
        if not given_on_cli(long_flag, short_flag):
            setattr(args, key, fileParameters[key])

    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')
    # Construct extension to save model (only needed by the disabled
    # "model save" step further down)
    ext = p1b2.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Load dataset
    (X_train, y_train), (X_val,
                         y_val), (X_test,
                                  y_test) = p1b2.load_data(gParameters, seed)

    print("Shape X_train: ", X_train.shape)
    print("Shape X_val: ", X_val.shape)
    print("Shape X_test: ", X_test.shape)
    print("Shape y_train: ", y_train.shape)
    print("Shape y_val: ", y_val.shape)
    print("Shape y_test: ", y_test.shape)

    print("Range X_train --> Min: ", np.min(X_train), ", max: ",
          np.max(X_train))
    print("Range X_val --> Min: ", np.min(X_val), ", max: ", np.max(X_val))
    print("Range X_test --> Min: ", np.min(X_test), ", max: ", np.max(X_test))
    print("Range y_train --> Min: ", np.min(y_train), ", max: ",
          np.max(y_train))
    print("Range y_val --> Min: ", np.min(y_val), ", max: ", np.max(y_val))
    print("Range y_test --> Min: ", np.min(y_test), ", max: ", np.max(y_test))

    # Labels are integer class ids, so the largest id fixes the class count.
    num_classes = int(np.max(y_train)) + 1
    output_dim = num_classes  # The backend will represent the classes using one-hot representation (but requires an integer class as input !)

    # Re-generate the backend after consolidating parsing and file config
    gen_backend(backend=args.backend,
                rng_seed=seed,
                device_id=args.device_id,
                batch_size=gParameters['batch_size'],
                datatype=gParameters['data_type'],
                max_devices=args.max_devices,
                compat_mode=args.compat_mode)

    train = ArrayIterator(X=X_train, y=y_train, nclass=num_classes)
    val = ArrayIterator(X=X_val, y=y_val, nclass=num_classes)
    test = ArrayIterator(X=X_test, y=y_test, nclass=num_classes)

    # Initialize weights and learning rule
    initializer_weights = p1_common_neon.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = p1_common_neon.build_initializer(
        'constant', kerasDefaults, 0.)

    activation = p1_common_neon.get_function(gParameters['activation'])()

    # Define MLP architecture
    layers = []

    for layer in gParameters['dense']:
        if layer:
            layers.append(
                Affine(nout=layer,
                       init=initializer_weights,
                       bias=initializer_bias,
                       activation=activation))
            # Dropout belongs to the hidden layer just added; a falsy
            # (skipped) entry must not leave a stray Dropout behind.
            if gParameters['dropout']:
                layers.append(Dropout(keep=(1 - gParameters['dropout'])))

    # NOTE(review): the output layer reuses the hidden-layer activation;
    # confirm this is intended for a classification output.
    layers.append(
        Affine(nout=output_dim,
               init=initializer_weights,
               bias=initializer_bias,
               activation=activation))

    # Build MLP model
    mlp = Model(layers=layers)

    # Define cost and optimizer
    cost = GeneralizedCost(p1_common_neon.get_function(gParameters['loss'])())
    optimizer = p1_common_neon.build_optimizer(gParameters['optimizer'],
                                               gParameters['learning_rate'],
                                               kerasDefaults)

    callbacks = Callbacks(mlp, eval_set=val, metric=Accuracy(), eval_freq=1)

    # Seed random generator for training
    np.random.seed(seed)

    mlp.fit(train,
            optimizer=optimizer,
            num_epochs=gParameters['epochs'],
            cost=cost,
            callbacks=callbacks)

    # model save
    #save_fname = "model_mlp_W_" + ext
    #mlp.save_params(save_fname)

    # Evaluate model on test set
    print('Model evaluation by neon: ', mlp.eval(test, metric=Accuracy()))
    y_pred = mlp.get_outputs(test)
    scores = p1b2.evaluate_accuracy(p1_common.convert_to_class(y_pred), y_test)
    print('Evaluation on test data:', scores)
def main():
    """Train and evaluate the P1B1 autoencoder with the MXNet backend.

    Command-line and configuration-file parameters are merged, the three
    data splits are loaded, and a symmetric fully-connected autoencoder is
    built from ``gParameters['dense']`` (encoder layers followed by their
    mirror image, minus the innermost layer). The network is trained to
    reproduce its own input, evaluated on the test split, and a histogram of
    the reconstruction errors is written to ``histogram_mx.png``.
    """

    # Get command-line parameters
    parser = get_p1b1_parser()
    args = parser.parse_args()
    # Get parameters from configuration file
    fileParameters = p1b1.read_config_file(args.config_file)
    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print ('Params:', gParameters)

    # Construct extension to save model
    ext = p1b1.extension_from_parameters(gParameters, '.mx')
    p1b1.logger.info('Params: {}'.format(gParameters))

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Load dataset
    X_train, X_val, X_test = p1b1.load_data(gParameters, seed)

    print ("Shape X_train: ", X_train.shape)
    print ("Shape X_val: ", X_val.shape)
    print ("Shape X_test: ", X_test.shape)

    print ("Range X_train --> Min: ", np.min(X_train), ", max: ", np.max(X_train))
    print ("Range X_val --> Min: ", np.min(X_val), ", max: ", np.max(X_val))
    print ("Range X_test --> Min: ", np.min(X_test), ", max: ", np.max(X_test))

    # Set input and target to X_train (the autoencoder reconstructs its input)
    batch_size = gParameters['batch_size']
    train_iter = mx.io.NDArrayIter(X_train, X_train, batch_size, shuffle=gParameters['shuffle'])
    val_iter = mx.io.NDArrayIter(X_val, X_val, batch_size)
    test_iter = mx.io.NDArrayIter(X_test, X_test, batch_size)

    net = mx.sym.Variable('data')
    out = mx.sym.Variable('softmax_label')
    input_dim = X_train.shape[1]
    output_dim = input_dim

    # Initialize weights and learning rule
    initializer_weights = p1_common_mxnet.build_initializer(gParameters['initialization'], kerasDefaults)
    initializer_bias = p1_common_mxnet.build_initializer('constant', kerasDefaults, 0.)
    init = mx.initializer.Mixed(['bias', '.*'], [initializer_bias, initializer_weights])

    activation = gParameters['activation']

    # Define Autoencoder architecture
    layers = gParameters['dense']

    if layers is not None:
        if not isinstance(layers, list):
            layers = list(layers)
        # Encoder Part
        for width in layers:
            net = mx.sym.FullyConnected(data=net, num_hidden=width)
            net = mx.sym.Activation(data=net, act_type=activation)
        # Decoder Part: mirror the encoder, skipping its last (innermost) layer
        for width in reversed(layers[:-1]):
            net = mx.sym.FullyConnected(data=net, num_hidden=width)
            net = mx.sym.Activation(data=net, act_type=activation)

    # Linear output layer with the same width as the input
    net = mx.sym.FullyConnected(data=net, num_hidden=output_dim)
    net = mx.symbol.LinearRegressionOutput(data=net, label=out)

    # Display model
    p1_common_mxnet.plot_network(net, 'net'+ext)

    # Define context: CPU unless a list of GPU ids is configured
    devices = mx.cpu()
    if gParameters['gpus']:
        devices = [mx.gpu(i) for i in gParameters['gpus']]

    # Build Autoencoder model
    ae = mx.mod.Module(symbol=net, context=devices)

    # Define optimizer
    optimizer = p1_common_mxnet.build_optimizer(gParameters['optimizer'],
                                                gParameters['learning_rate'],
                                                kerasDefaults)

    # Seed random generator for training
    mx.random.seed(seed)

    # The initializer has to be handed to fit() explicitly; otherwise the
    # Module falls back to its own default and the configured
    # 'initialization' setting is silently ignored.
    ae.fit(train_iter, eval_data=val_iter,
           eval_metric=gParameters['loss'],
           optimizer=optimizer,
           initializer=init,
           num_epoch=gParameters['epochs'])

    # model save
    #save_filepath = "model_ae_" + ext
    #ae.save(save_filepath)

    # Evaluate model on test set
    X_pred = ae.predict(test_iter).asnumpy()

    scores = p1b1.evaluate_autoencoder(X_pred, X_test)
    print('Evaluation on test data:', scores)

    # Plot histogram of errors comparing input and output of autoencoder
    diff = X_pred - X_test
    plt.hist(diff.ravel(), bins='auto')
    plt.title("Histogram of Errors with 'auto' bins")
    plt.savefig('histogram_mx.png')
def run(gParameters):
    """
    Runs the model using the specified set of parameters

    Logging handlers (a log file plus a console stream) are attached to
    ``p1b3.logger`` for the duration of the run and are always detached
    again, with the log file closed, even when training raises.

    Args:
       gParameters: a python dictionary containing the parameters (e.g. epoch)
       to run the model with. If it holds a 'dense' entry that is not a
       list, the entry is replaced in place by ``list(entry)``.

    Returns:
       The history object returned by ``model.fit_generator``.
    """
    if 'dense' in gParameters:
        dval = gParameters['dense']
        if not isinstance(dval, list):
            gParameters['dense'] = list(dval)
        print(gParameters['dense'])

    if 'conv' in gParameters:
        print('Conv input', gParameters['conv'])

    # Construct extension to save model
    ext = p1b3.extension_from_parameters(gParameters, '.keras')
    logfile = gParameters['logfile'] or gParameters['save'] + ext + '.log'

    fh = logging.FileHandler(logfile)
    fh.setFormatter(
        logging.Formatter("[%(asctime)s %(process)d] %(message)s",
                          datefmt="%Y-%m-%d %H:%M:%S"))
    fh.setLevel(logging.DEBUG)

    sh = logging.StreamHandler()
    sh.setFormatter(logging.Formatter(''))
    sh.setLevel(logging.DEBUG if gParameters['verbose'] else logging.INFO)

    p1b3.logger.setLevel(logging.DEBUG)
    p1b3.logger.addHandler(fh)
    p1b3.logger.addHandler(sh)
    try:
        p1b3.logger.info('Params: {}'.format(gParameters))
        return _p1b3_keras_train(gParameters, ext)
    finally:
        # Detach in every case so repeated calls do not pile up handlers,
        # and close the file handler: removeHandler() alone leaves the log
        # file open.
        p1b3.logger.removeHandler(fh)
        p1b3.logger.removeHandler(sh)
        fh.close()


def _p1b3_keras_build_model(gParameters, input_dim, initializer_weights,
                            initializer_bias):
    """Assemble the uncompiled Sequential network; returns (model, gen_shape)."""
    activation = gParameters['activation']

    gen_shape = None
    out_dim = 1

    model = Sequential()
    if 'dense' in gParameters:  # Build dense layers
        for layer in gParameters['dense']:
            if not layer:
                continue  # falsy entries add nothing to the network
            model.add(
                Dense(layer,
                      input_dim=input_dim,
                      kernel_initializer=initializer_weights,
                      bias_initializer=initializer_bias))
            if gParameters['batch_normalization']:
                model.add(BatchNormalization())
            model.add(Activation(activation))
            if gParameters['drop']:
                model.add(Dropout(gParameters['drop']))
    else:  # Build convolutional layers
        gen_shape = 'add_1d'
        # Honour the configured value: an explicit False must not switch
        # locally-connected layers on just because the key exists.
        lc_flag = bool(gParameters.get('locally_connected', False))

        for i, conv_params in enumerate(gParameters['conv']):
            if i == 0:
                # Only the first layer is told the input size
                add_conv_layer(model,
                               conv_params,
                               input_dim=input_dim,
                               locally_connected=lc_flag)
            else:
                add_conv_layer(model,
                               conv_params,
                               locally_connected=lc_flag)
            if gParameters['batch_normalization']:
                model.add(BatchNormalization())
            model.add(Activation(activation))
            if gParameters['pool']:
                model.add(MaxPooling1D(pool_size=gParameters['pool']))
        model.add(Flatten())

    model.add(Dense(out_dim))
    return model, gen_shape


def _p1b3_keras_train(gParameters, ext):
    """Build loader, model, generators and callbacks, then fit; returns the history."""
    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Build dataset loader object
    loader = p1b3.DataLoader(
        seed=seed,
        dtype=gParameters['datatype'],
        val_split=gParameters['validation_split'],
        test_cell_split=gParameters['test_cell_split'],
        cell_features=gParameters['cell_features'],
        drug_features=gParameters['drug_features'],
        feature_subsample=gParameters['feature_subsample'],
        scaling=gParameters['scaling'],
        scramble=gParameters['scramble'],
        min_logconc=gParameters['min_logconc'],
        max_logconc=gParameters['max_logconc'],
        subsample=gParameters['subsample'],
        category_cutoffs=gParameters['category_cutoffs'])

    # Initialize weights and learning rule
    initializer_weights = p1_common_keras.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = p1_common_keras.build_initializer(
        'constant', kerasDefaults, 0.)

    # Define model architecture
    model, gen_shape = _p1b3_keras_build_model(gParameters, loader.input_dim,
                                               initializer_weights,
                                               initializer_bias)

    # Define optimizer
    optimizer = p1_common_keras.build_optimizer(gParameters['optimizer'],
                                                gParameters['learning_rate'],
                                                kerasDefaults)

    # Compile and display model
    model.compile(loss=gParameters['loss'], optimizer=optimizer)
    model.summary()
    p1b3.logger.debug('Model: {}'.format(model.to_json()))

    batch_size = gParameters['batch_size']

    train_gen = p1b3.DataGenerator(
        loader,
        batch_size=batch_size,
        shape=gen_shape,
        name='train_gen',
        cell_noise_sigma=gParameters['cell_noise_sigma']).flow()
    val_gen = p1b3.DataGenerator(loader,
                                 partition='val',
                                 batch_size=batch_size,
                                 shape=gen_shape,
                                 name='val_gen').flow()
    # Second validation generator, consumed by the loss-history callback
    # independently of the one passed to fit_generator.
    val_gen2 = p1b3.DataGenerator(loader,
                                  partition='val',
                                  batch_size=batch_size,
                                  shape=gen_shape,
                                  name='val_gen2').flow()
    test_gen = p1b3.DataGenerator(loader,
                                  partition='test',
                                  batch_size=batch_size,
                                  shape=gen_shape,
                                  name='test_gen').flow()

    # Default step counts cover each partition once; explicit settings win.
    train_steps = int(loader.n_train / batch_size)
    val_steps = int(loader.n_val / batch_size)
    test_steps = int(loader.n_test / batch_size)

    if 'train_steps' in gParameters:
        train_steps = gParameters['train_steps']
    if 'val_steps' in gParameters:
        val_steps = gParameters['val_steps']
    if 'test_steps' in gParameters:
        test_steps = gParameters['test_steps']

    checkpointer = ModelCheckpoint(filepath=gParameters['save'] + '.model' +
                                   ext + '.h5',
                                   save_best_only=True)
    progbar = MyProgbarLogger(train_steps * batch_size)
    loss_history = MyLossHistory(
        progbar=progbar,
        val_gen=val_gen2,
        test_gen=test_gen,
        val_steps=val_steps,
        test_steps=test_steps,
        metric=gParameters['loss'],
        category_cutoffs=gParameters['category_cutoffs'],
        ext=ext,
        pre=gParameters['save'])

    # Seed random generator for training
    np.random.seed(seed)

    candleRemoteMonitor = CandleRemoteMonitor(params=gParameters)

    history = model.fit_generator(
        train_gen,
        train_steps,
        epochs=gParameters['epochs'],
        validation_data=val_gen,
        validation_steps=val_steps,
        verbose=0,
        callbacks=[checkpointer, loss_history, progbar, candleRemoteMonitor],
        pickle_safe=True,
        workers=gParameters['workers'])

    return history
Example #13
0
def main():
    """Train the P1B3 regression model with the neon backend.

    Command-line and configuration-file parameters are merged, a
    ``p1b3.DataLoader`` is built from them, and the network is assembled
    either from ``gParameters['dense']`` (fully-connected layers) or, when
    that key is absent, from ``gParameters['conv']`` read as consecutive
    (filters, filter length, stride) triples. A single linear output unit is
    appended and the model is fitted on the training iterator while being
    evaluated on the validation iterator every epoch.
    """

    def given_on_cli(long_flag, short_flag):
        """Return True if the option was explicitly passed on the command line.

        Tokens are matched by option name instead of by substring, and the
        script path in ``sys.argv[0]`` is not inspected. A substring test
        such as ``"-e" in arg`` also fires for unrelated tokens
        (``--eval_freq``, a hyphenated path, ...), which would wrongly
        suppress the configuration-file value.
        """
        for token in sys.argv[1:]:
            name = token.split('=', 1)[0]
            if name.startswith('--'):
                # Long spelling; argparse also accepts unambiguous prefixes
                # of a long option by default, so '--epoch' counts too.
                if len(name) > 2 and long_flag.startswith(name):
                    return True
            elif token.startswith(short_flag):
                # Short spelling, alone ('-z 32') or glued ('-z32').
                return True
        return False

    # Get command-line parameters
    parser = get_p1b3_parser()
    args = parser.parse_args()
    # Get parameters from configuration file
    fileParameters = p1b3.read_config_file(args.config_file)

    # Correct for arguments set by default by neon parser
    # (i.e. instead of taking the neon parser default value fall back to the config file,
    # if effectively the command-line was used, then use the command-line value)
    # This applies to conflictive parameters: batch_size, epochs and rng_seed
    for key, long_flag, short_flag in (('batch_size', '--batch_size', '-z'),
                                       ('epochs', '--epochs', '-e'),
                                       ('rng_seed', '--rng_seed', '-r')):
        if not given_on_cli(long_flag, short_flag):
            setattr(args, key, fileParameters[key])

    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')
    # Construct extension to save model
    ext = p1b3.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Build dataset loader object
    loader = p1b3.DataLoader(
        seed=seed,
        dtype=gParameters['datatype'],
        val_split=gParameters['validation_split'],
        test_cell_split=gParameters['test_cell_split'],
        cell_features=gParameters['cell_features'],
        drug_features=gParameters['drug_features'],
        feature_subsample=gParameters['feature_subsample'],
        scaling=gParameters['scaling'],
        scramble=gParameters['scramble'],
        min_logconc=gParameters['min_logconc'],
        max_logconc=gParameters['max_logconc'],
        subsample=gParameters['subsample'],
        category_cutoffs=gParameters['category_cutoffs'])

    # Re-generate the backend after consolidating parsing and file config
    gen_backend(backend=args.backend,
                rng_seed=seed,
                device_id=args.device_id,
                batch_size=gParameters['batch_size'],
                datatype=gParameters['datatype'],
                max_devices=args.max_devices,
                compat_mode=args.compat_mode)

    # Initialize weights and learning rule
    initializer_weights = p1_common_neon.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = p1_common_neon.build_initializer(
        'constant', kerasDefaults, 0.)

    activation = p1_common_neon.get_function(gParameters['activation'])()

    # Define model architecture
    layers = []
    reshape = None

    if 'dense' in gParameters:  # Build dense layers
        for layer in gParameters['dense']:
            if layer:
                layers.append(
                    Affine(nout=layer,
                           init=initializer_weights,
                           bias=initializer_bias,
                           activation=activation))
                # Dropout belongs to the hidden layer just added; a falsy
                # (skipped) entry must not leave a stray Dropout behind.
                if gParameters['drop']:
                    layers.append(Dropout(keep=(1 - gParameters['drop'])))
    else:  # Build convolutional layers
        reshape = (1, loader.input_dim, 1)
        conv_spec = gParameters['conv']
        # The flat list is consumed three values at a time:
        # number of filters, filter length, stride.
        for i in range(0, len(conv_spec), 3):
            nb_filter = conv_spec[i]
            filter_len = conv_spec[i + 1]
            stride = conv_spec[i + 2]
            # fshape: (height, width, num_filters).
            layers.append(
                Conv((1, filter_len, nb_filter),
                     strides={
                         'str_h': 1,
                         'str_w': stride
                     },
                     init=initializer_weights,
                     activation=activation))
            if gParameters['pool']:
                layers.append(Pooling((1, gParameters['pool'])))

    # Single linear output unit
    layers.append(
        Affine(nout=1,
               init=initializer_weights,
               bias=initializer_bias,
               activation=neon.transforms.Identity()))

    # Build model
    model = Model(layers=layers)

    # Define neon data iterators; explicit sample counts override the
    # partition sizes reported by the loader
    train_samples = int(loader.n_train)
    val_samples = int(loader.n_val)

    if 'train_samples' in gParameters:
        train_samples = gParameters['train_samples']
    if 'val_samples' in gParameters:
        val_samples = gParameters['val_samples']

    train_iter = ConcatDataIter(loader,
                                ndata=train_samples,
                                lshape=reshape,
                                datatype=gParameters['datatype'])
    val_iter = ConcatDataIter(loader,
                              partition='val',
                              ndata=val_samples,
                              lshape=reshape,
                              datatype=gParameters['datatype'])

    # Define cost and optimizer
    cost = GeneralizedCost(p1_common_neon.get_function(gParameters['loss'])())
    optimizer = p1_common_neon.build_optimizer(gParameters['optimizer'],
                                               gParameters['learning_rate'],
                                               kerasDefaults)

    callbacks = Callbacks(model, eval_set=val_iter,
                          eval_freq=1)  #**args.callback_args)

    model.fit(train_iter,
              optimizer=optimizer,
              num_epochs=gParameters['epochs'],
              cost=cost,
              callbacks=callbacks)