def run_mlp(
    step,
    momentum,
    decay,
    n_hidden_full,
    n_hidden_conv,
    hidden_full_transfers,
    hidden_conv_transfers,
    filter_shapes,
    pool_size,
    par_std,
    batch_size,
    opt,
    L2,
    counter,
    X,
    Z,
    TX,
    TZ,
    image_height,
    image_width,
    nouts,
):

    print step, momentum, decay, n_hidden_full, n_hidden_conv, hidden_full_transfers, hidden_conv_transfers, filter_shapes, pool_size, par_std, batch_size, opt, L2, counter, image_height, image_width, nouts
    seed = 3453
    np.random.seed(seed)
    batch_size = batch_size
    # max_iter = max_passes * X.shape[ 0] / batch_size
    max_iter = 500
    n_report = X.shape[0] / batch_size
    weights = []
    # input_size = len(X[0])
    # Normalize
    mean = X.mean(axis=0)
    std = (X - mean).std()
    X = (X - mean) / std
    TX = (TX - mean) / std

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)

    optimizer = opt, {"step_rate": step, "momentum": momentum, "decay": decay}

    typ = "Lenet"

    if typ == "Lenet":
        m = Lenet(
            image_height,
            image_width,
            1,
            X,
            Z,
            n_hiddens_conv=n_hidden_conv,
            filter_shapes=filter_shapes,
            pool_shapes=pool_size,
            n_hiddens_full=n_hidden_full,
            n_output=nouts,
            hidden_transfers_conv=hidden_conv_transfers,
            hidden_transfers_full=hidden_full_transfers,
            out_transfer="identity",
            loss="squared",
            optimizer=optimizer,
            batch_size=batch_size,
            max_iter=max_iter,
        )

    elif typ == "SimpleCnn2d":
        m = SimpleCnn2d(
            2099,
            [400, 100],
            1,
            X,
            Z,
            TX,
            TZ,
            hidden_transfers=["tanh", "tanh"],
            out_transfer="identity",
            loss="squared",
            p_dropout_inpt=0.1,
            p_dropout_hiddens=0.2,
            optimizer=optimizer,
            batch_size=batch_size,
            max_iter=max_iter,
        )

    climin.initialize.randomize_normal(m.parameters.data, 0, par_std)
    # m.parameters.data[...] = np.random.normal(0, 0.01, m.parameters.data.shape)

    # Transform the test data
    # TX = m.transformedData(TX)
    m.init_weights()
    TX = np.array([TX for _ in range(10)]).mean(axis=0)
    print TX.shape

    losses = []
    print "max iter", max_iter

    X, Z, TX, TZ = [breze.learn.base.cast_array_to_local_type(i) for i in (X, Z, TX, TZ)]

    for layer in m.lenet.mlp.layers:
        weights.append(m.parameters[layer.weights])

    weight_decay = (
        (weights[0] ** 2).sum()
        + (weights[1] ** 2).sum()
        # + (weights[2]**2).sum()
    )

    weight_decay /= m.exprs["inpt"].shape[0]
    m.exprs["true_loss"] = m.exprs["loss"]
    c_wd = L2
    m.exprs["loss"] = m.exprs["loss"] + c_wd * weight_decay

    mae = T.abs_((m.exprs["output"] * np.std(train_labels) + np.mean(train_labels)) - m.exprs["target"]).mean()
    f_mae = m.function(["inpt", "target"], mae)

    rmse = T.sqrt(
        T.square((m.exprs["output"] * np.std(train_labels) + np.mean(train_labels)) - m.exprs["target"]).mean()
    )
    f_rmse = m.function(["inpt", "target"], rmse)

    start = time.time()
    # Set up a nice printout.
    keys = "#", "seconds", "loss", "val loss", "mae_train", "rmse_train", "mae_test", "rmse_test"
    max_len = max(len(i) for i in keys)
    header = "\t".join(i for i in keys)
    print header
    print "-" * len(header)
    results = open("result_hp.txt", "a")
    results.write(header + "\n")
    results.write("-" * len(header) + "\n")
    results.close()

    EXP_DIR = os.getcwd()
    base_path = os.path.join(EXP_DIR, "pars_hp" + str(counter) + ".pkl")
    n_iter = 0

    if os.path.isfile(base_path):
        with open("pars_hp" + str(counter) + ".pkl", "rb") as tp:
            n_iter, best_pars = cp.load(tp)
            m.parameters.data[...] = best_pars

    for i, info in enumerate(m.powerfit((X, Z), (TX, TZ), stop, pause)):
        if info["n_iter"] % n_report != 0:
            continue
        passed = time.time() - start
        losses.append((info["loss"], info["val_loss"]))
        info.update(
            {
                "time": passed,
                "mae_train": f_mae(X, train_labels),
                "rmse_train": f_rmse(X, train_labels),
                "mae_test": f_mae(TX, test_labels),
                "rmse_test": f_rmse(TX, test_labels),
            }
        )

        info["n_iter"] += n_iter

        row = (
            "%(n_iter)i\t%(time)g\t%(loss)f\t%(val_loss)f\t%(mae_train)g\t%(rmse_train)g\t%(mae_test)g\t%(rmse_test)g"
            % info
        )
        results = open("result_hp.txt", "a")
        print row
        results.write(row + "\n")
        results.close()
        with open("pars_hp" + str(counter) + ".pkl", "wb") as fp:
            cp.dump((info["n_iter"], info["best_pars"]), fp)
        with open("hps" + str(counter) + ".pkl", "wb") as tp:
            cp.dump(
                (
                    step,
                    momentum,
                    decay,
                    n_hidden_full,
                    n_hidden_conv,
                    hidden_full_transfers,
                    hidden_conv_transfers,
                    filter_shapes,
                    pool_size,
                    par_std,
                    batch_size,
                    opt,
                    L2,
                    counter,
                    info["n_iter"],
                ),
                tp,
            )

    m.parameters.data[...] = info["best_pars"]
    cp.dump(info["best_pars"], open("best_pars.pkl", "wb"))

    Y = m.predict(X)
    TY = m.predict(TX)

    output_train = Y * np.std(train_labels) + np.mean(train_labels)
    output_test = TY * np.std(train_labels) + np.mean(train_labels)

    print "TRAINING SET\n"
    print ("MAE:  %5.2f kcal/mol" % np.abs(output_train - train_labels).mean(axis=0))
    print ("RMSE: %5.2f kcal/mol" % np.square(output_train - train_labels).mean(axis=0) ** 0.5)

    print "TESTING SET\n"
    print ("MAE:  %5.2f kcal/mol" % np.abs(output_test - test_labels).mean(axis=0))
    print ("RMSE: %5.2f kcal/mol" % np.square(output_test - test_labels).mean(axis=0) ** 0.5)

    mae_train = np.abs(output_train - train_labels).mean(axis=0)
    rmse_train = np.square(output_train - train_labels).mean(axis=0) ** 0.5
    mae_test = np.abs(output_test - test_labels).mean(axis=0)
    rmse_test = np.square(output_test - test_labels).mean(axis=0) ** 0.5

    results = open("result.txt", "a")
    results.write("Training set:\n")
    results.write("MAE:\n")
    results.write("%5.2f" % mae_train)
    results.write("\nRMSE:\n")
    results.write("%5.2f" % rmse_train)
    results.write("\nTesting set:\n")
    results.write("MAE:\n")
    results.write("%5.2f" % mae_test)
    results.write("\nRMSE:\n")
    results.write("%5.2f" % rmse_test)

    results.close()
# Example no. 2
# 0
def run_mlp(step, momentum, decay, n_hidden_full, n_hidden_conv,
            hidden_full_transfers, hidden_conv_transfers, filter_shapes,
            pool_size, par_std, batch_size, opt, L2, counter, X, Z, TX, TZ,
            image_height, image_width, nouts):

    print step, momentum, decay, n_hidden_full, n_hidden_conv, hidden_full_transfers, hidden_conv_transfers, filter_shapes, pool_size, par_std, batch_size, opt, L2, counter, image_height, image_width, nouts
    seed = 3453
    np.random.seed(seed)
    batch_size = batch_size
    #max_iter = max_passes * X.shape[ 0] / batch_size
    max_iter = 25000000
    n_report = X.shape[0] / batch_size
    weights = []
    #input_size = len(X[0])
    #Normalize
    mean = X.mean(axis=0)
    std = (X - mean).std()
    X = (X - mean) / std
    TX = (TX - mean) / std

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)

    optimizer = opt, {'step_rate': step, 'momentum': momentum, 'decay': decay}

    typ = 'Lenet'

    if typ == 'Lenet':
        m = Lenet(image_height,
                  image_width,
                  1,
                  X,
                  Z,
                  n_hiddens_conv=n_hidden_conv,
                  filter_shapes=filter_shapes,
                  pool_shapes=pool_size,
                  n_hiddens_full=n_hidden_full,
                  n_output=nouts,
                  hidden_transfers_conv=hidden_conv_transfers,
                  hidden_transfers_full=hidden_full_transfers,
                  out_transfer='identity',
                  loss='squared',
                  optimizer=optimizer,
                  batch_size=batch_size,
                  max_iter=max_iter)

    elif typ == 'SimpleCnn2d':
        m = SimpleCnn2d(2099, [400, 100],
                        1,
                        X,
                        Z,
                        TX,
                        TZ,
                        hidden_transfers=['tanh', 'tanh'],
                        out_transfer='identity',
                        loss='squared',
                        p_dropout_inpt=.1,
                        p_dropout_hiddens=.2,
                        optimizer=optimizer,
                        batch_size=batch_size,
                        max_iter=max_iter)

    climin.initialize.randomize_normal(m.parameters.data, 0, par_std)
    #m.parameters.data[...] = np.random.normal(0, 0.01, m.parameters.data.shape)

    # Transform the test data
    #TX = m.transformedData(TX)
    m.init_weights()
    TX = np.array([TX for _ in range(10)]).mean(axis=0)
    print TX.shape

    losses = []
    print 'max iter', max_iter

    X, Z, TX, TZ = [
        breze.learn.base.cast_array_to_local_type(i) for i in (X, Z, TX, TZ)
    ]

    for layer in m.lenet.mlp.layers:
        weights.append(m.parameters[layer.weights])

    weight_decay = ((weights[0]**2).sum() + (weights[1]**2).sum()
                    #+ (weights[2]**2).sum()
                    )

    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    c_wd = L2
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    mae = T.abs_((m.exprs['output'] * np.std(train_labels) +
                  np.mean(train_labels)) - m.exprs['target']).mean()
    f_mae = m.function(['inpt', 'target'], mae)

    rmse = T.sqrt(
        T.square((m.exprs['output'] * np.std(train_labels) +
                  np.mean(train_labels)) - m.exprs['target']).mean())
    f_rmse = m.function(['inpt', 'target'], rmse)

    start = time.time()
    # Set up a nice printout.
    keys = '#', 'seconds', 'loss', 'val loss', 'mae_train', 'rmse_train', 'mae_test', 'rmse_test'
    max_len = max(len(i) for i in keys)
    header = '\t'.join(i for i in keys)
    print header
    print '-' * len(header)
    results = open('result_hp.txt', 'a')
    results.write(header + '\n')
    results.write('-' * len(header) + '\n')
    results.close()

    EXP_DIR = os.getcwd()
    base_path = os.path.join(EXP_DIR, "pars_hp" + str(counter) + ".pkl")
    n_iter = 0

    if os.path.isfile(base_path):
        with open("pars_hp" + str(counter) + ".pkl", 'rb') as tp:
            n_iter, best_pars = cp.load(tp)
            m.parameters.data[...] = best_pars

    for i, info in enumerate(m.powerfit((X, Z), (TX, TZ), stop, pause)):
        if info['n_iter'] % n_report != 0:
            continue
        passed = time.time() - start
        losses.append((info['loss'], info['val_loss']))
        info.update({
            'time': passed,
            'mae_train': f_mae(X, train_labels),
            'rmse_train': f_rmse(X, train_labels),
            'mae_test': f_mae(TX, test_labels),
            'rmse_test': f_rmse(TX, test_labels)
        })

        info['n_iter'] += n_iter

        row = '%(n_iter)i\t%(time)g\t%(loss)f\t%(val_loss)f\t%(mae_train)g\t%(rmse_train)g\t%(mae_test)g\t%(rmse_test)g' % info
        results = open('result_hp.txt', 'a')
        print row
        results.write(row + '\n')
        results.close()
        with open("pars_hp" + str(counter) + ".pkl", 'wb') as fp:
            cp.dump((info['n_iter'], info['best_pars']), fp)
        with open("hps" + str(counter) + ".pkl", 'wb') as tp:
            cp.dump((step, momentum, decay, n_hidden_full, n_hidden_conv,
                     hidden_full_transfers, hidden_conv_transfers,
                     filter_shapes, pool_size, par_std, batch_size, opt, L2,
                     counter, info['n_iter']), tp)

    m.parameters.data[...] = info['best_pars']
    cp.dump(info['best_pars'], open('best_pars.pkl', 'wb'))

    Y = m.predict(X)
    TY = m.predict(TX)

    output_train = Y * np.std(train_labels) + np.mean(train_labels)
    output_test = TY * np.std(train_labels) + np.mean(train_labels)

    print 'TRAINING SET\n'
    print('MAE:  %5.2f kcal/mol' %
          np.abs(output_train - train_labels).mean(axis=0))
    print('RMSE: %5.2f kcal/mol' %
          np.square(output_train - train_labels).mean(axis=0)**.5)

    print 'TESTING SET\n'
    print('MAE:  %5.2f kcal/mol' %
          np.abs(output_test - test_labels).mean(axis=0))
    print('RMSE: %5.2f kcal/mol' %
          np.square(output_test - test_labels).mean(axis=0)**.5)

    mae_train = np.abs(output_train - train_labels).mean(axis=0)
    rmse_train = np.square(output_train - train_labels).mean(axis=0)**.5
    mae_test = np.abs(output_test - test_labels).mean(axis=0)
    rmse_test = np.square(output_test - test_labels).mean(axis=0)**.5

    results = open('result.txt', 'a')
    results.write('Training set:\n')
    results.write('MAE:\n')
    results.write("%5.2f" % mae_train)
    results.write('\nRMSE:\n')
    results.write("%5.2f" % rmse_train)
    results.write('\nTesting set:\n')
    results.write('MAE:\n')
    results.write("%5.2f" % mae_test)
    results.write('\nRMSE:\n')
    results.write("%5.2f" % rmse_test)

    results.close()
# Example no. 3
# 0
# Append the table header to the results log; the `with` block closes the
# file even if one of the writes fails.
with open('result.txt', 'a') as results:
    results.write(header + '\n')
    results.write('-' * len(header) + '\n')

EXP_DIR = os.getcwd()
base_path = os.path.join(EXP_DIR, "pars.pkl")
base_path1 = os.path.join(EXP_DIR, "best_pars.pkl")
n_iter = 0

# Resume from a previous run when a checkpoint exists in the working directory.
# NOTE: unpickling can execute arbitrary code; only load trusted checkpoints.
if os.path.isfile(base_path):
    with open('pars.pkl', 'rb') as tp:
        n_iter, best_pars = cp.load(tp)
        m.parameters.data[...] = best_pars

for i, info in enumerate(m.powerfit((X, Z), (TX, TZ), stop, pause)):

    # Only act on iterations that fall on the report interval.
    if info['n_iter'] % n_report != 0:
        continue

    passed = time.time() - start
    losses.append((info['loss'], info['val_loss']))

    # Attach elapsed time and train/test error metrics to the report dict.
    info.update({
        'time': passed,
        'mae_train': f_mae(X, train_labels),
        'rmse_train': f_rmse(X, train_labels),
        'mae_test': f_mae(TX, test_labels),
        'rmse_test': f_rmse(TX, test_labels)
    })
def run_mlp(step, momentum, decay, n_hidden_full, n_hidden_conv, hidden_full_transfers, hidden_conv_transfers, filter_shapes, pool_size, par_std, batch_size, opt, L2 , counter, X, Z, TX, TZ, image_height, image_width, nouts):

    print step, momentum, decay, n_hidden_full, n_hidden_conv, hidden_full_transfers, hidden_conv_transfers, filter_shapes, pool_size, par_std, batch_size, opt, L2, counter, image_height, image_width, nouts
    seed = 3453
    np.random.seed(seed)
    batch_size = batch_size
    #max_iter = max_passes * X.shape[ 0] / batch_size
    max_iter = 500
    n_report = X.shape[0] / batch_size
    weights = []
    #input_size = len(X[0])
    #Normalize
    mean = X.mean(axis=0)
    std = (X - mean).std()
    X = (X - mean) / std
    TX = (TX - mean)/ std

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)


    optimizer = opt, {'step_rate': step, 'momentum': momentum, 'decay': decay}


    typ = 'Lenet'

    if typ == 'Lenet':
        m = Lenet(image_height, image_width, 1, X, Z, n_hiddens_conv=n_hidden_conv, filter_shapes=filter_shapes, pool_shapes=pool_size, n_hiddens_full=n_hidden_full, n_output=nouts, hidden_transfers_conv=hidden_conv_transfers, hidden_transfers_full=hidden_full_transfers, out_transfer='identity', loss='squared', optimizer=optimizer, batch_size=batch_size, max_iter=max_iter)

    elif typ == 'SimpleCnn2d':
        m = SimpleCnn2d(2099, [400, 100], 1, X, Z, TX, TZ,
                hidden_transfers=['tanh', 'tanh'], out_transfer='identity', loss='squared',
                p_dropout_inpt=.1,
                p_dropout_hiddens=.2,
                optimizer=optimizer, batch_size=batch_size, max_iter=max_iter)


    climin.initialize.randomize_normal(m.parameters.data, 0, par_std)
    #m.parameters.data[...] = np.random.normal(0, 0.01, m.parameters.data.shape)


    # Transform the test data
    #TX = m.transformedData(TX)
    m.init_weights()
    TX = np.array([TX for _ in range(10)]).mean(axis=0)
    print TX.shape

    losses = []
    print 'max iter', max_iter



    X, Z, TX, TZ = [breze.learn.base.cast_array_to_local_type(i) for i in (X, Z, TX, TZ)]


    for layer in m.lenet.mlp.layers:
        weights.append(m.parameters[layer.weights])


    weight_decay = ((weights[0]**2).sum()
                        + (weights[1]**2).sum()
                        #+ (weights[2]**2).sum()
                    )


    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    c_wd = L2
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay


    mae = T.abs_((m.exprs['output'] * np.std(train_labels) + np.mean(train_labels))- m.exprs['target']).mean()
    f_mae = m.function(['inpt', 'target'], mae)

    rmse = T.sqrt(T.square((m.exprs['output'] * np.std(train_labels) + np.mean(train_labels))- m.exprs['target']).mean())
    f_rmse = m.function(['inpt', 'target'], rmse)



    start = time.time()
    # Set up a nice printout.
    keys = '#', 'seconds', 'loss', 'val loss', 'mae_train', 'rmse_train', 'mae_test', 'rmse_test'
    max_len = max(len(i) for i in keys)
    header = '\t'.join(i for i in keys)
    print header
    print '-' * len(header)
    results = open('result_hp.txt', 'a')
    results.write(header + '\n')
    results.write('-' * len(header) + '\n')
    results.close()

    EXP_DIR = os.getcwd()
    base_path = os.path.join(EXP_DIR, "pars_hp"+str(counter)+".pkl")
    n_iter = 0

    if os.path.isfile(base_path):
        with open("pars_hp"+str(counter)+".pkl", 'rb') as tp:
            n_iter, best_pars = cp.load(tp)
            m.parameters.data[...] = best_pars

    for i, info in enumerate(m.powerfit((X, Z), (TX, TZ), stop, pause)):
        if info['n_iter'] % n_report != 0:
            continue
        passed = time.time() - start
        losses.append((info['loss'], info['val_loss']))
        info.update({
            'time': passed,
            'mae_train': f_mae(X, train_labels),
            'rmse_train': f_rmse(X, train_labels),
            'mae_test': f_mae(TX, test_labels),
            'rmse_test': f_rmse(TX, test_labels)

        })

        info['n_iter'] += n_iter

        row = '%(n_iter)i\t%(time)g\t%(loss)f\t%(val_loss)f\t%(mae_train)g\t%(rmse_train)g\t%(mae_test)g\t%(rmse_test)g' % info
        results = open('result_hp.txt','a')
        print row
        results.write(row + '\n')
        results.close()
        with open("pars_hp"+str(counter)+".pkl", 'wb') as fp:
            cp.dump((info['n_iter'], info['best_pars']), fp)
        with open("hps"+str(counter)+".pkl", 'wb') as tp:
            cp.dump((step, momentum, decay, n_hidden_full, n_hidden_conv, hidden_full_transfers, hidden_conv_transfers, filter_shapes, pool_size, par_std, batch_size, opt, L2, counter, info['n_iter']), tp)




    m.parameters.data[...] = info['best_pars']
    cp.dump(info['best_pars'], open('best_pars.pkl', 'wb'))

    Y = m.predict(X)
    TY = m.predict(TX)

    output_train = Y * np.std(train_labels) + np.mean(train_labels)
    output_test = TY * np.std(train_labels) + np.mean(train_labels)


    print 'TRAINING SET\n'
    print('MAE:  %5.2f kcal/mol'%np.abs(output_train - train_labels).mean(axis=0))
    print('RMSE: %5.2f kcal/mol'%np.square(output_train - train_labels).mean(axis=0) ** .5)


    print 'TESTING SET\n'
    print('MAE:  %5.2f kcal/mol'%np.abs(output_test - test_labels).mean(axis=0))
    print('RMSE: %5.2f kcal/mol'%np.square(output_test - test_labels).mean(axis=0) ** .5)


    mae_train = np.abs(output_train - train_labels).mean(axis=0)
    rmse_train = np.square(output_train - train_labels).mean(axis=0) ** .5
    mae_test = np.abs(output_test - test_labels).mean(axis=0)
    rmse_test = np.square(output_test - test_labels).mean(axis=0) ** .5


    results = open('result.txt', 'a')
    results.write('Training set:\n')
    results.write('MAE:\n')
    results.write("%5.2f" %mae_train)
    results.write('\nRMSE:\n')
    results.write("%5.2f" %rmse_train)
    results.write('\nTesting set:\n')
    results.write('MAE:\n')
    results.write("%5.2f" %mae_test)
    results.write('\nRMSE:\n')
    results.write("%5.2f" %rmse_test)

    results.close()
results.write(header + '\n')
results.write('-' * len(header) + '\n')
results.close()


EXP_DIR = os.getcwd()
base_path = os.path.join(EXP_DIR, "pars.pkl")
base_path1 = os.path.join(EXP_DIR, "best_pars.pkl")
n_iter = 0

if os.path.isfile(base_path):
    with open('pars.pkl', 'rb') as tp:
        n_iter, best_pars = cp.load(tp)
        m.parameters.data[...] = best_pars

for i, info in enumerate(m.powerfit((X, Z), (TX, TZ), stop, pause)):

    if info['n_iter'] % n_report != 0:
        continue

    passed = time.time() - start
    losses.append((info['loss'], info['val_loss']))


    info.update({
        'time': passed,
        'mae_train': f_mae(X, train_labels),
        'rmse_train': f_rmse(X, train_labels),
        'mae_test': f_mae(TX, test_labels),
        'rmse_test': f_rmse(TX, test_labels)