TZ, hidden_transfers=['tanh', 'tanh'], out_transfer='identity', loss='squared', p_dropout_inpt=.1, p_dropout_hiddens=.2, optimizer=optimizer, batch_size=batch_size, max_iter=max_iter) #climin.initialize.randomize_normal(m.parameters.data, 0, 1 / np.sqrt(m.n_inpt)) m.init_weights() #Transform the test data #TX = m.transformedData(TX) TX = np.array([m.transformedData(TX) for _ in range(10)]).mean(axis=0) print TX.shape losses = [] print 'max iter', max_iter X, Z, TX, TZ = [ breze.learn.base.cast_array_to_local_type(i) for i in (X, Z, TX, TZ) ] for layer in m.mlp.layers: weights.append(m.parameters[layer.weights]) weight_decay = ((weights[0]**2).sum() + (weights[1]**2).sum() + (weights[2]**2).sum())
def do_one_eval(X, Z, TX, TZ, test_labels, train_labels, step_rate, momentum,
                decay, c_wd, counter, opt):
    """Train a fixed 2100-[400, 100]-1 tanh MLP and return its test MAE.

    The network is trained with the 'gd' optimizer on ``(X, Z)`` and
    validated on ``(TX, TZ)``.  Progress rows are appended to ``result.txt``
    and, at every report, the parameters are checkpointed to
    ``pars_hp_<opt><counter>.pkl`` and the record stored in
    ``apsis_pars_<opt><counter>.pkl`` is rewritten with the current
    iteration count and test MAE.  If a checkpoint already exists, training
    resumes from it.

    Parameters
    ----------
    X, Z : training inputs and targets handed to ``Mlp`` and ``powerfit``.
    TX, TZ : validation inputs and targets handed to ``powerfit``.  ``TX`` is
        first replaced by the mean of ten ``m.transformedData(TX)`` calls.
    test_labels, train_labels : labels used for the reported MAE / RMSE.  The
        network output is rescaled with ``std(train_labels)`` and
        ``mean(train_labels)`` before being compared with the labels.
    step_rate, momentum, decay : settings passed to the 'gd' optimizer.
    c_wd : coefficient of the weight-decay term added to the loss.
    counter, opt : only used to build the checkpoint / apsis file names
        (``opt`` is also written to ``result.txt``).

    Returns
    -------
    tuple
        ``(mae_test, n_iter)`` taken from the last processed ``info`` dict.

    NOTE(review): the original source had lost its line structure.  The
    checkpoint and apsis writes are assumed to belong inside the training
    loop (once per report) -- confirm against the intended behaviour.
    """
    np.random.seed(3453)
    batch_size = 25
    max_iter = 5000000
    # Number of iterations between two reports.
    n_report = X.shape[0] // batch_size

    optimizer = 'gd', {
        'step_rate': step_rate,
        'momentum': momentum,
        'decay': decay,
    }
    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)

    # This defines our NN. Since BayOpt does not support categorical data, we
    # just use a fixed hidden layer length and transfer functions.
    m = Mlp(2100, [400, 100], 1, X, Z,
            hidden_transfers=['tanh', 'tanh'], out_transfer='identity',
            loss='squared', optimizer=optimizer, batch_size=batch_size,
            max_iter=max_iter)

    # Transform the test data: average ten transformed copies.
    TX = np.array([m.transformedData(TX) for _ in range(10)]).mean(axis=0)
    print('max iter %s' % max_iter)
    m.init_weights()

    # L2 weight decay over every layer's weight matrix.  The original summed
    # exactly weights[0..2]; presumably that is all the layers of this
    # architecture -- verify that m.mlp.layers holds nothing else.
    weights = [m.parameters[layer.weights] for layer in m.mlp.layers]
    weight_decay = None
    for w in weights:
        term = (w ** 2).sum()
        weight_decay = term if weight_decay is None else weight_decay + term
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    # Error measures on the rescaled network output.
    rescaled = m.exprs['output'] * np.std(train_labels) + np.mean(train_labels)
    mae = T.abs_(rescaled - m.exprs['target']).mean()
    f_mae = m.function(['inpt', 'target'], mae)
    rmse = T.sqrt(T.square(rescaled - m.exprs['target']).mean())
    f_rmse = m.function(['inpt', 'target'], rmse)

    start = time.time()

    # Set up a nice printout.
    keys = ('#', 'seconds', 'loss', 'val loss', 'mae_train', 'rmse_train',
            'mae_test', 'rmse_test')
    header = '\t'.join(keys)
    rule = '-' * len(header)
    print(header)
    print(rule)
    with open('result.txt', 'a') as results:
        results.write(header + '\n')
        results.write(rule + '\n')
        results.write("%f %f %f %f %s" % (step_rate, momentum, decay, c_wd, opt))
        results.write('\n')

    # Resume from an existing checkpoint, if there is one.
    n_iter = 0
    checkpoint_path = os.path.join(
        os.getcwd(), "pars_hp_" + opt + str(counter) + ".pkl")
    if os.path.isfile(checkpoint_path):
        with open(checkpoint_path, 'rb') as tp:
            n_iter, best_pars = dill.load(tp)
        m.parameters.data[...] = best_pars

    row_format = ('%(n_iter)i\t%(time)g\t%(loss)f\t%(val_loss)f\t'
                  '%(mae_train)g\t%(rmse_train)g\t%(mae_test)g\t%(rmse_test)g')

    for info in m.powerfit((X, Z), (TX, TZ), stop, pause):
        if info['n_iter'] % n_report != 0:
            continue

        if math.isnan(info['loss']):
            # Diverged: still provide the key needed by the return statement.
            # NOTE(review): on this path info['n_iter'] does not include the
            # iteration offset of a resumed run.
            info.update({'mae_test': f_mae(TX, test_labels)})
            break

        passed = time.time() - start
        info.update({
            'time': passed,
            'mae_train': f_mae(m.transformedData(X), train_labels),
            'rmse_train': f_rmse(m.transformedData(X), train_labels),
            'mae_test': f_mae(TX, test_labels),
            'rmse_test': f_rmse(TX, test_labels),
        })
        # Account for the iterations done before the checkpoint was written.
        info['n_iter'] += n_iter

        row = row_format % info
        print(row)
        with open('result.txt', 'a') as results:
            results.write(row + '\n')

        with open("pars_hp_" + opt + str(counter) + ".pkl", 'wb') as fp:
            dill.dump((info['n_iter'], info['best_pars']), fp)

        # Refresh the apsis record with the latest iteration count and MAE.
        # As in the original, unpacking rebinds opt, step_rate, momentum,
        # decay, c_wd and counter to the values stored in the file.
        with open("apsis_pars_" + opt + str(counter) + ".pkl", 'rb') as fp:
            (LAss, opt, step_rate, momentum, decay, c_wd, counter,
             _old_n_iter, _old_result) = dill.load(fp)
        with open("apsis_pars_" + opt + str(counter) + ".pkl", 'wb') as fp:
            dill.dump((LAss, opt, step_rate, momentum, decay, c_wd, counter,
                       info['n_iter'], info['mae_test']), fp)

    return info['mae_test'], info['n_iter']
out_transfer="identity", loss="squared", p_dropout_inpt=0.1, p_dropout_hiddens=0.2, optimizer=optimizer, batch_size=batch_size, max_iter=max_iter, ) # climin.initialize.randomize_normal(m.parameters.data, 0, 1 / np.sqrt(m.n_inpt)) # Transform the test data # TX = m.transformedData(TX) TX = np.array([m.transformedData(TX) for _ in range(10)]).mean(axis=0) losses = [] print "max iter", max_iter m.init_weights() X, Z, TX, TZ = [breze.learn.base.cast_array_to_local_type(i) for i in (X, Z, TX, TZ)] """ weight_decay = ((m.parameters.in_to_hidden**2).sum() + (m.parameters.hidden_to_out**2).sum() + (m.parameters.hidden_to_hidden_0**2).sum()) weight_decay /= m.exprs['inpt'].shape[0] m.exprs['true_loss'] = m.exprs['loss'] c_wd = 0.1
def do_one_eval(X, Z, TX, TZ, test_labels, train_labels, step_rate, momentum,
                decay, c_wd, counter, opt):
    """Train a fixed 2100-[400, 100]-1 tanh MLP and return its test MAE.

    The network is trained with the 'gd' optimizer on ``(X, Z)`` and
    validated on ``(TX, TZ)``.  Progress rows are appended to ``result.txt``
    and, at every report, the parameters are checkpointed to
    ``pars_hp_<opt><counter>.pkl`` and the record stored in
    ``apsis_pars_<opt><counter>.pkl`` is rewritten with the current
    iteration count and test MAE.  If a checkpoint already exists, training
    resumes from it.

    Parameters
    ----------
    X, Z : training inputs and targets handed to ``Mlp`` and ``powerfit``.
    TX, TZ : validation inputs and targets handed to ``powerfit``.  ``TX`` is
        first replaced by the mean of ten ``m.transformedData(TX)`` calls.
    test_labels, train_labels : labels used for the reported MAE / RMSE.  The
        network output is rescaled with ``std(train_labels)`` and
        ``mean(train_labels)`` before being compared with the labels.
    step_rate, momentum, decay : settings passed to the 'gd' optimizer.
    c_wd : coefficient of the weight-decay term added to the loss.
    counter, opt : only used to build the checkpoint / apsis file names
        (``opt`` is also written to ``result.txt``).

    Returns
    -------
    tuple
        ``(mae_test, n_iter)`` taken from the last processed ``info`` dict.

    NOTE(review): the original source had lost its line structure.  The
    checkpoint and apsis writes are assumed to belong inside the training
    loop (once per report) -- confirm against the intended behaviour.
    """
    np.random.seed(3453)
    batch_size = 25
    max_iter = 5000000
    # Number of iterations between two reports.
    n_report = X.shape[0] // batch_size

    optimizer = 'gd', {
        'step_rate': step_rate,
        'momentum': momentum,
        'decay': decay,
    }
    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)

    # This defines our NN. Since BayOpt does not support categorical data, we
    # just use a fixed hidden layer length and transfer functions.
    m = Mlp(2100, [400, 100], 1, X, Z,
            hidden_transfers=['tanh', 'tanh'], out_transfer='identity',
            loss='squared', optimizer=optimizer, batch_size=batch_size,
            max_iter=max_iter)

    # Transform the test data: average ten transformed copies.
    TX = np.array([m.transformedData(TX) for _ in range(10)]).mean(axis=0)
    print('max iter %s' % max_iter)
    m.init_weights()

    # L2 weight decay over every layer's weight matrix.  The original summed
    # exactly weights[0..2]; presumably that is all the layers of this
    # architecture -- verify that m.mlp.layers holds nothing else.
    weights = [m.parameters[layer.weights] for layer in m.mlp.layers]
    weight_decay = None
    for w in weights:
        term = (w ** 2).sum()
        weight_decay = term if weight_decay is None else weight_decay + term
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    # Error measures on the rescaled network output.
    rescaled = m.exprs['output'] * np.std(train_labels) + np.mean(train_labels)
    mae = T.abs_(rescaled - m.exprs['target']).mean()
    f_mae = m.function(['inpt', 'target'], mae)
    rmse = T.sqrt(T.square(rescaled - m.exprs['target']).mean())
    f_rmse = m.function(['inpt', 'target'], rmse)

    start = time.time()

    # Set up a nice printout.
    keys = ('#', 'seconds', 'loss', 'val loss', 'mae_train', 'rmse_train',
            'mae_test', 'rmse_test')
    header = '\t'.join(keys)
    rule = '-' * len(header)
    print(header)
    print(rule)
    with open('result.txt', 'a') as results:
        results.write(header + '\n')
        results.write(rule + '\n')
        results.write("%f %f %f %f %s" % (step_rate, momentum, decay, c_wd, opt))
        results.write('\n')

    # Resume from an existing checkpoint, if there is one.
    n_iter = 0
    checkpoint_path = os.path.join(
        os.getcwd(), "pars_hp_" + opt + str(counter) + ".pkl")
    if os.path.isfile(checkpoint_path):
        with open(checkpoint_path, 'rb') as tp:
            n_iter, best_pars = dill.load(tp)
        m.parameters.data[...] = best_pars

    row_format = ('%(n_iter)i\t%(time)g\t%(loss)f\t%(val_loss)f\t'
                  '%(mae_train)g\t%(rmse_train)g\t%(mae_test)g\t%(rmse_test)g')

    for info in m.powerfit((X, Z), (TX, TZ), stop, pause):
        if info['n_iter'] % n_report != 0:
            continue

        if math.isnan(info['loss']):
            # Diverged: still provide the key needed by the return statement.
            # NOTE(review): on this path info['n_iter'] does not include the
            # iteration offset of a resumed run.
            info.update({'mae_test': f_mae(TX, test_labels)})
            break

        passed = time.time() - start
        info.update({
            'time': passed,
            'mae_train': f_mae(m.transformedData(X), train_labels),
            'rmse_train': f_rmse(m.transformedData(X), train_labels),
            'mae_test': f_mae(TX, test_labels),
            'rmse_test': f_rmse(TX, test_labels),
        })
        # Account for the iterations done before the checkpoint was written.
        info['n_iter'] += n_iter

        row = row_format % info
        print(row)
        with open('result.txt', 'a') as results:
            results.write(row + '\n')

        with open("pars_hp_" + opt + str(counter) + ".pkl", 'wb') as fp:
            dill.dump((info['n_iter'], info['best_pars']), fp)

        # Refresh the apsis record with the latest iteration count and MAE.
        # As in the original, unpacking rebinds opt, step_rate, momentum,
        # decay, c_wd and counter to the values stored in the file.
        with open("apsis_pars_" + opt + str(counter) + ".pkl", 'rb') as fp:
            (LAss, opt, step_rate, momentum, decay, c_wd, counter,
             _old_n_iter, _old_result) = dill.load(fp)
        with open("apsis_pars_" + opt + str(counter) + ".pkl", 'wb') as fp:
            dill.dump((LAss, opt, step_rate, momentum, decay, c_wd, counter,
                       info['n_iter'], info['mae_test']), fp)

    return info['mae_test'], info['n_iter']
def run_mlp(arch, func, step, batch, X, Z, TX, TZ, wd, opt):
    """Train an MLP of the given architecture and log train / test errors.

    A progress row is printed and appended to ``result.txt`` at every report.
    After training, the best parameters are restored, pickled to
    ``best_pars.pkl``, and the final MAE / RMSE on both sets are printed and
    appended to ``result.txt``.

    Parameters
    ----------
    arch : hidden layer sizes passed to ``Mlp``.
    func : hidden transfer functions passed to ``Mlp``.
    step : step rate of the optimizer.
    batch : mini-batch size.
    X, Z : training inputs and targets.  ``Z`` (before the cast to the local
        array type) also serves as the training labels whose mean / std
        rescale the network output.
    TX, TZ : test inputs and targets.  ``TX`` is first replaced by the mean of
        ten ``m.transformedData(TX)`` calls; ``TZ`` serves as test labels.
    wd : coefficient of the weight-decay term added to the loss.
    opt : optimizer name used in the optimizer specification.

    Returns
    -------
    None
    """
    batch_size = batch
    max_iter = 100000
    # Number of iterations between two reports.
    n_report = X.shape[0] // batch_size
    input_size = len(X[0])
    train_labels = Z
    test_labels = TZ

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)
    optimizer = opt, {'step_rate': step}

    # Hard-coded model switch; only the 'plain' branch is reachable.
    typ = 'plain'
    if typ == 'plain':
        m = Mlp(input_size, arch, 1, X, Z, hidden_transfers=func,
                out_transfer='identity', loss='squared',
                optimizer=optimizer, batch_size=batch_size,
                max_iter=max_iter)
    elif typ == 'fd':
        m = FastDropoutNetwork(2099, [400, 100], 1, X, Z, TX, TZ,
                               hidden_transfers=['tanh', 'tanh'],
                               out_transfer='identity', loss='squared',
                               p_dropout_inpt=.1, p_dropout_hiddens=.2,
                               optimizer=optimizer, batch_size=batch_size,
                               max_iter=max_iter)

    # NOTE(review): assumed to apply to whichever model was built (the
    # original line structure was lost) -- confirm it was not meant to be
    # part of the 'fd' branch only.
    climin.initialize.randomize_normal(m.parameters.data, 0,
                                       1 / np.sqrt(m.n_inpt))

    # Transform the test data: average ten transformed copies.
    TX = np.array([m.transformedData(TX) for _ in range(10)]).mean(axis=0)
    print('max iter %s' % max_iter)
    m.init_weights()

    X, Z, TX, TZ = [breze.learn.base.cast_array_to_local_type(i)
                    for i in (X, Z, TX, TZ)]

    # L2 weight decay over every layer's weight matrix.  The original indexed
    # weights[0..3] explicitly, which only works when `arch` yields exactly
    # four weight matrices; summing over all layers supports any depth.
    weights = [m.parameters[layer.weights] for layer in m.mlp.layers]
    weight_decay = None
    for w in weights:
        term = (w ** 2).sum()
        weight_decay = term if weight_decay is None else weight_decay + term
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    m.exprs['loss'] = m.exprs['loss'] + wd * weight_decay

    # Error measures on the rescaled network output.
    label_std = np.std(train_labels)
    label_mean = np.mean(train_labels)
    rescaled = m.exprs['output'] * label_std + label_mean
    mae = T.abs_(rescaled - m.exprs['target']).mean()
    f_mae = m.function(['inpt', 'target'], mae)
    rmse = T.sqrt(T.square(rescaled - m.exprs['target']).mean())
    f_rmse = m.function(['inpt', 'target'], rmse)

    start = time.time()

    # Set up a nice printout.
    keys = ('#', 'seconds', 'loss', 'val loss', 'mae_train', 'rmse_train',
            'mae_test', 'rmse_test')
    header = '\t'.join(keys)
    rule = '-' * len(header)
    print(header)
    print(rule)
    with open('result.txt', 'a') as results:
        results.write(header + '\n')
        results.write(rule + '\n')

    row_format = ('%(n_iter)i\t%(time)g\t%(loss)f\t%(val_loss)f\t'
                  '%(mae_train)g\t%(rmse_train)g\t%(mae_test)g\t%(rmse_test)g')

    for info in m.powerfit((X, Z), (TX, TZ), stop, pause):
        if info['n_iter'] % n_report != 0:
            continue

        passed = time.time() - start
        info.update({
            'time': passed,
            'mae_train': f_mae(m.transformedData(X), train_labels),
            'rmse_train': f_rmse(m.transformedData(X), train_labels),
            'mae_test': f_mae(TX, test_labels),
            'rmse_test': f_rmse(TX, test_labels),
        })
        row = row_format % info
        print(row)
        with open('result.txt', 'a') as results:
            results.write(row + '\n')

    # Restore and persist the best parameters.  Binary mode, and the handle
    # is closed deterministically (the original leaked a text-mode handle).
    m.parameters.data[...] = info['best_pars']
    with open('best_pars.pkl', 'wb') as fp:
        cp.dump(info['best_pars'], fp)

    Y = m.predict(m.transformedData(X))
    TY = m.predict(TX)

    output_train = Y * label_std + label_mean
    output_test = TY * label_std + label_mean

    mae_train = np.abs(output_train - train_labels).mean(axis=0)
    rmse_train = np.square(output_train - train_labels).mean(axis=0) ** .5
    mae_test = np.abs(output_test - test_labels).mean(axis=0)
    rmse_test = np.square(output_test - test_labels).mean(axis=0) ** .5

    print('TRAINING SET\n')
    print('MAE: %5.2f kcal/mol' % mae_train)
    print('RMSE: %5.2f kcal/mol' % rmse_train)
    print('TESTING SET\n')
    print('MAE: %5.2f kcal/mol' % mae_test)
    print('RMSE: %5.2f kcal/mol' % rmse_test)

    with open('result.txt', 'a') as results:
        results.write('Training set:\n')
        results.write('MAE:\n')
        results.write("%5.2f" % mae_train)
        results.write('\nRMSE:\n')
        results.write("%5.2f" % rmse_train)
        results.write('\nTesting set:\n')
        results.write('MAE:\n')
        results.write("%5.2f" % mae_test)
        results.write('\nRMSE:\n')
        results.write("%5.2f" % rmse_test)
def run_mlp(func, step, momentum, X, Z, TX, TZ, wd, opt, counter):
    """Train a resumable [400, 100] MLP and log train / test errors.

    A progress row is printed and appended to ``result_hp.txt`` at every
    report; the parameters are checkpointed to ``pars_hp<counter>.pkl`` and
    the hyper-parameters to ``hps<counter>.pkl``.  If a checkpoint already
    exists, training resumes from it.  After training, the best parameters
    are restored, pickled to ``best_pars.pkl``, and the final MAE / RMSE on
    both sets are printed and appended to ``result_hp.txt``.

    Parameters
    ----------
    func : hidden transfer functions passed to ``Mlp``.
    step, momentum : step rate and momentum of the optimizer.
    X, Z : training inputs and targets.
    TX, TZ : test inputs and targets.  ``TX`` is first replaced by the mean of
        ten ``m.transformedData(TX)`` calls.
    wd : coefficient of the weight-decay term added to the loss.
    opt : optimizer name used in the optimizer specification.
    counter : run identifier used in the checkpoint file names.

    Returns
    -------
    None

    NOTE(review): ``train_labels`` and ``test_labels`` are neither parameters
    nor locals of this function; they must be provided as module-level
    globals -- confirm.
    NOTE(review): the original source had lost its line structure.  The two
    checkpoint dumps are assumed to belong inside the training loop (once per
    report) -- confirm.
    """
    print(" ".join(str(v) for v in (func, step, momentum, wd, opt, counter)))
    np.random.seed(3453)
    batch_size = 25
    max_iter = 25000000
    # Number of iterations between two reports.
    n_report = X.shape[0] // batch_size
    input_size = len(X[0])

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)
    optimizer = opt, {"step_rate": step, "momentum": momentum}

    # Hard-coded model switch; only the "plain" branch is reachable.
    typ = "plain"
    if typ == "plain":
        m = Mlp(
            input_size,
            [400, 100],
            1,
            X,
            Z,
            hidden_transfers=func,
            out_transfer="identity",
            loss="squared",
            optimizer=optimizer,
            batch_size=batch_size,
            max_iter=max_iter,
        )
    elif typ == "fd":
        m = FastDropoutNetwork(
            2099,
            [400, 100],
            1,
            X,
            Z,
            TX,
            TZ,
            hidden_transfers=["tanh", "tanh"],
            out_transfer="identity",
            loss="squared",
            p_dropout_inpt=0.1,
            p_dropout_hiddens=0.2,
            optimizer=optimizer,
            batch_size=batch_size,
            max_iter=max_iter,
        )

    # Transform the test data: average ten transformed copies.
    TX = np.array([m.transformedData(TX) for _ in range(10)]).mean(axis=0)
    print(TX.shape)
    print("max iter %s" % max_iter)
    m.init_weights()

    X, Z, TX, TZ = [breze.learn.base.cast_array_to_local_type(i) for i in (X, Z, TX, TZ)]

    # L2 weight decay over every layer's weight matrix.  The original summed
    # exactly weights[0..2]; presumably that is all the layers of this
    # architecture -- verify that m.mlp.layers holds nothing else.
    weights = [m.parameters[layer.weights] for layer in m.mlp.layers]
    weight_decay = None
    for w in weights:
        term = (w ** 2).sum()
        weight_decay = term if weight_decay is None else weight_decay + term
    weight_decay /= m.exprs["inpt"].shape[0]
    m.exprs["true_loss"] = m.exprs["loss"]
    m.exprs["loss"] = m.exprs["loss"] + wd * weight_decay

    # Error measures on the rescaled network output.
    label_std = np.std(train_labels)
    label_mean = np.mean(train_labels)
    rescaled = m.exprs["output"] * label_std + label_mean
    mae = T.abs_(rescaled - m.exprs["target"]).mean()
    f_mae = m.function(["inpt", "target"], mae)
    rmse = T.sqrt(T.square(rescaled - m.exprs["target"]).mean())
    f_rmse = m.function(["inpt", "target"], rmse)

    start = time.time()

    # Set up a nice printout.
    keys = ("#", "seconds", "loss", "val loss", "mae_train", "rmse_train", "mae_test", "rmse_test")
    header = "\t".join(keys)
    rule = "-" * len(header)
    print(header)
    print(rule)
    with open("result_hp.txt", "a") as results:
        results.write(header + "\n")
        results.write(rule + "\n")

    # Resume from an existing checkpoint, if there is one.
    checkpoint_name = "pars_hp" + str(counter) + ".pkl"
    n_iter = 0
    if os.path.isfile(os.path.join(os.getcwd(), checkpoint_name)):
        with open(checkpoint_name, "rb") as tp:
            n_iter, best_pars = cp.load(tp)
        m.parameters.data[...] = best_pars

    row_format = (
        "%(n_iter)i\t%(time)g\t%(loss)f\t%(val_loss)f\t"
        "%(mae_train)g\t%(rmse_train)g\t%(mae_test)g\t%(rmse_test)g"
    )

    for info in m.powerfit((X, Z), (TX, TZ), stop, pause):
        if info["n_iter"] % n_report != 0:
            continue

        passed = time.time() - start
        info.update(
            {
                "time": passed,
                "mae_train": f_mae(m.transformedData(X), train_labels),
                "rmse_train": f_rmse(m.transformedData(X), train_labels),
                "mae_test": f_mae(TX, test_labels),
                "rmse_test": f_rmse(TX, test_labels),
            }
        )
        # Account for the iterations done before the checkpoint was written.
        info["n_iter"] += n_iter

        row = row_format % info
        print(row)
        with open("result_hp.txt", "a") as results:
            results.write(row + "\n")

        with open(checkpoint_name, "wb") as fp:
            cp.dump((info["n_iter"], info["best_pars"]), fp)
        with open("hps" + str(counter) + ".pkl", "wb") as tp:
            cp.dump((func, step, momentum, wd, opt, counter, info["n_iter"]), tp)

    # Restore and persist the best parameters; the handle is closed
    # deterministically (the original leaked it).
    m.parameters.data[...] = info["best_pars"]
    with open("best_pars.pkl", "wb") as fp:
        cp.dump(info["best_pars"], fp)

    Y = m.predict(m.transformedData(X))
    TY = m.predict(TX)

    output_train = Y * label_std + label_mean
    output_test = TY * label_std + label_mean

    mae_train = np.abs(output_train - train_labels).mean(axis=0)
    rmse_train = np.square(output_train - train_labels).mean(axis=0) ** 0.5
    mae_test = np.abs(output_test - test_labels).mean(axis=0)
    rmse_test = np.square(output_test - test_labels).mean(axis=0) ** 0.5

    print("TRAINING SET\n")
    print("MAE: %5.2f kcal/mol" % mae_train)
    print("RMSE: %5.2f kcal/mol" % rmse_train)
    print("TESTING SET\n")
    print("MAE: %5.2f kcal/mol" % mae_test)
    print("RMSE: %5.2f kcal/mol" % rmse_test)

    with open("result_hp.txt", "a") as results:
        results.write("Training set:\n")
        results.write("MAE:\n")
        results.write("%5.2f" % mae_train)
        results.write("\nRMSE:\n")
        results.write("%5.2f" % rmse_train)
        results.write("\nTesting set:\n")
        results.write("MAE:\n")
        results.write("%5.2f" % mae_test)
        results.write("\nRMSE:\n")
        results.write("%5.2f" % rmse_test)