def _split_data(x_data, y_data, split=0.9):
    '''Reorder the paired data, then divide it into training and validation.

    The inputs are first passed through theanets_utils.randomise_order
    (presumably a joint shuffle that keeps x / y pairs aligned -- confirm
    against that helper). The leading `split` fraction of the reordered
    data becomes the training portion and the remainder the validation
    portion.

    Args:
        x_data: input samples.
        y_data: target values, one per entry of x_data.
        split: fraction of the samples assigned to training (default 0.9).

    Returns:
        A tuple (x_train, y_train, x_val, y_val). Each training target is
        wrapped in its own single-element list; the validation targets are
        returned as a plain slice, without that wrapping.
    '''
    shuffled_x, shuffled_y = theanets_utils.randomise_order(x_data, y_data)

    # Index of the first validation sample; int() truncates towards zero.
    cut = int(split * len(shuffled_x))

    x_train = shuffled_x[:cut]

    # Wrap every training target in a one-element list.
    y_train = []
    for target in shuffled_y[:cut]:
        y_train.append([target])

    return x_train, y_train, shuffled_x[cut:], shuffled_y[cut:]
def _learn(sequences, activities):
    '''Fit a regressor on sequence-derived inputs and predict held-out data.

    The sequences are converted to inputs with sequence_utils.get_aa_props,
    reordered together with the activities by
    theanets_utils.randomise_order, and divided 80 / 20: the first 80% is
    used to train a theanets_utils.Regressor and the remaining 20% is
    passed to its predict method.

    Args:
        sequences: the sequences to learn from.
        activities: the activity value associated with each sequence.

    Returns:
        A tuple (regressor, y_held_out, y_pred): the trained regressor, the
        held-out activities (a plain slice, not wrapped in lists) and the
        regressor's predictions for the held-out inputs.
    '''
    # Convert sequences to inputs, based on amino acid properties:
    features = sequence_utils.get_aa_props(sequences)
    shuffled_x, shuffled_y = theanets_utils.randomise_order(features,
                                                            activities)

    # Index of the first held-out sample (the leading 80% is for training).
    cut = int(0.8 * len(shuffled_x))

    # Each training target is wrapped in its own single-element list.
    train_targets = []
    for target in shuffled_y[:cut]:
        train_targets.append([target])

    model = theanets_utils.Regressor(shuffled_x[:cut], train_targets)
    model.train(hidden_layers=[1024])

    held_out_x = shuffled_x[cut:]
    predictions = model.predict(held_out_x)
    held_out_y = shuffled_y[cut:]

    return model, held_out_y, predictions