def build_2ffnn(inp, h1, h2, out):
    """
    Assemble a feed-forward network with two tanh hidden layers.

    Topology: linear input -> tanh -> tanh -> linear output, each
    consecutive pair of layers joined by a full connection. The weights
    are randomised before the network is handed back.

    ----------
    inp: integer
        Size of the (linear) input layer.
    h1: integer
        Size of the first tanh hidden layer.
    h2: integer
        Size of the second tanh hidden layer.
    out: integer
        Size of the (linear) output layer.

    Returns: FeedForwardNetwork
        The sorted, randomly initialised network.
    """
    net = FeedForwardNetwork()

    # Layers are created in input -> output order.
    # (A SoftmaxLayer output was left disabled in the original code.)
    stack = [LinearLayer(inp), TanhLayer(h1), TanhLayer(h2), LinearLayer(out)]

    net.addInputModule(stack[0])
    for hidden in stack[1:-1]:
        net.addModule(hidden)
    net.addOutputModule(stack[-1])

    # One full connection per pair of neighbouring layers.
    links = [FullConnection(lower, upper)
             for lower, upper in zip(stack, stack[1:])]
    for link in links:
        net.addConnection(link)

    # sortModules() finalises the topology; it has to precede randomize().
    net.sortModules()
    net.randomize()
    return net
class MP_Pybrain(Regression):
    """
    Fully connected multilayer perceptron using pybrain library.

    The attributes self.n_feature, self.n_target, self.features,
    self.targets, self.train_set, self.cv_folds and the method
    self._build_folds are expected to be provided by the Regression base
    class (not visible in this part of the file).
    """

    def __init__(self, train_data, hyper, n_targets=None, label_targets=None):
        """
        ------------
        train_data: pandas DataFrame
            Contains columns for features and for target variables. The
            names of the target variables ends with the suffix "_tau"
        hyper: dictionary
            It contains the hyperparameters necessary to run all the
            functionalities of the model. The keys read here are:
            "structure" is a list of integers determining the number of
            neurons in each hidden layer
            "epochs" an integer specifying the maximum number of epochs to
            run during every training session
            "learning_rate" a float giving the learning rate of the
            gradient descend
            "momentum" a float giving the value of the momentum for the
            algorithm
            "batch" a bool. If True the method performs full batch
            learning, i.e. updates of the weights is done using all the
            instances of the training set. Else, normal online method is
            performed
            "train_fraction" the fraction of the rows that goes into the
            training dataset in _build_dataset (the rest is validation)
            "seed" the seed passed to np.random.seed before shuffling the
            rows in _build_dataset
            Other parameters regarding cross validation are explained in
            the base class
        n_targets, label_targets:
            Forwarded unchanged to Regression.__init__.
        """
        Regression.__init__(self, train_data, hyper, n_targets=n_targets,
                            label_targets=label_targets)
        self.N = FeedForwardNetwork()
        self.structure = [self.n_feature] + hyper['structure'] + [self.n_target]
        self._build_net(self.structure)
        # Snapshot of the parameter values right after construction.
        self.res_params = list(self.N.params)
        self.train_fraction = hyper['train_fraction']
        self.seed = hyper['seed']
        self.epochs = hyper['epochs']
        self.learning_rate = hyper['learning_rate']
        self.momentum = hyper['momentum']
        self.batch = bool(hyper['batch'])

    def learn(self, train_data=None, seed=None):
        """
        Performs single run training, and it is designed to be called
        after network instantiation.

        ----------
        train_data: pandas Dataframe
            It needs to contain both features and target variables. If
            None the current self.train_set is used; otherwise
            self.train_set is replaced by train_data.
        seed:
            Currently unused; the shuffling seed is always self.seed.

        Returns: tuple(MP_Pybrain object, float, float)
            An independent deep copy of the model at the epoch with the
            lowest training error, that training error, and the validation
            error measured at the same epoch.
        """
        if train_data is not None:
            self.train_set = train_data
        self.randomize()
        ds_train, ds_valid = self._build_dataset(self.train_set)
        trainer = BackpropTrainer(self.N, ds_train,
                                  learningrate=self.learning_rate,
                                  momentum=self.momentum,
                                  batchlearning=self.batch)

        trainer.train()
        fin_error_train = self._error(ds_train)
        fin_error_valid = self._error(ds_valid)
        # Must be a deep copy: a shallow copy would share self.N, so the
        # "best" snapshot would silently follow every later weight update.
        final_model = deepcopy(self)

        for i in range(1, self.epochs):
            if i % 10 == 0:
                print("epoch:  %s" % i)
            trainer.train()
            error_train = self._error(ds_train)
            error_valid = self._error(ds_valid)
            if error_train < fin_error_train:
                final_model = deepcopy(self)
                fin_error_train = error_train
                fin_error_valid = error_valid
        return final_model, fin_error_train, fin_error_valid

    def xvalidate(self, train_data=None, folds=None):
        """
        Performs n-folds cross-validation on the a data set. The network
        is re-randomised every time a new fold is trained. The purpose is
        to make model comparison and returning an average error given a
        specific data set and collection of hyper-parameters.
        Folds are requested from self._build_folds(random=False).

        ----------
        train_data: pandas Dataframe
            It needs to contain both features and target variables. If
            None the current self.train_set is used.
        folds: integer
            The number of training/validation partition used in the
            method. If None the current self.cv_folds is used.

        Returns: list, float, float
            A list with the best model of each fold, the mean train error
            and the mean validation error. The validation error of a fold
            is the sum over the target columns of errors.RMSE, computed
            with the model returned for that fold.
        """
        if train_data is not None:
            self.train_set = train_data
        if folds is not None:
            self.cv_folds = folds
        train, validation = self._build_folds(random=False)

        models = []
        train_error = []
        cv_error = []
        # learn() overwrites self.train_set with the fold it is given;
        # remember the full set so it can be put back afterwards.
        full_train_set = self.train_set
        try:
            for i in range(self.cv_folds):
                print("Cross-validation Fold:  %s" % (i + 1))
                self.randomize()
                model, error, _ = self.learn(train_data=train[i])
                # learn() already returns an independent deep copy.
                models.append(model)
                train_error.append(error)
                # Score the returned (best) model, so that the validation
                # error describes the same weights as the training error.
                predicted, actual = model.test(validation[i])
                cv_error.append(sum(
                    errors.RMSE(np.array(actual[k]), np.array(predicted[k]))
                    for k in predicted.columns))
        finally:
            self.train_set = full_train_set
        return models, np.mean(train_error), np.mean(cv_error)

    def test(self, data):
        """
        Tests the trained model on data. The usage is two fold:
        1) Internal usage to calculate errors on validation sets.
        2) For external usage when a test set is provided.
        Both the validation and test set need to contain target columns.
        For prediction, where target variables are unknown, please refer
        to the function self.predict below.

        ----------
        data: pandas Dataframe
            It must contain the columns self.features and self.targets.

        Returns: (pandas Dataframe, pandas Dataframe)
            The predictions (one column per target, same index as data)
            and the actual target columns taken from data.
        """
        predicted = self._activate_rows(data[self.features])
        data_y = data[self.targets]
        return pd.DataFrame(predicted, index=data.index,
                            columns=self.targets), data_y

    def predict(self, data):
        """
        It returns target variables given a set of features, using the
        current state of the network.

        ---------
        data: pandas Dataframe
            It must contain all the feature columns used for training of
            the model

        Returns: pandas Dataframe
            It contains the prediction on the target variables, one column
            per entry of self.targets, indexed like data.
        """
        data_x = data[self.features]
        return pd.DataFrame(self._activate_rows(data_x), index=data_x.index,
                            columns=self.targets)

    def randomize(self):
        """Re-initialise the parameters of the network with random values."""
        self.N.randomize()

    ### Private functions ###

    def _activate_rows(self, data_x):
        """
        Runs the network on every row of data_x.

        Returns: numpy array of shape (len(data_x), self.n_target)
            Kept two-dimensional so that models with more than one target
            can be wrapped in a DataFrame (a flattened array only works
            for a single target).
        """
        rows = data_x.values
        predicted = np.array([self.N.activate(row) for row in rows],
                             dtype=float)
        return predicted.reshape(len(rows), self.n_target)

    def _error(self, ds):
        """
        Calculates the RMSE over an input dataset, given the current
        state of the network.

        ds: Supervised dataset pybrain style

        Returns: float
            The sum over the target variables of errors.RMSE between
            actual and predicted values.
        """
        predicted = np.array([list(self.N.activate(x))
                              for x in ds['input']]).transpose()
        actual = np.array([list(x) for x in ds['target']]).transpose()
        return sum(errors.RMSE(np.array(a), np.array(p))
                   for a, p in zip(actual, predicted))

    def _build_net(self, s):
        """
        Adds the layers described by s to self.N and connects them.

        s: list of integers
            Layer sizes, input first and output last.
        """
        layers = [LinearLayer(s[0])]
        self.N.addInputModule(layers[0])
        for size in s[1:-1]:
            hidden = SigmoidLayer(size)
            layers.append(hidden)
            self.N.addModule(hidden)
        # NOTE(review): the output layer is a sigmoid as well, so every
        # prediction lies in (0, 1); presumably the targets are scaled to
        # that range upstream -- confirm.
        layers.append(SigmoidLayer(s[-1]))
        self.N.addOutputModule(layers[-1])
        self._build_connections(layers)

    def _build_connections(self, l):
        """Fully connects each layer of l to the next one and sorts self.N."""
        for lower, upper in zip(l, l[1:]):
            self.N.addConnection(FullConnection(lower, upper))
        self.N.sortModules()

    def _build_dataset(self, data):
        """
        Given a input training Dataframe with features and targets it
        returns the formatted training and validation datasets for pybrain
        usage, randomly shuffled according to self.seed. Note that this
        reseeds numpy's global random generator.

        ----------
        data: pandas Dataframe
            It must contains both features and target columns

        Returns: (pybrain dataset, pybrain dataset)
            The first is the training dataset (the first
            int(self.train_fraction * len(data)) shuffled rows) and the
            second is the validation dataset (the remaining rows).
        """
        np.random.seed(self.seed)
        permutation = np.random.permutation(np.arange(len(data)))
        sep = int(self.train_fraction * len(data))
        x = data[self.features].values
        y = data[self.targets].values
        ds_train = SupervisedDataSet(self.n_feature, self.n_target)
        ds_valid = SupervisedDataSet(self.n_feature, self.n_target)
        for i in permutation[:sep]:
            ds_train.addSample(x[i], y[i])
        for i in permutation[sep:]:
            ds_valid.addSample(x[i], y[i])
        return ds_train, ds_valid
class ANN:
    """
    Feed-forward PyBrain network with one sigmoid hidden layer, trained
    with backpropagation until the validation error stops improving.

    Call create_network() before train() or predict().
    """

    def __init__(self):
        self.name = "ANN"

    def getParams(self):
        """
        Returns the weights of the input->hidden and hidden->output
        connections (in that order). The bias weights are not included.
        """
        return self.in_to_hidden.params, self.hidden_to_out.params

    def create_network(self, nFeatures, hidden1Size=20, nClasses=1):
        """
        Builds the network and the (empty) training/validation datasets.

        nFeatures: size of the linear input layer
        hidden1Size: size of the sigmoid hidden layer
        nClasses: size of the linear output layer
        """
        # create network object
        self.ffn = FeedForwardNetwork()

        # create layer objects
        inLayer = LinearLayer(nFeatures, name="input")
        hiddenLayer = SigmoidLayer(hidden1Size, name="hidden1")
        outLayer = LinearLayer(nClasses, name="output")
        bias = BiasUnit(name='bias')

        # add layers to feed forward network
        self.ffn.addInputModule(inLayer)
        self.ffn.addModule(hiddenLayer)
        self.ffn.addOutputModule(outLayer)
        self.ffn.addModule(bias)

        # establish connections between layers
        self.in_to_hidden = FullConnection(inLayer, hiddenLayer)
        self.hidden_to_out = FullConnection(hiddenLayer, outLayer)
        self.ffn.addConnection(self.in_to_hidden)
        self.ffn.addConnection(self.hidden_to_out)
        # A bias unit only contributes once it is connected; adding the
        # module alone (as was done before) leaves the network bias-free.
        self.ffn.addConnection(FullConnection(bias, hiddenLayer))
        self.ffn.addConnection(FullConnection(bias, outLayer))

        # necessary, sort layers into correct/certain order
        self.ffn.sortModules()

        # dataset objects
        self.train_ds = SupervisedDataSet(nFeatures, nClasses)
        self.validate_ds = SupervisedDataSet(nFeatures, nClasses)

    # train network
    def train(self, TrainX, TrainY, ValidateX, ValidateY,
              learningrate=.0775, momentum=.1, maxEpochs=500,
              continueEpochs=10):
        """
        Re-randomises the weights and trains until convergence.

        TrainX, TrainY, ValidateX, ValidateY: indexable by row; the X
            arguments must expose .shape[0] (e.g. numpy arrays).
        learningrate, momentum: passed to BackpropTrainer.
        maxEpochs, continueEpochs: passed to trainUntilConvergence.

        On success self.train_errors and self.val_errors hold the values
        returned by trainUntilConvergence. A training failure is reported
        on stdout and does not propagate (best effort).

        Returns: the string 'ANN'
        """
        # clear old dataset
        self.train_ds.clear()
        self.validate_ds.clear()

        # add data to dataset object (ds)
        for i in range(TrainX.shape[0]):
            self.train_ds.addSample(TrainX[i], TrainY[i])
        for i in range(ValidateX.shape[0]):
            self.validate_ds.addSample(ValidateX[i], ValidateY[i])

        # randomize weights
        self.ffn.randomize()

        # Backprop trainer object
        self.trainer = BackpropTrainer(self.ffn, learningrate=learningrate,
                                       momentum=momentum)
        try:
            with Timer():
                self.train_errors, self.val_errors = \
                    self.trainer.trainUntilConvergence(
                        trainingData=self.train_ds,
                        validationData=self.validate_ds,
                        maxEpochs=maxEpochs,
                        continueEpochs=continueEpochs)
        except Exception as err:
            # Exception (not a bare except) so that Ctrl-C and SystemExit
            # still stop the program; the cause is reported, not hidden.
            print("Error occured while training model in ANN.")
            print(repr(err))
        return 'ANN'

    # predict dependent variable for dataset
    def predict(self, data):
        """
        Activates the network on one sample or on a batch of samples.

        data: numpy array
            1-D: a single sample. 2-D: one sample per row.

        Returns: numpy array
            Single sample: the raw activation of the network.
            Batch, one output unit: shape (n_samples,).
            Batch, several output units: shape (n_samples, n_outputs).
        """
        # if only make prediction for one sample
        if len(data.shape) == 1:
            return self.ffn.activate(data)

        rows = [np.array(self.ffn.activate(sample), dtype=float).ravel()
                for sample in data]
        if not rows:
            return np.zeros(0)
        outputs = np.vstack(rows)
        if outputs.shape[1] == 1:
            # keep the historical flat shape for single-output networks
            return outputs[:, 0]
        return outputs
def ANN_blind_analysis(a_network, a_gene, a_dataset, boot_val):
    """
    Creates and trains a network that is created to reflect the structure
    of the hypothesized network.

    The regulators of a_gene are obtained from
    get_sub_list_from_network(a_network, a_gene, "gene,TF", 1), lower-cased,
    and restricted to the names present in the first experiment. They feed
    a linear input layer; a sigmoid hidden layer with one more unit than
    there are inputs leads to a single linear output that models a_gene.

    a_network: passed unchanged to get_sub_list_from_network.
    a_gene: string
        Name of the target gene (compared in lower case).
    a_dataset: sequence of mappings
        One mapping per experiment, from lower-case gene name to a value
        that float() accepts. Experiments for which
        check_missing_experiments rejects the inputs or the target are
        left out of the training set.
    boot_val: number
        How many training rounds (500 epochs each, starting from fresh
        random weights after the first) are run.

    Side effects: prints progress to stdout; whenever a round beats the
    best error so far, writes "<a_gene>_trained_net.xml" and calls
    export_to_gml.

    Returns: list
        [a_gene, '0', '0'] when the gene has no usable regulators or is
        absent from the first experiment; otherwise
        [a_gene, best_run_error, number_of_inputs].
    """
    regulatory_network = FeedForwardNetwork()

    # retrieving needed parameters from the input network
    upper_case_data_node_list = get_sub_list_from_network(
        a_network, a_gene, "gene,TF", 1)

    # to lower case for everything
    a_gene = a_gene.lower()
    # If the target gene is also a TF, leave it out here: it is handled
    # separately as the output of the network.
    data_node_list = [name.lower() for name in upper_case_data_node_list
                      if name.lower() != a_gene]
    print('what is in data_node_list:')
    print(data_node_list)

    if not data_node_list:
        print("No connections to " + a_gene + " found.")
        return [a_gene, '0', '0']

    # Check for missing entries in the dataset (DS)
    # For the main gene
    known_genes = a_dataset[0]
    if a_gene not in known_genes:
        return [a_gene, '0', '0']
    # For the linked genes. Build a new list instead of calling remove()
    # on the list being iterated, which skipped the element following
    # every removal.
    data_node_list = [name for name in data_node_list if name in known_genes]
    if not data_node_list:
        print("No connections to " + a_gene + " found.")
        return [a_gene, '0', '0']

    print(len(data_node_list))
    print(data_node_list)

    # describing network modules to be used
    input_dimention = len(data_node_list)
    inLayer = LinearLayer(input_dimention, name="Input_layer")
    hiddenLayer = SigmoidLayer(input_dimention + 1,
                               name="Hidden_sigmoid_layer_1")
    outLayer = LinearLayer(1, name="Output_layer")

    # Adding layers to network
    regulatory_network.addInputModule(inLayer)
    regulatory_network.addModule(hiddenLayer)
    regulatory_network.addOutputModule(outLayer)

    # Adding connections between layers
    in_to_hidden = FullConnection(inLayer, hiddenLayer)
    hidden_to_out = FullConnection(hiddenLayer, outLayer)
    regulatory_network.addConnection(in_to_hidden)
    regulatory_network.addConnection(hidden_to_out)
    get_nn_details(regulatory_network)

    regulatory_network.sortModules()

    # Formatting the dataset
    print("in_dimention =  %s" % input_dimention)
    DS = SupervisedDataSet(input_dimention, 1)

    # The order of tf_labels fixes which input node each TF feeds.
    tf_labels = list(data_node_list)
    print('node list contains: ')
    print(tf_labels + [a_gene])

    for experiment in a_dataset:
        tf_list = [experiment[tf] for tf in tf_labels]
        gene_list = [experiment[a_gene]]

        # View the input data sets
        print(tf_labels)
        print(tf_list)
        print(gene_list)

        if check_missing_experiments(tf_list) and \
                check_missing_experiments(gene_list):
            DS.appendLinked([float(value) for value in tf_list],
                            [float(value) for value in gene_list])

    print("......")
    print('Network before training')
    print(regulatory_network)

    pesos_conexiones(regulatory_network)

    print(regulatory_network.outputerror)

    # Training
    trainer = RPropMinusTrainer_Evolved(regulatory_network, verbose=False)
    trainer.setData(DS)

    # NOTE(review): a round whose error is 1000 or more is never saved,
    # and 1000 is what gets returned if no round improves on it.
    best_run_error = 1000
    boot_count = 0
    while boot_count < boot_val:
        round_label = 'Bootstrap round ' + str(boot_count + 1)
        print('\n')
        print(round_label)
        trainer.trainEpochs(500)
        get_nn_details(regulatory_network)
        # Corrected error
        current_run_error = trainer.total_error
        print(current_run_error)
        print(round_label + ' error: ' + str(current_run_error))

        if abs(current_run_error) < abs(best_run_error):
            best_run_error = current_run_error
            trained_net_filename = a_gene + '_trained_net.xml'
            NetworkWriter.writeToFile(regulatory_network,
                                      trained_net_filename)
            export_to_gml(regulatory_network, tf_labels, a_gene)

        # Start the next round from fresh random weights.
        regulatory_network.reset()
        regulatory_network.randomize()
        trainer = RPropMinusTrainer_Evolved(regulatory_network,
                                            verbose=False)
        trainer.setData(DS)
        boot_count += 1

    return [a_gene, best_run_error, len(tf_labels)]
class BlondieBrain:
    """
    Game-playing agent backed by a PyBrain feed-forward network.

    The network is either loaded from an XML file in datadir or built
    fresh as: linear input -> sigmoid -> sigmoid -> linear output, with
    both hidden layers as wide as the input.
    """

    def __init__(self, datadir, insize=None, outsize=None, paramfile=None):
        """
        datadir: directory used to load and save network files.
        insize: size of the input layer. If None it is the length of the
            encoding (see _game2input) of a fresh runner.Game().
        outsize: size of the output layer; 1 when None.
        paramfile: name of a file inside datadir to load the network from.
            The part of the name before "-bestof-" becomes self.name; if
            the pattern is absent a time-stamped name is generated. When
            paramfile is empty/None a new network is built.
        """
        self.datadir = datadir
        if insize is None:
            self.insize = len(self._game2input(runner.Game()))
        else:
            self.insize = insize
        self.outsize = 1 if outsize is None else outsize

        if paramfile:
            self.nn = NetworkReader.readFrom(
                os.path.join(self.datadir, paramfile))
            found = re.search(r"(.*)-bestof-(.*)", paramfile)
            # The fallback goes through the same sanitising as the name of
            # a fresh brain: save() uses self.name as a file name.
            self.name = found.group(1) if found else self._default_name()
        else:
            self.nn = self._build_network()
            self.name = self._default_name()

    @staticmethod
    def _default_name():
        """Time-stamped name with '.', ':' and ' ' replaced by '-'."""
        return re.sub("[.: ]", "-", "blondie-%s" % (datetime.datetime.now()))

    def _build_network(self):
        """Builds the fully connected in -> hidden -> hidden -> out net."""
        net = FeedForwardNetwork()
        inLayer = LinearLayer(self.insize)
        hiddenLayer1 = SigmoidLayer(self.insize)
        hiddenLayer2 = SigmoidLayer(self.insize)
        outLayer = LinearLayer(self.outsize)
        net.addInputModule(inLayer)
        net.addModule(hiddenLayer1)
        net.addModule(hiddenLayer2)
        net.addOutputModule(outLayer)
        net.addConnection(FullConnection(inLayer, hiddenLayer1))
        net.addConnection(FullConnection(hiddenLayer1, hiddenLayer2))
        net.addConnection(FullConnection(hiddenLayer2, outLayer))
        net.sortModules()
        return net

    def nextmove(self, game):
        """
        Returns the move chosen by the network for game: the single output
        truncated to int when outsize is 1, otherwise the index of the
        largest output.
        """
        output = self.nn.activate(self._game2input(game))
        if self.outsize == 1:
            # Index explicitly: int() on a one-element array is deprecated
            # in recent NumPy releases.
            return int(output[0])
        return output.argmax()

    def save(self, suffix=""):
        """Writes the network to <datadir>/<name><suffix>.xml."""
        f = os.path.join(self.datadir, self.name + suffix + ".xml")
        NetworkWriter.writeToFile(self.nn, f)

    def mutate(self):
        """Delegates to the network's mutate()."""
        self.nn.mutate()

    def randomize(self):
        """Delegates to the network's randomize()."""
        self.nn.randomize()

    def copy(self):
        """Returns a deep copy of this brain with its own network copy."""
        x = copy.deepcopy(self)
        x.nn = x.nn.copy()
        return x

    @classmethod
    def _game2input(cls, game):
        """
        Encodes game as a flat list of numbers for the network.

        The symbol of the player to move is len(game.moves) % 2. The list
        holds every cell of game.grid_columns (see _trsymb), followed by
        the per-line sums (see _trsum) of game.grid_columns,
        game.grid_rows and game.diags.
        """
        mysymbol = len(game.moves) % 2
        cells = [cls._trsymb(x, mysymbol)
                 for x in itertools.chain.from_iterable(game.grid_columns)]
        cols = [cls._trsum(c, mysymbol) for c in game.grid_columns]
        rows = [cls._trsum(r, mysymbol) for r in game.grid_rows]
        diags = [cls._trsum(d, mysymbol) for d in game.diags]
        return cells + cols + rows + diags

    @classmethod
    def _trsymb(cls, piece, mysymbol):
        """Transform symbol: 0 for None, 1 for my symbol, -1 otherwise."""
        if piece is None:
            return 0
        return 1 if piece == mysymbol else -1

    @classmethod
    def _trsum(cls, l, mysymbol):
        """Transform every symbol of l (see _trsymb) and sum the result."""
        return sum(cls._trsymb(piece, mysymbol) for piece in l)