Example 1
    def replaceTestData(self, testNodes, maxNeighbors=1000, maskNames=['x']):
        if self.batchesInferences:
            batch_size = self.batch_size
        else:
            batch_size = 1

        testing, testIDs = encode_data_VarLen(self.G,
                                              testNodes,
                                              self.attrKey,
                                              maxNeighbors,
                                              useActualLabs=self.useActualLabs,
                                              useInputX2=self.useInputX2,
                                              onlyLabs=self.onlyLabs,
                                              lastH=self.lastHH,
                                              nodeIDs=True)
        dataset_test = IndexableDataset(testing)
        self.stream_test = DataStream(dataset=dataset_test,
                                      iteration_scheme=SequentialScheme(
                                          examples=dataset_test.num_examples,
                                          batch_size=batch_size))
        #add masks; have to do this per source to avoid the "all dimensions must be equal" error
        #TODO: write our own padding transformer, the built-in one is clunky
        self.stream_test = Padding(self.stream_test, mask_sources=maskNames)
        #transpose them for rnn input
        self.stream_test = Mapping(self.stream_test, self.transpose_streamTest)
        self.num_examples_test = dataset_test.num_examples

        #replace sharedData with the test_all data
        self.test_all, names = self.iterateShared(self.stream_test,
                                                  makeShared=False,
                                                  name="test")

        #if we are doing test in batches
        if self.batchesInferences:
            for key in self.test_all:
                totalTestBatches = len(self.test_all[key])
                if key != 'nodeID':
                    for i in range(0, totalTestBatches):
                        #if test data has more batches, we add more to shared data list
                        #else we just reset
                        if i >= self.totalBatches:
                            newKey = key + '_myinput'
                            self.sharedData[key].append(
                                shared(self.test_all[key][i],
                                       name=self.sharedName + '_' + newKey +
                                       '_test_' + str(i)))
                        else:
                            self.sharedData[key][i].set_value(
                                self.test_all[key][i], borrow=True)

                    self.sharedBatch[key].set_value(
                        self.sharedData[key][0].get_value(borrow=True),
                        borrow=True)

            self.stream_test_int = IntStream(0, totalTestBatches, 1,
                                             'int_stream')
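The pattern above keeps one set of shared variables per batch in sharedData and a single sharedBatch variable that the compiled Theano function actually reads, swapping device buffers with set_value/get_value(borrow=True). A minimal sketch of that mechanism in isolation, using only plain Theano and NumPy; the names shared_batch/shared_data are hypothetical and the graph is a trivial sum rather than the RNN:

import numpy as np
import theano
import theano.tensor as T
from theano import shared

# hypothetical stand-ins: three "batches" of data, split up front
batches = [np.random.rand(4, 2).astype(theano.config.floatX) for _ in range(3)]

# keep every batch on the device as its own shared variable ...
shared_data = [shared(b, name='x_batch_%d' % i) for i, b in enumerate(batches)]
# ... plus one shared variable that the compiled function reads
shared_batch = shared(batches[0], name='x_myinput')

# trivial graph standing in for the RNN: sum of the current batch
y_hat = T.sum(shared_batch)
f = theano.function(inputs=[], outputs=y_hat)

# "swap" batches exactly as replaceTestData/predInBatches do:
# copy the device value of batch i into the input variable and rerun f
for i in range(len(shared_data)):
    shared_batch.set_value(shared_data[i].get_value(borrow=True), borrow=True)
    print(i, f())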
Example 2
    def replaceTestData(self,
                        testNodes,
                        useInputX2=False,
                        maxNeighbors=1000,
                        maskNames=['x']):
        if self.batchesInferences:
            batch_size = self.batch_size
        else:
            batch_size = 1

        #replace sharedData with the test_all data
        self.test_all, names = self.iterateShared(self.stream_test,
                                                  makeShared=False,
                                                  name="test")

        #if we are doing test in batches
        if self.batchesInferences:
            for key in self.test_all:
                totalTestBatches = len(self.test_all[key])
                if key != 'nodeID':
                    for i in range(0, totalTestBatches):
                        #if test data has more batches, we add more to shared data list
                        #else we just reset
                        if i >= self.totalBatches:
                            newKey = key + '_myinput'
                            self.sharedData[key].append(
                                shared(self.test_all[key][i],
                                       name=self.sharedName + '_' + newKey +
                                       '_test_' + str(i)))
                        else:
                            self.sharedData[key][i].set_value(
                                self.test_all[key][i], borrow=True)

                    self.sharedBatch[key].set_value(
                        self.sharedData[key][0].get_value(borrow=True),
                        borrow=True)

            self.stream_test_int = IntStream(0, totalTestBatches, 1,
                                             'int_stream')
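The inner loop above either overwrites an existing shared variable in place or appends a new one when the test split has more batches than were allocated for train plus validation (i >= self.totalBatches). The same grow-or-overwrite logic in isolation, with hypothetical names and plain Theano:

import numpy as np
from theano import shared

floatX = 'float32'
# pretend train+valid allocated 2 shared batches up front
shared_list = [shared(np.zeros((4, 3), dtype=floatX), name='x_batch_%d' % i)
               for i in range(2)]
total_batches = len(shared_list)

# 3 test batches: one more than we have room for
test_batches = [np.random.rand(4, 3).astype(floatX) for _ in range(3)]

for i, batch in enumerate(test_batches):
    if i >= total_batches:
        # test data has more batches, so grow the list with a new shared variable
        shared_list.append(shared(batch, name='x_batch_test_%d' % i))
    else:
        # otherwise just overwrite the existing device buffer
        shared_list[i].set_value(batch, borrow=True)

print(len(shared_list))   # 3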
Example 3
    def dataToShare(self, makeShared):

        sharedDataTrain, sharedNamesTrain = self.iterateShared(
            self.stream_train, makeShared=makeShared, name="train")
        sharedDataValid, sharedNamesValid = self.iterateShared(
            self.stream_valid, makeShared=makeShared, name="valid")
        self.sharedNames = sharedNamesTrain + sharedNamesValid

        #combine shared data
        sharedData = {}
        for key in sharedDataTrain:
            sharedData[key] = sharedDataTrain[key] + sharedDataValid[key]

        #now create new streams
        totalBatchesTrain = len(sharedDataTrain[key])
        totalBatchesValid = len(sharedDataValid[key])
        self.totalBatches = totalBatchesTrain + totalBatchesValid

        self.stream_train_int = IntStream(0, totalBatchesTrain, 1,
                                          'int_stream')
        self.stream_valid_int = IntStream(totalBatchesTrain, totalBatchesValid,
                                          1, 'int_stream')
        return sharedData
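IntStream is a project-specific class rather than part of Fuel or Blocks. Judging from how dataToShare and predInBatches use it, it only has to behave like a stream whose epoch iterator yields one dict per batch carrying an integer batch index under the key 'int_stream_From'. A hypothetical minimal stand-in, written purely to make that contract explicit (the real implementation may differ):

class IntStreamSketch(object):
    """Hypothetical minimal stand-in for IntStream(start, num, step, name)."""

    def __init__(self, start, num, step, name):
        self.start = start
        self.num = num
        self.step = step
        self.name = name

    def get_epoch_iterator(self, as_dict=True):
        # yield one dict per batch: {'int_stream_From': batch_index}
        for i in range(self.start, self.start + self.num, self.step):
            yield {self.name + '_From': i}

# usage mirroring dataToShare: train indices cover [0, totalBatchesTrain),
# valid indices cover [totalBatchesTrain, totalBatchesTrain + totalBatchesValid)
stream_train_int = IntStreamSketch(0, 5, 1, 'int_stream')
for batch in stream_train_int.get_epoch_iterator(as_dict=True):
    print(batch['int_stream_From'])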
Example 4
class RelationalRNNwMini(RelationalRNN):
    def __init__(self, G, trainNodes, validationNodes, dim, mini_dim,
                 summary_dim, input_dimx, input_dimxmini, **kwargs):
        RelationalRNN.__init__(self,
                               G=G,
                               trainNodes=trainNodes,
                               validationNodes=validationNodes,
                               **kwargs)
        self.dim = dim
        self.mini_dim = mini_dim
        self.summary_dim = summary_dim
        self.input_dimx = input_dimx
        self.input_dimxmini = input_dimxmini

    #train the network
    #this part uses theano + blocks directly
    def train(self):

        x = self.sharedBatch['x']
        x.name = 'x_myinput'
        xmini = self.sharedBatch['xmini']
        xmini.name = 'xmini_myinput'
        y = self.sharedBatch['y']
        y.name = 'y_myinput'

        # we need to provide data for the LSTM layer of size 4 * lstm_dim, see
        # the LSTM layer documentation for the explanation
        x_to_h = Linear(self.input_dimx,
                        self.dim,
                        name='x_to_h',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))
        xmini_to_h = Linear(self.input_dimxmini,
                            self.mini_dim,
                            name='xmini_to_h',
                            weights_init=IsotropicGaussian(),
                            biases_init=Constant(0.0))

        rnnwmini = RNNwMini(dim=self.dim,
                            mini_dim=self.mini_dim,
                            summary_dim=self.summary_dim)

        h_to_o = Linear(self.summary_dim,
                        1,
                        name='h_to_o',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))

        x_transform = x_to_h.apply(x)
        xmini_transform = xmini_to_h.apply(xmini)

        h = rnnwmini.apply(x=x_transform, xmini=xmini_transform)

        # only values of hidden units of the last timeframe are used for
        # the classification
        y_hat = h_to_o.apply(h[-1])
        #y_hat = Logistic().apply(y_hat)

        cost = SquaredError().apply(y, y_hat)
        cost.name = 'cost'

        rnnwmini.initialize()
        x_to_h.initialize()
        xmini_to_h.initialize()
        h_to_o.initialize()

        self.f = theano.function(inputs=[], outputs=y_hat)

        #print("self.f === ")
        #print(self.f())
        #print(self.f().shape)
        #print("====")

        self.cg = ComputationGraph(cost)
        m = Model(cost)

        algorithm = GradientDescent(cost=cost,
                                    parameters=self.cg.parameters,
                                    step_rule=RMSProp(learning_rate=0.01),
                                    on_unused_sources='ignore')
        valid_monitor = DataStreamMonitoringShared(
            variables=[cost],
            data_stream=self.stream_valid_int,
            prefix="valid",
            sharedBatch=self.sharedBatch,
            sharedData=self.sharedData)
        train_monitor = TrainingDataMonitoring(variables=[cost],
                                               prefix="train",
                                               after_epoch=True)

        sharedVarMonitor = SwitchSharedReferences(self.sharedBatch,
                                                  self.sharedData)
        tBest = self.track_best('valid_cost', self.cg)
        self.tracker = tBest[0]
        extensions = [sharedVarMonitor, valid_monitor] + tBest

        if self.debug:
            extensions.append(Printing())

        self.algorithm = algorithm
        self.extensions = extensions
        self.model = m
        self.mainloop = MainLoop(self.algorithm,
                                 self.stream_train_int,
                                 extensions=self.extensions,
                                 model=self.model)
        self.main_loop(True)

    #call this to train again
    #modify the blocks mainloop logic
    def main_loop(self, first):

        log_backend = config.log_backend
        self.mainloop.log = BACKENDS[log_backend]()
        if first:
            self.mainloop.status['training_started'] = False
        else:
            self.mainloop.status['training_started'] = True
        self.mainloop.status['epoch_started'] = False
        self.mainloop.status['epoch_interrupt_received'] = False
        self.mainloop.status['batch_interrupt_received'] = False
        self.mainloop.run()
        #make sure we have the best model
        self.tracker.set_best_model()

    #override indexData: load presaved synthetic data instead of indexing the graph
    def indexData(self):
        self.stream_train = self.readSynthetic('train')
        self.stream_valid = self.readSynthetic('valid')
        self.stream_test = self.readSynthetic('test')

    #helper function to read synthetic data
    def readSynthetic(self, name):
        stream = {'x': None, 'xmini': None, 'y': None}
        stream['x'] = np.load("../experiments/data/RNNwMiniSynthetic_" + name +
                              "_x.npy")
        stream['xmini'] = np.load("../experiments/data/RNNwMiniSynthetic_" +
                                  name + "_xmini.npy")
        stream['y'] = np.load("../experiments/data/RNNwMiniSynthetic_" + name +
                              "_y.npy")
        return stream

    #iterate over data stream and make into shared data
    def iterateShared(self, stream, makeShared=True, name="train"):
        names = []
        sharedData = defaultdict(lambda: [])
        for key in stream:
            numBatches = stream[key].shape[0]
            for i in range(0, numBatches):
                data = stream[key][i]
                if len(stream[key][i].shape) == 1:
                    data = np.reshape(data, (data.shape[0], 1))

                newKey = key + '_myinput'
                namePost = self.sharedName + '_' + newKey + '_' + name + "_" + str(
                    i)
                names.append(namePost)
                if makeShared:
                    sharedData[key].append(shared(data, name=namePost))
                else:
                    sharedData[key].append(data)

        return (sharedData, names)

    #convert data to shared data so that everything is on the gpu
    def dataToShare(self, makeShared):

        sharedDataTrain, sharedNamesTrain = self.iterateShared(
            self.stream_train, makeShared=makeShared, name="train")
        sharedDataValid, sharedNamesValid = self.iterateShared(
            self.stream_valid, makeShared=makeShared, name="valid")
        self.sharedNames = sharedNamesTrain + sharedNamesValid

        #combine shared data
        sharedData = {}
        for key in sharedDataTrain:
            sharedData[key] = sharedDataTrain[key] + sharedDataValid[key]

        #now create new streams
        totalBatchesTrain = len(sharedDataTrain[key])
        totalBatchesValid = len(sharedDataValid[key])
        self.totalBatches = totalBatchesTrain + totalBatchesValid

        self.stream_train_int = IntStream(0, totalBatchesTrain, 1,
                                          'int_stream')
        self.stream_valid_int = IntStream(totalBatchesTrain, totalBatchesValid,
                                          1, 'int_stream')
        return sharedData

    #when making predictions, replace buffer of data with new test data
    #for now, only use test data
    def replaceTestData(self,
                        testNodes,
                        useInputX2=False,
                        maxNeighbors=1000,
                        maskNames=['x']):
        if self.batchesInferences:
            batch_size = self.batch_size
        else:
            batch_size = 1

        #replace sharedData with the test_all data
        self.test_all, names = self.iterateShared(self.stream_test,
                                                  makeShared=False,
                                                  name="test")

        #if we are doing test in batches
        if self.batchesInferences:
            for key in self.test_all:
                totalTestBatches = len(self.test_all[key])
                if key != 'nodeID':
                    for i in range(0, totalTestBatches):
                        #if test data has more batches, we add more to shared data list
                        #else we just reset
                        if i >= self.totalBatches:
                            newKey = key + '_myinput'
                            self.sharedData[key].append(
                                shared(self.test_all[key][i],
                                       name=self.sharedName + '_' + newKey +
                                       '_test_' + str(i)))
                        else:
                            self.sharedData[key][i].set_value(
                                self.test_all[key][i], borrow=True)

                    self.sharedBatch[key].set_value(
                        self.sharedData[key][0].get_value(borrow=True),
                        borrow=True)

            self.stream_test_int = IntStream(0, totalTestBatches, 1,
                                             'int_stream')

    #output MSE and predictions; this synthetic model only supports inference in batches
    def predictBAE(self, changeLabel=True):
        if not self.batchesInferences:
            raise ValueError("predictBAE requires batchesInferences=True for this model")
        MSE, predictions = self.predInBatches(changeLabel)

        return (MSE, predictions)

    #predict in batches of data, faster since we utilize gpu but perhaps less accurate
    def predInBatches(self, changeLabel):
        #predictions stay empty here: the synthetic streams carry no node IDs to key them by
        predictions = {}
        sqError = 0.0
        total = 0
        epoch_iterator = (self.stream_test_int.get_epoch_iterator(
            as_dict=True))
        while True:
            try:
                batch = next(epoch_iterator)
                batchInd = batch['int_stream_From']
                #switch batches before running
                for key in self.sharedBatch:
                    self.sharedBatch[key].set_value(
                        self.sharedData[key][batchInd].get_value(borrow=True),
                        borrow=True)

                preds = self.f()
                batchLen = self.test_all['y'][batchInd].shape[0]
                #iterate through a batch and accumulate the squared error
                for i in range(0, batchLen):
                    total += 1
                    actual = self.test_all['y'][batchInd][i][0]
                    pred = preds[i][0]
                    sqError += (actual - pred) * (actual - pred)

            except StopIteration:
                break
        MSE = sqError / total
        return (MSE, predictions)
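The train method above mixes custom pieces (RNNwMini, DataStreamMonitoringShared, SwitchSharedReferences, IntStream) with standard Blocks bricks. A minimal self-contained sketch of just the standard part, assuming only Blocks, Theano, and NumPy: two Linear bricks around a placeholder hidden transform, a SquaredError cost, and a parameterless theano.function reading from shared inputs the way self.f does. The shapes and variable names here are illustrative, not taken from the original data:

import numpy as np
import theano
import theano.tensor as T
from theano import shared
from blocks.bricks import Linear
from blocks.bricks.cost import SquaredError
from blocks.initialization import IsotropicGaussian, Constant
from blocks.graph import ComputationGraph

floatX = theano.config.floatX
# illustrative shared inputs playing the role of sharedBatch['x'] / sharedBatch['y']
x = shared(np.random.rand(8, 3).astype(floatX), name='x_myinput')
y = shared(np.random.rand(8, 1).astype(floatX), name='y_myinput')

x_to_h = Linear(3, 5, name='x_to_h',
                weights_init=IsotropicGaussian(), biases_init=Constant(0.0))
h_to_o = Linear(5, 1, name='h_to_o',
                weights_init=IsotropicGaussian(), biases_init=Constant(0.0))

h = T.tanh(x_to_h.apply(x))        # placeholder for the recurrent transform
y_hat = h_to_o.apply(h)
cost = SquaredError().apply(y, y_hat)
cost.name = 'cost'

x_to_h.initialize()
h_to_o.initialize()

f = theano.function(inputs=[], outputs=y_hat)   # analogue of self.f
cg = ComputationGraph(cost)                      # parameters live in cg.parameters
print(f().shape, len(cg.parameters))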
Example 5
class RelationalRNN(object):
    def __init__(self,
                 G=0,
                 trainNodes=0,
                 validationNodes=0,
                 dim=10,
                 batch_size=100,
                 num_epochs=0,
                 save_path='',
                 max_epochs=1000,
                 maxNeighbors=100,
                 attrKey='attr',
                 debug=False,
                 load_path='',
                 epsilon=0.0001,
                 useActualLabs=False,
                 onlyLabs=False,
                 usePrevWeights=False,
                 dataAug="none",
                 pageRankOrder="F",
                 sharedName="sharedData",
                 batchesInferences=False,
                 usePro=False,
                 lastH=False,
                 useInputX2=False):
        self.epsilon = epsilon
        self.attrKey = attrKey
        self.batch_size = batch_size
        self.G = G
        self.useActualLabs = useActualLabs
        #indexData and replaceTestData read self.useInputX2, so store it here
        self.useInputX2 = useInputX2
        self.onlyLabs = onlyLabs
        self.trainNodes = trainNodes
        self.validationNodes = validationNodes
        self.maxNeighbors = maxNeighbors
        self.usePrevWeights = usePrevWeights
        self.dataAug = dataAug
        self.pageRankOrder = pageRankOrder
        self.sharedName = sharedName
        self.batchesInferences = batchesInferences
        self.dim = dim
        self.num_epochs = num_epochs
        self.max_epochs = max_epochs
        self.debug = debug
        self.save_path = save_path
        self.usePro = usePro
        self.lastHH = lastH

        #put data into RNN format
        self.indexData()

        self.sharedData = {}
        #share data for gpu
        self.sharedData = self.dataToShare(makeShared=True)
        self.sharedBatch = {}
        for key in self.sharedData:
            if key != 'nodeID':
                self.sharedBatch[key] = shared(
                    self.sharedData[key][0].get_value(),
                    name="sharedBatch_" + key + "_myinput")
            else:
                self.sharedBatch[key] = self.sharedData[key][0]

    #function used for collective classification when we want pretrained weights
    def resetData(self):
        self.indexData()
        sharedData = self.dataToShare(makeShared=False)
        #reset our data
        for key in sharedData:
            for i in range(0, self.totalBatches):
                if key != 'nodeID':
                    self.sharedData[key][i].set_value(sharedData[key][i],
                                                      borrow=True)
                else:
                    self.sharedData[key][i] = sharedData[key][i]

            #reset sharedbatch as well
            if key != 'nodeID':
                self.sharedBatch[key].set_value(
                    self.sharedData[key][0].get_value(borrow=True),
                    borrow=True)
            else:
                self.sharedBatch[key] = self.sharedData[key][0]

    #given the graph G, encode each node v and its neighbors as a sequential input
    def indexData(self):
        labCounts = graph_helper.getLabelCounts(
            self.G, self.trainNodes + self.validationNodes)
        trainXY, trainIDs = encode_data_VarLen(
            self.G,
            self.trainNodes,
            self.attrKey,
            self.maxNeighbors,
            usePrevWeights=self.usePrevWeights,
            useActualLabs=self.useActualLabs,
            onlyLabs=self.onlyLabs,
            useInputX2=self.useInputX2,
            labCounts=labCounts,
            dataAug=self.dataAug,
            pageRankOrder=self.pageRankOrder,
            usePro=self.usePro,
            lastH=self.lastHH,
            nodeIDs=True)
        validationXY, testIDs = encode_data_VarLen(
            self.G,
            self.validationNodes,
            self.attrKey,
            self.maxNeighbors,
            labCounts=labCounts,
            usePrevWeights=self.usePrevWeights,
            useActualLabs=self.useActualLabs,
            onlyLabs=self.onlyLabs,
            useInputX2=self.useInputX2,
            pageRankOrder=self.pageRankOrder,
            usePro=self.usePro,
            lastH=self.lastHH,
            nodeIDs=True)
        self.input_dimx1 = trainXY['x'][0].shape[1]
        if 'x2' in trainXY:
            self.input_dimx2 = trainXY['x2'].shape[1]

        dataset_train = IndexableDataset(trainXY)
        dataset_valid = IndexableDataset(validationXY)
        self.num_examples_train = dataset_train.num_examples
        self.num_examples_valid = dataset_valid.num_examples
        if self.usePro:
            transpose_stream = self.transpose_streamPro
        else:
            transpose_stream = self.transpose_stream

        self.stream_train = DataStream(dataset=dataset_train,
                                       iteration_scheme=ShuffledScheme(
                                           examples=dataset_train.num_examples,
                                           batch_size=self.batch_size))
        self.stream_train = Padding(self.stream_train, mask_sources=['x'])
        self.stream_train = Mapping(self.stream_train, transpose_stream)

        self.stream_valid = DataStream(dataset=dataset_valid,
                                       iteration_scheme=ShuffledScheme(
                                           examples=dataset_valid.num_examples,
                                           batch_size=self.batch_size))
        self.stream_valid = Padding(self.stream_valid, mask_sources=['x'])
        self.stream_valid = Mapping(self.stream_valid, transpose_stream)

    #when making predictions, replace buffer of data with new test data
    def replaceTestData(self, testNodes, maxNeighbors=1000, maskNames=['x']):
        if self.batchesInferences:
            batch_size = self.batch_size
        else:
            batch_size = 1

        testing, testIDs = encode_data_VarLen(self.G,
                                              testNodes,
                                              self.attrKey,
                                              maxNeighbors,
                                              useActualLabs=self.useActualLabs,
                                              useInputX2=self.useInputX2,
                                              onlyLabs=self.onlyLabs,
                                              lastH=self.lastHH,
                                              nodeIDs=True)
        dataset_test = IndexableDataset(testing)
        self.stream_test = DataStream(dataset=dataset_test,
                                      iteration_scheme=SequentialScheme(
                                          examples=dataset_test.num_examples,
                                          batch_size=batch_size))
        #add masks; have to do this per source to avoid the "all dimensions must be equal" error
        #TODO: write our own padding transformer, the built-in one is clunky
        self.stream_test = Padding(self.stream_test, mask_sources=maskNames)
        #transpose them for rnn input
        self.stream_test = Mapping(self.stream_test, self.transpose_streamTest)
        self.num_examples_test = dataset_test.num_examples

        #replace sharedData with the test_all data
        self.test_all, names = self.iterateShared(self.stream_test,
                                                  makeShared=False,
                                                  name="test")

        #if we are doing test in batches
        if self.batchesInferences:
            for key in self.test_all:
                totalTestBatches = len(self.test_all[key])
                if key != 'nodeID':
                    for i in range(0, totalTestBatches):
                        #if test data has more batches, we add more to shared data list
                        #else we just reset
                        if i >= self.totalBatches:
                            newKey = key + '_myinput'
                            self.sharedData[key].append(
                                shared(self.test_all[key][i],
                                       name=self.sharedName + '_' + newKey +
                                       '_test_' + str(i)))
                        else:
                            self.sharedData[key][i].set_value(
                                self.test_all[key][i], borrow=True)

                    self.sharedBatch[key].set_value(
                        self.sharedData[key][0].get_value(borrow=True),
                        borrow=True)

            self.stream_test_int = IntStream(0, totalTestBatches, 1,
                                             'int_stream')

    #given test nodes, make predictions
    def makePredictions(self,
                        testNodes,
                        maxNeighbors=1000,
                        changeLabel=True,
                        maskNames=['x'],
                        lastH=False):
        self.replaceTestData(testNodes, maxNeighbors, maskNames)

        #get predictions and score

        if lastH:
            accuracy, predictions, hiddenRep = self.predictBAE(
                changeLabel=changeLabel, lastH=lastH)
            return (accuracy, predictions, hiddenRep)
        else:
            accuracy, predictions = self.predictBAE(changeLabel=changeLabel,
                                                    lastH=lastH)
            return (accuracy, predictions)

    #convert data to shared data so that everything is on the gpu
    def dataToShare(self, makeShared):

        sharedDataTrain, sharedNamesTrain = self.iterateShared(
            self.stream_train, makeShared=makeShared, name="train")
        sharedDataValid, sharedNamesValid = self.iterateShared(
            self.stream_valid, makeShared=makeShared, name="valid")
        self.sharedNames = sharedNamesTrain + sharedNamesValid

        #combine shared data
        sharedData = {}
        for key in sharedDataTrain:
            sharedData[key] = sharedDataTrain[key] + sharedDataValid[key]

        #now create new streams
        totalBatchesTrain = len(sharedDataTrain[key])
        totalBatchesValid = len(sharedDataValid[key])
        self.totalBatches = totalBatchesTrain + totalBatchesValid

        self.stream_train_int = IntStream(0, totalBatchesTrain, 1,
                                          'int_stream')
        self.stream_valid_int = IntStream(totalBatchesTrain, totalBatchesValid,
                                          1, 'int_stream')
        return sharedData

    #iterate over data stream and make into shared data
    def iterateShared(self, stream, makeShared=True, name="train"):
        names = []
        sharedData = defaultdict(lambda: [])
        for i, batch in enumerate(stream.get_epoch_iterator(as_dict=True)):
            for key in batch:
                newKey = key + '_myinput'
                namePost = self.sharedName + '_' + newKey + '_' + name + "_" + str(
                    i)
                names.append(namePost)
                if makeShared and key != 'nodeID':
                    sharedData[key].append(shared(batch[key], name=namePost))
                else:
                    sharedData[key].append(batch[key])

        return (sharedData, names)

    #predict in batches of data, faster since we utilize gpu but perhaps less accurate
    def predInBatches(self, changeLabel):
        err1 = 0.0
        err0 = 0.0
        count1 = 0
        count0 = 0
        predictions = {}

        epoch_iterator = (self.stream_test_int.get_epoch_iterator(
            as_dict=True))
        while True:
            try:
                batch = next(epoch_iterator)
                batchInd = batch['int_stream_From']
                #switch batches before running
                for key in self.test_all:
                    if key != 'nodeID':
                        self.sharedBatch[key].set_value(
                            self.sharedData[key][batchInd].get_value(
                                borrow=True),
                            borrow=True)

                preds = self.f()
                #print(self.test_all['y'][batchInd].shape)
                batchLen = self.test_all['y'][batchInd].shape[0]
                #iterate through a batch
                for i in range(0, batchLen):
                    nodeID = self.test_all['nodeID'][batchInd][i]
                    actual = self.test_all['y'][batchInd][i][0]
                    pred = preds[i][0]

                    predictions[nodeID] = pred
                    if changeLabel:
                        self.G.node[nodeID]['dynamic_label'] = [pred]
                    self.G.node[nodeID]['pred_label'] = [pred]

                    if actual == 1:
                        err1 += (1 - pred)
                        count1 += 1
                    elif actual == 0:
                        err0 += pred
                        count0 += 1

            except StopIteration:
                break
        return (err1, err0, count1, count0, predictions)

    #predict per example
    def predInExample(self, changeLabel, lastH=False):
        err1 = 0.0
        err0 = 0.0
        count1 = 0
        count0 = 0
        predictions = {}
        hiddenRep = {}
        numExamples = len(self.test_all['nodeID'])
        for i in range(0, numExamples):
            #switch batches before running
            for key in self.test_all:
                if key != 'nodeID':
                    self.sharedBatch[key].set_value(self.test_all[key][i],
                                                    borrow=True)
                else:
                    self.sharedBatch[key] = self.test_all[key][i]

            preds = self.f()

            nodeID = self.test_all['nodeID'][i][0]
            actual = self.test_all['y'][i][0][0]
            pred = preds[0][0]
            predictions[nodeID] = pred

            if lastH:
                lH = self.lastH()[0]
                hiddenRep[nodeID] = lH.tolist()

            if changeLabel:
                self.G.node[nodeID]['dynamic_label'] = [pred]
            self.G.node[nodeID]['pred_label'] = [pred]

            if actual == 1:
                err1 += (1 - pred)
                count1 += 1
            elif actual == 0:
                err0 += pred
                count0 += 1

        if lastH:
            return (err1, err0, count1, count0, predictions, hiddenRep)
        else:
            return (err1, err0, count1, count0, predictions)

    #grab activations per train or valid set
    def generateHidden(self, name="train"):
        if name == "train":
            tSet = self.stream_train_int
        elif name == "valid":
            tSet = self.stream_valid_int

        hiddenRep = {}
        for batch in tSet.get_epoch_iterator(as_dict=True):
            i = batch['int_stream_From']
            #switch batches before running
            for key in self.sharedBatch:
                if key != 'nodeID':
                    self.sharedBatch[key].set_value(
                        self.sharedData[key][i].get_value(borrow=True),
                        borrow=True)
                else:
                    self.sharedBatch[key] = self.sharedData[key][i]

            lastH = self.lastH()
            for j in range(0, lastH.shape[0]):
                nodeID = self.sharedBatch['nodeID'][j]
                hiddenRep[nodeID] = lastH[j].tolist()

        return hiddenRep

    #output BAE scores and predictions
    def predictBAE(self, changeLabel=True, lastH=False):
        if self.batchesInferences:
            err1, err0, count1, count0, predictions = self.predInBatches(
                changeLabel)
        else:
            if lastH:
                err1, err0, count1, count0, predictions, hiddenRep = self.predInExample(
                    changeLabel, lastH=lastH)
            else:
                err1, err0, count1, count0, predictions = self.predInExample(
                    changeLabel, lastH=lastH)

        divideBy = 0
        if count1 != 0:
            err1 = err1 / count1
            divideBy += 1
        if count0 != 0:
            err0 = err0 / count0
            divideBy += 1

        if divideBy == 0:
            return (1.0, predictions)

        BAE = (err1 + err0) / divideBy

        if lastH:
            return (BAE, predictions, hiddenRep)
        else:
            return (BAE, predictions)

    # track the best model evaluated on the validation set
    def track_best(self, channel, cg):
        tracker = myTrackBest(channel)
        finishNoimprove = FinishIfNoImprovementEpsilonAfter(
            channel + '_best_so_far',
            epochs=self.num_epochs,
            epsilon=self.epsilon)
        finishAfter = FinishAfter(after_n_epochs=self.max_epochs)

        return [tracker, finishNoimprove, finishAfter]
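indexData and replaceTestData both build the same Fuel pipeline: an IndexableDataset wrapped in a DataStream, padded so the variable-length sequences in a batch line up (Padding also adds an 'x_mask' source), and finally passed through a Mapping that transposes each batch so the time axis comes first for the RNN. A minimal self-contained sketch of that pipeline with made-up toy sequences; the transpose function is a simplified stand-in for the class's transpose_stream helpers, and the shapes are illustrative only:

from collections import OrderedDict
import numpy as np
from fuel.datasets import IndexableDataset
from fuel.streams import DataStream
from fuel.schemes import ShuffledScheme
from fuel.transformers import Padding, Mapping

# toy variable-length sequences: 6 examples, each (timesteps, features)
x = np.empty(6, dtype=object)
for i, t in enumerate([2, 4, 3, 2, 4, 3]):
    x[i] = np.random.rand(t, 3).astype('float32')
y = np.random.rand(6, 1).astype('float32')

dataset = IndexableDataset(OrderedDict([('x', x), ('y', y)]))

stream = DataStream(dataset=dataset,
                    iteration_scheme=ShuffledScheme(examples=dataset.num_examples,
                                                    batch_size=2))
# pad only 'x'; Padding appends an 'x_mask' source marking the real timesteps
stream = Padding(stream, mask_sources=['x'])

def transpose_batch(data):
    # simplified stand-in for transpose_stream: (batch, time, feat) -> (time, batch, feat)
    x_b, x_mask, y_b = data
    return (x_b.transpose(1, 0, 2), x_mask.T, y_b)

stream = Mapping(stream, transpose_batch)

for x_b, mask_b, y_b in stream.get_epoch_iterator():
    print(x_b.shape, mask_b.shape, y_b.shape)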