Beispiel #1
0
    def __init__(self):
        """Load the data, build the three layers, train, and print a report.

        The input width is the length of the first data vector returned by
        ``self.getData()`` and the output width is the number of types.
        After training, the elapsed wall-clock time and the weights of the
        hidden and output layers are printed.
        """
        types, groundTruths, dataVectors = self.getData()

        # Layer sizes are taken from the data itself.
        numInputs = len(dataVectors[0])
        numOutputs = len(types)
        self.inputLayer = InputLayer(numInputs)
        self.hiddenLayer = HiddenLayer(numInputs, self.numOfHiddens, "lrelu")
        self.outputLayer = OutputLayer(self.numOfHiddens, numOutputs, "lrelu")

        # Time the whole training run.
        started = time.time()
        self.trainNetwork(types, groundTruths, dataVectors)
        elapsed = time.time() - started

        print("\nTime: " + str(elapsed))
        for report in (self.hiddenLayer.weights,
                       "------------------------------------",
                       self.outputLayer.weights):
            print(report)
Beispiel #2
0
 def __init__(self, shape, ActivFun):
     """Build the hidden ``Dense`` layers and the final ``OutputLayer``.

     Args:
         shape: sequence of layer sizes. Its length is the number of
             layers (input layer included) and element ``k`` is the
             number of nodes/neurons in layer ``k``.
         ActivFun: list whose elements are activation-function names
             or activation functions themselves.

             * one element: used for every non-input layer;
             * two elements: the first is used for all hidden layers,
               the last for the output layer;
             * otherwise: must contain exactly ``len(shape) - 1``
               entries, one per non-input layer.

     Raises:
         ValueError: if ``shape`` has fewer than two entries, or the
             length of ``ActivFun`` matches none of the cases above.

     After construction ``self.layers`` holds the hidden layers
     followed by the output layer.
     """
     self.shape = np.array(shape)
     self.size = self.shape.size

     # Every layer except the input layer gets an activation function.
     n_layers = self.size - 1
     if n_layers < 1:
         raise ValueError(
             "shape must contain at least an input and an output size, "
             "got %r" % (shape,))

     if len(ActivFun) == 1:
         self.ActivFun = [ActivFun[0]] * n_layers
     elif len(ActivFun) == 2:
         self.ActivFun = [ActivFun[0]] * (n_layers - 1) + [ActivFun[-1]]
     elif len(ActivFun) == n_layers:
         self.ActivFun = ActivFun
     else:
         # A real exception instead of `assert`: assertions are removed
         # under `python -O`, which would let a mismatched list through.
         raise ValueError(
             "ActivFun must have 1, 2 or %d entries, got %d"
             % (n_layers, len(ActivFun)))

     hidden_layers = [
         Dense((shape[k - 1], shape[k]), self.ActivFun[k - 1])
         for k in range(1, n_layers)
     ]
     output_layer = OutputLayer((shape[-2], shape[-1]), self.ActivFun[-1])
     self.layers = hidden_layers + [output_layer]
Beispiel #3
0
 def create_output_layer(self, vocab, domain_size, hidden_size):
     """Return a new ``OutputLayer`` built from the three given arguments.

     The arguments are forwarded positionally, in the order
     ``vocab``, ``domain_size``, ``hidden_size``.
     """
     output_layer = OutputLayer(vocab, domain_size, hidden_size)
     return output_layer
Beispiel #4
0
        #print(epoch) # to avoid unused variable error; delete later


# Manual smoke test: build a small network by hand and push one random
# input through it, one layer at a time.
np.random.seed(1)  # fixed seed so x and y are the same on every run

# data: x has shape (1, 10), y has shape (1, 3)
x = np.random.rand(1, 10)
y = np.random.rand(1, 3)

# network: layers are added in order; the second shape argument of each
# layer equals the first shape argument of the layer added after it.
# NOTE(review): `Inputlayer` is spelled with a lowercase "l", unlike
# `HiddenLayer` / `OutputLayer` -- confirm this matches the class name.
NN = NeuralNetwork(x, y)
NN.add(Inputlayer(x.shape, (1, 5)))
NN.add(HiddenLayer((1, 5), (1, 9), tanh))
NN.add(HiddenLayer((1, 9), (1, 3), tanh))
NN.add(OutputLayer((1, 3), y.shape))

# feedforward: each layer's output is the next layer's input
output1 = NN.layers[0].feedforward(x)
output2 = NN.layers[1].feedforward(output1)
output3 = NN.layers[2].feedforward(output2)
output4 = NN.layers[3].feedforward(output3)

# Disabled: end-to-end training call.
#print(NN.fit(x,y,1,1))
# The bare string below is disabled debug output; it is evaluated and
# discarded, so nothing is printed.
"""
print("outout1",output1)
print("outout2",output2)
print("outout3",output3)
print("outout4",output4)
"""
Beispiel #5
0
    def __init__(self, max_context_length, max_query_length):
        """Build and compile the Keras question-answering model.

        Stores the two length arguments and a settings dict, calls
        ``self.extract_training_inputs`` on a hard-coded path, then wires up
        the graph: one bidirectional LSTM each for context and query, the
        Similarity / Context2Query / Query2Context / MegaMerge layers, two
        stacked bidirectional LSTMs and finally the OutputLayer. The
        compiled model is stored in ``self.model``.

        Args:
            max_context_length: stored on the instance. NOTE(review): the
                context input shape below is hard-coded to 114 instead of
                using this value.
            max_query_length: stored on the instance. NOTE(review): the
                query input shape below is hard-coded to 20 instead of
                using this value.
        """
        self.max_context_length = max_context_length
        self.max_query_length = max_query_length
        # 'n' is used below as the feature size of both inputs and as the
        # number of units of every LSTM. The other keys are not read in this
        # method.
        # NOTE(review): 'learning_rate' (0.0001) is not what the optimizer
        # below uses (0.001) -- confirm which value is intended.
        self.settings = {
            'window_size': 2,
            'n': 100,
            'epochs': 5,
            'learning_rate': 0.0001
        }

        self.extract_training_inputs(
            "Question Answering System\\Training and testing data")

        # Parameterized variant, currently disabled in favour of the
        # hard-coded lengths 114 and 20:
        #input_h = tf.keras.Input(shape = (self.max_context_length, self.settings["n"]), dtype = "float32", name = "context_input")
        #input_u = tf.keras.Input(shape = (self.max_query_length, self.settings["n"]), dtype = "float32", name = "query_input")
        input_h = tf.keras.Input(shape=(114, self.settings["n"]),
                                 dtype="float32",
                                 name="context_input")
        input_u = tf.keras.Input(shape=(20, self.settings["n"]),
                                 dtype="float32",
                                 name="query_input")

        # Bidirectional LSTM encoders, one for the context and one for the
        # query. return_sequences=True keeps one output per timestep.
        lstm_context = tf.keras.layers.LSTM(
            self.settings["n"], recurrent_dropout=0, return_sequences=True
        )  # LSTM layer that will be wrapped for the context input
        bidirectional_context_layer = tf.keras.layers.Bidirectional(
            lstm_context
        )  # BiLSTM layer wrapping lstm_context
        h = bidirectional_context_layer(
            input_h
        )  # encoded context; presumably (batch, T, 2d) -- TODO confirm

        lstm_query = tf.keras.layers.LSTM(
            self.settings["n"], recurrent_dropout=0, return_sequences=True
        )  # LSTM layer that will be wrapped for the query input
        bidirectional_query_layer = tf.keras.layers.Bidirectional(
            lstm_query)  # BiLSTM layer wrapping lstm_query
        u = bidirectional_query_layer(
            input_u
        )  # encoded query; presumably (batch, J, 2d) -- TODO confirm

        #
        #
        #_____________________________ FORMING SIMILARITY MATRIX S _____________________________

        # Similarity is a project-defined layer. Presumably it holds a
        # trainable 1x6d weight vector applied to the concatenated context
        # (H) and query (U) encodings, S = alpha(H, U) -- verify against the
        # layer's implementation.
        # Note the calling conventions differ: Similarity receives one list,
        # the three layers below receive separate positional arguments.

        similarity_matrix = Similarity()([h, u])

        context2query = Context2Query()(u, similarity_matrix)

        query2context = Query2Context()(h, similarity_matrix)

        # Parameterized variant, currently disabled in favour of the
        # hard-coded context length 114:
        #megamerge = MegaMerge()(h, context2query, query2context, self.max_context_length)
        megamerge = MegaMerge()(h, context2query, query2context, 114)

        #
        #
        #_____________________________ MODELING LAYER _____________________________

        G = tf.transpose(megamerge, [
            0, 2, 1
        ])  # swap the last two axes; presumably gives (batch, timesteps, features) -- TODO confirm
        # NOTE(review): the message says "expanding dims" but the operation
        # above is a transpose.
        print("G after expanding dims is: ", G)

        lstm_m1 = tf.keras.layers.LSTM(
            self.settings["n"], recurrent_dropout=0,
            return_sequences=True)  # LSTM layer for M1
        bidirectional_m1_layer = tf.keras.layers.Bidirectional(
            lstm_m1)  # BiLSTM layer wrapping lstm_m1
        m1_tensor = bidirectional_m1_layer(
            G
        )  # M1: first modeling BiLSTM applied to G; presumably (batch, T, 2d) -- TODO confirm

        lstm_m2 = tf.keras.layers.LSTM(
            self.settings["n"], recurrent_dropout=0,
            return_sequences=True)  # LSTM layer for M2
        bidirectional_m2_layer = tf.keras.layers.Bidirectional(
            lstm_m2)  # BiLSTM layer wrapping lstm_m2
        m2_tensor = bidirectional_m2_layer(
            m1_tensor
        )  # M2: second modeling BiLSTM applied to M1; presumably (batch, T, 2d) -- TODO confirm

        # Debug output of the symbolic tensors.
        print("\nm1_tensor is: ", m1_tensor)
        print("m2_tensor is: ", m2_tensor)

        #
        #
        #_____________________________ OUTPUT LAYER _____________________________

        # Swap the last two axes of G, M1 and M2 before handing them to the
        # output layer. For G this undoes the transpose from the modeling
        # section: applying the permutation [0, 2, 1] twice restores the
        # original axis order, so G has megamerge's layout again.
        G = tf.transpose(G, [0, 2, 1])
        m1_tensor = tf.transpose(m1_tensor, [0, 2, 1])
        m2_tensor = tf.transpose(m2_tensor, [0, 2, 1])

        start_end_index_pred = OutputLayer(name="output_indices")(
            [G, m1_tensor, m2_tensor])

        model = tf.keras.Model(inputs=[input_h, input_u],
                               outputs=start_end_index_pred)
        sgd = tf.keras.optimizers.SGD(learning_rate=0.001)

        # some_loss_function and some_accuracy_metric are not defined in
        # this method; they must be provided elsewhere.
        model.compile(loss=some_loss_function,
                      optimizer=sgd,
                      metrics=[some_accuracy_metric])

        self.model = model
Beispiel #6
0
class Network:
    """Three-layer classifier (input, hidden, output) trained per sample.

    Constructing an instance reads the CSV file, builds the layers and
    trains until the running error ratio changes by less than ``precision``
    between two consecutive progress checks (one check per 100 samples).
    """

    numOfHiddens = 8  # width of the hidden layer
    # numOfOutputs = 4
    bias = 0.1  # value written into the last slot of every data vector
    lrate = 0.01  # learning rate passed to the layers' backprop

    sumError = 0  # running sum of the per-sample mean errors
    successRateLastTurn = .5  # error ratio seen at the previous check
    precision = 0.00001  # convergence threshold for the error ratio
    continueTraining = True  # cleared by trainEpoch once converged

    # Starting at 1 keeps the failure/success division in trainEpoch defined
    # before the first correct prediction.
    numOfSuccess = 1
    numOfFailure = 0

    def __init__(self):
        """Load the data, build the layers, train, and print the weights."""
        types, groundTruths, dataVectors = self.getData()
        # create all layers; sizes are derived from the data
        numInputs = len(dataVectors[0])
        self.inputLayer = InputLayer(numInputs)
        self.hiddenLayer = HiddenLayer(numInputs, self.numOfHiddens, "lrelu")
        self.outputLayer = OutputLayer(self.numOfHiddens, len(types), "lrelu")

        t0 = time.time()
        self.trainNetwork(types, groundTruths, dataVectors)
        t1 = time.time()
        print("\nTime: " + str(t1 - t0))
        print(self.hiddenLayer.weights)
        print("------------------------------------")
        print(self.outputLayer.weights)

    def getData(self):
        """Return ``(types, groundTruths, dataVectors)`` from the CSV file.

        Rows are shuffled in place first. The ground truths are extracted
        before ``_assignBias`` overwrites the label column of each row.
        """
        rawData = self.readCSV()
        shuffle(rawData)
        types, groundTruths = self.getGTs(rawData)
        dataVectors = np.array([self._assignBias(row) for row in rawData],
                               float)
        return types, groundTruths, dataVectors

    def _assignBias(self, vector):
        """Overwrite the last element of ``vector`` with ``self.bias``.

        The vector is modified in place and also returned.
        """
        vector[-1] = self.bias
        return vector

    def readCSV(self):
        """Return all rows of the sample file except the first (header) row."""
        # newline="" is what the csv module asks for, so that line endings
        # inside quoted fields are handled by the reader itself.
        with open("samples_4_classes_normalized.csv", mode="r",
                  newline="") as dataFile:
            return list(csv.reader(dataFile))[1:]

    def getGTs(self, rawData):
        """Return the class labels and a one-hot ground-truth matrix.

        Args:
            rawData: rows whose last element is the class label.

        Returns:
            ``(types, groundTruths)`` where ``types`` is the sorted list of
            distinct labels and ``groundTruths`` is an int array of shape
            ``(len(rawData), len(types))`` with a single 1 per row, in the
            column of that row's label.
        """
        # Sorted so that the label -> column mapping is the same on every
        # run; iterating a bare set of strings is not order-stable across
        # interpreter runs.
        types = sorted({row[-1] for row in rawData})
        column = {label: idx for idx, label in enumerate(types)}
        groundTruths = np.zeros((len(rawData), len(types)), int)

        for i, row in enumerate(rawData):
            groundTruths[i][column[row[-1]]] = 1
        return types, groundTruths

    def trainNetwork(self, types, groundTruths, dataVectors):
        """Run epochs until ``trainEpoch`` clears ``continueTraining``."""
        epoCounter = 0
        while self.continueTraining:
            epoCounter += 1
            self.trainEpoch(types, groundTruths, dataVectors, str(epoCounter))

    def trainEpoch(self, types, groundTruths, dataVectors, epoCounter):
        """Feed every sample once, reporting and checking every 100 samples.

        Args:
            types: unused here; kept for interface compatibility.
            groundTruths: one-hot rows matching ``dataVectors``.
            dataVectors: the input vectors.
            epoCounter: the epoch number, already converted to ``str``.

        Sets ``continueTraining`` to False and returns early once the error
        ratio differs from the previous check by less than ``precision``.
        """
        # NOTE(review): sumError, numOfSuccess and numOfFailure keep growing
        # across epochs while `counter` restarts at 1 each epoch, so the
        # ratios below mix totals from different spans -- confirm intent.
        # NOTE(review): with fewer than 100 samples the check below never
        # runs and trainNetwork never stops.
        total = len(dataVectors)
        samples = zip(dataVectors, groundTruths)
        for counter, (vector, groundTruth) in enumerate(samples, start=1):
            self.feedSample(vector, groundTruth)

            # control operation
            if counter % 100 != 0:
                continue
            errorRatio = self.sumError / counter
            print("Epo: " + epoCounter + " Data: " + str(counter) + "/" +
                  str(total) + " Prec: " + " TErr: " +
                  str(1 - errorRatio) + " Clf SR: " +
                  str(1 - self.numOfFailure / self.numOfSuccess),
                  end="\r")
            if abs(self.successRateLastTurn - errorRatio) < self.precision:
                self.continueTraining = False
                return
            self.successRateLastTurn = errorRatio

    def feedSample(self, dataVector, groundTruth):
        """Forward one sample, record its error and prediction, backprop."""
        actVectorInput = self.inputLayer.feedSample(dataVector)
        actVectorHidden = self.hiddenLayer.feedSample(actVectorInput)
        actVectorOutput = self.outputLayer.feedSample(actVectorHidden)

        errorVector = self.getErrorVector(actVectorOutput, groundTruth)
        self.sumError += sum(errorVector) / len(errorVector)
        self.predict(actVectorOutput, groundTruth)
        self.backprop(errorVector, actVectorHidden, actVectorInput)

    def getErrorVector(self, actVectorOutput, groundTruth):
        """Return the element-wise difference ``truth - activation``."""
        return [
            truth - act for act, truth in zip(actVectorOutput, groundTruth)
        ]

    def backprop(self, errorVector, actVectorHidden, actVectorInput):
        """Update the hidden layer first, then the output layer.

        The hidden layer's update receives the output layer object and runs
        before the output layer's own update.
        """
        self.hiddenLayer.backprop(self.lrate, actVectorInput, errorVector,
                                  self.outputLayer)
        self.outputLayer.backprop(self.lrate, errorVector, actVectorHidden)

    def predict(self, actVectorOutput, groundTruth):
        """Count a success if the strongest output is the labelled class."""
        if groundTruth[np.argmax(actVectorOutput)] > 0:
            self.numOfSuccess += 1
        else:
            self.numOfFailure += 1