def __init__(self):
    """Load the data, build the three layers, train, and print a report.

    The layer sizes come from the data returned by ``self.getData()``:
    the width of the first sample vector feeds the input and hidden
    layers, and the number of distinct types sizes the output layer.
    After training, the elapsed time and the hidden/output weights are
    printed.
    """
    labels, targets, samples = self.getData()
    featureCount = len(samples[0])

    # create all layers
    self.inputLayer = InputLayer(featureCount)
    self.hiddenLayer = HiddenLayer(featureCount, self.numOfHiddens, "lrelu")
    self.outputLayer = OutputLayer(self.numOfHiddens, len(labels), "lrelu")

    # time the whole training run
    startedAt = time.time()
    self.trainNetwork(labels, targets, samples)
    elapsed = time.time() - startedAt

    print("\nTime: " + str(elapsed))
    print(self.hiddenLayer.weights)
    print("------------------------------------")
    print(self.outputLayer.weights)
def __init__(self, shape, ActivFun):
    """Build the hidden layers and the output layer of the network.

    Input:
        shape: a tuple whose length is the number of layers and whose
            elements are the number of nodes/neurons in each layer.
        ActivFun: a list of activation functions (or their names).
            One entry is reused for every layer; two entries mean "first
            entry for every layer except the last, second entry for the
            output layer"; otherwise one entry per non-input layer is
            required.

    Desc:
        ``self.layers`` ends up holding one ``Dense`` object per hidden
        layer followed by a single ``OutputLayer`` object.
    """
    self.shape = np.array(shape)
    self.size = self.shape.size

    given = len(ActivFun)
    if given == 1:
        # same activation everywhere
        self.ActivFun = [ActivFun[0]] * (self.size - 1)
    elif given == 2:
        # one activation for the hidden layers, another for the output
        self.ActivFun = [ActivFun[0]] * (self.size - 2) + [ActivFun[-1]]
    else:
        assert given == self.size - 1
        self.ActivFun = ActivFun

    # layer i maps shape[i] inputs onto shape[i + 1] outputs
    hidden = [
        Dense((shape[i], shape[i + 1]), self.ActivFun[i])
        for i in range(self.size - 2)
    ]
    final = OutputLayer((shape[-2], shape[-1]), self.ActivFun[-1])
    self.layers = hidden + [final]
def create_output_layer(self, vocab, domain_size, hidden_size):
    """Build and return a new ``OutputLayer`` from the three arguments.

    The arguments are forwarded positionally, in the order given.
    """
    output_layer = OutputLayer(vocab, domain_size, hidden_size)
    return output_layer
# print(epoch)  # to avoid unused variable error; delete later

# ---------------------------------------------------------------------
# Ad-hoc smoke test: push one random sample through a four-layer network
# ---------------------------------------------------------------------
np.random.seed(1)  # fixed seed so every run draws the same numbers

# data: one input row of 10 values, one target row of 3 values
x = np.random.rand(1, 10)
y = np.random.rand(1, 3)

# network: input -> two tanh hidden layers -> output
NN = NeuralNetwork(x, y)
NN.add(Inputlayer(x.shape, (1, 5)))
NN.add(HiddenLayer((1, 5), (1, 9), tanh))
NN.add(HiddenLayer((1, 9), (1, 3), tanh))
NN.add(OutputLayer((1, 3), y.shape))

# feedforward: each layer consumes the previous layer's output
_nn_layers = NN.layers
output1 = _nn_layers[0].feedforward(x)
output2 = _nn_layers[1].feedforward(output1)
output3 = _nn_layers[2].feedforward(output2)
output4 = _nn_layers[3].feedforward(output3)

# print(NN.fit(x,y,1,1))
# Disabled debug output:
# print("outout1",output1)
# print("outout2",output2)
# print("outout3",output3)
# print("outout4",output4)
def __init__(self, max_context_length, max_query_length):
    """Assemble and compile the question-answering Keras model.

    Stores both length limits and the ``settings`` dictionary on the
    instance, calls ``extract_training_inputs`` on the training-data
    folder, and then wires up, in this order: a BiLSTM encoder for the
    context, a BiLSTM encoder for the query, the ``Similarity`` /
    ``Context2Query`` / ``Query2Context`` / ``MegaMerge`` stage, two
    stacked BiLSTM modeling layers and the final ``OutputLayer``.  The
    compiled model is stored in ``self.model``.

    NOTE(review): the input layers and ``MegaMerge`` use the literals 114
    and 20 instead of ``max_context_length`` / ``max_query_length``, and
    the optimizer uses 0.001 instead of ``settings['learning_rate']`` --
    confirm this is intended.
    """
    self.max_context_length = max_context_length
    self.max_query_length = max_query_length
    self.settings = {
        'window_size': 2,
        'n': 100,
        'epochs': 5,
        'learning_rate': 0.0001,
    }
    self.extract_training_inputs(
        "Question Answering System\\Training and testing data")

    units = self.settings["n"]

    def new_bilstm():
        # The LSTM is created first and then wrapped, so layers are
        # instantiated in the same order as when written out long-hand.
        recurrent = tf.keras.layers.LSTM(
            units, recurrent_dropout=0, return_sequences=True)
        return tf.keras.layers.Bidirectional(recurrent)

    # Parameterised variants kept for reference:
    # input_h = tf.keras.Input(shape = (self.max_context_length, self.settings["n"]), dtype = "float32", name = "context_input")
    # input_u = tf.keras.Input(shape = (self.max_query_length, self.settings["n"]), dtype = "float32", name = "query_input")
    input_h = tf.keras.Input(
        shape=(114, units), dtype="float32", name="context_input")
    input_u = tf.keras.Input(
        shape=(20, units), dtype="float32", name="query_input")

    # _____________________________ ENCODING LAYER _____________________________
    # One BiLSTM per input, each returning the full output sequence.
    h = new_bilstm()(input_h)  # encoded context
    u = new_bilstm()(input_u)  # encoded query

    # _____________________________ FORMING SIMILARITY MATRIX S _____________________________
    # S = alpha(H, U); the trainable weights are presumably owned by the
    # Similarity layer (defined elsewhere) -- TODO confirm.
    similarity_matrix = Similarity()([h, u])
    context2query = Context2Query()(u, similarity_matrix)
    query2context = Query2Context()(h, similarity_matrix)
    # megamerge = MegaMerge()(h, context2query, query2context, self.max_context_length)
    megamerge = MegaMerge()(h, context2query, query2context, 114)

    # _____________________________ MODELING LAYER _____________________________
    # Swap the last two axes so the sequence axis comes before the feature
    # axis, which is what the recurrent layers iterate over.
    G = tf.transpose(megamerge, [0, 2, 1])
    print("G after expanding dims is: ", G)

    m1_tensor = new_bilstm()(G)          # first modeling BiLSTM, fed with G
    m2_tensor = new_bilstm()(m1_tensor)  # second modeling BiLSTM, fed with M1
    print("\nm1_tensor is: ", m1_tensor)
    print("m2_tensor is: ", m2_tensor)

    # _____________________________ OUTPUT LAYER _____________________________
    # Swap the last two axes of G, M1 and M2 back before handing them to
    # the output layer.
    swap_last_two = [0, 2, 1]
    G = tf.transpose(G, swap_last_two)
    m1_tensor = tf.transpose(m1_tensor, swap_last_two)
    m2_tensor = tf.transpose(m2_tensor, swap_last_two)

    start_end_index_pred = OutputLayer(name="output_indices")(
        [G, m1_tensor, m2_tensor])

    model = tf.keras.Model(
        inputs=[input_h, input_u], outputs=start_end_index_pred)
    sgd = tf.keras.optimizers.SGD(learning_rate=0.001)
    model.compile(
        loss=some_loss_function,
        optimizer=sgd,
        metrics=[some_accuracy_metric],
    )
    self.model = model
class Network:
    """Classifier with one hidden layer, trained sample-by-sample from a CSV.

    Creating an instance reads the data set, builds the input, hidden and
    output layers, trains until the convergence test in ``trainEpoch``
    fires, and prints the elapsed time plus the learned weights.
    """

    numOfHiddens = 8  # number of neurons in the hidden layer
    # numOfOutputs = 4
    bias = 0.1  # constant written into the last slot of every sample
    lrate = 0.01  # learning rate handed to both layers' backprop
    sumError = 0  # running sum of the per-sample mean errors
    successRateLastTurn = .5  # error ratio stored at the end of the last epoch
    precision = 0.00001  # stop once the ratio moves by less than this
    continueTraining = True  # cleared by trainEpoch when training should stop
    numOfSuccess = 1  # starts at 1, so the success-rate denominator is never 0
    numOfFailure = 0

    def __init__(self):
        """Load the data, build the layers, train, and print a report."""
        types, groundTruths, dataVectors = self.getData()
        inputSize = len(dataVectors[0])

        # create all layers
        self.inputLayer = InputLayer(inputSize)
        self.hiddenLayer = HiddenLayer(inputSize, self.numOfHiddens, "lrelu")
        self.outputLayer = OutputLayer(self.numOfHiddens, len(types), "lrelu")

        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments.
        t0 = time.perf_counter()
        self.trainNetwork(types, groundTruths, dataVectors)
        t1 = time.perf_counter()

        print("\nTime: " + str(t1 - t0))
        print(self.hiddenLayer.weights)
        print("------------------------------------")
        print(self.outputLayer.weights)

    def getData(self):
        """Return ``(types, groundTruths, dataVectors)`` for the CSV data.

        Rows are shuffled first.  The labels are read before the last
        column of every row is overwritten with the bias value.
        """
        rawData = self.readCSV()
        shuffle(rawData)
        types, groundTruths = self.getGTs(rawData)
        dataVectors = np.array(
            [self._assignBias(row) for row in rawData], float)
        return types, groundTruths, dataVectors

    def _assignBias(self, vector):
        """Overwrite the last element of *vector* with the bias, in place."""
        vector[-1] = self.bias
        return vector

    def readCSV(self):
        """Return every row of the sample file except the header row."""
        # newline="" is what the csv module asks for when opening files.
        with open("samples_4_classes_normalized.csv", mode="r",
                  newline="") as dataFile:
            return list(csv.reader(dataFile))[1:]

    def getGTs(self, rawData):
        """Return the class labels and a one-hot ground-truth matrix.

        The label of a row is its last element.  Labels are sorted so the
        column assigned to each class is the same on every run (iterating
        a set of strings gives no stable order between runs).
        """
        types = sorted({row[-1] for row in rawData})
        columnOf = {label: column for column, label in enumerate(types)}
        groundTruths = np.zeros((len(rawData), len(types)), int)
        for i, row in enumerate(rawData):
            groundTruths[i, columnOf[row[-1]]] = 1
        return types, groundTruths

    def trainNetwork(self, types, groundTruths, dataVectors):
        """Run epochs until ``continueTraining`` is cleared."""
        epoCounter = 0
        while self.continueTraining:
            epoCounter += 1
            self.trainEpoch(types, groundTruths, dataVectors, str(epoCounter))

    def trainEpoch(self, types, groundTruths, dataVectors, epoCounter):
        """Feed every sample once, then test for convergence.

        Training stops when ``sumError / samples`` differs from the value
        stored after the previous epoch by less than ``precision``, or
        when there are no samples at all.

        NOTE(review): ``sumError`` is never reset, so the ratio mixes a
        sum over all epochs with a per-epoch count -- confirm whether it
        should be reset at the start of each epoch.
        """
        total = min(len(dataVectors), len(groundTruths))
        counter = 0
        for vector, groundTruth in zip(dataVectors, groundTruths):
            self.feedSample(vector, groundTruth)
            # control operation
            counter += 1
            if counter % 100 == 0:
                attempts = self.numOfSuccess + self.numOfFailure
                print(f"Epo: {epoCounter} Data: {counter}/{total}"
                      f" Prec:  TErr: {1 - self.sumError / counter}"
                      f" Clf SR: {self.numOfSuccess / attempts}",
                      end="\r")

        if counter == 0:
            # Nothing to train on: stop instead of dividing by zero below.
            self.continueTraining = False
            return

        errorRatio = self.sumError / counter
        if abs(self.successRateLastTurn - errorRatio) < self.precision:
            self.continueTraining = False
            return
        self.successRateLastTurn = errorRatio

    def feedSample(self, dataVector, groundTruth):
        """Forward one sample, update the statistics, then backpropagate."""
        actVectorInput = self.inputLayer.feedSample(dataVector)
        actVectorHidden = self.hiddenLayer.feedSample(actVectorInput)
        actVectorOutput = self.outputLayer.feedSample(actVectorHidden)
        errorVector = self.getErrorVector(actVectorOutput, groundTruth)
        self.sumError += sum(errorVector) / len(errorVector)
        self.predict(actVectorOutput, groundTruth)
        self.backprop(errorVector, actVectorHidden, actVectorInput)

    def getErrorVector(self, actVectorOutput, groundTruth):
        """Return the element-wise difference ``truth - activation``."""
        return [
            truth - act for act, truth in zip(actVectorOutput, groundTruth)
        ]

    def backprop(self, errorVector, actVectorHidden, actVectorInput):
        """Update the hidden layer first, then the output layer."""
        self.hiddenLayer.backprop(self.lrate, actVectorInput, errorVector,
                                  self.outputLayer)
        self.outputLayer.backprop(self.lrate, errorVector, actVectorHidden)

    def predict(self, actVectorOutput, groundTruth):
        """Count a success when the strongest output is the true class."""
        if groundTruth[np.argmax(actVectorOutput)] > 0:
            self.numOfSuccess += 1
        else:
            self.numOfFailure += 1