# These snippets use TensorFlow 1.x-style APIs (tf.placeholder, tf.truncated_normal);
# they assume something like `import tensorflow.compat.v1 as tf`.
def create_conv_layer(input_data, input_channels_count, filters_count,
                      filter_shape, pool_shape, name):
    # Filter layout expected by tf.nn.conv2d: [height, width, in_channels, out_channels].
    conv_shape = [filter_shape[0], filter_shape[1], input_channels_count, filters_count]
    weights = tf.Variable(tf.truncated_normal(conv_shape, stddev=0.03),
                          name=name + '_weights')
    bias = tf.Variable(tf.truncated_normal([filters_count]), name=name + '_bias')
    # Stride-1 convolution with SAME padding keeps the spatial size unchanged.
    out_layer = tf.nn.conv2d(input_data, weights, [1, 1, 1, 1], padding='SAME')
    out_layer += bias
    out_layer = tf.nn.relu(out_layer)
    if pool_shape is None:
        return out_layer
    # Optional max pooling with a fixed stride of 2 halves each spatial dimension.
    ksize = [1, pool_shape[0], pool_shape[1], 1]
    strides = [1, 2, 2, 1]
    out_layer = tf.nn.max_pool(out_layer, ksize=ksize, strides=strides, padding='SAME')
    return out_layer
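# A minimal shape sketch for create_conv_layer, assuming 28x28 single-channel
# inputs as in main() later in this section; the placeholder and layer names
# here are illustrative, not part of the original code.
images = tf.placeholder(tf.float32, [None, 28, 28, 1])
l1 = create_conv_layer(images, 1, 28, [5, 5], [2, 2], name='sketch_1')  # -> [None, 14, 14, 28]
l2 = create_conv_layer(l1, 28, 56, [5, 5], [2, 2], name='sketch_2')     # -> [None, 7, 7, 56]
# Two stride-2 pools take 28 -> 14 -> 7, which is where the 7 * 7 * 56 reshape
# in main() comes from.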
def __init__(self, nHidden, seqLen, guidence, newNet):
    self.nHidden = nHidden
    self.seqLen = seqLen
    tmp = self.getEmbedding()
    self.embedding = tf.Variable(tmp)
    with tf.variable_scope("training_variable"):
        self.weights = {
            "ATT": tf.Variable(
                tf.truncated_normal(shape=[2 * self.nHidden, self.nHidden],
                                    stddev=0.08, name="text_att")),
            "ATTG": tf.Variable(
                tf.truncated_normal(shape=[200, self.nHidden],
                                    stddev=0.08, name="text_att2")),
            "ATTS": tf.Variable(
                tf.truncated_normal(shape=[self.nHidden, 1],
                                    stddev=0.08, name="text_att3")),
            "Fw1": tf.Variable(
                tf.truncated_normal(shape=[200, self.nHidden],
                                    stddev=0.08, name="init_fw1")),
            "Fw2": tf.Variable(
                tf.truncated_normal(shape=[200, self.nHidden],
                                    stddev=0.08, name="init_fw2")),
            "Bw1": tf.Variable(
                tf.truncated_normal(shape=[200, self.nHidden],
                                    stddev=0.08, name="init_bw1")),
            "Bw2": tf.Variable(
                tf.truncated_normal(shape=[200, self.nHidden],
                                    stddev=0.08, name="init_bw2")),
        }
        self.biases = {
            "Fw1": tf.Variable(tf.constant(0.01, shape=[self.nHidden], name="init_Fw1")),
            "Fw2": tf.Variable(tf.constant(0.01, shape=[self.nHidden], name="init_Fw2")),
            "Bw1": tf.Variable(tf.constant(0.01, shape=[self.nHidden], name="init_Bw1")),
            "Bw2": tf.Variable(tf.constant(0.01, shape=[self.nHidden], name="init_Bw2")),
        }
    self.X = tf.placeholder(tf.int32, [None, self.seqLen])
    self.pKeep = tf.placeholder(tf.float32)
    self.build(guidence, newNet)
def __init__(self):
    self.embedding = self.getEmb()
    self.embSize = self.embedding.shape[1]
    self.vocabSize = self.embedding.shape[0]
    # Each example is a fixed set of 5 attribute word ids.
    self.x = tf.placeholder(tf.int32, [None, 5])
    with tf.variable_scope("training_variable"):
        self.weights = {
            "MLP1": tf.Variable(
                tf.truncated_normal(shape=[self.embSize, int(self.embSize / 2)],
                                    stddev=0.08)),
            "MLP2": tf.Variable(
                tf.truncated_normal(shape=[int(self.embSize / 2), 1], stddev=0.08))
        }
        self.biases = {
            "MLP1": tf.Variable(
                tf.constant(0.01, shape=[int(self.embSize / 2)], dtype=tf.float32)),
            "MLP2": tf.Variable(tf.constant(0.01, shape=[1], dtype=tf.float32))
        }
    self.inputEmb = tf.nn.embedding_lookup(self.embedding, self.x)
    # A two-layer MLP scores each of the 5 embeddings, softmax turns the scores
    # into attention weights, and the weighted sum becomes the final state.
    p1 = tf.matmul(tf.reshape(self.inputEmb, [-1, self.embSize]),
                   self.weights["MLP1"]) + self.biases["MLP1"]
    p1 = tf.matmul(tf.nn.relu(p1), self.weights["MLP2"]) + self.biases["MLP2"]
    p1 = tf.reshape(p1, [-1, 5])
    p1 = tf.reshape(tf.nn.softmax(p1), [-1, 1, 5])
    self.finalState = tf.reshape(tf.matmul(p1, self.inputEmb), [-1, self.embSize])
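# The last matmul above is a batched product of [batch, 1, 5] attention weights
# with [batch, 5, embSize] embeddings. A standalone NumPy sketch of the same
# pooling, with toy sizes and illustrative names:
import numpy as np

batch, emb_size = 2, 4
emb = np.random.randn(batch, 5, emb_size).astype(np.float32)      # 5 attribute embeddings per example
scores = np.random.randn(batch, 5).astype(np.float32)             # one MLP score per embedding
att = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)  # softmax over the 5 positions
pooled = (att[:, None, :] @ emb).reshape(batch, emb_size)         # weighted sum -> [batch, emb_size]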
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
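# tf.truncated_normal re-draws any sample that falls more than two standard
# deviations from the mean, so initial weights stay bounded. Typical use of the
# helper (the shape here is illustrative, not from the original code):
W_hidden = weight_variable([784, 256])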
"c3_filter": [3, 3, 256, 384], "c4_filter": [3, 3, 192, 384], "c5_filter": [3, 3, 192, 256] } # Fully connected shapes fc_connection_shapes = { "f1_shape": [13 * 13 * 256, 4096], "f2_shape": [4096, 4096], "f3_shape": [4096, dataset_dict["num_labels"]] } # Weights for each layer conv_weights = { "c1_weights": tf.Variable(tf.truncated_normal(conv_filter_shapes["c1_filter"]), name="c1_weights"), "c2_weights": tf.Variable(tf.truncated_normal(conv_filter_shapes["c2_filter"]), name="c2_weights"), "c3_weights": tf.Variable(tf.truncated_normal(conv_filter_shapes["c3_filter"]), name="c3_weights"), "c4_weights": tf.Variable(tf.truncated_normal(conv_filter_shapes["c4_filter"]), name="c4_weights"), "c5_weights": tf.Variable(tf.truncated_normal(conv_filter_shapes["c5_filter"]), name="c5_weights"), "f1_weights": tf.Variable(tf.truncated_normal(fc_connection_shapes["f1_shape"]),
def main(trainModel=True, buildConfusionMatrix=True, restore=False, buildClassifiedMatrix=True):
    tf.disable_v2_behavior()
    input_images = tf.placeholder(tf.float32, [None, 28, 28], name="Input")
    real = tf.placeholder(tf.float32, [None, CLASSES], name="real_classes")
    layer1 = create_conv_layer(tf.reshape(input_images, [-1, 28, 28, 1]), 1, 28,
                               [5, 5], [2, 2], name="conv_no_pool")
    layer2 = create_conv_layer(layer1, 28, 56, [5, 5], [2, 2], name='conv_with_pool')
    conv_result = tf.reshape(layer2, [-1, 7 * 7 * 56])
    relu_layer_weight = tf.Variable(tf.truncated_normal([7 * 7 * 56, 1000], stddev=STDDEV * 2),
                                    name='relu_layer_weight')
    relu_layer_bias = tf.Variable(tf.truncated_normal([1000], stddev=STDDEV / 2),
                                  name='relu_layer_bias')
    relu_layer = tf.matmul(conv_result, relu_layer_weight) + relu_layer_bias
    relu_layer = tf.nn.relu(relu_layer)
    relu_layer = tf.nn.dropout(relu_layer, DROPOUT)
    final_layer_weight = tf.Variable(tf.truncated_normal([1000, CLASSES], stddev=STDDEV * 2),
                                     name='final_layer_weight')
    final_layer_bias = tf.Variable(tf.truncated_normal([CLASSES], stddev=STDDEV / 2),
                                   name='final_layer_bias')
    final_layer = tf.matmul(relu_layer, final_layer_weight) + final_layer_bias
    predicts = tf.nn.softmax(final_layer)
    # Clip probabilities away from 0 and 1 so the logs below stay finite.
    predicts_for_log = tf.clip_by_value(predicts, 1e-9, 0.999999999)
    # crossEntropy = -tf.reduce_mean(tf.reduce_sum(y * tf.log(y_clipped) + (1 - y) * tf.log(1 - y_clipped), axis=1))
    loss = -tf.reduce_mean(
        tf.reduce_sum(real * tf.log(predicts_for_log) +
                      (1 - real) * tf.log(1 - predicts_for_log), axis=1),
        axis=0)
    # test = tf.reduce_sum(real * tf.log(predicts_for_log) + (1 - real) * tf.log(1 - predicts_for_log), axis=1)
    # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=final_layer, labels=real))
    optimiser = tf.train.GradientDescentOptimizer(learning_rate=LEARNING_RATE).minimize(loss)
    correct_prediction = tf.equal(tf.argmax(real, axis=1), tf.argmax(predicts, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    confusion_matrix = tf.confusion_matrix(labels=tf.argmax(real, axis=1),
                                           predictions=tf.argmax(predicts, axis=1),
                                           num_classes=CLASSES)
    saver = tf.train.Saver()
    # dataset = get_mnist_dataset()
    dataset = get_fashion_dataset()
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        if restore:
            saver.restore(session, SAVE_PATH)
        if trainModel:
            train(input_images, real, session, optimiser, loss, accuracy, saver, dataset)
        if buildConfusionMatrix:
            test_cm = session.run(confusion_matrix,
                                  feed_dict={input_images: dataset.test_x,
                                             real: dataset.test_y})
            draw_confusion_matrix(test_cm)
        if buildClassifiedMatrix:
            all_probs = session.run(predicts,
                                    feed_dict={input_images: dataset.test_x,
                                               real: dataset.test_y})
            max_failure_picture_index = [[(-1, -1.0)] * CLASSES for _ in range(CLASSES)]
            for i in range(len(all_probs)):
                real_class = np.argmax(dataset.test_y[i])  # true class of this test example
                for j in range(CLASSES):
                    if max_failure_picture_index[real_class][j][1] < all_probs[i][j]:
                        max_failure_picture_index[real_class][j] = (i, all_probs[i][j])
            draw_max_failure_pictures(dataset.test_x, max_failure_picture_index)
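# train(), the dataset helpers and the constants (CLASSES, STDDEV, DROPOUT,
# LEARNING_RATE, SAVE_PATH) are defined elsewhere. A hypothetical sketch of the
# kind of loop train() presumably runs; EPOCHS, BATCH_SIZE and the
# iterate_minibatches helper are assumptions, not part of the original code.
def train(input_images, real, session, optimiser, loss, accuracy, saver, dataset):
    for epoch in range(EPOCHS):
        for batch_x, batch_y in iterate_minibatches(dataset.train_x, dataset.train_y, BATCH_SIZE):
            session.run(optimiser, feed_dict={input_images: batch_x, real: batch_y})
        test_acc = session.run(accuracy, feed_dict={input_images: dataset.test_x,
                                                    real: dataset.test_y})
        print("epoch", epoch, "test accuracy", test_acc)
    saver.save(session, SAVE_PATH)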
def __init__(self, nHidden, seqLen):
    self.representation_score = {}
    self.y = tf.placeholder(tf.float32, shape=[None, 1])
    self.extractFeature = ExtractFeature.ExtractFeature()
    self.imageFeature = ImageFeature.ImageFeature()
    newNet = tf.reduce_mean(self.imageFeature.outputLS, axis=0)
    self.textFeature = TextFeature.TextFeature(nHidden, seqLen,
                                               self.extractFeature.finalState, newNet)
    self.l2_para = 1e-7
    with tf.variable_scope("training_variable"):
        self.weights = {
            "MLP1": tf.Variable(tf.truncated_normal(shape=[512, 256], stddev=0.08,
                                                    name="MLP1_W")),
            "MLP2": tf.Variable(tf.truncated_normal(shape=[256, 1], stddev=0.08,
                                                    name="MLP2_W")),
            "ATT_attr1_1": tf.Variable(tf.truncated_normal(
                shape=[self.imageFeature.defaultFeatureSize + self.extractFeature.embSize,
                       int(self.imageFeature.defaultFeatureSize / 2 +
                           self.extractFeature.embSize / 2)],
                stddev=0.08, name="ATT_attr1_1")),
            "ATT_attr1_2": tf.Variable(tf.truncated_normal(
                shape=[self.textFeature.nHidden * 2 + self.extractFeature.embSize,
                       int(self.textFeature.nHidden + self.extractFeature.embSize / 2)],
                stddev=0.08, name="ATT_attr1_2")),
            "ATT_attr1_3": tf.Variable(tf.truncated_normal(
                shape=[2 * self.extractFeature.embSize, self.extractFeature.embSize],
                stddev=0.08, name="ATT_attr1_3")),
            "ATT_attr2_1": tf.Variable(tf.truncated_normal(
                shape=[int(self.imageFeature.defaultFeatureSize / 2 +
                           self.extractFeature.embSize / 2), 1],
                stddev=0.08, name="ATT_attr2_1")),
            "ATT_attr2_2": tf.Variable(tf.truncated_normal(
                shape=[int(self.textFeature.nHidden + self.extractFeature.embSize / 2), 1],
                stddev=0.08, name="ATT_attr2_2")),
            "ATT_attr2_3": tf.Variable(tf.truncated_normal(
                shape=[self.extractFeature.embSize, 1],
                stddev=0.08, name="ATT_attr2_3")),
            "ATT_img1_1": tf.Variable(tf.truncated_normal(
                shape=[self.imageFeature.defaultFeatureSize + self.textFeature.nHidden * 2,
                       int(self.imageFeature.defaultFeatureSize / 2 + self.textFeature.nHidden)],
                stddev=0.08, name="ATT_image1_1")),
            "ATT_img1_2": tf.Variable(tf.truncated_normal(
                shape=[self.imageFeature.defaultFeatureSize + self.extractFeature.embSize,
                       int(self.imageFeature.defaultFeatureSize / 2 +
                           self.extractFeature.embSize / 2)],
                stddev=0.08, name="ATT_image1_2")),
            "ATT_img1_3": tf.Variable(tf.truncated_normal(
                shape=[self.imageFeature.defaultFeatureSize * 2,
                       self.imageFeature.defaultFeatureSize],
                stddev=0.08, name="ATT_image1_3")),
            "ATT_img2_1": tf.Variable(tf.truncated_normal(
                shape=[int(self.imageFeature.defaultFeatureSize / 2 +
                           self.textFeature.nHidden), 1],
                stddev=0.08, name="ATT_image2_1")),
            "ATT_img2_2": tf.Variable(tf.truncated_normal(
                shape=[int(self.imageFeature.defaultFeatureSize / 2 +
                           self.extractFeature.embSize / 2), 1],
                stddev=0.08, name="ATT_image2_2")),
            "ATT_img2_3": tf.Variable(tf.truncated_normal(
                shape=[self.imageFeature.defaultFeatureSize, 1],
                stddev=0.08, name="ATT_image2_3")),
            "ATT_text1_1": tf.Variable(tf.truncated_normal(
                shape=[self.imageFeature.defaultFeatureSize + self.textFeature.nHidden * 2,
                       int(self.imageFeature.defaultFeatureSize / 2 + self.textFeature.nHidden)],
                stddev=0.08, name="ATT_text1_1")),
            "ATT_text1_2": tf.Variable(tf.truncated_normal(
                shape=[self.textFeature.nHidden * 2 + self.extractFeature.embSize,
                       int(self.textFeature.nHidden + self.extractFeature.embSize / 2)],
                stddev=0.08, name="ATT_text1_2")),
            "ATT_text1_3": tf.Variable(tf.truncated_normal(
                shape=[self.textFeature.nHidden * 4, self.textFeature.nHidden * 2],
                stddev=0.08, name="ATT_text1_3")),
            "ATT_text2_1": tf.Variable(tf.truncated_normal(
                shape=[int(self.imageFeature.defaultFeatureSize / 2 +
                           self.textFeature.nHidden), 1],
                stddev=0.08, name="ATT_text2_1")),
            "ATT_text2_2": tf.Variable(tf.truncated_normal(
                shape=[int(self.textFeature.nHidden + self.extractFeature.embSize / 2), 1],
                stddev=0.08, name="ATT_text2_2")),
            "ATT_text2_3": tf.Variable(tf.truncated_normal(
                shape=[self.textFeature.nHidden * 2, 1],
                stddev=0.08, name="ATT_text2_3")),
            "ATT_WI1": tf.Variable(tf.truncated_normal(
                shape=[self.imageFeature.defaultFeatureSize, 512],
                stddev=0.08, name="ATT_WI")),
            "ATT_WT1": tf.Variable(tf.truncated_normal(shape=[2 * nHidden, 512],
                                                       stddev=0.08, name="ATT_WT")),
            "ATT_WA1": tf.Variable(tf.truncated_normal(shape=[200, 512],
                                                       stddev=0.08, name="ATT_WA")),
            "ATT_WI2": tf.Variable(tf.truncated_normal(
                shape=[self.imageFeature.defaultFeatureSize, 512],
                stddev=0.08, name="ATT_WI2")),
            "ATT_WT2": tf.Variable(tf.truncated_normal(shape=[2 * nHidden, 512],
                                                       stddev=0.08, name="ATT_WT2")),
            "ATT_WA2": tf.Variable(tf.truncated_normal(shape=[200, 512],
                                                       stddev=0.08, name="ATT_WA2")),
            "ATT_WF_1": tf.Variable(tf.truncated_normal(shape=[512, 1],
                                                        stddev=0.08, name="ATT_WF_1")),
            "ATT_WF_2": tf.Variable(tf.truncated_normal(shape=[512, 1],
                                                        stddev=0.08, name="ATT_WF_2")),
            "ATT_WF_3": tf.Variable(tf.truncated_normal(shape=[512, 1],
                                                        stddev=0.08, name="ATT_WF_3")),
        }
        self.biases = {
            "MLP1": tf.Variable(tf.constant(0.01, shape=[256], dtype=tf.float32,
                                            name="MLP1_b")),
            "MLP2": tf.Variable(tf.constant(0.01, shape=[1], dtype=tf.float32,
                                            name="MLP2_b")),
            "ATT_attr1_1": tf.Variable(tf.constant(
                0.01,
                shape=[int(self.imageFeature.defaultFeatureSize / 2 +
                           self.extractFeature.embSize / 2)],
                name="ATT_attr1_1")),
            "ATT_attr1_2": tf.Variable(tf.constant(
                0.01,
                shape=[int(self.textFeature.nHidden + self.extractFeature.embSize / 2)],
                name="ATT_attr1_2")),
            "ATT_attr1_3": tf.Variable(tf.constant(
                0.01, shape=[self.extractFeature.embSize], name="ATT_attr1_3")),
            "ATT_attr2_1": tf.Variable(tf.constant(0.01, shape=[1], name="ATT_attr2_1")),
            "ATT_attr2_2": tf.Variable(tf.constant(0.01, shape=[1], name="ATT_attr2_2")),
            "ATT_attr2_3": tf.Variable(tf.constant(0.01, shape=[1], name="ATT_attr2_3")),
            "ATT_img1_1": tf.Variable(tf.constant(
                0.01,
                shape=[int(self.imageFeature.defaultFeatureSize / 2 +
                           self.textFeature.nHidden)],
                name="ATT_image1_1")),
            "ATT_img1_2": tf.Variable(tf.constant(
                0.01,
                shape=[int(self.imageFeature.defaultFeatureSize / 2 +
                           self.extractFeature.embSize / 2)],
                name="ATT_image1_2")),
            "ATT_img1_3": tf.Variable(tf.constant(
                0.01, shape=[self.imageFeature.defaultFeatureSize], name="ATT_image1_3")),
            "ATT_img2_1": tf.Variable(tf.constant(0.01, shape=[1], name="ATT_image2_1")),
            "ATT_img2_2": tf.Variable(tf.constant(0.01, shape=[1], name="ATT_image2_2")),
            "ATT_img2_3": tf.Variable(tf.constant(0.01, shape=[1], name="ATT_image2_3")),
            "ATT_text1_1": tf.Variable(tf.constant(
                0.01,
                shape=[int(self.imageFeature.defaultFeatureSize / 2 +
                           self.textFeature.nHidden)],
                name="ATT_text1_1")),
            "ATT_text1_2": tf.Variable(tf.constant(
                0.01,
                shape=[int(self.textFeature.nHidden + self.extractFeature.embSize / 2)],
                name="ATT_text1_2")),
            "ATT_text1_3": tf.Variable(tf.constant(
                0.01, shape=[self.textFeature.nHidden * 2], name="ATT_text1_3")),
            "ATT_text2_1": tf.Variable(tf.constant(0.01, shape=[1], name="ATT_text2_1")),
            "ATT_text2_2": tf.Variable(tf.constant(0.01, shape=[1], name="ATT_text2_2")),
            "ATT_text2_3": tf.Variable(tf.constant(0.01, shape=[1], name="ATT_text2_3")),
            "ATT_WW": tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WW")),
            "ATT_WI": tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WI")),
            "ATT_WT": tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WT")),
            "ATT_WI1": tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WI1")),
            "ATT_WT1": tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WT1")),
            "ATT_WA": tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WA")),
            "ATT_WF_1": tf.Variable(tf.constant(0.01, shape=[1], name="ATT_WF_1")),
            "ATT_WF_2": tf.Variable(tf.constant(0.01, shape=[1], name="ATT_WF_2")),
            "ATT_WF_3": tf.Variable(tf.constant(0.01, shape=[1], name="ATT_WF_3")),
        }
    print("newnet dimension :", newNet)
    # Cross-modal attention: each modality attends over its own sequence, guided
    # by the other modalities' summary vectors.
    imageVec = self.Attention(newNet, self.imageFeature.outputLS,
                              self.textFeature.RNNState, self.extractFeature.finalState,
                              "ATT_img1", "ATT_img2", 196, True)
    textVec = self.Attention(self.textFeature.RNNState, self.textFeature.outputs,
                             newNet, self.extractFeature.finalState,
                             "ATT_text1", "ATT_text2", self.textFeature.seqLen, False)
    attrVec = self.Attention(self.extractFeature.finalState, self.extractFeature.inputEmb,
                             newNet, self.textFeature.RNNState,
                             "ATT_attr1", "ATT_attr2", 5, False)
    # Score each modality vector, softmax the three scores, and fuse the
    # projected 512-d vectors with the resulting weights.
    attHidden = tf.tanh(tf.matmul(imageVec, self.weights["ATT_WI1"]) + self.biases["ATT_WI1"])
    attHidden2 = tf.tanh(tf.matmul(textVec, self.weights["ATT_WT1"]) + self.biases["ATT_WT1"])
    attHidden3 = tf.tanh(tf.matmul(attrVec, self.weights["ATT_WA1"]) + self.biases["ATT_WW"])
    scores1 = tf.matmul(attHidden, self.weights["ATT_WF_1"]) + self.biases["ATT_WF_1"]
    scores2 = tf.matmul(attHidden2, self.weights["ATT_WF_2"]) + self.biases["ATT_WF_2"]
    scores3 = tf.matmul(attHidden3, self.weights["ATT_WF_3"]) + self.biases["ATT_WF_3"]
    scoreLS = [scores1, scores2, scores3]
    scoreLS = tf.nn.softmax(scoreLS, dim=0)
    imageVec = tf.tanh(tf.matmul(imageVec, self.weights["ATT_WI2"]) + self.biases["ATT_WI"])
    textVec = tf.tanh(tf.matmul(textVec, self.weights["ATT_WT2"]) + self.biases["ATT_WT"])
    attrVec = tf.tanh(tf.matmul(attrVec, self.weights["ATT_WA2"]) + self.biases["ATT_WA"])
    self.concatInput = (scoreLS[0] * imageVec +
                        scoreLS[1] * textVec +
                        scoreLS[2] * attrVec)
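# The fusion at the end stacks the three modality scores, softmaxes across the
# modality axis, and forms a convex combination of the three 512-d vectors. A
# standalone NumPy sketch of the same arithmetic, with toy sizes and
# illustrative names:
import numpy as np

batch, d = 2, 512
vecs = [np.random.randn(batch, d).astype(np.float32) for _ in range(3)]              # image, text, attribute
scores = np.stack([np.random.randn(batch, 1).astype(np.float32) for _ in range(3)])  # [3, batch, 1]
w = np.exp(scores) / np.exp(scores).sum(axis=0, keepdims=True)                       # softmax over the 3 scores
fused = w[0] * vecs[0] + w[1] * vecs[1] + w[2] * vecs[2]                             # [batch, 512]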