def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num, drop_out=0.5, rad_dim=30,
               emb=None, ng_embs=None, pixels=None, con_width=None, filters=None, pooling_size=None):
    if trained_model is not None:
        param_dic = {}
        param_dic['nums_chars'] = self.nums_chars
        param_dic['nums_tags'] = self.nums_tags
        param_dic['tag_scheme'] = self.tag_scheme
        param_dic['graphic'] = self.graphic
        param_dic['pic_size'] = self.pic_size
        param_dic['word_vec'] = self.word_vec
        param_dic['radical'] = self.radical
        param_dic['crf'] = self.crf
        param_dic['emb_dim'] = emb_dim
        param_dic['gru'] = gru
        param_dic['rnn_dim'] = rnn_dim
        param_dic['rnn_num'] = rnn_num
        param_dic['drop_out'] = drop_out
        param_dic['filter_size'] = con_width
        param_dic['filters'] = filters
        param_dic['pooling_size'] = pooling_size
        param_dic['font'] = self.font
        param_dic['buckets_char'] = self.buckets_char
        param_dic['ngram'] = self.ngram
        #print param_dic
        f_model = open(trained_model, 'w')
        pickle.dump(param_dic, f_model)
        f_model.close()

    # define shared weights and variables
    dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
    self.drop_out = dr
    self.drop_out_v = drop_out

    if self.word_vec:
        self.emb_layer = EmbeddingLayer(self.nums_chars + 500, emb_dim, weights=emb, name='emb_layer')

    if self.radical:
        self.radical_layer = EmbeddingLayer(216, rad_dim, name='radical_layer')

    if self.ngram is not None:
        if ng_embs is not None:
            assert len(ng_embs) == len(self.ngram)
        else:
            ng_embs = [None for _ in range(len(self.ngram))]
        for i, n_gram in enumerate(self.ngram):
            self.gram_layers.append(EmbeddingLayer(n_gram + 1000 * (i + 2), emb_dim, weights=ng_embs[i],
                                                   name=str(i + 2) + 'gram_layer'))

    wrapper_conv_1, wrapper_mp_1, wrapper_conv_2, wrapper_mp_2, wrapper_dense, wrapper_dr = \
        None, None, None, None, None, None

    if self.graphic:
        self.input_p = []
        assert pixels is not None and filters is not None and pooling_size is not None and con_width is not None

        self.pixels = pixels
        pixel_dim = int(math.sqrt(len(pixels[0])))

        wrapper_conv_1 = TimeDistributed(Convolution(con_width, 1, filters, name='conv_1'), name='wrapper_c1')
        wrapper_mp_1 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_1'), name='wrapper_p1')
        p_size_1 = toolbox.down_pool(pixel_dim, pooling_size)

        wrapper_conv_2 = TimeDistributed(Convolution(con_width, filters, filters, name='conv_2'), name='wrapper_c2')
        wrapper_mp_2 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_2'), name='wrapper_p2')
        p_size_2 = toolbox.down_pool(p_size_1, pooling_size)

        wrapper_dense = TimeDistributed(
            HiddenLayer(p_size_2 * p_size_2 * filters, 100, activation='tanh', name='conv_dense'),
            name='wrapper_3')
        wrapper_dr = TimeDistributed(DropoutLayer(self.drop_out), name='wrapper_dr')

    with tf.variable_scope('BiRNN'):
        if gru:
            fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
            bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
        else:
            fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
            bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)

        if rnn_num > 1:
            fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([fw_rnn_cell] * rnn_num, state_is_tuple=True)
            bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([bw_rnn_cell] * rnn_num, state_is_tuple=True)

    output_wrapper = TimeDistributed(
        HiddenLayer(rnn_dim * 2, self.nums_tags[0], activation='linear', name='hidden'),
        name='wrapper')

    # define model for each bucket
    for idx, bucket in enumerate(self.buckets_char):
        if idx == 1:
            scope.reuse_variables()
        t1 = time()

        input_v = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))
        self.input_v.append([input_v])

        emb_set = []

        if self.word_vec:
            word_out = self.emb_layer(input_v)
            emb_set.append(word_out)

        if self.radical:
            input_r = tf.placeholder(tf.int32, [None, bucket], name='input_r' + str(bucket))
            self.input_v[-1].append(input_r)
            radical_out = self.radical_layer(input_r)
            emb_set.append(radical_out)

        if self.ngram is not None:
            for i in range(len(self.ngram)):
                input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                self.input_v[-1].append(input_g)
                gram_out = self.gram_layers[i](input_g)
                emb_set.append(gram_out)

        if self.graphic:
            input_p = tf.placeholder(tf.float32, [None, bucket, pixel_dim * pixel_dim])
            self.input_p.append(input_p)

            pix_out = tf.reshape(input_p, [-1, bucket, pixel_dim, pixel_dim, 1])
            pix_out = tf.unpack(pix_out, axis=1)

            conv_out_1 = wrapper_conv_1(pix_out)
            pooling_out_1 = wrapper_mp_1(conv_out_1)
            conv_out_2 = wrapper_conv_2(pooling_out_1)
            pooling_out_2 = wrapper_mp_2(conv_out_2)

            assert p_size_2 == pooling_out_2[0].get_shape().as_list()[1]

            pooling_out = tf.reshape(pooling_out_2, [-1, bucket, p_size_2 * p_size_2 * filters])
            pooling_out = tf.unpack(pooling_out, axis=1)

            graphic_out = wrapper_dense(pooling_out)
            graphic_out = wrapper_dr(graphic_out)
            emb_set.append(graphic_out)

        if len(emb_set) > 1:
            emb_out = tf.concat(2, emb_set)
            emb_out = tf.unpack(emb_out)
        else:
            emb_out = emb_set[0]

        rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr,
                         name='BiLSTM' + str(bucket), scope='BiRNN')(emb_out, input_v)

        output = output_wrapper(rnn_out)
        output_c = tf.pack(output, axis=1)

        self.output.append([output_c])
        self.output_.append([tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))])
        self.bucket_dit[bucket] = idx

        print 'Bucket %d, %f seconds' % (idx + 1, time() - t1)

    assert len(self.input_v) == len(self.output) and len(self.output) == len(self.output_) \
        and len(self.output) == len(self.counts)

    self.params = tf.trainable_variables()
    self.saver = tf.train.Saver()
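# The graph above is built once per sentence-length bucket, so at feed time the
# id-encoded sentences have to be grouped and padded to their bucket's length
# before being fed to that bucket's placeholders. A minimal, self-contained
# sketch of that grouping (the helper name `bucket_and_pad` and padding id 0
# are assumptions for illustration, not part of this codebase):
import numpy as np

def bucket_and_pad(sentences, buckets):
    # assign each sentence to the smallest bucket that fits it, pad with 0
    batches = {b: [] for b in buckets}
    for sent in sentences:
        for b in sorted(buckets):
            if len(sent) <= b:
                batches[b].append(sent + [0] * (b - len(sent)))
                break
    # one int32 matrix per bucket, shaped like that bucket's input placeholder
    return {b: np.asarray(rows, dtype=np.int32) for b, rows in batches.items() if rows}

batches = bucket_and_pad([[4, 9, 2], [7, 1], [3, 3, 8, 1, 5]], buckets=[3, 6])
for b, m in sorted(batches.items()):
    print('%d %s' % (b, m.shape))   # 3 (2, 3) / 6 (1, 6)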
def preprocessing(x):
    # scale to [0, 1], then standardize to zero mean and unit variance
    x = x.astype(np.float)
    x /= 255.0
    x -= x.mean()
    x /= x.std()
    return x

x = preprocessing(x)
xt = preprocessing(xt)
#x = np.random.random((n, 1, 28, 28))
#y = np.random.randint(2, size=(n))

# Model
net = Net()
net.push(Conv2d(5, 5, 1, 6))   # 1x28x28 -> 6x24x24
net.push(Relu())
net.push(Maxpooling(2, 2))     # 6x24x24 -> 6x12x12
net.push(Conv2d(5, 5, 6, 16))  # 6x12x12 -> 16x8x8
net.push(Relu())
net.push(Maxpooling(2, 2))     # 16x8x8 -> 16x4x4
net.push(Reshape((256,)))      # note: (256,) is a tuple; (256) would be a plain int
net.push(Linear(256, 84))
net.push(Relu())
net.push(Softmax(84, 10))

# Data
data = DataProvider()
n = 10000
data.train_input(x[:n], y[:n])
data.test_input(xt, yt)
data.batch_size(16)
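# The shape comments above follow from valid (no-padding, stride-1) 5x5
# convolutions and non-overlapping 2x2 max-pooling. A quick sanity check of
# the arithmetic that leads to the Reshape((256,)) / Linear(256, 84) sizes:
def conv_out(size, kernel):   # valid convolution, stride 1
    return size - kernel + 1

def pool_out(size, pool):     # non-overlapping pooling
    return size // pool

s = 28
s = pool_out(conv_out(s, 5), 2)   # 28 -> 24 -> 12
s = pool_out(conv_out(s, 5), 2)   # 12 -> 8 -> 4
print(16 * s * s)                 # 256 features into the dense layers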
def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num, drop_out=0.5, rad_dim=30,
               emb=None, ngram_embedding=None, pixels=None, con_width=None, filters=None, pooling_size=None):
    """
    :param trained_model: path where the model hyper-parameters are saved
    :param scope: outer variable scope, e.g. tf.variable_scope("tagger", ...)
    :param emb_dim: character embedding dimension
    :param gru: if True use GRU cells, otherwise LSTM cells
    :param rnn_dim: RNN hidden-state dimension
    :param rnn_num: number of stacked RNN layers
    :param drop_out: dropout rate
    :param rad_dim: radical embedding dimension
    :param emb: pre-trained character embeddings
    :param ngram_embedding: pre-trained ngram embedding file
    :param pixels: flattened pixel arrays of the character images
    :param con_width: convolution filter width
    :param filters: number of convolution filters
    :param pooling_size: max-pooling size
    :return:
    """
    # trained_model: path for saving the model
    if trained_model is not None:
        param_dic = {'nums_chars': self.nums_chars, 'nums_tags': self.nums_tags,
                     'tag_scheme': self.tag_scheme, 'graphic': self.graphic,
                     'pic_size': self.pic_size, 'word_vec': self.word_vec, 'radical': self.radical,
                     'crf': self.crf, 'emb_dim': emb_dim, 'gru': gru, 'rnn_dim': rnn_dim,
                     'rnn_num': rnn_num, 'drop_out': drop_out, 'filter_size': con_width,
                     'filters': filters, 'pooling_size': pooling_size, 'font': self.font,
                     'buckets_char': self.buckets_char, 'ngram': self.ngram}

        print "RNN dimension is %d" % rnn_dim
        print "RNN number is %d" % rnn_num
        print "Character embedding size is %d" % emb_dim
        print "Ngram embedding dimension is %d" % emb_dim

        # save the model hyper-parameters
        if self.metric == 'All':
            # rindex() returns the last occurrence of the substring,
            # used here to split off the model file name
            pindex = trained_model.rindex('/') + 1
            for m in self.all_metrics:
                f_model = open(trained_model[:pindex] + m + '_' + trained_model[pindex:], 'w')
                pickle.dump(param_dic, f_model)
                f_model.close()
        else:
            f_model = open(trained_model, 'w')
            pickle.dump(param_dic, f_model)
            f_model.close()

    # define shared weights and variables
    dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
    self.drop_out = dr
    self.drop_out_v = drop_out

    # character embedding layer
    # why add 500 to the character count? (presumably head-room for unseen characters)
    # emb_dim is the feature dimension of each character, settable from the command line
    # weights are the pre-trained character embeddings, settable from the command line
    if self.word_vec:
        self.emb_layer = EmbeddingLayer(self.nums_chars + 500, emb_dim, weights=emb, name='emb_layer')

    # radical embeddings
    # the Kangxi dictionary lists 214 radicals;
    # only radicals of common characters are used, and rare characters and
    # non-Chinese characters map to two extra special symbols, giving 216 in total
    if self.radical:
        self.radical_layer = EmbeddingLayer(216, rad_dim, name='radical_layer')

    if self.ngram is not None:
        if ngram_embedding is not None:
            assert len(ngram_embedding) == len(self.ngram)
        else:
            ngram_embedding = [None for _ in range(len(self.ngram))]
        for i, n_gram in enumerate(self.ngram):
            self.gram_layers.append(EmbeddingLayer(n_gram + 1000 * (i + 2), emb_dim,
                                                   weights=ngram_embedding[i],
                                                   name=str(i + 2) + 'gram_layer'))

    wrapper_conv_1, wrapper_mp_1, wrapper_conv_2, wrapper_mp_2, wrapper_dense, wrapper_dr = \
        None, None, None, None, None, None

    if self.graphic:
        # glyph (image) features require a CNN
        self.input_p = []
        assert pixels is not None and filters is not None and pooling_size is not None and con_width is not None

        self.pixels = pixels
        pixel_dim = int(math.sqrt(len(pixels[0])))

        wrapper_conv_1 = TimeDistributed(Convolution(con_width, 1, filters, name='conv_1'), name='wrapper_c1')
        wrapper_mp_1 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_1'), name='wrapper_p1')
        p_size_1 = toolbox.down_pool(pixel_dim, pooling_size)

        wrapper_conv_2 = TimeDistributed(Convolution(con_width, filters, filters, name='conv_2'), name='wrapper_c2')
        wrapper_mp_2 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_2'), name='wrapper_p2')
        p_size_2 = toolbox.down_pool(p_size_1, pooling_size)

        wrapper_dense = TimeDistributed(
            HiddenLayer(p_size_2 * p_size_2 * filters, 100, activation='tanh', name='conv_dense'),
            name='wrapper_3')
        wrapper_dr = TimeDistributed(DropoutLayer(self.drop_out), name='wrapper_dr')

    with tf.variable_scope('BiRNN'):
        if gru:
            fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
            bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
        else:
            fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
            bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)

        if rnn_num > 1:
            fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([fw_rnn_cell] * rnn_num, state_is_tuple=True)
            bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([bw_rnn_cell] * rnn_num, state_is_tuple=True)

    # hidden layer over the concatenated forward and backward RNN outputs,
    # so the input dimension is rnn_dim * 2; the output dimension is the number of tags
    output_wrapper = TimeDistributed(
        HiddenLayer(rnn_dim * 2, self.nums_tags[0], activation='linear', name='hidden'),
        name='wrapper')

    # define model for each bucket
    # sentences in different buckets have different lengths, so each bucket gets its own graph
    # bucket: the sentence length of this bucket
    for idx, bucket in enumerate(self.buckets_char):
        if idx == 1:
            # scope is tf.variable_scope("tagger", reuse=None, initializer=initializer);
            # reuse only needs to be switched on once, all later buckets reuse the variables
            scope.reuse_variables()
        t1 = time()

        # input sentences as character ids, shape = (batch_size, sentence length)
        input_sentences = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))
        self.input_v.append([input_sentences])

        emb_set = []

        if self.word_vec:
            # look up the embedding for each character id
            # word_out: shape = (batch_size, sentence length, embedding dim (64))
            word_out = self.emb_layer(input_sentences)
            emb_set.append(word_out)

        if self.radical:
            # radical ids, shape = (batch_size, sentence length)
            input_radicals = tf.placeholder(tf.int32, [None, bucket], name='input_r' + str(bucket))
            self.input_v[-1].append(input_radicals)
            radical_out = self.radical_layer(input_radicals)
            emb_set.append(radical_out)

        if self.ngram is not None:
            for i in range(len(self.ngram)):
                input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                self.input_v[-1].append(input_g)
                gram_out = self.gram_layers[i](input_g)
                emb_set.append(gram_out)

        if self.graphic:
            input_p = tf.placeholder(tf.float32, [None, bucket, pixel_dim * pixel_dim])
            self.input_p.append(input_p)

            pix_out = tf.reshape(input_p, [-1, bucket, pixel_dim, pixel_dim, 1])

            conv_out_1 = wrapper_conv_1(pix_out)
            pooling_out_1 = wrapper_mp_1(conv_out_1)
            conv_out_2 = wrapper_conv_2(pooling_out_1)
            pooling_out_2 = wrapper_mp_2(conv_out_2)

            assert p_size_2 == pooling_out_2[0].get_shape().as_list()[1]

            pooling_out = tf.reshape(pooling_out_2, [-1, bucket, p_size_2 * p_size_2 * filters])
            pooling_out = tf.unstack(pooling_out, axis=1)

            graphic_out = wrapper_dense(pooling_out)
            graphic_out = wrapper_dr(graphic_out)
            emb_set.append(graphic_out)

        if self.window_size > 1:
            padding_size = int(np.floor(self.window_size / 2))
            word_padded = tf.pad(word_out, [[0, 0], [padding_size, padding_size], [0, 0]], 'CONSTANT')

            Ws = []
            for q in range(1, self.window_size + 1):
                Ws.append(tf.get_variable("W_%d" % q, shape=[q * emb_dim, self.filters_number]))
            b = tf.get_variable("b", shape=[self.filters_number])

            z = [None for _ in range(0, bucket)]
            for q in range(1, self.window_size + 1):
                for i in range(padding_size, bucket + padding_size):
                    low = i - int(np.floor((q - 1) / 2))
                    high = i + int(np.ceil((q + 1) / 2))
                    x = word_padded[:, low, :]
                    for j in range(low + 1, high):
                        x = tf.concat(values=[x, word_padded[:, j, :]], axis=1)
                    z_iq = tf.tanh(tf.nn.xw_plus_b(x, Ws[q - 1], b))
                    if z[i - padding_size] is None:
                        z[i - padding_size] = z_iq
                    else:
                        z[i - padding_size] = tf.concat([z[i - padding_size], z_iq], axis=1)
            z = tf.stack(z, axis=1)
            values, indices = tf.nn.top_k(z, sorted=False, k=emb_dim)

            # highway layer
            X = tf.unstack(word_out, axis=1)
            Conv_X = tf.unstack(values, axis=1)
            X_hat = []
            W_t = tf.get_variable("W_t", shape=[emb_dim, emb_dim])
            b_t = tf.get_variable("b_t", shape=[emb_dim])
            for x, conv_x in zip(X, Conv_X):
                T_x = tf.sigmoid(tf.nn.xw_plus_b(x, W_t, b_t))
                X_hat.append(tf.multiply(conv_x, T_x) + tf.multiply(x, 1 - T_x))
            X_hat = tf.stack(X_hat, axis=1)
            emb_set.append(X_hat)

        if len(emb_set) > 1:
            # concatenate all per-character features
            # (character embeddings, radicals, n-grams, glyph features, ...)
            emb_out = tf.concat(axis=2, values=emb_set)
        else:
            emb_out = emb_set[0]

        # rnn_out is the concatenation of the forward and backward RNN outputs
        rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr,
                         name='BiLSTM' + str(bucket), scope='BiRNN')(self.highway(emb_out, "tag"),
                                                                     input_sentences)

        # fully connected layer, Wx + b, gives the final scores
        output = output_wrapper(rnn_out)

        # why [output] rather than output?
        self.output.append([output])
        self.output_.append([tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))])
        self.bucket_dit[bucket] = idx

        # language model
        lm_rnn_dim = rnn_dim
        with tf.variable_scope('LM-BiRNN'):
            if gru:
                lm_fw_rnn_cell = tf.nn.rnn_cell.GRUCell(lm_rnn_dim)
                lm_bw_rnn_cell = tf.nn.rnn_cell.GRUCell(lm_rnn_dim)
            else:
                lm_fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(lm_rnn_dim, state_is_tuple=True)
                lm_bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(lm_rnn_dim, state_is_tuple=True)

            if rnn_num > 1:
                lm_fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([lm_fw_rnn_cell] * rnn_num, state_is_tuple=True)
                lm_bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([lm_bw_rnn_cell] * rnn_num, state_is_tuple=True)

        lm_rnn_output = BiLSTM(lm_rnn_dim, fw_cell=lm_fw_rnn_cell, bw_cell=lm_bw_rnn_cell, p=dr,
                               name='LM-BiLSTM' + str(bucket), scope='LM-BiRNN')(self.highway(emb_set[0]),
                                                                                 input_sentences)

        lm_output_wrapper = TimeDistributed(
            HiddenLayer(lm_rnn_dim * 2, self.nums_chars + 2, activation='linear', name='lm_hidden'),
            name='lm_wrapper')
        lm_final_output = lm_output_wrapper(lm_rnn_output)
        self.lm_predictions.append([lm_final_output])
        self.lm_groundtruthes.append([tf.placeholder(tf.int32, [None, bucket], name='lm_targets' + str(bucket))])

        print 'Bucket %d, %f seconds' % (idx + 1, time() - t1)

    assert \
        len(self.input_v) == len(self.output) and \
        len(self.output) == len(self.output_) and \
        len(self.lm_predictions) == len(self.lm_groundtruthes) and \
        len(self.output) == len(self.counts)

    self.params = tf.trainable_variables()
    self.saver = tf.train.Saver()
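# `self.highway(...)` itself is not shown here, but the inline "highway layer"
# block above spells out the combination it applies per position:
# T(x) = sigmoid(x W_t + b_t), output = conv_x * T(x) + x * (1 - T(x)).
# A minimal NumPy sketch of that step (toy shapes and the negative gate bias
# are illustrative assumptions, not values from this codebase):
import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

def highway_step(x, conv_x, W_t, b_t):
    T = sigmoid(x.dot(W_t) + b_t)       # transform gate, shape (batch, emb_dim)
    return conv_x * T + x * (1.0 - T)   # carry the raw embedding where T is small

rng = np.random.RandomState(0)
batch, emb_dim = 2, 64
x = rng.randn(batch, emb_dim)           # raw character embeddings at one position
conv_x = rng.randn(batch, emb_dim)      # top-k convolution features at that position
W_t = 0.01 * rng.randn(emb_dim, emb_dim)
b_t = np.full(emb_dim, -1.0)            # negative bias initially favors the carry path
print(highway_step(x, conv_x, W_t, b_t).shape)   # (2, 64)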
def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num, fnn_dim, window_size,
               drop_out=0.5, rad_dim=30, emb=None, ng_embs=None, pixels=None, con_width=None,
               filters=None, pooling_size=None):
    if trained_model is not None:
        param_dic = {}
        param_dic['nums_chars'] = self.nums_chars
        param_dic['nums_tags'] = self.nums_tags
        param_dic['tag_scheme'] = self.tag_scheme
        param_dic['graphic'] = self.graphic
        param_dic['pic_size'] = self.pic_size
        param_dic['word_vec'] = self.word_vec
        param_dic['radical'] = self.radical
        param_dic['crf'] = self.crf
        param_dic['emb_dim'] = emb_dim
        param_dic['gru'] = gru
        param_dic['rnn_dim'] = rnn_dim
        param_dic['rnn_num'] = rnn_num
        param_dic['fnn_dim'] = fnn_dim
        param_dic['window_size'] = window_size
        param_dic['drop_out'] = drop_out
        param_dic['filter_size'] = con_width
        param_dic['filters'] = filters
        param_dic['pooling_size'] = pooling_size
        param_dic['font'] = self.font
        param_dic['buckets_char'] = self.buckets_char
        param_dic['ngram'] = self.ngram
        param_dic['mode'] = self.mode
        #print param_dic

        if self.metric == 'All':
            pindex = trained_model.rindex('/') + 1
            for m in self.all_metrics:
                f_model = open(trained_model[:pindex] + m + '_' + trained_model[pindex:], 'w')
                pickle.dump(param_dic, f_model)
                f_model.close()
        else:
            f_model = open(trained_model, 'w')
            pickle.dump(param_dic, f_model)
            f_model.close()

    # define shared weights and variables
    dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
    self.drop_out = dr
    self.drop_out_v = drop_out

    #concat_emb_dim = emb_dim * 2
    concat_emb_dim = 0

    if self.word_vec:
        self.emb_layer = EmbeddingLayer(self.nums_chars + 500, emb_dim, weights=emb, name='emb_layer')
        concat_emb_dim += emb_dim

    if self.radical:
        self.radical_layer = EmbeddingLayer(216, rad_dim, name='radical_layer')
        concat_emb_dim += rad_dim

    if self.ngram is not None:
        if ng_embs is not None:
            assert len(ng_embs) == len(self.ngram)
        else:
            ng_embs = [None for _ in range(len(self.ngram))]
        for i, n_gram in enumerate(self.ngram):
            self.gram_layers.append(EmbeddingLayer(n_gram + 1000 * (i + 2), emb_dim, weights=ng_embs[i],
                                                   name=str(i + 2) + 'gram_layer'))
            concat_emb_dim += emb_dim

    wrapper_conv_1, wrapper_mp_1, wrapper_conv_2 = None, None, None
    wrapper_mp_2, wrapper_dense, wrapper_dr = None, None, None

    if self.graphic:
        self.input_p = []
        assert pixels is not None and filters is not None and pooling_size is not None and con_width is not None

        self.pixels = pixels
        pixel_dim = int(math.sqrt(len(pixels[0])))

        wrapper_conv_1 = Convolution(con_width, 1, filters, name='conv_1')
        wrapper_mp_1 = Maxpooling(pooling_size, pooling_size, name='pooling_1')
        p_size_1 = toolbox.down_pool(pixel_dim, pooling_size)

        wrapper_conv_2 = Convolution(con_width, filters, filters, name='conv_2')
        wrapper_mp_2 = Maxpooling(pooling_size, pooling_size, name='pooling_2')
        p_size_2 = toolbox.down_pool(p_size_1, pooling_size)

        wrapper_dense = HiddenLayer(p_size_2 * p_size_2 * filters, 100, activation='tanh', name='conv_dense')
        wrapper_dr = DropoutLayer(self.drop_out)
        concat_emb_dim += 100

    fw_rnn_cell, bw_rnn_cell = None, None
    if self.mode == 'RNN':
        with tf.variable_scope('BiRNN'):
            if gru:
                fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
            else:
                fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)

            if rnn_num > 1:
                fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([fw_rnn_cell] * rnn_num, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([bw_rnn_cell] * rnn_num, state_is_tuple=True)

        output_wrapper = HiddenLayer(rnn_dim * 2, self.nums_tags[0], activation='linear', name='out_wrapper')
        fnn_weights, fnn_bias = None, None
    else:
        with tf.variable_scope('FNN'):
            fnn_weights = tf.get_variable('conv_w', [2 * window_size + 1, concat_emb_dim, 1, fnn_dim])
            fnn_bias = tf.get_variable('conv_b', [fnn_dim], initializer=tf.constant_initializer(0.1))
        output_wrapper = HiddenLayer(fnn_dim, self.nums_tags[0], activation='linear', name='out_wrapper')

    # define model for each bucket
    for idx, bucket in enumerate(self.buckets_char):
        if idx == 1:
            scope.reuse_variables()
        t1 = time()

        input_v = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))
        self.input_v.append([input_v])

        emb_set = []

        if self.word_vec:
            word_out = self.emb_layer(input_v)
            emb_set.append(word_out)

        if self.radical:
            input_r = tf.placeholder(tf.int32, [None, bucket], name='input_r' + str(bucket))
            self.input_v[-1].append(input_r)
            radical_out = self.radical_layer(input_r)
            emb_set.append(radical_out)

        if self.ngram is not None:
            for i in range(len(self.ngram)):
                input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                self.input_v[-1].append(input_g)
                gram_out = self.gram_layers[i](input_g)
                emb_set.append(gram_out)

        if self.graphic:
            input_p = tf.placeholder(tf.float32, [None, bucket, pixel_dim * pixel_dim])
            self.input_p.append(input_p)

            pix_out = tf.reshape(input_p, [-1, pixel_dim, pixel_dim, 1])

            conv_out_1 = wrapper_conv_1(pix_out)
            pooling_out_1 = wrapper_mp_1(conv_out_1)
            conv_out_2 = wrapper_conv_2(pooling_out_1)
            pooling_out_2 = wrapper_mp_2(conv_out_2)

            assert p_size_2 == pooling_out_2[0].get_shape().as_list()[1]

            pooling_out = tf.reshape(pooling_out_2, [-1, bucket, p_size_2 * p_size_2 * filters])
            graphic_out = wrapper_dense(pooling_out)
            graphic_out = wrapper_dr(graphic_out)
            emb_set.append(graphic_out)

        if len(emb_set) > 1:
            emb_out = tf.concat(axis=2, values=emb_set)
        else:
            emb_out = emb_set[0]

        if self.mode == 'RNN':
            rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr,
                             name='BiLSTM' + str(bucket), scope='BiRNN')(emb_out, input_v)
            output = output_wrapper(rnn_out)
        else:
            emb_out = tf.pad(emb_out, [[0, 0], [window_size, window_size], [0, 0]])
            emb_out = tf.reshape(emb_out, [-1, bucket + 2 * window_size, concat_emb_dim, 1])
            conv_out = tf.nn.conv2d(emb_out, fnn_weights, [1, 1, 1, 1], padding='VALID') + fnn_bias
            fnn_out = tf.nn.tanh(conv_out)
            fnn_out = tf.reshape(fnn_out, [-1, bucket, fnn_dim])
            output = output_wrapper(fnn_out)

        self.output.append([output])
        self.output_.append([tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))])
        self.bucket_dit[bucket] = idx

        print 'Bucket %d, %f seconds' % (idx + 1, time() - t1)

    assert len(self.input_v) == len(self.output) and len(self.output) == len(self.output_) \
        and len(self.output) == len(self.counts)

    self.params = tf.trainable_variables()
    self.saver = tf.train.Saver()
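# In the FNN branch above, tf.nn.conv2d slides a (2*window_size+1, concat_emb_dim)
# filter over the padded sequence, producing one fnn_dim feature vector per
# character position. An equivalent single-example NumPy sketch (the function
# name and toy sizes are assumptions for illustration):
import numpy as np

def fnn_window_features(emb_out, weights, bias, window_size):
    # emb_out: (bucket, emb_dim); weights: (2*window_size+1, emb_dim, fnn_dim)
    bucket = emb_out.shape[0]
    span = 2 * window_size + 1
    padded = np.pad(emb_out, ((window_size, window_size), (0, 0)), 'constant')
    out = np.empty((bucket, weights.shape[-1]))
    for t in range(bucket):
        window = padded[t:t + span]   # (span, emb_dim) slice centred on position t
        out[t] = np.tanh(np.tensordot(window, weights, axes=([0, 1], [0, 1])) + bias)
    return out

rng = np.random.RandomState(0)
bucket, emb_dim, fnn_dim, w = 5, 8, 4, 2
feats = fnn_window_features(rng.randn(bucket, emb_dim),
                            rng.randn(2 * w + 1, emb_dim, fnn_dim),
                            np.full(fnn_dim, 0.1), w)
print(feats.shape)   # (5, 4), one feature vector per character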
y = y.ravel()
yt = yt.ravel()

n = 50000
nt = 10000
x = x[:n]
y = y[:n]
xt = xt[:nt]
yt = yt[:nt]

# Model
net = Net()
net.push(Conv2d(5, 5, 3, 20))  # 3x32x32 -> 20x28x28
net.push(Relu())
net.push(BatchNorm())
net.push(Maxpooling(4, 4))     # 20x28x28 -> 20x7x7
net.push(Reshape((980,)))      # 20x7x7 = 980
net.push(Linear(980, 200))
net.push(Relu())
net.push(BatchNorm())
net.push(Softmax(200, 10))

# Data
data = DataProvider()
data.train_input(x, y)
data.test_input(xt, yt)
data.batch_size(32)
data.batch_size_test(1000)

lr = 1e-3
gamma = 1
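# The snippet ends before the optimization step, so how `lr` and `gamma` are
# consumed is not shown. If `gamma` is a per-epoch learning-rate decay factor
# (an assumption, not confirmed by this snippet), the schedule it implies is:
def lr_at(epoch, base_lr=1e-3, gamma=1.0):
    return base_lr * (gamma ** epoch)

for epoch in range(3):
    print('%d %g' % (epoch, lr_at(epoch)))   # gamma = 1 keeps the rate constant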