Beispiel #1
0
    def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num, drop_out=0.5, rad_dim=30, emb=None, ng_embs=None, pixels=None, con_width=None, filters=None, pooling_size=None):
        if trained_model is not None:
            param_dic = {}
            param_dic['nums_chars'] = self.nums_chars
            param_dic['nums_tags'] = self.nums_tags
            param_dic['tag_scheme'] = self.tag_scheme
            param_dic['graphic'] = self.graphic
            param_dic['pic_size'] = self.pic_size
            param_dic['word_vec'] = self.word_vec
            param_dic['radical'] = self.radical
            param_dic['crf'] = self.crf
            param_dic['emb_dim'] = emb_dim
            param_dic['gru'] = gru
            param_dic['rnn_dim'] = rnn_dim
            param_dic['rnn_num'] = rnn_num
            param_dic['drop_out'] = drop_out
            param_dic['filter_size'] = con_width
            param_dic['filters'] = filters
            param_dic['pooling_size'] = pooling_size
            param_dic['font'] = self.font
            param_dic['buckets_char'] = self.buckets_char
            param_dic['ngram'] = self.ngram
            #print param_dic
            f_model = open(trained_model, 'w')
            pickle.dump(param_dic, f_model)
            f_model.close()

        # define shared weights and variables

        dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
        self.drop_out = dr
        self.drop_out_v = drop_out

        if self.word_vec:
            self.emb_layer = EmbeddingLayer(self.nums_chars + 500, emb_dim, weights=emb, name='emb_layer')

        if self.radical:
            self.radical_layer = EmbeddingLayer(216, rad_dim, name='radical_layer')

        if self.ngram is not None:
            if ng_embs is not None:
                assert len(ng_embs) == len(self.ngram)
            else:
                ng_embs = [None for _ in range(len(self.ngram))]
            for i, n_gram in enumerate(self.ngram):
                self.gram_layers.append(EmbeddingLayer(n_gram + 1000 * (i + 2), emb_dim, weights=ng_embs[i], name= str(i + 2) + 'gram_layer'))

        wrapper_conv_1, wrapper_mp_1, wrapper_conv_2, wrapper_mp_2, wrapper_dense, wrapper_dr = None, None, None, None, None, None

        if self.graphic:
            self.input_p = []
            assert pixels is not None and filters is not None and pooling_size is not None and con_width is not None

            self.pixels = pixels
            pixel_dim = int(math.sqrt(len(pixels[0])))

            wrapper_conv_1 = TimeDistributed(Convolution(con_width, 1, filters, name='conv_1'), name='wrapper_c1')
            wrapper_mp_1 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_1'), name='wrapper_p1')

            p_size_1 = toolbox.down_pool(pixel_dim, pooling_size)

            wrapper_conv_2 = TimeDistributed(Convolution(con_width, filters, filters, name='conv_2'), name='wrapper_c2')
            wrapper_mp_2 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_2'), name='wrapper_p2')

            p_size_2 = toolbox.down_pool(p_size_1, pooling_size)

            wrapper_dense = TimeDistributed(HiddenLayer(p_size_2 * p_size_2 * filters, 100, activation='tanh', name='conv_dense'), name='wrapper_3')
            wrapper_dr = TimeDistributed(DropoutLayer(self.drop_out), name='wrapper_dr')

        with tf.variable_scope('BiRNN'):

            if gru:
                fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
            else:
                fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)

            if rnn_num > 1:
                fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([fw_rnn_cell]*rnn_num, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([bw_rnn_cell]*rnn_num, state_is_tuple=True)

        output_wrapper = TimeDistributed(HiddenLayer(rnn_dim * 2, self.nums_tags[0], activation='linear', name='hidden'), name='wrapper')

        #define model for each bucket
        for idx, bucket in enumerate(self.buckets_char):
            if idx == 1:
                scope.reuse_variables()
            t1 = time()

            input_v = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))

            self.input_v.append([input_v])

            emb_set = []

            if self.word_vec:
                word_out = self.emb_layer(input_v)
                emb_set.append(word_out)

            if self.radical:
                input_r = tf.placeholder(tf.int32, [None, bucket], name='input_r' + str(bucket))

                self.input_v[-1].append(input_r)
                radical_out = self.radical_layer(input_r)
                emb_set.append(radical_out)

            if self.ngram is not None:
                for i in range(len(self.ngram)):
                    input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                    self.input_v[-1].append(input_g)
                    gram_out = self.gram_layers[i](input_g)
                    emb_set.append(gram_out)

            if self.graphic:
                input_p = tf.placeholder(tf.float32, [None, bucket, pixel_dim*pixel_dim])
                self.input_p.append(input_p)

                pix_out = tf.reshape(input_p, [-1, bucket, pixel_dim, pixel_dim, 1])
                pix_out = tf.unpack(pix_out, axis=1)

                conv_out_1 = wrapper_conv_1(pix_out)
                pooling_out_1 = wrapper_mp_1(conv_out_1)

                conv_out_2 = wrapper_conv_2(pooling_out_1)
                pooling_out_2 = wrapper_mp_2(conv_out_2)

                assert p_size_2 == pooling_out_2[0].get_shape().as_list()[1]
                pooling_out = tf.reshape(pooling_out_2, [-1, bucket, p_size_2 * p_size_2 * filters])
                pooling_out = tf.unpack(pooling_out, axis=1)

                graphic_out = wrapper_dense(pooling_out)
                graphic_out = wrapper_dr(graphic_out)

                emb_set.append(graphic_out)


            if len(emb_set) > 1:
                emb_out = tf.concat(2, emb_set)
                emb_out = tf.unpack(emb_out)

            else:
                emb_out = emb_set[0]

            rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr, name='BiLSTM' + str(bucket), scope='BiRNN')(emb_out, input_v)

            output = output_wrapper(rnn_out)

            output_c = tf.pack(output, axis=1)

            self.output.append([output_c])

            self.output_.append([tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))])

            self.bucket_dit[bucket] = idx

            print 'Bucket %d, %f seconds' % (idx + 1, time() - t1)

        assert len(self.input_v) == len(self.output) and len(self.output) == len(self.output_) and len(self.output) == len(self.counts)

        self.params = tf.trainable_variables()

        self.saver = tf.train.Saver()
Beispiel #2
0
        x = x.astype(np.float)
        x /= 255.0
        x -= x.mean()
        x /= x.std()
        return x

    x = preprocessing(x)
    xt = preprocessing(xt)
    #x = np.random.random((n, 1, 28, 28))
    #y = np.random.randint(2, size=(n))

    # Model
    net = Net()
    net.push(Conv2d(5, 5, 1, 6))  # 1x28x28 -> 6x24x24
    net.push(Relu())
    net.push(Maxpooling(2, 2))  # 6x24x24 -> 6x12x12
    net.push(Conv2d(5, 5, 6, 16))  # 6x12x12 -> 16x8x8
    net.push(Relu())
    net.push(Maxpooling(2, 2))  # 16x8x8 -> 16x4x4
    net.push(Reshape((256)))
    net.push(Linear(256, 84))
    net.push(Relu())
    net.push(Softmax(84, 10))

    # Data
    data = DataProvider()
    n = 10000
    data.train_input(x[:n], y[:n])
    data.test_input(xt, yt)
    data.batch_size(16)
Beispiel #3
0
    def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num, drop_out=0.5, rad_dim=30, emb=None,
                   ngram_embedding=None, pixels=None, con_width=None, filters=None, pooling_size=None):
        """

        :param trained_model:
        :param scope:
        :param emb_dim:
        :param gru:
        :param rnn_dim:
        :param rnn_num:
        :param drop_out:
        :param rad_dim: n
        :param emb:
        :param ngram_embedding: 预训练 ngram embeddig 文件
        :param pixels:
        :param con_width:
        :param filters:
        :param pooling_size:
        :return:
        """
        # trained_model: 模型存储路径
        if trained_model is not None:
            param_dic = {'nums_chars': self.nums_chars, 'nums_tags': self.nums_tags, 'tag_scheme': self.tag_scheme,
                         'graphic': self.graphic, 'pic_size': self.pic_size, 'word_vec': self.word_vec,
                         'radical': self.radical, 'crf': self.crf, 'emb_dim': emb_dim, 'gru': gru, 'rnn_dim': rnn_dim,
                         'rnn_num': rnn_num, 'drop_out': drop_out, 'filter_size': con_width, 'filters': filters,
                         'pooling_size': pooling_size, 'font': self.font, 'buckets_char': self.buckets_char,
                         'ngram': self.ngram}
            print "RNN dimension is %d" % rnn_dim
            print "RNN number is %d" % rnn_num
            print "Character embedding size is %d" % emb_dim
            print "Ngram embedding dimension is %d" % emb_dim
            # 存储模型超参数
            if self.metric == 'All':
                # rindex() 返回子字符串 str 在字符串中最后出现的位置
                # 截取模型文件名
                pindex = trained_model.rindex('/') + 1
                for m in self.all_metrics:
                    f_model = open(trained_model[:pindex] + m + '_' + trained_model[pindex:], 'w')
                    pickle.dump(param_dic, f_model)
                    f_model.close()
            else:
                f_model = open(trained_model, 'w')
                pickle.dump(param_dic, f_model)
                f_model.close()

        # define shared weights and variables

        dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
        self.drop_out = dr
        self.drop_out_v = drop_out

        # 字向量层
        # 为什么字符数要加 500 ?
        # emb_dim 是每个字符的特征向量维度,可以通过命令行参数设置
        # weights 表示预训练的字向量,可以通过命令行参数设置
        if self.word_vec:
            self.emb_layer = EmbeddingLayer(self.nums_chars + 500, emb_dim, weights=emb, name='emb_layer')

        # 偏旁部首向量
        # 依照《康熙字典》,共有 214 个偏旁部首。
        # 只用了常见汉字的偏旁部首,非常见汉字和非汉字的偏旁部首用其他两个特殊符号代替,
        # 所以共有 216 个偏旁部首
        if self.radical:
            self.radical_layer = EmbeddingLayer(216, rad_dim, name='radical_layer')

        if self.ngram is not None:
            if ngram_embedding is not None:
                assert len(ngram_embedding) == len(self.ngram)
            else:
                ngram_embedding = [None for _ in range(len(self.ngram))]
            for i, n_gram in enumerate(self.ngram):
                self.gram_layers.append(EmbeddingLayer(n_gram + 1000 * (i + 2), emb_dim, weights=ngram_embedding[i],
                                                       name=str(i + 2) + 'gram_layer'))

        wrapper_conv_1, wrapper_mp_1, wrapper_conv_2, wrapper_mp_2, wrapper_dense, wrapper_dr = \
            None, None, None, None, None, None

        if self.graphic:
            # 使用图像信息,需要用到 CNN
            self.input_p = []
            assert pixels is not None and filters is not None and pooling_size is not None and con_width is not None

            self.pixels = pixels
            pixel_dim = int(math.sqrt(len(pixels[0])))

            wrapper_conv_1 = TimeDistributed(Convolution(con_width, 1, filters, name='conv_1'), name='wrapper_c1')
            wrapper_mp_1 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_1'), name='wrapper_p1')

            p_size_1 = toolbox.down_pool(pixel_dim, pooling_size)

            wrapper_conv_2 = TimeDistributed(Convolution(con_width, filters, filters, name='conv_2'), name='wrapper_c2')
            wrapper_mp_2 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_2'), name='wrapper_p2')

            p_size_2 = toolbox.down_pool(p_size_1, pooling_size)

            wrapper_dense = TimeDistributed(
                HiddenLayer(p_size_2 * p_size_2 * filters, 100, activation='tanh', name='conv_dense'), name='wrapper_3')
            wrapper_dr = TimeDistributed(DropoutLayer(self.drop_out), name='wrapper_dr')

        with tf.variable_scope('BiRNN'):

            if gru:
                fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
            else:
                fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)

            if rnn_num > 1:
                fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([fw_rnn_cell] * rnn_num, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([bw_rnn_cell] * rnn_num, state_is_tuple=True)

        # 隐藏层,输入是前向 RNN 的输出加上 后向 RNN 的输出,所以输入维度为 rnn_dim * 2
        # 输出维度即标签个数
        output_wrapper = TimeDistributed(
            HiddenLayer(rnn_dim * 2, self.nums_tags[0], activation='linear', name='hidden'),
            name='wrapper')

        # define model for each bucket
        # 每一个 bucket 中的句子长度不一样,所以需要定义单独的模型
        # bucket: bucket 中的句子长度
        for idx, bucket in enumerate(self.buckets_char):
            if idx == 1:
                # scope 是 tf.variable_scope("tagger", reuse=None, initializer=initializer)
                # 只需要设置一次 reuse,后面就都 reuse 了
                scope.reuse_variables()
            t1 = time()

            # 输入的句子,one-hot 向量
            # shape = (batch_size, 句子长度)
            input_sentences = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))

            self.input_v.append([input_sentences])

            emb_set = []

            if self.word_vec:
                # 根据 one-hot 向量查找对应的字向量
                # word_out: shape=(batch_size, 句子长度,字向量维度(64))
                word_out = self.emb_layer(input_sentences)
                emb_set.append(word_out)

            if self.radical:
                # 嵌入偏旁部首信息,shape = (batch_size, 句子长度)
                input_radicals = tf.placeholder(tf.int32, [None, bucket], name='input_r' + str(bucket))

                self.input_v[-1].append(input_radicals)
                radical_out = self.radical_layer(input_radicals)
                emb_set.append(radical_out)

            if self.ngram is not None:
                for i in range(len(self.ngram)):
                    input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                    self.input_v[-1].append(input_g)
                    gram_out = self.gram_layers[i](input_g)
                    emb_set.append(gram_out)

            if self.graphic:
                input_p = tf.placeholder(tf.float32, [None, bucket, pixel_dim * pixel_dim])
                self.input_p.append(input_p)

                pix_out = tf.reshape(input_p, [-1, bucket, pixel_dim, pixel_dim, 1])

                conv_out_1 = wrapper_conv_1(pix_out)
                pooling_out_1 = wrapper_mp_1(conv_out_1)

                conv_out_2 = wrapper_conv_2(pooling_out_1)
                pooling_out_2 = wrapper_mp_2(conv_out_2)

                assert p_size_2 == pooling_out_2[0].get_shape().as_list()[1]
                pooling_out = tf.reshape(pooling_out_2, [-1, bucket, p_size_2 * p_size_2 * filters])
                pooling_out = tf.unstack(pooling_out, axis=1)

                graphic_out = wrapper_dense(pooling_out)
                graphic_out = wrapper_dr(graphic_out)

                emb_set.append(graphic_out)

            if self.window_size > 1:

                padding_size = int(np.floor(self.window_size / 2))
                word_padded = tf.pad(word_out, [[0, 0], [padding_size, padding_size], [0, 0]], 'CONSTANT')

                Ws = []
                for q in range(1, self.window_size + 1):
                    Ws.append(tf.get_variable("W_%d" % q, shape=[q * emb_dim, self.filters_number]))
                b = tf.get_variable("b", shape=[self.filters_number])

                z = [None for _ in range(0, bucket)]

                for q in range(1, self.window_size + 1):
                    for i in range(padding_size, bucket + padding_size):
                        low = i - int(np.floor((q - 1) / 2))
                        high = i + int(np.ceil((q + 1) / 2))
                        x = word_padded[:, low, :]
                        for j in range(low + 1, high):
                            x = tf.concat(values=[x, word_padded[:, j, :]], axis=1)
                        z_iq = tf.tanh(tf.nn.xw_plus_b(x, Ws[q - 1], b))
                        if z[i - padding_size] is None:
                            z[i - padding_size] = z_iq
                        else:
                            z[i - padding_size] = tf.concat([z[i - padding_size], z_iq], axis=1)

                z = tf.stack(z, axis=1)
                values, indices = tf.nn.top_k(z, sorted=False, k=emb_dim)

                # highway layer
                X = tf.unstack(word_out, axis=1)
                Conv_X = tf.unstack(values, axis=1)
                X_hat = []
                W_t = tf.get_variable("W_t", shape=[emb_dim, emb_dim])
                b_t = tf.get_variable("b_t", shape=[emb_dim])
                for x, conv_x in zip(X, Conv_X):
                    T_x = tf.sigmoid(tf.nn.xw_plus_b(x, W_t, b_t))
                    X_hat.append(tf.multiply(conv_x, T_x) + tf.multiply(x, 1 - T_x))
                X_hat = tf.stack(X_hat, axis=1)
                emb_set.append(X_hat)
            if len(emb_set) > 1:
                # 各种字向量直接 concat 起来(字向量、偏旁部首、n-gram、图像信息等)
                emb_out = tf.concat(axis=2, values=emb_set)

            else:
                emb_out = emb_set[0]

            # rnn_out 是前向 RNN 的输出和后向 RNN 的输出 concat 之后的值
            rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr,
                             name='BiLSTM' + str(bucket), scope='BiRNN')(self.highway(emb_out, "tag"), input_sentences)

            # 应用全连接层,Wx+b 得到最后的输出
            output = output_wrapper(rnn_out)
            # 为什么要 [output] 而不是 output 呢?
            self.output.append([output])

            self.output_.append([tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))])

            self.bucket_dit[bucket] = idx

            # language model
            lm_rnn_dim = rnn_dim
            with tf.variable_scope('LM-BiRNN'):
                if gru:
                    lm_fw_rnn_cell = tf.nn.rnn_cell.GRUCell(lm_rnn_dim)
                    lm_bw_rnn_cell = tf.nn.rnn_cell.GRUCell(lm_rnn_dim)
                else:
                    lm_fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(lm_rnn_dim, state_is_tuple=True)
                    lm_bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(lm_rnn_dim, state_is_tuple=True)

                if rnn_num > 1:
                    lm_fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([lm_fw_rnn_cell] * rnn_num, state_is_tuple=True)
                    lm_bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([lm_bw_rnn_cell] * rnn_num, state_is_tuple=True)
            lm_rnn_output = BiLSTM(lm_rnn_dim, fw_cell=lm_fw_rnn_cell,
                                   bw_cell=lm_bw_rnn_cell, p=dr,
                                   name='LM-BiLSTM' + str(bucket),
                                   scope='LM-BiRNN')(self.highway(emb_set[0]), input_sentences)

            lm_output_wrapper = TimeDistributed(
                HiddenLayer(lm_rnn_dim * 2, self.nums_chars + 2, activation='linear', name='lm_hidden'),
                name='lm_wrapper')
            lm_final_output = lm_output_wrapper(lm_rnn_output)
            self.lm_predictions.append([lm_final_output])
            self.lm_groundtruthes.append([tf.placeholder(tf.int32, [None, bucket], name='lm_targets' + str(bucket))])

            print 'Bucket %d, %f seconds' % (idx + 1, time() - t1)

        assert \
            len(self.input_v) == len(self.output) and \
            len(self.output) == len(self.output_) and \
            len(self.lm_predictions) == len(self.lm_groundtruthes) and \
            len(self.output) == len(self.counts)

        self.params = tf.trainable_variables()

        self.saver = tf.train.Saver()
Beispiel #4
0
    def main_graph(self,
                   trained_model,
                   scope,
                   emb_dim,
                   gru,
                   rnn_dim,
                   rnn_num,
                   fnn_dim,
                   window_size,
                   drop_out=0.5,
                   rad_dim=30,
                   emb=None,
                   ng_embs=None,
                   pixels=None,
                   con_width=None,
                   filters=None,
                   pooling_size=None):
        if trained_model is not None:
            param_dic = {}
            param_dic['nums_chars'] = self.nums_chars
            param_dic['nums_tags'] = self.nums_tags
            param_dic['tag_scheme'] = self.tag_scheme
            param_dic['graphic'] = self.graphic
            param_dic['pic_size'] = self.pic_size
            param_dic['word_vec'] = self.word_vec
            param_dic['radical'] = self.radical
            param_dic['crf'] = self.crf
            param_dic['emb_dim'] = emb_dim
            param_dic['gru'] = gru
            param_dic['rnn_dim'] = rnn_dim
            param_dic['rnn_num'] = rnn_num
            param_dic['fnn_dim'] = fnn_dim
            param_dic['window_size'] = window_size
            param_dic['drop_out'] = drop_out
            param_dic['filter_size'] = con_width
            param_dic['filters'] = filters
            param_dic['pooling_size'] = pooling_size
            param_dic['font'] = self.font
            param_dic['buckets_char'] = self.buckets_char
            param_dic['ngram'] = self.ngram
            param_dic['mode'] = self.mode
            #print param_dic
            if self.metric == 'All':
                pindex = trained_model.rindex('/') + 1
                for m in self.all_metrics:
                    f_model = open(
                        trained_model[:pindex] + m + '_' +
                        trained_model[pindex:], 'w')
                    pickle.dump(param_dic, f_model)
                    f_model.close()
            else:
                f_model = open(trained_model, 'w')
                pickle.dump(param_dic, f_model)
                f_model.close()

        # define shared weights and variables

        dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
        self.drop_out = dr
        self.drop_out_v = drop_out

        #concat_emb_dim = emb_dim * 2
        concat_emb_dim = 0

        if self.word_vec:
            self.emb_layer = EmbeddingLayer(self.nums_chars + 500,
                                            emb_dim,
                                            weights=emb,
                                            name='emb_layer')
            concat_emb_dim += emb_dim

        if self.radical:
            self.radical_layer = EmbeddingLayer(216,
                                                rad_dim,
                                                name='radical_layer')
            concat_emb_dim += rad_dim

        if self.ngram is not None:
            if ng_embs is not None:
                assert len(ng_embs) == len(self.ngram)
            else:
                ng_embs = [None for _ in range(len(self.ngram))]
            for i, n_gram in enumerate(self.ngram):
                self.gram_layers.append(
                    EmbeddingLayer(n_gram + 1000 * (i + 2),
                                   emb_dim,
                                   weights=ng_embs[i],
                                   name=str(i + 2) + 'gram_layer'))
                concat_emb_dim += emb_dim

        wrapper_conv_1, wrapper_mp_1, wrapper_conv_2 = None, None, None
        wrapper_mp_2, wrapper_dense, wrapper_dr = None, None, None

        if self.graphic:
            self.input_p = []
            assert pixels is not None and filters is not None and pooling_size is not None and con_width is not None

            self.pixels = pixels
            pixel_dim = int(math.sqrt(len(pixels[0])))

            wrapper_conv_1 = Convolution(con_width, 1, filters, name='conv_1')
            wrapper_mp_1 = Maxpooling(pooling_size,
                                      pooling_size,
                                      name='pooling_1')

            p_size_1 = toolbox.down_pool(pixel_dim, pooling_size)

            wrapper_conv_2 = Convolution(con_width,
                                         filters,
                                         filters,
                                         name='conv_2')
            wrapper_mp_2 = Maxpooling(pooling_size,
                                      pooling_size,
                                      name='pooling_2')
            p_size_2 = toolbox.down_pool(p_size_1, pooling_size)

            wrapper_dense = HiddenLayer(p_size_2 * p_size_2 * filters,
                                        100,
                                        activation='tanh',
                                        name='conv_dense')
            wrapper_dr = DropoutLayer(self.drop_out)

            concat_emb_dim += 100

        fw_rnn_cell, bw_rnn_cell = None, None

        if self.mode == 'RNN':
            with tf.variable_scope('BiRNN'):

                if gru:
                    fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                    bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                else:
                    fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim,
                                                          state_is_tuple=True)
                    bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim,
                                                          state_is_tuple=True)

                if rnn_num > 1:
                    fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(
                        [fw_rnn_cell] * rnn_num, state_is_tuple=True)
                    bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(
                        [bw_rnn_cell] * rnn_num, state_is_tuple=True)

            output_wrapper = HiddenLayer(rnn_dim * 2,
                                         self.nums_tags[0],
                                         activation='linear',
                                         name='out_wrapper')
            fnn_weights, fnn_bias = None, None

        else:

            with tf.variable_scope('FNN'):
                fnn_weights = tf.get_variable(
                    'conv_w',
                    [2 * window_size + 1, concat_emb_dim, 1, fnn_dim])
                fnn_bias = tf.get_variable(
                    'conv_b', [fnn_dim],
                    initializer=tf.constant_initializer(0.1))

            output_wrapper = HiddenLayer(fnn_dim,
                                         self.nums_tags[0],
                                         activation='linear',
                                         name='out_wrapper')

        #define model for each bucket
        for idx, bucket in enumerate(self.buckets_char):
            if idx == 1:
                scope.reuse_variables()
            t1 = time()

            input_v = tf.placeholder(tf.int32, [None, bucket],
                                     name='input_' + str(bucket))

            self.input_v.append([input_v])

            emb_set = []

            if self.word_vec:
                word_out = self.emb_layer(input_v)
                emb_set.append(word_out)

            if self.radical:
                input_r = tf.placeholder(tf.int32, [None, bucket],
                                         name='input_r' + str(bucket))

                self.input_v[-1].append(input_r)
                radical_out = self.radical_layer(input_r)
                emb_set.append(radical_out)

            if self.ngram is not None:
                for i in range(len(self.ngram)):
                    input_g = tf.placeholder(tf.int32, [None, bucket],
                                             name='input_g' + str(i) +
                                             str(bucket))
                    self.input_v[-1].append(input_g)
                    gram_out = self.gram_layers[i](input_g)
                    emb_set.append(gram_out)

            if self.graphic:
                input_p = tf.placeholder(tf.float32,
                                         [None, bucket, pixel_dim * pixel_dim])
                self.input_p.append(input_p)
                pix_out = tf.reshape(input_p, [-1, pixel_dim, pixel_dim, 1])

                conv_out_1 = wrapper_conv_1(pix_out)
                pooling_out_1 = wrapper_mp_1(conv_out_1)

                conv_out_2 = wrapper_conv_2(pooling_out_1)
                pooling_out_2 = wrapper_mp_2(conv_out_2)

                assert p_size_2 == pooling_out_2[0].get_shape().as_list()[1]

                pooling_out = tf.reshape(
                    pooling_out_2, [-1, bucket, p_size_2 * p_size_2 * filters])

                graphic_out = wrapper_dense(pooling_out)
                graphic_out = wrapper_dr(graphic_out)

                emb_set.append(graphic_out)

            if len(emb_set) > 1:
                emb_out = tf.concat(axis=2, values=emb_set)

            else:
                emb_out = emb_set[0]

            if self.mode == 'RNN':
                rnn_out = BiLSTM(rnn_dim,
                                 fw_cell=fw_rnn_cell,
                                 bw_cell=bw_rnn_cell,
                                 p=dr,
                                 name='BiLSTM' + str(bucket),
                                 scope='BiRNN')(emb_out, input_v)

                output = output_wrapper(rnn_out)

            else:
                emb_out = tf.pad(emb_out,
                                 [[0, 0], [window_size, window_size], [0, 0]])
                emb_out = tf.reshape(
                    emb_out, [-1, bucket + 2 * window_size, concat_emb_dim, 1])
                conv_out = tf.nn.conv2d(emb_out,
                                        fnn_weights, [1, 1, 1, 1],
                                        padding='VALID') + fnn_bias
                fnn_out = tf.nn.tanh(conv_out)
                fnn_out = tf.reshape(fnn_out, [-1, bucket, fnn_dim])

                output = output_wrapper(fnn_out)

            self.output.append([output])

            self.output_.append([
                tf.placeholder(tf.int32, [None, bucket],
                               name='tags' + str(bucket))
            ])

            self.bucket_dit[bucket] = idx

            print 'Bucket %d, %f seconds' % (idx + 1, time() - t1)

        assert len(self.input_v) == len(self.output) and len(self.output) == len(self.output_) \
               and len(self.output) == len(self.counts)

        self.params = tf.trainable_variables()

        self.saver = tf.train.Saver()
Beispiel #5
0
    y = y.ravel()
    yt = yt.ravel()

    n = 50000
    nt = 10000
    x = x[:n]
    y = y[:n]
    xt = xt[:nt]
    yt = yt[:nt]

    # Model
    net = Net()
    net.push(Conv2d(5, 5, 3, 20))  # 3x32 -> 10x28
    net.push(Relu())
    net.push(BatchNorm())
    net.push(Maxpooling(4, 4))  # 10x28 -> 10x7
    net.push(Reshape((980)))
    net.push(Linear(980, 200))
    net.push(Relu())
    net.push(BatchNorm())
    net.push(Softmax(200, 10))

    # Data
    data = DataProvider()
    data.train_input(x, y)
    data.test_input(xt, yt)
    data.batch_size(32)
    data.batch_size_test(1000)

    lr = 1e-3
    gamma = 1