    @classmethod
    def quick_run(cls, runScale='basic', dataScale='tiny_fake_2', useCPU=True):

        # ok this is silly. But at least it's fast.
        vocabSize = TextDataReader.maker_from_premade_source(dataScale)(
            bucketingOrRandom='bucketing', batchSize_=50,
            minimumWords=0).vocabSize

        params = [('initialLearningRate', [1e-3]), ('l2RegLambda', [0]),
                  ('vocabSize', [vocabSize]), ('embeddingDim', [32]),
                  ('filterSizes', [[2, 4], [1, 3, 5]]),
                  ('numFeaturesPerFilter', [8])]

        cls.run_thru_data(TextDataReader, dataScale, make_params_dict(params),
                          runScale, useCPU)
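
make_params_dict itself is not shown in these snippets. A minimal sketch, assuming it expands the (name, candidate-values) pairs above into one dict per point on the cross-product grid:

import itertools

def make_params_dict(params):
    # params is a list of (name, candidateValues) pairs; return every
    # combination as its own {name: value} dict for the grid search.
    names = [name for name, _ in params]
    valueLists = [values for _, values in params]
    return [dict(zip(names, combo))
            for combo in itertools.product(*valueLists)]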
Example #2
    @classmethod
    def full_run(cls, runScale='full', dataScale='full', useCPU=True):
        # ok this is silly. But at least it's fast.
        vocabSize = TextDataReader.maker_from_premade_source(dataScale)(
            bucketingOrRandom='bucketing', batchSize_=50, minimumWords=0).vocabSize

        params = [('initialLearningRate', [1e-3]),
                  ('l2RegLambda', [0, 1e-5]),
                  ('vocabSize', [vocabSize]),
                  ('embeddingDim', [64, 128, 300]),
                  ('filterSizes', [[1, 2, 4], [3, 5, 10, 15]]),
                  ('numFeaturesPerFilter', [16, 32, 64]),
                  ('pooledKeepProb', [0.5, 0.7, 0.9, 1])]

        cls.run_thru_data(TextDataReader, dataScale, make_params_dict(params), runScale, useCPU)
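
Assuming make_params_dict expands the full cross product, this grid has 1 x 2 x 1 x 3 x 2 x 3 x 4 = 144 parameter combinations, versus just two (the two filterSizes candidates) in the quick_run above.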
Example #3
    @classmethod
    def quick_run(cls, runScale='basic', dataScale='tiny_fake_2', useCPU=True):

        # ok this is silly. But at least it's fast.
        vocabSize = TextDataReader.maker_from_premade_source(dataScale)(
            bucketingOrRandom='bucketing', batchSize_=50,
            minimumWords=0).vocabSize

        params = [('initialLearningRate', [1e-3]), ('l2RegLambda', [0]),
                  ('vocabSize', [vocabSize]), ('embeddingDim', [32]),
                  ('numRnnOutputSteps', [10]),
                  ('rnnCellUnitsNProbs', [([3], [0.9]), ([4, 8], [1, 1])]),
                  ('convNumFeaturesPerFilter', [16]), ('pooledKeepProb', [1])]

        cls.run_thru_data(TextDataReader, dataScale, make_params_dict(params),
                          runScale, useCPU)
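
Judging by the names, each rnnCellUnitsNProbs candidate pairs a list of per-layer RNN cell sizes with a matching list of per-layer dropout keep probabilities, so ([4, 8], [1, 1]) means two stacked cells of 4 and 8 units, each kept with probability 1; convFilterSizesNKeepProbs in the next example appears to follow the same paired-list convention.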
Example #4
    @classmethod
    def quick_learn(cls,
                    runScale='small',
                    dataScale='full_2occupations',
                    useCPU=True):

        # ok this is silly. But at least it's fast.
        vocabSize = TextDataReader.maker_from_premade_source(dataScale)(
            bucketingOrRandom='bucketing', batchSize_=50,
            minimumWords=0).vocabSize

        params = [('initialLearningRate', [1e-4]), ('l2RegLambda', [1e-5]),
                  ('vocabSize', [vocabSize]), ('embeddingDim', [256]),
                  ('convFilterSizesNKeepProbs', [([2, 3, 5], [0.6, 0.6, 0.6])]),
                  ('numFeaturesPerFilter', [32]), ('pooledKeepProb', [0.9])]

        cls.run_thru_data(TextDataReader, dataScale, make_params_dict(params),
                          runScale, useCPU)
Example #5
    @classmethod
    def comparison_run(cls,
                       runScale='medium',
                       dataScale='full_2occupations',
                       useCPU=True):

        # ok this is silly. But at least it's fast.
        vocabSize = TextDataReader.maker_from_premade_source(dataScale)(
            bucketingOrRandom='bucketing', batchSize_=50,
            minimumWords=0).vocabSize

        params = [('initialLearningRate', [1e-3]), ('l2RegLambda', [5e-4]),
                  ('vocabSize', [vocabSize]), ('embeddingDim', [128, 300]),
                  ('numRnnOutputSteps', [5, 10]),
                  ('rnnCellUnitsNProbs', [([64, 64, 32], [0.8, 0.8, 0.9])]),
                  ('convNumFeaturesPerFilter', [16]),
                  ('pooledKeepProb', [0.5, 0.9])]

        cls.run_thru_data(TextDataReader, dataScale, make_params_dict(params),
                          runScale, useCPU)
Example #6
        # embedding matrix, initialized uniformly in [-1, 1)
        self.W = tf.Variable(
            tf.random_uniform([self.vocabSize, self.embeddingDim], -1.0, 1.0),
            name="W")
        self.output = tf.nn.embedding_lookup(self.W, self.input)

    @property
    def output_shape(self):
        return self.inputDim[0], self.inputDim[1], self.embeddingDim

    @classmethod
    def new(cls, vocabSize_, embeddingDim, activation=None):
        return lambda input_, inputDim_, loggerFactory=None: \
            cls(input_, inputDim_, vocabSize_, embeddingDim, activation, loggerFactory)
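
    # EmbeddingLayer.new is a deferred constructor: it freezes the layer
    # hyperparameters and returns a callable that finishes building the layer
    # once the input tensor and its shape are known. Schematically (the values
    # here are illustrative only):
    #
    #     maker = EmbeddingLayer.new(vocabSize_=1000, embeddingDim=32)
    #     layer = maker(someInputTensor, [-1, someMaxLen])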


if __name__ == '__main__':
    dr = TextDataReader('../data/peopleData/tokensfiles/pol_sci.json',
                        'bucketing', 5, 1)
    fd = dr.get_next_training_batch()[0]
    # inputVal = np.array([[4, 5, 1, 0, 0], [2, 0, 0, 0, 0]])
    # inputShape = inputVal.shape
    # v = tf.Variable(inputVal)

    maker = EmbeddingLayer.new(vocabSize_=dr.vocabSize, embeddingDim=32)
    layer = maker(dr.input['x'], [-1, dr.maxXLen])

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
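    # fd, the first element of the reader's training batch, serves as the
    # feed_dict here, mapping the graph's input placeholders to batch data.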

    # output = sess.run(layer.output)
    output = sess.run(layer.output, fd)

    print('-------- INPUT --------')