def quick_run(cls, runScale='basic', dataScale='tiny_fake_2', useCPU=True):
        """Tiny smoke-test sweep of the embedding-based RNN+conv model.

        NOTE(review): takes ``cls`` but no @classmethod decorator is visible
        in this chunk — presumably applied at the class definition; confirm.
        """
        grid = []
        grid.append(('initialLearningRate', [1e-3]))
        grid.append(('l2RegLambda', [0]))
        grid.append(('numRnnOutputSteps', [10]))
        grid.append(('rnnCellUnitsNProbs', [([3], [0.9]), ([4, 8], [1, 1])]))
        grid.append(('convNumFeaturesPerFilter', [16]))
        grid.append(('pooledKeepProb', [1]))

        cls.run_thru_data(EmbeddingDataReader, dataScale,
                          make_params_dict(grid), runScale, useCPU)
    def full_run(cls, runScale='full', dataScale='full', useCPU=True):
        """Full-scale sweep of the embedding-based RNN+conv model."""
        grid = [
            ('initialLearningRate', [1e-3]),
            ('l2RegLambda', [1e-5]),
            ('numRnnOutputSteps', [10]),
            ('rnnCellUnitsNProbs', [([128, 64, 64], [0.8, 0.8, 0.9])]),
            ('convNumFeaturesPerFilter', [16]),
            ('pooledKeepProb', [0.5, 0.9]),
        ]
        cls.run_thru_data(EmbeddingDataReader, dataScale,
                          make_params_dict(grid), runScale, useCPU)
# --- Example #3 (separator from scraped source; the "0" was a vote count) ---
    def quick_run(cls, runScale ='basic', dataScale='tiny_fake_2', useCPU = True):
        """Minimal smoke-test sweep using a single small RNNConfig."""
        sweep = []
        sweep.append(('initialLearningRate', [1e-3]))
        sweep.append(('l2RegLambda', [1e-3]))
        sweep.append(('l2Scheme', ['overall']))
        sweep.append(('rnnConfigs', [[RNNConfig([16, 13], [1, 1], 'relu')]]))
        sweep.append(('pooledKeepProb', [0.5]))
        sweep.append(('pooledActivation', [None]))

        cls.run_thru_data(EmbeddingDataReader, dataScale, make_params_dict(sweep), runScale, useCPU)
    def quick_learn(cls,
                    runScale='small',
                    dataScale='full_2occupations',
                    useCPU=True):
        """Small learning run for the embedding model (single-config grid)."""
        sweep = [('initialLearningRate', [1e-3])]
        sweep += [('l2RegLambda', [0])]
        sweep += [('numRnnOutputSteps', [10])]
        sweep += [('rnnCellUnitsNProbs', [([32], [0.7])])]
        sweep += [('convNumFeaturesPerFilter', [16])]
        sweep += [('pooledKeepProb', [1])]

        cls.run_thru_data(EmbeddingDataReader, dataScale,
                          make_params_dict(sweep), runScale, useCPU)
    def comparison_run(cls,
                       runScale='medium',
                       dataScale='full_2occupations',
                       useCPU=True):
        """Medium comparison sweep varying RNN output steps and pooled keep-prob."""
        grid = []
        grid.append(('initialLearningRate', [1e-3]))
        grid.append(('l2RegLambda', [5e-4]))
        grid.append(('numRnnOutputSteps', [5, 10]))
        grid.append(('rnnCellUnitsNProbs', [([64, 64, 32], [0.8, 0.8, 0.9])]))
        grid.append(('pooledKeepProb', [0.5, 0.9]))

        cls.run_thru_data(EmbeddingDataReader, dataScale,
                          make_params_dict(grid), runScale, useCPU)
# --- Example #6 (separator from scraped source; the "0" was a vote count) ---
    def one_case(cls, runScale='medium', dataScale='full', useCPU = True):
        """Single hand-picked configuration with two parallel RNN stacks."""
        configs = [[RNNConfig([64, 128, 256], [0.5, 0.6, 0.7]),
                    RNNConfig([64, 64, 64, 64], [0.5, 0.6, 0.7, 0.8])]]

        grid = [('initialLearningRate', [1e-4]),
                ('l2RegLambda', [0]),
                ('l2Scheme', ['final_stage']),
                ('rnnConfigs', configs),
                ('pooledKeepProb', [1]),
                ('pooledActivation', [None])]

        cls.run_thru_data(EmbeddingDataReader, dataScale, make_params_dict(grid), runScale, useCPU)
# --- Example #7 (separator from scraped source; the "0" was a vote count) ---
    def full_run(cls, runScale='full', dataScale='full', useCPU=True):
        """Full CNN sweep; batches are padded to full length (padToFull)."""
        grid = []
        grid.append(('initialLearningRate', [1e-3]))
        grid.append(('l2RegLambda', [0, 1e-5]))
        grid.append(('filterSizes', [[1, 2, 4], [3, 5, 10, 15]]))
        grid.append(('numFeaturesPerFilter', [16, 32, 64]))
        grid.append(('pooledKeepProb', [0.5, 0.7, 0.9, 1]))

        cls.run_thru_data(EmbeddingDataReader, dataScale,
                          make_params_dict(grid), runScale, useCPU,
                          padToFull=True)
    def full_run(cls, runScale='full', dataScale='full', useCPU=True):
        """Full sweep of the word-level CNN; vocab size probed from the data."""
        # Build a throwaway reader just to learn the vocabulary size —
        # wasteful but fast (original author: "ok this is silly").
        make_reader = TextDataReader.maker_from_premade_source(dataScale)
        vocabSize = make_reader(bucketingOrRandom='bucketing', batchSize_=50,
                                minimumWords=0).vocabSize

        grid = [('initialLearningRate', [1e-3]),
                ('l2RegLambda', [0, 1e-5]),
                ('vocabSize', [vocabSize]),
                ('embeddingDim', [64, 128, 300]),
                ('filterSizes', [[1, 2, 4], [3, 5, 10, 15]]),
                ('numFeaturesPerFilter', [16, 32, 64]),
                ('pooledKeepProb', [0.5, 0.7, 0.9, 1])]

        cls.run_thru_data(TextDataReader, dataScale, make_params_dict(grid), runScale, useCPU)
    def quick_run(cls, runScale='basic', dataScale='tiny_fake_2', useCPU=True):
        """Smoke-test sweep of the word-level CNN on tiny fake data."""
        # Throwaway reader only to discover the vocabulary size.
        make_reader = TextDataReader.maker_from_premade_source(dataScale)
        vocabSize = make_reader(bucketingOrRandom='bucketing', batchSize_=50,
                                minimumWords=0).vocabSize

        grid = []
        grid.append(('initialLearningRate', [1e-3]))
        grid.append(('l2RegLambda', [0]))
        grid.append(('vocabSize', [vocabSize]))
        grid.append(('embeddingDim', [32]))
        grid.append(('filterSizes', [[2, 4], [1, 3, 5]]))
        grid.append(('numFeaturesPerFilter', [8]))

        cls.run_thru_data(TextDataReader, dataScale, make_params_dict(grid),
                          runScale, useCPU)
# --- Example #10 (separator from scraped source; the "0" was a vote count) ---
    def comparison_run(cls, runScale='small', dataScale='full_2occupations', useCPU = True):
        """Compare three two-layer RNN stacks with a relu pooled activation."""
        configs = [[RNNConfig([128, 8], [0.8, 1]),
                    RNNConfig([32, 8], [0.8, 1]),
                    RNNConfig([16, 8], [0.8, 1])]]

        grid = [('initialLearningRate', [5e-4]),
                ('l2RegLambda', [1e-4]),
                ('rnnConfigs', configs),
                ('pooledKeepProb', [1]),
                ('pooledActivation', ['relu'])]

        cls.run_thru_data(EmbeddingDataReader, dataScale, make_params_dict(grid), runScale, useCPU)
# --- Example #11 (separator from scraped source; the "0" was a vote count) ---
    def quick_run(cls, runScale='basic', dataScale='tiny_fake_2', useCPU=True):
        """Smoke-test sweep of the word-level RNN+conv model."""
        # Throwaway reader only to discover the vocabulary size.
        make_reader = TextDataReader.maker_from_premade_source(dataScale)
        vocabSize = make_reader(bucketingOrRandom='bucketing', batchSize_=50,
                                minimumWords=0).vocabSize

        grid = [('initialLearningRate', [1e-3]),
                ('l2RegLambda', [0]),
                ('vocabSize', [vocabSize]),
                ('embeddingDim', [32]),
                ('numRnnOutputSteps', [10]),
                ('rnnCellUnitsNProbs', [([3], [0.9]), ([4, 8], [1, 1])]),
                ('convNumFeaturesPerFilter', [16]),
                ('pooledKeepProb', [1])]

        cls.run_thru_data(TextDataReader, dataScale, make_params_dict(grid),
                          runScale, useCPU)
# --- Example #12 (separator from scraped source; the "0" was a vote count) ---
    def comparison_run(cls, runScale='full', dataScale='full_2occupations', useCPU = True):
        """Comparison sweep; max sequence count probed from a padded reader."""
        # Throwaway reader to learn the (padded) max sequence length.
        source = EmbeddingDataReader.premade_sources()[dataScale]
        numSeqs = EmbeddingDataReader(source, 'bucketing', 100, 40,
                                      padToFull=True).maxXLen

        grid = [('initialLearningRate', [1e-3]),
                ('l2RegLambda', [1e-4]),
                ('maxNumSeqs', [numSeqs]),
                ('rnnCellUnitsNProbs', [([32, 32, 32], [0.5] * 3)]),
                ('convFilterSizesNKeepProbs', [([2, 3, 4], [0.5] * 3)]),
                ('convNumFeaturesPerFilter', [8]),
                ('pooledKeepProb', [0.5])]

        cls.run_thru_data(EmbeddingDataReader, dataScale, make_params_dict(grid), runScale, useCPU, padToFull=True)
# --- Example #13 (separator from scraped source; the "0" was a vote count) ---
    def quick_learn(cls,
                    runScale='small',
                    dataScale='full_2occupations',
                    useCPU=True):
        """Small learning run for the word-level CNN with large embeddings."""
        # Throwaway reader only to discover the vocabulary size.
        make_reader = TextDataReader.maker_from_premade_source(dataScale)
        vocabSize = make_reader(bucketingOrRandom='bucketing', batchSize_=50,
                                minimumWords=0).vocabSize

        grid = []
        grid.append(('initialLearningRate', [1e-4]))
        grid.append(('l2RegLambda', [1e-5]))
        grid.append(('vocabSize', [vocabSize]))
        grid.append(('embeddingDim', [256]))
        grid.append(('convFilterSizesNKeepProbs', [([2, 3, 5], [0.6, 0.6, 0.6])]))
        grid.append(('numFeaturesPerFilter', [32]))
        grid.append(('pooledKeepProb', [0.9]))

        cls.run_thru_data(TextDataReader, dataScale, make_params_dict(grid),
                          runScale, useCPU)
# --- Example #14 (separator from scraped source; the "0" was a vote count) ---
    def quick_run(cls, runScale ='basic', dataScale='tiny_fake_2', useCPU = True):
        """Tiny smoke-test of the padded RNN+conv embedding model."""
        # Throwaway reader to learn the (padded) max sequence length.
        source = EmbeddingDataReader.premade_sources()[dataScale]
        numSeqs = EmbeddingDataReader(source, 'bucketing', 100, 40,
                                      padToFull=True).maxXLen

        grid = [('initialLearningRate', [1e-3]),
                ('l2RegLambda', [0]),
                ('maxNumSeqs', [numSeqs]),
                ('rnnCellUnitsNProbs', [([3], [0.9])]),
                ('convFilterSizesNKeepProbs', [([2], [1.])]),
                ('convNumFeaturesPerFilter', [4]),
                ('pooledKeepProb', [1])]

        cls.run_thru_data(EmbeddingDataReader, dataScale, make_params_dict(grid), runScale, useCPU, padToFull=True)
# --- Example #15 (separator from scraped source; the "0" was a vote count) ---
    def comparison_run(cls,
                       runScale='medium',
                       dataScale='full_2occupations',
                       useCPU=True):
        """Medium comparison sweep of the word-level RNN+conv model."""
        # Throwaway reader only to discover the vocabulary size.
        make_reader = TextDataReader.maker_from_premade_source(dataScale)
        vocabSize = make_reader(bucketingOrRandom='bucketing', batchSize_=50,
                                minimumWords=0).vocabSize

        grid = [('initialLearningRate', [1e-3]),
                ('l2RegLambda', [5e-4]),
                ('vocabSize', [vocabSize]),
                ('embeddingDim', [128, 300]),
                ('numRnnOutputSteps', [5, 10]),
                ('rnnCellUnitsNProbs', [([64, 64, 32], [0.8, 0.8, 0.9])]),
                ('convNumFeaturesPerFilter', [16]),
                ('pooledKeepProb', [0.5, 0.9])]

        cls.run_thru_data(TextDataReader, dataScale, make_params_dict(grid),
                          runScale, useCPU)
# --- Example #16 (separator from scraped source; the "0" was a vote count) ---
    def quick_learn(cls,
                    runScale='small',
                    dataScale='small_2occupations',
                    useCPU=True):
        """Small learning run for the padded embedding CNN."""
        # Throwaway reader to learn the (padded) max sequence length.
        source = EmbeddingDataReader.premade_sources()[dataScale]
        numSeqs = EmbeddingDataReader(source, 'bucketing', 100, 40,
                                      padToFull=True).maxXLen

        grid = []
        grid.append(('initialLearningRate', [1e-3]))
        grid.append(('l2RegLambda', [0]))
        grid.append(('maxNumSeqs', [numSeqs]))
        grid.append(('filterSizes', [[2, 4]]))
        grid.append(('numFeaturesPerFilter', [3]))
        grid.append(('pooledKeepProb', [1]))

        cls.run_thru_data(EmbeddingDataReader, dataScale,
                          make_params_dict(grid), runScale, useCPU,
                          padToFull=True)
# --- Example #17 (separator from scraped source; the "0" was a vote count) ---
    def quick_learn(cls,
                    runScale='small',
                    dataScale='small_2occupations',
                    useCPU=True):
        """Small learning run mixing 2-D conv filter shapes with an RNN."""
        # Throwaway reader to learn the (padded) max sequence length.
        source = EmbeddingDataReader.premade_sources()[dataScale]
        numSeqs = EmbeddingDataReader(source, 'bucketing', 100, 40,
                                      padToFull=True).maxXLen

        grid = [('initialLearningRate', [1e-3]),
                ('l2RegLambda', [1e-4]),
                ('maxNumSeqs', [numSeqs]),
                ('convFilterShapesNKeepProbs', [([(3, -1)], [1])]),
                ('convNumFeaturesPerFilter', [32]),
                ('rnnCellUnitsNProbs', [([16], [0.9])])]

        cls.run_thru_data(EmbeddingDataReader, dataScale,
                          make_params_dict(grid), runScale, useCPU,
                          padToFull=True)
    def comparison_run(cls,
                       runScale='medium',
                       dataScale='full_2occupations',
                       useCPU=True):
        """Medium comparison sweep over pooled keep-prob for the padded CNN."""
        # Throwaway reader to learn the (padded) max sequence length.
        source = EmbeddingDataReader.premade_sources()[dataScale]
        numSeqs = EmbeddingDataReader(source, 'bucketing', 100, 40,
                                      padToFull=True).maxXLen

        grid = []
        grid.append(('initialLearningRate', [1e-3]))
        grid.append(('l2RegLambda', [1e-6]))
        grid.append(('maxNumSeqs', [numSeqs]))
        grid.append(('filterSizesNKeepProbs', [([1, 2, 3, 4], [0.9, 0.9, 0.9, 0.9])]))
        grid.append(('numFeaturesPerFilter', [128]))
        grid.append(('pooledKeepProb', [0.5, 0.85, 1]))

        cls.run_thru_data(EmbeddingDataReader, dataScale,
                          make_params_dict(grid), runScale, useCPU,
                          padToFull=True)
# --- Example #19 (separator from scraped source; the "0" was a vote count) ---
    def full_run(cls, runScale='medium', dataScale='full', useCPU = True):
        """Large grid sweep over RNN architectures on the full dataset.

        Enumerates every combination of dropout schedule, layer count, and
        cell-count schedule, then hands the resulting grid to run_thru_data.

        NOTE(review): takes ``cls`` but no @classmethod decorator is visible
        in this chunk — presumably applied at the class definition; confirm.
        """

        def _dropout_schedule(first, length, trend):
            """Return `length` dropout probs starting at `first`; 'inc'/'dec'
            step by +/-0.1 per layer, each value clamped to [0.1, 1]."""
            assert trend in ['inc', 'dec', 'constant']
            if trend == 'constant':
                return [first] * length
            step = 0.1 if trend == 'inc' else -0.1
            probs = [first]
            while len(probs) < length:
                probs.append(max(min(probs[-1] + step, 1), 0.1))
            return probs

        def _cell_schedule(first, length, trend):
            """Return `length` cell counts starting at `first`; doubled or
            halved per layer ('inc'/'dec'), each value clamped to [8, 2048]."""
            assert trend in ['inc', 'dec', 'constant']
            if trend == 'constant':
                return [first] * length
            factor = 2 if trend == 'inc' else 0.5
            cells = [first]
            while len(cells) < length:
                cells.append(int(max(min(cells[-1] * factor, 2048), 8)))
            return cells

        rnnConfigs = []
        for keepProb in [0.5, 0.75, 1]:
            for probTrend in ['inc', 'dec', 'constant']:
                for depth in [2, 3, 4, 5]:
                    for baseCells in [1024, 256, 64, 32, 16]:
                        for cellTrend in ['inc', 'dec', 'constant']:
                            # 1024-cell nets: skip growing schedules and
                            # anything deeper than 3 layers.
                            if baseCells == 1024 and (cellTrend == 'inc' or depth > 3):
                                continue
                            rnnConfigs.append([RNNConfig(
                                _cell_schedule(baseCells, depth, cellTrend),
                                _dropout_schedule(keepProb, depth, probTrend))])

        grid = [('initialLearningRate', [1e-3]),
                ('l2RegLambda', [1e-6]),
                ('l2Scheme', ['final_stage', 'overall']),
                ('rnnConfigs', rnnConfigs),
                ('pooledKeepProb', [0.8, 1]),
                ('pooledActivation', [None])]

        cls.run_thru_data(EmbeddingDataReader, dataScale, make_params_dict(grid), runScale, useCPU)