def quick_run(cls, runScale='basic', dataScale='tiny_fake_2', useCPU=True):
    """Smoke-test run on tiny fake data with a minimal hyperparameter grid."""
    grid = [
        ('initialLearningRate', [1e-3]),
        ('l2RegLambda', [0]),
        ('numRnnOutputSteps', [10]),
        ('rnnCellUnitsNProbs', [([3], [0.9]), ([4, 8], [1, 1])]),
        ('convNumFeaturesPerFilter', [16]),
        ('pooledKeepProb', [1]),
    ]
    cls.run_thru_data(EmbeddingDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU)
def full_run(cls, runScale='full', dataScale='full', useCPU=True):
    """Full-scale run over a single deep RNN configuration."""
    grid = [
        ('initialLearningRate', [1e-3]),
        ('l2RegLambda', [1e-5]),
        ('numRnnOutputSteps', [10]),
        ('rnnCellUnitsNProbs', [([128, 64, 64], [0.8, 0.8, 0.9])]),
        ('convNumFeaturesPerFilter', [16]),
        ('pooledKeepProb', [0.5, 0.9]),
    ]
    cls.run_thru_data(EmbeddingDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU)
def quick_run(cls, runScale='basic', dataScale='tiny_fake_2', useCPU=True):
    """Smoke-test run on tiny fake data using a single two-layer RNNConfig."""
    grid = [
        ('initialLearningRate', [1e-3]),
        ('l2RegLambda', [1e-3]),
        ('l2Scheme', ['overall']),
        ('rnnConfigs', [[RNNConfig([16, 13], [1, 1], 'relu')]]),
        ('pooledKeepProb', [0.5]),
        ('pooledActivation', [None]),
    ]
    cls.run_thru_data(EmbeddingDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU)
def quick_learn(cls, runScale='small', dataScale='full_2occupations', useCPU=True):
    """Small learning run on the two-occupation subset with one RNN layer."""
    grid = [
        ('initialLearningRate', [1e-3]),
        ('l2RegLambda', [0]),
        ('numRnnOutputSteps', [10]),
        ('rnnCellUnitsNProbs', [([32], [0.7])]),
        ('convNumFeaturesPerFilter', [16]),
        ('pooledKeepProb', [1]),
    ]
    cls.run_thru_data(EmbeddingDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU)
def comparison_run(cls, runScale='medium', dataScale='full_2occupations', useCPU=True):
    """Compare output-step counts and pooled keep-probs on a fixed RNN stack."""
    grid = [
        ('initialLearningRate', [1e-3]),
        ('l2RegLambda', [5e-4]),
        ('numRnnOutputSteps', [5, 10]),
        ('rnnCellUnitsNProbs', [([64, 64, 32], [0.8, 0.8, 0.9])]),
        ('pooledKeepProb', [0.5, 0.9]),
    ]
    cls.run_thru_data(EmbeddingDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU)
def one_case(cls, runScale='medium', dataScale='full', useCPU=True):
    """Run a single hand-picked case with two parallel RNN configurations."""
    rnn_pair = [
        RNNConfig([64, 128, 256], [0.5, 0.6, 0.7]),
        RNNConfig([64, 64, 64, 64], [0.5, 0.6, 0.7, 0.8]),
    ]
    grid = [
        ('initialLearningRate', [1e-4]),
        ('l2RegLambda', [0]),
        ('l2Scheme', ['final_stage']),
        ('rnnConfigs', [rnn_pair]),
        ('pooledKeepProb', [1]),
        ('pooledActivation', [None]),
    ]
    cls.run_thru_data(EmbeddingDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU)
def full_run(cls, runScale='full', dataScale='full', useCPU=True):
    """Full sweep over conv filter sizes, feature counts and keep-probs."""
    grid = [
        ('initialLearningRate', [1e-3]),
        ('l2RegLambda', [0, 1e-5]),
        ('filterSizes', [[1, 2, 4], [3, 5, 10, 15]]),
        ('numFeaturesPerFilter', [16, 32, 64]),
        ('pooledKeepProb', [0.5, 0.7, 0.9, 1]),
    ]
    cls.run_thru_data(EmbeddingDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU, padToFull=True)
def full_run(cls, runScale='full', dataScale='full', useCPU=True):
    """Full sweep on text data; embedding dim and conv grid are crossed."""
    # Building a throwaway reader just to learn the vocabulary size is
    # wasteful, but it is quick and keeps the grid self-contained.
    vocab_size = TextDataReader.maker_from_premade_source(dataScale)(
        bucketingOrRandom='bucketing', batchSize_=50, minimumWords=0).vocabSize
    grid = [
        ('initialLearningRate', [1e-3]),
        ('l2RegLambda', [0, 1e-5]),
        ('vocabSize', [vocab_size]),
        ('embeddingDim', [64, 128, 300]),
        ('filterSizes', [[1, 2, 4], [3, 5, 10, 15]]),
        ('numFeaturesPerFilter', [16, 32, 64]),
        ('pooledKeepProb', [0.5, 0.7, 0.9, 1]),
    ]
    cls.run_thru_data(TextDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU)
def quick_run(cls, runScale='basic', dataScale='tiny_fake_2', useCPU=True):
    """Smoke-test run on tiny fake text data with a small conv grid."""
    # Building a throwaway reader just to learn the vocabulary size is
    # wasteful, but it is quick and keeps the grid self-contained.
    vocab_size = TextDataReader.maker_from_premade_source(dataScale)(
        bucketingOrRandom='bucketing', batchSize_=50, minimumWords=0).vocabSize
    grid = [
        ('initialLearningRate', [1e-3]),
        ('l2RegLambda', [0]),
        ('vocabSize', [vocab_size]),
        ('embeddingDim', [32]),
        ('filterSizes', [[2, 4], [1, 3, 5]]),
        ('numFeaturesPerFilter', [8]),
    ]
    cls.run_thru_data(TextDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU)
def comparison_run(cls, runScale='small', dataScale='full_2occupations', useCPU=True):
    """Compare three parallel two-layer RNN configurations of varying width."""
    configs = [
        RNNConfig([128, 8], [0.8, 1]),
        RNNConfig([32, 8], [0.8, 1]),
        RNNConfig([16, 8], [0.8, 1]),
    ]
    grid = [
        ('initialLearningRate', [5e-4]),
        ('l2RegLambda', [1e-4]),
        ('rnnConfigs', [configs]),
        ('pooledKeepProb', [1]),
        ('pooledActivation', ['relu']),
    ]
    cls.run_thru_data(EmbeddingDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU)
def quick_run(cls, runScale='basic', dataScale='tiny_fake_2', useCPU=True):
    """Smoke-test run on tiny fake text data with small RNN stacks."""
    # Building a throwaway reader just to learn the vocabulary size is
    # wasteful, but it is quick and keeps the grid self-contained.
    vocab_size = TextDataReader.maker_from_premade_source(dataScale)(
        bucketingOrRandom='bucketing', batchSize_=50, minimumWords=0).vocabSize
    grid = [
        ('initialLearningRate', [1e-3]),
        ('l2RegLambda', [0]),
        ('vocabSize', [vocab_size]),
        ('embeddingDim', [32]),
        ('numRnnOutputSteps', [10]),
        ('rnnCellUnitsNProbs', [([3], [0.9]), ([4, 8], [1, 1])]),
        ('convNumFeaturesPerFilter', [16]),
        ('pooledKeepProb', [1]),
    ]
    cls.run_thru_data(TextDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU)
def comparison_run(cls, runScale='full', dataScale='full_2occupations', useCPU=True):
    """Full-scale comparison on padded sequences with fixed RNN/conv stacks."""
    # A reader is built once just to read off the padded sequence length.
    max_seqs = EmbeddingDataReader(
        EmbeddingDataReader.premade_sources()[dataScale],
        'bucketing', 100, 40, padToFull=True).maxXLen
    grid = [
        ('initialLearningRate', [1e-3]),
        ('l2RegLambda', [1e-4]),
        ('maxNumSeqs', [max_seqs]),
        ('rnnCellUnitsNProbs', [([32, 32, 32], [0.5] * 3)]),
        ('convFilterSizesNKeepProbs', [([2, 3, 4], [0.5] * 3)]),
        ('convNumFeaturesPerFilter', [8]),
        ('pooledKeepProb', [0.5]),
    ]
    cls.run_thru_data(EmbeddingDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU, padToFull=True)
def quick_learn(cls, runScale='small', dataScale='full_2occupations', useCPU=True):
    """Small learning run on text data with one fixed conv configuration."""
    # Building a throwaway reader just to learn the vocabulary size is
    # wasteful, but it is quick and keeps the grid self-contained.
    vocab_size = TextDataReader.maker_from_premade_source(dataScale)(
        bucketingOrRandom='bucketing', batchSize_=50, minimumWords=0).vocabSize
    grid = [
        ('initialLearningRate', [1e-4]),
        ('l2RegLambda', [1e-5]),
        ('vocabSize', [vocab_size]),
        ('embeddingDim', [256]),
        ('convFilterSizesNKeepProbs', [([2, 3, 5], [0.6, 0.6, 0.6])]),
        ('numFeaturesPerFilter', [32]),
        ('pooledKeepProb', [0.9]),
    ]
    cls.run_thru_data(TextDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU)
def quick_run(cls, runScale='basic', dataScale='tiny_fake_2', useCPU=True):
    """Smoke-test run on tiny fake data with padded sequences."""
    # A reader is built once just to read off the padded sequence length.
    max_seqs = EmbeddingDataReader(
        EmbeddingDataReader.premade_sources()[dataScale],
        'bucketing', 100, 40, padToFull=True).maxXLen
    grid = [
        ('initialLearningRate', [1e-3]),
        ('l2RegLambda', [0]),
        ('maxNumSeqs', [max_seqs]),
        ('rnnCellUnitsNProbs', [([3], [0.9])]),
        ('convFilterSizesNKeepProbs', [([2], [1.0])]),
        ('convNumFeaturesPerFilter', [4]),
        ('pooledKeepProb', [1]),
    ]
    cls.run_thru_data(EmbeddingDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU, padToFull=True)
def comparison_run(cls, runScale='medium', dataScale='full_2occupations', useCPU=True):
    """Compare embedding dims and output-step counts on text data."""
    # Building a throwaway reader just to learn the vocabulary size is
    # wasteful, but it is quick and keeps the grid self-contained.
    vocab_size = TextDataReader.maker_from_premade_source(dataScale)(
        bucketingOrRandom='bucketing', batchSize_=50, minimumWords=0).vocabSize
    grid = [
        ('initialLearningRate', [1e-3]),
        ('l2RegLambda', [5e-4]),
        ('vocabSize', [vocab_size]),
        ('embeddingDim', [128, 300]),
        ('numRnnOutputSteps', [5, 10]),
        ('rnnCellUnitsNProbs', [([64, 64, 32], [0.8, 0.8, 0.9])]),
        ('convNumFeaturesPerFilter', [16]),
        ('pooledKeepProb', [0.5, 0.9]),
    ]
    cls.run_thru_data(TextDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU)
def quick_learn(cls, runScale='small', dataScale='small_2occupations', useCPU=True):
    """Small learning run with padded sequences and a tiny conv grid."""
    # A reader is built once just to read off the padded sequence length.
    max_seqs = EmbeddingDataReader(
        EmbeddingDataReader.premade_sources()[dataScale],
        'bucketing', 100, 40, padToFull=True).maxXLen
    grid = [
        ('initialLearningRate', [1e-3]),
        ('l2RegLambda', [0]),
        ('maxNumSeqs', [max_seqs]),
        ('filterSizes', [[2, 4]]),
        ('numFeaturesPerFilter', [3]),
        ('pooledKeepProb', [1]),
    ]
    cls.run_thru_data(EmbeddingDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU, padToFull=True)
def quick_learn(cls, runScale='small', dataScale='small_2occupations', useCPU=True):
    """Small learning run combining a single conv filter shape with one RNN layer."""
    # A reader is built once just to read off the padded sequence length.
    max_seqs = EmbeddingDataReader(
        EmbeddingDataReader.premade_sources()[dataScale],
        'bucketing', 100, 40, padToFull=True).maxXLen
    grid = [
        ('initialLearningRate', [1e-3]),
        ('l2RegLambda', [1e-4]),
        ('maxNumSeqs', [max_seqs]),
        ('convFilterShapesNKeepProbs', [([(3, -1)], [1])]),
        ('convNumFeaturesPerFilter', [32]),
        ('rnnCellUnitsNProbs', [([16], [0.9])]),
    ]
    cls.run_thru_data(EmbeddingDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU, padToFull=True)
def comparison_run(cls, runScale='medium', dataScale='full_2occupations', useCPU=True):
    """Compare pooled keep-probs over a fixed wide conv configuration."""
    # A reader is built once just to read off the padded sequence length.
    max_seqs = EmbeddingDataReader(
        EmbeddingDataReader.premade_sources()[dataScale],
        'bucketing', 100, 40, padToFull=True).maxXLen
    grid = [
        ('initialLearningRate', [1e-3]),
        ('l2RegLambda', [1e-6]),
        ('maxNumSeqs', [max_seqs]),
        ('filterSizesNKeepProbs', [([1, 2, 3, 4], [0.9, 0.9, 0.9, 0.9])]),
        ('numFeaturesPerFilter', [128]),
        ('pooledKeepProb', [0.5, 0.85, 1]),
    ]
    cls.run_thru_data(EmbeddingDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU, padToFull=True)
def full_run(cls, runScale='medium', dataScale='full', useCPU=True):
    """Sweep RNN depth, width and dropout patterns on the full data set.

    Configurations are generated combinatorially: each combination of a
    starting keep-prob, a keep-prob pattern, a layer count, a starting
    cell width and a width pattern yields one single-RNNConfig entry.
    """

    def _probs(start, count, pattern):
        """Produce `count` dropout probs from `start`, stepping by ±0.1 (clamped to [0.1, 1])."""
        assert pattern in ['inc', 'dec', 'constant']
        if pattern == 'constant':
            return [start] * count
        step = 0.1 if pattern == 'inc' else -0.1
        seq = [start]
        while len(seq) < count:
            seq.append(max(min(seq[-1] + step, 1), 0.1))
        return seq

    def _cells(start, count, pattern):
        """Produce `count` cell-unit sizes from `start` by doubling or halving (clamped to [8, 2048])."""
        assert pattern in ['inc', 'dec', 'constant']
        if pattern == 'constant':
            return [start] * count
        factor = 2 if pattern == 'inc' else 0.5
        seq = [start]
        while len(seq) < count:
            seq.append(int(max(min(seq[-1] * factor, 2048), 8)))
        return seq

    rnnConfigs = []
    for keep_prob in [0.5, 0.75, 1]:
        for prob_pattern in ['inc', 'dec', 'constant']:
            for num_layers in [2, 3, 4, 5]:
                for width in [1024, 256, 64, 32, 16]:
                    for width_pattern in ['inc', 'dec', 'constant']:
                        # already finished: inc and dec for width 1024, depth 2
                        if width == 1024 and (width_pattern == 'inc' or num_layers > 3):
                            continue
                        rnnConfigs.append([RNNConfig(
                            _cells(width, num_layers, width_pattern),
                            _probs(keep_prob, num_layers, prob_pattern))])

    grid = [
        ('initialLearningRate', [1e-3]),
        ('l2RegLambda', [1e-6]),
        ('l2Scheme', ['final_stage', 'overall']),
        ('rnnConfigs', rnnConfigs),
        ('pooledKeepProb', [0.8, 1]),
        ('pooledActivation', [None]),
    ]
    cls.run_thru_data(EmbeddingDataReader, dataScale,
                      make_params_dict(grid), runScale, useCPU)