@classmethod
def quick_run(cls, runScale='basic', dataScale='tiny_fake_2', useCPU=True):
    # ok this is silly. But at least it's fast.
    vocabSize = TextDataReader.maker_from_premade_source(dataScale)(
        bucketingOrRandom='bucketing', batchSize_=50, minimumWords=0).vocabSize

    params = [('initialLearningRate', [1e-3]),
              ('l2RegLambda', [0]),
              ('vocabSize', [vocabSize]),
              ('embeddingDim', [32]),
              ('filterSizes', [[2, 4], [1, 3, 5]]),
              ('numFeaturesPerFilter', [8])]

    cls.run_thru_data(TextDataReader, dataScale, make_params_dict(params), runScale, useCPU)
@classmethod
def full_run(cls, runScale='full', dataScale='full', useCPU=True):
    # ok this is silly. But at least it's fast.
    vocabSize = TextDataReader.maker_from_premade_source(dataScale)(
        bucketingOrRandom='bucketing', batchSize_=50, minimumWords=0).vocabSize

    params = [('initialLearningRate', [1e-3]),
              ('l2RegLambda', [0, 1e-5]),
              ('vocabSize', [vocabSize]),
              ('embeddingDim', [64, 128, 300]),
              ('filterSizes', [[1, 2, 4], [3, 5, 10, 15]]),
              ('numFeaturesPerFilter', [16, 32, 64]),
              ('pooledKeepProb', [0.5, 0.7, 0.9, 1])]

    cls.run_thru_data(TextDataReader, dataScale, make_params_dict(params), runScale, useCPU)
@classmethod
def quick_run(cls, runScale='basic', dataScale='tiny_fake_2', useCPU=True):
    # ok this is silly. But at least it's fast.
    vocabSize = TextDataReader.maker_from_premade_source(dataScale)(
        bucketingOrRandom='bucketing', batchSize_=50, minimumWords=0).vocabSize

    params = [('initialLearningRate', [1e-3]),
              ('l2RegLambda', [0]),
              ('vocabSize', [vocabSize]),
              ('embeddingDim', [32]),
              ('numRnnOutputSteps', [10]),
              ('rnnCellUnitsNProbs', [([3], [0.9]), ([4, 8], [1, 1])]),
              ('convNumFeaturesPerFilter', [16]),
              ('pooledKeepProb', [1])]

    cls.run_thru_data(TextDataReader, dataScale, make_params_dict(params), runScale, useCPU)
@classmethod
def quick_learn(cls, runScale='small', dataScale='full_2occupations', useCPU=True):
    # ok this is silly. But at least it's fast.
    vocabSize = TextDataReader.maker_from_premade_source(dataScale)(
        bucketingOrRandom='bucketing', batchSize_=50, minimumWords=0).vocabSize

    params = [('initialLearningRate', [1e-4]),
              ('l2RegLambda', [1e-5]),
              ('vocabSize', [vocabSize]),
              ('embeddingDim', [256]),
              ('convFilterSizesNKeepProbs', [([2, 3, 5], [0.6, 0.6, 0.6])]),
              ('numFeaturesPerFilter', [32]),
              ('pooledKeepProb', [0.9])]

    cls.run_thru_data(TextDataReader, dataScale, make_params_dict(params), runScale, useCPU)
@classmethod
def comparison_run(cls, runScale='medium', dataScale='full_2occupations', useCPU=True):
    # ok this is silly. But at least it's fast.
    vocabSize = TextDataReader.maker_from_premade_source(dataScale)(
        bucketingOrRandom='bucketing', batchSize_=50, minimumWords=0).vocabSize

    params = [('initialLearningRate', [1e-3]),
              ('l2RegLambda', [5e-4]),
              ('vocabSize', [vocabSize]),
              ('embeddingDim', [128, 300]),
              ('numRnnOutputSteps', [5, 10]),
              ('rnnCellUnitsNProbs', [([64, 64, 32], [0.8, 0.8, 0.9])]),
              ('convNumFeaturesPerFilter', [16]),
              ('pooledKeepProb', [0.5, 0.9])]

    cls.run_thru_data(TextDataReader, dataScale, make_params_dict(params), runScale, useCPU)
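# All of the run methods above pass their hyperparameter grids through make_params_dict
# before handing them to run_thru_data. The helper below is only a minimal sketch of how
# such a function could work, assuming it expands the (name, candidateValues) pairs into
# the cartesian product of per-run parameter dicts; the actual implementation in this
# repo may differ.
from itertools import product


def make_params_dict(namesAndValues):
    """Expand [(name, [v1, v2, ...]), ...] into a list of {name: value} combinations."""
    names = [name for name, _ in namesAndValues]
    valueLists = [values for _, values in namesAndValues]
    return [dict(zip(names, combo)) for combo in product(*valueLists)]


# Example: two embeddingDim values x two pooledKeepProb values -> four parameter dicts.
# make_params_dict([('embeddingDim', [128, 300]), ('pooledKeepProb', [0.5, 0.9])])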
[self.vocabSize, self.embeddingDim], -1.0, 1.0), name="W") self.output = tf.nn.embedding_lookup(self.W, self.input) @property def output_shape(self): return self.inputDim[0], self.inputDim[1], self.embeddingDim @classmethod def new(cls, vocabSize_, embeddingDim, activation=None): return lambda input_, inputDim_, loggerFactory=None: \ cls(input_, inputDim_, vocabSize_, embeddingDim, activation, loggerFactory) if __name__ == '__main__': dr = TextDataReader('../data/peopleData/tokensfiles/pol_sci.json', 'bucketing', 5, 1) fd = dr.get_next_training_batch()[0] # inputVal = np.array([[4, 5, 1, 0, 0], [2, 0, 0, 0, 0]]) # inputShape = inputVal.shape # v = tf.Variable(inputVal) maker = EmbeddingLayer.new(vocabSize_=dr.vocabSize, embeddingDim=32) layer = maker(dr.input['x'], [-1, dr.maxXLen]) sess = tf.InteractiveSession() sess.run(tf.global_variables_initializer()) # output = sess.run(layer.output) output = sess.run(layer.output, fd) print('-------- INPUT --------')