Example #1
    @classmethod
    def comparison_run(cls, runScale='full', dataScale='full_2occupations', useCPU=True):

        # longest sequence length in the data set, used to size the model inputs
        numSeqs = EmbeddingDataReader(EmbeddingDataReader.premade_sources()[dataScale],
                                      'bucketing', 100, 40, padToFull=True).maxXLen

        params = [('initialLearningRate', [1e-3]),
                  ('l2RegLambda', [1e-4]),
                  ('maxNumSeqs', [numSeqs]),

                  ('rnnCellUnitsNProbs', [([32, 32, 32], [.5]*3)]),

                  ('convFilterSizesNKeepProbs', [([2, 3, 4], [.5]*3)]),
                  ('convNumFeaturesPerFilter', [8]),

                  ('pooledKeepProb', [0.5])]

        cls.run_thru_data(EmbeddingDataReader, dataScale, make_params_dict(params), runScale, useCPU, padToFull=True)
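
The make_params_dict helper is not shown in any of these snippets. A minimal sketch of one plausible implementation, assuming it expands each (name, candidate-values) pair into the full grid of hyperparameter settings; the expansion could equally well live inside run_thru_data:

from itertools import product

def make_params_dict(params):
    # params: list of (name, list-of-candidate-values) pairs, as built above;
    # returns one dict per point of the Cartesian product, i.e. one run config
    names = [name for name, _ in params]
    return [dict(zip(names, combo))
            for combo in product(*(values for _, values in params))]

For example, [('a', [1, 2]), ('b', [3])] would expand to [{'a': 1, 'b': 3}, {'a': 2, 'b': 3}].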
Example #2
    @classmethod
    def quick_run(cls, runScale='basic', dataScale='tiny_fake_2', useCPU=True):

        # longest sequence length in the data set, used to size the model inputs
        numSeqs = EmbeddingDataReader(EmbeddingDataReader.premade_sources()[dataScale],
                                      'bucketing', 100, 40, padToFull=True).maxXLen

        params = [('initialLearningRate', [1e-3]),
                  ('l2RegLambda', [0]),
                  ('maxNumSeqs', [numSeqs]),

                  ('rnnCellUnitsNProbs', [([3], [0.9])]),

                  ('convFilterSizesNKeepProbs', [([2], [1.])]),
                  ('convNumFeaturesPerFilter', [4]),

                  ('pooledKeepProb', [1])]

        cls.run_thru_data(EmbeddingDataReader, dataScale, make_params_dict(params), runScale, useCPU, padToFull=True)
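
The ([units, ...], [keepProbs, ...]) pairs such as rnnCellUnitsNProbs suggest one dropout-wrapped recurrent cell per layer, which matches the zip(...) fragment in Example #8. A sketch under that assumption (the actual cell construction is not part of these snippets):

import tensorflow as tf

def build_rnn_cells(cellUnits, keepProbs):
    # one LSTM cell per layer, each wrapped with output dropout
    return [tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.LSTMCell(units),
                                          output_keep_prob=keep)
            for units, keep in zip(cellUnits, keepProbs)]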
Example #3
    @classmethod
    def quick_learn(cls,
                    runScale='small',
                    dataScale='small_2occupations',
                    useCPU=True):
        numSeqs = EmbeddingDataReader(
            EmbeddingDataReader.premade_sources()[dataScale],
            'bucketing',
            100,
            40,
            padToFull=True).maxXLen

        params = [('initialLearningRate', [1e-3]), ('l2RegLambda', [0]),
                  ('maxNumSeqs', [numSeqs]), ('filterSizes', [[2, 4]]),
                  ('numFeaturesPerFilter', [3]), ('pooledKeepProb', [1])]

        cls.run_thru_data(EmbeddingDataReader,
                          dataScale,
                          make_params_dict(params),
                          runScale,
                          useCPU,
                          padToFull=True)
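
filterSizes and numFeaturesPerFilter point at a Kim-style convolution over the word-embedding sequence. A sketch under that assumption (the conv layer itself is not part of this snippet):

import tensorflow as tf

def text_conv_outputs(x, filterSizes, numFeaturesPerFilter):
    # x: [batch, maxNumSeqs, vecDim, 1]; each filter spans `size` consecutive
    # words across the full embedding width
    vecDim = int(x.shape[2])
    return [tf.layers.conv2d(x, numFeaturesPerFilter, (size, vecDim),
                             activation=tf.nn.relu)
            for size in filterSizes]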
Example #4
import pickle
from multiprocessing import cpu_count

import tensorflow as tf

# EmbeddingDataReader, RunConfig, RNNConfig, and Mark6 are project-internal;
# their import paths are not shown in these snippets.


def evaluate_mark6(l2Scheme, rnnConfigs, pooledKeepProb, outputFpath=None):

    runConfig = RunConfig('medium')

    # make data reader
    dataReaderMaker = EmbeddingDataReader.maker_from_premade_source('full')
    dataReader = dataReaderMaker(bucketingOrRandom='bucketing',
                                 batchSize_=runConfig.batchSize,
                                 minimumWords=40)

    # make model
    p = dict([
        ('initialLearningRate', 1e-3),
        ('l2RegLambda', 1e-6),
        ('l2Scheme', l2Scheme),

        # ('rnnConfigs', [RNNConfig([1024, 512], [0.6, 0.7])]),
        ('rnnConfigs', rnnConfigs),
        ('pooledKeepProb', pooledKeepProb),
        ('pooledActivation', None)
    ])

    modelMaker = lambda input_, logFac: Mark6(
        input_=input_, **p, loggerFactory_=logFac)
    model = modelMaker(dataReader.input, None)

    # make session
    numCores = cpu_count() - 1
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=numCores,
                            inter_op_parallelism_threads=numCores)

    with tf.device('/cpu:0'):
        sess = tf.InteractiveSession(config=config)

    # restore from saved path
    savePath = '/Users/jj/Code/brilliant_people/logs/main/Mark6/loadmark6/saved/save.ckpt'
    tf.train.Saver().restore(sess, savePath)

    # evaluate on the validation and test sets, batch by batch
    res = [[(model.evaluate(sess, fd, full=True), names) for fd, names in bg]
           for bg in [
               dataReader.get_validation_data_in_batches(),
               dataReader.get_test_data_in_batches()
           ]]

    if outputFpath is not None:
        with open(outputFpath, 'wb') as ofile:
            pickle.dump(res, ofile)

    return res
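
A hypothetical call, reusing the RNNConfig from the commented-out line above (the l2Scheme string and the output path are placeholders, not values from the source):

res = evaluate_mark6(l2Scheme='some_scheme',                      # placeholder
                     rnnConfigs=[RNNConfig([1024, 512], [0.6, 0.7])],
                     pooledKeepProb=0.9,
                     outputFpath='mark6_eval.pkl')                # placeholder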
Example #5
    @classmethod
    def comparison_run(cls,
                       runScale='medium',
                       dataScale='full_2occupations',
                       useCPU=True):
        numSeqs = EmbeddingDataReader(
            EmbeddingDataReader.premade_sources()[dataScale],
            'bucketing',
            100,
            40,
            padToFull=True).maxXLen

        params = [('initialLearningRate', [1e-3]), ('l2RegLambda', [1e-6]),
                  ('maxNumSeqs', [numSeqs]),
                  ('filterSizesNKeepProbs', [([1, 2, 3, 4], [0.9, 0.9, 0.9, 0.9])]),
                  ('numFeaturesPerFilter', [128]),
                  ('pooledKeepProb', [0.5, 0.85, 1])]

        cls.run_thru_data(EmbeddingDataReader,
                          dataScale,
                          make_params_dict(params),
                          runScale,
                          useCPU,
                          padToFull=True)
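
Under the grid-expansion reading of make_params_dict sketched in Example #1, this call produces 1 × 1 × 1 × 1 × 1 × 3 = 3 configurations, one per pooledKeepProb value.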
Example #6
    @classmethod
    def quick_learn(cls,
                    runScale='small',
                    dataScale='small_2occupations',
                    useCPU=True):

        numSeqs = EmbeddingDataReader(
            EmbeddingDataReader.premade_sources()[dataScale],
            'bucketing',
            100,
            40,
            padToFull=True).maxXLen

        params = [('initialLearningRate', [1e-3]), ('l2RegLambda', [1e-4]),
                  ('maxNumSeqs', [numSeqs]),
                  # (3, -1): each filter spans 3 words; -1 presumably means the full embedding width
                  ('convFilterShapesNKeepProbs', [([(3, -1)], [1])]),
                  ('convNumFeaturesPerFilter', [32]),
                  ('rnnCellUnitsNProbs', [([16], [0.9])])]

        cls.run_thru_data(EmbeddingDataReader,
                          dataScale,
                          make_params_dict(params),
                          runScale,
                          useCPU,
                          padToFull=True)
Example #7
    def make_graph(self):

        # stacked RNN over the embedded word vectors, then a dense readout layer
        self.add_layers(RNNLayer.new(self.rnnNumCellUnits), self.input,
                        (-1, -1, self.vecDim))

        self.add_layers(FullyConnectedLayer.new(self.numClasses))


if __name__ == '__main__':

    datadir = '../data/peopleData/2_samples'
    # datadir = '../data/peopleData/earlyLifesWordMats/politician_scientist'

    lr = 1e-3
    dr = EmbeddingDataReader(datadir, 'bucketing', 40, 30)
    model = Mark3(dr.input, lr, [16, 8], [.5, .5])

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    for step in range(100):
        if step % 10 == 0:
            # decay the learning rate by 10% every 10 steps
            lr *= 0.9
            model.assign_lr(sess, lr)
            print('Lowering learning rate to', lr)

        fd = dr.get_next_training_batch()[0]
        _, c, acc = model.train_op(sess, fd, True)
        print('Step %d: (cost, accuracy): training (%0.3f, %0.3f)' %
              (step, c, acc))
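
assign_lr is not shown in this snippet. A minimal sketch of the usual TF1 pattern it likely follows (a non-trainable learning-rate variable updated through a placeholder; the attribute names here are illustrative):

import tensorflow as tf

class LearningRateMixin:
    def _build_lr_update(self, initialLearningRate):
        # non-trainable variable holding the current learning rate
        self._lr = tf.Variable(initialLearningRate, trainable=False)
        self._newLr = tf.placeholder(tf.float32, shape=[])
        self._lrUpdate = tf.assign(self._lr, self._newLr)

    def assign_lr(self, sess, newLr):
        sess.run(self._lrUpdate, feed_dict={self._newLr: newLr})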
Example #8
            # one cell of f units with output keep prob k per LSTM layer
            for f, k in zip(self.numLSTMUnits, self.outputKeepProbs)
        ])

    @property
    def output_shape(self):
        # (batch size, steps emitted, 2 * units in the last layer); the factor
        # of 2 presumably comes from concatenating forward and backward outputs
        return (self.inputDim[0], self.numStepsToOutput,
                2 * self.numLSTMUnits[-1])

    @classmethod
    def new(cls,
            numLSTMUnits_,
            outputKeepProbs_=1.,
            numStepsToOutput_=1,
            activation=None):

        return lambda input_, inputDim_, loggerFactory=None: \
            cls(input_, inputDim_, numLSTMUnits_, outputKeepProbs_, numStepsToOutput_, activation, loggerFactory)


if __name__ == '__main__':
    dr = EmbeddingDataReader('../data/peopleData/2_samples', 'bucketing', 10,
                             50)
    maker = RNNLayer.new([32, 16], [0.5, 1.], 3)
    layer = maker(dr.input, [10, -1])

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    out = sess.run(layer.output, dr.get_next_training_batch()[0])
    print(out.shape)  # should be (10, 3, 32): batch 10, numStepsToOutput_=3, 2 * 16 units