# Scale the training matrix in place by its global (scalar) standard
# deviation, then shuffle its rows and wrap it in a Theano shared variable.
data_std = train_features_numpy.std()
train_features_numpy /= data_std
train_features_numpy = train_features_numpy[
    numpy.random.permutation(numtrain)]
train_features_theano = theano.shared(train_features_numpy)

# Apply the training statistics to the test data (in place) and keep the
# first three frame lengths of every test row.
test_features -= data_mean
test_features /= data_std
test_feature_beginnings = test_features[:, :frame_len * 3]
print('... done')

print('pretraining velocity model ...')
pretrainmodel_velocity = gatedAutoencoder.FactoredGatedAutoencoder(
    numvisX=frame_len,
    numvisY=frame_len,
    numfac=200,
    nummap=100,
    output_type='real',
    corruption_type='zeromask',
    corruption_level=0.3,
    numpy_rng=numpy_rng,
    theano_rng=theano_rng)

# Cut every training row into consecutive, non-overlapping windows of
# 2 * frame_len columns. Windows are collected window-index first, then
# training row, and each one is kept as a (1, 2 * frame_len) array.
# Presumably each window is a pair of adjacent frames (numvisX and numvisY
# are both frame_len) -- confirm against the data layout.
velocity_windows = []
for j in range(seq_len / (frame_len * 2)):
    window_start = 2 * j * frame_len
    window_stop = 2 * (j + 1) * frame_len
    for i in range(numtrain):
        window = train_features_numpy[i, window_start:window_stop]
        velocity_windows.append(window[None, :])
pretrain_features_velocity_numpy = numpy.concatenate(velocity_windows, 0)

# Shuffle the windows before handing them to the trainer.
pretrain_features_velocity_numpy = pretrain_features_velocity_numpy[
    numpy.random.permutation(pretrain_features_velocity_numpy.shape[0])]
pretrain_features_velocity_theano = theano.shared(
    pretrain_features_velocity_numpy)

pretrainer_velocity = GraddescentMinibatch(
    pretrainmodel_velocity,
    pretrain_features_velocity_theano,
    batchsize=100,
    learningrate=0.01)

# Ten calls to the trainer's step() method.
for epoch in xrange(10):
    pretrainer_velocity.step()
print('... done')
# Mini-batch size and the widths of the two input views.
batchsize = 100
numvisX, numvisY = train_features_x.shape[1], train_features_y.shape[1]
# NOTE(review): numbatches is not used anywhere in the visible code.
numbatches = train_features.get_value().shape[0] / batchsize

# INSTANTIATE MODEL
print('... instantiating model')
# Both random generators are created with the fixed seed 1.
numpy_rng = numpy.random.RandomState(1)
theano_rng = RandomStreams(1)
model_settings = dict(
    numvisX=numvisX,
    numvisY=numvisY,
    numfac=numfac,
    nummap=nummap,
    numhid=numhid,
    output_type='real',
    weight_decay_vis=weight_decay_vis,
    weight_decay_map=weight_decay_map,
    corruption_type=corruption_type,
    corruption_level=corruption_level,
    init_topology=init_topology,
    numpy_rng=numpy_rng,
    theano_rng=theano_rng)
model = gatedAutoencoder.FactoredGatedAutoencoder(**model_settings)
print('... done')

# TRAIN MODEL
numepochs = 100
learningrate = 0.01
# The trainer class is referenced by its bare name here, not through the
# gatedAutoencoder module.
trainer = GraddescentMinibatch(model, train_features, batchsize, learningrate)
def main(args):
    """Train a factored gated autoencoder on paired data from a .mat file.

    Loads the matrices stored under the keys 'x' and 'y' in ``args.input``,
    optionally normalizes them, shuffles their rows with one shared
    permutation, trains a ``FactoredGatedAutoencoder`` on the column-wise
    concatenation of the two, and writes the learned parameters to
    ``args.output`` as a .mat file with the keys 'wxf', 'wyf', 'whf' and
    'z_bias'.

    Args:
        args: Object exposing the attributes ``input`` (path of the .mat
            file to read), ``output`` (path of the .mat file to write),
            ``numfac`` and ``nummap`` (forwarded to the model),
            ``numepochs`` (number of ``trainer.step()`` calls), ``donorm``
            (normalize when equal to 1), ``verbose`` and ``learnrate``
            (both forwarded to the trainer).
    """
    ifile = args.input
    ofile = args.output
    numfac = args.numfac
    nummap = args.nummap
    numepochs = args.numepochs
    doNorm = args.donorm
    verbose = args.verbose
    learningrate = args.learnrate

    print('... loading data')
    mat = scipy.io.loadmat(ifile)
    train_features_x = numpy.float64(mat['x'])
    train_features_y = numpy.float64(mat['y'])

    # NORMALIZE DATA:
    if doNorm == 1:
        # Extra additive term for the denominator; currently zero.
        eps = 0
        # Remove the per-column mean from each view.
        train_features_x -= train_features_x.mean(0)[None, :]
        train_features_y -= train_features_y.mean(0)[None, :]
        # Divide by the per-column std plus 10% of the global std, so that
        # near-constant columns are not blown up.
        train_features_x /= (train_features_x.std(0)[None, :]
                             + train_features_x.std() * 0.1 + eps)
        train_features_y /= (train_features_y.std(0)[None, :]
                             + train_features_y.std() * 0.1 + eps)

    # SHUFFLE TRAINING DATA TO MAKE SURE IT'S NOT SORTED:
    # The same permutation is applied to both views so rows stay paired.
    R = numpy.random.permutation(train_features_x.shape[0])
    train_features_x = train_features_x[R, :]
    train_features_y = train_features_y[R, :]
    print(train_features_x.shape)
    print(train_features_y.shape)

    # The model is trained on [x | y] rows.
    train_features_numpy = numpy.concatenate(
        (train_features_x, train_features_y), 1)
    train_features = T.cast(theano.shared(train_features_numpy), 'float64')
    print(train_features.type)
    print('... done')

    # Fixed model hyper-parameters (not exposed through args).
    numhid = 0
    weight_decay_vis = 0.0
    weight_decay_map = 0.0
    corruption_type = 'zeromask'
    corruption_level = 0.5
    init_topology = None
    batchsize = 100
    numvisX = train_features_x.shape[1]
    numvisY = train_features_y.shape[1]
    # The former unused ``numbatches`` local was dropped: computing it
    # called ``train_features.get_value()``, which by default copies the
    # whole training set, only to discard the result.

    # INSTANTIATE MODEL
    print('... instantiating model')
    # Both random generators are created with the fixed seed 1.
    numpy_rng = numpy.random.RandomState(1)
    theano_rng = RandomStreams(1)
    model = gatedAutoencoder.FactoredGatedAutoencoder(
        numvisX=numvisX,
        numvisY=numvisY,
        numfac=numfac,
        nummap=nummap,
        numhid=numhid,
        output_type='real',
        weight_decay_vis=weight_decay_vis,
        weight_decay_map=weight_decay_map,
        corruption_type=corruption_type,
        corruption_level=corruption_level,
        init_topology=init_topology,
        numpy_rng=numpy_rng,
        theano_rng=theano_rng)
    print('... done')

    # TRAIN MODEL
    # NOTE(review): the positional 0.9 and None are presumably the trainer's
    # momentum and a further optional setting -- confirm against the
    # GraddescentMinibatch signature.
    trainer = GraddescentMinibatch(
        model, train_features, batchsize, learningrate, 0.9, None, verbose)
    for epoch in xrange(numepochs):
        trainer.step()

    # NOTE(review): the parameters are assumed to be saved once, after the
    # last epoch -- confirm this was not meant as a per-epoch checkpoint.
    scipy.io.savemat(
        ofile,
        {'wxf': model.layer.wxf.get_value(),
         'wyf': model.layer.wyf.get_value(),
         'whf': model.layer.whf_in.get_value(),
         'z_bias': model.layer.bmap.get_value()},
        oned_as='column')