def init(**kwargs):
    import py_paddle.swig_paddle as api
    args = []
    args_dict = {}
    # NOTE: append arguments if they are in ENV
    for ek, ev in os.environ.iteritems():
        if ek.startswith("PADDLE_INIT_"):
            args_dict[ek.replace("PADDLE_INIT_", "").lower()] = str(ev)

    args_dict.update(kwargs)
    # NOTE: overwrite arguments from ENV if it is in kwargs
    for key in args_dict.keys():
        args.append('--%s=%s' % (key, str(args_dict[key])))

    set_env_vars(kwargs.get('trainer_count', 1))

    if 'use_gpu' in kwargs:
        cp.g_command_config_args['use_gpu'] = kwargs['use_gpu']
    if 'use_mkldnn' in kwargs:
        cp.g_command_config_args['use_mkldnn'] = kwargs['use_mkldnn']
    if 'use_mkl_packed' in kwargs:
        cp.g_command_config_args['use_mkl_packed'] = kwargs['use_mkl_packed']
    assert 'parallel_nn' not in kwargs, ("currently 'parallel_nn' is not "
                                         "supported in v2 APIs.")

    api.initPaddle(*args)
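# A minimal usage sketch of the wrapper above (assuming it is exposed as
# paddle.init in the v2 package): the same flag can come either from a
# PADDLE_INIT_* environment variable or from a keyword argument, and keyword
# arguments win because they are applied after the environment is read.
import os
import paddle.v2 as paddle

os.environ['PADDLE_INIT_LOG_PERIOD'] = '100'  # becomes --log_period=100
paddle.init(use_gpu=False, trainer_count=1)   # becomes --use_gpu=False --trainer_count=1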
def paddle_predict_main(q, result_q):
    api.initPaddle("--use_gpu=false")
    gm = api.GradientMachine.loadFromConfigFile(
        "./output/model/pass-00000/trainer_config.py")
    assert isinstance(gm, api.GradientMachine)
    converter = DataProviderConverter(input_types=[dense_vector(28 * 28)])
    while True:
        features = q.get()
        val = gm.forwardTest(converter([[features]]))[0]['value'][0]
        result_q.put(val)
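# A minimal sketch of driving the worker above from a parent process with
# multiprocessing queues (the zero-filled `features` list is just a
# placeholder for one flattened 28*28 image). Running the SWIG predictor in
# its own process keeps initPaddle's global state out of the caller.
from multiprocessing import Process, Queue

q, result_q = Queue(), Queue()
worker = Process(target=paddle_predict_main, args=(q, result_q))
worker.daemon = True
worker.start()

features = [0.0] * (28 * 28)  # one flattened MNIST image
q.put(features)
print result_q.get()          # per-class output for that sample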
def predict(arr):
    swig_paddle.initPaddle("--use_gpu=0")
    data = [arr.tolist()]
    # directly pad the feature vector with four zeros
    for i in range(4):
        data[0][0].append(0)
    sex = prediction_sex.predict(data)
    age = prediction_age.predict(data)
    return sex, age
def __init__(self,
             train_conf,
             model_dir=None,
             resize_dim=256,
             crop_dim=224,
             mean_file=None,
             output_layer=None,
             oversample=False,
             is_color=True):
    """
    train_conf: network configuration file.
    model_dir: string, directory of model.
    resize_dim: int, resized image size.
    crop_dim: int, crop size.
    mean_file: string, image mean file.
    oversample: bool, oversample means multiple crops, namely five patches
        (the four corner patches and the center patch) as well as their
        horizontal reflections, ten crops in all.
    """
    self.train_conf = train_conf
    self.model_dir = model_dir
    if model_dir is None:
        self.model_dir = os.path.dirname(train_conf)

    self.resize_dim = resize_dim
    self.crop_dims = [crop_dim, crop_dim]
    self.oversample = oversample
    self.is_color = is_color

    self.output_layer = output_layer
    if self.output_layer:
        assert isinstance(self.output_layer, basestring)
        self.output_layer = self.output_layer.split(",")

    self.transformer = image_util.ImageTransformer(is_color=is_color)
    self.transformer.set_transpose((2, 0, 1))
    self.transformer.set_channel_swap((2, 1, 0))

    self.mean_file = mean_file
    if self.mean_file is not None:
        mean = np.load(self.mean_file)['data_mean']
        mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
        self.transformer.set_mean(mean)  # mean pixel
    else:
        # If no mean file is given, fall back to per-channel mean values;
        # these three values are calculated from ImageNet.
        self.transformer.set_mean(np.array([103.939, 116.779, 123.68]))

    conf_args = "is_test=1,use_gpu=1,is_predict=1"
    conf = parse_config(train_conf, conf_args)
    swig_paddle.initPaddle("--use_gpu=1")
    self.network = swig_paddle.GradientMachine.createFromConfigProto(
        conf.model_config)
    assert isinstance(self.network, swig_paddle.GradientMachine)
    self.network.loadParameters(self.model_dir)

    data_size = 3 * self.crop_dims[0] * self.crop_dims[1]
    slots = [DenseSlot(data_size)]
    is_sequence = False
    self.converter = util.DataProviderWrapperConverter(is_sequence, slots)
def main():
    options, args = option_parser()
    train_conf = options.train_conf
    data = options.data
    dict_file = options.dict_file
    model_path = options.model_path
    label = options.label
    swig_paddle.initPaddle("--use_gpu=0")
    predict = SentimentPrediction(train_conf, dict_file, model_path, label)
    predict.predict(data)
def __init__(self,
             train_conf,
             use_gpu=True,
             model_dir=None,
             resize_dim=None,
             crop_dim=None,
             mean_file=None,
             oversample=False,
             is_color=False):
    """
    train_conf: network configuration file.
    model_dir: model path.
    resize_dim: set to the original image size.
    crop_dim: image crop size, usually set to the original image size.
    oversample: bool, oversample means multiple crops; disabled here.
    """
    self.train_conf = train_conf
    self.model_dir = model_dir
    if model_dir is None:
        self.model_dir = os.path.dirname(train_conf)

    self.resize_dim = resize_dim
    self.crop_dims = [crop_dim, crop_dim]
    self.oversample = oversample
    self.is_color = is_color

    self.transformer = image_util.ImageTransformer(is_color=is_color)
    self.transformer.set_transpose((2, 0, 1))

    self.mean_file = mean_file
    mean = np.load(self.mean_file)['data_mean']
    mean = mean.reshape(1, self.crop_dims[0], self.crop_dims[1])
    self.transformer.set_mean(mean)  # mean pixel

    gpu = 1 if use_gpu else 0
    conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu)
    # Parse the training-time configuration file with parse_config().
    conf = parse_config(train_conf, conf_args)
    # PaddlePaddle currently wraps its common prediction interface with SWIG,
    # which makes prediction from a Python environment much simpler.
    # Initialize PaddlePaddle by passing command-line arguments to
    # swig_paddle.initPaddle().
    swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu)))
    # Use swig_paddle.GradientMachine.createFromConfigProto() to create the
    # neural network from the configuration parsed in the previous step.
    self.network = swig_paddle.GradientMachine.createFromConfigProto(
        conf.model_config)
    assert isinstance(self.network, swig_paddle.GradientMachine)
    # Load parameters from the model files.
    self.network.loadParameters(self.model_dir)

    data_size = 1 * self.crop_dims[0] * self.crop_dims[1]
    slots = [dense_vector(data_size)]
    # Create a DataProviderConverter object (converter).
    # The raw input swig_paddle accepts is a C++ Matrix, i.e. a float array
    # written directly into memory. That interface is not user friendly, so
    # the DataProviderConverter utility class is provided; it accepts the
    # same input data as PyDataProvider2.
    self.converter = DataProviderConverter(slots)
def main():
    options = parse_arguments()
    api.initPaddle("--use_gpu=%s" % options.use_gpu,
                   "--trainer_count=%s" % options.trainer_count)

    word_dict = load_dict(options.dict_file)
    train_dataset = list(load_data(options.train_data, word_dict))
    if options.test_data:
        test_dataset = list(load_data(options.test_data, word_dict))
    else:
        test_dataset = None

    trainer_config = parse_config(options.config,
                                  "dict_file=%s" % options.dict_file)
    # No need to have data provider for trainer
    trainer_config.ClearField('data_config')
    trainer_config.ClearField('test_data_config')

    # create a GradientMachine from the model configuration
    model = api.GradientMachine.createFromConfigProto(
        trainer_config.model_config)
    # create a trainer for the gradient machine
    trainer = api.Trainer.create(trainer_config, model)

    # create a data converter which converts data to PaddlePaddle
    # internal format
    input_types = [
        integer_value_sequence(len(word_dict)) if options.seq else
        sparse_binary_vector(len(word_dict)), integer_value(2)
    ]
    converter = DataProviderConverter(input_types)

    batch_size = trainer_config.opt_config.batch_size
    trainer.startTrain()
    for train_pass in xrange(options.num_passes):
        trainer.startTrainPass()
        random.shuffle(train_dataset)
        for pos in xrange(0, len(train_dataset), batch_size):
            batch = itertools.islice(train_dataset, pos, pos + batch_size)
            size = min(batch_size, len(train_dataset) - pos)
            trainer.trainOneDataBatch(size, converter(batch))
        trainer.finishTrainPass()
        if test_dataset:
            trainer.startTestPeriod()
            for pos in xrange(0, len(test_dataset), batch_size):
                batch = itertools.islice(test_dataset, pos, pos + batch_size)
                size = min(batch_size, len(test_dataset) - pos)
                trainer.testOneDataBatch(size, converter(batch))
            trainer.finishTestPeriod()
    trainer.finishTrain()
def main():
    options, args = option_parser()
    train_conf = options.train_conf
    data_file = options.data_file
    dict_file = options.dict_file
    model_path = options.model_path
    label_file = options.label_file
    predict_dict_file = options.predict_dict_file
    output_file = options.output_file
    swig_paddle.initPaddle("--use_gpu=0")
    predict = Prediction(train_conf, dict_file, model_path, label_file,
                         predict_dict_file)
    predict.predict(data_file, output_file)
def init(**kwargs):
    args = []
    args_dict = {}
    # NOTE: append arguments if they are in ENV
    for ek, ev in os.environ.iteritems():
        if ek.startswith("PADDLE_INIT_"):
            args_dict[ek.replace("PADDLE_INIT_", "").lower()] = str(ev)

    args_dict.update(kwargs)
    # NOTE: overwrite arguments from ENV if it is in kwargs
    for key in args_dict.keys():
        args.append('--%s=%s' % (key, str(args_dict[key])))

    api.initPaddle(*args)
def __init__(self,
             train_conf,
             use_gpu=True,
             model_dir=None,
             resize_dim=None,
             crop_dim=None,
             mean_file=None,
             oversample=False,
             is_color=True):
    """
    train_conf: network configuration file.
    model_dir: string, directory of model.
    resize_dim: int, resized image size.
    crop_dim: int, crop size.
    mean_file: string, image mean file.
    oversample: bool, oversample means multiple crops, namely five patches
        (the four corner patches and the center patch) as well as their
        horizontal reflections, ten crops in all.
    """
    self.train_conf = train_conf
    self.model_dir = model_dir
    if model_dir is None:
        self.model_dir = os.path.dirname(train_conf)

    self.resize_dim = resize_dim
    self.crop_dims = [crop_dim, crop_dim]
    self.oversample = oversample
    self.is_color = is_color

    self.transformer = image_util.ImageTransformer(is_color=is_color)
    self.transformer.set_transpose((2, 0, 1))

    self.mean_file = mean_file
    mean = np.load(self.mean_file)['data_mean']
    mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
    self.transformer.set_mean(mean)  # mean pixel

    gpu = 1 if use_gpu else 0
    conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu)
    conf = parse_config(train_conf, conf_args)
    swig_paddle.initPaddle("--use_gpu=%d" % (gpu))
    self.network = swig_paddle.GradientMachine.createFromConfigProto(
        conf.model_config)
    assert isinstance(self.network, swig_paddle.GradientMachine)
    self.network.loadParameters(self.model_dir)

    data_size = 3 * self.crop_dims[0] * self.crop_dims[1]
    slots = [dense_vector(data_size)]
    self.converter = DataProviderConverter(slots)
def __init__(self,
             train_conf,
             use_gpu=True,
             model_dir=None,
             resize_dim=None,
             crop_dim=None,
             mean_file=None,
             oversample=False,
             is_color=True):
    """
    train_conf: network configuration file.
    model_dir: string, directory of model.
    resize_dim: int, resized image size.
    crop_dim: int, crop size.
    mean_file: string, image mean file.
    oversample: bool, oversample means multiple crops, namely five patches
        (the four corner patches and the center patch) as well as their
        horizontal reflections, ten crops in all.
    """
    self.train_conf = train_conf
    self.model_dir = model_dir
    if model_dir is None:
        self.model_dir = os.path.dirname(train_conf)

    self.resize_dim = resize_dim
    self.crop_dims = [crop_dim, crop_dim]
    self.oversample = oversample
    self.is_color = is_color

    self.transformer = image_util.ImageTransformer(is_color=is_color)
    self.transformer.set_transpose((2, 0, 1))

    self.mean_file = mean_file
    mean = np.load(self.mean_file)['data_mean']
    mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
    self.transformer.set_mean(mean)  # mean pixel

    gpu = 1 if use_gpu else 0
    conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu)
    conf = parse_config(train_conf, conf_args)
    swig_paddle.initPaddle("--use_gpu=%d" % (gpu))
    self.network = swig_paddle.GradientMachine.createFromConfigProto(
        conf.model_config)
    assert isinstance(self.network, swig_paddle.GradientMachine)
    self.network.loadParameters(self.model_dir)

    data_size = 3 * self.crop_dims[0] * self.crop_dims[1]
    slots = [DenseSlot(data_size)]
    self.converter = util.DataProviderWrapperConverter(False, slots)
def main():
    options, args = option_parser()
    train_conf = options.train_conf
    batch_size = options.batch_size
    dict_file = options.dict_file
    model_path = options.model_path
    label = options.label
    swig_paddle.initPaddle("--use_gpu=0")
    predict = SentimentPrediction(train_conf, dict_file, model_path, label)

    batch = []
    for line in sys.stdin:
        batch.append([predict.get_index(line)])
        if len(batch) == batch_size:
            predict.batch_predict(batch)
            batch = []
    if len(batch) > 0:
        predict.batch_predict(batch)
def main():
    options, args = option_parser()
    train_conf = options.train_conf
    batch_size = options.batch_size
    dict_file = options.dict_file
    model_path = options.model_path
    label = options.label
    swig_paddle.initPaddle("--use_gpu=0")
    predict = QuickStartPrediction(train_conf, dict_file, model_path, label)

    batch = []
    labels = []
    for line in sys.stdin:
        [label, text] = line.split("\t")
        labels.append(int(label))
        batch.append([predict.get_index(text)])
    print("labels is:")
    print labels
    predict.batch_predict(batch)
def main():
    options, args = option_parser()
    train_conf = options.train_conf
    batch_size = options.batch_size
    dict_file = options.dict_file
    model_path = options.model_path
    label = options.label
    swig_paddle.initPaddle("--use_gpu=0")
    predict = SentimentPrediction(train_conf, dict_file, model_path, label)

    batch = []
    for line in sys.stdin:
        words = predict.get_index(line)
        if words:
            batch.append([words])
        else:
            print('All the words in [%s] are not in the dictionary.' % line)
        if len(batch) == batch_size:
            predict.batch_predict(batch)
            batch = []
    if len(batch) > 0:
        predict.batch_predict(batch)
        data = []
        for i in xrange(batch_size):
            a = np.random.randint(10)
            b = self.sparse_binary_reader(20000, 40, non_empty=True)
            c = self.dense_reader(100)
            each_sample = (a, b, c)
            data.append(each_sample)

        # test multiple features
        data_types = [('fea0', data_type.dense_vector(100)),
                      ('fea1', data_type.sparse_binary_vector(20000)),
                      ('fea2', data_type.integer_value(10))]
        feeder = DataFeeder(data_types, {'fea0': 2, 'fea1': 1, 'fea2': 0})
        arg = feeder(data)
        out_dense = arg.getSlotValue(0).copyToNumpyMat()
        out_sparse = arg.getSlotValue(1)
        out_index = arg.getSlotIds(2).copyToNumpyArray()
        for i in xrange(batch_size):
            self.assertEqual(out_dense[i].all(), data[i][2].all())
            self.assertEqual(out_sparse.getSparseRowCols(i), data[i][1])
            self.assertEqual(out_index[i], data[i][0])


if __name__ == '__main__':
    api.initPaddle("--use_gpu=0")
    suite = unittest.TestLoader().loadTestsFromTestCase(DataFeederTest)
    unittest.TextTestRunner().run(suite)
    if api.isGpuVersion():
        api.setUseGpu(True)
        unittest.main()
        for h in xrange(m.getHeight()):
            for w in xrange(m.getWidth()):
                self.assertEqual(m.get(h, w), numpy_mat[h, w])

        mat2 = m.toNumpyMatInplace()
        mat2[1, 1] = 32.2
        self.assertTrue(np.array_equal(mat2, numpy_mat))

    def test_numpyGpu(self):
        if swig_paddle.isGpuVersion():
            numpy_mat = np.matrix([[1, 2], [3, 4], [5, 6]], dtype='float32')
            gpu_m = swig_paddle.Matrix.createGpuDenseFromNumpy(numpy_mat)
            assert isinstance(gpu_m, swig_paddle.Matrix)
            self.assertEqual((int(gpu_m.getHeight()), int(gpu_m.getWidth())),
                             numpy_mat.shape)
            self.assertTrue(gpu_m.isGpu())
            numpy_mat = gpu_m.copyToNumpyMat()
            numpy_mat[0, 1] = 3.23
            for a, e in zip(gpu_m.getData(), [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]):
                self.assertAlmostEqual(a, e)

            gpu_m.copyFromNumpyMat(numpy_mat)

            for a, e in zip(gpu_m.getData(), [1.0, 3.23, 3.0, 4.0, 5.0, 6.0]):
                self.assertAlmostEqual(a, e)


if __name__ == "__main__":
    swig_paddle.initPaddle("--use_gpu=0")
    unittest.main()
            self.assertTrue(util.doubleEqual(n, v))

        numpy_2 = vec.toNumpyArrayInplace()
        vec[0] = 1.3
        for x, y in zip(numpy_arr, numpy_2):
            self.assertTrue(util.doubleEqual(x, y))

        for x, y in zip(numpy_arr, vec):
            self.assertTrue(util.doubleEqual(x, y))

        numpy_3 = vec.copyToNumpyArray()
        numpy_3[0] = 0.4
        self.assertTrue(util.doubleEqual(vec[0], 1.3))
        self.assertTrue(util.doubleEqual(numpy_3[0], 0.4))

        for i in xrange(1, len(numpy_3)):
            util.doubleEqual(numpy_3[i], vec[i])

    def testCopyFromNumpy(self):
        vec = swig_paddle.Vector.createZero(1)
        arr = np.array([1.3, 3.2, 2.4], dtype="float32")
        vec.copyFromNumpyArray(arr)
        for i in xrange(len(vec)):
            self.assertTrue(util.doubleEqual(vec[i], arr[i]))


if __name__ == '__main__':
    swig_paddle.initPaddle("--use_gpu=1"
                           if swig_paddle.isGpuVersion() else "--use_gpu=0")
    unittest.main()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--data_source", help="mnist or cifar or uniform")
    parser.add_argument(
        "--use_gpu", default="1", help="1 means use gpu for training")
    parser.add_argument("--gpu_id", default="0", help="the gpu_id parameter")
    args = parser.parse_args()
    data_source = args.data_source
    use_gpu = args.use_gpu
    assert data_source in ["mnist", "cifar", "uniform"]
    assert use_gpu in ["0", "1"]

    if not os.path.exists("./%s_samples/" % data_source):
        os.makedirs("./%s_samples/" % data_source)

    if not os.path.exists("./%s_params/" % data_source):
        os.makedirs("./%s_params/" % data_source)

    api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10',
                   '--log_period=100', '--gpu_id=' + args.gpu_id,
                   '--save_dir=' + "./%s_params/" % data_source)

    if data_source == "uniform":
        conf = "gan_conf.py"
        num_iter = 10000
    else:
        conf = "gan_conf_image.py"
        num_iter = 1000

    gen_conf = parse_config(conf, "mode=generator_training,data=" + data_source)
    dis_conf = parse_config(conf,
                            "mode=discriminator_training,data=" + data_source)
    generator_conf = parse_config(conf, "mode=generator,data=" + data_source)
    batch_size = dis_conf.opt_config.batch_size
    noise_dim = get_layer_size(gen_conf.model_config, "noise")

    if data_source == "mnist":
        data_np = load_mnist_data("./data/mnist_data/train-images-idx3-ubyte")
    elif data_source == "cifar":
        data_np = load_cifar_data("./data/cifar-10-batches-py/")
    else:
        data_np = load_uniform_data()

    # this creates a gradient machine for discriminator
    dis_training_machine = api.GradientMachine.createFromConfigProto(
        dis_conf.model_config)
    # this creates a gradient machine for generator
    gen_training_machine = api.GradientMachine.createFromConfigProto(
        gen_conf.model_config)

    # generator_machine is used to generate data only, which is used for
    # training discriminator
    logger.info(str(generator_conf.model_config))
    generator_machine = api.GradientMachine.createFromConfigProto(
        generator_conf.model_config)

    dis_trainer = api.Trainer.create(dis_conf, dis_training_machine)
    gen_trainer = api.Trainer.create(gen_conf, gen_training_machine)

    dis_trainer.startTrain()
    gen_trainer.startTrain()

    # Sync parameters between networks (GradientMachine) at the beginning
    copy_shared_parameters(gen_training_machine, dis_training_machine)
    copy_shared_parameters(gen_training_machine, generator_machine)

    # constrain that neither the discriminator nor the generator can be
    # trained consecutively more than MAX_strike times
    curr_train = "dis"
    curr_strike = 0
    MAX_strike = 5

    for train_pass in xrange(100):
        dis_trainer.startTrainPass()
        gen_trainer.startTrainPass()
        for i in xrange(num_iter):
            # Do forward pass in discriminator to get the dis_loss
            noise = get_noise(batch_size, noise_dim)
            data_batch_dis_pos = prepare_discriminator_data_batch_pos(
                batch_size, data_np)
            dis_loss_pos = get_training_loss(dis_training_machine,
                                             data_batch_dis_pos)

            data_batch_dis_neg = prepare_discriminator_data_batch_neg(
                generator_machine, batch_size, noise)
            dis_loss_neg = get_training_loss(dis_training_machine,
                                             data_batch_dis_neg)

            dis_loss = (dis_loss_pos + dis_loss_neg) / 2.0

            # Do forward pass in generator to get the gen_loss
            data_batch_gen = prepare_generator_data_batch(batch_size, noise)
            gen_loss = get_training_loss(gen_training_machine, data_batch_gen)

            if i % 100 == 0:
                print "d_pos_loss is %s d_neg_loss is %s" % (dis_loss_pos,
                                                             dis_loss_neg)
                print "d_loss is %s g_loss is %s" % (dis_loss, gen_loss)

            # Decide which network to train based on the training history
            # and the relative size of the loss
            if (not (curr_train == "dis" and curr_strike == MAX_strike)) and \
               ((curr_train == "gen" and curr_strike == MAX_strike) or
                dis_loss > gen_loss):
                if curr_train == "dis":
                    curr_strike += 1
                else:
                    curr_train = "dis"
                    curr_strike = 1
                dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_neg)
                dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_pos)
                copy_shared_parameters(dis_training_machine,
                                       gen_training_machine)
            else:
                if curr_train == "gen":
                    curr_strike += 1
                else:
                    curr_train = "gen"
                    curr_strike = 1
                gen_trainer.trainOneDataBatch(batch_size, data_batch_gen)
                # TODO: add API for paddle to allow true parameter sharing
                # between different GradientMachines so that we do not need
                # to copy shared parameters.
                copy_shared_parameters(gen_training_machine,
                                       dis_training_machine)
                copy_shared_parameters(gen_training_machine, generator_machine)

        dis_trainer.finishTrainPass()
        gen_trainer.finishTrainPass()

        # At the end of each pass, save the generated samples/images
        fake_samples = get_fake_samples(generator_machine, batch_size, noise)
        if data_source == "uniform":
            plot2DScatter(fake_samples, "./%s_samples/train_pass%s.png" %
                          (data_source, train_pass))
        else:
            save_images(fake_samples, "./%s_samples/train_pass%s.png" %
                        (data_source, train_pass))
    dis_trainer.finishTrain()
    gen_trainer.finishTrain()
        if atEnd:
            break
        trainer.trainOneDataBatch(batch_size, data)
        outs = trainer.getForwardOutput()
        cost += sum(outs[0]['value'])
        num += batch_size
    trainer.finishTrainPass()
    logger.info('train cost=%f' % (cost / num))

    trainer.startTestPeriod()
    num = 0
    cost = 0
    while True:  # Test one batch
        batch_size = 1000
        data, atEnd = util.loadMNISTTrainData(batch_size)
        if atEnd:
            break
        trainer.testOneDataBatch(batch_size, data)
        outs = trainer.getForwardOutput()
        cost += sum(outs[0]['value'])
        num += batch_size
    trainer.finishTestPeriod()
    logger.info('test cost=%f' % (cost / num))

    trainer.finishTrain()


if __name__ == '__main__':
    swig_paddle.initPaddle("--use_gpu=0", "--trainer_count=1")
    main()
def __init__(self,
             train_conf,
             model_dir=None,
             resize_dim=256,
             crop_dim=224,
             use_gpu=True,
             mean_file=None,
             output_layer=None,
             oversample=False,
             is_color=True):
    """
    train_conf: network configuration file.
    model_dir: string, directory of model.
    resize_dim: int, resized image size.
    crop_dim: int, crop size.
    mean_file: string, image mean file.
    oversample: bool, oversample means multiple crops, namely five patches
        (the four corner patches and the center patch) as well as their
        horizontal reflections, ten crops in all.
    """
    self.train_conf = train_conf
    self.model_dir = model_dir
    if model_dir is None:
        self.model_dir = os.path.dirname(train_conf)

    self.resize_dim = resize_dim
    self.crop_dims = [crop_dim, crop_dim]
    self.oversample = oversample
    self.is_color = is_color

    self.output_layer = output_layer
    if self.output_layer:
        assert isinstance(self.output_layer, basestring)
        self.output_layer = self.output_layer.split(",")

    self.transformer = image_util.ImageTransformer(is_color=is_color)
    self.transformer.set_transpose((2, 0, 1))
    self.transformer.set_channel_swap((2, 1, 0))

    self.mean_file = mean_file
    if self.mean_file is not None:
        mean = np.load(self.mean_file)['data_mean']
        mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
        self.transformer.set_mean(mean)  # mean pixel
    else:
        # If no mean file is given, fall back to per-channel mean values;
        # these three values are calculated from ImageNet.
        self.transformer.set_mean(np.array([103.939, 116.779, 123.68]))

    conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (int(use_gpu))
    conf = parse_config(train_conf, conf_args)
    swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu)))
    self.network = swig_paddle.GradientMachine.createFromConfigProto(
        conf.model_config)
    assert isinstance(self.network, swig_paddle.GradientMachine)
    self.network.loadParameters(self.model_dir)

    data_size = 3 * self.crop_dims[0] * self.crop_dims[1]
    slots = [dense_vector(data_size)]
    self.converter = DataProviderConverter(slots)
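# A hypothetical construction sketch: the class name (ImageClassifier) and
# the paths below are assumptions, not taken from the snippet above. The
# point is that all the heavy setup (parse_config, initPaddle, loadParameters)
# happens once in __init__, after which the object can be reused per image.
classifier = ImageClassifier(
    train_conf='resnet.py',               # config used at training time
    model_dir='./model/pass-00099',       # directory holding the parameters
    use_gpu=False,
    mean_file='./mean_meta_224/mean.meta',
    output_layer='__fc_layer_0__',
    oversample=False)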
# limitations under the License.

from py_paddle import swig_paddle, DataProviderConverter
from common_utils import *
from paddle.trainer.config_parser import parse_config

try:
    import cPickle as pickle
except ImportError:
    import pickle

import sys

if __name__ == '__main__':
    model_path = sys.argv[1]
    swig_paddle.initPaddle('--use_gpu=0')
    conf = parse_config("trainer_config.py", "is_predict=1")
    network = swig_paddle.GradientMachine.createFromConfigProto(
        conf.model_config)
    assert isinstance(network, swig_paddle.GradientMachine)
    network.loadParameters(model_path)

    with open('./data/meta.bin', 'rb') as f:
        meta = pickle.load(f)

    headers = [h[1] for h in meta_to_header(meta, 'movie')]
    headers.extend([h[1] for h in meta_to_header(meta, 'user')])
    cvt = DataProviderConverter(headers)
    while True:
        movie_id = int(raw_input("Input movie_id: "))
        user_id = int(raw_input("Input user_id: "))
        movie_meta = meta['movie'][movie_id]  # Query Data From Meta.
        user_meta = meta['user'][user_id]
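        # A hedged sketch of how this interactive loop could finish one
        # prediction. The exact slot layout depends on meta_to_header, so the
        # feature concatenation below is an assumption, not the original code.
        features = cvt([movie_meta + user_meta])
        prediction = network.forwardTest(features)[0]['value'][0][0]
        print "Predicted rating for user %d on movie %d: %.2f" % (
            user_id, movie_id, prediction)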
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--use_gpu", default="1", help="1 means use gpu for training")
    parser.add_argument("--gpu_id", default="0", help="the gpu_id parameter")
    args = parser.parse_args()
    use_gpu = args.use_gpu
    assert use_gpu in ["0", "1"]

    if not os.path.exists("./samples/"):
        os.makedirs("./samples/")

    if not os.path.exists("./params/"):
        os.makedirs("./params/")

    api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10',
                   '--log_period=1000', '--gpu_id=' + args.gpu_id,
                   '--save_dir=' + "./params/")

    conf = "vae_conf.py"

    trainer_conf = parse_config(conf, "is_generating=False")
    gener_conf = parse_config(conf, "is_generating=True")

    batch_size = trainer_conf.opt_config.batch_size

    noise_dim = get_layer_size(gener_conf.model_config, "noise")

    mnist = dataloader.MNISTloader(batch_size=batch_size)
    mnist.load_data()

    training_machine = api.GradientMachine.createFromConfigProto(
        trainer_conf.model_config)

    generator_machine = api.GradientMachine.createFromConfigProto(
        gener_conf.model_config)

    trainer = api.Trainer.create(trainer_conf, training_machine)

    trainer.startTrain()

    for train_pass in xrange(100):
        trainer.startTrainPass()
        mnist.reset_pointer()
        i = 0
        it = 0
        while mnist.pointer != 0 or i == 0:
            X = mnist.next_batch().astype('float32')

            inputs = api.Arguments.createArguments(1)
            inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(X))

            trainer.trainOneDataBatch(batch_size, inputs)

            if it % 1000 == 0:
                outputs = api.Arguments.createArguments(0)
                training_machine.forward(inputs, outputs, api.PASS_TEST)
                loss = np.mean(outputs.getSlotValue(0).copyToNumpyMat())
                print "\niter: {}".format(str(it).zfill(3))
                print "VAE loss: {}".format(str(loss).zfill(3))

                # Sync parameters between networks (GradientMachine)
                copy_shared_parameters(training_machine, generator_machine)

                z_samples = np.random.randn(batch_size,
                                            noise_dim).astype('float32')
                samples = get_fake_samples(generator_machine, batch_size,
                                           z_samples)

                # Generate the first 16 images for a picture.
                figure = plot_samples(samples[:16])
                plt.savefig(
                    "./samples/{}_{}.png".format(
                        str(train_pass).zfill(3), str(i).zfill(3)),
                    bbox_inches='tight')
                plt.close(figure)
            i += 1
            it += 1

        trainer.finishTrainPass()
    trainer.finishTrain()
def main():
    api.initPaddle("-use_gpu=false", "-trainer_count=4")  # use 4 cpu cores

    optimizer = paddle_v2.optimizer.Adam(
        learning_rate=1e-4,
        batch_size=1000,
        model_average=ModelAverage(average_window=0.5),
        regularization=L2Regularization(rate=0.5))

    # Create a local updater. Local means it does not run in a cluster.
    # For cluster training, this can be changed to createRemoteUpdater
    # in the future.
    updater = optimizer.create_local_updater()
    assert isinstance(updater, api.ParameterUpdater)

    # define network
    images = paddle_v2.layer.data(
        name='pixel', type=paddle_v2.data_type.dense_vector(784))
    label = paddle_v2.layer.data(
        name='label', type=paddle_v2.data_type.integer_value(10))
    hidden1 = paddle_v2.layer.fc(input=images, size=200)
    hidden2 = paddle_v2.layer.fc(input=hidden1, size=200)
    inference = paddle_v2.layer.fc(
        input=hidden2, size=10, act=paddle_v2.activation.Softmax())
    cost = paddle_v2.layer.classification_cost(input=inference, label=label)

    # Create a simple GradientMachine.
    model_config = paddle_v2.layer.parse_network(cost)
    m = api.GradientMachine.createFromConfigProto(model_config,
                                                  api.CREATE_MODE_NORMAL,
                                                  optimizer.enable_types())

    # This type check is not strictly useful; it only enables type hints in
    # IDEs such as PyCharm.
    assert isinstance(m, api.GradientMachine)

    # Initialize parameters with numpy.
    init_parameter(network=m)

    # Initialize the ParameterUpdater.
    updater.init(m)

    # DataProviderConverter is a utility that converts Python objects into
    # Paddle's C++ input. The input format is the same as Paddle's DataProvider.
    converter = DataProviderConverter(input_types=[images.type, label.type])

    train_file = './data/raw_data/train'
    test_file = './data/raw_data/t10k'

    # Start the gradient machine. The gradient machine must be started before
    # invoking forward/backward, not just for training but also for inference.
    m.start()

    # An evaluator can print error rate, etc. It is a C++ class.
    batch_evaluator = m.makeEvaluator()
    test_evaluator = m.makeEvaluator()

    # Get training data. The data is stored in a data pool; the current
    # implementation does not care about memory or speed and is just a very
    # naive implementation.
    train_data_generator = input_order_converter(read_from_mnist(train_file))
    train_data = BatchPool(train_data_generator, 512)

    # outArgs holds the network's forward result. It is not used here; it is
    # just passed to gradient_machine.forward.
    outArgs = api.Arguments.createArguments(0)

    for pass_id in xrange(2):  # we train 2 passes.
        updater.startPass()

        for batch_id, data_batch in enumerate(train_data()):
            # data_batch is input images.
            # here, for online learning, we could get data_batch from network.

            # Start updating one batch.
            pass_type = updater.startBatch(len(data_batch))

            # Start the BatchEvaluator.
            # batch_evaluator can be used between start/finish.
            batch_evaluator.start()

            # forwardBackward is a shortcut for forward and backward.
            # It is sometimes faster than invoking forward/backward separately,
            # because in GradientMachine it may be async.
            m.forwardBackward(converter(data_batch), outArgs, pass_type)

            for each_param in m.getParameters():
                updater.update(each_param)

            # Get cost. We use numpy to calculate the total cost for this batch.
            cost_vec = outArgs.getSlotValue(0)
            cost_vec = cost_vec.copyToNumpyMat()
            cost = cost_vec.sum() / len(data_batch)

            # Make the evaluator work.
            m.eval(batch_evaluator)

            # Print logs.
            print 'Pass id', pass_id, 'Batch id', batch_id, 'with cost=', \
                cost, batch_evaluator

            batch_evaluator.finish()
            # Finish the batch:
            #  * clears gradients.
            #  * ensures all values are updated.
            updater.finishBatch(cost)

        # Testing stage: use the test data set to test the current network.
        updater.apply()
        test_evaluator.start()
        test_data_generator = input_order_converter(read_from_mnist(test_file))
        for data_batch in generator_to_batch(test_data_generator, 512):
            # in the testing stage, only forward is needed.
            m.forward(converter(data_batch), outArgs, api.PASS_TEST)
            m.eval(test_evaluator)

        # print error rate for the test data set
        print 'Pass', pass_id, ' test evaluator: ', test_evaluator
        test_evaluator.finish()
        updater.restore()

        updater.catchUpWith()
        params = m.getParameters()
        for each_param in params:
            assert isinstance(each_param, api.Parameter)
            value = each_param.getBuf(api.PARAMETER_VALUE)
            value = value.copyToNumpyArray()

            # Here the parameter could be saved anywhere you want.
            print each_param.getName(), value

        updater.finishPass()

    m.finish()
def init(**kwargs):
    args = []
    for key in kwargs.keys():
        args.append('--%s=%s' % (key, str(kwargs[key])))

    api.initPaddle(*args)
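# Usage sketch: with the thin wrapper above, the call below ends up invoking
# api.initPaddle('--use_gpu=False', '--trainer_count=2') (flag order follows
# dict iteration order, so it is not guaranteed).
init(use_gpu=False, trainer_count=2)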
        def gen_data(batch_size, shape):
            data = []
            for i in xrange(batch_size):
                each_sample = []
                each_sample.append(np.random.random(shape))
                data.append(each_sample)
            return data

        feeder = DataFeeder([('image', data_type.dense_array(2352))],
                            {'image': 0})
        arg = feeder(gen_data(32, (3, 28, 28)))
        h = arg.getSlotFrameHeight(0)
        w = arg.getSlotFrameWidth(0)
        self.assertEqual(h, 28)
        self.assertEqual(w, 28)

        arg = feeder(gen_data(32, (3, 30, 32)))
        h = arg.getSlotFrameHeight(0)
        w = arg.getSlotFrameWidth(0)
        self.assertEqual(h, 30)
        self.assertEqual(w, 32)


if __name__ == '__main__':
    api.initPaddle("--use_gpu=0")
    suite = unittest.TestLoader().loadTestsFromTestCase(DataFeederTest)
    unittest.TextTestRunner().run(suite)
    if api.isGpuVersion():
        api.setUseGpu(True)
        unittest.main()
        machine.backward(backward_callback)

        for k in optimizers:
            opt = optimizers[k]
            opt.finishBatch()

        for k in optimizers:
            opt = optimizers[k]
            opt.finishPass()

        self.assertTrue(self.isCalled)

    def test_train_one_pass(self):
        conf_file_path = './testTrainConfig.py'
        trainer_config = swig_paddle.TrainerConfig.createFromTrainerConfigFile(
            conf_file_path)
        model_config = trainer_config.getModelConfig()
        machine = swig_paddle.GradientMachine.createByModelConfig(model_config)
        at_end = False

        output = swig_paddle.Arguments.createArguments(0)
        if not at_end:
            input_, at_end = util.loadMNISTTrainData(1000)
            machine.forwardBackward(input_, output, swig_paddle.PASS_TRAIN)


if __name__ == '__main__':
    swig_paddle.initPaddle('--use_gpu=0')
    unittest.main()
            self.assertEqual((int(gpu_m.getHeight()), int(gpu_m.getWidth())),
                             numpy_mat.shape)
            self.assertTrue(gpu_m.isGpu())
            numpy_mat = gpu_m.copyToNumpyMat()
            numpy_mat[0, 1] = 3.23
            for a, e in zip(gpu_m.getData(), [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]):
                self.assertAlmostEqual(a, e)

            gpu_m.copyFromNumpyMat(numpy_mat)

            for a, e in zip(gpu_m.getData(), [1.0, 3.23, 3.0, 4.0, 5.0, 6.0]):
                self.assertAlmostEqual(a, e)

    def test_numpy(self):
        numpy_mat = np.matrix([[1, 2], [3, 4], [5, 6]], dtype="float32")
        m = swig_paddle.Matrix.createDenseFromNumpy(numpy_mat)
        self.assertEqual((int(m.getHeight()), int(m.getWidth())),
                         numpy_mat.shape)
        self.assertEqual(m.isGpu(), swig_paddle.isUsingGpu())
        for a, e in zip(m.getData(), [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]):
            self.assertAlmostEqual(a, e)


if __name__ == "__main__":
    swig_paddle.initPaddle("--use_gpu=0")
    suite = unittest.TestLoader().loadTestsFromTestCase(TestMatrix)
    unittest.TextTestRunner().run(suite)
    if swig_paddle.isGpuVersion():
        swig_paddle.setUseGpu(True)
        unittest.main()
        v(w, t) & = \\rho v(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\
        w & = w - \\frac{\\eta} {\\sqrt{v(w,t) + \\epsilon}} \\nabla Q_{i}(w)

    :param rho: the :math:`\\rho` in the equation. The forgetting factor.
    :type rho: float
    :param epsilon: the :math:`\\epsilon` in the equation.
    :type epsilon: float
    """

    def __init__(self, rho=0.95, epsilon=1e-6, **kwargs):
        learning_method = v1_optimizers.RMSPropOptimizer(
            rho=rho, epsilon=epsilon)
        super(RMSProp, self).__init__(learning_method=learning_method,
                                      **kwargs)


ModelAverage = v1_optimizers.ModelAverage
L2Regularization = v1_optimizers.L2Regularization

if __name__ == '__main__':
    import py_paddle.swig_paddle as swig_api
    swig_api.initPaddle('--use_gpu=false')
    for opt in [
            Momentum(), Adam(), Adamax(), AdaGrad(), DecayedAdaGrad(),
            AdaDelta(), RMSProp(), Adam(
                model_average=ModelAverage(average_window=0.5),
                regularization=L2Regularization(rate=0.5),
                gradient_clipping_threshold=25)
    ]:
        print opt, opt.enable_types()