def create_network():
    """Build a toy linear-regression SVRGModule plus its data iterator.

    Returns
    -------
    di : mx.io.NDArrayIter
        Iterator over 1000 random 2-feature samples with labels y = x . [1, 2].
    mod : SVRGModule
        Bound module (params initialized with Uniform(0.01), update_freq=2).
    """
    batch_size = 32
    # Synthetic regression data: integer features in [1, 5), exact linear labels.
    train_data = np.random.randint(1, 5, [1000, 2])
    weights = np.array([1.0, 2.0])
    train_label = train_data.dot(weights)

    di = mx.io.NDArrayIter(train_data, train_label, batch_size=batch_size,
                           shuffle=True, label_name='lin_reg_label')

    # Single fully-connected layer == linear regression.
    data_sym = mx.sym.Variable('data')
    label_sym = mx.symbol.Variable('lin_reg_label')
    fc = mx.sym.FullyConnected(data=data_sym, name='fc1', num_hidden=1)
    lro = mx.sym.LinearRegressionOutput(data=fc, label=label_sym, name="lro")

    mod = SVRGModule(symbol=lro,
                     data_names=['data'],
                     label_names=['lin_reg_label'],
                     update_freq=2)
    mod.bind(data_shapes=di.provide_data, label_shapes=di.provide_label)
    mod.init_params(initializer=mx.init.Uniform(0.01), allow_missing=False,
                    force_init=False, allow_extra=False)
    return di, mod
def test_module_initializer():
    """SVRGModule.init_params must honor per-variable initializers on sparse storage."""

    def regression_model(m):
        # Sparse (CSR) input times a row-sparse weight vector, squared-error head.
        x = mx.symbol.var("data", stype='csr')
        v = mx.symbol.var("v", shape=(m, 1), init=mx.init.Uniform(scale=.1),
                          stype='row_sparse')
        model = mx.symbol.dot(lhs=x, rhs=v)
        y = mx.symbol.Variable("label")
        model = mx.symbol.LinearRegressionOutput(data=model, label=y, name="out")
        return model

    # shape of the data
    n, m = 128, 100
    model = regression_model(m)

    data = mx.nd.zeros(shape=(n, m), stype='csr')
    label = mx.nd.zeros((n, 1))
    iterator = mx.io.NDArrayIter(data=data, label={'label': label},
                                 batch_size=n, last_batch_handle='discard')

    # create module
    mod = SVRGModule(symbol=model, data_names=['data'], label_names=['label'],
                     update_freq=2)
    mod.bind(data_shapes=iterator.provide_data, label_shapes=iterator.provide_label)
    mod.init_params()

    v = mod._arg_params['v']
    # Storage type must be preserved and the initializer must have run
    # (Uniform(0.1) makes an all-zero vector vanishingly unlikely).
    assert v.stype == 'row_sparse'
    assert np.sum(v.asnumpy()) != 0
def test_module_bind():
    """bind() must reject malformed data_shapes and must bind the auxiliary module too.

    Fix: replaced non-idiomatic ``assert x == True`` comparisons with plain
    truthiness asserts (PEP 8: never compare boolean values to True with ==).
    """
    x = mx.sym.Variable("data")
    net = mx.sym.FullyConnected(x, num_hidden=1)
    mod = SVRGModule(symbol=net, data_names=['data'], label_names=None,
                     update_freq=2)

    # A flat ['name', ndarray] list is not a valid data_shapes spec.
    assertRaises(TypeError, mod.bind,
                 data_shapes=['data', mx.nd.zeros(shape=(2, 1))])

    mod.bind(data_shapes=[('data', (2, 1))])
    # Binding the SVRG module must also bind its internal aux module.
    assert mod.binded
    assert mod._mod_aux.binded
def create_network():
    """Build a small, fully initialized linear-regression SVRGModule.

    Returns
    -------
    di : mx.io.NDArrayIter
        Iterator over 10 random 2-feature samples (batch size 5).
    mod : SVRGModule
        Bound module with all-ones params and a local SGD optimizer (lr=0.01).
    """
    # Tiny synthetic dataset with exact linear labels y = x . [1, 2].
    train_data = np.random.randint(1, 5, [10, 2])
    weights = np.array([1.0, 2.0])
    train_label = train_data.dot(weights)
    di = mx.io.NDArrayIter(train_data, train_label, batch_size=5, shuffle=True,
                           label_name='lin_reg_label')

    data_sym = mx.sym.Variable('data')
    label_sym = mx.symbol.Variable('lin_reg_label')
    fc = mx.sym.FullyConnected(data=data_sym, name='fc1', num_hidden=1)
    lro = mx.sym.LinearRegressionOutput(data=fc, label=label_sym, name="lro")

    mod = SVRGModule(symbol=lro, data_names=['data'],
                     label_names=['lin_reg_label'], update_freq=2)
    mod.bind(data_shapes=di.provide_data, label_shapes=di.provide_label)
    # Deterministic all-ones init keeps the test reproducible.
    mod.init_params(initializer=mx.init.One(), allow_missing=False,
                    force_init=False, allow_extra=False)
    mod.init_optimizer(kvstore='local', optimizer='sgd',
                       optimizer_params=(('learning_rate', 0.01),),
                       force_init=False)
    return di, mod
def create_module_with_sgd():
    """Build matching plain-SGD and SVRG modules on identical data for comparison.

    Both modules share the same symbol, all-ones initialization and SGD
    hyper-parameters, so any output difference comes from the SVRG update rule.

    Returns
    -------
    di : mx.io.NDArrayIter
        Shared data iterator (100 samples, batch size 10).
    reg_mod : mx.mod.Module
        Baseline module trained with vanilla SGD.
    svrg_mod : SVRGModule
        SVRG module with update_freq=2.
    """
    train_data = np.random.randint(1, 5, [100, 2])
    weights = np.array([1.0, 2.0])
    train_label = train_data.dot(weights)
    di = mx.io.NDArrayIter(train_data, train_label, batch_size=10, shuffle=True,
                           label_name='lin_reg_label')

    data_sym = mx.sym.Variable('data')
    label_sym = mx.symbol.Variable('lin_reg_label')
    fc = mx.sym.FullyConnected(data=data_sym, name='fc1', num_hidden=1)
    lro = mx.sym.LinearRegressionOutput(data=fc, label=label_sym, name="lro")

    # Baseline: ordinary Module with plain SGD.
    reg_mod = mx.mod.Module(symbol=lro, data_names=['data'],
                            label_names=['lin_reg_label'])
    reg_mod.bind(data_shapes=di.provide_data, label_shapes=di.provide_label)
    reg_mod.init_params(initializer=mx.init.One(), allow_missing=False,
                        force_init=False, allow_extra=False)
    reg_mod.init_optimizer(kvstore='local', optimizer='sgd',
                           optimizer_params=(('learning_rate', 0.01),))

    # SVRG counterpart with identical setup.
    svrg_mod = SVRGModule(symbol=lro, data_names=['data'],
                          label_names=['lin_reg_label'], update_freq=2)
    svrg_mod.bind(data_shapes=di.provide_data, label_shapes=di.provide_label)
    svrg_mod.init_params(initializer=mx.init.One(), allow_missing=False,
                         force_init=False, allow_extra=False)
    svrg_mod.init_optimizer(kvstore='local', optimizer='sgd',
                            optimizer_params=(('learning_rate', 0.01),))
    return di, reg_mod, svrg_mod
def create_network(batch_size, update_freq):
    """Create a linear regression network for performing SVRG optimization.

    Parameters
    ----------
    batch_size: int
        Size of data split
    update_freq: int
        Update Frequency for calculating full gradients

    Returns
    -------
    di: mx.io.NDArrayIter
        Data iterator
    mod: SVRGModule
        An instance of SVRGModule for performing SVRG optimization

    Fix: the Returns section previously mislabeled the second return value
    as ``update_freq``; the function returns the module ``mod``.
    """
    import logging
    head = '%(asctime)-15s %(message)s'
    logging.basicConfig(level=logging.INFO, format=head)

    # Synthetic linear data: y = x . [1, 2] over integer features in [1, 5).
    train_data = np.random.randint(1, 5, [1000, 2])
    weights = np.array([1.0, 2.0])
    train_label = train_data.dot(weights)

    di = mx.io.NDArrayIter(train_data, train_label, batch_size=batch_size,
                           shuffle=True, label_name='lin_reg_label')
    X = mx.sym.Variable('data')
    Y = mx.symbol.Variable('lin_reg_label')
    fully_connected_layer = mx.sym.FullyConnected(data=X, name='fc1', num_hidden=1)
    lro = mx.sym.LinearRegressionOutput(data=fully_connected_layer, label=Y,
                                        name="lro")

    mod = SVRGModule(symbol=lro, data_names=['data'],
                     label_names=['lin_reg_label'],
                     update_freq=update_freq, logger=logging)
    return di, mod
def create_network(batch_size, update_freq):
    """Create a linear regression network for performing SVRG optimization.

    Returns
    -------
    di : mx.io.NDArrayIter
        Training iterator over the first 80% of the data.
    val_iter : mx.io.NDArrayIter
        Validation iterator over the remaining 20%.
    mod : SVRGModule
        Unbound SVRGModule for performing SVRG optimization.
    """
    head = '%(asctime)-15s %(message)s'
    logging.basicConfig(level=logging.INFO, format=head)

    data = np.random.randint(1, 5, [1000, 2])

    # Test_Train data split: 80% train / 20% validation.
    n_train = int(data.shape[0] * 0.8)
    weights = np.array([1.0, 2.0])
    label = data.dot(weights)

    di = mx.io.NDArrayIter(data[:n_train, :], label[:n_train],
                           batch_size=batch_size, shuffle=True,
                           label_name='lin_reg_label')
    val_iter = mx.io.NDArrayIter(data[n_train:, :], label[n_train:],
                                 batch_size=batch_size)

    data_sym = mx.sym.Variable('data')
    label_sym = mx.symbol.Variable('lin_reg_label')
    fc = mx.sym.FullyConnected(data=data_sym, name='fc1', num_hidden=1)
    lro = mx.sym.LinearRegressionOutput(data=fc, label=label_sym, name="lro")

    mod = SVRGModule(symbol=lro, data_names=['data'],
                     label_names=['lin_reg_label'],
                     update_freq=update_freq, logger=logging)
    return di, val_iter, mod
def create_lin_reg_network(train_features, train_labels, feature_dim,
                           batch_size, update_freq, ctx, logger):
    """Assemble a hand-rolled linear-regression SVRGModule and its iterator.

    The model is built from explicit weight/bias variables (rather than a
    FullyConnected layer) so per-parameter attributes like ``wd_mult`` and
    ``lr_mult`` can be set on the bias.

    Returns
    -------
    train_iter : mx.io.NDArrayIter
        Shuffled iterator over the provided features/labels.
    mod : SVRGModule
        Unbound module on ``ctx`` with the given update frequency.
    """
    # fit a linear regression model with mxnet SVRGModule
    print("Fitting linear regression with mxnet")
    train_iter = mx.io.NDArrayIter(train_features, train_labels,
                                   batch_size=batch_size, shuffle=True,
                                   data_name='data', label_name='label')

    data = mx.sym.Variable("data")
    label = mx.sym.Variable("label")
    # Explicit parameters: w of shape (1, feature_dim), bias with no weight
    # decay and a 10x learning-rate multiplier.
    weight = mx.sym.Variable("fc_weight", shape=(1, feature_dim))
    net = mx.sym.dot(data, weight.transpose())
    bias = mx.sym.Variable("fc_bias", shape=(1,), wd_mult=0.0, lr_mult=10.0)
    net = mx.sym.broadcast_plus(net, bias)
    net = mx.sym.LinearRegressionOutput(data=net, label=label)

    mod = SVRGModule(symbol=net, context=ctx, data_names=['data'],
                     label_names=['label'], logger=logger,
                     update_freq=update_freq)
    return train_iter, mod
def test_svrgmodule_reshape():
    """reshape() must let a bound SVRGModule run forward/backward at a new batch size."""
    data = mx.sym.Variable("data")
    sym = mx.sym.FullyConnected(data=data, num_hidden=4, name='fc')

    dshape = (3, 4)
    mod = SVRGModule(sym, data_names=["data"], label_names=None,
                     context=[mx.cpu(0), mx.cpu(1)], update_freq=2)
    mod.bind(data_shapes=[('data', dshape)])
    mod.init_params()
    mod._mod_aux.init_params()
    mod.init_optimizer(optimizer_params={"learning_rate": 1.0})

    # One full step at the original shape.
    batch = mx.io.DataBatch(data=[mx.nd.ones(dshape)], label=None)
    mod.forward(batch)
    mod.backward([mx.nd.ones(dshape)])
    mod.update()
    assert mod.get_outputs()[0].shape == dshape

    # Shrink the batch dimension and repeat; outputs must track the new shape.
    dshape = (2, 4)
    mod.reshape(data_shapes=[('data', dshape)])
    mod.forward(mx.io.DataBatch(data=[mx.nd.ones(dshape)], label=None))
    mod.backward([mx.nd.ones(dshape)])
    mod.update()
    assert mod.get_outputs()[0].shape == dshape
def test_module_save_load(tmpdir):
    """save_checkpoint/load round-trip must preserve the symbol, incl. multi-device."""
    import os

    x = mx.sym.Variable("data")
    y = mx.sym.Variable("softmax_label")
    # NOTE(review): y is passed positionally (as the weight slot of
    # FullyConnected) rather than as a label — looks suspicious; preserved
    # as-is since the checkpoint round-trip only compares symbols.
    net = mx.sym.FullyConnected(x, y, num_hidden=1)

    mod = SVRGModule(symbol=net, data_names=['data'],
                     label_names=['softmax_label'], update_freq=2)
    mod.bind(data_shapes=[('data', (1, 1))])
    mod.init_params()
    mod.init_optimizer(optimizer='sgd', optimizer_params={'learning_rate': 0.1})
    mod.update()

    tmp_file = os.path.join(str(tmpdir), 'svrg_test_output')
    mod.save_checkpoint(tmp_file, 0, save_optimizer_states=True)

    mod2 = SVRGModule.load(tmp_file, 0, load_optimizer_states=True,
                           data_names=('data',))
    mod2.bind(data_shapes=[('data', (1, 1))])
    mod2.init_optimizer(optimizer_params={'learning_rate': 0.1})
    assert mod._symbol.tojson() == mod2._symbol.tojson()

    # Multi-device
    mod3 = SVRGModule(symbol=net, data_names=['data'],
                      label_names=['softmax_label'], update_freq=3,
                      context=[mx.cpu(0), mx.cpu(1)])
    mod3.bind(data_shapes=[('data', (10, 10))])
    mod3.init_params()
    mod3.init_optimizer(optimizer_params={'learning_rate': 1.0})
    mod3.update()
    mod3.save_checkpoint(tmp_file, 0, save_optimizer_states=True)

    mod4 = SVRGModule.load(tmp_file, 0, load_optimizer_states=True,
                           data_names=('data',))
    mod4.bind(data_shapes=[('data', (10, 10))])
    mod4.init_optimizer(optimizer_params={'learning_rate': 1.0})
    assert mod3._symbol.tojson() == mod4._symbol.tojson()
def test_module_save_load():
    """save_checkpoint/load round-trip must preserve the symbol, incl. multi-device.

    Fix: the directory from ``tempfile.mkdtemp()`` was never removed, leaking
    a temp dir (and checkpoint files) on every run; cleanup now happens in a
    ``finally`` block so it runs even if an assertion fails.
    """
    import tempfile
    import os
    import shutil

    x = mx.sym.Variable("data")
    y = mx.sym.Variable("softmax_label")
    # NOTE(review): y is passed positionally (weight slot of FullyConnected),
    # not as a label — preserved as-is; the test only compares symbols.
    net = mx.sym.FullyConnected(x, y, num_hidden=1)
    mod = SVRGModule(symbol=net, data_names=['data'],
                     label_names=['softmax_label'], update_freq=2)
    mod.bind(data_shapes=[('data', (1, 1))])
    mod.init_params()
    mod.init_optimizer(optimizer='sgd', optimizer_params={'learning_rate': 0.1})
    mod.update()

    # Create tempfile
    tmp = tempfile.mkdtemp()
    try:
        tmp_file = os.path.join(tmp, 'svrg_test_output')
        mod.save_checkpoint(tmp_file, 0, save_optimizer_states=True)

        mod2 = SVRGModule.load(tmp_file, 0, load_optimizer_states=True,
                               data_names=('data',))
        mod2.bind(data_shapes=[('data', (1, 1))])
        mod2.init_optimizer(optimizer_params={'learning_rate': 0.1})
        assert mod._symbol.tojson() == mod2._symbol.tojson()

        # Multi-device
        mod3 = SVRGModule(symbol=net, data_names=['data'],
                          label_names=['softmax_label'], update_freq=3,
                          context=[mx.cpu(0), mx.cpu(1)])
        mod3.bind(data_shapes=[('data', (10, 10))])
        mod3.init_params()
        mod3.init_optimizer(optimizer_params={'learning_rate': 1.0})
        mod3.update()
        mod3.save_checkpoint(tmp_file, 0, save_optimizer_states=True)

        mod4 = SVRGModule.load(tmp_file, 0, load_optimizer_states=True,
                               data_names=('data',))
        mod4.bind(data_shapes=[('data', (10, 10))])
        mod4.init_optimizer(optimizer_params={'learning_rate': 1.0})
        assert mod3._symbol.tojson() == mod4._symbol.tojson()
    finally:
        # Always remove the temp checkpoint directory.
        shutil.rmtree(tmp, ignore_errors=True)
def test_svrgmodule_reshape():
    """A reshaped SVRGModule must produce outputs matching the new data shape."""
    data = mx.sym.Variable("data")
    sym = mx.sym.FullyConnected(data=data, num_hidden=4, name='fc')

    shape = (3, 4)
    mod = SVRGModule(sym, data_names=["data"], label_names=None,
                     context=[mx.cpu(0), mx.cpu(1)], update_freq=2)
    mod.bind(data_shapes=[('data', shape)])
    mod.init_params()
    mod._mod_aux.init_params()
    mod.init_optimizer(optimizer_params={"learning_rate": 1.0})

    # Train one step at the initial shape and check the output shape.
    mod.forward(mx.io.DataBatch(data=[mx.nd.ones(shape)], label=None))
    mod.backward([mx.nd.ones(shape)])
    mod.update()
    assert mod.get_outputs()[0].shape == shape

    # Reshape to a smaller batch and verify the module follows along.
    shape = (2, 4)
    mod.reshape(data_shapes=[('data', shape)])
    mod.forward(mx.io.DataBatch(data=[mx.nd.ones(shape)], label=None))
    mod.backward([mx.nd.ones(shape)])
    mod.update()
    assert mod.get_outputs()[0].shape == shape