def make_iterators(dbpedia_data, num_iterations, batch_size):
    # Build a shuffled training iterator and a plain test iterator
    # over the DBpedia splits.
    train_set = ArrayIterator(dbpedia_data['train'], batch_size=batch_size,
                              total_iterations=num_iterations, shuffle=True)
    test_set = ArrayIterator(dbpedia_data['test'], batch_size=batch_size)
    return train_set, test_set
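# Hypothetical usage sketch for make_iterators above (not from the original
# source); it assumes `dbpedia_data` holds 'train' and 'test' splits in the
# nested {'data': ..., 'axes': ...} layout that ArrayIterator consumes, and
# the iteration counts are placeholder values.
train_set, test_set = make_iterators(dbpedia_data,
                                     num_iterations=2000, batch_size=128)
batch = next(train_set)  # one shuffled training minibatch as a dict of arrays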
def get_fake_data(dataset, batch_size, n_iter):
    x_train, y_train = generate_data(dataset, batch_size)

    train_data = {'image': {'data': x_train,
                            'axes': ('batch', 'C', 'height', 'width')},
                  'label': {'data': y_train,
                            'axes': ('batch',)}}

    train_set = ArrayIterator(train_data, batch_size, total_iterations=n_iter)
    inputs = train_set.make_placeholders(include_iteration=True)
    return inputs, train_data, train_set
def plot_generated(trainer):
    # Get a batch from the train set
    train_set_one_epoch = ArrayIterator(dataset.train, batch_size, shuffle=False)
    gen_series = trainer.predict(train_set_one_epoch, num_batches=1)
    train_set_one_epoch.reset()

    # Get an example from the batch
    gen_series = gen_series[4]
    if args.backward:
        # If args.backward is set, the autoencoder produced the input sequence
        # in reverse, so we flip it back to match the true series.
        gen_series = gen_series[::-1, :]
    true_series = next(train_set_one_epoch)['X'][4]

    # Plot the true and generated values of each series.
    # Use true division here; the original floor division (// 3) made the
    # surrounding np.ceil a no-op and could undercount the rows.
    ncols = int(np.ceil((dataset.n_sensors + dataset.n_operating_modes) / 3.0))
    fig, ax = plt.subplots(ncols, 3)
    fig.set_figheight(20)
    fig.set_figwidth(10)
    for i in range(dataset.n_operating_modes):
        plt.subplot(ncols, 3, i + 1)
        # Label only the first pair of curves so the legend has one entry each
        if i == 0:
            plt.plot(true_series[:, i], label="true", color="blue")
            plt.plot(gen_series[:, i], label="gen", color="red")
        else:
            plt.plot(true_series[:, i], color="blue")
            plt.plot(gen_series[:, i], color="red")
        plt.title("Operating mode {}".format(i + 1))
    for i in range(dataset.n_sensors):
        plt.subplot(ncols, 3, dataset.n_operating_modes + i + 1)
        plt.plot(true_series[:, dataset.n_operating_modes + i], color="blue")
        plt.plot(gen_series[:, dataset.n_operating_modes + i], color="red")
        plt.title("Sensor {}".format(i + 1))
    fig.legend()
    plt.tight_layout()
    fig.savefig(os.path.join(args.results_dir, "generated_series.png"))
def get_fake_data(dataset, batch_size, num_iterations, seed=None):
    x_train, y_train = generate_data(dataset, batch_size, rand_seed=seed)

    train_data = {'image': {'data': x_train,
                            'axes': ('batch', 'C', 'H', 'W')},
                  'label': {'data': y_train,
                            'axes': ('batch',)}}

    train_set = ArrayIterator(train_data, batch_size,
                              total_iterations=num_iterations)
    inputs = train_set.make_placeholders(include_iteration=True)
    return inputs, train_data, train_set
def get_fake_cifar(batch_size, n_iter):
    cifar = FakeCIFAR()
    cifar.reset(0)
    batch_xs, batch_ys = cifar.train.next_batch(batch_size)
    x_train = np.vstack(batch_xs).reshape(-1, 3, 32, 32)
    y_train = np.vstack(batch_ys).ravel()

    train_data = {'image': {'data': x_train,
                            'axes': ('batch', 'C', 'height', 'width')},
                  'label': {'data': y_train,
                            'axes': ('batch',)}}

    train_set = ArrayIterator(train_data, batch_size, total_iterations=n_iter)
    inputs = train_set.make_placeholders(include_iteration=True)
    return inputs, train_data, train_set
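# Hypothetical usage sketch (not in the original source): pulls the fake-CIFAR
# iterator plus its placeholders for smoke-testing a model graph; the
# batch_size and n_iter values are placeholder choices.
inputs, train_data, train_set = get_fake_cifar(batch_size=64, n_iter=10)
batch = next(train_set)  # one minibatch as a dict of arrays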
# parse command line arguments
parser = NgraphArgparser()
parser.add_argument('--plot_interval', type=int, default=200,
                    help='interval (in iterations) between saving generated images')
parser.add_argument('--seed', type=int, default=0, help='random seed')
args = parser.parse_args()
np.random.seed(args.rng_seed)
args.batch_size = 32

# Create the dataloader
train_data, valid_data = MNIST(args.data_dir).load_data()
train_set = ArrayIterator(train_data, args.batch_size)

# noise source
noise_dim = (2, 1, 3, 3)
noise_generator = Noise(train_set.ndata,
                        shape=noise_dim + (args.batch_size,),
                        seed=args.seed)

# generator network
g_scope = 'generator'
filter_init = GaussianInit(var=0.05)
relu = Rectlin(slope=0)

deconv_layers = [Deconvolution((1, 1, 16), filter_init,
def train_mnist_mlp(transformer_name, data_dir=None, rng_seed=12,
                    batch_size=128, train_iter=10, eval_iter=10):
    assert transformer_name in ['cpu', 'hetr']
    assert isinstance(rng_seed, int)

    # Apply this metadata to the graph regardless of transformer;
    # it is ignored for the non-HeTr case.
    hetr_device_ids = (0, 1)

    # use consistent rng seed between runs
    np.random.seed(rng_seed)

    # Data
    train_data, valid_data = MNIST(path=data_dir).load_data()
    train_set = ArrayIterator(train_data, batch_size,
                              total_iterations=train_iter)
    valid_set = ArrayIterator(valid_data, batch_size)
    inputs = train_set.make_placeholders()
    ax.Y.length = 10

    # Model
    with ng.metadata(device_id=hetr_device_ids, parallel=ax.N):
        seq1 = Sequential([
            Preprocess(functor=lambda x: x / 255.),
            Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
            Affine(axes=ax.Y, weight_init=GaussianInit(), activation=Logistic())
        ])

        train_prob = seq1(inputs['image'])
        train_loss = ng.cross_entropy_binary(
            train_prob, ng.one_hot(inputs['label'], axis=ax.Y))

        optimizer = GradientDescentMomentum(0.1, 0.9)
        batch_cost = ng.sequential(
            [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
        train_outputs = dict(batch_cost=batch_cost)

        with Layer.inference_mode_on():
            inference_prob = seq1(inputs['image'])
        errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                              inputs['label'])
        eval_loss = ng.cross_entropy_binary(
            inference_prob, ng.one_hot(inputs['label'], axis=ax.Y))
        eval_outputs = dict(cross_ent_loss=eval_loss, misclass_pct=errors)

    # Runtime
    with closing(ngt.make_transformer_factory(transformer_name)()) as transformer:
        train_computation = make_bound_computation(transformer,
                                                   train_outputs, inputs)
        loss_computation = make_bound_computation(transformer,
                                                  eval_outputs, inputs)

        train_costs = list()
        for step in range(train_iter):
            out = train_computation(next(train_set))
            train_costs.append(float(out['batch_cost']))

        ce_loss = list()
        for step in range(eval_iter):
            out = loss_computation(next(valid_set))
            ce_loss.append(np.mean(out['cross_ent_loss']))

        return train_costs, ce_loss
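# Minimal invocation sketch for train_mnist_mlp above (not from the original
# source). 'cpu' is one of the two transformer names its assert accepts;
# data_dir=None is a placeholder that lets the MNIST loader pick its own
# cache location.
train_costs, ce_loss = train_mnist_mlp('cpu', data_dir=None, batch_size=128,
                                       train_iter=10, eval_iter=10)
print("final batch cost: {:.4f}, mean eval CE: {:.4f}".format(
    train_costs[-1], float(np.mean(ce_loss))))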
kernel_size = args.ksize
dropout = 1 - args.dropout  # amount to keep
seq_len = args.seq_len
batch_size = args.batch_size
n_epochs = args.epochs

music_dataset = Music(data_dir=args.datadir,
                      seq_len=seq_len,
                      dataset=args.dataset)
seq_len = music_dataset.seq_len
n_train = music_dataset.train['X']['data'].shape[0]
num_iterations = int(n_train * n_epochs * 1.0 / batch_size)
n_features = music_dataset.train['X']['data'].shape[2]

train_iterator = ArrayIterator(music_dataset.train, batch_size,
                               total_iterations=num_iterations, shuffle=True)
test_iterator = ArrayIterator(music_dataset.test, batch_size)

# Name and create axes
batch_axis = ng.make_axis(length=batch_size, name="N")
time_axis = ng.make_axis(length=seq_len, name="REC")
feature_axis = ng.make_axis(length=n_features, name="F")
out_axis = ng.make_axis(length=n_features, name="Fo")

in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
out_axes = ng.make_axes([batch_axis, time_axis, out_axis])

# Build placeholders for the created axes
inputs = dict(X=ng.placeholder(in_axes),
              y=ng.placeholder(out_axes),
embeddingz = np.load(os.path.join(path_gen + "glove.trimmed.300.npz"))
embeddings = embeddingz['glove']
vocab_file = os.path.join(path_gen + 'vocab.dat')

print("Creating training set")
train = get_data_array_squad_ngraph(params_dict, data_train, set_val='train')
dev = get_data_array_squad_ngraph(params_dict, data_dev, set_val='dev')
print('Train set size is', len(train['para']['data']))
print('Dev set size is', len(dev['para']['data']))

# Use ArrayIterator for the training set
train_set = ArrayIterator(train, batch_size=params_dict['batch_size'],
                          total_iterations=params_dict['num_iterations'])
# Use ArrayIterator for the validation set
valid_set = ArrayIterator(dev, batch_size=params_dict['batch_size'],
                          total_iterations=params_dict['num_iterations'])

# Make placeholders for training
inputs = train_set.make_placeholders(include_iteration=True)

# Encoding layer
rlayer_1 = LSTM(hidden_size, init, activation=Tanh(), reset_cells=True,
                gate_activation=Logistic(), return_sequence=True)

# Embedding layer
embed_layer = LookupTable(params_dict['vocab_size'],
    affine_layer(2 * h_dim, Tanh(), name='d1')]

if minibatch_discrimination:
    raise NotImplementedError
else:
    discriminator_layers.append(affine_layer(2 * h_dim, Tanh(), name='d2'))
discriminator_layers.append(affine_layer(1, Logistic(), name='d3'))
discriminator = Sequential(discriminator_layers)

# TODO discriminator pre-training

# dataloader
np.random.seed(1)
toy_gan_data = ToyGAN(batch_size, num_iterations)
train_data = toy_gan_data.load_data()
train_set = ArrayIterator(train_data, batch_size, num_iterations)
# reset seed for weights
np.random.seed(2)

# build network graph
inputs = train_set.make_placeholders()

z = inputs['noise_sample']
G = generator(z)  # generated sample

x = inputs['data_sample']
D1 = discriminator(x)  # discriminator output on real data sample

# cast G axes into x
G_t = ng.axes_with_order(G, reversed(G.axes))
G_cast = ng.cast_axes(G_t, x.axes)
babi = BABI_Dialog(path=data_dir,
                   task=args.task,
                   oov=args.use_oov,
                   use_match_type=args.use_match_type,
                   cache_match_type=args.cache_match_type,
                   cache_vectorized=args.cache_vectorized)

weight_saver = Saver()

# Set num iterations to 1 epoch since we loop over epochs & shuffle
ndata = babi.data_dict['train']['memory']['data'].shape[0]
num_iterations = ndata // args.batch_size

train_set = ArrayIterator(babi.data_dict['train'], batch_size=args.batch_size,
                          total_iterations=num_iterations)
inputs = train_set.make_placeholders()

memn2n = MemN2N_Dialog(babi.cands,
                       babi.num_cands,
                       babi.max_cand_len,
                       babi.memory_size,
                       babi.max_utt_len,
                       babi.vocab_size,
                       args.emb_size,
                       args.batch_size,
                       use_match_type=args.use_match_type,
                       kb_ents_to_type=babi.kb_ents_to_type,
                       kb_ents_to_cand_idxs=babi.kb_ents_to_cand_idxs,
                       match_type_idxs=babi.match_type_idxs,
discriminator_train_inputs = {'image': image, 'noise': z}

generator_train_outputs = {'batch_cost': mean_cost_g,
                           'updates': updates_g,
                           'generated': generated}  # for plots
discriminator_train_outputs = {'batch_cost': mean_cost_d,
                               'updates': updates_d,
                               'grad_norm': mean_grad_norm}

# create the dataloader
train_data, valid_data = MNIST(args.data_dir).load_data()
train_set = ArrayIterator(train_data, args.batch_size, args.num_iterations)

# noise source
noise_generator = Noise(shape=noise_dim + (args.batch_size,),
                        seed=args.rng_seed)

with closing(ngt.make_transformer()) as transformer:
    train_computation_g = make_bound_computation(transformer,
                                                 generator_train_outputs,
                                                 generator_train_inputs)
    train_computation_d = make_bound_computation(transformer,
                                                 discriminator_train_outputs,
                                                 discriminator_train_inputs)

    # train loop
args = parser.parse_args()
np.random.seed(args.rng_seed)

# Create the dataloader
if args.use_aeon:
    from data import make_aeon_loaders
    train_set, valid_set = make_aeon_loaders(args.data_dir,
                                             args.batch_size,
                                             args.num_iterations)
else:
    from ngraph.frontends.neon import ArrayIterator  # noqa
    from ngraph.frontends.neon import CIFAR10  # noqa
    train_data, valid_data = CIFAR10(args.data_dir).load_data()
    train_set = ArrayIterator(train_data, args.batch_size,
                              total_iterations=args.num_iterations)
    valid_set = ArrayIterator(valid_data, args.batch_size)

# we need to ask the dataset to create an iteration
# placeholder for our learning rate schedule
inputs = train_set.make_placeholders(include_iteration=True)
ax.Y.length = 10

resnet = residual_network(args.stage_depth)

learning_rate_policy = {'name': 'schedule',
                        'schedule': [32000, 48000],
                        'gamma': 0.1,
                        'base_lr': 0.1
args.batch_size = 128
time_steps = 128
hidden_size = 10
gradient_clip_value = 15
embed_size = 128
vocab_size = 20000
pad_idx = 0

# download IMDB
imdb_dataset = IMDB(path=args.data_dir,
                    sentence_length=time_steps,
                    pad_idx=pad_idx)
imdb_data = imdb_dataset.load_data()

train_set = ArrayIterator(imdb_data['train'], batch_size=args.batch_size,
                          total_iterations=args.num_iterations)
valid_set = ArrayIterator(imdb_data['valid'], batch_size=args.batch_size)

inputs = train_set.make_placeholders()
ax.Y.length = imdb_dataset.nclass

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

if args.layer_type == "rnn":
    rlayer = Recurrent(hidden_size, init, activation=Tanh(),
                       reset_cells=True, return_sequence=False)
seq_len = args.seq_len
no_epochs = args.epochs
output_dim = 1

dataset = TurboFan(data_dir="../../data/", T=args.seq_len,
                   skip=args.skip, max_rul_predictable=130)
feature_dim = dataset.n_features

if args.save_plots:
    dataset.plot_sample(out_folder, trajectory_id=10)

# Build input data iterables
# Yields an input array of shape (batch_size, seq_len, input_feature_dim)
train_samples = len(dataset.train['X']['data'])
num_iterations = (no_epochs * train_samples) // batch_size

train_set = ArrayIterator(dataset.train, batch_size,
                          total_iterations=num_iterations, shuffle=True)
train_set_one_epoch = ArrayIterator(dataset.train, batch_size, shuffle=False)
test_set = ArrayIterator(dataset.test, batch_size)

# Name and create axes
batch_axis = ng.make_axis(length=batch_size, name="N")
time_axis = ng.make_axis(length=seq_len, name="REC")
feature_axis = ng.make_axis(length=feature_dim, name="F")
out_axis = ng.make_axis(length=output_dim, name="Fo")

in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
out_axes = ng.make_axes([batch_axis, out_axis])

# Build placeholders for the created axes
inputs = dict(X=ng.placeholder(in_axes),
              y=ng.placeholder(out_axes),
              iteration=ng.placeholder(axes=()))
if args.model_file is not None:
    model_file = os.path.expanduser(args.model_file)
else:
    model_file = None

wikimovies = WIKIMOVIES(args.data_dir, subset=args.subset,
                        reparse=args.reparse, mem_source=args.mem_mode)

ndata = wikimovies.data_dict['train']['query']['data'].shape[0]
num_iterations = ndata // args.batch_size

train_set = ArrayIterator(wikimovies.data_dict['train'],
                          batch_size=args.batch_size,
                          total_iterations=num_iterations)
test_set = ArrayIterator(wikimovies.data_dict['test'],
                         batch_size=args.batch_size)
inputs = train_set.make_placeholders()
vocab_axis = ng.make_axis(length=wikimovies.vocab_size, name='vocab_axis')

memn2n = KVMemN2N(num_iterations, args.batch_size, args.emb_size, args.nhops,
                  wikimovies.story_length, wikimovies.memory_size,
                  wikimovies.vocab_size, vocab_axis, args.use_v_luts)

# Compute answer predictions
a_pred, _ = memn2n(inputs)

loss = ng.cross_entropy_multi(a_pred,
                              ng.one_hot(inputs['answer'], axis=vocab_axis),
                              usebits=True)
from ngraph.frontends.neon import ax, loop_train, make_bound_computation, make_default_callbacks
from ngraph.frontends.neon import NgraphArgparser
from ngraph.frontends.neon import ArrayIterator
from cifar10 import CIFAR10
import ngraph.transformers as ngt

parser = NgraphArgparser(description='Train simple CNN on cifar10 dataset')
args = parser.parse_args()
np.random.seed(args.rng_seed)

# Create the dataloader
train_data, valid_data = CIFAR10(args.data_dir).load_data()
train_set = ArrayIterator(train_data, args.batch_size,
                          total_iterations=args.num_iterations)
valid_set = ArrayIterator(valid_data, args.batch_size)

######################
# Model specification


def cifar_mean_subtract(x):
    # Subtract the per-channel BGR mean and rescale pixel values to [0, 1)
    bgr_mean = ng.persistent_tensor(
        axes=x.axes[0],
        initial_value=np.array([[104., 119., 127.]]))
    y = ng.expand_dims((x - bgr_mean) / 255., ax.D, 1)
    return y


init_uni = UniformInit(-0.1, 0.1)
babi = BABI_Dialog(path=data_dir,
                   task=args.task,
                   oov=args.use_oov,
                   use_match_type=args.use_match_type,
                   cache_match_type=args.cache_match_type,
                   cache_vectorized=args.cache_vectorized)

weight_saver = Saver()

# Set num iterations to 1 epoch since we loop over epochs & shuffle
ndata = babi.data_dict['train']['memory']['data'].shape[0]
num_iterations = ndata // args.batch_size

train_set = ArrayIterator(babi.data_dict['train'], batch_size=args.batch_size,
                          total_iterations=num_iterations)
dev_set = ArrayIterator(babi.data_dict['dev'], batch_size=args.batch_size)
test_set = ArrayIterator(babi.data_dict['test'], batch_size=args.batch_size)
inputs = train_set.make_placeholders()

memn2n = MemN2N_Dialog(babi.cands,
                       babi.num_cands,
                       babi.max_cand_len,
                       babi.memory_size,
                       babi.max_utt_len,
                       babi.vocab_size,
                       args.emb_size,
                       args.batch_size,
                       use_match_type=args.use_match_type,
                       kb_ents_to_type=babi.kb_ents_to_type,
# Generate Lissajous curve
data = timeseries.TimeSeries(train_ratio=0.8,  # ratio of samples to set aside for training
                             seq_len=seq_len,  # length of the sequence in each sample
                             npoints=no_points,  # number of points to take in each cycle
                             ncycles=no_cycles,  # number of cycles in the curve
                             batch_size=batch_size,
                             curvetype='Lissajous2',
                             predict_seq=predict_seq,  # set True if you want sequences as output
                             look_ahead=look_ahead)  # number of time steps to look ahead

# Build input data iterables
# Yields an input array of shape (batch_size, seq_len, input_feature_dim)
num_iterations = no_epochs * no_batches

train_set = ArrayIterator(data.train, batch_size,
                          total_iterations=num_iterations)
test_set = ArrayIterator(data.test, batch_size)

# Name and create axes
batch_axis = ng.make_axis(length=batch_size, name="N")
time_axis = ng.make_axis(length=seq_len, name="REC")
feature_axis = ng.make_axis(length=feature_dim, name="feature_axis")
out_axis = ng.make_axis(length=output_dim, name="output_axis")

in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])

if predict_seq:
    out_axes = ng.make_axes([batch_axis, time_axis, out_axis])
else:
    out_axes = ng.make_axes([batch_axis, out_axis])
params_dict['vocab_size'] = len(vocab_list)

print('Loading embeddings')
embeddingz = np.load(os.path.join(path_gen + "glove.trimmed.300.npz"))
embeddings = embeddingz['glove']
vocab_file = os.path.join(path_gen + 'vocab.dat')

print("Creating training set")
train = get_data_array_squad_ngraph(params_dict, data_train, set_val='train')
dev = get_data_array_squad_ngraph(params_dict, data_dev, set_val='dev')
print('Train set size is', len(train['para']['data']))
print('Dev set size is', len(dev['para']['data']))

# Use ArrayIterator for the training set
train_set = ArrayIterator(train, batch_size=params_dict['batch_size'],
                          total_iterations=params_dict['num_iterations'])
# Use ArrayIterator for the validation set
valid_set = ArrayIterator(dev, batch_size=params_dict['batch_size'],
                          total_iterations=params_dict['num_iterations'])

# Make placeholders for training
inputs = train_set.make_placeholders(include_iteration=True)

# Encoding layer
rlayer_1 = LSTM(hidden_size, init, activation=Tanh(), reset_cells=True,
                gate_activation=Logistic(),