# Dataset sanity checks. Two signatures of datasets.get_data() appear in this
# codebase: a newer one that also returns the number of channels, and an older
# one that does not. Both variants of each check are kept below.
# Assumed imports for these fragments (not part of the original excerpt):
from nose.plugins.skip import SkipTest

from draw import datasets


def check_dataset(name):
    """All features of every split must lie in [0, 1] (channels variant)."""
    try:
        img_shape, channels, data_train, data_valid, data_test = \
            datasets.get_data(name)
    except IOError:
        raise SkipTest
    for ds in (data_train, data_valid, data_test):
        features, = ds.get_data(None, slice(0, 100))
        features = features.reshape([100, -1])
        assert (features >= 0).all()
        assert (features <= 1).all()


def check_dataset(name):  # noqa: F811 -- variant for the older signature
    """All features of every split must lie in [0, 1] (no channels)."""
    try:
        img_shape, data_train, data_valid, data_test = \
            datasets.get_data(name)
    except IOError:
        raise SkipTest
    for ds in (data_train, data_valid, data_test):
        features, = ds.get_data(None, slice(0, 100))
        features = features.reshape([100, -1])
        assert (features >= 0).all()
        assert (features <= 1).all()


def check_dataset(name):  # noqa: F811 -- shape check (channels variant)
    """Flattened features must have shape (100, height * width)."""
    try:
        img_shape, channels, data_train, data_valid, data_test = \
            datasets.get_data(name)
    except IOError:
        raise SkipTest
    for ds in (data_train, data_valid, data_test):
        features, = ds.get_data(None, slice(0, 100))
        img_height, img_width = img_shape
        x_dim = img_height * img_width
        features = features.reshape([100, -1])
        assert features.shape == (100, x_dim)


def check_dataset(name):  # noqa: F811 -- shape check (no channels)
    """Flattened features must have shape (100, height * width)."""
    try:
        img_shape, data_train, data_valid, data_test = \
            datasets.get_data(name)
    except IOError:
        raise SkipTest
    for ds in (data_train, data_valid, data_test):
        features, = ds.get_data(None, slice(0, 100))
        img_height, img_width = img_shape
        x_dim = img_height * img_width
        features = features.reshape([100, -1])
        assert features.shape == (100, x_dim)
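# A minimal sketch of how the checks above might be driven as nose-style
# generator tests. The dataset names listed here are assumptions for
# illustration, not a verified list of what datasets.get_data() supports.
def test_datasets():
    for name in ('mnist', 'bmnist', 'cifar10'):
        yield check_dataset, name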
def main(name, dataset, epochs, batch_size, learning_rate, attention,
         n_iter, enc_dim, dec_dim, z_dim, oldmodel):
    image_size, data_train, data_valid, data_test = datasets.get_data(dataset)

    train_stream = Flatten(
        DataStream(data_train,
                   iteration_scheme=SequentialScheme(
                       data_train.num_examples, batch_size)))
    valid_stream = Flatten(
        DataStream(data_valid,
                   iteration_scheme=SequentialScheme(
                       data_valid.num_examples, batch_size)))
    test_stream = Flatten(
        DataStream(data_test,
                   iteration_scheme=SequentialScheme(
                       data_test.num_examples, batch_size)))

    if name is None:
        name = dataset

    img_height, img_width = image_size
    x_dim = img_height * img_width

    rnninits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    # Configure attention: "read_N,write_N" selects the attention
    # reader/writer pair; an empty string selects the full-image pair.
    if attention != "":
        read_N, write_N = attention.split(',')
        read_N = int(read_N)
        write_N = int(write_N)
        read_dim = 2 * read_N**2

        reader = AttentionReader(x_dim=x_dim, dec_dim=dec_dim,
                                 width=img_width, height=img_height,
                                 N=read_N, **inits)
        writer = AttentionWriter(input_dim=dec_dim, output_dim=x_dim,
                                 width=img_width, height=img_height,
                                 N=write_N, **inits)
        attention_tag = "r%d-w%d" % (read_N, write_N)
    else:
        read_dim = 2 * x_dim

        reader = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
        writer = Writer(input_dim=dec_dim, output_dim=x_dim, **inits)

        attention_tag = "full"

    #----------------------------------------------------------------------
    # Learning rate
    def lr_tag(value):
        """ Convert a float into a short tag-usable string representation.
            E.g.:
                0.1   -> 11
                0.01  -> 12
                0.001 -> 13
                0.005 -> 53
        """
        exp = np.floor(np.log10(value))
        leading = ("%e" % value)[0]
        return "%s%d" % (leading, -exp)

    lr_str = lr_tag(learning_rate)

    subdir = time.strftime("%Y%m%d-%H%M%S") + "-" + name
    longname = "%s-%s-t%d-enc%d-dec%d-z%d-lr%s" % (
        dataset, attention_tag, n_iter, enc_dim, dec_dim, z_dim, lr_str)
    pickle_file = subdir + "/" + longname + ".pkl"

    print("\nRunning experiment %s" % longname)
    print("           dataset: %s" % dataset)
    print("      subdirectory: %s" % subdir)
    print("     learning rate: %g" % learning_rate)
    print("         attention: %s" % attention)
    print("      n_iterations: %d" % n_iter)
    print(" encoder dimension: %d" % enc_dim)
    print("       z dimension: %d" % z_dim)
    print(" decoder dimension: %d" % dec_dim)
    print("        batch size: %d" % batch_size)
    print("            epochs: %d" % epochs)
    print()

    #----------------------------------------------------------------------

    encoder_rnn = LSTM(dim=enc_dim, name="RNN_enc", **rnninits)
    decoder_rnn = LSTM(dim=dec_dim, name="RNN_dec", **rnninits)
    encoder_mlp = MLP([Identity()], [(read_dim + dec_dim), 4 * enc_dim],
                      name="MLP_enc", **inits)
    decoder_mlp = MLP([Identity()], [z_dim, 4 * dec_dim],
                      name="MLP_dec", **inits)
    q_sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, **inits)

    draw = DrawModel(
        n_iter,
        reader=reader,
        encoder_mlp=encoder_mlp,
        encoder_rnn=encoder_rnn,
        sampler=q_sampler,
        decoder_mlp=decoder_mlp,
        decoder_rnn=decoder_rnn,
        writer=writer)
    draw.initialize()

    #------------------------------------------------------------------------
    x = tensor.matrix('features')

    #x_recons = 1. + x
    x_recons, kl_terms = draw.reconstruct(x)
    #x_recons, _, _, _, _ = draw.silly(x, n_steps=10, batch_size=100)
    #x_recons = x_recons[-1,:,:]

    #samples = draw.sample(100)
    #x_recons = samples[-1, :, :]

    recons_term = BinaryCrossEntropy().apply(x, x_recons)
    recons_term.name = "recons_term"

    cost = recons_term + kl_terms.sum(axis=0).mean()
    cost.name = "nll_bound"

    #------------------------------------------------------------
    cg = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(cg.variables)

    # Note: this variant targets the older Blocks API, where GradientDescent
    # took `params=`; later versions renamed it to `parameters=` (cf. the
    # variants below).
    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=CompositeRule([
            StepClipping(10.),
            Adam(learning_rate),
        ])
        #step_rule=RMSProp(learning_rate),
        #step_rule=Momentum(learning_rate=learning_rate, momentum=0.95)
    )
    #algorithm.add_updates(scan_updates)

    #------------------------------------------------------------------------
    # Setup monitors
    monitors = [cost]
    for t in range(n_iter):
        kl_term_t = kl_terms[t, :].mean()
        kl_term_t.name = "kl_term_%d" % t

        #x_recons_t = T.nnet.sigmoid(c[t,:,:])
        #recons_term_t = BinaryCrossEntropy().apply(x, x_recons_t)
        #recons_term_t = recons_term_t.mean()
        #recons_term_t.name = "recons_term_%d" % t

        monitors += [kl_term_t]

    train_monitors = monitors[:]
    train_monitors += [aggregation.mean(algorithm.total_gradient_norm)]
    train_monitors += [aggregation.mean(algorithm.total_step_norm)]

    # Live plotting...
    plot_channels = [
        ["train_nll_bound", "test_nll_bound"],
        ["train_kl_term_%d" % t for t in range(n_iter)],
        #["train_recons_term_%d" % t for t in range(n_iter)],
        ["train_total_gradient_norm", "train_total_step_norm"]
    ]

    #------------------------------------------------------------
    if not os.path.exists(subdir):
        os.makedirs(subdir)

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=epochs),
            TrainingDataMonitoring(
                train_monitors,
                prefix="train",
                after_epoch=True),
            # DataStreamMonitoring(
            #     monitors,
            #     valid_stream,
            ##     updates=scan_updates,
            #     prefix="valid"),
            DataStreamMonitoring(
                monitors,
                test_stream,
            #   updates=scan_updates,
                prefix="test"),
            Checkpoint(name, before_training=False, after_epoch=True,
                       save_separately=['log', 'model']),
            #Checkpoint(image_size=image_size, save_subdir=subdir,
            #           path=pickle_file, before_training=False,
            #           after_epoch=True, save_separately=['log', 'model']),
            Plot(name, channels=plot_channels),
            ProgressBar(),
            Printing()])

    if oldmodel is not None:
        print("Initializing parameters with old model %s" % oldmodel)
        with open(oldmodel, "rb") as f:
            oldmodel = pickle.load(f)
            main_loop.model.set_param_values(oldmodel.get_param_values())
        del oldmodel

    main_loop.run()
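# A hedged sketch of calling this main() directly; the script's actual CLI
# (argument parsing, defaults) is not shown in this excerpt, and the
# hyperparameter values below are illustrative only, loosely following the
# DRAW paper's MNIST settings (read N=2, write N=5, T=10 glimpses).
main(name=None, dataset="bmnist", epochs=100, batch_size=100,
     learning_rate=1e-3, attention="2,5", n_iter=10,
     enc_dim=256, dec_dim=256, z_dim=100, oldmodel=None)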
def main(name, dataset, epochs, batch_size, learning_rate, attention,
         n_iter, enc_dim, dec_dim, z_dim, oldmodel, live_plotting):
    image_size, channels, data_train, data_valid, data_test = \
        datasets.get_data(dataset)

    train_stream = Flatten(
        DataStream.default_stream(
            data_train,
            iteration_scheme=SequentialScheme(
                data_train.num_examples, batch_size)))
    valid_stream = Flatten(
        DataStream.default_stream(
            data_valid,
            iteration_scheme=SequentialScheme(
                data_valid.num_examples, batch_size)))
    test_stream = Flatten(
        DataStream.default_stream(
            data_test,
            iteration_scheme=SequentialScheme(
                data_test.num_examples, batch_size)))

    if name is None:
        name = dataset

    img_height, img_width = image_size
    x_dim = channels * img_height * img_width

    rnninits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    # Configure attention mechanism
    if attention != "":
        read_N, write_N = attention.split(',')
        read_N = int(read_N)
        write_N = int(write_N)
        read_dim = 2 * channels * read_N ** 2

        reader = AttentionReader(x_dim=x_dim, dec_dim=dec_dim,
                                 channels=channels, width=img_width,
                                 height=img_height, N=read_N, **inits)
        writer = AttentionWriter(input_dim=dec_dim, output_dim=x_dim,
                                 channels=channels, width=img_width,
                                 height=img_height, N=write_N, **inits)
        attention_tag = "r%d-w%d" % (read_N, write_N)
    else:
        read_dim = 2 * x_dim

        reader = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
        writer = Writer(input_dim=dec_dim, output_dim=x_dim, **inits)

        attention_tag = "full"

    #----------------------------------------------------------------------
    # Learning rate
    def lr_tag(value):
        """ Convert a float into a short tag-usable string representation.
            E.g.:
                0.1   -> 11
                0.01  -> 12
                0.001 -> 13
                0.005 -> 53
        """
        exp = np.floor(np.log10(value))
        leading = ("%e" % value)[0]
        return "%s%d" % (leading, -exp)

    lr_str = lr_tag(learning_rate)

    subdir = name + "-" + time.strftime("%Y%m%d-%H%M%S")
    longname = "%s-%s-t%d-enc%d-dec%d-z%d-lr%s" % (
        dataset, attention_tag, n_iter, enc_dim, dec_dim, z_dim, lr_str)
    pickle_file = subdir + "/" + longname + ".pkl"

    print("\nRunning experiment %s" % longname)
    print("           dataset: %s" % dataset)
    print("      subdirectory: %s" % subdir)
    print("     learning rate: %g" % learning_rate)
    print("         attention: %s" % attention)
    print("      n_iterations: %d" % n_iter)
    print(" encoder dimension: %d" % enc_dim)
    print("       z dimension: %d" % z_dim)
    print(" decoder dimension: %d" % dec_dim)
    print("        batch size: %d" % batch_size)
    print("            epochs: %d" % epochs)
    print()

    #----------------------------------------------------------------------

    encoder_rnn = LSTM(dim=enc_dim, name="RNN_enc", **rnninits)
    decoder_rnn = LSTM(dim=dec_dim, name="RNN_dec", **rnninits)
    encoder_mlp = MLP([Identity()], [(read_dim + dec_dim), 4 * enc_dim],
                      name="MLP_enc", **inits)
    decoder_mlp = MLP([Identity()], [z_dim, 4 * dec_dim],
                      name="MLP_dec", **inits)
    q_sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, **inits)

    draw = DrawModel(
        n_iter,
        reader=reader,
        encoder_mlp=encoder_mlp,
        encoder_rnn=encoder_rnn,
        sampler=q_sampler,
        decoder_mlp=decoder_mlp,
        decoder_rnn=decoder_rnn,
        writer=writer)
    draw.initialize()

    #------------------------------------------------------------------------
    x = tensor.matrix('features')

    x_recons, kl_terms = draw.reconstruct(x)

    recons_term = BinaryCrossEntropy().apply(x, x_recons)
    recons_term.name = "recons_term"

    cost = recons_term + kl_terms.sum(axis=0).mean()
    cost.name = "nll_bound"

    #------------------------------------------------------------
    cg = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(cg.variables)

    algorithm = GradientDescent(
        cost=cost,
        parameters=params,
        step_rule=CompositeRule([
            StepClipping(10.),
            Adam(learning_rate),
        ])
        #step_rule=RMSProp(learning_rate),
        #step_rule=Momentum(learning_rate=learning_rate, momentum=0.95)
    )

    #------------------------------------------------------------------------
    # Setup monitors
    monitors = [cost]
    for t in range(n_iter):
        kl_term_t = kl_terms[t, :].mean()
        kl_term_t.name = "kl_term_%d" % t

        #x_recons_t = T.nnet.sigmoid(c[t,:,:])
        #recons_term_t = BinaryCrossEntropy().apply(x, x_recons_t)
        #recons_term_t = recons_term_t.mean()
        #recons_term_t.name = "recons_term_%d" % t

        monitors += [kl_term_t]

    train_monitors = monitors[:]
    train_monitors += [aggregation.mean(algorithm.total_gradient_norm)]
    train_monitors += [aggregation.mean(algorithm.total_step_norm)]

    # Live plotting...
    plot_channels = [
        ["train_nll_bound", "test_nll_bound"],
        ["train_kl_term_%d" % t for t in range(n_iter)],
        #["train_recons_term_%d" % t for t in range(n_iter)],
        ["train_total_gradient_norm", "train_total_step_norm"]
    ]

    #------------------------------------------------------------
    if not os.path.exists(subdir):
        os.makedirs(subdir)

    plotting_extensions = []
    if live_plotting:
        plotting_extensions = [
            Plot(name, channels=plot_channels)
        ]

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=epochs),
            TrainingDataMonitoring(
                train_monitors,
                prefix="train",
                after_epoch=True),
            # DataStreamMonitoring(
            #     monitors,
            #     valid_stream,
            ##     updates=scan_updates,
            #     prefix="valid"),
            DataStreamMonitoring(
                monitors,
                test_stream,
            #   updates=scan_updates,
                prefix="test"),
            #Checkpoint(name, before_training=False, after_epoch=True,
            #           save_separately=['log', 'model']),
            PartsOnlyCheckpoint("{}/{}".format(subdir, name),
                                before_training=True, after_epoch=True,
                                save_separately=['log', 'model']),
            SampleCheckpoint(image_size=image_size[0], channels=channels,
                             save_subdir=subdir, before_training=True,
                             after_epoch=True),
            ProgressBar(),
            Printing()] + plotting_extensions)

    if oldmodel is not None:
        print("Initializing parameters with old model %s" % oldmodel)
        with open(oldmodel, "rb") as f:
            oldmodel = pickle.load(f)
            main_loop.model.set_param_values(oldmodel.get_param_values())
        del oldmodel

    main_loop.run()
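# For reference, lr_tag() above encodes a learning rate as its leading
# mantissa digit followed by the negated base-10 exponent. A standalone copy
# (it is defined as a nested function inside main()) with the docstring's
# examples as assertions:
import numpy as np

def lr_tag(value):
    # e.g. 0.005 -> leading digit "5", exponent -3 -> tag "53"
    exp = np.floor(np.log10(value))
    leading = ("%e" % value)[0]
    return "%s%d" % (leading, -exp)

assert lr_tag(0.1) == "11"
assert lr_tag(0.01) == "12"
assert lr_tag(0.001) == "13"
assert lr_tag(0.005) == "53"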
from PIL import Image
import unittest

from draw.attention import *
import SVRT_analysis_helper_functions

# Initialize vars and theano function
num_estimates = 20
num_training = 6   # per evaluation
num_testing = 1    # per evaluation
# Get a bunch just to be sure.
batch_size = num_estimates * num_training + num_estimates * num_testing

image_size, channels, data_train, data_valid, data_test = \
    datasets.get_data('sketch')

rows = 10
cols = 20
N_iter = 64

# Load images. Fetch each batch once and index into it: a second get_data()
# call would consume the *next* batch, so images and labels would no longer
# correspond.
train_stream = Flatten(
    DataStream.default_stream(
        data_train,
        iteration_scheme=SequentialScheme(data_train.num_examples,
                                          batch_size)))
train_batch = train_stream.get_epoch_iterator()
train_data = train_batch.data_stream.get_data()
train_image = train_data[0][:].reshape(batch_size, 32, 32)
train_labels = train_data[1]

test_stream = Flatten(
    DataStream.default_stream(
        data_test,
        iteration_scheme=SequentialScheme(data_test.num_examples,
                                          batch_size)))
test_batch = test_stream.get_epoch_iterator()
test_data = test_batch.data_stream.get_data()
test_image = test_data[0][:].reshape(batch_size, 32, 32)
test_labels = test_data[1]
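# A sketch of splitting the flat batches back into per-estimate groups, under
# the assumption (not verified in this excerpt) that the SequentialScheme
# lays examples out consecutively, num_training per estimate from the
# training split and num_testing per estimate from the test split:
train_groups = train_image[:num_estimates * num_training].reshape(
    num_estimates, num_training, 32, 32)
test_groups = test_image[:num_estimates * num_testing].reshape(
    num_estimates, num_testing, 32, 32)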
def main(name, dataset, epochs, batch_size, learning_rate, attention,
         n_iter, enc_dim, dec_dim, z_dim, oldmodel, max_total_duration,
         results_root_dir, nosync, adam_args_json, rmsprop_args_json):
    image_size, channels, data_train, data_valid, data_test = \
        datasets.get_data(dataset)

    # Sequential scheme as originally implemented.
    #train_stream = Flatten(DataStream.default_stream(data_train,
    #    iteration_scheme=SequentialScheme(data_train.num_examples, batch_size)))
    #valid_stream = Flatten(DataStream.default_stream(data_valid,
    #    iteration_scheme=SequentialScheme(data_valid.num_examples, batch_size)))
    #test_stream = Flatten(DataStream.default_stream(data_test,
    #    iteration_scheme=SequentialScheme(data_test.num_examples, batch_size)))

    # Shuffled version makes more sense for distributed training.
    train_stream = Flatten(
        DataStream.default_stream(
            data_train,
            iteration_scheme=ShuffledScheme(
                data_train.num_examples, batch_size)))
    valid_stream = Flatten(
        DataStream.default_stream(
            data_valid,
            iteration_scheme=ShuffledScheme(
                data_valid.num_examples, batch_size)))
    test_stream = Flatten(
        DataStream.default_stream(
            data_test,
            iteration_scheme=ShuffledScheme(
                data_test.num_examples, batch_size)))

    if name is None:
        name = dataset

    img_height, img_width = image_size
    x_dim = channels * img_height * img_width

    rnninits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    # Configure attention mechanism
    if attention != "":
        read_N, write_N = attention.split(',')
        read_N = int(read_N)
        write_N = int(write_N)
        read_dim = 2 * channels * read_N ** 2

        reader = AttentionReader(x_dim=x_dim, dec_dim=dec_dim,
                                 channels=channels, width=img_width,
                                 height=img_height, N=read_N, **inits)
        writer = AttentionWriter(input_dim=dec_dim, output_dim=x_dim,
                                 channels=channels, width=img_width,
                                 height=img_height, N=write_N, **inits)
        attention_tag = "r%d-w%d" % (read_N, write_N)
    else:
        read_dim = 2 * x_dim

        reader = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
        writer = Writer(input_dim=dec_dim, output_dim=x_dim, **inits)

        attention_tag = "full"

    #----------------------------------------------------------------------

    # Tag job-array workers so each gets its own run name.
    # (`in` replaces the Python-2-only dict.has_key().)
    if 'MOAB_JOBARRAYINDEX' in os.environ:
        name = name + ("-%0.3d" % int(os.environ['MOAB_JOBARRAYINDEX']))

    # Learning rate
    def lr_tag(value):
        """ Convert a float into a short tag-usable string representation.
            E.g.:
                0.1   -> 11
                0.01  -> 12
                0.001 -> 13
                0.005 -> 53
        """
        exp = np.floor(np.log10(value))
        leading = ("%e" % value)[0]
        return "%s%d" % (leading, -exp)

    lr_str = lr_tag(learning_rate)

    subdir = name + "-" + time.strftime("%Y%m%d-%H%M%S")
    if results_root_dir is not None:
        # Place the run directory under the results root.
        subdir = os.path.join(results_root_dir, subdir)
    longname = "%s-%s-t%d-enc%d-dec%d-z%d-lr%s" % (
        dataset, attention_tag, n_iter, enc_dim, dec_dim, z_dim, lr_str)
    pickle_file = subdir + "/" + longname + ".pkl"

    print("\nRunning experiment %s" % longname)
    print("            dataset: %s" % dataset)
    print("       subdirectory: %s" % subdir)
    print("      learning rate: %g" % learning_rate)
    print("          attention: %s" % attention)
    print("       n_iterations: %d" % n_iter)
    print("  encoder dimension: %d" % enc_dim)
    print("        z dimension: %d" % z_dim)
    print("  decoder dimension: %d" % dec_dim)
    print("         batch size: %d" % batch_size)
    print("             epochs: %d" % epochs)
    print(" max_total_duration: %d" % max_total_duration)
    print("             nosync: %s" % str(nosync))
    print("     adam_args_json: %s" % str(adam_args_json))
    print("  rmsprop_args_json: %s" % str(rmsprop_args_json))
    print()

    #----------------------------------------------------------------------

    encoder_rnn = LSTM(dim=enc_dim, name="RNN_enc", **rnninits)
    decoder_rnn = LSTM(dim=dec_dim, name="RNN_dec", **rnninits)
    encoder_mlp = MLP([Identity()], [(read_dim + dec_dim), 4 * enc_dim],
                      name="MLP_enc", **inits)
    decoder_mlp = MLP([Identity()], [z_dim, 4 * dec_dim],
                      name="MLP_dec", **inits)
    q_sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, **inits)

    draw = DrawModel(
        n_iter,
        reader=reader,
        encoder_mlp=encoder_mlp,
        encoder_rnn=encoder_rnn,
        sampler=q_sampler,
        decoder_mlp=decoder_mlp,
        decoder_rnn=decoder_rnn,
        writer=writer)
    draw.initialize()

    #------------------------------------------------------------------------
    x = tensor.matrix('features')

    x_recons, kl_terms = draw.reconstruct(x)

    recons_term = BinaryCrossEntropy().apply(x, x_recons)
    recons_term.name = "recons_term"

    cost = recons_term + kl_terms.sum(axis=0).mean()
    cost.name = "nll_bound"

    #------------------------------------------------------------
    cg = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(cg.variables)

    if adam_args_json is not None:
        print("Setup for Adam with arguments passed by command-line.")
        import json
        adam_args = json.loads(adam_args_json)
        if learning_rate is not None:
            adam_args['learning_rate'] = learning_rate
        algorithm = GradientDescent(
            cost=cost,
            parameters=params,
            step_rule=CompositeRule([
                StepClipping(10.),
                Adam(**adam_args),
            ])
        )
    elif rmsprop_args_json is not None:
        print("Setup for RMSProp with arguments passed by command-line.")
        import json
        rmsprop_args = json.loads(rmsprop_args_json)
        if learning_rate is not None:
            rmsprop_args['learning_rate'] = learning_rate
        algorithm = GradientDescent(
            cost=cost,
            parameters=params,
            step_rule=CompositeRule([
                StepClipping(10.),
                RMSProp(**rmsprop_args),
            ])
        )
    else:
        print("Setup for Adam by default.")
        # default original behavior
        algorithm = GradientDescent(
            cost=cost,
            parameters=params,
            step_rule=CompositeRule([
                StepClipping(10.),
                Adam(learning_rate),
            ])
            #step_rule=RMSProp(learning_rate),
            #step_rule=Momentum(learning_rate=learning_rate, momentum=0.95)
        )

    #------------------------------------------------------------------------
    # Setup monitors
    monitors = [cost]
    for t in range(n_iter):
        kl_term_t = kl_terms[t, :].mean()
        kl_term_t.name = "kl_term_%d" % t

        #x_recons_t = T.nnet.sigmoid(c[t,:,:])
        #recons_term_t = BinaryCrossEntropy().apply(x, x_recons_t)
        #recons_term_t = recons_term_t.mean()
        #recons_term_t.name = "recons_term_%d" % t

        monitors += [kl_term_t]

    train_monitors = monitors[:]
    train_monitors += [aggregation.mean(algorithm.total_gradient_norm)]
    train_monitors += [aggregation.mean(algorithm.total_step_norm)]

    # Live plotting...
    plot_channels = [
        ["train_nll_bound", "test_nll_bound"],
        ["train_kl_term_%d" % t for t in range(n_iter)],
        #["train_recons_term_%d" % t for t in range(n_iter)],
        ["train_total_gradient_norm", "train_total_step_norm"]
    ]

    #------------------------------------------------------------------------
    # Setup for legion: build a uniquely-named dict of parameters to sync.
    cg = ComputationGraph(cost)
    params_to_sync = {}  #cg.variables
    counter = 0
    print("---- cg.parameters ----")
    for p in cg.parameters:
        # `p` is of type theano.sandbox.cuda.var.CudaNdarraySharedVariable
        # Warning. This is not as deterministic as we would want.
        # For now, however, we don't have much of a choice.
        new_name = p.name
        while new_name in params_to_sync:
            counter += 1
            new_name = p.name + ("_%d" % counter)
        params_to_sync[new_name] = p
        print("Parameter %s now referred to as %s." % (p.name, new_name))
        #import pdb; pdb.set_trace()
    print("---- --.---------- ----")

    #------------------------------------------------------------
    if not os.path.exists(subdir):
        os.makedirs(subdir)

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=epochs),
        TrainingDataMonitoring(
            train_monitors,
            prefix="train",
            after_epoch=True),
        # DataStreamMonitoring(
        #     monitors,
        #     valid_stream,
        ##     updates=scan_updates,
        #     prefix="valid"),
        DataStreamMonitoring(
            monitors,
            test_stream,
        #   updates=scan_updates,
            prefix="test"),
        PartsOnlyCheckpoint("{}/{}".format(subdir, name),
                            before_training=True, after_epoch=True,
                            save_separately=['log', 'model'])
        #SampleCheckpoint(image_size=image_size[0], channels=channels,
        #                 save_subdir=subdir, before_training=True,
        #                 after_epoch=True),
    ]

    if not nosync:
        # With parameter sync on legion,
        #extensions.append(SharedParamsRateLimited(
        #    params=params_to_sync, before_training=True,
        #    alpha=1.0, beta=0.0, maximum_rate=1.0))
        #extensions.append(SharedParamsRateLimited(
        #    params=params_to_sync, before_training=True,
        #    every_n_batches=4, alpha=0.5, beta=0.5, maximum_rate=0.2,
        #    want_sync_timing_log=True))
        extensions.append(SharedParamsRateLimited(
            params=params_to_sync, before_training=True,
            every_n_batches=1, alpha=0.99, beta=0.01, maximum_rate=0.2,
            want_sync_timing_log=True))

    extensions = extensions + [
        StopAfterTimeElapsed(every_n_batches=4,
                             total_duration=max_total_duration),
        # Timing information to facilitate plotting.
        Timing(every_n_epochs=1),
        Timestamp(every_n_batches=4),
        # Plot(name, channels=plot_channels),
        ProgressBar(),
        Printing()]

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=extensions)

    if oldmodel is not None:
        print("Initializing parameters with old model %s" % oldmodel)
        with open(oldmodel, "rb") as f:
            oldmodel = pickle.load(f)
            main_loop.model.set_parameter_values(
                oldmodel.get_parameter_values())
        del oldmodel

    main_loop.run()
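# A hedged example of the step-rule JSON this main() parses. The keys are
# assumed to mirror the keyword arguments of Blocks' Adam and RMSProp step
# rules; any learning_rate given on the command line overrides the value in
# the JSON dict (see the `if learning_rate is not None` branches above).
import json

adam_args_json = '{"learning_rate": 0.001, "beta1": 0.1, "beta2": 0.001}'
rmsprop_args_json = '{"learning_rate": 0.0005, "decay_rate": 0.9}'

assert json.loads(adam_args_json)["learning_rate"] == 0.001
assert json.loads(rmsprop_args_json)["decay_rate"] == 0.9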