def save(self, overwrite=False):
    # save the model if it doesn't exist (or if overwrite is requested)
    check_or_create_dir(".models")
    model_filename = os.path.join(".models", self.get_name() + ".th")
    if not os.path.isfile(model_filename) or overwrite:
        print("saving existing model pool")
        torch.save(self.state_dict(), model_filename)
def save(self, overwrite=False):
    # save the model if it doesn't exist (or if overwrite is requested)
    check_or_create_dir(self.config['model_dir'])
    model_filename = os.path.join(self.config['model_dir'], self.get_name() + ".th")
    if not os.path.isfile(model_filename) or overwrite:
        print("saving existing student-teacher model...")
        torch.save(self.state_dict(), model_filename)
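# --- illustrative sketch, not part of the original source ---
# Both save() variants above only write the state dict when the target file is
# missing (or overwrite=True). A matching load() counterpart is not shown in this
# excerpt; a minimal hypothetical version, assuming the same get_name() /
# config['model_dir'] conventions and the standard torch.load / load_state_dict
# calls, might look like this:
def load(self, map_location=None):
    # hypothetical helper: restore the state dict saved by save() above
    model_filename = os.path.join(self.config['model_dir'], self.get_name() + ".th")
    if os.path.isfile(model_filename):
        print("loading existing model from {}".format(model_filename))
        self.load_state_dict(torch.load(model_filename, map_location=map_location))
        return True

    return False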
def eval_model(data_loaders, model, fid_model, args):
    ''' simple helper to evaluate the model over all the loaders '''
    for loader in data_loaders:
        test_loss = test(epoch=-1, model=model, fisher=None,
                         loader=loader.test_loader, grapher=None, prefix='test')

        # evaluate and save away one-time metrics
        check_or_create_dir(os.path.join(args.output_dir))
        append_to_csv([test_loss['elbo_mean']],
                      os.path.join(args.output_dir, "{}_test_elbo.csv".format(args.uid)))
        append_to_csv(calculate_consistency(model, loader, args.reparam_type,
                                            args.vae_type, args.cuda),
                      os.path.join(args.output_dir, "{}_consistency.csv".format(args.uid)))
        with open(os.path.join(args.output_dir, "{}_conf.json".format(args.uid)), 'w') as f:
            json.dump(model.student.config, f)

        if args.calculate_fid_with is not None:
            # TODO: parameterize num fid samples; currently use fewer for inceptionv3 as it's COSTLY
            num_fid_samples = 4000 if args.calculate_fid_with != 'inceptionv3' else 1000
            append_to_csv(calculate_fid(fid_model=fid_model,
                                        model=model,
                                        loader=loader,
                                        grapher=None,
                                        num_samples=num_fid_samples,
                                        cuda=args.cuda),
                          os.path.join(args.output_dir, "{}_fid.csv".format(args.uid)))
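# --- illustrative sketch, not part of the original source ---
# eval_model() (and train_loop() below) lean on two small I/O helpers,
# check_or_create_dir() and append_to_csv(), that are defined elsewhere in the
# project. The minimal versions below are one possible implementation, assuming
# append_to_csv() receives a scalar or a flat list of values and appends one CSV
# row per call; they are not the project's actual helpers.
import csv  # repeated here so the sketch is self-contained
import os


def check_or_create_dir(dir_path):
    # create the directory tree if it does not already exist
    if not os.path.isdir(dir_path):
        os.makedirs(dir_path)


def append_to_csv(values, filename):
    # append a single row of values, creating the file on first use
    values = values if isinstance(values, (list, tuple)) else [values]
    with open(filename, 'a') as f:
        csv.writer(f).writerow(values)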
def train_loop(data_loaders, model, fid_model, grapher, args):
    ''' simple helper to run the entire train loop; not needed for eval modes '''
    optimizer = build_optimizer(model.student)  # collect our optimizer
    print("there are {} params with {} elems in the st-model "
          "and {} params in the student with {} elems".format(
              len(list(model.parameters())), number_of_parameters(model),
              len(list(model.student.parameters())), number_of_parameters(model.student)))

    # main training loop
    fisher = None
    for j, loader in enumerate(data_loaders):
        num_epochs = args.epochs  # TODO: randomize epochs by something like: + np.random.randint(0, 13)
        print("training current distribution for {} epochs".format(num_epochs))
        early = EarlyStopping(model, max_steps=50,
                              burn_in_interval=None) if args.early_stop else None
        # burn_in_interval=int(num_epochs*0.2)) if args.early_stop else None

        test_loss = None
        for epoch in range(1, num_epochs + 1):
            train(epoch, model, fisher, optimizer, loader.train_loader, grapher)
            test_loss = test(epoch, model, fisher, loader.test_loader, grapher)
            if args.early_stop and early(test_loss['loss_mean']):
                early.restore()  # restore and test+generate again
                test_loss = test_and_generate(epoch, model, fisher, loader, grapher)
                break

        generate(model, grapher, 'student')  # generate student samples
        generate(model, grapher, 'teacher')  # generate teacher samples

        # evaluate and save away one-time metrics, these include:
        #   1. test elbo
        #   2. FID
        #   3. consistency
        #   4. num synth + num true samples
        #   5. dump config to visdom
        check_or_create_dir(os.path.join(args.output_dir))
        append_to_csv([test_loss['elbo_mean']],
                      os.path.join(args.output_dir, "{}_test_elbo.csv".format(args.uid)))
        num_synth_samples = np.ceil(epoch * args.batch_size * model.ratio)
        num_true_samples = np.ceil(epoch * (args.batch_size - (args.batch_size * model.ratio)))
        append_to_csv([num_synth_samples],
                      os.path.join(args.output_dir, "{}_numsynth.csv".format(args.uid)))
        append_to_csv([num_true_samples],
                      os.path.join(args.output_dir, "{}_numtrue.csv".format(args.uid)))
        append_to_csv([epoch],
                      os.path.join(args.output_dir, "{}_epochs.csv".format(args.uid)))
        # visdom text panes expect strings, so cast the counts before posting
        grapher.vis.text(str(num_synth_samples), opts=dict(title="num_synthetic_samples"))
        grapher.vis.text(str(num_true_samples), opts=dict(title="num_true_samples"))
        grapher.vis.text(pprint.PrettyPrinter(indent=4).pformat(model.student.config),
                         opts=dict(title="config"))

        # calc the consistency using the **PREVIOUS** loader
        if j > 0:
            append_to_csv(calculate_consistency(model, data_loaders[j - 1],
                                                args.reparam_type, args.vae_type, args.cuda),
                          os.path.join(args.output_dir, "{}_consistency.csv".format(args.uid)))

        if args.calculate_fid_with is not None:
            # TODO: parameterize num fid samples; currently use fewer for inceptionv3 as it's COSTLY
            num_fid_samples = 4000 if args.calculate_fid_with != 'inceptionv3' else 1000
            append_to_csv(calculate_fid(fid_model=fid_model,
                                        model=model,
                                        loader=loader,
                                        grapher=grapher,
                                        num_samples=num_fid_samples,
                                        cuda=args.cuda),
                          os.path.join(args.output_dir, "{}_fid.csv".format(args.uid)))

        grapher.save()  # save the remote visdom graphs

        if j != len(data_loaders) - 1:
            if args.ewc_gamma > 0:
                # calculate the fisher from the previous data loader
                print("computing fisher info matrix....")
                fisher_tmp = estimate_fisher(model.student,  # this is pre-fork
                                             loader, args.batch_size, cuda=args.cuda)
                if fisher is not None:
                    assert len(fisher) == len(fisher_tmp), "#fisher params != #new fisher params"
                    for (kf, vf), (kft, vft) in zip(fisher.items(), fisher_tmp.items()):
                        fisher[kf] += fisher_tmp[kft]
                else:
                    fisher = fisher_tmp

            # spawn a new student & rebuild grapher; we also pass
            # the new model's parameters through a new optimizer.
            if not args.disable_student_teacher:
                model.fork()
                lazy_generate_modules(model, data_loaders[0].img_shp)
                optimizer = build_optimizer(model.student)
                print("there are {} params with {} elems in the st-model "
                      "and {} params in the student with {} elems".format(
                          len(list(model.parameters())), number_of_parameters(model),
                          len(list(model.student.parameters())), number_of_parameters(model.student)))
            else:
                # increment anyway for vanilla models
                # so that we can have a separate visdom env
                model.current_model += 1

            grapher = Grapher(env=model.get_name(),
                              server=args.visdom_url,
                              port=args.visdom_port)
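# --- illustrative sketch, not part of the original source ---
# One possible top-level entry point tying the helpers above together: build the
# sequence of per-task loaders and the student-teacher model, then either run
# train_loop() or just eval_model() for evaluation-only runs. get_loaders(),
# build_model(), build_fid_model() and the args.eval_only flag are hypothetical
# placeholders; only the Grapher construction mirrors the code above.
def run(args):
    data_loaders = get_loaders(args)       # hypothetical: list of per-task loaders
    model = build_model(args)              # hypothetical: student-teacher VAE
    fid_model = None
    if args.calculate_fid_with is not None:
        fid_model = build_fid_model(args)  # hypothetical: classifier used for FID

    grapher = Grapher(env=model.get_name(),
                      server=args.visdom_url,
                      port=args.visdom_port)

    if getattr(args, 'eval_only', False):  # hypothetical flag
        eval_model(data_loaders, model, fid_model, args)
    else:
        train_loop(data_loaders, model, fid_model, grapher, args)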