def main(_):
    process_flags()
    if FLAGS.manual_seed:
        set_manual_seeds(FLAGS.manual_seed)

    # Create folders and files to store results and configs
    run_dir = Path(FLAGS.output_folder, FLAGS.run_name)
    if not os.path.exists(run_dir):
        os.makedirs(run_dir)

    # Logging
    log_fh = log.FileHandler(Path(run_dir, 'log.log'))
    log_fmt = log.Formatter("%(asctime)s: %(message)s", datefmt="%m/%d %I:%M:%S %p")
    log_fh.setFormatter(log_fmt)
    log.getLogger().addHandler(log_fh)

    # Store the run description, if any
    if FLAGS.description:
        with open(Path(run_dir, 'description.txt'), 'w') as f:
            f.write(FLAGS.description)
        log.info(f'DESCRIPTION: {FLAGS.description}')

    # Store configuration in same folder as logs and model
    flagfile = Path(run_dir, 'flagfile.txt')
    if os.path.exists(flagfile):
        os.remove(flagfile)
    open(flagfile, "x")
    FLAGS.append_flags_into_file(flagfile)

    if FLAGS.old_pretrain_data:
        data_dict = get_data_dict_old()
    else:
        data_dict = get_data_dict()
    train_dataset, test_dataset, val_dataset = (data_dict[key] for key in ('train', 'test', 'val'))

    model = MLMModelWrapper(MODEL_MAPPING[FLAGS.model])
    distributed_wrapper(train, model, run_dir, train_dataset, val_dataset)
    model.cuda(FLAGS.device_idxs[0])

    log.info("Evaluating pretraining performance on test split")
    test_loader = get_loader(test_dataset)
    model.eval()
    batch_generator = iter(test_loader)
    batch_generator = Tqdm.tqdm(batch_generator)
    total_metrics = {}
    with torch.no_grad():
        for i, batch in enumerate(batch_generator):
            batch = move_to_device(batch, FLAGS.device_idxs[0])
            if isinstance(batch, torch.Tensor):
                model(batch)
            else:
                model(**batch)
            if i == 0:
                total_metrics = model.get_metrics()
            else:
                total_metrics = {m: total_metrics[m] + model.get_metrics()[m]
                                 for m in total_metrics.keys()}
    average_metrics = {k: v / (i + 1) for k, v in total_metrics.items()}
    log.info(f"Average test metrics:{average_metrics}")
def check_flags():
    if FLAGS.node_feat_name:
        assert (FLAGS.node_feat_encoder == 'onehot')
    else:
        assert ('constant_' in FLAGS.node_feat_encoder)
    assert (0 < FLAGS.valid_percentage < 1)
    assert (FLAGS.layer_num >= 1)
    assert (FLAGS.batch_size >= 1)
    assert (FLAGS.iters >= 1)
    assert (FLAGS.iters_val_start >= 1)
    assert (FLAGS.iters_val_every >= 1)
    assert (FLAGS.gpu >= -1)
    d = FLAGS.flag_values_dict()
    ln = d['layer_num']
    ls = [False] * ln
    for k in d.keys():
        if 'layer_' in k and 'gc' not in k and 'id' not in k:
            lt = k.split('_')[1]
            if lt != 'num':
                i = int(lt) - 1
                if not (0 <= i < len(ls)):
                    raise RuntimeError('Wrong spec {}'.format(k))
                ls[i] = True
    for i, x in enumerate(ls):
        if not x:
            raise RuntimeError('layer {} not specified'.format(i + 1))
    if is_transductive():
        assert (FLAGS.layer_num == 1)  # can only have one layer
    assert (FLAGS.gemb_dim >= 1)
def create_layers(model):
    layers = []
    num_layers = FLAGS.num_layers
    for i in range(num_layers):
        sp = FLAGS.flag_values_dict()['layer_{}'.format(i)].split(':')
        name = sp[0]
        layer_info = {}
        if len(sp) > 1:
            assert (len(sp) == 2)
            for spec in sp[1].split(','):
                ssp = spec.split('=')
                layer_info[ssp[0]] = ssp[1]
        if name == 'GraphConvolution':
            layers.append(create_GraphConvolution_layer(layer_info, model, i))
        elif name == 'Average':
            layers.append(create_Average_layer(layer_info, model))
        elif name == 'NTN':
            layers.append(create_NTN_layer(layer_info, model))
        elif name == 'Dot':
            layers.append(create_Dot_layer(layer_info, model))
        elif name == 'Dense':
            layers.append(create_Dense_layer(layer_info, model))
        elif name == 'Padding':
            layers.append(create_Padding_layer(layer_info, model))
        else:
            raise RuntimeError('Unknown layer {}'.format(name))
    return layers
def get_model_info_as_str(model_info_table=None):
    rtn = ''
    for k, v in sorted(FLAGS.flag_values_dict().items(), key=lambda x: x[0]):
        s = '{0:26} : {1}\n'.format(k, v)
        rtn += s
        if model_info_table:
            model_info_table.append([k, '**{}**'.format(v)])
    return rtn
def create_layers(model, pattern, num_layers):
    layers = []
    for i in range(1, num_layers + 1):  # 1-indexed
        sp = FLAGS.flag_values_dict()['{}_{}'.format(pattern, i)].split(':')
        name = sp[0]
        layer_info = {}
        if len(sp) > 1:
            assert (len(sp) == 2)
            for spec in sp[1].split(','):
                ssp = spec.split('=')
                layer_info[ssp[0]] = ssp[1]
        if name == 'GraphConvolution':
            layers.append(create_GraphConvolution_layer(layer_info, model, i))
        elif name == 'GraphConvolutionAttention':
            layers.append(create_GraphConvolutionAttention_layer(layer_info, model, i))
        elif name == 'GraphConvolutionCollector':
            layers.append(create_GraphConvolutionCollector_layer(layer_info))
        elif name == 'Coarsening':
            layers.append(create_Coarsening_layer(layer_info))
        elif name == 'Average':
            layers.append(create_Average_layer(layer_info))
        elif name == 'Attention':
            layers.append(create_Attention_layer(layer_info))
        elif name == 'Supersource':
            layers.append(create_Supersource_layer(layer_info))
        elif name == 'Dot':
            layers.append(create_Dot_layer(layer_info))
        elif name == 'Dist':
            layers.append(create_Dist_layer(layer_info))
        elif name == 'SLM':
            layers.append(create_SLM_layer(layer_info))
        elif name == 'NTN':
            layers.append(create_NTN_layer(layer_info))
        elif name == 'ANPM':
            layers.append(create_ANPM_layer(layer_info))
        elif name == 'ANPMD':
            layers.append(create_ANPMD_layer(layer_info))
        elif name == 'ANNH':
            layers.append(create_ANNH_layer(layer_info))
        elif name == 'Dense':
            layers.append(create_Dense_layer(layer_info))
        elif name == 'Padding':
            layers.append(create_Padding_layer(layer_info))
        elif name == 'PadandTruncate':
            layers.append(create_PadandTruncate_layer(layer_info))
        elif name == 'MNE':
            layers.append(create_MNE_layer(layer_info))
        elif name == 'MNEMatch':
            layers.append(create_MNEMatch_layer(layer_info))
        elif name == 'MNEResize':
            layers.append(create_MNEResize_layer(layer_info))
        elif name == 'CNN':
            layers.append(create_CNN_layer(layer_info))
        else:
            raise RuntimeError('Unknown layer {}'.format(name))
    return layers
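# Hypothetical illustration (not from the original repos) of the flag format that the
# create_layers() functions above parse: each 'layer_<i>' flag holds a string of the form
# '<LayerName>:<key>=<value>,<key>=<value>,...'. The example spec string and its keys
# below are invented for demonstration; only the parsing logic mirrors the code above.
def parse_layer_spec(spec_str):
    sp = spec_str.split(':')
    name = sp[0]
    layer_info = {}
    if len(sp) > 1:
        assert len(sp) == 2
        for spec in sp[1].split(','):
            key, value = spec.split('=')
            layer_info[key] = value
    return name, layer_info


if __name__ == '__main__':
    # e.g. a flag such as --layer_1 'GraphConvolution:output_dim=64,act=relu,dropout=True'
    name, info = parse_layer_spec('GraphConvolution:output_dim=64,act=relu,dropout=True')
    print(name, info)  # GraphConvolution {'output_dim': '64', 'act': 'relu', 'dropout': 'True'}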
def _get_loss_lambdas_flags(self):
    rtn = []
    d = FLAGS.flag_values_dict()
    for k in d.keys():
        if 'lambda_' in k:
            flag_split = k.split('_')
            assert (flag_split[0] == 'lambda')
            assert (flag_split[-1] == 'loss')
            rtn.append(k)
    return rtn
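# A minimal, hypothetical sketch (assuming absl-style flags) of the naming convention that
# _get_loss_lambdas_flags() above relies on: every loss weight is a flag named
# 'lambda_<name>_loss'. The concrete flag names and defaults here are invented examples.
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_float('lambda_mse_loss', 1.0, 'weight of a hypothetical MSE loss term')
flags.DEFINE_float('lambda_rank_loss', 0.1, 'weight of a hypothetical ranking loss term')


def get_loss_lambdas(flags_obj):
    # Keep only flags that follow the lambda_<name>_loss naming pattern.
    return [k for k in flags_obj.flag_values_dict()
            if k.startswith('lambda_') and k.endswith('_loss')]


if __name__ == '__main__':
    FLAGS(['demo'])  # parse with defaults only, so flag values are accessible
    print(get_loss_lambdas(FLAGS))  # -> ['lambda_mse_loss', 'lambda_rank_loss']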
def get_model_info_as_str(model_info_table=None):
    rtn = []
    d = FLAGS.flag_values_dict()
    for k in sorted_nicely(d.keys()):
        v = d[k]
        s = '{0:26} : {1}'.format(k, v)
        rtn.append(s)
        if model_info_table:
            model_info_table.append([k, '**{}**'.format(v)])
    rtn.append('{0:26} : {1}'.format('ts', get_ts()))
    return '\n'.join(rtn)
def check_flags():
    assert (0 < FLAGS.valid_percentage < 1)
    assert (FLAGS.layer_num >= 2)
    assert (FLAGS.batch_size >= 1)
    assert (FLAGS.iters >= 1)
    assert (FLAGS.iters_val_start >= 1)
    assert (FLAGS.iters_val_every >= 1)
    assert (FLAGS.gpu >= -1)
    d = FLAGS.flag_values_dict()
    ln = d['layer_num']
    ls = [False] * ln
    for k in d.keys():
        if 'layer_' in k:
            lt = k.split('_')[1]
            if lt != 'num':
                i = int(lt) - 1
                if not (0 <= i < len(ls)):
                    raise RuntimeError('Wrong spec {}'.format(k))
                ls[i] = True
    for i, x in enumerate(ls):
        if not x:
            raise RuntimeError('layer {} not specified'.format(i + 1))
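# Hypothetical absl-style flag definitions that satisfy the per-layer indexing check in
# check_flags() above: a 'layer_num' flag plus one 'layer_<i>' spec flag for each
# i in 1..layer_num. The spec strings and dimensions are invented examples.
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_integer('layer_num', 3, 'number of layers')
flags.DEFINE_string('layer_1', 'GraphConvolution:output_dim=64,act=relu', 'spec of layer 1')
flags.DEFINE_string('layer_2', 'GraphConvolution:output_dim=32,act=relu', 'spec of layer 2')
flags.DEFINE_string('layer_3', 'Dense:output_dim=16,act=relu', 'spec of layer 3')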
def main():
    FLAGS(sys.argv)
    running_helper = RunningHelper(FLAGS.use_mpi)
    global mpi_is_master
    mpi_is_master = running_helper.is_master

    # Check stage / image size / dynamic batch size / data consistency.
    running_helper.check_hps_consistency()

    # Setup Models
    mapping = MappingNetwork(FLAGS.ch)
    generator = StyleGenerator(FLAGS.ch, enable_blur=FLAGS.enable_blur)
    discriminator = Discriminator(ch=FLAGS.ch, enable_blur=FLAGS.enable_blur)
    if running_helper.keep_smoothed_gen:
        smoothed_generator = StyleGenerator(FLAGS.ch, enable_blur=FLAGS.enable_blur)
        smoothed_mapping = MappingNetwork(FLAGS.ch)

    models = [mapping, generator, discriminator]
    model_names = ['Mapping', 'Generator', 'Discriminator']
    if running_helper.keep_smoothed_gen:
        models.append(smoothed_generator)
        models.append(smoothed_mapping)
        model_names.append('SmoothedGenerator')
        model_names.append('SmoothedMapping')

    if running_helper.device > -1:
        chainer.cuda.get_device_from_id(running_helper.device).use()
        for model in models:
            model.to_gpu()

    stage_manager = StageManager(
        stage_interval=running_helper.stage_interval,
        dynamic_batch_size=running_helper.dynamic_batch_size,
        make_dataset_func=running_helper.make_dataset,
        make_iterator_func=make_iterator_func,
        debug_start_instance=FLAGS.debug_start_instance)

    # if running_helper.is_master:
    #     chainer.global_config.debug = True

    updater_args = {
        "models": models,
        "optimizer": {
            "map": running_helper.make_optimizer(
                mapping, FLAGS.adam_alpha_g / 100, FLAGS.adam_beta1, FLAGS.adam_beta2),
            "gen": running_helper.make_optimizer(
                generator, FLAGS.adam_alpha_g, FLAGS.adam_beta1, FLAGS.adam_beta2),
            "dis": running_helper.make_optimizer(
                discriminator, FLAGS.adam_alpha_d, FLAGS.adam_beta1, FLAGS.adam_beta2)
        },
        'stage_manager': stage_manager,
        'lambda_gp': FLAGS.lambda_gp,
        'smoothing': FLAGS.smoothing,
        'style_mixing_rate': FLAGS.style_mixing_rate,
        'use_cleargrads': running_helper.use_cleargrads,
        'total_gpu': running_helper.fleet_size
    }

    updater = Updater(**updater_args)
    trainer = training.Trainer(
        updater,
        (lambda _trainer: _trainer.updater.stage_manager.stage_int >= FLAGS.max_stage),
        out=FLAGS.out)

    # Set up extensions
    if running_helper.is_master:
        for model, model_name in zip(models, model_names):
            trainer.extend(extensions.snapshot_object(
                model, model_name + '_{.updater.iteration}.npz'),
                trigger=(FLAGS.snapshot_interval, 'iteration'))
        trainer.extend(extensions.snapshot(
            filename='snapshot_iter_{.updater.iteration}.npz'),
            trigger=(FLAGS.snapshot_interval, 'iteration'))
        trainer.extend(extensions.ProgressBar(
            training_length=(updater.total_iteration, 'iteration'),
            update_interval=1))
        trainer.extend(sample_generate_light(generator, mapping, FLAGS.out, rows=8, cols=8),
                       trigger=(FLAGS.evaluation_sample_interval, 'iteration'),
                       priority=extension.PRIORITY_WRITER)
        if running_helper.keep_smoothed_gen:
            trainer.extend(sample_generate_light(smoothed_generator, smoothed_mapping,
                                                 FLAGS.out, rows=8, cols=8,
                                                 subdir='preview_smoothed'),
                           trigger=(FLAGS.evaluation_sample_interval, 'iteration'),
                           priority=extension.PRIORITY_WRITER)

        report_keys = [
            'iteration', 'elapsed_time', 'stage', 'batch_size', 'image_size',
            'gen/loss_adv', 'dis/loss_adv', 'dis/loss_gp'
        ]
        if FLAGS.fid_interval > 0:
            report_keys += ['FID']
            fidapi = FIDAPI(FLAGS.fid_clfs_type,
                            FLAGS.fid_clfs_path,
                            gpu=running_helper.device,
                            load_real_stat=FLAGS.fid_real_stat)
            trainer.extend(fid_extension(fidapi,
                                         batch_generate_func(generator, mapping, trainer),
                                         seed=FLAGS.seed, report_key='FID'),
                           trigger=(FLAGS.fid_interval, 'iteration'))
            if running_helper.keep_smoothed_gen:
                report_keys += ['S_FID']
                trainer.extend(fid_extension(fidapi,
                                             batch_generate_func(smoothed_generator,
                                                                 smoothed_mapping, trainer),
                                             seed=FLAGS.seed, report_key='S_FID'),
                               trigger=(FLAGS.fid_interval, 'iteration'))

        trainer.extend(extensions.LogReport(
            keys=report_keys, trigger=(FLAGS.display_interval, 'iteration')))
        trainer.extend(extensions.PrintReport(report_keys),
                       trigger=(FLAGS.display_interval, 'iteration'))

    # Recover if possible
    if FLAGS.get_model_from_interation != '':
        resume_iteration_str = FLAGS.get_model_from_interation
        print('Resume from {}'.format(resume_iteration_str))
        for model, model_name in zip(models, model_names):
            chainer.serializers.load_npz(
                FLAGS.out + '/' + model_name + '_%s.npz' % resume_iteration_str,
                model,
            )
        chainer.serializers.load_npz(
            FLAGS.out + '/' + 'snapshot_iter_%s.npz' % resume_iteration_str,
            trainer,
        )
    elif FLAGS.auto_resume:
        print("Auto Resume")
        candidates = []
        auto_resume_dir = FLAGS.auto_resume_dir if FLAGS.auto_resume_dir != '' else FLAGS.out
        for fname in [
                f for f in os.listdir(auto_resume_dir)
                if f.startswith('Generator_') and f.endswith('.npz')
        ]:
            fname = re.sub(r'^Generator_', '', fname)
            fname = re.sub(r'\.npz$', '', fname)
            fname_int = None
            try:
                fname_int = int(fname)
            except ValueError:
                pass
            if fname_int is not None:
                all_model_exist = True
                for m in model_names:
                    if not os.path.exists(auto_resume_dir + '/' + m + '_' + fname + '.npz'):
                        all_model_exist = False
                if not os.path.exists(auto_resume_dir + '/' + ('snapshot_iter_%s.npz' % fname)):
                    all_model_exist = False
                if all_model_exist:
                    candidates.append(fname)
        # print(candidates)
        candidates.sort(key=lambda _: int(_), reverse=True)
        if len(candidates) > 0:
            resume_iteration_str = candidates[0]
        else:
            resume_iteration_str = None
        if resume_iteration_str is not None:
            print('Automatic resuming: use iteration %s' % resume_iteration_str)
            for model, model_name in zip(models, model_names):
                chainer.serializers.load_npz(
                    auto_resume_dir + '/' + model_name + '_%s.npz' % resume_iteration_str,
                    model,
                )
            chainer.serializers.load_npz(
                auto_resume_dir + '/' + 'snapshot_iter_%s.npz' % resume_iteration_str,
                trainer,
            )

    # Run the training
    if FLAGS.enable_cuda_profiling:
        with cupy.cuda.profile():
            trainer.run()
    else:
        # with chainer.using_config('debug', True):
        trainer.run()

    for model, model_name in zip(models, model_names):
        chainer.serializers.save_npz(FLAGS.out + '/' + model_name + '_latest.npz', model)
def del_all_flags(FLAGS):
    flags_dict = FLAGS._flags()
    keys_list = [keys for keys in flags_dict]
    for keys in keys_list:
        FLAGS.__delattr__(keys)
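# Hypothetical usage sketch, assuming absl-style FLAGS: del_all_flags() above is the usual
# trick for re-registering flags without a DuplicateFlagError, e.g. when a script is re-run
# inside a notebook or a test. The 'batch_size' flag below is an invented example.
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_integer('batch_size', 32, 'example flag definition')

del_all_flags(FLAGS)  # drop every currently registered flag
flags.DEFINE_integer('batch_size', 64, 'can now be re-defined without a clash')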
import argparse
import importlib

parser = argparse.ArgumentParser()
parser.add_argument('experiment', type=str)
parser.add_argument('function', type=str)

if __name__ == "__main__":
    from config import FLAGS
    args = parser.parse_args()
    FLAGS.initialize(args.experiment)
    FLAGS._define('FUNCTION', args.function)
    m = importlib.import_module(args.function)
    m.run()
def clear_all_flags():
    for k in FLAGS.flag_values_dict():
        delattr(FLAGS, k)
import os
import shutil
import sys

import tensorflow as tf
from tqdm import tqdm

from config import FLAGS
from tfsolver import TFSolver
from dataset import DatasetFactory
from learning_rate import LRFactory
from network_ae import *
from ocnn import *

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

if len(sys.argv) < 2:
    print('Usage: python run_cls.py config.yaml')
    sys.exit(1)

# update FLAGS
config_file = sys.argv[1]
FLAGS.merge_from_file(config_file)
FLAGS.freeze()

# backup the config file
if not os.path.exists(FLAGS.SOLVER.logdir):
    os.makedirs(FLAGS.SOLVER.logdir)
shutil.copy2(config_file, FLAGS.SOLVER.logdir)


# define the graph
def compute_graph(training=True, reuse=False):
    FLAGSD = FLAGS.DATA.train if training else FLAGS.DATA.test
    with tf.name_scope('dataset'):
        dataset = DatasetFactory(FLAGSD)
        octree, label = dataset()
    code = octree_encoder(octree, FLAGS.MODEL, training, reuse)
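# Hypothetical sketch of what the 'config.py' used above might look like, assuming FLAGS is
# a yacs CfgNode (merge_from_file() and freeze() are the yacs API). Only the section names
# referenced above (SOLVER, DATA.train, DATA.test, MODEL) come from the code; the individual
# fields and defaults are invented placeholders.
from yacs.config import CfgNode as CN

FLAGS = CN()

FLAGS.SOLVER = CN()
FLAGS.SOLVER.logdir = 'logs/ae'          # where logs and the config backup are written

FLAGS.DATA = CN()
FLAGS.DATA.train = CN()
FLAGS.DATA.train.location = ''           # placeholder: path to the training records
FLAGS.DATA.test = CN()
FLAGS.DATA.test.location = ''            # placeholder: path to the test records

FLAGS.MODEL = CN()
FLAGS.MODEL.depth = 6                    # placeholder network hyper-parameter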
def save_train_val_info(self, train_costs, train_times, val_results_dict):
    sfn = '{}/train_val_info'.format(self.logdir)
    flags = FLAGS.flag_values_dict()
    ts = get_ts()
    save_as_dict(sfn, train_costs, train_times, val_results_dict, flags, ts)