def get_output_path(args):
    """Return the .pkl result path under ``args.output`` for the selected model.

    A concrete ``.pt`` checkpoint is used directly; otherwise ``args.model``
    is treated as a serializer directory and the checkpoint for ``args.step``
    is resolved through it.
    """
    if args.model.suffix == '.pt':
        checkpoint = args.model
    else:
        # args.model points at a serializer store; map step id -> file path.
        checkpoint = Serializer(args.model)._id2path(args.step)
    return args.output / f'{checkpoint.stem}.pkl'
def preprocess_args(args):
    """Materialize the checkpoint for ``args.step`` into a temporary .pt file.

    Mutates ``args`` in place: sets ``args.output`` to the result path,
    marks the model as temporary, and repoints ``args.model`` at the
    finalized temp file.  Returns the same ``args`` object.
    """
    args.output = get_output_path(args)
    # Signal to the caller that args.model now names a throwaway file.
    args.is_temporary_model = True
    f = tempfile.NamedTemporaryFile(suffix='.pt', delete=False)
    # Close the handle BEFORE Serializer writes to f.name: on Windows an
    # open NamedTemporaryFile cannot be reopened by another writer, and the
    # original code only closed it afterwards.  delete=False keeps the file.
    f.close()
    Serializer(args.model).finalize(args.step, f.name, map_location=args.device)
    args.model = Path(f.name)
    return args
def process_all(args):
    """Evaluate every known checkpoint step in parallel, then log results to TensorBoard."""
    # Drop any caller-supplied 'step' so it does not clash with the per-step
    # SimpleNamespace construction below.
    args.__dict__.pop('step', None)
    serializer = Serializer(args.model)
    # One argument bundle per known checkpoint step; each copies all of args.
    all_args = [
        SimpleNamespace(step=s, **args.__dict__)
        for s in serializer.list_known_steps()
    ]
    # Fan the per-step jobs out over the available GPUs; pool size matches
    # the number of concurrent tests allowed per GPU.
    with multiprocessing.Pool(args.tests_per_gpu) as p:
        GPUPool(p, args.gpus, args.tests_per_gpu)(process_single, all_args)
    # Second pass (sequential): read each step's pickled results and write
    # scalar curves keyed by the number of samples the model had seen.
    writer = torch.utils.tensorboard.SummaryWriter(args.output / 'log')
    for step_args in all_args:
        samples_passed = get_samples_passed(step_args)
        with get_output_path(step_args).open('rb') as f:
            results = pickle.load(f)
        for result in results:
            # Tag encodes dataset/sequence/step and the evaluated frame range.
            tag = f'{result.dataset}/{result.sequence}/{result.step}/' \
                  f'{result.start}/{result.stop}'
            writer.add_scalar(f'Test/mean AEE/{tag}', result.mAEE, samples_passed)
            # mpAEE is a fraction; scale to percent for display.
            writer.add_scalar(f'Test/mean %AEE/{tag}', result.mpAEE * 100, samples_passed)
from utils.text import gather_documents
from utils.config import Config
from utils.serializer import Serializer
import os

config = Config("./settings.yml")
data_dir = config.get("data_dir")
output_dir = config.get("output_dir")
stats_dir = config.get("stats_dir")


def clean():
    """Delete every file directly inside the output and stats folders."""
    print("Removing all files in the output folder")
    # os.path.join is separator-safe: the original `dir + name` concatenation
    # silently produced wrong paths whenever the configured directory lacked
    # a trailing slash.
    for file_name in os.listdir(output_dir):
        os.remove(os.path.join(output_dir, file_name))
    for file_name in os.listdir(stats_dir):
        os.remove(os.path.join(stats_dir, file_name))


if __name__ == "__main__":
    clean()
    documents = gather_documents(data_dir)
    # NOTE(review): Indexer and Merger are not imported anywhere in this
    # file — this will raise NameError unless they are injected elsewhere;
    # confirm their module and add the import.
    indexer = Indexer(documents)
    indexer.run()
    merger = Merger()
    merger.run()
    serializer = Serializer()
    serializer.pickle_to_txt()
    serializer.wrap_up()
def main():
    """Training entry point: restore the latest checkpoint (if any) and train."""
    # torch.autograd.set_detect_anomaly(True)
    args = parse_args(sys.argv[1:])
    device = torch.device(args.device)
    if device.type == 'cuda':
        torch.cuda.set_device(device)
    # Real timers only when requested; FakeTimer keeps the call sites uniform.
    if args.timers:
        timers = SynchronizedWallClockTimer()
    else:
        timers = FakeTimer()
    model = init_model(args, device)
    serializer = Serializer(args.model, args.num_checkpoints, args.permanent_interval)
    # Force a fresh start when there is nothing to resume from.
    args.do_not_continue = (args.do_not_continue or len(serializer.list_known_steps()) == 0)
    last_step = (0 if args.do_not_continue else serializer.list_known_steps()[-1])
    # Optimizer/scheduler are rebuilt as if `last_step` steps already passed.
    optimizer, scheduler = construct_train_tools(args, model, passed_steps=last_step)
    # Sequence length covers the prefix, the suffix and the central frame.
    losses = init_losses(args.shape, args.bs, model, device, sequence_length=args.prefix_length + args.suffix_length + 1, timers=timers)
    # allow only manual flush
    logger = SummaryWriter(str(args.log_path), max_queue=100000000, flush_secs=100000000)
    periodic_hooks, hooks = create_hooks(args, model, optimizer, losses, logger, serializer)
    if not args.do_not_continue:
        global_step, state = serializer.load_checkpoint(model, last_step, optimizer=optimizer, device=device)
        # Older checkpoints may lack 'samples_passed'; reconstruct from steps.
        samples_passed = state.pop('samples_passed', global_step * args.bs)
    else:
        global_step = 0
        samples_passed = 0
    # Serialize the initial state so step 0 / the resumed step is on disk.
    hooks['serialization'](global_step, samples_passed)
    # Dataloader resumes from the sample index reached so far.
    loader = get_dataloader(get_trainset_params(args), sample_idx=samples_passed, process_only_once=False)
    if not args.skip_validation:
        hooks['validation'](global_step, samples_passed)
    with Profiler(args.profiling, args.model/'profiling'), \
            GPUMonitor(args.log_path):
        train(model, device, loader, optimizer, args.training_steps, scheduler=scheduler, evaluator=losses, logger=logger, weights=args.loss_weights, is_raw=args.is_raw, accumulation_steps=args.accum_step, timers=timers, hooks=periodic_hooks, init_step=global_step, init_samples_passed=samples_passed, max_events_per_batch=args.max_events_per_batch)
    # Final bookkeeping: total samples seen after the remaining steps ran.
    samples = samples_passed + (args.training_steps - global_step) * args.bs
    hooks['serialization'](args.training_steps, samples)
    if not args.skip_validation:
        hooks['validation'](args.training_steps, samples)
def __init__(self, documents):
    """Keep the corpus and build the serialization/tokenization helpers.

    Args:
        documents: the collection of documents this instance will index.
    """
    self.documents = documents
    self.serializer = Serializer()
    # stoplist_path and stemmer are not set here — presumably class-level
    # attributes; verify on the enclosing class.
    self.tokenizer = Tokenizer(self.stoplist_path, self.stemmer)
def get_samples_passed(args):
    """Return the number of training samples recorded in the checkpoint for ``args.step``.

    Falls back to ``global_step * args.bs`` for checkpoints that predate the
    'samples_passed' key.
    """
    serializer = Serializer(args.model)
    checkpoint_path = serializer._id2path(args.step)
    data = torch.load(checkpoint_path, map_location='cpu')
    # Evaluate the fallback lazily: the original
    # data.get('samples_passed', data['global_step'] * args.bs)
    # computed the default eagerly and raised KeyError on checkpoints that
    # contain 'samples_passed' but no 'global_step'.
    if 'samples_passed' in data:
        return data['samples_passed']
    return data['global_step'] * args.bs
def main():
    """Training entry point: restore the latest checkpoint (if any) and train."""
    # torch.autograd.set_detect_anomaly(True)
    args = parse_args()
    device = torch.device(args.device)
    torch.cuda.set_device(device)
    # Real timers only when requested; FakeTimer keeps the call sites uniform.
    if args.timers:
        timers = SynchronizedWallClockTimer()
    else:
        timers = FakeTimer()
    model = init_model(args, device)
    loader = get_dataloader(get_trainset_params(args))
    serializer = Serializer(args.model, args.num_checkpoints, args.permanent_interval)
    # Force a fresh start when there is nothing to resume from.
    args.do_not_continue = (args.do_not_continue or len(serializer.list_known_steps()) == 0)
    last_step = (0 if args.do_not_continue else serializer.list_known_steps()[-1])
    # Optimizer/scheduler are rebuilt as if `last_step` steps already passed.
    optimizer, scheduler = construct_train_tools(args, model, passed_steps=last_step)
    losses = init_losses(get_resolution(args), args.bs, model, device, timers=timers)
    logger = SummaryWriter(str(args.log_path))
    periodic_hooks, hooks = create_hooks(args, model, optimizer, losses, logger, serializer)
    if not args.do_not_continue:
        global_step, state = serializer.load_checkpoint(model, last_step, optimizer=optimizer, device=device)
        # Older checkpoints may lack 'samples_passed'; reconstruct from steps.
        samples_passed = state.pop('samples_passed', global_step * args.bs)
    else:
        global_step = 0
        samples_passed = 0
    # Serialize the initial state so step 0 / the resumed step is on disk.
    hooks['serialization'](global_step, samples_passed)
    hooks['validation'](global_step, samples_passed)
    with Profiler(args.profiling, args.model / 'profiling'):
        train(model, device, loader, optimizer, args.training_steps, scheduler=scheduler, evaluator=losses, logger=logger, weights=args.loss_weights, is_raw=args.is_raw, accumulation_steps=args.accum_step, timers=timers, hooks=periodic_hooks, init_step=global_step, init_samples_passed=samples_passed)
    # Final bookkeeping: total samples seen after the remaining steps ran.
    samples = samples_passed + (args.training_steps - global_step) * args.bs
    hooks['serialization'](args.training_steps, samples)
    hooks['validation'](args.training_steps, samples)