Example #1
    def run_model(criterion, model, data, target,
                  process_model=identity, process_output=identity, process_loss=identity, optimizer=None):
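        # One forward/backward step. When use_dtr is set, the inputs are wrapped
        # with checkpoint() so the DTR runtime can manage (evict and rematerialize)
        # the tensors computed from them; decheckpoint() unwraps them again once
        # they no longer need to be tracked.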
        process_model(model)
        if use_dtr:
            data = data.checkpoint()
            target = target.checkpoint()
        output = model(data)
        process_output(output)
        loss = criterion(output, target)
        process_loss(loss)
        if optimizer:
            optimizer.zero_grad()
        if use_dtr:
            torch.annotate_log('BACKWARD')
        loss.backward()
        # we are not actually using the loss here,
        # but a real training loop would, so we have to decheckpoint
        if use_dtr:
            data = data.decheckpoint()
            loss = loss.decheckpoint()
            target = target.decheckpoint()

        if optimizer:
            optimizer.step()

        # we include these deletions for generating logs;
        # they ensure these tensors are deallocated before the
        # end of the log, so anything still live at that point
        # is a gradient or a weight
        del data
        del loss
        del target
Example #2
    def run_model(criterion, model, data, target,
                  process_model=identity, process_output=identity, process_loss=identity, optimizer=None):
        process_model(model)
        if use_dtr:
            data = data.checkpoint()
            target = target.checkpoint()
        output = model(data)
        process_output(output)
        loss = criterion(output, target)
        process_loss(loss)
        if optimizer:
            optimizer.zero_grad()
        if use_dtr:
            torch.annotate_log('BACKWARD')
        loss.backward()
        # we are not actually using the loss here,
        # but a real training loop would, so we have to decheckpoint
        if use_dtr:
            data = data.decheckpoint()
            target = target.decheckpoint()
            loss = loss.decheckpoint()

        if optimizer:
            optimizer.step()

        del data
        del target
        del loss
        del output
Example #3
    def run_model(criterion,
                  model,
                  ltree,
                  linput,
                  rtree,
                  rinput,
                  target,
                  process_model=identity,
                  process_output=identity,
                  process_loss=identity,
                  optimizer=None):
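        # Tree-structured variant: ltree/rtree describe the two input trees and
        # linput/rinput hold their tensors; only the tensors are checkpointed
        # under DTR, the tree structures are passed through unchanged.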
        process_model(model)
        if use_dtr:
            linput = linput.checkpoint()
            rinput = rinput.checkpoint()
            target = target.checkpoint()
        output = model(ltree, linput, rtree, rinput)
        process_output(output)
        loss = criterion(output, target)
        process_loss(loss)
        if use_dtr:
            torch.annotate_log('BACKWARD')
        loss.backward()
        if use_dtr:
            # decheckpoint() returns the unwrapped tensor, so keep the result
            loss = loss.decheckpoint()
            linput = linput.decheckpoint()
            rinput = rinput.decheckpoint()
            target = target.decheckpoint()

        del linput
        del rinput
        del target
        del loss
Example #4
        def run_model(criterion,
                      model,
                      hidden,
                      data,
                      targets,
                      process_model=identity,
                      process_output=identity,
                      process_loss=identity,
                      optimizer=None):
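            # Word-language-model variant: threads the RNN hidden state through the
            # model and repackages it after backward so the next call does not
            # backpropagate through this batch's graph.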
            process_model(model)
            if use_dtr:
                data = data.checkpoint()
                targets = targets.checkpoint()

            output, new_hidden = model(data, hidden)
            process_output(output)
            loss = criterion(output.view(-1, ntokens), targets)
            process_loss(loss)
            if use_dtr:
                torch.annotate_log('BACKWARD')
            loss.backward()
            wlm.main.repackage_hidden(new_hidden)
            # we are not actually using the loss here,
            # but a real training loop would, so we have to decheckpoint
            if use_dtr:
                loss = loss.decheckpoint()
Example #5
    def timing_loop(model_name, i, dry_run, n_reps, config, use_dtr,
                    specific_params, extra_params, results_queue,
                    heartbeat_queue):
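        # Runs `dry_run` warm-up iterations plus `n_reps` measured iterations for
        # one input, reporting each measured result on results_queue and signalling
        # success or failure of every iteration on heartbeat_queue.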
        measurements = []
        print(f'Running {model_name} : {specific_params}')

        # remove any logs hanging around (so we only have to look for one)
        delete_logs()

        # we only save logs for the final input on DTR
        save_log = use_dtr and specific_params.get(
            'save_logs', config['save_logs']) and i == config['n_inputs'] - 1
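        # disable DTR logging during warm-up and the early reps; when a log is
        # being saved, it is switched back on just before the final measured run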
        if use_dtr:
            torch.toggle_log(False)

        batch_size = specific_params['batch_size']
        use_profiling = use_dtr and specific_params.get('use_profiling', False)
        use_cudnn = model_util.use_cudnn(model_name)

        with torch.backends.cudnn.flags(enabled=use_cudnn,
                                        benchmark=use_cudnn):
            produce_model, gen_input, run_model, teardown = model_util.prepare_model(
                model_name, batch_size, use_dtr=use_dtr)
            criterion = model_util.get_criterion(model_name)
            inp = gen_input(i, extra_params)

            if use_profiling:
                torch.toggle_profile(use_profiling)

            progress = tqdm(range(dry_run + n_reps))
            for j in progress:
                progress.set_description(
                    f'Rep [{j}]' if j >= dry_run else f'Dry run [{j}]')
                gc.collect()
                # Annotate where the final run starts in the log
                if save_log and j == dry_run + n_reps - 1:
                    torch.toggle_log(True)
                    torch.annotate_log('START')

                try:
                    res = run_single_measurement(model_name, produce_model,
                                                 run_model, teardown, inp,
                                                 criterion, extra_params,
                                                 use_dtr, use_profiling)
                except RuntimeError:
                    # report the failure over the heartbeat channel, then re-raise
                    heartbeat_queue.put((False, 0))
                    raise
                heartbeat_queue.put((True, res["time"]))
                if j >= dry_run:
                    results_queue.put(res)
Example #6
    def run_model(criterion, model, ltree,
                  process_model=identity, process_output=identity, process_loss=identity, optimizer=None):
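        # ltree appears to be a tree structure holding tensors; map_ replaces each
        # stored tensor in place, here detaching and checkpointing it under DTR
        # (and unwrapping it again with decheckpoint() after backward).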
        if use_dtr:
            ltree.map_(lambda x: x.detach().checkpoint())

        output = model(ltree)
        output = torch.sum(output)
        if use_dtr:
            torch.annotate_log('BACKWARD')
        output.backward()

        if use_dtr:
            output = output.decheckpoint()
            ltree.map_(lambda x: x.decheckpoint())

        del output
        del ltree
Example #7
        def run_model(criterion, model, data, targets,
                      process_model=identity, process_output=identity, process_loss=identity, optimizer=None):
            process_model(model)
            if use_dtr:
                data = data.checkpoint()
                targets = targets.checkpoint()
            output = model(data)
            process_output(output)
            loss = criterion(output.view(-1, ntokens), targets)
            process_loss(loss)
            if use_dtr:
                torch.annotate_log('BACKWARD')
            loss.backward()

            del data
            del targets
            del loss
Example #8
    def run_model(criterion, model, data,
                  process_model=identity, process_output=identity, process_loss=identity, optimizer=None):
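        # data is a list of tensors here, so each element is checkpointed (and
        # later decheckpointed) individually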
        # process_model(model)
        # target = torch.squeeze(target)
        if use_dtr:
            data = list(map(lambda x: x.checkpoint(), data))

        output = model(data)
        output = torch.sum(output[-1])
        if use_dtr:
            torch.annotate_log('BACKWARD')
        output.backward()
        # we are not actually reusing these inputs here,
        # but a real training loop would, so we have to decheckpoint them
        if use_dtr:
            data = list(map(lambda x: x.decheckpoint(), data))

        del output
        del data
Example #9
def timing_loop(model_name,
                i,
                config,
                use_dtr,
                specific_params,
                writer,
                trial_run=False,
                trial_run_outfile=None,
                memory_budget=-1.0):
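    # Variant of the timing loop that writes one CSV row per measured rep via
    # `writer`, appends a summary line to speed_results.tsv, and optionally saves
    # the DTR log from the final measured run.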
    dry_run = config['dry_run']
    measurements = []
    print(f'Running {model_name} : {specific_params}')

    # remove any logs hanging around (so we only have to look for one)
    delete_logs()

    # we only save logs for the final input on DTR
    save_log = use_dtr and specific_params.get(
        'save_logs', config['save_logs']) and i == config['n_inputs'] - 1
    if use_dtr:
        torch.toggle_log(False)

    # whether to report profiling info
    use_profiling = use_dtr and specific_params.get('use_profiling', False)

    use_cudnn = model_util.use_cudnn(model_name)
    with torch.backends.cudnn.flags(enabled=use_cudnn, benchmark=use_cudnn):
        criterion = model_util.get_criterion(model_name)
        produce_model, gen_input, run_model, teardown = model_util.prepare_model(
            model_name, specific_params['batch_size'], use_dtr=use_dtr)
        inp = gen_input(i, specific_params.get('extra_params', dict()))

        n_reps = specific_params.get('n_reps', config['n_reps'])

        if use_profiling:
            torch.toggle_profile(use_profiling)

        progress = tqdm(range(dry_run + n_reps))
        for j in progress:
            progress.set_description(
                f'Rep [{j}]' if j >= dry_run else f'Dry run [{j}]')
            gc.collect()
            # Annotate where the final run starts in the log
            if save_log and j == dry_run + n_reps - 1:
                torch.toggle_log(True)
                torch.annotate_log('START')

            res = run_single_measurement(model_name,
                                         produce_model,
                                         run_model,
                                         teardown,
                                         inp,
                                         criterion,
                                         extra_params=specific_params.get(
                                             'extra_params', dict()),
                                         use_dtr=use_dtr,
                                         use_profiling=use_profiling)
            if j >= dry_run:
                measurements.append(res)

        # Dump results
        model_name_replace_dict = {
            'tv_resnet152': 'resnet152',
            'tv_resnet50': 'resnet50',
        }

        train_ips_list = []
        batch_size = None
        for res in measurements:
            batch_size = res['batch_size']
            train_ips_list.append(res['ips'])

        out_file = "speed_results.tsv"
        with open(out_file, "a") as fout:
            val_dict = {
                'network': model_name_replace_dict.get(model_name, model_name),
                'algorithm': 'dtr',
                'budget': specific_params['memory_budget'],
                'batch_size': batch_size,
                'ips': np.median(train_ips_list) if train_ips_list else -1,
            }
            print(val_dict)
            fout.write(json.dumps(val_dict) + "\n")
        print(f"save results to {out_file}")

    # a trial run only measures baseline memory usage:
    # record the peak total memory to a JSON file and skip the CSV output
    if trial_run:
        write_json(
            os.getcwd(), trial_run_outfile,
            {'mem': max(map(lambda data: data['total_mem'], measurements))})
        return

    if save_log:
        save_trial_log(config['log_dest'],
                       config.get('simrd_config', None),
                       model_name,
                       specific_params,
                       is_baseline=specific_params['memory_budget'] == -1)

    # clean up after ourselves
    delete_logs()

    # do all the writing after the trial is over
    for j in range(len(measurements)):
        data = measurements[j]
        # do unit conversions now: times in ms,
        # memory in MB
        writer.writerow({
            'time': data['time'] * 1e3,
            'sync_time': data['sync_time'] * 1e3,
            # pytorch's cuda elapsed time is already in ms
            'gpu_time': float(data['gpu_time']),
            # 'cuda_time' : float(data['cuda_time']) * 1e-6,
            'input_mem': data['input_mem'] * 1e-6,
            'model_mem': data['model_mem'] * 1e-6,
            'total_mem': data['total_mem'] * 1e-6,
            'memory_budget': memory_budget,
            # profiling (reported in nanoseconds)
            'base_compute_time': data['base_compute_time'] * 1e-6,
            'remat_compute_time': data['remat_compute_time'] * 1e-6,
            'search_time': data['search_time'] * 1e-6,
            'cost_time': data['cost_time'] * 1e-6,
            # measurements only holds the post-dry-run reps, so j is the rep index
            'rep': j,
            'input': i,
            **specific_params
        })
Example #10
def timing_loop(model_name,
                i,
                config,
                use_dtr,
                specific_params,
                writer,
                trial_run=False,
                trial_run_outfile=None):
    dry_run = config['dry_run']
    measurements = []
    print(f'Running {model_name} : {specific_params}')

    # remove any logs hanging around (so we only have to look for one)
    delete_logs()

    # we only save logs for the final input on DTR
    save_log = use_dtr and config['save_logs'] and i == config['n_inputs'] - 1
    if use_dtr:
        torch.toggle_log(False)

    use_cudnn = model_util.use_cudnn(model_name)
    with torch.backends.cudnn.flags(enabled=use_cudnn, benchmark=use_cudnn):
        criterion = model_util.get_criterion(model_name)
        produce_model, gen_input, run_model, teardown = model_util.prepare_model(
            model_name, specific_params['batch_size'], use_dtr=use_dtr)
        inp = gen_input(i, specific_params.get('extra_params', dict()))

        progress = tqdm(range(dry_run + config['n_reps']))
        for j in progress:
            progress.set_description(
                f'Rep [{j}]' if j >= dry_run else f'Dry run [{j}]')
            gc.collect()
            # Annotate where the final run starts in the log
            if save_log and j == dry_run + config['n_reps'] - 1:
                torch.toggle_log(config['save_logs'])
                torch.annotate_log('START')

            res = run_single_measurement(model_name,
                                         produce_model,
                                         run_model,
                                         teardown,
                                         inp,
                                         criterion,
                                         extra_params=specific_params.get(
                                             'extra_params', dict()),
                                         use_dtr=use_dtr)
            if j >= dry_run:
                measurements.append(res)

    # a trial run only measures baseline memory usage:
    # record the peak total memory to a JSON file and skip the CSV output
    if trial_run:
        write_json(
            os.getcwd(), trial_run_outfile,
            {'mem': max(map(lambda data: data['total_mem'], measurements))})
        return

    if save_log:
        save_trial_log(config['log_dest'], model_name, specific_params)

    # clean up after ourselves
    delete_logs()

    # do all the writing after the trial is over
    for j in range(len(measurements)):
        data = measurements[j]
        # do unit conversions now: times in ms,
        # memory in MB
        writer.writerow({
            'time': data['time'] * 1e3,
            # pytorch's cuda elapsed time is already in ms
            'gpu_time': float(data['gpu_time']),
            # 'cuda_time' : float(data['cuda_time']) * 1e-6,
            'input_mem': data['input_mem'] * 1e-6,
            'model_mem': data['model_mem'] * 1e-6,
            'total_mem': data['total_mem'] * 1e-6,
            'rep': j,
            'input': i,
            **specific_params
        })