Example #1
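A method from the project's Inferer class: it restores a saved checkpoint (mapped to CPU when no GPU is available), puts the model into evaluation mode, and loops over the dataloader under torch.no_grad(), collecting per-batch losses and passing predictions to the on_test_step/on_test_end hooks.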
    def infer(self, model_path, save_path):
        loss = []
        self.create_dataloader()
        # Load the checkpoint on CPU when no GPU is available.
        if not torch.cuda.is_available():
            state_dict = torch.load(model_path,
                                    map_location=torch.device('cpu'))
        else:
            state_dict = torch.load(model_path)
        self.model.load_state_dict(state_dict['model_state_dict'])
        self.optimizer.load_state_dict(state_dict['optimizer_state_dict'])
        self.model.eval()
        with torch.no_grad():
            for i, batch in enumerate(self.dataloader):
                if i % 10 == 0 and i > 0:
                    print('{}: Inferred on {} events'.format(
                        get_time(), i * self.config['val_batch_size']))
                x = batch[0].to(self.device).float()
                y = batch[1].to(self.device).float()
                events = batch[2].to(self.device)
                y_hat = self.model(x)
                loss.append(self.loss(y_hat, y))
                self.on_test_step(y_hat, events)
            self.on_test_end()
Example #2
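An early-stopping helper from the saver used in Example #3: it checkpoints the model whenever the rounded validation loss reaches a new minimum, otherwise increments a patience counter, and returns True once the counter reaches config['patience'].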
    def early_stopping(self, epoch, epoch_val_loss, model_state_dict,
                       optimizer_state_dict):
        epoch_val_loss = round(epoch_val_loss.item(), 3)
        # Save a checkpoint whenever the validation loss hits a new minimum.
        if epoch == 0 or epoch_val_loss < min(self.epoch_val_loss):
            self.save_model_state(epoch, model_state_dict,
                                  optimizer_state_dict)
            self.early_stopping_counter = 0
            print('{}: best model yet, saving'.format(get_time()))
        else:
            self.early_stopping_counter += 1
            print('{}: model didn\'t improve for {} epoch(s)'.format(
                get_time(), self.early_stopping_counter))
        self.epoch_val_loss.append(epoch_val_loss)
        # Signal early stopping once patience is exhausted.
        return self.early_stopping_counter >= self.config['patience']
Example #3
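The training loop that consumes the early-stopping signal from Example #2: each epoch it reports, trains, validates, and breaks out of the loop once early_stopping returns True.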
    def fit(self):
        self.create_dataloaders()
        # The current epoch is stored on self so hooks can read it.
        for self.epoch in range(self.config['num_epochs']):
            self.reporter.on_epoch_start()
            self.train_epoch()
            epoch_val_loss = self.epoch_validation()
            make_early_stop = self.saver.early_stopping(
                self.epoch, epoch_val_loss, self.model.state_dict(),
                self.optimizer.state_dict())
            self.reporter.on_epoch_end()
            if make_early_stop:
                print('{}: early stopping activated'.format(get_time()))
                break
Example #4
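A reporter hook that averages the accumulated training losses and prints a throughput summary: batches processed and events per second over the interval's wall-clock time.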
    def on_intermediate_training_end(self):
        self.avg_train_loss = torch.stack(self.train_loss).mean()
        self.training_end_timestamp = datetime.now()
        self.train_time_delta = (
            self.training_end_timestamp -
            self.training_start_timestamp).total_seconds()
        log_text = ('''
{}: Iteration {} / epoch {}
        Train loss: {:.3f} / {} batches / {:.1f} events/s
                '''.format(
            get_time(),
            self.iteration,
            self.current_epoch,
            self.avg_train_loss,
            len(self.train_loss),
            len(self.train_loss) * self.config['batch_size'] /
            self.train_time_delta,
        ))
        print(log_text)
        return self.avg_train_loss
Example #5
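The corresponding validation hook: it averages the validation losses, prints a throughput summary, and resets the per-epoch bookkeeping (loss lists, step counters and first-batch flags).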
    def on_epoch_validation_end(self):
        self.val_end_timestamp = datetime.now()
        self.val_time_delta = (self.val_end_timestamp -
                               self.val_start_timestamp).total_seconds()
        self.iteration += 1
        avg_val_loss = torch.stack(self.val_loss).mean()
        log_text = ('''
{}: Epoch validation / epoch {}
        Val loss:   {:.3f} / {} batches / {:.1f} events/s
                '''.format(
            get_time(), self.current_epoch, avg_val_loss, len(self.val_loss),
            len(self.val_loss) * self.config['val_batch_size'] /
            self.val_time_delta))
        print(log_text)
        self.train_loss = []
        self.training_step = 0
        self.val_loss = []
        self.val_step = 0
        self.first_train = True
        self.first_val = True
        return avg_val_loss
Example #6
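A script fragment that downloads a finished run's artifacts from Weights & Biases: the model checkpoint, the Python sources (except code/mains/cnn.py) and the cnn.json config, which is then rebuilt as a Bunch for local use.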
import json

import wandb
from bunch import Bunch  # assumption: Bunch likely comes from the `bunch` package

from src.modules.inferer import Inferer
# get_project_root() and get_time() are project helpers; their imports are
# not shown in the original snippet.

RUN_NAME = 'lemon-akita'
ROOT = get_project_root()
DOWNLOAD_FOLDER = ROOT.joinpath('mains/downloads').joinpath(RUN_NAME)
DOWNLOAD_FOLDER.mkdir(parents=True, exist_ok=True)

api = wandb.Api()

runs = api.runs('ehrhorn/cubeflow')
run_id = None
for run in runs:
    if run.name == RUN_NAME:
        run_id = run.id
if run_id is None:
    raise ValueError('No run named {} found'.format(RUN_NAME))

run = api.run('ehrhorn/cubeflow/' + run_id)
print('{}: Downloading run data'.format(get_time()))
for file in run.files():
    # Fetch the checkpoint, the Python sources and the config file.
    is_wanted = (file.name == 'model.pt'
                 or file.name.endswith('.py')
                 or file.name == 'cnn.json')
    if is_wanted and file.name != 'code/mains/cnn.py':
        file.download(replace=True, root=str(DOWNLOAD_FOLDER))
        if file.name != 'model.pt':
            model_file_name = file.name.split('/')[-1].split('.')[0]

JSON_FILE = DOWNLOAD_FOLDER.joinpath('cnn.json')
with open(str(JSON_FILE), 'r') as config_file:
    config_dict = json.load(config_file)
config = Bunch(config_dict)
config.save_train_dists = False
config.wandb = False
Example #7
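A small reporter hook that announces the start of each epoch.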
    def on_epoch_start(self):
        print('''
{}: {}: beginning epoch {}
            '''.format(get_time(), self.experiment_name, self.current_epoch))
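get_time() is used as a log prefix throughout these snippets, but its implementation is not shown in any of them. A minimal sketch of what such a helper might look like (the name is taken from the snippets; the format string is an assumption):

from datetime import datetime


def get_time():
    # Hypothetical sketch: timestamp string used as a log prefix.
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')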
Example #8
File: infer.py Project: DLReseach/CubeFlow
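The inference entry point: it loads the run's configuration, builds the dataloader, loss, model and optimizer dynamically via importlib, runs inference over the first 20,000 test events, and computes errors and histograms, seeding the prediction and error databases with retro_crs_prefit baselines on the first run.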
import argparse
import importlib
from pathlib import Path

from src.modules.inferer import Inferer
# MaskAndSplit, TruthSaver, ErrorCalculator, HistogramCalculator,
# get_dirs_and_config and get_time are project helpers; their imports are
# not shown in the original snippet.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--run', help='run name')
    args = parser.parse_args()
    experiment_name = args.run

    test_set_transformed_path = Path().home().joinpath(
        'CubeFlowData').joinpath('dbs').joinpath('test_transformed.db')

    dirs, config = get_dirs_and_config(experiment_name, False)

    errors_db_path = dirs['dbs'].joinpath('errors.db')
    predictions_db_path = dirs['dbs'].joinpath('predictions.db')

    mask_and_split = MaskAndSplit(config, dirs, ['test'])
    sets = mask_and_split.split()

    config['val_batch_size'] = 2000

    Loader = getattr(
        importlib.import_module('src.dataloaders.' + config['dataloader']),
        'Dataloader')

    if 'SRTInIcePulses' in '-'.join(config['masks']):
        config['cleaning'] = 'SRTInIcePulses'
        config['cleaning_length'] = 'srt_in_ice_pulses_event_length'
    elif 'SplitInIcePulses' in '-'.join(config['masks']):
        config['cleaning'] = 'SplitInIcePulses'
        config['cleaning_length'] = 'split_in_ice_pulses_event_length'

    sets['test'] = sets['test'][0:20000]

    dataset = Loader(
        sets['test'],
        config,
        test_set_transformed_path,
        test=True
    )

    events = [item for sublist in dataset.events for item in sublist]

    if not predictions_db_path.is_file():
        print('{}: First run with these masks; saving truth and '
              'retro_crs_prefit to prediction db'.format(get_time()))
        TruthSaver(config, dirs, events)

    Loss = getattr(importlib.import_module('src.losses.losses'),
                   config['loss'])
    loss_init = Loss()
    loss = loss_init.loss
    Model = getattr(importlib.import_module('src.models.' + config['model']),
                    'Model')
    model = Model()
    Optimizer = getattr(importlib.import_module('src.optimizers.optimizers'),
                        config['optimizer'])
    optimizer_init = Optimizer(model.parameters(),
                               config['min_learning_rate'])
    optimizer = optimizer_init.optimizer

    inferer = Inferer(model, optimizer, loss, dataset, config,
                      experiment_name, dirs)
    model_path = dirs['run'].joinpath('model.pt')

    print('{}: Beginning inference'.format(get_time()))
    inferer.infer(model_path, dirs['run'])

    print('{}: Beginning error calculation'.format(get_time()))
    if not errors_db_path.is_file():
        print('{}: First run with these masks; calculating '
              'retro_crs_prefit errors'.format(get_time()))
        ErrorCalculator('retro_crs_prefit', dirs)
    ErrorCalculator(experiment_name, dirs)

    print('{}: Beginning histogram calculation'.format(get_time()))
    HistogramCalculator(experiment_name, dirs)

    print('{}: Script done!'.format(get_time()))
Example #9
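A plotting routine that loads the per-event error dataframe, bins performance in energy (and optionally in DOM count), writes the binned performance tables back to parquet, and saves comparison plots, IceCube-style 2D histograms and optional per-bin resolution histograms, mirroring them to Weights & Biases when requested.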
import io

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
# PerformanceData, comparison_plot, icecube_2d_histogram, plot_error_in_bin,
# calculate_energy_bins, calculate_dom_bins and get_time are project helpers;
# their imports are not shown in the original snippet.


def calculate_and_plot(files_and_dirs,
                       dom_plots=False,
                       use_train_dists=False,
                       only_use_metrics=None,
                       legends=True,
                       reso_hists=False,
                       use_own=True,
                       reporter=None,
                       wandb=False):

    file_name = files_and_dirs['run_root'].joinpath(
        'error_dataframe_parquet.gzip')
    errors_df = pd.read_parquet(file_name, engine='fastparquet')

    # The energy column appears to be stored as log10(E); keep events at or
    # below 3.0.
    errors_df = errors_df[errors_df.energy <= 3.0]

    if use_train_dists:
        TRAIN_DATA_DF_FILE = files_and_dirs['project_root'].joinpath(
            'train_distributions/train_dists_parquet.gzip')
        train_data_df = pd.read_parquet(TRAIN_DATA_DF_FILE,
                                        engine='fastparquet')
        train_data_df = train_data_df[train_data_df.train_true_energy <= 3.0]

    if only_use_metrics is not None:
        errors_df = errors_df[errors_df.metric.isin(only_use_metrics)]

    PLOTS_DIR = files_and_dirs['run_root'].joinpath('plots')
    PLOTS_DIR.mkdir(exist_ok=True)
    RESO_PLOTS_DIR = PLOTS_DIR.joinpath('resolution_plots')
    RESO_PLOTS_DIR.mkdir(exist_ok=True)

    errors_df.replace([np.inf, -np.inf], np.nan, inplace=True)
    errors_df.dropna(inplace=True)

    energy_bins = calculate_energy_bins(errors_df)
    dom_bins = calculate_dom_bins(errors_df)

    # Derive metric names from the 'own_*_error' columns.
    metrics = [
        metric.replace('own_', '').replace('_error', '')
        for metric in errors_df.keys() if metric.startswith('own')
    ]

    print('{}: Calculating performance data for energy bins'.format(
        get_time()))
    performance_data = PerformanceData(metrics,
                                       df=errors_df,
                                       bins=energy_bins,
                                       bin_type='energy',
                                       percentiles=[0.16, 0.84],
                                       use_own=use_own)

    file_name = files_and_dirs['run_root'].joinpath(
        'own_performance_energy_binned_dataframe_parquet.gzip')
    performance_data.own_performances_df.to_parquet(str(file_name),
                                                    compression='gzip')
    file_name = files_and_dirs['run_root'].joinpath(
        'opponent_performance_energy_binned_dataframe_parquet.gzip')
    performance_data.opponent_performances_df.to_parquet(str(file_name),
                                                         compression='gzip')

    for metric in metrics:
        print('{}: Plotting {} metric, binned in energy'.format(
            get_time(), metric))
        fig, markers_own = comparison_plot(
            metric, performance_data, train_data_df.train_true_energy.values
            if use_train_dists else None, legends)
        file_name = PLOTS_DIR.joinpath('{}_{}_reso_comparison.pdf'.format(
            'energy_bins', metric))
        fig.savefig(file_name)
        if wandb:
            buf = io.BytesIO()
            fig.savefig(buf, format='png')
            buf.seek(0)
            im = Image.open(buf)
            log_text = '{} resolution plot'.format(metric.title())
            reporter.add_plot_to_wandb(im, log_text)
            buf.close()
            log_text = '{} resolution comparison'.format(metric.title())
            reporter.add_metric_comparison_to_wandb(markers_own, log_text)
        plt.close(fig)
        fig = icecube_2d_histogram(metric, performance_data, legends)
        file_name = PLOTS_DIR.joinpath('{}_{}_ic_comparison.pdf'.format(
            'energy_bins', metric))
        fig.savefig(file_name)
        if wandb:
            buf = io.BytesIO()
            fig.savefig(buf, format='png')
            buf.seek(0)
            im = Image.open(buf)
            log_text = '{} IceCube histogram'.format(metric.title())
            reporter.add_plot_to_wandb(im, log_text)
            buf.close()
        plt.close(fig)
        if reso_hists:
            for i, ibin in enumerate(energy_bins):
                indexer = errors_df.energy_binned == ibin
                fig = plot_error_in_bin(
                    errors_df[indexer]['own_' + metric + '_error'].values,
                    errors_df[indexer]['opponent_' + metric + '_error'].values,
                    metric, ibin, 'energy', legends)
                file_name = RESO_PLOTS_DIR.joinpath(
                    '{}_{}_resolution_in_bin_{:02d}.pdf'.format(
                        'energy_bins', metric, i))
                fig.savefig(file_name)
                if wandb:
                    reporter.save_file_to_wandb(str(file_name))
                plt.close(fig)

    if dom_plots:
        print('{}: Calculating performance data for DOM bins'.format(
            get_time()))
        performance_data = PerformanceData(metrics,
                                           df=errors_df,
                                           bins=dom_bins,
                                           bin_type='doms',
                                           percentiles=[0.16, 0.84],
                                           use_own=use_own)
        for metric in metrics:
            print('{}: Plotting {} metric, binned in DOMs'.format(
                get_time(), metric))
            fig, markers_own = comparison_plot(
                metric, performance_data,
                train_data_df.train_event_length.values
                if use_train_dists else None, legends)
            file_name = PLOTS_DIR.joinpath('{}_{}_reso_comparison.pdf'.format(
                'dom_bins', metric))
            fig.savefig(file_name)
            if wandb:
                reporter.save_file_to_wandb(str(file_name))
            plt.close(fig)
            if reso_hists:
                for i, ibin in enumerate(dom_bins):
                    indexer = errors_df.doms_binned == ibin
                    fig = plot_error_in_bin(
                        errors_df[indexer]['own_' + metric + '_error'].values,
                        errors_df[indexer]['opponent_' + metric +
                                           '_error'].values, metric, ibin,
                        'dom', legends)
                    file_name = RESO_PLOTS_DIR.joinpath(
                        '{}_{}_resolution_in_bin_{:02d}.pdf'.format(
                            'dom_bins', metric, i))
                    fig.savefig(file_name)
                    if wandb:
                        reporter.save_file_to_wandb(str(file_name))
                    plt.close(fig)
Example #10
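A conversion step that walks a directory tree of pickle files and stores each event in a shelve database keyed by file stem, skipping events that are already present.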
import datetime
import pickle
import shelve
# DATA_PATH, SHELVE_DATA_FILE, SHELVE_NAME and get_time are defined earlier
# in the original file; only this processing block is shown.

PICKLE_DIRS = [
    directory for directory in DATA_PATH.iterdir() if directory.is_dir()
]

shelve_file_exists = SHELVE_DATA_FILE.is_file()

if shelve_file_exists:
    with shelve.open(str(SHELVE_NAME), 'r') as f:
        EXISTING_EVENTS = list(f.keys())
else:
    EXISTING_EVENTS = []

for directory in PICKLE_DIRS:
    print(
        '{}: Handling directory {}'.format(
            get_time(),
            directory.stem
        )
    )

    time_start = datetime.datetime.now()

    files = [
        file for file in directory.glob('**/*') if file.suffix == '.pickle'
    ]

    with shelve.open(str(SHELVE_NAME), 'c') as db:
        for file in files:
            if file.stem not in EXISTING_EVENTS:
                with open(file, 'rb') as f:
                    db[file.stem] = pickle.load(f)
Example #11
File: main.py Project: DLReseach/CubeFlow
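The training entry point: dataset paths are chosen by hostname (on the development laptop, air.local, the test set doubles as both train and val), the dataloader, loss, model and optimizer are built dynamically via importlib, and everything is handed to the Trainer, whose fit() method appears in Example #3.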
import importlib
import socket
from pathlib import Path
# MaskAndSplit, Reporter, Saver, Trainer, create_experiment_name,
# get_dirs_and_config and get_time are project helpers; their imports are
# not shown in the original snippet.


def main():
    experiment_name = create_experiment_name(slug_length=2)
    dirs, config = get_dirs_and_config(experiment_name, True)

    # On the development laptop the test set doubles as train and val.
    if socket.gethostname() == 'air.local':
        train_set = Path().home().joinpath('CubeFlowData').joinpath(
            'dbs').joinpath('test_transformed.db')
        val_set = Path().home().joinpath('CubeFlowData').joinpath(
            'dbs').joinpath('test_transformed.db')
        mask_and_split = MaskAndSplit(config, dirs, ['test'])
        sets = mask_and_split.split()
        sets['train'] = sets['test']
        sets['val'] = sets['test']
    elif socket.gethostname() == 'gpulab':
        train_set = Path(
            '/home/bjoernhm/CubeML/data/oscnext-genie-level5-v01-01-pass2/train_transformed.db'
        )
        val_set = Path(
            '/home/bjoernhm/CubeML/data/oscnext-genie-level5-v01-01-pass2/val_transformed.db'
        )
        mask_and_split = MaskAndSplit(config, dirs, ['train', 'val'])
        sets = mask_and_split.split()
    else:
        # Guard added: the original left train_set, val_set and sets
        # undefined on unrecognized hosts.
        raise RuntimeError('No dataset paths configured for this host')

    Loader = getattr(
        importlib.import_module('src.dataloaders.' + config['dataloader']),
        'Dataloader')

    print('{}: The overlap between train and val set is {}'.format(
        get_time(), len(list(set(sets['val']) & set(sets['train'])))))

    if 'SRTInIcePulses' in '-'.join(config['masks']):
        config['cleaning'] = 'SRTInIcePulses'
        config['cleaning_length'] = 'srt_in_ice_pulses_event_length'
    elif 'SplitInIcePulses' in '-'.join(config['masks']):
        config['cleaning'] = 'SplitInIcePulses'
        config['cleaning_length'] = 'split_in_ice_pulses_event_length'

    if config['dev_run']:
        sets['train'] = sets['train'][0:20000]
        sets['val'] = sets['val'][0:20000]

    train_dataset = Loader(sets['train'], config, train_set, test=False)
    val_dataset = Loader(sets['val'], config, val_set, test=True)
    reporter = Reporter(config, experiment_name)
    saver = Saver(config, dirs)

    Loss = getattr(importlib.import_module('src.losses.losses'),
                   config['loss'])
    loss_init = Loss()
    loss = loss_init.loss
    Model = getattr(importlib.import_module('src.models.' + config['model']),
                    'Model')
    model = Model()
    Optimizer = getattr(importlib.import_module('src.optimizers.optimizers'),
                        config['optimizer'])
    optimizer_init = Optimizer(model.parameters(), config['min_learning_rate'])
    optimizer = optimizer_init.optimizer

    trainer = Trainer(config, model, optimizer, loss, reporter, saver,
                      train_dataset, val_dataset)

    trainer.fit()

    print('{}: Script done!'.format(get_time()))