Exemple #1
0
    def _plot_ita(self, first_half: np.ndarray, second_half: np.ndarray,
                  time_series_min, time_series_max):
        """
        Plot the Innovative Trend Analysis (ITA) results.

        The second half of the series is scattered against the first half,
        together with the (1:1) "no trend" reference line. Both axes are
        limited to ``[time_series_min, time_series_max]``.

        The figure is saved as
        ``<save_path>/<save_name>_<file_id>.<save_format>``, where ``file_id``
        is the execution timestamp, prefixed with ``self.file_id`` when that
        attribute is not None. If the flag ``self.plot`` is True, the figure is
        also shown on the screen after being saved.

        The figure is always closed before returning, even when saving or
        showing it raises.

        :param first_half: first half of the time series values
        :param second_half: second half of the time series values
        :param time_series_min: minimum value of the time series
        :param time_series_max: maximum value of the time series
        """
        # Keep a handle on the figure so that exactly this figure is closed at
        # the end, whatever the "current" figure is at that point.
        fig = plt.figure()
        try:
            plt.scatter(first_half, second_half, label='data', color='red',
                        s=2)
            plt.title('Innovative Trend Analysis')
            plt.xlabel('First half of the series')
            plt.xlim(time_series_min, time_series_max)
            plt.ylabel('Second half of the series')
            plt.ylim(time_series_min, time_series_max)

            # No trend line (y = x) spanning the whole value range
            x_no_trend = np.array([time_series_min, time_series_max])
            y_no_trend = 0 + 1 * x_no_trend
            plt.plot(x_no_trend,
                     y_no_trend,
                     label='(1:1) No trend line',
                     color='black',
                     linewidth=0.75,
                     linestyle='-')
            plt.legend()

            # Save file with timestamp of the execution
            timestamp = generate_timestamp()
            if self.file_id is not None:
                file_id = "{0}_{1}".format(self.file_id, timestamp)
            else:
                file_id = timestamp
            plt.savefig("{0}/{1}_{2}.{3}".format(self.save_path,
                                                 self.save_name, file_id,
                                                 self.save_format))
            if self.plot:
                plt.show()
        finally:
            # Release the figure even if saving/showing failed
            plt.close(fig)
def trend_detection_comparison(methods_list: List[Method],
                               file_prefix: str,
                               folder: str = GENERATED_DIR) -> pd.DataFrame:
    """
    Run every detection method on every generated file and tabulate the output.

    All the paths matching ``{folder}/{file_prefix}*`` are processed in sorted
    order. For each file, `detect_trend(x, y)` is called on every method of
    `methods_list` (in list order) and its return value is stored as is.

    The table has one column per file (named with `get_name_from_path`) and
    one row per method (indexed by the method name). It is saved as csv in the
    `RESULTS_DIR`, with the file prefix and the execution timestamp in its
    name, and also returned by the function.

    :param methods_list: list with the methods that will perform the detection
    :param file_prefix: prefix of the files that will be used
    :param folder: folder that will contain the files. Default: `GENERATED_DIR`
    :return: table with the result of every method for every file
    """
    # The method names label the rows of the table
    method_names = [method.name for method in methods_list]
    run_timestamp = generate_timestamp()

    # Sorted by path so that the column order is deterministic
    csv_paths = sorted(glob.glob(f'{folder}/{file_prefix}*'))

    detections = dict()
    for csv_path in csv_paths:
        series_name = get_name_from_path(csv_path)

        # Only the raw series is needed for the detection; the generated
        # components returned alongside it are not used here.
        x, y, _trend, _seasonality, _noise = read_generated_csv(csv_path)

        detections[series_name] = [
            method.detect_trend(x, y) for method in methods_list
        ]

    table = pd.DataFrame(detections, index=method_names)
    table.to_csv(
        f'{RESULTS_DIR}/trend_detection_{file_prefix}_{run_timestamp}.csv')

    return table
def main(cfg: Config) -> None:
    """Fit and test a `UNetSegModel` as configured by `cfg`, then save the submission.

    The steps are: create a timestamped run directory inside
    `cfg.output_dir`, optionally seed everything, build the
    `AcreCascadeDataModule`, the model (with a weighted cross entropy + dice
    `MultiLoss`), the `WandbLogger` and the `pl.Trainer`, run `fit` and `test`,
    and finally dump `model.submission` to `submission.json` in the run
    directory.
    """
    # Each run writes into its own sub-directory, named with a timestamp
    run_dir = Path(to_absolute_path(cfg.output_dir)) / generate_timestamp()
    run_dir.mkdir(parents=True)

    # Seeding is optional: a seed of None leaves the RNGs untouched
    if cfg.seed is not None:
        pl.seed_everything(seed=cfg.seed)

    # ------------------------
    # 1 DATAMODULE
    # ------------------------
    # The data module receives the `.name` of the configured teams / crop,
    # or None when they are not set in the config.
    teams = (None if cfg.teams is None else
             [team.name for team in cfg.teams])  # type: ignore
    test_teams = (None if cfg.test_teams is None else
                  [team.name for team in cfg.test_teams])  # type: ignore
    crop = None if cfg.crop is None else cfg.crop.name  # type: ignore

    dm = AcreCascadeDataModule(
        data_dir=Path(to_absolute_path(cfg.data_dir)),
        train_batch_size=cfg.train_batch_size,
        val_batch_size=cfg.val_batch_size,
        val_pcnt=cfg.val_pcnt,
        num_workers=cfg.num_workers,
        download=cfg.download,
        teams=teams,
        test_teams=test_teams,
        crop=crop,
    )

    # ------------------------
    # 2 LIGHTNING MODEL
    # ------------------------
    # Each loss instance is mapped to the weight taken from the config
    loss_weights = {
        CrossEntropyLoss(): cfg.xent_weight,
        DiceLoss(): cfg.dice_weight,
    }
    model = UNetSegModel(
        num_classes=dm.num_classes,
        num_layers=cfg.num_layers,
        features_start=cfg.features_start,
        lr=cfg.lr,
        bilinear=cfg.bilinear,
        loss_fn=MultiLoss(loss_weights),
        T_max=cfg.T_max,
    )

    # ------------------------
    # 3 LOGGER
    # ------------------------
    # The whole (resolved) config is attached to the W&B run
    wandb_config = OmegaConf.to_container(cfg,
                                          resolve=True,
                                          enum_to_str=True)
    logger = WandbLogger(config=wandb_config, offline=cfg.log_offline)

    # ------------------------
    # 4 TRAINER
    # ------------------------
    precision = 16 if cfg.use_amp else 32
    trainer = pl.Trainer(
        gpus=cfg.gpus,
        logger=logger,
        max_epochs=cfg.epochs,
        precision=precision,
        log_every_n_steps=1,
    )

    # ------------------------
    # 5 TRAINING
    # ------------------------
    trainer.fit(model=model, datamodule=dm)

    # ------------------------
    # 6 TESTING
    # ------------------------
    trainer.test(model=model, datamodule=dm)

    # ------------------------
    # 7 SUBMISSION
    # ------------------------
    # `model.submission` is read after `trainer.test`
    # NOTE(review): presumably it is filled during the test loop - confirm
    submission_fp = run_dir / "submission.json"
    with open(submission_fp, "w") as submission_file:
        json.dump(model.submission, submission_file)
    LOGGER.info(f"Submission saved to {submission_fp.resolve()}")
Exemple #4
0
def experiment(
        data_dir: Path = typer.Option("data", "--data-dir", "-d"),
        output_dir: Path = typer.Option("output", "--output", "-o"),
        train_batch_size: int = typer.Option(16, "--train-batch-size"),
        val_batch_size: int = typer.Option(32, "--val-batch-size"),
        val_pcnt: float = typer.Option(0.2, "--val-pcnt"),
        num_workers: int = typer.Option(4, "--num-workers"),
        lr: float = typer.Option(1.0e-3, "--learning-rate", "-lr"),
        # The first positional argument of `typer.Option` is the DEFAULT
        # value, not the flag name. `...` marks these two int options as
        # required instead of giving them a non-integer string default.
        num_layers: int = typer.Option(..., "--num-layers"),
        features_start: int = typer.Option(..., "--features-start"),
        bilinear: bool = typer.Option(False, "--bilinear"),
        log_to_wandb: bool = typer.Option(False, "--log-to-wandb"),
        gpus: int = typer.Option(0, "--gpus"),
        epochs: int = typer.Option(100, "--epochs"),
        use_amp: bool = typer.Option(False, "--use-amp"),
        seed: Optional[int] = typer.Option(47, "--seed"),
        download: bool = typer.Option(False, "--download", "-dl"),
) -> None:
    """Fit and test a `UNetSegModel`, then save its submission as JSON.

    A timestamped run directory is created inside `output_dir`, the data
    module is built for the team "Roseau" and the crop "Haricot", the model is
    fitted and tested, and `model.submission` is written to
    `submission.json` in the run directory.

    `--num-layers` and `--features-start` are required; every other option
    has a default. Logging to W&B only happens with `--log-to-wandb`.
    """
    # Create a subdir within the output dir named with a timestamp
    run_dir = output_dir / generate_timestamp()
    run_dir.mkdir(parents=True)

    # Set all seeds for reproducibility
    if seed is not None:
        pl.seed_everything(seed=seed)

    # ------------------------
    # 1 INIT DATAMODULE
    # ------------------------
    dm = AcreCascadeDataModule(
        data_dir=data_dir,
        train_batch_size=train_batch_size,
        val_batch_size=val_batch_size,
        val_pcnt=val_pcnt,
        num_workers=num_workers,
        download=download,
        teams=["Roseau"],
        crop="Haricot",
    )

    # ------------------------
    # 2 INIT LIGHTNING MODEL
    # ------------------------
    model = UNetSegModel(
        num_classes=dm.num_classes,
        num_layers=num_layers,
        features_start=features_start,
        lr=lr,
        bilinear=bilinear,
    )

    # ------------------------
    # 3 SET LOGGER
    # ------------------------
    # `False` is handed to the trainer unless W&B logging was requested
    logger: Union[bool, WandbLogger] = False
    if log_to_wandb:
        logger = WandbLogger()
        # optional: log model topology
        logger.watch(model.net)

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = pl.Trainer(
        gpus=gpus,
        logger=logger,
        max_epochs=epochs,
        precision=16 if use_amp else 32,
    )

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    trainer.fit(model=model, datamodule=dm)

    # ------------------------
    # 6 START TESTING
    # ------------------------
    trainer.test(model=model, datamodule=dm)

    # ------------------------
    # 7 SAVE THE SUBMISSION
    # ------------------------
    submission_fp = run_dir / "submission.json"
    with open(submission_fp, "w") as f:
        json.dump(model.submission, f)
    typer.echo(f"Submission saved to {submission_fp.resolve()}")
def trend_estimation_comparison(methods_list: List[Method],
                                file_prefix: str,
                                folder: str = GENERATED_DIR) -> pd.DataFrame:
    """
    Run every estimation method on every generated file and tabulate the
    distance between each estimation and the ground truth trend.

    All the paths matching ``{folder}/{file_prefix}*`` are processed in sorted
    order, printing the fraction of processed files as progress. For each
    file, `estimate_trend(x, y)` is called once on every method of
    `methods_list` and the `np.linalg.norm` of the difference between the
    estimation and the true trend read from the file is stored.

    The table has one column per file (named with `get_name_from_path`) and
    one row per method (indexed by the method name). It is saved as csv in the
    `RESULTS_DIR`, with the file prefix and the execution timestamp in its
    name, and also returned by the function.

    :param methods_list: list with the methods that will perform the estimation
    :param file_prefix: prefix of the files that will be used
    :param folder: folder that will contain the files. Default: `GENERATED_DIR`
    :return: table with the distance to the true trend per method and file
    """
    # The method names label the rows of the table
    method_names = [method.name for method in methods_list]
    run_timestamp = generate_timestamp()

    # Sorted by path so that the column order is deterministic
    csv_paths = sorted(glob.glob(f'{folder}/{file_prefix}*'))
    total_files = len(csv_paths)

    distances = dict()
    for position, csv_path in enumerate(csv_paths, start=1):
        print('progress: ', position / total_files)
        series_name = get_name_from_path(csv_path)

        # Parameters used to generate the series, looked up by series name
        params = configparser.ConfigParser(allow_no_value=True)
        params.read(f'{SYNTHETIC_DIR}/{series_name}.ini')

        x, y, trend, _seasonality, _noise = read_generated_csv(csv_path)

        # Descriptive titles, falling back to a fixed text when the option is
        # not available in the parameters.
        # NOTE: nothing in this function consumes them at the moment; they
        # are meant for a per-file plot of the estimations against the true
        # trend, which is currently disabled.
        trend_title = (params[TREND_DATA][FUNC] if params.has_option(
            TREND_DATA, FUNC) else 'Real data')
        seasonality_title = (params[SEASONALITY_DATA][FUNC]
                             if params.has_option(SEASONALITY_DATA, FUNC) else
                             'No seasonality')
        noise_title = (params[NOISE_DATA][SIGNAL_TO_NOISE]
                       if params.has_option(NOISE_DATA, SIGNAL_TO_NOISE) else
                       'No noise')

        # Distance of every estimation to the ground truth trend
        distances[series_name] = [
            np.linalg.norm(method.estimate_trend(x, y) - trend)
            for method in methods_list
        ]

    table = pd.DataFrame(distances, index=method_names)
    table.to_csv(
        f'{RESULTS_DIR}/trend_estimation_{file_prefix}_{run_timestamp}.csv')

    return table