Ejemplo n.º 1
0
def construct_args_burdenator_cleanup(parser, cli_args=None):
    """Parse command-line arguments for the burdenator draw-file cleanup
    step, then attach derived paths and a logger to the namespace.

    Falls back to sys.argv when no explicit argument list is given.
    """
    if cli_args is None:
        cli_args = sys.argv[1:]
    args = parser.parse_args(cli_args)
    args.tool_name = 'burdenator'

    # Derived directories: cache lives beside the draws, logs are keyed
    # by year and measure.
    root = args.out_dir
    args.cache_dir = '{}/cache'.format(args.out_dir)
    args.log_dir = os.path.join(root, 'log_cleanup', str(args.year_id),
                                str(args.measure_id))
    makedirs_safely(args.log_dir)

    log_name = "{}_{}_{}.log".format(args.measure_id, args.location_id,
                                     args.year_id)
    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    args.logger = create_logger_in_memory("dalynator", level,
                                          args.log_dir + "/" + log_name)

    # Resolve the codcorrect and como input locations.
    args.cod_dir = (args.input_data_root + "/codcorrect/" +
                    str(args.cod_version) + "/draws")
    args.epi_dir = get_folder_structure(
        os.path.join(args.input_data_root, 'como', str(args.epi_version)))
    return args
Ejemplo n.º 2
0
def get_args_pct_change(parser, cli_args=None):
    """Parse command-line arguments for the percent-change calculation,
    resolving the GBD round, machinery versions, and input/output paths.
    """
    if cli_args is None:
        cli_args = sys.argv[1:]
    args = parser.parse_args(cli_args)
    args.gbd_round, args.gbd_round_id = ac.populate_gbd_round_args(
        args.gbd_round, args.gbd_round_id)

    # One log directory per location.
    args.log_dir = os.path.join(args.out_dir, 'log_pct_change',
                                str(args.location_id))
    makedirs_safely(args.log_dir)
    log_name = "FILEPATH".format(args.start_year, args.end_year)
    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    args.logger = create_logger_in_memory("dalynator", level,
                                          "FILEPATH".format(args.log_dir, log_name),
                                          ['aggregator.aggregators', 'jobmon'])

    # Resolve 'best'/None machinery versions to concrete ids, then locate
    # the cod and epi inputs.
    if args.codcorrect_version == 'best':
        args.codcorrect_version = ac.best_version(
            'codcorrect', args.gbd_round_id, args.decomp_step)
    if args.fauxcorrect_version == 'best':
        args.fauxcorrect_version = ac.best_version(
            'fauxcorrect', args.gbd_round_id, args.decomp_step)
    if args.epi_version is None:
        args.epi_version = ac.best_version('como', args.gbd_round_id,
                                           args.decomp_step)
    cod = to.cod_or_faux_correct(
        args.input_data_root,
        codcorrect_version=args.codcorrect_version,
        fauxcorrect_version=args.fauxcorrect_version)
    args.cod_dir = cod.abs_path_to_draws
    args.cod_pattern = cod.file_pattern
    args.epi_dir = get_como_folder_structure(os.path.join(
        args.input_data_root, 'como', str(args.epi_version)))
    return args
Ejemplo n.º 3
0
def get_args_burdenator_loc_agg(parser, cli_args=None):
    """Parse command-line arguments for burdenator location aggregation
    and attach the cache directory, log directory, and logger.
    """
    if cli_args is None:
        cli_args = sys.argv[1:]
    args = parser.parse_args(cli_args)
    args.gbd_round, args.gbd_round_id = ac.populate_gbd_round_args(
        args.gbd_round, args.gbd_round_id)

    # Logs keyed by year and measure under the data root.
    root = args.data_root
    args.cache_dir = 'FILEPATH'.format(args.data_root)
    args.log_dir = os.path.join(root, 'log_loc_agg',
                                str(args.year_id), str(args.measure_id))
    makedirs_safely(args.log_dir)

    log_name = "FILEPATH".format(
        args.measure_id, args.rei_id, args.year_id, args.sex_id)
    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    args.logger = create_logger_in_memory(
        "dalynator", level, args.log_dir + "/" + log_name,
        ['aggregator.aggregators', 'jobmon'])

    return args
Ejemplo n.º 4
0
def write_csv(df,
              filename,
              write_columns_order=None,
              write_out_star_ids=False,
              dual_upload=False):
    """Write *df* to *filename* as an upload-ready CSV.

    Rows rejected for upload are first split out to their own file; the
    remainder is sorted for the database (falling back to
    write_columns_order, then unsorted), and star_id is dropped from the
    output columns unless explicitly requested. With dual_upload the same
    CSV is mirrored to the public-upload location.
    """
    df = separate_rejected_data_to_csv(df, filename)

    # Database sort order wins; otherwise sort by the caller's column
    # order, or leave the frame as-is.
    try:
        out_df = sort_for_db(df)
    except ValueError:
        out_df = (df.sort_values(write_columns_order)
                  if write_columns_order else df)

    # star_id is only written when the caller asked for it.
    cols = remove_unwanted_star_id_column(
        write_columns_order or df.columns.tolist(), write_out_star_ids)

    out_df.to_csv(filename, columns=cols, index=False)

    if dual_upload:
        mirror = sub_pub_for_cc(filename)
        makedirs_safely(os.path.dirname(mirror))
        out_df.to_csv(mirror, columns=cols, index=False)
Ejemplo n.º 5
0
    def __init__(self, location_set_id, year_id, rei_id, sex_id, measure_id,
                 gbd_round_id, n_draws, data_root, region_locs,
                 write_out_star_ids):
        """Prepare a location-aggregation run for one (year, rei, sex,
        measure) slice of burdenator draws.

        Caches demographic/hierarchy inputs via a DataContainer, creates
        the output directory, deletes stale aggregate files left by a
        previously failed run, and builds the draw source/sink pair.
        """
        self.location_set_id = location_set_id
        self.year_id = year_id
        self.rei_id = rei_id
        self.sex_id = sex_id
        self.measure_id = measure_id
        self.gbd_round_id = gbd_round_id
        self.n_draws = n_draws
        self.data_root = data_root
        self.region_locs = region_locs
        # Shared cache of demographic data for this slice; the location
        # hierarchy is looked up from it below.
        self.data_container = DataContainer(
            {'location_set_id': self.location_set_id,
             'year_id': self.year_id,
             'sex_id': self.sex_id},
            n_draws=self.n_draws, gbd_round_id=self.gbd_round_id,
            cache_dir=os.path.join(self.data_root, 'cache'))
        self.loctree = self.data_container[
            'location_hierarchy_{}'.format(self.location_set_id)]

        self.in_dir = os.path.join(self.data_root, 'draws')
        self.out_dir = os.path.join(self.data_root, 'loc_agg_draws/burden')
        mkds.makedirs_safely(self.out_dir)
        self.write_out_star_ids = write_out_star_ids

        # Remove old aggregates in case jobs failed in the middle.
        # Aggregate locations are the non-leaf nodes of the hierarchy.
        aggregates = [n.id for n in self.loctree.nodes
                      if n not in self.loctree.leaves()]
        for loc in aggregates:
            # NOTE(review): {lo}/{loc} are both the location id and
            # {me}/{m} are both measure_id -- each appears twice in the
            # path template by design of the directory layout.
            filename = ('{o}/{lo}/{me}/{m}_{y}_{loc}_{r}_{s}.h5'
                        .format(o=self.out_dir, lo=loc, me=self.measure_id,
                                m=self.measure_id, y=self.year_id,
                                loc=loc, r=self.rei_id, s=self.sex_id))
            logger.debug("Deleting potentially pre-existing loc-agg file"
                         "{e}: '{f}'".format(e=os.path.exists(filename),
                                             f=filename))
            with contextlib.suppress(FileNotFoundError):
                os.remove(filename)

        # Draws are filtered down to NUMBER-space rows for exactly this
        # (rei, sex, measure, year) slice.
        self.index_cols = ['measure_id', 'metric_id', 'sex_id', 'cause_id',
                           'rei_id', 'year_id', 'age_group_id']
        self.value_cols = ['draw_{}'.format(i) for i in range(self.n_draws)]
        self.draw_filters = {'metric_id': gbd.metrics.NUMBER,
                             'rei_id': self.rei_id,
                             'sex_id': self.sex_id,
                             'measure_id': self.measure_id,
                             'year_id': self.year_id}

        self.operator = self.get_operator()
        self.draw_source, self.draw_sink = self.get_draw_source_sink()
Ejemplo n.º 6
0
def construct_directories(out_dir, log_dir, cache_dir, resume):
    """
    Create the output directory and the run_all logger. Used by both
    burdenator and dalynator.

    Refuses to proceed when either directory already contains files,
    unless running in resume mode.

    :param out_dir:  The root directory WITH the version number
    :param log_dir:  The path to the log directory
    :param cache_dir: The path to the cache directory
    :param resume: True if this is running in resume mode
    """
    if not resume:
        # A populated directory without resume means a version clash.
        if os.path.isdir(out_dir) and os.listdir(out_dir):
            raise ValueError(
                "Output directory {} contains files and NOT running in "
                "resume mode".format(out_dir))
        if os.path.isdir(log_dir) and os.listdir(log_dir):
            raise ValueError("Log directory {} contains files and NOT "
                             "running in resume mode".format(log_dir))

    for directory in (out_dir, log_dir, cache_dir):
        makedirs_safely(directory)

    if resume:
        # Rotate (rename) the main log so a resumed run starts fresh.
        rotate_logs(out_dir, log_dir)

    makedirs_safely(os.path.join(out_dir, "stderr"))
    def _prepare_with_external_side_effects(self):
        """
        Creates output directories, loggers.

        Returns:
            Nothing
        """
        for directory in (self.output_draws_dir, self.log_dir):
            makedirs_safely(directory)

        if self.verbose:
            level = logging.DEBUG
        else:
            level = logging.INFO
        # The logger is registered globally by name; the returned
        # instance is deliberately discarded.
        _ = create_logger_in_memory(
            "dalynator", level, self.log_dir +
            "/daly_{}_{}.log".format(self.location_id, self.year_id))
Ejemplo n.º 8
0
def get_args_pct_change(parser, cli_args=None):
    """Parse command-line arguments for the percent-change calculation
    and attach a per-location log directory and a DEBUG-level logger.
    """
    if cli_args is None:
        cli_args = sys.argv[1:]
    args = parser.parse_args(cli_args)

    # One log directory per location.
    args.log_dir = os.path.join(args.out_dir, 'log_pct_change',
                                str(args.location_id))
    makedirs_safely(args.log_dir)
    log_name = "pc_{}_{}.log".format(args.start_year, args.end_year)
    args.logger = create_logger_in_memory(
        "dalynator", logging.DEBUG,
        "{}/{}".format(args.log_dir, log_name))

    return args
Ejemplo n.º 9
0
def get_summ_filename(draw_dir, risk_type, location_id, year_id, measure_id):
    """Return the summary-file (i.e. csv file) name for the given argset,
    creating the directory if necessary.

    :param draw_dir: root directory of the draw files
    :param risk_type: RISK_REI_TYPE or ETI_REI_TYPE
    :param location_id: location the summary covers
    :param year_id: year the summary covers
    :param measure_id: measure the summary covers
    :raises ValueError: if risk_type is not a recognized REI type
        (previously an unknown type left file_label unbound and surfaced
        later as a confusing NameError)
    """
    if risk_type == RISK_REI_TYPE:
        file_label = 'risk'
    elif risk_type == ETI_REI_TYPE:
        file_label = 'eti'
    else:
        raise ValueError("Unknown risk_type {}".format(risk_type))
    fn = ("{dd}/upload/{m}/single_year/"
          "upload_{fl}_{l}_{y}.csv".format(dd=draw_dir,
                                           fl=file_label,
                                           l=location_id,
                                           m=measure_id,
                                           y=year_id))
    makedirs_safely(os.path.dirname(fn))
    return fn
Ejemplo n.º 10
0
def df_to_csv(this_df, index_cols, this_out_dir, out_file_basename,
              write_columns_order):
    """Summarize *this_df* and write the result as a CSV named
    *out_file_basename* under *this_out_dir* (created if missing).
    """
    makedirs_safely(this_out_dir)

    summarized = ComputeSummaries(
        this_df,
        write_columns_order,
        index_cols
    ).get_data_frame()

    out_path = os.path.join(this_out_dir, out_file_basename)
    logger.info("Summary file output path {}".format(out_path))

    write_csv(summarized, out_path, write_columns_order=write_columns_order)
Ejemplo n.º 11
0
def rotate_logs(out_dir, log_dir):
    """Rename the existing daly_run_all.log and the stderr directory to
    timestamped versions. Useful during resume, so that we don't keep
    appending to the same log."""
    stamp = time.strftime("%Y-%m-%d_%H:%M:%S", time.localtime())

    main_log = os.path.join(log_dir, "daly_run_all.log")
    if os.path.exists(main_log):
        os.rename(main_log, "{}.{}".format(main_log, stamp))

    stderr_dir = os.path.join(out_dir, "stderr")
    if os.path.exists(stderr_dir):
        os.rename(stderr_dir, "{}.{}".format(stderr_dir, stamp))
        # Re-create the live stderr directory immediately.
        makedirs_safely(stderr_dir)
Ejemplo n.º 12
0
def get_args_and_create_dirs(parser, cli_args=None):
    """Parses the command line using the parser and creates output directory
    and logger. Called by run_pipeline_*. Not used by run_all."""
    if cli_args is None:
        cli_args = sys.argv[1:]
    args = parser.parse_args(cli_args)

    # Resolve defaults for the GBD round and cod/epi input versions.
    args.gbd_round, args.gbd_round_id = ac.populate_gbd_round_args(
        args.gbd_round, args.gbd_round_id)
    if args.cod_version is None:
        args.cod_version = ac.best_version('cod', args.gbd_round_id)
    if args.epi_version is None:
        args.epi_version = ac.best_version('como', args.gbd_round_id)

    # All years for one location share a single draws/log directory.
    root = args.out_dir
    args.cache_dir = '{}/cache'.format(args.out_dir)
    args.log_dir = os.path.join(root, 'log', str(args.location_id))
    args.out_dir = os.path.join(root, 'draws', str(args.location_id))
    makedirs_safely(args.out_dir)
    makedirs_safely(args.log_dir)

    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    args.logger = create_logger_in_memory(
        "dalynator", level, args.log_dir +
        "/daly_{}_{}.log".format(args.location_id, args.year_id))

    # Input draw locations; daly/paf only apply when the namespace
    # carries the corresponding version attribute.
    args.cod_dir = "{}/codcorrect/{}/draws/".format(args.input_data_root,
                                                    args.cod_version)
    args.daly_dir = ("{}/dalynator/{}/draws/".format(
        args.input_data_root, args.daly_version)
        if hasattr(args, 'daly_version') else None)
    args.epi_dir = get_como_folder_structure(
        os.path.join(args.input_data_root, 'como', str(args.epi_version)))
    args.paf_dir = ("{}/pafs/{}".format(args.input_data_root,
                                        args.paf_version)
                    if hasattr(args, 'paf_version') else None)

    return args
Ejemplo n.º 13
0
def construct_args_burdenator_cleanup(parser, cli_args=None):
    """Parse command-line arguments for rearranging the draw files at the
    end of the burdenator run; attaches derived paths and a logger.
    """
    if cli_args is None:
        cli_args = sys.argv[1:]
    args = parser.parse_args(cli_args)
    args.tool_name = 'burdenator'
    args.gbd_round, args.gbd_round_id = ac.populate_gbd_round_args(
        args.gbd_round, args.gbd_round_id)

    # Logs keyed by year and measure under the output root.
    root = args.out_dir
    args.cache_dir = 'FILEPATH'.format(args.out_dir)
    args.log_dir = os.path.join(root, 'log_cleanup',
                                str(args.year_id), str(args.measure_id))
    makedirs_safely(args.log_dir)

    log_name = "FILEPATH".format(
        args.measure_id, args.location_id, args.year_id)
    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    args.logger = create_logger_in_memory(
        "dalynator", level, args.log_dir + "/" + log_name,
        ['aggregator.aggregators', 'jobmon'])

    # Resolve 'best'/None machinery versions, then locate cod/epi inputs.
    if args.codcorrect_version == 'best':
        args.codcorrect_version = ac.best_version(
            'codcorrect', args.gbd_round_id, args.decomp_step)
    if args.fauxcorrect_version == 'best':
        args.fauxcorrect_version = ac.best_version(
            'fauxcorrect', args.gbd_round_id, args.decomp_step)
    if args.epi_version is None:
        args.epi_version = ac.best_version('como', args.gbd_round_id,
                                           args.decomp_step)
    cod = to.cod_or_faux_correct(
        args.input_data_root,
        codcorrect_version=args.codcorrect_version,
        fauxcorrect_version=args.fauxcorrect_version)
    args.cod_dir = cod.abs_path_to_draws
    args.cod_pattern = cod.file_pattern
    args.epi_dir = get_como_folder_structure(os.path.join(
        args.input_data_root, 'como', str(args.epi_version)))
    return args
Ejemplo n.º 14
0
def create_run_all_directories(args):
    """Create the output/log/cache directories and the run_all logger.
    Used by both burdenator and dalynator.

    Refuses to proceed when the output or log directory already contains
    files, unless args.resume is set.

    :param args: parsed namespace; add_run_all_directories_to_args must
        populate out_dir, log_dir, and cache_dir on it
    :raises ValueError: if out_dir or log_dir is non-empty and not resuming
    """
    add_run_all_directories_to_args(args)

    # Check that both directories are empty. If they are not-empty then
    # only continue if we are in resume mode.
    if os.path.isdir(args.out_dir):
        if os.listdir(args.out_dir) and not args.resume:
            raise ValueError(
                "Output directory {} contains files and NOT running in resume mode"
                .format(args.out_dir))

    if os.path.isdir(args.log_dir):
        if os.listdir(args.log_dir) and not args.resume:
            raise ValueError(
                "Log directory {} contains files and NOT running in resume mode"
                .format(args.log_dir))

    makedirs_safely(args.log_dir)
    makedirs_safely(args.out_dir)
    makedirs_safely(args.cache_dir)

    if args.resume:
        # If resuming then rotate (rename) the main log, daly_run_all.log
        rotate_logs(args.out_dir, args.log_dir)

    log_level = logging.DEBUG if args.verbose else logging.INFO
    # The logger is registered globally by name; discard the instance
    # explicitly instead of binding it to an unused local named `logger`.
    _ = create_logger_in_memory("dalynator", log_level,
                                args.log_dir + "/daly_run_all.log")
Ejemplo n.º 15
0
def get_args_burdenator_loc_agg(parser, cli_args=None):
    """Parse command-line arguments for burdenator location aggregation
    and attach the cache directory, log directory, and logger.
    """
    if cli_args is None:
        cli_args = sys.argv[1:]
    args = parser.parse_args(cli_args)

    # Logs keyed by year and measure under the output root.
    root = args.out_dir
    args.cache_dir = '{}/cache'.format(args.out_dir)
    args.log_dir = os.path.join(root, 'log_loc_agg', str(args.year_id),
                                str(args.measure_id))
    makedirs_safely(args.log_dir)

    log_name = "{}_{}_{}_{}.log".format(args.measure_id, args.rei_id,
                                        args.year_id, args.sex_id)
    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    args.logger = create_logger_in_memory("dalynator", level,
                                          args.log_dir + "/" + log_name)

    return args
Ejemplo n.º 16
0
def construct_args_dalynator_upload(parser, cli_args=None):
    """Parse command-line arguments for uploading dalynator data and
    attach the cache directory, log directory, and logger.
    """
    if cli_args is None:
        cli_args = sys.argv[1:]
    args = parser.parse_args(cli_args)
    args.tool_name = 'dalynator'

    # Logs keyed by table type and measure under the output root.
    root = args.out_dir
    args.cache_dir = '{}/cache'.format(args.out_dir)
    args.log_dir = os.path.join(root, 'log_upload', args.table_type,
                                str(args.measure_id))
    makedirs_safely(args.log_dir)

    log_name = "upload_{}_{}.log".format(args.table_type, args.measure_id)
    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    args.logger = create_logger_in_memory("dalynator", level,
                                          args.log_dir + "/" + log_name)

    return args
Ejemplo n.º 17
0
def construct_args_upload(parser, cli_args=None):
    """Parse command-line arguments for uploading data and attach the
    cache directory, log directory, and logger.
    """
    if cli_args is None:
        cli_args = sys.argv[1:]
    args = parser.parse_args(cli_args)

    # Logs keyed by table type under the output root.
    root = args.out_dir
    args.cache_dir = 'FILEPATH'.format(args.out_dir)
    args.log_dir = os.path.join(
        root, 'log_upload', args.table_type)
    makedirs_safely(args.log_dir)

    log_name = "FILEPATH".format(
        args.gbd_process_version_id, args.table_type)
    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    args.logger = create_logger_in_memory(
        "dalynator", level, args.log_dir + "/" + log_name,
        ['aggregator.aggregators', 'jobmon'])

    return args
Ejemplo n.º 18
0
def get_args_and_create_dirs(parser, cli_args=None):
    """Parses the command line using the parser and creates output directory
    and logger. Called by run_pipeline_*. Not used by run_all.

    :param parser: argparse-style parser
    :param cli_args: argument list; defaults to sys.argv[1:]
    :return: parsed namespace augmented with cache/log/draw paths, a
        logger, and resolved cod/epi/daly/paf input locations
    """
    if cli_args is None:
        cli_args = sys.argv[1:]
    args = parser.parse_args(cli_args)

    # Resolve defaults for cod and epi versions FIRST, so the cod draw
    # object is built from concrete version ids. (Previously the object
    # was constructed before 'best' was resolved, so the literal string
    # 'best' could be handed to to.cod_or_faux_correct; the sibling
    # construct_args_burdenator_cleanup resolves before constructing.)
    if args.codcorrect_version == 'best':
        args.codcorrect_version = ac.best_version(
            'codcorrect', args.gbd_round_id, args.decomp_step)
    if args.fauxcorrect_version == 'best':
        args.fauxcorrect_version = ac.best_version(
            'fauxcorrect', args.gbd_round_id, args.decomp_step)
    if args.epi_version is None:
        args.epi_version = ac.best_version('como', args.gbd_round_id,
                                           args.decomp_step)
    cod_object = to.cod_or_faux_correct(
        args.input_data_root,
        codcorrect_version=args.codcorrect_version,
        fauxcorrect_version=args.fauxcorrect_version)

    # Store all years for each location in one directory
    top_out_dir = args.out_dir
    args.cache_dir = 'FILEPATH'.format(args.out_dir)
    makedirs_safely(os.path.join(top_out_dir, 'log_most_detailed'))
    args.log_dir = os.path.join(top_out_dir, 'log_most_detailed',
                                str(args.location_id))
    args.out_dir = os.path.join(top_out_dir, 'draws', str(args.location_id))

    makedirs_safely(args.out_dir)
    makedirs_safely(args.log_dir)

    log_level = logging.DEBUG if args.verbose else logging.INFO
    # NOTE(review): no "/" between log_dir and the (redacted) filename
    # template -- presumably "FILEPATH" begins with a separator; confirm.
    args.logger = create_logger_in_memory(
        "dalynator", log_level,
        args.log_dir + "FILEPATH".format(args.location_id,
                                                args.year_id),
        ['aggregator.aggregators', 'jobmon'])

    args.cod_dir = cod_object.abs_path_to_draws
    args.cod_pattern = cod_object.file_pattern

    # this had daly_version before but I think we still want this
    # differentiated file path
    if hasattr(args, 'tool_name') and args.tool_name == "dalynator":
        args.daly_dir = "FILEPATH".format(args.input_data_root,
                                                        args.output_version)
    else:
        args.daly_dir = None
    # our customers want the flag to be named "epi" not como"
    args.epi_dir = get_como_folder_structure(os.path.join(
        args.input_data_root, 'como', str(args.epi_version)))

    if hasattr(args, 'paf_version'):
        # PAF directory structure has no "draws" sub-folder
        args.paf_dir = "FILEPATH".format(args.input_data_root,
                                           args.paf_version)
    else:
        args.paf_dir = None

    return args