Exemple #1
0
def run_loc(args):
    '''Run dismod for one location. Takes a single tuple so it can be mapped
    over a list of argument tuples.

    Args:
        args(Tuple): tuple of (loc_id, parent_loc, sex_id, year)

    Returns:
        Tuple of (loc_id, Cascade_loc) on success, or (loc_id, None) if
        building or running the Cascade_loc raised an exception

    Raises:
        TypeError, ValueError: if args cannot be unpacked into four values
    '''
    # Unpack outside the try block so loc_id is always bound when the
    # handler below reports a failure.
    loc_id, parent_loc, sex_id, year = args
    try:
        # NOTE(review): c is not defined in this block; presumably a
        # module-level Cascade shared by all calls -- confirm.
        cl = Cascade_loc(loc_id, sex_id, year, c, parent_loc=parent_loc)
        cl.run_dismod()
        cl.summarize_posterior()
        cl.draw()
        return loc_id, cl
    except Exception as e:
        # Best effort: report the failure and signal it to the caller with
        # None instead of aborting the other locations.
        print("{} {}".format(loc_id, e))
        return loc_id, None
def main():
    '''Read command line arguments to run dismod for all child location ids of
    given location ids.

    Args:
        mvid(int): model version id
        location_id(int): parent location id
        sex(str): one of 'male'/'female'
        year_id(int): year id
        cv_iter(int): value forwarded to Cascade as cv_iter
        debug(str, optional): If specified and value == 'debug', will run
            in serial instead of in parallel

    Raises:
        ValueError: if sex is neither 'male' nor 'female'
    '''
    mvid = int(sys.argv[1])
    location_id = int(sys.argv[2])
    sex = sys.argv[3]
    y = int(sys.argv[4])
    cv_iter = int(sys.argv[5])
    # The sixth argument is optional; anything other than the literal
    # "debug" (including its absence) means a normal parallel run.
    debug = len(sys.argv) > 6 and sys.argv[6] == "debug"

    setup_logger()
    log = logging.getLogger(__name__)
    log.info(
        "Starting cascade mvid {} loc {} sex {} year {} cv_iter {}".format(
            mvid, location_id, sex, y, cv_iter))
    # The cascade and parent information are shared across all subprocesses.
    # Make it a global to avoid the memory overhead of passing a copy to
    # each process
    global cascade
    global cl_parent

    # Fail fast on an unknown sex instead of hitting an unbound sex_id
    # after the expensive cascade setup.
    sex_ids = {'male': 0.5, 'female': -0.5}
    if sex not in sex_ids:
        raise ValueError(
            "sex must be 'male' or 'female', got {!r}".format(sex))
    sex_id = sex_ids[sex]

    log.info("Creating cascade")
    cascade = Cascade(mvid, reimport=False, cv_iter=cv_iter)
    log.info("Done with cascade")

    year_split_lvl = cascade.model_version_meta.fix_year.values[0] - 1
    lt = cascade.loctree
    this_lvl = lt.get_nodelvl_by_id(location_id)
    log.info("Generating cascade loc")
    if location_id == 1:
        # Location 1 is always built with sex 0 and year 2000, whatever
        # sex/year were requested on the command line.
        cl_parent = Cascade_loc(location_id, 0, 2000, cascade, reimport=False)
    else:
        cl_parent = Cascade_loc(location_id,
                                sex_id,
                                y,
                                cascade,
                                reimport=False)
    children = lt.get_node_by_id(location_id).children
    num_children = len(children)
    log.info("Done generating cascade loc")

    num_cpus = mp.cpu_count()

    # mp.Pool rejects a size of 0, so keep at least one worker even when the
    # location has no children.
    num_workers = max(1, min(num_cpus, num_children, 10))

    # Run child locations. full_timespan depends only on the parent's level,
    # so it is computed once for all children.
    full_timespan = not (this_lvl >= (year_split_lvl - 1))
    arglist = [(child_loc.id, sex_id, y, full_timespan, debug)
               for child_loc in children]

    if debug:
        log.info('..... RUNNING IN SINGLE PROCESS DEBUG MODE .....')
        res = [run_loc(child_args) for child_args in arglist]
    else:
        log.info(
            "Running {} child locations in parallel with {} processes".format(
                len(arglist), num_workers))
        # Create the pool only after the globals above are assigned, as in
        # the original ordering.
        pool = mp.Pool(num_workers)
        try:
            res = pool.map(run_loc, arglist)
        finally:
            # Shut the workers down even if map raised.
            pool.close()
            pool.join()
        log.info("Done running")

    # run_loc reports success as integer 0; anything else is an error message.
    errors = ['%s: %s' % (str(loc), err) for loc, err in res if err != 0]

    if not errors:
        log.info("No errors found")
    else:
        num_errors = len(errors)
        error_msg = "; ".join(errors)
        log.error("Found {} errors for mvid {} loc {} sex {} year {} cv_iter "
                  "{}: {}".format(num_errors, mvid, location_id, sex, y,
                                  cv_iter, error_msg))
def run_loc(args):
    '''Meant to be called in parallel using multiprocessing. Run
    dismod.

    Builds a Cascade_loc for the location from the module-level cascade and
    cl_parent globals, then runs run_dismod, summarize_posterior, draw and
    predict on it.

    Args:
        args(Tuple[int, int, int, Bool, Bool]): tuple of
            (location_id, sex_id, year_id, full_timespan, debug).
            When full_timespan is true, timespan=50 is passed to Cascade_loc;
            otherwise Cascade_loc's own default is used. When debug is true,
            exceptions propagate to the caller instead of being reported in
            the return value.

    Returns:
        Tuple of location_id and either a string error message or integer 0,
        representing no error
    '''
    gc.collect()
    loc_id, sex_id, year, full_timespan, debug = args
    try:
        extra_kwargs = {'timespan': 50} if full_timespan else {}
        cl = Cascade_loc(loc_id,
                         sex_id,
                         year,
                         cascade,
                         parent_loc=cl_parent,
                         **extra_kwargs)
        cl.run_dismod()
        cl.summarize_posterior()
        cl.draw()
        cl.predict()
    except Exception as e:
        if debug:
            # Debug mode: let the error surface with its full traceback.
            raise
        logging.exception("Failure running location {}".format(loc_id))
        # Some exceptions stringify to ''; fall back to repr so the caller
        # still gets an informative, non-empty message.
        return loc_id, str(e) or repr(e)
    return loc_id, 0
Exemple #4
0
def run_loc(args):
    '''Run dismod for one location. Takes a single tuple so it can be mapped
    over a list of argument tuples.

    Builds a Cascade_loc for the location from the c and cl_parent names
    defined outside this function, then runs run_dismod, summarize_posterior,
    draw and predict on it.

    Args:
        args(Tuple): tuple of
            (location_id, sex_id, year_id, full_timespan, debug).
            When full_timespan is true, timespan=50 is passed to Cascade_loc;
            otherwise Cascade_loc's own default is used. When debug is true,
            exceptions propagate to the caller instead of being reported in
            the return value.

    Returns:
        Tuple of location_id and either a string error message or integer 0,
        representing no error
    '''
    gc.collect()
    loc_id, sex_id, year, full_timespan, debug = args
    try:
        extra_kwargs = {'timespan': 50} if full_timespan else {}
        cl = Cascade_loc(loc_id,
                         sex_id,
                         year,
                         c,
                         parent_loc=cl_parent,
                         **extra_kwargs)
        cl.run_dismod()
        cl.summarize_posterior()
        cl.draw()
        cl.predict()
    except Exception as e:
        if debug:
            # Debug mode: let the error surface with its full traceback.
            raise
        logging.exception("Failure running location {}".format(loc_id))
        # Some exceptions stringify to ''; fall back to repr so the caller
        # still gets an informative, non-empty message.
        return loc_id, str(e) or repr(e)
    return loc_id, 0
Exemple #5
0
    c = Cascade(mvid, reimport=False, cv_iter=cv_iter)

    try:
        j = job.Job(os.path.normpath(os.path.join(c.root_dir, '..')))
        j.start()
    except IOError as e:
        logging.exception(e)
    except Exception as e:
        logging.exception(e)

    year_split_lvl = c.model_version_meta.fix_year.values[0] - 1
    lt = c.loctree
    this_lvl = lt.get_nodelvl_by_id(location_id)
    if location_id == 1:
        cl_parent = Cascade_loc(location_id, 0, 2000, c, reimport=False)
    else:
        cl_parent = Cascade_loc(location_id, sex_id, y, c, reimport=False)
    num_children = len(lt.get_node_by_id(location_id).children)

    num_cpus = mp.cpu_count()

    if not debug:
        pool = mp.Pool(min(num_cpus, num_children, 10))

    # Run child locations
    arglist = []
    for child_loc in lt.get_node_by_id(location_id).children:
        if this_lvl >= (year_split_lvl - 1):
            full_timespan = False
        else:
Exemple #6
0
def run_world(year, cascade, drop_emr=False, reimport=False):
    '''Build the Cascade_loc for location 1 / sex 0 and run the dismod steps
    on it.

    Args:
        year: year forwarded to Cascade_loc
        cascade: cascade object forwarded to Cascade_loc
        drop_emr (bool, False): If True, call gen_data(1, 0, drop_emr=True)
            on the Cascade_loc before running dismod
        reimport (bool, False): forwarded to Cascade_loc as reimport

    Returns:
        the cascade object that was passed in
    '''
    world_loc = Cascade_loc(
        1, 0, year, cascade, timespan=50, reimport=reimport)
    if drop_emr:
        world_loc.gen_data(1, 0, drop_emr=True)
    # The steps run strictly in this order; each method is looked up right
    # before it is called.
    for step_name in ('run_dismod', 'summarize_posterior', 'draw', 'predict'):
        getattr(world_loc, step_name)()
    return cascade
Exemple #7
0
if __name__ == "__main__":
    # Script entry point: builds the Cascade for a model version, then builds
    # and runs a Cascade_loc for super_id with the location-1 Cascade_loc as
    # its parent.

    # Positional command-line arguments.
    mvid = int(sys.argv[1])
    super_id = int(sys.argv[2])
    sex = sys.argv[3]
    y = int(sys.argv[4])

    # Translate the sex label into the numeric code handed to Cascade_loc.
    # NOTE(review): any other label leaves sex_id unbound, so building
    # cl_super below would fail with a NameError.
    if sex == 'male':
        sex_id = 0.5
    elif sex == 'female':
        sex_id = -0.5

    cl_worlds = {}
    c = Cascade(mvid, reimport=False)
    lt = c.loctree
    # Location 1 with sex 0 -- presumably the global, both-sex location;
    # TODO confirm.
    cl_world = Cascade_loc(1, 0, y, c, reimport=False)
    cl_worlds[y] = cl_world

    num_cpus = mp.cpu_count()
    # NOTE(review): neither pool nor lt is used in the visible part of this
    # script; it may continue beyond what is shown here.
    pool = mp.Pool(min(num_cpus, 8))

    # Same object that was stored under cl_worlds[y] a few lines above.
    cl_world = cl_worlds[y]
    cl_super = Cascade_loc(super_id,
                           sex_id,
                           y,
                           c,
                           parent_loc=cl_world,
                           reimport=False)
    cl_super.run_dismod()
    cl_super.summarize_posterior()
    cl_super.draw()
Exemple #8
0
def run_world(year, cascade, drop_emr=False, reimport=False):
    ''' Create the global (location 1, sex 0) cascade_loc and run dismod,
    posterior summary, draws and predictions on it, logging before and after
    each stage.

    Running dismod means this function reads from and writes to the file
    system.

    Args:
        year (int): Year to run dismod for
        cascade (drill.cascade): Cascade object of the model_version_id
            dismod is being run for
        drop_emr (bool, False): If True, exclude excess mortality data
            before running dismod
        reimport (bool, False): if True, read input data from database via
            importer.Importer object. If False, rely on cached csv files
            written during a previous cascade instantiation
            (if a file is missing, will automatically read from db)

    Returns:
        cascade object that was passed in

    '''
    log = logging.getLogger(__name__)
    world_loc = Cascade_loc(
        1, 0, year, cascade, timespan=50, reimport=reimport)
    if drop_emr:
        world_loc.gen_data(1, 0, drop_emr=True)

    # (message logged before, method to call, message logged after)
    stages = (
        ("Starting dismod for year {} loc {}".format(year, world_loc.loc),
         'run_dismod',
         "dismod finished"),
        ("summarizing posterior",
         'summarize_posterior',
         "summarizing posterior finished"),
        ("begin draw", 'draw', "draw finished"),
        ("beginning predict", 'predict', "predict finished"),
    )
    for before_msg, method_name, after_msg in stages:
        log.info(before_msg)
        getattr(world_loc, method_name)()
        log.info(after_msg)
    return cascade