Example #1
def mode_create(session_dir, args):
    validate_session_name(args.session)

    logger.debug("Init is called - Session: %s, Volume: %s"
                 % (args.session, args.volume))
    mkdirp(session_dir, exit_on_err=True, logger=logger)
    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    status_file = os.path.join(session_dir, args.volume, "status")

    if os.path.exists(status_file) and not args.force:
        fail("Session %s already created" % args.session, logger=logger)

    if not os.path.exists(status_file) or args.force:
        ssh_setup(args)
        enable_volume_options(args)

    # Add Rollover time to current time to make sure changelogs
    # will be available if we use this time as start time
    time_to_update = int(time.time()) + get_changelog_rollover_time(
        args.volume)

    run_cmd_nodes("create", args, time_to_update=str(time_to_update))

    if not os.path.exists(status_file) or args.reset_session_time:
        with open(status_file, "w") as f:
            f.write(str(time_to_update))

    sys.stdout.write("Session %s created with volume %s\n" %
                     (args.session, args.volume))

    sys.exit(0)
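Every example on this page relies on a small mkdirp helper whose definition is not shown. As a rough sketch only, assuming it behaves like mkdir -p and that the exit_on_err and logger keywords seen above control error handling, it might look like this:

import os
import sys


def mkdirp(path, exit_on_err=False, logger=None):
    # Illustrative sketch, not the helper actually used by these projects:
    # create `path` recursively and treat "already exists" as success.
    try:
        os.makedirs(path)
    except OSError as e:
        if os.path.isdir(path):
            return
        if logger is not None:
            logger.error("Failed to create directory %s: %s" % (path, e))
        if exit_on_err:
            sys.exit(1)
        raise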
Example #2
def brickfind_crawl(brick, args):
    if brick.endswith("/"):
        brick = brick[0:len(brick)-1]

    working_dir = os.path.dirname(args.outfile)
    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)

    with open(args.outfile, "a+") as fout:
        brick_path_len = len(brick)

        def output_callback(path, filter_result):
            path = path.strip()
            path = path[brick_path_len+1:]
            output_write(fout, path, args.output_prefix, encode=True)

        ignore_dirs = [os.path.join(brick, dirname)
                       for dirname in
                       conf.get_opt("brick_ignore_dirs").split(",")]

        find(brick, callback_func=output_callback,
             ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())
Example #3
def brickfind_crawl(brick, args):
    if brick.endswith("/"):
        brick = brick[0:len(brick)-1]

    working_dir = os.path.dirname(args.outfile)
    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)

    with open(args.outfile, "a+") as fout:
        brick_path_len = len(brick)

        def output_callback(path, filter_result, is_dir):
            path = path.strip()
            path = path[brick_path_len+1:]

            if args.type == "both":
                output_write(fout, path, args.output_prefix,
                             encode=(not args.no_encode), tag=args.tag,
                             field_separator=args.field_separator)
            else:
                if (is_dir and args.type == "d") or (
                        (not is_dir) and args.type == "f"):
                    output_write(fout, path, args.output_prefix,
                                 encode=(not args.no_encode), tag=args.tag,
                                 field_separator=args.field_separator)

        ignore_dirs = [os.path.join(brick, dirname)
                       for dirname in
                       conf.get_opt("brick_ignore_dirs").split(",")]

        find(brick, callback_func=output_callback,
             ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())
Example #4
def main():
    args = _get_args()
    mkdirp(conf.get_opt("session_dir"), exit_on_err=True)

    if args.mode == "list":
        session_dir = conf.get_opt("session_dir")
    else:
        session_dir = os.path.join(conf.get_opt("session_dir"),
                                   args.session)

    if not os.path.exists(session_dir) and args.mode not in ["create", "list"]:
        fail("Invalid session %s" % args.session)

    vol_dir = os.path.join(session_dir, args.volume)
    if not os.path.exists(vol_dir) and args.mode not in ["create", "list"]:
        fail("Session %s not created with volume %s" %
            (args.session, args.volume))

    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "cli.log")
    setup_logger(logger, log_file, args.debug)

    # globals() will have all the functions already defined.
    # mode_<args.mode> will be the function name to be called
    globals()["mode_" + args.mode](session_dir, args)
Example #5
def changelog_crawl(brick, end, args):
    """
    Init function, prepares working dir and calls Changelog query
    """
    if brick.endswith("/"):
        brick = brick[0:len(brick)-1]

    # WORKING_DIR/BRICKHASH/OUTFILE
    working_dir = os.path.dirname(args.outfile)
    brickhash = hashlib.sha1(brick)
    brickhash = str(brickhash.hexdigest())
    working_dir = os.path.join(working_dir, brickhash)

    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)
    create_file(args.outfile + ".gfids", exit_on_err=True, logger=logger)

    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "changelog.%s.log" % brickhash)

    logger.info("%s Started Changelog Crawl. Start: %s, End: %s"
                % (brick, args.start, end))
    get_changes(brick, working_dir, log_file, end, args)
Example #6
def cook(path, caller_cwd):

    def delete_if_exists(path):
        if os.path.isfile(path):
            os.remove(path)

    local_cwd = os.getcwd()
    # Check if `path` is an absolute path to the recipe
    if os.path.isabs(path):
        recipe_path = os.path.realpath(path)
        recipe_basename = os.path.basename(recipe_path)
        mkdirp('.recipes')
        delete_if_exists(os.path.join(local_cwd, '.recipes', recipe_basename))
        shutil.copyfile(recipe_path, os.path.join(local_cwd, '.recipes', recipe_basename))
        recipe_path = os.path.join('/vagrant', '.recipes', recipe_basename)
    # Check if `path` is a relative path to the recipe (from the caller's perspective)
    elif os.path.isfile(os.path.realpath(os.path.join(caller_cwd, path))):
        recipe_path = os.path.realpath(os.path.join(caller_cwd, path))
        recipe_basename = os.path.basename(recipe_path)
        mkdirp('.recipes')
        delete_if_exists(os.path.join(local_cwd, '.recipes', recipe_basename))
        shutil.copyfile(recipe_path, os.path.join(local_cwd, '.recipes', recipe_basename))
        recipe_path = os.path.join('/vagrant', '.recipes', recipe_basename)
    # Check if `path + (.sh)` is a relative path to the recipe (from the dev-box's perspective)
    elif os.path.isfile(os.path.realpath(os.path.join(local_cwd, 'recipes', path + '.sh'))):
        recipe_path = os.path.realpath(os.path.join(local_cwd, 'recipes', path + '.sh'))
        recipe_basename = os.path.basename(recipe_path)
        recipe_path = os.path.join('/vagrant', 'recipes', recipe_basename)
    # Recipe file was not found
    else:
        print_error('Error: recipe was not found')
        return
    print_green('# DevBox is now cooking')
    return run('sh {0}'.format(recipe_path))
Example #7
def mode_pre(session_dir, args):
    """
    Read from Session file and write to session.pre file
    """
    endtime_to_update = int(time.time()) - int(
        conf.get_opt("changelog_rollover_time"))
    status_file = os.path.join(session_dir, args.volume, "status")
    status_file_pre = status_file + ".pre"

    mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)

    start = 0
    try:
        with open(status_file) as f:
            start = int(f.read().strip())
    except ValueError:
        pass
    except (OSError, IOError) as e:
        fail("Error Opening Session file %s: %s"
             % (status_file, e), logger=logger)

    logger.debug("Pre is called - Session: %s, Volume: %s, "
                 "Start time: %s, End time: %s"
                 % (args.session, args.volume, start, endtime_to_update))

    run_in_nodes(args.volume, start, args)

    with open(status_file_pre, "w", buffering=0) as f:
        f.write(str(endtime_to_update))

    sys.stdout.write("Generated output file %s\n" % args.outfile)
Example #8
    def __init__(self, root_folder, channel, version, compress, pretty_print, cached = False):
        self.data_folder = os.path.join(root_folder, channel, version)
        mkdirp(self.data_folder)
        self.compress = compress
        self.pretty_print = pretty_print
        self.max_filter_id = None
        self.cached = cached
        if cached:
            self.cache = {}

        # Load filter-tree
        self.filter_tree = self.json_from_file(
            "filter-tree.json",
            {'_id': 0, 'name': 'reason'}
        )

        # Load histogram definitions
        self.histograms = self.json_from_file("histograms.json", {})

        # Load histogram revision meta-data
        self.revisions = self.json_from_file("revisions.json", {})

        # Histograms.json cache
        self.histograms_json_cache = [(None, None)] * HGRAMS_JSON_CACHE_SIZE
        self.histograms_json_cache_next = 0
Example #9
def mode_pre(session_dir, args):
    global gtmpfilename

    """
    Read from Session file and write to session.pre file
    """
    endtime_to_update = int(time.time()) - get_changelog_rollover_time(
        args.volume)
    status_file = os.path.join(session_dir, args.volume, "status")
    status_file_pre = status_file + ".pre"

    mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)

    # If Pre status file exists and running pre command again
    if os.path.exists(status_file_pre) and not args.regenerate_outfile:
        fail("Post command is not run after last pre, "
             "use --regenerate-outfile")

    start = 0
    try:
        with open(status_file) as f:
            start = int(f.read().strip())
    except ValueError:
        pass
    except (OSError, IOError) as e:
        fail("Error Opening Session file %s: %s"
             % (status_file, e), logger=logger)

    logger.debug("Pre is called - Session: %s, Volume: %s, "
                 "Start time: %s, End time: %s"
                 % (args.session, args.volume, start, endtime_to_update))

    prefix = datetime.now().strftime("%Y%m%d-%H%M%S-%f-")
    gtmpfilename = prefix + next(tempfile._get_candidate_names())

    run_cmd_nodes("pre", args, start=start, end=-1, tmpfilename=gtmpfilename)

    # Merger
    if args.full:
        cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
        execute(cmd,
                exit_msg="Failed to merge output files "
                "collected from nodes", logger=logger)
    else:
        # Read each Changelogs db and generate finaldb
        create_file(args.outfile, exit_on_err=True, logger=logger)
        outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)
        write_output(args.outfile, outfilemerger, args.field_separator)

    try:
        os.remove(args.outfile + ".db")
    except (IOError, OSError):
        pass

    run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename)

    with open(status_file_pre, "w", buffering=0) as f:
        f.write(str(endtime_to_update))

    sys.stdout.write("Generated output file %s\n" % args.outfile)
Example #10
def main():
    global gtmpfilename

    args = None

    try:
        args = _get_args()
        mkdirp(conf.get_opt("session_dir"), exit_on_err=True)

        # force the default session name if mode is "query"
        if args.mode == "query":
            args.session = "default"

        if args.mode == "list":
            session_dir = conf.get_opt("session_dir")
        else:
            session_dir = os.path.join(conf.get_opt("session_dir"),
                                       args.session)

        if not os.path.exists(session_dir) and \
                args.mode not in ["create", "list", "query"]:
            fail("Invalid session %s" % args.session)

        # "default" is a system defined session name
        if args.mode in ["create", "post", "pre", "delete"] and \
                args.session == "default":
            fail("Invalid session %s" % args.session)

        vol_dir = os.path.join(session_dir, args.volume)
        if not os.path.exists(vol_dir) and args.mode not in \
                ["create", "list", "query"]:
            fail("Session %s not created with volume %s" %
                 (args.session, args.volume))

        mkdirp(os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume),
               exit_on_err=True)
        log_file = os.path.join(conf.get_opt("log_dir"),
                                args.session,
                                args.volume,
                                "cli.log")
        setup_logger(logger, log_file, args.debug)

        # globals() will have all the functions already defined.
        # mode_<args.mode> will be the function name to be called
        globals()["mode_" + args.mode](session_dir, args)
    except KeyboardInterrupt:
        if args is not None:
            if args.mode == "pre" or args.mode == "query":
                # cleanup session
                if gtmpfilename is not None:
                    # no more interrupts until we clean up
                    signal.signal(signal.SIGINT, signal.SIG_IGN)
                    run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename)

        # Interrupted, exit with non zero error code
        sys.exit(2)
Example #11
 def __init__(self, input_queue, output_queue,
                    work_folder, aws_cred):
     super(DownloaderProcess, self).__init__()
     self.input_queue = input_queue
     self.output_queue = output_queue
     self.work_folder = work_folder
     mkdirp(self.work_folder)
     self.input_bucket = "telemetry-published-v2"
     self.aws_cred = aws_cred
     self.s3 = S3Connection(**self.aws_cred)
     self.bucket = self.s3.get_bucket(self.input_bucket, validate = False)
Example #12
def mode_post(args):
    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    status_file = os.path.join(session_dir, args.volume,
                     "%s.status" % urllib.quote_plus(args.brick))

    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    status_file_pre = status_file + ".pre"

    if os.path.exists(status_file_pre):
        os.rename(status_file_pre, status_file)
        sys.exit(0)
Example #13
def mode_create(session_dir, args):
    logger.debug("Init is called - Session: %s, Volume: %s"
                 % (args.session, args.volume))

    execute(["gluster", "volume", "info", args.volume],
            exit_msg="Unable to get volume details",
            logger=logger)

    mkdirp(session_dir, exit_on_err=True, logger=logger)
    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    status_file = os.path.join(session_dir, args.volume, "status")

    if os.path.exists(status_file) and not args.force:
        fail("Session %s already created" % args.session, logger=logger)

    if not os.path.exists(status_file) or args.force:
        ssh_setup(args)

        execute(["gluster", "volume", "set",
                 args.volume, "build-pgfid", "on"],
                exit_msg="Failed to set volume option build-pgfid on",
                logger=logger)
        logger.info("Volume option set %s, build-pgfid on" % args.volume)

        execute(["gluster", "volume", "set",
                 args.volume, "changelog.changelog", "on"],
                exit_msg="Failed to set volume option "
                "changelog.changelog on", logger=logger)
        logger.info("Volume option set %s, changelog.changelog on"
                    % args.volume)

        execute(["gluster", "volume", "set",
                 args.volume, "changelog.capture-del-path", "on"],
                exit_msg="Failed to set volume option "
                "changelog.capture-del-path on", logger=logger)
        logger.info("Volume option set %s, changelog.capture-del-path on"
                    % args.volume)

    # Add Rollover time to current time to make sure changelogs
    # will be available if we use this time as start time
    time_to_update = int(time.time()) + get_changelog_rollover_time(
        args.volume)

    run_cmd_nodes("create", args, time_to_update=str(time_to_update))

    if not os.path.exists(status_file) or args.reset_session_time:
        with open(status_file, "w", buffering=0) as f:
            f.write(str(time_to_update))

    sys.exit(0)
Example #14
def mode_create(args):
    session_dir = os.path.join(conf.get_opt("session_dir"),
                               args.session)
    status_file = os.path.join(session_dir, args.volume,
                     "%s.status" % urllib.parse.quote_plus(args.brick))

    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)

    if not os.path.exists(status_file) or args.reset_session_time:
        with open(status_file, "w", buffering=0) as f:
            f.write(args.time_to_update)

    sys.exit(0)
Example #15
 def publish_results(self):
     # Create work folder for update process
     update_folder = os.path.join(self.work_folder, "update")
     shutil.rmtree(update_folder, ignore_errors = True)
     mkdirp(update_folder)
     # Update results
     updateresults(self.data_folder, update_folder, self.bucket_name,
                   self.prefix, self.cache_folder, self.region,
                   self.aws_cred, NB_WORKERS)
     self.put_file(self.files_processed_path, 'FILES_PROCESSED')
     self.put_file(self.files_missing_path, 'FILES_MISSING')
     # Clear data_folder
     shutil.rmtree(self.data_folder, ignore_errors = True)
     mkdirp(self.data_folder)
Example #16
def mode_post(args):
    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    status_file = os.path.join(
        session_dir, args.volume,
        "%s.status" % urllib.parse.quote_plus(args.brick))

    mkdirp(os.path.join(session_dir, args.volume),
           exit_on_err=True,
           logger=logger)
    status_file_pre = status_file + ".pre"

    if os.path.exists(status_file_pre):
        os.rename(status_file_pre, status_file)
        sys.exit(0)
Example #17
def mode_create(args):
    session_dir = os.path.join(conf.get_opt("session_dir"),
                               args.session)
    status_file = os.path.join(session_dir, args.volume,
                     "%s.status" % urllib.quote_plus(args.brick))

    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)

    if not os.path.exists(status_file) or args.reset_session_time:
        with open(status_file, "w", buffering=0) as f:
            f.write(args.time_to_update)

    sys.exit(0)
Example #18
    def parse(self):
        if not self.initialized:
            self.initialize()
        opt = self.parser.parse_args()

        if opt.input_streams is None:
            if isinstance(self, TestOptions):
                opt.input_streams = []
            else:
                raise ValueError("input_streams must be set")

        # if opt.debug:
        #     opt.results_dir_base = opt.results_dir_base.split("/")[0] + "/debug_results"
        opt.no_core_driver = True
        opt.num_workers = 0
        opt.results_dir = opt.results_dir_base + time.strftime(
            "_%Y_%m_%d_%H_%M_%S")

        self.opt = opt

        if isinstance(self, TestOptions):
            options = load_json(
                os.path.join("results", opt.model_dir, "opt.json"))
            for arg in options:
                if arg not in ["debug"]:
                    setattr(opt, arg, options[arg])
            opt.no_core_driver = True
        else:
            mkdirp(opt.results_dir)
            # save a copy of current code
            # code_dir = os.path.dirname(os.path.realpath(__file__))
            # code_zip_filename = os.path.join(opt.results_dir, "code.zip")
            # make_zipfile(code_dir, code_zip_filename,
            #              enclosing_dir="code", exclude_paths=["results"], exclude_extensions=[".pyc", ".ipynb"])
        self.display_save()

        assert opt.num_hard <= opt.num_negatives
        opt.device = torch.device(
            "cuda:%d" % opt.device_ids[0] if opt.device >= 0 else "cpu")
        if opt.device.type == "cuda":
            opt.bsz = opt.bsz * len(opt.device_ids)
            opt.test_bsz = opt.test_bsz * len(opt.device_ids)
        opt.h5driver = None if opt.no_core_driver else "core"
        opt.vfeat_flag = "vfeat" in opt.input_streams
        opt.vcpt_flag = "vcpt" in opt.input_streams
        opt.sub_flag = "sub" in opt.input_streams
        self.opt = opt
        return opt
Example #19
def mode_cleanup(args):
    working_dir = os.path.join(conf.get_opt("working_dir"), args.session,
                               args.volume, args.tmpfilename)

    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"), args.session, args.volume,
                            "changelog.log")

    setup_logger(logger, log_file)

    try:
        shutil.rmtree(working_dir, onerror=handle_rm_error)
    except (OSError, IOError) as e:
        logger.error("Failed to delete working directory: %s" % e)
        sys.exit(1)
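The cleanup mode above passes a handle_rm_error callback to shutil.rmtree, but that helper is not shown here. A minimal sketch, assuming it only ignores entries that have already disappeared and re-raises everything else, could be:

import errno


def handle_rm_error(func, path, exc_info):
    # Hypothetical onerror handler for shutil.rmtree: swallow ENOENT races,
    # propagate any other failure so the caller can log and exit.
    exc = exc_info[1]
    if isinstance(exc, OSError) and exc.errno == errno.ENOENT:
        return
    raise exc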
Example #20
def check_coords(cubes, write_to='./.jazz/offending_cube'):
    """Check coordinates are matching.  If they are not this could be
    quite a problem!  However, some models have files which read in with
    slightly different coordinates (CCSM4, for example).  In this case
    the difference is minuscule so we can safely replace the coordinates.
    This method replaces coordinates but also informs the user it is doing
    this. It also prints and optionally saves the summary of the offending
    cube.

    Args:
        cubes (iris.cube.CubeList): list of cubes to check
        write_to (Optional[str]): path to which to write warnings

    """
    # Remove attributes from auxiliary coordinates - these can sometimes
    # prevent merging and concatenation.
    for cube in cubes:
        for coord in cube.aux_coords:
            coord.attributes = {}

    # Get the names of the spatial coords
    coord_names = [coord.name() for coord in cubes[0].dim_coords]
    if 'time' in coord_names:
        coord_names.remove('time')

    for coord_name in coord_names:
        # Make a list of the coordinates' points for each cube
        points_list = [cube.coord(coord_name).points for cube in cubes]

        # Loop over the list of points for all the cubes
        for p in xrange(len(points_list) - 1):

            # If the coordinates are different from the first set,
            # replace them with the first set
            if not (points_list[p + 1] == points_list[0]).all():
                cubes[p + 1].replace_coord(cubes[0].coord(coord_name))

                # Notify user
                warnings.warn('Replacing the coordinates of a cube. '
                              'Offending cube is {}'.format(
                                  cubes[p + 1].summary()))

                if write_to is not None:
                    utils.mkdirp(write_to)
                    utils.write_file(
                        cubes[p + 1].summary(),
                        '{0}_{1}_{2}'.format(write_to, coord_name, p))
Example #21
def mode_create(session_dir, args):
    logger.debug("Init is called - Session: %s, Volume: %s" %
                 (args.session, args.volume))

    execute(["gluster", "volume", "info", args.volume],
            exit_msg="Unable to get volume details",
            logger=logger)

    mkdirp(session_dir, exit_on_err=True, logger=logger)
    mkdirp(os.path.join(session_dir, args.volume),
           exit_on_err=True,
           logger=logger)
    status_file = os.path.join(session_dir, args.volume, "status")

    if os.path.exists(status_file) and not args.force:
        fail("Session %s already created" % args.session, logger=logger)

    if not os.path.exists(status_file) or args.force:
        ssh_setup(args)

        execute(["gluster", "volume", "set", args.volume, "build-pgfid", "on"],
                exit_msg="Failed to set volume option build-pgfid on",
                logger=logger)
        logger.info("Volume option set %s, build-pgfid on" % args.volume)

        execute([
            "gluster", "volume", "set", args.volume, "changelog.changelog",
            "on"
        ],
                exit_msg="Failed to set volume option "
                "changelog.changelog on",
                logger=logger)
        logger.info("Volume option set %s, changelog.changelog on" %
                    args.volume)

    # Add Rollover time to current time to make sure changelogs
    # will be available if we use this time as start time
    time_to_update = int(time.time()) + get_changelog_rollover_time(
        args.volume)

    run_cmd_nodes("create", args, time_to_update=str(time_to_update))

    if not os.path.exists(status_file) or args.reset_session_time:
        with open(status_file, "w", buffering=0) as f:
            f.write(str(time_to_update))

    sys.exit(0)
Example #22
def mode_create(session_dir, args):
    logger.debug("Init is called - Session: %s, Volume: %s" %
                 (args.session, args.volume))

    cmd = ["gluster", 'volume', 'info', args.volume, "--xml"]
    _, data, _ = execute(cmd,
                         exit_msg="Failed to Run Gluster Volume Info",
                         logger=logger)
    try:
        tree = etree.fromstring(data)
        statusStr = tree.find('volInfo/volumes/volume/statusStr').text
    except (ParseError, AttributeError) as e:
        fail("Invalid Volume: %s" % e, logger=logger)

    if statusStr != "Started":
        fail("Volume %s is not online" % args.volume, logger=logger)

    mkdirp(session_dir, exit_on_err=True, logger=logger)
    mkdirp(os.path.join(session_dir, args.volume),
           exit_on_err=True,
           logger=logger)
    status_file = os.path.join(session_dir, args.volume, "status")

    if os.path.exists(status_file) and not args.force:
        fail("Session %s already created" % args.session, logger=logger)

    if not os.path.exists(status_file) or args.force:
        ssh_setup(args)
        enable_volume_options(args)

    # Add Rollover time to current time to make sure changelogs
    # will be available if we use this time as start time
    time_to_update = int(time.time()) + get_changelog_rollover_time(
        args.volume)

    run_cmd_nodes("create", args, time_to_update=str(time_to_update))

    if not os.path.exists(status_file) or args.reset_session_time:
        with open(status_file, "w", buffering=0) as f:
            f.write(str(time_to_update))

    sys.stdout.write("Session %s created with volume %s\n" %
                     (args.session, args.volume))

    sys.exit(0)
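Several of these create and query modes shell out to gluster volume info <volume> --xml and read volInfo/volumes/volume/statusStr from the result. For orientation, here is a trimmed, hypothetical rendering of that XML and the lookup the examples perform (the other elements of the real output are omitted):

from xml.etree import ElementTree as etree

# Hypothetical, heavily trimmed volume-info XML; only the element that the
# examples above actually read is shown.
data = b"""<cliOutput>
  <volInfo>
    <volumes>
      <volume>
        <statusStr>Started</statusStr>
      </volume>
    </volumes>
  </volInfo>
</cliOutput>"""

tree = etree.fromstring(data)
print(tree.find('volInfo/volumes/volume/statusStr').text)  # Started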
Example #23
def check_coords(cubes, write_to="./.jazz/offending_cube"):
    """Check coordinates are matching.  If they are not this could be
    quite a problem!  However, some models have files which read in with
    slightly different coordinates (CCSM4, for example).  In this case
    the difference is minuscule so we can safely replace the coordinates.
    This method replaces coordinates but also informs the user it is doing
    this. It also prints and optionally saves the summary of the offending
    cube.

    Args:
        cubes (iris.cube.CubeList): list of cubes to check
        write_to (Optional[str]): path to which to write warnings

    """
    # Remove attributes from auxiliary coordinates - these can sometimes
    # prevent merging and concatenation.
    for cube in cubes:
        for coord in cube.aux_coords:
            coord.attributes = {}

    # Get the names of the spatial coords
    coord_names = [coord.name() for coord in cubes[0].dim_coords]
    if "time" in coord_names:
        coord_names.remove("time")

    for coord_name in coord_names:
        # Make a list of the coordinates' points for each cube
        points_list = [cube.coord(coord_name).points for cube in cubes]

        # Loop over the list of points for all the cubes
        for p in xrange(len(points_list) - 1):

            # If the coordinates are different from the first set,
            # replace them with the first set
            if not (points_list[p + 1] == points_list[0]).all():
                cubes[p + 1].replace_coord(cubes[0].coord(coord_name))

                # Notify user
                warnings.warn(
                    "Replacing the coordinates of a cube. " "Offending cube is {}".format(cubes[p + 1].summary())
                )

                if write_to is not None:
                    utils.mkdirp(write_to)
                    utils.write_file(cubes[p + 1].summary(), "{0}_{1}_{2}".format(write_to, coord_name, p))
Example #24
def brickfind_crawl(brick, args):
    if brick.endswith("/"):
        brick = brick[0:len(brick) - 1]

    working_dir = os.path.dirname(args.outfile)
    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)

    with open(args.outfile, "a+") as fout:
        brick_path_len = len(brick)

        def mtime_filter(path):
            try:
                st = os.lstat(path)
            except (OSError, IOError) as e:
                if e.errno == ENOENT:
                    st = None
                else:
                    raise

            if st and (st.st_mtime > args.start or st.st_ctime > args.start):
                return True

            return False

        def output_callback(path):
            path = path.strip()
            path = path[brick_path_len + 1:]
            output_write(fout, path, args.output_prefix)

        ignore_dirs = [
            os.path.join(brick, dirname)
            for dirname in conf.get_opt("brick_ignore_dirs").split(",")
        ]

        if args.full:
            find(brick, callback_func=output_callback, ignore_dirs=ignore_dirs)
        else:
            find(brick,
                 callback_func=output_callback,
                 filter_func=mtime_filter,
                 ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())
Example #25
def render(pname):
    "renders the packer template and writes it's json to something like elife-builder/packer/pname.json"
    assert utils.mkdirp('packer'), "failed to create the 'packer' dir"
    out = json.dumps(render_template(pname), indent=4)
    fname = template_path(pname, '.json')
    open(fname, 'w').write(out)
    print out
    print 'wrote',fname
    return fname
Example #26
    def __init__(self, data_dir, subdir, subsubdir, sub_filetypes):
        '''Function that creates the subdirectories.'''

        self.data_dir = data_dir
        self.subdir = subdir
        self.subsubdir = subsubdir
        self.sub_filetypes = sub_filetypes

        # Build the <galaxy>/<subsub>/<filetype> directory tree under data_dir
        [mkdirp(str(self.data_dir + galaxy)) for galaxy in subdir]
        [
            mkdirp(str(self.data_dir + galaxy + '/' + subsub))
            for galaxy in subdir for subsub in subsubdir
        ]
        [
            mkdirp(str(self.data_dir + galaxy + '/' + subsub + '/' + type))
            for galaxy in subdir for subsub in subsubdir
            for type in sub_filetypes
        ]
Example #27
def mode_create(session_dir, args):
    logger.debug("Init is called - Session: %s, Volume: %s"
                 % (args.session, args.volume))

    cmd = ["gluster", 'volume', 'info', args.volume, "--xml"]
    _, data, _ = execute(cmd,
                         exit_msg="Failed to Run Gluster Volume Info",
                         logger=logger)
    try:
        tree = etree.fromstring(data)
        statusStr = tree.find('volInfo/volumes/volume/statusStr').text
    except (ParseError, AttributeError) as e:
        fail("Invalid Volume: %s" % e, logger=logger)

    if statusStr != "Started":
        fail("Volume %s is not online" % args.volume, logger=logger)

    mkdirp(session_dir, exit_on_err=True, logger=logger)
    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    status_file = os.path.join(session_dir, args.volume, "status")

    if os.path.exists(status_file) and not args.force:
        fail("Session %s already created" % args.session, logger=logger)

    if not os.path.exists(status_file) or args.force:
        ssh_setup(args)
        enable_volume_options(args)

    # Add Rollover time to current time to make sure changelogs
    # will be available if we use this time as start time
    time_to_update = int(time.time()) + get_changelog_rollover_time(
        args.volume)

    run_cmd_nodes("create", args, time_to_update=str(time_to_update))

    if not os.path.exists(status_file) or args.reset_session_time:
        with open(status_file, "w", buffering=0) as f:
            f.write(str(time_to_update))

    sys.stdout.write("Session %s created with volume %s\n" %
                     (args.session, args.volume))

    sys.exit(0)
Example #28
def brickfind_crawl(brick, args):
    if brick.endswith("/"):
        brick = brick[0:len(brick)-1]

    working_dir = os.path.dirname(args.outfile)
    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)

    with open(args.outfile, "a+") as fout:
        brick_path_len = len(brick)

        def mtime_filter(path):
            try:
                st = os.lstat(path)
            except (OSError, IOError) as e:
                if e.errno == ENOENT:
                    st = None
                else:
                    raise

            if st and (st.st_mtime > args.start or st.st_ctime > args.start):
                return True

            return False

        def output_callback(path):
            path = path.strip()
            path = path[brick_path_len+1:]
            output_write(fout, path, args.output_prefix)

        ignore_dirs = [os.path.join(brick, dirname)
                       for dirname in
                       conf.get_opt("brick_ignore_dirs").split(",")]

        if args.full:
            find(brick, callback_func=output_callback,
                 ignore_dirs=ignore_dirs)
        else:
            find(brick, callback_func=output_callback,
                 filter_func=mtime_filter,
                 ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())
Example #29
def mode_cleanup(args):
    working_dir = os.path.join(conf.get_opt("working_dir"),
                               args.session,
                               args.volume)

    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "changelog.log")

    setup_logger(logger, log_file)

    try:
        shutil.rmtree(working_dir, onerror=handle_rm_error)
    except (OSError, IOError) as e:
        logger.error("Failed to delete working directory: %s" % e)
        sys.exit(1)
Example #30
def main():
    try:
        args = _get_args()
        mkdirp(conf.get_opt("session_dir"), exit_on_err=True)

        # force the default session name if mode is "query"
        if args.mode == "query":
            args.session = "default"

        if args.mode == "list":
            session_dir = conf.get_opt("session_dir")
        else:
            session_dir = os.path.join(conf.get_opt("session_dir"),
                                       args.session)

        if not os.path.exists(session_dir) and \
                args.mode not in ["create", "list", "query"]:
            fail("Invalid session %s" % args.session)

        # "default" is a system defined session name
        if args.mode in ["create", "post", "pre", "delete"] and \
                args.session == "default":
            fail("Invalid session %s" % args.session)

        vol_dir = os.path.join(session_dir, args.volume)
        if not os.path.exists(vol_dir) and args.mode not in \
                ["create", "list", "query"]:
            fail("Session %s not created with volume %s" %
                 (args.session, args.volume))

        mkdirp(os.path.join(conf.get_opt("log_dir"), args.session,
                            args.volume),
               exit_on_err=True)
        log_file = os.path.join(conf.get_opt("log_dir"), args.session,
                                args.volume, "cli.log")
        setup_logger(logger, log_file, args.debug)

        # globals() will have all the functions already defined.
        # mode_<args.mode> will be the function name to be called
        globals()["mode_" + args.mode](session_dir, args)
    except KeyboardInterrupt:
        # Interrupted, exit with non zero error code
        sys.exit(2)
Example #31
def build(src_dir: str, dest_dir: str) -> None:
    for file_path in traverse_files_in_path(src_dir):
        extension = os.path.splitext(file_path)[1]
        html_content = None
        dest_file = get_full_dest_file_path(file_path, src_dir, dest_dir)
        if extension == '.md':
            try:
                html_content = generate_html_from_markdown_file(
                    file_path, src_dir, dest_dir)
            except Exception as exp:
                print(f"{file_path} {str(exp)}")
                continue

        mkdirp(dest_file.dest_file_folder)
        if html_content:
            with open(dest_file.dest_file_path, "wb") as fout:
                fout.write(html_content.encode("utf-8"))
        else:
            copyfile(file_path, dest_file.dest_file_path)
Example #32
def brickfind_crawl(brick, args):
    if brick.endswith("/"):
        brick = brick[0:len(brick) - 1]

    working_dir = os.path.dirname(args.outfile)
    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)

    with open(args.outfile, "a+") as fout:
        brick_path_len = len(brick)

        def output_callback(path, filter_result, is_dir):
            path = path.strip()
            path = path[brick_path_len + 1:]

            if args.type == "both":
                output_write(fout,
                             path,
                             args.output_prefix,
                             encode=(not args.no_encode),
                             tag=args.tag,
                             field_separator=args.field_separator)
            else:
                if (is_dir and args.type == "d") or ((not is_dir)
                                                     and args.type == "f"):
                    output_write(fout,
                                 path,
                                 args.output_prefix,
                                 encode=(not args.no_encode),
                                 tag=args.tag,
                                 field_separator=args.field_separator)

        ignore_dirs = [
            os.path.join(brick, dirname)
            for dirname in conf.get_opt("brick_ignore_dirs").split(",")
        ]

        find(brick, callback_func=output_callback, ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())
Example #33
def changelog_crawl(brick, start, end, args):
    """
    Init function, prepares working dir and calls Changelog query
    """
    if brick.endswith("/"):
        brick = brick[0:len(brick) - 1]

    # WORKING_DIR/BRICKHASH/OUTFILE
    working_dir = os.path.dirname(args.outfile)
    brickhash = hashlib.sha1(brick.encode())
    brickhash = str(brickhash.hexdigest())
    working_dir = os.path.join(working_dir, brickhash)

    mkdirp(working_dir, exit_on_err=True, logger=logger)

    log_file = os.path.join(conf.get_opt("log_dir"), args.session, args.volume,
                            "changelog.%s.log" % brickhash)

    logger.info("%s Started Changelog Crawl. Start: %s, End: %s" %
                (brick, start, end))
    return get_changes(brick, working_dir, log_file, start, end, args)
Example #34
def main():
    args = _get_args()
    mkdirp(conf.get_opt("session_dir"), exit_on_err=True)

    if args.mode == "list":
        session_dir = conf.get_opt("session_dir")
    else:
        session_dir = os.path.join(conf.get_opt("session_dir"), args.session)

    if not os.path.exists(session_dir) and args.mode not in ["create", "list"]:
        fail("Invalid session %s" % args.session)

    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"), args.session, args.volume,
                            "cli.log")
    setup_logger(logger, log_file, args.debug)

    # globals() will have all the functions already defined.
    # mode_<args.mode> will be the function name to be called
    globals()["mode_" + args.mode](session_dir, args)
Example #35
def create_markdown_file_from_trello_card(
        card: TrelloCard, trello_folder_name: str = "content/trello") -> None:
    labels = card.labels or []
    if labels:
        card_folder_name = slugify(labels[0].name)
    else:
        card_folder_name = "unclassified"
    card_folder_path = os.path.join(
        os.path.dirname(__file__),
        trello_folder_name,
        card_folder_name,
        datetime.strftime(card.dateLastActivity, "%Y-%m-%d"),
    )
    mkdirp(card_folder_path)
    # TODO: add lastmodified, labels, ect.
    markdown_meta = yaml.dump({"title": card.name})
    markdown_content = "---\n{}\n---\n{}".format(markdown_meta, card.desc)
    markdown_file_name = "{}.md".format(slugify(card.name))
    markdown_file_path = os.path.join(card_folder_path, markdown_file_name)
    with open(markdown_file_path, "wb") as fout:
        fout.write(markdown_content.encode("utf-8"))
Example #36
def download_file(stackname,
                  path,
                  destination=None,
                  allow_missing="False",
                  use_bootstrap_user="******"):
    """
    Downloads `path` from `stackname` putting it into the `destination` folder, or the `destination` file if it exists and it is a file.

    If `allow_missing` is "True", a not existing `path` will be skipped without errors.

    If `use_bootstrap_user` is "True", the owner_ssh user will be used for connecting instead of the standard deploy user.

    Boolean arguments are expressed as strings as this is the idiomatic way of passing them from the command line.
    """
    if not destination:
        destination = '.'
    utils.mkdirp(destination)
    with stack_conn(stackname, username=_user(use_bootstrap_user)):
        if _should_be_skipped(path, allow_missing):
            return
        get(path, destination, use_sudo=True)
Example #37
 def run(self):
     if self.compress:
         def write(path):
             return gzip.open(path, 'w')
     else:
         def write(path):
             return open(path, 'w')
     s3 = s3_connect(self.region, **self.aws_cred)
     bucket = s3.get_bucket(self.input_bucket, validate = False)
     while True:
         msg = self.queue.get()
         if msg == None:
             break
         source_prefix, target_path = msg
         retries = 0
         while retries < NB_RETRIES:
             try:
                 retries += 1
                 k = Key(bucket)
                 k.key = source_prefix
                 data = k.get_contents_as_string()
                 if self.decompress:
                     fobj = StringIO(data)
                     with gzip.GzipFile(mode = 'rb', fileobj = fobj) as zobj:
                         data = zobj.read()
                     fobj.close()
                 # Create target folder
                 mkdirp(os.path.dirname(target_path))
                 with write(target_path) as f:
                     f.write(data)
                 break
             except:
                 print >> sys.stderr, "Failed to download %s to %s" % msg
                 print_exc(file = sys.stderr)
                 time.sleep(4 * ((retries - 1) ** 2))
         if retries >= NB_RETRIES:
             sys.exit(1)
         if self.output_queue != None:
             self.output_queue.put(target_path)
     s3.close()
Example #38
    def setup(self):
        # Remove work folder, no failures allowed
        if os.path.exists(self.work_folder):
            rmtree(self.work_folder, ignore_errors = False)

        # Create work folder
        mkdirp(self.work_folder)
        mkdirp(self.output_folder)

        job_bundle_target = os.path.join(self.work_folder, "job_bundle.tar.gz")
        # If job_bundle_bucket is None then the bundle is stored locally
        if self.job_bundle_bucket == None:
            copyfile(self.job_bundle_prefix, job_bundle_target)
        else:
            s3 = S3Connection(**self.aws_cred)
            bucket = s3.get_bucket(self.job_bundle_bucket, validate = False)
            key = bucket.get_key(self.job_bundle_prefix)
            key.get_contents_to_filename(job_bundle_target)

        # Extract job_bundle
        self.processor_path = os.path.join(self.work_folder, "code")
        mkdirp(self.processor_path)
        tar = tarfile.open(job_bundle_target)
        tar.extractall(path = self.processor_path)
        tar.close()

        # Create processor
        self.processor = Popen(
            ['./processor', os.path.relpath(self.output_folder, self.processor_path)],
            cwd = self.processor_path,
            bufsize = 1,
            stdin = PIPE,
            stdout = sys.stdout,
            stderr = sys.stderr
        )
Example #39
def setup(override):
    """
    Setup repository according to the plouffile.
    """
    if not valid_repo():
        utils.error('Not a plouf repository. (No \'.plouffile\' file found.)')
        return
    
    try:
        data = {}
        with open(get_pf_path(), 'r') as pf:
            data = json.load(pf)
        
        for name, proj in data.setdefault("projects", {}).items():
            make_structure(name, proj.setdefault("type", "exec"), proj.setdefault("tests", False), override)
            utils.info('Creating structure for %s...' % name)
            utils.list_files(utils.get_file_path(name))
        
        extern_path = utils.get_file_path("extern")
        if "tests" in data:
            utils.mkdirp([extern_path])

            for _, framework_url in data["tests"].items():
                file_path = os.path.join(extern_path, os.path.basename(framework_url))

                if os.path.exists(file_path):
                    utils.warning('%s file already exists.' % file_path)
                    if not click.confirm('Do you want to override it', prompt_suffix='?'):
                        continue
                utils.info('Fetching %s...' % framework_url, nl=False)
                urllib.request.urlretrieve(framework_url, file_path)
                click.echo('[OK]')
        
        utils.success('Setup complete.')
        
    except Exception as e:
        click.echo(
            click.style(e, fg="red"),
            err=True
        )
Example #40
    def train(self):
        mkdirp(self.config.result_path)
        result_path = self.config.result_path + '/' + start_time()
        mkdirp(result_path)
        filename = os.path.join(result_path, 'train-log.txt')

        if self.config.MultiGPU > 0 and self.config.n_gpu > 1:
            logger.info("Using {} GPU ".format(torch.cuda.device_count()))
            self.Model = nn.DataParallel(self.Model)

        else:
            logger.info("Using Single GPU ")

        self.Model.to(self.device)

        optimizer = optim.Adam(self.Model.parameters(), lr=self.config.lr)

        logger.info("Now Training..")
        self.Model.train()
        for epoch in range(self.config.epochs):
            self.train_epoch(epoch, self.Train_loader, optimizer, self.Model, filename, self.device)

        if self.config.MultiGPU > 0 and self.config.n_gpu > 1:
            checkpoint = {
                "model": self.Model.module.state_dict(),
                "config": self.config,
                "epoch": self.config.epochs
            }

        else:
            checkpoint = {
                "model": self.Model.state_dict(),
                "config": self.config,
                "epoch": self.config.epochs
            }

        logger.info("Now Saving model checkpoint to {}".format(result_path))
        model_name = os.path.join(result_path, 'model.ckpt')
        torch.save(checkpoint, model_name)
Example #41
def inference():
    parser = argparse.ArgumentParser(description="Image Captioning Evaluation")
    parser.add_argument('--vocab_path', default='data/vocab.pickle', type=str)
    parser.add_argument('--img_path', default='data/test2017/', type=str)
    parser.add_argument('--test_visual_feature_path',
                        default='data/visual_feature_test.pickle',
                        type=str)
    parser.add_argument("--test_path", type=str, help="model path")
    parser.add_argument('--num_workers', default=4, type=int)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--is_train', type=str, default=False)
    parser.add_argument('--eval_coco_idx_path',
                        default='data/test_coco_idx.npy',
                        type=str)
    parser.add_argument("--eval_path",
                        default='eval/',
                        type=str,
                        help="evaluation result path")
    parser.add_argument("--shuffle", default='False', type=str)
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--max_sub_len', type=int, default=30)

    args = parser.parse_args()

    checkpoint = torch.load(os.path.join(args.test_path, 'model.ckpt'))
    eval_dataloader = get_eval_dataloader(args)
    translator = Translator(args, checkpoint)

    eval_result = translate(args, translator, eval_dataloader)

    mkdirp(args.eval_path)
    result_path = os.path.join(args.eval_path, start_time())
    mkdirp(result_path)

    filename = os.path.join(result_path, 'pred.jsonl')
    save_jsonl(eval_result, filename)
    logger.info("Save predict json file at {}".format(result_path))
Example #42
def make_structure(target_name, what, add_tests, override_flag):
    """
    Depending on the target type what, creates the folder structure.
    """
    rel_paths = [ target_name ]
    files = [
        { "type": "cmake_base", "path": os.path.join(target_name, "CMakeLists.txt") }
    ]
    
    if what == "exec":
        sample_path = os.path.join(target_name, "sample")
        rel_paths += [
            sample_path,
            os.path.join(target_name, "src")
            ]
        files.append({ "type": "main_sample", "path": os.path.join(sample_path, "main.cpp") })
    elif what == "library":
        include_path = os.path.join(target_name, "include", target_name)
        rel_paths += [
            include_path,
            os.path.join(target_name, "src")
        ]
        files.append({ "type": "header_base", "path": os.path.join(include_path, target_name + ".hpp") })
    
    if add_tests:
        tests_path = os.path.join(target_name, "tests")
        rel_paths.append(tests_path)
        files.append({ "type": "main_tests", "path": os.path.join(tests_path, "main_tests.cpp") })
    
    paths = [ utils.get_file_path(k) for k in rel_paths ]

    # creating folders
    utils.mkdirp(paths)

    # creating templates
    make_templates(files, target_name, override_flag)
Example #43
def gzipclone(source_folder, target_folder, decompress, compress):
    shutil.rmtree(target_folder, ignore_errors = True)
    if decompress:
        def read(path):
            return gzip.open(path, 'r')
    else:
        def read(path):
            return open(path, 'r')
    if compress:
        def write(path):
            return gzip.open(path, 'w')
    else:
        def write(path):
            return open(path, 'w')
    # Walk source_folder
    for path, folder, files in os.walk(source_folder):
        for f in files:
            source_file = os.path.join(path, f)
            relpath = os.path.relpath(source_file, source_folder)
            target_file = os.path.join(target_folder, relpath)
            mkdirp(os.path.dirname(target_file))
            with read(source_file) as i:
                with write(target_file) as o:
                    shutil.copyfileobj(i, o)
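A hypothetical call to the gzipclone helper above, mirroring a plain directory tree into a gzip-compressed copy (both folder names are made up for illustration):

# Copy ./raw_logs to ./packed_logs, gzip-compressing every file on the way.
gzipclone("raw_logs", "packed_logs", decompress=False, compress=True)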
Example #44
 def __init__(self, input_queue, work_folder, bucket, prefix, region, aws_cred):
     self.input_queue_name       = input_queue
     self.work_folder            = work_folder
     self.data_folder            = os.path.join(work_folder, 'data')
     self.bucket_name            = bucket
     self.prefix                 = prefix
     self.region                 = region
     self.aws_cred               = aws_cred
     self.analysis_bucket_name   = "jonasfj-telemetry-analysis"
     if self.prefix != '' and not self.prefix.endswith('/'):
         self.prefix += '/'
     # Clear the work folder
     shutil.rmtree(self.work_folder, ignore_errors = True)
     self.s3 = s3_connect(self.region, **self.aws_cred)
     self.bucket = self.s3.get_bucket(self.bucket_name, validate = False)
     self.analysis_bucket = self.s3.get_bucket(self.analysis_bucket_name,
                                               validate = False)
     mkdirp(self.data_folder)
     self.cache_folder = os.path.join(self.work_folder, "cache")
     mkdirp(self.cache_folder)
     self.files_missing_path = os.path.join(self.work_folder, 'FILES_MISSING')
     self.files_processed_path = os.path.join(self.work_folder, 'FILES_PROCESSED')
     self.get_file('FILES_PROCESSED', self.files_processed_path)
     self.get_file('FILES_MISSING', self.files_missing_path)
Example #45
def mode_query(session_dir, args):
    # Verify volume status
    cmd = ["gluster", 'volume', 'info', args.volume, "--xml"]
    _, data, _ = execute(cmd,
                         exit_msg="Failed to Run Gluster Volume Info",
                         logger=logger)
    try:
        tree = etree.fromstring(data)
        statusStr = tree.find('volInfo/volumes/volume/statusStr').text
    except (ParseError, AttributeError) as e:
        fail("Invalid Volume: %s" % e, logger=logger)

    if statusStr != "Started":
        fail("Volume %s is not online" % args.volume, logger=logger)

    mkdirp(session_dir, exit_on_err=True, logger=logger)
    mkdirp(os.path.join(session_dir, args.volume),
           exit_on_err=True,
           logger=logger)
    mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)

    # Configure cluster for password-less SSH
    ssh_setup(args)

    # Enable volume options for changelog capture
    enable_volume_options(args)

    # Start query command processing
    if args.since_time:
        start = args.since_time
        logger.debug("Query is called - Session: %s, Volume: %s, "
                     "Start time: %s" % ("default", args.volume, start))

        run_cmd_nodes("query", args, start=start)

        # Merger
        # Read each Changelogs db and generate finaldb
        create_file(args.outfile, exit_on_err=True, logger=logger)
        outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)
        write_output(args, outfilemerger)

        try:
            os.remove(args.outfile + ".db")
        except (IOError, OSError):
            pass

        run_cmd_nodes("cleanup", args)

        sys.stdout.write("Generated output file %s\n" % args.outfile)
    else:
        fail("Please specify --since-time option")
Example #46
def mode_query(session_dir, args):
    # Verify volume status
    cmd = ["gluster", 'volume', 'info', args.volume, "--xml"]
    _, data, _ = execute(cmd,
                         exit_msg="Failed to Run Gluster Volume Info",
                         logger=logger)
    try:
        tree = etree.fromstring(data)
        statusStr = tree.find('volInfo/volumes/volume/statusStr').text
    except (ParseError, AttributeError) as e:
        fail("Invalid Volume: %s" % e, logger=logger)

    if statusStr != "Started":
        fail("Volume %s is not online" % args.volume, logger=logger)

    mkdirp(session_dir, exit_on_err=True, logger=logger)
    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)

    # Configure cluster for password-less SSH
    ssh_setup(args)

    # Enable volume options for changelog capture
    enable_volume_options(args)

    # Start query command processing
    if args.since_time:
        start = args.since_time
        logger.debug("Query is called - Session: %s, Volume: %s, "
                     "Start time: %s"
                     % ("default", args.volume, start))

        run_cmd_nodes("query", args, start=start)

        # Merger
        # Read each changelog DB and generate the final DB
        create_file(args.outfile, exit_on_err=True, logger=logger)
        outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)
        write_output(args, outfilemerger)

        try:
            os.remove(args.outfile + ".db")
        except (IOError, OSError):
            pass

        run_cmd_nodes("cleanup", args)

        sys.stdout.write("Generated output file %s\n" % args.outfile)
    else:
        fail("Please specify --since-time option")
Exemple #47
0
# -*- coding: utf-8 -*-

import os
import matplotlib.pyplot as plt

from framework.mongo import database
from website import settings

from utils import plot_dates, mkdirp


node_collection = database["node"]

FIG_PATH = os.path.join(settings.ANALYTICS_PATH, "figs", "features")
mkdirp(FIG_PATH)


def main():
    dates = [
        record["date_created"]
        for record in node_collection.find({"is_folder": True, "is_dashboard": {"$ne": True}}, {"date_created": True})
    ]
    plot_dates(dates)
    plt.title("folders ({0} total)".format(len(dates)))
    plt.savefig(os.path.join(FIG_PATH, "folder-actions.png"))
    plt.close()


if __name__ == "__main__":
    main()
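Exemple #47 (and Exemple #52 further down) also imports a plot_dates helper alongside mkdirp. Its real implementation is not reproduced on this page; a plausible stand-in, purely an assumption for illustration, histograms the datetimes on the current pyplot figure so the caller can still set a title and save the figure afterwards:

import matplotlib.dates as mdates
import matplotlib.pyplot as plt


def plot_dates(dates, bins=50):
    # Histogram a list of datetime objects on the current pyplot figure.
    values = mdates.date2num(dates)
    plt.hist(values, bins=bins)
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
    plt.gcf().autofmt_xdate()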
Exemple #48
0
                        action="store_true")
    parser.add_argument("--output-prefix",
                        help="File prefix in output",
                        default=".")
    parser.add_argument("--type", default="both")
    parser.add_argument("-N",
                        "--only-namespace-changes",
                        help="List only namespace changes",
                        action="store_true")

    return parser.parse_args()


if __name__ == "__main__":
    args = _get_args()
    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"), args.session, args.volume,
                            "changelog.log")
    setup_logger(logger, log_file, args.debug)

    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.quote_plus(args.brick))
    status_file_pre = status_file + ".pre"
    mkdirp(os.path.join(session_dir, args.volume),
           exit_on_err=True,
           logger=logger)

    end = -1
    if args.only_query:
        start = args.start
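The setup_logger call above (and in several later snippets) is another project helper that is not shown here. A minimal equivalent, offered only as an assumed sketch, attaches a file handler to the given logger and raises its level to DEBUG when requested:

import logging


def setup_logger(logger, log_file, debug=False):
    # Attach a file handler to an existing logger and set its verbosity.
    logger.setLevel(logging.DEBUG if debug else logging.INFO)
    handler = logging.FileHandler(log_file)
    handler.setFormatter(logging.Formatter(
        "[%(asctime)s] %(levelname)s %(name)s: %(message)s"))
    logger.addHandler(handler)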
Exemple #49
0
def mode_query(session_dir, args):
    global gtmpfilename

    # Verify volume status
    cmd = ["gluster", 'volume', 'info', args.volume, "--xml"]
    _, data, _ = execute(cmd,
                         exit_msg="Failed to Run Gluster Volume Info",
                         logger=logger)
    try:
        tree = etree.fromstring(data)
        statusStr = tree.find('volInfo/volumes/volume/statusStr').text
    except (ParseError, AttributeError) as e:
        fail("Invalid Volume: %s" % e, logger=logger)

    if statusStr != "Started":
        fail("Volume %s is not online" % args.volume, logger=logger)

    mkdirp(session_dir, exit_on_err=True, logger=logger)
    mkdirp(os.path.join(session_dir, args.volume),
           exit_on_err=True,
           logger=logger)
    mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)

    # Configure cluster for password-less SSH
    ssh_setup(args)

    # Enable volume options for changelog capture
    enable_volume_options(args)

    # Test options
    if not args.since_time and not args.end_time and not args.full:
        fail(
            "Please specify either {--since-time and optionally --end-time} "
            "or --full",
            logger=logger)

    if args.since_time and args.end_time and args.full:
        fail(
            "Please specify either {--since-time and optionally --end-time} "
            "or --full, but not both",
            logger=logger)

    if args.end_time and not args.since_time:
        fail("Please specify --since-time as well", logger=logger)

    # Start query command processing
    start = -1
    end = -1
    if args.since_time:
        start = args.since_time
        if args.end_time:
            end = args.end_time
    else:
        start = 0  # --full option is handled separately

    logger.debug("Query is called - Session: %s, Volume: %s, "
                 "Start time: %s, End time: %s" %
                 ("default", args.volume, start, end))

    prefix = datetime.now().strftime("%Y%m%d-%H%M%S-%f-")
    gtmpfilename = prefix + next(tempfile._get_candidate_names())

    run_cmd_nodes("query",
                  args,
                  start=start,
                  end=end,
                  tmpfilename=gtmpfilename)

    # Merger
    if args.full:
        cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
        execute(cmd,
                exit_msg="Failed to merge output files "
                "collected from nodes",
                logger=logger)
    else:
        # Read each changelog DB and generate the final DB
        create_file(args.outfile, exit_on_err=True, logger=logger)
        outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)
        write_output(args.outfile, outfilemerger, args.field_separator)

    try:
        os.remove(args.outfile + ".db")
    except (IOError, OSError):
        pass

    run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename)

    sys.stdout.write("Generated output file %s\n" % args.outfile)
Exemple #50
0
def run_cmd_nodes(task, args, **kwargs):
    global node_outfiles
    nodes = get_nodes(args.volume)
    pool = []
    for num, node in enumerate(nodes):
        host, brick = node[1].split(":")
        host_uuid = node[0]
        cmd = []
        opts = {}

        # tmpfilename is valid only for tasks: pre, query and cleanup
        tmpfilename = kwargs.get("tmpfilename", "BADNAME")

        node_outfile = os.path.join(conf.get_opt("working_dir"), args.session,
                                    args.volume, tmpfilename,
                                    "tmp_output_%s" % num)

        if task == "pre":
            if vol_statusStr != "Started":
                fail("Volume %s is not online" % args.volume, logger=logger)

            # If Full backup is requested or start time is zero, use brickfind
            change_detector = conf.get_change_detector("changelog")
            tag = None
            if args.full:
                change_detector = conf.get_change_detector("brickfind")
                tag = args.tag_for_full_find.strip()
                if tag == "":
                    tag = '""' if not is_host_local(host_uuid) else ""

            node_outfiles.append(node_outfile)
            # remote file will be copied into this directory
            mkdirp(os.path.dirname(node_outfile),
                   exit_on_err=True,
                   logger=logger)

            FS = args.field_separator
            if not is_host_local(host_uuid):
                FS = "'" + FS + "'"

            cmd = [change_detector,
                   args.session,
                   args.volume,
                   host,
                   brick,
                   node_outfile] + \
                ([str(kwargs.get("start")), str(kwargs.get("end"))]
                    if not args.full else []) + \
                ([tag] if tag is not None else []) + \
                ["--output-prefix", args.output_prefix] + \
                (["--debug"] if args.debug else []) + \
                (["--no-encode"] if args.no_encode else []) + \
                (["--only-namespace-changes"] if args.only_namespace_changes
                 else []) + \
                (["--field-separator", FS] if args.full else [])

            opts["node_outfile"] = node_outfile
            opts["copy_outfile"] = True
        elif task == "query":
            # If Full backup is requested or start time is zero, use brickfind
            tag = None
            change_detector = conf.get_change_detector("changelog")
            if args.full:
                change_detector = conf.get_change_detector("brickfind")
                tag = args.tag_for_full_find.strip()
                if tag == "":
                    tag = '""' if not is_host_local(host_uuid) else ""

            node_outfiles.append(node_outfile)
            # remote file will be copied into this directory
            mkdirp(os.path.dirname(node_outfile),
                   exit_on_err=True,
                   logger=logger)

            FS = args.field_separator
            if not is_host_local(host_uuid):
                FS = "'" + FS + "'"

            cmd = [change_detector,
                   args.session,
                   args.volume,
                   host,
                   brick,
                   node_outfile] + \
                ([str(kwargs.get("start")), str(kwargs.get("end"))]
                    if not args.full else []) + \
                ([tag] if tag is not None else []) + \
                ["--only-query"] + \
                ["--output-prefix", args.output_prefix] + \
                (["--debug"] if args.debug else []) + \
                (["--no-encode"] if args.no_encode else []) + \
                (["--only-namespace-changes"]
                    if args.only_namespace_changes else []) + \
                (["--field-separator", FS] if args.full else [])

            opts["node_outfile"] = node_outfile
            opts["copy_outfile"] = True
        elif task == "cleanup":
            # After a pre/query run, clean up the working directory and
            # other temp files. Remove the directory into which
            # node_outfile was copied on the main node.
            try:
                os.remove(node_outfile)
            except (OSError, IOError):
                logger.warn("Failed to cleanup temporary file %s" %
                            node_outfile)
                pass

            cmd = [conf.get_opt("nodeagent"),
                   "cleanup",
                   args.session,
                   args.volume,
                   os.path.dirname(node_outfile)] + \
                (["--debug"] if args.debug else [])
        elif task == "create":
            if vol_statusStr != "Started":
                fail("Volume %s is not online" % args.volume, logger=logger)

            # When glusterfind create, create session directory in
            # each brick nodes
            cmd = [conf.get_opt("nodeagent"),
                   "create",
                   args.session,
                   args.volume,
                   brick,
                   kwargs.get("time_to_update")] + \
                (["--debug"] if args.debug else []) + \
                (["--reset-session-time"] if args.reset_session_time
                 else [])
        elif task == "post":
            # Rename pre status file to actual status file in each node
            cmd = [conf.get_opt("nodeagent"),
                   "post",
                   args.session,
                   args.volume,
                   brick] + \
                (["--debug"] if args.debug else [])
        elif task == "delete":
            # When glusterfind delete, cleanup all the session files/dirs
            # from each node.
            cmd = [conf.get_opt("nodeagent"),
                   "delete",
                   args.session,
                   args.volume] + \
                (["--debug"] if args.debug else [])

        if cmd:
            p = Process(target=node_cmd,
                        args=(host, host_uuid, task, cmd, args, opts))
            p.start()
            pool.append(p)

    for num, p in enumerate(pool):
        p.join()
        if p.exitcode != 0:
            logger.warn("Command %s failed in %s" % (task, nodes[num][1]))
            if task in ["create", "delete"]:
                fail("Command %s failed in %s" % (task, nodes[num][1]))
            elif task == "pre" and args.disable_partial:
                sys.exit(1)
Exemple #51
0
def run_build(build, store):
    """
    Run any build specified in the database to be built.

    This is the main entry point.
    """

    # Run scripts in the buildset in the context of the build settings
    dlog("Starting build of buildset " + build.buildset.name)

    # Make sure that the build dir is present
    co_dir = os.path.expanduser(OPTIONS.build_area)
    mkdirp(co_dir)

    build.work_dir = unicode(co_dir)
    build.start_time = datetime.utcnow()

    store = Store.of(build)
    store.commit()
 
    # Run all scripts in this buildset
    index = 0

    # Run script
    logger = log_stdout()

    bs = store.find(
        db.build_script_status,
        db.build_script_status.build_id == build.id,
    ).order_by(Asc(db.build_script_status.idx))
    build_script_statuses = list(bs)
    store.commit()

    for build_status in build_script_statuses:

        build_status.start_time = datetime.utcnow()
        store.commit()

        dlog("------------------------------------------------------------------")

        if OPTIONS.stop_index <= index:
            dlog("Skipping rest of scripts in buildset as requested by set option", build_status)
            break

        if OPTIONS.start_index > index:
            dlog("Skipping by until index " + str(OPTIONS.start_index) + " (current step " + str(index) + ")",
                 build_status)
            index += 1
            continue

        index += 1
        def do_log(msg):
            logger(msg)
            build_status.log += unicode(msg)
        ss = build_status.buildset_script

        if ss.script:
            scriptpath = build_status.buildset_script.script.path
            build_status.exit_code = run_script(do_log, scriptpath)
            #    run_script(do_log, spath + os.sep + build_status.buildset_script.script.path)

        build_status.end_time = datetime.utcnow()
        if not build_status.exit_code:
            build_status.buildset_script.last_duration = (build_status.end_time - 
                                                          build_status.start_time).seconds

    build.end_time = datetime.utcnow()
    store.commit()

    #buildset.updateMetadata(timings=timings)

    #timings[None] = (buildset.name, end_time - start_time)
    
    dlog("Build completed")
Exemple #52
0
# -*- coding: utf-8 -*-

import os
import matplotlib.pyplot as plt

from framework.mongo import database
from website import settings

from utils import plot_dates, mkdirp


comment_collection = database['comment']

FIG_PATH = os.path.join(settings.ANALYTICS_PATH, 'figs', 'features')
mkdirp(FIG_PATH)


def main():
    dates = [
        record['date_created']
        for record in comment_collection.find({}, {'date_created': True})
    ]
    plot_dates(dates)
    plt.title('comments ({0} total)'.format(len(dates)))
    plt.savefig(os.path.join(FIG_PATH, 'comment-actions.png'))
    plt.close()


if __name__ == '__main__':
    main()
Exemple #53
0
def startMosaic(opts):
    killMosaic(opts.print_only)
    build.build(False, False)

    meta_dirs = []
    tile_dirs = []
    global_dir = conf.getGlobalsDir(opts.dataset)

    # set up fault-tolerance dir if required
    fault_tolerance_dir = conf.getFaultToleranceDir(opts.dataset)
    if opts.fault_tolerant_mode:
        shutil.rmtree(fault_tolerance_dir, True)
        utils.mkdirp(fault_tolerance_dir, conf.FILE_GROUP)

    perf_events_dir = conf.getPerfEventsDir(opts.dataset)
    if opts.enable_perf_event_collection:
        utils.mkdirp(perf_events_dir, conf.FILE_GROUP)

    for i in range(0, int(opts.nmic)):
        meta_dirs.append(
            conf.getMicSubdir(conf.SG_DATAPATH_VERTEX_ENGINE[i], opts.dataset,
                              "meta", i))
        tile_dirs.append(
            conf.getMicSubdir(conf.SG_DATAPATH_VERTEX_ENGINE[i], opts.dataset,
                              "tile", i))

    in_memory_mode_int = 1 if opts.in_memory_mode else 0

    meta_dirs_string = ":".join(meta_dirs)
    tile_dirs_string = ":".join(tile_dirs)

    enable_tile_partitioning_int = 1 if opts.enable_tile_partitioning else 0
    enable_fault_tolerance_int = 1 if opts.fault_tolerant_mode else 0
    enable_perf_event_collection_int = 1 if opts.enable_perf_event_collection else 0

    # for selective scheduling
    use_selective_scheduling_int = 1 if \
        conf.SG_ALGORITHM_ENABLE_SELECTIVE_SCHEDULING[opts.algorithm] else 0
    if opts.dataset in conf.SG_DATASET_DISABLE_SELECTIVE_SCHEDULING:
        use_selective_scheduling_int = 0

    # For pinning, count threads and determine if we need to use smt or not.
    count_tile_readers = conf.SG_NREADER
    count_tile_processors = conf.SG_NPROCESSOR

    edge_engine_per_socket = opts.nmic / topo.NUM_SOCKET

    count_threads_per_edge_engine = (
        opts.count_indexreader + opts.count_vertex_fetcher +
        opts.count_vertex_reducer + count_tile_readers + count_tile_processors)
    count_threads_per_socket = (
        count_threads_per_edge_engine * edge_engine_per_socket +
        opts.count_globalreducer / topo.NUM_SOCKET)

    use_smt_int = 1 if count_threads_per_socket >= topo.NUM_PHYSICAL_CPU_PER_SOCKET else 0

    # Set the read-tiles ring buffer size to the in-memory value only if
    # not running on the MIC and in-memory mode is activated.
    read_tiles_rb_size = conf.SG_RB_SIZE_READ_TILES
    if opts.in_memory_mode:
        read_tiles_rb_size = conf.SG_RB_SIZE_READ_TILES_IN_MEMORY

    args = [
        "--algorithm",
        opts.algorithm,
        "--max-iterations",
        opts.max_iterations,
        "--nmic",
        opts.nmic,
        "--count-applier",
        opts.count_applier,
        "--count-globalreducer",
        opts.count_globalreducer,
        "--count-globalfetcher",
        opts.count_globalfetcher,
        "--count-indexreader",
        opts.count_indexreader,
        "--count-vertex-reducer",
        opts.count_vertex_reducer,
        "--count-vertex-fetcher",
        opts.count_vertex_fetcher,
        "--in-memory-mode",
        in_memory_mode_int,
        "--paths-meta",
        meta_dirs_string,
        "--paths-tile",
        tile_dirs_string,
        "--path-globals",
        global_dir,
        "--use-selective-scheduling",
        use_selective_scheduling_int,
        "--path-fault-tolerance-output",
        fault_tolerance_dir,
        "--enable-fault-tolerance",
        enable_fault_tolerance_int,
        "--enable-tile-partitioning",
        enable_tile_partitioning_int,
        "--count-tile-reader",
        count_tile_readers,
        "--local-fetcher-mode",
        opts.local_fetcher_mode,
        "--global-fetcher-mode",
        opts.global_fetcher_mode,
        "--enable-perf-event-collection",
        enable_perf_event_collection_int,
        "--path-perf-events",
        perf_events_dir,
        "--count-tile-processors",
        count_tile_processors,
        "--use-smt",
        use_smt_int,
        "--host-tiles-rb-size",
        conf.SG_RB_SIZE_HOST_TILES,
        "--local-reducer-mode",
        opts.local_reducer_mode,
        "--processed-rb-size",
        conf.SG_RB_SIZE_PROCESSED,
        "--read-tiles-rb-size",
        read_tiles_rb_size,
        "--tile-processor-mode",
        opts.tile_processor_mode,
        "--tile-processor-input-mode",
        opts.tile_processor_input_mode,
        "--tile-processor-output-mode",
        opts.tile_processor_output_mode,
        "--count-followers",
        opts.count_followers,
    ]

    if opts.enable_log:
        log_dir = os.path.join(conf.LOG_ROOT, (conf.getWeightedName(
            opts.dataset, conf.SG_ALGORITHM_WEIGHTED[opts.algorithm])))
        utils.mkdirp(log_dir, conf.FILE_GROUP)
        args = args + ["--log", log_dir]

    if opts.debug:
        b = conf.DBIN_MOSAIC
    else:
        b = conf.RBIN_MOSAIC

    # We need sudo for scif
    args = [b] + args

    if opts.gdb:
        args = ["gdb", "--args"] + args

    # We need sudo for scif
    # args = ["sudo", "LD_LIBRARY_PATH=/usr/lib64/:$LD_LIBRARY_PATH"] + args
    # args = ["sudo", "valgrind"] + args
    if opts.run == "perfstat":
        args = [
            "perf", "stat", "-B", "-e",
            "cache-references,cache-misses,cycles,instructions,branches,faults,migrations"
        ] + args
    if opts.run == "likwid":
        max_cpu_id = multiprocessing.cpu_count() - 1
        args = [
            "likwid-perfctr", "-f", "-g", "NUMA", "-g", "L2", "-g", "L2CACHE",
            "-g", "BRANCH", "-g", "CYCLE_ACTIVITY", "-g", "L3", "-g",
            "L3CACHE", "-c",
            "0-%d" % max_cpu_id
        ] + args

    args = ["sudo"] + args

    if not opts.print_only:
        if opts.gdb:
            utils.run(opts.print_only, *args)
        else:
            out_file = utils.getVertexEngineLogName(opts)
            utils.run_output(opts.print_only, out_file, *args)
Exemple #54
0
                # Generating the stp
                np.random.seed(i * 100 + xi + occurr)
                stp = generate_stp(occurr, xi, 1 * pq.s,
                                   np.arange(5, 5 * (xi), 5) * pq.ms)
                # Merging the stp in the first xi sts
                sts_pool = [0] * xi
                for st_id, st in enumerate(stp):
                    sts_pool[st_id] = stg._pool_two_spiketrains(st, sts[st_id])
                # Storing datasets containing stps
                if i == 0:
                    sts_rep['sts_%iocc_%ixi' %
                            (occurr, xi)] = [sts_pool + sts[xi:]]
                    sts_rep['stp_%iocc_%ixi' % (occurr, xi)] = [stp]
                else:
                    sts_rep['sts_%iocc_%ixi' % (occurr, xi)].append(sts_pool +
                                                                    sts[xi:])
                    sts_rep['stp_%iocc_%ixi' % (occurr, xi)].append(stp)
            sts_rep['params_background'] = params_background

    # Saving the datasets
    filepath = '../data/'
    path_temp = './'
    for folder in split_path(filepath):
        path_temp = path_temp + '/' + folder
        mkdirp(path_temp)

    filename = 'stp_data%i' % (data_idx)
    np.save(filepath + filename, sts_rep)

print((time.time() - t0))
Exemple #55
0
def updateresults(input_folder, work_folder, bucket, prefix, cache_folder,
                  region, aws_cred, nb_workers):
    # Find input files
    input_files = []
    for path, folders, files in os.walk(input_folder):
        for f in files:
            # Get channel version
            cv = os.path.relpath(os.path.join(path, f), input_folder)
            input_files.append(cv)

    # Sanitize prefix
    if prefix[-1] != '/':
        prefix += '/'

    # Connect to s3
    s3 = s3_connect(region, **aws_cred)
    s3_bucket = s3.get_bucket(bucket, validate=False)

    # Download versions.json if not in cache and load versions
    versions_json = os.path.join(cache_folder, 'versions.json')
    if not os.path.isfile(versions_json):
        versions = s3get_json(s3_bucket, prefix + 'versions.json', True, {})
        with open(versions_json, 'w') as f:
            json.dump(versions, f)
    else:
        with open(versions_json, 'r') as f:
            versions = json.load(f)

    # Update results in bucket
    for channel_version in input_files:
        print "### Updating: " + channel_version

        # Download all files for channel_version to disk
        rmtree(work_folder, ignore_errors=True)
        data_folder = os.path.join(work_folder, channel_version)
        mkdirp(data_folder)
        snapshot = versions.get(channel_version, None)
        if snapshot:
            fetched = False
            while not fetched:
                fetched = s3get(bucket, prefix + snapshot, data_folder, True,
                                False, region, aws_cred)
                if not fetched:
                    print >> sys.stderr, "Failed to download %s" % snapshot
                    sleep(5 * 60)
            print " - downloaded " + snapshot

        # Create ChannelVersionManager
        channel, version = channel_version.split('/')
        manager = ChannelVersionManager(work_folder, channel, version, False,
                                        False, False)

        # Feed it with rows from input_file
        rows = 0
        with open(os.path.join(input_folder, channel_version), 'r') as f:
            for line in f:
                try:
                    filePath, blob = line.split('\t')
                    channel_, version_, measure, byDateType = filePath.split(
                        '/')
                    blob = json.loads(blob)
                    if channel_ != channel or version_ != version:
                        print >> sys.stderr, (
                            "Error: Found %s/%s within a %s file!" %
                            (channel_, version_, channel_version))
                        continue
                    manager.merge_in_blob(measure, byDateType, blob)
                    rows += 1
                except:
                    print >> sys.stderr, "Error while handling row:"
                    print_exc(file=sys.stderr)
        manager.flush()

        print " - merged rows %i" % rows

        # Upload updated files to S3
        date = datetime.utcnow().strftime("%Y%m%d%H%M%S")
        cv_prefix = "%s-%s-%s" % (date, version, channel)
        uploaded = False
        while not uploaded:
            uploaded = s3put(data_folder, bucket, prefix + cv_prefix, False,
                             True, region, aws_cred, nb_workers)
            if not uploaded:
                print >> sys.stderr, "Failed to upload '%s'" % cv_prefix
                sleep(5 * 60)

        print " - uploaded to " + cv_prefix

        # Store changes in versions
        versions[channel_version] = cv_prefix

    # Upload new versions.json and write to cache
    s3put_json(s3_bucket, prefix + 'versions.json', True, versions)
    with open(versions_json, 'w') as f:
        json.dump(versions, f)

    print "### New snapshot uploaded"

    try:
        # Garbage collect old channel/version folders on S3
        collect_garbage(bucket, prefix, cache_folder, region, aws_cred,
                        nb_workers)
    except:
        print >> sys.stderr, "Failed to collect garbage on S3"
Exemple #56
0
    parser.add_argument("outfile", help="Output File")
    parser.add_argument("start", help="Start Time", type=float)
    parser.add_argument("--debug", help="Debug", action="store_true")
    parser.add_argument("--output-prefix", help="File prefix in output",
                        default=".")

    return parser.parse_args()


if __name__ == "__main__":
    args = _get_args()
    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.quote_plus(args.brick))
    status_file_pre = status_file + ".pre"
    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "brickfind.log")
    setup_logger(logger, log_file, args.debug)

    time_to_update = int(time.time())
    brickfind_crawl(args.brick, args)
    with open(status_file_pre, "w", buffering=0) as f:
        f.write(str(time_to_update))
    sys.exit(0)
Exemple #57
0
def mode_pre(session_dir, args):
    """
    Read from Session file and write to session.pre file
    """
    endtime_to_update = int(time.time()) - get_changelog_rollover_time(
        args.volume)
    status_file = os.path.join(session_dir, args.volume, "status")
    status_file_pre = status_file + ".pre"

    mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)

    # If Pre status file exists and running pre command again
    if os.path.exists(status_file_pre) and not args.regenerate_outfile:
        fail("Post command is not run after last pre, "
             "use --regenerate-outfile")

    start = 0
    try:
        with open(status_file) as f:
            start = int(f.read().strip())
    except ValueError:
        pass
    except (OSError, IOError) as e:
        fail("Error Opening Session file %s: %s"
             % (status_file, e), logger=logger)

    logger.debug("Pre is called - Session: %s, Volume: %s, "
                 "Start time: %s, End time: %s"
                 % (args.session, args.volume, start, endtime_to_update))

    run_cmd_nodes("pre", args, start=start)

    # Merger
    if args.full:
        cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
        execute(cmd,
                exit_msg="Failed to merge output files "
                "collected from nodes", logger=logger)
    else:
        # Read each changelog DB and generate the final DB
        create_file(args.outfile, exit_on_err=True, logger=logger)
        outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)

        with open(args.outfile, "a") as f:
            for row in outfilemerger.get():
                # Multiple paths in case of Hardlinks
                paths = row[1].split(",")
                for p in paths:
                    if p == "":
                        continue
                    f.write("%s %s %s\n" % (row[0], p, row[2]))

    try:
        os.remove(args.outfile + ".db")
    except (IOError, OSError):
        pass

    run_cmd_nodes("cleanup", args)

    with open(status_file_pre, "w", buffering=0) as f:
        f.write(str(endtime_to_update))

    sys.stdout.write("Generated output file %s\n" % args.outfile)
Exemple #58
0
def combine_hypotest_files(inpaths, outpath):

    regions = args.region.split(',')

    #create new files (best expected)
    mkdirp(outpath)

    # Combine text files
    # description = "expectedUpperLimitMinus1Sig/F:upperLimitEstimatedError/F:fitstatus/F:p0d2s/F:p0u2s/F:seed/F:CLsexp/F:sigma1/F:failedfit/F:expectedUpperLimitPlus2Sig/F:nofit/F:nexp/F:sigma0/F:clsd2s/F:m3/F:expectedUpperLimit/F:failedstatus/F:xsec/F:covqual/F:upperLimit/F:p0d1s/F:clsd1s/F:failedp0/F:failedcov/F:p0exp/F:p1/F:p0u1s/F:excludedXsec/F:p0/F:clsu1s/F:clsu2s/F:expectedUpperLimitMinus2Sig/F:expectedUpperLimitPlus1Sig/F:mu/F:mode/F:fID/C:dodgycov/F:CLs/F"

    new_lines_nom = dict()
    new_lines_dn  = dict()
    new_lines_up  = dict()

    cls_dict = dict()

    for i, path in enumerate(inpaths):

        region = regions[i]

        lines_nom = open(path+'/Output_hypotest__1_harvest_list').read().split('\n')
        if sig_xs_syst:
            lines_nom = open(path+'/Output_fixSigXSecNominal_hypotest__1_harvest_list').read().split('\n')
            lines_dn = open(path+'/Output_fixSigXSecDown_hypotest__1_harvest_list').read().split('\n')
            lines_up = open(path+'/Output_fixSigXSecUp_hypotest__1_harvest_list').read().split('\n')

        for jline, line in enumerate(lines_nom):

            if not line:
                continue

            vals = line.split()

            m3 = int(float(vals[14]))
            mu = int(float(vals[33]))
            #cls = float(vals[-1]) # observed CLs
            cls = float(vals[6]) # expected CLs

            if (m3, mu) not in cls_dict:
                cls_dict[(m3, mu)] = cls

                new_lines_nom[(m3, mu)] = line
                if sig_xs_syst:
                    new_lines_dn[(m3, mu)] = lines_dn[jline]
                    new_lines_up[(m3, mu)] = lines_up[jline]

                print 'copying list for (%i, %i) %s CLs = %.3f' % (m3, mu, region, cls)
                    
            else:

                if cls < cls_dict[(m3, mu)]:

                    print 'changing list for (%i, %i) %s CLs = %.3f (old = %.3f)' % (m3, mu, region, cls, cls_dict[(m3, mu)]) 
                    
                    cls_dict[(m3, mu)] = cls

                    new_lines_nom[(m3, mu)] = line
                    if sig_xs_syst:
                        new_lines_dn[(m3, mu)] = lines_dn[jline]
                        new_lines_up[(m3, mu)] = lines_up[jline]



    # Save new list
    if sig_xs_syst:
        with open(outpath+'/Output_fixSigXSecNominal_hypotest__1_harvest_list', 'w') as f:
            for line in new_lines_nom.itervalues():
                f.write(line+'\n')

        with open(outpath+'/Output_fixSigXSecDown_hypotest__1_harvest_list', 'w') as f:
            for line in new_lines_dn.itervalues():
                f.write(line+'\n')

        with open(outpath+'/Output_fixSigXSecUp_hypotest__1_harvest_list', 'w') as f:
            for line in new_lines_up.itervalues():
                f.write(line+'\n')
    else:
        with open(outpath+'/Output_hypotest__1_harvest_list', 'w') as f:
            for line in new_lines_nom.itervalues():
                f.write(line+'\n')
Exemple #59
0
def generate_graph_in_memory(opts):
    def run(args):
        utils.run(opts.print_only, "time", *args)

    # make sure proper config is loaded
    run(["sudo", "./startup-config"])

    if opts.debug:
        grc_in_memory = conf.DBIN_GRC_IN_MEMORY
    else:
        grc_in_memory = conf.RBIN_GRC_IN_MEMORY

    input_weighted = 0
    if conf.SG_INPUT_WEIGHTED.get(opts.dataset, False):
        input_weighted = 1

    # populate hashed dirs
    num_dir = conf.SG_NUM_HASH_DIRS

    meta_dirs = []
    tile_dirs = []
    global_dir = conf.getGlobalsDir(opts.dataset, opts.weighted_output)

    utils.mkdirp(global_dir, conf.FILE_GROUP)

    if (opts.weighted_output):
        unweighted_stat = os.path.join(conf.getGlobalsDir(opts.dataset, False),
                                       "stat.dat")
        weighted_stat = os.path.join(conf.getGlobalsDir(opts.dataset, True),
                                     "stat.dat")
        shutil.copyfile(unweighted_stat, weighted_stat)

        unweighted_deg = os.path.join(conf.getGlobalsDir(opts.dataset, False),
                                      "vertex_deg.dat")
        weighted_deg = os.path.join(conf.getGlobalsDir(opts.dataset, True),
                                    "vertex_deg.dat")
        shutil.copyfile(unweighted_deg, weighted_deg)

        unweighted_global_to_orig = os.path.join(
            conf.getGlobalsDir(opts.dataset, False),
            "vertex_global_to_orig.dat")
        weighted_global_to_orig = os.path.join(
            conf.getGlobalsDir(opts.dataset, True),
            "vertex_global_to_orig.dat")
        shutil.copyfile(unweighted_global_to_orig, weighted_global_to_orig)

    for i in range(0, len(conf.SG_GRC_OUTPUT_DIRS)):
        meta_dir = conf.getGrcMetaDir(opts.dataset, i, opts.weighted_output)
        tile_dir = conf.getGrcTileDir(opts.dataset, i, opts.weighted_output)

        shutil.rmtree(meta_dir, True)
        shutil.rmtree(tile_dir, True)

        utils.mkdirp(meta_dir, conf.FILE_GROUP)
        utils.mkdirp(tile_dir, conf.FILE_GROUP)

        utils.populate_hash_dirs(num_dir, meta_dir)
        utils.populate_hash_dirs(num_dir, tile_dir)

        meta_dirs.append(meta_dir)
        tile_dirs.append(tile_dir)

    output_weighted = 0
    if opts.weighted_output:
        output_weighted = 1

    use_rle_int = 0
    if opts.use_rle:
        use_rle_int = 1

    generator = ""
    delimiter = ""
    count_vertices = 0
    count_edges = 0
    use_original_ids = 0

    input_file = ""
    if opts.rmat:
        generator = "rmat"
        count_vertices = conf.SG_GRAPH_SETTINGS_RMAT[
            opts.dataset]["count_vertices"]
        count_edges = conf.SG_GRAPH_SETTINGS_RMAT[opts.dataset]["count_edges"]
        use_original_ids = 1 if conf.SG_GRAPH_SETTINGS_RMAT[
            opts.dataset]["use_original_ids"] else 0
    elif opts.binary:
        generator = "binary"
        input_file = conf.SG_INPUT_FILE[opts.dataset]["binary"]
        count_vertices = conf.SG_GRAPH_SETTINGS_DELIM[
            opts.dataset]["count_vertices"]
        use_original_ids = 1 if conf.SG_GRAPH_SETTINGS_DELIM[
            opts.dataset]["use_original_ids"] else 0
    else:
        generator = "delim_edges"
        input_file = conf.SG_INPUT_FILE[opts.dataset]["delim"]
        count_vertices = conf.SG_GRAPH_SETTINGS_DELIM[
            opts.dataset]["count_vertices"]
        delimiter = conf.SG_GRAPH_SETTINGS_DELIM[opts.dataset]["delimiter"]
        use_original_ids = 1 if conf.SG_GRAPH_SETTINGS_DELIM[
            opts.dataset]["use_original_ids"] else 0

    if not opts.rmat:
        if not os.path.exists(input_file):
            print("Failed to find %s" % input_file)
            exit(1)
    args = [
        grc_in_memory,
        "--source",
        input_file,
        "--count-vertices",
        count_vertices,
        "--generator",
        generator,
        "--graphname",
        opts.dataset,
        "--path-globals",
        global_dir,
        "--paths-meta",
        ":".join(meta_dirs),
        "--paths-tile",
        ":".join(tile_dirs),
        "--nthreads",
        conf.SG_GRC_NTHREADS_PARTITIONER,
        "--npartition-managers",
        conf.SG_GRC_NPARTITION_MANAGERS,
        "--input-weighted",
        input_weighted,
        "--output-weighted",
        output_weighted,
        "--rmat-count-edges",
        count_edges,
        "--use-run-length-encoding",
        use_rle_int,
        "--use-original-ids",
        use_original_ids,
        "--traversal",
        opts.traversal,
        "--delimiter",
        delimiter,
    ]
    if opts.gdb:
        args = ["gdb", "--args"] + args
    run(args)
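The Mosaic snippets (Exemple #53 and the graph-generation example above) pass a second FILE_GROUP argument to their utils.mkdirp, which suggests the helper also fixes group ownership of the created directory. A rough, assumed Python 3 equivalent:

import os
import shutil


def mkdirp(path, group=None):
    # Create the directory tree; optionally hand ownership to `group`.
    os.makedirs(path, exist_ok=True)
    if group is not None:
        shutil.chown(path, group=group)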