def mode_create(session_dir, args):
    validate_session_name(args.session)

    logger.debug("Init is called - Session: %s, Volume: %s"
                 % (args.session, args.volume))

    mkdirp(session_dir, exit_on_err=True, logger=logger)
    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    status_file = os.path.join(session_dir, args.volume, "status")

    if os.path.exists(status_file) and not args.force:
        fail("Session %s already created" % args.session, logger=logger)

    if not os.path.exists(status_file) or args.force:
        ssh_setup(args)

        enable_volume_options(args)

    # Add Rollover time to current time to make sure changelogs
    # will be available if we use this time as start time
    time_to_update = int(time.time()) + get_changelog_rollover_time(
        args.volume)

    run_cmd_nodes("create", args, time_to_update=str(time_to_update))

    if not os.path.exists(status_file) or args.reset_session_time:
        with open(status_file, "w") as f:
            f.write(str(time_to_update))

    sys.stdout.write("Session %s created with volume %s\n" %
                     (args.session, args.volume))

    sys.exit(0)
def brickfind_crawl(brick, args):
    if brick.endswith("/"):
        brick = brick[0:len(brick)-1]

    working_dir = os.path.dirname(args.outfile)
    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)

    with open(args.outfile, "a+") as fout:
        brick_path_len = len(brick)

        def output_callback(path, filter_result):
            path = path.strip()
            path = path[brick_path_len+1:]
            output_write(fout, path, args.output_prefix, encode=True)

        ignore_dirs = [os.path.join(brick, dirname)
                       for dirname in
                       conf.get_opt("brick_ignore_dirs").split(",")]

        find(brick, callback_func=output_callback,
             ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())
def brickfind_crawl(brick, args):
    if brick.endswith("/"):
        brick = brick[0:len(brick)-1]

    working_dir = os.path.dirname(args.outfile)
    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)

    with open(args.outfile, "a+") as fout:
        brick_path_len = len(brick)

        def output_callback(path, filter_result, is_dir):
            path = path.strip()
            path = path[brick_path_len+1:]

            if args.type == "both":
                output_write(fout, path, args.output_prefix,
                             encode=(not args.no_encode), tag=args.tag,
                             field_separator=args.field_separator)
            else:
                if (is_dir and args.type == "d") or (
                        (not is_dir) and args.type == "f"):
                    output_write(fout, path, args.output_prefix,
                                 encode=(not args.no_encode), tag=args.tag,
                                 field_separator=args.field_separator)

        ignore_dirs = [os.path.join(brick, dirname)
                       for dirname in
                       conf.get_opt("brick_ignore_dirs").split(",")]

        find(brick, callback_func=output_callback,
             ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())
def main():
    args = _get_args()
    mkdirp(conf.get_opt("session_dir"), exit_on_err=True)

    if args.mode == "list":
        session_dir = conf.get_opt("session_dir")
    else:
        session_dir = os.path.join(conf.get_opt("session_dir"),
                                   args.session)

    if not os.path.exists(session_dir) and args.mode not in ["create",
                                                             "list"]:
        fail("Invalid session %s" % args.session)

    vol_dir = os.path.join(session_dir, args.volume)
    if not os.path.exists(vol_dir) and args.mode not in ["create", "list"]:
        fail("Session %s not created with volume %s"
             % (args.session, args.volume))

    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "cli.log")
    setup_logger(logger, log_file, args.debug)

    # globals() will have all the functions already defined.
    # mode_<args.mode> will be the function name to be called
    globals()["mode_" + args.mode](session_dir, args)
def changelog_crawl(brick, end, args):
    """
    Init function, prepares working dir and calls Changelog query
    """
    if brick.endswith("/"):
        brick = brick[0:len(brick)-1]

    # WORKING_DIR/BRICKHASH/OUTFILE
    working_dir = os.path.dirname(args.outfile)
    brickhash = hashlib.sha1(brick)
    brickhash = str(brickhash.hexdigest())
    working_dir = os.path.join(working_dir, brickhash)

    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)
    create_file(args.outfile + ".gfids", exit_on_err=True, logger=logger)

    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "changelog.%s.log" % brickhash)

    logger.info("%s Started Changelog Crawl. Start: %s, End: %s"
                % (brick, args.start, end))
    get_changes(brick, working_dir, log_file, end, args)
def cook(path, caller_cwd):
    def delete_if_exists(path):
        if os.path.isfile(path):
            os.remove(path)

    local_cwd = os.getcwd()

    # Check if `path` is an absolute path to the recipe
    if os.path.isabs(path):
        recipe_path = os.path.realpath(path)
        recipe_basename = os.path.basename(recipe_path)
        mkdirp('.recipes')
        delete_if_exists(os.path.join(local_cwd, '.recipes', recipe_basename))
        shutil.copyfile(recipe_path,
                        os.path.join(local_cwd, '.recipes', recipe_basename))
        recipe_path = os.path.join('/vagrant', '.recipes', recipe_basename)
    # Check if `path` is a relative path to the recipe
    # (from the caller's perspective)
    elif os.path.isfile(os.path.realpath(os.path.join(caller_cwd, path))):
        recipe_path = os.path.realpath(os.path.join(caller_cwd, path))
        recipe_basename = os.path.basename(recipe_path)
        mkdirp('.recipes')
        delete_if_exists(os.path.join(local_cwd, '.recipes', recipe_basename))
        shutil.copyfile(recipe_path,
                        os.path.join(local_cwd, '.recipes', recipe_basename))
        recipe_path = os.path.join('/vagrant', '.recipes', recipe_basename)
    # Check if `path + (.sh)` is a relative path to the recipe
    # (from the dev-box's perspective)
    elif os.path.isfile(os.path.realpath(
            os.path.join(local_cwd, 'recipes', path + '.sh'))):
        recipe_path = os.path.realpath(
            os.path.join(local_cwd, 'recipes', path + '.sh'))
        recipe_basename = os.path.basename(recipe_path)
        recipe_path = os.path.join('/vagrant', 'recipes', recipe_basename)
    # Recipe file was not found
    else:
        print_error('Error: recipe was not found')
        return

    print_green('# DevBox is now cooking')
    return run('sh {0}'.format(recipe_path))
def mode_pre(session_dir, args):
    """
    Read from Session file and write to session.pre file
    """
    endtime_to_update = int(time.time()) - int(
        conf.get_opt("changelog_rollover_time"))
    status_file = os.path.join(session_dir, args.volume, "status")
    status_file_pre = status_file + ".pre"

    mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)

    start = 0
    try:
        with open(status_file) as f:
            start = int(f.read().strip())
    except ValueError:
        pass
    except (OSError, IOError) as e:
        fail("Error Opening Session file %s: %s"
             % (status_file, e), logger=logger)

    logger.debug("Pre is called - Session: %s, Volume: %s, "
                 "Start time: %s, End time: %s"
                 % (args.session, args.volume, start, endtime_to_update))

    run_in_nodes(args.volume, start, args)

    with open(status_file_pre, "w", buffering=0) as f:
        f.write(str(endtime_to_update))

    sys.stdout.write("Generated output file %s\n" % args.outfile)
def __init__(self, root_folder, channel, version, compress, pretty_print,
             cached = False):
    self.data_folder = os.path.join(root_folder, channel, version)
    mkdirp(self.data_folder)
    self.compress = compress
    self.pretty_print = pretty_print
    self.max_filter_id = None
    self.cached = cached
    if cached:
        self.cache = {}

    # Load filter-tree
    self.filter_tree = self.json_from_file(
        "filter-tree.json",
        {'_id': 0, 'name': 'reason'}
    )

    # Load histogram definitions
    self.histograms = self.json_from_file("histograms.json", {})

    # Load histogram revision meta-data
    self.revisions = self.json_from_file("revisions.json", {})

    # Histograms.json cache
    self.histograms_json_cache = [(None, None)] * HGRAMS_JSON_CACHE_SIZE
    self.histograms_json_cache_next = 0
def mode_pre(session_dir, args):
    global gtmpfilename

    """
    Read from Session file and write to session.pre file
    """
    endtime_to_update = int(time.time()) - get_changelog_rollover_time(
        args.volume)
    status_file = os.path.join(session_dir, args.volume, "status")
    status_file_pre = status_file + ".pre"

    mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)

    # If Pre status file exists and running pre command again
    if os.path.exists(status_file_pre) and not args.regenerate_outfile:
        fail("Post command is not run after last pre, "
             "use --regenerate-outfile")

    start = 0
    try:
        with open(status_file) as f:
            start = int(f.read().strip())
    except ValueError:
        pass
    except (OSError, IOError) as e:
        fail("Error Opening Session file %s: %s"
             % (status_file, e), logger=logger)

    logger.debug("Pre is called - Session: %s, Volume: %s, "
                 "Start time: %s, End time: %s"
                 % (args.session, args.volume, start, endtime_to_update))

    prefix = datetime.now().strftime("%Y%m%d-%H%M%S-%f-")
    gtmpfilename = prefix + next(tempfile._get_candidate_names())

    run_cmd_nodes("pre", args, start=start, end=-1,
                  tmpfilename=gtmpfilename)

    # Merger
    if args.full:
        cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
        execute(cmd,
                exit_msg="Failed to merge output files "
                "collected from nodes",
                logger=logger)
    else:
        # Read each Changelogs db and generate finaldb
        create_file(args.outfile, exit_on_err=True, logger=logger)
        outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)
        write_output(args.outfile, outfilemerger, args.field_separator)

    try:
        os.remove(args.outfile + ".db")
    except (IOError, OSError):
        pass

    run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename)

    with open(status_file_pre, "w", buffering=0) as f:
        f.write(str(endtime_to_update))

    sys.stdout.write("Generated output file %s\n" % args.outfile)
def main():
    global gtmpfilename

    args = None
    try:
        args = _get_args()
        mkdirp(conf.get_opt("session_dir"), exit_on_err=True)

        # force the default session name if mode is "query"
        if args.mode == "query":
            args.session = "default"

        if args.mode == "list":
            session_dir = conf.get_opt("session_dir")
        else:
            session_dir = os.path.join(conf.get_opt("session_dir"),
                                       args.session)

        if not os.path.exists(session_dir) and \
                args.mode not in ["create", "list", "query"]:
            fail("Invalid session %s" % args.session)

        # "default" is a system defined session name
        if args.mode in ["create", "post", "pre", "delete"] and \
                args.session == "default":
            fail("Invalid session %s" % args.session)

        vol_dir = os.path.join(session_dir, args.volume)
        if not os.path.exists(vol_dir) and args.mode not in \
                ["create", "list", "query"]:
            fail("Session %s not created with volume %s"
                 % (args.session, args.volume))

        mkdirp(os.path.join(conf.get_opt("log_dir"), args.session,
                            args.volume),
               exit_on_err=True)
        log_file = os.path.join(conf.get_opt("log_dir"),
                                args.session,
                                args.volume,
                                "cli.log")
        setup_logger(logger, log_file, args.debug)

        # globals() will have all the functions already defined.
        # mode_<args.mode> will be the function name to be called
        globals()["mode_" + args.mode](session_dir, args)
    except KeyboardInterrupt:
        if args is not None:
            if args.mode == "pre" or args.mode == "query":
                # cleanup session
                if gtmpfilename is not None:
                    # no more interrupts until we clean up
                    signal.signal(signal.SIGINT, signal.SIG_IGN)
                    run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename)

        # Interrupted, exit with non zero error code
        sys.exit(2)
def __init__(self, input_queue, output_queue,
             work_folder, aws_cred):
    super(DownloaderProcess, self).__init__()
    self.input_queue = input_queue
    self.output_queue = output_queue
    self.work_folder = work_folder
    mkdirp(self.work_folder)
    self.input_bucket = "telemetry-published-v2"
    self.aws_cred = aws_cred
    self.s3 = S3Connection(**self.aws_cred)
    self.bucket = self.s3.get_bucket(self.input_bucket, validate = False)
def mode_post(args):
    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.quote_plus(args.brick))

    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    status_file_pre = status_file + ".pre"

    if os.path.exists(status_file_pre):
        os.rename(status_file_pre, status_file)
        sys.exit(0)
def mode_create(session_dir, args):
    logger.debug("Init is called - Session: %s, Volume: %s"
                 % (args.session, args.volume))

    execute(["gluster", "volume", "info", args.volume],
            exit_msg="Unable to get volume details",
            logger=logger)

    mkdirp(session_dir, exit_on_err=True, logger=logger)
    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    status_file = os.path.join(session_dir, args.volume, "status")

    if os.path.exists(status_file) and not args.force:
        fail("Session %s already created" % args.session, logger=logger)

    if not os.path.exists(status_file) or args.force:
        ssh_setup(args)

        execute(["gluster", "volume", "set",
                 args.volume, "build-pgfid", "on"],
                exit_msg="Failed to set volume option build-pgfid on",
                logger=logger)
        logger.info("Volume option set %s, build-pgfid on" % args.volume)

        execute(["gluster", "volume", "set",
                 args.volume, "changelog.changelog", "on"],
                exit_msg="Failed to set volume option "
                "changelog.changelog on", logger=logger)
        logger.info("Volume option set %s, changelog.changelog on"
                    % args.volume)

        execute(["gluster", "volume", "set",
                 args.volume, "changelog.capture-del-path", "on"],
                exit_msg="Failed to set volume option "
                "changelog.capture-del-path on", logger=logger)
        logger.info("Volume option set %s, changelog.capture-del-path on"
                    % args.volume)

    # Add Rollover time to current time to make sure changelogs
    # will be available if we use this time as start time
    time_to_update = int(time.time()) + get_changelog_rollover_time(
        args.volume)

    run_cmd_nodes("create", args, time_to_update=str(time_to_update))

    if not os.path.exists(status_file) or args.reset_session_time:
        with open(status_file, "w", buffering=0) as f:
            f.write(str(time_to_update))

    sys.exit(0)
def mode_create(args):
    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.parse.quote_plus(
                                   args.brick))

    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)

    if not os.path.exists(status_file) or args.reset_session_time:
        with open(status_file, "w", buffering=0) as f:
            f.write(args.time_to_update)

    sys.exit(0)
def publish_results(self):
    # Create work folder for update process
    update_folder = os.path.join(self.work_folder, "update")
    shutil.rmtree(update_folder, ignore_errors = True)
    mkdirp(update_folder)

    # Update results
    updateresults(self.data_folder, update_folder, self.bucket_name,
                  self.prefix, self.cache_folder, self.region, self.aws_cred,
                  NB_WORKERS)

    self.put_file(self.files_processed_path, 'FILES_PROCESSED')
    self.put_file(self.files_missing_path, 'FILES_MISSING')

    # Clear data_folder
    shutil.rmtree(self.data_folder, ignore_errors = True)
    mkdirp(self.data_folder)
def mode_post(args):
    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    status_file = os.path.join(
        session_dir, args.volume,
        "%s.status" % urllib.parse.quote_plus(args.brick))

    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    status_file_pre = status_file + ".pre"

    if os.path.exists(status_file_pre):
        os.rename(status_file_pre, status_file)
        sys.exit(0)
def mode_create(args):
    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.quote_plus(args.brick))

    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)

    if not os.path.exists(status_file) or args.reset_session_time:
        with open(status_file, "w", buffering=0) as f:
            f.write(args.time_to_update)

    sys.exit(0)
def parse(self):
    if not self.initialized:
        self.initialize()
    opt = self.parser.parse_args()

    if opt.input_streams is None:
        if isinstance(self, TestOptions):
            opt.input_streams = []
        else:
            raise ValueError("input_streams must be set")

    # if opt.debug:
    #     opt.results_dir_base = \
    #         opt.results_dir_base.split("/")[0] + "/debug_results"
    opt.no_core_driver = True
    opt.num_workers = 0
    opt.results_dir = opt.results_dir_base + time.strftime(
        "_%Y_%m_%d_%H_%M_%S")

    self.opt = opt

    if isinstance(self, TestOptions):
        options = load_json(
            os.path.join("results", opt.model_dir, "opt.json"))
        for arg in options:
            if arg not in ["debug"]:
                setattr(opt, arg, options[arg])
        opt.no_core_driver = True
    else:
        mkdirp(opt.results_dir)
        # save a copy of current code
        # code_dir = os.path.dirname(os.path.realpath(__file__))
        # code_zip_filename = os.path.join(opt.results_dir, "code.zip")
        # make_zipfile(code_dir, code_zip_filename,
        #              enclosing_dir="code", exclude_paths=["results"],
        #              exclude_extensions=[".pyc", ".ipynb"])
        self.display_save()

    assert opt.num_hard <= opt.num_negatives
    opt.device = torch.device(
        "cuda:%d" % opt.device_ids[0] if opt.device >= 0 else "cpu")
    if opt.device.type == "cuda":
        opt.bsz = opt.bsz * len(opt.device_ids)
        opt.test_bsz = opt.test_bsz * len(opt.device_ids)
    opt.h5driver = None if opt.no_core_driver else "core"
    opt.vfeat_flag = "vfeat" in opt.input_streams
    opt.vcpt_flag = "vcpt" in opt.input_streams
    opt.sub_flag = "sub" in opt.input_streams
    self.opt = opt
    return opt
def mode_cleanup(args):
    working_dir = os.path.join(conf.get_opt("working_dir"),
                               args.session,
                               args.volume,
                               args.tmpfilename)

    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "changelog.log")

    setup_logger(logger, log_file)

    try:
        shutil.rmtree(working_dir, onerror=handle_rm_error)
    except (OSError, IOError) as e:
        logger.error("Failed to delete working directory: %s" % e)
        sys.exit(1)
def check_coords(cubes, write_to='./.jazz/offending_cube'):
    """Check that the cubes' coordinates match.

    If they do not, this could be quite a problem! However, some models
    have files which read in with slightly different coordinates (CCSM4,
    for example). In this case the difference is minuscule, so we can
    safely replace the coordinates.

    This method replaces coordinates but also informs the user it is
    doing this. It also prints and optionally saves the summary of the
    offending cube.

    Args:
        cubes (iris.cube.CubeList): list of cubes to check
        write_to (Optional[str]): path to which to write warnings
    """
    # Remove attributes from auxiliary coordinates - these can sometimes
    # prevent merging and concatenation.
    for cube in cubes:
        for coord in cube.aux_coords:
            coord.attributes = {}

    # Get the names of the spatial coords
    coord_names = [coord.name() for coord in cubes[0].dim_coords]
    if 'time' in coord_names:
        coord_names.remove('time')

    for coord_name in coord_names:
        # Make a list of the coordinates' points for each cube
        points_list = [cube.coord(coord_name).points for cube in cubes]

        # Loop over the list of points for all the cubes
        for p in xrange(len(points_list) - 1):
            # If the coordinates are different from the first set,
            # replace them with the first set
            if not (points_list[p + 1] == points_list[0]).all():
                cubes[p + 1].replace_coord(cubes[0].coord(coord_name))

                # Notify user
                warnings.warn('Replacing the coordinates of a cube. '
                              'Offending cube is {}'.format(
                                  cubes[p + 1].summary()))
                if write_to is not None:
                    utils.mkdirp(write_to)
                    utils.write_file(
                        cubes[p + 1].summary(),
                        '{0}_{1}_{2}'.format(write_to, coord_name, p))
def mode_create(session_dir, args):
    logger.debug("Init is called - Session: %s, Volume: %s"
                 % (args.session, args.volume))

    execute(["gluster", "volume", "info", args.volume],
            exit_msg="Unable to get volume details",
            logger=logger)

    mkdirp(session_dir, exit_on_err=True, logger=logger)
    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    status_file = os.path.join(session_dir, args.volume, "status")

    if os.path.exists(status_file) and not args.force:
        fail("Session %s already created" % args.session, logger=logger)

    if not os.path.exists(status_file) or args.force:
        ssh_setup(args)

        execute(["gluster", "volume", "set",
                 args.volume, "build-pgfid", "on"],
                exit_msg="Failed to set volume option build-pgfid on",
                logger=logger)
        logger.info("Volume option set %s, build-pgfid on" % args.volume)

        execute(["gluster", "volume", "set",
                 args.volume, "changelog.changelog", "on"],
                exit_msg="Failed to set volume option "
                "changelog.changelog on", logger=logger)
        logger.info("Volume option set %s, changelog.changelog on"
                    % args.volume)

    # Add Rollover time to current time to make sure changelogs
    # will be available if we use this time as start time
    time_to_update = int(time.time()) + get_changelog_rollover_time(
        args.volume)

    run_cmd_nodes("create", args, time_to_update=str(time_to_update))

    if not os.path.exists(status_file) or args.reset_session_time:
        with open(status_file, "w", buffering=0) as f:
            f.write(str(time_to_update))

    sys.exit(0)
def mode_create(session_dir, args):
    logger.debug("Init is called - Session: %s, Volume: %s"
                 % (args.session, args.volume))

    cmd = ["gluster", 'volume', 'info', args.volume, "--xml"]
    _, data, _ = execute(cmd,
                         exit_msg="Failed to Run Gluster Volume Info",
                         logger=logger)
    try:
        tree = etree.fromstring(data)
        statusStr = tree.find('volInfo/volumes/volume/statusStr').text
    except (ParseError, AttributeError) as e:
        fail("Invalid Volume: %s" % e, logger=logger)

    if statusStr != "Started":
        fail("Volume %s is not online" % args.volume, logger=logger)

    mkdirp(session_dir, exit_on_err=True, logger=logger)
    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    status_file = os.path.join(session_dir, args.volume, "status")

    if os.path.exists(status_file) and not args.force:
        fail("Session %s already created" % args.session, logger=logger)

    if not os.path.exists(status_file) or args.force:
        ssh_setup(args)

        enable_volume_options(args)

    # Add Rollover time to current time to make sure changelogs
    # will be available if we use this time as start time
    time_to_update = int(time.time()) + get_changelog_rollover_time(
        args.volume)

    run_cmd_nodes("create", args, time_to_update=str(time_to_update))

    if not os.path.exists(status_file) or args.reset_session_time:
        with open(status_file, "w", buffering=0) as f:
            f.write(str(time_to_update))

    sys.stdout.write("Session %s created with volume %s\n" %
                     (args.session, args.volume))

    sys.exit(0)
def brickfind_crawl(brick, args):
    if brick.endswith("/"):
        brick = brick[0:len(brick) - 1]

    working_dir = os.path.dirname(args.outfile)
    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)

    with open(args.outfile, "a+") as fout:
        brick_path_len = len(brick)

        def mtime_filter(path):
            try:
                st = os.lstat(path)
            except (OSError, IOError) as e:
                if e.errno == ENOENT:
                    st = None
                else:
                    raise

            if st and (st.st_mtime > args.start or st.st_ctime > args.start):
                return True

            return False

        def output_callback(path):
            path = path.strip()
            path = path[brick_path_len + 1:]
            output_write(fout, path, args.output_prefix)

        ignore_dirs = [os.path.join(brick, dirname)
                       for dirname in
                       conf.get_opt("brick_ignore_dirs").split(",")]

        if args.full:
            find(brick, callback_func=output_callback,
                 ignore_dirs=ignore_dirs)
        else:
            find(brick, callback_func=output_callback,
                 filter_func=mtime_filter,
                 ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())
def render(pname):
    "renders the packer template and writes its json to something like elife-builder/packer/pname.json"
    assert utils.mkdirp('packer'), "failed to create the 'packer' dir"
    out = json.dumps(render_template(pname), indent=4)
    fname = template_path(pname, '.json')
    open(fname, 'w').write(out)
    print out
    print 'wrote', fname
    return fname
def __init__(self, data_dir, subdir, subsubdir, sub_filetypes):
    '''Creates the sub directories.'''
    self.data_dir = data_dir
    self.subdir = subdir
    self.subsubdir = subsubdir
    self.sub_filetypes = sub_filetypes

    [mkdirp(str(args.data_dir + galaxy)) for galaxy in subdir]
    [
        mkdirp(str(args.data_dir + galaxy + '/' + subsub))
        for galaxy in subdir
        for subsub in subsubdir
    ]
    [
        mkdirp(str(args.data_dir + galaxy + '/' + subsub + '/' + type))
        for galaxy in subdir
        for subsub in subsubdir
        for type in sub_filetypes
    ]
def mode_cleanup(args):
    working_dir = os.path.join(conf.get_opt("working_dir"),
                               args.session,
                               args.volume)

    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "changelog.log")

    setup_logger(logger, log_file)

    try:
        shutil.rmtree(working_dir, onerror=handle_rm_error)
    except (OSError, IOError) as e:
        logger.error("Failed to delete working directory: %s" % e)
        sys.exit(1)
def main():
    try:
        args = _get_args()
        mkdirp(conf.get_opt("session_dir"), exit_on_err=True)

        # force the default session name if mode is "query"
        if args.mode == "query":
            args.session = "default"

        if args.mode == "list":
            session_dir = conf.get_opt("session_dir")
        else:
            session_dir = os.path.join(conf.get_opt("session_dir"),
                                       args.session)

        if not os.path.exists(session_dir) and \
                args.mode not in ["create", "list", "query"]:
            fail("Invalid session %s" % args.session)

        # "default" is a system defined session name
        if args.mode in ["create", "post", "pre", "delete"] and \
                args.session == "default":
            fail("Invalid session %s" % args.session)

        vol_dir = os.path.join(session_dir, args.volume)
        if not os.path.exists(vol_dir) and args.mode not in \
                ["create", "list", "query"]:
            fail("Session %s not created with volume %s"
                 % (args.session, args.volume))

        mkdirp(os.path.join(conf.get_opt("log_dir"), args.session,
                            args.volume),
               exit_on_err=True)
        log_file = os.path.join(conf.get_opt("log_dir"),
                                args.session,
                                args.volume,
                                "cli.log")
        setup_logger(logger, log_file, args.debug)

        # globals() will have all the functions already defined.
        # mode_<args.mode> will be the function name to be called
        globals()["mode_" + args.mode](session_dir, args)
    except KeyboardInterrupt:
        # Interrupted, exit with non zero error code
        sys.exit(2)
def build(src_dir: str, dest_dir: str) -> None:
    for file_path in traverse_files_in_path(src_dir):
        extension = os.path.splitext(file_path)[1]
        html_content = None
        dest_file = get_full_dest_file_path(file_path, src_dir, dest_dir)

        if extension == '.md':
            try:
                html_content = generate_html_from_markdown_file(
                    file_path, src_dir, dest_dir)
            except Exception as exp:
                print(f"{file_path} {str(exp)}")
                continue

        mkdirp(dest_file.dest_file_folder)

        if html_content:
            with open(dest_file.dest_file_path, "wb") as fout:
                fout.write(html_content.encode("utf-8"))
        else:
            copyfile(file_path, dest_file.dest_file_path)
def changelog_crawl(brick, start, end, args):
    """
    Init function, prepares working dir and calls Changelog query
    """
    if brick.endswith("/"):
        brick = brick[0:len(brick) - 1]

    # WORKING_DIR/BRICKHASH/OUTFILE
    working_dir = os.path.dirname(args.outfile)
    brickhash = hashlib.sha1(brick.encode())
    brickhash = str(brickhash.hexdigest())
    working_dir = os.path.join(working_dir, brickhash)

    mkdirp(working_dir, exit_on_err=True, logger=logger)

    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "changelog.%s.log" % brickhash)

    logger.info("%s Started Changelog Crawl. Start: %s, End: %s"
                % (brick, start, end))
    return get_changes(brick, working_dir, log_file, start, end, args)
def main():
    args = _get_args()
    mkdirp(conf.get_opt("session_dir"), exit_on_err=True)

    if args.mode == "list":
        session_dir = conf.get_opt("session_dir")
    else:
        session_dir = os.path.join(conf.get_opt("session_dir"),
                                   args.session)

    if not os.path.exists(session_dir) and args.mode not in ["create",
                                                             "list"]:
        fail("Invalid session %s" % args.session)

    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "cli.log")
    setup_logger(logger, log_file, args.debug)

    # globals() will have all the functions already defined.
    # mode_<args.mode> will be the function name to be called
    globals()["mode_" + args.mode](session_dir, args)
def create_markdown_file_from_trello_card(
        card: TrelloCard, trello_folder_name: str = "content/trello") -> None:
    labels = card.labels or []
    if labels:
        card_folder_name = slugify(labels[0].name)
    else:
        card_folder_name = "unclassified"

    card_folder_path = os.path.join(
        os.path.dirname(__file__),
        trello_folder_name,
        card_folder_name,
        datetime.strftime(card.dateLastActivity, "%Y-%m-%d"),
    )
    mkdirp(card_folder_path)

    # TODO: add lastmodified, labels, etc.
    markdown_meta = yaml.dump({"title": card.name})
    markdown_content = "---\n{}\n---\n{}".format(markdown_meta, card.desc)

    markdown_file_name = "{}.md".format(slugify(card.name))
    markdown_file_path = os.path.join(card_folder_path, markdown_file_name)
    with open(markdown_file_path, "wb") as fout:
        fout.write(markdown_content.encode("utf-8"))
def download_file(stackname, path, destination=None, allow_missing="False",
                  use_bootstrap_user="******"):
    """
    Downloads `path` from `stackname`, putting it into the `destination`
    folder, or the `destination` file if it exists and is a file.

    If `allow_missing` is "True", a non-existent `path` will be skipped
    without errors.

    If `use_bootstrap_user` is "True", the owner_ssh user will be used for
    connecting instead of the standard deploy user.

    Boolean arguments are expressed as strings as this is the idiomatic way
    of passing them from the command line.
    """
    if not destination:
        destination = '.'
    utils.mkdirp(destination)
    with stack_conn(stackname, username=_user(use_bootstrap_user)):
        if _should_be_skipped(path, allow_missing):
            return
        get(path, destination, use_sudo=True)
def run(self):
    if self.compress:
        def write(path):
            return gzip.open(path, 'w')
    else:
        def write(path):
            return open(path, 'w')

    s3 = s3_connect(self.region, **self.aws_cred)
    bucket = s3.get_bucket(self.input_bucket, validate = False)

    while True:
        msg = self.queue.get()
        if msg == None:
            break
        source_prefix, target_path = msg

        retries = 0
        while retries < NB_RETRIES:
            try:
                retries += 1
                k = Key(bucket)
                k.key = source_prefix
                data = k.get_contents_as_string()
                if self.decompress:
                    fobj = StringIO(data)
                    with gzip.GzipFile(mode = 'rb', fileobj = fobj) as zobj:
                        data = zobj.read()
                    fobj.close()
                # Create target folder
                mkdirp(os.path.dirname(target_path))
                with write(target_path) as f:
                    f.write(data)
                break
            except:
                print >> sys.stderr, "Failed to download %s to %s" % msg
                print_exc(file = sys.stderr)
                time.sleep(4 * ((retries - 1) ** 2))

        if retries >= NB_RETRIES:
            sys.exit(1)

        if self.output_queue != None:
            self.output_queue.put(target_path)

    s3.close()
def setup(self):
    # Remove work folder, no failures allowed
    if os.path.exists(self.work_folder):
        rmtree(self.work_folder, ignore_errors = False)

    # Create work folder
    mkdirp(self.work_folder)
    mkdirp(self.output_folder)

    job_bundle_target = os.path.join(self.work_folder, "job_bundle.tar.gz")

    # If job_bundle_bucket is None then the bundle is stored locally
    if self.job_bundle_bucket == None:
        copyfile(self.job_bundle_prefix, job_bundle_target)
    else:
        s3 = S3Connection(**self.aws_cred)
        bucket = s3.get_bucket(self.job_bundle_bucket, validate = False)
        key = bucket.get_key(self.job_bundle_prefix)
        key.get_contents_to_filename(job_bundle_target)

    # Extract job_bundle
    self.processor_path = os.path.join(self.work_folder, "code")
    mkdirp(self.processor_path)
    tar = tarfile.open(job_bundle_target)
    tar.extractall(path = self.processor_path)
    tar.close()

    # Create processor
    self.processor = Popen(
        ['./processor',
         os.path.relpath(self.output_folder, self.processor_path)],
        cwd = self.processor_path,
        bufsize = 1,
        stdin = PIPE,
        stdout = sys.stdout,
        stderr = sys.stderr
    )
def setup(override):
    """
    Setup repository according to the plouffile.
    """
    if not valid_repo():
        utils.error('Not a plouf repository. (No \'.plouffile\' file found.)')
        return

    try:
        data = {}
        with open(get_pf_path(), 'r') as pf:
            data = json.load(pf)

        for name, proj in data.setdefault("projects", {}).items():
            make_structure(name,
                           proj.setdefault("type", "exec"),
                           proj.setdefault("tests", False),
                           override)
            utils.info('Creating structure for %s...' % name)
            utils.list_files(utils.get_file_path(name))

        extern_path = utils.get_file_path("extern")
        if "tests" in data:
            utils.mkdirp([extern_path])
            for _, framework_url in data["tests"].items():
                file_path = os.path.join(extern_path,
                                         os.path.basename(framework_url))
                if os.path.exists(file_path):
                    utils.warning('%s file already exists.' % file_path)
                    if not click.confirm('Do you want to override it',
                                         prompt_suffix='?'):
                        continue
                utils.info('Fetching %s...' % framework_url, nl=False)
                urllib.request.urlretrieve(framework_url, file_path)
                click.echo('[OK]')

        utils.success('Setup complete.')
    except Exception as e:
        click.echo(
            click.style(e, fg="red"),
            err=True
        )
def train(self):
    mkdirp(self.config.result_path)
    result_path = self.config.result_path + '/' + start_time()
    mkdirp(result_path)
    filename = os.path.join(result_path, 'train-log.txt')

    if self.config.MultiGPU > 0 and self.config.n_gpu > 1:
        logger.info("Using {} GPU ".format(torch.cuda.device_count()))
        self.Model = nn.DataParallel(self.Model)
    else:
        logger.info("Using Single GPU ")

    self.Model.to(self.device)
    optimizer = optim.Adam(self.Model.parameters(), lr=self.config.lr)

    logger.info("Now Training..")
    self.Model.train()
    for epoch in range(self.config.epochs):
        self.train_epoch(epoch, self.Train_loader, optimizer, self.Model,
                         filename, self.device)

    if self.config.MultiGPU > 0 and self.config.n_gpu > 1:
        checkpoint = {
            "model": self.Model.module.state_dict(),
            "config": self.config,
            "epoch": self.config.epochs
        }
    else:
        checkpoint = {
            "model": self.Model.state_dict(),
            "config": self.config,
            "epoch": self.config.epochs
        }

    logger.info("Now Saving model checkpoint to {}".format(result_path))
    model_name = os.path.join(result_path, 'model.ckpt')
    torch.save(checkpoint, model_name)
def inference():
    parser = argparse.ArgumentParser(description="Image Captioning Evaluation")
    parser.add_argument('--vocab_path', default='data/vocab.pickle', type=str)
    parser.add_argument('--img_path', default='data/test2017/', type=str)
    parser.add_argument('--test_visual_feature_path',
                        default='data/visual_feature_test.pickle', type=str)
    parser.add_argument("--test_path", type=str, help="model path")
    parser.add_argument('--num_workers', default=4, type=int)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--is_train', type=str, default=False)
    parser.add_argument('--eval_coco_idx_path',
                        default='data/test_coco_idx.npy', type=str)
    parser.add_argument("--eval_path", default='eval/', type=str,
                        help="evaluation result path")
    parser.add_argument("--shuffle", default='False', type=str)
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--max_sub_len', type=int, default=30)
    args = parser.parse_args()

    checkpoint = torch.load(os.path.join(args.test_path, 'model.ckpt'))
    eval_dataloader = get_eval_dataloader(args)
    translator = Translator(args, checkpoint)
    eval_result = translate(args, translator, eval_dataloader)

    mkdirp(args.eval_path)
    result_path = os.path.join(args.eval_path, start_time())
    mkdirp(result_path)
    filename = os.path.join(result_path, 'pred.jsonl')
    save_jsonl(eval_result, filename)
    logger.info("Save predict json file at {}".format(result_path))
def make_structure(target_name, what, add_tests, override_flag):
    """
    Depending on the target type what, creates the folder structure.
    """
    rel_paths = [target_name]
    files = [{
        "type": "cmake_base",
        "path": os.path.join(target_name, "CMakeLists.txt")
    }]

    if what == "exec":
        sample_path = os.path.join(target_name, "sample")
        rel_paths += [sample_path, os.path.join(target_name, "src")]
        files.append({
            "type": "main_sample",
            "path": os.path.join(sample_path, "main.cpp")
        })
    elif what == "library":
        include_path = os.path.join(target_name, "include", target_name)
        rel_paths += [include_path, os.path.join(target_name, "src")]
        files.append({
            "type": "header_base",
            "path": os.path.join(include_path, target_name + ".hpp")
        })

    if add_tests:
        tests_path = os.path.join(target_name, "tests")
        rel_paths.append(tests_path)
        files.append({
            "type": "main_tests",
            "path": os.path.join(tests_path, "main_tests.cpp")
        })

    paths = [utils.get_file_path(k) for k in rel_paths]

    # creating folders
    utils.mkdirp(paths)

    # creating templates
    make_templates(files, target_name, override_flag)
def gzipclone(source_folder, target_folder, decompress, compress):
    shutil.rmtree(target_folder, ignore_errors = True)

    if decompress:
        def read(path):
            return gzip.open(path, 'r')
    else:
        def read(path):
            return open(path, 'r')

    if compress:
        def write(path):
            return gzip.open(path, 'w')
    else:
        def write(path):
            return open(path, 'w')

    # Walk source_folder
    for path, folder, files in os.walk(source_folder):
        for f in files:
            source_file = os.path.join(path, f)
            relpath = os.path.relpath(source_file, source_folder)
            target_file = os.path.join(target_folder, relpath)
            mkdirp(os.path.dirname(target_file))
            with read(source_file) as i:
                with write(target_file) as o:
                    shutil.copyfileobj(i, o)
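# Usage sketch for the gzipclone helper above. The folder paths are
# hypothetical; the call simply mirrors a tree while gzip-compressing every
# file on the way out (decompress=False reads the sources as plain files,
# compress=True writes the targets through gzip.open).
gzipclone("/tmp/raw-telemetry", "/tmp/gzipped-telemetry",
          decompress=False, compress=True)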
def __init__(self, input_queue, work_folder, bucket, prefix, region,
             aws_cred):
    self.input_queue_name = input_queue
    self.work_folder = work_folder
    self.data_folder = os.path.join(work_folder, 'data')
    self.bucket_name = bucket
    self.prefix = prefix
    self.region = region
    self.aws_cred = aws_cred
    self.analysis_bucket_name = "jonasfj-telemetry-analysis"
    if self.prefix != '' and not self.prefix.endswith('/'):
        self.prefix += '/'

    # Clear the work folder
    shutil.rmtree(self.work_folder, ignore_errors = True)

    self.s3 = s3_connect(self.region, **self.aws_cred)
    self.bucket = self.s3.get_bucket(self.bucket_name, validate = False)
    self.analysis_bucket = self.s3.get_bucket(self.analysis_bucket_name,
                                              validate = False)

    mkdirp(self.data_folder)

    self.cache_folder = os.path.join(self.work_folder, "cache")
    mkdirp(self.cache_folder)

    self.files_missing_path = os.path.join(self.work_folder, 'FILES_MISSING')
    self.files_processed_path = os.path.join(self.work_folder,
                                             'FILES_PROCESSED')
    self.get_file('FILES_PROCESSED', self.files_processed_path)
    self.get_file('FILES_MISSING', self.files_missing_path)
def mode_query(session_dir, args):
    # Verify volume status
    cmd = ["gluster", 'volume', 'info', args.volume, "--xml"]
    _, data, _ = execute(cmd,
                         exit_msg="Failed to Run Gluster Volume Info",
                         logger=logger)
    try:
        tree = etree.fromstring(data)
        statusStr = tree.find('volInfo/volumes/volume/statusStr').text
    except (ParseError, AttributeError) as e:
        fail("Invalid Volume: %s" % e, logger=logger)

    if statusStr != "Started":
        fail("Volume %s is not online" % args.volume, logger=logger)

    mkdirp(session_dir, exit_on_err=True, logger=logger)
    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)

    # Configure cluster for password-less SSH
    ssh_setup(args)

    # Enable volume options for changelog capture
    enable_volume_options(args)

    # Start query command processing
    if args.since_time:
        start = args.since_time
        logger.debug("Query is called - Session: %s, Volume: %s, "
                     "Start time: %s"
                     % ("default", args.volume, start))

        run_cmd_nodes("query", args, start=start)

        # Merger
        # Read each Changelogs db and generate finaldb
        create_file(args.outfile, exit_on_err=True, logger=logger)
        outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)
        write_output(args, outfilemerger)

        try:
            os.remove(args.outfile + ".db")
        except (IOError, OSError):
            pass

        run_cmd_nodes("cleanup", args)

        sys.stdout.write("Generated output file %s\n" % args.outfile)
    else:
        fail("Please specify --since-time option")
# -*- coding: utf-8 -*-

import os
import matplotlib.pyplot as plt

from framework.mongo import database
from website import settings

from utils import plot_dates, mkdirp

node_collection = database["node"]

FIG_PATH = os.path.join(settings.ANALYTICS_PATH, "figs", "features")
mkdirp(FIG_PATH)


def main():
    dates = [
        record["date_created"]
        for record in node_collection.find(
            {"is_folder": True, "is_dashboard": {"$ne": True}},
            {"date_created": True},
        )
    ]
    plot_dates(dates)
    plt.title("folders ({0} total)".format(len(dates)))
    plt.savefig(os.path.join(FIG_PATH, "folder-actions.png"))
    plt.close()


if __name__ == "__main__":
    main()
action="store_true") parser.add_argument("--output-prefix", help="File prefix in output", default=".") parser.add_argument("--type", default="both") parser.add_argument("-N", "--only-namespace-changes", help="List only namespace changes", action="store_true") return parser.parse_args() if __name__ == "__main__": args = _get_args() mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume), exit_on_err=True) log_file = os.path.join(conf.get_opt("log_dir"), args.session, args.volume, "changelog.log") setup_logger(logger, log_file, args.debug) session_dir = os.path.join(conf.get_opt("session_dir"), args.session) status_file = os.path.join(session_dir, args.volume, "%s.status" % urllib.quote_plus(args.brick)) status_file_pre = status_file + ".pre" mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True, logger=logger) end = -1 if args.only_query: start = args.start
def mode_query(session_dir, args):
    global gtmpfilename

    # Verify volume status
    cmd = ["gluster", 'volume', 'info', args.volume, "--xml"]
    _, data, _ = execute(cmd,
                         exit_msg="Failed to Run Gluster Volume Info",
                         logger=logger)
    try:
        tree = etree.fromstring(data)
        statusStr = tree.find('volInfo/volumes/volume/statusStr').text
    except (ParseError, AttributeError) as e:
        fail("Invalid Volume: %s" % e, logger=logger)

    if statusStr != "Started":
        fail("Volume %s is not online" % args.volume, logger=logger)

    mkdirp(session_dir, exit_on_err=True, logger=logger)
    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)

    # Configure cluster for password-less SSH
    ssh_setup(args)

    # Enable volume options for changelog capture
    enable_volume_options(args)

    # Test options
    if not args.since_time and not args.end_time and not args.full:
        fail("Please specify either {--since-time and optionally --end-time} "
             "or --full", logger=logger)

    if args.since_time and args.end_time and args.full:
        fail("Please specify either {--since-time and optionally --end-time} "
             "or --full, but not both", logger=logger)

    if args.end_time and not args.since_time:
        fail("Please specify --since-time as well", logger=logger)

    # Start query command processing
    start = -1
    end = -1
    if args.since_time:
        start = args.since_time
        if args.end_time:
            end = args.end_time
    else:
        start = 0  # --full option is handled separately

    logger.debug("Query is called - Session: %s, Volume: %s, "
                 "Start time: %s, End time: %s"
                 % ("default", args.volume, start, end))

    prefix = datetime.now().strftime("%Y%m%d-%H%M%S-%f-")
    gtmpfilename = prefix + next(tempfile._get_candidate_names())

    run_cmd_nodes("query", args, start=start, end=end,
                  tmpfilename=gtmpfilename)

    # Merger
    if args.full:
        cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
        execute(cmd,
                exit_msg="Failed to merge output files "
                "collected from nodes",
                logger=logger)
    else:
        # Read each Changelogs db and generate finaldb
        create_file(args.outfile, exit_on_err=True, logger=logger)
        outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)
        write_output(args.outfile, outfilemerger, args.field_separator)

    try:
        os.remove(args.outfile + ".db")
    except (IOError, OSError):
        pass

    run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename)

    sys.stdout.write("Generated output file %s\n" % args.outfile)
def run_cmd_nodes(task, args, **kwargs):
    global node_outfiles
    nodes = get_nodes(args.volume)
    pool = []

    for num, node in enumerate(nodes):
        host, brick = node[1].split(":")
        host_uuid = node[0]
        cmd = []
        opts = {}

        # tmpfilename is valid only for tasks: pre, query and cleanup
        tmpfilename = kwargs.get("tmpfilename", "BADNAME")

        node_outfile = os.path.join(conf.get_opt("working_dir"),
                                    args.session, args.volume,
                                    tmpfilename,
                                    "tmp_output_%s" % num)

        if task == "pre":
            if vol_statusStr != "Started":
                fail("Volume %s is not online" % args.volume,
                     logger=logger)

            # If Full backup is requested or start time is zero, use brickfind
            change_detector = conf.get_change_detector("changelog")
            tag = None
            if args.full:
                change_detector = conf.get_change_detector("brickfind")
                tag = args.tag_for_full_find.strip()
                if tag == "":
                    tag = '""' if not is_host_local(host_uuid) else ""

            node_outfiles.append(node_outfile)
            # remote file will be copied into this directory
            mkdirp(os.path.dirname(node_outfile),
                   exit_on_err=True, logger=logger)

            FS = args.field_separator
            if not is_host_local(host_uuid):
                FS = "'" + FS + "'"

            cmd = [change_detector,
                   args.session,
                   args.volume,
                   host,
                   brick,
                   node_outfile] + \
                ([str(kwargs.get("start")), str(kwargs.get("end"))]
                 if not args.full else []) + \
                ([tag] if tag is not None else []) + \
                ["--output-prefix", args.output_prefix] + \
                (["--debug"] if args.debug else []) + \
                (["--no-encode"] if args.no_encode else []) + \
                (["--only-namespace-changes"]
                 if args.only_namespace_changes else []) + \
                (["--field-separator", FS] if args.full else [])

            opts["node_outfile"] = node_outfile
            opts["copy_outfile"] = True
        elif task == "query":
            # If Full backup is requested or start time is zero, use brickfind
            tag = None
            change_detector = conf.get_change_detector("changelog")
            if args.full:
                change_detector = conf.get_change_detector("brickfind")
                tag = args.tag_for_full_find.strip()
                if tag == "":
                    tag = '""' if not is_host_local(host_uuid) else ""

            node_outfiles.append(node_outfile)
            # remote file will be copied into this directory
            mkdirp(os.path.dirname(node_outfile),
                   exit_on_err=True, logger=logger)

            FS = args.field_separator
            if not is_host_local(host_uuid):
                FS = "'" + FS + "'"

            cmd = [change_detector,
                   args.session,
                   args.volume,
                   host,
                   brick,
                   node_outfile] + \
                ([str(kwargs.get("start")), str(kwargs.get("end"))]
                 if not args.full else []) + \
                ([tag] if tag is not None else []) + \
                ["--only-query"] + \
                ["--output-prefix", args.output_prefix] + \
                (["--debug"] if args.debug else []) + \
                (["--no-encode"] if args.no_encode else []) + \
                (["--only-namespace-changes"]
                 if args.only_namespace_changes else []) + \
                (["--field-separator", FS] if args.full else [])

            opts["node_outfile"] = node_outfile
            opts["copy_outfile"] = True
        elif task == "cleanup":
            # After pre/query run, cleanup the working directory and other
            # temp files. Remove the directory to which node_outfile has
            # been copied in main node
            try:
                os.remove(node_outfile)
            except (OSError, IOError):
                logger.warn("Failed to cleanup temporary file %s"
                            % node_outfile)
                pass

            cmd = [conf.get_opt("nodeagent"),
                   "cleanup",
                   args.session,
                   args.volume,
                   os.path.dirname(node_outfile)] + \
                (["--debug"] if args.debug else [])
        elif task == "create":
            if vol_statusStr != "Started":
                fail("Volume %s is not online" % args.volume,
                     logger=logger)

            # When glusterfind create, create session directory in
            # each brick nodes
            cmd = [conf.get_opt("nodeagent"),
                   "create",
                   args.session,
                   args.volume,
                   brick,
                   kwargs.get("time_to_update")] + \
                (["--debug"] if args.debug else []) + \
                (["--reset-session-time"] if args.reset_session_time
                 else [])
        elif task == "post":
            # Rename pre status file to actual status file in each node
            cmd = [conf.get_opt("nodeagent"),
                   "post",
                   args.session,
                   args.volume,
                   brick] + \
                (["--debug"] if args.debug else [])
        elif task == "delete":
            # When glusterfind delete, cleanup all the session files/dirs
            # from each node.
            cmd = [conf.get_opt("nodeagent"),
                   "delete",
                   args.session,
                   args.volume] + \
                (["--debug"] if args.debug else [])

        if cmd:
            p = Process(target=node_cmd,
                        args=(host, host_uuid, task, cmd, args, opts))
            p.start()
            pool.append(p)

    for num, p in enumerate(pool):
        p.join()
        if p.exitcode != 0:
            logger.warn("Command %s failed in %s" % (task, nodes[num][1]))
            if task in ["create", "delete"]:
                fail("Command %s failed in %s" % (task, nodes[num][1]))
            elif task == "pre" and args.disable_partial:
                sys.exit(1)
def run_build(build, store):
    """
    Run any build specified in the database to be built. This is the
    main entry point.
    """
    # Run scripts in the buildset in the context of the build settings
    dlog("Starting build of buildset " + build.buildset.name)

    # Make sure that the build dir is present
    co_dir = os.path.expanduser(OPTIONS.build_area)
    mkdirp(co_dir)

    build.work_dir = unicode(co_dir)
    build.start_time = datetime.utcnow()
    store = Store.of(build)
    store.commit()

    # Run all scripts in this buildset
    index = 0

    # Run script
    logger = log_stdout()
    bs = store.find(db.build_script_status,
                    db.build_script_status.build_id == build.id).order_by(
                        Asc(db.build_script_status.idx))
    build_script_statuses = map(lambda a: a, bs)
    store.commit()

    for build_status in build_script_statuses:
        build_status.start_time = datetime.utcnow()
        store.commit()
        dlog("------------------------------------------------------------------")

        if OPTIONS.stop_index <= index:
            dlog("Skipping rest of scripts in buildset as requested by "
                 "set option", build_status)
            break

        if OPTIONS.start_index > index:
            dlog("Skipping until index " + str(OPTIONS.start_index) +
                 " (current step " + str(index) + ")", build_status)
            index += 1
            continue
        index += 1

        def do_log(msg):
            logger(msg)
            build_status.log += unicode(msg)

        ss = build_status.buildset_script
        if ss.script:
            scriptpath = build_status.buildset_script.script.path
            build_status.exit_code = run_script(do_log, scriptpath)
            # run_script(do_log,
            #            spath + os.sep +
            #            build_status.buildset_script.script.path)

        build_status.end_time = datetime.utcnow()
        if not build_status.exit_code:
            build_status.buildset_script.last_duration = \
                (build_status.end_time - build_status.start_time).seconds

    build.end_time = datetime.utcnow()
    store.commit()

    # buildset.updateMetadata(timings=timings)
    # timings[None] = (buildset.name, end_time - start_time)
    dlog("Build completed")
# -*- coding: utf-8 -*-

import os
import matplotlib.pyplot as plt

from framework.mongo import database
from website import settings

from utils import plot_dates, mkdirp

comment_collection = database['comment']

FIG_PATH = os.path.join(settings.ANALYTICS_PATH, 'figs', 'features')
mkdirp(FIG_PATH)


def main():
    dates = [
        record['date_created']
        for record in comment_collection.find({}, {'date_created': True})
    ]
    plot_dates(dates)
    plt.title('comments ({0} total)'.format(len(dates)))
    plt.savefig(os.path.join(FIG_PATH, 'comment-actions.png'))
    plt.close()


if __name__ == '__main__':
    main()
def startMosaic(opts):
    killMosaic(opts.print_only)
    build.build(False, False)

    meta_dirs = []
    tile_dirs = []
    global_dir = conf.getGlobalsDir(opts.dataset)

    # set up fault-tolerance dir if required
    fault_tolerance_dir = conf.getFaultToleranceDir(opts.dataset)
    if opts.fault_tolerant_mode:
        shutil.rmtree(fault_tolerance_dir, True)
        utils.mkdirp(fault_tolerance_dir, conf.FILE_GROUP)

    perf_events_dir = conf.getPerfEventsDir(opts.dataset)
    if opts.enable_perf_event_collection:
        utils.mkdirp(perf_events_dir, conf.FILE_GROUP)

    for i in range(0, int(opts.nmic)):
        meta_dirs.append(
            conf.getMicSubdir(conf.SG_DATAPATH_VERTEX_ENGINE[i],
                              opts.dataset, "meta", i))
        tile_dirs.append(
            conf.getMicSubdir(conf.SG_DATAPATH_VERTEX_ENGINE[i],
                              opts.dataset, "tile", i))

    in_memory_mode_int = 1 if opts.in_memory_mode else 0

    meta_dirs_string = ":".join(meta_dirs)
    tile_dirs_string = ":".join(tile_dirs)

    enable_tile_partitioning_int = 1 if opts.enable_tile_partitioning else 0
    enable_fault_tolerance_int = 1 if opts.fault_tolerant_mode else 0
    enable_perf_event_collection_int = \
        1 if opts.enable_perf_event_collection else 0

    # for selective scheduling
    use_selective_scheduling_int = 1 if \
        conf.SG_ALGORITHM_ENABLE_SELECTIVE_SCHEDULING[opts.algorithm] else 0

    if opts.dataset in conf.SG_DATASET_DISABLE_SELECTIVE_SCHEDULING:
        use_selective_scheduling_int = 0

    # For pinning, count threads and determine if we need to use smt or not.
    count_tile_readers = conf.SG_NREADER
    count_tile_processors = conf.SG_NPROCESSOR
    edge_engine_per_socket = opts.nmic / topo.NUM_SOCKET
    count_threads_per_edge_engine = (opts.count_indexreader +
                                     opts.count_vertex_fetcher +
                                     opts.count_vertex_reducer +
                                     count_tile_readers +
                                     count_tile_processors)
    count_threads_per_socket = (count_threads_per_edge_engine *
                                edge_engine_per_socket +
                                opts.count_globalreducer / topo.NUM_SOCKET)
    use_smt_int = 1 if \
        count_threads_per_socket >= topo.NUM_PHYSICAL_CPU_PER_SOCKET else 0

    # Set the size of the read tiles rb to the in memory value iff not
    # running on the mic and the in memory mode is activated.
    read_tiles_rb_size = conf.SG_RB_SIZE_READ_TILES
    if opts.in_memory_mode:
        read_tiles_rb_size = conf.SG_RB_SIZE_READ_TILES_IN_MEMORY

    args = [
        "--algorithm", opts.algorithm,
        "--max-iterations", opts.max_iterations,
        "--nmic", opts.nmic,
        "--count-applier", opts.count_applier,
        "--count-globalreducer", opts.count_globalreducer,
        "--count-globalfetcher", opts.count_globalfetcher,
        "--count-indexreader", opts.count_indexreader,
        "--count-vertex-reducer", opts.count_vertex_reducer,
        "--count-vertex-fetcher", opts.count_vertex_fetcher,
        "--in-memory-mode", in_memory_mode_int,
        "--paths-meta", meta_dirs_string,
        "--paths-tile", tile_dirs_string,
        "--path-globals", global_dir,
        "--use-selective-scheduling", use_selective_scheduling_int,
        "--path-fault-tolerance-output", fault_tolerance_dir,
        "--enable-fault-tolerance", enable_fault_tolerance_int,
        "--enable-tile-partitioning", enable_tile_partitioning_int,
        "--count-tile-reader", count_tile_readers,
        "--local-fetcher-mode", opts.local_fetcher_mode,
        "--global-fetcher-mode", opts.global_fetcher_mode,
        "--enable-perf-event-collection", enable_perf_event_collection_int,
        "--path-perf-events", perf_events_dir,
        "--count-tile-processors", count_tile_processors,
        "--use-smt", use_smt_int,
        "--host-tiles-rb-size", conf.SG_RB_SIZE_HOST_TILES,
        "--local-reducer-mode", opts.local_reducer_mode,
        "--processed-rb-size", conf.SG_RB_SIZE_PROCESSED,
        "--read-tiles-rb-size", read_tiles_rb_size,
        "--tile-processor-mode", opts.tile_processor_mode,
        "--tile-processor-input-mode", opts.tile_processor_input_mode,
        "--tile-processor-output-mode", opts.tile_processor_output_mode,
        "--count-followers", opts.count_followers,
    ]

    if opts.enable_log:
        log_dir = os.path.join(conf.LOG_ROOT, (conf.getWeightedName(
            opts.dataset, conf.SG_ALGORITHM_WEIGHTED[opts.algorithm])))
        utils.mkdirp(log_dir, conf.FILE_GROUP)
        args = args + ["--log", log_dir]

    if opts.debug:
        b = conf.DBIN_MOSAIC
    else:
        b = conf.RBIN_MOSAIC

    # We need sudo for scif
    args = [b] + args

    if opts.gdb:
        args = ["gdb", "--args"] + args

    # We need sudo for scif
    # args = ["sudo", "LD_LIBRARY_PATH=/usr/lib64/:$LD_LIBRARY_PATH"] + args
    # args = ["sudo", "valgrind"] + args

    if opts.run == "perfstat":
        args = [
            "perf", "stat", "-B", "-e",
            "cache-references,cache-misses,cycles,instructions,branches,faults,migrations"
        ] + args

    if opts.run == "likwid":
        max_cpu_id = multiprocessing.cpu_count() - 1
        args = [
            "likwid-perfctr", "-f",
            "-g", "NUMA",
            "-g", "L2",
            "-g", "L2CACHE",
            "-g", "BRANCH",
            "-g", "CYCLE_ACTIVITY",
            "-g", "L3",
            "-g", "L3CACHE",
            "-c", "0-%d" % max_cpu_id
        ] + args

    args = ["sudo"] + args

    if not opts.print_only:
        if opts.gdb:
            utils.run(opts.print_only, *args)
        else:
            out_file = utils.getVertexEngineLogName(opts)
            utils.run_output(opts.print_only, out_file, *args)
# Generating the stp
np.random.seed(i * 100 + xi + occurr)
stp = generate_stp(occurr, xi, 1 * pq.s, np.arange(5, 5 * xi, 5) * pq.ms)

# Merging the stp into the first xi sts
sts_pool = [0] * xi
for st_id, st in enumerate(stp):
    sts_pool[st_id] = stg._pool_two_spiketrains(st, sts[st_id])

# Storing datasets containing stps
if i == 0:
    sts_rep['sts_%iocc_%ixi' % (occurr, xi)] = [sts_pool + sts[xi:]]
    sts_rep['stp_%iocc_%ixi' % (occurr, xi)] = [stp]
else:
    sts_rep['sts_%iocc_%ixi' % (occurr, xi)].append(sts_pool + sts[xi:])
    sts_rep['stp_%iocc_%ixi' % (occurr, xi)].append(stp)

sts_rep['params_background'] = params_background

# Saving the datasets
filepath = '../data/'
path_temp = './'
for folder in split_path(filepath):
    path_temp = path_temp + '/' + folder
    mkdirp(path_temp)
filename = 'stp_data%i' % data_idx
np.save(filepath + filename, sts_rep)

print(time.time() - t0)
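# Key layout of sts_rep as built above, illustrated with hypothetical
# parameters occurr=5 and xi=3 (real values follow the same naming pattern):
#   'sts_%iocc_%ixi' % (5, 3)   # -> 'sts_5occ_3xi'
# sts_rep['sts_5occ_3xi'][rep] holds the spike trains with the pattern merged
# into the first xi trains, sts_rep['stp_5occ_3xi'][rep] holds the injected
# pattern itself, and sts_rep['params_background'] stores the background
# generation parameters.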
def updateresults(input_folder, work_folder, bucket, prefix, cache_folder,
                  region, aws_cred, nb_workers):
    # Find input files
    input_files = []
    for path, folders, files in os.walk(input_folder):
        for f in files:
            # Get channel version
            cv = os.path.relpath(os.path.join(path, f), input_folder)
            input_files.append(cv)

    # Sanitize prefix
    if prefix[-1] != '/':
        prefix += '/'

    # Connect to s3
    s3 = s3_connect(region, **aws_cred)
    s3_bucket = s3.get_bucket(bucket, validate=False)

    # Download versions.json if not in cache and load versions
    versions_json = os.path.join(cache_folder, 'versions.json')
    if not os.path.isfile(versions_json):
        versions = s3get_json(s3_bucket, prefix + 'versions.json', True, {})
        with open(versions_json, 'w') as f:
            json.dump(versions, f)
    else:
        with open(versions_json, 'r') as f:
            versions = json.load(f)

    # Update results in bucket
    for channel_version in input_files:
        print "### Updating: " + channel_version

        # Download all files for channel_version to disk
        rmtree(work_folder, ignore_errors=True)
        data_folder = os.path.join(work_folder, channel_version)
        mkdirp(data_folder)
        snapshot = versions.get(channel_version, None)
        if snapshot:
            fetched = False
            while not fetched:
                fetched = s3get(bucket, prefix + snapshot, data_folder, True,
                                False, region, aws_cred)
                if not fetched:
                    print >> sys.stderr, "Failed to download %s" % snapshot
                    sleep(5 * 60)
            print " - downloaded " + snapshot

        # Create ChannelVersionManager
        channel, version = channel_version.split('/')
        manager = ChannelVersionManager(work_folder, channel, version,
                                        False, False, False)

        # Feed it with rows from input_file
        rows = 0
        with open(os.path.join(input_folder, channel_version), 'r') as f:
            for line in f:
                try:
                    filePath, blob = line.split('\t')
                    channel_, version_, measure, byDateType = filePath.split('/')
                    blob = json.loads(blob)
                    if channel_ != channel or version_ != version:
                        print >> sys.stderr, (
                            "Error: Found %s/%s within a %s file!" %
                            (channel_, version_, channel_version))
                        continue
                    manager.merge_in_blob(measure, byDateType, blob)
                    rows += 1
                except:
                    print >> sys.stderr, "Error while handling row:"
                    print_exc(file=sys.stderr)
        manager.flush()
        print " - merged rows %i" % rows

        # Upload updated files to S3
        date = datetime.utcnow().strftime("%Y%m%d%H%M%S")
        cv_prefix = "%s-%s-%s" % (date, version, channel)
        uploaded = False
        while not uploaded:
            uploaded = s3put(data_folder, bucket, prefix + cv_prefix, False,
                             True, region, aws_cred, nb_workers)
            if not uploaded:
                print >> sys.stderr, "Failed to upload '%s'" % cv_prefix
                sleep(5 * 60)
        print " - uploaded to " + cv_prefix

        # Store changes in versions
        versions[channel_version] = cv_prefix

        # Upload new versions.json and write to cache
        s3put_json(s3_bucket, prefix + 'versions.json', True, versions)
        with open(versions_json, 'w') as f:
            json.dump(versions, f)
        print "### New snapshot uploaded"

    try:
        # Garbage collect old channel/version folders on S3
        collect_garbage(bucket, prefix, cache_folder, region, aws_cred,
                        nb_workers)
    except:
        print >> sys.stderr, "Failed to collect garbage on S3"
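# The download and upload loops in updateresults() above retry every five
# minutes until S3 cooperates. A generic helper along these lines could factor
# out that pattern (a sketch only; retry_until_true is an invented name and is
# not part of the original module):
def retry_until_true(operation, what, delay=5 * 60):
    # Call operation() until it returns True, logging each failure to stderr
    # and sleeping `delay` seconds between attempts.
    while True:
        if operation():
            return
        print >> sys.stderr, "Failed to %s, retrying in %i seconds" % (what, delay)
        sleep(delay)
# Usage, e.g.:
#   retry_until_true(
#       lambda: s3put(data_folder, bucket, prefix + cv_prefix, False, True,
#                     region, aws_cred, nb_workers),
#       "upload '%s'" % cv_prefix)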
    parser.add_argument("outfile", help="Output File")
    parser.add_argument("start", help="Start Time", type=float)
    parser.add_argument("--debug", help="Debug", action="store_true")
    parser.add_argument("--output-prefix", help="File prefix in output",
                        default=".")
    return parser.parse_args()


if __name__ == "__main__":
    args = _get_args()
    session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
    status_file = os.path.join(session_dir, args.volume,
                               "%s.status" % urllib.quote_plus(args.brick))
    status_file_pre = status_file + ".pre"
    mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
           logger=logger)
    mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
           exit_on_err=True)
    log_file = os.path.join(conf.get_opt("log_dir"),
                            args.session,
                            args.volume,
                            "brickfind.log")
    setup_logger(logger, log_file, args.debug)
    time_to_update = int(time.time())
    brickfind_crawl(args.brick, args)
    with open(status_file_pre, "w", buffering=0) as f:
        f.write(str(time_to_update))
    sys.exit(0)
def mode_pre(session_dir, args):
    """
    Read from Session file and write to session.pre file
    """
    endtime_to_update = int(time.time()) - get_changelog_rollover_time(
        args.volume)
    status_file = os.path.join(session_dir, args.volume, "status")
    status_file_pre = status_file + ".pre"

    mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)

    # If Pre status file exists and running pre command again
    if os.path.exists(status_file_pre) and not args.regenerate_outfile:
        fail("Post command is not run after last pre, "
             "use --regenerate-outfile")

    start = 0
    try:
        with open(status_file) as f:
            start = int(f.read().strip())
    except ValueError:
        pass
    except (OSError, IOError) as e:
        fail("Error Opening Session file %s: %s" % (status_file, e),
             logger=logger)

    logger.debug("Pre is called - Session: %s, Volume: %s, "
                 "Start time: %s, End time: %s"
                 % (args.session, args.volume, start, endtime_to_update))

    run_cmd_nodes("pre", args, start=start)

    # Merger
    if args.full:
        cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
        execute(cmd,
                exit_msg="Failed to merge output files "
                "collected from nodes",
                logger=logger)
    else:
        # Read each Changelogs db and generate finaldb
        create_file(args.outfile, exit_on_err=True, logger=logger)
        outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)

        with open(args.outfile, "a") as f:
            for row in outfilemerger.get():
                # Multiple paths in case of Hardlinks
                paths = row[1].split(",")
                for p in paths:
                    if p == "":
                        continue
                    f.write("%s %s %s\n" % (row[0], p, row[2]))

    try:
        os.remove(args.outfile + ".db")
    except (IOError, OSError):
        pass

    run_cmd_nodes("cleanup", args)

    with open(status_file_pre, "w", buffering=0) as f:
        f.write(str(endtime_to_update))

    sys.stdout.write("Generated output file %s\n" % args.outfile)
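# In --full mode the per-node output files are merged with `sort -u`, as shown
# in mode_pre() above. A pure-Python sketch of what that step achieves
# (illustration only; the real code shells out to sort, and _merge_unique is
# not part of this tool):
def _merge_unique(infiles, outfile):
    # Concatenate the per-node files, drop duplicate lines and write a
    # sorted result.
    lines = set()
    for path in infiles:
        with open(path) as f:
            lines.update(f)
    with open(outfile, "w") as f:
        f.writelines(sorted(lines))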
def combine_hypotest_files(inpaths, outpath):

    regions = args.region.split(',')

    # create new files (best expected)
    mkdirp(outpath)

    # Combine text files
    # description = "expectedUpperLimitMinus1Sig/F:upperLimitEstimatedError/F:fitstatus/F:p0d2s/F:p0u2s/F:seed/F:CLsexp/F:sigma1/F:failedfit/F:expectedUpperLimitPlus2Sig/F:nofit/F:nexp/F:sigma0/F:clsd2s/F:m3/F:expectedUpperLimit/F:failedstatus/F:xsec/F:covqual/F:upperLimit/F:p0d1s/F:clsd1s/F:failedp0/F:failedcov/F:p0exp/F:p1/F:p0u1s/F:excludedXsec/F:p0/F:clsu1s/F:clsu2s/F:expectedUpperLimitMinus2Sig/F:expectedUpperLimitPlus1Sig/F:mu/F:mode/F:fID/C:dodgycov/F:CLs/F"

    new_lines_nom = dict()
    new_lines_dn = dict()
    new_lines_up = dict()

    cls_dict = dict()

    for i, path in enumerate(inpaths):

        region = regions[i]

        lines_nom = open(path + '/Output_hypotest__1_harvest_list').read().split('\n')
        if sig_xs_syst:
            lines_nom = open(path + '/Output_fixSigXSecNominal_hypotest__1_harvest_list').read().split('\n')
            lines_dn = open(path + '/Output_fixSigXSecDown_hypotest__1_harvest_list').read().split('\n')
            lines_up = open(path + '/Output_fixSigXSecUp_hypotest__1_harvest_list').read().split('\n')

        for jline, line in enumerate(lines_nom):

            if not line:
                continue

            vals = line.split()

            m3 = int(float(vals[14]))
            mu = int(float(vals[33]))

            # cls = float(vals[-1])  # observed CLs
            cls = float(vals[6])  # expected CLs

            if (m3, mu) not in cls_dict:
                cls_dict[(m3, mu)] = cls
                new_lines_nom[(m3, mu)] = line
                if sig_xs_syst:
                    new_lines_dn[(m3, mu)] = lines_dn[jline]
                    new_lines_up[(m3, mu)] = lines_up[jline]

                print 'copying list for (%i, %i) %s CLs = %.3f' % (m3, mu, region, cls)
            else:
                if cls < cls_dict[(m3, mu)]:
                    print 'changing list for (%i, %i) %s CLs = %.3f (old = %.3f)' % (m3, mu, region, cls, cls_dict[(m3, mu)])

                    cls_dict[(m3, mu)] = cls
                    new_lines_nom[(m3, mu)] = line
                    if sig_xs_syst:
                        new_lines_dn[(m3, mu)] = lines_dn[jline]
                        new_lines_up[(m3, mu)] = lines_up[jline]

    # Save new list
    if sig_xs_syst:
        with open(outpath + '/Output_fixSigXSecNominal_hypotest__1_harvest_list', 'w') as f:
            for line in new_lines_nom.itervalues():
                f.write(line + '\n')
        with open(outpath + '/Output_fixSigXSecDown_hypotest__1_harvest_list', 'w') as f:
            for line in new_lines_dn.itervalues():
                f.write(line + '\n')
        with open(outpath + '/Output_fixSigXSecUp_hypotest__1_harvest_list', 'w') as f:
            for line in new_lines_up.itervalues():
                f.write(line + '\n')
    else:
        with open(outpath + '/Output_hypotest__1_harvest_list', 'w') as f:
            for line in new_lines_nom.itervalues():
                f.write(line + '\n')
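# Illustration of the selection rule implemented in combine_hypotest_files()
# above, with made-up expected CLs values (these are not real fit results, and
# the region names are hypothetical): for a single (m3, mu) point seen in
# several regions, the harvest line from the region with the smallest expected
# CLs is the one that is kept.
def _cls_selection_example():
    expected_cls = {'SRA': 0.12, 'SRB': 0.07, 'SRC': 0.31}  # hypothetical
    return min(expected_cls, key=expected_cls.get)          # -> 'SRB'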
def generate_graph_in_memory(opts):
    def run(args):
        utils.run(opts.print_only, "time", *args)

    # make sure proper config is loaded
    run(["sudo", "./startup-config"])

    if opts.debug:
        grc_in_memory = conf.DBIN_GRC_IN_MEMORY
    else:
        grc_in_memory = conf.RBIN_GRC_IN_MEMORY

    input_weighted = 0
    if conf.SG_INPUT_WEIGHTED.get(opts.dataset, False):
        input_weighted = 1

    # populate hashed dirs
    num_dir = conf.SG_NUM_HASH_DIRS
    meta_dirs = []
    tile_dirs = []

    global_dir = conf.getGlobalsDir(opts.dataset, opts.weighted_output)
    utils.mkdirp(global_dir, conf.FILE_GROUP)

    if opts.weighted_output:
        unweighted_stat = os.path.join(conf.getGlobalsDir(opts.dataset, False),
                                       "stat.dat")
        weighted_stat = os.path.join(conf.getGlobalsDir(opts.dataset, True),
                                     "stat.dat")
        shutil.copyfile(unweighted_stat, weighted_stat)

        unweighted_deg = os.path.join(conf.getGlobalsDir(opts.dataset, False),
                                      "vertex_deg.dat")
        weighted_deg = os.path.join(conf.getGlobalsDir(opts.dataset, True),
                                    "vertex_deg.dat")
        shutil.copyfile(unweighted_deg, weighted_deg)

        unweighted_global_to_orig = os.path.join(
            conf.getGlobalsDir(opts.dataset, False),
            "vertex_global_to_orig.dat")
        weighted_global_to_orig = os.path.join(
            conf.getGlobalsDir(opts.dataset, True),
            "vertex_global_to_orig.dat")
        shutil.copyfile(unweighted_global_to_orig, weighted_global_to_orig)

    for i in range(0, len(conf.SG_GRC_OUTPUT_DIRS)):
        meta_dir = conf.getGrcMetaDir(opts.dataset, i, opts.weighted_output)
        tile_dir = conf.getGrcTileDir(opts.dataset, i, opts.weighted_output)

        shutil.rmtree(meta_dir, True)
        shutil.rmtree(tile_dir, True)

        utils.mkdirp(meta_dir, conf.FILE_GROUP)
        utils.mkdirp(tile_dir, conf.FILE_GROUP)

        utils.populate_hash_dirs(num_dir, meta_dir)
        utils.populate_hash_dirs(num_dir, tile_dir)

        meta_dirs.append(meta_dir)
        tile_dirs.append(tile_dir)

    output_weighted = 0
    if opts.weighted_output:
        output_weighted = 1

    use_rle_int = 0
    if opts.use_rle:
        use_rle_int = 1

    generator = ""
    delimiter = ""
    count_vertices = 0
    count_edges = 0
    use_original_ids = 0
    input_file = ""

    if opts.rmat:
        generator = "rmat"
        count_vertices = conf.SG_GRAPH_SETTINGS_RMAT[
            opts.dataset]["count_vertices"]
        count_edges = conf.SG_GRAPH_SETTINGS_RMAT[opts.dataset]["count_edges"]
        use_original_ids = 1 if conf.SG_GRAPH_SETTINGS_RMAT[
            opts.dataset]["use_original_ids"] else 0
    elif opts.binary:
        generator = "binary"
        input_file = conf.SG_INPUT_FILE[opts.dataset]["binary"]
        count_vertices = conf.SG_GRAPH_SETTINGS_DELIM[
            opts.dataset]["count_vertices"]
        use_original_ids = 1 if conf.SG_GRAPH_SETTINGS_DELIM[
            opts.dataset]["use_original_ids"] else 0
    else:
        generator = "delim_edges"
        input_file = conf.SG_INPUT_FILE[opts.dataset]["delim"]
        count_vertices = conf.SG_GRAPH_SETTINGS_DELIM[
            opts.dataset]["count_vertices"]
        delimiter = conf.SG_GRAPH_SETTINGS_DELIM[opts.dataset]["delimiter"]
        use_original_ids = 1 if conf.SG_GRAPH_SETTINGS_DELIM[
            opts.dataset]["use_original_ids"] else 0

    if not opts.rmat:
        if not os.path.exists(input_file):
            print("Failed to find %s" % input_file)
            exit(1)

    args = [
        grc_in_memory,
        "--source", input_file,
        "--count-vertices", count_vertices,
        "--generator", generator,
        "--graphname", opts.dataset,
        "--path-globals", global_dir,
        "--paths-meta", ":".join(meta_dirs),
        "--paths-tile", ":".join(tile_dirs),
        "--nthreads", conf.SG_GRC_NTHREADS_PARTITIONER,
        "--npartition-managers", conf.SG_GRC_NPARTITION_MANAGERS,
        "--input-weighted", input_weighted,
        "--output-weighted", output_weighted,
        "--rmat-count-edges", count_edges,
        "--use-run-length-encoding", use_rle_int,
        "--use-original-ids", use_original_ids,
        "--traversal", opts.traversal,
        "--delimiter", delimiter,
    ]

    if opts.gdb:
        args = ["gdb", "--args"] + args

    run(args)
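# Condensed restatement of the generator selection in
# generate_graph_in_memory() above (no new behavior; _pick_generator is just
# an illustrative name): --rmat takes precedence over --binary, and a
# delimited text edge list is the fallback.
def _pick_generator(rmat, binary):
    if rmat:
        return "rmat"         # synthetic RMAT graph, needs rmat-count-edges
    if binary:
        return "binary"       # pre-converted binary edge list
    return "delim_edges"      # delimited text edge list, needs a delimiter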