def sanitize_paths(job):
    """Remove the common prefix from paths.

    This method takes a job payload, iterates through all paths, and
    removes all their common prefixes. This is an effort to only submit
    information on a need-to-know basis to MythX. Unless it's to
    distinguish between files, the API does not need to know the
    absolute path of a file. This may even leak user information and
    should be removed.

    If a common prefix cannot be found (e.g. if there is just one
    element in the source list), the relative path from the current
    working directory will be returned.

    This concerns the following fields:
    - sources
    - AST absolute path
    - legacy AST absolute path
    - source list
    - main source

    :param job: The payload to sanitize
    :return: The sanitized job payload (mutated in place and returned)
    """
    source_list = job.get("source_list")
    if not source_list:  # triggers on None and empty list
        # if no source list is given, we are analyzing bytecode only
        return job

    source_list = [abspath(s) for s in source_list]
    if len(source_list) > 1:
        # get common path prefix and remove it
        prefix = commonpath(source_list)
    else:
        # fallback: use CWD as the second path and get the common prefix
        prefix = commonpath(source_list + [str(Path.cwd())])

    def _strip(p):
        # Remove only the *leading* prefix. The previous
        # ``p.replace(prefix, "")`` also deleted interior repetitions of
        # the prefix string anywhere in the path.
        return p[len(prefix):] if p.startswith(prefix) else p

    job["source_list"] = [_strip(s) for s in source_list]
    if job.get("main_source") is not None:
        job["main_source"] = _strip(job["main_source"])

    for name in list(job.get("sources", {})):
        data = job["sources"].pop(name)
        # sanitize AST data in compiler output
        for ast_key in ("ast", "legacyAST"):
            if not (data.get(ast_key) and data[ast_key].get("absolutePath")):
                continue
            data[ast_key]["absolutePath"] = _strip(data[ast_key]["absolutePath"])
        # replace source key names
        job["sources"][_strip(name)] = data

    return job
def _traversal_guard(self):
    """
    Check the absolute path of the MEDIAGUARD_ROOT against the abs path
    from the file. Ensures that nothing nasty is attempted for serving
    files outside the scope of the root folder.
    """
    root = path.abspath(settings.MEDIAGUARD_ROOT)
    # the media path is inside the root iff their common path is the root
    shared = path.commonpath([root, self.abs_media_path])
    if path.commonpath([root]) != shared:
        raise exceptions.SuspiciousFileOperation(
            "Potential directory traversal attack")
def folders_to_open(default_dir: str = getcwd()) -> Iterable[str]:
    """Run the multi-folder selection dialog and return the chosen folders.

    NOTE(review): the ``getcwd()`` default is evaluated once at import
    time, not per call — confirm that is the intended behavior.

    :param default_dir: directory the dialog starts in
    :return: the selected folder paths, with a leading parent folder
        dropped if the first entry is an ancestor of the second
    """
    app = QApplication(list())
    window = MultiFolderApp(default_dir)
    # noinspection PyTypeChecker,PyCallByClass
    QTimer.singleShot(0, window.show_ui)
    # blocks until the Qt event loop exits (dialog closed)
    app.exec_()
    file_names = window.file_names
    # if the first item is a parent folder
    if len(file_names) > 1:
        parent = path.commonpath(
            [path.abspath(file_names[0]), path.abspath(file_names[1])])
        # commonpath of a single path is the path itself, so this tests
        # whether file_names[0] is an ancestor of file_names[1]
        if parent == path.commonpath([path.abspath(file_names[0])]):
            return file_names[1:]
    return file_names
def _add_local_dir_to_gitignore_if_needed(self, resource):
    """Figure out whether resource has a local path under the workspace's
    git repo, which needs to be added to .gitignore. If so, do it.
    """
    if resource.resource_type == "git-subdirectory":
        return  # always a part of the dataworkspace's repo
    if not isinstance(resource, ws.LocalStateResourceMixin):
        return  # no local state, nothing to ignore
    local_path = resource.get_local_path_if_any()
    if local_path is None:
        return
    assert isabs(local_path), "Resource local path should be absolute"
    if commonpath([local_path, self.workspace_dir]) != self.workspace_dir:
        return None
    relative = local_path[len(self.workspace_dir) + 1:]
    # a trailing slash makes the gitignore pattern match directories only
    if not relative.endswith("/"):
        relative += "/"
    # Anchor the pattern at the repo root so it cannot match same-named
    # directories elsewhere in the tree (e.g. issue #11).
    if not relative.startswith("/"):
        relative = "/" + relative
    ensure_entry_in_gitignore(
        self.workspace_dir,
        ".gitignore",
        relative,
        match_independent_of_slashes=True,
        verbose=self.verbose,
    )
def create_filepath(original_filepath, original_rootpath=None, dest_dirpath=None,
                    prefix=None, suffix=None, ext=None):
    """Build a destination file path derived from an original one.

    The filename keeps its stem — optionally wrapped in ``prefix`` and
    ``suffix`` — and keeps its extension unless ``ext`` overrides it.
    When both ``dest_dirpath`` and ``original_rootpath`` are given, the
    directory structure of the original below their common root is
    recreated under ``dest_dirpath``; otherwise the file stays in its
    original directory (or directly under ``dest_dirpath``).
    """
    # assemble the new file name
    stem, old_ext = splitext(basename(original_filepath))
    new_name = "".join([
        prefix or "",
        stem,
        suffix or "",
        ext if ext else old_ext,
    ])

    # assemble the destination directory
    source_dir = dirname(original_filepath)
    if dest_dirpath and original_rootpath:
        shared = commonpath([original_rootpath, source_dir])
        subtree = source_dir[len(shared) + 1:]
    else:
        subtree = ''
    target_dir = dest_dirpath if dest_dirpath else source_dir
    return join(target_dir, subtree, new_name)
def __init__(self,jobs=None,path=None,name=None,sort_by_name=True):
    """
    Class to store sets of calculations

    Parameters
    ----------
    jobs : (list), optional
        List of Job objects belonging to the dataset. The default is None.
    path : (str), optional
        Path of dataset directory. If None the work dir is used if jobs is None,
        else the commonpath between the jobs is used. The default is None.
    name : (str), optional
        Name to assign to dataset. The default is None. If None the folder name is used.
    sort_by_name : (bool), optional
        Sort list of jobs based on Job names. The default is True.
    """
    if jobs:
        # NOTE(review): when jobs are given, an explicitly passed `path`
        # argument is silently overwritten by the jobs' common path —
        # confirm that is intended.
        path = op.commonpath([j.path for j in jobs])
        self.path = op.abspath(path)
    else:
        self.path = op.abspath(path) if path else os.getcwd()
    self.name = name if name else op.basename(self.path)
    self.sort_by_name = sort_by_name
    self.jobs = jobs
    if jobs:
        self._group_jobs()
        if sort_by_name:
            self.jobs = sorted(self.jobs, key=operator.attrgetter('name'))
    self._localdir = HPCInterface().localdir
    self._workdir = HPCInterface().workdir
    # NOTE(review): str.replace removes *every* occurrence of localdir,
    # not just a leading prefix — confirm paths cannot repeat it.
    self._path_relative = self.path.replace(self._localdir,'')
    self.path_in_hpc = self._workdir + self._path_relative
def dir_contains(dirname, path, exists=True):
    """Check if a file or directory is contained in another.

    Parameters
    ----------
    dirname: str
        The base directory that should contain `path`
    path: str
        The name of a directory or file that should be in `dirname`
    exists: bool
        If True, the `path` and `dirname` must exist

    Notes
    -----
    `path` and `dirname` must be either both absolute or both relative
    paths"""
    if exists:
        dirname = osp.abspath(dirname)
        path = osp.abspath(path)
        if six.PY2 or six.PY34:
            # commonpath is unavailable before Python 3.5
            return osp.exists(path) and osp.samefile(
                osp.commonprefix([dirname, path]), dirname)
        else:
            return osp.samefile(osp.commonpath([dirname, path]), dirname)
    # commonprefix is purely character-based, so '/foo/bar' would appear
    # to contain '/foo/barbaz'; commonpath compares whole components.
    return osp.commonpath([dirname, path]) == dirname
def add_jobs_from_directory(self,path,job_script_filenames='job.sh',sort_by_name=True,load_outputs=True,regroup=True):
    """
    Search a directory tree and append every job found to the Dataset.

    Jobs are selected based on where the job bash script is present.
    VaspJobs are selected based on where all input files are present
    (INCAR, KPOINTS, POSCAR, POTCAR).

    Parameters
    ----------
    path : (str)
        Parent directory of the dataset.
    job_script_filenames : (str or list), optional
        Filename of job bash script. Can also be a list of strings if
        multiple file names are present. The default is 'job.sh'.
    sort_by_name : (bool), optional
        Sort list of jobs by attribute "name". The default is True.
    load_outputs : (bool), optional
        Load job outputs while reading. The default is True.
    regroup : (bool), optional
        Regroup jobs after adding the new jobs list; self.path is set to
        the common path. The default is True.
    """
    root = op.abspath(path)
    found = find_jobs(root, job_script_filenames=job_script_filenames,
                      sort_by_name=sort_by_name, load_outputs=load_outputs)
    self.add_jobs(found, False)
    if regroup:
        # local name avoids shadowing os.path.commonpath
        shared = op.commonpath([root, self.path])
        self.regroup_jobs(path=shared)
    return
def extract_data(images, masks):
    """Given a set of brain images and a set of masks, extract the average
    signal inside each mask for each brain image.

    :param images: list of brain image file paths
    :param masks: list of mask file paths
    :return: DataFrame with 3 columns: image, mask, value
    """
    masker = NiftiMapsMasker(masks)
    values = masker.fit_transform(images)
    nimgs, nmasks = values.shape

    cp = op.commonpath(images)
    # strip only the *leading* common directory; str.replace would also
    # delete interior repetitions of the prefix
    labelsimages = [i[len(cp):] if i.startswith(cp) else i for i in images]
    print(cp)
    print(labelsimages)

    cpmask = op.commonprefix(masks)
    labelsrois = [
        (m[len(cpmask):] if m.startswith(cpmask) else m).replace('.nii.gz', '')
        for m in masks
    ]
    print(cpmask)
    print(labelsrois)

    # build all rows first: appending with df.loc[row] is O(n^2)
    rows = [
        {'image': labelsimages[iimg],
         'mask': labelsrois[iroi],
         'value': values[iimg, iroi]}
        for iimg in range(nimgs)
        for iroi in range(nmasks)
    ]
    return pd.DataFrame(rows, columns=['image', 'mask', 'value'])
def install_files(self, install_type, destination, files):
    """Emit CMake ``set``/``install`` commands for the given files.

    NOTE(review): ``basestring`` means this module targets Python 2.

    :param install_type: install command keyword (e.g. FILES/PROGRAMS)
    :param destination: install destination path, possibly a file path
    :param files: a single path or a list of paths to install
    """
    if isinstance(files, basestring):
        files = [
            files,
        ]
    sources = [self.generator.relpath(s) for s in files]
    if len(sources) > 1:
        # multiple sources: collect them in a CMake variable first
        if not self.name():
            common_part = commonpath(files)
            name, _ = self.generator.name_as_target(common_part)
            self.set_name(name)
        var_name = "%s_%s" % (self.name().upper(), install_type)
        self.output.write_command('set', var_name, '', sources)
        sources = [
            "${%s}" % var_name,
        ]
    else:
        # single source: `destination` may name the target file itself
        destination_name = basename(destination)
        source_name = basename(sources[0])
        # NOTE(review): `or` binds looser than `and`, so this reads as
        # isfile(destination) or (not exists(...) and names match) — confirm
        if isfile(destination) or not exists(
                destination) and source_name == destination_name:
            destination = dirname(destination)
        if source_name != destination_name:
            destination += ' RENAME ' + destination_name
    self.output.write_command('install', '', install_type, sources,
                              'DESTINATION ' + destination)
def mix_prefixes(prev, new):
    """Fold a new path into a running common-prefix accumulator.

    The first path seen (``prev is None``) is taken as-is; a ``None``
    ``new`` collapses the accumulator to ``''``; otherwise the common
    path of both is kept.
    """
    if prev is None:
        return new
    if new is None:
        return ''
    return commonpath([prev, new])
def test_func(self):
    """Access test: allow only admins/root, and only when ``self.root``
    lies inside ``settings.MIRROR_DIR``.

    :return: True when access is permitted, False otherwise
    """
    if not is_admin_or_root(self.request.user):
        return False
    if path.commonpath([self.root, settings.MIRROR_DIR
                        ]) != settings.MIRROR_DIR:
        # Was `raise False`, which is a TypeError at runtime (exceptions
        # must derive from BaseException); the intent is to deny access.
        return False
    return True
def legal_path(self, dir_str):
    """Return True when ``dir_str`` resolves to an existing path that
    stays inside the 'www' base folder (no '..' escapes)."""
    base_folder_str = 'www'
    norm_path_str = path.normpath(base_folder_str + dir_str)
    # short-circuit: nonexistent paths are rejected before the containment check
    if not path.exists(norm_path_str):
        return False
    # containment: the normalized path and the base share 'www' as their
    # component-wise common path only when no traversal escaped the base
    path_seq = [norm_path_str, base_folder_str]
    return path.commonpath(path_seq) == base_folder_str
def convert(self, value, param, ctx):
    """Validate a CLI path value: its (possibly missing) parent directory
    must exist, be a directory, be writable, and — when configured — the
    path must lie outside the workspace. Returns the absolute path.
    """
    rv = abspath(expanduser(super().convert(value, param, ctx)))
    if isdir(rv):
        parent_dir = rv
    else:
        parent_dir = parent_path(rv)
    if self.allow_multiple_levels_of_missing_dirs:
        # climb until an existing directory (or the filesystem root) is found
        while not isdir(parent_dir) and parent_dir != "/":
            parent_dir = parent_path(parent_dir)
    if not exists(parent_dir):
        self.fail('%s "%s" does not exist.' % (self.path_type, parent_dir),
                  param, ctx)  # type: ignore
    if not isdir(parent_dir):
        self.fail('%s "%s" is a file.' % (self.path_type, parent_dir),
                  param, ctx)  # type: ignore
    if not os.access(parent_dir, os.W_OK):
        self.fail('%s "%s" is not writable.' % (self.path_type, parent_dir),
                  param, ctx)  # type: ignore
    # commonpath equal to either operand means one path contains the other,
    # i.e. rv is inside the workspace or the workspace is inside rv
    if (self.must_be_outside_of_workspace is not None) and commonpath([
            self.must_be_outside_of_workspace, rv
    ]) in (self.must_be_outside_of_workspace, rv):
        self.fail(
            '%s must be outside of workspace "%s"' %
            (self.path_type, self.must_be_outside_of_workspace),  # type: ignore
            param,
            ctx,
        )
    return rv
def hook(hook_api):
    """PyInstaller hook: bundle the libvlc shared library next to the
    executable and the VLC plugins with their layout kept relative to
    the common root of both install locations."""
    if not hook_api.__name__ == "vlc":
        return None

    libvlc_src_file = os.environ["PYTHON_VLC_LIB_PATH"]
    plugin_src_dir = os.environ["PYTHON_VLC_MODULE_PATH"]
    # Get common root
    common_root = commonpath([libvlc_src_file, plugin_src_dir])

    # Add libvlc binaries (placed at the bundle root)
    libvlc_binaries = [
        (f, ".")
        for f in glob(join(dirname(libvlc_src_file), DYLIB_PATTERN))
    ]
    hook_api.add_binaries(libvlc_binaries)

    # Add plugin binaries, preserving their directory layout
    plugin_src_files = []
    for root, _, _ in os.walk(plugin_src_dir):
        plugin_src_files += glob(join(root, DYLIB_PATTERN))
    plugin_binaries = [
        (f, relpath(dirname(f), common_root)) for f in plugin_src_files
    ]
    hook_api.add_binaries(plugin_binaries)
def logger_context(log_dir, run_ID, name, log_params=None, snapshot_mode="none"):
    """Set up logger outputs (csv/debug/params files) for one run and tear
    them down after the wrapped block finishes; yields in between.
    """
    logger.set_snapshot_mode(snapshot_mode)
    logger.set_log_tabular_only(False)
    log_dir = osp.join(log_dir, f"run_{run_ID}")
    exp_dir = osp.abspath(log_dir)
    # keep all outputs under LOG_DIR; anything outside gets re-anchored
    if LOG_DIR != osp.commonpath([exp_dir, LOG_DIR]):
        print(f"logger_context received log_dir outside of {LOG_DIR}: "
              f"prepending by {LOG_DIR}/local/<yyyymmdd>/")
        # NOTE(review): get_log_dir is called with a single argument here —
        # confirm it matches the helper's signature in this module
        exp_dir = get_log_dir(log_dir)
    tabular_log_file = osp.join(exp_dir, "progress.csv")
    text_log_file = osp.join(exp_dir, "debug.log")
    params_log_file = osp.join(exp_dir, "params.json")
    logger.set_snapshot_dir(exp_dir)
    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.push_prefix(f"{name}_{run_ID} ")
    if log_params is None:
        log_params = dict()
    log_params["name"] = name
    log_params["run_ID"] = run_ID
    with open(params_log_file, "w") as f:
        json.dump(log_params, f)
    # hand control to the wrapped block, then detach the outputs
    yield
    logger.remove_tabular_output(tabular_log_file)
    logger.remove_text_output(text_log_file)
    logger.pop_prefix()
def get_file_list(path, valid_extensions=None):
    """
    Args:
        path (str): base path
        valid_extensions (list): list of valid extensions (lowercase,
            with leading dot, e.g. ['.txt'])

    Returns:
        flist, base_path: file list (relative paths) and base path, so
        that join(base_path, flist[i]) gives the full path of the i-th
        file

    Raises:
        OSError: if `path` does not exist
    """
    if valid_extensions is None:
        def is_valid(f):
            return True
    else:
        def is_valid(f):
            return splitext(f)[1].lower() in valid_extensions

    path = abspath(path)
    if not exists(path):
        raise OSError('{} doesn\'t exist'.format(path))

    # get file list
    flist = []
    for root, _, files in os.walk(path, followlinks=True):
        for fname in files:
            if is_valid(fname):
                flist.append(join(root, fname))
    if not flist:
        return [], path

    # Base directory: commonpath of a *single* file is the file itself,
    # which previously produced a bogus base and unstripped entries.
    base = commonpath(flist) if len(flist) > 1 else os.path.dirname(flist[0])
    # relpath strips only the leading base (str.replace would also drop
    # interior repetitions of the prefix)
    flist = [os.path.relpath(f, base) for f in flist]
    return flist, base + '/'
def getSelectedFiles(self, inputdir, datafile, expt, prefix, nofilter=False):
    """ Check list or directory matches searchtext(expt) and prefix(group) and contains only datafiles
    tries several methods for detecting files
    :param inputdir: list of files or input directory
    :param datafile: matching datafile name
    :param expt: searchstring in filename/filepath
    :param prefix: comparison group - also needs to appear in filepath or filename
    :param nofilter: assume provided list is correct - no further matching
    :return: basename and file list
    :raises ValueError: if no file matches both expt and prefix
    :raises IOError: if no files are found or the directory is unreadable
    """
    files = []
    base = ''
    searchtext = expt + prefix
    # get list of files from a directory
    if not isinstance(inputdir, list) and isdir(inputdir):
        base = inputdir
        if access(inputdir, R_OK):
            # recursive glob for the datafile anywhere below inputdir
            allfiles = [y for y in iglob(join(inputdir, '**', datafile), recursive=True)]
            if len(allfiles) > 0:
                # Filter on searchtext - single word in directory path
                files = [f for f in allfiles if re.search(searchtext, f, flags=re.IGNORECASE)]
                if len(files) <= 0:
                    # try separate expt and prefix - case insensitive on windows but ?mac
                    allfiles = [y for y in iglob(join(base, '**', prefix, '**', datafile), recursive=True)]
                    files = [f for f in allfiles if re.search(expt, f, flags=re.IGNORECASE)]
                    if len(files) <= 0:
                        # try uppercase directory name
                        files = [f for f in allfiles if prefix.upper() in f.upper().split(sep)]
                        if len(files) <= 0:
                            msg = "Batch: No match in path for both expt + prefix: %s %s" % (expt, prefix)
                            logging.error(msg)
                            raise ValueError(msg)
            else:
                msg = "Batch: No files found in input"
                logging.error(msg)
                raise IOError(msg)
        else:
            # NOTE(review): IOError does not %-format its arguments; the
            # message will show the tuple, not the substituted path — confirm
            raise IOError("Batch: Cannot access directory: %s", inputdir)
    else:
        # assume we have a list as input - exclude duplicates
        if isinstance(inputdir, list):
            allfiles = unique(inputdir).tolist()
        else:
            allfiles = unique(inputdir.tolist()).tolist()
        if not nofilter:
            files = [f for f in allfiles if re.search(searchtext, f, flags=re.IGNORECASE)]
            if len(files) <= 0:
                files = [f for f in allfiles if prefix.upper() in f.upper().split(sep)]
            else:
                # NOTE(review): this branch discards the successful
                # searchtext filter and keeps every file — confirm intended
                files = allfiles  # default assume prefix and expt strings are not found
        else:
            files = allfiles
    if (len(files) > 0):
        base = commonpath(files)
    print("Total Files Found: ", len(files))
    return (base, files)
def logger_context(
        log_dir,
        run_ID,
        name,
        log_params=None,
        snapshot_mode="none",
        override_prefix=False,
        use_summary_writer=False,
):
    """Use as context manager around calls to the runner's ``train()`` method.
    Sets up the logger directory and filenames.  Unless override_prefix is
    True, this function automatically prepends ``log_dir`` with the rlpyt
    logging directory and the date: `path-to-rlpyt/data/yyyymmdd/hhmmss`
    (`data/` is in the gitignore), and appends with `/run_{run_ID}` to
    separate multiple runs of the same settings. Saves hyperparameters
    provided in ``log_params`` to `params.json`, along with experiment `name`
    and `run_ID`.

    Input ``snapshot_mode`` refers to how often the logger actually saves the
    snapshot (e.g. may include agent parameters).  The runner calls on the
    logger to save the snapshot at every iteration, but the input
    ``snapshot_mode`` sets how often the logger actually saves (e.g. snapshot
    may include agent parameters). Possible modes include (but check inside
    the logger itself):

    * "none": don't save at all
    * "last": always save and overwrite the previous
    * "all": always save and keep each iteration
    * "gap": save periodically and keep each (will also need to set the gap,
      not done here)

    The cleanup operations after the ``yield`` close files but might not be
    strictly necessary if not launching another training session in the same
    python process.
    """
    logger.set_snapshot_mode(snapshot_mode)
    logger.set_log_tabular_only(False)
    log_dir = osp.join(log_dir, f"run_{run_ID}")
    exp_dir = osp.abspath(log_dir)
    # re-anchor any log dir outside LOG_DIR, unless explicitly overridden
    if LOG_DIR != osp.commonpath([exp_dir, LOG_DIR]) and not override_prefix:
        print(f"logger_context received log_dir outside of {LOG_DIR}: "
              f"prepending by {LOG_DIR}/local/<yyyymmdd>/<hhmmss>/")
        exp_dir = get_log_dir(log_dir)
    tabular_log_file = osp.join(exp_dir, "progress.csv")
    text_log_file = osp.join(exp_dir, "debug.log")
    params_log_file = osp.join(exp_dir, "params.json")
    logger.set_snapshot_dir(exp_dir)
    if use_summary_writer:
        logger.set_tf_summary_writer(SummaryWriter(exp_dir))
    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.push_prefix(f"{name}_{run_ID} ")
    if log_params is None:
        log_params = dict()
    log_params["name"] = name
    log_params["run_ID"] = run_ID
    with open(params_log_file, "w") as f:
        # non-JSON-serializable values degrade to their type name
        json.dump(log_params, f, default=lambda o: type(o).__name__)
    # hand control to the wrapped training block, then detach outputs
    yield
    logger.remove_tabular_output(tabular_log_file)
    logger.remove_text_output(text_log_file)
    logger.pop_prefix()
def get_fstype(fp):
    """Retrieve filesystem type of file path `fp`.

    Collects every mounted partition whose mountpoint is an ancestor of
    `fp` and returns the fstype of the deepest (longest) one.

    :raises ValueError: if no parent mountpoint is found
    """
    fp = op.abspath(fp)
    parent_mountpoints = {}
    for p in pu.disk_partitions(all=True):
        # the mountpoint contains fp iff it equals their common path
        if op.samefile(op.commonpath((fp, p.mountpoint)), p.mountpoint):
            parent_mountpoints[p.mountpoint] = p.fstype
    # Deepest mountpoint wins. Return its fstype (item[1]); the previous
    # code returned item[0], i.e. the mountpoint path, contradicting the
    # function's name and docstring.
    return max(parent_mountpoints.items(), key=lambda item: len(item[0]))[1]
def is_path_antecedent(ancestor, antecedent):
    'True if antecedent path is below the ancestor path'
    first = os.path.normpath(ancestor)
    second = os.path.normpath(antecedent)
    # equal paths do not count as "below"; otherwise the ancestor must be
    # the component-wise common path of the two
    return first != second and commonpath((first, second)) == first
def test51():
    # mixed trailing-slash variants share the component-wise prefix \1\2
    candidates = [
        '/1/2/3/4',
        '/1/2/3/',
        '/1/2/3',
        '/1/2/34',
    ]
    at(path.commonpath(candidates), r'\1\2')
def _build_sample_files(file_glob):
    """Derive (sample_name, file_name) pairs from a glob pattern.

    The sample name is the file name with the common leading directory
    and the common trailing suffix removed.
    """
    sample_files = []
    file_names = glob(file_glob)
    prefix = commonpath(file_names)
    suffix = _commonsuffix(file_names)
    for file_name in sorted(file_names):
        # str.lstrip/rstrip treat their argument as a *character set* and
        # can eat extra leading/trailing characters; slice off the exact
        # prefix/suffix instead.
        sample_name = file_name[len(prefix):].lstrip("/")
        if suffix:
            sample_name = sample_name[:-len(suffix)]
        sample_files.append((sample_name, file_name))
    return sample_files
def get_log_dir(experiment_name, run_ID):
    """Resolve the output directory for a run, re-anchoring it under
    LOG_DIR/local/<yyyymmdd>/ when it falls outside LOG_DIR."""
    log_dir = osp.join(experiment_name, f"run_{run_ID}")
    inside = LOG_DIR == osp.commonpath([osp.abspath(log_dir), LOG_DIR])
    if not inside:
        print(f"logger_context received log_dir outside of {LOG_DIR}: "
              f"prepending by {LOG_DIR}/local/<yyyymmdd>/")
        today = datetime.datetime.today().strftime("%Y%m%d")
        log_dir = osp.join(LOG_DIR, "local", today, log_dir)
    return log_dir
def __init__(self, cache_root, doc_path, metadata):
    """Locate (and create) the cache entry for ``doc_path`` under
    ``cache_root`` and store the normalized metadata, stamped with the
    current cache version."""
    self.cache_root = path.realpath(cache_root)
    resolved_doc = path.realpath(doc_path)
    # the cache file mirrors the document's layout below the shared root
    shared_root = path.commonpath((self.cache_root, resolved_doc))
    self.cache_rel_file = path.relpath(doc_path, shared_root) + ".cache"
    # joined onto the cache_root argument as given (not the realpath)
    self.cache_file = path.join(cache_root, self.cache_rel_file)
    self.cache_dir = path.dirname(self.cache_file)
    makedirs(self.cache_dir, exist_ok=True)
    self.metadata = self.normalize(metadata)
    self.metadata["cache_version"] = self.CACHE_VERSION
def validate_html_static_path(app: Sphinx, config: Config) -> None:
    """Check html_static_paths setting.

    Drops (with a warning) entries that do not exist or that live inside
    the build output directory. Mutates ``config.html_static_path``.
    """
    # iterate a copy so removal during the loop is safe
    for entry in config.html_static_path[:]:
        static_path = path.normpath(path.join(app.confdir, entry))
        if not path.exists(static_path):
            logger.warning(__('html_static_path entry %r does not exist'), entry)
            config.html_static_path.remove(entry)
        # static path is inside outdir iff outdir is their common path
        # NOTE(review): assumes app.outdir is absolute and normalized,
        # otherwise commonpath can raise ValueError — confirm
        elif path.commonpath([app.outdir, static_path]) == app.outdir:
            logger.warning(__('html_static_path entry %r is placed inside outdir'),
                           entry)
            config.html_static_path.remove(entry)
def post(self, request, *args, **kwargs):
    """Delete an uploaded file belonging to this problem.

    The requested filename is resolved under the problem's upload
    directory; anything escaping that directory is rejected.
    """
    filename = request.POST['filename']
    try:
        # uploads live under UPLOAD_DIR/<problem pk>
        upload_base_dir = path.join(settings.UPLOAD_DIR, str(self.problem.pk))
        real_path = path.abspath(path.join(upload_base_dir, filename))
        # traversal guard: the resolved path must stay inside the base dir
        # NOTE(review): assumes settings.UPLOAD_DIR is an absolute,
        # normalized path; otherwise commonpath may raise ValueError — confirm
        if path.commonpath([real_path, upload_base_dir]) != upload_base_dir:
            raise PermissionDenied
        remove(real_path)
    except OSError:
        # file missing or not removable
        return HttpResponseServerError()
    return HttpResponse()
def _update_history_menu(self):
    """Rebuild the history menu: one numbered action per history entry,
    labelled relative to the entries' common directory."""
    self.history_menu.clear()
    # NOTE(review): commonpath raises ValueError on an empty history and
    # returns the entry itself for a single-item history — confirm callers
    # guarantee at least two entries
    basename = path.commonpath(self.history)
    for idx, item in enumerate(self.history):
        label = path.relpath(item, basename)
        action = QtWidgets.QAction("{:d}: {:s}".format(idx, label), self)
        # full path shown in the status bar
        action.setStatusTip(item)
        # bind the concrete path now; late binding would reuse the last item
        action.triggered.connect(partial(self._load_config, item))
        self.history_menu.addAction(action)
def groups(self):
    """Directory names of the first subdirectories in the dataset path.

    Hidden directories (leading '.') and jobs located directly at the
    common path (whose relative path starts with '.') are excluded.
    """
    groups = []
    commonpath = op.commonpath([op.abspath(j.path) for j in self.jobs])
    grps = []
    for j in self.jobs:
        # split on op.sep rather than a hard-coded '/' so this also works
        # on Windows
        job_group = op.relpath(op.abspath(j.path), start=commonpath).split(op.sep)[0]
        if job_group not in grps:
            grps.append(job_group)
    for g in grps:
        if not g.startswith('.'):
            groups.append(g)
    return groups
def save_dataframe(self):
    """Write ``self._user_df`` to an Excel workbook next to the input files."""
    # NOTE(review): for files spread over several directories commonpath is
    # already a directory, so `.parent` climbs one level above it — confirm
    # that is the intended save location
    self._save_path = Path(commonpath(self._files)).parent
    self._save_name = self.generate_savename()
    self._writer = pd.ExcelWriter(Path(self._save_path, self._save_name), engine='xlsxwriter')
    self._user_df.to_excel(self._writer, sheet_name=self._sheets_name, index=False)
    # self.format_result_file()
    # NOTE(review): ExcelWriter.save() is deprecated in newer pandas in
    # favor of close() — confirm the pinned pandas version
    self._writer.save()
def _get_source_info(self, source_fn): source_line = "<N/A>" if 'builtin_function' in str(type(source_fn)): source_file = "<built-in>" else: try: source_file = path.abspath(inspect.getsourcefile(source_fn)) source_line = inspect.getsourcelines(source_fn)[1] here = path.abspath(__file__) common = path.commonpath([here, source_file]) source_file = source_file.replace(common, 'numba') except: source_file = "Unknown" return source_file, source_line
def sort(self):
    """Group the paths in ``self.lst`` by their run directory.

    For every distinct '.../runN' root an attribute ``dirN`` (N is an
    enumeration index) is created on ``self`` holding the matching path
    records, and ``self.uniqdirs`` collects [attribute name, root] pairs.
    """
    # 'self.lst' contains a list of path names to match. 'fts' is the list
    # of file types to use for matching.
    l = self.lst
    # group(1): path up to '.../run<digit>'; group(2): remainder directory
    m = re.compile(r'(.*/run[0-9])/?(.*).*')
    c = path.commonpath(l)
    # one record per path: extension, full path, run root, sub-directory
    # NOTE(review): re.search returns None when a path lacks 'run<digit>',
    # which would raise AttributeError on .group() — confirm inputs
    dctslst = [{'type': path.splitext(p)[1][1:], 'path': p, \
        'root': re.search(m, path.dirname(p.replace(c, '', 1))).group(1), \
        'dir': re.search(m, path.dirname(p.replace(c, '', 1))).group(2)} \
        for p in l]
    # NOTE(review): iterating a set makes the dirN numbering
    # non-deterministic across runs — confirm that is acceptable
    uniq = [{'uname': 'dir' + str(n), 'uid': u} \
        for n, u in enumerate({d['root'] for d in dctslst})]
    for vals in uniq:
        if not hasattr(self, vals['uname']):
            setattr(self, vals['uname'], [])
        self.uniqdirs.append([vals['uname'], vals['uid']])
        tmpattr = getattr(self, vals['uname'])
        for dct in dctslst:
            if dct['root'] == vals['uid']:
                tmpattr.append(dct)
        setattr(self, vals['uname'], tmpattr)
def calc_relpath(target: Path, ref: Path) -> Path:
    """Calculates the target's path relative to the reference.

    :param pathlib.Path target: The path to which the relative path will point.
    :param pathlib.Path ref: Reference path.
    :returns: The relative path from ``ref`` to ``target``.
    :rtype: :class:`Path`
    :raises ValueError: if either input path is not absolute.
    """
    ref = ref.expanduser()
    target = target.expanduser()
    if not (ref.is_absolute() and target.is_absolute()):
        raise ValueError("cannot compute relative paths of non-absolute"
                         " input paths")

    common = Path(path.commonpath([ref, target]))
    n_shared = len(common.parts)
    # climb out of ref's unique tail, then descend into target's
    ups = len(ref.parts) - n_shared
    return Path(*(("..",) * ups + target.parts[n_shared:]))
def custom(record):
    """Normalize one bibtex record: unicode conversion, file-list/hash
    regularization, interactive relocation of files outside the library,
    and keyword/tag consolidation.

    On any failure the record is tagged via ``record['error']`` and
    processing continues. Returns the (mutated) record.
    """
    try:
        record = c.convert_to_unicode(record)
    except TypeError as e:
        logging.warning("Unicode Error on: {}".format(record['ID']))
        record['error'] = 'unicode'

    # Initialize up front: the previous code left file_set unbound (and
    # crashed with NameError below) when the try block failed early.
    file_set = set()
    try:
        #add md5 of associated files
        files = [add_slash_if_necessary(y) for x in record['file'].split(';')
                 for y in x.split(':')
                 if bool(y.strip()) and y.strip().lower() != 'pdf']
        file_set = set(files)
        if not 'hashes' in record:
            hashes = [file_to_hash(x) for x in file_set]
            record['hashes'] = ";".join(hashes)
        #regularize format of files list
        record['file'] = ";".join(file_set)
    except Exception as e:
        logging.warning("File Error: {} : {}".format(record['ID'], e.args[0]))
        record['error'] = 'file'

    #todo: if file is not in the library common prefix, move it there
    #look for year, then first surname, then copy in, making dir if necessary
    if file_set:
        # Iterate a snapshot: file_set is mutated (remove/add) inside the
        # loop, which previously raised "Set changed size during iteration".
        for x in list(file_set):
            try:
                current_path = realpath(x)
                common = commonpath([current_path, args.library])
                if common != args.library:
                    logging.info("Found file outside library: {}".format(current_path))
                    logging.info("Common: {}".format(common))
                    #get the author and year
                    year = record['year']
                    authors = c.getnames([i.strip() for i in
                                          record["author"].replace('\n', ' ').split(" and ")])
                    authors_split = [c.splitname(a) for a in authors]
                    author_surnames = [a['last'][0] for a in authors_split]
                    new_path = join(args.library, year, ", ".join(author_surnames))
                    logging.info("New Path: {}".format(new_path))
                    #create directory if necessary
                    #copy file
                    full_new_path = join(new_path, split(current_path)[1])
                    logging.info("Copying file")
                    logging.info("From: {}".format(current_path))
                    logging.info("To: {}".format(full_new_path))
                    response = input("Enter to confirm: ")
                    if response == "":
                        logging.info("Proceeding")
                        # NOTE(review): mkdir creates only the last level;
                        # fails (and is caught below) if the year dir is
                        # missing — makedirs would be more robust.
                        if not exists(new_path):
                            mkdir(new_path)
                        if exists(full_new_path):
                            raise Exception("File already exists")
                        copyfile(x, full_new_path)
                        file_set.remove(x)
                        file_set.add(full_new_path)
                        record['file'] = ";".join(file_set)
            except Exception as e:
                logging.info("Issue copying file for: \n{}".format(x))
                logging.info(e)
                record['error'] = 'file_copy'

    #regularize keywords
    try:
        keywords = set()
        if 'tags' not in record:
            if 'keywords' in record:
                keywords.update([x.strip() for x in record['keywords'].split(',')])
                del record['keywords']
            if 'mendeley-tags' in record:
                keywords.update([x.strip() for x in record['mendeley-tags'].split(',')])
                del record['mendeley-tags']
            record['tags'] = ",".join(keywords)
    except Exception as e:
        # Was `except Error`, an undefined name that would itself raise
        # NameError instead of handling the failure.
        logging.warning("Tag Error: {}".format(record['ID']))
        record['error'] = 'tag'

    # record = c.type(record)
    # record = c.author(record)
    # record = c.editor(record)
    # record = c.journal(record)
    # record = c.keyword(record)
    # record = c.link(record)
    # record = c.doi(record)
    # record['p_authors'] = []
    # if 'author' in record:
    #     record['p_authors'] = [c.splitname(x, False) for x in record['author']]
    return record
def path_common_prefix(*paths):
    """Return the common path prefix shared by all of ``paths``."""
    try:
        result = _path.commonpath(paths)
    except ValueError:
        # translate the stdlib error into the package-specific exception
        raise MixedAbsoluteAndRelativePathsError(paths) from None
    return result
def test_abspath_match(abspath_couple):
    """Backport must agree with the reference implementation on absolute paths."""
    expected = old_commonpath(abspath_couple)
    assert commonpath(abspath_couple) == expected
def test_relpath_match(relpath_couple):
    """Backport must agree with the reference implementation on relative paths."""
    expected = old_commonpath(relpath_couple)
    assert commonpath(relpath_couple) == expected