Example #1
def sanitize_paths(job):
    """Remove the common prefix from paths.

    This method takes a job payload, iterates through all paths, and
    removes all their common prefixes. This is an effort to only submit
    information on a need-to-know basis to MythX. Unless it's to distinguish
    between files, the API does not need to know the absolute path of a file.
    This may even leak user information and should be removed.

    If a common prefix cannot be found (e.g. if there is just one element in
    the source list), the relative path from the current working directory
    will be returned.

    This concerns the following fields:
    - sources
    - AST absolute path
    - legacy AST absolute path
    - source list
    - main source

    :param job: The payload to sanitize
    """

    source_list = job.get("source_list")
    if not source_list:
        # triggers on None and empty list
        # if no source list is given, we are analyzing bytecode only
        return job
    source_list = [abspath(s) for s in source_list]
    if len(source_list) > 1:
        # get common path prefix and remove it
        prefix = commonpath(source_list)
    else:
        # fallback: append the CWD and take the common prefix with it
        prefix = commonpath(source_list + [str(Path.cwd())])

    job["source_list"] = [s.replace(prefix, "") for s in source_list]
    if job.get("main_source") is not None:
        job["main_source"] = job["main_source"].replace(prefix, "")
    for name in list(job.get("sources", {})):
        data = job["sources"].pop(name)
        # sanitize AST data in compiler output
        for ast_key in ("ast", "legacyAST"):
            if not (data.get(ast_key) and data[ast_key].get("absolutePath")):
                continue
            sanitized_absolute = data[ast_key]["absolutePath"].replace(
                prefix, "")
            data[ast_key]["absolutePath"] = sanitized_absolute

        # replace source key names
        job["sources"][name.replace(prefix, "")] = data

    return job
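
A quick sketch of the sanitizer on a made-up payload (the field names come
from the function above; the paths are invented):

job = {
    "source_list": ["/home/alice/proj/contracts/A.sol",
                    "/home/alice/proj/test/B.sol"],
    "main_source": "/home/alice/proj/contracts/A.sol",
    "sources": {},
}
sanitize_paths(job)
# commonpath() yields "/home/alice/proj", so source_list becomes
# ["/contracts/A.sol", "/test/B.sol"] and main_source "/contracts/A.sol"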
Example #2
    def _traversal_guard(self):
        """
        Check the absolute path of MEDIAGUARD_ROOT against the absolute
        path of the requested file.

        Ensures that nothing nasty is attempted for serving files outside the
        scope of the root folder.
        """
        abs_mediaguard_root = path.abspath(settings.MEDIAGUARD_ROOT)
        if not path.commonpath([abs_mediaguard_root]) == path.commonpath(
            [abs_mediaguard_root, self.abs_media_path]):
            raise exceptions.SuspiciousFileOperation(
                "Potential directory traversal attack")
Example #3
def folders_to_open(default_dir: str = getcwd()) -> Iterable[str]:
    app = QApplication(list())
    window = MultiFolderApp(default_dir)
    # noinspection PyTypeChecker,PyCallByClass
    QTimer.singleShot(0, window.show_ui)
    app.exec_()
    file_names = window.file_names
    # if the first item is a parent folder
    if len(file_names) > 1:
        parent = path.commonpath(
            [path.abspath(file_names[0]),
             path.abspath(file_names[1])])
        if parent == path.commonpath([path.abspath(file_names[0])]):
            return file_names[1:]
    return file_names
Example #4
 def _add_local_dir_to_gitignore_if_needed(self, resource):
     """Figure out whether resource has a local path under the workspace's
     git repo, which needs to be added to .gitignore. If so, do it.
     """
     if resource.resource_type == "git-subdirectory":
         return  # this is always a part of the dataworkspace's repo
     elif not isinstance(resource, ws.LocalStateResourceMixin):
         return  # no local state, so not an issue
     local_path = resource.get_local_path_if_any()
     if local_path is None:
         return
     assert isabs(local_path), "Resource local path should be absolute"
     if commonpath([local_path, self.workspace_dir]) != self.workspace_dir:
         return None
     local_relpath = local_path[len(self.workspace_dir) + 1:]
     if not local_relpath.endswith("/"):
         local_relpath = local_relpath + "/"  # matches only directories
     # Add a / as the start to indicate that the path starts at the root of the repo.
     # Otherwise, we'll hit cases where the path could match other directories (e.g. issue #11)
     local_relpath = "/" + local_relpath if not local_relpath.startswith(
         "/") else local_relpath
     ensure_entry_in_gitignore(
         self.workspace_dir,
         ".gitignore",
         local_relpath,
         match_independent_of_slashes=True,
         verbose=self.verbose,
     )
Example #5
def create_filepath(original_filepath,
                    original_rootpath=None,
                    dest_dirpath=None,
                    prefix=None,
                    suffix=None,
                    ext=None):
    # handle filename
    original_filename = basename(original_filepath)
    split_filename = splitext(original_filename)
    dest_filename = ''
    if prefix:
        dest_filename += prefix
    dest_filename += split_filename[0]
    if suffix:
        dest_filename += suffix
    if ext:
        dest_filename += ext
    else:
        dest_filename += split_filename[1]

    # handle path
    original_dirpath = dirname(original_filepath)
    if dest_dirpath and original_rootpath:
        common_root = commonpath([original_rootpath, original_dirpath])
        relative_path = original_dirpath[len(common_root) + 1:]
    else:
        relative_path = ''

    if not dest_dirpath:
        dest_dirpath = original_dirpath
    return join(dest_dirpath, relative_path, dest_filename)
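
A hedged usage sketch (all paths invented): the common root of the original
file's directory and original_rootpath is stripped, and the remainder is
re-rooted under dest_dirpath.

create_filepath('/data/raw/2020/img.png',
                original_rootpath='/data/raw',
                dest_dirpath='/data/out',
                suffix='_small')
# -> '/data/out/2020/img_small.png'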
Example #6
    def __init__(self,jobs=None,path=None,name=None,sort_by_name=True): 
        """
        Class to store sets of calculations

        Parameters
        ----------
        jobs : (list), optional
            List of Job objects belonging to the dataset. The default is None.        
    path : (str), optional
        Path of the dataset directory. If None, the common path between the
        jobs is used when jobs are given, otherwise the working directory.
        The default is None.
    name : (str), optional
        Name to assign to the dataset. If None, the folder name is used.
        The default is None.
        sort_by_name : (bool), optional
            Sort list of jobs based on Job names. The default is True.
        """
        if jobs:
            path = op.commonpath([j.path for j in jobs])
            self.path = op.abspath(path)
        else:
            self.path = op.abspath(path) if path else os.getcwd()
        self.name = name if name else op.basename(self.path)
        self.sort_by_name = sort_by_name
        self.jobs = jobs
        if jobs:
            self._group_jobs()
            if sort_by_name:
                self.jobs = sorted(self.jobs, key=operator.attrgetter('name'))

        self._localdir = HPCInterface().localdir
        self._workdir = HPCInterface().workdir
        self._path_relative = self.path.replace(self._localdir,'')
        
        self.path_in_hpc = self._workdir + self._path_relative
Example #7
def dir_contains(dirname, path, exists=True):
    """Check if a file of directory is contained in another.

    Parameters
    ----------
    dirname: str
        The base directory that should contain `path`
    path: str
        The name of a directory or file that should be in `dirname`
    exists: bool
        If True, the `path` and `dirname` must exist

    Notes
    -----
    `path` and `dirname` must be either both absolute or both relative
    paths"""
    if exists:
        dirname = osp.abspath(dirname)
        path = osp.abspath(path)
        if six.PY2 or six.PY34:
            return osp.exists(path) and osp.samefile(
                osp.commonprefix([dirname, path]), dirname)
        else:
            return osp.samefile(osp.commonpath([dirname, path]), dirname)
    return dirname in osp.commonprefix([dirname, path])
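
The two branches differ subtly: commonprefix() is a pure string operation
and can return a non-existent "path", while commonpath() compares whole
path components. For example:

import os.path as osp
osp.commonprefix(['/usr/lib', '/usr/local'])  # '/usr/l' -- not a real dir
osp.commonpath(['/usr/lib', '/usr/local'])    # '/usr'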
Example #8
    def add_jobs_from_directory(self,path,job_script_filenames='job.sh',sort_by_name=True,load_outputs=True,regroup=True):
        """
        Add jobs to the Dataset searching all folders and subfolders contained in given path. 
        Jobs are selected based on where the job bash script is present. 
        VaspJobs are selected based on where all input files are present (INCAR,KPOINTS,POSCAR,POTCAR).

        Parameters
        ----------
        path : (str)
            Parent directory of the dataset.
        job_script_filenames : (str or list), optional
            Filename of the job bash script, or a list of filenames if several
            are in use. The default is 'job.sh'.
        sort_by_name : (bool), optional
            Sort list of jobs by attribute "name". The default is True.
        regroup : (bool), optional
            Regroup jobs after adding the new ones; self.path is set to the
            common path. The default is True.
        """        
        path = op.abspath(path)
        jobs = find_jobs(path,job_script_filenames=job_script_filenames,sort_by_name=sort_by_name,load_outputs=load_outputs)
        self.add_jobs(jobs,False)
        if regroup:
            commonpath = op.commonpath([path,self.path])
            self.regroup_jobs(path=commonpath)
        return
Example #9
def extract_data(images, masks):
    """ given a set of brain images and a set of masks,  extract the average signal inside each mask for each brain image.
        Returns a dataframe with 3 columns: image, mask, value.
    """

    masker = NiftiMapsMasker(masks)
    values = masker.fit_transform(images)
    nimgs, nmasks = values.shape

    cp = op.commonpath(images)
    labelsimages = [i.replace(cp, '') for i in images]
    print(cp)
    print(labelsimages)

    cpmask = op.commonprefix(masks)
    labelsrois = [i.replace(cpmask, '').replace('.nii.gz', '') for i in masks]
    print(cpmask)
    print(labelsrois)

    df = pd.DataFrame(columns=['image', 'mask', 'value'])
    row = 0
    for iimg in range(nimgs):
        for iroi in range(nmasks):
            df.loc[row] = pd.Series({
                'image': labelsimages[iimg],
                'mask': labelsrois[iroi],
                'value': values[iimg, iroi]
            })
            row = row + 1
    return df
Example #10
 def install_files(self, install_type, destination, files):
     if isinstance(files, basestring):
         files = [
             files,
         ]
     sources = [self.generator.relpath(s) for s in files]
     if len(sources) > 1:
         if not self.name():
             common_part = commonpath(files)
             name, _ = self.generator.name_as_target(common_part)
             self.set_name(name)
         var_name = "%s_%s" % (self.name().upper(), install_type)
         self.output.write_command('set', var_name, '', sources)
         sources = [
             "${%s}" % var_name,
         ]
     else:
         destination_name = basename(destination)
         source_name = basename(sources[0])
         if isfile(destination) or not exists(
                 destination) and source_name == destination_name:
             destination = dirname(destination)
             if source_name != destination_name:
                 destination += ' RENAME ' + destination_name
     self.output.write_command('install', '', install_type, sources,
                               'DESTINATION ' + destination)
Example #11
 def mix_prefixes(prev, new):
     if prev is None:
         return new
     elif new is None:
         return ''
     else:
         return commonpath([prev, new])
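
Presumably this helper is folded over a stream of paths; a minimal sketch
(functools.reduce and the sample paths are assumptions):

from functools import reduce
from os.path import commonpath

paths = ['/data/run1/a.csv', '/data/run1/b.csv', '/data/run2/c.csv']
reduce(mix_prefixes, paths, None)  # -> '/data'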
Example #12
 def test_func(self):
     if not is_admin_or_root(self.request.user):
         return False
     if path.commonpath([self.root, settings.MIRROR_DIR]) != settings.MIRROR_DIR:
         return False
     return True
Example #13
    def legal_path(self, dir_str):
        base_folder_str = 'www'
        norm_path_str = path.normpath(base_folder_str + dir_str)
        path_seq = [norm_path_str, base_folder_str]

        # the path exists AND the path is under the 'www' folder
        return path.exists(norm_path_str) and (path.commonpath(path_seq) == base_folder_str)
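
An illustration with invented relative paths: normpath() collapses any ".."
segments first, so a request that escapes the base no longer shares 'www'
as its common path and the check fails.

path.normpath('www' + '/../etc/passwd')  # 'etc/passwd' -- escaped the base
path.commonpath(['etc/passwd', 'www'])   # '' != 'www' -> rejected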
Example #14
 def convert(self, value, param, ctx):
     rv = abspath(expanduser(super().convert(value, param, ctx)))
     if isdir(rv):
         parent_dir = rv
     else:
         parent_dir = parent_path(rv)
     if self.allow_multiple_levels_of_missing_dirs:
         while not isdir(parent_dir) and parent_dir != "/":
             parent_dir = parent_path(parent_dir)
     if not exists(parent_dir):
         self.fail('%s "%s" does not exist.' % (self.path_type, parent_dir),
                   param, ctx)  # type: ignore
     if not isdir(parent_dir):
         self.fail('%s "%s" is a file.' % (self.path_type, parent_dir),
                   param, ctx)  # type: ignore
     if not os.access(parent_dir, os.W_OK):
         self.fail('%s "%s" is not writable.' %
                   (self.path_type, parent_dir), param, ctx)  # type: ignore
     if (self.must_be_outside_of_workspace is not None) and commonpath([
             self.must_be_outside_of_workspace, rv
     ]) in (self.must_be_outside_of_workspace, rv):
         self.fail(
             '%s must be outside of workspace "%s"' %
             (self.path_type,
              self.must_be_outside_of_workspace),  # type: ignore
             param,
             ctx,
         )
     return rv
Example #15
def hook(hook_api):
    if hook_api.__name__ != "vlc":
        return None

    libvlc_src_file = os.environ["PYTHON_VLC_LIB_PATH"]
    plugin_src_dir = os.environ["PYTHON_VLC_MODULE_PATH"]

    # Get common root
    common_root = commonpath([libvlc_src_file, plugin_src_dir])

    # Add libvlc binaries
    libvlc_src_files = glob(join(dirname(libvlc_src_file), DYLIB_PATTERN))
    libvlc_binaries = []
    for f in libvlc_src_files:
        binary_tuple = (f, ".")
        libvlc_binaries.append(binary_tuple)
    hook_api.add_binaries(libvlc_binaries)

    # Add plugin binaries
    plugin_src_files = []
    for root, _, __ in os.walk(plugin_src_dir):
        plugin_src_files.extend(glob(join(root, DYLIB_PATTERN)))
    plugin_binaries = []
    for f in plugin_src_files:
        rel_dir = relpath(dirname(f), common_root)
        bin_tuple = (f, rel_dir)
        plugin_binaries.append(bin_tuple)
    hook_api.add_binaries(plugin_binaries)
Example #16
def logger_context(log_dir,
                   run_ID,
                   name,
                   log_params=None,
                   snapshot_mode="none"):
    logger.set_snapshot_mode(snapshot_mode)
    logger.set_log_tabular_only(False)
    log_dir = osp.join(log_dir, f"run_{run_ID}")
    exp_dir = osp.abspath(log_dir)
    if LOG_DIR != osp.commonpath([exp_dir, LOG_DIR]):
        print(f"logger_context received log_dir outside of {LOG_DIR}: "
              f"prepending by {LOG_DIR}/local/<yyyymmdd>/")
        exp_dir = get_log_dir(log_dir)
    tabular_log_file = osp.join(exp_dir, "progress.csv")
    text_log_file = osp.join(exp_dir, "debug.log")
    params_log_file = osp.join(exp_dir, "params.json")

    logger.set_snapshot_dir(exp_dir)
    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.push_prefix(f"{name}_{run_ID} ")

    if log_params is None:
        log_params = dict()
    log_params["name"] = name
    log_params["run_ID"] = run_ID
    with open(params_log_file, "w") as f:
        json.dump(log_params, f)

    yield

    logger.remove_tabular_output(tabular_log_file)
    logger.remove_text_output(text_log_file)
    logger.pop_prefix()
Example #17
def get_file_list(path, valid_extensions=None):
    """
    Args:
       path (str): base path
       valid_extensions (list): list of valid extensions
    Returns:
       flist, base_path: file list and base path, so that
          join(base_path, flist[i]) gives the full path of the i-th file
    """
    if valid_extensions is None:
        is_valid = lambda f: True
    else:
        is_valid = lambda f: bool(splitext(f)[1].lower() in valid_extensions)

    path = abspath(path)
    if not exists(path):
        raise OSError('{} doesn\'t exist'.format(path))

    # get file list
    flist = []
    for root, _, files in os.walk(path, followlinks=True):
        for fname in [f for f in files if is_valid(f)]:
            flist.append(join(root, fname))

    if len(flist) == 0:
        return [], path

    # relative path
    fpath = commonpath(flist) + '/'
    flist = [f.replace(fpath, '') for f in flist]

    return flist, fpath
Example #18
    def getSelectedFiles(self, inputdir, datafile, expt, prefix, nofilter=False):
        """
        Check list or directory matches searchtext(expt) and prefix(group) and contains only datafiles
        tries several methods for detecting files
        :param inputdir: list of files or input directory
        :param datafile: matching datafile name
        :param expt: searchstring in filename/filepath
        :param prefix: comparison group - also needs to appear in filepath or filename
        :param nofilter: assume provided list is correct - no further matching
        :return: basename and file list
        """
        files = []
        base = ''
        searchtext = expt + prefix
        # get list of files from a directory
        if not isinstance(inputdir, list) and isdir(inputdir):
            base = inputdir
            if access(inputdir, R_OK):
                allfiles = [y for y in iglob(join(inputdir, '**', datafile), recursive=True)]
                if len(allfiles) > 0:
                    # Filter on searchtext - single word in directory path
                    files = [f for f in allfiles if re.search(searchtext, f, flags=re.IGNORECASE)]
                    if len(files) <= 0:
                        # try separate expt and prefix - case insensitive on windows but ?mac
                        allfiles = [y for y in iglob(join(base, '**', prefix, '**', datafile), recursive=True)]
                        files = [f for f in allfiles if re.search(expt, f, flags=re.IGNORECASE)]
                    if len(files) <= 0:
                        # try uppercase directory name
                        files = [f for f in allfiles if prefix.upper() in f.upper().split(sep)]
                    if len(files) <= 0:
                        msg = "Batch: No match in path for both expt + prefix: %s %s" % (expt, prefix)
                        logging.error(msg)
                        raise ValueError(msg)
                else:
                    msg = "Batch: No files found in input"
                    logging.error(msg)
                    raise IOError(msg)

            else:
                raise IOError("Batch: Cannot access directory: %s", inputdir)
        else:
            # assume we have a list as input - exclude duplicates
            if isinstance(inputdir, list):
                allfiles = unique(inputdir).tolist()
            else:
                allfiles = unique(inputdir.tolist()).tolist()
            if not nofilter:
                files = [f for f in allfiles if re.search(searchtext, f, flags=re.IGNORECASE)]
                if len(files) <= 0:
                    files = [f for f in allfiles if prefix.upper() in f.upper().split(sep)]
                else:
                    files = allfiles #default assume prefix and expt strings are not found
            else:
                files = allfiles
            if (len(files) > 0):
                base = commonpath(files)
        print("Total Files Found: ", len(files))

        return (base, files)
Example #19
def logger_context(
    log_dir, run_ID, name, log_params=None, snapshot_mode="none", override_prefix=False,
    use_summary_writer=False,
):
    """Use as context manager around calls to the runner's ``train()`` method.
    Sets up the logger directory and filenames.  Unless override_prefix is
    True, this function automatically prepends ``log_dir`` with the rlpyt
    logging directory and the date: `path-to-rlpyt/data/yyyymmdd/hhmmss`
    (`data/` is in the gitignore), and appends with `/run_{run_ID}` to
    separate multiple runs of the same settings. Saves hyperparameters
    provided in ``log_params`` to `params.json`, along with experiment `name`
    and `run_ID`.

    The runner calls on the logger to save the snapshot at every iteration,
    but the input ``snapshot_mode`` sets how often the logger actually saves
    it (a snapshot may include, e.g., agent parameters). Possible modes
    include (but check inside the logger itself):
        * "none": don't save at all
        * "last": always save and overwrite the previous
        * "all": always save and keep each iteration
        * "gap": save periodically and keep each (will also need to set the gap, not done here) 

    The cleanup operations after the ``yield`` close files but might not be
    strictly necessary if not launching another training session in the same
    python process.
    """
    logger.set_snapshot_mode(snapshot_mode)
    logger.set_log_tabular_only(False)
    log_dir = osp.join(log_dir, f"run_{run_ID}")
    exp_dir = osp.abspath(log_dir)
    if LOG_DIR != osp.commonpath([exp_dir, LOG_DIR]) and not override_prefix:
        print(f"logger_context received log_dir outside of {LOG_DIR}: "
            f"prepending by {LOG_DIR}/local/<yyyymmdd>/<hhmmss>/")
        exp_dir = get_log_dir(log_dir)
    tabular_log_file = osp.join(exp_dir, "progress.csv")
    text_log_file = osp.join(exp_dir, "debug.log")
    params_log_file = osp.join(exp_dir, "params.json")

    logger.set_snapshot_dir(exp_dir)
    if use_summary_writer:
        logger.set_tf_summary_writer(SummaryWriter(exp_dir))
    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.push_prefix(f"{name}_{run_ID} ")

    if log_params is None:
        log_params = dict()
    log_params["name"] = name
    log_params["run_ID"] = run_ID
    with open(params_log_file, "w") as f:
        json.dump(log_params, f, default=lambda o: type(o).__name__)

    yield

    logger.remove_tabular_output(tabular_log_file)
    logger.remove_text_output(text_log_file)
    logger.pop_prefix()
Example #20
def get_fstype(fp):
    """Retrieve filesystem type of file path `fp`"""
    fp = op.abspath(fp)
    parent_mountpoints = {}
    for p in pu.disk_partitions(all=True):
        if op.samefile(op.commonpath((fp, p.mountpoint)), p.mountpoint):
            parent_mountpoints[p.mountpoint] = p.fstype
    # return the fstype of the deepest (longest) matching mountpoint
    return max(parent_mountpoints.items(), key=lambda p: len(p[0]))[1]
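
A hedged usage sketch (`pu` is assumed to be psutil, as the
disk_partitions() call suggests; the path and result are invented):

get_fstype('/home/alice/notes.txt')
# commonpath((fp, p.mountpoint)) == p.mountpoint holds for '/' and '/home',
# and the deepest match wins, e.g. -> 'ext4'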
Example #22
def test51():
    paths = [
        '/1/2/3/4',
        '/1/2/3/',
        '/1/2/3',
        '/1/2/34',
    ]
    at(path.commonpath(paths), r'\1\2')
Example #23
def is_path_antecedent(ancestor, antecedent):
    'True if antecedent path is below the ancestor path'
    ancestor = os.path.normpath(ancestor)
    antecedent = os.path.normpath(antecedent)
    if ancestor == antecedent:
        return False
    common = commonpath((ancestor, antecedent))
    return ancestor == common
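
For reference, a few invented calls showing the expected behaviour:

is_path_antecedent('/srv', '/srv/www/index.html')  # True
is_path_antecedent('/srv', '/srv')                 # False: equal paths excluded
is_path_antecedent('/srv/www', '/srv')             # False: wrong direction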
Example #24
def _build_sample_files(file_glob):
    sample_files = []
    file_names = glob(file_glob)
    prefix = commonpath(file_names)
    suffix = _commonsuffix(file_names)
    for file_name in sorted(file_names):
        # slice off the shared prefix/suffix; str.lstrip/rstrip would strip
        # character *sets*, not substrings
        sample_name = file_name[len(prefix):len(file_name) - len(suffix)]
        sample_files.append((sample_name, file_name))
    return sample_files
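
The slicing above sidesteps a classic trap: str.lstrip()/rstrip() take a set
of characters, not a prefix/suffix string, so lstrip(prefix) can eat into
the sample name. With invented names:

"sample_apple.txt".lstrip("sample_")  # '.txt' -- 'apple' is consumed too
"sample_apple.txt"[len("sample_"):]   # 'apple.txt'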
Example #25
def get_log_dir(experiment_name, run_ID):
    log_dir = osp.join(experiment_name, f"run_{run_ID}")
    exp_dir = osp.abspath(log_dir)
    if LOG_DIR != osp.commonpath([exp_dir, LOG_DIR]):
        print(f"logger_context received log_dir outside of {LOG_DIR}: "
              f"prepending by {LOG_DIR}/local/<yyyymmdd>/")
        yyyymmdd = datetime.datetime.today().strftime("%Y%m%d")
        log_dir = osp.join(LOG_DIR, "local", yyyymmdd, log_dir)
    return log_dir
Example #26
 def __init__(self, cache_root, doc_path, metadata):
     self.cache_root = path.realpath(cache_root)
     doc_root = path.commonpath((self.cache_root, path.realpath(doc_path)))
     self.cache_rel_file = path.relpath(doc_path, doc_root) + ".cache"
     self.cache_file = path.join(cache_root, self.cache_rel_file)
     self.cache_dir = path.dirname(self.cache_file)
     makedirs(self.cache_dir, exist_ok=True)
     self.metadata = self.normalize(metadata)
     self.metadata["cache_version"] = self.CACHE_VERSION
Example #27
def validate_html_static_path(app: Sphinx, config: Config) -> None:
    """Check html_static_paths setting."""
    for entry in config.html_static_path[:]:
        static_path = path.normpath(path.join(app.confdir, entry))
        if not path.exists(static_path):
            logger.warning(__('html_static_path entry %r does not exist'), entry)
            config.html_static_path.remove(entry)
        elif path.commonpath([app.outdir, static_path]) == app.outdir:
            logger.warning(__('html_static_path entry %r is placed inside outdir'), entry)
            config.html_static_path.remove(entry)
Example #28
 def post(self, request, *args, **kwargs):
     filename = request.POST['filename']
     try:
         upload_base_dir = path.join(settings.UPLOAD_DIR, str(self.problem.pk))
         real_path = path.abspath(path.join(upload_base_dir, filename))
         if path.commonpath([real_path, upload_base_dir]) != upload_base_dir:
             raise PermissionDenied
         remove(real_path)
     except OSError:
         return HttpResponseServerError()
     return HttpResponse()
Example #29
    def _update_history_menu(self):

        self.history_menu.clear()

        basename = path.commonpath(self.history)

        for idx, item in enumerate(self.history):
            label = path.relpath(item, basename)
            action = QtWidgets.QAction("{:d}: {:s}".format(idx, label), self)
            action.setStatusTip(item)
            action.triggered.connect(partial(self._load_config, item))
            self.history_menu.addAction(action)
Example #30
 def groups(self):
     """Directory names of the first subdirectories in the dataset path."""
     groups = []
     commonpath = op.commonpath([op.abspath(j.path) for j in self.jobs])
     grps = []
     for j in self.jobs:
         job_group = op.relpath(op.abspath(j.path),start=commonpath).split('/')[0]
         if job_group not in grps:
             grps.append(job_group)
     for g in grps:
         if not g.startswith('.'):
             groups.append(g)
     return groups
Example #31
    def save_dataframe(self):
        self._save_path = Path(commonpath(self._files)).parent
        self._save_name = self.generate_savename()

        self._writer = pd.ExcelWriter(Path(self._save_path, self._save_name),
                                      engine='xlsxwriter')
        self._user_df.to_excel(self._writer,
                               sheet_name=self._sheets_name,
                               index=False)

        # self.format_result_file()

        self._writer.save()
Example #32
 def _get_source_info(self, source_fn):
     source_line = "<N/A>"
     if 'builtin_function' in str(type(source_fn)):
         source_file = "<built-in>"
     else:
         try:
             source_file = path.abspath(inspect.getsourcefile(source_fn))
             source_line = inspect.getsourcelines(source_fn)[1]
             here = path.abspath(__file__)
             common = path.commonpath([here, source_file])
             source_file = source_file.replace(common, 'numba')
         except:
             source_file = "Unknown"
     return source_file, source_line
Example #33
File: core.py Project: slushecl/dev
 def sort(self):
     # 'self.lst' contains the list of path names to match
     paths = self.lst
     m = re.compile(r'(.*/run[0-9])/?(.*).*')
     c = path.commonpath(paths)
     dctslst = [{'type': path.splitext(p)[1][1:],
                 'path': p,
                 'root': re.search(m, path.dirname(p.replace(c, '', 1))).group(1),
                 'dir': re.search(m, path.dirname(p.replace(c, '', 1))).group(2)}
                for p in paths]
     uniq = [{'uname': 'dir' + str(n), 'uid': u} \
             for n, u in enumerate({d['root'] for d in dctslst})]
     for vals in uniq:
         if not hasattr(self, vals['uname']):
             setattr(self, vals['uname'], [])
             self.uniqdirs.append([vals['uname'], vals['uid']])
         tmpattr = getattr(self, vals['uname'])
         for dct in dctslst:
             if dct['root'] == vals['uid']:
                 tmpattr.append(dct)
         setattr(self, vals['uname'], tmpattr)
Example #34
File: utils.py Project: bow/volt
def calc_relpath(target: Path, ref: Path) -> Path:
    """Calculates the target's path relative to the reference.

    :param pathlib.Path target: The path to which the relative path will point.
    :param pathlib.Path ref: Reference path.
    :returns: The relative path from ``ref`` to ``target``.
    :rtype: :class:`Path`

    """
    ref = ref.expanduser()
    target = target.expanduser()
    if not ref.is_absolute() or not target.is_absolute():
        raise ValueError("cannot compute relative paths of non-absolute"
                         " input paths")

    common = Path(path.commonpath([ref, target]))
    ref_uniq = ref.parts[len(common.parts):]
    target_uniq = target.parts[len(common.parts):]

    rel_parts = ("..",) * (len(ref_uniq)) + target_uniq

    return Path(*rel_parts)
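
A short usage example (paths invented):

from pathlib import Path
calc_relpath(Path('/srv/site/static/css'), Path('/srv/site/templates'))
# -> Path('../static/css'): one ".." to step out of 'templates', then down
# into 'static/css' from the common root '/srv/site'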
Example #35
def custom(record):
    try:
        record = c.convert_to_unicode(record)
    except TypeError as e:
        logging.warning("Unicode Error on: {}".format(record['ID']))
        record['error'] = 'unicode'

    # add md5 of associated files
    file_set = set()  # pre-initialize: the `if file_set:` check below runs even if this try fails
    try:
        files = [add_slash_if_necessary(y) for x in record['file'].split(';') for y in x.split(':') if bool(y.strip()) and y.strip().lower() != 'pdf']
        file_set = set(files)
        if 'hashes' not in record:
            hashes = [file_to_hash(x) for x in file_set]
            record['hashes'] = ";".join(hashes)
            #regularize format of files list
            record['file'] = ";".join(file_set)
    except Exception as e:
        logging.warning("File Error: {} : {}".format(record['ID'], e.args[0]))
        record['error'] = 'file'

    #todo: if file is not in the library common prefix, move it there
    #look for year, then first surname, then copy in, making dir if necessary
    if file_set:
        for x in file_set:
            try:
                current_path = realpath(x)
                common = commonpath([current_path, args.library])
                if common != args.library:
                    logging.info("Found file outside library: {}".format(current_path))
                    logging.info("Common: {}".format(common))
                    #get the author and year
                    year = record['year']
                    authors = c.getnames([i.strip() for i in record["author"].replace('\n', ' ').split(" and ")])
                    authors_split = [c.splitname(a) for a in authors]
                    author_surnames = [a['last'][0] for a in authors_split]
                    new_path = join(args.library, year, ", ".join(author_surnames))
                    logging.info("New Path: {}".format(new_path))
                    #create directory if necessary
                    #copy file
                    full_new_path = join(new_path, split(current_path)[1])
                    logging.info("Copying file")
                    logging.info("From: {}".format(current_path))
                    logging.info("To: {}".format(full_new_path))
                    response = input("Enter to confirm: ")
                    if response == "":
                        logging.info("Proceeding")
                        if not exists(new_path):
                            mkdir(new_path)
                        if exists(full_new_path):
                            raise Exception("File already exists")
                        copyfile(x, full_new_path)
                        file_set.remove(x)
                        file_set.add(full_new_path)
                        record['file'] = ";".join(file_set)
            except Exception as e:
                logging.info("Issue copying file for: {}".format(x))
                logging.info(e)
                record['error'] = 'file_copy'


    #regularize keywords
    try:
        keywords = set()
        if 'tags' not in record:
            if 'keywords' in record:
                keywords.update([x.strip() for x in record['keywords'].split(',')])
                del record['keywords']
            if 'mendeley-tags' in record:
                keywords.update([x.strip() for x in record['mendeley-tags'].split(',')])
                del record['mendeley-tags']

            record['tags'] = ",".join(keywords)
    except Exception as e:
        logging.warning("Tag Error: {}".format(record['ID']))
        record['error'] = 'tag'

    # record = c.type(record)
    # record = c.author(record)
    # record = c.editor(record)
    # record = c.journal(record)
    # record = c.keyword(record)
    # record = c.link(record)
    # record = c.doi(record)
    # record['p_authors'] = []
    # if 'author' in record:
    #     record['p_authors'] = [c.splitname(x, False) for x in record['author']]
    return record
Example #36
File: fs.py Project: gwk/pithy
def path_common_prefix(*paths):
  'Return the common path prefix for a sequence of paths.'
  try: return _path.commonpath(paths)
  except ValueError: # we want a more specific exception.
    raise MixedAbsoluteAndRelativePathsError(paths) from None
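
The wrapper exists because commonpath() raises a bare ValueError when its
inputs mix path flavors; a quick illustration of the underlying behaviour:

import os.path
os.path.commonpath(['/abs/one', 'rel/two'])  # ValueError: can't mix absolute and relative paths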
Example #37
 def test_abspath_match(abspath_couple):
     assert commonpath(abspath_couple) == old_commonpath(abspath_couple)
Example #38
 def test_relpath_match(relpath_couple):
     assert commonpath(relpath_couple) == old_commonpath(relpath_couple)