Example #1
def fail_if_too_old(filename, max_age=None):
    # assumes: from os.path import exists, getmtime; from time import time
    if not exists(filename):
        raise IOError('file "%s" could not be found' % filename)
    if max_age is not None:
        age = time() - getmtime(filename)  # compute once to avoid a race between check and message
        if age > max_age:
            # EOAgeError is a project-specific exception defined elsewhere
            raise EOAgeError(
                'file "%s" could not be loaded because it is %ds old, '
                'which is more than the maximum age %ds.' % (filename, age, max_age))
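
A minimal, self-contained version of the same mtime-age guard, assuming EOAgeError is a project-specific exception (the name and usage below are illustrative):

from os.path import exists, getmtime
from time import time


class EOAgeError(Exception):
    """Raised when a file exceeds the allowed maximum age (illustrative stand-in)."""


def check_age(filename, max_age):
    if not exists(filename):
        raise IOError('file "%s" could not be found' % filename)
    age = time() - getmtime(filename)  # age in seconds since last modification
    if age > max_age:
        raise EOAgeError('file "%s" is %ds old (max %ds)' % (filename, age, max_age))


# usage: a freshly written file passes a one-hour age limit
import tempfile
with tempfile.NamedTemporaryFile() as tmp:
    check_age(tmp.name, max_age=3600)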
Example #2
def fetch_repodata(url,
                   schannel,
                   priority,
                   cache_dir=None,
                   use_cache=False,
                   session=None):
    cache_path = join(cache_dir or create_cache_dir(), cache_fn_url(url))

    try:
        mtime = getmtime(cache_path)
    except (IOError, OSError):
        log.debug("No local cache found for %s at %s", url, cache_path)
        if use_cache:
            return {'packages': {}}
        else:
            mod_etag_headers = {}
    else:
        mod_etag_headers = read_mod_and_etag(cache_path)

        if context.local_repodata_ttl > 1:
            max_age = context.local_repodata_ttl
        elif context.local_repodata_ttl == 1:
            max_age = get_cache_control_max_age(
                mod_etag_headers.get('_cache_control', ''))
        else:
            max_age = 0

        timeout = mtime + max_age - time()
        if (timeout > 0 or context.offline) and not url.startswith('file://'):
            log.debug("Using cached repodata for %s at %s. Timeout in %d sec",
                      url, cache_path, timeout)
            return read_local_repodata(cache_path, url, schannel, priority,
                                       mod_etag_headers.get('_etag'),
                                       mod_etag_headers.get('_mod'))

        log.debug("Locally invalidating cached repodata for %s at %s", url,
                  cache_path)

    try:
        assert url is not None, url
        repodata = fetch_repodata_remote_request(session, url,
                                                 mod_etag_headers.get('_etag'),
                                                 mod_etag_headers.get('_mod'))
    except Response304ContentUnchanged:
        log.debug(
            "304 NOT MODIFIED for '%s'. Updating mtime and loading from disk",
            url)
        touch(cache_path)
        return read_local_repodata(cache_path, url, schannel, priority,
                                   mod_etag_headers.get('_etag'),
                                   mod_etag_headers.get('_mod'))
    if repodata is None:
        return None

    with open(cache_path, 'w') as fo:
        json.dump(repodata, fo, indent=2, sort_keys=True, cls=EntityEncoder)

    process_repodata(repodata, url, schannel, priority)
    write_pickled_repodata(cache_path, repodata)
    return repodata
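
The cache-freshness decision above reduces to one arithmetic check: the cached file is still valid while mtime + max_age lies in the future. A standalone sketch of that check (the function name is illustrative):

from os.path import getmtime
from time import time


def cache_freshness(cache_path, max_age):
    """Return (is_fresh, seconds_remaining) for a cache file with a TTL in seconds."""
    try:
        mtime = getmtime(cache_path)
    except OSError:  # a missing cache file is simply not fresh
        return False, 0
    remaining = mtime + max_age - time()
    return remaining > 0, max(remaining, 0)

With max_age taken from context.local_repodata_ttl or from the server's Cache-Control max-age value, a positive remainder means the remote request can be skipped entirely.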
Example #3
def ds_traverse(rootds, parent=None, json=None,
                recurse_datasets=False, recurse_directories=False,
                long_=False):
    """Hierarchical dataset traverser

    Parameters
    ----------
    rootds: Dataset
      Root dataset to be traversed
    parent: Dataset
      Parent dataset of the current rootds
    recurse_datasets: bool
      Recurse into subdatasets of the root dataset
    recurse_directories: bool
      Recurse into subdirectories of the current dataset
      In both of the above cases, if False, they are not explicitly
      recursed; data is instead loaded from their metadata files

    Returns
    -------
    list of dict
      a (recursive) list of dataset info entries at the given path
    """
    # extract parent info to pass to traverser
    fsparent = fs_extract(parent.path, parent.repo, basepath=rootds.path) \
        if parent else None

    # (recursively) traverse file tree of current dataset
    fs = fs_traverse(
        rootds.path, rootds.repo,
        subdatasets=list(rootds.subdatasets(result_xfm='relpaths')),
        render=False,
        parent=fsparent,
        # XXX note that here I kinda flipped the notions!
        recurse_datasets=recurse_datasets,
        recurse_directories=recurse_directories,
        json=json
    )

    # BUT if we are recurse_datasets but not recurse_directories
    #     we need to handle those subdatasets then somehow since
    #     otherwise we might not even get to them?!

    fs['nodes'][0]['size'] = fs['size']  # propagate the recomputed size to self's entry in the nodes sublist

    # add dataset specific entries to its dict
    rootds_model = GitModel(rootds.repo)
    fs['tags'] = rootds_model.describe
    fs['branch'] = rootds_model.branch
    index_file = opj(rootds.path, '.git', 'index')
    fs['index-mtime'] = time.strftime(
        u"%Y-%m-%d %H:%M:%S",
        time.localtime(getmtime(index_file))) if exists(index_file) else ''

    # render current dataset
    lgr.info('Dataset: %s' % rootds.path)
    fs_render(fs, json=json, ds_path=rootds.path)
    return fs
Example #4
def ds_traverse(rootds, parent=None, json=None,
                recurse_datasets=False, recurse_directories=False,
                long_=False):
    """Hierarchical dataset traverser

    Parameters
    ----------
    rootds: Dataset
      Root dataset to be traversed
    parent: Dataset
      Parent dataset of the current rootds
    recurse_datasets: bool
      Recurse into subdatasets of the root dataset
    recurse_directories: bool
      Recurse into subdirectories of the current dataset
      In both of the above cases, if False, they are not explicitly
      recursed; data is instead loaded from their metadata files

    Returns
    -------
    list of dict
      a (recursive) list of dataset info entries at the given path
    """
    # extract parent info to pass to traverser
    fsparent = fs_extract(parent.path, parent.repo, basepath=rootds.path) \
        if parent else None

    # (recursively) traverse file tree of current dataset
    fs = fs_traverse(
        rootds.path, rootds.repo,
        subdatasets=list(rootds.subdatasets(result_xfm='relpaths')),
        render=False,
        parent=fsparent,
        # XXX note that here I kinda flipped the notions!
        recurse_datasets=recurse_datasets,
        recurse_directories=recurse_directories,
        json=json
    )

    # BUT if we are recurse_datasets but not recurse_directories
    #     we need to handle those subdatasets then somehow since
    #     otherwise we might not even get to them?!

    fs['nodes'][0]['size'] = fs['size']  # propagate the recomputed size to self's entry in the nodes sublist

    # add dataset specific entries to its dict
    rootds_model = GitModel(rootds.repo)
    fs['tags'] = rootds_model.describe
    fs['branch'] = rootds_model.branch
    index_file = opj(rootds.path, '.git', 'index')
    fs['index-mtime'] = time.strftime(
        u"%Y-%m-%d %H:%M:%S",
        time.localtime(getmtime(index_file))) if exists(index_file) else ''

    # render current dataset
    lgr.info('Dataset: %s', rootds.path)
    fs_render(fs, json=json, ds_path=rootds.path)
    return fs
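
The index-mtime entry shows the common pattern of rendering a file's mtime as a human-readable timestamp; the same logic as a standalone helper:

import time
from os.path import exists, getmtime


def mtime_string(path):
    # empty string for a missing file, else local time as YYYY-MM-DD HH:MM:SS
    if not exists(path):
        return ''
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(getmtime(path)))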
Example #5
    def package_build_time(self):
        from genericpath import getmtime

        try:
            path = self.path.path  # self.path may be a path-like object wrapping the string
        except AttributeError:
            path = self.path

        return getmtime(path)
Example #6
    def copy(self, to, allow_symlink=False):
        """
		Copy necessary files to `to` if they are local.
		"""
        self.logger.info(' {0:} {2:} for {1:s}'.format(self.__class__.__name__,
                                                       self.group_name,
                                                       id(self) % 100000),
                         level=3)
        if self.local_path is None:
            return
        if self.processed_path is None:
            self.processed_path = self.full_file_path
        if self.copy_map:
            allow_symlink = False  # this may be too aggressive
        else:
            self.copy_map = {None: self.local_path}
        for src, dst in self.copy_map.items():
            if src:
                assert '*' not in src, '{0:}: wildcards not allowed in copy_map'.format(
                    self)
                assert self.resource_dir is not None, 'local resources should have resource_dir specified'
                srcpth = join(self.resource_dir, self.archive_dir or '', src)
            else:
                srcpth = self.processed_path
            dstpth = join(to, dst)
            if self.logger.get_level() >= 3:
                self.logger.info('  copying {0:s} {1:s} -> {2:}'.format(
                    self.__class__.__name__, srcpth, dstpth),
                                 level=3)
            else:
                self.logger.info(' copying {0:s} {1:}'.format(
                    self.__class__.__name__, dstpth),
                                 level=2)
            if exists(dstpth) and getmtime(dstpth) >= getmtime(srcpth):
                self.logger.info('  {0:s} {1:s} seems unchanged'.format(
                    self.__class__.__name__, dstpth),
                                 level=3)
            else:
                link_or_copy(src=srcpth,
                             dst=dstpth,
                             follow_symlinks=True,
                             allow_linking=allow_symlink,
                             create_dirs=True,
                             exist_ok=True)
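
The getmtime(dstpth) >= getmtime(srcpth) comparison implements make-style freshness: the copy is skipped when the destination is at least as new as the source. A minimal sketch of that rule, using shutil in place of the project's link_or_copy helper:

import os
import shutil
from os.path import exists, getmtime


def copy_if_newer(src, dst):
    """Copy src to dst only when dst is missing or older than src."""
    if exists(dst) and getmtime(dst) >= getmtime(src):
        return False  # destination already up to date
    os.makedirs(os.path.dirname(dst) or '.', exist_ok=True)
    shutil.copy2(src, dst)  # copy2 preserves the source mtime, keeping later comparisons stable
    return True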
Example #7
def define_page(url):
    """
    Sends a GET request to the specified URL and saves the response to a
    file for later access.

        - Checks whether an archived file exists; creates one if necessary
        - Makes the URL request if no fresh archive is found
        - If a 403 error occurs, creates a spoofed request to bypass it
        - Saves the response to a .txt file
    """

    # Imports
    import os
    import sys
    import time
    import wget
    import urllib.request as u
    from urllib.error import URLError
    from tld import get_tld
    from genericpath import getmtime

    # Local variables
    domain = get_tld(url)
    file_name = extract_name(url, domain) + '.txt'  # extract_name: module-level helper defined elsewhere
    html_dir = os.path.join(domain, 'html')
    file_path = os.path.join(html_dir, file_name)
    now = time.time()
    age_limit = 604800  # one week, in seconds

    if not os.path.exists(html_dir):
        os.makedirs(html_dir)
        print("New directory created: ", domain)

    if not os.path.isfile(file_path) or now - getmtime(file_path) > age_limit:
        print("File does not exist or is past a week old...attempting to create a new reference file.")
        try:
            wget.download(url, file_path)
        except (URLError, ValueError) as e:
            print("Not a valid URL - ", e)
            try:
                print("Assembling spoof request")
                spoof = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                         '(KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36')
                agent_request = {'User-Agent': spoof}
                url_object = u.Request(url, headers=agent_request)
                page = u.urlopen(url_object)
                try:
                    with open(file_path, 'w+') as f:
                        for line in page:
                            f.write(line.decode('utf-8'))
                except OSError:
                    print("spoof failed\nTerminating program.")
                    sys.exit(0)
            except (URLError, ValueError) as er:
                print("Again, Not a valid URL - ", er)
                print("No further options available.\nTerminating program.")
                sys.exit(0)
        else:
            print("New reference file created.")
            print("Reference Filename: ", file_name)
    return file_name, domain, html_dir
Example #8
    def isOutDated(self, file_path):
        """A file is outdated if it does not exists or if its modification date is
        older than (now - update_interval)
        """
        if ALWAYS_REFRESH:
            return True

        if os.path.exists(file_path):
            time_limit = int(time()) - UPDATE_INTERVAL
            mtime = getmtime(file_path)
            return mtime < time_limit

        return True
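
A standalone version of the same staleness rule, with the module-level constants turned into ordinary arguments (names illustrative):

import os
from time import time
from os.path import getmtime


def is_outdated(file_path, update_interval=3600, always_refresh=False):
    # outdated when missing, or when last modified more than update_interval seconds ago
    if always_refresh or not os.path.exists(file_path):
        return True
    return getmtime(file_path) < int(time()) - update_interval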
Example #9
def fetch_repodata(url, schannel, priority,
                   cache_dir=None, use_cache=False, session=None):
    cache_path = join(cache_dir or create_cache_dir(), cache_fn_url(url))

    try:
        mtime = getmtime(cache_path)
    except (IOError, OSError):
        log.debug("No local cache found for %s at %s", url, cache_path)
        if use_cache:
            return {'packages': {}}
        else:
            mod_etag_headers = {}
    else:
        mod_etag_headers = read_mod_and_etag(cache_path)

        if context.local_repodata_ttl > 1:
            max_age = context.local_repodata_ttl
        elif context.local_repodata_ttl == 1:
            max_age = get_cache_control_max_age(mod_etag_headers.get('_cache_control', ''))
        else:
            max_age = 0

        timeout = mtime + max_age - time()
        if (timeout > 0 or context.offline) and not url.startswith('file://'):
            log.debug("Using cached repodata for %s at %s. Timeout in %d sec",
                      url, cache_path, timeout)
            return read_local_repodata(cache_path, url, schannel, priority,
                                       mod_etag_headers.get('_etag'), mod_etag_headers.get('_mod'))

        log.debug("Locally invalidating cached repodata for %s at %s", url, cache_path)

    try:
        assert url is not None, url
        repodata = fetch_repodata_remote_request(session, url,
                                                 mod_etag_headers.get('_etag'),
                                                 mod_etag_headers.get('_mod'))
    except Response304ContentUnchanged:
        log.debug("304 NOT MODIFIED for '%s'. Updating mtime and loading from disk", url)
        touch(cache_path)
        return read_local_repodata(cache_path, url, schannel, priority,
                                   mod_etag_headers.get('_etag'), mod_etag_headers.get('_mod'))
    if repodata is None:
        return None

    with open(cache_path, 'w') as fo:
        json.dump(repodata, fo, indent=2, sort_keys=True, cls=EntityEncoder)

    process_repodata(repodata, url, schannel, priority)
    write_pickled_repodata(cache_path, repodata)
    return repodata
Example #10
    def is_older_than_metadata(self):
        """
        Return True if the package save file is older than the metadata.
        Return False if the mtime of either file can't be determined.

        :param path: Optional extra save path, used in save_path()

        """
        from genericpath import getmtime

        try:
            path = self.path.path
        except AttributeError:
            path = self.path

        source_ref = self._doc.ref.path

        try:
            age_diff = getmtime(source_ref) - getmtime(path)

            return age_diff > 0

        except (FileNotFoundError, OSError):
            return False
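
Stripped of the object plumbing, the check compares two mtimes and treats any OS error as "not older", a deliberately conservative default:

from os.path import getmtime


def older_than(path, reference):
    """True if path was modified before reference; False if either mtime is unavailable."""
    try:
        return getmtime(reference) - getmtime(path) > 0
    except OSError:  # FileNotFoundError is a subclass of OSError
        return False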
Example #11
    def __call__(cls, channel, repodata_fn=REPODATA_FN):
        assert channel.subdir
        assert not channel.package_filename
        assert type(channel) is Channel
        now = time()
        cache_key = channel.url(with_credentials=True), repodata_fn
        if cache_key in SubdirData._cache_:
            cache_entry = SubdirData._cache_[cache_key]
            if cache_key[0].startswith('file://'):
                file_path = url_to_path(channel.url() + '/' + repodata_fn)
                if exists(file_path):
                    if cache_entry._mtime > getmtime(file_path):
                        return cache_entry
            else:
                return cache_entry
        subdir_data_instance = super(SubdirDataType, cls).__call__(channel, repodata_fn)
        subdir_data_instance._mtime = now
        SubdirData._cache_[cache_key] = subdir_data_instance
        return subdir_data_instance
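
The metaclass keeps one SubdirData instance per cache key and, for file:// channels, re-validates the entry against the backing file's mtime. The same invalidation idea in a plain function-level cache (a sketch, not conda's API):

from os.path import getmtime
from time import time

_cache = {}  # path -> (cached_at, value)


def load_cached(path, loader):
    """Re-run loader(path) only if the file changed since the entry was cached."""
    entry = _cache.get(path)
    if entry is not None:
        cached_at, value = entry
        if cached_at > getmtime(path):  # file untouched since we cached it
            return value
    value = loader(path)
    _cache[path] = (time(), value)
    return value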
Example #12
    def test_time(self):
        f = open(support.TESTFN, "wb")
        try:
            f.write(b"foo")
            f.close()
            f = open(support.TESTFN, "ab")
            f.write(b"bar")
            f.close()
            f = open(support.TESTFN, "rb")
            d = f.read()
            f.close()
            self.assertEqual(d, b"foobar")

            self.assert_(
                genericpath.getctime(support.TESTFN) <= genericpath.getmtime(
                    support.TESTFN))
        finally:
            if not f.closed:
                f.close()
            os.remove(support.TESTFN)
Example #13
    def test_time(self):
        f = open(support.TESTFN, "wb")
        try:
            f.write(b"foo")
            f.close()
            f = open(support.TESTFN, "ab")
            f.write(b"bar")
            f.close()
            f = open(support.TESTFN, "rb")
            d = f.read()
            f.close()
            self.assertEqual(d, b"foobar")

            self.assertLessEqual(
                genericpath.getctime(support.TESTFN),
                genericpath.getmtime(support.TESTFN)
            )
        finally:
            if not f.closed:
                f.close()
            os.remove(support.TESTFN)
Example #14
    def _load(self):
        try:
            mtime = getmtime(self.cache_path_json)
        except (IOError, OSError):
            log.debug("No local cache found for %s at %s", self.url_w_repodata_fn,
                      self.cache_path_json)
            if context.use_index_cache or (context.offline
                                           and not self.url_w_subdir.startswith('file://')):
                log.debug("Using cached data for %s at %s forced. Returning empty repodata.",
                          self.url_w_repodata_fn, self.cache_path_json)
                return {
                    '_package_records': (),
                    '_names_index': defaultdict(list),
                    '_track_features_index': defaultdict(list),
                }
            else:
                mod_etag_headers = {}
        else:
            mod_etag_headers = read_mod_and_etag(self.cache_path_json)

            if context.use_index_cache:
                log.debug("Using cached repodata for %s at %s because use_cache=True",
                          self.url_w_repodata_fn, self.cache_path_json)

                _internal_state = self._read_local_repdata(mod_etag_headers.get('_etag'),
                                                           mod_etag_headers.get('_mod'))
                return _internal_state

            if context.local_repodata_ttl > 1:
                max_age = context.local_repodata_ttl
            elif context.local_repodata_ttl == 1:
                max_age = get_cache_control_max_age(mod_etag_headers.get('_cache_control', ''))
            else:
                max_age = 0

            timeout = mtime + max_age - time()
            if (timeout > 0 or context.offline) and not self.url_w_subdir.startswith('file://'):
                log.debug("Using cached repodata for %s at %s. Timeout in %d sec",
                          self.url_w_repodata_fn, self.cache_path_json, timeout)
                _internal_state = self._read_local_repdata(mod_etag_headers.get('_etag'),
                                                           mod_etag_headers.get('_mod'))
                return _internal_state

            log.debug("Local cache timed out for %s at %s",
                      self.url_w_repodata_fn, self.cache_path_json)

        # TODO (AV): Pull contents of this conditional into a separate module/function
        if context.extra_safety_checks:
            if cct is None:
                log.warning("metadata signature verification requested, "
                            "but `conda-content-trust` is not installed.")
            elif not context.signing_metadata_url_base:
                log.info("metadata signature verification requested, "
                         "but no metadata URL base has been specified.")
            else:
                self._refresh_signing_metadata()

        try:
            raw_repodata_str = fetch_repodata_remote_request(
                self.url_w_credentials,
                mod_etag_headers.get('_etag'),
                mod_etag_headers.get('_mod'),
                repodata_fn=self.repodata_fn)
            # empty file
            if not raw_repodata_str and self.repodata_fn != REPODATA_FN:
                raise UnavailableInvalidChannel(self.url_w_repodata_fn, 404)
        except UnavailableInvalidChannel:
            if self.repodata_fn != REPODATA_FN:
                self.repodata_fn = REPODATA_FN
                return self._load()
            else:
                raise
        except Response304ContentUnchanged:
            log.debug("304 NOT MODIFIED for '%s'. Updating mtime and loading from disk",
                      self.url_w_repodata_fn)
            touch(self.cache_path_json)
            _internal_state = self._read_local_repdata(mod_etag_headers.get('_etag'),
                                                       mod_etag_headers.get('_mod'))
            return _internal_state
        else:
            if not isdir(dirname(self.cache_path_json)):
                mkdir_p(dirname(self.cache_path_json))
            try:
                with io_open(self.cache_path_json, 'w') as fh:
                    fh.write(raw_repodata_str or '{}')
            except (IOError, OSError) as e:
                if e.errno in (EACCES, EPERM, EROFS):
                    raise NotWritableError(self.cache_path_json, e.errno, caused_by=e)
                else:
                    raise
            _internal_state = self._process_raw_repodata_str(raw_repodata_str)
            self._internal_state = _internal_state
            self._pickle_me()
            return _internal_state
Example #15
    def _load(self):
        try:
            mtime = getmtime(self.cache_path_json)
        except (IOError, OSError):
            log.debug("No local cache found for %s at %s", self.url_w_subdir,
                      self.cache_path_json)
            if context.use_index_cache or (
                    context.offline
                    and not self.url_w_subdir.startswith('file://')):
                log.debug(
                    "Using cached data for %s at %s forced. Returning empty repodata.",
                    self.url_w_subdir, self.cache_path_json)
                return {
                    '_package_records': (),
                    '_names_index': defaultdict(list),
                    '_track_features_index': defaultdict(list),
                }
            else:
                mod_etag_headers = {}
        else:
            mod_etag_headers = read_mod_and_etag(self.cache_path_json)

            if context.use_index_cache:
                log.debug(
                    "Using cached repodata for %s at %s because use_cache=True",
                    self.url_w_subdir, self.cache_path_json)

                _internal_state = self._read_local_repdata(
                    mod_etag_headers.get('_etag'),
                    mod_etag_headers.get('_mod'))
                return _internal_state

            if context.local_repodata_ttl > 1:
                max_age = context.local_repodata_ttl
            elif context.local_repodata_ttl == 1:
                max_age = get_cache_control_max_age(
                    mod_etag_headers.get('_cache_control', ''))
            else:
                max_age = 0

            timeout = mtime + max_age - time()
            if (timeout > 0 or context.offline
                ) and not self.url_w_subdir.startswith('file://'):
                log.debug(
                    "Using cached repodata for %s at %s. Timeout in %d sec",
                    self.url_w_subdir, self.cache_path_json, timeout)
                _internal_state = self._read_local_repdata(
                    mod_etag_headers.get('_etag'),
                    mod_etag_headers.get('_mod'))
                return _internal_state

            log.debug("Local cache timed out for %s at %s", self.url_w_subdir,
                      self.cache_path_json)

        try:
            raw_repodata_str = fetch_repodata_remote_request(
                self.url_w_credentials, mod_etag_headers.get('_etag'),
                mod_etag_headers.get('_mod'))
        except Response304ContentUnchanged:
            log.debug(
                "304 NOT MODIFIED for '%s'. Updating mtime and loading from disk",
                self.url_w_subdir)
            touch(self.cache_path_json)
            _internal_state = self._read_local_repdata(
                mod_etag_headers.get('_etag'), mod_etag_headers.get('_mod'))
            return _internal_state
        else:
            if not isdir(dirname(self.cache_path_json)):
                mkdir_p(dirname(self.cache_path_json))
            try:
                with io_open(self.cache_path_json, 'w') as fh:
                    fh.write(raw_repodata_str or '{}')
            except (IOError, OSError) as e:
                if e.errno in (EACCES, EPERM, EROFS):
                    raise NotWritableError(self.cache_path_json,
                                           e.errno,
                                           caused_by=e)
                else:
                    raise
            _internal_state = self._process_raw_repodata_str(raw_repodata_str)
            self._internal_state = _internal_state
            self._pickle_me()
            return _internal_state
Example #16
    def _load(self):
        try:
            mtime = getmtime(self.cache_path_json)
        except (IOError, OSError):
            log.debug("No local cache found for %s at %s", self.url_w_subdir, self.cache_path_json)
            if context.use_index_cache or (context.offline
                                           and not self.url_w_subdir.startswith('file://')):
                log.debug("Using cached data for %s at %s forced. Returning empty repodata.",
                          self.url_w_subdir, self.cache_path_json)
                return {
                    '_package_records': (),
                    '_names_index': defaultdict(list),
                    '_track_features_index': defaultdict(list),
                }
            else:
                mod_etag_headers = {}
        else:
            mod_etag_headers = read_mod_and_etag(self.cache_path_json)

            if context.use_index_cache:
                log.debug("Using cached repodata for %s at %s because use_cache=True",
                          self.url_w_subdir, self.cache_path_json)

                _internal_state = self._read_local_repdata(mod_etag_headers.get('_etag'),
                                                           mod_etag_headers.get('_mod'))
                return _internal_state

            if context.local_repodata_ttl > 1:
                max_age = context.local_repodata_ttl
            elif context.local_repodata_ttl == 1:
                max_age = get_cache_control_max_age(mod_etag_headers.get('_cache_control', ''))
            else:
                max_age = 0

            timeout = mtime + max_age - time()
            if (timeout > 0 or context.offline) and not self.url_w_subdir.startswith('file://'):
                log.debug("Using cached repodata for %s at %s. Timeout in %d sec",
                          self.url_w_subdir, self.cache_path_json, timeout)
                _internal_state = self._read_local_repdata(mod_etag_headers.get('_etag'),
                                                           mod_etag_headers.get('_mod'))
                return _internal_state

            log.debug("Local cache timed out for %s at %s",
                      self.url_w_subdir, self.cache_path_json)

        try:
            raw_repodata_str = fetch_repodata_remote_request(self.url_w_credentials,
                                                             mod_etag_headers.get('_etag'),
                                                             mod_etag_headers.get('_mod'))
        except Response304ContentUnchanged:
            log.debug("304 NOT MODIFIED for '%s'. Updating mtime and loading from disk",
                      self.url_w_subdir)
            touch(self.cache_path_json)
            _internal_state = self._read_local_repdata(mod_etag_headers.get('_etag'),
                                                       mod_etag_headers.get('_mod'))
            return _internal_state
        else:
            if not isdir(dirname(self.cache_path_json)):
                mkdir_p(dirname(self.cache_path_json))
            try:
                with open(self.cache_path_json, 'w') as fh:
                    fh.write(raw_repodata_str or '{}')
            except (IOError, OSError) as e:
                if e.errno in (EACCES, EPERM):
                    raise NotWritableError(self.cache_path_json, e.errno, caused_by=e)
                else:
                    raise
            _internal_state = self._process_raw_repodata_str(raw_repodata_str)
            self._internal_state = _internal_state
            self._pickle_me()
            return _internal_state
Example #17
from glob import glob
from os.path import getmtime, isfile


def time_sort_file(d):
    # collect regular files in directory d, sorted by modification time (oldest first)
    files = sorted((f for f in glob(d + "/*") if isfile(f)), key=getmtime)
    # strip the directory prefix, returning bare file names
    return [p.replace(d + '/', '') for p in files]
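
The same idiom works directly with sorted(); for instance, a newest-first variant:

from glob import glob
from os.path import getmtime, isfile


def newest_first(d):
    # newest-modified files first; getmtime can race if files vanish mid-listing
    return sorted((f for f in glob(d + "/*") if isfile(f)), key=getmtime, reverse=True)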
Example #18
    def update_event(self, inp=-1):
        # Ryven-style node: write the mtime of the path arriving on input 0 to output 0
        self.set_output_val(0, genericpath.getmtime(self.input(0)))