def fail_if_too_old(filename, max_age=None):
    if not exists(filename):
        raise IOError('file "%s" could not be found' % filename)
    if max_age is not None:
        if time() - getmtime(filename) > max_age:
            raise EOAgeError(
                'file "%s" could not be loaded because it is %ds old, '
                'which is more than the maximum age %ds.'
                % (filename, time() - getmtime(filename), max_age))
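# Usage sketch (illustrative, not from the original source): one way the helper
# above might be called. It assumes `fail_if_too_old` and the project-specific
# `EOAgeError` exception are importable; the file name and the one-hour limit
# are made-up values.
try:
    fail_if_too_old('results.cache', max_age=3600)
except EOAgeError:
    print('cache file is older than one hour; recomputing')
except IOError:
    print('cache file is missing; recomputing')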
def fetch_repodata(url, schannel, priority,
                   cache_dir=None, use_cache=False, session=None):
    cache_path = join(cache_dir or create_cache_dir(), cache_fn_url(url))
    try:
        mtime = getmtime(cache_path)
    except (IOError, OSError):
        log.debug("No local cache found for %s at %s", url, cache_path)
        if use_cache:
            return {'packages': {}}
        else:
            mod_etag_headers = {}
    else:
        mod_etag_headers = read_mod_and_etag(cache_path)

        if context.local_repodata_ttl > 1:
            max_age = context.local_repodata_ttl
        elif context.local_repodata_ttl == 1:
            max_age = get_cache_control_max_age(mod_etag_headers.get('_cache_control', ''))
        else:
            max_age = 0

        timeout = mtime + max_age - time()
        if (timeout > 0 or context.offline) and not url.startswith('file://'):
            log.debug("Using cached repodata for %s at %s. Timeout in %d sec",
                      url, cache_path, timeout)
            return read_local_repodata(cache_path, url, schannel, priority,
                                       mod_etag_headers.get('_etag'),
                                       mod_etag_headers.get('_mod'))

        log.debug("Locally invalidating cached repodata for %s at %s", url, cache_path)

    try:
        assert url is not None, url
        repodata = fetch_repodata_remote_request(session, url,
                                                 mod_etag_headers.get('_etag'),
                                                 mod_etag_headers.get('_mod'))
    except Response304ContentUnchanged:
        log.debug("304 NOT MODIFIED for '%s'. Updating mtime and loading from disk", url)
        touch(cache_path)
        return read_local_repodata(cache_path, url, schannel, priority,
                                   mod_etag_headers.get('_etag'),
                                   mod_etag_headers.get('_mod'))

    if repodata is None:
        return None

    with open(cache_path, 'w') as fo:
        json.dump(repodata, fo, indent=2, sort_keys=True, cls=EntityEncoder)

    process_repodata(repodata, url, schannel, priority)
    write_pickled_repodata(cache_path, repodata)
    return repodata
def ds_traverse(rootds, parent=None, json=None,
                recurse_datasets=False, recurse_directories=False,
                long_=False):
    """Hierarchical dataset traverser

    Parameters
    ----------
    rootds: Dataset
        Root dataset to be traversed
    parent: Dataset
        Parent dataset of the current rootds
    recurse_datasets: bool
        Recurse into subdatasets of the root dataset
    recurse_directories: bool
        Recurse into subdirectories of the current dataset
        In both of above cases, if False, they will not be explicitly
        recursed but data would be loaded from their meta-data files

    Returns
    -------
    list of dict
        extracts and returns a (recursive) list of dataset(s) info at path
    """
    # extract parent info to pass to traverser
    fsparent = fs_extract(parent.path, parent.repo, basepath=rootds.path) \
        if parent else None

    # (recursively) traverse file tree of current dataset
    fs = fs_traverse(
        rootds.path, rootds.repo,
        subdatasets=list(rootds.subdatasets(result_xfm='relpaths')),
        render=False,
        parent=fsparent,
        # XXX note that here I kinda flipped the notions!
        recurse_datasets=recurse_datasets,
        recurse_directories=recurse_directories,
        json=json
    )
    # BUT if we are recurse_datasets but not recurse_directories
    # we need to handle those subdatasets then somehow since
    # otherwise we might not even get to them?!
    fs['nodes'][0]['size'] = fs['size']  # update self's updated size in nodes sublist too!

    # add dataset specific entries to its dict
    rootds_model = GitModel(rootds.repo)
    fs['tags'] = rootds_model.describe
    fs['branch'] = rootds_model.branch
    index_file = opj(rootds.path, '.git', 'index')
    fs['index-mtime'] = time.strftime(
        u"%Y-%m-%d %H:%M:%S",
        time.localtime(getmtime(index_file))) if exists(index_file) else ''

    # render current dataset
    lgr.info('Dataset: %s' % rootds.path)
    fs_render(fs, json=json, ds_path=rootds.path)
    return fs
def ds_traverse(rootds, parent=None, json=None,
                recurse_datasets=False, recurse_directories=False,
                long_=False):
    """Hierarchical dataset traverser

    Parameters
    ----------
    rootds: Dataset
        Root dataset to be traversed
    parent: Dataset
        Parent dataset of the current rootds
    recurse_datasets: bool
        Recurse into subdatasets of the root dataset
    recurse_directories: bool
        Recurse into subdirectories of the current dataset
        In both of above cases, if False, they will not be explicitly
        recursed but data would be loaded from their meta-data files

    Returns
    -------
    list of dict
        extracts and returns a (recursive) list of dataset(s) info at path
    """
    # extract parent info to pass to traverser
    fsparent = fs_extract(parent.path, parent.repo, basepath=rootds.path) \
        if parent else None

    # (recursively) traverse file tree of current dataset
    fs = fs_traverse(
        rootds.path, rootds.repo,
        subdatasets=list(rootds.subdatasets(result_xfm='relpaths')),
        render=False,
        parent=fsparent,
        # XXX note that here I kinda flipped the notions!
        recurse_datasets=recurse_datasets,
        recurse_directories=recurse_directories,
        json=json
    )
    # BUT if we are recurse_datasets but not recurse_directories
    # we need to handle those subdatasets then somehow since
    # otherwise we might not even get to them?!
    fs['nodes'][0]['size'] = fs['size']  # update self's updated size in nodes sublist too!

    # add dataset specific entries to its dict
    rootds_model = GitModel(rootds.repo)
    fs['tags'] = rootds_model.describe
    fs['branch'] = rootds_model.branch
    index_file = opj(rootds.path, '.git', 'index')
    fs['index-mtime'] = time.strftime(
        u"%Y-%m-%d %H:%M:%S",
        time.localtime(getmtime(index_file))) if exists(index_file) else ''

    # render current dataset
    lgr.info('Dataset: %s', rootds.path)
    fs_render(fs, json=json, ds_path=rootds.path)
    return fs
def package_build_time(self):
    from genericpath import getmtime

    try:
        path = self.path.path
    except AttributeError:
        path = self.path

    return getmtime(path)
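# Side note (illustrative, not from the original source): `genericpath` is the
# internal stdlib module that `os.path` re-exports `getmtime` from, so importing
# it directly, as above, resolves to the same function object.
import genericpath
import os.path

print(genericpath.getmtime is os.path.getmtime)   # prints True on CPython
print(genericpath.getmtime(__file__))             # mtime of this script as a Unix timestamp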
def copy(self, to, allow_symlink=False):
    """
    Copy necessary files to `to` if they are local.
    """
    self.logger.info(' {0:} {2:} for {1:s}'.format(
        self.__class__.__name__, self.group_name, id(self) % 100000), level=3)
    if self.local_path is None:
        return
    if self.processed_path is None:
        self.processed_path = self.full_file_path
    if self.copy_map:
        allow_symlink = False  # this may be too aggressive
    else:
        self.copy_map = {None: self.local_path}
    for src, dst in self.copy_map.items():
        if src:
            assert '*' not in src, '{0:}: wildcards not allowed in copy_map'.format(self)
            assert self.resource_dir is not None, \
                'local resources should have resource_dir specified'
            srcpth = join(self.resource_dir, self.archive_dir or '', src)
        else:
            srcpth = self.processed_path
        dstpth = join(to, dst)
        if self.logger.get_level() >= 3:
            self.logger.info(' copying {0:s} {1:s} -> {2:}'.format(
                self.__class__.__name__, srcpth, dstpth), level=3)
        else:
            self.logger.info(' copying {0:s} {1:}'.format(
                self.__class__.__name__, dstpth), level=2)
        if exists(dstpth) and getmtime(dstpth) >= getmtime(srcpth):
            self.logger.info(' {0:s} {1:s} seems unchanged'.format(
                self.__class__.__name__, dstpth), level=3)
        else:
            link_or_copy(src=srcpth, dst=dstpth, follow_symlinks=True,
                         allow_linking=allow_symlink, create_dirs=True, exist_ok=True)
def define_page(url):
    """
    Sends a GET request to the specified URL. Saves the response to a file
    for later access.

    - Checks if archived file exists; creates file if necessary
    - Makes URL request if not
    - If 403 error, creates spoof to bypass
    - Saves response to .txt file
    """
    # Imports
    from tld import get_tld
    from genericpath import getmtime
    import time
    import sys  # needed by the error paths below

    # Local variables
    domain = get_tld(url)
    file_name = extract_name(url, domain) + '.txt'
    html_dir = domain + '\\html'
    file_path = html_dir + '\\' + file_name
    now = time.time()
    ageLimit = 604800  # one week, in seconds

    if not os.path.exists(domain):
        os.makedirs(html_dir)
        print("New directory created: ", domain)

    if not os.path.isfile(file_path) or now - getmtime(file_path) > ageLimit:
        print("File does not exist or is past a week old..."
              "attempting to create a new reference file.")
        try:
            wget.download(url, file_path)
        except (URLError, ValueError) as e:
            print("Not a valid URL - ", e)
            try:
                print("Assembling spoof request")
                spoof = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                         '(KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36')
                agentRequest = {'User-Agent': spoof}
                urlObject = u.Request(url, headers=agentRequest)
                page = u.urlopen(urlObject)
                try:
                    with open(file_path, 'w+') as f:
                        for line in page:
                            line = line.decode('utf-8')
                            f.write(line)
                except Exception:
                    print("spoof failed\nTerminating program.")
                    sys.exit(0)
            except (URLError, ValueError) as er:
                print("Again, not a valid URL - ", er)
                print("No further options available.\nTerminating program.")
                sys.exit(0)
        else:
            print("New reference file created.")

    print("Reference Filename: ", file_name)
    return file_name, domain, html_dir
def isOutDated(self, file_path):
    """A file is outdated if it does not exist or if its modification
    date is older than (now - update_interval)
    """
    if ALWAYS_REFRESH:
        return True
    if os.path.exists(file_path):
        time_limit = int(time()) - UPDATE_INTERVAL
        mtime = getmtime(file_path)
        return mtime < time_limit
    return True
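# Standalone sketch of the same time-to-live pattern using only the standard
# library, so it can run on its own; UPDATE_INTERVAL and the demo path are
# illustrative values, not taken from the original project.
import os
from os.path import getmtime
from time import time

UPDATE_INTERVAL = 24 * 60 * 60  # one day, in seconds


def is_outdated(file_path, max_age=UPDATE_INTERVAL):
    """Return True if `file_path` is missing or older than `max_age` seconds."""
    if not os.path.exists(file_path):
        return True
    return time() - getmtime(file_path) > max_age


if __name__ == '__main__':
    # True only if this script itself has not been modified for over a day
    print(is_outdated(__file__))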
def is_older_than_metadata(self):
    """
    Return True if the package save file is older than the metadata.
    Returns False if the time of either can't be determined

    :param path: Optional extra save path, used in save_path()
    """
    from genericpath import getmtime

    try:
        path = self.path.path
    except AttributeError:
        path = self.path

    source_ref = self._doc.ref.path

    try:
        age_diff = getmtime(source_ref) - getmtime(path)
        return age_diff > 0
    except (FileNotFoundError, OSError):
        return False
def __call__(cls, channel, repodata_fn=REPODATA_FN):
    assert channel.subdir
    assert not channel.package_filename
    assert type(channel) is Channel
    now = time()
    cache_key = channel.url(with_credentials=True), repodata_fn
    if cache_key in SubdirData._cache_:
        cache_entry = SubdirData._cache_[cache_key]
        if cache_key[0].startswith('file://'):
            file_path = url_to_path(channel.url() + '/' + repodata_fn)
            if exists(file_path):
                if cache_entry._mtime > getmtime(file_path):
                    return cache_entry
        else:
            return cache_entry
    subdir_data_instance = super(SubdirDataType, cls).__call__(channel, repodata_fn)
    subdir_data_instance._mtime = now
    SubdirData._cache_[cache_key] = subdir_data_instance
    return subdir_data_instance
def test_time(self):
    f = open(support.TESTFN, "wb")
    try:
        f.write(b"foo")
        f.close()
        f = open(support.TESTFN, "ab")
        f.write(b"bar")
        f.close()
        f = open(support.TESTFN, "rb")
        d = f.read()
        f.close()
        self.assertEqual(d, b"foobar")
        self.assert_(
            genericpath.getctime(support.TESTFN) <=
            genericpath.getmtime(support.TESTFN))
    finally:
        if not f.closed:
            f.close()
        os.remove(support.TESTFN)
def test_time(self):
    f = open(support.TESTFN, "wb")
    try:
        f.write(b"foo")
        f.close()
        f = open(support.TESTFN, "ab")
        f.write(b"bar")
        f.close()
        f = open(support.TESTFN, "rb")
        d = f.read()
        f.close()
        self.assertEqual(d, b"foobar")
        self.assertLessEqual(
            genericpath.getctime(support.TESTFN),
            genericpath.getmtime(support.TESTFN)
        )
    finally:
        if not f.closed:
            f.close()
        os.remove(support.TESTFN)
def _load(self):
    try:
        mtime = getmtime(self.cache_path_json)
    except (IOError, OSError):
        log.debug("No local cache found for %s at %s",
                  self.url_w_repodata_fn, self.cache_path_json)
        if context.use_index_cache or (context.offline
                                       and not self.url_w_subdir.startswith('file://')):
            log.debug("Using cached data for %s at %s forced. Returning empty repodata.",
                      self.url_w_repodata_fn, self.cache_path_json)
            return {
                '_package_records': (),
                '_names_index': defaultdict(list),
                '_track_features_index': defaultdict(list),
            }
        else:
            mod_etag_headers = {}
    else:
        mod_etag_headers = read_mod_and_etag(self.cache_path_json)

        if context.use_index_cache:
            log.debug("Using cached repodata for %s at %s because use_cache=True",
                      self.url_w_repodata_fn, self.cache_path_json)
            _internal_state = self._read_local_repdata(mod_etag_headers.get('_etag'),
                                                       mod_etag_headers.get('_mod'))
            return _internal_state

        if context.local_repodata_ttl > 1:
            max_age = context.local_repodata_ttl
        elif context.local_repodata_ttl == 1:
            max_age = get_cache_control_max_age(mod_etag_headers.get('_cache_control', ''))
        else:
            max_age = 0

        timeout = mtime + max_age - time()
        if (timeout > 0 or context.offline) and not self.url_w_subdir.startswith('file://'):
            log.debug("Using cached repodata for %s at %s. Timeout in %d sec",
                      self.url_w_repodata_fn, self.cache_path_json, timeout)
            _internal_state = self._read_local_repdata(mod_etag_headers.get('_etag'),
                                                       mod_etag_headers.get('_mod'))
            return _internal_state

        log.debug("Local cache timed out for %s at %s",
                  self.url_w_repodata_fn, self.cache_path_json)

    # TODO (AV): Pull contents of this conditional into a separate module/function
    if context.extra_safety_checks:
        if cct is None:
            log.warn("metadata signature verification requested, "
                     "but `conda-content-trust` is not installed.")
        elif not context.signing_metadata_url_base:
            log.info("metadata signature verification requested, "
                     "but no metadata URL base has been specified.")
        else:
            self._refresh_signing_metadata()

    try:
        raw_repodata_str = fetch_repodata_remote_request(
            self.url_w_credentials,
            mod_etag_headers.get('_etag'),
            mod_etag_headers.get('_mod'),
            repodata_fn=self.repodata_fn)
        # empty file
        if not raw_repodata_str and self.repodata_fn != REPODATA_FN:
            raise UnavailableInvalidChannel(self.url_w_repodata_fn, 404)
    except UnavailableInvalidChannel:
        if self.repodata_fn != REPODATA_FN:
            self.repodata_fn = REPODATA_FN
            return self._load()
        else:
            raise
    except Response304ContentUnchanged:
        log.debug("304 NOT MODIFIED for '%s'. Updating mtime and loading from disk",
                  self.url_w_repodata_fn)
        touch(self.cache_path_json)
        _internal_state = self._read_local_repdata(mod_etag_headers.get('_etag'),
                                                   mod_etag_headers.get('_mod'))
        return _internal_state
    else:
        if not isdir(dirname(self.cache_path_json)):
            mkdir_p(dirname(self.cache_path_json))
        try:
            with io_open(self.cache_path_json, 'w') as fh:
                fh.write(raw_repodata_str or '{}')
        except (IOError, OSError) as e:
            if e.errno in (EACCES, EPERM, EROFS):
                raise NotWritableError(self.cache_path_json, e.errno, caused_by=e)
            else:
                raise
        _internal_state = self._process_raw_repodata_str(raw_repodata_str)
        self._internal_state = _internal_state
        self._pickle_me()
        return _internal_state
def _load(self):
    try:
        mtime = getmtime(self.cache_path_json)
    except (IOError, OSError):
        log.debug("No local cache found for %s at %s",
                  self.url_w_subdir, self.cache_path_json)
        if context.use_index_cache or (context.offline
                                       and not self.url_w_subdir.startswith('file://')):
            log.debug("Using cached data for %s at %s forced. Returning empty repodata.",
                      self.url_w_subdir, self.cache_path_json)
            return {
                '_package_records': (),
                '_names_index': defaultdict(list),
                '_track_features_index': defaultdict(list),
            }
        else:
            mod_etag_headers = {}
    else:
        mod_etag_headers = read_mod_and_etag(self.cache_path_json)

        if context.use_index_cache:
            log.debug("Using cached repodata for %s at %s because use_cache=True",
                      self.url_w_subdir, self.cache_path_json)
            _internal_state = self._read_local_repdata(mod_etag_headers.get('_etag'),
                                                       mod_etag_headers.get('_mod'))
            return _internal_state

        if context.local_repodata_ttl > 1:
            max_age = context.local_repodata_ttl
        elif context.local_repodata_ttl == 1:
            max_age = get_cache_control_max_age(mod_etag_headers.get('_cache_control', ''))
        else:
            max_age = 0

        timeout = mtime + max_age - time()
        if (timeout > 0 or context.offline) and not self.url_w_subdir.startswith('file://'):
            log.debug("Using cached repodata for %s at %s. Timeout in %d sec",
                      self.url_w_subdir, self.cache_path_json, timeout)
            _internal_state = self._read_local_repdata(mod_etag_headers.get('_etag'),
                                                       mod_etag_headers.get('_mod'))
            return _internal_state

        log.debug("Local cache timed out for %s at %s",
                  self.url_w_subdir, self.cache_path_json)

    try:
        raw_repodata_str = fetch_repodata_remote_request(self.url_w_credentials,
                                                         mod_etag_headers.get('_etag'),
                                                         mod_etag_headers.get('_mod'))
    except Response304ContentUnchanged:
        log.debug("304 NOT MODIFIED for '%s'. Updating mtime and loading from disk",
                  self.url_w_subdir)
        touch(self.cache_path_json)
        _internal_state = self._read_local_repdata(mod_etag_headers.get('_etag'),
                                                   mod_etag_headers.get('_mod'))
        return _internal_state
    else:
        if not isdir(dirname(self.cache_path_json)):
            mkdir_p(dirname(self.cache_path_json))
        try:
            with io_open(self.cache_path_json, 'w') as fh:
                fh.write(raw_repodata_str or '{}')
        except (IOError, OSError) as e:
            if e.errno in (EACCES, EPERM, EROFS):
                raise NotWritableError(self.cache_path_json, e.errno, caused_by=e)
            else:
                raise
        _internal_state = self._process_raw_repodata_str(raw_repodata_str)
        self._internal_state = _internal_state
        self._pickle_me()
        return _internal_state
def _load(self):
    try:
        mtime = getmtime(self.cache_path_json)
    except (IOError, OSError):
        log.debug("No local cache found for %s at %s",
                  self.url_w_subdir, self.cache_path_json)
        if context.use_index_cache or (context.offline
                                       and not self.url_w_subdir.startswith('file://')):
            log.debug("Using cached data for %s at %s forced. Returning empty repodata.",
                      self.url_w_subdir, self.cache_path_json)
            return {
                '_package_records': (),
                '_names_index': defaultdict(list),
                '_track_features_index': defaultdict(list),
            }
        else:
            mod_etag_headers = {}
    else:
        mod_etag_headers = read_mod_and_etag(self.cache_path_json)

        if context.use_index_cache:
            log.debug("Using cached repodata for %s at %s because use_cache=True",
                      self.url_w_subdir, self.cache_path_json)
            _internal_state = self._read_local_repdata(mod_etag_headers.get('_etag'),
                                                       mod_etag_headers.get('_mod'))
            return _internal_state

        if context.local_repodata_ttl > 1:
            max_age = context.local_repodata_ttl
        elif context.local_repodata_ttl == 1:
            max_age = get_cache_control_max_age(mod_etag_headers.get('_cache_control', ''))
        else:
            max_age = 0

        timeout = mtime + max_age - time()
        if (timeout > 0 or context.offline) and not self.url_w_subdir.startswith('file://'):
            log.debug("Using cached repodata for %s at %s. Timeout in %d sec",
                      self.url_w_subdir, self.cache_path_json, timeout)
            _internal_state = self._read_local_repdata(mod_etag_headers.get('_etag'),
                                                       mod_etag_headers.get('_mod'))
            return _internal_state

        log.debug("Local cache timed out for %s at %s",
                  self.url_w_subdir, self.cache_path_json)

    try:
        raw_repodata_str = fetch_repodata_remote_request(self.url_w_credentials,
                                                         mod_etag_headers.get('_etag'),
                                                         mod_etag_headers.get('_mod'))
    except Response304ContentUnchanged:
        log.debug("304 NOT MODIFIED for '%s'. Updating mtime and loading from disk",
                  self.url_w_subdir)
        touch(self.cache_path_json)
        _internal_state = self._read_local_repdata(mod_etag_headers.get('_etag'),
                                                   mod_etag_headers.get('_mod'))
        return _internal_state
    else:
        if not isdir(dirname(self.cache_path_json)):
            mkdir_p(dirname(self.cache_path_json))
        try:
            with open(self.cache_path_json, 'w') as fh:
                fh.write(raw_repodata_str or '{}')
        except (IOError, OSError) as e:
            if e.errno in (EACCES, EPERM):
                raise NotWritableError(self.cache_path_json, e.errno, caused_by=e)
            else:
                raise
        _internal_state = self._process_raw_repodata_str(raw_repodata_str)
        self._internal_state = _internal_state
        self._pickle_me()
        return _internal_state
def time_sort_file(d):
    # List plain files in directory `d`, oldest first by modification time.
    # A list comprehension is used so this also works on Python 3, where
    # filter() and map() return lazy iterators without a .sort() method.
    files = [f for f in glob(d + "/*") if isfile(f)]
    files.sort(key=getmtime)
    # Strip the directory prefix so only bare file names are returned
    return [p.replace(d + '/', '') for p in files]
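# Self-contained demo of the same mtime-ordered listing (illustrative only):
# it creates a few files in a temporary directory so the expected order is known.
import os
import time
import tempfile
from glob import glob
from os.path import isfile, getmtime

with tempfile.TemporaryDirectory() as d:
    for name in ('a.txt', 'b.txt', 'c.txt'):
        with open(os.path.join(d, name), 'w') as fh:
            fh.write(name)
        time.sleep(0.01)  # small delay so modification times differ on most filesystems
    ordered = sorted((f for f in glob(d + '/*') if isfile(f)), key=getmtime)
    print([os.path.basename(p) for p in ordered])  # ['a.txt', 'b.txt', 'c.txt']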
def update_event(self, inp=-1):
    self.set_output_val(0, genericpath.getmtime(self.input(0)))