def get_function_hash_filename(fcn, argname_argvalue_list, create_dir_if_not = False):
    """
    Build the path of the pickle file that stores the memo for one call of a function.

    :param fcn: The memoized function; it is passed to get_memo_dir to select the directory.
    :param argname_argvalue_list: The arguments of the call; they are passed to compute_fixed_hash,
        whose output becomes the base name of the file.
    :param create_dir_if_not: If True, make_file_dir is called on the path before it is returned.
    :return: The path '<memo dir>/<hash>.pkl'.
    """
    # TODO: Include function path in hash?  Or module path, which would allow memos to be shareable.
    memo_file_name = '{}.pkl'.format(compute_fixed_hash(argname_argvalue_list))
    memo_path = os.path.join(get_memo_dir(fcn), memo_file_name)
    if create_dir_if_not:
        make_file_dir(memo_path)
    return memo_path
def save_result(self, result):
    """
    Pickle the result of this experiment into 'result.pkl' inside the experiment directory.

    :param result: The object to store.  It is serialized with pickle protocol 2.
    """
    file_path = get_local_experiment_path(os.path.join(self._experiment_directory, 'result.pkl'))
    make_file_dir(file_path)
    # Protocol 2 is a binary format, so the file has to be opened in binary mode: text mode
    # corrupts the data on platforms that translate newlines and fails outright on Python 3.
    with open(file_path, 'wb') as f:
        pickle.dump(result, f, protocol=2)
    # A parenthesised single-argument print gives the same output as a statement and as a function call.
    print('Saving Result for Experiment "%s"' % (self.get_id(), ))
def get_file(relative_name=None, url = None, use_cache = True, data_transformation = None):
    """
    Return the full local path of a file, downloading it first when necessary.

    :param relative_name: The name of the local file, relative to the FILE_ROOT (by default, FILE_ROOT is ~/.artemis).
        If None, and a URL is specified, the file is given a temporary name.
    :param url: Optionally, a url to fetch this file from if it doesn't exist locally.
    :param use_cache: If the file exists locally and a URL is specified, use the local version.
    :param data_transformation: Optionally a function that takes the downloaded data (from response.read) and
        outputs the binary data that is written into the file.
    :return: The full local path to the file.
    """
    assert not (relative_name is None and url is None), 'You must provide a local name and/or a URL to fetch from.'
    local_path = get_file_path(relative_name=relative_name, url=url)

    # A usable cached copy means there is nothing to download.
    if os.path.exists(local_path) and use_cache:
        return local_path

    assert url is not None, "No local copy of '%s' was found, and you didn't provide a URL to fetch it from" % (local_path, )
    print('Downloading file from url: "%s"...' % (url, ))
    payload = urlopen(url).read()
    print('...Done.')

    if data_transformation is not None:
        print('Processing downloaded data...')
        payload = data_transformation(payload)

    make_file_dir(local_path)
    with open(local_path, 'wb') as out_file:
        out_file.write(payload)
    return local_path
def check_memos(*args, **kwargs):
    """
    Call the wrapped function through the memo layer.

    The memo file path (derived from the function and its arguments) identifies the call both in the
    in-process cache and on disk.  A stored result is returned when one can be read; otherwise the
    function is called and, when writing is enabled and the result is not None, the result is stored.

    :param args, kwargs: Forwarded to the wrapped function.
    :return: The cached or freshly computed result of the wrapped function.
    """
    if disable_on_tests and is_test_mode():
        return fcn(*args, **kwargs)

    result_computed = False
    full_args = infer_arg_values(fcn, *args, **kwargs)
    filepath = get_function_hash_filename(fcn, full_args)
    # The filepath is used as the unique identifier, for both the local path and the disk-path
    # It may be more efficient to use the built-in hashability of certain types for the local cash, and just have special
    # ways of dealing with non-hashables like lists and numpy arrays - it's a bit dangerous because we need to check
    # that no object or subobjects have been changed.
    if MEMO_READ_ENABLED:
        if local_cache:
            if filepath in cached_local_results:
                if not suppress_info:
                    LOGGER.info('Reading disk-memo from local cache for function %s' % (fcn.__name__, ))
                return cached_local_results[filepath]
        if os.path.exists(filepath):
            # Memos are written with the binary pickle protocol 2, so they must be read in binary mode.
            with open(filepath, 'rb') as f:
                try:
                    if not suppress_info:
                        LOGGER.info('Reading memo for function %s' % (fcn.__name__, ))
                    result = pickle.load(f)
                except (ValueError, ImportError, EOFError) as err:
                    # EOFError is what pickle raises for an empty or truncated file (e.g. an interrupted
                    # write); it is treated like any other corrupt memo.
                    if isinstance(err, (ValueError, EOFError)) and not suppress_info:
                        LOGGER.warn('Memo-file "%s" was corrupt. (%s: %s). Recomputing.' % (filepath, err.__class__.__name__, str(err)))
                    elif isinstance(err, ImportError) and not suppress_info:
                        LOGGER.warn('Memo-file "{}" was tried to reference an old class and got ImportError: {}. Recomputing.'.format(filepath, str(err)))
                    result_computed = True
                    result = fcn(*args, **kwargs)
        else:
            result_computed = True
            result = fcn(*args, **kwargs)
    else:
        result_computed = True
        result = fcn(*args, **kwargs)

    if MEMO_WRITE_ENABLED and result is not None:  # We assume result of None means you haven't done coding your function.
        if local_cache:
            cached_local_results[filepath] = result
        if result_computed:  # Result was computed, so write it down
            filepath = get_function_hash_filename(fcn, full_args)
            make_file_dir(filepath)
            with open(filepath, 'wb') as f:
                if not suppress_info:
                    LOGGER.info('Writing disk-memo for function %s' % (fcn.__name__, ))
                pickle.dump(result, f, protocol=2)

    return result
def save_figure(fig, path, ext=None, default_ext = '.pdf'):
    """
    Save a figure to disk, either pickled (for the '.pkl' extension) or through fig.savefig.

    :param fig: The figure to save.  It must provide get_label() and savefig(path).
    :param path: The absolute path to the figure.  Any '%L' in it is replaced by the figure's label
        ('unnamed' if the label is empty), and the result is then passed through format_filename.
    :param ext: Optionally, the extension to append to the path, with or without the leading '.'.
        If None, the extension is inferred from the path.
    :param default_ext: The default extension to use, if none is specified and the path has none.
    :return: The path the figure was saved to.
    """
    if ext is None:
        _, ext = os.path.splitext(path)
        if ext == '':
            path += default_ext
            # Remember the extension that was applied so that the '.pkl' check below sees it.
            ext = default_ext
        else:
            assert ext in _supported_filetypes, "We inferred the extension '{}' from your filename, but it was not in the list of supported extensions: {}" \
                .format(ext, _supported_filetypes)
    else:
        # Normalise to the dotted form so that ext='pkl' and ext='.pkl' are treated alike below.
        if not ext.startswith('.'):
            ext = '.' + ext
        path += ext

    if '%L' in path:
        label = fig.get_label()
        # Compare by value: identity comparison against a string literal is unreliable.
        path = path.replace('%L', label if label != '' else 'unnamed')
    path = format_filename(path)

    make_file_dir(path)
    if ext == '.pkl':
        with open(path, 'wb') as f:
            pickle.dump(fig, f, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        fig.savefig(path)

    ARTEMIS_LOGGER.info('Saved Figure: %s' % path)
    return path
def smart_file(location, use_cache = False, make_dir = False):
    """
    Yield the local path of a file that may live at a URL, at a local path, or in the data directory.

    :param location: Specifies where the file is.  If it's formatted as a url, it's downloaded.  If it begins with a "/",
        it's assumed to be a local path.  Otherwise, it is assumed to be referenced relative to the data directory.
    :param use_cache: If True, and the location is a url, make a local cache of the file for future use (note: if the
        file at this url changes, the cached file will not).
    :param make_dir: Make the directory for this file, if it does not exist.  Not allowed for a URL.
    :yield: The local path to the file.
    """
    its_a_url = is_url(location)
    if its_a_url:
        assert not make_dir, "We cannot 'make the directory' for a URL"
        local_path = get_file(url=location, use_cache=use_cache)
    else:
        local_path = get_artemis_data_path(location)
        if make_dir:
            make_file_dir(local_path)

    try:
        yield local_path
    finally:
        # Remove the un-cached download even when the consumer raised while using the file,
        # so that failed runs do not leave temporary files behind.
        if its_a_url and not use_cache:
            os.remove(local_path)
def save_figure(fig, path, ext=None, default_ext='.pdf'):
    """
    Save a figure to disk, either pickled (for the '.pkl' extension) or through fig.savefig.

    :param fig: The figure to save.  It must provide get_label() and savefig(path).
    :param path: The absolute path to the figure.  Any '%L' in it is replaced by the figure's label
        ('unnamed' if the label is empty), and the result is then passed through format_filename.
    :param ext: Optionally, the extension to append to the path, with or without the leading '.'.
        If None, the extension is inferred from the path.
    :param default_ext: The default extension to use, if none is specified and the path has none.
    :return: The path the figure was saved to.
    """
    if ext is None:
        _, ext = os.path.splitext(path)
        if ext == '':
            path += default_ext
            # Remember the extension that was applied so that the '.pkl' check below sees it.
            ext = default_ext
        else:
            assert ext in _supported_filetypes, "We inferred the extension '{}' from your filename, but it was not in the list of supported extensions: {}" \
                .format(ext, _supported_filetypes)
    else:
        # Normalise to the dotted form so that ext='pkl' and ext='.pkl' are treated alike below.
        if not ext.startswith('.'):
            ext = '.' + ext
        path += ext

    if '%L' in path:
        label = fig.get_label()
        # Compare by value: identity comparison against a string literal is unreliable.
        path = path.replace('%L', label if label != '' else 'unnamed')
    path = format_filename(path)

    make_file_dir(path)
    if ext == '.pkl':
        with open(path, 'wb') as f:
            pickle.dump(fig, f, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        fig.savefig(path)

    ARTEMIS_LOGGER.info('Saved Figure: %s' % path)
    return path
def smart_file(location, use_cache=False, make_dir=False):
    """
    Yield the local path of a file that may live at a URL, at a local path, or in the data directory.

    :param location: Specifies where the file is.  If it's formatted as a url, it's downloaded.  If it begins with a "/",
        it's assumed to be a local path.  Otherwise, it is assumed to be referenced relative to the data directory.
    :param use_cache: If True, and the location is a url, make a local cache of the file for future use (note: if the
        file at this url changes, the cached file will not).
    :param make_dir: Make the directory for this file, if it does not exist.  Not allowed for a URL.
    :yield: The local path to the file.
    """
    its_a_url = is_url(location)
    if its_a_url:
        assert not make_dir, "We cannot 'make the directory' for a URL"
        local_path = get_file(url=location, use_cache=use_cache)
    else:
        local_path = get_artemis_data_path(location)
        if make_dir:
            make_file_dir(local_path)

    try:
        yield local_path
    finally:
        # Remove the un-cached download even when the consumer raised while using the file,
        # so that failed runs do not leave temporary files behind.
        if its_a_url and not use_cache:
            os.remove(local_path)
def save_result(self, result):
    """
    Pickle the result of this experiment into 'result.pkl' inside the experiment directory.

    :param result: The object to store.  It is serialized with pickle protocol 2.
    """
    relative_result_path = os.path.join(self._experiment_directory, 'result.pkl')
    result_path = get_local_experiment_path(relative_result_path)
    make_file_dir(result_path)
    with open(result_path, 'wb') as result_file:
        pickle.dump(result, result_file, protocol=2)
    message = 'Saving Result for Experiment "{}"'.format(self.get_id(), )
    ARTEMIS_LOGGER.info(message)
def get_archive(relative_path, url, force_extract=False, archive_type = None, force_download = False):
    """
    Download a compressed archive and extract it into a folder.

    :param relative_path: Local name for the extracted folder.  (Zip file will be named this with the
        appropriate zip extension)
    :param url: Url of the archive to download
    :param force_extract: Force the zip file to re-extract (rather than just reusing the extracted folder)
    :param archive_type: '.tar.gz', '.zip', or None to infer the type from the url, from the response's
        Content-Disposition header, or (when only re-extracting) from the archive already on disk.
    :param force_download: Delete the extracted folder, if any, and download the archive again.
    :return: The full path to the extracted folder on your system.
    :raises Exception: If the archive type cannot be determined.
    """
    local_folder_path = get_artemis_data_path(relative_path)

    assert archive_type in ('.tar.gz', '.zip', None)

    # rmtree fails on a missing folder, so only clear it when there is something to clear.
    if force_download and os.path.exists(local_folder_path):
        shutil.rmtree(local_folder_path)

    if not os.path.exists(local_folder_path) or force_download:
        # If the folder does not exist, download zip and extract.
        # (We also check force download here to avoid a race condition)
        response = urllib2.urlopen(url)

        # Need to infer
        if archive_type is None:
            if url.endswith('.tar.gz'):
                archive_type = '.tar.gz'
            elif url.endswith('.zip'):
                archive_type = '.zip'
            else:
                info = response.info()
                try:
                    header = next(x for x in info.headers if x.lower().startswith('content-disposition'))
                    # The fields are separated by '; ' and the raw header line ends with a line break, so
                    # each piece is stripped of whitespace before it is matched or unquoted.
                    filename_field = next(x.strip() for x in header.split(';') if x.strip().startswith('filename'))
                except StopIteration:
                    raise Exception("Could not infer archive type from user argument, url-name, or file-header. Please specify archive type as either '.zip' or '.tar.gz'.")
                original_file_name = filename_field.split('=')[-1].strip().strip('"\'')
                if original_file_name.endswith('.tar.gz'):
                    archive_type = '.tar.gz'
                elif original_file_name.endswith('.zip'):
                    archive_type = '.zip'
                else:
                    archive_type = bad_value(original_file_name, 'Filename "%s" does not end with a familiar zip extension like .zip or .tar.gz' % (original_file_name, ))

        # A parenthesised single-argument print gives the same output as a statement and as a function call.
        print('Downloading archive from url: "%s"...' % (url, ))
        data = response.read()
        print('...Done.')

        local_zip_path = local_folder_path + archive_type
        make_file_dir(local_zip_path)
        # Archives are binary data, so the file must be written in binary mode.
        with open(local_zip_path, 'wb') as f:
            f.write(data)

        force_extract = True
    elif force_extract:
        # Nothing was downloaded, so re-extract from the archive that an earlier call left next to the folder.
        if archive_type is None:
            archive_type = next((x for x in ('.tar.gz', '.zip') if os.path.isfile(local_folder_path + x)), None)
        if archive_type is None:
            raise Exception('Cannot re-extract "%s": no previously downloaded .tar.gz or .zip archive was found next to it.' % (local_folder_path, ))
        local_zip_path = local_folder_path + archive_type

    if force_extract:
        if archive_type == '.tar.gz':
            with tarfile.open(local_zip_path) as f:
                f.extractall(local_folder_path)
        elif archive_type == '.zip':
            with ZipFile(local_zip_path) as f:
                f.extractall(local_folder_path)
        else:
            raise Exception()

    return local_folder_path
def record_experiment(identifier='%T-%N', name = 'unnamed', info = '', print_to_console = True, show_figs = None,
        save_figs = True, saved_figure_ext = '.pdf', use_temp_dir = False):
    """
    Set up an experiment record, yield it while the experiment runs, and tear everything down afterwards.

    While the record is active, output is captured to 'output.txt' in the experiment directory, figure
    showing is controlled by show_figs, and (optionally) shown figures are saved into the directory.

    :param identifier: The string that uniquely identifies this experiment record.  It is expanded by
        format_filename using the name and the current time (presumably '%T' is the time and '%N' the
        name - confirm against format_filename).
    :param name: Base-name of the experiment
    :param info: Not used by this function.
    :param print_to_console: If True, print statements still go to console - if False, they're just rerouted to file.
    :param show_figs: Show figures when the experiment produces them.  Can be:
        'hang': Show and hang
        'draw': Show but keep on going
        False: Don't show figures
        None: 'draw' in test mode, 'hang' otherwise
    :param save_figs: If True, save figures into the experiment directory when they are shown.
    :param saved_figure_ext: The extension used for the saved figures.
    :param use_temp_dir: If True, record into a temporary directory that is deleted when the interpreter exits.
    :yield: The ExperimentRecord for this experiment.
    """
    # Note: matplotlib imports are internal in order to avoid trouble for people who may import this module without having
    # a working matplotlib (which can occasionally be tricky to install).
    global _CURRENT_EXPERIMENT_RECORD

    identifier = format_filename(file_string = identifier, base_name=name, current_time = datetime.now())

    if show_figs is None:
        show_figs = 'draw' if is_test_mode() else 'hang'

    assert show_figs in ('hang', 'draw', False)

    if use_temp_dir:
        experiment_directory = tempfile.mkdtemp()
        atexit.register(lambda: shutil.rmtree(experiment_directory))
    else:
        experiment_directory = get_local_path('experiments/{identifier}'.format(identifier=identifier))

    make_dir(experiment_directory)
    make_file_dir(experiment_directory)
    log_file_name = os.path.join(experiment_directory, 'output.txt')
    log_capture_context = PrintAndStoreLogger(log_file_path = log_file_name, print_to_console = print_to_console)
    log_capture_context.__enter__()
    from artemis.plotting.manage_plotting import WhatToDoOnShow
    blocking_show_context = WhatToDoOnShow(show_figs)
    blocking_show_context.__enter__()
    if save_figs:
        from artemis.plotting.saving_plots import SaveFiguresOnShow
        figure_save_context = SaveFiguresOnShow(path = os.path.join(experiment_directory, 'fig-%T-%L'+saved_figure_ext))
        figure_save_context.__enter__()

    _register_current_experiment(name, identifier)

    _CURRENT_EXPERIMENT_RECORD = ExperimentRecord(experiment_directory)
    _CURRENT_EXPERIMENT_RECORD.add_info('Name: %s' % (name, ))
    _CURRENT_EXPERIMENT_RECORD.add_info('Identifier: %s' % (identifier, ))
    _CURRENT_EXPERIMENT_RECORD.add_info('Directory: %s' % (_CURRENT_EXPERIMENT_RECORD.get_dir(), ))

    try:
        yield _CURRENT_EXPERIMENT_RECORD
    finally:
        # Tear down even when the experiment raised: otherwise the output capture and figure hooks
        # would stay installed and the global record would keep pointing at a dead experiment.
        _CURRENT_EXPERIMENT_RECORD = None

        blocking_show_context.__exit__(None, None, None)
        log_capture_context.__exit__(None, None, None)
        if save_figs:
            figure_save_context.__exit__(None, None, None)

        _deregister_current_experiment()
def __init__(self, log_file_path=None, print_to_console=True):
    """
    :param log_file_path: The path of the file to write the log to, or None to keep the log in a StringIO.
    :param print_to_console: Stored on the instance as _print_to_console.
    """
    self._print_to_console = print_to_console
    if log_file_path is None:
        # No file requested: keep the log in memory.
        self.log = StringIO()
    else:
        make_file_dir(log_file_path)
        self.log = open(log_file_path, 'w')
    self._log_file_path = log_file_path
    self.terminal = _ORIGINAL_STDOUT
def save_figures_on_close(dir, prefix='', default_ext = 'pdf', close_after =False):
    """
    Save every figure that was created between entering and resuming this generator.

    The figure numbers open before the yield are remembered; afterwards each figure whose number is new
    is saved as '<dir>/<prefix>fig-<number>.<default_ext>'.

    :param dir: The directory to save the figures into.
    :param prefix: A string prepended to each file name.
    :param default_ext: The extension of the saved files, with or without the leading '.'.
    :param close_after: If True, close each figure after it has been saved.
    """
    import os  # Local import: this block cannot see whether the module already imports os.

    old_fignums = set(plt.get_fignums())
    yield
    new_fignums = [fig_no for fig_no in plt.get_fignums() if fig_no not in old_fignums]
    extension = default_ext.lstrip('.')
    for fig_no in new_fignums:
        fig = plt.figure(fig_no)  # Looks up the existing figure with this number.
        path = os.path.join(dir, '{}fig-{}.{}'.format(prefix, fig_no, extension))
        make_file_dir(path)
        fig.savefig(path)
        if close_after:
            plt.close(fig)
def check_memos(*args, **kwargs):
    """
    Call the wrapped function through the memo layer.

    The memo file path (derived from the function and its arguments) identifies the call both in the
    in-process cache and on disk.  A stored result is returned when one can be read; otherwise the
    function is called and, when writing is enabled and the result is not None, the result is stored.

    :param args, kwargs: Forwarded to the wrapped function.
    :return: The cached or freshly computed result of the wrapped function.
    """
    if disable_on_tests and is_test_mode():
        return fcn(*args, **kwargs)

    full_args = infer_arg_values(fcn, args, kwargs)
    filepath = get_function_hash_filename(fcn, full_args)
    # The filepath is used as the unique identifier, for both the local path and the disk-path
    # It may be more efficient to use the built-in hashability of certain types for the local cash, and just have special
    # ways of dealing with non-hashables like lists and numpy arrays - it's a bit dangerous because we need to check
    # that no object or subobjects have been changed.

    result_computed = False
    if not MEMO_READ_ENABLED:
        result_computed = True
        result = fcn(*args, **kwargs)
    else:
        if local_cache and filepath in cached_local_results:
            if not suppress_info:
                LOGGER.info('Reading disk-memo from local cache for function {}'.format(fcn.__name__, ))
            return cached_local_results[filepath]
        if not os.path.exists(filepath):
            result_computed = True
            result = fcn(*args, **kwargs)
        else:
            with open(filepath, 'rb') as memo_file:
                try:
                    if not suppress_info:
                        LOGGER.info('Reading memo for function {}'.format(fcn.__name__, ))
                    result = pickle.load(memo_file)
                except (ValueError, ImportError, EOFError) as err:
                    if not suppress_info:
                        if isinstance(err, (ValueError, EOFError)):
                            LOGGER.warn('Memo-file "{}" was corrupt. ({}: {}). Recomputing.'.format(filepath, err.__class__.__name__, str(err)))
                        else:
                            # The only remaining caught type is ImportError.
                            LOGGER.warn('Memo-file "{}" was tried to reference an old class and got ImportError: {}. Recomputing.'.format(filepath, str(err)))
                    result_computed = True
                    result = fcn(*args, **kwargs)

    # We assume result of None means you haven't done coding your function.
    if MEMO_WRITE_ENABLED and result is not None:
        if local_cache:
            cached_local_results[filepath] = result
        if result_computed:
            # Result was computed, so write it down
            filepath = get_function_hash_filename(fcn, full_args, create_dir_if_not=True)
            make_file_dir(filepath)
            with open(filepath, 'wb') as memo_file:
                if not suppress_info:
                    LOGGER.info('Writing disk-memo for function {}'.format(fcn.__name__, ))
                pickle.dump(result, memo_file, protocol=2)

    return result
def __init__(self, log_file_path = None, print_to_console = True):
    """
    :param log_file_path: The path to save the records, or None if you just want to keep it in memory
    :param print_to_console: Stored on the instance as _print_to_console.
    """
    self._print_to_console = print_to_console
    if log_file_path is None:
        # No file requested: keep the log in memory.
        self.log = StringIO()
    else:
        make_file_dir(log_file_path)
        self.log = open(log_file_path, 'w')
    self._log_file_path = log_file_path
    self.terminal = _ORIGINAL_STDOUT
def __init__(self, log_file_path = None, print_to_console = True, prefix = None):
    """
    :param log_file_path: The path to save the records, or None if you just want to keep it in memory
    :param print_to_console: Stored on the instance as _print_to_console.
    :param prefix: Stored on the instance as prefix (None when not given).
    """
    self._print_to_console = print_to_console
    if log_file_path is None:
        # No file requested: keep the log in memory.
        self.log = StringIO()
    else:
        make_file_dir(log_file_path)
        self.log = open(log_file_path, 'w')
    self._log_file_path = log_file_path
    self.old_stdout = _ORIGINAL_STDOUT
    self.prefix = prefix
def open_file(self, filename, *args, **kwargs):
    """
    Open a file located inside the experiment record folder, creating its directory first if needed.

    Example Usage:

        with record.open_file('filename.txt') as f:
            txt = f.read()

    :param filename: Path within experiment directory (it can include subdirectories)
    :param args, kwargs: Forwarded to python's "open" function
    :return: A file object
    """
    path_in_record = os.path.join(self._experiment_directory, filename)
    # Make the path if it does not yet exist
    make_file_dir(path_in_record)
    file_object = open(path_in_record, *args, **kwargs)
    return file_object
def get_file(relative_name=None, url=None, use_cache=True, data_transformation=None):
    """
    Return the full local path of a file, downloading it first when necessary.

    :param relative_name: The name of the local file, relative to the FILE_ROOT (by default, FILE_ROOT is ~/.artemis).
        If None, and a URL is specified, the file is given a temporary name.
    :param url: Optionally, a url to fetch this file from if it doesn't exist locally.
    :param use_cache: If the file exists locally and a URL is specified, use the local version.
    :param data_transformation: Optionally a function that takes the downloaded data (from response.read) and
        outputs the binary data that is written into the file.
    :return: The full local path to the file.
    """
    assert not (relative_name is None and url is None), 'You must provide a local name and/or a URL to fetch from.'
    local_path = get_file_path(relative_name=relative_name, url=url)

    # A usable cached copy means there is nothing to download.
    if os.path.exists(local_path) and use_cache:
        return local_path

    assert url is not None, "No local copy of '%s' was found, and you didn't provide a URL to fetch it from" % (local_path, )
    print('Downloading file from url: "%s"...' % (url, ))
    payload = urlopen(url).read()
    print('...Done.')

    if data_transformation is not None:
        print('Processing downloaded data...')
        payload = data_transformation(payload)

    make_file_dir(local_path)
    with open(local_path, 'wb') as out_file:
        out_file.write(payload)
    return local_path
def save_figure(fig, path, default_ext='.pdf'):
    """
    Save a figure to disk through fig.savefig.

    :param fig: The figure to save.  It must provide get_label() and savefig(path).
    :param path: The absolute path to the figure.  Any '%L' in it is replaced by the figure's label
        ('unnamed' if the label is empty), and the result is then passed through format_filename.
    :param default_ext: The default extension to use, if the path has none.
    :return: The path the figure was saved to.
    """
    _, ext = os.path.splitext(path)
    if ext == '':
        path += default_ext

    if '%L' in path:
        label = fig.get_label()
        # Compare by value: identity comparison against a string literal is unreliable.
        path = path.replace('%L', label if label != '' else 'unnamed')
    path = format_filename(path)

    make_file_dir(path)
    fig.savefig(path)
    ARTEMIS_LOGGER.info('Saved Figure: %s' % path)
    return path
def get_archive(url, relative_path=None, force_extract=False, archive_type = None, use_cache=True):
    """
    Download a compressed archive and extract it into a folder.

    :param url: Url of the archive to download
    :param relative_path: Local name for the extracted folder, relative to FILE_ROOT.  (Zip file will be named
        this with the appropriate zip extension).  If None, the name is derived from the url with
        get_unnamed_file_hash.
    :param force_extract: Force the zip file to re-extract (rather than just reusing the extracted folder)
    :param archive_type: '.tar.gz', '.zip', or None to infer the type from the url, from the response's
        Content-Disposition header, or (when only re-extracting) from the archive already on disk.
    :param use_cache: If False, delete the extracted folder, if any, and download the archive again.
    :return: The full path to the extracted folder on your system.
    :raises Exception: If the archive type cannot be determined.
    """
    if relative_path is None:
        relative_path = get_unnamed_file_hash(url)

    local_folder_path, _ = os.path.splitext(os.path.join(FILE_ROOT, relative_path))

    assert archive_type in ('.tar.gz', '.zip', None)

    if (not os.path.exists(local_folder_path)) or (not use_cache):
        # If the folder does not exist, download zip and extract.
        # (We also check force download here to avoid a race condition)

        if not use_cache and os.path.exists(local_folder_path):
            shutil.rmtree(local_folder_path)

        response = urlopen(url)

        # Need to infer
        if archive_type is None:
            if url.endswith('.tar.gz'):
                archive_type = '.tar.gz'
            elif url.endswith('.zip'):
                archive_type = '.zip'
            else:
                # The header may be absent, and its fields are separated by '; ', so look it up with a
                # default and strip each field before matching it.
                header = response.headers.get('content-disposition')
                fields = [] if header is None else [x.strip() for x in header.split(';')]
                filename_fields = [x for x in fields if x.startswith('filename')]
                if not filename_fields:
                    raise Exception("Could not infer archive type from user argument, url-name, or file-header. Please specify archive type as either '.zip' or '.tar.gz'.")
                original_file_name = filename_fields[0].split('=')[-1].strip().strip('"\'')
                if original_file_name.endswith('.tar.gz'):
                    archive_type = '.tar.gz'
                elif original_file_name.endswith('.zip'):
                    archive_type = '.zip'
                else:
                    archive_type = bad_value(original_file_name, 'Filename "%s" does not end with a familiar zip extension like .zip or .tar.gz' % (original_file_name, ))

        print('Downloading archive from url: "%s"...' % (url, ))
        data = response.read()
        print('...Done.')

        local_zip_path = local_folder_path + archive_type
        if os.path.exists(local_zip_path):
            if os.path.isdir(local_zip_path):  # This shouldnt happen but may by accident.
                # Use the module-qualified name, like the other rmtree call in this function.
                shutil.rmtree(local_zip_path)
            else:
                os.remove(local_zip_path)
        make_file_dir(local_zip_path)
        with open(local_zip_path, 'wb') as f:
            f.write(data)

        force_extract = True
    elif force_extract:
        # Nothing was downloaded, so re-extract from the archive that an earlier call left next to the folder.
        if archive_type is None:
            archive_type = next((x for x in ('.tar.gz', '.zip') if os.path.isfile(local_folder_path + x)), None)
        if archive_type is None:
            raise Exception('Cannot re-extract "%s": no previously downloaded .tar.gz or .zip archive was found next to it.' % (local_folder_path, ))
        local_zip_path = local_folder_path + archive_type

    if force_extract:
        if archive_type == '.tar.gz':
            with tarfile.open(local_zip_path) as f:
                f.extractall(local_folder_path)
        elif archive_type == '.zip':
            with ZipFile(local_zip_path) as f:
                f.extractall(local_folder_path)
        else:
            raise Exception()

    return local_folder_path
def save_result(self, result):
    """
    Pickle the result of this experiment into 'result.pkl' inside the experiment directory.

    :param result: The object to store.  It is serialized with the highest available pickle protocol.
    """
    relative_result_path = os.path.join(self._experiment_directory, 'result.pkl')
    result_path = get_local_experiment_path(relative_result_path)
    make_file_dir(result_path)
    with open(result_path, 'wb') as result_file:
        pickle.dump(result, result_file, protocol=pickle.HIGHEST_PROTOCOL)
    message = 'Saving Result for Experiment "{}"'.format(self.get_id(),)
    ARTEMIS_LOGGER.info(message)
def close(self):
    """
    Write the contents of this object to disk.

    The (key, value) pairs from self.items() are pickled as a list into self.file_path, using
    self.pickle_protocol, after make_file_dir has been called on that path.
    """
    target_path = self.file_path
    make_file_dir(target_path)
    with open(target_path, 'wb') as out_file:
        pickle.dump(list(self.items()), out_file, protocol=self.pickle_protocol)