def dump_flight_to_kml(flight, kml_filename_local):
    """Dumps the flight to KML format.

    Args:
        flight: an igc_lib.Flight, the flight to be saved
        kml_filename_local: a string, the name of the output file
    """
    assert flight.valid
    kml = simplekml.Kml()

    def add_point(name, fix):
        kml.newpoint(name=name, coords=[(fix.lon, fix.lat)])

    coords = []
    for fix in flight.fixes:
        coords.append((fix.lon, fix.lat))
    kml.newlinestring(coords=coords)

    add_point(name="Takeoff", fix=flight.takeoff_fix)
    add_point(name="Landing", fix=flight.landing_fix)

    for i, thermal in enumerate(flight.thermals):
        add_point(name="thermal_%02d" % i, fix=thermal.enter_fix)
        add_point(name="thermal_%02d_END" % i, fix=thermal.exit_fix)

    kml_filename = Path(kml_filename_local).expanduser().absolute()
    kml.save(kml_filename.as_posix())
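# Usage sketch -- assumes igc_lib is installed and exposes Flight.create_from_file
# as in its README; the file names below are illustrative:
import igc_lib

flight = igc_lib.Flight.create_from_file("track.igc")
if flight.valid:
    dump_flight_to_kml(flight, "~/kml/track.kml")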
def get_common_path(list_of_files):
    # type: (Sequence[Union[str, Path]]) -> Optional[str]
    """
    Return the common path of a list of files

    :param list_of_files: list of files (str or Path objects)
    :return: Common path string (always absolute) or None if common path could not be found
    """
    if not list_of_files:
        return None

    # a single file has its parent as common path
    if len(list_of_files) == 1:
        return Path(list_of_files[0]).absolute().parent.as_posix()

    # find common path to support folder structure inside zip
    common_path_parts = Path(list_of_files[0]).absolute().parts
    for f in list_of_files:
        f_parts = Path(f).absolute().parts
        num_p = min(len(f_parts), len(common_path_parts))
        if f_parts[:num_p] == common_path_parts[:num_p]:
            common_path_parts = common_path_parts[:num_p]
            continue
        num_p = min([
            i for i, (a, b) in enumerate(
                zip(common_path_parts[:num_p], f_parts[:num_p]))
            if a != b
        ] or [-1])
        # no common path, break
        if num_p < 0:
            common_path_parts = []
            break
        # update common path
        common_path_parts = common_path_parts[:num_p]

    if common_path_parts:
        common_path = Path()
        for f in common_path_parts:
            common_path /= f
        return common_path.as_posix()

    return None
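# Usage sketch (illustrative POSIX paths; expected results shown in comments):
print(get_common_path(['/data/proj/a/1.txt', '/data/proj/b/2.txt']))  # -> /data/proj
print(get_common_path(['/data/proj/a/1.txt']))                        # -> /data/proj/a (single file's parent)
print(get_common_path([]))                                            # -> None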
def create_cache_folders(self, slot_index=0):
    """
    Create and update the cache folders.

    Notice that we support multiple instances sharing the same cache on some folders,
    while on others we append an "instance slot" number in order to differentiate
    between the different running instances.
    Notice slot_index=0 is the default, meaning no suffix is added to the singleton_folders.

    Note: do not call this function twice with a non-zero slot_index,
    as it will add a suffix to the folders on each call.

    :param slot_index: integer
    """
    # create target folders:
    folder_keys = ('agent.venvs_dir', 'agent.vcs_cache.path',
                   'agent.pip_download_cache.path',
                   'agent.docker_pip_cache', 'agent.docker_apt_cache')
    singleton_folders = ('agent.venvs_dir', 'agent.vcs_cache.path',)

    for key in folder_keys:
        folder_key = ConfigValue(self.config, key)
        if not folder_key.get():
            continue

        if slot_index and key in singleton_folders:
            f = folder_key.get()
            if f.endswith(os.path.sep):
                f = f[:-1]
            folder_key.set(f + '.{}'.format(slot_index))

        # update the configuration for full path
        folder = Path(os.path.expandvars(folder_key.get())).expanduser().absolute()
        folder_key.set(folder.as_posix())
        try:
            folder.mkdir(parents=True, exist_ok=True)
        except Exception:
            pass
def _upload_local_file(
        self, local_file, name,
        delete_after_upload=False, override_filename=None,
        override_filename_ext=None, wait_on_upload=False):
    # type: (str, str, bool, Optional[str], Optional[str], Optional[bool]) -> str
    """
    Upload local file and return uri of the uploaded file (uploading in the background)
    """
    from trains.storage import StorageManager

    upload_uri = self._task.output_uri or self._task.get_logger().get_default_upload_destination()
    if not isinstance(local_file, Path):
        local_file = Path(local_file)

    ev = UploadEvent(
        metric='artifacts', variant=name, image_data=None,
        upload_uri=upload_uri,
        local_image_path=local_file.as_posix(),
        delete_after_upload=delete_after_upload,
        override_filename=override_filename,
        override_filename_ext=override_filename_ext,
        override_storage_key_prefix=self._get_storage_uri_prefix())
    _, uri = ev.get_target_full_upload_uri(upload_uri, quote_uri=False)

    # send for upload
    # noinspection PyProtectedMember
    if wait_on_upload:
        StorageManager.upload_file(local_file, uri)
    else:
        self._task._reporter._report(ev)

    _, quoted_uri = ev.get_target_full_upload_uri(upload_uri)
    return quoted_uri
def _upload_local_file(
        self, local_file, name,
        delete_after_upload=False, override_filename=None, override_filename_ext=None):
    """
    Upload local file and return uri of the uploaded file (uploading in the background)
    """
    upload_uri = self._task.output_uri or self._task.get_logger().get_default_upload_destination()
    if not isinstance(local_file, Path):
        local_file = Path(local_file)

    ev = UploadEvent(
        metric='artifacts', variant=name, image_data=None,
        upload_uri=upload_uri,
        local_image_path=local_file.as_posix(),
        delete_after_upload=delete_after_upload,
        override_filename=override_filename,
        override_filename_ext=override_filename_ext,
        override_storage_key_prefix=self._get_storage_uri_prefix())
    _, uri = ev.get_target_full_upload_uri(upload_uri)

    # send for upload
    self._task.reporter._report(ev)

    return uri
def get_local_copy(
        cls,
        remote_url,
        cache_context=None,
        extract_archive=True,
        name=None,
        force_download=False,
):
    # type: (str, Optional[str], bool, Optional[str], bool) -> str
    """
    Get a local copy of the remote file. If the remote URL is a direct file access,
    the returned link is the same, otherwise a link to a local copy of the url file is returned.
    Caching is enabled by default, and the cache is limited by the number of stored files
    per cache context. The oldest accessed files are deleted when the cache is full.

    :param str remote_url: remote url link (string)
    :param str cache_context: Optional caching context identifier (string), default context 'global'
    :param bool extract_archive: if True, the returned path will be a cached folder containing
        the archive's content; currently only zip files are supported
    :param str name: name of the target file
    :param bool force_download: download the file from remote even if it exists in the local cache
    :return: Full path to local copy of the requested url. Return None on Error.
    """
    cache = CacheManager.get_cache_manager(cache_context=cache_context)
    cached_file = cache.get_local_copy(remote_url=remote_url, force_download=force_download)
    if extract_archive and cached_file:
        # this will get us the actual cache (even with direct access)
        cache_path_encoding = Path(cache.get_cache_folder()) / cache.get_hashed_url_file(remote_url)
        return cls._extract_to_cache(
            cached_file, name, cache_context,
            cache_path_encoding=cache_path_encoding.as_posix())

    return cached_file
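# Usage sketch -- StorageManager is the public clearml/trains entry point that
# exposes this classmethod; the bucket URL below is hypothetical:
from clearml import StorageManager

local_folder = StorageManager.get_local_copy(
    remote_url="s3://my-bucket/datasets/data.zip",  # hypothetical URL
    extract_archive=True)  # returns the cached folder holding the extracted content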
def __init__(self, session, base_interpreter=None):
    # type: (Session, PathLike) -> ()
    self._session = session
    self.config = deepcopy(session.config)  # type: ConfigTree
    self.handlers = []  # type: List[RequirementSubstitution]
    agent = self.config['agent']
    self.active = not agent.get('cpu_only', False)
    self.found_cuda = False
    if self.active:
        try:
            agent['cuda_version'], agent['cudnn_version'] = self.get_cuda_version(self.config)
            self.found_cuda = True
        except Exception:
            # if we have a cuda version, it is good enough (we don't have to have a cudnn version)
            if agent.get('cuda_version'):
                self.found_cuda = True
    pip_cache_dir = Path(self.config["agent.pip_download_cache.path"]).expanduser() / (
        'cu' + agent['cuda_version'] if self.found_cuda else 'cpu')
    self.translator = RequirementsTranslator(
        session, interpreter=base_interpreter,
        cache_dir=pip_cache_dir.as_posix())
def report_offline_session(cls, task, folder):
    from ... import StorageManager

    filename = Path(folder) / cls.__offline_filename
    if not filename.is_file():
        return False
    # noinspection PyProtectedMember
    remote_url = task._get_default_report_storage_uri()
    if remote_url and remote_url.endswith('/'):
        remote_url = remote_url[:-1]
    uploaded_files = set()
    task_id = task.id
    with open(filename.as_posix(), 'rt') as f:
        i = 0
        while True:
            try:
                line = f.readline()
                if not line:
                    break
                list_requests = json.loads(line)
                for r in list_requests:
                    org_task_id = r['task']
                    r['task'] = task_id
                    if r.get('key') and r.get('url'):
                        debug_sample = (Path(folder) / 'data').joinpath(*(r['key'].split('/')))
                        r['key'] = r['key'].replace(
                            '.{}{}'.format(org_task_id, os.sep),
                            '.{}{}'.format(task_id, os.sep), 1)
                        r['url'] = '{}/{}'.format(remote_url, r['key'])
                        if debug_sample not in uploaded_files and debug_sample.is_file():
                            uploaded_files.add(debug_sample)
                            StorageManager.upload_file(
                                local_file=debug_sample.as_posix(), remote_url=r['url'])
                    elif r.get('plot_str'):
                        # hack plotly embedded images links
                        # noinspection PyBroadException
                        try:
                            task_id_sep = '.{}{}'.format(org_task_id, os.sep)
                            plot = json.loads(r['plot_str'])
                            if plot.get('layout', {}).get('images'):
                                for image in plot['layout']['images']:
                                    if task_id_sep not in image['source']:
                                        continue
                                    pre, post = image['source'].split(task_id_sep, 1)
                                    pre = os.sep.join(pre.split(os.sep)[-2:])
                                    debug_sample = (Path(folder) / 'data').joinpath(
                                        pre + '.{}'.format(org_task_id), post)
                                    image['source'] = '/'.join([
                                        remote_url, pre + '.{}'.format(task_id), post])
                                    if debug_sample not in uploaded_files and debug_sample.is_file():
                                        uploaded_files.add(debug_sample)
                                        StorageManager.upload_file(
                                            local_file=debug_sample.as_posix(),
                                            remote_url=image['source'])
                            r['plot_str'] = json.dumps(plot)
                        except Exception:
                            pass
                i += 1
            except StopIteration:
                break
            except Exception as ex:
                warning('Failed reporting metric, line {} [{}]'.format(i, ex))
            batch_requests = api_events.AddBatchRequest(requests=list_requests)
            if batch_requests.requests:
                res = task.session.send(batch_requests)
                if res and not res.ok():
                    warning("failed logging metric task to backend ({:d} lines, {})".format(
                        len(batch_requests.requests), str(res.meta)))
    return True
def _extract_to_cache(cls, cached_file, name, cache_context=None, target_folder=None):
    # type: (str, str, Optional[str], Optional[str]) -> str
    """
    Extract cached file to cache folder

    :param str cached_file: local copy of archive file
    :param str name: name of the target file
    :param str cache_context: cache context id
    :param str target_folder: specify target path to use for archive extraction
    :return: cached folder containing the extracted archive content
    """
    if not cached_file:
        return cached_file

    cached_file = Path(cached_file)

    # we support zip and tar.gz files auto-extraction
    suffix = cached_file.suffix.lower()
    if suffix == '.gz':
        suffix = ''.join(a.lower() for a in cached_file.suffixes[-2:])

    if suffix not in (".zip", ".tgz", ".tar.gz"):
        return str(cached_file)

    cached_folder = Path(cached_file).parent
    archive_suffix = cached_file.name[:-len(suffix)]
    name = encode_string_to_filename(name)
    target_folder = Path(
        target_folder or
        CacheManager.get_context_folder_lookup(cache_context).format(archive_suffix, name))

    if target_folder.exists():
        # noinspection PyBroadException
        try:
            target_folder.touch(exist_ok=True)
            return target_folder.as_posix()
        except Exception:
            pass

    base_logger = LoggerRoot.get_base_logger()
    try:
        temp_target_folder = cached_folder / "{0}_{1}_{2}".format(
            target_folder.name, time() * 1000, str(random()).replace('.', ''))
        temp_target_folder.mkdir(parents=True, exist_ok=True)
        if suffix == ".zip":
            ZipFile(cached_file.as_posix()).extractall(path=temp_target_folder.as_posix())
        elif suffix == ".tar.gz":
            with tarfile.open(cached_file.as_posix()) as file:
                file.extractall(temp_target_folder.as_posix())
        elif suffix == ".tgz":
            with tarfile.open(cached_file.as_posix(), mode='r:gz') as file:
                file.extractall(temp_target_folder.as_posix())

        # we assume we will only have such a folder if we already extracted the file
        # noinspection PyBroadException
        try:
            # if the rename fails, it means that someone else already managed to extract the file;
            # delete the current folder and return the already existing cached zip folder
            shutil.move(temp_target_folder.as_posix(), target_folder.as_posix())
        except Exception:
            if target_folder.exists():
                target_folder.touch(exist_ok=True)
            else:
                base_logger.warning("Failed renaming {0} to {1}".format(
                    temp_target_folder.as_posix(), target_folder.as_posix()))
            try:
                shutil.rmtree(temp_target_folder.as_posix())
            except Exception as ex:
                base_logger.warning("Exception {}\nFailed deleting folder {}".format(
                    ex, temp_target_folder.as_posix()))
    except Exception as ex:
        # failed extracting the file:
        base_logger.warning("Exception {}\nFailed extracting zip file {}".format(
            ex, cached_file.as_posix()))
        # noinspection PyBroadException
        try:
            target_folder.rmdir()
        except Exception:
            pass
        return cached_file.as_posix()
    return target_folder.as_posix()
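# The extraction above is made race-safe by unpacking into a uniquely named
# temporary folder and only then renaming it into place, so a concurrent process
# never observes a half-written cache entry. A standalone sketch of that pattern
# (all names illustrative, not the clearml implementation):
import os
import shutil
import tempfile
from pathlib import Path
from zipfile import ZipFile


def extract_atomically(archive, target_folder):
    """Extract `archive` to `target_folder` safely under concurrency (sketch)."""
    target = Path(target_folder)
    if target.is_dir():
        return target  # someone already extracted it
    # extract into a unique sibling folder first
    tmp = Path(tempfile.mkdtemp(dir=target.parent.as_posix()))
    ZipFile(archive).extractall(path=tmp.as_posix())
    try:
        os.rename(tmp.as_posix(), target.as_posix())  # atomic on the same filesystem
    except OSError:
        # lost the race: another process renamed its copy first
        shutil.rmtree(tmp.as_posix(), ignore_errors=True)
    return target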
def _get_jupyter_notebook_filename(cls):
    # check if we are running in vscode, where the jupyter notebook is already defined:
    if 'IPython' in sys.modules:
        # noinspection PyBroadException
        try:
            from IPython import get_ipython  # noqa
            ip = get_ipython()
            # vscode-jupyter PR #8531 added this variable
            local_ipynb_file = ip.__dict__.get('user_ns', {}).get('__vsc_ipynb_file__') if ip else None
            if local_ipynb_file:
                # now replace the .ipynb with .py
                # we assume we will have that file available for monitoring
                local_ipynb_file = Path(local_ipynb_file)
                script_entry_point = local_ipynb_file.with_suffix('.py').as_posix()
                # install the post store hook,
                # notice that if we do not have a local file we serialize/write the entire notebook every time
                cls._jupyter_install_post_store_hook(local_ipynb_file.as_posix(), log_history=False)
                return script_entry_point
        except Exception:
            pass

    if not (sys.argv[0].endswith(os.path.sep + 'ipykernel_launcher.py') or
            sys.argv[0].endswith(os.path.join(os.path.sep, 'ipykernel', '__main__.py'))) \
            or len(sys.argv) < 3 or not sys.argv[2].endswith('.json'):
        return None

    server_info = None

    # we can safely assume that we can import the notebook package here
    # noinspection PyBroadException
    try:
        # noinspection PyPackageRequirements
        from notebook.notebookapp import list_running_servers
        import requests
        current_kernel = sys.argv[2].split(os.path.sep)[-1].replace('kernel-', '').replace('.json', '')
        # noinspection PyBroadException
        try:
            server_info = next(list_running_servers())
        except Exception:
            # on some jupyter notebook versions this function can crash on parsing the json file,
            # we will parse it manually here
            # noinspection PyPackageRequirements
            import ipykernel
            from glob import glob
            import json
            for f in glob(os.path.join(os.path.dirname(ipykernel.get_connection_file()), '??server-*.json')):
                # noinspection PyBroadException
                try:
                    with open(f, 'r') as json_data:
                        server_info = json.load(json_data)
                except Exception:
                    server_info = None
                if server_info:
                    break

        cookies = None
        password = None
        if server_info and server_info.get('password'):
            # we need to get the password
            from ....config import config
            password = config.get('development.jupyter_server_password', '')
            if not password:
                cls._get_logger().warning(
                    'Password protected Jupyter Notebook server was found! '
                    'Add `sdk.development.jupyter_server_password=<jupyter_password>` to ~/clearml.conf')
                return os.path.join(os.getcwd(), 'error_notebook_not_found.py')

            r = requests.get(url=server_info['url'] + 'login')
            cookies = {'_xsrf': r.cookies.get('_xsrf', '')}
            r = requests.post(server_info['url'] + 'login?next', cookies=cookies,
                              data={'_xsrf': cookies['_xsrf'], 'password': password})
            cookies.update(r.cookies)

        auth_token = server_info.get('token') or os.getenv('JUPYTERHUB_API_TOKEN') or ''
        try:
            r = requests.get(
                url=server_info['url'] + 'api/sessions', cookies=cookies,
                headers={'Authorization': 'token {}'.format(auth_token), })
        except requests.exceptions.SSLError:
            # disable SSL check warning
            from urllib3.exceptions import InsecureRequestWarning
            # noinspection PyUnresolvedReferences
            requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
            # fire request
            r = requests.get(
                url=server_info['url'] + 'api/sessions', cookies=cookies,
                headers={'Authorization': 'token {}'.format(auth_token), }, verify=False)
            # enable SSL check warning
            import warnings
            warnings.simplefilter('default', InsecureRequestWarning)

        # send request to the jupyter server
        try:
            r.raise_for_status()
        except Exception as ex:
            cls._get_logger().warning('Failed accessing the jupyter server{}: {}'.format(
                ' [password={}]'.format(password) if server_info.get('password') else '', ex))
            return os.path.join(os.getcwd(), 'error_notebook_not_found.py')

        notebooks = r.json()

        cur_notebook = None
        for n in notebooks:
            if n['kernel']['id'] == current_kernel:
                cur_notebook = n
                break

        notebook_path = cur_notebook['notebook'].get('path', '')
        notebook_name = cur_notebook['notebook'].get('name', '')

        is_google_colab = False
        # check if this is google.colab, then there is no local file
        # noinspection PyBroadException
        try:
            # noinspection PyPackageRequirements
            from IPython import get_ipython
            if get_ipython() and 'google.colab' in get_ipython().extension_manager.loaded:
                is_google_colab = True
        except Exception:
            pass

        if is_google_colab:
            script_entry_point = str(notebook_name or 'notebook').replace(
                '>', '_').replace('<', '_').replace('.ipynb', '.py')
            if not script_entry_point.lower().endswith('.py'):
                script_entry_point += '.py'
            local_ipynb_file = None
        else:
            # always slash, because this is from uri (so never backslash, not even on windows)
            entry_point_filename = notebook_path.split('/')[-1]

            # now we should try to find the actual file
            entry_point = (Path.cwd() / entry_point_filename).absolute()
            if not entry_point.is_file():
                entry_point = (Path.cwd() / notebook_path).absolute()

            # fix for VSCode pushing a uuid at the end of the notebook name
            if not entry_point.exists():
                # noinspection PyBroadException
                try:
                    alternative_entry_point = '-'.join(entry_point_filename.split('-')[:-5]) + '.ipynb'
                    # now we should try to find the actual file
                    entry_point_alternative = (Path.cwd() / alternative_entry_point).absolute()
                    if not entry_point_alternative.is_file():
                        entry_point_alternative = (Path.cwd() / notebook_path).absolute()
                    # If we found it, replace it
                    if entry_point_alternative.exists():
                        entry_point = entry_point_alternative
                except Exception as ex:
                    cls._get_logger().warning('Failed accessing jupyter notebook {}: {}'.format(notebook_path, ex))

            # get local ipynb for observer
            local_ipynb_file = entry_point.as_posix()

            # now replace the .ipynb with .py
            # we assume we will have that file available with the Jupyter notebook plugin
            entry_point = entry_point.with_suffix('.py')

            script_entry_point = entry_point.as_posix()

        # install the post store hook,
        # notice that if we do not have a local file we serialize/write the entire notebook every time
        cls._jupyter_install_post_store_hook(local_ipynb_file, is_google_colab)

        return script_entry_point
    except Exception:
        return None
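# For reference, the kernel id matched against the /api/sessions listing above is
# parsed from the connection-file argument Jupyter passes to the kernel
# (the path below is made up):
import os

argv2 = '/run/user/1000/jupyter/kernel-4f8a1c2e-12ab-34cd-56ef-7890abcdef12.json'
current_kernel = argv2.split(os.path.sep)[-1].replace('kernel-', '').replace('.json', '')
print(current_kernel)  # -> 4f8a1c2e-12ab-34cd-56ef-7890abcdef12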
def upload_artifact(self, name, artifact_object=None, metadata=None, preview=None,
                    delete_after_upload=False, auto_pickle=True):
    # type: (str, Optional[object], Optional[dict], Optional[str], bool, bool) -> bool
    if not Session.check_min_api_version('2.3'):
        LoggerRoot.get_base_logger().warning(
            'Artifacts not supported by your TRAINS-server version, '
            'please upgrade to the latest server version')
        return False

    if name in self._artifacts_container:
        raise ValueError("Artifact by the name of {} is already registered, "
                         "use register_artifact".format(name))

    # cast preview to string
    if preview:
        preview = str(preview)

    # convert the string to a path object if it is a file/folder
    # (do not try to serialize long texts)
    if isinstance(artifact_object, six.string_types) and len(artifact_object) < 2048:
        # noinspection PyBroadException
        try:
            artifact_path = Path(artifact_object)
            if artifact_path.exists():
                artifact_object = artifact_path
            elif '*' in artifact_object or '?' in artifact_object:
                # hackish, detect wildcard in the files folder
                folder = Path('').joinpath(*artifact_path.parts[:-1])
                if folder.is_dir() and folder.parts:
                    wildcard = artifact_path.parts[-1]
                    if list(Path(folder).rglob(wildcard)):
                        artifact_object = artifact_path
        except Exception:
            pass

    artifact_type_data = tasks.ArtifactTypeData()
    artifact_type_data.preview = ''
    override_filename_in_uri = None
    override_filename_ext_in_uri = None
    uri = None
    if np and isinstance(artifact_object, np.ndarray):
        artifact_type = 'numpy'
        artifact_type_data.content_type = 'application/numpy'
        artifact_type_data.preview = preview or str(artifact_object.__repr__())
        override_filename_ext_in_uri = '.npz'
        override_filename_in_uri = name + override_filename_ext_in_uri
        fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                     suffix=override_filename_ext_in_uri)
        os.close(fd)
        np.savez_compressed(local_filename, **{name: artifact_object})
        delete_after_upload = True
    elif pd and isinstance(artifact_object, pd.DataFrame):
        artifact_type = 'pandas'
        artifact_type_data.content_type = 'text/csv'
        artifact_type_data.preview = preview or str(artifact_object.__repr__())
        override_filename_ext_in_uri = self._save_format
        override_filename_in_uri = name
        fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                     suffix=override_filename_ext_in_uri)
        os.close(fd)
        artifact_object.to_csv(local_filename, compression=self._compression)
        delete_after_upload = True
    elif isinstance(artifact_object, Image.Image):
        artifact_type = 'image'
        artifact_type_data.content_type = 'image/png'
        desc = str(artifact_object.__repr__())
        artifact_type_data.preview = preview or desc[1:desc.find(' at ')]
        override_filename_ext_in_uri = '.png'
        override_filename_in_uri = name + override_filename_ext_in_uri
        fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                     suffix=override_filename_ext_in_uri)
        os.close(fd)
        artifact_object.save(local_filename)
        delete_after_upload = True
    elif isinstance(artifact_object, dict):
        artifact_type = 'JSON'
        artifact_type_data.content_type = 'application/json'
        preview = preview or json.dumps(artifact_object, sort_keys=True, indent=4)
        override_filename_ext_in_uri = '.json'
        override_filename_in_uri = name + override_filename_ext_in_uri
        fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                     suffix=override_filename_ext_in_uri)
        os.write(fd, bytes(preview.encode()))
        os.close(fd)
        if len(preview) < self.max_preview_size_bytes:
            artifact_type_data.preview = preview
        else:
            artifact_type_data.preview = '# full json too large to store, storing first {}kb\n{}'.format(
                self.max_preview_size_bytes // 1024, preview[:self.max_preview_size_bytes])
        delete_after_upload = True
    elif isinstance(artifact_object, (Path, pathlib_Path,) if pathlib_Path is not None else (Path,)):
        # check if single file
        artifact_object = Path(artifact_object)
        artifact_object = artifact_object.expanduser().absolute()

        # noinspection PyBroadException
        try:
            create_zip_file = not artifact_object.is_file()
        except Exception:
            # Hack for windows pathlib2 bug, is_file isn't valid.
            create_zip_file = True
        else:
            # We assume that this is not a Windows os
            if artifact_object.is_dir():
                # change to wildcard
                artifact_object /= '*'

        if create_zip_file:
            folder = Path('').joinpath(*artifact_object.parts[:-1])
            if not folder.is_dir() or not folder.parts:
                raise ValueError("Artifact file/folder '{}' could not be found".format(
                    artifact_object.as_posix()))

            wildcard = artifact_object.parts[-1]
            files = list(Path(folder).rglob(wildcard))
            override_filename_ext_in_uri = '.zip'
            override_filename_in_uri = folder.parts[-1] + override_filename_ext_in_uri
            fd, zip_file = mkstemp(prefix=quote(folder.parts[-1], safe="") + '.',
                                   suffix=override_filename_ext_in_uri)
            try:
                artifact_type_data.content_type = 'application/zip'
                archive_preview = 'Archive content {}:\n'.format(artifact_object.as_posix())

                with ZipFile(zip_file, 'w', allowZip64=True, compression=ZIP_DEFLATED) as zf:
                    for filename in sorted(files):
                        if filename.is_file():
                            relative_file_name = filename.relative_to(folder).as_posix()
                            archive_preview += '{} - {}\n'.format(
                                relative_file_name,
                                humanfriendly.format_size(filename.stat().st_size))
                            zf.write(filename.as_posix(), arcname=relative_file_name)
            except Exception as e:
                # failed zipping the folder:
                LoggerRoot.get_base_logger().warning(
                    'Exception {}\nFailed zipping artifact folder {}'.format(e, folder))
                return False
            finally:
                os.close(fd)

            artifact_type_data.preview = preview or archive_preview
            artifact_object = zip_file
            artifact_type = 'archive'
            artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
            local_filename = artifact_object
            delete_after_upload = True
        else:
            if not artifact_object.is_file():
                raise ValueError("Artifact file '{}' could not be found".format(
                    artifact_object.as_posix()))

            override_filename_in_uri = artifact_object.parts[-1]
            artifact_type_data.preview = preview or '{} - {}\n'.format(
                artifact_object, humanfriendly.format_size(artifact_object.stat().st_size))
            artifact_object = artifact_object.as_posix()
            artifact_type = 'custom'
            artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
            local_filename = artifact_object
    elif (isinstance(artifact_object, six.string_types) and len(artifact_object) < 4096 and
          urlparse(artifact_object).scheme in remote_driver_schemes):
        # we should not upload this, just register
        local_filename = None
        uri = artifact_object
        artifact_type = 'custom'
        artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
    elif isinstance(artifact_object, six.string_types):
        # if we got here, we should store it as a text file
        artifact_type = 'string'
        artifact_type_data.content_type = 'text/plain'
        if preview:
            artifact_type_data.preview = preview
        elif len(artifact_object) < self.max_preview_size_bytes:
            artifact_type_data.preview = artifact_object
        else:
            artifact_type_data.preview = '# full text too large to store, storing first {}kb\n{}'.format(
                self.max_preview_size_bytes // 1024, artifact_object[:self.max_preview_size_bytes])

        delete_after_upload = True
        override_filename_ext_in_uri = '.txt'
        override_filename_in_uri = name + override_filename_ext_in_uri
        fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                     suffix=override_filename_ext_in_uri)
        os.close(fd)
        # noinspection PyBroadException
        try:
            with open(local_filename, 'wt') as f:
                f.write(artifact_object)
        except Exception:
            # cleanup and raise exception
            os.unlink(local_filename)
            raise
    elif auto_pickle:
        # if we got here it means we do not know what to do with the object, so we serialize it with pickle
        artifact_type = 'pickle'
        artifact_type_data.content_type = 'application/pickle'
        # noinspection PyBroadException
        try:
            artifact_type_data.preview = preview or str(
                artifact_object.__repr__())[:self.max_preview_size_bytes]
        except Exception:
            artifact_type_data.preview = preview or ''
        delete_after_upload = True
        override_filename_ext_in_uri = '.pkl'
        override_filename_in_uri = name + override_filename_ext_in_uri
        fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                     suffix=override_filename_ext_in_uri)
        os.close(fd)
        # noinspection PyBroadException
        try:
            with open(local_filename, 'wb') as f:
                pickle.dump(artifact_object, f)
        except Exception:
            # cleanup and raise exception
            os.unlink(local_filename)
            raise
    else:
        raise ValueError("Artifact type {} not supported".format(type(artifact_object)))

    # remove from existing list, if exists
    for artifact in self._task_artifact_list:
        if artifact.key == name:
            if artifact.type == self._pd_artifact_type:
                raise ValueError("Artifact of name {} already registered, "
                                 "use register_artifact instead".format(name))

            self._task_artifact_list.remove(artifact)
            break

    if not local_filename:
        file_size = None
        file_hash = None
    else:
        # check that the file to upload exists
        local_filename = Path(local_filename).absolute()
        if not local_filename.exists() or not local_filename.is_file():
            LoggerRoot.get_base_logger().warning(
                'Artifact upload failed, cannot find file {}'.format(local_filename.as_posix()))
            return False

        file_hash, _ = self.sha256sum(local_filename.as_posix())
        file_size = local_filename.stat().st_size

        uri = self._upload_local_file(
            local_filename, name,
            delete_after_upload=delete_after_upload,
            override_filename=override_filename_in_uri,
            override_filename_ext=override_filename_ext_in_uri)

    timestamp = int(time())

    artifact = tasks.Artifact(
        key=name, type=artifact_type,
        uri=uri,
        content_size=file_size,
        hash=file_hash,
        timestamp=timestamp,
        type_data=artifact_type_data,
        display_data=[(str(k), str(v)) for k, v in metadata.items()] if metadata else None)

    # update task artifacts
    with self._task_edit_lock:
        self._task_artifact_list.append(artifact)
        self._task.set_artifacts(self._task_artifact_list)

    return True
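# Usage sketch of the public API that wraps this method (names illustrative):
from clearml import Task

task = Task.init(project_name='examples', task_name='artifact demo')
task.upload_artifact(name='config', artifact_object={'lr': 0.003})   # dict -> stored as JSON
task.upload_artifact(name='raw_csvs', artifact_object='data/*.csv')  # wildcard -> zip archive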
def _make_file_info(target: pathlib.Path, arcname: Optional[str] = None, dereference=False) -> Dict[str, Any]:
    f = {}  # type: Dict[str, Any]
    f['origin'] = target
    if arcname is not None:
        f['filename'] = pathlib.Path(arcname).as_posix()
    else:
        f['filename'] = target.as_posix()
    if os.name == 'nt':
        fstat = target.lstat()
        if target.is_symlink():
            if dereference:
                fstat = target.stat()
                if stat.S_ISDIR(fstat.st_mode):
                    f['emptystream'] = True
                    f['attributes'] = fstat.st_file_attributes & FILE_ATTRIBUTE_WINDOWS_MASK  # type: ignore  # noqa
                else:
                    f['emptystream'] = False
                    f['attributes'] = stat.FILE_ATTRIBUTE_ARCHIVE  # type: ignore  # noqa
                    f['uncompressed'] = fstat.st_size
            else:
                f['emptystream'] = False
                f['attributes'] = fstat.st_file_attributes & FILE_ATTRIBUTE_WINDOWS_MASK  # type: ignore  # noqa
                # f['attributes'] |= stat.FILE_ATTRIBUTE_REPARSE_POINT  # type: ignore  # noqa
        elif target.is_dir():
            f['emptystream'] = True
            f['attributes'] = fstat.st_file_attributes & FILE_ATTRIBUTE_WINDOWS_MASK  # type: ignore  # noqa
        elif target.is_file():
            f['emptystream'] = False
            f['attributes'] = stat.FILE_ATTRIBUTE_ARCHIVE  # type: ignore  # noqa
            f['uncompressed'] = fstat.st_size
    else:
        fstat = target.lstat()
        if target.is_symlink():
            if dereference:
                fstat = target.stat()
                if stat.S_ISDIR(fstat.st_mode):
                    f['emptystream'] = True
                    f['attributes'] = stat.FILE_ATTRIBUTE_DIRECTORY  # type: ignore  # noqa
                    f['attributes'] |= FILE_ATTRIBUTE_UNIX_EXTENSION | (stat.S_IFDIR << 16)
                    f['attributes'] |= (stat.S_IMODE(fstat.st_mode) << 16)
                else:
                    f['emptystream'] = False
                    f['attributes'] = stat.FILE_ATTRIBUTE_ARCHIVE  # type: ignore  # noqa
                    f['attributes'] |= FILE_ATTRIBUTE_UNIX_EXTENSION | (stat.S_IMODE(fstat.st_mode) << 16)
            else:
                f['emptystream'] = False
                f['attributes'] = stat.FILE_ATTRIBUTE_ARCHIVE | stat.FILE_ATTRIBUTE_REPARSE_POINT  # type: ignore  # noqa
                f['attributes'] |= FILE_ATTRIBUTE_UNIX_EXTENSION | (stat.S_IFLNK << 16)
                f['attributes'] |= (stat.S_IMODE(fstat.st_mode) << 16)
        elif target.is_dir():
            f['emptystream'] = True
            f['attributes'] = stat.FILE_ATTRIBUTE_DIRECTORY  # type: ignore  # noqa
            f['attributes'] |= FILE_ATTRIBUTE_UNIX_EXTENSION | (stat.S_IFDIR << 16)
            f['attributes'] |= (stat.S_IMODE(fstat.st_mode) << 16)
        elif target.is_file():
            f['emptystream'] = False
            f['uncompressed'] = fstat.st_size
            f['attributes'] = stat.FILE_ATTRIBUTE_ARCHIVE  # type: ignore  # noqa
            f['attributes'] |= FILE_ATTRIBUTE_UNIX_EXTENSION | (stat.S_IMODE(fstat.st_mode) << 16)

    f['creationtime'] = fstat.st_ctime
    f['lastwritetime'] = fstat.st_mtime
    f['lastaccesstime'] = fstat.st_atime
    return f
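# Usage sketch -- assumes the module-level constants referenced above
# (FILE_ATTRIBUTE_UNIX_EXTENSION, FILE_ATTRIBUTE_WINDOWS_MASK) are defined,
# as they are in py7zr; the file name is illustrative:
import pathlib

info = _make_file_info(pathlib.Path('README.md'))
print(info['filename'], info['emptystream'], info.get('uncompressed'))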
file.rename(referto_path / avis_name)
browser.back()


if __name__ == '__main__':
    referti_path = Path("Referti")
    download_path = Path(__file__).cwd() / 'temppdfdownloads'
    config_file = Path('config.json')
    with open(config_file.name) as data_file:
        params = json.load(data_file)

    profile = FirefoxProfile()
    profile.set_preference("browser.download.folderList", 2)
    profile.set_preference("browser.download.dir", download_path.as_posix())
    profile.set_preference("browser.helperApps.neverAsk.saveToDisk", 'application/pdf')
    # disable Firefox's built-in PDF viewer
    profile.set_preference("pdfjs.disabled", True)
    # disable Adobe Acrobat PDF preview plugin
    profile.set_preference("plugin.scan.plid.all", False)
    profile.set_preference("plugin.scan.Acrobat", "99.0")

    browser = webdriver.Firefox(executable_path=params['gekoexecutable'], firefox_profile=profile)
    print('Open')
    browser.get(params['avis_address'])
    print('Open end')
    try:
def _daemon(cls, jupyter_notebook_filename):
    from clearml import Task

    # load jupyter notebook package
    # noinspection PyBroadException
    try:
        # noinspection PyPackageRequirements
        from nbconvert.exporters.script import ScriptExporter
        _script_exporter = ScriptExporter()
    except Exception as ex:
        _logger.warning('Could not read Jupyter Notebook: {}'.format(ex))
        return

    # load pigar
    # noinspection PyBroadException
    try:
        from ....utilities.pigar.reqs import get_installed_pkgs_detail, file_import_modules
        from ....utilities.pigar.modules import ReqsModules
        from ....utilities.pigar.log import logger
        logger.setLevel(logging.WARNING)
    except Exception:
        file_import_modules = None

    # load IPython
    # noinspection PyBroadException
    try:
        # noinspection PyPackageRequirements
        from IPython import get_ipython
    except Exception:
        # should not happen
        get_ipython = None

    # setup local notebook files
    if jupyter_notebook_filename:
        notebook = Path(jupyter_notebook_filename)
        local_jupyter_filename = jupyter_notebook_filename
    else:
        notebook = None
        fd, local_jupyter_filename = mkstemp(suffix='.ipynb')
        os.close(fd)

    last_update_ts = None
    counter = 0
    prev_script_hash = None

    # noinspection PyBroadException
    try:
        from ....version import __version__
        our_module = cls.__module__.split('.')[0], __version__
    except Exception:
        our_module = None

    # noinspection PyBroadException
    try:
        import re
        replace_ipython_pattern = re.compile(r'\n([ \t]*)get_ipython\(\)')
    except Exception:
        replace_ipython_pattern = None

    # main observer loop, check if we need to exit
    while not cls._exit_event.wait(timeout=0.):
        # wait for timeout or sync event
        cls._sync_event.wait(cls._sample_frequency if counter else cls._first_sample_frequency)

        cls._sync_event.clear()
        counter += 1
        # noinspection PyBroadException
        try:
            # if there is no task connected, do nothing
            task = Task.current_task()
            if not task:
                continue

            script_code = None
            fmodules = None
            current_cell = None
            # if we have a local file:
            if notebook:
                if not notebook.exists():
                    continue
                # check if notebook changed
                if last_update_ts is not None and notebook.stat().st_mtime - last_update_ts <= 0:
                    continue
                last_update_ts = notebook.stat().st_mtime
            else:
                # serialize notebook to a temp file
                if cls._jupyter_history_logger:
                    script_code, current_cell = cls._jupyter_history_logger.history_to_str()
                else:
                    # noinspection PyBroadException
                    try:
                        # noinspection PyBroadException
                        try:
                            os.unlink(local_jupyter_filename)
                        except Exception:
                            pass
                        get_ipython().run_line_magic('history', '-t -f {}'.format(local_jupyter_filename))
                        with open(local_jupyter_filename, 'r') as f:
                            script_code = f.read()
                        # load the modules
                        from ....utilities.pigar.modules import ImportedModules
                        fmodules = ImportedModules()
                        for nm in set([str(m).split('.')[0] for m in sys.modules]):
                            fmodules.add(nm, 'notebook', 0)
                    except Exception:
                        continue

            # get notebook python script
            if script_code is None and local_jupyter_filename:
                script_code, _ = _script_exporter.from_filename(local_jupyter_filename)
                if cls._store_notebook_artifact:
                    # also upload the jupyter notebook as artifact
                    task.upload_artifact(
                        name='notebook',
                        artifact_object=Path(local_jupyter_filename),
                        preview='See `notebook preview` artifact',
                        metadata={'UPDATE': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')},
                        wait_on_upload=True,
                    )
                    # noinspection PyBroadException
                    try:
                        from nbconvert.exporters import HTMLExporter  # noqa
                        html, _ = HTMLExporter().from_filename(filename=local_jupyter_filename)
                        local_html = Path(gettempdir()) / 'notebook_{}.html'.format(task.id)
                        with open(local_html.as_posix(), 'wt') as f:
                            f.write(html)
                        task.upload_artifact(
                            name='notebook preview',
                            artifact_object=local_html,
                            preview='Click `FILE PATH` link',
                            metadata={'UPDATE': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')},
                            delete_after_upload=True,
                            wait_on_upload=True,
                        )
                    except Exception:
                        pass

            current_script_hash = hash(script_code + (current_cell or ''))
            if prev_script_hash and prev_script_hash == current_script_hash:
                continue

            # remove ipython direct access from the script code
            # we will not be able to run them anyhow
            if replace_ipython_pattern:
                script_code = replace_ipython_pattern.sub(r'\n# \g<1>get_ipython()', script_code)

            requirements_txt = ''
            conda_requirements = ''
            # parse jupyter python script and prepare pip requirements (pigar)
            # if backend supports requirements
            if file_import_modules and Session.check_min_api_version('2.2'):
                if fmodules is None:
                    fmodules, _ = file_import_modules(
                        notebook.parts[-1] if notebook else 'notebook', script_code)
                    if current_cell:
                        cell_fmodules, _ = file_import_modules(
                            notebook.parts[-1] if notebook else 'notebook', current_cell)
                        # noinspection PyBroadException
                        try:
                            fmodules |= cell_fmodules
                        except Exception:
                            pass
                # add current cell to the script
                if current_cell:
                    script_code += '\n' + current_cell
                fmodules = ScriptRequirements.add_trains_used_packages(fmodules)
                # noinspection PyUnboundLocalVariable
                installed_pkgs = get_installed_pkgs_detail()
                # make sure we are in installed packages
                if our_module and (our_module[0] not in installed_pkgs):
                    installed_pkgs[our_module[0]] = our_module

                # noinspection PyUnboundLocalVariable
                reqs = ReqsModules()
                for name in fmodules:
                    if name in installed_pkgs:
                        pkg_name, version = installed_pkgs[name]
                        reqs.add(pkg_name, version, fmodules[name])
                requirements_txt, conda_requirements = ScriptRequirements.create_requirements_txt(reqs)

            # update script
            prev_script_hash = current_script_hash
            data_script = task.data.script
            data_script.diff = script_code
            data_script.requirements = {'pip': requirements_txt, 'conda': conda_requirements}
            # noinspection PyProtectedMember
            task._update_script(script=data_script)
            # update requirements
            # noinspection PyProtectedMember
            task._update_requirements(requirements=requirements_txt)
        except Exception:
            pass
''' SOURCE DATA '''
task_dir = Path('/data2/polo/half_baked_data/slopes/abs')
single_task_slope_csv = task_dir / 'raw_slopes_ok_subjs_abs.csv'
corrected_single_task_csv = task_dir / 'deCAPed_preHD_slopes_abs.csv'
n_visit_csv = task_dir / 'n_visits_used.csv'
in_mat = Path().cwd().parent / 'VBM_controls' / \
    'TON_log_deg_maps_local_gm_corrected.mat'

''' LOAD '''
source = 'python'
subject_list = ps.load_subject_list(in_mat.as_posix(), source=source)
n_df = pd.read_csv(n_visit_csv, index_col='subjid')
slope_df = pd.read_csv(single_task_slope_csv, index_col='subjid')
task_names = n_df.columns.values

behav_n_imag = [s for s in subject_list if s in slope_df.index]
x = slope_df.loc[behav_n_imag]['group'] == 'preHD'
preHD_idx = x.loc[x].index.values

n_df.corrwith(slope_df.loc[preHD_idx][task_names])

task_corrs = OrderedDict()
corr_dict = OrderedDict()
corr_dict['task'] = task_names
p_vals = []
class CreateAndPopulate(object): def __init__( self, project_name=None, # Optional[str] task_name=None, # Optional[str] task_type=None, # Optional[str] repo=None, # Optional[str] branch=None, # Optional[str] commit=None, # Optional[str] script=None, # Optional[str] working_directory=None, # Optional[str] packages=None, # Optional[Union[bool, Sequence[str]]] requirements_file=None, # Optional[Union[str, Path]] docker=None, # Optional[str] docker_args=None, # Optional[str] docker_bash_setup_script=None, # Optional[str] base_task_id=None, # Optional[str] add_task_init_call=True, # bool raise_on_missing_entries=False, # bool verbose=False, # bool ): # type: (...) -> None """ Create a new Task from an existing code base. If the code does not already contain a call to Task.init, pass add_task_init_call=True, and the code will be patched in remote execution (i.e. when executed by `clearml-agent` :param project_name: Set the project name for the task. Required if base_task_id is None. :param task_name: Set the name of the remote task. Required if base_task_id is None. :param task_type: Optional, The task type to be created. Supported values: 'training', 'testing', 'inference', 'data_processing', 'application', 'monitor', 'controller', 'optimizer', 'service', 'qc', 'custom' :param repo: Remote URL for the repository to use, OR path to local copy of the git repository Example: 'https://github.com/allegroai/clearml.git' or '~/project/repo' :param branch: Select specific repository branch/tag (implies the latest commit from the branch) :param commit: Select specific commit id to use (default: latest commit, or when used with local repository matching the local commit id) :param script: Specify the entry point script for the remote execution. When used in tandem with remote git repository the script should be a relative path inside the repository, for example: './source/train.py' . When used with local repository path it supports a direct path to a file inside the local repository itself, for example: '~/project/source/train.py' :param working_directory: Working directory to launch the script from. Default: repository root folder. Relative to repo root or local folder. :param packages: Manually specify a list of required packages. Example: ["tqdm>=2.1", "scikit-learn"] or `True` to automatically create requirements based on locally installed packages (repository must be local). :param requirements_file: Specify requirements.txt file to install when setting the session. If not provided, the requirements.txt from the repository will be used. :param docker: Select the docker image to be executed in by the remote session :param docker_args: Add docker arguments, pass a single string :param docker_bash_setup_script: Add bash script to be executed inside the docker before setting up the Task's environement :param base_task_id: Use a pre-existing task in the system, instead of a local repo/script. Essentially clones an existing task and overrides arguments/requirements. :param add_task_init_call: If True, a 'Task.init()' call is added to the script entry point in remote execution. 
:param raise_on_missing_entries: If True raise ValueError on missing entries when populating :param verbose: If True print verbose logging """ if len(urlparse(repo).scheme) <= 1: folder = repo repo = None else: folder = None if raise_on_missing_entries and not base_task_id: if not script: raise ValueError("Entry point script not provided") if not repo and not folder and not Path(script).is_file(): raise ValueError( "Script file \'{}\' could not be found".format(script)) if raise_on_missing_entries and commit and branch: raise ValueError( "Specify either a branch/tag or specific commit id, not both (either --commit or --branch)" ) if raise_on_missing_entries and not folder and working_directory and working_directory.startswith( '/'): raise ValueError( "working directory \'{}\', must be relative to repository root" ) if requirements_file and not Path(requirements_file).is_file(): raise ValueError("requirements file could not be found \'{}\'") self.folder = folder self.commit = commit self.branch = branch self.repo = repo self.script = script self.cwd = working_directory assert not packages or isinstance(packages, (tuple, list, bool)) self.packages = list(packages) if packages is not None and not isinstance(packages, bool) \ else (packages or None) self.requirements_file = Path( requirements_file) if requirements_file else None self.base_task_id = base_task_id self.docker = dict(image=docker, args=docker_args, bash_script=docker_bash_setup_script) self.add_task_init_call = add_task_init_call self.project_name = project_name self.task_name = task_name self.task_type = task_type self.task = None self.raise_on_missing_entries = raise_on_missing_entries self.verbose = verbose def create_task(self): # type: () -> Task """ Create the new populated Task :return: newly created Task object """ local_entry_file = None repo_info = None if self.folder or (self.script and Path(self.script).is_file() and not self.repo): self.folder = os.path.expandvars(os.path.expanduser( self.folder)) if self.folder else None self.script = os.path.expandvars(os.path.expanduser( self.script)) if self.script else None self.cwd = os.path.expandvars(os.path.expanduser( self.cwd)) if self.cwd else None if Path(self.script).is_file(): entry_point = self.script else: entry_point = (Path(self.folder) / self.script).as_posix() entry_point = os.path.abspath(entry_point) if not os.path.isfile(entry_point): raise ValueError( "Script entrypoint file \'{}\' could not be found".format( entry_point)) local_entry_file = entry_point repo_info, requirements = ScriptInfo.get( filepaths=[entry_point], log=getLogger(), create_requirements=self.packages is True, uncommitted_from_remote=True, detect_jupyter_notebook=False) # check if we have no repository and no requirements raise error if self.raise_on_missing_entries and (not self.requirements_file and not self.packages) \ and not self.repo and ( not repo_info or not repo_info.script or not repo_info.script.get('repository')): raise ValueError( "Standalone script detected \'{}\', but no requirements provided" .format(self.script)) if self.base_task_id: if self.verbose: print('Cloning task {}'.format(self.base_task_id)) task = Task.clone(source_task=self.base_task_id, project=Task.get_project_id(self.project_name)) else: # noinspection PyProtectedMember task = Task._create(task_name=self.task_name, project_name=self.project_name, task_type=self.task_type or Task.TaskTypes.training) # if there is nothing to populate, return if not any([ self.folder, self.commit, self.branch, self.repo, 
                self.script, self.cwd, self.packages, self.requirements_file,
                self.base_task_id, self.docker]):
            return task

        task_state = task.export_task()
        if 'script' not in task_state:
            task_state['script'] = {}

        if repo_info:
            task_state['script']['repository'] = repo_info.script['repository']
            task_state['script']['version_num'] = repo_info.script['version_num']
            task_state['script']['branch'] = repo_info.script['branch']
            task_state['script']['diff'] = repo_info.script['diff'] or ''
            task_state['script']['working_dir'] = repo_info.script['working_dir']
            task_state['script']['entry_point'] = repo_info.script['entry_point']
            task_state['script']['binary'] = repo_info.script['binary']
            task_state['script']['requirements'] = repo_info.script.get('requirements') or {}
            if self.cwd:
                cwd = self.cwd if Path(self.cwd).is_dir() else (
                    Path(repo_info.script['repo_root']) / self.cwd).as_posix()
                if not Path(cwd).is_dir():
                    raise ValueError("Working directory '{}' could not be found".format(cwd))
                cwd = Path(cwd).relative_to(repo_info.script['repo_root']).as_posix()
                entry_point = \
                    Path(repo_info.script['repo_root']) / repo_info.script['working_dir'] / \
                    repo_info.script['entry_point']
                entry_point = entry_point.relative_to(cwd).as_posix()
                task_state['script']['entry_point'] = entry_point
                task_state['script']['working_dir'] = cwd
        elif self.repo:
            # normalize backslashes and remove the leading one
            entry_point = '/'.join([p for p in self.script.split('/') if p and p != '.'])
            cwd = '/'.join([p for p in (self.cwd or '.').split('/') if p and p != '.'])
            if cwd and entry_point.startswith(cwd + '/'):
                entry_point = entry_point[len(cwd) + 1:]
            task_state['script']['repository'] = self.repo
            task_state['script']['version_num'] = self.commit or None
            task_state['script']['branch'] = self.branch or None
            task_state['script']['diff'] = ''
            task_state['script']['working_dir'] = cwd or '.'
            task_state['script']['entry_point'] = entry_point
        else:
            # standalone task
            task_state['script']['entry_point'] = self.script
            task_state['script']['working_dir'] = '.'

        # update requirements
        reqs = []
        if self.requirements_file:
            with open(self.requirements_file.as_posix(), 'rt') as f:
                reqs = [line.strip() for line in f.readlines()]
        if self.packages and self.packages is not True:
            reqs += self.packages
        if reqs:
            # make sure we have clearml.
            clearml_found = False
            for line in reqs:
                if line.strip().startswith('#'):
                    continue
                package = reduce(lambda a, b: a.split(b)[0], "#;@=~<>", line).strip()
                if package == 'clearml':
                    clearml_found = True
                    break
            if not clearml_found:
                reqs.append('clearml')
            task_state['script']['requirements'] = {'pip': '\n'.join(reqs)}
        elif not self.repo and repo_info and not repo_info.script.get('requirements'):
            # we are in local mode, make sure we have "requirements.txt", it is a must
            reqs_txt_file = Path(repo_info.script['repo_root']) / "requirements.txt"
            if self.raise_on_missing_entries and not reqs_txt_file.is_file():
                raise ValueError(
                    "requirements.txt not found [{}] "
                    "Use --requirements or --packages".format(reqs_txt_file.as_posix()))

        if self.add_task_init_call:
            script_entry = os.path.abspath(
                '/' + task_state['script'].get('working_dir', '.') +
                '/' + task_state['script']['entry_point'])
            idx_a = 0
            # find the right entry for the patch if we have a local file (basically after __future__)
            if local_entry_file:
                with open(local_entry_file, 'rt') as f:
                    lines = f.readlines()
                future_found = self._locate_future_import(lines)
                if future_found >= 0:
                    idx_a = future_found + 1

            task_init_patch = ''
            if self.repo or task_state.get('script', {}).get('repository'):
                # if we do not have requirements, add clearml to the requirements.txt
                if not reqs:
                    task_init_patch += \
                        "diff --git a/requirements.txt b/requirements.txt\n" \
                        "--- a/requirements.txt\n" \
                        "+++ b/requirements.txt\n" \
                        "@@ -0,0 +1,1 @@\n" \
                        "+clearml\n"

                # Add Task.init call
                task_init_patch += \
                    "diff --git a{script_entry} b{script_entry}\n" \
                    "--- a{script_entry}\n" \
                    "+++ b{script_entry}\n" \
                    "@@ -{idx_a},0 +{idx_b},3 @@\n" \
                    "+from clearml import Task\n" \
                    "+Task.init()\n" \
                    "+\n".format(
                        script_entry=script_entry, idx_a=idx_a, idx_b=idx_a + 1)
            else:
                # Add Task.init call
                task_init_patch += \
                    "from clearml import Task\n" \
                    "Task.init()\n\n"

            # make sure we add the diff at the end of the current diff
            task_state['script']['diff'] = task_state['script'].get('diff', '')
            if task_state['script']['diff'] and not task_state['script']['diff'].endswith('\n'):
                task_state['script']['diff'] += '\n'
            task_state['script']['diff'] += task_init_patch

        # set base docker image if provided
        if self.docker:
            task.set_base_docker(
                docker_cmd=self.docker.get('image'),
                docker_arguments=self.docker.get('args'),
                docker_setup_bash_script=self.docker.get('bash_script'),
            )

        if self.verbose:
            if task_state['script']['repository']:
                repo_details = {
                    k: v for k, v in task_state['script'].items()
                    if v and k not in ('diff', 'requirements', 'binary')}
                print('Repository Detected\n{}'.format(json.dumps(repo_details, indent=2)))
            else:
                print('Standalone script detected\n Script: {}'.format(self.script))

            if task_state['script'].get('requirements') and \
                    task_state['script']['requirements'].get('pip'):
                print('Requirements:{}{}'.format(
                    '\n Using requirements.txt: {}'.format(
                        self.requirements_file.as_posix()) if self.requirements_file else '',
                    '\n {}Packages: {}'.format(
                        'Additional ' if self.requirements_file else '', self.packages)
                    if self.packages else ''))
            if self.docker:
                print('Base docker image: {}'.format(self.docker))

        # update the Task
        task.update_task(task_state)
        self.task = task
        return task

    def update_task_args(self, args=None):
        # type: (Optional[Union[Sequence[str], Sequence[Tuple[str, str]]]]) -> ()
        """
        Update the newly created Task argparse arguments.
        If called before the Task is created, can be used for argument verification.

        :param args: Arguments to pass to the remote execution, list of string pairs (argument,
            value) or list of strings '<argument>=<value>'.
            Example: ['lr=0.003', ('batch_size', 64)]
        """
        if not args:
            return

        # check that args are in the format <key>=<value>
        args_list = []
        for a in args:
            if isinstance(a, (list, tuple)):
                assert len(a) == 2
                args_list.append(a)
                continue
            try:
                parts = a.split('=', 1)
                assert len(parts) == 2
                args_list.append(parts)
            except Exception:
                raise ValueError(
                    "Failed parsing argument '{}', arguments must be in '<key>=<value>' format".format(a))

        if not self.task:
            return

        task_params = self.task.get_parameters()
        args_list = {'Args/{}'.format(k): v for k, v in args_list}
        task_params.update(args_list)
        self.task.set_parameters(task_params)

    def get_id(self):
        # type: () -> Optional[str]
        """
        :return: Return the created Task id (str)
        """
        return self.task.id if self.task else None

    @staticmethod
    def _locate_future_import(lines):
        # type: (List[str]) -> int
        """
        :param lines: string lines of a python file
        :return: line index of the last __future__ import. return -1 if no __future__ was found
        """
        # skip over the first two lines, they are ours
        # then skip over empty or comment lines
        lines = [
            (i, line.split('#', 1)[0].rstrip())
            for i, line in enumerate(lines)
            if line.strip('\r\n\t ') and not line.strip().startswith('#')]

        # remove lines nested inside triple-quote (""") blocks
        nested_c = -1
        skip_lines = []
        for i, line_pair in enumerate(lines):
            for _ in line_pair[1].split('"""')[1:]:
                if nested_c >= 0:
                    skip_lines.extend(list(range(nested_c, i + 1)))
                    nested_c = -1
                else:
                    nested_c = i
        # now select all the lines that are outside triple-quote blocks
        lines = [pair for i, pair in enumerate(lines) if i not in skip_lines]

        from_future = re.compile(r"^from[\s]*__future__[\s]*")
        import_future = re.compile(r"^import[\s]*__future__[\s]*")
        # test if we have a __future__ import
        found_index = -1
        for a_i, (_, a_line) in enumerate(lines):
            if found_index >= a_i:
                continue
            if from_future.match(a_line) or import_future.match(a_line):
                found_index = a_i
                # check the last import block
                i, line = lines[found_index]
                # either we have a \\ character at the end of the line, or the line is inside parentheses
                parenthesized_lines = '(' in line and ')' not in line
                while line.endswith('\\') or parenthesized_lines:
                    found_index += 1
                    i, line = lines[found_index]
                    if ')' in line:
                        break
            else:
                break

        return found_index if found_index < 0 else lines[found_index][0]
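# A minimal, standalone sketch (not part of the class above) of what
# _locate_future_import is for: the Task.init() patch has to land *after* the
# last `__future__` import, or the patched script would raise a SyntaxError.
# The enclosing class is not named in this excerpt, so `CreateAndPopulate`
# below is an assumption used purely for illustration.
if __name__ == '__main__':
    sample = [
        '#!/usr/bin/env python\n',
        '"""module docstring"""\n',
        'from __future__ import print_function\n',
        'import os\n',
    ]
    # expected output: 2, i.e. the patch would be injected at idx_a = 2 + 1
    print(CreateAndPopulate._locate_future_import(sample))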
class CondaAPI(PackageManager):
    """
    A programmatic interface for controlling conda
    """

    MINIMUM_VERSION = "4.3.30"

    def __init__(self, session, path, python, requirements_manager, execution_info=None, **kwargs):
        # type: (Session, PathLike, float, RequirementsManager, ExecutionInfo, Any) -> None
        """
        :param python: base python version to use (e.g python3.6)
        :param path: path of env
        """
        self.session = session
        self.python = python
        self.source = None
        self.requirements_manager = requirements_manager
        self.path = path
        self.env_read_only = False
        self.extra_channels = self.session.config.get('agent.package_manager.conda_channels', [])
        self.conda_env_as_base_docker = \
            self.session.config.get('agent.package_manager.conda_env_as_base_docker', None) or \
            bool(ENV_CONDA_ENV_PACKAGE.get())
        if ENV_CONDA_ENV_PACKAGE.get():
            self.conda_pre_build_env_path = ENV_CONDA_ENV_PACKAGE.get()
        else:
            self.conda_pre_build_env_path = execution_info.docker_cmd if execution_info else None
        self.pip = CondaPip(
            session=self.session,
            source=self.source,
            python=self.python,
            requirements_manager=self.requirements_manager,
            path=self.path,
        )
        try:
            self.conda = (
                find_executable("conda") or
                Argv(select_for_platform(windows="where", linux="which"), "conda").get_output(
                    shell=select_for_platform(windows=True, linux=False)).strip())
        except Exception:
            raise ValueError("ERROR: package manager \"conda\" selected, "
                             "but 'conda' executable could not be located")
        try:
            output = Argv(self.conda, "--version").get_output(stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as ex:
            raise CommandFailedError(
                "Unable to determine conda version: {ex}, output={ex.output}".format(ex=ex))
        self.conda_version = self.get_conda_version(output)
        if SimpleVersion.compare_versions(self.conda_version, '<', self.MINIMUM_VERSION):
            raise CommandFailedError(
                "conda version '{}' is lower than the minimum supported conda version '{}'".format(
                    self.conda_version, self.MINIMUM_VERSION))

    @staticmethod
    def get_conda_version(output):
        match = re.search(r"(\d+\.){0,2}\d+", output)
        if not match:
            raise CommandFailedError("Unidentified conda version string:", output)
        return match.group(0)

    @property
    def bin(self):
        return self.pip.bin

    # noinspection SpellCheckingInspection
    def upgrade_pip(self):
        # do not change the pip version if a pre-built environment is used
        if self.env_read_only:
            print('Conda environment in read-only mode, skipping pip upgrade.')
            return ''
        return self._install("pip" + self.pip.get_pip_version())

    def create(self):
        """
        Create a new environment
        """
        if self.conda_env_as_base_docker and self.conda_pre_build_env_path:
            if Path(self.conda_pre_build_env_path).is_dir():
                print("Using pre-existing Conda environment from {}".format(
                    self.conda_pre_build_env_path))
                self.path = Path(self.conda_pre_build_env_path)
                self.source = ("conda", "activate", self.path.as_posix())
                self.pip = CondaPip(
                    session=self.session,
                    source=self.source,
                    python=self.python,
                    requirements_manager=self.requirements_manager,
                    path=self.path,
                )
                conda_env = Path(self.conda).parent.parent / 'etc' / 'profile.d' / 'conda.sh'
                self.source = self.pip.source = CommandSequence(
                    ('source', conda_env.as_posix()), self.source)
                self.env_read_only = True
                return self
            elif Path(self.conda_pre_build_env_path).is_file():
                print("Restoring Conda environment from {}".format(
                    self.conda_pre_build_env_path))
                tar_path = find_executable("tar")
                self.path.mkdir(parents=True, exist_ok=True)
                output = Argv(
                    tar_path,
                    "-xzf",
                    self.conda_pre_build_env_path,
                    "-C",
                    self.path,
                ).get_output()
                self.source = self.pip.source = ("conda", "activate", self.path.as_posix())
                conda_env = Path(self.conda).parent.parent / 'etc' / 'profile.d' / 'conda.sh'
                self.source = self.pip.source = CommandSequence(
                    ('source', conda_env.as_posix()), self.source)
                # unpack cleanup
                print("Fixing prefix in Conda environment {}".format(self.path))
                CommandSequence(
                    ('source', conda_env.as_posix()),
                    ((self.path / 'bin' / 'conda-unpack').as_posix(),)).get_output()
                return self
            else:
                raise ValueError(
                    "Could not restore Conda environment, cannot find {}".format(
                        self.conda_pre_build_env_path))

        output = Argv(
            self.conda,
            "create",
            "--yes",
            "--mkdir",
            "--prefix",
            self.path,
            "python={}".format(self.python),
        ).get_output(stderr=DEVNULL)
        match = re.search(
            r"\W*(.*activate) ({})".format(re.escape(str(self.path))), output)
        self.source = self.pip.source = (
            tuple(match.group(1).split()) + (match.group(2),)
            if match else ("conda", "activate", self.path.as_posix()))
        conda_env = Path(self.conda).parent.parent / 'etc' / 'profile.d' / 'conda.sh'
        if conda_env.is_file() and not is_windows_platform():
            self.source = self.pip.source = CommandSequence(
                ('source', conda_env.as_posix()), self.source)

        # install the cuda toolkit
        # noinspection PyBroadException
        try:
            cuda_version = float(int(self.session.config['agent.cuda_version'])) / 10.0
            if cuda_version > 0:
                self._install('cudatoolkit={:.1f}'.format(cuda_version))
        except Exception:
            pass
        return self

    def remove(self):
        """
        Delete a conda environment.
        Use 'conda env remove', then 'rm_tree' to be safe.

        Conda seems to load "vcruntime140.dll" from all its environments on startup.
        This means environments have to be deleted using 'conda env remove'.
        If necessary, conda can be fooled into deleting a partially-deleted environment by creating
        an empty file in '<ENV>\\conda-meta\\history'
        (value found in 'conda.gateways.disk.test.PREFIX_MAGIC_FILE').
        Otherwise, it complains that said directory is not a conda environment.

        See: https://github.com/conda/conda/issues/7682
        """
        try:
            self._run_command(("env", "remove", "-p", self.path))
        except Exception:
            pass
        rm_tree(self.path)
        # if we failed removing the path, change its name
        if is_windows_platform() and Path(self.path).exists():
            try:
                Path(self.path).rename(Path(self.path).as_posix() + '_' + str(time()))
            except Exception:
                pass

    def _install_from_file(self, path):
        """
        Install packages from a requirements file.
        """
        self._install("--file", path)

    def _install(self, *args):
        # type: (*PathLike) -> ()
        # if we are in read-only mode, do not install anything
        if self.env_read_only:
            print('Conda environment in read-only mode, skipping package installing: {}'.format(args))
            return
        channels_args = tuple(
            chain.from_iterable(("-c", channel) for channel in self.extra_channels))
        self._run_command(("install", "-p", self.path) + channels_args + args)

    def _get_pip_packages(self, packages):
        # type: (Iterable[Text]) -> Sequence[Text]
        """
        Return the subset of ``packages`` which are not available on conda
        """
        pips = []
        while True:
            with self.temp_file("conda_reqs", packages) as path:
                try:
                    self._install_from_file(path)
                except PackageNotFoundError as e:
                    pips.append(e.pkg)
                    packages = _package_diff(path, {e.pkg})
                else:
                    break
        return pips

    def install_packages(self, *packages):
        # type: (*Text) -> ()
        return self._install(*packages)

    def uninstall_packages(self, *packages):
        # if we are in read-only mode, do not uninstall anything
        if self.env_read_only:
            print('Conda environment in read-only mode, skipping package uninstalling: {}'.format(packages))
            return ''
        # pass the requested packages to conda (the original call dropped them)
        return self._run_command(("uninstall", "-p", self.path) + tuple(packages))

    def install_from_file(self, path):
        """
        Try to install packages from conda. Install packages which are not
        available from conda with pip.
        """
        try:
            self._install_from_file(path)
            return
        except PackageNotFoundError as e:
            pip_packages = [e.pkg]
        except PackagesNotFoundError as e:
            pip_packages = package_set(e.packages)
        with self.temp_file("conda_reqs", _package_diff(path, pip_packages)) as reqs:
            self.install_from_file(reqs)
        with self.temp_file("pip_reqs", pip_packages) as reqs:
            self.pip.install_from_file(reqs)

    def freeze(self, freeze_full_environment=False):
        requirements = self.pip.freeze()
        req_lines = []
        conda_lines = []

        # noinspection PyBroadException
        try:
            pip_lines = requirements['pip']
            conda_packages_json = json.loads(
                self._run_command((self.conda, "list", "--json", "-p", self.path), raw=True))
            for r in conda_packages_json:
                # check if this is a pypi package, if it is, leave it outside
                if not r.get('channel') or r.get('channel') == 'pypi':
                    name = (r['name'].replace('-', '_'), r['name'])
                    pip_req_line = [
                        l for l in pip_lines
                        if l.split('==', 1)[0].strip() in name or
                        l.split('@', 1)[0].strip() in name]
                    if pip_req_line and \
                            ('@' not in pip_req_line[0] or
                             not pip_req_line[0].split('@', 1)[1].strip().startswith('file://')):
                        req_lines.append(pip_req_line[0])
                        continue
                    req_lines.append(
                        '{}=={}'.format(name[1], r['version']) if r.get('version')
                        else '{}'.format(name[1]))
                    continue
                # check if we have it in our required packages
                name = r['name']
                # hack support pytorch/torch different naming convention
                if name == 'pytorch':
                    name = 'torch'
                # skip over packages with _
                if name.startswith('_'):
                    continue
                conda_lines.append('{}=={}'.format(name, r['version']) if r.
get('version') else '{}'.format(name)) # make sure we see the conda packages, put them into the pip as well if conda_lines: req_lines = ['# Conda Packages', ''] + conda_lines + [ '', '# pip Packages', '' ] + req_lines requirements['pip'] = req_lines requirements['conda'] = conda_lines except Exception: pass if freeze_full_environment: # noinspection PyBroadException try: conda_env_json = json.loads( self._run_command((self.conda, "env", "export", "--json", "-p", self.path), raw=True)) conda_env_json.pop('name', None) conda_env_json.pop('prefix', None) conda_env_json.pop('channels', None) requirements['conda_env_json'] = json.dumps(conda_env_json) except Exception: pass return requirements def _load_conda_full_env(self, conda_env_dict, requirements): # noinspection PyBroadException try: cuda_version = int(self.session.config.get('agent.cuda_version', 0)) except Exception: cuda_version = 0 conda_env_dict['channels'] = self.extra_channels if 'dependencies' not in conda_env_dict: conda_env_dict['dependencies'] = [] new_dependencies = OrderedDict() pip_requirements = None for line in conda_env_dict['dependencies']: if isinstance(line, dict): pip_requirements = line.pop('pip', None) continue name = line.strip().split('=', 1)[0].lower() if name == 'pip': continue elif name == 'python': line = 'python={}'.format('.'.join( line.split('=')[1].split('.')[:2])) elif name == 'tensorflow-gpu' and cuda_version == 0: line = 'tensorflow={}'.format(line.split('=')[1]) elif name == 'tensorflow' and cuda_version > 0: line = 'tensorflow-gpu={}'.format(line.split('=')[1]) elif name in ('cupti', 'cudnn'): # cudatoolkit should pull them based on the cudatoolkit version continue elif name.startswith('_'): continue new_dependencies[line.split('=', 1)[0].strip()] = line # fix packages: conda_env_dict['dependencies'] = list(new_dependencies.values()) with self.temp_file("conda_env", yaml.dump(conda_env_dict), suffix=".yml") as name: print('Conda: Trying to install requirements:\n{}'.format( conda_env_dict['dependencies'])) result = self._run_command( ("env", "update", "-p", self.path, "--file", name)) # check if we need to remove specific packages bad_req = self._parse_conda_result_bad_packges(result) if bad_req: print('failed installing the following conda packages: {}'.format( bad_req)) return False if pip_requirements: # create a list of vcs packages that we need to replace in the pip section vcs_reqs = {} if 'pip' in requirements: pip_lines = requirements['pip'].splitlines() \ if isinstance(requirements['pip'], six.string_types) else requirements['pip'] for line in pip_lines: try: marker = list(parse(line)) except Exception: marker = None if not marker: continue m = MarkerRequirement(marker[0]) if m.vcs: vcs_reqs[m.name] = m try: pip_req_str = [ str(vcs_reqs.get(r.split('=', 1)[0], r)) for r in pip_requirements if not r.startswith('pip=') and not r.startswith('virtualenv=') ] print( 'Conda: Installing requirements: step 2 - using pip:\n{}'. format(pip_req_str)) PackageManager._selected_manager = self.pip self.pip.load_requirements({'pip': '\n'.join(pip_req_str)}) except Exception as e: print(e) raise e finally: PackageManager._selected_manager = self self.requirements_manager.post_install(self.session) def load_requirements(self, requirements): # if we are in read only mode, do not uninstall anything if self.env_read_only: print( 'Conda environment in read-only mode, skipping requirements installation.' 
) return None # if we have a full conda environment, use it and pass the pip to pip if requirements.get('conda_env_json'): # noinspection PyBroadException try: conda_env_json = json.loads(requirements.get('conda_env_json')) print('Conda restoring full yaml environment') return self._load_conda_full_env(conda_env_json, requirements) except Exception: print( 'Could not load fully stored conda environment, falling back to requirements' ) # create new environment file conda_env = dict() conda_env['channels'] = self.extra_channels reqs = [] if isinstance(requirements['pip'], six.string_types): requirements['pip'] = requirements['pip'].split('\n') if isinstance(requirements.get('conda'), six.string_types): requirements['conda'] = requirements['conda'].split('\n') has_torch = False has_matplotlib = False try: cuda_version = int(self.session.config.get('agent.cuda_version', 0)) except: cuda_version = 0 # notice 'conda' entry with empty string is a valid conda requirements list, it means pip only # this should happen if experiment was executed on non-conda machine or old trains client conda_supported_req = requirements['pip'] if requirements.get( 'conda', None) is None else requirements['conda'] conda_supported_req_names = [] pip_requirements = [] for r in conda_supported_req: try: marker = list(parse(r)) except: marker = None if not marker: continue m = MarkerRequirement(marker[0]) # conda does not support version control links if m.vcs: pip_requirements.append(m) continue # Skip over pip if m.name in ( 'pip', 'virtualenv', ): continue # python version, only major.minor if m.name == 'python' and m.specs: m.specs = [ (m.specs[0][0], '.'.join(m.specs[0][1].split('.')[:2])), ] if '.' not in m.specs[0][1]: continue conda_supported_req_names.append(m.name.lower()) if m.req.name.lower() == 'matplotlib': has_matplotlib = True elif m.req.name.lower().startswith('torch'): has_torch = True if m.req.name.lower() in ('torch', 'pytorch'): has_torch = True m.req.name = 'pytorch' if m.req.name.lower() in ('tensorflow_gpu', 'tensorflow-gpu', 'tensorflow'): has_torch = True m.req.name = 'tensorflow-gpu' if cuda_version > 0 else 'tensorflow' reqs.append(m) # if we have a conda list, the rest should be installed with pip, if requirements.get('conda', None) is not None: for r in requirements['pip']: try: marker = list(parse(r)) except: marker = None if not marker: continue m = MarkerRequirement(marker[0]) # skip over local files (we cannot change the version to a local file) if m.local_file: continue m_name = m.name.lower() if m_name in conda_supported_req_names: # this package is in the conda list, # make sure that if we changed version and we match it in conda ## conda_supported_req_names.remove(m_name) for cr in reqs: if m_name.lower().replace( '_', '-') == cr.name.lower().replace('_', '-'): # match versions cr.specs = m.specs # # conda always likes "-" not "_" but only on pypi packages # cr.name = cr.name.lower().replace('_', '-') break else: # not in conda, it is a pip package pip_requirements.append(m) if m_name == 'matplotlib': has_matplotlib = True # Conda requirements Hacks: if has_matplotlib: reqs.append(MarkerRequirement(Requirement.parse('graphviz'))) reqs.append(MarkerRequirement( Requirement.parse('python-graphviz'))) reqs.append(MarkerRequirement(Requirement.parse('kiwisolver'))) # remove specific cudatoolkit, it should have being preinstalled. 
# allow to override default cudatoolkit, but not the derivative packages, cudatoolkit should pull them reqs = [r for r in reqs if r.name not in ('cudnn', 'cupti')] if has_torch and cuda_version == 0: reqs.append(MarkerRequirement(Requirement.parse('cpuonly'))) # make sure we have no double entries reqs = list(OrderedDict((r.name, r) for r in reqs).values()) # conform conda packages (version/name) for r in reqs: # change _ to - in name but not the prefix _ (as this is conda prefix) if not r.name.startswith('_') and not requirements.get( 'conda', None): r.name = r.name.replace('_', '-') # remove .post from version numbers, it fails ~= version, and change == to ~= if r.specs and r.specs[0]: r.specs = [(r.specs[0][0].replace('==', '~='), r.specs[0][1].split('.post')[0])] while reqs: # notice, we give conda more freedom in version selection, to help it choose best combination def clean_ver(ar): if not ar.specs: return ar.tostr() ar.specs = [ (ar.specs[0][0], ar.specs[0][1] + '.0' if '.' not in ar.specs[0][1] else ar.specs[0][1]) ] return ar.tostr() conda_env['dependencies'] = [clean_ver(r) for r in reqs] with self.temp_file("conda_env", yaml.dump(conda_env), suffix=".yml") as name: print('Conda: Trying to install requirements:\n{}'.format( conda_env['dependencies'])) result = self._run_command( ("env", "update", "-p", self.path, "--file", name)) # check if we need to remove specific packages bad_req = self._parse_conda_result_bad_packges(result) if not bad_req: break solved = False for bad_r in bad_req: name = bad_r.split('[')[0].split('=')[0].split('~')[0].split( '<')[0].split('>')[0] # look for name in requirements for r in reqs: if r.name.lower() == name.lower(): pip_requirements.append(r) reqs.remove(r) solved = True break # we couldn't remove even one package, # nothing we can do but try pip if not solved: pip_requirements.extend(reqs) break if pip_requirements: try: pip_req_str = [ r.tostr() for r in pip_requirements if r.name not in ( 'pip', 'virtualenv', ) ] print( 'Conda: Installing requirements: step 2 - using pip:\n{}'. format(pip_req_str)) PackageManager._selected_manager = self.pip self.pip.load_requirements({'pip': '\n'.join(pip_req_str)}) except Exception as e: print(e) raise e finally: PackageManager._selected_manager = self self.requirements_manager.post_install(self.session) return True def _parse_conda_result_bad_packges(self, result_dict): if not result_dict: return None if 'bad_deps' in result_dict and result_dict['bad_deps']: return result_dict['bad_deps'] if result_dict.get('error'): error_lines = result_dict['error'].split('\n') if error_lines[0].strip().lower().startswith( "unsatisfiableerror:"): empty_lines = [ i for i, l in enumerate(error_lines) if not l.strip() ] if len(empty_lines) >= 2: deps = error_lines[empty_lines[0] + 1:empty_lines[1]] try: return yaml.load('\n'.join(deps), Loader=yaml.SafeLoader) except: return None return None def _run_command(self, command, raw=False, **kwargs): # type: (Iterable[Text], bool, Any) -> Union[Dict, Text] """ Run a conda command, returning JSON output. The command is prepended with 'conda' and run with JSON output flags. 
:param command: command to run :param raw: return text output and don't change command :param kwargs: kwargs for Argv.get_output() :return: JSON output or text output """ def escape_ansi(line): ansi_escape = re.compile( r'(?:\x1B[@-_]|[\x80-\x9F])[0-?]*[ -/]*[@-~]') return ansi_escape.sub('', line) command = Argv(*command) # type: Executable if not raw: command = (self.conda, ) + command + ("--quiet", "--json") try: print('Executing Conda: {}'.format(command.serialize())) result = command.get_output(stdin=DEVNULL, **kwargs) if self.session.debug_mode: print(result) except Exception as e: result = e.output if hasattr(e, 'output') else '' if self.session.debug_mode: print(result) if raw: raise if raw: return result result = json.loads(escape_ansi(result)) if result else {} if result.get('success', False): print('Pass') elif result.get('error'): print('Conda error: {}'.format(result.get('error'))) return result def get_python_command(self, extra=()): return CommandSequence(self.source, self.pip.get_python_command(extra=extra))
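# A quick standalone check of the version-parsing regex used by
# CondaAPI.get_conda_version() above; `conda --version` typically prints a
# string like "conda 4.10.3". This mirrors the method rather than calling it,
# so it runs without a configured Session.
import re

def _demo_get_conda_version(output):
    # same pattern as CondaAPI.get_conda_version: up to three dotted numbers
    match = re.search(r"(\d+\.){0,2}\d+", output)
    return match.group(0) if match else None

assert _demo_get_conda_version("conda 4.10.3") == "4.10.3"
assert _demo_get_conda_version("conda 23.1.0") == "23.1.0"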
# -*- coding: utf-8 -*- import sys import sphinx_rtd_theme try: from pathlib2 import Path except ImportError: from pathlib import Path project_path = Path(__file__).absolute().parent.joinpath('../..') sys.path.insert(0, project_path.as_posix()) from httpretty.version import version # noqa project = 'HTTPretty' copyright = '2018, Gabriel Falcao' author = 'Gabriel Falcao' # The short X.Y version version = version # The full version, including alpha/beta/rc tags release = version extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.coverage',
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 12 15:06:03 2018

@author: ADMIN
"""
import logging
import logging.config
from functools import partial

from pathlib2 import Path

cfgpath = Path(__file__).parent / 'logging.conf'
lcfg = partial(logging.config.fileConfig, cfgpath.as_posix())

pl = logging.getLogger('file_append')
ps = logging.getLogger('file_only')
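# Usage sketch: apply the file-based configuration once, then log through the
# pre-fetched loggers. This assumes the adjacent 'logging.conf' really defines
# the 'file_append' and 'file_only' loggers referenced above.
if __name__ == '__main__':
    lcfg()  # loads logging.conf via the partial defined above
    pl.info('goes to the append handler')
    ps.info('goes to the file-only handler')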
    def _extract_to_cache(
            cls,
            cached_file,  # type: str
            name,  # type: str
            cache_context=None,  # type: Optional[str]
            target_folder=None,  # type: Optional[str]
            cache_path_encoding=None,  # type: Optional[str]
            force=False,  # type: bool
    ):
        # type: (...) -> str
        """
        Extract cached file to cache folder
        :param str cached_file: local copy of archive file
        :param str name: name of the target file
        :param str cache_context: cache context id
        :param str target_folder: specify target path to use for archive extraction
        :param str cache_path_encoding: specify representation of the local path of the cached files,
            this will always point to the local cache folder, even if we have direct access to the file.
            Used for extracting the cached archive based on cache_path_encoding
        :param bool force: Force archive extraction even if target folder exists
        :return: cached folder containing the extracted archive content
        """
        if not cached_file:
            return cached_file

        cached_file = Path(cached_file)
        cache_path_encoding = Path(cache_path_encoding) if cache_path_encoding else None

        # we support zip and tar.gz files auto-extraction
        suffix = cached_file.suffix.lower()
        if suffix == '.gz':
            suffix = ''.join(a.lower() for a in cached_file.suffixes[-2:])
        if suffix not in (".zip", ".tgz", ".tar.gz"):
            return str(cached_file)

        cache_folder = Path(cache_path_encoding or cached_file).parent
        archive_suffix = (cache_path_encoding or cached_file).name[:-len(suffix)]
        name = encode_string_to_filename(name) if name else name
        if target_folder:
            target_folder = Path(target_folder)
        else:
            target_folder = cache_folder / CacheManager.get_context_folder_lookup(
                cache_context).format(archive_suffix, name)

        if target_folder.is_dir() and not force:
            # noinspection PyBroadException
            try:
                target_folder.touch(exist_ok=True)
                return target_folder.as_posix()
            except Exception:
                pass

        base_logger = LoggerRoot.get_base_logger()
        try:
            # if the target folder already exists, extraction was forced, so extract directly into it
            if target_folder.is_dir():
                temp_target_folder = target_folder
            else:
                temp_target_folder = cache_folder / "{0}_{1}_{2}".format(
                    target_folder.name, time() * 1000, str(random()).replace('.', ''))
                temp_target_folder.mkdir(parents=True, exist_ok=True)

            if suffix == ".zip":
                ZipFile(cached_file.as_posix()).extractall(path=temp_target_folder.as_posix())
            elif suffix == ".tar.gz":
                with tarfile.open(cached_file.as_posix()) as file:
                    file.extractall(temp_target_folder.as_posix())
            elif suffix == ".tgz":
                with tarfile.open(cached_file.as_posix(), mode='r:gz') as file:
                    file.extractall(temp_target_folder.as_posix())

            if temp_target_folder != target_folder:
                # we assume we will have such a folder if we already extracted the file
                # noinspection PyBroadException
                try:
                    # if the rename fails, it means someone else already managed to extract the file;
                    # delete the current folder and return the already existing cached folder
                    shutil.move(temp_target_folder.as_posix(), target_folder.as_posix())
                except Exception:
                    if target_folder.exists():
                        target_folder.touch(exist_ok=True)
                    else:
                        base_logger.warning(
                            "Failed renaming {0} to {1}".format(
                                temp_target_folder.as_posix(), target_folder.as_posix()))
                    try:
                        shutil.rmtree(temp_target_folder.as_posix())
                    except Exception as ex:
                        base_logger.warning(
                            "Exception {}\nFailed deleting folder {}".format(
                                ex, temp_target_folder.as_posix()))
        except Exception as ex:
            # failed extracting the file:
            base_logger.warning(
                "Exception {}\nFailed extracting zip file {}".format(ex, cached_file.as_posix()))
            # noinspection PyBroadException
            try:
                target_folder.rmdir()
            except Exception:
                pass
            return cached_file.as_posix()
        return target_folder.as_posix()
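# Standalone sketch of the suffix-normalization rule used above: '.gz' alone is
# ambiguous, so the last two suffixes are joined to tell '.tar.gz' apart from a
# plain '.gz'; only '.zip', '.tgz' and '.tar.gz' archives are auto-extracted.
from pathlib import Path

def _archive_suffix(filename):
    p = Path(filename)
    suffix = p.suffix.lower()
    if suffix == '.gz':
        suffix = ''.join(a.lower() for a in p.suffixes[-2:])
    return suffix if suffix in (".zip", ".tgz", ".tar.gz") else None

assert _archive_suffix('model.tar.gz') == '.tar.gz'
assert _archive_suffix('data.zip') == '.zip'
assert _archive_suffix('weights.pkl.gz') is None  # gzipped file, not an archive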
# -*- coding: utf-8 -*- import sys try: from pathlib2 import Path except ImportError: from pathlib import Path project_path = Path(__file__).absolute().parent.joinpath('../../..') sys.path.insert(0, project_path.as_posix()) import sphinx_bulma_theme # noqa project = 'Sphinx Bulma Theme' copyright = '2018, Gabriel Falcao' author = 'Gabriel Falcao' version = sphinx_bulma_theme.version release = version needs_sphinx = '1.7.1' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.autosectionlabel', 'sphinx.ext.coverage', 'sphinx.ext.doctest', 'sphinx.ext.githubpages',
    def _extract_to_cache(cls, cached_file, name):
        """
        Extract cached file to cache folder
        :param str cached_file: local copy of archive file
        :param str name: cache context
        :return: cached folder containing the extracted archive content
        """
        if not cached_file:
            return cached_file

        cached_file = Path(cached_file)

        # we support zip and tar.gz files auto-extraction
        if (not cached_file.suffix == ".zip" and
                not cached_file.suffixes[-2:] == [".tar", ".gz"]):
            return str(cached_file)

        cached_folder = cached_file.parent
        name = encode_string_to_filename(name) if name else name
        target_folder = Path("{0}/{1}_artifacts_archive_{2}".format(
            cached_folder, cached_file.stem, name))
        if target_folder.exists():
            # noinspection PyBroadException
            try:
                target_folder.touch(exist_ok=True)
                return target_folder
            except Exception:
                pass

        base_logger = LoggerRoot.get_base_logger()
        try:
            temp_target_folder = cached_folder / "{0}_{1}_{2}".format(
                target_folder.name, time() * 1000, str(random()).replace('.', ''))
            temp_target_folder.mkdir(parents=True, exist_ok=True)
            if cached_file.suffix == ".zip":
                ZipFile(cached_file).extractall(path=temp_target_folder.as_posix())
            elif cached_file.suffixes[-2:] == [".tar", ".gz"]:
                with tarfile.open(cached_file) as file:
                    file.extractall(temp_target_folder)

            # we assume we will have such a folder if we already extracted the file
            # noinspection PyBroadException
            try:
                # if the rename fails, it means someone else already managed to extract the file;
                # delete the current folder and return the already existing cached folder
                shutil.move(temp_target_folder.as_posix(), target_folder.as_posix())
            except Exception:
                if target_folder.exists():
                    target_folder.touch(exist_ok=True)
                else:
                    base_logger.warning("Failed renaming {0} to {1}".format(
                        temp_target_folder, target_folder))
                try:
                    shutil.rmtree(temp_target_folder)
                except Exception as ex:
                    base_logger.warning(
                        "Exception {}\nFailed deleting folder {}".format(
                            ex, temp_target_folder))
        except Exception as ex:
            # failed extracting the file:
            base_logger.warning(
                "Exception {}\nFailed extracting zip file {}".format(ex, str(cached_file)))
            # noinspection PyBroadException
            try:
                target_folder.rmdir()
            except Exception:
                pass
            return cached_file
        return target_folder
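# Minimal sketch of the extract-then-rename pattern both cache versions rely
# on: extract into a uniquely named temp folder, then move it into place, so
# concurrent extractors never observe a half-written cache entry. The helper
# name and flow are illustrative, not part of the cache API above.
import shutil
import tempfile
from pathlib import Path
from zipfile import ZipFile

def extract_atomically(zip_path, target_folder):
    target = Path(target_folder)
    if target.is_dir():
        return target  # someone else already extracted it
    temp = Path(tempfile.mkdtemp(dir=target.parent.as_posix()))
    ZipFile(zip_path).extractall(path=temp.as_posix())
    try:
        # best effort: if the target appeared meanwhile, keep the winner's copy
        if not target.exists():
            shutil.move(temp.as_posix(), target.as_posix())
        else:
            shutil.rmtree(temp.as_posix(), ignore_errors=True)
    except Exception:
        shutil.rmtree(temp.as_posix(), ignore_errors=True)
    return target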
    def _get_script_info(cls, filepath, check_uncommitted=True, create_requirements=True, log=None):
        jupyter_filepath = cls._get_jupyter_notebook_filename()
        if jupyter_filepath:
            script_path = Path(os.path.normpath(jupyter_filepath)).absolute()
        else:
            script_path = Path(os.path.normpath(filepath)).absolute()
            if not script_path.is_file():
                raise ScriptInfoError(
                    "Script file [{}] could not be found".format(filepath))

        script_dir = script_path.parent

        def _log(msg, *args, **kwargs):
            if not log:
                return
            log.warning("Failed auto-detecting task repository: {}".format(
                msg.format(*args, **kwargs)))

        plugin = next((p for p in cls.plugins if p.exists(script_dir)), None)
        repo_info = DetectionResult()
        if not plugin:
            # guard the logger, it may be None (see the _log helper above)
            if log:
                log.info("No repository found, storing script code instead")
        else:
            try:
                repo_info = plugin.get_info(str(script_dir), include_diff=check_uncommitted)
            except Exception as ex:
                _log("no info for {} ({})", script_dir, ex)
            else:
                if repo_info.is_empty():
                    _log("no info for {}", script_dir)

        repo_root = repo_info.root or script_dir
        if not plugin:
            working_dir = '.'
            entry_point = str(script_path.name)
        else:
            working_dir = cls._get_working_dir(repo_root)
            entry_point = cls._get_entry_point(repo_root, script_path)

        if check_uncommitted:
            diff = cls._get_script_code(script_path.as_posix()) \
                if not plugin or not repo_info.commit else repo_info.diff
        else:
            diff = ''

        # if this is not jupyter, get the requirements.txt
        requirements = ''
        conda_requirements = ''
        # create requirements if the backend supports requirements
        # if jupyter is present, requirements will be created in the background, when saving a snapshot
        if not jupyter_filepath and Session.check_min_api_version('2.2'):
            script_requirements = ScriptRequirements(
                Path(repo_root).as_posix() if repo_info.url else script_path.as_posix())
            if create_requirements:
                requirements, conda_requirements = script_requirements.get_requirements()
        else:
            script_requirements = None

        script_info = dict(
            repository=furl(repo_info.url).remove(username=True, password=True).tostr(),
            branch=repo_info.branch,
            version_num=repo_info.commit,
            entry_point=entry_point,
            working_dir=working_dir,
            diff=diff,
            requirements={'pip': requirements, 'conda': conda_requirements}
            if requirements else None,
        )

        messages = []
        if repo_info.modified:
            messages.append(
                "======> WARNING! UNCOMMITTED CHANGES IN REPOSITORY {} <======".format(
                    script_info.get("repository", "")))

        if not any(script_info.values()):
            script_info = None

        return (ScriptInfoResult(script=script_info, warning_messages=messages),
                script_requirements)
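# A small demonstration of the credential stripping performed above when the
# repository URL is stored on the task: furl's remove() drops the embedded
# username/password, so access tokens never leak into the task metadata.
from furl import furl

sanitized = furl('https://user:token@github.com/org/repo.git').remove(
    username=True, password=True).tostr()
assert sanitized == 'https://github.com/org/repo.git'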
def _upload_data_audit_artifacts(self, name): logger = self._task.get_logger() pd_artifact = self._artifacts_container.get(name) pd_metadata = self._artifacts_container.get_metadata(name) # remove from artifacts watch list if name in self._unregister_request: try: self._unregister_request.remove(name) except KeyError: pass self._artifacts_container.unregister_artifact(name) if pd_artifact is None: return override_filename_ext_in_uri = self._save_format override_filename_in_uri = name fd, local_csv = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri) os.close(fd) local_csv = Path(local_csv) pd_artifact.to_csv(local_csv.as_posix(), index=False, compression=self._compression) current_sha2, file_sha2 = self.sha256sum(local_csv.as_posix(), skip_header=32) if name in self._last_artifacts_upload: previous_sha2 = self._last_artifacts_upload[name] if previous_sha2 == current_sha2: # nothing to do, we can skip the upload try: local_csv.unlink() except Exception: pass return self._last_artifacts_upload[name] = current_sha2 # If old trains-server, upload as debug image if not Session.check_min_api_version('2.3'): logger.report_image(title='artifacts', series=name, local_path=local_csv.as_posix(), delete_after_upload=True, iteration=self._task.get_last_iteration(), max_image_history=2) return # Find our artifact artifact = None for an_artifact in self._task_artifact_list: if an_artifact.key == name: artifact = an_artifact break file_size = local_csv.stat().st_size # upload file uri = self._upload_local_file( local_csv, name, delete_after_upload=True, override_filename=override_filename_in_uri, override_filename_ext=override_filename_ext_in_uri) # update task artifacts with self._task_edit_lock: if not artifact: artifact = tasks.Artifact(key=name, type=self._pd_artifact_type) self._task_artifact_list.append(artifact) artifact_type_data = tasks.ArtifactTypeData() artifact_type_data.data_hash = current_sha2 artifact_type_data.content_type = "text/csv" artifact_type_data.preview = str( pd_artifact.__repr__()) + '\n\n' + self._get_statistics( {name: pd_artifact}) artifact.type_data = artifact_type_data artifact.uri = uri artifact.content_size = file_size artifact.hash = file_sha2 artifact.timestamp = int(time()) artifact.display_data = [ (str(k), str(v)) for k, v in pd_metadata.items() ] if pd_metadata else None self._task.set_artifacts(self._task_artifact_list)
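# Minimal sketch of the de-duplication idea above: hash the freshly written
# CSV and skip the upload when it matches the previous upload's hash. This is
# a plain helper for illustration; the class method used above additionally
# skips a header prefix and returns two digests.
import hashlib

_last_artifact_hash = {}  # artifact name -> sha256 of the last uploaded content

def _sha256_of_file(path, chunk_size=65536):
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()

def should_upload(name, path):
    current = _sha256_of_file(path)
    if _last_artifact_hash.get(name) == current:
        return False  # content unchanged, skip the upload
    _last_artifact_hash[name] = current
    return True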
def _file_path(raw_path: Union[str, Path]) -> Tuple[str, Path]: config_file = Path(raw_path) / '.sv_cfg' return config_file.as_posix(), abs_path(config_file)
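# Usage sketch. `abs_path` is not defined in this snippet; the assumption here
# is that it resolves the config file to an absolute Path, so a stub is
# provided for the example.
if __name__ == '__main__':
    def abs_path(p):  # stub for the helper missing from this excerpt
        return Path(p).absolute()

    cfg_str, cfg_path = _file_path('/tmp/project')
    print(cfg_str)   # /tmp/project/.sv_cfg
    print(cfg_path)  # absolute Path to the same file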
def to_posix(path): parts = path.parts[1:] full_path = Path("/") / "mnt" / "c" / '/'.join(parts) return full_path.as_posix()
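# Usage sketch: to_posix() rewrites a Windows path to its WSL mount point.
# Note the 'c' drive letter is hardcoded above, and parts[1:] simply drops the
# drive anchor, so only C:\ paths map correctly.
from pathlib import PureWindowsPath

assert to_posix(PureWindowsPath(r'C:\Users\me\data')) == '/mnt/c/Users/me/data'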
def upload_artifact(self, name, artifact_object=None, metadata=None, delete_after_upload=False): if not Session.check_min_api_version('2.3'): LoggerRoot.get_base_logger().warning( 'Artifacts not supported by your TRAINS-server version, ' 'please upgrade to the latest server version') return False if name in self._artifacts_container: raise ValueError( "Artifact by the name of {} is already registered, use register_artifact" .format(name)) artifact_type_data = tasks.ArtifactTypeData() override_filename_in_uri = None override_filename_ext_in_uri = None uri = None if np and isinstance(artifact_object, np.ndarray): artifact_type = 'numpy' artifact_type_data.content_type = 'application/numpy' artifact_type_data.preview = str(artifact_object.__repr__()) override_filename_ext_in_uri = '.npz' override_filename_in_uri = name + override_filename_ext_in_uri fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri) os.close(fd) np.savez_compressed(local_filename, **{name: artifact_object}) delete_after_upload = True elif pd and isinstance(artifact_object, pd.DataFrame): artifact_type = 'pandas' artifact_type_data.content_type = 'text/csv' artifact_type_data.preview = str(artifact_object.__repr__()) override_filename_ext_in_uri = self._save_format override_filename_in_uri = name fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri) os.close(fd) artifact_object.to_csv(local_filename, compression=self._compression) delete_after_upload = True elif isinstance(artifact_object, Image.Image): artifact_type = 'image' artifact_type_data.content_type = 'image/png' desc = str(artifact_object.__repr__()) artifact_type_data.preview = desc[1:desc.find(' at ')] override_filename_ext_in_uri = '.png' override_filename_in_uri = name + override_filename_ext_in_uri fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri) os.close(fd) artifact_object.save(local_filename) delete_after_upload = True elif isinstance(artifact_object, dict): artifact_type = 'JSON' artifact_type_data.content_type = 'application/json' preview = json.dumps(artifact_object, sort_keys=True, indent=4) override_filename_ext_in_uri = '.json' override_filename_in_uri = name + override_filename_ext_in_uri fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri) os.write(fd, bytes(preview.encode())) os.close(fd) artifact_type_data.preview = preview delete_after_upload = True elif isinstance(artifact_object, six.string_types) and urlparse( artifact_object).scheme in remote_driver_schemes: # we should not upload this, just register local_filename = None uri = artifact_object artifact_type = 'custom' artifact_type_data.content_type = mimetypes.guess_type( artifact_object)[0] elif isinstance(artifact_object, six.string_types + (Path, )): # check if single file artifact_object = Path(artifact_object) artifact_object.expanduser().absolute() try: create_zip_file = not artifact_object.is_file() except Exception: # Hack for windows pathlib2 bug, is_file isn't valid. 
                create_zip_file = True
            else:
                # we assume we are not running on a Windows OS
                if artifact_object.is_dir():
                    # change to wildcard
                    artifact_object /= '*'

            if create_zip_file:
                folder = Path('').joinpath(*artifact_object.parts[:-1])
                if not folder.is_dir() or not folder.parts:
                    raise ValueError(
                        "Artifact file/folder '{}' could not be found".format(
                            artifact_object.as_posix()))

                wildcard = artifact_object.parts[-1]
                files = list(Path(folder).rglob(wildcard))
                override_filename_ext_in_uri = '.zip'
                override_filename_in_uri = folder.parts[-1] + override_filename_ext_in_uri
                fd, zip_file = mkstemp(
                    prefix=quote(folder.parts[-1], safe="") + '.',
                    suffix=override_filename_ext_in_uri)
                try:
                    artifact_type_data.content_type = 'application/zip'
                    artifact_type_data.preview = 'Archive content {}:\n'.format(
                        artifact_object.as_posix())

                    with ZipFile(zip_file, 'w', allowZip64=True, compression=ZIP_DEFLATED) as zf:
                        for filename in sorted(files):
                            if filename.is_file():
                                relative_file_name = filename.relative_to(folder).as_posix()
                                artifact_type_data.preview += '{} - {}\n'.format(
                                    relative_file_name,
                                    humanfriendly.format_size(filename.stat().st_size))
                                zf.write(filename.as_posix(), arcname=relative_file_name)
                except Exception as e:
                    # failed zipping the folder (note: the exception comes first in the message):
                    LoggerRoot.get_base_logger().warning(
                        'Exception {}\nFailed zipping artifact folder {}'.format(e, folder))
                    return None
                finally:
                    os.close(fd)

                artifact_object = zip_file
                artifact_type = 'archive'
                artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
                local_filename = artifact_object
                delete_after_upload = True
            else:
                if not artifact_object.is_file():
                    raise ValueError(
                        "Artifact file '{}' could not be found".format(
                            artifact_object.as_posix()))

                override_filename_in_uri = artifact_object.parts[-1]
                artifact_object = artifact_object.as_posix()
                artifact_type = 'custom'
                artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
                local_filename = artifact_object
        else:
            raise ValueError("Artifact type {} not supported".format(type(artifact_object)))

        # remove from existing list, if exists
        for artifact in self._task_artifact_list:
            if artifact.key == name:
                if artifact.type == self._pd_artifact_type:
                    raise ValueError(
                        "Artifact of name {} already registered, "
                        "use register_artifact instead".format(name))

                self._task_artifact_list.remove(artifact)
                break

        if not local_filename:
            file_size = None
            file_hash = None
        else:
            # check that the file to upload exists
            local_filename = Path(local_filename).absolute()
            if not local_filename.exists() or not local_filename.is_file():
                LoggerRoot.get_base_logger().warning(
                    'Artifact upload failed, cannot find file {}'.format(
                        local_filename.as_posix()))
                return False

            file_hash, _ = self.sha256sum(local_filename.as_posix())
            file_size = local_filename.stat().st_size

            uri = self._upload_local_file(
                local_filename, name,
                delete_after_upload=delete_after_upload,
                override_filename=override_filename_in_uri,
                override_filename_ext=override_filename_ext_in_uri)

        timestamp = int(time())

        artifact = tasks.Artifact(
            key=name, type=artifact_type,
            uri=uri,
            content_size=file_size,
            hash=file_hash,
            timestamp=timestamp,
            type_data=artifact_type_data,
            display_data=[(str(k), str(v)) for k, v in metadata.items()] if metadata else None)

        # update task artifacts
        with self._task_edit_lock:
            self._task_artifact_list.append(artifact)
            self._task.set_artifacts(self._task_artifact_list)

        return True
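# Standalone sketch of the folder-to-zip branch above: a directory argument is
# turned into a '<folder>/*' wildcard, matching files are gathered with
# rglob(), and each is stored under its folder-relative name so the archive
# layout mirrors the folder layout. The function name is illustrative only.
from pathlib import Path
from zipfile import ZIP_DEFLATED, ZipFile

def zip_folder(folder, zip_file, wildcard='*'):
    folder = Path(folder)
    with ZipFile(zip_file, 'w', allowZip64=True, compression=ZIP_DEFLATED) as zf:
        for filename in sorted(folder.rglob(wildcard)):
            if filename.is_file():
                zf.write(filename.as_posix(),
                         arcname=filename.relative_to(folder).as_posix())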
class FolderCache(object):
    _lock_filename = '.clearml.lock'
    _lock_timeout_seconds = 30
    _temp_entry_prefix = '_temp.'

    def __init__(self, cache_folder, max_cache_entries=5, min_free_space_gb=None):
        self._cache_folder = Path(os.path.expandvars(cache_folder)).expanduser().absolute()
        self._cache_folder.mkdir(parents=True, exist_ok=True)
        self._max_cache_entries = max_cache_entries
        self._last_copied_entry_folder = None
        self._min_free_space_gb = min_free_space_gb if min_free_space_gb and min_free_space_gb > 0 else None
        self._lock = FileLock((self._cache_folder / self._lock_filename).as_posix())

    def get_cache_folder(self):
        # type: () -> Path
        """
        :return: Return the base cache folder
        """
        return self._cache_folder

    def copy_cached_entry(self, keys, destination):
        # type: (List[str], Path) -> Optional[Path]
        """
        Copy a cached entry into a destination directory; if the cached entry
        does not exist, return None

        :param keys: list of keys to look up (the first match is copied)
        :param destination: target directory to copy the cached entry into
        :return: Target path, None if cached entry does not exist
        """
        self._last_copied_entry_folder = None
        if not keys:
            return None

        # lock so we make sure no one deletes it before we copy it
        # noinspection PyBroadException
        try:
            self._lock.acquire(timeout=self._lock_timeout_seconds)
        except BaseException as ex:
            warning('Could not lock cache folder {}: {}'.format(self._cache_folder, ex))
            return None

        src = None
        try:
            src = self.get_entry(keys)
            if src:
                destination = Path(destination).absolute()
                destination.mkdir(parents=True, exist_ok=True)
                shutil.rmtree(destination.as_posix())
                shutil.copytree(src.as_posix(), dst=destination.as_posix(), symlinks=True)
        except BaseException as ex:
            warning('Could not copy cache folder {} to {}: {}'.format(src, destination, ex))
            self._lock.release()
            return None

        # release the lock
        self._lock.release()
        self._last_copied_entry_folder = src
        return destination if src else None

    def get_entry(self, keys):
        # type: (List[str]) -> Optional[Path]
        """
        Return a folder (a sub-folder inside the cache_folder) matching one of the keys

        :param keys: List of keys, return the first match to one of the keys;
            notice keys cannot contain '.'
        :return: Path to the sub-folder or None if none was found
        """
        if not keys:
            return None

        # conform the keys
        keys = [keys] if isinstance(keys, str) else keys
        keys = sorted([k.replace('.', '_') for k in keys])
        for cache_folder in self._cache_folder.glob('*'):
            if cache_folder.is_dir() and any(
                    True for k in cache_folder.name.split('.') if k in keys):
                cache_folder.touch()
                return cache_folder
        return None

    def add_entry(self, keys, source_folder, exclude_sub_folders=None):
        # type: (List[str], Path, Optional[Sequence[str]]) -> bool
        """
        Add a local folder into the cache, copy all sub-folders inside `source_folder`
        excluding folders matching the `exclude_sub_folders` list

        :param keys: Cache entry keys list (str)
        :param source_folder: Folder to copy into the cache
        :param exclude_sub_folders: List of sub-folders to exclude from the copy operation
        :return: return True if a new entry was added to the cache
        """
        if not keys:
            return False

        keys = [keys] if isinstance(keys, str) else keys
        keys = sorted([k.replace('.', '_') for k in keys])

        # if the entry already exists, skip it
        cached_entry = self.get_entry(keys)
        if cached_entry:
            # make sure the entry contains all keys
            cached_keys = cached_entry.name.split('.')
            if set(keys) - set(cached_keys):
                # noinspection PyBroadException
                try:
                    self._lock.acquire(timeout=self._lock_timeout_seconds)
                except BaseException as ex:
                    warning('Could not lock cache folder {}: {}'.format(self._cache_folder, ex))
                    # failed locking, do nothing
                    return True
                keys = sorted(list(set(keys) | set(cached_keys)))
                dst = cached_entry.parent / '.'.join(keys)

                # rename
                try:
                    shutil.move(src=cached_entry.as_posix(), dst=dst.as_posix())
                except BaseException as ex:
                    warning('Could not rename cache entry {} to {}: {}'.format(
                        cached_entry.as_posix(), dst.as_posix(), ex))

                # release the lock
                self._lock.release()
            return True

        # make sure we remove old entries
        self._remove_old_entries()

        # if we do not have enough free space, do nothing.
        if not self._check_min_free_space():
            warning(
                'Could not add cache entry, not enough free space on drive, '
                'free space threshold {} GB. Clearing all cache entries!'.
format(self._min_free_space_gb)) self._remove_old_entries(max_cache_entries=0) return False # create the new entry for us exclude_sub_folders = exclude_sub_folders or [] source_folder = Path(source_folder).absolute() # create temp folder temp_folder = \ self._temp_entry_prefix + \ '{}.{}'.format(str(time()).replace('.', '_'), str(random()).replace('.', '_')) temp_folder = self._cache_folder / temp_folder temp_folder.mkdir(parents=True, exist_ok=False) for f in source_folder.glob('*'): if f.name in exclude_sub_folders: continue shutil.copytree(src=f.as_posix(), dst=(temp_folder / f.name).as_posix(), symlinks=True) # rename the target folder target_cache_folder = self._cache_folder / '.'.join(keys) # if we failed moving it means someone else created the cached entry before us, we can just leave # noinspection PyBroadException try: shutil.move(src=temp_folder.as_posix(), dst=target_cache_folder.as_posix()) except BaseException: # noinspection PyBroadException try: shutil.rmtree(path=temp_folder.as_posix()) except BaseException: return False return True def get_last_copied_entry(self): # type: () -> Optional[Path] """ :return: the last copied cached entry folder inside the cache """ return self._last_copied_entry_folder def _remove_old_entries(self, max_cache_entries=None): # type: (Optional[int]) -> () """ Notice we only keep self._max_cache_entries-1, assuming we will be adding a new entry soon :param int max_cache_entries: if not None use instead of self._max_cache_entries """ folder_entries = [ (cache_folder, cache_folder.stat().st_mtime) for cache_folder in self._cache_folder.glob('*') if cache_folder.is_dir() and not cache_folder.name.startswith(self._temp_entry_prefix) ] folder_entries = sorted(folder_entries, key=lambda x: x[1], reverse=True) # lock so we make sure no one deletes it before we copy it # noinspection PyBroadException try: self._lock.acquire(timeout=self._lock_timeout_seconds) except BaseException as ex: warning('Could not lock cache folder {}: {}'.format( self._cache_folder, ex)) return number_of_entries_to_keep = self._max_cache_entries - 1 \ if max_cache_entries is None else max(0, int(max_cache_entries)) for folder, ts in folder_entries[number_of_entries_to_keep:]: try: shutil.rmtree(folder.as_posix(), ignore_errors=True) except BaseException as ex: warning('Could not delete cache entry {}: {}'.format( folder.as_posix(), ex)) self._lock.release() def _check_min_free_space(self): # type: () -> bool """ :return: return False if we hit the free space limit. If not free space limit provided, always return True """ if not self._min_free_space_gb or not self._cache_folder: return True free_space = float( psutil.disk_usage(self._cache_folder.as_posix()).free) free_space /= 2**30 return free_space > self._min_free_space_gb
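# Usage sketch for FolderCache, assuming the class is used as defined above.
# Keys may not contain '.' because '.' is the separator that joins keys into a
# cache entry's folder name; the paths below are illustrative.
cache = FolderCache('/tmp/venv-cache', max_cache_entries=3)
cache.add_entry(keys=['py38', 'reqs-a1b2c3'], source_folder='/tmp/my-venv')
if cache.get_entry(['reqs-a1b2c3']):
    cache.copy_cached_entry(['reqs-a1b2c3'], destination='/tmp/restored-venv')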
def upload_artifact(self, name, artifact_object=None, metadata=None, delete_after_upload=False): if not Session.check_min_api_version('2.3'): LoggerRoot.get_base_logger().warning('Artifacts not supported by your TRAINS-server version, ' 'please upgrade to the latest server version') return False if name in self._artifacts_dict: raise ValueError("Artifact by the name of {} is already registered, use register_artifact".format(name)) artifact_type_data = tasks.ArtifactTypeData() use_filename_in_uri = True if np and isinstance(artifact_object, np.ndarray): artifact_type = 'numpy' artifact_type_data.content_type = 'application/numpy' artifact_type_data.preview = str(artifact_object.__repr__()) fd, local_filename = mkstemp(suffix='.npz') os.close(fd) np.savez_compressed(local_filename, **{name: artifact_object}) delete_after_upload = True use_filename_in_uri = False elif pd and isinstance(artifact_object, pd.DataFrame): artifact_type = 'pandas' artifact_type_data.content_type = 'text/csv' artifact_type_data.preview = str(artifact_object.__repr__()) fd, local_filename = mkstemp(suffix=self._save_format) os.close(fd) artifact_object.to_csv(local_filename, compression=self._compression) delete_after_upload = True use_filename_in_uri = False elif isinstance(artifact_object, Image.Image): artifact_type = 'image' artifact_type_data.content_type = 'image/png' desc = str(artifact_object.__repr__()) artifact_type_data.preview = desc[1:desc.find(' at ')] fd, local_filename = mkstemp(suffix='.png') os.close(fd) artifact_object.save(local_filename) delete_after_upload = True use_filename_in_uri = False elif isinstance(artifact_object, dict): artifact_type = 'JSON' artifact_type_data.content_type = 'application/json' preview = json.dumps(artifact_object, sort_keys=True, indent=4) fd, local_filename = mkstemp(suffix='.json') os.write(fd, bytes(preview.encode())) os.close(fd) artifact_type_data.preview = preview delete_after_upload = True use_filename_in_uri = False elif isinstance(artifact_object, six.string_types) or isinstance(artifact_object, Path): if isinstance(artifact_object, Path): artifact_object = artifact_object.as_posix() artifact_type = 'custom' artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0] local_filename = artifact_object else: raise ValueError("Artifact type {} not supported".format(type(artifact_object))) # remove from existing list, if exists for artifact in self._task_artifact_list: if artifact.key == name: if artifact.type == self._pd_artifact_type: raise ValueError("Artifact of name {} already registered, " "use register_artifact instead".format(name)) self._task_artifact_list.remove(artifact) break # check that the file to upload exists local_filename = Path(local_filename).absolute() if not local_filename.exists() or not local_filename.is_file(): LoggerRoot.get_base_logger().warning('Artifact upload failed, cannot find file {}'.format( local_filename.as_posix())) return False file_hash, _ = self.sha256sum(local_filename.as_posix()) timestamp = int(time()) file_size = local_filename.stat().st_size uri = self._upload_local_file(local_filename, name, delete_after_upload=delete_after_upload, use_filename=use_filename_in_uri) artifact = tasks.Artifact(key=name, type=artifact_type, uri=uri, content_size=file_size, hash=file_hash, timestamp=timestamp, type_data=artifact_type_data, display_data=[(str(k), str(v)) for k, v in metadata.items()] if metadata else None) # update task artifacts with self._task_edit_lock: self._task_artifact_list.append(artifact) 
self._task.set_artifacts(self._task_artifact_list) return True
    def _extract_to_cache(cls, cached_file, name):
        """
        Extract a cached zip file into the cache folder
        :param str cached_file: local copy of archive file
        :param str name: cache context
        :return: cached folder containing the extracted archive content
        """
        # only zip files
        if not cached_file or not str(cached_file).lower().endswith('.zip'):
            return cached_file

        cached_folder = Path(cached_file).parent
        archive_suffix = cached_file.rpartition(".")[0]
        name = encode_string_to_filename(name)
        target_folder = Path("{0}_artifacts_archive_{1}".format(archive_suffix, name))
        if target_folder.exists():
            # noinspection PyBroadException
            try:
                target_folder.touch(exist_ok=True)
                return target_folder
            except Exception:
                pass

        base_logger = LoggerRoot.get_base_logger()
        try:
            temp_target_folder = cached_folder / "{0}_{1}_{2}".format(
                target_folder.name, time() * 1000, str(random()).replace('.', ''))
            temp_target_folder.mkdir(parents=True, exist_ok=True)
            ZipFile(cached_file).extractall(path=temp_target_folder.as_posix())
            # we assume we will have such a folder if we already extracted the zip file
            # noinspection PyBroadException
            try:
                # if the rename fails, it means that someone else already managed to extract the zip;
                # delete the current folder and return the already existing cached zip folder
                shutil.move(temp_target_folder.as_posix(), target_folder.as_posix())
            except Exception:
                if target_folder.exists():
                    target_folder.touch(exist_ok=True)
                else:
                    base_logger.warning(
                        "Failed renaming {0} to {1}".format(temp_target_folder, target_folder))
                try:
                    shutil.rmtree(temp_target_folder)
                except Exception as ex:
                    base_logger.warning(
                        "Exception {}\nFailed deleting folder {}".format(ex, temp_target_folder))
        except Exception as ex:
            # failed extracting the zip file:
            base_logger.warning(
                "Exception {}\nFailed extracting zip file {}".format(ex, cached_file))
            # noinspection PyBroadException
            try:
                target_folder.rmdir()
            except Exception:
                pass
            return cached_file
        return target_folder