Example #1
0
def dump_flight_to_kml(flight, kml_filename_local):
    """Dumps the flight to KML format.

    Args:
        flight: an igc_lib.Flight, the flight to be saved
        kml_filename_local: a string, the name of the output file
    """
    assert flight.valid
    kml = simplekml.Kml()

    def add_point(name, fix):
        kml.newpoint(name=name, coords=[(fix.lon, fix.lat)])

    coords = []
    for fix in flight.fixes:
        coords.append((fix.lon, fix.lat))
    kml.newlinestring(coords=coords)

    add_point(name="Takeoff", fix=flight.takeoff_fix)
    add_point(name="Landing", fix=flight.landing_fix)

    for i, thermal in enumerate(flight.thermals):
        add_point(name="thermal_%02d" % i, fix=thermal.enter_fix)
        add_point(name="thermal_%02d_END" % i, fix=thermal.exit_fix)
    kml_filename = Path(kml_filename_local).expanduser().absolute()
    kml.save(kml_filename.as_posix())
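A minimal usage sketch for dump_flight_to_kml above, assuming igc_lib's Flight.create_from_file API and an IGC track file named flight.igc; the function itself additionally needs import simplekml and from pathlib import Path in scope.

import igc_lib

flight = igc_lib.Flight.create_from_file("flight.igc")  # hypothetical input track
if flight.valid:
    dump_flight_to_kml(flight, "~/flight.kml")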
Example #2
0
def get_common_path(list_of_files):
    # type: (Sequence[Union[str, Path]]) -> Optional[str]
    """
    Return the common path of a list of files

    :param list_of_files: list of files (str or Path objects)
    :return: Common path string (always absolute) or None if common path could not be found
    """
    if not list_of_files:
        return None

    # a single file has its parent as common path
    if len(list_of_files) == 1:
        return Path(list_of_files[0]).absolute().parent.as_posix()

    # find common path to support folder structure inside zip
    common_path_parts = Path(list_of_files[0]).absolute().parts
    for f in list_of_files:
        f_parts = Path(f).absolute().parts
        num_p = min(len(f_parts), len(common_path_parts))
        if f_parts[:num_p] == common_path_parts[:num_p]:
            common_path_parts = common_path_parts[:num_p]
            continue
        num_p = min([
            i for i, (a, b) in enumerate(
                zip(common_path_parts[:num_p], f_parts[:num_p])) if a != b
        ] or [-1])
        # no common path, break
        if num_p < 0:
            common_path_parts = []
            break
        # update common path
        common_path_parts = common_path_parts[:num_p]

    if common_path_parts:
        common_path = Path()
        for f in common_path_parts:
            common_path /= f
        return common_path.as_posix()

    return None
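A hedged usage sketch for get_common_path above (POSIX-style absolute paths assumed; the function itself needs from pathlib import Path and the typing names referenced in its type comment).

files = ["/data/project/a/train.csv", "/data/project/b/test.csv"]
print(get_common_path(files))        # -> "/data/project"
print(get_common_path([files[0]]))   # single file -> its parent: "/data/project/a"
print(get_common_path([]))           # empty list -> None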
Example #3
0
    def create_cache_folders(self, slot_index=0):
        """
        Create and update the cache folders.
        Note that multiple instances may share the same cache for some folders,
        while for others an "instance slot" suffix is used to differentiate between the running instances.
        slot_index=0 is the default, meaning no suffix is added to the singleton_folders.

        Note: do not call this function twice with a non-zero slot_index,
            as it will add a suffix to the folders on each call.

        :param slot_index: integer
        """

        # create target folders:
        folder_keys = ('agent.venvs_dir', 'agent.vcs_cache.path',
                       'agent.pip_download_cache.path',
                       'agent.docker_pip_cache', 'agent.docker_apt_cache')
        singleton_folders = (
            'agent.venvs_dir',
            'agent.vcs_cache.path',
        )

        for key in folder_keys:
            folder_key = ConfigValue(self.config, key)
            if not folder_key.get():
                continue

            if slot_index and key in singleton_folders:
                f = folder_key.get()
                if f.endswith(os.path.sep):
                    f = f[:-1]
                folder_key.set(f + '.{}'.format(slot_index))

            # update the configuration for full path
            folder = Path(os.path.expandvars(
                folder_key.get())).expanduser().absolute()
            folder_key.set(folder.as_posix())
            try:
                folder.mkdir(parents=True, exist_ok=True)
            except:
                pass
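A small, hedged sketch of the slot-suffix behavior described in the docstring above; _slot_suffix is a hypothetical helper mirroring the suffixing logic, not part of the agent code.

import os

def _slot_suffix(path, slot_index):
    # mirrors the logic above: with a non-zero slot, strip a trailing separator and append ".<slot>"
    if slot_index:
        if path.endswith(os.path.sep):
            path = path[:-1]
        path += '.{}'.format(slot_index)
    return path

print(_slot_suffix('/home/user/.clearml/venvs-builds/', 2))  # -> /home/user/.clearml/venvs-builds.2
print(_slot_suffix('/home/user/.clearml/venvs-builds/', 0))  # slot 0 -> path left unchanged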
Example #4
0
    def _upload_local_file(self,
                           local_file,
                           name,
                           delete_after_upload=False,
                           override_filename=None,
                           override_filename_ext=None,
                           wait_on_upload=False):
        # type: (str, str, bool, Optional[str], Optional[str], Optional[bool]) -> str
        """
        Upload local file and return uri of the uploaded file (uploading in the background)
        """
        from trains.storage import StorageManager

        upload_uri = self._task.output_uri or self._task.get_logger(
        ).get_default_upload_destination()
        if not isinstance(local_file, Path):
            local_file = Path(local_file)
        ev = UploadEvent(
            metric='artifacts',
            variant=name,
            image_data=None,
            upload_uri=upload_uri,
            local_image_path=local_file.as_posix(),
            delete_after_upload=delete_after_upload,
            override_filename=override_filename,
            override_filename_ext=override_filename_ext,
            override_storage_key_prefix=self._get_storage_uri_prefix())
        _, uri = ev.get_target_full_upload_uri(upload_uri, quote_uri=False)

        # send for upload
        # noinspection PyProtectedMember
        if wait_on_upload:
            StorageManager.upload_file(local_file, uri)
        else:
            self._task._reporter._report(ev)

        _, quoted_uri = ev.get_target_full_upload_uri(upload_uri)

        return quoted_uri
Example #5
0
    def _upload_local_file(self, local_file, name, delete_after_upload=False,
                           override_filename=None,
                           override_filename_ext=None):
        """
        Upload local file and return uri of the uploaded file (uploading in the background)
        """
        upload_uri = self._task.output_uri or self._task.get_logger().get_default_upload_destination()
        if not isinstance(local_file, Path):
            local_file = Path(local_file)
        ev = UploadEvent(metric='artifacts', variant=name,
                         image_data=None, upload_uri=upload_uri,
                         local_image_path=local_file.as_posix(),
                         delete_after_upload=delete_after_upload,
                         override_filename=override_filename,
                         override_filename_ext=override_filename_ext,
                         override_storage_key_prefix=self._get_storage_uri_prefix())
        _, uri = ev.get_target_full_upload_uri(upload_uri)

        # send for upload
        self._task.reporter._report(ev)

        return uri
Example #6
0
    def get_local_copy(
        cls,
        remote_url,
        cache_context=None,
        extract_archive=True,
        name=None,
        force_download=False,
    ):
        # type: (str, Optional[str], bool, Optional[str], bool) -> str
        """
        Get a local copy of the remote file. If the remote URL is a direct file access,
        the returned link is the same, otherwise a link to a local copy of the url file is returned.
        Caching is enabled by default, cache limited by number of stored files per cache context.
        Oldest accessed files are deleted when cache is full.

        :param str remote_url: remote url link (string)
        :param str cache_context: Optional caching context identifier (string), default context 'global'
        :param bool extract_archive: if True returned path will be a cached folder containing the archive's content,
            currently only zip files are supported.
        :param str name: name of the target file
        :param bool force_download: download file from remote even if exists in local cache
        :return: Full path to local copy of the requested url. Return None on Error.
        """
        cache = CacheManager.get_cache_manager(cache_context=cache_context)
        cached_file = cache.get_local_copy(remote_url=remote_url,
                                           force_download=force_download)
        if extract_archive and cached_file:
            # this will get us the actual cache (even with direct access)
            cache_path_encoding = Path(cache.get_cache_folder(
            )) / cache.get_hashed_url_file(remote_url)
            return cls._extract_to_cache(
                cached_file,
                name,
                cache_context,
                cache_path_encoding=cache_path_encoding.as_posix())

        return cached_file
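A hedged usage sketch, assuming this classmethod is exposed as StorageManager.get_local_copy in clearml (formerly trains); the URL and bucket name are illustrative only.

from clearml import StorageManager

local_path = StorageManager.get_local_copy(
    remote_url="s3://my-bucket/models/model.zip",  # hypothetical remote URL
    extract_archive=True,
)
print(local_path)  # full path to the cached (and extracted) local copy, or None on error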
Example #7
0
 def __init__(self, session, base_interpreter=None):
     # type: (Session, PathLike) -> ()
     self._session = session
     self.config = deepcopy(session.config)  # type: ConfigTree
     self.handlers = []  # type: List[RequirementSubstitution]
     agent = self.config['agent']
     self.active = not agent.get('cpu_only', False)
     self.found_cuda = False
     if self.active:
         try:
             agent['cuda_version'], agent[
                 'cudnn_version'] = self.get_cuda_version(self.config)
             self.found_cuda = True
         except Exception:
             # if we have a cuda version, it is good enough (we don't need to have a cudnn version)
             if agent.get('cuda_version'):
                 self.found_cuda = True
     pip_cache_dir = Path(
         self.config["agent.pip_download_cache.path"]).expanduser() / (
             'cu' + agent['cuda_version'] if self.found_cuda else 'cpu')
     self.translator = RequirementsTranslator(
         session,
         interpreter=base_interpreter,
         cache_dir=pip_cache_dir.as_posix())
Example #8
0
 def report_offline_session(cls, task, folder):
     from ... import StorageManager
     filename = Path(folder) / cls.__offline_filename
     if not filename.is_file():
         return False
     # noinspection PyProtectedMember
     remote_url = task._get_default_report_storage_uri()
     if remote_url and remote_url.endswith('/'):
         remote_url = remote_url[:-1]
     uploaded_files = set()
     task_id = task.id
     with open(filename.as_posix(), 'rt') as f:
         i = 0
         while True:
             try:
                 line = f.readline()
                 if not line:
                     break
                 list_requests = json.loads(line)
                 for r in list_requests:
                     org_task_id = r['task']
                     r['task'] = task_id
                     if r.get('key') and r.get('url'):
                         debug_sample = (Path(folder) / 'data').joinpath(
                             *(r['key'].split('/')))
                         r['key'] = r['key'].replace(
                             '.{}{}'.format(org_task_id, os.sep),
                             '.{}{}'.format(task_id, os.sep), 1)
                         r['url'] = '{}/{}'.format(remote_url, r['key'])
                         if debug_sample not in uploaded_files and debug_sample.is_file(
                         ):
                             uploaded_files.add(debug_sample)
                             StorageManager.upload_file(
                                 local_file=debug_sample.as_posix(),
                                 remote_url=r['url'])
                     elif r.get('plot_str'):
                         # hack plotly embedded images links
                         # noinspection PyBroadException
                         try:
                             task_id_sep = '.{}{}'.format(
                                 org_task_id, os.sep)
                             plot = json.loads(r['plot_str'])
                             if plot.get('layout', {}).get('images'):
                                 for image in plot['layout']['images']:
                                     if task_id_sep not in image['source']:
                                         continue
                                     pre, post = image['source'].split(
                                         task_id_sep, 1)
                                     pre = os.sep.join(
                                         pre.split(os.sep)[-2:])
                                     debug_sample = (
                                         Path(folder) / 'data').joinpath(
                                             pre +
                                             '.{}'.format(org_task_id),
                                             post)
                                     image['source'] = '/'.join([
                                         remote_url,
                                         pre + '.{}'.format(task_id), post
                                     ])
                                     if debug_sample not in uploaded_files and debug_sample.is_file(
                                     ):
                                         uploaded_files.add(debug_sample)
                                         StorageManager.upload_file(
                                             local_file=debug_sample.
                                             as_posix(),
                                             remote_url=image['source'])
                             r['plot_str'] = json.dumps(plot)
                         except Exception:
                             pass
                 i += 1
             except StopIteration:
                 break
             except Exception as ex:
                 warning('Failed reporting metric, line {} [{}]'.format(
                     i, ex))
             batch_requests = api_events.AddBatchRequest(
                 requests=list_requests)
             if batch_requests.requests:
                 res = task.session.send(batch_requests)
                 if res and not res.ok():
                     warning(
                         "failed logging metric task to backend ({:d} lines, {})"
                         .format(len(batch_requests.requests),
                                 str(res.meta)))
     return True
Example #9
0
    def _extract_to_cache(cls,
                          cached_file,
                          name,
                          cache_context=None,
                          target_folder=None):
        # type: (str, str, Optional[str], Optional[str]) -> str
        """
        Extract cached file to cache folder
        :param str cached_file: local copy of archive file
        :param str name: name of the target file
        :param str cache_context: cache context id
        :param str target_folder: specify target path to use for archive extraction
        :return: cached folder containing the extracted archive content
        """
        if not cached_file:
            return cached_file

        cached_file = Path(cached_file)

        # we support zip and tar.gz files auto-extraction
        suffix = cached_file.suffix.lower()
        if suffix == '.gz':
            suffix = ''.join(a.lower() for a in cached_file.suffixes[-2:])

        if suffix not in (".zip", ".tgz", ".tar.gz"):
            return str(cached_file)

        cached_folder = Path(cached_file).parent
        archive_suffix = cached_file.name[:-len(suffix)]
        name = encode_string_to_filename(name)
        target_folder = Path(
            target_folder
            or CacheManager.get_context_folder_lookup(cache_context).format(
                archive_suffix, name))

        if target_folder.exists():
            # noinspection PyBroadException
            try:
                target_folder.touch(exist_ok=True)
                return target_folder.as_posix()
            except Exception:
                pass

        base_logger = LoggerRoot.get_base_logger()
        try:
            temp_target_folder = cached_folder / "{0}_{1}_{2}".format(
                target_folder.name,
                time() * 1000,
                str(random()).replace('.', ''))
            temp_target_folder.mkdir(parents=True, exist_ok=True)
            if suffix == ".zip":
                ZipFile(cached_file.as_posix()).extractall(
                    path=temp_target_folder.as_posix())
            elif suffix == ".tar.gz":
                with tarfile.open(cached_file.as_posix()) as file:
                    file.extractall(temp_target_folder.as_posix())
            elif suffix == ".tgz":
                with tarfile.open(cached_file.as_posix(), mode='r:gz') as file:
                    file.extractall(temp_target_folder.as_posix())

            # we assume such a folder will already exist if the file was previously extracted
            # noinspection PyBroadException
            try:
                # if rename fails, it means that someone else already managed to extract the file, delete the current
                # folder and return the already existing cached zip folder
                shutil.move(temp_target_folder.as_posix(),
                            target_folder.as_posix())
            except Exception:
                if target_folder.exists():
                    target_folder.touch(exist_ok=True)
                else:
                    base_logger.warning("Failed renaming {0} to {1}".format(
                        temp_target_folder.as_posix(),
                        target_folder.as_posix()))
                try:
                    shutil.rmtree(temp_target_folder.as_posix())
                except Exception as ex:
                    base_logger.warning(
                        "Exception {}\nFailed deleting folder {}".format(
                            ex, temp_target_folder.as_posix()))
        except Exception as ex:
            # failed extracting the file:
            base_logger.warning(
                "Exception {}\nFailed extracting zip file {}".format(
                    ex, cached_file.as_posix()))
            # noinspection PyBroadException
            try:
                target_folder.rmdir()
            except Exception:
                pass
            return cached_file.as_posix()
        return target_folder.as_posix()
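A condensed, hedged sketch of the extract-into-temp-then-rename idea used above, written as a standalone helper (illustrative only, not the clearml implementation; it uses Path.rename for the final step, which fails if another process already populated the target).

import shutil
import tarfile
from pathlib import Path
from random import random
from time import time
from zipfile import ZipFile

def extract_atomically(archive, target_folder):
    """Extract an archive into a unique temp folder, then move it into place in one step."""
    archive, target_folder = Path(archive), Path(target_folder)
    tmp = target_folder.parent / "{}_{}_{}".format(
        target_folder.name, int(time() * 1000), str(random()).replace('.', ''))
    tmp.mkdir(parents=True, exist_ok=True)
    if archive.suffix.lower() == '.zip':
        ZipFile(archive.as_posix()).extractall(path=tmp.as_posix())
    else:
        with tarfile.open(archive.as_posix()) as f:
            f.extractall(tmp.as_posix())
    try:
        tmp.rename(target_folder)  # atomic on the same filesystem
    except OSError:
        # someone else extracted first; keep their copy and drop ours
        shutil.rmtree(tmp.as_posix(), ignore_errors=True)
    return target_folder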
Example #10
0
    def _get_jupyter_notebook_filename(cls):
        # check if we are running inside VS Code, where the jupyter notebook file is defined:
        if 'IPython' in sys.modules:
            # noinspection PyBroadException
            try:
                from IPython import get_ipython  # noqa
                ip = get_ipython()
                # vscode-jupyter PR #8531 added this variable
                local_ipynb_file = ip.__dict__.get('user_ns', {}).get('__vsc_ipynb_file__') if ip else None
                if local_ipynb_file:
                    # now replace the .ipynb with .py
                    # we assume we will have that file available for monitoring
                    local_ipynb_file = Path(local_ipynb_file)
                    script_entry_point = local_ipynb_file.with_suffix('.py').as_posix()

                    # install the post store hook,
                    # notice that if we do not have a local file we serialize/write every time the entire notebook
                    cls._jupyter_install_post_store_hook(local_ipynb_file.as_posix(), log_history=False)

                    return script_entry_point
            except Exception:
                pass

        if not (sys.argv[0].endswith(os.path.sep + 'ipykernel_launcher.py') or
                sys.argv[0].endswith(os.path.join(os.path.sep, 'ipykernel', '__main__.py'))) \
                or len(sys.argv) < 3 or not sys.argv[2].endswith('.json'):
            return None

        server_info = None

        # we can safely assume that we can import the notebook package here
        # noinspection PyBroadException
        try:
            # noinspection PyPackageRequirements
            from notebook.notebookapp import list_running_servers
            import requests
            current_kernel = sys.argv[2].split(os.path.sep)[-1].replace('kernel-', '').replace('.json', '')
            # noinspection PyBroadException
            try:
                server_info = next(list_running_servers())
            except Exception:
                # on some jupyter notebook versions this function can crash on parsing the json file,
                # we will parse it manually here
                # noinspection PyPackageRequirements
                import ipykernel
                from glob import glob
                import json
                for f in glob(os.path.join(os.path.dirname(ipykernel.get_connection_file()), '??server-*.json')):
                    # noinspection PyBroadException
                    try:
                        with open(f, 'r') as json_data:
                            server_info = json.load(json_data)
                    except Exception:
                        server_info = None
                    if server_info:
                        break

            cookies = None
            password = None
            if server_info and server_info.get('password'):
                # we need to get the password
                from ....config import config
                password = config.get('development.jupyter_server_password', '')
                if not password:
                    cls._get_logger().warning(
                        'Password protected Jupyter Notebook server was found! '
                        'Add `sdk.development.jupyter_server_password=<jupyter_password>` to ~/clearml.conf')
                    return os.path.join(os.getcwd(), 'error_notebook_not_found.py')

                r = requests.get(url=server_info['url'] + 'login')
                cookies = {'_xsrf': r.cookies.get('_xsrf', '')}
                r = requests.post(server_info['url'] + 'login?next', cookies=cookies,
                                  data={'_xsrf': cookies['_xsrf'], 'password': password})
                cookies.update(r.cookies)

            auth_token = server_info.get('token') or os.getenv('JUPYTERHUB_API_TOKEN') or ''
            try:
                r = requests.get(
                    url=server_info['url'] + 'api/sessions', cookies=cookies,
                    headers={'Authorization': 'token {}'.format(auth_token), })
            except requests.exceptions.SSLError:
                # disable SSL check warning
                from urllib3.exceptions import InsecureRequestWarning
                # noinspection PyUnresolvedReferences
                requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
                # fire request
                r = requests.get(
                    url=server_info['url'] + 'api/sessions', cookies=cookies,
                    headers={'Authorization': 'token {}'.format(auth_token), }, verify=False)
                # enable SSL check warning
                import warnings
                warnings.simplefilter('default', InsecureRequestWarning)

            # send request to the jupyter server
            try:
                r.raise_for_status()
            except Exception as ex:
                cls._get_logger().warning('Failed accessing the jupyter server{}: {}'.format(
                    ' [password={}]'.format(password) if server_info.get('password') else '', ex))
                return os.path.join(os.getcwd(), 'error_notebook_not_found.py')

            notebooks = r.json()

            cur_notebook = None
            for n in notebooks:
                if n['kernel']['id'] == current_kernel:
                    cur_notebook = n
                    break

            notebook_path = cur_notebook['notebook'].get('path', '')
            notebook_name = cur_notebook['notebook'].get('name', '')

            is_google_colab = False
            # check if this is google.colab, then there is no local file
            # noinspection PyBroadException
            try:
                # noinspection PyPackageRequirements
                from IPython import get_ipython
                if get_ipython() and 'google.colab' in get_ipython().extension_manager.loaded:
                    is_google_colab = True
            except Exception:
                pass

            if is_google_colab:
                script_entry_point = str(notebook_name or 'notebook').replace(
                    '>', '_').replace('<', '_').replace('.ipynb', '.py')
                if not script_entry_point.lower().endswith('.py'):
                    script_entry_point += '.py'
                local_ipynb_file = None
            else:
                # always slash, because this is from uri (so never backslash not even on windows)
                entry_point_filename = notebook_path.split('/')[-1]

                # now we should try to find the actual file
                entry_point = (Path.cwd() / entry_point_filename).absolute()
                if not entry_point.is_file():
                    entry_point = (Path.cwd() / notebook_path).absolute()

                # fix for VSCode pushing uuid at the end of the notebook name.
                if not entry_point.exists():
                    # noinspection PyBroadException
                    try:
                        alternative_entry_point = '-'.join(entry_point_filename.split('-')[:-5])+'.ipynb'
                        # now we should try to find the actual file
                        entry_point_alternative = (Path.cwd() / alternative_entry_point).absolute()
                        if not entry_point_alternative.is_file():
                            entry_point_alternative = (Path.cwd() / alternative_entry_point).absolute()

                        # If we found it replace it
                        if entry_point_alternative.exists():
                            entry_point = entry_point_alternative
                    except Exception as ex:
                        cls._get_logger().warning('Failed accessing jupyter notebook {}: {}'.format(notebook_path, ex))

                # get local ipynb for observer
                local_ipynb_file = entry_point.as_posix()

                # now replace the .ipynb with .py
                # we assume we will have that file available with the Jupyter notebook plugin
                entry_point = entry_point.with_suffix('.py')

                script_entry_point = entry_point.as_posix()

            # install the post store hook,
            # notice that if we do not have a local file we serialize/write every time the entire notebook
            cls._jupyter_install_post_store_hook(local_ipynb_file, is_google_colab)

            return script_entry_point
        except Exception:
            return None
Example #11
0
    def upload_artifact(self,
                        name,
                        artifact_object=None,
                        metadata=None,
                        preview=None,
                        delete_after_upload=False,
                        auto_pickle=True):
        # type: (str, Optional[object], Optional[dict], Optional[str], bool, bool) -> bool
        if not Session.check_min_api_version('2.3'):
            LoggerRoot.get_base_logger().warning(
                'Artifacts not supported by your TRAINS-server version, '
                'please upgrade to the latest server version')
            return False

        if name in self._artifacts_container:
            raise ValueError(
                "Artifact by the name of {} is already registered, use register_artifact"
                .format(name))

        # cast preview to string
        if preview:
            preview = str(preview)

        # convert the string to a Path object if it is a file/folder (don't try to serialize long texts)
        if isinstance(artifact_object,
                      six.string_types) and len(artifact_object) < 2048:
            # noinspection PyBroadException
            try:
                artifact_path = Path(artifact_object)
                if artifact_path.exists():
                    artifact_object = artifact_path
                elif '*' in artifact_object or '?' in artifact_object:
                    # hackish, detect a wildcard in the path string
                    folder = Path('').joinpath(*artifact_path.parts[:-1])
                    if folder.is_dir() and folder.parts:
                        wildcard = artifact_path.parts[-1]
                        if list(Path(folder).rglob(wildcard)):
                            artifact_object = artifact_path
            except Exception:
                pass

        artifact_type_data = tasks.ArtifactTypeData()
        artifact_type_data.preview = ''
        override_filename_in_uri = None
        override_filename_ext_in_uri = None
        uri = None
        if np and isinstance(artifact_object, np.ndarray):
            artifact_type = 'numpy'
            artifact_type_data.content_type = 'application/numpy'
            artifact_type_data.preview = preview or str(
                artifact_object.__repr__())
            override_filename_ext_in_uri = '.npz'
            override_filename_in_uri = name + override_filename_ext_in_uri
            fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                         suffix=override_filename_ext_in_uri)
            os.close(fd)
            np.savez_compressed(local_filename, **{name: artifact_object})
            delete_after_upload = True
        elif pd and isinstance(artifact_object, pd.DataFrame):
            artifact_type = 'pandas'
            artifact_type_data.content_type = 'text/csv'
            artifact_type_data.preview = preview or str(
                artifact_object.__repr__())
            override_filename_ext_in_uri = self._save_format
            override_filename_in_uri = name
            fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                         suffix=override_filename_ext_in_uri)
            os.close(fd)
            artifact_object.to_csv(local_filename,
                                   compression=self._compression)
            delete_after_upload = True
        elif isinstance(artifact_object, Image.Image):
            artifact_type = 'image'
            artifact_type_data.content_type = 'image/png'
            desc = str(artifact_object.__repr__())
            artifact_type_data.preview = preview or desc[1:desc.find(' at ')]
            override_filename_ext_in_uri = '.png'
            override_filename_in_uri = name + override_filename_ext_in_uri
            fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                         suffix=override_filename_ext_in_uri)
            os.close(fd)
            artifact_object.save(local_filename)
            delete_after_upload = True
        elif isinstance(artifact_object, dict):
            artifact_type = 'JSON'
            artifact_type_data.content_type = 'application/json'
            preview = preview or json.dumps(
                artifact_object, sort_keys=True, indent=4)
            override_filename_ext_in_uri = '.json'
            override_filename_in_uri = name + override_filename_ext_in_uri
            fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                         suffix=override_filename_ext_in_uri)
            os.write(fd, bytes(preview.encode()))
            os.close(fd)
            if len(preview) < self.max_preview_size_bytes:
                artifact_type_data.preview = preview
            else:
                artifact_type_data.preview = '# full json too large to store, storing first {}kb\n{}'.format(
                    self.max_preview_size_bytes // 1024,
                    preview[:self.max_preview_size_bytes])

            delete_after_upload = True
        elif isinstance(artifact_object, (
                Path,
                pathlib_Path,
        ) if pathlib_Path is not None else (Path, )):
            # check if single file
            artifact_object = Path(artifact_object)

            artifact_object.expanduser().absolute()
            # noinspection PyBroadException
            try:
                create_zip_file = not artifact_object.is_file()
            except Exception:  # Hack for windows pathlib2 bug, is_file isn't valid.
                create_zip_file = True
            else:  # We assume that this is not Windows os
                if artifact_object.is_dir():
                    # change to wildcard
                    artifact_object /= '*'

            if create_zip_file:
                folder = Path('').joinpath(*artifact_object.parts[:-1])
                if not folder.is_dir() or not folder.parts:
                    raise ValueError(
                        "Artifact file/folder '{}' could not be found".format(
                            artifact_object.as_posix()))

                wildcard = artifact_object.parts[-1]
                files = list(Path(folder).rglob(wildcard))
                override_filename_ext_in_uri = '.zip'
                override_filename_in_uri = folder.parts[
                    -1] + override_filename_ext_in_uri
                fd, zip_file = mkstemp(
                    prefix=quote(folder.parts[-1], safe="") + '.',
                    suffix=override_filename_ext_in_uri)
                try:
                    artifact_type_data.content_type = 'application/zip'
                    archive_preview = 'Archive content {}:\n'.format(
                        artifact_object.as_posix())

                    with ZipFile(zip_file,
                                 'w',
                                 allowZip64=True,
                                 compression=ZIP_DEFLATED) as zf:
                        for filename in sorted(files):
                            if filename.is_file():
                                relative_file_name = filename.relative_to(
                                    folder).as_posix()
                                archive_preview += '{} - {}\n'.format(
                                    relative_file_name,
                                    humanfriendly.format_size(
                                        filename.stat().st_size))
                                zf.write(filename.as_posix(),
                                         arcname=relative_file_name)
                except Exception as e:
                    # failed uploading folder:
                    LoggerRoot.get_base_logger().warning(
                        'Exception {}\nFailed zipping artifact folder {}'.
                        format(e, folder))
                    return False
                finally:
                    os.close(fd)
                artifact_type_data.preview = preview or archive_preview
                artifact_object = zip_file
                artifact_type = 'archive'
                artifact_type_data.content_type = mimetypes.guess_type(
                    artifact_object)[0]
                local_filename = artifact_object
                delete_after_upload = True
            else:
                if not artifact_object.is_file():
                    raise ValueError(
                        "Artifact file '{}' could not be found".format(
                            artifact_object.as_posix()))

                override_filename_in_uri = artifact_object.parts[-1]
                artifact_type_data.preview = preview or '{} - {}\n'.format(
                    artifact_object,
                    humanfriendly.format_size(artifact_object.stat().st_size))
                artifact_object = artifact_object.as_posix()
                artifact_type = 'custom'
                artifact_type_data.content_type = mimetypes.guess_type(
                    artifact_object)[0]
                local_filename = artifact_object
        elif (isinstance(artifact_object, six.string_types)
              and len(artifact_object) < 4096
              and urlparse(artifact_object).scheme in remote_driver_schemes):
            # we should not upload this, just register
            local_filename = None
            uri = artifact_object
            artifact_type = 'custom'
            artifact_type_data.content_type = mimetypes.guess_type(
                artifact_object)[0]
        elif isinstance(artifact_object, six.string_types):
            # if we got here, we should store it as text file.
            artifact_type = 'string'
            artifact_type_data.content_type = 'text/plain'
            if preview:
                artifact_type_data.preview = preview
            elif len(artifact_object) < self.max_preview_size_bytes:
                artifact_type_data.preview = artifact_object
            else:
                artifact_type_data.preview = '# full text too large to store, storing first {}kb\n{}'.format(
                    self.max_preview_size_bytes // 1024,
                    artifact_object[:self.max_preview_size_bytes])
            delete_after_upload = True
            override_filename_ext_in_uri = '.txt'
            override_filename_in_uri = name + override_filename_ext_in_uri
            fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                         suffix=override_filename_ext_in_uri)
            os.close(fd)
            # noinspection PyBroadException
            try:
                with open(local_filename, 'wt') as f:
                    f.write(artifact_object)
            except Exception:
                # cleanup and raise exception
                os.unlink(local_filename)
                raise
        elif auto_pickle:
            # if we are here it means we do not know what to do with the object, so we serialize it with pickle.
            artifact_type = 'pickle'
            artifact_type_data.content_type = 'application/pickle'
            # noinspection PyBroadException
            try:
                artifact_type_data.preview = preview or str(
                    artifact_object.__repr__())[:self.max_preview_size_bytes]
            except Exception:
                artifact_type_data.preview = preview or ''
            delete_after_upload = True
            override_filename_ext_in_uri = '.pkl'
            override_filename_in_uri = name + override_filename_ext_in_uri
            fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                         suffix=override_filename_ext_in_uri)
            os.close(fd)
            # noinspection PyBroadException
            try:
                with open(local_filename, 'wb') as f:
                    pickle.dump(artifact_object, f)
            except Exception:
                # cleanup and raise exception
                os.unlink(local_filename)
                raise
        else:
            raise ValueError("Artifact type {} not supported".format(
                type(artifact_object)))

        # remove from existing list, if exists
        for artifact in self._task_artifact_list:
            if artifact.key == name:
                if artifact.type == self._pd_artifact_type:
                    raise ValueError(
                        "Artifact of name {} already registered, "
                        "use register_artifact instead".format(name))

                self._task_artifact_list.remove(artifact)
                break

        if not local_filename:
            file_size = None
            file_hash = None
        else:
            # check that the file to upload exists
            local_filename = Path(local_filename).absolute()
            if not local_filename.exists() or not local_filename.is_file():
                LoggerRoot.get_base_logger().warning(
                    'Artifact upload failed, cannot find file {}'.format(
                        local_filename.as_posix()))
                return False

            file_hash, _ = self.sha256sum(local_filename.as_posix())
            file_size = local_filename.stat().st_size

            uri = self._upload_local_file(
                local_filename,
                name,
                delete_after_upload=delete_after_upload,
                override_filename=override_filename_in_uri,
                override_filename_ext=override_filename_ext_in_uri)

        timestamp = int(time())

        artifact = tasks.Artifact(
            key=name,
            type=artifact_type,
            uri=uri,
            content_size=file_size,
            hash=file_hash,
            timestamp=timestamp,
            type_data=artifact_type_data,
            display_data=[(str(k), str(v))
                          for k, v in metadata.items()] if metadata else None)

        # update task artifacts
        with self._task_edit_lock:
            self._task_artifact_list.append(artifact)
            self._task.set_artifacts(self._task_artifact_list)

        return True
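A hedged usage sketch: this method backs Task.upload_artifact() in trains/clearml, so the numpy, pandas, dict, and file branches above are reachable through the public Task API (project and artifact names below are illustrative).

import pandas as pd
from clearml import Task

task = Task.init(project_name="examples", task_name="artifact upload demo")
df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
task.upload_artifact(name="my_table", artifact_object=df)          # pandas branch above (stored as CSV)
task.upload_artifact(name="config", artifact_object={"lr": 1e-3})  # dict branch above (stored as JSON)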
Example #12
0
    def _make_file_info(target: pathlib.Path,
                        arcname: Optional[str] = None,
                        dereference=False) -> Dict[str, Any]:
        f = {}  # type: Dict[str, Any]
        f['origin'] = target
        if arcname is not None:
            f['filename'] = pathlib.Path(arcname).as_posix()
        else:
            f['filename'] = target.as_posix()
        if os.name == 'nt':
            fstat = target.lstat()
            if target.is_symlink():
                if dereference:
                    fstat = target.stat()
                    if stat.S_ISDIR(fstat.st_mode):
                        f['emptystream'] = True
                        f['attributes'] = fstat.st_file_attributes & FILE_ATTRIBUTE_WINDOWS_MASK  # type: ignore  # noqa
                    else:
                        f['emptystream'] = False
                        f['attributes'] = stat.FILE_ATTRIBUTE_ARCHIVE  # type: ignore  # noqa
                        f['uncompressed'] = fstat.st_size
                else:
                    f['emptystream'] = False
                    f['attributes'] = fstat.st_file_attributes & FILE_ATTRIBUTE_WINDOWS_MASK  # type: ignore  # noqa
                    # f['attributes'] |= stat.FILE_ATTRIBUTE_REPARSE_POINT  # type: ignore  # noqa
            elif target.is_dir():
                f['emptystream'] = True
                f['attributes'] = fstat.st_file_attributes & FILE_ATTRIBUTE_WINDOWS_MASK  # type: ignore  # noqa
            elif target.is_file():
                f['emptystream'] = False
                f['attributes'] = stat.FILE_ATTRIBUTE_ARCHIVE  # type: ignore  # noqa
                f['uncompressed'] = fstat.st_size
        else:
            fstat = target.lstat()
            if target.is_symlink():
                if dereference:
                    fstat = target.stat()
                    if stat.S_ISDIR(fstat.st_mode):
                        f['emptystream'] = True
                        f['attributes'] = stat.FILE_ATTRIBUTE_DIRECTORY  # type: ignore  # noqa
                        f['attributes'] |= FILE_ATTRIBUTE_UNIX_EXTENSION | (
                            stat.S_IFDIR << 16)
                        f['attributes'] |= (stat.S_IMODE(fstat.st_mode) << 16)
                    else:
                        f['emptystream'] = False
                        f['attributes'] = stat.FILE_ATTRIBUTE_ARCHIVE  # type: ignore  # noqa
                        f['attributes'] |= FILE_ATTRIBUTE_UNIX_EXTENSION | (
                            stat.S_IMODE(fstat.st_mode) << 16)
                else:
                    f['emptystream'] = False
                    f['attributes'] = stat.FILE_ATTRIBUTE_ARCHIVE | stat.FILE_ATTRIBUTE_REPARSE_POINT  # type: ignore  # noqa
                    f['attributes'] |= FILE_ATTRIBUTE_UNIX_EXTENSION | (
                        stat.S_IFLNK << 16)
                    f['attributes'] |= (stat.S_IMODE(fstat.st_mode) << 16)
            elif target.is_dir():
                f['emptystream'] = True
                f['attributes'] = stat.FILE_ATTRIBUTE_DIRECTORY  # type: ignore  # noqa
                f['attributes'] |= FILE_ATTRIBUTE_UNIX_EXTENSION | (
                    stat.S_IFDIR << 16)
                f['attributes'] |= (stat.S_IMODE(fstat.st_mode) << 16)
            elif target.is_file():
                f['emptystream'] = False
                f['uncompressed'] = fstat.st_size
                f['attributes'] = stat.FILE_ATTRIBUTE_ARCHIVE  # type: ignore  # noqa
                f['attributes'] |= FILE_ATTRIBUTE_UNIX_EXTENSION | (
                    stat.S_IMODE(fstat.st_mode) << 16)

        f['creationtime'] = fstat.st_ctime
        f['lastwritetime'] = fstat.st_mtime
        f['lastaccesstime'] = fstat.st_atime
        return f
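The POSIX branch above packs the file mode into the high 16 bits of the 7z attribute word; below is a minimal hedged sketch of packing and unpacking it (FILE_ATTRIBUTE_UNIX_EXTENSION assumed to be 0x8000, mirroring the module-level constant referenced above).

import stat

FILE_ATTRIBUTE_UNIX_EXTENSION = 0x8000  # assumption, mirrors the constant referenced above

# pack: regular file with mode 0o644
attributes = FILE_ATTRIBUTE_UNIX_EXTENSION | (stat.S_IMODE(0o100644) << 16)

# unpack: recover the permission bits from the high word
if attributes & FILE_ATTRIBUTE_UNIX_EXTENSION:
    mode = (attributes >> 16) & 0o7777
    print(oct(mode))  # -> 0o644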
Example #13
0
            file.rename(referto_path / avis_name)

    browser.back()


if __name__ == '__main__':
    referti_path = Path("Referti")
    download_path = Path(__file__).cwd() / 'temppdfdownloads'
    config_file = Path('config.json')

    with open(config_file) as data_file:
        params = json.load(data_file)

    profile = FirefoxProfile()
    profile.set_preference("browser.download.folderList", 2)
    profile.set_preference("browser.download.dir", download_path.as_posix())
    profile.set_preference("browser.helperApps.neverAsk.saveToDisk",
                           'application/pdf')
    # disable Firefox's built-in PDF viewer
    profile.set_preference("pdfjs.disabled", True)

    # disable Adobe Acrobat PDF preview plugin
    profile.set_preference("plugin.scan.plid.all", False)
    profile.set_preference("plugin.scan.Acrobat", "99.0")

    browser = webdriver.Firefox(executable_path=params['gekoexecutable'],
                                firefox_profile=profile)
    print('Open')
    browser.get(params['avis_address'])
    print('Open end')
    try:
Example #14
0
    def _daemon(cls, jupyter_notebook_filename):
        from clearml import Task

        # load jupyter notebook package
        # noinspection PyBroadException
        try:
            # noinspection PyPackageRequirements
            from nbconvert.exporters.script import ScriptExporter
            _script_exporter = ScriptExporter()
        except Exception as ex:
            _logger.warning('Could not read Jupyter Notebook: {}'.format(ex))
            return
        # load pigar
        # noinspection PyBroadException
        try:
            from ....utilities.pigar.reqs import get_installed_pkgs_detail, file_import_modules
            from ....utilities.pigar.modules import ReqsModules
            from ....utilities.pigar.log import logger
            logger.setLevel(logging.WARNING)
        except Exception:
            file_import_modules = None
        # load IPython
        # noinspection PyBroadException
        try:
            # noinspection PyPackageRequirements
            from IPython import get_ipython
        except Exception:
            # should not happen
            get_ipython = None

        # setup local notebook files
        if jupyter_notebook_filename:
            notebook = Path(jupyter_notebook_filename)
            local_jupyter_filename = jupyter_notebook_filename
        else:
            notebook = None
            fd, local_jupyter_filename = mkstemp(suffix='.ipynb')
            os.close(fd)
        last_update_ts = None
        counter = 0
        prev_script_hash = None

        # noinspection PyBroadException
        try:
            from ....version import __version__
            our_module = cls.__module__.split('.')[0], __version__
        except Exception:
            our_module = None

        # noinspection PyBroadException
        try:
            import re
            replace_ipython_pattern = re.compile(r'\n([ \t]*)get_ipython\(\)')
        except Exception:
            replace_ipython_pattern = None

        # main observer loop, check if we need to exit
        while not cls._exit_event.wait(timeout=0.):
            # wait for timeout or sync event
            cls._sync_event.wait(cls._sample_frequency if counter else cls._first_sample_frequency)

            cls._sync_event.clear()
            counter += 1
            # noinspection PyBroadException
            try:
                # if there is no task connected, do nothing
                task = Task.current_task()
                if not task:
                    continue

                script_code = None
                fmodules = None
                current_cell = None
                # if we have a local file:
                if notebook:
                    if not notebook.exists():
                        continue
                    # check if notebook changed
                    if last_update_ts is not None and notebook.stat().st_mtime - last_update_ts <= 0:
                        continue
                    last_update_ts = notebook.stat().st_mtime
                else:
                    # serialize notebook to a temp file
                    if cls._jupyter_history_logger:
                        script_code, current_cell = cls._jupyter_history_logger.history_to_str()
                    else:
                        # noinspection PyBroadException
                        try:
                            # noinspection PyBroadException
                            try:
                                os.unlink(local_jupyter_filename)
                            except Exception:
                                pass
                            get_ipython().run_line_magic('history', '-t -f {}'.format(local_jupyter_filename))
                            with open(local_jupyter_filename, 'r') as f:
                                script_code = f.read()
                            # load the modules
                            from ....utilities.pigar.modules import ImportedModules
                            fmodules = ImportedModules()
                            for nm in set([str(m).split('.')[0] for m in sys.modules]):
                                fmodules.add(nm, 'notebook', 0)
                        except Exception:
                            continue

                # get notebook python script
                if script_code is None and local_jupyter_filename:
                    script_code, _ = _script_exporter.from_filename(local_jupyter_filename)
                    if cls._store_notebook_artifact:
                        # also upload the jupyter notebook as artifact
                        task.upload_artifact(
                            name='notebook',
                            artifact_object=Path(local_jupyter_filename),
                            preview='See `notebook preview` artifact',
                            metadata={'UPDATE': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')},
                            wait_on_upload=True,
                        )
                        # noinspection PyBroadException
                        try:
                            from nbconvert.exporters import HTMLExporter  # noqa
                            html, _ = HTMLExporter().from_filename(filename=local_jupyter_filename)
                            local_html = Path(gettempdir()) / 'notebook_{}.html'.format(task.id)
                            with open(local_html.as_posix(), 'wt') as f:
                                f.write(html)
                            task.upload_artifact(
                                name='notebook preview', artifact_object=local_html,
                                preview='Click `FILE PATH` link',
                                metadata={'UPDATE': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')},
                                delete_after_upload=True,
                                wait_on_upload=True,
                            )
                        except Exception:
                            pass

                current_script_hash = hash(script_code + (current_cell or ''))
                if prev_script_hash and prev_script_hash == current_script_hash:
                    continue

                # remove ipython direct access from the script code
                # we will not be able to run them anyhow
                if replace_ipython_pattern:
                    script_code = replace_ipython_pattern.sub(r'\n# \g<1>get_ipython()', script_code)

                requirements_txt = ''
                conda_requirements = ''
                # parse jupyter python script and prepare pip requirements (pigar)
                # if backend supports requirements
                if file_import_modules and Session.check_min_api_version('2.2'):
                    if fmodules is None:
                        fmodules, _ = file_import_modules(
                            notebook.parts[-1] if notebook else 'notebook', script_code)
                        if current_cell:
                            cell_fmodules, _ = file_import_modules(
                                notebook.parts[-1] if notebook else 'notebook', current_cell)
                            # noinspection PyBroadException
                            try:
                                fmodules |= cell_fmodules
                            except Exception:
                                pass
                    # add current cell to the script
                    if current_cell:
                        script_code += '\n' + current_cell
                    fmodules = ScriptRequirements.add_trains_used_packages(fmodules)
                    # noinspection PyUnboundLocalVariable
                    installed_pkgs = get_installed_pkgs_detail()
                    # make sure we are in installed packages
                    if our_module and (our_module[0] not in installed_pkgs):
                        installed_pkgs[our_module[0]] = our_module

                    # noinspection PyUnboundLocalVariable
                    reqs = ReqsModules()
                    for name in fmodules:
                        if name in installed_pkgs:
                            pkg_name, version = installed_pkgs[name]
                            reqs.add(pkg_name, version, fmodules[name])
                    requirements_txt, conda_requirements = ScriptRequirements.create_requirements_txt(reqs)

                # update script
                prev_script_hash = current_script_hash
                data_script = task.data.script
                data_script.diff = script_code
                data_script.requirements = {'pip': requirements_txt, 'conda': conda_requirements}
                # noinspection PyProtectedMember
                task._update_script(script=data_script)
                # update requirements
                # noinspection PyProtectedMember
                task._update_requirements(requirements=requirements_txt)
            except Exception:
                pass
Example #15
0
'''
SOURCE DATA
'''
task_dir = Path('/data2/polo/half_baked_data/slopes/abs')
single_task_slope_csv = task_dir / 'raw_slopes_ok_subjs_abs.csv'
corrected_single_task_csv = task_dir / 'deCAPed_preHD_slopes_abs.csv'
n_visit_csv = task_dir / 'n_visits_used.csv'
in_mat = Path().cwd().parent / 'VBM_controls' /\
    'TON_log_deg_maps_local_gm_corrected.mat'

'''
LOAD
'''
source = 'python'
subject_list = ps.load_subject_list(in_mat.as_posix(), source=source)
n_df = pd.read_csv(n_visit_csv, index_col='subjid')
slope_df = pd.read_csv(single_task_slope_csv, index_col='subjid')

task_names = n_df.columns.values

behav_n_imag = [s for s in subject_list if s in slope_df.index]

x = slope_df.loc[behav_n_imag]['group'] == 'preHD'
preHD_idx = x.loc[x].index.values
n_df.corrwith(slope_df.loc[preHD_idx][task_names])
task_corrs = OrderedDict()

corr_dict = OrderedDict()
corr_dict['task'] = task_names
p_vals = []
Example #16
0
    def create_task(self):
        # type: () -> Task
        """
        Create the new populated Task

        :return: newly created Task object
        """
        local_entry_file = None
        repo_info = None
        if self.folder or (self.script and Path(self.script).is_file()
                           and not self.repo):
            self.folder = os.path.expandvars(os.path.expanduser(
                self.folder)) if self.folder else None
            self.script = os.path.expandvars(os.path.expanduser(
                self.script)) if self.script else None
            self.cwd = os.path.expandvars(os.path.expanduser(
                self.cwd)) if self.cwd else None
            if Path(self.script).is_file():
                entry_point = self.script
            else:
                entry_point = (Path(self.folder) / self.script).as_posix()
            entry_point = os.path.abspath(entry_point)
            if not os.path.isfile(entry_point):
                raise ValueError(
                    "Script entrypoint file \'{}\' could not be found".format(
                        entry_point))

            local_entry_file = entry_point
            repo_info, requirements = ScriptInfo.get(
                filepaths=[entry_point],
                log=getLogger(),
                create_requirements=self.packages is True,
                uncommitted_from_remote=True,
                detect_jupyter_notebook=False)

        # if we have no repository and no requirements, raise an error
        if self.raise_on_missing_entries and (not self.requirements_file and not self.packages) \
                and not self.repo and (
                not repo_info or not repo_info.script or not repo_info.script.get('repository')):
            raise ValueError(
                "Standalone script detected \'{}\', but no requirements provided"
                .format(self.script))

        if self.base_task_id:
            if self.verbose:
                print('Cloning task {}'.format(self.base_task_id))
            task = Task.clone(source_task=self.base_task_id,
                              project=Task.get_project_id(self.project_name))
        else:
            # noinspection PyProtectedMember
            task = Task._create(task_name=self.task_name,
                                project_name=self.project_name,
                                task_type=self.task_type
                                or Task.TaskTypes.training)

            # if there is nothing to populate, return
            if not any([
                    self.folder, self.commit, self.branch, self.repo,
                    self.script, self.cwd, self.packages,
                    self.requirements_file, self.base_task_id, self.docker
            ]):
                return task

        task_state = task.export_task()
        if 'script' not in task_state:
            task_state['script'] = {}

        if repo_info:
            task_state['script']['repository'] = repo_info.script['repository']
            task_state['script']['version_num'] = repo_info.script[
                'version_num']
            task_state['script']['branch'] = repo_info.script['branch']
            task_state['script']['diff'] = repo_info.script['diff'] or ''
            task_state['script']['working_dir'] = repo_info.script[
                'working_dir']
            task_state['script']['entry_point'] = repo_info.script[
                'entry_point']
            task_state['script']['binary'] = repo_info.script['binary']
            task_state['script']['requirements'] = repo_info.script.get(
                'requirements') or {}
            if self.cwd:
                cwd = self.cwd if Path(self.cwd).is_dir() else (
                    Path(repo_info.script['repo_root']) / self.cwd).as_posix()
                if not Path(cwd).is_dir():
                    raise ValueError(
                        "Working directory \'{}\' could not be found".format(
                            cwd))
                cwd = Path(cwd).relative_to(
                    repo_info.script['repo_root']).as_posix()
                entry_point = \
                    Path(repo_info.script['repo_root']) / repo_info.script['working_dir'] / repo_info.script[
                        'entry_point']
                entry_point = entry_point.relative_to(cwd).as_posix()
                task_state['script']['entry_point'] = entry_point
                task_state['script']['working_dir'] = cwd
        elif self.repo:
            # normalize the relative path: drop a leading '/' and any '.' components
            entry_point = '/'.join(
                [p for p in self.script.split('/') if p and p != '.'])
            cwd = '/'.join(
                [p for p in (self.cwd or '.').split('/') if p and p != '.'])
            if cwd and entry_point.startswith(cwd + '/'):
                entry_point = entry_point[len(cwd) + 1:]
            task_state['script']['repository'] = self.repo
            task_state['script']['version_num'] = self.commit or None
            task_state['script']['branch'] = self.branch or None
            task_state['script']['diff'] = ''
            task_state['script']['working_dir'] = cwd or '.'
            task_state['script']['entry_point'] = entry_point
        else:
            # standalone task
            task_state['script']['entry_point'] = self.script
            task_state['script']['working_dir'] = '.'

        # update requirements
        reqs = []
        if self.requirements_file:
            with open(self.requirements_file.as_posix(), 'rt') as f:
                reqs = [line.strip() for line in f.readlines()]
        if self.packages and self.packages is not True:
            reqs += self.packages
        if reqs:
            # make sure we have clearml.
            clearml_found = False
            for line in reqs:
                if line.strip().startswith('#'):
                    continue
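                # strip any version/marker suffix: split on each separator character in
                # turn and keep only the left-hand side, leaving the bare package name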
                package = reduce(lambda a, b: a.split(b)[0], "#;@=~<>",
                                 line).strip()
                if package == 'clearml':
                    clearml_found = True
                    break
            if not clearml_found:
                reqs.append('clearml')
            task_state['script']['requirements'] = {'pip': '\n'.join(reqs)}
        elif not self.repo and repo_info and not repo_info.script.get(
                'requirements'):
            # we are in local mode, make sure a "requirements.txt" exists (it is a must)
            reqs_txt_file = Path(
                repo_info.script['repo_root']) / "requirements.txt"
            if self.raise_on_missing_entries and not reqs_txt_file.is_file():
                raise ValueError("requirements.txt not found [{}] "
                                 "Use --requirements or --packages".format(
                                     reqs_txt_file.as_posix()))

        if self.add_task_init_call:
            script_entry = os.path.abspath(
                '/' + task_state['script'].get('working_dir', '.') + '/' +
                task_state['script']['entry_point'])
            idx_a = 0
            # find the right entry point for the patch if we have a local file (i.e. just after the __future__ imports)
            if local_entry_file:
                with open(local_entry_file, 'rt') as f:
                    lines = f.readlines()
                future_found = self._locate_future_import(lines)
                if future_found >= 0:
                    idx_a = future_found + 1

            task_init_patch = ''
            if self.repo or task_state.get('script', {}).get('repository'):
                # if we do not have requirements, add clearml to the requirements.txt
                if not reqs:
                    task_init_patch += \
                        "diff --git a/requirements.txt b/requirements.txt\n" \
                        "--- a/requirements.txt\n" \
                        "+++ b/requirements.txt\n" \
                        "@@ -0,0 +1,1 @@\n" \
                        "+clearml\n"

                # Add Task.init call
                task_init_patch += \
                    "diff --git a{script_entry} b{script_entry}\n" \
                    "--- a{script_entry}\n" \
                    "+++ b{script_entry}\n" \
                    "@@ -{idx_a},0 +{idx_b},3 @@\n" \
                    "+from clearml import Task\n" \
                    "+Task.init()\n" \
                    "+\n".format(
                        script_entry=script_entry, idx_a=idx_a, idx_b=idx_a + 1)
            else:
                # Add Task.init call
                task_init_patch += \
                    "from clearml import Task\n" \
                    "Task.init()\n\n"

            # make sure we add the diff at the end of the current diff
            task_state['script']['diff'] = task_state['script'].get('diff', '')
            if task_state['script']['diff'] and not task_state['script'][
                    'diff'].endswith('\n'):
                task_state['script']['diff'] += '\n'
            task_state['script']['diff'] += task_init_patch

        # set base docker image if provided
        if self.docker:
            task.set_base_docker(
                docker_cmd=self.docker.get('image'),
                docker_arguments=self.docker.get('args'),
                docker_setup_bash_script=self.docker.get('bash_script'),
            )

        if self.verbose:
            if task_state['script']['repository']:
                repo_details = {
                    k: v
                    for k, v in task_state['script'].items()
                    if v and k not in ('diff', 'requirements', 'binary')
                }
                print('Repository Detected\n{}'.format(
                    json.dumps(repo_details, indent=2)))
            else:
                print('Standalone script detected\n  Script: {}'.format(
                    self.script))

            if task_state['script'].get('requirements') and \
                    task_state['script']['requirements'].get('pip'):
                print('Requirements:{}{}'.format(
                    '\n  Using requirements.txt: {}'.format(
                        self.requirements_file.as_posix())
                    if self.requirements_file else '',
                    '\n  {}Packages: {}'.format(
                        'Additional ' if self.requirements_file else '',
                        self.packages) if self.packages else ''))
            if self.docker:
                print('Base docker image: {}'.format(self.docker))

        # update the Task
        task.update_task(task_state)
        self.task = task
        return task
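
The requirements check above relies on a small reduce() trick to get the bare package name out of a requirement line. A standalone illustration (the example line is made up):

from functools import reduce

line = "clearml==1.9.0 ; python_version >= '3.6'  # pinned for the agent"
package = reduce(lambda a, b: a.split(b)[0], "#;@=~<>", line).strip()
print(package)  # prints: clearml
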
Example #17
0
class CreateAndPopulate(object):
    def __init__(
            self,
            project_name=None,  # Optional[str]
            task_name=None,  # Optional[str]
            task_type=None,  # Optional[str]
            repo=None,  # Optional[str]
            branch=None,  # Optional[str]
            commit=None,  # Optional[str]
            script=None,  # Optional[str]
            working_directory=None,  # Optional[str]
            packages=None,  # Optional[Union[bool, Sequence[str]]]
            requirements_file=None,  # Optional[Union[str, Path]]
            docker=None,  # Optional[str]
            docker_args=None,  # Optional[str]
            docker_bash_setup_script=None,  # Optional[str]
            base_task_id=None,  # Optional[str]
            add_task_init_call=True,  # bool
            raise_on_missing_entries=False,  # bool
            verbose=False,  # bool
    ):
        # type: (...) -> None
        """
        Create a new Task from an existing code base.
        If the code does not already contain a call to Task.init, pass add_task_init_call=True,
        and the code will be patched in remote execution (i.e. when executed by `clearml-agent`).

        :param project_name: Set the project name for the task. Required if base_task_id is None.
        :param task_name: Set the name of the remote task. Required if base_task_id is None.
        :param task_type: Optional, The task type to be created. Supported values: 'training', 'testing', 'inference',
            'data_processing', 'application', 'monitor', 'controller', 'optimizer', 'service', 'qc', 'custom'
        :param repo: Remote URL for the repository to use, OR path to local copy of the git repository
            Example: 'https://github.com/allegroai/clearml.git' or '~/project/repo'
        :param branch: Select specific repository branch/tag (implies the latest commit from the branch)
        :param commit: Select specific commit id to use (default: latest commit,
            or when used with local repository matching the local commit id)
        :param script: Specify the entry point script for the remote execution. When used in tandem with
            remote git repository the script should be a relative path inside the repository,
            for example: './source/train.py' . When used with local repository path it supports a
            direct path to a file inside the local repository itself, for example: '~/project/source/train.py'
        :param working_directory: Working directory to launch the script from. Default: repository root folder.
            Relative to repo root or local folder.
        :param packages: Manually specify a list of required packages. Example: ["tqdm>=2.1", "scikit-learn"]
            or `True` to automatically create requirements
            based on locally installed packages (repository must be local).
        :param requirements_file: Specify requirements.txt file to install when setting the session.
            If not provided, the requirements.txt from the repository will be used.
        :param docker: Select the docker image to be executed in by the remote session
        :param docker_args: Add docker arguments, pass a single string
        :param docker_bash_setup_script: Add bash script to be executed
            inside the docker before setting up the Task's environment
        :param base_task_id: Use a pre-existing task in the system, instead of a local repo/script.
            Essentially clones an existing task and overrides arguments/requirements.
        :param add_task_init_call: If True, a 'Task.init()' call is added to the script entry point in remote execution.
        :param raise_on_missing_entries: If True raise ValueError on missing entries when populating
        :param verbose: If True print verbose logging
        """
        if len(urlparse(repo).scheme) <= 1:
            folder = repo
            repo = None
        else:
            folder = None

        if raise_on_missing_entries and not base_task_id:
            if not script:
                raise ValueError("Entry point script not provided")
            if not repo and not folder and not Path(script).is_file():
                raise ValueError(
                    "Script file \'{}\' could not be found".format(script))
        if raise_on_missing_entries and commit and branch:
            raise ValueError(
                "Specify either a branch/tag or specific commit id, not both (either --commit or --branch)"
            )
        if raise_on_missing_entries and not folder and working_directory and working_directory.startswith(
                '/'):
            raise ValueError(
                "working directory \'{}\' must be relative to the repository root".format(
                    working_directory))

        if requirements_file and not Path(requirements_file).is_file():
            raise ValueError("requirements file could not be found \'{}\'")

        self.folder = folder
        self.commit = commit
        self.branch = branch
        self.repo = repo
        self.script = script
        self.cwd = working_directory
        assert not packages or isinstance(packages, (tuple, list, bool))
        self.packages = list(packages) if packages is not None and not isinstance(packages, bool) \
            else (packages or None)
        self.requirements_file = Path(
            requirements_file) if requirements_file else None
        self.base_task_id = base_task_id
        self.docker = dict(image=docker,
                           args=docker_args,
                           bash_script=docker_bash_setup_script)
        self.add_task_init_call = add_task_init_call
        self.project_name = project_name
        self.task_name = task_name
        self.task_type = task_type
        self.task = None
        self.raise_on_missing_entries = raise_on_missing_entries
        self.verbose = verbose

    def create_task(self):
        # type: () -> Task
        """
        Create the new populated Task

        :return: newly created Task object
        """
        local_entry_file = None
        repo_info = None
        if self.folder or (self.script and Path(self.script).is_file()
                           and not self.repo):
            self.folder = os.path.expandvars(os.path.expanduser(
                self.folder)) if self.folder else None
            self.script = os.path.expandvars(os.path.expanduser(
                self.script)) if self.script else None
            self.cwd = os.path.expandvars(os.path.expanduser(
                self.cwd)) if self.cwd else None
            if Path(self.script).is_file():
                entry_point = self.script
            else:
                entry_point = (Path(self.folder) / self.script).as_posix()
            entry_point = os.path.abspath(entry_point)
            if not os.path.isfile(entry_point):
                raise ValueError(
                    "Script entrypoint file \'{}\' could not be found".format(
                        entry_point))

            local_entry_file = entry_point
            repo_info, requirements = ScriptInfo.get(
                filepaths=[entry_point],
                log=getLogger(),
                create_requirements=self.packages is True,
                uncommitted_from_remote=True,
                detect_jupyter_notebook=False)

        # if we have no repository and no requirements, raise an error
        if self.raise_on_missing_entries and (not self.requirements_file and not self.packages) \
                and not self.repo and (
                not repo_info or not repo_info.script or not repo_info.script.get('repository')):
            raise ValueError(
                "Standalone script detected \'{}\', but no requirements provided"
                .format(self.script))

        if self.base_task_id:
            if self.verbose:
                print('Cloning task {}'.format(self.base_task_id))
            task = Task.clone(source_task=self.base_task_id,
                              project=Task.get_project_id(self.project_name))
        else:
            # noinspection PyProtectedMember
            task = Task._create(task_name=self.task_name,
                                project_name=self.project_name,
                                task_type=self.task_type
                                or Task.TaskTypes.training)

            # if there is nothing to populate, return
            if not any([
                    self.folder, self.commit, self.branch, self.repo,
                    self.script, self.cwd, self.packages,
                    self.requirements_file, self.base_task_id, self.docker
            ]):
                return task

        task_state = task.export_task()
        if 'script' not in task_state:
            task_state['script'] = {}

        if repo_info:
            task_state['script']['repository'] = repo_info.script['repository']
            task_state['script']['version_num'] = repo_info.script[
                'version_num']
            task_state['script']['branch'] = repo_info.script['branch']
            task_state['script']['diff'] = repo_info.script['diff'] or ''
            task_state['script']['working_dir'] = repo_info.script[
                'working_dir']
            task_state['script']['entry_point'] = repo_info.script[
                'entry_point']
            task_state['script']['binary'] = repo_info.script['binary']
            task_state['script']['requirements'] = repo_info.script.get(
                'requirements') or {}
            if self.cwd:
                cwd = self.cwd if Path(self.cwd).is_dir() else (
                    Path(repo_info.script['repo_root']) / self.cwd).as_posix()
                if not Path(cwd).is_dir():
                    raise ValueError(
                        "Working directory \'{}\' could not be found".format(
                            cwd))
                cwd = Path(cwd).relative_to(
                    repo_info.script['repo_root']).as_posix()
                entry_point = \
                    Path(repo_info.script['repo_root']) / repo_info.script['working_dir'] / repo_info.script[
                        'entry_point']
                entry_point = entry_point.relative_to(cwd).as_posix()
                task_state['script']['entry_point'] = entry_point
                task_state['script']['working_dir'] = cwd
        elif self.repo:
            # normalize the relative path: drop a leading '/' and any '.' components
            entry_point = '/'.join(
                [p for p in self.script.split('/') if p and p != '.'])
            cwd = '/'.join(
                [p for p in (self.cwd or '.').split('/') if p and p != '.'])
            if cwd and entry_point.startswith(cwd + '/'):
                entry_point = entry_point[len(cwd) + 1:]
            task_state['script']['repository'] = self.repo
            task_state['script']['version_num'] = self.commit or None
            task_state['script']['branch'] = self.branch or None
            task_state['script']['diff'] = ''
            task_state['script']['working_dir'] = cwd or '.'
            task_state['script']['entry_point'] = entry_point
        else:
            # standalone task
            task_state['script']['entry_point'] = self.script
            task_state['script']['working_dir'] = '.'

        # update requirements
        reqs = []
        if self.requirements_file:
            with open(self.requirements_file.as_posix(), 'rt') as f:
                reqs = [line.strip() for line in f.readlines()]
        if self.packages and self.packages is not True:
            reqs += self.packages
        if reqs:
            # make sure we have clearml.
            clearml_found = False
            for line in reqs:
                if line.strip().startswith('#'):
                    continue
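                # strip any version/marker suffix: split on each separator character in
                # turn and keep only the left-hand side, leaving the bare package name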
                package = reduce(lambda a, b: a.split(b)[0], "#;@=~<>",
                                 line).strip()
                if package == 'clearml':
                    clearml_found = True
                    break
            if not clearml_found:
                reqs.append('clearml')
            task_state['script']['requirements'] = {'pip': '\n'.join(reqs)}
        elif not self.repo and repo_info and not repo_info.script.get(
                'requirements'):
            # we are in local mode, make sure a "requirements.txt" exists (it is a must)
            reqs_txt_file = Path(
                repo_info.script['repo_root']) / "requirements.txt"
            if self.raise_on_missing_entries and not reqs_txt_file.is_file():
                raise ValueError("requirements.txt not found [{}] "
                                 "Use --requirements or --packages".format(
                                     reqs_txt_file.as_posix()))

        if self.add_task_init_call:
            script_entry = os.path.abspath(
                '/' + task_state['script'].get('working_dir', '.') + '/' +
                task_state['script']['entry_point'])
            idx_a = 0
            # find the right entry point for the patch if we have a local file (i.e. just after the __future__ imports)
            if local_entry_file:
                with open(local_entry_file, 'rt') as f:
                    lines = f.readlines()
                future_found = self._locate_future_import(lines)
                if future_found >= 0:
                    idx_a = future_found + 1

            task_init_patch = ''
            if self.repo or task_state.get('script', {}).get('repository'):
                # if we do not have requirements, add clearml to the requirements.txt
                if not reqs:
                    task_init_patch += \
                        "diff --git a/requirements.txt b/requirements.txt\n" \
                        "--- a/requirements.txt\n" \
                        "+++ b/requirements.txt\n" \
                        "@@ -0,0 +1,1 @@\n" \
                        "+clearml\n"

                # Add Task.init call
                task_init_patch += \
                    "diff --git a{script_entry} b{script_entry}\n" \
                    "--- a{script_entry}\n" \
                    "+++ b{script_entry}\n" \
                    "@@ -{idx_a},0 +{idx_b},3 @@\n" \
                    "+from clearml import Task\n" \
                    "+Task.init()\n" \
                    "+\n".format(
                        script_entry=script_entry, idx_a=idx_a, idx_b=idx_a + 1)
            else:
                # Add Task.init call
                task_init_patch += \
                    "from clearml import Task\n" \
                    "Task.init()\n\n"

            # make sure we add the diff at the end of the current diff
            task_state['script']['diff'] = task_state['script'].get('diff', '')
            if task_state['script']['diff'] and not task_state['script'][
                    'diff'].endswith('\n'):
                task_state['script']['diff'] += '\n'
            task_state['script']['diff'] += task_init_patch

        # set base docker image if provided
        if self.docker:
            task.set_base_docker(
                docker_cmd=self.docker.get('image'),
                docker_arguments=self.docker.get('args'),
                docker_setup_bash_script=self.docker.get('bash_script'),
            )

        if self.verbose:
            if task_state['script']['repository']:
                repo_details = {
                    k: v
                    for k, v in task_state['script'].items()
                    if v and k not in ('diff', 'requirements', 'binary')
                }
                print('Repository Detected\n{}'.format(
                    json.dumps(repo_details, indent=2)))
            else:
                print('Standalone script detected\n  Script: {}'.format(
                    self.script))

            if task_state['script'].get('requirements') and \
                    task_state['script']['requirements'].get('pip'):
                print('Requirements:{}{}'.format(
                    '\n  Using requirements.txt: {}'.format(
                        self.requirements_file.as_posix())
                    if self.requirements_file else '',
                    '\n  {}Packages: {}'.format(
                        'Additional ' if self.requirements_file else '',
                        self.packages) if self.packages else ''))
            if self.docker:
                print('Base docker image: {}'.format(self.docker))

        # update the Task
        task.update_task(task_state)
        self.task = task
        return task

    def update_task_args(self, args=None):
        # type: (Optional[Union[Sequence[str], Sequence[Tuple[str, str]]]]) -> ()
        """
        Update the newly created Task argparse Arguments
        If called before Task created, used for argument verification

        :param args: Arguments to pass to the remote execution, list of string pairs (argument, value) or
            list of strings '<argument>=<value>'. Example: ['lr=0.003', ('batch_size', 64)]
        """
        if not args:
            return

        # check args are in format <key>=<value>
        args_list = []
        for a in args:
            if isinstance(a, (list, tuple)):
                assert len(a) == 2
                args_list.append(a)
                continue
            try:
                parts = a.split('=', 1)
                assert len(parts) == 2
                args_list.append(parts)
            except Exception:
                raise ValueError(
                    "Failed parsing argument \'{}\', arguments must be in \'<key>=<value>\' format".format(a))

        if not self.task:
            return

        task_params = self.task.get_parameters()
        args_list = {'Args/{}'.format(k): v for k, v in args_list}
        task_params.update(args_list)
        self.task.set_parameters(task_params)

    def get_id(self):
        # type: () -> Optional[str]
        """
        :return: Return the created Task id (str)
        """
        return self.task.id if self.task else None

    @staticmethod
    def _locate_future_import(lines):
        # type: (List[str]) -> int
        """
        :param lines: string lines of a python file
        :return: line index of the last __future__ import. Returns -1 if no __future__ import was found
        """
        # keep only non-empty, non-comment lines (inline comments stripped),
        # remembering each line's original index
        lines = [(i, line.split('#', 1)[0].rstrip())
                 for i, line in enumerate(lines)
                 if line.strip('\r\n\t ') and not line.strip().startswith('#')]

        # remove triple quotes ' """ '
        nested_c = -1
        skip_lines = []
        for i, line_pair in enumerate(lines):
            for _ in line_pair[1].split('"""')[1:]:
                if nested_c >= 0:
                    skip_lines.extend(list(range(nested_c, i + 1)))
                    nested_c = -1
                else:
                    nested_c = i
        # keep only the lines that fall outside triple-quoted strings
        lines = [pair for i, pair in enumerate(lines) if i not in skip_lines]

        from_future = re.compile(r"^from[\s]*__future__[\s]*")
        import_future = re.compile(r"^import[\s]*__future__[\s]*")
        # test if we have __future__ import
        found_index = -1
        for a_i, (_, a_line) in enumerate(lines):
            if found_index >= a_i:
                continue
            if from_future.match(a_line) or import_future.match(a_line):
                found_index = a_i
                # check the last import block
                i, line = lines[found_index]
                # either the line ends with a \ continuation character or the import list is parenthesized
                parenthesized_lines = '(' in line and ')' not in line
                while line.endswith('\\') or parenthesized_lines:
                    found_index += 1
                    i, line = lines[found_index]
                    if ')' in line:
                        break

            else:
                break

        return found_index if found_index < 0 else lines[found_index][0]
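
A minimal usage sketch for CreateAndPopulate, assuming a configured ClearML setup; the project name, task name, and entry-point script are placeholders, and the repository URL is the one given in the constructor docstring:

populator = CreateAndPopulate(
    project_name='examples',                              # placeholder project
    task_name='remote scalar reporting',                  # placeholder task name
    repo='https://github.com/allegroai/clearml.git',
    branch='master',
    script='examples/reporting/scalar_reporting.py',      # placeholder entry point
    packages=['clearml'],
    add_task_init_call=True,
    verbose=True,
)
task = populator.create_task()
populator.update_task_args(['iterations=10', ('batch_size', 64)])
print('created task id:', populator.get_id())
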
Example #18
0
class CondaAPI(PackageManager):
    """
    A programmatic interface for controlling conda
    """

    MINIMUM_VERSION = "4.3.30"

    def __init__(self,
                 session,
                 path,
                 python,
                 requirements_manager,
                 execution_info=None,
                 **kwargs):
        # type: (Session, PathLike, float, RequirementsManager, ExecutionInfo, Any) -> None
        """
        :param python: base python version to use (e.g. python3.6)
        :param path: path of env
        """
        self.session = session
        self.python = python
        self.source = None
        self.requirements_manager = requirements_manager
        self.path = path
        self.env_read_only = False
        self.extra_channels = self.session.config.get(
            'agent.package_manager.conda_channels', [])
        self.conda_env_as_base_docker = \
            self.session.config.get('agent.package_manager.conda_env_as_base_docker', None) or \
            bool(ENV_CONDA_ENV_PACKAGE.get())
        if ENV_CONDA_ENV_PACKAGE.get():
            self.conda_pre_build_env_path = ENV_CONDA_ENV_PACKAGE.get()
        else:
            self.conda_pre_build_env_path = execution_info.docker_cmd if execution_info else None
        self.pip = CondaPip(
            session=self.session,
            source=self.source,
            python=self.python,
            requirements_manager=self.requirements_manager,
            path=self.path,
        )
        try:
            self.conda = (find_executable("conda") or Argv(
                select_for_platform(windows="where", linux="which"),
                "conda").get_output(shell=select_for_platform(
                    windows=True, linux=False)).strip())
        except Exception:
            raise ValueError("ERROR: package manager \"conda\" selected, "
                             "but \'conda\' executable could not be located")
        try:
            output = Argv(self.conda,
                          "--version").get_output(stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as ex:
            raise CommandFailedError(
                "Unable to determine conda version: {ex}, output={ex.output}".
                format(ex=ex))
        self.conda_version = self.get_conda_version(output)
        if SimpleVersion.compare_versions(self.conda_version, '<',
                                          self.MINIMUM_VERSION):
            raise CommandFailedError(
                "conda version '{}' is smaller than minimum supported conda version '{}'"
                .format(self.conda_version, self.MINIMUM_VERSION))

    @staticmethod
    def get_conda_version(output):
        match = re.search(r"(\d+\.){0,2}\d+", output)
        if not match:
            raise CommandFailedError("Unidentified conda version string:",
                                     output)
        return match.group(0)

    @property
    def bin(self):
        return self.pip.bin

    # noinspection SpellCheckingInspection
    def upgrade_pip(self):
        # do not change the pip version if a pre-built environment is used
        if self.env_read_only:
            print('Conda environment in read-only mode, skipping pip upgrade.')
            return ''
        return self._install("pip" + self.pip.get_pip_version())

    def create(self):
        """
        Create a new environment
        """
        if self.conda_env_as_base_docker and self.conda_pre_build_env_path:
            if Path(self.conda_pre_build_env_path).is_dir():
                print("Using pre-existing Conda environment from {}".format(
                    self.conda_pre_build_env_path))
                self.path = Path(self.conda_pre_build_env_path)
                self.source = ("conda", "activate", self.path.as_posix())
                self.pip = CondaPip(
                    session=self.session,
                    source=self.source,
                    python=self.python,
                    requirements_manager=self.requirements_manager,
                    path=self.path,
                )
                conda_env = Path(
                    self.conda
                ).parent.parent / 'etc' / 'profile.d' / 'conda.sh'
                self.source = self.pip.source = CommandSequence(
                    ('source', conda_env.as_posix()), self.source)
                self.env_read_only = True
                return self
            elif Path(self.conda_pre_build_env_path).is_file():
                print("Restoring Conda environment from {}".format(
                    self.conda_pre_build_env_path))
                tar_path = find_executable("tar")
                self.path.mkdir(parents=True, exist_ok=True)
                output = Argv(
                    tar_path,
                    "-xzf",
                    self.conda_pre_build_env_path,
                    "-C",
                    self.path,
                ).get_output()

                self.source = self.pip.source = ("conda", "activate",
                                                 self.path.as_posix())
                conda_env = Path(
                    self.conda
                ).parent.parent / 'etc' / 'profile.d' / 'conda.sh'
                self.source = self.pip.source = CommandSequence(
                    ('source', conda_env.as_posix()), self.source)
                # unpack cleanup
                print("Fixing prefix in Conda environment {}".format(
                    self.path))
                CommandSequence(('source', conda_env.as_posix()),
                                ((self.path / 'bin' /
                                  'conda-unpack').as_posix(), )).get_output()
                return self
            else:
                raise ValueError(
                    "Could not restore Conda environment, cannot find {}".
                    format(self.conda_pre_build_env_path))

        output = Argv(
            self.conda,
            "create",
            "--yes",
            "--mkdir",
            "--prefix",
            self.path,
            "python={}".format(self.python),
        ).get_output(stderr=DEVNULL)
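        # "conda create" prints an activation hint (e.g. "source activate <prefix>");
        # capture it so the same command can later be used to activate the environment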
        match = re.search(
            r"\W*(.*activate) ({})".format(re.escape(str(self.path))), output)
        self.source = self.pip.source = (tuple(match.group(1).split()) +
                                         (match.group(2), ) if match else
                                         ("conda", "activate",
                                          self.path.as_posix()))

        conda_env = Path(
            self.conda).parent.parent / 'etc' / 'profile.d' / 'conda.sh'
        if conda_env.is_file() and not is_windows_platform():
            self.source = self.pip.source = CommandSequence(
                ('source', conda_env.as_posix()), self.source)

        # install cuda toolkit
        # noinspection PyBroadException
        try:
            cuda_version = float(int(
                self.session.config['agent.cuda_version'])) / 10.0
            if cuda_version > 0:
                self._install('cudatoolkit={:.1f}'.format(cuda_version))
        except Exception:
            pass
        return self

    def remove(self):
        """
        Delete a conda environment.
        Use 'conda env remove', then 'rm_tree' to be safe.

        Conda seems to load "vcruntime140.dll" from all its environments on startup.
        This means environments have to be deleted using 'conda env remove'.
        If necessary, conda can be fooled into deleting a partially-deleted environment by creating an empty file
        in '<ENV>\conda-meta\history' (value found in 'conda.gateways.disk.test.PREFIX_MAGIC_FILE').
        Otherwise, it complains that said directory is not a conda environment.

        See: https://github.com/conda/conda/issues/7682
        """
        try:
            self._run_command(("env", "remove", "-p", self.path))
        except Exception:
            pass
        rm_tree(self.path)
        # if we failed to remove the path, change its name
        if is_windows_platform() and Path(self.path).exists():
            try:
                Path(self.path).rename(
                    Path(self.path).as_posix() + '_' + str(time()))
            except Exception:
                pass

    def _install_from_file(self, path):
        """
        Install packages from requirement file.
        """
        self._install("--file", path)

    def _install(self, *args):
        # type: (*PathLike) -> ()
        # if we are in read only mode, do not install anything
        if self.env_read_only:
            print(
                'Conda environment in read-only mode, skipping package installing: {}'
                .format(args))
            return
        channels_args = tuple(
            chain.from_iterable(
                ("-c", channel) for channel in self.extra_channels))
        self._run_command(("install", "-p", self.path) + channels_args + args)

    def _get_pip_packages(self, packages):
        # type: (Iterable[Text]) -> Sequence[Text]
        """
        Return subset of ``packages`` which are not available on conda
        """
        pips = []
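        # repeatedly try to conda-install the set; every package conda cannot find is
        # moved to the pip list and the conda install is retried without it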
        while True:
            with self.temp_file("conda_reqs", packages) as path:
                try:
                    self._install_from_file(path)
                except PackageNotFoundError as e:
                    pips.append(e.pkg)
                    packages = _package_diff(path, {e.pkg})
                else:
                    break
        return pips

    def install_packages(self, *packages):
        # type: (*Text) -> ()
        return self._install(*packages)

    def uninstall_packages(self, *packages):
        # if we are in read only mode, do not uninstall anything
        if self.env_read_only:
            print(
                'Conda environment in read-only mode, skipping package uninstalling: {}'
                .format(packages))
            return ''
        return self._run_command(("uninstall", "-p", self.path))

    def install_from_file(self, path):
        """
        Try to install packages from conda. Install packages which are not available from conda with pip.
        """
        try:
            self._install_from_file(path)
            return
        except PackageNotFoundError as e:
            pip_packages = [e.pkg]
        except PackagesNotFoundError as e:
            pip_packages = package_set(e.packages)
        with self.temp_file("conda_reqs", _package_diff(path,
                                                        pip_packages)) as reqs:
            self.install_from_file(reqs)
        with self.temp_file("pip_reqs", pip_packages) as reqs:
            self.pip.install_from_file(reqs)

    def freeze(self, freeze_full_environment=False):
        requirements = self.pip.freeze()
        req_lines = []
        conda_lines = []

        # noinspection PyBroadException
        try:
            pip_lines = requirements['pip']
            conda_packages_json = json.loads(
                self._run_command(
                    (self.conda, "list", "--json", "-p", self.path), raw=True))
            for r in conda_packages_json:
                # check if this is a pypi package, if it is, leave it outside
                if not r.get('channel') or r.get('channel') == 'pypi':
                    name = (r['name'].replace('-', '_'), r['name'])
                    pip_req_line = [
                        l for l in pip_lines
                        if l.split('==', 1)[0].strip() in name
                        or l.split('@', 1)[0].strip() in name
                    ]
                    if pip_req_line and \
                            ('@' not in pip_req_line[0] or
                             not pip_req_line[0].split('@', 1)[1].strip().startswith('file://')):
                        req_lines.append(pip_req_line[0])
                        continue

                    req_lines.append(
                        '{}=={}'.format(name[1], r['version']) if r.
                        get('version') else '{}'.format(name[1]))
                    continue

                # check if we have it in our required packages
                name = r['name']
                # hack: support the pytorch/torch naming convention difference
                if name == 'pytorch':
                    name = 'torch'
                # skip over packages with _
                if name.startswith('_'):
                    continue
                conda_lines.append('{}=={}'.format(name, r['version']) if r.
                                   get('version') else '{}'.format(name))
            # make sure we see the conda packages, put them into the pip as well
            if conda_lines:
                req_lines = ['# Conda Packages', ''] + conda_lines + [
                    '', '# pip Packages', ''
                ] + req_lines

            requirements['pip'] = req_lines
            requirements['conda'] = conda_lines
        except Exception:
            pass

        if freeze_full_environment:
            # noinspection PyBroadException
            try:
                conda_env_json = json.loads(
                    self._run_command((self.conda, "env", "export", "--json",
                                       "-p", self.path),
                                      raw=True))
                conda_env_json.pop('name', None)
                conda_env_json.pop('prefix', None)
                conda_env_json.pop('channels', None)
                requirements['conda_env_json'] = json.dumps(conda_env_json)
            except Exception:
                pass

        return requirements

    def _load_conda_full_env(self, conda_env_dict, requirements):
        # noinspection PyBroadException
        try:
            cuda_version = int(self.session.config.get('agent.cuda_version',
                                                       0))
        except Exception:
            cuda_version = 0

        conda_env_dict['channels'] = self.extra_channels
        if 'dependencies' not in conda_env_dict:
            conda_env_dict['dependencies'] = []
        new_dependencies = OrderedDict()
        pip_requirements = None
        for line in conda_env_dict['dependencies']:
            if isinstance(line, dict):
                pip_requirements = line.pop('pip', None)
                continue
            name = line.strip().split('=', 1)[0].lower()
            if name == 'pip':
                continue
            elif name == 'python':
                line = 'python={}'.format('.'.join(
                    line.split('=')[1].split('.')[:2]))
            elif name == 'tensorflow-gpu' and cuda_version == 0:
                line = 'tensorflow={}'.format(line.split('=')[1])
            elif name == 'tensorflow' and cuda_version > 0:
                line = 'tensorflow-gpu={}'.format(line.split('=')[1])
            elif name in ('cupti', 'cudnn'):
                # cudatoolkit should pull them based on the cudatoolkit version
                continue
            elif name.startswith('_'):
                continue
            new_dependencies[line.split('=', 1)[0].strip()] = line

        # fix packages:
        conda_env_dict['dependencies'] = list(new_dependencies.values())

        with self.temp_file("conda_env",
                            yaml.dump(conda_env_dict),
                            suffix=".yml") as name:
            print('Conda: Trying to install requirements:\n{}'.format(
                conda_env_dict['dependencies']))
            result = self._run_command(
                ("env", "update", "-p", self.path, "--file", name))

        # check if we need to remove specific packages
        bad_req = self._parse_conda_result_bad_packges(result)
        if bad_req:
            print('failed installing the following conda packages: {}'.format(
                bad_req))
            return False

        if pip_requirements:
            # create a list of vcs packages that we need to replace in the pip section
            vcs_reqs = {}
            if 'pip' in requirements:
                pip_lines = requirements['pip'].splitlines() \
                    if isinstance(requirements['pip'], six.string_types) else requirements['pip']
                for line in pip_lines:
                    try:
                        marker = list(parse(line))
                    except Exception:
                        marker = None
                    if not marker:
                        continue

                    m = MarkerRequirement(marker[0])
                    if m.vcs:
                        vcs_reqs[m.name] = m
            try:
                pip_req_str = [
                    str(vcs_reqs.get(r.split('=', 1)[0], r))
                    for r in pip_requirements if not r.startswith('pip=')
                    and not r.startswith('virtualenv=')
                ]
                print(
                    'Conda: Installing requirements: step 2 - using pip:\n{}'.
                    format(pip_req_str))
                PackageManager._selected_manager = self.pip
                self.pip.load_requirements({'pip': '\n'.join(pip_req_str)})
            except Exception as e:
                print(e)
                raise e
            finally:
                PackageManager._selected_manager = self

        self.requirements_manager.post_install(self.session)

    def load_requirements(self, requirements):
        # if we are in read only mode, do not uninstall anything
        if self.env_read_only:
            print(
                'Conda environment in read-only mode, skipping requirements installation.'
            )
            return None

        # if we have a full conda environment, use it and pass the pip to pip
        if requirements.get('conda_env_json'):
            # noinspection PyBroadException
            try:
                conda_env_json = json.loads(requirements.get('conda_env_json'))
                print('Conda restoring full yaml environment')
                return self._load_conda_full_env(conda_env_json, requirements)
            except Exception:
                print(
                    'Could not load fully stored conda environment, falling back to requirements'
                )

        # create new environment file
        conda_env = dict()
        conda_env['channels'] = self.extra_channels
        reqs = []
        if isinstance(requirements['pip'], six.string_types):
            requirements['pip'] = requirements['pip'].split('\n')
        if isinstance(requirements.get('conda'), six.string_types):
            requirements['conda'] = requirements['conda'].split('\n')
        has_torch = False
        has_matplotlib = False
        try:
            cuda_version = int(self.session.config.get('agent.cuda_version',
                                                       0))
        except Exception:
            cuda_version = 0

        # notice: a 'conda' entry with an empty string is a valid conda requirements list, meaning pip only
        # this should happen if the experiment was executed on a non-conda machine or with an old trains client
        conda_supported_req = requirements['pip'] if requirements.get(
            'conda', None) is None else requirements['conda']
        conda_supported_req_names = []
        pip_requirements = []
        for r in conda_supported_req:
            try:
                marker = list(parse(r))
            except Exception:
                marker = None
            if not marker:
                continue

            m = MarkerRequirement(marker[0])
            # conda does not support version control links
            if m.vcs:
                pip_requirements.append(m)
                continue
            # Skip over pip
            if m.name in (
                    'pip',
                    'virtualenv',
            ):
                continue
            # python version, only major.minor
            if m.name == 'python' and m.specs:
                m.specs = [
                    (m.specs[0][0], '.'.join(m.specs[0][1].split('.')[:2])),
                ]
                if '.' not in m.specs[0][1]:
                    continue

            conda_supported_req_names.append(m.name.lower())
            if m.req.name.lower() == 'matplotlib':
                has_matplotlib = True
            elif m.req.name.lower().startswith('torch'):
                has_torch = True

            if m.req.name.lower() in ('torch', 'pytorch'):
                has_torch = True
                m.req.name = 'pytorch'

            if m.req.name.lower() in ('tensorflow_gpu', 'tensorflow-gpu',
                                      'tensorflow'):
                has_torch = True
                m.req.name = 'tensorflow-gpu' if cuda_version > 0 else 'tensorflow'

            reqs.append(m)

        # if we have a conda list, the rest should be installed with pip,
        if requirements.get('conda', None) is not None:
            for r in requirements['pip']:
                try:
                    marker = list(parse(r))
                except Exception:
                    marker = None
                if not marker:
                    continue

                m = MarkerRequirement(marker[0])
                # skip over local files (we cannot change the version to a local file)
                if m.local_file:
                    continue
                m_name = m.name.lower()
                if m_name in conda_supported_req_names:
                    # this package is in the conda list,
                    # make sure that if we changed the version we also match it in conda
                    ## conda_supported_req_names.remove(m_name)
                    for cr in reqs:
                        if m_name.lower().replace(
                                '_', '-') == cr.name.lower().replace('_', '-'):
                            # match versions
                            cr.specs = m.specs
                            # # conda always likes "-" not "_" but only on pypi packages
                            # cr.name = cr.name.lower().replace('_', '-')
                            break
                else:
                    # not in conda, it is a pip package
                    pip_requirements.append(m)
                    if m_name == 'matplotlib':
                        has_matplotlib = True

        # Conda requirements Hacks:
        if has_matplotlib:
            reqs.append(MarkerRequirement(Requirement.parse('graphviz')))
            reqs.append(MarkerRequirement(
                Requirement.parse('python-graphviz')))
            reqs.append(MarkerRequirement(Requirement.parse('kiwisolver')))

        # remove specific cudatoolkit, it should have been preinstalled already.
        # allow overriding the default cudatoolkit, but not the derivative packages; cudatoolkit should pull them
        reqs = [r for r in reqs if r.name not in ('cudnn', 'cupti')]

        if has_torch and cuda_version == 0:
            reqs.append(MarkerRequirement(Requirement.parse('cpuonly')))

        # make sure we have no double entries
        reqs = list(OrderedDict((r.name, r) for r in reqs).values())

        # conform conda packages (version/name)
        for r in reqs:
            # change '_' to '-' in the name, but keep a leading '_' (conda-internal package prefix)
            if not r.name.startswith('_') and not requirements.get(
                    'conda', None):
                r.name = r.name.replace('_', '-')
            # remove .post from version numbers, it fails ~= version, and change == to ~=
            if r.specs and r.specs[0]:
                r.specs = [(r.specs[0][0].replace('==', '~='),
                            r.specs[0][1].split('.post')[0])]

        while reqs:
            # notice: we give conda more freedom in version selection, to help it choose the best combination
            def clean_ver(ar):
                if not ar.specs:
                    return ar.tostr()
                ar.specs = [
                    (ar.specs[0][0], ar.specs[0][1] +
                     '.0' if '.' not in ar.specs[0][1] else ar.specs[0][1])
                ]
                return ar.tostr()

            conda_env['dependencies'] = [clean_ver(r) for r in reqs]
            with self.temp_file("conda_env",
                                yaml.dump(conda_env),
                                suffix=".yml") as name:
                print('Conda: Trying to install requirements:\n{}'.format(
                    conda_env['dependencies']))
                result = self._run_command(
                    ("env", "update", "-p", self.path, "--file", name))
            # check if we need to remove specific packages
            bad_req = self._parse_conda_result_bad_packges(result)
            if not bad_req:
                break

            solved = False
            for bad_r in bad_req:
                name = bad_r.split('[')[0].split('=')[0].split('~')[0].split(
                    '<')[0].split('>')[0]
                # look for name in requirements
                for r in reqs:
                    if r.name.lower() == name.lower():
                        pip_requirements.append(r)
                        reqs.remove(r)
                        solved = True
                        break

            # we couldn't remove even one package,
            # nothing we can do but try pip
            if not solved:
                pip_requirements.extend(reqs)
                break

        if pip_requirements:
            try:
                pip_req_str = [
                    r.tostr() for r in pip_requirements if r.name not in (
                        'pip',
                        'virtualenv',
                    )
                ]
                print(
                    'Conda: Installing requirements: step 2 - using pip:\n{}'.
                    format(pip_req_str))
                PackageManager._selected_manager = self.pip
                self.pip.load_requirements({'pip': '\n'.join(pip_req_str)})
            except Exception as e:
                print(e)
                raise e
            finally:
                PackageManager._selected_manager = self

        self.requirements_manager.post_install(self.session)
        return True

    def _parse_conda_result_bad_packges(self, result_dict):
        if not result_dict:
            return None

        if 'bad_deps' in result_dict and result_dict['bad_deps']:
            return result_dict['bad_deps']

        if result_dict.get('error'):
            error_lines = result_dict['error'].split('\n')
            if error_lines[0].strip().lower().startswith(
                    "unsatisfiableerror:"):
                empty_lines = [
                    i for i, l in enumerate(error_lines) if not l.strip()
                ]
                if len(empty_lines) >= 2:
                    deps = error_lines[empty_lines[0] + 1:empty_lines[1]]
                    try:
                        return yaml.load('\n'.join(deps),
                                         Loader=yaml.SafeLoader)
                    except Exception:
                        return None
        return None

    def _run_command(self, command, raw=False, **kwargs):
        # type: (Iterable[Text], bool, Any) -> Union[Dict, Text]
        """
        Run a conda command, returning JSON output.
        The command is prepended with 'conda' and run with JSON output flags.
        :param command: command to run
        :param raw: return text output and don't change command
        :param kwargs: kwargs for Argv.get_output()
        :return: JSON output or text output
        """
        def escape_ansi(line):
            ansi_escape = re.compile(
                r'(?:\x1B[@-_]|[\x80-\x9F])[0-?]*[ -/]*[@-~]')
            return ansi_escape.sub('', line)

        command = Argv(*command)  # type: Executable
        if not raw:
            command = (self.conda, ) + command + ("--quiet", "--json")
        try:
            print('Executing Conda: {}'.format(command.serialize()))
            result = command.get_output(stdin=DEVNULL, **kwargs)
            if self.session.debug_mode:
                print(result)
        except Exception as e:
            result = e.output if hasattr(e, 'output') else ''
            if self.session.debug_mode:
                print(result)
            if raw:
                raise
        if raw:
            return result

        result = json.loads(escape_ansi(result)) if result else {}
        if result.get('success', False):
            print('Pass')
        elif result.get('error'):
            print('Conda error: {}'.format(result.get('error')))
        return result

    def get_python_command(self, extra=()):
        return CommandSequence(self.source,
                               self.pip.get_python_command(extra=extra))
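# A minimal standalone sketch of the pattern _run_command above relies on:
# invoke conda with "--quiet --json", strip ANSI escape codes from the output
# and parse it as JSON, falling back to an empty dict on failure. The helper
# name run_conda_json is illustrative and not part of the class above.
import json
import re
import subprocess

ANSI_ESCAPE = re.compile(r'(?:\x1B[@-_]|[\x80-\x9F])[0-?]*[ -/]*[@-~]')

def run_conda_json(*args):
    try:
        completed = subprocess.run(
            ('conda',) + args + ('--quiet', '--json'),
            stdin=subprocess.DEVNULL, capture_output=True, text=True, check=False)
        output = completed.stdout
    except OSError:
        output = ''
    return json.loads(ANSI_ESCAPE.sub('', output)) if output else {}

# e.g. run_conda_json('env', 'list').get('envs', [])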
Example #19
0
    def create(self):
        """
        Create a new environment
        """
        if self.conda_env_as_base_docker and self.conda_pre_build_env_path:
            if Path(self.conda_pre_build_env_path).is_dir():
                print("Using pre-existing Conda environment from {}".format(
                    self.conda_pre_build_env_path))
                self.path = Path(self.conda_pre_build_env_path)
                self.source = ("conda", "activate", self.path.as_posix())
                self.pip = CondaPip(
                    session=self.session,
                    source=self.source,
                    python=self.python,
                    requirements_manager=self.requirements_manager,
                    path=self.path,
                )
                conda_env = Path(
                    self.conda
                ).parent.parent / 'etc' / 'profile.d' / 'conda.sh'
                self.source = self.pip.source = CommandSequence(
                    ('source', conda_env.as_posix()), self.source)
                self.env_read_only = True
                return self
            elif Path(self.conda_pre_build_env_path).is_file():
                print("Restoring Conda environment from {}".format(
                    self.conda_pre_build_env_path))
                tar_path = find_executable("tar")
                self.path.mkdir(parents=True, exist_ok=True)
                output = Argv(
                    tar_path,
                    "-xzf",
                    self.conda_pre_build_env_path,
                    "-C",
                    self.path,
                ).get_output()

                self.source = self.pip.source = ("conda", "activate",
                                                 self.path.as_posix())
                conda_env = Path(
                    self.conda
                ).parent.parent / 'etc' / 'profile.d' / 'conda.sh'
                self.source = self.pip.source = CommandSequence(
                    ('source', conda_env.as_posix()), self.source)
                # unpack cleanup
                print("Fixing prefix in Conda environment {}".format(
                    self.path))
                CommandSequence(('source', conda_env.as_posix()),
                                ((self.path / 'bin' /
                                  'conda-unpack').as_posix(), )).get_output()
                return self
            else:
                raise ValueError(
                    "Could not restore Conda environment, cannot find {}".
                    format(self.conda_pre_build_env_path))

        output = Argv(
            self.conda,
            "create",
            "--yes",
            "--mkdir",
            "--prefix",
            self.path,
            "python={}".format(self.python),
        ).get_output(stderr=DEVNULL)
        match = re.search(
            r"\W*(.*activate) ({})".format(re.escape(str(self.path))), output)
        self.source = self.pip.source = (tuple(match.group(1).split()) +
                                         (match.group(2), ) if match else
                                         ("conda", "activate",
                                          self.path.as_posix()))

        conda_env = Path(
            self.conda).parent.parent / 'etc' / 'profile.d' / 'conda.sh'
        if conda_env.is_file() and not is_windows_platform():
            self.source = self.pip.source = CommandSequence(
                ('source', conda_env.as_posix()), self.source)

        # install cuda toolkit
        # noinspection PyBroadException
        try:
            cuda_version = float(int(
                self.session.config['agent.cuda_version'])) / 10.0
            if cuda_version > 0:
                self._install('cudatoolkit={:.1f}'.format(cuda_version))
        except Exception:
            pass
        return self
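# Illustrative companion to the cuda handling above: the agent.cuda_version
# config value is an integer-like string such as "102", which the code turns
# into a conda package spec like "cudatoolkit=10.2". A tiny sketch of that
# conversion (the helper name is hypothetical):
def cuda_spec_from_config(raw_value):
    cuda_version = float(int(raw_value)) / 10.0
    return 'cudatoolkit={:.1f}'.format(cuda_version) if cuda_version > 0 else None

# cuda_spec_from_config('102') -> 'cudatoolkit=10.2'
# cuda_spec_from_config('0')   -> None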
Example #20
0
# -*- coding: utf-8 -*-

import sys
import sphinx_rtd_theme
try:
    from pathlib2 import Path
except ImportError:
    from pathlib import Path

project_path = Path(__file__).absolute().parent.joinpath('../..')

sys.path.insert(0, project_path.as_posix())

from httpretty.version import version # noqa


project = 'HTTPretty'
copyright = '2018, Gabriel Falcao'
author = 'Gabriel Falcao'

# The short X.Y version
version = version
# The full version, including alpha/beta/rc tags
release = version


extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.doctest',
    'sphinx.ext.intersphinx',
    'sphinx.ext.coverage',
Example #21
0
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 12 15:06:03 2018

@author: ADMIN
"""

import logging
import logging.config
from functools import partial
from pathlib2 import Path

cfgpath = Path(__file__).parent/'logging.conf'

lcfg = partial(logging.config.fileConfig, cfgpath.as_posix())

pl = logging.getLogger('file_append')
ps = logging.getLogger('file_only')
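# A minimal sketch of the same deferred-configuration pattern without an
# external logging.conf file: wrap the configuration call in a partial, fetch
# named loggers up front, and apply the configuration once at startup.
# basicConfig is used here instead of fileConfig purely to keep the sketch
# self-contained; logger names are illustrative.
import logging
from functools import partial

configure_logging = partial(logging.basicConfig, level=logging.INFO,
                            format='%(asctime)s %(name)s %(levelname)s %(message)s')

log_append = logging.getLogger('file_append')
log_only = logging.getLogger('file_only')

if __name__ == '__main__':
    configure_logging()  # apply the deferred configuration once
    log_append.info('example message routed through the configured root handler')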
Example #22
0
    def _extract_to_cache(
            cls,
            cached_file,  # type: str
            name,  # type: str
            cache_context=None,  # type: Optional[str]
            target_folder=None,  # type: Optional[str]
            cache_path_encoding=None,  # type: Optional[str]
            force=False,  # type: bool
    ):
        # type: (...) -> str
        """
        Extract cached file to cache folder
        :param str cached_file: local copy of archive file
        :param str name: name of the target file
        :param str cache_context: cache context id
        :param str target_folder: specify target path to use for archive extraction
        :param str cache_path_encoding: specify the representation of the local path of the cached files;
            this always points to the local cache folder, even if we have a direct-access file.
            Used for extracting the cached archive based on cache_path_encoding
        :param bool force: Force archive extraction even if target folder exists
        :return: cached folder containing the extracted archive content
        """
        if not cached_file:
            return cached_file

        cached_file = Path(cached_file)
        cache_path_encoding = Path(
            cache_path_encoding) if cache_path_encoding else None

        # we support zip and tar.gz files auto-extraction
        suffix = cached_file.suffix.lower()
        if suffix == '.gz':
            suffix = ''.join(a.lower() for a in cached_file.suffixes[-2:])

        if suffix not in (".zip", ".tgz", ".tar.gz"):
            return str(cached_file)

        cache_folder = Path(cache_path_encoding or cached_file).parent
        archive_suffix = (cache_path_encoding
                          or cached_file).name[:-len(suffix)]
        name = encode_string_to_filename(name) if name else name
        if target_folder:
            target_folder = Path(target_folder)
        else:
            target_folder = cache_folder / CacheManager.get_context_folder_lookup(
                cache_context).format(archive_suffix, name)

        if target_folder.is_dir() and not force:
            # noinspection PyBroadException
            try:
                target_folder.touch(exist_ok=True)
                return target_folder.as_posix()
            except Exception:
                pass

        base_logger = LoggerRoot.get_base_logger()
        try:
            # if the target folder exists, extraction was forced, so we extract directly into the target folder
            if target_folder.is_dir():
                temp_target_folder = target_folder
            else:
                temp_target_folder = cache_folder / "{0}_{1}_{2}".format(
                    target_folder.name,
                    time() * 1000,
                    str(random()).replace('.', ''))
                temp_target_folder.mkdir(parents=True, exist_ok=True)

            if suffix == ".zip":
                ZipFile(cached_file.as_posix()).extractall(
                    path=temp_target_folder.as_posix())
            elif suffix == ".tar.gz":
                with tarfile.open(cached_file.as_posix()) as file:
                    file.extractall(temp_target_folder.as_posix())
            elif suffix == ".tgz":
                with tarfile.open(cached_file.as_posix(), mode='r:gz') as file:
                    file.extractall(temp_target_folder.as_posix())

            if temp_target_folder != target_folder:
                # we assume such a folder already exists if the file was previously extracted
                # noinspection PyBroadException
                try:
                    # if rename fails, it means that someone else already managed to extract the file, delete the current
                    # folder and return the already existing cached zip folder
                    shutil.move(temp_target_folder.as_posix(),
                                target_folder.as_posix())
                except Exception:
                    if target_folder.exists():
                        target_folder.touch(exist_ok=True)
                    else:
                        base_logger.warning(
                            "Failed renaming {0} to {1}".format(
                                temp_target_folder.as_posix(),
                                target_folder.as_posix()))
                    try:
                        shutil.rmtree(temp_target_folder.as_posix())
                    except Exception as ex:
                        base_logger.warning(
                            "Exception {}\nFailed deleting folder {}".format(
                                ex, temp_target_folder.as_posix()))
        except Exception as ex:
            # failed extracting the file:
            base_logger.warning(
                "Exception {}\nFailed extracting zip file {}".format(
                    ex, cached_file.as_posix()))
            # noinspection PyBroadException
            try:
                target_folder.rmdir()
            except Exception:
                pass
            return cached_file.as_posix()
        return target_folder.as_posix()
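# Standalone sketch of the suffix handling used above: ".tar.gz" is collapsed
# into a single compound suffix so it can be matched alongside ".zip" and
# ".tgz"; anything else is treated as a non-archive. The helper name is
# illustrative.
from pathlib import Path

def archive_suffix(path):
    p = Path(path)
    suffix = p.suffix.lower()
    if suffix == '.gz':
        suffix = ''.join(a.lower() for a in p.suffixes[-2:])
    return suffix if suffix in ('.zip', '.tgz', '.tar.gz') else None

# archive_suffix('datasets/images.tar.gz') -> '.tar.gz'
# archive_suffix('datasets/images.ckpt')   -> None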
Example #23
0
# -*- coding: utf-8 -*-
import sys
try:
    from pathlib2 import Path
except ImportError:
    from pathlib import Path

project_path = Path(__file__).absolute().parent.joinpath('../../..')

sys.path.insert(0, project_path.as_posix())

import sphinx_bulma_theme  # noqa

project = 'Sphinx Bulma Theme'
copyright = '2018, Gabriel Falcao'
author = 'Gabriel Falcao'

version = sphinx_bulma_theme.version
release = version

needs_sphinx = '1.7.1'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.autosectionlabel',
    'sphinx.ext.coverage',
    'sphinx.ext.doctest',
    'sphinx.ext.githubpages',
Example #24
0
    def _extract_to_cache(cls, cached_file, name):
        """
        Extract cached file to cache folder
        :param str cached_file: local copy of archive file
        :param str name: cache context
        :return: cached folder containing the extracted archive content
        """
        if not cached_file:
            return cached_file

        cached_file = Path(cached_file)

        # we support zip and tar.gz files auto-extraction
        if (not cached_file.suffix == ".zip"
                and not cached_file.suffixes[-2:] == [".tar", ".gz"]):
            return str(cached_file)

        cached_folder = cached_file.parent

        name = encode_string_to_filename(name) if name else name
        target_folder = Path("{0}/{1}_artifacts_archive_{2}".format(
            cached_folder, cached_file.stem, name))
        if target_folder.exists():
            # noinspection PyBroadException
            try:
                target_folder.touch(exist_ok=True)
                return target_folder
            except Exception:
                pass

        base_logger = LoggerRoot.get_base_logger()
        try:
            temp_target_folder = cached_folder / "{0}_{1}_{2}".format(
                target_folder.name,
                time() * 1000,
                str(random()).replace('.', ''))
            temp_target_folder.mkdir(parents=True, exist_ok=True)
            if cached_file.suffix == ".zip":
                ZipFile(cached_file).extractall(
                    path=temp_target_folder.as_posix())
            elif cached_file.suffixes[-2:] == [".tar", ".gz"]:
                with tarfile.open(cached_file) as file:
                    file.extractall(temp_target_folder)

            # we assume such a folder already exists if the file was previously extracted
            # noinspection PyBroadException
            try:
                # if rename fails, it means that someone else already managed to extract the file, delete the current
                # folder and return the already existing cached zip folder
                shutil.move(temp_target_folder.as_posix(),
                            target_folder.as_posix())
            except Exception:
                if target_folder.exists():
                    target_folder.touch(exist_ok=True)
                else:
                    base_logger.warning("Failed renaming {0} to {1}".format(
                        temp_target_folder, target_folder))
                try:
                    shutil.rmtree(temp_target_folder)
                except Exception as ex:
                    base_logger.warning(
                        "Exception {}\nFailed deleting folder {}".format(
                            ex, temp_target_folder))
        except Exception as ex:
            # failed extracting the file:
            base_logger.warning(
                "Exception {}\nFailed extracting zip file {}".format(
                    ex, str(cached_file)))
            # noinspection PyBroadException
            try:
                target_folder.rmdir()
            except Exception:
                pass
            return cached_file
        return target_folder
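# Sketch of the extract-then-rename idiom both cache extractors follow: unpack
# into a uniquely named temporary folder, then move it to the final name in a
# single rename; if the rename fails, another process won the race and the
# temporary copy is discarded. Assumes a zip archive and that the target's
# parent folder exists; names are illustrative.
import shutil
import tempfile
from pathlib import Path
from zipfile import ZipFile

def extract_once(archive_path, target_folder):
    target = Path(target_folder)
    if target.is_dir():
        return target
    tmp = Path(tempfile.mkdtemp(prefix=target.name + '.',
                                dir=target.parent.as_posix()))
    ZipFile(archive_path).extractall(path=tmp.as_posix())
    try:
        tmp.rename(target)  # atomic on the same filesystem
    except OSError:
        shutil.rmtree(tmp.as_posix(), ignore_errors=True)
    return target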
Example #25
0
    def _get_script_info(cls,
                         filepath,
                         check_uncommitted=True,
                         create_requirements=True,
                         log=None):
        jupyter_filepath = cls._get_jupyter_notebook_filename()
        if jupyter_filepath:
            script_path = Path(os.path.normpath(jupyter_filepath)).absolute()
        else:
            script_path = Path(os.path.normpath(filepath)).absolute()
            if not script_path.is_file():
                raise ScriptInfoError(
                    "Script file [{}] could not be found".format(filepath))

        script_dir = script_path.parent

        def _log(msg, *args, **kwargs):
            if not log:
                return
            log.warning("Failed auto-detecting task repository: {}".format(
                msg.format(*args, **kwargs)))

        plugin = next((p for p in cls.plugins if p.exists(script_dir)), None)
        repo_info = DetectionResult()
        if not plugin:
            if log:
                log.info("No repository found, storing script code instead")
        else:
            try:
                repo_info = plugin.get_info(str(script_dir),
                                            include_diff=check_uncommitted)
            except Exception as ex:
                _log("no info for {} ({})", script_dir, ex)
            else:
                if repo_info.is_empty():
                    _log("no info for {}", script_dir)

        repo_root = repo_info.root or script_dir
        if not plugin:
            working_dir = '.'
            entry_point = str(script_path.name)
        else:
            working_dir = cls._get_working_dir(repo_root)
            entry_point = cls._get_entry_point(repo_root, script_path)

        if check_uncommitted:
            diff = cls._get_script_code(script_path.as_posix()) \
                if not plugin or not repo_info.commit else repo_info.diff
        else:
            diff = ''
        # if this is not jupyter, get the requirements.txt
        requirements = ''
        conda_requirements = ''
        # create requirements if backend supports requirements
        # if jupyter is present, requirements will be created in the background, when saving a snapshot
        if not jupyter_filepath and Session.check_min_api_version('2.2'):
            script_requirements = ScriptRequirements(
                Path(repo_root).as_posix() if repo_info.url else script_path.
                as_posix())
            if create_requirements:
                requirements, conda_requirements = script_requirements.get_requirements(
                )
        else:
            script_requirements = None

        script_info = dict(
            repository=furl(repo_info.url).remove(username=True,
                                                  password=True).tostr(),
            branch=repo_info.branch,
            version_num=repo_info.commit,
            entry_point=entry_point,
            working_dir=working_dir,
            diff=diff,
            requirements={
                'pip': requirements,
                'conda': conda_requirements
            } if requirements else None,
        )

        messages = []
        if repo_info.modified:
            messages.append(
                "======> WARNING! UNCOMMITTED CHANGES IN REPOSITORY {} <======"
                .format(script_info.get("repository", "")))

        if not any(script_info.values()):
            script_info = None

        return (ScriptInfoResult(script=script_info,
                                 warning_messages=messages),
                script_requirements)
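# Illustrative companion to the repository field above: furl is used to strip
# embedded credentials from the detected remote URL before it is stored.
# Assuming furl is installed, the sanitization looks like this (the URL is
# made up):
from furl import furl

remote = 'https://user:secret@github.com/someorg/somerepo.git'
print(furl(remote).remove(username=True, password=True).tostr())
# -> https://github.com/someorg/somerepo.git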
Example #26
0
    def _upload_data_audit_artifacts(self, name):
        logger = self._task.get_logger()
        pd_artifact = self._artifacts_container.get(name)
        pd_metadata = self._artifacts_container.get_metadata(name)

        # remove from artifacts watch list
        if name in self._unregister_request:
            try:
                self._unregister_request.remove(name)
            except KeyError:
                pass
            self._artifacts_container.unregister_artifact(name)

        if pd_artifact is None:
            return

        override_filename_ext_in_uri = self._save_format
        override_filename_in_uri = name
        fd, local_csv = mkstemp(prefix=quote(name, safe="") + '.',
                                suffix=override_filename_ext_in_uri)
        os.close(fd)
        local_csv = Path(local_csv)
        pd_artifact.to_csv(local_csv.as_posix(),
                           index=False,
                           compression=self._compression)
        current_sha2, file_sha2 = self.sha256sum(local_csv.as_posix(),
                                                 skip_header=32)
        if name in self._last_artifacts_upload:
            previous_sha2 = self._last_artifacts_upload[name]
            if previous_sha2 == current_sha2:
                # nothing to do, we can skip the upload
                try:
                    local_csv.unlink()
                except Exception:
                    pass
                return
        self._last_artifacts_upload[name] = current_sha2

        # If old trains-server, upload as debug image
        if not Session.check_min_api_version('2.3'):
            logger.report_image(title='artifacts',
                                series=name,
                                local_path=local_csv.as_posix(),
                                delete_after_upload=True,
                                iteration=self._task.get_last_iteration(),
                                max_image_history=2)
            return

        # Find our artifact
        artifact = None
        for an_artifact in self._task_artifact_list:
            if an_artifact.key == name:
                artifact = an_artifact
                break

        file_size = local_csv.stat().st_size

        # upload file
        uri = self._upload_local_file(
            local_csv,
            name,
            delete_after_upload=True,
            override_filename=override_filename_in_uri,
            override_filename_ext=override_filename_ext_in_uri)

        # update task artifacts
        with self._task_edit_lock:
            if not artifact:
                artifact = tasks.Artifact(key=name,
                                          type=self._pd_artifact_type)
                self._task_artifact_list.append(artifact)
            artifact_type_data = tasks.ArtifactTypeData()

            artifact_type_data.data_hash = current_sha2
            artifact_type_data.content_type = "text/csv"
            artifact_type_data.preview = str(
                pd_artifact.__repr__()) + '\n\n' + self._get_statistics(
                    {name: pd_artifact})

            artifact.type_data = artifact_type_data
            artifact.uri = uri
            artifact.content_size = file_size
            artifact.hash = file_sha2
            artifact.timestamp = int(time())
            artifact.display_data = [
                (str(k), str(v)) for k, v in pd_metadata.items()
            ] if pd_metadata else None

            self._task.set_artifacts(self._task_artifact_list)
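# Sketch of the change-detection idea above: hash the serialized CSV while
# skipping its first bytes (skip_header=32 in the call above), so a volatile
# prefix (e.g. a compression header that embeds a timestamp) does not defeat
# the "same content, skip the upload" comparison. The helper name and block
# size are illustrative; the real sha256sum returns two digests.
import hashlib

def sha256_skip_header(path, skip_header=32, block_size=65536):
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        f.read(skip_header)  # ignore the volatile prefix
        for chunk in iter(lambda: f.read(block_size), b''):
            digest.update(chunk)
    return digest.hexdigest()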
Example #27
0
def _file_path(raw_path: Union[str, Path]) -> Tuple[str, Path]:
    config_file = Path(raw_path) / '.sv_cfg'
    return config_file.as_posix(), abs_path(config_file)
Example #28
0
def to_posix(path):
    parts = path.parts[1:]
    full_path = Path("/") / "mnt" / "c" / '/'.join(parts)
    return full_path.as_posix()
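# Illustrative usage of to_posix above (assuming pathlib's Path is imported in
# that module): parts[1:] drops the drive component and the result is
# re-rooted under the WSL-style mount point; note the drive letter "c" is
# hard-coded, so this only makes sense for paths on C:.
from pathlib import PureWindowsPath

print(to_posix(PureWindowsPath(r'C:\Users\me\data.txt')))
# -> /mnt/c/Users/me/data.txt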
Example #29
0
    def upload_artifact(self,
                        name,
                        artifact_object=None,
                        metadata=None,
                        delete_after_upload=False):
        if not Session.check_min_api_version('2.3'):
            LoggerRoot.get_base_logger().warning(
                'Artifacts not supported by your TRAINS-server version, '
                'please upgrade to the latest server version')
            return False

        if name in self._artifacts_container:
            raise ValueError(
                "Artifact by the name of {} is already registered, use register_artifact"
                .format(name))

        artifact_type_data = tasks.ArtifactTypeData()
        override_filename_in_uri = None
        override_filename_ext_in_uri = None
        uri = None
        if np and isinstance(artifact_object, np.ndarray):
            artifact_type = 'numpy'
            artifact_type_data.content_type = 'application/numpy'
            artifact_type_data.preview = str(artifact_object.__repr__())
            override_filename_ext_in_uri = '.npz'
            override_filename_in_uri = name + override_filename_ext_in_uri
            fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                         suffix=override_filename_ext_in_uri)
            os.close(fd)
            np.savez_compressed(local_filename, **{name: artifact_object})
            delete_after_upload = True
        elif pd and isinstance(artifact_object, pd.DataFrame):
            artifact_type = 'pandas'
            artifact_type_data.content_type = 'text/csv'
            artifact_type_data.preview = str(artifact_object.__repr__())
            override_filename_ext_in_uri = self._save_format
            override_filename_in_uri = name
            fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                         suffix=override_filename_ext_in_uri)
            os.close(fd)
            artifact_object.to_csv(local_filename,
                                   compression=self._compression)
            delete_after_upload = True
        elif isinstance(artifact_object, Image.Image):
            artifact_type = 'image'
            artifact_type_data.content_type = 'image/png'
            desc = str(artifact_object.__repr__())
            artifact_type_data.preview = desc[1:desc.find(' at ')]
            override_filename_ext_in_uri = '.png'
            override_filename_in_uri = name + override_filename_ext_in_uri
            fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                         suffix=override_filename_ext_in_uri)
            os.close(fd)
            artifact_object.save(local_filename)
            delete_after_upload = True
        elif isinstance(artifact_object, dict):
            artifact_type = 'JSON'
            artifact_type_data.content_type = 'application/json'
            preview = json.dumps(artifact_object, sort_keys=True, indent=4)
            override_filename_ext_in_uri = '.json'
            override_filename_in_uri = name + override_filename_ext_in_uri
            fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                         suffix=override_filename_ext_in_uri)
            os.write(fd, bytes(preview.encode()))
            os.close(fd)
            artifact_type_data.preview = preview
            delete_after_upload = True
        elif isinstance(artifact_object, six.string_types) and urlparse(
                artifact_object).scheme in remote_driver_schemes:
            # we should not upload this, just register
            local_filename = None
            uri = artifact_object
            artifact_type = 'custom'
            artifact_type_data.content_type = mimetypes.guess_type(
                artifact_object)[0]
        elif isinstance(artifact_object, six.string_types + (Path, )):
            # check if single file
            artifact_object = Path(artifact_object)

            artifact_object = artifact_object.expanduser().absolute()
            try:
                create_zip_file = not artifact_object.is_file()
            except Exception:  # Hack for windows pathlib2 bug, is_file isn't valid.
                create_zip_file = True
            else:  # We assume that this is not Windows os
                if artifact_object.is_dir():
                    # change to wildcard
                    artifact_object /= '*'

            if create_zip_file:
                folder = Path('').joinpath(*artifact_object.parts[:-1])
                if not folder.is_dir() or not folder.parts:
                    raise ValueError(
                        "Artifact file/folder '{}' could not be found".format(
                            artifact_object.as_posix()))

                wildcard = artifact_object.parts[-1]
                files = list(Path(folder).rglob(wildcard))
                override_filename_ext_in_uri = '.zip'
                override_filename_in_uri = folder.parts[
                    -1] + override_filename_ext_in_uri
                fd, zip_file = mkstemp(
                    prefix=quote(folder.parts[-1], safe="") + '.',
                    suffix=override_filename_ext_in_uri)
                try:
                    artifact_type_data.content_type = 'application/zip'
                    artifact_type_data.preview = 'Archive content {}:\n'.format(
                        artifact_object.as_posix())

                    with ZipFile(zip_file,
                                 'w',
                                 allowZip64=True,
                                 compression=ZIP_DEFLATED) as zf:
                        for filename in sorted(files):
                            if filename.is_file():
                                relative_file_name = filename.relative_to(
                                    folder).as_posix()
                                artifact_type_data.preview += '{} - {}\n'.format(
                                    relative_file_name,
                                    humanfriendly.format_size(
                                        filename.stat().st_size))
                                zf.write(filename.as_posix(),
                                         arcname=relative_file_name)
                except Exception as e:
                    # failed uploading folder:
                    LoggerRoot.get_base_logger().warning(
                        'Exception {}\nFailed zipping artifact folder {}'.
                        format(e, folder))
                    return None
                finally:
                    os.close(fd)

                artifact_object = zip_file
                artifact_type = 'archive'
                artifact_type_data.content_type = mimetypes.guess_type(
                    artifact_object)[0]
                local_filename = artifact_object
                delete_after_upload = True
            else:
                if not artifact_object.is_file():
                    raise ValueError(
                        "Artifact file '{}' could not be found".format(
                            artifact_object.as_posix()))

                override_filename_in_uri = artifact_object.parts[-1]
                artifact_object = artifact_object.as_posix()
                artifact_type = 'custom'
                artifact_type_data.content_type = mimetypes.guess_type(
                    artifact_object)[0]
                local_filename = artifact_object
        else:
            raise ValueError("Artifact type {} not supported".format(
                type(artifact_object)))

        # remove from existing list, if exists
        for artifact in self._task_artifact_list:
            if artifact.key == name:
                if artifact.type == self._pd_artifact_type:
                    raise ValueError(
                        "Artifact of name {} already registered, "
                        "use register_artifact instead".format(name))

                self._task_artifact_list.remove(artifact)
                break

        if not local_filename:
            file_size = None
            file_hash = None
        else:
            # check that the file to upload exists
            local_filename = Path(local_filename).absolute()
            if not local_filename.exists() or not local_filename.is_file():
                LoggerRoot.get_base_logger().warning(
                    'Artifact upload failed, cannot find file {}'.format(
                        local_filename.as_posix()))
                return False

            file_hash, _ = self.sha256sum(local_filename.as_posix())
            file_size = local_filename.stat().st_size

            uri = self._upload_local_file(
                local_filename,
                name,
                delete_after_upload=delete_after_upload,
                override_filename=override_filename_in_uri,
                override_filename_ext=override_filename_ext_in_uri)

        timestamp = int(time())

        artifact = tasks.Artifact(
            key=name,
            type=artifact_type,
            uri=uri,
            content_size=file_size,
            hash=file_hash,
            timestamp=timestamp,
            type_data=artifact_type_data,
            display_data=[(str(k), str(v))
                          for k, v in metadata.items()] if metadata else None)

        # update task artifacts
        with self._task_edit_lock:
            self._task_artifact_list.append(artifact)
            self._task.set_artifacts(self._task_artifact_list)

        return True
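# Minimal standalone sketch of the numpy branch above: serialize an array into
# a compressed .npz temp file whose name starts with the (quoted) artifact
# name, ready to be uploaded and deleted afterwards. The helper name is
# illustrative.
import os
from tempfile import mkstemp
from urllib.parse import quote

import numpy as np

def dump_numpy_artifact(name, array):
    fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix='.npz')
    os.close(fd)
    np.savez_compressed(local_filename, **{name: array})
    return local_filename

# dump_numpy_artifact('embeddings', np.zeros((10, 3)))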
Example #30
0
class FolderCache(object):
    _lock_filename = '.clearml.lock'
    _lock_timeout_seconds = 30
    _temp_entry_prefix = '_temp.'

    def __init__(self,
                 cache_folder,
                 max_cache_entries=5,
                 min_free_space_gb=None):
        self._cache_folder = Path(
            os.path.expandvars(cache_folder)).expanduser().absolute()
        self._cache_folder.mkdir(parents=True, exist_ok=True)
        self._max_cache_entries = max_cache_entries
        self._last_copied_entry_folder = None
        self._min_free_space_gb = min_free_space_gb if min_free_space_gb and min_free_space_gb > 0 else None
        self._lock = FileLock(
            (self._cache_folder / self._lock_filename).as_posix())

    def get_cache_folder(self):
        # type: () -> Path
        """
        :return: Return the base cache folder
        """
        return self._cache_folder

    def copy_cached_entry(self, keys, destination):
        # type: (List[str], Path) -> Optional[Path]
        """
        Copy a cached entry into a destination directory; return None if the cached entry does not exist
        :param keys:
        :param destination:
        :return: Target path, None if cached entry does not exist
        """
        self._last_copied_entry_folder = None
        if not keys:
            return None

        # lock so we make sure no one deletes it before we copy it
        # noinspection PyBroadException
        try:
            self._lock.acquire(timeout=self._lock_timeout_seconds)
        except BaseException as ex:
            warning('Could not lock cache folder {}: {}'.format(
                self._cache_folder, ex))
            return None

        src = None
        try:
            src = self.get_entry(keys)
            if src:
                destination = Path(destination).absolute()
                destination.mkdir(parents=True, exist_ok=True)
                shutil.rmtree(destination.as_posix())
                shutil.copytree(src.as_posix(),
                                dst=destination.as_posix(),
                                symlinks=True)
        except BaseException as ex:
            warning('Could not copy cache folder {} to {}: {}'.format(
                src, destination, ex))
            self._lock.release()
            return None

        # release Lock
        self._lock.release()

        self._last_copied_entry_folder = src
        return destination if src else None

    def get_entry(self, keys):
        # type: (List[str]) -> Optional[Path]
        """
        Return a folder (a sub-folder inside the cache_folder) matching one of the keys
        :param keys: List of keys, return the first match to one of the keys, notice keys cannot contain '.'
        :return: Path to the sub-folder or None if none was found
        """
        if not keys:
            return None
        # conform keys
        keys = [keys] if isinstance(keys, str) else keys
        keys = sorted([k.replace('.', '_') for k in keys])
        for cache_folder in self._cache_folder.glob('*'):
            if cache_folder.is_dir() and any(
                    True for k in cache_folder.name.split('.') if k in keys):
                cache_folder.touch()
                return cache_folder
        return None

    def add_entry(self, keys, source_folder, exclude_sub_folders=None):
        # type: (List[str], Path, Optional[Sequence[str]]) -> bool
        """
        Add a local folder into the cache, copy all sub-folders inside `source_folder`
        excluding folders matching `exclude_sub_folders` list
        :param keys: Cache entry keys list (str)
        :param source_folder: Folder to copy into the cache
        :param exclude_sub_folders: List of sub-folders to exclude from the copy operation
        :return: return True if a new entry was added to the cache
        """
        if not keys:
            return False

        keys = [keys] if isinstance(keys, str) else keys
        keys = sorted([k.replace('.', '_') for k in keys])

        # If entry already exists skip it
        cached_entry = self.get_entry(keys)
        if cached_entry:
            # make sure the entry contains all keys
            cached_keys = cached_entry.name.split('.')
            if set(keys) - set(cached_keys):
                # noinspection PyBroadException
                try:
                    self._lock.acquire(timeout=self._lock_timeout_seconds)
                except BaseException as ex:
                    warning('Could not lock cache folder {}: {}'.format(
                        self._cache_folder, ex))
                    # failed locking, do nothing
                    return True
                keys = sorted(list(set(keys) | set(cached_keys)))
                dst = cached_entry.parent / '.'.join(keys)
                # rename
                try:
                    shutil.move(src=cached_entry.as_posix(),
                                dst=dst.as_posix())
                except BaseException as ex:
                    warning('Could not rename cache entry {} to {}: {}'.format(
                        cached_entry.as_posix(), dst.as_posix(), ex))
                # release lock
                self._lock.release()
            return True

        # make sure we remove old entries
        self._remove_old_entries()

        # if we do not have enough free space, clear the cache and do not add the entry
        if not self._check_min_free_space():
            warning(
                'Could not add cache entry, not enough free space on drive, '
                'free space threshold {} GB. Clearing all cache entries!'.
                format(self._min_free_space_gb))
            self._remove_old_entries(max_cache_entries=0)
            return False

        # create the new entry for us
        exclude_sub_folders = exclude_sub_folders or []
        source_folder = Path(source_folder).absolute()
        # create temp folder
        temp_folder = \
            self._temp_entry_prefix + \
            '{}.{}'.format(str(time()).replace('.', '_'), str(random()).replace('.', '_'))
        temp_folder = self._cache_folder / temp_folder
        temp_folder.mkdir(parents=True, exist_ok=False)

        for f in source_folder.glob('*'):
            if f.name in exclude_sub_folders:
                continue
            shutil.copytree(src=f.as_posix(),
                            dst=(temp_folder / f.name).as_posix(),
                            symlinks=True)

        # rename the target folder
        target_cache_folder = self._cache_folder / '.'.join(keys)
        # if the move failed, someone else created the cached entry before us; we can just leave
        # noinspection PyBroadException
        try:
            shutil.move(src=temp_folder.as_posix(),
                        dst=target_cache_folder.as_posix())
        except BaseException:
            # noinspection PyBroadException
            try:
                shutil.rmtree(path=temp_folder.as_posix())
            except BaseException:
                return False

        return True

    def get_last_copied_entry(self):
        # type: () -> Optional[Path]
        """
        :return: the last copied cached entry folder inside the cache
        """
        return self._last_copied_entry_folder

    def _remove_old_entries(self, max_cache_entries=None):
        # type: (Optional[int]) -> ()
        """
        Notice we only keep self._max_cache_entries-1, assuming we will be adding a new entry soon
        :param int max_cache_entries: if not None use instead of self._max_cache_entries
        """
        folder_entries = [
            (cache_folder, cache_folder.stat().st_mtime)
            for cache_folder in self._cache_folder.glob('*')
            if cache_folder.is_dir()
            and not cache_folder.name.startswith(self._temp_entry_prefix)
        ]
        folder_entries = sorted(folder_entries,
                                key=lambda x: x[1],
                                reverse=True)

        # lock so no other process touches the cache while we delete old entries
        # noinspection PyBroadException
        try:
            self._lock.acquire(timeout=self._lock_timeout_seconds)
        except BaseException as ex:
            warning('Could not lock cache folder {}: {}'.format(
                self._cache_folder, ex))
            return

        number_of_entries_to_keep = self._max_cache_entries - 1 \
            if max_cache_entries is None else max(0, int(max_cache_entries))
        for folder, ts in folder_entries[number_of_entries_to_keep:]:
            try:
                shutil.rmtree(folder.as_posix(), ignore_errors=True)
            except BaseException as ex:
                warning('Could not delete cache entry {}: {}'.format(
                    folder.as_posix(), ex))

        self._lock.release()

    def _check_min_free_space(self):
        # type: () -> bool
        """
        :return: return False if we hit the free space limit.
        If no free space limit was provided, always return True
        """
        if not self._min_free_space_gb or not self._cache_folder:
            return True
        free_space = float(
            psutil.disk_usage(self._cache_folder.as_posix()).free)
        free_space /= 2**30
        return free_space > self._min_free_space_gb
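# Hypothetical usage of FolderCache above (shown as comments, since the
# excerpt omits its module imports such as FileLock, psutil and warning).
# Keys and paths are illustrative:
#
#   cache = FolderCache('/var/cache/agent/venvs-cache', max_cache_entries=5)
#   cache.add_entry(keys=['py3_8_requirements_a1b2c3'], source_folder='/tmp/build')
#   restored = cache.copy_cached_entry(['py3_8_requirements_a1b2c3'],
#                                      destination='/tmp/run/venv')
#   if restored is None:
#       pass  # cache miss: build from scratch and add_entry() afterwards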
Example #31
0
    def upload_artifact(self, name, artifact_object=None, metadata=None, delete_after_upload=False):
        if not Session.check_min_api_version('2.3'):
            LoggerRoot.get_base_logger().warning('Artifacts not supported by your TRAINS-server version, '
                                                 'please upgrade to the latest server version')
            return False

        if name in self._artifacts_dict:
            raise ValueError("Artifact by the name of {} is already registered, use register_artifact".format(name))

        artifact_type_data = tasks.ArtifactTypeData()
        use_filename_in_uri = True
        if np and isinstance(artifact_object, np.ndarray):
            artifact_type = 'numpy'
            artifact_type_data.content_type = 'application/numpy'
            artifact_type_data.preview = str(artifact_object.__repr__())
            fd, local_filename = mkstemp(suffix='.npz')
            os.close(fd)
            np.savez_compressed(local_filename, **{name: artifact_object})
            delete_after_upload = True
            use_filename_in_uri = False
        elif pd and isinstance(artifact_object, pd.DataFrame):
            artifact_type = 'pandas'
            artifact_type_data.content_type = 'text/csv'
            artifact_type_data.preview = str(artifact_object.__repr__())
            fd, local_filename = mkstemp(suffix=self._save_format)
            os.close(fd)
            artifact_object.to_csv(local_filename, compression=self._compression)
            delete_after_upload = True
            use_filename_in_uri = False
        elif isinstance(artifact_object, Image.Image):
            artifact_type = 'image'
            artifact_type_data.content_type = 'image/png'
            desc = str(artifact_object.__repr__())
            artifact_type_data.preview = desc[1:desc.find(' at ')]
            fd, local_filename = mkstemp(suffix='.png')
            os.close(fd)
            artifact_object.save(local_filename)
            delete_after_upload = True
            use_filename_in_uri = False
        elif isinstance(artifact_object, dict):
            artifact_type = 'JSON'
            artifact_type_data.content_type = 'application/json'
            preview = json.dumps(artifact_object, sort_keys=True, indent=4)
            fd, local_filename = mkstemp(suffix='.json')
            os.write(fd, bytes(preview.encode()))
            os.close(fd)
            artifact_type_data.preview = preview
            delete_after_upload = True
            use_filename_in_uri = False
        elif isinstance(artifact_object, six.string_types) or isinstance(artifact_object, Path):
            if isinstance(artifact_object, Path):
                artifact_object = artifact_object.as_posix()
            artifact_type = 'custom'
            artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
            local_filename = artifact_object
        else:
            raise ValueError("Artifact type {} not supported".format(type(artifact_object)))

        # remove from existing list, if exists
        for artifact in self._task_artifact_list:
            if artifact.key == name:
                if artifact.type == self._pd_artifact_type:
                    raise ValueError("Artifact of name {} already registered, "
                                     "use register_artifact instead".format(name))

                self._task_artifact_list.remove(artifact)
                break

        # check that the file to upload exists
        local_filename = Path(local_filename).absolute()
        if not local_filename.exists() or not local_filename.is_file():
            LoggerRoot.get_base_logger().warning('Artifact upload failed, cannot find file {}'.format(
                local_filename.as_posix()))
            return False

        file_hash, _ = self.sha256sum(local_filename.as_posix())
        timestamp = int(time())
        file_size = local_filename.stat().st_size

        uri = self._upload_local_file(local_filename, name,
                                      delete_after_upload=delete_after_upload, use_filename=use_filename_in_uri)

        artifact = tasks.Artifact(key=name, type=artifact_type,
                                  uri=uri,
                                  content_size=file_size,
                                  hash=file_hash,
                                  timestamp=timestamp,
                                  type_data=artifact_type_data,
                                  display_data=[(str(k), str(v)) for k, v in metadata.items()] if metadata else None)

        # update task artifacts
        with self._task_edit_lock:
            self._task_artifact_list.append(artifact)
            self._task.set_artifacts(self._task_artifact_list)

        return True
Example #32
0
    def _extract_to_cache(cls, cached_file, name):
        """
        Extract a cached zip file to the cache folder
        :param str cached_file: local copy of archive file
        :param str name: cache context
        :return: cached folder containing the extracted archive content
        """
        # only zip files
        if not cached_file or not str(cached_file).lower().endswith('.zip'):
            return cached_file

        cached_folder = Path(cached_file).parent
        archive_suffix = cached_file.rpartition(".")[0]
        name = encode_string_to_filename(name)
        target_folder = Path("{0}_artifacts_archive_{1}".format(archive_suffix, name))
        if target_folder.exists():
            # noinspection PyBroadException
            try:
                target_folder.touch(exist_ok=True)
                return target_folder
            except Exception:
                pass

        base_logger = LoggerRoot.get_base_logger()
        try:
            temp_target_folder = cached_folder / "{0}_{1}_{2}".format(
                target_folder.name, time() * 1000, str(random()).replace('.', ''))
            temp_target_folder.mkdir(parents=True, exist_ok=True)
            ZipFile(cached_file).extractall(path=temp_target_folder.as_posix())
            # we assume such a folder already exists if the zip file was previously extracted
            # noinspection PyBroadException
            try:
                # if rename fails, it means that someone else already managed to extract the zip, delete the current
                # folder and return the already existing cached zip folder
                shutil.move(temp_target_folder.as_posix(), target_folder.as_posix())
            except Exception:
                if target_folder.exists():
                    target_folder.touch(exist_ok=True)
                else:
                    base_logger.warning(
                        "Failed renaming {0} to {1}".format(
                            temp_target_folder, target_folder
                        )
                    )
                try:
                    shutil.rmtree(temp_target_folder)
                except Exception as ex:
                    base_logger.warning(
                        "Exception {}\nFailed deleting folder {}".format(
                            ex, temp_target_folder
                        )
                    )
        except Exception as ex:
            # failed extracting zip file:
            base_logger.warning(
                "Exception {}\nFailed extracting zip file {}".format(ex, cached_file)
            )
            # noinspection PyBroadException
            try:
                target_folder.rmdir()
            except Exception:
                pass
            return cached_file
        return target_folder
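# Illustrative companion to the naming scheme above: the target folder is the
# archive path minus its ".zip" extension, plus "_artifacts_archive_" and a
# filesystem-safe version of the cache context. Helper names are hypothetical.
from pathlib import Path

def artifacts_archive_folder(cached_file, safe_name):
    stem = str(cached_file).rpartition(".")[0]  # path without the .zip suffix
    return Path("{0}_artifacts_archive_{1}".format(stem, safe_name))

# artifacts_archive_folder('/cache/data.zip', 'ctx') -> /cache/data_artifacts_archive_ctx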