Example 1
import logging
import re
import time
from typing import List, Optional

from cognite.client import CogniteClient
from cognite.client.data_classes import FileMetadataList
from cognite.client.exceptions import CogniteNotFoundError

# `replication`, `copy_files`, and `_is_copyable` are provided by the
# surrounding cognite-replicator module this function is excerpted from.
from cognite.replicator import replication


def replicate(
    client_src: CogniteClient,
    client_dst: CogniteClient,
    batch_size: int = 10000,
    num_threads: int = 1,
    delete_replicated_if_not_in_src: bool = False,
    delete_not_replicated_in_dst: bool = False,
    skip_unlinkable: bool = False,
    skip_nonasset: bool = False,
    target_external_ids: Optional[List[str]] = None,
    exclude_pattern: Optional[str] = None,
):
    """
    Replicates all the files from the source project into the destination project.

    Args:
        client_src: The client corresponding to the source project.
        client_dst: The client corresponding to the destination project.
        batch_size: The biggest batch size to post chunks in.
        num_threads: The number of threads to be used.
        delete_replicated_if_not_in_src: If True, will delete replicated files that are in the destination,
        but no longer in the source project (Default=False).
        delete_not_replicated_in_dst: If True, will delete files from the destination if they were not replicated
        from the source (Default=False).
        skip_unlinkable: If no assets exist in the destination for a file, do not replicate it
        skip_nonasset: If a file has no associated assets, do not replicate it
        target_external_ids: List of specific files external ids to replicate
        exclude_pattern: Regex pattern; files whose names match will not be replicated
    """
    project_src = client_src.config.project
    project_dst = client_dst.config.project

    # When specific external ids are requested, retrieve only those files;
    # otherwise list all files in both projects.
    if target_external_ids:
        files_src = client_src.files.retrieve_multiple(
            external_ids=target_external_ids, ignore_unknown_ids=True)
        try:
            files_dst = client_dst.files.retrieve_multiple(
                external_ids=target_external_ids, ignore_unknown_ids=True)
        except CogniteNotFoundError:
            files_dst = FileMetadataList([])
    else:
        files_src = client_src.files.list(limit=None)
        files_dst = client_dst.files.list(limit=None)
        logging.info(
            f"There are {len(files_src)} existing files in source ({project_src})."
        )
        logging.info(
            f"There are {len(files_dst)} existing files in destination ({project_dst})."
        )

    src_id_dst_file = replication.make_id_object_map(files_dst)

    # Build the mapping from source asset ids to destination asset ids for
    # assets that have already been replicated.
    assets_dst = client_dst.assets.list(limit=None)
    src_dst_ids_assets = replication.existing_mapping(*assets_dst)
    logging.info(
        f"If a file's asset ids are among the {len(src_dst_ids_assets)} assets "
        f"that have been replicated, then they will be linked.")

    compiled_re = None
    if exclude_pattern:
        compiled_re = re.compile(exclude_pattern)

    def filter_fn(file):
        # Keep only copyable files; when an exclude pattern is given, also drop
        # files whose external id matches it (files without an external id
        # cannot match and are kept).
        if compiled_re and file.external_id is not None:
            return _is_copyable(file) and compiled_re.search(file.external_id) is None
        return _is_copyable(file)

    if skip_unlinkable or skip_nonasset or exclude_pattern:
        pre_filter_length = len(files_src)
        files_src = replication.filter_objects(files_src, src_dst_ids_assets,
                                               skip_unlinkable, skip_nonasset,
                                               filter_fn)
        logging.info(
            f"Filtered out {pre_filter_length - len(files_src)} files. {len(files_src)} files remain."
        )

    # Millisecond timestamp stamped onto every copied/updated file.
    replicated_runtime = int(time.time()) * 1000
    logging.info(
        f"These copied/updated files will have a replicated run time of: {replicated_runtime}."
    )

    logging.info(f"Starting to copy and update {len(files_src)} files from "
                 f"source ({project_src}) to destination ({project_dst}).")

    # Thread the copy when there is more than one batch of files;
    # otherwise copy everything in a single call.
    if len(files_src) > batch_size:
        replication.thread(
            num_threads=num_threads,
            batch_size=batch_size,
            copy=copy_files,
            src_objects=files_src,
            src_id_dst_obj=src_id_dst_file,
            src_dst_ids_assets=src_dst_ids_assets,
            project_src=project_src,
            replicated_runtime=replicated_runtime,
            client=client_dst,
            src_filter=files_dst,
        )
    else:
        copy_files(
            src_files=files_src,
            src_id_dst_file=src_id_dst_file,
            src_dst_ids_assets=src_dst_ids_assets,
            project_src=project_src,
            runtime=replicated_runtime,
            client=client_dst,
            src_filter=files_dst,
        )

    logging.info(f"Finished copying and updating {len(files_src)} files from "
                 f"source ({project_src}) to destination ({project_dst}).")

    if delete_replicated_if_not_in_src:
        ids_to_delete = replication.find_objects_to_delete_if_not_in_src(
            files_src, files_dst)
        client_dst.files.delete(id=ids_to_delete)
        logging.info(
            f"Deleted {len(ids_to_delete)} files in destination ({project_dst})"
            f" because they were no longer in source ({project_src}).")
    if delete_not_replicated_in_dst:
        ids_to_delete = replication.find_objects_to_delete_not_replicated_in_dst(
            files_dst)
        client_dst.files.delete(id=ids_to_delete)
        logging.info(
            f"Deleted {len(ids_to_delete)} files in destination ({project_dst}) because"
            f" they were not replicated from source ({project_src}).")
Example 2
    def upload(
        self,
        path: str,
        external_id: Optional[str] = None,
        name: Optional[str] = None,
        source: Optional[str] = None,
        mime_type: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        asset_ids: Optional[List[int]] = None,
        source_created_time: Optional[int] = None,
        source_modified_time: Optional[int] = None,
        recursive: bool = False,
        overwrite: bool = False,
    ) -> Union[FileMetadata, FileMetadataList]:
        """Upload a file

        Args:
            path (str): Path to the file you wish to upload. If path is a directory, this method will upload all files in that directory.
            external_id (str): The external ID provided by the client. Must be unique within the project.
            name (str): Name of the file.
            source (str): The source of the file.
            mime_type (str): File type. E.g. text/plain, application/pdf, ...
            metadata (Dict[str, Any]): Customizable extra data about the file. String key -> String value.
            asset_ids (List[int]): IDs of the assets to associate with the file.
            source_created_time (int): The timestamp for when the file was originally created in the source system.
            source_modified_time (int): The timestamp for when the file was last modified in the source system.
            recursive (bool): If path is a directory, upload all contained files recursively.
            overwrite (bool): If 'overwrite' is set to true, and the POST body content specifies an 'externalId' field,
                fields for the file found for that externalId can be overwritten. The default setting is false.
                If metadata is included in the request body, all of the original metadata will be overwritten.
                The actual file will be overwritten after successful upload. If there is no successful upload, the
                current file contents will be kept.
                File-Asset mappings only change if explicitly stated in the assetIds field of the POST json body.
                Do not set assetIds in request body if you want to keep the current file-asset mappings.

        Returns:
            Union[FileMetadata, FileMetadataList]: The file metadata of the uploaded file(s).

        Examples:

            Upload a file in a given path::

                >>> from cognite.client import CogniteClient
                >>> c = CogniteClient()
                >>> res = c.files.upload("/path/to/file", name="my_file")

            If name is omitted, this method will use the name of the file::

                >>> from cognite.client import CogniteClient
                >>> c = CogniteClient()
                >>> res = c.files.upload("/path/to/file")

            You can also upload all files in a directory by setting path to the path of a directory::

                >>> from cognite.client import CogniteClient
                >>> c = CogniteClient()
                >>> res = c.files.upload("/path/to/my/directory")

        """
        file_metadata = FileMetadata(
            name=name,
            external_id=external_id,
            source=source,
            mime_type=mime_type,
            metadata=metadata,
            asset_ids=asset_ids,
            source_created_time=source_created_time,
            source_modified_time=source_modified_time,
        )
        if os.path.isfile(path):
            # Single file: default the name to the file's basename.
            if not name:
                file_metadata.name = os.path.basename(path)
            return self._upload_file_from_path(file_metadata, path, overwrite)
        elif os.path.isdir(path):
            tasks = []
            if recursive:
                # Walk the directory tree and queue one upload task per file.
                for root, _, files in os.walk(path):
                    for file in files:
                        file_path = os.path.join(root, file)
                        basename = os.path.basename(file_path)
                        tasks.append((FileMetadata(name=basename), file_path,
                                      overwrite))
            else:
                # Queue only the files directly inside the directory.
                for file_name in os.listdir(path):
                    file_path = os.path.join(path, file_name)
                    if os.path.isfile(file_path):
                        tasks.append((FileMetadata(name=file_name), file_path,
                                      overwrite))
            tasks_summary = utils._concurrency.execute_tasks_concurrently(
                self._upload_file_from_path, tasks, self._config.max_workers)
            tasks_summary.raise_compound_exception_if_failed_tasks(
                task_unwrap_fn=lambda x: x[0].name)
            return FileMetadataList(tasks_summary.results)
        raise ValueError("path '{}' does not exist".format(path))
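
A short usage sketch for the directory branch above (not part of the original snippet; it assumes a configured client and an existing local directory):

    from cognite.client import CogniteClient

    c = CogniteClient()  # placeholder: assumes credentials are already configured
    # Walk the directory tree and upload every file; each file's basename becomes
    # its name, and the call returns a FileMetadataList.
    res = c.files.upload("/path/to/my/directory", recursive=True)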