Exemple #1
0
 def _get_testing_directories(self):
     """Collect the directories used during testing.

     The result lists, in this order: the path of every connector of the
     "data" storage, the path of every connector of the "upload" storage and
     the configured path of the "processing" and "input" volumes (only for
     the volumes that are present in ``storage_settings.FLOW_VOLUMES``).

     :return: list of directory paths.
     """
     directories = []
     for storage_name in ("data", "upload"):
         for connector in connectors.for_storage(storage_name):
             directories.append(connector.path)

     for volume_name in ("processing", "input"):
         # Volumes that are not configured are silently skipped.
         if volume_name not in storage_settings.FLOW_VOLUMES:
             continue
         volume = storage_settings.FLOW_VOLUMES[volume_name]
         directories.append(volume["config"]["path"])
     return directories
Exemple #2
0
    def setUp(self):
        """Initialize test data.

        Registers the schemas, selects the first mountable connector of the
        "upload" storage as the upload directory, resets the per-test
        bookkeeping attributes and makes sure the upload directory exists.

        :raises RuntimeError: when schema registration wrote anything to the
            captured stderr stream.
        """
        super().setUp()

        _, stderr = self._register_schemas()
        stderr = stderr.getvalue()
        if stderr:
            raise RuntimeError(stderr)

        upload_connectors = [
            connector for connector in connectors.for_storage("upload")
            if connector.mountable
        ]
        assert upload_connectors, "No upload connector defined on filesystem"

        self.upload_dir = upload_connectors[0].path
        self._profiler = TestProfiler(self)
        self._preparation_stage = 0
        self._executed_processes = set()
        self._files_path = None
        self._upload_files = []

        # Create the upload dir (and any missing parents) if it doesn't exist.
        # ``exist_ok=True`` removes the check-then-create race of
        # ``isdir()`` followed by ``mkdir()`` when several test processes
        # start at the same time.
        os.makedirs(self.upload_dir, exist_ok=True)
Exemple #3
0
    def list(self, request):
        """Return the JSON representing the upload configuration.

        The returned object is the JSON representation of a dictionary with
        the following fields:

        - type: the type of upload connector. Currently we
          support 'LOCAL', and 'S3' connector types. Upload through server is
          always supported.

        - config: the dictionary representing the set of credentials that
          are used to upload data. This dictionary is specific to connector
          type and may be empty. It is obtained from the connector's
          ``temporary_credentials`` method, called with the id of the
          requesting user (as a string) as prefix.

        :param request: the incoming request; its ``user`` determines the
            credentials prefix.
        :raises ImproperlyConfigured: when the upload connector or its
            credentials could not be determined.
        """
        try:
            # The first connector of the "upload" storage is the one used.
            upload_connector = connectors.for_storage("upload")[0]
            prefix = str(get_user(request.user).id)
            response = {
                "type": upload_connector.CONNECTOR_TYPE.name,
                "config": upload_connector.temporary_credentials(prefix),
            }
        except Exception as error:
            message = "Upload connector could not be determined."
            logger.exception(message)
            # Chain explicitly so the original failure is reported as the cause.
            raise ImproperlyConfigured(message) from error

        return Response(response)
Exemple #4
0
    def _lock_inputs_local_storage_locations(self, data: Data):
        """Lock storage locations for inputs.

        Lock storage locations of inputs so they are not deleted while data
        object is processing.

        For every file storage that is an input (``KIND_IO`` dependency) of
        the given data object, the storage location with the lowest priority
        value is selected and an ``AccessLog`` entry is created for it.
        Locations on mountable connectors are preferred over all others.

        :param data: the data object whose inputs should be locked.
        """
        data_connectors = connectors.for_storage("data")
        connector_priorities = {
            connector.name: connector.priority for connector in data_connectors
        }

        # Prefer mountable locations but keep their relations intact: shifting
        # every mountable connector down by the whole span of priorities puts
        # all of them before every non-mountable one without reordering them.
        # The span is computed with min/max so it neither depends on the
        # connectors being sorted nor fails when no connector is configured.
        if connector_priorities:
            priority_range = (
                max(connector_priorities.values())
                - min(connector_priorities.values())
                + 1
            )
            for connector in data_connectors:
                if connector.mountable:
                    connector_priorities[connector.name] -= priority_range

        whens = [
            models.When(connector_name=connector_name, then=priority)
            for connector_name, priority in connector_priorities.items()
        ]

        # Id of the best (lowest priority value) location of a file storage.
        # Locations on unknown connectors get DEFAULT_CONNECTOR_PRIORITY.
        storage_location_subquery = (
            StorageLocation.objects.filter(file_storage_id=OuterRef("file_storage_id"))
            .annotate(
                priority=models.Case(
                    *whens,
                    default=DEFAULT_CONNECTOR_PRIORITY,
                    output_field=models.IntegerField(),
                )
            )
            .order_by("priority")
            .values_list("id", flat=True)[:1]
        )

        # File storages of all the inputs of the given data object.
        file_storages = (
            DataDependency.objects.filter(child=data, kind=DataDependency.KIND_IO)
            .values_list("parent__location", flat=True)
            .distinct()
        )

        storage_locations = (
            StorageLocation.objects.filter(file_storage__in=file_storages)
            .filter(pk__in=Subquery(storage_location_subquery))
            .values_list("id", flat=True)
        )

        AccessLog.objects.bulk_create(
            [
                AccessLog(
                    storage_location_id=storage_location,
                    reason="Input for data with id {}".format(data.id),
                    cause=data,
                )
                for storage_location in storage_locations
            ]
        )
Exemple #5
0
    def resolve_data_path(self, data=None, filename=None):
        """Resolve data path for use with the executor.

        Without a data object the base mount point is returned: the mount
        point of the first mountable "data" connector, or the inputs volume
        when no such connector exists. With a data object the path is
        resolved below the mount point of the first mountable connector that
        holds the data, falling back to the inputs volume.

        :param data: Data object instance
        :param filename: Filename to resolve
        :return: Resolved filename, which can be used to access the
            given data file in programs executed using this executor
        :raises RuntimeError: when data path can not be resolved
            (NOTE(review): nothing in this method raises it directly;
            presumably it originates from ``get_path`` - confirm).
        """
        storage_name = "data"
        mountable = []
        for candidate in connectors.for_storage(storage_name):
            if candidate.mountable:
                mountable.append(candidate)

        if data is None:
            if mountable:
                return f"/{storage_name}_{mountable[0].name}"
            return constants.INPUTS_VOLUME

        available = data.location.connectors
        for candidate in mountable:
            if candidate not in available:
                continue
            return data.location.get_path(
                prefix=f"/{storage_name}_{candidate.name}", filename=filename
            )

        # The data is not present on any mountable connector.
        return data.location.get_path(
            prefix=constants.INPUTS_VOLUME, filename=filename
        )
Exemple #6
0
def get_mountable_connectors() -> Iterable[Tuple[str, BaseStorageConnector]]:
    """Collect the mountable connectors of all configured storages.

    Storages are visited in the iteration order of
    ``storage_settings.FLOW_STORAGE`` and connectors in the order returned
    by ``connectors.for_storage``.

    :returns: list of tuples (storage_name, connector).
    """
    mountable = []
    for storage_name in storage_settings.FLOW_STORAGE:
        for connector in connectors.for_storage(storage_name):
            if connector.mountable:
                mountable.append((storage_name, connector))
    return mountable
Exemple #7
0
def _prepare_settings():
    """Prepare and apply settings/port overrides needed for testing.

    Override necessary settings and bind a ZeroMQ ROUTER socket to a free
    port that will be used in listener. The chosen port is stored in
    ``resolwe_settings.FLOW_EXECUTOR_SETTINGS["LISTENER_CONNECTION"]["port"]``.

    :returns: tuple (overrides, zmq_socket) where ``overrides`` is the
        ``override_settings`` object with the adjusted settings and
        ``zmq_socket`` is the socket already bound to the chosen port.
    """
    # Read the executor settings once; a missing FLOW_EXECUTOR setting is
    # treated as empty everywhere instead of only for the name prefix.
    flow_executor = getattr(settings, "FLOW_EXECUTOR", {})
    listener_connection = flow_executor.get("LISTENER_CONNECTION", {})

    # Override container name prefix setting.
    mountable_data_connectors = [
        connector for connector in connectors.for_storage("data") if connector.mountable
    ]
    resolwe_settings.FLOW_EXECUTOR_SETTINGS[
        "CONTAINER_NAME_PREFIX"
    ] = "{}_{}_{}".format(
        flow_executor.get("CONTAINER_NAME_PREFIX", "resolwe"),
        # NOTE: This is necessary to avoid container name clashes when tests are run from
        # different Resolwe code bases on the same system (e.g. on a CI server).
        get_random_string(length=6),
        os.path.basename(mountable_data_connectors[0].path),
    )

    hosts = list(listener_connection.get("hosts", {"local": "127.0.0.1"}).values())
    protocol = listener_connection.get("protocol", "tcp")
    min_port = listener_connection.get("min_port", 50000)
    max_port = listener_connection.get("max_port", 60000)

    zmq_context: zmq.asyncio.Context = zmq.asyncio.Context.instance()
    zmq_socket: zmq.asyncio.Socket = zmq_context.socket(zmq.ROUTER)
    zmq_socket.setsockopt(zmq.ROUTER_HANDOVER, 1)

    # Pick a free port on the first host, then bind the same port on the
    # remaining hosts.
    host = hosts[0]
    port = zmq_socket.bind_to_random_port(
        f"{protocol}://{host}", min_port=min_port, max_port=max_port
    )
    for host in hosts[1:]:
        zmq_socket.bind(f"{protocol}://{host}:{port}")

    # Set the port in the settings.
    resolwe_settings.FLOW_EXECUTOR_SETTINGS["LISTENER_CONNECTION"]["port"] = port

    overrides = override_settings(
        CELERY_ALWAYS_EAGER=True,
        FLOW_EXECUTOR=resolwe_settings.FLOW_EXECUTOR_SETTINGS,
        FLOW_MANAGER=resolwe_settings.FLOW_MANAGER_SETTINGS,
    )
    return (overrides, zmq_socket)
Exemple #8
0
def get_upload_dir() -> str:
    """Get the upload mount point.

    :returns: ``/upload_<name>`` where ``<name>`` is the name of the first
        mountable connector for storage 'upload'.

    :raises RuntimeError: if no applicable connector is found.
    """
    first_mountable = next(
        (
            candidate
            for candidate in connectors.for_storage("upload")
            if candidate.mountable
        ),
        None,
    )
    if first_mountable is None:
        raise RuntimeError("No mountable upload connector is defined.")
    return f"/upload_{first_mountable.name}"
Exemple #9
0
 def handle_resolve_data_path(self, message: Message[int],
                              manager: "Processor") -> Response[str]:
     """Return the base path that stores given data.

     The base path is ``/data_<name>`` for the first mountable connector of
     the "data" storage that holds the data, and the inputs volume
     otherwise. Results are cached per data primary key in
     ``self._hydrate_cache``.

     :param message: message whose ``message_data`` is the primary key of
         the data object.
     :param manager: the processor handling the message (unused here).
     :return: OK response carrying the base path.
     """
     data_pk = message.message_data
     if data_pk not in self._hydrate_cache:
         mount_point = os.fspath(constants.INPUTS_VOLUME)
         data_connectors = FileStorage.objects.get(
             data__pk=data_pk).connectors
         for connector in connectors.for_storage("data"):
             # Skip connectors that are not mountable: returning
             # /data_<name> for one of them would presumably point at a
             # path that is not mounted.
             if connector.mountable and connector in data_connectors:
                 mount_point = f"/data_{connector.name}"
                 break
         self._hydrate_cache[data_pk] = mount_point
     return message.respond_ok(self._hydrate_cache[data_pk])
Exemple #10
0
    def handle(self, *args, **kwargs):
        """Cleanup files created during testing.

        Scans the paths of all mountable "data" and "upload" connectors and
        of all writable ``host_path`` volumes, and recursively removes every
        sub-directory whose name matches ``TEST_DIR_REGEX``. Configured
        directories that do not exist are skipped.
        """
        directories = [
            Path(connector.path)
            for connector in chain(connectors.for_storage("data"),
                                   connectors.for_storage("upload"))
            if connector.mountable
        ]
        directories += [
            Path(volume_config["config"]["path"])
            for volume_config in storage_settings.FLOW_VOLUMES.values()
            if not volume_config["config"].get("read_only", False)
            and volume_config["type"] == "host_path"
        ]

        for directory in directories:
            directory = directory.resolve()
            # A directory that was never created has nothing to clean up;
            # iterating it would raise and abort the whole cleanup.
            if not directory.is_dir():
                continue
            for test_dir in directory.iterdir():
                if not test_dir.is_dir():
                    continue
                if not re.match(TEST_DIR_REGEX, test_dir.name):
                    continue

                shutil.rmtree(test_dir)
Exemple #11
0
    def resolve_data_path(self, data=None, filename=None):
        """Resolve data path for use with the executor.

        With a data object the path is resolved through its location.
        Without one, the path of the first mountable "data" connector is
        returned, or ``None`` when there is no such connector.

        :param data: Data object instance
        :param filename: Filename to resolve
        :return: Resolved filename, which can be used to access the
            given data file in programs executed using this executor
        """
        if data is not None:
            return data.location.get_path(filename=filename)

        for candidate in connectors.for_storage("data"):
            if candidate.mountable:
                return candidate.path
        return None
Exemple #12
0
def create_data_location(subpath=None):
    """Create equivalent of old DataLocation object.

    A new file storage is created together with one storage location on the
    first mountable connector of the "data" storage. When ``subpath`` is
    None, the ID of the file storage object is used as the subpath.

    :param subpath: url of the created storage location.
    :return: the created file storage object.
    """
    file_storage = FileStorage.objects.create()
    url = file_storage.pk if subpath is None else subpath

    mountable_names = []
    for candidate in connectors.for_storage("data"):
        if candidate.mountable:
            mountable_names.append(candidate.name)
    # Indexing raises IndexError when no mountable connector is configured.
    connector_name = mountable_names[0]

    StorageLocation.objects.create(
        connector_name=connector_name, file_storage=file_storage, url=url
    )
    return file_storage
Exemple #13
0
    def resolve_upload_path(self, filename=None):
        """Resolve upload path for use with the executor.

        The path is rooted at ``/upload_<name>`` where ``<name>`` is the name
        of the first mountable connector of the "upload" storage.

        :param filename: Filename to resolve
        :return: Resolved filename, which can be used to access the
            given uploaded file in programs executed using this
            executor
        :raises RuntimeError: when the "upload" storage has no mountable
            connector.
        """
        first_mountable = next(
            (
                candidate
                for candidate in connectors.for_storage("upload")
                if candidate.mountable
            ),
            None,
        )
        if first_mountable is None:
            raise RuntimeError(
                "No connectors are configured for 'upload' storage.")

        if filename is not None:
            return f"/upload_{first_mountable.name}/{filename}"
        return f"/upload_{first_mountable.name}"
Exemple #14
0
    def resolve_upload_path(self, filename=None):
        """Resolve upload path for use with the executor.

        The path is rooted at the path of the first mountable connector of
        the "upload" storage.

        :param filename: Filename to resolve
        :return: Resolved filename, which can be used to access the
            given uploaded file in programs executed using this
            executor

        :raises RuntimeError: when the "upload" storage has no mountable
          connector.
        """
        for candidate in connectors.for_storage("upload"):
            if not candidate.mountable:
                continue
            # The first mountable connector decides the result.
            if filename is None:
                return candidate.path
            return os.path.join(candidate.path, filename)

        raise RuntimeError("No connectors are configured for 'upload' storage.")
    def handle_missing_data_locations(
        self, message: Message, manager: "Processor"
    ) -> Response[Union[str, Dict[str, Dict[str, Any]]]]:
        """Handle an incoming request to get missing data locations.

        Every input (``KIND_IO`` parent with a location) of the processed
        data object that has no storage location on any mountable "data"
        connector is reported. The response maps the url of the default
        storage location of such an input to a description of the transfer.

        :param message: the incoming message.
        :param manager: the processor whose ``data`` is being processed.
        :return: OK response with the missing data dictionary, or an error
            response when an input has no default storage location.
        """
        mountable_connectors = [
            candidate
            for candidate in connectors.for_storage("data")
            if candidate.mountable
        ]
        # Transfers target the first mountable connector, when there is one.
        target_connector = mountable_connectors[0] if mountable_connectors else None

        parents = (
            Data.objects.filter(
                children_dependency__child=manager.data,
                children_dependency__kind=DataDependency.KIND_IO,
            )
            .exclude(location__isnull=True)
            .exclude(pk=manager.data.id)
            .distinct()
        )

        missing_data = {}
        for parent in parents:
            file_storage = parent.location
            # Is location available on some local connector?
            locally_available = any(
                file_storage.has_storage_location(candidate.name)
                for candidate in mountable_connectors
            )
            if locally_available:
                continue

            from_location = file_storage.default_storage_location
            if from_location is None:
                manager._log_exception(
                    "No storage location exists (handle_get_missing_data_locations).",
                    extra={"file_storage_id": file_storage.id},
                )
                return message.respond_error("No storage location exists")

            entry = {
                "data_id": parent.pk,
                "from_connector": from_location.connector_name,
            }
            if target_connector:
                # When there exists at least one filesystem connector for the
                # data storage download inputs to the shared storage.
                to_location, _ = StorageLocation.all_objects.get_or_create(
                    file_storage=file_storage,
                    url=from_location.url,
                    connector_name=target_connector.name,
                )
                entry["from_storage_location_id"] = from_location.id
                entry["to_storage_location_id"] = to_location.id
                entry["to_connector"] = target_connector.name
            else:
                # Without a filesystem connector the referenced files of the
                # source location are listed instead.
                referenced_paths = ReferencedPath.objects.filter(
                    storage_locations=from_location
                )
                entry["files"] = list(referenced_paths.values())

            missing_data[from_location.url] = entry
            # Set last modified time so it does not get deleted.
            from_location.last_update = now()
            from_location.save()
        return message.respond_ok(missing_data)