def _get_testing_directories(self):
    """Get the testing directories."""
    dirs = [connector.path for connector in connectors.for_storage("data")]
    dirs += [connector.path for connector in connectors.for_storage("upload")]
    dirs += [
        storage_settings.FLOW_VOLUMES[volume_name]["config"]["path"]
        for volume_name in ["processing", "input"]
        if volume_name in storage_settings.FLOW_VOLUMES
    ]
    return dirs
def setUp(self):
    """Initialize test data."""
    super().setUp()

    _, stderr = self._register_schemas()
    stderr = stderr.getvalue()
    if stderr:
        raise RuntimeError(stderr)

    upload_connectors = [
        connector
        for connector in connectors.for_storage("upload")
        if connector.mountable
    ]
    assert upload_connectors, "No upload connector defined on filesystem"
    self.upload_dir = upload_connectors[0].path

    self._profiler = TestProfiler(self)
    self._preparation_stage = 0
    self._executed_processes = set()
    self._files_path = None
    self._upload_files = []

    # Create the upload dir if it doesn't exist.
    if not os.path.isdir(self.upload_dir):
        os.mkdir(self.upload_dir)
def list(self, request):
    """Return the JSON representing the upload configuration.

    The returned object is a JSON representation of a dictionary with the
    following fields:

    - type: the type of the upload connector. Currently the 'LOCAL' and
      'S3' connector types are supported. Upload through the server is
      always supported.
    - config: the dictionary of credentials used to upload data. Its
      contents are specific to the connector type and may be empty.
    """
    try:
        upload_connector = connectors.for_storage("upload")[0]
        prefix = str(get_user(request.user).id)
        response = {
            "type": upload_connector.CONNECTOR_TYPE.name,
            "config": upload_connector.temporary_credentials(prefix),
        }
    except Exception:
        message = "Upload connector could not be determined."
        logger.exception(message)
        raise ImproperlyConfigured(message)
    return Response(response)
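# An illustrative response for an assumed S3 upload connector; the "config"
# contents come from the connector's temporary_credentials() call and are
# connector specific (a LOCAL connector may return an empty dictionary):
#
#     {
#         "type": "S3",
#         "config": {"...": "temporary credentials scoped to the user's prefix"},
#     }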
def _lock_inputs_local_storage_locations(self, data: Data):
    """Lock storage locations for inputs.

    Lock the storage locations of the inputs so they are not deleted while
    the data object is processing.
    """
    data_connectors = connectors.for_storage("data")
    mountable_data_connectors = [
        connector for connector in data_connectors if connector.mountable
    ]
    priority_range = data_connectors[-1].priority - data_connectors[0].priority + 1
    connector_priorities = {
        connector.name: connector.priority for connector in data_connectors
    }
    # Prefer mountable locations but keep their relative priorities intact.
    for connector in mountable_data_connectors:
        connector_priorities[connector.name] -= priority_range

    whens = [
        models.When(connector_name=connector_name, then=priority)
        for connector_name, priority in connector_priorities.items()
    ]
    storage_location_subquery = (
        StorageLocation.objects.filter(file_storage_id=OuterRef("file_storage_id"))
        .annotate(
            priority=models.Case(
                *whens,
                default=DEFAULT_CONNECTOR_PRIORITY,
                output_field=models.IntegerField(),
            )
        )
        .order_by("priority")
        .values_list("id", flat=True)[:1]
    )

    file_storages = (
        DataDependency.objects.filter(child=data, kind=DataDependency.KIND_IO)
        .values_list("parent__location", flat=True)
        .distinct()
    )
    storage_locations = (
        StorageLocation.objects.filter(file_storage__in=file_storages)
        .filter(pk__in=Subquery(storage_location_subquery))
        .values_list("id", flat=True)
    )
    AccessLog.objects.bulk_create(
        [
            AccessLog(
                storage_location_id=storage_location,
                reason="Input for data with id {}".format(data.id),
                cause=data,
            )
            for storage_location in storage_locations
        ]
    )
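# A worked example of the priority shift above (values are assumed for
# illustration): with connector priorities {"local": 1, "s3": 100} the
# priority_range is 100 - 1 + 1 = 100. Subtracting it from the mountable
# "local" connector gives {"local": -99, "s3": 100}, so mountable locations
# always rank before non-mountable ones while the relative order among
# mountable connectors themselves is preserved. The Case/When annotation then
# selects the single best-ranked StorageLocation per file storage.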
def resolve_data_path(self, data=None, filename=None):
    """Resolve data path for use with the executor.

    :param data: Data object instance
    :param filename: Filename to resolve
    :return: Resolved filename, which can be used to access the given
        data file in programs executed using this executor
    :raises RuntimeError: when the data path cannot be resolved.
    """
    storage_name = "data"
    filesystem_connectors = [
        connector
        for connector in connectors.for_storage(storage_name)
        if connector.mountable
    ]
    if data is None:
        if not filesystem_connectors:
            return constants.INPUTS_VOLUME
        else:
            return f"/{storage_name}_{filesystem_connectors[0].name}"

    data_connectors = data.location.connectors
    for connector in filesystem_connectors:
        if connector in data_connectors:
            return data.location.get_path(
                prefix=f"/{storage_name}_{connector.name}", filename=filename
            )
    return data.location.get_path(prefix=constants.INPUTS_VOLUME, filename=filename)
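# Usage sketch (illustrative; assumes a mountable data connector named
# "local" and a hypothetical `executor` instance and `data` object):
#
#     executor.resolve_data_path()                     # -> "/data_local"
#     executor.resolve_data_path(data, "reads.fastq")  # path under "/data_local/..."
#
# When no mountable data connector exists, or the data is not stored on any
# of them, the path falls back to the shared inputs volume
# (constants.INPUTS_VOLUME).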
def get_mountable_connectors() -> Iterable[Tuple[str, BaseStorageConnector]]:
    """Iterate through all the storages and find mountable connectors.

    :returns: list of tuples (storage_name, connector).
    """
    return [
        (storage_name, connector)
        for storage_name in storage_settings.FLOW_STORAGE
        for connector in connectors.for_storage(storage_name)
        if connector.mountable
    ]
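# Illustrative return value, assuming one mountable connector for the "data"
# storage and one for the "upload" storage:
#
#     [("data", <local data connector>), ("upload", <local upload connector>)]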
def _prepare_settings():
    """Prepare and apply settings/port overrides needed for testing.

    Override the necessary settings and bind to a free port that will be
    used by the listener.

    :returns: tuple (overrides, zmq_socket).
    """
    # Override container name prefix setting.
    mountable_data_connectors = [
        connector
        for connector in connectors.for_storage("data")
        if connector.mountable
    ]
    resolwe_settings.FLOW_EXECUTOR_SETTINGS[
        "CONTAINER_NAME_PREFIX"
    ] = "{}_{}_{}".format(
        getattr(settings, "FLOW_EXECUTOR", {}).get("CONTAINER_NAME_PREFIX", "resolwe"),
        # NOTE: This is necessary to avoid container name clashes when tests
        # are run from different Resolwe code bases on the same system (e.g.
        # on a CI server).
        get_random_string(length=6),
        os.path.basename(mountable_data_connectors[0].path),
    )

    hosts = list(
        settings.FLOW_EXECUTOR.get("LISTENER_CONNECTION", {})
        .get("hosts", {"local": "127.0.0.1"})
        .values()
    )
    protocol = settings.FLOW_EXECUTOR.get("LISTENER_CONNECTION", {}).get(
        "protocol", "tcp"
    )
    min_port = settings.FLOW_EXECUTOR.get("LISTENER_CONNECTION", {}).get(
        "min_port", 50000
    )
    max_port = settings.FLOW_EXECUTOR.get("LISTENER_CONNECTION", {}).get(
        "max_port", 60000
    )

    zmq_context: zmq.asyncio.Context = zmq.asyncio.Context.instance()
    zmq_socket: zmq.asyncio.Socket = zmq_context.socket(zmq.ROUTER)
    zmq_socket.setsockopt(zmq.ROUTER_HANDOVER, 1)
    host = hosts[0]
    port = zmq_socket.bind_to_random_port(
        f"{protocol}://{host}", min_port=min_port, max_port=max_port
    )
    for host in hosts[1:]:
        zmq_socket.bind(f"{protocol}://{host}:{port}")

    # Set the port in the settings.
    resolwe_settings.FLOW_EXECUTOR_SETTINGS["LISTENER_CONNECTION"]["port"] = port

    overrides = override_settings(
        CELERY_ALWAYS_EAGER=True,
        FLOW_EXECUTOR=resolwe_settings.FLOW_EXECUTOR_SETTINGS,
        FLOW_MANAGER=resolwe_settings.FLOW_MANAGER_SETTINGS,
    )

    return (overrides, zmq_socket)
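# A minimal, self-contained sketch of the port-binding pattern used above:
# bind a ROUTER socket to a random free port on the first host, then reuse
# that same port for any additional hosts. This helper is illustrative only
# and not part of Resolwe; the default host and port range mirror the
# fallbacks used in _prepare_settings.
def _example_bind_listener_socket(hosts=("127.0.0.1",), min_port=50000, max_port=60000):
    """Bind a ROUTER socket to a random free port on every given host."""
    import zmq
    import zmq.asyncio

    context = zmq.asyncio.Context.instance()
    socket = context.socket(zmq.ROUTER)
    # Let a reconnecting peer with the same identity take over the existing
    # connection instead of being dropped.
    socket.setsockopt(zmq.ROUTER_HANDOVER, 1)
    port = socket.bind_to_random_port(
        f"tcp://{hosts[0]}", min_port=min_port, max_port=max_port
    )
    for host in hosts[1:]:
        socket.bind(f"tcp://{host}:{port}")
    return socket, port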
def get_upload_dir() -> str:
    """Get the upload path.

    :returns: the mount point ('/upload_<connector name>') of the first
        mountable connector for the 'upload' storage.
    :raises RuntimeError: if no applicable connector is found.
    """
    for connector in connectors.for_storage("upload"):
        if connector.mountable:
            return f"/upload_{connector.name}"
    raise RuntimeError("No mountable upload connector is defined.")
def handle_resolve_data_path(
    self, message: Message[int], manager: "Processor"
) -> Response[str]:
    """Return the base path that stores the given data."""
    data_pk = message.message_data
    if data_pk not in self._hydrate_cache:
        mount_point = os.fspath(constants.INPUTS_VOLUME)
        data_connectors = FileStorage.objects.get(data__pk=data_pk).connectors
        for connector in connectors.for_storage("data"):
            if connector in data_connectors:
                mount_point = f"/data_{connector.name}"
                break
        self._hydrate_cache[data_pk] = mount_point
    return message.respond_ok(self._hydrate_cache[data_pk])
def handle(self, *args, **kwargs):
    """Clean up files created during testing."""
    directories = [
        Path(connector.path)
        for connector in chain(
            connectors.for_storage("data"), connectors.for_storage("upload")
        )
        if connector.mountable
    ]
    directories += [
        Path(volume_config["config"]["path"])
        for volume_name, volume_config in storage_settings.FLOW_VOLUMES.items()
        if not volume_config["config"].get("read_only", False)
        and volume_config["type"] == "host_path"
    ]
    for directory in directories:
        directory = directory.resolve()
        for test_dir in directory.iterdir():
            if not test_dir.is_dir():
                continue
            if not re.match(TEST_DIR_REGEX, test_dir.name):
                continue
            shutil.rmtree(test_dir)
def resolve_data_path(self, data=None, filename=None):
    """Resolve data path for use with the executor.

    :param data: Data object instance
    :param filename: Filename to resolve
    :return: Resolved filename, which can be used to access the given
        data file in programs executed using this executor
    """
    if data is None:
        filesystem_connectors = [
            connector
            for connector in connectors.for_storage("data")
            if connector.mountable
        ]
        return filesystem_connectors[0].path if filesystem_connectors else None
    return data.location.get_path(filename=filename)
def create_data_location(subpath=None):
    """Create the equivalent of the old DataLocation object.

    When the subpath argument is None, the ID of the created file storage
    object is used as the subpath.
    """
    file_storage = FileStorage.objects.create()
    if subpath is None:
        subpath = file_storage.pk

    data_connector = [
        connector.name
        for connector in connectors.for_storage("data")
        if connector.mountable
    ][0]
    StorageLocation.objects.create(
        url=subpath, file_storage=file_storage, connector_name=data_connector
    )
    return file_storage
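# Usage sketch (illustrative; the explicit subpath value is assumed):
#
#     file_storage = create_data_location()           # subpath defaults to file_storage.pk
#     file_storage = create_data_location("test_42")  # explicit subpath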
def resolve_upload_path(self, filename=None):
    """Resolve upload path for use with the executor.

    :param filename: Filename to resolve
    :return: Resolved filename, which can be used to access the given
        uploaded file in programs executed using this executor
    """
    upload_connectors = [
        connector
        for connector in connectors.for_storage("upload")
        if connector.mountable
    ]
    if not upload_connectors:
        raise RuntimeError("No connectors are configured for 'upload' storage.")

    upload_connector = upload_connectors[0]
    if filename is None:
        return f"/upload_{upload_connector.name}"
    return f"/upload_{upload_connector.name}/{filename}"
def resolve_upload_path(self, filename=None):
    """Resolve upload path for use with the executor.

    :param filename: Filename to resolve
    :return: Resolved filename, which can be used to access the given
        uploaded file in programs executed using this executor
    :raises RuntimeError: when no storage connectors are configured for
        upload storage or path could not be resolved.
    """
    upload_connectors = [
        connector
        for connector in connectors.for_storage("upload")
        if connector.mountable
    ]
    if not upload_connectors:
        raise RuntimeError("No connectors are configured for 'upload' storage.")
    upload_connector = upload_connectors[0]

    if filename is None:
        return upload_connector.path
    else:
        return os.path.join(upload_connector.path, filename)
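# Note the difference between the two resolve_upload_path variants above: the
# first returns the path as seen inside the executor container (e.g.
# "/upload_local/file.txt" for an assumed connector named "local"), while
# this one returns a path rooted at the connector's own (host-side) path,
# i.e. os.path.join(upload_connector.path, "file.txt").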
def handle_missing_data_locations(
    self, message: Message, manager: "Processor"
) -> Response[Union[str, Dict[str, Dict[str, Any]]]]:
    """Handle an incoming request to get missing data locations."""
    storage_name = "data"
    filesystem_connector = None
    filesystem_connectors = [
        connector
        for connector in connectors.for_storage(storage_name)
        if connector.mountable
    ]
    if filesystem_connectors:
        filesystem_connector = filesystem_connectors[0]

    missing_data = dict()
    dependencies = (
        Data.objects.filter(
            children_dependency__child=manager.data,
            children_dependency__kind=DataDependency.KIND_IO,
        )
        .exclude(location__isnull=True)
        .exclude(pk=manager.data.id)
        .distinct()
    )

    for parent in dependencies:
        file_storage = parent.location
        # Is the location available on some local connector?
        if any(
            file_storage.has_storage_location(filesystem_connector.name)
            for filesystem_connector in filesystem_connectors
        ):
            continue

        from_location = file_storage.default_storage_location
        if from_location is None:
            manager._log_exception(
                "No storage location exists (handle_get_missing_data_locations).",
                extra={"file_storage_id": file_storage.id},
            )
            return message.respond_error("No storage location exists")

        # When at least one filesystem connector exists for the data storage,
        # download the inputs to the shared storage.
        missing_data_item = {
            "data_id": parent.pk,
            "from_connector": from_location.connector_name,
        }
        if filesystem_connector:
            to_location = StorageLocation.all_objects.get_or_create(
                file_storage=file_storage,
                url=from_location.url,
                connector_name=filesystem_connector.name,
            )[0]
            missing_data_item["from_storage_location_id"] = from_location.id
            missing_data_item["to_storage_location_id"] = to_location.id
            missing_data_item["to_connector"] = filesystem_connector.name
        else:
            missing_data_item["files"] = list(
                ReferencedPath.objects.filter(
                    storage_locations=from_location
                ).values()
            )
        missing_data[from_location.url] = missing_data_item

        # Set the last modified time so the location does not get deleted.
        from_location.last_update = now()
        from_location.save()

    return message.respond_ok(missing_data)
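# Illustrative shape of the payload returned on success (all values are
# assumed examples). Each entry is keyed by the source storage location URL;
# the "to_*" fields are present only when a mountable filesystem connector is
# configured, otherwise the referenced paths are listed under "files":
#
#     {
#         "<from_location.url>": {
#             "data_id": 42,
#             "from_connector": "s3",
#             "from_storage_location_id": 10,
#             "to_storage_location_id": 11,
#             "to_connector": "local",
#         },
#     }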