コード例 #1
0
ファイル: input_data.py プロジェクト: prodo-dev/plz
    def publish_input_data(self, expected_input_id: str,
                           metadata: InputMetadata,
                           input_data_stream: BinaryIO) -> None:
        """Store the streamed input under its ID after verifying its hash.

        If a file for ``expected_input_id`` already exists, the stream is
        closed and nothing else happens. Otherwise the stream is copied, in
        chunks of ``READ_BUFFER_SIZE``, into a temporary file created in
        ``self.temp_data_dir`` while a SHA-256 digest is computed. When the
        hex digest equals ``expected_input_id`` the temporary file is renamed
        to ``self.input_file(expected_input_id)``.

        :param expected_input_id: hex SHA-256 digest the data must hash to
        :param metadata: when ``metadata.has_all_args()`` is true, the input
            ID is recorded for it via ``self._store_input_id``
        :param input_data_stream: binary stream the data is read from
        :raises IncorrectInputIDException: if the computed digest differs
            from ``expected_input_id``; the temporary file is removed first
        """
        input_file_path = self.input_file(expected_input_id)
        if os.path.exists(input_file_path):
            input_data_stream.close()
            return

        file_hash = hashlib.sha256()
        fd, temp_file_path = tempfile.mkstemp(dir=self.temp_data_dir)
        try:
            with os.fdopen(fd, 'wb') as f:
                bytes_read = 0
                while True:
                    data = input_data_stream.read(READ_BUFFER_SIZE)
                    bytes_read += len(data)
                    log.debug(f'{bytes_read} bytes of input read')
                    if not data:
                        break
                    f.write(data)
                    file_hash.update(data)

            input_id = file_hash.hexdigest()
            if input_id != expected_input_id:
                raise IncorrectInputIDException()

            os.rename(temp_file_path, input_file_path)
        except Exception:
            # Everything guarded here fails *before* the rename completes, so
            # the temporary file is still present and must be cleaned up.
            os.remove(temp_file_path)
            raise

        # Deliberately outside the try block: once the rename has happened
        # the temporary path no longer exists, so a failure while storing the
        # metadata must not trigger the cleanup above (the resulting
        # FileNotFoundError would hide the real error).
        if metadata.has_all_args():
            self._store_input_id(metadata, input_id)
コード例 #2
0
 def _has_input(self, input_id: str) -> bool:
     """Return the controller's ``check_input_data`` result for ``input_id``.

     The metadata passed along is built from this object's ``user``,
     ``project``, ``path`` and ``timestamp_millis`` attributes.
     """
     metadata = InputMetadata.of(user=self.user,
                                 project=self.project,
                                 path=self.path,
                                 timestamp_millis=self.timestamp_millis)
     return self.controller.check_input_data(input_id, metadata)
コード例 #3
0
 def put_input(self, input_id: str, input_metadata: InputMetadata,
               input_data_stream: BinaryIO) -> None:
     """Validate the metadata and publish the input data read from a stream.

     :param input_id: ID the data is published under
     :param input_metadata: metadata accompanying the input
     :param input_data_stream: binary stream the input data is read from
     :raises BadInputMetadataException: if
         ``input_metadata.has_all_args_or_none()`` is false
     """
     if not input_metadata.has_all_args_or_none():
         raise BadInputMetadataException(input_metadata.__dict__)
     # Read from the stream the caller handed in instead of reaching for the
     # global request object, so the parameter is actually honoured.
     self.input_data_configuration.publish_input_data(
         input_id, input_metadata, input_data_stream)
     # NOTE(review): the signature is annotated ``-> None`` but a JSON
     # response is returned; kept as is because callers may rely on it.
     return jsonify({'id': input_id})
コード例 #4
0
 def get_input_id_or_none(self,
                          input_metadata: InputMetadata) -> Optional[str]:
     """Look up the input ID for ``input_metadata`` in the configuration.

     :raises BadInputMetadataException: if
         ``input_metadata.has_all_args_or_none()`` is false
     :return: whatever ``get_input_id_from_metadata_or_none`` of the input
         data configuration returns for this metadata
     """
     if input_metadata.has_all_args_or_none():
         configuration = self.input_data_configuration
         return configuration.get_input_id_from_metadata_or_none(
             input_metadata)
     raise BadInputMetadataException(input_metadata.__dict__)
コード例 #5
0
def _get_input_metadata_from_request() -> InputMetadata:
    """Build an ``InputMetadata`` from the current request's query arguments.

    Each of ``user``, ``project``, ``path`` and ``timestamp_millis`` is read
    with ``request.args.get`` as a string and is ``None`` when absent.
    """
    metadata: InputMetadata = InputMetadata()
    # Same order as the attributes are declared above; every one is fetched
    # the same way, so a loop replaces four near-identical assignments.
    for name in ('user', 'project', 'path', 'timestamp_millis'):
        setattr(metadata, name,
                request.args.get(name, default=None, type=str))
    return metadata
コード例 #6
0
 def _put_tarball(self, input_id: str) -> None:
     """Send ``self.tarball`` to the controller under ``input_id``.

     The tarball is rewound to its start before it is handed over, and the
     accompanying metadata is built from this object's ``user``, ``project``,
     ``path`` and ``timestamp_millis`` attributes.
     """
     # Rewind first so the controller reads the archive from the beginning.
     self.tarball.seek(0)
     metadata = InputMetadata.of(user=self.user,
                                 project=self.project,
                                 path=self.path,
                                 timestamp_millis=self.timestamp_millis)
     self.controller.put_input(
         input_id=input_id,
         input_metadata=metadata,
         input_data_stream=self.tarball,
     )
コード例 #7
0
ファイル: input_data.py プロジェクト: prodo-dev/plz
 def check_input_data(self, input_id: str, metadata: InputMetadata) -> bool:
     """Tell whether the input file for ``input_id`` exists.

     When it does and ``metadata.has_all_args()`` is true, the input ID is
     also (re-)stored for that metadata.

     :return: ``True`` if the input file exists, ``False`` otherwise
     """
     if not self._input_file_exists(input_id):
         return False
     if metadata.has_all_args():
         # A blob can change timestamp without changing hash (after a
         # `touch`, for instance). Without refreshing the stored entry the
         # timestamp check would keep returning false and the client would
         # rebuild the tarball every single time. It happened.
         self._store_input_id(metadata, input_id)
     return True
コード例 #8
0
ファイル: input_data.py プロジェクト: prodo-dev/plz
 def get_input_id_from_metadata_or_none(self, metadata: InputMetadata) \
         -> Optional[str]:
     """Return the input ID stored in redis for ``metadata``, if usable.

     :return: the stored ID decoded as UTF-8, or ``None`` when redis has no
         (non-empty) entry for ``metadata.redis_field()`` or when the input
         file for the stored ID does not exist
     """
     raw_id = self.redis.hget(_INPUT_ID_KEY, metadata.redis_field())
     if not raw_id:
         return None
     input_id = str(raw_id, 'utf-8')
     # The metadata may be stored while the file itself is missing; treat
     # that as a cache miss so the mechanism stays resilient.
     return input_id if self._input_file_exists(input_id) else None
コード例 #9
0
    def __enter__(self):
        """Resolve the input ID, building a tarball of ``self.path`` if needed.

        Nothing is done when ``self.input_id`` is already set. Otherwise the
        controller is asked for an input ID matching this object's metadata;
        if it returns one, it is stored in ``self.input_id``. Only when there
        is none are the files under ``self.path`` packed into a bz2 tarball
        held in a temporary file, which is stored in ``self.tarball``.

        :return: ``self``
        :raises ValueError: if neither ``self.input_id`` nor ``self.path``
            is set
        """
        # Nothing to save in the context, we have an input id in the controller
        # and just refer to it
        if self.input_id:
            return self

        if self.path is None:
            raise ValueError('For input data, neither path nor input id were '
                             'given')

        input_metadata = InputMetadata.of(
            user=self.user,
            project=self.project,
            path=self.path,
            timestamp_millis=self.timestamp_millis)
        # Try to avoid building the tarball. Look at maximum modification
        # time in the input, and if we have an input for the timestamp, use
        # that one
        input_id = self.controller.get_input_id_or_none(input_metadata)
        log_debug(f'Input ID from the controller: {input_id}')
        if input_id:
            log_info('Input files not changed according to modification times')
            self.input_id = input_id
            return self

        log_debug('Building the tarball!')
        files = (os.path.join(directory, file)
                 for directory, _, files in os.walk(self.path)
                 for file in files)
        tarball = tempfile.NamedTemporaryFile()
        try:
            with tarfile.open(tarball.name, mode='w:bz2') as tar:
                for file in files:
                    name = os.path.relpath(file, self.path)
                    size = os.stat(file).st_size
                    with open(file, 'rb') as f:
                        # Only the relative name and the size are set on the
                        # entry; all other fields keep TarInfo's defaults.
                        tarinfo = tarfile.TarInfo(name=name)
                        tarinfo.size = size
                        tar.addfile(tarinfo, fileobj=f)
        except BaseException:
            # __exit__ is not invoked when __enter__ raises, so the temporary
            # file has to be released here or it would leak.
            tarball.close()
            raise
        # Publish the tarball on the instance only once it is complete.
        self.tarball = tarball
        return self
コード例 #10
0
ファイル: input_data.py プロジェクト: prodo-dev/plz
 def _store_input_id(self, metadata: InputMetadata, input_id: str) -> None:
     """Record ``input_id`` in redis under the field for ``metadata``.

     After writing, the value is read back through
     ``get_input_id_from_metadata_or_none`` and emitted as a debug message.
     """
     redis_field = metadata.redis_field()
     self.redis.hset(_INPUT_ID_KEY, redis_field, input_id)
     prefix = redis_field + ': '
     stored_id = self.get_input_id_from_metadata_or_none(metadata)
     log.debug(prefix + str(stored_id))
コード例 #11
0
 def check_input_data(self, input_id: str,
                      input_metadata: InputMetadata) -> bool:
     """Validate the metadata, then delegate to the input data configuration.

     :raises BadInputMetadataException: if
         ``input_metadata.has_all_args_or_none()`` is false
     :return: the result of the configuration's ``check_input_data``
     """
     if input_metadata.has_all_args_or_none():
         configuration = self.input_data_configuration
         return configuration.check_input_data(input_id, input_metadata)
     raise BadInputMetadataException(input_metadata.__dict__)