Example #1
    def test_upload_frame_with_order(self, accesskey, url, tmp_path):
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        dataset_client = gas_client.create_dataset(dataset_name,
                                                   is_fusion=True)
        dataset_client.create_draft("draft-1")
        segment_client = dataset_client.get_or_create_segment("segment1")

        segment_client.upload_sensor(Sensor.loads(LIDAR_DATA))

        path = tmp_path / "sub"
        path.mkdir()
        # If frame id is not set in the frame, pass timestamp (order) when uploading
        for i in reversed(range(5)):
            frame = Frame()
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            frame[LIDAR_DATA["name"]] = data
            segment_client.upload_frame(frame, timestamp=i)

        # Set frame id in frame
        for i in range(5, 10):
            frame = Frame(frame_id=ulid.from_timestamp(i))
            local_path = path / f"goodbye{i}.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            frame[LIDAR_DATA["name"]] = data
            segment_client.upload_frame(frame)

        # Setting both the frame id in the frame and the timestamp (order) when uploading is not allowed
        i = 10
        frame = Frame(frame_id=ulid.from_timestamp(i))
        local_path = path / f"goodbye{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        frame[LIDAR_DATA["name"]] = data
        with pytest.raises(FrameError):
            segment_client.upload_frame(frame, timestamp=i)

        # Omitting both the frame id in the frame and the timestamp (order) when uploading is also not allowed
        frame = Frame()
        local_path = path / f"goodbye{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        frame[LIDAR_DATA["name"]] = data
        with pytest.raises(FrameError):
            segment_client.upload_frame(frame)

        frames = segment_client.list_frames()
        assert len(frames) == 10
        assert frames[0][LIDAR_DATA["name"]].path == "hello0.txt"
        assert frames[5][LIDAR_DATA["name"]].path == "goodbye5.txt"
        assert not frames[0][LIDAR_DATA["name"]].label
        # todo: match the input and output label

        gas_client.delete_dataset(dataset_name)
Example #2
    def test_upload_segment(self, mocker):
        self.fusion_dataset_client._status.checkout(draft_number=1)
        segment_test = FusionSegment(name="test1")
        for i in range(5):
            temp_frame = Frame()
            temp_ulid = from_timestamp(10 * i + 10)
            temp_frame.frame_id = temp_ulid
            if i % 2 == 0:
                temp_frame["camera"] = Data(f"{i}.png")
            else:
                temp_frame["lidar"] = Data(f"{i}.png")
            segment_test.append(temp_frame)

        segment_client = FusionSegmentClient(
            name="test1", data_client=self.fusion_dataset_client)
        upload_segment = mocker.patch(
            f"{dataset.__name__}.FusionDatasetClient._upload_segment",
            return_value=segment_client)
        assert self.fusion_dataset_client.upload_segment(
            segment_test).name == "test1"
        args, keywords = upload_segment.call_args
        assert args[0] == segment_test
        assert keywords["jobs"] == 1
        assert not keywords["skip_uploaded_files"]
        upload_segment.assert_called_once()
Example #3
    def test__extract_all_data(self):
        source_frames = []
        ulids = []
        for i in range(5):
            temp_frame = Frame()
            if i % 2 == 0:
                temp_frame["camera"] = Data(f"{i}.png")
            else:
                temp_frame["lidar"] = Data(f"{i}.png")
            temp_ulid = from_timestamp(10 * i + 10)
            temp_frame.frame_id = temp_ulid
            source_frames.append((temp_frame, temp_ulid))
            ulids.append(temp_ulid)

        with Tqdm(5, disable=False) as pbar:
            for index, values in enumerate(
                    self.fusion_dataset_client._extract_all_data(
                        source_frames, pbar)):
                data, sensor_name, frame_id = values
                assert data.path == f"{index}.png"
                if index % 2 == 0:
                    assert sensor_name == "camera"
                else:
                    assert sensor_name == "lidar"
                assert frame_id == ulids[index].str

    def test__extract_unuploaded_data(self):
        source_frames = []
        ulids = []
        done_frames = {}
        for i in range(5):
            temp_frame = Frame()
            temp_ulid = from_timestamp(10 * i + 10)
            if i % 2 == 0:
                temp_frame["camera"] = Data(f"{i}.png")
                done_frames[temp_ulid.timestamp().timestamp] = temp_frame
            else:
                temp_frame["lidar"] = Data(f"{i}.png")
                ulids.append(temp_ulid)
            temp_frame.frame_id = temp_ulid
            source_frames.append((temp_frame, temp_ulid))

        with Tqdm(5, disable=False) as pbar:
            for index, values in enumerate(
                self.fusion_dataset_client._extract_unuploaded_data(
                    source_frames, pbar, done_frames=done_frames
                )
            ):
                data, sensor_name, frame_id = values
                assert data.path == f"{index * 2 + 1}.png"
                assert sensor_name == "lidar"
                assert frame_id == ulids[index].str
Example #5
def create_id_from_datetime(timestamp: datetime) -> str:
    """Create an ID from an existing datetime.
    Args:
        timestamp (datetime): The time to timestamp the ID.
    Returns:
        (str): A unique lexicographic ID.
    """
    return ulid.from_timestamp(timestamp).str.lower()
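The ID sorts lexicographically by time because a ULID string starts with its timestamp component. A minimal sketch of that property, assuming the ulid-py package used above:

from datetime import datetime, timedelta

import ulid

# IDs built from later datetimes sort after IDs built from earlier ones,
# and Crockford Base32 keeps its ordering after lowercasing.
earlier = ulid.from_timestamp(datetime(2021, 1, 1)).str.lower()
later = ulid.from_timestamp(datetime(2021, 1, 1) + timedelta(seconds=1)).str.lower()
assert earlier < later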
Example #6
def create_id_from_datetime(dt):
    """Create an ID from an existing datetime.
    Args:
        dt (datetime): The time to timestamp the ID.
    Returns:
        (str): A unique lexicographic ID.
    """
    return ulid.from_timestamp(dt).str
Example #7
    def _upload_segment(
        self,
        segment: FusionSegment,
        *,
        jobs: int,
        skip_uploaded_files: bool,
        pbar: Tqdm,
    ) -> FusionSegmentClient:
        segment_client = self.get_or_create_segment(segment.name)
        for sensor in segment.sensors:
            segment_client.upload_sensor(sensor)

        if not segment:
            return segment_client

        have_frame_id = hasattr(segment[0], "frame_id")

        for frame in segment:
            if hasattr(frame, "frame_id") != have_frame_id:
                raise FrameError(
                    "All the frames should have the same patterns(all have frame id or not)."
                )

        if have_frame_id:
            source_frames = ((frame, frame.frame_id) for frame in segment)
        else:
            source_frames = ((frame, from_timestamp(10 * index + 10))
                             for index, frame in enumerate(segment))

        if not skip_uploaded_files:
            data_to_upload = FusionDatasetClient._extract_all_data(
                source_frames, pbar)
        else:
            done_frames: Dict[float, Frame] = {
                frame.frame_id.timestamp().timestamp: frame
                for frame in segment_client.list_frames()
            }
            data_to_upload = FusionDatasetClient._extract_unuploaded_data(
                source_frames, pbar, done_frames=done_frames)

        multithread_upload(
            # pylint: disable=protected-access
            lambda args: segment_client._upload_or_import_data(*args),
            data_to_upload,
            callback=segment_client._synchronize_upload_info,
            jobs=jobs,
            pbar=pbar,
        )

        return segment_client
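When skip_uploaded_files is set, already-uploaded frames are matched by the float timestamp embedded in their frame id, which is why done_frames is keyed by frame_id.timestamp().timestamp. A small sketch of that round trip, assuming the ulid-py package:

from ulid import from_timestamp

# from_timestamp(20.0) stores 20 000 ms; .timestamp().timestamp recovers the
# original value in seconds, so it can serve as the done_frames lookup key.
frame_id = from_timestamp(20.0)
assert frame_id.timestamp().timestamp == 20.0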
Example #8
    def upload_frame(self,
                     frame: Frame,
                     timestamp: Optional[float] = None) -> None:
        """Upload frame to the draft.

        Arguments:
            frame: The :class:`~tensorbay.dataset.frame.Frame` to upload.
            timestamp: The mark used to sort frames, given as a float timestamp.

        Raises:
            FrameError: When lacking frame id or frame id conflicts.

        """
        self._status.check_authority_for_draft()

        if timestamp is None:
            try:
                frame_id = frame.frame_id
            except AttributeError as error:
                raise FrameError(
                    "Lack frame id, please add frame id in frame or "
                    "give timestamp to the function!") from error
        elif not hasattr(frame, "frame_id"):
            frame_id = from_timestamp(timestamp)
        else:
            raise FrameError(
                "Frame id conflicts, please do not give timestamp to the function!."
            )

        callback_bodies = []
        for sensor_name, data in frame.items():
            try:
                callback_body = data.get_callback_body(
                )  # type:ignore[union-attr]
            except AttributeError:
                continue

            callback_body["frameId"] = frame_id.str
            callback_body["sensorName"] = sensor_name
            if isinstance(data, Data):
                self._upload_file(data)
                self._upload_mask_files(data.label)
                callback_bodies.append(callback_body)
            elif isinstance(data, AuthData):
                self._synchronize_import_info((callback_body, ))

        for chunked_callback_bodies in chunked(callback_bodies, 50):
            self._synchronize_upload_info(chunked_callback_bodies)
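A minimal calling sketch of the rule this method enforces: set the frame id on the frame or pass a timestamp, but never both. The sensor name, file paths, and the segment_client parameter are made up for illustration; Data and Frame are the TensorBay classes referenced in the docstring.

from ulid import from_timestamp

from tensorbay.dataset import Data, Frame


def upload_two_frames(segment_client) -> None:
    # Option 1: no frame id on the frame, the order comes from `timestamp`.
    frame = Frame()
    frame["LIDAR-000"] = Data("local/000000.pcd")
    segment_client.upload_frame(frame, timestamp=0)

    # Option 2: frame id set on the frame, no `timestamp` argument.
    frame = Frame(frame_id=from_timestamp(1))
    frame["LIDAR-000"] = Data("local/000001.pcd")
    segment_client.upload_frame(frame)

    # Supplying both (or neither) raises FrameError.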
Example #9
    def __init__(self, identifier, name, created_by, created_on):
        if created_on is None:
            created_on = arrow.utcnow()
        self.created_on = get_date(created_on)

        if identifier is None:
            self.identifier = \
                ulid.from_timestamp(self.created_on.datetime).str
        else:
            self.identifier = identifier

        self.id = self.identifier
        self.name = name
        self.created_by = created_by
        self.events = []
        self.type = 'application'
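A short sketch of the identifier derivation above, assuming the arrow and ulid-py packages:

import arrow
import ulid

# ULIDs accept a datetime directly, so the creation time doubles as the
# identifier's sortable prefix.
created_on = arrow.utcnow()
identifier = ulid.from_timestamp(created_on.datetime).str
print(identifier)  # a 26-character, time-ordered identifier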
Example #10
    def __init__(self, identifier, name, created_by, created_on,
                 parent_app_id):
        if created_on is None:
            created_on = arrow.utcnow()
        self.created_on = get_date(created_on)

        if identifier is None:
            self.identifier = \
                ulid.from_timestamp(self.created_on.datetime).str
        else:
            self.identifier = identifier

        self.id = self.identifier
        self.name = name
        self.created_by = created_by
        self.parent_app_id = parent_app_id
        self.parent_app = None
        self.type = 'event'
Example #11
def getRecentJobs(event, context):
    ''' Example body:
    {
        "site": "wmd",
        "timeRange": "<number of milliseconds>"
    }
    '''
    params = json.loads(event.get("body", ""))
    table = dynamodb.Table(os.environ['DYNAMODB_JOBS'])
    site = params['site']

    aDay = 24 * 3600 * 1000  # ms in a day (default value)
    timeRange = params.get('timeRange', aDay) / 1000  # convert to seconds

    now = time.time()  # current timestamp in seconds
    earliest = now - timeRange
    earliestUlid = ulid.from_timestamp(earliest)

    table_response = table.query(KeyConditionExpression=Key('site').eq(site)
                                 & Key('ulid').gte(earliestUlid.str))
    return get_response(
        HTTPStatus.OK,
        json.dumps(table_response['Items'], indent=4, cls=DecimalEncoder))
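The gte condition works because ULID strings begin with a 10-character timestamp component, so a ULID built from an earlier time sorts before any ULID minted later. A small sketch, assuming the ulid-py package used above:

import time

import ulid

earliest = ulid.from_timestamp(time.time() - 3600)  # lower bound: one hour ago
newest = ulid.new()                                  # a ULID minted right now
assert earliest.str <= newest.str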
Example #12
    def upload_frame(  # pylint: disable=too-many-locals
            self,
            frame: Frame,
            timestamp: Optional[float] = None,
            skip_uploaded_files: bool = False) -> None:
        """Upload frame to the draft.

        Arguments:
            frame: The :class:`~tensorbay.dataset.frame.Frame` to upload.
            timestamp: The mark used to sort frames, given as a float timestamp.
            skip_uploaded_files: Set it to True to skip the uploaded files.

        Raises:
            FrameError: When lacking frame id or frame id conflicts.
            InvalidParamsError: When remote_path does not follow linux style.

        """
        self._status.check_authority_for_draft()

        if timestamp is None:
            try:
                frame_id = frame.frame_id
            except AttributeError as error:
                raise FrameError(
                    "Lack frame id, please add frame id in frame or "
                    "give timestamp to the function!") from error
        elif not hasattr(frame, "frame_id"):
            frame_id = from_timestamp(timestamp)
        else:
            raise FrameError(
                "Frame id conflicts, please do not give timestamp to the function!."
            )

        for sensor_name, data in frame.items():
            if not isinstance(data, Data):
                continue

            target_remote_path = data.target_remote_path

            if "\\" in target_remote_path:
                raise InvalidParamsError(param_name="path",
                                         param_value=target_remote_path)

            permission = self._get_upload_permission()
            post_data = permission["result"]

            checksum = self._calculate_file_sha1(data.path)

            post_data["key"] = checksum

            backend_type = permission["extra"]["backendType"]
            if backend_type == "azure":
                url = (
                    f'{permission["extra"]["host"]}{permission["extra"]["objectPrefix"]}'
                    f'{target_remote_path}?{permission["result"]["token"]}')

                self._put_binary_file_to_azure(url, data.path, post_data)
            else:
                self._post_multipart_formdata(
                    permission["extra"]["host"],
                    data.path,
                    target_remote_path,
                    post_data,
                )

            frame_info: Dict[str, Any] = {
                "segmentName": self._name,
                "sensorName": sensor_name,
                "frameId": str(frame_id),
            }
            if hasattr(data, "timestamp"):
                frame_info["timestamp"] = data.timestamp

            self._synchronize_upload_info(target_remote_path, checksum,
                                          frame_info, skip_uploaded_files)

            self._upload_label(data)
Example #13
def getUlid(ts):
    """Return a ULID for the given epoch seconds, defaulting to the current time."""
    if not ts:
        ts = int(time.time())
    uid = ulid.from_timestamp(ts)
    return uid
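Brief usage of the helper above (hypothetical values, ulid-py assumed): a falsy ts falls back to the current time, otherwise the given epoch seconds are used.

print(getUlid(0).str)           # falsy -> ULID for the current time
print(getUlid(1609459200).str)  # ULID for 2021-01-01T00:00:00Z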
Example #14
    def test__upload_segment(self, mocker):
        segment_test = FusionSegment(name="test1")
        ulids = []
        done_frames = []
        for i in range(5):
            temp_frame = Frame()
            temp_ulid = from_timestamp(10 * i + 10)
            temp_frame.frame_id = temp_ulid
            if i % 2 == 0:
                temp_frame["camera"] = Data(f"{i}.png")
                done_frames.append(temp_frame)
            else:
                temp_frame["lidar"] = Data(f"{i}.png")
            ulids.append(temp_ulid)
            segment_test.append(temp_frame)

        segment_client = FusionSegmentClient(
            name="test1", data_client=self.fusion_dataset_client)
        get_or_create_segment = mocker.patch(
            f"{dataset.__name__}.FusionDatasetClient.get_or_create_segment",
            return_value=segment_client,
        )

        list_frames = mocker.patch(
            f"{segment.__name__}.FusionSegmentClient.list_frames",
            return_value=done_frames,
        )

        multithread_upload = mocker.patch(
            f"{dataset.__name__}.multithread_upload")

        with Tqdm(5, disable=False) as pbar:
            self.fusion_dataset_client._upload_segment(
                segment_test, jobs=8, skip_uploaded_files=True, pbar=pbar)
            get_or_create_segment.assert_called_once_with(segment_test.name)
            list_frames.assert_called_once_with()
            args, keywords = multithread_upload.call_args
            for index, values in enumerate(args[1]):
                data, sensor_name, frame_id = values
                assert data.path == f"{index * 2 + 1}.png"
                assert sensor_name == "lidar"
                assert frame_id == ulids[index * 2 + 1].str
            assert keywords[
                "callback"] == segment_client._synchronize_upload_info
            assert keywords["jobs"] == 8
            assert keywords["pbar"] == pbar
            multithread_upload.assert_called_once()
        with Tqdm(5, disable=False) as pbar:
            self.fusion_dataset_client._upload_segment(
                segment_test, jobs=8, skip_uploaded_files=False, pbar=pbar)
            get_or_create_segment.assert_called_with(segment_test.name)
            list_frames.assert_called_with()
            args, keywords = multithread_upload.call_args
            for index, values in enumerate(args[1]):
                data, sensor_name, frame_id = values
                assert data.path == f"{index}.png"
                if index % 2 == 0:
                    assert sensor_name == "camera"
                else:
                    assert sensor_name == "lidar"
                assert frame_id == ulids[index].str
            assert keywords[
                "callback"] == segment_client._synchronize_upload_info
            assert keywords["jobs"] == 8
            assert keywords["pbar"] == pbar
Example #15
def create_dataset_version(body: JsonObject) -> JsonObject:
    logger = set_up_logging(__name__)

    logger.debug(json.dumps({"event": body}))

    body_schema = {
        "type": "object",
        "properties": {
            DATASET_ID_SHORT_KEY: {
                "type": "string"
            },
            METADATA_URL_KEY: {
                "type": "string"
            },
            NOW_KEY: {
                "type": "string",
                "format": "date-time"
            },
        },
        "required": [DATASET_ID_SHORT_KEY, METADATA_URL_KEY],
    }

    # validate input
    try:
        validate(body, body_schema)
    except ValidationError as err:
        logger.warning(json.dumps({ERROR_KEY: err}, default=str))
        return error_response(HTTPStatus.BAD_REQUEST, err.message)

    datasets_model_class = datasets_model_with_meta()

    # validate dataset exists
    try:
        dataset = datasets_model_class.get(
            hash_key=f"{DATASET_ID_PREFIX}{body[DATASET_ID_SHORT_KEY]}",
            consistent_read=True)
    except DoesNotExist as err:
        logger.warning(json.dumps({ERROR_KEY: err}, default=str))
        return error_response(
            HTTPStatus.NOT_FOUND,
            f"dataset '{body[DATASET_ID_SHORT_KEY]}' could not be found")

    now = datetime.fromisoformat(
        body.get(NOW_KEY,
                 datetime.utcnow().isoformat()))
    dataset_version_id = human_readable_ulid(from_timestamp(now))

    # execute step function
    step_functions_input = {
        DATASET_ID_KEY: dataset.dataset_id,
        DATASET_PREFIX_KEY: dataset.dataset_prefix,
        VERSION_ID_KEY: dataset_version_id,
        METADATA_URL_KEY: body[METADATA_URL_KEY],
    }
    state_machine_arn = get_param(
        ParameterName.PROCESSING_DATASET_VERSION_CREATION_STEP_FUNCTION_ARN)

    step_functions_response = STEP_FUNCTIONS_CLIENT.start_execution(
        stateMachineArn=state_machine_arn,
        name=dataset_version_id,
        input=json.dumps(step_functions_input),
    )

    logger.debug(json.dumps({"response": step_functions_response},
                            default=str))

    # return arn of executing process
    return success_response(
        HTTPStatus.CREATED,
        {
            VERSION_ID_KEY: dataset_version_id,
            EXECUTION_ARN_KEY: step_functions_response["executionArn"],
        },
    )
Example #16
    def upload_frame(self,
                     frame: Frame,
                     timestamp: Optional[float] = None) -> None:
        """Upload frame to the draft.

        Arguments:
            frame: The :class:`~tensorbay.dataset.frame.Frame` to upload.
            timestamp: The mark used to sort frames, given as a float timestamp.

        Raises:
            GASPathError: When remote_path does not follow linux style.
            GASException: When uploading frame failed.
            TypeError: When frame id conflicts.

        """
        self._status.check_authority_for_draft()

        if timestamp is None:
            try:
                frame_id = frame.frame_id
            except AttributeError as error:
                raise TypeError(
                    "Lack frame id, please add frame id in frame or "
                    "give timestamp to the function!") from error
        elif hasattr(frame, "frame_id"):
            raise TypeError(
                "Frame id conflicts, please do not give timestamp to the function!."
            )
        else:
            frame_id = str(ulid.from_timestamp(timestamp))

        for sensor_name, data in frame.items():
            if not isinstance(data, Data):
                continue

            remote_path = data.target_remote_path

            if "\\" in remote_path:
                raise GASPathError(remote_path)

            permission = self._get_upload_permission()
            post_data = permission["result"]
            post_data[
                "key"] = permission["extra"]["objectPrefix"] + remote_path

            try:
                version_id, etag = self._post_multipart_formdata(
                    permission["extra"]["host"],
                    data.path,
                    remote_path,
                    post_data,
                )

                frame_info: Dict[str, Any] = {
                    "segmentName": self._name,
                    "sensorName": sensor_name,
                    "frameId": frame_id,
                }
                if hasattr(data, "timestamp"):
                    frame_info["timestamp"] = data.timestamp

                self._synchronize_upload_info(post_data["key"], version_id,
                                              etag, frame_info)

            except GASException:
                self._clear_upload_permission()
                raise
            self._upload_label(data)