Example #1
    def post(self, dataset_id: int):
        parser = reqparse.RequestParser()
        parser.add_argument('event_time', type=int)
        parser.add_argument('files',
                            required=True,
                            type=list,
                            location='json',
                            help=_FORMAT_ERROR_MESSAGE.format('files'))
        parser.add_argument('move', type=bool)
        parser.add_argument('comment', type=str)
        body = parser.parse_args()
        event_time = body.get('event_time')
        files = body.get('files')
        move = body.get('move', False)
        comment = body.get('comment')

        dataset = Dataset.query.filter_by(id=dataset_id).first()
        if dataset is None:
            raise NotFoundException()
        if event_time is None and dataset.type == DatasetType.STREAMING:
            raise InvalidArgumentException(
                details='data_batch.event_time is empty')
        # TODO: PSI dataset should not allow multi batches

        # Create batch
        batch = DataBatch(
            dataset_id=dataset.id,
            # Use current timestamp to fill when type is PSI
            event_time=datetime.datetime.fromtimestamp(
                event_time or datetime.datetime.now().timestamp()),
            comment=comment,
            state=BatchState.NEW,
            move=move,
        )
        batch_details = dataset_pb2.DataBatch()
        root_dir = current_app.config.get('STORAGE_ROOT')
        batch_folder_name = batch.event_time.strftime('%Y%m%d%H%M%S')
        for file_path in files:
            file = batch_details.files.add()
            file.source_path = file_path
            file_name = file_path.split('/')[-1]
            file.destination_path = f'{root_dir}/dataset/{dataset.id}' \
                                    f'/batch/{batch_folder_name}/{file_name}'
        batch.set_details(batch_details)
        db.session.add(batch)
        db.session.commit()
        db.session.refresh(batch)
        scheduler.wakeup(data_batch_ids=[batch.id])
        return {'data': batch.to_dict()}
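
The handler above only builds the dataset_pb2.DataBatch message and hands it to batch.set_details(); Example #2 below parses the stored bytes back into the same message type. The following standalone sketch shows that round trip in isolation. The field names (files, source_path, destination_path) are taken from the handler; the file paths and the flat dataset_pb2 import are illustrative assumptions:

import dataset_pb2  # assumed importable; in the project this is a generated protobuf module

# Build the message the same way the handler does: one entry per input file.
batch_details = dataset_pb2.DataBatch()
for file_path in ['/upload/a.csv', '/upload/b.csv']:
    entry = batch_details.files.add()  # append to the repeated 'files' field
    entry.source_path = file_path
    entry.destination_path = '/storage/dataset/1/batch/20240101000000/' + file_path.split('/')[-1]

# Protobuf messages serialize to bytes; set_details() presumably stores these,
# and get_details() in Example #2 parses them back with ParseFromString().
raw = batch_details.SerializeToString()
restored = dataset_pb2.DataBatch()
restored.ParseFromString(raw)
assert restored == batch_details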
Example #2
    def get_details(self):
        if self.details is None:
            return None
        proto = dataset_pb2.DataBatch()
        proto.ParseFromString(self.details)
        return proto
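
The matching setter is not shown here, but the batch.set_details(batch_details) call in Example #1 suggests a counterpart that serializes the proto into the same details column. A minimal sketch under that assumption, not the project's actual implementation:

    def set_details(self, proto):
        # Assumed inverse of get_details(): store the proto's serialized bytes
        # so ParseFromString() above can reconstruct the message later.
        self.details = proto.SerializeToString()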