class DataBatch(db.Model):
    __tablename__ = 'data_batches_v2'
    __table_args__ = (PrimaryKeyConstraint('event_time', 'dataset_id'), )

    event_time = db.Column(db.TIMESTAMP(timezone=True))
    dataset_id = db.Column(db.Integer, db.ForeignKey(Dataset.id))
    state = db.Column(db.Enum(BatchState))
    source = db.Column(db.Text(),
                       default=dataset_pb2.DatasetSource().SerializeToString())
    failed_source = db.Column(db.Text())
    file_size = db.Column(db.Integer, default=0)
    imported_file_num = db.Column(db.Integer, default=0)
    num_file = db.Column(db.Integer, default=0)
    comment = db.Column(db.Text())
    created_at = db.Column(db.DateTime(timezone=True), server_default=func.now())
    updated_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now(),
                           server_onupdate=func.now())
    deleted_at = db.Column(db.DateTime(timezone=True))

    dataset = db.relationship('Dataset', back_populates='data_batches')

    def set_source(self, proto):
        self.source = proto.SerializeToString()

    def get_source(self):
        if self.source is None:
            return None
        proto = dataset_pb2.DatasetSource()
        proto.ParseFromString(self.source)
        return proto
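# Usage sketch (not part of the model): demonstrates the set_source/get_source
# round trip. It assumes the surrounding module's Flask-SQLAlchemy `db` object
# and the dataset_pb2 protos referenced above; the helper name is hypothetical.
def _example_source_roundtrip():
    batch = DataBatch()
    source = dataset_pb2.DatasetSource()
    batch.set_source(source)        # stores SerializeToString() bytes
    restored = batch.get_source()   # parses the stored bytes back into a proto
    assert restored == source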
class DataBatch(db.Model):
    __tablename__ = 'data_batches_v2'
    __table_args__ = (UniqueConstraint('event_time', 'dataset_id'), )

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    event_time = db.Column(db.TIMESTAMP(timezone=True), nullable=False)
    dataset_id = db.Column(db.Integer, nullable=False)
    path = db.Column(db.String(512))
    state = db.Column(db.Enum(BatchState, native_enum=False),
                      default=BatchState.NEW)
    move = db.Column(db.Boolean, default=False)
    # Serialized proto of dataset_pb2.DataBatch
    details = db.Column(db.LargeBinary())
    file_size = db.Column(db.Integer, default=0)
    num_imported_file = db.Column(db.Integer, default=0)
    num_file = db.Column(db.Integer, default=0)
    comment = db.Column('cmt', db.Text(), key='comment')
    created_at = db.Column(db.DateTime(timezone=True), server_default=func.now())
    updated_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now(),
                           server_onupdate=func.now())
    deleted_at = db.Column(db.DateTime(timezone=True))

    dataset = db.relationship(
        'Dataset',
        primaryjoin='Dataset.id == foreign(DataBatch.dataset_id)',
        back_populates='data_batches')

    def set_details(self, proto):
        self.num_file = len(proto.files)
        num_imported_file = 0
        num_failed_file = 0
        file_size = 0
        # Aggregates per-file stats
        for file in proto.files:
            if file.state == dataset_pb2.File.State.COMPLETED:
                num_imported_file += 1
                file_size += file.size
            elif file.state == dataset_pb2.File.State.FAILED:
                num_failed_file += 1
        # All files have reached a terminal state, so the batch is done.
        if num_imported_file + num_failed_file == self.num_file:
            if num_failed_file > 0:
                self.state = BatchState.FAILED
            else:
                self.state = BatchState.SUCCESS
        self.num_imported_file = num_imported_file
        self.file_size = file_size
        self.details = proto.SerializeToString()

    def get_details(self):
        if self.details is None:
            return None
        proto = dataset_pb2.DataBatch()
        proto.ParseFromString(self.details)
        return proto
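# Usage sketch (not part of the model): shows how set_details() derives the
# batch-level counters and state from the per-file states. It assumes the
# dataset_pb2.DataBatch / dataset_pb2.File messages referenced above; the
# helper name is hypothetical.
def _example_details_update():
    batch = DataBatch()
    details = dataset_pb2.DataBatch()
    ok = details.files.add()
    ok.state = dataset_pb2.File.State.COMPLETED
    ok.size = 1024
    failed = details.files.add()
    failed.state = dataset_pb2.File.State.FAILED
    batch.set_details(details)
    # Both files are terminal and one failed, so the batch ends up FAILED.
    assert batch.state == BatchState.FAILED
    assert batch.num_imported_file == 1
    assert batch.file_size == 1024
    assert batch.get_details() == details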