Example #1
0
class Setting(db.Model):
    __tablename__ = 'settings_v2'
    __table_args__ = (UniqueConstraint('key', name='uniq_key'),
                      default_table_args('this is webconsole settings table'))
    id = db.Column(db.Integer, primary_key=True, comment='id')
    key = db.Column(db.String(255), nullable=False, comment='key')
    value = db.Column(db.Text, comment='value')
Example #2
0
class Setting(db.Model):
    __tablename__ = 'settings_v2'
    __table_args__ = (UniqueConstraint('key', name='uniq_key'), {
        'comment': 'workflow_v2',
        'mysql_engine': 'innodb',
        'mysql_charset': 'utf8mb4',
    })
    id = db.Column(db.Integer, primary_key=True, comment='id')
    key = db.Column(db.String(255), nullable=False, comment='key')
    value = db.Column(db.Text, comment='value')
Example #3
0
class Project(db.Model):
    __tablename__ = 'projects_v2'
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    name = db.Column(db.String(255), index=True, unique=True)
    token = db.Column(db.String(64), index=True)
    config = db.Column(db.Text())
    certificate = db.Column(db.Text())
    comment = db.Column(db.Text())
    created_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now())
    updated_at = db.Column(db.DateTime(timezone=True),
                           server_onupdate=func.now(),
                           server_default=func.now())
    deleted_at = db.Column(db.DateTime(timezone=True))

    def set_config(self, proto):
        self.config = proto.SerializeToString()

    def get_config(self):
        proto = project_pb2.Project()
        proto.ParseFromString(self.config)
        return proto

    def set_certificate(self, proto):
        self.certificate = proto.SerializeToString()

    def get_certificate(self):
        proto = project_pb2.CertificateStorage()
        proto.ParseFromString(self.certificate)
        return proto
Example #4
0
class Dataset(db.Model):
    __tablename__ = 'datasets_v2'
    __table_args__ = ({
        'comment': 'This is webconsole dataset table',
        'mysql_engine': 'innodb',
        'mysql_charset': 'utf8mb4',
    })

    id = db.Column(db.Integer,
                   primary_key=True,
                   autoincrement=True,
                   comment='id')
    name = db.Column(db.String(255), nullable=False, comment='dataset name')
    dataset_type = db.Column(db.Enum(DatasetType, native_enum=False),
                             nullable=False,
                             comment='data type')
    path = db.Column(db.String(512), comment='dataset path')
    comment = db.Column('cmt',
                        db.Text(),
                        key='comment',
                        comment='comment of dataset')
    created_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now(),
                           comment='created time')
    updated_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now(),
                           onupdate=func.now(),
                           comment='updated time')
    deleted_at = db.Column(db.DateTime(timezone=True), comment='deleted time')
    project_id = db.Column(db.Integer, default=0, comment='project_id')

    data_batches = db.relationship(
        'DataBatch', primaryjoin='foreign(DataBatch.dataset_id) == Dataset.id')
    project = db.relationship(
        'Project', primaryjoin='foreign(Dataset.project_id) == Project.id')
Example #5
0
class User(db.Model):
    __tablename__ = 'users_v2'
    id = db.Column(db.Integer, primary_key=True)
    username = db.Column(db.String(255), index=True)
    password = db.Column(db.String(255))

    def set_password(self, password):
        self.password = pwd_context.hash(password)

    def verify_password(self, password):
        return pwd_context.verify(password, self.password)
Example #6
0
class Project(db.Model):
    __tablename__ = 'projects_v2'
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(255), index=True)
    config = db.Column(db.Text())

    def set_config(self, proto):
        self.config = proto.SerializeToString()

    def get_config(self):
        proto = project_pb2.Project()
        proto.ParseFromString(self.config)
        return proto
Example #7
0
class Session(db.Model):
    __tablename__ = 'session_v2'
    __table_args__ = (Index('idx_jti', 'jti'),
                      default_table_args('This is webconsole session table'))
    id = db.Column(db.Integer,
                   primary_key=True,
                   autoincrement=True,
                   comment='session id')
    jti = db.Column(db.String(64), comment='JWT jti')
    expired_at = db.Column(db.DateTime(timezone=True),
                           comment='expired time, for db automatically clear')
    created_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now(),
                           comment='created at')
Example #8
0
class Project(db.Model):
    __tablename__ = 'projects_v2'
    __table_args__ = (UniqueConstraint('name', name='idx_name'),
                      Index('idx_token', 'token'), {
                          'comment': 'webconsole projects',
                          'mysql_engine': 'innodb',
                          'mysql_charset': 'utf8mb4',
                      })
    id = db.Column(db.Integer,
                   primary_key=True,
                   autoincrement=True,
                   comment='id')
    name = db.Column(db.String(255), comment='name')
    token = db.Column(db.String(64), comment='token')
    config = db.Column(db.LargeBinary(), comment='config')
    certificate = db.Column(db.LargeBinary(), comment='certificate')
    comment = db.Column('cmt', db.Text(), key='comment', comment='comment')
    created_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now(),
                           comment='created at')
    updated_at = db.Column(db.DateTime(timezone=True),
                           onupdate=func.now(),
                           server_default=func.now(),
                           comment='updated at')
    deleted_at = db.Column(db.DateTime(timezone=True), comment='deleted at')

    def set_config(self, proto):
        self.config = proto.SerializeToString()

    def get_config(self):
        if self.config is None:
            return None
        proto = project_pb2.Project()
        proto.ParseFromString(self.config)
        return proto

    def set_certificate(self, proto):
        self.certificate = proto.SerializeToString()

    def get_certificate(self):
        if self.certificate is None:
            return None
        proto = project_pb2.CertificateStorage()
        proto.ParseFromString(self.certificate)
        return proto

    def get_namespace(self):
        config = self.get_config()
        if config is not None:
            variables = self.get_config().variables
            for variable in variables:
                if variable.name == 'NAMESPACE':
                    return variable.value
        return 'default'
Example #9
0
class WorkflowTemplate(db.Model):
    __tablename__ = 'template_v2'
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(255), unique=True, index=True)
    comment = db.Column(db.String(255))
    group_alias = db.Column(db.String(255), nullable=False, index=True)
    config = db.Column(db.Text(), nullable=False)

    def set_config(self, proto):
        self.config = proto.SerializeToString()

    def get_config(self):
        proto = workflow_definition_pb2.WorkflowDefinition()
        proto.ParseFromString(self.config)
        return proto
Example #10
0
class JobDependency(db.Model):
    __tablename__ = 'job_dependency_v2'
    __table_args__ = (Index('idx_src_job_id', 'src_job_id'),
                      Index('idx_dst_job_id', 'dst_job_id'), {
                          'comment': 'record job dependencies',
                          'mysql_engine': 'innodb',
                          'mysql_charset': 'utf8mb4',
                      })
    id = db.Column(db.Integer,
                   primary_key=True,
                   autoincrement=True,
                   comment='id')
    src_job_id = db.Column(db.Integer, comment='src job id')
    dst_job_id = db.Column(db.Integer, comment='dst job id')
    dep_index = db.Column(db.Integer, comment='dep index')
Example #11
0
class _TestModel(db.Model):
    __tablename__ = 'test_table'
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    name = db.Column(db.String(255))
    token = db.Column(db.String(64), index=True)
    created_at = db.Column(db.DateTime(timezone=True))
    grpc_spec = db.Column(db.Text())

    def set_grpc_spec(self, proto):
        self.grpc_spec = proto.SerializeToString()

    def get_grpc_spec(self):
        proto = common_pb2.GrpcSpec()
        proto.ParseFromString(self.grpc_spec)
        return proto
Example #12
0
class User(db.Model):
    __tablename__ = 'users_v2'
    __table_args__ = (UniqueConstraint('username', name='uniq_username'), {
        'comment': 'This is webconsole user table',
        'mysql_engine': 'innodb',
        'mysql_charset': 'utf8mb4',
    })
    id = db.Column(db.Integer, primary_key=True, comment='user id')
    username = db.Column(db.String(255), comment='user name of user')
    password = db.Column(db.String(255), comment='user password after encode')

    def set_password(self, password):
        self.password = pwd_context.hash(password)

    def verify_password(self, password):
        return pwd_context.verify(password, self.password)
Example #13
0
class User(db.Model):
    __tablename__ = 'users_v2'
    id = db.Column(db.Integer, primary_key=True)
    username = db.Column(db.String(255), index=True)
    password = db.Column(db.String(255))

    def set_password(self, password):
        self.password = pwd_context.hash(password)

    def verify_password(self, password):
        return pwd_context.verify(password, self.password)

    def to_dict(self):
        return {
            col.name: getattr(self, col.name)
            for col in self.__table__.columns
        }
Example #14
0
class WorkflowTemplate(db.Model):
    __tablename__ = 'template_v2'
    __table_args__ = (UniqueConstraint('name', name='uniq_name'),
                      Index('idx_group_alias', 'group_alias'), {
                          'comment': 'workflow template',
                          'mysql_engine': 'innodb',
                          'mysql_charset': 'utf8mb4',
                      })
    id = db.Column(db.Integer, primary_key=True, comment='id')
    name = db.Column(db.String(255), comment='name')
    comment = db.Column('cmt',
                        db.String(255),
                        key='comment',
                        comment='comment')
    group_alias = db.Column(db.String(255),
                            nullable=False,
                            comment='group_alias')
    config = db.Column(db.LargeBinary(), nullable=False, comment='config')
    is_left = db.Column(db.Boolean, comment='is_left')

    def set_config(self, proto):
        self.config = proto.SerializeToString()

    def get_config(self):
        proto = workflow_definition_pb2.WorkflowDefinition()
        proto.ParseFromString(self.config)
        return proto
Example #15
0
class Dataset(db.Model):
    __tablename__ = 'datasets_v2'
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    name = db.Column(db.String(255), unique=True)
    type = db.Column(db.Enum(DatasetType))
    external_storage_path = db.Column(db.Text())
    comment = db.Column(db.Text())
    created_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now())
    updated_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now(),
                           onupdate=func.now())
    deleted_at = db.Column(db.DateTime(timezone=True))

    data_batches = db.relationship('DataBatch', back_populates='dataset')
Example #16
0
class Project(db.Model):
    __tablename__ = 'projects_v2'
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    name = db.Column(db.String(255), index=True, unique=True)
    token = db.Column(db.String(64), index=True)
    config = db.Column(db.Text())
    certificate = db.Column(db.Text())
    comment = db.Column(db.Text())
    created_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now())
    updated_at = db.Column(db.DateTime(timezone=True),
                           onupdate=func.now(),
                           server_default=func.now())
    deleted_at = db.Column(db.DateTime(timezone=True))

    def set_config(self, proto):
        self.config = proto.SerializeToString()

    def get_config(self):
        if self.config is None:
            return None
        proto = project_pb2.Project()
        proto.ParseFromString(self.config)
        return proto

    def set_certificate(self, proto):
        self.certificate = proto.SerializeToString()

    def get_certificate(self):
        if self.certificate is None:
            return None
        proto = project_pb2.CertificateStorage()
        proto.ParseFromString(self.certificate)
        return proto

    def get_namespace(self):
        config = self.get_config()
        if config is not None:
            variables = self.get_config().variables
            for variable in variables:
                if variable.name == 'NAMESPACE':
                    return variable.value
        return 'default'
Example #17
0
class Dataset(db.Model):
    __tablename__ = 'datasets_v2'
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    name = db.Column(db.String(255), unique=True, nullable=False)
    dataset_type = db.Column(db.Enum(DatasetType, native_enum=False),
                             nullable=False)
    path = db.Column(db.String(512))
    comment = db.Column('cmt', db.Text(), key='comment')
    created_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now())
    updated_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now(),
                           onupdate=func.now())
    deleted_at = db.Column(db.DateTime(timezone=True))

    data_batches = db.relationship('DataBatch', back_populates='dataset')
Example #18
0
class Project(db.Model):
    __tablename__ = 'projects_v2'
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    name = db.Column(db.String(255), index=True)
    token = db.Column(db.String(64), index=True)
    config = db.Column(db.Text())
    certificate = db.Column(db.Text())
    comment = db.Column(db.Text())
    created_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now())
    updated_at = db.Column(db.DateTime(timezone=True),
                           server_onupdate=func.now(),
                           server_default=func.now())
    deleted_at = db.Column(db.DateTime(timezone=True))

    def set_config(self, proto):
        self.config = proto.SerializeToString()

    def get_config(self):
        proto = project_pb2.Project()
        proto.ParseFromString(self.config)
        return proto

    def set_certificate(self, proto):
        self.certificate = proto.SerializeToString()

    def get_certificate(self):
        proto = project_pb2.Certificate()
        proto.ParseFromString(self.certificate)
        return proto

    def to_dict(self):
        return {
            'id': self.id,
            'name': self.name,
            'token': self.token,
            'config': MessageToDict(self.get_config()),
            'comment': self.comment,
            'created_at': self.created_at.strftime("%Y-%m-%d %H:%M:%S"),
            'updated_at': self.updated_at.strftime("%Y-%m-%d %H:%M:%S"),
        }
Example #19
0
class Template(db.Model):
    __tablename__ = 'template_v2'
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(255), unique=True, index=True)
    comment = db.Column(db.String(255))
    group_alias = db.Column(db.String(255), nullable=False, index=True)
    config = db.Column(db.Text(), nullable=False)

    def set_config(self, proto):
        self.config = proto.SerializeToString()

    def get_config(self):
        proto = workflow_definition_pb2.WorkflowDefinition()
        proto.ParseFromString(self.config)
        return proto

    def to_dict(self):
        dic = {
            col.name: getattr(self, col.name)
            for col in self.__table__.columns
        }
        dic['config'] = json_format.MessageToDict(
            self.get_config(), preserving_proto_field_name=True)
        return dic
Example #20
0
class WorkflowTemplate(db.Model):
    __tablename__ = 'template_v2'
    __table_args__ = (UniqueConstraint('name', name='uniq_name'),
                      Index('idx_group_alias', 'group_alias'),
                      default_table_args('workflow template'))
    id = db.Column(db.Integer, primary_key=True, comment='id')
    name = db.Column(db.String(255), comment='name')
    comment = db.Column('cmt',
                        db.String(255),
                        key='comment',
                        comment='comment')
    group_alias = db.Column(db.String(255),
                            nullable=False,
                            comment='group_alias')
    # max store 16777215 bytes (16 MB)
    config = db.Column(db.LargeBinary(16777215),
                       nullable=False,
                       comment='config')
    is_left = db.Column(db.Boolean, comment='is_left')
    editor_info = db.Column(db.LargeBinary(16777215),
                            comment='editor_info',
                            default=b'')
    kind = db.Column(db.Integer,
                     comment='template kind')  # WorkflowTemplateKind enum

    def set_config(self, proto):
        self.config = proto.SerializeToString()

    def set_editor_info(self, proto):
        self.editor_info = proto.SerializeToString()

    def get_config(self):
        proto = workflow_definition_pb2.WorkflowDefinition()
        proto.ParseFromString(self.config)
        return proto

    def get_editor_info(self):
        proto = workflow_definition_pb2.WorkflowTemplateEditorInfo()
        if self.editor_info is not None:
            proto.ParseFromString(self.editor_info)
        return proto
Example #21
0
class User(db.Model):
    __tablename__ = 'users_v2'
    __table_args__ = (UniqueConstraint('username', name='uniq_username'),
                      default_table_args('This is webconsole user table'))
    id = db.Column(db.Integer, primary_key=True, comment='user id')
    username = db.Column(db.String(255), comment='unique name of user')
    password = db.Column(db.String(255), comment='user password after encode')
    role = db.Column(db.Enum(Role, native_enum=False),
                     default=Role.USER,
                     comment='role of user')
    name = db.Column(db.String(255), comment='name of user')
    email = db.Column(db.String(255), comment='email of user')
    state = db.Column(db.Enum(State, native_enum=False),
                      default=State.ACTIVE,
                      comment='state of user')

    def set_password(self, password):
        self.password = pwd_context.hash(password)

    def verify_password(self, password):
        return pwd_context.verify(password, self.password)
Example #22
0
class ModelGroup(db.Model):
    __tablename__ = 'model_groups_v2'
    __table_args__ = (default_table_args('model_groups_v2'))

    id = db.Column(db.Integer, primary_key=True, comment='id')
    name = db.Column(db.String(255),
                     comment='name')  # can be modified by end-user

    created_at = db.Column(db.DateTime(timezone=True),
                           comment='created_at',
                           server_default=func.now())
    updated_at = db.Column(db.DateTime(timezone=True),
                           comment='updated_at',
                           server_default=func.now(),
                           onupdate=func.now())
    deleted_at = db.Column(db.DateTime(timezone=True), comment='deleted_at')

    # TODO https://code.byted.org/data/fedlearner_web_console_v2/issues/289
    extra = db.Column(db.Text(), comment='extra')  # json string
Example #23
0
class OptimisticLock(db.Model):
    __tablename__ = 'optimistic_lock_v2'
    __table_args__ = (
        UniqueConstraint('name', name='uniq_name'),
        default_table_args('optimistic lock'),
    )
    id = db.Column(db.Integer,
                   comment='id',
                   primary_key=True,
                   autoincrement=True)
    name = db.Column(db.String(255), comment='lock name', nullable=False)
    version = db.Column(db.BIGINT, comment='lock version', nullable=False)
    created_at = db.Column(db.DateTime(timezone=True),
                           comment='created at',
                           server_default=func.now())
    updated_at = db.Column(db.DateTime(timezone=True),
                           comment='updated at',
                           server_default=func.now(),
                           onupdate=func.now())
    deleted_at = db.Column(db.DateTime(timezone=True), comment='deleted at')
Example #24
0
class SchedulerRunner(db.Model):
    __tablename__ = 'scheduler_runner_v2'
    __table_args__ = (default_table_args('scheduler runners'))
    id = db.Column(db.Integer,
                   comment='id',
                   primary_key=True,
                   autoincrement=True)
    item_id = db.Column(db.Integer, comment='item id', nullable=False)
    status = db.Column(db.Integer,
                       comment='runner status',
                       nullable=False,
                       default=RunnerStatus.INIT.value)
    start_at = db.Column(db.DateTime(timezone=True),
                         comment='runner start time')
    end_at = db.Column(db.DateTime(timezone=True), comment='runner end time')
    pipeline = db.Column(db.Text(),
                         comment='pipeline from scheduler item',
                         nullable=False,
                         default='{}')
    output = db.Column(db.Text(),
                       comment='output',
                       nullable=False,
                       default='{}')
    context = db.Column(db.Text(),
                        comment='context',
                        nullable=False,
                        default='{}')
    extra = db.Column(db.Text(), comment='extra info')
    created_at = db.Column(db.DateTime(timezone=True),
                           comment='created at',
                           server_default=func.now())
    updated_at = db.Column(db.DateTime(timezone=True),
                           comment='updated at',
                           server_default=func.now(),
                           onupdate=func.now())
    deleted_at = db.Column(db.DateTime(timezone=True), comment='deleted at')
Example #25
0
class DataBatch(db.Model):
    __tablename__ = 'data_batches_v2'
    __table_args__ = (UniqueConstraint('event_time', 'dataset_id'), )
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    event_time = db.Column(db.TIMESTAMP(timezone=True), nullable=False)
    dataset_id = db.Column(db.Integer, nullable=False)
    path = db.Column(db.String(512))
    state = db.Column(db.Enum(BatchState, native_enum=False),
                      default=BatchState.NEW)
    move = db.Column(db.Boolean, default=False)
    # Serialized proto of DatasetBatch
    details = db.Column(db.LargeBinary())
    file_size = db.Column(db.Integer, default=0)
    num_imported_file = db.Column(db.Integer, default=0)
    num_file = db.Column(db.Integer, default=0)
    comment = db.Column('cmt', db.Text(), key='comment')
    created_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now())
    updated_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now(),
                           server_onupdate=func.now())
    deleted_at = db.Column(db.DateTime(timezone=True))

    dataset = db.relationship('Dataset',
                              primaryjoin='Dataset.id == '
                              'foreign(DataBatch.dataset_id)',
                              back_populates='data_batches')

    def set_details(self, proto):
        self.num_file = len(proto.files)
        num_imported_file = 0
        num_failed_file = 0
        file_size = 0
        # Aggregates stats
        for file in proto.files:
            if file.state == dataset_pb2.File.State.COMPLETED:
                num_imported_file += 1
                file_size += file.size
            elif file.state == dataset_pb2.File.State.FAILED:
                num_failed_file += 1
        if num_imported_file + num_failed_file == self.num_file:
            if num_failed_file > 0:
                self.state = BatchState.FAILED
            else:
                self.state = BatchState.SUCCESS
        self.num_imported_file = num_imported_file
        self.file_size = file_size
        self.details = proto.SerializeToString()

    def get_details(self):
        if self.details is None:
            return None
        proto = dataset_pb2.DataBatch()
        proto.ParseFromString(self.details)
        return proto
Example #26
0
class SchedulerItem(db.Model):
    __tablename__ = 'scheduler_item_v2'
    __table_args__ = (UniqueConstraint('name', name='uniq_name'),
                      default_table_args('scheduler items'))
    id = db.Column(db.Integer,
                   comment='id',
                   primary_key=True,
                   autoincrement=True)
    name = db.Column(db.String(255), comment='item name', nullable=False)
    pipeline = db.Column(db.Text,
                         comment='pipeline',
                         nullable=False,
                         default='{}')
    status = db.Column(db.Integer,
                       comment='item status',
                       nullable=False,
                       default=ItemStatus.ON.value)
    interval_time = db.Column(db.Integer,
                              comment='item run interval in second',
                              nullable=False,
                              default=-1)
    last_run_at = db.Column(db.DateTime(timezone=True),
                            comment='last runner time')
    retry_cnt = db.Column(db.Integer,
                          comment='retry count when item is failed',
                          nullable=False,
                          default=0)
    extra = db.Column(db.Text(), comment='extra info')
    created_at = db.Column(db.DateTime(timezone=True),
                           comment='created at',
                           server_default=func.now())
    updated_at = db.Column(db.DateTime(timezone=True),
                           comment='updated at',
                           server_default=func.now(),
                           onupdate=func.now())
    deleted_at = db.Column(db.DateTime(timezone=True), comment='deleted at')

    def need_run(self) -> bool:
        # job runs one time
        if self.interval_time == -1 and self.last_run_at is None:
            return True
        if self.interval_time > 0:  # cronjob
            if self.last_run_at is None:  # never run
                return True
            # compare datetime in utc
            next_run_at = self.last_run_at.replace(
                tzinfo=datetime.timezone.utc) + datetime.timedelta(
                    seconds=self.interval_time)
            utc_now = datetime.datetime.now(datetime.timezone.utc)
            logging.info(f'[composer] item id: {self.id}, '
                         f'next_run_at: {next_run_at.timestamp()}, '
                         f'utc_now: {utc_now.timestamp()}')
            if next_run_at.timestamp() < utc_now.timestamp():
                return True
        return False
Example #27
0
class Job(db.Model):
    __tablename__ = 'job_v2'
    __table_args__ = (Index('idx_workflow_id', 'workflow_id'), {
        'comment': 'webconsole job',
        'mysql_engine': 'innodb',
        'mysql_charset': 'utf8mb4',
    })
    id = db.Column(db.Integer,
                   primary_key=True,
                   autoincrement=True,
                   comment='id')
    name = db.Column(db.String(255), unique=True, comment='name')
    job_type = db.Column(db.Enum(JobType, native_enum=False),
                         nullable=False,
                         comment='job type')
    state = db.Column(db.Enum(JobState, native_enum=False),
                      nullable=False,
                      default=JobState.INVALID,
                      comment='state')
    config = db.Column(db.LargeBinary(16777215), comment='config')

    is_disabled = db.Column(db.Boolean(), default=False, comment='is_disabled')

    workflow_id = db.Column(db.Integer, nullable=False, comment='workflow id')
    project_id = db.Column(db.Integer, nullable=False, comment='project id')
    flapp_snapshot = db.Column(db.Text(16777215), comment='flapp snapshot')
    pods_snapshot = db.Column(db.Text(16777215), comment='pods snapshot')
    error_message = db.Column(db.Text(), comment='error message')

    created_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now(),
                           comment='created at')
    updated_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now(),
                           onupdate=func.now(),
                           comment='updated at')
    deleted_at = db.Column(db.DateTime(timezone=True), comment='deleted at')

    project = db.relationship('Project',
                              primaryjoin='Project.id == '
                              'foreign(Job.project_id)')
    workflow = db.relationship('Workflow',
                               primaryjoin='Workflow.id == '
                               'foreign(Job.workflow_id)')

    def get_config(self):
        if self.config is not None:
            proto = JobDefinition()
            proto.ParseFromString(self.config)
            return proto
        return None

    def set_config(self, proto):
        if proto is not None:
            self.config = proto.SerializeToString()
        else:
            self.config = None

    def _set_snapshot_flapp(self):
        def default(o):
            if isinstance(o, (datetime.date, datetime.datetime)):
                return o.isoformat()
            return str(o)

        flapp = k8s_client.get_flapp(self.name)
        if flapp:
            self.flapp_snapshot = json.dumps(flapp, default=default)
        else:
            self.flapp_snapshot = None

    def get_flapp_details(self):
        if self.state == JobState.STARTED:
            flapp = k8s_client.get_flapp(self.name)
        elif self.flapp_snapshot is not None:
            flapp = json.loads(self.flapp_snapshot)
            # aims to support old job
            if 'flapp' not in flapp:
                flapp['flapp'] = None
            if 'pods' not in flapp and self.pods_snapshot:
                flapp['pods'] = json.loads(self.pods_snapshot)['pods']
        else:
            flapp = {'flapp': None, 'pods': {'items': []}}
        return flapp

    def get_pods_for_frontend(self, include_private_info=True):
        flapp_details = self.get_flapp_details()
        flapp = FlApp.from_json(flapp_details.get('flapp', None))
        pods_json = None
        if 'pods' in flapp_details:
            pods_json = flapp_details['pods'].get('items', None)
        pods = []
        if pods_json is not None:
            pods = [Pod.from_json(p) for p in pods_json]

        # deduplication pods both in pods and flapp
        result = {}
        for pod in flapp.pods:
            result[pod.name] = pod
        for pod in pods:
            result[pod.name] = pod
        return [pod.to_dict(include_private_info) for pod in result.values()]

    def get_state_for_frontend(self):
        return self.state.name

    def is_flapp_failed(self):
        # TODO: make the getter more efficient
        flapp = FlApp.from_json(self.get_flapp_details()['flapp'])
        return flapp.state in [FlAppState.FAILED, FlAppState.SHUTDOWN]

    def is_flapp_complete(self):
        # TODO: make the getter more efficient
        flapp = FlApp.from_json(self.get_flapp_details()['flapp'])
        return flapp.state == FlAppState.COMPLETED

    def get_complete_at(self):
        # TODO: make the getter more efficient
        flapp = FlApp.from_json(self.get_flapp_details()['flapp'])
        return flapp.completed_at

    def stop(self):
        if self.state not in [JobState.WAITING, JobState.STARTED,
                              JobState.COMPLETED, JobState.FAILED]:
            logging.warning('illegal job state, name: %s, state: %s',
                            self.name, self.state)
            return
        if self.state == JobState.STARTED:
            self._set_snapshot_flapp()
            k8s_client.delete_flapp(self.name)
        # state change:
        # WAITING -> NEW
        # STARTED -> STOPPED
        # COMPLETED/FAILED unchanged
        if self.state == JobState.STARTED:
            self.state = JobState.STOPPED
        if self.state == JobState.WAITING:
            self.state = JobState.NEW

    def schedule(self):
        # COMPLETED/FAILED Job State can be scheduled since stop action
        # will not change the state of completed or failed job
        assert self.state in [JobState.NEW, JobState.STOPPED,
                              JobState.COMPLETED, JobState.FAILED]
        self.pods_snapshot = None
        self.flapp_snapshot = None
        self.state = JobState.WAITING

    def start(self):
        assert self.state == JobState.WAITING
        self.state = JobState.STARTED

    def complete(self):
        assert self.state == JobState.STARTED, 'Job State is not STARTED'
        self._set_snapshot_flapp()
        k8s_client.delete_flapp(self.name)
        self.state = JobState.COMPLETED

    def fail(self):
        assert self.state == JobState.STARTED, 'Job State is not STARTED'
        self._set_snapshot_flapp()
        k8s_client.delete_flapp(self.name)
        self.state = JobState.FAILED
Example #28
0
class Workflow(db.Model):
    __tablename__ = 'workflow_v2'
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(255), unique=True, index=True)
    project_id = db.Column(db.Integer, db.ForeignKey(Project.id))
    config = db.Column(db.Text())
    comment = db.Column(db.String(255))

    forkable = db.Column(db.Boolean, default=False)
    forked_from = db.Column(db.Integer, default=None)
    # index in config.job_defs instead of job's id
    reuse_job_names = db.Column(db.TEXT())
    peer_reuse_job_names = db.Column(db.TEXT())
    fork_proposal_config = db.Column(db.TEXT())

    recur_type = db.Column(db.Enum(RecurType), default=RecurType.NONE)
    recur_at = db.Column(db.Interval)
    trigger_dataset = db.Column(db.Integer)
    last_triggered_batch = db.Column(db.Integer)

    job_ids = db.Column(db.TEXT())

    state = db.Column(db.Enum(WorkflowState), default=WorkflowState.INVALID)
    target_state = db.Column(db.Enum(WorkflowState),
                             default=WorkflowState.INVALID)
    transaction_state = db.Column(db.Enum(TransactionState),
                                  default=TransactionState.READY)
    transaction_err = db.Column(db.Text())

    start_at = db.Column(db.Integer)
    stop_at = db.Column(db.Integer)

    created_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now())
    updated_at = db.Column(db.DateTime(timezone=True),
                           onupdate=func.now(),
                           server_default=func.now())

    owned_jobs = db.relationship('Job', back_populates='workflow')
    project = db.relationship(Project)

    def set_config(self, proto):
        if proto is not None:
            self.config = proto.SerializeToString()
        else:
            self.config = None

    def get_config(self):
        if self.config is not None:
            proto = workflow_definition_pb2.WorkflowDefinition()
            proto.ParseFromString(self.config)
            return proto
        return None

    def set_fork_proposal_config(self, proto):
        if proto is not None:
            self.fork_proposal_config = proto.SerializeToString()
        else:
            self.fork_proposal_config = None

    def get_fork_proposal_config(self):
        if self.fork_proposal_config is not None:
            proto = workflow_definition_pb2.WorkflowDefinition()
            proto.ParseFromString(self.fork_proposal_config)
            return proto
        return None

    def set_job_ids(self, job_ids):
        self.job_ids = ','.join([str(i) for i in job_ids])

    def get_job_ids(self):
        if not self.job_ids:
            return []
        return [int(i) for i in self.job_ids.split(',')]

    def get_jobs(self):
        return [Job.query.get(i) for i in self.get_job_ids()]

    def set_reuse_job_names(self, reuse_job_names):
        self.reuse_job_names = ','.join(reuse_job_names)

    def get_reuse_job_names(self):
        if not self.reuse_job_names:
            return []
        return self.reuse_job_names.split(',')

    def set_peer_reuse_job_names(self, peer_reuse_job_names):
        self.peer_reuse_job_names = ','.join(peer_reuse_job_names)

    def get_peer_reuse_job_names(self):
        if not self.peer_reuse_job_names:
            return []
        return self.peer_reuse_job_names.split(',')

    def update_target_state(self, target_state):
        if self.target_state != target_state \
                and self.target_state != WorkflowState.INVALID:
            raise ValueError(f'Another transaction is in progress [{self.id}]')
        if target_state not in [
                WorkflowState.READY, WorkflowState.RUNNING,
                WorkflowState.STOPPED
        ]:
            raise ValueError(f'Invalid target_state {self.target_state}')
        if (self.state, target_state) not in VALID_TRANSITIONS:
            raise ValueError(
                f'Invalid transition from {self.state} to {target_state}')
        self.target_state = target_state

    def update_state(self, asserted_state, target_state, transaction_state):
        assert asserted_state is None or self.state == asserted_state, \
            'Cannot change current state directly'

        if transaction_state != self.transaction_state:
            if (self.transaction_state, transaction_state) in \
                    IGNORED_TRANSACTION_TRANSITIONS:
                return self.transaction_state
            assert (self.transaction_state, transaction_state) in \
                   VALID_TRANSACTION_TRANSITIONS, \
                'Invalid transaction transition from {} to {}'.format(
                    self.transaction_state, transaction_state)
            self.transaction_state = transaction_state

        # coordinator prepare & rollback
        if self.transaction_state == TransactionState.COORDINATOR_PREPARE:
            self.prepare(target_state)
        if self.transaction_state == TransactionState.COORDINATOR_ABORTING:
            self.rollback()

        # participant prepare & rollback & commit
        if self.transaction_state == TransactionState.PARTICIPANT_PREPARE:
            self.prepare(target_state)
        if self.transaction_state == TransactionState.PARTICIPANT_ABORTING:
            self.rollback()
            self.transaction_state = TransactionState.ABORTED
        if self.transaction_state == TransactionState.PARTICIPANT_COMMITTING:
            self.commit()

        return self.transaction_state

    def prepare(self, target_state):
        assert self.transaction_state in [
            TransactionState.COORDINATOR_PREPARE,
            TransactionState.PARTICIPANT_PREPARE], \
            'Workflow not in prepare state'

        # TODO(tjulinfan): remove this
        if target_state is None:
            # No action
            return

        # Validation
        try:
            self.update_target_state(target_state)
        except ValueError as e:
            logging.warning('Error during update target state in prepare: %s',
                            str(e))
            self.transaction_state = TransactionState.ABORTED
            return

        success = True
        if self.target_state == WorkflowState.READY:
            success = self._prepare_for_ready()

        if success:
            if self.transaction_state == TransactionState.COORDINATOR_PREPARE:
                self.transaction_state = \
                    TransactionState.COORDINATOR_COMMITTABLE
            else:
                self.transaction_state = \
                    TransactionState.PARTICIPANT_COMMITTABLE

    def rollback(self):
        self.target_state = WorkflowState.INVALID

    # TODO: separate this method to another module
    def commit(self):
        assert self.transaction_state in [
            TransactionState.COORDINATOR_COMMITTING,
            TransactionState.PARTICIPANT_COMMITTING], \
                'Workflow not in prepare state'

        if self.target_state == WorkflowState.STOPPED:
            self.stop_at = int(datetime.utcnow().timestamp())
            for job in self.owned_jobs:
                job.stop()
        elif self.target_state == WorkflowState.READY:
            self._setup_jobs()
            self.fork_proposal_config = None
        elif self.target_state == WorkflowState.RUNNING:
            self.start_at = int(datetime.utcnow().timestamp())
            for job in self.owned_jobs:
                if not job.get_config().is_manual:
                    job.schedule()

        self.state = self.target_state
        self.target_state = WorkflowState.INVALID
        self.transaction_state = TransactionState.READY

    def _setup_jobs(self):
        if self.forked_from is not None:
            trunk = Workflow.query.get(self.forked_from)
            assert trunk is not None, \
                'Source workflow %d not found'%self.forked_from
            trunk_job_defs = trunk.get_config().job_definitions
            trunk_name2index = {
                job.name: i
                for i, job in enumerate(trunk_job_defs)
            }
        else:
            assert not self.get_reuse_job_names()

        job_defs = self.get_config().job_definitions
        jobs = []
        reuse_jobs = set(self.get_reuse_job_names())
        for i, job_def in enumerate(job_defs):
            if job_def.name in reuse_jobs:
                assert job_def.name in trunk_name2index, \
                    "Job %s not found in base workflow"%job_def.name
                j = trunk.get_job_ids()[trunk_name2index[job_def.name]]
                job = Job.query.get(j)
                assert job is not None, \
                    'Job %d not found'%j
                # TODO: check forked jobs does not depend on non-forked jobs
            else:
                job = Job(name=f'{self.name}-{job_def.name}',
                          job_type=JobType(job_def.type),
                          config=job_def.SerializeToString(),
                          workflow_id=self.id,
                          project_id=self.project_id,
                          state=JobState.STOPPED)
                job.set_yaml_template(job_def.yaml_template)
                db.session.add(job)
            jobs.append(job)
        db.session.commit()

        name2index = {job.name: i for i, job in enumerate(job_defs)}
        for i, job in enumerate(jobs):
            if job.name in reuse_jobs:
                continue
            for j, dep_def in enumerate(job.get_config().dependencies):
                dep = JobDependency(
                    src_job_id=jobs[name2index[dep_def.source]].id,
                    dst_job_id=job.id,
                    dep_index=j)
                db.session.add(dep)

        self.set_job_ids([job.id for job in jobs])

        db.session.commit()

    def log_states(self):
        logging.debug(
            'workflow %d updated to state=%s, target_state=%s, '
            'transaction_state=%s', self.id, self.state.name,
            self.target_state.name, self.transaction_state.name)

    def _get_peer_workflow(self):
        project_config = self.project.get_config()
        # TODO: find coordinator for multiparty
        client = RpcClient(project_config, project_config.participants[0])
        return client.get_workflow(self.name)

    def _prepare_for_ready(self):
        # This is a hack, if config is not set then
        # no action needed
        if self.transaction_state == TransactionState.COORDINATOR_PREPARE:
            # TODO(tjulinfan): validate if the config is legal or not
            return bool(self.config)
        peer_workflow = self._get_peer_workflow()
        if peer_workflow.forked_from:
            base_workflow = Workflow.query.filter(
                Workflow.name == peer_workflow.forked_from).first()
            if base_workflow is None or not base_workflow.forkable:
                return False
            self.forked_from = base_workflow.id
            self.forkable = base_workflow.forkable
            self.set_reuse_job_names(peer_workflow.peer_reuse_job_names)
            self.set_peer_reuse_job_names(peer_workflow.reuse_job_names)
            config = base_workflow.get_config()
            _merge_workflow_config(config, peer_workflow.fork_proposal_config,
                                   [common_pb2.Variable.PEER_WRITABLE])
            self.set_config(config)
            return True
        return bool(self.config)
Example #29
0
class JobDependency(db.Model):
    __tablename__ = 'job_dependency_v2'
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    src_job_id = db.Column(db.Integer, index=True)
    dst_job_id = db.Column(db.Integer, index=True)
    dep_index = db.Column(db.Integer)
Example #30
0
class Job(db.Model):
    __tablename__ = 'job_v2'
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    name = db.Column(db.String(255), unique=True)
    job_type = db.Column(db.Enum(JobType, native_enum=False), nullable=False)
    state = db.Column(db.Enum(JobState, native_enum=False),
                      nullable=False,
                      default=JobState.INVALID)
    yaml_template = db.Column(db.Text())
    config = db.Column(db.LargeBinary())

    workflow_id = db.Column(db.Integer,
                            db.ForeignKey('workflow_v2.id'),
                            nullable=False,
                            index=True)
    project_id = db.Column(db.Integer,
                           db.ForeignKey(Project.id),
                           nullable=False)
    flapp_snapshot = db.Column(db.Text())
    pods_snapshot = db.Column(db.Text())

    created_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now())
    updated_at = db.Column(db.DateTime(timezone=True),
                           server_default=func.now(),
                           onupdate=func.now())
    deleted_at = db.Column(db.DateTime(timezone=True))

    project = db.relationship(Project)
    workflow = db.relationship('Workflow')
    _k8s_client = get_client()

    def get_config(self):
        if self.config is not None:
            proto = JobDefinition()
            proto.ParseFromString(self.config)
            return proto
        return None

    def _set_snapshot_flapp(self):
        flapp = self._k8s_client.get_custom_object(
            CrdKind.FLAPP, self.name, self.project.get_namespace())
        self.flapp_snapshot = json.dumps(flapp)

    def _set_snapshot_pods(self):
        pods = self._k8s_client.list_resource_of_custom_object(
            CrdKind.FLAPP, self.name, 'pods', self.project.get_namespace())
        self.pods_snapshot = json.dumps(pods)

    def get_pods(self):
        if self.state == JobState.STARTED:
            try:
                pods = self._k8s_client.list_resource_of_custom_object(
                    CrdKind.FLAPP, self.name, 'pods',
                    self.project.get_namespace())
                return pods['pods']
            except RuntimeError as e:
                logging.error('Get %d pods error msg: %s', self.id, e.args)
                return None
        if self.pods_snapshot is not None:
            return json.loads(self.pods_snapshot)['pods']
        return None

    def get_flapp(self):
        if self.state == JobState.STARTED:
            try:
                flapp = self._k8s_client.get_custom_object(
                    CrdKind.FLAPP, self.name, self.project.get_namespace())
                return flapp['flapp']
            except RuntimeError as e:
                logging.error('Get %d flapp error msg: %s', self.id, str(e))
                return None
        if self.flapp_snapshot is not None:
            return json.loads(self.flapp_snapshot)['flapp']
        return None

    def get_pods_for_frontend(self):
        result = []
        flapp = self.get_flapp()
        if flapp is None:
            return result
        if 'status' in flapp \
            and 'flReplicaStatus' in flapp['status']:
            replicas = flapp['status']['flReplicaStatus']
            if replicas is None:
                return result
            for pod_type in replicas:
                for state in ['failed', 'succeeded']:
                    for pod in replicas[pod_type][state]:
                        result.append({
                            'name': pod,
                            'status': 'Flapp_{}'.format(state),
                            'pod_type': pod_type
                        })
        # msg from pods
        pods = self.get_pods()
        if pods is None:
            return result
        pods = pods['items']
        for pod in pods:
            # TODO: make this more readable for frontend
            pod_for_front = {
                'name': pod['metadata']['name'],
                'pod_type': pod['metadata']['labels']['fl-replica-type'],
                'status': pod['status']['phase'],
                'conditions': pod['status']['conditions']
            }
            if 'containerStatuses' in pod['status']:
                pod_for_front['containers_status'] = \
                    pod['status']['containerStatuses']
            result.append(pod_for_front)
        # deduplication pods both in pods and flapp
        result = list({pod['name']: pod for pod in result}.values())
        return result

    def get_state_for_frontend(self):
        if self.state == JobState.STARTED:
            if self.is_complete():
                return 'COMPLETED'
            if self.is_failed():
                return 'FAILED'
            return 'RUNNING'
        if self.state == JobState.STOPPED:
            if self.get_flapp() is None:
                return 'NEW'
        return self.state.name

    def is_failed(self):
        flapp = self.get_flapp()
        if flapp is None \
                or 'status' not in flapp \
                or 'appState' not in flapp['status']:
            return False
        return flapp['status']['appState'] in [
            'FLStateFailed', 'FLStateShutDown'
        ]

    def is_complete(self):
        flapp = self.get_flapp()
        if flapp is None \
                or 'status' not in flapp \
                or 'appState' not in flapp['status']:
            return False
        return flapp['status']['appState'] == 'FLStateComplete'

    def get_complete_at(self):
        flapp = self.get_flapp()
        if flapp is None \
                or 'status' not in flapp \
                or 'complete_at' not in flapp['status']:
            return None
        return flapp['status']['complete_at']

    def stop(self):
        if self.state == JobState.STARTED:
            self._set_snapshot_flapp()
            self._set_snapshot_pods()
            self._k8s_client.delete_custom_object(CrdKind.FLAPP, self.name,
                                                  self.project.get_namespace())
        self.state = JobState.STOPPED

    def schedule(self):
        assert self.state == JobState.STOPPED
        self.pods_snapshot = None
        self.flapp_snapshot = None
        self.state = JobState.WAITING

    def start(self):
        self.state = JobState.STARTED

    def set_yaml_template(self, yaml_template):
        self.yaml_template = yaml_template