class Pipeline(Entity, sdc_client.IPipeline):
    __tablename__ = 'pipelines'

    STATUS_RUNNING = 'RUNNING'
    STATUS_STOPPED = 'STOPPED'
    STATUS_EDITED = 'EDITED'
    STATUS_RETRY = 'RETRY'
    STATUS_STOPPING = 'STOPPING'
    STATUS_STARTING = 'STARTING'
    STATUS_RUN_ERROR = 'RUN_ERROR'
    STATUS_START_ERROR = 'START_ERROR'
    STATUS_STOP_ERROR = 'STOP_ERROR'
    STATUS_RUNNING_ERROR = 'RUNNING_ERROR'

    OVERRIDE_SOURCE = 'override_source'
    FLUSH_BUCKET_SIZE = 'flush_bucket_size'

    COUNTER = 'counter'
    GAUGE = 'gauge'
    RUNNING_COUNTER = 'running_counter'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    type = Column(String)
    source_id = Column(Integer, ForeignKey('sources.id'))
    destination_id = Column(Integer, ForeignKey('destinations.id'), nullable=True)
    config = Column(JSON)
    schema = Column(JSON)
    override_source = Column(JSON)
    created_at = Column(TIMESTAMP(timezone=True), default=func.now())
    last_edited = Column(TIMESTAMP(timezone=True), default=func.now(), onupdate=func.now())
    status = Column(String, default=STATUS_EDITED)
    streamsets_id = Column(Integer, ForeignKey('streamsets.id'))

    offset = relationship("PipelineOffset", cascade="delete", uselist=False)
    watermark = relationship("PipelineWatermark", cascade="delete", uselist=False)
    source_ = relationship('Source', back_populates='pipelines')
    destination = relationship('HttpDestination', cascade="merge")
    streamsets = relationship('StreamSets')
    retries = relationship('PipelineRetries', cascade="delete", uselist=False)
    notifications = relationship('PiplineNotifications', cascade="delete", uselist=False)

    def __init__(self, pipeline_id: str, source_: Source, destination: HttpDestination):
        self.name = pipeline_id
        self._previous_config = {}
        self._previous_override_source = {}
        self.config = {}
        self.source_ = source_
        self.source_id = source_.id
        self.destination = destination
        self.destination_id = destination.id
        self.override_source = {}
        self.streamsets_id = None
        self.streamsets = None
        self.type = REGULAR_PIPELINE
        self.notifications: PiplineNotifications = None

    def config_changed(self) -> bool:
        if not hasattr(self, '_previous_config'):
            return False
        return self.config != self._previous_config or self.override_source != self._previous_override_source

    def set_config(self, config: dict):
        self._previous_config = deepcopy(self.config)
        self._previous_override_source = deepcopy(self.override_source)
        self.override_source = config.pop(self.OVERRIDE_SOURCE, {})
        self.config = deepcopy(config)

    @property
    def source(self) -> Source:
        return self.source_

    def has_periodic_watermark_config(self) -> bool:
        return bool(self.config.get('periodic_watermark'))

    def has_offset(self) -> bool:
        return bool(self.offset)

    def has_watermark(self) -> bool:
        return bool(self.watermark)

    @property
    def periodic_watermark_config(self) -> dict:
        return self.config.get('periodic_watermark', {})

    @property
    def watermark_delay(self) -> int:
        return self.config.get('periodic_watermark', {}).get('delay', 0)

    @property
    def watermark_logs_enabled(self) -> bool:
        return self.config.get('watermark_logs_enabled', False)

    @property
    def flush_bucket_size(self) -> FlushBucketSize:
        return FlushBucketSize(self.config.get(self.FLUSH_BUCKET_SIZE, '1d'))

    @flush_bucket_size.setter
    def flush_bucket_size(self, value: str):
        self.config[self.FLUSH_BUCKET_SIZE] = FlushBucketSize(value).value

    @property
    def static_dimensions(self) -> dict:
        return self.config.get('properties', {})

    @property
    def static_dimension_names(self) -> list:
        return [tools.replace_illegal_chars(s_dim) for s_dim in self.static_dimensions.keys()]

    @property
    def dimensions(self) -> list | dict:
        return self.config.get('dimensions', [])

    @property
    def dimension_paths(self) -> list:
        return [self._get_property_path(value) for value in self.all_dimensions]

    @property
    def required_dimensions(self) -> list:
        if type(self.dimensions) is list:
            return []
        return self.dimensions.get('required', [])

    @property
    def required_dimension_paths(self) -> list:
        return [self._get_property_path(value) for value in self.required_dimensions]

    @property
    def optional_dimensions(self) -> list:
        if type(self.dimensions) is list:
            return []
        return self.dimensions.get('optional', [])

    @property
    def all_dimensions(self) -> list:
        if not self.dimensions or type(self.dimensions) is list:
            return self.dimensions + self.static_dimension_names
        return self.required_dimensions + self.optional_dimensions + self.static_dimension_names

    @property
    def dimension_names(self) -> list:
        return [tools.replace_illegal_chars(d.replace('/', '_')) for d in self.all_dimensions]

    @property
    def dimension_paths_with_names(self) -> dict:
        return dict(zip(self.dimension_paths, self.dimension_names))

    @property
    def dimension_configurations(self) -> Optional[dict]:
        if not isinstance(self.dimensions, list):
            raise PipelineException(
                'Pipeline dimensions should be a list in order to build dimension_configurations, '
                f'but {type(self.dimensions).__name__} provided'
            )
        return _build_transformation_configurations(
            self.dimensions, self.config.get('dimension_configurations'))

    @property
    def measurement_configurations(self) -> Optional[dict]:
        return _build_transformation_configurations(
            list(self.values),
            self.config.get('measurement_configurations', {}),
        )

    @property
    def tag_configurations(self) -> Optional[dict]:
        return self.config.get('tag_configurations', {})

    @property
    def timestamp_path(self) -> str:
        return self._get_property_path(self.config['timestamp']['name'])

    @property
    def timezone(self) -> str:
        return self.config.get('timezone', 'UTC')

    @property
    def timestamp_type(self) -> TimestampType:
        return TimestampType(self.config['timestamp']['type'])

    @property
    def timestamp_name(self) -> Optional[str]:
        return self.config.get('timestamp', {}).get('name')

    @property
    def timestamp_format(self) -> str:
        return self.config['timestamp'].get('format')

    @property
    def values(self) -> dict:
        return self.config.get('values', {})

    @property
    def value_paths(self) -> list:
        return [self._get_property_path(value) for value in self.values.keys()]

    @property
    def target_types(self) -> list:
        if self.source.type == source.TYPE_INFLUX:
            return [self.config.get('target_type', 'gauge')] * len(self.value_paths)
        return list(self.values.values())

    @property
    def measurement_paths_with_names(self) -> dict:
        return dict(zip(self.config.get('measurement_names', {}).keys(), self.measurement_names))

    @property
    def measurement_names(self) -> list:
        return [
            tools.replace_illegal_chars(self.config.get('measurement_names', {}).get(key, key))
            for key in self.values.keys()
        ]

    @property
    def measurement_names_with_target_types(self) -> dict:
        result = {}
        measurement_names = self.config.get('measurement_names', {})
        for measurement, target_type in self.values.items():
            measurement_name = measurement_names.get(measurement, measurement)
            measurement_name = tools.replace_illegal_chars(measurement_name)
            result[measurement_name] = target_type
        return result

    @property
    def measurement_names_paths(self):
        return [self._get_property_path(value) for value in self.measurement_names]

    @property
    def value_paths_with_names(self) -> dict:
        # value_paths should work the same as the value_names that used to be here;
        # value_paths are needed for directory, kafka and maybe something else
        return dict(zip(self.value_paths, self.measurement_names))

    @property
    def target_types_paths(self):
        return [self._get_property_path(t_type) for t_type in self.target_types]

    @property
    def count_records(self) -> bool:
        return self.config.get('count_records', False)

    @property
    def count_records_measurement_name(self) -> str:
        return tools.replace_illegal_chars(self.config.get('count_records_measurement_name', 'count'))

    @property
    def static_what(self) -> bool:
        return self.config.get('static_what', True)

    @property
    def transformations_config(self) -> str:
        return self.config.get('transform', {}).get('config')

    @property
    def filter_condition(self) -> str:
        return self.config.get('filter', {}).get('condition')

    @property
    def tags(self) -> dict:
        return self.config.get('tags', {})

    @property
    def values_array_path(self) -> str:
        return self.config.get('values_array_path', '')

    @property
    def values_array_filter_metrics(self) -> list:
        return self.config.get('values_array_filter_metrics', [])

    @property
    def query_file(self) -> Optional[str]:
        return self.config.get('query_file')

    @property
    def query(self) -> Optional[str]:
        return self.config.get('query')

    @query.setter
    def query(self, query: str):
        self.config['query'] = query

    @property
    def interval(self) -> Optional[int]:
        # returns the interval in seconds
        interval = self.config.get('interval')
        if interval in Interval.VALUES:
            return Interval(interval).total_seconds()
        return int(interval) if interval is not None else None

    @property
    def days_to_backfill(self) -> str:
        return str(self.config.get('days_to_backfill', '0'))

    @property
    def delay(self) -> str:
        return self.config.get('delay', '0')

    @property
    def watermark_in_local_timezone(self) -> str:
        return self.config.get('watermark_in_local_timezone', False)

    @property
    def batch_size(self) -> str:
        return self.config.get('batch_size', '1000')

    def uses_schema(self) -> bool:
        return bool(self.config.get('uses_schema'))

    @property
    def histories_batch_size(self) -> str:
        return self.config.get('histories_batch_size', '100')

    @property
    def header_attributes(self) -> list:
        return self.config.get('header_attributes', [])

    @property
    def log_everything(self) -> bool:
        return bool(self.config.get('log_everything'))

    @property
    def transform_script_config(self) -> str:
        return self.config.get('transform_script', {}).get('config', '')

    @property
    def watermark_sleep_time(self) -> int:
        return self.config.get('watermark_sleep_time', 10)

    @property
    def lookups(self) -> dict:
        return self.config.get('lookups', {})

    @property
    def is_strict(self) -> bool:
        return bool(self.config.get('strict', True))

    @property
    def dvp_config(self) -> dict:
        return self.config.get('dvpConfig', {})

    @property
    def dynamic_step(self) -> bool:
        return bool(self.config.get('dynamic_step', False))

    def get_streamsets_config(self) -> dict:
        return pipeline.manager.create_streamsets_pipeline_config(self)

    def get_id(self) -> str:
        return self.name

    def get_offset(self) -> Optional[str]:
        return self.offset.offset if self.offset else None

    def get_streamsets(self) -> Optional[sdc_client.IStreamSets]:
        return self.streamsets

    def set_streamsets(self, streamsets_: StreamSets):
        self.streamsets_id = streamsets_.id
        self.streamsets = streamsets_

    def delete_streamsets(self):
        self.streamsets_id = None
        self.streamsets = None

    def get_schema(self) -> dict:
        return self.schema or {}

    def has_schema(self) -> bool:
        return bool(self.schema)

    def get_schema_id(self) -> Optional[str]:
        return self.get_schema().get('id')

    def export(self) -> dict:
        return {
            **self.config,
            self.OVERRIDE_SOURCE: self.override_source,
            'pipeline_id': self.name,
            'source': self.source.name,
        }

    def to_dict(self) -> dict:
        return {
            'id': self.name,
            'config': self.config,
            'schema': self.get_schema(),
            'override_source': self.override_source,
            'source': self.source.config,
            'destination': self.destination.config,
        }

    def _get_property_path(self, property_value: str) -> str:
        for idx, item in self.source.config.get('csv_mapping', {}).items():
            if item == property_value:
                return str(idx)
        if property_value in self.config.get('dimension_value_paths', {}):
            return str(self.config.get('dimension_value_paths', {})[property_value])
        return property_value

    def meta_tags(self) -> dict:
        return {
            'source': ['anodot-agent'],
            'source_host_id': [self.destination.host_id],
            'source_host_name': [tools.replace_illegal_chars(HOSTNAME)],
            'pipeline_id': [self.name],
            'pipeline_type': [self.source.type],
        }

    def get_tags(self) -> dict:
        return {**self.meta_tags(), **self.tags}

    def error_notification_enabled(self) -> bool:
        return not self.config.get('disable_error_notifications', False)
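# A minimal usage sketch of the set_config()/config_changed() handshake
# (the source_/destination objects and config keys are assumed for
# illustration, not taken from the original codebase): set_config()
# snapshots the previous config and override_source, so config_changed()
# can detect edits between two consecutive calls.
pipeline_ = Pipeline('my_pipeline', source_, destination)
pipeline_.set_config({'interval': 60, 'override_source': {'conf.query': 'SELECT 1'}})
pipeline_.set_config({'interval': 300, 'override_source': {'conf.query': 'SELECT 1'}})
assert pipeline_.config_changed()  # the interval changed between the two calls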
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.dialects.postgresql import INTERVAL, TIMESTAMP, TSTZRANGE, UUID

db = SQLAlchemy()
db.INTERVAL = INTERVAL()
db.TIMESTAMP = TIMESTAMP()
db.TSTZRANGE = TSTZRANGE()
db.UUID = UUID()


class Profile(db.Model):
    __tablename__ = 'profile'
    __table_args__ = {'schema': 'account'}

    id = db.Column(UUID(as_uuid=False), primary_key=True)
    user_name = db.Column(db.Text)
    employee_id = db.Column(UUID())
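# Hedged sketch of why the PostgreSQL dialect types are attached to `db`:
# models can then reference them like built-in column types without extra
# imports. The Shift model and its columns are hypothetical, not part of
# the original codebase.
class Shift(db.Model):
    __tablename__ = 'shift'
    __table_args__ = {'schema': 'account'}

    id = db.Column(db.UUID, primary_key=True)
    period = db.Column(db.TSTZRANGE)   # a tstzrange column
    duration = db.Column(db.INTERVAL)  # an interval column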
def default_fkey(*args, **kwargs):
    return ForeignKeyConstraint(*args, onupdate='CASCADE', ondelete='CASCADE', **kwargs)


ads = Table(
    'ads',
    metadata,
    Column('id', Integer(), primary_key=True, nullable=False),
    Column('owner', Text(), nullable=False),
    Column('link_target', Text(), nullable=False),
    Column('file', Integer(), nullable=False),
    Column('start', TIMESTAMP(), nullable=True),
    Column('end', TIMESTAMP(), nullable=True),
    default_fkey(['file'], ['media.mediaid'], name='ads_file_fkey'),
)

Index('ind_ads_end', ads.c.end)

api_tokens = Table(
    'api_tokens',
    metadata,
    Column('userid', Integer(), primary_key=True, nullable=False),
    Column('token', String(length=64), primary_key=True, nullable=False),
    Column('description', String()),
    default_fkey(['userid'], ['login.userid'], name='api_tokens_userid_fkey'),
)
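# Illustrative equivalence: default_fkey only fills in the cascading
# referential actions, so these two constraint objects are interchangeable.
fk_short = default_fkey(['file'], ['media.mediaid'], name='ads_file_fkey')
fk_long = ForeignKeyConstraint(['file'], ['media.mediaid'], name='ads_file_fkey',
                               onupdate='CASCADE', ondelete='CASCADE')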
class AggregateFacilityDaily(Base):
    """
    Facility Dailies Aggregates
    """

    __tablename__ = "at_facility_daily"

    # def __str__(self) -> str:
    #     return "<{}: {} {} {}>".format(
    #         self.__class__,
    #         self.trading_interval,
    #         self.network_id,
    #         self.facility_code,
    #     )

    # def __repr__(self) -> str:
    #     return "{}: {} {} {}".format(
    #         self.__class__,
    #         self.trading_interval,
    #         self.network_id,
    #         self.facility_code,
    #     )

    trading_day = Column(TIMESTAMP(timezone=True), index=True, primary_key=True, nullable=False)
    network_id = Column(
        Text,
        ForeignKey("network.code", name="fk_at_facility_daily_network_code"),
        primary_key=True,
        index=True,
        nullable=False,
    )
    network = relationship("Network")
    facility_code = Column(
        Text,
        ForeignKey("facility.code", name="fk_at_facility_daily_facility_code"),
        primary_key=True,
        index=True,
        nullable=False,
    )
    facility = relationship("Facility")
    fueltech_id = Column(Text, nullable=True)
    energy = Column(Numeric, nullable=True)
    market_value = Column(Numeric, nullable=True)
    emissions = Column(Numeric, nullable=True)

    __table_args__ = (
        Index(
            "idx_at_facility_day_facility_code_trading_interval",
            facility_code,
            trading_day.desc(),
        ),
        Index("idx_at_facility_daily_network_id_trading_interval", network_id, trading_day.desc()),
        Index("idx_at_facility_daily_trading_interval_facility_code", trading_day, facility_code),
    )
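# Hedged sketch: the descending composite indexes above line up with
# "latest day first" queries per facility. The session object and the
# facility code are assumed for illustration.
latest_month = (
    session.query(AggregateFacilityDaily)
    .filter(AggregateFacilityDaily.facility_code == "BAYSW1")
    .order_by(AggregateFacilityDaily.trading_day.desc())
    .limit(30)
    .all()
)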
class Job(BaseModel):
    __tablename__ = "jobs"

    name = db.Column(
        db.String(255),
        unique=False,
        nullable=False,
        # For migrating users.
        server_default=text("'job'"),
    )
    pipeline_name = db.Column(
        db.String(255),
        unique=False,
        nullable=False,
        # For migrating users.
        server_default=text("''"),
    )
    uuid = db.Column(db.String(36), primary_key=True)
    project_uuid = db.Column(db.String(36))
    pipeline_uuid = db.Column(db.String(36), primary_key=False)

    # Jobs that are to be scheduled once (right now) or once in the
    # future will have no schedule (null).
    schedule = db.Column(db.String(100), nullable=True)

    # A list of dictionaries. The length of the list is the number of
    # non interactive runs that will be run, one for each parameters
    # dictionary. A parameter dictionary maps step uuids to a dictionary,
    # containing the parameters of that step for that particular run.
    # [{ <step_uuid>: {"a": 1}, ...}, ...]
    parameters = db.Column(
        JSONB,
        nullable=False,
        # This way migrated entries that did not have this column will
        # still be valid. Note that the entries will be stored as a list
        # of dicts.
        server_default="[]",
    )

    # Note that this column also contains the parameters that were
    # stored within the pipeline definition file. These are not the job
    # parameters, but the original ones.
    pipeline_definition = db.Column(
        JSONB,
        nullable=False,
        # This way migrated entries that did not have this column will
        # still be valid.
        server_default="{}",
    )

    pipeline_run_spec = db.Column(
        JSONB,
        nullable=False,
        # This way migrated entries that did not have this column will
        # still be valid.
        server_default="{}",
    )

    # So that we can efficiently look for jobs to run.
    next_scheduled_time = db.Column(TIMESTAMP(timezone=True), index=True)

    # So that we can show the user the last time it was scheduled/run.
    last_scheduled_time = db.Column(TIMESTAMP(timezone=True), index=True)

    # So that we can "stamp" every non interactive run with the
    # execution number it belongs to, e.g. the first time a job runs it
    # will be batch 1, then 2, etc.
    total_scheduled_executions = db.Column(
        db.Integer,
        unique=False,
        server_default=text("0"),
    )

    pipeline_runs = db.relationship(
        "NonInteractivePipelineRun",
        lazy="select",
        # Let the db take care of cascading deletions.
        # https://docs.sqlalchemy.org/en/13/orm/relationship_api.html#sqlalchemy.orm.relationship.params.passive_deletes
        # A value of True indicates that unloaded child items should not
        # be loaded during a delete operation on the parent. Normally,
        # when a parent item is deleted, all child items are loaded so
        # that they can either be marked as deleted, or have their
        # foreign key to the parent set to NULL. Marking this flag as
        # True usually implies an ON DELETE <CASCADE|SET NULL> rule is
        # in place which will handle updating/deleting child rows on the
        # database side.
        passive_deletes=True,
        # https://docs.sqlalchemy.org/en/14/orm/cascades.html#using-foreign-key-on-delete-cascade-with-orm-relationships
        # In order to use ON DELETE foreign key cascades in conjunction
        # with relationship(), it's important to note first and foremost
        # that the relationship.cascade setting must still be configured
        # to match the desired "delete" or "set null" behavior.
        # Essentially, the behaviour specified in the FK column
        # and the one specified in the relationship must match.
        cascade="all, delete",
        # When querying a job and its runs, the runs will be sorted by
        # job schedule number and the index of the pipeline in that job.
        order_by=(
            "[desc(NonInteractivePipelineRun.job_run_index), "
            "desc(NonInteractivePipelineRun.job_run_pipeline_run_index)]"
        ),
    )

    # The status of a job can be DRAFT, PENDING, STARTED, SUCCESS,
    # ABORTED. Jobs start as DRAFT, which indicates that the job has
    # been created but has not been started by the user. Once a job is
    # started by the user, what happens depends on the type of job.
    # One-time jobs become PENDING, and become STARTED once they are run
    # by the scheduler and their pipeline runs are added to the queue.
    # Once they are completed, their status will be SUCCESS; if they are
    # aborted, their status will be set to ABORTED. Recurring jobs,
    # characterized by having a schedule, become STARTED, and can only
    # move to the ABORTED state in case they get cancelled, which
    # implies that the job will not be scheduled anymore.
    status = db.Column(
        db.String(15),
        unique=False,
        nullable=False,
        # Pre-existing Jobs of migrating users will be set to SUCCESS.
        server_default=text("'SUCCESS'"),
    )

    strategy_json = db.Column(
        JSONB,
        nullable=False,
        server_default="{}",
    )

    created_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=False,
        index=True,
        # For migrating users.
        server_default=text("timezone('utc', now())"),
    )

    def __repr__(self):
        return f"<Job: {self.uuid}>"
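# Hedged sketch of how the indexed next_scheduled_time column could be
# used by a scheduler loop (assumes BaseModel exposes the usual
# Flask-SQLAlchemy `query` attribute; the filter values are illustrative):
from datetime import datetime, timezone

now = datetime.now(timezone.utc)
due_jobs = (
    Job.query
    .filter(Job.status == "STARTED", Job.next_scheduled_time <= now)
    .order_by(Job.next_scheduled_time)
    .all()
)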
class AmzAccountInfo(Base, BaseMethod):
    __tablename__ = 'amz_account_infos'

    id = Column(BIGINT, primary_key=True)
    market_place_id = Column(BIGINT, nullable=False)
    email = Column(VARCHAR(64), nullable=False)
    password = Column(VARCHAR(64), nullable=False)
    given_name = Column(VARCHAR(64), nullable=False)
    surname = Column(VARCHAR(64), nullable=False)
    # Used as the dummy address
    addr_state = Column(VARCHAR(16), nullable=False)
    addr_state_full = Column(VARCHAR(128))
    addr_city = Column(VARCHAR(64), nullable=False)
    addr_street = Column(VARCHAR(256), nullable=False)
    zip_code = Column(VARCHAR(16), nullable=False)
    # Used to recover the account
    telephone = Column(VARCHAR(32), nullable=False)
    telephones = Column(VARCHAR(128), default='')
    ua = Column(VARCHAR(256), default='')
    balance = Column(BIGINT, default=0)
    last_used = Column(TIMESTAMP(timezone=True), default=datetime.utcnow)
    is_available = Column(BOOLEAN, default=True)

    @classmethod
    def get_by_id(cls, id):
        return DBsession.query(cls).filter(cls.id == id).first()

    @classmethod
    def get_by_market_email(cls, market_place_id, email):
        return DBsession.query(cls).\
            filter(cls.market_place_id == market_place_id,
                   cls.email == email).first()

    @classmethod
    def get_all_by_marketplace(cls, market_place_id, is_available=True):
        return DBsession.query(cls).\
            filter(cls.market_place_id == market_place_id,
                   cls.is_available == is_available).all()

    def to_json(self):
        return {
            'market_place_id': self.market_place_id,
            'email': self.email,
            'password': self.password,
            'given_name': self.given_name,
            'surname': self.surname,
            'address_line1': self.addr_street,
            'address_line2': '',
            'city': self.addr_city,
            'state': self.addr_state,
            'zip_code': self.zip_code,
            'telephone': format_telephone(self.telephone),
            'telephones': self.telephones,
            'ua': self.ua,
            'balance': self.balance,
            'last_used': str(self.last_used),
            'is_available': self.is_available,
        }

    @classmethod
    def query_by_marketplace_limit(cls, market_place_id, limit=50, is_available=True):
        return DBsession.query(cls).\
            filter(cls.market_place_id == market_place_id,
                   cls.is_available == is_available).\
            order_by(cls.last_used).limit(limit).all()

    @classmethod
    def query_by_limit(cls, limit=50, is_available=True):
        return DBsession.query(cls).\
            filter(cls.is_available == is_available).\
            order_by(cls.last_used).limit(limit).all()

    @classmethod
    def query_by_marketplace_days_ago(cls, market_place_id, days=7, is_available=True):
        return DBsession.query(cls).\
            filter(cls.market_place_id == market_place_id,
                   cls.last_used <= datetime.utcnow() - timedelta(days=days),
                   cls.is_available == is_available).\
            all()

    # NOTE: the two queries below rely on a bind_proxy_id column that is
    # not declared in this snippet.
    @classmethod
    def query_unbind_account(cls, market_place_id, is_available=True):
        return DBsession.query(cls).\
            filter(cls.market_place_id == market_place_id,
                   cls.bind_proxy_id == 0,
                   cls.is_available == is_available).all()

    @classmethod
    def query_binding_account(cls, market_place_id, is_available=True):
        return DBsession.query(cls).\
            filter(cls.market_place_id == market_place_id,
                   cls.is_available == is_available,
                   cls.bind_proxy_id > 0).all()

    def use(self, commit=True):
        self.last_used = datetime.utcnow().replace(tzinfo=pytz.UTC)
        self.update(commit)

    def soft_delete(self, commit=True):
        self.is_available = False
        self.update(commit)

    @classmethod
    def is_account_available(cls, account_id):
        account = cls.get_by_id(account_id)
        return account and account.is_available
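# Hedged usage sketch of the rotation pattern above: accounts come back
# least-recently-used first, and use() stamps last_used so they sink to
# the back of the queue (the marketplace id is illustrative).
accounts = AmzAccountInfo.query_by_marketplace_limit(market_place_id=1, limit=5)
for account in accounts:
    payload = account.to_json()  # credentials + dummy address for the worker
    account.use()                # bumps last_used so rotation moves on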
    actor_id = Column(SmallInteger, primary_key=True, nullable=False)
    film_id = Column(SmallInteger, primary_key=True, nullable=False, index=True)
    last_update = Column(TIMESTAMP(precision=6), nullable=False, server_default=text("now()"))


t_film_category = Table(
    'film_category',
    metadata,
    Column('film_id', SmallInteger, nullable=False),
    Column('category_id', SmallInteger, nullable=False),
    Column('last_update', TIMESTAMP(precision=6), nullable=False, server_default=text("now()")),
)


class Genre(Base):
    __tablename__ = 'genre'

    genre_id = Column(
        Integer,
        primary_key=True,
        server_default=text("nextval('genre_genre_id_seq'::regclass)"))
    name = Column(String(120))


class MediaType(Base):
class PersistantDataTable(db.Model):
    __tablename__ = 'persistantdatatable'

    id = db.Column(INTEGER, primary_key=True)
    lastserverversioncheck_datetime = db.Column(TIMESTAMP(precision=0))
class ApiUsers(db.Model, AutoSerialize, Serializer):
    __tablename__ = 'api_users'

    id = Column(Integer, primary_key=True, unique=True,
                server_default=text("nextval('api_users_id_seq'::regclass)"))
    username = Column(String(256), nullable=False, unique=True)
    password_hash = Column(String(256), nullable=False)
    created = Column(TIMESTAMP(True, 6))

    def __init__(self, username, password, created):
        self.username = username
        self.password_hash = self.hash_password(password)
        self.created = created

    def hash_password(self, password):
        """Converts password (clear text) information into a hashed password.

        Note: common.encrypt is reversible, so this stores an encrypted
        password rather than a one-way hash.

        :param password:
        :return:
        """
        return common.encrypt(password).decode()

    def verify_password(self, password):
        """Verifies that the password (clear text) is the same as the one
        stored in hashed format.

        :param password:
        :return:
        """
        if password:
            return common.decrypt(self.password_hash).decode() == password
        return False

    def generate_auth_token(self, expiration=settings.TOKEN_EXPIRATION_SECS):
        """Generates a token.

        :param expiration: (int) Expiration time in seconds.
        :return: A token from (TimedJSONWebSignatureSerializer)
        """
        s = SecSerializer(settings.API_KEY, expires_in=expiration)
        return s.dumps({'id': self.id})

    @staticmethod
    def verify_auth_token(token):
        """Checks if there is a user in the database with the obtained id.
        If True then access is granted, else the user cannot access data.

        :param token: (str) Token information for the user.
        """
        s = SecSerializer(settings.API_KEY)
        try:
            data = s.loads(token)
        except SignatureExpired:
            return None  # Valid token, but expired. TODO(gogasca) Add logging.warning
        except BadSignature:
            return None  # Invalid token. TODO(gogasca) Add logging.error
        user = ApiUsers.query.get(data['id'])
        return user

    def serialize(self):
        """Return user information from the database, without the password hash."""
        d = Serializer.serialize(self)
        del d['password_hash']
        return d
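# Hedged sketch of the token round trip (assumes SecSerializer is
# itsdangerous' TimedJSONWebSignatureSerializer, as the docstring above
# suggests; the username and password are illustrative):
user = ApiUsers.query.filter_by(username='alice').first()
if user and user.verify_password('secret'):
    token = user.generate_auth_token(expiration=3600)  # valid for 1 hour
    # Later, on an authenticated request:
    authenticated_user = ApiUsers.verify_auth_token(token)  # None if expired/invalid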
    Column('priceid', Integer(), primary_key=True, nullable=False),
    Column('classid', Integer(), primary_key=True, nullable=False),
    Column('userid', Integer(), primary_key=True, nullable=False),
    Column('title', String(length=500), nullable=False),
    Column('amount_min', Integer(), nullable=False),
    Column('amount_max', Integer(), nullable=False),
    Column('settings', String(length=20), nullable=False, server_default=''),
    default_fkey(['userid'], ['login.userid'], name='commishprice_userid_fkey'),
)

Index('ind_classid_userid_title', commishprice.c.classid, commishprice.c.userid,
      commishprice.c.title, unique=True)

cron_runs = Table(
    'cron_runs',
    metadata,
    Column('last_run', TIMESTAMP(), nullable=False),
)

emailblacklist = Table(
    'emailblacklist',
    metadata,
    Column('id', Integer(), primary_key=True, nullable=False),
    Column('added_by', Integer(), nullable=False),
    Column('domain_name', String(length=252), nullable=False, unique=True),
    Column('reason', Text(), nullable=False),
    default_fkey(['added_by'], ['login.userid'], name='emailblacklist_userid_fkey'),
)

emailverify = Table(
    'emailverify',
    metadata,
    def test_non_default_value(self):
        expected = [('timezone', True)]
        result = list(get_type_attributes(TIMESTAMP(timezone=True)))
        self.assertEqual(expected, result)
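# Hedged sketch of what a get_type_attributes helper could look like for
# the test above to pass: compare each constructor argument of the type
# against its declared default and yield only the overridden ones. This is
# a hypothetical implementation, not the one actually under test.
import inspect

def get_type_attributes(type_instance):
    sig = inspect.signature(type(type_instance).__init__)
    for name, param in sig.parameters.items():
        if name == 'self' or param.default is inspect.Parameter.empty:
            continue
        value = getattr(type_instance, name, param.default)
        if value != param.default:
            # e.g. ('timezone', True) for TIMESTAMP(timezone=True)
            yield name, value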
class HttpDestination(Entity):
    __tablename__ = 'destinations'

    id = Column(Integer, primary_key=True)
    host_id = Column(String)
    access_key = Column(String)
    config = Column(MutableDict.as_mutable(JSON))
    created_at = Column(TIMESTAMP(timezone=True), default=func.now())
    last_edited = Column(TIMESTAMP(timezone=True), default=func.now(), onupdate=func.now())
    auth_token = relationship("AuthenticationToken", cascade="delete", uselist=False)

    TYPE = 'http'
    STATUS_URL = 'api/v1/status'

    CONFIG_PROXY_USE = 'conf.client.useProxy'
    CONFIG_PROXY_USERNAME = 'conf.client.proxy.username'
    CONFIG_PROXY_PASSWORD = 'conf.client.proxy.password'
    CONFIG_PROXY_URI = 'conf.client.proxy.uri'
    CONFIG_ENABLE_REQUEST_LOGGING = 'conf.client.requestLoggingConfig.enableRequestLogging'

    PROTOCOL_20 = 'anodot20'
    PROTOCOL_30 = 'anodot30'

    def __init__(self):
        self.config: Dict[str, any] = {self.CONFIG_PROXY_USE: False}
        self.host_id = self.generate_host_id()
        self.access_key = None

    def to_dict(self) -> dict:
        return {
            'config': self.config,
            'type': self.TYPE,
            'host_id': self.host_id,
            'access_key': self.access_key,
        }

    @classmethod
    def generate_host_id(cls, length: int = 10) -> str:
        return str(uuid.uuid4()).replace('-', '')[:length].upper()

    @property
    def url(self):
        return self.config.get('url', ANODOT_API_URL)

    @url.setter
    def url(self, value: str):
        self.config['url'] = value

    @property
    def token(self):
        return self.config.get('token', None)

    @token.setter
    def token(self, value: str):
        self.config['token'] = value

    @property
    def metrics_url(self) -> Optional[str]:
        return urllib.parse.urljoin(
            self.url,
            f'api/v1/metrics?token={self.token}&protocol={HttpDestination.PROTOCOL_20}'
        )

    def enable_logs(self):
        self.config[self.CONFIG_ENABLE_REQUEST_LOGGING] = True

    def disable_logs(self):
        self.config[self.CONFIG_ENABLE_REQUEST_LOGGING] = False

    @property
    def if_logs_enabled(self) -> bool:
        return self.config.get(self.CONFIG_ENABLE_REQUEST_LOGGING, False)

    @property
    def proxy(self) -> Optional[Proxy]:
        if self.config[self.CONFIG_PROXY_USE]:
            return Proxy(
                self.config[self.CONFIG_PROXY_URI],
                self.config[self.CONFIG_PROXY_USERNAME],
                self.config[self.CONFIG_PROXY_PASSWORD],
            )
        return None

    @proxy.setter
    def proxy(self, proxy: Optional[Proxy]):
        if proxy:
            self.config[self.CONFIG_PROXY_USE] = True
            self.config[self.CONFIG_PROXY_URI] = proxy.uri
            self.config[self.CONFIG_PROXY_USERNAME] = proxy.username
            self.config[self.CONFIG_PROXY_PASSWORD] = proxy.password
        else:
            self.config[self.CONFIG_PROXY_USE] = False
            self.config[self.CONFIG_PROXY_URI] = ''
            self.config[self.CONFIG_PROXY_USERNAME] = ''
            self.config[self.CONFIG_PROXY_PASSWORD] = ''

    def get_proxy_url(self) -> str:
        return self.config.get(self.CONFIG_PROXY_URI, '')

    def get_proxy_username(self) -> str:
        return self.config.get(self.CONFIG_PROXY_USERNAME, '')
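# Hedged usage sketch of the proxy property round trip (the Proxy
# constructor is assumed to take uri, username, password, matching the
# getter above; the values are made up):
dest = HttpDestination()
dest.proxy = Proxy('http://proxy.local:3128', 'agent', 's3cret')
assert dest.get_proxy_url() == 'http://proxy.local:3128'
dest.proxy = None  # clears the proxy config keys and disables proxy use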
    }, length=20), nullable=False, server_default=''),
    default_fkey(['userid'], ['login.userid'], name='folder_userid_fkey'),
)

Index('ind_folder_userid', folder.c.userid)

forgotpassword = Table(
    'forgotpassword',
    metadata,
    Column('token_sha256', BYTEA(), primary_key=True, nullable=False),
    Column('email', String(length=254), nullable=False),
    Column('created_at', TIMESTAMP(timezone=True), nullable=False, server_default=func.now()),
)

Index('ind_forgotpassword_created_at', forgotpassword.c.created_at)

frienduser = Table(
    'frienduser',
    metadata,
    Column('userid', Integer(), primary_key=True, nullable=False),
    Column('otherid', Integer(), primary_key=True, nullable=False),
    Column('settings', CharSettingsColumn({
        'p': 'pending',
    }, length=20),
class ConsistencyCheck(AbstractConcreteBase, DQBase):
    """
    Representation of abstract consistency check table.
    """

    __abstract__ = True
    _table_prefix = "consistency_check"

    id = Column(BIGINT, primary_key=True)
    type = Column(TEXT, nullable=False)
    name = Column(TEXT, nullable=False)
    description = Column(TEXT)
    left_table = Column(TEXT, nullable=False)
    right_table = Column(TEXT, nullable=False)
    status = Column(TEXT)
    time_filter = Column(
        TEXT,
        nullable=False,
        default=TIME_FILTER_DEFAULT,
        server_default=TIME_FILTER_DEFAULT,
    )
    task_ts = Column(TIMESTAMP(timezone=True), nullable=False, index=True)
    created_at = Column(
        DateTime(timezone=True),
        server_default=text("NOW()"),
        nullable=False,
        index=True,
    )

    @declared_attr
    def __table_args__(cls):
        """
        Concrete classes derived from this abstract one should enforce
        uniqueness across the columns below. The constraint itself needs a
        unique name, so @declared_attr is used here to construct that name
        from each concrete table's name.

        :return:
        """
        return (UniqueConstraint(
            "type",
            "name",
            "left_table",
            "right_table",
            "task_ts",
            "time_filter",
            name=f"{cls.__tablename__}_unique",
        ), )

    def init_row(
        self,
        check: Dict,
        status: str,
        left_table_name: str,
        right_table_name: str,
        time_filter=None,
        context: Dict = None,
        **_,
    ):
        """
        Set the result on the consistency check object.
        """
        self.type = check["type"]
        self.task_ts = context["task_ts"]
        self.name = check["name"]
        self.description = check["description"]
        self.left_table = left_table_name
        self.right_table = right_table_name
        if isinstance(time_filter, str):
            self.time_filter = time_filter
        else:
            self.time_filter = json.dumps(time_filter)
        self.status = status

    def __repr__(self):
        return f"Rule ({self.type} - {self.name} - {self.task_ts})"
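# Hedged sketch of a hypothetical concrete subclass: because
# __table_args__ is a @declared_attr, each concrete table gets its own
# uniquely named constraint, here "consistency_check_orders_unique".
class OrdersConsistencyCheck(ConsistencyCheck):
    __tablename__ = "consistency_check_orders"
    __mapper_args__ = {"polymorphic_identity": "orders", "concrete": True}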
class TimestampMixin:
    date_created = db.Column(TIMESTAMP(timezone=True), default=datetime.utcnow)
    date_updated = db.Column(TIMESTAMP(timezone=True), onupdate=datetime.utcnow)
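# Hedged usage sketch: any model that inherits the mixin picks up both
# timestamp columns automatically (the Note model is hypothetical):
class Note(TimestampMixin, db.Model):
    id = db.Column(db.Integer, primary_key=True)
    body = db.Column(db.Text)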
class QualityCheck(AbstractConcreteBase, DQBase):
    """
    Representation of abstract quality check table.
    """

    __abstract__ = True
    _table_prefix = "quality_check"

    id = Column(BIGINT, primary_key=True)
    attribute = Column(TEXT, nullable=False)
    rule_name = Column(TEXT, nullable=False)
    rule_type = Column(TEXT, nullable=False)
    rule_description = Column(TEXT)
    total_records = Column(INTEGER)
    failed = Column(INTEGER)
    median_30_day_failed = Column(DOUBLE_PRECISION)
    failed_percentage = Column(DOUBLE_PRECISION)
    passed = Column(INTEGER)
    median_30_day_passed = Column(DOUBLE_PRECISION)
    passed_percentage = Column(DOUBLE_PRECISION)
    status = Column(TEXT)
    time_filter = Column(
        TEXT,
        default=TIME_FILTER_DEFAULT,
        server_default=TIME_FILTER_DEFAULT,
        nullable=False,
    )
    task_ts = Column(TIMESTAMP(timezone=True), nullable=False, index=True)
    created_at = Column(
        DateTime(timezone=True),
        server_default=text("NOW()"),
        nullable=False,
        index=True,
    )

    @declared_attr
    def __table_args__(cls):
        """
        Concrete classes derived from this abstract one should enforce
        uniqueness across the columns below. The constraint itself needs a
        unique name, so @declared_attr is used here to construct that name
        from each concrete table's name.

        :return:
        """
        return (UniqueConstraint(
            "attribute",
            "rule_name",
            "rule_type",
            "task_ts",
            "time_filter",
            name=f"{cls.__tablename__}_unique",
        ), )

    def init_row(self, rule: Rule, results: pd.Series, conn: Connector, context: Dict = None):
        """
        Count the metrics we want to measure using the pd.Series api and
        set them on the quality check object.
        """
        if results.isnull().any():
            raise ValueError("Results of rule.apply cannot contain any null values.")

        # todo - add to doc
        self.task_ts = context["task_ts"]
        self.attribute = rule.attribute
        self.rule_name = rule.name
        self.rule_type = rule.type
        self.rule_description = rule.description
        self.total_records = results.shape[0]
        self.failed = results[results == False].shape[0]
        self.passed = results[results == True].shape[0]
        self.set_medians(conn)
        if isinstance(rule.time_filter, str):
            self.time_filter = rule.time_filter
        else:
            self.time_filter = json.dumps(rule.time_filter)
        self.failed_percentage = self._perc(self.failed, self.total_records)
        self.passed_percentage = self._perc(self.passed, self.total_records)
        self.status = "invalid" if self.failed > 0 else "valid"

    def _perc(self, a, b):
        res = 0
        try:
            res = (a / b) * 100
        except ZeroDivisionError:
            pass
        return res

    def set_medians(self, conn: Connector, days=30):
        """
        Calculate the medians of passed/failed quality checks over the last 30 days.
        """
        now = datetime.today().date()
        past = now - timedelta(days=days)

        cls = self.__class__
        session = conn.make_session()
        checks = (session.query(cls.failed, cls.passed).filter(
            and_(cls.task_ts <= str(now), cls.task_ts >= str(past))).all())
        session.expunge_all()
        session.commit()
        session.close()

        failed = [ch.failed for ch in checks]
        self.median_30_day_failed = median(failed) if failed else None

        passed = [ch.passed for ch in checks]
        self.median_30_day_passed = median(passed) if passed else None

    def __repr__(self):
        return f"Rule ({self.attribute} - {self.rule_name} - {self.rule_type} - {self.task_ts})"
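# Hedged sketch of the counting idiom init_row relies on: rule.apply() is
# expected to yield a boolean pd.Series, and the pass/fail tallies fall
# out of boolean indexing (the series below is made up):
import pandas as pd

results = pd.Series([True, True, False, True])
total = results.shape[0]                     # 4
failed = results[results == False].shape[0]  # 1
passed = results[results == True].shape[0]   # 3
assert failed + passed == total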
class Prompt(db.Model):
    id = db.Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    date_created = db.Column(TIMESTAMP(timezone=True), default=datetime.utcnow)
    created_by = db.Column(UUID(as_uuid=True), db.ForeignKey('registered_user.id'), nullable=False)
    content = db.Column(db.String(240), nullable=False)
    status = db.Column(db.String, db.ForeignKey('review_status_kind.name'), default='pending')
    posts = db.relationship('Post', lazy='dynamic',
                            backref=db.backref('prompt', lazy='joined', uselist=False))
    reviews = db.relationship('PromptReview', lazy='dynamic')

    # get_paginated() below calls as_dict(), so it is kept enabled.
    def as_dict(self, include_reviews=False):
        ret = {}
        if include_reviews:
            ret.update({'reviews': self.reviews.all()})
        ret.update({c.name: getattr(self, c.name) for c in self.__table__.columns})
        return ret

    def save_to_db(self):
        db.session.add(self)
        db.session.commit()

    @classmethod
    def get_paginated(cls, status='', include_reviews=False, desc=True):
        result = cls.query.filter_by(status=status)\
            .order_by(cls.date_created.desc() if desc else cls.date_created.asc())\
            .paginate()
        return dict(page=result.page,
                    has_next=result.has_next,
                    per_page=result.per_page,
                    items=[x.as_dict(include_reviews) for x in result.items])

    @classmethod
    def new_to_old(cls, status='', cursor=None, limit=50):
        if not status:
            raise ValueError('no status provided')
        conditions = (cls.status == status,)  # tuple
        if cursor:
            prompt = cls.query.get(cursor)
            conditions = (cls.status == status, cls.date_created < prompt.date_created)
        return cls.query.filter(*conditions)\
            .order_by(cls.date_created.desc())\
            .limit(limit)\
            .all()

    @classmethod
    def old_to_new(cls, status='', cursor=None, limit=50):
        if not status:
            raise ValueError('no status provided')
        conditions = (cls.status == status,)
        if cursor:
            prompt = cls.query.get(cursor)
            conditions = (cls.status == status, cls.date_created > prompt.date_created)
        return cls.query.filter(*conditions)\
            .order_by(cls.date_created.asc())\
            .limit(limit)\
            .all()
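# Hedged usage sketch of the keyset pagination above: pass the id of the
# last prompt on the current page as the cursor to fetch the next page
# (the status value is illustrative):
first_page = Prompt.new_to_old(status='approved', limit=50)
if first_page:
    next_page = Prompt.new_to_old(status='approved',
                                  cursor=first_page[-1].id,
                                  limit=50)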