Example #1
class MpSiteKeys(CommonBase):
	__tablename__ = 'mp_site_keys'

	rid = Column(BigInteger, primary_key=True, nullable=False, autoincrement=True)
	pubKey = Column(MEDIUMTEXT(), nullable=False)
	pubKeyHash = Column(String(255), nullable=False)
	priKey = Column(MEDIUMTEXT(), nullable=False)
	priKeyHash = Column(String(255), nullable=False)
	active = Column(Integer, nullable=True, server_default='1')
	request_new_key = Column(Integer, nullable=True, server_default='0')
	mdate = Column(DateTime, nullable=True, server_default='1970-01-01 00:00:00')
Example #2
class MPAgentRegistration(CommonBase):
	__tablename__ = 'mp_agent_registration'

	rid = Column(BigInteger, primary_key=True, autoincrement=True, info='rid')
	cuuid = Column(String(50), nullable=False, info='Client ID')
	enabled = Column(Integer, server_default='0', info='Enabled')
	clientKey = Column(String(100), server_default='NA')
	pubKeyPem = Column(MEDIUMTEXT())
	pubKeyPemHash = Column(MEDIUMTEXT())
	hostname = Column(String(255), nullable=False, info='Hostname')
	serialno = Column(String(255), nullable=False, info='Serial No')
	reg_date = Column(DateTime, nullable=False, server_default='1970-01-01 00:00:00', info='Reg Date')
Example #3
class ExampleTable(Base):
    __tablename__ = "{{ cookiecutter.plugin_name }}_example"

    example_id = Column(Unicode(64), primary_key=True)
    example_name = Column(Unicode(120))
    example_desc = Column(MEDIUMTEXT(collation="utf8mb4_unicode_ci"))
    example_type = Column(INTEGER)
    example_url = Column(MEDIUMTEXT(collation="utf8mb4_unicode_ci"))
    example_file = Column(Unicode(64))
    example_mimetype = Column(Unicode(120))
    example_owner = Column(ForeignKey("fsuser.user_id"), nullable=False, index=True)
    extras = Column(MEDIUMTEXT(collation="utf8mb4_unicode_ci"))
    tags = Column(MEDIUMTEXT(collation="utf8mb4_unicode_ci"))

    fsuser = relationship("User")
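For context, here is a minimal sketch of the imports this template model assumes (SQLAlchemy 1.4+). Base and the User model behind the fsuser relationship live in the host plugin and are only stubbed here:

# A minimal sketch, not part of the original template: Base and the User
# model are provided by the surrounding plugin in practice.
from sqlalchemy import Column, ForeignKey, INTEGER, Unicode
from sqlalchemy.dialects.mysql import MEDIUMTEXT
from sqlalchemy.orm import declarative_base, relationship

Base = declarative_base()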
Example #4
def get_string_type(col_type, params):
    """Create a string type column.

    Args:
        col_type (string): Type of the column.
        params (object): Additional parameters.

    Returns:
        sqlalchemy.types.TypeEngine: A string type such as CHAR or Text.
    """
    if col_type == 'char':
        return CHAR(params.get('length'))
    elif col_type == 'json':
        return (
            JSON(none_as_null=True)
            .with_variant(JSONB(none_as_null=True), 'postgresql')
            .with_variant(Text(), 'sqlite')
        )
    elif col_type == 'long_text':
        return LONGTEXT().with_variant(Text(), 'sqlite')
    elif col_type == 'medium_text':
        return MEDIUMTEXT().with_variant(Text(), 'sqlite')
    elif col_type == 'string':
        return String(length=params.get('length'))
    elif col_type == 'text':
        return Text()
    # Fail loudly on unknown type names instead of silently returning None.
    raise ValueError('unsupported string column type: {}'.format(col_type))
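A quick usage sketch, assuming get_string_type and SQLAlchemy's Table machinery are importable; the notes table is purely illustrative:

from sqlalchemy import Column, MetaData, Table

metadata = MetaData()
notes = Table(
    'notes', metadata,
    # 'medium_text' compiles to MEDIUMTEXT on MySQL and plain TEXT on SQLite.
    Column('body', get_string_type('medium_text', {})),
    # String lengths are read from the params mapping.
    Column('title', get_string_type('string', {'length': 120})),
)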
Example #5
class MpOsProfilesCriteria(CommonBase):
	__tablename__ = 'mp_os_config_profiles_criteria'

	rid 		= Column(BigInteger, primary_key=True, autoincrement=True)
	gPolicyID 	= Column(String(50), nullable=False)
	type 		= Column(String(25))
	type_data 	= Column(MEDIUMTEXT())
	type_action = Column(INTEGER(1, unsigned=True), server_default='0')
	type_order 	= Column(INTEGER(2, unsigned=True), server_default='0')
Example #6
class AdHocReports(CommonBase):
	__tablename__ = 'mp_adhoc_reports'

	rid             = Column(BigInteger, primary_key=True, autoincrement=True)
	name            = Column(String(255), nullable=False)
	reportData      = Column(MEDIUMTEXT())
	owner           = Column(String(255), nullable=False)
	rights          = Column(INTEGER(1, unsigned=True), server_default='0')
	disabled        = Column(INTEGER(1, unsigned=True), server_default='0')
	disabledDate    = Column(DateTime, server_default='1970-01-01 00:00:00')
Example #7
class OSMigrationStatus(CommonBase):
	__tablename__ = 'mp_os_migration_status'

	rid             = Column(BigInteger, primary_key=True, autoincrement=True)
	cuuid           = Column(String(50), ForeignKey('mp_clients.cuuid', ondelete='CASCADE', onupdate='NO ACTION'), nullable=False, index=True, unique=True)
	startDateTime   = Column(DateTime, server_default='1970-01-01 00:00:00')
	stopDateTime    = Column(DateTime, server_default='1970-01-01 00:00:00')
	preOSVer        = Column(String(255), nullable=False)
	postOSVer       = Column(String(255))
	label           = Column(MEDIUMTEXT())
	migrationID     = Column(String(100), nullable=False)
Example #8
class ApplePatchCriteria(CommonBase):
	__tablename__ = 'mp_apple_patch_criteria'

	rid         = Column(BigInteger, primary_key=True, autoincrement=True)
	puuid       = Column(String(50), nullable=False, server_default='1')
	supatchname = Column(String(255), nullable=True)
	type        = Column(String(25))
	type_data   = Column(MEDIUMTEXT())
	type_action = Column(INTEGER(1, unsigned=True), server_default='0')
	type_order  = Column(INTEGER(2, unsigned=True), server_default='0')
	cdate       = Column(DateTime, server_default='1970-01-01 00:00:00')
	mdate       = Column(DateTime, server_default='1970-01-01 00:00:00')
Example #9
class Question(Base):

    __tablename__ = 'zhihu_questions'

    id = Column(INTEGER(), primary_key=True)
    url = Column(VARCHAR(45))
    title = Column(NVARCHAR(100))
    content = Column(MEDIUMTEXT(), nullable=True)
    topic = Column(NVARCHAR(200))
    answers_num = Column(INTEGER())
    follower = Column(INTEGER())
    watcher = Column(INTEGER())
    crawl_time = Column(DATETIME())
Example #10
class Answer(Base):

    __tablename__ = 'zhihu_answers'

    q_id = Column(INTEGER(), ForeignKey('zhihu_questions.id'))
    question = relationship('Question', backref='answer')
    answer_id = Column(INTEGER(), primary_key=True)
    author_id = Column(VARCHAR(100))
    author_name = Column(NVARCHAR(20))
    author_is_advertiser = Column(BOOLEAN())
    created_time = Column(DATETIME())
    updated_time = Column(DATETIME())
    voteup_num = Column(INTEGER())
    comment_num = Column(INTEGER())
    content = Column(MEDIUMTEXT())
Example #11
class Messages(Base):
    __tablename__ = 'messages'
    __table_args__ = {'mysql_engine': 'InnoDB', 'mysql_charset': 'utf8mb4'}

    message_id = Column(VARCHAR(255), primary_key=True)
    mailing_list_url = Column(VARCHAR(255),
                              ForeignKey('mailing_lists.mailing_list_url',
                                         onupdate='CASCADE',
                                         ondelete='CASCADE'),
                              primary_key=True)
    mailing_list = Column(VARCHAR(255))
    first_date = Column(DateTime)
    first_date_tz = Column(NUMERIC(11))
    arrival_date = Column(DateTime)
    arrival_date_tz = Column(NUMERIC(11))
    subject = Column(VARCHAR(1024))
    message_body = Column(MEDIUMTEXT())
    is_response_of = Column(VARCHAR(255), index=True)
    mail_path = Column(TEXT)

    def __repr__(self):
        return u"<Messages(message_id='{0}', " \
               "mailing_list_url='{1}', " \
               "mailing_list='{2}', " \
               "first_date='{3}', first_date_tz='{4}', " \
               "arrival_date='{5}', arrival_date_tz='{6}', " \
               "subject='{7}', message_body='{8}', " \
               "is_response_of='{9}', " \
               "mail_path='{10}')>".format(self.message_id,
                                           self.mailing_list_url,
                                           self.mailing_list,
                                           self.first_date,
                                           self.first_date_tz,
                                           self.arrival_date,
                                           self.arrival_date_tz,
                                           self.subject,
                                           self.message_body,
                                           self.is_response_of,
                                           self.mail_path)
Example #12
def MediumText() -> Variant:  # pylint:disable=invalid-name
    return Text().with_variant(MEDIUMTEXT(), "mysql")
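A short usage sketch for the variant pattern above; the table and column names are illustrative:

from sqlalchemy import Column, Integer, MetaData, Table

metadata = MetaData()
events = Table(
    'events', metadata,
    Column('id', Integer, primary_key=True),
    # Compiles to MEDIUMTEXT under a MySQL dialect and to TEXT elsewhere.
    Column('payload', MediumText()),
)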
Example #13
class Scraper(object):
    GKG_url = 'http://data.gdeltproject.org/gdeltv2/{}.gkg.csv.zip'
    Mentions_url = 'http://data.gdeltproject.org/gdeltv2/{}.mentions.CSV.zip'
    Events_url = 'http://data.gdeltproject.org/gdeltv2/{}.export.CSV.zip'

    colnames_gkg = pd.read_csv(
        'C:/Users/605453/Documents/Projects/Firesail/Save Our Jobs/Part 2/Archive/Headers/schema_csvs/GDELT_2.0_gdeltKnowledgeGraph_Column_Labels_Header_Row_Sep2016.tsv',
        sep='\t')['tableId']
    colnames_mentions = pd.read_csv(
        'C:/Users/605453/Documents/Projects/Firesail/Save Our Jobs/Part 2/Archive/Headers/schema_csvs/GDELT_2.0_eventMentions_Column_Labels_Header_Row_Sep2016.tsv',
        sep='\t')['0']
    colnames_events = pd.read_csv(
        'C:/Users/605453/Documents/Projects/Firesail/Save Our Jobs/Part 2/Archive/Headers/schema_csvs/GDELT_2.0_Events_Column_Labels_Header_Row_Sep2016.csv'
    )['tableId']

    dict_gkg = {colnames_gkg[0]: VARCHAR(255)}
    dict_gkg.update(
        {x: MEDIUMTEXT(collation='utf8mb4_bin')
         for x in colnames_gkg[5:]})
    dict_mentions = {
        x: MEDIUMTEXT(collation='utf8mb4_bin')
        for x in colnames_mentions[3:]
    }
    dict_events = {
        x: MEDIUMTEXT(collation='utf8mb4_bin')
        for x in colnames_events[5:]
    }

    error_log = "C:\\Users\\605453\\Documents\\GDELT\\Errors.txt"

    def __init__(self, beg_date, end_date, folder, db):
        self.beg_month, self.beg_day, self.beg_year = [
            int(x) for x in beg_date.split("-")
        ]
        self.end_month, self.end_day, self.end_year = [
            int(x) for x in end_date.split("-")
        ]
        self.folder = folder
        self.db = db

    def pull(self, url_type, date, dtype):
        url = url_type.format(date)
        file_location = '{}{}.{}.CSV.zip'.format(self.folder, date, dtype)
        urllib.request.urlretrieve(url, file_location)
        return file_location

    def pandafy(self, file_location, colnames, index, GKG_drop=False):
        try:
            df = pd.read_csv(file_location,
                             sep='\t',
                             header=None,
                             names=colnames,
                             encoding='utf-8')
        except UnicodeDecodeError:
            # Fall back to latin-1 when the file is not valid UTF-8.
            df = pd.read_csv(file_location,
                             sep='\t',
                             header=None,
                             names=colnames,
                             encoding='latin-1')
        remove(file_location)
        df = df[df.iloc[:, 0].astype(str).apply(lambda x: len(x) < 255)]
        df = df.set_index(index)
        return df

    def insert(self, df, table_name, db_name, index):
        engine = create_engine(
            "mysql://*****:*****@154@localhost/{}?charset=utf8mb4".format(
                db_name),
            echo=False)
        con = engine.connect()
        if table_name == "mentions":
            df.to_sql(name=table_name,
                      con=engine,
                      if_exists='append',
                      chunksize=50,
                      method="multi",
                      schema=db_name,
                      index_label=index,
                      dtype=self.dict_mentions)
        elif table_name == "events":
            df.to_sql(name=table_name,
                      con=engine,
                      if_exists='append',
                      chunksize=50,
                      method="multi",
                      schema=db_name,
                      index_label=index,
                      dtype=self.dict_events)
        elif table_name == "gkg":
            df.to_sql(name=table_name,
                      con=engine,
                      if_exists='append',
                      chunksize=50,
                      method="multi",
                      schema=db_name,
                      index_label=index,
                      dtype=self.dict_gkg)
        con.close()

    def execute(self, url, date, table, colnames, index, GKG_drop=False):
        try:
            result = self.pull(url, date, table)
            print(result)
            df = self.pandafy(result, colnames, index, GKG_drop=False)
            self.insert(df, table, self.db, index)
        except Exception:
            error = "Fail:{}-{}".format(date, table)
            print(error)
            print(sys.exc_info()[0])
            Errors.append(error)

    def scrape(self, GKG=False, Mentions=False, Events=False):
        start = datetime.datetime(year=self.beg_year,
                                  month=self.beg_month,
                                  day=self.beg_day,
                                  hour=00,
                                  minute=00)
        end = datetime.datetime(year=self.end_year,
                                month=self.end_month,
                                day=self.end_day,
                                hour=00,
                                minute=00)
        days_to_collect = end - start
        date_list = [
            end - datetime.timedelta(minutes=15 * x)
            for x in range(1, days_to_collect.days * 96 + 1)
        ]
        date_list = list(
            map(lambda x: x.strftime("%Y%m%d%H%M") + '00', date_list))

        global Errors
        Errors = []

        for date in date_list:
            print(date)
            if GKG:
                self.execute(self.GKG_url, date, "gkg", self.colnames_gkg,
                             'GKGRECORDID')

            if Mentions:
                self.execute(self.Mentions_url, date, "mentions",
                             self.colnames_mentions, 'GLOBALEVENTID')

            if Events:
                self.execute(self.Events_url, date, "events",
                             self.colnames_events, 'GLOBALEVENTID')

            with open(self.error_log, "w") as outfile:
                outfile.write("\n".join(Errors))

        now = datetime.datetime.now()
        error_log_final = "C:\\Users\\605453\\Documents\\GDELT\\Error Logs\\Errors_" + now.strftime(
            "%d%m%Y_%H%M%S") + ".txt"
        with open(error_log_final, "w") as outfile:
            outfile.write("\n".join(Errors))

    @staticmethod
    def missedDates(beg_date,
                    end_date,
                    GKG=False,
                    Mentions=False,
                    Events=False):
        beg_year, beg_month, beg_day, beg_hour, beg_min = int(
            beg_date[:4]), int(beg_date[4:6]), int(beg_date[6:8]), int(
                beg_date[8:10]), int(beg_date[10:12])
        end_year, end_month, end_day, end_hour, end_min = int(
            end_date[:4]), int(end_date[4:6]), int(end_date[6:8]), int(
                end_date[8:10]), int(end_date[10:12])

        start = datetime.datetime(year=beg_year,
                                  month=beg_month,
                                  day=beg_day,
                                  hour=beg_hour,
                                  minute=beg_min)
        end = datetime.datetime(year=end_year,
                                month=end_month,
                                day=end_day,
                                hour=end_hour,
                                minute=end_min)
        days_to_collect = end - start

        date_list = [end]
        date_list.extend([
            end - datetime.timedelta(minutes=15 * x)
            for x in range(1, int((days_to_collect.total_seconds() / 60) / 15))
        ])
        date_list.extend([start])
        # Return the complete 15-minute timestamp grid from end back to start.
        return date_list
Example #14
def MediumText():
    return Text().with_variant(MEDIUMTEXT(), 'mysql')
Example #15
class MySQLEngineSpec(BaseEngineSpec, BasicParametersMixin):
    engine = "mysql"
    engine_name = "MySQL"
    max_column_name_length = 64

    default_driver = "mysqldb"
    sqlalchemy_uri_placeholder = (
        "mysql://*****:*****@host:port/dbname[?key=value&key=value...]")
    encryption_parameters = {"ssl": "1"}

    column_type_mappings = (
        (
            re.compile(r"^int.*", re.IGNORECASE),
            INTEGER(),
            GenericDataType.NUMERIC,
        ),
        (
            re.compile(r"^tinyint", re.IGNORECASE),
            TINYINT(),
            GenericDataType.NUMERIC,
        ),
        (
            re.compile(r"^mediumint", re.IGNORECASE),
            MEDIUMINT(),
            GenericDataType.NUMERIC,
        ),
        (
            re.compile(r"^decimal", re.IGNORECASE),
            DECIMAL(),
            GenericDataType.NUMERIC,
        ),
        (
            re.compile(r"^float", re.IGNORECASE),
            FLOAT(),
            GenericDataType.NUMERIC,
        ),
        (
            re.compile(r"^double", re.IGNORECASE),
            DOUBLE(),
            GenericDataType.NUMERIC,
        ),
        (
            re.compile(r"^bit", re.IGNORECASE),
            BIT(),
            GenericDataType.NUMERIC,
        ),
        (
            re.compile(r"^tinytext", re.IGNORECASE),
            TINYTEXT(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^mediumtext", re.IGNORECASE),
            MEDIUMTEXT(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^longtext", re.IGNORECASE),
            LONGTEXT(),
            GenericDataType.STRING,
        ),
    )

    _time_grain_expressions = {
        None:
        "{col}",
        "PT1S":
        "DATE_ADD(DATE({col}), "
        "INTERVAL (HOUR({col})*60*60 + MINUTE({col})*60"
        " + SECOND({col})) SECOND)",
        "PT1M":
        "DATE_ADD(DATE({col}), "
        "INTERVAL (HOUR({col})*60 + MINUTE({col})) MINUTE)",
        "PT1H":
        "DATE_ADD(DATE({col}), "
        "INTERVAL HOUR({col}) HOUR)",
        "P1D":
        "DATE({col})",
        "P1W":
        "DATE(DATE_SUB({col}, "
        "INTERVAL DAYOFWEEK({col}) - 1 DAY))",
        "P1M":
        "DATE(DATE_SUB({col}, "
        "INTERVAL DAYOFMONTH({col}) - 1 DAY))",
        "P3M":
        "MAKEDATE(YEAR({col}), 1) "
        "+ INTERVAL QUARTER({col}) QUARTER - INTERVAL 1 QUARTER",
        "P1Y":
        "DATE(DATE_SUB({col}, "
        "INTERVAL DAYOFYEAR({col}) - 1 DAY))",
        "1969-12-29T00:00:00Z/P1W":
        "DATE(DATE_SUB({col}, "
        "INTERVAL DAYOFWEEK(DATE_SUB({col}, "
        "INTERVAL 1 DAY)) - 1 DAY))",
    }

    type_code_map: Dict[int,
                        str] = {}  # loaded from get_datatype only if needed

    custom_errors: Dict[Pattern[str], Tuple[str, SupersetErrorType, Dict[
        str, Any]]] = {
            CONNECTION_ACCESS_DENIED_REGEX: (
                __('Either the username "%(username)s" or the password is incorrect.'
                   ),
                SupersetErrorType.CONNECTION_ACCESS_DENIED_ERROR,
                {
                    "invalid": ["username", "password"]
                },
            ),
            CONNECTION_INVALID_HOSTNAME_REGEX: (
                __('Unknown MySQL server host "%(hostname)s".'),
                SupersetErrorType.CONNECTION_INVALID_HOSTNAME_ERROR,
                {
                    "invalid": ["host"]
                },
            ),
            CONNECTION_HOST_DOWN_REGEX: (
                __('The host "%(hostname)s" might be down and can\'t be reached.'
                   ),
                SupersetErrorType.CONNECTION_HOST_DOWN_ERROR,
                {
                    "invalid": ["host", "port"]
                },
            ),
            CONNECTION_UNKNOWN_DATABASE_REGEX: (
                __('Unable to connect to database "%(database)s".'),
                SupersetErrorType.CONNECTION_UNKNOWN_DATABASE_ERROR,
                {
                    "invalid": ["database"]
                },
            ),
            SYNTAX_ERROR_REGEX: (
                __('Please check your query for syntax errors near "%(server_error)s". '
                   "Then, try running your query again."),
                SupersetErrorType.SYNTAX_ERROR,
                {},
            ),
        }

    @classmethod
    def convert_dttm(
            cls,
            target_type: str,
            dttm: datetime,
            db_extra: Optional[Dict[str, Any]] = None) -> Optional[str]:
        tt = target_type.upper()
        if tt == utils.TemporalType.DATE:
            return f"STR_TO_DATE('{dttm.date().isoformat()}', '%Y-%m-%d')"
        if tt == utils.TemporalType.DATETIME:
            datetime_formatted = dttm.isoformat(sep=" ",
                                                timespec="microseconds")
            return f"""STR_TO_DATE('{datetime_formatted}', '%Y-%m-%d %H:%i:%s.%f')"""
        return None

    @classmethod
    def adjust_database_uri(cls,
                            uri: URL,
                            selected_schema: Optional[str] = None) -> URL:
        if selected_schema:
            uri = uri.set(database=parse.quote(selected_schema, safe=""))

        return uri

    @classmethod
    def get_datatype(cls, type_code: Any) -> Optional[str]:
        if not cls.type_code_map:
            # only import and store if needed at least once
            # pylint: disable=import-outside-toplevel
            import MySQLdb

            ft = MySQLdb.constants.FIELD_TYPE
            cls.type_code_map = {
                getattr(ft, k): k
                for k in dir(ft) if not k.startswith("_")
            }
        datatype = type_code
        if isinstance(type_code, int):
            datatype = cls.type_code_map.get(type_code)
        if isinstance(datatype, str) and datatype:
            return datatype
        return None

    @classmethod
    def epoch_to_dttm(cls) -> str:
        return "from_unixtime({col})"

    @classmethod
    def _extract_error_message(cls, ex: Exception) -> str:
        """Extract error message for queries"""
        message = str(ex)
        try:
            if isinstance(ex.args, tuple) and len(ex.args) > 1:
                message = ex.args[1]
        except (AttributeError, KeyError):
            pass
        return message

    @classmethod
    def get_column_spec(
        cls,
        native_type: Optional[str],
        db_extra: Optional[Dict[str, Any]] = None,
        source: utils.ColumnTypeSource = utils.ColumnTypeSource.GET_TABLE,
        column_type_mappings: Tuple[ColumnTypeMapping,
                                    ...] = column_type_mappings,
    ) -> Optional[ColumnSpec]:

        column_spec = super().get_column_spec(native_type)
        if column_spec:
            return column_spec

        return super().get_column_spec(
            native_type, column_type_mappings=column_type_mappings)

    @classmethod
    def get_cancel_query_id(cls, cursor: Any, query: Query) -> Optional[str]:
        """
        Get MySQL connection ID that will be used to cancel all other running
        queries in the same connection.

        :param cursor: Cursor instance in which the query will be executed
        :param query: Query instance
        :return: MySQL Connection ID
        """
        cursor.execute("SELECT CONNECTION_ID()")
        row = cursor.fetchone()
        return row[0]

    @classmethod
    def cancel_query(cls, cursor: Any, query: Query,
                     cancel_query_id: str) -> bool:
        """
        Cancel query in the underlying database.

        :param cursor: New cursor instance to the db of the query
        :param query: Query instance
        :param cancel_query_id: MySQL Connection ID
        :return: True if query cancelled successfully, False otherwise
        """
        try:
            cursor.execute(f"KILL CONNECTION {cancel_query_id}")
        except Exception:  # pylint: disable=broad-except
            return False

        return True
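For illustration, this is what convert_dttm emits for a DATETIME target, assuming Superset's TemporalType is a str-based enum as in recent releases:

from datetime import datetime

literal = MySQLEngineSpec.convert_dttm("DATETIME", datetime(2021, 1, 2, 3, 4, 5))
# literal == "STR_TO_DATE('2021-01-02 03:04:05.000000', '%Y-%m-%d %H:%i:%s.%f')"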
Example #16
from storyboard.db.decorators import UTCDateTime

CONF = cfg.CONF


def table_args():
    engine_name = urlparse.urlparse(cfg.CONF.database_connection).scheme
    if engine_name == 'mysql':
        return {'mysql_engine': cfg.CONF.mysql_engine,
                'mysql_charset': "utf8"}
    return None

# CUSTOM TYPES

# A MySQL medium text type.
MYSQL_MEDIUM_TEXT = UnicodeText().with_variant(MEDIUMTEXT(), 'mysql')
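A hypothetical column built on this type stays portable across backends:

# Hypothetical usage: UnicodeText on most backends, MEDIUMTEXT on MySQL.
description = Column(MYSQL_MEDIUM_TEXT, nullable=True)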


class CommonLength:
    top_large_length = 255
    top_middle_length = 100
    top_short_length = 50
    lower_large_length = 5
    lower_middle_length = 3
    lower_short_length = 1
    name_length = 30


class IdMixin(object):
    id = Column(Integer, primary_key=True)
Example #17
class MySQLEngineSpec(BaseEngineSpec):
    engine = "mysql"
    engine_name = "MySQL"
    max_column_name_length = 64

    column_type_mappings: Tuple[
        Tuple[
            Pattern[str],
            Union[TypeEngine, Callable[[Match[str]], TypeEngine]],
            GenericDataType,
        ],
        ...,
    ] = (
        (re.compile(r"^int.*", re.IGNORECASE), INTEGER(), GenericDataType.NUMERIC,),
        (re.compile(r"^tinyint", re.IGNORECASE), TINYINT(), GenericDataType.NUMERIC,),
        (
            re.compile(r"^mediumint", re.IGNORECASE),
            MEDIUMINT(),
            GenericDataType.NUMERIC,
        ),
        (re.compile(r"^decimal", re.IGNORECASE), DECIMAL(), GenericDataType.NUMERIC,),
        (re.compile(r"^float", re.IGNORECASE), FLOAT(), GenericDataType.NUMERIC,),
        (re.compile(r"^double", re.IGNORECASE), DOUBLE(), GenericDataType.NUMERIC,),
        (re.compile(r"^bit", re.IGNORECASE), BIT(), GenericDataType.NUMERIC,),
        (re.compile(r"^tinytext", re.IGNORECASE), TINYTEXT(), GenericDataType.STRING,),
        (
            re.compile(r"^mediumtext", re.IGNORECASE),
            MEDIUMTEXT(),
            GenericDataType.STRING,
        ),
        (re.compile(r"^longtext", re.IGNORECASE), LONGTEXT(), GenericDataType.STRING,),
    )

    _time_grain_expressions = {
        None: "{col}",
        "PT1S": "DATE_ADD(DATE({col}), "
        "INTERVAL (HOUR({col})*60*60 + MINUTE({col})*60"
        " + SECOND({col})) SECOND)",
        "PT1M": "DATE_ADD(DATE({col}), "
        "INTERVAL (HOUR({col})*60 + MINUTE({col})) MINUTE)",
        "PT1H": "DATE_ADD(DATE({col}), " "INTERVAL HOUR({col}) HOUR)",
        "P1D": "DATE({col})",
        "P1W": "DATE(DATE_SUB({col}, " "INTERVAL DAYOFWEEK({col}) - 1 DAY))",
        "P1M": "DATE(DATE_SUB({col}, " "INTERVAL DAYOFMONTH({col}) - 1 DAY))",
        "P0.25Y": "MAKEDATE(YEAR({col}), 1) "
        "+ INTERVAL QUARTER({col}) QUARTER - INTERVAL 1 QUARTER",
        "P1Y": "DATE(DATE_SUB({col}, " "INTERVAL DAYOFYEAR({col}) - 1 DAY))",
        "1969-12-29T00:00:00Z/P1W": "DATE(DATE_SUB({col}, "
        "INTERVAL DAYOFWEEK(DATE_SUB({col}, "
        "INTERVAL 1 DAY)) - 1 DAY))",
    }

    type_code_map: Dict[int, str] = {}  # loaded from get_datatype only if needed

    custom_errors = {
        CONNECTION_ACCESS_DENIED_REGEX: (
            __('Either the username "%(username)s" or the password is incorrect.'),
            SupersetErrorType.CONNECTION_ACCESS_DENIED_ERROR,
        ),
        CONNECTION_INVALID_HOSTNAME_REGEX: (
            __('Unknown MySQL server host "%(hostname)s".'),
            SupersetErrorType.CONNECTION_INVALID_HOSTNAME_ERROR,
        ),
        CONNECTION_HOST_DOWN_REGEX: (
            __('The host "%(hostname)s" might be down and can\'t be reached.'),
            SupersetErrorType.CONNECTION_HOST_DOWN_ERROR,
        ),
        CONNECTION_UNKNOWN_DATABASE_REGEX: (
            __(
                'We were unable to connect to your database named "%(database)s". '
                "Please verify your database name and try again."
            ),
            SupersetErrorType.CONNECTION_UNKNOWN_DATABASE_ERROR,
        ),
    }

    @classmethod
    def convert_dttm(cls, target_type: str, dttm: datetime) -> Optional[str]:
        tt = target_type.upper()
        if tt == utils.TemporalType.DATE:
            return f"STR_TO_DATE('{dttm.date().isoformat()}', '%Y-%m-%d')"
        if tt == utils.TemporalType.DATETIME:
            datetime_formatted = dttm.isoformat(sep=" ", timespec="microseconds")
            return f"""STR_TO_DATE('{datetime_formatted}', '%Y-%m-%d %H:%i:%s.%f')"""
        return None

    @classmethod
    def adjust_database_uri(
        cls, uri: URL, selected_schema: Optional[str] = None
    ) -> None:
        if selected_schema:
            uri.database = parse.quote(selected_schema, safe="")

    @classmethod
    def get_datatype(cls, type_code: Any) -> Optional[str]:
        if not cls.type_code_map:
            # only import and store if needed at least once
            import MySQLdb

            ft = MySQLdb.constants.FIELD_TYPE
            cls.type_code_map = {
                getattr(ft, k): k for k in dir(ft) if not k.startswith("_")
            }
        datatype = type_code
        if isinstance(type_code, int):
            datatype = cls.type_code_map.get(type_code)
        if isinstance(datatype, str) and datatype:
            return datatype
        return None

    @classmethod
    def epoch_to_dttm(cls) -> str:
        return "from_unixtime({col})"

    @classmethod
    def _extract_error_message(cls, ex: Exception) -> str:
        """Extract error message for queries"""
        message = str(ex)
        try:
            if isinstance(ex.args, tuple) and len(ex.args) > 1:
                message = ex.args[1]
        except (AttributeError, KeyError):
            pass
        return message

    @classmethod
    def get_column_spec(  # type: ignore
        cls,
        native_type: Optional[str],
        source: utils.ColumnTypeSource = utils.ColumnTypeSource.GET_TABLE,
        column_type_mappings: Tuple[
            Tuple[
                Pattern[str],
                Union[TypeEngine, Callable[[Match[str]], TypeEngine]],
                GenericDataType,
            ],
            ...,
        ] = column_type_mappings,
    ) -> Union[ColumnSpec, None]:

        column_spec = super().get_column_spec(native_type)
        if column_spec:
            return column_spec

        return super().get_column_spec(
            native_type, column_type_mappings=column_type_mappings
        )
Example #18
class DagCode(Base):
    """A table for DAGs code.

    dag_code table contains code of DAG files synchronized by scheduler.

    For details on dag serialization see SerializedDagModel
    """

    __tablename__ = 'dag_code'

    fileloc_hash = Column(BigInteger,
                          nullable=False,
                          primary_key=True,
                          autoincrement=False)
    fileloc = Column(String(2000), nullable=False)
    # The max length of fileloc exceeds the limit of indexing.
    last_updated = Column(UtcDateTime, nullable=False)
    source_code = Column(Text().with_variant(MEDIUMTEXT(), 'mysql'),
                         nullable=False)

    def __init__(self, full_filepath: str, source_code: Optional[str] = None):
        self.fileloc = full_filepath
        self.fileloc_hash = DagCode.dag_fileloc_hash(self.fileloc)
        self.last_updated = timezone.utcnow()
        self.source_code = source_code or DagCode.code(self.fileloc)

    @provide_session
    def sync_to_db(self, session=None):
        """Writes code into database.

        :param session: ORM Session
        """
        self.bulk_sync_to_db([self.fileloc], session)

    @classmethod
    @provide_session
    def bulk_sync_to_db(cls, filelocs: Iterable[str], session=None):
        """Writes code in bulk into database.

        :param filelocs: file paths of DAGs to sync
        :param session: ORM Session
        """
        filelocs = set(filelocs)
        filelocs_to_hashes = {
            fileloc: DagCode.dag_fileloc_hash(fileloc)
            for fileloc in filelocs
        }
        existing_orm_dag_codes = (session.query(DagCode).filter(
            DagCode.fileloc_hash.in_(
                filelocs_to_hashes.values())).with_for_update(
                    of=DagCode).all())

        if existing_orm_dag_codes:
            existing_orm_dag_codes_map = {
                orm_dag_code.fileloc: orm_dag_code
                for orm_dag_code in existing_orm_dag_codes
            }
        else:
            existing_orm_dag_codes_map = {}

        existing_orm_dag_codes_by_fileloc_hashes = {
            orm.fileloc_hash: orm
            for orm in existing_orm_dag_codes
        }
        existing_orm_filelocs = {
            orm.fileloc
            for orm in existing_orm_dag_codes_by_fileloc_hashes.values()
        }
        if not existing_orm_filelocs.issubset(filelocs):
            conflicting_filelocs = existing_orm_filelocs.difference(filelocs)
            hashes_to_filelocs = {
                DagCode.dag_fileloc_hash(fileloc): fileloc
                for fileloc in filelocs
            }
            message = ""
            for fileloc in conflicting_filelocs:
                filename = hashes_to_filelocs[DagCode.dag_fileloc_hash(
                    fileloc)]
                message += (
                    f"Filename '{filename}' causes a hash collision in the "
                    f"database with '{fileloc}'. Please rename the file.")
            raise AirflowException(message)

        existing_filelocs = {
            dag_code.fileloc
            for dag_code in existing_orm_dag_codes
        }
        missing_filelocs = filelocs.difference(existing_filelocs)

        for fileloc in missing_filelocs:
            orm_dag_code = DagCode(fileloc, cls._get_code_from_file(fileloc))
            session.add(orm_dag_code)

        for fileloc in existing_filelocs:
            current_version = existing_orm_dag_codes_by_fileloc_hashes[
                filelocs_to_hashes[fileloc]]
            file_mod_time = datetime.fromtimestamp(os.path.getmtime(
                correct_maybe_zipped(fileloc)),
                                                   tz=timezone.utc)

            if file_mod_time > current_version.last_updated:
                orm_dag_code = existing_orm_dag_codes_map[fileloc]
                orm_dag_code.last_updated = file_mod_time
                orm_dag_code.source_code = cls._get_code_from_file(
                    orm_dag_code.fileloc)
                session.merge(orm_dag_code)

    @classmethod
    @provide_session
    def remove_deleted_code(cls, alive_dag_filelocs: List[str], session=None):
        """Deletes code not included in alive_dag_filelocs.

        :param alive_dag_filelocs: file paths of alive DAGs
        :param session: ORM Session
        """
        alive_fileloc_hashes = [
            cls.dag_fileloc_hash(fileloc) for fileloc in alive_dag_filelocs
        ]

        log.debug("Deleting code from %s table ", cls.__tablename__)

        session.query(cls).filter(
            cls.fileloc_hash.notin_(alive_fileloc_hashes),
            cls.fileloc.notin_(alive_dag_filelocs)).delete(
                synchronize_session='fetch')

    @classmethod
    @provide_session
    def has_dag(cls, fileloc: str, session=None) -> bool:
        """Checks a file exist in dag_code table.

        :param fileloc: the file to check
        :param session: ORM Session
        """
        fileloc_hash = cls.dag_fileloc_hash(fileloc)
        return session.query(literal(True)).filter(
            cls.fileloc_hash == fileloc_hash).one_or_none() is not None

    @classmethod
    def get_code_by_fileloc(cls, fileloc: str) -> str:
        """Returns source code for a given fileloc.

        :param fileloc: file path of a DAG
        :return: source code as string
        """
        return cls.code(fileloc)

    @classmethod
    def code(cls, fileloc) -> str:
        """Returns source code for this DagCode object.

        :return: source code as string
        """
        return cls._get_code_from_db(fileloc)

    @staticmethod
    def _get_code_from_file(fileloc):
        with open_maybe_zipped(fileloc, 'r') as f:
            code = f.read()
        return code

    @classmethod
    @provide_session
    def _get_code_from_db(cls, fileloc, session=None):
        dag_code = session.query(cls).filter(
            cls.fileloc_hash == cls.dag_fileloc_hash(fileloc)).first()
        if not dag_code:
            raise DagCodeNotFound()
        else:
            code = dag_code.source_code
        return code

    @staticmethod
    def dag_fileloc_hash(full_filepath: str) -> int:
        """Hashing file location for indexing.

        :param full_filepath: full filepath of DAG file
        :return: hashed full_filepath
        """
        # Hashing is needed because the length of fileloc is 2000 as an Airflow convention,
        # which is over the limit of indexing.
        import hashlib

        # Only 7 bytes because MySQL BigInteger can hold only 8 bytes (signed).
        return struct.unpack(
            '>Q',
            hashlib.sha1(full_filepath.encode('utf-8')).digest()[-8:])[0] >> 8
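As a sanity check (not from the Airflow source): the unsigned 64-bit unpack followed by the 8-bit right shift leaves at most 56 significant bits, so the result always fits in a signed BigInteger:

# Not from the Airflow source: the largest possible hash is 2**56 - 1,
# comfortably below the signed 64-bit ceiling of 2**63 - 1.
assert DagCode.dag_fileloc_hash('/dags/example_dag.py') < 2 ** 56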
Example #19
def MediumText():
    return sqlalchemy.Text().with_variant(MEDIUMTEXT(), 'mysql')
Example #20
def MediumText() -> Variant:
    return Text().with_variant(MEDIUMTEXT(), "mysql")
Example #21
File: upqdb.py Project: springraaar/upq
    def __init__(self, databaseurl, debug):
        self.engine = create_engine(databaseurl,
                                    encoding="utf-8",
                                    echo=debug,
                                    pool_recycle=True)
        logging.info("Opened DB connection.")
        self.meta = MetaData()
        self.tables = {}  # registry of Table objects, keyed by table name
        self.tables['mirror'] = Table(
            'mirror',
            self.meta,  #table with file mirrors
            Column('mid',
                   INTEGER(display_width=10),
                   primary_key=True,
                   nullable=False,
                   autoincrement=True),
            Column('title', VARCHAR(length=64)),
            Column('description', TEXT()),
            Column('country', VARCHAR(length=64)),
            Column('url_prefix', VARCHAR(length=64)),  # prefix to files
            Column('url_daemon',
                   VARCHAR(length=64)),  # absolute url to daemon.php
            Column('mirror_size',
                   INTEGER(display_width=11)),  # maximum size of mirror
            Column('bandwidth_limit',
                   INTEGER(display_width=11)),  # upload speed limit in kb/s
            Column('status', INTEGER(display_width=4)))  # 0=inactive, 1=active
        self.tables['mirror_file'] = Table(
            'mirror_file',
            self.meta,  #table with files on file mirrors
            Column('mfid',
                   INTEGER(display_width=10),
                   primary_key=True,
                   nullable=False,
                   autoincrement=True),
            Column('fid', Integer, ForeignKey("file.fid")),
            Column('mid', INTEGER(display_width=4),
                   ForeignKey("mirror.mid")),  # mirror id
            Column('path',
                   VARCHAR(length=1024)),  # relative to (mfid.url_prefix) path
            Column('lastcheck', DATETIME(
                timezone=False)),  # last time checksum/existence was checked
            Column(
                'status', INTEGER(display_width=4)
            ),  # 0=inactive, 1 = active, 2 = marked for recheck, 3 = broken, 4 = archived (=possible deleted)
            UniqueConstraint('fid', 'mid'))
        self.tables['file'] = Table(
            'file',
            self.meta,  #all known files
            Column('fid',
                   INTEGER(display_width=10),
                   primary_key=True,
                   nullable=False,
                   autoincrement=True),  #primary key of file
            Column('uid', INTEGER(display_width=10), default=0,
                   nullable=False),  # owner uid of file
            Column('filename',
                   VARCHAR(length=255),
                   nullable=False,
                   unique=False),  # filename (without path)
            Column('path', VARCHAR(length=1024), default='', nullable=False
                   ),  # relative path where file is (without filename!)
            Column('size', INTEGER(display_width=11),
                   nullable=False),  # file size
            Column(
                'status', INTEGER(display_width=11), nullable=False
            ),  # 0=inactive, 1 = active, 2 = marked for recheck, 3 = broken
            Column('timestamp', TIMESTAMP(timezone=False)),
            Column('md5', CHAR(length=32), unique=True),
            Column('sha1', CHAR(length=40)),
            Column('sha256', CHAR(length=64)),
            Column('name', VARCHAR(length=256)),  #spring name of this file
            Column('version',
                   VARCHAR(length=256)),  #spring version of this file
            Column(
                'cid',
                INTEGER(display_width=11)),  #category of this file: game/map
            Column('sdp', VARCHAR(length=32), nullable=True,
                   unique=True),  #for this file
            Column('metadata', MEDIUMTEXT()),
            UniqueConstraint('name', 'version', 'cid'),
            UniqueConstraint('filename', 'cid'))

        #self.tables['rapidrepo']=Table('rapidrepo', self.meta,
        #	Column('rid', INTEGER(display_width=10), primary_key=True, nullable=False, autoincrement=True),
        #	Column('baseurl', VARCHAR(length=32), unique=True, nullable=False) # i.e. https://repos.springrts.com/ba/

        #self.tables['rapid']=Table('rapid', self.meta,
        #	Column('rid', INTEGER(display_width=10), primary_key=True, nullable=False, autoincrement=True),
        #	Column('fid', Integer, ForeignKey("file.fid"), nullable=True, unique=True),
        #	Column('repo', Integer, ForeignKey("rapid-repo.rid"), nullable=True, unique=True),
        #	Column('sdp', VARCHAR(length=32), unique=True),
        #	Column('timestamp', TIMESTAMP(timezone=False)),

        self.tables['tag'] = Table(
            'tag', self.meta,
            Column('tid',
                   INTEGER(display_width=10),
                   primary_key=True,
                   nullable=False,
                   autoincrement=True),
            Column('fid', Integer, ForeignKey("file.fid"), nullable=False),
            Column('tag', VARCHAR(length=128), unique=True))
        self.tables['categories'] = Table(
            'categories',
            self.meta,  # file categories
            Column('cid',
                   INTEGER(display_width=11),
                   primary_key=True,
                   nullable=False,
                   autoincrement=True),
            Column('name', VARCHAR(length=24), nullable=False))
        self.tables['file_depends'] = Table(
            'file_depends',
            self.meta,
            Column('fid', Integer, ForeignKey("file.fid")),
            Column(
                'depends_fid', Integer, ForeignKey("file.fid"), nullable=True
            ),  #id of other file, if null(couldn't be resolved), use depends_string
            Column('depends_string', VARCHAR(length=64), nullable=False),
            UniqueConstraint('fid', 'depends_string'))

        try:
            self.meta.create_all(self.engine)
        except Exception as e:
            raise Exception("Unable to initialize database %s:%s" %
                            (databaseurl, e))
        self.meta.bind = self.engine