def insert_data(self, dataframe):
    """Append *dataframe* to ``adhoc_parser.etl_hh`` with explicit column types.

    Uses the module-level ``engine`` for the connection; rows are appended
    (``if_exists='append'``) and the DataFrame index is not written.
    """
    # Explicit SQL types for every exported column, keyed by the flattened
    # API field names (dotted keys come straight from the source payload).
    column_types = {
        'id': INT(),
        'name': String(255),
        'has_test': Boolean(),
        'published_at': DateTime(),
        'created_at': DateTime(),
        'url': String(255),
        'area_name': String(255),
        'salary_from': INT(),
        'salary_to': INT(),
        'salary_currency': String(10),
        'salary.gross': Boolean(),
        'address.city': String(255),
        'address.street': String(255),
        'address_building': String(255),
        'address_raw': String(500),
        'metro_name': String(255),
        'employer_id': INT(),
        'employer_name': String(255),
        'snippet_requirement': TEXT(),
        'snippet_responsibility': TEXT(),
        'contacts_name': String(255),
        'contacts_email': String(255),
    }
    dataframe.to_sql(
        'etl_hh',
        schema='adhoc_parser',
        con=engine,
        if_exists='append',
        index=False,
        dtype=column_types,
    )
class Category(Base):
    """Self-referential category tree node stored in the ``categories`` table.

    ``children`` holds the sub-categories; the backref ``parent`` walks up
    the tree via ``parent_id``.
    """

    __tablename__ = 'categories'

    id = Column(Integer, primary_key=True)
    parent_id = Column(Integer, ForeignKey('categories.id'))
    data = Column(TEXT())

    children = relationship(
        'Category',
        backref=backref('parent', remote_side=[id]),
    )

    def __repr__(self):
        # f-string with !r renders exactly like the original %r template.
        return (f"TreeNode(data={self.data!r}, id={self.id!r}, "
                f"parent_id={self.parent_id!r})")
class SpeechInputDB(Base):
    """ORM model for one recognized speech utterance (``speech_input`` table)."""

    __tablename__ = 'speech_input'

    id = Column(INT, Sequence('speech_input_id_seq'), primary_key=True)
    datetime = Column(TIMESTAMP(timezone=False))  # naive timestamp; insert() stamps UTC
    source = Column(VARCHAR(160), nullable=False, default='')
    message = Column(TEXT(convert_unicode=True), default='')
    # BUG FIX: the original declared default='' on an integer column; an
    # empty string is not a valid INT default, so default to NULL instead.
    confidence = Column(INT, default=None)
    lang = Column(VARCHAR(convert_unicode=True, length=8), default='')
    audio_path = Column(TEXT(convert_unicode=True), default='')

    def __repr__(self):
        return "<SpeechInput(source={}, datetime={}, message={})>".format(
            self.source, self.datetime, self.message)

    @staticmethod
    def insert(source, message, confidence, lang, audio_path):
        """Persist one recognition result, stamped with the current UTC time."""
        row = SpeechInputDB(source=source,
                            message=message,
                            confidence=confidence,
                            lang=lang,
                            audio_path=audio_path,
                            datetime=dt.utcnow())
        session.add(row)
        session.commit()
class ChatLogDB(Base):
    """ORM model for one chat event persisted in the ``chatlog`` table."""

    __tablename__ = 'chatlog'

    id = Column(INT, Sequence('chatlog_id_seq'), primary_key=True)
    datetime = Column(TIMESTAMP(timezone=False))  # naive timestamp; insert() stamps UTC
    source = Column(VARCHAR(160), nullable=False, default='')
    event = Column(VARCHAR(160), nullable=False, default='')
    author = Column(TEXT(convert_unicode=True), default='')
    message = Column(TEXT(convert_unicode=True), default='')
    msgtype = Column(TEXT(convert_unicode=True), default='')

    def __repr__(self):
        return "<Chatlog(source={}, datetime={}, author={}, message={})>".format(
            self.source, self.datetime, self.author, self.message)

    @staticmethod
    def insert(source, author, message, msgtype, event=''):
        """Persist one chat event, stamped with the current UTC time."""
        values = dict(source=source,
                      event=event,
                      author=author,
                      message=message,
                      msgtype=msgtype,
                      datetime=dt.utcnow())
        session.add(ChatLogDB(**values))
        session.commit()
def insert_data(self, dataframe):
    """Append *dataframe* to ``adhoc_parser.etl_hh``.

    Builds a throwaway engine from the instance's connection settings,
    appends the rows with explicit column types, and always disposes the
    engine's connection pool afterwards.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Rows matching the etl_hh column layout declared below.
    """
    self.logger.info('create engine and connect to database')
    engine = sqlalchemy.create_engine(
        f'{self.database_type}://{self.database_login}:{self.database_password}@{self.database_url}/{self.database_name}'
    )
    try:
        self.logger.info('prepare to insert data')
        dataframe.to_sql(
            'etl_hh',
            schema='adhoc_parser',
            con=engine,
            if_exists='append',
            index=False,
            dtype={
                'id': INT(),
                'name': String(255),
                'has_test': Boolean(),
                'published_at': DateTime(),
                'created_at': DateTime(),
                'url': String(255),
                'area_name': String(255),
                'salary_from': INT(),
                'salary_to': INT(),
                'salary_currency': String(10),
                'salary.gross': Boolean(),
                'address.city': String(255),
                'address.street': String(255),
                'address_building': String(255),
                'address_raw': String(500),
                'metro_name': String(255),
                'employer_id': INT(),
                'employer_name': String(255),
                'snippet_requirement': TEXT(),
                'snippet_responsibility': TEXT(),
                'contacts_name': String(255),
                'contacts_email': String(255),
            },
        )
        # BUG FIX: a successful insert is not a warning — log at INFO level.
        self.logger.info('data are inserted now')
    finally:
        # BUG FIX: release pooled connections even when the insert fails;
        # the original leaked a fresh engine on every call.
        engine.dispose()
class Hack(BaseModel):
    # ORM model recording one suspicious/attack HTTP request (table jm_hacker).
    __tablename__ = 'jm_hacker'
    id = Column(Integer, primary_key=True)
    ip = Column(CHAR(60))  # source address; CHAR(60) leaves room for IPv6 text form
    hack_city = Column(CHAR(40))  # presumably geo-resolved city of the source IP — TODO confirm
    hack_addr = Column(CHAR(40))  # presumably geo-resolved region/address — TODO confirm
    url = Column(TEXT())  # requested URL
    host = Column(CHAR(80))  # Host header value — TODO confirm
    # NOTE(review): TEXT(80)/TEXT(200) pass a length argument to TEXT, which
    # most backends ignore — VARCHAR may have been intended; verify the schema.
    acl = Column(TEXT(80))
    # NOTE(review): DATETIME(20) — DATETIME does not accept a length of 20;
    # on MySQL the argument is fractional-second precision (max 6). Confirm.
    src_time = Column(DATETIME(20))
    method = Column(CHAR(10))  # HTTP method (GET/POST/...)
    headers = Column(TEXT(200))  # raw request headers
    user_agent = Column(TEXT(80))
    cookie = Column(TEXT(80))
    post_data = Column(TEXT())  # raw request body
def test_defaults(self):
    """Every generic type must compile to its Teradata default DDL string."""
    expectations = [
        (DATE(), 'DATE'),
        (DATETIME(), 'TIMESTAMP(6)'),
        (TIMESTAMP(), 'TIMESTAMP(6)'),
        (TIME(), 'TIME(6)'),
        (CHAR(), 'CHAR(1)'),
        (VARCHAR(), 'LONG VARCHAR'),
        (NCHAR(), 'CHAR CHAR SET UNICODE'),
        (NVARCHAR(), 'LONG VARCHAR CHAR SET UNICODE'),
        (CLOB(), 'CLOB'),
        (TEXT(), 'CLOB'),
        (DECIMAL(), 'DECIMAL(5, 0)'),
        (NUMERIC(), 'NUMERIC(5, 0)'),
        (INTEGER(), 'INTEGER'),
        (FLOAT(), 'FLOAT'),
        (REAL(), 'REAL'),
        (SMALLINT(), 'SMALLINT'),
        (BIGINT(), 'BIGINT'),
        (BOOLEAN(), 'BYTEINT'),
    ]
    for type_instance, rendered in expectations:
        assert self._comp(type_instance) == rendered
class EmailInfo(Base):
    """ORM model plus read helpers for the ``email_info`` table."""

    __tablename__ = 'email_info'

    id = Column(Integer, primary_key=True, doc='邮件id')
    title = Column(VARCHAR(64), doc='邮件标题')
    content = Column(TEXT(1000), doc='邮件内容')
    create_time = Column(Integer, doc='创建时间')
    reward_id = Column(VARCHAR(100), doc='奖励id')
    author = Column(VARCHAR(64), doc='发送人')
    icon = Column(VARCHAR(200), doc='邮件图标')
    status = Column(Integer, doc='判断此邮件信息是否可用')

    @classmethod
    def get_email_info_by_id(cls, id):
        """Return the mail with *id* as a dict, or None when it does not exist."""
        with sessionCM() as session:
            res = session.query(cls).filter(cls.id == id).first()
            # BUG FIX: .first() returns None for an unknown id; the original
            # then crashed with AttributeError inside to_dict().
            if res is None:
                return None
            return cls.to_dict(res)

    @classmethod
    def get_all_email_info(cls):
        """Return every active (status == 1) mail as a list of dicts."""
        with sessionCM() as session:
            res = session.query(cls).filter(cls.status == 1).all()
            return [cls.to_dict(item) for item in res]

    @classmethod
    def to_dict(cls, email_info):
        """Serialize one row to a plain dict (``status`` intentionally omitted)."""
        return {
            'id': email_info.id,
            'title': email_info.title,
            'content': email_info.content,
            'create_time': email_info.create_time,
            'reward_id': email_info.reward_id,
            'author': email_info.author,
            'icon': email_info.icon,
        }
# Imports from sqlalchemy.exc import OperationalError from sqlalchemy.schema import (Column, ForeignKey, Index, MetaData, Table, UniqueConstraint) from sqlalchemy.types import BINARY, BLOB, INTEGER, JSON, TEXT, VARCHAR from lib import logger, mysql_engine # Constants CHAR_MAX_LEN = 255 BINARY_MAX_LEN = 2**32 - 1 SHA1_LEN = 20 # Types LONGBLOB = BLOB(length=BINARY_MAX_LEN) LONGTEXT = TEXT(length=BINARY_MAX_LEN) LONGVARCHAR = VARCHAR(length=CHAR_MAX_LEN) SHA1 = BINARY(length=SHA1_LEN) # Table metadata metadata = MetaData(bind=mysql_engine) # All known strace executables and arguments executables = Table( 'executables', metadata, Column('id', INTEGER, primary_key=True), Column('system', LONGVARCHAR, nullable=False), Column('executable', LONGVARCHAR, nullable=False), Column('arguments_hash', SHA1, nullable=False), Column('arguments', JSON, nullable=False),
def get_columns(self, connection, table_name, schema=None, **kw):
    """Reflect the columns of *table_name* from the Firebird system tables.

    Returns a list of column-description dicts (name, type, nullable,
    default, autoincrement, and optionally quote/sequence) in field-position
    order, as expected by SQLAlchemy's reflection API.
    """
    # Query to extract the details of all the fields of the given table
    tblqry = """
    SELECT r.rdb$field_name AS fname,
           r.rdb$null_flag AS null_flag,
           t.rdb$type_name AS ftype,
           f.rdb$field_sub_type AS stype,
           f.rdb$field_length/
                COALESCE(cs.rdb$bytes_per_character,1) AS flen,
           f.rdb$field_precision AS fprec,
           f.rdb$field_scale AS fscale,
           COALESCE(r.rdb$default_source, f.rdb$default_source) AS fdefault
    FROM rdb$relation_fields r
         JOIN rdb$fields f ON r.rdb$field_source=f.rdb$field_name
         JOIN rdb$types t
          ON t.rdb$type=f.rdb$field_type AND
                t.rdb$field_name='RDB$FIELD_TYPE'
         LEFT JOIN rdb$character_sets cs ON
                f.rdb$character_set_id=cs.rdb$character_set_id
    WHERE f.rdb$system_flag=0 AND r.rdb$relation_name=?
    ORDER BY r.rdb$field_position
    """
    # get the PK, used to determine the eventual associated sequence
    pk_constraint = self.get_pk_constraint(connection, table_name)
    pkey_cols = pk_constraint["constrained_columns"]
    tablename = self.denormalize_name(table_name)
    # get all of the fields for this table
    c = connection.execute(tblqry, [tablename])
    cols = []
    while True:
        row = c.fetchone()
        if row is None:
            break
        name = self.normalize_name(row["fname"])
        orig_colname = row["fname"]
        # get the data type
        colspec = row["ftype"].rstrip()
        coltype = self.ischema_names.get(colspec)
        if coltype is None:
            # Unknown engine type: warn and fall back to the null type so
            # reflection still succeeds for the rest of the table.
            util.warn("Did not recognize type '%s' of column '%s'" % (colspec, name))
            coltype = sqltypes.NULLTYPE
        elif issubclass(coltype, Integer) and row["fprec"] != 0:
            # Firebird stores NUMERIC/DECIMAL in integer storage types; a
            # non-zero precision means the column is really a numeric.
            # fscale comes back negative, hence the * -1.
            coltype = NUMERIC(precision=row["fprec"], scale=row["fscale"] * -1)
        elif colspec in ("VARYING", "CSTRING"):
            # Character types take their character length (flen is already
            # divided by bytes-per-character in the query above).
            coltype = coltype(row["flen"])
        elif colspec == "TEXT":
            coltype = TEXT(row["flen"])
        elif colspec == "BLOB":
            # BLOB sub_type 1 is a text blob; anything else is binary.
            if row["stype"] == 1:
                coltype = TEXT()
            else:
                coltype = BLOB()
        else:
            coltype = coltype()
        # does it have a default value?
        defvalue = None
        if row["fdefault"] is not None:
            # the value comes down as "DEFAULT 'value'": there may be
            # more than one whitespace around the "DEFAULT" keyword
            # and it may also be lower case
            # (see also http://tracker.firebirdsql.org/browse/CORE-356)
            defexpr = row["fdefault"].lstrip()
            assert defexpr[:8].rstrip().upper() == "DEFAULT", (
                "Unrecognized default value: %s" % defexpr)
            defvalue = defexpr[8:].strip()
            if defvalue == "NULL":
                # Redundant
                defvalue = None
        col_d = {
            "name": name,
            "type": coltype,
            "nullable": not bool(row["null_flag"]),
            "default": defvalue,
            "autoincrement": "auto",
        }
        # Firebird folds unquoted identifiers to upper case, so an
        # all-lower-case name must have been created quoted.
        if orig_colname.lower() == orig_colname:
            col_d["quote"] = True
        # if the PK is a single field, try to see if its linked to
        # a sequence thru a trigger
        if len(pkey_cols) == 1 and name == pkey_cols[0]:
            seq_d = self.get_column_sequence(connection, tablename, name)
            if seq_d is not None:
                col_d["sequence"] = seq_d
        cols.append(col_d)
    return cols
def get_columns(self, connection, table_name, schema=None, **kw):
    """Reflect the columns of *table_name* from the Firebird system tables.

    Returns a list of column-description dicts (name, type, nullable,
    default, and optionally quote/sequence) in field-position order, as
    expected by SQLAlchemy's reflection API.
    """
    # Query to extract the details of all the fields of the given table
    tblqry = """
    SELECT DISTINCT r.rdb$field_name AS fname,
                    r.rdb$null_flag AS null_flag,
                    t.rdb$type_name AS ftype,
                    f.rdb$field_sub_type AS stype,
                    f.rdb$field_length/
                        COALESCE(cs.rdb$bytes_per_character,1) AS flen,
                    f.rdb$field_precision AS fprec,
                    f.rdb$field_scale AS fscale,
                    COALESCE(r.rdb$default_source, f.rdb$default_source) AS fdefault
    FROM rdb$relation_fields r
         JOIN rdb$fields f ON r.rdb$field_source=f.rdb$field_name
         JOIN rdb$types t
          ON t.rdb$type=f.rdb$field_type AND
                t.rdb$field_name='RDB$FIELD_TYPE'
         LEFT JOIN rdb$character_sets cs ON
                f.rdb$character_set_id=cs.rdb$character_set_id
    WHERE f.rdb$system_flag=0 AND r.rdb$relation_name=?
    ORDER BY r.rdb$field_position
    """
    # get the PK, used to determine the eventual associated sequence
    pkey_cols = self.get_primary_keys(connection, table_name)
    tablename = self.denormalize_name(table_name)
    # get all of the fields for this table
    c = connection.execute(tblqry, [tablename])
    cols = []
    while True:
        row = c.fetchone()
        if row is None:
            break
        name = self.normalize_name(row['fname'])
        orig_colname = row['fname']
        # get the data type
        colspec = row['ftype'].rstrip()
        coltype = self.ischema_names.get(colspec)
        if coltype is None:
            # Unknown engine type: warn and fall back to the null type so
            # reflection still succeeds for the rest of the table.
            util.warn("Did not recognize type '%s' of column '%s'" % (colspec, name))
            coltype = sqltypes.NULLTYPE
        elif colspec == 'INT64':
            # INT64 backs NUMERIC/DECIMAL; fscale comes back negative.
            coltype = coltype(precision=row['fprec'], scale=row['fscale'] * -1)
        elif colspec in ('VARYING', 'CSTRING'):
            coltype = coltype(row['flen'])
        elif colspec == 'TEXT':
            coltype = TEXT(row['flen'])
        elif colspec == 'BLOB':
            # BLOB sub_type 1 is a text blob; anything else is binary.
            if row['stype'] == 1:
                coltype = TEXT()
            else:
                coltype = BLOB()
        else:
            # BUG FIX: the original called coltype(row), passing the whole
            # result row to the type constructor; types taking no length or
            # precision must be instantiated with no arguments.
            coltype = coltype()
        # does it have a default value?
        defvalue = None
        if row['fdefault'] is not None:
            # the value comes down as "DEFAULT 'value'": there may be
            # more than one whitespace around the "DEFAULT" keyword
            # (see also http://tracker.firebirdsql.org/browse/CORE-356)
            defexpr = row['fdefault'].lstrip()
            assert defexpr[:8].rstrip() == \
                'DEFAULT', "Unrecognized default value: %s" % \
                defexpr
            defvalue = defexpr[8:].strip()
            if defvalue == 'NULL':
                # Redundant
                defvalue = None
        col_d = {
            'name': name,
            'type': coltype,
            'nullable': not bool(row['null_flag']),
            'default': defvalue
        }
        # Firebird folds unquoted identifiers to upper case, so an
        # all-lower-case name must have been created quoted.
        if orig_colname.lower() == orig_colname:
            col_d['quote'] = True
        # if the PK is a single field, try to see if its linked to
        # a sequence thru a trigger
        if len(pkey_cols) == 1 and name == pkey_cols[0]:
            seq_d = self.get_column_sequence(connection, tablename, name)
            if seq_d is not None:
                col_d['sequence'] = seq_d
        cols.append(col_d)
    return cols
log = logging.getLogger(__name__) Base = declarative_base() CONFIG_SECTION = 'redcapdb' redcap_data = Table( 'redcap_data', Base.metadata, Column(u'project_id', INTEGER(), nullable=False, default=text(u"'0'"), primary_key=True), Column(u'event_id', INTEGER(), primary_key=True), Column(u'record', VARCHAR(length=100), primary_key=True), Column(u'field_name', VARCHAR(length=100), primary_key=True), Column(u'value', TEXT()), ) # this is mostly for testing redcap_surveys_response = Table( 'redcap_surveys_response', Base.metadata, Column(u'response_id', INTEGER(), primary_key=True, nullable=False), Column(u'participant_id', INTEGER()), Column(u'record', VARCHAR(length=100)), Column(u'first_submit_time', DATETIME()), Column(u'completion_time', DATETIME()), Column(u'return_code', VARCHAR(length=8)), ) redcap_surveys_participants = Table(
def load_dialect_impl(self, dialect):
    """Use the native JSON type on PostgreSQL and plain TEXT elsewhere."""
    chosen = postgresql.JSON() if dialect.name == 'postgresql' else TEXT()
    return dialect.type_descriptor(chosen)
def load_dialect_impl(self, dialect):
    """Store as a native TIME on MySQL and as TEXT on every other backend."""
    if dialect.name != 'mysql':
        return dialect.type_descriptor(TEXT())
    return dialect.type_descriptor(TIME())