def __init__(self, schema): self.schema = schema self.database = schema.schema_name + ".db" self.engine = Sqlite3Engine(self.database) self.engine.autocommit(False) self.metadata = MetaData() self.create_all_tables()
def Metadata_reflect_unittest(): """get the metadata from existing sqlite database """ engine = Sqlite3Engine("movies.db") meta = MetaData() meta.reflect(engine) print(meta.tables["movie"].__dict__) for column in meta.tables["movie"].columns.values(): print(repr(column))
def __init__(self, dbname): self.engine = Sqlite3Engine(dbname) self.metadata = MetaData() self.pipeline = deque() self.timewrapper = TimeWrapper() self.messenger = Messenger() self.log = Log()
def create_all_tables(self): ## create the main table self.metadata = MetaData() main_table_columns = [Column(field.field_name, field.sqlite_dtype, primary_key = field.primary_key, nullable = field.nullable, default = field.default,) \ for field in self.schema.fields.values()] self.main_table = Table(self.schema.schema_name, self.metadata, *main_table_columns) ## create keyword table for keyword_field in self.schema.keyword_fields: Table( keyword_field, self.metadata, Column("keyword", datatype.text, primary_key=True), Column("uuid_set", datatype.pythonset), ) self.metadata.create_all(self.engine)
def create_all_tables(self): ## create the main table self.metadata = MetaData() main_table_columns = [Column(field.field_name, field.sqlite_dtype, primary_key = field.primary_key, nullable = field.nullable, default = field.default,) \ for field in self.schema.fields.values()] self.main_table = Table(self.schema.schema_name, self.metadata, *main_table_columns) ## create keyword table for keyword_field in self.schema.keyword_fields: Table(keyword_field, self.metadata, Column("keyword", datatype.text, primary_key = True), Column("uuid_set", datatype.pythonset), ) self.metadata.create_all(self.engine)
from angora.GADGET.pytimer import Timer import random import os fm = FormatMaster() try: os.remove("performance.db") except: pass # engine = Sqlite3Engine("performance.db") engine = Sqlite3Engine(":memory:") conn = engine.connect c = engine.cursor metadata = MetaData() datatype = DataType() col_ID = Column("ID", datatype.integer, primary_key=True) col_name = Column("name", datatype.text) test = Table("test", metadata, col_ID, col_name) metadata.create_all(engine) ins = test.insert() records = [(i, "abcdefghijklmnopqrstuvwxyz") for i in range(1000)] records = records + [(random.randint(1, 1000), "abcdefghijklmnopqrstuvwxyz") for i in range(10)] rows = [Row(("ID", "name"), (i, "abcdefghijklmnopqrstuvwxyz")) for i in range(1000)] rows = rows + [Row(("ID", "name"), (random.randint(1, 1000), "abcdefghijklmnopqrstuvwxyz")) for i in range(10)] timer = Timer() def insert_test1(): # 4.0 - 5.0 second
class SearchEngine(): def __init__(self, schema): self.schema = schema self.database = schema.schema_name + ".db" self.engine = Sqlite3Engine(self.database) self.engine.autocommit(False) self.metadata = MetaData() self.create_all_tables() def commit(self): self.engine.commit() def create_all_tables(self): ## create the main table self.metadata = MetaData() main_table_columns = [Column(field.field_name, field.sqlite_dtype, primary_key = field.primary_key, nullable = field.nullable, default = field.default,) \ for field in self.schema.fields.values()] self.main_table = Table(self.schema.schema_name, self.metadata, *main_table_columns) ## create keyword table for keyword_field in self.schema.keyword_fields: Table( keyword_field, self.metadata, Column("keyword", datatype.text, primary_key=True), Column("uuid_set", datatype.pythonset), ) self.metadata.create_all(self.engine) def get_table(self, table_name): """根据table_name得到一个表对象 """ return self.metadata.tables[table_name] def add_one(self, document): """用于往数据库中添加数据, 以增量更新的模式更新索引, 性能较低 1. 往主表格中填充一条文档 2. 更新倒排索引里的表中的索引数据 """ # 将字典document转化为row columns, values = list(), list() for k, v in document.items(): columns.append(k) values.append(v) row = Row(columns, values) # 更新主表的数据 ins = self.main_table.insert() self.engine.insert_row(ins, row) # 对每一个field所对应的表进行更新 for keyword_field in self.schema.keyword_fields: table = self.get_table(keyword_field) ins = table.insert() upd = table.update() # 对表中所涉及的keyword的行进行更新 for keyword in document[keyword_field]: try: # 尝试插入新的, 若已经存在, 则进入更新 self.engine.insert_row( ins, Row(["keyword", "uuid_set"], [keyword, set([document[self.schema.uuid]])])) except: # 对keyword的行进行更新 a = self.engine.select( Select([table.uuid_set ]).where(table.keyword == keyword)) new_uuid_set = list(a)[0][0] print(new_uuid_set) new_uuid_set.add(document[self.schema.uuid]) self.engine.update( upd.values(uuid_set=new_uuid_set).where( table.keyword == keyword)) def clone_from_data_stream(self, documents): """用于从0开始, 从批量文档中生成数据库, 性能较高 1. 往主表格中填充一条文档 2. 从文档中生成倒排索引的索引字典 3. 往所有的索引表中填充索引 invert_index = {keyword: set_of_uuid} """ import time st = time.clock() print("正在往数据库 %s 中填充数据..." % self.schema.schema_name) # 初始化all_inv_dict all_inv_dict = dict() for keyword_field in self.schema.keyword_fields: # initialize a empty dict for invert index all_inv_dict[keyword_field] = dict() ins = self.main_table.insert() counter = 0 for document in documents: counter += 1 # 将字典document转化为row columns, values = list(), list() for k, v in document.items(): columns.append(k) values.append(v) row = Row(columns, values) # 更新主表的数据 try: self.engine.insert_row(ins, row) except: pass # 计算倒排索引 for keyword_field in self.schema.keyword_fields: uuid = document[self.schema.uuid] for keyword in document[keyword_field]: if keyword in all_inv_dict[keyword_field]: all_inv_dict[keyword_field][keyword].add(uuid) else: all_inv_dict[keyword_field][keyword] = set([ uuid, ]) # 将all_inv_dict中的数据存入索引表中 for keyword_field in all_inv_dict: table = self.get_table(keyword_field) ins = table.insert() for keyword, uuid_set in all_inv_dict[keyword_field].items(): self.engine.insert_record(ins, (keyword, uuid_set)) print("\t数据库准备完毕, 一共插入了 %s 条数据, 可以进行搜索了! 一共耗时 %s 秒" % ( counter, (time.clock() - st), )) def create_query(self): """生成一个Query对象, 并把引擎所绑定的Schema传给Query 使得Query能够自行找到Schema中的各个Fields """ return Query(self.schema) def _search(self, query): """根据query进行单元搜索, 返回record tuple. 本方法为search()和search_document()方法的内核 """ main_sqlcmd_select_uuid, main_sqlcmd_select_all, keyword_sqlcmd_list = query.create_sql( ) ### 情况1, 主表和倒排索引表都要被查询 if (len(keyword_sqlcmd_list) >= 1) and ("WHERE" in main_sqlcmd_select_uuid): # 得到查询主表所筛选出的 result_uuid_set result_uuid_set = OrderedSet([ record[0] for record in self.engine.cursor.execute( main_sqlcmd_select_uuid) ]) # 得到使用倒排索引所筛选出的 keyword_uuid_set keyword_uuid_set = OrderedSet( set.intersection(*[ self.engine.cursor.execute(sqlcmd).fetchone()[0] for sqlcmd in keyword_sqlcmd_list ])) # 对两者求交集 result_uuid_set = OrderedSet.intersection(result_uuid_set, keyword_uuid_set) # 根据结果中的uuid, 去主表中取数据 for uuid in result_uuid_set: record = self.engine.cursor.execute( "SELECT * FROM {0} WHERE {1} = {2}".format( self.schema.schema_name, self.schema.uuid, repr(uuid), )).fetchone() yield record ### 情况2, 只对倒排索引表查询 elif (len(keyword_sqlcmd_list) >= 1) and ("WHERE" not in main_sqlcmd_select_uuid): # 得到查询主表所筛选出的 result_uuid_set result_uuid_set = OrderedSet([ record[0] for record in self.engine.cursor.execute( main_sqlcmd_select_uuid) ]) # 得到使用倒排索引所筛选出的 keyword_uuid_set keyword_uuid_set = OrderedSet( set.intersection(*[ self.engine.cursor.execute(sqlcmd).fetchone()[0] for sqlcmd in keyword_sqlcmd_list ])) # 对两者求交集 result_uuid_set = OrderedSet.intersection(result_uuid_set, keyword_uuid_set) # 根据结果中的uuid, 去主表中取数据 for uuid in result_uuid_set: record = self.engine.cursor.execute( "SELECT * FROM {0} WHERE {1} = {2}".format( self.schema.schema_name, self.schema.uuid, repr(uuid), )).fetchone() yield record ### 情况3, 只对主表查询 elif (len(keyword_sqlcmd_list) == 0) and ("WHERE" in main_sqlcmd_select_uuid): for record in self.engine.cursor.execute(main_sqlcmd_select_all): yield record ### 情况4, 空查询 else: pass def search(self, query): """根据query进行单元搜索, 返回record tuple """ counter = 0 for record in self._search(query): counter += 1 if counter <= query.limit_number: yield record else: return def search_document(self, query): """根据query进行单元搜索, 返回document ordereddict example: OrderedDict({field_name: field_value}) """ counter = 0 for record in self.search(query): counter += 1 document = OrderedDict() # pack up as a ordered dict for field_name, field, value in zip(self.schema.fields.keys(), self.schema.fields.values(), record): document[field_name] = value if counter <= query.limit_number: yield document else: return ### =================== 语法糖方法 ========================= def _get_all_valid_keyword(self, field_name): """私有函数, 用于支持Engine.display_valid_keyword, Engine.search_valid_keyword功能 根据field_name得到该field下所有出现过的keyword """ if field_name in self.schema.keyword_fields: all_keywords = [ row[0] for row in self.engine.execute("SELECT keyword FROM %s" % field_name) ] return all_keywords else: raise Exception("ERROR! field_name has to be in %s, yours is %s" % (self.keyword_fields, field_name)) def _search_valid_keyword(self, field_name, pattern): """根据pattern, 搜索在单元名为field_name中可供选择的keyword""" result = [ keyword for keyword in self._get_all_valid_keyword(field_name) if pattern in keyword ] return result ### ================== Help ==================== def print_as_table(self, array): for chunk in grouper(array, 5, ""): print( "\t{0[0]:<20}\t{0[1]:<20}\t{0[2]:<20}\t{0[3]:<20}\t{0[4]:<20}". format(chunk)) def display_searchable_fields(self): """打印所有能被搜索到的单元名和具体类定义""" print("\n{:=^100}".format("All searchable fields")) for field_name, field in self.schema.fields.items(): print("\t%s <---> %s" % (field_name, repr(field))) def display_keyword_fields(self): """打印所有支持倒排索引的单元名和具体类定义""" print("\n{:=^100}".format("All keyword fields")) for field_name, field in self.schema.fields.items(): if "_Searchable_KEYWORD" in field.search_types: print("\t%s <---> %s" % (field_name, repr(field))) def display_criterion(self): """打印所有引擎支持的筛选条件""" query = self.create_query() print("\n{:=^100}".format("All supported criterion")) print("\t%s" % query.query_equal.help()) print("\t%s" % query.query_greater.help()) print("\t%s" % query.query_smaller.help()) print("\t%s" % query.query_between.help()) print("\t%s" % query.query_startwith.help()) print("\t%s" % query.query_endwith.help()) print("\t%s" % query.query_like.help()) print("\t%s" % query.query_contains.help()) def display_valid_keyword(self, field_name): """打印某个单元下所有有效的keyword的集合""" print("\n{:=^100}".format("All valid keyword in %s" % field_name)) if field_name in self.schema.keyword_fields: all_keywords = self._get_all_valid_keyword(field_name) all_keywords.sort() self.print_as_table(all_keywords) print("Found %s valid keywords with in %s" % (len(all_keywords), field_name)) else: print("ERROR! field_name has to be in %s, yours is %s" % (self.schema.keyword_fields, field_name)) def search_valid_keyword(self, field_name, pattern): """根据pattern, 打印在单元名为field_name中可供选择的keyword""" print("\n{:=^100}".format("All valid keyword with pattern %s in %s" % (pattern, field_name))) result = self._search_valid_keyword(field_name, pattern) result.sort() self.print_as_table(result) print("Found %s valid keywords with pattern %s in %s" % (len(result), pattern, field_name)) return result def help(self): """print help information""" text = \ """ Use the following command to help you create desired query: \tSearchEngine.display_searchable_fields() \tSearchEngine.display_keyword_fields() \tSearchEngine.display_criterion() \tSearchEngine.display_valid_keyword(field_name) \tSearchEngine.search_valid_keyword(field_name, pattern) """ print(text)
class SearchEngine(): def __init__(self, schema): self.schema = schema self.database = schema.schema_name + ".db" self.engine = Sqlite3Engine(self.database) self.engine.autocommit(False) self.metadata = MetaData() self.create_all_tables() def commit(self): self.engine.commit() def create_all_tables(self): ## create the main table self.metadata = MetaData() main_table_columns = [Column(field.field_name, field.sqlite_dtype, primary_key = field.primary_key, nullable = field.nullable, default = field.default,) \ for field in self.schema.fields.values()] self.main_table = Table(self.schema.schema_name, self.metadata, *main_table_columns) ## create keyword table for keyword_field in self.schema.keyword_fields: Table(keyword_field, self.metadata, Column("keyword", datatype.text, primary_key = True), Column("uuid_set", datatype.pythonset), ) self.metadata.create_all(self.engine) def get_table(self, table_name): """根据table_name得到一个表对象 """ return self.metadata.tables[table_name] def add_one(self, document): """用于往数据库中添加数据, 以增量更新的模式更新索引, 性能较低 1. 往主表格中填充一条文档 2. 更新倒排索引里的表中的索引数据 """ # 将字典document转化为row columns, values = list(), list() for k, v in document.items(): columns.append(k) values.append(v) row = Row(columns, values) # 更新主表的数据 ins = self.main_table.insert() self.engine.insert_row(ins, row) # 对每一个field所对应的表进行更新 for keyword_field in self.schema.keyword_fields: table = self.get_table(keyword_field) ins = table.insert() upd = table.update() # 对表中所涉及的keyword的行进行更新 for keyword in document[keyword_field]: try: # 尝试插入新的, 若已经存在, 则进入更新 self.engine.insert_row(ins, Row( ["keyword", "uuid_set"], [keyword, set([document[self.schema.uuid]])] ) ) except: # 对keyword的行进行更新 a = self.engine.select(Select([table.uuid_set]).where(table.keyword == keyword)) new_uuid_set = list(a)[0][0] print(new_uuid_set) new_uuid_set.add(document[self.schema.uuid]) self.engine.update(upd.values(uuid_set = new_uuid_set).where(table.keyword == keyword) ) def clone_from_data_stream(self, documents): """用于从0开始, 从批量文档中生成数据库, 性能较高 1. 往主表格中填充一条文档 2. 从文档中生成倒排索引的索引字典 3. 往所有的索引表中填充索引 invert_index = {keyword: set_of_uuid} """ import time st = time.clock() print("正在往数据库 %s 中填充数据..." % self.schema.schema_name) # 初始化all_inv_dict all_inv_dict = dict() for keyword_field in self.schema.keyword_fields: # initialize a empty dict for invert index all_inv_dict[keyword_field] = dict() ins = self.main_table.insert() counter = 0 for document in documents: counter += 1 # 将字典document转化为row columns, values = list(), list() for k, v in document.items(): columns.append(k) values.append(v) row = Row(columns, values) # 更新主表的数据 try: self.engine.insert_row(ins, row) except: pass # 计算倒排索引 for keyword_field in self.schema.keyword_fields: uuid = document[self.schema.uuid] for keyword in document[keyword_field]: if keyword in all_inv_dict[keyword_field]: all_inv_dict[keyword_field][keyword].add(uuid) else: all_inv_dict[keyword_field][keyword] = set([uuid,]) # 将all_inv_dict中的数据存入索引表中 for keyword_field in all_inv_dict: table = self.get_table(keyword_field) ins = table.insert() for keyword, uuid_set in all_inv_dict[keyword_field].items(): self.engine.insert_record(ins, (keyword, uuid_set)) print("\t数据库准备完毕, 一共插入了 %s 条数据, 可以进行搜索了! 一共耗时 %s 秒" % (counter, (time.clock() - st), ) ) def create_query(self): """生成一个Query对象, 并把引擎所绑定的Schema传给Query 使得Query能够自行找到Schema中的各个Fields """ return Query(self.schema) def _search(self, query): """根据query进行单元搜索, 返回record tuple. 本方法为search()和search_document()方法的内核 """ main_sqlcmd_select_uuid, main_sqlcmd_select_all, keyword_sqlcmd_list = query.create_sql() ### 情况1, 主表和倒排索引表都要被查询 if (len(keyword_sqlcmd_list) >= 1) and ("WHERE" in main_sqlcmd_select_uuid): # 得到查询主表所筛选出的 result_uuid_set result_uuid_set = OrderedSet([record[0] for record in self.engine.cursor.execute(main_sqlcmd_select_uuid)]) # 得到使用倒排索引所筛选出的 keyword_uuid_set keyword_uuid_set = OrderedSet( set.intersection( *[self.engine.cursor.execute(sqlcmd).fetchone()[0] for sqlcmd in keyword_sqlcmd_list] ) ) # 对两者求交集 result_uuid_set = OrderedSet.intersection(result_uuid_set, keyword_uuid_set) # 根据结果中的uuid, 去主表中取数据 for uuid in result_uuid_set: record = self.engine.cursor.execute("SELECT * FROM {0} WHERE {1} = {2}".format(self.schema.schema_name, self.schema.uuid, repr(uuid),) ).fetchone() yield record ### 情况2, 只对倒排索引表查询 elif (len(keyword_sqlcmd_list) >= 1) and ("WHERE" not in main_sqlcmd_select_uuid): # 得到查询主表所筛选出的 result_uuid_set result_uuid_set = OrderedSet([record[0] for record in self.engine.cursor.execute(main_sqlcmd_select_uuid)]) # 得到使用倒排索引所筛选出的 keyword_uuid_set keyword_uuid_set = OrderedSet( set.intersection( *[self.engine.cursor.execute(sqlcmd).fetchone()[0] for sqlcmd in keyword_sqlcmd_list] ) ) # 对两者求交集 result_uuid_set = OrderedSet.intersection(result_uuid_set, keyword_uuid_set) # 根据结果中的uuid, 去主表中取数据 for uuid in result_uuid_set: record = self.engine.cursor.execute("SELECT * FROM {0} WHERE {1} = {2}".format(self.schema.schema_name, self.schema.uuid, repr(uuid),) ).fetchone() yield record ### 情况3, 只对主表查询 elif (len(keyword_sqlcmd_list) == 0) and ("WHERE" in main_sqlcmd_select_uuid): for record in self.engine.cursor.execute(main_sqlcmd_select_all): yield record ### 情况4, 空查询 else: pass def search(self, query): """根据query进行单元搜索, 返回record tuple """ counter = 0 for record in self._search(query): counter += 1 if counter <= query.limit_number: yield record else: return def search_document(self, query): """根据query进行单元搜索, 返回document ordereddict example: OrderedDict({field_name: field_value}) """ counter = 0 for record in self.search(query): counter += 1 document = OrderedDict() # pack up as a ordered dict for field_name, field, value in zip(self.schema.fields.keys(), self.schema.fields.values(), record): document[field_name] = value if counter <= query.limit_number: yield document else: return ### =================== 语法糖方法 ========================= def _get_all_valid_keyword(self, field_name): """私有函数, 用于支持Engine.display_valid_keyword, Engine.search_valid_keyword功能 根据field_name得到该field下所有出现过的keyword """ if field_name in self.schema.keyword_fields: all_keywords = [row[0] for row in self.engine.execute("SELECT keyword FROM %s" % field_name)] return all_keywords else: raise Exception("ERROR! field_name has to be in %s, yours is %s" % (self.keyword_fields, field_name)) def _search_valid_keyword(self, field_name, pattern): """根据pattern, 搜索在单元名为field_name中可供选择的keyword""" result = [keyword for keyword in self._get_all_valid_keyword(field_name) if pattern in keyword] return result ### ================== Help ==================== def print_as_table(self, array): for chunk in grouper(array, 5, ""): print("\t{0[0]:<20}\t{0[1]:<20}\t{0[2]:<20}\t{0[3]:<20}\t{0[4]:<20}".format(chunk) ) def display_searchable_fields(self): """打印所有能被搜索到的单元名和具体类定义""" print("\n{:=^100}".format("All searchable fields")) for field_name, field in self.schema.fields.items(): print("\t%s <---> %s" % (field_name, repr(field) ) ) def display_keyword_fields(self): """打印所有支持倒排索引的单元名和具体类定义""" print("\n{:=^100}".format("All keyword fields")) for field_name, field in self.schema.fields.items(): if "_Searchable_KEYWORD" in field.search_types: print("\t%s <---> %s" % (field_name, repr(field) ) ) def display_criterion(self): """打印所有引擎支持的筛选条件""" query = self.create_query() print("\n{:=^100}".format("All supported criterion")) print("\t%s" % query.query_equal.help()) print("\t%s" % query.query_greater.help()) print("\t%s" % query.query_smaller.help()) print("\t%s" % query.query_between.help()) print("\t%s" % query.query_startwith.help()) print("\t%s" % query.query_endwith.help()) print("\t%s" % query.query_like.help()) print("\t%s" % query.query_contains.help()) def display_valid_keyword(self, field_name): """打印某个单元下所有有效的keyword的集合""" print("\n{:=^100}".format("All valid keyword in %s" % field_name)) if field_name in self.schema.keyword_fields: all_keywords = self._get_all_valid_keyword(field_name) all_keywords.sort() self.print_as_table(all_keywords) print("Found %s valid keywords with in %s" % (len(all_keywords), field_name) ) else: print("ERROR! field_name has to be in %s, yours is %s" % (self.schema.keyword_fields, field_name) ) def search_valid_keyword(self, field_name, pattern): """根据pattern, 打印在单元名为field_name中可供选择的keyword""" print("\n{:=^100}".format("All valid keyword with pattern %s in %s" % (pattern, field_name) ) ) result = self._search_valid_keyword(field_name, pattern) result.sort() self.print_as_table(result) print("Found %s valid keywords with pattern %s in %s" % (len(result), pattern, field_name)) return result def help(self): """print help information""" text = \ """ Use the following command to help you create desired query: \tSearchEngine.display_searchable_fields() \tSearchEngine.display_keyword_fields() \tSearchEngine.display_criterion() \tSearchEngine.display_valid_keyword(field_name) \tSearchEngine.search_valid_keyword(field_name, pattern) """ print(text)
def _read_metadata(self): """construct the metadata for creating the database table """ self.metadata = MetaData() datatype = DataType() ### map the CSV.dtype definition to pandas.read_csv dtype and sqlite3 dtype _pd_dtype_mapping = { "TEXT": np.str, "INTEGER": np.int64, "REAL": np.float64, "DATE": np.str, "DATETIME": np.str } _db_dtype_mapping = { "TEXT": datatype.text, "INTEGER": datatype.integer, "REAL": datatype.real, "DATE": datatype.date, "DATETIME": datatype.datetime } pd_dtype = dict( ) # {"column_name": dtype} for part of columns, other columns using default setting db_dtype = dict() # {"column_name": dtype} for all columns for column_name, data_type in self.dtype.items(): if data_type in _pd_dtype_mapping: pd_dtype[column_name] = _pd_dtype_mapping[data_type] if data_type in _db_dtype_mapping: db_dtype[column_name] = _db_dtype_mapping[data_type] ### Read one row, and extract column information from csv if self.usecols: df = pd.read_csv(self.path, sep=self.sep, header=self.header, nrows=1, dtype=pd_dtype, usecols=self.usecols) else: df = pd.read_csv(self.path, sep=self.sep, header=self.header, nrows=1, dtype=pd_dtype) # 强行转化为字符串, 却表列index = 数据表中的列名, 且为合法字符串 new_columns = list() for i in df.columns: if not isinstance(i, str): new_columns.append("c" + str(i)) else: new_columns.append(i) df.columns = new_columns ### Define the right data type in database for each column for column_name, data_type in zip(df.columns, df.dtypes): if column_name not in db_dtype: if data_type in [ np.object, ]: db_dtype.setdefault(column_name, datatype.text) elif data_type in [ np.int64, np.int32, np.int16, np.int8, np.int0, np.int ]: db_dtype.setdefault(column_name, datatype.integer) elif data_type in [ np.float64, np.float32, np.float16, np.float ]: db_dtype.setdefault(column_name, datatype.real) self.pd_dtype = pd_dtype self.db_dtype = db_dtype ### Construct Database.Table Metadata columns = list() for column_name, data_type in zip(df.columns, df.dtypes): if column_name in self.primary_key_columns: primary_key_flag = True else: primary_key_flag = False columns.append( Column(column_name, db_dtype[column_name], primary_key=primary_key_flag)) Table(self.table_name, self.metadata, *columns) self.table = self.metadata.tables[self.table_name]
from collections import OrderedDict import datetime class MyClass(): """a user customized class stored with document """ def __init__(self, value): self.value = value def __repr__(self): return "MyClass(%s)" % repr(self.value) engine = Sqlite3Engine(":memory:") metadata = MetaData() datatype = DataType() pythontype = Table( "pythontype", metadata, Column("uuid", datatype.integer, primary_key=True), Column("date_type", datatype.date, default=datetime.date(1999, 1, 1)), Column("datetime_type", datatype.datetime, default=datetime.datetime(2000, 1, 1, 0, 0, 0)), Column("list_type", datatype.pythonlist, default=list()), Column("set_type", datatype.pythonset, default=set()), Column("dict_type", datatype.pythondict, default=dict()), Column("ordereddict_type", datatype.ordereddict, default=OrderedDict()), Column("strset_type", datatype.strset, default=StrSet(["a", "b", "c"])),