def __init__(self, data_model):
    """Attach this store to ``data_model`` and open its SQLite-backed table.

    The instance registers itself as ``data_model.fake_item_ids_store``,
    opens ``fake_item_ids_store.db`` inside ``data_model.cache_dir`` and
    exposes the peewee model class as ``self.storage``. The table and its
    ``(is_deleted, item_id)`` index are created when the table is missing.

    Raises ``AssertionError`` when ``data_model.cache_dir`` is falsy.
    """
    self.data_model = data_model
    self.data_model.fake_item_ids_store = self

    cache_dir = self.data_model.cache_dir
    assert cache_dir, "FakeItemIds need cache_dir from data_model!"

    db_file = os.path.join(cache_dir, "fake_item_ids_store.db")
    db = SqliteDatabase(db_file, check_same_thread=False)

    class FakeItemIdsStore(Model):
        # Marks items that were already processed or turned out to be duplicates.
        is_deleted = BooleanField(default=False)
        item_id = CharField()
        item_content_json = TextField()
        # NOTE(review): a TimeField fed with datetime.now -- confirm that a
        # DateTimeField was not intended here.
        created_at = TimeField(default=datetime.datetime.now)

        class Meta:
            database = db

    self.storage = FakeItemIdsStore

    if self.storage.table_exists():
        return

    # Fresh database file: build the table together with its lookup index.
    self.storage.create_table()
    db.create_index(self.storage, ["is_deleted", "item_id"])
def build_features_tree(self): from peewee import SqliteDatabase, Model, IntegerField, CharField, BooleanField # built or connect database sqlite_path = { "memory": ":memory:", "disk": self.sqlite3db_path(), }[self.link_to_detdup.storage_type] sqlite_database = SqliteDatabase(sqlite_path, check_same_thread=False) class BaseFeaturesTree(Model): uniq_chars__len = IntegerField(default=0) sqrt_chars__len = IntegerField(default=0) sorted_freq_chars = CharField() # TODO support item_id as int or str type item_id = CharField() class Meta: database = sqlite_database self.features_tree = BaseFeaturesTree tablename = "_".join( self.custom_features).capitalize() or "DefaultFeaturesTree" # If customize more features if self.custom_features: self.features_tree = type(tablename, (BaseFeaturesTree, ), dict()) for feature_k1 in self.custom_features: # http://stackoverflow.com/questions/22358489/dynamically-define-fields-in-a-peewee-model feature_v1 = self.custom_features[feature_k1] # Compact with (int) instance if type(feature_v1) is int: feature_v1 = int field1 = {int: IntegerField, str: CharField}[feature_v1]() field1.add_to_class(self.features_tree, feature_k1) self.features_tree._meta.db_table = tablename # create table and indexes if not self.features_tree.table_exists(): self.features_tree.create_table() sqlite_database.create_index(self.features_tree, "item_id".split(" ")) # TODO 让大str在前面,加快索引搜索速度 index_columns = self.default_features.keys( ) + self.custom_features.keys() sqlite_database.create_index(self.features_tree, index_columns) print "[build_features_tree]", self.features_tree, "self.default_features :", self.default_features, "self.custom_features :", self.custom_features print
def build_features_tree(self): from peewee import SqliteDatabase, Model, IntegerField, CharField, BooleanField # built or connect database sqlite_path = { "memory" : ":memory:", "disk" : self.sqlite3db_path(), }[self.link_to_detdup.storage_type] sqlite_database = SqliteDatabase(sqlite_path, check_same_thread=False) class BaseFeaturesTree(Model): uniq_chars__len = IntegerField(default=0) sqrt_chars__len = IntegerField(default=0) sorted_freq_chars = CharField() # TODO support item_id as int or str type item_id = CharField() class Meta: database = sqlite_database self.features_tree = BaseFeaturesTree tablename = "_".join(self.custom_features).capitalize() or "DefaultFeaturesTree" # If customize more features if self.custom_features: self.features_tree = type(tablename, (BaseFeaturesTree,), dict()) for feature_k1 in self.custom_features: # http://stackoverflow.com/questions/22358489/dynamically-define-fields-in-a-peewee-model feature_v1 = self.custom_features[feature_k1] # Compact with (int) instance if type(feature_v1) is int: feature_v1 = int field1 = {int: IntegerField, str: CharField}[feature_v1]() field1.add_to_class(self.features_tree, feature_k1) self.features_tree._meta.db_table = tablename # create table and indexes if not self.features_tree.table_exists(): self.features_tree.create_table() sqlite_database.create_index(self.features_tree, "item_id".split(" ")) # TODO 让大str在前面,加快索引搜索速度 index_columns = self.default_features.keys() + self.custom_features.keys() sqlite_database.create_index(self.features_tree, index_columns) print "[build_features_tree]", self.features_tree, "self.default_features :", self.default_features, "self.custom_features :", self.custom_features print
def __init__(self, data_model):
    """Attach this store to ``data_model`` and open its SQLite-backed table.

    The instance registers itself as ``data_model.fake_item_ids_store``,
    opens ``fake_item_ids_store.db`` inside ``data_model.cache_dir`` and
    exposes the peewee model class as ``self.storage``. The table and its
    ``(is_deleted, item_id)`` index are created when the table is missing.

    Raises ``AssertionError`` when ``data_model.cache_dir`` is falsy.
    """
    self.data_model = data_model
    self.data_model.fake_item_ids_store = self

    cache_dir = self.data_model.cache_dir
    assert cache_dir, "FakeItemIds need cache_dir from data_model!"

    db_file = os.path.join(cache_dir, "fake_item_ids_store.db")
    db = SqliteDatabase(db_file, check_same_thread=False)

    class FakeItemIdsStore(Model):
        # Marks items that were already processed or turned out to be duplicates.
        is_deleted = BooleanField(default=False)
        item_id = CharField()
        item_content_json = TextField()
        # NOTE(review): a TimeField fed with datetime.now -- confirm that a
        # DateTimeField was not intended here.
        created_at = TimeField(default=datetime.datetime.now)

        class Meta:
            database = db

    self.storage = FakeItemIdsStore

    if self.storage.table_exists():
        return

    # Fresh database file: build the table together with its lookup index.
    self.storage.create_table()
    db.create_index(self.storage, ["is_deleted", "item_id"])