Example #1
0
    def __init__(self, data_model):
        """Bind a SQLite-backed fake-item-id store to ``data_model``.

        A peewee model backed by ``fake_item_ids_store.db`` inside
        ``data_model.cache_dir`` is defined and exposed as ``self.storage``.
        If its table does not exist yet, the table is created together with
        a composite index on ``(is_deleted, item_id)``.

        Args:
            data_model: Object exposing a truthy ``cache_dir``. Its
                ``fake_item_ids_store`` attribute is set to this instance.

        Raises:
            AssertionError: If ``data_model.cache_dir`` is falsy.
        """
        # Validate before touching data_model, and raise explicitly instead
        # of using ``assert`` so the check still runs under ``python -O``.
        if not data_model.cache_dir:
            raise AssertionError("FakeItemIds need cache_dir from data_model!")

        self.data_model = data_model
        self.data_model.fake_item_ids_store = self

        sqlite_path = os.path.join(self.data_model.cache_dir,
                                   "fake_item_ids_store.db")

        # NOTE(review): check_same_thread=False presumably allows the
        # connection to be shared across threads -- confirm with callers.
        sqlite_database = SqliteDatabase(sqlite_path, check_same_thread=False)

        class FakeItemIdsStore(Model):
            # Marks processed or duplicated items.
            is_deleted = BooleanField(default=False)
            item_id = CharField()
            item_content_json = TextField()
            # NOTE(review): TimeField is given a full datetime default; if
            # the date part matters a DateTimeField may be intended -- confirm.
            created_at = TimeField(default=datetime.datetime.now)

            class Meta:
                database = sqlite_database

        self.storage = FakeItemIdsStore

        # First use: create the table and its lookup index together.
        if not self.storage.table_exists():
            self.storage.create_table()
            sqlite_database.create_index(self.storage,
                                         ["is_deleted", "item_id"])
Example #2
0
    def build_features_tree(self):
        """Define the peewee model holding item features and ensure its table exists.

        The SQLite location is chosen by ``self.link_to_detdup.storage_type``
        (``"memory"`` -> ``":memory:"``, ``"disk"`` -> ``self.sqlite3db_path()``).
        The base model has the columns ``uniq_chars__len``,
        ``sqrt_chars__len``, ``sorted_freq_chars`` and ``item_id``. When
        ``self.custom_features`` is non-empty, a subclass named after the
        joined feature names is created and one extra column is added per
        custom feature. The resulting model is stored on
        ``self.features_tree``.

        If the table does not exist yet it is created, along with an index on
        ``item_id`` and a composite index over the keys of
        ``self.default_features`` and ``self.custom_features``. A summary
        line is printed at the end.

        Raises:
            KeyError: If ``storage_type`` is neither ``"memory"`` nor
                ``"disk"``, or a custom feature type is not ``int``/``str``
                (an ``int`` instance is accepted in place of ``int``).
        """
        from peewee import SqliteDatabase, Model, IntegerField, CharField

        # Build or connect to the database.
        sqlite_path = {
            "memory": ":memory:",
            "disk": self.sqlite3db_path(),
        }[self.link_to_detdup.storage_type]
        sqlite_database = SqliteDatabase(sqlite_path, check_same_thread=False)

        class BaseFeaturesTree(Model):
            uniq_chars__len = IntegerField(default=0)
            sqrt_chars__len = IntegerField(default=0)
            sorted_freq_chars = CharField()
            # TODO support item_id as int or str type
            item_id = CharField()

            class Meta:
                database = sqlite_database

        self.features_tree = BaseFeaturesTree

        # An empty join yields "", so the default name is used in that case.
        tablename = "_".join(
            self.custom_features).capitalize() or "DefaultFeaturesTree"

        # Extend the base model when extra features are configured.
        if self.custom_features:
            self.features_tree = type(tablename, (BaseFeaturesTree, ), {})
            field_classes = {int: IntegerField, str: CharField}
            for feature_name, feature_type in self.custom_features.items():
                # http://stackoverflow.com/questions/22358489/dynamically-define-fields-in-a-peewee-model
                # Accept an int instance (e.g. 0) as shorthand for ``int``.
                if type(feature_type) is int:
                    feature_type = int
                field = field_classes[feature_type]()
                field.add_to_class(self.features_tree, feature_name)

        self.features_tree._meta.db_table = tablename

        # Create the table and its indexes on first use.
        if not self.features_tree.table_exists():
            self.features_tree.create_table()
            sqlite_database.create_index(self.features_tree, ["item_id"])

            # TODO put the large str columns first to speed up index lookups
            # list(...) keeps the concatenation valid on Python 3, where
            # dict.keys() returns a view that does not support "+".
            index_columns = (list(self.default_features) +
                             list(self.custom_features))
            sqlite_database.create_index(self.features_tree, index_columns)

        # A single pre-formatted string prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print("[build_features_tree] %s self.default_features : %s "
              "self.custom_features : %s" % (self.features_tree,
                                             self.default_features,
                                             self.custom_features))
        print("")  # blank separator line
Example #3
0
    def build_features_tree(self):
        """Create the features model on ``self.features_tree`` and its SQLite table.

        Steps performed:

        1. Pick the SQLite path from ``self.link_to_detdup.storage_type``:
           ``"memory"`` maps to ``":memory:"`` and ``"disk"`` to
           ``self.sqlite3db_path()``.
        2. Define a base peewee model with ``uniq_chars__len``,
           ``sqrt_chars__len``, ``sorted_freq_chars`` and ``item_id``.
        3. If ``self.custom_features`` is non-empty, derive a subclass and
           add an ``IntegerField`` (for ``int``) or ``CharField`` (for
           ``str``) per custom feature.
        4. Set the table name, and if the table is missing create it plus an
           index on ``item_id`` and a composite index over the keys of
           ``self.default_features`` and ``self.custom_features``.
        5. Print a one-line summary followed by a blank line.

        Raises:
            KeyError: For a ``storage_type`` other than ``"memory"`` or
                ``"disk"``, or a custom feature whose type is neither
                ``int`` (or an ``int`` instance) nor ``str``.
        """
        from peewee import SqliteDatabase, Model, IntegerField, CharField

        # Build or connect to the database.
        path_by_storage = {
            "memory": ":memory:",
            "disk": self.sqlite3db_path(),
        }
        sqlite_path = path_by_storage[self.link_to_detdup.storage_type]
        sqlite_database = SqliteDatabase(sqlite_path, check_same_thread=False)

        class BaseFeaturesTree(Model):
            uniq_chars__len = IntegerField(default=0)
            sqrt_chars__len = IntegerField(default=0)
            sorted_freq_chars = CharField()
            # TODO support item_id as int or str type
            item_id = CharField()

            class Meta:
                database = sqlite_database

        self.features_tree = BaseFeaturesTree

        # Falls back to the default name when there are no custom features.
        tablename = "_".join(self.custom_features).capitalize() or "DefaultFeaturesTree"

        # Add one column per custom feature on a dedicated subclass.
        if self.custom_features:
            self.features_tree = type(tablename, (BaseFeaturesTree,), {})
            column_factory = {int: IntegerField, str: CharField}
            for name, kind in self.custom_features.items():
                # http://stackoverflow.com/questions/22358489/dynamically-define-fields-in-a-peewee-model
                # A plain int value is treated the same as the ``int`` type.
                if type(kind) is int:
                    kind = int
                column_factory[kind]().add_to_class(self.features_tree, name)

        self.features_tree._meta.db_table = tablename

        # Create the table and its indexes only when the table is missing.
        if not self.features_tree.table_exists():
            self.features_tree.create_table()
            sqlite_database.create_index(self.features_tree, ["item_id"])

            # TODO put the large str columns first to speed up index lookups
            # Materialise the keys as lists: on Python 3 dict.keys() returns
            # views, and "view + view" raises TypeError.
            index_columns = list(self.default_features) + list(self.custom_features)
            sqlite_database.create_index(self.features_tree, index_columns)

        # Format once so the text is identical under the Python 2 print
        # statement and the Python 3 print function.
        summary = "[build_features_tree] %s self.default_features : %s self.custom_features : %s" % (
            self.features_tree, self.default_features, self.custom_features)
        print(summary)
        print("")  # blank separator line
Example #4
0
    def __init__(self, data_model):
        """Set up the fake-item-id storage for ``data_model``.

        Defines a peewee model stored in ``fake_item_ids_store.db`` under
        ``data_model.cache_dir`` and publishes it as ``self.storage``. When
        the model's table is missing, it is created along with a composite
        index on ``(is_deleted, item_id)``.

        Args:
            data_model: Object with a truthy ``cache_dir`` attribute. This
                instance is assigned to its ``fake_item_ids_store`` attribute.

        Raises:
            AssertionError: If ``data_model.cache_dir`` is falsy.
        """
        # An explicit raise keeps this check alive under ``python -O`` (plain
        # ``assert`` is stripped), and running it first avoids attaching a
        # half-initialised store to data_model.
        if not data_model.cache_dir:
            raise AssertionError("FakeItemIds need cache_dir from data_model!")

        self.data_model = data_model
        self.data_model.fake_item_ids_store = self

        sqlite_path = os.path.join(self.data_model.cache_dir, "fake_item_ids_store.db")

        # NOTE(review): check_same_thread=False presumably permits use of the
        # connection from several threads -- confirm against callers.
        sqlite_database = SqliteDatabase(sqlite_path, check_same_thread=False)

        class FakeItemIdsStore(Model):
            is_deleted = BooleanField(default=False)  # mark processed or duplicated items
            item_id = CharField()
            item_content_json = TextField()
            # NOTE(review): default is a full datetime but the column is a
            # TimeField; a DateTimeField may be intended -- confirm.
            created_at = TimeField(default=datetime.datetime.now)

            class Meta:
                database = sqlite_database

        self.storage = FakeItemIdsStore

        # Table and index are created together the first time the DB is used.
        if not self.storage.table_exists():
            self.storage.create_table()
            sqlite_database.create_index(self.storage, ["is_deleted", "item_id"])