Beispiel #1
0
 def row_generator():
     row_list = [
         Row(("movie_id", "title"), ("m0004", "Madagascar 2")),
         Row(("movie_id", "title"), ("m0005", "Madagascar 2")),
     ]
     for row in row_list:
         yield row
Beispiel #2
0
    def clone_from_data_stream(self, documents):
        """用于从0开始, 从批量文档中生成数据库, 性能较高
        1. 往主表格中填充一条文档
        2. 从文档中生成倒排索引的索引字典
        3. 往所有的索引表中填充索引
                
        invert_index = {keyword: set_of_uuid}
        """
        import time
        st = time.clock()
        print("正在往数据库 %s 中填充数据..." % self.schema.schema_name)

        # 初始化all_inv_dict
        all_inv_dict = dict()
        for keyword_field in self.schema.keyword_fields:  # initialize a empty dict for invert index
            all_inv_dict[keyword_field] = dict()

        ins = self.main_table.insert()
        counter = 0
        for document in documents:
            counter += 1
            # 将字典document转化为row
            columns, values = list(), list()
            for k, v in document.items():
                columns.append(k)
                values.append(v)
            row = Row(columns, values)

            # 更新主表的数据
            try:
                self.engine.insert_row(ins, row)
            except:
                pass

            # 计算倒排索引
            for keyword_field in self.schema.keyword_fields:
                uuid = document[self.schema.uuid]
                for keyword in document[keyword_field]:
                    if keyword in all_inv_dict[keyword_field]:
                        all_inv_dict[keyword_field][keyword].add(uuid)
                    else:
                        all_inv_dict[keyword_field][keyword] = set([
                            uuid,
                        ])

        # 将all_inv_dict中的数据存入索引表中
        for keyword_field in all_inv_dict:
            table = self.get_table(keyword_field)
            ins = table.insert()
            for keyword, uuid_set in all_inv_dict[keyword_field].items():
                self.engine.insert_record(ins, (keyword, uuid_set))

        print("\t数据库准备完毕, 一共插入了 %s 条数据, 可以进行搜索了! 一共耗时 %s 秒" % (
            counter,
            (time.clock() - st),
        ))
Beispiel #3
0
    def add_one(self, document):
        """用于往数据库中添加数据, 以增量更新的模式更新索引, 性能较低
        1. 往主表格中填充一条文档
        2. 更新倒排索引里的表中的索引数据
        """
        # 将字典document转化为row
        columns, values = list(), list()
        for k, v in document.items():
            columns.append(k)
            values.append(v)
        row = Row(columns, values)

        # 更新主表的数据
        ins = self.main_table.insert()
        self.engine.insert_row(ins, row)

        # 对每一个field所对应的表进行更新
        for keyword_field in self.schema.keyword_fields:
            table = self.get_table(keyword_field)
            ins = table.insert()
            upd = table.update()

            # 对表中所涉及的keyword的行进行更新
            for keyword in document[keyword_field]:
                try:  # 尝试插入新的, 若已经存在, 则进入更新
                    self.engine.insert_row(
                        ins,
                        Row(["keyword", "uuid_set"],
                            [keyword,
                             set([document[self.schema.uuid]])]))
                except:  # 对keyword的行进行更新
                    a = self.engine.select(
                        Select([table.uuid_set
                                ]).where(table.keyword == keyword))

                    new_uuid_set = list(a)[0][0]

                    print(new_uuid_set)
                    new_uuid_set.add(document[self.schema.uuid])

                    self.engine.update(
                        upd.values(uuid_set=new_uuid_set).where(
                            table.keyword == keyword))
Beispiel #4
0
def Engine_insert_and_update_unittest():
    ins = movie.insert()
    engine.insert_many_records(ins, records)

    new_records = [
        ("m0001", "The Shawshank Redemption", 999, None, date(1994, 10, 14),
         {"Drama", "Crime"}),
        ("m0002", "The Godfather", 999, None, date(1972, 3,
                                                   24), {"Crime", "Drama"}),
    ]
    new_rows = [
        Row(("movie_id", "title", "length", "release_date", "genres"),
            ("m0004", "12 Angry Men", 999, date(1957, 4, 11), {"Drama"})),
        Row(("movie_id", "title", "length", "release_date", "genres"),
            ("m0005", "Schindler's List", 999, date(
                1994, 2, 4), {"Biography", "Drama", "History"})),
    ]

    engine.insert_and_update_many_records(ins, new_records)
    engine.insert_and_update_many_rows(ins, new_rows)
    engine.prt_all(movie)
Beispiel #5
0
    def test1():
        """测试逐条插入的方法
        """
        record = ("m0001", "Yes Man!", 95, 6.2, "2010-01-01",
                  {"Drama", "Fantasy"})
        row = Row(
            ("movie_id", "title", "length", "rate", "release_date", "genres"),
            ("m0002", "Yes Man!", 95, 6.2, "2010-01-01", {"Drama", "Fantasy"}))

        engine.insert_record(ins, record)
        engine.insert_row(ins, row)
        engine.prt_all(movie)
Beispiel #6
0
def Row_unittest():
    """测试Row的属性和方法
    """
    row = Row(("movie_id", "title"), ("m0002", "Madagascar 2"))
    tplt._straightline("__str__ and __repr__ method")
    print(row)
    print(repr(row))
    tplt._straightline("access attribute and item")
    print(row.movie_id)
    print(row["title"])

    # change row item
    row["title"] = "good"
    print(row)
Beispiel #7
0
def Insert_unittest():
    record = ("m0001", "Yes Man!", 95, 6.3, "2010-01-01", {"Drama", "Fantasy"})
    row = Row(("movie_id", "title", "genres"),
              ("m0002", "Madagascar 2", {"Anime", "Comedy"}))
    ins = movie.insert()

    tplt._straightline("insert record sqlcmd")
    ins.sqlcmd_from_record()
    print(ins.insert_sqlcmd)

    tplt._straightline("insert row sqlcmd")
    ins.sqlcmd_from_row(row)
    print(ins.insert_sqlcmd)

    tplt._straightline("record converter")
    print(ins.default_record_converter(record))

    tplt._straightline("row converter")
    print(ins.default_row_converter(row))
Beispiel #8
0
    Column("intlist_type", datatype.intlist, default=IntList([8, 9, 10])),
    Column("pickletype_type",
           datatype.pickletype,
           default={
               1: "a",
               2: "b",
               3: "c"
           }),
)

print(pythontype.create_table_sql())

metadata.create_all(engine)

ins = pythontype.insert()

# record = (1, datetime.date(1999,1,1), datetime.datetime(2000,1,1,0,30,45),
#           [1,2,3], {1,2,3}, {1:"a", 2:"b", 3: "c"}, OrderedDict({1:"a", 2:"b", 3: "c"}),
#           StrSet(["a", "b", "c"]), IntSet([1, 2, 3]), StrList(["x", "y", "z"]), IntList([9, 8, 7]),
#           MyClass(1000))
# engine.insert_record(ins, record)

row = Row(["uuid"], [1])
engine.insert_row(ins, row)

for record in engine.execute("SELECT * FROM pythontype"):
    print(record)
for record in engine.select(Select(pythontype.all)):
    print(record)
engine.prt_all(pythontype)
Beispiel #9
0
conn = engine.connect
c = engine.cursor

metadata = MetaData()
datatype = DataType()
col_ID = Column("ID", datatype.integer, primary_key=True)
col_name = Column("name", datatype.text)
test = Table("test", metadata, col_ID, col_name)
metadata.create_all(engine)
ins = test.insert()

records = [(i, "abcdefghijklmnopqrstuvwxyz") for i in range(1000)]
records = records + [(random.randint(1, 1000), "abcdefghijklmnopqrstuvwxyz")
                     for i in range(10)]
rows = [
    Row(("ID", "name"), (i, "abcdefghijklmnopqrstuvwxyz")) for i in range(1000)
]
rows = rows + [
    Row(("ID", "name"),
        (random.randint(1, 1000), "abcdefghijklmnopqrstuvwxyz"))
    for i in range(10)
]

timer = Timer()


def insert_test1():  # 4.0 - 5.0 second
    """test insert record one by one 
    """
    timer.start()
    for record in records: