def find_all(cls):
    """Fetch every row of the ``tb_tag`` table.

    :return: the result of ``MysqlHelper.fetch_all`` for the query, or
        ``False`` when the helper instance is falsy.
    """
    db = MysqlHelper()
    if not db:
        return False
    return db.fetch_all("select * from tb_tag")
def find_by_id(cls, id):
    """Fetch the ``tb_tag`` rows whose ``id`` column matches *id*.

    NOTE(review): *id* is interpolated straight into the SQL text, so it
    must come from a trusted source; consider a parameterized query if the
    helper supports one.

    :param id: value substituted into the ``where id = ...`` clause.
    :return: the result of ``MysqlHelper.fetch_all`` for the query, or
        ``False`` when the helper instance is falsy.
    """
    # The query text is built before the helper is created, as before.
    query = "select * from tb_tag where id = %s" % id
    db = MysqlHelper()
    return db.fetch_all(query) if db else False
def find_all(cls):
    """Fetch every row of the ``tb_video`` table.

    :return: the result of ``MysqlHelper.fetch_all`` for the query, or
        ``None`` when the helper instance is falsy.
    """
    db = MysqlHelper()
    if not db:
        return None
    return db.fetch_all("select * from tb_video")
def find_all_tag_urls(cls):
    """Collect the ``tag_addr`` value of every row in ``tb_tag``.

    :return: a list holding the first column of each fetched row; an empty
        list when the helper instance is falsy.
    """
    db = MysqlHelper()
    if not db:
        return []
    # Each fetched row is indexable; only its first column is kept.
    return [row[0] for row in db.fetch_all("select tag_addr from tb_tag")]
def find_by_id(cls, id):
    """Fetch the ``tb_video`` rows whose ``id`` column matches *id*.

    NOTE(review): *id* is interpolated straight into the SQL text, so it
    must come from a trusted source; consider a parameterized query if the
    helper supports one.

    :param id: value substituted into the ``where id = ...`` clause.
    :return: the result of ``MysqlHelper.fetch_all`` for the query, or
        ``None`` when the helper instance is falsy.
    """
    # The query text is built before the helper is created, as before.
    query = "select * from tb_video where id = %s" % id
    db = MysqlHelper()
    if not db:
        return None
    return db.fetch_all(query)
def insert(cls, tag_name, tag_id, tag_addr, tag_video_count):
    """Insert one row into ``tb_tag``.

    NOTE(review): the values are interpolated into the SQL text inside
    single quotes with no escaping, so a value containing a quote alters
    the statement; consider a parameterized query if the helper supports
    one.

    :param tag_name: value for the ``tag_name`` column.
    :param tag_id: value for the ``tag_id`` column.
    :param tag_addr: value for the ``tag_addr`` column.
    :param tag_video_count: value for the ``tag_video_count`` column.
    :return: the result of ``MysqlHelper.execute`` for the statement, or
        ``False`` when the helper instance is falsy.
    """
    statement = (
        "insert into tb_tag (tag_name, tag_id, tag_addr, tag_video_count) "
        "values ('%s','%s','%s','%s') "
    ) % (tag_name, tag_id, tag_addr, tag_video_count)
    db = MysqlHelper()
    if not db:
        return False
    return db.execute(statement)
def find_all_author_urls(cls):
    """Collect the ``home_url`` value of every row in ``tb_author``.

    :return: a list holding the first column of each fetched row; an empty
        list when the helper instance is falsy or no rows were fetched.
    """
    db = MysqlHelper()
    if not db:
        return []
    rows = db.fetch_all("select home_url from tb_author")
    # The explicit length check is kept so a result without ``len()``
    # fails in the same way it always has.
    if len(rows) == 0:
        return []
    return [row[0] for row in rows]
def insert(cls, author_name, home_url, info):
    """Insert one row into ``tb_author``.

    NOTE(review): the values are interpolated into the SQL text inside
    single quotes with no escaping, so a value containing a quote alters
    the statement; consider a parameterized query if the helper supports
    one.

    :param author_name: value for the ``author_name`` column.
    :param home_url: value for the ``home_url`` column.
    :param info: value for the ``info`` column.
    :return: the result of ``MysqlHelper.execute`` for the statement, or
        ``False`` when the helper instance is falsy.
    """
    template = (
        "insert into tb_author (author_name, home_url, info) "
        "values ('%s','%s','%s') "
    )
    statement = template % (author_name, home_url, info)
    db = MysqlHelper()
    return db.execute(statement) if db else False
def find_all_video_urls(cls) -> list:
    """Collect the ``page_url`` value of every row in ``tb_video``.

    :return: a list holding the first column of each fetched row; an empty
        list when the helper instance is falsy.
    """
    db = MysqlHelper()
    if not db:
        return []
    # Each fetched row is indexable; only its first column is kept.
    return [row[0] for row in db.fetch_all("select page_url from tb_video")]
def insert(cls, name, author, page_url, video_url, image_url, create_time, content):
    """Insert one row into ``tb_video``.

    NOTE(review): the values are interpolated into the SQL text inside
    single quotes with no escaping, so a value containing a quote alters
    the statement; consider a parameterized query if the helper supports
    one.

    :param name: value for the ``video_name`` column.
    :param author: value for the ``video_author`` column.
    :param page_url: value for the ``page_url`` column.
    :param video_url: value for the ``video_url`` column.
    :param image_url: value for the ``image_url`` column.
    :param create_time: value for the ``create_time`` column.
    :param content: value for the ``content`` column.
    :return: the result of ``MysqlHelper.execute`` for the statement, or
        ``False`` when the helper instance is falsy.
    """
    template = (
        "insert into tb_video (video_name, video_author, page_url, video_url, image_url, create_time, content) "
        "values ('%s', '%s', '%s', '%s', '%s', '%s', '%s')"
    )
    values = (name, author, page_url, video_url, image_url, create_time, content)
    statement = template % values
    db = MysqlHelper()
    if not db:
        return False
    return db.execute(statement)
result = result and self.bit_array[b] return result def get_postions(self, url): # 一个url获取七个位置,之后会把这七个位置变为1 point1 = mmh3.hash(url, 41) % self.BIT_SIZE point2 = mmh3.hash(url, 42) % self.BIT_SIZE point3 = mmh3.hash(url, 43) % self.BIT_SIZE point4 = mmh3.hash(url, 44) % self.BIT_SIZE point5 = mmh3.hash(url, 45) % self.BIT_SIZE point6 = mmh3.hash(url, 46) % self.BIT_SIZE point7 = mmh3.hash(url, 47) % self.BIT_SIZE return [point1, point2, point3, point4, point5, point6, point7] if __name__ == '__main__': BIT_SIZE = 5000000 # 类的实例化 bloom_filter = BloomFilter(BIT_SIZE) helper = MysqlHelper() results = helper.fetchall("select * from pear_video limit 1000") for result in results: bloom_filter.add(result[3]) test_list = helper.fetchall( "select * from pear_video where id between 700 and 1300") for test in test_list: res = bloom_filter.contains(test[3]) print('被检测的网址 : ', test[3], '/ 是否被包含在原集合中 : ', res)