def __init__(self, ui):
    self.ui = ui
    self.precompile_filter = BloomFilter()
    self.keyword_filter = BloomFilter(7, 10000)
    self.plainFmt = None  # works around the format error
    self.flag_format_changed = False  # guards against infinite recursion
    self.init_filter()
    self.color = self.FontColor()
    self.plainFmt = self.ui.textEdit.currentCharFormat()
def __init__(self):
    self.words_bloom = BloomFilter(max_elements=64_000, error_rate=0.000001)
    self.parts_bloom = BloomFilter(max_elements=700_000, error_rate=0.000001)
    try:
        self.words_bloom, self.parts_bloom = pickle.load(_VOCABULARY_PATH.open('rb'))
    except Exception:
        logger.warning('Vocabulary unpickling error:\n' + traceback.format_exc())
        self._build_from_file()
        pickle.dump((self.words_bloom, self.parts_bloom), _VOCABULARY_PATH.open('wb'))
def __init__(self):
    self.bf_urls = BloomFilter(max_elements=10000000, error_rate=0.001,
                               filename="Filter_files/urls_AET_1.bf")
    self.bf_content = BloomFilter(max_elements=10000000, error_rate=0.001,
                                  filename="Filter_files/title_AET_1.bf")
    self.bf_urls1 = BloomFilter(max_elements=10000000, error_rate=0.001,
                                filename="Filter_files/urls_AET.bf")
    self.bf_content1 = BloomFilter(max_elements=10000000, error_rate=0.001,
                                   filename="Filter_files/title_AET2.bf")
def filter_anchors(query):
    non_singles = BloomFilter(max_elements=1000, error_rate=0.1)
    singles = BloomFilter(max_elements=5800, error_rate=0.1)
    priority0_x, priority1_x, priority2_x = [], [], []
    priority0_y, priority1_y, priority2_y = [], [], []
    for i in range(len(query.x)):
        d = str(((query.x[i] - query.y[i]) >> 4) << 4)
        if d in non_singles:
            priority2_x.append(query.x[i])
            priority2_y.append(query.y[i])
        else:
            non_singles.add(d)
            priority0_x.append(query.x[i])
            priority0_y.append(query.y[i])
        # Two-level variant kept for reference:
        #if d in non_singles:
        #    priority2_x.append(query.x[i])
        #    priority2_y.append(query.y[i])
        #elif d in singles:
        #    non_singles.add(d)
        #    priority1_x.append(query.x[i])
        #    priority1_y.append(query.y[i])
        #else:
        #    singles.add(d)
        #    priority0_x.append(query.x[i])
        #    priority0_y.append(query.y[i])

    # Some stats:
    filter_mem = (non_singles.num_bits_m) / 8192
    original = (128 * len(query.x)) / 8192
    filtered = (128 * len(priority2_x)) / 8192
    print("Original memory use: {}".format(original))
    print("New memory use, data only: {}".format(filtered))
    print("New memory use, with filter: {}".format(filtered + filter_mem))
    print("Savings factor: {}".format(original / (filtered + filter_mem)))

    # Plotting (disabled):
    #plt.scatter(priority0_x, priority0_y, s=10)                       # Priority 0
    #plt.scatter(priority1_x, priority1_y, s=10, color='deepskyblue')  # Priority 1
    #plt.scatter(priority2_x, priority2_y, s=10, color='r')            # Priority 2
    #plt.xlabel("Reference Position")
    #plt.ylabel("Query Position")
    #plt.title("Filtered anchors. Filter size: {}K. Num Hashes: {} False positive Rate: {}".format(filter_mem, 4, 0.1))
    #plt.show()

    return original / (filtered + filter_mem)
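# A minimal, hedged usage sketch for filter_anchors above: the real query type is
# not shown in the snippet, so a namedtuple with .x and .y lists stands in for it.
from collections import namedtuple

Query = namedtuple('Query', ['x', 'y'])

q = Query(x=[100, 116, 250, 260], y=[90, 106, 240, 251])
savings = filter_anchors(q)  # prints the memory stats and returns the savings factor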
def __init__(self, primes, num_prime_funcs, num_prime_bits, num_fp_funcs, num_fp_bits):
    self.primes_bloom_filter = BloomFilter(num_prime_funcs, num_prime_bits)
    self.fps_bloom_filter = BloomFilter(num_fp_funcs, num_fp_bits)
    print('Adding primes')
    for p in primes[:-1]:  # why ignore the last prime?
        self.primes_bloom_filter.add(p)
    print('Adding false positives..')
    for i in range(primes[0], primes[-1]):
        true_prime = i in primes
        bf_prime = self.primes_bloom_filter.contains(i)
        if true_prime and not bf_prime:
            assert False, 'False negatives NEVER happen'
        elif not true_prime and bf_prime:
            self.fps_bloom_filter.add(i)
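# A hedged sketch of how the two filters above would be queried together, assuming
# the intent is the classic two-level construction: fps_bloom_filter records the
# first filter's false positives, so membership is only reported when the primes
# filter fires and the false-positive filter does not. The method name
# is_probably_prime is hypothetical.
def is_probably_prime(self, i):
    if not self.primes_bloom_filter.contains(i):
        return False  # bloom filters have no false negatives
    # The first filter fired; consult the recorded false positives.
    return not self.fps_bloom_filter.contains(i)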
class EventValidator:
    def __init__(self):
        # Instance state (the original declared these at class level and had a
        # no-op __init__, which shares mutable lists across instances).
        self.last = -1
        self.current = -1
        self.bloom = BloomFilter(4980000, 0.01)
        self.orderErrors = []
        self.uniqueErrors = []

    def checkOrder(self, value):
        if self.last == -1:
            self.last = value
        else:
            self.current = value
            if self.last > self.current:
                self.orderErrors.append(self.current)
            else:
                self.last = self.current

    def checkUnique(self, value):
        if value in self.bloom:
            self.uniqueErrors.append(value)
        else:
            self.bloom.add(value)
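# Illustrative use of EventValidator (the value stream is made up): out-of-order
# values land in orderErrors, repeated values (probably) land in uniqueErrors.
validator = EventValidator()
for value in [1, 2, 2, 5, 4]:
    validator.checkOrder(value)
    validator.checkUnique(value)
print(validator.orderErrors)   # [4] - arrived after 5
print(validator.uniqueErrors)  # [2] - the filter had already seen it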
def crawl_city_page(url):
    bloomf = BloomFilter(10000000, 0.01)
    # print(bloomf)
    # Page listing every city nationwide
    city_page_content = crawl_dp_page(url, 'citylist')
    # print(city_page_content, '>>>>>')
    html = etree.HTML(city_page_content)
    # Links to each city's page
    city_urls = html.xpath('//div[@class="findHeight"]/a/@href')
    print(city_urls)
    for city_detail_url in city_urls:
        page = 1
        while True:
            # Shop list page
            city_detail_page_content = crawl_dp_page(
                "http:" + city_detail_url + "/ch20/g187p%s" % page, 'citydetailpage')
            # Shop detail URLs
            shop_urls = re.findall(r'href="(.+/shop/[0-9]+)"', city_detail_page_content)
            tot_shop = 0
            for shop_url in shop_urls:
                if shop_url in bloomf:
                    print("ignore duplicate " + shop_url)
                else:
                    # Shop detail page
                    shop_content = crawl_dp_page(shop_url, 'shopdetail')
                    bloomf.add(shop_url)
                    parse_shop(shop_content)
                    tot_shop += 1
            if tot_shop < 1:  # break if there is no shop on that page
                break
            page += 1
async def filter(self, existed_vid_list=None):
    # Construct a bloom filter seeded with the ids we have already stored.
    bloom = BloomFilter(max_elements=config.MAX_ESTIMATE_RECORD_NUMBER)
    for ele in (existed_vid_list or []):
        bloom.add(ele)  # add origin_id into the filter
    latest_results = []  # final result to output
    # The paging for xinpianchang is unreliable, so keep a buffer to make sure
    # we really reach the end of the new records.
    buffer = config.check_latest_buffer
    latest = await self.fetch()
    for ele in latest:
        if ele['vid'] in bloom:
            # The element is already recorded, meaning the upcoming elements are
            # probably repeats; drain the buffer before returning, because the
            # paging issue can interleave old and new records.
            if buffer == 0:
                del bloom  # release memory
                return jmespath.search('[]', latest_results) if latest_results else []
            buffer -= 1
            continue
        bloom.add(ele['vid'])  # add origin_id into the filter
        latest_results.append(ele)
    return jmespath.search('[]', latest_results) if latest_results else []
class Test:
    # These three variables are shared across threads.
    f = BloomFilter(0.0001, 10000000)
    urls = [
        'http://m.sodu.com',
    ]
    count = 0

    @classmethod
    def get_url(cls):
        url = Test.urls.pop(0)
        while Test.f.is_element_exist(url):
            url = Test.urls.pop(0)
        Test.f.insert_element(url)
        # Optional: append each crawled url to a file, at the cost of extra I/O.
        # with open('urls.txt', 'a') as file_obj:
        #     file_obj.write(url + '\n')
        return url

    @classmethod
    def get_urls(cls):
        while len(Test.urls) > 0:
            url = Test.get_url()
            try:
                Test.count += 1
                print(Test.count, url)
                analysis = PageParser(url)
                test = analysis.get_urls()
                Test.urls += test
            except Exception:
                pass
def add(self, v):
    now = int(time.time())
    if now - self.last_reset_time > self.RESET_TIME:
        logging.info("bloom filter reset")
        self.bf = BloomFilter()
        self.last_reset_time = now
    self.bf.add(v)
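# A hedged sketch of the enclosing class this add() method implies; the class
# name and the one-hour window are assumptions, since only add() appears above.
import time

class RollingBloomFilter:
    RESET_TIME = 3600  # seconds between resets (assumed value)

    def __init__(self):
        self.bf = BloomFilter()
        self.last_reset_time = int(time.time())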
def __init__(self, expected_inserts, error_rate, mode=SMALL_GROWTH):
    self.sbfilters = []
    sbfilter = BloomFilter(expected_inserts, error_rate)
    self.sbfilters.append(sbfilter)
    self.error_prob_ratio = 0.9
    self.space_scale = mode
def add_time(capacity, error_rate):
    bfilter = BloomFilter(capacity, error_rate)
    start_time = time.time()
    for i in range(capacity):
        bfilter.add(i)
    end_time = time.time()
    return end_time - start_time
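# Quick benchmark loop around add_time (capacities and error rate are arbitrary):
for capacity in (10_000, 100_000, 1_000_000):
    elapsed = add_time(capacity, 0.001)
    print(f"{capacity} inserts took {elapsed:.3f}s")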
def test_bloom_filter():
    bloomfilter = BloomFilter(NUM_KEYS, FALSE_POSITIVE_PROBABILITY)
    word_present = [
        'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'bloom',
        'blossom', 'bolster', 'bonny', 'bonus', 'bonuses', 'coherent',
        'cohesive', 'colorful', 'comely', 'comfort', 'gems', 'generosity',
        'generous', 'generously', 'genial'
    ]
    word_absent = ['facebook', 'twitter']
    for item in word_present:
        bloomfilter.add(item)
    test_words = word_present[:10] + word_absent
    shuffle(test_words)
    for word in test_words:
        if bloomfilter.is_member(word):
            if word in word_absent:
                print(f"'{word}' is a false positive!")
                logging.info(f"'{word}' is a false positive!")
            else:
                print(f"'{word}' is probably present!")
                logging.info(f"'{word}' is probably present!")
        else:
            print(f"'{word}' is definitely not present!")
            logging.info(f"'{word}' is definitely not present!")
def __init__(self, n, m, dimension, is_real_size=False, point_build=[],
             discretisator=None):
    """
    :param n: number of elements that will be stored.
    :param m: size of the bloom filter in bits.
    :param point_build: list of point objects to insert into the bloom filter.
    """
    self.dimension = dimension
    if is_real_size:
        n = n * 2**dimension
        m = m * 2**dimension
    error_rate = float(math.exp((math.log(2)**2) * float(m) / float(-1 * n)))
    self.bloom_filter = BloomFilter(n, error_rate)
    # TODO version with the choice of the hash: the third parameter is the hash
    self.point_build = point_build
    self.discretisator = discretisator
    if self.discretisator:
        for point in point_build:
            for d_pt in self.discretisator.discretise_point_to_insert(point):
                self.bloom_filter.add(d_pt.to_string())
    else:
        for pt in point_build:
            self.bloom_filter.add(pt.to_string())
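# The error_rate expression above inverts the textbook sizing formula
# m = -n * ln(p) / (ln 2)^2, which assumes an optimal number of hash functions.
# A quick sanity check (standalone sketch, not part of the class):
import math

def optimal_bits(n, p):
    # Bits needed to store n elements at false-positive rate p.
    return -n * math.log(p) / (math.log(2) ** 2)

n = 1000
m = optimal_bits(n, 0.01)
p = math.exp((math.log(2) ** 2) * m / (-1 * n))  # same expression as in __init__
print(round(p, 4))  # recovers 0.01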
def test_false_positives():
    """See how well it does at false positives."""
    bloom = BloomFilter(initial_size=10, vector_type=ByteBitVector)
    stats = Counter()
    counter = 0
    logging.basicConfig(level=logging.DEBUG)
    with open('./wordlist.txt', 'r', encoding='iso-8859-1') as fp:
        for line in fp:
            term = line.strip()
            contains = term in bloom
            if contains is True:
                counter += 1
                logging.debug('%d\tTerm already set:\t%s', counter, term)
            stats.update([str(contains)])
            bloom.add(term)
            assert term in bloom  # no false negatives
    false_positives = float(stats['True'] / sum(stats.values()))
    print('Count of false positives:', stats,
          "{:0.04f}%".format(100 * false_positives))
    print('sys.getsizeof filter:', getsizeof(bloom))
    print('sys.getsizeof bitvector needed:', getsizeof(bloom.bitvector.bits))
    print('num bits in vector:', len(bloom.bitvector))
    print('num bitvector set:', bloom.bitvector.num_set())
    # A bit arbitrary - descriptions of bloom filters say 2-3% false positives
    # is pretty good.
    assert false_positives <= .03
def __init__(self, name, size=None, max_elements=1e4, error_rate=1e-8):
    self.name = name
    self.size = size
    self.max_elements = max_elements
    self.error_rate = error_rate
    self._set = BloomFilter(max_elements=max_elements, error_rate=error_rate)
def __init__(self, partition_id, src_dir, dest_dir, dry_run, flatten):
    self.partition_id = partition_id
    self.src_dir = src_dir
    self.dest_dir = dest_dir
    self.dest_bloom = BloomFilter(max_elements=EST_MAX_FILES_PER_YEAR)
    self.dry_run = dry_run
    self.flatten = flatten
def build_db(self):
    if isfile(self.tcem_index):
        os.remove(self.tcem_index)
    logger.info(f'<chunk {self.chunk_index}> Building TCEM database from {self.fasta}')
    bloom = BloomFilter(max_elements=self.bloom_size, error_rate=self.bloom_error)
    seq_counter = 0
    with Pool(self.cores) as pool, \
            gzip.open(self.fasta, 'rt') as f, \
            gzip.open(self.tcem_index, 'wt') as o:
        try:
            seqs = (seq for i, seq in enumerate(SeqIO.parse(f, 'fasta'))
                    if (i % self.total_chunks) == self.chunk_index)
            for taxa_kmer_set in pool.imap_unordered(process_one_seq, seqs,
                                                     chunksize=1000):
                seq_counter += 1
                if seq_counter % (10 * 1000) == 0:
                    logger.info(f'<chunk {self.chunk_index}> Processing seq: {seq_counter}')
                for pair in taxa_kmer_set:
                    pair_str = f'{pair[0]},{pair[1]}'
                    if pair_str in bloom:
                        continue
                    bloom.add(pair_str)
                    print(pair_str, file=o)
        except KeyboardInterrupt:
            pool.terminate()
            raise
    open(self.tcem_index + '.flag', 'w').close()
def main():
    """
    - Access to the database
    - Access to a bloom filter
    - Consumer sending a request for data
    """
    UNIQUE_SET_SIZE = 5
    data_key = 34
    data_value = "This is a value"
    db_repository = BasicDBRepository()
    hashing_function_1 = BasicHashingFunction()
    hashing_function_2 = Basic2HashingFunction()
    print("Hello world!!")
    bloom_filter = BloomFilter(db_repository, hashing_function_1,
                               hashing_function_2, UNIQUE_SET_SIZE)
    print(f'Initialized bloom filter: {bloom_filter.bit_vector}')
    bloom_filter.insert_new_data(data_key, data_value)
    print("Reached!")
    pprint(f"BloomFilter state: {bloom_filter.bit_vector}")
    pprint(db_repository.show_data(), width=1)
def __init__(self, *args, **kwargs):
    super(MyFilterSwitch, self).__init__(*args, **kwargs)
    self.swList = {}   # list of switches
    self.hostDB = {}   # maps host ID to port number
    self.bloom = BloomFilter(max_elements=10000, error_rate=0.1)
    self.randomFilter(12345678, 1000)
    self.monitor_thread = hub.spawn(self._monitor)
def __init__(self, peer_pool: PeerPool) -> None:
    super().__init__()
    self._peer_pool = peer_pool
    # 1m elements should give us 9000 blocks before the filter becomes less
    # reliable. It should take up about 1mb of memory.
    self._bloom = BloomFilter(max_elements=1000000)
    self._bloom_salt = str(uuid.uuid4())
def test_bloom_filter():
    with pytest.raises(ValueError):
        blf = BloomFilter(n=-1, p=0.1)
    with pytest.raises(ValueError):
        blf = BloomFilter(n=1, p=-0.1)
    with pytest.raises(ValueError):
        blf = BloomFilter(n=1.0, p=0.1)
    # aiming to cause a collision
    blf = BloomFilter(n=3, p=0.1)
    blf.insert("Gondor")
    assert blf.is_present("Gondor 1")
    assert not blf.is_present("Isenguard")
def __init__(self):
    self.headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'}
    self.cookies = {'cookie': 'tryThing00=1098; tryThing01=5838; tryThing02=0552; optimizelyEndUserId=oeu1556099053513r0.5843061130621663; s_fid=4A92F5E10FC9F2AB-176246D5FD4ACB26; gig_hasGmid=ver2; s_vi=[CS]v1|2E60197B8507E3C9-40000113A004AC13[CE]; __gads=ID=5243df9823110dbb:T=1556099830:S=ALNI_MbcB64SpCOjLKHMNlWpA0cU3jln6A; bfp_sn_rf_8b2087b102c9e3e5ffed1c1478ed8b78=Direct; bfp_sn_rt_8b2087b102c9e3e5ffed1c1478ed8b78=1556099832385; _fbp=fb.1.1556099850902.815265183; ajs_user_id=null; ajs_group_id=null; ajs_anonymous_id=%22abac316b-5aa7-403d-9230-f90f4f625c5f%22; _cb_ls=1; ug=5cc02ff00e7dc50a3f9cca0013c765a0; __qca=P0-689587383-1556590653464; _cb=BmyPaDKxXHCDf5t7; ugs=1; s_cc=true; s_ppv=100; countryCode=US; bounceClientVisit340v=N4IgNgDiBcIBYBcEQM4FIDMBBNAmAYnvgMYB2pApihAIakD2YAdGaS-QLZEgA0IATjBAgAvkA; _cb_svref=null; dmxRegion=false; s_sq=%5B%5BB%5D%5D; GED_PLAYLIST_ACTIVITY=W3sidSI6InA1UU4iLCJ0c2wiOjE1NTc3MTMxMzQsIm52IjoxLCJ1cHQiOjE1NTc3MTMwNzUsImx0IjoxNTU3NzEzMTMyfV0.; OptanonConsent=landingPath=NotLandingPage&datestamp=Mon+May+13+2019+10%3A05%3A38+GMT%2B0800+(%E4%B8%AD%E5%9B%BD%E6%A0%87%E5%87%86%E6%97%B6%E9%97%B4)&version=4.4.0&EU=false&groups=1%3A1%2C2%3A1%2C3%3A1%2C4%3A1%2C0_37248%3A1%2C0_37215%3A1%2C0_37244%3A1%2C0_37211%3A1%2C0_37240%3A1%2C0_37207%3A1%2C0_37236%3A1%2C0_37203%3A1%2C0_37198%3A1%2C0_37231%3A1%2C0_37227%3A1%2C0_37223%3A1%2C0_37219%3A1%2C0_37216%3A1%2C0_37249%3A1%2C0_37212%3A1%2C0_37245%3A1%2C0_37208%3A1%2C0_37241%3A1%2C0_37204%3A1%2C0_37237%3A1%2C0_37232%3A1%2C0_37199%3A1%2C0_37228%3A1%2C0_37224%3A1%2C0_37220%3A1%2C0_37217%3A1%2C0_37246%3A1%2C0_37213%3A1%2C0_37242%3A1%2C0_37209%3A1%2C0_37238%3A1%2C0_37205%3A1%2C0_37234%3A1%2C0_37200%3A1%2C0_37233%3A1%2C0_37196%3A1%2C0_37229%3A1%2C0_37225%3A1%2C0_37221%3A1%2C0_37250%3A1%2C0_37214%3A1%2C0_37210%3A1%2C0_37243%3A1%2C0_37206%3A1%2C0_37239%3A1%2C0_37202%3A1%2C0_37235%3A1%2C0_37201%3A1%2C0_37230%3A1%2C0_37197%3A1%2C0_37226%3A1%2C0_37222%3A1%2C0_37218%3A1%2C8%3A1%2C101%3A1%2C102%3A1%2C103%3A1%2C104%3A1%2C105%3A1%2C106%3A1%2C107%3A1%2C108%3A1%2C109%3A1%2C110%3A1%2C111%3A1%2C112%3A1%2C113%3A1%2C114%3A1%2C115%3A1%2C116%3A1%2C117%3A1%2C118%3A1%2C119%3A1%2C120%3A1%2C121%3A1%2C122%3A1%2C123%3A1%2C124%3A1%2C125%3A1%2C126%3A1%2C127%3A1%2C128%3A1%2C129%3A1%2C130%3A1%2C131%3A1%2C133%3A1%2C134%3A1%2C135%3A1%2C136%3A1%2C137%3A1&AwaitingReconsent=false; _chartbeat2=.1557484090990.1557713169225.1001.S1zafCOkPc4t2pQ-D9by5sD3_rnb.3'}
    self.post_url = 'http://127.0.0.1:30008/crawler/article/transfer'
    self.filter_url = 'http://console.cc.clipclaps.tv/crawler/log'
    self.have_met = BloomFilter(max_elements=100000, error_rate=0.1)
    self.downloadPath = '/data/crawler'
    self.picPath = '/cnn_news/picture/'
def process_two(pipe21, pipe23):
    pid = 1
    counter = BloomFilter(n, p, 0)
    counter = recv_message(pipe21, pid, counter, 'g')
    counter = send_message(pipe21, pid, counter, 'h')
    counter = send_message(pipe23, pid, counter, 'i')
    counter = recv_message(pipe23, pid, counter, 'j')
    print_history(counter, pid)
def add(self, key):
    bfilter = self.sbfilters[-1]
    if not bfilter.can_accomodate():
        new_expected_inserts = bfilter.expected_inserts * self.space_scale
        new_error_rate = bfilter.error_rate * self.error_prob_ratio
        new_bfilter = BloomFilter(new_expected_inserts, new_error_rate)
        self.sbfilters.append(new_bfilter)
        bfilter = new_bfilter
    bfilter.add(key)
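# A hedged sketch of the matching membership test for this scalable filter: an
# element may live in any generation, so every subfilter has to be consulted.
# The newest filters are checked first on the guess that recent keys are queried
# most often; BloomFilter supporting the `in` operator is an assumption here.
def __contains__(self, key):
    return any(key in bfilter for bfilter in reversed(self.sbfilters))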
def process_one(pipe12):
    pid = 0
    counter = BloomFilter(n, p, 0)
    counter = event(pid, counter, 'b')
    counter = send_message(pipe12, pid, counter, 'c')
    counter = event(pid, counter, 'd')
    counter = recv_message(pipe12, pid, counter, 'e')
    counter = event(pid, counter, 'f')
    print_history(counter, pid)
def Create_and_fillin_BF(inputList, maxElement, Error):
    myBF = BloomFilter(max_elements=maxElement, error_rate=Error)
    for element in inputList:
        if element not in myBF:
            myBF.add(element)
        else:
            print("Element {} is already in the BF".format(element))
    print('*********** Bloom Filter is created and filled-in ***************')
    return myBF
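# Example call (the word list is illustrative); the duplicate 'beta' triggers the
# "already in the BF" message during the fill:
words = ['alpha', 'beta', 'gamma', 'beta']
bf = Create_and_fillin_BF(words, maxElement=1000, Error=0.01)
print('beta' in bf)  # True (probably - bloom filters can report false positives)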
def general_test(self):
    # Set the size of the bit vector.
    bit_vector_size = 20
    # Words to be added.
    words_saved = [
        "this", "nonsense", "senior", "story", "jokes", "a", "young", "a",
        "one", "to", "impress", "dev", "with", "trying", "a", "is", "of"
    ]
    # Words that will not be added.
    words_not_saved = [
        "These", "words", "do", "not", "exist", "in", "the", "filter"
    ]
    bloom = BloomFilter(bit_vector_size)
    # Add words to the filter.
    for word in words_saved:
        bloom.add(word)
    number_of_true_positive = 0
    number_of_true_negative = 0
    number_of_false_positive = 0
    number_of_false_negative = 0
    # Check all the added words.
    for word in words_saved:
        # If the word is found then we have a true positive result.
        if bloom.search(word):
            number_of_true_positive += 1
        # If the word is not found then there is a bug in the implementation.
        else:
            number_of_false_negative += 1
    # Check all the non-added words.
    for word in words_not_saved:
        # If the word is found then we have a false positive result.
        if bloom.search(word):
            number_of_false_positive += 1
        # If the word is not found then we have a true negative result.
        else:
            number_of_true_negative += 1
    print("Number of true positive results: ", number_of_true_positive)
    print("Number of true negative results: ", number_of_true_negative)
    print("Number of false positive results: ", number_of_false_positive)
    print("Number of false negative results: ", number_of_false_negative)
    # A bloom filter should never return a false negative; if it does, raise an error.
    if number_of_false_negative != 0:
        logging.error("general_test: FAIL. Bloom filter returned a false negative.")
        return
    logging.info("general_test: PASS")
def __init__(self, env, feature_transformer):
    self.env = env
    self.models = {}
    self.feature_transformer = feature_transformer
    for a in env.actions_available:
        self.models[a] = PassiveAggressiveRegressor(C=1.0, fit_intercept=True,
                                                    shuffle=False)
    self.bloom_states = BloomFilter(max_elements=256**2)
    self.nonseen_states = 0
def __init__(self, *args, **kwargs):
    # Root URL of the site to crawl
    self.base_url = 'http://ggzyxx.deyang.gov.cn/'
    super(DeyangSpider, self).__init__(*args, **kwargs)
    self.bloom_filter = BloomFilter(max_elements=1000000, error_rate=0.1,
                                    filename='bf.data')
    self.num = 0
    self.scrawl_mode = ScrawlMode.HISTORY
    self._stop_parse = False