Ejemplo n.º 1
0
	def generate_new_bloom(self):
		"""Create a new bloom filter instance and feed every NSRL file into it.

		When ``self.estimate_capacity`` is truthy, ``_est_num_recs_and_set`` is
		invoked before the filter is built. The filter is then created from
		``self.capacity`` and ``self.error_rate`` and stored on ``self._bloom``.
		Finally each file returned by ``get_all_nsrl_files`` is handed to
		``_process_nsrl_file`` with ``_process_csv_chunk`` as the chunk callback.
		"""
		# Must run before the filter is constructed.
		# NOTE(review): presumably this refreshes self.capacity -- confirm.
		if self.estimate_capacity:
			self._est_num_recs_and_set()

		build_bloom = self._get_bloom_class_inst
		self._bloom = build_bloom(self.capacity, self.error_rate)

		nsrl_sources = get_all_nsrl_files(iso_base_dir=self.nsrl_dir)
		for nsrl_source in nsrl_sources:
			self._process_nsrl_file(nsrl_source, self._process_csv_chunk)
Ejemplo n.º 2
0
    def generate_new_bloom(self):
        """Create a new bloom filter instance and feed every NSRL file into it.

        When ``self.estimate_capacity`` is truthy, ``_est_num_recs_and_set`` is
        invoked before the filter is built. The filter is then created from
        ``self.capacity`` and ``self.error_rate`` and stored on ``self._bloom``.
        Finally each file returned by ``get_all_nsrl_files`` is handed to
        ``_process_nsrl_file`` with ``_process_csv_chunk`` as the chunk callback.
        """
        # Must run before the filter is constructed.
        # NOTE(review): presumably this refreshes self.capacity -- confirm.
        if self.estimate_capacity:
            self._est_num_recs_and_set()

        build_bloom = self._get_bloom_class_inst
        self._bloom = build_bloom(self.capacity, self.error_rate)

        nsrl_sources = get_all_nsrl_files(iso_base_dir=self.nsrl_dir)
        for nsrl_source in nsrl_sources:
            self._process_nsrl_file(nsrl_source, self._process_csv_chunk)
Ejemplo n.º 3
0
    def _estimate_number_of_recs(self):
        """Return the number of distinct ``MD5_KEY_CSV`` values in the NSRL files.

        Every file returned by ``get_all_nsrl_files`` is passed through
        ``_process_nsrl_file``; each chunk it delivers is filtered with
        ``_filter_csv_chunk`` and the ``MD5_KEY_CSV`` field of every surviving
        entry is collected into a set, so repeated values are counted once.

        Returns:
            The size of the collected set, as an int.
        """
        unique_md5s = set()

        def collect_md5s(csv_chunk):
            # Chunk callback: record the MD5 field of every filtered entry.
            unique_md5s.update(
                entry[MD5_KEY_CSV]
                for entry in self._filter_csv_chunk(csv_chunk)
            )

        for nsrl_source in get_all_nsrl_files(iso_base_dir=self.nsrl_dir):
            self._process_nsrl_file(nsrl_source, collect_md5s)

        # The set is going to get large, but deduplicating gives a more exact
        # estimate than a raw row count would.
        return len(unique_md5s)
Ejemplo n.º 4
0
	def _estimate_number_of_recs(self):
		"""Return the number of distinct ``MD5_KEY_CSV`` values in the NSRL files.

		Every file returned by ``get_all_nsrl_files`` is passed through
		``_process_nsrl_file``; each chunk it delivers is filtered with
		``_filter_csv_chunk`` and the ``MD5_KEY_CSV`` field of every surviving
		entry is collected into a set, so repeated values are counted once.

		Returns:
			The size of the collected set, as an int.
		"""
		unique_md5s = set()

		def collect_md5s(csv_chunk):
			# Chunk callback: record the MD5 field of every filtered entry.
			unique_md5s.update(
				entry[MD5_KEY_CSV]
				for entry in self._filter_csv_chunk(csv_chunk)
			)

		for nsrl_source in get_all_nsrl_files(iso_base_dir=self.nsrl_dir):
			self._process_nsrl_file(nsrl_source, collect_md5s)

		# The set is going to get large, but deduplicating gives a more exact
		# estimate than a raw row count would.
		return len(unique_md5s)