Example #1
0
def set_bloom_filter(bbf_file):
    """
    Load the bloom filter stored at ``bbf_file`` into the module-level ``bloom``.

    When the path exists, the filter is loaded with a capacity of 10,000,000
    and an error rate of 0.05 and bound to the global name ``bloom``.
    When it does not exist, a warning is printed to stdout and ``bloom`` is
    left untouched, so it stays unbound if nothing assigned it earlier.

    bbf_file -- path of the bloom filter file to load.
    """
    global bloom
    if os.path.exists(bbf_file):
        bloom = pydablooms.load_dabloom(capacity=10000000,
                                        error_rate=.05,
                                        filepath=bbf_file)
    else:
        # A single parenthesised, pre-formatted string prints the same text
        # whether ``print`` is the Python 2 statement or the Python 3 function.
        # The one-element tuple keeps the formatting safe if a tuple is passed.
        print("WARNING: bloom filter file %s doesn't exist" % (bbf_file,))
Example #2
0
# Excerpt of a script that exercises a pydablooms filter against a word list.
# `bloom`, `words_file`, `capacity`, `error_rate`, `bloom_fname` and the
# initial value of `i` are all set up before this excerpt (not shown here).

# Pass 1: add every line of the word file (trailing whitespace stripped),
# passing the running counter `i` as the second argument.
# NOTE(review): `i` is presumably 0 on entry -- confirm against the setup code.
for line in words_file:
    bloom.add(line.rstrip(), i)
    i += 1

# Pass 2: rewind and delete every line whose zero-based index is a multiple
# of 5, passing that index as the second argument to delete().
words_file.seek(0)
i = 0
for line in words_file:
    if i % 5 == 0:
        bloom.delete(line.rstrip(), i)
    i += 1

# Flush and drop the in-memory object, then load the filter again from
# `bloom_fname` so the checks below run against the re-loaded filter.
# NOTE(review): flush() presumably persists pending changes to the backing
# file -- confirm against the pydablooms documentation.
bloom.flush()
del bloom

bloom = pydablooms.load_dabloom(capacity=capacity,
                                error_rate=error_rate,
                                filepath=bloom_fname)

# Result tallies. They are only initialised here; nothing in the visible
# excerpt updates them, so the code that does is presumably cut off below.
true_positives = 0
true_negatives = 0
false_positives = 0
false_negatives = 0

# Pass 3: rewind and, for every line, require that the explicit check() call
# and the `in` operator give the same answer.
# NOTE(review): `i` is reset to 0 but never advanced in the visible loop body;
# the excerpt appears to be truncated after the assert.
words_file.seek(0)
i = 0
for line in words_file:
    exists = bloom.check(line.rstrip())
    contains = line.rstrip() in bloom
    # The failure message renders `exists` with %r and `contains` with %i.
    assert exists == contains, \
        "ERROR: %r from 'bloom.check(x)', %i from 'x in bloom'" \
        % (exists, contains)
Example #3
0
def main():
    """
    Command line entry point: filter a stream through a bloom filter.

    By default will read from stdin and write to stdout using a temporary bloom file.

    Bloom selection:
      * --bloom names an existing file: that filter is loaded.
      * --bloom names a missing file and --add is given: a new filter is
        created at that path.
      * otherwise: the temporary filter from __get_tmp_bloom() is used, and a
        message explains why if --bloom or --add was given.

    Mode:
      * --delete: __remove_from_bloom(bloom, infile)
      * --add:    __filter_with_add(bloom, infile, outfile, offsetfile)
      * neither:  __filter_no_add(bloom, bloom_self, infile, outfile, offsetfile)

    On exit, including when filtering raises, the input, output and offset
    files are closed and the temporary bloom directory is removed.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('infile', nargs='?', type=argparse.FileType('rb'), default=sys.stdin,
                        help="Defaults to stdin")
    parser.add_argument('outfile', nargs='?', type=argparse.FileType('wb'), default=sys.stdout,
                        help="Defaults to stdout")

    parser.add_argument('--offsetfile', type=argparse.FileType('w'), default=None,
                        help="Defaults to None, requires a named outfile")
    parser.add_argument('-b', '--bloom', help='The path to the bloom filter to check against')
    parser.add_argument('-c', '--capacity', type=int, default=CAPACITY, help='The capacity of the bloom filter, DEFAULT: 1 million')

    #Add and delete are mutually exclusive
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-a', '--add', action='store_true', default=False,
                        help='add the phone to the bloom file.')
    group.add_argument('-d', '--delete', action='store_true', default=False,
                        help='remove the phone from the bloom file.')

    args = parser.parse_args()

    bloom_self, tmpdir = __get_tmp_bloom()
    # Bound up front so the cleanup below can always drop the name, even if an
    # error occurs before a filter has been selected.
    bloom = None

    try:
        try:
            if args.offsetfile:
                # Probe only: tell() raises IOError when outfile is a pipe,
                # in which case no offset file is written.
                args.outfile.tell()
                args.offsetfile.write('#Input:%s; ' % args.infile.name)
                args.offsetfile.write('Output:%s%s' % (args.outfile.name, os.linesep))

                if args.bloom:
                    args.offsetfile.write('#Bloom:%s%s' % (args.bloom, os.linesep))

        except IOError:
            #File.tell() does not work with pipes.
            sys.stderr.write('Warning: Cannot write offset file when the output file is a pipe.' + os.linesep)
            # argparse already opened the file; close it instead of just
            # discarding the handle.
            args.offsetfile.close()
            args.offsetfile = None

        if args.bloom and os.path.isfile(os.path.abspath(args.bloom)):
            bloom_abspath = os.path.abspath(args.bloom)
            #sys.stderr.write('Using bloom: %s' % bloom_abspath + os.linesep)
            bloom = pydablooms.load_dabloom(capacity=args.capacity, error_rate=ERROR_RATE, filepath=bloom_abspath)
        elif args.bloom and args.add:
            bloom_abspath = os.path.abspath(args.bloom)
            bloom = pydablooms.Dablooms(capacity=args.capacity, error_rate=ERROR_RATE, filepath=bloom_abspath)
            sys.stderr.write('Created bloom with capacity %d at %s' % (args.capacity, bloom_abspath + os.linesep))
        else:
            if args.bloom:
                sys.stderr.write("Bloom file does not exist and we cannot create new bloom without the --add flag" + os.linesep)
            elif args.add:
                sys.stderr.write("Add option ignored without bloom file specified" + os.linesep)

            bloom = bloom_self
            sys.stderr.write('Created tmp bloom with capacity %d at %s' % (args.capacity, tmpdir + os.linesep))

        if args.delete:
            __remove_from_bloom(bloom, args.infile)
        elif args.add:
            __filter_with_add(bloom, args.infile, args.outfile, args.offsetfile)
        else:
            __filter_no_add(bloom, bloom_self, args.infile, args.outfile, args.offsetfile)
    finally:
        try:
            args.infile.close()
            args.outfile.close()
            if args.offsetfile:
                args.offsetfile.close()
        finally:
            # Drop both names: `bloom` may be the same object as `bloom_self`,
            # and the temporary filter stays referenced until both are gone.
            del bloom, bloom_self
            if os.path.exists(tmpdir) and os.getcwd() != tmpdir:
                shutil.rmtree(tmpdir)
Example #4
0
# Excerpt of a script that exercises a pydablooms filter against a word list.
# `bloom`, `words_file`, `capacity`, `error_rate`, `bloom_fname` and the
# initial value of `i` are all set up before this excerpt (not shown here).

# Pass 1: add every line of the word file (trailing whitespace stripped),
# passing the running counter `i` as the second argument.
# NOTE(review): `i` is presumably 0 on entry -- confirm against the setup code.
for line in words_file:
    bloom.add(line.rstrip(), i)
    i += 1

# Pass 2: rewind and delete every line whose zero-based index is a multiple
# of 5, passing that index as the second argument to delete().
words_file.seek(0)
i = 0
for line in words_file:
    if i % 5 == 0:
        bloom.delete(line.rstrip(), i)
    i += 1

# Flush and drop the in-memory object, then load the filter again from
# `bloom_fname` so the checks below run against the re-loaded filter.
# NOTE(review): flush() presumably persists pending changes to the backing
# file -- confirm against the pydablooms documentation.
bloom.flush()
del bloom

bloom = pydablooms.load_dabloom(capacity=capacity,
                                error_rate=error_rate,
                                filepath=bloom_fname)

# Result tallies. They are only initialised here; nothing in the visible
# excerpt updates them, so the code that does is presumably cut off below.
true_positives = 0
true_negatives = 0
false_positives = 0
false_negatives = 0

# Pass 3: rewind and, for every line, require that the explicit check() call
# and the `in` operator give the same answer.
# NOTE(review): `i` is reset to 0 but never advanced in the visible loop body;
# the excerpt appears to be truncated after the assert.
words_file.seek(0)
i = 0
for line in words_file:
    exists = bloom.check(line.rstrip())
    contains = line.rstrip() in bloom
    # The failure message renders `exists` with %r and `contains` with %i.
    assert exists == contains, \
        "ERROR: %r from 'bloom.check(x)', %i from 'x in bloom'" \
        % (exists, contains)
Example #5
0
def set_bloom_filter(bbf_file):
    """
    Load the bloom filter stored at ``bbf_file`` into the module-level ``bloom``.

    When the path exists, the filter is loaded with a capacity of 10,000,000
    and an error rate of 0.05 and bound to the global name ``bloom``.
    When it does not exist, a warning is printed to stdout and ``bloom`` is
    left untouched, so it stays unbound if nothing assigned it earlier.

    bbf_file -- path of the bloom filter file to load.
    """
    global bloom
    if os.path.exists(bbf_file):
        bloom = pydablooms.load_dabloom(capacity=10000000, error_rate=.05, filepath=bbf_file)
    else:
        # A single parenthesised, pre-formatted string prints the same text
        # whether ``print`` is the Python 2 statement or the Python 3 function.
        # The one-element tuple keeps the formatting safe if a tuple is passed.
        print("WARNING: bloom filter file %s doesn't exist" % (bbf_file,))