예제 #1
0
def call_main(author_name,
              filters_dir="/Users/vivekris/GS/code/gsbloomboost/input/filters/"):
    """Search the Bloom filters for ``author_name`` and print the resolved id.

    Args:
        author_name: value handed to ``Bloom.search`` as the search key.
        filters_dir: directory handed to ``Bloom.search``; defaults to the
            location that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        None. The id computed by ``get_id`` is printed, not returned.
    """
    bf = Bloom()
    count = bf.search(filters_dir, author_name)
    lookup = get_id_lookup()

    return_id = get_id(count, lookup)
    # Parenthesised single-argument print produces the same output on
    # Python 2 and is valid syntax on Python 3 (the bare statement is not).
    print(return_id)
예제 #2
0
def getMeme(cid, sub):
    """Download one media submission from subreddit ``sub`` to ``temp.jpg``.

    A Bloom filter persisted in ``<cwd>/stats/<cid>/bloom.txt`` records the
    submissions already handed out. The first hot submission that the filter
    does not know and that is hosted on a reddit media domain is recorded and
    downloaded. When every submission is already known, the last one iterated
    is downloaded instead (unchanged legacy fallback).

    Args:
        cid: identifier used as the per-caller directory name under ``stats``.
        sub: subreddit name passed to ``reddit.subreddit``.

    Returns:
        ``temp.jpg`` opened in binary read mode; the caller must close it.

    Raises:
        LookupError: the hot listing yielded no submissions at all.
    """
    path = str(os.getcwd() + "/stats/" + str(cid) + "/")
    os.makedirs(path, exist_ok=True)

    f = Path(path + "bloom.txt")
    print(f)
    seen = Bloom()
    if f.is_file():
        with open(f, "r") as table_file:
            seen.getTable(table_file)
    else:
        # Create the empty table file without keeping a handle open; the
        # previous open(f, "w+") was never closed.
        f.touch()

    chosen = None
    for s in reddit.subreddit(sub).hot():
        chosen = s
        if not seen.find(str(s)) and s.is_reddit_media_domain:
            seen.insert(str(s))
            with open(f, "w") as table_file:
                seen.writeTable(table_file)
            break
    if chosen is None:
        # Previously this surfaced as an unbound-variable error on ``s``.
        raise LookupError("no submissions returned for subreddit %r" % (sub,))
    request.urlretrieve(chosen.url, "temp.jpg")
    return open("temp.jpg", "rb")


#getMeme(123,"dankmemes")
    def create_collection(self, genome):
        """Build one Bloom filter per genome section and freeze the collection.

        ``genome`` is cut into ``self.k`` consecutive sections of
        ``int(len(genome) / self.k)`` items each; any remainder past the last
        full section is not visited. Every window of ``segment_size`` items
        inside a section is added to that section's filter (windows never
        span two sections). ``segment_size`` is read from the enclosing
        scope, it is not a parameter.

        The filters are appended to ``self.collection``, which is then
        replaced by a tuple of its contents.
        """
        chunk_len = int(len(genome) / (self.k))
        for index in range(self.k):
            start = index * chunk_len
            chunk = genome[start:start + chunk_len]
            section_filter = Bloom(1000000000, 0.3)
            # Highest offset at which a full-length window still fits.
            last_offset = len(chunk) - segment_size
            for offset in range(last_offset + 1):
                section_filter.bloom_add(chunk[offset:offset + segment_size])
            self.collection.append(section_filter)
        print("Collection created successfully")
        self.collection = tuple(self.collection)
예제 #4
0
 def post(self):
   """Add the posted ``value`` to the memcache-backed filter and redirect.

   The filter is loaded from the memcache key ``"filter"`` (a new
   ``Bloom(2**21, 5)`` is used when the key is absent), the value is added,
   and the filter is written back. The redirect's ``success`` query
   parameter is ``True`` only when the filter did not already report the
   value before it was added.
   """
   bloom = memcache.get("filter")
   if bloom is None:
     bloom = Bloom(2**21, 5)

   value = self.request.POST["value"]
   # Membership has to be sampled before add(), otherwise it is always true.
   seen_before = value in bloom
   bloom.add(value)
   memcache.set("filter", bloom)

   # NOTE(review): value is concatenated into the URL without escaping.
   target = "/?success=" + str(not seen_before) + "&value=" + value
   self.redirect(target)
예제 #5
0
파일: merge.py 프로젝트: vkris/gsbloomboost
def call_main(lists_folder, bloom_folder):
    """Add the entries of every list file to a same-named ``.bloom`` filter.

    For each path returned by ``get_files(lists_folder)`` the base name up to
    its first dot is used to build ``<bloom_folder>/<name>.bloom``, a
    ``Bloom`` is constructed on that path, and the stripped lines of the list
    file are passed to its ``add_elements``.

    Args:
        lists_folder: folder handed to ``get_files`` to enumerate list files.
        bloom_folder: folder prefix for the ``.bloom`` filter paths.
    """
    list_files = get_files(lists_folder)

    for filee in list_files:
        # "/Users/xya/asdf/abc.2bloom" -> "abc". Indexing with [-1] also
        # copes with a bare file name that contains no slash.
        file_name = filee.rsplit('/', 1)[-1].split('.')[0]
        bf = Bloom(bloom_folder + "/" + file_name + ".bloom")
        # open() exists on both Python 2 and 3 (file() is Python 2 only) and
        # the context manager closes the handle once the lines are read.
        with open(filee) as handle:
            list_of_authors = [element.strip() for element in handle]
        print(list_of_authors)
        # Now start adding elements
        bf.add_elements(list_of_authors)
예제 #6
0
    def train(self, xs, ys, epochs):
        """
        Train the model and set up the two AMQs.

        xs: training examples
        ys: labels, truthy for members of the set
        epochs: number of epochs forwarded to the model's train method

        The first filter receives every positive example. The model is then
        trained and tau tuned only on the examples the first filter accepts.
        Positive examples the model still rejects at tau are stored in a
        second filter, which is created only when such examples exist.
        """
        labelled = list(zip(xs, ys))

        # Setup first filter
        self.amq1.add_set([x for x, y in labelled if y])

        # Train the neural net on reported positives of first filter
        amq1_pos = [(x, y) for x, y in labelled if self.amq1.contains(x)]
        amq1_pos_xs = [x for x, _ in amq1_pos]
        amq1_pos_ys = [y for _, y in amq1_pos]

        self.model.train(amq1_pos_xs, amq1_pos_ys, epochs)

        # Tune tau
        self.tau, fpr, fnr = self._choose_tau(amq1_pos_xs, amq1_pos_ys)

        # Get false negatives from model. Only true members count: the first
        # filter also lets some negatives through, and storing those in the
        # second filter would turn every one of them into a false positive.
        model_false_negs = [
            x for x, y in amq1_pos if y and not (self.model(x) > self.tau)
        ]
        num_model_false_negs = len(model_false_negs)

        # Setup second filter if we have false negs
        if num_model_false_negs > 0 and fnr > 0:
            # Compute optimal bitarray size ratio for second filter.
            # The denominator vanishes when fnr == 1 or fpr == 1; treat that
            # like inside == 0 and size the filter from the error rate.
            denominator = (1 - fpr) * (1 / fnr - 1)
            inside = fpr / denominator if denominator != 0 else 0
            # NOTE(review): inside > 1 yields a negative m2 that is passed to
            # init_nm unchanged -- confirm init_nm tolerates that.
            m2 = int(0 if inside == 0 else -log2(inside) / log(2))
            if m2 == 0:
                self.amq2 = WordBloom(
                    Bloom.init_ne(num_model_false_negs, self.err))
            else:
                self.amq2 = WordBloom(Bloom.init_nm(num_model_false_negs, m2))
            self.amq2.add_set(model_false_negs)
예제 #7
0
def simulation():
    """Train a Bloom on random integers, then test it on a second random draw.

    10000 integers are drawn for construction and 1000 for testing, both
    with ``np.random.randint(1, 1000000)``; the filter is built with 0.01
    as its second argument.
    """
    training_keys = np.random.randint(1, 1000000, size=10000)
    probe_keys = np.random.randint(1, 1000000, size=1000)

    bloom_filter = Bloom(training_keys, 0.01)
    bloom_filter.train_bloom()
    bloom_filter.test_bloom(probe_keys)
예제 #8
0
    def __init__(self, n, c, err, set_size, err1k):
        """
        Create the untrained model, the default threshold and the first AMQ.

        n: number of letters in string
        c: size of alphabet
        err: total error rate of sandwich
        set_size: element count passed to Bloom.init_ne for the first filter
        err1k: factor multiplied with err to get the first filter's error rate
        """
        self.n, self.c = n, c

        self.model = WordNet(n, c)
        # Default threshold; replaced once tau is tuned during training.
        self.tau = 0.5
        # Numeric value of 2 ** -log(2).
        self.alpha = 0.618503137801576

        self.err = err
        self.err1 = err * err1k

        # The first filter can be sized now; the second one depends on the
        # trained model, so it stays unset until training.
        first_filter = Bloom.init_ne(set_size, self.err1)
        self.amq1 = WordBloom(first_filter)
        self.amq2 = None
예제 #9
0
    def train(self, xs, ys, epochs):
        """
        Fit the model, tune tau, and back up the positives the model misses.

        xs: training examples
        ys: labels, truthy for members of the set
        epochs: number of epochs forwarded to the model's train method

        self.amq is only assigned when at least one positive example scores
        at or below tau.
        """
        # The model's own training routine is handed the full data set.
        self.model.train(xs, ys, epochs)
        self.tau = self._choose_tau(xs, ys)

        # Positives the model fails to accept at the chosen threshold.
        missed = []
        for x, y in zip(xs, ys):
            if y and not (self.model(x) > self.tau):
                missed.append(x)

        if not missed:
            return

        # Backup filter gets half of the overall error budget.
        self.amq = WordBloom(Bloom.init_ne(len(missed), self.err / 2))
        self.amq.add_set(missed)
예제 #10
0
def bloom_test(xs, ys, num_pos, num_neg, n, c, e):
    """
    Fill a Bloom filter with the positive examples and print its error rates.

    xs, ys: examples and their labels (truthy label = member of the set)
    num_pos, num_neg: counts used to size the filter and normalise the rates
    n, c: accepted but not used by this function
    e: error rate passed to Bloom.init_ne
    """
    bloom = WordBloom(Bloom.init_ne(num_pos, e))
    bloom.add_set([x for x, y in zip(xs, ys) if y])

    false_pos = 0
    false_neg = 0
    for x, y in zip(xs, ys):
        hit = bloom.contains(x)
        false_pos += (not y) and hit
        false_neg += y and (not hit)

    print(bloom)
    fpr = false_pos / num_neg
    fnr = false_neg / num_pos
    accuracy = 1 - (false_pos + false_neg) / (num_pos + num_neg)
    print("fpr: {}, fnr: {}, correct%: {}".format(fpr, fnr, accuracy))
예제 #11
0
def main():
    """Add two names to a Bloom filter and report membership for two probes."""
    bloom = Bloom(12)

    # Populate the set.
    for member in ("Curie", "Laplace"):
        bloom.add(member)

    # Probe one name that was never added and one that was.
    for person in ("Pasteur", "Curie"):
        if bloom.check(person):
            template = "%s is probably in the set."
        else:
            template = "%s is definitely not in the set."
        print(template % person)
예제 #12
0
class TestBloomMethods(unittest.TestCase):
    """Unit tests for the Bloom filter's ``add`` and ``check`` methods."""

    def setUp(self):
        """Give every test a fresh filter built with size 10."""
        self.size = 10
        self.b = Bloom(self.size)

    def test_add(self):
        """Adding an element must switch on the expected table positions."""
        expected_table = [1, 0, 0, 1, 0, 0, 0, 0, 0, 0]
        self.b.add('Archimedes')
        self.assertEqual(self.b.table, expected_table)

    def test_check_element_in_table(self):
        """An element that was added must be reported as present."""
        name = 'Copernicus'
        self.b.add(name)
        found = self.b.check(name)
        self.assertEqual(found, True)

    def test_check_element_not_in_table(self):
        """An element never added to the empty filter is reported absent."""
        found = self.b.check('Galileo')
        self.assertEqual(found, False)
예제 #13
0
# rings, steps = gen_circle(rings=None, pixels_per=pixels_per, offset=0, invert=False)
# layout = layout_from_rings(rings, origin=(0, 0, 0), z_diff=8)
# driver = DriverSimPixel(sum(pixels_per), layout=layout)
# led = LEDCircle(driver, rings=rings, maxAngleDiff=0)


def shutdown():
    """Close the driver's server, wait two seconds, then exit the interpreter.

    Uses the module-level ``log`` and ``driver`` objects.
    NOTE(review): the only ``driver`` assignment visible nearby is commented
    out -- confirm it is defined before this can be called.
    """
    log.debug('Force close server')
    driver.server.close()
    # Pause after the close call and before exiting.
    # NOTE(review): presumably to let the server finish closing -- confirm.
    time.sleep(2)
    # sys.exit() raises SystemExit, so nothing after a call to shutdown() runs.
    sys.exit()


# Pick and run an animation based on the concrete type of ``led``.
try:
    if isinstance(led, LEDMatrix):
        anim = Bloom(led, dir=True)
        anim.run(amt=2, fps=60)
    elif isinstance(led, LEDCube):
        # anim = cuby(led, color_list=c_list)
        # anim.run(amt=1, fps=8)
        anim = Simplex(led, freq=16, octaves=1)
        anim.run(amt=1, fps=30)
        # anim = Spectrum(led, vis_list=['Spread'], steps_per_vis=None,
        #                 bins=12, max_freq=4000, log_scale=True, auto_gain=False, gain=3)
        # NOTE(review): with the Spectrum assignment above commented out, this
        # second run() re-runs the Simplex animation at 8 fps -- confirm intent.
        anim.run(amt=1, fps=8)
    elif isinstance(led, LEDCircle):
        anim = Diag(led, turns=1, angle=6, direction=False)
        anim.run(amt=6, fps=20)
except Exception as e:
    # NOTE(review): if shutdown() is the helper that ends in sys.exit(), the
    # bare ``raise`` below is never reached; ``e`` is also unused.
    shutdown()
    raise
예제 #14
0
 def setUp(self):
     """Create the Bloom filter under test, built with size 10.

     The size is kept on ``self.size`` and the filter on ``self.b``.
     """
     self.size = 10
     self.b = Bloom(self.size)
예제 #15
0
        return [word]
    letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
               'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    words = []
    for char in word:
        if char == '*':
            for letter in letters:
                words.append(word.replace('*', letter, 1))
                for item in list(words):
                    for char in list(item):
                        if char == '*':
                            for letter in letters:
                                words.append(item.replace('*', letter, 1))
    return words

# Filter that will hold every line of the input file.
bloom = Bloom(15000000, 8)
# Command line: a file to load and a word (optionally with '*') to look up.
# The description literal uses backslash continuations, so the indentation of
# the continued lines is part of the string.
parser = argparse.ArgumentParser(description="Bloom filter. Builds a\
                                 bloom filter. Searches for a word.\
                                 Wildcards(*) allowed.")
parser.add_argument('file', type=str, help="File location")
parser.add_argument('word', type=str, help="Word to search")
args = parser.parse_args()
# Insert each line of the file with trailing whitespace removed.
with open(args.file, 'r') as reader:
    for line in reader:
        bloom.insert(line.rstrip())
print("Bloom filter built.")
print("Searching for \"{}\"...".format(args.word))
if '*' in args.word:
    print("The more wildcards, the longer this takes...")
# Collects the results of the search loop that follows.
matches = []
for word in replace(args.word):
예제 #16
0
import datetime
from  bloom import Bloom



# Benchmark: add every line of the essay file to a Bloom filter, count how
# many additions the filter rejects, and report the elapsed time.

# Read the whole file up front; the context manager closes the handle, which
# the plain open() call never did.
with open("data/emerson_essays.txt", "r") as f:
    sample = f.read()

lines = sample.splitlines()
mb = Bloom()
false_positive = 0
new_key = 0

start_time = datetime.datetime.now()

for line in lines:
    # A falsy result from Add() is counted as a false positive.
    # NOTE(review): a line that genuinely repeats in the text presumably
    # lands in this bucket as well -- confirm that is intended.
    if not mb.Add(line):
        false_positive += 1
    else:
        new_key += 1

end_time = datetime.datetime.now()
print(" false_positive : %s - new_key: %s  on %s  " % (false_positive, new_key, len(lines)))
# Elapsed time is end - start; total_seconds() covers the full duration,
# whereas .microseconds is only the sub-second component of the delta.
# Converted to milliseconds to match the "ms" label below.
diff = (end_time - start_time).total_seconds() * 1000
# Per-line time; an empty file would otherwise divide by zero.
ratio = diff / len(lines) if lines else 0.0
print("{} ms for ratio: {}".format(diff, ratio))
print(" {} LEN ".format(mb.Len()))