def approx_guesses(fname, q):
    """
    TODO: WRITE SOMETHING HERE
    """
    global pwm
    pwm = Passwords(fname)
    subset_heap = priority_dict()
    covered = set()
    guess_list = []
    ballsize = 1000  # I don't care any bigger ball
    freq_cache = {}
    done = set()
    pwfreq = np.copy(pwm.values())  # deep copy of the frequencies
    l = 1
    st = time.time()
    for i, (pwid, f) in enumerate(pwm):
        rpw = pwm.id2pw(pwid)
        if len(rpw) < 6: continue
        pw = pwm.id2pw(pwid)
        p = pwm.prob(pw)
        neighbors = [rpw]
        for tpw, w in subset_heap.sorted_iter():
            w = -w
            ball = getball(tpw)
            nw = pwfreq[ball].sum()
            if w == nw:
                if w >= f * ballsize:  # correct value
                    print "Guess({}/{}): {} weight: {}"\
                        .format(len(guess_list), q, tpw, w/pwm.totalf())
                    done.add(tpw)
                    guess_list.append(tpw)
                    pwfreq[ball] = 0
                    if len(guess_list) >= q:
                        break
                else:  # The ball weight is still small
                    subset_heap[tpw] = -nw
                    break
            else:
                subset_heap[tpw] = -nw
        for tpw, ball in zip(neighbors, map(getball, iter(neighbors))):
            ballsize = ballsize * 0.9 + ball.shape[0] * 0.1
            subset_heap[tpw] = -pwfreq[ball].sum()

        if len(subset_heap) > l:
            print(">> ({}) : Heap size: {} ballsize: {}".format(
                time.time() - st, len(subset_heap), ballsize))
            l = len(subset_heap) * 2
        if i % 30 == 0:
            print(">> ({}) : {}: {!r} ({})".format(time.time() - st, i, rpw,
                                                   f))
        if len(guess_list) >= q:
            break
    normal_succ = pwm.sumvalues(q=q) / pwm.totalf()
    pool = multiprocessing.Pool(7)
    guessed_pws = np.unique(np.concatenate(pool.map(getball, guess_list)))
    fuzzy_succ = pwm.values()[guessed_pws].sum() / pwm.totalf()
    print("normal succ: {}, fuzzy succ: {}".format(normal_succ, fuzzy_succ))
    with open('approx_guess_{}.json'.format(q), 'wb') as f:
        json.dump(guess_list, f)
    return guess_list
def greedy_maxcoverage_heap(fname, q, **kwargs):
    global pwm
    pwm = Passwords(fname)
    subset_heap = priority_dict()
    covered = set()
    guess_list = []
    ballsize = 2000  # I don't care any bigger ball
    freq_cache = {}
    done = set()
    pwfreq = np.copy(pwm.values())  # deep copy of the frequencies
    l = 1
    st = time.time()
    pool = multiprocessing.Pool(5)
    for i, (pwid, f) in enumerate(pwm):
        rpw = pwm.id2pw(pwid)
        if len(rpw) < 6: continue
        pw = pwm.id2pw(pwid)
        p = pwm.prob(pw)
        neighbors = set(apply_edits(pw.encode('ascii',
                                              errors='ignore'))) - done
        for tpw, w in subset_heap.sorted_iter():
            w = -w
            ball = getball(tpw)
            nw = pwfreq[ball].sum()
            if w == nw:
                if w >= f * ballsize:  # correct value
                    print("Guess({}/{}): {} weight: {}"\
                        .format(len(guess_list), q, tpw, w/pwm.totalf()))
                    done.add(tpw)
                    guess_list.append(tpw)
                    pwfreq[ball] = 0
                    if len(guess_list) >= q:
                        break
                else:  # The ball weight is still small
                    subset_heap[tpw] = -nw
                    break
            else:
                subset_heap[tpw] = -nw
        b_max = 0
        for tpw, ball in zip(neighbors, pool.map(getball, iter(neighbors))):
            subset_heap[tpw] = -pwfreq[ball].sum()
            b_max = max(b_max, ball.shape[0])
        ballsize = ballsize * 0.9 + b_max * 0.1

        if len(subset_heap) > l:
            print(">< ({}) : Heap size: {} ballsize: {}".format(
                time.time() - st, len(subset_heap), ballsize))
            l = len(subset_heap) * 2
        if i % 10 == 0:
            print("({}) : {}: {} ({})".format(time.time() - st, i, rpw, f))
        if len(guess_list) >= q:
            break
    normal_succ = pwm.sumvalues(q=q) / pwm.totalf()
    guessed_pws = np.unique(np.concatenate(pool.map(getball, guess_list)))
    fuzzy_succ = pwm.values()[guessed_pws].sum() / pwm.totalf()
    print("normal succ: {}, fuzzy succ: {}".format(normal_succ, fuzzy_succ))
    with open('guess_{}.json'.format(q), 'w') as f:
        json.dump(guess_list, f)
    return guess_list
def greedy_maxcoverage_heap(fname, q, **kwargs):
    global pwm
    pwm = Passwords(fname)
    subset_heap = priority_dict()
    covered = set()
    guess_list = []
    ballsize = 2000 # I don't care any bigger ball
    freq_cache = {}
    done = set()
    pwfreq = np.copy(pwm.values()) # deep copy of the frequencies
    l = 1
    st = time.time()
    pool = multiprocessing.Pool(5)
    for i, (pwid, f) in enumerate(pwm):
        rpw = pwm.id2pw(pwid)
        if len(rpw)<6: continue
        pw = pwm.id2pw(pwid)
        p = pwm.prob(pw)
        neighbors = set(apply_edits(pw.encode('ascii', errors='ignore'))) - done
        for tpw, w in subset_heap.sorted_iter():
            w = -w
            ball = getball(tpw)
            nw = pwfreq[ball].sum()
            if w == nw:
                if w >= f*ballsize: # correct value
                    print("Guess({}/{}): {} weight: {}"\
                        .format(len(guess_list), q, tpw, w/pwm.totalf()))
                    done.add(tpw)
                    guess_list.append(tpw)
                    pwfreq[ball] = 0
                    if len(guess_list)>=q:
                        break
                else:  # The ball weight is still small
                    subset_heap[tpw] = -nw
                    break
            else:
                subset_heap[tpw] = -nw
        b_max = 0
        for tpw, ball in zip(neighbors, pool.map(getball, iter(neighbors))):
            subset_heap[tpw] = -pwfreq[ball].sum()
            b_max = max(b_max, ball.shape[0])
        ballsize = ballsize*0.9 + b_max*0.1

        if len(subset_heap) > l:
            print(">< ({}) : Heap size: {} ballsize: {}".format(
                time.time()-st, len(subset_heap), ballsize
            ))
            l = len(subset_heap) * 2
        if i%10==0:
            print("({}) : {}: {} ({})".format(time.time()-st, i, rpw, f))
        if len(guess_list)>=q:
            break
    normal_succ = pwm.sumvalues(q=q)/pwm.totalf()
    guessed_pws = np.unique(np.concatenate(pool.map(getball, guess_list)))
    fuzzy_succ = pwm.values()[guessed_pws].sum()/pwm.totalf()
    print("normal succ: {}, fuzzy succ: {}".format(normal_succ, fuzzy_succ))
    with open('guess_{}.json'.format(q), 'w') as f:
        json.dump(guess_list, f)
    return guess_list
def approx_guesses(fname, q):
    """
    TODO: WRITE SOMETHING HERE
    """
    global pwm
    pwm = Passwords(fname)
    subset_heap = priority_dict()
    covered = set()
    guess_list = []
    ballsize = 1000 # I don't care any bigger ball
    freq_cache = {}
    done = set()
    pwfreq = np.copy(pwm.values()) # deep copy of the frequencies
    l = 1
    st = time.time()
    for i, (pwid, f) in enumerate(pwm):
        rpw = pwm.id2pw(pwid)
        if len(rpw)<6: continue
        pw = pwm.id2pw(pwid)
        p = pwm.prob(pw)
        neighbors = [rpw]
        for tpw, w in subset_heap.sorted_iter():
            w = -w
            ball = getball(tpw)
            nw = pwfreq[ball].sum()
            if w == nw:
                if w >= f*ballsize: # correct value
                    print "Guess({}/{}): {} weight: {}"\
                        .format(len(guess_list), q, tpw, w/pwm.totalf())
                    done.add(tpw)
                    guess_list.append(tpw)
                    pwfreq[ball] = 0
                    if len(guess_list)>=q:
                        break
                else:  # The ball weight is still small
                    subset_heap[tpw] = -nw
                    break
            else:
                subset_heap[tpw] = -nw
        for tpw, ball in zip(neighbors, map(getball, iter(neighbors))):
            ballsize = ballsize*0.9 + ball.shape[0]*0.1
            subset_heap[tpw] = -pwfreq[ball].sum()

        if len(subset_heap) > l:
            print(">> ({}) : Heap size: {} ballsize: {}".format(
                time.time()-st, len(subset_heap), ballsize
            ))
            l = len(subset_heap) * 2
        if i%30==0:
            print(">> ({}) : {}: {!r} ({})".format(time.time()-st, i, rpw, f))
        if len(guess_list)>=q:
            break
    normal_succ = pwm.sumvalues(q=q)/pwm.totalf()
    pool = multiprocessing.Pool(7)
    guessed_pws = np.unique(np.concatenate(pool.map(getball, guess_list)))
    fuzzy_succ = pwm.values()[
        guessed_pws
    ].sum()/pwm.totalf()
    print("normal succ: {}, fuzzy succ: {}".format(normal_succ, fuzzy_succ))
    with open('approx_guess_{}.json'.format(q), 'wb') as f:
        json.dump(guess_list, f)
    return guess_list