Beispiel #1
0
def default(pf, args, start_t):
    """Evaluate candidates, highest signature similarity first.

    Every candidate pair whose Jaccard similarity exceeds 0.5 is written
    to the CSV file given by ``args.results`` (``--results`` option).

    Args:
        pf: Object providing ``candidates()``, which yields
            ``((c1, c2), sim)`` tuples, and ``jaccard_similarity(c1, c2)``.
        args: Parsed options. The optional attributes ``results`` (output
            path, default ``'results.txt'``), ``dont_append_results``
            (default ``False``) and ``extended`` (default ``False``) are
            read when present.
        start_t: Reference timestamp, comparable with ``time.time()``,
            from which elapsed time is measured.

    Returns:
        Always ``True``.
    """

    print("Getting all candidates...")
    t = time.time()
    candidates = list(pf.candidates())
    print("Done, got {} candidates in {}s".format(len(candidates),
                                                  time.time() - t))

    print("Sorting candidates...")
    t = time.time()
    # Descending by signature similarity; the sort is stable, so ties keep
    # the order in which pf.candidates() produced them.
    candidates.sort(key=lambda c: -c[1])
    print("Done in {}s".format(time.time() - t))

    csv_file = getattr(args, 'results', 'results.txt')
    append_results = not getattr(args, 'dont_append_results', False)
    extended_results = getattr(args, 'extended', False)

    csv = CsvWriter(csv_file, append=append_results)

    print("Verifying candidates...")
    found = 0
    # Plain list: appending is amortised O(1), whereas np.append copies the
    # whole array on every hit.
    found_times = []
    # Defined up front so the summary below also works when there are no
    # candidates at all (the loop body would never assign it).
    elapsed_t = time.time() - start_t
    for i, ((c1, c2), sim) in enumerate(candidates):
        jac_sim = pf.jaccard_similarity(c1, c2)
        elapsed_t = time.time() - start_t

        if jac_sim > .5:
            # Smaller index first, each incremented by one (presumably to
            # convert 0-based indices to 1-based ids -- confirm with the
            # consumer of the results file).
            c1, c2 = min(c1, c2) + 1, max(c1, c2) + 1
            if extended_results:
                csv.write([c1, c2, sim, jac_sim, i, elapsed_t])
            else:
                csv.write([c1, c2])
            found += 1
            found_times.append(time.time() - start_t)
            print("Found {} (at signature similarity {}, after {}s)".format(
                found, sim, elapsed_t),
                  end='\r')

        # stop when 30 minutes are over (minus a 2 second safety margin)
        if elapsed_t > 1800 - 2:
            print("\nTime's up, stopping.")
            break

        # every 100 candidates, check whether rate is so low we should stop
        if found >= 100 and i % 100 == 0:
            # The 10th most recent hit is more than a minute old, i.e. the
            # rate has dropped below 10 pairs per minute.
            if elapsed_t - found_times[-10] > 60:
                print("\nRate is slowing down, stopping.")
                break

    # Guard against a zero elapsed time so the summary never divides by zero.
    per_minute = found / elapsed_t * 60 if elapsed_t > 0 else 0.0
    print("Finished in {}s.".format(elapsed_t))
    print("Found {} pairs, {} per minute.".format(found, per_minute))

    return True
def inner_simulation(rounds, lower, upper, step, debug, logging, k, delta):
    """Drive the simulation over every requested parameter combination.

    For each correlation coefficient in ``delta`` and each preference
    list length in ``k``, the simulation is repeated ``rounds`` times
    (endlessly when ``rounds`` is ``-1``). Within a round it runs once
    per ``n`` in ``range(max(lower, pref_length), upper, step)``. When
    ``logging`` is truthy each ``(n, ratio)`` result is handed to a
    ``CsvWriter`` as soon as it is available.

    ``debug`` controls console output: ``0`` or ``1`` print the sweep
    parameters and every result, any value other than ``-1`` prints the
    round banner.
    """
    show_details = debug in (0, 1)
    show_rounds = debug != -1

    for d in delta:
        if show_details:
            print(f"\nRunning for ρ={d}")

        for pref_length in k:
            if show_details:
                print(f"\nRunning for k={pref_length}")

            # One writer per (ρ, k) combination, only when logging is on.
            if logging:
                writer = CsvWriter(d, pref_length, rounds, lower, upper)

            current_round = 0
            while True:
                # A round limit of -1 means "never stop".
                if rounds != -1 and current_round >= rounds:
                    break

                if show_rounds:
                    print(
                        f"\nStarting round {current_round + 1} out of {rounds}"
                    )

                # n never starts below the preference list length.
                for n in range(max(lower, pref_length), upper, step):
                    # Build both sides' preferences, then count the agents
                    # for which a useful deviation was found.
                    male_prefs, female_prefs = get_preferences(
                        n, pref_length, d)
                    useful_deviators_cnt = count_useful_deviatiors(
                        male_prefs, female_prefs)

                    # Share of the n agents that have a useful deviation.
                    ratio = useful_deviators_cnt / float(n)

                    if show_details:
                        print(f"d={d} k={pref_length}: result n={n}: ", ratio)

                    if logging:
                        writer.write(n, ratio)

                current_round += 1
Beispiel #3
0
def get_jaccard_distribution(pf, args, start_t):
    """Sample random pairs and save their similarities to CSV.

    Two independent random permutations of ``range(pf.n_docs)`` are
    zipped into pairs. Pairs of an element with itself are skipped; for
    every other pair one row ``u1, u2, jac_sim, sig_sim`` is appended to
    the file given by ``args.results``.

    The result is used by jaccard_distribution.R to fit a distribution
    to the data.

    Args:
        pf: Object providing ``n_docs``, ``jaccard_similarity(u1, u2)``
            and ``sig_sim(u1, u2)``.
        args: Parsed options; ``args.results`` is the output path.
        start_t: Unused; accepted so the signature matches the other
            commands in this file.

    Returns:
        Always ``True``.
    """
    csv = CsvWriter(args.results, append=True)
    csv.write_header(['u1', 'u2', 'jac_sim', 'sig_sim'])

    written = 0
    pairs = zip(np.random.permutation(pf.n_docs),
                np.random.permutation(pf.n_docs))
    for u1, u2 in pairs:
        # Skip self-pairs. The previous bare `next` expression was a no-op,
        # so these rows were still written.
        if u1 == u2:
            continue
        csv.write([u1, u2, pf.jaccard_similarity(u1, u2), pf.sig_sim(u1, u2)])
        written += 1
        print("Wrote {} similarities".format(written), end='\r')

    return True
Beispiel #4
0
def get_candidate_distribution(pf, args, start_t):
    """Sample candidate pairs, stratified by signature similarity.

    The candidates are split into consecutive signature-similarity
    strata of width 0.05. From each stratum at most ``max_per_step``
    randomly chosen pairs are appended to the CSV file ``args.results``.
    Each row holds the pair, its signature and Jaccard similarity, a few
    attributes of ``pf`` and a weight. The weight is greater than 1 when
    the stratum held more pairs than were sampled, so that approximately
    accurate frequencies can be recovered.

    Always returns ``True``; ``start_t`` is not used.
    """
    all_candidates = list(pf.candidates())

    out = CsvWriter(args.results, append=True)
    out.write_header([
        'run_id', 'u1', 'u2', 'sig_sim', 'jac_sim', 'sig_len', 'bands',
        'max_buckets', 'used_buckets', 'weight'
    ])

    # One timestamp labels every row written by this invocation.
    run_id = datetime.now().isoformat()

    step = 0.05
    max_per_step = 100
    lim = 0
    while lim < 1:
        upper = lim + step
        # Pairs whose signature similarity lies in [lim, upper).
        stratum = [pair for pair in all_candidates if lim <= pair[1] < upper]
        n = len(stratum)
        print("{} candidates between {} and {}".format(n, lim, upper))

        # 1.0 for strata that fit entirely, otherwise the oversampling factor.
        if n > max_per_step:
            weight = n / max_per_step
        else:
            weight = max_per_step / max_per_step

        chosen = np.random.permutation(n)[:max_per_step]
        for idx in chosen:
            (c1, c2), sim = stratum[idx]
            jac_sim = pf.jaccard_similarity(c1, c2)
            out.write([
                run_id, c1, c2, sim, jac_sim, pf.sig_len, pf.n_bands,
                pf.max_buckets, len(pf.buckets), weight
            ])

        lim = upper

    return True