def default(pf, args, start_t):
    """Evaluate candidate pairs, highest signature similarity first.

    Each candidate whose Jaccard similarity exceeds 0.5 is written to the
    file given by ``args.results`` (``--results`` option, ``results.txt``
    when absent).  Both ids are incremented by one and emitted in ascending
    order (presumably converting 0-based ids to 1-based -- confirm against
    the expected output format).  With ``args.extended`` the row also holds
    the signature similarity, the Jaccard similarity, the candidate index
    and the elapsed time.

    Verification stops early once about 30 minutes have passed since
    ``start_t``, or when the 10 most recent hits took more than a minute.

    Args:
        pf: Object providing ``candidates()`` (yielding ``((c1, c2), sim)``
            items) and ``jaccard_similarity(c1, c2)``.
        args: Parsed options supporting ``in`` and attribute access
            (presumably an ``argparse.Namespace`` -- confirm with caller).
            ``results``, ``dont_append_results`` and ``extended`` are read
            when present.
        start_t: ``time.time()`` timestamp that elapsed time is measured
            from.

    Returns:
        Always ``True``.
    """
    time_limit_s = 1800 - 2  # two seconds short of 30 minutes
    rate_window_hits = 10    # hits looked back at for the rate check
    rate_window_s = 60       # ...which must have arrived within this span

    print("Getting all candidates...")
    t = time.time()
    candidates = list(pf.candidates())
    print("Done, got {} candidates in {}s".format(len(candidates),
                                                  time.time() - t))

    print("Sorting candidates...")
    t = time.time()
    candidates = sorted(candidates, key=lambda c: -c[1])
    print("Done in {}s".format(time.time() - t))

    csv_file = args.results if 'results' in args else 'results.txt'
    append_results = (not args.dont_append_results
                      if 'dont_append_results' in args else True)
    extended_results = args.extended if 'extended' in args else False
    csv = CsvWriter(csv_file, append=append_results)

    print("Verifying candidates...")
    found = 0
    # A plain list: appending is O(1), whereas np.append copies the whole
    # array on every hit.
    found_times = []
    # Initialised up front so the summary below also works when there are
    # no candidates at all.
    elapsed_t = time.time() - start_t
    for i, ((c1, c2), sim) in enumerate(candidates):
        jac_sim = pf.jaccard_similarity(c1, c2)
        elapsed_t = time.time() - start_t

        if jac_sim > .5:
            c1, c2 = min(c1, c2) + 1, max(c1, c2) + 1
            if extended_results:
                csv.write([c1, c2, sim, jac_sim, i, elapsed_t])
            else:
                csv.write([c1, c2])
            found += 1
            found_times.append(time.time() - start_t)
            print("Found {} (at signature similarity {}, after {}s)".format(
                found, sim, elapsed_t), end='\r')

        # stop when 30 minutes are over
        if elapsed_t > time_limit_s:
            print("\nTime's up, stopping.")
            break

        # every 100 candidates, check whether rate is so low we should stop
        if found >= 100 and i % 100 == 0:
            # less than 10 per minute
            if elapsed_t - found_times[-rate_window_hits] > rate_window_s:
                print("\nRate is slowing down, stopping.")
                break

    print("Finished in {}s.".format(elapsed_t))
    # Guard against a zero elapsed time (e.g. coarse clock resolution).
    per_minute = found / elapsed_t * 60 if elapsed_t > 0 else 0.0
    print("Found {} pairs, {} per minute.".format(found, per_minute))
    return True
def inner_simulation(rounds, lower, upper, step, debug, logging, k, delta):
    """
    Drive the simulation over every supplied correlation coefficient and
    preference-list length, repeating it for the requested number of rounds
    (forever when rounds is -1) and writing each result out as it arrives.
    """
    # Progress/result lines are shown for debug levels 0 and 1; the round
    # banner is shown for every level except -1.
    verbose = debug == 0 or debug == 1
    announce_rounds = debug != -1

    for rho in delta:
        if verbose:
            print(f"\nRunning for ρ={rho}")

        for list_len in k:
            if verbose:
                print(f"\nRunning for k={list_len}")

            # One writer per (ρ, k) combination, unless logging is disabled
            if logging:
                writer = CsvWriter(rho, list_len, rounds, lower, upper)

            completed = 0
            while rounds == -1 or completed < rounds:
                if announce_rounds:
                    print(
                        f"\nStarting round {completed + 1} out of {rounds}"
                    )

                # Sweep n for the current ρ and k; n never drops below k
                for n in range(max(lower, list_len), upper, step):
                    # Draw preference lists for every agent
                    male_prefs, female_prefs = get_preferences(
                        n, list_len, rho)

                    # Count the agents that have a useful deviation
                    deviators = count_useful_deviatiors(
                        male_prefs, female_prefs)

                    # Share of agents with a useful deviation
                    ratio = deviators / float(n)

                    if verbose:
                        print(f"d={rho} k={list_len}: result n={n}: ", ratio)
                    if logging:
                        writer.write(n, ratio)

                completed += 1
def get_jaccard_distribution(pf, args, start_t):
    """Sample random pairs and save their similarities to CSV.

    Two independent random permutations of ``range(pf.n_docs)`` are zipped
    into pairs.  A pair of a document with itself is skipped; for every
    other pair a row ``u1, u2, jac_sim, sig_sim`` is appended to
    ``args.results``.  The result is used by jaccard_distribution.R to fit
    a distribution to the data.

    Args:
        pf: Object providing ``n_docs``, ``jaccard_similarity(u1, u2)`` and
            ``sig_sim(u1, u2)``.
        args: Parsed options; ``args.results`` names the CSV file.
        start_t: Unused here; accepted so that all commands share one
            signature.

    Returns:
        Always ``True``.
    """
    csv = CsvWriter(args.results, append=True)
    csv.write_header(['u1', 'u2', 'jac_sim', 'sig_sim'])

    first = np.random.permutation(pf.n_docs)
    second = np.random.permutation(pf.n_docs)

    written = 0
    for u1, u2 in zip(first, second):
        if u1 == u2:
            # The original used a bare `next`, which is a no-op expression,
            # so self-pairs were written anyway.
            continue
        csv.write([u1, u2, pf.jaccard_similarity(u1, u2), pf.sig_sim(u1, u2)])
        written += 1
        print("Wrote {} similarities".format(written), end='\r')
    return True
def get_candidate_distribution(pf, args, start_t):
    """Sample pairs from candidates, stratified by signature similarity.

    The similarity range [0, 1] is cut into 20 strata of width 0.05.  From
    each stratum at most ``max_per_step`` candidates are drawn at random and
    appended to the CSV named by ``args.results``, together with the
    signature similarity, Jaccard similarity, the ``pf`` parameters and a
    weight (> 1 if there were more samples in the stratum than
    ``max_per_step``, to get approximately accurate frequencies).

    Args:
        pf: Object providing ``candidates()`` (yielding ``((c1, c2), sim)``
            items), ``jaccard_similarity(c1, c2)``, ``sig_len``, ``n_bands``,
            ``max_buckets`` and ``buckets``.
        args: Parsed options; ``args.results`` names the CSV file.
        start_t: Unused here; accepted so that all commands share one
            signature.

    Returns:
        Always ``True``.
    """
    candidates = list(pf.candidates())
    csv = CsvWriter(args.results, append=True)
    csv.write_header([
        'run_id', 'u1', 'u2', 'sig_sim', 'jac_sim', 'sig_len', 'bands',
        'max_buckets', 'used_buckets', 'weight'
    ])
    run_id = datetime.now().isoformat()

    n_strata = 20  # strata of width 0.05
    max_per_step = 100
    # Bounds are derived from an integer index rather than by repeatedly
    # adding 0.05: accumulated rounding error would otherwise decide how
    # many strata there are and whether sim == 1.0 falls into any of them.
    for k in range(n_strata):
        lim = k / n_strata
        upper = (k + 1) / n_strata
        if k == n_strata - 1:
            # The last stratum is closed so that sim == 1.0 is sampled too.
            cand = [(c, sim) for c, sim in candidates if lim <= sim <= upper]
        else:
            cand = [(c, sim) for c, sim in candidates if lim <= sim < upper]
        n = len(cand)
        print("{} candidates between {} and {}".format(n, lim, upper))

        weight = max(n, max_per_step) / max_per_step
        for i in np.random.permutation(n)[:max_per_step]:
            (c1, c2), sim = cand[i]
            csv.write([
                run_id, c1, c2, sim,
                pf.jaccard_similarity(c1, c2), pf.sig_len, pf.n_bands,
                pf.max_buckets,
                len(pf.buckets), weight
            ])
    return True