def gather_fit_data(fit_count, maxfev):
    fn_format = "fits.fit_count={fit_count}_maxfev={maxfev}.txt"
    try:
        return utils.load_table(fn_format.format(**locals()))
    except OSError:
        pass
    d = utils.filter_preferred_ml(utils.load_all())
    d = d[~d["method"].isin(["imsrg[f]+eom[n]"])]
    results = []
    with multiprocessing.Pool(4) as p:
        results = p.map(
            functools.partial(gather_fit_data_inner,
                              fit_count=fit_count, maxfev=maxfev),
            tuple(d.groupby(
                ["label", "interaction", "num_filled", "freq", "method"])))
    d = pd.DataFrame.from_records(itertools.chain(*results))
    print("{} fits failed, out of {}".format(
        (d["fit_method"] == "fixedab").sum(), len(d)))
    # fit_count=5:
    #   maxfev=default: 198 fits failed, out of 2247
    #   maxfev=10k:     40 fits failed, out of 2248
    #   maxfev=100k:    0 fits failed
    cols = """
        interaction label freq num_filled method
        best_chisq best_coefficient best_coefficient_err
        best_constant best_constant_err best_fixedab_constant_err
        best_exponent best_exponent_err best_fit_method best_fit_stop
        chisq coefficient coefficient_err
        constant constant_err fixedab_constant_err
        exponent exponent_err fit_method fit_stop
        rel_discrep rel_discrep_err rel_dist rel_dist_err
    """.split()
    assert len(d.columns) == len(cols)
    utils.save_table(fn_format.format(**locals()), d[cols])
    return d
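# Usage sketch (not from the original source): gather_fit_data caches its
# result in fits.fit_count=..._maxfev=....txt, so only the first call with a
# given (fit_count, maxfev) pair is expensive. The column names used below
# come from the `cols` list above.
if __name__ == "__main__":
    fits = gather_fit_data(fit_count=5, maxfev=100000)
    print(fits[["interaction", "method", "chisq", "fit_method"]].head())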
def plot_table(path_dict, save_path):
    pd.set_option('display.precision', 4)
    pd.set_option('display.width', 40)
    pd.set_option('display.float_format', '{:,.3f}'.format)
    for i, act in enumerate(path_dict.keys()):
        row_labels, values_train, values_test = [], [], []
        for path in path_dict[act]:
            with open(path + '/results.json', 'r') as f:
                results = json.load(f)
            if (plot_it is not None) and (results['combinator'] not in plot_it):
                continue
            if i == 0:
                col_labels = utils.fill_col_labels(results)
            temp_train, temp_test = utils.fill_row_values(results, path, act)
            values_train.append(temp_train)
            values_test.append(temp_test)
            if 'test_acc_hr_0.0' in results:
                temp_train, temp_test = utils.fill_row_values(
                    results, path, act, hr=0.0)
                values_train.append(temp_train)
                values_test.append(temp_test)
        # create table
        table_train = utils.create_table(values_train, col_labels, act, 'train')
        table_test = utils.create_table(values_test, col_labels, act, 'test')
        # save table
        utils.save_table(table_train, table_test, save_path, act)
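# Usage sketch (hypothetical directory names): `path_dict` maps an activation
# name to run directories, each of which must contain a results.json file;
# `plot_it` is a module-level filter on the 'combinator' field.
if __name__ == "__main__":
    runs = {"relu": ["runs/relu_seed0", "runs/relu_seed1"]}
    plot_table(runs, save_path="tables/")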
def plot_table_attention(path_dict, save_path):
    for i, act in enumerate(path_dict.keys()):
        row_labels, values_train, values_test = [], [], []
        if act not in COMBINED_ACT:
            continue
        for path in path_dict[act]:
            with open(f'{path}/results.json', 'r') as f:
                results = json.load(f)
            if results['combinator'] not in ATT_LIST or \
                    results['combinator'] not in plot_it:
                continue
            if i == 0:
                col_labels = utils.fill_col_labels(results, att=1)
            temp_train, temp_test = utils.fill_row_values(
                results, path, act, att=1)
            values_train.append(temp_train)
            values_test.append(temp_test)
            if 'test_acc_hr_0.0' in results:
                temp_train, temp_test = utils.fill_row_values(
                    results, path, act, att=1, hr=0.0)
                values_train.append(temp_train)
                values_test.append(temp_test)
        # create table
        table_train = utils.create_table(values_train, col_labels, act,
                                         'train', att=1)
        table_test = utils.create_table(values_test, col_labels, act,
                                        'test', att=1)
        # save table
        utils.save_table(table_train, table_test, save_path + 'ATT_', act)
def load_full_fit_data(fit_count=DEFAULT_FIT_COUNT, maxfev=DEFAULT_MAXFEV):
    '''Load fit data from file if available. Otherwise calculate the fits.'''
    fn = "fit_data.fit_count={fit_count}_maxfev={maxfev}.txt".format(
        **locals())
    try:
        return utils.load_table(fn)
    except OSError:
        pass
    sys.stderr.write("Fit data has not yet been calculated. "
                     "This may take a few minutes...\n")
    sys.stderr.flush()
    d = utils.filter_preferred_ml(utils.load_all())
    d = d[~d["method"].isin(["imsrg[f]+eom[n]"])]
    with multiprocessing.Pool(4) as p:
        results_s, missing_num_shells = zip(*p.map(
            functools.partial(gather_fit_data,
                              fit_count=fit_count, maxfev=maxfev),
            tuple(d.groupby(
                ["label", "interaction", "num_filled", "freq", "method"]))))
    results = itertools.chain(*results_s)
    missing_fn = ("fits_missing_points."
                  "fit_count={fit_count}_maxfev={maxfev}.log".format(
                      **locals()))
    # missing_fn is already fully formatted at this point
    utils.save_table(missing_fn, pd.DataFrame.from_records(missing_num_shells))
    sys.stderr.write("Missing data points logged to: {}\n".format(missing_fn))
    sys.stderr.flush()
    d = pd.DataFrame.from_records(results)
    num_failed = (d["fit_method"] == "fixedab").sum()
    if num_failed:
        sys.stderr.write("{} out of {} fits failed\n".format(
            num_failed, len(d)))
        sys.stderr.flush()
    utils.save_table(fn, d)
    return d
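# Usage sketch: with no arguments, the module-level defaults DEFAULT_FIT_COUNT
# and DEFAULT_MAXFEV apply, and the cached table in
# fit_data.fit_count=..._maxfev=....txt is returned when it exists.
if __name__ == "__main__":
    fit_data = load_full_fit_data()
    print("{} fits loaded".format(len(fit_data)))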
def plot_table_max(path_dict, save_path, limit):
    res_json = ['results.json', 'results_hr.json']
    row_labels, values_train, values_test = [], [], []
    for i, act in enumerate(path_dict.keys()):
        for path in path_dict[act]:
            for res in res_json:
                try:
                    with open(f'{path}/{res}', 'r') as f:
                        results = json.load(f)
                    # att = 2 if res == 'results_hr.json' else 0
                except Exception:
                    continue
                if i == 0:
                    col_labels = utils.fill_col_labels(results, max_=True,
                                                       att=2)
                temp_train, temp_test = utils.fill_row_values(
                    results, path, act, max_=True, att=2)
                # keep the run only if some test value reaches the limit
                if True not in np.where(temp_test[8] >= limit, True, False):
                    continue
                values_train.append(temp_train)
                values_test.append(temp_test)
    # create table
    table_train = utils.create_table(values_train, col_labels, '', 'train',
                                     max_=True)
    table_test = utils.create_table(values_test, col_labels, '', 'test',
                                    max_=True)
    # save table
    utils.save_table(table_train, table_test, save_path, 'best')
def plot_table_attention(path_dict, save_path):
    res_json = ['results.json', 'results_hr.json']
    for i, act in enumerate(path_dict.keys()):
        row_labels, values_train, values_test = [], [], []
        if act not in COMBINED_ACT:
            continue
        for path in path_dict[act]:
            for res in res_json:
                try:
                    with open(f'{path}/{res}', 'r') as f:
                        results = json.load(f)
                    # att = 1 if res == 'results_hr.json' else 0
                except Exception:
                    continue
                if results['combinator'] not in ATT_LIST:
                    continue
                if i == 0:
                    col_labels = utils.fill_col_labels(results, att=1)
                temp_train, temp_test = utils.fill_row_values(
                    results, path, act, att=1)
                values_train.append(temp_train)
                values_test.append(temp_test)
        # create table
        table_train = utils.create_table(values_train, col_labels, act,
                                         'train', att=1)
        table_test = utils.create_table(values_test, col_labels, act,
                                        'test', att=1)
        # save table
        utils.save_table(table_train, table_test, save_path + 'ATT_', act)
def plot_table(path_dict, save_path):
    pd.set_option('display.precision', 4)
    pd.set_option('display.width', 40)
    for i, act in enumerate(path_dict.keys()):
        row_labels, values_train, values_test = [], [], []
        for path in path_dict[act]:
            with open(path + '/results.json', 'r') as f:
                results = json.load(f)
            if i == 0:
                col_labels = utils.fill_col_labels(results)
            temp_train, temp_test = utils.fill_row_values(results, path, act)
            values_train.append(temp_train)
            values_test.append(temp_test)
        # create table
        table_train = utils.create_table(values_train, col_labels, act, 'train')
        table_test = utils.create_table(values_test, col_labels, act, 'test')
        # save table
        utils.save_table(table_train, table_test, save_path, act)
def plot_table_max(path_dict, save_path, limit):
    row_labels, values_train, values_test = [], [], []
    for i, act in enumerate(path_dict.keys()):
        for path in path_dict[act]:
            with open(f'{path}/results.json', 'r') as f:
                results = json.load(f)
            if (plot_it is not None) and (results['combinator'] not in plot_it):
                continue
            if i == 0:
                col_labels = utils.fill_col_labels(results, max_=True, att=2)
            temp_train, temp_test = utils.fill_row_values(
                results, path, act, max_=True, att=2)
            # keep the run only if some test value reaches the limit
            if True not in np.where(temp_test[12] >= limit, True, False):
                continue
            values_train.append(temp_train)
            values_test.append(temp_test)
    # create table
    table_train = utils.create_table(values_train, col_labels, '', 'train',
                                     max_=True)
    table_test = utils.create_table(values_test, col_labels, '', 'test',
                                    max_=True)
    # save table
    utils.save_table(table_train, table_test, save_path, 'best')
#!/usr/bin/env python3 import os, re, sys sys.path.insert(1, os.path.join(os.path.dirname(__file__), "..")) import utils fn = re.match(r"(.*)-postprocess\.py", __file__).group(1) + ".txt" d = utils.load_table(fn) d = utils.check_fun_dep( d, ["interaction", "num_shells", "num_filled", "freq", "method"], {"energy": 2e-5}, combiner=utils.rightmost_combiner) d = d.sort_values( ["interaction", "num_shells", "num_filled", "freq", "method", "energy"]) with open(fn, "w") as f: f.write(""" # Functional dependencies: # # * (interaction, num_shells, num_filled, freq, method) -> energy # """[1:]) utils.save_table(f, d)
#!/usr/bin/env python3 import os, re, sys sys.path.insert(1, os.path.join(os.path.dirname(__file__), "..")) import utils fn = re.match(r"(.*)-postprocess\.py", __file__).group(1) + ".txt" d = utils.load_table(fn) # canonicalization can introduce duplicates, in addition to whatever # duplicates that already exist in the file d["p"] = d["p"].map(utils.canonicalize_p) d = utils.check_fun_dep(d, ["interaction", "num_shells", "num_filled", "freq", "method", "p", "term_id"], {"correction": 1e-7}, combiner=utils.rightmost_combiner) d = d.sort_values(["interaction", "num_shells", "num_filled", "freq", "method", "p", "term_id", "correction"]) with open(fn, "w") as f: f.write(""" # term_ids 3 and 4: QDPT2 # term_ids 5 to 22: QDPT3 # # Functional dependencies: # # * (num_shells, num_filled, freq, method, p, term_id) -> correction # """[1:]) utils.save_table(f, d)
def calculate(start, end, name):
    sentences, vnps, adjectives, items = select_data(start, end)

    # individual frequencies
    filename = 'items_%d_%s.csv' % (start.year, name)
    print(filename)
    items_freq = calculate_frequency(sentences, items, 'referred_items')
    save_table(items_freq, open(filename, 'w'))

    filename = 'vnps_%d_%s.csv' % (start.year, name)
    print(filename)
    vnps_freq = calculate_frequency(sentences, vnps, 'verb_noun_pair')
    save_table(vnps_freq, open(filename, 'w'))

    filename = 'adjectives_%d_%s.csv' % (start.year, name)
    print(filename)
    adjs_freq = calculate_frequency(sentences, adjectives, 'adjectives')
    save_table(adjs_freq, open(filename, 'w'))

    # user networks
    filename = 'vnp_user_net_%d_%s.csv' % (start.year, name)
    print(filename)
    vnp_links = count_user_links(sentences, vnps, 'verb_noun_pair')
    save_table(vnp_links, open(filename, 'w'))

    filename = 'adj_user_net_%d_%s.csv' % (start.year, name)
    print(filename)
    adj_links = count_user_links(sentences, adjectives, 'adjectives')
    save_table(adj_links, open(filename, 'w'))

    filename = 'itm_user_net_%d_%s.csv' % (start.year, name)
    print(filename)
    itm_links = count_user_links(sentences, items, 'referred_items')
    save_table(itm_links, open(filename, 'w'))

    # multi-frequencies
    filename = 'item_vs_vnp_%d_%s.csv' % (start.year, name)
    print(filename)
    item_vs_vnp = calculate_dual_frequency(
        sentences, items, 'referred_items', vnps, 'verb_noun_pair', diary=True)
    save_table(item_vs_vnp, open(filename, 'w'))

    filename = 'item_vs_adj_%d_%s.csv' % (start.year, name)
    print(filename)
    item_vs_adj = calculate_dual_frequency(
        sentences, items, 'referred_items', adjectives, 'adjectives',
        diary=True)
    save_table(item_vs_adj, open(filename, 'w'))

    filename = 'vnp_vs_vnp_%d_%s.csv' % (start.year, name)
    print(filename)
    vnp_vs_vnp = calculate_dual_frequency(
        sentences, vnps, 'verb_noun_pair', vnps, 'verb_noun_pair', diary=True)
    save_table(vnp_vs_vnp, open(filename, 'w'))

    filename = 'adj_vs_adj_%d_%s.csv' % (start.year, name)
    print(filename)
    adj_vs_adj = calculate_dual_frequency(
        sentences, adjectives, 'adjectives', adjectives, 'adjectives',
        diary=True)
    save_table(adj_vs_adj, open(filename, 'w'))

    filename = 'user_vs_item_%d_%s.csv' % (start.year, name)
    print(filename)
    user_vs_item = calculate_frequency_user(sentences, items, 'referred_items')
    save_table(user_vs_item, open(filename, 'w'))

    filename = 'user_vs_vnp_%d_%s.csv' % (start.year, name)
    print(filename)
    user_vs_vnp = calculate_frequency_user(sentences, vnps, 'verb_noun_pair')
    save_table(user_vs_vnp, open(filename, 'w'))

    return
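# Usage sketch (hypothetical date range and label): `start` and `end` must be
# datetime-like, since `start.year` is embedded in every output filename.
import datetime
if __name__ == "__main__":
    calculate(datetime.date(2012, 1, 1), datetime.date(2012, 6, 30),
              'first_half')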