def main(): cfg = tools.load_config("rep_graph_mc") db = tools.get_db(cfg["db"], 'r') for ns in range(1, 4): ups_key = "ups%ds" % ns axis = cfg[ups_key]['axis'] binning = tools.axis2bins(axis) hists_years = {} for data_key in ["mc2011", "mc2012"]: print data_key db_ups = db[data_key][ups_key] hists = [pyroot.h1_axis(axis), pyroot.h1_axis(axis), pyroot.h1_axis(axis)] for h in hists: h.blue() if data_key == "mc2011" else h.red() for i, bin in enumerate(binning, start=1): sigmas = [] for np in pdg.VALID_UPS_DECAYS[ns]: chib_key = 'chib1%dp' % np sigmas.append(db_ups[bin][chib_key]['sigma']) sigma = VE(str(sigmas[0])) hists[0][i] = sigma for j, s in enumerate(sigmas[1:], start=1): hists[j][i] = VE(str(s)) / sigma hists_years[data_key] = hists print "%s - %s" % (data_key, ups_key) shell()
def evaluate(label):
    # Run the evaluation
    # Load the configuration data
    config = tools.load_config(label)
    t = np.linspace(0, config['sim_time'], config['time_samples'])
    solution = odeint(double_pend, config['initial'], t,
                      args=(config['g'], config['m1'], config['m2'],
                            config['L1'], config['L2']))
    positions = position(solution, config['L1'], config['L2'], t)
    energies = energy(solution, config['g'], config['m1'], config['m2'],
                      config['L1'], config['L2'], t)
    # Insert the time column into the solution array
    sol = np.insert(solution, 0, t, axis=1)
    # Store the data using the label as an indicator
    np.savetxt('data/double_solutions_%s.txt' % label, sol, delimiter='\t')
    np.savetxt('data/double_positions_%s.txt' % label, positions, delimiter='\t')
    np.savetxt('data/double_energies_%s.txt' % label, energies, delimiter='\t')
def main(): cli_args = docopt(__doc__, version="1.0") cfg = tools.load_config("rep_graph_fits") cfg_profile = cfg['profiles'][cli_args['--profile']] db = tools.get_db(cfg_profile['db']) binning = tools.axis2bins(cfg_profile['axis']) for plot in cfg_profile['plots']: key = plot['key'] graphs = [] for data_key in ['2011', '2012']: values = [] for bin in binning: ve = pyroot.VE(str(db[data_key][bin][key])) values.append((bin, ve)) graphs.append(graph.Graph(color=data_key, values=values)) ymin = plot.get("ymin", None) ymax = plot.get("ymax", None) mg = graph.MultiGraph(graphs=graphs, ymin=ymin, ymax=ymax) mg.draw() level = plot.get("level", None) if level: graphs[0].h.level(level) filename = "%s/%s" % (cfg_profile["output_dir"], key) tools.save_figure(filename) shell()
def main(): cfg = tools.load_config("rep_width_data_mc") axis = cfg["axis"] binning = tools.axis2bins(axis) db_mc = tools.get_db(cfg["mc"], 'r') db_data = tools.get_db(cfg["data"], 'r') for year in ["2011", "2012"]: h_mc = pyroot.h1_axis(axis) h_mc.red() h_data = pyroot.h1_axis(axis) h_data.blue() db_mc_year = db_mc["mc%s" % year]["ups1s"] db_data_year = db_data[year] for i, bin in enumerate(binning, start=1): h_mc[i] = VE(str(db_mc_year[bin]['chib11p']['sigma'])) h_data[i] = VE(str(db_data_year[bin]['sigma_b1_1p'])) h_ratio = h_data / h_mc h_ratio.Draw() h_ratio.Fit("pol0") tools.save_figure("%s/sigma_data_mc_%s" % (cfg["output_dir"], year)) shell()
def load_config(self, filename):
    """Load a config file:
    - module config
    - user config
    """
    config_file = os.path.join(self.module_folder, filename)
    return load_config(config_file)
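# The wrapper above delegates to a module-level load_config helper that each
# project defines for itself; across these snippets it reads JSON, YAML, or INI
# files. A minimal sketch, assuming a loader that dispatches on the file
# extension and returns a plain dict (the real helpers may also resolve search
# paths or merge module and user configs):
import json
import os

import yaml  # PyYAML; only needed for .yaml/.yml configs


def load_config(path):
    """Read a JSON or YAML config file and return its contents as a dict."""
    with open(path) as f:
        if os.path.splitext(path)[1] in ('.yaml', '.yml'):
            return yaml.safe_load(f)
        return json.load(f)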
def main(): cfg = tools.load_config("fig_mceff") data = cfg["data"] pic = FigureEff(title=cfg["title"], label=cfg["label"], size=cfg["size"], ncols=cfg["ncols"], data=data, scale=cfg["scale"]) print pic.texify()
def rand():
    # Generate 100 random configuration files
    config_main = tools.load_config()
    for i in range(0, 100):  # 100 limit arbitrary
        config = config_main
        config['initial'] = gen_initial()
        with open('config/config_rand%s.json' % i, 'w') as f:
            json.dump(config, f)
def pert():
    # Generate 100 slightly perturbed configurations from some base.
    config_main = tools.load_config()
    for i in range(0, 100):  # arbitrary limit
        config = config_main
        config['initial'][1] = config['initial'][1] + 0.035  # Perturbation
        with open('config/config_pert%s.json' % i, 'w') as f:
            json.dump(config, f)
        config_main = config
def main():
    try:
        config = load_config()
        init_logger('logs/cloudsweep.log')
        logger = logging.getLogger('clouddump')
        service = Factory().create(config['service']['driver'],
                                   config['service'])
        service.delete_old_files()
        logger.info("Program terminated successfully")
    except SystemExit:
        logger.info("Program terminated with errors")
def main(): cfg = tools.load_config("rep_syst_model") for ns in range(1, 4): cfg_decay = cfg.get("ups%ds" % ns, None) if not cfg_decay: continue bins = tools.axis2bins(cfg_decay["bins"].values()) valid_bins = {} for np in cfg_decay["bins"]: valid_bins[int(np)] = tools.axis2bins(cfg_decay["bins"][np]) tab = table.SqsTable( title=cfg["title"].format(ns=ns), label=cfg["label"].format(ns=ns), ns=ns, binning=bins, scale=cfg_decay["scale"], maxbins=cfg_decay["maxbins"] ) for np in pdg.VALID_UPS_DECAYS[ns]: tab.add_row(key=str(np), title=r"$N_{{\chi_b(%dP)}}$" % np) if np != pdg.VALID_UPS_DECAYS[ns][-1]: tab.space() db_ref = tools.get_db(cfg_decay["db_ref"]) dbs = [] for db_name in cfg_decay["nchib"]: dbs.append(tools.get_db(db_name, "r")) for bin in bins: for year in ["2011", "2012"]: bin_group = tab.get_group(bin, year) for np in pdg.VALID_UPS_DECAYS[ns]: if bin in valid_bins[np]: change = dbtools.get_chib_squared_error( db_ref, dbs, np, year, bin ) if change is None: value = None else: stat = dbtools.get_chib_yield_err( db_ref, np, year, bin ) value = ( r"${\pm %.1f \%% \stat}^{+%.1f \%%}_{-%.1f \%%} \syst$" % (stat * 100, change[0] * 100, change[1] * 100) ) else: value = None bin_group.add_value(key=str(np), value=value) print tab.texify()
def main(): canvas = ROOT.TCanvas("c_ups", "c_ups", 800, 600) cfg = tools.load_config("mc") cfg_tuples = tools.load_config("tuples") utree = ROOT.TChain("UpsilonAlg/Upsilon") utree.Add(cfg_tuples["mc2011"][0]) cut = cfg["decays"]["ups1s"]["ucut"] cut["pt_ups"] = [18, 22] cut_str = tools.cut_dict2str(cut) h = ROOT.TH1D("h_ups", "h_ups", 100, 9.2, 9.7) utree.Draw("m_dtf>>%s" % h.GetName(), cut_str) model = UpsMCModel( canvas=canvas, data=h, binning=[100, 9.2, 9.7], ) model.fitData() print(model) tools.save_figure("mc/ups1s/ups2011_dcb")
def copy_models(dest_foldername, df):
    check_if_folder_exists(dest_foldername)
    models_folder = join(TRAINING_DATA_PATH, 'models')
    for i in range(len(df)):
        entry = df.loc[i]
        model_id = entry['model_name']
        source_path = join(models_folder, model_id)
        config = tools.load_config(join(source_path, 'model.pt'))
        dataset = config['source_dataset'].lower()
        embedding = config['embedding']
        destination = join(dest_foldername, f'{dataset}_{embedding}_{model_id}')
        shutil.copytree(source_path, destination)
def main():
    tools.error_led.off()
    config = tools.load_config(CONFIG_FILE)
    if args.debug:
        print(config)
    device_info = {
        "device_id": config["device_id"],
        "passkey_hash": config["passkey_hash"],
        "latitude": config["latitude"],
        "longitude": config["longitude"]
    }
    # Check if device is registered
    send_data = tools.send_data_to_server(REGISTER_DEVICE_URL, device_info)
    if args.debug:
        print(send_data)
    # Remove positional variables as they are no longer needed
    device_info.pop("latitude")
    device_info.pop("longitude")
    # Import sensor objects
    sensorList = tools.import_sensors(config)
    nextTime = int(get_new_time())
    if args.debug:
        print(int(time.time()), nextTime)
    while True:
        while time.time() < nextTime:
            continue
        # Read and format sensor data
        data = read_from_all(sensorList)
        dataToSend = dict(device_info)  # copy so the registration payload is not mutated between readings
        dataToSend.update({'time': nextTime})
        dataToSend.update(tools.format_dict(data))
        # Send data to server
        print(dataToSend)
        send_data = tools.send_data_to_server(ADD_DATA_URL, dataToSend)
        if args.debug:
            print(send_data)
        # Calculate next time to retrieve information
        nextTime = int(get_new_time())
        if args.debug:
            print(int(time.time()), nextTime)
def __init__(self, title, label, ns, binning, scale=1, resizebox=None,
             maxbins=None, is_cmidrule=True):
    super(PtTable, self).__init__(title=title, label=label, scale=scale,
                                  resizebox=resizebox)
    cfg = tools.load_config("pttable")
    self.ns = ns
    self.binning = binning
    self.maxbins = maxbins
    title = cfg["title"].format(ns=ns) if ns else cfg["titlemumu"]
    self.ups = self.add_subgroup(key="ups", title=title)
    for bin in binning:
        self.ups.add_subgroup(key=tuple(bin),
                              title=cfg['range'].format(bin[0], bin[1]),
                              is_cmidrule=is_cmidrule)
def main():
    # log = Logger()
    cli_args = get_cli_args()
    if cli_args.complete:
        complete()
        exit(0)
    cfg = tools.load_config("fits")[cli_args.profile]
    if cli_args.year == 'all':
        years = ['2011', '2012']
    elif cli_args.year == "full":
        years = ['all']
    else:
        years = [cli_args.year]
    print "Years: " + str(years)
    args = []
    sleep = 0
    if "lambda" in cfg["profiles"]:
        cfg["profiles"].remove("lambda")
        cfg["profiles"] += ["lambda%d" % i for i in range(0, 11)]
    print "Profiles: " + str(cfg["profiles"])
    for profile in cfg["profiles"]:
        for year in years:
            for bin in tools.get_binning_per_year(cfg, year):
                print str(bin), ", ",
                args += ["--tab-with-profile", "Tomorrow"]
                args += ["--title", "{0}-{1} {2} {3}".format(
                    bin[0], bin[1], year, profile
                )]
                args += ["-e", "./fit.sh" + " {0} {1} {2} {3} {4} {5}".format(
                    sleep, cfg["decay"], year, bin[0], bin[1], profile
                )]
                sleep += 2
    print "---"
    gnome_terminal(args)
def loss_landscape(eval_model_filepath, tokenizer_filepath, result_filepath,
                   model_num, examples_dirpath, is_training):
    '''LOAD MODELS, EMBEDDING, AND TOKENIZER'''
    config = tools.load_config(eval_model_filepath)
    if config['embedding'] == 'MobileBERT':
        tools.USE_AMP = False
    clean_models_filepath = tools.get_clean_model_filepaths(config)
    clean_testing_model_filepath = tools.get_clean_model_filepaths(config, for_testing=True)
    models = tools.load_all_models(eval_model_filepath, clean_models_filepath,
                                   clean_testing_model_filepath)
    embedding_matrix = tools.get_embedding_matrix(models['eval_model'])
    clean_embedding_matrix = tools.get_average_clean_embedding_matrix(models['clean_models'])
    embedding_matrices = [embedding_matrix, clean_embedding_matrix]
    tools.TOKENIZER = tools.load_tokenizer(tokenizer_filepath, config)
    tools.MAX_INPUT_LENGTH = tools.get_max_input_length(config)

    class_list = tools.get_class_list(examples_dirpath)
    source_class = 1
    target_class = 7
    clean_class_list = [source_class, target_class]

    # trigger_token_ids = \
    #     tools.make_initial_trigger_tokens(is_random=False, initial_trigger_words="ok")
    trigger_token_ids = torch.tensor([0]).to(tools.DEVICE)
    trigger_length = len(trigger_token_ids)

    tools.update_logits_masks(class_list, clean_class_list, models['eval_model'])

    temp_examples_dirpath = join('/'.join(clean_models_filepath[0].split('/')[:-1]),
                                 'clean_example_data')
    vars, trigger_mask, source_class_token_locations = \
        playground.initialize_attack_for_source_class(temp_examples_dirpath,
                                                      source_class, trigger_token_ids)
    best_k_ids = torch.tensor([list(tools.TOKENIZER.vocab.values())]).to(tools.DEVICE)
    loss_per_candidate = \
        playground.get_loss_per_candidate(models, vars, source_class_token_locations,
                                          trigger_mask, trigger_token_ids, best_k_ids,
                                          len(trigger_token_ids) - 1, source_class,
                                          target_class, clean_class_list, class_list,
                                          is_testing=True)

    np.save(f'/scratch/utrerf/TrojAI/NLP/round7/loss_landscape_model_{model_num}/candidates',
            np.array([i[0].detach().cpu().numpy() for i in loss_per_candidate]))
    np.save(f'/scratch/utrerf/TrojAI/NLP/round7/loss_landscape_model_{model_num}/losses',
            np.array([i[1] for i in loss_per_candidate]))
    np.save(f'/scratch/utrerf/TrojAI/NLP/round7/loss_landscape_model_{model_num}/embeddings',
            np.array([i[2] for i in loss_per_candidate]))
def main():
    '''
    Makes the database dump, compresses it, and uploads it to the chosen service
    '''
    try:
        init_logger('logs/clouddump.log')
        logger = logging.getLogger('clouddump')
        config = load_config()
        service = Factory().create(config['service']['driver'],
                                   config['service'])
        date_time = strftime("%Y%m%d%H%M%S", gmtime())
        file_name = TMP_DIR + date_time + '_' + config['database']['name']
        database = Factory().create(config['database']['driver'],
                                    config['database'])
        dumped_file = database.dump(config['database']['name'], file_name)
        service.upload(dumped_file)
        os.remove(dumped_file)
        logger.info("%s removed from local system" % dumped_file)
        logger.info("Program terminated successfully")
    except SystemExit:
        logger.info("Program terminated with errors")
def main(): cfg_rep = tools.load_config("rep_mceff") db = tools.get_db(cfg_rep["db"], "r") for ns in range(1, 4): graph_values = { "mc2011": defaultdict(list), "mc2012": defaultdict(list), } for np in pdg.VALID_UPS_DECAYS[ns]: title = cfg_rep["title"].format( ns=ns, np=np ) label = cfg_rep["label"].format( ns=ns, np=np ) ups_key = "ups%ds" % ns axis = cfg_rep["axis"]["ups%ds" % ns] bins = tools.axis2bins(axis) maxbins = cfg_rep["maxbins"] tab = create_table(label=label, title=title, scale=cfg_rep["scale"], cfg_rows=cfg_rep["rows"], ns=ns, np=np, binning=bins, maxbins=maxbins) for bin in bins: for data_key in ["mc2011", "mc2012"]: mc = mctools.MC(db=db[data_key][ups_key], ns=ns, np=np) bin_group = tab.get_bin(bin) data_group = bin_group.add_subgroup( key=data_key, title=cfg_rep["data_titles"][data_key] ) for nb in range(1, 3): nchib, nups, eff = ( mc.nchib(bin, nb), mc.nups(bin, nb), mc.eff(bin, nb)) assert(nchib.value() != 0) assert(nups.value() != 0) assert(eff.value() != 0) data_group.add_value("n%d" % nb, nchib) data_group.add_value("nups%d" % nb, nups) data_group.add_value("eff%d" % nb, eff * 100, is_bold=True) eff = mc.eff(bin) * 100 data_group.add_value("eff", eff, is_bold=True) graph_values[data_key][np].append((bin, eff)) print tab.texify() for np in pdg.VALID_UPS_DECAYS[ns]: graphs = [] for data_key in ["mc2011", "mc2012"]: g = graph.Graph(color=data_key, values=graph_values[data_key][np], space=3) graphs.append(g) mg = graph.MultiGraph(graphs=graphs, ymin=0) mg.draw() output = "mc/eff/ups%d_%d" % (ns, np) tools.save_figure(output)
def main():
    cli_args = docopt(__doc__, version='v1.0')

    def fit(niters=1):
        for iter in range(niters):
            model.fitData()
            print(model)
            if cfg['save?']:
                save(cfg['name'], model, cli_args["--year"],
                     (int(cli_args["--ptbegin"]), int(cli_args["--ptend"])))
            if model.status:
                log.info("OK")
                break
            else:
                log.err("BAD")

    log = Logger()
    # cli_args = get_cli_args()
    if cli_args["--complete"]:
        complete()
        exit(0)
    tuples_cfg = tools.load_config("tuples")
    if cli_args["--year"] != "all":
        tuples = [tuples_cfg[cli_args["--year"]]]
    else:  # all
        tuples = [tuples_cfg[year] for year in ['2011', '2012']]
    log.info("Tuples: " + str(tuples))
    cfg = tools.load_config(cli_args["--decay"])
    cfg.update(cfg['profiles'].get(cli_args["--profile"], {}))
    del cfg["profiles"]
    tree = ROOT.TChain(cfg["tree"])
    for file_name in tuples:
        tree.Add(file_name)
    fitter = get_fitter(cli_args["--decay"])
    canvas = ROOT.TCanvas("c_fit", "{year} {start}-{end} {name}".format(
        year=cli_args["--year"],
        start=cli_args["--ptbegin"],
        end=cli_args["--ptend"],
        name=cfg["name"])
    )
    cut = cfg['cut']
    cut["pt_ups"] = (int(cli_args["--ptbegin"]), int(cli_args["--ptend"]))
    log.info("Cut: %s" % tools.cut_dict2str(cfg['cut']))
    # is_unbinned = (
    #     True if cfg["unbinned?"] and int(cli_args["--ptbegin"]) >= 10 else False
    # )
    is_unbinned = cfg["unbinned?"]
    if is_unbinned:
        data = source.dataset(tree=tree, cut=cut, field=cfg['field'],
                              has_splot=cfg['splot?'])
    else:
        data = source.histogram(tree=tree, cut=cut, field=cfg['field'],
                                nbins=cfg['nbins'])
    # mc = None
    log.info("Profile:" + pprint.pformat(cfg, indent=2))
    model = fitter.prepare_model(
        canvas=canvas,
        data=data,
        year=cli_args['--year'],
        interval=cfg['cut'][cfg['field']],
        nbins=cfg['nbins'],
        name=cfg['name'],
        has_splot=cfg['splot?'],
        profile=cfg,
        pt_ups=cut["pt_ups"]
    )
    fit()
    if not model.status:
        fit()
    if cli_args["--interactive"] or not model.status:
        shell()
def main(): cli_args = docopt(__doc__, version='v1.0') canvas = ROOT.TCanvas("c_yields", "c_yields", 800, 600) cfg = tools.load_config("rep_yields") report_key = cli_args["--profile"] report_cfg = cfg[report_key] db = tools.get_db(report_cfg["db"]) output_dir = report_cfg["output_dir"] plots_cfg = report_cfg["plots"] for plot_cfg in plots_cfg: yield_key = plot_cfg["key"] bins = tools.axis2bins(plot_cfg["axis"]) graphs = [] graphs_scaledbybin = [] graphs_scaledbylum = [] for data_key in ["2011", "2012"]: values = [] values_scaledbybinsize = [] values_scaledbylum = [] for bin in bins: if bin in db[data_key]: value = VE(str(db[data_key][bin][yield_key])) values.append((bin, value)) value_bin = value / (bin[1] - bin[0]) values_scaledbybinsize.append((bin, value_bin)) scale = 1 if data_key == "2011" else 2 values_scaledbylum.append((bin, value_bin / scale)) else: values.append((bin, None)) values_scaledbybinsize.append((bin, None)) values_scaledbylum.append((bin, None)) graphs.append( graph.Graph(color=data_key, values=values) ) graphs_scaledbybin.append( graph.Graph(color=data_key, values=values_scaledbybinsize) ) graphs_scaledbylum.append( graph.Graph(color=data_key, values=values_scaledbylum) ) ymin = plot_cfg["ymin"] ymax = plot_cfg["ymax"] if plot_cfg.get("logscale?", False): canvas.SetLogy(True) else: canvas.SetLogy(False) mg = graph.MultiGraph(graphs=graphs, ymin=ymin[0], ymax=ymax[0]) mg.draw() tools.save_figure("%s/%s" % (output_dir, yield_key)) mg = graph.MultiGraph(graphs=graphs_scaledbybin, ymin=ymin[1], ymax=ymax[1]) mg.draw() tools.save_figure("%s/%s_scaledbybin" % (output_dir, yield_key)) mg = graph.MultiGraph(graphs=graphs_scaledbylum, ymin=ymin[2], ymax=ymax[2]) mg.draw() tools.save_figure("%s/%s_scaledbylum" % (output_dir, yield_key)) canvas.SetLogy(False)
def main():
    # cli_args = docopt(__doc__, version='v1.0')
    cfg = tools.load_config("rep_mcfits")
    keys_cfg = cfg["keys"]
    db = tools.get_db(cfg["db"])
    fmt = {}
    for ns in range(1, 4):
        fmt["ns"] = ns
        bins = tools.axis2bins(cfg["binning"]["ups%ds" % ns])
        for np in pdg.VALID_UPS_DECAYS[ns]:
            fmt["np"] = np
            title = cfg["title"].format(**fmt)
            label = cfg["label"].format(**fmt)
            tab = table.SqsTable(
                title=title,
                label=label,
                ns=ns,
                binning=bins,
                scale=cfg["scale"],
                maxbins=cfg["maxbins"]
            )
            for row in cfg["rows"]:
                if row is None:
                    tab.space()
                    continue
                fmt["nb"] = row["nb"]
                chib = cfg["chib"].format(**fmt)
                tab.add_row(
                    key=row["key"],
                    title=get_title(cfg["keys"][row["map"]]).format(chib=chib)
                )
            for bin in bins:
                for data_key in ["mc2011", "mc2012"]:
                    # Add rows
                    group = tab.get_group(bin=bin, sqs=data_key)
                    for nb in range(1, 3):
                        ups_key = "ups%ds" % ns
                        for row in cfg["rows"]:
                            if not row:
                                continue
                            chib_key = "chib%d%dp" % (row['nb'], np)
                            db_fit = db[data_key][ups_key][tuple(bin)][chib_key]
                            value = db_fit.get(row['map'], None)
                            scale = get_scale(cfg["keys"][row['map']])
                            rounds = get_round(cfg["keys"][row['map']])
                            if value and scale:
                                value = VE(str(value)) * scale
                            group.add_value(
                                key=row['key'], value=value, round=rounds
                            )
            print tab.texify()
def main():
    label = input('Enter config file label (e.g. core): ')
    config = tools.load_config(label)
    solutions = np.loadtxt('data/double_solutions_%s.txt' % label)
    displacements(solutions, label, config)
    frequencies(solutions, label, config)
import os
import subprocess

from co2 import get_co2
from humidity import get_humidity
from tools import load_config  # assumed source of load_config, as in the other snippets


def get_ip_and_token(tool, config):
    return config[tool]['ip'], config[tool]['token']


def run_command(params, command):
    cmd = f"""miplug --ip {params[0]} --token {params[1]} {command}"""
    print(cmd)
    subprocess.run(cmd.split(' '))


PROJECT_PATH = os.path.abspath(os.getcwd())
# PROJECT_PATH = '/users/test/Documents/proj_smart_home'
CONFIG_PATH = 'configs'

config = load_config(os.path.join(PROJECT_PATH, CONFIG_PATH, 'tokens.yaml'))
for i in config.keys():
    globals()[i] = get_ip_and_token(i, config)
# print(floor_lamp, humidifier, recirculator, chandelier_1, chandelier_2)

co2 = get_co2()
print(co2)

h, t = get_humidity()
print("Temp={0:0.1f}*C Humidity={1:0.1f}%".format(t, h))
print(t, h)

# run_command(humidifier, 'on')
# time.sleep(7)
# run_command(humidifier, 'off')
# -*- coding: utf-8 -*-
import time

from tools import load_config
import ximalaya
import music163

if __name__ == "__main__":
    albumlist = load_config('config.json')
    log = open("error.log", "a")
    for album in albumlist:
        try:
            if album['platform'] == 'ximalaya':
                podcast = ximalaya.Ximalaya(int(album['albumid']))
            if album['platform'] == 'music163':
                podcast = music163.Music163(int(album['albumid']))
            podcast.get_podcast()
        except Exception as e:
            print('Exception:', e)
            curr_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            log.write("{}: failed to fetch {} album {}: {}\n".format(
                curr_time, album['platform'], album['albumid'], str(e)))
    print("All the meat is grilled, enjoy!")
def main(): cfg = tools.load_config("rep_final") db_ups = tools.get_db(cfg["db_ups"], "r") db_mc = tools.get_db(cfg["db_mc"], "r") db_pol = tools.get_db(cfg["db_pol"], "r")["mcall"] db_out = tools.get_db(cfg["db_out"]) fmt_syst = r"${}^{+%.2f \%%}_{-%.2f \%%}$" fmt_syst_final = (r"%s\stat${}^{+%.1f}_{-%.1f}\syst" "^{+%.1f}_{-%.1f}\systpol \%%$") fmt_pm_ups = r"$\pm %.3f \%%$" fmt_pm_eff = r"$\pm %.2f \%%$" tabs_final = [] graph_values = {} max_syst_ns = {} for ns in range(2, 3): cfg_decay = cfg.get("ups%ds" % ns, None) if not cfg_decay: continue bins = tools.axis2bins(cfg_decay["bins"].values()) valid_bins = {} for np in cfg_decay["bins"]: valid_bins[int(np)] = tools.axis2bins(cfg_decay["bins"][np]) graph_values[ns] = {} tabs = [] TabFabric = partial( table.SqsTable, ns=ns, binning=bins, scale=cfg["scale"], maxbins=cfg["maxbins"] ) tab_model = TabFabric( title=cfg["title_model"].format(ns=ns), label=cfg["label_model"].format(ns=ns), ) tab_ups = TabFabric( title=cfg["title_ups"].format(ns=ns), label=cfg["label_ups"].format(ns=ns), ) tab_eff = TabFabric( title=cfg["title_eff"].format(ns=ns), label=cfg["label_eff"].format(ns=ns), ) tab_pol = TabFabric( title=cfg["title_pol"].format(ns=ns), label=cfg["label_pol"].format(ns=ns), ) tab_final = TabFabric( title=cfg["title_final"].format(ns=ns), label=cfg["label_final"].format(ns=ns), ) tab_final.maxbins = cfg["maxbins_final"] tab_final.scale = cfg["scale_final"] tabs.append(tab_model) tabs.append(tab_ups) tabs.append(tab_eff) tabs.append(tab_pol) tabs_final.append(tab_final) # TODO: add other tables for np in pdg.VALID_UPS_DECAYS[ns]: for tab in tabs + [tab_final]: tab.add_row(key=str(np), title=cfg["row_title"].format(ns=ns, np=np)) if np != pdg.VALID_UPS_DECAYS[ns][-1]: tab.space() db_ref = tools.get_db(cfg_decay["db_ref"]) dbs = [] for db_name in cfg_decay["nchib"]: db = tools.get_db(db_name, "r") # print db_name # print sorted(db['2011'].keys()) # print sorted(db['2012'].keys()) dbs.append(db) max_syst_np = {} for bin in bins: graph_values[ns][bin] = {} for year in ["2011", "2012"]: graph_values[ns][bin][year] = {} bin_group_model = tab_model.get_group(bin, year) bin_group_ups = tab_ups.get_group(bin, year) bin_group_eff = tab_eff.get_group(bin, year) bin_group_pol = tab_pol.get_group(bin, year) bin_group_final = tab_final.get_group(bin, year) for np in pdg.VALID_UPS_DECAYS[ns]: graph_values[ns][bin][year][np] = {} if bin in valid_bins[np]: max_model, min_model, max_pol, min_pol = ( max_syst_np.get(np, (0, 0, 0, 0)) ) change = dbtools.get_chib_squared_error( db_ref, dbs, np, year, bin ) frac_func = partial(dbtools.get_fraction, db_chib=db_ref, db_ups=db_ups, db_mc=db_mc, year=year, bin=bin, ns=ns, np=np) frac = frac_func() value = frac * 100 # frac_plus = frac_func(scalecb=(1 + change[0])) # frac_minus = frac_func(scalecb=(1 - change[1])) # assert frac and frac_plus and frac_minus, "!!!" 
# syst_model = ( # 100 * (frac_plus.value() - frac.value()), # 100 * (frac.value() - frac_minus.value()) # ) syst_model = ( change[0], change[1] ) # change_model = ( # 100 * (frac_plus / frac - 1).value(), # 100 * (1 - frac_minus / frac).value(), # ) max_model, min_model = ( max(max_model, change[0]), max(min_model, change[1]) ) ups_syst = cfg["ups_syst"] # frac_plus = frac_func(scaleups=(1 - ups_syst)) # frac_minus = frac_func(scaleups=(1 + ups_syst)) syst_ups = ( ups_syst, ups_syst ) # change_ups = ( # 100 * (frac_plus / frac - 1).value(), # 100 * (1 - frac_minus / frac).value(), # ) eff_syst = cfg["eff_syst"] # frac_plus = frac_func(scaleeff=(1 - eff_syst)) # frac_minus = frac_func(scaleeff=(1 + eff_syst)) syst_eff = ( eff_syst, eff_syst ) # change_eff = ( # 100 * (frac_plus / frac - 1).value(), # 100 * (1 - frac_minus / frac).value(), # ) change = dbtools.get_polarization_change( db_pol, ns, np, bin ) # frac_plus = frac_func(scaleeff=(1 - change[1])) # frac_minus = frac_func(scaleeff=(1 + change[0])) syst_pol = ( change[1], change[0] ) # change_pol = ( # 100 * (frac_plus / frac - 1).value(), # 100 * (1 - frac_minus / frac).value(), # ) max_pol, min_pol = ( max(max_pol, change[1]), max(min_pol, change[0]) ) max_syst_np[np] = ( max_model, min_model, max_pol, min_pol) final_syst = [] for i in range(2): final_syst.append( math.sqrt( syst_model[i] ** 2 + syst_ups[i] ** 2 + syst_eff[i] ** 2 ) ) final_graph = [] for i in range(2): final_graph.append( math.sqrt( (value.value() * final_syst[i]) ** 2 + (value.value() * syst_pol[i]) ** 2 + value.error() ** 2 ) ) graph_values[ns][bin][year][np] = ( value, final_graph[0], final_graph[1] ) # if ns == 3: # shell() # exit(1) else: value = None if value: # TODO: collect square roots # bin_group_model.add_value( # key=str(np), # value=fmt_syst % change_model) # bin_group_ups.add_value( # key=str(np), # value=fmt_pm_ups % change_ups[0]) # bin_group_eff.add_value( # key=str(np), # value=fmt_pm_eff % change_eff[0]) # bin_group_pol.add_value( # key=str(np), # value=fmt_syst % syst_pol) final_value = (fmt_syst_final % (tools.latex_ve(value), value * final_syst[0], value * final_syst[1], value * syst_pol[0], value * syst_pol[1] )) bin_group_final.add_value( key=str(np), value=final_value) else: bin_group_model.add_value(key=str(np), value=None) bin_group_ups.add_value(key=str(np), value=None) bin_group_eff.add_value(key=str(np), value=None) bin_group_pol.add_value(key=str(np), value=None) bin_group_final.add_value(key=str(np), value=None) max_syst_ns[ns] = max_syst_np # for tab in tabs: # if tab not in [tab_ups, tab_eff, tab_] # print tab.texify() print "%% Final tables ============ " for tab in tabs_final: print tab.texify() print_summary(max_syst_ns, cfg["decay_tmpl"]) draw_graphs(cfg, graph_values) save_values(cfg, db_out, graph_values)
def trojan_detector(eval_model_filepath, tokenizer_filepath, result_filepath, scratch_dirpath, examples_dirpath, is_training): ''' 1. LOAD MODELS, EMBEDDINGS AND TOKENIZER''' config = tools.load_config(eval_model_filepath) if config['embedding'] == 'MobileBERT': tools.USE_AMP = False clean_models_filepath = tools.get_clean_model_filepaths(config) clean_testing_model_filepath = tools.get_clean_model_filepaths( config, for_testing=True) models = tools.load_all_models(eval_model_filepath, clean_models_filepath, clean_testing_model_filepath) embedding_matrix = tools.get_embedding_matrix(models['eval_model']) clean_embedding_matrix = tools.get_average_clean_embedding_matrix( models['clean_models']) embedding_matrices = [embedding_matrix, clean_embedding_matrix] tools.TOKENIZER = tools.load_tokenizer(tokenizer_filepath, config) tools.MAX_INPUT_LENGTH = tools.get_max_input_length(config) ''' 2. INITIALIZE ATTACK FOR A SOURCE CLASS AND TRIGGER LENGTH ''' # initial_trigger_token_ids = tools.make_initial_trigger_tokens(is_random=False, initial_trigger_words="ok "*7) initial_trigger_token_ids = torch.tensor([0, 0, 0, 0, 0]).to(tools.DEVICE) # initial_trigger_token_ids = torch.tensor([11920]).to(tools.DEVICE) trigger_length = len(initial_trigger_token_ids) ''' 3. ITERATIVELY ATTACK THE MODEL CONSIDERING NUM CANDIDATES PER TOKEN ''' df = pd.DataFrame(columns=[ 'source_class', 'target_class', 'top_candidate', 'decoded_top_candidate', 'trigger_asr', 'clean_asr', 'loss', 'testing_loss', 'clean_accuracy', 'decoded_initial_candidate' ]) class_list = tools.get_class_list(examples_dirpath) # TODO: Remove this # class_list = [7, 1] TRIGGER_ASR_THRESHOLD, TRIGGER_LOSS_THRESHOLD = 0.95, 0.001 for source_class, target_class in tqdm( list(itertools.product(class_list, class_list))): if source_class == target_class: continue temp_class_list = tools.get_class_list(examples_dirpath) temp_class_list_clean = [source_class, target_class] tools.update_logits_masks(temp_class_list, temp_class_list_clean, models['eval_model']) # TODO: Clean this and make it more elegant temp_examples_dirpath = join( '/'.join(clean_models_filepath[0].split('/')[:-1]), 'clean_example_data') # temp_examples_dirpath = examples_dirpath update_clean_logits(models['clean_models'], temp_examples_dirpath, source_class, initial_trigger_token_ids=torch.tensor([])) vars, trigger_mask, masked_source_class_token_locations =\ initialize_attack_for_source_class(temp_examples_dirpath, source_class, initial_trigger_token_ids) trigger_token_ids, loss, initial_eval_logits, _ = \ get_trigger(models, vars, masked_source_class_token_locations, temp_class_list_clean, temp_class_list, source_class, target_class, initial_trigger_token_ids, trigger_mask, trigger_length, embedding_matrices) ''' Evaluate the trigger and save results to df''' trigger_asr = tools.get_trigger_asr( masked_source_class_token_locations, initial_eval_logits, target_class) update_clean_logits(models['clean_testing_models'], temp_examples_dirpath, source_class, initial_trigger_token_ids=torch.tensor([])) vars, trigger_mask, masked_source_class_token_locations =\ initialize_attack_for_source_class(temp_examples_dirpath, source_class, initial_trigger_token_ids) clean_asr_list, clean_accuracy_list, testing_loss = \ tools.get_clean_asr_and_accuracy(vars, trigger_mask, trigger_token_ids, temp_examples_dirpath, initial_trigger_token_ids, models, source_class, target_class, temp_class_list_clean, temp_class_list, masked_source_class_token_locations) decoded_top_candidate = 
tools.decode_tensor_of_token_ids( trigger_token_ids) decoded_initial_candidate = tools.decode_tensor_of_token_ids( initial_trigger_token_ids) df.loc[len(df)] = [source_class, target_class, trigger_token_ids.detach().cpu().numpy(), \ decoded_top_candidate, trigger_asr, np.array(clean_asr_list).mean(), \ loss[0].detach().cpu().numpy(), testing_loss[0].detach().cpu().numpy(), \ np.array(clean_accuracy_list).mean(), decoded_initial_candidate] if trigger_asr > TRIGGER_ASR_THRESHOLD and testing_loss[ 0] < TRIGGER_LOSS_THRESHOLD: break parent_dir = '/scratch/utrerf/TrojAI/NLP/round7/results/' subdir = f'lambda_{tools.LAMBDA}_num_candidates_{tools.NUM_CANDIDATES}_'+\ f'beam_size_{tools.BEAM_SIZE}_trigger_length_{trigger_length}/' tools.check_if_folder_exists(parent_dir) tools.check_if_folder_exists(join(parent_dir, subdir)) filename = f'{args.model_num}.csv' if is_training: df.to_csv(join(parent_dir, subdir, filename)) else: df = df.sort_values('testing_loss').reset_index(drop=True) X = df.loc[0, ['testing_loss', 'clean_asr', 'trigger_asr']] clf = load('NLP/round7/classifier.joblib') pred = clf.predict_proba(X.to_numpy().reshape(1, -1)) with open(result_filepath, 'w') as f: f.write("{}".format(pred))
def main(): cli_args = docopt(__doc__, version="1.0") cfg = tools.load_config("rep_syst") for ns in range(2, 3): cfg_decay = cfg["ups%ds" % ns] db_ref = tools.get_db(cfg_decay["db"]) bins = tools.axis2bins(cfg_decay["axis"].values()) valid_bins = {} for np in cfg_decay["axis"]: valid_bins[int(np)] = tools.axis2bins(cfg_decay["axis"][np]) # Check presence # ==================================================================== check_dbs = [] for cfg_row in cfg_decay["tables"]: for row in cfg_row["rows"]: check_dbs.append(row["db"]) check_bins = set() for np in cfg_decay["axis"]: check_bins.update(tools.axis2bins(cfg_decay["axis"][np])) ok = True for data_key in ["2011", "2012"]: for dbpath in check_dbs: db = tools.get_db(dbpath, "r") for bin in sorted(check_bins): db_year = db[data_key] if bin not in db_year: ok = False print("DB %s: No bin %s for Y(%dS) decays in %s" % ( dbpath, str(bin), ns, data_key) ) db.close() if not ok: continue # ==================================================================== for cfg_table in cfg_decay["tables"]: scale = (cfg_table['scale'] if "scale" in cfg_table else cfg_decay['scale']) tab = table.SystTable( title=cfg_table["title"], label=cfg_table["label"], binning=bins, ns=ns, nchib=pdg.VALID_UPS_DECAYS[ns], maxbins=cfg_decay["maxbins"], scale=scale ) for cfg_row in cfg_table['rows']: tab.add_row(key=cfg_row["key"], title=cfg_row["title"]) if cfg_row.get("space", False): tab.space() db = tools.get_db(cfg_row["db"], "r") for bin in bins: for data_key in ["2011", "2012"]: db_bin = db[data_key][bin] db_bin_ref = db_ref[data_key][bin] sqs = 7 if data_key == "2011" else 8 for np in pdg.VALID_UPS_DECAYS[ns]: np_key = "N%dP" % np value = db_bin.get(np_key, None) value_ref = db_bin_ref.get(np_key, None) if not (bin in valid_bins[np] and value and value_ref): value_change = None else: value_change = ( 1 - VE(str(value)) / VE(str(value_ref)) ) * 100 value_change = value_change.value() group = tab.get_group(bin=bin, sqs=sqs, np=np) group.add_value( key=cfg_row["key"], value=value_change, round=1) # Fill Table print(tab.texify())
def main(): cli_args = docopt(__doc__, version='v1.0') cfg = tools.load_config("rep_frac") profiles = [] if cli_args["--profile"]: profiles.append(cli_args["--profile"]) else: for profile in cfg: profiles.append(profile) for profile_name in sorted(profiles): profile_cfg = cfg[profile_name] ns = profile_cfg["ns"] db = tools.get_db(profile_cfg["db"], "r") db_y = tools.get_db(profile_cfg["db_y"], "r") db_mc = tools.get_db(profile_cfg["mc"], "r") bins, year_bins = get_axises(profile_cfg["axises"]) tab = table.SqsTable( title=profile_cfg["title"], label=profile_cfg["label"], ns=ns, binning=bins, scale=profile_cfg["scale"], maxbins=profile_cfg["maxbins"] ) for np in profile_cfg["nps"]: tab.add_row( key=str(np), title=r"$N_{\chi_b(%dP)}$" % np ) tab.space() tab.add_row(key="y", title=r"$N_{\Upsilon(%dS)}$" % ns) tab.space() for np in profile_cfg["nps"]: title = (r"$\eps_{\chi_b(%dP) \to \Upsilon(%dS)" % (np, ns) + r" \gamma}^{\gamma}$, \%") tab.add_row( key="e%d" % np, title=title ) tab.line() for np in profile_cfg["nps"]: tab.add_row( key="f%d" % np, title=r"Fraction $\chi_b(%dP)$, \%%" % np ) values = { "2011": defaultdict(list), "2012": defaultdict(list), } for bin in bins: for data_key in ["2011", "2012"]: bin_group = tab.get_group(bin, data_key) db_bin = db[data_key].get(bin, False) db_y_bin = db_y[data_key][bin] ups_key = "N%dS" % ns nups = db_y_bin.get(ups_key, None) bin_group.add_value(key="y", value=nups) for np in profile_cfg["nps"]: mct = mctools.MC( db=db_mc["mc%s" % data_key]["ups%ds" % ns], ns=profile_cfg["ns"], np=np ) key = "N%sP" % np nchib = db_bin.get(key, None) if db_bin else None bin_group.add_value(key=str(np), value=nchib) eff = mct.eff(bin) bin_group.add_value(key="e%d" % np, value=eff * 100) # if nups and nchib and eff and (bin in axis_bins[np]): if nups and nchib and eff: frac = (VE(str(nchib)) / VE(str(eff)) / VE(str(nups))) * 100 if bin in year_bins[np][data_key]: values[data_key][np].append((bin, frac)) else: frac = None bin_group.add_value(key="f%d" % np, value=frac, is_bold=True) print(tab.texify()) graphs_cfg = profile_cfg["graphs"] for np in profile_cfg["nps"]: graphs = [] for data_key in ["2011", "2012"]: graph_values = values[data_key][np] if graph_values[0][0] != 5: graph_values.insert(0, ((5, graph_values[0][0]), None)) g = graph.Graph(color=data_key, values=values[data_key][np]) graphs.append(g) ymax = graphs_cfg[str(np)]["ymax"] if ns == 1 and np == 1: g = graph.Graph(color="2010", values=profile_cfg["2010"]) graphs.append(g) mg = graph.MultiGraph(graphs=graphs, ymin=0, ymax=ymax) mg.draw() output_file = "fracs/%s_%d" % (profile_name, np) tools.save_figure(output_file)
# -*- coding:utf-8 -*-
import network
import json
import sys

if __name__ == '__main__':
    # add the ezlab package path to the module search path
    sys.path.append('hotload')
    from tools import load_config
    # enable the hot loader or not
    if load_config():
        from hotload_server import hotloader
        hotloader().Start()
    else:
        # your own code goes here
        pass
import os
import pickle

from preprocess_data import load_data
from tools import load_config, save_params

ProcessedData = './processed_data/preprocess.p'
ProcessedParams = './processed_data/params.p'
ProcessedDataDir = './processed_data'

try:
    title_count, title_set, genres2int, features, targets_values, ratings, \
        users, movies, data, movies_orig, users_orig = pickle.load(
            open(ProcessedDataDir + os.sep + 'preprocess.p', mode='rb'))
except OSError:
    title_count, title_set, genres2int, features, targets_values, ratings, \
        users, movies, data, movies_orig, users_orig = load_data()

embed_dim, uid_max, gender_max, age_max, job_max, movie_id_max, movie_categories_max, \
    movie_title_max, combiner, sentences_size, window_sizes, filter_num = load_config()

movieid2idx = {val[0]: i for i, val in enumerate(movies.values)}

'''
Hyper parameters
'''
# Number of Epochs
num_epochs = 1
# Batch Size
batch_size = 256
dropout_keep = 0.5
# Learning Rate
# end setupDB if __name__ == "__main__": # run standalone, init SQL db, drop all tables and create all import os, sys py = sys.version_info py3k = py >= (3, 0, 0) try: web_server_folder = os.path.join(os.getcwd(), '..', '..') sys.path.insert(0, web_server_folder) from tools import load_config # import from web server folder config_file = os.path.join(os.getcwd(), 'config.json') config = load_config(config_file) DSN = config.get('DSN') # for sqlite chdir to server folder # if sqlite database is stored in server folder os.chdir("../..") msg = "Init SQL DB (drop and create all tables)? [Y/N]" if py3k: ans = input(msg) else: ans = raw_input(msg) if ans.upper().startswith('Y'): setupDB(DSN, True) else: print("SQL Init aborted.\nNothing changed.") except Exception as ex: print(ex)
# example of mask inference with a pre-trained model (COCO)
import sys

from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from mrcnn.config import Config
from mrcnn.model import MaskRCNN
from mrcnn.visualize import display_instances

from tools import load_config

# load config params - labels
cfg_dict = load_config('config.yaml')
class_names = cfg_dict['class_names']


# config settings for model inference
class ConfigParams(Config):
    NAME = "test"
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    NUM_CLASSES = 1 + 80


# replicate the model for pure inference
rcnn_model = MaskRCNN(mode='inference', model_dir='models/', config=ConfigParams())

# model weights input
path_weights_file = 'models/mask_rcnn_coco.h5'
rcnn_model.load_weights(path_weights_file, by_name=True)
def main(): cli_args = docopt(__doc__, version="1.0") args = dict(cli_args) log = Logger() models = [ chib1s_mc_model.ChibMCModel, chib2s_mc_model.ChibMCModel, chib3s_mc_model.ChibMCModel, ] tuples_cfg = tools.load_config("tuples") mc_cfg = tools.load_config("mc") mcfits_cfg = tools.load_config("mcfits") # binning tree = ROOT.TChain("ChibAlg/Chib") utree = ROOT.TChain("UpsilonAlg/Upsilon") for filename in tuples_cfg[args["--data"]]: tree.Add(filename) utree.Add(filename) # TODO: create arrays with respect to ns if not args["--ns"]: ns_arr = [1, 2, 3] else: ns_arr = [int(args["--ns"])] if not args["--np"]: np_arr = [1, 2, 3] else: np_arr = [int(args["--np"])] if not args["--nb"]: nb_arr = [1, 2] else: nb_arr = [int(args["--nb"])] for ns in ns_arr: decay_cfg = mc_cfg["decays"]["ups%ds" % ns] for np in np_arr: for nb in nb_arr: if (ns == 2 and np == 1) or (ns == 3 and np < 3): continue axis = mcfits_cfg["axis"]["ups%ds" % ns] process( output_db=mcfits_cfg["output_db"], output_figs=mcfits_cfg["output_figs"], data_key=args["--data"], tree=tree, models=models, ns=ns, np=np, nb=nb, cut=decay_cfg["cut"], pt_axis=axis, is_unbinned=mc_cfg["unbinned?"], binning=decay_cfg["binning"]["%d" % np], is_save=args["-s"] ) if args["-u"]: print("Count upsilons") count_upsilons( name=mc_cfg["name"], data_key=args["--data"], tree=utree, ns=ns, nb=nb, np=np, pt_axis=axis, cut=decay_cfg["ucut"], is_save=args["-s"]) if args['-i']: db = tools.get_db(mcfits_cfg["output_db"]) print db.keys() shell()
""" Reverse proxy for Next Bus server using flask """ import flask from flask import Response, g, request import reverseProxy import tools import time import json from flask_caching import Cache from redis import ConnectionError from redis import Redis import logging config = tools.load_config("config.ini") ##Setup Logging #logging.basicConfig( filename='logs/nextbusApp.log',level=logging.DEBUG ) logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger('reverseProxyApp') """ Initializing the flask app with redis caching """ APP = flask.Flask(__name__) redis_host = config['proxy_config']['redis_host'] redis_port = config['proxy_config']['redis_port'] cache = Cache(APP, config={ 'CACHE_TYPE': 'redis', 'CACHE_REDIS_HOST': redis_host, 'CACHE_REDIS_PORT': redis_port, 'CACHE_DEFAULT_TIMEOUT': config['proxy_config']['cache_timeout'] })
from control import DroneControl
import socketio
import threading
import config
import eventlet.wsgi
import tools
from data_parser import ParseAndWork
import time

if __name__ == '__main__':
    sio = socketio.Server()
    reset_config_data = tools.make_config(False)
    tools.load_config()
    worker = DroneControl(config.DRONE_LOCAL, sio)
    print "Worker initialised"

    @sio.on('connect')
    def connect(sid, environ):
        sio.emit("vehicle_success", worker.success)

    @sio.on("vehicle_connect")
    def vehicle_connect(sid):
        if not worker.success:
            worker.connect(config.DRONE_LOCAL)
            tools.config_settable_init(worker)
        else:
            print "Already connected"
            sio.emit('response', {'data': "Already connected"})
def trojan_detector(args): """ Overview: This detector uses a gradient-based trigger inversion approach with these steps - calculate the trigger inversion loss and the gradient w.r.t. trigger embeddings - get the top-k most promising candidates with the gradient from the previous step - calculate the trigger inversion loss of each of the top-k candidates - pick the best candidate, which gives us the lowest trigger inversion loss, as the new trigger - repeat until convergence At the end of this procedure we use the trigger inversion loss as the only predictive feature in a logistic regression model. If our trigger inversion approach was successful, we get a very low trigger inversion loss (close to zero) Input: In order to perform trigger inversion we need at least one clean model with the same architecture and dataset as the evaluation model, as well as clean examples (i.e. without the trigger). Output: This function's output depends on wether this is meant to be inside of a submission container or not If it's a submission, we output the probability that the evaluation model is trojaned Otherwise, we output the trigger inversion loss, which we then use to train our classifier """ # print args for arg in vars(args): print(f'{arg}: {getattr(args, arg)}') # load config if args.is_manual_config: config = { 'source_dataset': args.source_dataset, 'model_architecture': args.model_architecture } else: config = tools.load_config(args.eval_model_filepath) # load all models and get the clean_model_filepaths, which is used to get additional clean examples models, clean_model_filepaths = tools.load_models( config, args, CLEAN_TRAIN_MODELS_FILEPATH, CLEAN_TEST_MODELS_FILEPATH, DEVICE) # add hooks to pull the gradients out from all models when doing backward in the compute_loss function def add_hooks_to_all_models(models): def add_hooks_to_single_model(model, is_clean): def find_word_embedding_module(classification_model): word_embedding_tuple = [ (name, module) for name, module in classification_model.named_modules() if 'embeddings.word_embeddings' in name ] assert len(word_embedding_tuple) == 1 return word_embedding_tuple[0][1] module = find_word_embedding_module(model) module.weight.requires_grad = True if is_clean: def extract_clean_grad_hook(module, grad_in, grad_out): EXTRACTED_GRADS['clean_train'].append(grad_out[0]) module.register_backward_hook(extract_clean_grad_hook) else: def extract_grad_hook(module, grad_in, grad_out): EXTRACTED_GRADS['eval'].append(grad_out[0]) module.register_backward_hook(extract_grad_hook) add_hooks_to_single_model(models['eval'][0], is_clean=False) for clean_model in models['clean_train']: add_hooks_to_single_model(clean_model, is_clean=True) add_hooks_to_all_models(models) # get the word_embeddings for all the models input_id_embeddings = tools.get_all_input_id_embeddings(models) tokenizer = tools.load_tokenizer(args.is_submission, args.tokenizer_filepath, config) # get the candidate pool of tokens to start from at random initialization total_cand_pool = tools.get_most_changed_embeddings( input_id_embeddings, tokenizer, DEVICE) def only_keep_avg_input_id_embeddings(input_id_embeddings): return { model_type: { 'avg': input_id_embeddings[model_type]['avg'] } for model_type in list(input_id_embeddings.keys()) } input_id_embeddings = only_keep_avg_input_id_embeddings( input_id_embeddings) # load and transform qa dataset dataset = tools.load_qa_dataset(args.examples_dirpath, args.scratch_dirpath, clean_model_filepaths, more_clean_data=args.more_clean_data) print(f'dataset 
length: {len(dataset)}') tokenized_dataset = tools.tokenize_for_qa(tokenizer, dataset) tokenized_dataset = tools.select_examples_with_an_answer_in_context( tokenized_dataset, tokenizer) tokenized_dataset = tools.select_unique_inputs(tokenized_dataset) df = pd.DataFrame() # these are the behavior|insterions specific to QA for behavior, insertion in [('self', 'both'), ('cls', 'both'), ('self', 'context'), ('cls', 'context'), ('cls', 'question')]: best_test_loss = None start_time = time.time() args.trigger_behavior, args.trigger_insertion_type = behavior, insertion # add a dummy trigger into input_ids, attention_mask, and token_type as well as provide masks for loss calculations triggered_dataset = tools.get_triggered_dataset( args, DEVICE, tokenizer, tokenized_dataset) # insert trigger and populate baselines def insert_trigger_and_populate_baselines(): tools.insert_new_trigger( args, triggered_dataset, torch.tensor([tokenizer.pad_token_id] * args.trigger_length, device=DEVICE).long()) # zero out attention on trigger tools.insert_new_trigger(args, triggered_dataset, torch.zeros(args.trigger_length, device=DEVICE).long(), where_to_insert='attention_mask') # train loss to get train baseline tools.compute_loss(args, models, triggered_dataset, args.batch_size, DEVICE, LAMBDA, with_gradient=False, populate_baselines=True) # test loss to get train baseline models['clean_test'] = [ model.to(DEVICE, non_blocking=True) for model in models['clean_test'] ] tools.compute_loss(args, models, triggered_dataset, args.batch_size, DEVICE, LAMBDA, with_gradient=False, train_or_test='test', populate_baselines=True) models['clean_test'] = [ model.to(CPU, non_blocking=True) for model in models['clean_test'] ] # add back attention tools.insert_new_trigger(args, triggered_dataset, torch.ones(args.trigger_length, device=DEVICE).long(), where_to_insert='attention_mask') insert_trigger_and_populate_baselines() triggered_dataset = tools.take_best_k_inputs(triggered_dataset) def put_embeds_on_device(device=DEVICE): input_id_embeddings['eval']['avg'] = input_id_embeddings['eval'][ 'avg'].to(device, non_blocking=True) input_id_embeddings['clean_train']['avg'] = input_id_embeddings[ 'clean_train']['avg'].to(device, non_blocking=True) put_embeds_on_device() for i in range(args.num_random_tries): # get and insert new trigger num_non_random_tries = args.num_random_tries // 2 if 'electra' in config[ 'model_architecture'] else 2 * (args.num_random_tries // 3) init_fn = args.trigger_init_fn if i < num_non_random_tries else 'random' new_trigger = tools.initialize_trigger(args, init_fn, total_cand_pool, tokenizer, DEVICE) tools.insert_new_trigger(args, triggered_dataset, new_trigger) old_trigger, n_iter = torch.tensor([ randint(0, 20000) for _ in range(args.trigger_length) ]).to(DEVICE), 0 with autocast(): # main trigger inversion loop for a given random start while not torch.equal(old_trigger, new_trigger) and n_iter < args.max_iter: n_iter += 1 old_trigger, old_loss = deepcopy( new_trigger), tools.compute_loss(args, models, triggered_dataset, args.batch_size, DEVICE, LAMBDA, with_gradient=True) @torch.no_grad() def find_best_k_candidates_for_each_trigger_token( num_candidates, tokenizer): ''' equation 2: (embedding_matrix - trigger embedding)T @ trigger_grad ''' embeds_shape = [ len(triggered_dataset['input_ids']), -1, input_id_embeddings['eval']['avg'].shape[-1] ] def get_mean_trigger_grads(eval_or_clean): concat_grads = torch.cat( EXTRACTED_GRADS[eval_or_clean]) grads_list = [] if args.trigger_insertion_type in [ 'context', 
'both' ]: mean_context_grads_over_inputs = concat_grads[ triggered_dataset['c_trigger_mask']].view( embeds_shape).mean(dim=0) grads_list.append( mean_context_grads_over_inputs) if args.trigger_insertion_type in [ 'question', 'both' ]: mean_question_grads_over_inputs = concat_grads[ triggered_dataset['q_trigger_mask']].view( embeds_shape).mean(dim=0) grads_list.append( mean_question_grads_over_inputs) return torch.stack(grads_list).mean(dim=0) eval_mean_trigger_grads = get_mean_trigger_grads( 'eval') clean_train_mean_trigger_grads = get_mean_trigger_grads( 'clean_train') eval_grad_dot_embed_matrix = torch.einsum( "ij,kj->ik", (eval_mean_trigger_grads, input_id_embeddings['eval']['avg'])) eval_grad_dot_embed_matrix[ eval_grad_dot_embed_matrix != eval_grad_dot_embed_matrix] = 1e2 clean_grad_dot_embed_matrix = torch.einsum( "ij,kj->ik", (clean_train_mean_trigger_grads, input_id_embeddings['clean_train']['avg'])) clean_grad_dot_embed_matrix[ clean_grad_dot_embed_matrix != clean_grad_dot_embed_matrix] = 1e2 # fill nans eval_grad_dot_embed_matrix[ eval_grad_dot_embed_matrix != eval_grad_dot_embed_matrix] = 1e2 clean_grad_dot_embed_matrix[ clean_grad_dot_embed_matrix != clean_grad_dot_embed_matrix] = 1e2 # weigh clean_train and eval dot products and get the smallest ones for each position gradient_dot_embedding_matrix = eval_grad_dot_embed_matrix + LAMBDA * clean_grad_dot_embed_matrix BANNED_TOKEN_IDS = [ tokenizer.pad_token_id, tokenizer.cls_token_id, tokenizer.unk_token_id, tokenizer.sep_token_id, tokenizer.mask_token_id ] for token_id in BANNED_TOKEN_IDS: gradient_dot_embedding_matrix[:, token_id] = 1e2 _, best_k_ids = torch.topk( -gradient_dot_embedding_matrix, num_candidates, dim=1) return best_k_ids candidates = find_best_k_candidates_for_each_trigger_token( args.num_candidates, tokenizer) candidates = tools.add_candidate_variations_to_candidates( old_trigger, args, candidates) def clear_all_model_grads(models): for model_type, model_list in models.items(): for model in model_list: optimizer = optim.Adam(model.parameters()) optimizer.zero_grad(set_to_none=True) for model_type in EXTRACTED_GRADS.keys(): EXTRACTED_GRADS[model_type] = [] clear_all_model_grads(models) new_loss, new_trigger = tools.evaluate_and_pick_best_candidate( args, models, DEVICE, LAMBDA, old_loss, old_trigger, triggered_dataset, candidates, args.beam_size) tools.insert_new_trigger(args, triggered_dataset, new_trigger) if args.is_submission == False: tools.print_results_of_trigger_inversion_iterate( n_iter, tokenizer, old_loss, new_loss, old_trigger, new_trigger, LAMBDA, triggered_dataset) del old_loss, candidates new_test_loss = tools.get_new_test_loss( args, models, triggered_dataset, DEVICE, LAMBDA) if best_test_loss is None or best_test_loss[ 'trigger_inversion_loss'] > new_test_loss[ 'trigger_inversion_loss']: best_trigger, best_train_loss, best_test_loss = deepcopy( new_trigger), deepcopy(new_loss), deepcopy(new_test_loss) if best_test_loss[ 'trigger_inversion_loss'] < args.stopping_threshold: break torch.cuda.empty_cache() df = tools.add_results_to_df(args, df, tokenizer, best_trigger, best_train_loss, best_test_loss, total_cand_pool, triggered_dataset, start_time) if args.is_submission: tools.submit_results() else: tools.save_results(df, args)
def main():
    label = input('Enter config file label: ')
    config = tools.load_config(label)
    correction = config['correction']
    ap_func = get_func(config['ap_func'])
    evaluate(config, ap_func, correction, label)
def main(): cli_args = docopt(__doc__, version='v1.0') cfg_tuples = tools.load_config("tuples") cfg_pol = tools.load_config("polarization") chib_chain = ROOT.TChain("ChibAlg/Chib") ups_chain = ROOT.TChain("UpsilonAlg/Upsilon") cfg_decays = tools.load_config("mc")["decays"] # ups_chain = ROOT.TChain("UpsilonAlg/Upsilon") name = cli_args["--name"] for data_key in cfg_pol["data_keys"]: save_to = "{name}/{data_key}/".format(name=name, data_key=data_key) chib_chain.Reset() ups_chain.Reset() for ntuple_file in cfg_tuples[data_key]: print "NTuple ", ntuple_file chib_chain.Add(ntuple_file) ups_chain.Add(ntuple_file) for ns in cfg_pol["ns"]: cfg_cuts = cfg_decays["ups%ds" % ns] chib_cut = cfg_cuts["cut"] ups_cut = cfg_cuts["ucut"] # tools.tree_preselect(chib_chain, chib_cut) # tools.tree_preselect(ups_chain, ups_cut) for np in cfg_pol["np"]: if np not in pdg.VALID_UPS_DECAYS[ns]: continue axis = cfg_pol["axis"]["ups%ds" % ns][str(np)] for nb in cfg_pol["nb"]: d, dangles = process(ns=ns, nb=nb, np=np, chain=chib_chain, cut=chib_cut, axis=axis) n, nangles = process(ns=ns, nb=nb, np=np, chain=ups_chain, cut=ups_cut, axis=axis) # ref = d[3] // n[3] res = [] for i in range(3): if nb == 1 and i > 1: continue # h_old = (d[i] // n[i]) / ref h = d[3].Clone(pyroot.hID()) for j in h: r1 = (d[i][j] / d[3][j]).value() r2 = (n[i][j] / n[3][j]).value() s2d = ((d[i][j].error() ** 2 - d[i][j].value() ** 2 / d[3][j].value()) / (d[3][j] - 1) ** 2) s2n = ((n[i][j].error() ** 2 - n[i][j].value() ** 2 / n[3][j].value()) / (n[3][j] - 1) ** 2) h[j] = ( pyroot.VE(r1, s2d) / pyroot.VE(r2, s2n) ) h.red() # h_old.blue() res.append(h) h.Draw() h.level(1) if cli_args['--save']: tools.save_figure( name=(save_to + "chib{nb}{np}p_ups{ns}s_w{w}_ratio" .format(nb=nb, np=np, ns=ns, w=i)) ) for angle in dangles: hunpol = dangles[angle][3] hunpol.scale() for i in range(0, 3): h = dangles[angle][i] if not h.GetEntries(): continue h.scale() wname = "w%d" % i tools.draw_hists([h, hunpol], minimum=0) if cli_args['--save']: tools.save_figure( save_to + "/angles/{wname}_{angle}_chib{nb}{np}p_ups{ns}s".format( wname=wname, angle=angle, nb=nb, np=np, ns=ns) ) if cli_args['--save']: save(data_key, ns, np, nb, res, d, n) # chib_chain.SetEntryList(0) # ups_chain.SetEntryList(0) shell()
# External Packages
import matplotlib.pyplot as plt
import matplotlib.animation as anim
import numpy as np

# Custom Packages
import tools

# Get input to designate system for plotting
label = input('Enter config file label (e.g. core): ')
config = tools.load_config(label)

fig = plt.figure()
plt.axis('square')
# Define limits of plot relative to the lengths of the rods
lim = (config['L1'] + config['L2']) * 1.25
ax = plt.axes(xlim=(-lim, lim), ylim=(-lim, lim))

mass1, = ax.plot([], [], 'o', lw=9)
mass2, = ax.plot([], [], 'o', lw=9)
rod1, = ax.plot([], [], lw=2)
rod2, = ax.plot([], [], lw=2)

positions = np.loadtxt('data/double_positions_%s.txt' % label)
t = positions[:, 0]
x1 = positions[:, 1]
y1 = positions[:, 2]
x2 = positions[:, 3]
y2 = positions[:, 4]
    if building_number_first:
        try:
            match = re.search(regex2, street_name)
            building_number = match.group()
            mod_number = match.expand(r'\1\2\3\4')
            street_name = mod_number + ', ' + street_name.replace(building_number, '').strip()
        except AttributeError:
            pass
    return street_name.strip()


if __name__ == "__main__":
    # Config ----------------------------------------------------------------------------
    config = load_config('config.yaml')
    xls_path = config['xls']['path']
    xls_name = os.path.splitext(os.path.basename(xls_path))[0]
    xls_has_header = config['xls']['has_header']
    xls_min_row = config['xls']['min_row']
    xls_max_row = config['xls']['max_row']
    xls_max_column = config['xls']['max_column']
    address_columns_indxs = config['address']['col_indxs']
    illegal_street_names = config['address']['illegal_street_names']
    abbrev_dict = config['address']['abbrev_expansions']
    remove_abbrev = config['address']['remove_abbrev']
    strict_search = config['strict_search']