def test_CreateCustomers(self, after_test_CreateCustomers): """ 当前账号没有客户,创建客户,预期结果,通过公司名称可查找到客户 :param after_test_CreateCustomers: :return: """ # 点击进入待跟客户 self.commonPage.click_customerManagement() self.commonPage.click_waittingCustomers() # 切换到对应的待跟客户iframe,进行创建客户 self.commonPage.switch_to_waittingCustomers_iframe() self.waittingCustomersPage.createCustomers() # 创建完毕,自动关闭弹窗,等待3秒,防止弹窗未关闭就进行操作导致失败 time.sleep(3) # 刷新页面,重新进入待跟客户iframe self.commonPage.refresh() self.commonPage.switch_to_waittingCustomers_iframe() # 查找刚创建的客户 # 获取刚才创建的客户信息 customersInfo = read_yaml("configs/createCustomers.yaml") name = customersInfo["customersInfo"]["name"] # 根据公司名称查找客户 self.waittingCustomersPage.find_customers(name) # 获取查找结果的列表信息 messages = self.waittingCustomersPage.get_tables_costomersInfo() assert name == messages["name"]
def init_customers(user_login):
    """
    Setup fixture: create a customer so later steps (e.g. signing a
    contract) have one to work with.

    :param user_login: login fixture; index 1 is used as the common page object
    :return: yields (commonPage, waittingCustomersPage, customersInfo)
    """
    commonPage = user_login[1]
    waittingCustomersPage = WaittingCustomersPage()
    # Navigate: customer management -> customers awaiting follow-up.
    commonPage.click_customerManagement()
    commonPage.click_waittingCustomers()
    # Switch into the awaiting-customers iframe.
    commonPage.switch_to_waittingCustomers_iframe()
    # Create the customer.
    waittingCustomersPage.createCustomers()
    # The creation dialog closes automatically; wait 3 seconds so we do not
    # act on the page before the dialog has finished closing.
    time.sleep(3)
    # Read back the data that was used to create the customer.
    customersInfo = read_yaml("configs/createCustomers.yaml")
    # Close the awaiting-customers tab, which returns to the home page.
    commonPage.close_WaittingCustomersPage_tab()
    # Hand the page objects and the created-customer data to the test.
    yield commonPage, waittingCustomersPage, customersInfo
def setUp(self) -> None:
    """Build a warmup-switch model config and materialise the training batches."""
    cfg = read_yaml(CONFIG_PATH)
    arch = cfg['architecture']
    opt_params = cfg['optimizer']['params']

    # Hyper-parameters recorded on the test case.
    self.input_size = arch['input_size']
    self.base_lr = opt_params['lr']
    self.n_warmup_steps = 1
    self.batch_size = 128
    self.width = arch['width']
    self.L = 3

    # Patch the raw config: depth, lr, and a warmup-switch scheduler.
    arch['n_layers'] = self.L + 1
    opt_params['lr'] = self.base_lr
    cfg['scheduler'] = {
        'name': 'warmup_switch',
        'params': {'n_warmup_steps': self.n_warmup_steps,
                   'calibrate_base_lr': True},
    }
    self.base_model_config = ModelConfig(cfg)

    # Load the data once and keep the shuffled batches around for the tests.
    self.training_dataset, _ = load_data(download=False, flatten=True)
    self.train_data_loader = DataLoader(self.training_dataset, shuffle=True,
                                        batch_size=self.batch_size)
    self.batches = list(self.train_data_loader)
def setUp(self) -> None:
    """Instantiate NTK, IP and muP networks from the shared base config."""
    cfg = ModelConfig(read_yaml(CONFIG_PATH))
    self.base_model_config = cfg
    self.width = 0
    # Three parameterizations built from the same configuration.
    self.ntk = ntk.FCNTK(cfg, self.width)
    self.ip = ip.FCIP(cfg, c=0, width=self.width)
    self.muP = muP.FCmuP(cfg, self.width)
def get_param(path='data/contracts_data.yaml'):
    """
    Read the given config file and return its data as a list of nested
    lists: [[...], [...], ...], one inner list per row index.

    :param path: path of the YAML file to read
    :return: list of rows, where row i holds the i-th element of every
        top-level value in the file
    """
    data = read_yaml(path)
    values = [data[key] for key in data]
    # Transpose: row i collects element i of every column; the number of
    # rows is taken from the first column, as in the original layout.
    n_rows = len(values[0])
    n_cols = len(values)
    return [[values[col][row] for col in range(n_cols)]
            for row in range(n_rows)]
def setUp(self) -> None:
    """Prepare a warmup-switch config (no lr calibration) and an IPLLR net."""
    cfg = read_yaml(CONFIG_PATH)
    arch = cfg['architecture']

    self.input_size = arch['input_size']
    self.base_lr = cfg['optimizer']['params']['lr']
    self.n_warmup_steps = 2
    self.width = arch['width']
    self.L = arch['n_layers'] - 1

    # Re-write the lr (same value) and attach the scheduler section.
    cfg['optimizer']['params']['lr'] = self.base_lr
    cfg['scheduler'] = {
        'name': 'warmup_switch',
        'params': {'n_warmup_steps': self.n_warmup_steps,
                   'calibrate_base_lr': False},
    }

    self.base_model_config = ModelConfig(cfg)
    self.ipllr = FcIPLLR(self.base_model_config, n_warmup_steps=4)
def run(n_trials=10, download=False):
    """Run the FcIPLLR experiment grid over every (depth, width) pair.

    :param n_trials: number of random trials per configuration
    :param download: whether load_data is allowed to download the dataset
    """
    config_path = os.path.join(ROOT, 'pytorch/configs/abc_parameterizations',
                               CONFIG_FILE)
    config_dict = read_yaml(config_path)

    # Corresponding directory in the experiments folder.
    base_experiment_path = os.path.join(ROOT, EXPERIMENTS_DIR, MODEL_NAME)

    # Data is loaded once and shared across all grid points.
    training_dataset, test_dataset = load_data(download=download, flatten=True)

    for depth in Ls:
        for width in WIDTHS:
            config_dict['architecture']['n_layers'] = depth + 1
            config_dict['architecture']['width'] = width
            ABCRunner(config_dict, base_experiment_path, model=FcIPLLR,
                      train_dataset=training_dataset,
                      test_dataset=test_dataset,
                      early_stopping=False, n_trials=n_trials).run()
def __init_eles(self, conf_path):
    """
    Dynamically load page elements from the config file, attaching every
    element defined for this class and its page-class ancestors as an
    attribute of the current page object.

    :param conf_path: path of the YAML file mapping class names to elements
    :return: None
    """
    # Read the config file once. The original re-read it on every loop
    # iteration even though the content is loop-invariant.
    conf = read_yaml(conf_path)
    # Use every page class on the MRO as an element config section,
    # dropping BasePage and object at the end of the MRO.
    class_names = [m.__name__ for m in self.__class__.mro()][:-2]
    for class_name in class_names:
        eles = conf[class_name]
        if eles:
            # Each key becomes an attribute name, its value the attribute
            # value, dynamically generating the page-object attributes.
            for key, value in eles.items():
                setattr(self, key, value)
def setUp(self) -> None:
    """Shrink the module-level hyper-parameter grids and build the runner."""
    # Override the search grids on the module so the test stays small.
    grids = {'Ks': [3, 4], 'Rs': [0.5, 1], 'BATCH_SIZES': [32, 128],
             'N_TRAINs': [256, 512], 'Ds': [15, 50], 'Ms': [50, 100]}
    for attr, grid in grids.items():
        setattr(wide_2_layer_runner, attr, grid)

    n_rep = 3
    # read model config
    config_dict = read_yaml(os.path.join(ROOT, 'pytorch/configs/', CONFIG_FILE))
    self.base_experiment_path = os.path.join(ROOT, EXPERIMENTS_DIR, MODEL_NAME)
    self.runner = wide_2_layer_runner.Wide2LayerRunner(
        config_dict, self.base_experiment_path, n_rep)
def __init__(self, cookies):
    """
    Initialise the API client.

    :param cookies: auth info obtained from the login endpoint; must
        contain the "X-Space-Id" entry
    """
    self.cookies = cookies
    # The space id travels inside the cookies.
    self.space_id = self.cookies["X-Space-Id"]
    # Base URL of the API under test.
    self.host = HOST
    # The template file maps class names to their endpoint configuration,
    # so look up the section for this concrete class by name.
    templates = read_yaml("configs/api_conf.yaml")
    self.conf = templates[type(self).__name__]
    self.path = self.conf["path"]
def setUp(self) -> None:
    """Configure a depth-4, width-1024 non-centered IP model (mean-1 init)."""
    depth, width, mean = 4, 1024, 1.0

    cfg = read_yaml(CONFIG_PATH)
    arch = cfg['architecture']

    self.input_size = arch['input_size']
    self.base_lr = cfg['optimizer']['params']['lr']
    self.n_warmup_steps = 1
    self.width = width
    self.L = depth
    self.mean = mean

    # Patch the architecture / optimizer sections of the raw config.
    arch['width'] = width
    arch['n_layers'] = depth + 1
    cfg['optimizer']['params']['lr'] = self.base_lr

    self.base_model_config = ModelConfig(cfg)
    # Non-centered initialisation: shift the initializer mean.
    self.base_model_config.initializer.params["mean"] = mean
    self.ip_non_centered = StandardFCIP(self.base_model_config)
def setUp(self) -> None:
    """Build an ABCRunner for StandardFCIP with a short training schedule."""
    cfg = read_yaml(os.path.join(ROOT, 'pytorch/configs/abc_parameterizations',
                                 CONFIG_FILE))
    # Short training schedule for the test.
    cfg['training']['n_epochs'] = 5
    cfg['training']['n_steps'] = 5000

    # Corresponding base folder in the experiments directory.
    experiment_dir = os.path.join(ROOT, EXPERIMENTS_DIR, MODEL_NAME)

    # Prepare the data.
    train_ds, test_ds = load_data(download=False, flatten=True)
    self.runner = ABCRunner(cfg, experiment_dir, model=StandardFCIP,
                            train_dataset=train_ds, test_dataset=test_ds,
                            early_stopping=False, n_trials=4)
def main(activation="relu", n_steps=300, base_lr=0.01, batch_size=512,
         dataset="mnist"):
    """
    Compare FcIPLLR training-loss curves under three lr-calibration variants
    (calibrated, calibrated+renormalised, calibrated+renormalised+scaled
    first-layer lr) and save loss/chi figures.

    NOTE(review): relies on module-level constants not visible here —
    `width`, `L`, `n_warmup_steps`, `renorm_first`, `SEED`, `N_TRIALS` —
    presumably defined at the top of the script; confirm before reuse.

    :param activation: activation function name written into the config
    :param n_steps: number of training steps collected per model
    :param base_lr: base learning rate written into the config
    :param batch_size: training batch size
    :param dataset: one of 'mnist', 'cifar10', 'cifar100'
    """
    config_path = os.path.join(CONFIG_PATH, 'fc_ipllr_{}.yaml'.format(dataset))
    figures_dir = os.path.join(FIGURES_DIR, dataset)
    create_dir(figures_dir)
    log_path = os.path.join(figures_dir, 'log_ipllr_{}.txt'.format(activation))
    logger = set_up_logger(log_path)

    # Log the run parameters for reproducibility.
    logger.info('Parameters of the run:')
    logger.info('activation = {}'.format(activation))
    logger.info('n_steps = {:,}'.format(n_steps))
    logger.info('base_lr = {}'.format(base_lr))
    logger.info('batch_size = {:,}'.format(batch_size))
    logger.info('dataset = {}'.format(dataset))
    logger.info('Random SEED : {:,}'.format(SEED))
    logger.info(
        'Number of random trials for each model : {:,}'.format(N_TRIALS))

    try:
        set_random_seeds(SEED)  # set random seed for reproducibility
        config_dict = read_yaml(config_path)
        fig_name_template = 'IPLLRs_1_last_small_{}_{}_L={}_m={}_act={}_lr={}_bs={}.png'

        # Patch architecture / optimizer / scheduler sections.
        config_dict['architecture']['width'] = width
        config_dict['architecture']['n_layers'] = L + 1
        config_dict['optimizer']['params']['lr'] = base_lr
        config_dict['activation']['name'] = activation
        config_dict['scheduler'] = {
            'name': 'warmup_switch',
            'params': {
                'n_warmup_steps': n_warmup_steps,
                'calibrate_base_lr': True,
                'default_calibration': False
            }
        }

        # Load data & define models
        logger.info('Loading data ...')
        if dataset == 'mnist':
            from utils.dataset.mnist import load_data
        elif dataset == 'cifar10':
            from utils.dataset.cifar10 import load_data
        elif dataset == 'cifar100':
            # TODO : add cifar100 to utils.dataset
            pass
        else:
            error = ValueError(
                "dataset must be one of ['mnist', 'cifar10', 'cifar100'] but was {}"
                .format(dataset))
            logger.error(error)
            raise error

        training_dataset, test_dataset = load_data(download=False,
                                                   flatten=True)
        train_data_loader = DataLoader(training_dataset,
                                       shuffle=True,
                                       batch_size=batch_size)
        batches = list(train_data_loader)
        logger.info('Number of batches (steps) per epoch : {:,}'.format(
            len(batches)))
        logger.info('Number of epochs : {:,}'.format(n_steps // len(batches)))

        # Un-calibrated models serve as the source of the shared initial params.
        config_dict['scheduler']['params']['calibrate_base_lr'] = False
        config = ModelConfig(config_dict)
        logger.info('Defining models')
        ipllrs = [FcIPLLR(config) for _ in range(N_TRIALS)]

        # Calibrated variants, all built with lr-calibration batches.
        config_dict['scheduler']['params']['calibrate_base_lr'] = True
        config = ModelConfig(config_dict)
        ipllrs_calib = [
            FcIPLLR(config, lr_calibration_batches=batches)
            for _ in range(N_TRIALS)
        ]
        ipllrs_calib_renorm = [
            FcIPLLR(config, lr_calibration_batches=batches)
            for _ in range(N_TRIALS)
        ]
        ipllrs_calib_renorm_scale_lr = [
            FcIPLLR(config, lr_calibration_batches=batches)
            for _ in range(N_TRIALS)
        ]

        # All variants of trial i start from the same initial parameters.
        logger.info('Copying parameters of base ipllr')
        for i in range(N_TRIALS):
            ipllrs_calib[i].copy_initial_params_from_model(ipllrs[i])
            ipllrs_calib_renorm[i].copy_initial_params_from_model(ipllrs[i])
            ipllrs_calib_renorm_scale_lr[i].copy_initial_params_from_model(
                ipllrs[i])
            ipllrs_calib[i].initialize_params()
            ipllrs_calib_renorm[i].initialize_params()
            ipllrs_calib_renorm_scale_lr[i].initialize_params()

        # Make sure calibration takes into account normalization
        logger.info('Recalibrating lrs with new initialisation')
        for ipllr in ipllrs_calib:
            initial_base_lrs = ipllr.scheduler.calibrate_base_lr(
                ipllr, batches=batches, normalize_first=False)
            ipllr.scheduler._set_param_group_lrs(initial_base_lrs)

        for ipllr in ipllrs_calib_renorm:
            initial_base_lrs = ipllr.scheduler.calibrate_base_lr(
                ipllr, batches=batches, normalize_first=True)
            ipllr.scheduler._set_param_group_lrs(initial_base_lrs)

        for ipllr in ipllrs_calib_renorm_scale_lr:
            initial_base_lrs = ipllr.scheduler.calibrate_base_lr(
                ipllr, batches=batches, normalize_first=True)
            ipllr.scheduler._set_param_group_lrs(initial_base_lrs)

        # scale lr of first layer if needed
        for ipllr in ipllrs_calib_renorm_scale_lr:
            ipllr.scheduler.warm_lrs[0] = ipllr.scheduler.warm_lrs[0] * (
                ipllr.d + 1)

        # with calibration
        results = dict()
        logger.info('Generating training results ...')
        results['ipllr_calib'] = [
            collect_training_losses(ipllrs_calib[i],
                                    batches,
                                    n_steps,
                                    normalize_first=False)
            for i in range(N_TRIALS)
        ]
        results['ipllr_calib_renorm'] = [
            collect_training_losses(ipllrs_calib_renorm[i],
                                    batches,
                                    n_steps,
                                    normalize_first=True)
            for i in range(N_TRIALS)
        ]
        results['ipllr_calib_renorm_scale_lr'] = [
            collect_training_losses(ipllrs_calib_renorm_scale_lr[i],
                                    batches,
                                    n_steps,
                                    normalize_first=True)
            for i in range(N_TRIALS)
        ]

        mode = 'training'

        # Split each (loss, chi) result tuple into per-key lists.
        losses = dict()
        for key, res in results.items():
            losses[key] = [r[0] for r in res]
        chis = dict()
        for key, res in results.items():
            chis[key] = [r[1] for r in res]

        # Plot losses and derivatives
        logger.info('Saving figures at {}'.format(figures_dir))
        key = 'loss'
        plt.figure(figsize=(12, 8))
        plot_losses_models(losses,
                           key=key,
                           L=L,
                           width=width,
                           activation=activation,
                           lr=base_lr,
                           batch_size=batch_size,
                           mode=mode,
                           normalize_first=renorm_first,
                           marker=None,
                           name='IPLLR')
        plt.savefig(
            os.path.join(
                figures_dir,
                fig_name_template.format(mode, key, L, width, activation,
                                         base_lr, batch_size)))

        key = 'chi'
        plt.figure(figsize=(12, 8))
        plot_losses_models(chis,
                           key=key,
                           L=L,
                           width=width,
                           activation=activation,
                           lr=base_lr,
                           batch_size=batch_size,
                           mode=mode,
                           marker=None,
                           name='IPLLR')
        plt.savefig(
            os.path.join(
                figures_dir,
                fig_name_template.format(mode, key, L, width, activation,
                                         base_lr, batch_size)))

    except Exception as e:
        logger.exception("Exception when running the script : {}".format(e))
def main(activation="relu", base_lr=0.01, batch_size=512, dataset="mnist"):
    """
    Train muP networks for two steps and study the ranks of the initial
    weight matrices, the first two weight updates, and the resulting
    (pre-)activations; results are saved as csv tables and figures.

    Fix: `h1s` previously collected `h0` instead of `h1`, so every
    downstream rank/diversity computation on `h1s` silently re-used the
    initial pre-activations; it now collects `h1`.

    NOTE(review): relies on module-level constants not visible here —
    `width`, `L`, `SEED`, `N_TRIALS` — presumably defined at the top of
    the script; confirm before reuse.

    :param activation: activation function name written into the config
    :param base_lr: base learning rate written into the config
    :param batch_size: training batch size
    :param dataset: one of 'mnist', 'cifar10', 'cifar100'
    """
    config_path = os.path.join(CONFIG_PATH, 'fc_ipllr_{}.yaml'.format(dataset))
    figures_dir = os.path.join(FIGURES_DIR, dataset)
    create_dir(figures_dir)
    log_path = os.path.join(figures_dir, 'log_muP_{}.txt'.format(activation))
    logger = set_up_logger(log_path)

    # Log the run parameters for reproducibility.
    logger.info('Parameters of the run:')
    logger.info('activation = {}'.format(activation))
    logger.info('base_lr = {}'.format(base_lr))
    logger.info('batch_size = {:,}'.format(batch_size))
    logger.info('Random SEED : {:,}'.format(SEED))
    logger.info(
        'Number of random trials for each model : {:,}'.format(N_TRIALS))

    try:
        set_random_seeds(SEED)  # set random seed for reproducibility
        config_dict = read_yaml(config_path)
        version = 'L={}_m={}_act={}_lr={}_bs={}'.format(
            L, width, activation, base_lr, batch_size)
        template_name = 'muP_{}_ranks_{}_' + version

        # Patch architecture / optimizer sections.
        config_dict['architecture']['width'] = width
        config_dict['architecture']['n_layers'] = L + 1
        config_dict['optimizer']['params']['lr'] = base_lr
        config_dict['activation']['name'] = activation

        base_model_config = ModelConfig(config_dict)

        # Load data & define models
        logger.info('Loading data ...')
        if dataset == 'mnist':
            from utils.dataset.mnist import load_data
        elif dataset == 'cifar10':
            from utils.dataset.cifar10 import load_data
        elif dataset == 'cifar100':
            # TODO : add cifar100 to utils.dataset
            pass
        else:
            error = ValueError(
                "dataset must be one of ['mnist', 'cifar10', 'cifar100'] but was {}"
                .format(dataset))
            logger.error(error)
            raise error

        training_dataset, test_dataset = load_data(download=False,
                                                   flatten=True)
        train_data_loader = DataLoader(training_dataset,
                                       shuffle=True,
                                       batch_size=batch_size)
        batches = list(train_data_loader)
        # Full dataset tensors, used to evaluate contributions on all samples.
        full_x = torch.cat([a for a, _ in batches], dim=0)
        full_y = torch.cat([b for _, b in batches], dim=0)

        logger.info('Defining models')
        base_model_config.scheduler = None
        muPs = [FCmuP(base_model_config) for _ in range(N_TRIALS)]
        # Scale the first (input) param group's lr by the input dim + 1.
        for muP in muPs:
            for i, param_group in enumerate(muP.optimizer.param_groups):
                if i == 0:
                    param_group['lr'] = param_group['lr'] * (muP.d + 1)

        # save initial models
        muPs_0 = [deepcopy(muP) for muP in muPs]

        # train model one step
        logger.info('Training model a first step (t=1)')
        x, y = batches[0]
        muPs_1 = []
        for muP in muPs:
            train_model_one_step(muP, x, y, normalize_first=True)
            muPs_1.append(deepcopy(muP))

        # train models for a second step
        logger.info('Training model a second step (t=2)')
        x, y = batches[1]
        muPs_2 = []
        for muP in muPs:
            train_model_one_step(muP, x, y, normalize_first=True)
            muPs_2.append(deepcopy(muP))

        # set eval mode for all models
        for i in range(N_TRIALS):
            muPs[i].eval()
            muPs_0[i].eval()
            muPs_1[i].eval()
            muPs_2[i].eval()

        logger.info('Storing initial and update matrices')
        # define W0 and b0
        W0s = []
        b0s = []
        for muP_0 in muPs_0:
            W0, b0 = get_W0_dict(muP_0, normalize_first=True)
            W0s.append(W0)
            b0s.append(b0)

        # define Delta_W_1 and Delta_b_1
        Delta_W_1s = []
        Delta_b_1s = []
        for i in range(N_TRIALS):
            Delta_W_1, Delta_b_1 = get_Delta_W1_dict(muPs_0[i],
                                                     muPs_1[i],
                                                     normalize_first=True)
            Delta_W_1s.append(Delta_W_1)
            Delta_b_1s.append(Delta_b_1)

        # define Delta_W_2 and Delta_b_2
        Delta_W_2s = []
        Delta_b_2s = []
        for i in range(N_TRIALS):
            Delta_W_2, Delta_b_2 = get_Delta_W2_dict(muPs_1[i],
                                                     muPs_2[i],
                                                     normalize_first=True)
            Delta_W_2s.append(Delta_W_2)
            Delta_b_2s.append(Delta_b_2)

        x, y = full_x, full_y  # compute pre-activations on full batch

        # contributions after first step
        h0s = []
        delta_h_1s = []
        h1s = []
        x1s = []
        for i in range(N_TRIALS):
            h0, delta_h_1, h1, x1 = get_contributions_1(x,
                                                        muPs_1[i],
                                                        W0s[i],
                                                        b0s[i],
                                                        Delta_W_1s[i],
                                                        Delta_b_1s[i],
                                                        normalize_first=True)
            h0s.append(h0)
            delta_h_1s.append(delta_h_1)
            # BUGFIX: was `h1s.append(h0)`, which discarded the first-step
            # pre-activations and duplicated h0 in all h1-based statistics.
            h1s.append(h1)
            x1s.append(x1)

        # ranks of initial weight matrices and first two updates
        logger.info('Computing ranks of weight matrices ...')
        weight_ranks_dfs_dict = dict()
        tol = None
        weight_ranks_dfs_dict['svd_default'] = [
            get_svd_ranks_weights(W0s[i], Delta_W_1s[i], Delta_W_2s[i], L,
                                  tol=tol) for i in range(N_TRIALS)
        ]
        tol = 1e-7
        weight_ranks_dfs_dict['svd_tol'] = [
            get_svd_ranks_weights(W0s[i], Delta_W_1s[i], Delta_W_2s[i], L,
                                  tol=tol) for i in range(N_TRIALS)
        ]
        weight_ranks_dfs_dict['squared_tr'] = [
            get_square_trace_ranks_weights(W0s[i], Delta_W_1s[i],
                                           Delta_W_2s[i], L)
            for i in range(N_TRIALS)
        ]

        weight_ranks_df_dict = {
            key: get_concatenated_ranks_df(weight_ranks_dfs_dict[key])
            for key in weight_ranks_dfs_dict.keys()
        }
        avg_ranks_df_dict = {
            key: get_avg_ranks_dfs(weight_ranks_df_dict[key])
            for key in weight_ranks_df_dict.keys()
        }

        logger.info('Saving weights ranks data frames to csv ...')
        for key in weight_ranks_df_dict.keys():
            logger.info(key)
            logger.info('\n' + str(avg_ranks_df_dict[key]) + '\n\n')
            avg_ranks_df_dict[key].to_csv(os.path.join(
                figures_dir,
                template_name.format(key, 'weights') + '.csv'),
                                          header=True,
                                          index=True)

        ranks_dfs = [
            weight_ranks_df_dict['svd_default'],
            weight_ranks_df_dict['svd_tol'],
            weight_ranks_df_dict['squared_tr']
        ]

        # plot weights ranks
        logger.info('Plotting weights ranks')
        plt.figure(figsize=(12, 6))
        plot_weights_ranks_vs_layer('W0', ranks_dfs, tol, L, width, base_lr,
                                    batch_size, y_scale='log')
        plt.savefig(
            os.path.join(figures_dir,
                         template_name.format('W0', 'weights') + '.png'))

        plt.figure(figsize=(12, 6))
        plot_weights_ranks_vs_layer('Delta_W_1', ranks_dfs, tol, L, width,
                                    base_lr, batch_size, y_scale='log')
        plt.savefig(
            os.path.join(figures_dir,
                         template_name.format('Delta_W_1', 'weights') + '.png'))

        plt.figure(figsize=(12, 6))
        plot_weights_ranks_vs_layer('Delta_W_2', ranks_dfs, tol, L, width,
                                    base_lr, batch_size, y_scale='log')
        plt.savefig(
            os.path.join(figures_dir,
                         template_name.format('Delta_W_2', 'weights') + '.png'))

        # ranks of the pre-activations
        logger.info('Computing ranks of (pre-)activations ...')
        act_ranks_dfs_dict = dict()
        tol = None
        act_ranks_dfs_dict['svd_default'] = [
            get_svd_ranks_acts(h0s[i], delta_h_1s[i], h1s[i], x1s[i], L,
                               tol=tol) for i in range(N_TRIALS)
        ]
        tol = 1e-7
        act_ranks_dfs_dict['svd_tol'] = [
            get_svd_ranks_acts(h0s[i], delta_h_1s[i], h1s[i], x1s[i], L,
                               tol=tol) for i in range(N_TRIALS)
        ]
        act_ranks_dfs_dict['squared_tr'] = [
            get_square_trace_ranks_acts(h0s[i], delta_h_1s[i], h1s[i],
                                        x1s[i], L) for i in range(N_TRIALS)
        ]

        act_ranks_df_dict = {
            key: get_concatenated_ranks_df(act_ranks_dfs_dict[key])
            for key in act_ranks_dfs_dict.keys()
        }
        avg_ranks_df_dict = {
            key: get_avg_ranks_dfs(act_ranks_df_dict[key])
            for key in act_ranks_df_dict.keys()
        }

        logger.info('Saving (pre-)activation ranks data frames to csv ...')
        for key in avg_ranks_df_dict.keys():
            logger.info(key)
            logger.info('\n' + str(avg_ranks_df_dict[key]) + '\n\n')
            avg_ranks_df_dict[key].to_csv(os.path.join(
                figures_dir,
                template_name.format(key, 'acts') + '.csv'),
                                          header=True,
                                          index=True)

        ranks_dfs = [
            act_ranks_df_dict['svd_default'], act_ranks_df_dict['svd_tol'],
            act_ranks_df_dict['squared_tr']
        ]

        logger.info('Plotting (pre-)activation ranks')
        plt.figure(figsize=(12, 6))
        plot_acts_ranks_vs_layer('h0', ranks_dfs, tol, L, width, base_lr,
                                 batch_size, y_scale='log')
        plt.savefig(
            os.path.join(figures_dir,
                         template_name.format('h0', 'acts') + '.png'))

        plt.figure(figsize=(12, 6))
        plot_acts_ranks_vs_layer('h1', ranks_dfs, tol, L, width, base_lr,
                                 batch_size, y_scale='log')
        plt.savefig(
            os.path.join(figures_dir,
                         template_name.format('h1', 'acts') + '.png'))

        plt.figure(figsize=(12, 6))
        plot_acts_ranks_vs_layer('x1', ranks_dfs, tol, L, width, base_lr,
                                 batch_size, y_scale='log')
        plt.savefig(
            os.path.join(figures_dir,
                         template_name.format('x1', 'acts') + '.png'))

        plt.figure(figsize=(12, 6))
        plot_acts_ranks_vs_layer('delta_h_1', ranks_dfs, tol, L, width,
                                 base_lr, batch_size, y_scale='log')
        plt.savefig(
            os.path.join(figures_dir,
                         template_name.format('delta_h_1', 'acts') + '.png'))

        # diversity in terms of the index of the maximum entry
        logger.info(
            'Computing diversity of the maximum entry of pre-activations...')
        max_acts_diversity_dfs = [
            get_max_acts_diversity(h0s[i], delta_h_1s[i], h1s[i], L)
            for i in range(N_TRIALS)
        ]
        max_acts_diversity_df = get_concatenated_ranks_df(
            max_acts_diversity_dfs)
        avg_max_acts_diversity_df = get_avg_ranks_dfs(max_acts_diversity_df)
        logger.info('Diversity of the maximum activation index df:')
        logger.info(str(avg_max_acts_diversity_df))
        avg_max_acts_diversity_df.to_csv(os.path.join(
            figures_dir, 'muP_max_acts_' + version + '.csv'),
                                         header=True,
                                         index=True)

    except Exception as e:
        logger.exception("Exception when running the script : {}".format(e))
def main(activation="relu", n_steps=300, base_lr=0.01, batch_size=512,
         dataset="mnist"):
    """
    Compare muP training-loss curves across three variants (plain,
    renormalised first layer, renormalised + scaled first-layer lr) and
    save loss/chi figures.

    NOTE(review): relies on module-level constants not visible here —
    `width`, `L`, `renorm_first`, `SEED`, `N_TRIALS` — presumably defined
    at the top of the script; confirm before reuse.

    :param activation: activation function name written into the config
    :param n_steps: number of training steps collected per model
    :param base_lr: base learning rate written into the config
    :param batch_size: training batch size
    :param dataset: one of 'mnist', 'cifar10', 'cifar100'
    """
    config_path = os.path.join(CONFIG_PATH, 'fc_ipllr_{}.yaml'.format(dataset))
    figures_dir = os.path.join(FIGURES_DIR, dataset)
    create_dir(figures_dir)
    log_path = os.path.join(figures_dir, 'log_muP_{}.txt'.format(activation))
    logger = set_up_logger(log_path)

    # Log the run parameters for reproducibility.
    logger.info('Parameters of the run:')
    logger.info('activation = {}'.format(activation))
    logger.info('n_steps = {:,}'.format(n_steps))
    logger.info('base_lr = {}'.format(base_lr))
    logger.info('batch_size = {:,}'.format(batch_size))
    logger.info('Random SEED : {:,}'.format(SEED))
    logger.info(
        'Number of random trials for each model : {:,}'.format(N_TRIALS))

    try:
        set_random_seeds(SEED)  # set random seed for reproducibility
        config_dict = read_yaml(config_path)
        fig_name_template = 'muP_{}_{}_L={}_m={}_act={}_lr={}_bs={}.png'

        # Patch architecture / optimizer sections.
        config_dict['architecture']['width'] = width
        config_dict['architecture']['n_layers'] = L + 1
        config_dict['optimizer']['params']['lr'] = base_lr
        config_dict['activation']['name'] = activation

        base_model_config = ModelConfig(config_dict)

        # Load data & define models
        logger.info('Loading data ...')
        if dataset == 'mnist':
            from utils.dataset.mnist import load_data
        elif dataset == 'cifar10':
            from utils.dataset.cifar10 import load_data
        elif dataset == 'cifar100':
            # TODO : add cifar100 to utils.dataset
            # NOTE(review): output_size is patched but load_data is never
            # bound for this branch — would raise NameError below; confirm.
            config_dict['architecture']['output_size'] = 100
            pass
        else:
            error = ValueError(
                "dataset must be one of ['mnist', 'cifar10', 'cifar100'] but was {}"
                .format(dataset))
            logger.error(error)
            raise error

        training_dataset, test_dataset = load_data(download=False,
                                                   flatten=True)
        train_data_loader = DataLoader(training_dataset,
                                       shuffle=True,
                                       batch_size=batch_size)
        batches = list(train_data_loader)

        logger.info('Defining models')
        base_model_config.scheduler = None
        muPs = [FCmuP(base_model_config) for _ in range(N_TRIALS)]
        muPs_renorm = [FCmuP(base_model_config) for _ in range(N_TRIALS)]
        muPs_renorm_scale_lr = [
            FCmuP(base_model_config) for _ in range(N_TRIALS)
        ]

        # Scale the first (input) param group's lr by the input dim + 1.
        for muP in muPs_renorm_scale_lr:
            for i, param_group in enumerate(muP.optimizer.param_groups):
                if i == 0:
                    param_group['lr'] = param_group['lr'] * (muP.d + 1)

        # All variants of trial i start from the same initial parameters.
        logger.info('Copying parameters of base muP')
        for i in range(N_TRIALS):
            muPs_renorm[i].copy_initial_params_from_model(muPs[i])
            muPs_renorm_scale_lr[i].copy_initial_params_from_model(muPs[i])
            muPs_renorm[i].initialize_params()
            muPs_renorm_scale_lr[i].initialize_params()

        results = dict()
        logger.info('Generating training results ...')
        results['muP'] = [
            collect_training_losses(muPs[i],
                                    batches,
                                    n_steps,
                                    normalize_first=False)
            for i in range(N_TRIALS)
        ]
        results['muP_renorm'] = [
            collect_training_losses(muPs_renorm[i],
                                    batches,
                                    n_steps,
                                    normalize_first=True)
            for i in range(N_TRIALS)
        ]
        results['muP_renorm_scale_lr'] = [
            collect_training_losses(muPs_renorm_scale_lr[i],
                                    batches,
                                    n_steps,
                                    normalize_first=True)
            for i in range(N_TRIALS)
        ]

        mode = 'training'

        # Split each (loss, chi) result tuple into per-key lists.
        losses = dict()
        for key, res in results.items():
            losses[key] = [r[0] for r in res]
        chis = dict()
        for key, res in results.items():
            chis[key] = [r[1] for r in res]

        # Plot losses and derivatives
        logger.info('Saving figures at {}'.format(figures_dir))
        key = 'loss'
        plt.figure(figsize=(12, 8))
        plot_losses_models(losses,
                           key=key,
                           L=L,
                           width=width,
                           activation=activation,
                           lr=base_lr,
                           batch_size=batch_size,
                           mode=mode,
                           normalize_first=renorm_first,
                           marker=None,
                           name='muP')
        plt.ylim(0, 2.5)
        plt.savefig(
            os.path.join(
                figures_dir,
                fig_name_template.format(mode, key, L, width, activation,
                                         base_lr, batch_size)))

        key = 'chi'
        plt.figure(figsize=(12, 8))
        plot_losses_models(chis,
                           key=key,
                           L=L,
                           width=width,
                           activation=activation,
                           lr=base_lr,
                           batch_size=batch_size,
                           mode=mode,
                           marker=None,
                           name='muP')
        plt.savefig(
            os.path.join(
                figures_dir,
                fig_name_template.format(mode, key, L, width, activation,
                                         base_lr, batch_size)))

    except Exception as e:
        logger.exception("Exception when running the script : {}".format(e))
    # (Tail of a phone-number generator whose `def` line is outside this
    # view.) Random last 8 digits; prefix "1" plus the earlier segments.
    remain_spot = random.randint(9999999, 100000000)
    phone_num = "1{}{}{}".format(second_spot, third_spot, remain_spot)
    return phone_num


def create_Str(text):
    """
    Build a string starting with `text` and ending with the current time.

    :param text: prefix of the generated string
    :return: text + timestamp formatted as %Y%m%d%H%M%S
    """
    return text + get_dataTime(time_formate="%Y%m%d%H%M%S")


def write_yaml(args, path="configs/createCustomers.yaml"):
    # Dump `args` to the YAML file; allow_unicode keeps non-ASCII readable.
    with open(path, "w", encoding="utf-8") as f:
        yaml.dump(args, f, allow_unicode=True)


if __name__ == '__main__':
    # Generate a customer record (name + phone number) and persist it.
    costomers = {
        "name": create_Str("客户名称"),
        "phone": get_phone_num(),
    }
    args = {"costomers": costomers}
    write_yaml(args)
    # Read the file back to verify what was written.
    a = read_yaml("configs/createCustomers.yaml")
    print(a)
def setUp(self) -> None:
    """Create a StandardFCIP model from the base YAML configuration."""
    cfg = ModelConfig(read_yaml(CONFIG_PATH))
    self.base_model_config = cfg
    self.width = 0
    self.standard_ip = StandardFCIP(cfg, self.width)
def main():
    """
    Train an FcIPLLR model for two steps, build the scaled initial weights
    (W0, b0) and the first two update matrices (Delta_W_1/2, Delta_b_1/2),
    then compute the row echelon form of the first-layer update with sympy
    to inspect its rank.
    """
    print('ROOT :', ROOT)
    print('CONFIG_PATH :', CONFIG_PATH)

    # constants
    SEED = 30
    L = 6
    width = 1024
    n_warmup_steps = 1
    batch_size = 512
    base_lr = 0.1

    set_random_seeds(SEED)  # set random seed for reproducibility
    config_dict = read_yaml(CONFIG_PATH)

    # Patch architecture / optimizer / scheduler sections.
    config_dict['architecture']['width'] = width
    config_dict['architecture']['n_layers'] = L + 1
    config_dict['optimizer']['params']['lr'] = base_lr
    config_dict['scheduler'] = {
        'name': 'warmup_switch',
        'params': {
            'n_warmup_steps': n_warmup_steps,
            'calibrate_base_lr': True,
            'default_calibration': False
        }
    }
    base_model_config = ModelConfig(config_dict)

    # Load data & define model
    training_dataset, test_dataset = load_data(download=False, flatten=True)
    train_data_loader = DataLoader(training_dataset,
                                   shuffle=True,
                                   batch_size=batch_size)
    batches = list(train_data_loader)
    full_x = torch.cat([a for a, _ in batches], dim=0)
    full_y = torch.cat([b for _, b in batches], dim=0)

    # Define model
    ipllr = FcIPLLR(base_model_config,
                    n_warmup_steps=12,
                    lr_calibration_batches=batches)
    # Scale the first warm lr by the input dim + 1.
    ipllr.scheduler.warm_lrs[0] = ipllr.scheduler.warm_lrs[0] * (ipllr.d + 1)

    # Save initial model : t=0
    ipllr_0 = deepcopy(ipllr)

    # Train model one step : t=1
    x, y = batches[0]
    train_model_one_step(ipllr, x, y, normalize_first=True)
    ipllr_1 = deepcopy(ipllr)

    # Train model for a second step : t=2
    x, y = batches[1]
    train_model_one_step(ipllr, x, y, normalize_first=True)
    ipllr_2 = deepcopy(ipllr)

    ipllr.eval()
    ipllr_0.eval()
    ipllr_1.eval()
    ipllr_2.eval()

    layer_scales = ipllr.layer_scales
    # Attribute names of the hidden layers 2..L on the model.
    intermediate_layer_keys = [
        "layer_{:,}_intermediate".format(l) for l in range(2, L + 1)
    ]

    # Define W0 and b0: scaled initial weights, keyed by layer index 1..L+1;
    # the input layer is additionally divided by sqrt(d + 1).
    with torch.no_grad():
        W0 = {
            1:
            layer_scales[0] * ipllr_0.input_layer.weight.data.detach() /
            math.sqrt(ipllr_0.d + 1)
        }
        for i, l in enumerate(range(2, L + 1)):
            layer = getattr(ipllr_0.intermediate_layers,
                            intermediate_layer_keys[i])
            W0[l] = layer_scales[l - 1] * layer.weight.data.detach()
        W0[L + 1] = layer_scales[L] * ipllr_0.output_layer.weight.data.detach()

    with torch.no_grad():
        b0 = layer_scales[0] * ipllr_0.input_layer.bias.data.detach(
        ) / math.sqrt(ipllr_0.d + 1)

    # Define Delta_W_1 and Delta_b_1: scaled first-step weight updates.
    with torch.no_grad():
        Delta_W_1 = {
            1:
            layer_scales[0] * (ipllr_1.input_layer.weight.data.detach() -
                               ipllr_0.input_layer.weight.data.detach()) /
            math.sqrt(ipllr_1.d + 1)
        }
        for i, l in enumerate(range(2, L + 1)):
            layer_1 = getattr(ipllr_1.intermediate_layers,
                              intermediate_layer_keys[i])
            layer_0 = getattr(ipllr_0.intermediate_layers,
                              intermediate_layer_keys[i])
            Delta_W_1[l] = layer_scales[l - 1] * (
                layer_1.weight.data.detach() - layer_0.weight.data.detach())
        Delta_W_1[
            L + 1] = layer_scales[L] * (ipllr_1.output_layer.weight.data.detach() -
                                        ipllr_0.output_layer.weight.data.detach())

    with torch.no_grad():
        Delta_b_1 = layer_scales[0] * (
            ipllr_1.input_layer.bias.data.detach() -
            ipllr_0.input_layer.bias.data.detach()) / math.sqrt(ipllr_1.d + 1)

    # Define Delta_W_2: scaled second-step weight updates.
    with torch.no_grad():
        Delta_W_2 = {
            1:
            layer_scales[0] * (ipllr_2.input_layer.weight.data.detach() -
                               ipllr_1.input_layer.weight.data.detach()) /
            math.sqrt(ipllr_2.d + 1)
        }
        for i, l in enumerate(range(2, L + 1)):
            layer_2 = getattr(ipllr_2.intermediate_layers,
                              intermediate_layer_keys[i])
            layer_1 = getattr(ipllr_1.intermediate_layers,
                              intermediate_layer_keys[i])
            Delta_W_2[l] = layer_scales[l - 1] * (
                layer_2.weight.data.detach() - layer_1.weight.data.detach())
        Delta_W_2[
            L + 1] = layer_scales[L] * (ipllr_2.output_layer.weight.data.detach() -
                                        ipllr_1.output_layer.weight.data.detach())

    # NOTE(review): divisor below uses ipllr_1.d where the pattern above
    # suggests ipllr_2.d — harmless if d never changes, but confirm.
    with torch.no_grad():
        Delta_b_2 = layer_scales[0] * (
            ipllr_2.input_layer.bias.data.detach() -
            ipllr_1.input_layer.bias.data.detach()) / math.sqrt(ipllr_1.d + 1)

    # Ranks
    print('computing sympy Matrix ...')
    M = sympy.Matrix(Delta_W_1[1].numpy().tolist())

    print('Computing row echelon form ...')
    start = time()
    row_echelon = M.rref()
    end = time()
    print('Time for computing row echelon form : {:.3f} minutes'.format(
        (end - start) / 60))

    # rref() returns (echelon matrix, tuple of pivot column indices);
    # the pivot count is the matrix rank.
    print(row_echelon)
    print(row_echelon[1])
    print(len(row_echelon[1]))