def obj4_2(save):
    df_country = pd.read_parquet('data/output_country.parquet.gzip')
    countries = df_country.country.unique()
    country_indexes = {}
    for c in countries:
        country_indexes[c] = df_country.index[df_country['country'] == c].tolist()

    df_numeric = pd.read_parquet('data/output_numeric.parquet.gzip')
    new = []
    for (country, indexes) in country_indexes.items():
        rows = df_numeric.iloc[indexes, :]
        temp = {
            'country': country,
            'avg_points': rows['points'].mean(axis=0),
            'sd_points': rows['points'].std(axis=0, ddof=0),
            'avg_price': rows['price'].mean(axis=0),
            'sd_price': rows['price'].std(axis=0, ddof=0)
        }
        new += [temp]
    # print(str(len(new)) + ' countries aggeregated')

    df_new = pd.DataFrame(new)
    # print(df_new)
    df_new.to_parquet('data/output_avg_std.parquet.gzip', compression='gzip')
    if save:
        mkdir('aggeregated')
        df_new.to_parquet('aggeregated/output_avg_std.parquet.gzip', compression='gzip')
        print('folder "aggeregated" created')

def execute(self):
    model, cls_info, history, ft_history = self.fit_model()

    utils.mkdir(self.dst_dir, rm=True)
    model.save(self.est_file)
    mutils.save_model_info(self.info_file, self.graph_file, model)
    with open(self.cls_file, 'wb') as f:
        pickle.dump(cls_info, f)
    print(f'Classes: {cls_info}')

    utils.plot(history, self.hist_file)
    utils.plot(ft_history, self.ft_hist_file)

    def get_min(loss):
        min_val = min(loss)
        min_ind = loss.index(min_val)
        return min_val, min_ind

    print('Before fine-tuning')
    min_val, min_ind = get_min(history['val_loss'])
    print(f'val_loss: {min_val} (Epochs: {min_ind + 1})')
    print('After fine-tuning')
    min_val, min_ind = get_min(ft_history['val_loss'])
    print(f'val_loss: {min_val} (Epochs: {min_ind + 1})')

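# Note: these snippets call a project-local mkdir helper rather than os.mkdir
# directly; some pass a bare path (mkdir('aggeregated')), others pass
# utils.mkdir(self.dst_dir, rm=True). The helper below is only a minimal sketch
# of what such a utility typically looks like, assuming rm=True means
# "recreate the directory empty"; the real utilities in these projects may differ.
import os
import shutil


def mkdir(path, rm=False):
    """Create `path` (and parents) if missing; with rm=True, wipe and recreate it."""
    if rm and os.path.exists(path):
        shutil.rmtree(path)           # remove any previous contents first
    os.makedirs(path, exist_ok=True)  # no error if the directory already exists
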
def load_agw_1d(base_dir, get_feats=False):
    if not path.exists(base_dir + '/agw_data'):
        mkdir(base_dir + '/agw_data')
        urllib.urlretrieve(
            'https://raw.githubusercontent.com/wjmaddox/drbayes/master/experiments/synthetic_regression/ckpts/data.npy',
            filename=base_dir + '/agw_data/data.npy')

    def features(x):
        return np.hstack([x[:, None] / 2.0, (x[:, None] / 2.0)**2])

    data = np.load(base_dir + '/agw_data/data.npy')
    x, y = data[:, 0], data[:, 1]
    y = y[:, None]
    f = features(x)

    x_means, x_stds = x.mean(axis=0), x.std(axis=0)
    y_means, y_stds = y.mean(axis=0), y.std(axis=0)
    f_means, f_stds = f.mean(axis=0), f.std(axis=0)

    X = ((x - x_means) / x_stds).astype(np.float32)
    Y = ((y - y_means) / y_stds).astype(np.float32)
    F = ((f - f_means) / f_stds).astype(np.float32)

    if get_feats:
        return F, Y

    return X[:, None], Y

def execute(self):
    estimator = load_model(self.est_file)
    with open(self.cls_file, 'rb') as f:
        cls_info = pickle.load(f)

    pred_labels, true_labels, output = [], [], []
    for subdir in os.listdir(self.src_dir):
        for f in os.listdir(os.path.join(self.src_dir, subdir)):
            filename = os.path.join(self.src_dir, subdir, f)
            img = utils.load_target_image(filename, self.input_size)
            pred_class = np.argmax(estimator.predict(img))
            pred_label = cls_info[pred_class]
            pred_labels.append(pred_label)
            true_label = subdir
            true_labels.append(true_label)
            output.append(f'{filename} -> {pred_label}')

    report = classification_report(true_labels, pred_labels)
    labels = list(cls_info.values())
    cnfmtx = confusion_matrix(true_labels, pred_labels, labels=labels)
    cm = pd.DataFrame(cnfmtx, index=labels, columns=labels)

    utils.mkdir(self.dst_dir, rm=True)
    with open(self.drs_file, 'w') as f:
        f.writelines(output)
    with open(self.srs_file, 'w') as f:
        f.write(report)
        f.write('\n\n')
        f.write(str(cm))
        f.write('\n')

def singleVolume(cls, vid: int, name: str):
    c = request(
        f"http://{cls.downSite}.wenku8.com/packtxt.php?aid={cls.novel.id}&vid={vid}&charset=gbk",
        SelfUser.cookies)
    chapter_dir = cls.root_dir + "/" + name
    mkdir(chapter_dir)
    with open(chapter_dir + "/" + name + ".txt", "w") as f:
        f.write(no_utf8_code(c.text))

def save_model(self):
    """
    Save the model.
    :return: writes a .pkl file
    """
    mkdir(self.args.result_path)
    torch.save(self.model.state_dict(), self.args.result_path + 'GAT_PPI_model.pkl')
    print("Model saved successfully!")

def save_model(self):
    """
    Save the model.
    """
    mkdir(self.args.result_path)
    torch.save(
        self.model.state_dict(),
        self.args.result_path + self.args.model + '_' + self.args.dataset_name + '_model.pkl')
    print("Model saved successfully!")

def obj2(save):
    df = pd.read_csv('data/data.csv')
    df.to_parquet('data/output.parquet.gzip', compression='gzip')
    if save:
        mkdir('original')
        df.to_parquet('original/output.parquet.gzip', compression='gzip')
        print('folder "original" created')
    # print(df)

def pictures(cls, cid, is_resize, name):
    imgs = get_imgs(status=cls.novel.statusCode,
                    aid=cls.novel.id,
                    cid=cid,
                    cookies=SelfUser.cookies)
    chapter_dir = cls.root_dir + "/" + name
    mkdir(chapter_dir)
    imgs_dir = chapter_dir + "/" + "插图"  # "插图" = illustrations
    mkdir(imgs_dir)
    for index, i in enumerate(imgs):
        with open(imgs_dir + "/" + str(index) + ".jpg", "wb") as f:
            # resize the downloaded image only when is_resize is set
            f.write(resize(request(i).content) if is_resize else request(i).content)

def obj3(save):
    numeric = ['id', 'points', 'price']
    df = pd.read_parquet('data/output.parquet.gzip')
    df_clean = df.filter(numeric, axis=1).dropna().reset_index()
    df_clean.to_parquet('data/output_numeric.parquet.gzip', compression='gzip')
    if save:
        mkdir('clean')
        df_clean.to_parquet('clean/output_numeric.parquet.gzip', compression='gzip')
        print('folder "clean" created')

def save_embedding(self, emds):
    """
    Save the embedding results.
    :param emds: list of embeddings for the test set
    :return: files such as embedding_1.csv
    """
    print("Saving embedding results...")
    mkdir(self.args.result_path)
    count = 1
    for o in emds:
        result = pd.DataFrame(o.cpu().numpy())
        result.to_csv(self.args.result_path + 'embedding_' + str(count) + '.csv', index=None)
        count += 1
    print("Embedding results saved successfully!")

def save_preds(self, preds):
    """
    Save the predicted values.
    :param preds: list of predictions for the test set
    :return: files such as pred_1.csv
    """
    print("Saving test-set predictions...")
    mkdir(self.args.result_path)
    count = 1
    for o in preds:
        result = pd.DataFrame(o.cpu().numpy())
        result.to_csv(self.args.result_path + 'pred_' + str(count) + '.csv', index=None)
        count += 1
    print("Test-set predictions saved successfully!")

def save_reals(self, reals):
    """
    Save the ground-truth values.
    :param reals: list of ground-truth values for the test set
    :return: files such as real_1.csv
    """
    print("Saving test-set ground-truth values...")
    mkdir(self.args.result_path)
    count = 1
    for o in reals:
        result = pd.DataFrame(o.cpu().numpy())
        result.to_csv(self.args.result_path + 'real_' + str(count) + '.csv', index=None)
        count += 1
    print("Test-set ground-truth values saved successfully!")

def find_address(self, address):
    """Return the path where the net and training info are saved."""
    if address == 'last':
        addresses = sorted(os.listdir(self.res_dir))
        tb_address = os.path.join(self.tb_dir, str(len(addresses)))
        address = os.path.join(self.res_dir, addresses[-1])
    elif address is None:
        now = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        address = os.path.join(self.res_dir, now)
        mkdir(address)
        tb_address = os.path.join(self.tb_dir, now)
    else:
        tb_address = None
    return address, tb_address

def load_matern_1d(base_dir):
    if not path.exists(base_dir + '/matern_data/'):
        mkdir(base_dir + '/matern_data/')

        def gen_1d_matern_data():
            from GPy.kern.src.sde_matern import Matern32
            np.random.seed(4)

            lengthscale = 0.5
            variance = 1.0
            sig_noise = 0.15

            n1_points = 200
            x1 = np.random.uniform(-2, -1, n1_points)[:, None]
            n2_points = 200
            x2 = np.random.uniform(0.5, 2.5, n2_points)[:, None]

            no_points = n1_points + n2_points
            x = np.concatenate([x1, x2], axis=0)
            x.sort(axis=0)

            k = Matern32(input_dim=1, variance=variance, lengthscale=lengthscale)
            C = k.K(x, x) + np.eye(no_points) * sig_noise**2
            y = np.random.multivariate_normal(np.zeros((no_points)), C)[:, None]

            x_means, x_stds = x.mean(axis=0), x.std(axis=0)
            y_means, y_stds = y.mean(axis=0), y.std(axis=0)

            X = ((x - x_means) / x_stds).astype(np.float32)
            Y = ((y - y_means) / y_stds).astype(np.float32)
            return X, Y

        x, y = gen_1d_matern_data()
        xy = np.concatenate([x, y], axis=1)
        np.save(base_dir + '/matern_data/matern_1d.npy', xy)
        return x, y
    else:
        xy = np.load(base_dir + '/matern_data/matern_1d.npy')
        x = xy[:, 0]
        x = x[:, None]
        y = xy[:, 1]
        y = y[:, None]
        return x, y

def loop_test(self, dataset, criterion):
    """Forward loop over test data."""
    self.net.eval()
    for i in range(len(dataset)):
        seq = dataset.sequences[i]
        us, xs = dataset[i]
        with torch.no_grad():
            hat_xs = self.net(us.cuda().unsqueeze(0))
        loss = criterion(xs.cuda().unsqueeze(0), hat_xs)
        mkdir(self.address, seq)
        mondict = {
            'hat_xs': hat_xs[0].cpu(),
            'loss': loss.cpu().item(),
        }
        pdump(mondict, self.address, seq, 'results.p')

def main(args):
    dataset = args.dataset
    split = args.split
    method = args.method
    network = args.network
    width = args.width
    batch_size = args.batch_size
    valprop = args.valprop
    num = args.num
    data_dir = args.data_folder
    hpo_results_dir = args.hpo_results_dir
    results_dir = args.results_dir

    if dataset == "flights":
        split = "800k" if split == "1" else "2M"

    df = get_best_configs(hpo_results_dir)
    df = df[(df.dataset == dataset) & (df.split == split) & (df.method == method)
            & (df.network == network) & (df.valprop == str(valprop))
            & (df.width == str(width)) & (df.batch_size == str(batch_size))]
    if len(df) > 0:
        config = df.to_dict('records')[0]
    else:
        raise RuntimeError("HPO results for chosen config not found.")
    epochs = int(config["best_itr"])

    save_path = f"{results_dir}/{dataset}/{split}/{valprop}/{method}/{network}/{width}/{batch_size}/{num}"
    mkdir(save_path)

    # create data
    trainset, testset, N_train, input_dim, output_dim = get_dset_split(dataset, split, data_dir)

    # create net
    if "MLP" in method:
        method = method[:-4]

    keep_trying = True
    while keep_trying:
        net = create_net(method, config, input_dim, output_dim, N_train, network, width, cuda)

        trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True,
                                                  num_workers=0, pin_memory=cuda)
        testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False,
                                                 num_workers=0, pin_memory=cuda)

        # train net
        keep_trying = train_loop(net, trainloader, testloader, epochs, save_path)

def save_result(self):
    """
    Save the node prediction results.
    :return: writes the results to a csv file
    """
    mkdir(self.args.result_path)
    pre = self.model()[self.data.test_mask].max(1)[1].view(1000, 1)
    real = self.data.y[self.data.test_mask].view(1000, 1)
    result = torch.cat((pre, real), 1).detach().cpu().numpy()
    index = ["node_" + str(x) for x in range(1000)]
    columns = ["prediction", "real"]
    result = pd.DataFrame(result, index=index, columns=columns)
    result.to_csv(self.args.result_path + self.args.model + '_' + self.args.dataset_name + '_result.csv',
                  index=None)
    print("Test-set node predictions saved successfully!")

def load_official_flight(base_dir, k800=False):
    if not path.exists(base_dir + '/flight'):
        mkdir(base_dir + '/flight')
    if not path.isfile(base_dir + '/flight/filtered_data.pickle'):
        urllib.urlretrieve(
            'https://javierantoran.github.io/assets/datasets/filtered_flight_data.pickle.zip',
            filename=base_dir + '/flight/filtered_flight_data.pickle.zip')
        with zipfile.ZipFile(base_dir + '/flight/filtered_flight_data.pickle.zip', 'r') as zip_ref:
            zip_ref.extractall(base_dir + '/flight/')

    file1 = base_dir + '/flight/filtered_data.pickle'
    filtered = pd.read_pickle(file1)

    inputs = filtered[[
        'Month', 'DayofMonth', 'DayOfWeek', 'DepTime', 'ArrTime', 'AirTime',
        'Distance', 'plane_age'
    ]].values
    outputs = filtered[['ArrDelay']].values

    if k800 is False:
        X_train = inputs[:-100000].astype(np.float32)
        y_train = outputs[:-100000].astype(np.float32)
        X_test = inputs[-100000:].astype(np.float32)
        y_test = outputs[-100000:].astype(np.float32)
    else:
        X_train = inputs[:700000].astype(np.float32)
        y_train = outputs[:700000].astype(np.float32)
        X_test = inputs[700000:800000].astype(np.float32)
        y_test = outputs[700000:800000].astype(np.float32)

    x_means, x_stds = X_train.mean(axis=0), X_train.std(axis=0)
    y_means, y_stds = y_train.mean(axis=0), y_train.std(axis=0)
    x_stds[x_stds < 1e-10] = 1.

    X_train = ((X_train - x_means) / x_stds)
    y_train = ((y_train - y_means) / y_stds)
    X_test = ((X_test - x_means) / x_stds)
    y_test = ((y_test - y_means) / y_stds)

    return X_train, X_test, x_means, x_stds, y_train, y_test, y_means, y_stds

def save_embedding(self):
    """
    Save the embedding results.
    :return: writes the embeddings to a csv file
    """
    mkdir(self.args.result_path)
    embedding = self.model.hidden_representations[-2][self.data.test_mask].detach().cpu().numpy()
    print("Running dimensionality reduction and visualisation on the test-set embeddings...")
    X = embedding
    Y = self.data.y[self.data.test_mask].detach().cpu().numpy()
    GATTrainer.embed_visualization(self, X=X, Y=Y)
    index = ["node_" + str(x) for x in range(1000)]
    columns = ["x_" + str(x) for x in range(len(embedding[0]))]
    embedding = pd.DataFrame(embedding, index=index, columns=columns)
    embedding.to_csv(self.args.result_path + self.args.model + '_' + self.args.dataset_name + '_embedding.csv',
                     index=None)
    print("Test-set node embeddings saved successfully!")

def imageCorrection(image_map, category_id_name):
    img_file = dir_name + image_map['file_name']
    color = (0, 255, 0)
    x = int(image_map['x'])
    y = int(image_map['y'])
    w = int(image_map['w'])
    h = int(image_map['h'])
    img = cv2.imread(img_file)
    # frame_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    frame_gray = img

    # x = max(x, y)
    # w = max(w, h)
    # y = x
    # h = w
    img_tmp = frame_gray[y:y + h, x:x + w]
    # img_tmp = frame_gray[0:frame_gray.shape[0], 0:frame_gray.shape[1]]
    # cv2.rectangle(frame_gray, (x, y), (x+w, y+h), color, thickness=2)

    directory = "../datas/" + category_id_name + "_feature/"
    utils.mkdir(directory)
    cv2.imwrite(directory + image_map['file_name'], img_tmp)

# from html4vision import Col, imagetable
from src.utils import mkdir

do_scores = False
do_html = False

dir_data = '../data/v0.1.2/'
dir_interim = '../data/interim/'

f_datalist = f'{dir_data}lists/verification_pairs_list_5_fold.pkl'
f_features = f'{dir_interim}features-sphereface-off-the-shelf.pkl'

dir_out = '../results/verification/off-the-shelf-sphereface/'

datatable = pd.read_pickle(f_datalist)
mkdir(dir_out)

if do_scores or 'score' not in datatable:
    features = pd.read_pickle(f_features)
    features = {k.replace('../', ''): v for k, v in features.items()}
    datatable['score'] = datatable.apply(
        lambda row: np.dot(features[row['p1']], features[row['p2']]), axis=1)

ts_matches = []
sim = []
thresholds = np.arange(datatable.score.values.min(), datatable.score.values.max(), 100)
tprs = []
aucs = []
mean_fpr = np.linspace(0, 1, 100)

try:
    self_user = SelfUser(account, password)
    print("Login successful!")
    cookies = self_user.cookies
    print("Saving cookies...")
    pickle.dump(cookies, open(cookiePath, 'wb'))
    print("Cookies saved!")
except LoginFailedError:
    print("Login failed!")
    sys.exit()

isResize = input("Resize images? (Y/N): ").lower() == "y"

while True:
    try:
        id = input("Enter the novel ID: ")
        novel = Novel(int(id))
        mkdir("download")
        root_dir = os.path.dirname('.') + "download/" + novel.title
        print(f"Downloading {novel.title}...")
        mkdir(root_dir)
        for i in novel.volumeList:
            chapter_dir = root_dir + "/" + i["name"]
            mkdir(chapter_dir)
            print(f"Downloading {i['name']}")
            for j in i["chapters"]:
                if j["name"] == "插图":  # "插图" = illustrations chapter
                    print(f"Downloading illustrations for {i['name']}...")
                    imgs_dir = chapter_dir + "/" + "插图"
                    mkdir(imgs_dir)
                    imgs = get_imgs(status=novel.statusCode,
                                    aid=novel.id,
                                    cid=j['cid'],

config = load_yaml()
Feature.dir = config["path"]["feature"]

"""
import category_encoders as ce


class NeighborhoodOrdinal(Feature):
    def create_features(self):
        # self.columns lists the column names needed to build this feature
        self.columns = ["Neighborhood"]
        self.load(self.columns)
        oe = ce.ordinal.OrdinalEncoder()
        self.train["Neighborhood_ordinal"] = oe.fit_transform(
            self.train["Neighborhood"]
        )
        self.test["Neighborhood_ordinal"] = oe.transform(self.test["Neighborhood"])
        create_memo("Neighborhood_ordinal", "Label-encoded Neighborhood")
"""

if __name__ == "__main__":
    # split train / test by column and save them in advance
    save_column()
    # write the CSV header
    create_memo("feature", "memo")
    mkdir(Feature.dir)
    args = get_arguments()
    generate_features(globals(), args.overwrite)

import argparse
import os

from abstractRunner import AbstractRunner
from src.utils import mkdir, load_yaml

parser = argparse.ArgumentParser(description="argparse for run.py")
parser.add_argument("--debug", action="store_true", help="debug mode")
parser.add_argument(
    "--model",
    default="lgb",
    required=False,
    choices=["lgb", "cb", "xgb", "nn", "linear"],
    help="model type",
)
p = vars(parser.parse_args())


class Runner(AbstractRunner):
    def __init__(self, parser, config):
        super().__init__(parser, config)


if __name__ == "__main__":
    config = load_yaml()
    for k, v in config["path"].items():
        # if the path is a directory rather than a file, create it with mkdir
        if os.path.splitext(os.path.basename(v))[1] == "":
            mkdir(v)
    Runner(p, config).run()

    ('lucene', 'v4.0'),
    ('mahout', 'v0.8'),
    ('openjpa', 'v2.0.1'),
    ('openjpa', 'v2.2.0'),
    ('pig', 'v0.8.0'),
    ('pig', 'v0.11.1'),
    ('solr', 'v4.4.0'),
    ('tika', 'v1.3'),
    ('zookeeper', 'v3.4.5'),
]
# projects = [('hibernate', 'v3.5.0b2')]

for project, version in projects:
    path = '/'.join(['data', project, version])
    print(path)
    mkdir(path + '/queries')

    with open(path + '/ids.txt') as f:
        bugs = [x.strip() for x in f]

    p = etree.XMLParser()
    hp = etree.HTMLParser()

    for bugid in bugs:
        print("Fetching bugid", bugid)
        fname = project.upper() + '-' + bugid
        # fname = 'HHH-' + bugid
        r = requests.get(url_base % (fname, fname))
        try:
            tree = etree.parse(StringIO(r.text), p)
        except etree.XMLSyntaxError:

def main(args):
    extra_string = ''
    if args.dataset == 'flights':
        if args.n_split == 0:
            extra_string += '_2M'
        elif args.n_split == 1:
            extra_string += '_800k'
        else:
            raise Exception('Only valid values for flight splits are 0 (2M) or 1 (800k)')
        extra_string += '_valprop_' + str(args.valprop)
    elif args.dataset in [
            'boston', 'concrete', 'energy', 'power', 'wine', 'yacht', 'kin8nm',
            'naval', 'protein', 'boston_gap', 'concrete_gap', 'energy_gap',
            'power_gap', 'wine_gap', 'yacht_gap', 'kin8nm_gap', 'naval_gap',
            'protein_gap'
    ]:
        extra_string += '_split_' + str(args.n_split)
        extra_string += '_valprop_' + str(args.valprop)

    working_dir = args.result_folder + '/' + args.dataset + extra_string + '/' + args.method + \
        ('-' + args.network if args.network != "ResNet" else '') + '/' + str(args.width) + '/' + \
        str(args.batch_size) + '/' + args.run_id
    print("WORKING DIR")
    print(working_dir)

    # Create data dir if necessary
    if not os.path.exists(args.data_folder):
        mkdir(args.data_folder)

    # Every process has to lookup the hostname
    host = hpns.nic_name_to_host(args.nic_name)

    result_logger = hpres.json_result_logger(directory=working_dir, overwrite=False)

    # Start a nameserver:
    NS = hpns.NameServer(run_id=args.run_id, host=host, port=0, working_directory=working_dir)
    ns_host, ns_port = NS.start()

    workers = []
    for i in range(args.num_workers):
        print("CREATING WORKER:", i)
        if args.dataset == 'spirals':
            worker_class = create_SpiralsWorker(args.method, args.network, args.width, args.batch_size)
            worker = worker_class(early_stop=args.early_stop,
                                  run_id=args.run_id,
                                  host=host,
                                  nameserver=ns_host,
                                  nameserver_port=ns_port,
                                  timeout=600,
                                  id=i)
        elif args.dataset == 'flights':
            worker_class = create_FlightWorker(args.method, args.network, args.width, args.batch_size)
            worker = worker_class(base_dir=args.data_folder,
                                  prop_val=args.valprop,
                                  k800=(args.n_split == 1),
                                  early_stop=args.early_stop,
                                  run_id=args.run_id,
                                  host=host,
                                  nameserver=ns_host,
                                  nameserver_port=ns_port,
                                  timeout=600,
                                  id=i)
        elif args.dataset in [
                'boston', 'concrete', 'energy', 'power', 'wine', 'yacht', 'kin8nm',
                'naval', 'protein', 'boston_gap', 'concrete_gap', 'energy_gap',
                'power_gap', 'wine_gap', 'yacht_gap', 'kin8nm_gap', 'naval_gap',
                'protein_gap'
        ]:
            worker_class = create_UCIWorker(args.method, args.network, args.width, args.batch_size)
            worker = worker_class(dname=args.dataset,
                                  base_dir=args.data_folder,
                                  prop_val=args.valprop,
                                  n_split=args.n_split,
                                  early_stop=args.early_stop,
                                  run_id=args.run_id,
                                  host=host,
                                  nameserver=ns_host,
                                  nameserver_port=ns_port,
                                  timeout=600,
                                  id=i)
        else:
            raise ValueError('Dataset not implemented yet!')
        worker.run(background=True)
        workers.append(worker)

    n_iterations = args.n_iterations

    previous_run = None
    if args.previous_result_folder is not None:
        try:
            previous_run = hpres.logged_results_to_HBS_result(args.previous_result_folder)
        except Exception as e:
            print(e)

    # Run an optimizer
    bohb = BOHB(
        configspace=worker.get_configspace(),
        run_id=args.run_id,
        host=host,
        nameserver=ns_host,
        nameserver_port=ns_port,
        result_logger=result_logger,
        min_budget=args.min_budget,
        max_budget=args.max_budget,
        previous_result=previous_run,
    )
    res = bohb.run(n_iterations=n_iterations, min_n_workers=args.num_workers)

    # store results
    with open(os.path.join(working_dir, 'results.pkl'), 'wb') as fh:
        pickle.dump(res, fh)

    # shutdown
    bohb.shutdown(shutdown_workers=True)
    NS.shutdown()

    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()
    all_runs = res.get_all_runs()
    print('Best found configuration:', id2config[incumbent]['config'])
    print('A total of %i unique configurations were sampled.' % len(id2config.keys()))
    print('A total of %i runs were executed.' % len(res.get_all_runs()))
    print('Total budget corresponds to %.1f full function evaluations.' %
          (sum([r.budget for r in all_runs]) / args.max_budget))
    print('The run took %.1f seconds to complete.' %
          (all_runs[-1].time_stamps['finished'] - all_runs[0].time_stamps['started']))

# get the state and action space dimensions from the environment
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])

# create the policy, the replay buffer, and the list of evaluations
policy = TD3(state_dim, action_dim, max_action)
replay_buffer = ReplayBuffer()
evaluations = [evaluate_policy(env, policy)]

# create the monitoring folders
work_dir = mkdir('exp', 'brs')
monitor_dir = mkdir(work_dir, 'monitor')

# maximum number of steps per episode
max_episode_steps = env._max_episode_steps

# whether or not to save a video of the environment
save_env_vid = False
if save_env_vid:
    env = wrappers.Monitor(env, monitor_dir, force=True)
    env.reset()

# initialise the training variables
total_timesteps = 0
timesteps_since_eval = 0
episode_num = 0
done = True

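# Note: in the snippet above, mkdir is called as mkdir('exp', 'brs') and its
# return value is used as a path, so this variant evidently joins its arguments,
# creates the directory, and returns the joined path. The sketch below assumes
# that behaviour; the project's actual helper may differ.
import os


def mkdir(base, name):
    """Join base/name, create the directory if it does not exist, and return the path."""
    path = os.path.join(base, name)
    if not os.path.exists(path):
        os.makedirs(path)
    return path
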
def train_loop(net, dname, data_dir, epochs=90, workers=4, resume='', savedir='./',
               save_all_epochs=False, q_nograd_its=0, batch_size=256):
    mkdir(savedir)
    global best_err1

    # Load data here:
    _, train_loader, val_loader, _, _, Ntrain = \
        get_image_loader(dname, batch_size, cuda=True, workers=workers,
                         distributed=False, data_dir=data_dir)

    net.N_train = Ntrain

    start_epoch = 0
    marginal_loglike = np.zeros(epochs)
    train_loss = np.zeros(epochs)
    dev_loss = np.zeros(epochs)
    err_train = np.zeros(epochs)
    err_dev = np.zeros(epochs)

    # optionally resume from a checkpoint
    if resume:
        if os.path.isfile(resume):
            print("=> loading checkpoint '{}'".format(resume))
            start_epoch, best_err1 = net.load(resume)
            print("=> loaded checkpoint '{}' (epoch {})".format(resume, start_epoch))
        else:
            print("=> no checkpoint found at '{}'".format(resume))

        candidate_progress_file = resume.split('/')
        candidate_progress_file = '/'.join(candidate_progress_file[:-1]) + '/stats_array.pkl'

        if os.path.isfile(candidate_progress_file):
            print("=> found progress file at '{}'".format(candidate_progress_file))
            try:
                marginal_loglike, err_train, train_loss, err_dev, dev_loss = \
                    load_object(candidate_progress_file)
                print("=> Loaded progress file at '{}'".format(candidate_progress_file))
            except Exception:
                print("=> Unable to load progress file at '{}'".format(candidate_progress_file))
        else:
            print("=> NOT found progress file at '{}'".format(candidate_progress_file))

    if q_nograd_its > 0:
        net.prob_model.q_logits.requires_grad = False

    for epoch in range(start_epoch, epochs):
        if q_nograd_its > 0 and epoch == q_nograd_its:
            net.prob_model.q_logits.requires_grad = True

        tic = time.time()
        nb_samples = 0
        for x, y in train_loader:
            marg_loglike_estimate, minus_loglike, err = net.fit(x, y)

            marginal_loglike[epoch] += marg_loglike_estimate * x.shape[0]
            err_train[epoch] += err * x.shape[0]
            train_loss[epoch] += minus_loglike * x.shape[0]
            nb_samples += len(x)

        marginal_loglike[epoch] /= nb_samples
        train_loss[epoch] /= nb_samples
        err_train[epoch] /= nb_samples
        toc = time.time()

        # ---- print
        print('\n depth approx posterior', net.prob_model.current_posterior.data.cpu().numpy())
        print("it %d/%d, ELBO/evidence %.4f, pred minus loglike = %f, err = %f" %
              (epoch, epochs, marginal_loglike[epoch], train_loss[epoch], err_train[epoch]),
              end="")
        cprint('r', '   time: %f seconds\n' % (toc - tic))
        net.update_lr()

        # ---- dev
        tic = time.time()
        nb_samples = 0
        for x, y in val_loader:
            minus_loglike, err = net.eval(x, y)
            dev_loss[epoch] += minus_loglike * x.shape[0]
            err_dev[epoch] += err * x.shape[0]
            nb_samples += len(x)

        dev_loss[epoch] /= nb_samples
        err_dev[epoch] /= nb_samples
        toc = time.time()

        cprint('g', '    pred minus loglike = %f, err = %f\n' % (dev_loss[epoch], err_dev[epoch]),
               end="")
        cprint('g', '    time: %f seconds\n' % (toc - tic))

        filename = 'checkpoint.pth.tar'
        if save_all_epochs:
            filename = str(epoch) + '_' + filename
        net.save(os.path.join(savedir, filename), best_err1)
        if err_dev[epoch] < best_err1:
            best_err1 = err_dev[epoch]
            cprint('b', 'best top1 dev err: %f' % err_dev[epoch])
            shutil.copyfile(os.path.join(savedir, filename),
                            os.path.join(savedir, 'model_best.pth.tar'))

    all_results = [marginal_loglike, err_train, train_loss, err_dev, dev_loss]
    save_object(all_results, os.path.join(savedir, 'stats_array.pkl'))

def save(self, path):
    mkdir(path)
    self.saver.save(self.sess, path + self.NAME)
    pickle_dump(self.dm, path + 'dm.pkl')
    logger.info("Model saved to '{}'".format(path))