import argparse
import json

import numpy as np

# NOTE: the imports below follow the tcorex package layout; adjust the paths
# if your checkout places these helpers elsewhere.
from tcorex import TCorex
from tcorex.experiments import baselines
from tcorex.experiments.data import load_nglf_sudden_change
from tcorex.experiments.misc import make_sure_path_exists


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--nt', type=int, default=10, help='number of buckets')
    parser.add_argument('--train_cnt', default=16, type=int, help='number of train samples')
    parser.add_argument('--nvs', type=int, nargs='+', default=[2**i for i in range(3, 21)])
    parser.add_argument('--prefix', type=str, default='', help='additional prefix of output file name')
    args = parser.parse_args()
    print(args)

    methods = [
        # NOTE: hits a memory error (on 32GB RAM) once nv reaches 2048
        (baselines.TimeVaryingGraphLasso(name='T-GLASSO'), {
            'lamb': 0.1,
            'beta': 1.0,
            'indexOfPenalty': 1,
            'max_iter': 100,
            'lengthOfSlice': args.train_cnt,
        }),

        (baselines.TCorex(tcorex=TCorex, name='T-Corex (pytorch, cpu)'), {
            'max_iter': 100,
            'anneal': True,
            'l1': 0.1,
            'gamma': 0.8,
            'reg_type': 'W',
            'init': True,
            'device': 'cpu'
        }),

        (baselines.TCorex(tcorex=TCorex, name='T-Corex (pytorch, cuda)'), {
            'max_iter': 100,
            'anneal': True,
            'l1': 0.1,
            'gamma': 0.8,
            'reg_type': 'W',
            'init': True,
            'device': 'cuda'
        }),

        (baselines.LTGL(name='LTGL'), {
            'alpha': 3.0,
            'tau': 30.0,
            'beta': 30.0,
            'psi': 'l1',
            'eta': 3.0,
            'phi': 'l1',
            'rho': 1.0 / np.sqrt(args.train_cnt),
            'max_iter': 500,
            'verbose': False
        }),

        (baselines.QUIC(name='QUIC'), {
            'lamb': 0.1,
            'tol': 1e-6,
            'msg': 1,
            'max_iter': 100
        }),

        (baselines.BigQUIC(name='BigQUIC'), {
            'lamb': 3,
            'tol': 1e-3,
            'verbose': 1,
            'max_iter': 100
        })
    ]

    times = {}
    for method, params in methods[:-2]:
        times[method.name] = []

    out_file = 'outputs/scalability/{}nt{}.train_cnt{}.json'.format(
        args.prefix, args.nt, args.train_cnt)
    make_sure_path_exists(out_file)
    print("Output file path = {}".format(out_file))

    stop_methods = set()
    for nv in args.nvs:
        bs = min(nv, 16)
        n_hidden = nv // bs

        # generate data
        data, _ = load_nglf_sudden_change(nv=nv, m=n_hidden, nt=args.nt,
                                          ns=args.train_cnt, shuffle=False,
                                          from_matrix=False)

        for method, params in methods[:-2]:
            # start timing
            print("{}\nTiming method: {}, nv: {}".format('-' * 80, method.name, nv))
            if method.name.find('T-Corex') != -1:
                params['nv'] = nv
                params['n_hidden'] = min(n_hidden, 64)

            if method.name in stop_methods:
                print("\tskipped")
                continue

            try:
                ct = method.timeit(data, params)
                times[method.name].append((nv, ct))
                print("\ttook {:.2f} seconds".format(ct))
                # do not time this method again if it took more than 6 hours
                if ct > 3600 * 6:
                    stop_methods.add(method.name)
            except Exception as e:
                print("\tfailed with message: '{}'".format(str(e)))

            # save results after each run, so partial results survive crashes
            with open(out_file, 'w') as f:
                json.dump(times, f)
            print("Results are saved in {}".format(out_file))


if __name__ == '__main__':
    main()
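# For reference, a minimal sketch of what the `timeit` call above measures,
# assuming each baseline wrapper trains a model given (data, params). This is
# an illustrative assumption -- the real helper lives in
# tcorex.experiments.baselines and may differ; `fit` is a hypothetical
# stand-in for the wrapper's actual training entry point.
import time

def timeit_sketch(method, data, params):
    """Return the wall-clock training time, in seconds, of a single run."""
    start = time.time()
    method.fit(data, params)  # hypothetical training call
    return time.time() - start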
import argparse
import json
import os
import pickle

import numpy as np
from cvxopt import matrix, solvers

# NOTE: the imports below follow the tcorex package layout; adjust the paths
# if your checkout places these helpers elsewhere.
from tcorex import TCorex
from tcorex.experiments import baselines
from tcorex.experiments.data import load_sp500
from tcorex.experiments.misc import make_sure_path_exists


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--nt', type=int, default=10, help='number of train time periods')
    parser.add_argument('--train_cnt', default=12, type=int, help='number of train samples')
    parser.add_argument('--val_cnt', default=3, type=int, help='number of validation samples')
    parser.add_argument('--start_period', type=int, default=1)
    parser.add_argument('--noise_var', type=float, default=1e-4,
                        help='variance of Gaussian noise that will be added to time series')
    parser.add_argument('--prefix', type=str, default='', help='optional prefix of experiment name')
    parser.add_argument('--output_dir', type=str, default='outputs/portfolio/')
    parser.add_argument('--left', type=int, default=0)
    parser.add_argument('--right', type=int, default=-2)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--device', type=str, default='cpu')
    args = parser.parse_args()
    print(args)

    ''' Load data '''
    train_data, val_data, _, _, _, df_index = load_sp500(
        train_cnt=args.train_cnt, val_cnt=args.val_cnt, test_cnt=0,
        commodities=False, log_return=False, start_date='2000-01-01',
        end_date='2016-01-01', noise_var=args.noise_var, standardize=False,
        return_index=True, seed=args.seed)

    # Take the last nt+1 time periods. The first nt of them are used for
    # training / validation; the last one is used for testing.
    start_period = args.start_period
    test_period = args.start_period + args.nt
    nv = train_data[0].shape[-1]
    test_data = np.concatenate([train_data[test_period], val_data[test_period]], axis=0)
    train_data = train_data[start_period:test_period]
    val_data = val_data[start_period:test_period]

    start_date = df_index[start_period * (args.train_cnt + args.val_cnt)].date()
    end_date = df_index[(test_period + 1) * (args.train_cnt + args.val_cnt) - 1].date()

    print("Number of train/val time periods: {}".format(len(train_data)))
    print("Start date: {}".format(start_date))
    print("End date: {}".format(end_date))
    print("Test data shape: {}".format(test_data.shape))

    ''' Define baselines and the grid of parameters '''
    # gamma: values close to 0 use samples only from the current bucket,
    # while values close to 1 use samples from all buckets
    tcorex_gamma_range = [0.8, 0.9]
    n_hidden_grid = [16, 32]  # [16, 32, 64]

    methods = [
        (baselines.LedoitWolf(name='Ledoit-Wolf'), {}),

        (baselines.LinearCorex(name='Linear CorEx'), {
            'n_hidden': n_hidden_grid,
            'max_iter': 500,
            'anneal': True
        }),

        (baselines.TCorex(tcorex=TCorex, name='T-Corex'), {
            'nv': nv,
            'n_hidden': n_hidden_grid,
            'max_iter': 500,
            'anneal': True,
            'reg_params': {
                'l1': [0.3, 1.0, 3.0, 10.0, 30.0],
                # [0.0, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0],
            },
            'gamma': tcorex_gamma_range,
            'reg_type': 'W',
            'init': True,
            'device': args.device,
            'verbose': 1
        }),

        (baselines.TimeVaryingGraphLasso(name='T-GLASSO'), {
            'lamb': [0.03, 0.1, 0.3, 1.0, 3.0],
            'beta': [0.03, 0.1, 0.3, 1.0, 3.0, 10.0],
            'indexOfPenalty': [1],  # NOTE: L2 is very slow and gives bad results
            'max_iter': 500,        # NOTE: checked 1500, no improvement
            'lengthOfSlice': args.train_cnt
        }),

        (baselines.LTGL(name='LTGL'), {
            'alpha': [0.3, 1.0, 3.0, 10.0],
            'tau': [30.0, 100.0, 300.0, 1e3],
            'beta': [10.0, 30.0, 100.0],
            'psi': 'l1',
            'eta': [0.3, 1.0, 3.0],
            'phi': 'l1',  # NOTE: tried Laplacian and l2 too, no improvement
            'rho': 1.0 / np.sqrt(args.train_cnt),
            'max_iter': 500,  # NOTE: tried 1000, no improvement
            'verbose': False
        })
    ]

    exp_name = 'nt{}.train_cnt{}.val_cnt{}.start_date{}.end_date{}.noise_var{}'.format(
        args.nt, args.train_cnt, args.val_cnt, start_date, end_date, args.noise_var)
    exp_name = args.prefix + exp_name

    best_results_path = "{}.results.json".format(exp_name)
    best_results_path = os.path.join(args.output_dir, 'best', best_results_path)
    make_sure_path_exists(best_results_path)
    best_results = {}
    if os.path.exists(best_results_path):
        with open(best_results_path, 'r') as f:
            best_results = json.load(f)

    all_results_path = "{}.results.json".format(exp_name)
    all_results_path = os.path.join(args.output_dir, 'all', all_results_path)
    make_sure_path_exists(all_results_path)
    all_results = {}
    if os.path.exists(all_results_path):
        with open(all_results_path, 'r') as f:
            all_results = json.load(f)

    mu_path = "{}.pkl".format(exp_name)
    mu_path = os.path.join(args.output_dir, 'mu', mu_path)
    make_sure_path_exists(mu_path)
    mus = {}
    if os.path.exists(mu_path):
        with open(mu_path, 'rb') as f:
            mus = pickle.load(f)

    sigma_path = "{}.pkl".format(exp_name)
    sigma_path = os.path.join(args.output_dir, 'sigma', sigma_path)
    make_sure_path_exists(sigma_path)
    sigmas = {}
    if os.path.exists(sigma_path):
        with open(sigma_path, 'rb') as f:
            sigmas = pickle.load(f)

    qp_solutions_path = "{}.pkl".format(exp_name)
    qp_solutions_path = os.path.join(args.output_dir, 'qp_solution', qp_solutions_path)
    make_sure_path_exists(qp_solutions_path)
    qp_solutions = {}
    if os.path.exists(qp_solutions_path):
        with open(qp_solutions_path, 'rb') as f:
            qp_solutions = pickle.load(f)

    test_data_path = "{}.txt".format(exp_name)
    test_data_path = os.path.join(args.output_dir, 'test_data', test_data_path)
    make_sure_path_exists(test_data_path)
    np.savetxt(test_data_path, test_data)

    for (method, params) in methods[args.left:args.right]:
        name = method.name
        best_score, best_params, best_covs, best_method, all_cur_results = \
            method.select(train_data, val_data, params)

        mu = np.mean(train_data[-1], axis=0)
        if name == 'T-Corex':
            mu = best_method.theta[-1][0]
        mu = mu.astype(np.float64)
        sigma = best_covs[-1].astype(np.float64)

        # Portfolio optimization using mu and sigma.
        # cvxopt's solvers.qp solves:
        #   minimize    (1/2) x^T P x + q^T x
        #   subject to  G x <= h
        #               A x  = b
        #
        # Our program:
        #   minimize    x^T Sigma x
        #   subject to  mu^T x >= r
        #               1^T x = 1
        #               x >= 0
        qp_solutions[name] = {}
        for r in np.linspace(0.0, np.percentile(mu, 99), 100):
            P = 2.0 * matrix(sigma)
            q = matrix(0.0, (nv, 1))
            G = matrix(np.concatenate([-np.eye(nv), -mu.reshape((1, -1))], axis=0))
            h = matrix(np.concatenate([np.zeros((nv, 1)), -r * np.ones((1, 1))], axis=0))
            A = matrix(np.ones((1, nv)))
            b = matrix(1.0)
            qp_solutions[name][r] = solvers.qp(P, q, G, h, A, b)

        # save qp_solutions
        with open(qp_solutions_path, 'wb') as f:
            pickle.dump(qp_solutions, f)

        # save mu and sigma
        mus[name] = mu
        sigmas[name] = sigma
        with open(mu_path, 'wb') as f:
            pickle.dump(mus, f)
        with open(sigma_path, 'wb') as f:
            pickle.dump(sigmas, f)

        # save model selection data
        best_results[name] = {}
        best_results[name]['best_params'] = best_params
        best_results[name]['best_val_score'] = best_score
        all_results[name] = all_cur_results
        with open(best_results_path, 'w') as f:
            json.dump(best_results, f)
        with open(all_results_path, 'w') as f:
            json.dump(all_results, f)

    print("Best results are saved in {}".format(best_results_path))
    print("All results are saved in {}".format(all_results_path))
    print("Means are saved in {}".format(mu_path))
    print("Sigmas are saved in {}".format(sigma_path))
    print("Solutions are saved in {}".format(qp_solutions_path))
    print("Test data is saved in {}".format(test_data_path))


if __name__ == '__main__':
    main()
import argparse
import json
import os

import numpy as np

# NOTE: the imports below follow the tcorex package layout; adjust the paths
# if your checkout places these helpers elsewhere.
from tcorex import TCorex, TCorexLearnable
from tcorex.experiments import baselines
from tcorex.experiments.data import load_sp500
from tcorex.experiments.misc import make_sure_path_exists


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--nt', type=int, help='number of buckets')
    parser.add_argument('--train_cnt', default=16, type=int, help='number of train samples')
    parser.add_argument('--val_cnt', default=4, type=int, help='number of validation samples')
    parser.add_argument('--test_cnt', default=4, type=int, help='number of test samples')
    parser.add_argument('--commodities', dest='commodities', action='store_true',
                        help='whether to include commodity prices too')
    parser.add_argument('--log_return', dest='log_return', action='store_true',
                        help='whether to take log returns or standard returns')
    parser.add_argument('--standard_return', dest='log_return', action='store_false')
    parser.add_argument('--start_date', type=str, default='2000-01-01')
    parser.add_argument('--end_date', type=str, default='2016-01-01')
    parser.add_argument('--noise_var', type=float, default=1e-4,
                        help='variance of Gaussian noise that will be added to time series')
    parser.add_argument('--prefix', type=str, default='', help='optional prefix of experiment name')
    parser.add_argument('--output_dir', type=str, default='outputs/quantitative/')
    parser.add_argument('--left', type=int, default=0)
    parser.add_argument('--right', type=int, default=-2)
    parser.add_argument('--seed', type=int, default=42)
    parser.set_defaults(commodities=False)
    parser.set_defaults(log_return=True)
    args = parser.parse_args()
    print(args)

    ''' Load data '''
    train_data, val_data, test_data, _, _ = load_sp500(
        train_cnt=args.train_cnt, val_cnt=args.val_cnt, test_cnt=args.test_cnt,
        commodities=args.commodities, log_return=args.log_return,
        start_date=args.start_date, end_date=args.end_date,
        noise_var=args.noise_var, seed=args.seed)

    # take the last nt time periods
    nv = train_data[0].shape[-1]
    train_data = train_data[-args.nt:]
    val_data = val_data[-args.nt:]
    test_data = test_data[-args.nt:]

    ''' Define baselines and the grid of parameters '''
    # gamma: values close to 0 use samples only from the current bucket,
    # while values close to 1 use samples from all buckets
    tcorex_gamma_range = None
    if 0 < args.train_cnt <= 16:
        tcorex_gamma_range = [0.5, 0.6, 0.7, 0.8, 0.9]
    elif 16 < args.train_cnt <= 32:
        tcorex_gamma_range = [0.4, 0.5, 0.6, 0.7, 0.8]
    elif 32 < args.train_cnt <= 64:
        tcorex_gamma_range = [0.1, 0.2, 0.3, 0.4, 0.5]
    elif 64 < args.train_cnt:
        tcorex_gamma_range = [1e-9, 0.1, 0.2]

    n_hidden_grid = [16, 32, 64, 128]

    methods = [
        (baselines.Diagonal(name='Diagonal'), {}),

        (baselines.LedoitWolf(name='Ledoit-Wolf'), {}),

        (baselines.OAS(name='Oracle approximating shrinkage'), {}),

        (baselines.PCA(name='PCA'), {
            'n_components': n_hidden_grid
        }),

        (baselines.SparsePCA(name='SparsePCA'), {
            'n_components': n_hidden_grid,
            'alpha': [0.1, 0.3, 1.0, 3.0, 10.0, 30.0],
            'ridge_alpha': [0.01],
            'tol': 1e-3,
            'max_iter': 100,  # NOTE: tried 500, no improvement, just slows down a lot
        }),

        (baselines.FactorAnalysis(name='Factor Analysis'), {
            'n_components': n_hidden_grid
        }),

        (baselines.GraphLasso(name='Graphical LASSO (sklearn)'), {
            'alpha': [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3],
            'mode': 'lars',
            'max_iter': 500,
        }),

        (baselines.LinearCorex(name='Linear CorEx'), {
            'n_hidden': n_hidden_grid,
            'max_iter': 500,
            'anneal': True
        }),

        (baselines.TimeVaryingGraphLasso(name='T-GLASSO'), {
            'lamb': [0.03, 0.1, 0.3, 1.0, 3.0],
            'beta': [0.03, 0.1, 0.3, 1.0, 3.0, 10.0],
            'indexOfPenalty': [1],  # NOTE: L2 is very slow and gives bad results
            'max_iter': 500,        # NOTE: checked 1500, no improvement
            'lengthOfSlice': args.train_cnt
        }),

        (baselines.TimeVaryingGraphLasso(name='T-GLASSO (no reg)'), {
            'lamb': [0.01, 0.03, 0.1, 0.3, 1.0, 3.0],
            'beta': [0.0],
            'indexOfPenalty': [1],
            'max_iter': 500,
            'lengthOfSlice': args.train_cnt
        }),

        (baselines.TCorex(tcorex=TCorex, name='T-Corex (simple)'), {
            'nv': nv,
            'n_hidden': n_hidden_grid,
            'max_iter': 500,
            'anneal': True,
            'reg_params': {
                'l1': [0.0, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0],  # NOTE: L1 works slightly better
                # 'l2': [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0],
            },
            'reg_type': 'W',
            'gamma': 1e-9,
            'init': False,
        }),

        (baselines.TCorex(tcorex=TCorex, name='T-Corex'), {
            'nv': nv,
            'n_hidden': n_hidden_grid,
            'max_iter': 500,
            'anneal': True,
            'reg_params': {
                'l1': [0.0, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0],
                # 'l2': [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0],
            },
            'gamma': tcorex_gamma_range,
            'reg_type': 'W',
            'init': True,
        }),

        (baselines.TCorex(tcorex=TCorex, name='T-Corex (weighted objective)'), {
            'nv': nv,
            'n_hidden': n_hidden_grid,
            'max_iter': 500,
            'anneal': True,
            'reg_params': {
                'l1': [0.0, 0.001, 0.003, 0.01, 0.03],
                # 'l2': [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0],
            },
            'gamma': tcorex_gamma_range,
            'reg_type': 'W',
            'init': True,
            'weighted_obj': True
        }),

        (baselines.TCorex(tcorex=TCorex, name='T-Corex (no reg)'), {
            'nv': nv,
            'n_hidden': n_hidden_grid,
            'max_iter': 500,
            'anneal': True,
            'l1': 0.0,
            'l2': 0.0,
            'gamma': tcorex_gamma_range,
            'reg_type': 'W',
            'init': True,
        }),

        (baselines.TCorex(tcorex=TCorex, name='T-Corex (no init)'), {
            'nv': nv,
            'n_hidden': n_hidden_grid,
            'max_iter': 500,
            'anneal': True,
            'reg_params': {
                'l1': [0.0, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0],
                # 'l2': [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0],
            },
            'gamma': tcorex_gamma_range,
            'reg_type': 'W',
            'init': False,
        }),

        (baselines.TCorex(tcorex=TCorexLearnable, name='T-Corex (learnable)'), {
            'nv': nv,
            'n_hidden': n_hidden_grid,
            'max_iter': 500,
            'anneal': True,
            'reg_params': {
                'l1': [0.0, 0.001, 0.003, 0.01, 0.03],
                # 'l2': [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0],
            },
            'reg_type': 'W',
            'init': True,
            'entropy_lamb': [0.0, 0.1, 0.3, 0.5, 0.8],
            'weighted_obj': True
        }),

        (baselines.LVGLASSO(name='LVGLASSO'), {
            'alpha': [0.03, 0.1, 0.3, 1.0, 3.0, 10.0],
            'tau': [1.0, 3.0, 10.0, 30.0, 100.0, 300.0],
            'rho': 1.0 / np.sqrt(args.train_cnt),  # NOTE: works well; results are not sensitive to rho
            'max_iter': 500,  # NOTE: tried 1000, no improvement
            'verbose': False
        }),

        (baselines.LTGL(name='LTGL'), {
            'alpha': [0.3, 1.0, 3.0, 10.0],
            'tau': [30.0, 100.0, 300.0, 1e3],
            'beta': [10.0, 30.0, 100.0],
            'psi': 'l1',
            'eta': [0.3, 1.0, 3.0],
            'phi': 'l1',  # NOTE: tried Laplacian and l2 too, no improvement
            'rho': 1.0 / np.sqrt(args.train_cnt),
            'max_iter': 500,  # NOTE: tried 1000, no improvement
            'verbose': False
        })
    ]

    exp_name = 'stocks_first_setup.nt{}.train_cnt{}.val_cnt{}.test_cnt{}.start_date{}.end_date{}.noise_var{}'.format(
        args.nt, args.train_cnt, args.val_cnt, args.test_cnt,
        args.start_date, args.end_date, args.noise_var)
    exp_name = args.prefix + exp_name
    if args.commodities:
        exp_name += '.commodities'
    if args.log_return:
        exp_name += '.log_return'

    best_results_path = "{}.results.json".format(exp_name)
    best_results_path = os.path.join(args.output_dir, 'best', best_results_path)
    make_sure_path_exists(best_results_path)

    all_results_path = "{}.results.json".format(exp_name)
    all_results_path = os.path.join(args.output_dir, 'all', all_results_path)
    make_sure_path_exists(all_results_path)

    best_results = {}
    all_results = {}

    # read previously stored values
    if os.path.exists(best_results_path):
        with open(best_results_path, 'r') as f:
            best_results = json.load(f)
    if os.path.exists(all_results_path):
        with open(all_results_path, 'r') as f:
            all_results = json.load(f)

    for (method, params) in methods[args.left:args.right]:
        name = method.name
        best_score, best_params, _, _, all_cur_results = method.select(
            train_data, val_data, params)

        best_results[name] = {}
        best_results[name]['test_score'] = method.evaluate(test_data, best_params)
        best_results[name]['best_params'] = best_params
        best_results[name]['best_val_score'] = best_score
        all_results[name] = all_cur_results

        with open(best_results_path, 'w') as f:
            json.dump(best_results, f)
        with open(all_results_path, 'w') as f:
            json.dump(all_results, f)

    print("Best results are saved in {}".format(best_results_path))
    print("All results are saved in {}".format(all_results_path))


if __name__ == '__main__':
    main()
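# For intuition, a minimal sketch of the grid search that `select` performs
# above: expand every list-valued entry of the parameter grid, train on the
# train buckets, and keep the best validation score. This is an assumption
# for illustration -- the real implementation lives in
# tcorex.experiments.baselines; `fit` is a hypothetical training entry point,
# nested grids such as reg_params are ignored here, and whether higher scores
# are better depends on the wrapper's convention.
from itertools import product

def select_sketch(method, train_data, val_data, param_grid):
    keys = sorted(param_grid.keys())
    grids = [v if isinstance(v, list) else [v] for v in (param_grid[k] for k in keys)]
    best_score, best_params = None, None
    for combo in product(*grids):
        params = dict(zip(keys, combo))
        method.fit(train_data, params)             # hypothetical training call
        score = method.evaluate(val_data, params)  # score on held-out buckets
        if best_score is None or score > best_score:
            best_score, best_params = score, params
    return best_score, best_params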
import matplotlib
matplotlib.use('agg')  # render to files; no display needed
from matplotlib import pyplot as plt

import numpy as np

from tcorex import base, TCorex
from tcorex import covariance as cov_utils
from tcorex.experiments import baselines
from tcorex.experiments.data import load_nglf_sudden_change


def main():
    nv = 32         # number of observed variables
    m = 4           # number of hidden variables
    nt = 10         # number of time periods
    train_cnt = 16  # number of training samples for each time period
    val_cnt = 4     # number of validation samples for each time period

    # Generate some data with a sudden change in the middle.
    data, ground_truth_sigma = load_nglf_sudden_change(nv=nv, m=m, nt=nt,
                                                       ns=(train_cnt + val_cnt))

    # Split it into train and validation.
    train_data = [X[:train_cnt] for X in data]
    val_data = [X[train_cnt:] for X in data]

    # NOTE: the load_nglf_sudden_change function above creates data where the
    # time axis is already divided into time periods. If your data is not
    # divided into time periods, you can use the following procedure to do that:
    #   bucketed_data, index_to_bucket = make_buckets(data, window=train_cnt + val_cnt, stride='full')
    # The make_buckets function can be found at tcorex.experiments.data.

    # The core method we have is the tcorex.TCorex class.
    tc = TCorex(nt=nt,
                nv=nv,
                n_hidden=m,
                max_iter=500,
                device='cpu',  # for GPU set 'cuda'
                l1=0.3,        # coefficient of the temporal regularization term
                gamma=0.3,     # parameter that controls the sample weights
                verbose=1)     # 0, 1, 2

    # Fit the parameters of T-CorEx.
    tc.fit(train_data)

    # We can compute the clusters of observed variables for each time period.
    t = 8
    clusters = tc.clusters()
    print("Clusters at time period {}: {}".format(t, clusters[t]))

    # We can get an estimate of the covariance matrix for each time period.
    # When normed=True, estimates of the correlation matrices are returned.
    covs = tc.get_covariance()

    # We can visualize the covariance matrices.
    fig, ax = plt.subplots(1, figsize=(5, 5))
    im = ax.imshow(covs[t])
    fig.colorbar(im)
    ax.set_title("Estimated covariance matrix\nat time period {}".format(t))
    fig.savefig('covariance-matrix.png')

    # It is usually useful to compute the inverse correlation matrices,
    # since these matrices can be interpreted as adjacency matrices of
    # Markov random fields.
    cors = tc.get_covariance(normed=True)
    inv_cors = [np.linalg.inv(x) for x in cors]

    # We can visualize the thresholded inverse correlation matrices.
    fig, ax = plt.subplots(1, figsize=(5, 5))
    thresholded_inv_cor = np.abs(inv_cors[t]) > 0.05
    ax.imshow(thresholded_inv_cor)
    ax.set_title("Thresholded inverse correlation\nmatrix at time period {}".format(t))
    fig.savefig('thresholded-inverse-correlation-matrix.png')

    # We can also plot the Frobenius norms of the differences of inverse
    # correlation matrices of neighboring time periods. This is helpful
    # for detecting the sudden change points of the system.
    diffs = cov_utils.diffs(inv_cors)
    fig, ax = plt.subplots(1, figsize=(5, 5))
    ax.plot(diffs)
    ax.set_xlabel('t')
    ax.set_ylabel(r'$\|\Sigma^{-1}_{t+1} - \Sigma^{-1}_{t}\|_F$')
    ax.set_title("Frobenius norms of differences between\ninverse correlation matrices")
    fig.savefig('inv-correlation-difference-norms.png')

    # We can also do grid search on a hyperparameter grid the following way.
    # NOTE: this can take time!
    baseline, grid = (baselines.TCorex(tcorex=TCorex, name='T-Corex'), {
        'nv': nv,
        'n_hidden': m,
        'max_iter': 500,
        'device': 'cpu',
        'l1': [0.0, 0.03, 0.3, 3.0],
        'gamma': [1e-6, 0.3, 0.5, 0.8]
    })
    best_score, best_params, best_covs, best_method, all_results = baseline.select(
        train_data, val_data, grid)

    # best_method is the model that performed best on the validation data;
    # you can use it the same way as `tc` above.
    tc = best_method
    base.save(tc, 'best_method.pkl')


if __name__ == '__main__':
    main()
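# For intuition on the make_buckets helper mentioned in the NOTE above, here
# is a minimal sketch of window-based bucketing, assuming a (T, nv) array and
# non-overlapping windows. The real helper in tcorex.experiments.data also
# takes a `stride` argument and may differ in details.
def make_buckets_sketch(data, window):
    """Split a (T, nv) array into consecutive non-overlapping time buckets."""
    n_buckets = len(data) // window
    bucketed = [data[i * window:(i + 1) * window] for i in range(n_buckets)]
    # maps each used time index to the bucket it fell into
    index_to_bucket = [i // window for i in range(n_buckets * window)]
    return bucketed, index_to_bucket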
import argparse
import json
import os

import numpy as np

# NOTE: the imports below follow the tcorex package layout; adjust the paths
# if your checkout places these helpers elsewhere.
from tcorex import TCorex, TCorexLearnable
from tcorex.experiments import baselines
from tcorex.experiments.data import load_modular_sudden_change
from tcorex.experiments.misc import make_sure_path_exists


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--nt', type=int, help='number of buckets')
    parser.add_argument('--m', type=int, help='number of latent factors')
    parser.add_argument('--bs', type=int, help='block size')
    parser.add_argument('--train_cnt', default=16, type=int, help='number of train samples')
    parser.add_argument('--val_cnt', default=16, type=int, help='number of validation samples')
    parser.add_argument('--test_cnt', default=1000, type=int, help='number of test samples')
    parser.add_argument('--snr', type=float, default=5.0, help='signal-to-noise ratio')
    parser.add_argument('--min_std', type=float, default=0.25, help='minimum x-std')
    parser.add_argument('--max_std', type=float, default=4.0, help='maximum x-std')
    parser.add_argument('--prefix', type=str, default='', help='optional prefix of experiment name')
    parser.add_argument('--data_type', dest='data_type', action='store', default='modular',
                        choices=['modular', 'general', 'sparse'],
                        help='which dataset to load/create')
    parser.add_argument('--output_dir', type=str, default='outputs/quantitative/')
    parser.add_argument('--shuffle', dest='shuffle', action='store_true',
                        help='whether to shuffle the parent-child relation')
    parser.set_defaults(shuffle=False)
    parser.add_argument('--n_segments', type=int, default=2)
    parser.add_argument('--left', type=int, default=0)
    parser.add_argument('--right', type=int, default=-2)
    parser.add_argument('--seed', type=int, default=42)
    args = parser.parse_args()
    args.nv = args.m * args.bs
    print(args)

    ''' Load data '''
    if args.data_type == 'modular':
        data, ground_truth_covs = load_modular_sudden_change(
            nv=args.nv, m=args.m, nt=args.nt,
            ns=args.train_cnt + args.val_cnt + args.test_cnt,
            snr=args.snr, min_std=args.min_std, max_std=args.max_std,
            shuffle=args.shuffle, n_segments=args.n_segments, seed=args.seed)
    else:
        raise ValueError("data_type={} is not implemented yet.".format(args.data_type))

    train_data = [x[:args.train_cnt] for x in data]
    val_data = [x[args.train_cnt:args.train_cnt + args.val_cnt] for x in data]
    test_data = [x[-args.test_cnt:] for x in data]

    ''' Define baselines and the grid of parameters '''
    # gamma: values close to 0 use samples only from the current bucket,
    # while values close to 1 use samples from all buckets
    tcorex_gamma_range = None
    if 0 < args.train_cnt <= 16:
        tcorex_gamma_range = [0.3, 0.4, 0.5, 0.6, 0.7]
    elif 16 < args.train_cnt <= 32:
        tcorex_gamma_range = [0.1, 0.3, 0.4, 0.5, 0.6]
    elif 32 < args.train_cnt <= 64:
        tcorex_gamma_range = [1e-9, 0.1, 0.3, 0.4, 0.5]
    elif 64 < args.train_cnt:
        tcorex_gamma_range = [1e-9, 0.1, 0.3]

    methods = [
        (baselines.GroundTruth(name='Ground Truth',
                               covs=ground_truth_covs,
                               test_data=test_data), {}),

        (baselines.Diagonal(name='Diagonal'), {}),

        (baselines.LedoitWolf(name='Ledoit-Wolf'), {}),

        (baselines.OAS(name='Oracle approximating shrinkage'), {}),

        (baselines.PCA(name='PCA'), {
            'n_components': [args.m],
        }),

        (baselines.SparsePCA(name='SparsePCA'), {
            'n_components': [args.m],
            'alpha': [0.1, 0.3, 1.0, 3.0, 10.0, 30.0],
            'ridge_alpha': [0.01],
            'tol': 1e-6,
            'max_iter': 500,
        }),

        (baselines.FactorAnalysis(name='Factor Analysis'), {
            'n_components': [args.m],
        }),

        (baselines.GraphLasso(name='Graphical LASSO (sklearn)'), {
            'alpha': [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3],
            'mode': 'lars',
            'max_iter': 500,
        }),

        (baselines.LinearCorex(name='Linear CorEx'), {
            'n_hidden': [args.m],
            'max_iter': 500,
            'anneal': True,
        }),

        (baselines.TimeVaryingGraphLasso(name='T-GLASSO'), {
            'lamb': [0.03, 0.1, 0.3, 1.0, 3.0],
            'beta': [0.03, 0.1, 0.3, 1.0, 3.0, 10.0],
            'indexOfPenalty': [1],  # NOTE: L2 is very slow and gives bad results; Laplacian gives worse results
            'max_iter': 500,        # NOTE: checked 1500, no improvement
            'lengthOfSlice': args.train_cnt,
        }),

        (baselines.TimeVaryingGraphLasso(name='T-GLASSO (no reg)'), {
            'lamb': [0.01, 0.03, 0.1, 0.3, 1.0, 3.0],
            'beta': [0.0],
            'indexOfPenalty': [1],
            'max_iter': 500,
            'lengthOfSlice': args.train_cnt,
        }),

        (baselines.TCorex(tcorex=TCorex, name='T-Corex (simple)'), {
            'nv': args.nv,
            'n_hidden': args.m,
            'max_iter': 500,
            'anneal': True,
            'reg_params': {
                'l1': [0.0, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0],  # NOTE: L1 works slightly better
                'l2': [0, 0.1, 0.3, 1.0, 3.0, 10.0, 30.0, 100.0, 300.0]
            },
            'reg_type': 'W',
            'gamma': 1e-9,
            'init': False,
        }),

        (baselines.TCorex(tcorex=TCorex, name='T-Corex'), {
            'nv': args.nv,
            'n_hidden': [args.m],
            'max_iter': 500,
            'anneal': True,
            'reg_params': {
                'l1': [0.0, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0],
                'l2': [0, 0.1, 0.3, 1.0, 3.0, 10.0, 30.0, 100.0, 300.0]
            },
            'gamma': tcorex_gamma_range,
            'reg_type': 'W',
            'init': True,
        }),

        (baselines.TCorex(tcorex=TCorex, name='T-Corex (weighted objective)'), {
            'nv': args.nv,
            'n_hidden': [args.m],
            'max_iter': 500,
            'anneal': True,
            'reg_params': {
                'l1': [0.0, 0.001, 0.003, 0.01, 0.03],
                'l2': [0, 0.1, 0.3, 1.0, 3.0, 10.0, 30.0, 100.0, 300.0]
            },
            'gamma': tcorex_gamma_range,
            'reg_type': 'W',
            'init': True,
            'weighted_obj': True
        }),

        (baselines.TCorex(tcorex=TCorex, name='T-Corex (no reg)'), {
            'nv': args.nv,
            'n_hidden': [args.m],
            'max_iter': 500,
            'anneal': True,
            'l1': 0.0,
            'l2': 0.0,
            'gamma': tcorex_gamma_range,
            'reg_type': 'W',
            'init': True,
        }),

        (baselines.TCorex(tcorex=TCorex, name='T-Corex (no init)'), {
            'nv': args.nv,
            'n_hidden': [args.m],
            'max_iter': 500,
            'anneal': True,
            'reg_params': {
                'l1': [0.0, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0],
                'l2': [0, 0.1, 0.3, 1.0, 3.0, 10.0, 30.0, 100.0, 300.0]
            },
            'gamma': tcorex_gamma_range,
            'reg_type': 'W',
            'init': False,
        }),

        (baselines.TCorex(tcorex=TCorexLearnable, name='T-Corex (learnable)'), {
            'nv': args.nv,
            'n_hidden': [args.m],
            'max_iter': 500,
            'anneal': True,
            'reg_params': {
                'l1': [0.0, 0.001, 0.003, 0.01, 0.03],
                'l2': [0, 0.1, 0.3, 1.0, 3.0, 10.0, 30.0, 100.0, 300.0]
            },
            'reg_type': 'W',
            'init': True,
            'entropy_lamb': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
            'weighted_obj': True
        }),

        (baselines.LVGLASSO(name='LVGLASSO'), {
            'alpha': [0.03, 0.1, 0.3, 1.0, 3.0, 10.0],
            'tau': [1.0, 3.0, 10.0, 30.0, 100.0, 300.0],
            'rho': 1.0 / np.sqrt(args.train_cnt),  # NOTE: works well; results are not sensitive to rho
            'max_iter': 500,  # NOTE: tried 1000, no improvement
            'verbose': False,
        }),

        (baselines.LTGL(name='LTGL'), {
            'alpha': [0.3, 1.0, 3.0, 10.0],
            'tau': [10.0, 30.0, 100.0, 300.0, 1e3],
            'beta': [1.0, 3.0, 10.0, 30.0, 100.0],
            'psi': 'l1',  # NOTE: tried L2, works worse
            'eta': [3.0, 10.0, 30.0],
            'phi': 'l1',  # NOTE: tried L2, works worse
            'rho': 1.0 / np.sqrt(args.train_cnt),
            'max_iter': 500,  # NOTE: tried 1000, no improvement
            'verbose': False
        }),

        (baselines.QUIC(name='QUIC'), {
            'lamb': [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3],
            'tol': 1e-6,
            'msg': 1,         # NOTE: 0 - no verbosity; 1 - just two lines; 2 - max verbosity
            'max_iter': 100,  # NOTE: tried 500, no improvement
        }),

        (baselines.BigQUIC(name='BigQUIC'), {
            'lamb': [0.3, 1, 3, 10.0, 30.0],
            'tol': 1e-3,
            'verbose': 0,     # NOTE: 0 - no verbosity; 1 - just two lines; 2 - max verbosity
            'max_iter': 100,  # NOTE: tried 500, no improvement
        })
    ]

    exp_name = 'sudden_first_setup.{}.nt{}.m{}.bs{}.train_cnt{}.val_cnt{}.test_cnt{}'\
               '.snr{:.2f}.min_std{:.2f}.max_std{:.2f}.n_segments{}'.format(
                   args.data_type, args.nt, args.m, args.bs, args.train_cnt,
                   args.val_cnt, args.test_cnt, args.snr,
                   args.min_std, args.max_std, args.n_segments)
    exp_name = args.prefix + exp_name
    if args.shuffle:
        exp_name += '.shuffle'

    best_results_path = "{}.results.json".format(exp_name)
    best_results_path = os.path.join(args.output_dir, 'best', best_results_path)
    make_sure_path_exists(best_results_path)

    all_results_path = "{}.results.json".format(exp_name)
    all_results_path = os.path.join(args.output_dir, 'all', all_results_path)
    make_sure_path_exists(all_results_path)

    best_results = {}
    all_results = {}

    # read previously stored values
    if os.path.exists(best_results_path):
        with open(best_results_path, 'r') as f:
            best_results = json.load(f)
    if os.path.exists(all_results_path):
        with open(all_results_path, 'r') as f:
            all_results = json.load(f)

    for (method, params) in methods[args.left:args.right]:
        name = method.name
        best_score, best_params, _, _, all_cur_results = method.select(
            train_data, val_data, params)

        best_results[name] = {}
        best_results[name]['test_score'] = method.evaluate(test_data, best_params)
        best_results[name]['best_params'] = best_params
        best_results[name]['best_val_score'] = best_score
        all_results[name] = all_cur_results

        with open(best_results_path, 'w') as f:
            json.dump(best_results, f)
        with open(all_results_path, 'w') as f:
            json.dump(all_results, f)

    print("Best results are saved in {}".format(best_results_path))
    print("All results are saved in {}".format(all_results_path))


if __name__ == '__main__':
    main()
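# For intuition on the gamma grids tuned above: T-CorEx down-weights samples
# from other time buckets when estimating bucket t. A decay of the form
# gamma ** |t - t'| is consistent with the comment above (gamma -> 0 keeps
# only the current bucket, gamma -> 1 weights all buckets equally); this is
# an illustrative assumption, and the exact scheme used inside TCorex may differ.
def sample_weight_sketch(gamma, t, t_prime):
    """Relative weight of a sample from bucket t_prime when estimating bucket t."""
    return gamma ** abs(t - t_prime)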