def upload(request):
    """Render the upload page, or ingest posted data and redirect home.

    Requires a logged-in user; anonymous visitors are sent to the login URL.
    """
    # Guard clause: anonymous users go to the login page first.
    if not _is_logged_in(request):
        return redirect(users.create_login_url('/'))
    # Non-POST requests just get the upload form.
    if request.method != 'POST':
        return render(request, 'upload.html')
    # POST: hand the raw payload to the importer, then go home.
    from tools import import_data
    payload = request.POST['data']
    import_data(payload)
    return redirect('/')
def import_data(self, data_file="data_test.csv"): """ Function to import data from csv file import_data(self,data_file="data_test.csv") """ #problem paramters self.time_hor, \ self.nb_obj, \ self.alpha, \ self.beta, \ self.cost_setup, \ self.cost_stor, \ self.cost_prod, \ self.cons_prod, \ self.constraint = import_data(data_file) if len(self.constraint.shape) > 1 : self.constraint = self.constraint[0] #problem variables self.coef = np.zeros(self.time_hor, float) # lagrangian multiplier self.production = np.zeros((self.nb_obj, self.time_hor), float) self.price = np.zeros((self.nb_obj, self.time_hor), float) self.setup = np.zeros((self.nb_obj, self.time_hor), float) self.storage = np.zeros((self.nb_obj, self.time_hor), float) if self.verbose > 0: show_data(self)
class Test(unittest.TestCase):
    """Integration tests for the CRep algorithm on the syn111.dat network.

    NOTE: the data import, YAML config load, and model construction below run
    at class-creation (import) time, not inside a test method — they require
    the input files and 'setting_CRep.yaml' to exist.
    """
    # Fixed test configuration.
    algorithm = 'CRep'
    K = 3
    in_folder = '../data/input/'
    out_folder = '../data/output/'
    end_file = '_test'
    adj = 'syn111.dat'
    ego = 'source'
    alter = 'target'
    force_dense = False
    flag_conv = 'log'

    '''
    Import data
    '''
    network = in_folder + adj  # network complete path
    A, B, B_T, data_T_vals = tl.import_data(network, ego=ego, alter=alter,
                                            force_dense=force_dense, header=0)
    nodes = A[0].nodes()

    '''
    Setting to run the algorithm
    '''
    with open('setting_' + algorithm + '.yaml') as f:
        conf = yaml.load(f, Loader=yaml.FullLoader)
    conf['end_file'] = end_file

    model = CREP.CRep(N=A[0].number_of_nodes(), L=len(A), K=K, **conf)

    # Checks that the imported adjacency tensor carries positive total weight,
    # in either its dense or sparse representation.
    def test_import_data(self):
        print("Start import data test\n")
        if self.force_dense:
            self.assertTrue(self.B.sum() > 0)
            print('B has ', self.B.sum(), ' total weight.')
        else:
            self.assertTrue(self.B.vals.sum() > 0)
            print('B has ', self.B.vals.sum(), ' total weight.')

    # Runs a full fit and compares the saved parameters against the
    # ground-truth npz file for this algorithm.
    def test_running_algorithm(self):
        print("\nStart running algorithm test\n")
        _ = self.model.fit(data=self.B, data_T=self.B_T,
                           data_T_vals=self.data_T_vals,
                           flag_conv=self.flag_conv, nodes=self.nodes)
        theta = np.load(self.model.out_folder+'theta'+self.model.end_file+'.npz')
        thetaGT = np.load(self.model.out_folder+'theta_'+self.algorithm+'.npz')
        # Fitted parameters must match what was written to disk...
        self.assertTrue(np.array_equal(self.model.u_f, theta['u']))
        self.assertTrue(np.array_equal(self.model.v_f, theta['v']))
        self.assertTrue(np.array_equal(self.model.w_f, theta['w']))
        self.assertTrue(np.array_equal(self.model.eta_f, theta['eta']))
        # ...and the run must reproduce the stored ground truth exactly.
        self.assertTrue(np.array_equal(thetaGT['u'], theta['u']))
        self.assertTrue(np.array_equal(thetaGT['v'], theta['v']))
        self.assertTrue(np.array_equal(thetaGT['w'], theta['w']))
        self.assertTrue(np.array_equal(thetaGT['eta'], theta['eta']))
def main():
    """Clean the raw data and write it to output/data_cleaned.csv.

    Loads the dataframe via import_data, loads the value-replacement
    dictionary from the JSON file, applies it, and saves the result.

    Assumes module-level ``data_file`` and ``json_file`` paths — TODO confirm
    where they are defined.
    """
    df = import_data(data_file)
    # dictionary = construct_json_from_df(df,json_file)
    dico = json_to_dict(json_file)
    # Fix: was the Python 2 statement `print dico` (a SyntaxError under
    # Python 3, which the rest of this file targets — it uses f-strings
    # and print() calls elsewhere).
    print(dico)
    df = replace_value(df, dico)
    df.to_csv('output/data_cleaned.csv', sep=';', encoding='utf-8',
              float_format='%.12g')
class Test(unittest.TestCase):
    """Integration tests for the MTCOV model on adj.csv / X.csv.

    NOTE: data import and model construction below execute at class-creation
    time and require the input files to exist.
    """
    # Fixed test configuration.
    N = 100
    L = 4
    C = 2
    gamma = 0.5
    in_folder = '../data/input/'
    out_folder = '../data/output/test/'
    end_file = '_test'
    adj_name = 'adj.csv'
    cov_name = 'X.csv'
    ego = 'source'
    alter = 'target'
    egoX = 'Name'
    attr_name = 'Metadata'
    rseed = 107261
    N_real = 1
    undirected = False
    force_dense = True
    err = 0.1
    tolerance = 0.0001
    decision = 10
    maxit = 500
    assortative = False
    inf = 1e10
    err_max = 0.0000001

    '''
    Import data
    '''
    A, B, X, nodes = tl.import_data(in_folder, adj_name=adj_name,
                                    cov_name=cov_name, ego=ego, alter=alter,
                                    egoX=egoX, attr_name=attr_name)
    Xs = np.array(X)

    MTCOV = mtcov.MTCOV(
        N=A[0].number_of_nodes(),  # number of nodes
        L=len(B),  # number of layers
        C=C,  # number of communities
        Z=X.shape[1],  # number of modalities of the attribute
        gamma=gamma,  # scaling parameter gamma
        undirected=undirected,  # if True, the network is undirected
        rseed=rseed,  # random seed for the initialization
        inf=inf,  # initial value for log-likelihood and parameters
        err_max=err_max,  # minimum value for the parameters
        err=err,  # error for the initialization of W
        N_real=N_real,  # number of iterations with different random initialization
        tolerance=tolerance,  # tolerance parameter for convergence
        decision=decision,  # convergence parameter
        maxit=maxit,  # maximum number of EM steps before aborting
        folder=out_folder,  # path for storing the output
        end_file=end_file,  # output file suffix
        assortative=assortative  # if True, the network is assortative
    )

    # Checks that the imported adjacency tensor carries positive total weight.
    def test_import_data(self):
        print("Start import data test\n")
        self.assertTrue(self.B.sum() > 0)
        print('B has ', self.B.sum(), ' total weight.')

    # Runs a full fit and compares the saved parameters to the ground truth.
    def test_running_algorithm(self):
        print("\nStart running algorithm test\n")
        _ = self.MTCOV.fit(data=self.B, data_X=self.Xs, flag_conv='log',
                           nodes=self.nodes)
        theta = np.load(self.MTCOV.folder + 'theta' +
                        self.MTCOV.end_file + '.npz')
        thetaGT = np.load(self.MTCOV.folder + 'theta_test_GT.npz')
        # Fitted parameters must match what was written to disk...
        self.assertTrue(np.array_equal(self.MTCOV.u_f, theta['u']))
        self.assertTrue(np.array_equal(self.MTCOV.v_f, theta['v']))
        self.assertTrue(np.array_equal(self.MTCOV.w_f, theta['w']))
        self.assertTrue(np.array_equal(self.MTCOV.beta_f, theta['beta']))
        # ...and the run must reproduce the stored ground truth exactly.
        self.assertTrue(np.array_equal(thetaGT['u'], theta['u']))
        self.assertTrue(np.array_equal(thetaGT['v'], theta['v']))
        self.assertTrue(np.array_equal(thetaGT['w'], theta['w']))
        self.assertTrue(np.array_equal(thetaGT['beta'], theta['beta']))
def main_cv():
    """Run k-fold (or random) cross-validation of MTCOV over the input network.

    Parses CLI options, imports the network + covariates, then for each fold:
    masks a test portion of the adjacency tensor and design matrix, trains on
    the remainder, and records accuracy/AUC/log-likelihood rows to a CSV file.
    """
    p = ArgumentParser()
    p.add_argument('-f', '--in_folder', type=str, default='../data/input/')  # path of the input network
    p.add_argument('-j', '--adj_name', type=str, default='adj_cv.csv')  # name of the adjacency tensor
    p.add_argument('-c', '--cov_name', type=str, default='X_cv.csv')  # name of the design matrix
    p.add_argument('-o', '--ego', type=str, default='source')  # name of the source of the edge
    p.add_argument('-r', '--alter', type=str, default='target')  # name of the target of the edge
    p.add_argument('-x', '--egoX', type=str, default='Name')  # name of the column with node labels
    p.add_argument('-a', '--attr_name', type=str, default='Metadata')  # name of the attribute to consider
    p.add_argument('-C', '--C', type=int, default=2)  # number of communities
    p.add_argument('-g', '--gamma', type=float, default=0.5)  # scaling hyper parameter
    # NOTE(review): argparse type=bool does not parse "False" from the CLI
    # (any non-empty string is truthy) — verify these flags behave as intended.
    p.add_argument('-u', '--undirected', type=bool, default=True)  # flag to call the undirected network
    p.add_argument('-F', '--flag_conv', type=str, choices=['log', 'deltas'], default='log')  # flag for convergence
    # p.add_argument('-d', '--force_dense', type=bool, default=False)  # flag to force a dense transformation in input
    p.add_argument('-b', '--batch_size', type=int, default=None)  # size of the batch to use to compute the likelihood
    p.add_argument('-v', '--cv_type', type=str, choices=['kfold', 'random'], default='kfold')  # type of CV to use
    p.add_argument('-NF', '--NFold', type=int, default=5)  # number of fold to perform cross-validation
    p.add_argument('-T', '--out_mask', type=int, default=False)  # flag to output the masks
    p.add_argument('-or', '--out_results', type=bool, default=True)  # flag to output the results in a csv file
    args = p.parse_args()

    # setting to run the algorithm
    with open('setting_MTCOV.yaml') as f:
        conf = yaml.load(f, Loader=yaml.FullLoader)
    conf['out_folder'] = '../data/output/5-fold_cv/'
    if not os.path.exists(conf['out_folder']):
        os.makedirs(conf['out_folder'])
    force_dense = True

    '''
    Cross validation parameters
    '''
    cv_type = args.cv_type
    NFold = args.NFold
    prng = np.random.RandomState(seed=conf['rseed'])  # set seed random number generator
    rseed = prng.randint(1000)
    out_mask = args.out_mask
    out_results = args.out_results

    '''
    Model parameters
    '''
    C = args.C
    gamma = args.gamma

    '''
    Set up directories
    '''
    in_folder = args.in_folder
    out_folder = conf['out_folder']
    dataset = args.adj_name.split('.')[0]

    '''
    Import data
    '''
    A, B, X, nodes = tl.import_data(in_folder, adj_name=args.adj_name,
                                    cov_name=args.cov_name, ego=args.ego,
                                    alter=args.alter, egoX=args.egoX,
                                    attr_name=args.attr_name,
                                    undirected=args.undirected,
                                    force_dense=force_dense,
                                    noselfloop=True, verbose=True)
    Xs = np.array(X)
    valid_types = [np.ndarray, skt.dtensor, skt.sptensor]
    assert any(isinstance(B, vt) for vt in valid_types)
    if args.batch_size and args.batch_size > len(nodes):
        raise ValueError('The batch size has to be smaller than the number of nodes.')
    # For large networks switch to the cheaper convergence criterion.
    if len(nodes) > 1000:
        args.flag_conv = 'deltas'

    print('\n### CV procedure ###')
    # comparison holds one CSV row: [C, gamma, fold, rseed, logL, acc_train,
    # auc_train, logL_test, acc_test, auc_test] (indices assigned below).
    comparison = [0 for _ in range(10)]
    comparison[0], comparison[1] = C, gamma

    # save the results
    if out_results:
        out_file = out_folder+dataset+'_results.csv'
        if not os.path.isfile(out_file):  # write header
            with open(out_file, 'w') as outfile:
                wrtr = csv.writer(outfile, delimiter=',', quotechar='"')
                wrtr.writerow(['C', 'gamma', 'fold', 'rseed', 'logL',
                               'acc_train', 'auc_train', 'logL_test',
                               'acc_test', 'auc_test'])
        # Kept open for appending across folds; closed after the loop.
        outfile = open(out_file, 'a')
        wrtr = csv.writer(outfile, delimiter=',', quotechar='"')
        print(f'Results will be saved in: {out_file}')

    time_start = time.time()
    L = B.shape[0]
    N = B.shape[1]
    assert N == X.shape[0]

    '''
    Extract masks
    '''
    if cv_type == 'kfold':
        idxG = cvfun.shuffle_indicesG(N, L, rseed=rseed)
        idxX = cvfun.shuffle_indicesX(N, rseed=rseed)
    else:
        idxG = None
        idxX = None

    print('\nC =', C, 'gamma =', gamma, '\n')
    for fold in range(NFold):
        print('FOLD ', fold)
        # New seed per fold so masks differ across folds.
        rseed += prng.randint(500)
        comparison[2], comparison[3] = fold, rseed
        maskG, maskX = cvfun.extract_masks(N, L, idxG=idxG, idxX=idxX,
                                           cv_type=cv_type, NFold=NFold,
                                           fold=fold, rseed=rseed,
                                           out_mask=out_mask)

        '''
        Set up training dataset
        '''
        # Zero out the held-out entries in both network and covariates.
        B_train = B.copy()
        B_train[maskG > 0] = 0
        X_train = Xs.copy()
        X_train[maskX > 0] = 0

        conf['end_file'] = 'GT'+str(fold)+'C'+str(C)+'g'+str(gamma)+'_'+dataset

        '''
        Run MTCOV on the training
        '''
        tic = time.time()
        U, V, W, BETA, comparison[4] = cvfun.train_running_model(
            B_cv=B_train, X_cv=X_train, flag_conv=args.flag_conv, C=C,
            Z=X.shape[1], gamma=gamma, undirected=args.undirected,
            nodes=nodes, batch_size=args.batch_size, **conf)

        '''
        Output performance results
        '''
        # gamma == 0 disables the covariate part; gamma == 1 the network part.
        if gamma != 0:
            comparison[5] = cvfun.covariates_accuracy(X, U, V, BETA,
                                                      mask=np.logical_not(maskX))
            comparison[8] = cvfun.covariates_accuracy(X, U, V, BETA, mask=maskX)
        if gamma != 1:
            comparison[6] = cvfun.calculate_AUC(B, U, V, W,
                                                mask=np.logical_not(maskG))
            comparison[9] = cvfun.calculate_AUC(B, U, V, W, mask=maskG)
        comparison[7] = cvfun.loglikelihood(B, X, U, V, W, BETA, gamma,
                                            maskG=maskG, maskX=maskX)

        print("Time elapsed:", np.round(time.time() - tic, 2), " seconds.")

        if out_results:
            wrtr.writerow(comparison)
            outfile.flush()
    if out_results:
        outfile.close()

    print("\nTime elapsed:", np.round(time.time() - time_start, 2), " seconds.")
def sheets(username):
    """Import data for the given user, then redirect to the shared spreadsheet."""
    user = models.User.query.filter_by(username=username).first()
    import_data(user)
    spreadsheet_url = (
        'https://docs.google.com/spreadsheets/d/1M_DSqPghGoCGUzn8dciDoXeHW0lAHoDk0i90G913A5U/edit#gid=0'
    )
    return redirect(spreadsheet_url)
def main():
    """Run k-fold cross-validation of CRep over the input network.

    Parses CLI options, imports the network, and for each fold masks part of
    the adjacency tensor, fits CRep on the remainder, and writes AUC /
    objective rows to a CSV file.
    """
    p = ArgumentParser()
    p.add_argument('-a', '--algorithm', type=str,
                   choices=['Crep', 'Crepnc', 'Crep0'], default='CRep')  # configuration
    p.add_argument('-K', '--K', type=int, default=3)  # number of communities
    p.add_argument('-A', '--adj', type=str, default='syn111.dat')  # name of the network
    p.add_argument('-f', '--in_folder', type=str, default='../data/input/')  # path of the input network
    p.add_argument('-o', '--out_folder', type=str,
                   default='../data/output/5-fold_cv/')  # path to store outputs
    p.add_argument('-e', '--ego', type=str, default='source')  # name of the source of the edge
    p.add_argument('-t', '--alter', type=str, default='target')  # name of the target of the edge
    # p.add_argument('-d', '--force_dense', type=bool, default=True)  # flag to force a dense transformation in input
    p.add_argument('-F', '--flag_conv', type=str, choices=['log', 'deltas'], default='log')  # flag for convergence
    p.add_argument('-N', '--NFold', type=int, default=5)  # number of fold to perform cross-validation
    # NOTE(review): argparse type=bool does not parse "False" from the CLI —
    # any non-empty string is truthy; verify these flags behave as intended.
    p.add_argument('-m', '--out_mask', type=bool, default=False)  # flag to output the masks
    p.add_argument('-r', '--out_results', type=bool, default=True)  # flag to output the results in a csv file
    p.add_argument('-i', '--out_inference', type=bool, default=True)  # flag to output the inferred parameters
    args = p.parse_args()

    prng = np.random.RandomState(seed=17)  # set seed random number generator

    '''
    Cross validation parameters and set up output directory
    '''
    NFold = args.NFold
    out_mask = args.out_mask
    out_results = args.out_results
    out_folder = args.out_folder
    if not os.path.exists(out_folder):
        os.makedirs(out_folder)

    '''
    Model parameters
    '''
    K = args.K
    network = args.in_folder + args.adj  # network complete path
    algorithm = args.algorithm  # algorithm to use to generate the samples
    adjacency = args.adj.split('.dat')[0]  # name of the network without extension
    with open('setting_' + algorithm + '.yaml') as f:
        conf = yaml.load(f, Loader=yaml.FullLoader)
    conf['out_folder'] = out_folder
    conf['out_inference'] = args.out_inference

    '''
    Import data
    '''
    A, B, B_T, data_T_vals = tl.import_data(network, ego=args.ego,
                                            alter=args.alter,
                                            force_dense=True, header=0)
    nodes = A[0].nodes()
    valid_types = [np.ndarray, skt.dtensor, skt.sptensor]
    assert any(isinstance(B, vt) for vt in valid_types)

    print('\n### CV procedure ###')
    # comparison holds one CSV row; the header below names its 11 slots.
    comparison = [0 for _ in range(11)]
    comparison[0] = K

    # save the results
    if out_results:
        out_file = out_folder + adjacency + '_cv.csv'
        if not os.path.isfile(out_file):  # write header
            with open(out_file, 'w') as outfile:
                wrtr = csv.writer(outfile, delimiter=',', quotechar='"')
                wrtr.writerow([
                    'K', 'fold', 'rseed', 'eta', 'auc_train', 'auc_test',
                    'auc_cond_train', 'auc_cond_test', 'opt_func_train',
                    'opt_func_test', 'max_it'
                ])
        # Kept open for appending across folds; closed after the loop.
        outfile = open(out_file, 'a')
        wrtr = csv.writer(outfile, delimiter=',', quotechar='"')
        print(f'Results will be saved in: {out_file}')

    time_start = time.time()
    L = B.shape[0]
    N = B.shape[-1]

    rseed = prng.randint(1000)
    indices = cvfun.shuffle_indices_all_matrix(N, L, rseed=rseed)
    init_end_file = conf['end_file']

    for fold in range(NFold):
        print('\nFOLD ', fold)
        comparison[1], comparison[2] = fold, rseed

        mask = cvfun.extract_mask_kfold(indices, N, fold=fold, NFold=NFold)
        if out_mask:
            outmask = out_folder + 'mask_f' + str(fold) + '_' + adjacency + '.pkl'
            print(f'Mask saved in: {outmask}')
            with open(outmask, 'wb') as f:
                pickle.dump(np.where(mask > 0), f)

        '''
        Set up training dataset
        '''
        # Zero out the held-out entries of the adjacency tensor.
        B_train = B.copy()
        B_train[mask > 0] = 0

        '''
        Run CRep on the training
        '''
        tic = time.time()
        conf['end_file'] = init_end_file + '_' + str(fold) + 'K' + str(K)
        u, v, w, eta, maxPSL, algo_obj = cvfun.fit_model(
            B_train, B_T, data_T_vals, nodes=nodes, N=N, L=L, K=K,
            algo=algorithm, flag_conv=args.flag_conv, **conf)

        '''
        Output performance results
        '''
        comparison[3] = eta
        # AUC on the plain expectation, train (unmasked) vs test (masked).
        M = cvfun.calculate_expectation(u, v, w, eta=eta)
        comparison[4] = cvfun.calculate_AUC(M, B, mask=np.logical_not(mask))
        comparison[5] = cvfun.calculate_AUC(M, B, mask=mask)
        # AUC on the conditional expectation.
        M_cond = cvfun.calculate_conditional_expectation(B, u, v, w, eta=eta)
        comparison[6] = cvfun.calculate_AUC(M_cond, B, mask=np.logical_not(mask))
        comparison[7] = cvfun.calculate_AUC(M_cond, B, mask=mask)
        comparison[9] = cvfun.calculate_opt_func(B, algo_obj, mask=mask,
                                                 assortative=conf['assortative'])
        comparison[8] = maxPSL
        comparison[10] = algo_obj.final_it

        print(f'Time elapsed: {np.round(time.time() - tic, 2)} seconds.')

        if out_results:
            wrtr.writerow(comparison)
            outfile.flush()
    if out_results:
        outfile.close()

    print(f'\nTime elapsed: {np.round(time.time() - time_start, 2)} seconds.')
class Test(unittest.TestCase):
    """Cross-validation test for MTCOV: trains per fold and compares the
    saved parameters to stored ground-truth npz files.

    NOTE: the data import below runs at class-creation time and requires the
    input files to exist.
    """
    # Fixed test configuration.
    N = 100
    L = 2
    C = 2
    gamma = 0.5
    in_folder = '../data/input/'
    out_folder = '../data/output/test/'
    end_file = '_test'
    adj_name = 'adj_cv.csv'
    cov_name = 'X_cv.csv'
    ego = 'source'
    alter = 'target'
    egoX = 'Name'
    attr_name = 'Metadata'
    rseed = 107261
    N_real = 1
    undirected = False
    force_dense = True
    flag_conv = 'log'
    err = 0.1
    tolerance = 0.0001
    decision = 10
    maxit = 500
    assortative = False
    inf = 1e10
    err_max = 0.0000001
    cv_type = 'kfold'
    NFold = 5

    '''
    Import data
    '''
    A, B, X, nodes = tl.import_data(in_folder, adj_name=adj_name,
                                    cov_name=cov_name, ego=ego, alter=alter,
                                    egoX=egoX, attr_name=attr_name)
    Xs = np.array(X)

    def test_running_algorithm(self):
        """Train on each fold's masked data and check saved vs ground truth."""
        print("\nStart running algorithm test\n")
        L = self.B.shape[0]
        N = self.B.shape[1]
        assert N == self.X.shape[0]

        if self.cv_type == 'kfold':
            idxG = cvfun.shuffle_indicesG(N, L, rseed=self.rseed)
            idxX = cvfun.shuffle_indicesX(N, rseed=self.rseed)
        else:
            idxG = None
            idxX = None

        for fold in range(self.NFold):
            # Per-fold seed; deterministic given the class rseed.
            ind = self.rseed + fold
            maskG, maskX = cvfun.extract_masks(self.A[0].number_of_nodes(),
                                               len(self.B), idxG=idxG,
                                               idxX=idxX,
                                               cv_type=self.cv_type,
                                               NFold=self.NFold, fold=fold,
                                               rseed=ind)

            '''
            Set up training dataset
            '''
            # Zero out held-out entries in both network and covariates.
            B_train = self.B.copy()
            print(B_train.shape, maskG.shape)
            B_train[maskG > 0] = 0
            X_train = self.Xs.copy()
            X_train[maskX > 0] = 0

            U, V, W, BETA, logL = cvfun.train_running_model(
                B_train, X_train, self.flag_conv,
                N=self.A[0].number_of_nodes(), L=len(self.B), C=self.C,
                Z=self.X.shape[1], gamma=self.gamma,
                undirected=self.undirected, cv=True, rseed=self.rseed,
                inf=self.inf, err_max=self.err_max, err=self.err,
                N_real=self.N_real, tolerance=self.tolerance,
                decision=self.decision, maxit=self.maxit,
                folder=self.out_folder, end_file=self.end_file,
                assortative=self.assortative)

            '''
            Output parameters
            '''
            outinference = self.out_folder + 'theta_cv' + str(fold) + 'C' + \
                str(self.C) + 'g' + str(self.gamma)
            np.savez_compressed(outinference + '.npz', u=U, v=V, w=W,
                                beta=BETA, fold=fold)
            # To load: theta = np.load('test.npz'), e.g. print(np.array_equal(U, theta['u']))

            '''
            Load parameters
            '''
            theta = np.load(outinference + '.npz')
            # Ground-truth file name is hard-coded to C2g0.5_adj_cv — must
            # match the class attributes above.
            thetaGT = np.load(self.out_folder + 'thetaGT' + str(fold) +
                              'C2g0.5_adj_cv.npz')
            # Round-trip check: what we saved equals what we got back...
            self.assertTrue(np.array_equal(U, theta['u']))
            self.assertTrue(np.array_equal(V, theta['v']))
            self.assertTrue(np.array_equal(W, theta['w']))
            self.assertTrue(np.array_equal(BETA, theta['beta']))
            # ...and the run reproduces the stored ground truth exactly.
            self.assertTrue(np.array_equal(thetaGT['u'], theta['u']))
            self.assertTrue(np.array_equal(thetaGT['v'], theta['v']))
            self.assertTrue(np.array_equal(thetaGT['w'], theta['w']))
            self.assertTrue(np.array_equal(thetaGT['beta'], theta['beta']))
def import_data():
    # NOTE(review): this calls *itself* unconditionally — running it raises
    # RecursionError. It was presumably meant to delegate to an import_data
    # from another module (shadowed by this definition); confirm the intended
    # target and either import it under a different name or remove this stub.
    import_data()
class Test(unittest.TestCase):
    """Cross-validation test for MTCOV driven by setting_MTCOV.yaml.

    NOTE: the YAML load and data import below run at class-creation time and
    require 'setting_MTCOV.yaml' and the input files to exist.
    """
    # Fixed test configuration.
    C = 2
    gamma = 0.5
    in_folder = '../data/input/'
    adj_name = 'adj_cv.csv'
    cov_name = 'X_cv.csv'
    ego = 'source'
    alter = 'target'
    egoX = 'Name'
    attr_name = 'Metadata'
    undirected = True
    flag_conv = 'log'
    batch_size = None
    cv_type = 'kfold'
    NFold = 5
    out_mask = False

    with open('setting_MTCOV.yaml') as f:
        conf = yaml.load(f, Loader=yaml.FullLoader)
    prng = np.random.RandomState(seed=conf['rseed'])
    rseed = prng.randint(1000)
    dataset = adj_name.split('.')[0]

    '''
    Import data
    '''
    A, B, X, nodes = tl.import_data(in_folder, adj_name=adj_name,
                                    cov_name=cov_name, ego=ego, alter=alter,
                                    egoX=egoX, attr_name=attr_name,
                                    undirected=undirected, force_dense=True)
    Xs = np.array(X)

    def test_running_algorithm(self):
        """Train on each fold's masked data and check saved vs ground truth."""
        print("\nStart running algorithm test\n")
        L = self.B.shape[0]
        N = self.B.shape[1]
        assert N == self.X.shape[0]

        '''
        Extract masks
        '''
        if self.cv_type == 'kfold':
            idxG = cvfun.shuffle_indicesG(N, L, rseed=self.rseed)
            idxX = cvfun.shuffle_indicesX(N, rseed=self.rseed)
        else:
            idxG = None
            idxX = None

        for fold in range(self.NFold):
            # NOTE(review): this augmented assignment creates an instance
            # attribute shadowing the class-level rseed — the class value is
            # untouched, but the per-fold seeds drift within this run.
            self.rseed += self.prng.randint(500)
            maskG, maskX = cvfun.extract_masks(N, L, idxG=idxG, idxX=idxX,
                                               cv_type=self.cv_type,
                                               NFold=self.NFold, fold=fold,
                                               rseed=self.rseed,
                                               out_mask=self.out_mask)

            '''
            Set up training dataset
            '''
            # Zero out held-out entries in both network and covariates.
            B_train = self.B.copy()
            print(B_train.shape, maskG.shape)
            B_train[maskG > 0] = 0
            X_train = self.Xs.copy()
            X_train[maskX > 0] = 0

            self.conf['end_file'] = 'CV' + str(fold) + 'C' + str(self.C) + \
                'g' + str(self.gamma) + '_' + self.dataset
            U, V, W, BETA, logL = cvfun.train_running_model(
                B_cv=B_train, X_cv=X_train, flag_conv=self.flag_conv,
                C=self.C, Z=self.X.shape[1], gamma=self.gamma,
                undirected=self.undirected, nodes=self.nodes,
                batch_size=self.batch_size, **self.conf)

            '''
            Load parameters
            '''
            outinference = self.conf['out_folder'] + 'thetaCV' + str(fold) + \
                'C' + str(self.C) + 'g' + str(self.gamma) + '_' + self.dataset
            theta = np.load(outinference+'.npz')
            thetaGT = np.load('../data/output/5-fold_cv/thetaGT' + str(fold) +
                              'C' + str(self.C) + 'g' + str(self.gamma) +
                              '_' + self.dataset + '.npz')
            # Round-trip check: what training returned equals what was saved...
            self.assertTrue(np.array_equal(U,theta['u']))
            self.assertTrue(np.array_equal(V,theta['v']))
            self.assertTrue(np.array_equal(W,theta['w']))
            self.assertTrue(np.array_equal(BETA,theta['beta']))
            # ...and the run reproduces the stored ground truth exactly.
            self.assertTrue(np.array_equal(thetaGT['u'],theta['u']))
            self.assertTrue(np.array_equal(thetaGT['v'],theta['v']))
            self.assertTrue(np.array_equal(thetaGT['w'],theta['w']))
            self.assertTrue(np.array_equal(thetaGT['beta'],theta['beta']))
def main_cv():
    """Run k-fold (or random) cross-validation of MTCOV, fully CLI-driven.

    For each fold: mask part of the adjacency tensor and design matrix,
    train on the remainder, optionally save the inferred parameters, and
    append accuracy/AUC/log-likelihood rows to a CSV file.
    """
    inf = 1e10  # initial value for log-likelihood and parameters
    err_max = 0.0000001  # minimum value for the parameters

    p = ArgumentParser()
    p.add_argument('-j', '--adj_name', type=str, default='adj_cv.csv')
    p.add_argument('-c', '--cov_name', type=str, default='X_cv.csv')
    p.add_argument('-o', '--ego', type=str, default='source')
    p.add_argument('-r', '--alter', type=str, default='target')
    p.add_argument('-x', '--egoX', type=str, default='Name')
    p.add_argument('-a', '--attr_name', type=str, default='Metadata')
    p.add_argument('-C', '--C', type=int, default=2)
    p.add_argument('-g', '--gamma', type=float, default=0.5)
    # NOTE(review): argparse type=bool does not parse "False" from the CLI —
    # any non-empty string is truthy; verify these flags behave as intended.
    p.add_argument('-u', '--undirected', type=bool, default=False)
    p.add_argument('-d', '--force_dense', type=bool, default=True)
    p.add_argument('-F', '--flag_conv', type=str, choices=['log', 'deltas'], default='log')
    p.add_argument('-z', '--rseed', type=int, default=107261)
    p.add_argument('-e', '--err', type=float, default=0.1)
    p.add_argument('-i', '--N_real', type=int, default=1)
    p.add_argument('-t', '--tolerance', type=float, default=0.0001)
    p.add_argument('-y', '--decision', type=int, default=10)
    p.add_argument('-m', '--maxit', type=int, default=500)
    p.add_argument('-E', '--end_file', type=str, default='_results.csv')
    p.add_argument('-I', '--in_folder', type=str, default='../data/input/')
    p.add_argument('-O', '--out_folder', type=str, default='../data/output/5-fold_cv/')
    p.add_argument('-A', '--assortative', type=bool, default=False)
    p.add_argument('-v', '--cv_type', type=str, choices=['kfold', 'random'], default='kfold')
    p.add_argument('-NF', '--NFold', type=int, default=5)
    p.add_argument('-T', '--out_mask', type=int, default=False)
    p.add_argument('-W', '--out_inference', type=int, default=False)
    args = p.parse_args()

    '''
    Cross validation parameters
    '''
    cv_type = args.cv_type
    NFold = args.NFold
    rseed = args.rseed
    out_mask = args.out_mask
    out_inference = args.out_inference
    end_file = args.end_file

    '''
    Model parameters
    '''
    C = args.C
    gamma = args.gamma
    dataset = args.adj_name.split('.')[0]

    '''
    Set up output directory
    '''
    in_folder = args.in_folder
    out_folder = args.out_folder
    if not os.path.exists(out_folder):
        os.makedirs(out_folder)

    '''
    Import data
    '''
    A, B, X, nodes = tl.import_data(in_folder, adj_name=args.adj_name,
                                    cov_name=args.cov_name, ego=args.ego,
                                    alter=args.alter, egoX=args.egoX,
                                    attr_name=args.attr_name,
                                    undirected=args.undirected,
                                    force_dense=args.force_dense)
    Xs = np.array(X)
    valid_types = [np.ndarray, skt.dtensor, skt.sptensor]
    assert any(isinstance(B, vt) for vt in valid_types)

    print('\n### CV procedure ###')
    # comparison holds one CSV row; the header below names its 10 slots.
    comparison = [0 for _ in range(10)]
    comparison[0], comparison[1] = C, gamma

    out_file = out_folder + dataset + end_file
    if not os.path.isfile(out_file):  # write header
        with open(out_file, 'w') as outfile:
            wrtr = csv.writer(outfile, delimiter=',', quotechar='"')
            wrtr.writerow([
                'C', 'gamma', 'fold', 'rseed', 'logL', 'acc_train',
                'auc_train', 'logL_test', 'acc_test', 'auc_test'
            ])

    time_start = time.time()
    L = B.shape[0]
    N = B.shape[1]
    assert N == X.shape[0]

    if cv_type == 'kfold':
        idxG = cvfun.shuffle_indicesG(N, L, rseed=rseed)
        idxX = cvfun.shuffle_indicesX(N, rseed=rseed)
    else:
        idxG = None
        idxX = None

    # Append results fold-by-fold; the with-block keeps the handle open for
    # the whole CV loop.
    with open(out_file, 'a') as outfile:
        wrtr = csv.writer(outfile, delimiter=',', quotechar='"')
        print('Results will be saved in:', out_file)
        print('\nC =', C, 'gamma =', gamma, '\n')
        for fold in range(NFold):
            print('FOLD ', fold)
            ind = rseed + fold  # set the random seed
            comparison[2], comparison[3] = fold, ind
            maskG, maskX = cvfun.extract_masks(N, L, idxG=idxG, idxX=idxX,
                                               cv_type=cv_type, NFold=NFold,
                                               fold=fold, rseed=ind,
                                               out_mask=out_mask)

            '''
            Set up training dataset
            '''
            # Zero out held-out entries in both network and covariates.
            B_train = B.copy()
            B_train[maskG > 0] = 0
            X_train = Xs.copy()
            X_train[maskX > 0] = 0

            '''
            Run MTCOV on the training
            '''
            tic = time.time()
            U, V, W, BETA, comparison[4] = cvfun.train_running_model(
                B_train, X_train, args.flag_conv,
                N=A[0].number_of_nodes(),  # number of nodes
                L=len(B),  # number of layers
                C=args.C,  # number of communities
                Z=X.shape[1],  # number of modalities of the attribute
                gamma=args.gamma,  # scaling parameter gamma
                undirected=args.undirected,  # if True, the network is undirected
                cv=True,
                rseed=args.rseed,  # random seed for the initialization
                inf=inf,  # initial value for log-likelihood and parameters
                err_max=err_max,  # minimum value for the parameters
                err=args.err,  # error for the initialization of W
                N_real=args.N_real,  # number of iterations with different random initialization
                tolerance=args.tolerance,  # tolerance parameter for convergence
                decision=args.decision,  # convergence parameter
                maxit=args.maxit,  # maximum number of EM steps before aborting
                folder=out_folder,  # path for storing the output
                end_file='GT' + str(fold) + 'C' + str(args.C) + 'g' + str(args.gamma),  # output file suffix
                assortative=args.assortative  # if True, the network is assortative
            )

            '''
            Output parameters
            '''
            if out_inference:
                outinference = '../data/output/test/thetaGT' + str(fold) + \
                    'C' + str(C) + 'g' + str(gamma) + '_' + dataset
                np.savez_compressed(outinference + '.npz', u=U, v=V, w=W,
                                    beta=BETA)
                # To load: theta = np.load('test.npz'), e.g. print(np.array_equal(U, theta['u']))
                print('Parameters saved in: ', outinference + '.npz')

            '''
            Output performance results
            '''
            # gamma == 0 disables the covariate part; gamma == 1 the network part.
            if gamma != 0:
                comparison[5] = cvfun.covariates_accuracy(
                    X, U, V, BETA, mask=np.logical_not(maskX))
                comparison[8] = cvfun.covariates_accuracy(X, U, V, BETA,
                                                          mask=maskX)
            if gamma != 1:
                comparison[6] = cvfun.calculate_AUC(B, U, V, W,
                                                    mask=np.logical_not(maskG))
                comparison[9] = cvfun.calculate_AUC(B, U, V, W, mask=maskG)
            comparison[7] = cvfun.loglikelihood(B, X, U, V, W, BETA, gamma,
                                                maskG=maskG, maskX=maskX)

            print("Time elapsed:", np.round(time.time() - tic, 2), " seconds.")

            wrtr.writerow(comparison)
            outfile.flush()

    print("\nTime elapsed:", np.round(time.time() - time_start, 2), " seconds.")
class Test(unittest.TestCase):
    """Integration tests for MTCOV configured from setting_MTCOV.yaml.

    NOTE: the YAML load, data import, and model construction below run at
    class-creation time and require the config and input files to exist.
    """
    # Fixed test configuration.
    C = 2
    gamma = 0.5
    in_folder = '../data/input/'
    out_folder = '../data/output/test/'
    end_file = '_test'
    adj_name = 'adj.csv'
    cov_name = 'X.csv'
    ego = 'source'
    alter = 'target'
    egoX = 'Name'
    attr_name = 'Metadata'
    undirected = False
    flag_conv = 'log'
    force_dense = False
    batch_size = None

    with open('setting_MTCOV.yaml') as f:
        conf = yaml.load(f, Loader=yaml.FullLoader)

    '''
    Import data
    '''
    A, B, X, nodes = tl.import_data(in_folder, adj_name=adj_name,
                                    cov_name=cov_name, ego=ego, alter=alter,
                                    egoX=egoX, attr_name=attr_name,
                                    undirected=undirected,
                                    force_dense=force_dense)
    Xs = np.array(X)

    MTCOV = mtcov.MTCOV(N=A[0].number_of_nodes(), L=len(A), C=C,
                        Z=X.shape[1], gamma=gamma, undirected=undirected,
                        **conf)

    # Checks that the imported adjacency tensor carries positive total
    # weight, in either its dense or sparse representation.
    def test_import_data(self):
        print("Start import data test\n")
        if self.force_dense:
            self.assertTrue(self.B.sum() > 0)
            print('B has ', self.B.sum(), ' total weight.')
        else:
            self.assertTrue(self.B.vals.sum() > 0)
            print('B has ', self.B.vals.sum(), ' total weight.')

    # Runs a full fit and compares the saved parameters to the ground truth.
    def test_running_algorithm(self):
        print("\nStart running algorithm test\n")
        _ = self.MTCOV.fit(data=self.B, data_X=self.Xs,
                           flag_conv=self.flag_conv, nodes=self.nodes,
                           batch_size=self.batch_size)
        theta = np.load(self.MTCOV.out_folder + 'theta' +
                        self.MTCOV.end_file + '.npz')
        thetaGT = np.load(self.MTCOV.out_folder + 'theta_test_GT.npz')
        # Fitted parameters must match what was written to disk...
        self.assertTrue(np.array_equal(self.MTCOV.u_f, theta['u']))
        self.assertTrue(np.array_equal(self.MTCOV.v_f, theta['v']))
        self.assertTrue(np.array_equal(self.MTCOV.w_f, theta['w']))
        self.assertTrue(np.array_equal(self.MTCOV.beta_f, theta['beta']))
        # ...and the run must reproduce the stored ground truth exactly.
        self.assertTrue(np.array_equal(thetaGT['u'], theta['u']))
        self.assertTrue(np.array_equal(thetaGT['v'], theta['v']))
        self.assertTrue(np.array_equal(thetaGT['w'], theta['w']))
        self.assertTrue(np.array_equal(thetaGT['beta'], theta['beta']))
def main():
    """Run a single CRep fit on the network chosen from the command line.

    Loads the per-algorithm YAML settings, snapshots them into the output
    folder, imports the network, and fits the model once.
    """
    p = ArgumentParser()
    p.add_argument('-a', '--algorithm', type=str,
                   choices=['Crep', 'Crepnc', 'Crep0'], default='CRep')  # configuration
    p.add_argument('-K', '--K', type=int, default=3)  # number of communities
    p.add_argument('-A', '--adj', type=str, default='syn111.dat')  # name of the network
    p.add_argument('-f', '--in_folder', type=str, default='../data/input/')  # path of the input network
    p.add_argument('-e', '--ego', type=str, default='source')  # name of the source of the edge
    p.add_argument('-t', '--alter', type=str, default='target')  # name of the target of the edge
    # NOTE(review): argparse type=bool does not parse "False" from the CLI —
    # any non-empty string is truthy; verify this flag behaves as intended.
    p.add_argument('-d', '--force_dense', type=bool, default=False)  # flag to force a dense transformation in input
    p.add_argument('-F', '--flag_conv', type=str, choices=['log', 'deltas'], default='log')  # flag for convergence
    args = p.parse_args()

    # setting to run the algorithm
    with open('setting_' + args.algorithm + '.yaml') as f:
        conf = yaml.load(f, Loader=yaml.FullLoader)
    if not os.path.exists(conf['out_folder']):
        os.makedirs(conf['out_folder'])
    # Snapshot the settings alongside the outputs for reproducibility.
    with open(conf['out_folder'] + '/setting_' + args.algorithm + '.yaml', 'w') as f:
        yaml.dump(conf, f)

    '''
    Import data
    '''
    network = args.in_folder + args.adj  # network complete path
    A, B, B_T, data_T_vals = tl.import_data(network, ego=args.ego,
                                            alter=args.alter,
                                            force_dense=args.force_dense,
                                            header=0)
    nodes = A[0].nodes()
    valid_types = [np.ndarray, skt.dtensor, skt.sptensor]
    assert any(isinstance(B, vt) for vt in valid_types)

    '''
    Run CRep
    '''
    print(f'\n### Run {args.algorithm} ###')
    time_start = time.time()
    model = CREP.CRep(N=A[0].number_of_nodes(), L=len(A), K=args.K, **conf)
    _ = model.fit(data=B, data_T=B_T, data_T_vals=data_T_vals,
                  flag_conv=args.flag_conv, nodes=nodes)
    print(f'\nTime elapsed: {np.round(time.time() - time_start, 2)} seconds.')
# Scheduling-script setup: read input workbooks, open the AMPL model/data
# files, and cache the problem dimensions.
import tools
import sys
import pandas as pd

# Workbook paths from the CLI, with defaults.
path = sys.argv[1] if len(sys.argv) > 1 else '2016.xlsx'
path2 = sys.argv[2] if len(sys.argv) > 2 else 'divisions.xlsx'

# true: find the minimum total no of seshs for all faculties
# false: find the most even schedule for all
tALLfEVEN = False

# initializations
# st = students DataFrame
#   ID, SID, Mj, Mn
# ts = teachers DataFrame
#   ID, SID, DP, 1Y, 2Y, UB
st, ts = tools.import_data()
# NOTE(review): these handles are opened for writing but never closed here —
# presumably closed later in the script; confirm, or use `with`.
modfile = open('ampl/mock.mod', 'w')
datfile = open('ampl/mock.dat', 'w')
dept_list = tools.get_depts(path)
div_dept, div_prof = tools.get_div(path, path2)

# cache
s_count = len(st)  # number of students
t_count = len(ts)  # number of teachers
d_count = 3  # number of days
i_count = 7  # number of sesh/day
depts_c = len(dept_list)  # number of depts
maxpday = 4  # max no of sesh/day
# NOTE(review): maxpall is only defined when tALLfEVEN is True — any later
# use of it with tALLfEVEN False would raise NameError; verify downstream.
if tALLfEVEN:
    maxpall = 12  # max no of sesh/all
class Test(unittest.TestCase):
    """Cross-validation test for CRep: fits per fold on masked data and
    compares the saved parameters to stored ground-truth npz files.

    NOTE: the YAML load and data import below run at class-creation time and
    require 'setting_CRep.yaml' and the input files to exist.
    """
    # Fixed test configuration.
    algorithm = 'CRep'
    K = 3
    in_folder = '../data/input/'
    out_folder = '../data/output/5-fold_cv/'
    end_file = '_test'
    adj = 'syn111.dat'
    ego = 'source'
    alter = 'target'
    # force_dense = True
    flag_conv = 'log'
    NFold = 5
    out_mask = False
    out_results = True
    out_inference = True

    prng = np.random.RandomState(seed=17)  # set seed random number generator
    rseed = prng.randint(1000)

    '''
    Setting to run the algorithm
    '''
    with open('setting_' + algorithm + '.yaml') as f:
        conf = yaml.load(f, Loader=yaml.FullLoader)
    conf['out_folder'] = out_folder

    '''
    Import data
    '''
    network = in_folder + adj  # network complete path
    A, B, B_T, data_T_vals = tl.import_data(network, ego=ego, alter=alter,
                                            force_dense=True, header=0)
    nodes = A[0].nodes()

    def test_running_algorithm(self):
        """Fit on each fold's masked network and check saved vs ground truth."""
        print("\nStart running algorithm test\n")
        L = self.B.shape[0]
        N = self.B.shape[1]
        indices = cvfun.shuffle_indices_all_matrix(N, L, rseed=self.rseed)

        for fold in range(self.NFold):
            mask = cvfun.extract_mask_kfold(indices, N, fold=fold,
                                            NFold=self.NFold)

            '''
            Set up training dataset
            '''
            # Zero out the held-out entries of the adjacency tensor.
            B_train = self.B.copy()
            print(B_train.shape, mask.shape)
            B_train[mask > 0] = 0

            self.conf['end_file'] = '_' + str(fold) + 'K' + str(
                self.K) + self.end_file
            u, v, w, eta, maxPSL, algo_obj = cvfun.fit_model(
                B_train, self.B_T, self.data_T_vals, nodes=self.nodes,
                N=N, L=L, K=self.K, algo=self.algorithm,
                flag_conv=self.flag_conv, **self.conf)

            '''
            Load parameters
            '''
            theta = np.load(self.out_folder + 'theta_' + str(fold) + 'K' +
                            str(self.K) + self.end_file + '.npz')
            thetaGT = np.load(self.out_folder + 'theta_' +
                              str(self.algorithm) + '_' + str(fold) + 'K' +
                              str(self.K) + '.npz')
            # Round-trip check: fitted parameters equal what was saved...
            self.assertTrue(np.array_equal(u, theta['u']))
            self.assertTrue(np.array_equal(v, theta['v']))
            self.assertTrue(np.array_equal(w, theta['w']))
            self.assertTrue(np.array_equal(algo_obj.eta_f, theta['eta']))
            # ...and the run reproduces the stored ground truth exactly.
            self.assertTrue(np.array_equal(thetaGT['u'], theta['u']))
            self.assertTrue(np.array_equal(thetaGT['v'], theta['v']))
            self.assertTrue(np.array_equal(thetaGT['w'], theta['w']))
            self.assertTrue(np.array_equal(thetaGT['eta'], theta['eta']))