def __init__(self):
    """Set up time stepping, grid, spectral helper, fields, forcing and output dirs.

    All run-time settings are read from the namelist module ``nl``. The
    output and plot directories named there are created if they do not
    exist yet.
    """
    # +++ Initialize time stepping +++ #
    self.nsteps = nl.nsteps
    self.dt = nl.dt
    self.nforward = nl.nforward  # forward step every # steps
    # self.start_time = datetime(2020, 10, 9, 0)
    self.start_time = datetime.now()

    # +++ Initialize model grid +++ #
    # Placeholder grid: 64 latitudes from 90 to -90 and 128 longitudes
    # from 0 to 360 (degrees), plus the same coordinates in radians.
    self.latd = np.linspace(90., -90., 64)  # dummy arguments
    self.lond = np.linspace(0., 360., 128)  # dummy arguments
    self.latr = np.deg2rad(self.latd)
    self.lonr = np.deg2rad(self.lond)
    self.ny = len(self.latd)
    self.nx = len(self.lond)

    # +++ Initialize spectral routines +++ #
    self.s = spectral(self.latd, self.lond)

    # +++ Initialize model fields +++ #
    self.vortp_tend = np.zeros((self.ny, self.nx))
    # Third axis has length 3; presumably time levels -- TODO confirm.
    self.vortp = np.zeros((self.ny, self.nx, 3))

    # +++ Initialize forcing +++ #
    self.f = forcing(self.latr, self.lonr)
    self.topo = self.f.topography_simple()
    self.dxtopo, self.dytopo = self.s.gradient(self.topo)
    # Debug aid:
    # pp = plot_tools()
    # pp.quick_plot(self.latd, self.lond, self.topo)
    # pp.quick_plot(self.latd, self.lond, self.dxtopo)

    # +++ Model diagnostics +++ #
    # netCDF output
    self.output_freq = nl.output_freq
    self.output_dir = nl.output_dir
    # Plot figures
    self.plot_freq = nl.plot_freq
    self.plot_dir = nl.plot_dir
    # Create the directories if not existing. makedirs(exist_ok=True) also
    # builds missing parent directories and does not fail when the
    # directory appears between a check and the creation.
    for directory in (self.output_dir, self.plot_dir):
        os.makedirs(directory, exist_ok=True)
# Reverse the latitude axis and flip the first axis of u and v the same way
# (presumably so the fields stay aligned with the reversed latitudes).
lat = lat[::-1]
u = u[::-1, :]
v = v[::-1, :]

###
pp = plot_tools(central_longitude=270.)
# pp = plot_tools(central_longitude=180.)

# Start by plotting u
# pp.quick_plot(lat, lon, u, add_cyclic=True)

###
# Initialize spectral
spec = spectral(lat, lon)

##
# Calculate horizontal gradients
dudx, dudy = spec.gradient(u)
# pp.quick_plot(lat, lon, dudy, add_cyclic=True)

##
# Calculate planetary and relative vorticity
vrt = spec.uv2vrt(u, v)
f = spec.planetaryvorticity()
# pp.quick_plot(lat, lon, f+vrt, add_cyclic=True)
# (tail of an add_argument call that starts above this line)
metavar="FILE", required=True, type=lambda x: file_exists(parser, x))
# Required input file; the path is validated through file_exists.
parser.add_argument("-npmi", dest="npmi", help="File containing NPMI of words", metavar="FILE", required=True, type=lambda x: file_exists(parser, x))
# Required input file; the path is validated through file_exists.
parser.add_argument("-dict", dest="vdict", help="File containing the words", metavar="FILE", required=True, type=lambda x: file_exists(parser, x))
# Optional integer settings, both defaulting to 30.
parser.add_argument("-k", dest="k", help="K for the KNN", required=False, default=30, type=int)
parser.add_argument("-c", dest="c", help="Number of clusters in spectral clustering", required=False, default=30, type=int)
options = parser.parse_args()
# Hand the parsed options to spectral().
spectral(options.tweets, options.npmi, options.vdict, options.k, options.c)
# Parse every comma-separated line of the already-open file mFr into a
# list of floats.
for line in mFr:
    row = [float(item) for item in line.split(',')]
    # The last value is recorded as the category; note that the full row,
    # including that value, is what goes into the data set.
    categoryM.append(row[-1])
    mDataSet.append(row)
mFr.close()

# Score each clustering against the categories of the SAME data set:
# gDataSet results with categoryG, mDataSet results with categoryM.
calculate.calculate(kMeans.kMeans(gDataSet, 2), categoryG, 2)
calculate.calculate(kMeans.kMeans(mDataSet, 10), categoryM, 10)
calculate.calculate(nmf.NMF(gDataSet, 2), categoryG, 2)
# Fixed: this call used categoryG although it clusters mDataSet.
calculate.calculate(nmf.NMF(mDataSet, 10), categoryM, 10)
# Spectral clustering is evaluated for third-argument values 3, 6 and 9.
calculate.calculate(spectral.spectral(gDataSet, 2, 3), categoryG, 2)
calculate.calculate(spectral.spectral(gDataSet, 2, 6), categoryG, 2)
calculate.calculate(spectral.spectral(gDataSet, 2, 9), categoryG, 2)
calculate.calculate(spectral.spectral(mDataSet, 10, 3), categoryM, 10)
calculate.calculate(spectral.spectral(mDataSet, 10, 6), categoryM, 10)
calculate.calculate(spectral.spectral(mDataSet, 10, 9), categoryM, 10)
parser = argparse.ArgumentParser(description='Turn a file into a matrix')


def file_exists(parser, arg):
    """Return arg unchanged when the path exists, else report via parser.error."""
    if os.path.exists(arg):
        return arg
    parser.error("The file %s does not exist!"%arg)


def _existing_file(value):
    """argparse type callback: validate value against the module parser."""
    return file_exists(parser, value)


# Required input files: (flag, destination attribute, help text).
for _flag, _dest, _help in (
    ("-tweets", "tweets", "File containing tweets in JSON, one per line"),
    ("-npmi", "npmi", "File containing NPMI of words"),
    ("-dict", "vdict", "File containing the words"),
):
    parser.add_argument(
        _flag,
        dest=_dest,
        help=_help,
        metavar="FILE",
        required=True,
        type=_existing_file,
    )

# Optional integer settings, both defaulting to 30.
for _flag, _dest, _help in (
    ("-k", "k", "K for the KNN"),
    ("-c", "c", "Number of clusters in spectral clustering"),
):
    parser.add_argument(
        _flag,
        dest=_dest,
        help=_help,
        required=False,
        default=30,
        type=int,
    )

options = parser.parse_args()
spectral(
    options.tweets,
    options.npmi,
    options.vdict,
    options.k,
    options.c,
)
def fit(self, X):
    '''
    Copula fit using a Gaussian copula with marginals evaluated by Gaussian KDE.

    Each column of the (optionally scaled) data is pushed through its
    KDE-based cumulative marginal, truncated to [theta, 1 - theta] with
    theta = 1 / N, and then through the inverse standard normal CDF.
    What happens next depends on self.method:

    * 'mle', 'glasso', 'ledoit_wolf', 'spectral': estimate a covariance
      and precision matrix from the transformed data and store them in
      self.est_cov, self.corr and self.precision_.
    * 'pc', 'ic': learn a dependency structure from the scaled data and
      store its adjacency matrix in self.conditional_independences_.

    Finally self.fit_structure(self.precision_) is called.

    :param X: input dataset, 2-D array of shape (N, d)
    :return: None; all results are stored on the instance
    '''
    N, d = X.shape
    if self.scaler is not None:
        X_scale = self.scaler.fit_transform(X)
    else:
        X_scale = X
    if len(self.vertexes) == 0:
        self.vertexes = [str(idx) for idx in range(d)]
    self.theta = 1.0 / N
    # Explicit float buffers: np.zeros_like(X) would inherit an integer
    # dtype from X and silently truncate every marginal that is stored.
    cum_marginals = np.zeros(X.shape, dtype=float)
    inv_norm_cdf = np.zeros(X.shape, dtype=float)
    self.kernels = []
    # TODO: complexity O(Nd) is high
    if self.verbose:
        # Single-argument parenthesised print behaves the same under the
        # Python 2 print statement and the Python 3 print function.
        print(colored('>> Computing marginals', color='blue'))
    for j in range(d):
        column = X_scale[:, j]
        lower = column.min()
        upper = column.max()
        kernel = gaussian_kde(column)
        self.kernels.append(kernel)
        # KDE mass inside the observed range, used to renormalise.
        cum_pdf_overall = kernel.integrate_box_1d(lower, upper)
        for i in range(N):
            cum_marginals[i, j] = kernel.integrate_box_1d(
                lower, column[i]) / cum_pdf_overall
            # truncate cumulative marginals
            if cum_marginals[i, j] < self.theta:
                cum_marginals[i, j] = self.theta
            elif cum_marginals[i, j] > 1 - self.theta:
                cum_marginals[i, j] = 1 - self.theta
        # inverse of normal CDF: \Phi(F_j(x))^{-1}
        inv_norm_cdf[:, j] = norm.ppf(cum_marginals[:, j])

    if self.method == 'mle':
        # maximum-likelihood estimator
        empirical_cov = EmpiricalCovariance()
        empirical_cov.fit(inv_norm_cdf)
        if self.verbose:
            print(colored('>> Running MLE to estiamte precision matrix',
                          color='blue'))
        self.est_cov = empirical_cov.covariance_
        self.corr = scale_matrix(self.est_cov)
        self.precision_ = inv(empirical_cov.covariance_)

    if self.method == 'glasso':
        if self.verbose:
            print(colored('>> Running glasso to estiamte precision matrix',
                          color='blue'))
        empirical_cov = EmpiricalCovariance()
        empirical_cov.fit(inv_norm_cdf)
        # shrunk covariance to avoid numerical instability
        shrunk_cov = shrunk_covariance(empirical_cov.covariance_,
                                       shrinkage=0.8)
        self.est_cov, self.precision_ = graph_lasso(
            emp_cov=shrunk_cov, alpha=self.penalty,
            verbose=self.verbose, max_iter=self.max_iter)
        self.corr = scale_matrix(self.est_cov)

    if self.method == 'ledoit_wolf':
        if self.verbose:
            print(colored(
                '>> Running ledoit_wolf to estiamte precision matrix',
                color='blue'))
        self.est_cov, _ = ledoit_wolf(inv_norm_cdf)
        self.corr = scale_matrix(self.est_cov)
        self.precision_ = linalg.inv(self.est_cov)

    if self.method == 'spectral':
        # L2 method, from the paper "Inverse covariance estimation for
        # high dimension data in linear time and space", eq. (8).
        if self.verbose:
            print(colored(
                '>> Running Riccati to estiamte precision matrix',
                color='blue'))
        # TODO: note estimated cov is sample cov
        self.est_cov, self.precision_ = spectral(
            inv_norm_cdf, rho=2 * self.penalty, assume_centered=False)
        self.corr = scale_matrix(self.est_cov)

    if self.method == 'pc':
        clf = pgmlearner.PGMLearner()
        # One {vertex name: value} dict per row of the scaled data.
        data_list = [
            {name: X_scale[row_id, col]
             for col, name in enumerate(self.vertexes)}
            for row_id in range(X_scale.shape[0])
        ]
        graph = clf.lg_constraint_estimatestruct(
            data=data_list, pvalparam=self.pval, bins=self.bins)
        dag = np.zeros(shape=(len(graph.V), len(graph.V)))
        for e in graph.E:
            dag[self.vertexes.index(e[0]), self.vertexes.index(e[1])] = 1
        self.conditional_independences_ = dag

    if self.method == 'ic':
        n_cols = X_scale.shape[1]
        df = {self.vertexes[j]: X_scale[:, j] for j in range(n_cols)}
        # Every variable is declared with type code 'c'.
        variable_types = {self.vertexes[j]: 'c' for j in range(n_cols)}
        data = pd.DataFrame(df)
        # run the search
        ic_algorithm = IC(RobustRegressionTest, data, variable_types,
                          alpha=self.pval)
        graph = ic_algorithm.search()
        dag = np.zeros(shape=(n_cols, n_cols))
        for e in graph.edges(data=True):
            i = self.vertexes.index(e[0])
            j = self.vertexes.index(e[1])
            # Start with both directions set, then clear one of them when
            # the edge carries exactly one arrow head.
            dag[i, j] = 1
            dag[j, i] = 1
            arrows = set(e[2]['arrows'])
            head_len = len(arrows)
            if head_len > 0:
                head = arrows.pop()
                if head_len == 1 and head == e[0]:
                    dag[i, j] = 0
                if head_len == 1 and head == e[1]:
                    dag[j, i] = 0
        self.conditional_independences_ = dag

    # finally we fit the structure
    # NOTE(review): the 'pc' and 'ic' branches do not assign
    # self.precision_ here, so this relies on it being set elsewhere -- confirm.
    self.fit_structure(self.precision_)
warnings.filterwarnings("ignore") # # Spectral Exp 1 # In[174]: data = sio.loadmat('data/cluster_data.mat') X = data['X'] k_in_knn_graph = 200 threshold = 0.5 plt.figure() plt.suptitle("Spectral") W = knn_graph(X, k_in_knn_graph, threshold) idx = spectral(W, 2) cluster_plot(X, idx) plt.figure() plt.suptitle("Kmeans") idx = KMeans(2).fit(X).labels_ cluster_plot(X, idx) # # Spectral exp 2 # In[92]: data = sio.loadmat('data/TDT2_data.mat') # X = data['X'] fea = data['fea'] gnd = data['gnd'].flatten()
def __init__(self):
    """Set up plotting, time stepping, grid, fields, forcing and output dirs.

    Settings come from the namelist module ``nl``; the grid coordinates
    and the 'u' field are read from the file named by ``nl.dfile_um``.
    The output and plot directories are created if they do not exist yet.
    """
    # height of atmosphere 10k meters
    # ++ initialize plot tools
    self.pp = plot_tools()

    # +++ Initialize time stepping +++ #
    self.nsteps = nl.nsteps
    self.dt = nl.dt
    # NOTE(review): reads nl.forward although the attribute is called
    # nforward -- confirm that the namelist really defines `forward`.
    self.nforward = nl.forward  # forward step every # steps
    # self.start_time = datetime(2020, 10, 9, 0)
    self.start_time = datetime.now()

    # +++ Initialize model grid +++ #
    # Latitudes are reversed after reading: the file stores them from the
    # south pole upward, the model wants them from the north pole downward.
    self.latd = xarray_IO(nl.dfile_um).get_values('lat')[::-1]
    self.lond = xarray_IO(nl.dfile_um).get_values('lon')
    self.latr = np.deg2rad(self.latd)  # degrees to radians
    self.lonr = np.deg2rad(self.lond)
    self.ny = len(self.latd)  # number of latitude points
    self.nx = len(self.lond)  # number of longitude points

    # +++ Initialize spectral routines +++ #
    self.s = spectral(self.latd, self.lond)

    # +++ Initialize model fields +++ #
    self.vortp_tend = np.zeros((self.ny, self.nx))
    self.vort_tend = np.zeros((self.ny, self.nx))
    self.vortp_div = np.zeros((self.ny, self.nx))  # 2 dimensions
    self.vortp = np.zeros((self.ny, self.nx, 3))  # 3 dimensions
    self.vort = np.zeros((self.ny, self.nx, 3))
    # v', u' and f needed to solve for vorticity prime; f is the coriolis
    # parameter.
    self.vp = np.zeros((self.ny, self.nx))
    self.up = np.zeros((self.ny, self.nx))
    self.f = self.s.planetaryvorticity()
    # Only the second gradient component of f is kept; the first one
    # (x direction) is discarded because it is always zero.
    _, self.dyf = self.s.gradient(self.f)
    # Zonal mean winds, flipped along the first axis like the latitudes.
    self.um = xarray_IO(nl.dfile_um).get_values('u')[::-1, :]
    self.vm = np.zeros((self.ny, self.nx))

    # +++ Initialize forcing +++ #
    self.forcing = forcing(self.latr, self.lonr)
    # Topography case is selected in the namelist; the 'real' topography
    # is flipped along the first axis, the simple one is used as returned.
    if nl.topo_case == 'real':
        self.topo = self.forcing.topography_real()[::-1, :]
    else:
        self.topo = self.forcing.topography_simple()
    # Topography gradients (dh/dx, dh/dy) used in the forcing.
    self.dxtopo, self.dytopo = self.s.gradient(self.topo)

    # +++ Model diagnostics +++ #
    # netCDF output
    self.output_freq = nl.output_freq
    self.output_dir = nl.output_dir
    # Plot figures
    self.plot_freq = nl.plot_freq
    self.plot_dir = nl.plot_dir
    # Create the directories if not existing. makedirs(exist_ok=True) also
    # builds missing parent directories and does not fail when the
    # directory appears between a check and the creation.
    for directory in (self.output_dir, self.plot_dir):
        os.makedirs(directory, exist_ok=True)
def main():
    """Cluster the data with spectral clustering on three similarity graphs.

    Builds a similarity matrix from the data returned by
    ``pp.get_M_fromDB()``, then runs the random-walk and symmetric
    normalised spectral clustering variants on a fully connected graph, a
    kNN graph and an epsilon graph. The results of every run are passed
    to ``printResults`` together with the open statistics file
    ``MAINPATH/statistics_ppmi.txt``.
    """
    # The context manager closes the statistics file even when a
    # clustering step raises.
    with open(MAINPATH + "/" + "statistics_ppmi.txt", "w") as statistics:
        M, labels, label_names, relations, nounDict = pp.get_M_fromDB()

        # Choose a method to build your similarity matrix
        # Term Frequency-Inverse Document Frequency
        M_ppmi = sim.get_tf_idf_M(M, "raw", "c", norm_samps=True)
        similarity = "tfidf"
        # Jensen Shannon Divergence
        # M_ppmi = sim.JensenShanon(M)
        # similarity = "jsd"
        # Positive Pointwise Mutual Information
        # M_ppmi = sim.raw2ppmi(M)
        # similarity = "ppmi"

        # Change this value according to expected number of clusters required
        # We tested with 50, 100, 200, 300 based on our dataset
        k = 300

        print("Length features and labels:", len(M_ppmi), len(labels))
        c = spectral.spectral(M_ppmi, labels, sim.cos_s, dist.euclidean)
        # c = spectral.spectral(X, Y, sim.gauss_s, dist.euclidean)

        def cluster_and_report():
            """Run both normalised variants on the current graph and report."""
            print(c.graph)
            for algo in [c.norm_rw_sc, c.norm_sym_sc]:
                kmeans, kmeans_pred = algo(k)
                print("Kmeans pred:", kmeans_pred, len(kmeans_pred))
                # Builtin int: the np.int alias was removed from NumPy.
                labels_train_pred = kmeans.labels_.astype(int)
                print(c.clustering)
                printResults(similarity, label_names, labels_train_pred,
                             nounDict, k, c.clustering, c.graph, statistics)

        # Fully connected
        c.full_graph("cosine")
        cluster_and_report()

        n = M.shape[0]
        # cosine and knn mutual / gauss mutual
        number = int(2 * (n / np.log(n)))
        # gauss non-mutual
        # number = int((n/np.log(n)))

        # K nearest neighbors
        c.kNN_graph(number, "euclidean", False)
        cluster_and_report()

        # Epsilon: use the smallest non-zero entry of mst(c.W) as threshold.
        T = mst(c.W)
        A = T.toarray().astype(float)
        eps = np.min(A[np.nonzero(A)])
        print("eps", eps)
        c.eps_graph(eps)
        cluster_and_report()
def gauss_s(x1, x2, d, sigma=1):
    """Return exp(-d(x1, x2) / (2 * sigma**2)).

    :param x1: first sample
    :param x2: second sample
    :param d: callable taking (x1, x2) and returning their distance
    :param sigma: kernel width; the default 1 keeps the previous behaviour

    NOTE(review): the value returned by ``d`` is used as is, it is not
    squared here. If a textbook Gaussian kernel is intended, ``d`` has to
    return a squared distance -- confirm.
    """
    return np.exp(-(d(x1, x2)) / (2 * sigma**2))


def cos_s(x1, x2, d):
    """Return 1 - dist.cosine(x1, x2).

    ``d`` is not used; it is presumably accepted only so the signature
    matches gauss_s -- verify against the caller.
    """
    return -(dist.cosine(x1, x2) - 1)


# create a clustering object
c = spectral.spectral(X, Y, gauss_s, dist.euclidean)

# Other calls that were tried on the clustering object:
# c.kNN_graph(30, "cosine", True)
# c.kNN_graph(10, "euclidean", False)
# c.eps_graph(0.4)
# c.full_graph()
# c.show_sim_g()
# c.norm_rw_sc(3)
# c.norm_sym_sc(3)
# c.show_clust()
# c.show_correct_class()
# c.evaluate()

# Rebuild the kNN graph before each of the three spectral variants, run
# the variant with 3 clusters and print the graph.
for algo in [c.norm_sym_sc, c.norm_rw_sc, c.unnorm_sc]:
    c.kNN_graph(30, "euclidean", True)
    algo(3)
    print(c.graph)
# define a similarity function
def gauss_s(x1, x2, d, sigma=1):
    """Return exp(-d(x1, x2) / (2 * sigma**2)).

    :param x1: first sample
    :param x2: second sample
    :param d: callable taking (x1, x2) and returning their distance
    :param sigma: kernel width; the default 1 keeps the previous behaviour

    NOTE(review): the value returned by ``d`` is used as is, it is not
    squared here. If a textbook Gaussian kernel is intended, ``d`` has to
    return a squared distance -- confirm.
    """
    return np.exp(-(d(x1, x2)) / (2 * sigma**2))


def cos_s(x1, x2, d):
    """Return 1 - dist.cosine(x1, x2).

    ``d`` is not used; it is presumably accepted only so the signature
    matches gauss_s -- verify against the caller.
    """
    return -(dist.cosine(x1, x2) - 1)


# create a clustering object
c = spectral.spectral(X, Y, gauss_s, dist.euclidean)

# Other calls that were tried on the clustering object:
# c.kNN_graph(30, "cosine", True)
# c.kNN_graph(10, "euclidean", False)
# c.eps_graph(0.4)
# c.full_graph()
# c.show_sim_g()
# c.norm_rw_sc(3)
# c.norm_sym_sc(3)
# c.show_clust()
# c.show_correct_class()
# c.evaluate()

# Rebuild the kNN graph before each of the three spectral variants, run
# the variant with 3 clusters and print the graph.
for algo in [c.norm_sym_sc, c.norm_rw_sc, c.unnorm_sc]:
    c.kNN_graph(30, "euclidean", True)
    algo(3)
    print(c.graph)