Code Example #1
    def __init__(self):

        # +++ Initialize time stepping +++ #
        self.nsteps = nl.nsteps
        self.dt = nl.dt

        self.nforward = nl.nforward  # take a forward step every nforward steps

        #        self.start_time = datetime(2020, 10, 9, 0)
        self.start_time = datetime.now()

        # +++ Initialize model grid +++ #
        self.latd = np.linspace(90., -90., 64)   # placeholder grid
        self.lond = np.linspace(0., 360., 128)   # placeholder grid

        self.latr = np.deg2rad(self.latd)
        self.lonr = np.deg2rad(self.lond)

        self.ny = len(self.latd)
        self.nx = len(self.lond)

        # +++ Initialize spectral routines +++ #
        self.s = spectral(self.latd, self.lond)

        # +++ Initialize model fields +++ #
        self.vortp_tend = np.zeros((self.ny, self.nx))

        self.vortp = np.zeros((self.ny, self.nx, 3))

        # +++ Initialize forcing +++ #
        self.f = forcing(self.latr, self.lonr)
        self.topo = self.f.topography_simple()
        self.dxtopo, self.dytopo = self.s.gradient(self.topo)

        #        pp = plot_tools()
        #        pp.quick_plot(self.latd, self.lond, self.topo)
        #        pp.quick_plot(self.latd, self.lond, self.dxtopo)

        # +++ Model diagnostics +++ #
        # netCDF output
        self.output_freq = nl.output_freq
        self.output_dir = nl.output_dir
        # Create directory if not existing
        if not os.path.isdir(self.output_dir):
            os.mkdir(self.output_dir)

        # Plot figures
        self.plot_freq = nl.plot_freq
        self.plot_dir = nl.plot_dir
        # Create directory if not existing
        if not os.path.isdir(self.plot_dir):
            os.mkdir(self.plot_dir)
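
For reference, the namelist module `nl` used above only needs to expose a handful of settings. A minimal stand-in is sketched below; the attribute names come from the code, but the values are placeholders, not the model's real configuration:

# namelist.py -- minimal stand-in for the `nl` module assumed above
nsteps = 1000          # number of time steps (placeholder value)
dt = 900.0             # time step in seconds (placeholder value)
nforward = 100         # take a forward step every nforward steps
output_freq = 24       # write netCDF output every N steps
output_dir = 'output'
plot_freq = 24         # plot figures every N steps
plot_dir = 'plots'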
Code Example #2
    # Flip so latitude runs from north to south
    lat = lat[::-1]
    u = u[::-1, :]
    v = v[::-1, :]

    ###

    pp = plot_tools(central_longitude=270.)
#    pp = plot_tools(central_longitude=180.)    

    # Start by plotting u
#    pp.quick_plot(lat, lon, u, add_cyclic=True)

    ###

    # Initialize spectral
    spec = spectral(lat, lon)

    ##

    # Calculate horizontal gradients
    dudx, dudy = spec.gradient(u)
#    pp.quick_plot(lat, lon, dudy, add_cyclic=True)

    ##

    # Calculate planetary and relative vorticity
    vrt = spec.uv2vrt(u, v)
    f = spec.planetaryvorticity()

#    pp.quick_plot(lat, lon, f+vrt, add_cyclic=True)
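
The planetaryvorticity() call above presumably returns the Coriolis parameter f = 2 * Omega * sin(lat) on the model grid; a plain-NumPy equivalent is sketched below as a sanity check (this is an assumption about the routine's behavior, not its actual code):

import numpy as np

OMEGA = 7.292e-5                                 # Earth's rotation rate (rad/s)
f1d = 2.0 * OMEGA * np.sin(np.deg2rad(lat))      # shape (ny,)
f2d = np.broadcast_to(f1d[:, None], (lat.size, lon.size))  # on the (lat, lon) grid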
Code Example #3
                    metavar="FILE",
                    required=True,
                    type=lambda x: file_exists(parser, x))
parser.add_argument("-npmi",
                    dest="npmi",
                    help="File containing NPMI of words",
                    metavar="FILE",
                    required=True,
                    type=lambda x: file_exists(parser, x))
parser.add_argument("-dict",
                    dest="vdict",
                    help="File containing the words",
                    metavar="FILE",
                    required=True,
                    type=lambda x: file_exists(parser, x))
parser.add_argument("-k",
                    dest="k",
                    help="K for the KNN",
                    required=False,
                    default=30,
                    type=int)

parser.add_argument("-c",
                    dest="c",
                    help="Number of clusters in spectral clustering",
                    required=False,
                    default=30,
                    type=int)
options = parser.parse_args()
spectral(options.tweets, options.npmi, options.vdict, options.k, options.c)
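
Given these arguments, an invocation would look like the following (the script name and file paths are placeholders):

python spectral_tweets.py -tweets tweets.json -npmi npmi.txt -dict vocab.txt -k 30 -c 30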
Code Example #4
File: clustering.py  Project: wsgan001/DataMining-4
# Parse each comma-separated line into floats; the last column is the label.
for line in mFr:
    a = line.split(',')
    b = []
    for item in a:
        b.append(float(item))
    categoryM.append(b[-1])  # class label
    mDataSet.append(b)       # full row (label column included)
mFr.close()


calculate.calculate(kMeans.kMeans(gDataSet, 2), categoryG, 2)

calculate.calculate(kMeans.kMeans(mDataSet, 10), categoryM, 10)

calculate.calculate(nmf.NMF(gDataSet, 2), categoryG, 2)

calculate.calculate(nmf.NMF(mDataSet, 10), categoryM, 10)  # mDataSet pairs with categoryM

calculate.calculate(spectral.spectral(gDataSet, 2, 3), categoryG, 2)

calculate.calculate(spectral.spectral(gDataSet, 2, 6), categoryG, 2)

calculate.calculate(spectral.spectral(gDataSet, 2, 9), categoryG, 2)

calculate.calculate(spectral.spectral(mDataSet, 10, 3), categoryM, 10)

calculate.calculate(spectral.spectral(mDataSet, 10, 6), categoryM, 10)

calculate.calculate(spectral.spectral(mDataSet, 10, 9), categoryM, 10)

Code Example #5
parser = argparse.ArgumentParser(description='Turn a file into a matrix')


def file_exists(parser, arg):
    if not os.path.exists(arg):
        parser.error("The file %s does not exist!" % arg)
    else:
        return arg


parser.add_argument(
    "-tweets", dest="tweets", help="File containing tweets in JSON, one per line",
    metavar="FILE", required=True,
    type=lambda x: file_exists(parser, x)
)
parser.add_argument(
    "-npmi", dest="npmi", help="File containing NPMI of words",
    metavar="FILE", required=True,
    type=lambda x: file_exists(parser, x)
)
parser.add_argument(
    "-dict", dest="vdict", help="File containing the words",
    metavar="FILE", required=True,
    type=lambda x: file_exists(parser, x)
)
parser.add_argument(
    "-k", dest="k", help="K for the KNN", required=False,
    default=30, type=int
)
parser.add_argument(
    "-c", dest="c", help="Number of clusters in spectral clustering", required=False,
    default=30, type=int
)
options = parser.parse_args()
spectral(options.tweets, options.npmi, options.vdict, options.k, options.c)
Code Example #6
    def fit(self, X):
        '''
        Copula fit using a Gaussian copula with marginals evaluated by Gaussian KDE.
        The precision matrix is estimated with the specified method (default: graphical lasso).
        :param X: input dataset
        :return: estimated precision matrix rho
        '''

        N, d = X.shape
        if self.scaler is not None:
            X_scale = self.scaler.fit_transform(X)
        else:
            X_scale = X
        if len(self.vertexes) == 0:
            self.vertexes = [str(id) for id in range(d)]

        self.theta = 1.0 / N
        cum_marginals = np.zeros_like(X)
        inv_norm_cdf = np.zeros_like(X)
        # inv_norm_cdf_scaled = np.zeros_like(X)
        self.kernels = list([])
        # TODO: complexity O(Nd) is high
        if self.verbose:
            # colored() returns a string, so it must be printed to be seen
            print(colored('>> Computing marginals', color='blue'))
        for j in range(cum_marginals.shape[1]):
            self.kernels.append(gaussian_kde(X_scale[:, j]))
            cum_pdf_overall = self.kernels[-1].integrate_box_1d(
                X_scale[:, j].min(), X_scale[:, j].max())
            for i in range(cum_marginals.shape[0]):
                cum_marginals[i, j] = self.kernels[-1].integrate_box_1d(
                    X_scale[:, j].min(), X_scale[i, j]) / cum_pdf_overall
                # truncate cumulative marginals
                if cum_marginals[i, j] < self.theta:
                    cum_marginals[i, j] = self.theta
                elif cum_marginals[i, j] > 1 - self.theta:
                    cum_marginals[i, j] = 1 - self.theta
                # inverse of normal CDF: \Phi(F_j(x))^{-1}
                inv_norm_cdf[i, j] = norm.ppf(cum_marginals[i, j])
                # scaled to preserve mean and variance: u_j + \sigma_j*\Phi(F_j(x))^{-1}
                # inv_norm_cdf_scaled[i, j] = X_scale[:, j].mean() + X_scale[:, j].std() * inv_norm_cdf[i, j]

        if self.method == 'mle':
            # maximum-likelihood estimator
            empirical_cov = EmpiricalCovariance()
            empirical_cov.fit(inv_norm_cdf)
            if self.verbose:
                print(colored('>> Running MLE to estimate precision matrix',
                              color='blue'))

            self.est_cov = empirical_cov.covariance_
            self.corr = scale_matrix(self.est_cov)
            self.precision_ = inv(empirical_cov.covariance_)

        if self.method == 'glasso':
            if self.verbose:
                print(colored('>> Running glasso to estimate precision matrix',
                              color='blue'))

            empirical_cov = EmpiricalCovariance()
            empirical_cov.fit(inv_norm_cdf)
            # shrink the covariance to avoid numerical instability
            shrunk_cov = shrunk_covariance(empirical_cov.covariance_,
                                           shrinkage=0.8)
            self.est_cov, self.precision_ = graph_lasso(emp_cov=shrunk_cov,
                                                        alpha=self.penalty,
                                                        verbose=self.verbose,
                                                        max_iter=self.max_iter)
            self.corr = scale_matrix(self.est_cov)

        if self.method == 'ledoit_wolf':
            if self.verbose:
                print(colored(
                    '>> Running ledoit_wolf to estimate precision matrix',
                    color='blue'))

            self.est_cov, _ = ledoit_wolf(inv_norm_cdf)
            self.corr = scale_matrix(self.est_cov)
            self.precision_ = linalg.inv(self.est_cov)

        if self.method == 'spectral':
            '''L2 method; see the paper "Inverse covariance estimation for
            high-dimensional data in linear time and space", eq. (8).
            '''
            if self.verbose:
                print(colored(
                    '>> Running Riccati to estimate precision matrix',
                    color='blue'))

            # TODO: note estimated cov is sample cov
            self.est_cov, self.precision_ = spectral(inv_norm_cdf,
                                                     rho=2 * self.penalty,
                                                     assume_centered=False)
            self.corr = scale_matrix(self.est_cov)

        if self.method == 'pc':
            clf = pgmlearner.PGMLearner()
            data_list = list([])
            for row_id in range(X_scale.shape[0]):
                instance = dict()
                for i, n in enumerate(self.vertexes):
                    instance[n] = X_scale[row_id, i]
                data_list.append(instance)
            graph = clf.lg_constraint_estimatestruct(data=data_list,
                                                     pvalparam=self.pval,
                                                     bins=self.bins)
            dag = np.zeros(shape=(len(graph.V), len(graph.V)))
            for e in graph.E:
                dag[self.vertexes.index(e[0]), self.vertexes.index(e[1])] = 1
            self.conditional_independences_ = dag

        if self.method == 'ic':
            df = dict()
            variable_types = dict()
            for j in range(X_scale.shape[1]):
                df[self.vertexes[j]] = X_scale[:, j]
                variable_types[self.vertexes[j]] = 'c'
            data = pd.DataFrame(df)
            # run the search
            ic_algorithm = IC(RobustRegressionTest,
                              data,
                              variable_types,
                              alpha=self.pval)
            graph = ic_algorithm.search()
            dag = np.zeros(shape=(X_scale.shape[1], X_scale.shape[1]))
            for e in graph.edges(data=True):
                i = self.vertexes.index(e[0])
                j = self.vertexes.index(e[1])
                dag[i, j] = 1
                dag[j, i] = 1
                arrows = set(e[2]['arrows'])
                head_len = len(arrows)
                if head_len > 0:
                    head = arrows.pop()
                    if head_len == 1 and head == e[0]:
                        dag[i, j] = 0
                    if head_len == 1 and head == e[1]:
                        dag[j, i] = 0
            self.conditional_independences_ = dag

        # finally we fit the structure
        self.fit_structure(self.precision_)
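
The marginal transform inside the loop above (KDE-based CDF followed by the inverse normal CDF) can be exercised in isolation with plain SciPy. A minimal sketch for a single column, with the same theta-style truncation (the overall normalization by the total KDE mass is omitted for brevity):

import numpy as np
from scipy.stats import gaussian_kde, norm

x = np.random.randn(200)                        # one column of X
kde = gaussian_kde(x)
u = np.array([kde.integrate_box_1d(x.min(), xi) for xi in x])
theta = 1.0 / len(x)
u = np.clip(u, theta, 1.0 - theta)              # truncate the cumulative marginals
z = norm.ppf(u)                                 # Gaussian-copula latent values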
Code Example #7
File: run.py  Project: tuqingyun/data-mining-python
warnings.filterwarnings("ignore")

# Spectral Exp 1


data = sio.loadmat('data/cluster_data.mat')
X = data['X']

k_in_knn_graph = 200
threshold = 0.5

plt.figure()
plt.suptitle("Spectral")
W = knn_graph(X, k_in_knn_graph, threshold)
idx = spectral(W, 2)
cluster_plot(X, idx)

plt.figure()
plt.suptitle("Kmeans")
idx = KMeans(2).fit(X).labels_
cluster_plot(X, idx)

# Spectral Exp 2


data = sio.loadmat('data/TDT2_data.mat')
# X = data['X']
fea = data['fea']
gnd = data['gnd'].flatten()
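
For comparison, scikit-learn's built-in spectral clustering can consume the same precomputed affinity matrix. This is a sketch, assuming the W built by knn_graph above is a symmetric, non-negative similarity matrix:

from sklearn.cluster import SpectralClustering

sc = SpectralClustering(n_clusters=2, affinity='precomputed')
idx_sk = sc.fit_predict(W)
cluster_plot(X, idx_sk)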
Code Example #8
    def __init__(self):
        
        # Height of the atmosphere: 10 km
        # +++ Initialize plot tools +++ #
        self.pp = plot_tools()

        # +++ Initialize time stepping +++ #
        self.nsteps = nl.nsteps
        self.dt = nl.dt
        self.nforward = nl.forward  # take a forward step every nforward steps

#        self.start_time = datetime(2020, 10, 9, 0)
        self.start_time = datetime.now()

        # +++ Initialize model grid +++ #
        # The file stores latitude from the South Pole upward, so flip it
        # to start at the North Pole and decrease.
        self.latd = xarray_IO(nl.dfile_um).get_values('lat')[::-1]
        self.lond = xarray_IO(nl.dfile_um).get_values('lon')

        self.latr = np.deg2rad(self.latd)  # degrees to radians
        self.lonr = np.deg2rad(self.lond)

        self.ny = len(self.latd)  # number of grid points
        self.nx = len(self.lond)
        

        # +++ Initialize spectral routines +++ #
        self.s = spectral(self.latd, self.lond)

        # +++ Initialize model fields +++ #
        self.vortp_tend = np.zeros((self.ny, self.nx))
        self.vort_tend = np.zeros((self.ny, self.nx))

        self.vortp_div = np.zeros((self.ny, self.nx))   # 2-D field
        self.vortp = np.zeros((self.ny, self.nx, 3))    # three time levels
        self.vort = np.zeros((self.ny, self.nx, 3))

        # Initialize u', v', and f, which are needed to solve for the
        # perturbation vorticity; f is the Coriolis parameter.
        self.vp = np.zeros((self.ny, self.nx))
        self.up = np.zeros((self.ny, self.nx))

        self.f = self.s.planetaryvorticity()
        # The underscore discards df/dx, which is identically zero.
        _, self.dyf = self.s.gradient(self.f)

        # Read in zonal-mean winds, flipped ([::-1]) to match the latitudes.
        self.um = xarray_IO(nl.dfile_um).get_values('u')[::-1, :]
        self.vm = np.zeros((self.ny, self.nx))


    

        # +++ Initialize forcing +++ #
        self.forcing = forcing(self.latr, self.lonr)
        # Toggle between topography cases, as set in the namelist.
        if nl.topo_case == 'real':
            self.topo = self.forcing.topography_real()[::-1, :]
        else:
            self.topo = self.forcing.topography_simple()
        # Calculate dh/dx and dh/dy for the topographic forcing.
        self.dxtopo, self.dytopo = self.s.gradient(self.topo)

        # +++ Model diagnostics +++ #
        # netCDF output
        self.output_freq = nl.output_freq
        self.output_dir = nl.output_dir
        # Create directory if not existing
        if not os.path.isdir(self.output_dir):
            os.mkdir(self.output_dir)

        # Plot figures
        self.plot_freq = nl.plot_freq
        self.plot_dir = nl.plot_dir
        # Create directory if not existing
        if not os.path.isdir(self.plot_dir):
            os.mkdir(self.plot_dir)
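
The xarray_IO helper referenced above is not shown; a minimal stand-in consistent with how it is called (an assumption, not the project's actual implementation) could be:

import xarray as xr

class xarray_IO:
    # Minimal stand-in: open a dataset and pull raw values by variable name.
    def __init__(self, path):
        self.ds = xr.open_dataset(path)

    def get_values(self, name):
        return self.ds[name].values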
Code Example #9
def main():
    statistics = open(MAINPATH + "/" + "statistics_ppmi.txt", "w")

    M, labels, label_names, relations, nounDict = pp.get_M_fromDB()

    #Choose a method to build your similarity matrix
    #Term Frequency-Inverse Document Frequency
    M_ppmi = sim.get_tf_idf_M(M, "raw", "c", norm_samps=True)
    similarity = "tfidf"

    #Jensen Shanon Divergence
    #M_ppmi = sim.JensenShanon(M)
    #similarity = "jsd"

    #Positive Pointwise Mutual Information
    #M_ppmi = sim.raw2ppmi(M)
    #similarity = "ppmi"

    #Change this value according to expected number of clusters required
    #We tested with 50, 100, 200, 300 based on our dataset
    k = 300
    print("Length features and labels:", len(M_ppmi), len(labels))

    c = spectral.spectral(M_ppmi, labels, sim.cos_s, dist.euclidean)
    #c = spectral.spectral(X, Y, sim.gauss_s, dist.euclidean)

    # Fully connected graph
    c.full_graph("cosine")
    print(c.graph)
    for algo in [c.norm_rw_sc, c.norm_sym_sc]:
        kmeans, kmeans_pred = algo(k)
        print("Kmeans pred:", kmeans_pred, len(kmeans_pred))
        labels_train_pred = kmeans.labels_.astype(int)  # np.int is removed in newer NumPy
        print(c.clustering)
        printResults(similarity, label_names, labels_train_pred, nounDict, k,
                     c.clustering, c.graph, statistics)

    n = M.shape[0]
    '''cosine and knn mutual / gauss mutual'''
    number = int(2 * (n / np.log(n)))
    '''gaus non-mutual'''
    #number = int((n/np.log(n)))

    #K nearest neighbors
    c.kNN_graph(number, "euclidean", False)
    print(c.graph)
    for algo in [c.norm_rw_sc, c.norm_sym_sc]:
        kmeans, kmeans_pred = algo(k)
        print("Kmeans pred:", kmeans_pred, len(kmeans_pred))
        labels_train_pred = kmeans.labels_.astype(int)
        print(c.clustering)
        printResults(similarity, label_names, labels_train_pred, nounDict, k,
                     c.clustering, c.graph, statistics)

    # Epsilon graph
    T = mst(c.W)
    A = T.toarray().astype(float)
    eps = np.min(A[np.nonzero(A)])
    print("eps", eps)
    c.eps_graph(eps)
    print(c.graph)
    for algo in [c.norm_rw_sc, c.norm_sym_sc]:
        kmeans, kmeans_pred = algo(k)
        print("Kmeans pred:", kmeans_pred, len(kmeans_pred))
        labels_train_pred = kmeans.labels_.astype(int)
        print(c.clustering)
        printResults(similarity, label_names, labels_train_pred, nounDict, k,
                     c.clustering, c.graph, statistics)

    statistics.close()
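
The epsilon above is taken as the shortest edge of the minimum spanning tree of c.W. A common heuristic (e.g. in von Luxburg's spectral clustering tutorial) instead uses the longest MST edge, which is the smallest epsilon for which the eps-neighborhood graph stays connected. A sketch, assuming c.W holds pairwise distances and mst is SciPy's minimum_spanning_tree:

from scipy.sparse.csgraph import minimum_spanning_tree as mst

A = mst(c.W).toarray()
eps_connected = A.max()  # longest MST edge: smallest eps keeping the graph connected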
Code Example #10
def gauss_s(x1, x2, d):
    # Gaussian similarity: exp(-d(x1, x2) / (2 * sigma**2))
    sigma = 1
    return np.exp(-(d(x1, x2)) / (2 * sigma**2))


def cos_s(x1, x2, d):
    # Cosine similarity (1 - cosine distance); the d argument is unused
    return -(dist.cosine(x1, x2) - 1)


#similarities = []
#for x1 in X:
#    for x2 in X:
#        similarities.append(gauss_s(x1,x2))

#create a clustering object
c = spectral.spectral(X, Y, gauss_s, dist.euclidean)
#c.kNN_graph(30, "cosine", True)
#c.kNN_graph(10, "euclidean", False)
#c.eps_graph(0.4)
#c.full_graph()
#c.show_sim_g()
#c.norm_rw_sc(3)
#c.norm_sym_sc(3)
#c.show_clust()
#c.show_correct_class()
#c.evaluate()

for algo in [c.norm_sym_sc, c.norm_rw_sc, c.unnorm_sc]:
    c.kNN_graph(30, "euclidean", True)
    algo(3)
    print(c.graph)
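
As a quick check of the two similarity functions above (using SciPy's distance functions, which the dist alias presumably refers to):

import numpy as np
from scipy.spatial import distance as dist

x1, x2 = np.array([0.0, 1.0]), np.array([1.0, 0.0])
print(gauss_s(x1, x2, dist.euclidean))  # exp(-sqrt(2)/2) ~ 0.49
print(cos_s(x1, x2, None))              # 0.0 for orthogonal vectors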