import numpy as np
import scipy.sparse as sp


def estimate_network(A, C, num_nodes, horizon, type_diffusion):
    # Pairwise cascade counts: a zero matrix of size num_nodes x num_nodes.
    num_cascades = np.zeros((num_nodes, num_nodes))
    A_potential = sp.lil_matrix(A.shape)
    A_bad = sp.lil_matrix(A.shape)
    A_hat = sp.lil_matrix(A.shape)
    total_obj = 0
    for c in range(C.shape[0]):
        # Active nodes in cascade c: entries not equal to -1 hold infection times.
        idx = C[c, :] != -1
        # Sort the infection times and keep the node ordering.
        order = np.argsort(C[c, idx])
        val = np.sort(C[c, idx])
        for i in range(1, len(val)):
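# estimate_network sorts the infection times MATLAB-style; a minimal standalone
# sketch of the sort/argsort step used above (the `times` array is made up for
# illustration):
import numpy as np

times = np.array([0.7, -1, 0.2, 1.5, -1])  # hypothetical infection times; -1 means "not infected"
idx = times != -1                          # active nodes in the cascade
order = np.argsort(times[idx])             # MATLAB sort()'s second output
val = np.sort(times[idx])                  # MATLAB sort()'s first output
print(val, order)                          # [0.2 0.7 1.5] [1 0 2]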
import scipy.io as sio


def load(filename, A=False, b=False, x_true=False):
    # Load one field of a saved network state; `sparse` and `array` are the
    # module's own conversion helpers (see the sketch below).
    data = sio.loadmat(filename)
    if A:
        return sparse(data['A'])
    if b:
        return array(data['b'])
    if x_true:
        return array(data['x_true'])
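# `sparse` and `array` are not defined in the snippet above; a minimal sketch of
# what they plausibly are, assuming the usual pattern of wrapping loadmat output.
# The helper bodies here are assumptions, not confirmed by the source.
import numpy as np
import scipy.sparse as sps

def sparse(M):
    # Assumed helper: convert a loadmat matrix to CSR for efficient products.
    return sps.csr_matrix(M)

def array(v):
    # Assumed helper: flatten a loadmat column vector to a 1-D NumPy array.
    return np.squeeze(np.asarray(v))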
def top5(file, alpha, linelst2):
    # Build the sparse link matrix and compute PageRank scores.
    sparsed = sparse(file)
    ranks = pr.ranking_sparse(sparsed, alpha, steps=100)
    order = []
    # print(linelst2[0])  # debug
    for i in range(len(linelst2)):
        # Keep the URL part of each line, starting at the first 'h' of "http".
        f = linelst2[i].index('h')
        order.append((linelst2[i][f:], ranks[i]))
    # Sort by rank, highest first, and return the five top-ranked URLs.
    finorder = sorted(order, key=lambda pair: pair[1], reverse=True)
    final = [url for url, _ in finorder]
    return final[:5]
import numpy as np
import matplotlib.pyplot as plt


def plotVector():
    sortedVector, reguVec, reguVecSO, label = sparse()
    length = len(sortedVector)
    Y = np.zeros(length)
    Z = np.ones(length)
    Q = 2 * np.ones(length)
    plt.figure(2, figsize=(8, 6))
    plt.scatter(sortedVector, Y, c=label, s=10)
    plt.scatter(reguVec, Z, s=10)
    plt.scatter(reguVecSO, Q, s=10)
    print(len(reguVec), len(reguVecSO))
    plt.xlabel('x-axis')
    plt.ylim(-2, 10)
    # Blue points: the sorted 1-D values; orange points: the result after
    # first-difference optimisation.
    plt.title('sorted 1-D values vs. first-difference optimised values')
    plt.show()
import numpy as np


def gmm_mixup(gmm):
    # Split each mixture component along its dimension of largest variance,
    # doubling the number of components.
    mu = gmm.mu
    sigma = gmm.sigma
    w = gmm.w
    ndim, nmix = sigma.shape
    sig_max = sigma.max(axis=0)
    arg_max = sigma.argmax(axis=0)
    eps = np.zeros_like(mu)
    eps[arg_max, np.arange(nmix)] = np.sqrt(sig_max)
    # Perturb the means in opposite directions, duplicate the covariances,
    # and halve the weights.
    mu = np.hstack((mu - eps, mu + eps))
    sigma = np.hstack((sigma, sigma))
    w = 0.5 * np.hstack((w, w))
    gmm.w = w
    gmm.mu = mu
    gmm.sigma = sigma
    return gmm
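# A quick sketch of how gmm_mixup doubles the mixture, using a throwaway
# container; the SimpleNamespace "GMM" here is an assumption for illustration,
# the real project presumably has its own GMM class.
import numpy as np
from types import SimpleNamespace

gmm = SimpleNamespace(
    mu=np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0]]),     # ndim x nmix (made-up numbers)
    sigma=np.array([[1.0, 4.0], [2.0, 1.0], [1.0, 1.0]]),  # ndim x nmix diagonal covariances
    w=np.array([0.5, 0.5]),
)
gmm = gmm_mixup(gmm)
print(gmm.mu.shape, gmm.sigma.shape, gmm.w)  # (3, 4) (3, 4) [0.25 0.25 0.25 0.25]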
def stitcher_d2_flat(self):
    """Compute sparse matrix that applies stitching to d2 forms.

    Acts on a flattened d2-form.

    Returns
    -------
    sparse matrix
    """
    info = self.boundary_info
    # Remove diagonal entries.
    info = info[~np.logical_and(info[:, 1] == info[:, 2], info[:, 3] == 0)]
    r = self.ravel(info[:, 1], info[:, 0])
    c = self.ravel(info[:, 2], info[:, 0])
    import scipy.sparse

    def sparse(r, c):
        n = np.prod(self.shape_p0)
        return scipy.sparse.coo_matrix((np.ones_like(r), (r, c)), shape=(n, n))

    return sparse(r, c)
import numpy as np
import scipy.sparse as sps


def block_sizes_to_N(block_sizes):
    """Converts a list of block sizes to a scipy.sparse matrix.

    The matrix is built in lil format, since that is the most convenient
    format to populate incrementally, and is returned in csr format so that
    callers get efficient multiplication without converting it themselves.
    """
    block_sizes = np.squeeze(np.asarray(block_sizes))
    m = np.sum(block_sizes)
    n = m - block_sizes.shape[0]
    N = sps.lil_matrix((m, n))
    start_row = 0
    start_col = 0
    for i, block_size in enumerate(block_sizes):
        if block_size < 2:
            start_row += block_size
            start_col += block_size - 1
            continue
        # Each block contributes a (block_size x block_size-1) difference stencil.
        for j in range(block_size - 1):
            N[start_row + j, start_col + j] = 1
            N[start_row + j + 1, start_col + j] = -1
        start_row += block_size
        start_col += block_size - 1
    return N.tocsr()
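# A quick usage sketch showing the difference stencil this produces for two
# blocks of sizes 3 and 2 (the input values are made up for illustration):
import numpy as np

N = block_sizes_to_N(np.array([3, 2]))
print(N.toarray())
# [[ 1.  0.  0.]
#  [-1.  1.  0.]
#  [ 0. -1.  0.]
#  [ 0.  0.  1.]
#  [ 0.  0. -1.]]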
def block_sizes_to_N(block_sizes):
    """Converts a list of block sizes to a scipy.sparse matrix.

    The matrix is built in lil format, since that is the most convenient
    format to populate incrementally, and is returned in csr format so that
    callers get efficient multiplication without converting it themselves.
    """
    block_sizes = np.asarray(block_sizes)
    m = np.sum(block_sizes)
    n = m - block_sizes.shape[0]
    N = sps.lil_matrix((m, n))
    start_row = 0
    start_col = 0
    for i, block_size in enumerate(block_sizes):
        if block_size < 2:
            start_row += block_size
            start_col += block_size - 1
            continue
        for j in range(block_size - 1):
            N[start_row + j, start_col + j] = 1
            N[start_row + j + 1, start_col + j] = -1
        start_row += block_size
        start_col += block_size - 1
    return N.tocsr()
something is a corpus when:
- string, and after CountVectorizer it has overlap
- probably, if it contains spaces

something is a date when (see the sketch after these notes):
- it can be parsed as a date, with '-', '/' or ' ' as separator
- year: max 100 unique values, all between 1900 and 2100
- month: 1-12 only, and a year was found
- day: 1-31, and a month or year was found

conversions:
- numeric, ID converts to: empty
- numeric, continuous converts to: scale(float(1)), sparse(ohe(bins(m)))
- numeric, discrete converts to: scale(float(1)), sparse(ohe(m))
- numeric, categorical converts to: ohe(m), sparse(ohe(m))
- numeric, date converts to:
- string, categorical converts to: ohe(m), sparse(ohe(m))
- string, corpus converts to:
- string, ID converts to:
- date converts to: float(4), sparse(Y, M, D, S)

missing-value handling:
- numeric, continuous, nomiss: impute, scale
- numeric, continuous, miss:
- numeric, discrete, nomiss:
- numeric, discrete, miss:
- numeric, date, nomiss:
- numeric, date, miss:
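A minimal sketch of the year/date heuristics above; the function names and example values are made up for illustration, and only the '-', '/' and ' ' separators mentioned in the notes are tried:

from datetime import datetime

def looks_like_year(values):
    # Year heuristic from the notes: at most 100 unique values, all between 1900 and 2100.
    vals = {int(v) for v in values}
    return 0 < len(vals) <= 100 and all(1900 <= v <= 2100 for v in vals)

def looks_like_date(values, seps=("-", "/", " ")):
    # Date heuristic from the notes: parseable as Y/M/D with '-', '/' or ' ' as separator.
    formats = [sep.join(("%Y", "%m", "%d")) for sep in seps]
    def parses(v):
        for fmt in formats:
            try:
                datetime.strptime(v, fmt)
                return True
            except ValueError:
                pass
        return False
    return all(parses(v) for v in values)

print(looks_like_year([1999, 2001, 2010]))                          # True
print(looks_like_date(["2020-01-02", "2020/03/04", "2020 05 06"]))  # True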
        # Tail of the sparse-format parser: `s`, `test`, `row`, `col`, `data` and
        # `row_count` are set up earlier in the function.
        ss = s[i].split(':')
        col.append(int(ss[0]))
        row.append(row_count)
        data.append(float(ss[1]))
        row_count += 1
    if max_fea is None:
        train = coo_matrix((data, (row, col)))
    else:
        train = coo_matrix((data, (row, col)),
                           shape=(row_count, max_fea if max_fea > max(col) + 1 else max(col) + 1))
    return [train, test]


if not (args.s or args.libsvm):
    print("non-sparse input")
    [train, test] = non_sparse(args.i)
if args.s:  # sparse input
    print("sparse input")
    [train, test] = sparse(args.i)
if args.libsvm:  # libsvm input
    print("libsvm input")
    [train, test] = libsvm(args.i, 341473)
print(train.shape)

if args.pcw:
    model = LinearSVC(class_weight={args.pos: args.pcw, args.neg: args.ncw}).fit(train, test)
else:
    model = LinearSVC().fit(train, test)
if args.pm:
    para = model.coef_[0]
    print(len(para))
if args.fea:
    fea = list(open(args.fea, 'r').read().strip().splitlines())
def load_data(filename, full=False, OD=False, CP=False, eq=None):
    """Load data from file about network state.

    Notation:
        x_true = route flow
        x_split = route split

    :param filename:
    :param full: Use A_full, b_full instead of A, b
    :param OD: Extract information from T
    :param CP: Extract information from U
    :param eq: None uses block_sizes to generate the equality constraint;
        OD uses T to generate the equality constraint; CP uses U
    :return:
    """
    print(filename)
    logging.debug('Loading %s...' % filename)
    data = sio.loadmat(filename)
    logging.debug('Unpacking...')

    # Link-route and route
    # FIXME deprecate use of key 'x'
    if full and 'A_full' in data and 'b_full' in data and 'x_true' in data:
        x_true = array(data['x_true'])
        A = sparse(data['A_full'])
        b = array(data['b_full'])
    elif 'A' in data and 'b' in data:
        x_true = load_x_true(data)
        A = sparse(data['A'])
        b = array(data['b'])
    elif 'phi' in data and 'b' in data and 'real_a' in data:
        x_true = array(data['real_a'])
        A = sparse(data['phi'])
        b = array(data['b'])
    # assert_scaled_incidence(A)

    # Remove rows of zeros (unused sensors)
    nz = [i for i in range(A.shape[0]) if A[i, :].nnz == 0]
    nnz = [i for i in range(A.shape[0]) if A[i, :].nnz > 0]
    A, b = A[nnz, :], b[nnz]
    assert la.norm(A.dot(x_true) - b) < 1e-3, 'Check data input: Ax != b'
    n = x_true.shape[0]

    # OD-route
    if OD and 'T' in data and 'd' in data:
        T, d = sparse(data['T']), array(data['d'])
        assert_simplex_incidence(T, n)  # ASSERT
    # Cellpath-route
    if CP and 'U' in data and 'f' in data:
        U, f = sparse(data['U']), array(data['f'])
        assert_simplex_incidence(U, n)  # ASSERT

    # Reorder routes by blocks of flow, e.g. OD flow or waypoint flow given by U
    if 'block_sizes' in data:
        eq = None
        block_sizes = array(data['block_sizes'])
        rsort_index = None
    else:
        W = T if eq == 'OD' else U
        block_sizes = get_block_sizes(W)
        rank = W.nonzero()[0]
        sort_index = np.argsort(rank)
        if CP and 'U' in data:
            U = U[:, sort_index]  # reorder
        if OD and 'T' in data:
            T = T[:, sort_index]  # reorder
        A = A[:, sort_index]  # reorder
        x_true = x_true[sort_index]  # reorder
        rsort_index = np.argsort(sort_index)  # revert sort

    logging.debug('Creating sparse N matrix')
    N = block_sizes_to_N(block_sizes)
    logging.debug('File loaded successfully')

    # Scale matrices by block
    print(la.norm(A.dot(x_true) - b))
    if eq == 'OD' and 'T' in data:
        scaling = T.T.dot(T.dot(x_true))
        x_split = x_true / scaling
        DT = sps.diags([scaling], [0])
        A = A.dot(DT)
        if CP and 'U' in data:
            U = U.dot(DT)
            AA, bb = sps.vstack([A, U]), np.concatenate((b, f))
        else:
            AA, bb = A, b
    elif eq == 'CP' and 'U' in data:
        scaling = U.T.dot(U.dot(x_true))
        x_split = x_true / scaling
        DU = sps.diags([scaling], [0])
        A = A.dot(DU)
        if OD and 'T' in data:
            T = T.dot(DU)
            AA, bb = sps.vstack([A, T]), np.concatenate((b, d))
        else:
            AA, bb = A, b
    else:
        x_split = x_true
        # TODO what is going on here????
        # scaling = array(A.sum(axis=0)/(A > 0).sum(axis=0))
        # scaling[np.isnan(scaling)] = 0
        # FIXME this is not accurate
        scaling = f
        AA, bb = A, b
    assert la.norm(A.dot(x_split) - b) < 1e-3, 'Improper scaling: Ax != b'
    return (AA, bb, N, block_sizes, x_split, nz, scaling, rsort_index)
# Interactive scratch: q, r, G, c, h and A come from the surrounding session.
# Inspect the rank-deficient rows of R from the QR factorisation.
index = abs(numpy.diag(r)) <= 1e-15
q[index][:, index].shape
r[index].shape
newA = q[index][:, index].dot(r[index])

# Sparse SVD of the constraint matrix.
import scipy.sparse
import scipy.sparse.linalg
sparseG = scipy.sparse.csc_matrix(G)
u, s, vt = scipy.sparse.linalg.svds(sparseG)

# Solve the LP with cvxopt, converting the dense matrices to cvxopt sparse form.
from cvxopt import sparse, spmatrix, matrix, solvers
sparse(matrix(G))
sol = solvers.lp(matrix(c), sparse(matrix(G)), matrix(h), sparse(matrix(A)), matrix(b))
sol1 = solvers.lp(matrix(c), sparse(matrix(G)), matrix(h), sparse(matrix(A)), matrix(b), solver='glpk')
sol1 = solvers.lp(matrix(c), sparse(matrix(G)), matrix(h), sparse(matrix(newA)), matrix(b), solver='glpk')
print(sol['x'])

c = matrix([-4., -5.])
G = matrix([[2., 1., -1., 0.], [1., 2., 0., -1.]])
h = matrix([3., 3., 0., 0.])
import numpy as np
import scipy.sparse


def regr_xzw(X, z, w=None, nargout=2):
    """
    [b, brmse, sk, n, msz, msr, nmse] = regr_xzw(X, z, w)

    General linear regression; NaN is returned if the data-data correlation
    matrix is badly conditioned.

    Input
        X, nxm dependent variables, comprised of dataX and dataY as column
            vector entries
        z, nx1 observations
    OPTIONAL
        w, nx1 weights (0 means an observation has no influence)
    Output
        b, mx1 estimated parameters: z^ = X*b
        brmse, mx1 estimated variances (root mean square error) of the
            parameter(s) (confidence intervals assume a Gaussian white-noise
            distribution, with bmse the estimated variance)
        sk, the model skill
        n, the effective dof = (sum(w)/max(w))
        msz, variance of the data
        msr, variance of the residuals
        nmse, percent of white error input variance passed by the weights
    """
    # inputs
    n, m = np.shape(X)
    nz = np.size(z)
    if w is None:
        # Default input: uniform weights.
        w = np.ones((n, 1), float)
        nw = n
    else:
        # User-supplied weights.
        nw = np.size(w, axis=0)

    # init output
    b = np.nan * np.ones((m, 1), float)
    brmse = b
    sk = np.nan
    msz = np.nan
    msr = np.nan
    nmse = 1
    if (nz != n or nw != n or nw != nz):
        print('X and z or w are different lengths')
        return b, brmse  # , sk, n, msz, msr, nmse

    # find valid data by summing
    tmp = np.concatenate((X, z, w), axis=1)
    idd = (np.nonzero(
        np.isfinite(np.dot(tmp, np.ones((m + 2, 1), float))) == 1))[0]
    if (np.size(idd) < 2):
        print('n < 2 -- exiting')
        return b, brmse  # , sk, n, msz, msr, nmse

    # number of dof
    n = np.sum(w[idd]) / max(w[idd])
    # n = n[0]  # some weird dimensionality thing happens... you gotta extract n
    # out of the structure that results from the above

    # convert to weighted space (priestly p.315)
    # Fienen pointed out this is wrong: z = (z).*w; X = X.*(repmat(w,1,m));
    try:
        Q = scipy.sparse.diags(np.ravel(w[idd]) ** 2)
    except Exception:
        print('Q is too big, use constant!')
        Q = 1

    # and compute covariances
    # wrong: XX = (X(id,:)'*X(id,:))/n;
    # wrong: XZ = (z(id)'*X(id,:))/n;
    Xx = np.dot(X[idd, :].conj().T, Q * X[idd, :]) / n
    Xz = np.dot(z[idd].conj().T, Q * X[idd, :]) / n

    # solve the weighted least squares problem
    from numpy.linalg import inv
    XX_inv = inv(Xx)
    if (nargout == 2):
        return b, brmse

    # compute parameters
    b = np.dot(XX_inv, Xz.conj().T)

    # model residuals
    msz = np.dot(z[idd].conj().T, Q * z[idd]) / n
    msz = msz[0, 0]
    msr = msz - np.dot(np.dot(b.conj().T, Xx), b)
    msr = msr[0, 0]
    sk = 1 - msr / msz

    # and perhaps we want all variance estimates
    # mse = XX_inv(1)*msr/(n-m)
    brmse = np.sqrt(np.diag(XX_inv) * msr / (n - m))

    # get normalized error, based on convolution
    if (nargout == 7):
        # first compute regression weights, assuming the first input is all ones
        tmp = np.reshape(XX_inv[:, 0], (len(XX_inv[:, 0]), 1))
        bX = np.dot(X[idd, :], tmp)
        a = bX * w[idd]  # element-wise multiplication
        a = a / np.sum(a)
        # sum of squared weights is the normalized error; also a good estimate of dof
        nmse = np.dot(a.conj().T, a)

    return b, brmse, sk, n, msz, msr, nmse
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.feature_extraction.text import TfidfVectorizer

n = 0
docs = []
label = []
with open("DocumentClassification.txt", 'r') as f:
    n = int(f.readline())
    for i in range(n):
        l = f.readline().strip().split(' ')
        docs.append(' '.join(l[1:]))
        label.append(int(l[0]))

t = int(input())
for i in range(t):
    docs.append(input().strip())

# TfidfVectorizer.fit_transform already returns a scipy sparse matrix,
# so no extra conversion is needed.
vectorizer = TfidfVectorizer(min_df=1)
WordsTfs = vectorizer.fit_transform(docs)
TrainingWords = WordsTfs[:n]
PredictWords = WordsTfs[n:]

clf = GradientBoostingClassifier(n_estimators=200, learning_rate=.7, max_depth=1)
clf.fit(TrainingWords, label)
for w in PredictWords:
    print(clf.predict(w))
# northern telecom proposes two for one stock split
import numpy as np
import scipy.sparse as sp

gamma = Gamma_ac        # const.Gamma_ac/(1e6*2*pi): relaxation rate of the transmon.
gamma_phi = Gamma_Phi   # /(1e6*2*pi): dephasing rate of the transmon.
wd = f0                 # const.f0/1e6: drive frequency.

# ----------------------------------------
# From here on, it's purely Anton's code
# ----------------------------------------
N_gamma = 0.0  # 1/(exp(wd/T)-1): thermal population of the transmon phonon bath around wd.
dim = Nt       # Dimension of the total Hilbert space.

# Operators
It = sp.identity(Nt)                                        # Unity matrix for the transmon.
Itot = It                                                   # Unity matrix for the total Hilbert space.
tm = sp.csr_matrix(np.diag(np.sqrt(np.arange(1, Nt)), 1))   # Lowering operator for the transmon.
tp = tm.transpose().conjugate()                             # Raising operator for the transmon.
tdiag = sp.csr_matrix(np.diag(np.arange(Nt)))               # Diagonal (number) matrix for the transmon.
tdiag_l = sp.kron(Itot, tdiag)                              # tdiag operator multiplying rho from the left.
tm_l = sp.kron(Itot, tm)                                    # tm operator multiplying rho from the left.
p = -1.0j * (tp - tm)                                       # "P" operator.

# Dissipation terms
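# A quick sanity check of the lowering operator above, with a made-up dimension
# Nt = 4: tm maps the Fock state |2> to sqrt(2) |1>.
import numpy as np
import scipy.sparse as sp

Nt = 4
tm = sp.csr_matrix(np.diag(np.sqrt(np.arange(1, Nt)), 1))
ket2 = np.zeros(Nt)
ket2[2] = 1.0
print(tm.dot(ket2))  # [0. 1.41421356 0. 0.]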
# Interactive scratch: q, r, G, c, h and A come from the surrounding session.
index = abs(numpy.diag(r)) <= 1e-15
q[index][:, index].shape
r[index].shape
newA = q[index][:, index].dot(r[index])

# Sparse SVD of the constraint matrix.
import scipy.sparse
import scipy.sparse.linalg
sparseG = scipy.sparse.csc_matrix(G)
u, s, vt = scipy.sparse.linalg.svds(sparseG)

# Solve the LP with cvxopt, converting the dense matrices to cvxopt sparse form.
from cvxopt import sparse, spmatrix, matrix, solvers
sparse(matrix(G))
sol = solvers.lp(matrix(c), sparse(matrix(G)), matrix(h), sparse(matrix(A)), matrix(b))
sol1 = solvers.lp(matrix(c), sparse(matrix(G)), matrix(h), sparse(matrix(A)), matrix(b), solver='glpk')
sol1 = solvers.lp(matrix(c), sparse(matrix(G)), matrix(h), sparse(matrix(newA)), matrix(b), solver='glpk')
print(sol['x'])

c = matrix([-4., -5.])
G = matrix([[2., 1., -1., 0.], [1., 2., 0., -1.]])
h = matrix([3., 3., 0., 0.])
sol = solvers.lp(c, G, h)
sol = solvers.lp(c, G, h, solver='glpk')
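# For reference, a self-contained version of the small LP at the end of this
# snippet (same c, G, h as above; the GLPK variant additionally requires cvxopt
# to be built with GLPK support):
from cvxopt import matrix, solvers

# maximise 4x + 5y  subject to  2x + y <= 3,  x + 2y <= 3,  x >= 0, y >= 0
c = matrix([-4., -5.])
G = matrix([[2., 1., -1., 0.], [1., 2., 0., -1.]])  # each inner list is a column of G
h = matrix([3., 3., 0., 0.])

sol = solvers.lp(c, G, h)
print(sol['x'])  # optimal point, approximately (1, 1)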
# print(ww)
for vv in range(Global.time_pts):
    Model.TimeSeries[vv, ww] = numpy.zeros(Model.num_basis)
    for ii in range(Model.num_basis):
        tmp = Model.basis_nd(Global.sample_pts[vv][ww, :] - Model.Basis_Loc[ii, :],
                             Global.Data_Dim, Model.Basis_eps[ii])
        Model.TimeSeries[vv, ww][ii] = tmp
    # Normalise each time point so the basis weights sum to one.
    Model.TimeSeries[vv, ww] = Model.TimeSeries[vv, ww] / numpy.sum(Model.TimeSeries[vv, ww])

# Plot graphs of a few time points.
# G_tmp = graph(Model.MassMat)
# G_tmp = rmedge(G_tmp, range(1, numnodes(G_tmp)), range(1, numnodes(G_tmp)))

# Define useful quantities for later.
Model.InvSums = numpy.sum(Model.InvMassMat, axis=0)
Model.Indc = scipy.sparse.csr_matrix(Model.MassMat != 0)  # sparsity pattern of the mass matrix
Indc_sum = Model.Indc.sum(axis=0)
Indc_zero = (Model.MassMat == 0)
Indc_vec = numpy.flatnonzero(Model.MassMat != 0)
# Positions (within the non-zero entries) that lie on the diagonal.
Indc_diag = numpy.flatnonzero(numpy.isin(Indc_vec,
                                         numpy.arange(0, Model.num_basis ** 2,
                                                      Model.num_basis + 1)))

# Define initial guess at P matrix.
init_P = Model.Indc.toarray().astype(float)
for vv in range(Model.num_basis):
    init_P[vv, vv] = 0
    init_P[vv, vv] = -numpy.sum(init_P[:, vv])

# Calculate corresponding Q and vectorise non-zero entries.
init_Q = Model.MassMat * init_P
init_Q[Indc_zero] = 0
init_Q_vec = init_Q[~Indc_zero]

# Specify effective dimension of problem.
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.feature_extraction.text import TfidfVectorizer

n = 0
docs = []
label = []
with open("DocumentClassification.txt", 'r') as f:
    n = int(f.readline())
    for i in range(n):
        l = f.readline().strip().split(' ')
        docs.append(' '.join(l[1:]))
        label.append(int(l[0]))

t = int(input())
for i in range(t):
    docs.append(input().strip())

# TfidfVectorizer.fit_transform already returns a scipy sparse matrix,
# so no extra conversion is needed.
vectorizer = TfidfVectorizer(min_df=1)
WordsTfs = vectorizer.fit_transform(docs)
TrainingWords = WordsTfs[:n]
PredictWords = WordsTfs[n:]

clf = GradientBoostingClassifier(n_estimators=200, learning_rate=.7, max_depth=1)
clf.fit(TrainingWords, label)
for w in PredictWords:
    print(clf.predict(w))
# northern telecom proposes two for one stock split
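# The per-document loop above can be replaced by a single vectorised call;
# a small sketch using the same clf and PredictWords:
predictions = clf.predict(PredictWords)
for p in predictions:
    print(p)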
import scipy as sp
import scipy.sparse


def makematPA(Sphere_Coords, timein, configfile):
    """Make a Ntimeout*Nbeam*Nrng x Ntime*Nloc matrix. The output space will have
    range repeated first, then beams, then time. The coordinates will be
    [t0,b0,r0],[t0,b0,r1],[t0,b0,r2],... [t0,b1,r0],[t0,b1,r1], ...
    [t1,b0,r0],[t1,b0,r1],...[t1,b1,r0]..."""
    #
    (sensdict, simparams) = readconfigfile(configfile)
    timeout = simparams['Timevec']
    Tint = simparams['Tint']
    timeout = sp.column_stack((timeout, timeout + Tint))
    fullmat = True
    rng_vec = simparams['Rangegates']
    rng_bin = sensdict['t_s'] * v_C_0 / 1000.0
    sumrule = simparams['SUMRULE']
    #
    minrgbin = -sumrule[0].min()
    maxrgbin = len(rng_vec) - sumrule[1].max()
    minrg = minrgbin * rng_bin
    maxrg = maxrgbin * rng_bin
    angles = simparams['angles']
    Nbeams = len(angles)
    rho = Sphere_Coords[:, 0]
    Az = Sphere_Coords[:, 1]
    El = Sphere_Coords[:, 2]

    rng_vec2 = simparams['Rangegatesfinal']
    nrgout = len(rng_vec2)

    Nlocbeg = len(rho)
    Ntbeg = len(timein)
    Ntout = len(timeout)
    if fullmat:
        outmat = sp.matrix(sp.zeros((Ntout * Nbeams * nrgout, Nlocbeg * Ntbeg)))
    else:
        outmat = scipy.sparse.lil_matrix((Ntout * Nbeams * nrgout, Nlocbeg * Ntbeg),
                                         dtype=sp.float64)

    weights = {ibn: sensdict['ArrayFunc'](Az, El, ib[0], ib[1], sensdict['Angleoffset'])
               for ibn, ib in enumerate(angles)}

    for iton, ito in enumerate(timeout):
        overlaps = sp.array([getOverlap(ito, x) for x in timein])
        weights_time = overlaps / overlaps.sum()
        itpnts = sp.where(weights_time > 0)[0]

        # usually the matrix size is nbeams x nrange
        for ibn in range(Nbeams):
            print('\t\t Making Beam {0:d} of {1:d}'.format(ibn, Nbeams))
            weight = weights[ibn]
            for isamp in range(nrgout):
                # make the row
                irow = isamp + ibn * nrgout + iton * nrgout * Nbeams

                range_g = rng_vec2[isamp]
                rnglims = [range_g - minrg, range_g + maxrg]
                rangelog = (rho >= rnglims[0]) & (rho < rnglims[1])

                # This is a nearest-neighbour interpolation for the spectra in the range domain
                if sp.sum(rangelog) == 0:
                    minrng = sp.argmin(sp.absolute(range_g - rho))
                    rangelog[minrng] = True

                # create the weights and weight locations based on the beam pattern.
                weight_cur = weight[rangelog]
                weight_cur = weight_cur / weight_cur.sum()
                weight_loc = sp.where(rangelog)[0]

                w_loc_rep = sp.tile(weight_loc, len(itpnts))
                t_loc_rep = sp.repeat(itpnts, len(weight_loc))
                w_cur_rep = sp.tile(weight_cur, len(itpnts))
                icols = t_loc_rep * Nlocbeg + w_loc_rep

                weights_final = weights_time[t_loc_rep] * w_cur_rep * range_g**2 / rho[w_loc_rep]**2
                outmat[irow, icols] = weights_final

    return outmat
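# A tiny illustration of the row ordering described in the docstring, using the
# same irow formula as above (the sizes here are made up):
Ntout, Nbeams, nrgout = 2, 2, 3
for iton in range(Ntout):
    for ibn in range(Nbeams):
        for isamp in range(nrgout):
            irow = isamp + ibn * nrgout + iton * nrgout * Nbeams
            print(irow, (iton, ibn, isamp))
# 0 (0, 0, 0)
# 1 (0, 0, 1)
# 2 (0, 0, 2)
# 3 (0, 1, 0)
# ...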