def singvalsTestMat(matName):
    N = 1000
    if matName == 'lowRankMedNoiseR10':
        A = sp.load('TestMatrices/lowRankMedNoiseR10.npy')
    elif matName == 'lowRankHiNoiseR10':
        A = sp.load('TestMatrices/lowRankHiNoiseR10.npy')
    elif matName == 'polySlowR10':
        A = matrixPolyDecay(N, 1., R=10)
    elif matName == 'polyFastR10':
        A = matrixPolyDecay(N, 2., R=10)
    elif matName == 'expSlowR10':
        A = matrixExpDecay(N, 0.25, R=10)
    elif matName == 'expFastR10':
        A = matrixExpDecay(N, 1., R=10)
    # elif matName == 'SDlarge':
    #     A = sp.load('TestMatrices/SD_6135obs_10years_24kParams.npy')
    if matName == 'SDlarge':
        s = sp.load('TestMatrices/singvalsSDlarge.npy')
    else:
        s = svd(A, full_matrices=False, compute_uv=True,
                overwrite_a=False, check_finite=True)[1]
    return s
def svm_prastice_two():
    # Load the data
    movie_data = sp.load('movie_data.npy')
    movie_target = sp.load('movie_target.npy')
    x = movie_data
    y = movie_target
    # Split the dataset and vectorize it with the TF-IDF interface
    count_vec = TfidfVectorizer(binary=False, decode_error='ignore',
                                stop_words='english')
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    x_train = count_vec.fit_transform(x_train)
    x_test = count_vec.transform(x_test)
    ''' svm '''
    titles = [
        'LinearSVC (linear kernel)',
        'SVC with polynomial (degree 3) kernel',
        'SVC with RBF kernel',
        'SVC with Sigmoid kernel'
    ]
    clf_linear = svm.SVC(kernel='linear').fit(x_train, y_train)
    clf_poly = svm.SVC(kernel='poly', degree=3).fit(x_train, y_train)
    clf_rbf = svm.SVC().fit(x_train, y_train)
    clf_sigmoid = svm.SVC(kernel='sigmoid').fit(x_train, y_train)
    for i, clf in enumerate((clf_linear, clf_poly, clf_rbf, clf_sigmoid)):
        answer = clf.predict(x_test)
        print(titles[i])
        print(clf)
        print(np.mean(answer == y_test))
        print(answer)
        print(y_test)
def train(nn_name='12-net', k=12):
    """ Function for training 12-net with testing on part of data using cross validation """
    suff = str(k)
    if nn_name.find('calib') > 0:
        X_data_name = 'train_data_icalib_' + suff + '.npy'
        y_data_name = 'labels_icalib_' + suff + '.npy'
    else:
        X_data_name = 'train_data_' + suff + '.npy'
        y_data_name = 'labels_' + suff + '.npy'
    rates12 = sp.hstack((0.05 * sp.ones(25, dtype=sp.float32),
                         0.005 * sp.ones(15, dtype=sp.float32),
                         0.0005 * sp.ones(10, dtype=sp.float32)))
    rates24 = sp.hstack((0.01 * sp.ones(25, dtype=sp.float32),
                         0.0001 * sp.ones(15, dtype=sp.float32)))
    rates48 = sp.hstack([0.05 * sp.ones(15, dtype=sp.float32),
                         0.005 * sp.ones(10, dtype=sp.float32)])
    if nn_name == '24-net':
        nn = Cnnl(nn_name=nn_name, l_rates=rates24,
                  subnet=Cnnl(nn_name='12-net', l_rates=rates12).load_model('12-net_lasagne_.pickle'))
    elif nn_name == '48-net':
        nn = Cnnl(nn_name=nn_name, l_rates=rates48,
                  subnet=Cnnl(nn_name='24-net', l_rates=rates24,
                              subnet=Cnnl(nn_name='12-net', l_rates=rates12).load_model('12-net_lasagne_.pickle')
                              ).load_model('24-net_lasagne_.pickle'))
    else:
        nn = Cnnl(nn_name=nn_name, l_rates=rates12)
    if not os.path.exists(nn_name + '_lasagne_.pickle'):
        if nn_name.find('calib') > 0:
            ds.get_train_wider_calib_data(k=k)
        else:
            ds.get_train_data(k=k)
    X, y = sp.load(X_data_name), sp.load(y_data_name)
    X_train, y_train = X, y
    if not os.path.exists(nn_name + '_lasagne_.pickle'):
        if nn_name == '24-net':
            X_sub_train12 = sp.load('train_data_12.npy')
            nn.fit(X=X_train, y=y_train, X12=X_sub_train12)
        elif nn_name == '48-net':
            X_sub_train12 = sp.load('train_data_12.npy')
            X_sub_train24 = sp.load('train_data_24.npy')
            nn.fit(X=X_train, y=y_train, X12=X_sub_train12, X24=X_sub_train24)
        else:
            nn.fit(X=X_train, y=y_train)
        nn.save_model(nn_name + '_lasagne_.pickle')
def runTestForMat(inputs):
    [rank, matName, MainSubfolder, inneriter, OversVec, N] = inputs
    # Get specified test matrix:
    if matName == 'lowRankMedNoiseR10':
        A = sp.load('TestMatrices/lowRankMedNoiseR10.npy')
    elif matName == 'lowRankHiNoiseR10':
        A = sp.load('TestMatrices/lowRankHiNoiseR10.npy')
    elif matName == 'polySlowR10':
        A = matrixPolyDecay(N, 1., R=10)
    elif matName == 'polyFastR10':
        A = matrixPolyDecay(N, 2., R=10)
    elif matName == 'expSlowR10':
        A = matrixExpDecay(N, 0.25, R=10)
    elif matName == 'expFastR10':
        A = matrixExpDecay(N, 1., R=10)
    elif matName == 'SDlarge':
        #### A = sp.load('TestMatrices/SD_6135obs_10years_24kParams.npy')
        # Use the following as a surrogate for the large Jacobian (SD):
        A = sp.diag(sp.load('TestMatrices/singvalsSDlarge.npy')[0:1000])
    # Create folder for saving results:
    matName = matName + '_Rank' + str(rank)
    subfolder = MainSubfolder + matName
    newpath = os.getcwd() + '/' + subfolder
    if not os.path.exists(newpath):
        os.makedirs(newpath)
    # Run subspace iteration tests for test matrix:
    temptime = time.clock()
    main(A, rank, matName, subfolder, inneriter, OversVec, viewMax=8)
    print matName, 'Time spent on Test', time.clock() - temptime
def create_tag(self, mfcc_features_40):
    data_mean = mfcc_features_40.mean(axis=0).tolist()
    data_std = mfcc_features_40.std(axis=0).tolist()
    data_median = np.median(mfcc_features_40, axis=0).tolist()
    data_skev = skew(mfcc_features_40, axis=0).tolist()
    data_kurt = kurtosis(mfcc_features_40, axis=0).tolist()
    data = data_mean + data_std + data_median + data_skev + data_kurt
    coef = scipy.load(self.clust_dir + 'med_coef.npy')
    f_importances = scipy.load(self.clust_dir + 'feature_importances.npy')
    np_data = np.array(data)
    np_data = np_data / np.linalg.norm(np_data)
    datafor_lregr = np_data[f_importances > coef[1]]
    regr = joblib.load(self.clust_dir + 'lrc.pkl')
    data_regr = regr.predict_proba([datafor_lregr]).tolist()
    self.data = np_data[f_importances > coef[2]]
    data_km = []
    kmean1 = joblib.load(self.clust_dir + 'kmeans1.pkl')
    data_km += kmean1.predict([self.data]).tolist()
    kmean2 = joblib.load(self.clust_dir + 'kmeans2.pkl')
    data_km += kmean2.predict([self.data]).tolist()
    kmean3 = joblib.load(self.clust_dir + 'kmeans3.pkl')
    data_km += kmean3.predict([self.data]).tolist()
    kmean4 = joblib.load(self.clust_dir + 'kmeans4.pkl')
    data_km += kmean4.predict([self.data]).tolist()
    datafor_rfc = np_data[f_importances > coef[0]]
    rfc = joblib.load(self.clust_dir + 'rfc.pkl')
    self.ftag[rfc.predict(np.array([datafor_rfc.tolist() + data_regr[0] + data_km]))[0]] += 1
    self.clust = data_km[2]
def cv(nn_name, d_num=10000, k_fold=7, score_metrics='accuracy', verbose=0):
    suff = str(nn_name[:2])
    if nn_name.find('calib') > 0:
        X_data_name = 'train_data_icalib_' + suff + '.npy'
        y_data_name = 'labels_icalib_' + suff + '.npy'
    else:
        X_data_name = 'train_data_' + suff + '.npy'
        y_data_name = 'labels_' + suff + '.npy'
    X, y = sp.load(X_data_name), sp.load(y_data_name)
    d_num = min(len(X), d_num)
    X = X[:d_num]
    y = y[:d_num]
    rates12 = sp.hstack((0.05 * sp.ones(25, dtype=sp.float32),
                         0.005 * sp.ones(15, dtype=sp.float32),
                         0.0005 * sp.ones(10, dtype=sp.float32)))
    rates24 = sp.hstack((0.01 * sp.ones(25, dtype=sp.float32),
                         0.0001 * sp.ones(15, dtype=sp.float32)))
    rates48 = sp.hstack([0.05 * sp.ones(15, dtype=sp.float32),
                         0.005 * sp.ones(10, dtype=sp.float32)])
    if nn_name == '48-net':
        X12 = sp.load('train_data_12.npy')[:d_num]
        X24 = sp.load('train_data_24.npy')[:d_num]
    elif nn_name == '24-net':
        X12 = sp.load('train_data_12.npy')[:d_num]
    if score_metrics == 'accuracy':
        score_fn = accuracy_score
    else:
        score_fn = f1_score
    scores = []
    iteration = 0
    for t_indx, v_indx in util.kfold(X, y, k_fold=k_fold):
        nn = None
        X_train, X_test, y_train, y_test = X[t_indx], X[v_indx], y[t_indx], y[v_indx]
        #print('\t \t', str(iteration+1), 'fold out of ', str(k_fold), '\t \t')
        if nn_name == '24-net':
            nn = Cnnl(nn_name=nn_name, l_rates=rates24,
                      subnet=Cnnl(nn_name='12-net', l_rates=rates12).load_model('12-net_lasagne_.pickle'))
            nn.fit(X=X_train, y=y_train, X12=X12[t_indx])
        elif nn_name == '48-net':
            nn = Cnnl(nn_name=nn_name, l_rates=rates48,
                      subnet=Cnnl(nn_name='24-net', l_rates=rates24,
                                  subnet=Cnnl(nn_name='12-net', l_rates=rates12).load_model('12-net_lasagne_.pickle')
                                  ).load_model('24-net_lasagne_.pickle'))
            nn.fit(X=X_train, y=y_train, X12=X12[t_indx], X24=X24[t_indx])
        else:
            nn = Cnnl(nn_name=nn_name, l_rates=rates12, verbose=verbose)
            nn.fit(X=X_train, y=y_train)
        if nn_name == '24-net':
            y_pred = nn.predict(X_test, X12=X12[v_indx])
        elif nn_name == '48-net':
            y_pred = nn.predict(X_test, X12=X12[v_indx], X24=X24[v_indx])
        else:
            y_pred = nn.predict(X_test)
        score = score_fn(y_test, y_pred)
        #print(iteration, 'fold score', score)
        scores.append(score)
        iteration += 1
    score_mean = sp.array(scores).mean()
    print(d_num, 'mean score', score_mean)
    return score_mean
def ParseToDataContainers(self, Delimiter=None):
    # Parse an input file into the DataContainers object
    DCs = DataContainer.DataContainers()
    if(re.search('.npy', self.GetName())):
        Arrays = None
        if(self.GetboCompressed()):
            Arrays = scipy.load(self.GetDecomprName())
        else:
            Arrays = scipy.load(self.GetName())
        Header = Arrays[:, 0].tolist()
        for i in xrange(len(Header)):
            Name = Header[i]
            # The names of the datacontainers are determined by the
            # header column names.
            DCs.DataContainers[Name] = DataContainer.DataContainer()
            DCs.Names2Columns[Name] = i
            DCs.Columns2Names[i] = Name
            DCs.DataContainers[Name].SetDataArray(Arrays[i, 1:])
            DCs.DataContainers[Name].SetDataName(Name)
        del Arrays
    else:
        Line = self.GetFileHandle().readline()
        if(self.GetboHeader()):
            Line = re.sub('#', '', Line)
            Names = Line.strip().split(Delimiter)  # The file should be space or tab delimited!
            for i in range(len(Names)):
                Name = Names[i]
                # The names of the datacontainers are determined by the
                # header column names.
                DCs.DataContainers[Name] = DataContainer.DataContainer()
                DCs.Names2Columns[Name] = i
                DCs.Columns2Names[i] = Name
                DCs.DataContainers[Name].InitDataArray()
                DCs.DataContainers[Name].SetDataName(Name)
        else:
            LSplit = Line.strip().split(Delimiter)
            for i in range(len(LSplit)):
                Name = str(i)
                DCs.DataContainers[Name] = DataContainer.DataContainer()
                DCs.Names2Columns[Name] = i
                DCs.Columns2Names[i] = Name
                DCs.DataContainers[Name].InitDataArray()
                DCs.DataContainers[Name].SetDataName(Name)
                Entry = LSplit[i]
                DCs.DataContainers[Name].AppendToArray(Entry)
        for Line in self.GetFileHandle():
            LSplit = Line.strip().split(Delimiter)
            for i in range(len(LSplit)):
                Name = DCs.Columns2Names[i]
                Entry = LSplit[i]
                DCs.DataContainers[Name].AppendToArray(Entry)
        for Key in DCs.DataContainers.iterkeys():
            DCs.DataContainers[Key].CastDataArrayToScipy()  # Make scipy.arrays of the lists.
    return DCs
def save_andor_load_arrays(endog, exog, true_params, save_arrays, load_old_arrays):
    if save_arrays:
        sp.save("endog.npy", endog)
        sp.save("exog.npy", exog)
        sp.save("true_params.npy", true_params)
    if load_old_arrays:
        endog = sp.load("endog.npy")
        exog = sp.load("exog.npy")
        true_params = sp.load("true_params.npy")
    return endog, exog, true_params
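# A minimal round-trip sketch (not from the original source) showing how the
# helper above could be exercised; the random arrays and their shapes are
# assumptions made only for illustration.
import scipy as sp

endog = sp.random.rand(100)
exog = sp.random.rand(100, 3)
true_params = sp.array([1.0, -0.5, 2.0])
# First call writes the .npy files, second call reads them back.
save_andor_load_arrays(endog, exog, true_params, save_arrays=True, load_old_arrays=False)
endog2, exog2, true_params2 = save_andor_load_arrays(None, None, None,
                                                     save_arrays=False, load_old_arrays=True)
assert sp.allclose(endog, endog2)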
def load(cls, dirname):
    import json
    model = cls()
    params = json.load(open("{}/params.json".format(dirname)))
    model.alpha = params["alpha"]
    model.beta = params["beta"]
    model.n_topics = params["n_topics"]
    model.n_docs = params["n_docs"]
    model.n_words = params["n_words"]
    model.B = params["B"]
    model.phi = sp.load("{}/phi.npy".format(dirname))
    model.theta = sp.load("{}/theta.npy".format(dirname))
    return model
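# Hedged counterpart sketch (an assumption, not taken from the source): a save
# method writing the same params.json / phi.npy / theta.npy layout that the
# loader above reads back.
def save(self, dirname):
    import json
    import os
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    params = {"alpha": self.alpha, "beta": self.beta,
              "n_topics": self.n_topics, "n_docs": self.n_docs,
              "n_words": self.n_words, "B": self.B}
    with open("{}/params.json".format(dirname), "w") as f:
        json.dump(params, f)
    sp.save("{}/phi.npy".format(dirname), self.phi)
    sp.save("{}/theta.npy".format(dirname), self.theta)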
def execute(self, nprocesses=1):
    params = self.params
    boxshape = params['boxshape']
    boxunit = params['boxunit']
    resultf = params['hr'][0]
    if len(params['last']) != 0:
        resultf = resultf + params['last'][0]
    resultf = resultf + '-' + params['hr'][1]
    if len(params['last']) != 0:
        resultf = resultf + params['last'][1]
    FKPweight = params['FKPweight']
    in_root = params['input_root']
    out_root = params['output_root']
    mid = params['mid']
    fkpp = params['FKPpk']
    WindowF_fname = out_root + 'WindowF_' +\
        str(boxshape[0]) + 'x' + str(boxshape[1]) + 'x' +\
        str(boxshape[2]) + 'x' + str(boxunit) + '_' + resultf
    kWindowF_fname = out_root + 'k_WindowF_' +\
        str(boxshape[0]) + 'x' + str(boxshape[1]) + 'x' +\
        str(boxshape[2]) + 'x' + str(boxunit) + '_' + resultf
    print WindowF_fname
    try:
        WindowF = sp.load(WindowF_fname + '.npy')
        k = sp.load(kWindowF_fname + '.npy')
    except IOError:
        print '\tWindow Function ReMake'
        WindowF, k = self.GetWindowFunctionData()
        non0 = WindowF.nonzero()
        sp.save(WindowF_fname, WindowF)
        sp.save(kWindowF_fname, k)
    #txtf = open(out_root+'window_for_idl.txt', 'w')
    #try:
    #    for i in range(len(WindowF)):
    #        if WindowF[i]==0: continue
    #        print >>txtf, '{0} {1}'.format(k[i], WindowF[i])
    #finally:
    #    txtf.close()
    return WindowF, k
def make_video(image_dir, filename="vidout.avi", fixation_file=None):
    MPEG_FOURCC = 827148624
    vwriter = cv2.VideoWriter()
    if fixation_file is not None:
        fixations = sp.load(fixation_file)
        fixations[sp.isnan(fixations)] = -100
        fixations[abs(fixations) > 1000] = 1000
    else:
        fixations = []
    im_base_name = "cam1_frame_"
    im_extension = ".bmp"
    suc = vwriter.open(os.path.join(image_dir, filename),
                       cv.CV_FOURCC('M', 'J', 'P', 'G'), 30, (640, 480))
    if not suc:
        raise IOError("Failed to open movie")
    for frame_num in xrange(1000):
        im_name = "".join([im_base_name, str(frame_num), im_extension])
        im_path = os.path.join(image_dir, im_name)
        im = cv2.imread(im_path)
        if len(fixations) != 0:
            cv2.circle(im, tuple(fixations[frame_num]), 3, (255, 255, 255))
        vwriter.write(im)
def loadTFIDF(path):
    weight = sp.load('tfidf_weight.npy')
    fp = codecs.open('tfidf_words.txt', 'r', 'utf-8')
    words = json.load(fp)
    fp.close()
    return words, weight
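# Hedged counterpart sketch (not from the original source): one way the two
# files read by loadTFIDF could have been written; saveTFIDF and its argument
# names are assumptions.
def saveTFIDF(words, weight):
    sp.save('tfidf_weight.npy', weight)            # dense TF-IDF weight matrix
    fp = codecs.open('tfidf_words.txt', 'w', 'utf-8')
    json.dump(words, fp)                           # vocabulary as a JSON list
    fp.close()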
def process_newton_args(args, log):
    log("Newton Solver Setup")
    log("-------------------")
    mcmcsteps = args.mcsteps
    log("Running {} Newton-MCMC rounds".format(mcmcsteps))
    param = {'mcmcsteps': args.mcsteps,
             'newtonSteps': args.newtonsteps,
             'gamma0': args.gamma,
             'pcdamping': args.damping,
             'jclamp': args.jclamp,
             'resetseqs': args.resetseqs,
             'preopt': args.preopt}
    p = attrdict(param)
    cutoffstr = ('dJ clamp {}'.format(p.jclamp) if p.jclamp != 0
                 else 'no dJ clamp')
    log(("Updating J locally with gamma = {}, {}, and pc-damping {}. "
         "Running {} Newton update steps per round.").format(
             p.gamma0, cutoffstr, p.pcdamping, p.newtonSteps))
    log("Reading target marginals from file {}".format(args.bimarg))
    bimarg = scipy.load(args.bimarg)
    if bimarg.dtype != dtype('<f4'):
        # could convert, but this helps warn that something may be wrong
        raise Exception("Bimarg in wrong format")
    if any(~((bimarg.flatten() >= 0) & (bimarg.flatten() <= 1))):
        raise Exception("Bimarg must be nonzero and 0 < f < 1")
    log("Target Marginals: " + printsome(bimarg) + "...")
    log("")
    p['bimarg'] = bimarg
    return p
def load(self, filename):
    r'''
    Loads a previously saved object's data onto a new, empty Generic object

    Parameters
    ----------
    filename : string
        The file containing the saved object data in Numpy zip format (npz)

    Examples
    --------
    >>> pn = OpenPNM.Network.Cubic(shape=[3,3,3])
    >>> pn.save('test_pn')
    >>> gn = OpenPNM.Network.GenericNetwork()
    >>> gn.load('test_pn')
    >>> # Remove newly created file
    >>> import os
    >>> os.remove('test_pn.npz')
    '''
    if (self.Np == 0) and (self.Nt == 0):
        filename = filename.split('.')[0] + '.npz'
        temp = sp.load(filename)
        data_dict = temp['data'].item()
        info_dict = temp['info'].item()
        self.update(data_dict)
        self._name = info_dict['name']
        temp.close()
    else:
        raise Exception('Cannot load saved data onto an active object')
def test_BCSSTK15(self):
    '''
    A uses BCSSTK15, found at
    https://math.nist.gov/MatrixMarket/data/Harwell-Boeing/bcsstruc2/bcsstk15.html
    x is generated by scipy.random.rand
    3948 x 3948, 60882 entries, 99.61% sparsity
    '''
    data = sp.load('./BCSSTK15.npz')
    self._test_cg(self.test_BCSSTK15.__name__, linalg.solve, data['A'], data['b'], data['x'])
    self._test_cg(self.test_BCSSTK15.__name__, BookCG, data['A'], data['b'], data['x'])
    self._test_cg(self.test_BCSSTK15.__name__, WikiCG, data['A'], data['b'], data['x'])
    # scipy.sparse.linalg.cg needs about 1.5 hours to benchmark it
    # self._test_cg(self.test_BCSSTK15.__name__, ScipyCG, csc_matrix(data['A']), data['b'], data['x'])
    self._test_cg(self.test_BCSSTK15.__name__, ScipyCGS, csc_matrix(data['A']), data['b'], data['x'])
    # scipy.sparse.linalg.bicg needs about 2.6 hours to benchmark it
    # self._test_cg(self.test_BCSSTK15.__name__, ScipyBicG, csc_matrix(data['A']), data['b'], data['x'])
    self._test_cg(self.test_BCSSTK15.__name__, ScipyBicGStab, csc_matrix(data['A']), data['b'], data['x'])
    self._test_cg(self.test_BCSSTK15.__name__, ScipySpSolve, csc_matrix(data['A']), data['b'], data['x'])
def load_feature_matrix(src, dtype=sp.float32):
    if src.endswith(".npz"):
        return smat.load_npz(src).tocsr().astype(dtype)
    elif src.endswith(".npy"):
        return smat.csr_matrix(sp.ascontiguousarray(sp.load(src), dtype=dtype))
    else:
        raise ValueError("src must end with .npz or .npy")
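# Minimal usage sketch (the file paths are assumptions): the loader returns a
# CSR matrix for both sparse .npz and dense .npy inputs.
X_trn = load_feature_matrix("X.trn.npz")                      # sparse, saved with smat.save_npz
X_tst = load_feature_matrix("X.tst.npy", dtype=sp.float64)    # dense, saved with sp.save
print(X_trn.shape, X_trn.dtype, X_tst.shape, X_tst.dtype)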
def loadMatrix(self, fname):
    """ Loads the free Hamiltonian and potential matrices from file """
    f = scipy.load(fname)
    self.L = f['arr_0'].item()
    self.m = f['arr_1'].item()
    Emax = f['arr_2'].item()
    nmax = f['arr_3'].item()
    bcs = f['arr_4'].item()
    self.buildFullBasis(L=self.L, m=self.m, Emax=Emax, bcs=bcs)
    basisI = self.fullBasis
    basisJ = self.fullBasis
    print(basisI.size)
    self.h0 = Matrix(basisI, basisJ,
                     scipy.sparse.coo_matrix((f['arr_5'], (f['arr_6'], f['arr_7'])),
                                             shape=(basisI.size, basisJ.size)))
    self.potential = Matrix(basisI, basisJ,
                            scipy.sparse.coo_matrix((f['arr_8'], (f['arr_9'], f['arr_10'])),
                                                    shape=(basisI.size, basisJ.size)))
def from_file(fname):
    """Load model from an npz file"""
    params = dict(sc.load(fname).items())
    model = Model(fname, **params)
    if "seed" in params:
        model.set_seed(model["seed"])
    return model
def wrapper(cls, path, **kwargs):
    state = {}
    try:
        state = scipy.load(path, allow_pickle=True)[()]
    except IOError:
        raise IOError('Invalid path: {}.'.format(path))
    cls.logger.info('Loading {}: {}.'.format(cls.__name__, path))
    return loadstateclass(func)(cls, state, **kwargs)
def load(cls, path):
    state = {}
    try:
        state = scipy.load(path)[()]
    except IOError:
        raise IOError('Invalid path: {}.'.format(path))
    cls.logger.info('Loading {}: {}.'.format(cls.__name__, path))
    return cls.loadstate(state)
def load_est_data_VA(data_flag, IC):
    in_dir = '%s/objects/%s' % (DATA_DIR, data_flag)
    with gzip.open('%s/obj_IC=%s.pklz' % (in_dir, IC), 'rb') as f:
        obj = cPickle.load(f)
    params = sp.load('%s/params_IC=%s.npy' % (in_dir, IC))
    paths = sp.load('%s/paths_IC=%s.npy' % (in_dir, IC))
    errors = sp.load('%s/action_errors_IC=%s.npy' % (in_dir, IC))
    est_dict = dict()
    est_dict['obj'] = obj
    est_dict['params'] = params
    est_dict['paths'] = paths
    est_dict['errors'] = errors
    return est_dict
def test_brown_clustering():
    fname = "test-data/text-1e2.npz"
    F = sc.load(fname)
    C, D = F['C'], F['D']
    k = 100
    W = 1000
    bc = BrownClusteringAlgorithm(C)
    bc.run(k, W)
def getCouplings(args, L, nB, bimarg, log):
    couplings = None

    if args.seqmodel and args.seqmodel in ['zero', 'logscore']:
        args.couplings = args.seqmodel

    if args.couplings is not None:
        # first try to generate couplings (requires L, nB)
        if args.couplings in ['zero', 'logscore']:
            if L is None:  # we are sure to have nB
                raise Exception("Need L to generate couplings")
            if args.couplings == 'zero':
                log("Setting Initial couplings to 0")
                couplings = zeros((L*(L-1)/2, nB*nB), dtype='<f4')
            elif args.couplings == 'logscore':
                log("Setting Initial couplings to Independent Log Scores")
                if bimarg is None:
                    raise Exception("Need bivariate marginals to generate "
                                    "logscore couplings")
                h = -np.log(getUnimarg(bimarg))
                J = zeros((L*(L-1)/2, nB*nB), dtype='<f4')
                couplings = fieldlessGaugeEven(h, J)[1]
        else:
            # otherwise load them from file
            log("Reading couplings from file {}".format(args.couplings))
            couplings = scipy.load(args.couplings)
            if couplings.dtype != dtype('<f4'):
                raise Exception("Couplings must be in 'f4' format")
    elif args.seqmodel and args.seqmodel not in ['zero', 'logscore']:
        # and otherwise try to load them from model directory
        fn = os.path.join(args.seqmodel, 'J.npy')
        if os.path.isfile(fn):
            log("Reading couplings from file {}".format(fn))
            couplings = scipy.load(fn)
            if couplings.dtype != dtype('<f4'):
                raise Exception("Couplings must be in 'f4' format")
            L2, nB2 = seqsize_from_param_shape(couplings.shape)
            L, nB = updateLnB(L, nB, L2, nB2, 'couplings')

    if couplings is None:
        raise Exception("Could not find couplings. Use either the "
                        "'couplings' or 'seqmodel' options.")

    return couplings, L, nB
def main():
    """ Naive Bayes implementation """
    # Load the data
    movies_reviews = load_files("./data/tokens")
    sp.save('./data/movie_data.npy', movies_reviews.data)
    sp.save('./data/movie_target.npy', movies_reviews.target)
    movie_data = sp.load('./data/movie_data.npy')
    movie_target = sp.load('./data/movie_target.npy')
    x = movie_data
    y = movie_target
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    count_vec = TfidfVectorizer(binary=False, decode_error='ignore',
                                stop_words="english")
    # Vectorize the training data
    x_train = count_vec.fit_transform(x_train)
    x_test = count_vec.transform(x_test)
    # Classifier
    clf = MultinomialNB().fit(x_train, y_train)
    # doc_pred = clf.predict(x_test)
    # print("mean:", np.mean(doc_pred == y_test))
    # clf.score can be used instead of the mean above
    score = clf.score(x_test, y_test)
    print("score:", score)
    # Precision and recall
    precision, recall, thresholds = precision_recall_curve(y_test, clf.predict(x_test))
    answer = clf.predict_proba(x_test)[:, 1]
    report = answer > 0.5
    print(classification_report(y_test, report, target_names=['net', 'pos']))
    # Feature names
    # print(count_vec.get_feature_names())
    # Save the model
    model_path = "./models/clf_bayes.model"
    joblib.dump(clf, model_path, compress=0)
def full_image(i=None):
    global _full
    if _full is None:
        path = Path('cache/full.npy')
        if not path.exists():
            ims = [_full_image(i) for i in range(1, COUNTS['full'] + 1)]
            sp.save(path, sp.stack(ims))
        _full = sp.load(path)
    ims = _full[i - 1] if i is not None else _full
    return ims
def from_file(fname):
    """Load model from an npz file"""
    if not fname.endswith(".npz"):
        fname += ".npz"
    params = dict(sc.load(fname).items())
    model = Model(fname, **params)
    if "seed" in params:
        model.set_seed(model.get_parameter("seed"))
    return model
def calc_dispersion_npy(self, src, dst, axis=1):
    data = sp.load(src)
    background = data[0, :, :, :, :]
    data = data - background[None, :, :, :, :]
    disp = sp.sum(sp.absolute(
        fftpack.fftshift(fftpack.fft2(data, axes=(0, axis)), axes=(0, axis))),
        axis=tuple([a for a in range(5) if a not in (axis, 0)]))
    sp.save(dst, disp)
    return 0
def returnABmatsConFluxesEnthalpies(NEQ, strlcount, timestepnr, timestepnrB):
    # Open and read adjoint file:
    if timestepnr == -1:
        # Read nat state file
        adjfile = h5py.File('natFWD' + strlcount + '.h5', 'r')
    else:
        # Read production file
        adjfile = h5py.File('prodFWD' + strlcount + '.h5', 'r')
    # Read current time-step from file:
    dt = adjfile['fulltimes']['DELTEX'][timestepnr]
    fwdrows = sp.load('fwdrows.npy')
    fwdcols = sp.load('fwdcols.npy')
    accrows = sp.load('accrows.npy')
    acccols = sp.load('acccols.npy')
    zerosAmat = sp.load('zerosAmat.npy')
    onesAmat = sp.load('onesAmat.npy')
    zerosBmat = sp.load('zerosBmat.npy')
    # Assemble forward matrix from file for the latest time-step:
    fwdels = adjfile['adjoint/fwdmat/co'][:, timestepnr]
    # Adjust values for large or constant boundary blocks:
    fwdels[zerosAmat] = 0.
    fwdels[onesAmat] = 1.
    A = csr_matrix((fwdels, (fwdrows, fwdcols)), shape=(NEQ, NEQ))  # FWD matrix
    if (timestepnrB == -1) and (timestepnr == 0):
        # First production time-step using direct method
        # Read B matrix from last natural state time:
        natadjfile = h5py.File('natFWD' + strlcount + '.h5', 'r')
        accels = natadjfile['adjoint/accmat/co'][:, timestepnrB]
        natadjfile.close()
    else:
        accels = adjfile['adjoint/accmat/co'][:, timestepnrB]
    # Adjust values for large or constant boundary blocks:
    zerosBmat = sp.load('zerosBmat.npy')
    accels[zerosBmat] = 0.
    B = csr_matrix((accels, (accrows, acccols)), shape=(NEQ, NEQ))  # ACC matrix
    # Read flux information at block interfaces:
    fluxw = adjfile['adjoint/flowenth'][timestepnr, :, 2]  # liquid phase interface fluxes
    enthw = adjfile['adjoint/flowenth'][timestepnr, :, 3]  # liquid phase interface enthalpies
    fluxg = adjfile['adjoint/flowenth'][timestepnr, :, 0]  # vapour phase interface fluxes
    enthg = adjfile['adjoint/flowenth'][timestepnr, :, 1]  # vapour phase interface enthalpies
    adjfile.close()
    return dt, A, B, fluxw, enthw, fluxg, enthg
def draw_raw_signal_around_genes(raw_signals, out_png, windowsize=20000):
    """draw the raw signals as computed by make_raw_signal_around_genes"""
    gene_expr = filter(lambda f: 'gene_expr' in f, raw_signals)
    reads = filter(lambda f: 'gene_expr' not in f and 'matched_size' not in f,
                   raw_signals)
    pyplot.figure()
    f, plots = pyplot.subplots(1, len(reads) + 1, sharex=False, sharey=True,
                               squeeze=False)
    #sig_min = reduce(min, map(min, map(sp.load, reads)))
    #sig_max = reduce(max, map(max, map(sp.load, reads)))
    for i, read_sig in enumerate(reads):
        #plots[i+1].imshow(sp.load(read_sig), interpolation='nearest', vmin=sig_min, vmax=sig_max)
        plots[0, i + 1].imshow(sp.ma.filled(sp.load(read_sig), fill_value=0).T,
                               interpolation='nearest', aspect=.05)
        plots[0, i + 1].text(0, 0, read_sig.split('gene.expression.')[1].split('.')[0],
                             rotation=30, verticalalignment='bottom')
    gexpr_ma = sp.load(gene_expr[0]).astype(float)
    plots[0, 0].imshow(sp.ma.filled(gexpr_ma.reshape(1, gexpr_ma.shape[0]), fill_value=0).T,
                       interpolation='nearest', aspect=.002)
    #yticks(sp.arange())
    shape = sp.load(read_sig).shape
    pyplot.xticks(sp.arange(0, shape[0] + shape[0] / 4, shape[0] / 4),
                  sp.arange(-windowsize / 2, windowsize / 2 + windowsize / 4, windowsize / 4))
    f.savefig(out_png)
    pyplot.close('all')
def AandBmatZeroOneFill(h5filename):
    # Open and read HDF5 output file:
    # (the natural state file may typically be smaller than the production file)
    adjfile = h5py.File(h5filename, 'r')
    # IRN (forward matrix row indices), ICN (forward matrix column indices) as an array
    fwdrows = adjfile['adjoint/fwdmat/irnicn'][:, 0] - 1  # Minus 1 to convert indexing from FORTRAN to Python
    fwdcols = adjfile['adjoint/fwdmat/irnicn'][:, 1] - 1  # Minus 1 to convert indexing from FORTRAN to Python
    sp.save('fwdrows.npy', fwdrows)
    sp.save('fwdcols.npy', fwdcols)
    # IRN (accumulation matrix row indices), ICN (accumulation matrix column indices) as an array
    accrows = adjfile['adjoint/accmat/irnicn'][:, 0] - 1  # Minus 1 to convert indexing from FORTRAN to Python
    acccols = adjfile['adjoint/accmat/irnicn'][:, 1] - 1  # Minus 1 to convert indexing from FORTRAN to Python
    sp.save('accrows.npy', accrows)
    sp.save('acccols.npy', acccols)
    # Indices for large constant boundary blocks:
    VOLS = sp.load('Volumes.npy')
    zerosAmat = []  # Indices for FWD matrix elements to be set to zero
    onesAmat = []   # Indices for FWD matrix elements to be set to one
    for i in range(0, len(fwdrows)):
        irn = fwdrows[i]
        icn = fwdcols[i]
        rVol = VOLS[int(irn / 2)]  # SHOULD PROBABLY USE NK1 instead of 2 ????
        cVol = VOLS[int(icn / 2)]
        if (rVol > 1.E20) or (rVol < 0.) or (cVol > 1.E20) or (cVol < 0.):
            if (irn == icn):
                onesAmat.extend([i])
            else:
                zerosAmat.extend([i])
    zerosBmat = []  # Indices for ACC matrix elements to be set to zero
    for i in range(0, len(accrows)):
        irn = accrows[i]
        icn = acccols[i]
        rVol = VOLS[int(irn / 2)]  # SHOULD PROBABLY USE NK1 instead of 2 ????
        cVol = VOLS[int(icn / 2)]
        if (rVol > 1.E20) or (rVol < 0.) or (cVol > 1.E20) or (cVol < 0.):
            zerosBmat.extend([i])
    sp.save('zerosAmat.npy', zerosAmat)
    sp.save('onesAmat.npy', onesAmat)
    sp.save('zerosBmat.npy', zerosBmat)
    adjfile.close()
    return
def loadMSER_npy(fn=nn_data_sets.NN_DATA_MSER, datadir=NN_DATA_DIR):
    '''
    As a shortcut to loading the MSER data set from the 1000's of files found
    in the ukbench_extract folder, one should call loadMSER() once, and save
    the resulting numpy array to a single file. This function assumes you have
    done so, and will load the MSER data from the specified numpy file.
    @Note: This function really doesn't do anything but put comments around
    the use of numpy.load(...). Use numpy.save(filename, M) to create the
    saved file in the first place.
    '''
    return scipy.load(os.path.join(datadir, fn))
def read_file(name):
    fname = fname_template % name
    if os.path.exists(fname + '_x.npy') and os.path.exists(fname + '_y.npy'):
        xs = scipy.load(fname + '_x.npy')
        ys = scipy.load(fname + '_y.npy')
        return xs, ys
    elif os.path.exists(fname):
        with open(fname) as fh:
            lines = fh.readlines()
        ns = len(lines)
        x, y = numpy.ndarray(ns), numpy.ndarray(ns, dtype=complex)
        for i, l in enumerate(lines):
            xr, yr = l.split('\t')
            x[i] = eval(xr)
            y[i] = eval(yr)
        return x, y
    else:
        print >>sys.stderr, 'File %s is missing' % (fname_template % name,)
        #sys.exit(1)
        return None, None
def get_train_data(n_pos=31929, n_neg=164863, k=12):
    '''
    merge positive and negative examples
    '''
    suff = str(k)
    X_name = 'train_data_' + suff + '.npy'
    y_name = 'labels_' + suff + '.npy'
    if not (os.path.exists(X_name) and os.path.exists(y_name)):
        X_train_face, y_train_face = Datasets.get_train_face_wider_data(k=k)
        #X_pos = X_train_face[y_train_face==1]
        X_pos = X_train_face
        X_aflw, y_train_face_aflw = Datasets.get_aflw_face_data(k=k)
        X_pos = sp.vstack([X_pos, X_aflw])
        X_train_non_face, y_train_non_face = Datasets.get_train_non_face_data(k=k)
        print('c1_pos:', len(X_pos))
        if len(X_train_face[y_train_face == 0]) > 0:
            X_neg = sp.vstack((X_train_face[y_train_face == 0], X_train_non_face))
        else:
            X_neg = X_train_non_face
        X_pos = shuffle(X_pos, random_state=42)
        X_neg = shuffle(X_neg, random_state=42)
        X_pos = X_pos[:n_pos]
        X_neg = X_neg[:n_neg]
        n_neg = len(X_neg)
        n_pos = len(X_pos)
        y_pos = sp.ones(n_pos, int)
        y_neg = sp.zeros(n_neg, int)
        X = sp.vstack((X_pos, X_neg))
        y = sp.hstack((y_pos, y_neg))
        X, y = shuffle(X, y, random_state=42)
        sp.save(X_name, X)
        sp.save(y_name, y)
    else:
        X = sp.load(X_name)
        y = sp.load(y_name)
    print("Done", "Positive examples count, Negative examples count:",
          len(y[y == 1]), len(y[y == 0]))
def resample(name='NAO1', fs=2., **kwargs):
    # load the data
    datpath = '../output/'
    datname = 'nao_indices_2013-10-05_minmax.npz'
    data = sp.load(datpath + datname)
    NAO = data[name].tolist()
    # interpolate the index to an evenly sampled time grid
    fs = 4.  # := sampling frq = 4 => 4 samples/yr => inter-sample time = 0.25 yr
    f_nao1 = interp1d(NAO['time'].squeeze(), NAO['index'].squeeze())
    t_nao1_new = sp.arange(NAO['time'].min(), NAO['time'].max() + 0.1, 1 / fs)
    y_nao1_new = f_nao1(t_nao1_new)
    return t_nao1_new, y_nao1_new
def load_def_data(path_to_model, file_name_prefix, iter_type=None, iter_val=None):
    """Load saved def_data for the batch."""
    if iter_type == '_norm':
        file_name_data = file_name_prefix + '_data' + iter_type + '_%.1f.npz' % iter_val
    elif iter_type == '_iter':
        file_name_data = file_name_prefix + '_data' + iter_type + '_%i.npz' % iter_val
    else:
        print('error - fix load_batch func!')
    data = scipy.load(path_to_model + file_name_data)['a'][()]
    return data
def read_fft(genre_list, base_dir=GENRE_DIR):
    X = []
    y = []
    labels = []
    for label, genre in enumerate(genre_list):
        genre_dir = os.path.join(base_dir, genre, "*.fft.npy")
        file_list = glob.glob(genre_dir)
        for fn in file_list:
            fft_features = scipy.load(fn)
            X.append(fft_features[:1000])
            y.append(label)
    return np.array(X), np.array(y)
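# Hedged sketch of the feature-extraction step the reader above assumes
# (create_fft and the .wav layout are assumptions, not taken from this source):
# each track is converted once into a <name>.fft.npy file holding the
# magnitudes of its leading FFT coefficients.
import os
import numpy as np
import scipy
from scipy.io import wavfile

def create_fft(wav_fn):
    sample_rate, X = wavfile.read(wav_fn)
    fft_features = np.abs(np.fft.fft(X)[:1000])   # keep the first 1000 coefficients
    out_fn = os.path.splitext(wav_fn)[0] + ".fft.npy"
    scipy.save(out_fn, fft_features)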
def quadrant_image(i=None):
    global _quadrant
    if _quadrant is None:
        path = Path('cache/quadrant.npy')
        if not path.exists():
            ims = [_quadrant_image(i) for i in range(1, COUNTS['quadrant'] + 1)]
            sp.save(path, sp.stack(ims))
        _quadrant = sp.load(path)
    ims = _quadrant[i - 1] if i is not None else _quadrant
    return ims
def regjac(NRadj):
    NRadjHalf = NRadj / 2
    mpr = sp.load('mprior.npy')
    Npr = len(mpr)
    Nregsmooth = 2 * 15820  # 2 times the number of connections between adjustable rock-types
    Nreglocalxz = 8000      # Number of adjustable rock-types
    Nreg = Nregsmooth + Nreglocalxz + Npr
    sp.save('Nreg.npy', Nreg)
    rJac = lil_matrix((Nreg, NRadj))
    x = 0
    # Create horizontal smoothing of log10kx (perm index 1):
    for i in range(0, 80):
        for j in range(0, 99):
            rJac[x, j + i * 100] = 1
            rJac[x, j + i * 100 + 1] = -1
            x += 1
    # Create vertical smoothing of log10kx (perm index 1):
    for i in range(0, 79):
        for j in range(0, 100):
            rJac[x, j + i * 100] = 1
            rJac[x, j + (i + 1) * 100] = -1
            x += 1
    # Create horizontal smoothing of log10kz (perm index 3):
    for i in range(0, 80):
        for j in range(0, 99):
            rJac[x, j + i * 100 + NRadjHalf] = 1
            rJac[x, j + i * 100 + 1 + NRadjHalf] = -1
            x += 1
    ## Create vertical smoothing of log10kz (perm index 3):
    for i in range(0, 79):
        for j in range(0, 100):
            rJac[x, j + i * 100 + NRadjHalf] = 1
            rJac[x, j + (i + 1) * 100 + NRadjHalf] = -1
            x += 1
    # Add regularization to make log10kx similar to log10kz:
    for i in range(0, Nreglocalxz):
        rJac[x, i] = 1
        rJac[x, i + NRadjHalf] = -1
        x += 1
    # Add prior parameter regularisation:
    for i in range(0, Npr):
        rJac[x, i] = 1 * 0.001
        x += 1
    return csr_matrix(rJac)
def read_fft_features(path, value):
    features = []
    t = time.time()
    for filename in os.listdir(path):
        if filename.endswith(".npy"):
            real_path = "{0}/{1}".format(path, filename)
            fft_features = scipy.load(real_path)
            features.append(np.asarray(fft_features[:1000]))
    print time.time() - t
    length = len(features)
    data = np.array(features)
    values = np.array([value] * length)
    return data, values
def read_fft(genre_list, base_dir):
    X = []
    y = []
    for label, genre in enumerate(genre_list):
        genre_dir = os.path.join(base_dir, genre, "*.fft.npy")
        # print genre_dir
        file_list = glob.glob(genre_dir)
        for fn in file_list:
            fft_features = scipy.load(fn)
            X.append(fft_features[:1000])
            y.append(label)
    return np.array(X), np.array(y)
def reconstruct_target(target_file, base_prefix, regul=None):
    """
    Reconstruct the target in 'target_file' using constrained, and optionally
    regularized, least square optimisation.

    arguments :
        target_file : file containing the target to fit
        base_prefix : prefix for the files of the base.
    """
    vlist = read_vertex_list(base_prefix + '_vertices.dat')
    t = read_target(target_file, vlist)
    U = load(base_prefix + "_U.npy").astype('float')
    S = load(base_prefix + "_S.npy").astype('float')
    V = load(base_prefix + "_V.npy").astype('float')
    ntargets, dim = V.shape
    nvert = len(t)
    pt = dot(U.T, t.reshape(nvert * 3, 1))
    pbase = S[:dim].reshape(dim, 1) * V.T
    A = param('A', value=matrix(pbase))
    b = param('b', value=matrix(pt))
    x = optvar('x', ntargets)
    if regul is None:
        prob = problem(minimize(norm2(A * x - b)), [x >= 0., x <= 1.])
    else:
        prob = problem(minimize(norm2(A * x - b) + regul * norm1(x)), [x >= 0., x <= 1.])
    prob.solve()
    targ_names_file = base_prefix + "_names.txt"
    with open(targ_names_file) as f:
        tnames = [line.strip() for line in f.readlines()]
    tnames.sort()
    base, ext = os.path.splitext(target_file)
    bs_name = base + ".bs"
    with open(bs_name, "w") as f:
        for tn, v in zip(tnames, x.value):
            if v >= 1e-3:
                f.write("%s %0.3f\n" % (tn, v))
def main(fname):
    """Run on sample in fname"""
    lda = sc.load(fname)
    k, d, a0, O, X = lda['k'], lda['d'], lda['a0'], lda['O'], lda['data']
    X1, X2, X3 = X
    P, T = sample_moments(X1, X2, X3, k, a0)
    O_ = recover_topics(P, T, k, a0)
    O_ = closest_permuted_matrix(O.T, O_.T).T
    print k, d, a0, norm(O - O_)
def play_movie(image_dir, fixation_file=None, fps=30):
    pygame.init()
    clock = pygame.time.Clock()
    flags = pygame.NOFRAME
    depth = 32
    surf = pygame.display.set_mode((640, 480), flags, depth)
    im_base_name = "cam1_frame_"
    im_extension = ".bmp"
    if fixation_file is not None:
        fixations = sp.load(fixation_file)
        fixations[sp.isnan(fixations)] = -100
        fixations[abs(fixations) > 1000] = 1000
    else:
        fixations = []
    try:
        pygame.event.clear()
        pygame.event.set_allowed(None)
        pygame.event.set_allowed(pygame.KEYDOWN)
        for framenum in xrange(4000):
            im_name = "".join([im_base_name, str(framenum), im_extension])
            im_path = os.path.join(image_dir, im_name)
            im = cv2.imread(im_path)
            if len(fixations) != 0:
                if sp.floor(fixations[framenum, 1]) == 237:
                    continue
                cv2.circle(im, tuple(fixations[framenum]), 3, (255, 255, 255))
            im_buf = im.tostring()
            im = pygame.image.frombuffer(im_buf, (640, 480), "RGB")
            surf.blit(im, (0, 0))
            pygame.display.flip()
            print "Frame", framenum
            for event in pygame.event.get():
                if event.type == pygame.KEYDOWN:
                    if event.key == pygame.K_ESCAPE:
                        sys.exit(0)
            clock.tick(30)
    except KeyboardInterrupt:
        print "Quitting"
    except Exception as e:
        print "An exception!"
        print e
        raise
    finally:
        pygame.quit()
def compare_error_bounds(model_fname, log_fname, delta=0.1):
    """Compare error bounds theoretical analysis"""
    gmm = GaussianMixtureModel.from_file(model_fname)
    k, d, M, w = gmm.k, gmm.d, gmm.means, gmm.weights
    P, T = exact_moments(M, w)
    lg = sc.load(log_fname)
    # TODO: Use concentration bounds on aerr_P12
    n_M, sk_M = lg["norm_M_2"], lg["s_k_M"]
    e_P, e_T = lg["aerr_P_2"], lg["aerr_T"]
    n_P, sk_P, n_T = lg["norm_Pe_2"], lg["s_k_P"], lg["norm_Te"]
    w_min = min(w)
    # TODO: Ah, not computing sigma2!
    # alpha_P and \beta_P
    a_P = e_P / sk_P
    b_P = a_P / (1 - a_P)
    e_Wb = 2 / sqrt(sk_P) * b_P
    e_W = lg["aerr_W_2"]
    e_Twb = 1 / sqrt(sk_M * (1 - a_P)) * e_T + \
        n_T / sk_M * (1 + 1 / sqrt(1 - a_P) + 1 / (1 - a_P)) * e_W
    e_Tw = lg["aerr_Tw"]
    e_Lb = e_Tw
    e_L = lg["aerr_lambda"]
    D_M = column_sep(M)
    D_Tw = delta / (sqrt(sc.e) * k**2 * (1 + sqrt(2 * log(k / delta)))) * D_M
    e_vb = 4 * sqrt(2) * e_Tw / D_Tw
    e_v = lg["aerr_v_col"]
    e_Wtb = 2 * sqrt(n_P + e_P) * b_P
    n_Wtb = sqrt(n_P + e_P)
    e_mub = e_Lb + (1 + 1 / sqrt(w_min)) * n_Wtb * e_vb + e_Wtb
    e_mu = lg["aerr_M_col"]
    print "A\t\tbound\t\tactual"
    print "W\t\t%f\t\t%f" % (e_Wb, e_W)
    print "Tw\t\t%f\t\t%f" % (e_Twb, e_Tw)
    print "L\t\t%f\t\t%f" % (e_Lb, e_L)
    print "v\t\t%f\t\t%f" % (e_vb, e_v)
    print "mu\t\t%f\t\t%f" % (e_mub, e_mu)
    return [(e_W / e_Wb), (e_Tw / e_Twb), (e_L / e_Lb), (e_v / e_vb), (e_mu / e_mub)]
def load(file, metafile=None):
    """Open a .npy file and load it into memory as an info_array.

    Similar to the numpy.load function.  Does not support memory mapping
    (use open_memmap).

    Parameters
    ----------
    file: file handle or str
        .npy file or file name to read the array from.
    metafile: str
        File name from which the `info` attribute of the returned InfoArray
        will be read. Default is None, where it is assumed to be the file
        name associated with `file` with ".meta" appended. If the file does
        not exist, the info attribute is initialized to an empty dictionary.

    Returns
    -------
    iarray: InfoArray object
    """
    # Load the data from .npy format.
    array = sp.load(file)
    # Figure out what the filename for the meta data should be.
    if metafile is None:
        try:
            fname = file.name
        except AttributeError:
            fname = file
        metafile = fname + ".meta"
    # Read the meta data.
    if os.path.isfile(metafile):
        info_fid = open(metafile, 'r')
        try:
            infostring = info_fid.readline()
        finally:
            info_fid.close()
        info = safe_eval(infostring)
    else:
        info = {}
    # Construct the info array.
    array = info_header.InfoArray(array, info)
    return array
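# Hedged counterpart sketch (an assumption, not part of the source): writing an
# array plus a one-line ".meta" file in the repr form that the safe_eval() call
# above can read back.
def save(file, iarray, metafile=None):
    sp.save(file, iarray)
    if metafile is None:
        try:
            fname = file.name
        except AttributeError:
            fname = file
        metafile = fname + ".meta"
    with open(metafile, 'w') as info_fid:
        info_fid.write(repr(dict(iarray.info)))   # one-line dict literal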
def load_state(self, file_name, autogrow=False):
    toload = sp.load(file_name)
    try:
        if toload.shape[0] != 9:
            print "Error loading state: Bad data!"
            return
        if autogrow and toload[0].shape[0] != self.A.shape[0]:
            newN = toload[0].shape[0] - 3
            print "Changing N to: %u" % newN
            self.grow_left(newN - self.N)
        if toload[0].shape != self.A.shape:
            print "Cannot load state: Dimension mismatch!"
            return
        self.A = toload[0]
        self.l[0] = toload[1]
        self.uni_l.r = toload[2]
        self.uni_l.K_left = toload[3]
        self.r[self.N] = toload[4]
        self.r[self.N + 1] = self.r[self.N]
        self.uni_r.l = toload[5]
        self.uni_r.K = toload[6]
        self.grown_left = toload[7][0, 0]
        self.grown_right = toload[7][0, 1]
        self.shrunk_left = toload[7][1, 0]
        self.shrunk_right = toload[7][1, 1]
        self.uni_l.A = self.A[0]
        self.uni_l.l = self.l[0]
        self.uni_l.l_before_CF = self.uni_l.l
        self.uni_l.r_before_CF = self.uni_l.r
        self.uni_r.A = self.A[self.N + 1]
        self.uni_r.r = self.r[self.N]
        self.uni_r.l_before_CF = self.uni_r.l
        self.uni_r.r_before_CF = self.uni_r.r
        print "loaded."
        return toload[8]
    except AttributeError:
        print "Error loading state: Bad data!"
        return
def compare_error_bounds(model_fname, log_fname, delta=0.1):
    """Compare error bounds theoretical analysis"""
    mvgmm = MultiViewGaussianMixtureModel.from_file(model_fname)
    k, d, M, w = mvgmm.k, mvgmm.d, mvgmm.means, mvgmm.weights
    M1, M2, M3 = M
    P12, P13, P123 = exact_moments(w, M1, M2, M3)
    U1, _, U2 = svdk(P12, k)
    _, _, U3 = svdk(P13, k)
    U2, U3 = U2.T, U3.T
    lg = sc.load(log_fname)
    # TODO: Use concentration bounds on aerr_P12
    e_P12, e_P13, e_P123 = lg["aerr_P12_2"], lg["aerr_P13_2"], lg["aerr_P123"]
    n_P12, n_P13, n_P123 = lg["norm_P12_2"], lg["norm_P13_2"], lg["norm_P123"]
    P12_ = U1.T.dot(P12).dot(U2)
    n_P12i = norm(inv(P12_))
    K_P12 = condition_number(P12_)
    P13_ = U1.T.dot(P13).dot(U3)
    n_P13i = norm(inv(P13_))
    e_P12ib = n_P12i * K_P12 / (n_P12 - e_P12 * K_P12) * e_P12
    e_B123b = e_P123 * n_P12i + n_P123 * e_P12ib
    e_B123 = lg["aerr_B123"]
    D_M3 = column_gap(U3.T.dot(M3), k)
    D_L = delta / (sqrt(sc.e) * k**2 * (1 + sqrt(2 * log(k / delta)))) * D_M3
    n_R, K_R = lg["norm_R_2"], lg["K_R"]
    e_Lb = k**3 * K_R**2 * n_R / D_L * e_B123
    # TODO: Adjust because this is a bound on Frob. norm, not spec. norm
    e_Lb = e_Lb * k
    e_L = lg["aerr_L_2"]
    n_mu = max(norm(M3.T[i]) for i in xrange(k))
    e_mub = sqrt(k) * e_L + 2 * n_mu * n_P13i * e_P13 / (1 - n_P13i * e_P13)
    e_mu = lg["aerr_M3_col"]
    print "A\t\tbound\t\tactual"
    print "B123\t\t%f\t\t%f" % (e_B123b, e_B123)
    print "L\t\t%f\t\t%f" % (e_Lb, e_L)
    print "M3\t\t%f\t\t%f" % (e_mub, e_mu)
    return [e_B123 / e_B123b, e_L / e_Lb, e_mu / e_mub]
def __init__(self, fn):
    # data['n_x'] is the x polarization data for the nth antenna.
    # data['n_y'] is the y polarization data. Each data element is a 1001
    # element array. We want to coalesce the polarization pairs into
    # 2x1001 element arrays, because both polarizations have to be used
    # together, and we can concatenate all the antennas vertically and find
    # the optimal frequency.
    data = sp.load(fn)
    self.ants = []
    self.reversemap = {}
    # iterate over physical antennas
    for antsx, antnum in enumerate(set(
            (int(i.split('_')[0]) for i in data.files))):
        ant = sp.array((data["{}_x".format(antnum)],
                        data["{}_y".format(antnum)]))
        ant = sp.atleast_2d(ant)
        self.ants.append(ant)
        self.reversemap[antsx] = antnum
def load_state(self, file):
    """Loads the parameter tensors self.A from a file.

    The saved state must contain the right number of tensors with
    the correct shape corresponding to self.N and self.q.
    self.D will be recovered from the saved state.

    Parameters
    ----------
    file : path or file
        The file to load the state from.
    """
    tmp_A = sp.load(file)
    self.D[0] = 1
    for n in xrange(self.N):
        self.D[n + 1] = tmp_A[n + 1].shape[2]
    self._init_arrays()
    self.A = tmp_A
def get_ls_and_Cls_from_observers(parameterfile, observerfile):
    print "This function has not been tested yet"
    f = open(parameterfile, 'r')
    parameters = cPickle.load(f)
    f.close()
    # parameters = sp.load(path + 'parameters.save')
    observers = sp.load(observerfile)
    # bin_number = 0
    bin_number = 2
    # bin_number = 11
    bn = bin_number
    for mind_bin, maxd_bin in zip(parameters.bindistances[bn:bn + 1],
                                  parameters.bindistances[bn + 1:bn + 2]):
        d = sp.mean((mind_bin, maxd_bin))
        ls = observers[0].ls[bin_number]
        Cls = sp.array([observer.cls[bin_number] for observer in observers])
        # cls = observers[0].cls[bin_number]
        bin_number = bin_number + 1
    return ls, Cls
def getAcquaintances(A, clusters, person='Barack_Obama',
                     outputfile='../data/people_in_the_cluster.txt',
                     t2i='../data/title-ID_dict.pickle',
                     i2t='../data/ID-title_dict.pickle',
                     pid2ind='../data/person_id2ind_6800.pickle',
                     printRating=0):
    t2i = scipy.load(t2i)
    pid2ind = scipy.load(pid2ind)
    if person in t2i:
        ix = t2i[person]
        if ix:
            print 'Wiki ID is', ix
        else:
            print 'Wiki ID is None!'
            return
    else:
        print 'Person isn\'t found in t2i!'
        return
    # get person's dense-dense id (in 15k array)
    ix = pid2ind[ix]
    if ix:
        print 'In dense matrix he/she is', ix
    else:
        print 'Person\'s id not found in pid2ind!'
        return
    # find the cluster that contains the person
    cluster = np.where(map(lambda x: ix in x, clusters))[0]
    ids = np.array(clusters[cluster[0]])
    if len(cluster):
        print person, 'is acquainted with', len(ids), 'people'
    else:
        print 'Person isn\'t found in any cluster!'
        return
    # create inverse index-to-person dictionary
    ind2pid = dict(zip(pid2ind.values(), pid2ind.keys()))
    # get ratings
    # ratings = A[ids,:].sum(axis=1)  # among all vertices
    ratings = A[ids, :][:, ids].sum(axis=1)  # only inside the cluster
    ratings = np.array(ratings).flatten()
    # sort people by rating
    ix = ratings.argsort()[::-1]
    ids = ids[ix]
    ratings = ratings[ix]
    i2t = scipy.load(i2t)
    ids = map(ind2pid.get, ids)
    # print all the people in this cluster
    if printRating:
        string = '\n'.join([i2t[i] + ", " + str(r) for i, r in zip(ids, ratings)])
    else:
        string = '\n'.join(map(i2t.get, ids))
    # write to file
    f = open(outputfile, 'w')
    f.write(string)
    f.close()
ALvls = scipy.append(ALvls, scipy.array([0.241]))
ALvls = scipy.sort(ALvls)
AlphaLvls = []
for a in ALvls:
    AlphaLvls.append(a)
DataDict['AlphaLvls'] = AlphaLvls
AllGenes = scipy.array([])
for F in GWPValFiles:
    File = os.path.join(GeneWisePValuePath, F)
    DecomprFile = File[:-4]
    Trait = re.sub('_', '.', os.path.basename(DecomprFile)[:-4])
    DataDict[Trait] = {}
    os.system('lbzip2 -d -k -f ' + File)
    Data = scipy.load(DecomprFile)
    PVals = Data[1, 1:].astype(float)
    Genes = Data[0, 1:]
    AllGenes = scipy.append(AllGenes, Genes)
    AllGenes = scipy.unique(AllGenes)
    NGenes = len(Genes)
    DataDict[Trait]['NGenes'] = NGenes
    os.remove(DecomprFile)
    for Alpha in AlphaLvls:
        BH = statsmodels.stats.multitest.multipletests(pvals=PVals,
                                                       alpha=Alpha,
                                                       method='fdr_bh',
                                                       returnsorted=False)
        DataDict[Trait]['Alpha_' + str(Alpha)] = Alpha
        DataDict[Trait]['AlphaBonf_' + str(Alpha)] = BH[3]
        BHPVals = scipy.array(['BHp_value_alpha=' + str(Alpha)])
                       unpack=True)
scipy.save(file='BackgroundGeneSet.npy', arr=BGSData)
os.system('lbzip2 BackgroundGeneSet.csv')
os.system('lbzip2 BackgroundGeneSet.npy')

if('-e' in sys.argv):
    os.system('lbzip2 -d BackgroundGeneSet.csv.bz2')
    fr = open('BackgroundGeneSet.csv', 'r')
    BGSHeader = fr.readline().strip().split('|')
    fr.close()
    os.system('lbzip2 BackgroundGeneSet.csv')
    # os.system('lbzip2 -d BackgroundGeneSet.npy.bz2')
    BGSData = scipy.load('BackgroundGeneSet.npy')
    # os.system('lbzip2 BackgroundGeneSet.npy.bz2')
    print BGSHeader
    print len(BGSData), len(BGSData[0])

    # GO term enrichment
    GODBIndices = scipy.where(BGSData[BGSHeader.index('GODB')] == 'Gene Ontology')[0]
    GenesWithGOEntry = scipy.unique(BGSData[BGSHeader.index('GeneEntrezID'), GODBIndices])
    NGenesWithGOEntry = len(GenesWithGOEntry)
    QueryGeneSymbols = []
    fr = open(sys.argv[sys.argv.index('-e') + 1], 'r')
    for Line in fr:
        QueryGeneSymbols.append(Line.strip())
    fr.close()
    GeneSymbolArray,\
        Ind = scipy.unique(BGSData[BGSHeader.index('GeneSymbol(s)'), GODBIndices], return_index=True)