def wannier_zeeman(self, data, taxis=0, xaxis=1, zaxis=2):
    temp = sp.sum(data, axis=zaxis)
    temp = fftpack.fft(temp, axis=taxis)
    sp.save("freq_x.npy", temp)
    temp = fftpack.fft(temp, axis=xaxis)
    sp.save("disp.npy", temp)
    return 0
def execute(self, nprocesses=1):
    params = self.params
    params['transfer_redshift(1)'] = params['transfer_redshift']
    inifilename = params['output_root'] + 'params.ini'
    inifile = open(inifilename, 'w')
    #parse_ini.write_params(params, inifile, prefix='')
    try:
        for key in params:
            print >>inifile, '{0} = {1}'.format(key, params[key])
    finally:
        inifile.close()
    cambpath = os.getenv('CAMBPATH') + 'camb'
    os.system(cambpath + ' ' + inifilename)
    P = []
    k = []
    fname = params['output_root'] + '_matterpower.dat'
    f = open(fname, 'r')
    data = f.readlines()
    for line in data:
        line = line.split()
        k.append(line[0])
        P.append(line[1])
    f.close()
    PK = np.ndarray(shape=(2, len(k)))
    PK[0] = k
    PK[1] = P
    sp.save(params['output_root'] + 'PKcamb', PK)
def create_fft(fn):
    sample_rate, X = scipy.io.wavfile.read(fn)
    fft_features = abs(scipy.fft(X)[:1000])
    base_fn, ext = os.path.splitext(fn)
    data_fn = base_fn + ".fft"
    scipy.save(data_fn, fft_features)
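# A minimal loading counterpart for create_fft (load_fft is a hypothetical
# helper, not part of the corpus): scipy/numpy's save appends ".npy" when the
# target name lacks that suffix, so the features land in "<base>.fft.npy".
import os
import scipy

def load_fft(fn):
    """Load the FFT features that create_fft wrote for a given .wav path."""
    base_fn, ext = os.path.splitext(fn)
    return scipy.load(base_fn + ".fft.npy")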
def run_training(self, train_data=None, train_size=None, val_data=None,
                 val_size=None, batch_size=1024, nb_epoch=100):
    if not train_data:
        logger.error('No training data provided')
        return
    early_stopping = EarlyStopping(monitor='val_loss', patience=10,
                                   verbose=0, mode='auto')
    with K.tf.device(self._keras_device):
        self._model.compile(loss='mse', optimizer='Adagrad')
        # Hook the early-stopping callback into training; it was
        # instantiated but never passed to fit_generator before.
        history_object = self._model.fit_generator(
            train_data,
            steps_per_epoch=train_size,
            validation_data=val_data,
            validation_steps=val_size,
            nb_epoch=nb_epoch,
            callbacks=[early_stopping])
    logger.info("... Training complete, saved as " +
                'model_' + self._model_name + '.h5')
    self._model.save('model_' + self._model_name + '.h5')
    # Plot the training and validation loss for each epoch
    history_ = history_object.history
    print(history_.keys())
    sp.save('history_obj_origin.npy', history_)
    plt.figure()
    plt.plot(history_object.history['loss'], '-o')
    plt.plot(history_object.history['val_loss'], '-o')
    plt.title('Mean Squared Error of model ...')
    plt.ylabel('mean squared error loss')
    plt.xlabel('epoch')
    plt.legend(['training set', 'validation set'], loc='upper right')
    plt.ylim([0, 0.1])
    plt.savefig('training.png')
    plt.show()
def wrapper(self, save=None, **kwargs):
    state = func(self)
    pathdir = os.path.dirname(save)
    mkdir(pathdir)
    self.logger.info('Saving {} to {}.'.format(self.__class__.__name__, save))
    scipy.save(save, state)
def save_task_data(self, taskdata_path, data, task_id, parent_id):
    dlist = [task_id, parent_id, data['parent_path'], data['branch_num'],
             data['t_0'], data['t_1'], data['t_max'], data['state'],
             data['coeff'], data['num_children']]
    sp.save(taskdata_path, sp.array(dlist, dtype=object))
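# Hedged loader sketch for save_task_data (load_task_data is a hypothetical
# helper, not from the source): the record is a dtype=object array, so
# numpy >= 1.16.3 only reads it back with allow_pickle=True. sp.save also
# appends ".npy" when the given path lacks that suffix.
import numpy as np

def load_task_data(taskdata_path):
    dlist = np.load(taskdata_path, allow_pickle=True)
    task_id, parent_id = dlist[0], dlist[1]
    return task_id, parent_id, list(dlist[2:])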
def write_file(xs, ys, name):
    assert len(xs) == len(ys)
    fname = fname_template % (name, )
    # with open(fname, 'w') as fh:
    #     for x, y in zip(xs, ys):
    #         fh.write('%s\t%s\n' % (repr(x), repr(y)))
    scipy.save(fname + '_x', xs)
    scipy.save(fname + '_y', ys)
def save_marks(self, event):
    """ Save the current marks to file """
    # rstrip(extension) strips any trailing characters that happen to occur
    # in the extension, mangling some names; split the extension off instead.
    marks_filename = os.path.splitext(self.ecgfile)[0] + '.ann'
    scipy.save(marks_filename, self.model.marks)
    print 'saved to ', marks_filename
def ovf_to_npy(self, dst, ovfs=[]):
    """Load the data from multiple .ovf files into memory, then save
    them into a single .npy file"""
    data = []
    for ovf in tqdm(ovfs):
        data.append(self.read_ovf(ovf, target='data'))
    # concatenate returns a new array; the result was previously discarded
    data = sp.concatenate(data, axis=-1)
    sp.save(dst, data)
    return 0
def RecordProbs(bitstring, density, fname, rpath, outinfo):
    """ Record the final-state probabilities. """
    path = rpath + outinfo['outdir'] + '/' + fname
    if outinfo['binary']:
        sp.save(path, density)
    else:
        sp.savetxt(path, density)
def RecordEigSpec(eigspec, outdir, binary):
    """ Output eigenspectrum to data file. """
    eigpath = (os.path.dirname(os.path.realpath(__file__)) + "/" +
               outdir + "/eigenspectrum.dat")
    if binary:
        sp.save(eigpath, eigspec)
    else:
        sp.savetxt(eigpath, eigspec)
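# A possible reader for the Record* output (LoadEigSpec is an illustrative
# assumption, not from the source): when binary output is chosen, sp.save
# appends ".npy" to the ".dat" name, so the two branches read different files.
import os
import numpy as np

def LoadEigSpec(outdir, binary):
    base = (os.path.dirname(os.path.realpath(__file__)) + "/" +
            outdir + "/eigenspectrum.dat")
    if binary:
        return np.load(base + ".npy")
    return np.loadtxt(base)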
def RecordFidelity(fidelity, outdir, binary):
    """ Output fidelity to data file. """
    path = (os.path.dirname(os.path.realpath(__file__)) + "/" +
            outdir + "/fidelity.dat")
    if binary:
        sp.save(path, fidelity)
    else:
        sp.savetxt(path, fidelity)
def RecordOverlap(overlap, outdir, T, binary):
    """ Output overlap (in time) to data file. """
    path = (os.path.dirname(os.path.realpath(__file__)) + "/" +
            outdir + "/overlap" + str(T) + ".dat")
    if binary:
        sp.save(path, overlap)
    else:
        sp.savetxt(path, overlap)
def make_raw_signal_around_genes(in_files, _, out_pattern, binsize=50,
                                 windowsize=20000):
    """Use expression data to sort raw read signal for several datasets,
    sorting by expression data. Regions must be associated with genes.
    """
    in_expression, in_genes, in_bigwigs = (in_files[0], in_files[1],
                                           in_files[2:])
    # parse gene expression values
    gene_expr = {}
    for line in [line for line in open(in_expression) if "N/A" not in line]:
        try:
            gene_id, expr_val = line.strip().split('\t')
        except:
            continue
        else:
            gene_expr[gene_id] = expr_val
    gene_expr_sorted = sorted(gene_expr.items(), key=lambda x: float(x[1]))
    # gather gene positions
    gene_positions = {}
    for line in open(in_genes):
        chrom, start, stop, name, score, strand = line.strip().split('\t')
        gene_positions[name] = (chrom, int(start), int(stop))
    sp.save(out_pattern % 'gene_expr',
            sp.array([e[1] for e in gene_expr_sorted
                      if e[0] in gene_positions]))
    for in_wig_name in in_bigwigs:
        in_wig = bigwig_file.BigWigFile(open(in_wig_name))
        read_density = sp.zeros((windowsize // binsize, len(gene_expr)))
        for i, (genename, expr) in enumerate(gene_expr_sorted):
            try:
                chrom, start, stop = gene_positions[genename]
            except KeyError:
                print 'skipping', genename
                continue
            # center the window on the gene start
            start = max(0, start - windowsize // 2)
            stop = start + windowsize
            print chrom, start, stop
            reads_here = in_wig.get(chrom, start, stop)
            if reads_here is None:
                continue
            # bin reads by the midpoint of each interval
            for j in xrange(windowsize // binsize):
                start_bin = start + j * binsize
                stop_bin = start + (j + 1) * binsize
                read_density[j, i] = sum(
                    l[2] for l in reads_here
                    if start_bin <= (l[0] + l[1]) / 2 <= stop_bin)
        sp.save(out_pattern % in_wig_name, read_density)
def calc_dispersion_npy(self, src, dst, axis=1):
    data = sp.load(src)
    background = data[0, :, :, :, :]
    data = data - background[None, :, :, :, :]
    disp = sp.sum(sp.absolute(
        fftpack.fftshift(fftpack.fft2(data, axes=(0, axis)),
                         axes=(0, axis))),
        axis=tuple([a for a in range(5) if a not in (axis, 0)]))
    sp.save(dst, disp)
    return 0
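# Possible inspection step (an assumption, not part of the pipeline): the
# array written by calc_dispersion_npy is the summed magnitude of a 2-D FFT
# over (time, space), so a log-scaled image of it is an (omega, k) dispersion
# map. 'disp.npy' stands in for whatever dst was passed above.
import scipy as sp
import matplotlib.pyplot as plt

disp = sp.load('disp.npy')
plt.imshow(sp.log1p(disp), origin='lower', aspect='auto')
plt.xlabel('k index')
plt.ylabel('frequency index')
plt.show()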
def full_image(i=None):
    global _full
    if _full is None:
        path = Path('cache/full.npy')
        if not path.exists():
            ims = [_full_image(i) for i in range(1, COUNTS['full'] + 1)]
            sp.save(path, sp.stack(ims))
        _full = sp.load(path)
    ims = _full[i - 1] if i is not None else _full
    return ims
def saveNpy(filename):
    print("============saveNpy============:" + filename)
    if filename.find('.txt') != -1:
        filename = filename.split('.txt')[0]
    with open(filename + '.txt', 'r') as f:
        data = np.loadtxt(f)
    x = data[:, :-1]
    y = data[:, -1]
    sp.save(filename + '_data.npy', x)
    sp.save(filename + '_target.npy', y)
def save_state(self, file):
    """Saves the parameter tensors self.A to a file.

    Uses numpy binary format.

    Parameters
    ----------
    file : path or file
        The file to save the state into.
    """
    sp.save(file, self.A)
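# A minimal restore counterpart (hypothetical, mirroring save_state): the
# state is a single tensor, so sp.load is enough. Assumes the same
# module-level "import scipy as sp" the class already relies on.
def load_state(self, file):
    """Load the parameter tensors self.A from a numpy binary file."""
    self.A = sp.load(file)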
def regjac(NRadj):
    NRadjHalf = NRadj / 2
    mpr = sp.load('mprior.npy')
    Npr = len(mpr)
    Nregsmooth = 2 * 15820  # 2 times the number of connections between adjustable rock-types
    Nreglocalxz = 8000  # Number of adjustable rock-types
    Nreg = Nregsmooth + Nreglocalxz + Npr
    sp.save('Nreg.npy', Nreg)
    rJac = lil_matrix((Nreg, NRadj))
    x = 0
    # Create horizontal smoothing of log10kx (perm index 1):
    for i in range(0, 80):
        for j in range(0, 99):
            rJac[x, j + i * 100] = 1
            rJac[x, j + i * 100 + 1] = -1
            x += 1
    # Create vertical smoothing of log10kx (perm index 1):
    for i in range(0, 79):
        for j in range(0, 100):
            rJac[x, j + i * 100] = 1
            rJac[x, j + (i + 1) * 100] = -1
            x += 1
    # Create horizontal smoothing of log10kz (perm index 3):
    for i in range(0, 80):
        for j in range(0, 99):
            rJac[x, j + i * 100 + NRadjHalf] = 1
            rJac[x, j + i * 100 + 1 + NRadjHalf] = -1
            x += 1
    # Create vertical smoothing of log10kz (perm index 3):
    for i in range(0, 79):
        for j in range(0, 100):
            rJac[x, j + i * 100 + NRadjHalf] = 1
            rJac[x, j + (i + 1) * 100 + NRadjHalf] = -1
            x += 1
    # Add regularization to make log10kx similar to log10kz:
    for i in range(0, Nreglocalxz):
        rJac[x, i] = 1
        rJac[x, i + NRadjHalf] = -1
        x += 1
    # Add prior parameter regularisation:
    for i in range(0, Npr):
        rJac[x, i] = 1 * 0.001
        x += 1
    return csr_matrix(rJac)
def quadrant_image(i=None):
    global _quadrant
    if _quadrant is None:
        path = Path('cache/quadrant.npy')
        if not path.exists():
            ims = [_quadrant_image(i)
                   for i in range(1, COUNTS['quadrant'] + 1)]
            sp.save(path, sp.stack(ims))
        _quadrant = sp.load(path)
    ims = _quadrant[i - 1] if i is not None else _quadrant
    return ims
def get_train_wider_calib_data(n=None, k=12):
    '''
    for calibration net
    return X - features
           y - labels
           cnt - count of examples
    '''
    X, y = [], []
    sn = (0.83, 0.91, 1.0, 1.10, 1.21)
    xn = (-0.17, 0.0, 0.17)
    yn = (-0.17, 0.0, 0.17)
    prod = [e for e in itertools.product(sn, xn, yn)]
    inv_calib = lambda i, j, h, w, n: [
        round(i - (-prod[n][1] * w / (prod[n][0] ** -1))),
        round(j - (-prod[n][2] * h / (prod[n][0] ** -1))),
        round(h / prod[n][0] ** -1),
        round(w / prod[n][0] ** -1)]
    suff = str(k)
    X_name = 'train_data_icalib_' + suff + '.npy'
    y_name = 'labels_icalib_' + suff + '.npy'
    root = 'F:\\Datasets\\image_data_sets\\faces\\WIDERFace\\'
    pattern = "*.jpg"
    bboxs = Datasets.load_wider_face(
        os.path.join(root, 'wider_face_split', 'wider_face_train_v7.mat'))
    for path, subdirs, files in os.walk(root, 'WIDER_train'):
        for iname in files:
            if fnmatch(iname, pattern):
                ipath = os.path.join(path, iname)
                img = fr.get_frame(ipath)
                H, W = img.shape[:2]
                bbox_list = bboxs[iname[:-4]]
                for bbox in bbox_list:
                    label = sp.random.randint(0, 45)
                    i, j, h, w = [int(e) for e in inv_calib(
                        bbox[1], bbox[0], bbox[2], bbox[3], label)]
                    face = fr.get_patch(img, i, j, (h, w))
                    face_r, good_example = Datasets.sample_resize(face, k)
                    if good_example:
                        #print('orig:', bbox[1], bbox[0], bbox[2], bbox[3])
                        #print('inv_calib:', i, j, h, w)
                        vec_icalib = fr.frame_to_vect(face_r)
                        X.append(vec_icalib)
                        y.append(label)
                        print('face calib:', label, ipath)
    y = sp.array(y)
    sp.save(y_name, y)
    X = sp.array(X)
    sp.save(X_name, X)
    return X, y
def save(self, dirname):
    import json
    params = {
        "alpha": self.alpha,
        "beta": self.beta,
        "n_topics": self.n_topics,
        "n_docs": self.n_docs,
        "n_words": self.n_words,
        "B": self.B
    }
    # json.dump writes text, so the file must be opened in text mode
    with open("{}/params.json".format(dirname), "w") as fh:
        json.dump(params, fh)
    sp.save("{}/phi.npy".format(dirname), self.phi)
    sp.save("{}/theta.npy".format(dirname), self.theta)
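# A sketch of the matching loader (an assumption, not in the source): read
# the JSON hyperparameters back along with the two dense arrays saved above.
import json
import scipy as sp

def load_model(dirname):
    with open("{}/params.json".format(dirname)) as fh:
        params = json.load(fh)
    phi = sp.load("{}/phi.npy".format(dirname))
    theta = sp.load("{}/theta.npy".format(dirname))
    return params, phi, theta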
def StateOverlapOutput(t, outinfo, psi):
    """ Output the overlap with psi and a specified state at some timestep. """
    # Fix up probabilities
    idx = sp.array(outinfo["stateoverlap"], dtype=int)[:, 0]
    probs = sp.power(abs(psi[idx]), 2).ravel()
    # Write to file
    fname = outinfo["outdir"] + "/state_overlap_T" + str(t) + ".txt"
    if outinfo["binary"]:
        sp.save(fname, probs)
    else:
        sp.savetxt(fname, probs)
def getTFIDF(path):
    texts = []
    processDir(path, defaultS, defaultW, texts)
    word, weight = caculateTFIDF.caculateTFIDF(texts)
    fp = codecs.open('tfidf_words.txt', 'w', 'utf-8')
    json.dump(texts, fp)
    fp.close()
    sp.save('tfidf_weight.npy', weight)
    return word, weight
def save_state(self, file_name, userdata=None):
    tosave = sp.empty((9), dtype=sp.ndarray)
    tosave[0] = self.A
    tosave[1] = self.l[0]
    tosave[2] = self.uni_l.r
    tosave[3] = self.uni_l.K_left
    tosave[4] = self.r[self.N]
    tosave[5] = self.uni_r.l
    tosave[6] = self.uni_r.K
    tosave[7] = sp.array([[self.grown_left, self.grown_right],
                          [self.shrunk_left, self.shrunk_right]])
    tosave[8] = userdata
    sp.save(file_name, tosave)
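# Hedged counterpart for this save_state (load_state is an assumption, not in
# the source): the nine slots form a dtype=object array, so numpy >= 1.16.3
# needs allow_pickle=True to deserialize it.
import numpy as np

def load_state(file_name):
    stored = np.load(file_name, allow_pickle=True)
    A, l0, uni_l_r, uni_l_K_left, r_N, uni_r_l, uni_r_K = stored[:7]
    (grown_left, grown_right), (shrunk_left, shrunk_right) = stored[7]
    userdata = stored[8]
    return (A, l0, uni_l_r, uni_l_K_left, r_N, uni_r_l, uni_r_K,
            (grown_left, grown_right, shrunk_left, shrunk_right), userdata)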
def execute(self, nprocesses=1):
    params = self.params
    boxshape = params['boxshape']
    boxunit = params['boxunit']
    resultf = params['hr'][0]
    if len(params['last']) != 0:
        resultf = resultf + params['last'][0]
    resultf = resultf + '-' + params['hr'][1]
    if len(params['last']) != 0:
        resultf = resultf + params['last'][1]
    FKPweight = params['FKPweight']
    in_root = params['input_root']
    out_root = params['output_root']
    mid = params['mid']
    fkpp = params['FKPpk']
    WindowF_fname = out_root + 'WindowF_' + \
        str(boxshape[0]) + 'x' + str(boxshape[1]) + 'x' + \
        str(boxshape[2]) + 'x' + str(boxunit) + '_' + resultf
    kWindowF_fname = out_root + 'k_WindowF_' + \
        str(boxshape[0]) + 'x' + str(boxshape[1]) + 'x' + \
        str(boxshape[2]) + 'x' + str(boxunit) + '_' + resultf
    print WindowF_fname
    try:
        WindowF = sp.load(WindowF_fname + '.npy')
        k = sp.load(kWindowF_fname + '.npy')
    except IOError:
        print '\tWindow Function ReMake'
        WindowF, k = self.GetWindowFunctionData()
        non0 = WindowF.nonzero()
        sp.save(WindowF_fname, WindowF)
        sp.save(kWindowF_fname, k)
    #txtf = open(out_root+'window_for_idl.txt', 'w')
    #try:
    #    for i in range(len(WindowF)):
    #        if WindowF[i]==0: continue
    #        print >>txtf, '{0} {1}'.format(k[i], WindowF[i])
    #finally:
    #    txtf.close()
    return WindowF, k
def bumpmaps(category, number=None):
    """Uses a NN to generate a perfect image, and caches the result so
    it'll be fast to load next time"""
    if number is None:
        return sp.stack([bumpmaps(category, n)
                         for n in tqdm(range(1, tools.COUNTS[category] + 1))])
    path = Path(f'cache/nn/output/{category}/{number}.npy')
    if not path.exists():
        path.parent.mkdir(exist_ok=True, parents=True)
        losses, model = load(*MODEL)
        bumps = evaluate(category, number, model)[1]
        sp.save(path, bumps)
    return sp.load(path)
def wifi_info2csv(datas, names, shop_info, root_path):
    assert isinstance(datas, list)
    assert isinstance(names, list)
    mall_ids = shop_info.mall_id.unique()
    if not os.path.exists(root_path):
        os.makedirs(root_path, mode=0o777)
    for _data in datas:
        if "basic_wifi_info" not in _data.columns:
            preprocess_basic_wifi(_data)
    for _mall_id in mall_ids:
        print _mall_id
        part_datas = [_data[_data.mall_id == _mall_id] for _data in datas]
        sorted_wifi = get_sorted_wifi(part_datas)
        df = pd.DataFrame({
            "wifi_name": [wifi[0] for wifi in sorted_wifi],
            "wifi_num": [wifi[1] for wifi in sorted_wifi]
        })
        df.index.name = "wifi_rank"
        df.to_csv(os.path.join(root_path, "{}_rank.csv".format(_mall_id)))
        d = rank_sorted_wifi(sorted_wifi)
        for _part_data, name in zip(part_datas, names):
            wifi_matrix = np.zeros((_part_data.shape[0], len(sorted_wifi)))
            use_wifi_str = []
            _part_data.loc[:, "i_index"] = range(_part_data.shape[0])
            _part_data[["basic_wifi_info", "i_index"]].apply(
                lambda x: basic_wifi_map2matrix(x, wifi_matrix, d,
                                                use_wifi_str),
                axis=1)
            a = np.asarray(use_wifi_str)
            # CSV round trips are slow, so store the index, the used wifi
            # strings and the matrix separately
            np.save(os.path.join(root_path,
                                 "{}_{}_index.csv".format(name, _mall_id)),
                    _part_data.index)
            np.save(os.path.join(root_path,
                                 "{}_{}_use_wifi".format(name, _mall_id)), a)
            # Store the matrix in sparse form
            x = sp.csc_matrix(wifi_matrix)
            scipy.save(os.path.join(root_path,
                                    "{}_{}_matrix".format(name, _mall_id)), x)
def wifi_info2csv_just_train(train, test):
    if not os.path.exists("../data/wifi_info_cache2"):
        os.mkdir("../data/wifi_info_cache2")
    shop_info = load_shop_info()
    mall_ids = shop_info.mall_id.unique()
    for _data in [train, test]:
        if "basic_wifi_info" not in _data.columns:
            preprocess_basic_wifi(_data)
    for _mall_id in mall_ids:
        print _mall_id
        train_mall = train[train.mall_id == _mall_id]
        test_mall = test[test.mall_id == _mall_id]
        sorted_wifi = get_sorted_wifi_just_train(train_mall, test_mall)
        df = pd.DataFrame({
            "wifi_name": [wifi[0] for wifi in sorted_wifi],
            "wifi_num": [wifi[1] for wifi in sorted_wifi]
        })
        df.index.name = "wifi_rank"
        df.to_csv("../data/wifi_info_cache2/{}_rank.csv".format(_mall_id))
        d = rank_sorted_wifi(sorted_wifi)
        for _part_data, name in zip([train_mall, test_mall],
                                    ["train", "test"]):
            wifi_matrix = np.zeros((_part_data.shape[0], len(sorted_wifi)))
            # wifi_matrix[:] = -115
            use_wifi_str = []
            _part_data.loc[:, "i_index"] = range(_part_data.shape[0])
            _part_data[["basic_wifi_info", "i_index"]].apply(
                lambda x: basic_wifi_map2matrix(x, wifi_matrix, d,
                                                use_wifi_str),
                axis=1)
            a = np.asarray(use_wifi_str)
            # CSV round trips are slow, so store the index, the used wifi
            # strings and the matrix separately
            np.save("../data/wifi_info_cache2/{}_{}_index".format(
                name, _mall_id), _part_data.index)
            np.save("../data/wifi_info_cache2/{}_{}_use_wifi".format(
                name, _mall_id), a)
            # Store the matrix in sparse form
            x = sp.csc_matrix(wifi_matrix)
            scipy.save("../data/wifi_info_cache2/{}_{}_matrix".format(
                name, _mall_id), x)
def create_fft(directory):
    # directory_fft and folder are expected to be module-level globals
    print(directory)
    #os.system("mkdir " + directory + "wav")
    for filename in os.listdir(directory):
        print(filename)
        base_fn, ext = os.path.splitext(filename)
        if ext == ".wav":
            sample_rate, X = wavfile.read(directory + filename)
            fft_features = abs(scipy.fft(X)[:5000])
            data_filename = (directory_fft + "/" + folder + "/" +
                             filename + ".fft")
            # Create the target directory if it doesn't exist
            if not os.path.exists(directory_fft + "/" + folder):
                os.mkdir(directory_fft + "/" + folder)
                print("Directory ", directory_fft + "/" + folder, " created")
            print(data_filename)
            scipy.save(data_filename, fft_features)
            print("File", data_filename, "saved")
def main():
    """ Naive Bayes implementation """
    # Load the data
    movies_reviews = load_files("./data/tokens")
    sp.save('./data/movie_data.npy', movies_reviews.data)
    sp.save('./data/movie_target.npy', movies_reviews.target)
    movie_data = sp.load('./data/movie_data.npy')
    movie_target = sp.load('./data/movie_target.npy')
    x = movie_data
    y = movie_target
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    count_vec = TfidfVectorizer(binary=False, decode_error='ignore',
                                stop_words="english")
    # Vectorize the training data
    x_train = count_vec.fit_transform(x_train)
    x_test = count_vec.transform(x_test)
    # Classifier
    clf = MultinomialNB().fit(x_train, y_train)
    # doc_pred = clf.predict(x_test)
    # print("mean:", np.mean(doc_pred == y_test))
    # clf.score can replace the mean computed above
    score = clf.score(x_test, y_test)
    print("score:", score)
    # Precision and recall
    precision, recall, thresholds = precision_recall_curve(
        y_test, clf.predict(x_test))
    answer = clf.predict_proba(x_test)[:, 1]
    report = answer > 0.5
    print(classification_report(y_test, report, target_names=['net', 'pos']))
    # Feature names
    # print(count_vec.get_feature_names())
    # Save the model
    model_path = "./models/clf_bayes.model"
    joblib.dump(clf, model_path, compress=0)
def execute(self, nprocesses=1):
    params = self.params
    model = params["model"]
    kiyopy.utils.mkparents(params['output_root'])
    parse_ini.write_params(params, params['output_root'] + 'params.ini',
                           prefix=prefix)
    # Loop over files to process.
    for file_middle in params['file_middles']:
        input_fname = (params['input_root'] + file_middle +
                       params['input_end'])
        Reader = core.fitsGBT.Reader(input_fname, feedback=self.feedback)
        output_fname = params["output_root"] + file_middle + ".npy"
        if model == "scan_var":
            n_scans = len(Reader.scan_set)
            n_IFs = len(Reader.IF_set)
            first_block = True
            for jj in range(n_IFs):
                # These all become arrays on the first iteration.
                var = 0.0
                mean = 0.0
                counts = 0
                for ii in range(n_scans):
                    Data = Reader.read(ii, jj)
                    if first_block:
                        out_shape = (n_IFs,) + Data.dims[1:]
                        out_arr = sp.empty(out_shape, dtype=float)
                        first_block = False
                    var += ma.sum(Data.data**2, 0).filled(0)
                    mean += ma.sum(Data.data, 0).filled(0)
                    counts += ma.count(Data.data, 0)
                # If we didn't get at least 5 good hits, throw away the
                # scan.
                counts[counts < 5] = -1
                var = var/counts - (mean/counts)**2
                var[counts < 5] = 1.0e10
                out_arr[jj, ...] = var
            sp.save(output_fname, out_arr)
            if self.feedback > 1:
                print ("Wrote noise parameters to file: " +
                       utils.abbreviate_file_path(output_fname))
        else:
            raise ValueError("Invalid noise model: " + model)
def main(A, ktrunc, matName, subfolder, inneriter, Tvec):
    print 'Tests using matrix : ', matName
    if matName[0:7] == 'SDlarge':
        RunOracle = False
    else:
        RunOracle = True
    # Save sketch budgets Tvec:
    sp.save(subfolder + '/' + 'Tvec.npy', Tvec)
    # Evaluate Frobenius norm difference between A and the optimal rank-p
    # approximation:
    FrobOptRankP = optFrobErrRankp(A, ktrunc)
    temptime = time.clock()
    # Evaluate performance of the baseline 1-view method proposed by
    # Tropp et al. (2017) (Algorithm 7 in Bjarkason (2018) with ellCut = ell1):
    print '####################################################################'
    print 'Running baseline Tropp 1-view experiments'
    print '####################################################################'
    runStandard1viewTroppTests(A, ktrunc, matName, subfolder, inneriter,
                               Tvec, FrobOptRankP, RunOracle)
    print matName, 'Time spent on baseline Tropp', time.clock() - temptime
    temptime = time.clock()
    # Evaluate performance of 1-view Algorithm 7 in Bjarkason (2018)
    # using ellCut and ell1=ell2 (sometimes ell1=ell2-1):
    print '####################################################################'
    print 'Running Lcut Alg. 7 1-view experiments'
    print '####################################################################'
    runLcut1viewTroppTests(A, ktrunc, matName, subfolder, inneriter,
                           Tvec, FrobOptRankP, RunOracle)
    print matName, 'Time spent on Lcut Alg. 7', time.clock() - temptime
    temptime = time.clock()
    # Evaluate performance of extended SRFT schemes, see Section 6.7:
    print '####################################################################'
    print 'Running Bwz 1-view experiments'
    print '####################################################################'
    run1viewBwzTests(A, ktrunc, matName, subfolder, inneriter,
                     Tvec, FrobOptRankP, RunOracle)
    print matName, 'Time spent on Bwz', time.clock() - temptime
def get_train_data(n_pos=31929, n_neg=164863, k=12):
    '''
    merge positive and negative examples
    '''
    suff = str(k)
    X_name = 'train_data_' + suff + '.npy'
    y_name = 'labels_' + suff + '.npy'
    if not (os.path.exists(X_name) and os.path.exists(y_name)):
        X_train_face, y_train_face = Datasets.get_train_face_wider_data(k=k)
        #X_pos = X_train_face[y_train_face==1]
        X_pos = X_train_face
        X_aflw, y_train_face_aflw = Datasets.get_aflw_face_data(k=k)
        X_pos = sp.vstack([X_pos, X_aflw])
        X_train_non_face, y_train_non_face = \
            Datasets.get_train_non_face_data(k=k)
        print('c1_pos:', len(X_pos))
        if len(X_train_face[y_train_face == 0]) > 0:
            X_neg = sp.vstack((X_train_face[y_train_face == 0],
                               X_train_non_face))
        else:
            X_neg = X_train_non_face
        X_pos = shuffle(X_pos, random_state=42)
        X_neg = shuffle(X_neg, random_state=42)
        X_pos = X_pos[:n_pos]
        X_neg = X_neg[:n_neg]
        n_neg = len(X_neg)
        n_pos = len(X_pos)
        y_pos = sp.ones(n_pos, int)
        y_neg = sp.zeros(n_neg, int)
        X = sp.vstack((X_pos, X_neg))
        y = sp.hstack((y_pos, y_neg))
        X, y = shuffle(X, y, random_state=42)
        sp.save(X_name, X)
        sp.save(y_name, y)
    else:
        X = sp.load(X_name)
        y = sp.load(y_name)
    print("Done", "Positive examples count, Negative examples count:",
          len(y[y == 1]), len(y[y == 0]))
    return X, y
def save_analysis_objects(self, output_dir, site_tag):
    (event_set, event_activity, source_model, sites, motion,
     eqrm_flags) = self.create_analysis_objects()
    # 2. Save test objects to file
    event_set.save(os.path.join(output_dir, '%s_event_set' % site_tag))
    event_activity.save(os.path.join(output_dir, '%s_event_set' % site_tag))
    source_model.save(os.path.join(output_dir, '%s_event_set' % site_tag))
    sites.save(os.path.join(output_dir, '%s_sites' % site_tag))
    # Motion is a numpy.ndarray so save manually
    os.mkdir(os.path.join(output_dir, '%s_motion' % site_tag))
    save(os.path.join(output_dir, '%s_motion' % site_tag, 'bedrock_SA.npy'),
         motion)
    save(os.path.join(output_dir, '%s_motion' % site_tag,
                      'atten_periods.npy'),
         eqrm_flags['atten_periods'])
    # ... and eqrm_flags
    eqrm_flags_to_control_file(os.path.join(output_dir, 'eqrm_flags.py'),
                               eqrm_flags)
def save_andor_load_arrays(endog, exog, true_params, save_arrays,
                           load_old_arrays):
    if save_arrays:
        sp.save("endog.npy", endog)
        sp.save("exog.npy", exog)
        sp.save("true_params.npy", true_params)
    if load_old_arrays:
        endog = sp.load("endog.npy")
        exog = sp.load("exog.npy")
        true_params = sp.load("true_params.npy")
    return endog, exog, true_params
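# Example round trip (illustrative only; the toy shapes and values are
# assumptions, not from the source):
import scipy as sp

endog = sp.zeros(10)
exog = sp.ones((10, 3))
true_params = sp.array([1.0, 2.0, 3.0])
# First run: write the arrays; a later run can pass load_old_arrays=True
# to reproduce exactly the same simulated draws.
endog, exog, true_params = save_andor_load_arrays(
    endog, exog, true_params, save_arrays=True, load_old_arrays=False)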
def splitTrainTestBasedTime(filename):
    print("============splitTrainTestBasedTime============:" + filename)
    if filename.find('.txt') != -1:
        filename = filename.split('.txt')[0]
    trainName = filename + '_Train'
    with open(trainName + '.txt', 'r') as f:
        data = np.loadtxt(f)
    x = data[:, :-1]
    y = data[:, -1]
    sp.save(trainName + '_data.npy', x)
    sp.save(trainName + '_target.npy', y)
    testName = filename + '_Test'
    with open(testName + '.txt', 'r') as f:
        data = np.loadtxt(f)
    x = data[:, :-1]
    y = data[:, -1]
    sp.save(testName + '_data.npy', x)
    sp.save(testName + '_target.npy', y)
def compute_matrices(base, prefix):
    """
    Computes and saves the matrices from the Singular Value Decomposition
    (SVD) of the target base : A = U*S*V^t
    The suffixes "_U.npy", "_S.npy", "_V.npy" are added to the prefix.
    """
    if base.ndim == 3:
        ntargets, nvertices, dim = base.shape
        base = base.reshape(ntargets, nvertices * 3)
    else:
        ntargets, nvertices = base.shape
        nvertices /= 3
    U, S, Vt = svd(base.T, full_matrices=False, overwrite_a=True)
    # We keep only the significant singular values
    Scum = S.cumsum() / S.sum()
    save(prefix + '_U', U[:, Scum < 1.])
    save(prefix + '_S', S)  # we still save the full singular values just in case...
    save(prefix + '_V', Vt.T[:, Scum < 1.])
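# Sanity-check sketch (an assumption, not part of the pipeline): rebuild the
# base from the truncated factors saved above and report the worst-case
# reconstruction error. The retained mask (Scum < 1.) is a prefix of the
# descending singular values, so the first k entries of S match U's columns.
import numpy as np

def check_reconstruction(base, prefix):
    U = np.load(prefix + '_U.npy')
    S = np.load(prefix + '_S.npy')
    V = np.load(prefix + '_V.npy')
    k = U.shape[1]  # number of singular vectors kept by compute_matrices
    # base.T ~ U diag(S_k) V^T, so transpose back to the (ntargets, ...) layout
    approx = U.dot(np.diag(S[:k])).dot(V.T).T
    return np.abs(base.reshape(approx.shape) - approx).max()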
BH = statsmodels.stats.multitest.multipletests(pvals=PVals, alpha=Alpha,
                                               method='fdr_bh',
                                               returnsorted=False)
DataDict[Trait]['Alpha_' + str(Alpha)] = Alpha
DataDict[Trait]['AlphaBonf_' + str(Alpha)] = BH[3]
BHPVals = scipy.array(['BHp_value_alpha=' + str(Alpha)])
BHPVals = scipy.append(BHPVals, BH[1].astype(str))
Data = scipy.vstack((Data, BHPVals))
BHAccept = scipy.array(['BHAccept_alpha=' + str(Alpha)])
BHAccept = scipy.append(BHAccept, BH[0].astype(str))
DataDict[Trait]['GeneSetAtAlpha_' + str(Alpha)] = scipy.compress(
    condition=BH[0], a=Genes).tolist()
Data = scipy.vstack((Data, BHAccept))
OutFile = os.path.join('Data', os.path.basename(DecomprFile))
scipy.save(file=OutFile, arr=Data)
os.system('lbzip2 -f ' + OutFile)
print OutFile

fw = open('Data/DataDict.json', 'w')
json.dump(obj=DataDict, fp=fw)
fw.close()
os.system('lbzip2 -f Data/DataDict.json')

AllGenesFile = 'Data/UniqGenesOverAllTraits.npy'
scipy.save(file=AllGenesFile, arr=AllGenes)
os.system('lbzip2 -f ' + AllGenesFile)

if(False):
    AllGenesFile = 'Data/UniqGenesOverAllTraits.npy'
if '-p' in sys.argv:
    fr = open('BackgroundGeneSet.csv', 'r')
    BGSHeader = fr.readline().strip().split('|')
    fr.close()
    os.system('recode HTML BackgroundGeneSet.csv')
    BGSData = scipy.genfromtxt(fname='BackgroundGeneSet.csv', dtype=str,
                               comments=None, delimiter='|', skip_header=1,
                               unpack=True)
    scipy.save(file='BackgroundGeneSet.npy', arr=BGSData)
    os.system('lbzip2 BackgroundGeneSet.csv')
    os.system('lbzip2 BackgroundGeneSet.npy')

if '-e' in sys.argv:
    os.system('lbzip2 -d BackgroundGeneSet.csv.bz2')
    fr = open('BackgroundGeneSet.csv', 'r')
    BGSHeader = fr.readline().strip().split('|')
    fr.close()
    os.system('lbzip2 BackgroundGeneSet.csv')
    # os.system('lbzip2 -d BackgroundGeneSet.npy.bz2')
    BGSData = scipy.load('BackgroundGeneSet.npy')
    # os.system('lbzip2 BackgroundGeneSet.npy.bz2')
    print BGSHeader
    print len(BGSData), len(BGSData[0])
def execute(self, nprocesses=1):
    params = self.params
    boxshape = params['boxshape']
    boxunit = params['boxunit']
    resultf = params['hr'][0]
    if len(params['last']) != 0:
        resultf = resultf + params['last'][0]
    resultf = resultf + '-' + params['hr'][1]
    if len(params['last']) != 0:
        resultf = resultf + params['last'][1]
    # Make parent directory and write parameter file.
    kiyopy.utils.mkparents(params['output_root'])
    parse_ini.write_params(params, params['output_root'] + 'params.ini',
                           prefix='nl_')
    in_root = params['input_root']
    out_root = params['output_root']
    cambin_root = params['camb_input_root']
    all_out_fname_list = []
    all_in_fname_list = []
    #### Process ####
    kiyopy.utils.mkparents(params['output_root'])
    PKcamb_fname = cambin_root + 'PKcamb.npy'
    PKcamb = algebra.load(PKcamb_fname)
    N = len(params['boxunitlist'])
    yy = np.ndarray(shape=(N, 10))
    xx = np.ndarray(shape=(N, 10))
    for params['boxshape'], params['boxunit'], i \
            in zip(params['boxshapelist'], params['boxunitlist'], range(N)):
        params['plot'] = False
        parse_ini.write_params(params, params['output_root'] + 'params.ini',
                               prefix='wd_')
        WindowF, k = windowf.WindowFunctionMaker(
            params['output_root'] + 'params.ini',
            feedback=self.feedback).execute()
        if yy.shape[1] != WindowF.shape[0]:
            yy.resize((N, WindowF.shape[0]))
            xx.resize((N, WindowF.shape[0]))
        yy[i] = WindowF.copy()
        xx[i] = k.copy()

    def chisq(A, y, x, e):
        err = (y - windowfunction(x, A))**2/e**2
        return err

    non0 = yy[0].nonzero()
    y = yy[0].take(non0)[0][:-10]
    x = xx[0].take(non0)[0][:-10]
    non0 = yy[-1].nonzero()
    y = np.append(y, yy[-1].take(non0)[0][10:-4])
    x = np.append(x, xx[-1].take(non0)[0][10:-4])
    err = y.copy()*10.
    err[5:] = err[5:]*1.e-8
    print x.min(), x.max()
    ki = np.logspace(log10(0.01), log10(1.5), num=300)
    A1 = 1.
    A2 = 1.
    A3 = 1.8
    A0 = np.array([A1, A2, A3])
    A, status = leastsq(chisq, A0, args=(y, x, err), maxfev=20000)
    window = windowfunction(PKcamb[0], A)
    #boxinf = str(boxshape[0])+'x'\
    #    +str(boxshape[1])+'x'+str(boxshape[2])+'x'+str(boxunit)
    sp.save(out_root + 'window_fit_' + resultf, window)
    CC = 1.
    # CC = romberg(lambda k2: K(ki,k2)*k2*k2, PKcamb[0].min(), PKcamb[0].max())
    # CC = romberg(lambda k2: K(ki,k2)*k2*k2, 1.e-10, 1.e10)
    print A
    aaa = A[1]*1.e-3
    bbb = A[2]*1.e-3
    if bbb**4 < 4*aaa**4:
        CC = 1./(pi*bbb*(2. - (bbb/aaa)**2)**(0.5))
        def g(x):
            return atan((bbb**4 + 2.*aaa**2*x**2) /
                        (bbb**2*(4.*aaa**4 - bbb**4)**0.5))
        def K(k1, k2):
            return CC/(k1*k2)*(g(k1 + k2) - g(k1 - k2))
    else:
        mu = bbb**2*(bbb**4 - 4.*aaa**4)**0.5
        CC = aaa/(pi*2**0.5*((bbb**4 + mu)**0.5 - (bbb**4 - mu)**0.5))
        def g(x):
            return (mu + bbb**4 + 2*aaa**2*x**2)/(mu - bbb**4 - 2*aaa**2*x**2)
        def K(k1, k2):
            return CC/(k1*k2)*log(g(k1 - k2)/g(k1 + k2))
    #def K(k1, k2):
    #    uplim = k1+k2
    #    downlim = np.fabs(k1-k2)
    #    C = 8*pi**2/(k1*k2)*CC
    #    return C*romberg(lambda Q: windowfunction(Q,A)*Q, downlim, uplim)
    #print CC
    P = interp1d(PKcamb[0], PKcamb[1], kind='cubic')
    #print PKcamb[0].min(), PKcamb[0].max()
    Pnl = np.zeros(len(ki))
    Pnl_err = np.zeros(len(ki))
    for i in range(len(ki)):
        #Pnl[i] = romberg(lambda k1: k1**2*P(k1)*K(k1,ki[i]),
        Pnl[i], Pnl_err = quad(lambda k1: k1**2*P(k1)*K(k1, ki[i]),
                               PKcamb[0].min(), PKcamb[0].max(), limit=200)
    #Pnl = sp.load(out_root+'nonlPK_'+resultf+'.npy')
    CCC = romberg(lambda k1: k1**2*K(k1, 0.01), ki.min(), ki.max())
    print CCC
    #Pnl = Pnl/CCC
    OmegaHI = params['OmegaHI']
    Omegam = params['Omegam']
    OmegaL = params['OmegaL']
    z = params['z']
    a3 = (1 + z)**(-3)
    Tb = 0.3e-3 * (OmegaHI/1.e-3) * ((Omegam + a3*OmegaL)/0.29)**(-0.5) \
        * ((1. + z)/2.5)**0.5
    #Pnl = Pnl*(Tb**2)
    #PKcamb[1] = PKcamb[1]*(Tb**2)
    #print Pnl
    sp.save(out_root + 'nonlPK_' + resultf, Pnl)
    sp.save(out_root + 'k_nonlPK_' + resultf, ki)
    if self.plot == True:
        plt.figure(figsize=(8, 4))
        plt.subplot('111')
        #plt.plot(PKcamb[0], window, 'b--', linewidth=1,
        #         label='Fitted Window Function')
        plt.plot(PKcamb[0], PKcamb[1], 'g-', linewidth=1,
                 label='Camb Power Spectrum')
        plt.plot(ki, Pnl, 'r-', linewidth=1, label='Power Spectrum')
        plt.loglog()
        plt.xlim(xmin=ki.min(), xmax=ki.max())
        plt.legend()
        plt.savefig(out_root + 'nonlPK.eps', format='eps')
        plt.show()
    #print 'Finished @_@ '
    return PKcamb
def save(self, kernel, R, dx=0, dy=0, **kw):
    rescaled = (255.0 * kernel).astype('uint8')
    name = "kernels/Airy/kernel.Airy.R%d.dx%1.2f.dy%1.2f" % (R, dx, dy)
    image = fromarray(rescaled)
    image.save(name + ".png")
    scipy.save(name + ".npy", kernel)
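# Illustrative note (assumed usage, not from the source): the PNG stores an
# 8-bit rescaling for quick viewing, while the ".npy" keeps the kernel at full
# precision, so quantitative work should reload the array. R=4, dx=dy=0 below
# is a made-up instance of the naming scheme above.
import scipy

kernel = scipy.load("kernels/Airy/kernel.Airy.R4.dx0.00.dy0.00.npy")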
V = potV(X, Y, xyMax, a, b, V0)

# Creating or loading operator matrix
if platform.system() == 'Windows':
    mPath = 'Operator_Matrices\\delta_' + str(delta) + 'V0_' + str(V0) + '.npy'
else:
    mPath = 'Operator_Matrices/delta_' + str(delta) + 'V0_' + str(V0) + '.npy'
try:
    M = sp.load(mPath)
    print 'Matrix will be loaded'
except:
    start = time.time()
    print 'Creating operator matrix. Sit back, this may take a while :)'
    M = H(X, Y, xyMax, spacing, nm, nmIndexes, V)
    sp.save(mPath, M)
    print 'Matrix ready'
    print 'Took ' + str(time.time() - start) + ' seconds!'

# Calculating eigen energies
values = linalg.eig(M)[0]
values = values[values.argsort()]
e0Array.append(values[0].real)
e1Array.append(values[1].real)
e2Array.append(values[2].real)
e3Array.append(values[3].real)
e4Array.append(values[4].real)

e0Array = sp.array(e0Array)
e1Array = sp.array(e1Array)
e2Array = sp.array(e2Array)
def save_state(self, file):
    sp.save(file, self.A)
def execute(self, nprocesses=1):
    """Worker function."""
    params = self.params
    # Make parent directory and write parameter file.
    kiyopy.utils.mkparents(params['output_root'])
    parse_ini.write_params(params, params['output_root'] + 'params.ini',
                           prefix=prefix)
    save_noise_diag = params['save_noise_diag']
    in_root = params['input_root']
    all_out_fname_list = []
    all_in_fname_list = []
    # Figure out what the band names are.
    bands = params['bands']
    if not bands:
        map_files = glob.glob(in_root + 'dirty_map_' + pol_str + "_*.npy")
        bands = []
        root_len = len(in_root + 'dirty_map_')
        for file_name in map_files:
            bands.append(file_name[root_len:-4])
    # Loop over files to process.
    for pol_str in params['polarizations']:
        for band in bands:
            if band == -1:
                band_str = ''
            else:
                band_str = "_" + repr(band)
            dmap_fname = (in_root + 'dirty_map_' + pol_str + band_str
                          + '.npy')
            all_in_fname_list.append(
                kiyopy.utils.abbreviate_file_path(dmap_fname))
            # Load the dirty map and the noise matrix.
            dirty_map = algebra.load(dmap_fname)
            dirty_map = algebra.make_vect(dirty_map)
            if dirty_map.axes != ('freq', 'ra', 'dec'):
                msg = ("Expected dirty map to have axes ('freq',"
                       "'ra', 'dec'), but it has axes: "
                       + str(dirty_map.axes))
                raise ce.DataError(msg)
            shape = dirty_map.shape
            # Initialize the clean map.
            clean_map = algebra.info_array(sp.zeros(dirty_map.shape))
            clean_map.info = dict(dirty_map.info)
            clean_map = algebra.make_vect(clean_map)
            # If needed, initialize a map for the noise diagonal.
            if save_noise_diag:
                noise_diag = algebra.zeros_like(clean_map)
            if params["from_eig"]:
                # Solving from eigen decomposition of the noise instead of
                # the noise itself.
                # Load in the decomposition.
                evects_fname = (in_root + 'noise_evects_' + pol_str +
                                band_str + '.npy')
                if self.feedback > 1:
                    print "Using dirty map: " + dmap_fname
                    print "Using eigenvectors: " + evects_fname
                evects = algebra.open_memmap(evects_fname, 'r')
                evects = algebra.make_mat(evects)
                evals_inv_fname = (in_root + 'noise_evalsinv_' + pol_str
                                   + "_" + repr(band) + '.npy')
                evals_inv = algebra.load(evals_inv_fname)
                evals_inv = algebra.make_mat(evals_inv)
                # Solve for the map.
                if params["save_noise_diag"]:
                    clean_map, noise_diag = solve_from_eig(
                        evals_inv, evects, dirty_map, True, self.feedback)
                else:
                    clean_map = solve_from_eig(
                        evals_inv, evects, dirty_map, False, self.feedback)
                # Delete the eigen vectors to recover memory.
                del evects
            else:
                # Solving from the noise.
                noise_fname = (in_root + 'noise_inv_' + pol_str + band_str
                               + '.npy')
                if self.feedback > 1:
                    print "Using dirty map: " + dmap_fname
                    print "Using noise inverse: " + noise_fname
                all_in_fname_list.append(
                    kiyopy.utils.abbreviate_file_path(noise_fname))
                noise_inv = algebra.open_memmap(noise_fname, 'r')
                noise_inv = algebra.make_mat(noise_inv)
                # Two cases for the noise. If it's the same shape as the
                # map then the noise is diagonal. Otherwise, it should be
                # block diagonal in frequency.
                if noise_inv.ndim == 3:
                    if noise_inv.axes != ('freq', 'ra', 'dec'):
                        msg = ("Expected noise matrix to have axes "
                               "('freq', 'ra', 'dec'), but it has: "
                               + str(noise_inv.axes))
                        raise ce.DataError(msg)
                    # Noise inverse can fit in memory, so copy it.
                    noise_inv_memory = sp.array(noise_inv, copy=True)
                    # Find the non-singular (covered) pixels: those with
                    # noise inverse above threshold.
                    max_information = noise_inv_memory.max()
                    good_data = noise_inv_memory > 1.0e-10 * max_information
                    # Make the clean map.
                    clean_map[good_data] = (dirty_map[good_data]
                                            / noise_inv_memory[good_data])
                    if save_noise_diag:
                        noise_diag[good_data] = \
                            1 / noise_inv_memory[good_data]
                elif noise_inv.ndim == 5:
                    if noise_inv.axes != ('freq', 'ra', 'dec', 'ra', 'dec'):
                        msg = ("Expected noise matrix to have axes "
                               "('freq', 'ra', 'dec', 'ra', 'dec'), "
                               "but it has: " + str(noise_inv.axes))
                        raise ce.DataError(msg)
                    # Arrange the dirty map as a vector.
                    dirty_map_vect = sp.array(dirty_map)  # A view.
                    dirty_map_vect.shape = (shape[0], shape[1]*shape[2])
                    frequencies = dirty_map.get_axis('freq')/1.0e6
                    # Allocate memory only once.
                    noise_inv_freq = sp.empty((shape[1], shape[2],
                                               shape[1], shape[2]),
                                              dtype=float)
                    if self.feedback > 1:
                        print "Inverting noise matrix."
                    # Block diagonal in frequency so loop over frequencies.
                    for ii in xrange(dirty_map.shape[0]):
                        if self.feedback > 1:
                            print "Frequency: ", "%5.1f" % (frequencies[ii]),
                        if self.feedback > 2:
                            print ", start mmap read:",
                            sys.stdout.flush()
                        noise_inv_freq[...] = noise_inv[ii, ...]
                        if self.feedback > 2:
                            print "done, start eig:",
                            sys.stdout.flush()
                        noise_inv_freq.shape = (shape[1]*shape[2],
                                                shape[1]*shape[2])
                        # Solve the map making equation by diagonalization.
                        noise_inv_diag, Rot = sp.linalg.eigh(
                            noise_inv_freq, overwrite_a=True)
                        if self.feedback > 2:
                            print "done",
                        map_rotated = sp.dot(Rot.T, dirty_map_vect[ii])
                        # Zero out infinite noise modes.
                        bad_modes = (noise_inv_diag
                                     < 1.0e-5 * noise_inv_diag.max())
                        if self.feedback > 1:
                            print ", discarded: ",
                            print "%4.1f" % (100.0 * sp.sum(bad_modes)
                                             / bad_modes.size),
                            print "% of modes",
                        if self.feedback > 2:
                            print ", start rotations:",
                            sys.stdout.flush()
                        map_rotated[bad_modes] = 0.
                        noise_inv_diag[bad_modes] = 1.0
                        # Solve for the clean map and rotate back.
                        map_rotated /= noise_inv_diag
                        map = sp.dot(Rot, map_rotated)
                        if self.feedback > 2:
                            print "done",
                            sys.stdout.flush()
                        # Fill the clean array.
                        map.shape = (shape[1], shape[2])
                        clean_map[ii, ...] = map
                        if save_noise_diag:
                            # Using C = R Lambda R^T
                            # where Lambda = diag(1/noise_inv_diag).
                            temp_noise_diag = 1/noise_inv_diag
                            temp_noise_diag[bad_modes] = 0
                            # Multiply R by the diagonal eigenvalue matrix.
                            # Broadcasting does equivalent of mult by diag
                            # matrix.
                            temp_mat = Rot*temp_noise_diag
                            # Multiply by R^T, but only calculate the
                            # diagonal elements.
                            for jj in range(shape[1]*shape[2]):
                                temp_noise_diag[jj] = sp.dot(
                                    temp_mat[jj, :], Rot[jj, :])
                            temp_noise_diag.shape = (shape[1], shape[2])
                            noise_diag[ii, ...] = temp_noise_diag
                        # Return workspace memory to original shape.
                        noise_inv_freq.shape = (shape[1], shape[2],
                                                shape[1], shape[2])
                    if self.feedback > 1:
                        print ""
                        sys.stdout.flush()
                elif noise_inv.ndim == 6:
                    if save_noise_diag:
                        # OLD WAY.
                        #clean_map, noise_diag, chol = solve(noise_inv,
                        #        dirty_map, True, feedback=self.feedback)
                        # NEW WAY.
                        clean_map, noise_diag, noise_inv_diag, chol = \
                            solve(noise_fname, noise_inv, dirty_map, True,
                                  feedback=self.feedback)
                    else:
                        # OLD WAY.
                        #clean_map, chol = solve(noise_inv, dirty_map,
                        #        False, feedback=self.feedback)
                        # NEW WAY.
                        clean_map, noise_inv_diag, chol = \
                            solve(noise_fname, noise_inv, dirty_map, False,
                                  feedback=self.feedback)
                    if params['save_cholesky']:
                        chol_fname = (params['output_root'] + 'chol_'
                                      + pol_str + band_str + '.npy')
                        sp.save(chol_fname, chol)
                    if params['save_noise_inv_diag']:
                        noise_inv_diag_fname = (
                            params['output_root'] + 'noise_inv_diag_'
                            + pol_str + band_str + '.npy')
                        algebra.save(noise_inv_diag_fname, noise_inv_diag)
                    # Delete the cholesky to recover memory.
                    del chol
                else:
                    raise ce.DataError("Noise matrix has bad shape.")
                # In all cases delete the noise object to recover memory.
                del noise_inv
            # Write the clean map to file.
            out_fname = (params['output_root'] + 'clean_map_' + pol_str
                         + band_str + '.npy')
            if self.feedback > 1:
                print "Writing clean map to: " + out_fname
            algebra.save(out_fname, clean_map)
            all_out_fname_list.append(
                kiyopy.utils.abbreviate_file_path(out_fname))
            if save_noise_diag:
                noise_diag_fname = (params['output_root'] + 'noise_diag_'
                                    + pol_str + band_str + '.npy')
                algebra.save(noise_diag_fname, noise_diag)
                all_out_fname_list.append(
                    kiyopy.utils.abbreviate_file_path(noise_diag_fname))
            # Check the clean map for failure.
            if not sp.alltrue(sp.isfinite(clean_map)):
                n_bad = sp.sum(sp.logical_not(sp.isfinite(clean_map)))
                msg = ("Non finite entries found in clean map. Solve"
                       " failed. %d out of %d entries bad"
                       % (n_bad, clean_map.size))
                raise RuntimeError(msg)