def getRAandDec(self, fitsFile, catalog):
    self._debug("\tRunning xy2sky")
    tempDir = self.tempDir + os.sep + "xy2sky"
    self._debug("\tGenerating temp directory at %s" % tempDir)
    try:
        shutil.rmtree(tempDir)  # delete directory
    except OSError as exc:
        if exc.errno != errno.ENOENT:
            raise  # re-raise exception
    os.mkdir(tempDir)
    imfile = tempDir + os.sep + "imfile.txt"
    np.savetxt(imfile, catalog[['X_IMAGE', 'Y_IMAGE']], fmt="%0.3f")
    outfile = tempDir + os.sep + "skys.txt"
    # | cut -d " " -f 1-2 > %s
    commandline = wcsToolsPath + "/xy2sky -d %s @%s | awk '{print $1,$2}' > %s" % (fitsFile, imfile, outfile)
    p = subprocess.Popen(["/bin/bash", "-i", "-c", commandline],
                         stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    output = p.communicate()  # now wait
    res = np.loadtxt(outfile)
    catalog = append_fields(catalog, 'RA', res[:, 0], usemask=False)
    catalog = append_fields(catalog, 'DEC', res[:, 1], usemask=False)
    return catalog
def __init__(self, d={}):
    # time: timestep number
    self.time = d.get('time', 0)
    # box: numpy array of lattice vectors
    box = d.get('box', N.zeros((3, 3)))
    self.box = box
    if box.shape == (3, 3):
        self.vc = box
    elif box.shape == (3,):
        self.vc = N.diag(box)
    else:
        raise ValueError('Box should be (3,3) or (3,) array')
    # atoms: atoms numpy array
    atoms = d.get('atoms', [])
    leg_list = atoms.dtype.names
    if 'id' not in leg_list:
        # add 'id' column
        atoms = nlrf.append_fields(atoms, 'id', N.arange(len(atoms)) + 1,
                                   asrecarray=True, usemask=False)
    if 'itype' not in leg_list:
        # add 'itype' column
        if 'label' in leg_list:
            labels = list(N.unique(atoms['label']))
            labels = dict(zip(labels, range(1, len(labels) + 1)))
            ityp = N.array([labels[atom['label']] for atom in atoms])
            atoms = nlrf.append_fields(atoms, 'itype', ityp,
                                       asrecarray=True, usemask=False)
        else:
            atoms = nlrf.append_fields(atoms, 'itype', N.ones(len(atoms)),
                                       asrecarray=True, usemask=False)
    self.atoms = atoms
def load_training_data(self, training_data='', **extras):
    """Read an HDF5 file with `parameters` a structured ndarray and `spectra`
    an ndarray.  Convert to a structured array of labels of length `ntrain`
    with `nlabel` fields, and an ndarray of training spectra of shape
    (nwave, ntrain).
    """
    self.has_errors = False
    with h5py.File(training_data, "r") as f:
        self.library_spectra = f['spectra'][:]
        self.library_labels = f['parameters'][:]
        self.wavelengths = f['wavelengths'][:]
        try:
            self.library_snr = self.library_spectra / f['uncertainty'][:]
            self.has_errors = True
        except KeyError:
            pass
        ancillary = f['ancillary'][:]
        # add and rename labels here. Note that not all labels need to be or
        # will be used in the feature generation
        newfield = ['logt', 'miles_id']
        newdata = [np.log10(self.library_labels['teff']), ancillary['miles_id']]
        self.library_labels = rfn.append_fields(self.library_labels,
                                                newfield, newdata, usemask=False)
        try:
            # assuming f_nu
            fbol = np.trapz(self.library_spectra / self.wavelengths**2,
                            self.wavelengths)
            newfield = ['logl', 'luminosity', 'fbol']
            newdata = [ancillary['logl'], 10**ancillary['logl'], fbol]
            self.library_labels = rfn.append_fields(self.library_labels,
                                                    newfield, newdata, usemask=False)
        except KeyError:
            pass
    self.reset_mask()
def append_fields(base, names, data, dtypes=None,
                  fill_value=-1, usemask=False,  # Different from recfunctions default
                  asrecarray=False):
    """Append fields to a numpy structured array.

    If a field already exists in `base`, its data is overwritten instead.
    """
    if isinstance(names, (tuple, list)):
        # Add multiple fields at once
        if dtypes is None:
            dtypes = [d.dtype for d in data]
        # Convert to numpy arrays so we can use boolean index arrays
        names = np.array(names)
        data = np.array(data)
        dtypes = np.array(dtypes)
        not_yet_in_data = True ^ np.in1d(names, base.dtype.names)
        # Append the fields that were not in the data
        base = recfunctions.append_fields(base, names[not_yet_in_data].tolist(),
                                          data[not_yet_in_data].tolist(),
                                          dtypes[not_yet_in_data].tolist(),
                                          fill_value, usemask, asrecarray)
        # Overwrite the fields that are already in the data
        for i in np.where(True ^ not_yet_in_data)[0]:
            base[names[i]] = data[i]
        return base
    else:
        # Add single field
        if names in base.dtype.names:
            # Field already exists: overwrite data
            base[names] = data
            return base
        else:
            return recfunctions.append_fields(base, names, data, dtypes,
                                              fill_value, usemask, asrecarray)
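# A minimal usage sketch for the overwrite-aware wrapper above (assuming it is
# in scope as `append_fields`); the field names and values here are invented
# for illustration, not taken from the original codebase.
import numpy as np

base = np.array([(1, 2.0), (3, 4.0)], dtype=[('a', 'i4'), ('b', 'f8')])
# 'b' already exists and is overwritten in place; 'c' is appended as a new field.
out = append_fields(base, ['b', 'c'],
                    [np.array([9.0, 9.0]), np.array([7, 8])])
print(out.dtype.names)  # ('a', 'b', 'c')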
def get_cat(cat_filename, cut=0):
    '''
    Opens the target file using numpy genfromtxt. Assumes the format is given
    by `thehead`. If the coordinates are sexagesimal, they are converted to
    degrees and two new columns ('degra' and 'degdec') are appended to the
    record array. Even if the coordinates are already in degrees, the 'degra'
    and 'degdec' columns are appended, but they will be identical to the
    original 'ra' and 'dec'.
    '''
    f = open(cat_filename, 'r')
    header = f.readline().lower().split()
    f.close()
    thehead = ['hstid', 'field', 'ra', 'dec', 'v', 'verr',
               'bvcol', 'bvcolerr', 'vicol', 'vicolerr']
    dtype = ['S30', 'S30', 'S30', 'S30', 'f8', 'f8', 'f8', 'f8', 'f8', 'f8']
    if header == thehead:
        data = np.genfromtxt(cat_filename, names=header, dtype=dtype,
                             skip_header=True, skip_footer=cut)
        if ':' in data['ra'][0]:
            ra_degrees = np.array([sex2deg(x) for x in data['ra']])
            dec_degrees = np.array([sex2deg(x, RA=False) for x in data['dec']])
            cat = rfn.append_fields(data, names=['degra', 'degdec'],
                                    data=[ra_degrees, dec_degrees],
                                    dtypes=['f8', 'f8'], usemask=False)
        else:
            cat = rfn.append_fields(data, names=['degra', 'degdec'],
                                    data=[data['ra'], data['dec']],
                                    dtypes=['f8', 'f8'], usemask=False)
        return cat
    else:
        sys.exit('Columns need to be titled: %s' % thehead)
def readH5(self, fname):
    """Reads Catalog from H5 file, specified as argument."""
    of = h5py.File(fname, "r")
    self.data = of["objects"].value
    self.meta = of["meta"].attrs
    if "dNdz" in of.keys():
        self.dNdz = of['dNdz'].value
    if "bz" in of.keys():
        self.bz = of['bz'].value
    self.window = window.readWindowH5(of['window'])
    self.photoz = photoz.readPhotoZH5(of['photoz'])
    cversion = float(self.meta['version'])
    if cversion == 0.1:
        print("updating 0.1 to version ", self.version)
        self.data = recfunctions.append_fields(
            self.data, 'sigma_pz',
            (1 + self.data["z_real_t"]) * self.data["z_error"],
            usemask=False)
        self.data = recfunctions.append_fields(
            self.data, 'z',
            self.data["z_real_t"] + (1 + self.data["z_real_t"]) * self.data["z_error"],
            usemask=False)
        self.data = self.data[[name for name in self.data.dtype.names
                               if name not in ["z_real_t", "z_rsd_t", "z_error"]]]
    if cversion == 0.2:
        print("WARNING: upgrading from 0.2 to 0.3, photozs internally slightly inconsistent.")
        self.data = recfunctions.append_fields(
            self.data, 'sigma_pz',
            (1 + self.data["z"]) * self.photoz.sigma, usemask=False)
def get_demodulated_data_from_list(filelist, freq=10, supply_index=True, phase_offset=0):
    filelist.sort()  # just in case
    dd = []
    for f in filelist:
        # only use full size files
        stats = os.stat(f)
        if stats.st_size == 10752000:
            print f
            d = demod.demodulate_dat(f, freq, supply_index=True, phase_offset=phase_offset)
            # filename is start of data taking (I think) and we'll just add
            # 1/samprate seconds per rev
            h = np.float64(f[-12:-10])
            m = np.float64(f[-10:-8])
            s = np.float64(f[-8:-6])
            t = h + m / 60. + (s + (d['rev'] - d['rev'][0]) / samprate) / 3600.
            d = recf.append_fields(d, 'localtime', t)
            ut = np.mod(t + 7., 24.)
            if len(f) > 21:
                y = np.zeros(len(d), dtype=np.int) + np.int(f[-21:-17])
                mo = np.zeros(len(d), dtype=np.int) + np.int(f[-17:-15])
                dy = np.zeros(len(d), dtype=np.int) + np.int(f[-15:-13])
                ut = np.mod(t + 7., 24.)
                utt = t + 7.
                dy[utt > ut] = dy[utt > ut] + 1
                d = recf.append_fields(d, ['year', 'month', 'day'], [y, mo, dy])
            d = recf.append_fields(d, 'ut', ut)
            dd.append(d)
    return np.concatenate(dd)
def merge_paradigm(trial_info, paradigm, behavioural=None, **conf):
    baseline_condition = ''
    for arg in conf:
        if arg == 'baseline':
            baseline_condition = conf[arg]
    mask_blink_outlier = np.in1d(paradigm['Trial'], trial_info['Trial'])
    trial_info = nprec.append_fields(trial_info, 'Label',
                                     paradigm['Label'][mask_blink_outlier]).data
    mask_task = paradigm['Label'] != baseline_condition
    print 'Trials no. ' + str(len(trial_info))
    if behavioural is not None:
        m = mask_task * mask_blink_outlier
        m = m[1::2]
        trial_task_info = trial_info[trial_info['Label'] != baseline_condition]
        trial_cond = nprec.append_fields(
            trial_task_info, behavioural.dtype.names,
            [behavioural[b][m] for b in behavioural.dtype.names]).data
        return trial_cond, trial_info
    else:
        return trial_info
def whichGalaxyProfile(sdss):
    exp_L = np.exp(np.array([sdss['LNLEXP_G'], sdss['LNLEXP_R'],
                             sdss['LNLEXP_I'], sdss['LNLEXP_Z']])).T
    dev_L = np.exp(np.array([sdss['LNLDEV_G'], sdss['LNLDEV_R'],
                             sdss['LNLDEV_I'], sdss['LNLDEV_Z']])).T
    star_L = np.exp(np.array([sdss['LNLSTAR_G'], sdss['LNLSTAR_R'],
                              sdss['LNLSTAR_I'], sdss['LNLSTAR_Z']])).T
    expfracL = exp_L / (exp_L + dev_L + star_L)
    devfracL = dev_L / (exp_L + dev_L + star_L)
    modelmode = np.zeros((len(sdss), 4), dtype=np.int32)
    expmodel = (expfracL > 0.5)
    modelmode[expmodel] = 0
    devmodel = (devfracL > 0.5)
    modelmode[devmodel] = 1
    neither = ~(expmodel | devmodel)
    modelmode[neither] = 2
    sdss = rf.append_fields(sdss, 'BESTPROF_G', modelmode[:, 0])
    sdss = rf.append_fields(sdss, 'BESTPROF_R', modelmode[:, 1])
    sdss = rf.append_fields(sdss, 'BESTPROF_I', modelmode[:, 2])
    sdss = rf.append_fields(sdss, 'BESTPROF_Z', modelmode[:, 3])
    # print ' exp :', np.sum(expmodel), ' dev :', np.sum(devmodel), 'neither :', np.sum(neither)
    return sdss
def getClassifiers(self):
    if not os.path.exists(self.outDir):
        os.mkdir(self.outDir)
    outDir = self.outDir + os.sep + "classPickle"
    if not os.path.exists(outDir):
        os.mkdir(outDir)
    class1Save = outDir + os.sep + "classifier1.pkl"
    class2Save = outDir + os.sep + "classifier2.pkl"
    class1Exists = os.path.exists(class1Save)
    class2Exists = os.path.exists(class2Save)
    if not (class1Exists and class2Exists):
        self._setupTempDir()
        self.fitsFiles = [f[:-5] for f in os.listdir(self.fitsFolder) if ".fits" in f]
        self.fitsFilesLoc = [os.path.abspath(self.fitsFolder + os.sep + f)
                             for f in os.listdir(self.fitsFolder) if ".fits" in f]
        for f in self.fitsFiles:
            self.mainCatalog[f] = self.getCatalog(self.fitsFolder + os.sep + f + ".fits",
                                                  ishape=True)
            self.candidateMask[f] = self._getCandidateMask(
                self.mainCatalog[f], np.loadtxt(self.fitsFolder + os.sep + f + ".txt"))
            self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'WEIGHT',
                                                self.candidateMask[f] * 1.0, usemask=False)
            self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'EXTENDED',
                                                self.candidateMask[f], usemask=False)
            self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'HLR',
                                                np.zeros(self.mainCatalog[f].shape), usemask=False)
            self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'MAG',
                                                np.zeros(self.mainCatalog[f].shape), usemask=False)
        self._trainClassifier()
        joblib.dump(self.sc, class1Save)
        joblib.dump(self.sc2, class2Save)
    else:
        self.sc = joblib.load(class1Save)
        self.sc2 = joblib.load(class2Save)
    # self._testClassifier(catalog, candidateMask)
    # self._cleanTempDir()
    self._debug("Classifier generated. Now you can invoke .classify(catalog)")
def analyze_chamber_data(self, raw_chamber_data):
    ethanol_data = raw_chamber_data[raw_chamber_data['status'] == 'Ethanol']
    analyzed_ethanol_data = self.analyze_data(ethanol_data)
    status_array = numpy.array(['Ethanol'] * len(analyzed_ethanol_data), dtype='|S25')
    analyzed_chamber_data = recfunctions.append_fields(analyzed_ethanol_data,
                                                       'status', status_array,
                                                       dtypes='|S25', usemask=False)
    air_before_data = raw_chamber_data[raw_chamber_data['status'] == 'AirBefore']
    if air_before_data.size != 0:
        analyzed_air_before_data = self.analyze_data(air_before_data)
        status_array = numpy.array(['AirBefore'] * len(analyzed_air_before_data), dtype='|S25')
        analyzed_air_before_data = recfunctions.append_fields(analyzed_air_before_data,
                                                              'status', status_array,
                                                              dtypes='|S25', usemask=False)
        analyzed_chamber_data = recfunctions.stack_arrays(
            (analyzed_air_before_data, analyzed_chamber_data), usemask=False)
    air_after_data = raw_chamber_data[raw_chamber_data['status'] == 'AirAfter']
    if air_after_data.size != 0:
        analyzed_air_after_data = self.analyze_data(air_after_data)
        status_array = numpy.array(['AirAfter'] * len(analyzed_air_after_data), dtype='|S25')
        analyzed_air_after_data = recfunctions.append_fields(analyzed_air_after_data,
                                                             'status', status_array,
                                                             dtypes='|S25', usemask=False)
        analyzed_chamber_data = recfunctions.stack_arrays(
            (analyzed_chamber_data, analyzed_air_after_data), usemask=False)
    return analyzed_chamber_data
def training_split(self, holdout_unit='none', holdout_prop=.2):
    '''
    Splits the data up into test and train subsets
    '''
    if holdout_prop > .99 or holdout_prop < .01:
        raise ValueError('The holdout proportion must be between .01 and .99.')
    if holdout_unit == 'none':
        self.training_data = self.observation_matrix
        self.test_data = self.prediction_matrix
        self.training_type = 'make predictions'
        print 'Fitting model to all data'
    elif holdout_unit == 'datapoint':
        holdouts = np.random.binomial(1, holdout_prop, self.data_rows)
        self.training_data = np.delete(self.observation_matrix,
                                       np.where(holdouts == 1)[0], axis=0)
        self.test_data = np.delete(self.observation_matrix,
                                   np.where(holdouts == 0)[0], axis=0)
        self.training_type = 'datapoint'
        print 'Fitting model to ' + str((1 - holdout_prop) * 100) + '% of datapoints'
    elif holdout_unit == 'country-year':
        country_years = [self.observation_matrix.country[i] + '_' +
                         str(self.observation_matrix.year[i])
                         for i in range(self.data_rows)]
        data_flagged = recfunctions.append_fields(self.observation_matrix, 'holdout',
                                                  np.zeros(self.data_rows)).view(np.recarray)
        for i in np.unique(country_years):
            data_flagged.holdout[np.where(data_flagged.country + '_' +
                                          data_flagged.year.astype('|S4') == i)[0]] = \
                np.random.binomial(1, holdout_prop)
        self.training_data = np.delete(data_flagged,
                                       np.where(data_flagged.holdout == 1)[0], axis=0)
        self.test_data = np.delete(data_flagged,
                                   np.where(data_flagged.holdout == 0)[0], axis=0)
        self.training_type = 'country-year'
        print 'Fitting model to ' + str((1 - holdout_prop) * 100) + '% of country-years'
    elif holdout_unit == 'country':
        data_flagged = recfunctions.append_fields(self.observation_matrix, 'holdout',
                                                  np.zeros(self.data_rows)).view(np.recarray)
        for i in self.country_list:
            data_flagged.holdout[np.where(data_flagged.country == i)[0]] = \
                np.random.binomial(1, holdout_prop)
        self.training_data = np.delete(data_flagged,
                                       np.where(data_flagged.holdout == 1)[0], axis=0)
        self.test_data = np.delete(data_flagged,
                                   np.where(data_flagged.holdout == 0)[0], axis=0)
        self.training_type = 'country'
        print 'Fitting model to ' + str((1 - holdout_prop) * 100) + '% of countries'
    else:
        raise ValueError("The holdout unit must be either 'datapoint', "
                         "'country-year', or 'country'.")
def Tracks2Cells(tracks, falarms=None):
    """
    Convert lists of tracks (and falarms) into a single recarray of storm cells
    with track IDs.

    This can be reversed with Cells2Tracks().
    """
    if falarms is None:
        falarms = []
    # NOTE: This function can not handle arrays of tracks that do/do not have
    #       a trackID field in a mix. Either they all have it, or not.
    if not any('trackID' in aTrack.dtype.names for aTrack in tracks):
        tracks = [nprf.append_fields(aTrack, 'trackID',
                                     [trackIndex] * len(aTrack), usemask=False)
                  for trackIndex, aTrack in enumerate(tracks)]
    if not any('trackID' in aTrack.dtype.names for aTrack in falarms):
        falarms = [nprf.append_fields(aTrack, 'trackID',
                                      [-trackIndex - 1] * len(aTrack), usemask=False)
                   for trackIndex, aTrack in enumerate(falarms)]
    # If both are empty, then create an array without hstack()
    if len(tracks) != 0 or len(falarms) != 0:
        allCells = np.hstack(tracks + falarms)
    else:
        allCells = np.array([], dtype=volume_dtype)
    return allCells
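# A minimal usage sketch for Tracks2Cells above. The two-field dtype is a
# stand-in for illustration; the real code has its own volume_dtype with more
# fields.
import numpy as np

track_dtype = [('xLocs', 'f4'), ('frameNums', 'i4')]
track_a = np.array([(0.0, 0), (1.0, 1)], dtype=track_dtype)
track_b = np.array([(5.0, 0)], dtype=track_dtype)

cells = Tracks2Cells([track_a, track_b])
print(cells['trackID'])  # [0 0 1] -- one ID per cell; falarms get negative IDs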
def get_raw_chamber_data(self, filtered_data):
    # chamber_dtype = numpy.dtype([('time_secs', '<u4'),
    #                              ('time_nsecs', '<u4'),
    #                              ('time_rel', '<f4'),
    #                              ('status', '|S25'),
    #                              ('tunnel', '<u2'),
    #                              ('fly_x', '<f4'),
    #                              ('fly_y', '<f4'),
    #                              ('fly_angle', '<f4'),
    #                              ])
    header = list(FILE_TOOLS.chamber_dtype.names)
    tracking_chamber_data = filtered_data[filtered_data['status'] != 'Walk To End']
    tracking_chamber_data = tracking_chamber_data[header]
    tracking_chamber_data = tracking_chamber_data.astype(FILE_TOOLS.chamber_dtype)
    tracking_chamber_data['tunnel'] = tracking_chamber_data['tunnel'] + 1

    indicies = tracking_chamber_data['status'] == 'End Chamber Ethanol'
    raw_chamber_data_ethanol = tracking_chamber_data[indicies]
    raw_chamber_data_ethanol = recfunctions.drop_fields(raw_chamber_data_ethanol,
                                                        'status', usemask=False)
    status_array = numpy.array(['Ethanol'] * len(raw_chamber_data_ethanol), dtype='|S25')
    raw_chamber_data_ethanol = recfunctions.append_fields(raw_chamber_data_ethanol,
                                                          'status', status_array,
                                                          dtypes='|S25', usemask=False)
    raw_chamber_data = raw_chamber_data_ethanol
    ethanol_start_time = raw_chamber_data_ethanol['time_rel'][0]

    indicies = tracking_chamber_data['status'] == 'End Chamber Air'
    indicies &= tracking_chamber_data['time_rel'] < ethanol_start_time
    raw_chamber_data_air_before = tracking_chamber_data[indicies]
    raw_chamber_data_air_before = recfunctions.drop_fields(raw_chamber_data_air_before,
                                                           'status', usemask=False)
    status_array = numpy.array(['AirBefore'] * len(raw_chamber_data_air_before), dtype='|S25')
    raw_chamber_data_air_before = recfunctions.append_fields(raw_chamber_data_air_before,
                                                             'status', status_array,
                                                             dtypes='|S25', usemask=False)
    raw_chamber_data = recfunctions.stack_arrays(
        (raw_chamber_data_air_before, raw_chamber_data), usemask=False)

    indicies = tracking_chamber_data['status'] == 'End Chamber Air'
    indicies &= tracking_chamber_data['time_rel'] > ethanol_start_time
    raw_chamber_data_air_after = tracking_chamber_data[indicies]
    raw_chamber_data_air_after = recfunctions.drop_fields(raw_chamber_data_air_after,
                                                          'status', usemask=False)
    status_array = numpy.array(['AirAfter'] * len(raw_chamber_data_air_after), dtype='|S25')
    raw_chamber_data_air_after = recfunctions.append_fields(raw_chamber_data_air_after,
                                                            'status', status_array,
                                                            dtypes='|S25', usemask=False)
    raw_chamber_data = recfunctions.stack_arrays(
        (raw_chamber_data, raw_chamber_data_air_after), usemask=False)
    return raw_chamber_data
def add_constant(data, prepend=False):
    '''
    This appends a column of ones to an array if prepend==False.

    For ndarrays it checks to make sure a constant is not already included.
    If there is at least one column of ones then the original array is
    returned.  Does not check for a constant if a structured or recarray is
    given.

    Parameters
    ----------
    data : array-like
        `data` is the column-ordered design matrix
    prepend : bool
        True and the constant is prepended rather than appended.

    Returns
    -------
    data : array
        The original array with a constant (column of ones) as the first or
        last column.

    Notes
    -----
    .. WARNING::
       The default of prepend will be changed to True in the next release of
       statsmodels. We recommend to use an explicit prepend in any permanent
       code.
    '''
    import warnings
    warnings.warn("The default of `prepend` will be changed to True in the "
                  "next release, use explicit prepend", FutureWarning)
    if not data.dtype.names:
        data = np.asarray(data)
        if np.any(data[0] == 1):
            ind = np.squeeze(np.where(data[0] == 1))
            if ind.size == 1 and np.all(data[:, ind] == 1):
                return data
            elif ind.size > 1:
                for col in ind:
                    if np.all(data[:, col] == 1):
                        return data
        data = np.column_stack((data, np.ones((data.shape[0], 1))))
        if prepend:
            return np.roll(data, 1, 1)
    else:
        return_rec = data.__class__ is np.recarray
        if prepend:
            ones = np.ones((data.shape[0], 1), dtype=[('const', float)])
            data = nprf.append_fields(ones, data.dtype.names,
                                      [data[i] for i in data.dtype.names],
                                      usemask=False, asrecarray=return_rec)
        else:
            data = nprf.append_fields(data, 'const', np.ones(data.shape[0]),
                                      usemask=False, asrecarray=return_rec)
    return data
def __init__(self, pathToFile=""): self.path = pathToFile self._points = np.genfromtxt(pathToFile, delimiter=' ', names='birth, death', dtype='f8, f8') self.lifespan_coords = [] self.birth_coords = [] # Generate commonly-used derived fields: lifespan, avg_coord self._points = rf.append_fields(self._points, 'lifespan', self._points['death'] - self._points['birth'], dtypes='f8') self._points = rf.append_fields(self._points, 'avg_coord', (self._points['death'] + self._points['birth'])/2, dtypes='f8')
def count_good_trials():
    count_file = open(path_b_2_8 + '/count_trials_blink.txt', 'w')
    count_file.write('Subj C_inc C_tot NC_inc NC_tot 1_inc 1_tot 2_inc 2_tot '
                     '3_inc 3_tot 4_inc 4_tot\r\n')
    for file in file_list:
        d_data = load_data_eye(path_blink, file)
        trial_info = extract_trials_info(d_data)
        mask_blink_outlier = np.in1d(paradigm['Trial'], trial_info['Trial'])
        trial_info = nprec.append_fields(trial_info, 'Condition',
                                         paradigm['Condition'][mask_blink_outlier]).data
        task_trial = trial_info[trial_info['Condition'] != 'FIX']
        name = file.split('.')[0]
        try:
            behavioural = open_behavioural(path_b, name + '.xlsx')
        except IOError, err:
            print err
            continue
        m = mask_blink_outlier[1::2]
        trial_cond = nprec.append_fields(task_trial, ['Accuracy', 'Combination'],
                                         [behavioural['Accuracy'][m],
                                          behavioural['Combination'][m]]).data
        par = nprec.append_fields(paradigm[1::2], 'Accuracy',
                                  behavioural['Accuracy']).data
        '''
        trial_cond = trial_cond[trial_cond['Accuracy'] == 1]
        par = par[par['Accuracy'] == 1]
        behavioural = behavioural[behavioural['Accuracy'] == 1]
        '''
        count_file.write(file + ' ')
        count_file.write(str(np.count_nonzero(trial_cond['Condition'] == 'C')))
        count_file.write(' ')
        count_file.write(str(np.count_nonzero(par['Condition'] == 'C')))
        count_file.write(' ')
        count_file.write(str(np.count_nonzero(trial_cond['Condition'] == 'NC')))
        count_file.write(' ')
        count_file.write(str(np.count_nonzero(par['Condition'] == 'NC')))
        count_file.write(' ')
        count_file.write(str(np.count_nonzero(trial_cond['Combination'] == 1)))
        count_file.write(' ')
        count_file.write(str(np.count_nonzero(behavioural['Combination'] == 1)))
        count_file.write(' ')
        count_file.write(str(np.count_nonzero(trial_cond['Combination'] == 2)))
        count_file.write(' ')
        count_file.write(str(np.count_nonzero(behavioural['Combination'] == 2)))
        count_file.write(' ')
        count_file.write(str(np.count_nonzero(trial_cond['Combination'] == 3)))
        count_file.write(' ')
        count_file.write(str(np.count_nonzero(behavioural['Combination'] == 3)))
        count_file.write(' ')
        count_file.write(str(np.count_nonzero(trial_cond['Combination'] == 4)))
        count_file.write(' ')
        count_file.write(str(np.count_nonzero(behavioural['Combination'] == 4)))
        count_file.write(' \r\n')
def DESdperp_to_SDSSdperp(fullsdss, fulldes):
    modelmag_g = fullsdss['MODELMAG_G'] - fullsdss['EXTINCTION_G']
    modelmag_r = fullsdss['MODELMAG_R'] - fullsdss['EXTINCTION_R']
    modelmag_i = fullsdss['MODELMAG_I'] - fullsdss['EXTINCTION_I']
    modelmag_z = fullsdss['MODELMAG_Z'] - fullsdss['EXTINCTION_Z']
    dperp_sdss = (modelmag_r - modelmag_i) - (modelmag_g - modelmag_r) / 8.0

    des, sdss = match(fulldes, fullsdss)
    modelmag_g_des = des['MODELMAG_G_DES'] - des['XCORR_SFD98_G']
    modelmag_r_des = des['MODELMAG_R_DES'] - des['XCORR_SFD98_R']
    modelmag_i_des = des['MODELMAG_I_DES'] - des['XCORR_SFD98_I']
    modelmag_z_des = des['MODELMAG_Z_DES'] - des['XCORR_SFD98_Z']
    dperp_des = (modelmag_r_des - modelmag_i_des) - (modelmag_g_des - modelmag_r_des) / 8.0

    expcut = (des['IM3_GALPROF'] == 1)
    devcut = (des['IM3_GALPROF'] == 2)
    # magcut = ((des['MODELMAG_G_DES'] < 22.0) & (des['MODELMAG_R_DES'] < 22.0) &
    #           (des['MODELMAG_I_DES'] < 22.0) & (des['MODELMAG_Z_DES'] < 22.0))
    # magcut = ((sdss['MODELMAG_R'] < 22.0) & (sdss['MODELMAG_I'] < 22.0)
    #           & (sdss['MODELMAG_G'] < 22.0) & (sdss['MODELMAG_Z'] < 22.0))
    use = (  # (18.0 < des['CMODELMAG_I_DES']) &
        # (20.9 > des['CMODELMAG_I_DES']) &
        # (des['MODELMAG_R_DES'] - des['MODELMAG_I_DES'] < 2.) &
        (des['FIBER2MAG_I_DES'] < 21.5))
    # use = use & magcut

    des_exp = des[expcut & use]
    des_dev = des[devcut & use]
    sdss_exp = sdss[expcut & use]
    sdss_dev = sdss[devcut & use]
    # des_exp_dperp = dperp_fitting(des_exp, sdss_exp)
    # des_dev_dperp = dperp_fitting(des_dev, sdss_dev)
    # SDSSlike_dperp = np.zeros(len(fulldes), dtype=np.float32)
    # SDSSlike_dperp[expcut & use] = des_exp_dperp
    # SDSSlike_dperp[devcut & use] = des_dev_dperp

    try:
        # fulldes = rf.append_fields(fulldes, 'DPERP_DES', SDSSlike_dperp)
        fulldes = rf.append_fields(fulldes, 'DPERP', dperp_des)
        fullsdss = rf.append_fields(fullsdss, 'DPERP', dperp_sdss)
    except ValueError:
        # fulldes['DPERP_DES'] = SDSSlike_dperp
        fulldes['DPERP'] = dperp_des
        fullsdss['DPERP'] = dperp_sdss
    return fullsdss, fulldes
def add_constant(data, prepend=True, has_constant='skip'):
    '''
    Adds a column of ones to an array (appended if prepend==False).

    Parameters
    ----------
    data : array-like
        `data` is the column-ordered design matrix
    prepend : bool
        True and the constant is prepended rather than appended.
    has_constant : str {'raise', 'add', 'skip'}
        Behavior if ``data`` already has a constant. The default will return
        data without adding another constant. If 'raise', will raise an error
        if a constant is present. Using 'add' will duplicate the constant, if
        one is present. Has no effect for structured or recarrays. There is no
        checking for a constant in this case.

    Returns
    -------
    data : array
        The original array with a constant (column of ones) as the first or
        last column.
    '''
    if _is_using_pandas(data, None):
        # work on a copy
        return _pandas_add_constant(data.copy(), prepend, has_constant)
    else:
        data = np.asarray(data)
    if not data.dtype.names:
        var0 = data.var(0) == 0
        if np.any(var0):
            if has_constant == 'raise':
                raise ValueError("data already contains a constant.")
            elif has_constant == 'skip':
                return data
            elif has_constant == 'add':
                pass
            else:
                raise ValueError("Option {0} not understood for "
                                 "has_constant.".format(has_constant))
        data = np.column_stack((data, np.ones((data.shape[0], 1))))
        if prepend:
            return np.roll(data, 1, 1)
    else:
        return_rec = data.__class__ is np.recarray
        if prepend:
            ones = np.ones((data.shape[0], 1), dtype=[('const', float)])
            data = nprf.append_fields(ones, data.dtype.names,
                                      [data[i] for i in data.dtype.names],
                                      usemask=False, asrecarray=return_rec)
        else:
            data = nprf.append_fields(data, 'const', np.ones(data.shape[0]),
                                      usemask=False, asrecarray=return_rec)
    return data
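# A short sketch of both branches of add_constant above (assuming the
# statsmodels-style helpers it references, _is_using_pandas and
# _pandas_add_constant, are in scope). Plain ndarrays get a column of ones;
# structured arrays get a named 'const' field via append_fields.
import numpy as np

X = np.arange(6, dtype=float).reshape(3, 2)
print(add_constant(X))  # first column is all ones (prepend=True default)

R = np.rec.fromarrays([np.array([1.0, 2.0, 3.0])], names='x')
print(add_constant(R, prepend=False).dtype.names)  # ('x', 'const')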
def readascii(self, infile, form='short'):
    if form == 'short':
        dnames = ('name', 'targ_ra', 'targ_dec', 'equinox', 'mag', 'band', 'priority')
        dformat = ('S30', 'f4', 'f4', 'i4', 'f4', 'S1', 'f4')
        object_arr = np.loadtxt(infile, dtype={'names': dnames, 'formats': dformat},
                                converters={1: ra_read, 2: dec_read})
        # determine the missing values
        mnames = []
        mtypes = []
        for i in range(len(self.dnames)):
            if self.dnames[i] not in dnames:
                mnames.append(self.dnames[i])
                mtypes.append(self.dformat[i])
        # set up the default values
        default_list = [np.zeros(len(object_arr))] * len(mnames)
        default_list[0] = default_list[0] + self.default_width
        default_list[1] = default_list[1] + 0.5 * self.default_length
        default_list[2] = default_list[2] + 0.5 * self.default_length
        object_arr = rfn.append_fields(object_arr, names=mnames, data=default_list,
                                       dtypes=mtypes, fill_value=0, usemask=False)
    elif form == 'long':
        dnames = ('name', 'targ_ra', 'targ_dec', 'equinox', 'mag', 'band',
                  'priority', 'width', 'length', 'tilt')
        dformat = ('S30', 'f4', 'f4', 'i4', 'f4', 'S1', 'f4', 'f4', 'f4', 'f4')
        object_arr = np.loadtxt(infile, dtype={'names': dnames, 'formats': dformat},
                                converters={1: ra_read, 2: dec_read})
        # determine the missing values
        mnames = []
        mtypes = []
        for i in range(len(self.dnames)):
            if self.dnames[i] not in dnames:
                mnames.append(self.dnames[i])
                mtypes.append(self.dformat[i])
        # set up the default values
        default_list = [np.zeros(len(object_arr))] * len(mnames)
        object_arr = rfn.append_fields(object_arr, names=mnames, data=default_list,
                                       dtypes=mtypes, fill_value=0, usemask=False)
    else:
        message = 'This format is not supported'
        raise SlitError(message)

    # set objects that are preselected
    object_arr['inmask_flag'] = 1.0 * (object_arr['priority'] >= 1.0)
    # set reference stars
    object_arr['refstar_flag'] = 1.0 * (object_arr['priority'] == -1.0)

    # stack the data if it already exists
    if self.data is None:
        self.data = object_arr
    else:
        self.data = self.add_arrays(self.data, object_arr)
    # total number of objects:
    self.nobjects = len(self.data)
    self.update_flags()
def test_append_fields_dtype_list(self):
    # Ticket #1676
    from numpy.lib.recfunctions import append_fields
    base = np.array([1, 2, 3], dtype=np.int32)
    names = ['a', 'b', 'c']
    data = np.eye(3).astype(np.int32)
    dlist = [np.float64, np.int32, np.int32]
    try:
        append_fields(base, names, data, dlist)
    except Exception:
        raise AssertionError()
def analyze_data(self, raw_data):
    initialized = False
    tunnels = set(raw_data['tunnel'])
    for tunnel in tunnels:
        tunnel_data_raw = raw_data[raw_data['tunnel'] == tunnel]
        time_rel = tunnel_data_raw['time_rel']
        delta_time = numpy.diff(time_rel)
        tunnel_array = numpy.ones(len(delta_time), dtype=numpy.uint16) * tunnel
        tunnel_array.dtype = numpy.dtype([('tunnel', '<u2')])
        tunnel_data_analyzed = tunnel_array
        fly_x = tunnel_data_raw['fly_x']
        delta_fly_x = numpy.diff(fly_x)
        fly_y = tunnel_data_raw['fly_y']
        delta_fly_y = numpy.diff(fly_y)
        distance = numpy.sqrt(numpy.square(delta_fly_x) + numpy.square(delta_fly_y))
        velocity = distance / delta_time
        fly_angle = tunnel_data_raw['fly_angle']
        delta_fly_angle = numpy.abs(numpy.diff(fly_angle))
        flipped = 180 - delta_fly_angle
        flipped_is_less = flipped < delta_fly_angle
        delta_fly_angle[flipped_is_less] = flipped[flipped_is_less]
        angular_velocity = delta_fly_angle / delta_time
        time_secs = tunnel_data_raw['time_secs'][:-1]
        time_nsecs = tunnel_data_raw['time_nsecs'][:-1]
        names = ['time_secs', 'time_nsecs']
        tunnel_data_seq = [time_secs, time_nsecs]
        tunnel_data_analyzed = recfunctions.append_fields(tunnel_data_analyzed,
                                                          names, tunnel_data_seq,
                                                          dtypes=numpy.uint64,
                                                          usemask=False)
        names = ['delta_time', 'delta_fly_x', 'delta_fly_y', 'distance',
                 'velocity', 'delta_fly_angle', 'angular_velocity']
        tunnel_data_seq = [delta_time, delta_fly_x, delta_fly_y, distance,
                           velocity, delta_fly_angle, angular_velocity]
        tunnel_data_analyzed = recfunctions.append_fields(tunnel_data_analyzed,
                                                          names, tunnel_data_seq,
                                                          dtypes=numpy.float32,
                                                          usemask=False)
        if initialized:
            analyzed_data = recfunctions.stack_arrays(
                (analyzed_data, tunnel_data_analyzed), usemask=False)
        else:
            analyzed_data = tunnel_data_analyzed
            initialized = True
    return analyzed_data
def vol_international(data, dbh, log_length):
    if log_length == 8:
        vol_8log = ((0.44 * (data[dbh]))**2) - (1.20 * (data[dbh]) - (1.30))
        return recfunctions.append_fields(data, "8ft_log_vol", vol_8log)
    elif log_length == 12:
        vol_12log = ((0.66 * (data[dbh]))**2) - (1.47 * (data[dbh]) - (0.79))
        return recfunctions.append_fields(data, "12ft_log_vol", vol_12log)
    elif log_length == 16:
        vol_16log = ((0.88 * (data[dbh]))**2) - (1.52 * (data[dbh]) - (1.36))
        return recfunctions.append_fields(data, "16ft_log_vol", vol_16log)
    else:
        print "No valid log length entered!"
        sys.exit(1)
def add_field(array, field_name, field_type, replace=False):
    """
    Note: I think we're passed a copy of the array, so you have to take the
    result and assign it to your variable, for example:
        data = add_field(data, 'new_field', float32)
    Possibilities for field_type include: float32, _s
    Delete this and only use the method below if it proves to be good enough
    """
    if not data_mine.has_fields(array, [field_name]):
        return recfunctions.append_fields(array, field_name, zeros(len(array)),
                                          field_type, usemask=False)
    elif replace:
        return recfunctions.append_fields(array, field_name, zeros(len(array)),
                                          field_type, usemask=False)
    else:
        # Field already exists and replace is False: return the array unchanged.
        return array
def compare_columns(data, name_1, name_2, name_combo, mask, function):
    '''
    Compare two columns and if there is data in both of them complete a
    particular function. Functions can be 'average', 'diff' or 'max'.

    The mask is a column of 0, 1, 2 and 3 which tells you whether to use
    neither data point (999), only data['name_1'], only data['name_2'], or
    carry out the function on both data points.

    Returns: data
    '''
    # Define your data
    data_1 = data[name_1]
    data_2 = data[name_2]
    data_combo_name = name_combo
    # Fill in the data initially with 999s
    data_combo = np.ones_like(data_1) * 999.
    # If there is only one data point fill that in
    data_combo[mask == 1] = data_1[mask == 1]
    data_combo[mask == 2] = data_2[mask == 2]
    # If there are two data points calculate:
    if function == 'average':
        data_combo[mask == 3] = (data_1[mask == 3] + data_2[mask == 3]) / 2
    elif function == 'diff':
        data_combo[mask == 3] = (data_2[mask == 3] - data_1[mask == 3])
    elif function == 'max':
        data_combo[mask == 3] = np.maximum(data_2[mask == 3], data_1[mask == 3])
    # Append this data to our recarray
    data = rec.append_fields(base=data, names=name_combo, data=data_combo,
                             dtypes=None, usemask=False, asrecarray=True)
    # And also include a copy of the mask
    data = rec.append_fields(base=data, names=name_combo + '_mask', data=mask,
                             dtypes=None, usemask=False, asrecarray=True)
    return data
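# A small illustration of the mask convention documented above (0: neither,
# 1: first only, 2: second only, 3: both); the field names are made up for
# the example.
import numpy as np

data = np.array([(1.0, 9.0), (2.0, 4.0), (3.0, 5.0), (7.0, 1.0)],
                dtype=[('v1', 'f8'), ('v2', 'f8')])
mask = np.array([0, 1, 2, 3])
out = compare_columns(data, 'v1', 'v2', 'v_av', mask, 'average')
print(out['v_av'])       # [999.   2.   5.   4.]
print(out['v_av_mask'])  # [0 1 2 3]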
def load_data(fname='Brain24ft_v0307-Jun-2014 091700_labels.csv'):
    # NOTE "nodes" is used to refer to both nodes and bars! TODO fix this naming convention
    X = np.genfromtxt(fname, delimiter=',', names=True,
                      dtype=[('step', '|S50'), ('nodes', '|S50'),
                             ('angles', '|S50'), ('length', '|S50')])
    X = recfunctions.append_fields(X, 'step int', np.zeros((X.shape[0],), dtype=int))
    X = recfunctions.append_fields(X, 'node 1 inout', np.zeros((X.shape[0],), dtype='|S3'))
    X = recfunctions.append_fields(X, 'node 2 inout', np.zeros((X.shape[0],), dtype='|S3'))
    X = recfunctions.append_fields(X, 'node 1 TLA', np.zeros((X.shape[0],), dtype='|S3'))
    X = recfunctions.append_fields(X, 'node 2 TLA', np.zeros((X.shape[0],), dtype='|S3'))
    X = recfunctions.append_fields(X, 'node 1 order', np.zeros((X.shape[0],), dtype=int))
    X = recfunctions.append_fields(X, 'node 2 order', np.zeros((X.shape[0],), dtype=int))
    X = recfunctions.append_fields(X, 'bar length', np.zeros((X.shape[0],), dtype=float))
    for ii in range(X.shape[0]):
        X['step int'][ii] = extract_step_number(X['step'][ii])
        X['node 1 TLA'][ii], X['node 1 order'][ii], X['node 1 inout'][ii], \
            X['node 2 TLA'][ii], X['node 2 order'][ii], X['node 2 inout'][ii], \
            X['bar length'][ii] = \
            extract_nodes_length(X['nodes'][ii] + ', ' + X['length'][ii])
    max_order = defaultdict(int)
    for ii in range(X.shape[0]):
        tla = X['node 1 TLA'][ii]
        order = X['node 1 order'][ii]
        if order > max_order[tla]:
            max_order[tla] = order
        tla = X['node 2 TLA'][ii]
        order = X['node 2 order'][ii]
        if order > max_order[tla]:
            max_order[tla] = order
    return X, max_order
def build_utc_array(source, sink, start, end):
    source_prices = retrieve_node_data(source, start, end)
    sink_prices = retrieve_node_data(sink, start, end)
    source_data = []
    for element in source_prices:
        source_data.append((element[0].replace(tzinfo=pytz.timezone('EST')),
                            element[1], element[2], element[5]))
    sink_data = []
    for element in sink_prices:
        sink_data.append((element[0].replace(tzinfo=pytz.timezone('EST')),
                          element[1], element[2], element[5]))
    sink_dt = numpy.dtype([('time_id', 'S32'), ('sink_node_id', 'i8'),
                           ('sink_rt_lmp', 'f8'), ('sink_da_lmp', 'f8')])
    source_dt = numpy.dtype([('time_id', 'S32'), ('source_node_id', 'i8'),
                             ('source_rt_lmp', 'f8'), ('source_da_lmp', 'f8')])
    sink_array = numpy.array(sink_data, dtype=sink_dt)
    source_array = numpy.array(source_data, dtype=source_dt)
    joined = rfn.join_by('time_id', sink_array, source_array,
                         jointype='inner', usemask=False)
    rt_congestion_rounded = numpy.round(joined['sink_rt_lmp'] - joined['source_rt_lmp'], 2)
    da_congestion_rounded = numpy.round(joined['sink_da_lmp'] - joined['source_da_lmp'], 2)
    profit_rounded = numpy.round(rt_congestion_rounded - da_congestion_rounded, 2)
    joined = rfn.append_fields(joined, 'rt_congestion', data=rt_congestion_rounded)
    joined = rfn.append_fields(joined, 'da_congestion', data=da_congestion_rounded)
    joined = rfn.append_fields(joined, 'profit', data=profit_rounded)
    return joined[['time_id', 'rt_congestion']]
def readPhotInfo(self, level=.5):
    '''
    Read the photometry band information associated with photometry of this
    SED.

    @keyword level: The level at which the cut off for significant
                    transmission of the photometric bands is placed.

                    (default: 0.5)
    @type level: float

    '''
    # -- Get photometry bands info from IvS repo. recarray structure same as
    #    self.photbands_ivs
    filter_info = filters.get_info()
    keep = np.searchsorted(filter_info['photband'], self.photbands)
    self.filter_info = filter_info[keep]
    self.filter_info.eff_wave = self.filter_info.eff_wave / 1e4
    response = [filters.get_response(photband) for photband in self.photbands]
    selection = [waver[transr / max(transr) > level] / 1e4
                 for waver, transr in response]
    wlower = [sel[0] for sel in selection]
    wupper = [sel[-1] for sel in selection]
    self.filter_info = recfunc.append_fields(self.filter_info,
                                             ['wlower', 'wupper'],
                                             [wlower, wupper],
                                             usemask=0, asrecarray=1)
def merge_cort(data, cortisol_filename):
    cort_data = np.genfromtxt(cortisol_filename, dtype=None, names=True, delimiter='\t')
    names = list(cort_data.dtype.names)
    # Find all the columns in cort_data that have 'av' in their title
    # and not '_mask'
    drop_names = names[8:]
    cort_data = nprf.drop_fields(cort_data, drop_names, usemask=False, asrecarray=True)
    data = nprf.join_by('SubID', data, cort_data, jointype='leftouter',
                        r1postfix='KW', r2postfix='KW2',
                        usemask=False, asrecarray=True)
    # Bizarrely, the join_by function pads with the biggest numbers it can think of!
    # So we're going to replace everything over 999 with 999
    for name in names[1:8]:
        data[name][data[name] > 999] = 999
    # Define a UsableCort field: 1 if ANY of the cortisol values are not 999
    cort_array = np.vstack([data[name] for name in names[1:8]])
    usable_cort_array = np.zeros(cort_array.shape[1])
    usable_cort_array[np.any(cort_array != 999, axis=0)] = 1
    data = nprf.append_fields(base=data, names='UsableCort',
                              data=usable_cort_array, usemask=False)
    return data
def filter_effects(self):
    """
    Merge effects and data, and flip effect alleles
    """
    effect_positions = self.effects[["CHR", "POS"]]
    data_positions = self.data.snp[["CHR", "POS"]]
    effect_include = np.in1d(effect_positions, data_positions)
    data_include = np.in1d(data_positions, effect_positions)
    self.data.filter_snps(data_include)
    self.effects = self.effects[effect_include]
    # Just give up and convert to float. I have no idea why int doesn't work
    # here, but it's something to do with the fact that you can't have None as
    # a numpy int whereas float gets converted to nan.
    tmp_data = nprec.append_fields(self.data.snp, "GENO", None,
                                   dtypes=[(float, self.data.geno.shape[1])],
                                   usemask=False)
    tmp_data["GENO"] = self.data.geno
    self.effects = nprec.join_by(["CHR", "POS"], self.effects, tmp_data,
                                 usemask=False, jointype="inner")
    flipped = 0
    removed = 0
    for rec in self.effects:
        if rec["EFFECT"] == rec["REF"] and rec["OTHER"] == rec["ALT"]:
            pass
        elif rec["OTHER"] == rec["REF"] and rec["EFFECT"] == rec["ALT"]:
            flipped += 1
            rec["OTHER"] = rec["ALT"]
            rec["EFFECT"] = rec["REF"]
            rec["BETA"] = -rec["BETA"]
        else:
            removed += 1
            rec["EFFECT"] = rec["OTHER"] = "N"
    self.effects = self.effects[self.effects["EFFECT"] != "N"]
    print("Removed " + str(removed) + " non-matching alleles", file=sys.stderr)
    print("Flipped " + str(flipped) + " alleles", file=sys.stderr)
def knearest(distarr, k):
    dists = distarr.shape[1]
    pointdists = np.zeros((dists, 2))
    scores = np.array(distarr[:, dists - 1])
    nearest = []
    # for each test point
    for i in range(dists - 1):
        pointdists = distarr[:, i]
        pointdists = append_fields(pointdists, 'scores', scores, usemask=False)
        # get distances sorted smallest to largest
        sorted = np.sort(pointdists)
        # get k smallest classifiers (+1 or -1) from the sorted list
        nearest.append(sorted[:k]['scores'])
    # list of each test point's k nearest neighbors as their classifiers
    return nearest
def get_demodulated_data_from_list(filelist, freq=10, supply_index=False, phase_offset=0):
    filelist.sort()  # just in case
    dd = []
    for f in filelist:
        # only use full size files
        stats = os.stat(f)
        # print(stats.st_size)
        if stats.st_size > 5000000:  # full length is 10752000
            d = demod.demodulate_dat(f, freq, supply_index=False, phase_offset=phase_offset)
            # filename is start of data taking (I think) and we'll just add
            # 1/samprate seconds per rev
            h = np.float64(f[-12:-10])
            m = np.float64(f[-10:-8])
            s = np.float64(f[-8:-6])
            t = h + m / 60. + (s + (d['rev'] - d['rev'][0]) / samprate) / 3600.
            d = recf.append_fields(d, 'localtime', t)
            ut = np.mod(t + 7., 24.)
            if len(f) > 21:
                y = np.zeros(len(d), dtype=np.int) + np.int(f[-21:-17])
                mo = np.zeros(len(d), dtype=np.int) + np.int(f[-17:-15])
                dy = np.zeros(len(d), dtype=np.int) + np.int(f[-15:-13])
                ut = np.mod(t + 7., 24.)
                utt = t + 7.
                dy[utt > ut] = dy[utt > ut] + 1
                d = recf.append_fields(d, ['year', 'month', 'day'], [y, mo, dy])
            d = recf.append_fields(d, 'ut', ut)
            dd.append(d)
    return np.concatenate(dd)
def __setattr__(self, name, value):
    listofattributes = self.__dict__.keys()
    if isinstance(value, numpy.ndarray) and name != "_data" and name not in listofattributes:
        if value.shape != self.data.shape:
            raise ValueError('Arrays should have the same dimensions')
        else:
            from numpy.lib import recfunctions
            detail = getattr(value, 'provenance', None)
            data = recfunctions.append_fields(self.data, name, value, usemask=False)
            self._data = data
            self._provenance += Transformation("Array {0} has been created".format(name),
                                               detail)
            self.__add_var(name)
    else:
        dict.__setattr__(self, name, value)
def freq(a, cls_flds=None, stat_fld=None):
    """Frequency and crosstabulation

    Parameters
    ----------
    a : array
        A structured array.
    cls_flds : list of fields
        Fields to use as the classification keys in the analysis.
    stat_fld : field
        Optional numeric field to summarize within each class.

    Notes
    -----
    1. Slice the input array by the classification fields.
    2. Sort the sliced array using the flds as sorting keys.
    3. Use unique on the sorted array to return the results and the counts.

    >>> np.unique(ar, return_index=False, return_inverse=False,
    ...           return_counts=True, axis=None)
    """
    if stat_fld is None:
        a = a[cls_flds]  # (1) It is actually faster to slice the whole table
    else:
        all_flds = cls_flds + [stat_fld]
        a = a[all_flds]
    idx = np.argsort(a, axis=0, order=cls_flds)  # (2)
    a_sort = a[idx]
    uni, inv, cnts = np.unique(a_sort[cls_flds], False, True,
                               return_counts=True)  # (3)
    out_flds = "Counts"
    out_data = cnts
    if stat_fld is not None:
        splitter = np.where(np.diff(inv) == 1)[0] + 1
        a0 = a_sort[stat_fld]
        splits = np.split(a0, splitter)
        sums = np.asarray([np.nansum(i.tolist()) for i in splits])
        nans = np.asarray([np.sum(np.isnan(i.tolist())) for i in splits])
        mins = np.asarray([np.nanmin(i.tolist()) for i in splits])
        means = np.asarray([np.nanmean(i.tolist()) for i in splits])
        maxs = np.asarray([np.nanmax(i.tolist()) for i in splits])
        out_flds = [out_flds, stat_fld + "_sums", stat_fld + "_NaN",
                    stat_fld + "_min", stat_fld + "_mean", stat_fld + "_max"]
        out_data = [out_data, sums, nans, mins, means, maxs]
    out = append_fields(uni, names=out_flds, data=out_data, usemask=False)
    return out
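# A quick demonstration of freq on a toy structured array (the field names
# 'cls' and 'val' are invented for this example).
import numpy as np

a = np.array([('A', 1.0), ('A', 3.0), ('B', 2.0)],
             dtype=[('cls', 'U1'), ('val', 'f8')])
print(freq(a, cls_flds=['cls']))
# -> one row per unique class plus a Counts column: A has 2, B has 1
print(freq(a, cls_flds=['cls'], stat_fld='val').dtype.names)
# -> ('cls', 'Counts', 'val_sums', 'val_NaN', 'val_min', 'val_mean', 'val_max')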
def genrand(data, n, cosmo, width=.2, plot=True, plot_filename=None):
    '''
    generates random catalog with random sky distribution and redshift

    To filter based on the BASS sensitivity map, set 'use_BASS_sens_map' to True
    '''
    goodz = data['z'] > 0
    if 'weight' in data.dtype.names:
        weights = data['weight'][goodz]
    else:
        weights = None
    d = data[goodz]
    z_arr = d['z']
    ra_arr = d['ra']
    dec_arr = d['dec']
    ur, uind = np.unique(d['ra'], return_index=True)
    udata = d[uind]
    ndata = len(udata)

    # generate random redshifts
    n_rand = int(round(n * ndata))
    z_grid = np.linspace(min(z_arr), max(z_arr), 1000)
    kde = weighted_gaussian_kde(z_arr, bw_method=width, weights=weights)
    kdepdfz = kde.evaluate(z_grid)
    zr_arr = generate_rand_from_pdf(pdf=kdepdfz, num=n_rand, x_grid=z_grid)

    # generate sky coords
    ind = np.random.randint(ndata, size=n_rand)
    rar_arr = udata['ra'][ind]
    decr_arr = udata['dec'][ind]
    temp = list(zip(zr_arr, rar_arr, decr_arr))
    rcat = np.zeros((len(zr_arr),),
                    dtype=[('z', '<f8'), ('ra', '<f8'), ('dec', '<f8')])
    rcat[:] = temp
    randoms = rcat
    rcdists = np.array([cosmo.comoving_distance(z).value
                        for z in randoms['z']]) * cosmo.h
    randoms = append_fields(randoms, 'cdist', rcdists)
    random = np.array(randoms)
    print('number of randoms:', len(randoms))
    if plot:
        plot_zdist(d, randoms, z_grid, kdepdfz, plot_filename, weights=weights)
    return randoms
def add_standard_properties(self):
    """
    Augments the dataset with a series of standard properties that we use in
    assembly bias calculations.
    """
    # 5 definitions of halo concentration.
    cNFW200b = self.data['halo_R200b'] / self.data['halo_rs']
    cV200b = self.calculate_cV('200b')
    cVsp_mean = self.calculate_cV('sp_mean')
    cVsp_percentile75 = self.calculate_cV('sp_percentile75')
    cVsp_percentile87 = self.calculate_cV('sp_percentile87')
    # Plus several halo size ratios.
    sizeratiosp87_200b = self.data['halo_Rsp_percentile87'] / self.data['halo_R200b']
    sizeratiosp75_200b = self.data['halo_Rsp_percentile75'] / self.data['halo_R200b']
    sizeratiospmean_200b = self.data['halo_Rsp_mean'] / self.data['halo_R200b']
    sizeratiosp87_spmean = self.data['halo_Rsp_percentile87'] / self.data['halo_Rsp_mean']
    # And the same mass ratios
    massratiosp87_200b = self.data['halo_Msp_percentile87'] / self.data['halo_M200b']
    massratiosp75_200b = self.data['halo_Msp_percentile75'] / self.data['halo_M200b']
    massratiospmean_200b = self.data['halo_Msp_mean'] / self.data['halo_M200b']
    massratiosp87_spmean = self.data['halo_Msp_percentile87'] / self.data['halo_Msp_mean']
    # And randoms for calculation ease
    uniformrands = np.random.uniform(0, 1, len(self.data))
    # Add these all into the data.
    self.data = append_fields(
        self.data,
        ('halo_cNFW200b', 'halo_cV200b', 'halo_cVsp_mean',
         'halo_cVsp_percentile75', 'halo_cVsp_percentile87',
         'halo_sizeratiosp87_200b', 'halo_sizeratiosp75_200b',
         'halo_sizeratiospmean_200b', 'halo_sizeratiosp87_spmean',
         'halo_massratiosp87_200b', 'halo_massratiosp75_200b',
         'halo_massratiospmean_200b', 'halo_massratiosp87_spmean',
         'err_rands'),
        (cNFW200b, cV200b, cVsp_mean, cVsp_percentile75, cVsp_percentile87,
         sizeratiosp87_200b, sizeratiosp75_200b, sizeratiospmean_200b,
         sizeratiosp87_spmean, massratiosp87_200b, massratiosp75_200b,
         massratiospmean_200b, massratiosp87_spmean, uniformrands),
        usemask=False)
def load_array(self, d, file):
    import time
    t0 = time.time()
    if self.params['has_sheared'] & (file == 'shapefile'):
        d['flags_1p'] = 'flags_select_1p'
        d['flags_1m'] = 'flags_select_1m'
        d['flags_2p'] = 'flags_select_2p'
        d['flags_2m'] = 'flags_select_2m'
    if self.params['pdf_type'] == 'pdf':
        keys = [key for key in d.keys()
                if (d[key] is not None) & (key != 'pzstack')]
    else:
        keys = [key for key in d.keys() if (d[key] is not None)]
    if 'objid' in keys:
        dtypes = [('objid', 'i8')]
    else:
        raise ValueError('missing object id in ' + file)
    dtypes += [(key, 'f8') for key in keys if (key != 'objid')]
    if self.params['pdf_type'] == 'pdf':
        dtypes += [('pzstack_' + str(i), 'f8')
                   for i in range(len(self.params['pdf_z']))]
    fits = fio.FITS(self.params[file])[-1]
    array = fits.read(columns=[d[key] for key in keys])
    array = rename_fields(array, {v: k for k, v in d.iteritems()})
    if ('weight' not in array.dtype.names) & (file == 'shapefile'):
        array = append_fields(array, 'weight', np.ones(len(array)), usemask=False)
    if self.params['pdf_type'] == 'pdf':
        for i in range(len(self.params['pdf_z'])):
            array['pzstack' + str(i)] = fits.read(columns=d['pzstack'] + str(i))
    if np.any(np.diff(array['objid']) < 1):
        raise ValueError('misordered or duplicate ids in ' + file)
    return array
def cross_validate(args):
    assert len(args['bw_key']) == len(args['bw'])
    if not os.path.exists(args['outfolder']):
        os.makedirs(args['outfolder'])
    args['phi0'] *= 1e-18  # correct units
    kf = KFold(n_splits=args['kfold'], random_state=args['rs'], shuffle=True)
    config = read_config()
    print('Load MC: {}'.format(config['IC_MC']['path']))
    mc = np.load(str(config['IC_MC']['path']))[:]
    mc = mc_cut(mc)
    if args['weights'] == 'pl':
        weights = mc['orig_OW'] * plaw(mc['trueE'], phi0=args['phi0'],
                                       gamma=args['gamma'])
    elif args['weights'] == 'conv':
        weights = mc['conv']
    elif args['weights'] == 'conv+pl':
        diff_weight = mc['orig_OW'] * plaw(mc['trueE'], phi0=args['phi0'],
                                           gamma=args['gamma'])
        weights = mc['conv'] + diff_weight
        print('Rates [1/yr]:')
        print(np.sum(mc['conv']) * np.pi * 1e7)
        print(np.sum(diff_weight) * np.pi * 1e7)
    else:
        print('{} is not a valid weights argument'.format(args['weights']))
        sys.exit(0)
    mc = append_fields(mc, 'cur_weight', weights)
    args['weights'] = 'default'
    model, mname = load_model(args['model'])
    bw_dict = dict()
    for i, key in enumerate(args['bw_key']):
        bw_dict[key] = args['bw'][i]
    lh_arr, zero_arr = [], []
    for train_index, val_index in kf.split(mc):
        args['no_save'] = True
        res_dict = create_KDE(args, mc=mc[train_index], bws=bw_dict)
        mc_val = mc[val_index]
        val_settings, grid = model.setup_KDE(mc_val)
        lh, zeros = do_validation(res_dict, val_settings, mc_val['cur_weight'])
        print('Number of zeros {}'.format(zeros))
        print('Likelihood Value {}'.format(lh))
        zero_arr.append(zeros)
        lh_arr.append(lh)
    fname = ''
    for i in range(len(args['bw'])):
        fname += '{}_{}_'.format(args['bw_key'][i], args['bw'][i])
    fname = fname[:-1] + '.npy'
    odict = {'zeros': zero_arr, 'lh': lh_arr}
    np.save(os.path.join(args['outfolder'], fname), odict)
def AddColumns(arr, columnName):
    types = np.unique(arr[columnName])
    for type in types:
        # one indicator column per distinct value, initialised to -1
        arr = rfn.append_fields(arr, names=columnName + "_" + type,
                                data=-np.ones(len(arr)), usemask=False)
    for r in arr:
        for type in types:
            if r[columnName] == type:
                r[columnName + '_' + type] = 1
                break
    return rfn.drop_fields(arr, drop_names=columnName)
def add_groups(g, group_dict):
    """ Add group info to a sGraph object; if already present it raises a
    warning.
    """
    if hasattr(g, 'gv'):
        msg = 'Group info already present, will overwrite.'
        warnings.warn(msg, UserWarning)
    g.gv = graphs.GroupVertexList()
    g.group_dict = group_dict
    g.num_groups = len(group_dict)
    num_bytes = mt.get_num_bytes(g.num_groups)
    g.group_dtype = np.dtype('u' + str(num_bytes))
    if 'group' in g.v.dtype.names:
        g.v.group = group_dict
    else:
        g.v = append_fields(g.v, 'group', group_dict)
def __getattr__(self, attrname):
    # See __init__ for column names
    try:
        return self.data[attrname]
    except (KeyError, ValueError):
        pass
    # If we got here, stations column wasn't in file.
    # Try getting it from station mask.
    if attrname == 'stations':
        stations = self.hexMaskToStationCount()
        self.data = append_fields(self.data, ('stations',), (stations,))
        return stations
    return None
def compute_xt(self):
    """
    Compute dimensionless curvilinear distance `xt` (from 0 to 1)
    /!\ Column `Xt` has to exist
    """
    if len(self.array) > 1:
        xt = (self.array['Xt'] - self.array['Xt'][0]) / \
             (self.array['Xt'][-1] - self.array['Xt'][0])
    else:
        xt = np.empty(len(self.array))
        xt.fill(-999.)
    # Update or append `xt` array column
    if 'xt' in self.array.dtype.fields:
        self.array['xt'] = xt
    else:
        self.array = append_fields(self.array, 'xt', xt, usemask=False)
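# A short check of the normalization used above: `xt` runs from 0 at the first
# point to 1 at the last (the values here are arbitrary).
import numpy as np

Xt = np.array([100.0, 150.0, 300.0])
xt = (Xt - Xt[0]) / (Xt[-1] - Xt[0])
print(xt)  # [0.   0.25 1.  ]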
def __setitem__(self, key, value):
    value = np.asanyarray(value)
    # Filtered data handling (autofilled with 0)
    if len(value) == self.size:
        full_value = np.zeros(len(self._rdata), dtype=value.dtype)
        full_value[~self._rdata['mask']] = value
        value = full_value
    elif len(value) != len(self._rdata):
        raise ValueError('wrong dimension')
    if key in self.names:
        # Update
        self._rdata[key] = value
    else:
        # Add
        self._rdata = recfunctions.append_fields(self._rdata, names=key,
                                                 data=value, usemask=False)
def strength(self, get=False):
    """ Compute the undirected strength sequence.

    If get is true it returns the array otherwise it adds the result to v.
    """
    if 'strength' in self.v.dtype.names:
        strength = self.v.strength
    else:
        strength = mt.compute_strength(self.e, self.num_vertices)
        self.v = append_fields(self.v, 'strength', strength, dtypes=np.float64)
    if get:
        return strength
def in_strength(self, get=False):
    """ Compute the in strength sequence.

    If get is true it returns the array otherwise it adds the result to v.
    """
    if 'in_strength' in self.v.dtype.names:
        s_in = self.v.in_strength
    else:
        s_out, s_in = mt.compute_in_out_strength(self.e, self.num_vertices)
        self.v = append_fields(self.v, ['out_strength', 'in_strength'],
                               (s_out, s_in),
                               dtypes=[np.float64, np.float64])
    if get:
        return s_in
def getChromosomeBySample(self, sampleID, chromosome, getexons=True):
    d = rpkm_data()
    data_tbl = self.h5file.root._f_getChild("chr" + str(chromosome))
    sample_tbl = data_tbl._f_getChild("sample_" + sampleID)
    d.rpkm = sample_tbl.read(field="rpkm")
    if getexons:
        probe_tbl = self.h5file.root.probes._f_getChild("probes_chr" + str(chromosome))
        d.exons = probe_tbl.read()
        d.exons = rfn.append_fields(d.exons, "chrom",
                                    np.repeat(chromosome, len(d.exons)),
                                    usemask=False)
    d.contig = chromosome
    return d
def process_data(statData):
    # calculate heritability time
    tauHer = mlsg.calc_tauHer_numeric(statData['n0'], statData['mig'])
    tauVar = mlsg.calc_tauV(statData['cost'])
    tauHerRel = tauHer / statData['TAU_H']
    tauVar_rel = tauVar / statData['TAU_H']
    BH_cat = mlsg.make_categorial(statData['B_H'])
    dataToStore = (tauHer, tauVar, tauHerRel, tauVar_rel, BH_cat)
    nameToStore = ('tauHer', 'tauVar', 'tauHer_rel', 'tauVar_rel', 'BH_cat')
    statData = rf.append_fields(statData, nameToStore, dataToStore, usemask=False)
    return statData
def load_ill_data(filenumbers, prefix, monitor='M1'):
    """
    Loads one or several ILL data files and returns a single structured array
    """
    if type(filenumbers) is int:
        d = load_ill_ascii(prefix + str(filenumbers))
    else:
        d = load_ill_ascii(prefix + str(filenumbers[0]))
        for f in filenumbers[1::]:
            d = np.append(d, load_ill_ascii(prefix + str(f)))
    I = d['CNTS'] / d[monitor]
    err = np.sqrt(d['CNTS']) / d[monitor]
    d = append_fields(d, ['I', 'err'], [I, err])
    return d
def make_SHAM_mock(mock, P_xy, mock_prop='mvir', gal_prop='mstar',
                   use_log_mock_prop=True):
    """
    make a SHAM mock given a halo catalogue.

    Parameters
    ==========
    mock: array_like
        structured array containing halo catalogue

    P_xy: function
        probability function that returns probability of x_gal given y_halo

    mock_prop: string
        key into mock which returns the halo property to build the SHAM mock

    Returns
    =======
    mock: structured array
        mock with new column containing galaxy property gal_prop

    Notes
    =====
    The probability of galaxy property 'x' given a halo with property 'y',
    where mock[mock_prop] returns halo property 'y'.
    """
    from numpy.lib.recfunctions import append_fields
    mock = mock.view(np.recarray)
    if use_log_mock_prop:
        y = np.log10(mock[mock_prop])
    else:
        y = mock[mock_prop]
    x = P_xy(y).rvs(len(mock))
    if gal_prop in mock.dtype.names:
        mock[gal_prop] = x
    else:
        mock = append_fields(mock, gal_prop, x)
    return mock
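# A hedged usage sketch for make_SHAM_mock above. The halo catalogue is
# synthetic and the conditional distribution is a toy Gaussian built from
# scipy.stats, chosen only because the function expects P_xy(y) to return an
# object with an .rvs() method (as scipy frozen distributions do).
import numpy as np
from scipy.stats import norm

halos = np.zeros(1000, dtype=[('mvir', 'f8')])
halos['mvir'] = 10**np.random.uniform(11, 14, size=1000)

# P(log mstar | log mvir): a Gaussian centred on a made-up linear mean relation.
P_xy = lambda y: norm(loc=0.5 * y + 4.0, scale=0.2)

mock = make_SHAM_mock(halos, P_xy, mock_prop='mvir', gal_prop='mstar')
print(mock['mstar'][:5])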
def _select_and_weight(self, N=0, gamma=-2,
                       source={'ra': np.pi / 2, 'dec': np.pi / 6},
                       time_profiles=None, sampling_width=np.radians(1)):
    '''Prune the simulation set to only events close to a given source and
    calculate the weight for each event. Add the weights as a new column to
    the simulation set.

    time_profiles should be a list of tuples. The first element in each tuple
    should be a time profile, and the second should be the proportion of total
    events in that time profile.
    '''
    assert ('ow' in self.sim.dtype.names)
    assert (time_profiles is not None)
    # Pick out only those events that are close in
    # declination. We only want to sample from those.
    sindec_dist = np.abs(source['dec'] - self.sim['trueDec'])
    close = sindec_dist < sampling_width
    reduced_sim = rf.append_fields(self.sim[close].copy(), 'weight',
                                   np.zeros(close.sum()), dtypes=np.float32)
    # Assign the weights using the newly defined "time profile"
    # classes above. If you want to make this a more complicated
    # shape, talk to me and we can work it out.
    reduced_sims = np.array([reduced_sim.copy() for _ in time_profiles])
    for i, time_profile in enumerate(time_profiles):
        effective_livetime = time_profile[0].effective_exposure()
        reduced_sims[i]['weight'] = time_profile[1] * reduced_sims[i]['ow'] * \
            N * (reduced_sims[i]['trueE'] / 100.e3)**gamma * \
            effective_livetime * 24 * 3600.
        # Apply the sampling width, which ensures that we
        # sample events from similar declinations.
        # When we do this, correct for the solid angle
        # we're including for sampling
        omega = 2 * np.pi * (np.min([np.sin(source['dec'] + sampling_width), 1]) -
                             np.max([np.sin(source['dec'] - sampling_width), -1]))
        reduced_sims[i]['weight'] /= omega
    return reduced_sims
def appendSumKernels(csvData, columnPrefixes):
    '''Calculate the sum of matching pixels from different maps, where the
    maps are indicated by columnPrefixes. Append the sum as a column to the
    structured array.'''
    # examine column headers for common kernels
    headers = []
    kernels = []
    for ind, prefix in enumerate(columnPrefixes):
        headers.append(list(filter(lambda x: x.startswith(prefix.upper()),
                                   csvData.dtype.names)))
        kernels.append([])
        for i in headers[ind]:
            kernels[ind].append(i.split(prefix)[1])
    # confirm common kernels for all maps, then append sum for each kernel
    swap = np.transpose(kernels)
    check_common = all(all(x == swap[i][0] for x in swap[:][i])
                       for i in range(len(kernels[0])))
    if check_common:
        # append new headers for sum calculation
        addHeaders = ["_".join(["SUM"] + columnPrefixes + [i.strip("_")])
                      for i in kernels[0]]
        csvData = append_fields(csvData, addHeaders,
                                data=[np.zeros(csvData.size) for i in addHeaders],
                                dtypes='f8')
        for ind, row in enumerate(csvData):
            for h in addHeaders:
                headers_to_sum = filter(lambda x: x.endswith(h[-1]),
                                        np.asarray(headers).flatten())
                sum = 0
                for i in headers_to_sum:
                    sum += int(row[i])
                csvData[h][ind] = sum
    else:
        sys.exit("Cannot append Kernel Sum; Headers in unfamiliar format.")
    return csvData, '_'.join(addHeaders[0].split('_')[:-1])
def zlim_from_effi(self, effiInterp, zplot):
    """
    Method to estimate the redshift limit from efficiency curves.
    The redshift limit is defined here as the redshift value beyond
    which the efficiency decreases down to zero.

    Parameters
    ---------------
    effiInterp: interpolator
        used to get efficiencies
    zplot: numpy array
        redshift values

    Returns
    -----------
    zlimit: float
        the redshift limit
    """
    # get efficiencies
    effis = effiInterp(zplot)
    # select data with efficiency decrease
    idx = np.where(np.diff(effis) < -0.005)[0]
    # Bail out if there is no data
    if np.size(idx) == 0:
        return 0
    z_effi = np.array(zplot[idx], dtype={'names': ['z'], 'formats': [float]})
    # from this make some "z-periods" to avoid accidental z decrease at low z
    z_gap = 0.05
    seasoncalc = np.ones(z_effi.size, dtype=int)
    diffz = np.diff(z_effi['z'])
    flag = np.where(diffz > z_gap)[0]
    if len(flag) > 0:
        for i, indx in enumerate(flag):
            seasoncalc[indx + 1:] = i + 2
    z_effi = rf.append_fields(z_effi, 'season', seasoncalc)
    # now take the highest season (end of the efficiency curve)
    idd = z_effi['season'] == np.max(z_effi['season'])
    zlimit = np.min(z_effi[idd]['z'])
    return zlimit
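# A hedged usage sketch for zlim_from_effi above, with a toy efficiency curve
# that stays flat and then falls off; scipy's interp1d stands in for whatever
# interpolator the pipeline actually builds.
import numpy as np
from scipy.interpolate import interp1d

zplot = np.linspace(0.01, 1.0, 100)
effis = np.where(zplot < 0.6, 0.9, 0.9 * np.exp(-10 * (zplot - 0.6)))
effiInterp = interp1d(zplot, effis)
# zlimit = self.zlim_from_effi(effiInterp, zplot)  # ~0.6 for this toy curve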
def normalize_profile(self, params=None):
    ndata = self.data.copy()
    scale = 1.0 / np.mean(ndata[self.reference]["intensity"])
    ndata["intensity"] *= scale
    ndata["error"] *= abs(scale)
    if params is not None:
        values = self.calculate_profile(params) * scale
        ndata = rfn.append_fields(ndata, "intensity_calc", values, usemask=False)
    return ndata, scale
def neutralize_np(self, df, industry_set):
    from numpy.lib.recfunctions import append_fields
    for i in range(len(industry_set)):
        df = append_fields(df, industry_set[i], [0] * df.size, [int],
                           usemask=False)
        industry_stock = get_industry_stocks(industry_set[i])
        for j in range(df.size):
            if df['stock_code'][j] in industry_stock:
                df[industry_set[i]][j] = 1
    return df
def BASS_sensitivity_filter(path, data, rcat, survey, use_lognlogs=True):
    flux_arr = data['flux']
    n_rand = len(rcat)
    # generate random fluxes
    log_flux_grid = np.linspace(min(np.log10(flux_arr)),
                                max(np.log10(flux_arr)), 1000)
    if use_lognlogs is True:
        lognlogs = get_lognlogs(path)
        kdepdff = 10**lognlogs(log_flux_grid) / np.sum(10**lognlogs(log_flux_grid))
    else:
        kde = weighted_gaussian_kde(np.log10(flux_arr), bw_method=0.1, weights=None)
        kdepdff = kde.evaluate(log_flux_grid)
    log_fluxr_arr = generate_rand_from_pdf(pdf=kdepdff, num=n_rand,
                                           x_grid=log_flux_grid)
    fluxr_arr = 10**(log_fluxr_arr)
    rcat = append_fields(rcat, 'flux', fluxr_arr)
    smaps, wcses = get_BASSsmap(path + 'sensitivity_maps/', survey)
    # filter based on sensitivity
    good = []
    for i, r in enumerate(rcat):
        l = r['l']
        b = r['b']
        flux = r['flux']  # ergs/s/cm^2
        px, py, sind = BASSmap_ind(l, b, wcses)
        sens_map = smaps[sind]
        try:
            sensitivity = sens_map[px, py] * 2.39e-8 * 4.8  # in ergs/s/cm^-2
        except IndexError:
            print(l, b)
        if flux > sensitivity:
            good = np.append(good, i)
    randoms = rcat[good.astype(int)]
    return randoms
def degree(self, get=False):
    """ Compute the undirected degree sequence.

    If get is true it returns the array otherwise it adds the result to v.
    """
    if 'degree' in self.v.dtype.names:
        degree = self.v.degree
    else:
        degree = mt.compute_degree(self.e, self.num_vertices)
        dtype = 'u' + str(mt.get_num_bytes(np.max(degree)))
        self.v = append_fields(self.v, 'degree', degree.astype(dtype),
                               dtypes=dtype)
    if get:
        return degree
def in_degree(self, get=False):
    """ Compute the in degree sequence.

    If get is true it returns the array otherwise it adds the result to v.
    """
    if 'in_degree' in self.v.dtype.names:
        d_in = self.v.in_degree
    else:
        d_out, d_in = mt.compute_in_out_degree(self.e, self.num_vertices)
        dtype = 'u' + str(mt.get_num_bytes(max(np.max(d_out), np.max(d_in))))
        self.v = append_fields(self.v, ['out_degree', 'in_degree'],
                               (d_out.astype(dtype), d_in.astype(dtype)),
                               dtypes=[dtype, dtype])
    if get:
        return d_in
def extrapolate_ws(self, h1, h):
    """
    Extrapolates a windspeed profile using the power law coefficient at
    `self.alpha`.

    Parameters
    ----------
    h1 : int | float
        Measurement height.
    h : int | float
        Desired profile height.
    """
    ts1 = self.state[f"windspeed_{h1}m"]
    ts = ts1 * (h / h1)**self.alpha
    self.state = np.array(append_fields(self.state, f"windspeed_{h}m", ts))
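# A small worked check of the power-law shear formula used above; the numbers
# are illustrative, and alpha = 1/7 is a common neutral-stability assumption,
# not a value taken from the original code.
import numpy as np

alpha = 1.0 / 7.0
ws_80 = 8.0                              # measured wind speed at 80 m [m/s]
ws_120 = ws_80 * (120.0 / 80.0)**alpha   # extrapolated to 120 m
print(round(ws_120, 2))                  # ~8.48 m/s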
def _update_catalog_ephem(self):
    if not self._has_all_eph_keys(self._imeta):
        sys.stderr.write("_imeta missing required eph keys!\n")
        return
    tdata = self._imcat.copy()
    nobjs = len(tdata)
    for kk in _EPH_KEYS:
        vec = np.zeros(nobjs, dtype='float') + self._imeta[kk]
        if kk in tdata.dtype.names:
            sys.stderr.write("Column %s exists ... updating!\n" % kk)
            tdata[kk] = vec
        else:
            sys.stderr.write("Column %s not found ... adding!\n" % kk)
            tdata = append_fields(tdata, kk, vec, usemask=False)
    self._imcat = tdata
    return
def set_backexchange(self, back_exchange):
    """
    Sets the normalized percentage of uptake through a fixed backexchange
    value for all peptides.

    Parameters
    ----------
    back_exchange : :obj:`float`
        Percentage of back exchange
    """
    back_exchange /= 100
    rfu = self.data['uptake'] / ((1 - back_exchange) * self.data['ex_residues'])
    uptake_corrected = self.data['uptake'] / (1 - back_exchange)
    self.data = append_fields(self.data, ['rfu', 'uptake_corrected'],
                              data=[rfu, uptake_corrected], usemask=False)