def rnQ(r0):
    """
    Record Array Normalize Quarter

    Parameters
    ----------
    r0 : Record array with the columns to be normalized.

    Returns
    -------
    r : Light curve with new, median-normalized columns.
    """
    r = r0.copy()
    col = ['SAP_FLUX', 'PDCSAP_FLUX']
    ecol = ['SAP_FLUX_ERR', 'PDCSAP_FLUX_ERR']
    col2 = ['f', 'fpdc']    # Names for the modified columns.
    ecol2 = ['ef', 'efpdc']

    for c, ec, c2, ec2 in zip(col, ecol, col2, ecol2):
        medf = np.median(r[c])
        norm = r[c] / medf - 1
        enorm = r[ec] / medf
        r = mlab.rec_append_fields(r, c2, norm)
        r = mlab.rec_append_fields(r, ec2, enorm)

    return r
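# Note (hedged): matplotlib deprecated mlab.rec_append_fields in 2.2 and
# removed it in 3.1; numpy.lib.recfunctions.append_fields is the closest
# surviving equivalent. A minimal sketch of the same median-normalization
# step from rnQ, on a synthetic record array:
import numpy as np
from numpy.lib import recfunctions as rfn

r = np.rec.fromarrays([np.array([1.0, 2.0, 3.0])], names='SAP_FLUX')
medf = np.median(r['SAP_FLUX'])
r = rfn.append_fields(r, 'f', r['SAP_FLUX'] / medf - 1,
                      usemask=False, asrecarray=True)
# r['f'] -> array([-0.5,  0. ,  0.5])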
def rdt(r0):
    """
    Detrend light curve with GP-based detrending.

    Parameters
    ----------
    r0 : record array with `f`, `fmask`, `t` fields

    Returns
    -------
    r : same record array with the following fields:
        label - 0,1,2 identifies groups for spline detrending.
        ftnd  - the best fit trend
        fdt   - f - ftnd
    """
    r = r0.copy()
    fm = ma.masked_array(r['f'], r['fmask'])
    ftnd = fm.copy()

    rvalid = r[~r['fmask']]
    t = r['t']
    x, y = detrend.bin(rvalid)   # Compute GP using binned lc (speed)
    yi = detrend.GPdt(t, x, y)   # evaluate at all points
    ftnd[:] = yi

    # Assign a label to the segEnd segment
    fdt = fm - ftnd
    r = mlab.rec_append_fields(r, 'ftnd', ftnd.data)
    r = mlab.rec_append_fields(r, 'fdt', fdt.data)
    return r
def calc_SI(cat1, cat2, limit):
    """
    Use a catalogue at a different frequency, do cross-matching, and calculate
    the spectral index. The module also looks for multiple matches and
    distributes the flux of a source matching multiple counterparts linearly to
    calculate the spectral index. It only considers sources that are no further
    apart than the limit parameter.
    cat1: The catalogue to add the spectral index to. Usually NVSS or FIRST.
    cat2: The catalogue to match and calculate the spectral index from. Usually WENSS.
    limit: Maximum distance in arcseconds for two sources to match each other.
    returns: cat1 with added spectral indices. Sources with no counterpart are set to -0.7.
    """
    try:  # Handle the exception if the WENSS query did not give any results.
        # Convert the coordinates of the two source catalogues to the right format
        coords1 = SkyCoord(ra=cat1.RA, dec=cat1.DEC, unit=(u.deg, u.deg))
        coords2 = SkyCoord(ra=cat2.RA, dec=cat2.DEC, unit=(u.deg, u.deg))
        # Get the indices of the matches (idx) and their distance on the sky (d2d)
        idx, d2d, d3d = coords1.match_to_catalog_sky(coords2)
        dist = (d2d * u.deg * 3600) / (u.deg * u.deg)  # Convert to arcsec
        nomatch = np.where(dist > limit)   # Index of sources with no match
        match = np.where(dist <= limit)    # Index of sources with a match
        idx_match = idx[match]
        # Fluxes at 20cm for all matches including resolved sources
        flux1 = np.delete(cat1.flux, nomatch)
        # Fluxes at 90cm for all matches including multiples
        flux2 = np.asarray(cat2.flux)[idx_match]
        src, counts = np.unique(idx_match, return_counts=True)
        logging.debug(' Found ' + str(len(np.asarray(nomatch)[0])) +
                      ' source(s) with no counterparts. Setting their spectral index to -0.7')
        num, occ = np.unique(counts, return_counts=True)
        for n, g in enumerate(num):
            logging.debug(' Found ' + str(occ[n]) + ' source(s) with ' +
                          str(num[n]) + ' counterpart(s)')
        # Calculate the fluxes for the matched and resolved sources using weighting
        src_wgt_1 = np.zeros(len(flux2))
        for s in src:
            src_idx = np.where(s == idx_match)
            src_sum_1 = np.sum(flux1[src_idx])
            src_wgt_1[src_idx] = flux1[src_idx] / src_sum_1
        src_flux_2 = flux2 * src_wgt_1
        src_si = np.log10(flux1 / src_flux_2) / np.log10(1.4 / 0.33)
        # Create the array for the spectral index and put the values into the right position
        si = np.zeros(len(idx))
        si[nomatch] = -0.7
        si[match] = src_si
        # Reset extreme values to -0.7: maybe a wrong source match or a variable source
        si[si < -3] = -0.7
        si[si > 2] = -0.7
        cat = mplab.rec_append_fields(cat1, 'SI', si, dtypes=float)
    except Exception:
        # If the queried area is not covered by WENSS, give all sources a spectral index of -0.7.
        cat = mplab.rec_append_fields(cat1, 'SI', -0.7, dtypes=float)
    return cat
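# Worked check (hedged) of the spectral-index formula used in calc_SI, with
# made-up fluxes that are purely illustrative:
#   alpha = log10(S_1.4GHz / S_0.33GHz) / log10(1.4 / 0.33)
import numpy as np

flux_20cm = np.array([0.10, 0.50])  # Jy at 1.4 GHz (hypothetical values)
flux_90cm = np.array([0.25, 1.80])  # Jy at 0.33 GHz (hypothetical values)
si = np.log10(flux_20cm / flux_90cm) / np.log10(1.4 / 0.33)
# si -> approx. [-0.63, -0.89]; a typical synchrotron source sits near -0.7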
def make_footprint(tilefile=defaulttilefile):
    lbound1 = [240, 365]
    lbound2 = [-5, 5]
    bbound = [-4, 4]
    tiles = fits.getdata(tilefile)
    lt, bt = equgal(tiles['ra'], tiles['dec'])
    mpilot = ((bt > bbound[0]) & (bt < bbound[1]) &
              ((lt > lbound1[0]) & (lt < lbound1[1]) |
               (lt > lbound2[0]) & (lt < lbound2[1])))
    mpilot = extend_footprint_to_matches(tiles, mpilot)
    zeros = numpy.zeros(len(tiles), dtype='i4')
    tiles = rec_append_fields(tiles,
                              ['i_done', 'y_done', 'i_expnum', 'y_expnum', 'in_decaps'],
                              [zeros.copy() for i in range(5)])
    zeros = numpy.zeros(len(tiles), dtype='S10')
    tiles = rec_append_fields(tiles, ['i_date', 'y_date'],
                              [zeros.copy() for i in range(2)])
    tiles['in_decaps'][mpilot] |= 2**0
    lbound1 = [240, 365]
    lbound2 = [-5, 5]
    bbound = [-10, 10]
    mall = ((bt > bbound[0]) & (bt < bbound[1]) &
            ((lt > lbound1[0]) & (lt < lbound1[1]) |
             (lt > lbound2[0]) & (lt < lbound2[1])))
    tiles['in_decaps'][mall] |= 2**1
    tiles.dtype.names = [n.lower() for n in tiles.dtype.names]
    return tiles
def testCalculateStatistics(self):
    """ builder.sed.calculate_statistics() """
    dtypes = [('teff', 'f8'), ('logg', 'f8'), ('ebv', 'f8'), ('rv', 'f8'),
              ('z', 'f8'), ('chisq', 'f8')]
    grid = [array([22674., 21774., 22813., 29343., 28170.]),
            array([5.75, 6.07, 6.03, 6.38, 5.97]),
            array([0.0018, 0.0077, 0.0112, 0.0046, 0.0110]),
            array([2.20, 2.40, 2.60, 2.80, 3.00]),
            array([0, 0, 0, 0, 0]),
            array([1., 3., 2., 0.1, 10.0])]
    master = np.rec.fromarrays(grid, dtype=dtypes)
    master = mlab.rec_append_fields(master, 'ci_raw', np.zeros(len(master)))
    master = mlab.rec_append_fields(master, 'ci_red', np.zeros(len(master)))
    self.sed.results['igrid_search']['grid'] = master
    self.sed.master['include'] = [True, True, False, True, True]

    with mock.patch.object(builder.SED, 'calculateDF',
                           return_value=5) as mock_method:
        self.sed.calculate_statistics(df=5)

    res = self.sed.results['igrid_search']
    raw = [0.6826894, 0.9167354, 0.8427007, 0.2481703, 0.9984345]
    red = [0.2481703, 0.4161175, 0.3452791, 0.0796556, 0.6826894]
    self.assertFalse(mock_method.called)
    self.assertArrayAlmostEqual(res['grid']['ci_raw'].tolist(), raw, places=5)
    self.assertArrayAlmostEqual(res['grid']['ci_red'].tolist(), red, places=5)
    self.assertEqual(res['factor'], 10.0)
def random_recarray(size):
    initial = np.empty(size, dtype=np.dtype([('Y', float)]))
    initial['Y'] = np.random.standard_normal(size)
    numeric_vars = [np.random.standard_normal(size) for _ in range(10)]
    categorical_vars = [random_letters(size, l) for l in [3, 4, 7, 6, 4, 5, 8]]
    inter = ML.rec_append_fields(initial,
                                 ['n%s' % l for l in uppercase[:10]],
                                 numeric_vars)
    final = ML.rec_append_fields(inter,
                                 ['c%s' % l for l in uppercase[:len(categorical_vars)]],
                                 categorical_vars)
    return (final,
            sympy.symbols(['n%s' % l for l in uppercase[:10]]),
            [Factor('c%s' % s, np.unique(l))
             for s, l in zip(uppercase[:len(categorical_vars)], categorical_vars)])
def tdpep(t, fm, PG0):
    """
    Transit-duration - Period - Epoch

    Parameters
    ----------
    fm  : Flux with bad data points masked out. It is assumed that
          elements of f are evenly spaced in time.
    PG0 : Initial period grid.

    Returns
    -------
    epoch2d : Grid (twd,P) of best epoch
    df2d    : Grid (twd,P) of depth epoch
    count2d : number of filled data for particular (twd,P)
    noise   : Grid (twd) typical scatter
    PG      : The Period grid
    twd     : Grid of trial transit widths.
    """
    assert fm.fill_value == 0

    # Determine the grid of periods that corresponds to integer
    # multiples of cadence values
    PcadG, PG = P2Pcad(PG0)

    # Initialize tdur grid.
    twdMi = a2tdur(P2a(PG[0])) / keptoy.lc
    twdMa = a2tdur(P2a(PG[-1])) / keptoy.lc
    twdG = np.round(np.linspace(twdMi, twdMa, 4)).astype(int)

    rec2d = []
    noise = []
    for twd in twdG:
        dM = mtd(t, fm.filled(), twd)
        dM.mask = fm.mask | ~isfilled(t, fm, twd)
        rec2d.append(pep(t[0], dM, PcadG))

        # Noise per transit
        mad = ma.abs(dM)
        mad = ma.median(mad)
        noise.append(mad)

    rec2d = np.vstack(rec2d)

    make2d = lambda x: np.tile(np.vstack(x), (1, rec2d.shape[1]))
    rec2d = mlab.rec_append_fields(rec2d, 'noise', make2d(noise))
    rec2d = mlab.rec_append_fields(rec2d, 'twd', make2d(twdG))

    PG = np.tile(PG, (rec2d.shape[0], 1))
    rec2d = mlab.rec_append_fields(rec2d, 'PG', PG)

    s2n = rec2d['fom'] / rec2d['noise'] * rec2d['count']
    rec2d = mlab.rec_append_fields(rec2d, 's2n', s2n)
    return rec2d
def random_from_terms_factors(terms, factors, size):
    dtype = np.dtype([(str(t), float) for t in terms] +
                     [(f.name, 'S30') for f in factors])
    data = np.empty(size, np.dtype([(str(terms[0]), float)]))
    data[str(terms[0])] = np.random.standard_normal(size)
    for t in terms[1:]:
        data = ML.rec_append_fields(data, str(t), np.random.standard_normal(size))
    for f in factors:
        data = ML.rec_append_fields(data, f.name, random_from_factor(f, size))
    return data
def add_timespan(recarray):
    """input: array of dates as strings
    output: datetime list, dt relative to first date [days]"""
    pltdates = np.array([datetime.datetime.strptime(date, '%Y/%m/%d')
                         for date in recarray.date])
    # could do array of total seconds or decimal years
    dt = np.cumsum(np.diff(pltdates))
    dt = np.insert(dt, 0, datetime.timedelta(0))
    roidates = np.array([D.strftime('%y%m%d') for D in pltdates])
    recarray = mlab.rec_append_fields(recarray, 'dt', dt, object)
    # append datetime objects as new field
    recarray = mlab.rec_append_fields(recarray, 'pltdate', pltdates, object)
    recarray = mlab.rec_append_fields(recarray, 'roidate', roidates, roidates.dtype)
    #print "appended 'dt' and 'pltdate' fields"
    return recarray
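# Side note (hedged sketch): cumsum(diff(dates)) with a zero timedelta
# prepended, as in add_timespan, is equivalent to measuring every date
# against the first one:
import datetime
import numpy as np

d = np.array([datetime.datetime(2020, 1, 1), datetime.datetime(2020, 1, 4),
              datetime.datetime(2020, 1, 9)])
dt = np.insert(np.cumsum(np.diff(d)), 0, datetime.timedelta(0))
assert (dt == d - d[0]).all()  # 0, 3 and 8 days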
def channel_transform(fitsfiles, h5file, iref=None):
    """
    Channel Transformation

    Take a list of K2 pixel files (must be from the same channel). Find
    the centroids of each image and solve for the linear transformation
    that takes one scene to another.
    """
    nstars = len(fitsfiles)

    # Pull the first file to get length and data type
    fitsfile0 = fitsfiles[0]
    cent0 = fits_to_chip_centroid(fitsfile0)
    channel = get_channel(fitsfile0)
    print("Using channel = %i" % channel)

    # Determine the reference frame
    if iref is None:
        dfcent0 = pd.DataFrame(LE(cent0))
        ncad = len(dfcent0)
        med = dfcent0.median()
        dfcent0['dist'] = ((dfcent0['centx'] - med['centx'])**2 +
                           (dfcent0['centy'] - med['centy'])**2)
        dfcent0 = dfcent0.iloc[ncad // 4:-ncad // 4]
        dfcent0 = dfcent0.dropna(subset=['centx', 'centy'])
        iref = dfcent0['dist'].idxmin()

    print("using reference frame %i" % iref)
    assert np.isnan(cent0['centx'][iref]) == False, \
        "Must select a valid reference cadence. No nans"

    cent = np.zeros((nstars, cent0.shape[0]), cent0.dtype)
    for i, fitsfile in enumerate(fitsfiles):
        if (i % 10) == 0:
            print(i)
        cent[i] = fits_to_chip_centroid(fitsfile)
        channel_i = get_channel(fitsfile)
        assert channel == channel_i, "%i != %i" % (channel, channel_i)

    trans, pnts = imtran.linear_transform(cent['centx'], cent['centy'], iref)
    trans = pd.DataFrame(trans)
    trans = pd.concat([trans, pd.DataFrame(LE(cent0))[['t', 'cad']]], axis=1)
    trans = trans.to_records(index=False)

    keys = cent.dtype.names
    pnts = mlab.rec_append_fields(pnts, keys, [cent[k] for k in keys])

    if h5file is not None:
        with h5plus.File(h5file) as h5:
            h5['trans'] = trans
            h5['pnts'] = pnts

        trans, pnts = read_channel_transform(h5file)
        plot_trans(trans, pnts)
        figpath = h5file[:-3] + '.png'
        plt.gcf().savefig(figpath)
        print("saving %s " % figpath)

    return cent
def testR(d=simple(), size=500):
    X = random_from_categorical_formula(d, size)
    X = ML.rec_append_fields(X, 'response', np.random.standard_normal(size))

    fname = tempfile.mktemp()
    ML.rec2csv(X, fname)

    Rstr = '''
    data = read.table("%s", sep=',', header=T)
    cur.lm = lm(response ~ %s, data)
    COEF = coef(cur.lm)
    ''' % (fname, d.Rstr)

    rpy2.robjects.r(Rstr)
    remove(fname)

    nR = list(np.array(rpy2.robjects.r("names(COEF)")))
    nt.assert_true('(Intercept)' in nR)
    nR.remove("(Intercept)")

    nF = [str(t).replace("_", "").replace("*", ":") for t in d.formula.terms]
    nR = sorted([sorted(n.split(":")) for n in nR])

    nt.assert_true('1' in nF)
    nF.remove('1')
    nF = sorted([sorted(n.split(":")) for n in nF])

    nt.assert_equal(nR, nF)
    return d, X, nR, nF
def read(system='WFC3', dir='/n/fink1/schlafly/mist/bcs'):
    files = list(util_efs.locate('*%s' % system, root=dir))
    res = []
    for file in files:
        lastline = ''
        fehline = -1
        if file[-3:] == 'iso':
            fehline = 4
        with open(file, 'r') as fp:
            for i, line in enumerate(fp):
                if line[0] != '#':
                    break
                if i == fehline:
                    isoline = line
                lastline = line
        if fehline > 0:
            feh = float(isoline.split()[3])
        names = lastline.split()[1:]
        grid = ascii.read(file, comment='#', names=names).as_array()
        if fehline > 0:
            grid = rec_append_fields(grid, '[Fe/H]',
                                     feh * numpy.ones(len(grid), dtype='f4'))
        if 'Av' in grid.dtype.names:
            m = grid['Av'] == 0
            res.append(grid[m])
        else:
            res.append(grid)
    return numpy.concatenate(res)
def join_rec(r1, field1, r2, field2):
    """1-to-1 joining with non-unique lefthand side keys"""
    mapping = dict(zip(r2[field2], range(len(r2))))
    diff = np.setdiff1d(r1[field1], r2[field2])
    r2len = len(r2)
    if len(diff) > 0:
        print("WARNING: %s no matching key: %s" % (field2, diff))
        for i in range(len(diff)):
            mapping[diff[i]] = r2len
    r2copy = mlab.rec_drop_fields(r2, (field2,))
    r2copy.resize(r2len + 1)
    joinfields = list(r2copy.dtype.names)
    dtypes = []
    for i in range(len(joinfields)):
        if r2copy.dtype[i].kind == "i":
            dtypes.append(np.double)
        else:
            dtypes.append(r2copy.dtype[i])
        if r2copy.dtype[i].kind == "f":
            r2copy[r2copy.dtype.names[i]][-1] = NULL_VALUE
        while joinfields[i] in r1.dtype.names:
            joinfields[i] = joinfields[i] + "_"
    rightrec = r2copy[[mapping[key] for key in r1[field1]]]
    r1 = mlab.rec_append_fields(r1, joinfields,
                                [rightrec[n] for n in rightrec.dtype.names],
                                dtypes)
    return r1
def gethistprices(query, numrows=1000, **kwargs):
    rec_arr = sqlite2rec(query, **kwargs)
    import matplotlib.mlab as mlab
    import numpy as np
    (syms, posuniq, pos) = np.unique(rec_arr.sym, True, True)
    new_rec_arr = mlab.rec_append_fields(rec_arr, 'idx', pos)
    nosym = mlab.rec_drop_fields(new_rec_arr, ['sym'])
    recnumrecs = mlab.rec_groupby(nosym, ('idx',), (('idx', len, 'idxcount'),))
    idx = np.nonzero(recnumrecs.idxcount >= numrows)[0]
    idxcount = len(recnumrecs[idx])
    xs = np.empty((idxcount, numrows, len(nosym[0]) - 1), dtype=float)
    for i in range(idxcount):
        if kwargs.get('verbose') and i % 50 == 0:
            print('%d of %d' % (i, idxcount))
        curdata = nosym[nosym.idx == idx[i]]
        curdata_arr = np.array(curdata.tolist(), dtype=float)
        xs[i] = curdata_arr[0:numrows:, 0:-1]
    return (syms[idx], xs)
def pixelizeCatalog(infiles, config, force=False):
    """
    Break catalog into chunks by healpix pixel.

    Parameters:
    -----------
    infiles : List of input files
    config  : Configuration file
    force   : Overwrite existing files (deprecated)

    Returns:
    --------
    None
    """
    nside_catalog = config['coords']['nside_catalog']
    nside_pixel = config['coords']['nside_pixel']
    outdir = mkdir(config['catalog']['dirname'])
    filenames = config.getFilenames()

    for i, filename in enumerate(infiles):
        logger.info('(%i/%i) %s' % (i + 1, len(infiles), filename))
        data = fitsio.read(filename)
        logger.info("%i objects found" % len(data))
        if not len(data):
            continue

        glon, glat = cel2gal(data['RA'], data['DEC'])
        cat_pix = ang2pix(nside_catalog, glon, glat)
        pix_pix = ang2pix(nside_pixel, glon, glat)
        cat_pix_name = 'PIX%i' % nside_catalog
        pix_pix_name = 'PIX%i' % nside_pixel

        data = mlab.rec_append_fields(
            data,
            names=['GLON', 'GLAT', cat_pix_name, pix_pix_name],
            arrs=[glon, glat, cat_pix, pix_pix],
            dtypes=['f4', 'f4', int, int])

        for pix in np.unique(cat_pix):
            logger.debug("Processing pixel %s" % pix)
            arr = data[cat_pix == pix]
            outfile = filenames.data['catalog'][pix]

            if not os.path.exists(outfile):
                logger.debug("Creating %s" % outfile)
                out = fitsio.FITS(outfile, mode='rw')
                out.write(arr)
                hdr = ugali.utils.healpix.header_odict(nside=nside_catalog,
                                                       coord='G')
                for key in ['PIXTYPE', 'ORDERING', 'NSIDE', 'COORDSYS']:
                    out[1].write_key(*list(hdr[key].values()))
                out[1].write_key('PIX', pix, comment='HEALPIX pixel for this file')
            else:
                out = fitsio.FITS(outfile, mode='rw')
                out[1].append(arr)

            logger.debug("Writing %s" % outfile)
            out.close()
def rsQ(rL):
    """
    Stitch Quarters together.

    Fills in missing times and cadences with their proper values. It
    assigns placeholder values for other columns.
    - floats --> nan
    - bools  --> True

    Parameters
    ----------
    rL : List of record arrays

    Returns
    -------
    rLC : Record array of all the joined quarters.

    Notes
    -----
    Will put quarters in the proper order.
    """
    startTimes = np.array([r['t'][0] for r in rL])
    sid = np.argsort(startTimes)
    rL = list(np.array(rL)[sid])

    # Figure out which cadences are missing and fill them in.
    cad = [r['cad'] for r in rL]
    cad = np.hstack(cad)
    cad, iFill = cadFill(cad)
    nFill = cad.size

    rLC = np.rec.fromarrays([cad], names='cad')

    # Add all the columns from the FITS file.
    fitsname = list(rL[0].dtype.fields.keys())
    fitsname.remove('cad')

    for fn in fitsname:
        col = [r[fn] for r in rL]   # Column in list form
        col = np.hstack(col)

        # Fill new array elements
        if col.dtype is np.dtype('bool'):
            fill_value = True
        else:
            fill_value = np.nan

        ctemp = np.empty(nFill, dtype=col.dtype)  # Temporary column
        ctemp[::] = fill_value
        ctemp[iFill] = col

        rLC = mlab.rec_append_fields(rLC, fn, ctemp)

    # nanTime doesn't work here because I've updated the "cad" field
    tm = ma.masked_invalid(rLC['t'])
    cad, rLC['t'] = detrend.maskIntrp(rLC['cad'], tm)

    return rLC
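# Hedged sketch: cadFill is not shown in this collection; assuming it returns
# the gap-free cadence grid plus the indices where the original samples land,
# a minimal stand-in (hypothetical helper, integer cadences) could be:
import numpy as np

def cad_fill(cad):
    cad_full = np.arange(cad[0], cad[-1] + 1)  # contiguous cadence grid
    i_fill = np.searchsorted(cad_full, cad)    # where the originals land
    return cad_full, i_fill

cad_full, i_fill = cad_fill(np.array([100, 101, 104, 105]))
# cad_full -> [100 101 102 103 104 105]; i_fill -> [0 1 4 5]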
def rec_zip(rL):
    """Column-wise merge of a list of equal-length record arrays."""
    ro = rL[0]
    for i in range(1, len(rL)):
        fields = list(rL[i].dtype.names)
        vals = [rL[i][f] for f in fields]
        ro = mlab.rec_append_fields(ro, fields, vals)
    return ro
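# Minimal usage sketch for rec_zip (assumes an old matplotlib whose mlab still
# provides rec_append_fields, and that field names do not collide):
import numpy as np

a = np.rec.fromarrays([np.arange(3)], names='x')
b = np.rec.fromarrays([np.arange(3) * 2.0], names='y')
ab = rec_zip([a, b])
# ab.dtype.names -> ('x', 'y')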
def add_bperp(recarray):
    """input: position vector [m], velocity vector [m], offnadir angle [deg]
    output: perpendicular baseline [m]"""
    pos = np.vstack((recarray.x, recarray.y, recarray.z)).T * 1000
    vel = np.vstack((recarray.dx, recarray.dy, recarray.dz)).T * 1000
    x, y, z = np.hsplit(pos, 3)     # if pos.shape = (7,3)
    #dx,dy,dz = np.hsplit(vel,3)
    #x,y,z = pos[0],pos[1],pos[2]   # pos is (3,7)
    offnadir = np.radians(recarray.offnadir)

    # Get mean parameters of satellite ((1,n) row vector)
    pos0 = np.mean(pos, 0).reshape(1, -1)  # average position of all acquisitions
    xm, ym, zm = pos0.flat
    vel0 = np.mean(vel, 0).reshape(1, -1)

    # get geodetic lat/lon/height (above wgs84) of satellite
    ecef = pyproj.Proj(proj='geocent', ellps='WGS84', datum='WGS84')
    wgs84 = pyproj.Proj(proj='latlong', ellps='WGS84', datum='WGS84')
    lon, lat, h = pyproj.transform(ecef, wgs84, x, y, z, radians=True)
    h_ave = np.mean(h)

    # Convert geocentric coordinates to average ENU in plane of satellite
    xl, yl, zl = ecef2enu(pos, pos0)

    # Calculate travel direction in ENU coordinates from differencing unit
    # velocity motion.
    # NOTE: not sure why this works...; need to *1000 to keep consistent
    # with the matlab code, but units don't exactly match
    pos1 = pos0 + vel0 / np.linalg.norm(vel0) * 1000
    pos2 = pos0 - vel0 / np.linalg.norm(vel0) * 1000
    p1, p2, p3 = ecef2enu(pos1, pos0)
    q1, q2, q3 = ecef2enu(pos2, pos0)
    vxl = q1 - p1
    vyl = q2 - p2
    vzl = q3 - p3

    # Along-track direction
    trackdir = np.arctan(vxl / vyl)

    # Calculate perpendicular baseline
    Bx = xl - xl[0]
    By = yl - yl[0]
    zr = h.flat - h_ave
    Br = zr - zr[0]
    #Bv = zl-zl[0]
    Bh = Bx * np.cos(trackdir) - By * np.sin(trackdir)
    Bperp = Bh * np.cos(offnadir) + Br * np.sin(offnadir)
    Bpara = Bh * np.sin(offnadir) - Br * np.cos(offnadir)

    # Match ROI_PAC baseline sign convention for ascending data
    if trackdir < 0:
        Bperp = -Bperp

    recarray = mlab.rec_append_fields(recarray, 'bperp', Bperp, float)
    #print "appended 'bperp' field"
    return recarray
def load_aeronet(fname, keep_fields='all', header=False):
    """loads aeronet lev 2.0 csv file.

    fname: data file name
    keep_fields: 'all' or a list of fields
    header: whether to return header information along with the data.
    """
    std_day = datetime(1900, 1, 1, 0, 0, 0)

    def date2daynum(datestr):
        the_day = datetime.strptime(datestr, '%d:%m:%Y')
        return float((the_day - std_day).days)

    def time2seconds(timestr):
        h, m, s = [int(t) for t in timestr.split(':')]
        return float(h * 3600 + m * 60 + s)

    def daynum_seconds2datetime(daynum, seconds):
        return std_day + timedelta(days=int(daynum), seconds=int(seconds))

    headlines = []
    f = open(fname, 'r')
    for line_i, line in enumerate(f):
        line = line.rstrip()
        if line.startswith('Date(dd-mm-yy'):
            datefield, timefield = [re.sub(r'\W', '', tk)
                                    for tk in line.split(',')[0:2]]
            break
        headlines.append(line)

    skip_header_lines = line_i

    if header:
        headline = ','.join(headlines)
        headerd = dict()
        for attrname, converter in [('location', str), ('long', float),
                                    ('lat', float), ('elev', float),
                                    ('nmeas', int), ('PI', str),
                                    ('email', str)]:
            m = re.search(r'%s.{0,1}=([^,\s]*)' % attrname, headline, flags=re.I)
            if m:
                try:
                    headerd[attrname] = converter(m.group(1))
                except Exception:
                    pass

    rawd = np.genfromtxt(fname, skip_header=skip_header_lines, delimiter=',',
                         names=True, converters={0: date2daynum, 1: time2seconds})
    lend = len(rawd)
    dates = np.zeros(len(rawd), dtype='O')
    for i in range(lend):
        dates[i] = daynum_seconds2datetime(rawd[datefield][i], rawd[timefield][i])

    newd = mlab.rec_append_fields(rawd, 'datetime', dates)
    newd = mlab.rec_drop_fields(newd, [datefield, timefield,
                                       'Last_Processing_Date'])
    if keep_fields != 'all':
        keep_fields = ['datetime'] + keep_fields
        newd = mlab.rec_keep_fields(newd, keep_fields)
    if header:
        return newd, headerd
    else:
        return newd
def typeIII(response, ancova, recarray):
    """
    Produce an ANCOVA table with type III sums of squares
    from a given ANCOVA formula.

    Inputs
    ------
    response: str
        field name of response in recarray
    ancova: ANCOVA
        specifies the model to be fit
    recarray: np.ndarray
        should contain all field names in the terms of ancova
        as well as response
    """
    X = ancova.formula.design(recarray, return_float=True)
    Y = recarray[response]
    model = OLS(Y, X)
    results = model.fit()
    SSE_F = np.sum(results.resid**2)
    df_F = results.df_resid

    names = []
    fs = []
    dfs = []
    sss = []
    pvals = []

    for contrast in ancova.contrast_names:
        r = results.f_test(ancova.contrast_matrices[contrast])
        names.append(contrast)
        fs.append(r.fvalue)
        dfs.append(r.df_num)
        pvals.append(r.pvalue)
        sss.append(r.fvalue * results.scale * r.df_num)

    # Add in the "residual row"
    sss.append(SSE_F)
    dfs.append(df_F)
    pvals.append(np.nan)
    fs.append(np.nan)
    names.append('Residuals')

    result = np.array(names,
                      np.dtype([('contrast', 'S%d' % max([len(n) for n in names]))]))
    result = ML.rec_append_fields(result,
                                  ['SS', 'df', 'MS', 'F', 'p_value'],
                                  [sss, dfs, np.array(sss) / np.array(dfs), fs, pvals])
    return result
def calc_offset(infile, cat):
    """
    calc_offset: Calculate the offset of the catalogue entries
                 towards the pointing centre

    infile: Input MIRIAD uv-file
    cat: Input catalogue of sources to calculate the offset for
    returns: A catalogue with the offsets for the individual sources
    """
    ra_off = (cat.RA - getradec(infile).ra.deg) * 3600.0 * np.cos(getradec(infile).dec.rad)
    dec_off = (cat.DEC - getradec(infile).dec.deg) * 3600.0
    cat = mplab.rec_append_fields(cat, ['RA_off', 'DEC_off'],
                                  [ra_off, dec_off], dtypes=[float, float])
    return cat
def draw_paths(self, filename, **kwargs):
    """Draw a text file containing multiple polygons"""
    try:
        data = np.genfromtxt(filename, names=['ra', 'dec', 'poly'])
    except ValueError:
        data = np.genfromtxt(filename, names=['ra', 'dec'])
        data = mlab.rec_append_fields(data, 'poly', np.zeros(len(data)))
    for p in np.unique(data['poly']):
        poly = data[data['poly'] == p]
        self.draw_path_radec(poly['ra'], poly['dec'], **kwargs)
def to_hdf(ec, h5file):
    with h5plus.File(h5file) as h5:
        for k in dsetkeys:
            h5[k] = getattr(ec, k)

        r = ec.dfAc_st.to_records()
        rless = mlab.rec_drop_fields(r, ['index'])
        sindex = r['index'].astype(str)
        r = mlab.rec_append_fields(rless, 'index', sindex)

        h5.attrs['dfAc_st'] = r
        h5.attrs['kAs'] = ec.kAs
def with_new_field(self, name, data, mask_arr=None, mask_fn=None):
    recarray = self.arr.view(np.recarray)
    recarray = mlab.rec_append_fields(recarray, name, data)
    if mask_arr is not None:
        data = np.ma.array(recarray, mask=mask_arr)
    else:
        data = recarray
    r = data.view(mrecords.mrecarray)
    if mask_fn:
        mask_fn(r)
    return TimeSeries(r)
def val(tLC, tRES, nCheck=50, ver=True):
    # Unpack array from table.
    t = tLC.t
    fm = ma.masked_array(tLC.f - tLC.fcbv, mask=tLC.fmask)

    tres = tRES.data
    tres = mlab.rec_append_fields(tres, ['P', 'tdur', 'df'],
                                  [tres['PG'], tres['twd'] * keptoy.lc, tres['fom']])
    sid = np.argsort(-tres['s2n'])
    tres = tres[sid][:nCheck]

    rval = tval.val(t, fm, tres)
    tVAL = qalg.rec2tab(rval)
    return tVAL
def dict_list_to_frame(dict_list):
    df = pd.DataFrame(dict_list)
    d0 = dict(df.iloc[0])
    goodkeys = [k for k in d0.keys()
                if type(d0[k]) != fits.card.Undefined]
    df = df[goodkeys]

    # comb = pdplus.df_to_rec_strings(df)
    dfs = df.select_dtypes(include=['object'])
    dfns = df.select_dtypes(exclude=['object'])
    dfs = rec.fromarrays(np.array(dfs).astype('S100').T, names=list(dfs.columns))
    names = list(dfns.columns)
    arrs = [dfns[n] for n in names]
    comb = mlab.rec_append_fields(dfs, names, arrs)
    return comb
def _defineVariables(self):
    """
    Helper function to define pertinent variables from catalog data.

    ADW (20170627): This has largely been replaced by properties.
    """
    logger.info('Catalog contains %i objects' % (len(self.data)))

    mc_source_id_field = self.config['catalog']['mc_source_id_field']
    if mc_source_id_field is not None:
        if mc_source_id_field not in self.data.dtype.names:
            array = np.zeros(len(self.data), dtype=int)
            self.data = mlab.rec_append_fields(self.data,
                                               names=mc_source_id_field,
                                               arrs=array)
        logger.info('Found %i simulated objects' % (np.sum(self.mc_source_id > 0)))
def append(self, picker_list, **kwargs):
    """Resize my data and add in the data from Pickers in picker_list

    note: equality test fails on picker2 for some reason

    Will also add a new column if you specify.
    Usage: p1.append([p2, p3], ratname=(1,2,3))

    Now p1 has all of the data from p1, p2, and p3. p1['ratname'] is
    1, 2, or 3, depending on the source.
    """
    # Calculate new size and resize
    old_length = len(self)
    new_length = old_length + np.sum(len(p) for p in picker_list)
    new_data = np.resize(self._data, (new_length,))

    # Store data from each new picker
    row_idx = old_length
    for picker in picker_list:
        new_data[row_idx:row_idx + len(picker._data)] = picker._data
        row_idx += len(picker._data)

    # optionally add a new column
    if len(kwargs) > 0:
        if len(kwargs) > 1:
            print("warning: too many arguments")

        # get the name of the new field and the labels for each Picker
        fieldname = list(kwargs.keys())[0]
        labels = kwargs[fieldname]

        # create the new column and store the labels for each Picker
        newcolumn = np.empty(shape=(new_length,), dtype=int)
        newcolumn[:old_length] = labels[0]
        row_idx = old_length
        for label, picker in zip(labels[1:], picker_list):
            newcolumn[row_idx:row_idx + len(picker._data)] = label
            row_idx += len(picker._data)

        # store the new column
        new_data = mlab.rec_append_fields(new_data, fieldname, newcolumn)

    # overwrite my data with the new version
    self._data = new_data
def modcols(r0):
    """
    Modify Columns

    1. Changes TIME, CADENCENO to t, cad
    2. rnQ - normalize quarter
    3. rnanTime - remove nans from time series
    """
    r = r0.copy()
    oldName = ['TIME', 'CADENCENO']
    newName = ['t', 'cad']
    for o, n in zip(oldName, newName):
        r = mlab.rec_append_fields(r, n, r[o])
        r = mlab.rec_drop_fields(r, o)

    r = keplerio.rnQ(r)
    r = keplerio.rnanTime(r)
    return r
def calc_appflux(infile, cat, beam):
    """
    calc_appflux: Calculate the apparent fluxes of sources from an input
                  catalogue using primary beam correction

    infile: Input MIRIAD uv-file
    cat: catalogue (most likely from query_catalogue)
    beam: the beam type to correct for. Only 'WSRT' allowed at the moment
    returns: an extended catalogue including the distances, RA- and
             DEC-offsets from the pointing centre, and apparent fluxes
    """
    # Check which beam model to use. APERTIF is going to be included later.
    if beam == 'WSRT':
        logging.info(' Using standard WSRT beam for calculating apparent fluxes!')
    else:
        logging.info(' Beam model not supported yet! Using standard WSRT beam instead!')
    sep = cat.dist
    appflux = np.zeros((len(cat)))
    for c in range(0, len(cat)):  # calculate the apparent flux of the sources
        appflux[c] = (cat.flux[c]) * wsrtBeam(sep[c], getfreq(infile))
    cat = mplab.rec_append_fields(cat, ['appflux'], [appflux], dtypes=[float])
    return cat
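# Hedged sketch of the cos**6 primary-beam model that a wsrtBeam-style helper
# commonly implements for WSRT (attenuation = cos(C * nu * r)**6 with C ~ 68,
# nu in GHz, r in degrees, argument in degrees); the actual wsrtBeam called
# above is not shown in this collection:
import numpy as np

def wsrt_beam(sep_deg, freq_ghz, c=68.0):
    return np.cos(np.radians(c * freq_ghz * sep_deg)) ** 6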
def createAssociations(self):
    objects = self.objects

    tol = self.config['search']['proximity']
    columns = odict()

    names = np.empty(len(objects), dtype=object)
    names.fill('')
    for i, refs in enumerate(self.config['search']['catalogs']):
        i += 1
        catalog = SourceCatalog()
        for ref in refs:
            print(ref)
            catalog += catalogFactory(ref)

        # String length (should be greater than longest name)
        length = len(max(catalog['name'], key=len)) + 1
        dtype = 'S%i' % length
        fitstype = '%iA' % length

        assoc = np.empty(len(objects), dtype=dtype)
        assoc.fill('')
        angsep = np.zeros(len(objects), dtype=np.float32)
        idx1, idx2, sep = catalog.match(objects['GLON'], objects['GLAT'], tol=tol)
        assoc[idx1] = catalog['name'][idx2].astype(dtype)
        angsep[idx1] = sep
        columns['ASSOC%i' % i] = assoc
        columns['ANGSEP%i' % i] = angsep

        if length > objects['NAME'].itemsize:
            logger.warning("Association name may not fit.")
        names = np.where(names == '', assoc, names)

    names = names.astype(objects['NAME'].dtype)
    objects['NAME'][:] = np.where(names == '', objects['NAME'], names)
    objects['NAME'][:] = np.char.replace(objects['NAME'], '_', ' ')

    self.assocs = mlab.rec_append_fields(objects,
                                         list(columns.keys()),
                                         list(columns.values()))
    self.assocs = self.assocs[self.assocs['NAME'].argsort()]
def __init__(self, data=None, fileName="all_mds.csv", **kwargs):
    self.conditions = {}
    if len(kwargs) > 0:
        for arg, value in kwargs.items():
            setattr(self, arg, value)
            self.conditions[arg] = value
    if data is None:
        self.loadData(fileName)
        if 'Area' not in self.header:
            areas = self.getMDAreas()
            centroids = self.getMDCentroids()
            self.data = mlab.rec_append_fields(
                self.data,
                ['Area', 'CentroidLon', 'CentroidLat'],
                [areas, centroids[:, 0], centroids[:, 1]])
            self.header = self.data.dtype.names
    else:
        self.header = data.dtype.names
        self.data = data
def select_in_path(filename, ra, dec, polys=None, wrap=180.):
    import matplotlib.path
    from matplotlib import mlab

    ra, dec = np.copy(ra), np.copy(dec)

    try:
        data = np.genfromtxt(filename, names=['ra', 'dec', 'poly'])
    except ValueError:
        data = np.genfromtxt(filename, names=['ra', 'dec'])
        data = mlab.rec_append_fields(data, 'poly', np.zeros(len(data)))

    paths = []
    ra -= 360 * (ra > wrap)
    for p in np.unique(data['poly']):
        if polys and (p not in polys):
            continue
        poly = data[data['poly'] == p]
        vertices = np.vstack(np.vstack([poly['ra'], poly['dec']])).T
        paths.append(matplotlib.path.Path(vertices))

    sel = np.sum([p.contains_points(np.vstack([ra, dec]).T) for p in paths],
                 axis=0) > 0
    return sel
def create_mag_table(self, outputPath, isocType="pdva", specType="basel"):
    """Create an HDF5 table that describes a set of magnitudes."""
    if os.path.exists(outputPath):
        os.remove(outputPath)
    title = os.path.splitext(os.path.basename(outputPath))[0]
    h5file = tables.openFile(outputPath, mode="w", title=title)
    table = h5file.createTable("/", 'mags', MagTableDef, "Mag Model Table")
    print(h5file)

    docs = self.collection.find({"compute_complete": True,
                                 "np_data": {"$exists": 1}})  # , limit=2
    print("working on %i docs to read" % docs.count())
    lut = get_metallicity_LUT(isocType, specType)

    for doc in docs:
        print("reading %s" % doc['_id'])
        npData = doc['np_data']
        nRows = len(npData)

        # Append model information (about SFH, dust, etc)
        zmet = doc['pset']['zmet']
        Z = lut[zmet - 1]
        zmets = np.ones(nRows, dtype=float) * Z
        tau = doc['pset']['tau']
        taus = np.ones(nRows, dtype=float) * tau
        npDataAll = mlab.rec_append_fields(npData, ['Z', 'tau'], [zmets, taus])

        # Trim the recarray to just the desired fields
        npDataTrim = mlab.rec_keep_fields(
            npDataAll,
            ['Z', 'tau', 'age', 'mass', 'lbol', 'sfr', 'TMASS_J', 'TMASS_H',
             'TMASS_Ks', 'MegaCam_u', 'MegaCam_g', 'MegaCam_r', 'MegaCam_i',
             'MegaCam_z', 'GALEX_NUV', 'GALEX_FUV'])
        for i in range(nRows):
            row = npDataTrim[i]
            print(row['Z'], row['tau'], row['TMASS_J'], row['TMASS_Ks'])

        # Append to HDF5
        table.append(npDataTrim)
        h5file.flush()

    h5file.close()
def fits_to_chip_centroid(fitsfile):
    """
    Grab centroids from fits file

    Parameters
    ----------
    fitsfile : path to pixel file

    Returns
    -------
    centx : centroid in the x (column) axis
    centy : centroid in the y (row) axis
    """
    apsize = 7

    hdu0, hdu1, hdu2 = fits.open(fitsfile)
    cube = hdu1.data
    flux = cube['FLUX']
    t = cube['TIME']
    cad = cube['CADENCENO']

    nframe, nrow, ncol = flux.shape

    # Define rectangular aperture
    wcs = get_wcs(fitsfile)
    ra, dec = hdu0.header['RA_OBJ'], hdu0.header['DEC_OBJ']
    try:
        x, y = wcs.wcs_world2pix(ra, dec, 0)
    except:
        # if WCS is bogus, make the simplest reasonable assumption
        x, y = ncol / 2., nrow / 2.

    scentx, scenty = np.round([x, y]).astype(int)
    nrings = (apsize - 1) // 2

    x0 = scentx - nrings
    x1 = scentx + nrings
    y0 = scenty - nrings
    y1 = scenty + nrings

    mask = np.zeros((nrow, ncol))
    mask[y0:y1 + 1, x0:x1 + 1] = 1  # 1 means use in aperture

    # Compute background flux
    # mask = True aperture, don't use to compute bg
    flux_sky = flux.copy()
    flux_sky_mask = np.zeros(flux.shape)
    flux_sky_mask += mask[np.newaxis, :, :].astype(bool)
    flux_sky = ma.masked_array(flux_sky, flux_sky_mask)
    fbg = ma.median(flux_sky.reshape(flux.shape[0], -1), axis=1)
    if not np.isfinite(fbg).any():
        fbg2 = [ma.median(frame[np.isfinite(frame)])
                for frame in flux_sky.reshape(flux.shape[0], -1)]
        fbg = ma.masked_array(fbg2, np.isnan(fbg2))

    # Subtract off background
    flux = flux - fbg[:, np.newaxis, np.newaxis]
    flux = ma.masked_invalid(flux)
    flux.fill_value = 0
    flux = flux.filled()

    # Compute aperture photometry
    fsap = flux * mask
    fsap = np.sum(fsap.reshape(fsap.shape[0], -1), axis=1)

    # Compute centroids
    centx, centy = centroid(flux * mask)

    # table column physical WCS ax 1 ref value
    # hdu1.header['1CRV4P'] corresponds to column of flux[:,0,0]
    # starting counting at 1.
    centx += hdu1.header['1CRV4P'] - 1
    centy += hdu1.header['2CRV4P'] - 1

    r = np.rec.fromarrays([t, cad, centx, centy, fsap, fbg],
                          names='t,cad,centx,centy,fsap,fbg')
    r = mlab.rec_append_fields(r, 'starname', hdu0.header['KEPLERID'])
    return r
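# Hedged sketch of a flux-weighted ("center of light") centroid, which is what
# the centroid() helper above plausibly computes per frame; the real
# implementation is not shown in this collection:
import numpy as np

def centroid(flux):
    """flux: (nframe, nrow, ncol) cube; returns per-frame x and y centroids."""
    nframe, nrow, ncol = flux.shape
    cols, rows = np.meshgrid(np.arange(ncol), np.arange(nrow))
    tot = flux.reshape(nframe, -1).sum(axis=1)
    centx = (flux * cols).reshape(nframe, -1).sum(axis=1) / tot
    centy = (flux * rows).reshape(nframe, -1).sum(axis=1) / tot
    return centx, centy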
import matplotlib.mlab as mlab

# grab the price data off yahoo
u1 = urllib.request.urlretrieve(
    'http://ichart.finance.yahoo.com/table.csv?s=AAPL&d=9&e=14&f=2008&g=d&a=8&b=7&c=1984&ignore=.csv')
u2 = urllib.request.urlretrieve(
    'http://ichart.finance.yahoo.com/table.csv?s=GOOG&d=9&e=14&f=2008&g=d&a=8&b=7&c=1984&ignore=.csv')

# load the CSV files into record arrays
r1 = mlab.csv2rec(open(u1[0]))
r2 = mlab.csv2rec(open(u2[0]))

# compute the daily returns and add these columns to the arrays
gains1 = np.zeros_like(r1.adj_close)
gains2 = np.zeros_like(r2.adj_close)
gains1[1:] = np.diff(r1.adj_close) / r1.adj_close[:-1]
gains2[1:] = np.diff(r2.adj_close) / r2.adj_close[:-1]
r1 = mlab.rec_append_fields(r1, 'gains', gains1)
r2 = mlab.rec_append_fields(r2, 'gains', gains2)

# now join them by date; the default postfixes are 1 and 2. The default
# jointype is inner, so it will do an intersection of dates and drop the
# dates in AAPL which occurred before GOOG started trading in 2004. r1 and
# r2 are reverse-ordered by date since Yahoo returns the most recent first
# in the CSV files, but rec_join will sort by key, so r below will be
# properly sorted.
r = mlab.rec_join('date', r1, r2)

# long appl, short goog
g = r.gains1 - r.gains2
tr = (1 + g).cumprod()  # the total return

# plot the return
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(r.date, tr)
ax.set_title('total return: long appl, short goog')
ax.grid()
def typeII(response, ancova, recarray):
    """
    Produce an ANCOVA table from a given ANCOVA formula
    with type II sums of squares.

    Inputs
    ------
    response: str
        field name of response in recarray
    ancova: ANCOVA
        specifies the model to be fit
    recarray: np.ndarray
        should contain all field names in the terms of ancova
        as well as response
    """
    Y = recarray[response]
    X = ancova.formula.design(recarray, return_float=True)
    model = OLS(Y, X)
    results = model.fit()
    SSE_F = np.sum(results.resid**2)
    df_F = results.df_resid

    names = []
    sss = []
    fs = []
    dfs = []
    pvals = []

    for name, expr_factors in zip(ancova.contrast_names, ancova.sequence()):
        expr, factors = expr_factors
        F = ancova.all_but_above(expr, factors)
        C = ancova.contrasts[name]

        XF, contrast_matrices = F.formula.design(recarray, contrasts={'C': C})
        modelF = OLS(Y, XF)
        resultsF = modelF.fit()
        SSEF = np.sum(resultsF.resid**2)
        dfF = resultsF.df_resid

        ftest = resultsF.f_test(contrast_matrices['C'])
        SSER = SSEF + ftest.fvalue * ftest.df_num * (SSEF / dfF)
        dfR = dfF + ftest.df_num

        sss.append(SSER - SSEF)
        dfs.append(ftest.df_num)
        fs.append(((SSER - SSEF) / (dfR - dfF)) / (SSE_F / df_F))
        pvals.append(f_dbn.sf(fs[-1], dfR - dfF, df_F))
        names.append(name)

    # Add in the "residual row"
    sss.append(SSE_F)
    dfs.append(df_F)
    pvals.append(np.nan)
    fs.append(np.nan)
    names.append('Residuals')

    result = np.array(names,
                      np.dtype([('contrast', 'S%d' % max([len(n) for n in names]))]))
    result = ML.rec_append_fields(result,
                                  ['SS', 'df', 'MS', 'F', 'p_value'],
                                  [sss, dfs, np.array(sss) / np.array(dfs), fs, pvals])
    return result
def typeI(response, ancova, recarray):
    """
    Produce an ANCOVA table from a given ANCOVA formula
    with type I sums of squares where the order is based on the
    order of terms in the contrast_names of ancova.

    Inputs
    ------
    response: str
        field name of response in recarray
    ancova: ANCOVA
        specifies the model to be fit
    recarray: np.ndarray
        should contain all field names in the terms of ancova
        as well as response
    """
    Y = recarray[response]
    X = ancova.formula.design(recarray, return_float=True)
    model = OLS(Y, X)
    results = model.fit()
    SSE_F = np.sum(results.resid**2)
    df_F = results.df_resid

    model = OLS(Y, ancova.formulae[0].design(recarray, return_float=True))
    results = model.fit()
    SSE_old = np.sum(results.resid**2)
    df_old = results.df_resid

    names = []
    sss = []
    fs = []
    dfs = []
    pvals = []

    names.append(ancova.contrast_names[0])
    fs.append(((np.sum(Y**2) - SSE_old) / (Y.shape[0] - df_old)) / (SSE_F / df_F))
    sss.append(np.sum(Y**2) - SSE_old)
    dfs.append(Y.shape[0] - df_old)
    pvals.append(f_dbn.sf(fs[-1], Y.shape[0] - df_old, df_F))

    for d in range(1, len(ancova.formulae)):
        terms = []
        for f in ancova.formulae[:(d + 1)]:
            terms += list(f.terms)

        # JT: this is not numerically efficient
        # could be done by updating some factorization of the full X
        X = Formula(terms).design(recarray, return_float=True)
        model = OLS(Y, X)
        results = model.fit()
        SSE_new = np.sum(results.resid**2)
        df_new = results.df_resid

        sss.append(SSE_old - SSE_new)
        dfs.append(df_old - df_new)
        fs.append(((SSE_old - SSE_new) / (df_old - df_new)) / (SSE_F / df_F))
        pvals.append(f_dbn.sf(fs[-1], df_old - df_new, df_new))
        names.append(ancova.contrast_names[d])

        SSE_old = SSE_new
        df_old = df_new

    # Add in the "residual row"
    sss.append(SSE_new)
    dfs.append(df_new)
    pvals.append(np.nan)
    fs.append(np.nan)
    names.append('Residuals')

    result = np.array(names,
                      np.dtype([('contrast', 'S%d' % max([len(n) for n in names]))]))
    result = ML.rec_append_fields(result,
                                  ['SS', 'df', 'MS', 'F', 'p_value'],
                                  [sss, dfs, np.array(sss) / np.array(dfs), fs, pvals])
    return result
# pair ids where the cluster in the pair is near an absorber
# cluster id
# qso id
# total zpath over all pairs

outname = run_id + '/qso_cluster_pairs_zpath.fits'
if os.path.exists(outname):
    print('Reading', outname, '...', end=' ')
    pairs = fits.getdata(outname)
    print(' done')
else:
    # Find all qso-cluster pairs. Takes about 10 min to run.
    pairs0 = match_clus_qso(clus, qso)
    # assign a unique identifier to each pair.
    pairs1 = rec_append_fields(pairs0, ['pid'], [np.arange(len(pairs0))])

    # find tot zpath (including both field and cluster paths up to
    # z=1, only towards sightlines with a nearby cluster though) also?
    print('Calculating MgII hits and the total z path length')
    if DEBUG:
        fig4 = plt.figure(4, figsize=(6, 6))
        ax = fig4.add_subplot(111)

    print('Looping over QSOs')
    # extra columns for the qso-cluster pair table.
    extra_cols = {}
    n_unique_qsos = len(np.unique(pairs1['qid']))
    for i, (qid, ind) in enumerate(indgroupby(pairs1, 'qid')):
        # ... (loop body not included in this excerpt)
    ax4 = fig1.add_subplot(414)
    ax4.plot(data['Np'], 'bo')
    ax4.set_title('Np')
    plt.show()
    pass

if __name__ == '__main__':
    # os.chdir('/home/shankar/Desktop/Research/modem-sim2/logs')
    # subprocess.call(["cat log-0.txt | grep printGrandPlay | sed 's/^.*FINE\|//g' | sed 's/^.*ment//g' \
    #                  > experiment_results.txt ; cat experiment_results.txt"], shell=True)
    host_name = '192.168.0.22'
    graph_animator = GraphAnimator.GraphAnimator()
    filename = graph_animator.animateGraph(host_name)
    command_to_refine = ("cat " + filename +
                         "| grep 'history' | sed 's/^.*history//' > " +
                         filename + "_3d_plots.txt")
    plot_filename = filename + "_3d_plots.txt"
    subprocess.call([command_to_refine], shell=True)
    data_format = {'names': ('timestamp', 'bandit', 'banditID', 'result', 'BER',
                             'absolute_data_rate', 'alpha', 'beta',
                             'gittins_index', 'gittins_index_norm',
                             'MTYPE', 'DMODE', 'MPSK', 'Nc', 'Np', 'Nz',
                             'PKT_LEN', 'FEC'),
                   'formats': ('S10', 'S10', 'f4', 'S10', 'f4', 'f4', 'f4', 'f4',
                               'f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4',
                               'f4', 'f4')}
    raw_data = np.loadtxt(plot_filename, dtype=data_format)
    raw_data = mlab.rec_append_fields(
        raw_data, 'time_as_int',
        np.array([int(times) for times in raw_data['timestamp']]))
    raw_data = mlab.rec_append_fields(
        raw_data, 'bandit_as_int',
        np.array([int(bandits) for bandits in raw_data['bandit']]))
    filtered_data = raw_data[raw_data['BER'] < 0.8]
    plotBanditParams(filtered_data)
    plotOfdmParams(filtered_data)
    plotBerDataRate(filtered_data)
    plt.show()
def create_table(self, outputPath, query={}, tage=None,
                 isocType="pdva", specType="basel", clobber=True):
    """Create an HDF5 table that combines outputs from models
    in the library.
    """
    query.update({"compute_complete": True, "np_data": {"$exists": 1}})
    docs = self.collection.find(query)  # , limit=2
    print("working on %i docs to read" % docs.count())
    lut = get_metallicity_LUT(isocType, specType)

    # TODO need to generalize definition of columns. A user ought to
    # be able to use any pset columns, any set of mags, and the spectra
    #magNames = ['TMASS_J','TMASS_H','TMASS_Ks','MegaCam_u','MegaCam_g',
    #            'MegaCam_r','MegaCam_i','MegaCam_z','GALEX_NUV','GALEX_FUV']
    magCols = [(s, float,) for (i, s, c) in FILTER_LIST]
    #magCols = [(s, float) for s in magNames]
    psetCols = [('dust_type', int), ('imf_type', int), ('sfh', int),
                ('tau', float), ('const', float), ('sf_start', float),
                ('fburst', float), ('tburst', float), ('dust_tesc', float),
                ('dust1', float), ('dust2', float), ('frac_nodust', float)]
    sfhCols = [('age', float), ('mass', float), ('lbol', float), ('sfr', float)]
    miscCols = [('Z', float)]  # metallicity, taken from zmet LUT
    specCols = [('spec', float, SpecParser.nlambda(specType))]
    allCols = psetCols + sfhCols + miscCols + magCols + specCols
    tableDtype = np.dtype(allCols)

    if os.path.exists(outputPath) and clobber:
        os.remove(outputPath)
    title = os.path.splitext(os.path.basename(outputPath))[0]
    h5file = tables.openFile(outputPath, mode="w", title=title)
    table = h5file.createTable("/", 'models', tableDtype, "Model Output Table")
    print(h5file)

    for doc in docs:
        print("reading %s" % doc['_id'])
        npData = doc['np_data']
        nRows = len(npData)

        # Append pset cols and misc cols
        extraNames = []
        extraArrays = []
        zmet = doc['pset']['zmet']
        Z = lut[zmet - 1]
        Z = np.ones(nRows, dtype=float) * Z
        extraNames.append('Z')
        extraArrays.append(Z)
        for cName, cType in psetCols:
            p = doc['pset'][cName]
            pArray = np.ones(nRows, dtype=cType) * p
            extraNames.append(cName)
            extraArrays.append(pArray)
        npDataAll = mlab.rec_append_fields(npData, extraNames, extraArrays)

        # select row closest to the target age
        if tage is not None:
            ageGyr = 10. ** npDataAll['age'] / 10. ** 9
            i = np.argmin((ageGyr - tage) ** 2)
            row = np.atleast_1d(np.array(npDataAll[i], copy=True))
            table.append(row)
        else:
            # table.append(npDataAll)  # should work but corrupts data
            row = table.row
            for i in range(nRows):
                print("row %i" % i)
                for x in allCols:
                    name = x[0]
                    print(name, npDataAll[i][name])
                    row[name] = npDataAll[i][name]
                row.append()
        table.flush()

    h5file.flush()
    h5file.close()
ab, iqso_from_id, iMgII_from_id = read_zhu()
qso = ab['qso']

# find qso sightlines that are within 10 proper Mpc of a foreground cluster.
if os.path.exists(run_id + '/qso_cluster_pairs.fits'):
    print('Reading', run_id + '/qso_cluster_pairs.fits')
    pairs0 = fits.getdata(run_id + '/qso_cluster_pairs.fits')
else:
    # takes about 10 min to run.
    pairs0 = match_clus_qso(clus, qso,
                            filename=run_id + '/qso_cluster_pairs.fits')

# assign a unique identifier to each pair. modifies pairs in place.
pairs0 = rec_append_fields(pairs0, ['pid'], [np.arange(len(pairs0))])

if PLOTRES:
    plot_hist(run_id, clus, ab['MgII'], run_id)

if CALC:
    cids = clus['id']
    pairs = pairs0[np.in1d(pairs0['cid'], cids)]

    # for each qso-cluster pair find any absorbers with impact par <
    # 1 Mpc within some z range of the cluster.
    # for rho < 1
    # z path length within 1Mpc of cluster per pair
    # absorber id for a cluster-absorber pair
    # pair ids where the cluster in the pair is near an absorber
def bugtrend(milestone):
    baseWorkingDirectory = "/tmp/"
    wikiTableBaseFileName = baseWorkingDirectory + "DefectChurnReport"
    wikiImageFileBaseLocation = "http://metrics.arubanetworks.com/metrics/margot_autopages/"
    wikiContent = []
    Queries = milestone.split(",")
    default_column_value = {
        "datemaxbabug_when": datetime.date(2030, 12, 1),
        "datebcreation_ts": datetime.date(2005, 1, 1),
        "cf_customers": "Aruba Internal",
    }
    for params in Queries:
        print "processing : " + params
        wikiContent = []
        baseFileName = params
        bugReportName = baseWorkingDirectory + baseFileName + "_bugs.csv"
        fixedReportName = baseWorkingDirectory + baseFileName + "_fixed.csv"
        outputReportName = baseWorkingDirectory + baseFileName + "_merged.csv"
        r = mlab.csv2rec(bugReportName)
        s = mlab.csv2rec(fixedReportName)
        k = mlab.rec_join("bug_id", s, r, jointype="outer",
                          defaults=default_column_value, r1postfix="1", r2postfix="2")
        t = mlab.csv2rec("/home/automation/bugzilla_tool/Org_Mapping.csv")
        # mlab.rec2csv(k, outputReportName, delimiter=',', missing="", missingd=None, withheader=True)
        org_mapping = dict(zip(t.login_name, range(len(t))))
        # orgList = []
        DirectorArray = np.zeros_like(k.login_name)
        ComponentArray = np.zeros_like(k.login_name)
        ManagerArray = np.zeros_like(k.login_name)
        for i in range(len(k)):
            if k[i].login_name in org_mapping:
                DirectorArray[i] = t[org_mapping[k[i].login_name]].director
                ComponentArray[i] = t[org_mapping[k[i].login_name]].functional_group
                ManagerArray[i] = t[org_mapping[k[i].login_name]].manager
            else:
                DirectorArray[i] = t[org_mapping["*****@*****.**"]].director
                ComponentArray[i] = t[org_mapping["*****@*****.**"]].functional_group
                ManagerArray[i] = t[org_mapping["*****@*****.**"]].manager
        k = mlab.rec_append_fields(k, "Director", DirectorArray)
        k = mlab.rec_append_fields(k, "Component", ComponentArray)
        k = mlab.rec_append_fields(k, "Manager", ManagerArray)
        mlab.rec2csv(k, outputReportName, delimiter=",", missing="",
                     missingd=None, withheader=True)

        # Start preparing the data for plotting
        chartFileName = baseWorkingDirectory + baseFileName + ".png"
        plotDefectTrend(k, chartFileName, baseFileName)
        s = "= Overall Defect Trend = \n"
        wikiContent.append(s)
        s = wikiImageFileBaseLocation + baseFileName + ".png \n"
        wikiContent.append(s)

        # Directors = ('Murali Duvvury', 'Shankar', 'Jie Jiang')
        Directors = list(np.unique(np.array(k.Director)))
        s = "= Director level Defect Trend = \n"
        wikiContent.append(s)
        wikiTableFileName = wikiTableBaseFileName + "_" + params + ".wiki"
        f = open(wikiTableFileName, "w")
        hdrList = ("Director", "Open Defects", "Need Info", "Observe",
                   "Resolved-Fixed", "Resolved-Other", "Incoming")
        printWikiTableOpen(f, hdrList)
        for Dir in Directors:
            s = Dir
            DirFileName = Dir.replace(" ", "_")
            DirRe = re.compile(s)
            DirReMatch = np.vectorize(lambda x: bool(DirRe.match(x)))
            sel = DirReMatch(np.array(k.Director))
            chartFileName = baseWorkingDirectory + baseFileName + "-" + DirFileName + ".png"
            plotDefectTrend(k[sel], chartFileName, baseFileName + "-" + DirFileName)
            printChurnReport(k[sel], Dir, f)
            s = wikiImageFileBaseLocation + baseFileName + "-" + DirFileName + ".png \n"
            wikiContent.append(s)
            # chartFileName = baseWorkingDirectory + baseFileName + "-" + Dir + ".png"
            # plotDefectTrend(k[k.Director == Dir], chartFileName, baseFileName + '-' + Dir)
        printWikiTableClose(f)

        s = "= Component level Defect Trend = \n"
        wikiContent.append(s)
        ComponentList = [
            ["GSM", "GSM"],
            ["UI-Configuration", "UI"],
            ["AP-Platform", "11ac"],
            ["Switch-Datapath", "Datapath"],
            ["HA-Lite", "HA-Lite"],
            ["Switch-Platform", "CIMU"],
            ["Feature-Bugs", "\w+]"],
        ]
        for c in ComponentList:
            s = "^\[*" + c[1]
            componentRe = re.compile(s)
            componentReMatch = np.vectorize(lambda x: bool(componentRe.match(x)))
            sel = np.logical_or(componentReMatch(np.array(k.short_desc)), k.name == c[0])
            chartFileName = baseWorkingDirectory + baseFileName + "-" + c[0] + ".png"
            plotDefectTrend(k[sel], chartFileName, baseFileName + "-" + c[0])

        s = "= Keyword level Defect Trend = \n"
        wikiContent.append(s)
        KeywordList = [
            ["TC-Blocker", "TC\-blocker"],
            ["SystemTest", "ST"],
            ["Smoke", "Smoke\-Failure"],
            ["CFT", "CFT"],
            ["MustFix", "MustFix"],
        ]
        for keyword in KeywordList:
            s = keyword[1]
            keywordRe = re.compile(s)
            keywordReMatch = np.vectorize(lambda x: bool(keywordRe.match(x)))
            sel = keywordReMatch(np.array(k.keywords))
            chartFileName = baseWorkingDirectory + baseFileName + "-" + keyword[0] + ".png"
            plotDefectTrend(k[sel], chartFileName, baseFileName + "-" + keyword[0])

        # wikiTableFileName = wikiTableBaseFileName + '_' + params + '.wiki'
        # f = open(wikiTableFileName, 'w')
        hdrList = ("Manager", "Open Defects", "Need Info", "Observe",
                   "Resolved-Fixed", "Resolved-Other", "Incoming")
        printWikiTableOpen(f, hdrList)
        ManagerList = list(np.unique(np.array(k.Manager)))
        today = datetime.date.today()
        resolvedDateRange = today + datetime.timedelta(days=-14)
        s = "= Manager Defect Trend = \n"
        wikiContent.append(s)
        for mgr in ManagerList:
            s = mgr
            mgrFileName = mgr.replace(" ", "_")
            mgrRe = re.compile(s)
            mgrReMatch = np.vectorize(lambda x: bool(mgrRe.match(x)))
            sel = mgrReMatch(np.array(k.Manager))
            chartFileName = baseWorkingDirectory + baseFileName + "-" + mgrFileName + ".png"
            plotDefectTrend(k[sel], chartFileName, baseFileName + "-" + mgrFileName)
            printChurnReport(k[sel], mgr, f)
            s = wikiImageFileBaseLocation + baseFileName + "-" + mgrFileName + ".png \n"
            wikiContent.append(s)
        printWikiTableClose(f)

        f.write("".join(wikiContent))
        f.close()
        plt.close("all")
for i in range(females_needed):
    ridx = np.random.randint(len(nv_pool))
    while sex[nv_pool[ridx]] != '"F"':
        ridx = np.random.randint(0, len(nv_pool))
    subj = nv_pool[ridx]
    nv_pool.remove(subj)
    nv_matches.append(subj)
    base_ages = age[subj][age[subj] < split_age]
    rbase = base_ages[np.random.randint(len(base_ages))]
    fu_ages = age[subj][age[subj] > split_age]
    rfu = fu_ages[np.random.randint(len(fu_ages))]
    nv_ages.append(rfu - rbase)
    nv_rows.append([rows[subj][np.nonzero(age[subj] == rbase)[0]],
                    rows[subj][np.nonzero(age[subj] == rfu)[0]]])

# check if we fulfill our requirement
pval = np.min([stats.ttest_ind(nv_ages, per_diffs)[1],
               stats.ttest_ind(nv_ages, rem_diffs)[1]])
# pval = stats.ttest_ind(nv_ages, per_diffs)[1]
cnt += 1

if cnt == num_tries:
    print 'Cannot find good NV set, giving up!'
else:
    good_rows = good_rows + nv_rows

# flattening the list
good_rows = [i for j in good_rows for i in j]

# finally, create new variable and output it to a new file. Note that
# match_bool is reused: after rec_append_fields it holds the full record
# array with the new indicator field, not just the 0/1 vector.
match_bool = np.zeros(len(gf))
for row in good_rows:
    match_bool[row] = 1
match_bool = mlab.rec_append_fields(gf, var, match_bool)
# mlab.rec2csv(match_bool, csv_file[:-4] + '_matched_on' + str(split_age) + '_dsm' + str(dsm) + '_diff.csv')
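# Sketch (an assumption, not code from the original) of the retry loop this
# block appears to sit in: keep redrawing NV matches until their age gaps are
# statistically indistinguishable from both patient groups, up to num_tries.
# cnt, pval = 0, 0
# while pval < .05 and cnt < num_tries:
#     nv_ages, nv_rows = [], []
#     ...  # the matching block above, ending with the ttest-based pval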
def _postprocess(self, output_file, gu_poly, generator_list, overwrite=False,
                 supplementary_figures=False, **kwargs):
    generatoroutputs = []
    for g in generator_list:
        if supplementary_figures:
            gkw = g.G_kwargs
        else:
            gkw = {}
        generatoroutputs.append(g.gen(*g.G_args, overwrite=overwrite, **gkw))

    gu_arr = gen_gu.gen(*gen_gu.G_args, overwrite=overwrite, **gen_gu.G_kwargs)
    print gu_arr

    print "merging arrays"
    out_arr = gen_merge.join_recs_on_keys(gu_arr, generatoroutputs,
                                          (BASIN_ID_FIELD, ADMIN_ID_FIELD, GW_ID_FIELD))
    sr = ap.SpatialReference(PRJNAME)
    ap.Project_management(gu_poly, output_file, sr)
    print out_arr[BASIN_NAME_FIELD]

    # Pad any expected-but-missing columns with NaN, then drop extras.
    missing_fields = np.setdiff1d(ALL_FIELDS, out_arr.dtype.names)
    if len(missing_fields) > 0:
        print "WARNING: missing fields %s" % missing_fields
        obs = len(out_arr[GU_FIELD])
        out_arr = mlab.rec_append_fields(out_arr, missing_fields,
                                         [np.repeat(np.nan, obs) for _ in missing_fields])
    extra_fields = np.setdiff1d(out_arr.dtype.names, ALL_FIELDS)
    print "dropping extra fields %s" % extra_fields
    out_arr = mlab.rec_drop_fields(out_arr, extra_fields)

    print "generating pre-weighted columns"
    if WEIGHTING_SCHEMES is not None:
        new_cols = []
        names = []
        for n, weights in WEIGHTING_SCHEMES.iteritems():
            keys = weights.keys()
            values = weights.values()
            indicator_array = np.vstack([out_arr[f] for f in keys]).T
            indicator_array[indicator_array == NULL_VALUE] = np.nan
            scores = np.squeeze(np.asarray(
                aggregate_scores.aggregate_scores(indicator_array, values)))
            scores[np.isnan(scores)] = NULL_VALUE
            new_cols.append(scores)
            names.append(n)
        out_arr = mlab.rec_append_fields(out_arr, names, new_cols)

    for field in MAP_FIELDS:
        out_arr[field][out_arr[field] == ""] = "No data"
    mlab.rec2csv(out_arr, "bin/test.csv")

    print "dropping fields"
    drop = [f.baseName for f in ap.ListFields(output_file)
            if not f.required and not f.baseName == GU_FIELD]
    if len(drop) > 0:
        ap.DeleteField_management(output_file, drop)
    print "joining"
    ap.da.ExtendTable(output_file, GU_FIELD, out_arr, GU_FIELD)
    print "indexing"
    try:
        ap.AddSpatialIndex_management(output_file)
        ap.AddIndex_management(output_file, GU_FIELD, GU_FIELD, "UNIQUE")
    except Exception, e:
        print e
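# Shape of WEIGHTING_SCHEMES as the loop above consumes it: a dict mapping
# each new column name to a {indicator_field: weight} dict, whose keys index
# out_arr and whose values feed aggregate_scores. The names and weights below
# are made-up placeholders, not values from the original configuration:
# WEIGHTING_SCHEMES = {
#     "overall_score": {"INDICATOR_A": 0.5, "INDICATOR_B": 0.3, "INDICATOR_C": 0.2},
# }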
def readEvents(self):
    self.mapping = self._readMappingFile()
    self.pos_list = self._get_positions_dirs()

    fmt = "%%0%dd" % (len(self.pos_list[0]))
    for pos_idx, pos_name in enumerate(self.mapping['position']):
        if isinstance(pos_name, int):
            pos_name = fmt % pos_name
        print pos_name, pos_name in self.pos_list
        if pos_name not in self.pos_list:
            # raise RuntimeError("Position from Mapping file %s not found in path %s" % (pos_name, self.path_in))
            print "Position from Mapping file %s not found in path %s" % (pos_name, self.path_in)
            self._logger.warning("Position from Mapping file %s not found in path %s" % (pos_name, self.path_in))
            continue

        event_path = os.path.join(self.path_in, pos_name, 'statistics', 'events')
        if not os.path.exists(event_path):
            raise RuntimeError("For position %s no event path found %s" % (pos_name, event_path))

        event_file_list = sorted(os.listdir(event_path))
        if len(event_file_list) == 0:
            self._logger.warning("No events found for position %s" % pos_name)
            continue
        self._logger.info("Reading events for position '%s' (%d files)" % (pos_name, len(event_file_list)))

        hmm_correction_available = False
        if '_hmm' in event_file_list:
            hmm_correction_available = True
            event_file_list.remove('_hmm')

        for event_file in event_file_list:
            res = self.EVENT_REGEXP.search(event_file)
            if res is None:
                self._logger.warning("Could not parse event file name '%s' for position %s" % (event_file, pos_name))
                continue
            res = res.groupdict()
            if pos_name != res['pos']:
                self._logger.error("Event file %s has different pos identifier than %s" % (event_file, pos_name))
                raise RuntimeError("Event file %s has different pos identifier than %s" % (event_file, pos_name))

            channel = res["channel"]
            region = res["region"]
            branch = int(res["branch"])
            time = int(res["time"])
            obj = int(res["obj"])

            if branch != 1:
                continue

            if pos_name not in self._positions:
                if 'oligoid' in self.mapping.dtype.fields:
                    self.oligo_header_name = 'oligoid'
                elif 'sirna_id' in self.mapping.dtype.fields:
                    self.oligo_header_name = 'sirna_id'
                else:
                    raise RuntimeError('Mapping file has neither an oligoid nor a sirna_id header')
                self._positions[pos_name] = Position(plate=self.plate_id,
                                                     position=self.mapping[pos_idx]['position'],
                                                     well=self.mapping[pos_idx]['well'],
                                                     site=self.mapping[pos_idx]['site'],
                                                     row=self.mapping[pos_idx]['row'],
                                                     column=self.mapping[pos_idx]['column'],
                                                     gene_symbol=self.mapping[pos_idx]['gene_symbol'],
                                                     oligoid=self.mapping[pos_idx][self.oligo_header_name],
                                                     group=self.mapping[pos_idx]['group'],
                                                     )
            event_id = 'T%03d_O%04d_B%d' % (time, obj, branch)
            if event_id not in self._positions[pos_name]:
                self._positions[pos_name][event_id] = {}
            if channel not in self._positions[pos_name][event_id]:
                self._positions[pos_name][event_id][channel] = {}

            filename = os.path.join(event_path, event_file)
            self._positions[pos_name][event_id][channel][region] = numpy.recfromcsv(filename, delimiter='\t')

            if hmm_correction_available and region == 'primary':
                filename = os.path.join(event_path, '_hmm', event_file)
                if not os.path.exists(filename):
                    raise RuntimeError('HMM correction folder is there but event file not found %s' % filename)
                class__label__hmm = numpy.recfromcsv(filename, delimiter='\t')['class__b__label']
                self._positions[pos_name][event_id][channel][region] = \
                    rec_append_fields(self._positions[pos_name][event_id][channel][region],
                                      'class__label__hmm', class__label__hmm, numpy.uint8)
                self.class_label_selector = 'class__label__hmm'
    self.save(True)
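# EVENT_REGEXP is defined elsewhere in the class; the groupdict() lookups
# above require it to expose named groups pos, channel, region, branch, time
# and obj. One possible pattern (the file-name layout is an assumption):
# EVENT_REGEXP = re.compile(r'P(?P<pos>\d+)__T(?P<time>\d+)__O(?P<obj>\d+)'
#                           r'__B(?P<branch>\d+)__C(?P<channel>[^_]+)'
#                           r'__R(?P<region>[^_.]+)')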
def typeII(response, ancova, recarray):
    """
    Produce an ANCOVA table from a given ANCOVA formula
    with type II sums of squares.

    Inputs
    ------
    response: str
        field name of response in recarray
    ancova: ANCOVA
        specifies the model to be fit
    recarray: np.ndarray
        should contain all field names in the terms of ancova
        as well as response
    """
    Y = recarray[response]
    X = ancova.formula.design(recarray, return_float=True)
    model = OLS(Y, X)
    results = model.fit()
    SSE_F = np.sum(results.resid**2)
    df_F = results.df_resid

    names = []
    sss = []
    fs = []
    dfs = []
    pvals = []

    for name, expr_factors in zip(ancova.contrast_names, ancova.sequence()):
        expr, factors = expr_factors
        F = ancova.all_but_above(expr, factors)
        C = ancova.contrasts[name]
        XF, contrast_matrices = F.formula.design(recarray, contrasts={'C': C})
        modelF = OLS(Y, XF)
        resultsF = modelF.fit()
        SSEF = np.sum(resultsF.resid**2)
        dfF = resultsF.df_resid

        # Recover the restricted-model SSE from the F statistic, using
        # F = ((SSER - SSEF) / df_num) / (SSEF / dfF).
        ftest = resultsF.f_test(contrast_matrices['C'])
        SSER = SSEF + ftest.fvalue * ftest.df_num * (SSEF / dfF)
        dfR = dfF + ftest.df_num

        sss.append(SSER - SSEF)
        dfs.append(ftest.df_num)
        fs.append(((SSER - SSEF) / (dfR - dfF)) / (SSE_F / df_F))
        pvals.append(f_dbn.sf(fs[-1], dfR - dfF, df_F))
        names.append(name)

    # Add in the "residual row"
    sss.append(SSE_F)
    dfs.append(df_F)
    pvals.append(np.nan)
    fs.append(np.nan)
    names.append('Residuals')

    result = np.array(names,
                      np.dtype([('contrast', 'S%d' % max([len(n) for n in names]))]))
    result = ML.rec_append_fields(result,
                                  ['SS', 'df', 'MS', 'F', 'p_value'],
                                  [sss, dfs, np.array(sss) / np.array(dfs), fs, pvals])
    return result
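# The returned recarray mirrors a classical ANOVA table, one row per contrast
# plus a 'Residuals' row. A hypothetical session (constructing the ANCOVA
# object depends on the surrounding formula package, so this is a sketch):
# table = typeII('response', my_ancova, data)
# for row in table:
#     print row['contrast'], row['SS'], row['df'], row['F'], row['p_value']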