Example #1
def rnQ(r0):
    """
    Record Array Normalize Quarter

    Parameters
    ----------
    r0 : Record array with the columns to be normalized.
    
    Returns
    -------
    r  : Light curve with new, median-normalized columns.

    """
    r = r0.copy()

    col = ['SAP_FLUX', 'PDCSAP_FLUX']
    ecol = ['SAP_FLUX_ERR', 'PDCSAP_FLUX_ERR']
    col2 = ['f', 'fpdc']  # Names for the modified columns.
    ecol2 = ['ef', 'efpdc']

    for c, ec, c2, ec2 in zip(col, ecol, col2, ecol2):
        medf = np.median(r[c])
        norm = r[c] / medf - 1
        enorm = r[ec] / medf
        r = mlab.rec_append_fields(r, c2, norm)
        r = mlab.rec_append_fields(r, ec2, enorm)

    return r
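mlab.rec_append_fields used above is no longer available in current matplotlib releases; numpy.lib.recfunctions.append_fields covers the same use case. A minimal sketch of the normalization-and-append pattern on made-up data (the column names only mirror the ones above):

import numpy as np
from numpy.lib import recfunctions as rfn

# Toy stand-in for a Kepler quarter (hypothetical values).
r = np.rec.fromarrays(
    [np.array([10.0, 11.0, 9.0, 10.5]), np.array([0.1, 0.1, 0.1, 0.1])],
    names='SAP_FLUX,SAP_FLUX_ERR')

medf = np.median(r['SAP_FLUX'])
norm = r['SAP_FLUX'] / medf - 1     # median-normalized flux
enorm = r['SAP_FLUX_ERR'] / medf    # error scaled by the same median

# append_fields plays the role of mlab.rec_append_fields here
r = rfn.append_fields(r, ['f', 'ef'], [norm, enorm], usemask=False, asrecarray=True)
print(r.dtype.names)  # ('SAP_FLUX', 'SAP_FLUX_ERR', 'f', 'ef')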
Example #2
def rdt(r0):
    """
    Detrend the light curve with a GP-based trend model.

    Parameters
    ----------
    r0 : Record array with `f`, `fmask`, `t` fields

    Returns
    -------
    r  : same record array with the following fields:
         label - 0,1,2 identifies groups for spline detrending.
         ftnd  - the best fit trend
         fdt   - f - ftnd.
    """
    r = r0.copy()
    fm = ma.masked_array(r['f'], r['fmask'])
    ftnd = fm.copy()

    rvalid = r[~r['fmask']]
    t = r['t']
    x, y = detrend.bin(rvalid)  # Compute GP using binned lc (speed)
    yi = detrend.GPdt(t, x, y)  # evaluate at all points
    ftnd[:] = yi

    # Assign a label to the segEnd segment
    fdt = fm - ftnd
    r = mlab.rec_append_fields(r, 'ftnd', ftnd.data)
    r = mlab.rec_append_fields(r, 'fdt', fdt.data)
    return r
Example #3
def calc_SI(cat1, cat2, limit):
    """
    module to use a catalogue at a different frequency, do cross matching, and calculate the spectral index
    The module also looks for multiple matches and assigns the flux of one source matching multiple ones linearly to
    calculate the spectral index. I tonly looks into sources which are not further apart as the limit parameter.

    cat1: The catalogue where you want to add the spectral index to the sources. Usually NVSS or FIRST.
    cat2: The catalogue to match and calculate the spectral index from. Usually WENSS.
    limit: Maximum distance in arcseconds for two sources to match each other.
    returns: cat1 with added spectral indices. Sources with no counterpart where set to -0.7.
    """
    try:  # Handle the exception if the WENSS query did not give any results.
        coords1 = SkyCoord(
            ra=cat1.RA, dec=cat1.DEC, unit=(u.deg, u.deg)
        )  # Convert the coordinates of the two source catalogues to the right format
        coords2 = SkyCoord(ra=cat2.RA, dec=cat2.DEC, unit=(u.deg, u.deg))
        idx, d2d, d3d = coords1.match_to_catalog_sky(
            coords2
        )  # Get the indices of the matches (idx), and their distance on the sky (d2d)
        dist = (d2d * u.deg * 3600) / (u.deg * u.deg)  # Convert to arcsec
        nomatch = np.where(dist > limit)  # Index of sources with no match
        match = np.where(dist <= limit)  # Index of sources with match
        idx_match = idx[match]
        flux1 = np.delete(
            cat1.flux, nomatch
        )  # Array of source fluxes at 20cm for all matches including resolved sources
        flux2 = np.asarray(
            cat2.flux
        )[idx_match]  # Array of source fluxes at 90cm for all matches including multiples
        src, counts = np.unique(idx_match, return_counts=True)
        logging.debug(
            ' Found ' + str(len(np.asarray(nomatch)[0])) +
            ' source(s) with no counterparts. Setting their spectral index to -0.7'
        )
        num, occ = np.unique(counts, return_counts=True)
        for n, g in enumerate(num):
            logging.debug(' Found ' + str(occ[n]) + ' source(s) with ' +
                          str(num[n]) + ' counterpart(s)')
        src_wgt_1 = np.zeros(
            len(flux2)
        )  # Calculate the fluxes for the matched and resolved sources using weighting
        for s in src:
            src_idx = np.where(s == idx_match)
            src_sum_1 = np.sum(flux1[src_idx])
            src_wgt_1[src_idx] = flux1[src_idx] / src_sum_1
        src_flux_2 = flux2 * src_wgt_1
        src_si = np.log10(flux1 / src_flux_2) / np.log10(1.4 / 0.33)
        si = np.zeros(
            len(idx)
        )  # Create the array for the spectral index and put the values into the right position
        si[nomatch] = -0.7
        si[match] = src_si
        si[si < -3] = -0.7
        # Change the value to -0.7 in case of high absolute values. Maybe wrong source match or variable source
        si[si > 2] = -0.7
        cat = mplab.rec_append_fields(cat1, 'SI', si, dtypes=float)
    except Exception:
        # In case the queried area is not covered by WENSS give all sources a spectral index of -0.7.
        cat = mplab.rec_append_fields(cat1, 'SI', -0.7, dtypes=float)
    return cat
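For reference, the spectral index computed above is the two-point index between 20 cm (1.4 GHz) and 90 cm (0.33 GHz): si = log10(S_1.4 / S_0.33) / log10(1.4 / 0.33). A quick numeric check with made-up fluxes:

import numpy as np

# Hypothetical matched fluxes in Jy at 20 cm (1.4 GHz) and 90 cm (0.33 GHz).
flux_20cm = np.array([0.10, 0.50])
flux_90cm = np.array([0.28, 1.40])

si = np.log10(flux_20cm / flux_90cm) / np.log10(1.4 / 0.33)
print(si)  # both roughly -0.71, close to the -0.7 default used for non-matches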
Example #4
def make_footprint(tilefile=defaulttilefile):
    lbound1 = [240, 365]
    lbound2 = [-5, 5]
    bbound = [-4, 4]
    tiles = fits.getdata(tilefile)
    lt, bt = equgal(tiles['ra'], tiles['dec'])
    mpilot = ((bt > bbound[0]) & (bt < bbound[1]) &
              (
                  (lt > lbound1[0]) & (lt < lbound1[1]) |
                  (lt > lbound2[0]) & (lt < lbound2[1])
              ))
    mpilot = extend_footprint_to_matches(tiles, mpilot)
    zeros = numpy.zeros(len(tiles), dtype='i4')
    tiles = rec_append_fields(tiles, ['i_done', 'y_done', 'i_expnum',
                                      'y_expnum', 'in_decaps'],
                              [zeros.copy() for i in xrange(5)])
    zeros = numpy.zeros(len(tiles), dtype='S10')
    tiles = rec_append_fields(tiles, ['i_date', 'y_date'],
                              [zeros.copy() for i in xrange(2)])
    tiles['in_decaps'][mpilot] |= 2**0
    lbound1 = [240, 365]
    lbound2 = [-5, 5]
    bbound = [-10, 10]
    mall = ((bt > bbound[0]) & (bt < bbound[1]) &
            (
                (lt > lbound1[0]) & (lt < lbound1[1]) |
                (lt > lbound2[0]) & (lt < lbound2[1])
            ))
    tiles['in_decaps'][mall] |= 2**1
    tiles.dtype.names = [n.lower() for n in tiles.dtype.names]
    return tiles
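The in_decaps column above is a bit mask: bit 0 marks the pilot footprint, bit 1 the wider |b| < 10 selection. A minimal sketch of setting and testing such flags (the masks below are made up):

import numpy as np

in_decaps = np.zeros(5, dtype='i4')
mpilot = np.array([True, False, True, False, False])
mall = np.array([True, True, True, False, True])

in_decaps[mpilot] |= 2**0   # bit 0: pilot footprint
in_decaps[mall] |= 2**1     # bit 1: full |b| < 10 footprint

print(in_decaps)               # [3 2 3 0 2]
print((in_decaps & 2**0) > 0)  # membership test for the pilot region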
Example #5
 def testCalculateStatistics(self):
     """ builder.sed.calculate_statistics()"""
     dtypes = [('teff','f8'), ('logg','f8'),('ebv','f8'),('rv','f8'),('z','f8'), ('chisq','f8')]
     grid = [array([ 22674.,  21774.,  22813.,  29343., 28170.]),
             array([ 5.75, 6.07,  6.03,  6.38,  5.97]),
             array([ 0.0018, 0.0077,  0.0112,  0.0046,  0.0110]),
             array([ 2.20, 2.40, 2.60, 2.80, 3.00]),
             array([0,0,0,0,0]),      
             array([1.,3.,2.,0.1,10.0])]
     master = np.rec.fromarrays(grid,dtype=dtypes)
     master = mlab.rec_append_fields(master, 'ci_raw', np.zeros(len(master)))
     master = mlab.rec_append_fields(master, 'ci_red', np.zeros(len(master)))
     self.sed.results['igrid_search']['grid'] = master
     self.sed.master['include'] = [True,True,False,True,True]
     
     with mock.patch.object(builder.SED, 'calculateDF', return_value=5) as mock_method:
         self.sed.calculate_statistics(df=5)
     
     res = self.sed.results['igrid_search']
     raw = [0.6826894, 0.9167354, 0.8427007, 0.2481703, 0.9984345]
     red = [0.2481703, 0.4161175, 0.3452791, 0.0796556, 0.6826894]
     
     self.assertFalse(mock_method.called)
     self.assertArrayAlmostEqual(res['grid']['ci_raw'].tolist(), raw, places=5)
     self.assertArrayAlmostEqual(res['grid']['ci_red'].tolist(), red, places=5)
     self.assertEqual(res['factor'], 10.0)
Example #6
def random_recarray(size):
    initial = np.empty(size, dtype=np.dtype([('Y', np.float)]))
    initial['Y'] = np.random.standard_normal(size)
    numeric_vars = [np.random.standard_normal(size) for _ in range(10)]
    categorical_vars = [random_letters(size, l) for l in [3,4,7,6,4,5,8]]
    inter = ML.rec_append_fields(initial, ['n%s' % l for l in uppercase[:10]], numeric_vars)
    final = ML.rec_append_fields(inter, ['c%s' % l for l in uppercase[:len(categorical_vars)]], categorical_vars)
    return final, sympy.symbols(['n%s' % l for l in uppercase[:10]]), [Factor('c%s' % s, np.unique(l)) for s, l in zip(uppercase[:len(categorical_vars)],
                                                                                                                       categorical_vars)]
Example #7
def tdpep(t,fm,PG0):
    """
    Transit-duration - Period - Epoch

    Parameters 
    ---------- 
    fm  : Flux with bad data points masked out.  It is assumed that
          elements of f are evenly spaced in time.
    PG0 : Initial period grid.

    Returns
    -------

    epoch2d : Grid (twd,P) of best epoch 
    df2d    : Grid (twd,P) of depth epoch 
    count2d : number of filled data for particular (twd,P)
    noise   : Grid (twd) typical scatter 
    PG      : The Period grid
    twd     : Grid of trial transit widths.

    """
    assert fm.fill_value ==0
    # Determine the grid of periods that corresponds to integer
    # multiples of cadence values
    PcadG,PG = P2Pcad(PG0)
       
    # Initialize tdur grid.  
    twdMi = a2tdur( P2a( PG[0 ] ) ) /keptoy.lc
    twdMa = a2tdur( P2a( PG[-1] ) ) /keptoy.lc
    twdG = np.round(np.linspace(twdMi,twdMa,4)).astype(int)

    rec2d = []
    noise = []
    for twd in twdG:
        dM = mtd(t,fm.filled(),twd)
        dM.mask = fm.mask | ~isfilled(t,fm,twd)
        rec2d.append( pep(t[0],dM,PcadG) )

        # Noise per transit 
        mad = ma.abs(dM)
        mad = ma.median(mad)
        noise.append(mad)

    rec2d = np.vstack(rec2d)

    make2d = lambda x : np.tile( np.vstack(x), (1,rec2d.shape[1] ))
    rec2d = mlab.rec_append_fields(rec2d,'noise',make2d(noise))
    rec2d = mlab.rec_append_fields(rec2d,'twd',  make2d(twdG))

    PG = np.tile( PG, (rec2d.shape[0],1 ))
    rec2d = mlab.rec_append_fields(rec2d,'PG',PG)

    s2n   = rec2d['fom']/rec2d['noise']*rec2d['count']
    rec2d = mlab.rec_append_fields(rec2d,'s2n',  s2n )
    return rec2d
Example #8
def random_from_terms_factors(terms, factors, size):
    dtype = np.dtype([(str(t), np.float) for t in terms] + [(f.name,'S30') for f in factors])
    data = np.empty(size, 
                    np.dtype([(str(terms[0]), np.float)]))
    data[str(terms[0])] = np.random.standard_normal(size)
    for t in terms[1:]:
        data = ML.rec_append_fields(data, str(t), 
                                    np.random.standard_normal(size))
    for f in factors:
        data = ML.rec_append_fields(data, f.name, random_from_factor(f, size))
    return data
Example #9
def random_from_terms_factors(terms, factors, size):
    dtype = np.dtype([(str(t), np.float) for t in terms] + [(f.name, 'S30')
                                                            for f in factors])
    data = np.empty(size, np.dtype([(str(terms[0]), np.float)]))
    data[str(terms[0])] = np.random.standard_normal(size)
    for t in terms[1:]:
        data = ML.rec_append_fields(data, str(t),
                                    np.random.standard_normal(size))
    for f in factors:
        data = ML.rec_append_fields(data, f.name, random_from_factor(f, size))
    return data
Example #10
def random_recarray(size):
    initial = np.empty(size, dtype=np.dtype([('Y', np.float)]))
    initial['Y'] = np.random.standard_normal(size)
    numeric_vars = [np.random.standard_normal(size) for _ in range(10)]
    categorical_vars = [random_letters(size, l) for l in [3, 4, 7, 6, 4, 5, 8]]
    inter = ML.rec_append_fields(initial, ['n%s' % l for l in uppercase[:10]],
                                 numeric_vars)
    final = ML.rec_append_fields(
        inter, ['c%s' % l for l in uppercase[:len(categorical_vars)]],
        categorical_vars)
    return final, sympy.symbols(['n%s' % l for l in uppercase[:10]]), [
        Factor('c%s' % s, np.unique(l))
        for s, l in zip(uppercase[:len(categorical_vars)], categorical_vars)
    ]
def add_timespan(recarray):
    ''' input: array of dates as strings
        output: datetime list, dt relative to first date [days]'''
    pltdates = np.array([datetime.datetime.strptime(date,'%Y/%m/%d') for date in recarray.date])
    
    dt = np.cumsum(np.diff(pltdates)) #could do array of total seconds or decimal years
    dt = np.insert(dt,0,datetime.timedelta(0))
    
    roidates = np.array([D.strftime('%y%m%d') for D in pltdates])

    recarray = mlab.rec_append_fields(recarray,'dt',dt,object)
    recarray = mlab.rec_append_fields(recarray,'pltdate',pltdates,object) # append datetime objects as new field
    recarray = mlab.rec_append_fields(recarray,'roidate',roidates, roidates.dtype)
    #print "appended 'dt' and 'pltdate' fields"
    return recarray
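The dt column is the elapsed time since the first acquisition, built from the cumulative sum of the date differences with a zero timedelta prepended. A small sketch of that step on hypothetical dates:

import datetime
import numpy as np

dates = np.array([datetime.datetime(2007, 1, 1),
                  datetime.datetime(2007, 2, 15),
                  datetime.datetime(2007, 4, 1)])
dt = np.insert(np.cumsum(np.diff(dates)), 0, datetime.timedelta(0))
print([d.days for d in dt])  # [0, 45, 90]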
Example #12
def channel_transform(fitsfiles, h5file, iref= None):
    """
    Channel Transformation

    Take a list of k2 pixel files (must be from the same
    channel). Find the centroids of each image and solve for the
    linear transformation that takes one scene to another
    """
    nstars = len(fitsfiles)

    # Pull the first file to get length and data type
    fitsfile0 = fitsfiles[0]
    cent0 = fits_to_chip_centroid(fitsfile0)
    channel = get_channel(fitsfile0)
    print "Using channel = %i" % channel

    # Determine the reference frame
    if iref==None:
        dfcent0 = pd.DataFrame(LE(cent0))
        ncad = len(dfcent0)
        med = dfcent0.median()
        dfcent0['dist'] = (
            (dfcent0['centx'] - med['centx'])**2 +
            (dfcent0['centy'] - med['centy'])**2
            )
        dfcent0 = dfcent0.iloc[ncad/4:-ncad/4]
        dfcent0 = dfcent0.dropna(subset=['centx','centy'])
        iref = dfcent0['dist'].idxmin()
    
    print "using reference frame %i" % iref
    assert np.isnan(cent0['centx'][iref])==False,\
        "Must select a valid reference cadence. No nans"

    cent = np.zeros((nstars,cent0.shape[0]), cent0.dtype)
    for i,fitsfile in enumerate(fitsfiles):
        if (i%10)==0:
            print i
        cent[i] = fits_to_chip_centroid(fitsfile)
        channel_i = get_channel(fitsfile)
        assert channel==channel_i,"%i != %i" % (channel, channel_i)

    trans,pnts = imtran.linear_transform(cent['centx'],cent['centy'],iref)
    trans = pd.DataFrame(trans)
    trans = pd.concat([trans,pd.DataFrame(LE(cent0))[['t','cad']]],axis=1)
    trans = trans.to_records(index=False)

    keys = cent.dtype.names
    pnts = mlab.rec_append_fields(pnts,keys,[cent[k] for k in keys])

    if h5file!=None:
        with h5plus.File(h5file) as h5:
            h5['trans'] = trans
            h5['pnts'] = pnts
            
    trans,pnts = read_channel_transform(h5file)
    plot_trans(trans, pnts)
    figpath = h5file[:-3] + '.png'
    plt.gcf().savefig(figpath)
    print "saving %s " % figpath
    return cent
Example #13
def testR(d=simple(), size=500):

    X = random_from_categorical_formula(d, size)

    X = ML.rec_append_fields(X, 'response', np.random.standard_normal(size))
    fname = tempfile.mktemp()
    ML.rec2csv(X, fname)
    Rstr = '''
    data = read.table("%s", sep=',', header=T)
    cur.lm = lm(response ~ %s, data)
    COEF = coef(cur.lm)
    ''' % (fname, d.Rstr)
    rpy2.robjects.r(Rstr)
    remove(fname)
    nR = list(np.array(rpy2.robjects.r("names(COEF)")))

    nt.assert_true('(Intercept)' in nR)
    nR.remove("(Intercept)")
    nF = [str(t).replace("_","").replace("*",":") for t in d.formula.terms]
             
    nR = sorted([sorted(n.split(":")) for n in nR])

    nt.assert_true('1' in nF)
    nF.remove('1')

    nF = sorted([sorted(n.split(":")) for n in nF])
    nt.assert_equal(nR, nF)

    return d, X, nR, nF
Example #14
def read(system='WFC3', dir='/n/fink1/schlafly/mist/bcs'):
    files = list(util_efs.locate('*%s' % system, root=dir))
    res = []
    for file in files:
        lastline = ''
        fehline = -1
        if file[-3:] == 'iso':
            fehline = 4
        with open(file, 'r') as fp:
            for i, line in enumerate(fp):
                if line[0] != '#':
                    break
                if i == fehline:
                    isoline = line
                lastline = line
        if fehline > 0:
            feh = float(isoline.split()[3])
        names = lastline.split()[1:]
        grid = ascii.read(file, comment='#', names=names).as_array()
        if fehline > 0:
            grid = rec_append_fields(grid, '[Fe/H]',
                                     feh * numpy.ones(len(grid), dtype='f4'))
        if 'Av' in grid.dtype.names:
            m = grid['Av'] == 0
            res.append(grid[m])
        else:
            res.append(grid)
    return numpy.concatenate(res)
Example #15
def join_rec(r1,field1,r2,field2):
    """1-to-1 joining with non-unique lefthand side keys"""
    mapping = dict(zip(r2[field2], range(len(r2))))
    diff = np.setdiff1d(r1[field1],r2[field2])
    r2len = len(r2)
    if len(diff) > 0:
        print "WARNING: %s no matching key: %s" % (field2, diff)
        for i in range(len(diff)):
            mapping[diff[i]]=r2len
    r2copy = mlab.rec_drop_fields(r2, (field2,))
    r2copy.resize(r2len+1)
    joinfields = list(r2copy.dtype.names)
    dtypes = []
    for i in range(len(joinfields)):
        if r2copy.dtype[i].kind == "i":
            dtypes.append(np.double)
        else:
            dtypes.append(r2copy.dtype[i])
        if r2copy.dtype[i].kind == "f":
            r2copy[r2copy.dtype.names[i]][-1]=NULL_VALUE
        while joinfields[i] in r1.dtype.names:
            joinfields[i] = joinfields[i]+"_"
    rightrec = r2copy[[mapping[key] for key in r1[field1]]]
    r1 = mlab.rec_append_fields(r1, joinfields, [rightrec[n] for n in rightrec.dtype.names], dtypes)
    return r1
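join_rec performs a lookup join: a dict maps right-hand keys to row indices, unmatched keys point at an appended sentinel row, and fancy indexing pulls out the joined columns. A minimal sketch of the mechanism on hypothetical data:

import numpy as np

left_keys = np.array([101, 102, 103, 102])
right = np.rec.fromarrays([[101, 102], [1.5, 2.5]], names='key,value')

mapping = {k: i for i, k in enumerate(right['key'])}
sentinel = len(right)                 # index of the placeholder row
right = np.resize(right, sentinel + 1)
right['value'][-1] = np.nan           # NULL_VALUE stand-in for missing keys

rows = [mapping.get(k, sentinel) for k in left_keys]
print(right['value'][rows])           # [1.5 2.5 nan 2.5]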
def gethistprices(query, numrows=1000, **kwargs):
    
    rec_arr = sqlite2rec(query, **kwargs)

    import matplotlib.mlab as mlab
    
    import numpy as np

    (syms, posuniq, pos) = np.unique(rec_arr.sym, True, True)
    
    new_rec_arr = mlab.rec_append_fields(rec_arr, 'idx', pos)
    
    nosym = mlab.rec_drop_fields(new_rec_arr, ['sym',])
    
    recnumrecs = mlab.rec_groupby(nosym, ('idx',), (('idx', len, 'idxcount'), ))

    idx = np.nonzero(recnumrecs.idxcount >= numrows)[0]

    idxcount = len(recnumrecs[idx])

    xs = np.empty((idxcount, numrows, len(nosym[0])-1), dtype=float)

    for i in xrange(idxcount):

        if kwargs.has_key('verbose') and kwargs['verbose'] and i % 50 == 0:
            print '%d of %d' % (i, idxcount)
        
        curdata = nosym[nosym.idx == idx[i]]

        curdata_arr = np.array(curdata.tolist(), dtype=float)
        xs[i] = curdata_arr[0:numrows:,0:-1]
        
    return (syms[idx], xs)
Example #17
def testR(d=simple(), size=500):

    X = random_from_categorical_formula(d, size)

    X = ML.rec_append_fields(X, 'response', np.random.standard_normal(size))
    fname = tempfile.mktemp()
    ML.rec2csv(X, fname)
    Rstr = '''
    data = read.table("%s", sep=',', header=T)
    cur.lm = lm(response ~ %s, data)
    COEF = coef(cur.lm)
    ''' % (fname, d.Rstr)
    rpy2.robjects.r(Rstr)
    remove(fname)
    nR = list(np.array(rpy2.robjects.r("names(COEF)")))

    nt.assert_true('(Intercept)' in nR)
    nR.remove("(Intercept)")
    nF = [str(t).replace("_", "").replace("*", ":") for t in d.formula.terms]

    nR = sorted([sorted(n.split(":")) for n in nR])

    nt.assert_true('1' in nF)
    nF.remove('1')

    nF = sorted([sorted(n.split(":")) for n in nF])
    nt.assert_equal(nR, nF)

    return d, X, nR, nF
Example #18
def pixelizeCatalog(infiles, config, force=False):
    """
    Break catalog into chunks by healpix pixel.
    
    Parameters:
    -----------
    infiles : List of input files
    config  : Configuration file
    force   : Overwrite existing files (deprecated)
    
    Returns:
    --------
    None
    """
    nside_catalog = config['coords']['nside_catalog']
    nside_pixel = config['coords']['nside_pixel']
    outdir = mkdir(config['catalog']['dirname'])
    filenames = config.getFilenames()

    for i, filename in enumerate(infiles):
        logger.info('(%i/%i) %s' % (i + 1, len(infiles), filename))
        data = fitsio.read(filename)
        logger.info("%i objects found" % len(data))
        if not len(data): continue

        glon, glat = cel2gal(data['RA'], data['DEC'])
        cat_pix = ang2pix(nside_catalog, glon, glat)
        pix_pix = ang2pix(nside_pixel, glon, glat)
        cat_pix_name = 'PIX%i' % nside_catalog
        pix_pix_name = 'PIX%i' % nside_pixel

        data = mlab.rec_append_fields(
            data,
            names=['GLON', 'GLAT', cat_pix_name, pix_pix_name],
            arrs=[glon, glat, cat_pix, pix_pix],
            dtypes=['f4', 'f4', int, int])

        for pix in np.unique(cat_pix):
            logger.debug("Processing pixel %s" % pix)

            arr = data[cat_pix == pix]
            outfile = filenames.data['catalog'][pix]

            if not os.path.exists(outfile):
                logger.debug("Creating %s" % outfile)
                out = fitsio.FITS(outfile, mode='rw')
                out.write(arr)
                hdr = ugali.utils.healpix.header_odict(nside=nside_catalog,
                                                       coord='G')
                for key in ['PIXTYPE', 'ORDERING', 'NSIDE', 'COORDSYS']:
                    out[1].write_key(*list(hdr[key].values()))
                out[1].write_key('PIX',
                                 pix,
                                 comment='HEALPIX pixel for this file')
            else:
                out = fitsio.FITS(outfile, mode='rw')
                out[1].append(arr)

            logger.debug("Writing %s" % outfile)
            out.close()
Example #19
def rsQ(rL):
    """
    Stitch Quarters together.

    Fills in missing times and cadences with their proper values.  It
    assigns placeholder values for other columns.
    - floats --> nan
    - bools  --> True

    Parameters
    ----------
    rL : List of record arrays

    Returns
    -------
    rLC : Record array of all the joined quarters.


    Notes
    -----
    Quarters are sorted into the proper order before joining.

    """
    startTimes = np.array([r['t'][0] for r in rL])
    sid = np.argsort(startTimes)
    rL = list(np.array(rL)[sid])

    # Figure out which cadences are missing and fill them in.
    cad = [r['cad'] for r in rL]
    cad = np.hstack(cad)
    cad, iFill = cadFill(cad)
    nFill = cad.size

    rLC = np.rec.fromarrays([cad], names='cad')

    # Add all the columns from the FITS file.
    fitsname = rL[0].dtype.fields.keys()
    fitsname.remove('cad')

    for fn in fitsname:
        col = [r[fn] for r in rL]  # Column in list form
        col = np.hstack(col)
        # Fill new array elements
        if col.dtype is np.dtype('bool'):
            fill_value = True
        else:
            fill_value = np.nan

        ctemp = np.empty(nFill, dtype=col.dtype)  # Temporary column
        ctemp[::] = fill_value
        ctemp[iFill] = col
        rLC = mlab.rec_append_fields(rLC, fn, ctemp)

    # nanTime doesn't work here because I've updated the "cad" field
    tm = ma.masked_invalid(rLC['t'])
    cad, rLC['t'] = detrend.maskIntrp(rLC['cad'], tm)

    return rLC
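cadFill (not shown in this listing) expands the cadence column to a contiguous grid and returns the indices of the observed rows, which lets rsQ drop each column into a gap-free array. A rough sketch of the idea, assuming integer cadence numbers:

import numpy as np

# Hypothetical cadence numbers with a two-cadence gap.
cad = np.array([1000, 1001, 1002, 1005, 1006])
full = np.arange(cad[0], cad[-1] + 1)   # contiguous cadence grid
iFill = np.searchsorted(full, cad)      # where the observed rows land

flux = np.array([1.0, 1.1, 0.9, 1.2, 1.0])
ctemp = np.full(full.size, np.nan)      # placeholder for the missing cadences
ctemp[iFill] = flux
print(ctemp)  # [1.  1.1 0.9 nan nan 1.2 1. ]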
Example #20
def channel_transform(fitsfiles, h5file, iref=None):
    """
    Channel Transformation

    Take a list of k2 pixel files (must be from the same
    channel). Find the centroids of each image and solve for the
    linear transformation that takes one scene to another
    """
    nstars = len(fitsfiles)

    # Pull the first file to get length and data type
    fitsfile0 = fitsfiles[0]
    cent0 = fits_to_chip_centroid(fitsfile0)
    channel = get_channel(fitsfile0)
    print "Using channel = %i" % channel

    # Determine the reference frame
    if iref == None:
        dfcent0 = pd.DataFrame(LE(cent0))
        ncad = len(dfcent0)
        med = dfcent0.median()
        dfcent0['dist'] = ((dfcent0['centx'] - med['centx'])**2 +
                           (dfcent0['centy'] - med['centy'])**2)
        dfcent0 = dfcent0.iloc[ncad / 4:-ncad / 4]
        dfcent0 = dfcent0.dropna(subset=['centx', 'centy'])
        iref = dfcent0['dist'].idxmin()

    print "using reference frame %i" % iref
    assert np.isnan(cent0['centx'][iref])==False,\
        "Must select a valid reference cadence. No nans"

    cent = np.zeros((nstars, cent0.shape[0]), cent0.dtype)
    for i, fitsfile in enumerate(fitsfiles):
        if (i % 10) == 0:
            print i
        cent[i] = fits_to_chip_centroid(fitsfile)
        channel_i = get_channel(fitsfile)
        assert channel == channel_i, "%i != %i" % (channel, channel_i)

    trans, pnts = imtran.linear_transform(cent['centx'], cent['centy'], iref)
    trans = pd.DataFrame(trans)
    trans = pd.concat([trans, pd.DataFrame(LE(cent0))[['t', 'cad']]], axis=1)
    trans = trans.to_records(index=False)

    keys = cent.dtype.names
    pnts = mlab.rec_append_fields(pnts, keys, [cent[k] for k in keys])

    if h5file != None:
        with h5plus.File(h5file) as h5:
            h5['trans'] = trans
            h5['pnts'] = pnts

    trans, pnts = read_channel_transform(h5file)
    plot_trans(trans, pnts)
    figpath = h5file[:-3] + '.png'
    plt.gcf().savefig(figpath)
    print "saving %s " % figpath
    return cent
Example #21
def rec_zip(rL):
    """
    """
    ro = rL[0]
    for i in range(1, len(rL)):
        fields = list(rL[i].dtype.names)
        vals = [rL[i][f] for f in fields]
        ro = mlab.rec_append_fields(ro, fields, vals)
    return ro
def add_bperp(recarray):
    ''' input: position vector [m], velocity vector [m], offnadir angle [deg]
        output: perpendicular baseline [m] '''
    pos = np.vstack((recarray.x, recarray.y, recarray.z)).T * 1000
    vel = np.vstack((recarray.dx, recarray.dy, recarray.dz)).T * 1000
    
    x,y,z = np.hsplit(pos,3) #if pos.shape = (7,3)
    #dx,dy,dz = np.hsplit(vel,3)
    #x,y,z = pos[0],pos[1],pos[2] #pos is (3,7)
    offnadir = np.radians(recarray.offnadir)
    
    # Get mean parameters of satellite ((1,n) row vector)
    pos0 = np.mean(pos,0).reshape(1,-1) #average position of all acquisitions
    xm,ym,zm = pos0.flat
    vel0 = np.mean(vel,0).reshape(1,-1)
    
    # get geodetic lat/lon/height (above wgs84) of satellite
    ecef = pyproj.Proj(proj='geocent',  ellps='WGS84', datum='WGS84')
    wgs84 = pyproj.Proj(proj='latlong', ellps='WGS84', datum='WGS84')
    lon, lat, h = pyproj.transform(ecef, wgs84, x, y, z, radians=True)
    h_ave = np.mean(h)
    
    # Convert geocentric coordinates to average ENU in plane of satellite
    xl,yl,zl = ecef2enu(pos, pos0)
    
    # Calculate travel direction in ENU coordinates from differencing unit velocity motion  
    # NOTE: not sure why this works..., need to *1000 to keep consistent w/ matlab code, but units don't exactly match
    pos1 = pos0 + vel0/np.linalg.norm(vel0)*1000
    pos2 = pos0 - vel0/np.linalg.norm(vel0)*1000
    p1,p2,p3 = ecef2enu(pos1, pos0)
    q1,q2,q3 = ecef2enu(pos2, pos0)
    vxl = q1 - p1
    vyl = q2 - p2
    vzl = q3 - p3
    
    # Along-track direction
    trackdir = np.arctan(vxl/vyl)
    
    # Calculate perpendicular baseline
    Bx = xl-xl[0]
    By = yl-yl[0]
    zr = h.flat-h_ave
    Br = zr-zr[0]
    #Bv = zl-zl[0]
    Bh = Bx*np.cos(trackdir) - By*np.sin(trackdir)
    
    Bperp = Bh*np.cos(offnadir) + Br*np.sin(offnadir)
    Bpara = Bh*np.sin(offnadir) - Br*np.cos(offnadir)
    
    # Match ROI_PAC Baseline sign convention for ascending data
    if trackdir < 0:
        Bperp = -Bperp

    recarray = mlab.rec_append_fields(recarray,'bperp',Bperp,float)
    #print "appended 'bperp' field"
    return recarray
Example #23
def load_aeronet(fname, keep_fields='all', header=False):
    """loads aeronet lev 2.0 csv file.
    fname: data file name
    keep_fields: 'all' or a list of fields
    header: whether to return header information along with the data.
    """
    std_day = datetime(1900,1,1,0,0,0)
    def date2daynum(datestr):
        the_day = datetime.strptime(datestr, '%d:%m:%Y')
        return float((the_day - std_day).days)

    def time2seconds(timestr):
        h, m, s = [int(t) for t in timestr.split(':')]
        return float(h * 3600 + m * 60 + s)

    def daynum_seconds2datetime(daynum, seconds):
        return std_day + timedelta(days=int(daynum), seconds=int(seconds))

    headlines = []
    f = open(fname, 'r')
    for line_i, line in enumerate(f):
        line = line.rstrip()
        if line.startswith('Date(dd-mm-yy'):
            datefield, timefield = [re.sub(r'\W', '', tk) for tk in line.split(',')[0:2]]
            break
        headlines.append(line)
    skip_header_lines = line_i

    if header:
        headline = ','.join(headlines)
        headerd = dict()
        for attrname, converter in [('location', str), ('long', float), ('lat', float), ('elev', float), ('nmeas', int), ('PI', str), ('email', str)]:
            m = re.search(r'%s.{0,1}=([^,\s]*)' % attrname, headline, flags=re.I)
            if m:
                try:
                    headerd[attrname] = converter(m.group(1))
                except Exception:
                    pass

    rawd = np.genfromtxt(fname, skip_header=skip_header_lines, delimiter=',', names=True, converters={0:date2daynum, 1:time2seconds})
    lend = len(rawd)
    dates = np.zeros(len(rawd), dtype='O')
    for i in range(lend):
        dates[i] = daynum_seconds2datetime(rawd[datefield][i], rawd[timefield][i])

    newd = mlab.rec_append_fields(rawd, 'datetime', dates)
    newd = mlab.rec_drop_fields(newd, [datefield, timefield, 'Last_Processing_Date'])

    if keep_fields != 'all':
        keep_fields = ['datetime'] + keep_fields
#        print keep_fields
        newd = mlab.rec_keep_fields(newd, keep_fields)
    if header:
        return newd, headerd
    else:
        return newd
Example #24
def typeIII(response, ancova, recarray):
    """
    Produce an ANCOVA table
    with type III sum of squares
    from a given ANCOVA formula.

    Inputs
    ------

    response: str
              field name of response in recarray

    ancova: ANCOVA
            specifies the model to be fit

    recarray: np.ndarray
              should contain all field names in the terms of ancova
              as well as response
    """

    X = ancova.formula.design(recarray, return_float=True)
    Y = recarray[response]
    model = OLS(Y, X)

    results = model.fit()
    SSE_F = np.sum(results.resid**2)
    df_F = results.df_resid

    names = []
    fs = []
    dfs = []
    sss = []
    pvals = []
    for contrast in ancova.contrast_names:
        r = results.f_test(ancova.contrast_matrices[contrast])
        names.append(contrast)
        fs.append(r.fvalue)
        dfs.append(r.df_num)
        pvals.append(r.pvalue)
        sss.append(r.fvalue * results.scale * r.df_num)

    # Add in the "residual row"

    sss.append(SSE_F)
    dfs.append(df_F)
    pvals.append(np.nan)
    fs.append(np.nan)
    names.append('Residuals')

    result = np.array(
        names, np.dtype([('contrast', 'S%d' % max([len(n) for n in names]))]))
    result = ML.rec_append_fields(
        result, ['SS', 'df', 'MS', 'F', 'p_value'],
        [sss, dfs, np.array(sss) / np.array(dfs), fs, pvals])
    return result
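The final lines assemble the table by building a structured array of contrast names and appending the SS, df, MS, F and p_value columns. A small self-contained sketch of the same assembly with numpy.lib.recfunctions (the statistics are made up):

import numpy as np
from numpy.lib import recfunctions as rfn

names = ['treatment', 'Residuals']
sss, dfs = [12.4, 30.0], [2, 27]
fs, pvals = [5.58, np.nan], [0.0094, np.nan]

table = np.array([(n,) for n in names],
                 dtype=[('contrast', 'U%d' % max(len(n) for n in names))])
table = rfn.append_fields(
    table, ['SS', 'df', 'MS', 'F', 'p_value'],
    [np.array(sss), np.array(dfs), np.array(sss) / np.array(dfs),
     np.array(fs), np.array(pvals)],
    usemask=False)
print(table.dtype.names)  # ('contrast', 'SS', 'df', 'MS', 'F', 'p_value')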
Example #25
def calc_offset(infile, cat):
    """
    calc_offset: Calculate the offset of the catalogue entries towards the pointing centre
    infile: Input MIRIAD uv-file
    cat: Input catalogue of sources to calculate the offset for
    returns: A catalogue with the offsets for the individual sources
    """
    ra_off = (cat.RA - getradec(infile).ra.deg) * 3600.0 * np.cos(getradec(infile).dec.rad)
    dec_off = (cat.DEC - getradec(infile).dec.deg) * 3600.0
    cat = mplab.rec_append_fields(cat, ['RA_off', 'DEC_off'], [ra_off, dec_off], dtypes=[float, float])
    return cat
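The offsets are small-angle separations in arcseconds, with the RA term scaled by cos(Dec) of the pointing centre. A quick numeric check with hypothetical coordinates:

import numpy as np

src_ra, src_dec = 180.010, 30.005   # hypothetical source (deg)
pnt_ra, pnt_dec = 180.000, 30.000   # hypothetical pointing centre (deg)

ra_off = (src_ra - pnt_ra) * 3600.0 * np.cos(np.radians(pnt_dec))
dec_off = (src_dec - pnt_dec) * 3600.0
print(ra_off, dec_off)  # ~31.2 and 18.0 arcsec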
Example #26
    def draw_paths(self, filename, **kwargs):
        """Draw a text file containing multiple polygons"""
        try:
            data = np.genfromtxt(filename, names=['ra', 'dec', 'poly'])
        except ValueError:
            data = np.genfromtxt(filename, names=['ra', 'dec'])
            data = mlab.rec_append_fields(data, 'poly', np.zeros(len(data)))

        for p in np.unique(data['poly']):
            poly = data[data['poly'] == p]
            self.draw_path_radec(poly['ra'], poly['dec'], **kwargs)
Example #27
def to_hdf(ec,h5file):
    with h5plus.File(h5file) as h5:
        for k in dsetkeys:
            h5[k] = getattr(ec,k)

        r = ec.dfAc_st.to_records()
        rless = mlab.rec_drop_fields(r,['index'])
        sindex = r['index'].astype(str)
        r = mlab.rec_append_fields(rless,'index', sindex)
        h5.attrs['dfAc_st'] = r
        h5.attrs['kAs'] = ec.kAs
Example #28
def typeIII(response, ancova, recarray):
    """
    Produce an ANCOVA table
    with type III sum of squares
    from a given ANCOVA formula.

    Inputs
    ------

    response: str
              field name of response in recarray

    ancova: ANCOVA
            specifies the model to be fit

    recarray: np.ndarray
              should contain all field names in the terms of ancova
              as well as response
    """

    X = ancova.formula.design(recarray, return_float=True)
    Y = recarray[response]
    model = OLS(Y, X)

    results = model.fit()
    SSE_F = np.sum(results.resid**2)
    df_F = results.df_resid

    names = []
    fs = []
    dfs = []
    sss = []
    pvals = []
    for contrast in ancova.contrast_names:
        r = results.f_test(ancova.contrast_matrices[contrast])
        names.append(contrast)
        fs.append(r.fvalue)
        dfs.append(r.df_num)
        pvals.append(r.pvalue)
        sss.append(r.fvalue * results.scale * r.df_num)

    # Add in the "residual row"

    sss.append(SSE_F)
    dfs.append(df_F)
    pvals.append(np.nan)
    fs.append(np.nan)
    names.append('Residuals')

    result = np.array(names, np.dtype([('contrast','S%d' % max([len(n) for n in names]))]))
    result = ML.rec_append_fields(result, ['SS', 'df', 'MS', 'F', 'p_value'], [sss, dfs, np.array(sss) / np.array(dfs), fs, pvals])
    return result
    def with_new_field(self, name, data, mask_arr=None, mask_fn=None):
        recarray = self.arr.view(np.recarray)
        recarray = mlab.rec_append_fields(recarray, name, data)
        if mask_arr:
            data = np.ma.array(recarray, mask=mask_arr)
        else:
            data = recarray

        r = data.view(mrecords.mrecarray)       
        if mask_fn:
            mask_fn(r)

        return TimeSeries(r)
Example #30
def val(tLC,tRES,nCheck=50,ver=True):
    # Unpack array from table.

    t  = tLC.t
    fm = ma.masked_array(tLC.f-tLC.fcbv,mask=tLC.fmask)

    tres = tRES.data    
    tres = mlab.rec_append_fields(tres,['P','tdur','df'], \
        [tres['PG'],tres['twd']*keptoy.lc,tres['fom']])
    
    sid  = np.argsort(-tres['s2n'])
    tres = tres[sid][:nCheck]
    rval = tval.val(t,fm,tres)
    tVAL = qalg.rec2tab(rval)
    return tVAL
Example #31
def dict_list_to_frame(dict_list):
    df = pd.DataFrame(dict_list)
    d0 = dict( df.iloc[0] )
    goodkeys = [ k for k in d0.keys() if (type(d0[k])!=fits.card.Undefined)]
    df = df[goodkeys]

#    comb = pdplus.df_to_rec_strings(df)
    dfs = df.select_dtypes(include=['object'])
    dfns = df.select_dtypes(exclude=['object'])
    dfs = rec.fromarrays(np.array(dfs).astype('S100').T,names=list(dfs.columns))

    names = list(dfns.columns)
    arrs = [dfns[n] for n in names]
    comb = mlab.rec_append_fields(dfs,names,arrs)

    return comb
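dict_list_to_frame splits object and numeric columns, pushes the string side through a fixed-width 'S100' dtype, and glues the halves back together with rec_append_fields. With pandas the round trip can usually be done in one call; a minimal sketch with hypothetical column names:

import pandas as pd

df = pd.DataFrame({'OBJECT': ['EPIC-1', 'EPIC-2'], 'EXPTIME': [1800.0, 1800.0]})
comb = df.to_records(index=False)   # structured array with both column types
print(comb.dtype.names)             # ('OBJECT', 'EXPTIME')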
Example #32
    def _defineVariables(self):
        """
        Helper function to define pertinent variables from catalog data.

        ADW (20170627): This has largely been replaced by properties.
        """
        logger.info('Catalog contains %i objects' % (len(self.data)))

        mc_source_id_field = self.config['catalog']['mc_source_id_field']
        if mc_source_id_field is not None:
            if mc_source_id_field not in self.data.dtype.names:
                array = np.zeros(len(self.data), dtype=int)
                self.data = mlab.rec_append_fields(self.data,
                                                   names=mc_source_id_field,
                                                   arrs=array)
            logger.info('Found %i simulated objects' %
                        (np.sum(self.mc_source_id > 0)))
Example #33
 def append(self, picker_list, **kwargs):
     """Resize my data and add in the data from Pickers in picker_list
     note: equality test fails on picker2 for some reason 
     
     Will also add a new column if you specify.
     
     Usage:
     p1.append([p2, p3], ratname=(1,2,3))
     
     Now p1 has all of the data from p1, p2, and p3.
     p1['ratname'] is 1, 2, or 3, depending on the source.        
     """
     # Calculate new size and resize
     old_length = len(self)
     new_length = old_length + np.sum(len(p) for p in picker_list)
     new_data = np.resize(self._data, (new_length,))
     
     # Store data from each new picker
     row_idx = old_length
     for picker in picker_list:
         new_data[row_idx:row_idx+len(picker._data)] = picker._data
         row_idx += len(picker._data)
     
     # optionally add a new column
     if len(kwargs) > 0:
         if len(kwargs) > 1: print "warning: too many arguments"
         
         # get the name of the new field and the labels for each Picker
         fieldname = kwargs.keys()[0]
         labels = kwargs[fieldname]
         
         # create the new column and store the labels for each Picker
         newcolumn = np.empty(shape=(new_length,), dtype=np.int)
         newcolumn[:old_length] = labels[0]
         row_idx = old_length
         for label, picker in zip(labels[1:], picker_list):
             newcolumn[row_idx:row_idx+len(picker._data)] = label
             row_idx += len(picker._data)
         
         # store the new column
         new_data = mlab.rec_append_fields(new_data, fieldname, newcolumn)
     
     # overwrite my data with the new version
     self._data = new_data
Example #34
    def append(self, picker_list, **kwargs):
        """Resize my data and add in the data from Pickers in picker_list
        note: equality test fails on picker2 for some reason 
        
        Will also add a new column if you specify.
        
        Usage:
        p1.append([p2, p3], ratname=(1,2,3))
        
        Now p1 has all of the data from p1, p2, and p3.
        p1['ratname'] is 1, 2, or 3, depending on the source.        
        """
        # Calculate new size and resize
        old_length = len(self)
        new_length = old_length + np.sum(len(p) for p in picker_list)
        new_data = np.resize(self._data, (new_length, ))

        # Store data from each new picker
        row_idx = old_length
        for picker in picker_list:
            new_data[row_idx:row_idx + len(picker._data)] = picker._data
            row_idx += len(picker._data)

        # optionally add a new column
        if len(kwargs) > 0:
            if len(kwargs) > 1: print("warning: too many arguments")

            # get the name of the new field and the labels for each Picker
            fieldname = list(kwargs.keys())[0]
            labels = kwargs[fieldname]

            # create the new column and store the labels for each Picker
            newcolumn = np.empty(shape=(new_length, ), dtype=np.int)
            newcolumn[:old_length] = labels[0]
            row_idx = old_length
            for label, picker in zip(labels[1:], picker_list):
                newcolumn[row_idx:row_idx + len(picker._data)] = label
                row_idx += len(picker._data)

            # store the new column
            new_data = mlab.rec_append_fields(new_data, fieldname, newcolumn)

        # overwrite my data with the new version
        self._data = new_data
Example #35
def modcols(r0):
    """
    Modify Columns

    1. Changes TIME, CADENCENO to t, cad
    2. rnQ      - normalize quarter
    3. rnanTime - remove nans from time series
    """

    r = r0.copy()
    oldName = ['TIME', 'CADENCENO']
    newName = ['t', 'cad']
    for o, n in zip(oldName, newName):
        r = mlab.rec_append_fields(r, n, r[o])
        r = mlab.rec_drop_fields(r, o)

    r = keplerio.rnQ(r)
    r = keplerio.rnanTime(r)
    return r
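The append-then-drop loop above is effectively a field rename; numpy.lib.recfunctions.rename_fields does the same in one call. A minimal sketch:

import numpy as np
from numpy.lib import recfunctions as rfn

r = np.rec.fromarrays([np.arange(3.0), np.arange(3)], names='TIME,CADENCENO')
r = rfn.rename_fields(r, {'TIME': 't', 'CADENCENO': 'cad'})
print(r.dtype.names)  # ('t', 'cad')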
Example #36
def calc_appflux(infile, cat, beam):
    """
    calc_appflux: module to calculate the apparent fluxes of sources from an input catalogue using primary beam correction
    infile: Input MIRIAD uv-file
    cat: catalogue (most likely from query_catalogue)
    beam: the beam type to correct for. Only 'WSRT' allowed at the moment
    returns: an extended catalogue file including the distances, RA- and DEC-offsets, and apparent fluxes from the
             pointing centre
    """
    if beam == 'WSRT':  # Check which beam model to use. APERTIF going to be included later.
        logging.info(' Using standard WSRT beam for calculating apparent fluxes!')
    else:
        logging.info(' Beam model not supported yet! Using standard WSRT beam instead!')
    sep = cat.dist
    appflux = np.zeros((len(cat)))
    for c in range(0, len(cat)):  # calculate the apparent flux of the sources
        appflux[c] = (cat.flux[c]) * wsrtBeam(sep[c], getfreq(infile))
    cat = mplab.rec_append_fields(cat, ['appflux'], [appflux], dtypes=[float])
    return cat
Example #37
    def createAssociations(self):
        objects = self.objects

        tol = self.config['search']['proximity']
        columns = odict()

        names = np.empty(len(objects), dtype=object)
        names.fill('')
        for i, refs in enumerate(self.config['search']['catalogs']):
            i += 1
            catalog = SourceCatalog()
            for ref in refs:
                print ref
                catalog += catalogFactory(ref)

            # String length (should be greater than longest name)
            length = len(max(catalog['name'], key=len)) + 1
            dtype = 'S%i' % length
            fitstype = '%iA' % length

            assoc = np.empty(len(objects), dtype=dtype)
            assoc.fill('')
            angsep = np.zeros(len(objects), dtype=np.float32)
            idx1, idx2, sep = catalog.match(objects['GLON'],
                                            objects['GLAT'],
                                            tol=tol)
            assoc[idx1] = catalog['name'][idx2].astype(dtype)
            angsep[idx1] = sep
            columns['ASSOC%i' % i] = assoc
            columns['ANGSEP%i' % i] = angsep

            if length > objects['NAME'].itemsize:
                logger.warning("Association name may not fit.")
            names = np.where(names == '', assoc, names)
        names = names.astype(objects['NAME'].dtype)
        objects['NAME'][:] = np.where(names == '', objects['NAME'], names)
        objects['NAME'][:] = np.char.replace(objects['NAME'], '_', ' ')

        self.assocs = mlab.rec_append_fields(objects, columns.keys(),
                                             columns.values())
        self.assocs = self.assocs[self.assocs['NAME'].argsort()]
Example #38
    def __init__(self, data=None, fileName="all_mds.csv", **kwargs):
        self.conditions = {}
        if len(kwargs) > 0:
            for arg, value in kwargs.iteritems():
                setattr(self, arg, value)
                self.conditions[arg] = value

        if data is None:
            self.loadData(fileName)
            if 'Area' not in self.header:
                areas = self.getMDAreas()
                centroids = self.getMDCentroids()
                self.data = mlab.rec_append_fields(
                    self.data, ['Area', 'CentroidLon', 'CentroidLat'],
                    [areas, centroids[:, 0], centroids[:, 1]])

                self.header = self.data.dtype.names

        else:
            self.header = data.dtype.names
            self.data = data
Example #39
    def select_in_path(filename,ra,dec,polys=None,wrap=180.):
        import matplotlib.path
        from matplotlib import mlab
        ra,dec = np.copy(ra), np.copy(dec)

        try:
            data = np.genfromtxt(filename,names=['ra','dec','poly'])
        except ValueError:
            data = np.genfromtxt(filename,names=['ra','dec'])
            data = mlab.rec_append_fields(data,'poly',np.zeros(len(data)))

        paths = []
        ra -= 360 * (ra > wrap)

        for p in np.unique(data['poly']):
            if polys and (p not in polys): continue
            poly = data[data['poly'] == p]
            vertices = np.vstack(np.vstack([poly['ra'],poly['dec']])).T
            paths.append(matplotlib.path.Path(vertices))
        sel = np.sum([p.contains_points(np.vstack([ra,dec]).T) for p in paths],axis=0) > 0
        return sel
Example #40
 def create_mag_table(self, outputPath, isocType="pdva", specType="basel"):
     """Create an HDF5 table of that describes a set of magnitudes."""
     if os.path.exists(outputPath): os.remove(outputPath)
     title = os.path.splitext(os.path.basename(outputPath))[0]
     h5file = tables.openFile(outputPath, mode="w", title=title)
     table = h5file.createTable("/", 'mags', MagTableDef, "Mag Model Table")
     print h5file
     docs = self.collection.find({"compute_complete":True,
         "np_data": {"$exists": 1}}) # , limit=2
     print "working on %i docs to read" % docs.count()
     lut = get_metallicity_LUT(isocType, specType)
     for doc in docs:
         print "reading", doc['_id']
         # print doc.keys()
         # print doc['np_data']
         npData = doc['np_data']
         # print npData.dtype
         # binData = Binary(doc['np_data']['data'])
         # print type(binData)
         # npData = pickle.load(binData)
         nRows = len(npData)
         # Append model information (about SFH, dust, etc)
         zmet = doc['pset']['zmet']
         Z = lut[zmet-1]
         zmets = np.ones(nRows, dtype=np.float) * Z
         tau = doc['pset']['tau']
         taus = np.ones(nRows, dtype=np.float) * tau
         npDataAll = mlab.rec_append_fields(npData, ['Z','tau'],[zmets,taus])
         # Trim the recarray to just the desired fields
         npDataTrim = mlab.rec_keep_fields(npDataAll,
             ['Z','tau','age','mass','lbol','sfr','TMASS_J','TMASS_H',
             'TMASS_Ks','MegaCam_u','MegaCam_g','MegaCam_r','MegaCam_i',
             'MegaCam_z','GALEX_NUV','GALEX_FUV'])
         for i in xrange(nRows):
             row = npDataTrim[i]
             print row['Z'], row['tau'],row['TMASS_J'],row['TMASS_Ks']
         # Append to HDF5
         table.append(npDataTrim)
     h5file.flush()
     h5file.close()
Example #41
 def __init__(self,
             data=None,
             fileName="all_mds.csv",
             **kwargs):
     self.conditions = {}
     if len(kwargs) > 0:
         for arg,value in kwargs.iteritems():
             setattr(self,arg,value) 
             self.conditions[arg] = value
          
     if data is None:
         self.loadData(fileName)
         if 'Area' not in self.header:
             areas = self.getMDAreas()
             centroids = self.getMDCentroids()
             self.data = mlab.rec_append_fields(self.data,['Area','CentroidLon','CentroidLat'],[areas,centroids[:,0],centroids[:,1]])
             
             self.header = self.data.dtype.names
             
     else:
         self.header = data.dtype.names
         self.data = data
Example #42
def fits_to_chip_centroid(fitsfile):
    """
    Grab centroids from fits file

    Parameters
    ----------
    fitsfile : path to pixel file

    Returns
    -------
    centx : centroid in the x (column) axis
    centy : centroid in the y (row) axis
    """
    apsize = 7

    hdu0,hdu1,hdu2 = fits.open(fitsfile)
    cube = hdu1.data
    flux = cube['FLUX']
    t = cube['TIME']
    cad = cube['CADENCENO']

    nframe,nrow,ncol = flux.shape

    # Define rectangular aperture
    wcs = get_wcs(fitsfile)
    ra,dec = hdu0.header['RA_OBJ'],hdu0.header['DEC_OBJ']
    try:
        x,y = wcs.wcs_world2pix(ra,dec,0)
    except: # if WCS is bogus, make the simplest reasonable assumption
        x, y = ncol/2., nrow/2.

    scentx,scenty = np.round([x,y]).astype(int)
    nrings = (apsize-1)/2

    x0 = scentx - nrings
    x1 = scentx + nrings
    y0 = scenty - nrings
    y1 = scenty + nrings
    mask = np.zeros((nrow,ncol))
    mask[y0:y1+1,x0:x1+1] = 1 # 1 means use in aperture

    # Compute background flux
    # mask = True aperture, don't use to compute bg
    flux_sky = flux.copy()
    flux_sky_mask = np.zeros(flux.shape)
    flux_sky_mask += mask[np.newaxis,:,:].astype(bool)
    flux_sky = ma.masked_array(flux_sky, flux_sky_mask)
    fbg = ma.median(flux_sky.reshape(flux.shape[0],-1),axis=1)
    if not np.isfinite(fbg).any():
        fbg2 = [ma.median(frame[np.isfinite(frame)]) for frame in flux_sky.reshape(flux.shape[0], -1)]
        fbg = ma.masked_array(fbg2, np.isnan(fbg2))

    # Subtract off background
    flux = flux - fbg[:,np.newaxis,np.newaxis]
    flux = ma.masked_invalid(flux)
    flux.fill_value = 0 
    flux = flux.filled()

    # Compute aperture photometry
    fsap = flux * mask
    fsap = np.sum(fsap.reshape(fsap.shape[0],-1),axis=1)

    # Compute centroids
    centx,centy = centroid(flux * mask)

    # table column physical WCS ax 1 ref value       
    # hdu1.header['1CRV4P'] corresponds to column of flux[:,0,0]
    # starting counting at 1. 
    centx += hdu1.header['1CRV4P'] - 1
    centy += hdu1.header['2CRV4P'] - 1

    r = np.rec.fromarrays(
        [t,cad,centx,centy,fsap,fbg],
        names='t,cad,centx,centy,fsap,fbg'
        )

    r = mlab.rec_append_fields(r,'starname',hdu0.header['KEPLERID'])
    return r
import matplotlib.mlab as mlab

# grab the price data off yahoo
u1 = urllib.urlretrieve('http://ichart.finance.yahoo.com/table.csv?s=AAPL&d=9&e=14&f=2008&g=d&a=8&b=7&c=1984&ignore=.csv')
u2 = urllib.urlretrieve('http://ichart.finance.yahoo.com/table.csv?s=GOOG&d=9&e=14&f=2008&g=d&a=8&b=7&c=1984&ignore=.csv')

# load the CSV files into record arrays
r1 = mlab.csv2rec(file(u1[0]))
r2 = mlab.csv2rec(file(u2[0]))

# compute the daily returns and add these columns to the arrays
gains1 = np.zeros_like(r1.adj_close)
gains2 = np.zeros_like(r2.adj_close)
gains1[1:] = np.diff(r1.adj_close)/r1.adj_close[:-1]
gains2[1:] = np.diff(r2.adj_close)/r2.adj_close[:-1]
r1 = mlab.rec_append_fields(r1, 'gains', gains1)
r2 = mlab.rec_append_fields(r2, 'gains', gains2)

# now join them by date; the default postfixes are 1 and 2
r = mlab.rec_join('date', r1, r2)

# long appl, short goog
g = r.gains1-r.gains2
tr = (1+g).cumprod()  # the total return

# plot the return
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(r.date, tr)
ax.set_title('total return: long appl, short goog')
ax.grid()
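csv2rec, rec_append_fields and rec_join have since been removed from matplotlib.mlab, so the long/short return calculation above is easier to reproduce with pandas today. A rough equivalent on made-up price series:

import pandas as pd

dates = pd.date_range('2008-01-02', periods=5, freq='B')
aapl = pd.Series([100.0, 101.0, 99.0, 102.0, 103.0], index=dates)
goog = pd.Series([500.0, 498.0, 505.0, 503.0, 510.0], index=dates)

gains1 = aapl.pct_change().fillna(0.0)   # daily returns, long leg
gains2 = goog.pct_change().fillna(0.0)   # daily returns, short leg

g = gains1 - gains2          # long AAPL, short GOOG
tr = (1 + g).cumprod()       # total return of the position
print(tr.iloc[-1])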
Example #44
def typeII(response, ancova, recarray):
    """
    Produce an ANCOVA table
    from a given ANCOVA formula
    with type II sums of squares.

    Inputs
    ------

    response: str
              field name of response in recarray

    ancova: ANCOVA
            specifies the model to be fit

    recarray: np.ndarray
              should contain all field names in the terms of ancova
              as well as response
    """

    Y = recarray[response]
    X = ancova.formula.design(recarray, return_float=True)
    model = OLS(Y, X)
    results = model.fit()
    SSE_F = np.sum(results.resid**2)
    df_F = results.df_resid

    names = []
    sss = []
    fs = []
    dfs = []
    pvals = []

    for name, expr_factors in zip(ancova.contrast_names,
                                  ancova.sequence()):
        expr, factors = expr_factors
        F = ancova.all_but_above(expr, factors)
        C = ancova.contrasts[name]
        XF, contrast_matrices = F.formula.design(recarray, contrasts={'C':C})
        modelF = OLS(Y, XF)
        resultsF = modelF.fit()

        SSEF = np.sum(resultsF.resid**2)
        dfF = resultsF.df_resid
        ftest = resultsF.f_test(contrast_matrices['C'])

        SSER = SSEF + ftest.fvalue * ftest.df_num * (SSEF / dfF)
        dfR = dfF + ftest.df_num

        sss.append(SSER - SSEF)
        dfs.append(ftest.df_num)
        fs.append(((SSER - SSEF) / (dfR - dfF)) / (SSE_F / df_F))
        pvals.append(f_dbn.sf(fs[-1], dfR-dfF, df_F))
        names.append(name)

    # Add in the "residual row"

    sss.append(SSE_F)
    dfs.append(df_F)
    pvals.append(np.nan)
    fs.append(np.nan)
    names.append('Residuals')

    result = np.array(names, np.dtype([('contrast','S%d' % max([len(n) for n in names]))]))
    result = ML.rec_append_fields(result, ['SS', 'df', 'MS', 'F', 'p_value'], [sss, dfs, np.array(sss) / np.array(dfs), fs, pvals])
    return result
Example #45
def typeI(response, ancova, recarray):
    """
    Produce an ANCOVA table
    from a given ANCOVA formula
    with type I sums of squares
    where the order is based on the order of terms
    in the contrast_names of ancova.

    Inputs
    ------

    response: str
              field name of response in recarray

    ancova: ANCOVA
            specifies the model to be fit

    recarray: np.ndarray
              should contain all field names in the terms of ancova
              as well as response
    """

    Y = recarray[response]
    X = ancova.formula.design(recarray, return_float=True)
    model = OLS(Y, X)
    results = model.fit()
    SSE_F = np.sum(results.resid**2)
    df_F = results.df_resid

    model = OLS(Y, ancova.formulae[0].design(recarray, return_float=True))
    results = model.fit()
    SSE_old = np.sum(results.resid**2)
    df_old = results.df_resid

    names = []
    sss = []
    fs = []
    dfs = []
    pvals = []

    names.append(ancova.contrast_names[0])
    fs.append(((np.sum(Y**2) - SSE_old) / (Y.shape[0] - df_old)) / (SSE_F / df_F))
    sss.append((np.sum(Y**2) - SSE_old))
    dfs.append(Y.shape[0] - df_old)
    pvals.append(f_dbn.sf(fs[-1], Y.shape[0]-df_old, df_F))

    for d in range(1,len(ancova.formulae)):
        terms = []
        for f in ancova.formulae[:(d+1)]:
            terms += list(f.terms)

        # JT: this is not numerically efficient
        # could be done by updating some factorization of the full X

        X = Formula(terms).design(recarray, return_float=True)
        model = OLS(Y, X)
        results = model.fit()
        SSE_new = np.sum(results.resid**2)
        df_new = results.df_resid

        sss.append(SSE_old - SSE_new)
        dfs.append(df_old - df_new)
        fs.append(((SSE_old-SSE_new) / (df_old - df_new)) / (SSE_F / df_F))
        pvals.append(f_dbn.sf(fs[-1], df_old-df_new, df_new))
        names.append(ancova.contrast_names[d])
        SSE_old = SSE_new
        df_old = df_new

    # Add in the "residual row"

    sss.append(SSE_new)
    dfs.append(df_new)
    pvals.append(np.nan)
    fs.append(np.nan)
    names.append('Residuals')

    result = np.array(names, np.dtype([('contrast','S%d' % max([len(n) for n in names]))]))
    result = ML.rec_append_fields(result, ['SS', 'df', 'MS', 'F', 'p_value'], [sss, dfs, np.array(sss) / np.array(dfs), fs, pvals])
    return result
    # pair ids where the cluster in the pair is near an absorber
    # cluster id
    # qso id
    # total zpath over all pairs

    outname = run_id + '/qso_cluster_pairs_zpath.fits'

    if os.path.exists(outname):
        print 'Reading', outname, '...',
        pairs = fits.getdata(outname)
        print ' done'
    else:
        # Find all qso-cluster pairs. takes about 10 min to run.
        pairs0 = match_clus_qso(clus, qso)
        # assign a unique identifier to each pair.
        pairs1 = rec_append_fields(pairs0, ['pid'], [np.arange(len(pairs0))])

        # find tot zpath (including both field and cluster paths up to
        # z=1, only towards sightlines with a nearby cluster though) also?
    
        print 'Calculating MgII hits and the total z path length'
    
        if DEBUG:
            fig4 = plt.figure(4, figsize=(6,6))
            ax = fig4.add_subplot(111)

        print 'Looping over QSOs'
        # extra columns for the qso-cluster pair table.
        extra_cols = {}
        n_unique_qsos = len(np.unique(pairs1['qid']))
        for i,(qid,ind) in enumerate(indgroupby(pairs1, 'qid')):
    
    ax4=fig1.add_subplot(414)
    ax4.plot(data['Np'],'bo')
    ax4.set_title('Np')
    plt.show()
    pass

if __name__ == '__main__':
#    os.chdir('/home/shankar/Desktop/Research/modem-sim2/logs')
#    subprocess.call(["cat log-0.txt | grep printGrandPlay | sed 's/^.*FINE\|//g' | sed 's/^.*ment//g' \
#    > experiment_results.txt ; cat experiment_results.txt"], shell=True)
    host_name='192.168.0.22'
    graph_animator=GraphAnimator.GraphAnimator()
    filename=graph_animator.animateGraph(host_name)
    command_to_refine="cat "+filename+"| grep 'history' | sed 's/^.*history//' > "+filename+"_3d_plots.txt"
    plot_filename=filename+"_3d_plots.txt"
    subprocess.call([command_to_refine], shell=True)
    data_format={'names':('timestamp',  'bandit','banditID','result','BER',  'absolute_data_rate',  'alpha',    'beta', 'gittins_index', 'gittins_index_norm',  'MTYPE','DMODE','MPSK','Nc',  'Np',   'Nz','PKT_LEN','FEC'),
                 'formats':( 'S10',      'S10',     'f4',    'S10',   'f4',          'f4',           'f4',       'f4',       'f4',              'f4',             'f4',   'f4',  'f4', 'f4',  'f4',   'f4',  'f4',   'f4')}
    raw_data=np.loadtxt(plot_filename, dtype=data_format)
    raw_data=mlab.rec_append_fields(raw_data, 'time_as_int', np.array([int(times) for times in raw_data['timestamp']]) )
    raw_data=mlab.rec_append_fields(raw_data, 'bandit_as_int', np.array([int(bandits) for bandits in raw_data['bandit']]) )
    filtered_data=raw_data[raw_data['BER']<0.8] 
    plotBanditParams(filtered_data)
    plotOfdmParams(filtered_data)
    plotBerDataRate(filtered_data)
    plt.show()
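
The command_to_refine pipeline above shells out to grep and sed only to keep lines containing 'history' and strip everything up to that marker. The same filtering can be sketched in pure Python, assuming filename points at the GraphAnimator log (the path below is a placeholder):

filename = 'graph_animator.log'   # placeholder; above it comes from animateGraph()
plot_filename = filename + '_3d_plots.txt'

with open(filename) as src, open(plot_filename, 'w') as dst:
    for line in src:
        if 'history' in line:
            # sed 's/^.*history//' is greedy: keep what follows the last marker
            dst.write(line.split('history')[-1])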
    
    
    
    
Exemple #48
0
def typeI(response, ancova, recarray):
    """
    Produce an ANCOVA table
    from a given ANCOVA formula
    with type I sums of squares
    where the order is based on the order of terms
    in the contrast_names of ancova.

    Inputs
    ------

    response: str
              field name of response in recarray

    ancova: ANCOVA
            specifies the model to be fit

    recarray: np.ndarray
              should contain all field names in the terms of ancova
              as well as response
    """

    Y = recarray[response]
    X = ancova.formula.design(recarray, return_float=True)
    model = OLS(Y, X)
    results = model.fit()
    SSE_F = np.sum(results.resid**2)
    df_F = results.df_resid

    model = OLS(Y, ancova.formulae[0].design(recarray, return_float=True))
    results = model.fit()
    SSE_old = np.sum(results.resid**2)
    df_old = results.df_resid

    names = []
    sss = []
    fs = []
    dfs = []
    pvals = []

    names.append(ancova.contrast_names[0])
    fs.append(
        ((np.sum(Y**2) - SSE_old) / (Y.shape[0] - df_old)) / (SSE_F / df_F))
    sss.append((np.sum(Y**2) - SSE_old))
    dfs.append(Y.shape[0] - df_old)
    pvals.append(f_dbn.sf(fs[-1], Y.shape[0] - df_old, df_F))

    for d in range(1, len(ancova.formulae)):
        terms = []
        for f in ancova.formulae[:(d + 1)]:
            terms += list(f.terms)

        # JT: this is not numerically efficient
        # could be done by updating some factorization of the full X

        X = Formula(terms).design(recarray, return_float=True)
        model = OLS(Y, X)
        results = model.fit()
        SSE_new = np.sum(results.resid**2)
        df_new = results.df_resid

        sss.append(SSE_old - SSE_new)
        dfs.append(df_old - df_new)
        fs.append(((SSE_old - SSE_new) / (df_old - df_new)) / (SSE_F / df_F))
        # denominator df matches the full-model residual df used in the F statistic
        pvals.append(f_dbn.sf(fs[-1], df_old - df_new, df_F))
        names.append(ancova.contrast_names[d])
        SSE_old = SSE_new
        df_old = df_new

    # Add in the "residual row"

    sss.append(SSE_new)
    dfs.append(df_new)
    pvals.append(np.nan)
    fs.append(np.nan)
    names.append('Residuals')

    result = np.array(
        names, np.dtype([('contrast', 'S%d' % max([len(n) for n in names]))]))
    result = ML.rec_append_fields(
        result, ['SS', 'df', 'MS', 'F', 'p_value'],
        [sss, dfs, np.array(sss) / np.array(dfs), fs, pvals])
    return result
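
Each row of the type I table above is just the drop in residual sum of squares between two nested OLS fits, divided by the full-model mean squared error. A self-contained sketch of that bookkeeping on synthetic data, using statsmodels (the toy model and variable names are illustrative, not part of the snippet above):

import numpy as np
import statsmodels.api as sm
from scipy.stats import f as f_dbn

rng = np.random.default_rng(0)
n = 50
x1 = rng.normal(size=n)
x2 = rng.normal(size=n)
y = 1.0 + 2.0 * x1 + 0.5 * x2 + rng.normal(size=n)

# nested designs: intercept only, + x1, + x2 (the full model)
designs = [
    np.ones((n, 1)),
    np.column_stack([np.ones(n), x1]),
    np.column_stack([np.ones(n), x1, x2]),
]

full = sm.OLS(y, designs[-1]).fit()
sse_full, df_full = np.sum(full.resid ** 2), full.df_resid

rows = []
sse_old, df_old = np.sum(y ** 2), float(n)   # "zero" model, matching the first row above
for name, X in zip(['intercept', 'x1', 'x2'], designs):
    res = sm.OLS(y, X).fit()
    sse_new, df_new = np.sum(res.resid ** 2), res.df_resid
    ss, df = sse_old - sse_new, df_old - df_new
    F = (ss / df) / (sse_full / df_full)
    rows.append((name, ss, df, F, f_dbn.sf(F, df, df_full)))
    sse_old, df_old = sse_new, df_new

rows.append(('Residuals', sse_full, df_full, np.nan, np.nan))
for name, ss, df, F, p in rows:
    print('%-10s SS=%8.3f df=%4.1f F=%8.3f p=%s' % (name, ss, df, F, p))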
Exemple #49
0
import urllib.request

import numpy as np
from matplotlib import mlab

# download the two daily price tables as CSV
u1 = urllib.request.urlretrieve(
    'http://ichart.finance.yahoo.com/table.csv?s=AAPL&d=9&e=14&f=2008&g=d&a=8&b=7&c=1984&ignore=.csv'
)
u2 = urllib.request.urlretrieve(
    'http://ichart.finance.yahoo.com/table.csv?s=GOOG&d=9&e=14&f=2008&g=d&a=8&b=7&c=1984&ignore=.csv'
)

# load the CSV files into record arrays
r1 = mlab.csv2rec(open(u1[0]))
r2 = mlab.csv2rec(open(u2[0]))

# compute the daily returns and add these columns to the arrays
gains1 = np.zeros_like(r1.adj_close)
gains2 = np.zeros_like(r2.adj_close)
gains1[1:] = np.diff(r1.adj_close) / r1.adj_close[:-1]
gains2[1:] = np.diff(r2.adj_close) / r2.adj_close[:-1]
r1 = mlab.rec_append_fields(r1, 'gains', gains1)
r2 = mlab.rec_append_fields(r2, 'gains', gains2)

# now join them by date; the default postfixes are 1 and 2.  The
# default jointype is inner so it will do an intersection of dates and
# drop the dates in AAPL which occurred before GOOG started trading in
# 2004.  r1 and r2 are reverse ordered by date since Yahoo returns
# most recent first in the CSV files, but rec_join will sort by key so
# r below will be properly sorted
r = mlab.rec_join('date', r1, r2)

# long AAPL, short GOOG
g = r.gains1 - r.gains2
tr = (1 + g).cumprod()  # the total return

# plot the return
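
matplotlib's mlab record-array helpers (csv2rec, rec_append_fields, rec_join) were removed in later releases, and the ichart endpoint above has been retired, so the same join-on-date and daily-returns calculation is usually done with pandas today. A rough sketch, assuming two local CSV copies with Date and Adj Close columns (the file names are placeholders):

import pandas as pd

# local copies of the two price tables; names are placeholders
r1 = pd.read_csv('aapl.csv', parse_dates=['Date']).set_index('Date').sort_index()
r2 = pd.read_csv('goog.csv', parse_dates=['Date']).set_index('Date').sort_index()

# daily returns for each series, inner-joined on date
g = pd.concat({'aapl': r1['Adj Close'].pct_change(),
               'goog': r2['Adj Close'].pct_change()},
              axis=1, join='inner').dropna()

# long AAPL, short GOOG, cumulative return
tr = (1 + (g['aapl'] - g['goog'])).cumprod()
print(tr.tail())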
Exemple #50
0
    def create_table(self, outputPath, query={}, tage=None,
            isocType="pdva", specType="basel", clobber=True):
        """Create an HDF5 table that combines outputs from models
        in the library.
        """
        query.update({"compute_complete": True,
            "np_data": {"$exists": 1}})
        docs = self.collection.find(query)  # , limit=2
        print "working on %i docs to read" % docs.count()
        lut = get_metallicity_LUT(isocType, specType)
        # TODO need to generalize definition of columns. A user ought to
        # be able to use any pset columns, any set of mags, and the spectra
        #magNames = ['TMASS_J','TMASS_H','TMASS_Ks','MegaCam_u','MegaCam_g',
        #        'MegaCam_r','MegaCam_i','MegaCam_z','GALEX_NUV','GALEX_FUV']
        magCols = [(s, np.float,) for (i, s, c) in FILTER_LIST]
        #magCols = [(s,np.float) for s in magNames]
        psetCols = [('dust_type', np.int), ('imf_type', np.int),
                ('sfh', np.int), ('tau', np.float), ('const', np.float),
                ('sf_start', np.float), ('fburst', np.float),
                ('tburst', np.float), ('dust_tesc', np.float),
                ('dust1', np.float), ('dust2', np.float),
                ('frac_nodust', np.float)]
        sfhCols = [('age', np.float), ('mass', np.float), ('lbol', np.float),
                ('sfr', np.float)]
        miscCols = [('Z', np.float)]  # metallicity, taken from zmet LUT
        specCols = [('spec', np.float, SpecParser.nlambda(specType))]
        allCols = psetCols + sfhCols + miscCols + magCols + specCols
        tableDtype = np.dtype(allCols)

        if os.path.exists(outputPath) and clobber:
            os.remove(outputPath)
        title = os.path.splitext(os.path.basename(outputPath))[0]
        h5file = tables.openFile(outputPath, mode="w", title=title)
        table = h5file.createTable("/", 'models', tableDtype,
                "Model Output Table")
        print h5file
        for doc in docs:
            print "reading", doc['_id']
            npData = doc['np_data']
            nRows = len(npData)
            
            # Append pset cols and misc cols
            extraNames = []
            extraArrays = []

            zmet = doc['pset']['zmet']
            Z = lut[zmet - 1]
            Z = np.ones(nRows, dtype=np.float) * Z
            extraNames.append('Z')
            extraArrays.append(Z)
            for cName, cType in psetCols:
                p = doc['pset'][cName]
                pArray = np.ones(nRows, dtype=cType) * p
                extraNames.append(cName)
                extraArrays.append(pArray)
            npDataAll = mlab.rec_append_fields(npData, extraNames, extraArrays)

            # select row closest to the target age
            if tage is not None:
                ageGyr = 10. ** npDataAll['age'] / 10. ** 9
                i = np.argmin((ageGyr - tage) ** 2)
                row = np.atleast_1d(np.array(npDataAll[i], copy=True))
                table.append(row)
            else:
                #table.append(npDataAll) # should work but corrupts data
                row = table.row
                for i in xrange(nRows):
                    print "row", i
                    for x in allCols:
                        name = x[0]
                        print name, npDataAll[i][name]
                        row[name] = npDataAll[i][name]
                    row.append()
        table.flush()
        h5file.flush()
        h5file.close()
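
Recent matplotlib releases no longer ship mlab.rec_append_fields, which is used here to broadcast each model's scalar pset values into per-row columns; numpy.lib.recfunctions.append_fields does the same job. A minimal sketch of that broadcast-and-append step on a toy record array (field names and values are illustrative):

import numpy as np
import numpy.lib.recfunctions as rfn

# toy per-row data standing in for doc['np_data']
np_data = np.array([(0.1, 1.0), (0.2, 1.5), (0.3, 2.0)],
                   dtype=[('age', float), ('mass', float)])
n_rows = len(np_data)

# broadcast scalar parameters (e.g. from doc['pset']) into full-length columns
extra_names = ['Z', 'tau']
extra_arrays = [np.full(n_rows, 0.019), np.full(n_rows, 1.0)]

np_data_all = rfn.append_fields(np_data, extra_names, extra_arrays,
                                usemask=False, asrecarray=True)
print(np_data_all.dtype.names)   # ('age', 'mass', 'Z', 'tau')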
    ab, iqso_from_id, iMgII_from_id = read_zhu()

    qso = ab['qso']
    # find qso sightlines that are within 10 proper Mpc of a foreground cluster.

    if os.path.exists(run_id + '/qso_cluster_pairs.fits'):
        print 'Reading', run_id + '/qso_cluster_pairs.fits'
        pairs0 = fits.getdata(run_id + '/qso_cluster_pairs.fits')
    else:
        # takes about 10 min to run.
        pairs0 = match_clus_qso(clus, qso, 
            filename=run_id + '/qso_cluster_pairs.fits')

    # assign a unique identifier to each pair (rec_append_fields returns a new array).

    pairs0 = rec_append_fields(pairs0, ['pid'], [np.arange(len(pairs0))])

if PLOTRES:
    plot_hist(run_id, clus, ab['MgII'], run_id)

if CALC:
    cids = clus['id']
    pairs = pairs0[np.in1d(pairs0['cid'], cids)]

    # for each qso-cluster pair find any absorbers with impact par <
    # 1 Mpc within some z range of the cluster.

    # for rho < 1
    # z path length within 1Mpc of cluster per pair
    # absorber id for a cluster-absorber pair
    # pair ids where the cluster in the pair is near an absorber
Exemple #52
0
def bugtrend(milestone):
    baseWorkingDirectory = "/tmp/"
    wikiTableBaseFileName = baseWorkingDirectory + "DefectChurnReport"
    wikiImageFileBaseLocation = "http://metrics.arubanetworks.com/metrics/margot_autopages/"
    wikiContent = []
    Queries = milestone.split(",")
    default_column_value = {
        "datemaxbabug_when": datetime.date(2030, 12, 1),
        "datebcreation_ts": datetime.date(2005, 1, 1),
        "cf_customers": "Aruba Internal",
    }
    for params in Queries:
        print "processing : " + params
        wikiContent = []
        baseFileName = params
        bugReportName = baseWorkingDirectory + baseFileName + "_bugs.csv"
        fixedReportName = baseWorkingDirectory + baseFileName + "_fixed.csv"
        outputReportName = baseWorkingDirectory + baseFileName + "_merged.csv"

        r = mlab.csv2rec(bugReportName)
        s = mlab.csv2rec(fixedReportName)
        k = mlab.rec_join("bug_id", s, r, jointype="outer", defaults=default_column_value, r1postfix="1", r2postfix="2")
        t = mlab.csv2rec("/home/automation/bugzilla_tool/Org_Mapping.csv")
        # mlab.rec2csv(k,outputReportName,delimiter=',',missing="",missingd=None,withheader=True)

        org_mapping = dict(zip(t.login_name, range(len(t))))
        # orgList = []
        DirectorArray = np.zeros_like(k.login_name)
        ComponentArray = np.zeros_like(k.login_name)
        ManagerArray = np.zeros_like(k.login_name)

        for i in range(len(k)):
            if k[i].login_name in org_mapping.keys():
                DirectorArray[i] = t[org_mapping[k[i].login_name]].director
                ComponentArray[i] = t[org_mapping[k[i].login_name]].functional_group
                ManagerArray[i] = t[org_mapping[k[i].login_name]].manager
            else:
                DirectorArray[i] = t[org_mapping["*****@*****.**"]].director
                ComponentArray[i] = t[org_mapping["*****@*****.**"]].functional_group
                ManagerArray[i] = t[org_mapping["*****@*****.**"]].manager

        k = mlab.rec_append_fields(k, "Director", DirectorArray)
        k = mlab.rec_append_fields(k, "Component", ComponentArray)
        k = mlab.rec_append_fields(k, "Manager", ManagerArray)
        mlab.rec2csv(k, outputReportName, delimiter=",", missing="", missingd=None, withheader=True)

        # Start preparing the data for plotting

        chartFileName = baseWorkingDirectory + baseFileName + ".png"
        plotDefectTrend(k, chartFileName, baseFileName)

        s = "= Overall Defect Trend = \n"
        wikiContent.append(s)
        s = wikiImageFileBaseLocation + baseFileName + ".png \n"
        wikiContent.append(s)

        #    Directors = ('Murali Duvvury','Shankar','Jie Jiang')
        Directors = list(np.unique(np.array(k.Director)))

        s = "= Director level Defect Trend = \n"
        wikiContent.append(s)

        wikiTableFileName = wikiTableBaseFileName + "_" + params + ".wiki"

        f = open(wikiTableFileName, "w")
        hdrList = ("Director", "Open Defects", "Need Info", "Observe", "Resolved-Fixed", "Resolved-Other", "Incoming")
        printWikiTableOpen(f, hdrList)

        for Dir in Directors:
            s = Dir
            DirFileName = Dir.replace(" ", "_")
            DirRe = re.compile(s)
            DirReMatch = np.vectorize(lambda x: bool(DirRe.match(x)))
            sel = DirReMatch(np.array(k.Director))
            chartFileName = baseWorkingDirectory + baseFileName + "-" + DirFileName + ".png"
            plotDefectTrend(k[sel], chartFileName, baseFileName + "-" + DirFileName)
            printChurnReport(k[sel], Dir, f)
            s = wikiImageFileBaseLocation + baseFileName + "-" + DirFileName + ".png \n"
            wikiContent.append(s)
        #        chartFileName = baseWorkingDirectory + baseFileName + "-" + Dir +".png"
        #        plotDefectTrend(k[k.Director == Dir],chartFileName, baseFileName + '-' + Dir)

        printWikiTableClose(f)

        s = "= Component level Defect Trend = \n"
        wikiContent.append(s)

        ComponentList = [
            ["GSM", "GSM"],
            ["UI-Configuration", "UI"],
            ["AP-Platform", "11ac"],
            ["Switch-Datapath", "Datapath"],
            ["HA-Lite", "HA-Lite"],
            ["Switch-Platform", "CIMU"],
            ["Feature-Bugs", "\w+]"],
        ]

        for c in ComponentList:
            s = "^\[*" + c[1]
            componentRe = re.compile(s)
            componentReMatch = np.vectorize(lambda x: bool(componentRe.match(x)))
            sel = np.logical_or(componentReMatch(np.array(k.short_desc)), k.name == c[0])
            chartFileName = baseWorkingDirectory + baseFileName + "-" + c[0] + ".png"
            plotDefectTrend(k[sel], chartFileName, baseFileName + "-" + c[0])

        s = "= Keyword level Defect Trend = \n"
        wikiContent.append(s)

        KeywordList = [
            ["TC-Blocker", "TC\-blocker"],
            ["SystemTest", "ST"],
            ["Smoke", "Smoke\-Failure"],
            ["CFT", "CFT"],
            ["MustFix", "MustFix"],
        ]

        for keyword in KeywordList:
            s = keyword[1]
            keywordRe = re.compile(s)
            keywordReMatch = np.vectorize(lambda x: bool(keywordRe.match(x)))
            sel = keywordReMatch(np.array(k.keywords))
            chartFileName = baseWorkingDirectory + baseFileName + "-" + keyword[0] + ".png"
            plotDefectTrend(k[sel], chartFileName, baseFileName + "-" + keyword[0])

        #   wikiTableFileName = wikiTableBaseFileName + '_' + params + '.wiki'

        #    f = open(wikiTableFileName , 'w')
        hdrList = ("Manager", "Open Defects", "Need Info", "Observe", "Resolved-Fixed", "Resolved-Other", "Incoming")
        printWikiTableOpen(f, hdrList)

        ManagerList = list(np.unique(np.array(k.Manager)))
        today = datetime.date.today()
        resolvedDateRange = today + datetime.timedelta(days=-14)

        s = "= Manager Defect Trend = \n"
        wikiContent.append(s)

        for mgr in ManagerList:
            s = mgr
            mgrFileName = mgr.replace(" ", "_")
            mgrRe = re.compile(s)
            mgrReMatch = np.vectorize(lambda x: bool(mgrRe.match(x)))
            sel = mgrReMatch(np.array(k.Manager))
            chartFileName = baseWorkingDirectory + baseFileName + "-" + mgrFileName + ".png"
            plotDefectTrend(k[sel], chartFileName, baseFileName + "-" + mgrFileName)
            printChurnReport(k[sel], mgr, f)
            s = wikiImageFileBaseLocation + baseFileName + "-" + mgrFileName + ".png \n"
            wikiContent.append(s)

        printWikiTableClose(f)
        f.write("".join(wikiContent))
        f.close()
    plt.close("all")
        for i in range(females_needed):
            ridx = np.random.randint(len(nv_pool))
            while sex[nv_pool[ridx]] != '"F"':
                ridx = np.random.randint(0, len(nv_pool))
            subj = nv_pool[ridx]
            nv_pool.remove(subj)
            nv_matches.append(subj)
            base_ages = age[subj][age[subj]<split_age]
            rbase = base_ages[np.random.randint(len(base_ages))]
            fu_ages = age[subj][age[subj]>split_age]
            rfu = fu_ages[np.random.randint(len(fu_ages))]
            nv_ages.append(rfu - rbase)
            nv_rows.append([rows[subj][np.nonzero(age[subj]==rbase)[0]], rows[subj][np.nonzero(age[subj]==rfu)[0]]])
        # check if we fulfill our requirement
        pval = np.min([stats.ttest_ind(nv_ages,per_diffs)[1], stats.ttest_ind(nv_ages,rem_diffs)[1]])
        #pval = stats.ttest_ind(nv_ages,per_diffs)[1]
        cnt+=1
if cnt==num_tries:
    print 'Cannot find good NV set, giving up!'
else:
    good_rows = good_rows + nv_rows
    # flattening the list
    good_rows = [i for j in good_rows for i in j]

    # finally, create new variable and output it to a new file
    match_bool = np.zeros(len(gf))
    for row in good_rows:
        match_bool[row] = 1
    match_bool = mlab.rec_append_fields(gf, var, match_bool)
    #mlab.rec2csv(match_bool, csv_file[:-4] + '_matched_on' + str(split_age) + '_dsm' + str(dsm) + '_diff.csv')
Exemple #54
0
 def _postprocess(self, output_file, gu_poly, generator_list, overwrite=False, supplementary_figures=False, **kwargs):
     generatoroutputs = []
 
     for g in generator_list:
         if supplementary_figures:
             gkw = g.G_kwargs
         else:
             gkw = {}
         generatoroutputs.append(g.gen(*g.G_args, overwrite = overwrite, **gkw))
     
     gu_arr = gen_gu.gen(*gen_gu.G_args, overwrite=overwrite, **gen_gu.G_kwargs)
     print gu_arr
     
     print "merging arrays"
     out_arr = gen_merge.join_recs_on_keys(gu_arr, generatoroutputs, (BASIN_ID_FIELD, ADMIN_ID_FIELD, GW_ID_FIELD))
     sr = ap.SpatialReference(PRJNAME)
     ap.Project_management(gu_poly,output_file,sr)
     print out_arr[BASIN_NAME_FIELD]
     
     missing_fields = np.setdiff1d(ALL_FIELDS,out_arr.dtype.names)
     if len(missing_fields)>0:
         print "WARNING: missing fields %s" % missing_fields
         obs = len(out_arr[GU_FIELD])
         out_arr = mlab.rec_append_fields(out_arr, missing_fields, [np.repeat(np.nan, obs) for _ in missing_fields])
             
     extra_fields = np.setdiff1d(out_arr.dtype.names,ALL_FIELDS)
     print "dropping extra fields %s" % extra_fields
     out_arr = mlab.rec_drop_fields(out_arr,extra_fields)
     
     print "generating pre-weighted_columns"
     if WEIGHTING_SCHEMES is not None:
         new_cols = []
         names = []
         for n, weights in WEIGHTING_SCHEMES.iteritems():
             keys = weights.keys()
             values = weights.values()
             indicator_array = np.vstack([out_arr[f] for f in keys]).T
             indicator_array[indicator_array==NULL_VALUE] = np.nan
             scores = np.squeeze(np.asarray(aggregate_scores.aggregate_scores(indicator_array,values)))
             scores[np.isnan(scores)]=NULL_VALUE
             new_cols.append(scores)
             names.append(n)
         out_arr = mlab.rec_append_fields(out_arr, names, new_cols)
     
     for field in MAP_FIELDS:
         out_arr[field][out_arr[field]==""] = "No data"
     
     
     mlab.rec2csv(out_arr,"bin/test.csv")
     print "dropping fields"
     drop = [f.baseName for f in ap.ListFields(output_file) if not(f.required) and not(f.baseName == GU_FIELD)]
     if len(drop)>0:
         ap.DeleteField_management(output_file,drop)
     
     print "joining"
     ap.da.ExtendTable(output_file,GU_FIELD,out_arr,GU_FIELD)
     
     print "indexing"
     try:
         ap.AddSpatialIndex_management(output_file)
         ap.AddIndex_management(output_file,GU_FIELD,GU_FIELD,"UNIQUE")
     except Exception, e:
         print e
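
aggregate_scores above is project code, but the surrounding step — stack the indicator fields named in a weighting scheme, mask the NULL_VALUE sentinel, and reduce each row to a weighted score — can be sketched with a nan-aware weighted mean. The names, the sentinel value and the weighted mean itself are assumptions here, not the original implementation:

import numpy as np

NULL_VALUE = -9999.0  # assumed sentinel

def weighted_scores(out_arr, weights):
    """Weighted nan-mean over the indicator fields named in `weights`."""
    keys = list(weights.keys())
    w = np.asarray(list(weights.values()), dtype=float)
    ind = np.vstack([np.asarray(out_arr[f], dtype=float) for f in keys]).T
    ind[ind == NULL_VALUE] = np.nan
    # re-normalise the weights row by row over the non-null indicators
    wsum = ((~np.isnan(ind)) * w).sum(axis=1)
    scores = np.nansum(ind * w, axis=1) / np.where(wsum > 0, wsum, np.nan)
    scores[np.isnan(scores)] = NULL_VALUE
    return scores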
Exemple #55
0
    def readEvents(self):
        self.mapping = self._readMappingFile()
        self.pos_list = self._get_positions_dirs()

        fmt = "%%0%dd" %(len(self.pos_list[0]))

        for pos_idx, pos_name in enumerate(self.mapping['position']):
            if isinstance(pos_name, int):
                pos_name = fmt %pos_name
            print pos_name, pos_name in self.pos_list
            if pos_name not in self.pos_list:
#                raise RuntimeError("Position from Mapping file %s not found in in path %s" % (pos_name, self.path_in))
                print "Position from Mapping file %s not found in in path %s" % (pos_name, self.path_in)
                self._logger.warning("Position from Mapping file %s not found in in path %s" % (pos_name, self.path_in))
                continue

            event_path = os.path.join(self.path_in, pos_name, 'statistics' , 'events')
            if not os.path.exists(event_path):
                raise RuntimeError("For position %s no event path found %s" % (pos_name, event_path))

            event_file_list = sorted(os.listdir(event_path))
            if len(event_file_list) == 0:
                self._logger.warning("No events found for position %s" % pos_name)
                continue

            self._logger.info("Reading Events for position '%s' (%d files)" % (pos_name, len(event_file_list)))

            hmm_correction_available = False
            if '_hmm' in event_file_list:
                hmm_correction_available = True
                event_file_list.remove('_hmm')

            for event_file in event_file_list:
                res = self.EVENT_REGEXP.search(event_file)
                if res is None:
                    self._logger.warning("Could not parse event file name '%s' for position %s" % (event_file, pos_name))
                    continue

                res = res.groupdict()
                if pos_name != res['pos']:
                    self._logger.error("Event file %s has different pos identifier than %s" % (event_file, pos_name))
                    raise RuntimeError("Event file %s has different pos identifier than %s" % (event_file, pos_name))

                channel = res["channel"]
                region = res["region"]
                branch = int(res["branch"])
                time = int(res["time"])
                obj = int(res["obj"])

                if branch != 1:
                    continue

                if pos_name not in self._positions.keys():
                    if 'oligoid' in self.mapping.dtype.fields.keys():
                        self.oligo_header_name = 'oligoid'
                    elif 'sirna_id' in self.mapping.dtype.fields.keys():
                        self.oligo_header_name = 'sirna_id'
                    else:
                        raise RuntimeError('Mapping file has no header: oligoid or siRNA_id missing')

                    self._positions[pos_name] = Position(plate=self.plate_id,
                                                        position=self.mapping[pos_idx]['position'],
                                                        well=self.mapping[pos_idx]['well'],
                                                        site=self.mapping[pos_idx]['site'],
                                                        row=self.mapping[pos_idx]['row'],
                                                        column=self.mapping[pos_idx]['column'],
                                                        gene_symbol=self.mapping[pos_idx]['gene_symbol'],
                                                        oligoid=self.mapping[pos_idx][self.oligo_header_name],
                                                        group=self.mapping[pos_idx]['group'],
                                                        )

                event_id = 'T%03d_O%04d_B%d' % (time, obj, branch)
                if event_id not in self._positions[pos_name]:
                    self._positions[pos_name][event_id] = {}

                if channel not in self._positions[pos_name][event_id]:
                    self._positions[pos_name][event_id][channel] = {}

                filename = os.path.join(event_path, event_file)
                self._positions[pos_name][event_id][channel][region] = numpy.recfromcsv(filename, delimiter='\t')

                if hmm_correction_available and region == 'primary':

                    filename = os.path.join(event_path, '_hmm', event_file)
                    if not os.path.exists(filename):
                        raise RuntimeError('HMM correction folder is there but event file not found %s' % filename)
                    class__label__hmm = numpy.recfromcsv(filename, delimiter='\t')['class__b__label']
                    self._positions[pos_name][event_id][channel][region] = \
                        rec_append_fields(self._positions[pos_name][event_id][channel][region],
                                          'class__label__hmm',
                                          class__label__hmm,
                                          numpy.uint8)

                    self.class_label_selector = 'class__label__hmm'
        self.save(True)
Exemple #56
0
def typeII(response, ancova, recarray):
    """
    Produce an ANCOVA table
    from a given ANCOVA formula
    with type II sums of squares.

    Inputs
    ------

    response: str
              field name of response in recarray

    ancova: ANCOVA
            specifies the model to be fit

    recarray: np.ndarray
              should contain all field names in the terms of ancova
              as well as response
    """

    Y = recarray[response]
    X = ancova.formula.design(recarray, return_float=True)
    model = OLS(Y, X)
    results = model.fit()
    SSE_F = np.sum(results.resid**2)
    df_F = results.df_resid

    names = []
    sss = []
    fs = []
    dfs = []
    pvals = []

    for name, expr_factors in zip(ancova.contrast_names, ancova.sequence()):
        expr, factors = expr_factors
        F = ancova.all_but_above(expr, factors)
        C = ancova.contrasts[name]
        XF, contrast_matrices = F.formula.design(recarray, contrasts={'C': C})
        modelF = OLS(Y, XF)
        resultsF = modelF.fit()

        SSEF = np.sum(resultsF.resid**2)
        dfF = resultsF.df_resid
        ftest = resultsF.f_test(contrast_matrices['C'])

        SSER = SSEF + ftest.fvalue * ftest.df_num * (SSEF / dfF)
        dfR = dfF + ftest.df_num

        sss.append(SSER - SSEF)
        dfs.append(ftest.df_num)
        fs.append(((SSER - SSEF) / (dfR - dfF)) / (SSE_F / df_F))
        pvals.append(f_dbn.sf(fs[-1], dfR - dfF, df_F))
        names.append(name)

    # Add in the "residual row"

    sss.append(SSE_F)
    dfs.append(df_F)
    pvals.append(np.nan)
    fs.append(np.nan)
    names.append('Residuals')

    result = np.array(
        names, np.dtype([('contrast', 'S%d' % max([len(n) for n in names]))]))
    result = ML.rec_append_fields(
        result, ['SS', 'df', 'MS', 'F', 'p_value'],
        [sss, dfs, np.array(sss) / np.array(dfs), fs, pvals])
    return result
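
The line SSER = SSEF + ftest.fvalue * ftest.df_num * (SSEF / dfF) simply inverts the usual F-ratio, so the reduced model never has to be refit. A small check of that identity with statsmodels on synthetic data (the toy model and contrast are illustrative):

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(1)
n = 40
X = np.column_stack([np.ones(n), rng.normal(size=(n, 2))])
y = X @ np.array([1.0, 0.5, 0.0]) + rng.normal(size=n)

full = sm.OLS(y, X).fit()
sse_f, df_f = np.sum(full.resid ** 2), full.df_resid

# contrast: the coefficient on the last column is zero
C = np.array([[0.0, 0.0, 1.0]])
ftest = full.f_test(C)

# reconstruct the reduced-model SSE from the F statistic ...
sse_r = sse_f + float(np.squeeze(ftest.fvalue)) * ftest.df_num * (sse_f / df_f)

# ... and compare with an explicit fit of the reduced model
reduced = sm.OLS(y, X[:, :2]).fit()
print(sse_r, np.sum(reduced.resid ** 2))   # the two should agree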