def _rm_walls(self, data, publisher):
        variance = 0.1 #to account for noise
        switch = int(self.switch) ### The PLS Model oscillates between returning 180 pts and 181 pts, this toggle tracks that in a hacky way
        self.switch = not self.switch #toggle switch
        
        #if the filter is in place remove walls
        if self.filter_set:
            walls = self.walls[switch]
            ranges = data.ranges
            filtered_ranges = []
            
            for i in xrange(len(walls)):
                try:
                    if ranges[i] < (walls[i]-variance):
                        filtered_ranges.append(ranges[i])
                    else:
                        filtered_ranges.append(data.range_max+1) #invalidate the result at this point
                except IndexError:
                    filtered_ranges.append(data.range_max+1)
            
            #publish filtered_ranges as LaserScan
            filtered_scan = data
            h = std_msgs.msg.Header()
            h.stamp = data.header.stamp
            h.frame_id = data.header.frame_id
            filtered_scan.header = h
            filtered_scan.ranges = filtered_ranges
            publisher.publish(filtered_scan)
        
        #if the filter reset has been called use data to change filter instead
        else:
            #scan the room a specified # of times into an array
            # take the median of those scans at each index value 
            # once the minimum number of scans is reached
            if self.reset_count < self.reset_thresh:
                self.new_walls.append([])
                self.new_walls[self.reset_count] = data.ranges
                self.reset_count += 1
            
            elif self.reset_count == self.reset_thresh:
                #unzip new_walls (to go by point instead of dataset) - len should be ~180

                zipped0 = map(list, zip(*filter(None, self.new_walls[0::2])))
                zipped1 = map(list, zip(*filter(None, self.new_walls[1::2])))
                self.walls[0] = [numpy.median(z) for z in zipped0]
                self.walls[1] = [numpy.median(z) for z in zipped1]
                
                #reset vars
                self.reset_count = 0
                self.new_walls = [[]]
                self.filter_set = True

                #save the new wall data (as two separate files, since there is a 180-point range and a 181-point range)
                wall_array_zero = np.array(self.walls[0], dtype=np.float64)
                wall_array_one = np.array(self.walls[1], dtype=np.float64)

                numpy.savetxt(self.fp_0, wall_array_zero, delimiter=",")
                numpy.savetxt(self.fp_1, wall_array_one, delimiter=",")

                print "New walls learned and saved"
Example #2
def exp(inF1,inF2):
    G = Gene(inF1)
    ouFile = open(inF1 + '.exp', 'w')
    ouFile.write('Gene\tMock\tMERS\n')
    D = {}
    inFile = open(inF2)
    head = inFile.readline()
    for line in inFile:
        line = line.strip()
        fields = line.split('\t')
        gene = fields[1]
        D.setdefault(gene, [])
        #mock = (float(fields[2]) + float(fields[3]))/2
        #rsv20h = (float(fields[14]) + float(fields[15]))/2
        Mock = np.median([float(fields[2]), float(fields[3]), float(fields[4])])
        MERS = np.median([float(fields[5]), float(fields[6]), float(fields[7])])
        D[gene].append([Mock,MERS])
    inFile.close()
    for g in G:
        if g in D:
            if len(D[g]) > 1:
                #print(D[g])
                pass
            ouFile.write(g + '\t' + str(D[g][0][0]) + '\t' + str(D[g][0][1]) + '\n')
    ouFile.close()
Example #3
    def work(self, **kwargs):
        self.__dict__.update(kwargs)
        self.worked = True
        samples = LGMM1(rng=self.rng,
                size=(self.n_samples,),
                **self.LGMM1_kwargs)
        samples = np.sort(samples)
        edges = samples[::self.samples_per_bin]
        centers = .5 * edges[:-1] + .5 * edges[1:]
        print edges

        pdf = np.exp(LGMM1_lpdf(centers, **self.LGMM1_kwargs))
        dx = edges[1:] - edges[:-1]
        y = 1 / dx / len(dx)

        if self.show:
            plt.scatter(centers, y)
            plt.plot(centers, pdf)
            plt.show()
        err = (pdf - y) ** 2
        print np.max(err)
        print np.mean(err)
        print np.median(err)
        if not self.show:
            assert np.max(err) < .1
            assert np.mean(err) < .01
            assert np.median(err) < .01
Example #4
 def _getTotalDuration(self, actStream):  # for bed/toilet transition, margin = 1 hr
         totDuration = 0
         count = 0
         durlist = []
         for i in range(0, len(actStream)-2):
             #print actStream[i]
             firstLine = actStream[i].split(" ")
             secondLine = actStream[i+1].split(" ")
             #get a date from here
             d1 = self._get_datetime(firstLine[0], firstLine[1])
             d2 = self._get_datetime(secondLine[0], secondLine[1])
             td = d2 - d1
             duration = td.total_seconds()
             #print td, duration
             #durlist.append(duration)
             margin = self._calculateMargin(d1, d2)
             if duration > 60*margin:
                 #check to see if there were other activities
                 count = count + 1
                 continue
             durlist.append(duration)
             totDuration = duration + totDuration
         try:
             #print round(min(durlist)/3600, 2), round(max(durlist)/3600, 2), round(totDuration/3600, 2), round(sum(durlist)/3600, 2)
             #return (round(totDuration/60, 5), count, round(numpy.min(durlist)/60, 5), round(numpy.max(durlist)/60, 5), round(numpy.median(durlist)/60, 5), round(numpy.average(durlist)/60, 5))
             return (round(numpy.median(durlist)/60, 5), count)
         except ValueError:
             #return (round(totDuration/60, 5), count, 0, 0, 0, 0)
             return (round(numpy.median(durlist)/60, 5), count)
Example #5
def compute_ks_by_contained(contigs_by_lib_name, sinks, sources):
    # compute median of maxmin as well as ks p-value of contained maxmin
    for lib_snk in contigs_by_lib_name:
        # for a fixed lib_snk; do all source libs together
        # contained_ctg: contig names of all source libraries stored by source library names
        contained_ctg=collections.defaultdict(set)
        for snkCtg in contigs_by_lib_name[lib_snk].itervalues():
            for srcCtg in snkCtg.contained_in:
                contained_ctg[srcCtg.lib].add(srcCtg.name)
        for lib_src in contigs_by_lib_name:
            if lib_src in contained_ctg:
                contained=[]
                not_contained=[]
                for ctg in contigs_by_lib_name[lib_src]:
                    if ctg in contained_ctg[lib_src]:
                        contained.append(contigs_by_lib_name[lib_src][ctg].maxmin)
                    else:
                        not_contained.append(contigs_by_lib_name[lib_src][ctg].maxmin)
 #               contained=[contigs_by_lib_name[lib_src][ctg].maxmin for ctg in contigs_by_lib_name[lib_src] if ctg in contained_ctg[lib_src]]
 #               not_contained=[contigs_by_lib_name[lib_src][ctg].maxmin for ctg in contigs_by_lib_name[lib_src] if ctg not in contained_ctg[lib_src]]
                ks_pvalue = stats.ks_2samp(contained, not_contained)[1]
                print lib_src, lib_snk, ks_pvalue, sum(contained)/len(contained), sum(not_contained)/len(not_contained)
                if ks_pvalue < 0.05 and np.median(contained) > np.median(not_contained):
                    sources[lib_snk] |= {lib_src}
                    sinks[lib_src] |= {lib_snk}
Example #6
 def __init__(self, fndark, nblocksize):
     if os.path.isfile(fndark+'-dark.npz'):
         npzfile = np.load(fndark+'-dark.npz')
         self.dmean = npzfile['dmean']
         self.dstd = npzfile['dstd']
         self.dbpm = npzfile['dbpm']
     else:
         dark = Binary(fndark)
         nframes = dark.nframes; my = dark.my; mx = dark.mx
         nblocks = nframes//nblocksize

         bmed = np.zeros((nblocks, my, mx))
         bstd = np.zeros((nblocks, my, mx))
         for iblock in range(nblocks):
             t0 = time.clock()
             a = dark.data[iblock*nblocksize:(iblock+1)*nblocksize]
             a, idx = dropbadframes(a)
             print '- read block, dropped bad, subtracted dark in '+str(time.clock()-t0)+'s'
             nfb = a.shape[0]
             bmed[iblock, :, :] = np.median(a, axis=0)
             bstd[iblock, :, :] = np.std(a, axis=0)
         self.dmean = np.mean(bmed, axis=0)
         self.dstd = np.sqrt(np.sum((bstd)**2, axis=0))
         self.dbpm = self.dstd < (np.median(self.dstd) + 5*np.std(self.dstd))
         self.dbpm = self.dstd < (np.median(self.dstd*self.dbpm) + 5*np.std(self.dstd*self.dbpm))

         np.savez(fndark+'-dark', dmean=self.dmean, dstd=self.dstd, dbpm=self.dbpm)
         del dark
Example #7
    def work(self):
        self.worked = True
        kwargs = dict(
                weights=self.weights,
                mus=self.mus,
                sigmas=self.sigmas,
                low=self.low,
                high=self.high,
                q=self.q,
                )
        samples = GMM1(rng=self.rng,
                size=(self.n_samples,),
                **kwargs)
        samples = np.sort(samples)
        edges = samples[::self.samples_per_bin]
        #print samples

        pdf = np.exp(GMM1_lpdf(edges[:-1], **kwargs))
        dx = edges[1:] - edges[:-1]
        y = 1 / dx / len(dx)

        if self.show:
            plt.scatter(edges[:-1], y)
            plt.plot(edges[:-1], pdf)
            plt.show()
        err = (pdf - y) ** 2
        print np.max(err)
        print np.mean(err)
        print np.median(err)
        if not self.show:
            assert np.max(err) < .1
            assert np.mean(err) < .01
            assert np.median(err) < .01
def meanclip2(xx,yy,slope, clipsig=3.0, maxiter=5, converge_num=0.1, verbose=0):
    from numpy import array
    import numpy
    xx=array(xx)
    yy=array(yy)
    xx0=array(xx[:])
    yy0=array(yy[:])
    ct=len(yy)
    slope=float(slope)
    iter = 0; c1 = 1.0 ; c2 = 0.0
    while (c1 >= c2) and (iter < maxiter):
        lastct = ct
        sig=numpy.std(yy0-xx0*slope)
#        mean=numpy.mean(array(yy0)-array(xx0)*slope)
        mean=numpy.median(array(yy0)-array(xx0)*slope)
        wsm = numpy.where( abs(yy0-xx0*slope) < mean+clipsig*sig )
        ct = len(wsm[0])
        if ct > 0:
            xx0=xx0[wsm]
            yy0=yy0[wsm]
        c1 = abs(ct - lastct)
        c2 = converge_num * lastct
        iter += 1
# End of while loop
#    mean=numpy.mean(array(yy0)-array(xx0)*slope)
    mean=numpy.median(array(yy0)-array(xx0)*slope)
    sig=numpy.std(array(yy0)-array(xx0)*float(slope))
    if verbose: pass
    return mean, sig,yy0,xx0
Example #9
    def getStripStatistics(self, yKey='vPhi', nMin=10):

        """For each of the strips, get the strip statistics"""

        if np.size(self.stripsFeH) < 1:
            self.buildStripsFeH()

        # may as well loop through!!

        # View of what we're using for our vertical quantity
        x = self.tSim['FeHObs']
        y = self.tSim[yKey]

        nStrips = np.size(self.stripsFeH) - 1
        self.stripCounts = np.zeros(nStrips, dtype='int')
        self.stripMeans = np.zeros(nStrips)
        self.stripMedns = np.zeros(nStrips)
        self.stripStdds = np.zeros(nStrips)
        self.stripFeHs = np.zeros(nStrips) # central point for sample

        for iStrip in range(nStrips):
            xLo = self.stripsFeH[iStrip]
            xHi = self.stripsFeH[iStrip+1]

            bStrip = (self.bSel) & (x >= xLo) & (x < xHi)

            self.stripCounts[iStrip] = np.sum(bStrip)
            if self.stripCounts[iStrip] < nMin:
                continue
            
            self.stripMeans[iStrip] = np.mean(y[bStrip])
            self.stripMedns[iStrip] = np.median(y[bStrip])
            self.stripStdds[iStrip] = np.std(y[bStrip])
            self.stripFeHs[iStrip] = np.median(x[bStrip])
def bench(workers, sizes, max_partition_fill_rates, byte_sizes, num_runs):
    for worker in workers:
        for size in sizes:
            for max_partition_fill_rate in max_partition_fill_rates:
                for byte_size in byte_sizes:
                    with open(result_dir + "/" + str(worker) + "_" + str(size) + "_" + str(max_partition_fill_rate) + "_" + str(byte_size) + "_S", "w+") as file1:
                        times = []
                        #flushes = []
                        #collisions = []
                        spills = []
                        for _ in range(num_runs):
                            process = subprocess.Popen(['../../build/benchmarks/hashtable_bench_probing_hashtable', '-s', str(size), '-w', str(worker), '-f', str(max_partition_fill_rate), '-t', str(byte_size)], stdout=subprocess.PIPE)
                            process.wait()
                            out = process.communicate()[0]
                            out_s = out.split()
                            times.append(float(out_s[0]))
                            #flushes.append(float(out_s[1]))
                            #collisions.append(float(out_s[2]))
                            spills.append(float(out_s[1]))
                        time = numpy.median(times)
                        #flush = numpy.median(flushes)
                        #collision = numpy.median(collisions)
                        spill = numpy.median(spills)
                        print str(worker) + "_" + str(size) + "_" + str(max_partition_fill_rate) + "_" + str(byte_size) + ": " + str(time) + " " + str(spill)
                        file1.write(str(time) + " " + str(spill) + "\n")
                    file1.close()
Example #11
def group_images_in_blocks(times, limit=3):
    """ In a night at the telescope, we can observe blocks of images on the same 
        field of the sky. For example, five blank fields, then the object, then 
        another 4 blank fields, then the object... We might want to distinguish 
        those blocks, in order to, e.g., combine the blank fields of each block, 
        correct a block of imafges of the object with a certain blank or bias
        image... This routine gets the datetime.datetime objects that indicate
        the date and time of observations, and separates them in blocks, giving 
        back an array:
            indices = [ind0,ind1,ind2,ind3]
        so that, each slice [ind0:ind1], [ind1:ind2] and [ind2:ind3] gives a block
        of the incoming images """
    delta_times = np.asarray( [(times[ii+1] - times[ii]).seconds for ii in 
                               range(len(times)-1)] )
    # Median and median absolute deviation of delta_times as first guesses
    median_delta = np.median(delta_times)
    MAD = np.median(abs(delta_times - median_delta))

    # We have found a boundary between blocks if the time between images is larger
    # than median_delta + limit * MAD. The boundary is assigned to the later image,
    # hence the +1.
    block_limits = np.where(delta_times > median_delta + limit * MAD)[0] + 1
    
    # Now we have the limits between blocks, we need to add the first image (where 
    # the first block starts) and the last image (where last block ends)
    block_limits = np.insert( np.append(block_limits, len(times)+1) , 0, 0) 
    return block_limits
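A minimal usage sketch with hypothetical timestamps (two groups of exposures separated by a half-hour gap), assuming group_images_in_blocks and numpy as np are in scope as above:

import datetime

t0 = datetime.datetime(2024, 1, 1, 22, 0, 0)
times = [t0 + datetime.timedelta(seconds=60 * k) for k in range(5)]            # block 1
gap_start = times[-1] + datetime.timedelta(seconds=1800)
times += [gap_start + datetime.timedelta(seconds=60 * k) for k in range(5)]    # block 2
limits = group_images_in_blocks(times, limit=3)
for i in range(len(limits) - 1):
    print("block %d -> %d images" % (i, len(times[limits[i]:limits[i + 1]])))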
Example #12
    def _computePositionTraditionalControl(self, caseObservations, controlObservations, methylFractionFlag, identifyFlag, testProcedure=_tTest):
        """Summarize the observed ipds at one template position/strand, using a case-control analysis"""
        # Compute stats on the observed ipds
        caseData = caseObservations['data']['ipd']
        controlData = controlObservations['data']['ipd']

        res = dict()
        res['refId'] = self.refId

        # FASTA header name
        res['refName'] = self.refName

        strand = res['strand'] = 1 - caseObservations['strand']
        tpl = res['tpl'] = caseObservations['tpl']
        res['base'] = self.cognateBaseFunc(tpl, strand)

        res['coverage'] = int(round((caseData.size + controlData.size) / 2.0))  # need a coverage annotation

        res['caseCoverage'] = caseData.size
        res['controlCoverage'] = controlData.size

        res['caseMean'] = caseData.mean().item()
        res['caseMedian'] = np.median(caseData).item()
        res['caseStd'] = np.std(caseData).item()

        res['controlMean'] = controlData.mean().item()
        res['controlMedian'] = np.median(controlData).item()
        res['controlStd'] = np.std(controlData).item()

        trim = (0.001, 0.03)
        ctrlMean = mstats.trimmed_mean(controlData, trim).item()
        if abs(ctrlMean) > 1e-3:
            res['ipdRatio'] = (mstats.trimmed_mean(caseData, trim).item() / ctrlMean)
        else:
            res['ipdRatio'] = 1.0

        testResults = testProcedure(caseData, controlData)
        res['testStatistic'] = testResults['testStatistic']
        res['pvalue'] = testResults['pvalue']

        pvalue = max(sys.float_info.min, res['pvalue'])
        res['score'] = round(-10.0 * math.log10(pvalue))

        # If the methylFractionFlag is set, then estimate fraction using just modelPrediction in the detection case.
        if methylFractionFlag and pvalue < self.options.pvalue and not identifyFlag:
            if res['controlCoverage'] > self.options.methylMinCov and res['caseCoverage'] > self.options.methylMinCov:

                # Instantiate mixture estimation methods:
                mixture = MixtureEstimationMethods(self.ipdModel.gbmModel.post, self.ipdModel.gbmModel.pre, res, self.options.methylMinCov)
                x = mixture.detectionMixModelBootstrap(res['controlMean'], caseData)

                res[FRAC] = x[0]
                res[FRAClow] = x[1]
                res[FRACup] = x[2]
            else:
                res[FRAC] = np.nan
                res[FRACup] = np.nan
                res[FRAClow] = np.nan

        return res
Example #13
def allclose_with_out(x, y, atol=0.0, rtol=1.0e-5):
    # run the np.allclose on x and y
    # if it fails print some stats
    # before returning
    ac = np.allclose(x, y, rtol=rtol, atol=atol)
    if not ac:
        dd = np.abs(x - y)
        neon_logger.display('abs errors: %e [%e, %e] Abs Thresh = %e'
                            % (np.median(dd), np.min(dd), np.max(dd), atol))
        amax = np.argmax(dd)

        if np.isscalar(x):
            neon_logger.display('worst case: %e %e' % (x, y.flat[amax]))
        elif np.isscalar(y):
            neon_logger.display('worst case: %e %e' % (x.flat[amax], y))
        else:
            neon_logger.display('worst case: %e %e' % (x.flat[amax], y.flat[amax]))

        dd = np.abs(dd - atol) / np.abs(y)
        neon_logger.display('rel errors: %e [%e, %e] Rel Thresh = %e'
                            % (np.median(dd), np.min(dd), np.max(dd), rtol))
        amax = np.argmax(dd)
        if np.isscalar(x):
            neon_logger.display('worst case: %e %e' % (x, y.flat[amax]))
        elif np.isscalar(y):
            neon_logger.display('worst case: %e %e' % (x.flat[amax], y))
        else:
            neon_logger.display('worst case: %e %e' % (x.flat[amax], y.flat[amax]))
    return ac
	def medianVolume(self):
		volpath = os.path.join(self.params['rundir'], "volumes/*a.mrc")
		mrcfiles = glob.glob(volpath)
		volumes = []
		for filename in mrcfiles:
			if os.path.isfile(filename):
				vol = mrc.read(filename)
				print filename, vol.shape
				volumes.append(vol)
		volarray = numpy.asarray(volumes, dtype=numpy.float32)
		try:
			medarray = numpy.median(volarray, axis=0)
		except:
			medarray = numpy.median(volarray)
		medfile = os.path.join(self.params['rundir'], "volumes/medianVolume.mrc")
		print medfile, medarray.shape
		mrc.write(medarray, medfile)

		apix = apStack.getStackPixelSizeFromStackId(self.params['stackid'])
		sessiondata = apStack.getSessionDataFromStackId(self.params['stackid'])

		uploadcmd = ( ("uploadModel.py --projectid=%d --session=%s --file=%s "
				+"--apix=%.3f --sym=%s --name=satmedian-recon%d.mrc --res=30 --description='%s %d'")
			%(self.params['projectid'], sessiondata['name'], medfile, 
				apix, self.params['symmname'], self.params['reconid'],
				"SAT selected median volume for recon", self.params['reconid'], ) )
		apDisplay.printColor(uploadcmd, "purple")
		f = open("upload.sh", "w")
		f.write(uploadcmd+"\n")
		f.close()
Example #15
 def start_requests(self):
     summary_utc = datetime.utcnow() - timedelta(days=1)
     db_engine = create_engine(self.settings.get('SQLALCHEMY_DATABASE_URI'))
     db_session = sessionmaker(bind=db_engine)()
     db_query = db_session.query(LiveTVSite.id.label('site_id'), LiveTVRoom.id.label('room_id'),
                                 LiveTVRoom.url.label('room_url'),
                                 LiveTVRoomPresent.crawl_date_format.label('summary_date'),
                                 func.array_agg(LiveTVRoomPresent.online).label('online_list'))\
         .join(LiveTVSite, LiveTVRoom, LiveTVRoomPresent)\
         .filter(LiveTVRoomPresent.crawl_date_format == summary_utc.strftime(DAILY_DATE_FORMAT))\
         .group_by(LiveTVSite.id, LiveTVRoom.id, LiveTVRoom.url, LiveTVRoomPresent.crawl_date_format)
     for group_row in db_query:
         meta_info = {
             'site_id': group_row.site_id,
             'room_id': group_row.room_id,
             'summary_date': group_row.summary_date,
             'online': numpy.median(group_row.online_list)
         }
         room = self.session.query(LiveTVRoom).filter_by(id=meta_info['room_id']).one_or_none()
         if room:
             yield DailyItem(site_id=group_row.site_id, room_id=group_row.room_id,
                             summary_date=group_row.summary_date, online=numpy.median(group_row.online_list),
                             followers=room.followers, description=room.description, announcement=room.announcement,
                             fallback=False)
     db_session.close()
Example #16
def explore_city_data(city_data):
    """Calculate the Boston housing statistics."""

    # Get the labels and features from the housing data
    housing_prices = city_data.target
    housing_features = city_data.data

    ###################################
    ### Step 1. YOUR CODE GOES HERE ###
    ###################################

    # Please calculate the following values using the Numpy library
    print "Size of data (number of houses)"
    print np.size(housing_prices)
    print "Number of features"
    print np.size(housing_features, 1)
    print "Minimum price"
    print np.min(housing_prices)
    print "Maximum price"
    print np.max(housing_prices)
    print "Calculate mean price"
    print np.mean(housing_prices)
    print "Calculate median price"
    print np.median(housing_prices)
    print "Calculate standard deviation"
    print np.std(housing_prices)
Example #17
def make_lick_individual(targetSN, w1, w2):
    """ Make maps for the kinematics. """
    filename = "lick_corr_sn{0}.tsv".format(targetSN)
    binimg = pf.getdata("voronoi_sn{0}_w{1}_{2}.fits".format(targetSN, w1, w2))
    intens = "collapsed_w{0}_{1}.fits".format(w1, w2)
    extent = calc_extent(intens)
    bins = np.loadtxt(filename, usecols=(0,), dtype=str).tolist()
    bins = np.array([x.split("bin")[1] for x in bins]).astype(int)
    data = np.loadtxt(filename, usecols=np.arange(25)+1).T
    labels = [r'Hd$_A$', r'Hd$_F$', r'CN$_1$', r'CN$_2$', r'Ca4227', r'G4300',
             r'Hg$_A$', r'Hg$_F$', r'Fe4383', r'Ca4455', r'Fe4531', r'C4668',
             r'H$_\beta$', r'Fe5015', r'Mg$_1$', r'Mg$_2$', r'Mg$_b$', r'Fe5270',
             r'Fe5335', r'Fe5406', r'Fe5709', r'Fe5782', r'Na$_D$', r'TiO$_1$',
             r'TiO$_2$']
    mag = "[mag]"
    ang = "[\AA]"
    units = [ang, ang, mag, mag, ang, ang,
             ang, ang, ang, ang, ang, ang,
             ang, ang, mag, mag, ang, ang,
             ang, ang, ang, ang, ang, mag,
             mag]
    lims = [[None, None], [None, None], [None, None], [None, None],
            [None, None], [None, None], [None, None], [None, None],
            [None, None], [None, None], [None, None], [None, None],
            [None, None], [None, None], [None, None], [None, None],
            [None, None], [None, None], [None, None], [None, None],
            [None, None], [None, None], [None, None], [None, None],
            [None, None], [None, None], [None, None], [None, None]]
    pdf = PdfPages("figs/lick_sn{0}.pdf".format(targetSN))
    fig = plt.figure(1, figsize=(6.25,5))
    plt.subplots_adjust(bottom=0.12, right=0.97, left=0.09, top=0.96)
    plt.minorticks_on()
    ax = plt.subplot(111)
    ax.minorticks_on()
    plot_indices = np.arange(12,22)
    for i, vector in enumerate(data):
        if i not in plot_indices:
            continue
        print "Making plot for {0}...".format(labels[i])
        kmap = np.zeros_like(binimg)
        kmap[:] = np.nan
        for bin,v in zip(bins, vector):
            idx = np.where(binimg == bin)
            kmap[idx] = v
        vmin = lims[i][0] if lims[i][0] else np.median(vector) - 2 * vector.std()
        vmax = lims[i][1] if lims[i][1] else np.median(vector) + 2 * vector.std()
        m = plt.imshow(kmap, cmap="inferno", origin="bottom", vmin=vmin,
                   vmax=vmax, extent=extent, aspect="equal")
        make_contours()
        plt.minorticks_on()
        plt.xlabel("X [kpc]")
        plt.ylabel("Y [kpc]")
        plt.xlim(extent[0], extent[1])
        plt.ylim(extent[2], extent[3])
        cbar = plt.colorbar(m)
        cbar.set_label("{0} {1}".format(labels[i], units[i]))
        pdf.savefig()
        plt.clf()
    pdf.close()
    return
Example #18
def plotB2reg(prefix=''):
    w=loadStanFit(prefix+'revE2B2LHregCa.fit')
    px=np.array(np.linspace(-0.5,0.5,101),ndmin=2)
    a1=np.array(w['ma'][:,4],ndmin=2).T+1
    a0=np.array(w['ma'][:,3],ndmin=2).T
    printCI(w,'ma')
    y=np.concatenate([sap(a0+a1*px,97.5,axis=0),sap(a0+a1*px[:,::-1],2.5,axis=0)])
    x=np.squeeze(np.concatenate([px,px[:,::-1]],axis=1))
    man=np.array([-0.4,-0.2,0,0.2,0.4])
    plt.plot(px[0,:],np.median(a0)+np.median(a1)*px[0,:],'red')
    #plt.plot([-1,1],[0.5,0.5],'grey')
    ax=plt.gca()
    ax.set_aspect(1)
    ax.add_patch(plt.Polygon(np.array([x,y]).T,alpha=0.2,fill=True,fc='red',ec='w'))
    y=np.concatenate([sap(a0+a1*px,75,axis=0),sap(a0+a1*px[:,::-1],25,axis=0)])
    ax.add_patch(plt.Polygon(np.array([x,y]).T,alpha=0.2,fill=True,fc='red',ec='w'))
    mus=[]
    for m in range(len(man)):
        mus.append(loadStanFit(prefix+'revE2B2LHC%d.fit'%m)['ma4']+man[m])
    mus=np.array(mus).T
    errorbar(mus,x=man)
    ax.set_xticks(man)
    plt.xlim([-0.5,0.5])
    plt.ylim([-0.6,0.8])
    plt.xlabel('Pivot Displacement')
    plt.ylabel('Perceived Displacement')
Example #19
def plotB3reg():
    w=loadStanFit('revE2B3BHreg.fit')
    printCI(w,'mmu')
    printCI(w,'mr')
    for b in range(2):
        subplot(1,2,b+1)
        plt.title('')
        px=np.array(np.linspace(-0.5,0.5,101),ndmin=2)
        a0=np.array(w['mmu'][:,b],ndmin=2).T
        a1=np.array(w['mr'][:,b],ndmin=2).T
        y=np.concatenate([sap(a0+a1*px,97.5,axis=0),sap(a0+a1*px[:,::-1],2.5,axis=0)])
        x=np.squeeze(np.concatenate([px,px[:,::-1]],axis=1))
        plt.plot(px[0,:],np.median(a0)+np.median(a1)*px[0,:],'red')
        #plt.plot([-1,1],[0.5,0.5],'grey')
        ax=plt.gca()
        ax.set_aspect(1)
        ax.add_patch(plt.Polygon(np.array([x,y]).T,alpha=0.2,fill=True,fc='red',ec='w'))
        y=np.concatenate([sap(a0+a1*px,75,axis=0),sap(a0+a1*px[:,::-1],25,axis=0)])
        ax.add_patch(plt.Polygon(np.array([x,y]).T,alpha=0.2,fill=True,fc='red',ec='w'))
        man=np.array([-0.4,-0.2,0,0.2,0.4])
        mus=[]
        for m in range(len(man)):
            mus.append(loadStanFit('revE2B3BH%d.fit'%m)['mmu'][:,b])
        mus=np.array(mus).T
        errorbar(mus,x=man)
        ax.set_xticks(man)
        plt.xlim([-0.5,0.5])
        plt.ylim([-0.4,0.8])
        #plt.xlabel('Manipulated Displacement')
        if b==0:
            plt.ylabel('Perceived Displacement')
            plt.gca().set_yticklabels([])
        subplot_annotate()
    plt.text(-1.1,-0.6,'Pivot Displacement',fontsize=8);
Example #20
def columnpull(column, index, bg, stdev):
    """Define a column pull detector artifact.

    Parameters
    ----------
    column : array
      The column from a detector.
    index : int
      The index at which the column pull may have started, e.g., the
      location of a bright star.
    bg : float
      The background level of the image.
    stdev : float
      The background standard deviation.

    Returns
    -------
    pull : ndarray
      The shape of the column pull.

    """

    if (index < 0) or (index >= column.shape[0]):
        return

    m1 = np.median(column[:index]) - bg
    m2 = np.median(column[index:]) - bg

    pull = np.zeros_like(column)
    if (np.abs(m1 - m2) / stdev) > 1.0:
        pull[:index] = m1
        pull[index:] = m2

    return pull
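A minimal usage sketch on synthetic data (hypothetical values), assuming columnpull as defined above is in scope:

import numpy as np

rng = np.random.RandomState(0)
bg, stdev = 100.0, 3.0
column = rng.normal(bg, stdev, 128)
column[40:] += 10.0 * stdev                          # simulate a pull starting at row 40
pull = columnpull(column, index=40, bg=bg, stdev=stdev)
corrected = column - pull                            # subtract the modeled pull
print("pulled offset ~ %.1f, corrected ~ %.1f"
      % (np.median(column[40:]) - bg, np.median(corrected[40:]) - bg))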
 def run(self, spikesorter, sign = '-', relative_thresh = 4., 
                     noise_estimation = 'MAD',  threshold_mode = 'crossing',peak_span =  0.3*pq.ms,
                     consistent_across_channels = False,
                     consistent_across_segments = True,
                     ):
     sps = spikesorter
     
     # Threshold estimation
     centers = np.zeros(sps.filtered_sigs.shape, dtype = float)
     noises = np.zeros(sps.filtered_sigs.shape, dtype = float)
     
     for c, s in np.ndindex(sps.filtered_sigs.shape):
         sig = sps.filtered_sigs[c, s]
         if noise_estimation=='MAD':
             centers[c, s] = np.median(sig)
             noises[c, s] = np.median(np.abs(sig-np.median(sig))) / .6745
         elif noise_estimation=='STD':
             centers[c, s] = np.mean(sig)
             noises[c, s] = np.std(sig)
             
     if sign == '+':
         thresholds = centers + noises*abs(relative_thresh) 
     if sign == '-':
         thresholds = centers - noises*abs(relative_thresh) 
     
     
     
     peak_span = int((sps.sig_sampling_rate*peak_span).simplified)
     peak_span = (peak_span//2)*2+1
     # Detect
     sps.spike_index_array = threshold_detection_multi_channel_multi_segment(
                             sps.filtered_sigs, thresholds, sign, 
                             consistent_across_channels,consistent_across_segments,
                             threshold_mode, peak_span)
     sps.detection_thresholds = thresholds
Example #22
    def test_compare_cache_benchmark(self, varying_param, analytics_data, plt):
        stats = pytest.importorskip('scipy.stats')

        d1, d2 = analytics_data
        assert np.all(d1[varying_param] == d2[varying_param]), (
            'Cannot compare different parametrizations')
        axis_label = self.param_to_axis_label[varying_param]

        print("Cache, varying {0}:".format(axis_label))
        for label, key in zip(self.labels, self.keys):
            clean_d1 = [self.reject_outliers(d) for d in d1[key]]
            clean_d2 = [self.reject_outliers(d) for d in d2[key]]
            diff = [np.median(b) - np.median(a)
                    for a, b in zip(clean_d1, clean_d2)]

            p_values = np.array([2. * stats.mannwhitneyu(a, b)[1]
                                 for a, b in zip(clean_d1, clean_d2)])
            overall_p = 1. - np.prod(1. - p_values)
            if overall_p < .05:
                print("  {label}: Significant change (p <= {p:.3f}). See plots"
                      " for details.".format(
                          label=label, p=np.ceil(overall_p * 1000.) / 1000.))
            else:
                print("  {label}: No significant change.".format(label=label))

            plt.plot(d1[varying_param], diff, label=label)

        plt.xlabel("Number of %s" % axis_label)
        plt.ylabel("Difference in build time (s)")
        plt.legend(loc='best')
Example #23
def q1():
    # generate random clusters
    clusters = []
    sizes = range(2, 201)
    for size in sizes:
        clusters.append(gen_random_clusters(size))
    
    # get running times
    random.seed(912)
    
    # run 10 trials, and take the median time for each n to smooth data
    slow_trials = np.zeros((10, 199))
    fast_trials = np.zeros((10, 199))
    for i in range(10):
        slow_trials[i,:] = timer(slow_closest_pair, clusters)
        fast_trials[i,:] = timer(fast_closest_pair, clusters)
       
    # times
    slow_times = np.median(slow_trials, 0)
    fast_times = np.median(fast_trials, 0)
    
    # plot
    plt.figure()
    plt.plot(sizes, slow_times, 'c-', label='slow_closest_pair')
    plt.plot(sizes, fast_times, 'm-', label='fast_closest_pair')
    plt.legend(loc='upper left')
    plt.xlabel('Size of Cluster List')
    plt.ylabel('Median Running Time (s), 10 Trials')
    plt.title('Comparison of Running Times on Desktop Python')
    plt.show()
    
    return None
Example #24
def lonlat2xy(lon,lat,lon_0=None,lat_0=None):
    """ Convert pairs of (Lat,Lon) into (x,y)

        Input:
                      Lon [deg]
          Lat [deg]
          Lon_0 [deg] => Lon of the origin of the cartesian system
          Lat_0 [deg] => Lat of the origin of the cartesian system
        Output:
                      x [m]
          y [m]

        The projection is deformed as get away from the center. Since the
          Latitudes don't deform, the y is estimated first, then for each
          point is estimated the distante to the meridian of reference
          (Lon_0) considering the Latitude of the measurement.
    """
    if (lat_0==None) or (lon_0==None):
        lat_0=numpy.median(lat)
        lon_0=numpy.median(lon)
    from fluid.common.distance import distance
    y=distance(lat,0,lat_0,0)
    y[lat<lat_0]=-1*y[lat<lat_0]
    x=distance(lat,lon,lat,lon_0)
    x[lon<lon_0]=-1*x[lon<lon_0]
    return x,y
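A self-contained sketch of the same construction, substituting a haversine great-circle arc length for the fluid.common.distance helper (not available here); the coordinates are hypothetical:

import numpy

def _arclength_m(lat1, lon1, lat2, lon2, R=6371000.0):
    # great-circle distance on a spherical Earth (haversine), in metres
    lat1, lon1, lat2, lon2 = [numpy.radians(numpy.asarray(v, dtype=float))
                              for v in (lat1, lon1, lat2, lon2)]
    a = (numpy.sin((lat2 - lat1) / 2) ** 2
         + numpy.cos(lat1) * numpy.cos(lat2) * numpy.sin((lon2 - lon1) / 2) ** 2)
    return 2 * R * numpy.arcsin(numpy.sqrt(a))

lon = numpy.array([-38.2, -38.0, -37.9])
lat = numpy.array([-12.1, -12.0, -11.8])
lat_0, lon_0 = numpy.median(lat), numpy.median(lon)

y = _arclength_m(lat, 0.0, lat_0, 0.0)          # north-south offset along a meridian
y[lat < lat_0] *= -1
x = _arclength_m(lat, lon, lat, lon_0)          # east-west offset at each point's latitude
x[lon < lon_0] *= -1
print(x)
print(y)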
def is_outlier(points, threshold=3.5):
    """
    Returns a boolean array with True if points are outliers and False 
    otherwise.
    
    Data points with a modified z-score greater than this 
    # value will be classified as outliers.
    """
    # transform into vector
    if len(points.shape) == 1:
        points = points[:,None]

    # compute median value    
    median = np.median(points, axis=0)
    
    # compute diff sums along the axis
    diff = np.sum((points - median)**2, axis=-1)
    diff = np.sqrt(diff)
    # compute MAD
    med_abs_deviation = np.median(diff)
    
    # compute modified Z-score
    # http://www.itl.nist.gov/div898/handbook/eda/section4/eda43.htm#Iglewicz
    modified_z_score = 0.6745 * diff / med_abs_deviation

    # return a mask for each outlier
    return modified_z_score > threshold
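A minimal usage sketch on synthetic data, assuming is_outlier as defined above is in scope:

import numpy as np

rng = np.random.RandomState(0)
points = np.concatenate([rng.normal(0.0, 1.0, 100), [8.0, -9.0]])
mask = is_outlier(points, threshold=3.5)
print(points[mask])                                   # should include the two injected extremes
print("%d outliers out of %d points" % (mask.sum(), points.size))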
Example #26
 def denoise(self, data, wavelet):
     noiseSigma = median(absolute(data - median(data))) / 0.6745
     levels = int(floor(log(len(data))))
     WC = pywt.wavedec(data, wavelet, level=levels)
     threshold = noiseSigma * sqrt(2 * log(len(data)))
     NWC = map(lambda x: pywt.thresholding.hard(x, threshold), WC)
     return pywt.waverec(NWC, wavelet)
Example #27
def _idealize_uncert(dds):
    for action in dds.actions:
        field = action.diffeo.d
        field_inv = action.diffeo_inv.d
    
        I = np.zeros(field.shape)
        Y, X = np.meshgrid(range(field.shape[1]), range(field.shape[0]))
        I[:, :, 0] = X
        I[:, :, 1] = Y
        
        D = field - I
        v = (np.median(D[:, :, 0]), np.median(D[:, :, 1]))
        
        D_inv = field_inv - I
        v_inv = (np.median(D_inv[:, :, 0]), np.median(D_inv[:, :, 1]))
        
        print('v     = ' + str(v))
        print('v_inv = ' + str(v_inv))
        
        for c in itertools.product(range(X.shape[0]), range(X.shape[1])):
            
            if defined_cell(c, X.shape, v):
                action.diffeo.variance[c] = 1.0
            else:
                action.diffeo.variance[c] = 0.0
                
            if defined_cell(c, X.shape, v_inv):
                action.diffeo_inv.variance[c] = 1.0
            else:
                action.diffeo_inv.variance[c] = 0.0
    return dds
Example #28
 def remaining_time(self):
     """Return our best estimate of the remaining duration, or None
     if we have no bases for guessing."""
     if self.end_times is None:
         return None # We have not started the first module yet
     else:
         module_index = self.current_module.module_num - 1
         index = self.image_set_index * self.num_modules + module_index
         durations = (self.end_times[1:] - self.end_times[:-1]).reshape(self.num_image_sets, self.num_modules)
         per_module_estimates = np.zeros(self.num_modules)
         per_module_estimates[:module_index] = np.median(durations[:self.image_set_index+1,:module_index], 0)
         current_module_so_far = self.adjusted_time() - self.end_times[1 + index - 1]
         if self.image_set_index > 0:
             per_module_estimates[module_index:] = np.median(durations[:self.image_set_index,module_index:], 0)
             per_module_estimates[module_index] = max(per_module_estimates[module_index], current_module_so_far)
         else:
             # Guess that the modules that haven't finished yet are
             # as slow as the slowest one we've seen so far.
             per_module_estimates[module_index] = current_module_so_far
             per_module_estimates[module_index:] = per_module_estimates[:module_index+1].max()
         if False:
             print "current_module_so_far =", current_module_so_far, "; adjusted_time =", self.adjusted_time(), "; end_times =", self.end_times
             print "durations:"
             print durations
             print "per_module_estimates:"
             print per_module_estimates
         per_module_estimates[:module_index] *= self.num_image_sets - self.image_set_index - 1
         per_module_estimates[module_index:] *= self.num_image_sets - self.image_set_index
         per_module_estimates[module_index] -= current_module_so_far
         return per_module_estimates.sum()
Example #29
    def __init__(self, f, label, color="k", linestyle="-"):
        d = np.load(f)
        self.data = d
        self.mass = d["mass"]
        self.ul_med = []
        self.ul68_lo = []
        self.ul68_hi = []
        self.ul95_lo = []
        self.ul95_hi = []
        self.label = label
        self.color = color
        self.linestyle = linestyle

        for i in range(len(d["mass"])):

            ul = np.sort(d["ul"][:, i])
            ul = ul[ul > 0]

            n = len(ul)

            m = np.median(ul)

            self.ul68_lo.append(ul[int(max(0, n / 2.0 - n * 0.34))])
            self.ul68_hi.append(ul[int(min(n - 1, n / 2.0 + n * 0.34))])
            self.ul95_lo.append(ul[int(max(0, n / 2.0 - n * 0.95 / 2.0))])
            self.ul95_hi.append(ul[int(min(n - 1, n / 2.0 + n * 0.95 / 2.0))])
            self.ul_med.append(np.median(ul))
Example #30
    def createModel(self,b,g,r):
        bMinusr = self.bMinusr
        bMinusg = self.bMinusg
        b0 = b.copy()
        g0 = g.copy()
        r0 = r.copy()
        
        w = r.shape[0]/2-5
        rb = r0/b0
        gb = g0/b0
        rnorm = numpy.median(rb[w:-w,w:-w])
        gnorm = numpy.median(gb[w:-w,w:-w])
        r0 /= rnorm
        g0 /= gnorm
        r0 *= 10**(0.4*bMinusr)
        g0 *= 10**(0.4*bMinusg)

        r0 /= 620.
        g0 /= 540.
        b0 /= 460.

        I = (r0+g0+b0)/3.
        self.I = I
        self.rnorm = rnorm
        self.gnorm = gnorm
        return self.colorize(b,g,r)
    print('Finished runInfo - which assesses the refresh and processes of this computer')
    #check that the screen refresh is what we are assuming it is ##################################
    Hzs = list()
    myWin.flip()
    myWin.flip()
    myWin.flip()
    myWin.flip()
    myWin.setRecordFrameIntervals(True)  # otherwise myWin.fps won't work
    print('About to measure frame flips')
    for i in range(50):
        myWin.flip()
        Hzs.append(myWin.fps())  # varies wildly on successive runs!
    myWin.setRecordFrameIntervals(False)
    # end testing of screen refresh########################################################
    Hzs = np.array(Hzs)
    Hz = np.median(Hzs)
    msPerFrame = 1000./Hz
    refreshMsg1 = 'Frames per second ~=' + str(np.round(Hz, 1))
    refreshRateTolerancePct = 3
    pctOff = abs((np.median(Hzs)-refreshRate) / refreshRate)
    refreshRateWrong = pctOff > (refreshRateTolerancePct/100.)
    if refreshRateWrong:
        refreshMsg1 += ' BUT'
        refreshMsg1 += ' program assumes ' + str(refreshRate)
        refreshMsg2 = 'which is off by more than' + str(round(refreshRateTolerancePct, 0)) + '%!!'
    else:
        refreshMsg1 += ', which is close enough to desired val of ' + str(round(refreshRate, 1))
    myWinRes = myWin.size
    myWin.allowGUI = True
    print(myWinRes)
myWin.close()  # have to close window to show dialog box
Example #32
def main():
  # Command line arguments
  parser = argparse.ArgumentParser('Label an image using the cat model')
  parser.add_argument(
      '-s',
      '--server',
      help='URL of host serving the cat model'
  )
  parser.add_argument(
      '-p',
      '--port',
      type=int,
      default=9000,
      help='Port at which cat model is being served'
  )
  parser.add_argument(
      '-m',
      '--model',
      type=str,
      default='resnet',
      help='Paths (local or url) to images you would like to label'
  )
  parser.add_argument(
      '-d',
      '--dim',
      type=int,
      default=224,
      help='Size of (square) image, an integer indicating its width and '
           'height. Resnet\'s default is 224'
  )
  parser.add_argument(
      '-r',
      '--replications',
      type=int,
      default=1,
      help='How many times to replicate samples to send a larger batch size'
  )
  parser.add_argument(
      'images',
      type=str,
      nargs='+',
      help='Paths (local, GCS, or url) to images you would like to label'
  )
  parser.add_argument(
      '-n',
      '--num_trials',
      type=int,
      default=1,  # assumed default (must be an int)
      help='Number of times to call the server when profiling request delays'
  )
  args = parser.parse_args()

  # Preprocess images at the client and compress as jpeg
  img_size = args.dim
  images = args.images

  jpeg_batch = preprocess_and_encode_images(images, img_size)

  # Create r copies of the array for profiling.
  batch_array = []
  for i in range(0, args.replications):
    batch_array = np.append(batch_array, jpeg_batch, axis=0)
  batch_size = len(batch_array)
  print("Batch size: " + str(batch_size))

  # Call the server num_trials times
  elapsed_times = []
  for t in range(0, args.num_trials):
    # Call the server to predict top 5 classes and probabilities, and time taken
    result, elapsed = predict_and_profile(
        args.server, args.port, args.model, batch_array)
    # Print and log the delay
    print('Request delay: ' + str(elapsed) + ' ms')
    elapsed_times.append(elapsed)

  print('Mean: %0.2f' % np.mean(elapsed_times))
  print('Median: %0.2f' % np.median(elapsed_times))
  print('Min: %0.2f' % np.min(elapsed_times))
  print('Max: %0.2f' % np.max(elapsed_times))
Example #33
    def data_statistics(self, Ephem):
        '''
        Make statistics on the data.
        Useful to summarize night conditions.
        '''
        def select_bests(values, number):
            return (np.sort(values)[::-1][0:number])

        def fourier_filter(array, nterms):
            '''
            Make a fourier filter for the first nterms terms.
            '''
            array_fft = np.fft.fft(array)
            # Filter data
            array_fft[nterms:] = 0
            filtered_array = np.fft.ifft(array_fft)
            return (filtered_array)

        def window_smooth(x, window_len=10, window='hanning'):
            # http://scipy-cookbook.readthedocs.io/items/SignalSmooth.html
            x = np.asarray(x)
            if x.ndim != 1: raise ValueError, "smooth requires 1-d arrays"
            if x.size < window_len:
                raise ValueError, "size(input) < window_size"
            if window_len < 3: return x
            if not window in [
                    'flat', 'hanning', 'hamming', 'bartlett', 'blackman'
            ]:
                raise ValueError, \
                    "Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'"
            s = np.r_[x[window_len - 1:0:-1], x, x[-2:-window_len - 1:-1]]
            if window == 'flat':  # moving average
                w = np.ones(window_len, 'd')
            else:
                w = getattr(np, window)(window_len)
            y = np.convolve(w / w.sum(), s, mode='valid')
            return (y)

        astronomical_night_filter = ( \
                    (np.array(self.all_night_dt) > Ephem.twilight_prev_set) * \
                    (np.array(self.all_night_dt) < Ephem.twilight_next_rise))

        if np.sum(astronomical_night_filter) > 10:
            self.astronomical_night_sb = \
                np.array(self.all_night_sb)[astronomical_night_filter]
            self.astronomical_night_temp = \
                np.array(self.all_night_temp)[astronomical_night_filter]
        else:
            print( \
                        'Warning, < 10 points in astronomical night, ' + \
                        ' using the whole night data instead')
            self.astronomical_night_sb = self.all_night_sb
            self.astronomical_night_temp = self.all_night_temp

        Stat = self.Statistics
        # with self.Statistics as Stat:
        # Complete list
        Stat.mean = np.mean(self.astronomical_night_sb)
        Stat.median = np.median(self.astronomical_night_sb)
        Stat.std = np.std(self.astronomical_night_sb)
        Stat.number = np.size(self.astronomical_night_sb)
        # Only the best 1/100th.
        Stat.bests_number = 1 + Stat.number / 25
        Stat.bests_mean = np.mean(
            select_bests(self.astronomical_night_sb, Stat.bests_number))
        Stat.bests_median = np.median(
            select_bests(self.astronomical_night_sb, Stat.bests_number))
        Stat.bests_std = np.std(
            select_bests(self.astronomical_night_sb, Stat.bests_number))
        Stat.bests_err = Stat.bests_std * 1. / np.sqrt(Stat.bests_number)

        Stat.model_nterm = 1 + Stat.number / 25
        # data_smooth = fourier_filter(self.astronomical_night_sb,nterms=Stat.model_nterm)
        data_smooth = window_smooth(self.astronomical_night_sb,
                                    window_len=Stat.model_nterm)
        min_length = min(len(data_smooth), len(self.astronomical_night_sb))
        data_residuals = (self.astronomical_night_sb[:min_length]
                          - data_smooth[:min_length])
        Stat.data_model_abs_meandiff = np.mean(np.abs(data_residuals))
        Stat.data_model_sum_squareresiduals = np.sum(data_residuals**2)

        # Other interesting data
        Stat.min_temperature = np.min(self.astronomical_night_temp)
        Stat.max_temperature = np.max(self.astronomical_night_temp)
Example #34
    all_regions = np.unique(current_latency_dataframe['region'])
    for region in all_regions:
        if len(selected_frame) == 0:
            region_units = current_latency_dataframe[(
                current_latency_dataframe['region'] == region)]
        else:
            region_units = current_latency_dataframe[
                (current_latency_dataframe['region'] == region)
                & (current_latency_dataframe['frame'] == selected_frame)]

        all_latencies = region_units[current_version].values.astype(float)
        all_latencies = all_latencies[~np.isnan(all_latencies)]
        if len(all_latencies) == 0:
            all_latencies = np.zeros(1)
        latencies_across_region.append(all_latencies)
        mean_per_region.append(np.median(all_latencies))
        all_region_names.append(region)

    if display_plot:
        regional_medians = []
        for region_e in regions_in_all_exps:
            if region_e in all_region_names:
                regional_medians.append(
                    mean_per_region[all_region_names.index(region_e)])
            else:
                regional_medians.append(0)
        ax.set_ylim([0, 200])
        x_axis_vals = range(1, len(regions_in_all_exps) + 1)
        ax.set_xticks(x_axis_vals)
        ax.set_xticklabels(regions_in_all_exps)
        ax.plot(x_axis_vals, regional_medians, marker='o')
Example #35
import random
import numpy as np
import matplotlib.pyplot as plt
num_tries = 1000
succ_chance = 0.4
i = 0
avg_tries = []
while (i < num_tries):
    tries = 0
    while (True):
        roll = random.random()
        tries += 1
        if roll > (1 - succ_chance):
            break
    avg_tries.append(tries)
    i += 1

print("empirical: ", np.median(avg_tries))
print("logarithm: ", np.log(0.5) / np.log(1 - succ_chance))
print(" division: ", 1 / succ_chance)

plt.hist(avg_tries, bins=20)
plt.show()
def main():
    args = get_args()

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    if args.cuda and torch.cuda.is_available() and args.cuda_deterministic:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    log_dir = os.path.expanduser(args.log_dir)
    eval_log_dir = log_dir + "_eval"
    utils.cleanup_log_dir(log_dir)
    utils.cleanup_log_dir(eval_log_dir)

    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")

    name = "compiled_dataset_08131950" #add 50 back in
    embed_dim = 300 # switch this later!!
    embed_size = embed_dim

    with open('data/'+name+'_all_instructions', 'rb') as f:
        all_instructions = pickle.load(f)

    vocab, vocab_weights = build_vocabulary(all_instructions, name, embed_dim)

    vocab.add_word('<pad>')
    vocab.add_word('<start>')
    vocab.add_word('<end>')
    vocab.add_word('<unk>')

    envs = make_vec_envs(args.env_name, args.seed, args.num_processes,
                         args.gamma, args.log_dir, device, False, vocabulary=vocab)

    #actor_critic = Policy(
    #    envs.observation_space.shape,
    #    envs.action_space,
    #    base_kwargs={'recurrent': args.recurrent_policy})
    
    actor_critic, ob_rms = torch.load(args.load_dir + ".pt")
    actor_critic.to(device)

    if args.algo == 'a2c':
        agent = algo.A2C_ACKTR(
            actor_critic,
            args.value_loss_coef,
            args.entropy_coef,
            lr=args.lr,
            eps=args.eps,
            alpha=args.alpha,
            max_grad_norm=args.max_grad_norm)
    elif args.algo == 'ppo':
        agent = algo.PPO(
            actor_critic,
            args.clip_param,
            args.ppo_epoch,
            args.num_mini_batch,
            args.value_loss_coef,
            args.entropy_coef,
            lr=args.lr,
            eps=args.eps,
            max_grad_norm=args.max_grad_norm)
    elif args.algo == 'acktr':
        agent = algo.A2C_ACKTR(
            actor_critic, args.value_loss_coef, args.entropy_coef, acktr=True)

    if args.gail:
        assert len(envs.observation_space.shape) == 1
        discr = gail.Discriminator(
            envs.observation_space.shape[0] + envs.action_space.shape[0], 100,
            device)
        file_name = os.path.join(
            args.gail_experts_dir, "trajs_{}.pt".format(
                args.env_name.split('-')[0].lower()))

        gail_train_loader = torch.utils.data.DataLoader(
            gail.ExpertDataset(
                file_name, num_trajectories=4, subsample_frequency=20),
            batch_size=args.gail_batch_size,
            shuffle=True,
            drop_last=True)

    #print(args.num_env_steps)
    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              envs.observation_space.shape, envs.action_space,
                              actor_critic.recurrent_hidden_state_size)

    obs = envs.reset()
    rollouts.obs[0].copy_(obs)
    rollouts.to(device)

    episode_rewards = deque(maxlen=10)

    start = time.time()
    num_updates = int(
        args.num_env_steps) // args.num_steps // args.num_processes
    
    #print(num_updates)
    for j in range(num_updates):

        if args.use_linear_lr_decay:
            # decrease learning rate linearly
            utils.update_linear_schedule(
                agent.optimizer, j, num_updates,
                agent.optimizer.lr if args.algo == "acktr" else args.lr)

        for step in range(args.num_steps):
            # Sample actions
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    rollouts.obs[step], rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step])

            # Observe reward and next obs
            obs, reward, done, infos = envs.step(action)

            for info in infos:
                if 'episode' in info.keys():
                    episode_rewards.append(info['episode']['r'])

            # If done then clean the history of observations.
            masks = torch.FloatTensor(
                [[0.0] if done_ else [1.0] for done_ in done])
            bad_masks = torch.FloatTensor(
                [[0.0] if 'bad_transition' in info.keys() else [1.0]
                 for info in infos])
            rollouts.insert(obs, recurrent_hidden_states, action,
                            action_log_prob, value, reward, masks, bad_masks)

        with torch.no_grad():
            next_value = actor_critic.get_value(
                rollouts.obs[-1], rollouts.recurrent_hidden_states[-1],
                rollouts.masks[-1]).detach()

        if args.gail:
            if j >= 10:
                envs.venv.eval()

            gail_epoch = args.gail_epoch
            if j < 10:
                gail_epoch = 100  # Warm up
            for _ in range(gail_epoch):
                discr.update(gail_train_loader, rollouts,
                             utils.get_vec_normalize(envs)._obfilt)

            for step in range(args.num_steps):
                rollouts.rewards[step] = discr.predict_reward(
                    rollouts.obs[step], rollouts.actions[step], args.gamma,
                    rollouts.masks[step])

        rollouts.compute_returns(next_value, args.use_gae, args.gamma,
                                 args.gae_lambda, args.use_proper_time_limits)

        value_loss, action_loss, dist_entropy = agent.update(rollouts)

        rollouts.after_update()

        # save for every interval-th episode or for the last epoch
        if (j % args.save_interval == 0
                or j == num_updates - 1) and args.save_dir != "":
            save_path = os.path.join(args.save_dir, args.algo)
            try:
                os.makedirs(save_path)
            except OSError:
                pass

            torch.save([
                actor_critic,
                getattr(utils.get_vec_normalize(envs), 'ob_rms', None)
            ], os.path.join(save_path, args.model_name + ".pt"))

        if j % args.log_interval == 0 and len(episode_rewards) > 1:
            total_num_steps = (j + 1) * args.num_processes * args.num_steps
            end = time.time()
            print(
                "Updates {}, num timesteps {}, FPS {} \n Last {} training episodes: mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}\n"
                .format(j, total_num_steps,
                        int(total_num_steps / (end - start)),
                        len(episode_rewards), np.mean(episode_rewards),
                        np.median(episode_rewards), np.min(episode_rewards),
                        np.max(episode_rewards), dist_entropy, value_loss,
                        action_loss))

        if (args.eval_interval is not None and len(episode_rewards) > 1
                and j % args.eval_interval == 0):
            ob_rms = utils.get_vec_normalize(envs).ob_rms
            evaluate(actor_critic, ob_rms, args.env_name, args.seed,
                     args.num_processes, eval_log_dir, device)
Example #37
def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):
    """
    Bootstrap plot on mean, median and mid-range statistics.

    The bootstrap plot is used to estimate the uncertainty of a statistic
    by relying on random sampling with replacement [1]_. This function will
    generate bootstrapping plots for mean, median and mid-range statistics
    for the given number of samples of the given size.

    .. [1] "Bootstrapping (statistics)" in \
    https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29

    Parameters
    ----------
    series : pandas.Series
        Pandas Series from where to get the samplings for the bootstrapping.
    fig : matplotlib.figure.Figure, default None
        If given, it will use the `fig` reference for plotting instead of
        creating a new one with default parameters.
    size : int, default 50
        Number of data points to consider during each sampling. It must be
        less than or equal to the length of the `series`.
    samples : int, default 500
        Number of times the bootstrap procedure is performed.
    **kwds :
        Options to pass to matplotlib plotting method.

    Returns
    -------
    fig : matplotlib.figure.Figure
        Matplotlib figure

    See Also
    --------
    pandas.DataFrame.plot : Basic plotting for DataFrame objects.
    pandas.Series.plot : Basic plotting for Series objects.

    Examples
    --------

    .. plot::
            :context: close-figs

            >>> s = pd.Series(np.random.uniform(size=100))
            >>> fig = pd.plotting.bootstrap_plot(s)  # doctest: +SKIP
    """
    import random
    import matplotlib.pyplot as plt

    # random.sample(ndarray, int) fails on python 3.3, sigh
    data = list(series.values)
    samplings = [random.sample(data, size) for _ in range(samples)]

    means = np.array([np.mean(sampling) for sampling in samplings])
    medians = np.array([np.median(sampling) for sampling in samplings])
    midranges = np.array([(min(sampling) + max(sampling)) * 0.5
                          for sampling in samplings])
    if fig is None:
        fig = plt.figure()
    x = list(range(samples))
    axes = []
    ax1 = fig.add_subplot(2, 3, 1)
    ax1.set_xlabel("Sample")
    axes.append(ax1)
    ax1.plot(x, means, **kwds)
    ax2 = fig.add_subplot(2, 3, 2)
    ax2.set_xlabel("Sample")
    axes.append(ax2)
    ax2.plot(x, medians, **kwds)
    ax3 = fig.add_subplot(2, 3, 3)
    ax3.set_xlabel("Sample")
    axes.append(ax3)
    ax3.plot(x, midranges, **kwds)
    ax4 = fig.add_subplot(2, 3, 4)
    ax4.set_xlabel("Mean")
    axes.append(ax4)
    ax4.hist(means, **kwds)
    ax5 = fig.add_subplot(2, 3, 5)
    ax5.set_xlabel("Median")
    axes.append(ax5)
    ax5.hist(medians, **kwds)
    ax6 = fig.add_subplot(2, 3, 6)
    ax6.set_xlabel("Midrange")
    axes.append(ax6)
    ax6.hist(midranges, **kwds)
    for axis in axes:
        plt.setp(axis.get_xticklabels(), fontsize=8)
        plt.setp(axis.get_yticklabels(), fontsize=8)
    return fig
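# Hedged usage sketch (not part of pandas): the three statistics bootstrap_plot draws can be
# computed directly with numpy; the sample size must stay <= len(series) because sampling here
# is without replacement.
import numpy as np
import pandas as pd

s = pd.Series(np.random.uniform(size=100))
data = list(s.values)
samplings = [np.random.choice(data, size=50, replace=False) for _ in range(500)]
means = np.array([np.mean(sampling) for sampling in samplings])
medians = np.array([np.median(sampling) for sampling in samplings])
midranges = np.array([(min(sampling) + max(sampling)) * 0.5 for sampling in samplings])
print(means.std(), medians.std(), midranges.std())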
            
Example #38
0
            # NOTE: pseudocounts come from the F4 dilution estimate, so we only
            # listen to the data if there are enough data points to listen to
            len_pseudo = 1
            n_pseudo = sample.get_n_templates_dilutions()
            n_allp = np.concatenate([n_all, ([n_pseudo] * len_pseudo)])

            if VERBOSE >= 2:
                print 'Number of doubly polymorphic sites:', nsites, 'n_pseudo:', n_pseudo

            # NOTE: the estimate of n has a bad distribution because some points are
            # exactly on the diagonal, so we average the inverse (which is well
            # behaved) and also take the medians as alternatives
            n = 1.0 / (1.0 / n_allp).mean()
            ninv = n_allp.mean()
            nmed = np.median(n_allp)
            if VERBOSE >= 2:
                print fr1, fr2, n, ninv, nmed

            key = (samplename, fr1, fr2)
            data['af'][key] = (af1[indfm], af2[indfm])
            data['mean'][key] = mea
            data['var'][key] = var
            data['n_all'][key] = n_all
            data['n'][key] = n
            data['ninv'][key] = ninv
            data['nmed'][key] = nmed
            data['nsites'][key] = nsites
            data['npseudo'][samplename] = n_pseudo

    if use_plot:
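# Toy illustration (made-up numbers) of why the snippet above averages the inverse of n:
# the harmonic mean and the median stay close to the bulk of the estimates when a few points
# near the diagonal blow up, while the plain mean is dominated by them.
import numpy as np

n_allp = np.array([50.0, 80.0, 120.0, 4000.0])
n_harmonic = 1.0 / (1.0 / n_allp).mean()
n_plain = n_allp.mean()
n_median = np.median(n_allp)
print(n_harmonic, n_plain, n_median)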
Example #39
0
    def plan_experiment(self):
        use_nonzero_mask_for_normalization = self.determine_whether_to_use_mask_for_norm()
        print("Are we using the nonzero mask for normalizaion?", use_nonzero_mask_for_normalization)
        spacings = self.dataset_properties['all_spacings']
        sizes = self.dataset_properties['all_sizes']

        all_classes = self.dataset_properties['all_classes']
        modalities = self.dataset_properties['modalities']
        num_modalities = len(list(modalities.keys()))

        target_spacing = self.get_target_spacing()
        new_shapes = [np.array(i) / target_spacing * np.array(j) for i, j in zip(spacings, sizes)]

        max_spacing_axis = np.argmax(target_spacing)
        remaining_axes = [i for i in list(range(3)) if i != max_spacing_axis]
        self.transpose_forward = [max_spacing_axis] + remaining_axes
        self.transpose_backward = [np.argwhere(np.array(self.transpose_forward) == i)[0][0] for i in range(3)]

        # we base our calculations on the median shape of the datasets
        median_shape = np.median(np.vstack(new_shapes), 0)
        print("the median shape of the dataset is ", median_shape)

        max_shape = np.max(np.vstack(new_shapes), 0)
        print("the max shape in the dataset is ", max_shape)
        min_shape = np.min(np.vstack(new_shapes), 0)
        print("the min shape in the dataset is ", min_shape)

        print("we don't want feature maps smaller than ", self.unet_featuremap_min_edge_length, " in the bottleneck")

        # how many stages will the image pyramid have?
        self.plans_per_stage = list()

        target_spacing_transposed = np.array(target_spacing)[self.transpose_forward]
        median_shape_transposed = np.array(median_shape)[self.transpose_forward]
        print("the transposed median shape of the dataset is ", median_shape_transposed)

        print("generating configuration for 3d_fullres")
        self.plans_per_stage.append(self.get_properties_for_stage(target_spacing_transposed, target_spacing_transposed,
                                                                  median_shape_transposed,
                                                                  len(self.list_of_cropped_npz_files),
                                                                  num_modalities, len(all_classes) + 1))

        # thanks Zakiyi (https://github.com/MIC-DKFZ/nnUNet/issues/61) for spotting this bug :-)
        # if np.prod(self.plans_per_stage[-1]['median_patient_size_in_voxels'], dtype=np.int64) / \
        #        architecture_input_voxels < HOW_MUCH_OF_A_PATIENT_MUST_THE_NETWORK_SEE_AT_STAGE0:
        architecture_input_voxels_here = np.prod(self.plans_per_stage[-1]['patch_size'], dtype=np.int64)
        if np.prod(median_shape) / architecture_input_voxels_here < \
                self.how_much_of_a_patient_must_the_network_see_at_stage0:
            more = False
        else:
            more = True

        if more:
            print("generating configuration for 3d_lowres")
            # if we are doing more than one stage then we want the lowest stage to have exactly
            # HOW_MUCH_OF_A_PATIENT_MUST_THE_NETWORK_SEE_AT_STAGE0 (4 by default), so the number of voxels in the
            # median shape of the lowest stage must be 4 times as much as the network can process at once
            # (128x128x128 by default). The problem is that we downsample the higher-resolution axes before we
            # start downsampling the out-of-plane axis. We could probably do this analytically, but here we
            # do it iteratively instead

            lowres_stage_spacing = deepcopy(target_spacing)
            num_voxels = np.prod(median_shape, dtype=np.float64)
            while num_voxels > self.how_much_of_a_patient_must_the_network_see_at_stage0 * architecture_input_voxels_here:
                max_spacing = max(lowres_stage_spacing)
                if np.any((max_spacing / lowres_stage_spacing) > 2):
                    lowres_stage_spacing[(max_spacing / lowres_stage_spacing) > 2] \
                        *= 1.01
                else:
                    lowres_stage_spacing *= 1.01
                num_voxels = np.prod(target_spacing / lowres_stage_spacing * median_shape, dtype=np.float64)

                lowres_stage_spacing_transposed = np.array(lowres_stage_spacing)[self.transpose_forward]
                new = self.get_properties_for_stage(lowres_stage_spacing_transposed, target_spacing_transposed,
                                                    median_shape_transposed,
                                                    len(self.list_of_cropped_npz_files),
                                                    num_modalities, len(all_classes) + 1)
                architecture_input_voxels_here = np.prod(new['patch_size'], dtype=np.int64)
            if 2 * np.prod(new['median_patient_size_in_voxels'], dtype=np.int64) < np.prod(
                    self.plans_per_stage[0]['median_patient_size_in_voxels'], dtype=np.int64):
                self.plans_per_stage.append(new)

        self.plans_per_stage = self.plans_per_stage[::-1]
        self.plans_per_stage = {i: self.plans_per_stage[i] for i in range(len(self.plans_per_stage))}  # convert to dict

        print(self.plans_per_stage)
        print("transpose forward", self.transpose_forward)
        print("transpose backward", self.transpose_backward)

        normalization_schemes = self.determine_normalization_scheme()
        only_keep_largest_connected_component, min_size_per_class, min_region_size_per_class = None, None, None
        # removed training data based postprocessing. This is deprecated

        # these are independent of the stage
        plans = {'num_stages': len(list(self.plans_per_stage.keys())), 'num_modalities': num_modalities,
                 'modalities': modalities, 'normalization_schemes': normalization_schemes,
                 'dataset_properties': self.dataset_properties, 'list_of_npz_files': self.list_of_cropped_npz_files,
                 'original_spacings': spacings, 'original_sizes': sizes,
                 'preprocessed_data_folder': self.preprocessed_output_folder, 'num_classes': len(all_classes),
                 'all_classes': all_classes, 'base_num_features': self.unet_base_num_features,
                 'use_mask_for_norm': use_nonzero_mask_for_normalization,
                 'keep_only_largest_region': only_keep_largest_connected_component,
                 'min_region_size_per_class': min_region_size_per_class, 'min_size_per_class': min_size_per_class,
                 'transpose_forward': self.transpose_forward, 'transpose_backward': self.transpose_backward,
                 'data_identifier': self.data_identifier, 'plans_per_stage': self.plans_per_stage,
                 'preprocessor_name': self.preprocessor_name,
                 'conv_per_stage': self.conv_per_stage,
                 }

        self.plans = plans
        self.save_my_plans()
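# Standalone sketch (toy spacings/sizes, not nnU-Net itself) of the median-shape and axis
# transposition bookkeeping used in plan_experiment above.
import numpy as np

spacings = [np.array([3.0, 0.8, 0.8]), np.array([2.5, 1.0, 1.0])]
sizes = [np.array([40, 320, 320]), np.array([50, 256, 256])]
target_spacing = np.array([2.0, 1.0, 1.0])

new_shapes = [np.array(i) / target_spacing * np.array(j) for i, j in zip(spacings, sizes)]
median_shape = np.median(np.vstack(new_shapes), 0)

max_spacing_axis = np.argmax(target_spacing)
transpose_forward = [max_spacing_axis] + [i for i in range(3) if i != max_spacing_axis]
transpose_backward = [np.argwhere(np.array(transpose_forward) == i)[0][0] for i in range(3)]
print(median_shape, transpose_forward, transpose_backward)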
Example #40
0
                #label = ['Min','Mean','Median','Max','Std']
                #clust_data = [[np.min(x)],[np.mean(x)],[np.median(x)],np.max(x),[np.std(x)]]
                #the_table = ax.table(cellText=clust_data,rowLabels=label,loc='center')
                #ax.text(2, 10, r'$\cos(2 \pi t) \exp(-t)$', fontdict=font)
                #ax2.text(2, 10, r'$min$={min}'.format(min=np.min(x)))
                #ax2.text(0.1,0.6,'$min={:0.3f}$'.format(np.min(x)),fontsize=12)
                #ax2.text(0.1,0.5,'$mean={:0.3f}$'.format(np.mean(x)),fontsize=12)
                #ax2.text(0.1,0.4,'r$median={:0.3f}$'.format(np.median(x)),fontsize=12)
                #ax2.text(0.1,0.3,'$max={:0.3f}$'.format(np.max(x)),fontsize=12)
                #ax2.text(0.1,0.2,'$\sigma={:0.3f}$'.format(np.std(x)),fontsize=12)
                #table version
                row_labels = ['min', 'mean', 'median', 'max', 'std']

                celldata = [['{:0.3f}'.format(np.min(x))],
                            ['{:0.3f}'.format(np.mean(x))],
                            ['{:0.3f}'.format(np.median(x))],
                            ['{:0.3f}'.format(np.max(x))],
                            ['{:0.3f}'.format(np.std(x))]]
                ax2.table(cellText=celldata,
                          rowLabels=row_labels,
                          loc='center left',
                          fontsize=24,
                          colWidths=[0.4])
                #row_labels=['min','mean','median','max','$\sigma$']
                #table_vals=['${:0.3f}$'.format(np.min(x)),'${:0.3f}$'.format(np.min(x)),'${:0.3f}$'.format(np.min(x)),'${:0.3f}$'.format(np.min(x))]
                #table = r'''\begin{tabular}{ c | c | c | c } & col1 & col2 & col3 \\\hline row1 & 11 & 12 & 13 \\\hline row2 & 21 & 22 & 23 \\\hline  row3 & 31 & 32 & 33 \end{tabular}'''
                #plt.text(0.1,0.8,table,size=12)

            elif var_types[v] == 3 and var_types[w] != 3:
                #boxplot
                d = []
Example #41
0
def find_LFEs(filename, stations, tbegin, tend, outputfile, TDUR=10.0, filt=(1.5, 9.0), \
        freq0=1.0, nattempts=2, waittime=5.0, draw=False, \
        type_threshold='MAD', threshold=0.0075):
    """
    Find LFEs with the temporary stations from FAME
    using the templates from Plourde et al. (2015)

    Input:
        type filename = string
        filename = Name of the template
        type stations = list of strings
        stations = name of the stations used for the matched-filter algorithm
        type tbegin = tuple of 6 integers
        tbegin = Time when we begin looking for LFEs
        type tend = tuple of 6 integers
        tend = Time we stop looking for LFEs
        type TDUR = float
        TDUR = Time to add before and after the time window for tapering
        type filt = tuple of floats
        filt = Lower and upper frequencies of the filter
        type freq0 = float
        freq0 = Maximum frequency rate of LFE occurrence
        type nattempts = integer
        nattempts = Number of times we try to download data
        type waittime = positive float
        waittime = Time to wait between two attempts at downloading
        type draw = boolean
        draw = Do we draw a figure of the cross-correlation?
        type type_threshold = string
        type_threshold = 'MAD' or 'Threshold'
        type threshold = float
        threshold = Cross correlation value must be higher than that
    Output:
        None
    """

    # Get the network, channels, and location of the stations
    staloc = pd.read_csv(os.path.join(DATADIR, 'station_locations.txt'), \
        sep=r'\s{1,}', header=None, engine='python')
    staloc.columns = ['station', 'network', 'channels', 'location', \
        'server', 'latitude', 'longitude']

    # Create directory to store the LFEs times
    namedir = 'LFEs/' + filename
    if not os.path.exists(namedir):
        os.makedirs(namedir)

    # File to write error messages
    namedir = 'error'
    if not os.path.exists(namedir):
        os.makedirs(namedir)
    errorfile = 'error/' + filename + '.txt'

    # Read the templates
    templates = Stream()
    for station in stations:
        data = pickle.load(open(DATADIR + '/templates/' + filename + \
            '/' + station + '.pkl', 'rb'))
        if (len(data) == 3):
            EW = data[0]
            NS = data[1]
            UD = data[2]
            EW.stats.station = station
            NS.stats.station = station
            EW.stats.channel = 'E'
            NS.stats.channel = 'N'
            templates.append(EW)
            templates.append(NS)
        else:
            UD = data[0]
        UD.stats.station = station
        UD.stats.channel = 'Z'
        templates.append(UD)

    # Begin and end time of analysis
    t1 = UTCDateTime(year=tbegin[0], month=tbegin[1], \
        day=tbegin[2], hour=tbegin[3], minute=tbegin[4], \
        second=tbegin[5])
    t2 = UTCDateTime(year=tend[0], month=tend[1], \
        day=tend[2], hour=tend[3], minute=tend[4], \
        second=tend[5])

    # Read the data
    data = []
    for station in stations:
        # Get station metadata for downloading
        for ir in range(0, len(staloc)):
            if (station == staloc['station'][ir]):
                network = staloc['network'][ir]
                channels = staloc['channels'][ir]
                location = staloc['location'][ir]
                server = staloc['server'][ir]

        # Duration of template
        template = templates.select(station=station, component='Z')[0]
        dt = template.stats.delta
        nt = template.stats.npts
        duration = (nt - 1) * dt
        Tstart = t1 - TDUR
        Tend = t2 + duration + TDUR
        delta = t2 + duration - t1
        ndata = int(delta / dt) + 1

        # Orientation of template
        # Date chosen: January 1st 2020
        mychannels = channels.split(',')
        mylocation = location
        if (mylocation == '--'):
            mylocation = ''
        response = DATADIR + '/response/' + network + '_' + station + '.xml'
        inventory = read_inventory(response, format='STATIONXML')
        reference = []
        for channel in mychannels:
            angle = inventory.get_orientation(network + '.' + \
                station + '.' + mylocation + '.' + channel, \
                UTCDateTime(2020, 1, 1, 0, 0, 0))
            reference.append(angle)

        # First case: we can get the data from IRIS
        if (server == 'IRIS'):
            (D, orientation) = get_from_IRIS(station, network, channels, \
                location, Tstart, Tend, filt, dt, nattempts, waittime, \
                errorfile, DATADIR)
        # Second case: we get the data from NCEDC
        elif (server == 'NCEDC'):
            (D, orientation) = get_from_NCEDC(station, network, channels, \
                location, Tstart, Tend, filt, dt, nattempts, waittime, \
                errorfile, DATADIR)
        else:
            raise ValueError('You can only download data from IRIS and NCEDC')

        # Append data to stream
        if (type(D) == obspy.core.stream.Stream):
            stationdata = fill_data(D, orientation, station, channels, \
                reference)
            if (len(stationdata) > 0):
                for stream in stationdata:
                    data.append(stream)

    # Number of hours of data to analyze
    nhour = int(ceil((t2 - t1) / 3600.0))

    # Create dataframe to store LFE times
    df = pd.DataFrame(columns=['year', 'month', 'day', 'hour', \
        'minute', 'second', 'cc', 'nchannel'])

    # Loop on hours of data
    for hour in range(0, nhour):
        nchannel = 0
        Tstart = t1 + hour * 3600.0
        Tend = t1 + (hour + 1) * 3600.0 + duration
        delta = Tend - Tstart
        ndata = int(delta / dt) + 1

        # Loop on channels
        for channel in range(0, len(data)):
            # Cut the data
            subdata = data[channel]
            subdata = subdata.slice(Tstart, Tend)
            # Check whether we have a complete one-hour-long recording
            if (len(subdata) == 1):
                if (len(subdata[0].data) == ndata):
                    # Get the template
                    station = subdata[0].stats.station
                    component = subdata[0].stats.channel
                    template = templates.select(station=station, \
                        component=component)[0]
                    # Cross correlation
                    cctemp = correlate.optimized(template, subdata[0])
                    if (nchannel > 0):
                        cc = np.vstack((cc, cctemp))
                    else:
                        cc = cctemp
                    nchannel = nchannel + 1

        if (nchannel > 0):

            # Compute average cross-correlation across channels
            meancc = np.mean(cc, axis=0)
            if (type_threshold == 'MAD'):
                MAD = np.median(np.abs(meancc - np.mean(meancc)))
                index = np.where(meancc >= threshold * MAD)
            elif (type_threshold == 'Threshold'):
                index = np.where(meancc >= threshold)
            else:
                raise ValueError('Type of threshold must be MAD or Threshold')
            times = np.arange(0.0, np.shape(meancc)[0] * dt, dt)

            # Get LFE times
            if np.shape(index)[1] > 0:
                (time, cc) = clean_LFEs(index, times, meancc, dt, freq0)

                # Add LFE times to dataframe
                i0 = len(df.index)
                for i in range(0, len(time)):
                    timeLFE = Tstart + time[i]
                    df.loc[i0 + i] = [int(timeLFE.year), int(timeLFE.month), \
                        int(timeLFE.day), int(timeLFE.hour), \
                        int(timeLFE.minute), timeLFE.second + \
                        timeLFE.microsecond / 1000000.0, cc[i], nchannel]

            # Draw figure
            if (draw == True):
                params = {'xtick.labelsize':16,
                          'ytick.labelsize':16}
                pylab.rcParams.update(params)
                plt.figure(1, figsize=(20, 8))
                if np.shape(index)[1] > 0:
                    for i in range(0, len(time)):
                        plt.axvline(time[i], linewidth=2, color='grey')
                plt.plot(np.arange(0.0, np.shape(meancc)[0] * dt, \
                    dt), meancc, color='black')
                if (type_threshold == 'MAD'):
                    plt.axhline(threshold * MAD, linewidth=2, color='red', \
                        label = '{:6.2f} * MAD'.format(threshold))
                elif (type_threshold == 'Threshold'):
                    plt.axhline(threshold, linewidth=2, color='red', \
                        label = 'Threshold = {:8.4f}'.format(threshold))
                else:
                    raise ValueError( \
                        'Type of threshold must be MAD or Threshold')
                plt.xlim(0.0, (np.shape(meancc)[0] - 1) * dt)
                plt.xlabel('Time (s)', fontsize=24)
                plt.ylabel('Cross-correlation', fontsize=24)
                plt.title('Average cross-correlation across stations', \
                    fontsize=30)
                plt.legend(loc=2, fontsize=24)
                plt.savefig('LFEs/' + filename + '/' + \
                    '{:04d}{:02d}{:02d}_{:02d}{:02d}{:02d}'.format( \
                    Tstart.year, Tstart.month, Tstart.day, Tstart.hour, \
                    Tstart.minute, Tstart.second) + '.png', format='png')
                plt.close(1)

    # Add to pandas dataframe and save
    df_all = df
    df_all = df_all.astype(dtype={'year':'int32', 'month':'int32', \
        'day':'int32', 'hour':'int32', 'minute':'int32', \
        'second':'float', 'cc':'float', 'nchannel':'int32'})
    df_all.to_csv('LFEs/' + filename + '/' + outputfile)
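# Minimal synthetic sketch (not FAME/NCEDC data) of the MAD-style detection used above;
# note the deviation is taken about the mean of the stacked cross-correlation, as in the code.
import numpy as np

dt = 0.05
meancc = np.random.normal(0.0, 0.01, 2000)
meancc[400] = 0.2                                  # one fake detection
MAD = np.median(np.abs(meancc - np.mean(meancc)))
threshold = 8.0
index = np.where(meancc >= threshold * MAD)
times = np.arange(0.0, np.shape(meancc)[0] * dt, dt)
print(times[index], meancc[index])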
Example #42
0
def spatio_spectral_patterns(epochs,y, n_components = 4, output_dir="", test_name = "test", doClassif = False, legend = ['A', 'B']):
    """Computes the Common Spatial Pattern (CSP) on all data and print the most discriminant feature and
    performs a simple CSP+Logistic Regression Classification. The results will be stacked for all subjects
    at the end of pipeline_1.
    code inspired by Alexandre Barachant's Kaggle
    https://www.kaggle.com/alexandrebarachant/common-spatial-pattern-with-mne
    another reading for CSP decoding
    https://www.nmr.mgh.harvard.edu/mne/dev/auto_examples/decoding/plot_decoding_csp_eeg.html
    """
    score = []
    X = epochs.get_data()
    # run CSP
    zeta.util.blockPrint()  # to clean the terminal
    csp = mne.decoding.CSP(reg='ledoit_wolf')
    csp.fit(X, y)
    zeta.util.enablePrint()  # restore print

    # compute spatial filtered spectrum for each components
    fig = []
    for indc in range(n_components):
        po = []
        for x in X:
            f, p = welch(np.dot(csp.filters_[indc, :].T, x), int(epochs.info['sfreq']), nperseg=256)
            po.append(p)
        po = np.array(po)

        # prepare topoplot
        _, epos, _, _, _ = mne.viz.topomap._prepare_topo_plot(epochs, 'eeg', None)

        # plot first pattern
        pattern = csp.patterns_[indc, :]
        pattern -= pattern.mean()
        ix = np.argmax(abs(pattern))

        # the parttern is sign invariant.
        # invert it for display purpose
        if pattern[ix] > 0:
            sign = 1.0
        else:
            sign = -1.0

        fig_c, ax_topo = plt.subplots(1, 1, figsize=(12, 4))
        fig.append(fig_c)                 # keep one figure per component
        title = 'Spatial Pattern'
        fig_c.suptitle(title, fontsize=14)
        img, _ = mne.viz.topomap.plot_topomap(sign * pattern, epos, axes=ax_topo, show=False)
        divider = make_axes_locatable(ax_topo)
        # add axes for colorbar
        ax_colorbar = divider.append_axes('right', size='5%', pad=0.05)
        plt.colorbar(img, cax=ax_colorbar)

        # plot spectrum
        fix = (f > 1) & (f < 35)
        ax_spectrum = divider.append_axes('right', size='300%', pad=1.2)
        ax_spectrum.plot(f[fix], np.log(po[y == 0][:, fix].mean(axis=0).T), '-r', lw=2)
        ax_spectrum.plot(f[fix], np.log(po[y == 1][:, fix].mean(axis=0).T), '-b', lw=2)

        ax_spectrum.plot(f[fix], np.log(np.median(po[y == 0][:, fix], axis=0).T), '-r', lw=0.5)
        ax_spectrum.plot(f[fix], np.log(np.min(po[y == 0][:, fix], axis=0).T), '--r', lw=0.5)
        ax_spectrum.plot(f[fix], np.log(np.max(po[y == 0][:, fix], axis=0).T), '--r', lw=0.5)

        ax_spectrum.plot(f[fix], np.log(np.median(po[y == 1][:, fix], axis=0).T), '-b', lw=0.5)
        ax_spectrum.plot(f[fix], np.log(np.min(po[y == 1][:, fix], axis=0).T), '--b', lw=0.5)
        ax_spectrum.plot(f[fix], np.log(np.max(po[y == 1][:, fix], axis=0).T), '--b', lw=0.5)

        ax_spectrum.set_xlabel('Frequency (Hz)')
        ax_spectrum.set_ylabel('Power (dB)')
        plt.grid()
        plt.legend(legend)

        # plt.show()
        plt.savefig( os.path.join(output_dir, 'spatial_pattern_subject_' + test_name +
                    '_c' + str(indc) + '.png'), bbox_inches='tight')

    # run cross validation
    if doClassif:
        zeta.util.blockPrint()  # to have a clean terminal

        clf = sklearn.pipeline.make_pipeline(mne.decoding.CSP(n_components=n_components),
                                             sklearn.linear_model.LogisticRegression(solver="lbfgs"))
        cv = sklearn.model_selection.StratifiedKFold(n_splits=5)
        score = sklearn.model_selection.cross_val_score(clf, X, y, cv=cv, scoring='roc_auc')
        zeta.util.enablePrint()

        print(test_name + " : AUC cross val score : %.3f" % (score.mean()))

    return fig, score
Example #43
0
def filter_paired_reads(bam, fasta, min_ani = 0.97, min_mapq = 2, min_insert = 50, max_insert = 1500, write_bam = False):
    '''
    Filter reads from a .bam file
    Returns:
        pair2info - dictionary of read pair -> (mismatches, insert distance, mapq score, combined length)
    '''
    scaffolds, fasta_length = get_fasta(fasta)
    filtered = set() # Information on pairs
    samfile = pysam.AlignmentFile(bam)

    if write_bam:
        logging.info("Copying header for new bam...")
        samfile_out = pysam.AlignmentFile(bam.split("/")[-1].split(".")[0] + "_filtered.bam", "wb", template=samfile)
    
    total = mapped_pairs = mapq_good = insert_good = 0
    insert_sizes = []
    read_lengths = []

    for scaff in tqdm(scaffolds, desc='Filtering Reads'):
        read_data = {} # Information on the first pair of each read
        for read in samfile.fetch(scaff):
            total += 1
            # If we've seen this read's pair before
            if read.query_name in read_data:
                # Make sure that the pair is on the same scaffold and that it's mapped at all
                if ((read_data[read.query_name]['scaf'] == scaff) & (read.get_reference_positions() != [])):
                    mapped_pairs += 1
                    pairMM = float(read_data[read.query_name]['read'].get_tag('NM')) + float(read.get_tag('NM')) #number of mismatches in pair
                    mapped_read_lengths = float(read_data[read.query_name]['read'].infer_query_length() + read.infer_query_length()) #total length of pair
                    if read.get_reference_positions()[-1] > read_data[read.query_name]['read'].get_reference_positions()[0]:
                        pair_inserts = read.get_reference_positions()[-1] - read_data[read.query_name]['read'].get_reference_positions()[0] #insert distance
                    else:
                        pair_inserts = read_data[read.query_name]['read'].get_reference_positions()[-1] - read.get_reference_positions()[0] #insert distance
                    pair_mapq = max(read.mapping_quality, read_data[read.query_name]['read'].mapping_quality) #pair mapq
                    pair_ani =  1 - (pairMM / mapped_read_lengths) #pair %ANI to reference
                    insert_sizes.append(pair_inserts)
                    read_lengths.append(mapped_read_lengths)
                    # Final filter
                    if pair_inserts >= min_insert and pair_inserts <= max_insert:
                        insert_good += 1
                    if pair_mapq >= min_mapq:
                        mapq_good += 1
                        if pair_ani >= min_ani:
                            filtered.add(read.query_name)
                            if write_bam:
                                samfile_out.write(read_data[read.query_name]['read'])
                                samfile_out.write(read)

            # Add this read, in future search for its mate
            elif read.get_reference_positions() != []: # don't use unmapped reads:
                read_data[read.query_name] = {"read": read, "scaf": scaff}
    logging.info("Total read count\t" + str(total))
    logging.info("Mean read pair sequences length\t" + str(np.mean(read_lengths)))
    logging.info("Total FASTA length\t" + str(fasta_length))
    logging.info("Expected total coverage\t" + str(float(total)*np.mean(read_lengths) / fasta_length))
    logging.info("Mapped read pairs\t" + str(mapped_pairs) + "\t" + str(int(100*mapped_pairs / total)) + "%")
    logging.info("Median end-to-end insert length\t" + str(np.median(insert_sizes)))
    logging.info("Read pairs which pass insert distance filters:\t" + str(insert_good) + "\t" + str(int(100*float(insert_good) / total)) + "%")
    logging.info("Read pairs which also meet min_mapq of " + str(min_mapq) + "\t" + str(mapq_good) + "\t" + str(int(100*float(mapq_good) / total)) +  "%")
    logging.info("Read pairs which also pass final read pair PID >" + str(min_ani) + "%\t" + str(len(filtered)) + "\t" + str(int(100*len(filtered) / total)) + "%")
    logging.info("Final expected coverage\t" + str(float(len(filtered)) * np.mean(read_lengths) / fasta_length))
    samfile.close()
    
    if write_bam:
        samfile_out.close()
        logging.info("sorting new bam")
        pysam.sort("-o", bam.split("/")[-1].split(".")[0] + "_filtered_sort.bam", bam.split("/")[-1].split(".")[0] + "_filtered.bam")
        os.system('rm ' + bam.split("/")[-1].split(".")[0] + "_filtered.bam")
    return filtered
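# Toy sketch (made-up numbers) of the pair-level summary statistics logged above.
import numpy as np

insert_sizes = [250, 310, 290, 1200, 275]
read_lengths = [300, 302, 298, 300, 301]
fasta_length = 5000000
total = 2 * len(insert_sizes)                      # reads, i.e. two per pair

print("Median end-to-end insert length", np.median(insert_sizes))
print("Mean read pair length", np.mean(read_lengths))
print("Expected coverage", float(total) * np.mean(read_lengths) / fasta_length)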
Example #44
0
def compute_input_stats(rg_iostream=None,
                        print_all=False,
                        make_plots=True,
                        save_figures=False):

    # N = len(rg_inputoutput[0][1][0])  # Taking the first reber string's ([0]) one-hot encodings ([1]) for the
    # # first letter 'A' ([0]) and finding its length gives us the value of N.

    graph_idx = get_graph_from_dataset(rg_iostream)

    logger.info('Reading input stream file...')
    with open(
            os.path.join(data_path,
                         rg_iostream.replace('.npy', '') + '.npy'),
            'rb') as stream:
        rg_inputoutput = np.load(stream, allow_pickle=True)

    in_reber_strings = [
        rg_inputoutput[i][0] for i in range(len(rg_inputoutput))
    ]

    dict_count_allTransitions = count_allTransitions(
        graph_idx=graph_idx, in_reber_strings=in_reber_strings)
    # NOF TOTAL CHARACTERS in the input stream
    total_len_inputstream = 0
    for string in in_reber_strings:
        total_len_inputstream += len(string)

    len_reber_strings = []
    for ex in in_reber_strings:
        len_reber_strings.append(len(ex))

    logger.info('The stream consists of a total of {} strings. \n\
    With: \n\
    Number of characters in total = {}. \n\
    Mean length of string = {}. \n\
    Median length of string = {}. \n\
    \n Unique Strings = {}. \n\
    Number of Unique Strings = {}.'.format(len(in_reber_strings),
                                           total_len_inputstream,
                                           np.mean(len_reber_strings),
                                           np.median(len_reber_strings),
                                           np.unique(in_reber_strings),
                                           len(np.unique(in_reber_strings))))

    if make_plots:
        logger.info(
            'Plotting distribution of lengths of sample reber strings in the inputstream...'
        )
        y, binEdges = np.histogram(len_reber_strings,
                                   bins=np.unique(len_reber_strings))
        plt.figure(figsize=(15, 8))
        plt.bar(binEdges[:-1], y, width=1, color='maroon')
        plt.errorbar(binEdges[:-1],
                     y,
                     yerr=np.sqrt(y),
                     fmt='o',
                     color='Black',
                     elinewidth=3,
                     capthick=2,
                     alpha=0.7,
                     markersize=5,
                     capsize=5)
        plt.xlabel('Length of Reber String', fontsize=18)
        plt.ylabel('Number of Occurrences \n in Inputstream', fontsize=18)
        plt.xticks(fontsize=18)
        plt.yticks(fontsize=18)
        plt.text(
            x=binEdges[-1] / 2,
            y=0.8 * y[0],
            s=f'Mean Length of Reber String: {np.mean(len_reber_strings)}',
            fontsize=17)
        plt.text(
            x=binEdges[-1] / 2,
            y=0.7 * y[0],
            s=f'Median Length of Reber String: {np.median(len_reber_strings)}',
            fontsize=17)
        plt.grid(True, linestyle="--", color='black', alpha=0.4)
        if save_figures:
            fig_name = 'StringLengthDist_{}.svg'.format(
                rg_iostream.replace('.npy', ''))
            plt.savefig(fname=os.path.join(fig_path, fig_name), format='svg')
            logger.info('Figure saved in svg format at {}.svg.'.format(
                os.path.join(fig_path, fig_name)))
        plt.show()
        plt.close()

        logger.info(
            'Plotting distribution of possible transitions (trigrams) in reber strings in the inputstream...'
        )
        transitions = list(dict_count_allTransitions.keys())
        counts = list(dict_count_allTransitions.values())

        plt.figure(figsize=(15, 8))
        plt.bar(transitions, counts, color='maroon', width=0.5)
        plt.xlabel('Possible Transitions in Simple Reber Grammar', fontsize=18)
        plt.ylabel('Number of Occurrences in Input Stream', fontsize=18)
        plt.xticks(rotation=50, fontsize=15)
        plt.yticks(fontsize=15)
        plt.grid(True, linestyle="--", color='black', alpha=0.4)

        if save_figures:
            fig_name = 'TransitionsDist_{}.svg'.format(
                rg_iostream.replace('.npy', ''))
            plt.savefig(fname=os.path.join(fig_path, fig_name), format='svg')
            logger.info('Figure saved in svg format at {}.svg.'.format(
                os.path.join(fig_path, fig_name)))

        plt.show()
        plt.close()

    if print_all:
        for i, string in enumerate(in_reber_strings):
            print(i, string)
Example #45
0
def compute_shortest_paths(self, graph_goal):
    self.shortest_paths = nx.shortest_path(self.graph, target=graph_goal, weight='weight')
    self.shortest_distances = [len(value) - 1 for value in self.shortest_paths.values()]
    print 'Mean shortest_distances to goal:', mean(self.shortest_distances)
    print 'Median shortest_distances to goal:', median(self.shortest_distances)
Example #46
0
def bootstrap_sample_medians(data, n_bootstrap_samples=10000):
    bootstrap_sample_medians = []
    for i in range(n_bootstrap_samples):
        bootstrap_sample = np.random.choice(data, size=len(data), replace=True)
        bootstrap_sample_medians.append(np.median(bootstrap_sample))
    return bootstrap_sample_medians
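# Example usage (assumes the function above): turn the bootstrap medians into a simple
# 95% percentile confidence interval for the median.
import numpy as np

data = np.random.exponential(scale=2.0, size=500)
medians = bootstrap_sample_medians(data, n_bootstrap_samples=2000)
lo, hi = np.percentile(medians, [2.5, 97.5])
print("sample median:", np.median(data), "95% CI:", (lo, hi))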
Example #47
0
    runcount=int(sys.argv[3])
else:
    print "usage: %s SIZE STRIDE [COUNT]" % sys.argv[0]
    print "run the benchmark COUNT times (default 500), and display the results as an histogram"
    sys.exit(1)

for i in range(runcount):
    log=subprocess.check_output(["./benchmark",
                                     str(size),
                                     str(stride)]);

    for l in log.splitlines():
        if "MB/s" in l:
            print l
            data.append( float(l[ l.find('=')+1: l.find('MB/s') ]) )

plt.hist(data)
plt.xlabel("Throughput (MB/s)");
plt.ylabel("Count (out of %d)" % runcount);

plt.title("Benchmark was run %d times.\n Throughput (MB/s) min=%.1f, max=%.1f, med=%.1f, avg=%.1f" % (
    runcount, min(data),max(data),np.median(data),np.average(data) ) )

print '%d runs with size %d, stride %d -> median throughput = %.1f MB/s' % (runcount, size, stride, np.median(data))
print 'min=%.1f, max=%.1f, med=%.1f, avg=%.1f' % (min(data),max(data),np.median(data),np.average(data))


plt.show()


Example #48
0
    top_two_hundred_harris = np.array(top_two_hundred_harris)
    top_two_hundred_orb = np.array(top_two_hundred_orb)

    image_distance_matrix = np.zeros((top_two_hundred_harris.shape[0],
                                      top_two_hundred_orb.shape[0]))
    
    '''calculate pairwise image distance matrix'''
    for i in range(image_distance_matrix.shape[0]):
        image_distance_matrix[i] = calculate_distance(top_two_hundred_harris[i],
                                                      top_two_hundred_orb)
    
    '''Harris 100 -> ORB 200
    sort ASC all elements in each row'''
    rowwise_sort = np.sort(image_distance_matrix, axis=1)
    rowwise_index_sort = np.argsort(image_distance_matrix, axis=1)
    h20_median_dist = np.median(rowwise_sort[:100,0])
    h20_avg_dist = np.mean(rowwise_sort[:100,0])
    h20_median_rank_dist = np.median(np.abs(rowwise_index_sort[:100,0]-np.arange(100)))
    h20_avg_rank_dist = np.mean(np.abs(rowwise_index_sort[:100,0]-np.arange(100))) 
    
    '''ORB 100 -> Harris 200
    sort ASC all elements in each column'''   
    columnwise_sort = np.sort(image_distance_matrix, axis=0)
    columnwise_index_sort = np.argsort(image_distance_matrix, axis=0)
    o2h_median_dist = np.median(columnwise_sort[0,:100])
    o2h_avg_dist = np.mean(columnwise_sort[0,:100])
    o2h_median_rank_dist = np.median(np.abs(columnwise_index_sort[0,:100]-np.arange(100)))
    o2h_avg_rank_dist = np.mean(np.abs(columnwise_index_sort[0,:100]-np.arange(100))) 
    
    '''print the formatted outputs'''
    print("\nHarris keypoint to ORB distances:")
Example #49
0
def assess_on_models():
    errors = []
    predicates = Config.predicates
    cols = Config.columns
    aggregate_str = Config.aggregates
    rmse_results = []
    for pred in predicates:
        for col in cols:
            if col <= pred:
                continue
            print("Predicates {0} || Columns {1}".format(pred, col))
            workload = np.loadtxt(
                'input/synthetic_workloads/{}-Queries/query-workload-predicates_{}-cols_{}.csv'
                .format(Config.queries, pred, col),
                delimiter=',')
            workload = workload[~np.isnan(workload).any(axis=1)]
            if workload.shape[0] < 0.1 * Config.queries:
                print(
                    "Error on workload possibly containing large fraction of nans : {}"
                    .format(1 - workload.shape[0] / Config.queries))
                errors.append(
                    'query-workload-predicates_{}-cols_{}.csv'.format(
                        pred, col))
                continue
            aggregate = range(workload.shape[1] - 5, workload.shape[1])
            for t_y, l_Y in zip(aggregate, aggregate_str):
                X_train, X_test, y_train, y_test = train_test_split(
                    workload[:, :workload.shape[1] - 5],
                    workload[:, t_y],
                    test_size=0.3,
                    random_state=0)
                # X_train[(X_train==1e-8) | (X_train==1e+8)] = np.mean(X_train)
                # X_test[(X_test==1e-8) | (X_test==1e+8)] = np.mean(X_test)
                scaler = StandardScaler()
                scaler.fit(X_train)
                X_train = scaler.transform(X_train)
                X_test = scaler.transform(
                    X_test)  # apply same transformation to test data
                for m, m_l in zip(model, model_str):
                    #                 print("\tFitting for Agg {0} with {1}".format(l_Y, m_l))
                    m.fit(X_train, y_train)
                    predictions_test = m.predict(X_test)
                    ml_relative_error = np.mean(
                        np.abs((y_test - predictions_test) / y_test))
                    ml_relative_error_median = np.median(
                        np.abs((y_test - predictions_test) / y_test))
                    rmse = np.sqrt(
                        metrics.mean_squared_error(y_test, predictions_test))
                    mae = metrics.median_absolute_error(
                        y_test, predictions_test)
                    nrmsd = np.sqrt(
                        metrics.mean_squared_error(
                            y_test, predictions_test)) / np.std(y_test)
                    rmse_results.append([
                        pred, col, m_l, l_Y, rmse, nrmsd, mae,
                        ml_relative_error, ml_relative_error_median
                    ])
    if len(errors) != 0:
        print("Finished with errors on:")
        for e in errors:
            print(e)
    test_df = pd.DataFrame(rmse_results,
                           columns=[
                               'predicates', 'columns', 'model', 'aggregate',
                               'rmse', 'nrmsd', 'mae', 'rel_error_mean',
                               'rel_error_median'
                           ])

    test_df.to_csv(
        'output/accuracy/csvs/synthetic_workloads_eval_on_models_{}_queries.csv'
        .format(Config.queries))
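# Minimal sketch (toy arrays) of the error metrics collected per model above.
import numpy as np
from sklearn import metrics

y_test = np.array([10.0, 12.0, 8.0, 15.0, 11.0])
predictions_test = np.array([9.5, 13.0, 7.0, 15.5, 10.0])

rel_err = np.abs((y_test - predictions_test) / y_test)
rmse = np.sqrt(metrics.mean_squared_error(y_test, predictions_test))
print("mean/median relative error:", np.mean(rel_err), np.median(rel_err))
print("RMSE:", rmse, "NRMSD:", rmse / np.std(y_test),
      "MedAE:", metrics.median_absolute_error(y_test, predictions_test))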
Example #50
0
def evaluate_a(a00_t, a10_t, a01_t, a11_t):
    print np.median(a00_t[BURN:])
    print np.median(a10_t[BURN:])
    print np.median(a01_t[BURN:])
    print np.median(a11_t[BURN:])

    plt.figure()
    plt.plot(range(0, ITS), a00_t)
    plt.axhline(A00, color='r', linestyle='dashed', linewidth=2)
    plt.title("Prob of causal SNP - A00")
    plt.savefig('a00_plot.png')

    plt.figure()
    plt.plot(range(0, ITS), a10_t)
    plt.axhline(A10, color='r', linestyle='dashed', linewidth=2)
    plt.title("Prob of causal SNP - A10")
    plt.savefig('a10_plot.png')

    plt.figure()
    plt.plot(range(0, ITS), a01_t)
    plt.axhline(A01, color='r', linestyle='dashed', linewidth=2)
    plt.title("Prob of causal SNP - A01")
    plt.savefig('a01_plot.png')

    plt.figure()
    plt.plot(range(0, ITS), a11_t)
    plt.axhline(A11, color='r', linestyle='dashed', linewidth=2)
    plt.title("Prob of causal SNP - A11")
    plt.savefig('a11_plot.png')

    med00 = np.median(a00_t[BURN:])
    var00 = np.var(a00_t[BURN:])
    plt.figure()
    plt.hist(a00_t[BURN:], normed=True)
    plt.axvline(A00, color='r', linestyle='dashed', linewidth=2)
    plt.title("Prob of causal SNP - A00: med=%f, var=%f" % (med00, var00))
    plt.savefig('a00_hist.png')

    med10 = np.median(a10_t[BURN:])
    var10 = np.var(a10_t[BURN:])
    plt.figure()
    plt.hist(a10_t[BURN:], normed=True)
    plt.axvline(A10, color='r', linestyle='dashed', linewidth=2)
    plt.title("Prob of causal SNP - A10: med=%f, var=%f" % (med10, var10))
    plt.savefig('a10_hist.png')

    med01 = np.median(a01_t[BURN:])
    var01 = np.var(a01_t[BURN:])
    plt.figure()
    plt.hist(a01_t[BURN:], normed=True)
    plt.axvline(A01, color='r', linestyle='dashed', linewidth=2)
    plt.title("Prob of causal SNP - A01: med=%f, var=%f" % (med01, var01))
    plt.savefig('a01_hist.png')

    med11 = np.median(a11_t[BURN:])
    var11 = np.var(a11_t[BURN:])
    plt.figure()
    plt.hist(a11_t[BURN:], normed=True)
    plt.axvline(A11, color='r', linestyle='dashed', linewidth=2)
    plt.title("Prob of causal SNP - A11: med=%f, var=%f" % (med11, var11))
    plt.savefig('a11_hist.png')
Example #51
0
        radcs = np.zeros(pgeo.npix)
        radcs[ipixarr] = np.nansum(radwvcs, axis=1) / nlamb * ereo
        radcl = np.zeros(pgeo.npix)
        radcl[ipixarr] = np.nansum(radwvcl, axis=1) / nlamb * ereo
        radcs[radcs < 0.0] = 0.0

        ####force to zero if fcl < 0:
        fclall[fclall < 0.0] = 0.0
        ############################

        rad = radcs * (1.0 - fclall) + fclall * radcl
        lc.append(np.nansum(rad) * dOmega)
        lccs.append(np.nansum(radcs * (1.0 - fclall)) * dOmega)
        lccl.append(np.nansum(radcl * fclall) * dOmega)
        lccsf.append(np.nansum(radcs) * dOmega)
        fcl.append(np.median(data["arr_6"]))

        if args.x:
            ereoarr = np.zeros(pgeo.npix)
            ereoarr[ipixarr] = ereo

            #            hp.mollview(radcs,title="radcs",flip="geo",cmap=plt.cm.pink)
            hp.mollview(radcl, title="radcl", flip="geo", cmap=plt.cm.pink)

            #            hp.mollview(ereoarr,title="radcs",flip="geo",cmap=plt.cm.CMRmap,min=0.0,max=np.pi)
            hp.graticule(color="orange")
            plt.savefig("tmp/" + str(idPM) + ".png")

        if args.m:
            print(irrad_solar)
            hp.mollview(np.pi * rad / irrad_solar,
Example #52
0
import numpy as np
import pandas as pd
from sklearn.model_selection import ShuffleSplit, train_test_split, GridSearchCV
from sklearn.metrics import r2_score, make_scorer
from sklearn.tree import DecisionTreeRegressor
import visuals as vs

data = pd.read_csv('housing.csv')
prices = data['MEDV']
features = data.drop('MEDV', axis = 1)
  
# Statistics
minimum_price = np.min(prices)
maximum_price = np.max(prices)
mean_price = np.mean(prices)
median_price = np.median(prices)
std_price = np.std(prices)
print("Statistics for Boston housing dataset:\n")
print("Minimum price: ${}".format(minimum_price)) 
print("Maximum price: ${}".format(maximum_price))
print("Mean price: ${}".format(mean_price))
print("Median price ${}".format(median_price))
print("Standard deviation of prices: ${}".format(std_price))

def performance_metric(y_true, y_predict):
    """ Calculates and returns the performance score between 
        true and predicted values based on the metric chosen. """
    
    # Calculate the performance score between 'y_true' and 'y_predict'
    score = r2_score(y_true, y_predict)
    return score
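# Example usage of performance_metric on toy values (assumes the function above).
y_true = [3.0, -0.5, 2.0, 7.0]
y_pred = [2.5, 0.0, 2.1, 7.8]
print("R^2 score:", performance_metric(y_true, y_pred))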
Example #53
0
def _ep_trigger_avg(x, trig_code, pre=0, post=0, iqr_thresh=-1, envelope=False):
    """
    Average response to 1 or more experimental conditions

    Arguments
    ---------
    x: data (nchan, npts)
    trig_code : sequence-type (2, stim) or StimulatedExperiment
        First row is the trigger indices, second row is a condition
        ID (integer). Condition ID -1 codes for a flagged trial to
        be skipped. If a StimulatedExperiment, then triggers and
        conditions are available from this object.
    pre, post : ints
        Number of pre- and post-stim samples in interval. post + pre > 0
        default: 0 and stim-to-stim interval
    envelope : bool
        If True, average the Hilbert-transform power envelope of each epoch
        instead of the raw signal.
    iqr_thresh : float
        If set, do simple outlier detection on all groups of repeated
        conditions based on RMS power in the epoch interval. The iqr_thresh
        multiplies the width of the inter-quartile range to determine the
        "inlier" range of RMS power.

    Returns
    -------
    avg
        (nchan, ncond, epoch_length)
    n_avg
        number of triggers found for each condition
    skipped
        (nskip, nchan, epoch_length) epochs that were not averaged

    """
    x.shape = (1,) + x.shape if x.ndim == 1 else x.shape
    #pos_edge = trig_code[0]; conds = trig_code[1]
    pos_edge, conds = trigs_and_conds(trig_code)
    epoch_len = int(np.round(np.median(np.diff(pos_edge))))

    n_cond = len(np.unique(conds))
    n_pt = x.shape[1]

    if not (post or pre):
        post = epoch_len

    # this formula should provide consistent epoch lengths,
    # no matter the offset
    epoch_len = int(round(post + pre))
    pre = int(round(pre))
    post = epoch_len - pre

    # edit trigger list to exclude out-of-bounds epochs
    while pos_edge[0] - pre < 0:
        pos_edge = pos_edge[1:]
        conds = conds[1:]
    while pos_edge[-1] + post >= n_pt:
        pos_edge = pos_edge[:-1]
        conds = conds[:-1]

    avg = np.zeros((x.shape[0], n_cond, epoch_len), x.dtype)
    n_avg = np.zeros((x.shape[0], n_cond), 'i')

    for n, c in enumerate(np.unique(conds)):
        trials = np.where(conds == c)[0]
        if not len(trials):
            continue
        epochs = extract_epochs(x, pos_edge, trials, pre, post)
        if iqr_thresh > 0:
            pwr = np.sqrt(np.sum(epochs**2, axis=-1))
            # analyze outlier trials per channel
            out_mask = ut.fenced_out(
                pwr, thresh=iqr_thresh, axis=1, low=False
            )
            epochs = epochs * out_mask[:, :, None]
            n_avg[:, n] = np.sum(out_mask, axis=1)
        else:
            n_avg[:, n] = len(trials)

        if envelope:
            epochs = signal.hilbert(
                epochs, N=ut.nextpow2(epoch_len), axis=-1
            )
            epochs = np.abs(epochs[..., :epoch_len])**2

        avg[:, c - 1, :] = np.sum(epochs, axis=1) / n_avg[:, c - 1][:, None]

    x.shape = [x for x in x.shape if x > 1]
    if envelope:
        np.sqrt(avg, avg)
    return avg, n_avg
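# Toy illustration (synthetic trigger indices) of the default epoch length used above:
# the median inter-trigger interval.
import numpy as np

pos_edge = np.array([100, 305, 498, 702, 901])
epoch_len = int(np.round(np.median(np.diff(pos_edge))))
print("default epoch length (samples):", epoch_len)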
Example #54
0
def assess_on_no_queries():
    errors = []
    predicates = Config.predicates
    cols = Config.columns
    aggregate_str = Config.aggregates
    queries_number = np.linspace(0.1, 1, 10) * (Config.queries / 10)
    relative_errors = []
    for n in queries_number:
        n = int(n)
        print("Number of Queries {0}".format(n))
        for pred in predicates:
            for col in cols:
                if col <= pred:
                    continue
                print("Predicates {0} || Columns {1}".format(pred, col))

                workload = np.loadtxt(
                    'input/synthetic_workloads/{}-Queries/query-workload-predicates_{}-cols_{}.csv'
                    .format(Config.queries, pred, col),
                    delimiter=',')
                workload = workload[~np.isnan(workload).any(axis=1)]
                if workload.shape[0] < 0.1 * Config.queries:
                    print(
                        "Error on workload possibly containing large fraction of nans : {}"
                        .format(1 - workload.shape[0] / Config.queries))
                    errors.append(
                        'query-workload-predicates_{}-cols_{}.csv'.format(
                            pred, col))
                    continue
                workload = workload[:n, :]
                aggregate = range(workload.shape[1] - 5, workload.shape[1])
                for t_y, l_Y in zip(aggregate, aggregate_str):
                    non_outliers_ratio = __compute_ratio_of_non_outliers(
                        workload[:, t_y])
                    xgb = XGBRegressor()
                    X_train, X_test, y_train, y_test = train_test_split(
                        workload[:, :workload.shape[1] - 5],
                        workload[:, t_y],
                        test_size=0.2,
                        random_state=0)

                    scaler = StandardScaler()
                    scaler.fit(X_train)
                    X_train = scaler.transform(X_train)
                    X_test = scaler.transform(
                        X_test)  # apply same transformation to test data

                    xgb.fit(X_train, y_train)
                    predictions_training = xgb.predict(X_train)
                    #         print("Training RMSE {0}".format(np.sqrt(metrics.mean_squared_error(y_train, predictions_training))))
                    predictions_test = xgb.predict(X_test)
                    ml_relative_error = np.mean(
                        np.abs((y_test - predictions_test) / y_test))
                    ml_relative_error_median = np.median(
                        np.abs((y_test - predictions_test) / y_test))
                    rmse = np.sqrt(
                        metrics.mean_squared_error(y_test, predictions_test))
                    mae = metrics.median_absolute_error(
                        y_test, predictions_test)
                    nrmsd = np.sqrt(
                        metrics.mean_squared_error(
                            y_test, predictions_test)) / np.std(y_test)
                    relative_errors.append([
                        pred, col, n, rmse, mae, ml_relative_error,
                        ml_relative_error_median, nrmsd, l_Y,
                        non_outliers_ratio
                    ])
    if len(errors) != 0:
        print("Finished with errors on:")
        for e in errors:
            print(e)
    eval_df = pd.DataFrame(relative_errors,
                           columns=[
                               'predicates', 'columns', 'queries', 'rmse',
                               'mae', 'relative_error_mean',
                               'relative_error_median', 'nrmsd', 'aggregate',
                               'non_outliers_ratio'
                           ])
    eval_df.to_csv(
        'output/accuracy/csvs/synthetic_workloads_eval_on_workloads_varying_queries_{}_queries.csv'
        .format(Config.queries))
Example #55
0
def median_datasets(datasetArr, m, n, l):
    istack = np.dstack(datasetArr)
    median = np.median(istack, axis=2)
    return median
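# Example usage (assumes the function above): pixel-wise median of three same-sized frames.
import numpy as np

frames = [np.random.rand(4, 4) for _ in range(3)]
stacked_median = median_datasets(frames, 4, 4, 3)
print(stacked_median.shape)    # (4, 4)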
Example #56
0
def iter_epochs(x, pivots, selected=(), pre=0, post=0, fill=np.nan):
    """
    Generator that yields epochs pivoted at the specified triggers.

    Parameters
    ----------
    x : data (n_chan, n_pt)
    pivots : array-like or StimulatedExperiment
        A sequence of literal pivot samples, or an experiment wrapper
        containing the timestamps.
    selected : sequence
        Indices into trig_code for a subset of stims. If empty, return *ALL*
        epochs (*a potentially very large array*)
    pre, post : ints
        Number of pre- and post-stim samples in interval. post + pre > 0
        default: 0 and stim-to-stim interval

    """

    x = np.atleast_2d(x) if x.ndim == 1 else x
    if isinstance(pivots, StimulatedExperiment):
        pivots, _ = trigs_and_conds(pivots)
    if not np.iterable(pivots):
        pivots = [pivots]

    if not (post or pre):
        if len(pivots) > 1:
            print('Default epoch length based on median inter-trial time')
            post = int(np.median(np.diff(pivots)))
        else:
            print('Default epoch length 200 pts')
            post = 200

    epoch_len = int(round(post + pre))
    pre = int(round(pre))
    post = epoch_len - pre

    if len(selected):
        if hasattr(selected, 'dtype') and selected.dtype.char == '?':
            selected = np.where(selected)[0]
        pivots = np.take(pivots, selected)

    epoch = np.empty((x.shape[0], epoch_len), x.dtype)

    for k in pivots:
        if k - pre < 0:
            start_put = pre - k
            pre = k
        else:
            start_put = 0
        if k + post >= x.shape[1]:
            stop_put = x.shape[1] - k + pre
            post = x.shape[1] - k
        else:
            stop_put = pre + post

        grab_idx = (slice(None), slice(k - pre, k + post))
        put_idx = (slice(None), slice(start_put, stop_put))
        if start_put > 0 or stop_put < pre + post:
            epoch.fill(fill)
        epoch[put_idx] = x[grab_idx]
        yield epoch.copy()
    return
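# Example usage of iter_epochs on synthetic data (assumes the function above and the module
# imports it relies on, e.g. StimulatedExperiment, are available).
import numpy as np

x = np.random.randn(4, 1000)                       # 4 channels, 1000 samples
for epoch in iter_epochs(x, [100, 400, 700], pre=10, post=50):
    print(epoch.shape)                             # (4, 60) for each pivot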
Example #57
0
def main(msname, store_basename, newparmdbext='-instrument_amp_clock_offset'):

    # name (path) for parmdb to be written
    newparmDB = msname + newparmdbext

    # load the numpy arrays written by the previous scripts
    # (filenames constructed in the same way as in these scripts)
    freqs_ampl = np.load('freqs_for_amplitude_array.npy')
    amps_array = np.load(store_basename + '_amplitude_array.npy')
    clock_array = np.load('fitted_data_dclock_' + store_basename + '_1st.npy')
    freqs_phase = np.load('freqs_for_phase_array.npy')
    phases_array = np.load(store_basename + '_phase_array.npy')
    station_names = np.load(store_basename + '_station_names.npy')

    #print "phases shape:",np.shape(phases_array)
    #print "amps shape:",np.shape(amps_array)
    #print "clock shape:",np.shape(clock_array)

    #for ms in mslist: #this script works only on one MS!
    msinfo = ReadMs(msname)
    # this is the same for all antennas
    starttime = msinfo.timepara['start']
    endtime = msinfo.timepara['end']
    startfreqs = msinfo.msfreqvalues - msinfo.GetFreqpara('step') / 2.
    endfreqs = msinfo.msfreqvalues + msinfo.GetFreqpara('step') / 2.
    ntimes = 1
    nfreqs = len(startfreqs)

    outDB = make_empty_parmdb(newparmDB)

    # Now do the interpolating
    for antenna_id, antenna in enumerate(station_names):
        if antenna not in msinfo.stations:
            continue  # skip stations that are not present in this MS

        # form median of amplitudes along the time axis, for both polarizations
        amp_cal_00_all = np.median(amps_array[antenna_id, :, :, 0], axis=0)
        amp_cal_11_all = np.median(amps_array[antenna_id, :, :, 1], axis=0)
        # interpolate to target frequencies
        amp_cal_00 = np.interp(msinfo.msfreqvalues, freqs_ampl, amp_cal_00_all)
        amp_cal_11 = np.interp(msinfo.msfreqvalues, freqs_ampl, amp_cal_11_all)
        # interpolate phases
        phase_cal_00 = 0.
        phase_cal_11 = np.interp(msinfo.msfreqvalues, freqs_phase,
                                 phases_array[:, antenna_id])

        # convert to real and imaginary
        real_00 = amp_cal_00 * np.cos(phase_cal_00)
        imag_00 = amp_cal_00 * np.sin(phase_cal_00)
        real_11 = amp_cal_11 * np.cos(-1. * phase_cal_11)
        imag_11 = amp_cal_11 * np.sin(-1. * phase_cal_11)

        real_00_pdb = real_00.reshape((ntimes, nfreqs))
        imag_00_pdb = imag_00.reshape((ntimes, nfreqs))
        real_11_pdb = real_11.reshape((ntimes, nfreqs))
        imag_11_pdb = imag_11.reshape((ntimes, nfreqs))

        # generate parmDB entries
        ValueHolder = outDB.makeValue(values=real_00_pdb,
                                      sfreq=startfreqs,
                                      efreq=endfreqs,
                                      stime=starttime,
                                      etime=endtime,
                                      asStartEnd=True)
        outDB.addValues('Gain:0:0:Real:' + antenna, ValueHolder)
        ValueHolder = outDB.makeValue(values=imag_00_pdb,
                                      sfreq=startfreqs,
                                      efreq=endfreqs,
                                      stime=starttime,
                                      etime=endtime,
                                      asStartEnd=True)
        outDB.addValues('Gain:0:0:Imag:' + antenna, ValueHolder)
        ValueHolder = outDB.makeValue(values=real_11_pdb,
                                      sfreq=startfreqs,
                                      efreq=endfreqs,
                                      stime=starttime,
                                      etime=endtime,
                                      asStartEnd=True)
        outDB.addValues('Gain:1:1:Real:' + antenna, ValueHolder)
        ValueHolder = outDB.makeValue(values=imag_11_pdb,
                                      sfreq=startfreqs,
                                      efreq=endfreqs,
                                      stime=starttime,
                                      etime=endtime,
                                      asStartEnd=True)
        outDB.addValues('Gain:1:1:Imag:' + antenna, ValueHolder)

        #now handle the clock-value (no fancy interpolating needed)
        clock_pdb = np.array(np.median(clock_array[:, antenna_id]), ndmin=2)
        ValueHolder = outDB.makeValue(values=clock_pdb,
                                      sfreq=startfreqs[0],
                                      efreq=endfreqs[-1],
                                      stime=starttime,
                                      etime=endtime,
                                      asStartEnd=True)
        outDB.addValues('Clock:' + antenna, ValueHolder)

    outDB = False
    return {'transfer_parmDB': newparmDB}
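# Standalone sketch of the core numeric steps above: median the calibration
# amplitudes along the time axis, interpolate onto the MS frequency grid, and
# convert amplitude/phase to real/imaginary gain parts. All axes and values are
# toy data, not real LOFAR calibration tables.
import numpy as np

cal_freqs = np.linspace(120e6, 160e6, 16)
ms_freqs = np.linspace(120e6, 160e6, 64)
toy_amps = np.random.rand(20, 16) + 1.0            # (ntimes_cal, nfreqs_cal)
toy_phase = np.random.rand(16) - 0.5

amp_med = np.median(toy_amps, axis=0)              # median along the time axis
amp_11 = np.interp(ms_freqs, cal_freqs, amp_med)   # onto the MS frequencies
phase_11 = np.interp(ms_freqs, cal_freqs, toy_phase)
real_11 = amp_11 * np.cos(-1. * phase_11)          # same sign convention as above
imag_11 = amp_11 * np.sin(-1. * phase_11)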
Example #58
0
import numpy
arr = [1, 2, 3]
median1 = numpy.median(arr)
print(median1)
Example #59
0
def comb_frames(frames_arr, printtype=None, frametype='Unknown', saturation=None,
                     maskvalue=1048577, method='weightmean', satpix='reject', cosmics=None,
                     n_lohi=[0,0], sig_lohi=[3.,3.], replace='maxnonsat'):
    """
    Combine several frames

    .. todo::
        - Make better use of np.ma.MaskedArray objects throughout?
        - More testing of replacement code necessary?
        - Improve docstring...

    Parameters
    ----------
    frames_arr : ndarray (3D)
      Array of frames to be combined
    printtype : str (optional)
      The frame type string that should be printed by armsgs. If None,
      frametype will be used
    frametype : str, optional
      What type of frame is being combined?
    saturation : float, optional
      Saturation value; only required for some choices of satpix and replace
    maskvalue : int (optional)
      What should the masked values be set to (should be greater than
      the detector's saturation value -- Default = 1 + 2**20)
    method : str, optional
      How the frames are combined ('mean', 'median', or 'weightmean')
    satpix : str, optional
      Method for handling saturated pixels ('reject', 'force', or 'nothing')
    cosmics : float, optional
      Sigma threshold for rejecting cosmic rays (no rejection if not positive)
    n_lohi : list of int, optional
      Number of additional low and high pixels to reject at each position
    sig_lohi : list of float, optional
      Low and high sigma thresholds for rejecting deviant pixels
    replace : str, optional
      Value used where all frames are rejected at a pixel ('min', 'max',
      'mean', 'median', 'weightmean', or 'maxnonsat')

    Returns
    -------
    comb_frame : ndarray
    """
    ###########
    # FIRST DO SOME CHECKS ON THE INPUT
    ###########
    # Was printtype specified
    if printtype is None:
        printtype = frametype
    # Check the number of frames
    if frames_arr is None:
        msgs.error("No '{0:s}' frames were given to comb_frames to combine".format(printtype))
    (sz_x, sz_y, num_frames) = np.shape(frames_arr)
    if num_frames == 1:
        msgs.info("Only one frame to combine!")
        msgs.info("Returning input frame")
        return frames_arr[:, :, 0]
    else:
        msgs.info("Combining {0:d} {1:s} frames".format(num_frames, printtype))

    # Check if the user has allowed the combination of long and short
    # frames (e.g. different exposure times)
    msgs.work("lscomb feature has not been included here yet...")
    # Check the user hasn't requested to reject more frames than available
    if n_lohi[0] > 0 and n_lohi[1] > 0 and n_lohi[0] + n_lohi[1] >= num_frames:
        msgs.error('You cannot reject more frames than are available with \'n_lohi\'.'
                   + msgs.newline() + 'There are {0:d} frames '.format(num_frames)
                   + 'and n_lohi will reject {0:d} low and {1:d} high values.'.format(
                                                                n_lohi[0], n_lohi[1]))

    # Calculate the values to be used if all frames are rejected in some pixels
    if replace == 'min':
        allrej_arr = np.amin(frames_arr, axis=2)
    elif replace == 'max':
        allrej_arr = np.amax(frames_arr, axis=2)
    elif replace == 'mean':
        allrej_arr = np.mean(frames_arr, axis=2)
    elif replace == 'median':
        allrej_arr = np.median(frames_arr, axis=2)
    elif replace == 'weightmean':
        msgs.work("No weights are implemented yet")
        allrej_arr = frames_arr.copy()
        allrej_arr = masked_weightmean(allrej_arr, maskvalue)
    elif replace == 'maxnonsat':
        allrej_arr = frames_arr.copy()
        allrej_arr = maxnonsat(allrej_arr, saturation)
    else:
        msgs.error("You must specify what to do in case all pixels are rejected")

    ################
    # Saturated Pixels
    msgs.info("Finding saturated and non-linear pixels")
    if satpix == 'force':
        # If a saturated pixel is in one of the frames, force them to
        # all have saturated pixels
#		satw = np.zeros_like(frames_arr)
#		satw[np.where(frames_arr > settings.spect['det']['saturation']*settings.spect['det']['nonlinear'])] = 1.0
#		satw = np.any(satw,axis=2)
#		del satw
        setsat = np.zeros_like(frames_arr)
        setsat[frames_arr > saturation] = 1
    elif satpix == 'reject':
        # Ignore saturated pixels in frames if possible
        frames_arr[frames_arr > saturation] = maskvalue
    elif satpix == 'nothing':
        # Don't do anything special for saturated pixels (Hopefully the
        # user has specified how to deal with them below!)
        pass
    else:
        msgs.error('Option \'{0}\' '.format(satpix)
                   + 'for dealing with saturated pixels was not recognised.')

    ################
    # Cosmic Rays
    if cosmics > 0.0:
        msgs.info("Rejecting cosmic rays")  # Use a robust statistic
        masked_fa = np.ma.MaskedArray(frames_arr, mask=frames_arr==maskvalue)
        medarr = np.ma.median(masked_fa, axis=2)
        stdarr = 1.4826*np.ma.median(np.ma.absolute(masked_fa - medarr[:,:,None]), axis=2)
        indx = (frames_arr != maskvalue) \
                    & (frames_arr > (medarr.data + cosmics * stdarr.data)[:,:,None])
        frames_arr[indx] = maskvalue
        # Delete unnecessary arrays
        del medarr, stdarr
    else:
        msgs.info("Not rejecting cosmic rays")

    ################
    # Low and High pixel rejection --- Masks *additional* pixels
    rejlo, rejhi = n_lohi
    if n_lohi[0] > 0 or n_lohi[1] > 0:

        # First reject low pixels
        frames_arr = np.sort(frames_arr, axis=2)
        if n_lohi[0] > 0:
            msgs.info("Rejecting {0:d} deviant low pixels".format(n_lohi[0]))
            while rejlo > 0:
                xi, yi = np.indices((sz_x, sz_y))
                frames_arr[xi, yi, np.argmin(frames_arr, axis=2)] = maskvalue
                del xi, yi
                rejlo -= 1

        # Now reject high pixels
        if n_lohi[1] > 0:
            msgs.info("Rejecting {0:d} deviant high pixels".format(n_lohi[1]))
            frames_arr[np.where(frames_arr == maskvalue)] *= -1
            while rejhi > 0:
                xi, yi = np.indices((sz_x, sz_y))
                frames_arr[xi, yi, np.argmax(frames_arr, axis=2)] = -maskvalue
                del xi, yi
                rejhi -= 1
            frames_arr[np.where(frames_arr == -maskvalue)] *= -1

# TODO: Do we need this?
# The following is an example of *not* masking additional pixels
#		if reject['lowhigh'][1] > 0:
#			msgs.info("Rejecting {0:d} deviant high pixels".format(reject['lowhigh'][1]))
#			masktemp[:,:,-reject['lowhigh'][0]:] = True
    else:
        msgs.info("Not rejecting any low/high pixels")

    ################
    # Deviant Pixels
    # TODO: sig_lohi (what was level) is not actually used, instead this
    # just selects if cosmics should be used.  Is this intentional?  Why
    # not just do: `if cosmics > 0:`?
    if sig_lohi[0] > 0.0 or sig_lohi[1] > 0.0:
        msgs.info("Rejecting deviant pixels")  # Use a robust statistic

        masked_fa = np.ma.MaskedArray(frames_arr, mask=frames_arr==maskvalue)
        medarr = np.ma.median(masked_fa, axis=2)
        stdarr = 1.4826*np.ma.median(np.ma.absolute(masked_fa - medarr[:,:,None]), axis=2)
        indx = (frames_arr != maskvalue) \
                    & ( (frames_arr > (medarr.data + cosmics*stdarr.data)[:,:,None])
                        | (frames_arr < (medarr.data - cosmics*stdarr.data)[:,:,None]))
        frames_arr[indx] = maskvalue

        # Delete unnecessary arrays
        del medarr, stdarr
    else:
        msgs.info("Not rejecting deviant pixels")

    ##############
    # Combine the arrays
    msgs.info("Combining frames with a {0:s} operation".format(method))
    if method == 'mean':
        comb_frame = np.ma.mean(np.ma.MaskedArray(frames_arr, mask=frames_arr==maskvalue), axis=2)
    elif method == 'median':
        comb_frame = np.ma.median(np.ma.MaskedArray(frames_arr, mask=frames_arr==maskvalue), axis=2)
    elif method == 'weightmean':
        comb_frame = frames_arr.copy()
        comb_frame = masked_weightmean(comb_frame, maskvalue)
    else:
        msgs.error("Combination type '{0:s}' is unknown".format(method))

    ##############
    # If any pixels are completely masked, apply user-specified function
    msgs.info("Replacing completely masked pixels with the {0:s} value of the input frames".format(replace))
    indx = comb_frame == maskvalue
    comb_frame[indx] = allrej_arr[indx]
    # Delete unnecessary arrays
    del allrej_arr

    ##############
    # Apply the saturated pixels:
    if satpix == 'force':
        msgs.info("Applying saturated pixels to final combined image")
        comb_frame[setsat] = saturation # settings.spect[dnum]['saturation']

    ##############
    # And return a 2D numpy array
    msgs.info("{0:d} {1:s} frames combined successfully!".format(num_frames, printtype))
    # Make sure the returned array is the correct type
    comb_frame = np.array(comb_frame, dtype=float)
    return comb_frame
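# Standalone sketch of the robust rejection statistic used in comb_frames:
# per-pixel median across the frame stack plus a MAD-based sigma estimate
# (1.4826 * MAD), masking values more than nsigma above the median. Toy data
# only; none of the msgs/maskvalue machinery is reproduced here.
import numpy as np

def mad_high_reject(stack, nsigma=5.0):
    med = np.median(stack, axis=2)
    sigma = 1.4826 * np.median(np.abs(stack - med[:, :, None]), axis=2)
    return stack > (med + nsigma * sigma)[:, :, None]

toy_stack = np.random.normal(100.0, 5.0, size=(32, 32, 7))
toy_stack[10, 10, 3] = 1e4                          # simulated cosmic-ray hit
mask = mad_high_reject(toy_stack)                   # boolean, same shape as toy_stack
combined = np.ma.median(np.ma.MaskedArray(toy_stack, mask=mask), axis=2)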
Example #60
0
def collect_samples_and_stats(
    config: SimpleNamespace,
    model_cls: Type[BaseModel],
    all_ppl_details: List[PPLDetails],
    train_data: xr.Dataset,
    test_data: xr.Dataset,
    output_dir: str,
) -> Tuple[xr.Dataset, xr.Dataset]:
    """
    :param config: The benchmark configuration.
    :param model_cls: The model class.
    :param all_ppl_details: For each PPL, the implementation and inference classes, etc.
    :param train_data: The training dataset.
    :param test_data: The held-out test dataset.
    :param output_dir: The directory for storing results.
    :returns: Two datasets:
        variable_metrics
            Coordinates: ppl, metric (n_eff, Rhat), others from model
            Data variables: from model
        other_metrics
            Coordinates: ppl, chain, draw, phase (compile, infer)
            Data variables: pll (ppl, chain, draw), timing (ppl, chain, phase)
    """
    all_variable_metrics, all_pll, all_timing, all_names = [], [], [], []
    all_samples, all_overall_neff, all_overall_neff_per_time = [], [], []
    for pplobj in all_ppl_details:
        all_names.append(pplobj.name)
        rand = np.random.RandomState(pplobj.seed)
        LOGGER.info(f"Starting inference on `{pplobj.name}` with seed {pplobj.seed}")
        # first compile the PPL Implementation this involves two steps
        compile_t1 = time.time()
        # compile step 1: instantiate ppl inference object
        infer_obj = pplobj.inference_class(pplobj.impl_class, train_data.attrs)
        # compile step 2: call compile
        infer_obj.compile(seed=rand.randint(1, 1e7), **pplobj.compile_args)
        compile_time = time.time() - compile_t1
        LOGGER.info(f"compiling on `{pplobj.name}` took {compile_time:.2f} secs")
        # then run inference for each trial
        trial_samples, trial_pll, trial_timing = [], [], []
        for trialnum in range(config.trials):
            infer_t1 = time.time()
            samples = infer_obj.infer(
                data=train_data,
                num_samples=config.num_samples,
                seed=rand.randint(1, 1e7),
                **pplobj.infer_args,
            )
            infer_time = time.time() - infer_t1
            LOGGER.info(f"inference trial {trialnum} took {infer_time:.2f} secs")
            # compute the pll per sample and then convert it to the actual pll over
            # cumulative samples
            persample_pll = model_cls.evaluate_posterior_predictive(samples, test_data)
            pll = np.logaddexp.accumulate(persample_pll) - np.log(
                np.arange(config.num_samples) + 1
            )
            LOGGER.info(f"PLL = {str(pll)}")
            trial_samples.append(samples)
            trial_pll.append(pll)
            trial_timing.append([compile_time, infer_time])
            # finally, give the inference object an opportunity
            # to write additional diagnostics
            infer_obj.additional_diagnostics(output_dir, f"{pplobj.name}_{trialnum}")
        del infer_obj
        # concatenate the samples data from each trial together so we can compute metrics
        trial_samples_data = xr.concat(
            trial_samples, pd.Index(data=np.arange(config.trials), name="chain")
        )
        neff_data = arviz.ess(trial_samples_data)
        rhat_data = arviz.rhat(trial_samples_data)
        LOGGER.info(f"Trials completed for {pplobj.name}")
        LOGGER.info("== n_eff ===")
        LOGGER.info(str(neff_data.data_vars))
        LOGGER.info("==  Rhat ===")
        LOGGER.info(str(rhat_data.data_vars))

        # compute ess/time
        neff_df = neff_data.to_dataframe()
        overall_neff = [
            neff_df.values.min(),
            np.median(neff_df.values),
            neff_df.values.max(),
        ]
        mean_inference_time = np.mean(np.array(trial_timing)[:, 1])
        overall_neff_per_time = np.array(overall_neff) / mean_inference_time

        LOGGER.info("== overall n_eff [min, median, max]===")
        LOGGER.info(str(overall_neff))
        LOGGER.info("== overall n_eff/s [min, median, max]===")
        LOGGER.info(str(overall_neff_per_time))

        trial_variable_metrics_data = xr.concat(
            [neff_data, rhat_data], pd.Index(data=["n_eff", "Rhat"], name="metric")
        )
        all_variable_metrics.append(trial_variable_metrics_data)
        all_pll.append(trial_pll)
        all_timing.append(trial_timing)
        all_samples.append(trial_samples_data)
        all_overall_neff.append(overall_neff)
        all_overall_neff_per_time.append(overall_neff_per_time)
    # merge the trial-level metrics at the PPL level
    all_variable_metrics_data = xr.concat(
        all_variable_metrics, pd.Index(data=all_names, name="ppl")
    )
    all_other_metrics_data = xr.Dataset(
        {
            "timing": (["ppl", "chain", "phase"], all_timing),
            "pll": (["ppl", "chain", "draw"], all_pll),
            "overall_neff": (["ppl", "percentile"], all_overall_neff),
            "overall_neff_per_time": (["ppl", "percentile"], all_overall_neff_per_time),
        },
        coords={
            "ppl": np.array(all_names),
            "chain": np.arange(config.trials),
            "phase": np.array(["compile", "infer"]),
            "draw": np.arange(config.num_samples),
            "percentile": np.array(["min", "median", "max"]),
        },
    )
    all_samples_data = xr.concat(all_samples, pd.Index(data=all_names, name="ppl"))
    model_cls.additional_metrics(output_dir, all_samples_data, train_data, test_data)
    LOGGER.info("all benchmark samples and metrics collected")
    # save the samples data only if requested
    if getattr(config, "save_samples", False):
        save_dataset(output_dir, "samples", all_samples_data)
    # write out these metrics
    save_dataset(output_dir, "diagnostics", all_variable_metrics_data)
    save_dataset(output_dir, "metrics", all_other_metrics_data)
    return all_variable_metrics_data, all_other_metrics_data
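# Sketch of the cumulative PLL trick used in collect_samples_and_stats:
# np.logaddexp.accumulate gives a running log-sum-exp, so subtracting
# log(k + 1) yields the log of the running mean predictive likelihood over
# the first k + 1 samples. The per-sample values below are made up.
import numpy as np

persample_pll = np.array([-3.2, -2.9, -3.5, -3.1])
pll = np.logaddexp.accumulate(persample_pll) - np.log(np.arange(len(persample_pll)) + 1)
# pll[k] == logsumexp(persample_pll[:k + 1]) - log(k + 1)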