Example #1
    def estimate(self, data=None):
        """
        Estimate the mean, standard deviation and interquartile range of
        the generated ensemble.

        Output:
            Y1_mean = mean of the simulated ensemble
            Y1_std  = standard deviation of the simulated ensemble
            Y1_ll   = lower limit (25th percentile) of the simulated ensemble
            Y1_ul   = upper limit (75th percentile) of the simulated ensemble
        """
        # Requires numpy as np, scipy.stats.scoreatpercentile and
        # scipy.interpolate.interp1d imported at module level.
        nbin = 50
        # Check whether generate_xy has already been called;
        # if not, call it now with a default ensemble size.
        try:
            copula_ens = len(self.X1)
        except AttributeError:
            copula_ens = 10000
            self.generate_xy(copula_ens)

        if data is None:
            data = self.X

        n_ens = copula_ens // nbin  # average number of ensemble members per bin
        ind_sort = self.X1.argsort()
        x_mean = np.zeros((nbin,))
        y_mean = np.zeros((nbin,))
        y_ul = np.zeros((nbin,))
        y_ll = np.zeros((nbin,))
        y_std = np.zeros((nbin,))

        # Bin the ensemble along sorted X1 and summarise Y1 within each bin.
        for ii in range(nbin):
            members = ind_sort[n_ens * ii:n_ens * (ii + 1)]
            x_mean[ii] = self.X1[members].mean()
            y_mean[ii] = self.Y1[members].mean()
            y_std[ii] = self.Y1[members].std()
            y_ll[ii] = scoreatpercentile(self.Y1[members], 25)
            y_ul[ii] = scoreatpercentile(self.Y1[members], 75)

        # Interpolate the per-bin statistics onto the requested data points.
        foo_mean = interp1d(x_mean, y_mean, bounds_error=False)
        foo_std = interp1d(x_mean, y_std, bounds_error=False)
        foo_ll = interp1d(x_mean, y_ll, bounds_error=False)
        foo_ul = interp1d(x_mean, y_ul, bounds_error=False)

        Y1_mean = foo_mean(data)
        Y1_std = foo_std(data)
        Y1_ll = foo_ll(data)
        Y1_ul = foo_ul(data)

        return Y1_mean, Y1_std, Y1_ll, Y1_ul
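
The binning-and-percentile idea above can be isolated in a few lines. The sketch below uses synthetic data and illustrative names only (it is not part of the class above): sort the ensemble by X, split it into equal-count bins, take per-bin percentiles of Y with scoreatpercentile, then interpolate the bin statistics back onto query points.

import numpy as np
from scipy.stats import scoreatpercentile
from scipy.interpolate import interp1d

rng = np.random.default_rng(0)
X1 = rng.normal(size=10000)
Y1 = 2.0 * X1 + rng.normal(scale=0.5, size=10000)

nbin = 50
n_ens = len(X1) // nbin                 # ensemble members per bin
ind_sort = X1.argsort()

x_mean = np.empty(nbin)
y_ll = np.empty(nbin)
y_ul = np.empty(nbin)
for ii in range(nbin):
    members = ind_sort[n_ens * ii:n_ens * (ii + 1)]
    x_mean[ii] = X1[members].mean()
    y_ll[ii] = scoreatpercentile(Y1[members], 25)
    y_ul[ii] = scoreatpercentile(Y1[members], 75)

query = np.linspace(-2, 2, 5)
lower = interp1d(x_mean, y_ll, bounds_error=False)(query)
upper = interp1d(x_mean, y_ul, bounds_error=False)(query)
print(np.vstack([query, lower, upper]).T)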
Example #2
 def estimate_globalradiation(self, loader, cache, output):
     excentricity = cache.excentricity
     solarangle = cache.solarangle
     atmosphericalbedo = cache.atmosphericalbedo
     t_earth = cache.t_earth
     t_sat = cache.t_sat
     observedalbedo = self.getalbedo(loader.calibrated_data,
                                     self.algorithm.i0met,
                                     excentricity, solarangle)
     apparentalbedo = self.getapparentalbedo(observedalbedo,
                                             atmosphericalbedo,
                                             t_earth, t_sat)
     declination = cache.declination[:]
     logging.info("Calculating the noon window... ")
     slot_window_in_hours = 4
     image_per_day = 24 * self.algorithm.IMAGE_PER_HOUR
     noon_slot = image_per_day // 2
     half_window = self.algorithm.IMAGE_PER_HOUR * slot_window_in_hours // 2
     min_slot = noon_slot - half_window
     max_slot = noon_slot + half_window
     condition = ((cache.slots >= min_slot) & (cache.slots < max_slot))
     condition = np.reshape(condition, condition.shape[0])
     mask1 = (loader.calibrated_data[condition] <=
              (self.algorithm.i0met / np.pi) * 0.03)
     m_apparentalbedo = np.ma.masked_array(apparentalbedo[condition], mask1)
     # The next steps need a lot of memory
     logging.info("Calculating the ground reference albedo... ")
     mask2 = m_apparentalbedo < stats.scoreatpercentile(m_apparentalbedo, 5)
     p5_apparentalbedo = np.ma.masked_array(m_apparentalbedo, mask2)
     groundreferencealbedo = self.getsecondmin(p5_apparentalbedo)
     # Calculate the solar elevation using times, latitudes and omega
     logging.info("Calculating solar elevation... ")
     r_alphanoon = self.getsolarelevation(declination, loader.lat[0], 0)
     r_alphanoon = r_alphanoon * 2./3.
     r_alphanoon[r_alphanoon > 40] = 40
     r_alphanoon[r_alphanoon < 15] = 15
     solarelevation = cache.solarelevation[:]
     logging.info("Calculating the ground minimum albedo... ")
     groundminimumalbedo = self.getsecondmin(
         np.ma.masked_array(
             apparentalbedo[condition],
             solarelevation[condition] < r_alphanoon[condition]))
     aux_2g0 = 2 * groundreferencealbedo
     aux_05g0 = 0.5 * groundreferencealbedo
     condition_2g0 = groundminimumalbedo > aux_2g0
     condition_05g0 = groundminimumalbedo < aux_05g0
     groundminimumalbedo[condition_2g0] = aux_2g0[condition_2g0]
     groundminimumalbedo[condition_05g0] = aux_05g0[condition_05g0]
     logging.info("Calculating the cloud index... ")
     i = output.ref_globalradiation.shape[0]
     cloudindex = self.getcloudindex(apparentalbedo[-i:],
                                     groundminimumalbedo,
                                     cache.cloudalbedo[-i:])
     output.ref_cloudindex[:] = cloudindex
     output.ref_globalradiation[:] = (self.getclearsky(cloudindex) *
                                      cache.gc[-i:, :])
     nc.sync(output.root)
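
The ground-reference step above keeps only values at or above the 5th percentile of the apparent albedo before summarising them. A minimal sketch of that masking pattern on synthetic data (all names here are illustrative; the original uses a project-specific getsecondmin helper rather than the plain minimum):

import numpy as np
from scipy import stats

apparentalbedo = np.random.default_rng(1).uniform(0.05, 0.6, size=1000)

# Mask everything below the 5th percentile, then summarise what remains.
p5 = stats.scoreatpercentile(apparentalbedo, 5)
m_apparentalbedo = np.ma.masked_array(apparentalbedo, apparentalbedo < p5)
print(p5, m_apparentalbedo.min())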
Example #3
    def estimate_ens(self, data=None, pc=[50]):
        """
        Estimate the score at the given percentiles for the generated
        ensemble.

        Output:
            Y1_pc = score at each requested percentile for the simulated
                    ensemble
        """
        # Requires numpy as np, scipy.stats.scoreatpercentile and
        # scipy.interpolate.interp1d imported at module level.
        n_pc = len(pc)
        nbin = 50
        # Check whether generate_xy has already been called;
        # if not, call it now with a default ensemble size.
        try:
            copula_ens = len(self.X1)
        except AttributeError:
            copula_ens = 10000
            self.generate_xy(copula_ens)

        if data is None:
            data = self.X

        n_ens = copula_ens // nbin  # average number of ensemble members per bin
        ind_sort = self.X1.argsort()
        x_mean = np.zeros((nbin,))
        y_pc = np.zeros((nbin, n_pc))

        # Per-bin percentile scores of Y1 along sorted X1.
        for ii in range(nbin):
            members = ind_sort[n_ens * ii:n_ens * (ii + 1)]
            x_mean[ii] = self.X1[members].mean()
            for jj in range(n_pc):
                y_pc[ii, jj] = scoreatpercentile(self.Y1[members], pc[jj])

        # Interpolate each percentile curve onto the requested data points.
        Y1_pc = np.zeros((len(data), n_pc))
        for jj in range(n_pc):
            foo_pc = interp1d(x_mean, y_pc[:, jj], bounds_error=False)
            Y1_pc[:, jj] = foo_pc(data)

        return Y1_pc
Example #4
# Note: this fragment targets a legacy stats module (old scipy.stats or the
# standalone stats.py it derives from); stats.mean, stats.var, stats.stdev,
# stats.itemfreq and stats.histogram are not available in current scipy.stats
# (use numpy.mean, numpy.var, numpy.std, numpy.unique and numpy.histogram).
# l, lf, a and af are sample data defined earlier in the original script.
print('kurtosis:', stats.kurtosis(l), stats.kurtosis(lf), stats.kurtosis(a), stats.kurtosis(af))
print('mean:',stats.mean(a),stats.mean(af))
print('var:',stats.var(a),stats.var(af))
print('stdev:',stats.stdev(a),stats.stdev(af))
print('sem:',stats.sem(a),stats.sem(af))
print('describe:')
print(stats.describe(l))
print(stats.describe(lf))
print(stats.describe(a))
print(stats.describe(af))
print('\nFREQUENCY')
print('freqtable:')
print('itemfreq:')
print(stats.itemfreq(l))
print(stats.itemfreq(a))
print('scoreatpercentile:',stats.scoreatpercentile(l,40),stats.scoreatpercentile(lf,40),stats.scoreatpercentile(a,40),stats.scoreatpercentile(af,40))
print('percentileofscore:',stats.percentileofscore(l,12),stats.percentileofscore(lf,12),stats.percentileofscore(a,12),stats.percentileofscore(af,12))
print('histogram:',stats.histogram(l),stats.histogram(a))
print('cumfreq:')
print(stats.cumfreq(l))
print(stats.cumfreq(lf))
print(stats.cumfreq(a))
print(stats.cumfreq(af))
print('relfreq:')
print(stats.relfreq(l))
print(stats.relfreq(lf))
print(stats.relfreq(a))
print(stats.relfreq(af))
print('\nVARIATION')
print('obrientransform:')
l = range(1,21)
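
For reference, the descriptive statistics printed above have current NumPy/SciPy equivalents. A minimal sketch on illustrative data (ddof conventions may differ from the legacy module):

import numpy as np
from scipy import stats

a = np.arange(1, 21, dtype=float)   # illustrative sample data

print('kurtosis:', stats.kurtosis(a))
print('mean:', np.mean(a))
print('var:', np.var(a, ddof=1))    # ddof=1 for the sample-variance convention
print('stdev:', np.std(a, ddof=1))
print('sem:', stats.sem(a))
print('describe:', stats.describe(a))
print('scoreatpercentile:', stats.scoreatpercentile(a, 40))
print('percentileofscore:', stats.percentileofscore(a, 12))
print('cumfreq:', stats.cumfreq(a))
print('relfreq:', stats.relfreq(a))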
Example #5
def _aggregate_gtf(gtf_file, sample_id, gtf_expr_attr, output_fh, stats_fh,
                   is_ref=False):
    # Requires collections, pysam, scipy.stats.scoreatpercentile and the
    # project's GTF / GTFError helpers imported at module level.
    def _init_t_dict():
        return {'_id': None, 'num_exons': 0, 'length': 0}

    t_dict = collections.defaultdict(_init_t_dict)
    cur_t_id = 1
    exprs = []
    for f in pysam.tabix_iterator(open(gtf_file), pysam.asGTF()):
        if f.feature == 'transcript':
            t_id = f.transcript_id
            if t_id in t_dict:
                m = 'GTF "%s" transcript_id "%s" not unique' % (gtf_file, t_id)
                raise GTFError(m)
            t_item = t_dict[t_id]
            # rename transcript id
            new_t_id = "%s.T%d" % (sample_id, cur_t_id)
            cur_t_id += 1
            t_item['_id'] = new_t_id
            if is_ref:
                expr = 0.0
            else:
                expr = float(f[gtf_expr_attr])
            exprs.append(expr)
            # prepare attributes
            attrs = {GTF.Attr.TRANSCRIPT_ID: new_t_id,
                     GTF.Attr.SAMPLE_ID: sample_id,
                     GTF.Attr.REF: str(int(is_ref)),
                     GTF.Attr.EXPR: str(expr)}
            # save attributes
            f.fromDict(attrs)
            print(str(f), file=output_fh)
        elif f.feature == 'exon':
            t_id = f.transcript_id
            t_item = t_dict[t_id]
            # update statistics
            t_item['num_exons'] += 1
            t_item['length'] += (f.end - f.start)
            # replace transcript id
            f.fromDict({GTF.Attr.TRANSCRIPT_ID: t_item['_id']})
            print(str(f), file=output_fh)

    # process statistics
    num_exons = []
    lengths = []
    for t_item in t_dict.values():
        lengths.append(t_item['length'])
        num_exons.append(t_item['num_exons'])

    # compute and write stats
    quantiles = range(0, 101)
    expr_qs = (scoreatpercentile(exprs, q) for q in quantiles)
    expr_qs = ','.join(map(str, expr_qs))
    length_qs = (int(round(scoreatpercentile(lengths, q)))
                 for q in quantiles)
    length_qs = ','.join(map(str, length_qs))
    num_exon_qs = (int(round(scoreatpercentile(num_exons, q)))
                   for q in quantiles)
    num_exon_qs = ','.join(map(str, num_exon_qs))
    fields = [sample_id, len(t_dict), expr_qs, length_qs, num_exon_qs]
    print('\t'.join(map(str, fields)), file=stats_fh)
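
The stats block above reduces each distribution to a 0-100 percentile profile. A standalone sketch of that pattern on illustrative data:

import numpy as np
from scipy.stats import scoreatpercentile

lengths = np.random.default_rng(2).integers(200, 5000, size=500)  # illustrative

# One value per percentile step from the 0th to the 100th, joined as CSV.
quantiles = range(0, 101)
length_qs = ','.join(str(int(round(scoreatpercentile(lengths, q))))
                     for q in quantiles)
print(length_qs)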
Example #6
 def check_percentile(self):
     # Requires numpy's arange, an assert_equal helper (e.g. from
     # numpy.testing) and scipy.stats imported at module level.
     x = arange(8) * 0.5
     assert_equal(stats.scoreatpercentile(x, 0), 0.)
     assert_equal(stats.scoreatpercentile(x, 100), 3.5)
     assert_equal(stats.scoreatpercentile(x, 50), 1.75)
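
With its default 'fraction' interpolation, scoreatpercentile should agree with numpy.percentile's default linear interpolation, so the same checks can be phrased against NumPy. A small sketch:

import numpy as np
from numpy.testing import assert_allclose
from scipy import stats

x = np.arange(8) * 0.5
qs = [0, 25, 50, 75, 100]

# scoreatpercentile and np.percentile should produce the same scores here.
assert_allclose([stats.scoreatpercentile(x, q) for q in qs],
                np.percentile(x, qs))
print(np.percentile(x, qs))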