def estimate(self, data=None):
    """
    Estimate the mean, standard deviation and interquartile range of the
    generated ensemble.

    Output:
        Y1_mean = mean of the simulated ensemble
        Y1_std  = standard deviation of the simulated ensemble
        Y1_ll   = lower limit (25th percentile) of the simulated ensemble
        Y1_ul   = upper limit (75th percentile) of the simulated ensemble
    """
    # numpy (np), scipy.stats.scoreatpercentile and scipy.interpolate.interp1d
    # are assumed to be imported at module level.
    nbin = 50
    # Check whether generate_xy has already been called; if not, call it now.
    try:
        copula_ens = len(self.X1)
    except AttributeError:
        copula_ens = 10000
        self.generate_xy(copula_ens)
    if data is None:
        data = self.X
    n_ens = copula_ens // nbin  # average number of ensemble members per bin
    ind_sort = self.X1.argsort()
    x_mean = np.zeros((nbin,))
    y_mean = np.zeros((nbin,))
    y_ul = np.zeros((nbin,))
    y_ll = np.zeros((nbin,))
    y_std = np.zeros((nbin,))
    for ii in range(nbin):
        members = ind_sort[n_ens * ii:n_ens * (ii + 1)]
        x_mean[ii] = self.X1[members].mean()
        y_mean[ii] = self.Y1[members].mean()
        y_std[ii] = self.Y1[members].std()
        y_ll[ii] = scoreatpercentile(self.Y1[members], 25)
        y_ul[ii] = scoreatpercentile(self.Y1[members], 75)
    foo_mean = interp1d(x_mean, y_mean, bounds_error=False)
    foo_std = interp1d(x_mean, y_std, bounds_error=False)
    foo_ll = interp1d(x_mean, y_ll, bounds_error=False)
    foo_ul = interp1d(x_mean, y_ul, bounds_error=False)
    Y1_mean = foo_mean(data)
    Y1_std = foo_std(data)
    Y1_ll = foo_ll(data)
    Y1_ul = foo_ul(data)
    return Y1_mean, Y1_std, Y1_ll, Y1_ul
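# A minimal standalone sketch of the same binning-and-interpolation scheme
# used by estimate() above, on synthetic data (the data, bin count and seed
# are illustrative assumptions, not part of the original class):
import numpy as np
from scipy.stats import scoreatpercentile
from scipy.interpolate import interp1d

rng = np.random.default_rng(0)
x = rng.normal(size=1000)
y = 0.5 * x + rng.normal(size=1000)
nbin = 5
n_ens = len(x) // nbin                      # members per bin
order = x.argsort()
bins = [order[n_ens * i:n_ens * (i + 1)] for i in range(nbin)]
x_mean = np.array([x[b].mean() for b in bins])
y_ll = np.array([scoreatpercentile(y[b], 25) for b in bins])
foo_ll = interp1d(x_mean, y_ll, bounds_error=False)
print(foo_ll(0.0))                          # interpolated 25th percentile of y near x = 0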
def estimate_globalradiation(self, loader, cache, output):
    excentricity = cache.excentricity
    solarangle = cache.solarangle
    atmosphericalbedo = cache.atmosphericalbedo
    t_earth = cache.t_earth
    t_sat = cache.t_sat
    observedalbedo = self.getalbedo(loader.calibrated_data,
                                    self.algorithm.i0met,
                                    excentricity, solarangle)
    apparentalbedo = self.getapparentalbedo(observedalbedo,
                                            atmosphericalbedo,
                                            t_earth, t_sat)
    declination = cache.declination[:]
    logging.info("Calculating the noon window...")
    slot_window_in_hours = 4
    image_per_day = 24 * self.algorithm.IMAGE_PER_HOUR
    noon_slot = image_per_day // 2
    half_window = (self.algorithm.IMAGE_PER_HOUR * slot_window_in_hours) // 2
    min_slot = noon_slot - half_window
    max_slot = noon_slot + half_window
    condition = ((cache.slots >= min_slot) & (cache.slots < max_slot))
    condition = np.reshape(condition, condition.shape[0])
    mask1 = (loader.calibrated_data[condition] <=
             (self.algorithm.i0met / np.pi) * 0.03)
    m_apparentalbedo = np.ma.masked_array(apparentalbedo[condition], mask1)
    # The next steps need a lot of memory.
    logging.info("Calculating the ground reference albedo...")
    mask2 = m_apparentalbedo < stats.scoreatpercentile(m_apparentalbedo, 5)
    p5_apparentalbedo = np.ma.masked_array(m_apparentalbedo, mask2)
    groundreferencealbedo = self.getsecondmin(p5_apparentalbedo)
    # Calculate the solar elevation using times, latitudes and omega.
    logging.info("Calculating solar elevation...")
    r_alphanoon = self.getsolarelevation(declination, loader.lat[0], 0)
    r_alphanoon = r_alphanoon * 2. / 3.
    r_alphanoon[r_alphanoon > 40] = 40
    r_alphanoon[r_alphanoon < 15] = 15
    solarelevation = cache.solarelevation[:]
    logging.info("Calculating the ground minimum albedo...")
    groundminimumalbedo = self.getsecondmin(
        np.ma.masked_array(
            apparentalbedo[condition],
            solarelevation[condition] < r_alphanoon[condition]))
    aux_2g0 = 2 * groundreferencealbedo
    aux_05g0 = 0.5 * groundreferencealbedo
    condition_2g0 = groundminimumalbedo > aux_2g0
    condition_05g0 = groundminimumalbedo < aux_05g0
    groundminimumalbedo[condition_2g0] = aux_2g0[condition_2g0]
    groundminimumalbedo[condition_05g0] = aux_05g0[condition_05g0]
    logging.info("Calculating the cloud index...")
    i = output.ref_globalradiation.shape[0]
    cloudindex = self.getcloudindex(apparentalbedo[-i:],
                                    groundminimumalbedo,
                                    cache.cloudalbedo[-i:])
    output.ref_cloudindex[:] = cloudindex
    output.ref_globalradiation[:] = (self.getclearsky(cloudindex) *
                                     cache.gc[-i:, :])
    nc.sync(output.root)
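# Worked example of the noon-window arithmetic above, assuming two images per
# hour; IMAGE_PER_HOUR = 2 is an illustrative value, not the original
# configuration:
IMAGE_PER_HOUR = 2
slot_window_in_hours = 4
image_per_day = 24 * IMAGE_PER_HOUR                          # 48 slots per day
noon_slot = image_per_day // 2                               # slot 24
half_window = (IMAGE_PER_HOUR * slot_window_in_hours) // 2   # 4 slots
print(noon_slot - half_window, noon_slot + half_window)      # window [20, 28)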
def estimate_ens(self, data=None, pc=[50]):
    """
    Estimate the score at the given percentiles for the generated ensemble.

    Output:
        Y1_pc = score at each requested percentile for the simulated ensemble
    """
    n_pc = len(pc)
    nbin = 50
    # Check whether generate_xy has already been called; if not, call it now.
    try:
        copula_ens = len(self.X1)
    except AttributeError:
        copula_ens = 10000
        self.generate_xy(copula_ens)
    if data is None:
        data = self.X
    n_ens = copula_ens // nbin  # average number of ensemble members per bin
    ind_sort = self.X1.argsort()
    x_mean = np.zeros((nbin,))
    y_pc = np.zeros((nbin, n_pc))
    for ii in range(nbin):
        members = ind_sort[n_ens * ii:n_ens * (ii + 1)]
        x_mean[ii] = self.X1[members].mean()
        for jj in range(n_pc):
            y_pc[ii, jj] = scoreatpercentile(self.Y1[members], pc[jj])
    Y1_pc = np.zeros((len(data), n_pc))
    for jj in range(n_pc):
        foo_pc = interp1d(x_mean, y_pc[:, jj], bounds_error=False)
        Y1_pc[:, jj] = foo_pc(data)
    return Y1_pc
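# Hypothetical call pattern for estimate_ens (the object `c` and its
# construction are assumptions; the owning class is not shown in this
# snippet):
#
#   Y1_pc = c.estimate_ens(np.linspace(-2, 2, 50), pc=[5, 50, 95])
#   # Y1_pc has shape (50, 3): one column per requested percentile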
# NOTE: several of the helpers exercised below (mean, var, stdev, itemfreq,
# histogram) exist only in legacy releases of scipy.stats and have since been
# removed; describe, scoreatpercentile, percentileofscore, cumfreq and
# relfreq are still available.
print('kurtosis:', stats.kurtosis(l), stats.kurtosis(lf),
      stats.kurtosis(a), stats.kurtosis(af))
print('mean:', stats.mean(a), stats.mean(af))
print('var:', stats.var(a), stats.var(af))
print('stdev:', stats.stdev(a), stats.stdev(af))
print('sem:', stats.sem(a), stats.sem(af))
print('describe:')
print(stats.describe(l))
print(stats.describe(lf))
print(stats.describe(a))
print(stats.describe(af))

print('\nFREQUENCY')
print('freqtable:')
print('itemfreq:')
print(stats.itemfreq(l))
print(stats.itemfreq(a))
print('scoreatpercentile:', stats.scoreatpercentile(l, 40),
      stats.scoreatpercentile(lf, 40),
      stats.scoreatpercentile(a, 40),
      stats.scoreatpercentile(af, 40))
print('percentileofscore:', stats.percentileofscore(l, 12),
      stats.percentileofscore(lf, 12),
      stats.percentileofscore(a, 12),
      stats.percentileofscore(af, 12))
print('histogram:', stats.histogram(l), stats.histogram(a))
print('cumfreq:')
print(stats.cumfreq(l))
print(stats.cumfreq(lf))
print(stats.cumfreq(a))
print(stats.cumfreq(af))
print('relfreq:')
print(stats.relfreq(l))
print(stats.relfreq(lf))
print(stats.relfreq(a))
print(stats.relfreq(af))

print('\nVARIATION')
print('obrientransform:')
l = range(1, 21)
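# On current SciPy/NumPy the scoreatpercentile call above can be checked
# against numpy.percentile, which matches its default (linear) interpolation;
# the toy array below is an illustrative assumption:
import numpy as np
from scipy import stats

arr = np.arange(20) + 1.0
print(stats.scoreatpercentile(arr, 40), np.percentile(arr, 40))  # should agree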
# Python 2 code: uses the `print >>fh` statement and dict.itervalues().
# collections, pysam and scipy.stats.scoreatpercentile are assumed to be
# imported at module level; GTF and GTFError are project-local helpers.
def _aggregate_gtf(gtf_file, sample_id, gtf_expr_attr, output_fh, stats_fh,
                   is_ref=False):
    def _init_t_dict():
        return {'_id': None, 'num_exons': 0, 'length': 0}

    t_dict = collections.defaultdict(_init_t_dict)
    cur_t_id = 1
    exprs = []
    for f in pysam.tabix_iterator(open(gtf_file), pysam.asGTF()):
        if f.feature == 'transcript':
            t_id = f.transcript_id
            if t_id in t_dict:
                m = 'GTF "%s" transcript_id "%s" not unique' % (gtf_file, t_id)
                raise GTFError(m)
            t_item = t_dict[t_id]
            # rename transcript id
            new_t_id = "%s.T%d" % (sample_id, cur_t_id)
            cur_t_id += 1
            t_item['_id'] = new_t_id
            if is_ref:
                expr = 0.0
            else:
                expr = float(f[gtf_expr_attr])
            exprs.append(expr)
            # prepare attributes
            attrs = {GTF.Attr.TRANSCRIPT_ID: new_t_id,
                     GTF.Attr.SAMPLE_ID: sample_id,
                     GTF.Attr.REF: str(int(is_ref)),
                     GTF.Attr.EXPR: str(expr)}
            # save attributes
            f.fromDict(attrs)
            print >>output_fh, str(f)
        elif f.feature == 'exon':
            t_id = f.transcript_id
            t_item = t_dict[t_id]
            # update statistics
            t_item['num_exons'] += 1
            t_item['length'] += (f.end - f.start)
            # replace transcript id
            f.fromDict({GTF.Attr.TRANSCRIPT_ID: t_item['_id']})
            print >>output_fh, str(f)
    # process statistics
    num_exons = []
    lengths = []
    for t_item in t_dict.itervalues():
        lengths.append(t_item['length'])
        num_exons.append(t_item['num_exons'])
    # compute and write stats
    quantiles = range(0, 101)
    expr_qs = (scoreatpercentile(exprs, q) for q in quantiles)
    expr_qs = ','.join(map(str, expr_qs))
    length_qs = (int(round(scoreatpercentile(lengths, q)))
                 for q in quantiles)
    length_qs = ','.join(map(str, length_qs))
    num_exon_qs = (int(round(scoreatpercentile(num_exons, q)))
                   for q in quantiles)
    num_exon_qs = ','.join(map(str, num_exon_qs))
    fields = [sample_id, len(t_dict), expr_qs, length_qs, num_exon_qs]
    print >>stats_fh, '\t'.join(map(str, fields))
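# The stats line above packs 101 percentiles into a single comma-separated
# field; a small Python 3 sketch of the same construction on toy data (the
# lengths are illustrative):
from scipy.stats import scoreatpercentile

lengths = [500, 800, 1200, 1500, 2500]
length_qs = ','.join(str(int(round(scoreatpercentile(lengths, q))))
                     for q in range(0, 101))
print(length_qs[:40] + '...')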
def check_percentile(self):
    x = arange(8) * 0.5
    assert_equal(stats.scoreatpercentile(x, 0), 0.)
    assert_equal(stats.scoreatpercentile(x, 100), 3.5)
    assert_equal(stats.scoreatpercentile(x, 50), 1.75)
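# Sanity check of the expected values: x is [0.0, 0.5, ..., 3.5], so the 0th
# and 100th percentiles are the extremes 0.0 and 3.5, and the 50th percentile
# interpolates between the two middle members 1.5 and 2.0, giving 1.75.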