def save_pickle(filename, data):
    # binary mode so the pickle round-trips on all platforms
    file = open(filename + '.pck', 'wb')
    pickle_dump(data, file)
    file.close()
    return
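# A minimal counterpart sketch (not part of the snippet above): loading back what
# save_pickle wrote, assuming pickle_dump/pickle_load are the standard pickle
# dump/load functions and the same '.pck' suffix convention.
def load_pickle(filename):
    from pickle import load as pickle_load  # cPickle would also work on Python 2
    with open(filename + '.pck', 'rb') as f:
        return pickle_load(f)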
def rule_timer_loop():
    logger.info('(ALL IPSC SYSTEMS) Rule timer loop started')
    _now = time()

    for _bridge in BRIDGES:
        for _system in BRIDGES[_bridge]:
            if _system['TO_TYPE'] == 'ON':
                if _system['ACTIVE'] == True:
                    if _system['TIMER'] < _now:
                        _system['ACTIVE'] = False
                        logger.info('Conference Bridge TIMEOUT: DEACTIVATE System: %s, Bridge: %s, TS: %s, TGID: %s',
                                    _system['SYSTEM'], _bridge, _system['TS'], int_id(_system['TGID']))
                    else:
                        timeout_in = _system['TIMER'] - _now
                        logger.info('Conference Bridge ACTIVE (ON timer running): System: %s Bridge: %s, TS: %s, TGID: %s, Timeout in: %ss,',
                                    _system['SYSTEM'], _bridge, _system['TS'], int_id(_system['TGID']), timeout_in)
                elif _system['ACTIVE'] == False:
                    logger.debug('Conference Bridge INACTIVE (no change): System: %s Bridge: %s, TS: %s, TGID: %s',
                                 _system['SYSTEM'], _bridge, _system['TS'], int_id(_system['TGID']))
            elif _system['TO_TYPE'] == 'OFF':
                if _system['ACTIVE'] == False:
                    if _system['TIMER'] < _now:
                        _system['ACTIVE'] = True
                        logger.info('Conference Bridge TIMEOUT: ACTIVATE System: %s, Bridge: %s, TS: %s, TGID: %s',
                                    _system['SYSTEM'], _bridge, _system['TS'], int_id(_system['TGID']))
                    else:
                        timeout_in = _system['TIMER'] - _now
                        logger.info('Conference Bridge INACTIVE (OFF timer running): System: %s Bridge: %s, TS: %s, TGID: %s, Timeout in: %ss,',
                                    _system['SYSTEM'], _bridge, _system['TS'], int_id(_system['TGID']), timeout_in)
                elif _system['ACTIVE'] == True:
                    logger.debug('Conference Bridge ACTIVE (no change): System: %s Bridge: %s, TS: %s, TGID: %s',
                                 _system['SYSTEM'], _bridge, _system['TS'], int_id(_system['TGID']))
            else:
                logger.debug('Conference Bridge NO ACTION: System: %s, Bridge: %s, TS: %s, TGID: %s',
                             _system['SYSTEM'], _bridge, _system['TS'], int_id(_system['TGID']))

    if BRIDGE_CONF['REPORT']:
        try:
            with open(CONFIG['REPORTS']['REPORT_PATH'] + 'confbridge_stats.pickle', 'wb') as file:
                pickle_dump(BRIDGES, file, 2)
        except IOError as detail:
            # this function only has the module-level logger in scope
            logger.error('I/O Error: %s', detail)
def reporting_loop(_logger):
    _logger.debug('Periodic Reporting Loop Started (PICKLE)')
    try:
        with open(_config['REPORTS']['REPORT_PATH'] + 'dmrlink_stats.pickle', 'wb') as file:
            pickle_dump(_config['SYSTEMS'], file, 2)
    except IOError as detail:
        _logger.error('I/O Error: %s', detail)
def getAnnObject2(collection, document):
    '''newest version of the getAnnObject method'''
    try:
        from os.path import join as path_join
        from document import real_directory
        real_dir = real_directory(collection)
    except:
        real_dir = collection
    app_path = WORK_DIR + "/application/"
    ann = None
    full_name = collection + document
    full_name = full_name.replace("/", "")
    if isfile(app_path + full_name):
        temp = open(app_path + full_name, 'rb')
        ann = pickle_load(temp)
        temp.close()
    else:
        ann = TextAnnotations(real_dir + document)
        ann = SimpleAnnotations(ann)
        ann.folia = {}
        try:
            # TODO: good error message
            ann.folia = get_extra_info(collection, document)
        except Exception as e:
            ann.folia = {}
            Messager.error('Error: get extra folia info() failed: %s' % e)

    # Validation:
    try:
        import os
        import simplejson as json
        import session
        docdir = os.path.dirname(ann._document)
        string = session.load_conf()["config"]
        val = json.loads(string)["validationOn"]
        # validate if config enables it and if it's not already done.
        if val:
            if not ann.validated:
                from verify_annotations import verify_annotation
                projectconf = ProjectConfiguration(docdir)
                issues = []
                issues = verify_annotation(ann, projectconf)
            else:
                issues = ann.issues
        else:
            ann.validated = False
            issues = []
    except session.NoSessionError:
        issues = []
    except KeyError:
        issues = []
    except Exception as e:
        # TODO add an issue about the failure?
        issues = []
    ann.issues = issues

    temp = open(app_path + full_name, 'wb')
    pickle_dump(ann, temp)
    temp.close()
    return ann
def _quick_test(classifiers, datasets, outdir, verbose=False, worker_pool=None,
        no_simstring_cache=False, use_test_set=False):
    if worker_pool is not None:
        raise NotImplementedError

    results_file_path = _get_quick_pickle_path(outdir)
    results_by_dataset = {}

    for dataset_id, dataset_getter in datasets.iteritems():
        if verbose:
            print >> stderr, 'Data set:', dataset_id
        if verbose:
            print >> stderr, 'Caching data set...',
        train, dev, test = dataset_getter()
        if use_test_set:
            train, dev = list(chain(train, dev)), list(test)
        else:
            train, dev = list(train), list(dev)
        if verbose:
            print >> stderr, 'Done!'

        if not no_simstring_cache:
            simstring_caching(classifiers, (train, dev), verbose=verbose)

        # Collect the seen type to iterate over later
        seen_types = set()
        results_by_classifier = {}

        for classifier_id, classifier_class in classifiers.iteritems():
            if verbose:
                print >> stderr, 'Classifier:', classifier_id

            classifier = classifier_class()
            classifier.train(train)

            score = score_classifier(classifier, dev)
            results_by_classifier[classifier_id] = score
            macro_score, micro_score, tp_sum, fn_sum, _ = score

            if verbose:
                res_str = ('Results: '
                           '{0:.3f}/'
                           '{1:.3f}/'
                           '{2}/{3} (MACRO/MICRO/TP/FN)'
                           ).format(macro_score, micro_score, tp_sum, fn_sum)
                print res_str

        results_by_dataset[dataset_id] = results_by_classifier

    with open(results_file_path, 'wb') as results_file:
        pickle_dump(results_by_dataset, results_file)

    if verbose:
        print >> stderr, 'Results written to:', results_file_path
def learning_curve_test(classifiers, datasets, outdir,
        verbose=False, no_simstring_cache=False, folds=10, worker_pool=None,
        min_perc=5, max_perc=100, step_perc=5, it_factor=1,
        pickle_name='learning', use_test_set=False):
    ### This part is really generic
    # TODO: We could keep old results... But dangerous, mix-up
    results_file_path = _get_learning_pickle_path(outdir, pickle_name)
    #XXX: RESUME GOES HERE!
    results_by_dataset = {}

    for dataset_id, dataset_getter in datasets.iteritems():
        if verbose:
            print >> stderr, 'Data set:', dataset_id
        if verbose:
            print >> stderr, 'Caching vectorised data...',
        train_set, dev_set, test_set = dataset_getter()
        if use_test_set:
            train, test = list(chain(train_set, dev_set)), list(test_set)
        else:
            train, test = list(train_set), list(dev_set)
        del train_set, dev_set, test_set
        if verbose:
            print >> stderr, 'Done!'

        results_by_dataset[dataset_id] = _learning_curve_test_data_set(
                classifiers, train, test, worker_pool, verbose=verbose,
                no_simstring_cache=no_simstring_cache, use_test_set=use_test_set,
                folds=folds, min_perc=min_perc, max_perc=max_perc,
                step_perc=step_perc, it_factor=it_factor)

        ### HACK TO GET INTERMEDIATE!
        with open(results_file_path, 'w') as results_file:
            pickle_dump(results_by_dataset, results_file)

        if verbose:
            print >> stderr, 'Results written to:', results_file_path
def test():
    try:
        from cPickle import dump as pickle_dump, load as pickle_load, dumps
    except ImportError:
        from pickle import dump as pickle_dump, load as pickle_load
    ann = TextAnnotations("/home/hast/Downloads/brat/data/brat_vb/sentiment/test")
    temp = open("/home/hast/Downloads/brat/work/brat_vbsentimenttest", 'wb')
    sann = SimpleAnnotations(ann)
    pickle_dump(sann, temp)
    temp.close()
def update_dump(j_dic, file_path):
    app_path = WORK_DIR + "/application/"
    temp_paths = file_path.split("/data/")
    try:
        full_name = temp_paths[1].replace("/", "")
        temp = open(app_path + full_name, 'wb')
        pickle_dump(j_dic, temp)
        temp.close()
    except Exception as e:
        Messager.error("Error while caching changes in the annotation file: " + str(e))
def dump(self, filename):
    """Dump the template object to ``filename`` so you can re-use it later.

    This method uses cPickle to serialize the internal template model, so you
    don't need to go through the learning process every time you need to parse
    data. It's worth using this method since the learning process generally
    costs a lot of time compared to parsing.
    """
    fp = open(filename, 'wb')
    pickle_dump(self, fp)
    fp.close()
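# Hedged usage sketch (not from the library source): re-loading a template object
# serialized by dump() above; only plain pickle loading is assumed here.
def load_template(filename):
    from pickle import load as pickle_load
    with open(filename, 'rb') as fp:
        return pickle_load(fp)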
def save(self, filename=PICKLE_CACHE_PATH):
    from cPickle import UnpicklingError
    from cPickle import dump as pickle_dump

    try:
        with open(filename, 'wb') as cache_file:
            pickle_dump(ordall(self._map), cache_file)
    except IOError:
        print >> sys.stderr, 'warning: failed to write cache.'
    except:
        print >> sys.stderr, 'warning: unexpected error writing cache.'
def save_cache(fn, data):
    from cPickle import UnpicklingError
    from cPickle import dump as pickle_dump

    try:
        with open(fn, 'wb') as cache_file:
            pickle_dump(data, cache_file)
    except IOError:
        print >> sys.stderr, "rewritetex: warning: failed to write cache."
    except:
        print >> sys.stderr, "rewritetex: warning: unexpected error writing cache."
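# Possible counterpart to save_cache (an assumption, not the original code):
# read the pickled cache back, returning None when the file is missing or corrupt.
def load_cache(fn):
    from cPickle import load as pickle_load, UnpicklingError
    try:
        with open(fn, 'rb') as cache_file:
            return pickle_load(cache_file)
    except (IOError, UnpicklingError):
        return None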
def main(args):
    argp = ARGPARSER.parse_args(args[1:])

    # Create a dataset out of the input
    doc = _tab_separated_input_to_doc(argp.input)

    # Cache the strings for speed
    cache_simstring(((doc, ), ), verbose=argp.verbose)

    classifier = SimStringInternalClassifier()
    classifier.train((doc, ))

    # write the pickled model in binary mode
    with open(argp.model_path, 'wb') as model_file:
        pickle_dump(classifier, model_file)
def compile_file(path, to_python=False):
    qpt = QPTokenizer()
    with open(path, 'rb') as qp:
        qpt.compile_source(qp)
    base_dir, parser = os.path.split(path)
    if not to_python:
        cparser = parser + 'y'
        cfile = open(os.path.join(base_dir, cparser), 'wb')
        cfile.write(COMPILED_SIGNATURE)
        pickle_dump(qpt.globals, cfile, HIGHEST_PROTOCOL)
    else:
        cparser = parser.split('.')[0] + '.py'
        cfile = open(os.path.join(base_dir, cparser), 'wb')
        cfile.write(str(qpt.globals))
def getAnnObject(collection, document):
    try:
        real_dir = real_directory(collection)
    except:
        real_dir = collection
    app_path = WORK_DIR + "/application/"
    full_name = collection + document
    full_name = full_name.replace("/", "")
    if os.path.isfile(app_path + full_name):
        temp = open(app_path + full_name, 'rb')
        ann = pickle_load(temp)
        temp.close()
    else:
        ann = TextAnnotations(real_dir + document)
        ann = SimpleAnnotations(ann)
        ann.folia = {}
        try:
            # TODO: good error message
            ann.folia = get_extra_info(collection, document)
        except Exception as e:
            ann.folia = {}
            Messager.error('Error: get extra folia info() failed: %s' % e)

    # Validation:
    try:
        docdir = os.path.dirname(ann._document)
        string = session.load_conf()["config"]
        val = json.loads(string)["validationOn"]
        # validate if config enables it and if it's not already done.
        if val:
            if not ann.validated:
                projectconf = ProjectConfiguration(docdir)
                issues = verify_annotation(ann, projectconf)
            else:
                issues = ann.issues
        else:
            ann.validated = False
            issues = []
    except session.NoSessionError:
        issues = []
    except KeyError:
        issues = []
    except Exception as e:
        # TODO add an issue about the failure?
        issues = []
        Messager.error('Error: validation failed: %s' % e)
    ann.issues = issues

    temp = open(app_path + full_name, 'wb')
    pickle_dump(ann, temp)
    temp.close()
    return ann
def _save_simstring_query_cache():
    # Check if the cache directory exists, otherwise create it
    if not exists(dirname(SIMSTRING_QUERY_CACHE_DIR_PATH)):
        from os import makedirs
        makedirs(SIMSTRING_QUERY_CACHE_DIR_PATH)

    # Save if we have a cache and it has been modified
    if SIMSTRING_QUERY_CACHE is not None and MODIFIED_SIMSTRING_QUERY_CACHE:
        # We could suffer race conditions here so we write to a tempfile
        # and then swap it in place
        tmp_file = None
        try:
            with NamedTemporaryFile('wb', delete=False) as tmp_file:
                # Dump with the highest available protocol
                pickle_dump(SIMSTRING_QUERY_CACHE, tmp_file, -1)
            move(tmp_file.name, SIMSTRING_QUERY_CACHE_PATH)
        finally:
            # If something went wrong, we need to clean up /tmp
            if tmp_file is not None and exists(tmp_file.name):
                remove(tmp_file.name)
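# Illustrative sketch of the same atomic-write pattern used above, in isolation
# (the helper name and target_path parameter are hypothetical): dump to a
# NamedTemporaryFile, then move it over the target path so readers never see a
# half-written pickle.
def atomic_pickle_dump(obj, target_path):
    from cPickle import dump as pickle_dump
    from tempfile import NamedTemporaryFile
    from shutil import move
    from os.path import exists
    from os import remove
    tmp_file = None
    try:
        with NamedTemporaryFile('wb', delete=False) as tmp_file:
            pickle_dump(obj, tmp_file, -1)  # highest available protocol
        move(tmp_file.name, target_path)
    finally:
        # if the move did not happen, clean up the temporary file
        if tmp_file is not None and exists(tmp_file.name):
            remove(tmp_file.name)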
def _dump(self, f):
    try:
        pos = f.tell()
        pickle_dump(self.store, f)
    except:
        S = self.store.copy()
        ff = getBytesIO()
        for k, v in S.items():
            try:
                pickle_dump({k: v}, ff)
            except:
                S[k] = '<unpicklable object %r>' % v
        f.seek(pos, 0)
        pickle_dump(S, f)
def main():
#===============================================================================
    cwdir = os.getcwd()
    SIBrundir = os.path.join(cwdir, 'forward_runs')

    sites = ['BE-Lon', 'DE-Kli', 'FR-Gri', 'IT-BCi', 'NL-Dij', 'NL-Lan']

    # time axis of all time series
    tm = pd.date_range('2000-01-01 00:00:00', '2010-12-31 23:59:59', freq='1d')

    series = dict()

    # strange thing is: SiBCASA ignores leap years and does not simulate 29 feb
    # we have to delete 3 dates on 3 leap years between 2000 and 2010
    new_tm = tm[0:59].union(tm[60:1520].union(tm[1521:2981].union(tm[2982:])))
    print new_tm, len(new_tm)

    for site in sites:
        # open all the years and store in one list
        namefile = '%s_2000-2010/'%(site) + 'hsib_*.qp2.nc'
        pathfile = os.path.join(SIBrundir, namefile)

        # open all 11 years * 12 files
        f = cdf.MFDataset(pathfile)

        # get daily GPP and NEE (in micromoles/m2/s) and convert
        # the fluxes to gC/m2/d:
        fac = 0.000001*12.  # conversion from micromoles to gC
        dt = 3600. * 24.    # nb of seconds in a day
        Sib_gpp  = np.array(-f.variables['gpp'][:])*fac*dt
        Sib_ter  = np.array(f.variables['resp_tot'][:])*fac*dt
        Sib_rhet = np.array(f.variables['resp_het'][:])*fac*dt
        Sib_raut = np.array(f.variables['resp_auto'][:])*fac*dt
        Sib_nee  = np.array(f.variables['NEE_2'][:])*fac*dt
        # from moles/m2 to gC/m2
        Sib_csoil = np.array(f.variables['carb_soil'][:])*fac*1000000.*dt

        # close file
        f.close()

        series[site] = dict()
        series[site]['GPP']  = pd.Series([l[0] for l in Sib_gpp],  index=new_tm)
        series[site]['TER']  = pd.Series([l[0] for l in Sib_ter],  index=new_tm)
        series[site]['Rhet'] = pd.Series([l[0] for l in Sib_rhet], index=new_tm)
        series[site]['Raut'] = pd.Series([l[0] for l in Sib_raut], index=new_tm)
        series[site]['NEE']  = pd.Series([l[0] for l in Sib_nee],  index=new_tm)

        fig, ax = plt.subplots(nrows=1, ncols=1)
        fig.suptitle(site, fontsize=14)
        series[site]['GPP'].plot(label='GPP')
        series[site]['TER'].plot(label='TER')
        series[site]['Rhet'].plot(label='Rhet')
        series[site]['Raut'].plot(label='Raut')
        series[site]['NEE'].plot(label='NEE')
        ax.legend()

    # store the formatted pandas timeseries in a pickle file
    filepath = os.path.join(SIBrundir, 'timeseries_SiBCASA.pickle')
    pickle_dump(series, open(filepath, 'wb'))

    # preview the timeseries per site
    plt.show()
def retrieve_CGMS_input(grid, year, crop_no, suitable_stu, engine,
    retrieve_weather=False):
#===============================================================================
    # Temporarily add code directory to python path, to be able to import pcse
    # modules
    sys.path.insert(0, codedir)
#-------------------------------------------------------------------------------
    from pcse.exceptions import PCSEError
    from pcse.db.cgms11 import TimerDataProvider, SoilDataIterator, \
                               CropDataProvider, STU_Suitability, \
                               SiteDataProvider, WeatherObsGridDataProvider

    # if the retrieval does not raise an error, the crop was cultivated that year
    print ' - grid cell no %i'%grid
    try:
        # We retrieve the crop calendar (timerdata)
        filename = os.path.join(CGMSdir, 'timerdata_objects/%i/c%i/'%(year, crop_no),
                   'timerobject_g%d_c%d_y%d.pickle'%(grid, crop_no, year))
        if os.path.exists(filename):
            pass
        else:
            timerdata = TimerDataProvider(engine, grid, crop_no, year)
            pickle_dump(timerdata, open(filename, 'wb'))

        # If required by the user, we retrieve the weather data
        if retrieve_weather == True:
            filename = os.path.join(CGMSdir, 'weather_objects/',
                       'weatherobject_g%d.pickle'%(grid))
            if os.path.exists(filename):
                pass
            else:
                weatherdata = WeatherObsGridDataProvider(engine, grid)
                weatherdata._dump(filename)

        # We retrieve the soil data (soil_iterator)
        filename = os.path.join(CGMSdir, 'soildata_objects/',
                   'soilobject_g%d.pickle'%(grid))
        if os.path.exists(filename):
            soil_iterator = pickle_load(open(filename, 'rb'))
        else:
            soil_iterator = SoilDataIterator(engine, grid)
            pickle_dump(soil_iterator, open(filename, 'wb'))

        # We retrieve the crop variety info (crop_data)
        filename = os.path.join(CGMSdir, 'cropdata_objects/%i/c%i/'%(year, crop_no),
                   'cropobject_g%d_c%d_y%d.pickle'%(grid, crop_no, year))
        if os.path.exists(filename):
            pass
        else:
            cropdata = CropDataProvider(engine, grid, crop_no, year)
            pickle_dump(cropdata, open(filename, 'wb'))

        # WE LOOP OVER ALL SOIL TYPES LOCATED IN THE GRID CELL:
        for smu_no, area_smu, stu_no, percentage, soildata in soil_iterator:

            # NB: we remove all unsuitable soils from the iteration
            if (stu_no not in suitable_stu):
                pass
            else:
                print ' soil type no %i'%stu_no

                # We retrieve the site data (site management)
                if (str(grid)).startswith('1'):
                    dum = str(grid)[0:2]
                else:
                    dum = str(grid)[0]
                filename = os.path.join(CGMSdir,
                           'sitedata_objects/%i/c%i/grid_%s/'%(year, crop_no, dum),
                           'siteobject_g%d_c%d_y%d_s%d.pickle'%(grid, crop_no, year, stu_no))
                if os.path.exists(filename):
                    pass
                else:
                    sitedata = SiteDataProvider(engine, grid, crop_no, year, stu_no)
                    pickle_dump(sitedata, open(filename, 'wb'))

    # if an error is raised, the crop was not grown that year
    except PCSEError:
        print ' the crop was not grown that year in that grid cell'
    except Exception as e:
        print ' Unexpected error', e  #sys.exc_info()[0]

    return None
def main():
#===============================================================================
#-------------------------------------------------------------------------------
# ================================= USER INPUT =================================
    # read the settings from the rc file
    rcdict = rc.read('settings.rc')
# ==============================================================================
#-------------------------------------------------------------------------------
    # extract the needed information from the rc file
    sites = [s.strip(' ') for s in rcdict['sites'].split(',')]
    resolution = rcdict['resolution']  # can be hourly or daily

    # directory paths
    fluxnetdir = rcdict['obsdir']
    obsdir = os.path.join(fluxnetdir, 'regrouped_data')
#-------------------------------------------------------------------------------
    if resolution == 'daily':
        filelist = ['BE-Lon_FLUXNET2015_FULLSET_DD_2004-2014.csv',
                    'FR-Gri_FLUXNET2015_FULLSET_DD_2004-2013.csv',
                    'DE-Kli_FLUXNET2015_FULLSET_DD_2004-2014.csv',
                    'IT-BCi_mais_2004-2009_daily.csv']
    elif resolution == '3-hourly':
        filelist = ['BE-Lon_FLUXNET2015_FULLSET_HH_2004-2014.csv',
                    'FR-Gri_FLUXNET2015_FULLSET_HH_2004-2013.csv',
                    'DE-Kli_FLUXNET2015_FULLSET_HH_2004-2014.csv',
                    'IT-BCi_mais_2004-2009_daily.csv']
#-------------------------------------------------------------------------------
    # Extract timeseries for the different sites

    # read files for the diferent sites
    f = open_csv(obsdir, filelist, convert_to_float=True)

    series = dict()
    filepath = os.path.join(fluxnetdir, '%s_timeseries_OBS.pickle' % resolution)

    for fnam, site in zip(filelist, sites):
        print site

        # TA_F_DAY: average daytime Ta_day from meas and ERA (*C)
        # SW_IN_F: SWin from meas and ERA (W.m-2)
        # VPD_F: VPD consolidated from VPD_F_MDS and VPD_F_ERA (hPa)
        # TS_F_MDS_1 to 4: Tsoil of 4 soil layers (*C)
        # SWC_F_MDS_1 to 4: soil water content (%) of 4 layers (1=shallow)
        # NT = night-time partitioning method (gC m-2 s-1)
        # VUT: variable ref u* between years
        FLUX_variables = ['TA_F_DAY', 'SW_IN_F', 'VPD_F', 'TS_F_MDS_1',
                          'TS_F_MDS_2', 'TS_F_MDS_3', 'SWC_F_MDS_1',
                          'SWC_F_MDS_2', 'SWC_F_MDS_3', 'GPP_NT_VUT_REF',
                          'RECO_NT_VUT_REF', 'NEE_VUT_REF', 'crop', 'LAI',
                          'AGB', 'C_height']
        FLUX_varnames = ['Ta_day', 'SWin', 'VPD', 'Ts_1', 'Ts_2', 'Ts_3',
                         'SWC_1', 'SWC_2', 'SWC_3', 'GPP', 'TER', 'NEE',
                         'crop_no', 'LAI', 'AGB', 'CHT']
        IT_variables = ['SWC_avg', 'GPP', 'Reco', 'NEE', 'crop', 'GLAI',
                        'AGB', 'C_height']
        IT_varnames = ['SWC', 'GPP', 'TER', 'NEE', 'crop_no', 'LAI', 'AGB',
                       'CHT']

        # timestamps for all daily timeseries
        startyear = str(f[fnam]['TIMESTAMP'][0])[0:4]
        endyear = str(f[fnam]['TIMESTAMP'][-1])[0:4]
        startdate = '%s-01-01 00:00:00' % startyear
        enddate = '%s-12-31 23:30:00' % endyear
        if site == 'DE-Kli':
            enddate = '%s-12-31 23:00:00' % endyear

        series[site] = dict()

        if resolution == '3-hourly':
            tm = pd.date_range(startdate, enddate, freq='30min')
            if (site != 'IT-BCi'):
                for var, varname in zip(FLUX_variables[:12], FLUX_varnames[:12]):
                    # if the fluxes are half-hourly, I convert them to 3-hourly
                    if varname == 'Ta_day':
                        series[site]['Ta'] = pd.Series(f[fnam]['TA_F'], index=tm)
                    elif ((varname == 'SWC_2' or varname == 'SWC_3')
                          and site == 'FR-Gri'):
                        series[site][varname] = pd.Series([-9999.] * len(tm),
                                                          index=tm)
                    else:
                        series[site][varname] = pd.Series(f[fnam][var], index=tm)
                    print varname
        elif resolution == 'daily':
            tm = pd.date_range(startdate, enddate, freq='1d')
            if (site != 'IT-BCi'):
                for var, varname in zip(FLUX_variables, FLUX_varnames):
                    series[site][varname] = pd.Series(f[fnam][var], index=tm)
                    print varname
            else:
                tm_irreg = [pd.to_datetime('%s-%s-%s' % (str(t)[0:4], str(t)[4:6],
                            str(t)[6:8])) for t in f[fnam]['TIMESTAMP']]
                # since the time records has gaps in the IT-BCi data, we use a
                # special function to fill the gaps with -9999. values and
                # convert it to pandas timeseries
                for var, varname in zip(IT_variables, IT_varnames):
                    #if varname == 'VPD':
                    #    ta = f[fnam]['T_avg']
                    #    dayvar = f[fnam]['Rh_avg'] / 100. * 6.11 * np.exp(ta /\
                    #             (238.3 + ta) * 17.2694)
                    dayvar = f[fnam][var]
                    series[site][varname] = convert2pandas(tm_irreg, dayvar, tm)
                    print varname
        else:
            print "Wrong CO2 fluxes temporal resolution: must be either "+\
                  "'daily' or '3-hourly'"
            sys.exit()

    # we store the pandas series in one pickle file
    pickle_dump(series, open(filepath, 'wb'))
#-------------------------------------------------------------------------------
    # plot timeseries
    # Let's plot the available micromet variables that are important for WOFOST
    #plot_fluxnet_micromet(obsdir,sites,[2005,2005],'-')

    # Let's plot GPP, TER, NEE
    #plot_fluxnet_daily_c_fluxes(obsdir,sites,[2004,2014],'-')
    #plot_fluxnet_LAI_CHT_AGB(obsdir,sites,[2004,2014],'o')
#-------------------------------------------------------------------------------
    return series
def main():
#===============================================================================
    global inputdir, codedir, outputdir, CGMSdir, obsdir
#-------------------------------------------------------------------------------
    import cx_Oracle
    import sqlalchemy as sa
    from datetime import datetime
#-------------------------------------------------------------------------------
# ================================= USER INPUT =================================
    # read the settings from the rc file
    rcdict = rc.read('settings.rc')
#===============================================================================
#-------------------------------------------------------------------------------
    # extract the needed information from the rc file
    sites = [s.strip(' ') for s in rcdict['sites'].split(',')]
    crops = [s.strip(' ') for s in rcdict['crops'].split(',')]
    crop_nos = [int(s.strip(' ')) for s in rcdict['crop_nos'].split(',')]
    years = [int(s.strip(' ')) for s in rcdict['years'].split(',')]
    obsdir = rcdict['obsdir']
    inputdir = rcdict['inputdir']
    CGMSdir = os.path.join(inputdir, 'CGMS')
    codedir = rcdict['codedir']
#-------------------------------------------------------------------------------
    # get the closest CGMS grid cell id number for each FluxNet site

    # get the sites longitude and latitudes
    sitdict = open_csv(os.path.join(obsdir, 'regrouped_data'), 'sites_info.txt',
                       convert_to_float=False)
    site_lons = sitdict['site_lons']
    site_lats = sitdict['site_lats']

    # we read the CGMS grid cells coordinates from file
    CGMS_cells = open_csv(CGMSdir, 'CGMS_grid_list.csv', convert_to_float=True)
    all_grids = CGMS_cells['GRID_NO']
    all_lons = CGMS_cells['LONGITUDE']
    all_lats = CGMS_cells['LATITUDE']

    flux_gri = dict()
    for i, site in enumerate(sitdict['sites']):
        lon = float(site_lons[i])
        lat = float(site_lats[i])
        # compute the distance to site for all CGMS grid cells
        dist_list = list()
        for j, grid_no in enumerate(all_grids):
            distance = ((all_lons[j]-lon)**2. + (all_lats[j]-lat)**2.)**(1./2.)
            dist_list += [distance]
        # select the closest grid cell
        indx = np.argmin(np.array(dist_list))
        flux_gri[site] = all_grids[indx]

        print 'FluxNet site %s with lon=%5.2f, lat=%5.2f: closest grid cell is %i'%(site, lon, lat, all_grids[indx])
#-------------------------------------------------------------------------------
    # create new file with grid cell number in it
    filename = os.path.join(inputdir, 'sites_info2.csv')
    newres = open(filename, 'wb')
    oldres = open(os.path.join(obsdir, 'regrouped_data/sites_info.txt'), 'rU')
    reader = oldres.readlines()
    oldres.close()
    for l, line in enumerate(reader):
        site = line.split(',')[0].strip(' ')
        if l == 0:
            line = line.strip('\n') + ', gridcells\n'
        else:
            line = line.strip('\n') + ',%10i'%int(flux_gri[site]) + '\n'
        newres.write(line)
    newres.close()
    print '\nWe successfully created the input file with grid cell IDs:\n%s'%filename
#-------------------------------------------------------------------------------
    # retrieve the necessary input data for all sites

    # settings of the connection
    user = "******"
    password = "******"
    tns = "EURDAS.WORLD"
    dsn = "oracle+cx_oracle://{user}:{pw}@{tns}".format(user=user, pw=password, tns=tns)
    engine = sa.create_engine(dsn)
    print engine

    # test the connection:
    try:
        connection = cx_Oracle.connect("cgms12eu_select/[email protected]")
    except cx_Oracle.DatabaseError:
        print '\nBEWARE!! The Oracle database is not responding. Probably, you are'
        print 'not using a computer wired within the Wageningen University network.'
        print '--> Get connected with ethernet cable before trying again!'
        sys.exit()

    for c, crop in enumerate(crops):
        crop_no = crop_nos[c]
        print '\nRetrieving input data for %s (CGMS id=%i)'%(crop, crop_no)

        # We add a timestamp at start of the retrieval
        start_timestamp = datetime.utcnow()

        # We retrieve the list of suitable soil types for the selected crop
        # species
        filename = os.path.join(CGMSdir, 'soildata_objects/',
                   'suitablesoilsobject_c%d.pickle'%(crop_no))
        if os.path.exists(filename):
            suitable_stu = pickle_load(open(filename, 'rb'))
        else:
            from pcse.db.cgms11 import STU_Suitability
            suitable_stu = STU_Suitability(engine, crop_no)
            suitable_stu_list = []
            for item in suitable_stu:
                suitable_stu_list = suitable_stu_list + [item]
            suitable_stu = suitable_stu_list
            pickle_dump(suitable_stu, open(filename, 'wb'))
        print 'retrieving suitable soils for %s'%crop

        # WE LOOP OVER ALL YEARS:
        for y, year in enumerate(years):
            print '\n######################## Year %i ##############'%year+\
                  '##########\n'
            # if we do a serial iteration, we loop over the grid cells that
            # contain arable land
            for grid in flux_gri.values():
                retrieve_CGMS_input(grid, year, crop_no, suitable_stu, engine)

        # We add a timestamp at end of the retrieval, to time the process
        end_timestamp = datetime.utcnow()
        print '\nDuration of the retrieval:', end_timestamp - start_timestamp
    try:
        makedirs(SESSIONS_DIR)
    except OSError, e:
        if e.errno == 17:
            # Already exists
            pass
        else:
            raise

    # Write to a temporary file and move it in place, for safety
    tmp_file_path = None
    try:
        tmp_file_fh, tmp_file_path = mkstemp()
        os_close(tmp_file_fh)

        with open(tmp_file_path, 'wb') as tmp_file:
            pickle_dump(CURRENT_SESSION, tmp_file)
        copy(tmp_file_path, get_session_pickle_path(CURRENT_SESSION.get_sid()))
    except IOError:
        # failed store: no permissions?
        raise SessionStoreError
    finally:
        if tmp_file_path is not None:
            remove(tmp_file_path)


def save_conf(config):
    get_session()['conf'] = config
    return {}


def load_conf():
    try:
        return {
def main():
#===============================================================================
    global inputdir, outputdir, optimidir
#-------------------------------------------------------------------------------
# ================================= USER INPUT =================================
    # read the settings from the rc file
    rcdict = rc.read('settings.rc')
#===============================================================================
    # extract the needed information from the rc file
    sites = [s.strip(' ') for s in rcdict['sites'].split(',')]
    #NUTS_reg = [s.strip(' ') for s in rcdict['NUTS_reg'].split(',')]
    crops = [s.strip(' ') for s in rcdict['crops'].split(',')]
    crop_nos = [int(s.strip(' ')) for s in rcdict['crop_nos'].split(',')]
    years = [int(s.strip(' ')) for s in rcdict['years'].split(',')]

    # directory paths
    outputdir = rcdict['outputdir']
    inputdir = rcdict['inputdir']
#-------------------------------------------------------------------------------
    # get the list of NUTS 2 region names associated to the list of FluxNet sites
    from WOF_00_retrieve_input_data import open_csv
    sitdict = open_csv(inputdir, 'sites_info2.csv', convert_to_float=False)
    NUTS_reg = sitdict['NUTS_reg']
#-------------------------------------------------------------------------------
    # list the old gapfilled files to remove, and remove them all
    for s, site in enumerate(sites):
        for c, crop_name in enumerate(crops):
            crop_no = crop_nos[c]
            for year in years:
                optimidir = os.path.join(outputdir, 'fgap/%i/c%i/'%(year, crop_no))
                files2remove = [f for f in os.listdir(optimidir)
                                if '_gapfilled' in f]
                for f in files2remove:
                    os.remove(os.path.join(optimidir, f))
#-------------------------------------------------------------------------------
    # gap fill
    for s, site in enumerate(sites):
        NUTS_no = NUTS_reg[s]
        for c, crop_name in enumerate(crops):
            crop_no = crop_nos[c]
            for year in years:
                # create output folder if it doesn't already exists
                optimidir = os.path.join(outputdir, 'fgap/%i/c%i/'%(year, crop_no))

                # detect if there is this year needs to be gapfilled
                f2gapfill = [f for f in os.listdir(optimidir)
                             if ('_tobegapfilled' in f) and (NUTS_no in f)]
                if len(f2gapfill) == 0:
                    continue

                print '\nWe gap fill:', site, NUTS_no, year, crop_name

                # GAP-FILLING YLDGAPF for NUTS2 level:
                prevyear = os.path.join(optimidir.replace('%04d'%year, '%04d'%(year-1)),
                                        'fgap_%s_optimized.pickle'%NUTS_no)
                nextyear = os.path.join(optimidir.replace('%04d'%year, '%04d'%(year+1)),
                                        'fgap_%s_optimized.pickle'%NUTS_no)
                availfiles = []
                availyears = []
                for yr in range(1995, 2020):
                    searchyear = os.path.join(optimidir.replace('%04d'%year, '%04d'%yr),
                                              'fgap_%s_optimized.pickle'%NUTS_no)
                    if os.path.exists(searchyear):
                        availfiles.append(searchyear)
                        availyears.append(yr)
                print "%d years found for gap filling:"%len(availfiles), availyears

                # Use average from y-1 and y+1
                if prevyear in availfiles and nextyear in availfiles:
                    optimi_info = pickle_load(open(prevyear, 'rb'))
                    ygf_prev = optimi_info[2]
                    optimi_info = pickle_load(open(nextyear, 'rb'))
                    ygf_next = optimi_info[2]
                    ygf = (ygf_prev + ygf_next) / 2.0  # simply average
                    opt_code = 'gapfilled02'
                    shortlist_cells = optimi_info[3]

                # Use previous year value
                elif prevyear in availfiles:
                    optimi_info = pickle_load(open(prevyear, 'rb'))
                    ygf = optimi_info[2]
                    opt_code = 'gapfilled03a'
                    shortlist_cells = optimi_info[3]
                    print shortlist_cells

                # Use next year value
                elif nextyear in availfiles:
                    optimi_info = pickle_load(open(nextyear, 'rb'))
                    ygf = optimi_info[2]
                    opt_code = 'gapfilled03b'
                    shortlist_cells = optimi_info[3]

                # Use climatological average from other years if nyear > 2
                elif len(availfiles) > 2:
                    ygf = 0.0
                    for filename in availfiles:
                        optimi_info = pickle_load(open(filename, 'rb'))
                        ygf += optimi_info[2]
                    ygf = ygf / len(availfiles)
                    opt_code = 'gapfilled04'
                    shortlist_cells = optimi_info[3]

                # Use upper NUTS level optimum (NUTS1, or NUTS0 at worst)
                else:
                    try:
                        nuts1file = os.path.join(optimidir,
                                    'fgap_%s_optimized.pickle'%NUTS_no[0:3])
                        data = pickle_load(open(nuts1file, 'rb'))
                        ygf = data[2]
                        opt_code = 'gapfilled05a'
                        shortlist_cells = data[3]
                    except IOError:
                        try:
                            nuts0file = os.path.join(optimidir,
                                        'fgap_%s_optimized.pickle'%NUTS_no[0:2])
                            data = pickle_load(open(nuts0file, 'rb'))
                            ygf = data[2]
                            opt_code = 'gapfilled05b'
                            shortlist_cells = data[3]
                        # Use default value if all previous methods fail
                        except IOError:
                            ygf = 0.8
                            opt_code = 'gapfilled06'
                            shortlist_cells = []

                print "Using ygf of %5.2f and code of %s"%(ygf, opt_code)
                print "created file fgap_%s_%s.pickle"%(NUTS_no, opt_code)+\
                      " in folder %s"%optimidir
                currentyear = os.path.join(optimidir,
                              'fgap_%s_%s.pickle'%(NUTS_no, opt_code))
                pickle_dump([NUTS_no, opt_code, ygf, shortlist_cells],
                            open(currentyear, 'wb'))
def main():
#===============================================================================
    global inputdir, codedir, outputdir, CGMSdir, ECMWFdir, optimidir, forwardir,\
           EUROSTATdir, mmC, mmCO2, mmCH2O
#-------------------------------------------------------------------------------
    # fixed molar masses for unit conversion of carbon fluxes
    mmC = 12.01
    mmCO2 = 44.01
    mmCH2O = 30.03

# ================================= USER INPUT =================================
    # read the settings from the rc file
    rcdict = rc.read('settings.rc')
#===============================================================================
#-------------------------------------------------------------------------------
    # extract the needed information from the rc file
    sites = [s.strip(' ') for s in rcdict['sites'].split(',')]
    #site_lons = [float(s.strip(' ')) for s in rcdict['site_lons'].split(',')]
    #site_lats = [float(s.strip(' ')) for s in rcdict['site_lats'].split(',')]
    #gridcells = [float(s.strip(' ')) for s in rcdict['gridcells'].split(',')]
    #NUTS_reg = [s.strip(' ') for s in rcdict['NUTS_reg'].split(',')]
    crops = [s.strip(' ') for s in rcdict['crops'].split(',')]
    crop_nos = [int(s.strip(' ')) for s in rcdict['crop_nos'].split(',')]
    years = [int(s.strip(' ')) for s in rcdict['years'].split(',')]

    # forward runs settings
    force_forwardsim = str_to_bool(rcdict['force_forwardsim'])
    selec_method = rcdict['selec_method']
    ncells = int(rcdict['ncells'])
    nsoils = int(rcdict['nsoils'])
    weather = rcdict['weather']

    # carbon cycle settings
    TER_method = rcdict['TER_method']  # if grow-only: NEE = GPP + Rgrow + Rsoil
    Eact0 = float(rcdict['Eact0'])
    R10 = float(rcdict['R10'])
    resolution = rcdict['resolution']  # can be hourly or daily

    # directory paths
    outputdir = rcdict['outputdir']
    inputdir = rcdict['inputdir']
    codedir = rcdict['codedir']
    CGMSdir = os.path.join(inputdir, 'CGMS')
    ECMWFdir = os.path.join(inputdir, 'ECMWF')
    EUROSTATdir = os.path.join(inputdir, 'EUROSTATobs')
#-------------------------------------------------------------------------------
    # get the sites longitude and latitudes
    from WOF_00_retrieve_input_data import open_csv
    sitdict = open_csv(inputdir, 'sites_info2.csv', convert_to_float=False)
    site_lons = [float(l) for l in sitdict['site_lons']]
    site_lats = [float(l) for l in sitdict['site_lats']]
    gridcells = [int(g) for g in sitdict['gridcells']]
    NUTS_reg = sitdict['NUTS_reg']
#-------------------------------------------------------------------------------
    # run WOFOST at the location / year / crops specified by user
    print '\nYLDGAPF(-), grid_no, year, stu_no, stu_area(ha), '\
          +'TSO(kgDM.ha-1), TLV(kgDM.ha-1), TST(kgDM.ha-1), '\
          +'TRT(kgDM.ha-1), maxLAI(m2.m-2), rootdepth(cm), TAGP(kgDM.ha-1)'

    # we format the time series using the pandas python library, for easy plotting
    startdate = '%i-01-01 00:00:00'%years[0]
    enddate = '%i-12-31 23:59:59'%years[-1]
    if resolution == 'daily':
        dtimes = pd.date_range(startdate, enddate, freq='1d')
    elif resolution == '3-hourly':
        dtimes = pd.date_range(startdate, enddate, freq='3H')
    else:
        print "Wrong CO2 fluxes temporal resolution: must be either 'daily' or '3-hourly'"
        sys.exit()

    series = dict()

    for s, site in enumerate(sites):
        lon = site_lons[s]
        lat = site_lats[s]
        grid_no = gridcells[s]
        NUTS_no = NUTS_reg[s]
        series[site] = dict()

        for c, crop_name in enumerate(crops):
            cpno = crop_nos[c]
            series[site]['c%i'%cpno] = dict()

            list_of_gpp = np.array([])
            list_of_raut = np.array([])
            list_of_rhet = np.array([])
            list_of_ter = np.array([])
            list_of_nee = np.array([])

            for year in years:
                # create output folder if it doesn't already exists
                optimidir = os.path.join(outputdir, 'fgap/%i/c%i/'%(year, cpno))

                # create output folder if it doesn't already exists
                forwardir = os.path.join(outputdir, 'forward_runs/%i/c%i/'%(year, cpno))
                if not os.path.exists(forwardir):
                    os.makedirs(forwardir)

                print '\n', site, NUTS_no, year, crop_name

                # RETRIEVE OPTIMUM FGAP:
                # either the NUTS2 optimum if it exists
                ygf_path = os.path.join(optimidir, 'fgap_%s_optimized.pickle'%NUTS_no)
                # or the gapfilled version
                if not os.path.exists(ygf_path):
                    ygf_file = [f for f in os.listdir(optimidir)
                                if (NUTS_no in f) and ('_gapfilled' in f)][0]
                    ygf_path = os.path.join(optimidir, ygf_file)
                fgap_info = pickle_load(open(ygf_path, 'rb'))
                yldgapf = fgap_info[2]

                # FORWARD SIMULATIONS:
                perform_yield_sim(cpno, grid_no, int(year), yldgapf,
                                  selec_method, nsoils, force_forwardsim)

                # POST-PROCESSING OF GPP, RAUTO, RHET, NEE:
                SimData = compute_timeseries_fluxes(cpno, grid_no, lon, lat,
                                                    year, R10, Eact0,
                                                    selec_method, nsoils,
                                                    TER_method=TER_method,
                                                    scale=resolution)

                list_of_gpp = np.concatenate([list_of_gpp, SimData[1]], axis=0)
                list_of_raut = np.concatenate([list_of_raut, SimData[2]], axis=0)
                list_of_rhet = np.concatenate([list_of_rhet, SimData[3]], axis=0)
                list_of_ter = np.concatenate([list_of_ter, SimData[4]], axis=0)
                list_of_nee = np.concatenate([list_of_nee, SimData[5]], axis=0)

            print dtimes, list_of_gpp
            series[site]['c%i'%cpno]['GPP'] = pd.Series(list_of_gpp, index=dtimes)
            series[site]['c%i'%cpno]['Raut'] = pd.Series(list_of_raut, index=dtimes)
            series[site]['c%i'%cpno]['Rhet'] = pd.Series(list_of_rhet, index=dtimes)
            series[site]['c%i'%cpno]['TER'] = pd.Series(list_of_ter, index=dtimes)
            series[site]['c%i'%cpno]['NEE'] = pd.Series(list_of_nee, index=dtimes)

    # we store the two pandas series in one pickle file
    filepath = os.path.join(outputdir, 'forward_runs/'+\
               '%s_timeseries_%s_WOFOST.pickle'%(resolution, TER_method))
    pickle_dump(series, open(filepath, 'wb'))
def main():
#===============================================================================
    global outputdir, obsdir
#-------------------------------------------------------------------------------
# ================================= USER INPUT =================================
    # read the settings from the rc file
    rcdict = rc.read('settings.rc')
#===============================================================================
#-------------------------------------------------------------------------------
    # extract the needed information for that script
    sites = [s.strip(' ') for s in rcdict['sites'].split(',')]
    years = [s.strip(' ') for s in rcdict['years'].split(',')]
    TER_method = rcdict['TER_method']
    R10 = rcdict['R10']
    resolution = rcdict['resolution']  # can be hourly or daily
    if resolution == 'daily':
        res = '1d'
    elif resolution == '3-hourly':
        res = '3H'

    # directory paths
    outputdir = rcdict['outputdir']
    obsdir = rcdict['obsdir']
    forwardir = os.path.join(outputdir, 'forward_runs')
#-------------------------------------------------------------------------------
    # load the WOFOST runs of all crops
    # we store the two pandas series in one pickle file
    filepath = os.path.join(forwardir, '%s_timeseries_'%resolution+\
               '%s_WOFOST.pickle'%TER_method)
    series = pickle_load(open(filepath, 'rb'))

    filepath = os.path.join(obsdir, 'daily_timeseries_OBS.pickle')
    obs = pickle_load(open(filepath, 'rb'))

    final_series = dict()

    for s, site in enumerate(sites):
        print site
        print obs[site].keys()
        final_series[site] = dict()

        # read the crop rotation from FluxNet file
        rotation = obs[site]['crop_no']

        # slice each year's required time series, append to final series
        for varname in ['GPP', 'TER', 'Raut', 'Rhet', 'NEE']:
            print 'variable %s'%varname
            var = []
            for year in years:
                # get the crop number for that year
                if site != 'IT-BCi':
                    try:
                        crop_no = rotation[year:year][0]
                    except IndexError:
                        # index error occurs when the year is
                        # not in the rotation time series
                        startdate = '%s-01-01 00:00:00'%year
                        enddate = '%s-12-31 23:59:59'%year
                        dtimes = pd.date_range(startdate, enddate, freq=res)
                        na_vals = np.array(len(dtimes)*[np.nan])
                        var += [pd.Series(na_vals, index=dtimes)]
                        print ' ', site, year, 'unknown crop cover: skip.'
                        continue
                elif site == 'IT-BCi':
                    if int(year) not in np.arange(2004, 2010, 1):
                        startdate = '%s-01-01 00:00:00'%year
                        enddate = '%s-12-31 23:59:59'%year
                        dtimes = pd.date_range(startdate, enddate, freq=res)
                        na_vals = np.array(len(dtimes)*[np.nan])
                        var += [pd.Series(na_vals, index=dtimes)]
                        print ' ', site, year, 'unknown crop cover: skip.'
                        continue
                    else:
                        crop_no = 2

                # try slicing and concatenating that year's timeseries from file
                try:
                    # if the GPP = 0 (failed growing season), we set TER and
                    # NEE to zero as well
                    if np.mean(series[site]['c%i'%crop_no]['GPP'][year:year]) == 0.:
                        startdate = '%s-01-01 00:00:00'%year
                        enddate = '%s-12-31 23:59:59'%year
                        dtimes = pd.date_range(startdate, enddate, freq=res)
                        zeros = np.array(len(dtimes)*[0.])
                        var += [pd.Series(zeros, index=dtimes)]
                    else:
                        var += [series[site]['c%i'%crop_no][varname][year:year]]
                    print ' ', site, year, '%2i'%crop_no, 'slicing'
                except KeyError:
                    # key error occurs when we haven't ran a crop
                    # or a year with WOFOST
                    startdate = '%s-01-01 00:00:00'%year
                    enddate = '%s-12-31 23:59:59'%year
                    dtimes = pd.date_range(startdate, enddate, freq=res)
                    na_vals = np.array(len(dtimes)*[np.nan])
                    var += [pd.Series(na_vals, index=dtimes)]
                    print ' ', site, year, '%2i'%crop_no, 'skip.'

            final_series[site][varname] = pd.concat(var)
            #final_series[site]['GPP'].plot()
            #plt.show()

    # store the final WOFOST timeseries
    filepath = os.path.join(outputdir, '%s_timeseries_'%resolution+\
               '%s_R10=%s_WOFOST_crop_rotation.pickle'%(TER_method, R10))
    pickle_dump(final_series, open(filepath, 'wb'))
    print 'successfully dumped %s'%filepath
def optimize_fgap(site, crop_no, crop_name, year, NUTS_no, selec_method, ncells,
    nsoils, weather, force_optimization):
#===============================================================================
    # Temporarily add code directory to python path, to be able to import pcse
    # modules
    sys.path.insert(0, codedir)
    sys.path.insert(0, os.path.join(codedir, 'carbon_cycle'))
#-------------------------------------------------------------------------------
    import glob
    from maries_toolbox import define_opti_years,\
                               select_cells, select_soils
#-------------------------------------------------------------------------------
    # if the optimization has already been performed and we don't want
    # to redo it, we skip that region
    filepath = os.path.join(optimidir, 'fgap_%s_optimized.pickle'%NUTS_no)
    if (os.path.exists(filepath) and force_optimization == False):
        optimum = pickle_load(open(filepath, 'rb'))
        print "We have already calculated the optimum fgap for that "+\
              "year and crop: fgap=%.2f"%optimum[2]
        return optimum[2]
#-------------------------------------------------------------------------------
    # we select the grid cell of the FluxNet site
    gridlist = pickle_load(open(os.path.join(CGMSdir,
                           'gridlist_objects/shortgridlist.pickle'), 'rb'))
    selected_grid_cells = gridlist[NUTS_no]
#-------------------------------------------------------------------------------
    # where possible, we retrieve local information about yield and sowing date
    local_sowda = None
    local_yield = None
    for row in custom_yns:
        if row[0] == site and row[1] == year and row[2] == crop_no:
            local_sowda = row[3]
            local_yield = row[4]
            print 'We recovered local info from site %s:'%site
            print 'sowing date of %s:'%crop_name, local_sowda, 'grain yield: %.3f'%local_yield
            break
    if local_sowda == None and local_yield == None:
        print 'No local information on sowing date and yield.'
#-------------------------------------------------------------------------------
    # we retrieve the EUROSTAT pre-processed yield observations:
    if local_sowda == None and local_yield == None:
        try:
            filename1 = os.path.join(EUROSTATdir, 'preprocessed_yields.pickle')
            yields_dict = pickle_load(open(filename1, 'rb'))
        except IOError:
            print '\nYou have not preprocessed the EUROSTAT observations'
            print 'Run the script 03_preprocess_obs.py first!\n'
            sys.exit()
        # NB: we do NOT detrend the yields anymore, since fgap is not supposed
        # to be representative of multi-annual gap
        obs_yields = yields_dict[crop_name][NUTS_no]
        return None
#-------------------------------------------------------------------------------
    # if there were no reported yield on the year X, we skip that region
    if (year not in obs_yields[1]):
        print 'No reported yield, we have to gap-fill later'
        filename = os.path.join(optimidir, 'fgap_%s_tobegapfilled.pickle'%NUTS_no)
        outlist = [NUTS_no, 2, 1., selected_grid_cells]
        pickle_dump(outlist, open(filename, 'wb'))
        return 1.
    #---------------------------------------------------------------------------
    # NB: in the optimization routine, we use the observed cultivation
    # fraction of the crop to calculate the soil cultivated areas, and
    # thus to compute the regional yields (= weighted average of yields
    # using soil cultivated areas)
    # if the observed cultivated fraction is zero, we skip that region
    selected_soil_types = select_soils(crop_no, [g for g, a in selected_grid_cells],
                                       CGMSdir, method=selec_method, n=nsoils)
    print 'we selected grid cell %i, top %i soil types, for optimization' % (
          selected_grid_cells[0][0], nsoils)
    #---------------------------------------------------------------------------
    # we set the optimization code (gives us info on how we optimize)
    opti_code = 1  # 1= observations are available for optimization
                   # 2= no obs available
    #print obs_yields[1], obs_yields[0]
    # in all other cases, we optimize the yield gap factor
    optimum = optimize_regional_yldgapf_dyn(NUTS_no, obs_yields, crop_no,
                                            selected_grid_cells,
                                            selected_soil_types, weather,
                                            CGMSdir, [year], obs_type='yield',
                                            plot_rmse=False)
    # pickle the information per NUTS region
    outlist = [NUTS_no, opti_code, optimum, selected_grid_cells]
    filename = os.path.join(optimidir, 'fgap_%s_optimized.pickle' % NUTS_no)
    pickle_dump(outlist, open(filename, 'wb'))

    return optimum

#===============================================================================
# Function to optimize the regional yield gap factor using the difference
# between the regional simulated and the observed harvest or yield (i.e. one
# gap factor to optimize per NUTS region). This function iterates dynamically
# to find the optimum YLDGAPF.
def optimize_regional_yldgapf_dyn(NUTS_no_, detrend, crop_no_,
                                  selected_grid_cells_, selected_soil_types_,
                                  weather, inputdir, opti_years_,
                                  obs_type='yield', plot_rmse=False):
#===============================================================================
    import math
    from operator import itemgetter as operator_itemgetter
    from matplotlib import pyplot as plt
    from pcse.models import Wofost71_WLP_FD
    from pcse.base_classes import WeatherDataProvider
    from pcse.fileinput.cabo_weather import CABOWeatherDataProvider

    # aggregated yield method:
    # 2- we construct a 2D array with same dimensions as TSO_regional,
    # containing the observed yields
    row = []  # this list will become the row of the 2D array
    for y, year in enumerate(opti_years_):
        index_year = np.argmin(np.absolute(detrend[1] - year))
        row = row + [detrend[0][index_year]]
    OBS = np.tile(row, (5, 1))  # repeats the row 5 times (one row per yldgapf
                                # value), to get a 2D array

    # 3- we calculate all the individual yields from the selected grid cells x
    # soils combinations
    # NB: we explore the range of yldgapf between 0. and 1.
    f0 = 0.
    f2 = 0.5
    f4 = 1.
    f_step = 0.25
    # Until the precision of the yield gap factor is good enough (i.e. < 0.02)
    # we loop over it. With this method the search converges in about 5
    # iterations.
    iter_no = 0
    RMSE_stored = list()
    while (f_step >= 0.02):
        iter_no = iter_no + 1
        # sub-method: looping over the yield gap factors
        # we build a range of 5 yield gap factors to explore: the current low
        # bound, middle and high bound, plus two new intermediate values
        f_step = (f4 - f0) / 4.
        f1 = f0 + f_step
        f3 = f2 + f_step
        f_range = [f0, f1, f2, f3, f4]

        RES = []  # list in which we will store the yields of the combinations
        counter = 0
        for grid, arable_land in selected_grid_cells_:
            frac_arable = arable_land / 625000000.
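            # NB: 625000000. m2 presumably corresponds to the 25 km x 25 km
            # footprint of a CGMS grid cell, so frac_arable is the arable
            # fraction of the cell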
            # Retrieve the weather data of one grid cell (all years are in
            # one file)
            if (weather == 'CGMS'):
                filename = os.path.join(inputdir, 'weather_objects/',
                                        'weatherobject_g%d.pickle' % grid)
                weatherdata = WeatherDataProvider()
                weatherdata._load(filename)
            if (weather == 'ECMWF'):
                weatherdata = CABOWeatherDataProvider('%i' % grid, fpath=ECMWFdir)
            # Retrieve the soil data of one grid cell (all possible soil types)
            filename = os.path.join(inputdir, 'soildata_objects/',
                                    'soilobject_g%d.pickle' % grid)
            soil_iterator = pickle_load(open(filename, 'rb'))
            for smu, stu_no, weight, soildata in selected_soil_types_[grid]:
                # TSO will store all the yields of one grid cell x soil
                # combination, for all years and all 5 yldgapf values
                TSO = np.zeros((len(f_range), len(opti_years_)))
                counter += 1
                for y, year in enumerate(opti_years_):
                    # Retrieve yearly data
                    filename = os.path.join(inputdir,
                               'timerdata_objects/%i/c%i/' % (year, crop_no_),
                               'timerobject_g%d_c%d_y%d.pickle'
                               % (grid, crop_no_, year))
                    timerdata = pickle_load(open(filename, 'rb'))
                    filename = os.path.join(inputdir,
                               'cropdata_objects/%i/c%i/' % (year, crop_no_),
                               'cropobject_g%d_c%d_y%d.pickle'
                               % (grid, crop_no_, year))
                    cropdata = pickle_load(open(filename, 'rb'))
                    if str(grid).startswith('1'):
                        dum = str(grid)[0:2]
                    else:
                        dum = str(grid)[0]
                    filename = os.path.join(inputdir,
                               'sitedata_objects/%i/c%i/grid_%s/'
                               % (year, crop_no_, dum),
                               'siteobject_g%d_c%d_y%d_s%d.pickle'
                               % (grid, crop_no_, year, stu_no))
                    sitedata = pickle_load(open(filename, 'rb'))
                    for f, factor in enumerate(f_range):
                        cropdata['YLDGAPF'] = factor
                        # run WOFOST
                        wofost_object = Wofost71_WLP_FD(sitedata, timerdata,
                                                        soildata, cropdata,
                                                        weatherdata)
                        wofost_object.run_till_terminate()
                        # get the yield (in kgDM.ha-1)
                        TSO[f, y] = wofost_object.get_variable('TWSO')
                #print grid, stu_no, year, counter, [y[0] for y in TSO], OBS[0]
                RES = RES + [(grid, stu_no, weight * frac_arable, TSO)]

        # 4- we aggregate the yield or harvest into the regional one with
        # array operations
        sum_weighted_vals = np.zeros((len(f_range), len(opti_years_)))
        # empty 2D array with same dimension as TSO
        sum_weights = 0.
        for grid, stu_no, weight, TSO in RES:
            # adding weighted 2D-arrays in the empty array sum_weighted_vals
            # NB: variable 'weight' is actually the cultivated area in m2
            sum_weighted_vals = sum_weighted_vals + (weight / 10000.) * TSO
            # computing the total sum of the cultivated area in ha
            sum_weights = sum_weights + (weight / 10000.)
        if (obs_type == 'harvest'):
            # sum of the individual harvests in 1000 tDM
            TSO_regional = sum_weighted_vals / 1000000.
        elif (obs_type == 'yield'):
            # weighted average of all yields in kgDM/ha
            TSO_regional = sum_weighted_vals / sum_weights

        # 5- we compute the (sim-obs) differences.
        DIFF = TSO_regional - OBS
        if (TSO_regional[-1][0] <= 0.):
            print 'WARNING: no simulated crop growth. We set the optimum fgap to 1.'
            return 1.
        # NB: the element-wise comparison below is only unambiguous because we
        # optimize on a single year (opti_years_ has length 1 in this script)
        if (TSO_regional[-1] <= OBS[-1]):
            print 'WARNING: obs yield > sim yield. We set optimum to 1.'
            return 1.
        # 6- we calculate the RMSE (root mean squared error) of the 5 yldgapf
        # values. The RMSE of each yldgapf is based on N obs-sim differences
        # for the N years looped over
        RMSE = np.zeros(len(f_range))
        for f, factor in enumerate(f_range):
            list_of_DIFF = []
            for y, year in enumerate(opti_years_):
                list_of_DIFF = list_of_DIFF + [DIFF[f, y]]
            RMSE[f] = np.sqrt(np.mean([math.pow(j, 2) for j in list_of_DIFF]))
        #print RMSE, f_range
        # We store the value of the RMSE for plotting purposes
        RMSE_stored = RMSE_stored + [(f_range[1], RMSE[1]),
                                     (f_range[3], RMSE[3])]
        if (iter_no == 1):
            RMSE_stored = RMSE_stored + [(f_range[0], RMSE[0]),
                                         (f_range[2], RMSE[2]),
                                         (f_range[4], RMSE[4])]

        # 7- We update the yldgapf range to explore for the next iteration:
        # we re-centre the search window on the yldgapf with the smallest
        # RMSE, so the window width is halved at every iteration
        index_new_center = RMSE.argmin()
        # if the optimum is close to 1:
        if index_new_center == len(f_range) - 1:
            f0 = f_range[index_new_center - 2]
            f2 = f_range[index_new_center - 1]
            f4 = f_range[index_new_center]
        # if the optimum is close to 0:
        elif index_new_center == 0:
            f0 = f_range[index_new_center]
            f2 = f_range[index_new_center + 1]
            f4 = f_range[index_new_center + 2]
        else:
            f0 = f_range[index_new_center - 1]
            f2 = f_range[index_new_center]
            f4 = f_range[index_new_center + 1]

    # when we are finished iterating on the yield gap factor range, we plot the
    # RMSE as a function of the yield gap factor
    if (plot_rmse == True):
        RMSE_stored = sorted(RMSE_stored, key=operator_itemgetter(0))
        x, y = zip(*RMSE_stored)
        fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))
        fig.subplots_adjust(0.15, 0.16, 0.95, 0.96, 0.4, 0.)
        ax.plot(x, y, c='k', marker='o')
        ax.set_xlabel('yldgapf (-)')
        ax.set_ylabel('RMSE')
        fig.savefig('%s_opti_fgap.png' % NUTS_no_)
        #pickle_dump(RMSE_stored, open('%s_RMSE.pickle' % NUTS_no_, 'wb'))

    # 8- when we are finished iterating on the yield gap factor range, we look
    # for the yldgapf with the lowest RMSE
    index_optimum = RMSE.argmin()
    optimum_yldgapf = f_range[index_optimum]
    print 'optimum found: %.2f +/- %.2f' % (optimum_yldgapf, f_step)

    # 9- we return the optimized YLDGAPF
    return optimum_yldgapf

#===============================================================================
def str_to_bool(s):
#===============================================================================
    if s.strip(' ') == 'True':
        return True
    elif s.strip(' ') == 'False':
        return False
    else:
        raise ValueError

#===============================================================================
if __name__ == '__main__':
    main()
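#===============================================================================
# The window-narrowing logic above is easier to see on a cheap stand-in cost
# function than on full WOFOST runs. The sketch below is illustrative only:
# `toy_rmse` and `narrow_search` are hypothetical helpers, not part of these
# scripts, but they reproduce the same 5-point grid refinement and the same
# stopping criterion (f_step < 0.02).
#===============================================================================
import numpy as np

def toy_rmse(fgap, target=0.83):
    # stand-in for the WOFOST-based regional RMSE: minimal at fgap = target
    return abs(fgap - target)

def narrow_search(cost, f0=0., f4=1., precision=0.02):
    f2 = 0.5 * (f0 + f4)
    f_step = 0.25
    while f_step >= precision:
        f_step = (f4 - f0) / 4.
        f_range = [f0, f0 + f_step, f2, f2 + f_step, f4]
        rmse = np.array([cost(f) for f in f_range])
        # re-centre the window on the best factor, clamping at the bounds
        i = min(max(rmse.argmin(), 1), len(f_range) - 2)
        f0, f2, f4 = f_range[i - 1], f_range[i], f_range[i + 1]
    return f2, f_step

#print narrow_search(toy_rmse)   # -> (0.828125, 0.015625) after 5 iterations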
if options_get_validation(directory) == 'none': docstats.append([tb_count, rel_count, event_count]) else: # verify and include verification issue count try: from projectconfig import ProjectConfiguration projectconf = ProjectConfiguration(directory) from verify_annotations import verify_annotation issues = verify_annotation(ann_obj, projectconf) issue_count = len(issues) except: # TODO: error reporting issue_count = -1 docstats.append([tb_count, rel_count, event_count, issue_count]) except Exception, e: log_info('Received "%s" when trying to generate stats' % e) # Pass exceptions silently, just marking stats missing docstats.append([-1] * len(stat_types)) # Cache the statistics try: with open(cache_file_path, 'wb') as cache_file: pickle_dump(docstats, cache_file) except IOError, e: Messager.warning("Could not write statistics cache file to directory %s: %s" % (directory, e)) return stat_types, docstats # TODO: Testing!
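# A hedged sketch of the cache-or-recompute pattern used above, written as a
# stand-alone helper: `cache_path`, `source_paths` and `compute_stats` are
# placeholder names, not part of the brat API. The cache is only trusted when
# it is newer than every source document, and writing it back is best-effort.
import os
from pickle import dump as pickle_dump, load as pickle_load, HIGHEST_PROTOCOL

def cached_stats(cache_path, source_paths, compute_stats):
    try:
        cache_mtime = os.path.getmtime(cache_path)
        if all(os.path.getmtime(p) <= cache_mtime for p in source_paths):
            with open(cache_path, 'rb') as cache_file:
                return pickle_load(cache_file)
    except (IOError, OSError):
        pass  # missing or unreadable cache: fall through and recompute
    stats = compute_stats()
    try:
        with open(cache_path, 'wb') as cache_file:
            pickle_dump(stats, cache_file, HIGHEST_PROTOCOL)
    except IOError:
        pass  # caching is best-effort; the fresh stats are still returned
    return stats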
def save_pickle(): with open(pickle_path, 'wb') as pickle_file: pickle_dump(pickle, pickle_file, HIGHEST_PROTOCOL)
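# For symmetry with save_pickle above, a minimal load counterpart and a
# round-trip check; the path 'example.pck' and the helper names are
# illustrative only.
from pickle import dump as pickle_dump, load as pickle_load, HIGHEST_PROTOCOL

def dump_pickle(path, obj):
    # binary mode plus an explicit protocol keeps the file readable across runs
    with open(path, 'wb') as pickle_file:
        pickle_dump(obj, pickle_file, HIGHEST_PROTOCOL)

def load_pickle(path):
    with open(path, 'rb') as pickle_file:
        return pickle_load(pickle_file)

#dump_pickle('example.pck', {'answer': 42})
#assert load_pickle('example.pck') == {'answer': 42}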
def main(): #=============================================================================== cwdir = os.getcwd() SIBrundir = os.path.join(cwdir, 'forward_runs') sites = ['BE-Lon', 'DE-Kli', 'FR-Gri', 'IT-BCi', 'NL-Dij', 'NL-Lan'] # time axis of all time series tm = pd.date_range('2000-01-01 00:00:00', '2010-12-31 23:59:59', freq='1d') series = dict() # strange thing is: SiBCASA ignores leap years and does not simulate 29 feb # we have to delete 3 dates on 3 leap years between 2000 and 2010 new_tm = tm[0:59].union(tm[60:1520].union(tm[1521:2981].union(tm[2982:]))) print new_tm, len(new_tm) for site in sites: # open all the years and store in one list namefile = '%s_2000-2010/' % (site) + 'hsib_*.qp2.nc' pathfile = os.path.join(SIBrundir, namefile) # open all 11 years * 12 files f = cdf.MFDataset(pathfile) # get daily GPP and NEE (in micromoles/m2/s) and convert # the fluxes to gC/m2/d: fac = 0.000001 * 12. # conversion from micromoles to gC dt = 3600. * 24. # nb of seconds in a day Sib_gpp = np.array(-f.variables['gpp'][:]) * fac * dt Sib_ter = np.array(f.variables['resp_tot'][:]) * fac * dt Sib_rhet = np.array(f.variables['resp_het'][:]) * fac * dt Sib_raut = np.array(f.variables['resp_auto'][:]) * fac * dt Sib_nee = np.array(f.variables['NEE_2'][:]) * fac * dt # from moles/m2 to gC/m2 Sib_csoil = np.array(f.variables['carb_soil'][:]) * fac * 1000000. * dt # close file f.close() series[site] = dict() series[site]['GPP'] = pd.Series([l[0] for l in Sib_gpp], index=new_tm) series[site]['TER'] = pd.Series([l[0] for l in Sib_ter], index=new_tm) series[site]['Rhet'] = pd.Series([l[0] for l in Sib_rhet], index=new_tm) series[site]['Raut'] = pd.Series([l[0] for l in Sib_raut], index=new_tm) series[site]['NEE'] = pd.Series([l[0] for l in Sib_nee], index=new_tm) fig, ax = plt.subplots(nrows=1, ncols=1) fig.suptitle(site, fontsize=14) series[site]['GPP'].plot(label='GPP') series[site]['TER'].plot(label='TER') series[site]['Rhet'].plot(label='Rhet') series[site]['Raut'].plot(label='Raut') series[site]['NEE'].plot(label='NEE') ax.legend() # store the formatted pandas timeseries in a pickle file filepath = os.path.join(SIBrundir, 'timeseries_SiBCASA.pickle') pickle_dump(series, open(filepath, 'wb')) # preview the timeseries per site plt.show()
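#===============================================================================
# The three hard-coded slice boundaries above drop the 29th of February from
# the daily time axis; a programmatic (and less fragile) equivalent is
# sketched below, together with the micromole-to-gC flux conversion used for
# the SiBCASA output. The toy flux values are illustrative only.
#===============================================================================
import numpy as np
import pandas as pd

tm = pd.date_range('2000-01-01', '2010-12-31', freq='1d')
no_leap_tm = tm[~((tm.month == 2) & (tm.day == 29))]
#print len(tm), len(no_leap_tm)   # -> 4018 vs 4015 (three leap days removed)

# convert a flux in micromoles CO2 m-2 s-1 into gC m-2 d-1
fac = 0.000001 * 12.   # micromoles of CO2 to grams of carbon (as above)
dt = 3600. * 24.       # number of seconds in a day
flux_umol = np.array([1.5, 2.0, -0.3])   # toy daily-mean fluxes
flux_gC_per_day = flux_umol * fac * dt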