Example #1
def save_pickle(filename, data):
    # Pickle output is binary, so open the file in 'wb' mode.
    with open(filename + '.pck', 'wb') as file:
        pickle_dump(data, file)
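A matching reader is not shown on this page; a minimal sketch of one, assuming pickle_load is the usual pickle.load alias used throughout these examples and the same '.pck' suffix:

from pickle import load as pickle_load

def load_pickle(filename):
    # Counterpart to save_pickle(): read the pickled object back.
    # Binary mode matters; pickle streams are not text.
    with open(filename + '.pck', 'rb') as f:
        return pickle_load(f)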
Example #2
def rule_timer_loop():
    logger.info('(ALL IPSC SYSTEMS) Rule timer loop started')
    _now = time()

    for _bridge in BRIDGES:
        for _system in BRIDGES[_bridge]:
            if _system['TO_TYPE'] == 'ON':
                if _system['ACTIVE'] == True:
                    if _system['TIMER'] < _now:
                        _system['ACTIVE'] = False
                        logger.info(
                            'Conference Bridge TIMEOUT: DEACTIVATE System: %s, Bridge: %s, TS: %s, TGID: %s',
                            _system['SYSTEM'], _bridge, _system['TS'],
                            int_id(_system['TGID']))
                    else:
                        timeout_in = _system['TIMER'] - _now
                        logger.info(
                            'Conference Bridge ACTIVE (ON timer running): System: %s Bridge: %s, TS: %s, TGID: %s, Timeout in: %ss,',
                            _system['SYSTEM'], _bridge, _system['TS'],
                            int_id(_system['TGID']), timeout_in)
                elif _system['ACTIVE'] == False:
                    logger.debug(
                        'Conference Bridge INACTIVE (no change): System: %s Bridge: %s, TS: %s, TGID: %s',
                        _system['SYSTEM'], _bridge, _system['TS'],
                        int_id(_system['TGID']))
            elif _system['TO_TYPE'] == 'OFF':
                if _system['ACTIVE'] == False:
                    if _system['TIMER'] < _now:
                        _system['ACTIVE'] = True
                        logger.info(
                            'Conference Bridge TIMEOUT: ACTIVATE System: %s, Bridge: %s, TS: %s, TGID: %s',
                            _system['SYSTEM'], _bridge, _system['TS'],
                            int_id(_system['TGID']))
                    else:
                        timeout_in = _system['TIMER'] - _now
                        logger.info(
                            'Conference Bridge INACTIVE (OFF timer running): System: %s Bridge: %s, TS: %s, TGID: %s, Timeout in: %ss,',
                            _system['SYSTEM'], _bridge, _system['TS'],
                            int_id(_system['TGID']), timeout_in)
                elif _system['ACTIVE'] == True:
                    logger.debug(
                        'Conference Bridge ACTIVE (no change): System: %s Bridge: %s, TS: %s, TGID: %s',
                        _system['SYSTEM'], _bridge, _system['TS'],
                        int_id(_system['TGID']))
            else:
                logger.debug(
                    'Conference Bridge NO ACTION: System: %s, Bridge: %s, TS: %s, TGID: %s',
                    _system['SYSTEM'], _bridge, _system['TS'],
                    int_id(_system['TGID']))

    if BRIDGE_CONF['REPORT']:
        try:
            with open(
                    CONFIG['REPORTS']['REPORT_PATH'] +
                    'confbridge_stats.pickle', 'wb') as file:
                pickle_dump(BRIDGES, file, 2)
        except IOError as detail:
            logger.error('I/O Error: %s', detail)
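For orientation only, a hypothetical sketch of the bridge entries this loop expects; the field names come from the lookups above, while the literal values are made up:

# Illustrative shape only -- the real BRIDGES dict is built from the
# conference bridge configuration, not from this literal.
BRIDGES = {
    'EXAMPLE-BRIDGE': [
        {
            'SYSTEM': 'IPSC-1',        # system name (assumed)
            'TS': 1,                   # timeslot
            'TGID': b'\x00\x00\x01',   # talkgroup ID, rendered with int_id()
            'TO_TYPE': 'ON',           # 'ON', 'OFF', or anything else for no timer action
            'ACTIVE': True,
            'TIMER': 1500000000.0,     # epoch time at which the timer expires
        },
    ],
}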
Example #3
def reporting_loop(_logger):
    _logger.debug('Periodic Reporting Loop Started (PICKLE)')
    try:
        with open(_config['REPORTS']['REPORT_PATH'] + 'dmrlink_stats.pickle', 'wb') as file:
            pickle_dump(_config['SYSTEMS'], file, 2)
    except IOError as detail:
        _logger.error('I/O Error: %s', detail)
Example #4
def getAnnObject2(collection,document):
    '''Newest version of the getAnnObject method.'''
    try:
        from os.path import join as path_join
        from document import real_directory
        real_dir = real_directory(collection)
    except:
        real_dir=collection      
    app_path = WORK_DIR + "/application/"
    ann = None
    full_name = collection + document
    full_name = full_name.replace("/","")
    if isfile(app_path + full_name):
        temp = open(app_path + full_name, 'rb')
        ann = pickle_load(temp)
        temp.close()
    else:
        ann = TextAnnotations(real_dir+document)
        ann = SimpleAnnotations(ann)
        ann.folia = {}
        try:
            #TODO:good error message
            ann.folia=get_extra_info(collection,document)
        except Exception as e:
            ann.folia = {}
            Messager.error('Error: get extra folia info() failed: %s' % e)
    #Validation:
    try:
        import os
        import simplejson as json
        import session
        docdir = os.path.dirname(ann._document)
        string = session.load_conf()["config"]
        val = json.loads(string)["validationOn"]
        #validate if config enables it and if it's not already done.
        if val:
            if not ann.validated:    
                from verify_annotations import verify_annotation
                projectconf = ProjectConfiguration(docdir)
                issues = []
                issues = verify_annotation(ann, projectconf)
            else:
                issues = ann.issues
        else:
            ann.validated = False
            issues = []
    except session.NoSessionError:
        issues = []
    except KeyError:
        issues = []
    except Exception as e:
        # TODO add an issue about the failure?
        issues = []
    ann.issues = issues
    temp = open(app_path + full_name, 'wb')
    pickle_dump(ann, temp)
    temp.close()
    return ann
Example #5
def _quick_test(classifiers, datasets, outdir, verbose=False, worker_pool=None,
        no_simstring_cache=False, use_test_set=False):
    
    if worker_pool is not None:
        raise NotImplementedError

    results_file_path = _get_quick_pickle_path(outdir)
    results_by_dataset = {}

    for dataset_id, dataset_getter in datasets.iteritems():
        if verbose:
            print >> stderr, 'Data set:', dataset_id

        if verbose:
            print >> stderr, 'Caching data set...',
        train, dev, test = dataset_getter()
        if use_test_set:
            train, dev = list(chain(train, dev)), list(test)
        else:
            train, dev = list(train), list(dev)
        if verbose:
            print >> stderr, 'Done!'
      
        if not no_simstring_cache:
            simstring_caching(classifiers, (train, dev), verbose=verbose)
    
        # Collect the seen type to iterate over later
        seen_types = set()
        results_by_classifier = {}

        for classifier_id, classifier_class in classifiers.iteritems():
            if verbose:
                print >> stderr, 'Classifier:', classifier_id
            
            classifier = classifier_class()

            classifier.train(train)

            score = score_classifier(classifier, dev)
            results_by_classifier[classifier_id] = score
            macro_score, micro_score, tp_sum, fn_sum, _ = score
            
            if verbose:
                res_str = ('Results: '
                        '{0:.3f}/'
                        '{1:.3f}/'
                        '{2}/{3} (MACRO/MICRO/TP/FN)'
                        ).format(macro_score, micro_score, tp_sum, fn_sum)
                print res_str

        results_by_dataset[dataset_id] = results_by_classifier

    with open(results_file_path, 'wb') as results_file:
        pickle_dump(results_by_dataset, results_file)

    if verbose:
        print >> stderr, 'Results written to:', results_file_path
Example #6
def learning_curve_test(classifiers,
                        datasets,
                        outdir,
                        verbose=False,
                        no_simstring_cache=False,
                        folds=10,
                        worker_pool=None,
                        min_perc=5,
                        max_perc=100,
                        step_perc=5,
                        it_factor=1,
                        pickle_name='learning',
                        use_test_set=False):
    ### This part is really generic
    # TODO: We could keep old results... But dangerous, mix-up
    results_file_path = _get_learning_pickle_path(outdir, pickle_name)
    #XXX: RESUME GOES HERE!
    results_by_dataset = {}

    for dataset_id, dataset_getter in datasets.iteritems():

        if verbose:
            print >> stderr, 'Data set:', dataset_id

        if verbose:
            print >> stderr, 'Caching vectorised data...',

        train_set, dev_set, test_set = dataset_getter()
        if use_test_set:
            train, test = list(chain(train_set, dev_set)), list(test_set)
        else:
            train, test = list(train_set), list(dev_set)
        del train_set, dev_set, test_set

        if verbose:
            print >> stderr, 'Done!'

        results_by_dataset[dataset_id] = _learning_curve_test_data_set(
            classifiers,
            train,
            test,
            worker_pool,
            verbose=verbose,
            no_simstring_cache=no_simstring_cache,
            use_test_set=use_test_set,
            folds=folds,
            min_perc=min_perc,
            max_perc=max_perc,
            step_perc=step_perc,
            it_factor=it_factor)

        ### HACK TO GET INTERMEDIATE!
        with open(results_file_path, 'wb') as results_file:
            pickle_dump(results_by_dataset, results_file)

        if verbose:
            print >> stderr, 'Results written to:', results_file_path
Example #7
def reporting_loop(_logger):
    _logger.debug('Periodic Reporting Loop Started (PICKLE)')
    try:
        with open(
                _config['REPORTS']['REPORT_PATH'] +
                'dmrlink_stats.pickle', 'wb') as file:
            pickle_dump(_config['SYSTEMS'], file, 2)
    except IOError as detail:
        _logger.error('I/O Error: %s', detail)
Example #8
def test():
	try:
		from cPickle import dump as pickle_dump, load as pickle_load, dumps
	except ImportError:
		from pickle import dump as pickle_dump, load as pickle_load
	ann = TextAnnotations("/home/hast/Downloads/brat/data/brat_vb/sentiment/test")
	temp=open ("/home/hast/Downloads/brat/work/brat_vbsentimenttest", 'wb')
	sann = SimpleAnnotations(ann)
	pickle_dump(sann,temp)
	temp.close()
Example #9
def update_dump(j_dic,file_path):
    app_path = WORK_DIR + "/application/"
    temp_paths = file_path.split("/data/")
    try:
        full_name = temp_paths[1].replace("/", "")
        temp = open(app_path + full_name, 'wb')
        pickle_dump(j_dic, temp)
        temp.close()
    except Exception as e:
        Messager.error("Error while caching changes in the annotation file: "+str(e))
Example #10
    def dump(self, filename):
        """Dump the template object to ``filename`` so you can re-use it later.

        This method uses cPickle to serialize the internal template model, so
        you don't need to go through the learning process every time you need
        to parse data. It's worth using this method since the learning process
        generally costs a lot of time compared to parsing.
        """
        fp = open(filename, 'wb')
        pickle_dump(self, fp)
        fp.close()
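The corresponding loader does not appear on this page; a hedged sketch of one (the name load_template is an assumption, not part of the library shown above):

from pickle import load as pickle_load

def load_template(filename):
    # Read back a template object written by dump() above.
    with open(filename, 'rb') as fp:
        return pickle_load(fp)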
Example #11
def save(self, filename=PICKLE_CACHE_PATH):
    from cPickle import UnpicklingError
    from cPickle import dump as pickle_dump
    try:
        with open(filename, 'wb') as cache_file:
            pickle_dump(ordall(self._map), cache_file)
    except IOError:
        print >> sys.stderr, 'warning: failed to write cache.'
    except:
        print >> sys.stderr, 'warning: unexpected error writing cache.'
Example #12
def update_dump(j_dic, file_path):
    app_path = WORK_DIR + "/application/"
    temp_paths = file_path.split("/data/")
    try:
        full_name = temp_paths[1].replace("/", "")
        temp = open(app_path + full_name, 'wb')
        pickle_dump(j_dic, temp)
        temp.close()
    except Exception as e:
        Messager.error("Error while caching changes in the annotation file: " +
                       str(e))
Example #13
def save_cache(fn, data):
    from cPickle import UnpicklingError
    from cPickle import dump as pickle_dump
    try:
        with open(fn, 'wb') as cache_file:
            pickle_dump(data, cache_file)
    except IOError:
        print >> sys.stderr, "rewritetex: warning: failed to write cache."        
    except:
        print >> sys.stderr, "rewritetex: warning: unexpected error writing cache."
Example #14
    def dump(self, filename):
        """Dump the template object to ``filename`` so you can re-use it later.

        This method uses cPickle to serialize the internal template model, so
        you don't need to go through the learning process every time you need
        to parse data. It's worth using this method since the learning process
        generally costs a lot of time compared to parsing.
        """
        fp = open(filename, 'wb')
        pickle_dump(self, fp)
        fp.close()
Example #15
def save_cache(fn, data):
    from cPickle import UnpicklingError
    from cPickle import dump as pickle_dump
    try:
        with open(fn, 'wb') as cache_file:
            pickle_dump(data, cache_file)
    except IOError:
        print >> sys.stderr, "rewritetex: warning: failed to write cache."
    except:
        print >> sys.stderr, "rewritetex: warning: unexpected error writing cache."
Example #16
def save(self, filename=PICKLE_CACHE_PATH):
    from cPickle import UnpicklingError
    from cPickle import dump as pickle_dump
    try:
        with open(filename, 'wb') as cache_file:
            pickle_dump(ordall(self._map), cache_file)
    except IOError:
        print >> sys.stderr, 'warning: failed to write cache.'
    except:
        print >> sys.stderr, 'warning: unexpected error writing cache.'
Example #17
def main(args):
    argp = ARGPARSER.parse_args(args[1:])

    # Create a dataset out of the input
    doc = _tab_separated_input_to_doc(argp.input)

    # Cache the strings for speed
    cache_simstring(((doc, ), ), verbose=argp.verbose)

    classifier = SimStringInternalClassifier()
    classifier.train((doc, ))

    with open(argp.model_path, 'wb') as model_file:
        pickle_dump(classifier, model_file)
Example #18
def compile_file(path, to_python=False):
    qpt = QPTokenizer()
    with open(path, 'rb') as qp:
        qpt.compile_source(qp)
    base_dir, parser = os.path.split(path)
    # Context managers ensure the compiled output is flushed and closed.
    if not to_python:
        cparser = parser + 'y'
        with open(os.path.join(base_dir, cparser), 'wb') as cfile:
            cfile.write(COMPILED_SIGNATURE)
            pickle_dump(qpt.globals, cfile, HIGHEST_PROTOCOL)
    else:
        cparser = parser.split('.')[0] + '.py'
        with open(os.path.join(base_dir, cparser), 'wb') as cfile:
            cfile.write(str(qpt.globals))
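Reading the compiled file back would reverse the steps above; a hypothetical sketch (the loader name and error handling are assumptions, and the signature is passed in rather than imported):

from pickle import load as pickle_load

def load_compiled(path, signature):
    # Check the signature prefix written by compile_file(), then unpickle
    # the stored globals that follow it.
    with open(path, 'rb') as cfile:
        if cfile.read(len(signature)) != signature:
            raise ValueError('not a compiled parser file: %s' % path)
        return pickle_load(cfile)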
Example #19
def getAnnObject(collection, document):
    try:
        real_dir = real_directory(collection)
    except:
        real_dir = collection
    app_path = WORK_DIR + "/application/"
    full_name = collection + document
    full_name = full_name.replace("/", "")
    if (os.path.isfile(app_path + full_name)):
        temp = open(app_path + full_name, 'rb')
        ann = pickle_load(temp)
        temp.close()
    else:
        ann = TextAnnotations(real_dir + document)
        ann = SimpleAnnotations(ann)
        ann.folia = {}
        try:
            #TODO:good error message
            ann.folia = get_extra_info(collection, document)
        except Exception as e:
            ann.folia = {}
            Messager.error('Error: get extra folia info() failed: %s' % e)
    #Validation:
    try:
        docdir = os.path.dirname(ann._document)
        string = session.load_conf()["config"]
        val = json.loads(string)["validationOn"]
        #validate if config enables it and if it's not already done.
        if val:
            if not ann.validated:
                projectconf = ProjectConfiguration(docdir)
                issues = verify_annotation(ann, projectconf)
            else:
                issues = ann.issues
        else:
            ann.validated = False
            issues = []
    except session.NoSessionError:
        issues = []
    except KeyError:
        issues = []
    except Exception as e:
        # TODO add an issue about the failure?
        issues = []
        Messager.error('Error: validation failed: %s' % e)
    ann.issues = issues
    temp = open(app_path + full_name, 'wb')
    pickle_dump(ann, temp)
    temp.close()
    return ann
Example #20
def compile_file(path, to_python=False):
    qpt = QPTokenizer()
    with open(path, 'rb') as qp:
        qpt.compile_source(qp)
    base_dir, parser = os.path.split(path)
    # Context managers ensure the compiled output is flushed and closed.
    if not to_python:
        cparser = parser + 'y'
        with open(os.path.join(base_dir, cparser), 'wb') as cfile:
            cfile.write(COMPILED_SIGNATURE)
            pickle_dump(qpt.globals, cfile, HIGHEST_PROTOCOL)
    else:
        cparser = parser.split('.')[0] + '.py'
        with open(os.path.join(base_dir, cparser), 'wb') as cfile:
            cfile.write(str(qpt.globals))
Example #21
def main(args):
    argp = ARGPARSER.parse_args(args[1:])

    # Create a dataset out of the input
    doc = _tab_separated_input_to_doc(argp.input)

    # Cache the strings for speed
    cache_simstring(((doc, ), ), verbose=argp.verbose)

    classifier = SimStringInternalClassifier()
    classifier.train((doc, ))

    with open(argp.model_path, 'wb') as model_file:
        pickle_dump(classifier, model_file)
Example #22
def _save_simstring_query_cache():
    # Check if the cache directory exists, otherwise create it
    if not exists(dirname(SIMSTRING_QUERY_CACHE_DIR_PATH)):
        from os import makedirs
        makedirs(SIMSTRING_QUERY_CACHE_DIR_PATH)

    # Save if we have a cache and it has been modified
    if SIMSTRING_QUERY_CACHE is not None and MODIFIED_SIMSTRING_QUERY_CACHE:
        # We could suffer race conditions here so we write to a tempfile
        # and then swap it in place
        tmp_file = None
        try:
            with NamedTemporaryFile('wb', delete=False) as tmp_file:
                # Dump with the highest available protocol
                pickle_dump(SIMSTRING_QUERY_CACHE, tmp_file, -1)
            move(tmp_file.name, SIMSTRING_QUERY_CACHE_PATH)
        finally:
            # If something went wrong, we need to clean up /tmp
            if tmp_file is not None and exists(tmp_file.name):
                remove(tmp_file.name)
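On Python 3 the write-then-swap pattern above can lean on the standard library's atomic rename; a minimal sketch under that assumption (the helper name is made up):

import os
from pickle import dump as pickle_dump
from tempfile import NamedTemporaryFile

def atomic_pickle(obj, target_path):
    # Write to a temporary file in the target directory, then atomically
    # swap it into place so readers never see a half-written pickle.
    target_dir = os.path.dirname(target_path) or '.'
    with NamedTemporaryFile('wb', delete=False, dir=target_dir) as tmp:
        pickle_dump(obj, tmp, -1)  # -1 selects the highest protocol, as above
        tmp_name = tmp.name
    os.replace(tmp_name, target_path)  # os.replace() is Python 3.3+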
Example #23
def _save_simstring_query_cache():
    # Check if the cache directory exists, otherwise create it
    if not exists(dirname(SIMSTRING_QUERY_CACHE_DIR_PATH)):
        from os import makedirs
        makedirs(SIMSTRING_QUERY_CACHE_DIR_PATH)

    # Save if we have a cache and it has been modified
    if SIMSTRING_QUERY_CACHE is not None and MODIFIED_SIMSTRING_QUERY_CACHE:
        # We could suffer race conditions here so we write to a tempfile
        # and then swap it in place
        tmp_file = None
        try:
            with NamedTemporaryFile('wb', delete=False) as tmp_file:
                # Dump with the highest available protocol
                pickle_dump(SIMSTRING_QUERY_CACHE, tmp_file, -1)
            move(tmp_file.name, SIMSTRING_QUERY_CACHE_PATH)
        finally:
            # If something went wrong, we need to clean up /tmp
            if tmp_file is not None and exists(tmp_file.name):
                remove(tmp_file.name)
Example #24
def learning_curve_test(classifiers, datasets, outdir,
        verbose=False, no_simstring_cache=False, folds=10, worker_pool=None,
        min_perc=5, max_perc=100, step_perc=5, it_factor=1,
        pickle_name='learning', use_test_set=False
        ):
    ### This part is really generic
    # TODO: We could keep old results... But dangerous, mix-up
    results_file_path = _get_learning_pickle_path(outdir, pickle_name)
    #XXX: RESUME GOES HERE!
    results_by_dataset = {}
    
    for dataset_id, dataset_getter in datasets.iteritems():
        
        if verbose:
            print >> stderr, 'Data set:', dataset_id
            
        if verbose:
            print >> stderr, 'Caching vectorised data...',

        train_set, dev_set, test_set = dataset_getter()
        if use_test_set:
            train, test = list(chain(train_set, dev_set)), list(test_set)
        else:
            train, test = list(train_set), list(dev_set)
        del train_set, dev_set, test_set

        if verbose:
            print >> stderr, 'Done!'

        results_by_dataset[dataset_id] = _learning_curve_test_data_set(
                classifiers, train, test, worker_pool,
                verbose=verbose, no_simstring_cache=no_simstring_cache,
                use_test_set=use_test_set, folds=folds, min_perc=min_perc,
                max_perc=max_perc, step_perc=step_perc, it_factor=it_factor)

        ### HACK TO GET INTERMEDIATE!
        with open(results_file_path, 'wb') as results_file:
            pickle_dump(results_by_dataset, results_file)

        if verbose:
            print >> stderr, 'Results written to:', results_file_path
Example #25
def _dump(self, f):
    try:
        pos = f.tell()
        pickle_dump(self.store, f)
    except:
        S = self.store.copy()
        ff = getBytesIO()
        for k, v in S.items():
            try:
                pickle_dump({k: v}, ff)
            except:
                S[k] = '<unpicklable object %r>' % v
        f.seek(pos, 0)
        pickle_dump(S, f)
Example #26
def _dump(self, f):
    try:
        pos = f.tell()
        pickle_dump(self.store, f)
    except:
        S = self.store.copy()
        ff = getBytesIO()
        for k, v in S.items():
            try:
                pickle_dump({k: v}, ff)
            except:
                S[k] = '<unpicklable object %r>' % v
        f.seek(pos, 0)
        pickle_dump(S, f)
Example #27
def main():
#===============================================================================

    cwdir     = os.getcwd()
    SIBrundir = os.path.join(cwdir, 'forward_runs')

    sites = ['BE-Lon','DE-Kli','FR-Gri','IT-BCi','NL-Dij','NL-Lan']

    # time axis of all time series
    tm = pd.date_range('2000-01-01 00:00:00', '2010-12-31 23:59:59', freq='1d')
    series = dict()

    # strange thing is: SiBCASA ignores leap years and does not simulate 29 feb
    # we have to delete 3 dates on 3 leap years between 2000 and 2010
    new_tm = tm[0:59].union(tm[60:1520].union(tm[1521:2981].union(tm[2982:])))
    print new_tm, len(new_tm)

    for site in sites:

        # open all the years and store in one list
        namefile = '%s_2000-2010/'%(site) +'hsib_*.qp2.nc'
        pathfile = os.path.join(SIBrundir, namefile)

        # open all 11 years * 12 files
        f = cdf.MFDataset(pathfile)
        # get daily GPP and NEE (in micromoles/m2/s) and convert
        # the fluxes to gC/m2/d:
        fac = 0.000001*12. # conversion from micromoles to gC
        dt  = 3600. * 24.  # nb of seconds in a day
        Sib_gpp  = np.array(-f.variables['gpp'][:])*fac*dt
        Sib_ter  = np.array(f.variables['resp_tot'][:])*fac*dt
        Sib_rhet = np.array(f.variables['resp_het'][:])*fac*dt
        Sib_raut = np.array(f.variables['resp_auto'][:])*fac*dt
        Sib_nee  = np.array(f.variables['NEE_2'][:])*fac*dt
        # from moles/m2 to gC/m2
        Sib_csoil= np.array(f.variables['carb_soil'][:])*fac*1000000.*dt
        # close file
        f.close()

        series[site] = dict()
        series[site]['GPP'] = pd.Series([l[0] for l in Sib_gpp],  index=new_tm)
        series[site]['TER'] = pd.Series([l[0] for l in Sib_ter],  index=new_tm)
        series[site]['Rhet'] = pd.Series([l[0] for l in Sib_rhet], index=new_tm)
        series[site]['Raut'] = pd.Series([l[0] for l in Sib_raut], index=new_tm)
        series[site]['NEE'] = pd.Series([l[0] for l in Sib_nee],  index=new_tm)

        fig, ax = plt.subplots(nrows=1, ncols=1)
        fig.suptitle(site, fontsize=14)
        series[site]['GPP'].plot(label='GPP')
        series[site]['TER'].plot(label='TER')
        series[site]['Rhet'].plot(label='Rhet')
        series[site]['Raut'].plot(label='Raut')
        series[site]['NEE'].plot(label='NEE')
        ax.legend()

    # store the formatted pandas timeseries in a pickle file
    filepath = os.path.join(SIBrundir,'timeseries_SiBCASA.pickle')
    pickle_dump(series, open(filepath,'wb'))

    # preview the timeseries per site
    plt.show()
Example #28
def retrieve_CGMS_input(grid, year, crop_no, suitable_stu, engine, retrieve_weather=False):
#===============================================================================
# Temporarily add code directory to python path, to be able to import pcse
# modules
    sys.path.insert(0, codedir) 
#-------------------------------------------------------------------------------
    from pcse.exceptions import PCSEError 
    from pcse.db.cgms11 import TimerDataProvider, SoilDataIterator, \
                               CropDataProvider, STU_Suitability, \
                               SiteDataProvider, WeatherObsGridDataProvider
# if the retrieval does not raise an error, the crop was cultivated that year
    print '    - grid cell no %i'%grid
    try:
        # We retrieve the crop calendar (timerdata)
        filename = os.path.join(CGMSdir,
                   'timerdata_objects/%i/c%i/'%(year,crop_no),
                   'timerobject_g%d_c%d_y%d.pickle'%(grid, crop_no, year))
        if os.path.exists(filename):
            pass
        else:
            timerdata = TimerDataProvider(engine, grid, crop_no, year)
            pickle_dump(timerdata,open(filename,'wb'))    

        # If required by the user, we retrieve the weather data
        if retrieve_weather == True: 
            filename = os.path.join(CGMSdir, 'weather_objects/',
                       'weatherobject_g%d.pickle'%(grid))
            if os.path.exists(filename):
                pass
            else:
                weatherdata = WeatherObsGridDataProvider(engine, grid)
                weatherdata._dump(filename)

        # We retrieve the soil data (soil_iterator)
        filename = os.path.join(CGMSdir, 'soildata_objects/',
                   'soilobject_g%d.pickle'%(grid))
        if os.path.exists(filename):
            soil_iterator = pickle_load(open(filename,'rb'))
        else:
            soil_iterator = SoilDataIterator(engine, grid)
            pickle_dump(soil_iterator,open(filename,'wb'))       

        # We retrieve the crop variety info (crop_data)
        filename = os.path.join(CGMSdir,
                   'cropdata_objects/%i/c%i/'%(year,crop_no),
                   'cropobject_g%d_c%d_y%d.pickle'%(grid,crop_no,year))
        if os.path.exists(filename):
            pass
        else:
            cropdata = CropDataProvider(engine, grid, crop_no, year)
            pickle_dump(cropdata,open(filename,'wb'))     

        # WE LOOP OVER ALL SOIL TYPES LOCATED IN THE GRID CELL:
        for smu_no, area_smu, stu_no, percentage, soildata in soil_iterator:

            # NB: we remove all unsuitable soils from the iteration
            if (stu_no not in suitable_stu):
                pass
            else:
                print '        soil type no %i'%stu_no

                # We retrieve the site data (site management)
                if (str(grid)).startswith('1'):
                    dum = str(grid)[0:2]
                else:
                    dum = str(grid)[0]
                filename = os.path.join(CGMSdir,
                           'sitedata_objects/%i/c%i/grid_%s/'%(year,crop_no,dum),
                           'siteobject_g%d_c%d_y%d_s%d.pickle'%(grid, crop_no,
                                                                  year, stu_no))
                if os.path.exists(filename):
                    pass
                else:
                    sitedata = SiteDataProvider(engine,grid,crop_no,year,stu_no)
                    pickle_dump(sitedata,open(filename,'wb'))     

    # if an error is raised, the crop was not grown that year
    except PCSEError:
        print '        the crop was not grown that year in that grid cell'
    except Exception as e:
        print '        Unexpected error', e#sys.exc_info()[0]

    return None
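The cache-or-fetch pattern repeated above (dump a pickle the first time, reuse it afterwards) can be factored into a small helper; a sketch only, assuming the same pickle_load/pickle_dump aliases (the helper name is hypothetical):

import os
from pickle import load as pickle_load, dump as pickle_dump

def cached_pickle(filename, fetch):
    # Reuse a previously pickled object if the file exists; otherwise call
    # fetch() to build it and cache the result for the next run.
    if os.path.exists(filename):
        with open(filename, 'rb') as f:
            return pickle_load(f)
    obj = fetch()
    with open(filename, 'wb') as f:
        pickle_dump(obj, f)
    return obj

For instance, the soil retrieval above could then read: soil_iterator = cached_pickle(filename, lambda: SoilDataIterator(engine, grid)).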
Example #29
def main():
    #===============================================================================

    #-------------------------------------------------------------------------------
    # ================================= USER INPUT =================================

    # read the settings from the rc file
    rcdict = rc.read('settings.rc')

    # ==============================================================================
    #-------------------------------------------------------------------------------
    # extract the needed information from the rc file
    sites = [s.strip(' ') for s in rcdict['sites'].split(',')]
    resolution = rcdict['resolution']  # can be hourly or daily

    # directory paths
    fluxnetdir = rcdict['obsdir']
    obsdir = os.path.join(fluxnetdir, 'regrouped_data')

    #-------------------------------------------------------------------------------
    if resolution == 'daily':
        filelist = [
            'BE-Lon_FLUXNET2015_FULLSET_DD_2004-2014.csv',
            'FR-Gri_FLUXNET2015_FULLSET_DD_2004-2013.csv',
            'DE-Kli_FLUXNET2015_FULLSET_DD_2004-2014.csv',
            'IT-BCi_mais_2004-2009_daily.csv'
        ]
    elif resolution == '3-hourly':
        filelist = [
            'BE-Lon_FLUXNET2015_FULLSET_HH_2004-2014.csv',
            'FR-Gri_FLUXNET2015_FULLSET_HH_2004-2013.csv',
            'DE-Kli_FLUXNET2015_FULLSET_HH_2004-2014.csv',
            'IT-BCi_mais_2004-2009_daily.csv'
        ]

#-------------------------------------------------------------------------------
# Extract timeseries for the different sites

# read files for the different sites
    f = open_csv(obsdir, filelist, convert_to_float=True)

    series = dict()
    filepath = os.path.join(fluxnetdir,
                            '%s_timeseries_OBS.pickle' % resolution)

    for fnam, site in zip(filelist, sites):

        print site

        # TA_F_DAY: average daytime Ta_day from meas and ERA (*C)
        # SW_IN_F: SWin from meas and ERA (W.m-2)
        # VPD_F: VPD consolidated from VPD_F_MDS and VPD_F_ERA (hPa)
        # TS_F_MDS_1 to 4: Tsoil of 4 soil layers (*C)
        # SWC_F_MDS_1 to 4: soil water content (%) of 4 layers (1=shallow)
        # NT = night-time partitioning method (gC m-2 s-1)
        # VUT: variable ref u* between years
        FLUX_variables = [
            'TA_F_DAY', 'SW_IN_F', 'VPD_F', 'TS_F_MDS_1', 'TS_F_MDS_2',
            'TS_F_MDS_3', 'SWC_F_MDS_1', 'SWC_F_MDS_2', 'SWC_F_MDS_3',
            'GPP_NT_VUT_REF', 'RECO_NT_VUT_REF', 'NEE_VUT_REF', 'crop', 'LAI',
            'AGB', 'C_height'
        ]
        FLUX_varnames = [
            'Ta_day', 'SWin', 'VPD', 'Ts_1', 'Ts_2', 'Ts_3', 'SWC_1', 'SWC_2',
            'SWC_3', 'GPP', 'TER', 'NEE', 'crop_no', 'LAI', 'AGB', 'CHT'
        ]
        IT_variables = [
            'SWC_avg', 'GPP', 'Reco', 'NEE', 'crop', 'GLAI', 'AGB', 'C_height'
        ]
        IT_varnames = [
            'SWC', 'GPP', 'TER', 'NEE', 'crop_no', 'LAI', 'AGB', 'CHT'
        ]

        # timestamps for all daily timeseries
        startyear = str(f[fnam]['TIMESTAMP'][0])[0:4]
        endyear = str(f[fnam]['TIMESTAMP'][-1])[0:4]
        startdate = '%s-01-01 00:00:00' % startyear
        enddate = '%s-12-31 23:30:00' % endyear
        if site == 'DE-Kli': enddate = '%s-12-31 23:00:00' % endyear

        series[site] = dict()

        if resolution == '3-hourly':
            tm = pd.date_range(startdate, enddate, freq='30min')
            if (site != 'IT-BCi'):
                for var, varname in zip(FLUX_variables[:12],
                                        FLUX_varnames[:12]):
                    # if the fluxes are half-hourly, I convert them to 3-hourly
                    if varname == 'Ta_day':
                        series[site]['Ta'] = pd.Series(f[fnam]['TA_F'],
                                                       index=tm)
                    elif ((varname == 'SWC_2' or varname == 'SWC_3')
                          and site == 'FR-Gri'):
                        series[site][varname] = pd.Series([-9999.] * len(tm),
                                                          index=tm)
                    else:
                        series[site][varname] = pd.Series(f[fnam][var],
                                                          index=tm)
                    print varname

        elif resolution == 'daily':
            tm = pd.date_range(startdate, enddate, freq='1d')
            if (site != 'IT-BCi'):
                for var, varname in zip(FLUX_variables, FLUX_varnames):
                    series[site][varname] = pd.Series(f[fnam][var], index=tm)
                    print varname
            else:
                tm_irreg = [
                    pd.to_datetime('%s-%s-%s' %
                                   (str(t)[0:4], str(t)[4:6], str(t)[6:8]))
                    for t in f[fnam]['TIMESTAMP']
                ]
                # since the time records have gaps in the IT-BCi data, we use a
                # special function to fill the gaps with -9999. values and
                # convert it to pandas timeseries
                for var, varname in zip(IT_variables, IT_varnames):
                    #if varname == 'VPD':
                    #    ta     = f[fnam]['T_avg']
                    #    dayvar = f[fnam]['Rh_avg'] / 100. * 6.11 * np.exp(ta /\
                    #             (238.3 + ta) * 17.2694)
                    dayvar = f[fnam][var]
                    series[site][varname] = convert2pandas(
                        tm_irreg, dayvar, tm)
                    print varname
        else:
            print "Wrong CO2 fluxes temporal resolution: must be either "+\
                  "'daily' or '3-hourly'"
            sys.exit()

    # we store the pandas series in one pickle file
    pickle_dump(series, open(filepath, 'wb'))

    #-------------------------------------------------------------------------------
    # plot timeseries

    # Let's plot the available micromet variables that are important for WOFOST
    #plot_fluxnet_micromet(obsdir,sites,[2005,2005],'-')

    # Let's plot GPP, TER, NEE
    #plot_fluxnet_daily_c_fluxes(obsdir,sites,[2004,2014],'-')

    #plot_fluxnet_LAI_CHT_AGB(obsdir,sites,[2004,2014],'o')
    #-------------------------------------------------------------------------------

    return series
Example #30
def main():
#===============================================================================

#-------------------------------------------------------------------------------
# ================================= USER INPUT =================================

# read the settings from the rc file
    rcdict     = rc.read('settings.rc')

# ==============================================================================
#-------------------------------------------------------------------------------
# extract the needed information from the rc file
    sites      = [s.strip(' ') for s in rcdict['sites'].split(',')]
    resolution = rcdict['resolution']  # can be hourly or daily

    # directory paths
    fluxnetdir = rcdict['obsdir']
    obsdir     = os.path.join(fluxnetdir, 'regrouped_data')
 
#-------------------------------------------------------------------------------
    if resolution == 'daily':
        filelist   = ['BE-Lon_FLUXNET2015_FULLSET_DD_2004-2014.csv',
                      'FR-Gri_FLUXNET2015_FULLSET_DD_2004-2013.csv',
                      'DE-Kli_FLUXNET2015_FULLSET_DD_2004-2014.csv',
                      'IT-BCi_mais_2004-2009_daily.csv']
    elif resolution == '3-hourly':
        filelist   = ['BE-Lon_FLUXNET2015_FULLSET_HH_2004-2014.csv',
                      'FR-Gri_FLUXNET2015_FULLSET_HH_2004-2013.csv',
                      'DE-Kli_FLUXNET2015_FULLSET_HH_2004-2014.csv',
                      'IT-BCi_mais_2004-2009_daily.csv']

#-------------------------------------------------------------------------------
# Extract timeseries for the different sites

    # read files for the different sites
    f = open_csv(obsdir,filelist,convert_to_float=True)

    series = dict()
    filepath = os.path.join(fluxnetdir,'%s_timeseries_OBS.pickle'%resolution)

    for fnam,site in zip(filelist, sites):

        print site
       
        # TA_F_DAY: average daytime Ta_day from meas and ERA (*C)
        # SW_IN_F: SWin from meas and ERA (W.m-2) 
        # VPD_F: VPD consolidated from VPD_F_MDS and VPD_F_ERA (hPa)
        # TS_F_MDS_1 to 4: Tsoil of 4 soil layers (*C)
        # SWC_F_MDS_1 to 4: soil water content (%) of 4 layers (1=shallow) 
        # NT = night-time partitioning method (gC m-2 s-1)
        # VUT: variable ref u* between years
        FLUX_variables = ['TA_F_DAY', 'SW_IN_F', 'VPD_F', 'TS_F_MDS_1', 
                          'TS_F_MDS_2', 'TS_F_MDS_3', 'SWC_F_MDS_1', 'SWC_F_MDS_2',
                          'SWC_F_MDS_3', 'GPP_NT_VUT_REF', 'RECO_NT_VUT_REF',
                          'NEE_VUT_REF', 'crop', 'LAI', 'AGB', 'C_height']
        FLUX_varnames  = ['Ta_day', 'SWin', 'VPD', 'Ts_1', 'Ts_2', 'Ts_3', 'SWC_1',
                          'SWC_2', 'SWC_3', 'GPP', 'TER', 'NEE', 'crop_no', 'LAI',
                          'AGB', 'CHT']
        IT_variables = ['SWC_avg', 'GPP', 'Reco', 'NEE', 'crop', 'GLAI', 'AGB', 
                        'C_height']
        IT_varnames  = ['SWC', 'GPP', 'TER', 'NEE', 'crop_no', 'LAI', 'AGB', 
                        'CHT']

        # timestamps for all daily timeseries
        startyear = str(f[fnam]['TIMESTAMP'][0])[0:4]
        endyear   = str(f[fnam]['TIMESTAMP'][-1])[0:4]
        startdate = '%s-01-01 00:00:00'%startyear
        enddate   = '%s-12-31 23:30:00'%endyear
        if site=='DE-Kli': enddate = '%s-12-31 23:00:00'%endyear

        series[site] = dict()

        if resolution == '3-hourly':
            tm = pd.date_range(startdate, enddate, freq='30min')
            if (site!='IT-BCi'):
                for var,varname in zip(FLUX_variables[:12], FLUX_varnames[:12]):
                    # if the fluxes are half-hourly, I convert them to 3-hourly
                    if varname == 'Ta_day':
                        series[site]['Ta'] = pd.Series(f[fnam]['TA_F'], index=tm)
                    elif ((varname == 'SWC_2' or varname == 'SWC_3') and 
                    site == 'FR-Gri'):
                        series[site][varname] = pd.Series([-9999.]*len(tm), index=tm)
                    else: 
                        series[site][varname] = pd.Series(f[fnam][var], index=tm)
                    print varname

        elif resolution == 'daily':
            tm = pd.date_range(startdate, enddate, freq='1d')
            if (site!='IT-BCi'):
                for var,varname in zip(FLUX_variables, FLUX_varnames):
                    series[site][varname] = pd.Series(f[fnam][var], index=tm)
                    print varname
            else:
                tm_irreg = [pd.to_datetime('%s-%s-%s'%(str(t)[0:4],str(t)[4:6],
                                    str(t)[6:8])) for t in f[fnam]['TIMESTAMP']]
                # since the time records have gaps in the IT-BCi data, we use a
                # special function to fill the gaps with -9999. values and
                # convert it to pandas timeseries
                for var,varname in zip(IT_variables, IT_varnames):
                    #if varname == 'VPD':
                    #    ta     = f[fnam]['T_avg']
                    #    dayvar = f[fnam]['Rh_avg'] / 100. * 6.11 * np.exp(ta /\
                    #             (238.3 + ta) * 17.2694)
                    dayvar = f[fnam][var]
                    series[site][varname] = convert2pandas(tm_irreg, dayvar, tm)
                    print varname
        else:
            print "Wrong CO2 fluxes temporal resolution: must be either "+\
                  "'daily' or '3-hourly'"
            sys.exit()



    # we store the pandas series in one pickle file
    pickle_dump(series, open(filepath,'wb'))

#-------------------------------------------------------------------------------
# plot timeseries

    # Let's plot the available micromet variables that are important for WOFOST
    #plot_fluxnet_micromet(obsdir,sites,[2005,2005],'-')

    # Let's plot GPP, TER, NEE
    #plot_fluxnet_daily_c_fluxes(obsdir,sites,[2004,2014],'-')

    #plot_fluxnet_LAI_CHT_AGB(obsdir,sites,[2004,2014],'o')
#-------------------------------------------------------------------------------

    return series
Example #31
def main():
#===============================================================================
    global inputdir, codedir, outputdir, CGMSdir, obsdir
#-------------------------------------------------------------------------------
    import cx_Oracle
    import sqlalchemy as sa
    from datetime import datetime
#-------------------------------------------------------------------------------
# ================================= USER INPUT =================================

# read the settings from the rc file
    rcdict     = rc.read('settings.rc')

#===============================================================================
#-------------------------------------------------------------------------------
# extract the needed information from the rc file
    sites      = [s.strip(' ') for s in rcdict['sites'].split(',')]
    crops      = [s.strip(' ') for s in rcdict['crops'].split(',')]
    crop_nos   = [int(s.strip(' ')) for s in rcdict['crop_nos'].split(',')]
    years      = [int(s.strip(' ')) for s in rcdict['years'].split(',')]

    obsdir     = rcdict['obsdir']
    inputdir   = rcdict['inputdir']
    CGMSdir     = os.path.join(inputdir, 'CGMS')
    codedir    = rcdict['codedir']
#-------------------------------------------------------------------------------
# get the closest CGMS grid cell id number for each FluxNet site

    # get the sites longitude and latitudes
    sitdict = open_csv(os.path.join(obsdir,'regrouped_data'), 'sites_info.txt',
                       convert_to_float=False)
    site_lons = sitdict['site_lons']
    site_lats = sitdict['site_lats']

    # we read the CGMS grid cells coordinates from file
    CGMS_cells = open_csv(CGMSdir, 'CGMS_grid_list.csv', convert_to_float=True)
    all_grids  = CGMS_cells['GRID_NO']
    all_lons   = CGMS_cells['LONGITUDE']
    all_lats   = CGMS_cells['LATITUDE']

    flux_gri = dict()
    for i,site in enumerate(sitdict['sites']):
        lon = float(site_lons[i])
        lat = float(site_lats[i])
        # compute the distance to site for all CGMS grid cells
        dist_list = list()
        for j,grid_no in enumerate(all_grids):
            distance = ((all_lons[j]-lon)**2. + (all_lats[j]-lat)**2.)**(1./2.)
            dist_list += [distance] 
        # select the closest grid cell
        indx = np.argmin(np.array(dist_list))
        flux_gri[site] = all_grids[indx]

        print 'FluxNet site %s with lon=%5.2f, lat=%5.2f: closest grid cell is %i'%(site, lon, lat, all_grids[indx])

#-------------------------------------------------------------------------------
# create new file with grid cell number in it

    filename = os.path.join(inputdir,'sites_info2.csv')
    newres = open(filename,'wb')
    oldres = open(os.path.join(obsdir,'regrouped_data/sites_info.txt'),'rU') 
    reader = oldres.readlines()
    oldres.close()
    for l,line in enumerate(reader):
        site = line.split(',')[0].strip(' ')
        if l==0: line = line.strip('\n')+', gridcells\n'
        else: line = line.strip('\n') + ',%10i'%int(flux_gri[site]) + '\n'
        newres.write(line)
    newres.close()
    print '\nWe successfully created the input file with grid cell IDs:\n%s'%filename
    

#-------------------------------------------------------------------------------
# retrieve the necessary input data for all sites

    # settings of the connection
    user = "******"
    password = "******"
    tns = "EURDAS.WORLD"
    dsn = "oracle+cx_oracle://{user}:{pw}@{tns}".format(user=user,pw=password,tns=tns)
    engine = sa.create_engine(dsn)
    print engine

    # test the connection:
    try:
        connection = cx_Oracle.connect("cgms12eu_select/[email protected]")
    except cx_Oracle.DatabaseError:
        print '\nBEWARE!! The Oracle database is not responding. Probably, you are'
        print 'not using a computer wired within the Wageningen University network.'
        print '--> Get connected with ethernet cable before trying again!'
        sys.exit()

    for c,crop in enumerate(crops):
        crop_no = crop_nos[c]

        print '\nRetrieving input data for %s (CGMS id=%i)'%(crop,crop_no)
        # We add a timestamp at start of the retrieval
        start_timestamp = datetime.utcnow()
        
        # We retrieve the list of suitable soil types for the selected crop
        # species
        filename = os.path.join(CGMSdir, 'soildata_objects/',
                   'suitablesoilsobject_c%d.pickle'%(crop_no))
        if os.path.exists(filename):
            suitable_stu = pickle_load(open(filename,'rb'))
        else:
            from pcse.db.cgms11 import STU_Suitability
            suitable_stu = STU_Suitability(engine, crop_no)
            suitable_stu_list = []
            for item in suitable_stu:
                suitable_stu_list = suitable_stu_list + [item]
            suitable_stu = suitable_stu_list
            pickle_dump(suitable_stu,open(filename,'wb'))       
            print 'retrieving suitable soils for %s'%crop

        # WE LOOP OVER ALL YEARS:
        for y, year in enumerate(years): 
            print '\n######################## Year %i ##############'%year+\
            '##########\n'
        
            # if we do a serial iteration, we loop over the grid cells that 
            # contain arable land
            for grid in flux_gri.values():
                retrieve_CGMS_input(grid, year, crop_no, suitable_stu, engine)
        
        # We add a timestamp at end of the retrieval, to time the process
        end_timestamp = datetime.utcnow()
        print '\nDuration of the retrieval:', end_timestamp-start_timestamp
Example #32
        makedirs(SESSIONS_DIR)
    except OSError, e:
        if e.errno == 17:
            # Already exists
            pass
        else:
            raise

    # Write to a temporary file and move it in place, for safety
    tmp_file_path = None
    try:
        tmp_file_fh, tmp_file_path = mkstemp()
        os_close(tmp_file_fh)

        with open(tmp_file_path, 'wb') as tmp_file:
            pickle_dump(CURRENT_SESSION, tmp_file)
        copy(tmp_file_path, get_session_pickle_path(CURRENT_SESSION.get_sid()))
    except IOError:
        # failed store: no permissions?
        raise SessionStoreError
    finally:
        if tmp_file_path is not None:
            remove(tmp_file_path)

def save_conf(config):
    get_session()['conf'] = config
    return {}
    
def load_conf():
    try:
        return {
Example #33
def main():
    #===============================================================================
    global inputdir, outputdir, optimidir
    #-------------------------------------------------------------------------------
    # ================================= USER INPUT =================================

    # read the settings from the rc file
    rcdict = rc.read('settings.rc')

    #===============================================================================
    # extract the needed information from the rc file
    sites = [s.strip(' ') for s in rcdict['sites'].split(',')]
    #NUTS_reg   = [s.strip(' ') for s in rcdict['NUTS_reg'].split(',')]
    crops = [s.strip(' ') for s in rcdict['crops'].split(',')]
    crop_nos = [int(s.strip(' ')) for s in rcdict['crop_nos'].split(',')]
    years = [int(s.strip(' ')) for s in rcdict['years'].split(',')]

    # directory paths
    outputdir = rcdict['outputdir']
    inputdir = rcdict['inputdir']

    #-------------------------------------------------------------------------------
    # get the list of NUTS 2 region names associated to the list of FluxNet sites
    from WOF_00_retrieve_input_data import open_csv
    sitdict = open_csv(inputdir, 'sites_info2.csv', convert_to_float=False)
    NUTS_reg = sitdict['NUTS_reg']
    #-------------------------------------------------------------------------------
    # list the old gapfilled files to remove, and remove them all

    for s, site in enumerate(sites):

        for c, crop_name in enumerate(crops):
            crop_no = crop_nos[c]

            for year in years:
                optimidir = os.path.join(outputdir,
                                         'fgap/%i/c%i/' % (year, crop_no))

                files2remove = [
                    f for f in os.listdir(optimidir) if '_gapfilled' in f
                ]
                for f in files2remove:
                    os.remove(os.path.join(optimidir, f))

#-------------------------------------------------------------------------------
# gap fill

    for s, site in enumerate(sites):
        NUTS_no = NUTS_reg[s]

        for c, crop_name in enumerate(crops):
            crop_no = crop_nos[c]

            for year in years:
                # create output folder if it doesn't already exist
                optimidir = os.path.join(outputdir,
                                         'fgap/%i/c%i/' % (year, crop_no))

                # detect whether this year needs to be gap-filled
                f2gapfill = [
                    f for f in os.listdir(optimidir)
                    if ('_tobegapfilled' in f) and (NUTS_no in f)
                ]
                if len(f2gapfill) == 0:
                    continue

                print '\nWe gap fill:', site, NUTS_no, year, crop_name

                # GAP-FILLING YLDGAPF for NUTS2 level:
                prevyear = os.path.join(
                    optimidir.replace('%04d' % year, '%04d' % (year - 1)),
                    'fgap_%s_optimized.pickle' % NUTS_no)
                nextyear = os.path.join(
                    optimidir.replace('%04d' % year, '%04d' % (year + 1)),
                    'fgap_%s_optimized.pickle' % NUTS_no)
                availfiles = []
                availyears = []
                for yr in range(1995, 2020):
                    searchyear = os.path.join(
                        optimidir.replace('%04d' % year, '%04d' % yr),
                        'fgap_%s_optimized.pickle' % NUTS_no)
                    if os.path.exists(searchyear):
                        availfiles.append(searchyear)
                        availyears.append(yr)
                print "%d years found for gap filling:" % len(
                    availfiles), availyears

                # Use average from y-1 and y+1
                if prevyear in availfiles and nextyear in availfiles:
                    optimi_info = pickle_load(open(prevyear, 'rb'))
                    ygf_prev = optimi_info[2]
                    optimi_info = pickle_load(open(nextyear, 'rb'))
                    ygf_next = optimi_info[2]
                    ygf = (ygf_prev + ygf_next) / 2.0  # simply average
                    opt_code = 'gapfilled02'
                    shortlist_cells = optimi_info[3]

                # Use previous year value
                elif prevyear in availfiles:
                    optimi_info = pickle_load(open(prevyear, 'rb'))
                    ygf = optimi_info[2]
                    opt_code = 'gapfilled03a'
                    shortlist_cells = optimi_info[3]
                    print shortlist_cells

                # Use next year value
                elif nextyear in availfiles:
                    optimi_info = pickle_load(open(nextyear, 'rb'))
                    ygf = optimi_info[2]
                    opt_code = 'gapfilled03b'
                    shortlist_cells = optimi_info[3]

                # Use climatological average from other years if nyear > 2
                elif len(availfiles) > 2:
                    ygf = 0.0
                    for filename in availfiles:
                        optimi_info = pickle_load(open(filename, 'rb'))
                        ygf += optimi_info[2]
                    ygf = ygf / len(availfiles)
                    opt_code = 'gapfilled04'
                    shortlist_cells = optimi_info[3]
                # Use upper NUTS level optimum (NUTS1, or NUTS0 at worst)
                else:
                    try:
                        nuts1file = os.path.join(
                            optimidir,
                            'fgap_%s_optimized.pickle' % NUTS_no[0:3])
                        data = pickle_load(open(nuts1file, 'rb'))
                        ygf = data[2]
                        opt_code = 'gapfilled05a'
                        shortlist_cells = data[3]
                    except IOError:
                        try:
                            nuts0file = os.path.join(
                                optimidir,
                                'fgap_%s_optimized.pickle' % NUTS_no[0:2])
                            data = pickle_load(open(nuts0file, 'rb'))
                            ygf = data[2]
                            opt_code = 'gapfilled05b'
                            shortlist_cells = data[3]
                # Use default value if all previous methods fail
                        except IOError:
                            ygf = 0.8
                            opt_code = 'gapfilled06'
                            shortlist_cells = []

                print "Using ygf of %5.2f and code of %s" % (ygf, opt_code)
                print "created file fgap_%s_%s.pickle"%(NUTS_no, opt_code)+\
                      " in folder %s"%optimidir
                currentyear = os.path.join(
                    optimidir, 'fgap_%s_%s.pickle' % (NUTS_no, opt_code))
                pickle_dump([NUTS_no, opt_code, ygf, shortlist_cells],
                            open(currentyear, 'wb'))
Example #34
def main():
    #===============================================================================
    global inputdir, codedir, outputdir, CGMSdir, ECMWFdir, optimidir, forwardir,\
           EUROSTATdir, mmC, mmCO2, mmCH2O
    #-------------------------------------------------------------------------------
    # fixed molar masses for unit conversion of carbon fluxes
    mmC = 12.01
    mmCO2 = 44.01
    mmCH2O = 30.03

    # ================================= USER INPUT =================================

    # read the settings from the rc file
    rcdict = rc.read('settings.rc')

    #===============================================================================
    #-------------------------------------------------------------------------------
    # extract the needed information from the rc file
    sites = [s.strip(' ') for s in rcdict['sites'].split(',')]
    #site_lons  = [float(s.strip(' ')) for s in rcdict['site_lons'].split(',')]
    #site_lats  = [float(s.strip(' ')) for s in rcdict['site_lats'].split(',')]
    #gridcells  = [float(s.strip(' ')) for s in rcdict['gridcells'].split(',')]
    #NUTS_reg   = [s.strip(' ') for s in rcdict['NUTS_reg'].split(',')]
    crops = [s.strip(' ') for s in rcdict['crops'].split(',')]
    crop_nos = [int(s.strip(' ')) for s in rcdict['crop_nos'].split(',')]
    years = [int(s.strip(' ')) for s in rcdict['years'].split(',')]

    # forward runs settings
    force_forwardsim = str_to_bool(rcdict['force_forwardsim'])
    selec_method = rcdict['selec_method']
    ncells = int(rcdict['ncells'])
    nsoils = int(rcdict['nsoils'])
    weather = rcdict['weather']

    # carbon cycle settings
    TER_method = rcdict[
        'TER_method']  # if grow-only: NEE = GPP + Rgrow + Rsoil
    Eact0 = float(rcdict['Eact0'])
    R10 = float(rcdict['R10'])
    resolution = rcdict['resolution']  # can be hourly or daily

    # directory paths
    outputdir = rcdict['outputdir']
    inputdir = rcdict['inputdir']
    codedir = rcdict['codedir']
    CGMSdir = os.path.join(inputdir, 'CGMS')
    ECMWFdir = os.path.join(inputdir, 'ECMWF')
    EUROSTATdir = os.path.join(inputdir, 'EUROSTATobs')

    #-------------------------------------------------------------------------------
    # get the sites longitude and latitudes
    from WOF_00_retrieve_input_data import open_csv
    sitdict = open_csv(inputdir, 'sites_info2.csv', convert_to_float=False)
    site_lons = [float(l) for l in sitdict['site_lons']]
    site_lats = [float(l) for l in sitdict['site_lats']]
    gridcells = [int(g) for g in sitdict['gridcells']]
    NUTS_reg = sitdict['NUTS_reg']
    #-------------------------------------------------------------------------------
    # run WOFOST at the location / year / crops specified by user

    print '\nYLDGAPF(-),  grid_no,  year,  stu_no, stu_area(ha), '\
     +'TSO(kgDM.ha-1), TLV(kgDM.ha-1), TST(kgDM.ha-1), '\
     +'TRT(kgDM.ha-1), maxLAI(m2.m-2), rootdepth(cm), TAGP(kgDM.ha-1)'

    # we format the time series using the pandas python library, for easy plotting
    startdate = '%i-01-01 00:00:00' % years[0]
    enddate = '%i-12-31 23:59:59' % years[-1]
    if resolution == 'daily':
        dtimes = pd.date_range(startdate, enddate, freq='1d')
    elif resolution == '3-hourly':
        dtimes = pd.date_range(startdate, enddate, freq='3H')
    else:
        print "Wrong CO2 fluxes temporal resolution: must be either 'daily' or '3-hourly'"
        sys.exit()

    series = dict()
    for s, site in enumerate(sites):
        lon = site_lons[s]
        lat = site_lats[s]
        grid_no = gridcells[s]
        NUTS_no = NUTS_reg[s]
        series[site] = dict()

        for c, crop_name in enumerate(crops):
            cpno = crop_nos[c]
            series[site]['c%i' % cpno] = dict()
            list_of_gpp = np.array([])
            list_of_raut = np.array([])
            list_of_rhet = np.array([])
            list_of_ter = np.array([])
            list_of_nee = np.array([])

            for year in years:
                # folder holding the optimum fgap pickles for that year and crop
                optimidir = os.path.join(outputdir,
                                         'fgap/%i/c%i/' % (year, cpno))

                # create the forward-run output folder if it doesn't already exist
                forwardir = os.path.join(outputdir,
                                         'forward_runs/%i/c%i/' % (year, cpno))
                if not os.path.exists(forwardir):
                    os.makedirs(forwardir)

                print '\n', site, NUTS_no, year, crop_name

                # RETRIEVE OPTIMUM FGAP:
                # either the NUTS2 optimum if it exists
                ygf_path = os.path.join(optimidir,
                                        'fgap_%s_optimized.pickle' % NUTS_no)
                # or the gapfilled version
                if not os.path.exists(ygf_path):
                    ygf_file = [
                        f for f in os.listdir(optimidir)
                        if (NUTS_no in f) and ('_gapfilled' in f)
                    ][0]
                    ygf_path = os.path.join(optimidir, ygf_file)
                fgap_info = pickle_load(open(ygf_path, 'rb'))
                yldgapf = fgap_info[2]

                # FORWARD SIMULATIONS:
                perform_yield_sim(cpno, grid_no, int(year), yldgapf,
                                  selec_method, nsoils, force_forwardsim)
                # POST-PROCESSING OF GPP, RAUTO, RHET, NEE:
                SimData = compute_timeseries_fluxes(cpno,
                                                    grid_no,
                                                    lon,
                                                    lat,
                                                    year,
                                                    R10,
                                                    Eact0,
                                                    selec_method,
                                                    nsoils,
                                                    TER_method=TER_method,
                                                    scale=resolution)
                list_of_gpp = np.concatenate([list_of_gpp, SimData[1]], axis=0)
                list_of_raut = np.concatenate([list_of_raut, SimData[2]],
                                              axis=0)
                list_of_rhet = np.concatenate([list_of_rhet, SimData[3]],
                                              axis=0)
                list_of_ter = np.concatenate([list_of_ter, SimData[4]], axis=0)
                list_of_nee = np.concatenate([list_of_nee, SimData[5]], axis=0)

            print dtimes, list_of_gpp

            series[site]['c%i' % cpno]['GPP'] = pd.Series(list_of_gpp,
                                                          index=dtimes)
            series[site]['c%i' % cpno]['Raut'] = pd.Series(list_of_raut,
                                                           index=dtimes)
            series[site]['c%i' % cpno]['Rhet'] = pd.Series(list_of_rhet,
                                                           index=dtimes)
            series[site]['c%i' % cpno]['TER'] = pd.Series(list_of_ter,
                                                          index=dtimes)
            series[site]['c%i' % cpno]['NEE'] = pd.Series(list_of_nee,
                                                          index=dtimes)

    # we store all the pandas time series in one pickle file
    filepath = os.path.join(outputdir,'forward_runs/'+\
               '%s_timeseries_%s_WOFOST.pickle'%(resolution,TER_method))
    pickle_dump(series, open(filepath, 'wb'))
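
Note that pickle_dump(series, open(filepath, 'wb')) above leaves closing the file handle to garbage collection. A minimal sketch of the same write wrapped in a context manager, as some other examples on this page do (the helper name dump_series is hypothetical):

from pickle import dump as pickle_dump

def dump_series(series, filepath):
    # hypothetical helper: same write as above, but the file handle is
    # closed deterministically by the context manager
    with open(filepath, 'wb') as pickle_file:
        pickle_dump(series, pickle_file)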
    try:
        makedirs(SESSIONS_DIR)
    except OSError, e:
        if e.errno == 17:
            # Already exists
            pass
        else:
            raise

    # Write to a temporary file and move it in place, for safety
    tmp_file_path = None
    try:
        tmp_file_fh, tmp_file_path = mkstemp()
        os_close(tmp_file_fh)

        with open(tmp_file_path, 'wb') as tmp_file:
            pickle_dump(CURRENT_SESSION, tmp_file)
        copy(tmp_file_path, get_session_pickle_path(CURRENT_SESSION.get_sid()))
    except IOError:
        # failed store: no permissions?
        raise SessionStoreError
    finally:
        if tmp_file_path is not None:
            remove(tmp_file_path)


def save_conf(config):
    get_session()['conf'] = config
    return {}


def load_conf():
def main():
#===============================================================================
    global inputdir, codedir, outputdir, CGMSdir, ECMWFdir, optimidir, forwardir,\
           EUROSTATdir, mmC, mmCO2, mmCH2O
#-------------------------------------------------------------------------------
# fixed molar masses for unit conversion of carbon fluxes
    mmC    = 12.01
    mmCO2  = 44.01
    mmCH2O = 30.03 

# ================================= USER INPUT =================================

# read the settings from the rc file
    rcdict     = rc.read('settings.rc')

#===============================================================================
#-------------------------------------------------------------------------------
# extract the needed information from the rc file
    sites      = [s.strip(' ') for s in rcdict['sites'].split(',')]
    #site_lons  = [float(s.strip(' ')) for s in rcdict['site_lons'].split(',')]
    #site_lats  = [float(s.strip(' ')) for s in rcdict['site_lats'].split(',')]
    #gridcells  = [float(s.strip(' ')) for s in rcdict['gridcells'].split(',')]
    #NUTS_reg   = [s.strip(' ') for s in rcdict['NUTS_reg'].split(',')]
    crops      = [s.strip(' ') for s in rcdict['crops'].split(',')]
    crop_nos   = [int(s.strip(' ')) for s in rcdict['crop_nos'].split(',')]
    years      = [int(s.strip(' ')) for s in rcdict['years'].split(',')]

    # forward runs settings
    force_forwardsim = str_to_bool(rcdict['force_forwardsim'])
    selec_method  = rcdict['selec_method']
    ncells        = int(rcdict['ncells'])
    nsoils        = int(rcdict['nsoils'])
    weather       = rcdict['weather']

    # carbon cycle settings
    TER_method  = rcdict['TER_method'] # if grow-only: NEE = GPP + Rgrow + Rsoil
    Eact0       = float(rcdict['Eact0'])
    R10         = float(rcdict['R10'])
    resolution  = rcdict['resolution']  # can be 'daily' or '3-hourly'

    # directory paths
    outputdir  = rcdict['outputdir']
    inputdir   = rcdict['inputdir']
    codedir    = rcdict['codedir']
    CGMSdir     = os.path.join(inputdir, 'CGMS')
    ECMWFdir    = os.path.join(inputdir, 'ECMWF')
    EUROSTATdir = os.path.join(inputdir, 'EUROSTATobs')

#-------------------------------------------------------------------------------
    # get the sites longitude and latitudes
    from WOF_00_retrieve_input_data import open_csv
    sitdict = open_csv(inputdir, 'sites_info2.csv', convert_to_float=False)
    site_lons = [float(l) for l in sitdict['site_lons']]
    site_lats = [float(l) for l in sitdict['site_lats']]
    gridcells = [int(g) for g in sitdict['gridcells']]
    NUTS_reg  = sitdict['NUTS_reg']
#-------------------------------------------------------------------------------
# run WOFOST at the location / year / crops specified by user

    print '\nYLDGAPF(-),  grid_no,  year,  stu_no, stu_area(ha), '\
     +'TSO(kgDM.ha-1), TLV(kgDM.ha-1), TST(kgDM.ha-1), '\
     +'TRT(kgDM.ha-1), maxLAI(m2.m-2), rootdepth(cm), TAGP(kgDM.ha-1)'

    # we format the time series using the pandas python library, for easy plotting
    startdate = '%i-01-01 00:00:00'%years[0]
    enddate   = '%i-12-31 23:59:59'%years[-1]
    if resolution == 'daily':
        dtimes = pd.date_range(startdate, enddate, freq='1d')
    elif resolution == '3-hourly':
        dtimes = pd.date_range(startdate, enddate, freq='3H')
    else:
        print "Wrong CO2 fluxes temporal resolution: must be either 'daily' or '3-hourly'"
        sys.exit()

    series = dict()
    for s,site in enumerate(sites):
        lon = site_lons[s]
        lat = site_lats[s]
        grid_no = gridcells[s]
        NUTS_no = NUTS_reg[s]
        series[site] = dict()

        for c,crop_name in enumerate(crops):
            cpno = crop_nos[c]
            series[site]['c%i'%cpno] = dict()
            list_of_gpp  = np.array([])
            list_of_raut = np.array([])
            list_of_rhet = np.array([])
            list_of_ter  = np.array([])
            list_of_nee  = np.array([])

            for year in years:
                # folder holding the optimum fgap pickles for that year and crop
                optimidir = os.path.join(outputdir,'fgap/%i/c%i/'%(year,cpno))

                # create the forward-run output folder if it doesn't already exist
                forwardir = os.path.join(outputdir,'forward_runs/%i/c%i/'%(year,
                                                                        cpno))
                if not os.path.exists(forwardir):
                    os.makedirs(forwardir)

                print '\n', site, NUTS_no, year, crop_name

                # RETRIEVE OPTIMUM FGAP:
                # either the NUTS2 optimum if it exists
                ygf_path  = os.path.join(optimidir,'fgap_%s_optimized.pickle'%NUTS_no)
                # or the gapfilled version
                if not os.path.exists(ygf_path):
                    ygf_file  = [f for f in os.listdir(optimidir) if (NUTS_no in f) 
                                and ('_gapfilled' in f)][0]
                    ygf_path = os.path.join(optimidir, ygf_file)
                fgap_info = pickle_load(open(ygf_path,'rb'))
                yldgapf   = fgap_info[2]

                # FORWARD SIMULATIONS:
                perform_yield_sim(cpno, grid_no, int(year), yldgapf, 
                                  selec_method, nsoils, force_forwardsim)
                # POST-PROCESSING OF GPP, RAUTO, RHET, NEE:
                SimData = compute_timeseries_fluxes(cpno, grid_no, lon, lat, 
                                                    year, R10, Eact0, selec_method, 
                                                    nsoils, TER_method=TER_method,
                                                    scale=resolution)
                list_of_gpp  = np.concatenate([list_of_gpp,  SimData[1]], axis=0)
                list_of_raut = np.concatenate([list_of_raut, SimData[2]], axis=0)
                list_of_rhet = np.concatenate([list_of_rhet, SimData[3]], axis=0)
                list_of_ter  = np.concatenate([list_of_ter,  SimData[4]], axis=0)
                list_of_nee  = np.concatenate([list_of_nee,  SimData[5]], axis=0)

            print dtimes, list_of_gpp
            
            series[site]['c%i'%cpno]['GPP']  = pd.Series(list_of_gpp,  index=dtimes)
            series[site]['c%i'%cpno]['Raut'] = pd.Series(list_of_raut, index=dtimes)
            series[site]['c%i'%cpno]['Rhet'] = pd.Series(list_of_rhet, index=dtimes)
            series[site]['c%i'%cpno]['TER']  = pd.Series(list_of_ter,  index=dtimes)
            series[site]['c%i'%cpno]['NEE']  = pd.Series(list_of_nee,  index=dtimes)

    # we store all the pandas time series in one pickle file
    filepath = os.path.join(outputdir,'forward_runs/'+\
               '%s_timeseries_%s_WOFOST.pickle'%(resolution,TER_method))
    pickle_dump(series, open(filepath,'wb'))
def main():
#===============================================================================
    global outputdir, obsdir
#-------------------------------------------------------------------------------
# ================================= USER INPUT =================================

# read the settings from the rc file
    rcdict    = rc.read('settings.rc')

#===============================================================================
#-------------------------------------------------------------------------------
# extract the needed information for that script
    sites      = [s.strip(' ') for s in rcdict['sites'].split(',')]
    years      = [s.strip(' ') for s in rcdict['years'].split(',')]
    TER_method = rcdict['TER_method']
    R10        = rcdict['R10']
    resolution  = rcdict['resolution']  # can be 'daily' or '3-hourly'
    if resolution=='daily': res='1d'
    elif resolution=='3-hourly': res='3H'

    # directory paths
    outputdir  = rcdict['outputdir']
    obsdir     = rcdict['obsdir']
    forwardir  = os.path.join(outputdir, 'forward_runs')

#-------------------------------------------------------------------------------
# load the WOFOST runs of all crops

    # we load the pandas time series pickled by the forward-run script
    filepath = os.path.join(forwardir,'%s_timeseries_'%resolution+\
                            '%s_WOFOST.pickle'%TER_method)
    series   = pickle_load(open(filepath,'rb'))

    filepath = os.path.join(obsdir,'daily_timeseries_OBS.pickle')
    obs      = pickle_load(open(filepath,'rb'))

    final_series = dict()

    for s,site in enumerate(sites):
        print site
        print obs[site].keys()
        final_series[site] = dict()

        # read the crop rotation from FluxNet file
        rotation = obs[site]['crop_no']

        # slice each year's required time series, append to final series
        for varname in ['GPP','TER','Raut','Rhet','NEE']:
            print 'variable %s'%varname
            var = []
            for year in years:
                
                # get the crop number for that year
                if site != 'IT-BCi':
                    try:
                        crop_no = rotation[year:year][0]
                    except IndexError: # index error occurs when the year is
                                       # not in the rotation time series
                        startdate = '%s-01-01 00:00:00'%year
                        enddate   = '%s-12-31 23:59:59'%year
                        dtimes    = pd.date_range(startdate, enddate, freq=res)
                        na_vals   = np.array(len(dtimes)*[np.nan])
                        var      += [pd.Series(na_vals, index=dtimes)]
                        print '   ',site, year, 'unknown crop cover: skip.'
                        continue
                elif site == 'IT-BCi':
                    if int(year) not in np.arange(2004,2010,1): 
                        startdate = '%s-01-01 00:00:00'%year
                        enddate   = '%s-12-31 23:59:59'%year
                        dtimes    = pd.date_range(startdate, enddate, freq=res)
                        na_vals   = np.array(len(dtimes)*[np.nan])
                        var      += [pd.Series(na_vals, index=dtimes)]
                        print '   ',site, year, 'unknown crop cover: skip.'
                        continue
                    else:
                        crop_no = 2

                # try slicing and concatenating that year's timeseries from file
                try:
                    # if the GPP = 0 (failed growing season), we set TER and 
                    # NEE to zero as well
                    if np.mean(series[site]['c%i'%crop_no]['GPP'][year:year]) == 0.:
                        startdate = '%s-01-01 00:00:00'%year
                        enddate   = '%s-12-31 23:59:59'%year
                        dtimes    = pd.date_range(startdate, enddate, freq=res)
                        zeros     = np.array(len(dtimes)*[0.])
                        var      += [pd.Series(zeros, index=dtimes)]
                    else:
                        var += [series[site]['c%i'%crop_no][varname][year:year]]
                    print '   ',site, year, '%2i'%crop_no, 'slicing'
                except KeyError: # key error occurs when we haven't run a crop
                                 # or a year with WOFOST
                    startdate = '%s-01-01 00:00:00'%year
                    enddate   = '%s-12-31 23:59:59'%year
                    dtimes    = pd.date_range(startdate, enddate, freq=res)
                    na_vals   = np.array(len(dtimes)*[np.nan])
                    var      += [pd.Series(na_vals, index=dtimes)]
                    print '   ',site, year, '%2i'%crop_no, 'skip.'
                
            final_series[site][varname] = pd.concat(var)
        #final_series[site]['GPP'].plot()
        #plt.show()

    # store the final WOFOST timeseries
    filepath = os.path.join(outputdir,'%s_timeseries_'%resolution+\
               '%s_R10=%s_WOFOST_crop_rotation.pickle'%(TER_method,R10))
    pickle_dump(final_series, open(filepath,'wb'))
    print 'successfully dumped %s'%filepath
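
The loop above fills years with unknown crop cover by appending a NaN-valued pandas Series before concatenating everything with pd.concat. A small self-contained sketch of that gap-filling pattern (the helper name nan_year is not from the script):

import numpy as np
import pandas as pd

def nan_year(year, freq='1d'):
    # hypothetical helper mirroring the pattern above: one calendar year of
    # NaN values at the requested frequency, ready to be concatenated
    dtimes = pd.date_range('%s-01-01 00:00:00' % year,
                           '%s-12-31 23:59:59' % year, freq=freq)
    return pd.Series(np.full(len(dtimes), np.nan), index=dtimes)

# e.g. pd.concat([nan_year(2005), nan_year(2006)]) spans two years of NaNs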
Exemple #38
0
def optimize_fgap(site, crop_no, crop_name, year, NUTS_no, selec_method, ncells, 
                                           nsoils, weather, force_optimization):
#===============================================================================
# Temporarily add code directory to python path, to be able to import pcse
# modules
    sys.path.insert(0, codedir) 
    sys.path.insert(0, os.path.join(codedir,'carbon_cycle')) 
#-------------------------------------------------------------------------------
    import glob
    from maries_toolbox import define_opti_years,\
                               select_cells, select_soils
#-------------------------------------------------------------------------------
    # if the optimization has already been performed and we don't want
    # to redo it, we skip that region
    filepath = os.path.join(optimidir,'fgap_%s_optimized.pickle'%NUTS_no)
    if (os.path.exists(filepath) and force_optimization==False):
        optimum = pickle_load(open(filepath,'rb'))
        print "We have already calculated the optimum fgap for that "+\
              "year and crop: fgap=%.2f"%optimum[2]
        return optimum[2]

#-------------------------------------------------------------------------------
    # we select the grid cell of the FluxNet site
    gridlist = pickle_load(open(os.path.join(CGMSdir,
                                 'gridlist_objects/shortgridlist.pickle'),'rb'))
    selected_grid_cells = gridlist[NUTS_no]
#-------------------------------------------------------------------------------
    # where possible, we retrieve local information about yield and sowing date
    local_sowda = None
    local_yield = None
    for row in custom_yns:
        if row[0]==site and row[1]==year and row[2]==crop_no:
            local_sowda = row[3]
            local_yield = row[4]
            print 'We recovered local info from site %s:'%site
            print 'sowing date of %s:'%crop_name, local_sowda, 'grain yield: %.3f'%local_yield
            break
    if local_sowda==None and local_yield==None:
        print 'No local information on sowing date and yield.'
#-------------------------------------------------------------------------------
# we retrieve the EUROSTAT pre-processed yield observations:
    try:
        filename1   = os.path.join(EUROSTATdir, 'preprocessed_yields.pickle')
        yields_dict = pickle_load(open(filename1,'rb'))
    except IOError:
        print '\nYou have not preprocessed the EUROSTAT observations'
        print 'Run the script 03_preprocess_obs.py first!\n'
        sys.exit()
    # NB: we do NOT detrend the yields anymore, since fgap is not supposed to be
    # representative of the multi-annual yield gap
    obs_yields = yields_dict[crop_name][NUTS_no]
#-------------------------------------------------------------------------------
    # if there was no reported yield for year X, we skip that region
    if (year not in obs_yields[1]):
        print 'No reported yield, we have to gap-fill later'
        filename = os.path.join(optimidir,'fgap_%s_tobegapfilled.pickle'%NUTS_no)
        outlist = [NUTS_no, 2, 1., selected_grid_cells]
        pickle_dump(outlist, open(filename,'wb'))
        return 1.
#-------------------------------------------------------------------------------
    # NB: in the optimization routine, we use the observed cultivation
    # fraction of the crop to calculate the soil cultivated areas, and
    # thus to compute the regional yields (= weighted average of yields
    # using soil cultivated areas)

    # if the observed cultivated fraction is zero, we skip that region
    selected_soil_types = select_soils(crop_no,[g for g,a in selected_grid_cells],
                                       CGMSdir, method=selec_method, n=nsoils)
    print 'we selected grid cell %i, top %i soil types, for optimization'%(
                                              selected_grid_cells[0][0], nsoils)

#-------------------------------------------------------------------------------
    # we set the optimization code (gives us info on how we optimize)
    opti_code = 1 # 1= observations are available for optimization
                  # 2= no obs available 

    #print obs_yields[1], obs_yields[0]
    # in all other cases, we optimize the yield gap factor
    optimum = optimize_regional_yldgapf_dyn(NUTS_no, obs_yields,
                                                            crop_no,
                                                            selected_grid_cells,
                                                            selected_soil_types,
                                                            weather,
                                                            CGMSdir,
                                                            [year],
                                                            obs_type='yield',
                                                            plot_rmse=False)

    # pickle the information per NUTS region
    outlist = [NUTS_no, opti_code, optimum, selected_grid_cells]
    filename = os.path.join(optimidir,'fgap_%s_optimized.pickle'%NUTS_no)
    pickle_dump(outlist, open(filename,'wb'))

    return optimum

#===============================================================================
# Function to optimize the regional yield gap factor using the difference
# between the regional simulated and the observed harvest or yield (ie. 1 gap to
# optimize per NUTS region). This function iterates dynamically to find the
# optimum YLDGAPF.
def optimize_regional_yldgapf_dyn(NUTS_no_, detrend, crop_no_, 
    selected_grid_cells_, selected_soil_types_, weather, inputdir, opti_years_, 
    obs_type='yield', plot_rmse=False):
#===============================================================================

    import math
    from operator import itemgetter as operator_itemgetter
    from matplotlib import pyplot as plt
    from pcse.models import Wofost71_WLP_FD
    from pcse.base_classes import WeatherDataProvider
    from pcse.fileinput.cabo_weather import CABOWeatherDataProvider

    # aggregated yield method:
    
    # 2- we construct a 2D array with same dimensions as TSO_regional,
    # containing the observed yields
    row = [] # this list will become the row of the 2D array
    for y,year in enumerate(opti_years_):
        index_year = np.argmin(np.absolute(detrend[1]-year))
        row = row + [detrend[0][index_year]]
    OBS = np.tile(row, (5,1)) # repeats the list as a row 5 times, to get a
                              # 2D array with one row per explored yldgapf

    # 3- we calculate all the individual yields from the selected grid cells x
    # soils combinations

    # NB: we explore the range of yldgapf between 0. and 1.
    f0  = 0.
    f2  = 0.5
    f4  = 1.
    f_step  = 0.25 
    # Until the precision of the yield gap factor is good enough (i.e. < 0.02)
    # we loop over it. We do 5 iterations in total with this method.
    iter_no = 0
    RMSE_stored = list()
    while (f_step >= 0.02):

        iter_no = iter_no + 1
        # sub-method: looping over the yield gap factors

        # we build a range of 5 evenly spaced yield gap factors to explore:
        # the low bound, the high bound, and three values in between
        f_step = (f4 - f0)/4.
        f1 = f0 + f_step
        f3 = f2 + f_step
        f_range = [f0, f1, f2, f3, f4]

        RES = [] # list in which we will store the yields of the combinations

        counter=0
        for grid, arable_land in selected_grid_cells_:
 
            frac_arable = arable_land / 625000000.

            # Retrieve the weather data of one grid cell (all years are in one
            # file) 
            if (weather == 'CGMS'):
                filename = os.path.join(inputdir,'weather_objects/',
                           'weatherobject_g%d.pickle'%grid)
                weatherdata = WeatherDataProvider()
                weatherdata._load(filename)
            if (weather == 'ECMWF'):
                weatherdata = CABOWeatherDataProvider('%i'%grid,fpath=ECMWFdir)
                        
            # Retrieve the soil data of one grid cell (all possible soil types) 
            filename = os.path.join(inputdir,'soildata_objects/',
                       'soilobject_g%d.pickle'%grid)
            soil_iterator = pickle_load(open(filename,'rb'))

            for smu, stu_no, weight, soildata in selected_soil_types_[grid]:

                # TSO will store all the yields of one grid cell x soil 
                # combination, for all years and all 3 yldgapf values
                TSO = np.zeros((len(f_range), len(opti_years_)))

                counter +=1
        
                for y, year in enumerate(opti_years_): 

                    # Retrieve yearly data 
                    filename = os.path.join(inputdir,
                               'timerdata_objects/%i/c%i/'%(year,crop_no_),
                               'timerobject_g%d_c%d_y%d.pickle'\
                                                           %(grid,crop_no_,year))
                    timerdata = pickle_load(open(filename,'rb'))
                    filename = os.path.join(inputdir,
                               'cropdata_objects/%i/c%i/'%(year,crop_no_),
                               'cropobject_g%d_c%d_y%d.pickle'\
                                                           %(grid,crop_no_,year))
                    cropdata = pickle_load(open(filename,'rb'))
                    if str(grid).startswith('1'):
                        dum = str(grid)[0:2]
                    else:
                        dum = str(grid)[0]
                    filename = os.path.join(inputdir,
                               'sitedata_objects/%i/c%i/grid_%s/'
                                                          %(year,crop_no_,dum),
                               'siteobject_g%d_c%d_y%d_s%d.pickle'\
                                                   %(grid,crop_no_,year,stu_no))
                    sitedata = pickle_load(open(filename,'rb'))

                    for f,factor in enumerate(f_range):
            
                        cropdata['YLDGAPF']=factor
                       
                        # run WOFOST
                        wofost_object = Wofost71_WLP_FD(sitedata, timerdata,
                                                soildata, cropdata, weatherdata)
                        wofost_object.run_till_terminate()
        
                        # get the yield (in kgDM.ha-1) 
                        TSO[f,y] = wofost_object.get_variable('TWSO')

                    #print grid, stu_no, year, counter, [y[0] for y in TSO], OBS[0]
                RES = RES + [(grid, stu_no, weight*frac_arable, TSO)]

        # 4- we aggregate the yield or harvest into the regional one with array
        # operations

        sum_weighted_vals = np.zeros((len(f_range), len(opti_years_)))
                                    # empty 2D array with same dimension as TSO
        sum_weights       = 0.
        for grid, stu_no, weight, TSO in RES:
            # adding weighted 2D-arrays in the empty array sum_weighted_yields
            # NB: variable 'weight' is actually the cultivated area in m2
            sum_weighted_vals   = sum_weighted_vals + (weight/10000.)*TSO 
            # computing the total sum of the cultivated area in ha 
            sum_weights         = sum_weights       + (weight/10000.) 

        if (obs_type == 'harvest'):
            TSO_regional = sum_weighted_vals / 1000000. # sum of the individual 
                                                        # harvests in 1000 tDM
        elif (obs_type == 'yield'):
            TSO_regional = sum_weighted_vals / sum_weights # weighted average of 
                                                        # all yields in kgDM/ha

        # 5- we compute the (sim-obs) differences.
        DIFF = TSO_regional - OBS
        if (TSO_regional[-1][0] <= 0.):
            print 'WARNING: no simulated crop growth. We set the optimum fgap to 1.'
            return 1.
        if (TSO_regional[-1] <= OBS[-1]):
            print 'WARNING: obs yield > sim yield. We set optimum to 1.'
            return 1.
        
        # 6- we calculate the RMSE (root mean squared error) of the 5 yldgapf
        # The RMSE of each yldgapf is based on N obs-sim differences for the N
        # years looped over

        RMSE = np.zeros(len(f_range))
        for f,factor in enumerate(f_range):
            list_of_DIFF = []
            for y, year in enumerate(opti_years_):
                list_of_DIFF = list_of_DIFF + [DIFF[f,y]]
            RMSE[f] = np.sqrt(np.mean( [ math.pow(j,2) for j in
                                                           list_of_DIFF ] ))
        #print RMSE, f_range
        # We store the value of the RMSE for plotting purposes
        RMSE_stored = RMSE_stored + [(f_range[1], RMSE[1]), (f_range[3], RMSE[3])]
        if (iter_no == 1):
            RMSE_stored = RMSE_stored + [(f_range[0], RMSE[0]), 
                                         (f_range[2], RMSE[2]),
                                         (f_range[4], RMSE[4])]

        # 7- We update the yldgapf range to explore for the next iteration:
        # the new, narrower range is centred on the yldgapf that gave the
        # smallest RMSE among the 5 explored here (shifted inwards when the
        # minimum falls on an edge of the current range)

        index_new_center = RMSE.argmin()
        # if the optimum is close to 1:
        if index_new_center == len(f_range)-1:
            f0 = f_range[index_new_center-2]
            f2 = f_range[index_new_center-1]
            f4 = f_range[index_new_center]
        # if the optimum is close to 0:
        elif index_new_center == 0:
            f0 = f_range[index_new_center]
            f2 = f_range[index_new_center+1]
            f4 = f_range[index_new_center+2]
        else:
            f0 = f_range[index_new_center-1]
            f2 = f_range[index_new_center]
            f4 = f_range[index_new_center+1]

    # when we are finished iterating on the yield gap factor range, we plot the
    # RMSE as a function of the yield gap factor
    if (plot_rmse == True):
        RMSE_stored  = sorted(RMSE_stored, key=operator_itemgetter(0))
        x,y = zip(*RMSE_stored)
        fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5,5))
        fig.subplots_adjust(0.15,0.16,0.95,0.96,0.4,0.)
        ax.plot(x, y, c='k', marker='o')
        ax.set_xlabel('yldgapf (-)')
        ax.set_ylabel('RMSE')
        fig.savefig('%s_opti_fgap.png'%NUTS_no_)
        #pickle_dump(RMSE_stored,open('%s_RMSE.pickle'%NUTS_no_,'wb'))

    # 8- when we are finished iterating on the yield gap factor range, we return
    # the optimum value. We look for the yldgapf with the lowest RMSE
    index_optimum   = RMSE.argmin()
    optimum_yldgapf = f_range[index_optimum] 

    print 'optimum found: %.2f +/- %.2f'%(optimum_yldgapf, f_step)

    # 9- we return the optimized YLDGAPF
    return optimum_yldgapf


#===============================================================================
def str_to_bool(s):
#===============================================================================
    if s.strip(' ') == 'True':
        return True
    elif s.strip(' ') == 'False':
        return False
    else:
        raise ValueError

#===============================================================================
if __name__=='__main__':
    main()
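
The optimizer above narrows the yldgapf interval by scoring 5 evenly spaced factors, re-centring on the one with the lowest RMSE and halving the step until it drops below 0.02. A minimal sketch of that narrowing logic in isolation, with the expensive WOFOST runs replaced by an arbitrary cost function (the function name, defaults and example call are assumptions, not the original routine):

import numpy as np

def refine_minimum(cost, lo=0., hi=1., tol=0.02):
    # sketch of the interval-refinement search used above: evaluate 5 evenly
    # spaced candidates, re-centre on the cheapest one, halve the step, and
    # stop once the step falls below the tolerance
    step = (hi - lo) / 4.
    while step >= tol:
        f_range = [lo + i * step for i in range(5)]
        rmse = np.array([cost(f) for f in f_range])
        centre = min(max(int(rmse.argmin()), 1), 3)  # keep a neighbour on each side
        lo, hi = f_range[centre - 1], f_range[centre + 1]
        step = (hi - lo) / 4.
    return f_range[int(rmse.argmin())], step

# e.g. refine_minimum(lambda f: (f - 0.83) ** 2) returns a value within one
# final step of 0.83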
                    if options_get_validation(directory) == 'none':
                        docstats.append([tb_count, rel_count, event_count])
                    else:
                        # verify and include verification issue count
                        try:
                            from projectconfig import ProjectConfiguration
                            projectconf = ProjectConfiguration(directory)
                            from verify_annotations import verify_annotation
                            issues = verify_annotation(ann_obj, projectconf)
                            issue_count = len(issues)
                        except:
                            # TODO: error reporting
                            issue_count = -1
                        docstats.append([tb_count, rel_count, event_count, issue_count])
            except Exception, e:
                log_info('Received "%s" when trying to generate stats' % e)
                # Pass exceptions silently, just marking stats missing
                docstats.append([-1] * len(stat_types))

        # Cache the statistics
        try:
            with open(cache_file_path, 'wb') as cache_file:
                pickle_dump(docstats, cache_file)
        except IOError, e:
            Messager.warning("Could not write statistics cache file to directory %s: %s" % (directory, e))

    return stat_types, docstats

# TODO: Testing!
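
The fragment above caches docstats with pickle_dump under cache_file_path. A hedged sketch of the matching read side (read_docstats_cache is a hypothetical name; the original read logic is not shown in this snippet):

from pickle import load as pickle_load

def read_docstats_cache(cache_file_path):
    # hypothetical counterpart to the cache write above: return the cached
    # statistics list, or None when the cache is missing or unreadable
    try:
        with open(cache_file_path, 'rb') as cache_file:
            return pickle_load(cache_file)
    except (IOError, OSError):
        return None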
Exemple #40
0
def save_pickle():
    with open(pickle_path, 'wb') as pickle_file:
        pickle_dump(pickle, pickle_file, HIGHEST_PROTOCOL)
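
save_pickle above takes pickle_path and the pickled object from its enclosing scope. A more self-contained sketch of the same save, plus the matching load, assuming HIGHEST_PROTOCOL comes from the pickle module (argument names are mine):

from pickle import dump as pickle_dump, load as pickle_load, HIGHEST_PROTOCOL

def save_pickle(obj, pickle_path):
    # hypothetical variant: object and path are passed in explicitly
    with open(pickle_path, 'wb') as pickle_file:
        pickle_dump(obj, pickle_file, HIGHEST_PROTOCOL)

def load_pickle(pickle_path):
    with open(pickle_path, 'rb') as pickle_file:
        return pickle_load(pickle_file)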
Exemple #41
0
def main():
    #===============================================================================

    cwdir = os.getcwd()
    SIBrundir = os.path.join(cwdir, 'forward_runs')

    sites = ['BE-Lon', 'DE-Kli', 'FR-Gri', 'IT-BCi', 'NL-Dij', 'NL-Lan']

    # time axis of all time series
    tm = pd.date_range('2000-01-01 00:00:00', '2010-12-31 23:59:59', freq='1d')
    series = dict()

    # note: SiBCASA ignores leap years and does not simulate 29 February, so we
    # have to delete the 29 Feb entries of the 3 leap years between 2000 and 2010
    new_tm = tm[0:59].union(tm[60:1520].union(tm[1521:2981].union(tm[2982:])))
    print new_tm, len(new_tm)

    for site in sites:

        # open all the years and store in one list
        namefile = '%s_2000-2010/' % (site) + 'hsib_*.qp2.nc'
        pathfile = os.path.join(SIBrundir, namefile)

        # open all 11 years * 12 files
        f = cdf.MFDataset(pathfile)
        # get daily GPP and NEE (in micromoles/m2/s) and convert
        # the fluxes to gC/m2/d:
        fac = 0.000001 * 12.  # conversion from micromoles to gC
        dt = 3600. * 24.  # nb of seconds in a day
        Sib_gpp = np.array(-f.variables['gpp'][:]) * fac * dt
        Sib_ter = np.array(f.variables['resp_tot'][:]) * fac * dt
        Sib_rhet = np.array(f.variables['resp_het'][:]) * fac * dt
        Sib_raut = np.array(f.variables['resp_auto'][:]) * fac * dt
        Sib_nee = np.array(f.variables['NEE_2'][:]) * fac * dt
        # from moles/m2 to gC/m2
        Sib_csoil = np.array(f.variables['carb_soil'][:]) * fac * 1000000. * dt
        # close file
        f.close()

        series[site] = dict()
        series[site]['GPP'] = pd.Series([l[0] for l in Sib_gpp], index=new_tm)
        series[site]['TER'] = pd.Series([l[0] for l in Sib_ter], index=new_tm)
        series[site]['Rhet'] = pd.Series([l[0] for l in Sib_rhet],
                                         index=new_tm)
        series[site]['Raut'] = pd.Series([l[0] for l in Sib_raut],
                                         index=new_tm)
        series[site]['NEE'] = pd.Series([l[0] for l in Sib_nee], index=new_tm)

        fig, ax = plt.subplots(nrows=1, ncols=1)
        fig.suptitle(site, fontsize=14)
        series[site]['GPP'].plot(label='GPP')
        series[site]['TER'].plot(label='TER')
        series[site]['Rhet'].plot(label='Rhet')
        series[site]['Raut'].plot(label='Raut')
        series[site]['NEE'].plot(label='NEE')
        ax.legend()

    # store the formatted pandas timeseries in a pickle file
    filepath = os.path.join(SIBrundir, 'timeseries_SiBCASA.pickle')
    pickle_dump(series, open(filepath, 'wb'))

    # preview the timeseries per site
    plt.show()
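
The union() slicing above removes the three 29 February entries by hard-coded positions. A sketch of an equivalent, index-free alternative using a boolean mask (not what the script does):

import pandas as pd

tm = pd.date_range('2000-01-01 00:00:00', '2010-12-31 23:59:59', freq='1d')
# drop every 29 February instead of slicing around hard-coded positions
new_tm = tm[~((tm.month == 2) & (tm.day == 29))]
assert len(new_tm) == len(tm) - 3   # 2000, 2004 and 2008 are the leap years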
Exemple #42
0
                        try:
                            from projectconfig import ProjectConfiguration
                            projectconf = ProjectConfiguration(directory)
                            from verify_annotations import verify_annotation
                            issues = verify_annotation(ann_obj, projectconf)
                            issue_count = len(issues)
                        except:
                            # TODO: error reporting
                            issue_count = -1
                        docstats.append(
                            [tb_count, rel_count, event_count, issue_count])
            except Exception, e:
                log_info('Received "%s" when trying to generate stats' % e)
                # Pass exceptions silently, just marking stats missing
                docstats.append([-1] * len(stat_types))

        # Cache the statistics
        try:
            with open(cache_file_path.decode('utf-8').encode('utf-8'),
                      'wb') as cache_file:
                pickle_dump(docstats, cache_file)
        except IOError, e:
            Messager.warning(
                "Could not write statistics cache file to directory %s: %s" %
                (directory, e))

    return stat_types, docstats


# TODO: Testing!
def main():
    #===============================================================================
    global outputdir, obsdir
    #-------------------------------------------------------------------------------
    # ================================= USER INPUT =================================

    # read the settings from the rc file
    rcdict = rc.read('settings.rc')

    #===============================================================================
    #-------------------------------------------------------------------------------
    # extract the needed information for that script
    sites = [s.strip(' ') for s in rcdict['sites'].split(',')]
    years = [s.strip(' ') for s in rcdict['years'].split(',')]
    TER_method = rcdict['TER_method']
    R10 = rcdict['R10']
    resolution = rcdict['resolution']  # can be 'daily' or '3-hourly'
    if resolution == 'daily': res = '1d'
    elif resolution == '3-hourly': res = '3H'

    # directory paths
    outputdir = rcdict['outputdir']
    obsdir = rcdict['obsdir']
    forwardir = os.path.join(outputdir, 'forward_runs')

    #-------------------------------------------------------------------------------
    # load the WOFOST runs of all crops

    # we load the pandas time series pickled by the forward-run script
    filepath = os.path.join(forwardir,'%s_timeseries_'%resolution+\
                            '%s_WOFOST.pickle'%TER_method)
    series = pickle_load(open(filepath, 'rb'))

    filepath = os.path.join(obsdir, 'daily_timeseries_OBS.pickle')
    obs = pickle_load(open(filepath, 'rb'))

    final_series = dict()

    for s, site in enumerate(sites):
        print site
        print obs[site].keys()
        final_series[site] = dict()

        # read the crop rotation from FluxNet file
        rotation = obs[site]['crop_no']

        # slice each year's required time series, append to final series
        for varname in ['GPP', 'TER', 'Raut', 'Rhet', 'NEE']:
            print 'variable %s' % varname
            var = []
            for year in years:

                # get the crop number for that year
                if site != 'IT-BCi':
                    try:
                        crop_no = rotation[year:year][0]
                    except IndexError:  # index error occurs when the year is
                        # not in the rotation time series
                        startdate = '%s-01-01 00:00:00' % year
                        enddate = '%s-12-31 23:59:59' % year
                        dtimes = pd.date_range(startdate, enddate, freq=res)
                        na_vals = np.array(len(dtimes) * [np.nan])
                        var += [pd.Series(na_vals, index=dtimes)]
                        print '   ', site, year, 'unknown crop cover: skip.'
                        continue
                elif site == 'IT-BCi':
                    if int(year) not in np.arange(2004, 2010, 1):
                        startdate = '%s-01-01 00:00:00' % year
                        enddate = '%s-12-31 23:59:59' % year
                        dtimes = pd.date_range(startdate, enddate, freq=res)
                        na_vals = np.array(len(dtimes) * [np.nan])
                        var += [pd.Series(na_vals, index=dtimes)]
                        print '   ', site, year, 'unknown crop cover: skip.'
                        continue
                    else:
                        crop_no = 2

                # try slicing and concatenating that year's timeseries from file
                try:
                    # if the GPP = 0 (failed growing season), we set TER and
                    # NEE to zero as well
                    if np.mean(series[site]['c%i' %
                                            crop_no]['GPP'][year:year]) == 0.:
                        startdate = '%s-01-01 00:00:00' % year
                        enddate = '%s-12-31 23:59:59' % year
                        dtimes = pd.date_range(startdate, enddate, freq=res)
                        zeros = np.array(len(dtimes) * [0.])
                        var += [pd.Series(zeros, index=dtimes)]
                    else:
                        var += [
                            series[site]['c%i' % crop_no][varname][year:year]
                        ]
                    print '   ', site, year, '%2i' % crop_no, 'slicing'
                except KeyError:  # key error occurs when we haven't run a crop
                    # or a year with WOFOST
                    startdate = '%s-01-01 00:00:00' % year
                    enddate = '%s-12-31 23:59:59' % year
                    dtimes = pd.date_range(startdate, enddate, freq=res)
                    na_vals = np.array(len(dtimes) * [np.nan])
                    var += [pd.Series(na_vals, index=dtimes)]
                    print '   ', site, year, '%2i' % crop_no, 'skip.'

            final_series[site][varname] = pd.concat(var)
        #final_series[site]['GPP'].plot()
        #plt.show()

    # store the final WOFOST timeseries
    filepath = os.path.join(outputdir,'%s_timeseries_'%resolution+\
               '%s_R10=%s_WOFOST_crop_rotation.pickle'%(TER_method,R10))
    pickle_dump(final_series, open(filepath, 'wb'))
    print 'successfully dumped %s' % filepath
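
The crop-rotation pickle written at the end can be read back with the mirror of that pickle_dump call. A minimal hedged sketch (the helper and its arguments are assumptions; the values must match what settings.rc produced):

import os
from pickle import load as pickle_load

def load_final_series(outputdir, resolution, TER_method, R10):
    # hypothetical reader for the WOFOST_crop_rotation pickle dumped just above
    filepath = os.path.join(outputdir,
                            '%s_timeseries_%s_R10=%s_WOFOST_crop_rotation.pickle'
                            % (resolution, TER_method, R10))
    with open(filepath, 'rb') as pickle_file:
        return pickle_load(pickle_file)

# usage: final_series = load_final_series(outputdir, resolution, TER_method, R10)
# then e.g. final_series[site]['GPP'].plot(), as hinted by the commented lines above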