def export_to_cache():
    """Export data for every site in SITE_LIST into the pyhis cache.

    For each site, the cache is (re)initialized against
    CACHE_DATABASE_FILE, the in-memory cache is dropped and a garbage
    collection is forced so memory stays bounded across a long export
    run, and the site's data is committed via commit_data_for_site().
    """
    for site in SITE_LIST:
        # Re-initialize for each site so every export starts with a
        # fresh cache session against the same database file.
        cache.init_cache(CACHE_DATABASE_FILE, ECHO_SQLALCHEMY)
        # Drop the in-memory cache and collect immediately; the old
        # debug prints here inspected cache._cache size and gc counts,
        # confirming memory growth between sites was the motivation.
        cache.clear_memory_cache()
        gc.collect()
        commit_data_for_site(site)
def export_cache():
    """Walk every cache database file and export each source it holds.

    A running site index is threaded through export_source() so that
    sites receive globally unique indices across all database files.
    """
    site_index = 1
    for db_file in CACHE_DATABASE_FILES:
        cache.init_cache(os.path.join(CACHE_DIR, db_file),
                         ECHO_SQLALCHEMY)
        # Best-effort creation of an index on timeseries values: if
        # the index already exists the backend raises OperationalError,
        # which is deliberately swallowed.
        ts_index = Index('ix_value_timeseries_id',
                         cache.DBValue.__table__.c.timeseries_id)
        try:
            ts_index.create(cache.engine)
        except OperationalError:
            pass
        for src in cache.db_session.query(cache.DBSource).all():
            site_index = export_source(src, site_index)
def convert_to_pyhis():
    """convert the tceq database to a pyhis database

    Walks WDFT_PARAMETERS, pulls the matching TCEQ results (excluding
    any with a gtlt qualifier), converts units via UNITS_DICT, and
    writes sites/variables/timeseries/values into the pyhis cache,
    committing in batches to keep the session small.
    """
    # if os.path.exists(CACHE_DATABASE_FILE):
    #     print ("Hold up.. %s already exists. You need to delete or "
    #            "rename it before continuing." % CACHE_DATABASE_FILE)
    #     sys.exit(1)
    # create_pyhis_sites(stations, file_source)
    found = False  # NOTE(review): set but never read — looks like leftover state
    cache.init_cache(CACHE_DATABASE_FILE, ECHO_SQLALCHEMY)
    for tceq_parameter_code in WDFT_PARAMETERS:
        # NOTE(review): a CacheSource is built on every parameter
        # iteration with the same url — presumably the cache layer
        # dedupes by url; confirm, otherwise this creates duplicates.
        file_source = cache.CacheSource(url=TCEQ_SOURCE)
        parameter = tceq_session.query(Parameter).filter_by(
            parameter_code=tceq_parameter_code).one()
        # Results with a gtlt ("greater/less than") qualifier are
        # excluded at the query level; the loop below re-checks anyway.
        results_query = parameter.results.filter_by(gtlt='')
        results_count = results_query.count()
        # Map the TCEQ parameter/units codes to their WDFT equivalents.
        wdft_parameter_code = WDFT_PARAMETERS[tceq_parameter_code][0]
        wdft_parameter_name = PARAMETERS_DICT[wdft_parameter_code]
        tceq_units_code = WDFT_PARAMETERS[tceq_parameter_code][1]
        wdft_converted_units_code = UNITS_DICT[tceq_units_code][1]
        wdft_converted_units_name = UNITS_DICT[wdft_converted_units_code][0]
        conversion_func = UNITS_DICT[tceq_units_code][2]
        if not conversion_func:
            # No conversion registered for these units: pass values through.
            conversion_func = lambda x: x
        print("converting %s values for param: %s (%s)" % (
            results_count, tceq_parameter_code, wdft_parameter_name))
        units = cache.CacheUnits(
            code=wdft_converted_units_code,
            abbreviation=wdft_converted_units_code,
            name=wdft_converted_units_name)
        variable = cache.CacheVariable(
            units=units, name=wdft_parameter_name,
            code=wdft_parameter_code, vocabulary=TCEQ_VOCABULARY)
        param_total = 0
        param_count = results_count
        for result in page_query(results_query):
            # Flush in batches: once >5000 new objects are pending,
            # commit and report progress so the session stays small.
            if len(cache.db_session.new) > 5000:
                param_total += len(cache.db_session.new)
                cache.db_session.commit()
                print("committing %s of %s" % (param_total, param_count))
            # Defensive re-check of the gtlt filter applied above.
            if result.gtlt != '':
                logger.warning("result being thrown out, gtlt value. "
                               "result id: %s" % result.id)
                continue
            if not result.event:
                logger.warning("no event found for orphaned result: %s, %s" %
                               (result.id, result.tag_id))
                continue
            event = result.event
            try:
                station = tceq_session.query(Station).filter_by(
                    tceq_station_id=event.station_id).one()
            except NoResultFound:
                # Result references a station that isn't in the TCEQ db.
                logger.warning("station not found for event %s, "
                               "station_id %s: " % (event.id,
                                                    event.station_id))
                continue
            # Prefer the event's start date/time; fall back to its end
            # date/time; otherwise the event can't be timestamped.
            if getattr(event, 'start_date', None) and \
                    getattr(event, 'start_time', None):
                timestamp = datetime.datetime.combine(event.start_date,
                                                      event.start_time)
            elif getattr(event, 'end_date', None) and \
                    getattr(event, 'end_time', None):
                timestamp = datetime.datetime.combine(event.end_date,
                                                      event.end_time)
            else:
                logger.warning("event being thrown out, could not determine "
                               "timestamp. event tag_id: %s" % event.tag_id)
                continue
            # Reuse an existing cache site at the same coordinates,
            # otherwise create one (added to the session, not committed).
            site = cache.db_session.query(cache.DBSite).filter_by(
                latitude=station.latitude,
                longitude=station.longitude).first()
            if not site:
                site = cache.CacheSite(
                    site_id=station.tceq_station_id,
                    code=station.tceq_station_id,
                    name=station.short_description,
                    network=TCEQ_NETWORK,
                    source=file_source,
                    latitude=station.latitude,
                    longitude=station.longitude,
                    auto_commit=False, auto_add=True)
            timeseries = cache.CacheTimeSeries(
                site=site, variable=variable,
                auto_commit=False, auto_add=True)
            value = cache.DBValue(
                timestamp=timestamp,
                value=conversion_func(result.value),
                timeseries=timeseries)
        # Commit whatever remains pending for this parameter.
        cache.db_session.commit()