# NOTE(review): whitespace-mangled fragment from the body of a per-station
# loop.  The `if` that pairs with the first `else:` below (presumably a
# "does the hourly file exist" test) starts outside this chunk, so the
# leading indentation here is a best-effort reconstruction -- verify
# against the original script before relying on it.
    print errmsg % (ucanid, filepath)
    continue  # gotta quit if there is no hourly data
else:
    # hourly data exists; decide whether to (re)build the statistics file
    filepath = factory.getFilepathForUcanid(ucanid, 'statistics')
    if os.path.exists(filepath):
        if not replace_existing:
            # an existing statistics file is kept unless --replace was asked for
            print skipmsg % (station_num, total_stations, ucanid,
                             station_id, station_name)
            errmsg = 'Statistics database for station %d already exists : %s'
            print errmsg % (ucanid, filepath)
            continue
        else:
            os.remove(filepath)

# create a new statistics file and initialize with station attributes
# copied from the station's hourly-data file
if not os.path.exists(filepath):
    manager = factory.getFileManager((ucanid, 'hours'), mode='r')
    attrs = manager.getFileAttributes()
    manager.closeFile()
    manager = factory.getFileManager((ucanid, 'statistics'), mode='w')
    manager.setFileAttributes(**attrs)
    manager.closeFile()
    del manager

# we're going to process this station
announce = procmsg % (station_num, total_stations, ucanid,
                      station_id, station_name)
print announce

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

if debug: print 'historical extremes for %d' % ucanid
# NOTE(review): whitespace-mangled fragment.  It begins with the trailing
# keyword arguments of a parser.add_option(...) call whose opening line is
# outside this chunk.
                  type='string', dest='working_dir', default=None)
parser.add_option('--numyears', action='store', type='int',
                  dest='num_years', default=15)

options, args = parser.parse_args()

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

factory = ObsnetDataFactory(options)
index_manager = factory.getFileManager('index', mode='r')

max_buddies = options.max_buddies
# minimum amount of history a station needs, expressed as total hours
# (note: 365 * 24 ignores leap days -- presumably acceptable for a threshold)
min_num_years = options.num_years
min_num_hours = min_num_years * (365 * 24)

# candidate buddy stations come only from the 'icao' and 'cu_log' networks
networks = index_manager.getData('network')
potential_buddies = N.where((networks == 'icao') | (networks == 'cu_log'))
num_potential_buddies = len(potential_buddies[0])
ucanids = index_manager.getData('ucanid')
potential_buddy_ids = ucanids[potential_buddies]

if len(args) > 0:
    # ucanids were listed on the command line: translate each one into its
    # positional index within the master ucanid array
    ucanids = tuple(ucanids)
    need_buddies = [ucanids.index(int(arg)) for arg in args]
    need_buddies.sort()
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # test_run = options.test if test_run: debug = True update = False else: debug = options.debug update = options.update max_attempts = options.max_attempts # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - factory = ObsnetDataFactory(options) index_manager = factory.getFileManager('index', mode='r') index_datasets, datasets_attrs = index_manager.getData('datasets',True) index_ucanids = index_manager.getData('ucanid') num_stations = len(index_ucanids) networks = index_manager.getData('network') sids = index_manager.getData('sid') index_manager.closeFile() del index_manager # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - num_changed = 0 ucanid_list = tuple(index_ucanids) attempt = 1 do_over = [ ]
# NOTE(review): whitespace-mangled fragment.  It begins with the trailing
# keyword arguments of a parser.add_option(...) call whose opening line is
# outside this chunk.
                  type='string', dest='working_dir', default=None)
parser.add_option('-y', action='store_true', dest='test', default=False)
parser.add_option('-z', action='store_true', dest='debug', default=False)

options, args = parser.parse_args()

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

factory = ObsnetDataFactory(options)
# first positional argument names the index column to sort stations by
sort_by = args[0]

# get all stations currently in the index file
manager = factory.getFileManager('index', 'r')
column_names = manager.listDatasets()
# remember each dataset's 'created' attribute and start an empty column
# accumulator per dataset
column_created = {}
columns = {}
for name in column_names:
    column_created[name] = manager.getDatasetAttribute(name, 'created')
    columns[name] = []
stations = list(factory.getIndexedStations(column_names, None, sort_by))
manager.closeFile()
del manager

# transpose: station records (dicts) -> per-column lists, substituting the
# column's MISSING sentinel when a station lacks the attribute
for station in stations:
    for name in column_names:
        columns[name].append(station.get(name, MISSING[name]))
del stations
for key in INDEX_KEYS: arrays[key] = [ ] # populate the index arrays with station data for station in stations: for key in INDEX_KEYS: if key in station: arrays[key].append(station[key]) else: arrays[key].append(MISSING[key]) # save the index arrays to the index file factory = ObsnetDataFactory(options) if factory.fileExists('index'): factory.backupIndexFile() index_manager = factory.getFileManager('index', mode='w') for key, dataset in INDEX.items(): print 'creating array for', key data = N.array(arrays[key], dtype=dataset.data_type) attrs = { 'missing' : dataset.missing, 'description' : dataset.description, } if key in ('lon','lat','elev'): valid = data[N.where(N.isfinite(data))] attrs['min'] = N.min(valid) attrs['max'] = N.max(valid) if dataset.units is not None: attrs['units'] = dataset.units index_manager.createDataset(key, data, attrs)
# NOTE(review): whitespace-mangled fragment.  The `if` that pairs with the
# first `else:` below (presumably "were dataset names given on the command
# line?") starts outside this chunk, so the leading indentation is a
# best-effort reconstruction -- verify against the original script.
    update_datasets = {}
    for arg in args[1:]:
        # the arg may be a mapping of the dataset name used in the dump file
        # to the dataset name expected by the index file
        if ':' in arg:
            dump_key, index_key = arg.split(':')
            update_datasets[dump_key] = index_key
        else:
            update_datasets[arg] = arg
# the list of columns to update was not on the command line
else:
    update_datasets = None

# open the index dataset and get the current set of keys
factory = ObsnetDataFactory(options)
index_manager = factory.getFileManager('index', 'r')
existing_index_keys = list(index_manager.listDatasets())
if debug: print '\nexisting keys', existing_index_keys

# get the master index dataset
index_key_dataset, index_key_attrs = index_manager.getData(
                                              master_index_key, True)
index_key_dataset_size = len(index_key_dataset)

# open the dump file and read the first line
# SECURITY(review): eval() on file content executes arbitrary code -- this
# is only safe if the dump file is trusted, locally-generated output;
# consider ast.literal_eval for dict literals
dump_file = open(dump_filepath, 'r')
update_dict = eval(dump_file.readline())
update_index = N.where(index_key_dataset == update_dict[master_index_key])

# if the list of datasets to update was not passed on the command line
# we need to create one from the list of keys in the dump file
# NOTE(review): whitespace-mangled fragment.  It begins with the trailing
# keyword arguments of a parser.add_option(...) call whose opening line is
# outside this chunk.
                  action='store', type='string', dest='working_dir',
                  default=None)

options, args = parser.parse_args()

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

# optional positional args restrict processing to specific station ucanids
if len(args) > 0:
    by_ucanid = tuple([int(arg) for arg in args])
else:
    by_ucanid = None

factory = ObsnetDataFactory(options)
index_manager = factory.getFileManager('index', mode='r')
max_buddies = options.max_buddies

# partition station indexes into 'icao' network vs everything else; the
# coordinate arrays below are split the same way so icao stations can be
# matched against the rest
networks = index_manager.getData('network')
icao = N.where(networks == 'icao')
not_icao = N.where(networks != 'icao')
del networks

lats = index_manager.getData('lat')
icao_lats = lats[icao]
lats = lats[not_icao]
lons = index_manager.getData('lon')
icao_lons = lons[icao]
lons = lons[not_icao]
# NOTE(review): whitespace-mangled fragment from the body of a per-station
# loop (the `continue` below implies an enclosing `for`); the fragment ends
# mid-assignment on a backslash continuation.
ucan = UcanConnection(None, days_per_request)

filepath = factory.getFilepathForUcanid(station['ucanid'], 'hours')
if os.path.exists(filepath):
    if replace_existing:
        os.remove(filepath)
    else:
        # keep the existing hourly file and move on to the next station
        print skipmsg % (station_num, total_stations, station['ucanid'],
                         station['sid'], station['name'])
        continue

# we're going to process this station
print procmsg % (station_num, total_stations, station['ucanid'],
                 station['sid'], station['name'])

# get a manager for the new file and stamp it with a creation time plus
# every attribute from the station record
manager = factory.getFileManager(filepath, 'w')
manager.setFileAttribute('created', manager._timestamp())
manager.setFileAttributes(**station)

# restrict the requested elements to those this station's network provides
available_elements = NETWORKS[station['network']].elements
elements = [elem for elem in all_elements if elem in available_elements]

# sentinels chosen so any real (year, month, day, hour) tuple compares
# earlier/later on first encounter
earliest_hour = (9999, 99, 99, 99)
latest_hour = (0, 0, 0, 0)

dewpt_data = None
rhum_data = None
temp_data = None

for element in elements:
    dtype, missing_value, units, tsv_name, tsv_units =\