def validate(self, station):
    debug = self.debug
    detail = self.detail
    if station['active'] not in ('Y','O'): return None

    station['index'] = N.where(self.sids_column == station['sid'])[0]
    station_info = '%(ucanid)d : %(sid)s : %(name)s' % station
    if debug and detail:
        print '\n***** processing', station_info
    elif self.test_run:
        print 'processing', station_info

    reportable_datasets = \
        [ name for name in stringToTuple(station['datasets'])
          if name in self.reportable_datasets ]
    reportable_datasets.sort()
    num_datasets = len(reportable_datasets)
    if debug and detail:
        print 'reportable datasets', num_datasets, reportable_datasets

    # look for datasets with missing data
    missing_data = [ ]
    last_valid_hour = -1
    valid_hour_set = set()

    # make connection to UCAN server
    connection = HourlyDataConnection(2, first_hour_in_day=1)

    for dataset_name in reportable_datasets:
        try:
            first_hour, last_hour, data = \
                connection.getData(station, dataset_name, self.start_time,
                                   self.end_time, detail)
        except Exception as e:
            print '\n\n%s' % '\n'.join(e.args)
            if "UnknownUcanId" in e.__class__.__name__:
                print '\n'
                break
            else:
                continue

        if debug and detail:
            print '\n', first_hour, last_hour, \
                  len(N.where(N.isfinite(data))[0])
            print data

        if len(data) > 0:
            valid_hours = N.where(N.isfinite(data))[0]
            if len(valid_hours) > 0:
                valid_hour_set |= set(valid_hours)
            else:
                missing_data.append(dataset_name)
        else:
            missing_data.append(dataset_name)

    station['reportable_datasets'] = reportable_datasets
    station['missing_datasets'] = missing_data
    station['valid_hours'] = valid_hour_set

    kwargs = { 'debug' : debug,
               'reportable_data' : reportable_datasets }
    return self.decisionTree(self, station, **kwargs)
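# NOTE: stringToTuple is used throughout these scripts but defined elsewhere.
# A minimal sketch of what it presumably does -- an assumption, not the
# actual implementation:
def stringToTuple(string):
    # split a comma-delimited string into a tuple of trimmed names
    return tuple([item.strip() for item in string.split(',')])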
def _parseMetadata(self, metadata, manager=None):
    if isinstance(metadata, basestring):
        if metadata == 'all':
            if manager is not None:
                return tuple(manager.listDatasets())
            else:
                return RAW_DATA_ELEMENTS
        else:
            return stringToTuple(metadata)
    elif isinstance(metadata, (list, tuple)):
        return tuple(metadata)
    else:
        errmsg = "'metadata' argument is an invalid type: %s"
        raise TypeError, errmsg % type(metadata)
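# Usage sketch for _parseMetadata (all names here are hypothetical:
# 'factory' stands for any instance of the defining class, 'manager' for
# any object exposing listDatasets(), and the element names are made up):
#
#   factory._parseMetadata('all', manager)     -> tuple(manager.listDatasets())
#   factory._parseMetadata('all')              -> RAW_DATA_ELEMENTS
#   factory._parseMetadata('pcpn,temp,dewpt')  -> ('pcpn', 'temp', 'dewpt')
#   factory._parseMetadata(('pcpn', 'temp'))   -> ('pcpn', 'temp')
#   factory._parseMetadata(42)                 raises TypeError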
                  dest='state', default=None)

options, args = parser.parse_args()

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

procmsg = '\nProcessing station %d of %d : %d : %s (%s)'
skipmsg = '\nSkipping station %d of %d : %d : %s (%s)'

debug = options.debug
replace_existing = options.replace_existing
report_missing = options.report_missing
seq_count_cutoff = options.sequence_count_cutoff
if options.datasets != 'all':
    datasets = list(stringToTuple(options.datasets))
else:
    datasets = None

factory = ObsnetDataFactory(options)
stations = factory.argsToStationData(args, options)
total_stations = len(stations)

buddy_locator = BuddyLocator(factory, options.min_buddy_years,
                             options.max_buddies,
                             options.max_buddy_distance)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

exceptions_encountered = [ ]
station_num = 0
parser.add_option('--state', action='store', type='string', dest='state',
                  default=None)

options, args = parser.parse_args()

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

procmsg = '\nProcessing station %d of %d : %d : %s (%s)'
skipmsg = '\nSkipping station %d of %d : %d : %s (%s)'

debug = options.debug
elements = options.elements
if elements != 'all':
    elements = stringToTuple(elements)
replace_existing = options.replace_existing

factory = ObsnetDataFactory(options)
stations = factory.argsToStationData(args, options, 'all')
total_stations = len(stations)

station_num = 0
for station in stations:
    if 'id' in station:
        station['sid'] = station['id']
        del station['id']
    ucanid = station['ucanid']
    station_id = station['sid']
    station_name = station['name']
    station_num += 1
                  dest='network', default=None)
parser.add_option('--state', action='store', type='string', dest='state',
                  default=None)

options, args = parser.parse_args()

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

procmsg = '\nProcessing station %d of %d : %d : %s (%s)'
skipmsg = '\nSkipping station %d of %d : %d : %s (%s)'

debug = options.debug
if options.elements == 'all':
    elements = list(MIN_RUN_LENGTHS.keys())
else:
    elements = list(stringToTuple(options.elements))

factory = ObsnetDataFactory(options)
stations = factory.argsToStationData(args, options, 'all')
total_stations = len(stations)

station_num = 0
for station in stations:
    station_num += 1
    ucanid = station['ucanid']
    if 'id' in station:
        station['sid'] = station['id']
        del station['id']

    # hourly data file must already exist
                  default=None)
parser.add_option('--state', action='store', type='string', dest='state',
                  default=None)
parser.add_option('-m', action='store', type='string', dest='metadata',
                  default='all')
parser.add_option('-o', action='store', type='string', dest='output_format',
                  default='dump')
parser.add_option('-s', action='store', type='string', dest='sort_by',
                  default='network,ucanid')

options, args = parser.parse_args()

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

search_keys = ('active','bbox','county','network','state')

metadata = stringToTuple(options.metadata)
sort_by = stringToTuple(options.sort_by)
index_metadata = tuple(set(metadata) | set(sort_by))

sort_by_template = getSortBy(*sort_by)
def sortBy(station):
    return sort_by_template % station

if len(args) > 0:
    filepath = os.path.abspath(os.path.normpath(args[0]))
    path, ext = os.path.splitext(filepath)
    output_format = ext[1:]
    if output_format == 'py': output_format = 'dump'
else:
    output_format = options.output_format
    fileroot = 'indexed_metadata_summary'
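# getSortBy is defined elsewhere; all this section requires is that its
# result works as (sort_by_template % station), i.e. a dict-keyed %-format
# string built from the sort keys. A guess at its shape (assumption only):
def getSortBy(*sort_keys):
    # getSortBy('network', 'ucanid') -> '%(network)s %(ucanid)s'
    return ' '.join(['%%(%s)s' % key for key in sort_keys])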
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

debug = options.debug

duration_days = options.duration_days
days_cushion = duration_days / 2
if duration_days == days_cushion * 2:
    raise ValueError, 'Value of --dd option must be an odd number.'

duration_hours = options.duration_hours
hours_cushion = duration_hours / 2
if duration_hours == hours_cushion * 2:
    raise ValueError, 'Value of --dh option must be an odd number.'

if options.elements is None:
    elements = CALC_STATS_FOR
else:
    elements = stringToTuple(options.elements)

min_year_span = options.min_year_span
percent_missing = options.percent_missing
report_rate = options.report_rate
replace_existing = options.replace_existing

max_sample_size = duration_days * duration_hours
min_sample_size = int(max_sample_size * (1.0 - (percent_missing / 100.)))

rel_hours_cushion = relativedelta(hours=hours_cushion)

# create a factory, then use it to get the list of stations
factory = ObsnetDataFactory(options)
if len(args) > 0:
    ucanids = [int(arg) for arg in args]
else:
    criteria = factory._validCriteria(options, SEARCH_KEYS)
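# Worked example of the cushion arithmetic above (hypothetical option values):
#   --dd 5 -> days_cushion = 5 / 2 = 2 (Python 2 integer division), giving a
#             symmetric window of the target day +/- 2 days
#   --dd 4 -> days_cushion = 2, but 2 * 2 == 4, so the ValueError fires:
#             an even duration cannot be centered on the target day
#   with --dd 5, --dh 3 and percent_missing = 25:
#       max_sample_size = 5 * 3 = 15
#       min_sample_size = int(15 * (1.0 - 0.25)) = 11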
    day = int(args[2])
    if len(args) > 3: first_hour_in_day = int(args[3])
    end_time = datetime(year, month, day, 23)
    start_time = datetime(year, month, day, 0)
else:
    date = datetime.now() - ONE_DAY
    end_time = datetime(date.year, date.month, date.day, 23)
    start_time = datetime(date.year, date.month, date.day, 0)

end_time_as_int = dateAsInt(end_time, True)
date_str = end_time.strftime('%B %d, %Y')
prev_date = dateAsInt(end_time - ONE_DAY, True)

debug = options.debug
verbose = options.verbose
if verbose: print 'verbose debug output requested'
networks = stringToTuple(options.networks)
test_run = options.test_run
update_index = options.update_index

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

# create factory and get required data from index file
factory = ObsnetDataFactory(options)
index_manager = factory.getFileManager('index', 'r')
sids_column = index_manager.getData('sid')
last_report_column = index_manager.getData('last_report')
active_status_column = index_manager.getData('active')
index_manager.closeFile()

validation_manager = \
    ValidationManager(decisionTree, start_time, end_time, networks,
parser.add_option('-w', action='store', type='string', dest='working_dir',
                  default=None)
parser.add_option('-y', action='store_true', dest='test', default=False)
parser.add_option('-z', action='store_true', dest='debug', default=False)

options, args = parser.parse_args()

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

debug = options.debug
default_network = options.default_network
separator = options.separator

if options.columns_to_save is None or options.columns_to_save == 'all':
    columns_to_save = INDEX_KEYS
else:
    columns_to_save = stringToTuple(options.columns_to_save)
    if 'index' in columns_to_save:
        columns_to_save = list(columns_to_save)
        indx = columns_to_save.index('index')
        del columns_to_save[indx]
        columns_to_save = tuple(set(columns_to_save) | set(INDEX_KEYS))
    elif 'ucanid' not in columns_to_save:
        columns_to_save = list(columns_to_save)
        columns_to_save.insert(0,'ucanid')
        columns_to_save = tuple(columns_to_save)
print 'columns_to_save', columns_to_save

input_filepaths = [ ]
for arg in args:
    input_filepaths.append(os.path.normpath(arg))
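# To make the 'index'/'ucanid' handling concrete, assuming the hypothetical
# value INDEX_KEYS == ('ucanid', 'sid', 'name'):
#
#   columns_to_save option 'lat,lon,index'
#       'index' is removed and the full index key set is merged in:
#       columns_to_save == tuple(set(('lat', 'lon')) | set(INDEX_KEYS))
#
#   columns_to_save option 'lat,lon'
#       'ucanid' is absent, so it is prepended:
#       columns_to_save == ('ucanid', 'lat', 'lon')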
                  dest='sort_by', default='ucanid')
parser.add_option('-w', action='store', type='string', dest='working_dir',
                  default=None)
parser.add_option('-y', action='store_true', dest='test', default=False)
parser.add_option('-z', action='store_true', dest='debug', default=False)

options, args = parser.parse_args()

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

match_key = options.match_key
metadata = stringToTuple(options.metadata)

if ',' in args[0]:
    dataset_keys = args[0].split(',')
else:
    dataset_keys = [args[0], args[0]]

# look for an optional 'data_key,filepath' pair before applying abspath,
# otherwise the key would be absorbed into the absolutized path
if ',' in args[1]:
    data_key, input_filepath = args[1].split(',')
else:
    data_key = match_key
    input_filepath = args[1]
input_filepath = os.path.abspath(input_filepath)

input_file = open(input_filepath, 'r')
data = eval(input_file.read().strip().replace('\n', ''))
input_file.close()
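# The dump file read above is presumably a Python-literal repr of a dict or
# list. eval() will execute arbitrary code from the file; where the contents
# really are literals, ast.literal_eval is a safer drop-in (a sketch, not a
# change the original makes):
import ast

input_file = open(input_filepath, 'r')
data = ast.literal_eval(input_file.read().strip())
input_file.close()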