Example #1
0
    def validate(self, station):
        """Check one station's reportable datasets for missing data.

        station : dict; must contain 'active', 'sid', 'ucanid', 'name'
            and 'datasets' entries.  Mutated in place: 'index',
            'reportable_datasets', 'missing_datasets' and 'valid_hours'
            entries are added.

        Returns None for stations whose 'active' flag is not 'Y' or 'O';
        otherwise returns whatever self.decisionTree produces.
        """
        debug = self.debug
        detail = self.detail

        # only stations flagged 'Y' or 'O' are validated
        if not station['active'] in ('Y','O'): return None

        # row index of this station in the index file's 'sid' column
        # (N is presumably numpy -- see N.where/N.isfinite usage below)
        station['index'] = N.where(self.sids_column == station['sid'])[0]
        station_info = '%(ucanid)d : %(sid)s : %(name)s' % station
        if debug and detail:
            print '\n***** processing', station_info
        elif self.test_run: print 'processing', station_info

        # datasets this station reports that we are configured to check
        reportable_datasets = \
            [ name for name in stringToTuple(station['datasets'])
              if name in self.reportable_datasets ]
        reportable_datasets.sort()
        num_datasets = len(reportable_datasets)
        if debug and detail:
            print 'reportable datasets', num_datasets, reportable_datasets

        # look for datasets with missing data
        missing_data = [ ]
        # NOTE(review): last_valid_hour is never updated below
        last_valid_hour = -1
        valid_hour_set = set()

        # make connection to UCAN server
        connection = HourlyDataConnection(2, first_hour_in_day=1)
        for dataset_name in reportable_datasets:

            try:
                first_hour, last_hour, data = \
                connection.getData(station, dataset_name, self.start_time,
                                   self.end_time, detail)
            except Exception as e:
                print '\n\n%s' % '\n'.join(e.args)
                # an unknown UCAN id dooms every dataset for this
                # station, so stop; any other error just skips the
                # current dataset
                if "UnknownUcanId" in e.__class__.__name__:
                    print '\n'
                    break
                else: continue

            if debug and detail:
                print '\n', first_hour, last_hour, len(N.where(N.isfinite(data))[0])
                print data

            # hours with at least one finite value count as valid;
            # datasets with no finite values (or no data) are missing
            if len(data) > 0:
                valid_hours = N.where(N.isfinite(data))[0]
                if len(valid_hours) > 0:
                    valid_hour_set |= set(valid_hours)
                else: missing_data.append(dataset_name)
            else: missing_data.append(dataset_name)

        # record the results on the station record for downstream use
        station['reportable_datasets'] = reportable_datasets
        station['missing_datasets'] = missing_data
        station['valid_hours'] = valid_hour_set
        kwargs = { 'debug' : debug, 'reportable_data' : reportable_datasets }

        # decisionTree appears to be a plain function stored on the
        # instance (see ValidationManager construction elsewhere in this
        # file), not a bound method -- hence self is passed explicitly
        return self.decisionTree(self, station, **kwargs)
 def _parseMetadata(self, metadata, manager=None):
     """Normalize a 'metadata' specification into a tuple of names.

     Accepts either a comma-delimited string, the literal string
     'all', or an existing list/tuple.  For 'all', the manager's
     dataset list is used when a manager is supplied; otherwise the
     RAW_DATA_ELEMENTS default applies.

     Raises TypeError for any other argument type.
     """
     if isinstance(metadata, (list, tuple)):
         return tuple(metadata)
     if isinstance(metadata, basestring):
         if metadata != 'all':
             return stringToTuple(metadata)
         if manager is None:
             return RAW_DATA_ELEMENTS
         return tuple(manager.listDatasets())
     raise TypeError("'metadata' argument is an invalid type: %s"
                     % type(metadata))
                  dest='state',
                  default=None)

# parse the command-line options defined above
options, args = parser.parse_args()

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

# message templates for per-station progress reporting
procmsg = '\nProcessing station %d of %d : %d : %s (%s)'
skipmsg = '\nSkipping station %d of %d : %d : %s (%s)'

debug = options.debug
replace_existing = options.replace_existing
report_missing = options.report_missing
seq_count_cutoff = options.sequence_count_cutoff
# datasets is None when all datasets should be processed
if options.datasets != 'all':
    datasets = list(stringToTuple(options.datasets))
else:
    datasets = None

# build the list of station records to process from the command line
factory = ObsnetDataFactory(options)
stations = factory.argsToStationData(args, options)
total_stations = len(stations)

# locates nearby "buddy" stations within the configured limits
buddy_locator = BuddyLocator(factory, options.min_buddy_years,
                             options.max_buddies, options.max_buddy_distance)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

# exceptions are collected here (presumably reported after the
# processing loop -- loop body not visible in this chunk)
exceptions_encountered = []

station_num = 0
parser.add_option('--state',
                  action='store',
                  type='string',
                  dest='state',
                  default=None)

# parse the command-line options defined above
options, args = parser.parse_args()
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

# message templates for per-station progress reporting
procmsg = '\nProcessing station %d of %d : %d : %s (%s)'
skipmsg = '\nSkipping station %d of %d : %d : %s (%s)'

debug = options.debug
# elements may be the literal 'all' or a comma-delimited name list
elements = options.elements
if elements != 'all': elements = stringToTuple(elements)
replace_existing = options.replace_existing

# build the list of station records to process from the command line
factory = ObsnetDataFactory(options)
stations = factory.argsToStationData(args, options, 'all')
total_stations = len(stations)

station_num = 0
for station in stations:
    # normalize the legacy 'id' key to 'sid'
    if 'id' in station:
        station['sid'] = station['id']
        del station['id']
    ucanid = station['ucanid']
    station_id = station['sid']
    station_name = station['name']
    station_num += 1
                      dest='network', default=None)
    parser.add_option('--state', action='store', type='string', dest='state',
                      default=None)

    # parse the command-line options defined above
    options, args = parser.parse_args()
    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

    procmsg = '\nProcessing station %d of %d : %d : %s (%s)' 
    skipmsg = '\nSkipping station %d of %d : %d : %s (%s)' 

    debug = options.debug
    if options.elements == 'all':
        elements = list(MIN_RUN_LENGTHS.keys())
    else:
        elements = list(stringToTuple(options.elements))

    factory = ObsnetDataFactory(options)
    stations = factory.argsToStationData(args, options, 'all')
    total_stations = len(stations)
    
    station_num = 0
    for station in stations:
        station_num += 1
        ucanid = station['ucanid']

        if 'id' in station:
            station['sid'] = station['id']
            del station['id']

        # hourly data file must already exist
Example #6
0
                  default=None)
parser.add_option('--state', action='store', type='string', dest='state',
                  default=None)

# -m : metadata columns, -o : output format, -s : summary sort order
parser.add_option('-m', action='store', type='string', dest='metadata',
                  default='all')
parser.add_option('-o', action='store', type='string', dest='output_format',
                  default='dump')
parser.add_option('-s', action='store', type='string', dest='sort_by',
                  default='network,ucanid')
options, args = parser.parse_args()

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

# criteria keys recognized when filtering the station index
search_keys = ('active','bbox','county','network','state')
metadata = stringToTuple(options.metadata)
sort_by = stringToTuple(options.sort_by)
# all columns needed either for reporting or for sorting
index_metadata = tuple(set(metadata) | set(sort_by))

# template such that (template % station) yields the sort key string
sort_by_template = getSortBy(*sort_by)
def sortBy(station):
    """Return the sort key string for a station record."""
    return sort_by_template % station

# a file path argument determines output format from its extension;
# otherwise fall back to the -o option with a default file root
if len(args) > 0:
    filepath = os.path.abspath(os.path.normpath(args[0]))
    path, ext = os.path.splitext(filepath)
    output_format = ext[1:]
    if output_format == 'py': output_format = 'dump'
else:
    output_format = options.output_format
    fileroot = 'indexed_metadata_summary'
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

debug = options.debug

# --dd : width (in days) of the sampling window; must be odd so the
# window can be centered on a target day
# NOTE(review): assumes integer option values (py2 floor division) --
# confirm the optparse types for --dd / --dh
duration_days = options.duration_days
days_cushion = duration_days / 2
if duration_days == days_cushion * 2:
    raise ValueError('Value of --dd option must be an odd number.')

# --dh : width (in hours) of the sampling window; must be odd so the
# window can be centered on a target hour
duration_hours = options.duration_hours
hours_cushion = duration_hours / 2
if duration_hours == hours_cushion * 2:
    raise ValueError('Value of --dh option must be an odd number.')

# elements to compute statistics for; CALC_STATS_FOR is the default set
if options.elements is None:
    elements = CALC_STATS_FOR
else:
    elements = stringToTuple(options.elements)
min_year_span = options.min_year_span
percent_missing = options.percent_missing
report_rate = options.report_rate
replace_existing = options.replace_existing

# BUG FIX: previously "durtions_days * durations_hours" -- misspelled
# names that would raise NameError at runtime.  The maximum sample is
# every hour of every day in the window; the minimum allows the
# configured percentage of missing observations.
max_sample_size = duration_days * duration_hours
min_sample_size = int(max_sample_size * (1.0 - (percent_missing / 100.)))
rel_hours_cushion = relativedelta(hours=hours_cushion)

# create a factory, then use it to get the list of stations
factory = ObsnetDataFactory(options)
if len(args) > 0:
    # explicit ucanids given on the command line
    ucanids = [int(arg) for arg in args]
else:
    criteria = factory._validCriteria(options, SEARCH_KEYS)
Example #8
0
    day = int(args[2])
    # optional 4th argument overrides the first hour of the day
    if len(args) > 3: first_hour_in_day = int(args[3])
    # explicit year/month/day : validate that entire day
    end_time = datetime(year, month, day, 23)
    start_time = datetime(year, month, day, 0)
else:
    # no date arguments : default to all of yesterday
    date = datetime.now() - ONE_DAY
    end_time = datetime(date.year, date.month, date.day, 23)
    start_time = datetime(date.year, date.month, date.day, 0)
end_time_as_int = dateAsInt(end_time, True)
date_str = end_time.strftime('%B %d, %Y')
prev_date = dateAsInt(end_time - ONE_DAY, True)

debug = options.debug
verbose = options.verbose
if verbose: print 'verbose debug output requested'
# networks to validate, parsed from a comma-delimited option string
networks = stringToTuple(options.networks)
test_run = options.test_run
update_index = options.update_index
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

# create factory and get required data from index file
factory = ObsnetDataFactory(options)
index_manager = factory.getFileManager('index', 'r')
sids_column = index_manager.getData('sid')
last_report_column = index_manager.getData('last_report')
active_status_column = index_manager.getData('active')
# close as soon as the needed columns are cached in memory
index_manager.closeFile()

validation_manager =\
ValidationManager(decisionTree, start_time, end_time, networks,
Example #9
0
parser.add_option('-w', action='store', type='string', dest='working_dir',
                  default=None)
# -y : test mode, -z : debug output
parser.add_option('-y', action='store_true', dest='test', default=False)
parser.add_option('-z', action='store_true', dest='debug', default=False)

options, args = parser.parse_args()

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

debug = options.debug
default_network = options.default_network
separator = options.separator

# resolve which index columns will be written to the output file
if options.columns_to_save is None or options.columns_to_save == 'all':
    columns_to_save = INDEX_KEYS
else: columns_to_save = stringToTuple(options.columns_to_save)
if 'index' in columns_to_save:
    # 'index' is shorthand for the full standard set of index columns
    columns_to_save = list(columns_to_save)
    indx = columns_to_save.index('index')
    del columns_to_save[indx]
    columns_to_save = tuple(set(columns_to_save) | set(INDEX_KEYS))
elif 'ucanid' not in columns_to_save:
    # 'ucanid' must always be saved; make it the first column
    columns_to_save = list(columns_to_save)
    columns_to_save.insert(0,'ucanid')
    columns_to_save = tuple(columns_to_save)
print 'columns_to_save', columns_to_save

# normalize each command-line argument into a local file path
# (comprehension replaces the manual append loop; same result)
input_filepaths = [os.path.normpath(arg) for arg in args]
Example #10
0
                  dest='sort_by',
                  default='ucanid')
parser.add_option('-w',
                  action='store',
                  type='string',
                  dest='working_dir',
                  default=None)
# -y : test mode, -z : debug output
parser.add_option('-y', action='store_true', dest='test', default=False)
parser.add_option('-z', action='store_true', dest='debug', default=False)

options, args = parser.parse_args()

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

match_key = options.match_key
metadata = stringToTuple(options.metadata)

# first argument : "from_key,to_key" pair, or one key used for both
if ',' in args[0]:
    dataset_keys = args[0].split(',')
else:
    dataset_keys = [args[0], args[0]]
# second argument : input file path, optionally "data_key,path" to
# override the match key for this file
input_filepath = os.path.abspath(args[1])
if ',' in input_filepath:
    data_key, input_filepath = input_filepath.split(',')
else:
    data_key = match_key

# read the whole file and evaluate its contents; the context manager
# guarantees the file is closed even if eval() raises (the original
# leaked the handle on error)
with open(input_filepath, 'r') as input_file:
    # SECURITY: eval() executes arbitrary Python from the input file;
    # only use with trusted files (ast.literal_eval would be safer)
    data = eval(input_file.read().strip().replace('\n', ''))