def parse_record(self, nmea_record):
    """Receive an id-prefixed, timestamped NMEA record."""
    if not nmea_record:
        return None
    if not isinstance(nmea_record, str):
        logging.error('Record is not NMEA string: "%s"', nmea_record)
        return None
    try:
        (data_id, raw_ts, message) = nmea_record.strip().split(sep=' ', maxsplit=2)
        ts = timestamp(raw_ts)
    except ValueError:
        logging.error('Record not in <data_id> <timestamp> <NMEA> format: "%s"',
                      nmea_record)
        return None

    # Figure out what kind of message we're expecting, based on data_id
    sensor = self.sensors.get(data_id, None)
    if not sensor:
        logging.error('Unrecognized data_id ("%s") in record: %s',
                      data_id, nmea_record)
        return None

    model_name = sensor.get('model', None)
    if not model_name:
        logging.error('No "model" for sensor %s', sensor)
        return None

    # If something goes wrong during parsing, we'll get a ValueError
    try:
        (fields, message_type) = self.parse_nmea(sensor_model_name=model_name,
                                                 message=message)
    except ValueError as e:
        logging.error(str(e))
        return None

    # Finally, convert field values to variable names specific to sensor
    sensor_fields = sensor.get('fields', None)
    if not sensor_fields:
        logging.error('No "fields" definition found for sensor %s', data_id)
        return None

    named_fields = {}
    for field_name in fields:
        var_name = sensor_fields.get(field_name, None)
        if var_name:
            named_fields[var_name] = fields[field_name]

    record = DASRecord(data_id=data_id, message_type=message_type,
                       timestamp=ts, fields=named_fields)
    logging.debug('created DASRecord: %s', str(record))
    return record
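# A minimal usage sketch for parse_record() above. The wire format
# ('<data_id> <timestamp> <NMEA message>') is taken from the error text in
# the code itself; the host class name and the sample record below are
# hypothetical, for illustration only.
#
#   parser = SomeNMEAParser()        # hypothetical host class
#   rec = parser.parse_record('grv1 2017-11-10T01:00:06.572Z 01:024557 00')
#   if rec:
#       print(rec.fields)            # e.g. {'Grv1Value': 24557, 'Grv1Error': 0}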
def _write_record_to_data_server(self, field_name, record):
    """Format and label a record and send it to the cached data server."""
    if self.data_server_writer:
        das_record = DASRecord(fields={field_name: record})
        logging.debug('DASRecord: %s', das_record)
        self.data_server_writer.write(das_record)
    else:
        logging.info('Update: %s: %s', field_name, record)
def test_parse_records_das_record(self):
    p = RecordParser(definition_path=self.device_filename,
                     return_das_record=True)

    r = p.parse_record(GRV1_RECORDS[0])
    self.assertEqual(
        r, DASRecord(data_id='grv1', timestamp=1510275606.572,
                     fields={'Grv1Error': 0, 'Grv1Value': 24557}))

    r = p.parse_record(SEAP_RECORDS[0])
    self.assertEqual(
        r, DASRecord(data_id='seap', timestamp=1509778839.291859,
                     fields={'Seap200HeightQual': 0,
                             'Seap200RollPitchQual': 0,
                             'Seap200HorizQual': 1,
                             'Seap200HeadingQual': 0}))

    r = p.parse_record(SEAP_RECORDS[1])
    self.assertEqual(
        r, DASRecord(data_id='seap', timestamp=1509778839.547251,
                     fields={'Seap200GyroOffset': 0.74,
                             'Seap200GyroCal': 0.44}))

    r = p.parse_record(SEAP_RECORDS[2])
    self.assertEqual(
        r, DASRecord(data_id='seap', timestamp=1509778839.802690,
                     fields={'Seap200Roll': -1.47,
                             'Seap200Heave': -0.38,
                             'Seap200HeadingTrue': 235.77,
                             'Seap200Pitch': 0.01}))
def transform(self, record):
    """Does record exceed any previously-observed bounds?"""
    if not record:
        return None

    # If we've got a list, hope it's a list of records. Recurse,
    # calling transform() on each of the list elements in order and
    # return the resulting list.
    if type(record) is list:
        results = []
        for single_record in record:
            results.append(self.transform(single_record))
        return results

    if type(record) is DASRecord:
        fields = record.fields
    elif type(record) is dict:
        fields = record
    else:
        logging.warning('Input to MaxMinTransform must be either '
                        'DASRecord or dict. Received type "%s"', type(record))
        return None

    new_limits = {}
    for field, value in fields.items():
        # Max and Min only make sense for int, float and bool
        if not type(value) in [int, float, bool]:
            continue

        if field not in self.max or value > self.max[field]:
            self.max[field] = value
            new_limits[field + ':max'] = value
        if field not in self.min or value < self.min[field]:
            self.min[field] = value
            new_limits[field + ':min'] = value

    if not new_limits:
        return None

    # If we were handed a DASRecord, hand a DASRecord back
    if type(record) is DASRecord:
        data_id = record.data_id + '_limits' if record.data_id else 'limits'
        return DASRecord(data_id=data_id,
                         message_type=record.message_type,
                         timestamp=record.timestamp,
                         fields=new_limits)
    return new_limits
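# A short usage example of MaxMinTransform on plain dicts, mirroring the
# test later in this section. Assumes the standard OpenRVDAS import path
# and that the constructor takes no required arguments (as in the test).

from logger.transforms.max_min_transform import MaxMinTransform

mm = MaxMinTransform()
print(mm.transform({'pressure': 1012.5}))  # {'pressure:max': 1012.5, 'pressure:min': 1012.5}
print(mm.transform({'pressure': 1012.5}))  # None - no new limits observed
print(mm.transform({'pressure': 1013.0}))  # {'pressure:max': 1013.0}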
def _write_record(self, record):
    """Write record to table.

    Connectors assume we've got a DASRecord, but check; if we don't,
    see if it's a suitably-formatted dict that we can convert into a
    DASRecord.
    """
    if isinstance(record, dict):
        try:
            data_id = record.get('data_id', 'no_data_id')
            timestamp = record.get('timestamp', None)
            fields = record['fields']
            record = DASRecord(data_id=data_id, timestamp=timestamp,
                               fields=fields)
        except KeyError:
            logging.error('Unable to create DASRecord from dict: %s',
                          pprint.pformat(record))
            return
    self.db.write_record(record)
def _stderr_file_to_cds(self, logger, stderr_file_name):
    """Iteratively read from a file (presumed to be a logger's stderr file)
    and send the lines to a cached data server, labeled as coming from
    stderr:logger:<logger>.

    Format of error messages is a JSON-encoded dict of asctime, levelno,
    levelname, filename, lineno and message.

    To be run in a separate thread from _check_logger_stderr_loop.
    """
    if not self.data_server_writer:
        logging.error('INTERNAL ERROR: called _stderr_file_to_cds(), but no '
                      'cached data server defined?!?')
        return

    field_name = 'stderr:logger:' + logger
    message_format = ('{ascdate:S} {asctime:S} {levelno:d} {levelname:w} '
                      '{filename:w}.py:{lineno:d} {message}')

    # Our caller checked that this file exists, so open with impunity.
    reader = TextFileReader(file_spec=stderr_file_name, tail=True)
    while not self.quit_flag:
        record = reader.read()
        try:
            # parse.parse() returns None on a non-match, in which case the
            # subscripting below raises TypeError rather than KeyError.
            parsed_fields = parse.parse(message_format, record)
            fields = {
                'asctime': (parsed_fields['ascdate'] + 'T' +
                            parsed_fields['asctime']),
                'levelno': parsed_fields['levelno'],
                'levelname': parsed_fields['levelname'],
                'filename': parsed_fields['filename'] + '.py',
                'lineno': parsed_fields['lineno'],
                'message': parsed_fields['message']
            }
            das_record = DASRecord(fields={field_name: json.dumps(fields)})
            self.data_server_writer.write(das_record)
        except (KeyError, TypeError):
            logging.warning("Couldn't parse stderr message: %s", record)
def write(self, record):
    """Write out record. Accept a DASRecord or a dict, or a list of either
    of those.
    """
    if not record:
        return

    # If we've got a list, hope it's a list of records. Recurse,
    # calling write() on each of the list elements in order.
    if type(record) is list:
        for single_record in record:
            self.write(single_record)
        return

    # If we've been passed a DASRecord, things are easy: write it and return.
    if type(record) is DASRecord:
        self._write_record(record)
        return

    if not type(record) is dict:
        logging.error('Record passed to DatabaseWriter is not of type '
                      '"DASRecord" or dict; is type "%s"', type(record))
        return

    # If here, our record is a dict; figure out whether it is a top-level
    # field dict or not.
    data_id = record.get('data_id', None)
    timestamp = record.get('timestamp', time.time())
    fields = record.get('fields', None)

    if fields is None:
        logging.error('Dict record passed to DatabaseWriter has no "fields" '
                      'key, which either means it\'s not a dict you should be '
                      'passing, or it is in the old "field_dict" format that '
                      'assumes key:value pairs are at the top level.')
        logging.error('The record in question: %s', str(record))
        return

    das_record = DASRecord(data_id=data_id, timestamp=timestamp, fields=fields)
    self._write_record(das_record)
def test_default(self):
    max_min = MaxMinTransform()

    self.assertDictEqual(
        max_min.transform({'f1': 1, 'f2': 1.5, 'f3': 'string', 'f4': []}),
        {'f1:max': 1, 'f1:min': 1, 'f2:max': 1.5, 'f2:min': 1.5})

    self.assertEqual(
        max_min.transform({'f1': 1, 'f2': 1.5, 'f3': 'string', 'f4': []}),
        None)

    self.assertDictEqual(
        max_min.transform({'f1': 1.1, 'f2': 1.5, 'f3': 'string', 'f4': []}),
        {'f1:max': 1.1})

    record = DASRecord(data_id='foo', message_type='bar',
                       fields={'f1': 1.1, 'f2': 1.0, 'f3': 'string', 'f4': []})
    result = max_min.transform(record)
    self.assertEqual(result.data_id, 'foo_limits')
    self.assertDictEqual(result.fields, {'f2:min': 1.0})
def transform(self, record):
    """Return counts of the previous times we've seen these field names."""
    if not record:
        return None

    # If we've got a list, hope it's a list of records. Recurse,
    # calling transform() on each of the list elements in order and
    # return the resulting list.
    if type(record) is list:
        results = []
        for single_record in record:
            results.append(self.transform(single_record))
        return results

    if type(record) is DASRecord:
        fields = record.fields
    elif type(record) is dict:
        fields = record
    else:
        logging.warning('Input to CountTransform must be either '
                        'DASRecord or dict. Received type "%s"', type(record))
        return None

    new_counts = {}
    for field, value in fields.items():
        if field not in self.counts:
            self.counts[field] = 1
        else:
            self.counts[field] += 1
        new_counts[field + ':count'] = self.counts[field]

    # If we were handed a DASRecord, hand a DASRecord back
    if type(record) is DASRecord:
        data_id = record.data_id + '_counts' if record.data_id else 'counts'
        return DASRecord(data_id=data_id,
                         message_type=record.message_type,
                         timestamp=record.timestamp,
                         fields=new_counts)
    return new_counts
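# Corresponding usage example for CountTransform; the import path is
# assumed to follow the same OpenRVDAS layout as the other transforms in
# this section.

from logger.transforms.count_transform import CountTransform

ct = CountTransform()
print(ct.transform({'f1': 10, 'f2': 20}))  # {'f1:count': 1, 'f2:count': 1}
print(ct.transform({'f1': 11}))            # {'f1:count': 2}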
def write(self, record):
    """Write out record."""
    if not record:
        return

    # If input purports to not be a field dict, it should be a
    # DASRecord. Just write it out.
    if not self.field_dict_input:
        if type(record) is DASRecord:
            self._write_record(record)
        else:
            logging.error('Record passed to DatabaseWriter is not of type '
                          '"DASRecord"; is type "%s"', type(record))
        return

    # If here, we believe we've received a field dict, in which each
    # field may have multiple [timestamp, value] pairs. First thing we
    # do is reformat the data into a map of
    #   {timestamp: {field: value, field: value, ...}, ...}
    if not type(record) is dict:
        raise ValueError('DatabaseWriter.write() received record purporting '
                         'to be a field dict but of type %s' % type(record))

    values_by_timestamp = {}
    try:
        for field, ts_value_list in record.items():
            for (timestamp, value) in ts_value_list:
                if timestamp not in values_by_timestamp:
                    values_by_timestamp[timestamp] = {}
                values_by_timestamp[timestamp][field] = value
    except ValueError:
        logging.error('Badly-structured field dictionary: %s: %s',
                      field, pprint.pformat(ts_value_list))

    # Now go through each timestamp, generate a DASRecord from its
    # values, and write them.
    for timestamp in sorted(values_by_timestamp):
        das_record = DASRecord(timestamp=timestamp,
                               fields=values_by_timestamp[timestamp])
        self._write_record(das_record)
def test_sanity(self):
    """Sanity check that the numbers coming out make sense."""
    check = SANITY_CHECK.copy()
    expected_results = SANITY_RESULTS.copy()

    tw = TrueWindsTransform(data_id='truw',
                            course_fields='CourseTrue',
                            speed_fields='Speed',
                            heading_fields='HeadingTrue',
                            wind_dir_fields='RelWindDir',
                            wind_speed_fields='RelWindSpeed')
    while check:
        fields = check.pop(0)
        record = DASRecord(data_id='truw', fields=fields)
        result = tw.transform(record)
        logging.debug('sanity result: %s', result)

        expected_fields = expected_results.pop(0)
        self.assertDictEqual(result.fields, expected_fields)
def _fetch_and_parse_records(self, table_name, query):
    """Fetch records, given DB query, and parse into DASRecords."""
    (data_id, message_type) = self._parse_table_name(table_name)
    columns = self._get_table_columns(table_name)

    cursor = self.connection.cursor()
    cursor.execute(query)

    results = []
    for values in cursor:
        logging.debug('value: %s', values)
        fields = dict(zip(columns, values))
        id = fields.pop('id')
        self.next_id[table_name] = id + 1

        timestamp = fields.pop('timestamp')
        results.append(DASRecord(data_id=data_id,
                                 message_type=message_type,
                                 timestamp=timestamp,
                                 fields=fields))
    cursor.close()
    return results
def _fetch_and_parse_records(self, table_name, query):
    """Fetch records, given DB query, and parse into DASRecords."""
    cursor = list(self.db[table_name].find(query, {'_id': 0}))
    (data_id, message_type) = self._parse_table_name(table_name)
    columns = self._get_table_columns(table_name)

    results = []
    for values in cursor:
        fields = dict(zip(columns, values))
        self.next_id[table_name] = self.next_id[table_name] + 1

        timestamp = fields.pop('timestamp')
        results.append(DASRecord(data_id=data_id,
                                 message_type=message_type,
                                 timestamp=timestamp,
                                 fields=fields))
    return results
def test_sanity(self):
    """Sanity check that the numbers coming out make sense."""
    check = SANITY_CHECK.copy()
    expected_results = SANITY_RESULTS.copy()

    tw = TrueWindsTransform(course_field='CourseTrue',
                            speed_field='Speed',
                            heading_field='HeadingTrue',
                            wind_dir_field='RelWindDir',
                            wind_speed_field='RelWindSpeed',
                            true_dir_name='PortTrueWindDir',
                            true_speed_name='PortTrueWindSpeed',
                            apparent_dir_name='PortApparentWindDir')
    while check:
        fields = check.pop(0)
        record = DASRecord(data_id='truw', fields=fields)
        result = tw.transform(record.fields)

        expected = expected_results.pop(0)
        logging.info('sanity result: %s', result)
        logging.info('sanity expected: %s', expected)
        self.assertRecursiveAlmostEqual(result, expected)
def transform(self, record):
    """Parse JSON record to Python data struct or DASRecord."""
    if not record:
        return None

    # If we've got a list, hope it's a list of records. Recurse,
    # calling transform() on each of the list elements in order and
    # return the resulting list.
    if type(record) is list:
        results = []
        for single_record in record:
            results.append(self.transform(single_record))
        return results

    if not type(record) is str:
        logging.warning('FromJSON transform received non-string input, '
                        'type: "%s"', type(record))
        return None

    try:
        data = json.loads(record)
    except json.JSONDecodeError:
        logging.warning('Failed to parse JSON string: "%s"', record)
        return None

    if not self.das_record:
        return data

    if not type(data) is dict:
        logging.warning('FromJSON asked to create DASRecord from non-dict '
                        'data: "%s"', type(data))
        return None
    return DASRecord(fields=data)
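# Usage sketch for the JSON transform above. The class name
# FromJSONTransform and its das_record constructor flag are inferred from
# the docstring and the self.das_record attribute; treat both as
# assumptions rather than confirmed API.

from logger.transforms.from_json_transform import FromJSONTransform

t = FromJSONTransform()
print(t.transform('{"Speed": 12.1}'))  # {'Speed': 12.1}

t = FromJSONTransform(das_record=True)
print(t.transform('{"Speed": 12.1}'))  # DASRecord with fields={'Speed': 12.1}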
def test_default(self):
    t = DeltaTransform(field_type={'gyroheading': 'polar'})
    alpha = {'timestamp': 1, 'fields': {'gyroheading': 15}}

    # First time through, no results
    self.assertEqual(t.transform(alpha), None)

    beta = {'timestamp': 3, 'fields': {'gyroheading': 359, 'seatemp': 20}}
    results = t.transform(beta)
    self.assertEqual(results['fields'].get('gyroheading'), -16)
    self.assertEqual(results['fields'].get('seatemp', None), None)

    # Simple transform with no special field types
    t = DeltaTransform()

    # First record, there's nothing to "delta" with
    self.assertEqual(
        t.transform({'timestamp': 1, 'fields': {'variable': 15}}), None)

    # Second record, at timestamp 2, has a delta of +5
    self.assertEqual(
        t.transform({'timestamp': 2,
                     'fields': {'variable': 20, 'variable2': 10}}),
        {'timestamp': 2, 'fields': {'variable': 5}})

    # Third record, at timestamp 10, has a delta of -30
    self.assertEqual(
        t.transform({'timestamp': 10,
                     'fields': {'variable': -10, 'variable2': 15}}),
        {'timestamp': 10, 'fields': {'variable': -30, 'variable2': 5}})

    # Check that, when given a DASRecord, it returns one
    record = DASRecord(timestamp=11, fields={'variable': -9, 'variable2': 17})
    result = t.transform(record)
    expected = DASRecord(timestamp=11, fields={'variable': 1, 'variable2': 2})
    self.assertEqual(result, expected)
def transform(self, record):
    """Incorporate any useable fields in this record, and if it gives us
    a new true wind value, return it."""
    if not record:
        return None

    if not type(record) is DASRecord:
        logging.warning('Improper format record: %s', record)
        return None

    update = False
    for field_name in record.fields:
        if field_name in self.course_fields:
            self.course_val = record.fields[field_name]
        elif field_name in self.speed_fields:
            self.speed_val = record.fields[field_name] * self.convert_speed_factor
        elif field_name in self.heading_fields:
            self.heading_val = record.fields[field_name]
        elif field_name in self.wind_dir_fields:
            self.wind_dir_val = record.fields[field_name]
        elif field_name in self.wind_speed_fields:
            self.wind_speed_val = record.fields[field_name] * self.convert_wind_factor

        if field_name in self.update_on_fields:
            update = True

    # If we've not seen anything that updates fields that would
    # trigger a new true winds value, return None.
    if not update:
        return None

    if self.course_val is None:
        logging.info('Still missing course_val')
        return None
    if self.speed_val is None:
        logging.info('Still missing speed_val')
        return None
    if self.heading_val is None:
        logging.info('Still missing heading_val')
        return None
    if self.wind_dir_val is None:
        logging.info('Still missing wind_dir_val')
        return None
    if self.wind_speed_val is None:
        logging.info('Still missing wind_speed_val')
        return None

    logging.info('Computing new true winds')
    (true_dir, true_speed, app_dir) = truew(crse=self.course_val,
                                            cspd=self.speed_val,
                                            hd=self.heading_val,
                                            wdir=self.wind_dir_val,
                                            zlr=self.zero_line_reference,
                                            wspd=self.wind_speed_val)
    logging.info('Got true winds: dir: %s, speed: %s, app_dir: %s',
                 true_dir, true_speed, app_dir)
    if true_dir is None or true_speed is None or app_dir is None:
        logging.info('Got invalid true winds')
        return None

    # If here, we've got a valid new true wind result
    if self.output_nmea:
        new_record = '%s %s %g,%g,%g' % (self.data_id,
                                         time_str(record.timestamp),
                                         true_dir, true_speed, app_dir)
    else:
        new_record = DASRecord(data_id=self.data_id,
                               timestamp=record.timestamp,
                               fields={'TrueWindDir': true_dir,
                                       'TrueWindSpeed': true_speed,
                                       'ApparentWindDir': app_dir})
    return new_record
def transform(self, record):
    if not record:
        return None

    fields = {}
    if type(record) is DASRecord:
        fields = record.fields
        timestamp = record.timestamp
    elif type(record) is dict:
        fields = record.get('fields', None)
        timestamp = record.get('timestamp', None)
    elif type(record) is list:
        results = []
        for single_record in record:
            results.append(self.transform(single_record))
        return results
    else:
        logging.info('Record passed to DeltaTransform was neither a dict nor a '
                     'DASRecord. Type was %s: %s',
                     type(record), str(record)[:80])
        return None

    if fields is None:
        logging.info('Record passed to DeltaTransform does not have '
                     '"fields": %s', record)
        return None
    if timestamp is None:
        logging.info('Record passed to DeltaTransform does not have '
                     '"timestamp": %s', record)
        return None

    delta_values = {}
    rate_values = {}
    for key, value in fields.items():
        if key in self.last_value_dict:
            last_timestamp, last_value = self.last_value_dict.get(
                key, (None, None))
            if self.field_type is None:
                delta_values[key] = value - last_value
            elif type(self.field_type) is dict:
                if key in self.field_type:
                    if self.field_type[key] == 'polar':
                        delta_values[key] = polar_diff(last_value, value)
                else:
                    delta_values[key] = value - last_value
            else:
                raise ValueError('field_type passed to DeltaTransform is '
                                 'neither None nor a dict')
            rate_values[key] = delta_values[key] / (timestamp - last_timestamp)
            self.last_value_dict[key] = (timestamp, value)
        else:
            self.last_value_dict[key] = (timestamp, value)
            delta_values[key] = None

    if self.rate is True:
        return DASRecord(timestamp=timestamp, fields=rate_values)
    elif type(self.rate) is list:
        results = {}
        for field in self.rate:
            results[field] = rate_values[field]
        return DASRecord(timestamp=timestamp, fields=results)
    else:
        return DASRecord(timestamp=timestamp, fields=delta_values)
def transform(self, record):
    """Incorporate any useable fields in this record, and if it gives us
    a new specific gravity value, return the results."""
    if record is None:
        return None

    # If we've got a list, hope it's a list of records. Recurse,
    # calling transform() on each of the list elements in order and
    # return the resulting list.
    if type(record) is list:
        results = []
        for single_record in record:
            results.append(self.transform(single_record))
        return results

    results = []
    for das_record in to_das_record_list(record):
        # If they haven't specified specific fields we should wait for
        # before updates, plan to emit an update after every new record
        # we process. Otherwise, assume we're not going to update unless
        # we see one of the named fields.
        if not self.update_on_fields:
            update = True
        else:
            update = False

        timestamp = das_record.timestamp
        if not timestamp:
            logging.info('DASRecord is missing timestamp - skipping')
            continue

        # Get latest values for any of our fields
        fields = das_record.fields
        if self.temp_field in fields:
            if timestamp >= self.temp_val_time:
                self.temp_val = fields.get(self.temp_field)
                self.temp_val_time = timestamp
                if self.temp_field in self.update_on_fields:
                    update = True
        if self.salinity_field in fields:
            if timestamp >= self.salinity_val_time:
                self.salinity_val = fields.get(self.salinity_field)
                self.salinity_val_time = timestamp
                if self.salinity_field in self.update_on_fields:
                    update = True
        if self.pressure_field and self.pressure_field in fields:
            if timestamp >= self.pressure_val_time:
                self.pressure_val = fields.get(self.pressure_field)
                self.pressure_val_time = timestamp
                if self.pressure_field in self.update_on_fields:
                    update = True

        if None in (self.temp_val, self.salinity_val, self.pressure_val):
            logging.warning('Not all required values for seawater specific '
                            'gravity are present: '
                            'time: %s: %s: %s, %s: %s, %s: %s',
                            timestamp,
                            self.temp_field, self.temp_val,
                            self.salinity_field, self.salinity_val,
                            self.pressure_field, self.pressure_val)
            continue

        # If we've not seen anything that updates fields that would
        # trigger a computation, skip rest of computation.
        if not update:
            logging.debug('No update needed')
            continue

        logging.debug('Computing specific gravity')
        specific_gravity_val = specific_gravity(temp=self.temp_val,
                                                salinity=self.salinity_val,
                                                pressure=self.pressure_val)
        logging.debug('Got seawater specific gravity: %s',
                      specific_gravity_val)

        # If here, we've got a valid specific gravity result
        result_fields = {self.specific_gravity_name: specific_gravity_val}

        # Add in metadata if so specified and it's been long enough since
        # we last sent it.
        now = time.time()
        if self.metadata_interval and \
           now - self.metadata_interval > self.last_metadata_send:
            metadata = {'fields': self._metadata()}
            self.last_metadata_send = now
            logging.debug('Emitting metadata: %s', pformat(metadata))
        else:
            metadata = None

        results.append(DASRecord(timestamp=timestamp,
                                 fields=result_fields,
                                 metadata=metadata))

    # If we've only got a single result, return it as a singleton
    # rather than as a list.
    if results and len(results) == 1:
        return results[0]
    return results
def parse_record(self, record):
    """Parse an id-prefixed text record into a Python dict of data_id,
    timestamp and fields.
    """
    if not record:
        return None
    if not isinstance(record, str):
        logging.info('Record is not a string: "%s"', record)
        return None
    try:
        # Break record into (by default) data_id, timestamp and field_string
        parsed_record = self.compiled_record_format.parse(record).named
    except (ValueError, AttributeError):
        if not self.quiet:
            logging.warning('Unable to parse record into "%s"',
                            self.record_format)
            logging.warning('Record: %s', record)
        return None

    # Convert timestamp to numeric, if it's there
    timestamp = parsed_record.get('timestamp', None)
    if timestamp is not None and isinstance(timestamp, datetime.datetime):
        timestamp = timestamp.timestamp()
        parsed_record['timestamp'] = timestamp

    # Extract the field string we're going to parse; remove trailing
    # whitespace.
    field_string = parsed_record.get('field_string', None)

    # If we don't have fields, there's nothing to parse
    if field_string is None:
        return None
    field_string = field_string.strip()
    if not field_string:
        return None

    fields = {}
    data_id = parsed_record.get('data_id', None)

    # If we've been given a set of field_patterns to apply, use the
    # first that matches.
    if self.field_patterns:
        fields, message_type = self._parse_field_string(
            field_string, self.compiled_field_patterns)

    # If we were given no explicit field_patterns to use, we need to
    # count on the record having a data_id that lets us figure out
    # which device, and therefore which field_patterns to try.
    else:
        if data_id is None:
            if not self.quiet:
                logging.warning('No data id found in record: %s', record)
            return None
        fields, message_type = self.parse_for_data_id(data_id, field_string)

    # We should now have a dictionary of fields. If not, go home.
    if not fields:
        if not self.quiet:
            logging.warning('No formats matched field_string "%s"',
                            field_string)
        return None

    # Some folks want the data_id prepended. This conditional dictates
    # whether fields are stored with just the field_name key, or
    # <data_id><delimiter><field_name>.
    if self.prepend_data_id:
        # Doing some work directly on the fields dict, so we'll take a
        # copy to loop over, then rebuild fields with the new keys.
        fields_copy = fields.copy()
        fields = {}
        for field in fields_copy:
            key = data_id + self.delimiter + field
            fields[key] = fields_copy[field]

    # Remove raw 'field_string' and add parsed 'fields' to parsed_record
    del parsed_record['field_string']
    parsed_record['fields'] = fields
    if message_type:
        parsed_record['message_type'] = message_type

    # If we have parsed fields, see if we also have metadata. Are we
    # supposed to occasionally send it for our variables? Is it time
    # to send it again?
    metadata_fields = {}
    if self.metadata and self.metadata_interval:
        for field_name in fields:
            last_metadata_sent = self.metadata_last_sent.get(field_name, 0)
            time_since_send = timestamp - last_metadata_sent
            if time_since_send > self.metadata_interval:
                field_metadata = self.metadata.get(field_name, None)
                if field_metadata:
                    metadata_fields[field_name] = field_metadata
                    self.metadata_last_sent[field_name] = timestamp
    if metadata_fields:
        metadata = {'fields': metadata_fields}
    else:
        metadata = None
    if metadata:
        parsed_record['metadata'] = metadata

    logging.debug('Created parsed record: %s', pprint.pformat(parsed_record))

    # What are we going to do with the result we've created?
    if self.return_das_record:
        try:
            return DASRecord(data_id=data_id, timestamp=timestamp,
                             message_type=message_type,
                             fields=fields, metadata=metadata)
        except KeyError:
            return None
    elif self.return_json:
        return json.dumps(parsed_record)
    else:
        return parsed_record
def transform(self, record):
    if not record:
        return None

    fields = {}
    if type(record) is DASRecord:
        fields = record.fields
        timestamp = record.timestamp
    elif type(record) is dict:
        fields = record.get('fields', None)
        timestamp = record.get('timestamp', None)
    elif type(record) is list:
        results = []
        for single_record in record:
            results.append(self.transform(single_record))
        return results
    else:
        logging.info('Record passed to DeltaTransform was neither a dict nor a '
                     'DASRecord. Type was %s: %s',
                     type(record), str(record)[:80])
        return None

    if fields is None:
        logging.info('Record passed to DeltaTransform does not have '
                     '"fields": %s', record)
        return None
    if timestamp is None:
        logging.info('Record passed to DeltaTransform does not have '
                     '"timestamp": %s', record)
        return None

    delta_values = {}
    for key, value in fields.items():
        # If we don't have a previous value for this field, store the
        # current one and move on to the next field.
        if key not in self.last_value_dict:
            self.last_value_dict[key] = (timestamp, value)
            continue

        last_timestamp, last_value = self.last_value_dict.get(
            key, (None, None))

        # Does this field have a special type?
        if type(self.field_type) is dict:
            this_field_type = self.field_type.get(key, None)
        else:
            this_field_type = self.field_type

        # What do we do with this field_type? 'None' is a simple diff
        if this_field_type == 'polar':
            delta_values[key] = polar_diff(last_value, value)
        elif this_field_type is None:
            delta_values[key] = value - last_value
        else:
            raise ValueError('DeltaTransform configured with unrecognized '
                             'field type for %s: "%s"'
                             % (key, this_field_type))

        # Are we doing rate or simple diff for this field?
        if self.rate is True or (type(self.rate) is list and key in self.rate):
            time_diff = timestamp - last_timestamp
            # If rate, make sure it's a valid time difference. Bail if it isn't.
            if time_diff <= 0:
                logging.warning('Invalid difference in successive timestamps '
                                'for field %s: %g -> %g',
                                key, last_timestamp, timestamp)
                return None
            delta_values[key] = delta_values[key] / time_diff

        # Finally, save the current values for next time
        self.last_value_dict[key] = (timestamp, value)

    # If, at the end of it all, we don't have any fields, return None
    if not delta_values:
        return None

    # If they gave us a dict, return a dict; if they gave us a
    # DASRecord, return a DASRecord.
    if type(record) is dict:
        return {'timestamp': timestamp, 'fields': delta_values}
    return DASRecord(timestamp=timestamp, fields=delta_values)
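# Worked example of DeltaTransform using the same values as the test
# earlier in this section: a field registered as 'polar' wraps through
# 360 degrees, so 15 -> 359 is reported as -16 rather than +344. The
# import path assumes the standard OpenRVDAS layout.

from logger.transforms.delta_transform import DeltaTransform

t = DeltaTransform(field_type={'gyroheading': 'polar'})
t.transform({'timestamp': 1, 'fields': {'gyroheading': 15}})  # None - first value
print(t.transform({'timestamp': 3, 'fields': {'gyroheading': 359}}))
# {'timestamp': 3, 'fields': {'gyroheading': -16}}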
def write(self, record):
    """Write out record. Connectors assume we've got a DASRecord, so check
    what we've got and convert as necessary.
    """
    if not record:
        return

    # If we've got a list, hope it's a list of records. Recurse,
    # calling write() on each of the list elements in order.
    if type(record) is list:
        for single_record in record:
            self.write(single_record)
        return

    # If we've been passed a DASRecord, things are easy: write it and return.
    if type(record) is DASRecord:
        self._write_record(record)
        return

    if not type(record) is dict:
        logging.error('Record passed to DatabaseWriter is not of type '
                      '"DASRecord" or dict; is type "%s"', type(record))
        return

    # If here, our record is a dict; figure out whether it is a top-level
    # field dict or not.
    data_id = record.get('data_id', None)
    timestamp = record.get('timestamp', time.time())
    fields = record.get('fields', None)

    if fields is None:
        logging.error('Dict record passed to DatabaseWriter has no "fields" '
                      'key, which either means it\'s not a dict you should be '
                      'passing, or it is in the old "field_dict" format that '
                      'assumes key:value pairs are at the top level.')
        logging.error('The record in question: %s', str(record))
        return

    # Now check whether our 'values' are singletons (in which case
    # we've got a single record) or lists of tuples. Shortcut by
    # checking only the first value in our 'fields' dict.
    try:
        first_key, first_value = next(iter(fields.items()))
    except StopIteration:
        # Empty fields
        logging.debug('Empty "fields" dict in record: %s', str(record))
        return

    # If we've got a singleton, it's a single record. Convert to
    # DASRecord and write it out.
    if not type(first_value) is list:
        das_record = DASRecord(data_id=data_id, timestamp=timestamp,
                               fields=fields)
        self._write_record(das_record)
        return

    # If we're here, our values (or at least our first one) are lists
    # of (timestamp, value) pairs. First thing we do is reformat the
    # data into a map of
    #   {timestamp: {field: value, field: value, ...}, ...}
    values_by_timestamp = {}
    try:
        for field, ts_value_list in fields.items():
            for (timestamp, value) in ts_value_list:
                if timestamp not in values_by_timestamp:
                    values_by_timestamp[timestamp] = {}
                values_by_timestamp[timestamp][field] = value
    except ValueError:
        logging.error('Badly-structured field dictionary: %s: %s',
                      field, pprint.pformat(ts_value_list))

    # Now go through each timestamp, generate a DASRecord from its
    # values, and write them.
    for timestamp in sorted(values_by_timestamp):
        das_record = DASRecord(data_id=data_id, timestamp=timestamp,
                               fields=values_by_timestamp[timestamp])
        self._write_record(das_record)
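# For reference, the two dict shapes that DatabaseWriter.write() accepts:
# field values may be singletons (one record) or lists of
# (timestamp, value) pairs (one record per timestamp). These literals are
# purely illustrative.

single_record = {'data_id': 'gyr1',
                 'timestamp': 1510275606.572,
                 'fields': {'HeadingTrue': 235.77}}

field_dict_record = {'data_id': 'gyr1',
                     'fields': {'HeadingTrue': [(1510275606.572, 235.77),
                                                (1510275607.572, 235.80)]}}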
def transform(self, record):
    """Take either a DASRecord or a field dictionary and return
    transformed values in the same format."""
    if not record:
        return None

    # What type of record is this?
    if type(record) is DASRecord:
        is_das_record = True
        fields = record.fields
    elif type(record) is dict:
        is_das_record = False
        fields = record
    else:
        raise TypeError('ComposedDerivedDataTransform.transform(record) '
                        'received record of inappropriate type: %s'
                        % type(record))

    # DASRecords are easy - we only have one timestamp to deal with,
    # so only have to run each transform once.
    if is_das_record:
        # Which transforms are interested in values contained in record?
        transforms_to_run = set()
        for field, value in fields.items():
            field_transforms = self.fields.get(field, set())
            transforms_to_run.update(field_transforms)
            self.values[field] = value
            self.timestamps[field] = record.timestamp

        # Run all transforms that have registered interest in these
        # fields, then aggregate results into a single dict.
        results = {}
        for transform in transforms_to_run:
            t_results = transform.transform(self.values, self.timestamps)
            if t_results:
                results.update(t_results)

        # Return an anonymous DASRecord with the results we've aggregated
        if not results:
            return None
        return DASRecord(timestamp=record.timestamp, fields=results)

    # If here, we believe we've received a field dict, in which each
    # field may have multiple [timestamp, value] pairs. First thing we
    # do is reformat the data into a map of
    #   {timestamp: {field: value, field: value, ...}, ...}
    values_by_timestamp = {}
    try:
        for field, ts_value_list in fields.items():
            for (timestamp, value) in ts_value_list:
                if timestamp not in values_by_timestamp:
                    values_by_timestamp[timestamp] = {}
                values_by_timestamp[timestamp][field] = value
    except ValueError:
        logging.error('Badly-structured field dictionary: %s: %s',
                      field, pprint.pformat(ts_value_list))

    # Now go through each timestamp, update the values in it, then run
    # the transforms that are interested in the values that have
    # updated. Append the resulting transformed [timestamp, value]
    # pairs to the appropriate field name.
    results = {}
    for timestamp in sorted(values_by_timestamp):
        fields = values_by_timestamp[timestamp]
        logging.debug('timestamp %f, fields: %s', timestamp, fields)

        # Which transforms are interested in values contained in this
        # particular timestamp?
        transforms_to_run = set()
        for field, value in fields.items():
            field_transforms = self.fields.get(field, set())
            transforms_to_run.update(field_transforms)
            self.values[field] = value
            self.timestamps[field] = timestamp

        # Run all transforms and aggregate results into a single dict
        for transform in transforms_to_run:
            field_values = transform.transform(self.values, self.timestamps)
            if not field_values:
                continue
            for field, value in field_values.items():
                if field not in results:
                    results[field] = []
                results[field].append([timestamp, value])

    return results or None
def parse_record(self, record):
    """Parse an id-prefixed text record into a Python dict of data_id,
    timestamp and fields.
    """
    if not record:
        return None
    if not isinstance(record, str):
        logging.info('Record is not string: "%s"', record)
        return None
    try:
        parsed_record = self.compiled_record_format.parse(record).named
    except (ValueError, AttributeError):
        if not self.quiet:
            logging.warning('Unable to parse record into "%s"',
                            self.record_format)
            logging.warning('Record: %s', record)
        return None

    # Convert timestamp to numeric, if it's there
    timestamp = parsed_record.get('timestamp', None)
    if timestamp is not None and isinstance(timestamp, datetime.datetime):
        timestamp = timestamp.timestamp()
        parsed_record['timestamp'] = timestamp

    # Figure out what kind of message we're expecting, based on data_id
    data_id = parsed_record.get('data_id', None)
    if data_id is None:
        if not self.quiet:
            logging.warning('No data id found in record: %s', record)
        return None

    # Get device and device_type definitions for data_id
    device = self.devices.get(data_id, None)
    if not device:
        if not self.quiet:
            logging.warning('Unrecognized data id "%s", record: %s',
                            data_id, record)
            logging.warning('Devices are: %s', ', '.join(self.devices.keys()))
        return None

    device_type = device.get('device_type', None)
    if not device_type:
        if not self.quiet:
            logging.error('Internal error: No "device_type" for device %s!',
                          device)
        return None

    # Extract the message we're going to parse; remove trailing whitespace
    message = parsed_record.get('message', '').rstrip()
    if not message:
        if not self.quiet:
            logging.warning('No message found in record: %s', record)
        return None
    del parsed_record['message']

    # Now parse the message, based on device type. If something goes
    # wrong during parsing, expect a ValueError.
    try:
        parsed_fields = self.parse(device_type=device_type, message=message)
        logging.debug('Got fields: %s', pprint.pformat(parsed_fields))
    except ValueError as e:
        logging.error(str(e))
        return None

    # Finally, convert field values to variable names specific to device
    device_fields = device.get('fields', None)
    if not device_fields:
        if not self.quiet:
            logging.error('No "fields" definition found for device %s',
                          data_id)
        return None

    # Assign field values to the appropriate named variable.
    fields = {}
    metadata_fields = {}
    for field_name, value in parsed_fields.items():
        variable_name = device_fields.get(field_name, None)
        # None means we're not supposed to report it.
        if variable_name is None:
            continue
        # None means we didn't have a value for this field; omit it.
        if value is None:
            continue
        # If it's a datetime, convert to numeric timestamp
        if type(value) is datetime.datetime:
            value = value.timestamp()
        fields[variable_name] = value

        # Are we supposed to occasionally send metadata on our
        # variables? Is it time to send it again?
        if self.metadata_interval:
            last_metadata_sent = self.field_metadata_last_sent[variable_name]
            time_since_send = timestamp - last_metadata_sent
            if time_since_send > self.metadata_interval:
                metadata_fields[variable_name] = \
                    self.field_metadata.get(variable_name)
                self.field_metadata_last_sent[variable_name] = timestamp

    parsed_record['fields'] = fields

    # Are we sending metadata on any fields? If so, add the 'metadata'
    # key into our parsed_record.
    if metadata_fields:
        metadata = {'fields': metadata_fields}
        parsed_record['metadata'] = metadata
    else:
        metadata = None

    logging.debug('Created parsed record: %s', pprint.pformat(parsed_record))

    # What are we going to do with the parsed_record we've created?
    if self.return_das_record:
        try:
            return DASRecord(data_id=data_id, timestamp=timestamp,
                             fields=fields, metadata=metadata)
        except KeyError:
            return None
    elif self.return_json:
        return json.dumps(parsed_record)
    else:
        return parsed_record
def transform(self, record):
    """Incorporate any useable fields in this record, and if it gives us
    a new true wind value, return the results."""
    if record is None:
        return None

    # If we've got a list, hope it's a list of records. Recurse,
    # calling transform() on each of the list elements in order and
    # return the resulting list.
    if type(record) is list:
        results = []
        for single_record in record:
            results.append(self.transform(single_record))
        return results

    results = []
    for das_record in to_das_record_list(record):
        # If they haven't specified specific fields we should wait for
        # before updates, plan to emit an update after every new record
        # we process. Otherwise, assume we're not going to update unless
        # we see one of the named fields.
        if not self.update_on_fields:
            update = True
        else:
            update = False

        timestamp = das_record.timestamp
        if not timestamp:
            logging.info('DASRecord is missing timestamp - skipping')
            continue

        # Get latest values for any of our fields
        fields = das_record.fields
        if self.course_field in fields:
            if timestamp >= self.course_val_time:
                self.course_val = fields.get(self.course_field)
                self.course_val_time = timestamp
                if self.course_field in self.update_on_fields:
                    update = True
        if self.speed_field in fields:
            if timestamp >= self.speed_val_time:
                self.speed_val = fields.get(self.speed_field)
                self.speed_val *= self.convert_speed_factor
                self.speed_val_time = timestamp
                if self.speed_field in self.update_on_fields:
                    update = True
        if self.heading_field in fields:
            if timestamp >= self.heading_val_time:
                self.heading_val = fields.get(self.heading_field)
                self.heading_val_time = timestamp
                if self.heading_field in self.update_on_fields:
                    update = True
        if self.wind_dir_field in fields:
            if timestamp >= self.wind_dir_val_time:
                self.wind_dir_val = fields.get(self.wind_dir_field)
                self.wind_dir_val_time = timestamp
                if self.wind_dir_field in self.update_on_fields:
                    update = True
        if self.wind_speed_field in fields:
            if timestamp >= self.wind_speed_val_time:
                self.wind_speed_val = fields.get(self.wind_speed_field)
                self.wind_speed_val *= self.convert_wind_factor
                self.wind_speed_val_time = timestamp
                if self.wind_speed_field in self.update_on_fields:
                    update = True

        if None in (self.course_val, self.speed_val, self.heading_val,
                    self.wind_dir_val, self.wind_speed_val):
            logging.debug('Not all required values for true winds are '
                          'present: '
                          'time: %s: %s: %s, %s: %s, %s: %s, %s: %s, %s: %s',
                          timestamp,
                          self.course_field, self.course_val,
                          self.speed_field, self.speed_val,
                          self.heading_field, self.heading_val,
                          self.wind_dir_field, self.wind_dir_val,
                          self.wind_speed_field, self.wind_speed_val)
            continue

        # If we've not seen anything that updates fields that would
        # trigger a new true winds value, skip rest of computation.
        if not update:
            logging.debug('No update needed')
            continue

        logging.debug('Computing new true winds')
        (true_dir, true_speed, apparent_dir) = truew(
            crse=self.course_val,
            cspd=self.speed_val,
            hd=self.heading_val,
            wdir=self.wind_dir_val,
            zlr=self.zero_line_reference,
            wspd=self.wind_speed_val)
        logging.debug('Got true winds: dir: %s, speed: %s, apparent_dir: %s',
                      true_dir, true_speed, apparent_dir)
        if None in (true_dir, true_speed, apparent_dir):
            logging.info('Got invalid true winds')
            continue

        # If here, we've got a valid new true wind result
        true_wind_fields = {self.true_dir_name: true_dir,
                            self.true_speed_name: true_speed,
                            self.apparent_dir_name: apparent_dir}

        # Add in metadata if so specified and it's been long enough since
        # we last sent it.
        now = time.time()
        if self.metadata_interval and \
           now - self.metadata_interval > self.last_metadata_send:
            metadata = {'fields': self._metadata()}
            self.last_metadata_send = now
            logging.debug('Emitting metadata: %s', pformat(metadata))
        else:
            metadata = None

        results.append(DASRecord(timestamp=timestamp,
                                 fields=true_wind_fields,
                                 metadata=metadata))
    return results
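# Usage sketch for TrueWindsTransform, with constructor arguments taken
# from the test earlier in this section. The import paths, the sample
# field values and the default output field names are assumptions based
# on the surrounding code, not confirmed API.

from logger.transforms.true_winds_transform import TrueWindsTransform
from logger.utils.das_record import DASRecord

tw = TrueWindsTransform(course_field='CourseTrue',
                        speed_field='Speed',
                        heading_field='HeadingTrue',
                        wind_dir_field='RelWindDir',
                        wind_speed_field='RelWindSpeed')
record = DASRecord(timestamp=1510275606.0,
                   fields={'CourseTrue': 90.0, 'Speed': 10.0,
                           'HeadingTrue': 90.0, 'RelWindDir': 45.0,
                           'RelWindSpeed': 15.0})
result = tw.transform(record)  # DASRecord(s) with true/apparent wind fields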
def parse_record(self, record):
    """Parse an id-prefixed text record into a Python dict of data_id,
    timestamp and fields.
    """
    if not record:
        return None
    if not isinstance(record, str):
        logging.info('Record is not a string: "%s"', record)
        return None
    try:
        parsed_record = self.compiled_record_format.parse(record).named
    except (ValueError, AttributeError):
        if not self.quiet:
            logging.warning('Unable to parse record into "%s"',
                            self.record_format)
            logging.warning('Record: %s', record)
        return None

    # Convert timestamp to numeric, if it's there
    timestamp = parsed_record.get('timestamp', None)
    if timestamp is not None and isinstance(timestamp, datetime.datetime):
        timestamp = timestamp.timestamp()
        parsed_record['timestamp'] = timestamp

    # Extract the field string we're going to parse; it may be absent.
    # Remove trailing whitespace if present.
    field_string = parsed_record.get('field_string', None)
    if field_string is not None:
        field_string = field_string.rstrip()
        del parsed_record['field_string']

    fields = {}
    data_id = parsed_record.get('data_id', None)
    if field_string:
        # If we've been given a set of field_patterns to apply, use the
        # first that matches.
        if self.field_patterns:
            for trial_pattern in self.compiled_field_patterns:
                parsed_fields = trial_pattern.parse(field_string)
                # Did we find a parse that matched? If so, use its named fields
                if parsed_fields:
                    fields = parsed_fields.named
                    break

        # If we were given no explicit field_patterns to use, we need to
        # count on the record having a data_id that lets us figure out
        # which device, and therefore which field_patterns to try.
        else:
            if data_id is None:
                if not self.quiet:
                    logging.warning('No data id found in record: %s', record)
                return None
            fields = self.parse_for_data_id(data_id, field_string)

    if fields:
        parsed_record['fields'] = fields

    # If we have parsed fields, see if we also have metadata. Are we
    # supposed to occasionally send it for our variables? Is it time
    # to send it again?
    metadata_fields = {}
    if self.metadata and self.metadata_interval:
        for field_name in fields:
            last_metadata_sent = self.metadata_last_sent.get(field_name, 0)
            time_since_send = timestamp - last_metadata_sent
            if time_since_send > self.metadata_interval:
                field_metadata = self.metadata.get(field_name, None)
                if field_metadata:
                    metadata_fields[field_name] = field_metadata
                    self.metadata_last_sent[field_name] = timestamp
    if metadata_fields:
        metadata = {'fields': metadata_fields}
    else:
        metadata = None
    if metadata:
        parsed_record['metadata'] = metadata

    logging.debug('Created parsed record: %s', pprint.pformat(parsed_record))

    # What are we going to do with the result we've created?
    if self.return_das_record:
        try:
            return DASRecord(data_id=data_id, timestamp=timestamp,
                             fields=fields, metadata=metadata)
        except KeyError:
            return None
    elif self.return_json:
        return json.dumps(parsed_record)
    else:
        return parsed_record