def transform(cls, data):
    """
    Compute InfluxDB query expression from data in transformation dictionary.
    Also compute date range from query parameters "from" and "to".
    """

    from pyinfluxql import Query
    from pyinfluxql.functions import Mean

    measurement = data.measurement

    # Vanilla QL (v1)
    #expression = 'SELECT * FROM {measurement}'.format(measurement=measurement)

    # PyInfluxQL (v2)
    # https://github.com/jjmalina/pyinfluxql

    # Labs
    #time_begin = arrow.utcnow() - arrow.Arrow(hour=1)
    #expression = Query('*').from_(measurement).where(time__gt=datetime.utcnow() - timedelta(hours=1))
    #expression = Query(Mean('*')).from_(measurement).where(time__gt=datetime.now() - timedelta(1)).group_by(time=timedelta(hours=1))

    # Fix up "measurement" if starting with numeric value
    # TODO: Fix should go to pyinfluxql
    if is_number(measurement[0]):
        measurement = '"{measurement}"'.format(measurement=measurement)

    # TODO: Use ".date_range" API method
    time_begin, time_end = compute_daterange(data.get('from'), data.get('to'))

    tags = {}
    #tags = InfluxDBAdapter.get_tags(data)

    expression = Query('*').from_(measurement).where(time__gte=time_begin, time__lte=time_end, **tags)

    result = {
        'expression': str(expression),
        'time_begin': time_begin,
        'time_end': time_end,
    }

    return result
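# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of this module): how an expression
# like the one computed by ``transform`` can be built with pyinfluxql and
# submitted through influxdb-python. Host, port, database and the example
# measurement/date range are assumptions for demonstration purposes.
# ---------------------------------------------------------------------------
def example_query_influxdb():
    from datetime import datetime

    from influxdb import InfluxDBClient
    from pyinfluxql import Query

    # Build "SELECT * FROM hiveeyes_100 WHERE time >= ... AND time <= ..."
    # the same way ``transform`` does, with hardcoded example values.
    expression = Query('*').from_('hiveeyes_100').where(
        time__gte=datetime(2015, 10, 17),
        time__lte=datetime(2015, 10, 18))

    # Placeholder connection parameters.
    client = InfluxDBClient(host='localhost', port=8086, database='example_db')

    # ``InfluxDBClient.query`` accepts the rendered query string.
    return client.query(str(expression))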
def format_chunk(self, meta, data):
    """
    Format for InfluxDB >= 0.9::

        {
            "measurement": "hiveeyes_100",
            "tags": {
                "host": "server01",
                "region": "europe"
            },
            "time": "2015-10-17T19:30:00Z",
            "fields": {
                "value": 0.42
            }
        }

    """

    assert isinstance(data, dict), 'Data payload is not a dictionary'

    chunk = {
        "measurement": meta['measurement'],
        "tags": {},
    }

    """
    if "gateway" in meta:
        chunk["tags"]["gateway"] = meta["gateway"]

    if "node" in meta:
        chunk["tags"]["node"] = meta["node"]
    """

    # Extract timestamp field from data
    chunk['time_precision'] = 'n'
    for time_field in ['time', 'datetime', 'dateTime']:
        if time_field in data:

            # WeeWX. TODO: Move to specific vendor configuration.
            # Disabled in favor of precision detection heuristic.
            #if time_field == 'dateTime':
            #    chunk['time_precision'] = 's'

            # Process timestamp field.
            if data[time_field]:

                # Decode timestamp.
                chunk['time'] = data[time_field]
                if is_number(chunk['time']):
                    chunk['time'] = int(float(chunk['time']))

            # Remove timestamp from data payload.
            del data[time_field]

            # If we found a timestamp field already,
            # don't look out for more.
            break

    # Extract geohash from data. Finally, thanks Rich!
    # TODO: Also precompute geohash with 3-4 different zoomlevels and add them as tags
    if "geohash" in data:
        chunk["tags"]["geohash"] = data["geohash"]
        del data['geohash']

    if "latitude" in data and "longitude" in data:
        chunk["tags"]["latitude"] = data["latitude"]
        chunk["tags"]["longitude"] = data["longitude"]
        del data['latitude']
        del data['longitude']

    # Extract more information specific to luftdaten.info
    for field in ['location', 'location_id', 'location_name', 'sensor_id', 'sensor_type']:
        if field in data:
            chunk["tags"][field] = data[field]
            del data[field]

    # TODO: Maybe do this at data acquisition / transformation time, not here.
    if 'time' in chunk:
        timestamp = chunk['time'] = parse_timestamp(chunk['time'])

        # Heuristically compute timestamp precision
        if isinstance(timestamp, int):
            if timestamp >= 1e17 or timestamp <= -1e17:
                time_precision = 'n'
            elif timestamp >= 1e14 or timestamp <= -1e14:
                time_precision = 'u'
            elif timestamp >= 1e11 or timestamp <= -1e11:
                time_precision = 'ms'

            # FIXME: Is this a reasonable default?
            else:
                time_precision = 's'

            chunk['time_precision'] = time_precision

        """
        # FIXME: Breaks CSV data acquisition. Why?
        if isinstance(chunk['time'], datetime.datetime):
            if chunk['time'].microsecond == 0:
                chunk['time_precision'] = 's'
        """

    """
    Prevent errors like
    ERROR: InfluxDBClientError: 400:
    write failed: field type conflict:
    input field "pitch" on measurement "01_position" is type float64, already exists as type integer
    """
    self.data_to_float(data)

    assert data, 'Data payload is empty'

    chunk["fields"] = data

    return chunk
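# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, assumptions marked): a luftdaten.info-style
# payload run through ``format_chunk`` and written with influxdb-python.
# ``adapter`` is assumed to be an instance of the class defining
# ``format_chunk`` above; host, database and payload values are placeholders.
# ---------------------------------------------------------------------------
def example_write_chunk(adapter):
    from influxdb import InfluxDBClient

    meta = {'measurement': 'luftdaten_info'}
    data = {
        'time': '2015-10-17T19:30:00Z',
        'geohash': 'u33dbm6',
        'sensor_id': 412,
        'sensor_type': 'SDS011',
        'P1': 11.3,
        'P2': 8.7,
    }

    # Timestamp, geohash and sensor metadata are moved into "time" and "tags",
    # the remaining readings end up in "fields".
    chunk = adapter.format_chunk(meta, data)

    # "time_precision" is carried alongside the point and passed to
    # ``write_points`` separately; it is not part of the point format itself.
    time_precision = chunk.pop('time_precision', 'n')

    # Placeholder connection parameters.
    client = InfluxDBClient(host='localhost', port=8086, database='example_db')
    return client.write_points([chunk], time_precision=time_precision)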