Example #1
    def transform(cls, data):
        """
        Compute InfluxDB query expression from data in transformation dictionary.
        Also compute date range from query parameters "from" and "to".
        """

        from pyinfluxql import Query
        from pyinfluxql.functions import Mean

        measurement = data.measurement

        # Vanilla QL (v1)
        #expression = 'SELECT * FROM {measurement}'.format(measurement=measurement)

        # PyInfluxQL (v2)
        # https://github.com/jjmalina/pyinfluxql

        # Labs
        #time_begin = arrow.utcnow() - arrow.Arrow(hour=1)
        #expression = Query('*').from_(measurement).where(time__gt=datetime.utcnow() - timedelta(hours=1))
        #expression = Query(Mean('*')).from_(measurement).where(time__gt=datetime.now() - timedelta(1)).group_by(time=timedelta(hours=1))

        # Fix up "measurement" if starting with numeric value
        # TODO: Fix should go to pyinfluxql
        if is_number(measurement[0]):
            measurement = '"{measurement}"'.format(measurement=measurement)

        # TODO: Use ".date_range" API method
        time_begin, time_end = compute_daterange(data.get('from'),
                                                 data.get('to'))

        tags = {}
        #tags = InfluxDBAdapter.get_tags(data)

        expression = Query('*').from_(measurement).where(time__gte=time_begin,
                                                         time__lte=time_end,
                                                         **tags)

        result = {
            'expression': str(expression),
            'time_begin': time_begin,
            'time_end': time_end,
        }

        return result
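
For orientation, here is a minimal standalone sketch of how the PyInfluxQL expression above is assembled. It only uses calls that already appear in the method (Query('*').from_(...).where(...)); the measurement name and the one-hour time window are made-up stand-ins for whatever compute_daterange() would return.

from datetime import datetime, timedelta

from pyinfluxql import Query

# Hypothetical inputs standing in for the transformation dictionary.
measurement = 'hiveeyes_100'
time_end = datetime.utcnow()
time_begin = time_end - timedelta(hours=1)

# Same call chain as in transform() above.
expression = Query('*').from_(measurement).where(time__gte=time_begin, time__lte=time_end)

print(str(expression))
# Roughly: SELECT * FROM hiveeyes_100 WHERE time >= '...' AND time <= '...'
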
Example #2
    def transform(cls, data):
        """
        Compute InfluxDB query expression from data in transformation dictionary.
        Also compute date range from query parameters "from" and "to".
        """

        from pyinfluxql import Query

        measurement = data.measurement

        # Vanilla QL (v1)
        #expression = 'SELECT * FROM {measurement}'.format(measurement=measurement)

        # PyInfluxQL (v2)
        # https://github.com/jjmalina/pyinfluxql

        # Labs
        #time_begin = arrow.utcnow() - arrow.Arrow(hour=1)
        #expression = Query('*').from_(measurement).where(time__gt=datetime.utcnow() - timedelta(hours=1))
        #expression = Query(Mean('*')).from_(measurement).where(time__gt=datetime.now() - timedelta(1)).group_by(time=timedelta(hours=1))

        # Fix up "measurement" if starting with numeric value
        # TODO: Fix should go to pyinfluxql
        if is_number(measurement[0]):
            measurement = '"{measurement}"'.format(measurement=measurement)

        # TODO: Use ".date_range" API method
        time_begin, time_end = compute_daterange(data.get('from'), data.get('to'))

        tags = {}
        #tags = InfluxDBAdapter.get_tags(data)

        expression = Query('*').from_(measurement).where(time__gte=time_begin, time__lte=time_end, **tags)

        result = {
            'expression': str(expression),
            'time_begin': time_begin,
            'time_end':   time_end,
        }

        return result
Example #3
    def format_chunk(self, meta, data):
        """
        Format for InfluxDB >= 0.9::
        {
            "measurement": "hiveeyes_100",
            "tags": {
                "host": "server01",
                "region": "europe"
            },
            "time": "2015-10-17T19:30:00Z",
            "fields": {
                "value": 0.42
            }
        }
        """

        assert isinstance(data, dict), 'Data payload is not a dictionary'

        chunk = {
            "measurement": meta['measurement'],
            "tags": {},
        }
        """
        if "gateway" in meta:
            chunk["tags"]["gateway"] = meta["gateway"]

        if "node" in meta:
            chunk["tags"]["node"]    = meta["node"]
        """

        # Extract timestamp field from data
        chunk['time_precision'] = 'n'
        for time_field in ['time', 'datetime', 'dateTime']:
            if time_field in data:

                # WeeWX. TODO: Move to specific vendor configuration.
                # Disabled in favor of precision detection heuristic.
                #if time_field == 'dateTime':
                #    chunk['time_precision'] = 's'

                # Process timestamp field.
                if data[time_field]:

                    # Decode timestamp.
                    chunk['time'] = data[time_field]
                    if is_number(chunk['time']):
                        chunk['time'] = int(float(chunk['time']))

                    # Remove timestamp from data payload.
                    del data[time_field]

                    # If we already found a timestamp field,
                    # don't look for more.
                    break

        # Extract geohash from data. Finally, thanks Rich!
        # TODO: Also precompute geohash with 3-4 different zoomlevels and add them as tags
        if "geohash" in data:
            chunk["tags"]["geohash"] = data["geohash"]
            del data['geohash']

        if "latitude" in data and "longitude" in data:
            chunk["tags"]["latitude"] = data["latitude"]
            chunk["tags"]["longitude"] = data["longitude"]
            del data['latitude']
            del data['longitude']

        # Extract more information specific to luftdaten.info
        for field in [
                'location', 'location_id', 'location_name', 'sensor_id',
                'sensor_type'
        ]:
            if field in data:
                chunk["tags"][field] = data[field]
                del data[field]

        # TODO: Maybe do this at data acquisition / transformation time, not here.
        if 'time' in chunk:
            timestamp = chunk['time'] = parse_timestamp(chunk['time'])

            # Heuristically compute timestamp precision
            if isinstance(timestamp, int):
                if timestamp >= 1e17 or timestamp <= -1e17:
                    time_precision = 'n'
                elif timestamp >= 1e14 or timestamp <= -1e14:
                    time_precision = 'u'
                elif timestamp >= 1e11 or timestamp <= -1e11:
                    time_precision = 'ms'

                # FIXME: Is this a reasonable default?
                else:
                    time_precision = 's'

                chunk['time_precision'] = time_precision
            """
            # FIXME: Breaks CSV data acquisition. Why?
            if isinstance(chunk['time'], datetime.datetime):
                if chunk['time'].microsecond == 0:
                    chunk['time_precision'] = 's'
            """
        """
        Prevent errors like
        ERROR: InfluxDBClientError: 400:
                       write failed: field type conflict:
                       input field "pitch" on measurement "01_position" is type float64, already exists as type integer
        """
        self.data_to_float(data)

        assert data, 'Data payload is empty'

        chunk["fields"] = data

        return chunk
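
The timestamp precision heuristic above reads well as a small standalone function. This sketch (the name guess_time_precision is made up for illustration) just mirrors the threshold logic from format_chunk():

def guess_time_precision(timestamp):
    """
    Infer the InfluxDB time precision from the magnitude of an
    integer epoch timestamp, as done heuristically above.
    """
    if timestamp >= 1e17 or timestamp <= -1e17:
        return 'n'   # nanoseconds
    elif timestamp >= 1e14 or timestamp <= -1e14:
        return 'u'   # microseconds
    elif timestamp >= 1e11 or timestamp <= -1e11:
        return 'ms'  # milliseconds
    else:
        return 's'   # seconds (the FIXME default above)

# 2015-10-17T19:30:00Z as epoch milliseconds vs. epoch seconds.
print(guess_time_precision(1445110200000))  # 'ms'
print(guess_time_precision(1445110200))     # 's'
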
Example #4
    def format_chunk(self, meta, data):
        """
        Format for InfluxDB >= 0.9::
        {
            "measurement": "hiveeyes_100",
            "tags": {
                "host": "server01",
                "region": "europe"
            },
            "time": "2015-10-17T19:30:00Z",
            "fields": {
                "value": 0.42
            }
        }
        """

        assert isinstance(data, dict), 'Data payload is not a dictionary'

        chunk = {
            "measurement": meta['measurement'],
            "tags": {},
        }

        """
        if "gateway" in meta:
            chunk["tags"]["gateway"] = meta["gateway"]

        if "node" in meta:
            chunk["tags"]["node"]    = meta["node"]
        """

        # Extract timestamp field from data
        chunk['time_precision'] = 'n'
        for time_field in ['time', 'datetime', 'dateTime']:
            if time_field in data:

                # WeeWX. TODO: Move to specific vendor configuration.
                # Disabled in favor of precision detection heuristic.
                #if time_field == 'dateTime':
                #    chunk['time_precision'] = 's'

                # Process timestamp field.
                if data[time_field]:

                    # Decode timestamp.
                    chunk['time'] = data[time_field]
                    if is_number(chunk['time']):
                        chunk['time'] = int(float(chunk['time']))

                    # Remove timestamp from data payload.
                    del data[time_field]

                    # If we already found a timestamp field,
                    # don't look for more.
                    break

        # Extract geohash from data. Finally, thanks Rich!
        # TODO: Also precompute geohash with 3-4 different zoomlevels and add them as tags
        if "geohash" in data:
            chunk["tags"]["geohash"] = data["geohash"]
            del data['geohash']

        # Extract more information specific to luftdaten.info
        for field in ['location', 'location_id', 'location_name', 'sensor_id', 'sensor_type']:
            if field in data:
                chunk["tags"][field] = data[field]
                del data[field]

        # TODO: Maybe do this at data acquisition / transformation time, not here.
        if 'time' in chunk:
            timestamp = chunk['time'] = parse_timestamp(chunk['time'])

            # Heuristically compute timestamp precision
            if isinstance(timestamp, int):
                if timestamp >= 1e17 or timestamp <= -1e17:
                    time_precision = 'n'
                elif timestamp >= 1e14 or timestamp <= -1e14:
                    time_precision = 'u'
                elif timestamp >= 1e11 or timestamp <= -1e11:
                    time_precision = 'ms'

                # FIXME: Is this a reasonable default?
                else:
                    time_precision = 's'

                chunk['time_precision'] = time_precision

            """
            # FIXME: Breaks CSV data acquisition. Why?
            if isinstance(chunk['time'], datetime.datetime):
                if chunk['time'].microsecond == 0:
                    chunk['time_precision'] = 's'
            """

        """
        Prevent errors like
        ERROR: InfluxDBClientError: 400:
                       write failed: field type conflict:
                       input field "pitch" on measurement "01_position" is type float64, already exists as type integer
        """
        self.data_to_float(data)

        assert data, 'Data payload is empty'

        chunk["fields"] = data

        return chunk
Example #5
    def format_chunk(self, meta, data):
        """
        Format for InfluxDB >= 0.9::
        {
            "measurement": "hiveeyes_100",
            "tags": {
                "host": "server01",
                "region": "europe"
            },
            "time": "2015-10-17T19:30:00Z",
            "fields": {
                "value": 0.42
            }
        }
        """

        assert isinstance(data, dict), 'Data payload is not a dictionary'

        chunk = {
            "measurement": meta['measurement'],
            "tags": {},
        }
        """
        if "gateway" in meta:
            chunk["tags"]["gateway"] = meta["gateway"]

        if "node" in meta:
            chunk["tags"]["node"]    = meta["node"]
        """

        # Extract timestamp field from data
        chunk['time_precision'] = 'n'
        for time_field in ['time', 'dateTime']:
            if time_field in data:

                if data[time_field]:
                    chunk['time'] = data[time_field]
                    if is_number(chunk['time']):
                        chunk['time'] = int(float(chunk['time']))

                # WeeWX. TODO: Move to specific vendor configuration.
                if time_field == 'dateTime':
                    chunk['time_precision'] = 's'

                del data[time_field]

        # Extract geohash from data. Finally, thanks Rich!
        # TODO: Also precompute geohash with 3-4 different zoomlevels and add them as tags
        if "geohash" in data:
            chunk["tags"]["geohash"] = data["geohash"]
            del data['geohash']

        # Extract more information specific to luftdaten.info
        for field in [
                'location', 'location_id', 'location_name', 'sensor_id',
                'sensor_type'
        ]:
            if field in data:
                chunk["tags"][field] = data[field]
                del data[field]

        # TODO: Maybe do this at data acquisition / transformation time, not here.
        if 'time' in chunk:
            chunk['time'] = parse_timestamp(chunk['time'])
            """
            # FIXME: Breaks CSV data acquisition. Why?
            if isinstance(chunk['time'], datetime.datetime):
                if chunk['time'].microsecond == 0:
                    chunk['time_precision'] = 's'
            """
        """
        Prevent errors like
        ERROR: InfluxDBClientError: 400:
                       write failed: field type conflict:
                       input field "pitch" on measurement "01_position" is type float64, already exists as type integer
        """
        self.data_to_float(data)

        chunk["fields"] = data

        return chunk
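
A hedged usage sketch for this older variant, showing the WeeWX path where a "dateTime" field forces second precision. The adapter instance, the payload values, and the assumption that parse_timestamp() passes integer epochs through while data_to_float() leaves float values untouched are illustrative, not taken from the source.

# Hypothetical WeeWX-style payload; "dateTime" carries an epoch in seconds.
meta = {'measurement': 'weewx_01'}
data = {'dateTime': '1445110200', 'outTemp': 21.7}

chunk = adapter.format_chunk(meta, data)

# Expected shape of the result under the assumptions stated above:
#
# {
#     'measurement': 'weewx_01',
#     'tags': {},
#     'time': 1445110200,
#     'time_precision': 's',   # forced by the "dateTime" branch
#     'fields': {'outTemp': 21.7},
# }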