Esempio n. 1
0
def create_client() -> InfluxDBClient:
    """
    Create a client for the Influx database.
    Includes connection test and database and retention policy creation.

    :return: an instance of InfluxDBClient.
    """
    client = InfluxDBClient(host=INFLUXDB_HOST,
                            port=INFLUXDB_PORT,
                            username=INFLUXDB_USERNAME,
                            password=INFLUXDB_PASSWORD,
                            database=INFLUXDB_DATABASE)

    # Test connection
    try:
        client.ping()
    except requests.exceptions.ConnectionError:
        _logger.error('Connection to InfluxDB was refused!')
        exit(code=1)  # TODO replace with backoff retry

    # Create database (will pass if already exists)
    client.create_database(INFLUXDB_DATABASE)

    # Set retention policy on database
    client.create_retention_policy(name=RETENTION_NAME,
                                   duration=RETENTION_DURATION,
                                   replication=RETENTION_REPLICATION,
                                   database=INFLUXDB_DATABASE,
                                   default=True)

    _logger.info("InfluxDB client created successfully")

    return client
Esempio n. 2
0
class InfluxDBAdapter(object):
    def __init__(self, settings=None, database=None):

        settings = deepcopy(settings) or {}
        settings.setdefault('host', u'localhost')
        settings.setdefault('port', u'8086')
        settings.setdefault('username', u'root')
        settings.setdefault('password', u'root')
        settings.setdefault('database', database)

        settings.setdefault('use_udp', False)
        settings.setdefault('udp_port', u'4444')

        settings['port'] = int(settings['port'])
        settings['udp_port'] = int(settings['udp_port'])

        self.__dict__.update(**settings)

        # Bookeeping for all databases having been touched already
        self.databases_written_once = Set()

        # Knowledge about all databases to be accessed using UDP
        # TODO: Refactor to configuration setting
        self.udp_databases = [
            {
                'name': 'luftdaten_info',
                'port': u'4445'
            },
        ]
        self.host_uri = u'influxdb://{host}:{port}'.format(**self.__dict__)

        log.info(u'Storage target is {uri}', uri=self.host_uri)
        self.influx_client = InfluxDBClient(host=self.host,
                                            port=self.port,
                                            username=self.username,
                                            password=self.password,
                                            database=self.database,
                                            timeout=10)

        # TODO: Hold references to multiple UDP databases using mapping "self.udp_databases".
        self.influx_client_udp = None
        if settings['use_udp']:
            self.influx_client_udp = InfluxDBClient(
                host=self.host,
                port=self.port,
                username=self.username,
                password=self.password,
                use_udp=settings['use_udp'],
                udp_port=settings['udp_port'],
                timeout=10)

    def is_udp_database(self, name):
        for entry in self.udp_databases:
            if entry['name'] == name:
                return True
        return False

    def write(self, meta, data):

        meta_copy = deepcopy(dict(meta))
        data_copy = deepcopy(data)

        try:
            chunk = self.format_chunk(meta, data)

        except Exception as ex:
            log.failure(
                u'Could not format chunk (ex={ex_name}: {ex}): data={data}, meta={meta}',
                ex_name=ex.__class__.__name__,
                ex=ex,
                meta=meta_copy,
                data=data_copy)
            raise

        try:
            success = self.write_chunk(meta, chunk)
            return success

        except requests.exceptions.ConnectionError as ex:
            log.failure(u'Problem connecting to InfluxDB at {uri}: {ex}',
                        uri=self.host_uri,
                        ex=ex)
            raise

        except InfluxDBClientError as ex:

            if ex.code == 404 or ex.message == 'database not found':

                log.info('Creating database "{database}"',
                         database=meta.database)
                self.influx_client.create_database(meta.database)

                # Attempt second write
                success = self.write_chunk(meta, chunk)
                return success

                #log.failure('InfluxDBClientError: {ex}', ex=ex)

            # [0.8] ignore "409: database kotori-dev exists"
            # [0.9] ignore "database already exists"
            elif ex.code == 409 or ex.message == 'database already exists':
                pass
            else:
                raise

    def write_chunk(self, meta, chunk):
        if self.influx_client_udp and self.is_udp_database(
                meta.database
        ) and meta.database in self.databases_written_once:
            success = self.influx_client_udp.write_points(
                [chunk], time_precision='s', database=meta.database)
        else:
            success = self.influx_client.write_points(
                [chunk],
                time_precision=chunk['time_precision'],
                database=meta.database)
            self.databases_written_once.add(meta.database)
        if success:
            log.debug(u"Storage success: {chunk}", chunk=chunk)
        else:
            log.error(u"Storage failed:  {chunk}", chunk=chunk)
        return success

    @staticmethod
    def get_tags(data):
        return project(data, ['gateway', 'node'])

    def format_chunk(self, meta, data):
        """
        Format for InfluxDB >= 0.9::
        {
            "measurement": "hiveeyes_100",
            "tags": {
                "host": "server01",
                "region": "europe"
            },
            "time": "2015-10-17T19:30:00Z",
            "fields": {
                "value": 0.42
            }
        }
        """

        assert isinstance(data, dict), 'Data payload is not a dictionary'

        chunk = {
            "measurement": meta['measurement'],
            "tags": {},
        }
        """
        if "gateway" in meta:
            chunk["tags"]["gateway"] = meta["gateway"]

        if "node" in meta:
            chunk["tags"]["node"]    = meta["node"]
        """

        # Extract timestamp field from data
        chunk['time_precision'] = 'n'
        for time_field in ['time', 'datetime', 'dateTime']:
            if time_field in data:

                # WeeWX. TODO: Move to specific vendor configuration.
                # Disabled in favor of precision detection heuristic.
                #if time_field == 'dateTime':
                #    chunk['time_precision'] = 's'

                # Process timestamp field.
                if data[time_field]:

                    # Decode timestamp.
                    chunk['time'] = data[time_field]
                    if is_number(chunk['time']):
                        chunk['time'] = int(float(chunk['time']))

                    # Remove timestamp from data payload.
                    del data[time_field]

                    # If we found a timestamp field already,
                    # don't look out for more.
                    break

        # Extract geohash from data. Finally, thanks Rich!
        # TODO: Also precompute geohash with 3-4 different zoomlevels and add them as tags
        if "geohash" in data:
            chunk["tags"]["geohash"] = data["geohash"]
            del data['geohash']

        if "latitude" in data and "longitude" in data:
            chunk["tags"]["latitude"] = data["latitude"]
            chunk["tags"]["longitude"] = data["longitude"]
            del data['latitude']
            del data['longitude']

        # Extract more information specific to luftdaten.info
        for field in [
                'location', 'location_id', 'location_name', 'sensor_id',
                'sensor_type'
        ]:
            if field in data:
                chunk["tags"][field] = data[field]
                del data[field]

        # TODO: Maybe do this at data acquisition / transformation time, not here.
        if 'time' in chunk:
            timestamp = chunk['time'] = parse_timestamp(chunk['time'])

            # Heuristically compute timestamp precision
            if isinstance(timestamp, int):
                if timestamp >= 1e17 or timestamp <= -1e17:
                    time_precision = 'n'
                elif timestamp >= 1e14 or timestamp <= -1e14:
                    time_precision = 'u'
                elif timestamp >= 1e11 or timestamp <= -1e11:
                    time_precision = 'ms'

                # FIXME: Is this a reasonable default?
                else:
                    time_precision = 's'

                chunk['time_precision'] = time_precision
            """
            # FIXME: Breaks CSV data acquisition. Why?
            if isinstance(chunk['time'], datetime.datetime):
                if chunk['time'].microsecond == 0:
                    chunk['time_precision'] = 's'
            """
        """
        Prevent errors like
        ERROR: InfluxDBClientError: 400:
                       write failed: field type conflict:
                       input field "pitch" on measurement "01_position" is type float64, already exists as type integer
        """
        self.data_to_float(data)

        assert data, 'Data payload is empty'

        chunk["fields"] = data

        return chunk

    def data_to_float(self, data):
        return convert_floats(data)

        for key, value in data.iteritems():

            # Sanity checks
            if type(value) in types.StringTypes:
                continue

            if value is None:
                data[key] = None
                continue

            # Convert to float
            try:
                data[key] = float(value)
            except (TypeError, ValueError) as ex:
                log.warn(
                    u'Measurement "{key}: {value}" float conversion failed: {ex}',
                    key=key,
                    value=value,
                    ex=ex)
Esempio n. 3
0
class InfluxDBAdapter(object):

    def __init__(self, settings=None, database=None):

        settings = deepcopy(settings) or {}
        settings.setdefault('host', u'localhost')
        settings.setdefault('port', u'8086')
        settings.setdefault('username', u'root')
        settings.setdefault('password', u'root')
        settings.setdefault('database', database)

        settings.setdefault('use_udp', False)
        settings.setdefault('udp_port', u'4444')

        settings['port'] = int(settings['port'])
        settings['udp_port'] = int(settings['udp_port'])

        self.__dict__.update(**settings)

        # Bookeeping for all databases having been touched already
        self.databases_written_once = Set()

        # Knowledge about all databases to be accessed using UDP
        # TODO: Refactor to configuration setting
        self.udp_databases = [
            {'name': 'luftdaten_info', 'port': u'4445'},
        ]
        self.host_uri = u'influxdb://{host}:{port}'.format(**self.__dict__)

        log.info(u'Storage target is {uri}', uri=self.host_uri)
        self.influx_client = InfluxDBClient(
            host=self.host, port=self.port,
            username=self.username, password=self.password,
            database=self.database,
            timeout=10)

        # TODO: Hold references to multiple UDP databases using mapping "self.udp_databases".
        self.influx_client_udp = None
        if settings['use_udp']:
            self.influx_client_udp = InfluxDBClient(
                host=self.host, port=self.port,
                username=self.username, password=self.password,
                use_udp=settings['use_udp'], udp_port=settings['udp_port'],
                timeout=10)

    def is_udp_database(self, name):
        for entry in self.udp_databases:
            if entry['name'] == name:
                return True
        return False

    def write(self, meta, data):

        meta_copy = deepcopy(dict(meta))
        data_copy = deepcopy(data)

        try:
            chunk = self.format_chunk(meta, data)

        except Exception as ex:
            log.failure(u'Could not format chunk (ex={ex_name}: {ex}): data={data}, meta={meta}',
                ex_name=ex.__class__.__name__, ex=ex, meta=meta_copy, data=data_copy)
            raise

        try:
            success = self.write_chunk(meta, chunk)
            return success

        except requests.exceptions.ConnectionError as ex:
            log.failure(u'Problem connecting to InfluxDB at {uri}: {ex}', uri=self.host_uri, ex=ex)
            raise

        except InfluxDBClientError as ex:

            if ex.code == 404 or ex.message == 'database not found':

                log.info('Creating database "{database}"', database=meta.database)
                self.influx_client.create_database(meta.database)

                # Attempt second write
                success = self.write_chunk(meta, chunk)
                return success

                #log.failure('InfluxDBClientError: {ex}', ex=ex)

            # [0.8] ignore "409: database kotori-dev exists"
            # [0.9] ignore "database already exists"
            elif ex.code == 409 or ex.message == 'database already exists':
                pass
            else:
                raise

    def write_chunk(self, meta, chunk):
        if self.influx_client_udp and self.is_udp_database(meta.database) and meta.database in self.databases_written_once:
            success = self.influx_client_udp.write_points([chunk], time_precision='s', database=meta.database)
        else:
            success = self.influx_client.write_points([chunk], time_precision=chunk['time_precision'], database=meta.database)
            self.databases_written_once.add(meta.database)
        if success:
            log.debug(u"Storage success: {chunk}", chunk=chunk)
        else:
            log.error(u"Storage failed:  {chunk}", chunk=chunk)
        return success

    @staticmethod
    def get_tags(data):
        return project(data, ['gateway', 'node'])

    def format_chunk(self, meta, data):
        """
        Format for InfluxDB >= 0.9::
        {
            "measurement": "hiveeyes_100",
            "tags": {
                "host": "server01",
                "region": "europe"
            },
            "time": "2015-10-17T19:30:00Z",
            "fields": {
                "value": 0.42
            }
        }
        """

        assert isinstance(data, dict), 'Data payload is not a dictionary'

        chunk = {
            "measurement": meta['measurement'],
            "tags": {},
        }

        """
        if "gateway" in meta:
            chunk["tags"]["gateway"] = meta["gateway"]

        if "node" in meta:
            chunk["tags"]["node"]    = meta["node"]
        """

        # Extract timestamp field from data
        chunk['time_precision'] = 'n'
        for time_field in ['time', 'datetime', 'dateTime']:
            if time_field in data:

                # WeeWX. TODO: Move to specific vendor configuration.
                # Disabled in favor of precision detection heuristic.
                #if time_field == 'dateTime':
                #    chunk['time_precision'] = 's'

                # Process timestamp field.
                if data[time_field]:

                    # Decode timestamp.
                    chunk['time'] = data[time_field]
                    if is_number(chunk['time']):
                        chunk['time'] = int(float(chunk['time']))

                    # Remove timestamp from data payload.
                    del data[time_field]

                    # If we found a timestamp field already,
                    # don't look out for more.
                    break

        # Extract geohash from data. Finally, thanks Rich!
        # TODO: Also precompute geohash with 3-4 different zoomlevels and add them as tags
        if "geohash" in data:
            chunk["tags"]["geohash"] = data["geohash"]
            del data['geohash']

        # Extract more information specific to luftdaten.info
        for field in ['location', 'location_id', 'location_name', 'sensor_id', 'sensor_type']:
            if field in data:
                chunk["tags"][field] = data[field]
                del data[field]

        # TODO: Maybe do this at data acquisition / transformation time, not here.
        if 'time' in chunk:
            timestamp = chunk['time'] = parse_timestamp(chunk['time'])

            # Heuristically compute timestamp precision
            if isinstance(timestamp, int):
                if timestamp >= 1e17 or timestamp <= -1e17:
                    time_precision = 'n'
                elif timestamp >= 1e14 or timestamp <= -1e14:
                    time_precision = 'u'
                elif timestamp >= 1e11 or timestamp <= -1e11:
                    time_precision = 'ms'

                # FIXME: Is this a reasonable default?
                else:
                    time_precision = 's'

                chunk['time_precision'] = time_precision

            """
            # FIXME: Breaks CSV data acquisition. Why?
            if isinstance(chunk['time'], datetime.datetime):
                if chunk['time'].microsecond == 0:
                    chunk['time_precision'] = 's'
            """

        """
        Prevent errors like
        ERROR: InfluxDBClientError: 400:
                       write failed: field type conflict:
                       input field "pitch" on measurement "01_position" is type float64, already exists as type integer
        """
        self.data_to_float(data)

        assert data, 'Data payload is empty'

        chunk["fields"] = data

        return chunk

    def data_to_float(self, data):
        return convert_floats(data)

        for key, value in data.iteritems():

            # Sanity checks
            if type(value) in types.StringTypes:
                continue

            if value is None:
                data[key] = None
                continue

            # Convert to float
            try:
                data[key] = float(value)
            except (TypeError, ValueError) as ex:
                log.warn(u'Measurement "{key}: {value}" float conversion failed: {ex}', key=key, value=value, ex=ex)