Example #1
def _validate_payload(payload):
    """
    :param payload:
        The received json data in the notification.
    :return: str | None
        Error message, if any.

    Note that some attributes (e.g. type and id) are already verified by the
    Connexion framework; the checks below are kept as a double check.
    """
    # The entity must be uniquely identifiable
    if 'type' not in payload:
        return 'Entity type is required in notifications'

    if 'id' not in payload:
        return 'Entity id is required in notifications'

    # There should be at least one attribute other than id and type
    # (i.e., the changed value)
    attrs = list(iter_entity_attrs(payload))
    if len(attrs) == 0:
        log().warning(
            "Received notification containing an entity update without attributes "
            + "other than 'type' and 'id'")

    # Attributes should have a value and the modification time
    for attr in attrs:
        if not has_value(payload, attr):
            payload[attr].update({'value': None})
            log().warning(
                'An entity update is missing value for attribute {}'.format(
                    attr))
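
The validator above leans on three helpers that are not shown here: iter_entity_attrs, has_value and log(). Below is a minimal sketch of what they are assumed to do, inferred purely from how the snippets in this section use them; the real implementations may differ.

import logging


def log():
    # Assumed to return the module logger.
    return logging.getLogger(__name__)


def iter_entity_attrs(entity):
    # Assumed to yield every key of the entity except 'id' and 'type'.
    return (k for k in entity if k not in ('id', 'type'))


def has_value(entity, attr):
    # Assumed to report whether the attribute carries a usable (non-empty) value.
    value = entity.get(attr, {}).get('value')
    return value is not None and value != ''

With these stand-ins, a payload such as {'id': 'Room1', 'type': 'Room'} only triggers the warning about missing attributes, while a payload without 'id' makes _validate_payload return the corresponding error string.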
Example #2
def _validate_payload(payload):
    """
    :param payload:
        The received json data in the notification.
    :return: str | None
        Error message, if any.
    """
    # The entity must be uniquely identifiable
    if 'type' not in payload:
        return 'Entity type is required in notifications'

    # TODO: State that pattern-based ids or types are not yet supported.
    if 'id' not in payload:
        return 'Entity id is required in notifications'

    # There must be at least one attribute other than id and type
    # (i.e., the changed value)
    attrs = list(iter_entity_attrs(payload))
    if len(attrs) == 0:
        return 'Received notification without attributes other than "type" and "id"'

    # Attributes must have a value and the modification time
    for attr in attrs:
        if 'value' not in payload[attr] or payload[attr]['value'] == '':
            return 'Payload is missing value for attribute {}'.format(attr)

        # Guard against attributes that come without a metadata block.
        if 'dateModified' not in payload[attr].get('metadata', {}):
            warnings.warn("Attribute '{}' did not include a dateModified. "
                          "Assuming notification arrival time.".format(attr))
Example #3
def _validate_payload(payload):
    """
    :param payload:
        The received json data in the notification.
    :return: str | None
        Error message, if any.

    Note that some attributes (e.g. type and id) are already verified by the
    Connexion framework; the checks below are kept as a double check.
    """
    # The entity must be uniquely identifiable
    if 'type' not in payload:
        return 'Entity type is required in notifications'

    if 'id' not in payload:
        return 'Entity id is required in notifications'

    # There must be at least one attribute other than id and type
    # (i.e., the changed value)
    attrs = list(iter_entity_attrs(payload))
    if len(attrs) == 0:
        return "Received notification without attributes " \
               "other than 'type' and 'id'"

    # Attributes must have a value and the modification time
    for attr in attrs:
        if 'value' not in payload[attr] or payload[attr]['value'] == '':
            return 'Payload is missing value for attribute {}'.format(attr)
Example #4
def _get_time_index(payload):
    """
    :param payload:
        The received json data in the notification.

    :return: str
        The notification time index. E.g.: '2017-06-29T14:47:50.844'

    The strategy for now is simple. Received notifications are expected to have
    the dateModified field
    (http://docs.orioncontextbroker.apiary.io/#introduction/specification/virtual-attributes).
    If the notification lacks this attribute, we try using the "latest" of the
    modification times of any of the attributes in the notification. If there
    isn't any, the notification received time will be assumed.

    In the future, this could be enhanced with custom notifications where the
    user specifies which attribute is to be used as the "time index".
    """
    if 'dateModified' in payload:
        return payload['dateModified']['value']

    # Orion did not include dateModified at the entity level.
    # Let's use the newest of the changes in any of the attributes.
    dates = set()
    for attr in iter_entity_attrs(payload):
        if 'dateModified' in payload[attr].get('metadata', {}):
            dates.add(payload[attr]['metadata']['dateModified']['value'])
    if dates:
        return max(dates)

    # Finally, assume current timestamp as dateModified
    return datetime.now().isoformat()
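
As a quick illustration of the fallback chain (using the iter_entity_attrs sketch from earlier and an invented NGSI v2 payload, not data from the original tests): the entity carries no top-level dateModified, so the attribute-level metadata supplies the time index.

payload = {
    'id': 'Room1',
    'type': 'Room',
    'temperature': {
        'type': 'Number',
        'value': 23.5,
        'metadata': {
            'dateModified': {
                'type': 'DateTime',
                'value': '2017-06-29T14:47:50.844'
            }
        }
    }
}
assert _get_time_index(payload) == '2017-06-29T14:47:50.844'

Dropping the metadata block as well would make the function fall through to datetime.now().isoformat().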
Example #5
def _get_time_index(payload):
    """
    :param payload:
        The received json data in the notification.

    :return: str
        The notification time index. E.g.: '2017-06-29T14:47:50.844'

    The strategy for now is simple. Received notifications are expected to have
    the dateModified field
    (http://docs.orioncontextbroker.apiary.io/#introduction/specification/virtual-attributes).
    If the notification lacks this attribute, the dateModified of one of the
    attributes is used if available; otherwise, the received time is assumed.

    In the future, this could be enhanced with custom notifications where the
    user specifies which attribute is to be used as the "time index".
    """
    if 'dateModified' in payload:
        return payload['dateModified']['value']

    for attr in iter_entity_attrs(payload):
        if 'dateModified' in payload[attr].get('metadata', {}):
            return payload[attr]['metadata']['dateModified']['value']

    # Assume current timestamp as dateModified
    return datetime.now().isoformat()
Example #6
def _filter_no_type_no_value_entities(payload):
    """Drop from the payload any attribute that has neither a type nor a value."""
    attrs = list(iter_entity_attrs(payload))
    attrs.remove('time_index')
    for attr_name in attrs:
        attr = payload.get(attr_name, {})
        attr_value = attr.get('value', None)
        attr_type = attr.get('type', None)
        if not attr_type and not attr_value:
            del payload[attr_name]

    return payload
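
For illustration (again leaning on the iter_entity_attrs sketch from earlier, with an invented payload): an attribute that has neither a type nor a value is dropped, while the others are kept.

payload = {
    'id': 'Room1',
    'type': 'Room',
    'time_index': '2017-06-29T14:47:50.844',
    'temperature': {'type': 'Number', 'value': 23.5},
    'pressure': {'value': None},  # neither a type nor a value: removed
}
filtered = _filter_no_type_no_value_entities(payload)
assert 'temperature' in filtered
assert 'pressure' not in filtered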
Example #7
def _filter_empty_entities(payload):
    """Return the payload if at least one attribute carries a value, else None."""
    log().debug('Received payload: {}'.format(payload))
    attrs = list(iter_entity_attrs(payload))
    attrs.remove('time_index')
    has_value = False
    for attr_name in attrs:
        value = payload[attr_name]['value']
        # An integer zero still counts as a value; other falsy values
        # (None, '', [], 0.0) do not.
        if isinstance(value, int) or value:
            has_value = True
    if has_value:
        return payload
    return None
Example #8
def _filter_no_type_no_value_entities(payload):
    """Drop attributes that have neither a type nor a value, or are malformed."""
    attrs = list(iter_entity_attrs(payload))
    attrs.remove('time_index')
    for attr_name in attrs:
        attr = payload.get(attr_name, {})
        try:
            attr_value = attr.get('value', None)
            attr_type = attr.get('type', None)
            if not attr_type and not attr_value:
                del payload[attr_name]
        except Exception:
            # The attribute is not a dict (or is otherwise malformed): drop it.
            del payload[attr_name]

    return payload
Example #9
def check_notifications_record(notifications, records):
    """
    Check that the given NGSI notifications like those sent by Orion
    (Translator Input) are correctly transformed to a set of records
    (Translator Output).
    """
    from translators.sql_translator import NGSI_DATETIME, NGSI_ISO8601
    assert len(notifications) > 0
    assert len(records) == 1

    record = records[0]
    expected_type = record['type']
    expected_id = record['id']

    # all notifications and records should have same type and id
    assert all(map(lambda x: x['type'] == expected_type, notifications))
    assert all(map(lambda x: x['id'] == expected_id, notifications))

    index = [n[TIME_INDEX_NAME] for n in notifications]
    assert_equal_time_index_arrays(index, record['index'])

    for a in iter_entity_attrs(record):
        if a == 'index':
            continue

        r_values = record[a]['values']
        # collect values for the attribute a from the entities in the notification
        # if an entity does not have a value for the attribute use None
        n_values = [e[a]['value'] if a in e else None for e in notifications]

        if any(isinstance(x, float) for x in n_values):
            assert r_values == pytest.approx(n_values)
        else:
            if record[a].get('type', None) in (NGSI_DATETIME, NGSI_ISO8601):
                assert_equal_time_index_arrays(r_values, n_values)
            else:
                assert r_values == n_values, "{} != {}".format(
                    r_values, n_values)
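
To make the compared shapes concrete, the inputs might look like the sketch below (invented data, not fixtures from the original suite). Two notifications for the same entity collapse into one record whose per-attribute 'values' arrays line up with the time index; TIME_INDEX_NAME is assumed to be the key under which the Reporter stores each notification's time index.

notifications = [
    {'id': 'Room1', 'type': 'Room',
     TIME_INDEX_NAME: '2017-06-29T14:47:50.844',
     'temperature': {'type': 'Number', 'value': 23.5}},
    {'id': 'Room1', 'type': 'Room',
     TIME_INDEX_NAME: '2017-06-29T14:48:50.844',
     'temperature': {'type': 'Number', 'value': 24.0}},
]
records = [
    {'id': 'Room1', 'type': 'Room',
     'index': ['2017-06-29T14:47:50.844', '2017-06-29T14:48:50.844'],
     'temperature': {'type': 'Number', 'values': [23.5, 24.0]}},
]
check_notifications_record(notifications, records)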
Example #10
def _iter_date_modified_in_metadata(notification: dict) \
        -> Iterable[MaybeString]:
    for attr_name in iter_entity_attrs(notification):
        yield _meta_date_modified_attribute(notification, attr_name)
Example #11
def _iter_metadata(notification: dict, meta_name: str) -> Iterable[MaybeString]:
    for attr_name in iter_entity_attrs(notification):
        yield _meta_attribute(notification, attr_name, meta_name)
Example #12
    def _insert_entities_of_type(self,
                                 entity_type,
                                 entities,
                                 fiware_service=None,
                                 fiware_servicepath=None):
        # All entities must be of the same type and have a time index
        for e in entities:
            if e[NGSI_TYPE] != entity_type:
                msg = "Entity {} is not of type {}."
                raise ValueError(msg.format(e[NGSI_ID], entity_type))

            if self.TIME_INDEX_NAME not in e:
                import warnings
                msg = "Translating entity without TIME_INDEX. " \
                      "It should have been inserted by the 'Reporter'. {}"
                warnings.warn(msg.format(e))
                now_iso = datetime.now().isoformat(timespec='milliseconds')
                e[self.TIME_INDEX_NAME] = now_iso

        # Define column types
        # {column_name -> pg_column_type}
        table = {
            'entity_id': NGSI_TO_PG['Text'],
            'entity_type': NGSI_TO_PG['Text'],
            self.TIME_INDEX_NAME: PG_TIME_INDEX,
            FIWARE_SERVICEPATH: NGSI_TO_PG['Text']
        }

        # Preserve original attr names and types
        # {column_name -> (attr_name, attr_type)}
        original_attrs = {
            'entity_type': (NGSI_TYPE, NGSI_TEXT),
            'entity_id': (NGSI_ID, NGSI_TEXT),
            self.TIME_INDEX_NAME: (self.TIME_INDEX_NAME, NGSI_DATETIME)
        }

        for e in entities:
            for attr in iter_entity_attrs(e):
                if attr == self.TIME_INDEX_NAME:
                    continue

                if isinstance(e[attr], dict) and 'type' in e[attr]:
                    attr_t = e[attr]['type']
                else:
                    # Don't guess the type if the user didn't specify one.
                    attr_t = NGSI_TEXT

                original_attrs[attr] = (attr, attr_t)

                if attr_t not in NGSI_TO_PG:
                    # if the attribute is complex, assume NGSI StructuredValue
                    if self._attr_is_structured(e[attr]):
                        table[attr] = NGSI_TO_PG[NGSI_STRUCTURED_VALUE]
                    else:
                        supported_types = ', '.join(NGSI_TO_PG.keys())
                        msg = ("'{}' is not a supported NGSI type. "
                               "Please use any of the following: {}. "
                               "Falling back to {}.")
                        self.logger.warning(msg.format(
                            attr_t, supported_types, NGSI_TEXT))

                        table[attr] = NGSI_TO_PG[NGSI_TEXT]

                else:
                    pg_t = NGSI_TO_PG[attr_t]

                    # Github issue 24: StructuredValue == object or array
                    is_list = isinstance(e[attr].get('value', None), list)
                    if attr_t == NGSI_STRUCTURED_VALUE and is_list:
                        pg_t = PG_JSON_ARRAY

                    table[attr] = pg_t

        # Create/Update metadata table for this type
        table_name = self._et2tn(entity_type, fiware_service)
        self._update_metadata_table(table_name, original_attrs)
        self.conn.commit()

        # Sort out data table, including schema, hyper-table and any
        # new columns.
        self._prepare_data_table(table_name, table, fiware_service)
        self.conn.commit()

        # Gather attribute values
        col_names = sorted(table.keys())
        entries = []  # raw values in same order as column names
        for e in entities:
            values = self._preprocess_values(e, table, col_names,
                                             fiware_servicepath)
            entries.append(values)

        # Insert entities data
        p1 = table_name
        p2 = ', '.join(['"{}"'.format(c.lower()) for c in col_names])
        p3 = ','.join(['?'] * len(col_names))
        stmt = "insert into {} ({}) values ({})".format(p1, p2, p3)
        self.cursor.executemany(stmt, entries)
        self.conn.commit()

        return self.cursor
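
The method above assumes an NGSI_TO_PG dictionary mapping NGSI attribute types to PostgreSQL column types, plus a few related constants. A plausible sketch is shown below; the concrete type names are assumptions, not copied from the original module.

# Hypothetical NGSI -> PostgreSQL type mapping, roughly what the code above
# expects to find in NGSI_TO_PG.
NGSI_TO_PG = {
    'Text': 'text',
    'Number': 'float',
    'Integer': 'bigint',
    'Boolean': 'boolean',
    'DateTime': 'timestamp WITH TIME ZONE',
    'StructuredValue': 'jsonb',
    'geo:json': 'geometry',
}
PG_JSON_ARRAY = 'jsonb'  # used when a StructuredValue holds a list
PG_TIME_INDEX = 'timestamp WITH TIME ZONE NOT NULL'  # time index column type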
Example #13
def _iter_time_instant_in_metadata(notification: dict) -> Iterable[MaybeString]:
    for attr_name in iter_entity_attrs(notification):
        yield _meta_time_instant_attribute(notification, attr_name)
Example #14
    def _insert_entities_of_type(self,
                                 entity_type,
                                 entities,
                                 fiware_service=None,
                                 fiware_servicepath=None):
        # All entities must be of the same type and have a time index
        # Also, an entity can't have an attribute with the same name
        # as that specified by ORIGINAL_ENTITY_COL_NAME.
        for e in entities:
            if e[NGSI_TYPE] != entity_type:
                msg = "Entity {} is not of type {}."
                raise ValueError(msg.format(e[NGSI_ID], entity_type))

            if self.TIME_INDEX_NAME not in e:
                import warnings
                msg = "Translating entity without TIME_INDEX. " \
                      "It should have been inserted by the 'Reporter'. {}"
                warnings.warn(msg.format(e))
                e[self.TIME_INDEX_NAME] = current_timex()

            if ORIGINAL_ENTITY_COL in e:
                raise ValueError(
                    f"Entity {e[NGSI_ID]} has a reserved attribute name: " +
                    "'{ORIGINAL_ENTITY_COL_NAME}'")

        # Define column types
        # {column_name -> crate_column_type}
        table = {
            'entity_id': self.NGSI_TO_SQL['Text'],
            'entity_type': self.NGSI_TO_SQL['Text'],
            self.TIME_INDEX_NAME: self.NGSI_TO_SQL[TIME_INDEX],
            FIWARE_SERVICEPATH: self.NGSI_TO_SQL['Text'],
            ORIGINAL_ENTITY_COL: self.NGSI_TO_SQL[NGSI_STRUCTURED_VALUE]
        }

        # Preserve original attr names and types
        # {column_name -> (attr_name, attr_type)}
        original_attrs = {
            'entity_type': (NGSI_TYPE, NGSI_TEXT),
            'entity_id': (NGSI_ID, NGSI_TEXT),
            self.TIME_INDEX_NAME: (self.TIME_INDEX_NAME, NGSI_DATETIME),
        }

        for e in entities:
            for attr in iter_entity_attrs(e):
                if attr == self.TIME_INDEX_NAME:
                    continue

                if isinstance(e[attr], dict) and 'type' in e[attr]:
                    attr_t = e[attr]['type']
                else:
                    # Don't guess the type if the user didn't specify one.
                    # TODO Guess Type!
                    attr_t = NGSI_TEXT

                col = self._ea2cn(attr)
                original_attrs[col] = (attr, attr_t)

                if attr_t not in self.NGSI_TO_SQL:
                    # if the attribute is complex, treat it as an NGSI StructuredValue
                    # TODO we should support type name different from NGSI types
                    # but mapping to NGSI types
                    if self._attr_is_structured(e[attr]):
                        table[col] = self.NGSI_TO_SQL[NGSI_STRUCTURED_VALUE]
                    else:
                        # TODO fallback type should be defined by actual JSON type
                        supported_types = ', '.join(self.NGSI_TO_SQL.keys())
                        msg = ("'{}' is not a supported NGSI type. "
                               "Please use any of the following: {}. "
                               "Falling back to {}.")
                        self.logger.warning(
                            msg.format(attr_t, supported_types, NGSI_TEXT))

                        table[col] = self.NGSI_TO_SQL[NGSI_TEXT]

                else:
                    # Github issue 44: Disable indexing for long string
                    sql_type = self._compute_type(attr_t, e[attr])

                    # Github issue 24: StructuredValue == object or array
                    is_list = isinstance(e[attr].get('value', None), list)
                    if attr_t == NGSI_STRUCTURED_VALUE and is_list:
                        sql_type = self.NGSI_TO_SQL['Array']

                    table[col] = sql_type

        # Create/Update metadata table for this type
        table_name = self._et2tn(entity_type, fiware_service)
        self._update_metadata_table(table_name, original_attrs)
        # Sort out data table.
        self._prepare_data_table(table_name, table, fiware_service)

        # Gather attribute values
        col_names = sorted(table.keys())
        entries = []  # raw values in same order as column names
        for e in entities:
            values = self._preprocess_values(e, table, col_names,
                                             fiware_servicepath)
            entries.append(values)

        # Insert entities data
        self._insert_entity_rows(table_name, col_names, entries, entities)
        return self.cursor
Example #15
    def _insert_entities_of_type(self,
                                 entity_type,
                                 entities,
                                 fiware_service=None,
                                 fiware_servicepath=None):
        # All entities must be of the same type and have a time index
        for e in entities:
            if e[NGSI_TYPE] != entity_type:
                msg = "Entity {} is not of type {}."
                raise ValueError(msg.format(e[NGSI_ID], entity_type))

            if self.TIME_INDEX_NAME not in e:
                import warnings
                msg = "Translating entity without TIME_INDEX. " \
                      "It should have been inserted by the 'Reporter'. {}"
                warnings.warn(msg.format(e))
                now_iso = datetime.now().isoformat(timespec='milliseconds')
                e[self.TIME_INDEX_NAME] = now_iso

        # Define column types
        # {column_name -> crate_column_type}
        table = {
            'entity_id': NGSI_TO_CRATE['Text'],
            'entity_type': NGSI_TO_CRATE['Text'],
            self.TIME_INDEX_NAME: NGSI_TO_CRATE[NGSI_DATETIME],
        }

        # Preserve original attr names and types
        # {column_name -> (attr_name, attr_type)}
        original_attrs = {
            'entity_type': (NGSI_TYPE, NGSI_TEXT),
            'entity_id': (NGSI_ID, NGSI_TEXT),
            self.TIME_INDEX_NAME: (self.TIME_INDEX_NAME, NGSI_DATETIME),
        }

        for e in entities:
            for attr in iter_entity_attrs(e):
                if attr == self.TIME_INDEX_NAME:
                    continue

                if isinstance(e[attr], dict) and 'type' in e[attr]:
                    attr_t = e[attr]['type']
                else:
                    # Don't guess the type if the user didn't specify one.
                    attr_t = NGSI_TEXT

                col = self._ea2cn(attr)
                original_attrs[col] = (attr, attr_t)

                if attr_t not in NGSI_TO_CRATE:
                    # if the attribute is complex, treat it as an NGSI StructuredValue
                    if self._attr_is_structured(e[attr]):
                        table[col] = NGSI_TO_CRATE[NGSI_STRUCTURED_VALUE]
                    else:
                        supported_types = ', '.join(NGSI_TO_CRATE.keys())
                        msg = ("'{}' is not a supported NGSI type. "
                               "Please use any of the following: {}. "
                               "Falling back to {}.")
                        self.logger.warning(
                            msg.format(attr_t, supported_types, NGSI_TEXT))

                        table[col] = NGSI_TO_CRATE[NGSI_TEXT]

                else:
                    # Github issue 44: Disable indexing for long string
                    db_version = self.get_db_version()
                    crate_t = _adjust_gh_44(attr_t, e[attr], db_version)

                    # Github issue 24: StructuredValue == object or array
                    is_list = isinstance(e[attr].get('value', None), list)
                    if attr_t == NGSI_STRUCTURED_VALUE and is_list:
                        crate_t = CRATE_ARRAY_STR

                    table[col] = crate_t

        # Create/Update metadata table for this type
        table_name = self._et2tn(entity_type, fiware_service)
        self._update_metadata_table(table_name, original_attrs)

        # Create Data Table
        # NOTE. CrateDB identifiers (like column and table names) become case
        # sensitive when quoted like we do below in the CREATE TABLE statement.
        columns = ', '.join('"{}" {}'.format(cn.lower(), ct)
                            for cn, ct in table.items())
        stmt = "create table if not exists {} ({}) with " \
               "(number_of_replicas = '2-all')".format(table_name, columns)
        self.cursor.execute(stmt)

        # Gather attribute values
        col_names = sorted(table.keys())
        col_names.append(FIWARE_SERVICEPATH)
        entries = []  # raw values in same order as column names
        for e in entities:
            values = self._preprocess_values(e, col_names, fiware_servicepath)
            entries.append(values)

        # Insert entities data
        p1 = table_name
        p2 = ', '.join(['"{}"'.format(c.lower()) for c in col_names])
        p3 = ','.join(['?'] * len(col_names))
        stmt = "insert into {} ({}) values ({})".format(p1, p2, p3)
        self.cursor.executemany(stmt, entries)
        return self.cursor
Example #16
    def _postprocess_values(self, e):
        for attr in iter_entity_attrs(e):
            if 'type' in e[attr] and e[attr]['type'] == 'geo:point':
                lon, lat = e[attr]['value']
                e[attr]['value'] = "{}, {}".format(lat, lon)
        return e
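
A brief usage note, with an invented entity and translator standing for an instance of the translator class: the stored geo:point value is a [longitude, latitude] pair (as the unpacking above implies), and post-processing turns it back into the 'latitude, longitude' string form used in NGSI notifications.

entity = {
    'id': 'Car1',
    'type': 'Car',
    'location': {'type': 'geo:point', 'value': [13.3986, 52.5547]},  # [lon, lat]
}
entity = translator._postprocess_values(entity)
assert entity['location']['value'] == '52.5547, 13.3986'  # 'lat, lon'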
Example #17
    def insert(self, entities, fiware_service=None, fiware_servicepath=None):
        if not isinstance(entities, list):
            msg = "Entities expected to be of type list, but got {}"
            raise TypeError(msg.format(type(entities)))

        tables = {}  # {table_name -> {column_name -> crate_column_type}}
        entities_by_tn = {}  # {table_name -> list(entities)}
        custom_columns = {}  # {table_name -> attr_name -> custom_column}

        # Collect tables info
        for e in entities:
            tn = self._et2tn(e['type'], fiware_service)

            table = tables.setdefault(tn, {})
            entities_by_tn.setdefault(tn, []).append(e)

            if self.TIME_INDEX_NAME not in e:
                import warnings
                msg = "Translating entity without TIME_INDEX. " \
                      "It should have been inserted by the 'Reporter'. {}"
                warnings.warn(msg.format(e))
                e[self.TIME_INDEX_NAME] = datetime.now().isoformat()

            # Intentionally avoid using 'id' and 'type' as column names.
            # It's problematic for some dbs.
            table['entity_id'] = NGSI_TO_CRATE['Text']
            table['entity_type'] = NGSI_TO_CRATE['Text']
            for attr in iter_entity_attrs(e):
                if attr == self.TIME_INDEX_NAME:
                    table[self.TIME_INDEX_NAME] = NGSI_TO_CRATE['DateTime']
                else:
                    ngsi_t = e[attr]['type'] if 'type' in e[attr] else NGSI_TEXT
                    if ngsi_t not in NGSI_TO_CRATE:
                        msg = ("'{}' is not a supported NGSI type. "
                               "Please use any of the following: {}. "
                               "Falling back to {}.").format(
                                   ngsi_t, ", ".join(NGSI_TO_CRATE.keys()),
                                   NGSI_TEXT)
                        self.logger.warning(msg)
                        # Keep the original type to be saved in the metadata
                        # table, but switch to TEXT for crate column.
                        table[attr] = ngsi_t
                    else:
                        crate_t = NGSI_TO_CRATE[ngsi_t]
                        # Github issue 44: Disable indexing for long string
                        if ngsi_t == NGSI_TEXT and \
                           len(e[attr]['value']) > 32765:
                            custom_columns.setdefault(tn, {})[attr] = crate_t \
                              + ' INDEX OFF'

                        # Github issue 24: StructuredValue == object or array
                        if ngsi_t == NGSI_STRUCTURED_VALUE and \
                                isinstance(e[attr].get('value', None), list):
                            crate_t = CRATE_ARRAY_STR

                        table[attr] = crate_t

        persisted_metadata = self._process_metadata_table(tables.keys())
        new_metadata = {}

        # Create data tables
        for tn, table in tables.items():
            # Preserve original attr names and types
            original_attrs = {
                'entity_type': (NGSI_TYPE, NGSI_TEXT),
                'entity_id': (NGSI_ID, NGSI_TEXT),
            }
            for attr, t in table.items():
                if t not in CRATE_TO_NGSI:
                    original_attrs[attr.lower()] = (attr, t)
                    # Having persisted original types in metadata, weird types
                    # fall back to string for crate.
                    table[attr] = NGSI_TO_CRATE[NGSI_TEXT]
                else:
                    if attr not in ('entity_type', 'entity_id'):
                        original_attrs[attr.lower()] = (attr, CRATE_TO_NGSI[t])
                        if isinstance(e[attr], dict) and e[attr].get(
                                'type', None) == NGSI_ISO8601:
                            original_attrs[attr.lower()] = (attr, NGSI_ISO8601)
            new_metadata[tn] = original_attrs

            # Apply custom column modifiers
            for _attr_name, cc in custom_columns.setdefault(tn, {}).items():
                table[_attr_name] = cc

            # Now create data table
            columns = ', '.join('{} {}'.format(cn, ct)
                                for cn, ct in table.items())
            stmt = "create table if not exists {} ({}) with " \
                   "(number_of_replicas = '2-all')".format(tn, columns)
            self.cursor.execute(stmt)

        # Update metadata if necessary
        self._update_metadata_table(tables.keys(), persisted_metadata,
                                    new_metadata)

        # Populate data tables
        for tn, entities in entities_by_tn.items():
            col_names = sorted(tables[tn].keys())
            col_names.append(FIWARE_SERVICEPATH)

            entries = []  # raw values in same order as column names
            for e in entities:
                values = self._preprocess_values(e, col_names,
                                                 fiware_servicepath)
                entries.append(values)

            stmt = "insert into {} ({}) values ({})".format(
                tn, ', '.join(col_names), ('?,' * len(col_names))[:-1])
            self.cursor.executemany(stmt, entries)

        return self.cursor