def _define_types(ks_name, raw_cf_name):
    # Define a TextField type to analyze texts (tokenizer, ascii, etc.)
    try:
        execute("ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
                " ADD types.fieldType[@name='TextField',"
                " @class='org.apache.solr.schema.TextField']"
                " WITH {2};".format(ks_name, raw_cf_name,
                                    TEXT_SEARCH_JSON_SNIPPED))
    except Exception as ex:
        _logger.warning("Maybe te field type has been already"
                        " defined in the schema. Cause: {}".format(ex))
        pass

    # Define Point and LineString types for geospatial queries
    try:
        """
        <fieldType name="LocationField"
               class="solr.SpatialRecursivePrefixTreeFieldType"
               geo="false"
               worldBounds="ENVELOPE(-1000, 1000, 1000, -1000)"
               maxDistErr="0.001"
               units="degrees" />
               """
        execute("ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
                " ADD types.fieldType[@name='LocationField',"
                " @class='solr.SpatialRecursivePrefixTreeFieldType',"
                " @geo='false',"
                " @worldBounds='ENVELOPE(-1000, 1000, 1000, -1000)',"
                " @maxDistErr='0.001',"
                " @units='degrees'];".format(ks_name, raw_cf_name))
    except Exception as ex:
        _logger.warning("Maybe te field type has been already"
                        " defined in the schema. Cause: {}".format(ex))
        pass
def _process_field(ks_name,
                   table_name,
                   field_name,
                   field_type="StrField",
                   indexed=True,
                   stored=True,
                   multivalued=False,
                   docvalues=False,
                   dynamic=False,
                   add=True):
    try:
        if add:
            execute(
                "ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
                " ADD fields.{2}[@name='{3}', @type='{4}',"
                " @indexed='{5}', @stored='{6}',"
                " @multiValued='{7}', @docValues='{8}'];".format(
                    ks_name, table_name,
                    "field" if not dynamic else "dynamicField",
                    field_name if not dynamic else "*_{}".format(field_name),
                    field_type, "true" if indexed else "false",
                    "true" if stored else "false",
                    "true" if multivalued else "false",
                    "true" if docvalues else "false"))
        else:
            execute("ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
                    " DROP {2}".format(ks_name, table_name, field_name))
    except Exception as ex:
        if add:
            _logger.warning("Maybe the field has already been"
                            " defined in the schema. Cause: {}".format(ex))
        else:
            _logger.warning("Maybe the field is not defined"
                            " in the schema. Cause: {}".format(ex))
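A minimal usage sketch of _process_field; the keyspace, table, and field names below are hypothetical and assume a configured connection:

# Hypothetical names, for illustration only.
_process_field("my_ks", "my_table", "title", field_type="TextField")   # add a text field
_process_field("my_ks", "my_table", "tags", multivalued=True)          # multi-valued StrField
_process_field("my_ks", "my_table", "score", field_type="TrieDoubleField",
               dynamic=True)                                           # dynamicField "*_score"
_process_field("my_ks", "my_table", "obsolete", add=False)             # drop a field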
Example #3
    def test_extra_field(self):
        drop_table(self.TestModel)
        sync_table(self.TestModel)
        self.TestModel.create()
        execute("ALTER TABLE {0} add blah int".format(
            self.TestModel.column_family_name(include_keyspace=True)))
        self.TestModel.objects().all()
Example #4
def __create_keyspace(name, durable_writes, strategy_class, strategy_options, connection=None):
    cluster = get_cluster(connection)

    if name not in cluster.metadata.keyspaces:
        log.info(format_log_context("Creating keyspace %s", connection=connection), name)
        ks_meta = metadata.KeyspaceMetadata(name, durable_writes, strategy_class, strategy_options)
        execute(ks_meta.as_cql_query(), connection=connection)
    else:
        log.info(format_log_context("Not creating keyspace %s because it already exists", connection=connection), name)
def _update_options(model, connection=None):
    """Updates the table options for the given model if necessary.

    :param model: The model to update.
    :param connection: Name of the connection to use

    :return: `True`, if the options were modified in Cassandra,
        `False` otherwise.
    :rtype: bool
    """
    ks_name = model._get_keyspace()
    msg = format_log_context("Checking %s for option differences",
                             keyspace=ks_name,
                             connection=connection)
    log.debug(msg, model)
    model_options = model.__options__ or {}

    table_meta = _get_table_metadata(model, connection=connection)
    # go to CQL string first to normalize meta from different versions
    existing_option_strings = set(
        table_meta._make_option_strings(table_meta.options))
    existing_options = _options_map_from_strings(existing_option_strings)
    model_option_strings = metadata.TableMetadataV3._make_option_strings(
        model_options)
    model_options = _options_map_from_strings(model_option_strings)

    update_options = {}
    for name, value in model_options.items():
        try:
            existing_value = existing_options[name]
        except KeyError:
            msg = format_log_context(
                "Invalid table option: '%s'; known options: %s",
                keyspace=ks_name,
                connection=connection)
            raise KeyError(msg % (name, existing_options.keys()))
        if isinstance(existing_value, six.string_types):
            if value != existing_value:
                update_options[name] = value
        else:
            try:
                for k, v in value.items():
                    if existing_value[k] != v:
                        update_options[name] = value
                        break
            except KeyError:
                update_options[name] = value

    if update_options:
        options = ' AND '.join(
            metadata.TableMetadataV3._make_option_strings(update_options))
        query = "ALTER TABLE {0} WITH {1}".format(model.column_family_name(),
                                                  options)
        execute(query, connection=connection)
        return True

    return False
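A sketch of a model whose __options__ would be diffed by _update_options (the model and option values are illustrative):

from cassandra.cqlengine import columns
from cassandra.cqlengine.models import Model

class Article(Model):
    __options__ = {'default_time_to_live': 600,
                   'gc_grace_seconds': 86400}
    id = columns.UUID(primary_key=True)
    body = columns.Text()

# Once the table exists, changing __options__ and re-running the sync makes
# _update_options(Article) emit a single ALTER TABLE ... WITH ... query.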
def _process_copy_field(ks_name, table_name, src, dest, add=True):
    try:
        execute(
            "ALTER SEARCH INDEX SCHEMA ON {0}.{1} {2}"
            " copyField[@source='{3}', @dest='{4}'];".format(ks_name, table_name, "ADD" if add else "DROP", src, dest)
        )
    except Exception as ex:
        if add:
            _logger.warning("Maybe the copy field has already been"
                            " defined in the schema. Cause: {}".format(ex))
        else:
            _logger.warning("Maybe the copy field is not available"
                            " in the schema. Cause: {}".format(ex))
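Usage sketch, mirroring a field into a docValues-backed twin for faceting (names are hypothetical):

_process_field("my_ks", "my_table", "category_exact",
               field_type="StrField", stored=False, docvalues=True)
_process_copy_field("my_ks", "my_table", "category", "category_exact")  # ADD copyField
_process_copy_field("my_ks", "my_table", "category", "category_exact",
                    add=False)                                          # DROP it again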
Example #7
def _sync_type(ks_name, type_model, omit_subtypes=None, connection=None):

    syncd_sub_types = omit_subtypes or set()
    for field in type_model._fields.values():
        udts = []
        columns.resolve_udts(field, udts)
        for udt in [u for u in udts if u not in syncd_sub_types]:
            _sync_type(ks_name, udt, syncd_sub_types, connection=connection)
            syncd_sub_types.add(udt)

    type_name = type_model.type_name()
    type_name_qualified = "%s.%s" % (ks_name, type_name)

    cluster = get_cluster(connection)

    keyspace = cluster.metadata.keyspaces[ks_name]
    defined_types = keyspace.user_types

    if type_name not in defined_types:
        log.debug(format_log_context("sync_type creating new type %s", keyspace=ks_name, connection=connection), type_name_qualified)
        cql = get_create_type(type_model, ks_name)
        execute(cql, connection=connection)
        cluster.refresh_user_type_metadata(ks_name, type_name)
        type_model.register_for_keyspace(ks_name, connection=connection)
    else:
        type_meta = defined_types[type_name]
        defined_fields = type_meta.field_names
        model_fields = set()
        for field in type_model._fields.values():
            model_fields.add(field.db_field_name)
            if field.db_field_name not in defined_fields:
                execute("ALTER TYPE {0} ADD {1}".format(type_name_qualified, field.get_column_def()), connection=connection)
            else:
                field_type = type_meta.field_types[defined_fields.index(field.db_field_name)]
                if field_type != field.db_type:
                    msg = format_log_context('Existing user type {0} has field "{1}" with a type ({2}) differing from the model user type ({3}).'
                                  ' UserType should be updated.', keyspace=ks_name, connection=connection)
                    msg = msg.format(type_name_qualified, field.db_field_name, field_type, field.db_type)
                    warnings.warn(msg)
                    log.warning(msg)

        type_model.register_for_keyspace(ks_name, connection=connection)

        if len(defined_fields) == len(model_fields):
            log.info(format_log_context("Type %s did not require synchronization", keyspace=ks_name, connection=connection), type_name_qualified)
            return

        db_fields_not_in_model = model_fields.symmetric_difference(defined_fields)
        if db_fields_not_in_model:
            msg = format_log_context("Type %s has fields not referenced by model: %s", keyspace=ks_name, connection=connection)
            log.info(msg, type_name_qualified, db_fields_not_in_model)
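A sketch of a UDT model that _sync_type can create or extend (the type and its fields are illustrative):

from cassandra.cqlengine import columns
from cassandra.cqlengine.usertype import UserType

class Address(UserType):
    street = columns.Text()
    zip_code = columns.Integer()

# First run: CREATE TYPE in the keyspace; later runs: ALTER TYPE ... ADD
# for any model field missing from the already-defined type.
_sync_type("demo_ks", Address)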
Example #8
def _drop_table(model, connection=None):
    if not _allow_schema_modification():
        return

    connection = connection or model._get_connection()

    # don't try to delete non-existent tables
    meta = get_cluster(connection).metadata

    ks_name = model._get_keyspace()
    raw_cf_name = model._raw_column_family_name()

    try:
        meta.keyspaces[ks_name].tables[raw_cf_name]
        execute('DROP TABLE {0};'.format(model.column_family_name()), connection=connection)
    except KeyError:
        pass
    def _verify_statement(self, original):
        st = SelectStatement(self.table_name)
        result = execute(st)
        response = result[0]

        for assignment in original.assignments:
            self.assertEqual(response[assignment.field], assignment.value)
        self.assertEqual(len(response), 7)
def _define_location(ks_name, raw_cf_name, field_name):
    # Define Point and LineString types for geospatial queries
    try:

        """
        <fieldType name="location"
            class="solr.LatLonType"
            subFieldSuffix="_coordinate"/>
        """
        execute(
            "ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
            " ADD types.fieldType[@name='location',"
            " @class='solr.LatLonType',"
            " @subFieldSuffix='_coordinate'];".format(ks_name, raw_cf_name)
        )
        _process_field(ks_name, raw_cf_name, "coordinate", "tdouble", stored=False, dynamic=True)
    except Exception as ex:
        _logger.warning("Maybe te field type has been already" " defined in the schema. Cause: {}".format(ex))
        pass
    def test_insert_statement_execute(self):
        """
        Test to verify the execution of BaseCQLStatements using connection.execute

        @since 3.10
        @jira_ticket PYTHON-505
        @expected_result inserts a row in C*, updates the rows and then deletes
        all the rows using BaseCQLStatements

        @test_category data_types:object_mapper
        """
        partition = uuid4()
        cluster = 1

        # Verifying insert statement
        st = InsertStatement(self.table_name)
        st.add_assignment(Column(db_field='partition'), partition)
        st.add_assignment(Column(db_field='cluster'), cluster)

        st.add_assignment(Column(db_field='count'), 1)
        st.add_assignment(Column(db_field='text'), "text_for_db")
        st.add_assignment(Column(db_field='text_set'), set(("foo", "bar")))
        st.add_assignment(Column(db_field='text_list'), ["foo", "bar"])
        st.add_assignment(Column(db_field='text_map'), {
            "foo": '1',
            "bar": '2'
        })

        execute(st)
        self._verify_statement(st)

        # Verifying update statement
        where = [
            WhereClause('partition', EqualsOperator(), partition),
            WhereClause('cluster', EqualsOperator(), cluster)
        ]

        st = UpdateStatement(self.table_name, where=where)
        st.add_assignment(Column(db_field='count'), 2)
        st.add_assignment(Column(db_field='text'), "text_for_db_update")
        st.add_assignment(Column(db_field='text_set'),
                          set(("foo_update", "bar_update")))
        st.add_assignment(Column(db_field='text_list'),
                          ["foo_update", "bar_update"])
        st.add_assignment(Column(db_field='text_map'), {
            "foo": '3',
            "bar": '4'
        })

        execute(st)
        self._verify_statement(st)

        # Verifying delete statement
        execute(DeleteStatement(self.table_name, where=where))
        self.assertEqual(TestQueryUpdateModel.objects.count(), 0)
def _create_index(model, index, connection=None):
    if not management._allow_schema_modification():
        return

    connection = connection or model._get_connection()

    # don't try to create indexes in non-existent tables
    meta = management.get_cluster(connection).metadata

    ks_name = model._get_keyspace()
    raw_cf_name = model._raw_column_family_name()

    try:
        _logger.info(
            "Creating SEARCH INDEX if not exists for model: {}".format(model))
        meta.keyspaces[ks_name].tables[raw_cf_name]
        execute("CREATE SEARCH INDEX IF NOT EXISTS ON {0}.{1};".format(
            ks_name, raw_cf_name),
                timeout=30.0)

        if hasattr(index.Meta, "index_settings"):
            for param, value in index.Meta.index_settings.items():
                _logger.info("Setting index parameters: {0} = {1}".format(
                    param, value))
                execute("ALTER SEARCH INDEX CONFIG ON {0}.{1}"
                        " SET {2} = {3};".format(ks_name, raw_cf_name, param,
                                                 value))

        _define_types(ks_name, raw_cf_name)

        search_fields = [
            attr for attr in index.__class__.__dict__.values()
            if issubclass(attr.__class__, fields.SearchField)
        ]

        document_fields = []

        for search_field in search_fields:

            # Skip fields that do not have a direct mapping to the model
            if not search_field.model_attr:
                continue

            _logger.info("Processing field field {0}({1})".format(
                search_field.__class__, search_field.model_attr))

            # Force the creation of the field if it does not exist yet (the
            #   original table may have changed after the index was created)
            try:
                _process_field(ks_name,
                               raw_cf_name,
                               search_field.model_attr,
                               field_type=_get_solr_type(
                                   model, index, search_field),
                               multivalued=search_field.is_multivalued,
                               stored=True,
                               indexed=True,
                               docvalues=False)
            except Exception as ex:
                _logger.warning("Maybe te field has been already"
                                " created in the schema. Cause: {}".format(ex))

            # https://docs.datastax.com/en/
            # datastax_enterprise/5.0/
            # datastax_enterprise/srch/queriesGeoSpatial.html
            # <fieldType name="location" class="solr.LatLonType"
            #  subFieldSuffix="_coordinate"/>
            if issubclass(search_field.__class__, fields.LocationField):
                execute("ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
                        " SET fields.field[@name='{2}']@type='LocationField';".
                        format(ks_name, raw_cf_name, search_field.model_attr))

                continue

            # Facet fields
            if search_field.faceted:
                # We need to create a <field>_exact field in Solr with
                #  docValues=true and that will receive the data from
                #  the original <field> (copyFrom)
            # This <field>_exact field is the one used by Haystack to
                #  do the facet queries
                _process_field(ks_name,
                               raw_cf_name,
                               "{}_exact".format(search_field.model_attr),
                               field_type=_get_solr_type(
                                   model, index, search_field),
                               multivalued=search_field.is_multivalued,
                               stored=False,
                               docvalues=True)

                _process_copy_field(ks_name, raw_cf_name,
                                    search_field.model_attr,
                                    "{}_exact".format(search_field.model_attr))

            # Get a reference to the model column definition
            attribute = getattr(model, search_field.model_attr, None)

            # Indexed field?
            if not (attribute and isinstance(attribute.column, columns.Map)):
                execute("ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
                        " SET fields.field[@name='{2}']@indexed='{3}';".format(
                            ks_name, raw_cf_name, search_field.model_attr,
                            "true" if search_field.indexed else "false"))

            # Multi-valued field?: keep multiValued in sync with the model
            if not (attribute and isinstance(attribute.column, columns.Map)):
                execute("ALTER SEARCH INDEX SCHEMA ON {0}.{1}"
                        " SET fields.field[@name='{2}']@multiValued='{3}';".format(
                            ks_name, raw_cf_name, search_field.model_attr,
                            "true" if search_field.is_multivalued else "false"))

            # All the document fields have to be TextFields to be
            # processed as tokens
            if not (attribute and isinstance(attribute.column, columns.Map)):
                if issubclass(search_field.__class__, fields.CharField):
                    if search_field.model_attr in index.Meta.text_fields:
                        _logger.info(
                            "Changing SEARCH INDEX field {0} to TextField".
                            format(search_field.model_attr))
                        execute(
                            "ALTER SEARCH INDEX SCHEMA ON {0}.{1} "
                            " SET fields.field[@name='{2}']@type='TextField';".
                            format(ks_name, raw_cf_name,
                                   search_field.model_attr))

                        document_fields.append(search_field)
                    # else:
                    #    execute(
                    #        "ALTER SEARCH INDEX SCHEMA ON {0}.{1} "
                    #        " SET fields.field[@name='{2}']@type='StrField';".
                    #        format(
                    #            ks_name,
                    #            raw_cf_name,
                    #            search_field.model_attr))

        # If there are document fields, copy them all into the catch-all
        # "text" field
        if document_fields:
            _process_field(ks_name,
                           raw_cf_name,
                           "text",
                           field_type="TextField",
                           stored=False,
                           multivalued=True)

            for document_field in document_fields:
                _process_copy_field(ks_name, raw_cf_name,
                                    document_field.model_attr, "text")

        # Reload the index for the changes to take effect
        execute("RELOAD SEARCH INDEX ON {0}.{1};".format(ks_name, raw_cf_name))

    except KeyError:
        _logger.exception("Unable to create the search index")
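A sketch of the Haystack-style index class this function consumes. The Meta hooks (index_settings, text_fields) are this module's own convention rather than stock Haystack, and the autoCommitTime option is an assumption based on DSE search index config:

from haystack import indexes
from myapp.models import Article   # hypothetical cqlengine model

class ArticleIndex(indexes.SearchIndex, indexes.Indexable):
    text = indexes.CharField(document=True)
    title = indexes.CharField(model_attr='title', faceted=True)

    class Meta:
        index_settings = {'autoCommitTime': 30000}   # applied via ALTER SEARCH INDEX CONFIG
        text_fields = ['title']                      # promoted to the TextField type

    def get_model(self):
        return Article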
Example #13
def _sync_table(model, connection=None):
    if not _allow_schema_modification():
        return

    if not issubclass(model, Model):
        raise CQLEngineException("Models must be derived from base Model.")

    if model.__abstract__:
        raise CQLEngineException("cannot create table from abstract model")

    cf_name = model.column_family_name()
    raw_cf_name = model._raw_column_family_name()

    ks_name = model._get_keyspace()
    connection = connection or model._get_connection()

    cluster = get_cluster(connection)

    try:
        keyspace = cluster.metadata.keyspaces[ks_name]
    except KeyError:
        msg = format_log_context("Keyspace '{0}' for model {1} does not exist.", connection=connection)
        raise CQLEngineException(msg.format(ks_name, model))

    tables = keyspace.tables

    syncd_types = set()
    for col in model._columns.values():
        udts = []
        columns.resolve_udts(col, udts)
        for udt in [u for u in udts if u not in syncd_types]:
            _sync_type(ks_name, udt, syncd_types, connection=connection)

    if raw_cf_name not in tables:
        log.debug(format_log_context("sync_table creating new table %s", keyspace=ks_name, connection=connection), cf_name)
        qs = _get_create_table(model)

        try:
            execute(qs, connection=connection)
        except CQLEngineException as ex:
            # 1.2 doesn't return cf names, so we have to examine the exception
            # and ignore if it says the column family already exists
            if "Cannot add already existing column family" not in unicode(ex):
                raise
    else:
        log.debug(format_log_context("sync_table checking existing table %s", keyspace=ks_name, connection=connection), cf_name)
        table_meta = tables[raw_cf_name]

        _validate_pk(model, table_meta)

        table_columns = table_meta.columns
        model_fields = set()

        for model_name, col in model._columns.items():
            db_name = col.db_field_name
            model_fields.add(db_name)
            if db_name in table_columns:
                col_meta = table_columns[db_name]
                if col_meta.cql_type != col.db_type:
                    msg = format_log_context('Existing table {0} has column "{1}" with a type ({2}) differing from the model type ({3}).'
                                  ' Model should be updated.', keyspace=ks_name, connection=connection)
                    msg = msg.format(cf_name, db_name, col_meta.cql_type, col.db_type)
                    warnings.warn(msg)
                    log.warning(msg)

                continue

            if col.primary_key:
                msg = format_log_context("Cannot add primary key '{0}' (with db_field '{1}') to existing table {2}", keyspace=ks_name, connection=connection)
                raise CQLEngineException(msg.format(model_name, db_name, cf_name))

            query = "ALTER TABLE {0} add {1}".format(cf_name, col.get_column_def())
            execute(query, connection=connection)

        db_fields_not_in_model = model_fields.symmetric_difference(table_columns)
        if db_fields_not_in_model:
            msg = format_log_context("Table {0} has fields not referenced by model: {1}", keyspace=ks_name, connection=connection)
            log.info(msg.format(cf_name, db_fields_not_in_model))

        _update_options(model, connection=connection)

    table = cluster.metadata.keyspaces[ks_name].tables[raw_cf_name]

    indexes = [c for n, c in model._columns.items() if c.index]

    # TODO: support multiple indexes in C* 3.0+
    for column in indexes:
        index_name = _get_index_name_by_column(table, column.db_field_name)
        if index_name:
            continue

        qs = ['CREATE INDEX']
        qs += ['ON {0}'.format(cf_name)]
        qs += ['("{0}")'.format(column.db_field_name)]
        qs = ' '.join(qs)
        execute(qs, connection=connection)
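Minimal usage sketch (the model is illustrative; assumes a registered connection and an existing keyspace):

from cassandra.cqlengine import columns
from cassandra.cqlengine.models import Model

class Reading(Model):
    __keyspace__ = 'demo_ks'
    sensor_id = columns.UUID(partition_key=True)
    taken_at = columns.DateTime(primary_key=True)
    value = columns.Double(index=True)   # exercises the CREATE INDEX branch

_sync_table(Reading)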
Example #14
def _drop_keyspace(name, connection=None):
    cluster = get_cluster(connection)
    if name in cluster.metadata.keyspaces:
        execute("DROP KEYSPACE {0}".format(metadata.protect_name(name)), connection=connection)
def _define_types(ks_name, raw_cf_name):
    # Define a TextField type to analyze texts (tokenizer, ascii, etc.)
    try:
        execute(
            f"ALTER SEARCH INDEX SCHEMA ON {ks_name}.{raw_cf_name}"
            f" ADD types.fieldType[@name='TextField',"
            f" @class='org.apache.solr.schema.TextField']"
            f" WITH {TEXT_SEARCH_JSON_SNIPPED};"
        )
    except Exception as ex:
        _logger.warning("Maybe te field type TextField has been already" " defined in the schema. Cause: {}".format(ex))
        pass

    try:
        execute(
            f"ALTER SEARCH INDEX SCHEMA ON {ks_name}.{raw_cf_name}"
            f" ADD types.fieldType[@name='ISCStrField',"
            f" @class='org.apache.solr.schema.TextField']"
            f" WITH {STR_SEARCH_JSON_SNIPPED};"
        )
    except Exception as ex:
        _logger.warning("Maybe the field type ISCStrField has already been"
                        " defined in the schema. Cause: {}".format(ex))

    types = ["TupleField", "SimpleDateField"]
    for type in types:
        try:
            execute(
                f"ALTER SEARCH INDEX SCHEMA ON {ks_name}.{raw_cf_name}"
                f" ADD types.fieldType[@name='{type}',"
                f" @class='com.datastax.bdp.search.solr.core.types.{type}'];"
            )
        except Exception as ex:
            _logger.warning(f"Maybe te field type {type} has been already" " defined in the schema. Cause: {ex}")
            pass

    types = ["TrieLongField", "TrieDoubleField", "TrieIntField", "BoolField", "UUIDField", "TrieDateField"]
    for type in types:
        # Define a TrieFloatField type to analyze texts (tokenizer, ascii, etc.)
        try:
            execute(
                f"ALTER SEARCH INDEX SCHEMA ON {ks_name}.{raw_cf_name}"
                f" ADD types.fieldType[@name='{type}',"
                f" @class='org.apache.solr.schema.{type}'];"
            )
        except Exception as ex:
            _logger.warning(f"Maybe te field type {type} has been already" " defined in the schema. Cause: {ex}")
            pass

    # Define Point and LineString types for geospatial queries
    try:

        """
        <fieldType name="LocationField"
               class="solr.SpatialRecursivePrefixTreeFieldType"
               geo="false"
               worldBounds="ENVELOPE(-1000, 1000, 1000, -1000)"
               maxDistErr="0.001"
               units="degrees" />
               """
        execute(
            f"ALTER SEARCH INDEX SCHEMA ON {ks_name}.{raw_cf_name}"
            " ADD types.fieldType[@name='LocationField',"
            " @class='solr.SpatialRecursivePrefixTreeFieldType',"
            " @geo='false',"
            " @worldBounds='ENVELOPE(-1000, 1000, 1000, -1000)',"
            " @maxDistErr='0.001',"
            " @units='degrees'];"
        )
    except Exception as ex:
        _logger.warning("Maybe te field type has been already" " defined in the schema. Cause: {}".format(ex))
        pass
def _drop_unnecessary_indexes(ks_name, table_name, field_names):
    for field_name in field_names:
        try:
            execute(f"ALTER SEARCH INDEX SCHEMA ON {ks_name}.{table_name} DROP field {field_name};")
        except Exception as ex:
            _logger.warning(f"Unable to remove unnecessary field {field_name}. Cause: {ex}")