コード例 #1
0
ファイル: schema.py プロジェクト: venky6363/appscale
def create_batch_tables(cluster, session):
  """ Create the tables required for large batches.

  An existing batches table whose transaction column is not a bigint, and an
  existing batch_status table that lacks a txid_hash column, are dropped
  before the corresponding CREATE TABLE statement is issued.

  Args:
    cluster: A cassandra-driver cluster.
    session: A cassandra-driver session.
  Raises:
    cassandra.OperationTimedOut if creating a table times out. The error is
      re-raised after sleeping for SCHEMA_CHANGE_TIMEOUT seconds.
  """
  keyspace = cluster.metadata.keyspaces[KEYSPACE]

  # Get rid of a batches table that has an outdated transaction column type.
  if 'batches' in keyspace.tables:
    batch_columns = keyspace.tables['batches'].columns
    if 'transaction' in batch_columns:
      if batch_columns['transaction'].cql_type != 'bigint':
        session.execute('DROP TABLE batches', timeout=SCHEMA_CHANGE_TIMEOUT)

  logger.info('Trying to create batches')
  batches_cql = """
    CREATE TABLE IF NOT EXISTS batches (
      app text,
      transaction bigint,
      namespace text,
      path blob,
      old_value blob,
      new_value blob,
      exclude_indices text,
      PRIMARY KEY ((app, transaction), namespace, path)
    )
  """
  create_batches = SimpleStatement(batches_cql, retry_policy=NO_RETRIES)
  try:
    session.execute(create_batches, timeout=SCHEMA_CHANGE_TIMEOUT)
  except cassandra.OperationTimedOut:
    settle_warning = (
      'Encountered an operation timeout while creating batches table. '
      'Waiting {} seconds for schema to settle.')
    logger.warning(settle_warning.format(SCHEMA_CHANGE_TIMEOUT))
    time.sleep(SCHEMA_CHANGE_TIMEOUT)
    raise

  # Get rid of a batch_status table that predates the txid_hash column.
  if 'batch_status' in keyspace.tables:
    if 'txid_hash' not in keyspace.tables['batch_status'].columns:
      session.execute('DROP TABLE batch_status', timeout=SCHEMA_CHANGE_TIMEOUT)

  logger.info('Trying to create batch_status')
  status_cql = """
    CREATE TABLE IF NOT EXISTS batch_status (
      txid_hash blob PRIMARY KEY,
      applied boolean,
      op_id uuid
    )
  """
  create_status = SimpleStatement(status_cql, retry_policy=NO_RETRIES)
  try:
    session.execute(create_status, timeout=SCHEMA_CHANGE_TIMEOUT)
  except cassandra.OperationTimedOut:
    settle_warning = (
      'Encountered an operation timeout while creating batch_status table. '
      'Waiting {} seconds for schema to settle.')
    logger.warning(settle_warning.format(SCHEMA_CHANGE_TIMEOUT))
    time.sleep(SCHEMA_CHANGE_TIMEOUT)
    raise
コード例 #2
0
def rebuild_task_indexes(session):
    """ Creates index entries for all pull queue tasks.

    The pull_queue_tasks table is read in pages of 100 rows, ordered by the
    token of the partition key. For every task, a row is written to both
    pull_queue_eta_index and pull_queue_tags_index.

    Args:
      session: A cassandra-driver session.
    """
    logger.info('Rebuilding task indexes')
    batch_size = 100
    total_tasks = 0

    # The first page must not have a lower bound. token() is a hash of the
    # partition key, so token('', '', '') is an arbitrary position on the ring
    # rather than its start, and using it as a bound would skip every task
    # that hashes below it.
    select_first_page = """
      SELECT app, queue, id, lease_expires, tag FROM pull_queue_tasks
      LIMIT {}
    """.format(batch_size)
    select_next_page = """
      SELECT app, queue, id, lease_expires, tag FROM pull_queue_tasks
      WHERE token(app, queue, id) > token(%(app)s, %(queue)s, %(id)s)
      LIMIT {}
    """.format(batch_size)

    # The insert statements are the same for every task, so build them once
    # instead of once per row.
    insert_eta_index = SimpleStatement("""
        INSERT INTO pull_queue_eta_index (app, queue, eta, id, tag)
        VALUES (%(app)s, %(queue)s, %(eta)s, %(id)s, %(tag)s)
      """,
                                       retry_policy=BASIC_RETRIES)
    insert_tag_index = SimpleStatement("""
        INSERT INTO pull_queue_tags_index (app, queue, tag, eta, id)
        VALUES (%(app)s, %(queue)s, %(tag)s, %(eta)s, %(id)s)
      """,
                                       retry_policy=BASIC_RETRIES)

    last_task = None
    while True:
        if last_task is None:
            results = session.execute(select_first_page)
        else:
            # Resume right after the last task of the previous page.
            results = session.execute(select_next_page, {
                'app': last_task.app,
                'queue': last_task.queue,
                'id': last_task.id
            })

        results_list = list(results)
        for result in results_list:
            parameters = {
                'app': result.app,
                'queue': result.queue,
                'eta': result.lease_expires,
                'id': result.id,
                # The index tables store an empty string for untagged tasks.
                'tag': result.tag or ''
            }
            session.execute(insert_eta_index, parameters)
            session.execute(insert_tag_index, parameters)

        total_tasks += len(results_list)

        # A short page means the end of the table has been reached.
        if len(results_list) < batch_size:
            break

        last_task = results_list[-1]

    logger.info('Created entries for {} tasks'.format(total_tasks))
コード例 #3
0
ファイル: schema.py プロジェクト: nagyistge/appscale
def create_batch_tables(cluster, session):
    """ Create the tables required for large batches.

    The batches table is created first, followed by batch_status.

    Args:
      cluster: A cassandra-driver cluster. It is not used by this function.
      session: A cassandra-driver session.
    Raises:
      cassandra.OperationTimedOut if creating a table times out. The error is
        re-raised after sleeping for 60 seconds.
    """
    batches_cql = """
    CREATE TABLE IF NOT EXISTS batches (
      app text,
      transaction int,
      namespace text,
      path blob,
      old_value blob,
      new_value blob,
      exclude_indices text,
      PRIMARY KEY ((app, transaction), namespace, path)
    )
  """
    status_cql = """
    CREATE TABLE IF NOT EXISTS batch_status (
      app text,
      transaction int,
      applied boolean,
      PRIMARY KEY ((app), transaction)
    )
  """

    # Each entry holds the progress message, the CQL to run and the warning
    # to log if the statement times out.
    schema_changes = (
        ('Trying to create batches', batches_cql,
         'Encountered an operation timeout while creating batches table. '
         'Waiting 1 minute for schema to settle.'),
        ('Trying to create batch_status', status_cql,
         'Encountered an operation timeout while creating batch_status table. '
         'Waiting 1 minute for schema to settle.'),
    )
    for progress_message, table_cql, timeout_warning in schema_changes:
        logging.info(progress_message)
        statement = SimpleStatement(table_cql, retry_policy=NO_RETRIES)
        try:
            session.execute(statement)
        except cassandra.OperationTimedOut:
            logging.warning(timeout_warning)
            time.sleep(60)
            raise
コード例 #4
0
ファイル: py_cassandra.py プロジェクト: xiaodongmx/appscale
  def get_entity(self, table_name, row_key, column_names):
    """ Fetch the values of the given columns for a single row.

    Args:
      table_name: A string containing the name of the table.
      row_key: A string containing the row key. It is prefixed with the table
        name and a slash before being used in the query.
      column_names: A list of column names to retrieve values for.
    Returns:
      A list containing a status marker followed by the value of each
      requested column, in the order the columns were given. If the query
      returns nothing, the list only contains the status marker with
      'Not found' appended to it.
    Raises:
      AppScaleDBConnectionError if a transient Cassandra error occurs.
    """
    response = [ERROR_DEFAULT]
    full_key = bytearray('/'.join([table_name, row_key]))
    select = """
      SELECT * FROM "{table}"
      WHERE {key} = %(key)s
      AND {column} IN %(columns)s
    """.format(table=table_name,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME)
    query = SimpleStatement(select, retry_policy=self.retry_policy)
    parameters = {'key': full_key,
                  'columns': ValueSequence(column_names)}
    try:
      rows = self.session.execute(query, parameters)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      raise AppScaleDBConnectionError('Unable to fetch entity')

    # Index the returned values by column name.
    values_by_column = {column: value for (_, column, value) in rows}

    if not values_by_column:
      response[0] += 'Not found'
      return response

    response.extend([values_by_column[column] for column in column_names])
    return response
コード例 #5
0
ファイル: schema.py プロジェクト: sjones4/appscale
def create_transactions_table(session):
    """ Create the table used for storing transaction metadata.

    Args:
      session: A cassandra-driver session.
    Raises:
      cassandra.OperationTimedOut if creating the table times out. The error
        is re-raised after sleeping for SCHEMA_CHANGE_TIMEOUT seconds.
    """
    transactions_cql = """
    CREATE TABLE IF NOT EXISTS transactions (
      txid_hash blob,
      operation tinyint,
      namespace text,
      path blob,
      start_time timestamp,
      is_xg boolean,
      in_progress blob,
      entity blob,
      task blob,
      PRIMARY KEY (txid_hash, operation, namespace, path)
    ) WITH gc_grace_seconds = 120
  """
    create_transactions = SimpleStatement(transactions_cql,
                                          retry_policy=NO_RETRIES)
    try:
        session.execute(create_transactions, timeout=SCHEMA_CHANGE_TIMEOUT)
    except cassandra.OperationTimedOut:
        settle_warning = (
            'Encountered an operation timeout while creating transactions table. '
            'Waiting {} seconds for schema to settle.')
        logger.warning(settle_warning.format(SCHEMA_CHANGE_TIMEOUT))
        time.sleep(SCHEMA_CHANGE_TIMEOUT)
        raise
コード例 #6
0
    def get_entity(self, table_name, row_key, column_names):
        """ Fetch the values of the given columns for a single row.

        Args:
          table_name: A string containing the name of the table.
          row_key: A string containing the row key. It is prefixed with the
            table name and a slash before being used in the query.
          column_names: A list of column names to retrieve values for.
        Returns:
          A list containing a status marker followed by the value of each
          requested column, in the order the columns were given. If the query
          fails or returns nothing, the list only contains the status marker
          with a description of the problem appended to it.
        """
        response = [ERROR_DEFAULT]
        full_key = bytearray('/'.join([table_name, row_key]))
        select = """
      SELECT * FROM "{table}"
      WHERE {key} = %(key)s
      AND {column} IN %(columns)s
    """.format(table=table_name,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME)
        query = SimpleStatement(select, retry_policy=self.retry_policy)
        parameters = {'key': full_key, 'columns': ValueSequence(column_names)}
        try:
            rows = self.session.execute(query, parameters)
        except (cassandra.Unavailable, cassandra.Timeout,
                cassandra.CoordinationFailure, cassandra.OperationTimedOut):
            response[0] += 'Unable to fetch entity'
            return response

        # Index the returned values by column name.
        values_by_column = {column: value for (_, column, value) in rows}

        if not values_by_column:
            response[0] += 'Not found'
            return response

        response.extend(
            [values_by_column[column] for column in column_names])
        return response
コード例 #7
0
def create_groups_table(session):
    """ Create the group_updates table if it does not exist yet.

    Args:
      session: A cassandra-driver session.
    Raises:
      cassandra.OperationTimedOut if creating the table times out. The error
        is re-raised after sleeping for 60 seconds.
    """
    group_updates_cql = """
    CREATE TABLE IF NOT EXISTS group_updates (
      group blob PRIMARY KEY,
      last_update int
    )
  """
    create_group_updates = SimpleStatement(group_updates_cql,
                                           retry_policy=NO_RETRIES)
    try:
        session.execute(create_group_updates)
    except cassandra.OperationTimedOut:
        timeout_warning = (
            'Encountered an operation timeout while creating group_updates table. '
            'Waiting 1 minute for schema to settle.')
        logging.warning(timeout_warning)
        time.sleep(60)
        raise
コード例 #8
0
    def delete_row(self, table_name, row_key):
        """ Delete a single row from a table.

        This is a generator that yields the result of the asynchronous
        execute call and hands back its response through gen.Return.
        NOTE(review): presumably wrapped by a tornado coroutine decorator
        that is not visible here.

        Args:
          table_name: A string containing the name of the table.
          row_key: A string containing the row key. It is prefixed with the
            table name and a slash before being used in the query.
        Returns:
          A list containing a status marker followed by '0' on success. On a
          transient Cassandra error, the list only contains the status marker
          with 'Unable to delete row' appended to it.
        """
        status = [ERROR_DEFAULT]
        full_key = bytearray('/'.join([table_name, row_key]))

        delete_cql = 'DELETE FROM "{table}" WHERE {key} = %s'.format(
            table=table_name, key=ThriftColumn.KEY)
        statement = SimpleStatement(delete_cql, retry_policy=BASIC_RETRIES)

        try:
            yield self.tornado_cassandra.execute(statement, (full_key, ))
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            status[0] += 'Unable to delete row'
            raise gen.Return(status)

        status.append('0')
        raise gen.Return(status)
コード例 #9
0
ファイル: py_cassandra.py プロジェクト: xiaodongmx/appscale
  def delete_row(self, table_name, row_key):
    """ Delete a single row from a table.

    Args:
      table_name: A string containing the name of the table.
      row_key: A string containing the row key. It is prefixed with the table
        name and a slash before being used in the query.
    Returns:
      A list containing a status marker followed by '0' on success. On a
      transient Cassandra error, the list only contains the status marker
      with 'Unable to delete row' appended to it.
    """
    status = [ERROR_DEFAULT]
    full_key = bytearray('/'.join([table_name, row_key]))

    delete_cql = 'DELETE FROM "{table}" WHERE {key} = %s'.format(
      table=table_name, key=ThriftColumn.KEY)
    statement = SimpleStatement(delete_cql, retry_policy=self.retry_policy)

    try:
      self.session.execute(statement, (full_key,))
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      status[0] += 'Unable to delete row'
    else:
      status.append('0')
    return status
コード例 #10
0
ファイル: schema.py プロジェクト: venky6363/appscale
def create_groups_table(session):
  """ Create the group_updates table if it does not exist yet.

  Args:
    session: A cassandra-driver session.
  Raises:
    cassandra.OperationTimedOut if creating the table times out. The error is
      re-raised after sleeping for SCHEMA_CHANGE_TIMEOUT seconds.
  """
  group_updates_cql = """
    CREATE TABLE IF NOT EXISTS group_updates (
      group blob PRIMARY KEY,
      last_update bigint
    )
  """
  create_group_updates = SimpleStatement(group_updates_cql,
                                         retry_policy=NO_RETRIES)
  try:
    session.execute(create_group_updates, timeout=SCHEMA_CHANGE_TIMEOUT)
  except cassandra.OperationTimedOut:
    settle_warning = (
      'Encountered an operation timeout while creating group_updates table. '
      'Waiting {} seconds for schema to settle.')
    logger.warning(settle_warning.format(SCHEMA_CHANGE_TIMEOUT))
    time.sleep(SCHEMA_CHANGE_TIMEOUT)
    raise
コード例 #11
0
    def delete_row(self, table_name, row_key):
        """ Delete a single row from a table.

        Args:
          table_name: A string containing the name of the table.
          row_key: A string containing the row key. It is prefixed with the
            table name and a slash before being used in the query.
        Returns:
          A list containing a status marker followed by '0' on success. If
          the delete fails with one of the handled Cassandra errors, the list
          only contains the status marker with 'Unable to delete row'
          appended to it.
        """
        status = [ERROR_DEFAULT]
        full_key = bytearray('/'.join([table_name, row_key]))

        delete_cql = 'DELETE FROM "{table}" WHERE {key} = %s'.format(
            table=table_name, key=ThriftColumn.KEY)
        statement = SimpleStatement(delete_cql,
                                    retry_policy=self.retry_policy)

        try:
            self.session.execute(statement, (full_key, ))
        except (cassandra.Unavailable, cassandra.Timeout,
                cassandra.CoordinationFailure, cassandra.OperationTimedOut):
            status[0] += 'Unable to delete row'
        else:
            status.append('0')
        return status
コード例 #12
0
    def get_table(self, table_name, column_names):
        """ Fetch a list of values for the given columns in a table.

        Consecutive result rows that share a key are treated as one item.
        For each item, the values of the requested columns are appended to
        the response in the order the columns were given.

        Args:
          table_name: A string containing the name of the table.
          column_names: A list of column names to retrieve values for.
        Returns:
          A list containing a status marker followed by the values.
          Note: The response does not contain any row keys or column names.
          If the query fails or an item lacks a requested column, a
          description of the problem is appended to the status marker and the
          response is returned as it stands at that point.
        """
        response = [ERROR_DEFAULT]

        statement = 'SELECT * FROM "{table}"'.format(table=table_name)
        query = SimpleStatement(statement, retry_policy=self.retry_policy)

        try:
            results = self.session.execute(query)
        except (cassandra.Unavailable, cassandra.Timeout,
                cassandra.CoordinationFailure, cassandra.OperationTimedOut):
            response[0] += 'Unable to fetch table contents'
            return response

        # Collect one {column: value} dictionary per item. Only the columns
        # are needed afterwards, so the items are not wrapped in a dictionary
        # keyed by row key. This also avoids indexing dict.values(), which
        # does not return a list on Python 3.
        items = []
        current_item = {}
        current_key = None
        for (key, column, value) in results:
            if key != current_key:
                if current_item:
                    items.append(current_item)
                current_item = {}
                current_key = key

            current_item[column] = value
        if current_item:
            items.append(current_item)

        for item_columns in items:
            for column in column_names:
                try:
                    response.append(item_columns[column])
                except KeyError:
                    response[0] += 'Table contents did not match schema'
                    return response

        return response
コード例 #13
0
    def get_table(self, table_name, column_names):
        """ Fetch a list of values for the given columns in a table.

        This is a generator that yields the result of the asynchronous
        execute call and hands back its response through gen.Return.
        Consecutive result rows that share a key are treated as one item.
        For each item, the values of the requested columns are appended to
        the response in the order the columns were given.

        Args:
          table_name: A string containing the name of the table.
          column_names: A list of column names to retrieve values for.
        Returns:
          A list containing a status marker followed by the values.
          Note: The response does not contain any row keys or column names.
          If the query fails or an item lacks a requested column, a
          description of the problem is appended to the status marker and the
          response is returned as it stands at that point.
        """
        response = [ERROR_DEFAULT]

        statement = 'SELECT * FROM "{table}"'.format(table=table_name)
        query = SimpleStatement(statement, retry_policy=BASIC_RETRIES)

        try:
            results = yield self.tornado_cassandra.execute(query)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            response[0] += 'Unable to fetch table contents'
            raise gen.Return(response)

        # Collect one {column: value} dictionary per item. Only the columns
        # are needed afterwards, so the items are not wrapped in a dictionary
        # keyed by row key. This also avoids indexing dict.values(), which
        # does not return a list on Python 3.
        items = []
        current_item = {}
        current_key = None
        for (key, column, value) in results:
            if key != current_key:
                if current_item:
                    items.append(current_item)
                current_item = {}
                current_key = key

            current_item[column] = value
        if current_item:
            items.append(current_item)

        for item_columns in items:
            for column in column_names:
                try:
                    response.append(item_columns[column])
                except KeyError:
                    response[0] += 'Table contents did not match schema'
                    raise gen.Return(response)

        raise gen.Return(response)
コード例 #14
0
ファイル: schema.py プロジェクト: venky6363/appscale
def create_entity_ids_table(session):
  """ Create the reserved_ids table if it does not exist yet.

  Args:
    session: A cassandra-driver session.
  Raises:
    cassandra.OperationTimedOut if creating the table times out. The error is
      re-raised after sleeping for SCHEMA_CHANGE_TIMEOUT seconds.
  """
  create_table = """
    CREATE TABLE IF NOT EXISTS reserved_ids (
      project text,
      scattered boolean,
      last_reserved bigint,
      op_id uuid,
      PRIMARY KEY ((project, scattered))
    )
  """
  statement = SimpleStatement(create_table, retry_policy=NO_RETRIES)
  try:
    session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
  except cassandra.OperationTimedOut:
    # The warning names the table that the statement actually creates.
    logger.warning(
      'Encountered an operation timeout while creating reserved_ids table. '
      'Waiting {} seconds for schema to settle.'.format(SCHEMA_CHANGE_TIMEOUT))
    time.sleep(SCHEMA_CHANGE_TIMEOUT)
    raise
コード例 #15
0
ファイル: schema.py プロジェクト: sjones4/appscale
def prime_cassandra(replication):
    """ Create Cassandra keyspace and initial tables.

    Once the schema is in place, the datastore metadata table is updated
    with the version, index state and scatter property markers and, last of
    all, with the marker indicating that the database has been primed.

    Args:
      replication: An integer specifying the replication factor for the
        keyspace.
    Raises:
      AppScaleBadArg if replication factor is not greater than 0.
      TypeError if replication is not an integer.
      cassandra.cluster.NoHostAvailable if no database host can be reached
        after INITIAL_CONNECT_RETRIES retries.
      cassandra.OperationTimedOut if creating an initial table times out.
    """
    if not isinstance(replication, int):
        raise TypeError('Replication must be an integer')

    if replication <= 0:
        raise dbconstants.AppScaleBadArg(
            'Replication must be greater than zero')

    zk_client = KazooClient(hosts=appscale_info.get_zk_node_ips())
    zk_client.start()

    hosts = appscale_info.get_db_ips()

    remaining_retries = INITIAL_CONNECT_RETRIES
    while True:
        try:
            cluster = Cluster(hosts, load_balancing_policy=LB_POLICY)
            session = cluster.connect()
            break
        except cassandra.cluster.NoHostAvailable:
            remaining_retries -= 1
            if remaining_retries < 0:
                # A bare raise keeps the traceback of the failed attempt.
                raise
            time.sleep(3)
    session.default_consistency_level = ConsistencyLevel.QUORUM

    create_keyspace = """
    CREATE KEYSPACE IF NOT EXISTS "{keyspace}"
    WITH REPLICATION = %(replication)s
  """.format(keyspace=KEYSPACE)
    keyspace_replication = {
        'class': 'SimpleStrategy',
        'replication_factor': replication
    }
    session.execute(create_keyspace, {'replication': keyspace_replication},
                    timeout=SCHEMA_CHANGE_TIMEOUT)
    session.set_keyspace(KEYSPACE)

    # Give the session up to SCHEMA_CHANGE_TIMEOUT seconds to open a pool for
    # every configured host before making schema changes.
    logger.info('Waiting for all hosts to be connected')
    deadline = time.time() + SCHEMA_CHANGE_TIMEOUT
    while True:
        if time.time() > deadline:
            logger.warning(
                'Timeout when waiting for hosts to join. Continuing '
                'with connected hosts.')
            break

        if len(session.get_pool_state()) == len(hosts):
            break

        time.sleep(1)

    for table in dbconstants.INITIAL_TABLES:
        create_table = """
      CREATE TABLE IF NOT EXISTS "{table}" (
        {key} blob,
        {column} text,
        {value} blob,
        PRIMARY KEY ({key}, {column})
      ) WITH COMPACT STORAGE
    """.format(table=table,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME,
               value=ThriftColumn.VALUE)
        statement = SimpleStatement(create_table, retry_policy=NO_RETRIES)

        logger.info('Trying to create {}'.format(table))
        try:
            session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
        except cassandra.OperationTimedOut:
            logger.warning(
                'Encountered an operation timeout while creating {} table. Waiting {} '
                'seconds for schema to settle.'.format(table,
                                                       SCHEMA_CHANGE_TIMEOUT))
            time.sleep(SCHEMA_CHANGE_TIMEOUT)
            raise

    migrate_composite_index_metadata(cluster, session, zk_client)
    create_batch_tables(cluster, session)
    create_groups_table(session)
    create_transactions_table(session)
    create_entity_ids_table(session)

    # This needs to be checked before any metadata is written below.
    first_entity = session.execute('SELECT * FROM "{}" LIMIT 1'.format(
        dbconstants.APP_ENTITY_TABLE))
    existing_entities = len(list(first_entity)) == 1

    define_ua_schema(session)

    metadata_insert = """
    INSERT INTO "{table}" ({key}, {column}, {value})
    VALUES (%(key)s, %(column)s, %(value)s)
  """.format(table=dbconstants.DATASTORE_METADATA_TABLE,
             key=ThriftColumn.KEY,
             column=ThriftColumn.COLUMN_NAME,
             value=ThriftColumn.VALUE)

    def write_metadata(key, value):
        """ Stores a value in the metadata table, using key as the row key
        and as the column name. """
        session.execute(metadata_insert, {
            'key': bytearray(key),
            'column': key,
            'value': bytearray(value)
        })

    if existing_entities:
        current_version = current_datastore_version(session)
        if current_version == 1.0:
            # Instruct the groomer to reclean the indexes.
            write_metadata(cassandra_interface.INDEX_STATE_KEY,
                           str(IndexStates.DIRTY))
            write_metadata(cassandra_interface.VERSION_INFO_KEY,
                           str(CURRENT_VERSION))
    else:
        write_metadata(cassandra_interface.VERSION_INFO_KEY,
                       str(CURRENT_VERSION))

        # Mark the newly created indexes as clean.
        write_metadata(cassandra_interface.INDEX_STATE_KEY,
                       str(IndexStates.CLEAN))

        # Indicate that scatter property values do not need to be populated.
        write_metadata(cassandra_interface.SCATTER_PROP_KEY,
                       ScatterPropStates.POPULATED)

    # Indicate that the database has been successfully primed.
    write_metadata(cassandra_interface.PRIMED_KEY, str(CURRENT_VERSION))
    logger.info('Cassandra is primed.')
コード例 #16
0
def test1():
    """ Iterate over every row of test_tweet_select and discard it.

    The query is run with a fetch size of 10000 and the 'node1' execution
    profile, using the session that is defined outside of this function.
    """
    statement = SimpleStatement("SELECT * FROM test_tweet_select",
                                fetch_size=10000)
    rows = session.execute(statement, execution_profile='node1')
    for _ in rows:
        pass
コード例 #17
0
def _execute_schema_change(session, statement, errors, problem):
    """ Runs a schema change and waits for the schema to settle if it fails.

    Args:
      session: A cassandra-driver session.
      statement: A string or statement object containing the schema change.
      errors: An exception class or a tuple of exception classes to handle.
      problem: A string describing the failure. It starts the warning that
        gets logged.
    Raises:
      The handled exception, after sleeping for SCHEMA_CHANGE_TIMEOUT seconds.
    """
    try:
        session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
    except errors:
        logger.warning('{} Waiting {} seconds for schema to settle.'.format(
            problem, SCHEMA_CHANGE_TIMEOUT))
        time.sleep(SCHEMA_CHANGE_TIMEOUT)
        raise


def create_pull_queue_tables(cluster, session):
    """ Create the required tables for pull queues.

    This creates the pull_queue_tasks table (adding the op_id column if an
    existing table lacks it), the pull_queue_tasks_index table with its two
    secondary indexes, and the pull_queue_leases table.

    Args:
      cluster: A cassandra-driver cluster.
      session: A cassandra-driver session.
    Raises:
      OperationTimedOut if a schema change times out.
      InvalidRequest if creating a secondary index is rejected.
    """
    logger.info('Trying to create pull_queue_tasks')
    create_table = """
    CREATE TABLE IF NOT EXISTS pull_queue_tasks (
      app text,
      queue text,
      id text,
      payload text,
      enqueued timestamp,
      lease_expires timestamp,
      retry_count int,
      tag text,
      op_id uuid,
      PRIMARY KEY ((app, queue, id))
    )
  """
    _execute_schema_change(
        session, SimpleStatement(create_table, retry_policy=NO_RETRIES),
        OperationTimedOut,
        'Encountered an operation timeout while creating pull_queue_tasks.')

    # A table that existed before this call may lack the op_id column.
    keyspace_metadata = cluster.metadata.keyspaces[KEYSPACE]
    if 'op_id' not in keyspace_metadata.tables['pull_queue_tasks'].columns:
        _execute_schema_change(
            session, 'ALTER TABLE pull_queue_tasks ADD op_id uuid',
            OperationTimedOut,
            'Encountered a timeout when altering pull_queue_tasks.')

    logger.info('Trying to create pull_queue_tasks_index')
    create_index_table = """
    CREATE TABLE IF NOT EXISTS pull_queue_tasks_index (
      app text,
      queue text,
      eta timestamp,
      id text,
      tag text,
      tag_exists boolean,
      PRIMARY KEY ((app, queue, eta), id)
    ) WITH gc_grace_seconds = 120
  """
    _execute_schema_change(
        session, SimpleStatement(create_index_table, retry_policy=NO_RETRIES),
        OperationTimedOut,
        'Encountered an operation timeout while creating '
        'pull_queue_tasks_index.')

    logger.info('Trying to create pull_queue_tags index')
    create_index = """
    CREATE INDEX IF NOT EXISTS pull_queue_tags ON pull_queue_tasks_index (tag);
  """
    _execute_schema_change(
        session, create_index, (OperationTimedOut, InvalidRequest),
        'Encountered error while creating pull_queue_tags index.')

    # This additional index is needed for groupByTag=true,tag=None queries
    # because Cassandra can only do '=' queries on secondary indices.
    logger.info('Trying to create pull_queue_tag_exists index')
    create_index = """
    CREATE INDEX IF NOT EXISTS pull_queue_tag_exists
    ON pull_queue_tasks_index (tag_exists);
  """
    _execute_schema_change(
        session, create_index, (OperationTimedOut, InvalidRequest),
        'Encountered error while creating pull_queue_tag_exists index.')

    logger.info('Trying to create pull_queue_leases')
    create_leases_table = """
    CREATE TABLE IF NOT EXISTS pull_queue_leases (
      app text,
      queue text,
      leased timestamp,
      PRIMARY KEY ((app, queue, leased))
    ) WITH gc_grace_seconds = 120
  """
    _execute_schema_change(
        session, SimpleStatement(create_leases_table, retry_policy=NO_RETRIES),
        OperationTimedOut,
        'Encountered an operation timeout while creating pull_queue_leases.')
コード例 #18
0
def prime_cassandra(replication):
    """ Create Cassandra keyspace and initial tables.

    Once the schema is in place, the version and index state markers are
    written to the datastore metadata table if there are no entities yet.
    The marker indicating that the database has been primed is written last.

    Args:
      replication: An integer specifying the replication factor for the
        keyspace.
    Raises:
      AppScaleBadArg if replication factor is not greater than 0.
      TypeError if replication is not an integer.
      cassandra.cluster.NoHostAvailable if no database host can be reached
        after INITIAL_CONNECT_RETRIES retries.
      cassandra.OperationTimedOut if creating an initial table times out.
    """
    if not isinstance(replication, int):
        raise TypeError('Replication must be an integer')

    if replication <= 0:
        raise dbconstants.AppScaleBadArg(
            'Replication must be greater than zero')

    hosts = appscale_info.get_db_ips()

    remaining_retries = INITIAL_CONNECT_RETRIES
    while True:
        try:
            cluster = Cluster(hosts)
            session = cluster.connect()
            break
        except cassandra.cluster.NoHostAvailable:
            remaining_retries -= 1
            if remaining_retries < 0:
                # A bare raise keeps the traceback of the failed attempt.
                raise
            time.sleep(3)
    session.default_consistency_level = ConsistencyLevel.QUORUM

    create_keyspace = """
    CREATE KEYSPACE IF NOT EXISTS "{keyspace}"
    WITH REPLICATION = %(replication)s
  """.format(keyspace=KEYSPACE)
    keyspace_replication = {
        'class': 'SimpleStrategy',
        'replication_factor': replication
    }
    session.execute(create_keyspace, {'replication': keyspace_replication})
    session.set_keyspace(KEYSPACE)

    for table in dbconstants.INITIAL_TABLES:
        create_table = """
      CREATE TABLE IF NOT EXISTS "{table}" (
        {key} blob,
        {column} text,
        {value} blob,
        PRIMARY KEY ({key}, {column})
      ) WITH COMPACT STORAGE
    """.format(table=table,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME,
               value=ThriftColumn.VALUE)
        statement = SimpleStatement(create_table, retry_policy=NO_RETRIES)

        logging.info('Trying to create {}'.format(table))
        try:
            session.execute(statement)
        except cassandra.OperationTimedOut:
            logging.warning(
                'Encountered an operation timeout while creating {} table. '
                'Waiting 1 minute for schema to settle.'.format(table))
            time.sleep(60)
            raise

    create_batch_tables(cluster, session)
    create_groups_table(session)
    create_transactions_table(session)
    create_pull_queue_tables(cluster, session)

    # This needs to be checked before any metadata is written below.
    first_entity = session.execute('SELECT * FROM "{}" LIMIT 1'.format(
        dbconstants.APP_ENTITY_TABLE))
    existing_entities = len(list(first_entity)) == 1

    define_ua_schema(session)

    metadata_insert = """
    INSERT INTO "{table}" ({key}, {column}, {value})
    VALUES (%(key)s, %(column)s, %(value)s)
  """.format(table=dbconstants.DATASTORE_METADATA_TABLE,
             key=ThriftColumn.KEY,
             column=ThriftColumn.COLUMN_NAME,
             value=ThriftColumn.VALUE)

    def write_metadata(key, value):
        """ Stores a value in the metadata table, using key as the row key
        and as the column name. """
        session.execute(metadata_insert, {
            'key': bytearray(key),
            'column': key,
            'value': bytearray(value)
        })

    if not existing_entities:
        write_metadata(cassandra_interface.VERSION_INFO_KEY,
                       str(POST_JOURNAL_VERSION))

        # Mark the newly created indexes as clean.
        write_metadata(cassandra_interface.INDEX_STATE_KEY,
                       str(IndexStates.CLEAN))

    # Indicate that the database has been successfully primed.
    write_metadata(cassandra_interface.PRIMED_KEY, 'true')
    logging.info('Cassandra is primed.')
コード例 #19
0
def _execute_schema_change(session, statement, errors, problem):
    """ Runs a schema change and waits for the schema to settle if it fails.

    Args:
      session: A cassandra-driver session.
      statement: A string or statement object containing the schema change.
      errors: An exception class or a tuple of exception classes to handle.
      problem: A string describing the failure. It starts the warning that
        gets logged.
    Raises:
      The handled exception, after sleeping for SCHEMA_CHANGE_TIMEOUT seconds.
    """
    try:
        session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
    except errors:
        logger.warning('{} Waiting {} seconds for schema to settle.'.format(
            problem, SCHEMA_CHANGE_TIMEOUT))
        time.sleep(SCHEMA_CHANGE_TIMEOUT)
        raise


def create_pull_queue_tables(cluster, session):
    """ Create the required tables for pull queues.

    This creates the pull_queue_tasks table (adding the op_id column if an
    existing table lacks it), the pull_queue_eta_index and
    pull_queue_tags_index tables, and the pull_queue_leases table. If a
    pull_queue_tasks_index table with a tag_exists column is found, it is
    dropped along with its secondary indexes, and the new index tables are
    populated from the existing tasks.

    Args:
      cluster: A cassandra-driver cluster.
      session: A cassandra-driver session.
    Raises:
      OperationTimedOut if creating or altering a table times out.
    """
    logger.info('Trying to create pull_queue_tasks')
    create_table = """
    CREATE TABLE IF NOT EXISTS pull_queue_tasks (
      app text,
      queue text,
      id text,
      payload text,
      enqueued timestamp,
      lease_expires timestamp,
      retry_count int,
      tag text,
      op_id uuid,
      PRIMARY KEY ((app, queue, id))
    )
  """
    _execute_schema_change(
        session, SimpleStatement(create_table, retry_policy=NO_RETRIES),
        OperationTimedOut,
        'Encountered an operation timeout while creating pull_queue_tasks.')

    # A table that existed before this call may lack the op_id column.
    keyspace_metadata = cluster.metadata.keyspaces[KEYSPACE]
    if 'op_id' not in keyspace_metadata.tables['pull_queue_tasks'].columns:
        _execute_schema_change(
            session, 'ALTER TABLE pull_queue_tasks ADD op_id uuid',
            OperationTimedOut,
            'Encountered a timeout when altering pull_queue_tasks.')

    # The old index table is recognized by its tag_exists column. When it is
    # removed, the tables that replace it have to be populated further down.
    rebuild_indexes = False
    if ('pull_queue_tasks_index' in keyspace_metadata.tables and 'tag_exists'
            in keyspace_metadata.tables['pull_queue_tasks_index'].columns):
        rebuild_indexes = True
        logger.info('Dropping outdated pull_queue_tags index')
        session.execute('DROP INDEX IF EXISTS pull_queue_tags',
                        timeout=SCHEMA_CHANGE_TIMEOUT)

        logger.info('Dropping outdated pull_queue_tag_exists index')
        session.execute('DROP INDEX IF EXISTS pull_queue_tag_exists',
                        timeout=SCHEMA_CHANGE_TIMEOUT)

        logger.info('Dropping outdated pull_queue_tasks_index table')
        session.execute('DROP TABLE pull_queue_tasks_index',
                        timeout=SCHEMA_CHANGE_TIMEOUT)

    logger.info('Trying to create pull_queue_eta_index')
    create_index_table = """
    CREATE TABLE IF NOT EXISTS pull_queue_eta_index (
      app text,
      queue text,
      eta timestamp,
      id text,
      tag text,
      PRIMARY KEY ((app, queue, eta, id))
    ) WITH gc_grace_seconds = 120
  """
    _execute_schema_change(
        session, SimpleStatement(create_index_table, retry_policy=NO_RETRIES),
        OperationTimedOut,
        'Encountered an operation timeout while creating '
        'pull_queue_eta_index.')

    logger.info('Trying to create pull_queue_tags_index')
    create_tags_index_table = """
    CREATE TABLE IF NOT EXISTS pull_queue_tags_index (
      app text,
      queue text,
      tag text,
      eta timestamp,
      id text,
      PRIMARY KEY ((app, queue, tag, eta, id))
    ) WITH gc_grace_seconds = 120
  """
    _execute_schema_change(
        session,
        SimpleStatement(create_tags_index_table, retry_policy=NO_RETRIES),
        OperationTimedOut,
        'Encountered an operation timeout while creating '
        'pull_queue_tags_index.')

    if rebuild_indexes:
        rebuild_task_indexes(session)

    logger.info('Trying to create pull_queue_leases')
    create_leases_table = """
    CREATE TABLE IF NOT EXISTS pull_queue_leases (
      app text,
      queue text,
      leased timestamp,
      PRIMARY KEY ((app, queue, leased))
    ) WITH gc_grace_seconds = 120
  """
    _execute_schema_change(
        session, SimpleStatement(create_leases_table, retry_policy=NO_RETRIES),
        OperationTimedOut,
        'Encountered an operation timeout while creating pull_queue_leases.')