Esempio n. 1
0
    def __init__(self, log_level=logging.INFO, hosts=None):
        """ Configures a per-class logger and connects to Cassandra.

        Args:
          log_level: The level to set on the logger named after this class.
          hosts: The Cassandra hosts to connect to. When None, the hosts are
            fetched with appscale_info.get_db_ips().
        Raises:
          cassandra.cluster.NoHostAvailable if connecting still fails once
            INITIAL_CONNECT_RETRIES retries have been used up.
        """
        name = self.__class__.__name__
        self.logger = logging.getLogger(name)
        self.logger.setLevel(log_level)
        self.logger.info('Starting {}'.format(name))

        self.hosts = (hosts if hosts is not None
                      else appscale_info.get_db_ips())

        # Keep trying until a session is established, pausing three seconds
        # between attempts.
        retries_left = INITIAL_CONNECT_RETRIES
        connected = False
        while not connected:
            try:
                self.cluster = Cluster(
                    self.hosts, default_retry_policy=BASIC_RETRIES)
                self.session = self.cluster.connect(KEYSPACE)
                connected = True
            except cassandra.cluster.NoHostAvailable as no_host_error:
                if retries_left < 1:
                    raise no_host_error
                retries_left -= 1
                time.sleep(3)

        self.session.default_consistency_level = ConsistencyLevel.QUORUM
        self.prepared_statements = {}
Esempio n. 2
0
  def __init__(self, log_level=logging.INFO, hosts=None):
    """ Configures a per-class logger and connects to Cassandra.

    Args:
      log_level: The level to set on the logger named after this class.
      hosts: The Cassandra hosts to connect to. When None, the hosts are
        fetched with appscale_info.get_db_ips().
    Raises:
      cassandra.cluster.NoHostAvailable if connecting still fails once
        INITIAL_CONNECT_RETRIES retries have been used up.
    """
    name = self.__class__.__name__
    self.logger = logging.getLogger(name)
    self.logger.setLevel(log_level)
    self.logger.info('Starting {}'.format(name))

    self.hosts = hosts if hosts is not None else appscale_info.get_db_ips()

    # Keep trying until a session is established, pausing three seconds
    # between attempts.
    retries_left = INITIAL_CONNECT_RETRIES
    connected = False
    while not connected:
      try:
        self.cluster = Cluster(self.hosts, default_retry_policy=BASIC_RETRIES)
        self.session = self.cluster.connect(KEYSPACE)
        connected = True
      except cassandra.cluster.NoHostAvailable as no_host_error:
        if retries_left < 1:
          raise no_host_error
        retries_left -= 1
        time.sleep(3)

    self.session.default_consistency_level = ConsistencyLevel.QUORUM
    self.prepared_statements = {}
Esempio n. 3
0
def backup_data(path, keyname):
    """ Backup Cassandra snapshot data directories/files.

    For every DB machine this clears existing snapshots, takes a new one and
    checks that the snapshot fits in the space reported for the directory
    containing `path`. Only after every machine has passed that check does
    each machine archive its snapshot files to `path`.

    Args:
      path: A string containing the location to store the backup on each of
        the DB machines.
      keyname: A string containing the deployment's keyname.
    Raises:
      BRException if unable to find any Cassandra machines or if DB machine
        has insufficient space.
    """
    logger.info("Starting new db backup.")

    db_ips = appscale_info.get_db_ips()
    if not db_ips:
        raise BRException('Unable to find any Cassandra machines.')

    # The remote commands are the same for every machine, so build them once.
    # Raw strings are used so the backslashes meant for the remote shell are
    # not treated as (invalid) Python escape sequences.
    get_snapshot_size = (
        r'find {0} -name "snapshots" -exec du -s {{}} \;'
    ).format(APPSCALE_DATA_DIR)
    # Free space is checked on the directory that will contain the backup.
    output_dir = '/'.join(path.split('/')[:-1]) + '/'
    get_free_space = 'df {}'.format(output_dir)

    for db_ip in db_ips:
        appscale_utils.ssh(db_ip, keyname,
                           '{} clearsnapshot'.format(NODE_TOOL))
        appscale_utils.ssh(db_ip, keyname, '{} snapshot'.format(NODE_TOOL))

        du_output = appscale_utils.ssh(db_ip,
                                       keyname,
                                       get_snapshot_size,
                                       method=subprocess.check_output)
        # The first field of every non-empty output line is a size.
        backup_size = sum(
            int(line.split()[0]) for line in du_output.split('\n') if line)

        df_output = appscale_utils.ssh(db_ip,
                                       keyname,
                                       get_free_space,
                                       method=subprocess.check_output)
        # Second output line, fourth field.
        # NOTE(review): assumes df prints one line per filesystem with the
        # available space in the fourth column -- confirm on target hosts.
        available = int(df_output.split('\n')[1].split()[3])

        usable = available * PADDING_PERCENTAGE
        if backup_size > usable:
            raise BRException('{} has insufficient space: {}/{}'.format(
                db_ip, usable, backup_size))

    cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
    # NOTE(review): if find splits the matches over several tar invocations,
    # each "-cf" run would presumably overwrite the previous archive --
    # confirm this cannot happen for large snapshots.
    create_tar = (
        r'find . -regex ".*/snapshots/[0-9]*/.*" -exec tar '
        r'--transform="s/snapshots\/[0-9]*\///" -cf {0} {{}} +'
    ).format(path)
    archive_command = 'cd {} && {}'.format(cassandra_dir, create_tar)
    for db_ip in db_ips:
        appscale_utils.ssh(db_ip, keyname, archive_command)

    logger.info("Done with db backup.")
Esempio n. 4
0
    def __init__(self):
        """ Opens a Cassandra session on the configured keyspace.

        Raises:
          cassandra.cluster.NoHostAvailable if connecting still fails once
            INITIAL_CONNECT_RETRIES retries have been used up.
        """
        db_hosts = appscale_info.get_db_ips()

        # Keep trying until a session is established, pausing three seconds
        # between attempts.
        retries_left = INITIAL_CONNECT_RETRIES
        connected = False
        while not connected:
            try:
                self.session = Cluster(db_hosts).connect(keyspace=KEYSPACE)
                connected = True
            except cassandra.cluster.NoHostAvailable as no_host_error:
                if retries_left < 1:
                    raise no_host_error
                retries_left -= 1
                time.sleep(3)

        self.session.default_consistency_level = ConsistencyLevel.QUORUM
Esempio n. 5
0
def backup_data(path, keyname):
  """ Backup Cassandra snapshot data directories/files.

  For every DB machine this clears existing snapshots, takes a new one and
  checks that the snapshot fits in the space reported for the directory
  containing `path`. Only after every machine has passed that check does each
  machine archive its snapshot files to `path`.

  Args:
    path: A string containing the location to store the backup on each of the
      DB machines.
    keyname: A string containing the deployment's keyname.
  Raises:
    BRException if unable to find any Cassandra machines or if DB machine has
      insufficient space.
  """
  logging.info("Starting new db backup.")

  db_ips = appscale_info.get_db_ips()
  if not db_ips:
    raise BRException('Unable to find any Cassandra machines.')

  # The remote commands are the same for every machine, so build them once.
  # Raw strings are used so the backslashes meant for the remote shell are not
  # treated as (invalid) Python escape sequences.
  get_snapshot_size = (
    r'find {0} -name "snapshots" -exec du -s {{}} \;'
  ).format(APPSCALE_DATA_DIR)
  # Free space is checked on the directory that will contain the backup.
  output_dir = '/'.join(path.split('/')[:-1]) + '/'
  get_free_space = 'df {}'.format(output_dir)

  for db_ip in db_ips:
    appscale_utils.ssh(db_ip, keyname, '{} clearsnapshot'.format(NODE_TOOL))
    appscale_utils.ssh(db_ip, keyname, '{} snapshot'.format(NODE_TOOL))

    du_output = appscale_utils.ssh(db_ip, keyname, get_snapshot_size,
                                   method=subprocess.check_output)
    # The first field of every non-empty output line is a size.
    backup_size = sum(int(line.split()[0])
                      for line in du_output.split('\n') if line)

    df_output = appscale_utils.ssh(db_ip, keyname, get_free_space,
                                   method=subprocess.check_output)
    # Second output line, fourth field.
    # NOTE(review): assumes df prints one line per filesystem with the
    # available space in the fourth column -- confirm on target hosts.
    available = int(df_output.split('\n')[1].split()[3])

    usable = available * PADDING_PERCENTAGE
    if backup_size > usable:
      raise BRException('{} has insufficient space: {}/{}'.
        format(db_ip, usable, backup_size))

  cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
  # NOTE(review): if find splits the matches over several tar invocations,
  # each "-cf" run would presumably overwrite the previous archive -- confirm
  # this cannot happen for large snapshots.
  create_tar = (
    r'find . -regex ".*/snapshots/[0-9]*/.*" -exec tar '
    r'--transform="s/snapshots\/[0-9]*\///" -cf {0} {{}} +'
  ).format(path)
  archive_command = 'cd {} && {}'.format(cassandra_dir, create_tar)
  for db_ip in db_ips:
    appscale_utils.ssh(db_ip, keyname, archive_command)

  logging.info("Done with db backup.")
Esempio n. 6
0
def main():
    """ Starts the Hermes server.

    Parses the command line, configures logging, registers the termination
    signal handlers and serves the stats API routes selected by this node's
    roles. On the head node a ZooKeeper client is started as well and handed
    to stats_app.ProfilingManager.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='Output debug-level logging')
    args = parser.parse_args()

    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    options.define('secret', appscale_info.get_secret())

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    # Work out which roles this machine has; they decide which routes are
    # served.
    my_ip = appscale_info.get_private_ip()
    is_master = (my_ip == appscale_info.get_headnode_ip())
    is_lb = (my_ip in appscale_info.get_load_balancer_ips())
    is_tq = (my_ip in appscale_info.get_taskqueue_nodes())
    is_db = (my_ip in appscale_info.get_db_ips())

    if is_master:
        global zk_client
        zk_client = KazooClient(
            hosts=','.join(appscale_info.get_zk_node_ips()),
            connection_retry=ZK_PERSISTENT_RECONNECTS)
        zk_client.start()
        # Start watching profiling configs in ZooKeeper
        stats_app.ProfilingManager(zk_client)

    app = tornado.web.Application(
        stats_app.get_local_stats_api_routes(is_lb, is_tq, is_db) +
        stats_app.get_cluster_stats_api_routes(is_master),
        debug=False)
    app.listen(constants.HERMES_PORT)

    # IOLoop.start() does not return until the loop is stopped, so the
    # "up and listening" message has to be logged before entering the loop.
    logger.info("Hermes is up and listening on port: {}.".format(
        constants.HERMES_PORT))

    # Start loop for accepting http requests.
    IOLoop.instance().start()
Esempio n. 7
0
def main():
  """ Starts the Hermes server.

  Parses the command line, configures logging, registers the termination
  signal handlers and serves the stats API routes selected by this node's
  roles. On the head node a ZooKeeper client is started as well and handed to
  stats_app.ProfilingManager.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
    '-v', '--verbose', action='store_true',
    help='Output debug-level logging')
  args = parser.parse_args()

  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
  if args.verbose:
    logging.getLogger().setLevel(logging.DEBUG)

  options.define('secret', appscale_info.get_secret())

  signal.signal(signal.SIGTERM, signal_handler)
  signal.signal(signal.SIGINT, signal_handler)

  # Work out which roles this machine has; they decide which routes are
  # served.
  my_ip = appscale_info.get_private_ip()
  is_master = (my_ip == appscale_info.get_headnode_ip())
  is_lb = (my_ip in appscale_info.get_load_balancer_ips())
  is_tq = (my_ip in appscale_info.get_taskqueue_nodes())
  is_db = (my_ip in appscale_info.get_db_ips())

  if is_master:
    global zk_client
    zk_client = KazooClient(
      hosts=','.join(appscale_info.get_zk_node_ips()),
      connection_retry=ZK_PERSISTENT_RECONNECTS)
    zk_client.start()
    # Start watching profiling configs in ZooKeeper
    stats_app.ProfilingManager(zk_client)

  app = tornado.web.Application(
    stats_app.get_local_stats_api_routes(is_lb, is_tq, is_db)
    + stats_app.get_cluster_stats_api_routes(is_master),
    debug=False
  )
  app.listen(constants.HERMES_PORT)

  # IOLoop.start() does not return until the loop is stopped, so the
  # "up and listening" message has to be logged before entering the loop.
  logger.info("Hermes is up and listening on port: {}."
               .format(constants.HERMES_PORT))

  # Start loop for accepting http requests.
  IOLoop.instance().start()
Esempio n. 8
0
def main():
  """ Performs schema upgrades.

  Converts group_updates.last_update to a bigint column by moving its values
  through a temporary column. The steps to run are chosen from the current
  table metadata, so a run that stopped part-way can be started again.
  """
  hosts = appscale_info.get_db_ips()
  cluster = Cluster(hosts, load_balancing_policy=LB_POLICY)
  session = cluster.connect(KEYSPACE)

  table = 'group_updates'
  column = 'last_update'
  temp_column = 'last_update_temp'
  key_column = 'group'
  tables = cluster.metadata.keyspaces[KEYSPACE].tables

  assert table in tables, 'The table {} was not found'.format(table)

  columns = tables[table].columns
  assert column in columns or temp_column in columns,\
    '{}.{} was not found'.format(table, column)

  column_exists = column in columns
  if (column_exists and columns[column].cql_type == 'bigint' and
      temp_column not in columns):
    logging.info('{}.{} is already the correct type'.format(table, column))
    return

  if column_exists and columns[column].cql_type != 'bigint':
    if temp_column not in columns:
      logging.info('Adding new column with correct type')
      # NOTE(review): the temporary column is created as int while the final
      # column is bigint -- confirm this is intended.
      statement = 'ALTER TABLE {} ADD {} int'.format(table, temp_column)
      session.execute(statement)

    copy_column(session, table, key_column, column, temp_column)

    logging.info('Dropping {}.{}'.format(table, column))
    session.execute('ALTER TABLE {} DROP {}'.format(table, column))
    column_exists = False

  # The column is missing either because it was dropped just above or because
  # an earlier run stopped right after dropping it. In both cases it has to be
  # created before the values can be copied back into it.
  if not column_exists:
    logging.info('Creating {}.{}'.format(table, column))
    session.execute('ALTER TABLE {} ADD {} bigint'.format(table, column))

  copy_column(session, table, key_column, temp_column, column)

  logging.info('Dropping {}.{}'.format(table, temp_column))
  session.execute('ALTER TABLE {} DROP {}'.format(table, temp_column))

  logging.info('Schema upgrade complete')
Esempio n. 9
0
def main():
  """ Performs schema upgrades.

  Converts group_updates.last_update to a bigint column by moving its values
  through a temporary column. The steps to run are chosen from the current
  table metadata, so a run that stopped part-way can be started again.
  """
  hosts = appscale_info.get_db_ips()
  cluster = Cluster(hosts, load_balancing_policy=LB_POLICY)
  session = cluster.connect(KEYSPACE)

  table = 'group_updates'
  column = 'last_update'
  temp_column = 'last_update_temp'
  key_column = 'group'
  tables = cluster.metadata.keyspaces[KEYSPACE].tables

  assert table in tables, 'The table {} was not found'.format(table)

  columns = tables[table].columns
  assert column in columns or temp_column in columns,\
    '{}.{} was not found'.format(table, column)

  column_exists = column in columns
  if (column_exists and columns[column].cql_type == 'bigint' and
      temp_column not in columns):
    logger.info('{}.{} is already the correct type'.format(table, column))
    return

  if column_exists and columns[column].cql_type != 'bigint':
    if temp_column not in columns:
      logger.info('Adding new column with correct type')
      # NOTE(review): the temporary column is created as int while the final
      # column is bigint -- confirm this is intended.
      statement = 'ALTER TABLE {} ADD {} int'.format(table, temp_column)
      session.execute(statement)

    copy_column(session, table, key_column, column, temp_column)

    logger.info('Dropping {}.{}'.format(table, column))
    session.execute('ALTER TABLE {} DROP {}'.format(table, column))
    column_exists = False

  # The column is missing either because it was dropped just above or because
  # an earlier run stopped right after dropping it. In both cases it has to be
  # created before the values can be copied back into it.
  if not column_exists:
    logger.info('Creating {}.{}'.format(table, column))
    session.execute('ALTER TABLE {} ADD {} bigint'.format(table, column))

  copy_column(session, table, key_column, temp_column, column)

  logger.info('Dropping {}.{}'.format(table, temp_column))
  session.execute('ALTER TABLE {} DROP {}'.format(table, temp_column))

  logger.info('Schema upgrade complete')
Esempio n. 10
0
    def __init__(self, log_level=logging.INFO, hosts=None):
        """ Configures a per-class logger and connects to Cassandra.

        Besides the session, a TornadoCassandra wrapper around it is stored,
        and a `<name>_sync` attribute is added for several async methods.

        Args:
          log_level: The level to set on the logger named after this class.
          hosts: The Cassandra hosts to connect to. When None, the hosts are
            fetched with appscale_info.get_db_ips().
        Raises:
          cassandra.cluster.NoHostAvailable if connecting still fails once
            INITIAL_CONNECT_RETRIES retries have been used up.
        """
        name = self.__class__.__name__
        self.logger = logging.getLogger(name)
        self.logger.setLevel(log_level)
        self.logger.info('Starting {}'.format(name))

        self.hosts = (hosts if hosts is not None
                      else appscale_info.get_db_ips())

        # Keep trying until a session is established, pausing three seconds
        # between attempts.
        retries_left = INITIAL_CONNECT_RETRIES
        connected = False
        while not connected:
            try:
                self.cluster = Cluster(
                    self.hosts,
                    default_retry_policy=BASIC_RETRIES,
                    load_balancing_policy=LB_POLICY)
                self.session = self.cluster.connect(KEYSPACE)
                self.tornado_cassandra = TornadoCassandra(self.session)
                connected = True
            except cassandra.cluster.NoHostAvailable as no_host_error:
                if retries_left < 1:
                    raise no_host_error
                retries_left -= 1
                time.sleep(3)

        self.session.default_consistency_level = ConsistencyLevel.QUORUM
        self.prepared_statements = {}

        # Provide synchronous version of some async methods: each listed
        # method gets a counterpart attribute with a "_sync" suffix.
        async_method_names = (
            'batch_get_entity',
            'batch_put_entity',
            'batch_delete',
            'valid_data_version',
            'range_query',
            'get_metadata',
            'set_metadata',
            'get_indices',
            'delete_table',
        )
        for method_name in async_method_names:
            sync_version = tornado_synchronous(getattr(self, method_name))
            setattr(self, method_name + '_sync', sync_version)
Esempio n. 11
0
  def __init__(self):
    """ Opens a Cassandra session on the configured keyspace.

    A TornadoCassandra wrapper around the session is stored as well.

    Raises:
      cassandra.cluster.NoHostAvailable if connecting still fails once
        INITIAL_CONNECT_RETRIES retries have been used up.
    """
    db_hosts = appscale_info.get_db_ips()

    # Keep trying until a session is established, pausing three seconds
    # between attempts.
    retries_left = INITIAL_CONNECT_RETRIES
    connected = False
    while not connected:
      try:
        db_cluster = Cluster(db_hosts, load_balancing_policy=LB_POLICY)
        self.session = db_cluster.connect(keyspace=KEYSPACE)
        self.tornado_cassandra = TornadoCassandra(self.session)
        connected = True
      except cassandra.cluster.NoHostAvailable as no_host_error:
        if retries_left < 1:
          raise no_host_error
        retries_left -= 1
        time.sleep(3)

    self.session.default_consistency_level = ConsistencyLevel.QUORUM

    # Provide synchronous version of get_schema method
    sync_get_schema = tornado_synchronous(self.get_schema)
    self.get_schema_sync = sync_get_schema
Esempio n. 12
0
    def __init__(self):
        """ Opens a Cassandra session on the configured keyspace.

        A TornadoCassandra wrapper around the session is stored as well.

        Raises:
          cassandra.cluster.NoHostAvailable if connecting still fails once
            INITIAL_CONNECT_RETRIES retries have been used up.
        """
        db_hosts = appscale_info.get_db_ips()

        # Keep trying until a session is established, pausing three seconds
        # between attempts.
        retries_left = INITIAL_CONNECT_RETRIES
        connected = False
        while not connected:
            try:
                db_cluster = Cluster(db_hosts,
                                     load_balancing_policy=LB_POLICY)
                self.session = db_cluster.connect(keyspace=KEYSPACE)
                self.tornado_cassandra = TornadoCassandra(self.session)
                connected = True
            except cassandra.cluster.NoHostAvailable as no_host_error:
                if retries_left < 1:
                    raise no_host_error
                retries_left -= 1
                time.sleep(3)

        self.session.default_consistency_level = ConsistencyLevel.QUORUM

        # Provide synchronous version of get_schema method
        sync_get_schema = tornado_synchronous(self.get_schema)
        self.get_schema_sync = sync_get_schema
Esempio n. 13
0
  def __init__(self, log_level=logging.INFO, hosts=None):
    """ Configures a per-class logger and connects to Cassandra.

    Besides the session, a TornadoCassandra wrapper around it is stored, and
    a `<name>_sync` attribute is added for several async methods.

    Args:
      log_level: The level to set on the logger named after this class.
      hosts: The Cassandra hosts to connect to. When None, the hosts are
        fetched with appscale_info.get_db_ips().
    Raises:
      cassandra.cluster.NoHostAvailable if connecting still fails once
        INITIAL_CONNECT_RETRIES retries have been used up.
    """
    name = self.__class__.__name__
    self.logger = logging.getLogger(name)
    self.logger.setLevel(log_level)
    self.logger.info('Starting {}'.format(name))

    self.hosts = hosts if hosts is not None else appscale_info.get_db_ips()

    # Keep trying until a session is established, pausing three seconds
    # between attempts.
    retries_left = INITIAL_CONNECT_RETRIES
    connected = False
    while not connected:
      try:
        self.cluster = Cluster(
          self.hosts,
          default_retry_policy=BASIC_RETRIES,
          load_balancing_policy=LB_POLICY)
        self.session = self.cluster.connect(KEYSPACE)
        self.tornado_cassandra = TornadoCassandra(self.session)
        connected = True
      except cassandra.cluster.NoHostAvailable as no_host_error:
        if retries_left < 1:
          raise no_host_error
        retries_left -= 1
        time.sleep(3)

    self.session.default_consistency_level = ConsistencyLevel.QUORUM
    self.prepared_statements = {}

    # Provide synchronous version of some async methods: each listed method
    # gets a counterpart attribute with a "_sync" suffix.
    async_method_names = (
      'batch_get_entity',
      'batch_put_entity',
      'batch_delete',
      'valid_data_version',
      'range_query',
      'get_metadata',
      'set_metadata',
      'get_indices',
      'delete_table',
    )
    for method_name in async_method_names:
      sync_version = tornado_synchronous(getattr(self, method_name))
      setattr(self, method_name + '_sync', sync_version)
Esempio n. 14
0
def main():
    """ Runs the Hermes HTTP server.

    Parses the command line, configures logging, registers the stats API
    routes selected by this node's roles as GET handlers and then runs the
    web application on the requested port.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-v', '--verbose', action='store_true',
        help='Output debug-level logging')
    arg_parser.add_argument(
        '--port', type=int, default=constants.HERMES_PORT,
        help='The port to listen on')
    cli_args = arg_parser.parse_args()

    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
    if cli_args.verbose:
        logging.getLogger('appscale').setLevel(logging.DEBUG)

    # Work out which roles this machine has; they decide which routes are
    # served.
    private_ip = appscale_info.get_private_ip()
    on_head_node = private_ip == appscale_info.get_headnode_ip()
    on_load_balancer = private_ip in appscale_info.get_load_balancer_ips()
    on_taskqueue = private_ip in appscale_info.get_taskqueue_nodes()
    on_database = private_ip in appscale_info.get_db_ips()

    app = web.Application(middlewares=[verify_secret_middleware])

    routes = list(get_local_stats_api_routes(
        on_load_balancer, on_taskqueue, on_database))
    routes.extend(get_cluster_stats_api_routes(on_head_node))
    for url_path, handler in routes:
        app.router.add_get(url_path, handler)

    logger.info("Starting Hermes on port: {}.".format(cli_args.port))
    web.run_app(
        app,
        port=cli_args.port,
        access_log=logger,
        access_log_format='%a "%r" %s %bB %Tfs "%{User-Agent}i"')
Esempio n. 15
0
def get_kind_averages(keys):
  """ Get an average size for each kind.

  Entity keys are grouped by kind, then up to 50 randomly picked entities are
  fetched and the fetched sizes are averaged per kind. The Cassandra
  connection is only opened when there is something to sample, and it is
  shut down before returning.

  Args:
    keys: A list of dictionaries containing keys.
  Returns:
    A dictionary listing the average size of each kind. A kind for which no
    entity was fetched is listed with an average of 0.
  """
  entities_by_kind = {}
  for key_dict in keys:
    key = key_dict['key']
    if not is_entity(key):
      continue

    key_parts = key.split(KEY_DELIMITER)
    kind = key_parts[2].split(':')[0]
    kind_id = KEY_DELIMITER.join([key_parts[0], key_parts[1], kind])
    kind_stats = entities_by_kind.setdefault(
      kind_id, {'keys': [], 'size': 0, 'fetched': 0})
    kind_stats['keys'].append(key)

  # Nothing to sample, so there is no reason to open a connection.
  if not entities_by_kind:
    return {}

  # Shuffling lets pop() below hand out keys in random order.
  for kind_stats in entities_by_kind.values():
    shuffle(kind_stats['keys'])

  kind_ids = list(entities_by_kind)

  select = """
      SELECT {value} FROM "{table}"
      WHERE {key}=%(key)s AND {column}=%(column)s
    """.format(value=ThriftColumn.VALUE, table=APP_ENTITY_TABLE,
               key=ThriftColumn.KEY, column=ThriftColumn.COLUMN_NAME)

  hosts = appscale_info.get_db_ips()
  cluster = Cluster(hosts, default_retry_policy=BASIC_RETRIES,
                    load_balancing_policy=LB_POLICY)
  try:
    session = cluster.connect(KEYSPACE)

    # Issue all of the sample queries before waiting on any of them.
    futures = []
    for _ in range(50):
      kind_id = choice(kind_ids)
      try:
        key = entities_by_kind[kind_id]['keys'].pop()
      except IndexError:
        # Every key of this kind has already been requested.
        continue

      parameters = {'key': bytearray(key), 'column': APP_ENTITY_SCHEMA[0]}
      future = session.execute_async(select, parameters)
      futures.append((kind_id, future))

    for kind_id, future in futures:
      try:
        entity = future.result()[0].value
      except IndexError:
        # The query returned no rows.
        continue

      entities_by_kind[kind_id]['size'] += len(entity)
      entities_by_kind[kind_id]['fetched'] += 1
  finally:
    # Release the driver's connections; a new Cluster is made on every call.
    cluster.shutdown()

  kind_averages = {}
  for kind_id, kind_stats in entities_by_kind.items():
    if kind_stats['fetched']:
      kind_averages[kind_id] = int(kind_stats['size'] / kind_stats['fetched'])
    else:
      kind_averages[kind_id] = 0

  return kind_averages
Esempio n. 16
0
def restore_data(path, keyname, force=False):
  """ Restores the Cassandra backup.

  Cassandra is stopped on every DB machine. Then, machine by machine, the
  existing data files are removed, the backup at `path` is unpacked (where
  one exists) and Cassandra is started again. Finally, `nodetool status` is
  polled on the first machine until it lists as many 'UN' lines as there are
  DB machines or SCHEMA_CHANGE_TIMEOUT seconds have passed.

  Args:
    path: A string containing the location on each of the DB machines to use
      for restoring data.
    keyname: A string containing the deployment's keyname.
    force: A boolean. When True, machines that lack a restore file do not
      trigger the interactive confirmation prompt.
  Raises:
    BRException if unable to find any Cassandra machines or if Cassandra
      cannot be stopped or started on a machine.
  """
  def get_cassandra_status(db_ip):
    """ Fetches the monit status of Cassandra on the given machine. """
    summary = appscale_utils.ssh(db_ip, keyname, 'monit summary',
                                 method=subprocess.check_output)
    return utils.monit_status(summary, CASSANDRA_MONIT_WATCH_NAME)

  logging.info("Starting new db restore.")

  db_ips = appscale_info.get_db_ips()
  if not db_ips:
    raise BRException('Unable to find any Cassandra machines.')

  machines_without_restore = []
  for db_ip in db_ips:
    exit_code = appscale_utils.ssh(db_ip, keyname, 'ls {}'.format(path),
                                   method=subprocess.call)
    if exit_code != ExitCodes.SUCCESS:
      machines_without_restore.append(db_ip)

  if machines_without_restore and not force:
    logging.info('The following machines do not have a restore file: {}'.
      format(machines_without_restore))
    response = raw_input('Would you like to continue? [y/N] ')
    if response not in ['Y', 'y']:
      return

  stop_cassandra = 'monit stop {}'.format(CASSANDRA_MONIT_WATCH_NAME)
  start_cassandra = 'monit start {}'.format(CASSANDRA_MONIT_WATCH_NAME)

  for db_ip in db_ips:
    logging.info('Stopping Cassandra on {}'.format(db_ip))
    status = get_cassandra_status(db_ip)
    retries = SERVICE_RETRIES
    while status != MonitStates.UNMONITORED:
      appscale_utils.ssh(db_ip, keyname, stop_cassandra,
                         method=subprocess.call)
      time.sleep(3)
      status = get_cassandra_status(db_ip)
      retries -= 1
      # Only give up when the last attempt did not take effect either.
      if retries < 0 and status != MonitStates.UNMONITORED:
        raise BRException('Unable to stop Cassandra')

  cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
  # A raw string keeps the backslashes meant for find and the remote shell
  # from being treated as (invalid) Python escape sequences.
  clear_db = (
    r'find {0} -regex ".*\.\(db\|txt\|log\)$" -exec rm {{}} \;'
  ).format(cassandra_dir)
  for db_ip in db_ips:
    logging.info('Restoring Cassandra data on {}'.format(db_ip))
    appscale_utils.ssh(db_ip, keyname, clear_db)

    if db_ip not in machines_without_restore:
      appscale_utils.ssh(db_ip, keyname,
                         'tar xf {} -C {}'.format(path, cassandra_dir))
      appscale_utils.ssh(db_ip, keyname,
                         'chown -R cassandra {}'.format(cassandra_dir))

    logging.info('Starting Cassandra on {}'.format(db_ip))
    retries = SERVICE_RETRIES
    status = MonitStates.UNMONITORED
    while status != MonitStates.RUNNING:
      appscale_utils.ssh(db_ip, keyname, start_cassandra,
                         method=subprocess.call)
      time.sleep(3)
      status = get_cassandra_status(db_ip)
      retries -= 1
      # Only give up when the last attempt did not take effect either.
      if retries < 0 and status != MonitStates.RUNNING:
        raise BRException('Unable to start Cassandra')

    appscale_utils.ssh(db_ip, keyname, start_cassandra)

  logging.info('Waiting for Cassandra cluster to be ready')
  db_ip = db_ips[0]
  deadline = time.time() + SCHEMA_CHANGE_TIMEOUT
  while True:
    try:
      output = appscale_utils.ssh(
        db_ip, keyname, '{} status'.format(NODE_TOOL),
        method=subprocess.check_output)
      nodes_ready = sum(1 for line in output.split('\n')
                        if line.startswith('UN'))
      ready = nodes_ready >= len(db_ips)
    except CalledProcessError:
      ready = False

    if ready:
      break

    if time.time() > deadline:
      logging.warning('Cassandra cluster still not ready.')
      break

    time.sleep(3)

  logging.info("Done with db restore.")
Esempio n. 17
0
def restore_data(path, keyname, force=False):
    """ Restores the Cassandra backup.

    Cassandra is stopped on every DB machine. Then, machine by machine, the
    existing data files are removed, the backup at `path` is unpacked (where
    one exists) and Cassandra is started again. Finally, `nodetool status`
    is polled on the first machine until it lists as many 'UN' lines as
    there are DB machines or SCHEMA_CHANGE_TIMEOUT seconds have passed.

    Args:
      path: A string containing the location on each of the DB machines to
        use for restoring data.
      keyname: A string containing the deployment's keyname.
      force: A boolean. When True, machines that lack a restore file do not
        trigger the interactive confirmation prompt.
    Raises:
      BRException if unable to find any Cassandra machines or if Cassandra
        cannot be stopped or started on a machine.
    """
    def get_status_line(db_ip):
        """ Fetches the service summary line for Cassandra on a machine.

        Returns an empty string when no line starts with the watch name.
        """
        summary = appscale_utils.ssh(db_ip,
                                     keyname,
                                     'appscale-admin summary',
                                     method=subprocess.check_output)
        return next((line for line in summary.split('\n')
                     if line.startswith(CASSANDRA_MONIT_WATCH_NAME)), '')

    logger.info("Starting new db restore.")

    db_ips = appscale_info.get_db_ips()
    if not db_ips:
        raise BRException('Unable to find any Cassandra machines.')

    machines_without_restore = []
    for db_ip in db_ips:
        exit_code = appscale_utils.ssh(db_ip,
                                       keyname,
                                       'ls {}'.format(path),
                                       method=subprocess.call)
        if exit_code != utils.ExitCodes.SUCCESS:
            machines_without_restore.append(db_ip)

    if machines_without_restore and not force:
        logger.info(
            'The following machines do not have a restore file: {}'.format(
                machines_without_restore))
        response = raw_input('Would you like to continue? [y/N] ')
        if response not in ['Y', 'y']:
            return

    stop_cassandra = 'appscale-stop-service {}'.format(
        CASSANDRA_MONIT_WATCH_NAME)
    start_cassandra = 'appscale-start-service {}'.format(
        CASSANDRA_MONIT_WATCH_NAME)

    for db_ip in db_ips:
        logger.info('Stopping Cassandra on {}'.format(db_ip))
        status_line = get_status_line(db_ip)
        retries = SERVICE_RETRIES
        while MonitStates.UNMONITORED not in status_line:
            appscale_utils.ssh(db_ip, keyname, stop_cassandra,
                               method=subprocess.call)
            time.sleep(3)
            status_line = get_status_line(db_ip)
            retries -= 1
            # Only give up when the last attempt did not take effect either.
            if retries < 0 and MonitStates.UNMONITORED not in status_line:
                raise BRException('Unable to stop Cassandra')

    cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
    # A raw string keeps the backslashes meant for find and the remote shell
    # from being treated as (invalid) Python escape sequences.
    clear_db = (
        r'find {0} -regex ".*\.\(db\|txt\|log\)$" -exec rm {{}} \;'
    ).format(cassandra_dir)
    for db_ip in db_ips:
        logger.info('Restoring Cassandra data on {}'.format(db_ip))
        appscale_utils.ssh(db_ip, keyname, clear_db)

        if db_ip not in machines_without_restore:
            appscale_utils.ssh(db_ip, keyname,
                               'tar xf {} -C {}'.format(path, cassandra_dir))
            appscale_utils.ssh(db_ip, keyname,
                               'chown -R cassandra {}'.format(cassandra_dir))

        logger.info('Starting Cassandra on {}'.format(db_ip))
        retries = SERVICE_RETRIES
        status_line = MonitStates.UNMONITORED
        while MonitStates.RUNNING not in status_line:
            appscale_utils.ssh(db_ip, keyname, start_cassandra,
                               method=subprocess.call)
            time.sleep(3)
            status_line = get_status_line(db_ip)
            retries -= 1
            # Only give up when the last attempt did not take effect either.
            if retries < 0 and MonitStates.RUNNING not in status_line:
                raise BRException('Unable to start Cassandra')

        appscale_utils.ssh(db_ip, keyname, start_cassandra)

    logger.info('Waiting for Cassandra cluster to be ready')
    db_ip = db_ips[0]
    deadline = time.time() + SCHEMA_CHANGE_TIMEOUT
    while True:
        try:
            output = appscale_utils.ssh(db_ip,
                                        keyname,
                                        '{} status'.format(NODE_TOOL),
                                        method=subprocess.check_output)
            nodes_ready = sum(1 for line in output.split('\n')
                              if line.startswith('UN'))
            ready = nodes_ready >= len(db_ips)
        except CalledProcessError:
            ready = False

        if ready:
            break

        if time.time() > deadline:
            logger.warning('Cassandra cluster still not ready.')
            break

        time.sleep(3)

    logger.info("Done with db restore.")
Esempio n. 18
0
def prime_cassandra(replication):
    """ Prepares Cassandra by creating the keyspace and the initial tables.

    Args:
      replication: An integer giving the keyspace's replication factor.
    Raises:
      AppScaleBadArg if the replication factor is not greater than 0.
      TypeError if replication is not an integer.
    """
    if not isinstance(replication, int):
        raise TypeError('Replication must be an integer')

    if int(replication) < 1:
        raise dbconstants.AppScaleBadArg(
            'Replication must be greater than zero')

    db_hosts = appscale_info.get_db_ips()

    # Keep trying to connect until the retry budget is exhausted.
    cluster = session = None
    retries_left = INITIAL_CONNECT_RETRIES
    while True:
        try:
            cluster = Cluster(db_hosts)
            session = cluster.connect()
        except cassandra.cluster.NoHostAvailable as no_host_error:
            retries_left -= 1
            if retries_left < 0:
                raise no_host_error
            time.sleep(3)
        else:
            break
    session.default_consistency_level = ConsistencyLevel.QUORUM

    keyspace_cql = """
    CREATE KEYSPACE IF NOT EXISTS "{keyspace}"
    WITH REPLICATION = %(replication)s
  """.format(keyspace=KEYSPACE)
    session.execute(
        keyspace_cql,
        {'replication': {'class': 'SimpleStrategy',
                         'replication_factor': replication}},
        timeout=SCHEMA_CHANGE_TIMEOUT)
    session.set_keyspace(KEYSPACE)

    # Every initial table shares the same layout, so only the name varies.
    table_cql_template = """
      CREATE TABLE IF NOT EXISTS "{table}" (
        {key} blob,
        {column} text,
        {value} blob,
        PRIMARY KEY ({key}, {column})
      ) WITH COMPACT STORAGE
    """
    for table_name in dbconstants.INITIAL_TABLES:
        table_cql = table_cql_template.format(
            table=table_name,
            key=ThriftColumn.KEY,
            column=ThriftColumn.COLUMN_NAME,
            value=ThriftColumn.VALUE)
        statement = SimpleStatement(table_cql, retry_policy=NO_RETRIES)

        logging.info('Trying to create {}'.format(table_name))
        try:
            session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
        except cassandra.OperationTimedOut:
            logging.warning(
                'Encountered an operation timeout while creating {} table. Waiting {} '
                'seconds for schema to settle.'.format(
                    table_name, SCHEMA_CHANGE_TIMEOUT))
            # Pause before propagating the error to the caller.
            time.sleep(SCHEMA_CHANGE_TIMEOUT)
            raise

    create_batch_tables(cluster, session)
    create_groups_table(session)
    create_transactions_table(session)
    create_pull_queue_tables(cluster, session)
    create_entity_ids_table(session)

    # Check for stored entities before the remaining schema work runs.
    entity_rows = session.execute(
        'SELECT * FROM "{}" LIMIT 1'.format(dbconstants.APP_ENTITY_TABLE))
    existing_entities = len(list(entity_rows)) == 1

    define_ua_schema(session)

    metadata_insert = """
    INSERT INTO "{table}" ({key}, {column}, {value})
    VALUES (%(key)s, %(column)s, %(value)s)
  """.format(table=dbconstants.DATASTORE_METADATA_TABLE,
             key=ThriftColumn.KEY,
             column=ThriftColumn.COLUMN_NAME,
             value=ThriftColumn.VALUE)

    def store_metadata(metadata_key, metadata_value):
        """ Writes one entry to the datastore metadata table. """
        session.execute(metadata_insert, {
            'key': bytearray(metadata_key),
            'column': metadata_key,
            'value': bytearray(metadata_value)
        })

    if not existing_entities:
        store_metadata(cassandra_interface.VERSION_INFO_KEY,
                       str(POST_JOURNAL_VERSION))

        # Mark the newly created indexes as clean.
        store_metadata(cassandra_interface.INDEX_STATE_KEY,
                       str(IndexStates.CLEAN))

    # Indicate that the database has been successfully primed.
    store_metadata(cassandra_interface.PRIMED_KEY, 'true')
    logging.info('Cassandra is primed.')
Esempio n. 19
0
def get_random_db_node():
    """ Picks one database IP at random.

    Returns:
      A single-element list holding the chosen IP.
    """
    db_ips = appscale_info.get_db_ips()
    chosen_ip = random.choice(db_ips)
    return [chosen_ip]
Esempio n. 20
0
def get_kind_averages(keys):
    """ Estimates an average entity size for each kind by sampling.

    Entity keys are grouped by kind ID. Up to 50 keys are then drawn at random
    across the kinds, the matching entities are fetched asynchronously, and
    the fetched sizes are averaged per kind.

    Args:
      keys: A list of dictionaries, each holding an entity key under 'key'.
    Returns:
      A dictionary mapping each kind ID to the integer average size of the
      entities sampled for that kind (0 when none could be fetched). An empty
      dictionary is returned when no entity keys were given.
    """
    entities_by_kind = {}
    for key_dict in keys:
        key = key_dict['key']
        if not is_entity(key):
            continue

        key_parts = key.split(KEY_DELIMITER)
        kind = key_parts[2].split(':')[0]
        kind_id = KEY_DELIMITER.join([key_parts[0], key_parts[1], kind])
        kind_stats = entities_by_kind.setdefault(
            kind_id, {'keys': [], 'size': 0, 'fetched': 0})
        kind_stats['keys'].append(key)

    # Nothing to sample, so there is no reason to open a database connection.
    if not entities_by_kind:
        return {}

    # Shuffling makes the pop() calls below draw keys in random order.
    for kind_stats in entities_by_kind.values():
        shuffle(kind_stats['keys'])

    # The statement is identical for every sample, so build it once.
    select = """
      SELECT {value} FROM "{table}"
      WHERE {key}=%(key)s AND {column}=%(column)s
    """.format(value=ThriftColumn.VALUE,
               table=APP_ENTITY_TABLE,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME)
    kind_ids = list(entities_by_kind)

    hosts = appscale_info.get_db_ips()
    cluster = Cluster(hosts, default_retry_policy=BASIC_RETRIES)
    try:
        session = cluster.connect(KEYSPACE)

        futures = []
        for _ in range(50):
            kind_id = choice(kind_ids)
            try:
                key = entities_by_kind[kind_id]['keys'].pop()
            except IndexError:
                # Every key of this kind has already been sampled.
                continue

            parameters = {
                'key': bytearray(key),
                'column': APP_ENTITY_SCHEMA[0]
            }
            future = session.execute_async(select, parameters)
            futures.append((kind_id, future))

        for kind_id, future in futures:
            try:
                entity = future.result()[0].value
            except IndexError:
                # The query returned no rows for this key.
                continue

            entities_by_kind[kind_id]['size'] += len(entity)
            entities_by_kind[kind_id]['fetched'] += 1
    finally:
        # The cluster is private to this call; release its connections.
        cluster.shutdown()

    kind_averages = {}
    for kind_id, kind_stats in entities_by_kind.items():
        fetched = kind_stats['fetched']
        kind_averages[kind_id] = kind_stats['size'] // fetched if fetched else 0

    return kind_averages
Esempio n. 21
0
def prime_cassandra(replication):
  """ Prepares Cassandra by creating the keyspace and the initial tables.

  Args:
    replication: An integer giving the keyspace's replication factor.
  Raises:
    AppScaleBadArg if the replication factor is not greater than 0.
    TypeError if replication is not an integer.
  """
  if not isinstance(replication, int):
    raise TypeError('Replication must be an integer')

  if int(replication) < 1:
    raise dbconstants.AppScaleBadArg('Replication must be greater than zero')

  zk_client = KazooClient(hosts=appscale_info.get_zk_node_ips())
  zk_client.start()

  db_hosts = appscale_info.get_db_ips()

  # Keep trying to connect until the retry budget is exhausted.
  retries_left = INITIAL_CONNECT_RETRIES
  while True:
    try:
      cluster = Cluster(db_hosts, load_balancing_policy=LB_POLICY)
      session = cluster.connect()
    except cassandra.cluster.NoHostAvailable as no_host_error:
      retries_left -= 1
      if retries_left < 0:
        raise no_host_error
      time.sleep(3)
    else:
      break
  session.default_consistency_level = ConsistencyLevel.QUORUM

  keyspace_cql = """
    CREATE KEYSPACE IF NOT EXISTS "{keyspace}"
    WITH REPLICATION = %(replication)s
  """.format(keyspace=KEYSPACE)
  session.execute(
    keyspace_cql,
    {'replication': {'class': 'SimpleStrategy',
                     'replication_factor': replication}},
    timeout=SCHEMA_CHANGE_TIMEOUT)
  session.set_keyspace(KEYSPACE)

  logger.info('Waiting for all hosts to be connected')
  deadline = time.time() + SCHEMA_CHANGE_TIMEOUT
  # Poll once per second until every host has a pool or the deadline passes.
  while time.time() <= deadline:
    if len(session.get_pool_state()) == len(db_hosts):
      break
    time.sleep(1)
  else:
    logger.warning('Timeout when waiting for hosts to join. Continuing '
                   'with connected hosts.')

  # Every initial table shares the same layout, so only the name varies.
  table_cql_template = """
      CREATE TABLE IF NOT EXISTS "{table}" (
        {key} blob,
        {column} text,
        {value} blob,
        PRIMARY KEY ({key}, {column})
      ) WITH COMPACT STORAGE
    """
  for table_name in dbconstants.INITIAL_TABLES:
    table_cql = table_cql_template.format(
      table=table_name,
      key=ThriftColumn.KEY,
      column=ThriftColumn.COLUMN_NAME,
      value=ThriftColumn.VALUE)
    statement = SimpleStatement(table_cql, retry_policy=NO_RETRIES)

    logger.info('Trying to create {}'.format(table_name))
    try:
      session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
    except cassandra.OperationTimedOut:
      logger.warning(
        'Encountered an operation timeout while creating {} table. Waiting {} '
        'seconds for schema to settle.'.format(
          table_name, SCHEMA_CHANGE_TIMEOUT))
      # Pause before propagating the error to the caller.
      time.sleep(SCHEMA_CHANGE_TIMEOUT)
      raise

  migrate_composite_index_metadata(cluster, session, zk_client)
  create_batch_tables(cluster, session)
  create_groups_table(session)
  create_transactions_table(session)
  create_pull_queue_tables(cluster, session)
  create_entity_ids_table(session)

  # Check for stored entities before the remaining schema work runs.
  entity_rows = session.execute(
    'SELECT * FROM "{}" LIMIT 1'.format(dbconstants.APP_ENTITY_TABLE))
  existing_entities = len(list(entity_rows)) == 1

  define_ua_schema(session)

  metadata_insert = """
    INSERT INTO "{table}" ({key}, {column}, {value})
    VALUES (%(key)s, %(column)s, %(value)s)
  """.format(
    table=dbconstants.DATASTORE_METADATA_TABLE,
    key=ThriftColumn.KEY,
    column=ThriftColumn.COLUMN_NAME,
    value=ThriftColumn.VALUE
  )

  def store_metadata(metadata_key, metadata_value):
    """ Writes one entry to the datastore metadata table. """
    session.execute(metadata_insert, {
      'key': bytearray(metadata_key),
      'column': metadata_key,
      'value': bytearray(metadata_value)
    })

  if not existing_entities:
    store_metadata(cassandra_interface.VERSION_INFO_KEY, str(CURRENT_VERSION))

    # Mark the newly created indexes as clean.
    store_metadata(cassandra_interface.INDEX_STATE_KEY, str(IndexStates.CLEAN))

    # Indicate that scatter property values do not need to be populated.
    store_metadata(cassandra_interface.SCATTER_PROP_KEY,
                   ScatterPropStates.POPULATED)
  elif current_datastore_version(session) == 1.0:
    # Instruct the groomer to reclean the indexes.
    store_metadata(cassandra_interface.INDEX_STATE_KEY, str(IndexStates.DIRTY))
    store_metadata(cassandra_interface.VERSION_INFO_KEY, str(CURRENT_VERSION))

  # Indicate that the database has been successfully primed.
  store_metadata(cassandra_interface.PRIMED_KEY, str(CURRENT_VERSION))
  logger.info('Cassandra is primed.')
Esempio n. 22
0
def get_random_db_node():
  """ Picks one database IP at random.

  Returns:
    A single-element list holding the chosen IP.
  """
  db_ips = appscale_info.get_db_ips()
  chosen_ip = random.choice(db_ips)
  return [chosen_ip]
Esempio n. 23
0
  async def get_current(cls):
    """ Retrieves Cassandra status info.

    Runs NODETOOL_STATUS_COMMAND in a subprocess shell and parses its output
    into per-node statistics.

    Returns:
      An instance of CassandraStatsSnapshot.
    Raises:
      NodetoolStatusError if the subprocess times out, exits with a non-zero
      return code, or prints output without a recognized header.
    """
    start = time.time()

    process = await asyncio.create_subprocess_shell(
      NODETOOL_STATUS_COMMAND,
      stdout=asyncio.subprocess.PIPE,
      stderr=asyncio.subprocess.PIPE
    )
    logger.info('Started subprocess `{}` (pid: {})'
                .format(NODETOOL_STATUS_COMMAND, process.pid))

    try:
      # Wait for the subprocess to finish
      stdout, stderr = await asyncio.wait_for(
        process.communicate(), NODETOOL_STATUS_TIMEOUT
      )
    except asyncio.TimeoutError:
      # wait_for() only cancels communicate(); the child keeps running unless
      # it is killed explicitly, so kill and reap it before reporting.
      try:
        process.kill()
      except ProcessLookupError:
        # The process exited between the timeout and the kill.
        pass
      await process.wait()
      raise NodetoolStatusError(
        'Timed out waiting for subprocess `{}` (pid: {})'
        .format(NODETOOL_STATUS_COMMAND, process.pid)
      )

    output = stdout.decode()
    error = stderr.decode()
    if error:
      logger.warning(error)
    if process.returncode != 0:
      raise NodetoolStatusError('Subprocess failed with return code {} ({})'
                                .format(process.returncode, error))

    known_db_nodes = set(appscale_info.get_db_ips())
    nodes = []
    shown_nodes = set()

    # Pick the row pattern that matches the header found in the output.
    if cls.SINGLENODE_HEADER_PATTERN.search(output):
      status_pattern = cls.SINGLENODE_STATUS_PATTERN
      multinode = False
    elif cls.MULTINODE_HEADER_PATTERN.search(output):
      status_pattern = cls.MULTINODE_STATUS_PATTERN
      multinode = True
    else:
      raise NodetoolStatusError(
        '`{}` output does not contain expected header. Actual output:\n{}'
        .format(NODETOOL_STATUS_COMMAND, output)
      )

    for match in status_pattern.finditer(output):
      address = match.group('address')
      # Single-node rows are parsed without a token count; 1 is used instead.
      tokens_num = match.group('tokens_num') if multinode else 1
      node_stats = CassandraNodeStats(
        address=address,
        status=cls.STATUSES[match.group('status')],
        state=cls.STATES[match.group('state')],
        load=int(float(match.group('load'))
                 * cls.SIZE_UNITS[match.group('size_unit')]),
        owns_pct=float(match.group('owns_pct')),
        tokens_num=int(tokens_num),
        host_id=match.group('host_id'),
        rack=match.group('rack'),
      )
      nodes.append(node_stats)
      shown_nodes.add(address)

    snapshot = CassandraStatsSnapshot(
      utc_timestamp=int(time.time()),
      nodes=nodes,
      missing_nodes=list(known_db_nodes - shown_nodes),
      unknown_nodes=list(shown_nodes - known_db_nodes)
    )
    logger.info('Prepared Cassandra nodes status in '
                '{elapsed:.2f}s.'.format(elapsed=time.time()-start))
    return snapshot
Esempio n. 24
0
def prime_cassandra(replication):
  """ Prepares Cassandra by creating the keyspace and the initial tables.

  Args:
    replication: An integer giving the keyspace's replication factor.
  Raises:
    AppScaleBadArg if the replication factor is not greater than 0.
    TypeError if replication is not an integer.
  """
  if not isinstance(replication, int):
    raise TypeError('Replication must be an integer')

  if int(replication) < 1:
    raise dbconstants.AppScaleBadArg('Replication must be greater than zero')

  db_hosts = appscale_info.get_db_ips()

  # Keep trying to connect until the retry budget is exhausted.
  cluster = session = None
  retries_left = INITIAL_CONNECT_RETRIES
  while True:
    try:
      cluster = Cluster(db_hosts)
      session = cluster.connect()
    except cassandra.cluster.NoHostAvailable as no_host_error:
      retries_left -= 1
      if retries_left < 0:
        raise no_host_error
      time.sleep(3)
    else:
      break
  session.default_consistency_level = ConsistencyLevel.QUORUM

  keyspace_cql = """
    CREATE KEYSPACE IF NOT EXISTS "{keyspace}"
    WITH REPLICATION = %(replication)s
  """.format(keyspace=KEYSPACE)
  session.execute(
    keyspace_cql,
    {'replication': {'class': 'SimpleStrategy',
                     'replication_factor': replication}},
    timeout=SCHEMA_CHANGE_TIMEOUT)
  session.set_keyspace(KEYSPACE)

  # Every initial table shares the same layout, so only the name varies.
  table_cql_template = """
      CREATE TABLE IF NOT EXISTS "{table}" (
        {key} blob,
        {column} text,
        {value} blob,
        PRIMARY KEY ({key}, {column})
      ) WITH COMPACT STORAGE
    """
  for table_name in dbconstants.INITIAL_TABLES:
    table_cql = table_cql_template.format(
      table=table_name,
      key=ThriftColumn.KEY,
      column=ThriftColumn.COLUMN_NAME,
      value=ThriftColumn.VALUE)
    statement = SimpleStatement(table_cql, retry_policy=NO_RETRIES)

    logging.info('Trying to create {}'.format(table_name))
    try:
      session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
    except cassandra.OperationTimedOut:
      logging.warning(
        'Encountered an operation timeout while creating {} table. Waiting {} '
        'seconds for schema to settle.'.format(
          table_name, SCHEMA_CHANGE_TIMEOUT))
      # Pause before propagating the error to the caller.
      time.sleep(SCHEMA_CHANGE_TIMEOUT)
      raise

  create_batch_tables(cluster, session)
  create_groups_table(session)
  create_transactions_table(session)
  create_pull_queue_tables(cluster, session)
  create_entity_ids_table(session)

  # Check for stored entities before the remaining schema work runs.
  entity_rows = session.execute(
    'SELECT * FROM "{}" LIMIT 1'.format(dbconstants.APP_ENTITY_TABLE))
  existing_entities = len(list(entity_rows)) == 1

  define_ua_schema(session)

  metadata_insert = """
    INSERT INTO "{table}" ({key}, {column}, {value})
    VALUES (%(key)s, %(column)s, %(value)s)
  """.format(
    table=dbconstants.DATASTORE_METADATA_TABLE,
    key=ThriftColumn.KEY,
    column=ThriftColumn.COLUMN_NAME,
    value=ThriftColumn.VALUE
  )

  def store_metadata(metadata_key, metadata_value):
    """ Writes one entry to the datastore metadata table. """
    session.execute(metadata_insert, {
      'key': bytearray(metadata_key),
      'column': metadata_key,
      'value': bytearray(metadata_value)
    })

  if not existing_entities:
    store_metadata(cassandra_interface.VERSION_INFO_KEY,
                   str(POST_JOURNAL_VERSION))

    # Mark the newly created indexes as clean.
    store_metadata(cassandra_interface.INDEX_STATE_KEY, str(IndexStates.CLEAN))

  # Indicate that the database has been successfully primed.
  store_metadata(cassandra_interface.PRIMED_KEY, 'true')
  logging.info('Cassandra is primed.')
Esempio n. 25
0
def prime_cassandra(replication):
    """ Prepares Cassandra by creating the keyspace and the initial tables.

    Args:
      replication: An integer giving the keyspace's replication factor.
    Raises:
      AppScaleBadArg if the replication factor is not greater than 0.
      TypeError if replication is not an integer.
    """
    if not isinstance(replication, int):
        raise TypeError('Replication must be an integer')

    if int(replication) < 1:
        raise dbconstants.AppScaleBadArg(
            'Replication must be greater than zero')

    zk_client = KazooClient(hosts=appscale_info.get_zk_node_ips())
    zk_client.start()

    db_hosts = appscale_info.get_db_ips()

    # Keep trying to connect until the retry budget is exhausted.
    retries_left = INITIAL_CONNECT_RETRIES
    while True:
        try:
            cluster = Cluster(db_hosts, load_balancing_policy=LB_POLICY)
            session = cluster.connect()
        except cassandra.cluster.NoHostAvailable as no_host_error:
            retries_left -= 1
            if retries_left < 0:
                raise no_host_error
            time.sleep(3)
        else:
            break
    session.default_consistency_level = ConsistencyLevel.QUORUM

    keyspace_cql = """
    CREATE KEYSPACE IF NOT EXISTS "{keyspace}"
    WITH REPLICATION = %(replication)s
  """.format(keyspace=KEYSPACE)
    session.execute(
        keyspace_cql,
        {'replication': {'class': 'SimpleStrategy',
                         'replication_factor': replication}},
        timeout=SCHEMA_CHANGE_TIMEOUT)
    session.set_keyspace(KEYSPACE)

    logger.info('Waiting for all hosts to be connected')
    deadline = time.time() + SCHEMA_CHANGE_TIMEOUT
    # Poll once per second until every host has a pool or the deadline passes.
    while time.time() <= deadline:
        if len(session.get_pool_state()) == len(db_hosts):
            break
        time.sleep(1)
    else:
        logger.warning(
            'Timeout when waiting for hosts to join. Continuing '
            'with connected hosts.')

    # Every initial table shares the same layout, so only the name varies.
    table_cql_template = """
      CREATE TABLE IF NOT EXISTS "{table}" (
        {key} blob,
        {column} text,
        {value} blob,
        PRIMARY KEY ({key}, {column})
      ) WITH COMPACT STORAGE
    """
    for table_name in dbconstants.INITIAL_TABLES:
        table_cql = table_cql_template.format(
            table=table_name,
            key=ThriftColumn.KEY,
            column=ThriftColumn.COLUMN_NAME,
            value=ThriftColumn.VALUE)
        statement = SimpleStatement(table_cql, retry_policy=NO_RETRIES)

        logger.info('Trying to create {}'.format(table_name))
        try:
            session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT)
        except cassandra.OperationTimedOut:
            logger.warning(
                'Encountered an operation timeout while creating {} table. Waiting {} '
                'seconds for schema to settle.'.format(
                    table_name, SCHEMA_CHANGE_TIMEOUT))
            # Pause before propagating the error to the caller.
            time.sleep(SCHEMA_CHANGE_TIMEOUT)
            raise

    migrate_composite_index_metadata(cluster, session, zk_client)
    create_batch_tables(cluster, session)
    create_groups_table(session)
    create_transactions_table(session)
    create_entity_ids_table(session)

    # Check for stored entities before the remaining schema work runs.
    entity_rows = session.execute(
        'SELECT * FROM "{}" LIMIT 1'.format(dbconstants.APP_ENTITY_TABLE))
    existing_entities = len(list(entity_rows)) == 1

    define_ua_schema(session)

    metadata_insert = """
    INSERT INTO "{table}" ({key}, {column}, {value})
    VALUES (%(key)s, %(column)s, %(value)s)
  """.format(table=dbconstants.DATASTORE_METADATA_TABLE,
             key=ThriftColumn.KEY,
             column=ThriftColumn.COLUMN_NAME,
             value=ThriftColumn.VALUE)

    def store_metadata(metadata_key, metadata_value):
        """ Writes one entry to the datastore metadata table. """
        session.execute(metadata_insert, {
            'key': bytearray(metadata_key),
            'column': metadata_key,
            'value': bytearray(metadata_value)
        })

    if not existing_entities:
        store_metadata(cassandra_interface.VERSION_INFO_KEY,
                       str(CURRENT_VERSION))

        # Mark the newly created indexes as clean.
        store_metadata(cassandra_interface.INDEX_STATE_KEY,
                       str(IndexStates.CLEAN))

        # Indicate that scatter property values do not need to be populated.
        store_metadata(cassandra_interface.SCATTER_PROP_KEY,
                       ScatterPropStates.POPULATED)
    elif current_datastore_version(session) == 1.0:
        # Instruct the groomer to reclean the indexes.
        store_metadata(cassandra_interface.INDEX_STATE_KEY,
                       str(IndexStates.DIRTY))
        store_metadata(cassandra_interface.VERSION_INFO_KEY,
                       str(CURRENT_VERSION))

    # Indicate that the database has been successfully primed.
    store_metadata(cassandra_interface.PRIMED_KEY, str(CURRENT_VERSION))
    logger.info('Cassandra is primed.')
Esempio n. 26
0
    def get_current(cls):
        """ Retrieves Cassandra status info.

        Runs NODETOOL_STATUS_COMMAND as a subprocess and parses its output
        into per-node statistics. This is a generator-based coroutine: the
        result is delivered by raising gen.Return.

        Returns:
          An instance of CassandraStatsSnapshot.
        Raises:
          NodetoolStatusError if the subprocess reports CalledProcessError or
          its output does not contain a recognized header.
        """
        start = time.time()
        try:
            proc = process.Subprocess(NODETOOL_STATUS_COMMAND,
                                      stdout=process.Subprocess.STREAM,
                                      stderr=process.Subprocess.STREAM)
            output = yield proc.stdout.read_until_close()
            error_output = yield proc.stderr.read_until_close()
            if error_output:
                logger.error(error_output)
        except process.CalledProcessError as process_error:
            raise NodetoolStatusError(process_error)

        known_db_nodes = set(appscale_info.get_db_ips())
        nodes = []
        shown_nodes = set()

        # Pick the row pattern that matches the header found in the output.
        if cls.SINGLENODE_HEADER_PATTERN.search(output):
            status_pattern = cls.SINGLENODE_STATUS_PATTERN
            multinode = False
        elif cls.MULTINODE_HEADER_PATTERN.search(output):
            status_pattern = cls.MULTINODE_STATUS_PATTERN
            multinode = True
        else:
            raise NodetoolStatusError(
                '`nodetool status` output does not contain expected header')

        for match in status_pattern.finditer(output):
            address = match.group('address')
            # Single-node rows are parsed without a token count; 1 is used.
            tokens_num = match.group('tokens_num') if multinode else 1
            node_stats = CassandraNodeStats(
                address=address,
                status=cls.STATUSES[match.group('status')],
                state=cls.STATES[match.group('state')],
                load=int(float(match.group('load'))
                         * cls.SIZE_UNITS[match.group('size_unit')]),
                owns_pct=float(match.group('owns_pct')),
                tokens_num=int(tokens_num),
                host_id=match.group('host_id'),
                rack=match.group('rack'),
            )
            nodes.append(node_stats)
            shown_nodes.add(address)

        snapshot = CassandraStatsSnapshot(
            utc_timestamp=int(time.time()),
            nodes=nodes,
            missing_nodes=list(known_db_nodes - shown_nodes),
            unknown_nodes=list(shown_nodes - known_db_nodes))
        logger.info('Prepared Cassandra nodes status in '
                    '{elapsed:.1f}s.'.format(elapsed=time.time() - start))
        raise gen.Return(snapshot)
Esempio n. 27
0
  def get_current(cls):
    """ Retrieves Cassandra status info.

    Runs NODETOOL_STATUS_COMMAND as a subprocess and parses its output into
    per-node statistics. This is a generator-based coroutine: the result is
    delivered by raising gen.Return.

    Returns:
      An instance of CassandraStatsSnapshot.
    Raises:
      NodetoolStatusError if the subprocess reports CalledProcessError or its
      output does not contain a recognized header.
    """
    start = time.time()
    try:
      proc = process.Subprocess(
        NODETOOL_STATUS_COMMAND,
        stdout=process.Subprocess.STREAM,
        stderr=process.Subprocess.STREAM
      )
      output = yield proc.stdout.read_until_close()
      error_output = yield proc.stderr.read_until_close()
      if error_output:
        logger.error(error_output)
    except process.CalledProcessError as process_error:
      raise NodetoolStatusError(process_error)

    known_db_nodes = set(appscale_info.get_db_ips())
    nodes = []
    shown_nodes = set()

    # Pick the row pattern that matches the header found in the output.
    if cls.SINGLENODE_HEADER_PATTERN.search(output):
      status_pattern = cls.SINGLENODE_STATUS_PATTERN
      multinode = False
    elif cls.MULTINODE_HEADER_PATTERN.search(output):
      status_pattern = cls.MULTINODE_STATUS_PATTERN
      multinode = True
    else:
      raise NodetoolStatusError(
        '`nodetool status` output does not contain expected header'
      )

    for match in status_pattern.finditer(output):
      address = match.group('address')
      # Single-node rows are parsed without a token count; 1 is used instead.
      tokens_num = match.group('tokens_num') if multinode else 1
      node_stats = CassandraNodeStats(
        address=address,
        status=cls.STATUSES[match.group('status')],
        state=cls.STATES[match.group('state')],
        load=int(float(match.group('load'))
                 * cls.SIZE_UNITS[match.group('size_unit')]),
        owns_pct=float(match.group('owns_pct')),
        tokens_num=int(tokens_num),
        host_id=match.group('host_id'),
        rack=match.group('rack'),
      )
      nodes.append(node_stats)
      shown_nodes.add(address)

    snapshot = CassandraStatsSnapshot(
      utc_timestamp=int(time.time()),
      nodes=nodes,
      missing_nodes=list(known_db_nodes - shown_nodes),
      unknown_nodes=list(shown_nodes - known_db_nodes)
    )
    logger.info('Prepared Cassandra nodes status in '
                 '{elapsed:.1f}s.'.format(elapsed=time.time()-start))
    raise gen.Return(snapshot)
Esempio n. 28
0
def restore_data(path, keyname, force=False):
    """ Restores the Cassandra backup.

    For every DB machine this stops Cassandra through monit, removes the
    existing *.db, *.txt and *.log files under the Cassandra data directory,
    unpacks the backup tarball (when the machine has one) and starts
    Cassandra again.

    Args:
      path: A string containing the location on each of the DB machines to use
        for restoring data.
      keyname: A string containing the deployment's keyname.
      force: A boolean. When True, machines that lack a restore file do not
        trigger the interactive confirmation prompt; their existing data is
        still cleared and Cassandra is restarted on them.
    Raises:
      BRException if unable to find any Cassandra machines or if Cassandra
        cannot be stopped or started on a machine within the allowed number
        of attempts.
    """
    logging.info("Starting new db restore.")

    db_ips = appscale_info.get_db_ips()
    if not db_ips:
        raise BRException('Unable to find any Cassandra machines.')

    # A failing `ls` is taken to mean that the machine has no restore file.
    machines_without_restore = []
    for db_ip in db_ips:
        exit_code = utils.ssh(db_ip,
                              keyname,
                              'ls {}'.format(path),
                              method=subprocess.call)
        if exit_code != ExitCodes.SUCCESS:
            machines_without_restore.append(db_ip)

    if machines_without_restore and not force:
        logging.info(
            'The following machines do not have a restore file: {}'.format(
                machines_without_restore))
        response = raw_input('Would you like to continue? [y/N] ')
        if response not in ['Y', 'y']:
            return

    # Stop Cassandra everywhere before touching any data files.
    for db_ip in db_ips:
        logging.info('Stopping Cassandra on {}'.format(db_ip))
        # Only issue `monit stop` when the service is not already unmonitored.
        current = _cassandra_monit_status(db_ip, keyname)
        if current != MonitStates.UNMONITORED:
            _run_monit_until(db_ip, keyname, 'stop', MonitStates.UNMONITORED,
                             'Unable to stop Cassandra')

    cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
    # Raw string: the backslashes belong to the `find` regex and the shell,
    # not to Python. The command is the same for every machine.
    clear_db = r'find {0} -regex ".*\.\(db\|txt\|log\)$" -exec rm {{}} \;'.\
        format(cassandra_dir)

    for db_ip in db_ips:
        logging.info('Restoring Cassandra data on {}'.format(db_ip))
        utils.ssh(db_ip, keyname, clear_db)

        if db_ip not in machines_without_restore:
            utils.ssh(db_ip, keyname,
                      'tar xf {} -C {}'.format(path, cassandra_dir))
            utils.ssh(db_ip, keyname,
                      'chown -R cassandra {}'.format(cassandra_dir))

        logging.info('Starting Cassandra on {}'.format(db_ip))
        _run_monit_until(db_ip, keyname, 'start', MonitStates.RUNNING,
                         'Unable to start Cassandra')

        # NOTE(review): this extra start looks redundant once the service is
        # reported as running; it is kept to preserve the original command
        # sequence.
        utils.ssh(db_ip, keyname,
                  'monit start {}'.format(CASSANDRA_MONIT_WATCH_NAME))

    logging.info("Done with db restore.")


def _cassandra_monit_status(db_ip, keyname):
    """ Fetches `monit summary` from a machine and returns Cassandra's status.

    Args:
      db_ip: A string containing the address of the DB machine.
      keyname: A string containing the deployment's keyname.
    Returns:
      The value utils.monit_status reports for the Cassandra watch.
    """
    summary = utils.ssh(db_ip,
                        keyname,
                        'monit summary',
                        method=subprocess.check_output)
    return utils.monit_status(summary, CASSANDRA_MONIT_WATCH_NAME)


def _run_monit_until(db_ip, keyname, action, target_state, failure_message):
    """ Issues a monit action for Cassandra until a target state is reached.

    The action is attempted up to SERVICE_RETRIES + 1 times with a three
    second pause after each attempt. Reaching the target state on the last
    attempt counts as success.

    Args:
      db_ip: A string containing the address of the DB machine.
      keyname: A string containing the deployment's keyname.
      action: A string containing the monit action, e.g. 'start' or 'stop'.
      target_state: The MonitStates value to wait for.
      failure_message: A string used for the BRException on failure.
    Raises:
      BRException if the target state is not reached after the last attempt.
    """
    command = 'monit {} {}'.format(action, CASSANDRA_MONIT_WATCH_NAME)
    retries = SERVICE_RETRIES
    while True:
        utils.ssh(db_ip, keyname, command, method=subprocess.call)
        time.sleep(3)
        # Check the outcome before the retry budget so that a success on the
        # final attempt is not reported as a failure.
        if _cassandra_monit_status(db_ip, keyname) == target_state:
            return
        retries -= 1
        if retries < 0:
            raise BRException(failure_message)