Beispiel #1
0
def start_zookeeper(zk_ips, keyname):
    """ Asks every ZooKeeper node to start the service, then waits for it.

    Args:
      zk_ips: A list of zookeeper node IPs to start ZooKeeper on.
      keyname: A string containing the deployment's keyname.
    Raises:
      ZKInternalException if the start command fails on a node or if none of
        the paths in ZK_SERVER_CMD_LOCATIONS is an existing file.
    """
    logging.info("Starting ZooKeeper...")
    # The start command is the same for every node.
    start_service_cmd = START_SERVICE_SCRIPT + ZK_WATCH_NAME
    for node_ip in zk_ips:
        try:
            ssh(node_ip, keyname, start_service_cmd)
        except subprocess.CalledProcessError:
            message = 'Unable to start ZooKeeper on {}'.format(node_ip)
            logging.exception(message)
            raise ZKInternalException(message)

    logging.info('Waiting for ZooKeeper to be ready')
    # Use the first candidate path that exists on the local filesystem.
    zk_server_cmd = next(
        (candidate for candidate in ZK_SERVER_CMD_LOCATIONS
         if os.path.isfile(candidate)),
        None)
    if zk_server_cmd is None:
        raise ZKInternalException('Unable to find zkServer.sh')

    status_cmd = '{} status'.format(zk_server_cmd)
    # Poll the first node every five seconds until the status call returns 0.
    while True:
        exit_code = ssh(zk_ips[0], keyname, status_cmd,
                        method=subprocess.call)
        if exit_code == 0:
            break
        time.sleep(5)

    logging.info("Successfully started ZooKeeper.")
Beispiel #2
0
def start_zookeeper(zk_ips, keyname):
  """ Starts the ZooKeeper service on each given node and waits for it.

  Args:
    zk_ips: A list of zookeeper node IPs to start ZooKeeper on.
    keyname: A string containing the deployment's keyname.
  Raises:
    ZKInternalException if the start command fails on any node or if no
      entry of ZK_SERVER_CMD_LOCATIONS is an existing file.
  """
  logging.info("Starting ZooKeeper...")
  start_command = START_SERVICE_SCRIPT + ZK_WATCH_NAME
  for zk_ip in zk_ips:
    try:
      ssh(zk_ip, keyname, start_command)
    except subprocess.CalledProcessError:
      error = 'Unable to start ZooKeeper on {}'.format(zk_ip)
      logging.exception(error)
      raise ZKInternalException(error)

  logging.info('Waiting for ZooKeeper to be ready')
  # The for/else raises only when the loop finishes without a match.
  for location in ZK_SERVER_CMD_LOCATIONS:
    if os.path.isfile(location):
      break
  else:
    raise ZKInternalException('Unable to find zkServer.sh')

  status_command = '{} status'.format(location)
  # Keep asking the first node for its status until the call returns 0.
  while ssh(zk_ips[0], keyname, status_command,
            method=subprocess.call) != 0:
    time.sleep(5)

  logging.info("Successfully started ZooKeeper.")
Beispiel #3
0
def stop_cassandra(db_ips, keyname):
    """ Asks every database node to stop Cassandra.

    A failure on one node is logged and the remaining nodes are still tried.

    Args:
      db_ips: A list of database node IPs to stop Cassandra on.
      keyname: A string containing the deployment's keyname.
    """
    logging.info("Stopping Cassandra...")
    # The stop command does not depend on the node.
    stop_service_cmd = STOP_SERVICE_SCRIPT + CASSANDRA_WATCH_NAME
    for node_ip in db_ips:
        try:
            ssh(node_ip, keyname, stop_service_cmd)
        except subprocess.CalledProcessError:
            failure = 'Unable to stop Cassandra on {}'.format(node_ip)
            logging.error(failure)
Beispiel #4
0
def stop_cassandra(db_ips, keyname):
  """ Stops Cassandra on each of the given database nodes.

  Errors from individual nodes are logged rather than raised, so every node
  in the list gets a stop request.

  Args:
    db_ips: A list of database node IPs to stop Cassandra on.
    keyname: A string containing the deployment's keyname.
  """
  logging.info("Stopping Cassandra...")
  stop_command = STOP_SERVICE_SCRIPT + CASSANDRA_WATCH_NAME
  for db_ip in db_ips:
    try:
      ssh(db_ip, keyname, stop_command)
    except subprocess.CalledProcessError:
      error = 'Unable to stop Cassandra on {}'.format(db_ip)
      logging.error(error)
Beispiel #5
0
def stop_zookeeper(zk_ips, keyname):
  """ Stops ZooKeeper on each of the given nodes.

  Errors from individual nodes are logged rather than raised, so every node
  in the list gets a stop request.

  Args:
    zk_ips: A list of zookeeper node IPs to stop ZooKeeper on.
    keyname: A string containing the deployment's keyname.
  """
  logging.info("Stopping ZooKeeper...")
  stop_command = STOP_SERVICE_SCRIPT + ZK_WATCH_NAME
  for zk_ip in zk_ips:
    try:
      ssh(zk_ip, keyname, stop_command)
    except subprocess.CalledProcessError:
      error = 'Unable to stop ZooKeeper on {}'.format(zk_ip)
      logging.error(error)
Beispiel #6
0
def stop_zookeeper(zk_ips, keyname):
    """ Asks every ZooKeeper node to stop the service.

    A failure on one node is logged and the remaining nodes are still tried.

    Args:
      zk_ips: A list of zookeeper node IPs to stop ZooKeeper on.
      keyname: A string containing the deployment's keyname.
    """
    logging.info("Stopping ZooKeeper...")
    # The stop command does not depend on the node.
    stop_service_cmd = STOP_SERVICE_SCRIPT + ZK_WATCH_NAME
    for node_ip in zk_ips:
        try:
            ssh(node_ip, keyname, stop_service_cmd)
        except subprocess.CalledProcessError:
            failure = 'Unable to stop ZooKeeper on {}'.format(node_ip)
            logging.error(failure)
Beispiel #7
0
def estimate_total_entities(session, db_master, keyname):
  """ Estimate the total number of entities.

  The row count of the entity table is divided by
  len(dbconstants.APP_ENTITY_SCHEMA). If the count query fails with one of
  dbconstants.TRANSIENT_CASSANDRA_ERRORS, the "Number of keys (estimate)"
  value printed by nodetool cfstats on the primary DB node is used instead.

  Args:
    session: A cassandra-driver session.
    db_master: A string containing the IP address of the primary DB node.
    keyname: A string containing the deployment keyname.
  Returns:
    A string containing an entity count. Values taken from nodetool carry
    an ' (estimate)' suffix.
  Raises:
    AppScaleDBError if unable to get a count.
  """
  query = SimpleStatement(
    'SELECT COUNT(*) FROM "{}"'.format(dbconstants.APP_ENTITY_TABLE),
    consistency_level=ConsistencyLevel.ONE
  )
  try:
    rows = session.execute(query)[0].count
    # Floor division keeps the result an integer count whether or not the
    # interpreter applies true division to "/".
    return str(rows // len(dbconstants.APP_ENTITY_SCHEMA))
  except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
    stats_cmd = '{nodetool} cfstats {keyspace}.{table}'.format(
      nodetool=cassandra_interface.NODE_TOOL,
      keyspace=cassandra_interface.KEYSPACE,
      table=dbconstants.APP_ENTITY_TABLE)
    stats = ssh(db_master, keyname, stats_cmd, method=subprocess.check_output)
    for line in stats.splitlines():
      if 'Number of keys (estimate)' in line:
        # The estimate is the last whitespace-separated token on the line.
        return '{} (estimate)'.format(line.split()[-1])
  # Reached only when the fallback output had no estimate line.
  raise dbconstants.AppScaleDBError('Unable to estimate total entities.')
Beispiel #8
0
def estimate_total_entities(session, db_master, keyname):
    """ Estimate the total number of entities.

    The row count of the entity table is divided by
    len(dbconstants.APP_ENTITY_SCHEMA). If the count query fails with one of
    dbconstants.TRANSIENT_CASSANDRA_ERRORS, the "Number of keys (estimate)"
    value printed by nodetool cfstats on the primary DB node is used instead.

    Args:
      session: A cassandra-driver session.
      db_master: A string containing the IP address of the primary DB node.
      keyname: A string containing the deployment keyname.
    Returns:
      A string containing an entity count. Values taken from nodetool carry
      an ' (estimate)' suffix.
    Raises:
      AppScaleDBError if unable to get a count.
    """
    query = SimpleStatement(
        'SELECT COUNT(*) FROM "{}"'.format(dbconstants.APP_ENTITY_TABLE),
        consistency_level=ConsistencyLevel.ONE)
    try:
        rows = session.execute(query)[0].count
        # Floor division keeps the result an integer count whether or not
        # the interpreter applies true division to "/".
        return str(rows // len(dbconstants.APP_ENTITY_SCHEMA))
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
        stats_cmd = '{nodetool} cfstats {keyspace}.{table}'.format(
            nodetool=cassandra_interface.NODE_TOOL,
            keyspace=cassandra_interface.KEYSPACE,
            table=dbconstants.APP_ENTITY_TABLE)
        stats = ssh(db_master,
                    keyname,
                    stats_cmd,
                    method=subprocess.check_output)
        for line in stats.splitlines():
            if 'Number of keys (estimate)' in line:
                # The estimate is the last whitespace-separated token.
                return '{} (estimate)'.format(line.split()[-1])
    # Reached only when the fallback output had no estimate line.
    raise dbconstants.AppScaleDBError('Unable to estimate total entities.')
Beispiel #9
0
def backup_data(path, keyname):
    """ Backup Cassandra snapshot data directories/files.

    Each DB machine first takes a fresh snapshot and is checked for free
    space. The tar archives are only created after every machine has passed
    that check.

    Args:
      path: A string containing the location to store the backup on each of
        the DB machines.
      keyname: A string containing the deployment's keyname.
    Raises:
      BRException if unable to find any Cassandra machines or if DB machine
        has insufficient space.
    """
    logger.info("Starting new db backup.")

    db_ips = appscale_info.get_db_ips()
    if not db_ips:
        raise BRException('Unable to find any Cassandra machines.')

    # Backslashes are doubled so the literals contain no invalid escape
    # sequences; the remote shell still receives "\;".
    get_snapshot_size = (
        'find {0} -name "snapshots" -exec du -s {{}} \\;'
    ).format(APPSCALE_DATA_DIR)
    # Free space is checked on the directory that will hold the archive.
    output_dir = '/'.join(path.split('/')[:-1]) + '/'
    check_space = 'df {}'.format(output_dir)

    for db_ip in db_ips:
        appscale_utils.ssh(db_ip, keyname,
                           '{} clearsnapshot'.format(NODE_TOOL))
        appscale_utils.ssh(db_ip, keyname, '{} snapshot'.format(NODE_TOOL))

        du_output = appscale_utils.ssh(db_ip,
                                       keyname,
                                       get_snapshot_size,
                                       method=subprocess.check_output)
        # Sum the first column of every non-empty "du -s" line.
        backup_size = sum(
            int(line.split()[0]) for line in du_output.split('\n') if line)

        df_output = appscale_utils.ssh(db_ip,
                                       keyname,
                                       check_space,
                                       method=subprocess.check_output)
        # Fourth field of the second output line (the "Available" column in
        # the usual df layout).
        available = int(df_output.split('\n')[1].split()[3])

        usable = available * PADDING_PERCENTAGE
        if backup_size > usable:
            raise BRException('{} has insufficient space: {}/{}'.format(
                db_ip, usable, backup_size))

    cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
    # The archive command is identical on every machine. The transform strips
    # the "snapshots/<digits>/" component from the archived paths.
    create_tar = (
        'find . -regex ".*/snapshots/[0-9]*/.*" -exec tar '
        '--transform="s/snapshots\\/[0-9]*\\///" -cf {0} {{}} +'
    ).format(path)
    archive_cmd = 'cd {} && {}'.format(cassandra_dir, create_tar)
    for db_ip in db_ips:
        appscale_utils.ssh(db_ip, keyname, archive_cmd)

    logger.info("Done with db backup.")
Beispiel #10
0
def start_cassandra(db_ips, db_master, keyname, zookeeper_ips):
    """ Configures and starts Cassandra on each node, then waits for it.

    Args:
      db_ips: A list of database node IPs to start Cassandra on.
      db_master: The IP address of the DB master.
      keyname: A string containing the deployment's keyname.
      zookeeper_ips: The IP addresses of the Zookeeper nodes.
    Raises:
      AppScaleDBError if unable to configure or start Cassandra on a node.
    """
    logging.info("Starting Cassandra...")
    config_template = ('{script} --local-ip {ip} --master-ip {db_master} '
                       '--zk-locations {zk_locations}')
    start_service_cmd = START_SERVICE_SCRIPT + CASSANDRA_WATCH_NAME
    for node_ip in db_ips:
        init_config = config_template.format(
            script=SETUP_CASSANDRA_SCRIPT,
            ip=node_ip,
            db_master=db_master,
            zk_locations=get_zk_locations_string(zookeeper_ips))

        # Run the setup script first; the service is started only after it
        # succeeds.
        try:
            ssh(node_ip, keyname, init_config)
        except subprocess.CalledProcessError:
            message = 'Unable to configure Cassandra on {}'.format(node_ip)
            logging.exception(message)
            raise dbconstants.AppScaleDBError(message)

        try:
            ssh(node_ip, keyname, start_service_cmd)
        except subprocess.CalledProcessError:
            message = 'Unable to start Cassandra on {}'.format(node_ip)
            logging.exception(message)
            raise dbconstants.AppScaleDBError(message)

    logging.info('Waiting for Cassandra to be ready')
    status_cmd = '{} status'.format(cassandra_interface.NODE_TOOL)
    # Poll the master every five seconds until the status call returns 0.
    while True:
        exit_code = ssh(db_master, keyname, status_cmd,
                        method=subprocess.call)
        if exit_code == 0:
            break
        time.sleep(5)

    logging.info("Successfully started Cassandra.")
Beispiel #11
0
def start_cassandra(db_ips, db_master, keyname, zookeeper_ips):
  """ Runs the Cassandra setup script and start command on every DB node.

  After all nodes have been started, the DB master is polled until its
  nodetool status call returns 0.

  Args:
    db_ips: A list of database node IPs to start Cassandra on.
    db_master: The IP address of the DB master.
    keyname: A string containing the deployment's keyname.
    zookeeper_ips: The IP addresses of the Zookeeper nodes.
  Raises:
    AppScaleDBError if unable to configure or start Cassandra on a node.
  """
  logging.info("Starting Cassandra...")
  start_command = START_SERVICE_SCRIPT + CASSANDRA_WATCH_NAME
  for db_ip in db_ips:
    zk_locations = get_zk_locations_string(zookeeper_ips)
    setup_command = (
      '{script} --local-ip {ip} --master-ip {db_master} '
      '--zk-locations {zk_locations}'
    ).format(script=SETUP_CASSANDRA_SCRIPT, ip=db_ip, db_master=db_master,
             zk_locations=zk_locations)
    try:
      ssh(db_ip, keyname, setup_command)
    except subprocess.CalledProcessError:
      error = 'Unable to configure Cassandra on {}'.format(db_ip)
      logging.exception(error)
      raise dbconstants.AppScaleDBError(error)

    try:
      ssh(db_ip, keyname, start_command)
    except subprocess.CalledProcessError:
      error = 'Unable to start Cassandra on {}'.format(db_ip)
      logging.exception(error)
      raise dbconstants.AppScaleDBError(error)

  logging.info('Waiting for Cassandra to be ready')
  status_command = '{} status'.format(cassandra_interface.NODE_TOOL)
  while ssh(db_master, keyname, status_command,
            method=subprocess.call) != 0:
    time.sleep(5)

  logging.info("Successfully started Cassandra.")
Beispiel #12
0
def backup_data(path, keyname):
  """ Backup Cassandra snapshot data directories/files.

  Each DB machine first takes a fresh snapshot and is checked for free space.
  The tar archives are only created after every machine has passed that
  check.

  Args:
    path: A string containing the location to store the backup on each of the
      DB machines.
    keyname: A string containing the deployment's keyname.
  Raises:
    BRException if unable to find any Cassandra machines or if DB machine has
      insufficient space.
  """
  logging.info("Starting new db backup.")

  db_ips = appscale_info.get_db_ips()
  if not db_ips:
    raise BRException('Unable to find any Cassandra machines.')

  # Backslashes are doubled so the literals contain no invalid escape
  # sequences; the remote shell still receives "\;".
  get_snapshot_size = (
    'find {0} -name "snapshots" -exec du -s {{}} \\;'
  ).format(APPSCALE_DATA_DIR)
  # Free space is checked on the directory that will hold the archive.
  output_dir = '/'.join(path.split('/')[:-1]) + '/'
  check_space = 'df {}'.format(output_dir)

  for db_ip in db_ips:
    appscale_utils.ssh(db_ip, keyname, '{} clearsnapshot'.format(NODE_TOOL))
    appscale_utils.ssh(db_ip, keyname, '{} snapshot'.format(NODE_TOOL))

    du_output = appscale_utils.ssh(db_ip, keyname, get_snapshot_size,
                                   method=subprocess.check_output)
    # Sum the first column of every non-empty "du -s" line.
    backup_size = sum(int(line.split()[0])
                      for line in du_output.split('\n') if line)

    df_output = appscale_utils.ssh(db_ip, keyname, check_space,
                                   method=subprocess.check_output)
    # Fourth field of the second output line (the "Available" column in the
    # usual df layout).
    available = int(df_output.split('\n')[1].split()[3])

    usable = available * PADDING_PERCENTAGE
    if backup_size > usable:
      raise BRException('{} has insufficient space: {}/{}'.
        format(db_ip, usable, backup_size))

  cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
  # The archive command is identical on every machine. The transform strips
  # the "snapshots/<digits>/" component from the archived paths.
  create_tar = (
    'find . -regex ".*/snapshots/[0-9]*/.*" -exec tar '
    '--transform="s/snapshots\\/[0-9]*\\///" -cf {0} {{}} +'
  ).format(path)
  archive_cmd = 'cd {} && {}'.format(cassandra_dir, create_tar)
  for db_ip in db_ips:
    appscale_utils.ssh(db_ip, keyname, archive_cmd)

  logging.info("Done with db backup.")
Beispiel #13
0
def equalize(node1, node2):
    """ Move data from the node with a larger load to the other one.

    Half of the load difference is shifted by moving node1 to a new token.
    The new token is requested from whichever node currently holds more
    data; the affected token range is then repaired and that node is
    cleaned up.

    Args:
      node1: A dictionary representing a node. Its 'ip', 'load' and 'token'
        entries are read.
      node2: A dictionary representing a neighbor of node1. Its 'ip' and
        'load' entries are read.
    """
    keys = [key for key in os.listdir(KEY_DIRECTORY) if key.endswith('.key')]
    # The deployment keyname is the first key file's name up to the first dot.
    keyname = keys[0].split('.')[0]

    # Divide by a float so that integer loads are not truncated under
    # Python 2's classic division; truncation would force the percentile
    # below to 0 or 100.
    to_move = abs(node1['load'] - node2['load']) / 2.0
    mb_to_move = round(to_move / 1024 ** 2, 2)

    node1_is_donor = node1['load'] > node2['load']
    if node1_is_donor:
        donor, receiver = node1, node2
    else:
        donor, receiver = node2, node1

    logger.info('Moving {} MiB from {} to {}'.format(
        mb_to_move, donor['ip'], receiver['ip']))
    moved_percent = int((to_move / donor['load']) * 100)
    # When node1 gives data away its new token sits near the top of its own
    # range; otherwise it sits near the bottom of node2's range.
    percentile = 100 - moved_percent if node1_is_donor else moved_percent
    new_token = ssh(donor['ip'],
                    keyname,
                    'appscale-get-token {}'.format(percentile),
                    method=check_output).strip()
    if node1_is_donor:
        repair = [new_token, node1['token']]
    else:
        repair = [node1['token'], new_token]
    cleanup_ip = donor['ip']

    # Tokens are shortened to 60 characters in log output only.
    logger.info('Moving {} to {}'.format(node1['ip'], new_token[:60] + '...'))
    ssh(node1['ip'], keyname, '{} move {}'.format(NODE_TOOL, new_token))

    start = repair[0][:60] + '...'
    end = repair[1][:60] + '...'
    logger.info('Repairing {} to {}'.format(start, end))
    check_output([NODE_TOOL, 'repair', '-st', repair[0], '-et', repair[1]])

    logger.info('Cleaning up {}'.format(cleanup_ip))
    ssh(cleanup_ip, keyname, '{} cleanup'.format(NODE_TOOL))
Beispiel #14
0
def equalize(node1, node2):
  """ Move data from the node with a larger load to the other one.

  Half of the load difference is shifted by moving node1 to a new token. The
  new token is requested from whichever node currently holds more data; the
  affected token range is then repaired and that node is cleaned up.

  Args:
    node1: A dictionary representing a node. Its 'ip', 'load' and 'token'
      entries are read.
    node2: A dictionary representing a neighbor of node1. Its 'ip' and 'load'
      entries are read.
  """
  keys = [key for key in os.listdir(KEY_DIRECTORY) if key.endswith('.key')]
  # The deployment keyname is the first key file's name up to the first dot.
  keyname = keys[0].split('.')[0]

  # Divide by a float so that integer loads are not truncated under Python 2's
  # classic division; truncation would force the percentile below to 0 or 100.
  to_move = abs(node1['load'] - node2['load']) / 2.0
  mb_to_move = round(to_move / 1024 ** 2, 2)
  if node1['load'] > node2['load']:
    logging.info('Moving {} MiB from {} to {}'.format(
      mb_to_move, node1['ip'], node2['ip']))
    # node1 gives data away: its new token sits near the top of its range.
    percentile = 100 - int((to_move / node1['load']) * 100)
    new_token = ssh(node1['ip'], keyname,
                    'appscale-get-token {}'.format(percentile),
                    method=check_output).strip()
    repair = [new_token, node1['token']]
    cleanup_ip = node1['ip']
  else:
    logging.info('Moving {} MiB from {} to {}'.format(
      mb_to_move, node2['ip'], node1['ip']))
    # node1 takes data over: its new token sits near the bottom of node2's
    # range.
    percentile = int((to_move / node2['load']) * 100)
    new_token = ssh(node2['ip'], keyname,
                    'appscale-get-token {}'.format(percentile),
                    method=check_output).strip()
    repair = [node1['token'], new_token]
    cleanup_ip = node2['ip']

  # Tokens are shortened to 60 characters in log output only.
  logging.info('Moving {} to {}'.format(node1['ip'], new_token[:60] + '...'))
  ssh(node1['ip'], keyname, '{} move {}'.format(NODE_TOOL, new_token))

  start = repair[0][:60] + '...'
  end = repair[1][:60] + '...'
  logging.info('Repairing {} to {}'.format(start, end))
  check_output([NODE_TOOL, 'repair', '-st', repair[0], '-et', repair[1]])

  logging.info('Cleaning up {}'.format(cleanup_ip))
  ssh(cleanup_ip, keyname, '{} cleanup'.format(NODE_TOOL))
Beispiel #15
0
def restore_data(path, keyname, force=False):
  """ Restores the Cassandra backup.

  Cassandra is stopped on every DB machine, its existing data files are
  removed, the backup archive is unpacked on the machines that have one, and
  Cassandra is started again. Finally the first DB machine is polled until
  nodetool status lists at least as many 'UN' lines as there are DB machines
  or SCHEMA_CHANGE_TIMEOUT seconds have passed.

  Args:
    path: A string containing the location on each of the DB machines to use
      for restoring data.
    keyname: A string containing the deployment's keyname.
    force: A boolean. When False and some machines lack the restore file, the
      user is asked to confirm before anything is changed.
  Raises:
    BRException if unable to find any Cassandra machines or if Cassandra
      cannot be stopped or started after repeated attempts.
  """
  logging.info("Starting new db restore.")

  db_ips = appscale_info.get_db_ips()
  if not db_ips:
    raise BRException('Unable to find any Cassandra machines.')

  # A machine where "ls <path>" fails has no archive to restore from.
  machines_without_restore = []
  for db_ip in db_ips:
    exit_code = appscale_utils.ssh(db_ip, keyname, 'ls {}'.format(path),
                                   method=subprocess.call)
    if exit_code != ExitCodes.SUCCESS:
      machines_without_restore.append(db_ip)

  if machines_without_restore and not force:
    logging.info('The following machines do not have a restore file: {}'.
      format(machines_without_restore))
    response = raw_input('Would you like to continue? [y/N] ')
    if response not in ['Y', 'y']:
      return

  stop_cmd = 'monit stop {}'.format(CASSANDRA_MONIT_WATCH_NAME)
  start_cmd = 'monit start {}'.format(CASSANDRA_MONIT_WATCH_NAME)

  for db_ip in db_ips:
    logging.info('Stopping Cassandra on {}'.format(db_ip))
    summary = appscale_utils.ssh(db_ip, keyname, 'monit summary',
                                 method=subprocess.check_output)
    status = utils.monit_status(summary, CASSANDRA_MONIT_WATCH_NAME)
    retries = SERVICE_RETRIES
    # Repeat the stop request until monit reports the service unmonitored.
    while status != MonitStates.UNMONITORED:
      appscale_utils.ssh(db_ip, keyname, stop_cmd, method=subprocess.call)
      time.sleep(3)
      summary = appscale_utils.ssh(db_ip, keyname, 'monit summary',
                                   method=subprocess.check_output)
      status = utils.monit_status(summary, CASSANDRA_MONIT_WATCH_NAME)
      retries -= 1
      if retries < 0:
        raise BRException('Unable to stop Cassandra')

  cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
  # Backslashes are doubled so the literal contains no invalid escape
  # sequences; the remote shell receives the same text as before.
  clear_db = (
    'find {0} -regex ".*\\.\\(db\\|txt\\|log\\)$" -exec rm {{}} \\;'
  ).format(cassandra_dir)
  for db_ip in db_ips:
    logging.info('Restoring Cassandra data on {}'.format(db_ip))
    appscale_utils.ssh(db_ip, keyname, clear_db)

    if db_ip not in machines_without_restore:
      appscale_utils.ssh(db_ip, keyname,
                         'tar xf {} -C {}'.format(path, cassandra_dir))
      appscale_utils.ssh(db_ip, keyname,
                         'chown -R cassandra {}'.format(cassandra_dir))

    logging.info('Starting Cassandra on {}'.format(db_ip))
    retries = SERVICE_RETRIES
    status = MonitStates.UNMONITORED
    # Repeat the start request until monit reports the service running.
    while status != MonitStates.RUNNING:
      appscale_utils.ssh(db_ip, keyname, start_cmd, method=subprocess.call)
      time.sleep(3)
      summary = appscale_utils.ssh(db_ip, keyname, 'monit summary',
                                   method=subprocess.check_output)
      status = utils.monit_status(summary, CASSANDRA_MONIT_WATCH_NAME)
      retries -= 1
      if retries < 0:
        raise BRException('Unable to start Cassandra')

    appscale_utils.ssh(db_ip, keyname, start_cmd)

  logging.info('Waiting for Cassandra cluster to be ready')
  db_ip = db_ips[0]
  deadline = time.time() + SCHEMA_CHANGE_TIMEOUT
  while True:
    try:
      output = appscale_utils.ssh(
        db_ip, keyname, '{} status'.format(NODE_TOOL),
        method=subprocess.check_output)
    except subprocess.CalledProcessError:
      # A failing nodetool call just means the cluster is not ready yet.
      ready = False
    else:
      nodes_ready = sum(1 for line in output.split('\n')
                        if line.startswith('UN'))
      ready = nodes_ready >= len(db_ips)

    if ready:
      break

    if time.time() > deadline:
      # Give up waiting, but do not treat this as a failed restore.
      logging.warning('Cassandra cluster still not ready.')
      break

    time.sleep(3)

  logging.info("Done with db restore.")
Beispiel #16
0

if __name__ == "__main__":
  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
  parser = init_parser()
  args = parser.parse_args()
  status = {'status': 'inProgress', 'message': 'Starting services'}
  write_to_json_file(status, args.log_postfix)

  db_access = None
  zookeeper = None
  try:
    # Ensure monit is running.
    relevant_ips = set(args.zookeeper) | set(args.database)
    for ip in relevant_ips:
      ssh(ip, args.keyname, 'service monit start')

    start_zookeeper(args.zookeeper, args.keyname)
    conn = KazooClient(hosts=",".join(args.zookeeper))
    conn.start()
    if not conn.exists(ZK_CASSANDRA_CONFIG):
      conn.create(ZK_CASSANDRA_CONFIG, json.dumps({"num_tokens":256}),
                  makepath=True)
    start_cassandra(args.database, args.db_master, args.keyname, args.zookeeper)
    datastore_upgrade.wait_for_quorum(
      args.keyname, args.db_master, len(args.database), args.replication)
    db_access = DatastoreProxy(hosts=args.database)

    # Exit early if a data layout upgrade is not needed.
    if db_access.valid_data_version_sync():
      status = {'status': 'complete', 'message': 'The data layout is valid'}
Beispiel #17
0

if __name__ == "__main__":
    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
    parser = init_parser()
    args = parser.parse_args()
    status = {'status': 'inProgress', 'message': 'Starting services'}
    write_to_json_file(status, args.log_postfix)

    db_access = None
    zookeeper = None
    try:
        # Ensure monit is running.
        relevant_ips = set(args.zookeeper) | set(args.database)
        for ip in relevant_ips:
            ssh(ip, args.keyname, 'service monit start')

        start_zookeeper(args.zookeeper, args.keyname)
        conn = KazooClient(hosts=",".join(args.zookeeper))
        conn.start()
        if not conn.exists(ZK_CASSANDRA_CONFIG):
            conn.create(ZK_CASSANDRA_CONFIG,
                        json.dumps({"num_tokens": 256}),
                        makepath=True)
        start_cassandra(args.database, args.db_master, args.keyname,
                        args.zookeeper)
        datastore_upgrade.wait_for_quorum(args.keyname, args.db_master,
                                          len(args.database), args.replication)
        db_access = DatastoreProxy(hosts=args.database)

        # Exit early if a data layout upgrade is not needed.
Beispiel #18
0
def restore_data(path, keyname, force=False):
    """ Restores the Cassandra backup.

    Cassandra is stopped on every DB machine, its existing data files are
    removed, the backup archive is unpacked on the machines that have one,
    and Cassandra is started again. Finally the first DB machine is polled
    until nodetool status lists at least as many 'UN' lines as there are DB
    machines or SCHEMA_CHANGE_TIMEOUT seconds have passed.

    Args:
      path: A string containing the location on each of the DB machines to
        use for restoring data.
      keyname: A string containing the deployment's keyname.
      force: A boolean. When False and some machines lack the restore file,
        the user is asked to confirm before anything is changed.
    Raises:
      BRException if unable to find any Cassandra machines or if Cassandra
        cannot be stopped or started after repeated attempts.
    """
    logger.info("Starting new db restore.")

    db_ips = appscale_info.get_db_ips()
    if not db_ips:
        raise BRException('Unable to find any Cassandra machines.')

    # A machine where "ls <path>" fails has no archive to restore from.
    machines_without_restore = []
    for db_ip in db_ips:
        exit_code = appscale_utils.ssh(db_ip,
                                       keyname,
                                       'ls {}'.format(path),
                                       method=subprocess.call)
        if exit_code != utils.ExitCodes.SUCCESS:
            machines_without_restore.append(db_ip)

    if machines_without_restore and not force:
        logger.info(
            'The following machines do not have a restore file: {}'.format(
                machines_without_restore))
        response = raw_input('Would you like to continue? [y/N] ')
        if response not in ['Y', 'y']:
            return

    stop_cmd = 'appscale-stop-service {}'.format(CASSANDRA_MONIT_WATCH_NAME)
    start_cmd = 'appscale-start-service {}'.format(
        CASSANDRA_MONIT_WATCH_NAME)

    for db_ip in db_ips:
        logger.info('Stopping Cassandra on {}'.format(db_ip))
        summary = appscale_utils.ssh(db_ip,
                                     keyname,
                                     'appscale-admin summary',
                                     method=subprocess.check_output)
        # The service's state is read from the summary line that starts
        # with its watch name; a missing line is treated as an empty status.
        status_line = next((line for line in summary.split('\n')
                            if line.startswith(CASSANDRA_MONIT_WATCH_NAME)),
                           '')
        retries = SERVICE_RETRIES
        while MonitStates.UNMONITORED not in status_line:
            appscale_utils.ssh(db_ip, keyname, stop_cmd,
                               method=subprocess.call)
            time.sleep(3)
            summary = appscale_utils.ssh(db_ip,
                                         keyname,
                                         'appscale-admin summary',
                                         method=subprocess.check_output)
            status_line = next(
                (line for line in summary.split('\n')
                 if line.startswith(CASSANDRA_MONIT_WATCH_NAME)), '')
            retries -= 1
            if retries < 0:
                raise BRException('Unable to stop Cassandra')

    cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
    # Backslashes are doubled so the literal contains no invalid escape
    # sequences; the remote shell receives the same text as before.
    clear_db = (
        'find {0} -regex ".*\\.\\(db\\|txt\\|log\\)$" -exec rm {{}} \\;'
    ).format(cassandra_dir)
    for db_ip in db_ips:
        logger.info('Restoring Cassandra data on {}'.format(db_ip))
        appscale_utils.ssh(db_ip, keyname, clear_db)

        if db_ip not in machines_without_restore:
            appscale_utils.ssh(db_ip, keyname,
                               'tar xf {} -C {}'.format(path, cassandra_dir))
            appscale_utils.ssh(db_ip, keyname,
                               'chown -R cassandra {}'.format(cassandra_dir))

        logger.info('Starting Cassandra on {}'.format(db_ip))
        retries = SERVICE_RETRIES
        status_line = MonitStates.UNMONITORED
        # Repeat the start request until the summary reports it running.
        while MonitStates.RUNNING not in status_line:
            appscale_utils.ssh(db_ip, keyname, start_cmd,
                               method=subprocess.call)
            time.sleep(3)
            summary = appscale_utils.ssh(db_ip,
                                         keyname,
                                         'appscale-admin summary',
                                         method=subprocess.check_output)
            status_line = next(
                (line for line in summary.split('\n')
                 if line.startswith(CASSANDRA_MONIT_WATCH_NAME)), '')
            retries -= 1
            if retries < 0:
                raise BRException('Unable to start Cassandra')

        appscale_utils.ssh(db_ip, keyname, start_cmd)

    logger.info('Waiting for Cassandra cluster to be ready')
    db_ip = db_ips[0]
    deadline = time.time() + SCHEMA_CHANGE_TIMEOUT
    while True:
        try:
            output = appscale_utils.ssh(db_ip,
                                        keyname,
                                        '{} status'.format(NODE_TOOL),
                                        method=subprocess.check_output)
        except subprocess.CalledProcessError:
            # A failing nodetool call just means the cluster is not ready.
            ready = False
        else:
            nodes_ready = sum(1 for line in output.split('\n')
                              if line.startswith('UN'))
            ready = nodes_ready >= len(db_ips)

        if ready:
            break

        if time.time() > deadline:
            # Give up waiting, but do not treat this as a failed restore.
            logger.warning('Cassandra cluster still not ready.')
            break

        time.sleep(3)

    logger.info("Done with db restore.")