def recover(num_servers,
            backups_per_server,
            num_partitions,
            object_size,
            num_objects,
            num_overwrites,
            replicas,
            coordinator_args='',
            master_args='',
            backup_args='',
            master_ram=None,
            old_master_ram=None,
            num_removals=0,
            timeout=100,
            log_level='NOTICE',
            log_dir='logs',
            transport='infrc',
            verbose=False,
            debug=False):
    """Run a recovery on the cluster specified in the config module and
    return statistics about the recovery.

    See the config module for more parameters that affect how the recovery
    will be run.  In particular, config.hosts and config.old_master_host
    select which hosts in the cluster the recovery will be run on.

    @param num_servers: Number of hosts on which to run Masters.
    @type num_servers: C{int}

    @param backups_per_server: Number of Backups to colocate on the same
        host with each Master.  If this is 1, the Backup runs in the same
        process as the Master.  If this is 2, an additional process is
        started to run the second Backup.
    @type backups_per_server: C{int}

    @param num_partitions: Number of partitions to create on the old Master
        before it is crashed.  This also determines how many Recovery
        Masters will participate in the recovery.
    @type num_partitions: C{int}

    @param object_size: Size of the objects to fill the old Master with
        before the crash.
    @type object_size: C{int}

    @param num_objects: Number of objects to fill the old Master with
        before the crash.
    @type num_objects: C{int}

    @param num_overwrites: Number of writes to the same key while filling
        the old Master.
    @type num_overwrites: C{int}

    @param replicas: Number of times each segment is replicated to
        different backups in the cluster.
    @type replicas: C{int}

    @param coordinator_args: Additional command-line arguments to pass to
        the coordinator.
    @type coordinator_args: C{str}

    @param master_args: Additional command-line arguments to pass to each
        server that acts as a master.
    @type master_args: C{str}

    @param backup_args: Additional command-line arguments to pass to each
        server that acts as a backup.
    @type backup_args: C{str}

    @param master_ram: Megabytes of log space allocated in each of the
        Masters (except the old Master, see old_master_ram).  If left
        unspecified, a sane default is chosen based on num_objects,
        object_size, and num_overwrites.
    @type master_ram: C{int}

    @param old_master_ram: Megabytes of log space allocated in the old
        Master that will eventually be crashed.  If left unspecified, a
        sane default is chosen based on num_objects, num_overwrites, and
        object_size.
    @type old_master_ram: C{int}

    @param num_removals: Number of erases to do after the inserts.  Allows
        recoveries where some of the log contains tombstones.
    @type num_removals: C{int}

    @param timeout: Seconds to wait before giving up and declaring the
        recovery to have failed.
    @type timeout: C{int}

    @param log_level: Log level to use for all servers: DEBUG, NOTICE,
        WARNING, or ERROR.
    @type log_level: C{str}

    @param log_dir: Top-level directory in which to write log files.  A
        separate subdirectory will be created in this directory for the log
        files from this run.
    @type log_dir: C{str}

    @param transport: A transport name (e.g. infrc, fast+udp, tcp, ...).
    @type transport: C{str}

    @param verbose: Print information about progress in starting clients
        and servers.
    @type verbose: C{bool}

    @param debug: If True, pause after starting all servers to allow for
        debugging setup such as attaching gdb.
    @type debug: C{bool}

    @return: A recoverymetrics stats struct (see
        recoverymetrics.parseRecovery).
    """
    server_binary = '%s/server' % obj_path
    client_binary = '%s/recovery' % obj_path
    ensure_servers_bin = '%s/ensureServers' % obj_path

    args = {}
    args['num_servers'] = num_servers
    args['backups_per_server'] = backups_per_server
    args['replicas'] = replicas
    args['timeout'] = timeout
    args['log_level'] = log_level
    args['log_dir'] = log_dir
    args['transport'] = transport
    args['verbose'] = verbose
    args['debug'] = debug
    args['coordinator_host'] = config.old_master_host
    args['coordinator_args'] = coordinator_args
    args['backup_args'] = backup_args

    # Just a guess at about how much log capacity a master will need to
    # hold all the data from all the partitions.
    if master_ram:
        log_space_per_partition = master_ram
    else:
        log_space_per_partition = (
            200 + (1.3 * num_objects / object_size * num_overwrites))
    # Extra segments are needed because the dummy tablets placed on the
    # masters cause log head rollovers.  Without compensating for the space
    # wasted by that, the recovery masters run out of space.
    args['master_args'] = '-d -D -t %d' % (log_space_per_partition +
                                           8 * num_partitions)
    if master_args:
        args['master_args'] += ' ' + master_args

    args['client'] = ('%s -f -n %d -r %d -s %d '
                      '-t %d -k %d -l %s -o %d' %
                      (client_binary, num_objects, num_removals,
                       object_size, num_partitions, num_servers,
                       log_level, num_overwrites))
    args['old_master_host'] = config.old_master_host
    args['client_hosts'] = [config.old_master_host]

    if old_master_ram:
        args['old_master_args'] = '-d -t %d' % old_master_ram
    else:
        old_master_ram = (log_space_per_partition *
                          num_partitions * num_overwrites)
        if (args['coordinator_host'][0] == 'rcmaster' and
                old_master_ram > 42000):
            print('Warning: pushing the limits of rcmaster; '
                  'limiting RAM to 42000 MB to avoid knocking it over; '
                  'use rcmonster if you need more')
            old_master_ram = 42000
        args['old_master_args'] = '-d -D -t %d' % old_master_ram

    recovery_logs = cluster.run(**args)

    # Collect metrics information.
    stats = {}
    stats['metrics'] = recoverymetrics.parseRecovery(recovery_logs)
    report = recoverymetrics.makeReport(stats['metrics']).jsonable()
    f = open('%s/metrics' % recovery_logs, 'w')
    getDumpstr().print_report(report, file=f)
    f.close()
    stats['run'] = recovery_logs
    stats['count'] = num_objects
    stats['size'] = object_size
    stats['ns'] = stats['metrics'].client.recoveryNs
    stats['report'] = report
    return stats
def recover(num_servers,
            backup_disks_per_server,
            object_size,
            num_objects,
            num_overwrites,
            replicas,
            coordinator_args='',
            master_args='',
            backup_args='',
            master_ram=None,
            old_master_ram=None,
            num_removals=0,
            timeout=100,
            log_level='NOTICE',
            log_dir='logs',
            transport='basic+infud',
            verbose=False,
            debug=False):
    """Run a recovery on the cluster specified in the config module and
    return statistics about the recovery.

    See the config module for more parameters that affect how the recovery
    will be run.  In particular, config.hosts and config.old_master_host
    select which hosts in the cluster the recovery will be run on.

    @param num_servers: Number of hosts on which to run Masters.
    @type num_servers: C{int}

    @param backup_disks_per_server: Number of backup disks to use on each
        Master.  All disks are shared by one Backup (in the same process as
        the Master).
    @type backup_disks_per_server: C{int}

    @param object_size: Size of the objects to fill the old Master with
        before the crash.
    @type object_size: C{int}

    @param num_objects: Number of objects to fill the old Master with
        before the crash.
    @type num_objects: C{int}

    @param num_overwrites: Number of writes to the same key while filling
        the old Master.
    @type num_overwrites: C{int}

    @param replicas: Number of times each segment is replicated to
        different backups in the cluster.
    @type replicas: C{int}

    @param coordinator_args: Additional command-line arguments to pass to
        the coordinator.
    @type coordinator_args: C{str}

    @param master_args: Additional command-line arguments to pass to each
        server that acts as a master.
    @type master_args: C{str}

    @param backup_args: Additional command-line arguments to pass to each
        server that acts as a backup.
    @type backup_args: C{str}

    @param master_ram: Megabytes of log space allocated in each of the
        Masters (except the old Master, see old_master_ram).  If left
        unspecified, a sane default is chosen based on num_objects,
        object_size, and num_overwrites.
    @type master_ram: C{int}

    @param old_master_ram: Megabytes of log space allocated in the old
        Master that will eventually be crashed.  If left unspecified, a
        sane default is chosen based on num_objects, num_overwrites, and
        object_size.
    @type old_master_ram: C{int}

    @param num_removals: Number of erases to do after the inserts.  Allows
        recoveries where some of the log contains tombstones.
    @type num_removals: C{int}

    @param timeout: Seconds to wait before giving up and declaring the
        recovery to have failed.
    @type timeout: C{int}

    @param log_level: Log level to use for all servers: DEBUG, NOTICE,
        WARNING, or ERROR.
    @type log_level: C{str}

    @param log_dir: Top-level directory in which to write log files.  A
        separate subdirectory will be created in this directory for the log
        files from this run.
    @type log_dir: C{str}

    @param transport: A transport name (e.g. infrc, basic+udp, tcp, ...).
    @type transport: C{str}

    @param verbose: Print information about progress in starting clients
        and servers.
    @type verbose: C{bool}

    @param debug: If True, pause after starting all servers to allow for
        debugging setup such as attaching gdb.
    @type debug: C{bool}

    @return: A recoverymetrics stats struct (see
        recoverymetrics.parseRecovery).
    """
    server_binary = '%s/server' % obj_path
    client_binary = '%s/recovery' % obj_path
    ensure_servers_bin = '%s/ensureServers' % obj_path

    args = {}
    args['num_servers'] = num_servers
    args['backup_disks_per_server'] = backup_disks_per_server
    args['replicas'] = replicas
    args['timeout'] = timeout
    args['log_level'] = log_level
    args['log_dir'] = log_dir
    args['transport'] = transport
    args['verbose'] = verbose
    args['debug'] = debug
    args['coordinator_host'] = getOldMasterHost()
    args['coordinator_args'] = coordinator_args
    # Use the caller's backup arguments if given; otherwise fall back to a
    # default that raises the non-volatile buffer limit.
    if backup_args:
        args['backup_args'] = backup_args
    else:
        args['backup_args'] = '--maxNonVolatileBuffers 1000'

    # Allocate enough memory on recovery masters to handle several recovery
    # partitions (most recoveries will only have one recovery partition per
    # master, which is about 500 MB).
    args['master_args'] = '-d -D -t 5000'
    if master_args:
        args['master_args'] += ' ' + master_args

    args['client'] = ('%s -f -n %d -r %d -s %d '
                      '-k %d -l %s -o %d' %
                      (client_binary, num_objects, num_removals,
                       object_size, num_servers, log_level,
                       num_overwrites))
    args['old_master_host'] = getOldMasterHost()
    args['client_hosts'] = [getOldMasterHost()]

    if old_master_ram:
        args['old_master_args'] = '-d -t %d' % old_master_ram
    else:
        # Estimate how much log space the old master will need to hold all
        # of the data.
        old_master_ram = (200 +
                          (1.3 * num_objects / object_size * num_overwrites))
        if (args['coordinator_host'][0] == 'rcmaster' and
                old_master_ram > 42000):
            print('Warning: pushing the limits of rcmaster; '
                  'limiting RAM to 42000 MB to avoid knocking it over; '
                  'use rcmonster if you need more')
            old_master_ram = 42000
        args['old_master_args'] = '-d -D -t %d' % old_master_ram

    recovery_logs = cluster.run(**args)

    # Collect metrics information.
    stats = {}
    stats['metrics'] = recoverymetrics.parseRecovery(recovery_logs)
    report = recoverymetrics.makeReport(stats['metrics']).jsonable()
    f = open('%s/metrics' % recovery_logs, 'w')
    getDumpstr().print_report(report, file=f)
    f.close()
    stats['run'] = recovery_logs
    stats['count'] = num_objects
    stats['size'] = object_size
    stats['ns'] = stats['metrics'].client.recoveryNs
    stats['report'] = report
    return stats
def recover(
    num_servers,
    backups_per_server,
    num_partitions,
    object_size,
    num_objects,
    replicas,
    coordinator_args="",
    master_args="",
    backup_args="",
    master_ram=None,
    old_master_ram=None,
    num_removals=0,
    timeout=60,
    log_level="NOTICE",
    log_dir="logs",
    transport="infrc",
    verbose=False,
    debug=False,
):
    """Run a recovery on the cluster specified in the config module and
    return statistics about the recovery.

    See the config module for more parameters that affect how the recovery
    will be run.  In particular, config.hosts and config.old_master_host
    select which hosts in the cluster the recovery will be run on.

    @param num_servers: Number of hosts on which to run Masters.
    @type num_servers: C{int}

    @param backups_per_server: Number of Backups to colocate on the same
        host with each Master.  If this is 1, the Backup runs in the same
        process as the Master.  If this is 2, an additional process is
        started to run the second Backup.
    @type backups_per_server: C{int}

    @param num_partitions: Number of partitions to create on the old Master
        before it is crashed.  This also determines how many Recovery
        Masters will participate in the recovery.
    @type num_partitions: C{int}

    @param object_size: Size of the objects to fill the old Master with
        before the crash.
    @type object_size: C{int}

    @param num_objects: Number of objects to fill the old Master with
        before the crash.
    @type num_objects: C{int}

    @param replicas: Number of times each segment is replicated to
        different backups in the cluster.
    @type replicas: C{int}

    @param coordinator_args: Additional command-line arguments to pass to
        the coordinator.
    @type coordinator_args: C{str}

    @param master_args: Additional command-line arguments to pass to each
        server that acts as a master.
    @type master_args: C{str}

    @param backup_args: Additional command-line arguments to pass to each
        server that acts as a backup.
    @type backup_args: C{str}

    @param master_ram: Megabytes of log space allocated in each of the
        Masters (except the old Master, see old_master_ram).  If left
        unspecified, a sane default is chosen based on num_objects and
        object_size.
    @type master_ram: C{int}

    @param old_master_ram: Megabytes of log space allocated in the old
        Master that will eventually be crashed.  If left unspecified, a
        sane default is chosen based on num_objects and object_size.
    @type old_master_ram: C{int}

    @param num_removals: Number of erases to do after the inserts.  Allows
        recoveries where some of the log contains tombstones.
    @type num_removals: C{int}

    @param timeout: Seconds to wait before giving up and declaring the
        recovery to have failed.
    @type timeout: C{int}

    @param log_level: Log level to use for all servers: DEBUG, NOTICE,
        WARNING, or ERROR.
    @type log_level: C{str}

    @param log_dir: Top-level directory in which to write log files.  A
        separate subdirectory will be created in this directory for the log
        files from this run.
    @type log_dir: C{str}

    @param transport: A transport name (e.g. infrc, fast+udp, tcp, ...).
    @type transport: C{str}

    @param verbose: Print information about progress in starting clients
        and servers.
    @type verbose: C{bool}

    @param debug: If True, pause after starting all servers to allow for
        debugging setup such as attaching gdb.
    @type debug: C{bool}

    @return: A recoverymetrics stats struct (see
        recoverymetrics.parseRecovery).
    """
    server_binary = "%s/server" % obj_path
    client_binary = "%s/recovery" % obj_path
    ensure_servers_bin = "%s/ensureServers" % obj_path

    args = {}
    args["num_servers"] = num_servers
    args["backups_per_server"] = backups_per_server
    args["replicas"] = replicas
    args["timeout"] = timeout
    args["log_level"] = log_level
    args["log_dir"] = log_dir
    args["transport"] = transport
    args["verbose"] = verbose
    args["debug"] = debug
    args["coordinator_args"] = coordinator_args
    args["backup_args"] = backup_args

    # Just a guess at about how much log capacity a master will need to
    # hold all the data from all the partitions.
    if master_ram:
        log_space_per_partition = master_ram
    else:
        log_space_per_partition = 200 + (1.3 * num_objects / object_size)
    args["master_args"] = "-t %d" % log_space_per_partition
    if master_args:
        args["master_args"] += " " + master_args

    args["client"] = "%s -f -n %d -r %d -s %d -t %d -k %d" % (
        client_binary,
        num_objects,
        num_removals,
        object_size,
        num_partitions,
        num_servers,
    )
    args["old_master_host"] = config.old_master_host
    if old_master_ram:
        args["old_master_args"] = "-t %d" % old_master_ram
    else:
        args["old_master_args"] = "-t %d" % (
            log_space_per_partition * num_partitions
        )

    recovery_logs = cluster.run(**args)

    # Collect metrics information.
    stats = {}
    stats["metrics"] = recoverymetrics.parseRecovery(recovery_logs)
    report = recoverymetrics.makeReport(stats["metrics"]).jsonable()
    f = open("%s/metrics" % recovery_logs, "w")
    getDumpstr().print_report(report, file=f)
    f.close()
    stats["run"] = recovery_logs
    stats["count"] = num_objects
    stats["size"] = object_size
    stats["ns"] = stats["metrics"].client.recoveryNs
    stats["report"] = report
    return stats
def recover(num_servers,
            backup_disks_per_server,
            object_size,
            num_objects,
            num_overwrites,
            replicas,
            coordinator_args='',
            master_args='',
            backup_args='',
            master_ram=None,
            old_master_ram=None,
            num_removals=0,
            timeout=100,
            log_level='NOTICE',
            log_dir='logs',
            transport='basic+infud',
            verbose=False,
            debug=False):
    """Run a recovery on the cluster specified in the config module and
    return statistics about the recovery.

    See the config module for more parameters that affect how the recovery
    will be run.  In particular, config.hosts and config.old_master_host
    select which hosts in the cluster the recovery will be run on.

    @param num_servers: Number of hosts on which to run Masters.
    @type num_servers: C{int}

    @param backup_disks_per_server: Number of backup disks to use on each
        Master.  All disks are shared by one Backup (in the same process as
        the Master).
    @type backup_disks_per_server: C{int}

    @param object_size: Size of the objects to fill the old Master with
        before the crash.
    @type object_size: C{int}

    @param num_objects: Number of objects to fill the old Master with
        before the crash.
    @type num_objects: C{int}

    @param num_overwrites: Number of writes to the same key while filling
        the old Master.
    @type num_overwrites: C{int}

    @param replicas: Number of times each segment is replicated to
        different backups in the cluster.
    @type replicas: C{int}

    @param coordinator_args: Additional command-line arguments to pass to
        the coordinator.
    @type coordinator_args: C{str}

    @param master_args: Additional command-line arguments to pass to each
        server that acts as a master.
    @type master_args: C{str}

    @param backup_args: Additional command-line arguments to pass to each
        server that acts as a backup.
    @type backup_args: C{str}

    @param master_ram: Megabytes of log space allocated in each of the
        Masters (except the old Master, see old_master_ram).  If left
        unspecified, a sane default is chosen based on num_objects,
        object_size, and num_overwrites.
    @type master_ram: C{int}

    @param old_master_ram: Megabytes of log space allocated in the old
        Master that will eventually be crashed.  If left unspecified, a
        sane default is chosen based on num_objects, num_overwrites, and
        object_size.
    @type old_master_ram: C{int}

    @param num_removals: Number of erases to do after the inserts.  Allows
        recoveries where some of the log contains tombstones.
    @type num_removals: C{int}

    @param timeout: Seconds to wait before giving up and declaring the
        recovery to have failed.
    @type timeout: C{int}

    @param log_level: Log level to use for all servers: DEBUG, NOTICE,
        WARNING, or ERROR.
    @type log_level: C{str}

    @param log_dir: Top-level directory in which to write log files.  A
        separate subdirectory will be created in this directory for the log
        files from this run.
    @type log_dir: C{str}

    @param transport: A transport name (e.g. infrc, basic+udp, tcp, ...).
    @type transport: C{str}

    @param verbose: Print information about progress in starting clients
        and servers.
    @type verbose: C{bool}

    @param debug: If True, pause after starting all servers to allow for
        debugging setup such as attaching gdb.
    @type debug: C{bool}

    @return: A recoverymetrics stats struct (see
        recoverymetrics.parseRecovery).
    """
    server_binary = '%s/server' % obj_path
    client_binary = '%s/apps/recovery' % obj_path
    ensure_servers_bin = '%s/ensureServers' % obj_path

    args = {}
    args['num_servers'] = num_servers
    args['backup_disks_per_server'] = backup_disks_per_server
    args['replicas'] = replicas
    args['timeout'] = timeout
    args['log_level'] = log_level
    args['log_dir'] = log_dir
    args['transport'] = transport
    args['verbose'] = verbose
    args['debug'] = debug
    args['coordinator_host'] = getOldMasterHost()
    args['coordinator_args'] = coordinator_args
    # Use the caller's backup arguments if given; otherwise fall back to a
    # default that raises the non-volatile buffer limit.
    if backup_args:
        args['backup_args'] = backup_args
    else:
        args['backup_args'] = '--maxNonVolatileBuffers 1000'

    # Allocate enough memory on recovery masters to handle several recovery
    # partitions (most recoveries will only have one recovery partition per
    # master, which is about 500 MB).
    args['master_args'] = '-d -D -t 5000'
    if master_args:
        args['master_args'] += ' ' + master_args

    args['client'] = ('%s -f -n %d -r %d -s %d '
                      '-k %d -l %s -o %d' %
                      (client_binary, num_objects, num_removals,
                       object_size, num_servers, log_level,
                       num_overwrites))
    args['old_master_host'] = getOldMasterHost()
    args['client_hosts'] = [getOldMasterHost()]

    if old_master_ram:
        args['old_master_args'] = '-d -t %d' % old_master_ram
    else:
        # Estimate how much log space the old master will need to hold all
        # of the data.
        old_master_ram = (200 +
                          (1.3 * num_objects / object_size * num_overwrites))
        if (args['coordinator_host'][0] == 'rcmaster' and
                old_master_ram > 42000):
            print('Warning: pushing the limits of rcmaster; '
                  'limiting RAM to 42000 MB to avoid knocking it over; '
                  'use rcmonster if you need more')
            old_master_ram = 42000
        args['old_master_args'] = '-d -D -t %d' % old_master_ram

    recovery_logs = cluster.run(**args)

    # Collect metrics information.
    stats = {}
    stats['metrics'] = recoverymetrics.parseRecovery(recovery_logs)
    report = recoverymetrics.makeReport(stats['metrics']).jsonable()
    f = open('%s/metrics' % recovery_logs, 'w')
    getDumpstr().print_report(report, file=f)
    f.close()
    stats['run'] = recovery_logs
    stats['count'] = num_objects
    stats['size'] = object_size
    stats['ns'] = stats['metrics'].client.recoveryNs
    stats['report'] = report
    return stats