def test_db(mock_ast):
    mock_ast.DB.return_value = Mock()

    result = query.db("foo")

    mock_ast.DB.assert_called_once_with("foo")
    assert result == mock_ast.DB.return_value
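# A minimal sketch of the `mock_ast` fixture this test assumes: it patches the
# `ast` module that `query` delegates to. The patch target below is an
# assumption for illustration, not necessarily this repository's exact layout.
import pytest
from unittest.mock import Mock, patch


@pytest.fixture
def mock_ast():
    with patch("rethinkdb.query.ast") as mocked:  # hypothetical patch target
        yield mocked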
def check_minimum_version(options, minimum_version="1.6", raise_exception=True): minimum_version = distutils.version.LooseVersion(minimum_version) version_string = options.retryQuery( "get server version", query.db("rethinkdb").table("server_status")[0]["process"]["version"], ) matches = re.match( r"(rethinkdb|rebirthdb) (?P<version>(\d+)\.(\d+)\.(\d+)).*", version_string) if not matches: raise RuntimeError("invalid version string format: %s" % version_string) if distutils.version.LooseVersion( matches.group("version")) < minimum_version: if raise_exception: raise RuntimeError("Incompatible version, expected >= %s got: %s" % (minimum_version, version_string)) return False return True
def run(options):
    # Make sure this isn't a pre-`reql_admin` cluster - which could result in
    # data loss if the user has a database named 'rethinkdb'
    utils_common.check_minimum_version(options, '1.6')

    # get the complete list of tables
    db_table_set = set()
    all_tables = [
        utils_common.DbTable(x['db'], x['name']) for x in options.retryQuery(
            'list tables',
            query.db('rethinkdb').table('table_config').pluck(['db', 'name']))
    ]
    if not options.db_tables:
        db_table_set = set(all_tables)  # default to all tables
    else:
        all_databases = options.retryQuery(
            'list dbs',
            query.db_list().filter(query.row.ne('rethinkdb')))
        for db_table in options.db_tables:
            db, table = db_table

            if db == 'rethinkdb':
                raise AssertionError('Cannot export tables from the system database')

            if db not in all_databases:
                raise RuntimeError("Error: Database '%s' not found" % db)

            if table is None:  # just a db name, implicitly select all tables in that db
                db_table_set.update(set([x for x in all_tables if x.db == db]))
            else:
                if utils_common.DbTable(db, table) not in all_tables:
                    raise RuntimeError("Error: Table not found: '%s.%s'" % (db, table))
                db_table_set.add(db_table)

    # Determine the actual number of client processes we'll have
    options.clients = min(options.clients, len(db_table_set))

    # create the working directory and its structure
    parent_dir = os.path.dirname(options.directory)
    if os.path.exists(parent_dir):
        if not os.path.isdir(parent_dir):
            raise RuntimeError(
                "Output parent directory is not a directory: %s" % parent_dir)
    else:
        try:
            os.makedirs(parent_dir)
        except OSError as e:
            raise optparse.OptionValueError(
                "Unable to create parent directory for %s: %s" %
                (parent_dir, e.strerror))

    working_dir = tempfile.mkdtemp(
        prefix=os.path.basename(options.directory) + '_partial_',
        dir=os.path.dirname(options.directory))
    try:
        for db in set([database for database, _ in db_table_set]):
            os.makedirs(os.path.join(working_dir, str(db)))
    except OSError as e:
        raise RuntimeError("Failed to create temporary directory (%s): %s" %
                           (e.filename, e.strerror))

    # Run the export
    run_clients(options, working_dir, db_table_set)

    # Move the temporary directory structure over to the original output directory
    try:
        if os.path.isdir(options.directory):
            # an empty directory is created here when using _dump
            os.rmdir(options.directory)
        elif os.path.exists(options.directory):
            raise Exception('There was a file at the output location: %s' %
                            options.directory)
        os.rename(working_dir, options.directory)
    except OSError as e:
        raise RuntimeError(
            "Failed to move temporary directory to output directory (%s): %s" %
            (options.directory, e.strerror))
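# The directory shuffle in run() is a write-then-rename pattern: everything is
# written into a sibling temp directory, which is only renamed to the real
# output path once the export succeeds, so a partial export never appears at
# the final location. A minimal sketch of the same idea (`output_dir` and
# `populate` are hypothetical stand-ins):
import os
import tempfile


def atomic_output_dir(output_dir, populate):
    working = tempfile.mkdtemp(
        prefix=os.path.basename(output_dir) + '_partial_',
        dir=os.path.dirname(output_dir) or '.')
    populate(working)  # write everything into the temp directory first
    os.rename(working, output_dir)  # then expose it under the real name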
def run_clients(options, workingDir, db_table_set):
    # Spawn one client for each db.table, up to options.clients at a time
    exit_event = multiprocessing.Event()
    processes = []
    if six.PY3:
        ctx = multiprocessing.get_context(multiprocessing.get_start_method())
        error_queue = SimpleQueue(ctx=ctx)
    else:
        error_queue = SimpleQueue()
    interrupt_event = multiprocessing.Event()
    sindex_counter = multiprocessing.Value(ctypes.c_longlong, 0)
    hook_counter = multiprocessing.Value(ctypes.c_longlong, 0)

    signal.signal(signal.SIGINT,
                  lambda a, b: abort_export(a, b, exit_event, interrupt_event))
    errors = []

    try:
        progress_info = []
        arg_lists = []
        for db, table in db_table_set:
            tableSize = int(
                options.retryQuery(
                    "count",
                    query.db(db).table(table).info()['doc_count_estimates'].sum()))

            progress_info.append(
                (multiprocessing.Value(ctypes.c_longlong, 0),
                 multiprocessing.Value(ctypes.c_longlong, tableSize)))
            arg_lists.append((
                db,
                table,
                workingDir,
                options,
                error_queue,
                progress_info[-1],
                sindex_counter,
                hook_counter,
                exit_event,
            ))

        # Wait for all tables to finish
        while processes or arg_lists:
            time.sleep(0.1)

            while not error_queue.empty():
                exit_event.set()  # Stop immediately if an error occurs
                errors.append(error_queue.get())

            processes = [process for process in processes if process.is_alive()]

            if len(processes) < options.clients and len(arg_lists) > 0:
                new_process = multiprocessing.Process(target=export_table,
                                                      args=arg_lists.pop(0))
                new_process.start()
                processes.append(new_process)

            update_progress(progress_info, options)

        # If we were successful, make sure 100% progress is reported
        # (rows could have been deleted which would result in being done
        # at less than 100%)
        if len(errors) == 0 and not interrupt_event.is_set() and not options.quiet:
            utils_common.print_progress(1.0, indent=4)

        # Continue past the progress output line and print total rows processed
        def plural(num, text, plural_text):
            return "%d %s" % (num, text if num == 1 else plural_text)

        if not options.quiet:
            print(
                "\n %s exported from %s, with %s, and %s" %
                (plural(sum([max(0, info[0].value) for info in progress_info]),
                        "row", "rows"),
                 plural(len(db_table_set), "table", "tables"),
                 plural(sindex_counter.value, "secondary index", "secondary indexes"),
                 plural(hook_counter.value, "hook function", "hook functions")))
    finally:
        signal.signal(signal.SIGINT, signal.SIG_DFL)

    if interrupt_event.is_set():
        raise RuntimeError("Interrupted")

    if len(errors) != 0:
        # multiprocessing queues don't handle tracebacks,
        # so they've already been stringified in the queue
        for error in errors:
            print("%s" % error[1], file=sys.stderr)
            if options.debug:
                print("%s traceback: %s" % (error[0].__name__, error[2]),
                      file=sys.stderr)
        raise RuntimeError("Errors occurred during export")
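# The scheduling loop in run_clients() is a hand-rolled bounded process pool:
# keep at most `limit` workers alive, start a new one whenever a slot frees
# up, and reap finished processes on each pass. A stripped-down sketch of that
# pattern (`jobs` is a list of argument tuples for `work`; both are
# hypothetical stand-ins):
import multiprocessing
import time


def run_bounded(jobs, work, limit):
    processes = []
    while processes or jobs:
        time.sleep(0.1)
        # reap workers that have finished
        processes = [p for p in processes if p.is_alive()]
        while jobs and len(processes) < limit:
            p = multiprocessing.Process(target=work, args=jobs.pop(0))
            p.start()
            processes.append(p)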
def export_table(db, table, directory, options, error_queue, progress_info,
                 sindex_counter, hook_counter, exit_event):
    # prevent signal handlers from being set in child processes
    signal.signal(signal.SIGINT, signal.SIG_DFL)

    writer = None

    try:
        # -- get table info

        table_info = options.retryQuery('table info: %s.%s' % (db, table),
                                        query.db(db).table(table).info())

        # Rather than just the index names, store all index information
        table_info['indexes'] = options.retryQuery(
            'table index data %s.%s' % (db, table),
            query.db(db).table(table).index_status(),
            run_options={'binary_format': 'raw'})

        table_info['write_hook'] = options.retryQuery(
            'table write hook data %s.%s' % (db, table),
            query.db(db).table(table).get_write_hook(),
            run_options={'binary_format': 'raw'})

        if table_info['write_hook'] is not None:
            hook_counter.value += 1

        with open(os.path.join(directory, db, table + '.info'), 'w') as info_file:
            info_file.write(json.dumps(table_info) + "\n")

        # count the indexes once, under the lock, to avoid racing updates
        with sindex_counter.get_lock():
            sindex_counter.value += len(table_info["indexes"])

        # -- start the writer

        if six.PY3:
            ctx = multiprocessing.get_context(multiprocessing.get_start_method())
            task_queue = SimpleQueue(ctx=ctx)
        else:
            task_queue = SimpleQueue()

        if options.format == "json":
            filename = directory + "/%s/%s.json" % (db, table)
            writer = multiprocessing.Process(target=json_writer,
                                             args=(filename, options.fields,
                                                   task_queue, error_queue,
                                                   options.format))
        elif options.format == "csv":
            filename = directory + "/%s/%s.csv" % (db, table)
            writer = multiprocessing.Process(target=csv_writer,
                                             args=(filename, options.fields,
                                                   options.delimiter, task_queue,
                                                   error_queue))
        elif options.format == "ndjson":
            filename = directory + "/%s/%s.ndjson" % (db, table)
            writer = multiprocessing.Process(target=json_writer,
                                             args=(filename, options.fields,
                                                   task_queue, error_queue,
                                                   options.format))
        else:
            raise RuntimeError("unknown format type: %s" % options.format)
        writer.start()

        # -- read in the data source

        lastPrimaryKey = None
        read_rows = 0
        run_options = {"time_format": "raw", "binary_format": "raw"}
        if options.outdated:
            run_options["read_mode"] = "outdated"
        cursor = options.retryQuery(
            'initial cursor for %s.%s' % (db, table),
            query.db(db).table(table).order_by(index=table_info["primary_key"]),
            run_options=run_options)
        while not exit_event.is_set():
            try:
                for row in cursor:
                    # bail on exit
                    if exit_event.is_set():
                        break

                    # add to the output queue
                    task_queue.put([row])
                    lastPrimaryKey = row[table_info["primary_key"]]
                    read_rows += 1

                    # Update the progress every 20 rows
                    if read_rows % 20 == 0:
                        progress_info[0].value = read_rows
                else:
                    # Export is done - since we used estimates earlier,
                    # update the actual table size
                    progress_info[0].value = read_rows
                    progress_info[1].value = read_rows
                    break

            except (errors.ReqlTimeoutError, errors.ReqlDriverError):
                # connection problem, re-setup the cursor
                try:
                    cursor.close()
                except errors.ReqlError as exc:
                    default_logger.exception(exc)

                cursor = options.retryQuery(
                    'backup cursor for %s.%s' % (db, table),
                    query.db(db).table(table).between(
                        lastPrimaryKey, None, left_bound="open").order_by(
                            index=table_info["primary_key"]),
                    run_options=run_options)

    except (errors.ReqlError, errors.ReqlDriverError) as ex:
        error_queue.put((RuntimeError, RuntimeError(ex.message),
                         traceback.extract_tb(sys.exc_info()[2])))
    except BaseException:
        ex_type, ex_class, tb = sys.exc_info()
        error_queue.put((ex_type, ex_class, traceback.extract_tb(tb)))
    finally:
        if writer and writer.is_alive():
            task_queue.put(StopIteration())
            writer.join()
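# The reconnect handling in export_table() resumes a broken cursor by
# re-reading everything strictly after the last primary key it saw. A
# condensed sketch of that ReQL pattern (`conn` and the arguments are
# placeholders, not this module's API):
from rethinkdb import query


def resume_cursor(conn, db, table, primary_key, last_key, run_options):
    # left_bound="open" excludes the row we already exported; the open upper
    # bound (None) reads to the end of the table
    return query.db(db).table(table).between(
        last_key, None, left_bound="open"
    ).order_by(index=primary_key).run(conn, **run_options)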
def rebuild_indexes(options):
    # flesh out options.db_table
    if not options.db_table:
        options.db_table = [
            utils_common.DbTable(x["db"], x["name"])
            for x in options.retryQuery(
                "all tables",
                query.db("rethinkdb").table("table_config").pluck(["db", "name"]),
            )
        ]
    else:
        for db_table in options.db_table[:]:  # work from a copy
            if not db_table[1]:
                options.db_table += [
                    utils_common.DbTable(db_table[0], x)
                    for x in options.retryQuery(
                        "table list of %s" % db_table[0],
                        query.db(db_table[0]).table_list(),
                    )
                ]
                # remove the db-only entry now that its tables are expanded
                options.db_table.remove(db_table)

    # wipe out any indexes with the TMP_INDEX_PREFIX
    for db, table in options.db_table:
        for index in options.retryQuery(
            "list indexes on %s.%s" % (db, table),
            query.db(db).table(table).index_list(),
        ):
            if index.startswith(TMP_INDEX_PREFIX):
                # index_list() returns plain index names, so drop by name
                options.retryQuery(
                    "drop index: %s.%s:%s" % (db, table, index),
                    query.db(db).table(table).index_drop(index),
                )

    # get the list of indexes to rebuild
    indexes_to_build = []
    for db, table in options.db_table:
        indexes = None
        if not options.force:
            indexes = options.retryQuery(
                "get outdated indexes from %s.%s" % (db, table),
                query.db(db)
                .table(table)
                .index_status()
                .filter({"outdated": True})
                .get_field("index"),
            )
        else:
            indexes = options.retryQuery(
                "get all indexes from %s.%s" % (db, table),
                query.db(db).table(table).index_status().get_field("index"),
            )
        for index in indexes:
            indexes_to_build.append({"db": db, "table": table, "name": index})

    # rebuild selected indexes
    total_indexes = len(indexes_to_build)
    indexes_completed = 0
    progress_ratio = 0.0
    highest_progress = 0.0
    indexes_in_progress = []

    if not options.quiet:
        print(
            "Rebuilding %d index%s: %s"
            % (
                total_indexes,
                "es" if total_indexes > 1 else "",
                ", ".join(
                    ["`%(db)s.%(table)s:%(name)s`" % i for i in indexes_to_build]
                ),
            )
        )

    while len(indexes_to_build) > 0 or len(indexes_in_progress) > 0:
        # Make sure we're running the right number of concurrent index rebuilds
        while (
            len(indexes_to_build) > 0
            and len(indexes_in_progress) < options.concurrent
        ):
            index = indexes_to_build.pop()
            indexes_in_progress.append(index)
            index["temp_name"] = TMP_INDEX_PREFIX + index["name"]
            index["progress"] = 0
            index["ready"] = False

            existing_indexes = dict(
                (x["index"], x["function"])
                for x in options.retryQuery(
                    "existing indexes",
                    query.db(index["db"])
                    .table(index["table"])
                    .index_status()
                    .pluck("index", "function"),
                )
            )

            if index["name"] not in existing_indexes:
                raise AssertionError(
                    "{index_name} is not part of existing indexes {indexes}".format(
                        index_name=index["name"], indexes=", ".join(existing_indexes)
                    )
                )

            if index["temp_name"] not in existing_indexes:
                options.retryQuery(
                    "create temp index: %(db)s.%(table)s:%(name)s" % index,
                    query.db(index["db"])
                    .table(index["table"])
                    .index_create(index["temp_name"], existing_indexes[index["name"]]),
                )

        # Report progress
        highest_progress = max(highest_progress, progress_ratio)
        if not options.quiet:
            utils_common.print_progress(highest_progress)

        # Check the status of indexes in progress
        progress_ratio = 0.0
        for index in indexes_in_progress:
            status = options.retryQuery(
                "progress `%(db)s.%(table)s` index `%(name)s`" % index,
                query.db(index["db"])
                .table(index["table"])
                .index_status(index["temp_name"])
                .nth(0),
            )
            if status["ready"]:
                index["ready"] = True
                options.retryQuery(
                    "rename `%(db)s.%(table)s` index `%(name)s`" % index,
                    query.db(index["db"])
                    .table(index["table"])
                    .index_rename(index["temp_name"], index["name"], overwrite=True),
                )
            else:
                progress_ratio += status.get("progress", 0) / total_indexes

        indexes_in_progress = [
            index for index in indexes_in_progress if not index["ready"]
        ]
        indexes_completed = (
            total_indexes - len(indexes_to_build) - len(indexes_in_progress)
        )
        progress_ratio += float(indexes_completed) / total_indexes

        if len(indexes_in_progress) == options.concurrent or (
            len(indexes_in_progress) > 0 and len(indexes_to_build) == 0
        ):
            # Short sleep to keep from killing the CPU
            time.sleep(0.1)

    # Make sure the progress bar says we're done and get past the progress bar line
    if not options.quiet:
        utils_common.print_progress(1.0)
        print("")
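# rebuild_indexes() relies on index_status() returning each index's `function`
# blob (fetched with binary_format="raw"), which index_create() accepts to
# clone the index, and on index_rename(..., overwrite=True) to swap the
# rebuilt copy in atomically. A condensed sketch for a single index (`conn`
# and the prefix value are assumptions):
from rethinkdb import query

TMP_PREFIX = "$reql_temp_index$_"  # assumed; use the module's TMP_INDEX_PREFIX


def rebuild_one(conn, db, table, name):
    status = query.db(db).table(table).index_status(name).nth(0).run(
        conn, binary_format="raw")  # keep the index function as a raw blob
    tmp = TMP_PREFIX + name
    query.db(db).table(table).index_create(tmp, status["function"]).run(conn)
    query.db(db).table(table).index_wait(tmp).run(conn)  # block until built
    query.db(db).table(table).index_rename(tmp, name, overwrite=True).run(conn)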