def test_scidb_afl_module(): """Testing all public methods in scidblib.scidb_afl.""" print '*** testing scidblib.scidb_afl...' class TmpArgs: def __init__(self): self.host = '' self.port = '' args = TmpArgs() iquery_cmd = scidb_afl.get_iquery_cmd(args) scidb_afl.execute_it_return_out_err('ls') scidb_afl.afl(iquery_cmd, 'list()') print 'time_afl(..., \'list()\') =', scidb_afl.time_afl( iquery_cmd, 'list()') print 'single_cell_afl(..., \'build(<v:int64>[i=0:0,1,0], 5)\', 1) =', \ scidb_afl.single_cell_afl(iquery_cmd, 'build(<v:int64>[i=0:0,1,0], 5)', 1) print 'single_cell_afl(..., \'apply(build(<v:int64>[i=0:0,1,0], 5), v2, 6)\', 2) =', \ scidb_afl.single_cell_afl(iquery_cmd, 'apply(build(<v:int64>[i=0:0,1,0], 5), v2, 6)', 2) print 'get_num_instances(...) =', scidb_afl.get_num_instances(iquery_cmd) print 'get_array_names(...) =', scidb_afl.get_array_names(iquery_cmd) print
def __init__(self):
    """Store the iquery command built from blank host/port settings.

    The command is obtained from scidb_afl.get_iquery_cmd() with the base
    executable 'iquery' and kept in self._iquery_cmd.
    """
    # Minimal stand-in for parsed command-line arguments.
    class TmpArgs:
        def __init__(self):
            self.host = ''
            self.port = ''

    blank_args = TmpArgs()
    base_command = 'iquery'
    self._iquery_cmd = scidb_afl.get_iquery_cmd(blank_args, base_command)
def __init__(self, authentication_file=None):
    """Store the iquery command, optionally pointing it at an authentication file.

    @param authentication_file  path appended as ' --auth-file <path>' to the
                                base 'iquery' command when it is truthy.
    """
    # Minimal stand-in for parsed command-line arguments.
    class TmpArgs:
        def __init__(self):
            self.host = ''
            self.port = ''

    command_parts = ['iquery']
    if authentication_file:
        command_parts.extend(['--auth-file', authentication_file])

    self._iquery_cmd = scidb_afl.get_iquery_cmd(TmpArgs(), ' '.join(command_parts))
def __init__(self, login_username=None, login_userpassword=None):
    """Store the iquery command, optionally adding user-name and password options.

    @param login_username      appended as ' -U <name>' when truthy.
    @param login_userpassword  appended as ' -P <password>' when truthy.
    """
    # Minimal stand-in for parsed command-line arguments.
    class TmpArgs:
        def __init__(self):
            self.host = ''
            self.port = ''

    # NOTE(review): the password becomes part of the iquery command string;
    # confirm that this exposure is acceptable for the callers of this class.
    command_parts = ['iquery']
    if login_username:
        command_parts.extend(['-U', login_username])
    if login_userpassword:
        command_parts.extend(['-P', login_userpassword])

    self._iquery_cmd = scidb_afl.get_iquery_cmd(TmpArgs(), ' '.join(command_parts))
def test_scidb_afl_module(): """Testing all public methods in scidblib.scidb_afl.""" print '*** testing scidblib.scidb_afl...' class TmpArgs: def __init__(self): self.host = '' self.port = '' args = TmpArgs() iquery_cmd = scidb_afl.get_iquery_cmd(args) scidb_afl.execute_it_return_out_err('ls') scidb_afl.afl(iquery_cmd, 'list()') print 'time_afl(..., \'list()\') =', scidb_afl.time_afl(iquery_cmd, 'list()') print 'single_cell_afl(..., \'build(<v:int64>[i=0:0,1,0], 5)\', 1) =', \ scidb_afl.single_cell_afl(iquery_cmd, 'build(<v:int64>[i=0:0,1,0], 5)', 1) print 'single_cell_afl(..., \'apply(build(<v:int64>[i=0:0,1,0], 5), v2, 6)\', 2) =', \ scidb_afl.single_cell_afl(iquery_cmd, 'apply(build(<v:int64>[i=0:0,1,0], 5), v2, 6)', 2) print 'get_num_instances(...) =', scidb_afl.get_num_instances(iquery_cmd) print 'get_array_names(...) =', scidb_afl.get_array_names(iquery_cmd) print
def main(): """The main function gets command-line argument as a pattern, and removes all arrays with that pattern. """ parser = argparse.ArgumentParser( description='Remove all SciDB arrays whose names match a given pattern.', epilog= 'assumptions:\n' + ' - iquery is in your path.', formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('-f', '--force', action='store_true', help='Forced removal of the arrays without asking for confirmation.') parser.add_argument('-c', '--host', help='Host name to be passed to iquery.') parser.add_argument('-p', '--port', help='Port number to be passed to iquery.') parser.add_argument('-t', '--temp-only', action='store_true', help='Limiting the candidates to temp arrays.') parser.add_argument('-U', '--user-name', help='User name to be passed to iquery.') parser.add_argument('-P', '--user-password', help='User password to be passed to iquery.') parser.add_argument('-v', '--verbose ', default=True, help='display verbose output.') parser.add_argument('regex', metavar='REGEX', type=str, nargs='?', default='.*', help='''Regular expression to match against array names. The utility will remove arrays whose names match the regular expression. Default is '.+', meaning to remove all arrays, because the pattern matches all names. The regular expression must match the full array name. 
For instance, '.*s' will match array 'dogs' because it ends with 's', but will not match array 'moose' because it does not end with 's'.''' ) args = parser.parse_args() try: iquery_cmd = scidb_afl.get_iquery_cmd(args) namespaces = scidb_afl.get_namespace_names(iquery_cmd) for namespace in namespaces: print "\nSearching namespace: ", namespace names = scidb_afl.get_array_names( iquery_cmd = iquery_cmd, temp_only=args.temp_only, namespace=namespace) names_to_remove = [] for name in names: match_name = re.match('^'+args.regex+'$', name) if match_name: names_to_remove.append(name) if not names_to_remove: print "There are no arrays to remove in namespace", namespace continue if not args.force: print 'The following arrays are about to be removed from namespace ' + namespace + ':' print names_to_remove proceed = scidb_psf.confirm(prompt='Are you sure you want to remove?', resp=False) if not proceed: return for name in names_to_remove: scidb_afl.remove_array(name, namespace, iquery_cmd) if namespace != 'public': names = scidb_afl.get_array_names( iquery_cmd=iquery_cmd, temp_only=args.temp_only, namespace=namespace) if not names: scidb_afl.afl( iquery_cmd, 'drop_namespace(\'' + namespace + '\');') print "namespace " + namespace + " removed" print 'Number of arrays removed =', len(names_to_remove) except Exception, e: print >> sys.stderr, '------ Exception -----------------------------' print >> sys.stderr, e if args.verbose: print >> sys.stderr, '------ Traceback (for debug purpose) ---------' traceback.print_exc() print >> sys.stderr, '----------------------------------------------' sys.exit(-1) # upon an exception, throw -1
def calculate_chunk_length(args): """Calculate chunk length and other fields which were '?', and print out the schema. @param args the result of argparse.ArgumentParser.parse_args(). @return 0 @exception AppError if anything goes wrong. """ iquery_cmd = scidb_afl.get_iquery_cmd(args) load_array = args.load_array raw_dims_str = args.raw_dims calculated_dims = Dimensions(raw_dims_str) # Initialize the progress tracker progress_tracker = scidb_progress.ProgressTracker(sys.stdout, '', args.verbose, # if_print_start args.verbose, # if_print_end args.verbose # if_print_skip ) progress_tracker.register_step('min_max_dc', 'Get min_coord, max_coord, and ApproxDC for each dim from load_array.') progress_tracker.register_step('overall_dc', 'Get overall ApproxDC from load_array.') progress_tracker.register_step('calculate', 'Calculate and adjust dimension specification.') # S = dims where chunk_length is Specified; # N = dims where chunk_length is Not specified. S = [] N = [] for i, the_dim in enumerate(calculated_dims.list): if the_dim.chunk_length == '?': N.append(i) else: S.append(i) # Get the (dimension and attribute) names of the load_array. 
names_in_load_array = NamesInLoadArray(iquery_cmd, load_array) # for each i in [0..d), calculate min_coord[i], max_coord[i], and distinct_count[i] progress_tracker.start_step('min_max_dc') for the_dim in calculated_dims.list: index = names_in_load_array.find_index(the_dim.dim_name) the_name_in_load_array = names_in_load_array.list[index] if the_name_in_load_array.is_dim: tmp = names_in_load_array.gen_uniq_name() cmd = ('aggregate(apply(aggregate(' + load_array + ', count(*), ' + the_dim.dim_name + '), ' + tmp + ', ' + the_dim.dim_name + '), min(' + tmp + '), max(' + tmp + '), count(*))' ) else: cmd = ('aggregate(' + load_array + ', min(' + the_dim.dim_name + '), max(' + the_dim.dim_name + '), approxdc(' + the_dim.dim_name + '))' ) min_coord, max_coord, distinct_count = scidb_afl.single_cell_afl(iquery_cmd, cmd, 3) try: min_coord_int = int(min_coord) max_coord_int = int(max_coord) distinct_count_int = int(distinct_count) if args.verbose: print 'For ' + the_dim.dim_name + ', min_coord=' + str(min_coord_int) +\ ', max_coord=' + str(max_coord_int) +\ ', distinct_count=' + str(distinct_count_int) except ValueError: raise scidblib.AppError('Error: I cannot proceed because for ' + the_dim.dim_name + ' in array ' + load_array + ', not all of min_coord (=' + min_coord + '), max_coord (=' + max_coord + '), and distinct_count (=' + distinct_count + ') are integers.') the_dim.set_min_max_dc(min_coord_int, max_coord_int, distinct_count_int) progress_tracker.end_step('min_max_dc') # Fill dim_low, dim_high, and chunk_overlap (which was a '?' before). 
for the_dim in calculated_dims.list: if the_dim.dim_low == '?': the_dim.dim_low = the_dim.min_coord if the_dim.dim_high == '?': the_dim.dim_high = the_dim.max_coord if the_dim.chunk_overlap == '?': the_dim.chunk_overlap = 0 # Generate string_concat_of_dim_values in the form of: # string(dim_name1) + '|' + string(dim_name2) + '|' + string(dim_name3) string_values = [] for i, the_dim in enumerate(calculated_dims.list): string_values.append('string(' + the_dim.dim_name + ')') string_concat_of_dim_values = ' + \'|\' + '.join(string_values) # Calculate overall_distinct_count. tmp = names_in_load_array.gen_uniq_name() cmd = ('aggregate(apply(' + load_array + ', ' + tmp + ', ' + string_concat_of_dim_values + '), approxdc(' + tmp + '))' ) progress_tracker.start_step('overall_dc') overall_distinct_count = scidb_afl.single_cell_afl(iquery_cmd, cmd, 1) overall_count = scidb_afl.single_cell_afl(iquery_cmd, 'aggregate(' + load_array + ', count(*))', 1) try: overall_distinct_count = int(overall_distinct_count) overall_count = int(overall_count) if overall_distinct_count > overall_count: overall_distinct_count = overall_count except ValueError: raise scidblib.AppError('Error: The query to get overall_distinct_count failed to return an integer.') if args.verbose: print 'overall_distinct_count=' + str(overall_distinct_count) progress_tracker.end_step('overall_dc') progress_tracker.start_step('calculate') # Shortcut: if |N| == 0, we are done. if len(N)==0: print calculated_dims.__str__() return 0 # Set num_chunks_from_n. num_chunks_from_n = scidb_math.ceil_of_division(overall_distinct_count, args.desired_values_per_chunk) for i in S: the_dim = calculated_dims.list[i] chunk_count = scidb_math.ceil_of_division(the_dim.distinct_count, int(the_dim.chunk_length)) num_chunks_from_n = scidb_math.ceil_of_division(num_chunks_from_n, chunk_count) if num_chunks_from_n <= 1: num_chunks_from_n = 1 # For each dimension i in N, calculate chunk_count[i], then set chunk_length. 
for i in N: the_dim = calculated_dims.list[i] chunk_count = math.pow(num_chunks_from_n, 1.0/len(N)) if not args.keep_shape: # calculate geomean product = 1.0 for k in N: product *= calculated_dims.list[k].distinct_count geomean = math.pow(product, 1.0/len(N)) chunk_count *= the_dim.distinct_count / geomean if chunk_count<1: chunk_count = 1.0 the_dim.chunk_length = int(math.ceil( (the_dim.max_coord-the_dim.min_coord+1)/chunk_count )) if chunk_count>1: the_dim.chunk_length = scidb_math.snap_to_grid( the_dim.chunk_length, args.grid_threshold, use_binary=(not args.grid_base10)) progress_tracker.end_step('calculate') # Print result. print calculated_dims.__str__() return 0
def main(): """The main function lists all arrays """ parser = argparse.ArgumentParser( description='List all scidb arrays.', epilog= 'Assumptions:\n' + ' - SciDB is running.\n' ' - The environment is setup to support namespaces.\n' ' - The iquery application is in your path.', formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('-c', '--host', help='Host name to be passed to iquery.') parser.add_argument('-p', '--port', help='Port number to be passed to iquery.') parser.add_argument('-t', '--temp-only', action='store_true', help='Limiting the candidates to temp arrays.') parser.add_argument('-v', '--versions', help='Include all versions in the list.') parser.add_argument('-A', '--auth-file', help='Authentication file to be passed to iquery.') parser.add_argument('-s', '--sort-by', default='array', choices=['array', 'namespace'], help='Either array or namespace.') parser.add_argument('-f', '--find-array', help='find a particular array name.') args = parser.parse_args() try: arrays = [] iquery_cmd = scidb_afl.get_iquery_cmd(args) namespaces = scidb_afl.get_namespace_names(iquery_cmd) for namespace in namespaces: new_arrays = scidb_afl.get_array_names( iquery_cmd=iquery_cmd, temp_only=args.temp_only, versions=args.versions, namespace=namespace) for array in new_arrays: t=(array, namespace) arrays.append(t) if arrays: if args.find_array: result=[tup for tup in arrays if tup[0] == args.find_array] if not result: raise ValueError, 'array {0} not found'.format(args.find_array) array, namespace = result[0] print scidb_make_qualified_array_name('namespace', 'array') print scidb_make_qualified_array_name(namespace, array) else: print scidb_make_qualified_array_name('namespace', 'array') item=0 if args.sort_by == 'namespace': item=1 for (array, namespace) in sorted(arrays, key=itemgetter(item)): print scidb_make_qualified_array_name(namespace, array) else: print >> sys.stderr, 'No arrays found' except Exception, e: print >> sys.stderr, '------ Exception 
-----------------------------' print >> sys.stderr, e if _print_traceback_upon_exception: print >> sys.stderr, '------ Traceback (for debug purpose) ---------' traceback.print_exc() print >> sys.stderr, '----------------------------------------------' sys.exit(-1) # upon an exception, throw -1
def my_test(args, num_chunks, chunk_length, initial_values_per_chunk, new_values_per_chunk, type_name): """This function does the testing of appending alternate values to the end of every chunk of an array. @param args command-line parameters. @param num_chunks how many chunks are there. @param chunk_length the chunk length. @param initial_values_per_chunk the number of initial values per chunk @param new_values_per_chunk how many value to insert into each chunk. @param type_name the data type. @return 0 """ # Set even_value and odd_value. even_value = "0" odd_value = "1" if type_name=="bool": even_value = "true" odd_value = "false" # Initialize the ProgressTracker progress_tracker = scidb_progress.ProgressTracker(if_print_start = args.verbose, if_print_end = args.verbose) progress_tracker.register_step('initial', 'Load initial values.') progress_tracker.register_step('new', 'Insert new values.') # Remove the array if exists. iquery_cmd = scidb_afl.get_iquery_cmd(args) my_remove_arrays(iquery_cmd, tolerate_error=True) # Create the array. cmd = "create temp array %s <v:%s>[i=0:%d,%d,0]" % (array_name, type_name, chunk_length*num_chunks-1, chunk_length) scidb_afl.afl(iquery_cmd, cmd) # Load initial values. # The algorithm is to create an array that describes the ranges for the initial values, # then use cross_between to filter out values from a fully-populated array. 
progress_tracker.start_step('initial') cmd = "create temp array %s <low:int64, high:int64>[i=0:%d,%d,0]" % (ranges_array_name, num_chunks-1, num_chunks) scidb_afl.afl(iquery_cmd, cmd) for c in xrange(num_chunks): cmd = ("insert(redimension(apply(build(<adummyattribute:bool>[adummydim=0:0,1,0],true), i, %d, low, %d, high, %d), %s), %s)" % (c, c*chunk_length, c*chunk_length+initial_values_per_chunk-1, ranges_array_name, ranges_array_name)) scidb_afl.afl(iquery_cmd, cmd) cmd = ("store(cross_between(build(%s, iif(i%%2=0, %s(%s), %s(%s))), %s), %s)" % (array_name, type_name, even_value, type_name, odd_value, ranges_array_name, array_name)) scidb_afl.afl(iquery_cmd, cmd) progress_tracker.end_step('initial') # Load the additional values. progress_tracker.start_step('new') if args.verbose: print "In each of the %d batches, one value will be appended to each of the %d chunks." % (new_values_per_chunk, num_chunks) print "Batch\tTime" for i in xrange(new_values_per_chunk): start_time = datetime.datetime.now() for c in xrange(num_chunks): index = c*chunk_length+i+initial_values_per_chunk value = type_name+"("+even_value+")" if index%2==0 else type_name+"("+odd_value+")" cmd = "op_set_cell_attr_1D(%s, i, %d, v, %s)" % (array_name, index, value) scidb_afl.afl(iquery_cmd, cmd) if args.verbose: seconds = scidb_progress.timedelta_total_seconds(datetime.datetime.now() - start_time) print "%d\t%f" % (i+1, seconds) progress_tracker.end_step('new') # Remove the array. my_remove_arrays(iquery_cmd, tolerate_error=False) # Return 0 return 0
def main(): """The main function gets command-line argument as a pattern, and removes all arrays with that pattern. """ parser = argparse.ArgumentParser( description= 'Remove all SciDB arrays whose names match a given pattern.', epilog='assumptions:\n' + ' - iquery is in your path.', formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument( '-f', '--force', action='store_true', help='Forced removal of the arrays without asking for confirmation.') parser.add_argument('-c', '--host', help='Host name to be passed to iquery.') parser.add_argument('-p', '--port', help='Port number to be passed to iquery.') parser.add_argument('-t', '--temp-only', action='store_true', help='Limiting the candidates to temp arrays.') parser.add_argument('-U', '--user-name', help='User name to be passed to iquery.') parser.add_argument('-P', '--user-password', help='User password to be passed to iquery.') parser.add_argument('-v', '--verbose ', default=True, help='display verbose output.') parser.add_argument( 'regex', metavar='REGEX', type=str, nargs='?', default='.*', help='''Regular expression to match against array names. The utility will remove arrays whose names match the regular expression. Default is '.+', meaning to remove all arrays, because the pattern matches all names. The regular expression must match the full array name. 
For instance, '.*s' will match array 'dogs' because it ends with 's', but will not match array 'moose' because it does not end with 's'.''' ) args = parser.parse_args() try: iquery_cmd = scidb_afl.get_iquery_cmd(args) namespaces = scidb_afl.get_namespace_names(iquery_cmd) for namespace in namespaces: print "\nSearching namespace: ", namespace names = scidb_afl.get_array_names(iquery_cmd=iquery_cmd, temp_only=args.temp_only, namespace=namespace) names_to_remove = [] for name in names: match_name = re.match('^' + args.regex + '$', name) if match_name: names_to_remove.append(name) if not names_to_remove: print "There are no arrays to remove in namespace", namespace continue if not args.force: print 'The following arrays are about to be removed from namespace ' + namespace + ':' print names_to_remove proceed = scidb_psf.confirm( prompt='Are you sure you want to remove?', resp=False) if not proceed: return for name in names_to_remove: scidb_afl.remove_array(name, namespace, iquery_cmd) if namespace != 'public': names = scidb_afl.get_array_names(iquery_cmd=iquery_cmd, temp_only=args.temp_only, namespace=namespace) if not names: scidb_afl.afl(iquery_cmd, 'drop_namespace(\'' + namespace + '\');') print "namespace " + namespace + " removed" print 'Number of arrays removed =', len(names_to_remove) except Exception, e: print >> sys.stderr, '------ Exception -----------------------------' print >> sys.stderr, e if args.verbose: print >> sys.stderr, '------ Traceback (for debug purpose) ---------' traceback.print_exc() print >> sys.stderr, '----------------------------------------------' sys.exit(-1) # upon an exception, throw -1
def main(): """The main function gets command-line argument as a pattern, and removes all arrays with that pattern. @exception AppError if something goes wrong. @note If print_traceback_upon_exception (defined at the top of the script) is True, stack trace will be printed. This is helpful during debugging. """ parser = argparse.ArgumentParser( description= 'Remove all SciDB arrays whose names match a given pattern.', epilog='assumptions:\n' + ' - iquery is in your path.', formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument( '-f', '--force', action='store_true', help='Forced removal of the arrays without asking for confirmation.') parser.add_argument('-c', '--host', help='Host name to be passed to iquery.') parser.add_argument('-p', '--port', help='Port number to be passed to iquery.') parser.add_argument('-t', '--temp-only', action='store_true', help='Limiting the candidates to temp arrays.') parser.add_argument( 'regex', metavar='REGEX', type=str, nargs='?', default='.*', help='''Regular expression to match against array names. The utility will remove arrays whose names match the regular expression. Default is '.+', meaning to remove all arrays, because the pattern matches all names. The regular expression must match the full array name. For instance, '.*s' will match array 'dogs' because it ends with 's', but will not match array 'moose' because it does not end with 's'.''' ) args = parser.parse_args() try: names = scidb_afl.get_array_names(temp_only=args.temp_only) iquery_cmd = scidb_afl.get_iquery_cmd(args) names_to_remove = [] for name in names: match_name = re.match('^' + args.regex + '$', name) if match_name: names_to_remove.append(name) if not names_to_remove: print 'There is no array to remove.' 
sys.exit(0) if not args.force: print 'The following arrays are about to be removed:' print names_to_remove proceed = scidb_psf.confirm( prompt='Are you sure you want to remove?', resp=False) if not proceed: sys.exit(0) for name in names_to_remove: scidb_afl.afl(iquery_cmd, 'remove(' + name + ')') print 'Number of arrays removed =', len(names_to_remove) except Exception, e: print >> sys.stderr, '------ Exception -----------------------------' print >> sys.stderr, e if _print_traceback_upon_exception: print >> sys.stderr, '------ Traceback (for debug purpose) ---------' traceback.print_exc() print >> sys.stderr, '----------------------------------------------' sys.exit(-1) # upon an exception, throw -1
def main(): """The main function gets command-line argument as a pattern, and removes all arrays with that pattern. @exception AppError if something goes wrong. @note If print_traceback_upon_exception (defined at the top of the script) is True, stack trace will be printed. This is helpful during debugging. """ parser = argparse.ArgumentParser( description='Remove all SciDB arrays whose names match a given pattern.', epilog= 'assumptions:\n' + ' - iquery is in your path.', formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('-f', '--force', action='store_true', help='Forced removal of the arrays without asking for confirmation.') parser.add_argument('-c', '--host', help='Host name to be passed to iquery.') parser.add_argument('-p', '--port', help='Port number to be passed to iquery.') parser.add_argument('-t', '--temp-only', action='store_true', help='Limiting the candidates to temp arrays.') parser.add_argument('regex', metavar='REGEX', type=str, nargs='?', default='.*', help='''Regular expression to match against array names. The utility will remove arrays whose names match the regular expression. Default is '.+', meaning to remove all arrays, because the pattern matches all names. The regular expression must match the full array name. For instance, '.*s' will match array 'dogs' because it ends with 's', but will not match array 'moose' because it does not end with 's'.''' ) args = parser.parse_args() try: names = scidb_afl.get_array_names(temp_only=args.temp_only) iquery_cmd = scidb_afl.get_iquery_cmd(args) names_to_remove = [] for name in names: match_name = re.match('^'+args.regex+'$', name) if match_name: names_to_remove.append(name) if not names_to_remove: print 'There is no array to remove.' 
sys.exit(0) if not args.force: print 'The following arrays are about to be removed:' print names_to_remove proceed = scidb_psf.confirm(prompt='Are you sure you want to remove?', resp=False) if not proceed: sys.exit(0) for name in names_to_remove: scidb_afl.afl(iquery_cmd, 'remove('+name+')') print 'Number of arrays removed =', len(names_to_remove) except Exception, e: print >> sys.stderr, '------ Exception -----------------------------' print >> sys.stderr, e if _print_traceback_upon_exception: print >> sys.stderr, '------ Traceback (for debug purpose) ---------' traceback.print_exc() print >> sys.stderr, '----------------------------------------------' sys.exit(-1) # upon an exception, throw -1
def main(): """The main function gets command-line argument as a pattern, and removes all arrays with that pattern. Note: Empty namespaces will NOT be removed. """ parser = argparse.ArgumentParser( description= 'Remove all SciDB arrays whose names match a given pattern.', epilog='assumptions:\n' + ' - iquery is in your path.', formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument( '-f', '--force', action='store_true', help='Forced removal of the arrays without asking for confirmation.') parser.add_argument('-c', '--host', help='Host name to be passed to iquery.') parser.add_argument('-p', '--port', help='Port number to be passed to iquery.') parser.add_argument('-t', '--temp-only', action='store_true', help='Limiting the candidates to temp arrays.') parser.add_argument('-A', '--auth-file', help='Authentication file to be passed to iquery.') parser.add_argument( '-U', '--user-name', help= 'Deprecated: Use --auth-file instead. User name to be passed to iquery.' ) parser.add_argument( '-P', '--user-password', help= 'Deprecated: Use --auth-file instead. User password to be passed to iquery.' ) parser.add_argument('-v', '--verbose', default=True, help='display verbose output.') parser.add_argument( 'regex', metavar='REGEX', type=str, nargs='?', default='.*', help='''Regular expression to match against array names. The utility will remove arrays whose names match the regular expression. Default is '.+', meaning to remove all arrays, because the pattern matches all names. The regular expression must match the full array name. 
For instance, '.*s' will match array 'dogs' because it ends with 's', but will not match array 'moose' because it does not end with 's'.''' ) _temp_auth_file = None _arrays_removed = 0 args = parser.parse_args() try: if args.verbose == True: print >> sys.stderr, "args={0}".format(args) if args.user_name and args.user_password and (args.auth_file == None): print >> sys.stderr, '\nWARNING: --user-name and --user-password are deprecated. Use --auth-file instead.\n' _temp_auth_file = create_auth_file(args.user_name, args.user_password) args.auth_file = _temp_auth_file.name args.user_name = None args.user_password = None iquery_cmd = scidb_afl.get_iquery_cmd(args) namespaces = scidb_afl.get_namespace_names(iquery_cmd) if args.verbose == True: print >> sys.stderr, "namespaces={0}".format(namespaces) _arrays_removed = 0 for namespace in namespaces: if args.verbose == True: print >> sys.stderr, "\nSearching namespace: ", namespace names = scidb_afl.get_array_names(iquery_cmd=iquery_cmd, temp_only=args.temp_only, namespace=namespace) if args.verbose == True: print >> sys.stderr, "names={0}".format(names) names_to_remove = [] for name in names: match_name = re.match('^' + args.regex + '$', name) if match_name: if args.verbose == True: print >> sys.stderr, "Schedule {0}.{1} to be removed".format( namespace, name) names_to_remove.append(name) if not names_to_remove: if args.verbose == True: print "There are no arrays to remove in namespace", namespace continue if not args.force: print 'The following arrays are about to be removed from namespace {0}:'.format( namespace) print names_to_remove proceed = scidb_psf.confirm( prompt='Are you sure you want to remove?', resp=False) if not proceed: return for name in names_to_remove: scidb_afl.remove_array(name, namespace, iquery_cmd) if args.verbose == True: print >> sys.stderr, "array {0}.{1} removed".format( namespace, name) _arrays_removed += 1 if namespace != 'public': names = scidb_afl.get_array_names(iquery_cmd=iquery_cmd, 
temp_only=args.temp_only, namespace=namespace) if not names: scidb_afl.afl(iquery_cmd, "drop_namespace('{0}');".format(namespace)) if args.verbose == True: print >> sys.stderr, "namespace {0} removed".format( namespace) if args.verbose == True: print >> sys.stderr, 'Number of arrays removed =', _arrays_removed if _temp_auth_file: _temp_auth_file.close() _temp_auth_file = None except Exception, e: print >> sys.stderr, '------ Exception -----------------------------' print >> sys.stderr, e if args.verbose == True: print >> sys.stderr, 'Number of arrays removed =', _arrays_removed if args.verbose == True: print >> sys.stderr, '------ Traceback (for debug purpose) ---------' traceback.print_exc() if _temp_auth_file: _temp_auth_file.close() _temp_auth_file = None print >> sys.stderr, '----------------------------------------------' sys.exit(-1) # upon an exception, throw -1
def calculate_chunk_length(args): """Calculate chunk length and other fields which were '?', and print out the schema. @param args the result of argparse.ArgumentParser.parse_args(). @return 0 @exception AppError if anything goes wrong. """ iquery_cmd = scidb_afl.get_iquery_cmd(args) load_array = args.load_array raw_dims_str = args.raw_dims calculated_dims = parse_dimensions(raw_dims_str) dbg("Calculated dims:", [x.to_tuple() for x in calculated_dims]) # Initialize the progress tracker progress_tracker = scidb_progress.ProgressTracker( sys.stdout, '', args.verbose, # if_print_start args.verbose, # if_print_end args.verbose # if_print_skip ) progress_tracker.register_step( 'min_max_dc', 'Get min_coord, max_coord, and ApproxDC for each dim from load_array.') progress_tracker.register_step('overall_dc', 'Get overall ApproxDC from load_array.') progress_tracker.register_step( 'calculate', 'Calculate and adjust dimension specification.') # S = dims where chunk_length is Specified; # N = dims where chunk_length is Not specified. S = [] N = [] for i, the_dim in enumerate(calculated_dims): if the_dim.chunk_length == '?': N.append(i) else: S.append(i) dbg("S:", S) dbg("N:", N) # Get the (dimension and attribute) names of the load_array. 
names_in_load_array = NamesInLoadArray(iquery_cmd, load_array) dbg("names...:", names_in_load_array.list) # for each i in [0..d), calculate min_coord[i], max_coord[i], and distinct_count[i] progress_tracker.start_step('min_max_dc') for the_dim in calculated_dims: index = names_in_load_array.find_index(the_dim.dim_name) the_name_in_load_array = names_in_load_array.list[index] if the_name_in_load_array.is_dim: tmp = names_in_load_array.gen_uniq_name() cmd = ('aggregate(apply(aggregate(' + load_array + ', count(*), ' + the_dim.dim_name + '), ' + tmp + ', ' + the_dim.dim_name + '), min(' + tmp + '), max(' + tmp + '), count(*))') else: cmd = ('aggregate(' + load_array + ', min(' + the_dim.dim_name + '), max(' + the_dim.dim_name + '), approxdc(' + the_dim.dim_name + '))') dbg("Cmd:", cmd) min_coord, max_coord, distinct_count = scidb_afl.single_cell_afl( iquery_cmd, cmd, 3) dbg("(min,max,dc):", (min_coord, max_coord, distinct_count)) try: min_coord_int = int(min_coord) max_coord_int = int(max_coord) distinct_count_int = int(distinct_count) if args.verbose: print 'For ' + the_dim.dim_name + ', min_coord=' + str(min_coord_int) +\ ', max_coord=' + str(max_coord_int) +\ ', distinct_count=' + str(distinct_count_int) except ValueError: raise scidblib.AppError('Error: I cannot proceed because for ' + the_dim.dim_name + ' in array ' + load_array + ', not all of min_coord (=' + min_coord + '), max_coord (=' + max_coord + '), and distinct_count (=' + distinct_count + ') are integers.') the_dim.set_min_max_dc(min_coord_int, max_coord_int, distinct_count_int) progress_tracker.end_step('min_max_dc') # Fill dim_low, dim_high, and chunk_overlap (which was a '?' before). 
for the_dim in calculated_dims: if the_dim.dim_low == '?': the_dim.dim_low = the_dim.min_coord if the_dim.dim_high == '?': the_dim.dim_high = the_dim.max_coord if the_dim.chunk_overlap == '?': the_dim.chunk_overlap = 0 # Generate string_concat_of_dim_values in the form of: # string(dim_name1) + '|' + string(dim_name2) + '|' + string(dim_name3) string_values = [] for i, the_dim in enumerate(calculated_dims): string_values.append('string(' + the_dim.dim_name + ')') string_concat_of_dim_values = ' + \'|\' + '.join(string_values) # Calculate overall_distinct_count. tmp = names_in_load_array.gen_uniq_name() cmd = ('aggregate(apply(' + load_array + ', ' + tmp + ', ' + string_concat_of_dim_values + '), approxdc(' + tmp + '))') progress_tracker.start_step('overall_dc') overall_distinct_count = scidb_afl.single_cell_afl(iquery_cmd, cmd, 1) overall_count = scidb_afl.single_cell_afl( iquery_cmd, 'aggregate(' + load_array + ', count(*))', 1) try: overall_distinct_count = int(overall_distinct_count) overall_count = int(overall_count) if overall_distinct_count > overall_count: overall_distinct_count = overall_count except ValueError: raise scidblib.AppError( 'Error: The query to get overall_distinct_count failed to return an integer.' ) if args.verbose: print 'overall_distinct_count=' + str(overall_distinct_count) progress_tracker.end_step('overall_dc') progress_tracker.start_step('calculate') # Shortcut: if |N| == 0, we are done. if len(N) == 0: print scidb_schema.unparse( dims=[x.to_tuple() for x in calculated_dims]) return 0 # Set num_chunks_from_n. 
num_chunks_from_n = scidb_math.ceil_of_division( overall_distinct_count, args.desired_values_per_chunk) for i in S: the_dim = calculated_dims[i] chunk_count = scidb_math.ceil_of_division(the_dim.distinct_count, int(the_dim.chunk_length)) num_chunks_from_n = scidb_math.ceil_of_division( num_chunks_from_n, chunk_count) if num_chunks_from_n <= 1: num_chunks_from_n = 1 # For each dimension i in N, calculate chunk_count[i], then set chunk_length. for i in N: the_dim = calculated_dims[i] chunk_count = math.pow(num_chunks_from_n, 1.0 / len(N)) if not args.keep_shape: # calculate geomean product = 1.0 for k in N: product *= calculated_dims[k].distinct_count geomean = math.pow(product, 1.0 / len(N)) chunk_count *= the_dim.distinct_count / geomean if chunk_count < 1: chunk_count = 1.0 the_dim.chunk_length = int( math.ceil( (the_dim.max_coord - the_dim.min_coord + 1) / chunk_count)) if chunk_count > 1: the_dim.chunk_length = scidb_math.snap_to_grid( the_dim.chunk_length, args.grid_threshold, use_binary=(not args.grid_base10)) progress_tracker.end_step('calculate') # Print result. print scidb_schema.unparse(dims=[x.to_tuple() for x in calculated_dims]) return 0
def main():
    """List SciDB arrays from every namespace as qualified names.

    Parses the command line, asks scidb_afl for the namespace names, and
    collects the array names of each namespace as (array, namespace) pairs.

    Output (stdout):
      - A header line, built by qualifying the literal names 'namespace'
        and 'array' with scidb_make_qualified_array_name.
      - With --find-array: one line per namespace that contains an array
        with exactly that name, in discovery order.  If none is found, a
        ValueError is raised and handled as described below.
      - Otherwise: one line per array, sorted by array name (default) or by
        namespace (--sort-by namespace).
    If no arrays exist at all, 'No arrays found' is written to stderr.

    Any exception raised while querying or printing is reported on stderr
    (with a traceback when _print_traceback_upon_exception is set) and the
    process exits through sys.exit(-1).
    """
    parser = argparse.ArgumentParser(
        description='List all scidb arrays.',
        epilog='Assumptions:\n' +
               ' - SciDB is running.\n'
               ' - The environment is setup to support namespaces.\n'
               ' - The iquery application is in your path.',
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-c', '--host',
                        help='Host name to be passed to iquery.')
    parser.add_argument('-p', '--port',
                        help='Port number to be passed to iquery.')
    parser.add_argument('-t', '--temp-only', action='store_true',
                        help='Limiting the candidates to temp arrays.')
    # NOTE(review): --versions takes a value (it is not a store_true flag);
    # whatever is given is forwarded unchanged to get_array_names.  Confirm
    # this is intended before changing it, since that would alter the CLI.
    parser.add_argument('-v', '--versions',
                        help='Include all versions in the list.')
    parser.add_argument('-A', '--auth-file',
                        help='Authentication file to be passed to iquery.')
    parser.add_argument('-s', '--sort-by', default='array',
                        choices=['array', 'namespace'],
                        help='Either array or namespace.')
    parser.add_argument('-f', '--find-array',
                        help='find a particular array name.')

    args = parser.parse_args()

    try:
        iquery_cmd = scidb_afl.get_iquery_cmd(args)

        # Gather (array, namespace) pairs across all namespaces.
        arrays = []
        for namespace in scidb_afl.get_namespace_names(iquery_cmd):
            names = scidb_afl.get_array_names(iquery_cmd=iquery_cmd,
                                              temp_only=args.temp_only,
                                              versions=args.versions,
                                              namespace=namespace)
            arrays.extend((array, namespace) for array in names)

        if not arrays:
            sys.stderr.write('No arrays found\n')
        elif args.find_array:
            matches = [tup for tup in arrays if tup[0] == args.find_array]
            if not matches:
                raise ValueError(
                    'array {0} not found'.format(args.find_array))

            print(scidb_make_qualified_array_name('namespace', 'array'))
            # The same array name may live in several namespaces; report
            # every one of them rather than only the first hit.
            for array, namespace in matches:
                print(scidb_make_qualified_array_name(namespace, array))
        else:
            print(scidb_make_qualified_array_name('namespace', 'array'))
            # Tuples are (array, namespace): index 0 sorts by array name,
            # index 1 sorts by namespace.
            sort_index = 1 if args.sort_by == 'namespace' else 0
            for array, namespace in sorted(arrays,
                                           key=itemgetter(sort_index)):
                print(scidb_make_qualified_array_name(namespace, array))
    except Exception as e:
        # Top-level boundary: report the failure and exit non-zero.
        sys.stderr.write('------ Exception -----------------------------\n')
        sys.stderr.write(str(e) + '\n')

        if _print_traceback_upon_exception:
            sys.stderr.write(
                '------ Traceback (for debug purpose) ---------\n')
            traceback.print_exc()

        sys.stderr.write('----------------------------------------------\n')
        sys.exit(-1)  # upon an exception, throw -1
def my_test(args, num_chunks, chunk_length, initial_values_per_chunk,
            new_values_per_chunk, type_name):
    """Time the appending of alternating values to the end of every chunk.

    The test (re)creates a 1-D temp array, fills the first
    initial_values_per_chunk cells of each chunk, then runs
    new_values_per_chunk batches; each batch sets one more cell at the end
    of the populated part of every chunk.  Cells at even indexes receive the
    "even" literal and cells at odd indexes the "odd" literal.

    @param args                      command-line parameters.
    @param num_chunks                how many chunks are there.
    @param chunk_length              the chunk length.
    @param initial_values_per_chunk  the number of initial values per chunk
    @param new_values_per_chunk      how many value to insert into each chunk.
    @param type_name                 the data type.
    @return 0
    """
    # Pick the literals for even / odd cells; bool uses true / false.
    if type_name == "bool":
        even_value, odd_value = "true", "false"
    else:
        even_value, odd_value = "0", "1"

    # Progress reporting follows the verbosity setting.
    tracker = scidb_progress.ProgressTracker(if_print_start=args.verbose,
                                             if_print_end=args.verbose)
    tracker.register_step('initial', 'Load initial values.')
    tracker.register_step('new', 'Insert new values.')

    # Start from a clean slate; leftovers from an earlier run are tolerated.
    iquery_cmd = scidb_afl.get_iquery_cmd(args)
    my_remove_arrays(iquery_cmd, tolerate_error=True)

    # Create the target array.
    last_index = chunk_length * num_chunks - 1
    create_target = "create temp array %s <v:%s>[i=0:%d,%d,0]" % (
        array_name, type_name, last_index, chunk_length)
    scidb_afl.afl(iquery_cmd, create_target)

    # Initial load: describe, per chunk, the [low, high] range of cells to
    # keep in a ranges array, then cross_between a fully-built array with it.
    tracker.start_step('initial')
    create_ranges = (
        "create temp array %s <low:int64, high:int64>[i=0:%d,%d,0]" % (
            ranges_array_name, num_chunks - 1, num_chunks))
    scidb_afl.afl(iquery_cmd, create_ranges)

    for chunk in xrange(num_chunks):
        low = chunk * chunk_length
        high = chunk * chunk_length + initial_values_per_chunk - 1
        insert_range = (
            "insert(redimension(apply(build(<adummyattribute:bool>[adummydim=0:0,1,0],true), i, %d, low, %d, high, %d), %s), %s)"
            % (chunk, low, high, ranges_array_name, ranges_array_name))
        scidb_afl.afl(iquery_cmd, insert_range)

    store_initial = (
        "store(cross_between(build(%s, iif(i%%2=0, %s(%s), %s(%s))), %s), %s)"
        % (array_name, type_name, even_value, type_name, odd_value,
           ranges_array_name, array_name))
    scidb_afl.afl(iquery_cmd, store_initial)
    tracker.end_step('initial')

    # Additional values: one cell per chunk in every batch, timed per batch.
    tracker.start_step('new')
    if args.verbose:
        print("In each of the %d batches, one value will be appended to each of the %d chunks." % (
            new_values_per_chunk, num_chunks))
        print("Batch\tTime")

    for batch in xrange(new_values_per_chunk):
        batch_started = datetime.datetime.now()
        for chunk in xrange(num_chunks):
            index = chunk * chunk_length + batch + initial_values_per_chunk
            if index % 2 == 0:
                literal = even_value
            else:
                literal = odd_value
            value = type_name + "(" + literal + ")"
            set_cell = "op_set_cell_attr_1D(%s, i, %d, v, %s)" % (
                array_name, index, value)
            scidb_afl.afl(iquery_cmd, set_cell)

        if args.verbose:
            elapsed = datetime.datetime.now() - batch_started
            seconds = scidb_progress.timedelta_total_seconds(elapsed)
            print("%d\t%f" % (batch + 1, seconds))
    tracker.end_step('new')

    # Clean up; at this point a failure to remove is a real error.
    my_remove_arrays(iquery_cmd, tolerate_error=False)

    return 0