def test_scidb_afl_module():
    """Exercise the public helpers of scidblib.scidb_afl once each.

    An iquery command is built from a stand-in argument object whose host
    and port are empty strings.  Every helper is then invoked, and the
    results of the value-returning ones are printed.
    """
    print('*** testing scidblib.scidb_afl...')

    class TmpArgs:
        # Minimal stand-in for parsed command-line arguments.
        def __init__(self):
            self.host = self.port = ''

    iquery_cmd = scidb_afl.get_iquery_cmd(TmpArgs())
    scidb_afl.execute_it_return_out_err('ls')
    scidb_afl.afl(iquery_cmd, 'list()')

    elapsed = scidb_afl.time_afl(iquery_cmd, 'list()')
    print("time_afl(..., 'list()') = %s" % (elapsed,))

    one_attr = scidb_afl.single_cell_afl(
        iquery_cmd, 'build(<v:int64>[i=0:0,1,0], 5)', 1)
    print("single_cell_afl(..., 'build(<v:int64>[i=0:0,1,0], 5)', 1) = %s" % (one_attr,))

    two_attrs = scidb_afl.single_cell_afl(
        iquery_cmd, 'apply(build(<v:int64>[i=0:0,1,0], 5), v2, 6)', 2)
    print("single_cell_afl(..., 'apply(build(<v:int64>[i=0:0,1,0], 5), v2, 6)', 2) = %s" % (two_attrs,))

    instance_count = scidb_afl.get_num_instances(iquery_cmd)
    print('get_num_instances(...) = %s' % (instance_count,))

    array_names = scidb_afl.get_array_names(iquery_cmd)
    print('get_array_names(...) = %s' % (array_names,))

    # Trailing blank line, as a visual separator after the test output.
    print('')
Example #2
0
    def __init__(self):
        """Build and remember the iquery command used by this object.

        The command comes from scidb_afl.get_iquery_cmd, called with a
        stand-in argument object (empty host and port) and the program
        name 'iquery'.
        """
        class TmpArgs:
            # Stand-in for parsed command-line arguments.
            def __init__(self):
                self.host = self.port = ''

        self._iquery_cmd = scidb_afl.get_iquery_cmd(TmpArgs(), 'iquery')
Example #3
0
    def __init__(self):
        """Store the iquery command obtained from scidb_afl.get_iquery_cmd.

        A throwaway argument object with empty host and port is passed,
        together with the program name 'iquery'.
        """
        class TmpArgs:
            # Only the two attributes set below are provided.
            def __init__(self):
                self.host = self.port = ''

        self._iquery_cmd = scidb_afl.get_iquery_cmd(TmpArgs(), 'iquery')
    def __init__(self, authentication_file = None):
        """Build and remember the iquery command, optionally with an auth file.

        @param authentication_file  when truthy, ' --auth-file ' followed by
                                    this value is appended to the program
                                    string 'iquery' before it is handed to
                                    scidb_afl.get_iquery_cmd.
        """
        class TmpArgs:
            # Stand-in for parsed command-line arguments.
            def __init__(self):
                self.host = self.port = ''

        pieces = ['iquery']
        if authentication_file:
            pieces.append(' --auth-file ' + authentication_file)

        self._iquery_cmd = scidb_afl.get_iquery_cmd(TmpArgs(), ''.join(pieces))
Example #5
0
    def __init__(self, login_username = None, login_userpassword = None):
        """Build and remember the iquery command, optionally with credentials.

        @param login_username      when truthy, appended to the program
                                   string after ' -U '.
        @param login_userpassword  when truthy, appended to the program
                                   string after ' -P '.

        NOTE(review): the password becomes part of the command string;
        presumably it is then visible wherever that string is shown or
        logged -- confirm against how the command is executed.
        """
        class TmpArgs:
            # Stand-in for parsed command-line arguments.
            def __init__(self):
                self.host = self.port = ''

        pieces = ['iquery']
        if login_username:
            pieces.append(' -U ' + login_username)
        if login_userpassword:
            pieces.append(' -P ' + login_userpassword)

        self._iquery_cmd = scidb_afl.get_iquery_cmd(TmpArgs(), ''.join(pieces))
Example #6
0
def test_scidb_afl_module():
    """Run each public helper of scidblib.scidb_afl once and print results.

    The iquery command is derived from a stand-in argument object with an
    empty host and port.
    """
    print('*** testing scidblib.scidb_afl...')

    class TmpArgs:
        # Minimal stand-in for parsed command-line arguments.
        def __init__(self):
            self.host = self.port = ''

    iquery_cmd = scidb_afl.get_iquery_cmd(TmpArgs())
    scidb_afl.execute_it_return_out_err('ls')
    scidb_afl.afl(iquery_cmd, 'list()')

    timing = scidb_afl.time_afl(iquery_cmd, 'list()')
    print("time_afl(..., 'list()') = %s" % (timing,))

    single_value = scidb_afl.single_cell_afl(
        iquery_cmd, 'build(<v:int64>[i=0:0,1,0], 5)', 1)
    print("single_cell_afl(..., 'build(<v:int64>[i=0:0,1,0], 5)', 1) = %s" % (single_value,))

    value_pair = scidb_afl.single_cell_afl(
        iquery_cmd, 'apply(build(<v:int64>[i=0:0,1,0], 5), v2, 6)', 2)
    print("single_cell_afl(..., 'apply(build(<v:int64>[i=0:0,1,0], 5), v2, 6)', 2) = %s" % (value_pair,))

    print('get_num_instances(...) = %s' % (scidb_afl.get_num_instances(iquery_cmd),))
    print('get_array_names(...) = %s' % (scidb_afl.get_array_names(iquery_cmd),))

    # Trailing blank line after the test output.
    print('')
Example #7
0
def main():
    """Remove every SciDB array whose full name matches the REGEX argument.

    Each namespace returned by scidb_afl.get_namespace_names is searched in
    turn.  Unless --force is given, the user is asked to confirm the removal
    for each namespace; declining ends the run immediately.  After removing
    arrays from a namespace other than 'public', the namespace is dropped if
    scidb_afl.get_array_names then returns nothing for it.

    Any exception is reported on stderr (with a traceback when args.verbose
    is truthy) and the process exits with status -1.
    """
    parser = argparse.ArgumentParser(
        description='Remove all SciDB arrays whose names match a given pattern.',
        epilog='assumptions:\n' +
               '  - iquery is in your path.',
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-f', '--force', action='store_true',
                        help='Forced removal of the arrays without asking for confirmation.')
    parser.add_argument('-c', '--host',
                        help='Host name to be passed to iquery.')
    parser.add_argument('-p', '--port',
                        help='Port number to be passed to iquery.')
    parser.add_argument('-t', '--temp-only', action='store_true',
                        help='Limiting the candidates to temp arrays.')
    parser.add_argument('-U', '--user-name',
                        help='User name to be passed to iquery.')
    parser.add_argument('-P', '--user-password',
                        help='User password to be passed to iquery.')
    # The option string must not carry trailing whitespace: argparse derives
    # the attribute name from it, and the error handler reads args.verbose.
    # No action is given, so -v takes a value; the default keeps it on.
    parser.add_argument('-v', '--verbose', default=True,
                        help='display verbose output.')
    parser.add_argument('regex', metavar='REGEX', type=str, nargs='?', default='.*',
                        help='''Regular expression to match against array names.
                        The utility will remove arrays whose names match the regular expression.
                        Default is '.*', meaning to remove all arrays, because the pattern matches all names.
                        The regular expression must match the full array name.
                        For instance, '.*s' will match array 'dogs' because it ends with 's',
                        but will not match array 'moose' because it does not end with 's'.'''
                        )

    args = parser.parse_args()

    try:
        # Group the user's pattern so that an alternation such as 'a|b' is
        # anchored as a whole instead of as '^a' or 'b$'.
        full_name_re = re.compile('^(?:' + args.regex + ')$')

        iquery_cmd = scidb_afl.get_iquery_cmd(args)
        namespaces = scidb_afl.get_namespace_names(iquery_cmd)

        # Running total across all namespaces, so the final report is
        # meaningful even when the last namespace had nothing to remove.
        total_removed = 0

        for namespace in namespaces:
            print('\nSearching namespace:  %s' % (namespace,))

            names = scidb_afl.get_array_names(
                iquery_cmd=iquery_cmd,
                temp_only=args.temp_only,
                namespace=namespace)

            names_to_remove = [name for name in names if full_name_re.match(name)]

            if not names_to_remove:
                print('There are no arrays to remove in namespace %s' % (namespace,))
                continue

            if not args.force:
                print('The following arrays are about to be removed from namespace ' + namespace + ':')
                print(names_to_remove)
                proceed = scidb_psf.confirm(prompt='Are you sure you want to remove?', resp=False)
                if not proceed:
                    return

            for name in names_to_remove:
                scidb_afl.remove_array(name, namespace, iquery_cmd)
                total_removed += 1

            if namespace != 'public':
                # Re-list to find out whether the namespace is now empty.
                names = scidb_afl.get_array_names(
                    iquery_cmd=iquery_cmd,
                    temp_only=args.temp_only,
                    namespace=namespace)
                if not names:
                    scidb_afl.afl(
                        iquery_cmd,
                        'drop_namespace(\'' + namespace + '\');')

                    print('namespace ' + namespace + ' removed')

        print('Number of arrays removed = %d' % total_removed)
    except Exception as e:
        sys.stderr.write('------ Exception -----------------------------\n')
        sys.stderr.write('%s\n' % (e,))

        if args.verbose:
            sys.stderr.write('------ Traceback (for debug purpose) ---------\n')
            traceback.print_exc()

        sys.stderr.write('----------------------------------------------\n')
        sys.exit(-1)  # upon an exception, throw -1
Example #8
0
def calculate_chunk_length(args):
    """Calculate chunk length and other fields which were '?', and print out the schema.

    Dimension fields given as '?' in args.raw_dims (dim_low, dim_high,
    chunk_overlap and chunk_length) are filled in from statistics obtained by
    running AFL queries against args.load_array.  The resulting dimension
    specification is printed to stdout.

    @param args  the result of argparse.ArgumentParser.parse_args().
                 Fields read directly here: load_array, raw_dims, verbose,
                 desired_values_per_chunk, keep_shape, grid_threshold and
                 grid_base10.
    @return 0
    @exception AppError if anything goes wrong; raised here when a
               statistics query returns a value that is not an integer.
    """
    iquery_cmd = scidb_afl.get_iquery_cmd(args)
    load_array = args.load_array
    raw_dims_str = args.raw_dims

    calculated_dims = Dimensions(raw_dims_str)

    # Initialize the progress tracker
    progress_tracker = scidb_progress.ProgressTracker(sys.stdout,
                                      '',
                                      args.verbose,     # if_print_start
                                      args.verbose,     # if_print_end
                                      args.verbose      # if_print_skip
                                      )
    progress_tracker.register_step('min_max_dc', 'Get min_coord, max_coord, and ApproxDC for each dim from load_array.')
    progress_tracker.register_step('overall_dc', 'Get overall ApproxDC from load_array.')
    progress_tracker.register_step('calculate', 'Calculate and adjust dimension specification.')

    # S = dims where chunk_length is Specified;
    # N = dims where chunk_length is Not specified.
    # Both hold indices into calculated_dims.list.
    S = []
    N = []
    for i, the_dim in enumerate(calculated_dims.list):
        if the_dim.chunk_length == '?':
            N.append(i)
        else:
            S.append(i)

    # Get the (dimension and attribute) names of the load_array.
    names_in_load_array = NamesInLoadArray(iquery_cmd, load_array)

    # for each i in [0..d), calculate min_coord[i], max_coord[i], and distinct_count[i]
    progress_tracker.start_step('min_max_dc')
    for the_dim in calculated_dims.list:
        index = names_in_load_array.find_index(the_dim.dim_name)
        the_name_in_load_array = names_in_load_array.list[index]

        if the_name_in_load_array.is_dim:
            # The name is a dimension of load_array: aggregate along it, copy
            # the coordinate into a uniquely named attribute, then take
            # min/max/count(*) of that attribute.
            tmp = names_in_load_array.gen_uniq_name()
            cmd = ('aggregate(apply(aggregate(' + load_array + ', count(*), ' + the_dim.dim_name +
                  '), ' + tmp + ', ' + the_dim.dim_name + '), min(' + tmp + '), max(' + tmp + '), count(*))'
                  )
        else:
            # Otherwise the name is used directly inside min(), max() and
            # approxdc() over load_array.
            cmd = ('aggregate(' + load_array + ', min(' + the_dim.dim_name + '), max(' + the_dim.dim_name +
                   '), approxdc(' + the_dim.dim_name + '))'
                   )
        min_coord, max_coord, distinct_count = scidb_afl.single_cell_afl(iquery_cmd, cmd, 3)
        try:
            min_coord_int = int(min_coord)
            max_coord_int = int(max_coord)
            distinct_count_int = int(distinct_count)
            if args.verbose:
                print 'For ' + the_dim.dim_name + ', min_coord=' + str(min_coord_int) +\
                    ', max_coord=' + str(max_coord_int) +\
                    ', distinct_count=' + str(distinct_count_int)
        except ValueError:
            # The message concatenates the raw query results, so it assumes
            # they are strings -- TODO confirm single_cell_afl's return type.
            raise scidblib.AppError('Error: I cannot proceed because for ' + the_dim.dim_name + ' in array ' + load_array +
                            ', not all of min_coord (=' + min_coord + '), max_coord (=' + max_coord +
                            '), and distinct_count (=' + distinct_count + ') are integers.')
        the_dim.set_min_max_dc(min_coord_int, max_coord_int, distinct_count_int)
    progress_tracker.end_step('min_max_dc')

    # Fill dim_low, dim_high, and chunk_overlap (which was a '?' before).
    for the_dim in calculated_dims.list:
        if the_dim.dim_low == '?':
            the_dim.dim_low = the_dim.min_coord
        if the_dim.dim_high == '?':
            the_dim.dim_high = the_dim.max_coord
        if the_dim.chunk_overlap == '?':
            the_dim.chunk_overlap = 0

    # Generate string_concat_of_dim_values in the form of:
    # string(dim_name1) + '|' + string(dim_name2) + '|' + string(dim_name3)
    string_values = []
    for i, the_dim in enumerate(calculated_dims.list):
        string_values.append('string(' + the_dim.dim_name + ')')
    string_concat_of_dim_values = ' + \'|\' + '.join(string_values)

    # Calculate overall_distinct_count.
    tmp = names_in_load_array.gen_uniq_name()
    cmd = ('aggregate(apply(' + load_array + ', ' + tmp + ', ' + string_concat_of_dim_values + '), approxdc(' + tmp + '))'
           )
    progress_tracker.start_step('overall_dc')
    overall_distinct_count = scidb_afl.single_cell_afl(iquery_cmd, cmd, 1)
    overall_count = scidb_afl.single_cell_afl(iquery_cmd, 'aggregate(' + load_array + ', count(*))', 1)
    try:
        overall_distinct_count = int(overall_distinct_count)
        overall_count = int(overall_count)
        # Cap the approxdc() result at the cell count from count(*).
        if overall_distinct_count > overall_count:
            overall_distinct_count = overall_count
    except ValueError:
        raise scidblib.AppError('Error: The query to get overall_distinct_count failed to return an integer.')
    if args.verbose:
        print 'overall_distinct_count=' + str(overall_distinct_count)
    progress_tracker.end_step('overall_dc')

    progress_tracker.start_step('calculate')

    # Shortcut: if |N| == 0, we are done.
    # NOTE(review): this path returns without calling end_step('calculate').
    if len(N)==0:
        print calculated_dims.__str__()
        return 0

    # Set num_chunks_from_n.
    # Start from the number of chunks needed overall, then divide out the
    # chunk count of every dimension whose chunk_length was specified.
    num_chunks_from_n = scidb_math.ceil_of_division(overall_distinct_count, args.desired_values_per_chunk)
    for i in S:
        the_dim = calculated_dims.list[i]
        chunk_count = scidb_math.ceil_of_division(the_dim.distinct_count, int(the_dim.chunk_length))
        num_chunks_from_n = scidb_math.ceil_of_division(num_chunks_from_n, chunk_count)
    if num_chunks_from_n <= 1:
        num_chunks_from_n = 1

    # For each dimension i in N, calculate chunk_count[i], then set chunk_length.
    for i in N:
        the_dim = calculated_dims.list[i]
        # Equal share: the |N|-th root of the remaining chunk count.
        chunk_count = math.pow(num_chunks_from_n, 1.0/len(N))
        if not args.keep_shape:
            # calculate geomean
            # (the geometric mean of distinct_count over the dims in N);
            # the share is then scaled by this dim's distinct_count / geomean.
            product = 1.0
            for k in N:
                product *= calculated_dims.list[k].distinct_count
            geomean = math.pow(product, 1.0/len(N))
            chunk_count *= the_dim.distinct_count / geomean
        if chunk_count<1:
            chunk_count = 1.0
        # chunk_count is a float here, so the division below is a float
        # division before the ceiling is taken.
        the_dim.chunk_length = int(math.ceil(
                                           (the_dim.max_coord-the_dim.min_coord+1)/chunk_count
                                           ))
        # Snapping is applied only when the dimension is split into more
        # than one chunk.
        if chunk_count>1:
            the_dim.chunk_length = scidb_math.snap_to_grid(
                                   the_dim.chunk_length, args.grid_threshold, use_binary=(not args.grid_base10))
    progress_tracker.end_step('calculate')

    # Print result.
    print calculated_dims.__str__()

    return 0
Example #9
0
def main():
    """List SciDB arrays together with the namespace each one lives in.

    Every namespace returned by scidb_afl.get_namespace_names is queried for
    its arrays.  With --find-array only the first entry with that exact
    array name is printed (ValueError if there is none); otherwise all
    entries are printed, sorted by array name or by namespace according to
    --sort-by.  A header line precedes the listing.

    Any exception is reported on stderr and the process exits with -1.
    """
    parser = argparse.ArgumentParser(
        description='List all scidb arrays.',
        epilog=('Assumptions:\n'
                '  - SciDB is running.\n'
                '  - The environment is setup to support namespaces.\n'
                '  - The iquery application is in your path.'),
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '-c', '--host', help='Host name to be passed to iquery.')
    parser.add_argument(
        '-p', '--port', help='Port number to be passed to iquery.')
    parser.add_argument(
        '-t', '--temp-only', action='store_true',
        help='Limiting the candidates to temp arrays.')
    # No action is given, so -v/--versions expects a value.
    parser.add_argument(
        '-v', '--versions', help='Include all versions in the list.')
    parser.add_argument(
        '-A', '--auth-file',
        help='Authentication file to be passed to iquery.')
    parser.add_argument(
        '-s', '--sort-by', default='array', choices=['array', 'namespace'],
        help='Either array or namespace.')
    parser.add_argument(
        '-f', '--find-array', help='find a particular array name.')

    args = parser.parse_args()

    try:
        # Collected as (array, namespace) pairs.
        arrays = []
        iquery_cmd = scidb_afl.get_iquery_cmd(args)
        for namespace in scidb_afl.get_namespace_names(iquery_cmd):
            found = scidb_afl.get_array_names(
                iquery_cmd=iquery_cmd,
                temp_only=args.temp_only,
                versions=args.versions,
                namespace=namespace)
            arrays.extend([(array, namespace) for array in found])

        if not arrays:
            sys.stderr.write('No arrays found\n')
        elif args.find_array:
            hits = [pair for pair in arrays if pair[0] == args.find_array]
            if not hits:
                raise ValueError('array {0} not found'.format(args.find_array))
            array, namespace = hits[0]
            print(scidb_make_qualified_array_name('namespace', 'array'))
            print(scidb_make_qualified_array_name(namespace, array))
        else:
            print(scidb_make_qualified_array_name('namespace', 'array'))
            # Position within each pair to sort on: 0 = array, 1 = namespace.
            sort_index = 1 if args.sort_by == 'namespace' else 0
            for array, namespace in sorted(arrays, key=itemgetter(sort_index)):
                print(scidb_make_qualified_array_name(namespace, array))

    except Exception as e:
        sys.stderr.write('------ Exception -----------------------------\n')
        sys.stderr.write('%s\n' % (e,))

        if _print_traceback_upon_exception:
            sys.stderr.write('------ Traceback (for debug purpose) ---------\n')
            traceback.print_exc()

        sys.stderr.write('----------------------------------------------\n')
        sys.exit(-1)  # upon an exception, throw -1
Example #10
0
def my_test(args, num_chunks, chunk_length, initial_values_per_chunk, new_values_per_chunk, type_name):
    """Time the appending of alternating values to the end of every chunk of an array.

    A temp array is created and partially filled, then values are appended
    one cell at a time, batch by batch; with args.verbose the time taken by
    each batch is printed.  The arrays are removed again at the end.

    @param args                      command-line parameters.
    @param num_chunks                how many chunks are there.
    @param chunk_length              the chunk length.
    @param initial_values_per_chunk  the number of initial values per chunk
    @param new_values_per_chunk      how many value to insert into each chunk.
    @param type_name                 the data type.
    @return 0
    """
    # Literals written at even and odd coordinates.
    if type_name == "bool":
        even_value, odd_value = "true", "false"
    else:
        even_value, odd_value = "0", "1"

    # Set up progress reporting for the two phases.
    progress_tracker = scidb_progress.ProgressTracker(if_print_start = args.verbose, if_print_end = args.verbose)
    progress_tracker.register_step('initial', 'Load initial values.')
    progress_tracker.register_step('new', 'Insert new values.')

    # Start from a clean slate; leftovers from an earlier run are tolerated.
    iquery_cmd = scidb_afl.get_iquery_cmd(args)
    my_remove_arrays(iquery_cmd, tolerate_error=True)

    # Create the array under test.
    scidb_afl.afl(
        iquery_cmd,
        "create temp array %s <v:%s>[i=0:%d,%d,0]"
        % (array_name, type_name, chunk_length*num_chunks-1, chunk_length))

    # Phase 1: load the initial values.
    # A helper array records, per chunk, the coordinate range that should be
    # populated; cross_between then filters a fully-populated build() down
    # to those ranges.
    progress_tracker.start_step('initial')
    scidb_afl.afl(
        iquery_cmd,
        "create temp array %s <low:int64, high:int64>[i=0:%d,%d,0]"
        % (ranges_array_name, num_chunks-1, num_chunks))
    for chunk_no in xrange(num_chunks):
        low = chunk_no*chunk_length
        high = chunk_no*chunk_length+initial_values_per_chunk-1
        scidb_afl.afl(
            iquery_cmd,
            "insert(redimension(apply(build(<adummyattribute:bool>[adummydim=0:0,1,0],true), i, %d, low, %d, high, %d), %s), %s)"
            % (chunk_no, low, high, ranges_array_name, ranges_array_name))
    scidb_afl.afl(
        iquery_cmd,
        "store(cross_between(build(%s, iif(i%%2=0, %s(%s), %s(%s))), %s), %s)"
        % (array_name, type_name, even_value, type_name, odd_value, ranges_array_name, array_name))
    progress_tracker.end_step('initial')

    # Phase 2: append the additional values, one cell per chunk per batch.
    progress_tracker.start_step('new')
    if args.verbose:
        print("In each of the %d batches, one value will be appended to each of the %d chunks." % (new_values_per_chunk, num_chunks))
        print("Batch\tTime")
    for batch_no in xrange(new_values_per_chunk):
        start_time = datetime.datetime.now()
        for chunk_no in xrange(num_chunks):
            index = chunk_no*chunk_length+batch_no+initial_values_per_chunk
            literal = odd_value if index % 2 else even_value
            value = type_name + "(" + literal + ")"
            scidb_afl.afl(
                iquery_cmd,
                "op_set_cell_attr_1D(%s, i, %d, v, %s)" % (array_name, index, value))
        if args.verbose:
            seconds = scidb_progress.timedelta_total_seconds(datetime.datetime.now() - start_time)
            print("%d\t%f" % (batch_no+1, seconds))
    progress_tracker.end_step('new')

    # Clean up; this time a failure to remove is an error.
    my_remove_arrays(iquery_cmd, tolerate_error=False)

    return 0
Example #11
0
def main():
    """Remove every SciDB array whose full name matches the REGEX argument.

    Each namespace returned by scidb_afl.get_namespace_names is searched in
    turn.  Unless --force is given, the user is asked to confirm the removal
    for each namespace; declining ends the run immediately.  After removing
    arrays from a namespace other than 'public', the namespace is dropped if
    scidb_afl.get_array_names then returns nothing for it.

    Any exception is reported on stderr (with a traceback when args.verbose
    is truthy) and the process exits with status -1.
    """
    parser = argparse.ArgumentParser(
        description=
        'Remove all SciDB arrays whose names match a given pattern.',
        epilog='assumptions:\n' + '  - iquery is in your path.',
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '-f',
        '--force',
        action='store_true',
        help='Forced removal of the arrays without asking for confirmation.')
    parser.add_argument('-c',
                        '--host',
                        help='Host name to be passed to iquery.')
    parser.add_argument('-p',
                        '--port',
                        help='Port number to be passed to iquery.')
    parser.add_argument('-t',
                        '--temp-only',
                        action='store_true',
                        help='Limiting the candidates to temp arrays.')
    parser.add_argument('-U',
                        '--user-name',
                        help='User name to be passed to iquery.')
    parser.add_argument('-P',
                        '--user-password',
                        help='User password to be passed to iquery.')
    # The option string must not carry trailing whitespace: argparse derives
    # the attribute name from it, and the error handler reads args.verbose.
    # No action is given, so -v takes a value; the default keeps it on.
    parser.add_argument('-v',
                        '--verbose',
                        default=True,
                        help='display verbose output.')
    parser.add_argument(
        'regex',
        metavar='REGEX',
        type=str,
        nargs='?',
        default='.*',
        help='''Regular expression to match against array names.
                        The utility will remove arrays whose names match the regular expression.
                        Default is '.*', meaning to remove all arrays, because the pattern matches all names.
                        The regular expression must match the full array name.
                        For instance, '.*s' will match array 'dogs' because it ends with 's',
                        but will not match array 'moose' because it does not end with 's'.'''
    )

    args = parser.parse_args()

    try:
        # Group the user's pattern so that an alternation such as 'a|b' is
        # anchored as a whole instead of as '^a' or 'b$'.
        full_name_re = re.compile('^(?:' + args.regex + ')$')

        iquery_cmd = scidb_afl.get_iquery_cmd(args)
        namespaces = scidb_afl.get_namespace_names(iquery_cmd)

        # Running total across all namespaces, so the final report is
        # meaningful even when the last namespace had nothing to remove.
        total_removed = 0

        for namespace in namespaces:
            print('\nSearching namespace:  %s' % (namespace,))

            names = scidb_afl.get_array_names(iquery_cmd=iquery_cmd,
                                              temp_only=args.temp_only,
                                              namespace=namespace)

            names_to_remove = [
                name for name in names if full_name_re.match(name)
            ]

            if not names_to_remove:
                print('There are no arrays to remove in namespace %s' % (namespace,))
                continue

            if not args.force:
                print('The following arrays are about to be removed from namespace ' + namespace + ':')
                print(names_to_remove)
                proceed = scidb_psf.confirm(
                    prompt='Are you sure you want to remove?', resp=False)
                if not proceed:
                    return

            for name in names_to_remove:
                scidb_afl.remove_array(name, namespace, iquery_cmd)
                total_removed += 1

            if namespace != 'public':
                # Re-list to find out whether the namespace is now empty.
                names = scidb_afl.get_array_names(iquery_cmd=iquery_cmd,
                                                  temp_only=args.temp_only,
                                                  namespace=namespace)
                if not names:
                    scidb_afl.afl(iquery_cmd,
                                  'drop_namespace(\'' + namespace + '\');')

                    print('namespace ' + namespace + ' removed')

        print('Number of arrays removed = %d' % total_removed)
    except Exception as e:
        sys.stderr.write('------ Exception -----------------------------\n')
        sys.stderr.write('%s\n' % (e,))

        if args.verbose:
            sys.stderr.write('------ Traceback (for debug purpose) ---------\n')
            traceback.print_exc()

        sys.stderr.write('----------------------------------------------\n')
        sys.exit(-1)  # upon an exception, throw -1
Example #12
0
def main():
    """Remove every SciDB array whose full name matches the REGEX argument.

    The array names are listed, filtered by the pattern and, unless --force
    is given, shown to the user for confirmation before each matching array
    is removed with a 'remove(<name>)' query.  The process exits with
    status 0 when nothing matches or the user declines.

    Any exception is reported on stderr and the process exits with -1.

    @note If _print_traceback_upon_exception (a module-level name that is
          not defined in this function) is true, the stack trace is printed
          as well. This is helpful during debugging.
    """
    parser = argparse.ArgumentParser(
        description=
        'Remove all SciDB arrays whose names match a given pattern.',
        epilog='assumptions:\n' + '  - iquery is in your path.',
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '-f',
        '--force',
        action='store_true',
        help='Forced removal of the arrays without asking for confirmation.')
    parser.add_argument('-c',
                        '--host',
                        help='Host name to be passed to iquery.')
    parser.add_argument('-p',
                        '--port',
                        help='Port number to be passed to iquery.')
    parser.add_argument('-t',
                        '--temp-only',
                        action='store_true',
                        help='Limiting the candidates to temp arrays.')
    parser.add_argument(
        'regex',
        metavar='REGEX',
        type=str,
        nargs='?',
        default='.*',
        help='''Regular expression to match against array names.
                        The utility will remove arrays whose names match the regular expression.
                        Default is '.*', meaning to remove all arrays, because the pattern matches all names.
                        The regular expression must match the full array name.
                        For instance, '.*s' will match array 'dogs' because it ends with 's',
                        but will not match array 'moose' because it does not end with 's'.'''
    )
    args = parser.parse_args()

    try:
        # Group the user's pattern so that an alternation such as 'a|b' is
        # anchored as a whole instead of as '^a' or 'b$'.
        full_name_re = re.compile('^(?:' + args.regex + ')$')

        # Build the command first so the listing below honours --host and
        # --port just like the removals do.
        iquery_cmd = scidb_afl.get_iquery_cmd(args)
        names = scidb_afl.get_array_names(iquery_cmd=iquery_cmd,
                                          temp_only=args.temp_only)

        names_to_remove = [name for name in names if full_name_re.match(name)]

        if not names_to_remove:
            print('There is no array to remove.')
            sys.exit(0)

        if not args.force:
            print('The following arrays are about to be removed:')
            print(names_to_remove)
            proceed = scidb_psf.confirm(
                prompt='Are you sure you want to remove?', resp=False)
            if not proceed:
                sys.exit(0)

        for name in names_to_remove:
            scidb_afl.afl(iquery_cmd, 'remove(' + name + ')')

        print('Number of arrays removed = %d' % len(names_to_remove))
    except Exception as e:
        # sys.exit() above raises SystemExit, which is not an Exception
        # subclass and therefore passes through untouched.
        sys.stderr.write('------ Exception -----------------------------\n')
        sys.stderr.write('%s\n' % (e,))

        if _print_traceback_upon_exception:
            sys.stderr.write('------ Traceback (for debug purpose) ---------\n')
            traceback.print_exc()

        sys.stderr.write('----------------------------------------------\n')
        sys.exit(-1)  # upon an exception, throw -1
Example #13
0
def main():
    """Remove every SciDB array whose full name matches the REGEX argument.

    The array names are listed, filtered by the pattern and, unless --force
    is given, shown to the user for confirmation before each matching array
    is removed with a 'remove(<name>)' query.  The process exits with
    status 0 when nothing matches or the user declines.

    Any exception is reported on stderr and the process exits with -1.

    @note If _print_traceback_upon_exception (a module-level name that is
          not defined in this function) is true, the stack trace is printed
          as well. This is helpful during debugging.
    """
    parser = argparse.ArgumentParser(
                                     description='Remove all SciDB arrays whose names match a given pattern.',
                                     epilog=
                                     'assumptions:\n' +
                                     '  - iquery is in your path.',
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-f', '--force', action='store_true',
                        help='Forced removal of the arrays without asking for confirmation.')
    parser.add_argument('-c', '--host',
                        help='Host name to be passed to iquery.')
    parser.add_argument('-p', '--port',
                        help='Port number to be passed to iquery.')
    parser.add_argument('-t', '--temp-only', action='store_true',
                        help='Limiting the candidates to temp arrays.')
    parser.add_argument('regex', metavar='REGEX', type=str, nargs='?', default='.*',
                        help='''Regular expression to match against array names.
                        The utility will remove arrays whose names match the regular expression.
                        Default is '.*', meaning to remove all arrays, because the pattern matches all names.
                        The regular expression must match the full array name.
                        For instance, '.*s' will match array 'dogs' because it ends with 's',
                        but will not match array 'moose' because it does not end with 's'.'''
                        )
    args = parser.parse_args()

    try:
        # Group the user's pattern so that an alternation such as 'a|b' is
        # anchored as a whole instead of as '^a' or 'b$'.
        full_name_re = re.compile('^(?:' + args.regex + ')$')

        # Build the command first so the listing below honours --host and
        # --port just like the removals do.
        iquery_cmd = scidb_afl.get_iquery_cmd(args)
        names = scidb_afl.get_array_names(iquery_cmd=iquery_cmd, temp_only=args.temp_only)

        names_to_remove = [name for name in names if full_name_re.match(name)]

        if not names_to_remove:
            print('There is no array to remove.')
            sys.exit(0)

        if not args.force:
            print('The following arrays are about to be removed:')
            print(names_to_remove)
            proceed = scidb_psf.confirm(prompt='Are you sure you want to remove?', resp=False)
            if not proceed:
                sys.exit(0)

        for name in names_to_remove:
            scidb_afl.afl(iquery_cmd, 'remove('+name+')')

        print('Number of arrays removed = %d' % len(names_to_remove))
    except Exception as e:
        # sys.exit() above raises SystemExit, which is not an Exception
        # subclass and therefore passes through untouched.
        sys.stderr.write('------ Exception -----------------------------\n')
        sys.stderr.write('%s\n' % (e,))

        if _print_traceback_upon_exception:
            sys.stderr.write('------ Traceback (for debug purpose) ---------\n')
            traceback.print_exc()

        sys.stderr.write('----------------------------------------------\n')
        sys.exit(-1)  # upon an exception, throw -1
Example #14
0
def main():
    """The main function gets command-line argument as a pattern, and removes all arrays with that
    pattern.
    
    Note:  Empty namespaces will NOT be removed.
    """
    parser = argparse.ArgumentParser(
        description=
        'Remove all SciDB arrays whose names match a given pattern.',
        epilog='assumptions:\n' + '  - iquery is in your path.',
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '-f',
        '--force',
        action='store_true',
        help='Forced removal of the arrays without asking for confirmation.')
    parser.add_argument('-c',
                        '--host',
                        help='Host name to be passed to iquery.')
    parser.add_argument('-p',
                        '--port',
                        help='Port number to be passed to iquery.')
    parser.add_argument('-t',
                        '--temp-only',
                        action='store_true',
                        help='Limiting the candidates to temp arrays.')
    parser.add_argument('-A',
                        '--auth-file',
                        help='Authentication file to be passed to iquery.')
    parser.add_argument(
        '-U',
        '--user-name',
        help=
        'Deprecated: Use --auth-file instead.  User name to be passed to iquery.'
    )
    parser.add_argument(
        '-P',
        '--user-password',
        help=
        'Deprecated: Use --auth-file instead.  User password to be passed to iquery.'
    )

    parser.add_argument('-v',
                        '--verbose',
                        default=True,
                        help='display verbose output.')
    parser.add_argument(
        'regex',
        metavar='REGEX',
        type=str,
        nargs='?',
        default='.*',
        help='''Regular expression to match against array names.
                        The utility will remove arrays whose names match the regular expression.
                        Default is '.+', meaning to remove all arrays, because the pattern matches all names.
                        The regular expression must match the full array name.
                        For instance, '.*s' will match array 'dogs' because it ends with 's',
                        but will not match array 'moose' because it does not end with 's'.'''
    )

    _temp_auth_file = None
    _arrays_removed = 0
    args = parser.parse_args()

    try:
        if args.verbose == True:
            print >> sys.stderr, "args={0}".format(args)

        if args.user_name and args.user_password and (args.auth_file == None):
            print >> sys.stderr, '\nWARNING:  --user-name and --user-password are deprecated. Use --auth-file instead.\n'
            _temp_auth_file = create_auth_file(args.user_name,
                                               args.user_password)
            args.auth_file = _temp_auth_file.name
            args.user_name = None
            args.user_password = None

        iquery_cmd = scidb_afl.get_iquery_cmd(args)
        namespaces = scidb_afl.get_namespace_names(iquery_cmd)

        if args.verbose == True:
            print >> sys.stderr, "namespaces={0}".format(namespaces)

        _arrays_removed = 0
        for namespace in namespaces:
            if args.verbose == True:
                print >> sys.stderr, "\nSearching namespace: ", namespace

            names = scidb_afl.get_array_names(iquery_cmd=iquery_cmd,
                                              temp_only=args.temp_only,
                                              namespace=namespace)

            if args.verbose == True:
                print >> sys.stderr, "names={0}".format(names)

            names_to_remove = []

            for name in names:
                match_name = re.match('^' + args.regex + '$', name)
                if match_name:
                    if args.verbose == True:
                        print >> sys.stderr, "Schedule {0}.{1} to be removed".format(
                            namespace, name)
                    names_to_remove.append(name)

            if not names_to_remove:
                if args.verbose == True:
                    print "There are no arrays to remove in namespace", namespace
                continue

            if not args.force:
                print 'The following arrays are about to be removed from namespace {0}:'.format(
                    namespace)
                print names_to_remove

                proceed = scidb_psf.confirm(
                    prompt='Are you sure you want to remove?', resp=False)
                if not proceed:
                    return

            for name in names_to_remove:
                scidb_afl.remove_array(name, namespace, iquery_cmd)
                if args.verbose == True:
                    print >> sys.stderr, "array {0}.{1} removed".format(
                        namespace, name)
                _arrays_removed += 1

            if namespace != 'public':
                names = scidb_afl.get_array_names(iquery_cmd=iquery_cmd,
                                                  temp_only=args.temp_only,
                                                  namespace=namespace)

                if not names:
                    scidb_afl.afl(iquery_cmd,
                                  "drop_namespace('{0}');".format(namespace))

                    if args.verbose == True:
                        print >> sys.stderr, "namespace {0} removed".format(
                            namespace)

        if args.verbose == True:
            print >> sys.stderr, 'Number of arrays removed =', _arrays_removed

        if _temp_auth_file:
            _temp_auth_file.close()
            _temp_auth_file = None

    except Exception, e:
        print >> sys.stderr, '------ Exception -----------------------------'
        print >> sys.stderr, e

        if args.verbose == True:
            print >> sys.stderr, 'Number of arrays removed =', _arrays_removed

        if args.verbose == True:
            print >> sys.stderr, '------ Traceback (for debug purpose) ---------'
            traceback.print_exc()

        if _temp_auth_file:
            _temp_auth_file.close()
            _temp_auth_file = None

        print >> sys.stderr, '----------------------------------------------'
        sys.exit(-1)  # upon an exception, throw -1
Example #15
0
def calculate_chunk_length(args):
    """Calculate chunk length and other fields which were '?', and print out the schema.

    @param args  the result of argparse.ArgumentParser.parse_args().
    @return 0
    @exception AppError if anything goes wrong.
    """
    iquery_cmd = scidb_afl.get_iquery_cmd(args)
    load_array = args.load_array
    raw_dims_str = args.raw_dims

    calculated_dims = parse_dimensions(raw_dims_str)
    dbg("Calculated dims:", [x.to_tuple() for x in calculated_dims])

    # Initialize the progress tracker
    progress_tracker = scidb_progress.ProgressTracker(
        sys.stdout,
        '',
        args.verbose,  # if_print_start
        args.verbose,  # if_print_end
        args.verbose  # if_print_skip
    )
    progress_tracker.register_step(
        'min_max_dc',
        'Get min_coord, max_coord, and ApproxDC for each dim from load_array.')
    progress_tracker.register_step('overall_dc',
                                   'Get overall ApproxDC from load_array.')
    progress_tracker.register_step(
        'calculate', 'Calculate and adjust dimension specification.')

    # S = dims where chunk_length is Specified;
    # N = dims where chunk_length is Not specified.
    S = []
    N = []
    for i, the_dim in enumerate(calculated_dims):
        if the_dim.chunk_length == '?':
            N.append(i)
        else:
            S.append(i)
    dbg("S:", S)
    dbg("N:", N)

    # Get the (dimension and attribute) names of the load_array.
    names_in_load_array = NamesInLoadArray(iquery_cmd, load_array)
    dbg("names...:", names_in_load_array.list)

    # for each i in [0..d), calculate min_coord[i], max_coord[i], and distinct_count[i]
    progress_tracker.start_step('min_max_dc')
    for the_dim in calculated_dims:
        index = names_in_load_array.find_index(the_dim.dim_name)
        the_name_in_load_array = names_in_load_array.list[index]

        if the_name_in_load_array.is_dim:
            tmp = names_in_load_array.gen_uniq_name()
            cmd = ('aggregate(apply(aggregate(' + load_array + ', count(*), ' +
                   the_dim.dim_name + '), ' + tmp + ', ' + the_dim.dim_name +
                   '), min(' + tmp + '), max(' + tmp + '), count(*))')
        else:
            cmd = ('aggregate(' + load_array + ', min(' + the_dim.dim_name +
                   '), max(' + the_dim.dim_name + '), approxdc(' +
                   the_dim.dim_name + '))')
        dbg("Cmd:", cmd)
        min_coord, max_coord, distinct_count = scidb_afl.single_cell_afl(
            iquery_cmd, cmd, 3)
        dbg("(min,max,dc):", (min_coord, max_coord, distinct_count))
        try:
            min_coord_int = int(min_coord)
            max_coord_int = int(max_coord)
            distinct_count_int = int(distinct_count)
            if args.verbose:
                print 'For ' + the_dim.dim_name + ', min_coord=' + str(min_coord_int) +\
                    ', max_coord=' + str(max_coord_int) +\
                    ', distinct_count=' + str(distinct_count_int)
        except ValueError:
            raise scidblib.AppError('Error: I cannot proceed because for ' +
                                    the_dim.dim_name + ' in array ' +
                                    load_array + ', not all of min_coord (=' +
                                    min_coord + '), max_coord (=' + max_coord +
                                    '), and distinct_count (=' +
                                    distinct_count + ') are integers.')
        the_dim.set_min_max_dc(min_coord_int, max_coord_int,
                               distinct_count_int)
    progress_tracker.end_step('min_max_dc')

    # Fill dim_low, dim_high, and chunk_overlap (which was a '?' before).
    for the_dim in calculated_dims:
        if the_dim.dim_low == '?':
            the_dim.dim_low = the_dim.min_coord
        if the_dim.dim_high == '?':
            the_dim.dim_high = the_dim.max_coord
        if the_dim.chunk_overlap == '?':
            the_dim.chunk_overlap = 0

    # Generate string_concat_of_dim_values in the form of:
    # string(dim_name1) + '|' + string(dim_name2) + '|' + string(dim_name3)
    string_values = []
    for i, the_dim in enumerate(calculated_dims):
        string_values.append('string(' + the_dim.dim_name + ')')
    string_concat_of_dim_values = ' + \'|\' + '.join(string_values)

    # Calculate overall_distinct_count.
    tmp = names_in_load_array.gen_uniq_name()
    cmd = ('aggregate(apply(' + load_array + ', ' + tmp + ', ' +
           string_concat_of_dim_values + '), approxdc(' + tmp + '))')
    progress_tracker.start_step('overall_dc')
    overall_distinct_count = scidb_afl.single_cell_afl(iquery_cmd, cmd, 1)
    overall_count = scidb_afl.single_cell_afl(
        iquery_cmd, 'aggregate(' + load_array + ', count(*))', 1)
    try:
        overall_distinct_count = int(overall_distinct_count)
        overall_count = int(overall_count)
        if overall_distinct_count > overall_count:
            overall_distinct_count = overall_count
    except ValueError:
        raise scidblib.AppError(
            'Error: The query to get overall_distinct_count failed to return an integer.'
        )
    if args.verbose:
        print 'overall_distinct_count=' + str(overall_distinct_count)
    progress_tracker.end_step('overall_dc')

    progress_tracker.start_step('calculate')

    # Shortcut: if |N| == 0, we are done.
    if len(N) == 0:
        print scidb_schema.unparse(
            dims=[x.to_tuple() for x in calculated_dims])
        return 0

    # Set num_chunks_from_n.
    num_chunks_from_n = scidb_math.ceil_of_division(
        overall_distinct_count, args.desired_values_per_chunk)
    for i in S:
        the_dim = calculated_dims[i]
        chunk_count = scidb_math.ceil_of_division(the_dim.distinct_count,
                                                  int(the_dim.chunk_length))
        num_chunks_from_n = scidb_math.ceil_of_division(
            num_chunks_from_n, chunk_count)
    if num_chunks_from_n <= 1:
        num_chunks_from_n = 1

    # For each dimension i in N, calculate chunk_count[i], then set chunk_length.
    for i in N:
        the_dim = calculated_dims[i]
        chunk_count = math.pow(num_chunks_from_n, 1.0 / len(N))
        if not args.keep_shape:
            # calculate geomean
            product = 1.0
            for k in N:
                product *= calculated_dims[k].distinct_count
            geomean = math.pow(product, 1.0 / len(N))
            chunk_count *= the_dim.distinct_count / geomean
        if chunk_count < 1:
            chunk_count = 1.0
        the_dim.chunk_length = int(
            math.ceil(
                (the_dim.max_coord - the_dim.min_coord + 1) / chunk_count))
        if chunk_count > 1:
            the_dim.chunk_length = scidb_math.snap_to_grid(
                the_dim.chunk_length,
                args.grid_threshold,
                use_binary=(not args.grid_base10))
    progress_tracker.end_step('calculate')

    # Print result.
    print scidb_schema.unparse(dims=[x.to_tuple() for x in calculated_dims])
    return 0
Example #16
0
def main():
    """The main function lists all arrays
    """
    parser = argparse.ArgumentParser(
        description='List all scidb arrays.',
        epilog='Assumptions:\n' + '  - SciDB is running.\n'
        '  - The environment is setup to support namespaces.\n'
        '  - The iquery application is in your path.',
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-c',
                        '--host',
                        help='Host name to be passed to iquery.')
    parser.add_argument('-p',
                        '--port',
                        help='Port number to be passed to iquery.')
    parser.add_argument('-t',
                        '--temp-only',
                        action='store_true',
                        help='Limiting the candidates to temp arrays.')
    parser.add_argument('-v',
                        '--versions',
                        help='Include all versions in the list.')
    parser.add_argument('-A',
                        '--auth-file',
                        help='Authentication file to be passed to iquery.')
    parser.add_argument('-s',
                        '--sort-by',
                        default='array',
                        choices=['array', 'namespace'],
                        help='Either array or namespace.')
    parser.add_argument('-f',
                        '--find-array',
                        help='find a particular array name.')

    args = parser.parse_args()

    try:
        arrays = []
        iquery_cmd = scidb_afl.get_iquery_cmd(args)
        namespaces = scidb_afl.get_namespace_names(iquery_cmd)
        for namespace in namespaces:
            new_arrays = scidb_afl.get_array_names(iquery_cmd=iquery_cmd,
                                                   temp_only=args.temp_only,
                                                   versions=args.versions,
                                                   namespace=namespace)
            for array in new_arrays:
                t = (array, namespace)
                arrays.append(t)

        if arrays:
            if args.find_array:
                result = [tup for tup in arrays if tup[0] == args.find_array]
                if not result:
                    raise ValueError, 'array {0} not found'.format(
                        args.find_array)
                array, namespace = result[0]
                print scidb_make_qualified_array_name('namespace', 'array')
                print scidb_make_qualified_array_name(namespace, array)
            else:
                print scidb_make_qualified_array_name('namespace', 'array')
                item = 0
                if args.sort_by == 'namespace':
                    item = 1

                for (array, namespace) in sorted(arrays, key=itemgetter(item)):
                    print scidb_make_qualified_array_name(namespace, array)
        else:
            print >> sys.stderr, 'No arrays found'

    except Exception, e:
        print >> sys.stderr, '------ Exception -----------------------------'
        print >> sys.stderr, e

        if _print_traceback_upon_exception:
            print >> sys.stderr, '------ Traceback (for debug purpose) ---------'
            traceback.print_exc()

        print >> sys.stderr, '----------------------------------------------'
        sys.exit(-1)  # upon an exception, throw -1
Example #17
0
def my_test(args, num_chunks, chunk_length, initial_values_per_chunk,
            new_values_per_chunk, type_name):
    """This function does the testing of appending alternate values to the end of every chunk of an array.

    @param args                          command-line parameters.
    @param num_chunks                    how many chunks are there.
    @param chunk_length                  the chunk length.
    @param initial_values_per_chunk  the number of initial values per chunk
    @param new_values_per_chunk      how many value to insert into each chunk.
    @param type_name                     the data type.
    @return 0
    """
    # Set even_value and odd_value.
    even_value = "0"
    odd_value = "1"
    if type_name == "bool":
        even_value = "true"
        odd_value = "false"

    # Initialize the ProgressTracker
    progress_tracker = scidb_progress.ProgressTracker(
        if_print_start=args.verbose, if_print_end=args.verbose)
    progress_tracker.register_step('initial', 'Load initial values.')
    progress_tracker.register_step('new', 'Insert new values.')

    # Remove the array if exists.
    iquery_cmd = scidb_afl.get_iquery_cmd(args)
    my_remove_arrays(iquery_cmd, tolerate_error=True)

    # Create the array.
    cmd = "create temp array %s <v:%s>[i=0:%d,%d,0]" % (
        array_name, type_name, chunk_length * num_chunks - 1, chunk_length)
    scidb_afl.afl(iquery_cmd, cmd)

    # Load initial values.
    # The algorithm is to create an array that describes the ranges for the initial values,
    # then use cross_between to filter out values from a fully-populated array.
    progress_tracker.start_step('initial')
    cmd = "create temp array %s <low:int64, high:int64>[i=0:%d,%d,0]" % (
        ranges_array_name, num_chunks - 1, num_chunks)
    scidb_afl.afl(iquery_cmd, cmd)
    for c in xrange(num_chunks):
        cmd = (
            "insert(redimension(apply(build(<adummyattribute:bool>[adummydim=0:0,1,0],true), i, %d, low, %d, high, %d), %s), %s)"
            %
            (c, c * chunk_length, c * chunk_length + initial_values_per_chunk -
             1, ranges_array_name, ranges_array_name))
        scidb_afl.afl(iquery_cmd, cmd)
    cmd = (
        "store(cross_between(build(%s, iif(i%%2=0, %s(%s), %s(%s))), %s), %s)"
        % (array_name, type_name, even_value, type_name, odd_value,
           ranges_array_name, array_name))
    scidb_afl.afl(iquery_cmd, cmd)
    progress_tracker.end_step('initial')

    # Load the additional values.
    progress_tracker.start_step('new')
    if args.verbose:
        print "In each of the %d batches, one value will be appended to each of the %d chunks." % (
            new_values_per_chunk, num_chunks)
        print "Batch\tTime"
    for i in xrange(new_values_per_chunk):
        start_time = datetime.datetime.now()
        for c in xrange(num_chunks):
            index = c * chunk_length + i + initial_values_per_chunk
            value = type_name + "(" + even_value + ")" if index % 2 == 0 else type_name + "(" + odd_value + ")"
            cmd = "op_set_cell_attr_1D(%s, i, %d, v, %s)" % (array_name, index,
                                                             value)
            scidb_afl.afl(iquery_cmd, cmd)
        if args.verbose:
            seconds = scidb_progress.timedelta_total_seconds(
                datetime.datetime.now() - start_time)
            print "%d\t%f" % (i + 1, seconds)
    progress_tracker.end_step('new')

    # Remove the array.
    my_remove_arrays(iquery_cmd, tolerate_error=False)

    # Return 0
    return 0