Esempio n. 1
0
def test_scidblib_unparse_schema():
    """Unit test for the Python schema un-parser.

    Parses a reference schema, un-parses the result back into text,
    parses that text a second time, and checks that the two parses agree
    on every attribute and dimension field.
    """
    print('*** testing scidblib.scidb_schema.unparse...')
    schema1 = "<z1:string DEFAULT 'aa aa',z2:int64 NULL DEFAULT -2," + \
        "z3:int32 NULL,z4:float DEFAULT -0.5,z5:char char('x')," + \
        "z4:datetime DEFAULT datetime(\'25Nov2009:16:11:19\')," + \
        "z5:datetimetz DEFAULT datetimetz(\'10/13/2008 15:10:20 +9:00\')>" + \
        "[dim1=-77:*,23,0,dim2=0:99,?,1,dim3=-100:7,?,1]"
    attrs1, dims1 = SS.parse(schema1)
    schema2 = SS.unparse(attrs1, dims1)

    attrs2, dims2 = SS.parse(schema2)
    # Check attributes:
    print('checking attributes...')
    # zip() stops at the shorter sequence, so compare the lengths first;
    # otherwise an attribute lost in the round trip would go unnoticed.
    assert len(attrs1) == len(attrs2), "attribute count: %d != %d" % (
        len(attrs1), len(attrs2))
    for attr1, attr2 in zip(attrs1, attrs2):
        assert attr1.name == attr2.name
        assert attr1.type == attr2.type
        assert attr1.nullable == attr2.nullable
        assert attr1.default == attr2.default

    # Check dimensions:
    print('checking dimensions...')
    # Same reasoning as above: guard against a silently truncated zip().
    assert len(dims1) == len(dims2), "dimension count: %d != %d" % (
        len(dims1), len(dims2))
    for dim1, dim2 in zip(dims1, dims2):
        assert dim1.name == dim2.name
        assert dim1.lo == dim2.lo
        assert dim1.hi == dim2.hi
        assert dim1.chunk == dim2.chunk
        assert dim1.overlap == dim2.overlap
Esempio n. 2
0
 def _setup_load_parameters(self):
     """Work out the load array and load schema from the options.

     _args.array_or_schema is first tried as a schema; if that does not
     parse it is looked up as an array name.  Sets self.load_array and
     self.load_schema, and raises Usage (or AppError when the schema
     reported for an existing array cannot be parsed) on bad input.
     """
     spec = _args.array_or_schema
     dims = None
     try:
         # First guess: the option holds a schema.
         unused_attrs, dims = scidb_schema.parse(spec)
     except ValueError:
         # Second guess: the option names an existing array.
         schema = get_array_schema(spec)
         if schema is None:
             raise Usage('Array {0} does not exist'.format(spec))
         try:
             unused_attrs, dims = scidb_schema.parse(schema)
         except ValueError:
             raise AppError(
                 "Internal error, show({0}) output: {1}".format(spec, schema))
         else:
             self.load_array = spec
             self.load_schema = schema
     else:
         # A bare schema names no array, so a target array is mandatory.
         self.load_array = None
         self.load_schema = spec
         if not _args.target_array:
             raise Usage("Must specify at least one array with "
                         "-s/--load-schema or -A/--target-array")

     # Final sanity checks on whatever we ended up with.
     if len(dims) != 1:
         raise Usage(
             "Load schema '{0}' is not one-dimensional".format(spec))
     if _args.target_array and not get_array_schema(_args.target_array):
         raise Usage("Target array %s does not exist" % _args.target_array)
     return None
Esempio n. 3
0
def getLoadSchema():
    """Fill the globals loadAttrs and loadDims from the load schema.

    When opts.load_schema is set it is parsed directly.  Otherwise, when
    opts.load_name is set, iquery is run with show(<array>) and the first
    "<...>[...]" fragment found in its output is parsed.  When neither
    option is set the globals are left untouched.

    Raises:
        Exception: if iquery exits with a non-zero status, or if its
            output contains nothing that looks like a schema.
    """
    global loadAttrs, loadDims
    if opts.load_schema:
        loadAttrs, loadDims = scidb_schema.parse(opts.load_schema)
    elif opts.load_name:
        # Query SciDB for the schema.
        logNormal("Retrieving load array schema from SciDB.")
        # NOTE(review): the command is a shell string assembled from option
        # values, so shell metacharacters in them are interpreted by the
        # shell -- confirm the options are trusted.
        cmd = "\"%siquery\" -c %s -p %d -o text -aq \"show(%s)\"" % (
            sciDbBinFolder, opts.db_address, opts.db_port, opts.load_name)
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             shell=True, close_fds=True,
                             preexec_fn=os.setsid)
        childProcesses.append(p)
        # communicate() drains both pipes while waiting for the child.
        # Calling wait() first can deadlock once the child has filled
        # one of the pipe buffers.
        arrayDef, errOutput = p.communicate()
        if p.returncode != 0:
            err = "Failed to obtain schema for load array."
            raise Exception("%s\n%s" % (err, errOutput))
        m = re.search(r'<[^>]+>\s*\[[^\]]+\]', arrayDef)
        if not m:
            err = ("Schema obtained from load array is corrupt: %s" %
                   arrayDef.rstrip("\n"))
            raise Exception("%s\n%s" % (err, errOutput))
        logVerbose("Result: %s" % m.group(0))
        loadAttrs, loadDims = scidb_schema.parse(m.group(0))
Esempio n. 4
0
 def _setup_load_parameters(self):
     """Decide what to load from, based on the command-line options.

     The value of _args.array_or_schema is treated as a schema when it
     parses as one, and as an array name otherwise.  The outcome is
     stored in self.load_array and self.load_schema; unusable input is
     reported with Usage, and an unparsable schema for an existing array
     with AppError.
     """
     given = _args.array_or_schema
     dims = None
     try:
         # Does the text parse as a schema?
         ignored_attrs, dims = scidb_schema.parse(given)
     except ValueError:
         # No: treat it as the name of an array and fetch its schema.
         schema = get_array_schema(given)
         if schema is None:
             raise Usage('Array {0} does not exist'.format(given))
         try:
             ignored_attrs, dims = scidb_schema.parse(schema)
         except ValueError:
             raise AppError(
                 "Internal error, show({0}) output: {1}".format(given, schema))
         else:
             self.load_array = given
             self.load_schema = schema
     else:
         # Yes: with only a schema in hand, a target array is required.
         self.load_array = None
         self.load_schema = given
         if not _args.target_array:
             raise Usage("Must specify at least one array with "
                         "-s/--load-schema or -A/--target-array")

     # Checks that apply whichever way the schema was obtained.
     if len(dims) != 1:
         raise Usage(
             "Load schema '{0}' is not one-dimensional".format(given))
     if _args.target_array and not get_array_schema(_args.target_array):
         raise Usage("Target array %s does not exist" % _args.target_array)
     return None
Esempio n. 5
0
def test_scidblib_unparse_schema():
    """Unit test for the Python schema un-parser.

    Round-trips a reference schema through SS.parse, SS.unparse and
    SS.parse again, then compares the first and second parse results
    field by field.
    """
    print('*** testing scidblib.scidb_schema.unparse...')
    schema1 = "<z1:string DEFAULT 'aa aa',z2:int64 NULL DEFAULT -2," + \
        "z3:int32 NULL,z4:float DEFAULT -0.5,z5:char char('x')," + \
        "z4:datetime DEFAULT datetime(\'25Nov2009:16:11:19\')," + \
        "z5:datetimetz DEFAULT datetimetz(\'10/13/2008 15:10:20 +9:00\')>" + \
        "[dim1=-77:*,23,0,dim2=0:99,?,1,dim3=-100:7,?,1]"
    attrs1, dims1 = SS.parse(schema1)
    schema2 = SS.unparse(attrs1, dims1)

    attrs2, dims2 = SS.parse(schema2)
    # Check attributes:
    print('checking attributes...')
    # Compare counts explicitly: zip() below would quietly ignore any
    # trailing attributes present in only one of the two lists.
    assert len(attrs1) == len(attrs2), "attribute count: %d != %d" % (
        len(attrs1), len(attrs2))
    for attr1, attr2 in zip(attrs1, attrs2):
        assert attr1.name == attr2.name
        assert attr1.type == attr2.type
        assert attr1.nullable == attr2.nullable
        assert attr1.default == attr2.default

    # Check dimensions:
    print('checking dimensions...')
    # Likewise for dimensions.
    assert len(dims1) == len(dims2), "dimension count: %d != %d" % (
        len(dims1), len(dims2))
    for dim1, dim2 in zip(dims1, dims2):
        assert dim1.name == dim2.name
        assert dim1.lo == dim2.lo
        assert dim1.hi == dim2.hi
        assert dim1.chunk == dim2.chunk
        assert dim1.overlap == dim2.overlap
Esempio n. 6
0
def reparse(attrs, dims):
    """Rebuild a schema from attrs and dims... it should match!

    Un-parses the given attributes and dimensions into schema text,
    parses that text again, and asserts that the result has the same
    number of entries and that each entry compares equal to its original.
    """
    schema = SS.unparse(attrs, dims)
    aa, dd = SS.parse(schema)
    # zip() stops at the shorter input, so a dropped or extra entry would
    # slip through the element-wise checks; compare the counts first.
    assert len(attrs) == len(aa), (
        "Reparse attribute count mismatch: %d != %d" % (len(attrs), len(aa)))
    for x, y in zip(attrs, aa):
        assert x == y, "Reparse attribute mismatch: '%s' != '%s'" % (x, y)
    assert len(dims) == len(dd), (
        "Reparse dimension count mismatch: %d != %d" % (len(dims), len(dd)))
    for x, y in zip(dims, dd):
        assert x == y, "Reparse dimension mismatch: '%s' != '%s'" % (x, y)
Esempio n. 7
0
def parse_dimensions(s):
    """Turn the dimension text 's' into a list of local Dimension objects."""
    # Wrap the text in a throwaway one-attribute schema so that the
    # scidblib parser can do the hard work.
    wrapped = "<dummy:int8>[{0}]".format(s)
    _unused_attrs, parsed_dims = scidb_schema.parse(wrapped)
    names = [dim.name for dim in parsed_dims]
    raise_if_duplicates(names, 'dimension name')
    # Re-create each scidb_schema.Dimension as one of our own Dimensions.
    converted = []
    for dim in parsed_dims:
        converted.append(
            Dimension(dim.name, dim.lo, dim.hi, dim.chunk, dim.overlap))
    return converted
Esempio n. 8
0
 def test_09_missing_high_bound(self):
     """Missing dimension high bound is caught"""
     schema = "<x:int64> [i=+70]"
     rejected = False
     try:
         _attrs, _dims = SS.parse(schema)
     except ValueError:
         # Expected: the parser refuses a dimension without a high bound.
         rejected = True
     assert rejected, "Missing dimension high bound should have thrown"
Esempio n. 9
0
 def test_02_missing_everything(self):
     """Throw on missing attributes or dimensions"""
     schema = "< \t\v\r\n  > [ \t\v\r\n ]"
     try:
         _attrs, _dims = SS.parse(schema)
     except ValueError:
         # Expected: nothing but whitespace inside both bracket pairs.
         pass
     else:
         assert False, "No variables, did not throw: %s" % schema
Esempio n. 10
0
 def test_08_missing_dim_name(self):
     """Missing dimension name is caught"""
     schema = "<x:int64> [=-43:+70]"
     rejected = False
     try:
         _attrs, _dims = SS.parse(schema)
     except ValueError:
         # Expected: a dimension needs a name in front of the '='.
         rejected = True
     assert rejected, "Missing dimension name should have thrown"
Esempio n. 11
0
 def test_10_extra_semi(self):
     """Semi-colon is separator not terminator"""
     schema = "<x:int64> [i=+70:100;]"
     rejected = False
     try:
         _attrs, _dims = SS.parse(schema)
     except ValueError:
         # Expected: a trailing ';' must be followed by another dimension.
         rejected = True
     assert rejected, "Extra dimension separator should have thrown"
Esempio n. 12
0
 def test_12_id_equals_lo_semi_hi(self):
     """Semi-colon cannot separate low and high bound"""
     schema = "<x:int64> [i=0;0:0:1; j=0:99:2:10]"
     rejected = False
     try:
         _attrs, _dims = SS.parse(schema)
     except ValueError:
         # Expected: the first dimension uses ';' where ':' belongs.
         rejected = True
     assert rejected, (
         "Semi-colon between hi and lo bound should have thrown")
Esempio n. 13
0
def reparse(attrs, dims):
    """Rebuild a schema from attrs and dims... it should match!

    Un-parses the given attributes and dimensions into schema text,
    parses that text again, and asserts that the result has the same
    number of entries and that each entry compares equal to its original.
    """
    # Don't use old-style dimension syntax for this "mirroring".  The
    # old syntax won't "mirror" None values (they must become '*').
    schema = SS.unparse(attrs, dims, compat=False)
    aa, dd = SS.parse(schema)
    # zip() stops at the shorter input, so a dropped or extra entry would
    # slip through the element-wise checks; compare the counts first.
    assert len(attrs) == len(aa), (
        "Reparse attribute count mismatch: %d != %d" % (len(attrs), len(aa)))
    for x, y in zip(attrs, aa):
        assert x == y, "Reparse attribute mismatch: '%s' != '%s'" % (x, y)
    assert len(dims) == len(dd), (
        "Reparse dimension count mismatch: %d != %d" % (len(dims), len(dd)))
    for x, y in zip(dims, dd):
        assert x == y, "Reparse dimension mismatch: '%s' != '%s'" % (x, y)
Esempio n. 14
0
    def test_00_missing_attrs(self):
        """Throw on missing attributes

        First checks that a schema with an empty attribute list is
        rejected, then fills in one attribute and checks that the same
        schema parses and survives a reparse round trip.
        """
        s = "junk < \n\v\r\t  >  [i=0:43:5;  j_ = 1 :  50 : 1 : 10] more junk"
        threw = False
        try:
            aa, dd = SS.parse(s)
        except ValueError:
            threw = True
        assert threw, "No attributes, did not throw: %s" % s

        # Raw string: "\s" is not a valid escape in an ordinary string
        # literal, so the regex should not rely on it passing through.
        s = re.sub(r"<\s*>", "<x:int64>", s)
        threw = ''
        try:
            aa, dd = SS.parse(s)
        except ValueError as e:
            threw = str(e)
        assert not threw, "Got attributes, threw: %s (%s)" % (s, threw)
        assert len(aa) == 1, "Expected one attribute, got %d" % len(aa)
        assert len(dd) == 2, "Expected two dimensions, got %d" % len(dd)
        reparse(aa, dd)
Esempio n. 15
0
 def test_11_expression_evaluation(self):
     """Expression evaluation is not supported"""
     # The following is *completely legal* new-style syntax!!!
     schema = "<x:int64> [i=0:sizeof(',,,,,,,,,,')]"
     rejected = False
     try:
         _attrs, _dims = SS.parse(schema)
     except ValueError:
         # ...but it fails anyway, because SS.parse() can't evaluate
         # expressions like sizeof(), it only casts strings to longs.
         rejected = True
     assert rejected, "Kudos to whomever implemented sizeof() evaluation!"
Esempio n. 16
0
    def test_01_missing_dims(self):
        """Throw on missing dimensions

        First checks that a schema with an empty dimension list is
        rejected, then fills in one dimension and checks that the same
        schema parses and survives a reparse round trip.
        """
        s = "< a:int64, b:string default foo(bar+1)  > [ \t\v\n\r ]"
        aa = dd = None
        threw = False
        try:
            aa, dd = SS.parse(s)
        except ValueError:
            threw = True
        assert threw, "No dimensions, did not throw: %s" % s

        s = re.sub(r"\[\s*\]", "[i=0:0,1,0]", s)
        threw = ''
        try:
            aa, dd = SS.parse(s)
        except ValueError as e:
            threw = str(e)
        # By now the schema *has* a dimension, so say so in the message.
        assert not threw, "Got dimensions, threw: %s (%s)" % (s, threw)
        assert len(aa) == 2, "Expected two attributes, got %d" % len(aa)
        assert len(dd) == 1, "Expected one dimension, got %d" % len(dd)
        reparse(aa, dd)
Esempio n. 17
0
def test_scidblib_unparse_schema():
    """Unit test for the Python schema un-parser.

    Round-trips a reference schema through SS.parse, SS.unparse and
    SS.parse again (all with default_nullable=False), then compares the
    first and second parse results field by field.
    """
    print('*** testing scidblib.scidb_schema.unparse...')
    # NOTE(review): the pieces joined below put "compression 'gzip'"
    # directly in front of "z5:datetimetz" with no separating comma --
    # confirm that this is intentional.
    schema1 = ''.join(
        ("<z1:string DEFAULT 'aa aa',z2:int64 NULL DEFAULT -2,",
         "z3:int32 NULL,z4:float DEFAULT -0.5,z5:char dEfAuLt char('x'),",
         "z4:datetime DEFAULT datetime(\'25Nov2009:16:11:19\')",
         " compression 'gzip'",
         "z5:datetimetz DEFAULT datetimetz(\'10/13/2008 15:10:20 +9:00\')",
         " reserve 32", ">[dim1=-77:*,23,0,dim2=0:99,?,1,dim3=-100:7,?,1]"))

    # TODO: Fix for default_nullable=True, see SDB-5138.
    attrs1, dims1 = SS.parse(schema1, default_nullable=False)
    schema2 = SS.unparse(attrs1, dims1, default_nullable=False)
    attrs2, dims2 = SS.parse(schema2, default_nullable=False)

    # Check attributes:
    print('checking attributes...')
    # zip() stops at the shorter sequence, so compare the lengths first;
    # otherwise an attribute lost in the round trip would go unnoticed.
    assert len(attrs1) == len(attrs2), "attribute count: %d != %d" % (
        len(attrs1), len(attrs2))
    for i, (attr1, attr2) in enumerate(zip(attrs1, attrs2)):
        assert attr1.name == attr2.name, "%d: %s != %s" % (i, attr1.name,
                                                           attr2.name)
        assert attr1.type == attr2.type, "%d: %s != %s" % (i, attr1.type,
                                                           attr2.type)
        assert attr1.nullable == attr2.nullable, "%d: %s != %s" % (
            i, attr1.nullable, attr2.nullable)
        assert attr1.default == attr2.default, "%d: %s != %s" % (
            i, attr1.default, attr2.default)
        assert attr1.compression == attr2.compression, "%d: %s != %s" % (
            i, attr1.compression, attr2.compression)
        assert attr1.reserve == attr2.reserve, "%d: %s != %s" % (
            i, attr1.reserve, attr2.reserve)

    # Check dimensions:
    print('checking dimensions...')
    # Same reasoning as above: guard against a silently truncated zip().
    assert len(dims1) == len(dims2), "dimension count: %d != %d" % (
        len(dims1), len(dims2))
    for dim1, dim2 in zip(dims1, dims2):
        assert dim1.name == dim2.name
        assert dim1.lo == dim2.lo
        assert dim1.hi == dim2.hi
        assert dim1.chunk == dim2.chunk
        assert dim1.overlap == dim2.overlap
Esempio n. 18
0
 def test_03_quotes_in_strings(self):
     """Escaped quote marks OK in string literals"""
     schema = r"""<x:string default 'Ain\'t it grand?' compression 'D\'oh!!!'>
             [i=0:0:0:1]"""
     attrs, _dims = SS.parse(schema)
     only_attr = attrs[0]
     # DEFAULT clauses are tricky to parse, so the parser does not try:
     # the text comes back untouched, enclosing quotes included.
     assert only_attr.default == r"'Ain\'t it grand?'"
     # COMPRESSION is handled differently: the enclosing quotes are
     # stripped, while an escaped quote inside stays escaped (which is
     # wrong, but can wait until a compression method is actually named
     # something like 'Mike\'s sick method').  This is probably the
     # behavior you want.
     assert only_attr.compression == r"D\'oh!!!"
Esempio n. 19
0
 def test_06_lone_identifier(self):
     """Lone dimension name x means x=0:*:0:*

     Parses a schema whose only dimension is a bare name and checks the
     defaults filled in for its bounds, overlap and chunk interval.
     """
     s = "<x:int64>[i]"
     aa, dd = SS.parse(s)
     assert len(aa) == 1, "Expected one attribute, got %d" % len(aa)
     assert len(dd) == 1, "Expected one dimension, got %d" % len(dd)
     reparse(aa, dd)
     d = dd[0]
     # The failure messages must read from the dimension 'd', not from
     # the list 'dd': a list has no lo/hi/overlap/chunk attributes, so
     # formatting the message would itself raise AttributeError.
     assert d.lo == 0, "Default low bound should be zero, got %s" % d.lo
     assert d.hi == '*', ("Default high bound should be '*', got %s" %
                          d.hi)
     assert d.overlap == 0, ("Default overlap should be zero, got %s" %
                             d.overlap)
     assert d.chunk == '*', ("Default interval should be *, got %s" %
                             d.chunk)
Esempio n. 20
0
 def test_05_whitespace(self):
     """Newlines and other whitespace"""
     # This schema has whitespace in every legal location.
     s = r"""<
         a : int64 not null default strlen ( 'I\'m whelmed' ) ,
         b : string default 'I\'m the default, baby!' compression 'default',
         c : binary reserve 42 > [ i = - 90 : 90 : 0: 10 ;
                                   j = - 180 : 180: 2 : 20 ]
         """
     # Swap every run of whitespace for a mix of whitespace characters.
     tokens = s.split()
     noisy_schema = "\n\t\v\r ".join(tokens)
     attrs, dims = SS.parse(noisy_schema)
     assert len(attrs) == 3, "Expected 3 attributes, got %d" % len(attrs)
     assert len(dims) == 2, "Expected 2 dimensions, got %d" % len(dims)
     attr_b = attrs[1]
     assert attr_b.name == 'b', "b name: %s" % attr_b.name
     assert attr_b.compression == r"default", ("b compression: %s" %
                                               attr_b.compression)
     attr_c = attrs[2]
     assert attr_c.reserve == 42, "c.reserve: %s" % attr_c.reserve
Esempio n. 21
0
 def test_07_default_overlap_and_interval(self):
     """Omit interval, or interval and overlap, and get None"""
     schema = "<\nx:int64\n>\n[i=-43:+70; j=\v+\t100:+200:5]"
     attrs, dims = SS.parse(schema)
     assert len(attrs) == 1, "Expected one attribute, got %d" % len(attrs)
     assert len(dims) == 2, "Expected two dimensions, got %d" % len(dims)
     reparse(attrs, dims)

     # Dimension i gives bounds only: overlap and chunk are both omitted.
     dim_i = dims[0]
     assert dim_i.name == 'i', "Dim 0 name got borked"
     assert dim_i.overlap is None, ("Dim 0 omitted overlap became %s" %
                                    dim_i.overlap)
     assert dim_i.chunk is None, ("Dim 0 omitted chunk became %s" %
                                  dim_i.chunk)

     # Dimension j gives bounds and overlap: only the chunk is omitted.
     dim_j = dims[1]
     assert dim_j.name == 'j', "Dim 1 name got borked"
     assert dim_j.overlap == 5, ("Dim 1 overlap got borked to %s" %
                                 dim_j.overlap)
     assert dim_j.chunk is None, ("Dim 1 omitted chunk became %s" %
                                  dim_j.chunk)
Esempio n. 22
0
def parse_attributes(s):
    """Return the scidb_schema.Attribute objects described by 's'."""
    # Wrap the text in a schema with a placeholder dimension; the
    # scidb_schema.Attribute objects that come back are used as-is.
    wrapped = "<%s>[dummy]" % s
    parsed_attrs, _unused_dims = scidb_schema.parse(wrapped)
    names = [attr.name for attr in parsed_attrs]
    raise_if_duplicates(names, 'attribute name')
    return parsed_attrs
Esempio n. 23
0
def test_scidblib_parse_schema():
    """Exercise SS.parse on a multi-line schema and verify each field.

    The reference schema declares seven attributes (a through g) and two
    dimensions.  Each group of assertions is announced on stdout before
    it runs.
    """
    print('*** testing scidblib.scidb_schema.parse...')

    schema1 = """<
        a:double NULL DEFAULT -0.5,
        b:char DEFAULT 'a',
        c:int8 NULL,
        d:uint64,
        e:string DEFAULT 'aa
        Aa',
        f:datetime DEFAULT datetime('25Nov2009:16:11:19'),
        g:datetimetz DEFAULT datetimetz('11/25/2009 16:11:19 +10:00')
        >
        [
        d_0=1:*,?,1,
        d_1=-101:-9,5,0
        ]
    """
    parsed_attrs, parsed_dims = SS.parse(schema1)

    # Counts first.
    print('checking length of attributes list...')
    assert len(parsed_attrs) == 7
    print('checking length of dimensions list...')
    assert len(parsed_dims) == 2

    # Attribute fields, in declaration order a..g.
    print('checking attribute names...')
    expected_names = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
    found_names = [attr.name for attr in parsed_attrs]
    assert found_names == expected_names

    print('checking attribute types...')
    expected_types = ['double', 'char', 'int8', 'uint64', 'string',
                      'datetime', 'datetimetz']
    found_types = [attr.type for attr in parsed_attrs]
    assert found_types == expected_types

    print('checking if attributes are nullable...')
    # Only a and c carry an explicit NULL.
    expected_nullables = [True, False, True, False, False, False, False]
    found_nullables = [attr.nullable for attr in parsed_attrs]
    assert found_nullables == expected_nullables

    print('checking attribute default modifiers...')
    # c and d have no DEFAULT clause; e's default spans two lines.
    expected_defaults = [
        '-0.5', '\'a\'', None, None,
        """\'aa
        Aa\'""",
        'datetime(\'25Nov2009:16:11:19\')',
        'datetimetz(\'11/25/2009 16:11:19 +10:00\')',
    ]
    found_defaults = [attr.default for attr in parsed_attrs]
    assert found_defaults == expected_defaults

    # Dimension fields, in declaration order d_0, d_1.
    print('checking dimension names...')
    expected_dim_names = ['d_0', 'd_1']
    found_dim_names = [dim.name for dim in parsed_dims]
    assert found_dim_names == expected_dim_names

    print('checking lower bounds of dimensions...')
    expected_los = [1, -101]
    found_los = [dim.lo for dim in parsed_dims]
    assert found_los == expected_los

    print('checking upper bounds of dimensions...')
    expected_his = [SS.MAX_COORDINATE, -9]
    found_his = [dim.hi for dim in parsed_dims]
    assert found_his == expected_his

    print('checking dimension chunks...')
    expected_chunks = ['?', 5]
    found_chunks = [dim.chunk for dim in parsed_dims]
    assert found_chunks == expected_chunks

    print('checking dimension overlaps...')
    expected_overlaps = [1, 0]
    found_overlaps = [dim.overlap for dim in parsed_dims]
    assert found_overlaps == expected_overlaps
Esempio n. 24
0
def test_scidblib_parse_schema():
    """Check that SS.parse extracts every field of a reference schema.

    The schema below has seven attributes (a through g) and two
    dimensions.  A progress line is printed before each group of
    assertions.
    """
    print('*** testing scidblib.scidb_schema.parse...')

    schema1 = """<
        a:double NULL DEFAULT -0.5,
        b:char DEFAULT 'a',
        c:int8 NULL,
        d:uint64,
        e:string DEFAULT 'aa
        Aa',
        f:datetime DEFAULT datetime('25Nov2009:16:11:19'),
        g:datetimetz DEFAULT datetimetz('11/25/2009 16:11:19 +10:00')
        >
        [
        d_0=1:*,?,1,
        d_1=-101:-9,5,0
        ]
    """
    attr_list, dim_list = SS.parse(schema1)

    # How many of each were parsed?
    print('checking length of attributes list...')
    assert len(attr_list) == 7
    print('checking length of dimensions list...')
    assert len(dim_list) == 2

    # Per-attribute checks; expected values are listed in order a..g.
    print('checking attribute names...')
    want_names = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
    got_names = [attr.name for attr in attr_list]
    assert got_names == want_names

    print('checking attribute types...')
    want_types = ['double', 'char', 'int8', 'uint64', 'string', 'datetime',
                  'datetimetz']
    got_types = [attr.type for attr in attr_list]
    assert got_types == want_types

    print('checking if attributes are nullable...')
    # a and c are the only attributes declared NULL.
    want_nullables = [True, False, True, False, False, False, False]
    got_nullables = [attr.nullable for attr in attr_list]
    assert got_nullables == want_nullables

    print('checking attribute default modifiers...')
    # No DEFAULT on c or d; the default of e contains a line break.
    want_defaults = [
        '-0.5', '\'a\'', None, None,
        """\'aa
        Aa\'""",
        'datetime(\'25Nov2009:16:11:19\')',
        'datetimetz(\'11/25/2009 16:11:19 +10:00\')',
    ]
    got_defaults = [attr.default for attr in attr_list]
    assert got_defaults == want_defaults

    # Per-dimension checks; expected values are listed as d_0, d_1.
    print('checking dimension names...')
    want_dim_names = ['d_0', 'd_1']
    got_dim_names = [dim.name for dim in dim_list]
    assert got_dim_names == want_dim_names

    print('checking lower bounds of dimensions...')
    want_los = [1, -101]
    got_los = [dim.lo for dim in dim_list]
    assert got_los == want_los

    print('checking upper bounds of dimensions...')
    want_his = [SS.MAX_COORDINATE, -9]
    got_his = [dim.hi for dim in dim_list]
    assert got_his == want_his

    print('checking dimension chunks...')
    want_chunks = ['?', 5]
    got_chunks = [dim.chunk for dim in dim_list]
    assert got_chunks == want_chunks

    print('checking dimension overlaps...')
    want_overlaps = [1, 0]
    got_overlaps = [dim.overlap for dim in dim_list]
    assert got_overlaps == want_overlaps
Esempio n. 25
0
def main(argv=None):
    """
    Main program entry point.

    Parses the schema given in _args.schema, starts _args.workers
    data_producer tasks in a process pool, and passes every block they
    put on the shared queue to emit_records until all tasks are ready
    and the queue has been found empty.

    argv: command line to hand to parse_args; sys.argv when None.

    Returns 0.
    """

    if (argv is None):
        argv = sys.argv

    parse_args(argv) # Parse the command line arguments.

    # Filter out specific TSV/CSV separator character (if one of such
    # formats is specified).
    remove_separators_from_strings()

    # TODO: Fix for default_nullable=True, see SDB-5138.
    # The "if 0" branch is never taken; the schema is always parsed with
    # default_nullable=False.
    if 0:
        attrs,dims = scidb_schema.parse(_args.schema)
    else:
        attrs,dims = scidb_schema.parse(_args.schema, default_nullable=False)

    if (_args.constant is not None):
        setup_constant_data_generators()

    dims_sizes = _get_dim_sizes(dims)

    dim_offsets = _get_dim_offsets(dims)

    start_indices,stop_indices = _get_start_and_stop_indices(dims,dims_sizes,dim_offsets)

    total_array_size,prob_sizes = _get_worker_problem_sizes(dims_sizes)

    attr_types = [a.type for a in attrs]
    attr_nulls = [a.nullable for a in attrs]

    formatter_func = _get_formatter(dims,attr_types,attr_nulls)

    splitter = _get_splitter(total_array_size)

    # Set up the inter-process data manager.
    manager = MP.Manager()
    # Prepare the data queue for the data-generating workers.
    queue = manager.Queue()
    # Put together the list of arguments for all workers.
    # Worker i gets the i-th start/stop indices and problem size, and a
    # random seed offset by i.
    args_dicts = []
    for i in xrange(_args.workers):
        arg_tuples = [
            ('dim_starts',start_indices[i]),
            ('dim_stops',stop_indices[i]),
            ('attr_types',attr_types),
            ('attr_nulls',attr_nulls),
            ('random_seed',_args.seed+i),
            ('problem_size',prob_sizes[i])
            ]
        args_dicts.append(dict(arg_tuples))

    # Create the process pool of workers.
    pool = MP.Pool(processes=_args.workers)

    # Assign data generating tasks to each worker in the pool and
    # start them.
    results = [pool.apply_async(data_producer,(queue,d),{}) for d in args_dicts]

    # Register a simple cleanup function in case of the unexpected exit
    # (e.g. user presses CTRL-C).
    atexit.register(lambda : pool.terminate())

    # Record the original parent process id: when the parent dies, we shall
    # attempt to exit too.
    # NOTE(review): ppid is not read again anywhere in this function --
    # confirm whether the parent check happens elsewhere.
    ppid = os.getppid()

    # Process the data from the record queue: workers insert data record
    # blocks into the queue while the main (this) process pulls them out
    # and outputs them to stdout.

    # Keep polling until every task reports ready AND the most recent
    # non-blocking get() found the queue empty.
    queue_empty = False
    while (not all([r.ready() for r in results])) or (not queue_empty):
        try:
            s = queue.get(False) # Grab a block of text from the queue.
            queue_empty = False
            emit_records(s,formatter_func,splitter)
        except Queue.Empty:
            queue_empty = True
        except (KeyboardInterrupt, SystemExit):
            # In case of CTRL-C press or unexpected exit, exit the main
            # process of the program.  Child processes will also exit
            # in the same way.
            break

    # Close the pool (no more workers can be added).
    pool.close()
    # Double-check that all workers are finished.
    pool.join()
    # Terminate the pool.
    pool.terminate()

    return 0
Esempio n. 26
0
def test_scidblib_parse_schema():
    """Exercise SS.parse on a multi-line schema and verify each field.

    The reference schema declares nine attributes (a through i) and two
    dimensions.  It is parsed once with the default nullability rules
    and once more with default_nullable=False.
    """
    print('*** testing scidblib.scidb_schema.parse...')

    schema1 = r"""<
        a:double NULL DEFAULT -0.5,
        b:char DEFAULT 'a',
        c:int8 NULL,
        d:uint64,
        e:string DEFAULT 'aa
        Aa',
        f:datetime DEFAULT datetime('25Nov2009:16:11:19'),
        g:datetimetz DEFAULT datetimetz('11/25/2009 16:11:19 +10:00'),
        h:uint16 NOT NULL,
        i: string not null default 'this is the default, isn\'t that wild?' ComPression 'default' reserve +64
        >
        [
        d_0=1:*,?,1,
        d_1=-101:-9,5,0
        ]
    """
    parsed_attrs, parsed_dims = SS.parse(schema1)

    # Counts first.
    print('checking length of attributes list...')
    assert len(parsed_attrs) == 9
    print('checking length of dimensions list...')
    assert len(parsed_dims) == 2

    # Attribute fields, in declaration order a..i.
    print('checking attribute names...')
    expected_names = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']
    found_names = [attr.name for attr in parsed_attrs]
    assert found_names == expected_names

    print('checking attribute types...')
    expected_types = ['double', 'char', 'int8', 'uint64', 'string',
                      'datetime', 'datetimetz', 'uint16', 'string']
    found_types = [attr.type for attr in parsed_attrs]
    assert found_types == expected_types

    print('checking if attributes are nullable...')
    # Only h and i are declared NOT NULL.
    expected_nullables = [True, True, True, True, True, True, True,
                          False, False]
    found_nullables = [attr.nullable for attr in parsed_attrs]
    assert found_nullables == expected_nullables

    print('checking attribute default modifiers...')
    # c, d and h have no DEFAULT clause; e's default spans two lines.
    expected_defaults = [
        '-0.5', '\'a\'', None, None,
        """\'aa
        Aa\'""",
        'datetime(\'25Nov2009:16:11:19\')',
        'datetimetz(\'11/25/2009 16:11:19 +10:00\')',
        None,
        r"'this is the default, isn\'t that wild?'",
    ]
    found_defaults = [attr.default for attr in parsed_attrs]
    assert found_defaults == expected_defaults

    # Only the last attribute (i) has COMPRESSION and RESERVE clauses.
    expected_compression = [None] * 8 + ['default']
    found_compression = [attr.compression for attr in parsed_attrs]
    assert found_compression == expected_compression

    expected_reserve = [None] * 8 + [64]
    found_reserve = [attr.reserve for attr in parsed_attrs]
    assert found_reserve == expected_reserve

    # Dimension fields, in declaration order d_0, d_1.
    print('checking dimension names...')
    expected_dim_names = ['d_0', 'd_1']
    found_dim_names = [dim.name for dim in parsed_dims]
    assert found_dim_names == expected_dim_names

    print('checking lower bounds of dimensions...')
    expected_los = [1, -101]
    found_los = [dim.lo for dim in parsed_dims]
    assert found_los == expected_los

    print('checking upper bounds of dimensions...')
    expected_his = ['*', -9]
    found_his = [dim.hi for dim in parsed_dims]
    assert found_his == expected_his

    print('checking dimension chunks...')
    expected_chunks = ['?', 5]
    found_chunks = [dim.chunk for dim in parsed_dims]
    assert found_chunks == expected_chunks

    print('checking dimension overlaps...')
    expected_overlaps = [1, 0]
    found_overlaps = [dim.overlap for dim in parsed_dims]
    assert found_overlaps == expected_overlaps

    # Parse again under the old nullability rules: now only a and c,
    # which carry an explicit NULL, are expected to be nullable.
    parsed_attrs, _unused_dims = SS.parse(schema1, default_nullable=False)
    expected_nullables = [True, False, True, False, False, False, False,
                          False, False]
    found_nullables = [attr.nullable for attr in parsed_attrs]
    assert found_nullables == expected_nullables
Esempio n. 27
0
def main(argv=None):
    """
    Main program entry point.

    Parses the schema given in _args.schema, starts _args.workers
    data_producer tasks in a process pool, and passes every block they
    put on the shared queue to emit_records until all tasks are ready
    and the queue has been found empty.

    argv: command line to hand to parse_args; sys.argv when None.

    Returns 0.
    """

    if (argv is None):
        argv = sys.argv

    parse_args(argv)  # Parse the command line arguments.

    # Filter out specific TSV/CSV separator character (if one of such
    # formats is specified).
    remove_separators_from_strings()

    # TODO: Fix for default_nullable=True, see SDB-5138.
    # The "if 0" branch is never taken; the schema is always parsed with
    # default_nullable=False.
    if 0:
        attrs, dims = scidb_schema.parse(_args.schema)
    else:
        attrs, dims = scidb_schema.parse(_args.schema, default_nullable=False)

    if (_args.constant is not None):
        setup_constant_data_generators()

    dims_sizes = _get_dim_sizes(dims)

    dim_offsets = _get_dim_offsets(dims)

    start_indices, stop_indices = _get_start_and_stop_indices(
        dims, dims_sizes, dim_offsets)

    total_array_size, prob_sizes = _get_worker_problem_sizes(dims_sizes)

    attr_types = [a.type for a in attrs]
    attr_nulls = [a.nullable for a in attrs]

    formatter_func = _get_formatter(dims, attr_types, attr_nulls)

    splitter = _get_splitter(total_array_size)

    # Set up the inter-process data manager.
    manager = MP.Manager()
    # Prepare the data queue for the data-generating workers.
    queue = manager.Queue()
    # Put together the list of arguments for all workers.
    # Worker i gets the i-th start/stop indices and problem size, and a
    # random seed offset by i.
    args_dicts = []
    for i in xrange(_args.workers):
        arg_tuples = [('dim_starts', start_indices[i]),
                      ('dim_stops', stop_indices[i]),
                      ('attr_types', attr_types), ('attr_nulls', attr_nulls),
                      ('random_seed', _args.seed + i),
                      ('problem_size', prob_sizes[i])]
        args_dicts.append(dict(arg_tuples))

    # Create the process pool of workers.
    pool = MP.Pool(processes=_args.workers)

    # Assign data generating tasks to each worker in the pool and
    # start them.
    results = [
        pool.apply_async(data_producer, (queue, d), {}) for d in args_dicts
    ]

    # Register a simple cleanup function in case of the unexpected exit
    # (e.g. user presses CTRL-C).
    atexit.register(lambda: pool.terminate())

    # Record the original parent process id: when the parent dies, we shall
    # attempt to exit too.
    # NOTE(review): ppid is not read again anywhere in this function --
    # confirm whether the parent check happens elsewhere.
    ppid = os.getppid()

    # Process the data from the record queue: workers insert data record
    # blocks into the queue while the main (this) process pulls them out
    # and outputs them to stdout.

    # Keep polling until every task reports ready AND the most recent
    # non-blocking get() found the queue empty.
    queue_empty = False
    while (not all([r.ready() for r in results])) or (not queue_empty):
        try:
            s = queue.get(False)  # Grab a block of text from the queue.
            queue_empty = False
            emit_records(s, formatter_func, splitter)
        except Queue.Empty:
            queue_empty = True
        except (KeyboardInterrupt, SystemExit):
            # In case of CTRL-C press or unexpected exit, exit the main
            # process of the program.  Child processes will also exit
            # in the same way.
            break

    # Close the pool (no more workers can be added).
    pool.close()
    # Double-check that all workers are finished.
    pool.join()
    # Terminate the pool.
    pool.terminate()

    return 0
Esempio n. 28
0
 def test_04_semi_colon_dimension_sep(self):
     """Semi-colons can separate dimension groups"""
     schema = "<x:int64>[i=0:0:0:1; j=0:99:2:10]"
     attrs, dims = SS.parse(schema)
     dim_count = len(dims)
     assert dim_count == 2, "Expected two dimensions, got %d" % dim_count
     # The parsed pieces must also survive an unparse/parse round trip.
     reparse(attrs, dims)