def test_scidblib_unparse_schema():
    """Unit test for the Python schema un-parser.

    Parses a schema string, un-parses the result back into text, parses that
    text again, and checks that the two parses agree attribute by attribute
    and dimension by dimension.

    Raises:
        AssertionError: if the two parses differ in the number of attributes
            or dimensions, or in any of the compared fields.
    """
    print('*** testing scidblib.scidb_schema.unparse...')
    schema1 = (
        "<z1:string DEFAULT 'aa aa',z2:int64 NULL DEFAULT -2,"
        "z3:int32 NULL,z4:float DEFAULT -0.5,z5:char char('x'),"
        "z4:datetime DEFAULT datetime('25Nov2009:16:11:19'),"
        "z5:datetimetz DEFAULT datetimetz('10/13/2008 15:10:20 +9:00')>"
        "[dim1=-77:*,23,0,dim2=0:99,?,1,dim3=-100:7,?,1]"
    )
    attrs1, dims1 = SS.parse(schema1)
    schema2 = SS.unparse(attrs1, dims1)
    attrs2, dims2 = SS.parse(schema2)

    # Check attributes:
    print('checking attributes...')
    # zip() stops at the shorter sequence, so without this count check a
    # round trip that loses attributes would still pass.
    assert len(attrs1) == len(attrs2), (
        "attribute count changed: %d != %d" % (len(attrs1), len(attrs2)))
    for attr1, attr2 in zip(attrs1, attrs2):
        assert attr1.name == attr2.name
        assert attr1.type == attr2.type
        assert attr1.nullable == attr2.nullable
        assert attr1.default == attr2.default

    # Check dimensions:
    print('checking dimensions...')
    # Same reasoning as above: compare the counts before the pairwise checks.
    assert len(dims1) == len(dims2), (
        "dimension count changed: %d != %d" % (len(dims1), len(dims2)))
    for dim1, dim2 in zip(dims1, dims2):
        assert dim1.name == dim2.name
        assert dim1.lo == dim2.lo
        assert dim1.hi == dim2.hi
        assert dim1.chunk == dim2.chunk
        assert dim1.overlap == dim2.overlap
def _setup_load_parameters(self):
    """Work out the load array and load schema from the parsed options.

    _args.array_or_schema is first tried as a schema.  If it does not parse,
    it is treated as an array name and its schema is fetched with
    get_array_schema().  On success self.load_array and self.load_schema
    are set.

    Raises:
        Usage: if the named array does not exist, if a schema was given
            without a target array, if the load schema does not have exactly
            one dimension, or if the target array does not exist.
        AppError: if the schema reported for an existing array cannot be
            parsed.
    """
    given = _args.array_or_schema
    try:
        # First guess: the option holds a schema.
        _unused, dims = scidb_schema.parse(given)
    except ValueError:
        # Second guess: the option names an existing array.
        looked_up = get_array_schema(given)
        if looked_up is None:
            raise Usage('Array {0} does not exist'.format(given))
        try:
            _unused, dims = scidb_schema.parse(looked_up)
        except ValueError:
            raise AppError("Internal error, show({0}) output: {1}".format(
                given, looked_up))
        # Only reached when the looked-up schema parsed cleanly.
        self.load_array = given
        self.load_schema = looked_up
    else:
        # A bare schema has no array attached, so a target array is required.
        self.load_array = None
        self.load_schema = given
        if not _args.target_array:
            raise Usage(' '.join(("Must specify at least one array with",
                                  "-s/--load-schema or -A/--target-array")))

    # Some final checks...
    if len(dims) != 1:
        raise Usage("Load schema '{0}' is not one-dimensional".format(given))
    target = _args.target_array
    if target and not get_array_schema(target):
        raise Usage("Target array %s does not exist" % target)
    return None
def getLoadSchema():
    """Populate the module-level loadAttrs and loadDims.

    If opts.load_schema is set, it is parsed directly.  Otherwise, if
    opts.load_name is set, iquery is run with show(<load_name>) and the first
    "<...>[...]" match in its output is parsed.  If neither option is set,
    the globals are left untouched.

    Raises:
        Exception: if iquery exits with a non-zero status, or if its output
            contains nothing that looks like a schema.  In both cases the
            text iquery wrote to stderr is appended to the message.
    """
    global loadAttrs, loadDims
    if opts.load_schema:
        loadAttrs, loadDims = scidb_schema.parse(opts.load_schema)
    elif opts.load_name:
        # Query SciDB for the schema.
        logNormal("Retrieving load array schema from SciDB.")
        # NOTE(review): the option values are interpolated unquoted into a
        # shell command line; confirm they can never carry shell
        # metacharacters, or move to an argument list with shell=False.
        cmd = "\"%siquery\" -c %s -p %d -o text -aq \"show(%s)\"" % (
            sciDbBinFolder, opts.db_address, opts.db_port, opts.load_name)
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, shell=True,
                             close_fds=True, preexec_fn=os.setsid)
        childProcesses.append(p)
        # communicate() drains both pipes while waiting.  Calling wait()
        # first can deadlock once the child fills a pipe buffer, because
        # nothing is reading the pipes yet.
        arrayDef, errText = p.communicate()
        if p.returncode != 0:
            err = "Failed to obtain schema for load array."
            raise Exception("%s\n%s" % (err, errText))
        m = re.search(r'<[^>]+>\s*\[[^\]]+\]', arrayDef)
        if not m:
            err = ("Schema obtained from load array is corrupt: %s"
                   % arrayDef.rstrip("\n"))
            raise Exception("%s\n%s" % (err, errText))
        logVerbose("Result: %s" % m.group(0))
        loadAttrs, loadDims = scidb_schema.parse(m.group(0))
def test_scidblib_unparse_schema():
    """Unit test for the Python schema un-parser.

    Round-trips a schema string through SS.parse() -> SS.unparse() ->
    SS.parse() and checks that the first and second parse results match.

    Raises:
        AssertionError: if the attribute or dimension counts differ between
            the two parses, or if any compared field differs.
    """
    print('*** testing scidblib.scidb_schema.unparse...')
    schema1 = (
        "<z1:string DEFAULT 'aa aa',z2:int64 NULL DEFAULT -2,"
        "z3:int32 NULL,z4:float DEFAULT -0.5,z5:char char('x'),"
        "z4:datetime DEFAULT datetime('25Nov2009:16:11:19'),"
        "z5:datetimetz DEFAULT datetimetz('10/13/2008 15:10:20 +9:00')>"
        "[dim1=-77:*,23,0,dim2=0:99,?,1,dim3=-100:7,?,1]"
    )
    attrs1, dims1 = SS.parse(schema1)
    schema2 = SS.unparse(attrs1, dims1)
    attrs2, dims2 = SS.parse(schema2)

    # Check attributes:
    print('checking attributes...')
    # The pairwise loop below uses zip(), which silently ignores surplus
    # items on either side; pin the counts so a lost attribute is caught.
    assert len(attrs1) == len(attrs2), (
        "attribute count changed: %d != %d" % (len(attrs1), len(attrs2)))
    for attr1, attr2 in zip(attrs1, attrs2):
        assert attr1.name == attr2.name
        assert attr1.type == attr2.type
        assert attr1.nullable == attr2.nullable
        assert attr1.default == attr2.default

    # Check dimensions:
    print('checking dimensions...')
    # Counts first, for the same reason as with the attributes.
    assert len(dims1) == len(dims2), (
        "dimension count changed: %d != %d" % (len(dims1), len(dims2)))
    for dim1, dim2 in zip(dims1, dims2):
        assert dim1.name == dim2.name
        assert dim1.lo == dim2.lo
        assert dim1.hi == dim2.hi
        assert dim1.chunk == dim2.chunk
        assert dim1.overlap == dim2.overlap
def reparse(attrs, dims):
    """Rebuild a schema from attrs and dims... it should match!

    The schema text produced by SS.unparse(attrs, dims) is parsed again and
    the result is compared with the inputs.

    Raises:
        AssertionError: if the re-parsed schema has a different number of
            attributes or dimensions, or if any item compares unequal.
    """
    schema = SS.unparse(attrs, dims)
    aa, dd = SS.parse(schema)
    # zip() stops at the shorter sequence, so the counts are checked
    # explicitly; otherwise items lost by the round trip would go unnoticed.
    assert len(attrs) == len(aa), (
        "Reparse attribute count mismatch: %d != %d" % (len(attrs), len(aa)))
    assert len(dims) == len(dd), (
        "Reparse dimension count mismatch: %d != %d" % (len(dims), len(dd)))
    for x, y in zip(attrs, aa):
        assert x == y, "Reparse attribute mismatch: '%s' != '%s'" % (x, y)
    for x, y in zip(dims, dd):
        assert x == y, "Reparse dimension mismatch: '%s' != '%s'" % (x, y)
def parse_dimensions(s):
    """Turn the dimension specification 's' into local Dimension objects.

    's' is wrapped in a throwaway schema with a single dummy attribute so
    that the scidblib parser can do the work.  The dimension names are
    passed to raise_if_duplicates() before the conversion.
    """
    wrapped = "<dummy:int8>[{0}]".format(s)
    _ignored_attrs, parsed_dims = scidb_schema.parse(wrapped)
    names = [d.name for d in parsed_dims]
    raise_if_duplicates(names, 'dimension name')
    # Rebuild each scidb_schema dimension as one of our own Dimension objects.
    converted = []
    for d in parsed_dims:
        converted.append(Dimension(d.name, d.lo, d.hi, d.chunk, d.overlap))
    return converted
def test_09_missing_high_bound(self):
    """Missing dimension high bound is caught"""
    schema = "<x:int64> [i=+70]"
    rejected = False
    try:
        _attrs, _dims = SS.parse(schema)
    except ValueError:
        # Expected: the parser refuses a dimension with no high bound.
        rejected = True
    assert rejected, "Missing dimension high bound should have thrown"
def test_02_missing_everything(self):
    """Throw on missing attributes or dimensions"""
    # Both the attribute list and the dimension list hold only whitespace.
    schema = "< \t\v\r\n > [ \t\v\r\n ]"
    try:
        _attrs, _dims = SS.parse(schema)
    except ValueError:
        pass
    else:
        assert False, "No variables, did not throw: %s" % schema
def test_08_missing_dim_name(self):
    """Missing dimension name is caught"""
    schema = "<x:int64> [=-43:+70]"
    rejected = False
    try:
        _attrs, _dims = SS.parse(schema)
    except ValueError:
        # Expected: a dimension must start with a name.
        rejected = True
    assert rejected, "Missing dimension name should have thrown"
def test_10_extra_semi(self):
    """Semi-colon is separator not terminator"""
    schema = "<x:int64> [i=+70:100;]"
    rejected = False
    try:
        _attrs, _dims = SS.parse(schema)
    except ValueError:
        # Expected: a trailing ';' with nothing after it is an error.
        rejected = True
    assert rejected, "Extra dimension separator should have thrown"
def test_12_id_equals_lo_semi_hi(self):
    """Semi-colon cannot separate low and high bound"""
    schema = "<x:int64> [i=0;0:0:1; j=0:99:2:10]"
    rejected = False
    try:
        _attrs, _dims = SS.parse(schema)
    except ValueError:
        # Expected: ';' only separates whole dimensions.
        rejected = True
    assert rejected, (
        "Semi-colon between hi and lo bound should have thrown")
def reparse(attrs, dims):
    """Rebuild a schema from attrs and dims... it should match!

    The schema text produced by SS.unparse() is parsed again and the result
    is compared with the inputs.

    Raises:
        AssertionError: if the re-parsed schema has a different number of
            attributes or dimensions, or if any item compares unequal.
    """
    # Don't use old-style dimension syntax for this "mirroring".  The
    # old syntax won't "mirror" None values (they must become '*').
    schema = SS.unparse(attrs, dims, compat=False)
    aa, dd = SS.parse(schema)
    # zip() stops at the shorter sequence, so the counts are checked
    # explicitly; otherwise items lost by the round trip would go unnoticed.
    assert len(attrs) == len(aa), (
        "Reparse attribute count mismatch: %d != %d" % (len(attrs), len(aa)))
    assert len(dims) == len(dd), (
        "Reparse dimension count mismatch: %d != %d" % (len(dims), len(dd)))
    for x, y in zip(attrs, aa):
        assert x == y, "Reparse attribute mismatch: '%s' != '%s'" % (x, y)
    for x, y in zip(dims, dd):
        assert x == y, "Reparse dimension mismatch: '%s' != '%s'" % (x, y)
def test_00_missing_attrs(self):
    """Throw on missing attributes

    First checks that a schema with an empty attribute list is rejected,
    then fills in one attribute and checks that the same text parses into
    one attribute and two dimensions and survives reparse().
    """
    s = "junk < \n\v\r\t > [i=0:43:5; j_ = 1 : 50 : 1 : 10] more junk"
    threw = False
    try:
        aa, dd = SS.parse(s)
    except ValueError:
        threw = True
    assert threw, "No attributes, did not throw: %s" % s

    # Raw string so that \s reaches the regex engine untouched.
    s = re.sub(r"<\s*>", "<x:int64>", s)
    # None (not '') marks "no error", so a ValueError whose message happens
    # to be empty is still reported instead of slipping through.
    error = None
    try:
        aa, dd = SS.parse(s)
    except ValueError as e:
        error = str(e)
    assert error is None, "Got attributes, threw: %s (%s)" % (s, error)
    assert len(aa) == 1, "Expected one attribute, got %d" % len(aa)
    assert len(dd) == 2, "Expected two dimensions, got %d" % len(dd)
    reparse(aa, dd)
def test_11_expression_evaluation(self):
    """Expression evaluation is not supported"""
    # The following is *completely legal* new-style syntax!!!
    schema = "<x:int64> [i=0:sizeof(',,,,,,,,,,')]"
    evaluated = True
    try:
        _attrs, _dims = SS.parse(schema)
    except ValueError:
        # ...but it fails anyway, because SS.parse() can't evaluate
        # expressions like sizeof(), it only casts strings to longs.
        evaluated = False
    assert not evaluated, "Kudos to whomever implemented sizeof() evaluation!"
def test_01_missing_dims(self):
    """Throw on missing dimensions

    First checks that a schema with an empty dimension list is rejected,
    then fills in one dimension and checks that the same text parses into
    two attributes and one dimension and survives reparse().
    """
    s = "< a:int64, b:string default foo(bar+1) > [ \t\v\n\r ]"
    aa = dd = None
    threw = False
    try:
        aa, dd = SS.parse(s)
    except ValueError:
        threw = True
    assert threw, "No dimensions, did not throw: %s" % s

    s = re.sub(r"\[\s*\]", "[i=0:0,1,0]", s)
    # None (not '') marks "no error", so a ValueError whose message happens
    # to be empty is still reported instead of slipping through.
    error = None
    try:
        aa, dd = SS.parse(s)
    except ValueError as e:
        error = str(e)
    # At this point the schema does have a dimension, hence "Got".
    assert error is None, "Got dimensions, threw: %s (%s)" % (s, error)
    assert len(aa) == 2, "Expected two attributes, got %d" % len(aa)
    assert len(dd) == 1, "Expected one dimension, got %d" % len(dd)
    reparse(aa, dd)
def test_scidblib_unparse_schema():
    """Unit test for the Python schema un-parser.

    Round-trips a schema string through SS.parse() -> SS.unparse() ->
    SS.parse(), all with default_nullable=False, and checks that the first
    and second parse results match field by field.

    Raises:
        AssertionError: if the attribute or dimension counts differ between
            the two parses, or if any compared field differs.
    """
    print('*** testing scidblib.scidb_schema.unparse...')
    # NOTE(review): there is no comma between the "compression 'gzip'"
    # fragment and the following "z5:datetimetz" fragment.  Left exactly as
    # found; confirm whether that is intentional.
    schema1 = ''.join(
        ("<z1:string DEFAULT 'aa aa',z2:int64 NULL DEFAULT -2,",
         "z3:int32 NULL,z4:float DEFAULT -0.5,z5:char dEfAuLt char('x'),",
         "z4:datetime DEFAULT datetime(\'25Nov2009:16:11:19\')",
         " compression 'gzip'",
         "z5:datetimetz DEFAULT datetimetz(\'10/13/2008 15:10:20 +9:00\')",
         " reserve 32",
         ">[dim1=-77:*,23,0,dim2=0:99,?,1,dim3=-100:7,?,1]"))
    # TODO: Fix for default_nullable=True, see SDB-5138.
    attrs1, dims1 = SS.parse(schema1, default_nullable=False)
    schema2 = SS.unparse(attrs1, dims1, default_nullable=False)
    attrs2, dims2 = SS.parse(schema2, default_nullable=False)

    # Check attributes:
    print('checking attributes...')
    # zip() stops at the shorter sequence, so without this count check a
    # round trip that loses attributes would still pass.
    assert len(attrs1) == len(attrs2), (
        "attribute count changed: %d != %d" % (len(attrs1), len(attrs2)))
    for i, (attr1, attr2) in enumerate(zip(attrs1, attrs2)):
        assert attr1.name == attr2.name, "%d: %s != %s" % (
            i, attr1.name, attr2.name)
        assert attr1.type == attr2.type, "%d: %s != %s" % (
            i, attr1.type, attr2.type)
        assert attr1.nullable == attr2.nullable, "%d: %s != %s" % (
            i, attr1.nullable, attr2.nullable)
        assert attr1.default == attr2.default, "%d: %s != %s" % (
            i, attr1.default, attr2.default)
        assert attr1.compression == attr2.compression, "%d: %s != %s" % (
            i, attr1.compression, attr2.compression)
        assert attr1.reserve == attr2.reserve, "%d: %s != %s" % (
            i, attr1.reserve, attr2.reserve)

    # Check dimensions:
    print('checking dimensions...')
    # Counts first, for the same reason as with the attributes.
    assert len(dims1) == len(dims2), (
        "dimension count changed: %d != %d" % (len(dims1), len(dims2)))
    for dim1, dim2 in zip(dims1, dims2):
        assert dim1.name == dim2.name
        assert dim1.lo == dim2.lo
        assert dim1.hi == dim2.hi
        assert dim1.chunk == dim2.chunk
        assert dim1.overlap == dim2.overlap
def test_03_quotes_in_strings(self):
    """Escaped quote marks OK in string literals"""
    schema = (r"<x:string default 'Ain\'t it grand?' "
              r"compression 'D\'oh!!!'> [i=0:0:0:1]")
    attrs, _dims = SS.parse(schema)

    # DEFAULT comes back verbatim, enclosing quotes included.  The DEFAULT
    # clause is hard to parse properly, so the parser does not try; it just
    # takes care to leave the text undisturbed.
    expected_default = r"'Ain\'t it grand?'"
    assert attrs[0].default == expected_default

    # COMPRESSION is *not* kept verbatim: its enclosing quotes are removed,
    # while an escaped quote inside stays escaped (which is wrong, and will
    # get fixed once there is a compression method called
    # 'Mike\'s sick method').  Stripping the quotes is probably the behavior
    # you want.
    expected_compression = r"D\'oh!!!"
    assert attrs[0].compression == expected_compression
def test_06_lone_identifier(self):
    """Lone dimension name x means x=0:*:0:*

    Parses a schema whose only dimension is a bare name and checks the
    defaults filled in for its bounds, overlap and chunk interval.
    """
    s = "<x:int64>[i]"
    aa, dd = SS.parse(s)
    assert len(aa) == 1, "Expected one attribute, got %d" % len(aa)
    assert len(dd) == 1, "Expected one dimension, got %d" % len(dd)
    reparse(aa, dd)
    d = dd[0]
    # The messages report fields of the dimension 'd'.  'dd' is the list of
    # dimensions and has no such fields, so formatting a failure message
    # from it would raise AttributeError and hide the real failure.
    assert d.lo == 0, "Default low bound should be zero, got %s" % d.lo
    assert d.hi == '*', ("Default high bound should be '*', got %s"
                         % d.hi)
    assert d.overlap == 0, ("Default overlap should be zero, got %s"
                            % d.overlap)
    assert d.chunk == '*', ("Default interval should be *, got %s"
                            % d.chunk)
def test_05_whitespace(self):
    """Newlines and other whitespace"""
    # Whitespace appears at every position where the schema syntax allows it.
    template = (
        r"""< a : int64 not null default strlen ( 'I\'m whelmed' ) , """
        r"""b : string default 'I\'m the default, baby!' """
        r"""compression 'default', """
        r"""c : binary reserve 42 > """
        r"""[ i = - 90 : 90 : 0: 10 ; j = - 180 : 180: 2 : 20 ] """
    )
    # Replace every whitespace run with a mix of whitespace characters.
    tokens = template.split()
    noisy_schema = "\n\t\v\r ".join(tokens)
    attrs, dims = SS.parse(noisy_schema)

    assert len(attrs) == 3, "Expected 3 attributes, got %d" % len(attrs)
    assert len(dims) == 2, "Expected 2 dimensions, got %d" % len(dims)
    assert attrs[1].name == 'b', "b name: %s" % attrs[1].name
    assert attrs[1].compression == r"default", (
        "b compression: %s" % attrs[1].compression)
    assert attrs[2].reserve == 42, "c.reserve: %s" % attrs[2].reserve
def test_07_default_overlap_and_interval(self):
    """Omit interval, or interval and overlap, and get None"""
    schema = "<\nx:int64\n>\n[i=-43:+70; j=\v+\t100:+200:5]"
    attrs, dims = SS.parse(schema)
    assert len(attrs) == 1, "Expected one attribute, got %d" % len(attrs)
    assert len(dims) == 2, "Expected two dimensions, got %d" % len(dims)
    reparse(attrs, dims)

    # Dimension i gives bounds only: overlap and chunk are both omitted.
    dim_i = dims[0]
    assert dim_i.name == 'i', "Dim 0 name got borked"
    assert dim_i.overlap is None, (
        "Dim 0 omitted overlap became %s" % dim_i.overlap)
    assert dim_i.chunk is None, (
        "Dim 0 omitted chunk became %s" % dim_i.chunk)

    # Dimension j gives bounds and an overlap of 5: only chunk is omitted.
    dim_j = dims[1]
    assert dim_j.name == 'j', "Dim 1 name got borked"
    assert dim_j.overlap == 5, (
        "Dim 1 overlap got borked to %s" % dim_j.overlap)
    assert dim_j.chunk is None, (
        "Dim 1 omitted chunk became %s" % dim_j.chunk)
def parse_attributes(s):
    """Turn the attribute specification 's' into scidb_schema.Attribute objects.

    's' is wrapped in a throwaway schema with a single dummy dimension so
    that the scidblib parser can do the work.  The attribute names are
    passed to raise_if_duplicates() before the list is returned.
    """
    wrapped = "<%s>[dummy]" % s
    # scidb_schema.Attribute already does everything we need, so the parsed
    # objects are returned as they are.
    parsed_attrs, _ignored_dims = scidb_schema.parse(wrapped)
    names = [a.name for a in parsed_attrs]
    raise_if_duplicates(names, 'attribute name')
    return parsed_attrs
def test_scidblib_parse_schema():
    """Unit test for the Python schema parser.

    Parses a seven-attribute, two-dimension schema and checks the attribute
    names, types, nullability and defaults, then the dimension names,
    bounds, chunks and overlaps.
    """
    print('*** testing scidblib.scidb_schema.parse...')
    schema1 = (
        "< a:double NULL DEFAULT -0.5, b:char DEFAULT 'a', c:int8 NULL, "
        "d:uint64, e:string DEFAULT 'aa Aa', "
        "f:datetime DEFAULT datetime('25Nov2009:16:11:19'), "
        "g:datetimetz DEFAULT datetimetz('11/25/2009 16:11:19 +10:00') > "
        "[ d_0=1:*,?,1, d_1=-101:-9,5,0 ] "
    )
    attrs, dims = SS.parse(schema1)

    # Check that the correct number of attributes and dimensions was parsed.
    print('checking length of attributes list...')
    assert len(attrs) == 7
    print('checking length of dimensions list...')
    assert len(dims) == 2

    # One row per attribute: (name, type, nullable, default).
    expected = [
        ('a', 'double', True, '-0.5'),
        ('b', 'char', False, "'a'"),
        ('c', 'int8', True, None),
        ('d', 'uint64', False, None),
        ('e', 'string', False, "'aa Aa'"),
        ('f', 'datetime', False, "datetime('25Nov2009:16:11:19')"),
        ('g', 'datetimetz', False,
         "datetimetz('11/25/2009 16:11:19 +10:00')"),
    ]
    want_names, want_types, want_nullables, want_defaults = [
        list(column) for column in zip(*expected)]

    print('checking attribute names...')
    assert [a.name for a in attrs] == want_names
    print('checking attribute types...')
    assert [a.type for a in attrs] == want_types
    print('checking if attributes are nullable...')
    assert [a.nullable for a in attrs] == want_nullables
    print('checking attribute default modifiers...')
    assert [a.default for a in attrs] == want_defaults

    # Dimension checks, one field at a time.
    print('checking dimension names...')
    assert [d.name for d in dims] == ['d_0', 'd_1']
    print('checking lower bounds of dimensions...')
    assert [d.lo for d in dims] == [1, -101]
    print('checking upper bounds of dimensions...')
    assert [d.hi for d in dims] == [SS.MAX_COORDINATE, -9]
    print('checking dimension chunks...')
    assert [d.chunk for d in dims] == ['?', 5]
    print('checking dimension overlaps...')
    assert [d.overlap for d in dims] == [1, 0]
def test_scidblib_parse_schema():
    """Unit test for the Python schema parser.

    Parses a schema with seven attributes and two dimensions, then compares
    every parsed field against the expected values listed below.
    """
    print('*** testing scidblib.scidb_schema.parse...')
    schema1 = (
        "< a:double NULL DEFAULT -0.5, b:char DEFAULT 'a', c:int8 NULL, "
        "d:uint64, e:string DEFAULT 'aa Aa', "
        "f:datetime DEFAULT datetime('25Nov2009:16:11:19'), "
        "g:datetimetz DEFAULT datetimetz('11/25/2009 16:11:19 +10:00') > "
        "[ d_0=1:*,?,1, d_1=-101:-9,5,0 ] "
    )
    attrs, dims = SS.parse(schema1)

    # The parse must yield the right number of attributes and dimensions.
    print('checking length of attributes list...')
    assert len(attrs) == 7
    print('checking length of dimensions list...')
    assert len(dims) == 2

    # Expected attribute fields, one row per attribute:
    # (name, type, nullable, default).
    attr_table = [
        ('a', 'double', True, '-0.5'),
        ('b', 'char', False, "'a'"),
        ('c', 'int8', True, None),
        ('d', 'uint64', False, None),
        ('e', 'string', False, "'aa Aa'"),
        ('f', 'datetime', False, "datetime('25Nov2009:16:11:19')"),
        ('g', 'datetimetz', False,
         "datetimetz('11/25/2009 16:11:19 +10:00')"),
    ]
    attr_names = [row[0] for row in attr_table]
    attr_types = [row[1] for row in attr_table]
    attr_nullables = [row[2] for row in attr_table]
    attr_defaults = [row[3] for row in attr_table]

    print('checking attribute names...')
    assert [a.name for a in attrs] == attr_names
    print('checking attribute types...')
    assert [a.type for a in attrs] == attr_types
    print('checking if attributes are nullable...')
    assert [a.nullable for a in attrs] == attr_nullables
    print('checking attribute default modifiers...')
    assert [a.default for a in attrs] == attr_defaults

    # Dimension fields are checked one at a time.
    print('checking dimension names...')
    assert [d.name for d in dims] == ['d_0', 'd_1']
    print('checking lower bounds of dimensions...')
    assert [d.lo for d in dims] == [1, -101]
    print('checking upper bounds of dimensions...')
    assert [d.hi for d in dims] == [SS.MAX_COORDINATE, -9]
    print('checking dimension chunks...')
    assert [d.chunk for d in dims] == ['?', 5]
    print('checking dimension overlaps...')
    assert [d.overlap for d in dims] == [1, 0]
def main(argv=None):
    """Main program entry point.

    Parses the command line, derives per-worker index ranges from the schema
    in _args.schema, starts a pool of _args.workers data_producer tasks that
    feed a shared queue, and passes every block taken from that queue to
    emit_records().  Always returns 0.
    """
    if argv is None:
        argv = sys.argv
    parse_args(argv)  # Parse the command line arguments.

    # Filter out specific TSV/CSV separator character (if one of such
    # formats is specified).
    remove_separators_from_strings()

    # TODO: Fix for default_nullable=True, see SDB-5138.  Until then the
    # schema is always parsed with default_nullable=False.
    attrs, dims = scidb_schema.parse(_args.schema, default_nullable=False)

    if _args.constant is not None:
        setup_constant_data_generators()

    dims_sizes = _get_dim_sizes(dims)
    dim_offsets = _get_dim_offsets(dims)
    start_indices, stop_indices = _get_start_and_stop_indices(
        dims, dims_sizes, dim_offsets)
    total_array_size, prob_sizes = _get_worker_problem_sizes(dims_sizes)

    attr_types = [a.type for a in attrs]
    attr_nulls = [a.nullable for a in attrs]

    formatter_func = _get_formatter(dims, attr_types, attr_nulls)
    splitter = _get_splitter(total_array_size)

    # Inter-process data manager, and the queue the workers write into.
    manager = MP.Manager()
    record_queue = manager.Queue()

    # One argument dictionary per worker; worker i gets seed _args.seed+i.
    worker_args = [
        {
            'dim_starts': start_indices[i],
            'dim_stops': stop_indices[i],
            'attr_types': attr_types,
            'attr_nulls': attr_nulls,
            'random_seed': _args.seed + i,
            'problem_size': prob_sizes[i],
        }
        for i in xrange(_args.workers)
    ]

    # Create the process pool and hand one data generating task to each
    # worker.
    pool = MP.Pool(processes=_args.workers)
    pending = [
        pool.apply_async(data_producer, (record_queue, kwargs), {})
        for kwargs in worker_args
    ]

    # Register a simple cleanup function in case of the unexpected exit
    # (e.g. user presses CTRL-C).
    atexit.register(lambda: pool.terminate())

    # Record the original parent process id: when the parent dies, we shall
    # attempt to exit too.  (The value is not used again in this function.)
    ppid = os.getppid()

    # Drain the queue: workers insert record blocks while this process
    # pulls them out and emits them.  Stop once every task is ready and the
    # most recent poll found the queue empty.
    drained = False
    while True:
        if all([task.ready() for task in pending]) and drained:
            break
        try:
            block = record_queue.get(False)  # Non-blocking fetch.
            drained = False
            emit_records(block, formatter_func, splitter)
        except Queue.Empty:
            drained = True
        except (KeyboardInterrupt, SystemExit):
            # In case of CTRL-C press or unexpected exit, exit the main
            # process of the program.  Child processes will also exit
            # in the same way.
            break

    pool.close()      # No more work can be submitted.
    pool.join()       # Double-check that all workers are finished.
    pool.terminate()  # Terminate the pool.
    return 0
def test_scidblib_parse_schema():
    """Unit test for the Python schema parser.

    Parses a nine-attribute, two-dimension schema with the parser's default
    nullability and checks every attribute and dimension field.  It then
    parses the same text with default_nullable=False and re-checks
    nullability only.
    """
    print('*** testing scidblib.scidb_schema.parse...')
    schema1 = (
        "< a:double NULL DEFAULT -0.5, b:char DEFAULT 'a', c:int8 NULL, "
        "d:uint64, e:string DEFAULT 'aa Aa', "
        "f:datetime DEFAULT datetime('25Nov2009:16:11:19'), "
        "g:datetimetz DEFAULT datetimetz('11/25/2009 16:11:19 +10:00'), "
        "h:uint16 NOT NULL, "
        r"i: string not null default 'this is the default, isn\'t that wild?' "
        "ComPression 'default' reserve +64 > "
        "[ d_0=1:*,?,1, d_1=-101:-9,5,0 ] "
    )
    attrs, dims = SS.parse(schema1)

    # Check that the correct number of attributes and dimensions was parsed.
    print('checking length of attributes list...')
    assert len(attrs) == 9
    print('checking length of dimensions list...')
    assert len(dims) == 2

    # One row per attribute:
    # (name, type, nullable, nullable with default_nullable=False,
    #  default, compression, reserve).
    expected = [
        ('a', 'double', True, True, '-0.5', None, None),
        ('b', 'char', True, False, "'a'", None, None),
        ('c', 'int8', True, True, None, None, None),
        ('d', 'uint64', True, False, None, None, None),
        ('e', 'string', True, False, "'aa Aa'", None, None),
        ('f', 'datetime', True, False,
         "datetime('25Nov2009:16:11:19')", None, None),
        ('g', 'datetimetz', True, False,
         "datetimetz('11/25/2009 16:11:19 +10:00')", None, None),
        ('h', 'uint16', False, False, None, None, None),
        ('i', 'string', False, False,
         r"'this is the default, isn\'t that wild?'", 'default', 64),
    ]
    (want_names, want_types, want_nullables, want_old_nullables,
     want_defaults, want_compression, want_reserve) = [
        list(column) for column in zip(*expected)]

    print('checking attribute names...')
    assert [a.name for a in attrs] == want_names
    print('checking attribute types...')
    assert [a.type for a in attrs] == want_types
    print('checking if attributes are nullable...')
    assert [a.nullable for a in attrs] == want_nullables
    print('checking attribute default modifiers...')
    assert [a.default for a in attrs] == want_defaults

    # Compression and reserve values.
    assert [a.compression for a in attrs] == want_compression
    assert [a.reserve for a in attrs] == want_reserve

    # Dimension checks, one field at a time.
    print('checking dimension names...')
    assert [d.name for d in dims] == ['d_0', 'd_1']
    print('checking lower bounds of dimensions...')
    assert [d.lo for d in dims] == [1, -101]
    print('checking upper bounds of dimensions...')
    assert [d.hi for d in dims] == ['*', -9]
    print('checking dimension chunks...')
    assert [d.chunk for d in dims] == ['?', 5]
    print('checking dimension overlaps...')
    assert [d.overlap for d in dims] == [1, 0]

    # Reparse with old nullability rules and check nullable.
    attrs, _ = SS.parse(schema1, default_nullable=False)
    assert [a.nullable for a in attrs] == want_old_nullables
def main(argv=None):
    """Main program entry point.

    Parses the command line, works out per-worker index ranges from the
    schema in _args.schema, launches _args.workers data_producer tasks in a
    process pool, and hands each block the workers put on the shared queue
    to emit_records().  Always returns 0.
    """
    if argv is None:
        argv = sys.argv
    parse_args(argv)  # Parse the command line arguments.

    # Filter out specific TSV/CSV separator character (if one of such
    # formats is specified).
    remove_separators_from_strings()

    # TODO: Fix for default_nullable=True, see SDB-5138.  For now the schema
    # is always parsed with default_nullable=False.
    attrs, dims = scidb_schema.parse(_args.schema, default_nullable=False)

    if _args.constant is not None:
        setup_constant_data_generators()

    dims_sizes = _get_dim_sizes(dims)
    dim_offsets = _get_dim_offsets(dims)
    start_indices, stop_indices = _get_start_and_stop_indices(
        dims, dims_sizes, dim_offsets)
    total_array_size, prob_sizes = _get_worker_problem_sizes(dims_sizes)

    attr_types = [attr.type for attr in attrs]
    attr_nulls = [attr.nullable for attr in attrs]

    formatter_func = _get_formatter(dims, attr_types, attr_nulls)
    splitter = _get_splitter(total_array_size)

    # Set up the inter-process data manager and the queue the
    # data-generating workers will fill.
    manager = MP.Manager()
    blocks = manager.Queue()

    # Build the argument dictionary for every worker.
    per_worker = []
    for rank in xrange(_args.workers):
        per_worker.append({
            'dim_starts': start_indices[rank],
            'dim_stops': stop_indices[rank],
            'attr_types': attr_types,
            'attr_nulls': attr_nulls,
            'random_seed': _args.seed + rank,
            'problem_size': prob_sizes[rank],
        })

    # Create the pool, then assign one data generating task per worker.
    pool = MP.Pool(processes=_args.workers)
    tasks = [pool.apply_async(data_producer, (blocks, params), {})
             for params in per_worker]

    # Register a simple cleanup function in case of the unexpected exit
    # (e.g. user presses CTRL-C).
    atexit.register(lambda: pool.terminate())

    # Record the original parent process id: when the parent dies, we shall
    # attempt to exit too.  (The value is not used again in this function.)
    ppid = os.getppid()

    # Workers insert record blocks into the queue; this process pulls them
    # out and emits them.  Keep polling until every task is ready and the
    # last poll came back empty.
    queue_is_empty = False
    while not (all([t.ready() for t in tasks]) and queue_is_empty):
        try:
            text = blocks.get(False)  # Grab a block without blocking.
            queue_is_empty = False
            emit_records(text, formatter_func, splitter)
        except Queue.Empty:
            queue_is_empty = True
        except (KeyboardInterrupt, SystemExit):
            # In case of CTRL-C press or unexpected exit, exit the main
            # process of the program.  Child processes will also exit
            # in the same way.
            break

    # Close the pool (no more workers can be added), wait for the workers
    # to finish, then terminate the pool.
    pool.close()
    pool.join()
    pool.terminate()
    return 0
def test_04_semi_colon_dimension_sep(self):
    """Semi-colons can separate dimension groups"""
    schema = "<x:int64>[i=0:0:0:1; j=0:99:2:10]"
    attrs, dims = SS.parse(schema)
    # Both ';'-separated groups must come back as dimensions.
    assert len(dims) == 2, "Expected two dimensions, got %d" % len(dims)
    reparse(attrs, dims)