def test_marble_stream_get(self):
    """Check ``MarbleStream.get`` against the album fixtures.

    For every ``(date, file)`` pair in ``self.files`` a ``MarbleStream`` is
    opened on the file.  Each album in ``self.albums`` whose ``_PARTITIONS``
    entry equals ``date`` is then compared, column by column, with what
    ``stream.get(column, rowid)`` returns.  ``rowid`` starts at 1 for each
    file and advances once per matching album.
    """
    for date, marble_file in self.files.iteritems():
        stream = MarbleStream(marble_file)
        # Close the stream even when an assertion below fails; otherwise a
        # failing comparison would leave it open.
        try:
            rowid = 1
            for album in self.albums:
                # only albums belonging to this file's partition are checked
                if album[_PARTITIONS] != date:
                    continue
                # test 'get' first
                for k, v in album.iteritems():
                    self.assertEqual(v, stream.get(k, rowid))
                rowid += 1
        finally:
            stream.close()
def hustle_input_stream(fd, size, url, params, wheres, gen_where_index, key_names):
    """Yield the rows of the marble at *url* selected by *wheres*.

    NOTE(review): the ``(fd, size, url, params)`` signature suggests this is
    plugged in as a Disco input stream -- confirm against the caller.

    Arguments:
        fd, size: not used by this function.
        url: split with ``util.urlsplit``; its third component is resolved
            to a local file through ``util.localize``.
        params: must expose ``_task.disco_data`` and ``_task.ddfs_data``,
            which are forwarded to ``util.localize``.
        wheres: sequence of objects carrying a ``_name``.  Entries whose
            ``_name`` differs from the opened marble's ``_name`` are skipped.
        gen_where_index: when true, every record is prefixed with the
            position of the originating entry in *wheres*.
        key_names: indexed by the position in *wheres*; each entry lists the
            columns to fetch.  A falsy column yields ``None`` in the record.

    Yields:
        ``(record, ())`` pairs, where ``record`` is a tuple of column values
        (optionally prefixed with the where index).

    Raises:
        Whatever ``util.urlsplit`` raises; a message is printed first and the
        exception is re-raised with its original traceback.
    """
    from disco import util
    from hustle.core.marble import Expr, MarbleStream

    empty = ()

    try:
        _scheme, _netloc, rest = util.urlsplit(url)
    except Exception as e:
        print("Error handling hustle_input_stream for %s. %s" % (url, e))
        # bare raise keeps the traceback of the original failure
        raise

    fle = util.localize(rest,
                        disco_data=params._task.disco_data,
                        ddfs_data=params._task.ddfs_data)

    otab = None
    try:
        otab = MarbleStream(fle)
        bitmaps = {}
        for index, where in enumerate(wheres):
            # do not process where clauses that have nothing to do with this marble
            if where._name == otab.marble._name:
                # exact type check: only plain Expr instances are evaluated
                if type(where) is Expr and not where.is_partition:
                    bitmaps[index] = where(otab)
                else:
                    # it is either the table itself, or a partition expression.
                    # either way, return the entire table
                    bitmaps[index] = otab.iter_all()

        for index, bitmap in bitmaps.iteritems():
            prefix = [index] if gen_where_index else []
            for row_id in bitmap:
                record = [otab.get(col, row_id) if col else None
                          for col in key_names[index]]
                # this looks odd, but is faster than 'prefix + record'
                record[0:0] = prefix
                yield tuple(record), empty
    finally:
        # test against None so the stream is closed whatever its truth value
        if otab is not None:
            otab.close()
def hustle_input_stream(fd, size, url, params, wheres, gen_where_index, key_names):
    """Yield the rows of the marble at *url* selected by *wheres*.

    NOTE(review): the ``(fd, size, url, params)`` signature suggests this is
    plugged in as a Disco input stream -- confirm against the caller.

    Arguments:
        fd, size: not used by this function.
        url: split with ``util.urlsplit``; its third component is resolved
            to a local file through ``util.localize``.
        params: must expose ``_task.disco_data`` and ``_task.ddfs_data``,
            which are forwarded to ``util.localize``.
        wheres: sequence of objects carrying a ``_name``.  Entries whose
            ``_name`` differs from the opened marble's ``_name`` are skipped.
        gen_where_index: when true, every record is prefixed with the
            position of the originating entry in *wheres*.
        key_names: indexed by the position in *wheres*; each entry lists the
            columns to fetch.  A falsy column yields ``None`` in the record.

    Yields:
        ``(record, ())`` pairs, where ``record`` is a tuple of column values
        (optionally prefixed with the where index).

    Raises:
        Whatever ``util.urlsplit`` raises; a message is printed first and the
        exception is re-raised with its original traceback.
    """
    from disco import util
    from hustle.core.marble import Expr, MarbleStream

    empty = ()

    try:
        _scheme, _netloc, rest = util.urlsplit(url)
    except Exception as e:
        print("Error handling hustle_input_stream for %s. %s" % (url, e))
        # bare raise keeps the traceback of the original failure
        raise

    fle = util.localize(rest,
                        disco_data=params._task.disco_data,
                        ddfs_data=params._task.ddfs_data)

    otab = None
    try:
        otab = MarbleStream(fle)
        bitmaps = {}
        for index, where in enumerate(wheres):
            # do not process where clauses that have nothing to do with this marble
            if where._name == otab.marble._name:
                # exact type check: only plain Expr instances are evaluated
                if type(where) is Expr and not where.is_partition:
                    bitmaps[index] = where(otab)
                else:
                    # it is either the table itself, or a partition expression.
                    # either way, return the entire table
                    bitmaps[index] = otab.iter_all()

        for index, bitmap in bitmaps.iteritems():
            prefix = [index] if gen_where_index else []
            for row_id in bitmap:
                record = [otab.get(col, row_id) if col else None
                          for col in key_names[index]]
                # this looks odd, but is faster than 'prefix + record'
                record[0:0] = prefix
                yield tuple(record), empty
    finally:
        # test against None so the stream is closed whatever its truth value
        if otab is not None:
            otab.close()