def make_sharding(db, table, conn, shard_fields, start,
                  number_per_shard, tolerance_of_shard, shard_maker=list):
    """
    Scan `table` in `db` and describe how its records split into shards.

    A connection pool is built from `conn` with `mysqlconnpool.make`, the
    records are read with `scan_index` (using `shard_fields` for both of its
    field arguments, beginning at `start`, with `left_open=False`,
    `use_dict=False` and `retry=3`), and the resulting iterator is handed to
    `strutil.sharding` together with `number_per_shard` and, as `accuracy`,
    `tolerance_of_shard`.

    :param db: database name; passed to `scan_index` as part of `(db, table)`.
    :param table: table name; passed to `scan_index` as part of `(db, table)`.
    :param conn: argument for `mysqlconnpool.make`.
    :param shard_fields: fields handed to `scan_index`.
    :param start: where the scan begins; also used as the first shard start.
    :param number_per_shard: size argument for `strutil.sharding`.
    :param tolerance_of_shard: `accuracy` argument for `strutil.sharding`.
    :param shard_maker: callable applied to every shard start before it is
        stored in the result. Defaults to `list`.

    :return: a dict with three keys: `"shard"` is the list of converted shard
        starts, `"number"` is the list of per-shard counts and `"total"` is
        the sum of those counts.
    """

    pool = mysqlconnpool.make(conn)

    records = scan_index(pool, (db, table), shard_fields, shard_fields, start,
                         left_open=False, use_dict=False, retry=3)

    shards = strutil.sharding(records, number_per_shard,
                              accuracy=tolerance_of_shard, joiner=list)

    # The first shard is reported with the caller-supplied `start`; the start
    # value that sharding() returned for it is dropped.
    _, first_count = shards[0]

    boundaries = [shard_maker(start)]
    counts = [first_count]
    total = first_count

    for boundary, n in shards[1:]:
        boundaries.append(shard_maker(boundary))
        counts.append(n)
        total += n

    return {
        "shard": boundaries,
        "number": counts,
        "total": total,
    }
def make_sharding(db, table, conn, shard_fields, start,
                  number_per_shard, tolerance_of_shard, shard_maker=list):
    # Compute shard starts and sizes for the records of `db`.`table`.
    #
    # Records come from scan_index() over a pool made from `conn`, scanned
    # with `shard_fields` from `start` on; strutil.sharding() then groups
    # them using `number_per_shard` as size and `tolerance_of_shard` as
    # accuracy.
    #
    # Returns {"shard": [...], "number": [...], "total": n}: every shard
    # start passed through `shard_maker`, the count of each shard, and the
    # sum of all counts.

    scan_options = dict(left_open=False, use_dict=False, retry=3)

    pool = mysqlconnpool.make(conn)
    rows = scan_index(pool, (db, table), shard_fields, shard_fields, start,
                      **scan_options)

    shards = strutil.sharding(rows,
                              number_per_shard,
                              accuracy=tolerance_of_shard,
                              joiner=list)

    # Only the count of the first shard is taken from sharding(); its start
    # is always the `start` given by the caller.
    _, head_count = shards[0]

    sharding_info = {
        "shard": [shard_maker(start)],
        "number": [head_count],
        "total": head_count,
    }

    shard_starts = sharding_info["shard"]
    shard_sizes = sharding_info["number"]

    for shard_start, shard_size in shards[1:]:
        shard_starts.append(shard_maker(shard_start))
        shard_sizes.append(shard_size)
        sharding_info["total"] += shard_size

    return sharding_info
def test_sharding_iterable(self):
    # Shard `self.iterables` with size=5 and joiner=tuple, then compare every
    # returned (start, n) pair, and the number of pairs, with `expected`.

    dd()

    expected = (
        (None, 5),
        ((1, 'd', '人'), 5),
        ((2, 'b', 'x'), 5),
        ((2, 'b', 'xx'), 3),
    )

    rst = strutil.sharding(self.iterables, size=5, joiner=tuple)

    for i, (start, n) in enumerate(rst):
        # `!s` converts to str before padding: `start` is None or a tuple,
        # and Python 3 rejects a non-empty format spec for those types.
        dd('{start!s:<20} {n:>10}'.format(start=start, n=n))
        self.assertEqual(expected[i], (start, n))

    # The loop above only visits what was returned, so a result with fewer
    # shards than expected (even an empty one) would otherwise pass.
    self.assertEqual(len(expected), len(rst))
def test_sharding_accuracy(self):
    # Shard `self.iterables` with a fixed size=5 and several `accuracy`
    # values. Each case is [accuracy, expected (start, n) pairs].

    dd()

    cases = (
        [
            5,
            (
                (None, 5),
                ((1, 'd', ), 5),
                ((2, 'b', 'x'), 5),
                ((2, 'b', 'xx'), 3),
            ),
        ],
        [
            9,
            (
                (None, 9),
                ((2, ), 9),
            ),
        ],
        [
            len(self.iterables),
            ((None, len(self.iterables)), ),
        ],
        [
            len(self.iterables) + 1,
            ((None, len(self.iterables)), ),
        ],
    )

    for accuracy, expected in cases:

        rst = strutil.sharding(self.iterables, size=5,
                               accuracy=accuracy, joiner=tuple)

        dd("accuracy:", accuracy)
        dd("rst:")
        dd(rst)

        for i, (start, n) in enumerate(rst):
            # `!s` converts to str before padding: `start` is None or a
            # tuple, and Python 3 rejects a non-empty format spec for those.
            dd('{start!s:<20} {n:>10}'.format(start=start, n=n))
            self.assertEqual(expected[i], (start, n))

        # The inner loop only visits what was returned; without this check
        # a result with fewer shards than expected would still pass.
        self.assertEqual(len(expected), len(rst))
def test_sharding(self):
    # Part 1: shard the stripped lines of the `words` file with size=200 and
    # accuracy=20 and compare with a fixed list of (start, count) pairs.
    # Part 2: for several (size, accuracy) pairs, check the size bounds of
    # every shard and that each count matches the lines in its range.

    with open(os.path.join(this_base, 'words')) as f:
        lines = f.readlines()

    lines = [x.strip() for x in lines]

    _size, _accuracy = 200, 20
    rst = strutil.sharding(lines, size=_size, accuracy=_accuracy)

    expected = [
        (None, 209, ),
        ('M', 202, ),
        ('TestU', 202, ),
        ('br', 202, ),
        ('dc', 201, ),
        ('exi', 202, ),
        ('inf', 204, ),
        ('may', 205, ),
        ('pf', 200, ),
        ('rew', 208, ),
        ('suc', 204, ),
        ('wh', 56, ),
    ]

    for i, (start, size) in enumerate(rst):
        # `!s` converts to str before padding: the first `start` is None and
        # Python 3 rejects a non-empty format spec for None.
        dd('{start!s:<10} {size:>20}'.format(start=start, size=size))
        self.assertEqual(expected[i], (start, size))

    # The loop above only visits what was returned, so a result with fewer
    # shards than expected would otherwise pass.
    self.assertEqual(len(expected), len(rst))

    # general cases

    cases = (
        (1, 1),
        (20, 1),
        (20, 5),
        (200, 20),
        (200, 100),
    )

    for _size, _accuracy in cases:

        dd('size: {_size}, accuracy: {_accuracy}'.format(
            _size=_size, _accuracy=_accuracy))

        rst = strutil.sharding(lines, size=_size, accuracy=_accuracy)

        tot = 0
        for i, (start, size) in enumerate(rst):

            dd('{start!s:<10} {size:>20}'.format(start=start, size=size))

            # A `start` of None means "no lower bound". Testing it explicitly
            # keeps the Python 2 result (None orders below every string) and
            # avoids the TypeError Python 3 raises for `str >= None`.

            # the last shard might have less items
            if i < len(rst) - 1:
                nxt = rst[i + 1][0]

                self.assertLessEqual(_size, size)
                self.assertLessEqual(size, _size + _accuracy)

                in_shard = [x for x in lines
                            if (start is None or x >= start) and x < nxt]
                self.assertEqual(len(in_shard), size)
            else:
                self.assertLessEqual(size, _size)

                in_shard = [x for x in lines
                            if start is None or x >= start]
                self.assertEqual(len(in_shard), size)

            tot += size

        # Every line must belong to exactly one shard.
        self.assertEqual(len(lines), tot)
# Demo of several strutil helpers, written for Python 2 (`print` statements,
# `xrange`).

# NOTE(review): `t` is not assigned anywhere in this fragment; presumably it
# is set earlier in the script. Confirm.
print t

# Print strutil.colorize(p, 100) for p = 0..199 on one output line: the
# trailing comma stops `print` from ending the line, and the bare `print`
# after the loop terminates it.
for p in xrange(0, 200):
    print strutil.colorize(p, 100),
print

# Same layout for p = 0..99, with 10 as the second argument.
for p in xrange(0, 100):
    print strutil.colorize(p, 10),
print

# colorize() called with a third argument, a format string.
print strutil.colorize(22, 100, '{0:>10}%')

# line_pad() on a three-line string, with 'hehe' repeated four times as the
# second argument.
str_linestr = 'adw\nni\nleng\n'
print strutil.line_pad(str_linestr, 'hehe' * 4)

# format_line() with two list items and one plain string item.
print strutil.format_line([["name:", "age:"], ["drdrxp", "18"], "wow"], sep= " | ", aligns="lll")

# NOTE(review): the four pieces are concatenated with no separator, so
# `lines` is one str and not a list of lines. Confirm that this is the input
# sharding() is meant to receive here.
lines = 'cheng yang 180'
lines += 'zhang xixi 150'
lines += 'liu xiang 170'
lines += 'yao ming 160'
print strutil.sharding(lines, size=160, accuracy=30)

# Print every item yielded by struct_repr(a) on its own line, then an empty
# line.
a = [1, 2, 3, 4, 5, 6, 7, 8]
for l in strutil.struct_repr(a):
    print l
print

# tokenize() with its default separator, then with an explicit sep='xy'.
print strutil.tokenize(' a\t b\n c\r ')
print strutil.tokenize('a bxyc d', sep='xy')