Ejemplo n.º 1
0
def make_sharding(db, table, conn, shard_fields, start, number_per_shard, tolerance_of_shard,
        shard_maker=list):
    """Shard the records produced by ``scan_index`` and summarize the shards.

    ``conn`` is handed to ``mysqlconnpool.make``; ``(db, table)``,
    ``shard_fields`` (passed twice) and ``start`` are forwarded to
    ``scan_index``.  The resulting records are given to ``strutil.sharding``
    with ``number_per_shard`` as its second positional argument and
    ``tolerance_of_shard`` as ``accuracy``.

    Returns a dict with three keys:

    - ``"shard"``: ``shard_maker`` applied to each shard start.  The first
      entry is always ``shard_maker(start)``; the start value reported by
      ``strutil.sharding`` for the first shard is discarded.
    - ``"number"``: the record count of each shard, in the same order.
    - ``"total"``: the sum of all shard counts.
    """

    pool = mysqlconnpool.make(conn)

    records = scan_index(pool,
                         (db, table), shard_fields, shard_fields, start,
                         left_open=False, use_dict=False, retry=3)

    parts = strutil.sharding(
        records, number_per_shard, accuracy=tolerance_of_shard, joiner=list)

    # The first shard is reported with the caller-supplied `start`,
    # not with the boundary returned by sharding().
    _, first_count = parts[0]

    boundaries = [shard_maker(start)]
    counts = [first_count]
    total = first_count

    for boundary, n in parts[1:]:
        boundaries.append(shard_maker(boundary))
        counts.append(n)
        total += n

    return {
        "shard": boundaries,
        "number": counts,
        "total": total,
    }
Ejemplo n.º 2
0
def make_sharding(db,
                  table,
                  conn,
                  shard_fields,
                  start,
                  number_per_shard,
                  tolerance_of_shard,
                  shard_maker=list):
    """Build a shard summary for the records yielded by ``scan_index``.

    A connection pool is made from ``conn`` with ``mysqlconnpool.make`` and
    passed to ``scan_index`` together with ``(db, table)``, ``shard_fields``
    (twice) and ``start``.  The record iterator is then split by
    ``strutil.sharding`` using ``number_per_shard`` and, as ``accuracy``,
    ``tolerance_of_shard``.

    The returned dict contains:

    - ``"shard"``: one ``shard_maker(...)`` value per shard; the first one is
      built from ``start`` rather than from the value returned by
      ``strutil.sharding``.
    - ``"number"``: the per-shard record counts.
    - ``"total"``: the sum of those counts.
    """

    connpool = mysqlconnpool.make(conn)

    options = dict(left_open=False, use_dict=False, retry=3)
    record_iter = scan_index(connpool, (db, table), shard_fields,
                             shard_fields, start, **options)

    shards = strutil.sharding(record_iter,
                              number_per_shard,
                              accuracy=tolerance_of_shard,
                              joiner=list)

    head, tail = shards[0], shards[1:]

    # Only the count of the first shard is used; its start is replaced by
    # the caller-supplied `start`.
    _, head_count = head

    shard_starts = [shard_maker(start)]
    shard_starts.extend([shard_maker(key) for key, _ in tail])

    numbers = [head_count]
    numbers.extend([n for _, n in tail])

    return {
        "shard": shard_starts,
        "number": numbers,
        "total": sum(numbers),
    }
Ejemplo n.º 3
0
    def test_sharding_iterable(self):
        """Check strutil.sharding on self.iterables with size=5, joiner=tuple.

        Every (start, count) pair returned is compared with the entry at the
        same position in the expected table.
        """

        dd()

        rst = strutil.sharding(self.iterables, size=5, joiner=tuple)

        expected = (
            (None, 5),
            ((1, 'd', '人'), 5),
            ((2, 'b', 'x'), 5),
            ((2, 'b', 'xx'), 3),
        )

        for idx, pair in enumerate(rst):
            start, n = pair
            line = '{start:<20} {n:>10}'.format(start=start, n=n)
            dd(line)
            self.assertEqual(expected[idx], (start, n))
Ejemplo n.º 4
0
    def test_sharding_accuracy(self):
        """Check strutil.sharding on self.iterables for several accuracies.

        Each case pairs an ``accuracy`` value with the (start, count) pairs
        expected from ``sharding(self.iterables, size=5, accuracy=...,
        joiner=tuple)``.
        """

        dd()

        total = len(self.iterables)

        # Expected result when everything ends up in a single shard.
        single_shard = ((None, total), )

        cases = (
            (5, (
                (None, 5),
                ((1, 'd'), 5),
                ((2, 'b', 'x'), 5),
                ((2, 'b', 'xx'), 3),
            )),
            (9, (
                (None, 9),
                ((2, ), 9),
            )),
            (total, single_shard),
            (total + 1, single_shard),
        )

        for accuracy, expected in cases:
            rst = strutil.sharding(
                self.iterables, size=5, accuracy=accuracy, joiner=tuple)

            dd("accuracy:", accuracy)
            dd("rst:")
            dd(rst)

            for idx, pair in enumerate(rst):
                start, n = pair
                dd('{start:<20} {n:>10}'.format(start=start, n=n))
                self.assertEqual(expected[idx], (start, n))
Ejemplo n.º 5
0
    def test_sharding(self):
        """Check strutil.sharding against the 'words' fixture file.

        First compares the result for size=200, accuracy=20 with a fixed
        table of (start, count) pairs.  Then, for several (size, accuracy)
        combinations, verifies that every shard except the last holds between
        ``size`` and ``size + accuracy`` lines, that the last shard holds at
        most ``size`` lines, that each count matches the number of lines in
        the shard's key range, and that the counts add up to the number of
        lines.
        """

        with open(os.path.join(this_base, 'words')) as f:
            lines = [raw.strip() for raw in f.readlines()]

        _size, _accuracy = 200, 20

        rst = strutil.sharding(lines, size=_size, accuracy=_accuracy)

        expected = [
            (None, 209),
            ('M', 202),
            ('TestU', 202),
            ('br', 202),
            ('dc', 201),
            ('exi', 202),
            ('inf', 204),
            ('may', 205),
            ('pf', 200),
            ('rew', 208),
            ('suc', 204),
            ('wh', 56),
        ]

        for i, (start, size) in enumerate(rst):
            dd('{start:<10} {size:>20}'.format(start=start, size=size))
            self.assertEqual(expected[i], (start, size))

        # general cases

        cases = (
            (1, 1),
            (20, 1),
            (20, 5),
            (200, 20),
            (200, 100),
        )

        for _size, _accuracy in cases:

            dd('size: {_size}, accuracy: {_accuracy}'.format(
                _size=_size, _accuracy=_accuracy))

            rst = strutil.sharding(lines, size=_size, accuracy=_accuracy)

            tot = 0
            for i, (start, size) in enumerate(rst):

                dd('{start:<10} {size:>20}'.format(start=start, size=size))

                is_last = not (i < len(rst) - 1)

                if is_last:
                    # the last shard might have less items
                    self.assertLessEqual(size, _size)
                    members = [x for x in lines if x >= start]
                    self.assertEqual(len(members), size)
                else:
                    nxt = rst[i + 1][0]
                    self.assertLessEqual(_size, size)
                    self.assertLessEqual(size, _size + _accuracy)
                    members = [x for x in lines if x >= start and x < nxt]
                    self.assertEqual(len(members), size)

                tot += size

            self.assertEqual(len(lines), tot)
Ejemplo n.º 6
0
    # NOTE(review): `t` is assigned before this fragment starts; its origin
    # is not visible here.
    print t

    # Print strutil.colorize(p, 100) for p in 0..199 on one line (the
    # trailing comma suppresses the newline), then end the line.
    for p in xrange(0, 200):
        print strutil.colorize(p, 100),
    print 
    # Same for p in 0..99 with 10 as the second argument.
    for p in xrange(0, 100):
        print strutil.colorize(p, 10),
    print
    # colorize() called with an additional format-string argument.
    print strutil.colorize(22, 100, '{0:>10}%')
    
    # line_pad() applied to a three-line string with a 16-character pad.
    str_linestr = 'adw\nni\nleng\n'
    print strutil.line_pad(str_linestr, 'hehe' * 4)

    # format_line() called with two list items and one plain string item.
    print strutil.format_line([["name:", "age:"], ["drdrxp", "18"], "wow"], sep= 
    " | ", aligns="lll")

    # NOTE(review): `lines` is built with `+=` on str, so it is ONE string
    # with no separators between the four records, and sharding() receives a
    # str rather than a list of lines -- presumably a list was intended;
    # confirm.
    lines = 'cheng yang 180'
    lines += 'zhang xixi 150'
    lines += 'liu xiang 170'
    lines += 'yao ming 160'
    print strutil.sharding(lines, size=160, accuracy=30)
    
    # Print each item yielded by struct_repr() for a flat list of ints.
    a = [1, 2, 3, 4, 5, 6, 7, 8]
    for l in strutil.struct_repr(a):
        print l
    print

    # tokenize() with its default arguments, then with an explicit sep.
    print strutil.tokenize(' a\t b\n c\r ')
    print strutil.tokenize('a bxyc d', sep='xy')