예제 #1
0
def run(flow, cloudburst, requests, local, sckt=None):
    """Drive the flow with random user requests and report latencies.

    Args:
        flow: Deployed flow; ``flow.run(table).get()`` is timed per request.
        cloudburst: Not referenced here.
        requests: Number of requests to issue.
        local: Passed negated as the third argument of
            ``print_latency_stats``.
        sckt: Optional socket-like object; when truthy, the serialized
            latency list is sent through ``sckt.send``.
    """
    # The original rebound ``print`` to ``logging.info`` for non-local runs
    # but never called it; that dead shadowing of the builtin is removed.
    latencies = []

    bench_start = time.time()
    for i in range(requests):
        if i % 100 == 0:
            logging.info(f'On request {i}...')

        # One random user id plus five random integers below
        # NUM_PRODUCT_SETS make up each request row.
        uid = np.random.randint(NUM_USERS)
        recent = np.random.randint(0, NUM_PRODUCT_SETS, 5)

        inp = Table([('user', StrType), ('recent', NumpyType)])
        inp.insert([str(uid), recent])

        # Only the round trip through the flow is timed, not input creation.
        start = time.time()
        flow.run(inp).get()
        end = time.time()

        latencies.append(end - start)

    bench_end = time.time()

    print_latency_stats(latencies, "E2E", not local, bench_end - bench_start)

    if sckt:
        bts = cp.dumps(latencies)
        sckt.send(bts)
예제 #2
0
def run(flow, cloudburst, requests, local, sckt=None):
    """Benchmark the translation flow with randomly chosen sentence pairs.

    Twenty single-row tables are prepared up front; every request then picks
    one of them at random and the round trip through ``flow.run`` is timed.

    Args:
        flow: Deployed flow whose ``run(table).get()`` call is measured.
        cloudburst: Not referenced in this function.
        requests: Number of timed requests.
        local: Passed negated as the third argument of
            ``print_latency_stats``.
        sckt: Optional socket-like object that receives the serialized
            latency list via ``send``.
    """
    schema = [('classify', StrType), ('translate', StrType)]

    french = [
        'Je m\'appelle Pierre.', 'Comment allez-vous aujourd\'hui?',
        'La nuit est longue et froide, et je veux rentrer chez moi.',
        'Tu es venue a minuit, mais je me suis déja couché.',
        'On veut aller dehors mais il faut rester dedans.'
    ]
    german = [
        'Ich bin in Berliner.', 'Die katz ist saß auf dem Stuhl.',
        'Sie schwimmt im Regen.',
        'Ich gehe in den Supermarkt, aber mir ist kalt.',
        'Ich habe nie gedacht, dass du Amerikanerin bist.'
    ]
    english = [
        'What is the weather like today?',
        'Why does it rain so much in April?',
        'I like running but my ankles hurt.',
        'I should go home to eat dinner before it gets too late.',
        'I would like to hang out with my friends, but I have to work.'
    ]

    # Each prepared table pairs a French-or-German sentence (coin flip) in
    # the 'classify' column with an English sentence in 'translate'.
    inputs = []
    for _ in range(20):
        candidate = Table(schema)

        foreign_pool = french if random.random() < 0.5 else german
        foreign_sentence = random.choice(foreign_pool)

        candidate.insert([foreign_sentence, random.choice(english)])
        inputs.append(candidate)

    logging.info('Starting benchmark...')

    latencies = []
    bench_start = time.time()
    for request_no in range(requests):
        if request_no % 100 == 0:
            logging.info(f'On request {request_no}...')

        chosen = random.choice(inputs)

        began = time.time()
        flow.run(chosen).get()
        latencies.append(time.time() - began)
    bench_end = time.time()

    print_latency_stats(latencies, "E2E", not local, bench_end - bench_start)

    if sckt:
        sckt.send(cp.dumps(latencies))
예제 #3
0
            def run(self, _, inp: GroupbyTable):
                """Flatten a grouped table into a single ``Table``.

                Every row of every group yielded by ``inp.get()`` is inserted
                into a new table built from ``inp.schema``; the group keys
                themselves are discarded.
                """
                flattened = Table(inp.schema)

                for _key, group_rows in inp.get():
                    for record in group_rows.get():
                        flattened.insert(record)

                return flattened
예제 #4
0
def run(cloudburst: CloudburstConnection,
        num_requests: int,
        data_size: str,
        do_optimize: bool):
    """Build, deploy and time the 'colocate-benchmark' flow.

    The flow maps ``stage1`` over the input, fans the result out into five
    ``stage2`` branches and joins them again. One fixed single-row input is
    then run ``num_requests`` times and the per-request latencies are handed
    to ``print_latency_stats``.

    Args:
        cloudburst: Connection the flow is created with.
        num_requests: Number of timed flow executions.
        data_size: Key into ``DATA_SIZES``; the looked-up value is the
            'size' column of the input row.
        do_optimize: When true, the flow is passed through ``optimize``
            before deployment.
    """

    def stage1(self, row: Row) -> bytes:
        # Produces a random array whose length is the row's 'size' value.
        import numpy as np

        return np.random.rand(row['size'])

    def stage2(self, row: Row) -> int:
        # Constant output; the branch ignores its input row.
        return 3

    print(f'Creating flow with {data_size} ({DATA_SIZES[data_size]}) inputs.')

    flow = Flow('colocate-benchmark', FlowType.PUSH, cloudburst)
    source = flow.map(stage1)

    # Create all five branches first, then chain the joins left to right.
    branches = []
    for column in ('val1', 'val2', 'val3', 'val4', 'val5'):
        branches.append(source.map(stage2, names=[column]))

    joined = branches[0]
    for branch in branches[1:]:
        joined = joined.join(branch)

    if do_optimize:
        flow = optimize(flow, rules=optimize_rules)
        print('Flow has been optimized...')

    flow.deploy()
    print('Flow successfully deployed!')

    request_table = Table([('size', IntType)])
    request_table.insert([DATA_SIZES[data_size]])

    timings = []
    print('Starting benchmark...')
    for request_no in range(num_requests):
        if request_no > 0 and request_no % 100 == 0:
            print(f'On request {request_no}...')

        began = time.time()
        flow.run(request_table).get()
        finished = time.time()

        timings.append(finished - began)

    print_latency_stats(timings, 'E2E')
예제 #5
0
def classify_language(self, table: Table) -> (str, str):
    """Pair each row's predicted language tag with its 'translate' text.

    The 'classify' column of every row is sent to ``self.model.predict`` in
    one call. The first element of the prediction result is treated as the
    per-input label list, and the text after the last underscore of each
    label's first entry becomes the tag.

    Returns:
        A list of ``[tag, translate_text]`` pairs in row order.
    """
    sentences = [row['classify'] for row in table.get()]

    raw_labels = self.model.predict(sentences)[0]
    tags = [label[0].split('_')[-1] for label in raw_labels]

    return [[tags[position], row['translate']]
            for position, row in enumerate(table.get())]
예제 #6
0
            def run(self, cloudburst, aggregate, column, inp):
                """Aggregate ``column`` of ``inp`` with the selected function.

                Args:
                    cloudburst: Not referenced here.
                    aggregate: One of 'count', 'min', 'max', 'sum' or
                        'average'; also used as the output column name.
                    column: Forwarded to the aggregation function.
                    inp: A table, a ``GroupbyTable``, or ``bytes`` that
                        ``deserialize`` turns into one of them.

                Returns:
                    A table with a ``FloatType`` column named after
                    ``aggregate``. Grouped input yields one row per group,
                    preceded by a column holding the group value. The result
                    is passed through ``serialize`` when the input was bytes.

                Raises:
                    RuntimeError: If ``aggregate`` is not a known name.
                    StopIteration: If a grouped input yields no groups, since
                        the first group is needed to infer the key type.
                """
                serialized = False
                if type(inp) == bytes:
                    serialized = True
                    inp = deserialize(inp)

                # Fail early with a clear message instead of leaving
                # ``aggfn`` unbound for an unrecognized aggregate.
                if aggregate == 'count':
                    aggfn = self.count
                elif aggregate == 'min':
                    aggfn = self.min
                elif aggregate == 'max':
                    aggfn = self.max
                elif aggregate == 'sum':
                    aggfn = self.sum
                elif aggregate == 'average':
                    aggfn = self.average
                else:
                    raise RuntimeError(
                        f'Unknown aggregate function: {aggregate!r}.')

                if isinstance(inp, GroupbyTable):
                    gb_col = inp.col
                    # Peek at the first group to infer the key column's type.
                    val, _ = next(inp.get())
                    gb_typ = get_type(type(val))

                    result = Table([(gb_col, gb_typ), (aggregate, FloatType)])

                    for val, tbl in inp.get():
                        agg = aggfn(tbl, column)
                        result.insert([val, float(agg)])
                else:
                    result = Table([(aggregate, FloatType)])
                    # Was ``aggnf`` (NameError) in the original.
                    result.insert([float(aggfn(inp, column))])

                if serialized:
                    result = serialize(result)

                return result
예제 #7
0
def run(cloudburst: CloudburstConnection, num_requests: int, gamma: int,
        num_replicas: int):
    """Build, optimize, deploy and time a three-stage flow with a slow stage.

    The middle stage sleeps for a gamma-distributed delay and is mapped with
    ``high_variance=True``. ``optimize_rules['compete_replicas']`` is set to
    ``num_replicas`` before the flow is optimized.

    Args:
        cloudburst: Connection the flow is created with.
        num_requests: Number of timed flow executions.
        gamma: Key into ``GAMMA_VALS``; the looked-up value becomes the
            'scale' column of the input row.
        num_replicas: Value stored under ``'compete_replicas'`` in the
            shared ``optimize_rules`` mapping.
    """
    def stage1(self, val: int) -> int:
        return val + 1

    def stage2(self, row: Row) -> float:
        import time
        from scipy.stats import gamma

        # Draw from a gamma distribution with shape 3.0 and the row's scale;
        # the sample is multiplied by 10/1000 before being used as the sleep
        # duration (original comment: "convert to ms").
        delay = gamma.rvs(3.0, scale=row['scale']) * 10 / 1000
        time.sleep(delay)

        return delay

    def stage3(self, row: Row) -> float:
        return row['val']

    banner = (f'Creating flow with {num_replicas} replicas and'
              f' gamma={GAMMA_VALS[gamma]}')
    print(banner)

    flow = Flow('fusion-benchmark', FlowType.PUSH, cloudburst)
    first = flow.map(stage1, col='val')
    second = first.map(stage2, names=['val'], high_variance=True)
    second.map(stage3, names=['val'])

    # Note: this mutates the ``optimize_rules`` object defined outside.
    optimize_rules['compete_replicas'] = num_replicas
    flow = optimize(flow, rules=optimize_rules)
    print('Flow has been optimized...')

    flow.deploy()
    print('Flow successfully deployed!')

    request_table = Table([('val', IntType), ('scale', FloatType)])
    request_table.insert([1, GAMMA_VALS[gamma]])

    timings = []
    print('Starting benchmark...')
    for request_no in range(num_requests):
        if request_no > 0 and request_no % 100 == 0:
            print(f'On request {request_no}...')

        # Pause between requests so the queue can drain.
        time.sleep(.300)

        began = time.time()
        flow.run(request_table).get()
        finished = time.time()

        timings.append(finished - began)

    print_latency_stats(timings, 'E2E')
예제 #8
0
def run(flow, cloudburst, requests, local, sckt=None):
    """Benchmark the flow on randomly chosen images from a local sample set.

    For non-local runs ``imagenet_sample.zip`` is extracted into the current
    directory first; local runs require ``imagenet_sample/imagenet`` to exist
    already. Every file in that directory is loaded as a 224x224 RGB array
    into its own single-row table, and each request picks one at random.

    Args:
        flow: Deployed flow whose ``run(table).get()`` call is timed.
        cloudburst: Not referenced here.
        requests: Number of timed requests.
        local: Selects the setup path above; also passed negated to
            ``print_latency_stats``.
        sckt: Optional socket-like object that receives the serialized
            latency list via ``send``.

    Raises:
        RuntimeError: If the required archive or directory is missing.
    """
    # Local import keeps the block self-contained; stdlib only.
    import zipfile

    prefix = 'imagenet_sample/imagenet'

    if not local:
        if not os.path.exists('imagenet_sample.zip'):
            # The original message named the directory although the missing
            # item on this path is the archive.
            raise RuntimeError(
                'Expect to have the imagenet_sample.zip archive locally.')

        # zipfile replaces the shell call to ``unzip``: extraction errors now
        # raise instead of being ignored, and existing files are overwritten
        # without an interactive prompt.
        with zipfile.ZipFile('imagenet_sample.zip') as archive:
            archive.extractall()
    else:
        if not os.path.exists(prefix):
            raise RuntimeError(
                'Expect to have the imagenet_sample directory locally.')

    files = [os.path.join(prefix, fname) for fname in os.listdir(prefix)]

    inputs = []

    logging.info('Loading input images...')
    for fname in files:
        table = Table([('img', NumpyType)])
        # Close each image file once its pixels are copied into the array.
        with Image.open(fname) as image:
            img = np.array(image.convert('RGB').resize((224, 224)))

        table.insert([img])
        inputs.append(table)

    logging.info('Starting benchmark...')

    latencies = []
    bench_start = time.time()
    for i in range(requests):
        if i % 100 == 0:
            logging.info(f'On request {i}...')

        inp = random.choice(inputs)

        start = time.time()
        flow.run(inp).get()
        end = time.time()

        latencies.append(end - start)
    bench_end = time.time()

    print_latency_stats(latencies, "E2E", not local, bench_end - bench_start)

    if sckt:
        bts = cp.dumps(latencies)
        sckt.send(bts)
예제 #9
0
def english_to_german_gpu(self, table: Table) -> str:
    """Translate the 'translate' column of every row with ``self.model``.

    Returns whatever ``self.model.translate`` returns for the collected
    sentences, or an empty list when the table has no rows.
    """
    sentences = [row['translate'] for row in table.get()]

    # Skip the model call entirely when there is nothing to translate.
    if not sentences:
        return []

    return self.model.translate(sentences)
예제 #10
0
def resnet_model_gpu(self, table: Table) -> (np.ndarray, int, float):
    """
    Classify a batch of images with ``self.resnet`` on the GPU.

    Returns one ``[img, index, perc]`` list per input row: the original
    image, the index of the highest-scoring class, and that class's softmax
    score multiplied by 100. An empty table yields an empty list.
    """
    import torch

    originals = [row['img'] for row in table.get()]

    # torch.stack rejects an empty list, so short-circuit empty batches.
    if not originals:
        return []

    inputs = [torch.from_numpy(img) for img in originals]
    inputs = torch.stack(inputs, dim=0).cuda()

    out = self.resnet(inputs)
    _, indices = torch.sort(out, descending=True)

    # Keep the softmax of EVERY row. The original kept only row 0, so every
    # image after the first was reported with the first image's scores.
    percentages = torch.nn.functional.softmax(out, dim=1) * 100
    percentages = percentages.cpu().detach().numpy()
    top_indices = indices[:, 0].cpu().detach().numpy()

    result = []
    for i, img in enumerate(originals):
        index = top_indices[i].item()
        perc = percentages[i][index].item()

        result.append([img, index, perc])

    return result
예제 #11
0
def english_to_french(self, table: Table) -> str:
    """Translate with ``self.model``, accepting a table or a single value.

    When ``table`` is exactly a ``Table``, the 'translate' column of every
    row is translated; any other argument is wrapped in a one-element list
    and translated as-is. An empty table yields an empty list.
    """
    if type(table) is Table:
        sentences = [row['translate'] for row in table.get()]
    else:
        sentences = [table]

    if not sentences:
        return []

    return self.model.translate(sentences)
예제 #12
0
            def run(self, cloudburst, lookup_key, dynamic: bool, input_object,
                    inp: Table):
                """Append one looked-up object as a new column on every row.

                With no ``cloudburst`` handle, or when ``dynamic`` is set,
                the object is ``input_object`` and the new column is named
                after the value found under ``lookup_key`` in the first
                input row. Otherwise the object is fetched with
                ``cloudburst.get(lookup_key)`` and the column is named
                ``lookup_key``.

                Bytes input is deserialized first and the result is
                serialized again before returning.
                """
                from flow.types.basic import get_type

                came_serialized = type(inp) is bytes
                if came_serialized:
                    inp = deserialize(inp)

                if cloudburst is None or dynamic:
                    # The first row's value replaces the key and becomes
                    # the name of the appended column.
                    lookup_key = next(inp.get())[lookup_key]
                    obj = input_object
                else:
                    obj = cloudburst.get(lookup_key)

                extended_schema = list(inp.schema)
                extended_schema.append((lookup_key, get_type(type(obj))))

                new_table = Table(extended_schema)
                for row in inp.get():
                    copied = [row[name] for name, _ in inp.schema]
                    copied.append(obj)
                    new_table.insert(copied)

                return serialize(new_table) if came_serialized else new_table
예제 #13
0
            def run(self, _, fn, group, inp):
                """Keep only the rows (or groups) for which ``fn`` is truthy.

                ``inp`` may be a single table or, when batching, a list of
                tables; either may arrive as bytes, in which case the output
                is serialized again. With ``group`` set, ``fn`` is evaluated
                on the first row of each group and whole groups are kept or
                dropped.

                Raises:
                    RuntimeError: If ``group`` is set but the input is not a
                        ``GroupbyTable``.
                """
                batching = isinstance(inp, list)

                # Undo custom serialization, remembering to redo it later.
                if batching:
                    serialized = type(inp[0]) is bytes
                    if serialized:
                        inp = [deserialize(tbl) for tbl in inp]
                else:
                    serialized = type(inp) is bytes
                    if serialized:
                        inp = deserialize(inp)

                if batching:
                    # Batching is on by default, so outside a multi operator
                    # the arguments arrive as per-request lists. A whole-flow
                    # operator passes plain values even with batching on,
                    # hence the type checks.
                    if type(group) is list:
                        group = group[0]
                    if type(fn) is list:
                        fn = fn[0]

                    inp, mappings = merge_tables(inp)

                if group:
                    if not isinstance(inp, GroupbyTable):
                        raise RuntimeError(
                            "Can't run a group filter over a non-grouped"
                            " table.")

                    result = GroupbyTable(inp.schema, inp.col)
                    for key, members in inp.get():
                        if fn(self, next(members.get())):
                            result.add_group(key, members)
                else:
                    result = Table(inp.schema)
                    for row in inp.get():
                        if fn(self, row):
                            result.insert(row)

                if batching:
                    # Split the merged result back into per-request tables.
                    result = demux_tables(result, mappings)
                    if serialized:
                        result = [serialize(tbl) for tbl in result]
                elif serialized:
                    result = serialize(result)

                return result
예제 #14
0
def run(cloudburst: CloudburstConnection, num_requests: int, num_fns: int,
        data_size: str, do_optimize: bool):
    """Build, deploy and time a linear chain of ``num_fns`` identity maps.

    Args:
        cloudburst: Connection the flow is created with.
        num_requests: Number of timed flow executions.
        num_fns: Number of chained ``fusion_op`` map stages.
        data_size: Key into ``DATA_SIZES``; the looked-up value is the number
            of random bytes placed in the input row.
        do_optimize: When true, the flow is passed through ``optimize``
            before deployment.
    """
    def fusion_op(self, row: Row) -> bytes:
        # Pass the payload through unchanged.
        return row['data']

    banner = (f'Creating flow with {num_fns} operators and {data_size}'
              f' ({DATA_SIZES[data_size]}) inputs.')
    print(banner)

    flow = Flow('fusion-benchmark', FlowType.PUSH, cloudburst)

    # Each new stage is appended to the end of the chain built so far.
    tail = flow
    for _ in range(num_fns):
        tail = tail.map(fusion_op, names=['data'])

    if do_optimize:
        flow = optimize(flow, rules=optimize_rules)
        print('Flow has been optimized...')

    flow.deploy()
    print('Flow successfully deployed!')

    request_table = Table([('data', BtsType)])
    request_table.insert([os.urandom(DATA_SIZES[data_size])])

    timings = []
    print('Starting benchmark...')
    for request_no in range(num_requests):
        if request_no > 0 and request_no % 100 == 0:
            print(f'On request {request_no}...')

        began = time.time()
        flow.run(request_table).get()
        finished = time.time()

        timings.append(finished - began)

    print_latency_stats(timings, 'E2E')
예제 #15
0
            def run(self, _, col: str, inp: Table):
                """Group the rows of ``inp`` by column ``col``.

                Bytes input is deserialized first, and in that case the
                resulting ``GroupbyTable`` is serialized before it is
                returned.
                """
                came_serialized = type(inp) is bytes
                if came_serialized:
                    inp = deserialize(inp)

                grouped = GroupbyTable(inp.schema, col)
                for record in inp.get():
                    grouped.add_row(record)

                return serialize(grouped) if came_serialized else grouped
예제 #16
0
def inceptionv3_model_gpu(self, table: Table) -> (int, float):
    """Classify a batch of images with ``self.incept`` on the GPU.

    Returns one ``[index, perc]`` list per input row: the index of the
    highest-scoring class and that class's softmax score multiplied by 100.
    An empty table yields an empty list.
    """
    import torch

    # Shortcut for empty input.
    if table.size() == 0:
        return []

    originals = [row['img'] for row in table.get()]
    inputs = [torch.from_numpy(img) for img in originals]
    inputs = torch.stack(inputs, dim=0).cuda()

    out = self.incept(inputs)
    _, indices = torch.sort(out, descending=True)

    # Keep the softmax of EVERY row. The original kept only row 0, so every
    # image after the first was reported with the first image's scores.
    percentages = torch.nn.functional.softmax(out, dim=1) * 100
    percentages = percentages.cpu().detach().numpy()
    top_indices = indices[:, 0].cpu().detach().numpy()

    result = []
    for i in range(len(originals)):
        index = top_indices[i].item()
        perc = percentages[i][index].item()

        result.append([index, perc])

    return result
예제 #17
0
def cascade_predict_batch(self, table: Table) -> str:
    """Pick a final class name per row from the two models' predictions.

    The ResNet prediction is used when the row carries no Inception
    probability (``None``) or when ResNet's maximum probability is strictly
    higher; otherwise the Inception prediction is used.

    Returns:
        A list with one entry of ``self.classes`` per row.
    """
    results = []
    for row in table.get():
        resnet_index = row['resnet_index']
        resnet_max_prob = row['resnet_max_prob']
        incept_index = row['incept_index']
        incept_max_prob = row['incept_max_prob']

        # A missing Inception probability means the row never reached that
        # model; the short-circuit also avoids comparing against None.
        prefer_resnet = (incept_max_prob is None
                         or resnet_max_prob > incept_max_prob)
        winner = resnet_index if prefer_resnet else incept_index

        results.append(self.classes[winner])

    return results
예제 #18
0
    def resnet_model_cpu(self, table: Table) -> str:
        """
        Classify a batch of images with ``self.resnet`` on the CPU.

        Each row's 'img' value goes through ``self.transforms``; the stacked
        batch is scored by the model and the top-ranked class index of every
        row is mapped through ``self.classes``.
        """
        import torch

        batch = torch.stack(
            [self.transforms(row['img']) for row in table.get()], dim=0)

        scores = self.resnet(batch)
        _, ranking = torch.sort(scores, descending=True)
        ranking = ranking.detach().numpy()

        # Column 0 of each sorted row is the best-scoring class.
        return [self.classes[ranked[0]] for ranked in ranking]
예제 #19
0
def run(cloudburst: CloudburstConnection,
        num_requests: int,
        data_size: str,
        breakpoint: bool,
        do_optimize: bool):
    """Benchmark a flow that looks up a stored array per request and sums it.

    ``NUM_DATA_POINTS`` random arrays are stored under keys ``data-1`` to
    ``data-N``. The flow derives a key from the request number, looks it up
    dynamically and sums the array. Latencies are written to ``data.bts``
    and passed to ``print_latency_stats``.

    Args:
        cloudburst: Connection used to store the arrays and build the flow.
        num_requests: Number of timed flow executions.
        data_size: Key into ``DATA_SIZES`` giving each array's length.
        breakpoint: Stored in ``optimize_rules['breakpoint']``; when true a
            warmup pass and a 15 second pause precede the benchmark.
        do_optimize: When true, the flow is passed through ``optimize``
            before deployment.
    """
    print('Creating data...')
    size = DATA_SIZES[data_size]
    for i in range(1, NUM_DATA_POINTS+1):
        arr = np.random.rand(size)
        cloudburst.put_object('data-' + str(i), arr)

    def stage1(self, row: Row) -> (int, str):
        # Ten consecutive request numbers share one key; keys start at 1.
        idx = int(row['req_num'] / 10) + 1
        key = 'data-%d' % (idx)

        return idx, key

    def stage2(self, row: Row) -> str:
        import numpy as np
        # Presumably the lookup stage stores the fetched array in a column
        # named after the key's value - confirm against the lookup operator.
        arr = row[row['key']]

        # NOTE(review): annotated ``str`` but returns a float; left as-is
        # because the annotation may be read by the flow framework.
        return float(np.sum(arr))

    print(f'Creating flow with {data_size} ({DATA_SIZES[data_size]}) inputs.')

    flow = Flow('locality-benchmark', FlowType.PUSH, cloudburst)
    flow.map(stage1, names=['index', 'key']) \
        .lookup('key', dynamic=True) \
        .map(stage2, names=['sum'])

    # Note: this mutates the ``optimize_rules`` object defined outside.
    optimize_rules['breakpoint'] = breakpoint
    if do_optimize:
        flow = optimize(flow, rules=optimize_rules)
        print('Flow has been optimized...')

    flow.deploy()
    print('Flow successfully deployed!')

    latencies = []

    if breakpoint:
        print('Starting warmup...')
        # Request numbers 0, 10, 20, ... touch every stored key exactly once.
        for i in range(NUM_DATA_POINTS):
            inp = Table([('req_num', IntType)])
            inp.insert([i * 10])

            flow.run(inp).get()

        print('Pausing to let cache metadata propagate...')
        time.sleep(15)

    print('Starting benchmark...')
    for i in range(num_requests):
        if i % 100 == 0 and i > 0:
            print(f'On request {i}...')

        inp = Table([('req_num', IntType)])
        inp.insert([i])

        start = time.time()
        flow.run(inp).get()
        end = time.time()

        latencies.append(end - start)

    # Serialize before opening the file so a failed import or dump does not
    # leave an empty, truncated data.bts behind.
    from cloudburst.shared.serializer import Serializer
    bts = Serializer().dump(latencies)
    with open('data.bts', 'wb') as f:
        f.write(bts)

    print_latency_stats(latencies, 'E2E')
예제 #20
0
def run(cloudburst: CloudburstConnection, num_requests: int, batch_size: int,
        gpu: bool):
    """Benchmark a single batched ResNet map stage at a given batch size.

    Class names are read from ``imagenet_classes.txt`` and stored under
    'imagenet-classes'. Each benchmark iteration submits ``batch_size``
    runs of the same image and polls the KVS until every result key has a
    value; the time spent polling is recorded as that iteration's latency.

    Args:
        cloudburst: Connection used for storage, the flow and KVS polling.
        num_requests: Number of benchmark iterations.
        batch_size: Flow runs submitted per iteration.
        gpu: Selects the GPU init/model functions (with a 50-run warmup)
            instead of the CPU ones.
    """

    with open('imagenet_classes.txt', 'r') as f:
        classes = [line.strip() for line in f]

    cloudburst.put_object('imagenet-classes', classes)

    def resnet_init_gpu(self, cloudburst):
        # Load the saved model onto the GPU and prepare preprocessing.
        import os

        import torch
        import torchvision
        from torchvision import transforms

        tpath = os.path.join(os.getenv('TORCH_HOME'), 'checkpoints')
        self.resnet = torch.load(os.path.join(tpath, 'resnet101.model')).cuda()
        self.resnet.eval()

        self.transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        self.classes = cloudburst.get('imagenet-classes')

    def resnet_model_gpu(self, table: Table) -> str:
        """
        Classify a batch of images with ``self.resnet`` on the GPU.
        """
        import torch

        inputs = []
        for row in table.get():
            img = self.transforms(row['img'])
            inputs.append(img)

        inputs = torch.stack(inputs, dim=0).cuda()
        output = self.resnet(inputs)
        _, indices = torch.sort(output, descending=True)
        indices = indices.cpu().detach().numpy()

        # Column 0 of each sorted row is the best-scoring class.
        result = []
        for idx_set in indices:
            index = idx_set[0]
            result.append(self.classes[index])

        return result

    def resnet_init_cpu(self, cloudburst):
        # Load the saved model on the CPU and prepare preprocessing.
        import os

        import torch
        import torchvision
        from torchvision import transforms

        tpath = os.path.join(os.getenv('TORCH_HOME'), 'checkpoints')
        self.resnet = torch.load(os.path.join(tpath, 'resnet101.model'))

        self.resnet.eval()

        self.transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        self.classes = cloudburst.get('imagenet-classes')

    def resnet_model_cpu(self, table: Table) -> str:
        """
        Classify a batch of images with ``self.resnet`` on the CPU.
        """
        import torch

        inputs = []
        for row in table.get():
            img = self.transforms(row['img'])
            inputs.append(img)

        inputs = torch.stack(inputs, dim=0)
        output = self.resnet(inputs)
        _, indices = torch.sort(output, descending=True)
        indices = indices.detach().numpy()

        # Column 0 of each sorted row is the best-scoring class.
        result = []
        for idx_set in indices:
            index = idx_set[0]
            result.append(self.classes[index])

        return result

    print(f'Creating flow with size {batch_size} batches.')

    flow = Flow('batching-benchmark', FlowType.PUSH, cloudburst)
    if gpu:
        flow.map(resnet_model_gpu,
                 init=resnet_init_gpu,
                 names=['class'],
                 gpu=True,
                 batching=True)
    else:
        flow.map(resnet_model_cpu,
                 init=resnet_init_cpu,
                 names=['class'],
                 batching=True)

    flow.deploy()
    print('Flow successfully deployed!')

    latencies = []
    inp = Table([('img', NumpyType)])
    img = np.array(Image.open('panda.jpg').convert('RGB').resize((224, 224)))

    inp.insert([img])

    kvs = cloudburst.kvs_client

    if gpu:
        print('Starting GPU warmup...')
        for _ in range(50):
            flow.run(inp).get()
        print('Finished warmup...')

    print('Starting benchmark...')
    for i in range(num_requests):
        if i % 100 == 0 and i > 0:
            print(f'On request {i}...')

        futs = []
        for _ in range(batch_size):
            futs.append(flow.run(inp))
        pending = {fut.obj_id for fut in futs}

        # Break these apart to batch the KVS get requests. The clock starts
        # after submission, so only the wait for results is measured.
        start = time.time()
        while len(pending) > 0:
            response = kvs.get(list(pending))

            for key in response:
                if response[key] is not None:
                    pending.discard(key)

        end = time.time()
        latencies.append(end - start)

    # Mean latency times the iteration count is the total measured time.
    compute_time = np.mean(latencies) * num_requests
    tput = (batch_size * num_requests) / (compute_time)
    print('THROUGHPUT: %.2f' % (tput))
    print_latency_stats(latencies, 'E2E')
예제 #21
0
            def run(self, _, on, how, left, right):
                """Join ``left`` and ``right`` on the column ``on``.

                Args:
                    _: Not referenced here.
                    on: Column name compared for equality between rows.
                    how: 'left' and 'outer' also keep unmatched left rows;
                        'outer' additionally keeps unmatched right rows.
                        Any other value keeps only matching pairs.
                    left, right: Tables, lists of tables (batching), or
                        bytes that ``deserialize`` turns into tables.

                Returns:
                    The joined table; serialized when the inputs were bytes,
                    and split per request when the inputs were lists.
                """
                serialized = False
                if type(left) == bytes:
                    left = deserialize(left)
                    right = deserialize(right)
                    serialized = True

                # Note: We currently don't support batching with custom
                # serialization for joins. Shouldn't be hard to implement but
                # skipping it for expediency.
                batching = False
                if type(left) == list:
                    batching = True
                    # merge_tables is unpacked as (table, mappings) by the
                    # other operators in this file; the original reversed the
                    # order here and bound the mappings to left/right.
                    left, _unused = merge_tables(left)
                    right, mappings = merge_tables(right)

                new_schema = merge_schema(left.schema, right.schema)
                result = Table(new_schema)
                ljoin = (how == 'left')
                ojoin = (how == 'outer')

                # Positions of right rows that matched at least one left row,
                # needed for outer joins.
                matched_right = set()

                for lrow in left.get():
                    lrow_inserted = False

                    # enumerate advances the position for every right row.
                    # The original only advanced it on a match, so recorded
                    # positions did not correspond to real right rows.
                    for idx, rrow in enumerate(right.get()):
                        if lrow[on] == rrow[on]:
                            new_row = merge_row(lrow, rrow, new_schema)
                            result.insert(new_row)
                            lrow_inserted = True

                            matched_right.add(idx)

                    if not lrow_inserted and (ljoin or ojoin):
                        # Pad the missing right side with None values.
                        rvals = [None] * len(right.schema)
                        rrow = Row(right.schema, rvals, lrow[Row.qid_key])
                        new_row = merge_row(lrow, rrow, new_schema)
                        result.insert(new_row)

                if ojoin:
                    for idx, row in enumerate(right.get()):
                        if idx not in matched_right:
                            # Pad the missing left side with None values.
                            lvals = [None] * len(left.schema)
                            lrow = Row(left.schema, lvals, row[Row.qid_key])
                            new_row = merge_row(lrow, row, new_schema)
                            result.insert(new_row)

                if serialized:
                    result = serialize(result)

                if batching:
                    result = demux_tables(result, mappings)

                return result
예제 #22
0
    flow = optimize(flow, rules=optimize_rules)

    print('Deploying flow...')
    flow.deploy()

    local = args.local[0].lower() == 'true'
    if local:
        run(flow, cloudburst, args.requests[0], local)
    else:
        flow.cloudburst = None  # Hack to serialize and send flow.
        queue = [flow]
        while len(queue) > 0:
            op = queue.pop(0)
            op.cb_fn = None

            queue.extend(op.downstreams)

        sockets = []

        benchmark_ips = []
        with open('benchmarks.txt', 'r') as f:
            benchmark_ips = [line.strip() for line in f.readlines()]

        sample_input = Table([('img', NumpyType)])
        img = np.array(
            Image.open('panda.jpg').convert('RGB').resize((224, 224)))
        sample_input.insert([img])

        run_distributed_benchmark(flow, args.requests[0], 'cascade',
                                  args.threads[0], benchmark_ips, sample_input)
예제 #23
0
    a_index = predict_row['alexnet_index']
    a_perc = predict_row['alexnet_perc']
    r_index = predict_row['resnet_index']
    r_perc = predict_row['resnet_perc']
    all_percentages = (a_perc + r_perc) / 2
    indices = np.argsort(all_percentages)[::-1]
    return classes[indices[0]]


import base64
import sys

from cloudburst.client.client import CloudburstConnection

# Build a one-row input table holding the base64-encoded image.
table = Table([('img', StrType)])
# Use a context manager so the image file handle is closed after reading;
# the original left it open.
with open('panda.jpg', "rb") as image_file:
    img = base64.b64encode(image_file.read()).decode('ascii')

table.insert([img])

cloudburst = CloudburstConnection(sys.argv[1], '3.226.122.35')
flow = Flow('ensemble-flow', FlowType.PUSH, cloudburst)
# Note: ``img`` is rebound here from the encoded string to the flow stage.
img = flow.map(transform, init=transform_init, names=['img'])

# Fan the transformed image out to both models, then join their outputs and
# combine them into a single predicted class.
anet = img.map(alexnet_model, init=alexnet_init, names=['alexnet_index', 'alexnet_perc'])
rnet = img.map(resnet_model, init=resnet_init, names=['resnet_index', 'resnet_perc'])
anet.join(rnet).map(ensemble_predict, names=['class'])

flow.deploy()

from cloudburst.server.benchmarks.utils import print_latency_stats
예제 #24
0
    #     product_set = np.random.randn(2500, 512)
    #     key = 'category-' + str(i)
    #     cloudburst.put_object(key, product_set)

    print('Deploying flow...')
    flow.deploy()

    print('Starting warmup phase...')
    for i in range(NUM_PRODUCT_SETS):
        if i % 100 == 0:
            print(f'On warmup {i}...')
        uid = np.random.randint(NUM_USERS)
        recent = np.array([i, 0, 0, 0, 0])

        inp = Table([('user', StrType), ('recent', NumpyType)])
        inp.insert([str(uid), recent])

        flow.run(inp).get()

    print('Starting benchmark...')

    local = args.local[0].lower() == 'true'
    if local:
        run(flow, cloudburst, args.requests[0], local)
    else:
        flow.cloudburst = None  # Hack to serialize and send flow.
        queue = [flow]
        while len(queue) > 0:
            op = queue.pop(0)
            op.cb_fn = None
예제 #25
0
cloudburst.list()

import random
import string

# A random six-letter suffix keeps the registered names unique per run.
salt = "".join(random.choices(string.ascii_letters, k=6))
fn_name = "square-2"+salt

# Register a squaring function, call it directly, then through a DAG.
print("Running sanity check")
cloud_sq = cloudburst.register(lambda _, x: x * x, fn_name)
print(cloud_sq(2).get())
cloudburst.delete_dag("dag")
cloudburst.register_dag("dag", [fn_name], [])
print(cloudburst.call_dag("dag", {fn_name: [2]}).get())

# Build a map-then-filter flow and run it over a three-row table.
print("Running example flow")
dataflow = Flow("example-flow"+salt, FlowType.PUSH, cloudburst)
dataflow.map(map_fn, names=["sum"]).filter(filter_fn)

table = Table([("a", IntType), ("b", IntType)])
for b_value in (2, 3, 4):
    table.insert([1, b_value])

dataflow.register()
dataflow.deploy()

print(dataflow)
print("deployed")
print(dataflow.run(table).get())
예제 #26
0
def transform_batch(self, table: Table) -> np.ndarray:
    """Apply ``self.transform`` to each row's 'img' value.

    Returns:
        A list with one NumPy array per row, obtained by detaching the
        transform's output and converting it with ``.numpy()``.
    """
    arrays = []
    for row in table.get():
        transformed = self.transform(row['img'])
        arrays.append(transformed.detach().numpy())
    return arrays
예제 #27
0
            def run(self, cloudburst, fn, fntype, col, names, inp):
                """Apply ``fn`` to ``inp`` and return the mapped table(s).

                Args:
                    cloudburst: Used only to store the result when
                        ``self.send_broadcast`` is set.
                    fn: The user function. It receives the whole table when
                        ``self.batching`` or ``self.multi`` is set, otherwise
                        one row (or one column value when ``col`` is given).
                    fntype: Its ``ret`` sequence supplies the output types.
                    col: Column to map over, or ``None`` to map whole rows.
                    names: Output column names; may be empty.
                    inp: A table, a ``GroupbyTable``, a list of tables
                        (batching), or the bytes form of any of these.

                Returns:
                    The mapped table, a list of tables when batching, the
                    serialized form when the input was bytes, or a generated
                    key string when ``self.send_broadcast`` is set.
                """
                # Merge all of the tables.
                serialized = False
                batching = self.batching and isinstance(inp, list)
                if batching:
                    if type(inp[0]) == bytes:
                        inp = [deserialize(tbl) for tbl in inp]
                        serialized = True

                    # inp will be a list of Tables. If it is not, this is
                    # part of a MultiOperator, and everything is taken care
                    # of for us.
                    merged, mappings = merge_tables(inp)
                    inp = merged

                    # This will all be repeated because of the way
                    # Cloudburst's batching works, so we just pick the first
                    # one. But we check because even with batching enabled,
                    # in a multi operator, we will not have to deal with this.
                    if type(fn) == list:
                        fn = fn[0]
                    if type(fntype) == list:
                        fntype = fntype[0]
                    if type(col) == list:
                        col = col[0]
                    if type(names) == list and type(names[0]) == list:
                        names = names[0]
                else:
                    if type(inp) == bytes:
                        inp = deserialize(inp)
                        serialized = True

                # Output schema: whole-row maps use the function's return
                # types; column maps replace just the mapped column.
                schema = []
                if col is None:
                    if len(names) != 0:
                        schema = list(zip(names, fntype.ret))
                    else:
                        for i in range(len(fntype.ret)):
                            schema.append((str(i), fntype.ret[i]))
                else:
                    for name, tp in inp.schema:
                        if name != col:
                            schema.append((name, tp))
                        else:
                            if len(names) != 0:
                                schema.append((names[0], fntype.ret[0]))
                            else:
                                schema.append((name, fntype.ret[0]))

                def map_table(tbl):
                    # Apply fn to one flat table and return the mapped Table.
                    mapped = Table(list(schema))

                    if self.batching or self.multi:
                        for val in fn(self, tbl):
                            if type(val) == tuple:
                                val = list(val)
                            elif type(val) != list:
                                val = [val]

                            mapped.insert(val)
                        return mapped

                    for row in tbl.get():
                        if col is None:
                            vals = fn(self, row)
                            if type(vals) == tuple:
                                vals = list(vals)
                            elif type(vals) != list:
                                vals = [vals]

                            mapped.insert(vals, row[Row.qid_key])
                        else:
                            val = fn(self, row[col])
                            new_vals = [val if name == col else row[name]
                                        for name, _ in tbl.schema]

                            mapped.insert(new_vals, row[Row.qid_key])
                    return mapped

                if isinstance(inp, GroupbyTable):
                    # The original recursed into self.run with too few
                    # arguments (TypeError). Mapping each group through the
                    # helper also keeps the broadcast step below from
                    # running once per group.
                    result = GroupbyTable(schema, inp.col)
                    for group, gtable in inp.get():
                        result.add_group(group, map_table(gtable))
                else:
                    result = map_table(inp)

                if batching:  # Unmerge all the tables.
                    result = demux_tables(result, mappings)

                    if serialized:
                        result = [serialize(tbl) for tbl in result]
                else:
                    if serialized:
                        result = serialize(result)

                if self.send_broadcast:
                    # Store the result and hand back its key instead.
                    import uuid
                    uid = str(uuid.uuid4())
                    cloudburst.put(uid, result)
                    result = uid

                return result
예제 #28
0
                        dest='benchmarks',
                        required=True)

    args = parser.parse_args()

    benchmark_ips = []
    with open(args.benchmarks[0], 'r') as f:
        benchmark_ips = f.readlines()

    cloudburst = CloudburstConnection(args.cloudburst[0], args.ip[0])
    print('Successfully connected to Cloudburst')

    flow = Flow('scaling-benchmark', FlowType.PUSH, cloudburst)
    flow.map(stage1, names=['val']).map(stage2, names=['val'])

    table = Table([('val', IntType)])

    table.insert([1])

    num_bench = len(benchmark_ips)
    num_start = int(start_percent * num_bench)

    flow.cloudburst = None  # Hack to serialize and send flow.
    queue = [flow]
    while len(queue) > 0:
        op = queue.pop(0)
        op.cb_fn = None

        queue.extend(op.downstreams)

    flow = cp.dumps(flow)
예제 #29
0
    flow = optimize(flow, rules=optimize_rules)

    print('Deploying flow...')
    flow.deploy()

    print('Starting benchmark...')

    local = args.local[0].lower() == 'true'
    if local:
        run(flow, cloudburst, args.requests[0], local)
    else:
        flow.cloudburst = None  # Hack to serialize and send flow.
        queue = [flow]
        while len(queue) > 0:
            op = queue.pop(0)
            op.cb_fn = None

            queue.extend(op.downstreams)

        sockets = []

        benchmark_ips = []
        with open('benchmarks.txt', 'r') as f:
            benchmark_ips = [line.strip() for line in f.readlines()]

        sample_input = Table([('classify', StrType), ('translate', StrType)])
        sample_input.insert(['Je m\'appelle Pierre.', 'How are you?'])

        run_distributed_benchmark(flow, args.requests[0], 'nmt',
                                  args.threads[0], benchmark_ips, sample_input)