Example #1
    def test_exec_causal_dag_sink(self):
        '''
        Tests that the last function in a causal DAG executes correctly and
        stores the result in the KVS. Also checks to make sure that causal
        metadata is properly created.
        '''
        def func(_, x):
            return x * x

        fname = 'square'
        arg = 2
        dag = create_linear_dag([func], [fname], self.kvs_client, 'dag',
                                MultiKeyCausalLattice)
        schedule, triggers = self._create_fn_schedule(dag, arg, fname, [fname],
                                                      MULTI)
        schedule.output_key = 'output_key'
        schedule.client_id = '12'

        # We know that there is only one trigger. We populate dependencies
        # explicitly in this trigger message to make sure that they are
        # reflected in the final result.
        kv = triggers['BEGIN'].dependencies.add()
        kv.key = 'dependency'
        DEFAULT_VC.serialize(kv.vector_clock)

        exec_dag_function(self.pusher_cache, self.kvs_client, triggers, func,
                          schedule, self.user_library, {}, {})

        # Assert that there have been 0 messages sent.
        self.assertEqual(len(self.socket.outbox), 0)

        # Retrieve the result and check its value and its metadata.
        result = self.kvs_client.get(schedule.output_key)[schedule.output_key]
        self.assertEqual(type(result), MultiKeyCausalLattice)

        # Check that the vector clock of the output corresponds to the client
        # ID.
        self.assertEqual(result.vector_clock,
                         VectorClock({schedule.client_id: 1}, True))

        # Check that the dependencies of the output match those specified in
        # the trigger.
        self.assertEqual(len(result.dependencies.reveal()), 1)
        self.assertTrue(kv.key in result.dependencies.reveal())
        self.assertEqual(result.dependencies.reveal()[kv.key], DEFAULT_VC)

        # Check that the output is equal to a local function execution.
        result = serializer.load_lattice(result)[0]
        self.assertEqual(result, func('', arg))
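
For intuition behind the vector-clock assertion above: a VectorClock merge is a pairwise max over per-client counters, and a fresh output written by client '12' carries the clock {'12': 1}. A minimal, self-contained sketch with plain dicts (the helper below is hypothetical, not Cloudburst's VectorClock):

# Minimal sketch of vector-clock merge semantics (hypothetical helper,
# not Cloudburst's VectorClock; for intuition only).
def merge_clocks(a, b):
    """Pairwise max of two clocks represented as plain dicts."""
    merged = dict(a)
    for node, count in b.items():
        merged[node] = max(merged.get(node, 0), count)
    return merged

# A fresh output written by client '12' gets the clock {'12': 1}, which
# is what the test above asserts.
assert merge_clocks({}, {'12': 1}) == {'12': 1}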
Example #2
def _exec_dag_function_causal(pusher_cache, kvs, triggers, function, schedule,
                              user_lib):
    schedule = schedule[0]
    triggers = triggers[0]

    fname = schedule.target_function
    fargs = list(schedule.arguments[fname].values)

    key_version_locations = {}
    dependencies = {}

    for trigger in triggers:
        fargs += list(trigger.arguments.values)

        # Combine the locations of upstream cached key versions from all
        # triggers.
        for addr in trigger.version_locations:
            if addr in key_version_locations:
                key_version_locations[addr].extend(
                    trigger.version_locations[addr].key_versions)
            else:
                key_version_locations[addr] = list(
                    trigger.version_locations[addr].key_versions)

        # Combine the dependency sets from all triggers.
        for dependency in trigger.dependencies:
            vc = VectorClock(dict(dependency.vector_clock), True)
            key = dependency.key

            if key in dependencies:
                dependencies[key].merge(vc)
            else:
                dependencies[key] = vc

    fargs = [serializer.load(arg) for arg in fargs]

    result = _exec_func_causal(kvs, function, fargs, user_lib, schedule,
                               key_version_locations, dependencies)

    this_ref = None
    for ref in schedule.dag.functions:
        if ref.name == fname:
            this_ref = ref  # There must be a match.
            break

    success = True
    if this_ref.type == MULTIEXEC:
        if serializer.dump(result) in this_ref.invalid_results:
            return False, False

    # Create a new trigger with the schedule ID and results of this execution.
    new_trigger = _construct_trigger(schedule.id, fname, result)

    # Serialize the key version location information into this new trigger.
    for addr in key_version_locations:
        new_trigger.version_locations[addr].keys.extend(
            key_version_locations[addr])

    # Serialize the set of dependency versions for causal metadata.
    for key in dependencies:
        dep = new_trigger.dependencies.add()
        dep.key = key
        dependencies[key].serialize(dep.vector_clock)

    is_sink = True
    for conn in schedule.dag.connections:
        if conn.source == fname:
            is_sink = False
            new_trigger.target_function = conn.sink

            dest_ip = schedule.locations[conn.sink]
            sckt = pusher_cache.get(sutils.get_dag_trigger_address(dest_ip))
            sckt.send(new_trigger.SerializeToString())

    if is_sink:
        logging.info('DAG %s (ID %s) completed in causal mode; result at %s.' %
                     (schedule.dag.name, schedule.id, schedule.output_key))

        vector_clock = {}
        okey = schedule.output_key
        if okey in dependencies:
            prev_count = 0
            if schedule.client_id in dependencies[okey]:
                prev_count = dependencies[okey][schedule.client_id]

            dependencies[okey].update(schedule.client_id, prev_count + 1)
            dependencies[okey].serialize(vector_clock)
            del dependencies[okey]
        else:
            vector_clock = {schedule.client_id: 1}

        # Serialize result into a MultiKeyCausalLattice.
        vector_clock = VectorClock(vector_clock, True)
        result = serializer.dump(result)
        dependencies = MapLattice(dependencies)
        lattice = MultiKeyCausalLattice(vector_clock, dependencies,
                                        SetLattice({result}))

        succeed = kvs.causal_put(schedule.output_key, lattice,
                                 schedule.client_id)
        while not succeed:
            succeed = kvs.causal_put(schedule.output_key, lattice,
                                     schedule.client_id)

        # Issues requests to all upstream caches for this particular request
        # and asks them to garbage collect pinned versions stored for the
        # context of this request.
        for cache_addr in key_version_locations:
            gc_address = utils.get_cache_gc_address(cache_addr)
            sckt = pusher_cache.get(gc_address)
            sckt.send_string(schedule.client_id)

    return is_sink, [success]
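
The dependency-combination loop above (dependencies[key].merge(vc)) reduces to a per-key pairwise-max union across all triggers. A standalone sketch of the same logic with plain dicts standing in for VectorClock (assumed merge-by-max semantics):

# Standalone sketch of the dependency-merge loop, using plain dicts in
# place of VectorClock (assumed merge-by-max semantics).
def merge_dependencies(triggers):
    dependencies = {}
    for trigger in triggers:
        for key, clock in trigger.items():
            if key in dependencies:
                # Pairwise max, mirroring dependencies[key].merge(vc).
                for node, count in clock.items():
                    dependencies[key][node] = max(
                        dependencies[key].get(node, 0), count)
            else:
                dependencies[key] = dict(clock)
    return dependencies

# Two triggers that both saw key 'a' at different versions.
merged = merge_dependencies([{'a': {'n1': 1}}, {'a': {'n1': 2, 'n2': 1}}])
assert merged == {'a': {'n1': 2, 'n2': 1}}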
Example #3
# For message sending via the user library.
RECV_INBOX_PORT = 5500

STATISTICS_REPORT_PORT = 7006

# Create a generic error response protobuf.
error = GenericResponse()
error.success = False

# Create a generic success response protobuf.
ok = GenericResponse()
ok.success = True
ok_resp = ok.SerializeToString()

# Create a default vector clock for keys that have no dependencies.
DEFAULT_VC = VectorClock({'base': MaxIntLattice(1)})


def get_func_kvs_name(fname):
    return FUNC_PREFIX + fname


def get_dag_trigger_address(address):
    ip, tid = address.split(':')
    return 'tcp://' + ip + ':' + str(int(tid) + DAG_EXEC_PORT)


def get_statistics_report_address(mgmt_ip):
    return 'tcp://' + mgmt_ip + ':' + str(STATISTICS_REPORT_PORT)
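
get_dag_trigger_address encodes the executor thread into the port: an address string of the form ip:thread_id maps to a TCP endpoint at thread_id + DAG_EXEC_PORT. A quick illustration; the DAG_EXEC_PORT value below is an assumption for this sketch, the real constant is defined elsewhere in the module:

# Illustration of the ip:thread_id -> tcp address scheme; the base port
# value here is an assumption, not the module's real constant.
DAG_EXEC_PORT = 5000  # assumed value for illustration

def get_dag_trigger_address(address):
    ip, tid = address.split(':')
    return 'tcp://' + ip + ':' + str(int(tid) + DAG_EXEC_PORT)

# Thread 3 on 10.0.0.1 listens for triggers on port 5003 under this scheme.
assert get_dag_trigger_address('10.0.0.1:3') == 'tcp://10.0.0.1:5003'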

Example #4
    def test_exec_causal_dag_non_sink_with_ref(self):
        '''
        Creates and executes a non-sink function in a causal-mode DAG. This
        version accesses a KVS key, so we ensure that data is appropriately
        cached and the metadata is passed downstream.
        '''

        # Create two functions intended to be used in sequence.
        def incr(_, x):
            return x + 1

        iname = 'incr'

        def square(_, x):
            return x * x

        sname = 'square'

        # Put the argument into the KVS.
        arg_name = 'arg'
        arg_value = 1
        arg = serializer.dump_lattice(arg_value, MultiKeyCausalLattice)
        self.kvs_client.put(arg_name, arg)

        # Create a DAG and a trigger for the first function in the DAG.
        dag = create_linear_dag([incr, square], [iname, sname],
                                self.kvs_client, 'dag', MultiKeyCausalLattice)
        schedule, triggers = self._create_fn_schedule(
            dag, CloudburstReference(arg_name, True), iname, [iname, sname],
            MULTI)

        exec_dag_function(self.pusher_cache, self.kvs_client, triggers, incr,
                          schedule, self.user_library, {}, {})

        # Assert that there has been a message sent.
        self.assertEqual(len(self.pusher_cache.socket.outbox), 1)

        # Extract that message and check its contents.
        trigger = DagTrigger()
        trigger.ParseFromString(self.pusher_cache.socket.outbox[0])
        self.assertEqual(trigger.id, schedule.id)
        self.assertEqual(trigger.target_function, sname)
        self.assertEqual(trigger.source, iname)
        self.assertEqual(len(trigger.arguments.values), 1)

        # Check the metadata of the key that is cached here after execution.
        locs = trigger.version_locations
        self.assertEqual(len(locs), 1)
        self.assertTrue(self.ip in locs.keys())
        self.assertEqual(len(locs[self.ip].keys), 1)
        kv = locs[self.ip].keys[0]
        self.assertEqual(kv.key, arg_name)
        self.assertEqual(VectorClock(dict(kv.vector_clock), True),
                         arg.vector_clock)

        # Check the metadata of the causal dependency passed downstream.
        self.assertEqual(len(trigger.dependencies), 1)
        kv = trigger.dependencies[0]
        self.assertEqual(kv.key, arg_name)
        self.assertEqual(VectorClock(dict(kv.vector_clock), True),
                         arg.vector_clock)

        val = serializer.load(trigger.arguments.values[0])
        self.assertEqual(val, incr('', arg_value))
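
The test above relies on a serialize/parse round-trip: the executor calls SerializeToString() on a DagTrigger and pushes the bytes into the socket outbox, and the test recovers them with ParseFromString. A toy round-trip using a stand-in dataclass instead of the real protobuf shows the pattern:

import json
from dataclasses import dataclass, asdict

# Toy stand-in for the DagTrigger protobuf round-trip the test performs
# (serialize into an outbox, parse the raw bytes back out).
@dataclass
class ToyTrigger:
    id: str
    target_function: str
    source: str

outbox = []
trigger = ToyTrigger(id='dag-1', target_function='square', source='incr')
outbox.append(json.dumps(asdict(trigger)).encode())  # like SerializeToString()

parsed = ToyTrigger(**json.loads(outbox[0]))          # like ParseFromString()
assert parsed.target_function == 'square'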
Example #5
    def _deserialize(self, tup):
        if tup.lattice_type == LWW:
            # Deserialize last-writer-wins lattices
            val = LWWValue()
            val.ParseFromString(tup.payload)

            return LWWPairLattice(val.timestamp, val.value)
        elif tup.lattice_type == SET:
            # Deserialize unordered-set lattices
            s = SetValue()
            s.ParseFromString(tup.payload)

            result = set()
            for k in s.values:
                result.add(k)

            return SetLattice(result)
        elif tup.lattice_type == ORDERED_SET:
            # Deserialize ordered-set lattices
            res = ListBasedOrderedSet()
            val = SetValue()
            val.ParseFromString(tup.payload)
            for v in val.values:
                res.insert(v)

            return OrderedSetLattice(res)

        elif tup.lattice_type == SINGLE_CAUSAL:
            # Deserialize single-key causal lattices
            val = SingleKeyCausalValue()
            val.ParseFromString(tup.payload)

            # Deserialize the vector_clock stored in the Protobuf into a
            # MapLattice, where each value is a MaxIntLattice of the VC
            # counter.
            vc = VectorClock(val.vector_clock, True)

            # Create a SetLattice with the value(s) stored by this lattice.
            values = set()
            for v in val.values:
                values.add(v)

            return SingleKeyCausalLattice(vc, SetLattice(values))

        elif tup.lattice_type == MULTI_CAUSAL:
            # Deserialize multi-key causal lattices
            val = MultiKeyCausalValue()
            val.ParseFromString(tup.payload)

            # Deserialize the vector_clock stored in the Protobuf into a
            # MapLattice, where each value is a MaxIntLattice of the VC
            # counter.
            vc = VectorClock(val.vector_clock, True)

            # Deserialize the set of dependencies of this key into a MapLattice
            # where the keys are names of other KVS keys and the values are
            # MapLattices that have the vector clocks for those keys.
            dep_map = {}
            for kv in val.dependencies:
                key = kv.key
                dep_map[key] = VectorClock(kv.vector_clock, True)

            # Create a SetLattice with the value(s) stored by this lattice.
            values = set()
            for v in val.values:
                values.add(v)

            dependencies = MapLattice(dep_map)
            value = SetLattice(values)

            return MultiKeyCausalLattice(vc, dependencies, value)
        else:
            raise ValueError('Unsupported type cannot be deserialized: ' +
                             str(tup.lattice_type))
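
A dispatch table keyed on lattice type is a common alternative to a long if/elif chain like the one above; unknown types fall through to the same ValueError. A design sketch with illustrative stand-in constants and handlers (not Cloudburst's):

# Design-choice sketch: a dispatch table in place of the if/elif chain.
# Constants and handlers are illustrative stand-ins, not Cloudburst's.
LWW, SET = 0, 1

def _load_lww(payload):
    # Stand-in for LWWPairLattice deserialization.
    return ('lww', payload)

def _load_set(payload):
    # Stand-in for SetLattice deserialization.
    return ('set', payload)

_DESERIALIZERS = {LWW: _load_lww, SET: _load_set}

def deserialize(lattice_type, payload):
    try:
        return _DESERIALIZERS[lattice_type](payload)
    except KeyError:
        raise ValueError('Unsupported type cannot be deserialized: '
                         + str(lattice_type))

assert deserialize(SET, b'abc') == ('set', b'abc')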