Code example #1
    def add_ops(self, roots, after_exop=None):
        """
        Add exops needed to compute ops in roots.

        Args:
            roots: A collection of ops whose values are needed.
            after_exop: Where in the list to add the ops. Defaults to the end.

        """
        if after_exop is None:
            after_exop = self.prev_exop

        # Get computation graph ops that are already inserted.
        available = OrderedSet()
        counts = dict()
        parents = defaultdict(OrderedSet)
        ready = OrderedSet()

        # Some ops in roots may have been replaced by other ops; if so, they
        # are in the graph already, although maybe not in this block. Get the
        # op from the exop so we have the current version.
        for op in roots:
            exop = self.computation_decl.get_exop(op, None)
            if exop is not None:
                op = exop.op
            available.add(op)

        while available:
            op = available.pop()
            if op in counts or op in self.all_ops:
                continue

            nchildren = 0
            for child in op.all_deps:
                exop = self.computation_decl.get_exop(child, None)
                if exop is not None:
                    child = exop.op
                if child not in self.all_ops:
                    parents[child].add(op)
                    available.add(child)
                    nchildren += 1
            if nchildren > 0:
                counts[op] = nchildren
            else:
                ready.add(op)

        while ready:
            op = ready.pop()
            after_exop = self.add_op(op, after_exop=after_exop)
            for p in parents.get(op, []):
                count = counts[p] - 1
                if count == 0:
                    ready.add(p)
                    del counts[p]
                else:
                    counts[p] = count
        if len(counts) > 0:
            raise ValueError("Graph not a DAG")
Code example #2
class OrderedSetQueue(Queue):
    """
        https://stackoverflow.com/questions/16506429/check-if-element-is-already-in-a-queue
    """
    def _init(self, maxsize):
        # print("b" * 200)
        self.cacheLock = Lock()
        self.queue = OrderedSet()
    def _put(self, item):
        with self.cacheLock:
            self.queue.add(item)
    def _get(self):
        with self.cacheLock:
            return self.queue.pop(last=False)
    def __contains__(self, item):
        with self.cacheLock:
            with self.mutex:
                return item in self.queue
    def _qsize(self):
        with self.cacheLock:
            return len(self.queue)
    def size(self):
        return self.qsize()
    def toList(self):
        return queueToList(self, maxsize=self.maxsize)
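A hedged usage sketch of the class above, assuming OrderedSet from the `orderedset` package (whose pop(last=False) gives FIFO order) and leaving the toList helper aside: duplicate puts are collapsed, so an item can sit in the queue at most once.

q = OrderedSetQueue()
for item in ("a", "b", "a", "c", "b"):
    q.put(item)
print(q.qsize())    # 3 -- the duplicate "a" and "b" were dropped on insertion
print(q.get())      # "a" -- oldest unique item first
print("b" in q)     # True, via the __contains__ override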
Code example #3
class Poller(object):
    def __init__(self):
        rospy.init_node('poller_node')
        self.rate = rospy.Rate(3)   # 3hz
        self.extractBasestationFromParams()
        self.createCommunicators()
        self.request_list = OrderedSet([])

    def createCommunicators(self):
        self.client = Client(10019)
        self.measurements_publisher = rospy.Publisher('measurements', MeasurementList, queue_size=10)
        self.request_subscriber = rospy.Subscriber("measurements_request", String, self.pushbackRequest)

    def extractBasestationFromParams(self):
        stations = rospy.get_param("/poller_node/basestations")
        self.storeBasestation(stations)

    def storeBasestation(self, stations):
        self.basestations = []
        for station in stations:
            self.basestations.append(Basestation(station[0], float(station[1]), float(station[2])))

    def pushbackRequest(self, msg):
        self.request_list.add(msg.data)

    def measurementsLoop(self):
        while not rospy.is_shutdown():
            while self.request_list:
                station_address = self.request_list.pop()
                self.serveRequest(station_address)
            self.rate.sleep()

    def pollStation(self, station_address):
        return self.client.pollBasestation(station_address)

    def serveRequest(self, station_address):
        try:
            data = self.pollStation(station_address)
            if containsMeasurements(data):
                self.publishMeasurements(extractJson(data), station_address)
        except socket.error:
            pass

    def publishMeasurements(self, measurs, station):
        msg = MeasurementList()
        for el in measurs:
            msg.data.append(self.generateMeasurement(el))
        msg.basestation = station
        msg.header.stamp = rospy.Time.now()
        self.measurements_publisher.publish(msg)

    def generateMeasurement(self, element):
        tmp = Measurement()
        tmp.tag = element['id_tag'].encode('utf-8')
        tmp.measurement = int(element['rssid'])
        return tmp
Code example #4
class SetStack(queue.Queue):
    def _init(self, maxsize):
        self.queue = OrderedSet()

    def _put(self, item):
        self.queue.add(item)

    def _get(self):
        return self.queue.pop(True)

    def hasElement(self,element):
        return element in self.queue
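A hedged usage sketch, assuming pop(True) here means pop(last=True) as in the `orderedset` package: unlike the FIFO OrderedSetQueue earlier, SetStack then behaves as a de-duplicating LIFO stack.

s = SetStack()
for item in ("a", "b", "a", "c"):
    s.put(item)
print(s.get())              # "c" -- the most recently added unique item
print(s.hasElement("a"))    # True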
Code example #5
class SimpleBreadthFirstLearner(TemplateLearner):
    def __init__(self,
                 solver_instance: LPSolver,
                 eval_fn: EvalFunction,
                 max_body_literals=4,
                 do_print=False):
        super().__init__(solver_instance, eval_fn, do_print=do_print)
        self._max_body_literals = max_body_literals

    def initialise_pool(self):
        self._candidate_pool = OrderedSet()

    def put_into_pool(
            self, candidates: typing.Union[Clause, Procedure,
                                           typing.Sequence]) -> None:
        if isinstance(candidates, Clause):
            self._candidate_pool.add(candidates)
        else:
            self._candidate_pool |= candidates

    def get_from_pool(self) -> Clause:
        return self._candidate_pool.pop(0)

    def stop_inner_search(self, eval: typing.Union[int, float], examples: Task,
                          clause: Clause) -> bool:
        if eval > 0:
            return True
        else:
            return False

    def process_expansions(
            self, examples: Task, exps: typing.Sequence[Clause],
            hypothesis_space: TopDownHypothesisSpace
    ) -> typing.Sequence[Clause]:
        # eliminate every clause with more body literals than allowed
        exps = [cl for cl in exps if len(cl) <= self._max_body_literals]

        # check if every clause has solutions
        exps = [(cl, self._solver.has_solution(*cl.get_body().get_literals()))
                for cl in exps]
        new_exps = []

        for ind in range(len(exps)):
            if exps[ind][1]:
                # keep it if it has solutions
                new_exps.append(exps[ind][0])
            else:
                # remove from hypothesis space if it does not
                hypothesis_space.remove(exps[ind][0])

        return new_exps
Code example #6
File: hetr_utils.py (project: leonllm/ngraph)
def find_recvs(fro):
    # Find all the Receivers that fro depends on
    visit = OrderedSet()
    recvs = OrderedSet()
    visit.add(fro)
    while visit:
        v = visit.pop()
        if isinstance(v, RecvOp):
            recvs.add(v)
            visit |= get_iterable(v.send_node())
        else:
            if hasattr(v, 'args'):
                visit.update(v.args)

    return recvs
Code example #7
    class __TransactionSet:
        def __init__(self):
            """Initialize transaction store."""
            self.store = OrderedSet()

        def add(self, transaction):
            self.store.add(transaction)

        def add_multiple(self, transaction_list):
            """Add multiple transactions to the set.

            Since the transactions are re-added to the set (they were in it once),
            we add them at the front of the existing set.
            """
            transaction_list = OrderedSet(transaction_list)
            self.store = transaction_list.union(self.store)

        def contains(self, transaction):
            return transaction in self.store

        def pop(self):
            """Remove and return a transaction from the set."""
            try:
                return self.store.pop(0)
            # Catch KeyError if set is empty
            except KeyError:
                return None

        def discard(self, transaction):
            """Remove the transaction if it was present in the set."""
            self.store.discard(transaction)

        def discard_multiple(self, transaction_list):
            """Remove multiple transactions from the set."""
            for tx in transaction_list:
                self.discard(tx)

        def clear(self):
            """Remove all transactions from the set."""
            self.store.clear()

        def __len__(self):
            return len(self.store)

        def __iter__(self):
            yield from self.store

        def __repr__(self):
            return self.store.__repr__()
Code example #8
File: hetr_utils.py (project: leonllm/ngraph)
def comm_path_exists(fro, to):
    """
    Find a path from fro to to, including paths non-explicit edges from
    a Receiver to its Sender.

    Note- this is a non-standard traversal, as most traversals stop at a Receiver.
    """

    # TODO: Issue #1865 does this correctly handle traversing multiple send-recv junctions
    # from fro to to?

    visit = OrderedSet(fro.args)
    visit.add(fro)
    while visit:
        v = visit.pop()
        if v == to:
            return True
        if isinstance(v, RecvOp):
            visit |= get_iterable(v.send_node())
        else:
            visit.update(v.args)

    return False
Code example #9
class Propagator:
    MAX_REQUESTED_KEYS_TO_KEEP = 1000

    def __init__(self, metrics: MetricsCollector = NullMetricsCollector()):
        self.requests = Requests()
        self.requested_propagates_for = OrderedSet()
        self.metrics = metrics

    # noinspection PyUnresolvedReferences
    def propagate(self, request: Request, clientName):
        """
        Broadcast a PROPAGATE to all other nodes

        :param request: the REQUEST to propagate
        """
        if self.requests.has_propagated(request, self.name):
            logger.trace("{} already propagated {}".format(self, request))
        else:
            with self.metrics.measure_time(MetricsName.SEND_PROPAGATE_TIME):
                self.requests.add_propagate(request, self.name)
                propagate = self.createPropagate(request, clientName)
                logger.debug("{} propagating request {} from client {}".format(
                    self, request.key, clientName),
                             extra={
                                 "cli": True,
                                 "tags": ["node-propagate"]
                             })
                self.send(propagate)

    @staticmethod
    def createPropagate(request: Union[Request, dict],
                        client_name) -> Propagate:
        """
        Create a new PROPAGATE for the given REQUEST.

        :param request: the client REQUEST
        :return: a new PROPAGATE msg
        """
        if not isinstance(request, (Request, dict)):
            logger.error(
                "{}Request not formatted properly to create propagate".format(
                    THREE_PC_PREFIX))
            return
        logger.trace("Creating PROPAGATE for REQUEST {}".format(request))
        request = request.as_dict if isinstance(request, Request) else \
            request
        if isinstance(client_name, bytes):
            client_name = client_name.decode()
        return Propagate(request, client_name)

    # noinspection PyUnresolvedReferences
    def canForward(self, request: Request):
        """
        Determine whether to forward client REQUESTs to replicas, based on the
        following logic:

        - If exactly f+1 PROPAGATE requests are received, then forward.
        - If fewer than f+1 have been received, there is probably no consensus
            on the REQUEST yet, so don't forward.
        - If more than f+1 have been received, the REQUEST has already been
            forwarded to the replicas, so don't forward.

        Even if the node hasn't received the client REQUEST itself, if it has
        received enough number of PROPAGATE messages for the same, the REQUEST
        can be forwarded.

        :param request: the client REQUEST
        """

        if self.requests.forwarded(request):
            return 'already forwarded'

        # If not enough Propagates, don't bother comparing
        if not self.quorums.propagate.is_reached(self.requests.votes(request)):
            return 'not finalised'

        req = self.requests.req_with_acceptable_quorum(request,
                                                       self.quorums.propagate)
        if req:
            self.requests.set_finalised(req)
            return None
        else:
            return 'not finalised'

    # noinspection PyUnresolvedReferences
    def forward(self, request: Request):
        """
        Forward the specified client REQUEST to the other replicas on this node

        :param request: the REQUEST to propagate
        """
        key = request.key
        num_replicas = self.replicas.num_replicas
        logger.debug('{} forwarding request {} to {} replicas'.format(
            self, key, num_replicas))
        self.replicas.pass_message(ReqKey(key))
        self.monitor.requestUnOrdered(key)
        self.requests.mark_as_forwarded(request, num_replicas)

    # noinspection PyUnresolvedReferences
    def recordAndPropagate(self, request: Request, clientName):
        """
        Record the request in the list of requests and propagate.

        :param request:
        :param clientName:
        """
        self.requests.add(request)
        self.propagate(request, clientName)
        self.tryForwarding(request)

    def tryForwarding(self, request: Request):
        """
        Try to forward the request if the required conditions are met.
        See the method `canForward` for the conditions to check before
        forwarding a request.
        """
        cannot_reason_msg = self.canForward(request)
        if cannot_reason_msg is None:
            # If haven't got the client request(REQUEST) for the corresponding
            # propagate request(PROPAGATE) but have enough propagate requests
            # to move ahead
            self.forward(request)
        else:
            logger.trace("{} not forwarding request {} to its replicas "
                         "since {}".format(self, request, cannot_reason_msg))

    def request_propagates(self, req_keys):
        """
        Request PROPAGATEs for the given request keys. Since replicas can
        request PROPAGATEs independently of each other, check if it has
        been requested recently
        :param req_keys:
        :return:
        """
        i = 0
        for digest in req_keys:
            if digest not in self.requested_propagates_for:
                if digest not in self.requests:
                    # Request from all nodes
                    self.request_msg(PROPAGATE, {f.DIGEST.nm: digest})
                else:
                    # Request from nodes that didn't send
                    send_to = [
                        conn for conn in self.nodestack.connecteds
                        if conn not in self.requests[digest].propagates.keys()
                    ]
                    self.request_msg(PROPAGATE, {f.DIGEST.nm: digest},
                                     frm=send_to)

                self._add_to_recently_requested(digest)
                i += 1
            else:
                logger.debug(
                    '{} already requested PROPAGATE recently for {}'.format(
                        self, digest))
        return i

    def _add_to_recently_requested(self, key):
        while len(self.requested_propagates_for
                  ) > self.MAX_REQUESTED_KEYS_TO_KEEP:
            self.requested_propagates_for.pop(last=False)
        self.requested_propagates_for.add(key)
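A hedged illustration of the f+1 PROPAGATE quorum described in canForward: a BFT pool of n nodes tolerates f = (n - 1) // 3 faulty nodes, so f + 1 matching PROPAGATEs guarantee that at least one honest node vouches for the request. The helper below is illustrative and not part of the plenum codebase.

def propagate_quorum(n_nodes: int) -> int:
    # maximum number of faulty nodes the pool tolerates
    f = (n_nodes - 1) // 3
    # f + 1 identical PROPAGATEs imply at least one honest sender
    return f + 1

print(propagate_quorum(4))   # 2
print(propagate_quorum(7))   # 3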
Code example #10
def main():
    f1 = open("processeddata_average.csv", "a")
    f2 = open("processeddata_covered.csv", "a")
    prolog.consult(
        "../inputfiles/StringTransformations_BackgroundKnowledge.pl")
    amount_of_clauses = 500
    chosen_pred = "t"
    minlength = 0
    max_factor_per_length = 4

    _, predicates = createKnowledge(
        "../inputfiles/StringTransformations_BackgroundKnowledge.pl",
        chosen_pred)
    train = readPositiveOfType("../inputfiles/StringTransformationProblems",
                               "train_task")

    totalextension = []
    filtered_predicates = []

    for predicate in predicates:
        if predicate.name not in ["s", chosen_pred
                                  ] and predicate not in filtered_predicates:
            totalextension.append(
                lambda x, predicate=predicate: plain_extension(
                    x, predicate, connected_clauses=True))
            filtered_predicates.append(predicate)

    # create the hypothesis space
    hs = TopDownHypothesisSpace(
        primitives=totalextension,
        head_constructor=c_pred("train_task", 1),
        # TODO: connected_clause may have problems in that it does not want to introduce new variables; not sure, needs checking
        expansion_hooks_keep=[lambda x, y: connected_clause(x, y)],
        expansion_hooks_reject=[
            lambda x, y: has_singleton_vars(x, y),
            lambda x, y: has_duplicated_literal(x, y),
            # TODO: check for errors
            lambda x, y: has_all_same_vars_in_literal(x, y)
        ])
    clauses_used = 0
    possible_candidates = OrderedSet()
    put_into_pool(possible_candidates, hs.get_current_candidate())
    amount_of_length = 0
    previouslength = 0

    while clauses_used < amount_of_clauses:
        current_cand = possible_candidates.pop(0)
        # expand the candidate
        _ = hs.expand(current_cand)
        # important: the .expand() method returns candidates only the first time it is called;
        #     if the same node is expanded a second time, it returns an empty list,
        #     so it is safer to retrieve the children with .get_successors_of
        exps = hs.get_successors_of(current_cand)
        expa = process_expansions(exps, hs)
        put_into_pool(possible_candidates, expa)

        if previouslength < len(current_cand):
            amount_of_length = 0

        if random.random() < 0.5 and len(
                current_cand
        ) > minlength and amount_of_length < max_factor_per_length * len(
                current_cand):
            for problem in train:
                for example in train.get(problem):
                    if random.random() < 0.25:
                        input = get_input_data(current_cand, example,
                                               filtered_predicates)
                        output, output2 = get_output_data(
                            current_cand, expa, example, filtered_predicates)
                        f1.write(input + "," + output + "\n")
                        f2.write(input + "," + output2 + "\n")
            clauses_used += 1
            amount_of_length += 1
        previouslength = len(current_cand)
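The `lambda x, predicate=predicate: ...` pattern above (and the "closures" comment in the Aleph example further down) works around Python's late-binding closures: a default argument is evaluated when the lambda is defined, so each lambda keeps its own value instead of the loop variable's final one. A minimal illustration:

late = [lambda x: x + i for i in range(3)]
print([f(0) for f in late])        # [2, 2, 2] -- every lambda sees the final i
bound = [lambda x, i=i: x + i for i in range(3)]
print([f(0) for f in bound])       # [0, 1, 2] -- each lambda captured its own i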
Code example #11
class DownloadManager(object):
    def __init__(self):
        # type: () -> None

        self.loop = asyncio.get_event_loop()
        self.timeout = aiohttp.ClientTimeout(total=None, sock_read=60)
        self.session = aiohttp.ClientSession(loop=self.loop,
                                             timeout=self.timeout,
                                             auto_decompress=False)
        self.concurrent_downloads = 3
        #self.sem = asyncio.Semaphore(1000)
        self.chunksize = 1024 * 1024  # file write buffer

        self.queue = OrderedSet()
        self.active = OrderedSet()
        self.done = OrderedSet()
        self.error = OrderedSet()

    def status(self):
        # type: () -> str

        total_active = sum(t.downloaded for t in self.active)
        total_done = sum(t.downloaded for t in self.done)
        total_error = sum(t.downloaded for t in self.error)

        return "Queued: {}, active: {}, done: {}, error: {}\nDownload active: {}, done: {}, error: {}".format(
            len(self.queue), len(self.active), len(self.done), len(self.error),
            total_active, total_done, total_error)

    def _enqueue(self, task, priority):
        # type: (DownloadTask, Any) -> None

        self.queue.add(task)

    def _start(self, task):
        # type: (DownloadTask, ) -> asyncio.Task

        self.active.add(task)
        atask = asyncio.ensure_future(self._download(task))
        return atask

    def _trystart(self):
        # type: () -> Optional[asyncio.Task]

        if len(self.active) < self.concurrent_downloads:
            try:
                task = self.queue.pop()
                return self._start(task)
            except KeyError:
                if not self.active:
                    logger.info("all done")
                    #self.loop.stop()
                    #task = asyncio.ensure_future(self._close())

        return None

    async def _download(self, task):
        # type: (DownloadTask, ) -> None

        task.start()
        #await asyncio.sleep(10)

        # send http head request first to check for range support

        try:

            #async with self.session.get(task.url, headers={"Range": "bytes=0-10"}) as response:
            async with self.session.get(task.url, headers={}) as response:
                stream = response.content
                try:
                    size = int(response.headers.get("content-length",
                                                    ""))  # type: Optional[int]
                except (ValueError, TypeError):
                    size = None

                accept_range = response.headers.get('Accept-Ranges',
                                                    'none').lower()

                if response.status == 200:  # range not supported
                    pass
                elif response.status == 206:  # range supported
                    if accept_range != "bytes":
                        raise RuntimeError(
                            "Only bytes content ranges are supported")
                    bytes_range = response.headers.get(
                        'Content-Range')  # 'bytes 0-10/46239'
                    raise RuntimeError(
                        "Range requests are not supported yet: {}".format(
                            bytes_range))

                with open(task.path, "wb", buffering=self.chunksize) as fw:
                    async for data in stream.iter_any():
                        task.downloaded += len(data)
                        fw.write(data)

                if size and size != task.downloaded:
                    print("incomplete", task.downloaded, "of", size)

        except asyncio.TimeoutError:
            self.error.add(task)
        else:
            self.done.add(task)

        task.done()
        self.active.remove(task)
        self._trystart()

    def download(self, url, path="tmp.txt", priority=0, force=False):
        # type: (str, str, int, bool) -> Optional[asyncio.Task]

        logger.info("starting download")
        task = DownloadTask(url, path)
        if force:
            return self._start(task)
        else:
            self._enqueue(task, priority)
            return self._trystart()

    async def _close(self):
        await self.session.close()
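A hedged usage sketch for the manager above, assuming a DownloadTask exposing the url, path, downloaded, start() and done() members used in _download: downloads are queued, at most concurrent_downloads run at once, and status() can be polled until both the queue and the active set drain.

import asyncio

async def run_all(urls):
    dm = DownloadManager()
    for i, url in enumerate(urls):
        dm.download(url, path="file{}.bin".format(i))
    while dm.queue or dm.active:       # poll until everything finished or errored
        print(dm.status())
        await asyncio.sleep(1)
    await dm._close()

# asyncio.get_event_loop().run_until_complete(run_all(["http://example.com/a"]))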
Code example #12
class SimpleBreadthFirstLearner(TemplateLearner):
    def __init__(self, solver_instance: Prolog, max_body_literals=4):
        super().__init__(solver_instance)
        self._max_body_literals = max_body_literals

    def initialise_pool(self):
        self._candidate_pool = OrderedSet()

    def put_into_pool(
            self, candidates: typing.Union[Clause, Procedure,
                                           typing.Sequence]) -> None:
        if isinstance(candidates, Clause):
            self._candidate_pool.add(candidates)
        else:
            self._candidate_pool |= candidates

    def get_from_pool(self) -> Clause:
        return self._candidate_pool.pop(0)

    def evaluate(self, examples: Task, clause: Clause):
        pos, neg = examples.get_examples()
        numberofpositivecoverance = 0
        self._solver.assertz(clause)
        for example in pos:
            if self._solver.has_solution(example):
                numberofpositivecoverance += 1
        numberofnegativecoverance = 0
        for example in neg:
            if self._solver.has_solution(example):
                numberofnegativecoverance += 1
                # print(example)
        self._solver.retract(clause)
        if numberofnegativecoverance + numberofpositivecoverance == 0:
            return [0, 0]
        else:
            return [
                numberofpositivecoverance /
                (numberofpositivecoverance + numberofnegativecoverance) *
                (numberofpositivecoverance) / len(pos),
                numberofnegativecoverance
            ]

    def stop_inner_search(self, eval, examples: Task, clause: Clause) -> bool:
        if eval[1] > 0:
            return True
        else:
            return False

    def process_expansions(
            self, examples: Task, exps: typing.Sequence[Clause],
            hypothesis_space: TopDownHypothesisSpace
    ) -> typing.Sequence[Clause]:
        # eliminate every clause with more body literals than allowed
        exps = [cl for cl in exps if len(cl) <= self._max_body_literals]

        new_exps = []
        # check if every clause has solutions
        for cl in exps:
            y = self.evaluate(examples, cl)
            if y[0] > 0:
                new_exps.append(cl)
            else:
                hypothesis_space.remove(cl)
        return new_exps
Code example #13
File: exop.py (project: rsumner31/ngraph)
    def add_ops(self, roots, after_exop=None):
        """
        Add exops needed to compute ops in roots.

        Args:
            roots: A collection of ops whose values are needed.
            after_exop: Where in the list to add the ops. Defaults to the end.

        """
        if after_exop is None:
            after_exop = self.prev_exop

        # Get computation graph ops that are already inserted.
        available = OrderedSet()
        counts = dict()
        parents = defaultdict(OrderedSet)
        ready = OrderedSet()

        # Setting the environment variable below to 0 disables the priority-based toposort
        # and falls back to the naive algorithm in case something goes wrong unexpectedly
        algo_num = int(os.getenv('NGRAPH_TOPOSORT_ALGO', 1))
        pqueue = PriorityQueue()
        op_counter = 0
        wait_order = 100000
        std_order = 2
        start_order = 1

        # Some ops in roots may have been replaced by other ops; if so, they
        # are in the graph already, although maybe not in this block. Get the
        # op from the exop so we have the current version.
        for op in roots:
            exop = self.computation_decl.get_exop(op, None)
            if exop is not None:
                op = exop.op
            available.add(op)

        while available:
            op = available.pop()
            if algo_num > 0:
                if 'priority' in op.metadata:
                    if op.metadata['priority'] == 'high':
                        op.metadata['order'] = start_order
                    else:
                        op.metadata['order'] = wait_order
                elif 'order' not in op.metadata:
                    op.metadata['order'] = std_order
            if op in counts or op in self.all_ops:
                continue

            nchildren = 0

            op_deps = op.all_deps
            if (isinstance(op, CPUMlslGatherRecvOp) or isinstance(
                    op, CPUMlslScatterRecvOp)) and op.send_node() in available:
                op_deps.add(op.send_node())
            for child in op_deps:
                exop = self.computation_decl.get_exop(child, None)
                if exop is not None:
                    child = exop.op
                if child not in self.all_ops:
                    parents[child].add(op)
                    available.add(child)
                    if algo_num > 0:
                        ch_order = child.metadata[
                            'order'] if 'order' in child.metadata else -1
                        new_order = op.metadata['order'] + 1
                        if 'priority' not in child.metadata and \
                                ('order' not in child.metadata or new_order < ch_order):
                            child.metadata['order'] = new_order
                    nchildren += 1
            if nchildren > 0:
                counts[op] = nchildren
            else:
                if op not in ready:
                    ready.add(op)
                    if algo_num > 0:
                        op_counter = op_counter - 1
                        pqueue.put((op.metadata['order'], op_counter, op))

        if algo_num == 0:
            while ready:
                op = ready.pop()
                after_exop = self.add_op(op, after_exop=after_exop)
                for p in parents.get(op, []):
                    count = counts[p] - 1
                    if count == 0:
                        ready.add(p)
                        del counts[p]
                    else:
                        counts[p] = count

        else:
            while len(pqueue.queue) > 0:
                _, _, op = pqueue.get()
                after_exop = self.add_op(op, after_exop=after_exop)
                for p in parents.get(op, []):
                    count = counts[p] - 1
                    if count == 0:
                        op_counter = op_counter - 1
                        # Shouldn't happen, but we have a way to get back to naive scheduling
                        assert 'order' in p.metadata, \
                            "Something went wrong with the scheduling. \
                                 Please try NGRAPH_TOPOSORT_ALGO=0"

                        if p.metadata['order'] == wait_order:
                            pqueue.put(
                                (p.metadata['order'], int(-op_counter), p))
                        else:
                            pqueue.put((p.metadata['order'], op_counter, p))
                        del counts[p]
                    else:
                        counts[p] = count
        if len(counts) > 0:
            raise ValueError("Graph not a DAG")
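A hedged sketch of the (order, counter, op) tuples fed to the PriorityQueue above: tuples compare element-wise, so the explicit counter breaks ties between equal orders and keeps the queue from ever having to compare the op objects themselves. Names and priorities below are illustrative.

from queue import PriorityQueue

pq = PriorityQueue()
op_counter = 0
for name, order in [("wait_op", 100000), ("std_op", 2), ("high_op", 1)]:
    op_counter -= 1                      # decremented like op_counter above
    pq.put((order, op_counter, name))

while not pq.empty():
    order, _, name = pq.get()
    print(order, name)                   # high_op first, wait_op last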
Code example #14
class Aleph(TemplateLearner):
    """
    Implements the Aleph learner in loreleai. See https://www.cs.ox.ac.uk/activities/programinduction/Aleph/aleph.html#SEC45.
    Aleph efficiently searches the hypothesis space by bounding the search from above (X :- true) and below (using the bottom clause),
    and by using mode declarations for predicates. It iteratively adds new clauses that maximize the evalfn. Searching for a new clause
    is done using a branch-and-bound algorithm, where clauses that are guaranteed to not lead to improvements are immediately pruned.

    Aleph currently only supports eval functions that can define an upper bound on the quality of a clause, such as Coverage
    and Compression.
    """

    def __init__(
        self,
        solver: LPSolver,
        eval_fn: EvalFunction,
        max_body_literals=5,
        do_print=False,
    ):
        super().__init__(solver, eval_fn, do_print)
        self._max_body_literals = max_body_literals

    def learn(
        self, examples: Task, knowledge: Knowledge, hypothesis_space: HypothesisSpace, 
        initial_clause: typing.Union[Body,Clause] = None, minimum_freq: int = 0
    ):
        """
        To find a hypothesis, Aleph uses the following set covering approach:
        1.  Select a positive example to be generalised. If none exists, stop; otherwise proceed to the next step.
        2.  Construct the most specific clause (the bottom clause) (Muggleton, 1995) that entails the selected example
            and that is consistent with the mode declarations.
        3.  Search for a clause more general than the bottom clause and that has the best score.
        4.  Add the clause to the current hypothesis and remove all the examples made redundant by it.
        Return to step 1.
        (Description from Cropper and Dumancic)
        """

        # Variables for learning statistics
        start_time = datetime.datetime.now()
        i = 0
        stop = False
        self._learnresult = LearnResult()   # Reset in case the learner is reused
        self._prolog_queries = 0
        self._intermediate_coverage = []
        self._eval_fn._clauses_evaluated = 0

        # Assert all BK into engines
        self._solver.retract_all()
        self._assert_knowledge(knowledge)

        # Start with all examples
        examples_to_use = examples
        pos, _ = examples_to_use.get_examples()

        # List of clauses we're learning
        prog = []

        # parameters for aleph_extension()
        allowed_positions = find_allowed_positions(knowledge)
        allowed_reflexivity = find_allowed_reflexivity(knowledge)
        if minimum_freq > 0:
            allowed_constants = find_frequent_constants(knowledge,minimum_freq)
        else:
            allowed_constants = None        

        # Create HypothesisSpace: primitives will be different in each iteration 
        # (based on the chosen positive example)
        hs = TopDownHypothesisSpace(
            primitives=[],
            head_constructor=list(pos)[0].get_predicate(),
            expansion_hooks_reject=[
                lambda x, y: has_duplicated_literal(x, y),
            ],
            initial_clause=initial_clause
        )

        while len(pos) > 0 and not stop:
            i += 1

            # Pick example from pos
            pos_ex = Clause(list(pos)[0], [])
            bk = knowledge.as_clauses()
            bottom = compute_bottom_clause(bk, pos_ex)
            if self._print:
                print("Next iteration: generalizing example {}".format(str(pos_ex)))
                # print("Bottom clause: " + str(bottom))

            # Predicates can only be picked from the body of the bottom clause
            body_predicates = list(
                set(map(
                    lambda l: l.get_predicate(), 
                    bottom.get_body().get_literals()))
            )

            # Constants can only be picked from the literals in the bottom clause,
            # and from constants that are frequent enough in bk (if applicable)
            if allowed_constants is None:
                allowed = lambda l: isinstance(l,Constant) or isinstance(l,int)
            else:
                allowed = lambda l: (isinstance(l,Constant) and l in allowed_constants) or isinstance(l,int)

            constants = list(set(list(filter(
                allowed,
                bottom.get_body().get_arguments(),))))
            if self._print:
                print("Constants in bottom clause: {}".format(constants))
                print("Predicates in bottom clause: {}".format(body_predicates))

            # IMPORTANT: bind the VALUES of pred and the constants as lambda default arguments,
            # not the loop variables themselves; Python closures are late-binding
            extensions = [
                lambda x,a=pred,b=allowed_positions,c=constants,d=allowed_reflexivity: aleph_extension(x,a,b,c,d) for pred in body_predicates
            ]

            # Update hypothesis space for this iteration
            hs._primitives = extensions
            hs.remove_all_edges()

            # Learn 1 clause and add to program
            cl = self._learn_one_clause(examples_to_use, hs)
            prog.append(cl)
            if self._print:
                print("- New clause: " + str(cl))

            # update covered positive examples
            covered = self._execute_program(cl)
            if self._print:
                print(
                    "Clause covers {} pos examples: {}".format(
                        len(pos.intersection(covered)), pos.intersection(covered)
                    )
                )

            # Find intermediate quality of program at this point, add to learnresult (don't count these as Prolog queries)
            c = set()
            for cl in prog:
                c = c.union(self._execute_program(cl,count_as_query=False))
            pos_covered = len(c.intersection(examples._positive_examples))
            neg_covered = len(c.intersection(examples._negative_examples))
            self._intermediate_coverage.append((pos_covered,neg_covered))

            # Remove covered examples and start next iteration
            pos, neg = examples_to_use.get_examples()
            pos = pos.difference(covered)
            examples_to_use = Task(pos, neg)

            if self._print:
                print("Finished iteration {}".format(i))
                # print("Current program: {}".format(str(prog)))

        # Wrap results into learnresult and return
        self._learnresult['learner'] = "Aleph"
        self._learnresult["total_time"] = (datetime.datetime.now() - start_time).total_seconds()
        self._learnresult["final_program"] = prog
        self._learnresult["num_iterations"] = i
        self._learnresult["evalfn_evaluations"] = self._eval_fn._clauses_evaluated
        self._learnresult["prolog_queries"] = self._prolog_queries
        self._learnresult["intermediate_coverage"] = self._intermediate_coverage

        return self._learnresult

    def initialise_pool(self):
        self._candidate_pool = OrderedSet()

    def put_into_pool(
        self, candidates: Tuple[typing.Union[Clause, Procedure, typing.Sequence], float]
    ) -> None:
        if isinstance(candidates, Tuple):
            self._candidate_pool.add(candidates)
        else:
            self._candidate_pool |= candidates

    def prune_pool(self, minValue):
        """
        Removes from the pool all clauses whose upper bound on the value is < minValue
        """
        self._candidate_pool = OrderedSet(
            [t for t in self._candidate_pool if not t[2] < minValue]
        )

    def get_from_pool(self) -> Clause:
        return self._candidate_pool.pop(0)

    def stop_inner_search(
        self, eval: typing.Union[int, float], examples: Task, clause: Clause
    ) -> bool:
        raise NotImplementedError()

    def process_expansions(
        self,
        examples: Task,
        exps: typing.Sequence[Clause],
        hypothesis_space: TopDownHypothesisSpace,
    ) -> typing.Sequence[Clause]:
        # eliminate every clause with more body literals than allowed
        exps = [cl for cl in exps if len(cl) <= self._max_body_literals]

        # check if every clause has solutions
        exps = [
            (cl, self._solver.has_solution(*cl.get_body().get_literals()))
            for cl in exps
        ]
        new_exps = []

        for ind in range(len(exps)):
            if exps[ind][1]:
                # keep it if it has solutions
                new_exps.append(exps[ind][0])
                # print(f"Not removed: {exps[ind][0]}")
            else:
                # remove from hypothesis space if it does not
                hypothesis_space.remove(exps[ind][0])
                # print(f"Removed: {exps[ind][0]}")

        return new_exps

    def _execute_program(self, clause: Clause, count_as_query: bool = True) -> typing.Sequence[Atom]:
        """
        Evaluates a clause using the Prolog engine and background knowledge

        Returns a set of atoms that the clause covers
        """
        if len(clause.get_body().get_literals()) == 0:
            # Covers all possible examples because trivial hypothesis
            return None
        else:
            head_predicate = clause.get_head().get_predicate()
            head_args = clause.get_head_arguments()
            # print("{}({})".format(head_predicate, *head_args))

            sols = self._solver.query(*clause.get_body().get_literals())
            self._prolog_queries += 1 if count_as_query else 0

            # Build a solution by substituting Variables with their found value
            # and copying constants without change
            sols = [head_predicate(*[s[v] if isinstance(v,Variable) else v for v in head_args]) for s in sols]

            return sols

    def _learn_one_clause(
        self, examples: Task, hypothesis_space: TopDownHypothesisSpace
    ) -> Clause:
        """
        Learns a single clause to add to the theory.
        Algorithm from https://www.cs.ox.ac.uk/activities/programinduction/Aleph/aleph.html#SEC45
        """
        # reset the search space
        hypothesis_space.reset_pointer()

        # empty the pool just in case
        self.initialise_pool()

        # Add first clauses into pool (active)
        initial_clauses = hypothesis_space.get_current_candidate()
        self.put_into_pool(
            [
                (cl, self.evaluate(examples, cl,hypothesis_space)[0], self.evaluate(examples, cl,hypothesis_space)[1])
                for cl in initial_clauses
            ]
        )
        # print(self._candidate_pool)
        currentbest = None
        currentbestvalue = -99999

        i = 0

        while len(self._candidate_pool) > 0:
            # Optimise: pick smart according to evalFn (e.g. shorter clause when using compression)
            k = self.get_from_pool()
            if self._print:
                print("Expanding clause {}".format(k[0]))
            # Generate children of k
            new_clauses = hypothesis_space.expand(k[0])

            # Remove clauses that are too long...
            new_clauses = self.process_expansions(
                examples, new_clauses, hypothesis_space
            )
            # Compute costs for these children
            value = {cl: self.evaluate(examples, cl, hypothesis_space)[0] for cl in new_clauses}
            upperbound_value = {
                cl: self.evaluate(examples, cl, hypothesis_space)[1] for cl in new_clauses
            }
           
            for c in new_clauses:
                # If upper bound too low, don't bother expanding
                if upperbound_value[c] <= currentbestvalue and not c == currentbest:
                    hypothesis_space.remove(c)
                else:
                    if value[c] > currentbestvalue:
                        currentbestvalue = value[c]
                        currentbest = c
                        len_before = len(self._candidate_pool)
                        self.prune_pool(value[c])
                        len_after = len(self._candidate_pool)

                        if self._print:
                            print("Found new best: {}: {} {}".format(c,self._eval_fn.name(),value[c]))
                            print("Pruning to upperbound {} >= {}: {} of {} clauses removed".format(self._eval_fn.name(),value[c],(len_before-len_after),len_before))

                    self.put_into_pool((c, value[c], upperbound_value[c]))
                    if self._print:
                        print("Put {} into pool, contains {} clauses".format(str(c),len(self._candidate_pool)))

            i += 1

        if self._print:
            print("New clause: {} with score {}".format(currentbest,currentbestvalue))
        return currentbest
Code example #15
class Propagator:
    MAX_REQUESTED_KEYS_TO_KEEP = 1000

    def __init__(self, metrics: MetricsCollector = NullMetricsCollector()):
        self.requests = Requests()
        self.requested_propagates_for = OrderedSet()
        self.metrics = metrics

    # noinspection PyUnresolvedReferences
    def propagate(self, request: Request, clientName):
        """
        Broadcast a PROPAGATE to all other nodes

        :param request: the REQUEST to propagate
        """
        if self.requests.has_propagated(request, self.name):
            logger.trace("{} already propagated {}".format(self, request))
        else:
            with self.metrics.measure_time(MetricsName.SEND_PROPAGATE_TIME):
                self.requests.add_propagate(request, self.name)
                propagate = self.createPropagate(request, clientName)
                logger.debug("{} propagating request {} from client {}".format(self, request.key, clientName),
                             extra={"cli": True, "tags": ["node-propagate"]})
                self.send(propagate)

    @staticmethod
    def createPropagate(
            request: Union[Request, dict], client_name) -> Propagate:
        """
        Create a new PROPAGATE for the given REQUEST.

        :param request: the client REQUEST
        :return: a new PROPAGATE msg
        """
        if not isinstance(request, (Request, dict)):
            logger.error("{}Request not formatted properly to create propagate"
                         .format(THREE_PC_PREFIX))
            return
        logger.trace("Creating PROPAGATE for REQUEST {}".format(request))
        request = request.as_dict if isinstance(request, Request) else \
            request
        if isinstance(client_name, bytes):
            client_name = client_name.decode()
        return Propagate(request, client_name)

    # noinspection PyUnresolvedReferences
    def canForward(self, request: Request):
        """
        Determine whether to forward client REQUESTs to replicas, based on the
        following logic:

        - If exactly f+1 PROPAGATE requests are received, then forward.
        - If fewer than f+1 have been received, there is probably no consensus
            on the REQUEST yet, so don't forward.
        - If more than f+1 have been received, the REQUEST has already been
            forwarded to the replicas, so don't forward.

        Even if the node hasn't received the client REQUEST itself, if it has
        received enough number of PROPAGATE messages for the same, the REQUEST
        can be forwarded.

        :param request: the client REQUEST
        """

        if self.requests.forwarded(request):
            return 'already forwarded'

        # If not enough Propagates, don't bother comparing
        if not self.quorums.propagate.is_reached(self.requests.votes(request)):
            return 'not finalised'

        req = self.requests.req_with_acceptable_quorum(request,
                                                       self.quorums.propagate)
        if req:
            self.requests.set_finalised(req)
            return None
        else:
            return 'not finalised'

    # noinspection PyUnresolvedReferences
    def forward(self, request: Request):
        """
        Forward the specified client REQUEST to the other replicas on this node

        :param request: the REQUEST to propagate
        """
        key = request.key
        num_replicas = self.replicas.num_replicas
        logger.debug('{} forwarding request {} to {} replicas'
                     .format(self, key, num_replicas))
        self.replicas.pass_message(ReqKey(key))
        self.monitor.requestUnOrdered(key)
        self.requests.mark_as_forwarded(request, num_replicas)

    # noinspection PyUnresolvedReferences
    def recordAndPropagate(self, request: Request, clientName):
        """
        Record the request in the list of requests and propagate.

        :param request:
        :param clientName:
        """
        self.requests.add(request)
        self.propagate(request, clientName)
        self.tryForwarding(request)

    def tryForwarding(self, request: Request):
        """
        Try to forward the request if the required conditions are met.
        See the method `canForward` for the conditions to check before
        forwarding a request.
        """
        cannot_reason_msg = self.canForward(request)
        if cannot_reason_msg is None:
            # If haven't got the client request(REQUEST) for the corresponding
            # propagate request(PROPAGATE) but have enough propagate requests
            # to move ahead
            self.forward(request)
        else:
            logger.trace("{} not forwarding request {} to its replicas "
                         "since {}".format(self, request, cannot_reason_msg))

    def request_propagates(self, req_keys):
        """
        Request PROPAGATEs for the given request keys. Since replicas can
        request PROPAGATEs independently of each other, check if it has
        been requested recently
        :param req_keys:
        :return:
        """
        i = 0
        for digest in req_keys:
            if digest not in self.requested_propagates_for:
                self.request_msg(PROPAGATE, {f.DIGEST.nm: digest})
                self._add_to_recently_requested(digest)
                i += 1
            else:
                logger.debug('{} already requested PROPAGATE recently for {}'.
                             format(self, digest))
        return i

    def _add_to_recently_requested(self, key):
        while len(
                self.requested_propagates_for) > self.MAX_REQUESTED_KEYS_TO_KEEP:
            self.requested_propagates_for.pop(last=False)
        self.requested_propagates_for.add(key)
Code example #16
def get_JSONSchema_requirements(se, root, schema_name):
    
    json_schema = {
          "$schema": "http://json-schema.org/draft-07/schema#",
          "$id":"http://example.com/" + schema_name,
          "title": schema_name,
          "type": "object",
          "properties":{},
          "required":[],
          "allOf":[]
    }

    # get graph corresponding to data model schema
    mm_graph = se.get_nx_schema()

    # nodes to check for dependencies, starting with the provided root
    nodes_to_process = OrderedSet()
    nodes_to_process.add(root) 

    # keep track of nodes with processed dependencies
    nodes_with_processed_dependencies = set()

    '''
    keep checking for dependencies until there are no nodes
    left to process
    '''
    while nodes_to_process:  
        process_node = nodes_to_process.pop()

        '''
        get allowable values for this node;
        each of these values is a node that in turn is processed for
        dependencies and allowed values
        '''
        """ 
        print("===============")
        print(mm_graph.nodes[process_node])
        print("===============")
        """
        if requires_child in mm_graph.nodes[process_node]:
            if mm_graph.nodes[process_node][requires_child]:
                children = get_node_children(mm_graph, process_node)
                print(children)
                # set allowable values based on children nodes
                if children:
                    schema_properties = { process_node:{"enum":children}}
                    json_schema["properties"].update(schema_properties)                
                
                    # add children for requirements processing
                    nodes_to_process.update(children)
                
                    # set conditional dependencies based on children dependencies
                    for child in children:
                        child_dependencies = get_node_neighbor_dependencies(mm_graph, child)
                        if child_dependencies:
                            schema_conditional_dependencies = {
                                    "if": {
                                        "properties": {
                                        process_node: { "enum": [child] }
                                        },
                                        "required":[process_node],
                                      },
                                    "then": { "required": child_dependencies },
                            }
                            nodes_with_processed_dependencies.add(child)
                            nodes_to_process.update(child_dependencies)
                            # only append dependencies if there are any
                            #if schema_conditional_dependencies:
                            #    json_schema["allOf"].append(schema_conditional_dependencies)

        '''
        get required nodes by this node (e.g. other terms/nodes
        that need to be specified based on a data model, if the 
        given term is specified); each of these node/terms needs to be 
        processed for dependencies in turn.
        '''
        if not process_node in nodes_with_processed_dependencies:
            process_node_dependencies = get_node_neighbor_dependencies(mm_graph, process_node)

            if process_node_dependencies:
                if process_node == root: # these are unconditional dependencies 
                    json_schema["required"] += process_node_dependencies
                else: # these are dependencies given the processed node 
                    schema_conditional_dependencies = {
                            "if": {
                                "properties": {
                                process_node: { "string":"*" }
                                },
                                "required":[process_node],
                              },
                            "then": { "required": [process_node_dependencies] },
                    }

                    # only append dependencies if there are any
                    #if schema_conditional_dependencies:
                    #    json_schema["allOf"].append(schema_conditional_dependencies)

                nodes_to_process.update(process_node_dependencies)
                nodes_with_processed_dependencies.add(process_node)


        """
        print("Nodes to process")
        print(nodes_to_process)
        print("=================")
        """

    print("=================")
    print("JSONSchema successfully generated from Schema.org schema!")
    print("=================")
    
    # if no conditional dependencies were added we can't have an empty 'AllOf' block in the schema, so remove it
    if not json_schema["allOf"]:
        del json_schema["allOf"]

    return json_schema
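A hedged illustration of the conditional-dependency blocks assembled above: an allOf entry whose "if" matches a property value and whose "then" adds requirements. The property names are made up for the example, and validation uses the jsonschema package (assumed available).

import jsonschema

schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "properties": {"assay": {"enum": ["RNA-seq", "WGS"]}},
    "allOf": [
        {
            "if": {
                "properties": {"assay": {"enum": ["RNA-seq"]}},
                "required": ["assay"],
            },
            "then": {"required": ["libraryPrep"]},
        }
    ],
}

jsonschema.validate({"assay": "WGS"}, schema)                              # passes
jsonschema.validate({"assay": "RNA-seq", "libraryPrep": "polyA"}, schema)  # passes
# jsonschema.validate({"assay": "RNA-seq"}, schema)   # raises ValidationError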