Example #1
    def containsNearbyAlmostDuplicate(self, nums, k, t):
        """
        :type nums: List[int]
        :type k: int
        :type t: int
        :rtype: bool
        """

        # Sliding window + ordered container (here a SortedSet, i.e. an ordered set)

        length = len(nums)
        sw = SortedSet([nums[0]])  

        for i in range(1, length):
            # The sliding window sw is full, so evict the leftmost element
            if i > k:
                sw.discard(nums[i-k-1])
            if nums[i] in sw:
                return True
            else:
                sw.add(nums[i])
            ind = sw.bisect_left(nums[i])  # SortedSet supports bisect_left directly
            diff = 0
            if ind == 0:
                diff = sw[ind+1] - sw[ind]
            elif ind == len(sw) - 1:
                diff = sw[ind] - sw[ind-1]
            else:
                diff = min(sw[ind+1] - sw[ind], sw[ind] - sw[ind-1])
            if diff <= t:
                return True
        return False
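A minimal driver for the snippet above, assuming the method lives in a LeetCode-style Solution class and that sortedcontainers is imported at module level (both assumptions, not shown in the original):

# nums[0] and nums[3] are equal and at most 3 apart, so this should be True
print(Solution().containsNearbyAlmostDuplicate([1, 2, 3, 1], 3, 0))  # True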
Example #2
class DinnerPlates:
    def __init__(self, capacity: int):
        self.cap = capacity
        self.index = SortedSet()
        self.stacks = []

    def push(self, val: int) -> None:
        idx = self.index[0] if self.index else len(self.stacks)
        if idx == len(self.stacks):
            self.stacks.append([])
        s = self.stacks[idx]
        s.append(val)
        if len(s) == self.cap:
            self.index.discard(idx)
        elif len(s) == 1 and self.cap != 1:
            self.index.add(idx)

    def pop(self) -> int:
        return self.popAtStack(len(self.stacks) - 1)

    def popAtStack(self, index: int) -> int:
        if index < 0 or index >= len(self.stacks) or not self.stacks[index]:
            return -1
        s = self.stacks[index]
        val = s.pop()
        if len(s) == self.cap - 1:
            self.index.add(index)
        while self.stacks and not self.stacks[-1]:
            tmp = len(self.stacks) - 1
            self.stacks.pop()
            self.index.discard(tmp)
        return val
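A hedged usage sketch tracing the LeetCode 1172 example (assumes from sortedcontainers import SortedSet at module level):

d = DinnerPlates(2)
for v in [1, 2, 3, 4, 5]:
    d.push(v)
print(d.popAtStack(0))  # 2, the top of stack 0
d.push(20)              # refills the leftmost non-full stack (stack 0)
d.push(21)              # then stack 2
print(d.popAtStack(0))  # 20
print(d.popAtStack(2))  # 21
print(d.pop())          # 5
print(d.pop())          # 4
print(d.pop())          # 3
print(d.pop())          # 1
print(d.pop())          # -1, all stacks are empty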
Example #3
    def busiestServers(self, k: int, arrival: List[int],
                       load: List[int]) -> List[int]:
        cnts = [0] * k
        idle = SortedSet(range(k))
        busy = []
        heapq.heapify(busy)

        n, most = len(arrival), 0
        for i in range(n):
            while busy and busy[0][0] <= arrival[i]:
                _, sr = heapq.heappop(busy)
                idle.add(sr)

            if not idle:
                continue

            idx = idle.bisect_left(i % k)
            if idx == len(idle):
                idx = 0
            sr = idle[idx]
            cnts[sr] += 1
            most = max(most, cnts[sr])
            heapq.heappush(busy, (arrival[i] + load[i], sr))
            idle.discard(sr)

        ans = []
        for i, cnt in enumerate(cnts):
            if cnt == most:
                ans.append(i)

        return ans
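A quick check against LeetCode 1606's first example (assuming a Solution wrapper plus the heapq and sortedcontainers imports the snippet itself omits):

# server 1 ends up handling two requests; the request arriving at t=5 is dropped
print(Solution().busiestServers(3, [1, 2, 3, 4, 5], [5, 2, 3, 3, 3]))  # [1]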
Example #4
def greedyCoverage(inFile, outFile, maxCoverage, verboseFlag):
    '''
    This method is greedy, so reads in low-coverage areas may be missed if the
    current set is full.
    '''

    if verboseFlag:
        print("Reducing coverage using the Greedy method")

    curr = SortedSet()
    mapped = 0
    filtered = 0
    for r in inFile.fetch(until_eof=True):
        if (r.is_unmapped): continue
        mapped += 1

        # Attempt to find read that ends before this one
        itr = curr.irange(maximum=r.reference_start)
        try:
            ending = next(itr)
            # Some read is ending, replace it in the current set
            curr.discard(ending)
            curr.add(r.reference_end)
            outFile.write(r)
            filtered += 1
        except StopIteration:
            if (len(curr) < maxCoverage):
                # There is still room to include this read
                curr.add(r.reference_end)
                outFile.write(r)
                filtered += 1

    if verboseFlag:
        print("Reduced BAM from " + str(mapped) + " to " + str(filtered) +
              " reads")
Example #5
    def containsNearbyAlmostDuplicate(self, nums: List[int], k: int,
                                      t: int) -> bool:
        win = SortedSet()
        for i in range(len(nums)):
            pos = win.bisect_left(nums[i] - t)
            if pos < len(win) and win[pos] <= nums[i] + t:
                return True

            win.add(nums[i])
            if i >= k:
                win.discard(nums[i - k])

        return False
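The same hedged driver pattern as in Example #1 applies (Solution wrapper assumed); this variant handles t > 0 cleanly with a single bisect_left:

# no pair within distance k = 2 differs by at most t = 3
print(Solution().containsNearbyAlmostDuplicate([1, 5, 9, 1, 5, 9], 2, 3))  # False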
Example #6
def find_path(graph, start, end):
    def dist(p1, p2):
        return abs(p1[0] - p2[0]) + abs(p1[1] - p2[1])

    def heuristic(p):
        return dist(p, end)

    start = (start.x(), start.y())  
    end = (end.x(), end.y())
    data = dict()
    q = SortedSet(key=lambda it: -sum(data[it]))
    # cost, heuristic
    data[start] = (0, heuristic(start))
    q.add(start)
    back = dict()
    back[start] = start
    found = False
    while q:
        curr = q.pop()
        c, h = data[curr]
        prev = back[curr]
        dx = curr[0] - prev[0]
        dy = curr[1] - prev[1]
        if curr == end:
            found = True
            break
        for nbr in graph.get(curr, tuple()):
            # heavily penalize turns: only moves that continue straight along an axis are free
            if not (dx == 0 and nbr[0] - curr[0] == 0 or
                    dy == 0 and nbr[1] - curr[1] == 0):
                np = 1e20
            else:
                np = 0
            if nbr in data:
                if c + dist(curr, nbr) + np < data[nbr][0]:
                    q.discard(nbr)
                    data[nbr] = (c + dist(curr, nbr) + np, heuristic(nbr))
                    back[nbr] = curr
                    q.add(nbr)
            else:
                back[nbr] = curr
                data[nbr] = (c + dist(curr, nbr) + np, heuristic(nbr))
                q.add(nbr)

    if found:
        path = [end]
        while path[-1] != start:
            path.append(back[path[-1]])

        return list(reversed(path))
    return list()
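A small sketch of calling find_path (SortedSet import assumed); start and end only need x()/y() accessors, so a minimal point stub (hypothetical, not from the original) suffices:

class P:
    def __init__(self, x, y):
        self._x, self._y = x, y
    def x(self):
        return self._x
    def y(self):
        return self._y

# a straight corridor; the turn penalty (1e20) never triggers here
graph = {(0, 0): [(1, 0)], (1, 0): [(0, 0), (2, 0)], (2, 0): [(1, 0)]}
print(find_path(graph, P(0, 0), P(2, 0)))  # [(0, 0), (1, 0), (2, 0)]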
Example #7
    def checkClusterScore(self, floorplan):
        leftToCheck = SortedSet(set(self.nodes.keys()) - floorplan.occupiedNodes - self.serviceNodeIds)
        leftToCheck.discard('0')    # ignore outside node
        clusters = []
        clusterDoors = []
        self.recurseCluster([], leftToCheck, clusters, clusterDoors)

        # penalize based on non-accessibility to doors
        penalty = 0
        for doors in clusterDoors:
            if doors == 0:
                penalty += 1
        floorplan.clusterPenalty = penalty
        return (len(clusters), penalty)
Example #8
    def fillEmptyNodes(self, floorplan):
        leftToCheck = SortedSet(set(self.nodes.keys()) - floorplan.occupiedNodes - self.serviceNodeIds)
        leftToCheck.discard('0')    # ignore outside node
        clusters = []
        clusterDoors = []
        self.recurseCluster([], leftToCheck, clusters, clusterDoors)
        for cluster in clusters:
            if len(cluster.doorIds) > 0:
                if 'normal' in cluster.properties and 'toilet' in cluster.properties and 'storage' in cluster.properties:
                    unit = p_unit.Unit(doorwayId=cluster.doorIds[0],
                                       constraint=con.Constraint(roomType=cluster.properties['normal'] - 1,
                                                                 prefWeight=1,
                                                                 prefDoors=cluster.doorIds,
                                                                 roomConstraints=cluster.properties),
                                       connectedNodeIds=cluster.connectedNodeIds,
                                       connectedEdgeIds=cluster.connectedEdgeIds)
                    floorplan.addUnit(unit, self.doorEdgeIds, cluster.doorIds, True)
                    floorplan.occupiedNodes.update(cluster.connectedNodeIds)
                    self.testFloorplanScore(floorplan)
        return floorplan
Example #9
def djikstra(graph_object, start_vertex, destination_vertex):
    graph = graph_object.graph_dict
    distance_dict = graph_object.distance_dict
    # -1 -> not yet encountered, 0 -> encountered as someone's adjacent neighbour, 1 -> encountered completely
    visited = defaultdict(lambda: -1)
    #distance to unreached node is set to max dist
    min_distance_to_reach_specific_node = defaultdict(lambda: sys.maxsize)
    parent_vertex_dict = defaultdict(lambda: -1)
    priority_vertex_set = SortedSet()
    priority_vertex_set.add((0, start_vertex))
    # the distance to the start vertex itself is 0, not the default sys.maxsize
    min_distance_to_reach_specific_node[start_vertex] = 0
    parent_vertex_dict[start_vertex] = -1
    while priority_vertex_set:
        cur_dist, cur_vertex = priority_vertex_set[0]
        print(cur_dist, cur_vertex)
        priority_vertex_set.discard(priority_vertex_set[0])
        if visited[cur_vertex] == 1:
            continue

        visited[cur_vertex] = 1
        for item in graph[cur_vertex]:
            if visited[item] == 1:
                continue
            else:
                if visited[item] == -1:
                    priority_vertex_set.add(
                        (cur_dist + distance_dict[cur_vertex][item], item))
                    min_distance_to_reach_specific_node[
                        item] = cur_dist + distance_dict[cur_vertex][item]
                    visited[item] = 0
                    parent_vertex_dict[item] = cur_vertex
                elif min_distance_to_reach_specific_node[
                        item] > cur_dist + distance_dict[cur_vertex][item]:
                    priority_vertex_set.add(
                        (cur_dist + distance_dict[cur_vertex][item], item))
                    min_distance_to_reach_specific_node[
                        item] = cur_dist + distance_dict[cur_vertex][item]
                    visited[item] = 0
                    parent_vertex_dict[item] = cur_vertex
    path = []
    if visited[destination_vertex] != -1:
        cur_vertex = destination_vertex
        while cur_vertex != -1:
            path.append(cur_vertex)
            cur_vertex = parent_vertex_dict[cur_vertex]
        path = list(reversed(path))
        return (min_distance_to_reach_specific_node[destination_vertex], path)
    else:
        return (-1, path)
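A hedged smoke test with a stub graph holder exposing the two attributes the function reads (the class name and data are illustrative; the defaultdict, sys, and sortedcontainers imports the snippet omits are assumed):

class Graph:
    def __init__(self):
        self.graph_dict = {'a': ['b', 'c'], 'b': ['d'], 'c': ['d'], 'd': []}
        self.distance_dict = {'a': {'b': 1, 'c': 4}, 'b': {'d': 2}, 'c': {'d': 1}}

# shortest a -> d goes through b; the function also prints a debug line per popped entry
print(djikstra(Graph(), 'a', 'd'))  # (3, ['a', 'b', 'd'])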
Example #10
def run(N):
    p = randint(0, 100)
    times = sorted(sample(range(MAXT+1), 2*N))
    tutors = []
    present = SortedSet()
    for t in times:
        if len(present) == 0 or (len(tutors) < N and randint(0,100) < p):
            present.add(len(tutors))
            tutors.append([t,-1])
        else:
            x = choice(present)
            present.discard(x)
            tutors[x][1] = t
    shuffle(tutors)
    print(N)
    for t in tutors:
        print(t[0], t[1])
Example #11
    def test_SortedSet(self):
        # construct
        sorted_set = SortedSet([1, 5, 2, 7, 4])

        # inserting values one by one
        for i in range(5, 0, -1):
            sorted_set.add(i)
        print('set after adding elements: ', sorted_set)

        # inserting duplicate value
        sorted_set.add(5)
        print('set after inserting duplicate element: ', sorted_set)

        # discarding an element
        sorted_set.discard(4)
        print('set after discarding: ', sorted_set)

        for i in sorted_set:
            print(i)
Example #12
    def closestRoom(self, rooms: List[List[int]],
                    queries: List[List[int]]) -> List[int]:
        n = len(rooms)
        rooms.sort(key=lambda x: x[1])
        ids = SortedSet(room[0] for room in rooms)
        for i, query in enumerate(queries):
            query.append(i)
        queries.sort(key=lambda x: x[1])

        def searchSize(size: int) -> int:
            left, right = 0, n
            while left < right:
                mid = (left + right) // 2
                if rooms[mid][1] < size:
                    left = mid + 1
                else:
                    right = mid
            return left

        ans = [-1] * len(queries)
        pre = 0
        for preferred, minSize, idx in queries:
            cur = searchSize(minSize)
            if cur == n:
                continue

            while pre < cur:
                ids.discard(rooms[pre][0])
                pre += 1

            lt = ids.bisect_left(preferred)
            if lt == len(ids):
                if lt > 0:
                    ans[idx] = ids[lt - 1]
            else:
                ans[idx] = ids[lt]
                tmp = ids[lt] - preferred
                if lt > 0 and preferred - ids[lt - 1] <= tmp:
                    ans[idx] = ids[lt - 1]

        return ans
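Checking against LeetCode 1847's first example (Solution wrapper and SortedSet import assumed, as in the other snippets):

# query [3, 1]: room 3 is the closest id with size >= 1; [3, 3]: no room has size >= 3
print(Solution().closestRoom([[2, 2], [1, 2], [3, 2]], [[3, 1], [3, 3], [5, 2]]))  # [3, -1, 3]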
Example #13
def test_discard():
    temp = SortedSet(range(100), load=7)
    temp.discard(0)
    temp.discard(99)
    temp.discard(50)
    temp.discard(1000)
    temp._check()
    assert len(temp) == 97
Example #15
    def containsNearbyAlmostDuplicate(self, nums, k, t):
        """
        :type nums: List[int]
        :type k: int
        :type t: int
        :rtype: bool
        """
        if k < 1 or t < 0 or nums is None or len(nums) < 2:
            return False

        treeset = SortedSet()

        for i in range(len(nums)):
            # Solution 1
            subset = [x for x in treeset.irange(nums[i] - t, nums[i] + t)]
            if len(subset) > 0:
                return True
            treeset.add(nums[i])

            if i >= k:
                treeset.discard(nums[i - k])

        return False
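A short driver for this irange variant (again assuming a Solution wrapper; irange yields every window value inside the band [nums[i]-t, nums[i]+t]):

print(Solution().containsNearbyAlmostDuplicate([1, 0, 1, 1], 1, 2))  # True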
Example #17
def astar(stare_initiala, stare_finala, euristica, lista_chei):
    nod_initial = Nod(stare_initiala, None, None)
    deschise = SortedSet([nod_initial])
    scor_optim = SortedDict({tuple(stare_initiala): 0})

    # states are lists like [1, 1, 1, 1, 1]; dictionary keys use the tuple form (1, 1, 1, 1, 1)

    while len(deschise) > 0:
        # extract the node with the minimal f
        nod = deschise.pop(0)

        # stop if we have reached the final state
        if nod.stare == stare_finala:
            return nod

        # generate the successors and run the checks
        lista_succesori = genereaza_succesori(nod, lista_chei, euristica)
        for succesor in lista_succesori:
            if tuple(succesor.stare) not in scor_optim:
                # the successor's state has not been seen before, so insert it
                scor_optim[tuple(succesor.stare)] = succesor.g
                deschise.add(succesor)
            elif succesor.g < scor_optim[tuple(succesor.stare)]:
                # insert/update the current state in the "open" set, as appropriate
                succesor_fals = Nod(succesor.stare, None, None)
                succesor_fals.f = scor_optim[tuple(
                    succesor.stare)] + euristica(succesor.stare)

                if succesor_fals in deschise:
                    # discard via succesor_fals, which carries the old f and thus
                    # sorts to the stale entry's position in the set
                    deschise.discard(succesor_fals)
                deschise.add(succesor)
                # the state has been reached with a lower cost, so update it
                scor_optim[tuple(succesor.stare)] = succesor.g

    return None
Example #18
class ExamRoom(object):
    def __init__(self, N):
        """
        :type N: int
        """
        self.n = N
        self.spots = SortedSet()

    def seat(self):
        """
        :rtype: int
        """
        start, mx, idx = 0, 0, 0
        for i in self.spots:
            if start == 0:
                if mx < i - start:
                    mx = i - start
                    idx = 0
            else:
                if mx < (i - start + 1) // 2:
                    mx = (i - start + 1) // 2
                    idx = start + mx - 1
            start = i + 1

        if start > 0 and mx < self.n - start:
            mx = self.n - start
            idx = self.n - 1
        self.spots.add(idx)
        return idx

    def leave(self, p):
        """
        :type p: int
        :rtype: None
        """
        self.spots.discard(p)
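A hedged walk-through of LeetCode 855's example (SortedSet import assumed):

room = ExamRoom(10)
print(room.seat())  # 0, the room is empty
print(room.seat())  # 9, farthest from 0
print(room.seat())  # 4, midpoint of (0, 9)
print(room.seat())  # 2, midpoint of (0, 4)
room.leave(4)
print(room.seat())  # 5, midpoint of (2, 9)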
Example #19
def max_unique_element(array, k):
    """We need 2 data structures will update as we move the sliding window.:
    - A sorted set to keep track of seen items that are unique in each subarray
    - A dictionary to keep track of count for each item in each subarray
    We will return a list of max elements for subarrays
    """
    max_elements = []
    unique_seen = SortedSet()
    counts = dict()
    # Find the max unique value for the first subarray
    for e in array[:k]:
        if e not in counts:
            counts[e] = 1
            unique_seen.add(e)
        else:
            counts[e] += 1
            unique_seen.discard(e)
    if len(unique_seen) > 0:
        # Since unique_seen is a sorted set, the last item is the largest one
        max_elements.append(unique_seen[-1])
    else:
        max_elements.append(None)
    for i in range(1, len(array) - k + 1):
        # Update counts and unique_seen for the last item of previous subarray
        counts[array[i - 1]] -= 1
        if counts[array[i - 1]] == 1:
            unique_seen.add(array[i - 1])
        else:
            unique_seen.discard(array[i - 1])

        # Update counts and unique_seen for the new member of current subarray
        if array[i + k - 1] not in counts or counts[array[i + k - 1]] == 0:
            counts[array[i + k - 1]] = 1
            unique_seen.add(array[i + k - 1])
        else:
            counts[array[i + k - 1]] += 1
            unique_seen.discard(array[i + k - 1])
        # Record the max unique element for the current subarray, if any
        if len(unique_seen) > 0:
            max_elements.append(unique_seen[-1])
        else:
            max_elements.append(None)
    return max_elements
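A quick sanity check (the data is illustrative; SortedSet import assumed):

# windows: [1,2,2] -> unique {1}; [2,2,3] -> unique {3}; [2,3,3] -> unique {2}
print(max_unique_element([1, 2, 2, 3, 3], 3))  # [1, 3, 2]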
Example #20
class SequenceLearner(BaseLearner):
    r"""A learner that will learn a sequence. It simply returns
    the points in the provided sequence when asked.

    This is useful when your problem cannot be formulated in terms of
    another adaptive learner, but you still want to use Adaptive's
    routines to run, save, and plot.

    Parameters
    ----------
    function : callable
        The function to learn. Must take a single element `sequence`.
    sequence : sequence
        The sequence to learn.

    Attributes
    ----------
    data : dict
        The data as a mapping from "index of element in sequence" => value.

    Notes
    -----
    From primitive tests, the `~adaptive.SequenceLearner` appears to have
    performance similar to `ipyparallel`\s ``load_balanced_view().map``, with
    the added benefit of having the results in the local kernel already.
    """
    def __init__(self, function, sequence):
        self._original_function = function
        self.function = _IgnoreFirstArgument(function)
        self._to_do_indices = SortedSet({i for i, _ in enumerate(sequence)})
        self._ntotal = len(sequence)
        self.sequence = copy(sequence)
        self.data = SortedDict()
        self.pending_points = set()

    def ask(self, n, tell_pending=True):
        indices = []
        points = []
        loss_improvements = []
        for index in self._to_do_indices:
            if len(points) >= n:
                break
            point = self.sequence[index]
            indices.append(index)
            points.append((index, point))
            loss_improvements.append(1 / self._ntotal)

        if tell_pending:
            for i, p in zip(indices, points):
                self.tell_pending((i, p))

        return points, loss_improvements

    def _get_data(self):
        return self.data

    def _set_data(self, data):
        if data:
            indices, values = zip(*data.items())
            # the points aren't used by tell, so we can safely pass None
            points = [(i, None) for i in indices]
            self.tell_many(points, values)

    def loss(self, real=True):
        if not (self._to_do_indices or self.pending_points):
            return 0
        else:
            npoints = self.npoints + (0 if real else len(self.pending_points))
            return (self._ntotal - npoints) / self._ntotal

    def remove_unfinished(self):
        for i in self.pending_points:
            self._to_do_indices.add(i)
        self.pending_points = set()

    def tell(self, point, value):
        index, point = point
        self.data[index] = value
        self.pending_points.discard(index)
        self._to_do_indices.discard(index)

    def tell_pending(self, point):
        index, point = point
        self.pending_points.add(index)
        self._to_do_indices.discard(index)

    def done(self):
        return not self._to_do_indices and not self.pending_points

    def result(self):
        """Get the function values in the same order as ``sequence``."""
        if not self.done():
            raise Exception("Learner is not yet complete.")
        return list(self.data.values())

    @property
    def npoints(self):
        return len(self.data)
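A minimal hedged usage sketch outside the adaptive Runner machinery, feeding values back by hand (abs stands in for a real target function):

learner = SequenceLearner(abs, sequence=[-3, -1, 2])
points, _ = learner.ask(3)          # [(0, -3), (1, -1), (2, 2)]
for index, x in points:
    learner.tell((index, x), abs(x))
print(learner.done())    # True
print(learner.result())  # [3, 1, 2]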
Example #21
class ReplayBuffer(object):
    """Buffer to store environment transitions."""
    def __init__(self, obs_shape, action_shape, capacity, device,
                 normalize_obs):
        self.obs_shape = obs_shape
        self.action_shape = action_shape
        self.capacity = capacity
        self.device = device

        self.pixels = len(obs_shape) > 1
        self.empty_data()

        self.done_idxs = SortedSet()
        self.global_idx = 0
        self.global_last_save = 0

        self.normalize_obs = normalize_obs

        if normalize_obs:
            assert not self.pixels
            self.welford = utils.Welford()

    def __getstate__(self):
        d = copy.copy(self.__dict__)
        del d['obses'], d['next_obses'], d['actions'], d['rewards'], \
          d['not_dones'], d['not_dones_no_max']
        return d

    def __setstate__(self, d):
        self.__dict__ = d

        # Manually need to re-load the transitions with load()
        self.empty_data()

    def empty_data(self):
        obs_dtype = np.float32 if not self.pixels else np.uint8
        obs_shape = self.obs_shape
        action_shape = self.action_shape
        capacity = self.capacity

        self.obses = np.empty((capacity, *obs_shape), dtype=obs_dtype)
        self.next_obses = np.empty((capacity, *obs_shape), dtype=obs_dtype)
        self.actions = np.empty((capacity, *action_shape), dtype=np.float32)
        self.rewards = np.empty((capacity, 1), dtype=np.float32)
        self.not_dones = np.empty((capacity, 1), dtype=np.float32)
        self.not_dones_no_max = np.empty((capacity, 1), dtype=np.float32)

        self.idx = 0
        self.full = False
        self.payload = []
        self.done_idxs = None

    def __len__(self):
        return self.capacity if self.full else self.idx

    def get_obs_stats(self):
        assert not self.pixels
        MIN_STD = 1e-1
        MAX_STD = 10
        mean = self.welford.mean()
        std = self.welford.std()
        std[std < MIN_STD] = MIN_STD
        std[std > MAX_STD] = MAX_STD
        return mean, std

    def add(self, obs, action, reward, next_obs, done, done_no_max):
        # For saving
        self.payload.append((obs.copy(), next_obs.copy(), action.copy(),
                             reward, not done, not done_no_max))

        if self.normalize_obs:
            self.welford.add_data(obs)

        # if self.full and not self.not_dones[self.idx]:
        if done:
            self.done_idxs.add(self.idx)
        elif self.full:
            self.done_idxs.discard(self.idx)

        np.copyto(self.obses[self.idx], obs)
        np.copyto(self.actions[self.idx], action)
        np.copyto(self.rewards[self.idx], reward)
        np.copyto(self.next_obses[self.idx], next_obs)
        np.copyto(self.not_dones[self.idx], not done)
        np.copyto(self.not_dones_no_max[self.idx], not done_no_max)

        self.idx = (self.idx + 1) % self.capacity
        self.global_idx += 1
        self.full = self.full or self.idx == 0

    def sample(self, batch_size):
        idxs = np.random.randint(0,
                                 self.capacity if self.full else self.idx,
                                 size=batch_size)

        obses = self.obses[idxs]
        next_obses = self.next_obses[idxs]

        if self.normalize_obs:
            mu, sigma = self.get_obs_stats()
            obses = (obses - mu) / sigma
            next_obses = (next_obses - mu) / sigma

        obses = torch.as_tensor(obses, device=self.device).float()
        actions = torch.as_tensor(self.actions[idxs], device=self.device)
        rewards = torch.as_tensor(self.rewards[idxs], device=self.device)
        next_obses = torch.as_tensor(next_obses, device=self.device).float()
        not_dones = torch.as_tensor(self.not_dones[idxs], device=self.device)
        not_dones_no_max = torch.as_tensor(self.not_dones_no_max[idxs],
                                           device=self.device)

        return obses, actions, rewards, next_obses, not_dones, not_dones_no_max

    def sample_multistep(self, batch_size, T):
        assert batch_size < self.idx or self.full

        last_idx = self.capacity if self.full else self.idx
        last_idx -= T

        # "raw" here means the "coalesced" indices that map to valid
        # indices that are more than T steps away from a done
        done_idxs_sorted = np.array(list(self.done_idxs) + [last_idx])
        n_done = len(done_idxs_sorted)
        done_idxs_raw = done_idxs_sorted - np.arange(1, n_done + 1) * T

        samples_raw = npr.choice(
            last_idx - (T + 1) * n_done,
            size=batch_size,
            replace=True  # for speed
        )
        samples_raw = sorted(samples_raw)
        js = np.searchsorted(done_idxs_raw, samples_raw)
        offsets = done_idxs_raw[js] - samples_raw + T
        start_idxs = done_idxs_sorted[js] - offsets

        obses, actions, rewards = [], [], []

        for t in range(T):
            obses.append(self.obses[start_idxs + t])
            actions.append(self.actions[start_idxs + t])
            rewards.append(self.rewards[start_idxs + t])
            assert np.all(self.not_dones[start_idxs + t])

        obses = np.stack(obses)
        actions = np.stack(actions)
        rewards = np.stack(rewards).squeeze(2)

        if self.normalize_obs:
            mu, sigma = self.get_obs_stats()
            obses = (obses - mu) / sigma

        obses = torch.as_tensor(obses, device=self.device).float()
        actions = torch.as_tensor(actions, device=self.device)
        rewards = torch.as_tensor(rewards, device=self.device)

        return obses, actions, rewards

    def save_data(self, save_dir):
        if self.global_idx == self.global_last_save:
            return
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        path = os.path.join(
            save_dir, f'{self.global_last_save:08d}_{self.global_idx:08d}.pt')

        payload = list(zip(*self.payload))
        payload = [np.vstack(x) for x in payload]
        self.global_last_save = self.global_idx
        torch.save(payload, path)
        self.payload = []

    def load_data(self, save_dir):
        def parse_chunk(chunk):
            start, end = [int(x) for x in chunk.split('.')[0].split('_')]
            return (start, end)

        self.idx = 0

        chunks = os.listdir(save_dir)
        chunks = filter(lambda fname: 'stats' not in fname, chunks)
        chunks = sorted(chunks, key=lambda x: int(x.split('_')[0]))

        self.full = self.global_idx > self.capacity
        global_beginning = self.global_idx - self.capacity if self.full else 0

        for chunk in chunks:
            global_start, global_end = parse_chunk(chunk)
            if global_start >= self.global_idx:
                continue
            start = global_start - global_beginning
            end = global_end - global_beginning
            if end <= 0:
                continue

            path = os.path.join(save_dir, chunk)
            payload = torch.load(path)
            if start < 0:
                payload = [x[-start:] for x in payload]
                start = 0
            assert self.idx == start

            obses = payload[0]
            next_obses = payload[1]

            self.obses[start:end] = obses
            self.next_obses[start:end] = next_obses
            self.actions[start:end] = payload[2]
            self.rewards[start:end] = payload[3]
            self.not_dones[start:end] = payload[4]
            self.not_dones_no_max[start:end] = payload[5]
            self.idx = end

        self.last_save = self.idx

        if self.full:
            assert self.idx == self.capacity
            self.idx = 0

        last_idx = self.capacity if self.full else self.idx
        self.done_idxs = SortedSet(np.where(1. - self.not_dones[:last_idx])[0])
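A hypothetical smoke test with toy shapes (normalize_obs=False sidesteps the Welford dependency; numpy, torch, and sortedcontainers imports assumed):

import numpy as np

buf = ReplayBuffer(obs_shape=(4,), action_shape=(2,), capacity=8,
                   device='cpu', normalize_obs=False)
obs = np.zeros(4, dtype=np.float32)
act = np.zeros(2, dtype=np.float32)
for t in range(5):
    buf.add(obs, act, reward=1.0, next_obs=obs, done=(t == 4), done_no_max=(t == 4))
obses, actions, rewards, next_obses, not_dones, not_dones_no_max = buf.sample(2)
print(len(buf), obses.shape)  # 5 torch.Size([2, 4])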
Example #22
class Solver:
    def __init__(self, var_count, clause_count):
        self.var_count = var_count
        self.clause_count = clause_count
        self.clauses = []
        self.unary_clauses = []
        self.curr_level = 0  # Current depth of the decision tree
        self.max_level = 0

        # Since variables are 1-indexed, the size of these lists is (var_count + 1)
        # curr_assignment gives the latest assignment of a variable
        self.curr_assignment = [LiteralState.L_UNASSIGNED] * (var_count + 1)
        self.curr_literal_assignment = [LiteralState.L_UNASSIGNED
                                        ] * (2 * var_count + 1)
        # prev_assignment is used in PHASE SAVING
        self.prev_assignment = [-1] * (var_count + 1)
        # The level the variable was assigned at (if at all)
        self.assignment_level = [-1] * (var_count + 1)
        # A stack of all assigned variables in current path, most recently assigned variables are at top
        self.assigned_till_now = []
        self.assignments_upto_level = [
            0
        ]  # How many assignments had happened upto a level?
        self.conflicts_upto_level = [
            0
        ]  # How many conflicts hence clauses learned upto a level?
        self.antecedent = [-1] * (var_count + 1)
        self.score2var = SortedSet()

        self.bcp_stack = []
        # watch_map: literal -> list of clauses for which this literal is the watcher
        self.watch_map = {}

        # Used in MINISAT decision heuristic explained in decider()
        self.increment_value = 1.0
        self.activity = [0.0] * (var_count + 1)

        # Used in restart optimisation explained in reset_state()
        self.restart_threshold = CONSTANTS.RESTART_LOWER_BOUND
        self.restart_upper_bound = CONSTANTS.RESTART_UPPER_BOUND_BASE

        # Statistics
        self.restart_count = 0
        self.learnt_clauses_count = 0
        self.decision_count = 0
        self.assignments_count = 0
        self.global_max_score = 0.0

    def assign_variable(self, var: int, assignment: LiteralState):
        self.curr_assignment[var] = assignment
        if assignment != LiteralState.L_UNASSIGNED:
            self.prev_assignment[var] = assignment
        self.curr_literal_assignment[get_literal(var)] = assignment
        neg_assignment = LiteralState.L_UNASSIGNED
        if assignment == LiteralState.L_TRUE:
            neg_assignment = LiteralState.L_FALSE
        elif assignment == LiteralState.L_FALSE:
            neg_assignment = LiteralState.L_TRUE
        self.curr_literal_assignment[get_literal(-1 * var)] = neg_assignment

    def bump_var_score(self, var: int, increment_value=0.0):
        if increment_value > 0:
            self.score2var.discard((self.activity[var], var))
            self.activity[var] += increment_value
        self.score2var.add((self.activity[var], var))

    def print_clauses(self):
        print("{} variables, {} clauses".format(self.var_count,
                                                self.clause_count))
        for clause_id, clause in enumerate(self.clauses):
            clause.print(clause_id)

    def print_curr_assignment(self):
        assignment = "State: "
        for var, state in enumerate(self.curr_assignment):
            if (var == 0):
                continue
            assignment += ", {}: {}".format(var, state.value)
        print(assignment)

    # This fn is used to add the given clause to the watchlist of the given literal
    def watch_this_clause(self, lit, clause_id):
        if lit in self.watch_map:
            self.watch_map[lit].add(clause_id)
        else:
            self.watch_map[lit] = set([clause_id])

    # Insert a new (input / learned) clause to the cnf
    def insert_clause(self, clause: Clause, first_watch, second_watch):
        self.clauses.append(clause)
        # Setup the two-watch mechanism, both these literals are guaranteed to be unassigned currently
        clause.first_watcher = first_watch
        clause.second_watcher = second_watch
        clause_id = len(self.clauses) - 1
        self.watch_this_clause(clause.get_first_watcher(), clause_id)
        self.watch_this_clause(clause.get_second_watcher(), clause_id)

        # In the MINISAT decision heuristic:
        # the score of a variable is the number of clauses it appears in.
        # Since we are inserting a clause, increase the scores of the variables in this clause
        for literal in clause.literals:
            var = get_variable(literal)
            self.bump_var_score(var, self.increment_value)
            # self.activity[var] += self.increment_value

    # Function used to assign a literal TRUE in a unary clause
    # These assignments are never reset hence not put in assigned_till_now[]
    def assert_unary_literal(self, lit):
        self.assignments_count += 1
        var = get_variable(lit)
        # Set state of the underlying variable
        if is_negative(lit):
            self.assign_variable(var, LiteralState.L_FALSE)
            # self.curr_assignment[var] = LiteralState.L_FALSE
        else:
            self.assign_variable(var, LiteralState.L_TRUE)
            # self.curr_assignment[var] = LiteralState.L_TRUE
        self.assignment_level[var] = 0  # Always done at ground level

    # Function used to assign a literal TRUE in a non-unary clause
    # Note that current level is important here
    def assert_nonunary_literal(self, lit):
        self.assignments_count += 1
        self.assigned_till_now.append(lit)
        var = get_variable(lit)
        if is_negative(lit):
            self.assign_variable(var, LiteralState.L_FALSE)
            # self.prev_assignment[var] = self.curr_assignment[var] = LiteralState.L_FALSE
        else:
            self.assign_variable(var, LiteralState.L_TRUE)
            # self.prev_assignment[var] = self.curr_assignment[var] = LiteralState.L_TRUE
        self.assignment_level[var] = self.curr_level

    """
    Function to implement Boolean Constraint Propagation using the two-watcher optimisation:
    bcp_stack contains all the literals which have been assigned FALSE in the current search path,
    since these literals can now change the state of other clauses.
    A naive approach to bcp would be to iterate over every clause looking for a unit/unsatisfied clause
    and, if one is found, repeat the process; otherwise stop and start guessing variables in decide()
    """

    def bcp(self) -> (SolverState, int):
        # print("Running BCP with stack", self.bcp_stack)
        conflicting_clause_id = -1
        while (self.bcp_stack):
            # Got a literal with FALSE assignment
            lit = self.bcp_stack.pop()
            assert self.curr_literal_assignment[lit] == LiteralState.L_FALSE
            # assert self.get_literal_status(lit) == LiteralState.L_FALSE

            if lit not in self.watch_map:
                self.watch_map[lit] = set()
            new_watch_list = copy.copy(
                self.watch_map[lit])  # Backup watch list of lit

            # Traverse only the watchlist of that clause to save computation
            for clause_id in self.watch_map[lit]:
                clause = self.clauses[clause_id]

                # This block determines which watcher (1st / 2nd) was lit
                first_watch = clause.get_first_watcher()
                second_watch = clause.get_second_watcher()
                lit_is_first = (lit == first_watch)
                other_watch = second_watch if lit_is_first else first_watch
                # Now that we know lit has been assigned FALSE, we need to find another watcher
                new_clause_state, new_watch_loc = clause.change_watch_location(
                    self, lit_is_first, other_watch)

                # The clause has one more FALSE literal, which might change its state
                if (new_clause_state == ClauseState.C_SATISFIED):
                    pass
                elif (new_clause_state == ClauseState.C_UNIT):
                    # If the clause had become unit, we have got another implication here
                    self.assert_nonunary_literal(other_watch)
                    var = get_variable(other_watch)
                    self.antecedent[var] = clause_id
                    self.bcp_stack.append(get_opposite_literal(other_watch))
                elif (new_clause_state == ClauseState.C_CONFLICTING):
                    # All the literals of this clause became false, we have a conflict, need to backtrack
                    # If the conflict occurred at ground level, we have an unsatisfiable cnf like (x) ^ (-x)
                    if self.curr_level == 0:
                        return SolverState.S_UNSATISFIED, conflicting_clause_id
                    conflicting_clause_id = clause_id
                    # Clear bcp_stack as a backtrack is coming, which will unassign several variables
                    # As such some information in bcp_state is likely to become stale
                    self.bcp_stack.clear()
                    break
                elif (new_clause_state == ClauseState.C_UNRESOLVED):
                    # The clause is still unresolved as we have found another watcher
                    # Remove this clause from watch list of current lit
                    new_watch_list.remove(clause_id)
                    new_watcher = clause.literals[new_watch_loc]
                    self.watch_this_clause(new_watcher, clause_id)

            # new_watch_list contains the clauses for which lit is still the watcher
            # Note that in case of backtrack, we do not need to revert the watchers in the two-watcher method
            # since in backtracking, some variables will be unassigned, enforcing the two-watch invariant
            self.watch_map[lit].clear()
            self.watch_map[lit] = new_watch_list
            if (conflicting_clause_id >= 0):
                return SolverState.S_CONFLICT, conflicting_clause_id
        return SolverState.S_UNRESOLVED, conflicting_clause_id

    """
    This function is for the PHASE-SAVING heuristic
    In decide(), after the variable to be guessed has been selected, we then
    need to set it to TRUE or FALSE. Phase saving says we should set it to its
    previous assignment, if any.
    """

    def get_lit_memoised(self, var: int) -> int:
        prev_state = self.prev_assignment[var]
        if (prev_state == LiteralState.L_TRUE):
            return get_literal(var)
        else:
            return get_literal(-1 * var)

    """
    decide() selects the next variable to be guessed and the guessed value.
    Based on the MINISAT decision heuristic; results in an increment of the current level.
    The score of a variable is the number of clauses it appears in.
    """

    def decide(self) -> SolverState:  # MINISAT based decision heuristic
        # print("Running decider")
        # self.print_curr_assignment()
        # print("Activity: ", self.activity)
        # Find an unassigned one with maximum score
        # Some inputs have unused variables, so we select only those with positive score.
        selected_lit = 0
        unassigned_var_found = False
        while self.score2var:
            max_score, var = self.score2var.pop()
            self.global_max_score = max(self.global_max_score, max_score)
            if self.curr_assignment[var] == LiteralState.L_UNASSIGNED:
                unassigned_var_found = True
                selected_lit = self.get_lit_memoised(var)
                break

        if not unassigned_var_found:
            return SolverState.S_SATISFIED
        # print(selected_lit, selected_var, max_activity_till_now)
        assert selected_lit != 0
        self.decision_count += 1
        self.curr_level += 1
        # We need to track this new assignment
        if (self.curr_level > self.max_level):
            # This branch is separate since we are at a new decision level,
            # so an append is required instead of an update
            self.max_level = self.curr_level
            self.assignments_upto_level.append(len(self.assigned_till_now))
            self.conflicts_upto_level.append(self.learnt_clauses_count)
        else:
            self.assignments_upto_level[self.curr_level] = len(
                self.assigned_till_now)
            self.conflicts_upto_level[
                self.curr_level] = self.learnt_clauses_count

        # Now we assign the literal TRUE, and push the (now FALSE) opposite literal onto the bcp stack
        self.assert_nonunary_literal(selected_lit)
        self.bcp_stack.append(get_opposite_literal(selected_lit))
        return SolverState.S_UNRESOLVED

    """
    analyze_conflict() takes a conflicting clause and returns the level to backtrack to, and a learned clause.
    We use the nearest UIP (Unique Implication Point) finding method as highlighted in Kroening's book.
    """

    def analyze_conflict(self, conflicting_clause: Clause) -> (int, int):
        # print("Running analyse_conflict")
        curr_literals = [lit for lit in conflicting_clause.literals]
        learned_clause = Clause([])
        backtrack_level = 0  # to be returned by this function
        to_resolve_count = 0
        watch_lit = 0  # a watcher for the new learned literal

        marked = [False] * (self.var_count + 1)
        trail_index = len(self.assigned_till_now) - 1
        resolve_lit = 0
        resolve_var = 0
        iter = 0
        """
        This loop outputs the learned clause; it works as follows:
        Invariant 1: curr_literals is the clause to be fused into learned_clause
        Invariant 2: learned_clause contains exactly one variable assigned at the current level (the UIP)
        All other literals are assigned before.
        """
        while (iter == 0 or to_resolve_count > 0):
            iter += 1
            for lit in curr_literals:
                var = get_variable(lit)
                if marked[var]:
                    continue
                marked[var] = True
                if (self.assignment_level[var] == self.curr_level):
                    to_resolve_count += 1
                else:
                    learned_clause.insert_literal(lit)
                    if (self.assignment_level[var] > backtrack_level):
                        # watch_lit: 2nd highest assignment level; the highest is the UIP
                        backtrack_level = self.assignment_level[var]
                        watch_lit = len(learned_clause.literals) - 1
            # Find a variable to be resolved by traversing the recently assigned literals first
            while (trail_index >= 0):
                resolve_lit = self.assigned_till_now[trail_index]
                resolve_var = get_variable(resolve_lit)
                trail_index -= 1
                if marked[resolve_var]:
                    break
            marked[resolve_var] = False
            to_resolve_count -= 1
            if not to_resolve_count:
                # Just one literal remaining with current level assignment, we are done
                continue
            antecedent_id = self.antecedent[resolve_var]
            curr_literals = [
                lit for lit in self.clauses[antecedent_id].literals
                if lit != resolve_lit
            ]

        # The learned clause becomes a unit clause after backtracking
        # This is because every other literal in the learned clause was assigned before
        # the backtrack level
        # resolve_lit is an UIP
        self.learnt_clauses_count += 1
        opposite_resolv_lit = get_opposite_literal(resolve_lit)
        learned_clause.insert_literal(opposite_resolv_lit)
        self.increment_value /= CONSTANTS.VAR_DECAY_RATE
        if learned_clause.is_unary:
            # Note that we are inserting into bcp_stack without asserting the UIP
            # Asserting will be done immediately after backtrack (see backtrack())
            self.bcp_stack.append(resolve_lit)
            self.unary_clauses.append(learned_clause)
        else:
            self.bcp_stack.append(resolve_lit)
            self.insert_clause(learned_clause, watch_lit,
                               len(learned_clause.literals) - 1)
        # for lit in learned_clause.literals:
        #     var = get_variable(lit)
        #     print("({}, {})".format(var, self.assignment_level[var]))
        return backtrack_level, opposite_resolv_lit

    # RESTART heuristic: reset all assignments except ground level and start afresh
    # Note that learned clauses are not deleted; we only restart assignments from the beginning
    def reset_state(self):
        # print("Restart")
        """
        The threshold system works as follows (a sawtooth pattern):
        1. We have a range [1, 10]. The threshold increases after every restart until it crosses the upper bound
        2. At that point the threshold is reset and the range is also increased to let it go even higher
        """
        self.restart_count += 1
        self.restart_threshold = int(self.restart_threshold *
                                     CONSTANTS.THRESHOLD_MULTIPLIER)
        if (self.restart_threshold > self.restart_upper_bound):
            self.restart_threshold = CONSTANTS.RESTART_LOWER_BOUND
            self.restart_upper_bound = int(self.restart_upper_bound *
                                           CONSTANTS.THRESHOLD_MULTIPLIER)

        # Resets are similar to the backtrack() function below, except that everything is reset to ground level
        for var in range(1, self.var_count + 1):
            if (self.assignment_level[var] > 0):
                self.assign_variable(var, LiteralState.L_UNASSIGNED)
                # self.curr_assignment[var] = LiteralState.L_UNASSIGNED
                self.bump_var_score(var)

        self.bcp_stack.clear()
        self.assigned_till_now.clear()
        self.assignments_upto_level = [0]
        self.conflicts_upto_level = [0]
        self.curr_level = 0
        self.max_level = 0

    # Function to backtrack based on the output of analyse_conflict()
    def backtrack(self, k: int, uip_lit):
        # print("Running backtrack")
        # Invoke restart heuristic if too many clauses have been learnt after backtrack target level
        if k > 0 and (self.learnt_clauses_count - self.conflicts_upto_level[k]
                      > self.restart_threshold):
            self.reset_state()
            return
        # Iterate over the variables assigned at level >= k + 1 and unassign them
        for index in range(self.assignments_upto_level[k + 1],
                           len(self.assigned_till_now)):
            var = get_variable(self.assigned_till_now[index])
            if (self.assignment_level[var] > k):
                self.assign_variable(var, LiteralState.L_UNASSIGNED)
                # self.curr_assignment[var] = LiteralState.L_UNASSIGNED
                self.bump_var_score(var)

        # analyze_conflict() returns an asserting clause with the UIP just ready for assignment
        # This helps to immediately put the learnt clause into practice
        self.assigned_till_now = self.assigned_till_now[:self.
                                                        assignments_upto_level[
                                                            k + 1]]
        self.curr_level = k
        if k == 0:
            # We had learnt a unary clause
            self.assert_unary_literal(uip_lit)
        else:
            self.assert_nonunary_literal(uip_lit)
        self.antecedent[get_variable(uip_lit)] = len(self.clauses) - 1

    # Function to verify output assignment if any
    def verify_assignment(self):
        non_true_clauses = []
        all_clauses = self.clauses + self.unary_clauses
        # Every clause, including learnt and unary ones, must have at least one TRUE literal
        for clause in all_clauses:
            true_literal_found = False
            for lit in clause.literals:
                if self.curr_literal_assignment[lit] == LiteralState.L_TRUE:
                    # if (self.get_literal_status(lit) == LiteralState.L_TRUE):
                    true_literal_found = True
                    break
            if not true_literal_found:
                non_true_clauses.append(clause)
        if not non_true_clauses:
            print("AC, All clauses evaluate to true under given assignment")
        else:
            print("WA, {} unsatisfied clauses found".format(
                len(non_true_clauses)))

    """
    Function implementing the standard CDCL framework:
        1. [Outer loop] Run bcp() and decide() alternately, bcp first because of unary clauses
        2. [Inner loop] Run until bcp gives an UNRESOLVED result, at which point guesswork must be done.
        If bcp encounters a conflict, an analyze/backtrack pair is performed
    """

    def run_cdcl(self) -> SolverState:
        result: SolverState
        while (True):
            while (True):
                result, conflicting_clause_id = self.bcp()
                # print("BCP result was {}".format(result))
                if (result == SolverState.S_UNSATISFIED):
                    return result
                if (result == SolverState.S_CONFLICT):
                    assert conflicting_clause_id != -1
                    backtrack_level, uip_lit = self.analyze_conflict(
                        self.clauses[conflicting_clause_id])
                    # print("Analyze result was k = {}, uip = {}".format(backtrack_level, uip_lit))
                    self.backtrack(backtrack_level, uip_lit)
                else:
                    break
            result = self.decide()
            # print("Decide result was {}".format(result))
            if (result == SolverState.S_UNSATISFIED
                    or result == SolverState.S_SATISFIED):
                return result

    # Wrapper function to print the result of the CDCL framework
    def solve(self):
        # print("Solving")
        result: SolverState = self.run_cdcl()
        if (result == SolverState.S_SATISFIED):
            print("SATISFIABLE")
            self.verify_assignment()
            with open("assignment.txt", 'w') as assignment_file:
                for var, state in enumerate(self.curr_assignment):
                    if (var == 0):
                        assignment_file.write("State: ")
                        continue
                    assignment_file.write("{} ".format(
                        -1 * var if state == LiteralState.L_FALSE else var))
        else:
            print("UNSATISFIABLE")

    def print_statistics(self, solve_time):
        print("## Statistics: ")
        print("# Restarts: ", self.restart_count)
        print("# Learned clauses: ", self.learnt_clauses_count)
        print("# Decisions: ", self.decision_count)
        print("# Implications: ", self.assignments_count - self.decision_count)
        print("# Max score: ", self.global_max_score)
        print("# Time (s): ", solve_time)
Example #23
class Scheduler(object):
    '''
    This scheduler executes Tasks taking into account their dependencies and worker locality.

    Worker assignment takes into account:
     * concurrency (how many tasks must a worker execute concurrently)
     * and worker locality (0 is indifferent, -1 is forbidden, 1+ increasing locality)
       as locality 0 is likely to be common, this is assumed throughout the scheduler
       to reduce the memory cost for scheduling

    The most important component in the computational complexity of the scheduler is the number of
    dependencies to track. Many-to-many dependencies should be kept to the thousands or tens of
    thousands (i.e. 100 * 100 tasks). Such issues can be resolved by introducing a 'barrier task'
    as is done in bndl.compute (this reduced the number of dependencies to n+m instead of n*m).
    '''
    def __init__(self, tasks, done, workers, concurrency=1, attempts=1):
        '''
        Execute tasks in the given context and invoke done(task) when a task completes.

        :param tasks: iterable[task]
        :param done: callable(task)
            Invoked when a task completes. Must be thread safe. May be called multiple times
            if a task is rerun (e.g. in case a worker fails). done(None) is called to signal
            completion of the last task.
        :param: workers: sequence[Peer]
            Sequence of workers to execute on.
        :param: concurrency: int (defaults to 1)
            @see: bndl.execute.concurrency
        :param: attempts: int (defaults to 1)
            @see: bndl.execute.attempts
        '''
        self.tasks = OrderedDict(
            (task.id, task)
            for task in sorted(tasks, key=lambda t: t.priority))
        if len(self.tasks) == 0:
            raise ValueError('Tasks must provide at least one task to execute')
        if len(self.tasks) < len(tasks):
            raise ValueError('Tasks must have a unique task ID')

        for task in tasks:
            task.add_listener(noop, self.task_done)

        self.done = done
        self.workers = {worker.name: worker for worker in workers}

        if not self.workers:
            raise Exception('No workers available')

        self.concurrency = concurrency
        # failed tasks are retried on error, but they are executed at most attempts
        self.max_attempts = attempts

        # task completion is (or may be) executed on another thread; this lock serializes
        # access to the containers below and workers_idle
        self.lock = RLock()
        # a condition is used to signal that a worker is available or the scheduler is aborted
        self.condition = Condition(self.lock)

    def run(self):
        logger.info('Executing job with %r tasks on %r workers',
                    len(self.tasks), len(self.workers))

        self._abort = False
        self._exc = None

        # containers for states a task can be in
        self.executable = SortedSet(
            key=lambda task: task.priority
        )  # executable tasks, sorted by priority
        self.blocked = defaultdict(
            set)  # blocked tasks task -> dependencies executable or pending

        self.locality = {worker: {}
                         for worker in self.workers.keys()
                         }  # worker_name -> task -> locality > 0
        self.forbidden = defaultdict(set)  # task -> set[worker]
        # worker -> SortedList[task] in descending locality order
        self.executable_on = {
            worker: SortedSet(key=lambda task, worker=worker: -self.locality[
                worker].get(task, 0))
            for worker in self.workers.keys()
        }

        self.pending = set()  # tasks which are currently in progress
        self.succeeded = set()  # tasks which have been executed successfully
        self.failures = defaultdict(
            int)  # failure counts per task (task -> int)

        # keep a FIFO queue of workers ready
        # and a list of idle workers (ready, but no more tasks to execute)
        self.workers_ready = deque(self.workers.keys())
        self.workers_idle = set()
        self.workers_failed = set()

        # perform scheduling under lock
        try:
            with self.lock:
                logger.debug(
                    'Calculating which tasks are executable, which are blocked and if there is locality'
                )

                # create list of executable tasks and set of blocked tasks
                for task in self.tasks.values():
                    for worker, locality in task.locality(
                            self.workers.values()) or ():
                        worker = worker.name
                        if locality < 0:
                            self.forbidden[task].add(worker)
                        elif locality > 0:
                            self.locality[worker][task] = locality
                            self.executable_on[worker].add(task)

                for task in self.tasks.values():
                    if task.succeeded:
                        self.succeeded.add(task)
                        self.done(task)
                    elif task.dependencies:
                        remaining = set(dep for dep in task.dependencies
                                        if not dep.succeeded)
                        if remaining:
                            self.blocked[task] = remaining
                        else:
                            self.executable.add(task)
                    else:
                        self.executable.add(task)

                if not self.executable:
                    raise Exception(
                        'No tasks executable (all tasks have dependencies)')
                if not self.workers_ready:
                    raise Exception(
                        'No workers available (all workers are forbidden by all tasks)'
                    )

                logger.debug(
                    'Starting %r tasks (%r tasks blocked) on %r workers (%r tasks already done)',
                    len(self.executable), len(self.blocked),
                    len(self.workers_ready), len(self.succeeded))

                while True:
                    # wait for a worker to become available (signalled on task completion)
                    self.condition.wait_for(
                        lambda: self.workers_ready or self._abort)

                    if self._abort:
                        # the abort flag can be set to True to break the loop (in case of emergency)
                        for task in self.tasks.values():
                            if task in self.pending:
                                task.cancel()
                        break

                    worker = self.workers_ready.popleft()

                    if worker in self.workers_failed:
                        # the worker is 'ready' (a task was 'completed'), but with an error
                        # or the worker was marked as failed because another task depended on an output
                        # on this worker and the dependency failed
                        continue
                    elif not (self.executable or self.pending):
                        if logger.isEnabledFor(logging.DEBUG):
                            logger.debug(
                                'No more tasks to execute or pending (%r tasks blocked)',
                                sum(1 for _ in filter(None,
                                                      self.blocked.values())))
                        break
                    else:
                        task = self.select_task(worker)
                        if task:
                            # execute a task on the given worker and add the task_done callback
                            # the task is added to the pending set
                            try:
                                # assert task in self.executable, '%r is not executable' % task
                                # assert task not in self.succeeded, '%r already executed successfully' % task
                                # assert task not in self.pending, '%r already pending' % task
                                # assert not task.pending, '%r already pending' % task
                                # assert not task.done or task.failed, '%r done or failed' % task
                                # assert not self.blocked[task], '%r blocked' % task

                                # assert self.locality[worker].get(task, 0) >= 0, '%r forbidden on %r' % (task, worker)
                                # assert all(dep.succeeded for dep in task.dependencies), 'not all dependencies of %r succeeded' % task
                                # assert all(dep.id not in self.tasks or self.blocked[dep] for dep in task.dependents), \
                                #        'not all dependents of %r blocked' % task

                                self.executable.remove(task)
                                self.executable_on[worker].discard(task)
                                self.pending.add(task)
                                if logger.isEnabledFor(logging.DEBUG):
                                    logger.debug(
                                        '%r executing on %r with locality %r',
                                        task, worker,
                                        self.locality[worker].get(task, 0))
                                task.execute(self, self.workers[worker])
                            except CancelledError:
                                pass
                            except AssertionError:
                                raise
                            except Exception as exc:
                                task.mark_failed(exc)
                                self.task_done(task)
                        else:
                            self.workers_idle.add(worker)

        except Exception as exc:
            self._exc = exc

        if self._exc:
            logger.info('Failed after %r tasks with %s: %s',
                        len(self.succeeded), self._exc.__class__.__name__,
                        self._exc)
            self.done(self._exc)
        elif self._abort:
            logger.info('Aborted after %r tasks', len(self.succeeded))
            self.done(Exception('Scheduler aborted'))
        else:
            logger.info('Completed %r tasks', len(self.succeeded))

        # always issue None (to facilitate e.g. iter(queue.get, None))
        self.done(None)

    def abort(self, exc=None):
        if exc is not None:
            self._exc = exc
        self._abort = True
        with self.lock:
            self.condition.notify_all()

    def select_task(self, worker):
        if not self.executable:
            return None

        # select a task for the worker
        worker_queue = self.executable_on[worker]
        for task in list(worker_queue):
            if task in self.pending or task in self.succeeded:
                # task executed by another worker
                worker_queue.remove(task)
            elif task in self.executable:
                return task
            elif self.blocked[task]:
                pass
            else:  # task not executable
                logger.error(
                    '%r is neither executable, blocked, pending nor executed', task)
                # assert False, '%r is neither executable, blocked, pending nor executed' % task

        # no task available with locality > 0
        # find task which is allowed to execute on this worker
        for task in self.executable:
            if worker not in self.forbidden[task]:
                return task

    def set_executable(self, task):
        if task.id not in self.tasks:
            return

        # assert not self.blocked[task], '%r isn\'t executable because it is blocked'
        # assert all(dep.succeeded for dep in task.dependencies), 'not all dependencies of %r succeeded: %r' \
        #     % (task, [dep for dep in task.dependencies if not dep.succeeded])
        # assert task not in self.succeeded, '%r already succeeded'

        if task in self.executable or task in self.pending or task.succeeded:
            return

        # calculate for each worker which tasks are forbidden or which have locality
        for worker in self.workers.keys():
            # don't bother with 'failed' workers
            if worker not in self.workers_failed:
                locality = self.locality[worker].get(task, 0)
                if locality >= 0:
                    # make sure the worker isn't 'stuck' in the idle set
                    if worker in self.workers_idle:
                        self.workers_idle.remove(worker)
                        for _ in range(self.concurrency):
                            self.workers_ready.append(worker)
                        self.condition.notify()

                    # the task has a preference for this worker
                    if locality > 0:
                        self.executable_on[worker].add(task)

        # check if there is a worker allowed to execute the task
        if len(self.forbidden[task]) == len(self.workers):
            raise Exception('%r cannot be executed on any available workers' %
                            task)

        # add the task to the executable queue
        self.executable.add(task)

    def task_done(self, task):
        '''
        When a task completes, remove it from pending, add it to succeeded,
        and mark dependent tasks as executable if this task was their last dependency.
        Reschedule failed tasks, or abort scheduling if a task has failed too often.
        '''
        if not task.done:
            return

        try:
            # nothing to do, scheduling was aborted
            if self._abort:
                return

            with self.lock:
                self.pending.discard(task)

                if task.failed:
                    self.task_failed(task)
                else:
                    # assert task.succeeded, '%r not failed and not succeeded' % task
                    # assert task not in self.succeeded, '%r completed while already in succeeded list' % task
                    if logger.isEnabledFor(logging.DEBUG):
                        logger.debug('%r was executed on %r', task,
                                     task.executed_on_last())
                    # add to executed and signal done
                    self.succeeded.add(task)
                    self.done(task)
                    # check for unblocking of dependents
                    for dependent in task.dependents:
                        blocked_by = self.blocked[dependent]
                        blocked_by.discard(task)
                        if not blocked_by and dependent:
                            if dependent in self.succeeded:
                                logger.debug(
                                    '%r unblocked because %r was executed, but already succeeded',
                                    dependent, task)
                            else:
                                logger.debug(
                                    '%r unblocked because %r was executed',
                                    dependent, task)
                                self.set_executable(dependent)

                self.workers_ready.append(task.executed_on_last())
                self.condition.notify()
        except Exception as exc:
            logger.exception('Unable to handle task completion of %r on %r',
                             task, task.executed_on_last())
            self.abort(exc)

    def task_failed(self, task):
        # in these cases we consider the task already re-scheduled
        if task in self.executable:
            logger.debug('%r failed with %s, but already marked as executable',
                         task,
                         type(root_exc(task.exception())).__name__)
            return
        elif task in self.pending:
            logger.debug('%r failed with %s, but already pending', task,
                         type(root_exc(task.exception())))
            return

        # assert task.failed, "Can't reschedule task %r which hasn't failed." % task

        exc = root_exc(task.exception())

        if isinstance(exc, DependenciesFailed):
            # assert task not in self.succeeded, 'Dependencies of %r failed which already completed successfully' % task
            if logger.isEnabledFor(logging.INFO):
                logger.info(
                    '%r failed on %s because %r failed, rescheduling', task,
                    task.executed_on_last(), ', '.join(
                        worker + ': ' + ','.join(map(str, dependencies))
                        for worker, dependencies in exc.failures.items()))

            for worker, dependencies in exc.failures.items():
                for task_id in dependencies:
                    try:
                        dependency = self.tasks[task_id]
                    except KeyError as e:
                        logger.error(
                            'Received DependenciesFailed for unknown task with id %r',
                            task_id)
                        self.abort(e)
                    else:
                        # mark the worker as failed
                        executed_on_last = dependency.executed_on_last()
                        if not worker or worker == executed_on_last:
                            if worker == executed_on_last:
                                logger.info(
                                    'Marking %r as failed for dependency %s of %s',
                                    worker, dependency, task)
                                self.workers_failed.add(worker)
                                self.workers_idle.discard(worker)
                            dependency.mark_failed(FailedDependency(worker))
                            self.task_failed(dependency)
                        else:
                            # this should only occur with really short tasks, where the failure
                            # noticed by another task is already obsolete because the dependency
                            # was already restarted (because another task also issued DependenciesFailed)
                            logger.info(
                                'Received DependenciesFailed for task with id %r and worker %r '
                                'but the task is last executed on %r', task_id,
                                worker, executed_on_last)

        elif isinstance(exc, FailedDependency):
            self.succeeded.discard(task)
            worker = exc.worker_failed
            if worker:
                logger.info(
                    '%r marked as failed post-hoc, marking %r as failed', task,
                    worker)
                self.workers_failed.add(worker)
                self.workers_idle.discard(worker)

        elif isinstance(exc, NotConnected):
            # mark the worker as failed
            if logger.isEnabledFor(logging.INFO):
                logger.info(
                    '%r failed with NotConnected, marking %r as failed', task,
                    task.executed_on_last())
            self.workers_failed.add(task.executed_on_last())

        else:
            self.failures[task] = failures = self.failures[task] + 1
            if failures >= self.max_attempts:
                logger.warning(
                    '%r failed on %r after %r attempts ... aborting', task,
                    task.executed_on_last(), len(task.executed_on))
                # signal done (failed) to allow bubbling up the error and abort
                self.done(task)
                self.abort(task.exception())
                return
            elif task.executed_on_last():
                logger.info('%r failed on %r with %s: %s, rescheduling', task,
                            task.executed_on_last(), exc.__class__.__name__,
                            exc)
                self.forbidden[task].add(task.executed_on_last())
            else:
                logger.info(
                    '%r failed before being executed with %s: %s, rescheduling',
                    task, exc.__class__.__name__, exc)

        # block its dependents
        for dependent in task.dependents:
            # logger.debug('%r is blocked by %r because it failed', dependent, task)
            self.blocked[dependent].add(task)
            self.executable.discard(dependent)

        if len(self.workers_failed) == len(self.workers):
            self.abort(Exception('Unable to complete job, all workers failed'))

        if not self.blocked[
                task] and task not in self.executable and task not in self.pending:
            self.set_executable(task)
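# --- Usage sketch (not part of the original example) ---
# The scheduler above keeps one SortedSet per worker, ordered by descending
# locality, so select_task can scan the most local tasks first. A minimal,
# self-contained sketch of that pattern; the locality table and the
# worker/task names below are hypothetical stand-ins, not the scheduler's API.
from sortedcontainers import SortedSet

locality = {
    'w1': {'t1': 2, 't2': 5},
    'w2': {'t2': 1},
}

# One queue per worker, keyed by negated locality, mirroring executable_on.
# The worker=worker default binds the loop variable at definition time.
executable_on = {
    worker: SortedSet(key=lambda task, worker=worker: -locality[worker].get(task, 0))
    for worker in locality
}

for worker, tasks in locality.items():
    for task in tasks:
        executable_on[worker].add(task)

print(list(executable_on['w1']))  # ['t2', 't1'] -- highest locality first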
Example #24
0
class Agenda(object):
    def __init__(self, parse):
        self._parse = parse
        self._skipMC = False
        self._skipCompose = False
        self._mc_neighs = dict()
        self._compose_cnt = dict()
        self._agendaToScore = set()
        self._clustIdx_agenda = dict()
        self._inactiveAgenda_score = dict()
        self._activeAgenda_score = dict()
        self._scoreActiveAgenda = SortedSet()  # (float, SearchOp)
        self._minAbsCntObserved =  ParseParams.minAbsCnt \
                                 * (ParseParams.minAbsCnt-1)/2
        # self.logc = open("/Users/ben_ryan/Documents/DARPA ASKE/usp-code/genia_full/create_agenda.log", "a+")
        # self.logp = open("/Users/ben_ryan/Documents/DARPA ASKE/usp-code/genia_full/proc_agenda.log", "a+")

    def save_agenda(self, path):
        '''
            Save all objects necessary to recreate the current state of Agenda
        '''
        with open(path, 'wb') as f:
            pickle.dump({'saved_agenda': self}, f)

        return None

    def load_agenda(path, prs):
        '''
            Given a Parse object, load the saved state of an Agenda and 
            attach it, returning the updated Parse object.
        '''
        with open(path, 'rb') as f:
            sav = pickle.load(f)

        prs.agenda = sav['saved_agenda']
        prs.agenda._parse = prs

        return prs

    def createAgenda(self, verbose=False):
        if verbose:
            clust_cnt = len(Part.getClustPartRootNodeIds())
            milestones = set([x for x in range(1, 10, 1)])
            i = 0

        for clust_id in Part.getClustPartRootNodeIds():
            clust = Clust.getClust(clust_id)

            if clust.getType() != 'C':
                continue
            elif clust.isStop():
                continue

            # # self.logc.write("Adding to agenda for cluster {}\n".format(clust_id))
            self.addAgendaForNewClust(clust_id, verbose)

            if verbose:
                i += 1
                done = math.floor(i * 10 / clust_cnt)

                if done in milestones:
                    milestones.remove(done)
                    print("{}% complete.".format(done * 10))

        # self.logc.close()

        return None

    def addAgendaForNewClust(self, newClustIdx, verbose=False):
        part_node_ids = Part.getClustPartRootNodeIds()[newClustIdx]
        num_parts = len(part_node_ids)

        # if verbose:
        #     print("Updating agenda: {} possible operations.".format(num_parts*(num_parts-1)))

        if len(part_node_ids) > 1:
            for node_id in part_node_ids:
                part_1 = Part.getPartByRootNodeId(node_id)

                for node_id2 in part_node_ids:
                    if node_id <= node_id2:
                        break
                    part_2 = Part.getPartByRootNodeId(node_id2)

                    # self.logc.write("\tAdding parts {} and {} to agenda for cluster {}\n".format(node_id, node_id2, newClustIdx))
                    self.addAgendaAfterMergeClust(part_1, part_2)

        return None

    def addAgendaAfterMergeClust(self, part_1, part_2):
        # First, check that these parts belong to the same cluster
        assert part_1._clustIdx == part_2._clustIdx

        clustIdx = part_1._clustIdx

        # If they have parents, check whether the parents are in the same cluster
        # If not, look at merging their clusters, and if so, look at composing
        # the clusters for part_1 and its parent.

        if part_1.getParPart() is not None and part_2.getParPart() is not None:
            clustIdx1 = part_1.getParPart()._clustIdx
            clustIdx2 = part_2.getParPart()._clustIdx

            if clustIdx1 != clustIdx2:
                self.addAgendaMC(clustIdx1, clustIdx2, 2 * clustIdx + 1)
            else:
                self.addAgendaAbs(clustIdx1, clustIdx)

        # Next, get the arguments (children) of each part.
        # Compare each argument of part_1 with each argument of part_2 - if they
        # have different clusters, look at merging them, and if they have the
        # same cluster, look at composing the clusters for part_1 and its argument(s).

        kids_1 = part_1.getArguments()
        kids_2 = part_2.getArguments()
        # # self.logc.write("\tAdding to agenda for kids of {} and {} in {}\n".format(part_1.getRelTreeRoot().getId(),
        #                                                                           part_2.getRelTreeRoot().getId(),
        #                                                                           clustIdx))

        for kid1 in kids_1.values():
            clustIdx1 = kid1._argPart._clustIdx

            for kid2 in kids_2.values():
                clustIdx2 = kid2._argPart._clustIdx

                if clustIdx1 != clustIdx2:
                    #print("Add agenda - Merge Clusters {} and {}".format(clustIdx1, clustIdx2))
                    self.addAgendaMC(clustIdx1, clustIdx2, 2 * clustIdx + 1)
                else:
                    #print("Add agenda - Compose Clusters {} and {}".format(clustIdx, clustIdx1))
                    self.addAgendaAbs(clustIdx, clustIdx1)

        return None

    def addAgendaMC(self, clustIdx1, clustIdx2, neighType):
        if not (self._skipMC or clustIdx1 == clustIdx2):
            type1 = Clust.getClust(clustIdx1).getType()
            type2 = Clust.getClust(clustIdx2).getType()

            if type2 == 'C' and type1 == 'C':
                op = SearchOp()
                op._op = SearchOp.OP_MERGE_CLUST
                op._clustIdx1 = min((clustIdx1, clustIdx2))
                op._clustIdx2 = max((clustIdx1, clustIdx2))

                if not self.moveAgendaToScore(op):
                    if op not in self._mc_neighs:
                        self._mc_neighs[op] = set()

                    if len(self._mc_neighs[op]) + 1 >= ParseParams.minMCCnt:
                        self._agendaToScore.add(op)
                        del self._mc_neighs[op]
                    else:
                        self._mc_neighs[op].add(neighType)

                    ## self.logc.write("\t\tMerge Op: {}; mc_neighs: {}, agendaToScore: {}\n".format(op, len(self._mc_neighs), len(self._agendaToScore)))

        return None

    def addAgendaAbs(self, parClustIdx, chdClustIdx):
        if not self._skipCompose:
            op = SearchOp()
            op._op = SearchOp.OP_COMPOSE
            op._parClustIdx = parClustIdx
            op._chdClustIdx = chdClustIdx

            if not self.moveAgendaToScore(op):
                if op not in self._compose_cnt:
                    self._compose_cnt[op] = 1

                if self._compose_cnt[op] + 1 >= self._minAbsCntObserved:
                    self._agendaToScore.add(op)
                    del self._compose_cnt[op]
                else:
                    self._compose_cnt[op] += 1

                ## self.logc.write("\t\tCompose Op: {}; compose_cnt: {}, agendaToScore: {}\n".format(op, len(self._compose_cnt), len(self._agendaToScore)))

        return None

    def moveAgendaToScore(self, op):
        #assert op in self._activeAgenda_score or op in self._inactiveAgenda_score

        if op in self._agendaToScore:
            return True

        if op in self._activeAgenda_score:
            score = self._activeAgenda_score[op]
            self._scoreActiveAgenda.discard((score, op))
            del self._activeAgenda_score[op]
            self._agendaToScore.add(op)

            return True
        elif op in self._inactiveAgenda_score:
            del self._inactiveAgenda_score[op]
            self._agendaToScore.add(op)

            return True

        return False

    def procAgenda(self, verbose=False):
        if verbose:
            print("Processing agenda with {} operations in queue.".format(
                len(self._agendaToScore)))
        ttlAgendaScored, ttlExecMC, ttlExecAbs = (0, 0, 0)
        i = 1

        while True:
            As = 0

            for op in self._agendaToScore:
                score = self._parse.scorer.scoreOp(op)
                if verbose:
                    print("<SCORE> {} score={}".format(op, score))
                As += 1

                if score < -200:
                    continue

                if verbose:
                    print("<Add Agenda> {} score={}".format(op, score))
                self.addAgenda(op, score)

            self._agendaToScore.clear()
            ttlAgendaScored += As

            if len(self._scoreActiveAgenda) == 0:
                break

            score, op = next(reversed(self._scoreActiveAgenda))
            if verbose:
                print("Executing: {}, score={}".format(op, score))
            newClustIdx = self._parse.executor.executeOp(op)
            self.updateAgendaAfterExec(op, newClustIdx, verbose)

            if op._op == SearchOp.OP_COMPOSE:
                ttlExecAbs += 1
            elif op._op == SearchOp.OP_MERGE_CLUST:
                ttlExecMC += 1

            if verbose:
                print("Total op_compose: {}, Total op_merge_clust: {}".format(
                    ttlExecAbs, ttlExecMC))
            i += 1

            if verbose and i % 10 == 0:
                print("{} Processing agenda: {} loops".format(
                    datetime.now(), i))

        return None

    def addAgenda(self, op, score):
        ci1, ci2 = (-1, -1)

        if op._op == SearchOp.OP_MERGE_CLUST:
            ci1 = op._clustIdx1
            ci2 = op._clustIdx2
        elif op._op == SearchOp.OP_COMPOSE:
            ci1 = op._parClustIdx
            ci2 = op._chdClustIdx

        if ci1 not in self._clustIdx_agenda:
            self._clustIdx_agenda[ci1] = set()

        self._clustIdx_agenda[ci1].add(op)

        if ci2 not in self._clustIdx_agenda:
            self._clustIdx_agenda[ci2] = set()

        self._clustIdx_agenda[ci2].add(op)

        if score < ParseParams.priorCutOff:
            self._inactiveAgenda_score[op] = score
        else:
            self._activeAgenda_score[op] = score
            self._scoreActiveAgenda.add((score, op))

        return None

    def updateAgendaAfterExec(self, op, newClustIdx, verbose=False):
        self.removeAgenda(op)

        if newClustIdx >= 0:
            if op._op == SearchOp.OP_MERGE_CLUST:
                self.updateAgendaAfterExecMC(op, newClustIdx, verbose)
            elif op._op == SearchOp.OP_COMPOSE:
                self.updateAgendaAfterExecAbs(op, newClustIdx, verbose=verbose)

        return None

    def addAgendaToScore(self, op):
        self._agendaToScore.add(op)
        return None

    def updateAgendaAfterExecMC(self, op, newClustIdx, verbose=False):
        assert op._op == SearchOp.OP_MERGE_CLUST

        oldClustIdx = op._clustIdx2

        if oldClustIdx == newClustIdx:
            oldClustIdx = op._clustIdx1

        while len(self._clustIdx_agenda[oldClustIdx]) > 0:
            oop = next(iter(self._clustIdx_agenda[oldClustIdx]))
            self.removeAgenda(oop)

            if oop._op == SearchOp.OP_MERGE_CLUST:
                ci1 = oop._clustIdx1
                ci2 = oop._clustIdx2

                if ci1 == oldClustIdx:
                    ci1 = newClustIdx

                if ci2 == oldClustIdx:
                    ci2 = newClustIdx

                if ci1 != ci2:
                    nop = oop
                    nop._clustIdx1 = min((ci1, ci2))
                    nop._clustIdx2 = max((ci1, ci2))
                    nop.genString()
                    self.addAgendaToScore(nop)
            elif oop._op == SearchOp.OP_COMPOSE:
                ci1 = oop._parClustIdx
                ci2 = oop._chdClustIdx

                if ci1 == oldClustIdx:
                    ci1 = newClustIdx

                if ci2 == oldClustIdx:
                    ci2 = newClustIdx

                nop = oop
                nop._parClustIdx = ci1
                nop._chdClustIdx = ci2
                nop.genString()
                self.addAgendaToScore(nop)

        del self._clustIdx_agenda[oldClustIdx]

        num_parts_old = len(Part.getClustPartRootNodeIds()[oldClustIdx])
        num_parts_new = len(Part.getClustPartRootNodeIds()[newClustIdx])

        if verbose:
            print("Updating agenda: {} possible operations.".format(
                num_parts_new * (num_parts_old)))

        for prnid in Part.getClustPartRootNodeIds()[newClustIdx]:
            p = Part.getPartByRootNodeId(prnid)

            for prnid2 in Part.getClustPartRootNodeIds()[oldClustIdx]:
                p2 = Part.getPartByRootNodeId(prnid2)
                self.addAgendaAfterMergeClust(p, p2)

        return None

    def updateAgendaAfterExecAbs(self,
                                 op,
                                 newClustIdx,
                                 oop=None,
                                 verbose=False):
        if op._op == SearchOp.OP_COMPOSE:
            parClustIdx = op._parClustIdx
            chdClustIdx = op._chdClustIdx

            while len(self._clustIdx_agenda[parClustIdx]) > 0:
                oop = next(iter(self._clustIdx_agenda[parClustIdx]))
                self.removeAgenda(oop)
                # oop.genString()
                self.addAgendaToScore(oop)

            while len(self._clustIdx_agenda[chdClustIdx]) > 0:
                oop = next(iter(self._clustIdx_agenda[chdClustIdx]))
                self.removeAgenda(oop)
                # oop.genString()
                self.addAgendaToScore(oop)

            self.addAgendaForNewClust(newClustIdx, verbose)
        # elif oop is not None:
        #     ci1, ci2 = (-1, -1)

        #     if oop._op == SearchOp.OP_MERGE_CLUST:
        #         ci1 = oop._clustIdx1
        #         ci2 = oop._clustIdx2
        #     elif oop._op == SearchOp.OP_COMPOSE:
        #         ci1 = oop._parClustIdx
        #         ci2 = oop._chdClustIdx

        #     if ci1 in (op._parClustIdx, op._chdClustIdx):
        #         ci1 = newClustIdx

        #     if ci2 in (op._parClustIdx, op._chdClustIdx):
        #         ci2 = newClustIdx

        #     if oop._op == SearchOp.OP_MERGE_CLUST:
        #         if ci1 != ci2:
        #             nop = SearchOp()
        #             nop._clustIdx1 = min((ci1, ci2))
        #             nop._clustIdx2 = max((ci1, ci2))
        #             nop._op = oop._op
        #             self.addAgendaToScore(nop)
        #     elif oop._op == SearchOp.OP_COMPOSE:
        #         nop = SearchOp()
        #         nop._parClustIdx = ci1
        #         nop._chdClustIdx = ci2
        #         nop._op = oop._op
        #         self.addAgendaToScore(nop)

        return None

    def removeAgenda(self, op):
        # assert (op in self._activeAgenda_score or op in self._inactiveAgenda_score)

        if op in self._activeAgenda_score:
            score = self._activeAgenda_score[op]
            self._scoreActiveAgenda.discard((score, op))
            del self._activeAgenda_score[op]
        elif op in self._inactiveAgenda_score:
            del self._inactiveAgenda_score[op]

        if op._op == SearchOp.OP_MERGE_CLUST:
            self._clustIdx_agenda[op._clustIdx1].discard(op)
            self._clustIdx_agenda[op._clustIdx2].discard(op)
        elif op._op == SearchOp.OP_COMPOSE:
            self._clustIdx_agenda[op._parClustIdx].discard(op)
            self._clustIdx_agenda[op._chdClustIdx].discard(op)

        return None
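# --- Usage sketch (not part of the original example) ---
# The Agenda pairs a dict of scores with a SortedSet of (score, op) tuples so
# it can both look up an operation's score and pop the best-scoring operation.
# A minimal sketch of that keep-in-sync pattern; the rescore helper and the
# string stand-ins for operations are hypothetical.
from sortedcontainers import SortedSet

op_score = {}           # op -> score
by_score = SortedSet()  # (score, op), ascending by score

def rescore(op, score):
    # Discard the stale (score, op) entry before inserting the new one,
    # just as removeAgenda/addAgenda do above.
    if op in op_score:
        by_score.discard((op_score[op], op))
    op_score[op] = score
    by_score.add((score, op))

rescore('merge(3,7)', 1.5)
rescore('compose(2,4)', 4.0)
rescore('merge(3,7)', 6.0)  # rescoring replaces the old entry

best_score, best_op = next(reversed(by_score))  # best operation first
print(best_op, best_score)  # merge(3,7) 6.0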
Example #25
0
class Display:
    def __init__(self, interface, dimensions):
        self.logger = logging.getLogger(__name__)

        self.interface = interface
        self.dimensions = dimensions

        (rows, columns) = self.dimensions

        self.buffer = bytearray(rows * columns)
        self.dirty = SortedSet()

        self.address_counter = None

        self.status_line = StatusLine(self)

        self.cursor_reverse = False
        self.cursor_blink = False

    def move_cursor(self, index=None, row=None, column=None, force_load=False):
        """Load the address counter."""
        address = self._calculate_address(index=index, row=row, column=column)

        # TODO: Verify that the address is within range - exclude status line.

        return self._load_address_counter(address, force_load)

    def buffered_write(self, byte, index=None, row=None, column=None):
        if index is None:
            if row is None or column is None:
                raise ValueError('Either index or row and column is required')

            index = self._get_index(row, column)

        # TODO: Verify that index is within range.

        if self.buffer[index] == byte:
            return False

        self.buffer[index] = byte

        self.dirty.add(index)

        return True

    def flush(self):
        for (start_index, end_index) in self._get_dirty_ranges():
            self._flush_range(start_index, end_index)

    def clear(self, clear_status_line=False):
        """Clear the screen."""
        (rows, columns) = self.dimensions

        if clear_status_line:
            address = 0
            count = (rows + 1) * columns
        else:
            address = columns
            count = rows * columns

        self._write((b'\x00', count), address=address)

        # Update the buffer and dirty indicators to reflect the cleared screen.
        for index in range(rows * columns):
            self.buffer[index] = 0x00

        self.dirty.clear()

        self.move_cursor(row=0, column=0, force_load=True)

    def toggle_cursor_blink(self):
        self.cursor_blink = not self.cursor_blink

    def toggle_cursor_reverse(self):
        self.cursor_reverse = not self.cursor_reverse

    def _get_index(self, row, column):
        return (row * self.dimensions.columns) + column

    def _calculate_address(self, index=None, row=None, column=None):
        if index is not None:
            return self.dimensions.columns + index

        if row is not None and column is not None:
            return self.dimensions.columns + self._get_index(row, column)

        raise ValueError('Either index or row and column is required')

    def _calculate_address_after_write(self, address, count):
        if address is None:
            return None

        address += count

        (rows, columns) = self.dimensions

        # TODO: Determine the correct behavior here...
        if self.address_counter >= self._calculate_address((rows * columns) -
                                                           1):
            return None

        return address

    def _read_address_counter(self):
        hi = read_address_counter_hi(self.interface)
        lo = read_address_counter_lo(self.interface)

        return (hi << 8) | lo

    def _load_address_counter(self, address, force_load):
        if address == self.address_counter and not force_load:
            return False

        (hi, lo) = _split_address(address)
        (current_hi, current_lo) = _split_address(self.address_counter)

        if hi != current_hi or force_load:
            load_address_counter_hi(self.interface, hi)

        if lo != current_lo or force_load:
            load_address_counter_lo(self.interface, lo)

        self.address_counter = address

        return True

    def _get_dirty_ranges(self):
        if not self.dirty:
            return []

        # TODO: Implement multiple ranges with optimization.
        return [(self.dirty[0], self.dirty[-1])]

    def _flush_range(self, start_index, end_index):
        if self.logger.isEnabledFor(logging.DEBUG):
            self.logger.debug(
                f'Flushing changes for range {start_index}-{end_index}')

        data = self.buffer[start_index:end_index + 1]

        address = self._calculate_address(start_index)

        try:
            self._write(data, address=address)
        except Exception as error:
            # TODO: This could leave the address_counter incorrect.
            self.logger.error(f'Write error: {error}', exc_info=error)

        for index in range(start_index, end_index + 1):
            self.dirty.discard(index)

        return self.address_counter

    def _write(self, data, address=None, restore_original_address=False):
        if restore_original_address:
            original_address = self.address_counter

            if original_address is None:
                original_address = self._read_address_counter()

        if address is not None:
            self._load_address_counter(address, force_load=False)

        write_data(self.interface, data)

        if isinstance(data, tuple):
            length = len(data[0]) * data[1]
        else:
            length = len(data)

        self.address_counter = self._calculate_address_after_write(
            address, length)

        if restore_original_address:
            self._load_address_counter(original_address, force_load=True)
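# --- Usage sketch (not part of the original example) ---
# Display tracks dirty buffer indices in a SortedSet so a flush can address
# the span between the lowest and highest changed cell with a single write.
# A stripped-down sketch of that idea; the 80-byte buffer and the write/flush
# helpers are illustrative, not the class's real interface.
from sortedcontainers import SortedSet

buffer = bytearray(80)
dirty = SortedSet()

def write(index, byte):
    if buffer[index] != byte:
        buffer[index] = byte
        dirty.add(index)

def flush():
    if not dirty:
        return None
    # The indices stay ordered, so one write covers dirty[0]..dirty[-1].
    start, end = dirty[0], dirty[-1]
    data = bytes(buffer[start:end + 1])
    dirty.clear()
    return (start, data)

write(5, 0x41)
write(12, 0x42)
print(flush())  # (5, b'A\x00\x00\x00\x00\x00\x00B')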
Example #26
0
class graph:
    """Implementa il concetto di grafo allo scopo di generare grafi pseudo-casuali con proprieta' fissate.
	Il grafo viene rappresentato come insieme ordinato di archi.
	
	Il costruttore consente di creare un qualsiasi tipo speciale noto di grafi, le usuali operazioni aritmetiche consentono di effettuare analoghe operazioni combinatoriali, mentre le usuali operazioni logiche consentono di effettuare le analoghe operazioni insiemistiche.
	
	E' inoltre possibile aggiungere archi a caso (con il metodo addedges) o aggiungere archi in modo da connettere il grafo (con il metodo connect)."""

    # constructor
    def __init__(self, N=0, E=None, M=None, w=None, type=None):
        """Builds an empty graph with N vertices and edge set E (if specified).
		If type is specified, builds a graph of that type instead.
		The admissible values for type are cycle, path, tree, forest, clique.
		If w is specified, the graph is treated as weighted, with weights generated by the function w().
		The instantiation graph(G), with G an existing graph, is also allowed."""
        if isinstance(N, graph) and E is None:
            E = [self.cod(e) for e in N]
            N = N.V
        if not ((E is None) or (type is None)):
            raise StandardError("Incompatible parameters specified.")
        self.V = N
        self.w = w
        if E and isinstance(E[0], list):
            E = SortedSet([self.cod(e) for e in E])
        if E is None:
            E = SortedSet([])
        if len(E) == 0 and N > 1:
            if type == 'cycle':
                for i in xrange(N):
                    E.add(self.cod([i, (i + 1) % N]))
            if type == 'path':
                for i in xrange(N - 1):
                    E.add(self.cod([i, (i + 1) % N]))
            if type == 'tree':
                for i in xrange(1, N):
                    E.add(self.cod([randint(i), i]))
            if type == 'forest':
                if not (0 <= M < N):
                    raise StandardError("Parameter M out of bounds.")
                for i in lsample(N - 1, M):
                    E.add(self.cod([randint(i + 1), i + 1]))
            if type == 'clique':
                for i in xrange(N - 1):
                    for j in xrange(i + 1, N):
                        E.add(self.cod([i, j]))
            if type == 'star':
                for i in xrange(1, N):
                    E.add(self.cod([0, i]))
            if type == 'wheel':
                for i in xrange(1, N):
                    E.add(self.cod([0, i]))
                    E.add(self.cod([i, (i + 1) % N]))
        # possibly to be added: gear, caterpillar/lobster, BIPARTITE
        self.E = SortedSet(E)

    # printing functions
    def __repr__(self):
        """Python representation of the graph."""
        return self.__class__.__name__ + '(' + str(self.V) + ',' + str(
            [e for e in self]) + ')'

    def __str__(self):
        """Rappresentazione olimpica del grafo."""
        s = str(self.N()) + ' ' + str(self.M()) + '\n'
        Ed = list(self.E)
        shuffle(Ed)
        for e in Ed:
            de = self.dec(e)
            s += str(de[0] + 1) + ' ' + str(de[1] + 1)
            if self.w is not None:
                s += ' ' + str(self.w())
            s += '\n'
        return s.rstrip()

    def printedges(self):
        """Rappresentazione olimpica del grafo, senza la prima riga."""
        s = ""
        Ed = list(self.E)
        shuffle(Ed)
        for e in Ed:
            de = self.dec(e)
            s += str(de[0] + 1) + ' ' + str(de[1] + 1)
            if self.w is not None:
                s += ' ' + str(self.w())
            s += '\n'
        return s.rstrip()

    # comparison functions
    def __lt__(self, other):
        """Proper-subset relation."""
        return self.E < other.E

    def __le__(self, other):
        """Relazione di sottoinsieme."""
        return self.E <= other.E

    def __eq__(self, other):
        """Relazione di uguaglianza degli archi."""
        return self.E == other.E

    def __ne__(self, other):
        """Relazione di disuguaglianza degli archi."""
        return self.E != other.E

    def __gt__(self, other):
        """Relazione di sovrainsieme proprio."""
        return self.E > other.E

    def __ge__(self, other):
        """Relazione di sovrainsieme."""
        return self.E >= other.E

    # container functions
    def __len__(self):
        """Number of edges of the graph."""
        return len(self.E)

    def __getitem__(self, i):
        """Restituisce l'i-esimo arco del grafo."""
        return self.dec(self.E[i])

    def __iter__(self):
        """Restituisce un iteratore sugli archi del grafo."""
        return _generic_iter(self)

    def __contains__(self, e):
        """Verifica se un arco e' presente o meno nel grafo."""
        if isinstance(e, list):
            e = self.cod(e)
        return (e in self.E)

    # disjoint union of graphs (+)
    def __add__(self, other):
        """Disjoint union of graphs."""
        G = self.__class__(self.V, self.E)
        G += other
        return G

    def __iadd__(self, other):
        """Unione disgiunta di grafi."""
        if isinstance(other, graph):
            self.E |= [self.cod([e[0] + self.V, e[1] + self.V]) for e in other]
            self.V += other.V
        else:
            self.add(other)
        return self

    # Cartesian product of graphs (*)
    def __mul__(self, other):
        """Cartesian product of graphs."""
        G = self.__class__()
        for i in xrange(other.V):
            G += self
        for e in other:
            for i in xrange(self.V):
                G += [e[0] * self.V + i, e[1] * self.V + i]
        return G

    def __imul__(self, other):
        """Prodotto cartesiano di grafi."""
        G = self * other
        self.V = G.V
        self.E = G.E
        return self

    # intersection of graphs (&)
    def __and__(self, other):
        """Intersection of graphs."""
        G = self.__class__(self.V, self.E)
        G &= other
        return G

    def __iand__(self, other):
        """Intersezione di grafi."""
        self.V = min(self.V, other.V)
        self.E = self.E & other.E
        return self

    # union of graphs (|)
    def __or__(self, other):
        """Union of graphs."""
        G = self.__class__(self.V, self.E)
        G |= other
        return G

    def __ior__(self, other):
        """Unione di grafi."""
        self.V = max(self.V, other.V)
        self.E = self.E | other.E
        return self

    # complement graph (~)
    def __invert__(self):
        """Complement graph."""
        G = self.__class__(self.V, xrange(self.mMax()) - self.E)
        return G

    # abstract edge-encoding functions.
    # they must satisfy:
    #   *  0 <= cod(e) < mMax()
    #   *  cod(e) = cod(e') => e = e'
    #   *  cod(e) is independent of self
    #   *  the valid edges for V=N are exactly mMax()
    def cod(self, e):
        """Codifica un arco in un intero univoco e indipendente da N,M tra 0 e mMax()."""
        raise NotImplementedError("Abstract class graph must be inherited.")

    def dec(self, n):
        """Decodifica l'id di un arco."""
        raise NotImplementedError("Abstract class graph must be inherited.")

    def mMax(self):
        """Il numero massimo di archi che un grafo con N nodi puo' contenere."""
        raise NotImplementedError("Abstract class graph must be inherited.")

    # size computation
    def N(self):
        """Returns the number of nodes of the graph."""
        return self.V

    def M(self):
        """Restituisce il numero di archi del grafo."""
        return len(self.E)

    # adding and removing an edge
    def add(self, e):
        """Adds an edge to the graph."""
        if max(e[0], e[1]) >= self.V:
            self.V = max(e[0], e[1]) + 1
        self.E.add(self.cod(e))

    def discard(self, e):
        """Rimuove un arco dal grafo, se presente."""
        self.E.discard(self.cod(e))

    # add K new edges at random, among the candidates (edgerange-style object)
    def addedges(self, K, candidates=None):
        """Adds K edges at random to the graph, chosen among the candidates (an edgerange object or a set/list of edges)."""
        if candidates is None:
            self.E.add(self.mMax())
            new = lsample(self.mMax() - self.M() + 1, K)
            i = j = 0
            while j < K:
                if self.E[i] > new[j] + i:
                    new[j] += i
                    j += 1
                else:
                    i += 1
            self.E.remove(self.mMax())
            self.E |= new
        else:
            dup = SortedSet([])
            for e in self:
                if e in candidates:
                    dup.add(self.cod(e))
            new = lsample(len(candidates) - len(dup), K)
            dup.add(self.mMax())
            i = j = 0
            while j < K:
                if dup[i] > candidates[new[j] + i]:
                    new[j] = candidates[new[j] + i]
                    j += 1
                else:
                    i += 1
            self.E |= new

    # adds edges until the graph is connected
    def connect(self):
        """Adds the minimum number of edges needed to connect the graph."""
        lbl = range(self.N())
        rnk = [1 for i in lbl]

        def find(x):
            if x == lbl[x]:
                return x
            lbl[x] = find(lbl[x])
            return lbl[x]

        def union(x, y):
            lx = find(x)
            ly = find(y)
            if lx == ly:
                return
            if rnk[lx] < rnk[ly]:
                lx, ly = ly, lx
            lbl[ly] = lx
            rnk[lx] += rnk[ly]

        def bsearch(v, k):
            if len(v) == 1:
                return 0
            m = len(v) // 2
            if v[m] <= k:
                return m + bsearch(v[m:], k)
            else:
                if v[m - 1] <= k:
                    return m
                return bsearch(v[0:m], k)

        for e in self:
            union(e[0], e[1])
        comp = [[] for i in range(self.N())]
        for i in range(self.N()):
            comp[lbl[i]] += [i]
        comp = [[len(i)] + i for i in comp]
        comp.sort()
        comp.reverse()
        while comp[-1] == [0]:
            comp.pop()
        kcomp = [i[0] for i in comp]
        for i in range(1, len(kcomp)):
            kcomp[i] += kcomp[i - 1]
        for i in range(1, len(comp)):
            a = randint(kcomp[i - 1])
            a = bsearch(kcomp, a)
            a = choice(comp[a][1:])
            b = choice(comp[i][1:])
            self.add([a, b])

    # permutes the nodes of the graph
    def shuffle(self):
        """Randomly permutes the nodes of the graph."""
        lbl = range(self.V)
        shuffle(lbl)
        new = [self.cod([lbl[e[0]], lbl[e[1]]]) for e in self]
        self.E = SortedSet(new)
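# --- Usage sketch (not part of the original example) ---
# graph is abstract: subclasses must supply cod/dec/mMax. Below is a
# hypothetical minimal subclass for undirected simple graphs, encoding the
# edge [i, j] with i < j as the integer j*(j-1)//2 + i, so mMax() = V*(V-1)//2.
class ugraph(graph):
    def cod(self, e):
        i, j = min(e), max(e)
        return j * (j - 1) // 2 + i

    def dec(self, n):
        # Find the largest j with j*(j-1)//2 <= n by scanning upward.
        j = 1
        while (j + 1) * j // 2 <= n:
            j += 1
        return [n - j * (j - 1) // 2, j]

    def mMax(self):
        return self.V * (self.V - 1) // 2

G = ugraph(4)
for e in [[0, 1], [1, 2], [2, 3], [3, 0]]:
    G.add(e)
print(G.M())  # 4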
Example #27
0
class History(object):
    def __init__(self, history=None, modification_history=None):
        self.chunk_history = Timeline() if history is None else Timeline(
            history)
        # modification_history: dict var_name -> Timeline
        if modification_history is None:
            self.modification_history = {}
            for c in self.chunk_history:
                for p in c.modifications:
                    if p not in self.modification_history:
                        self.modification_history[p] = Timeline()
                    self.modification_history[p].add(c)
                for p in c.dependencies:
                    if p not in self.modification_history:
                        raise Exception(
                            'Illegal sequence of operations was supplied! Referenced dependency {} does not exist at time {}'
                            .format(p, c.stamp))
                    self.modification_history[p][-1].dependents.add(c)
        else:
            self.modification_history = modification_history
        self.dirty_chunks = SortedSet()

    def __iter__(self):
        return iter(self.chunk_history)

    def __len__(self):
        return len(self.modification_history)

    def get_time_stamp(self, before=None, after=None):
        if before is not None:
            pos, succ = self.chunk_history.get_ceil(before) if type(
                before) != Chunk else self.chunk_history.get_ceil(before.stamp)
            return 0.5 * (succ.stamp + self.chunk_history[pos - 1].stamp
                          ) if pos > 0 else succ.stamp - 1
        elif after is not None:
            pos, succ = self.chunk_history.get_floor(after) if type(
                after) != Chunk else self.chunk_history.get_floor(after.stamp)
            return 0.5 * (succ.stamp +
                          self.chunk_history[pos + 1].stamp) if pos < len(
                              self.chunk_history) - 1 else succ.stamp + 1
        return self.chunk_history[-1].stamp + 1 if len(
            self.chunk_history) > 0 else 1

    @profile
    def _insert_modification(self, chunk, path):
        if path not in self.modification_history:
            self.modification_history[path] = Timeline()
        _, pred = self.modification_history[path].get_floor(chunk.stamp)
        if pred is not None:
            to_remove = set()
            for d in pred.dependents:
                # Fetch all dependents from predecessor which are going to depend on the new chunk
                # Save them as dependents and mark them as dirty
                if d.stamp > chunk.stamp:
                    dep_overlap_diff = d.dependencies.difference(
                        chunk.modifications)
                    # Is there at least one overlapping element?
                    if len(dep_overlap_diff) < len(d.dependencies):
                        chunk.dependents.add(d)
                        self.dirty_chunks.add(d)
                        # If there is no remaining overlap with pred anymore, remove d
                        if len(dep_overlap_diff.difference(
                                pred.modifications)) == len(dep_overlap_diff):
                            to_remove.add(d)
            pred.dependents -= to_remove
        self.modification_history[path].add(chunk)

    @profile
    def insert_chunk(self, chunk):
        for p in chunk.dependencies:
            if p not in self.modification_history:
                raise Exception(
                    'Chunk depends on attribute without history!\n Operation "{}" at {}\n Attribute: {}\n'
                    .format(chunk.operation.name, chunk.stamp, p))
            _, pred = self.modification_history[p].get_floor(chunk.stamp)
            if pred is None:
                raise Exception(
                    'Chunk at time {} executing "{}" depends on attributes with empty history! Attributes:\n  {}'
                    .format(
                        chunk.stamp, chunk.operation.name, '\n  '.join([
                            str(p) for p in chunk.dependencies
                            if p not in self.modification_history
                            or self.modification_history[p].get_floor(
                                chunk.stamp)[1] is None
                        ])))
            pred.dependents.add(chunk)

        for p in chunk.modifications:
            self._insert_modification(chunk, p)

        self.chunk_history.add(chunk)

    @profile
    def remove_chunk(self, chunk):
        for p in chunk.modifications:
            if self.modification_history[p][0] == chunk and len(
                    chunk.dependents) > 0 and max(
                        [p in c.dependencies for c in chunk.dependents]):
                raise Exception(
                    'Can not remove chunk at timestamp {} because it is the founding chunk in the history of {} and would create dangling dependencies.'
                    .format(chunk.stamp, p))

        for p in chunk.modifications:
            self.modification_history[p].discard(chunk)
            _, pred = self.modification_history[p].get_floor(chunk.stamp)
            # Copy dependents that depend on this variable to predecessor
            if pred is not None:
                pred.dependents.update(
                    {d
                     for d in chunk.dependents if p in d.dependencies})

        for p in chunk.dependencies:
            pos, pred = self.modification_history[p].get_floor(chunk.stamp)
            if pred is None:
                raise Exception(
                    'Chunk depends on attribute with empty history!')
            # It can happen that this chunk modifies the variable it depends on.
            # In that case the chunk is its own floor entry, so step back one
            # position to find the true predecessor.
            if pred == chunk:
                pos -= 1
                pred = self.modification_history[p][pos]
            pred.dependents.discard(chunk)

        self.chunk_history.remove(chunk)
        self.dirty_chunks.update(chunk.dependents)

    @profile
    def replace_chunk(self, c_old, c_new):
        if c_old.stamp != c_new.stamp:
            raise Exception(
                'Can only replace chunk if stamps match. Stamps:\n Old: {:>8.3f}\n New: {:>8.3f}'
                .format(c_old.stamp, c_new.stamp))

        overlap = c_old.modifications.intersection(c_new.modifications)
        if len(overlap) != len(c_old.modifications):
            raise Exception(
                'Chunks can only be replaced by others with at least the same definition coverage. Missing variables:\n {}'
                .format('\n '.join(
                    sorted(c_old.modifications.difference(
                        c_new.modifications)))))

        new_deps = {
            p: self.modification_history[p].get_floor(c_new.stamp)[1]
            if p in self.modification_history else None
            for p in c_new.dependencies.difference(overlap)
        }
        if None in new_deps.values():
            raise Exception(
                'Replacement chunk at {} tries to depend on variables with insufficient histories. variables:\n {}'
                .format(c_new.stamp, '\n '.join(sorted(new_deps.keys()))))

        for p in overlap:
            pos, _ = self.modification_history[p].get_floor(c_old.stamp)
            # If we are already here, we might as well remove old and establish new deps
            if p in c_old.dependencies:
                self.modification_history[p][pos - 1].dependents.discard(c_old)
            if p in c_new.dependencies:
                self.modification_history[p][pos - 1].dependents.add(c_new)
            self.modification_history[p].remove(c_old)
            self.modification_history[p].add(c_new)

        c_new.dependents = c_old.dependents.copy()
        self.flag_dirty(*c_new.dependents)

        # Remove old, non-modified deps
        for p in c_old.dependencies.difference(overlap):
            self.modification_history[p].get_floor(
                c_old.stamp)[1].dependents.remove(c_old)

        # Insert additional modifications
        for p in c_new.modifications.difference(overlap):
            self._insert_modification(c_new, p)

        for c in new_deps.values():
            c.dependents.add(c_new)

        self.chunk_history.remove(c_old)
        self.chunk_history.add(c_new)

    def get_chunk_by_index(self, idx):
        return self.chunk_history[idx]

    def get_chunk(self, stamp):
        return self.get_chunk_pos(stamp)[0]

    def get_chunk_pos(self, stamp):
        pos, chunk = self.chunk_history.get_floor(stamp)
        return (chunk,
                pos) if chunk is None or chunk.stamp == stamp else (None, None)

    def flag_dirty(self, *chunks):
        self.dirty_chunks.update(chunks)

    def flag_clean(self, *chunks):
        for c in chunks:
            self.dirty_chunks.discard(c)

    def expand_dirty_set(self):
        active_set = set(self.dirty_chunks)
        while len(active_set) > 0:
            a = active_set.pop()
            u = a.dependents.difference(self.dirty_chunks)
            active_set.update(u)
            self.dirty_chunks.update(u)

    def get_dirty(self):
        return self.dirty_chunks.copy()

    def get_subhistory(self, time):
        if len(self.chunk_history) > 0 and self.chunk_history[0].stamp <= time:
            chunks = self.chunk_history[:self.chunk_history.get_floor(time
                                                                      )[0] + 1]
            mod_history = {
                p: Timeline(h[:h.get_floor(time)[0] + 1])
                for p, h in self.modification_history.items()
                if h[0].stamp <= time
            }
            return History(chunks, mod_history)
        return History()

    def get_history_of(self, *paths):
        out = set()
        remaining = set()
        for p in paths:
            if p in self.modification_history:
                remaining.update(self.modification_history[p])

        while len(remaining) > 0:
            chunk = remaining.pop()
            out.add(chunk)
            for p in chunk.dependencies:
                pos, dep = self.modification_history[p].get_floor(chunk.stamp)
                if dep == chunk:  # Catch if predecessor is chunk itself
                    dep = self.modification_history[p][pos - 1]
                if dep not in out:
                    remaining.add(dep)

        return Timeline(out)

    def str_history_of(self, p):
        if p not in self.modification_history:
            raise Exception('Path {} has no history.'.format(p))
        return '\n'.join([
            '{:>8.3f} : {}'.format(chunk.stamp, str(chunk.op))
            for chunk in self.modification_history[p]
        ])

    def str_history(self):
        return '\n'.join([
            '{:>8.3f} : {}'.format(chunk.stamp, str(chunk.op))
            for chunk in self.chunk_history
        ])

    def __eq__(self, other):
        if isinstance(other, History):
            return self.chunk_history == other.chunk_history
        return False
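
The expand_dirty_set method above is a plain transitive closure over the dependents edges: anything reachable from a dirty chunk becomes dirty as well. A minimal standalone sketch of the same idea, using a hypothetical Node type in place of the chunk objects from the code above:

def expand_dirty(dirty):
    # grow `dirty` in place until it is closed under the dependents relation
    active = set(dirty)
    while active:
        node = active.pop()
        new = node.dependents - dirty  # dependents not yet marked dirty
        active.update(new)
        dirty.update(new)


class Node:
    def __init__(self):
        self.dependents = set()


a, b, c = Node(), Node(), Node()
a.dependents = {b}
b.dependents = {c}
dirty = {a}
expand_dirty(dirty)
assert dirty == {a, b, c}  # b and c are pulled in transitively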
class SocialNetwork(object):
    ID = 0
    
    strategies = [COOP, DEFE]

    def __init__(self, 
                 fluct,
                 rep,
                 nt_seed,
                 nt_desc,
                 nt_randomseed,
                 coop_prob = JUST_COOPERATORS,
                 randomseed = None,
                 b=1,                   
                 n_per_gen=10, 
                 e_per_gen=2, 
                 epsilon = 0.99,
                 max=1000, 
                 tourn=0.01, 
                 X=0.025,
                 K=sys.maxsize,
                 X2= 0.025):

        # this is for identification of the network
        self.id = self.__class__.ID
        self.__class__.ID += 1
        self.fluct = fluct
             
        self.rep = rep
        self.nt_desc = nt_desc
        self.nt_randomseed = nt_randomseed
        self.coop_prob = coop_prob
        
        # set the PD game
        self.T = b
        self.R = 1
        self.P = 0
        self.S = 0               
        
        # seed for the network, this is useful to replicate exactly the same
        # experiment, particularly useful for debugging
        if randomseed is None:
            self.randomseed = time.time()
        else:
            print("WARNING: random seed is not None. Are you sure?")
            self.randomseed = randomseed
        random.seed(self.randomseed)
        
        # main parameters
        self.b = b
        self.n_per_gen = n_per_gen
        self.e_per_gen = e_per_gen
        if epsilon >= 1.0:
            raise ValueError("""Epsilon cannot be greater than or equal to 1.0.
                             Use a value close to 1.0 instead,
                             e.g. 0.999999999""")
        else:
            self.epsilon = epsilon
        self.max = max
        self.tourn = tourn
        self.X = X
        self.K = K
        self.X2 = X2
        
        # counters
        self.gen = 0
        self.count = 0
        self.cooperators = 0
        self.removed_nodes = 0
        self.total_fit = 0
        self.total_efit = 0
        self.degrees = 0
        self.size = 0
        g = self.g = nx.Graph()

        # create auxiliary network structures to increase efficiency
        self._max = max+n_per_gen
        self.eps_fitness = np.empty(self._max)
        self.degrees = np.empty(self._max)
        self.fitness = np.empty(self._max)
        self.fitness_of = np.empty(self._max, dtype=np.int_)
        self.free_indexes = []
        self.node_set = SortedSet()
        
        # initialize the auxiliary structures
        for i in range(0, self._max):
            self.degrees[i] = 0
            self.fitness_of[i] = -1
            self.free_indexes.append(i)
       
        # create the network 
        self.__create_from_seed(nt_seed, coop_prob)
        
        # define the game the nodes are going to play
        self.game = PD(b, self.fitness)
        
        self.treatment = '_'.join(str(x) for x in (self.nt_desc, 
                                                   self.coop_prob,
                                                   self.fluct, self.b,
                                                   self.X, self.K, self.X2))
        
        self.signature = str(self.id) + '_' + \
                         str(self.rep) + '(' + self.treatment + ')'
        

    def __create_from_seed(self, seed, coop_prob):
        """ This method use the networks structure that comes in the parameter 
        seed as a template for the graph. It adds the necessary attributes to 
        run the algorithm, such as which nodes are cooperators and defectors 
        based on the coop_prob parameter. A value from 0 to 1 indicating a 
        probability of any node of being a cooperators.
        
        Assumes that it is called from the constructor. So it assumes a new 
        SocialNetwork.
        """   
        self.count = -1
        g = self.g
        
        # add nodes from the seed to the network 
        for node in seed.nodes_iter(data = True):
            # define the attributes of the node 
            id = node[0]          
            if coop_prob == 1 or random.uniform(0,1) < coop_prob:
                st = COOP
                self.cooperators += 1
            else:
                st = DEFE
            r_index = self.free_indexes.pop()   
            
            # add the node
            g.add_node(id, st=st, nst=st, r_index=r_index)
            
            self.node_set.add(id)
            self.fitness_of[r_index] = id
            self.fitness[r_index] = 0
            
            # update parameters of the graph
            if id > self.count: 
                self.count = id
            self.size += 1

        self.count += 1
        
        # add edges from the seed to the network
        for e0, e1 in seed.edges_iter():
            g.add_edge(e0, e1)
            
        self.__remove_isolated_nodes()
        
    
    def __remove_isolated_nodes(self):
        g = self.g
        to_remove = []
        for n, adj in g.adj.items():
            if (len(adj) == 0):
                to_remove.append(n)
                
        for n in to_remove:
            r_index = g.node[n]['r_index']
            self.fitness_of[r_index] = -1
            self.free_indexes.append(r_index)
            self.node_set.discard(n)
            g.remove_node(n)
            self.size -= 1
    
    def add_node(self, st):
        """ Add a node to the network
        """
        # calculate rest of the node attributes
        id = self.count
        r_index = self.free_indexes.pop()
        
        # add node
        self.g.add_node(id, st=st, nst=st, r_index=r_index, gen=self.gen)
        
        # update network structures
        self.node_set.add(id)
        self.fitness_of[r_index] = id
        self.fitness[r_index] = 0
        self.degrees[r_index] = 0
        
        # update network parameters
        if st == COOP:
            self.cooperators += 1
        self.size += 1
        self.count += 1
        
        return id


    def play_games_and_remove_isolated_nodes(self):
        g = self.g
        node = g.node
        node_set = self.node_set
        adjacency = self.g.adj
        f = self.fitness
        ef = self.eps_fitness
        eps = self.epsilon
        degrees = self.degrees
                
        f.fill(0)

        total_fit = 0
        total_efit = 0
        total_degrees = 0
        to_remove=[]
        
        for n1 in node_set:
            adj = adjacency[n1]
            len_adj = len(adj)

            # make sure to remove the nodes that have no more edges
            if (len_adj == 0):
                to_remove.append(n1)
                self.removed_nodes += 1
            else:
                att1 = node[n1]
                r_index1 = att1['r_index']    
                
                # update the strategy
                n1_e = att1['st'] = att1['nst']

                # play against all the neighbors
                for n2 in adj.keys():
                    # play each edge just once: only play against neighbors
                    # with a higher id
                    if n2 > n1:
                        att2 = node[n2]
                        if n1_e == att2['nst']:
                            if n1_e == COOP:
                                f[r_index1] += self.R
                                f[att2['r_index']] += self.R
                                total_fit += self.R + self.R
                            else:
                                f[r_index1] += self.P
                                f[att2['r_index']] += self.P
                                total_fit += self.P + self.P
                        else:
                            if n1_e == COOP:
                                f[r_index1] += self.S
                                f[att2['r_index']] += self.T
                                total_fit += self.S + self.T
                            else:
                                f[r_index1] += self.T
                                f[att2['r_index']] += self.S
                                total_fit += self.T + self.S
                
                # this epsilon is important: it keeps a small baseline in the
                # effective fitness, so every node retains some chance of
                # being selected
                ef[r_index1] = 1 - eps + eps * f[r_index1]
                total_efit += ef[r_index1]
                
                # keep the degrees updated for preferential attachment (PA)
                degrees[r_index1] = len_adj
                total_degrees += degrees[r_index1]
                
                       
        # set the class attribute
        self.total_fit = total_fit
        self.total_efit = total_efit
        self.total_degrees = total_degrees
        
        # population is about to collapse
        if self.size - len(to_remove) < self.e_per_gen:
            print("population collapsed with",
                  self.count_coop(), "cooperators and",
                  self.size - self.count_coop(), "defectors")

        # remove nodes that didn't have any edges            
        for n in to_remove:
            r_index = g.node[n]['r_index']
            self.fitness_of[r_index] = -1
            self.free_indexes.append(r_index)
            self.node_set.discard(n)
            g.remove_node(n)
            self.size -= 1

        
    def update_strategies(self):
        g = self.g
        self.gen += 1
        cooperators = 0
        degrees = self.degrees
        
        for n1 in g.nodes_iter(data = True):
            
            neighbors_n1 = g.neighbors(n1[0])
            r_index1 = n1[1]['r_index']
                        
            n2_index = random.choice(neighbors_n1)
            n2 = g.node[n2_index]
            
            # check that the strategies are actually different
            if n1[1]['st'] != n2['st']:
                
                r_n1 = self.fitness[r_index1]
                r_n2 = self.fitness[n2['r_index']]
                
                # Look to see if difference is less than a millionth of
                # largest value and then assume equivalence
                epsilon_fitness = max(r_n2,r_n1) / 1000000
                
                # if the neighbor has a bigger accumulated fitness
                if r_n2 > r_n1 + epsilon_fitness:
                    
                    #   probP = (neighbour_fitness - focal_node_fitness)
                    #           ----------------------------------------
                    #               b * max[k_focal_node, k_neighbour]
                    
                    if random.random() < \
                            (1.0 * (r_n2 - r_n1)) / \
                            (self.b * max(len(neighbors_n1), \
                            len(g.neighbors(n2_index)))):
                        # update the strategy to a temporary vector
                        n1[1]['nst'] = n2['st']

                    
                    """
                    Poncela´s Formula gives to much weight to the number 
                    of nodes, this is an alternate version that would be
                    worth to test:
                      
                    probability P = neighbour_fitness   focal_node_fitness
                                    ------------------ - -----------------
                                     b * k_neighbour      b * k_focal_node

                    
                    if random.random() < (1.0 * r_n2) / \
                                         (self.b*len(g.neighbors(n2_index)))-\
                                         (1.0 * r_n1) / \
                                         (self.b*len(neighbors_n1)):
                     n1[1]['nst'] = n2['st']
                     """
            
            # update cooperators counter
            if n1[1]['nst'] == COOP:
                cooperators += 1
                
        self.cooperators = cooperators
    
    
    def growth_initial(self, growth):
        """ This method make sure that the first growth completes the nodes
        necessary to get to a consistent increment of 10 per generation. It
        just applies for starting networks that are smaller than self.n_per_gen 
        """
        if self.size < self.n_per_gen:
            temp = self.n_per_gen
            self.n_per_gen = self.n_per_gen - self.count
            growth(self)
            self.n_per_gen = temp



    def attrition(self, selection_method):
        g = self.g

        # despite the name, the selected nodes are the losers to be removed
        winners = selection_method(self)
      
        # remove the winning nodes
        for winner in winners:    
            # remove the node from the graph and update fitness arrays
            r_index = g.node[winner]['r_index']
            self.fitness_of[r_index] = -1
            self.free_indexes.append(r_index)
            self.node_set.discard(winner)
            g.remove_node(winner)
            self.size -= 1
            
        # The removal of nodes with no edges has been moved to the play_games
        # phase to optimize the code. The auxiliary method remove_isolated
        # exists to produce the same results when that phase is not run.
        
    def remove_isolated(self, select_winners):
        g = self.g
        to_remove = []

        for n, adj in g.adj.items():
            if (len(adj) == 0):
                to_remove.append(n)
                self.removed_nodes += 1
        
        if self.size - len(to_remove) < self.e_per_gen:
            print ("population collapsed with", 
                   self.count_coop(), "cooperators and",
                   self.size - self.count_coop(), "defectors" )
            
        for n in to_remove:
            r_index = g.node[n]['r_index']
            self.fitness_of[r_index] = -1
            self.free_indexes.append(r_index)
            self.node_set.discard(n)
            g.remove_node(n)
            self.size -= 1
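
The strategy update above uses the rule from the comment in update_strategies: probP = (neighbour_fitness - focal_node_fitness) / (b * max[k_focal_node, k_neighbour]). A standalone sketch of that probability, with hypothetical names (not part of the class):

import random


def imitation_probability(f_focal, f_neigh, k_focal, k_neigh, b):
    # probability that the focal node copies its neighbour's strategy:
    # proportional to the payoff difference, normalised by b * max degree
    if f_neigh <= f_focal:
        return 0.0
    return (f_neigh - f_focal) / (b * max(k_focal, k_neigh))


# focal node: payoff 2.0, degree 4; neighbour: payoff 5.0, degree 3; b = 1.5
p = imitation_probability(2.0, 5.0, 4, 3, 1.5)  # (5 - 2) / (1.5 * 4) = 0.5
copies_neighbour = random.random() < p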
Example #29
0
class TaskCache:
    """
    Thread-safe object to provide functionality of a task queue with fast read, write and delete.

    Let n be the number of tasks.
    Space complexity: O(n)
    Time complexity:
        add_task: O(log n)
        get_next_task: O(log n)
        remove_task: O(log n)
    """
    def __init__(self):
        self.__lock = RLock()
        self.__tasks_schedules = SortedSet()  # {(expiry_dt, id)}
        self.__tasks_dict = dict()  # {id: (title, expiry_dt)}

    def add_task(self, task: Task):
        """
        Add Task to the task queue.
        If a Task with the same id already exists, an existing record will be replaced.
        Otherwise will add a new record with key id.

        :param task: Task
        :return:
        """
        id, title, expiry_dt = task
        self.__lock.acquire()
        try:
            if id in self.__tasks_dict:
                self.__tasks_schedules.remove((self.__tasks_dict[id][1], id))
            self.__tasks_dict[id] = (title, expiry_dt)
            self.__tasks_schedules.add((expiry_dt, id))
        finally:
            self.__lock.release()

    def get_next_task(self) -> Optional[Task]:
        """
        Return the earliest Task in terms of expiry datetime, or None if
        self.__tasks_schedules is empty.

        :return: task: Task or None
        """
        self.__lock.acquire()
        try:
            if self.__tasks_schedules:
                expiry_dt, id = self.__tasks_schedules[0]
                task_content = self.__tasks_dict.get(id)
                task = id, task_content[0], task_content[1]
            else:
                task = None
        finally:
            self.__lock.release()
        return task

    def task_done(self, task: Task):
        """
        Remove task from self.__tasks_schedules and self.__tasks_dict.

        :param task: Task
        :return:
        """
        id, title, expiry_dt = task
        self.remove_task(id)

    def remove_task(self, id: int):
        """
        Remove task from both self.__tasks_schedules and self.__tasks_dict.

        :param id: int
        :return:
        """
        self.__lock.acquire()
        try:
            title, expiry_dt = self.__tasks_dict[id]
            self.__tasks_schedules.discard((expiry_dt, id))
            del self.__tasks_dict[id]
        except KeyError:
            pass
        finally:
            self.__lock.release()

    def clear_all_tasks(self):
        """
        Clear all tasks from both self.__tasks_schedules and self.__tasks_dict.

        :return:
        """
        self.__lock.acquire()
        try:
            self.__tasks_schedules = SortedSet()
            self.__tasks_dict = dict()
        finally:
            self.__lock.release()
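
A short usage sketch of TaskCache. It assumes Task is an (id, title, expiry_dt) triple, which is the shape the methods above unpack; the namedtuple here is only an illustration:

from collections import namedtuple
from datetime import datetime, timedelta

Task = namedtuple('Task', ['id', 'title', 'expiry_dt'])  # assumed shape

cache = TaskCache()
now = datetime.now()
cache.add_task(Task(1, 'backup', now + timedelta(hours=2)))
cache.add_task(Task(2, 'report', now + timedelta(hours=1)))

nxt = cache.get_next_task()  # task 2: the earliest expiry wins
cache.task_done(nxt)         # removes task 2 from both structures
cache.remove_task(1)         # explicit removal by id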
Example #30
0
    def computePairs(self):
        # Extract pairs of sets: within each set the activities have no
        # relation to one another, and every activity in the first set must
        # directly precede (causality) every activity in the second set.
        # Lemma 4
        pairs_causality = []
        pairs_choices = []
        pairs = []
        #Extract all possible pairs of activity with causality relation
        for activity1, relations1 in self.relations.items():
            for activity2, relation in relations1.items():
                if relation == Relations.RIGHT_CAUSALITY:
                    pairs_causality.append((activity1, activity2))
                if relation == Relations.CHOICES:
                    if activity1 == activity2:
                        pairs_choices.append((activity1, ))
                    else:
                        pairs_choices.append((activity1, activity2))

        print(pairs_causality)
        pairs = pairs_causality
        print(pairs_choices)

        # find all possible sets of activities related pairwise by the
        # CHOICES relation
        i = 0
        j = len(pairs_choices)

        while i < j:
            seti = pairs_choices[i]
            for pair in pairs_choices:
                union = True
                if len(SortedSet(seti).intersection(SortedSet(pair))) != 0:
                    for e1 in pair:
                        if union == False:
                            break
                        for e2 in seti:
                            if self.relations[e1][e2] != Relations.CHOICES:
                                union = False
                                break
                    if union:
                        new_pair = SortedSet(seti) | SortedSet(pair)
                        if tuple(new_pair) not in pairs_choices:
                            pairs_choices.append(tuple(new_pair))
                            j = j + 1
                            # re-evaluate the loop bound: a new pair was added

            i = i + 1

        print(pairs_choices)

        # Union
        for pair_choices1 in pairs_choices:
            for pair_choices2 in pairs_choices:
                relation_between_pair = None
                makePair = True
                print("pair 1", pair_choices1)
                print("pair 2", pair_choices2)
                intersection = SortedSet(pair_choices1).intersection(
                    pair_choices2)
                pair_choices2 = SortedSet(pair_choices2)
                if len(intersection) != 0:
                    # remove intersection terms in the second pair
                    for term in intersection:
                        pair_choices2.discard(term)

                if (len(pair_choices2) == 0):
                    continue
                pair_choices2 = tuple(pair_choices2)
                print("pair_choices2 with discarded term :", pair_choices2)

                for activity1 in pair_choices1:
                    print(activity1)
                    if makePair == False:
                        break
                    for activity2 in pair_choices2:
                        print(activity2)
                        relation = self.relations[activity1][activity2]
                        if relation_between_pair != None and relation_between_pair != relation:
                            makePair = False
                            break
                        else:
                            relation_between_pair = relation
                        if relation != Relations.RIGHT_CAUSALITY:
                            makePair = False
                            break
                if makePair == True:
                    print("makepair true")
                    print(pair_choices1)
                    print(pair_choices2)
                    if relation_between_pair == Relations.RIGHT_CAUSALITY:
                        new_pair = (pair_choices1, pair_choices2)
                    else:
                        new_pair = (pair_choices2, pair_choices1)
                    pairs.append(new_pair)
        print("\n")
        print("\n")
        print(pairs)
        self.pairs = pairs
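
The pairing logic above reduces to two conditions: inside each set every pair of activities must be in the CHOICES relation, and every activity of the first set must reach every activity of the second via RIGHT_CAUSALITY. A standalone sketch with a minimal, hypothetical Relations enum (the original enum is not shown here):

from enum import Enum


class Relations(Enum):  # minimal stand-in for the enum used above
    RIGHT_CAUSALITY = 1
    CHOICES = 2


def can_pair(set_a, set_b, relations):
    # True if (set_a, set_b) forms a valid pair under the two conditions
    for s in (set_a, set_b):
        if any(relations[x][y] != Relations.CHOICES
               for x in s for y in s if x != y):
            return False
    return all(relations[a][b] == Relations.RIGHT_CAUSALITY
               for a in set_a for b in set_b)


relations = {
    'a': {'b': Relations.RIGHT_CAUSALITY, 'c': Relations.RIGHT_CAUSALITY},
    'b': {'c': Relations.CHOICES},
    'c': {'b': Relations.CHOICES},
}
print(can_pair(('a',), ('b', 'c'), relations))  # True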
Example #31
0
def test5():
    """
    有序的集合:SortedSet
    网址:http://www.grantjenks.com/docs/sortedcontainers/sortedset.html
    """
    from sortedcontainers import SortedSet
    # 创建 SortedSet
    ss = SortedSet([3, 1, 2, 5, 4])
    print(ss)  # SortedSet([1, 2, 3, 4, 5])
    from operator import neg
    ss1 = SortedSet([3, 1, 2, 5, 4], neg)
    print(ss1)  # SortedSet([5, 4, 3, 2, 1], key=<built-in function neg>)
    # SortedSet 转为 list/tuple/set
    print(list(ss))  # SortedSet转为list    [1, 2, 3, 4, 5]
    print(tuple(ss))  # SortedSet转为tuple    (1, 2, 3, 4, 5)
    print(set(ss))  # SortedSet转为set    {1, 2, 3, 4, 5}
    # 插入、删除元素
    ss.discard(-1)  # 删除不存在的元素不报错
    ss.remove(1)  # 删除不存在的元素报错, KeyError
    ss.discard(3)  # SortedSet([1, 2, 4, 5])
    ss.add(-10)  # SortedSet([-10, 1, 2, 4, 5])
    # 返回第一个和最后一个元素
    print(ss[0])  # -10
    print(ss[-1])  # 5
    # 遍历 set
    for e in ss:
        print(e, end=", ")  # -10, 2, 4, 5,
    print()
    # set 中判断某元素是否存在
    print(2 in ss)  # True
    # bisect_left() / bisect_right()
    print(ss.bisect_left(4))  # 返回大于等于4的最小元素对应的下标    2
    print(ss.bisect_right(4))  # 返回大于4的最小元素对应的下标    3
    # 清空 set
    ss.clear()
    print(len(ss))  # 0
    print(len(ss) == 0)  # True
    """
    无序的集合: set
    """
    # 集合的定义:集合是不可变的,因此集合中元素不能是list
    A = {"hi", 2, ("we", 24)}
    B = set()  # 空集合的定义,不能使用B = {}定义集合,这样是字典的定义
    # 集合间的操作, 下面的运算法符都可以写成 op= 的形式
    print("---------------------------------------")
    S = {1, 2, 3}
    T = {3, 4, 5}
    print(S & T)  # 交集,返回一个新集合,包括同时在集合S和T中的元素
    print(S | T)  # 并集,返回一个新集合,包括在集合S和T中的所有元素
    print(S - T)  # 差集,返回一个新集合,包括在集合S但不在T中的元素
    print(S ^ T)  # 补集,返回一个新集合,包括集合S和T中的非相同元素
    # 集合的包含关系
    print("---------------------------------------")
    C = {1, 2}
    D = {1, 2}
    print(C <= D)  # C是否是D的子集  True
    print(C < D)  # C是否是D的真子集  False
    print(C >= D)  # D是否是C的子集  True
    print(C > D)  # D是否是C的真子集  False
    # 集合的处理方法
    print("---------------------------------------")
    S = {1, 2, 3, 5, 6}
    S.add(4)  # 如果x不在集合S中,将x增加到S
    S.discard(1)  # 移除S中元素x,如果x不在集合S中,不报错
    S.remove(2)  # 移除S中元素x,如果x不在集合S中,产生KeyError异常
    for e in S:  # 遍历
        print(e, end=",")
    print()
    print(S.pop())  # 从S中随机弹出一个元素,S长度减1,若S为空产生KeyError异常
    print(S.copy())  # 返回集合S的一个副本, 对该副本的操作不会影响S
    print(len(S))  # 返回集合S的元素个数
    print(5 in S)  # 判断S中元素x, x在集合S中,返回True,否则返回False
    print(5 not in S)  # 判断S中元素x, x在集合S中,返回True,否则返回False
    S.clear()  # 移除S中所有元素
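
As a small extension of the demo above, bisect_left and bisect_right combine naturally to count elements in a half-open range (a hypothetical helper, not part of the original test):

from sortedcontainers import SortedSet


def count_in_range(ss, lo, hi):
    # number of elements x in ss with lo <= x < hi
    return ss.bisect_left(hi) - ss.bisect_left(lo)


ss = SortedSet([1, 3, 5, 7, 9])
print(count_in_range(ss, 3, 8))  # 3 -> the elements 3, 5 and 7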
Example #32
0
def py_star(width, height, costs, startIndex, endIndex, diagonalOk):
    if (width < 0 or height < 0):
        raise ValueError("Width and height have to be non-negative!")
    if (width * height != len(costs)):
        raise ValueError("Width * height != len(costs)!")
    if (startIndex < 0) or (startIndex >
                            (len(costs) - 1)) or (endIndex <
                                                  0) or (endIndex >
                                                         (len(costs) - 1)):
        raise ValueError(
            f"Start and end indices have to be in the range [0, {len(costs)})!"
        )

    # find the path from exit to start; this way, when traversing the nodes
    # from the start, every node points to the next one in the path
    startIndex, endIndex = endIndex, startIndex
    startPos = (startIndex % width, startIndex // width)
    endPos = (endIndex % width, endIndex // width)
    nodeMap = [
        Node(idx, math.inf, 0.0, math.inf, None)
        for idx in range(0, len(costs))
    ]
    endNode = nodeMap[endIndex]
    startNode = nodeMap[startIndex]

    startNode.sureCost = 0
    startNode.heuristicCost = heuristicCost(startPos, endPos, diagonalOk)
    startNode.combinedCost = startNode.sureCost + startNode.heuristicCost

    DIAG_COST = math.sqrt(2)
    openlist = SortedSet([startNode], key=lambda node: node.combinedCost)
    closedlist = set()
    while len(openlist) > 0:
        current = openlist.pop(0)
        if current == endNode:
            # call with end and start switched to get correct direction back
            return (constructPath(endNode, startNode), closedlist)
        closedlist.add(current.idx)
        curX, curY = posFromIndex(current.idx, width)
        for dx in range(-1, 2):
            for dy in range(-1, 2):
                # skip diagonal entries if diagonal moves are not allowed
                if not diagonalOk and (abs(dx) == abs(dy)):
                    continue
                x, y = curX + dx, curY + dy
                # skip if the node would fall outside the rectangle
                # (no unsigned-cast wrap-around like in the C++ version)
                if (x - width + 1) * x > 0 or (y - height + 1) * y > 0:
                    continue
                neighbor = nodeMap[current.idx + dx + dy * width]
                # skip previously visited nodes, including the current node
                if neighbor.idx in closedlist:
                    continue
                # skip if node is not passable
                if costs[neighbor.idx] < 0:
                    continue
                diagonalMove = (dx * dy) != 0
                newSureCost = current.sureCost + (DIAG_COST if diagonalMove
                                                  else 1) * costs[neighbor.idx]
                if newSureCost < neighbor.sureCost:
                    # discard before changing the sort key, otherwise the
                    # ordered set would be invalidated
                    openlist.discard(neighbor)
                    neighbor.sureCost = newSureCost
                    neighbor.heuristicCost = heuristicCost((x, y), endPos,
                                                           diagonalOk)
                    # combined cost for ordering of the open set
                    neighbor.combinedCost = neighbor.sureCost + neighbor.heuristicCost
                    neighbor.parent = current
                    openlist.add(neighbor)

    return ([-1], closedlist)
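
py_star relies on several helpers (Node, posFromIndex, heuristicCost, constructPath) that are not shown in this example. The definitions below are an illustrative sketch of what they might look like, written as assumptions rather than the original code:

import math


class Node:
    # idx: position in the flat cost array; parent links rebuild the path
    def __init__(self, idx, sureCost, heuristicCost, combinedCost, parent):
        self.idx = idx
        self.sureCost = sureCost
        self.heuristicCost = heuristicCost
        self.combinedCost = combinedCost
        self.parent = parent


def posFromIndex(idx, width):
    return (idx % width, idx // width)


def heuristicCost(pos, endPos, diagonalOk):
    dx, dy = abs(pos[0] - endPos[0]), abs(pos[1] - endPos[1])
    if diagonalOk:  # octile distance
        return (dx + dy) + (math.sqrt(2) - 2) * min(dx, dy)
    return dx + dy  # Manhattan distance


def constructPath(endNode, startNode):
    # walk the parent links from the end node back to the start node
    path, node = [], endNode
    while node is not None and node is not startNode:
        path.append(node.idx)
        node = node.parent
    path.append(startNode.idx)
    return path

# smoke test on a 3x3 grid of unit costs (uses py_star defined above):
# path, visited = py_star(3, 3, [1.0] * 9, 0, 8, True)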
class ExpandUserIdentity:
    def __init__(self,
                 lg,
                 rg,
                 seeds_0,
                 name_sim_threshold=61,
                 is_repeat=False,
                 model=None,
                 cache_files=None):
        if is_repeat:
            print("Algorithm with repeated seeds is selected")
        else:
            print('Algorithm without seed repetition is selected')
        self.lg = lg
        self.rg = rg
        self.seed_0_count = len(seeds_0)
        if is_repeat:
            print('WITH REPEAT!!!')
        self.with_repeat = is_repeat
        # M <- A_0 (note: A = A_0)
        self.lNodeM = set()
        self.rNodeM = set()
        self.matches = set()

        for s in seeds_0:
            lnode, rnode = s
            self.matches.add((lnode, rnode))
            self.lNodeM.add(lnode)
            self.rNodeM.add(rnode)

        self.seeds = list(seeds_0)
        self.used = set()
        # score_map keeps the current mark count for every candidate pair
        self.score_map = dict()
        self.bad_name = set()
        if model:
            print('WITH MODEL!!!')
            self.model = model
            self.has_model = True
            self.load_cache(cache_files)
            self.__get_top = self.__get_top_with_model
            self.inactive_pairs = SortedSet(
                key=lambda x: (x[2], self.__name_similar(x[0], x[1]) / 100 +
                               self.__top_with_model(x)))
            self.__decide_seed = self.__decide_seed_with_model
        else:
            self.__get_top = self.__get_top_no_model
            self.inactive_pairs = SortedSet(
                key=lambda x: (x[2], -1 * self.f_deg_diff(x)))
            self.__decide_seed = self.__decide_seed_no_model
            self.has_model = False

        self.name_sim_threshold = name_sim_threshold
        print('name_sim_threshold', name_sim_threshold)
        n_common = 0
        s2 = set([v['uid'] for v in lg.vs])
        for v in rg.vs:
            n_common += int(v['uid'] in s2)
        self.n_common = n_common

    def load_cache(self, cache_files):
        base_folder = '/home/ildar/projects/pycharm/social_network_revealing/graphmatching/'
        folder_data = os.path.join(base_folder, 'data')
        folder_gen = os.path.join(folder_data, 'generated')
        self.f_set1s = dict(
            pickle.load(open(os.path.join(folder_gen, cache_files[0]), "rb")))
        self.f_set2s = dict(
            pickle.load(open(os.path.join(folder_gen, cache_files[1]), "rb")))
        print('Cache loaded', len(self.f_set1s), len(self.f_set2s))

    def to_str(self, ln, rn):
        return '%d|%d' % (ln, rn)

    def untokenize(self, s):
        i = s.index('|')
        return int(s[:i]), int(s[i + 1:])

    def f_deg_diff(self, s):
        return abs(self.lg.degree(s[0]) - self.rg.degree(s[1]))

    def __in_matched(self, lnode, rnode):
        return lnode in self.lNodeM or rnode in self.rNodeM

    def __add_match(self, lnode, rnode, seed_count):
        self.matches.add((lnode, rnode))
        self.lNodeM.add(lnode)
        self.rNodeM.add(rnode)

    def __name_similar(self, li, ri):
        return fuzz.token_set_ratio(self.lg.vs[li]['fname'],
                                    self.rg.vs[ri]['fname'])

    # def __part_spread_marks(self, data):
    #     seeds = data['seeds']
    #     seeds_collect = {}
    #     old_marks = {}
    #     for seed in tqdm(seeds):
    #         self.used.add(self.to_str(*seed))
    #         self.__spread_mark(*seed, seeds_collect=seeds_collect, old_marks=old_marks)

    # def __spread_marks_parallel(self):
    #     # for all pairs[i, j] of A do
    #     threads = 4
    #     print('start __spread_marks')
    #     self.seeds_collect = {}
    #     self.old_marks = {}
    #     data_list = []
    #     thr_size = len(self.seeds) // threads
    #     for i in range(threads):
    #         s = i * thr_size
    #         e = i * thr_size + thr_size
    #         data = {
    #             'seeds' : self.seeds[s:e] if (i + 1) < threads else self.seeds[s:]
    #         }
    #         data_list.append(data)
    #
    #     pool = ThreadPool(threads)
    #     pool.map(self.__part_spread_marks, data_list)
    #     pool.close()
    #     pool.join()
    #
    #     for seed, marks_count in tqdm(self.seeds_collect.items()):
    #         ID_str = self.to_str(*seed)
    #         m = (seed[0], seed[1], self.old_marks[ID_str])
    #         self.inactive_pairs.discard(m)
    #         self.inactive_pairs.add((seed[0], seed[1], marks_count))
    #
    #     # A <- None
    #     self.seeds.clear()
    #     print("Seed are expanded")
    def __spread_mark(self, lnode, rnode, seeds_collect=None, old_marks=None):
        # add one mark to all neighboring pairs of [i,j]
        if seeds_collect is None:
            seeds_collect = {}
            old_marks = {}
            is_from_spread_marks = False
        else:
            is_from_spread_marks = True
        for l_neighbor in self.lg.neighbors(lnode):
            for r_neighbor in self.rg.neighbors(rnode):
                ID_str = self.__decide_seed(l_neighbor, r_neighbor)
                if not ID_str:
                    continue

                val = self.score_map.get(ID_str)
                if not val:
                    self.score_map[ID_str] = 1
                    continue
                if ID_str not in old_marks:
                    old_marks[ID_str] = val
                self.score_map[ID_str] += 1
                seeds_collect[(l_neighbor, r_neighbor)] = val + 1
        if is_from_spread_marks:
            return
        for seed, marks_count in seeds_collect.items():
            ID_str = self.to_str(*seed)
            m = (seed[0], seed[1], old_marks[ID_str])
            self.inactive_pairs.discard(m)
            self.inactive_pairs.add((seed[0], seed[1], marks_count))

    def __spread_marks(self):
        # for all pairs[i, j] of A do
        print('start __spread_marks')
        seeds_collect = {}
        old_marks = {}
        for seed in tqdm(self.seeds):
            self.used.add(self.to_str(*seed))
            self.__spread_mark(*seed,
                               seeds_collect=seeds_collect,
                               old_marks=old_marks)

        for seed, marks_count in tqdm(seeds_collect.items()):
            ID_str = self.to_str(*seed)
            m = (seed[0], seed[1], old_marks[ID_str])
            self.inactive_pairs.discard(m)
            self.inactive_pairs.add((seed[0], seed[1], marks_count))

        # A <- None
        self.seeds.clear()
        print("Seed are expanded")

    def __get_top_no_model(self):
        # pop from the top, skipping pairs that are already matched
        while self.inactive_pairs:
            s = self.inactive_pairs.pop()
            if not self.__in_matched(s[0], s[1]):
                return s
        return None

    def __get_top_with_model(self):
        # pop from the top, skipping pairs that are already matched
        while self.inactive_pairs:
            s = self.inactive_pairs.pop()
            if not self.__in_matched(s[0], s[1]):
                return s
        return None

    def __decide_seed_no_model(self, lnode, rnode):
        # i,j not in V_1,V_2(M) and [i,j] not in Z
        if self.__in_matched(lnode, rnode):
            return False
        ID_str = self.to_str(lnode, rnode)
        if ID_str in self.used or ID_str in self.bad_name:
            return False
        if self.__name_similar(lnode, rnode) < self.name_sim_threshold:
            self.bad_name.add(self.to_str(lnode, rnode))
            return False
        return ID_str

    def __decide_seed_with_model(self, lnode, rnode):
        # i,j not in V_1,V_2(M) and [i,j] not in Z
        if self.__in_matched(lnode, rnode):
            return False
        ID_str = self.to_str(lnode, rnode)
        if ID_str in self.used:
            return False
        return ID_str

    def degs(self, node):
        s = []
        for v in node.neighbors():
            s.append(v.degree())
        return s

    def __top_with_model(self, s):
        lv = self.lg.vs[s[0]]
        rv = self.rg.vs[s[1]]

        feature_l = self.f_set1s[lv['uid']]
        feature_r = self.f_set2s[rv['uid']]

        feature_set = feature_l + feature_r
        n_deg = lv.degree()
        m_deg = rv.degree()
        feature_set.append(abs(n_deg - m_deg) / max(n_deg, m_deg, 1))

        # ratio = self.__name_similar(s[0], s[1])
        # feature_set.append(ratio)
        x = np.array(feature_set).reshape((1, -1))
        return self.model.predict(
            x) == 1  # ratio >= self.name_sim_threshold and

    def __extend_seeds_by_matches(self):
        # A <- all neighbors of M [i,j] not in Z, i,j not in V_1,V_2(M)
        print('__extend_seeds_by_matches')
        for m in tqdm(self.matches):
            lnode, rnode = m
            # all neighbors of M
            for l_neighbor in self.lg.neighbors(lnode):
                for r_neighbor in self.rg.neighbors(rnode):
                    if not self.__decide_seed(l_neighbor, r_neighbor):
                        continue
                    self.seeds.append((l_neighbor, r_neighbor))
        print("Extended seed size: ", len(self.seeds))

    def __garbage_collect(self):
        #####################
        # Garbage collector #
        #####################
        if len(self.score_map) < 20000000 or len(
                self.inactive_pairs) < 10000000:
            return
        print("Garbage collector:")
        print("\talgorithm: time elapsed: %s" % (time.time() - self.s_time))
        print("\tSize score map: %d" % len(self.score_map))
        # iterate over a copy: entries are deleted during the scan
        for s in tqdm(list(self.score_map)):
            ln, rn = self.untokenize(s)
            if self.__in_matched(ln, rn):
                del self.score_map[s]

        print("\tSize score map (cleared): %d" % len(self.score_map))

        print("\tSize inactive pairs: %d" % len(self.inactive_pairs))
        # iterate over a copy: removing from a SortedSet while iterating it
        # directly would break the iteration
        for p in list(self.inactive_pairs):
            if self.__in_matched(p[0], p[1]):
                self.inactive_pairs.remove(p)
        print("\tSize inactive pairs (cleared): %d" % len(self.inactive_pairs))

    def __inter_result(self):
        correct, wrong = 0, 0
        for ln, rn in self.matches:
            ln = self.lg.vs[ln]['name']
            rn = self.rg.vs[rn]['name']
            if ln == rn:
                correct += 1
            else:
                wrong += 1
        return correct, wrong

    def __dist_sim(self, vl, vr):
        sl = [v.degree() for v in self.lg.vs[vl].neighbors()]
        sr = [v.degree() for v in self.rg.vs[vr].neighbors()]
        return ks_2samp(sl, sr).pvalue  # statistic

    def execute(self):
        self.s_time = time.time()

        iter_num = 0
        show_counter = 0
        show_bound = 50
        show_bound_match = 50
        used_used = set()
        round = 1
        # while |A| > 0 do
        while (len(self.seeds) > 0):
            # while |A| > 0 do
            while (len(self.seeds) > 0):
                iter_num += 1
                print("Iter num: %d\tseed size = %d" %
                      (iter_num, len(self.seeds)))
                # for all pairs[i, j] of A do
                self.__spread_marks()
                print('Done')
                # while there exists an unmatched pair with score at least r+1
                while self.inactive_pairs:
                    show_counter += 1
                    if show_counter % show_bound == 0:
                        print("In progress... (%d)" % len(self.inactive_pairs))
                    # remove from start matched pairs
                    s = self.__get_top()
                    if not s: break
                    elif (show_counter % show_bound == 0):
                        print("[%d] select the unmatched pair [%d,%d]" %
                              (show_counter, s[0], s[1]))
                        print("score map size = %d" % len(self.score_map))
                    lnode, rnode, seed_count = s
                    # add [i,j] to M
                    self.__add_match(lnode, rnode, seed_count)

                    ID_not_active = self.to_str(lnode, rnode)
                    # if [i,j] not in Z
                    if not ID_not_active in self.used:
                        # add [i,j] to Z
                        self.used.add(ID_not_active)
                        # add one mark to all of its neighbouring pairs
                        self.__spread_mark(lnode, rnode)
                    # self.__garbage_collect()
                    if len(self.matches) % show_bound_match == 0:
                        print("Correct = %d, Wrong = %d" %
                              self.__inter_result())

                print("Finish with inactive_pairs")
                # if len(self.bad_name) > 40000000:
                #     self.bad_name.clear()
                #     print("Cleared bad names storage")
                # A <- all neighbors of M [i,j] not in Z, i,j not in V_1,V_2(M)
                self.__extend_seeds_by_matches()
            if self.with_repeat:
                for s in self.used:
                    l_neighbor, r_neighbor = self.untokenize(s)
                    if not self.__in_matched(l_neighbor, r_neighbor) and s not in used_used and \
                                    self.__name_similar(l_neighbor, r_neighbor) >= 99:
                        self.seeds.append((l_neighbor, r_neighbor))
                        used_used.add(s)
                        print(
                            'Updated round %d, seed count = %d. used_used = %d'
                            % (round, len(self.seeds), len(used_used)))
                        round += 1
        self.time_elapsed = time.time() - self.s_time

    def assure_folder_exists(self, path):
        folder = os.path.dirname(path)
        print(os.path.abspath(folder))
        if not os.path.exists(folder):
            os.makedirs(folder)

    def save_result(self):
        if self.seed_0_count > 100:
            repeat_name = 'seed_matches'
        else:
            repeat_name = 'repeat' if self.with_repeat else 'no_repeat'
        fname = '%.3d/matches_s_%.2d_th_%.3d_t_%s.pickle' % (
            self.name_sim_threshold, self.seed_0_count,
            self.name_sim_threshold, time.strftime("%m-%d_%H:%M:%S"))
        fname = os.path.join('matches', repeat_name, fname)
        self.assure_folder_exists(fname)

        lid_rid = []
        for lnode, rnode in self.matches:
            lid = self.lg.vs[lnode]['uid']
            rid = self.rg.vs[rnode]['uid']
            lid_rid.append((lid, rid))
        assert len(lid_rid) == len(self.matches)
        pickle.dump(lid_rid, open(fname, 'wb'))
        return fname

    def check_result(self):
        correct, wrong = self.__inter_result()
        msize = len(self.matches)

        recall = float(correct) / self.n_common
        precision = float(correct) / msize
        f1_score = (2 * precision * recall / (precision + recall)
                    if (precision + recall) > 0 else 0.0)

        print("------RESULT-------")
        print("\tfor lN = %d, rN = %d, |seed_0| = %d" %
              (self.lg.vcount(), self.rg.vcount(), self.seed_0_count))
        print("\tmatched =", msize)
        print("\t\tcorrect = %d; wrong = %d" % (correct, wrong))
        print("\tRecall = %f" % recall)
        print("\tPrecision = %f" % precision)
        print("\tF1-score = %f" % f1_score)