def containsNearbyAlmostDuplicate(self, nums, k, t):
    """Tell whether two nearby elements of ``nums`` are almost equal.

    Searches for two distinct indices ``i`` and ``j`` such that
    ``abs(i - j) <= k`` and ``abs(nums[i] - nums[j]) <= t``.

    Empty input, ``k < 1`` (no two distinct indices can be that close) and
    ``t < 0`` (an absolute difference is never negative) all yield ``False``
    instead of raising.

    :type nums: List[int]
    :type k: int
    :type t: int
    :rtype: bool
    """
    # Sliding window + ordered container (a SortedSet is used here).
    if not nums or k < 1 or t < 0:
        return False

    window = SortedSet()
    for i, value in enumerate(nums):
        # The window is full at this point: drop its left-most element so
        # that it only holds nums[i - k] .. nums[i - 1].
        if i > k:
            window.discard(nums[i - k - 1])
        # Smallest windowed value >= value - t; it is a hit when it is also
        # <= value + t.  Works for a window of any size, including 0 or 1.
        pos = window.bisect_left(value - t)
        if pos < len(window) and window[pos] <= value + t:
            return True
        # No windowed value equals ``value`` here (t >= 0 would have matched
        # it), so the set never silently merges two live entries.
        window.add(value)
    return False
class DinnerPlates:
    """A row of stacks, each holding at most ``capacity`` values.

    ``stacks`` is the list of stacks from left to right; trailing empty
    stacks are removed after every pop.  ``index`` is an ordered set with
    the positions of the existing stacks that still have room, so the
    left-most one is always ``index[0]``.
    """

    def __init__(self, capacity: int):
        self.cap = capacity
        self.index = SortedSet()
        self.stacks = []

    def push(self, val: int) -> None:
        """Push ``val`` onto the left-most stack that is not full."""
        if self.index:
            target = self.index[0]
        else:
            # Every existing stack is full: open a new one on the right.
            target = len(self.stacks)
        if target == len(self.stacks):
            self.stacks.append([])

        plates = self.stacks[target]
        plates.append(val)
        size = len(plates)
        if size == self.cap:
            self.index.discard(target)
        elif size == 1 and self.cap != 1:
            # A freshly opened (or previously emptied) stack has room again.
            self.index.add(target)

    def pop(self) -> int:
        """Pop from the right-most stack; return -1 when there is none."""
        return self.popAtStack(len(self.stacks) - 1)

    def popAtStack(self, index: int) -> int:
        """Pop from stack ``index``; return -1 if it is missing or empty."""
        if not 0 <= index < len(self.stacks):
            return -1
        plates = self.stacks[index]
        if not plates:
            return -1

        val = plates.pop()
        if len(plates) == self.cap - 1:
            # The stack was full a moment ago, so it was not tracked yet.
            self.index.add(index)

        # Trim empty stacks off the right end and forget their positions.
        while self.stacks and not self.stacks[-1]:
            self.stacks.pop()
            self.index.discard(len(self.stacks))
        return val
def busiestServers(self, k: int, arrival: List[int], load: List[int]) -> List[int]:
    """Return the indices of the servers that handled the most requests.

    Request ``i`` arrives at ``arrival[i]`` and keeps its server busy for
    ``load[i]``.  It goes to the idle server with the smallest index
    ``>= i % k``, wrapping around to the smallest idle index if there is
    none; it is dropped when no server is idle.
    """
    handled = [0] * k
    free = SortedSet(range(k))
    running = []  # min-heap of (finish_time, server)
    best = 0

    for req, start in enumerate(arrival):
        # Release every server whose job has finished by now.
        while running and running[0][0] <= start:
            free.add(heapq.heappop(running)[1])
        if not free:
            continue

        pos = free.bisect_left(req % k)
        server = free[pos] if pos < len(free) else free[0]

        handled[server] += 1
        if handled[server] > best:
            best = handled[server]
        heapq.heappush(running, (start + load[req], server))
        free.discard(server)

    return [server for server, count in enumerate(handled) if count == best]
def greedyCoverage(inFile, outFile, maxCoverage, verboseFlag):
    '''
    Copy reads from inFile to outFile, keeping at most maxCoverage
    simultaneously "active" reads.

    This method is greedy but reads in low coverage areas may be missed if
    the current set is full.

    Every kept read occupies one slot, stored as (reference_end, serial).
    The serial keeps reads that end at the same position apart; storing the
    bare end position in a set merges them into one slot and lets more than
    maxCoverage overlapping reads through.

    inFile      -- object whose fetch(until_eof=True) yields reads exposing
                   is_unmapped, reference_start and reference_end
    outFile     -- object whose write(read) stores a kept read
    maxCoverage -- maximum number of slots
    verboseFlag -- when true, print progress and a final summary
    '''
    if verboseFlag:
        print("Reducing coverage using the Greedy method")

    active = SortedSet()
    mapped = 0
    filtered = 0

    for r in inFile.fetch(until_eof=True):
        if r.is_unmapped:
            continue
        mapped += 1

        # Attempt to find a kept read that ends at or before this one's
        # start.  All stored serials are < mapped, so the tuple bound below
        # selects exactly the slots with reference_end <= reference_start.
        finished = next(
            active.irange(maximum=(r.reference_start, mapped)), None)
        if finished is not None:
            # Some read is ending, replace it in the current set.
            active.discard(finished)
        elif len(active) >= maxCoverage:
            # No slot is free and none can be reused: skip this read.
            continue

        active.add((r.reference_end, mapped))
        outFile.write(r)
        filtered += 1

    if verboseFlag:
        print("Reduced BAM from " + str(mapped) + " to " + str(filtered) + " reads")
def containsNearbyAlmostDuplicate(self, nums: List[int], k: int, t: int) -> bool:
    """Return True if two elements at most ``k`` indices apart differ by
    at most ``t``.

    The values of the previous ``k`` positions are kept in an ordered set;
    for each new value the smallest stored value ``>= value - t`` is looked
    up and compared with ``value + t``.
    """
    window = SortedSet()
    for i, value in enumerate(nums):
        lo = window.bisect_left(value - t)
        if lo < len(window) and window[lo] <= value + t:
            return True
        window.add(value)
        # Keep only the last k values for the next iteration.
        if i >= k:
            window.discard(nums[i - k])
    return False
def find_path(graph, start, end):
    """Search ``graph`` for a path from ``start`` to ``end``.

    ``graph`` maps an ``(x, y)`` tuple to an iterable of neighbouring
    ``(x, y)`` tuples; ``start`` and ``end`` are objects exposing ``x()``
    and ``y()``.  Step cost and heuristic are both the Manhattan distance.
    A step that does not continue along the axis of the previous step is
    charged an extra 1e20.

    Returns the list of ``(x, y)`` tuples from start to end, or an empty
    list when ``end`` is never reached.
    """
    def dist(p1, p2):
        return abs(p1[0] - p2[0]) + abs(p1[1] - p2[1])

    start = (start.x(), start.y())
    end = (end.x(), end.y())

    data = {}  # node -> (cost so far, heuristic)
    back = {}  # node -> predecessor on the best known path
    # Keyed by negated cost + heuristic so that pop() (which removes the
    # last element) yields the node with the lowest total.
    q = SortedSet(key=lambda it: -sum(data[it]))

    data[start] = (0, dist(start, end))
    q.add(start)
    back[start] = start

    while q:
        curr = q.pop()
        cost = data[curr][0]
        prev = back[curr]
        dx = curr[0] - prev[0]
        dy = curr[1] - prev[1]

        if curr == end:
            path = [end]
            while path[-1] != start:
                path.append(back[path[-1]])
            path.reverse()
            return path

        for neighbor in graph.get(curr, ()):
            keeps_axis = (
                dx == 0 and neighbor[0] - curr[0] == 0
                or dy == 0 and neighbor[1] - curr[1] == 0
            )
            turn_penalty = 0 if keeps_axis else 1e20
            new_cost = cost + dist(curr, neighbor) + turn_penalty

            if neighbor in data:
                if not new_cost < data[neighbor][0]:
                    continue
                # Remove under the OLD key before data[neighbor] changes,
                # otherwise the set could not locate the entry.
                q.discard(neighbor)
            data[neighbor] = (new_cost, dist(neighbor, end))
            back[neighbor] = curr
            q.add(neighbor)

    return []
def checkClusterScore(self, floorplan):
    """Cluster the unoccupied nodes and count clusters without doors.

    The nodes considered are all keys of ``self.nodes`` minus
    ``floorplan.occupiedNodes``, ``self.serviceNodeIds`` and node ``'0'``.
    ``self.recurseCluster`` fills ``clusters`` and ``clusterDoors``; every
    ``clusterDoors`` entry equal to 0 adds one to the penalty, which is
    also stored on ``floorplan.clusterPenalty``.

    Returns ``(number of clusters, penalty)``.
    """
    free_nodes = set(self.nodes.keys()) - floorplan.occupiedNodes - self.serviceNodeIds
    leftToCheck = SortedSet(free_nodes)
    leftToCheck.discard('0')  # ignore outside node

    clusters = []
    clusterDoors = []
    self.recurseCluster([], leftToCheck, clusters, clusterDoors)

    # Penalize based on non-accessibility to doors.
    penalty = sum(1 for doors in clusterDoors if doors == 0)
    floorplan.clusterPenalty = penalty
    return (len(clusters), penalty)
def fillEmptyNodes(self, floorplan):
    """Turn eligible clusters of unoccupied nodes into units.

    The unoccupied nodes (all keys of ``self.nodes`` minus
    ``floorplan.occupiedNodes``, ``self.serviceNodeIds`` and node ``'0'``)
    are clustered with ``self.recurseCluster``.  A cluster becomes a unit
    when it has at least one door and its ``properties`` contain the keys
    ``'normal'``, ``'toilet'`` and ``'storage'``; its nodes are then marked
    as occupied.  ``self.testFloorplanScore`` is called once at the end.

    Returns the same ``floorplan`` object.
    """
    free_nodes = set(self.nodes.keys()) - floorplan.occupiedNodes - self.serviceNodeIds
    leftToCheck = SortedSet(free_nodes)
    leftToCheck.discard('0')  # ignore outside node

    clusters = []
    clusterDoors = []
    self.recurseCluster([], leftToCheck, clusters, clusterDoors)

    required_keys = ('normal', 'toilet', 'storage')
    for cluster in clusters:
        if not len(cluster.doorIds) > 0:
            continue
        props = cluster.properties
        if not all(key in props for key in required_keys):
            continue

        constraint = con.Constraint(
            roomType=props['normal'] - 1,
            prefWeight=1,
            prefDoors=cluster.doorIds,
            roomConstraints=props,
        )
        unit = p_unit.Unit(
            doorwayId=cluster.doorIds[0],
            constraint=constraint,
            connectedNodeIds=cluster.connectedNodeIds,
            connectedEdgeIds=cluster.connectedEdgeIds,
        )
        floorplan.addUnit(unit, self.doorEdgeIds, cluster.doorIds, True)
        floorplan.occupiedNodes.update(cluster.connectedNodeIds)

    self.testFloorplanScore(floorplan)
    return floorplan
def djikstra(graph_object, start_vertex, destination_vertex):
    """Shortest path from ``start_vertex`` to ``destination_vertex``.

    ``graph_object.graph_dict[v]`` is the iterable of neighbours of ``v``
    and ``graph_object.distance_dict[v][w]`` the length of edge ``v -> w``.
    Queue entries are ``(distance, vertex)`` tuples, so vertices must be
    mutually comparable to break distance ties.

    Each vertex taken from the queue is printed together with its distance.

    Returns ``(distance, path)`` where ``path`` lists the vertices from
    start to destination, or ``(-1, [])`` when the destination is never
    reached.  The distance of the start vertex itself is 0.
    """
    graph = graph_object.graph_dict
    distance_dict = graph_object.distance_dict

    # -1 -> not yet encountered, 0 -> seen as somebody's neighbour (queued),
    # 1 -> finalised.
    visited = defaultdict(lambda: -1)
    # Distance to a node that has not been reached is the maximum int.
    min_distance_to_reach_specific_node = defaultdict(lambda: sys.maxsize)
    # The start is reached at distance 0; without this a query with
    # start_vertex == destination_vertex reported sys.maxsize.
    min_distance_to_reach_specific_node[start_vertex] = 0
    parent_vertex_dict = defaultdict(lambda: -1)

    priority_vertex_set = SortedSet()
    priority_vertex_set.add((0, start_vertex))
    parent_vertex_dict[start_vertex] = -1

    while priority_vertex_set:
        cur_dist, cur_vertex = priority_vertex_set.pop(0)
        # Trace output kept from the original implementation.
        print(cur_dist, cur_vertex)
        # Stale queue entry: the vertex was finalised with a shorter path.
        if visited[cur_vertex] == 1:
            continue
        visited[cur_vertex] = 1

        for item in graph[cur_vertex]:
            if visited[item] == 1:
                continue
            candidate = cur_dist + distance_dict[cur_vertex][item]
            if (visited[item] == -1
                    or min_distance_to_reach_specific_node[item] > candidate):
                # Older, longer entries for ``item`` stay queued and are
                # skipped by the stale-entry check above.
                priority_vertex_set.add((candidate, item))
                min_distance_to_reach_specific_node[item] = candidate
                visited[item] = 0
                parent_vertex_dict[item] = cur_vertex

    path = []
    if visited[destination_vertex] == -1:
        return (-1, path)

    cur_vertex = destination_vertex
    while cur_vertex != -1:
        path.append(cur_vertex)
        cur_vertex = parent_vertex_dict[cur_vertex]
    path.reverse()
    return (min_distance_to_reach_specific_node[destination_vertex], path)
def run(N):
    """Print a random test case with ``N`` tutors.

    ``2 * N`` distinct time stamps are sampled from ``0 .. MAXT`` (``MAXT``
    is a module-level name) and walked in increasing order.  At each stamp
    either a new tutor arrives or a randomly chosen present tutor leaves;
    a new tutor always arrives when nobody is present.  The output is ``N``
    on the first line followed by one ``arrival departure`` line per tutor
    in shuffled order.

    The print statements and ``xrange`` of the original only worked on
    Python 2; this version produces the same output on Python 2 and 3.
    """
    p = randint(0, 100)
    times = sorted(sample(range(MAXT + 1), 2 * N))
    tutors = []
    present = SortedSet()  # indices into ``tutors`` of tutors still present

    for t in times:
        if not present or (len(tutors) < N and randint(0, 100) < p):
            present.add(len(tutors))
            tutors.append([t, -1])
        else:
            x = choice(present)
            present.discard(x)
            tutors[x][1] = t

    shuffle(tutors)
    print(N)
    for t in tutors:
        print("%s %s" % (t[0], t[1]))
def test_SortedSet(self):
    """Exercise SortedSet construction, add, duplicate add, discard and
    iteration, printing the set after each step."""
    # Construct from an unsorted list.
    sorted_set = SortedSet([1, 5, 2, 7, 4])

    # Insert 5, 4, 3, 2, 1 one at a time.
    for value in reversed(range(1, 6)):
        sorted_set.add(value)
    print('set after adding elements: ', sorted_set)

    # Inserting a value that is already present.
    sorted_set.add(5)
    print('set after inserting duplicate element: ', sorted_set)

    # Removing one element.
    sorted_set.discard(4)
    print('set after discarding: ', sorted_set)

    for item in sorted_set:
        print(item)
def closestRoom(self, rooms: List[List[int]], queries: List[List[int]]) -> List[int]:
    """Answer each ``[preferred, minSize]`` query with a room id.

    ``rooms`` holds ``[id, size]`` pairs.  The answer to a query is the id
    of a room with ``size >= minSize`` whose id is closest to
    ``preferred`` (the smaller id wins a tie), or ``-1`` when no room is
    large enough.  Answers are returned in the order of ``queries``.

    Neither ``rooms`` nor ``queries`` is modified: sorted copies and an
    index permutation are used instead of sorting and extending the
    caller's lists in place.
    """
    from bisect import bisect_left

    by_size = sorted(rooms, key=lambda room: room[1])
    sizes = [room[1] for room in by_size]
    ids = SortedSet(room[0] for room in by_size)
    # Process queries by increasing minSize so that rooms only ever leave
    # the candidate set.
    order = sorted(range(len(queries)), key=lambda q: queries[q][1])

    ans = [-1] * len(queries)
    removed = 0
    for q in order:
        preferred, min_size = queries[q][0], queries[q][1]
        first_ok = bisect_left(sizes, min_size)
        if first_ok == len(by_size):
            # No room is large enough.
            continue
        while removed < first_ok:
            ids.discard(by_size[removed][0])
            removed += 1

        pos = ids.bisect_left(preferred)
        if pos == len(ids):
            # Every remaining id is below ``preferred``.
            if pos > 0:
                ans[q] = ids[pos - 1]
        else:
            ans[q] = ids[pos]
            # ``<=`` lets the smaller id win when both are equally close.
            if pos > 0 and preferred - ids[pos - 1] <= ids[pos] - preferred:
                ans[q] = ids[pos - 1]
    return ans
def test_discard():
    """Discarding three members and one absent value leaves 97 of 100."""
    temp = SortedSet(range(100), load=7)
    # 1000 was never a member, so discarding it must be a silent no-op.
    for value in (0, 99, 50, 1000):
        temp.discard(value)
    temp._check()
    assert len(temp) == 97
def test_discard():
    """Discarding three members and one absent value leaves 97 of 100."""
    temp = SortedSet(range(100), load=7)
    # 1000 was never a member, so discarding it must be a silent no-op.
    for value in (0, 99, 50, 1000):
        temp.discard(value)
    temp._check()
    assert len(temp) == 97
def containsNearbyAlmostDuplicate(self, nums, k, t):
    """Tell whether two elements at most ``k`` indices apart differ by at
    most ``t``.

    Returns ``False`` straight away when ``k < 1``, ``t < 0``, ``nums`` is
    ``None`` or ``nums`` has fewer than two elements.

    :type nums: List[int]
    :type k: int
    :type t: int
    :rtype: bool
    """
    if k < 1 or t < 0 or nums is None or len(nums) < 2:
        return False

    treeset = SortedSet()
    for i, value in enumerate(nums):
        # Solution 1: any stored value inside [value - t, value + t] is a
        # hit.  Only the first element of the range is pulled; the range is
        # never copied into a list.
        if any(True for _ in treeset.irange(value - t, value + t)):
            return True
        treeset.add(value)
        # Keep only the last k values for the next iteration.
        if i >= k:
            treeset.discard(nums[i - k])
    return False
def containsNearbyAlmostDuplicate(self, nums, k, t):
    """Tell whether two elements at most ``k`` indices apart differ by at
    most ``t``.

    Returns ``False`` straight away when ``k < 1``, ``t < 0``, ``nums`` is
    ``None`` or ``nums`` has fewer than two elements.

    :type nums: List[int]
    :type k: int
    :type t: int
    :rtype: bool
    """
    if k < 1 or t < 0 or nums is None or len(nums) < 2:
        return False

    treeset = SortedSet()
    for i, value in enumerate(nums):
        # Solution 1: any stored value inside [value - t, value + t] is a
        # hit.  Only the first element of the range is pulled; the range is
        # never copied into a list.
        if any(True for _ in treeset.irange(value - t, value + t)):
            return True
        treeset.add(value)
        # Keep only the last k values for the next iteration.
        if i >= k:
            treeset.discard(nums[i - k])
    return False
def astar(stare_initiala, stare_finala, euristica, lista_chei):
    """Search from ``stare_initiala`` for ``stare_finala``.

    ``deschise`` is the ordered open set of ``Nod`` objects and
    ``scor_optim`` maps a state (as a tuple) to the best ``g`` recorded for
    it.  Successors come from ``genereaza_succesori(nod, lista_chei,
    euristica)``.

    Returns the ``Nod`` whose ``stare`` equals ``stare_finala``, or ``None``
    when the open set runs empty.
    """
    nod_initial = Nod(stare_initiala, None, None)
    deschise = SortedSet([nod_initial])
    # Keys are tuples because a list state such as [1, 1, 1, 1, 1] cannot
    # be used as a key; it is stored as (1, 1, 1, 1, 1).
    scor_optim = SortedDict({tuple(stare_initiala): 0})

    while deschise:
        # Take the first node of the open set (presumably the one with the
        # lowest f; that depends on how Nod defines its ordering).
        nod = deschise.pop(0)

        # Stop as soon as the final state is reached.
        if nod.stare == stare_finala:
            return nod

        # Generate the successors and check each of them.
        for succesor in genereaza_succesori(nod, lista_chei, euristica):
            cheie = tuple(succesor.stare)

            if cheie not in scor_optim:
                # First time this state is seen: record it and queue it.
                scor_optim[cheie] = succesor.g
                deschise.add(succesor)
                continue

            if not succesor.g < scor_optim[cheie]:
                continue

            # Insert or replace the state in the open set, as appropriate.
            # A probe node carrying the previously recorded f is used for
            # the membership test.
            succesor_fals = Nod(succesor.stare, None, None)
            succesor_fals.f = scor_optim[cheie] + euristica(succesor.stare)
            if succesor_fals in deschise:
                # NOTE(review): this discards ``succesor``, not the probe;
                # whether that removes the stale entry depends on Nod's
                # equality/ordering, which is defined elsewhere -- confirm.
                deschise.discard(succesor)
            deschise.add(succesor)
            # The state was reached at a lower cost: update the record.
            scor_optim[cheie] = succesor.g

    return None
class ExamRoom(object):
    """Seats 0 .. N-1; ``spots`` is the ordered set of occupied seats."""

    def __init__(self, N):
        """
        :type N: int
        """
        self.n = N
        self.spots = SortedSet()

    def seat(self):
        """Occupy and return the seat farthest from its nearest occupied
        seat; the lowest such seat wins a tie and an empty room gives 0.

        :rtype: int
        """
        next_free = 0  # seat right after the previous occupied one
        best_gap = 0
        chosen = 0

        for taken in self.spots:
            if next_free == 0:
                # Stretch before the first occupied seat: sit at seat 0.
                gap = taken - next_free
                candidate = 0
            else:
                # Stretch between two occupied seats: sit in the middle.
                gap = (taken - next_free + 1) // 2
                candidate = next_free + gap - 1
            if best_gap < gap:
                best_gap = gap
                chosen = candidate
            next_free = taken + 1

        # Stretch after the last occupied seat: sit at the last seat.
        if next_free > 0 and best_gap < self.n - next_free:
            chosen = self.n - 1

        self.spots.add(chosen)
        return chosen

    def leave(self, p):
        """Free seat ``p``.

        :type p: int
        :rtype: None
        """
        self.spots.discard(p)
def max_unique_element(array, k):
    """Return, for every length-``k`` window of ``array``, the largest
    element that occurs exactly once in it (``None`` if there is none).

    Two structures are updated as the window slides:
    - ``counts``: occurrences of each item in the current window;
    - ``unique_seen``: ordered set of the items whose count is exactly 1,
      so its last element is the answer for the window.
    """
    counts = dict()
    unique_seen = SortedSet()
    max_elements = []

    def record_current_max():
        # The set is sorted, so the last item is the largest unique one.
        max_elements.append(unique_seen[-1] if unique_seen else None)

    # First window.
    for item in array[:k]:
        if item in counts:
            counts[item] += 1
            unique_seen.discard(item)
        else:
            counts[item] = 1
            unique_seen.add(item)
    record_current_max()

    # Slide: ``left`` is the index that drops out, ``left + k`` comes in.
    for left in range(len(array) - k):
        outgoing = array[left]
        counts[outgoing] -= 1
        if counts[outgoing] == 1:
            unique_seen.add(outgoing)
        else:
            unique_seen.discard(outgoing)

        incoming = array[left + k]
        if counts.get(incoming, 0) == 0:
            counts[incoming] = 1
            unique_seen.add(incoming)
        else:
            counts[incoming] += 1
            unique_seen.discard(incoming)

        record_current_max()

    return max_elements
class SequenceLearner(BaseLearner):
    r"""A learner that will learn a sequence. It simply returns
    the points in the provided sequence when asked.

    This is useful when your problem cannot be formulated in terms of
    another adaptive learner, but you still want to use Adaptive's
    routines to run, save, and plot.

    Parameters
    ----------
    function : callable
        The function to learn. Must take a single element of `sequence`.
    sequence : sequence
        The sequence to learn.

    Attributes
    ----------
    data : dict
        The data as a mapping from "index of element in sequence" => value.

    Notes
    -----
    From primitive tests, the `~adaptive.SequenceLearner` appears to have a
    similar performance to `ipyparallel`\s ``load_balanced_view().map``. With
    the added benefit of having results in the local kernel already.
    """

    def __init__(self, function, sequence):
        self._original_function = function
        self.function = _IgnoreFirstArgument(function)
        # Indices that have been neither asked for (pending) nor told.
        self._to_do_indices = SortedSet(i for i, _ in enumerate(sequence))
        self._ntotal = len(sequence)
        self.sequence = copy(sequence)
        self.data = SortedDict()
        self.pending_points = set()

    def ask(self, n, tell_pending=True):
        """Return up to ``n`` ``(index, element)`` points, lowest index
        first, with one loss improvement of ``1 / ntotal`` per point."""
        points = []
        for index in self._to_do_indices:
            if len(points) >= n:
                break
            points.append((index, self.sequence[index]))
        loss_improvements = [1 / self._ntotal for _ in points]

        if tell_pending:
            # Done after the loop: tell_pending mutates _to_do_indices.
            for entry in points:
                self.tell_pending((entry[0], entry))

        return points, loss_improvements

    def _get_data(self):
        return self.data

    def _set_data(self, data):
        if not data:
            return
        indices, values = zip(*data.items())
        # The points aren't used by tell, so we can safely pass None.
        self.tell_many([(i, None) for i in indices], values)

    def loss(self, real=True):
        """Fraction of the sequence not yet evaluated; pending points count
        as evaluated when ``real`` is false."""
        if not self._to_do_indices and not self.pending_points:
            return 0
        npoints = self.npoints
        if not real:
            npoints += len(self.pending_points)
        return (self._ntotal - npoints) / self._ntotal

    def remove_unfinished(self):
        """Put every pending index back on the to-do set."""
        self._to_do_indices.update(self.pending_points)
        self.pending_points = set()

    def tell(self, point, value):
        index, _ = point
        self.data[index] = value
        self.pending_points.discard(index)
        self._to_do_indices.discard(index)

    def tell_pending(self, point):
        index, _ = point
        self.pending_points.add(index)
        self._to_do_indices.discard(index)

    def done(self):
        return not self._to_do_indices and not self.pending_points

    def result(self):
        """Get the function values in the same order as ``sequence``."""
        if not self.done():
            raise Exception("Learner is not yet complete.")
        return list(self.data.values())

    @property
    def npoints(self):
        return len(self.data)
class ReplayBuffer(object):
    """Buffer to store environment transitions.

    Transitions live in preallocated NumPy arrays of ``capacity`` rows that
    are written in ring order.  ``done_idxs`` is an ordered set of the row
    indices that hold a ``done`` transition; ``payload`` collects copies of
    the transitions added since the last ``save_data`` call.
    """

    def __init__(self, obs_shape, action_shape, capacity, device, normalize_obs):
        """Allocate the storage arrays and reset all counters.

        ``normalize_obs`` enables running observation statistics
        (``utils.Welford``) and is asserted to be used only with
        non-pixel (1-D ``obs_shape``) observations.
        """
        self.obs_shape = obs_shape
        self.action_shape = action_shape
        self.capacity = capacity
        self.device = device

        # Observations with more than one dimension are treated as pixels.
        self.pixels = len(obs_shape) > 1
        self.empty_data()

        # empty_data() leaves done_idxs as None; start with an empty set.
        self.done_idxs = SortedSet()
        # Total number of transitions ever added, and its value at the
        # last save_data() call.
        self.global_idx = 0
        self.global_last_save = 0

        self.normalize_obs = normalize_obs

        if normalize_obs:
            assert not self.pixels
            self.welford = utils.Welford()

    def __getstate__(self):
        """Return a shallow copy of the instance dict without the six
        transition arrays."""
        d = copy.copy(self.__dict__)
        del d['obses'], d['next_obses'], d['actions'], d['rewards'], \
            d['not_dones'], d['not_dones_no_max']
        return d

    def __setstate__(self, d):
        """Restore the attributes and allocate fresh, empty arrays."""
        self.__dict__ = d

        # The transitions are not part of the state and need to be
        # re-loaded manually with load_data().  Note that empty_data()
        # also resets idx, full and payload and sets done_idxs to None.
        self.empty_data()

    def empty_data(self):
        """(Re)allocate uninitialised storage and reset the write state."""
        # uint8 for pixel observations, float32 otherwise.
        obs_dtype = np.float32 if not self.pixels else np.uint8
        obs_shape = self.obs_shape
        action_shape = self.action_shape
        capacity = self.capacity

        self.obses = np.empty((capacity, *obs_shape), dtype=obs_dtype)
        self.next_obses = np.empty((capacity, *obs_shape), dtype=obs_dtype)
        self.actions = np.empty((capacity, *action_shape), dtype=np.float32)
        self.rewards = np.empty((capacity, 1), dtype=np.float32)
        self.not_dones = np.empty((capacity, 1), dtype=np.float32)
        self.not_dones_no_max = np.empty((capacity, 1), dtype=np.float32)

        # Next row to write, and whether the ring has wrapped at least once.
        self.idx = 0
        self.full = False
        self.payload = []
        self.done_idxs = None

    def __len__(self):
        """Number of rows currently holding a transition."""
        return self.capacity if self.full else self.idx

    def get_obs_stats(self):
        """Return ``(mean, std)`` of the observations seen so far, with
        ``std`` clamped to ``[MIN_STD, MAX_STD]``."""
        assert not self.pixels
        MIN_STD = 1e-1
        MAX_STD = 10
        mean = self.welford.mean()
        std = self.welford.std()
        # The clamping writes into the array returned by welford.std().
        std[std < MIN_STD] = MIN_STD
        std[std > MAX_STD] = MAX_STD
        return mean, std

    def add(self, obs, action, reward, next_obs, done, done_no_max):
        """Store one transition at the current write position."""
        # For saving: keep copies so later changes by the caller do not
        # affect what save_data() writes.
        self.payload.append((obs.copy(), next_obs.copy(),
                             action.copy(), reward,
                             not done, not done_no_max))

        if self.normalize_obs:
            self.welford.add_data(obs)

        # Keep done_idxs in sync with the row about to be written: record
        # a done, or, once the ring has wrapped, drop a stale entry for the
        # row being overwritten.
        if done:
            self.done_idxs.add(self.idx)
        elif self.full:
            self.done_idxs.discard(self.idx)

        np.copyto(self.obses[self.idx], obs)
        np.copyto(self.actions[self.idx], action)
        np.copyto(self.rewards[self.idx], reward)
        np.copyto(self.next_obses[self.idx], next_obs)
        np.copyto(self.not_dones[self.idx], not done)
        np.copyto(self.not_dones_no_max[self.idx], not done_no_max)

        self.idx = (self.idx + 1) % self.capacity
        self.global_idx += 1
        # ``full`` latches once the write position wraps back to 0.
        self.full = self.full or self.idx == 0

    def sample(self, batch_size):
        """Draw ``batch_size`` stored transitions uniformly at random
        (indices may repeat) and return them as tensors on ``device``:
        ``obses, actions, rewards, next_obses, not_dones,
        not_dones_no_max``.  Observations are normalised when
        ``normalize_obs`` is set."""
        idxs = np.random.randint(0,
                                 self.capacity if self.full else self.idx,
                                 size=batch_size)

        obses = self.obses[idxs]
        next_obses = self.next_obses[idxs]

        if self.normalize_obs:
            mu, sigma = self.get_obs_stats()
            obses = (obses - mu) / sigma
            next_obses = (next_obses - mu) / sigma

        obses = torch.as_tensor(obses, device=self.device).float()
        actions = torch.as_tensor(self.actions[idxs], device=self.device)
        rewards = torch.as_tensor(self.rewards[idxs], device=self.device)
        next_obses = torch.as_tensor(next_obses, device=self.device).float()
        not_dones = torch.as_tensor(self.not_dones[idxs], device=self.device)
        not_dones_no_max = torch.as_tensor(self.not_dones_no_max[idxs],
                                           device=self.device)

        return obses, actions, rewards, next_obses, not_dones, not_dones_no_max

    def sample_multistep(self, batch_size, T):
        """Sample ``batch_size`` windows of ``T`` consecutive rows.

        Returns ``obses, actions, rewards`` as tensors stacked along a
        leading axis of length ``T``; the trailing size-1 axis of
        ``rewards`` is squeezed away.  Every sampled row is asserted to
        have a non-zero ``not_dones`` entry.
        """
        assert batch_size < self.idx or self.full

        last_idx = self.capacity if self.full else self.idx
        last_idx -= T

        # raw here means the "coalesced" indices that map to valid
        # indices that are more than T steps away from a done.
        # NOTE(review): the arithmetic below appears to sample uniformly in
        # that coalesced space and then shift each sample past the done
        # indices that precede it -- confirm before changing anything here.
        done_idxs_sorted = np.array(list(self.done_idxs) + [last_idx])
        n_done = len(done_idxs_sorted)
        done_idxs_raw = done_idxs_sorted - np.arange(1, n_done + 1) * T

        samples_raw = npr.choice(
            last_idx - (T + 1) * n_done, size=batch_size,
            replace=True  # for speed
        )
        samples_raw = sorted(samples_raw)
        # For each sample, the position of the first raw done index that
        # is >= the sample.
        js = np.searchsorted(done_idxs_raw, samples_raw)
        offsets = done_idxs_raw[js] - samples_raw + T
        start_idxs = done_idxs_sorted[js] - offsets

        obses, actions, rewards = [], [], []

        for t in range(T):
            obses.append(self.obses[start_idxs + t])
            actions.append(self.actions[start_idxs + t])
            rewards.append(self.rewards[start_idxs + t])
            assert np.all(self.not_dones[start_idxs + t])

        obses = np.stack(obses)
        actions = np.stack(actions)
        rewards = np.stack(rewards).squeeze(2)

        if self.normalize_obs:
            mu, sigma = self.get_obs_stats()
            obses = (obses - mu) / sigma

        obses = torch.as_tensor(obses, device=self.device).float()
        actions = torch.as_tensor(actions, device=self.device)
        rewards = torch.as_tensor(rewards, device=self.device)

        return obses, actions, rewards

    def save_data(self, save_dir):
        """Write the transitions added since the last save to
        ``save_dir`` as ``<first>_<last>.pt`` (zero-padded global indices)
        and clear ``payload``.  Does nothing when nothing was added."""
        if self.global_idx == self.global_last_save:
            return
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        path = os.path.join(
            save_dir, f'{self.global_last_save:08d}_{self.global_idx:08d}.pt')

        # Transpose the list of transitions into one stacked array per
        # field (obs, next_obs, action, reward, not_done, not_done_no_max).
        payload = list(zip(*self.payload))
        payload = [np.vstack(x) for x in payload]
        self.global_last_save = self.global_idx
        torch.save(payload, path)
        self.payload = []

    def load_data(self, save_dir):
        """Refill the arrays from the chunk files in ``save_dir``.

        Relies on ``self.global_idx`` already holding the total number of
        transitions (it is not modified here).  Only the last ``capacity``
        transitions before ``global_idx`` are loaded; ``done_idxs`` is
        rebuilt from the loaded ``not_dones`` column.
        """
        def parse_chunk(chunk):
            # '<start>_<end>.pt' -> (start, end)
            start, end = [int(x) for x in chunk.split('.')[0].split('_')]
            return (start, end)

        self.idx = 0

        chunks = os.listdir(save_dir)
        # Skip files with 'stats' in their name; order the rest by their
        # starting global index.
        chunks = filter(lambda fname: 'stats' not in fname, chunks)
        chunks = sorted(chunks, key=lambda x: int(x.split('_')[0]))

        self.full = self.global_idx > self.capacity
        # Global index of the transition that lands in row 0.
        global_beginning = self.global_idx - self.capacity if self.full else 0

        for chunk in chunks:
            global_start, global_end = parse_chunk(chunk)
            # Chunk lies entirely after the recorded global index.
            if global_start >= self.global_idx:
                continue
            start = global_start - global_beginning
            end = global_end - global_beginning
            # Chunk lies entirely before the retained range.
            if end <= 0:
                continue

            path = os.path.join(save_dir, chunk)
            payload = torch.load(path)
            if start < 0:
                # Chunk straddles the beginning: drop its leading rows.
                payload = [x[-start:] for x in payload]
                start = 0
            # Chunks must be contiguous with what has been loaded so far.
            assert self.idx == start

            obses = payload[0]
            next_obses = payload[1]

            self.obses[start:end] = obses
            self.next_obses[start:end] = next_obses
            self.actions[start:end] = payload[2]
            self.rewards[start:end] = payload[3]
            self.not_dones[start:end] = payload[4]
            self.not_dones_no_max[start:end] = payload[5]
            self.idx = end

        # NOTE(review): ``last_save`` is not read anywhere in this class;
        # save_data() uses ``global_last_save`` -- confirm this is intended.
        self.last_save = self.idx

        if self.full:
            assert self.idx == self.capacity
            self.idx = 0

        last_idx = self.capacity if self.full else self.idx
        # Rows whose not_dones entry differs from 1 are the done rows.
        self.done_idxs = SortedSet(np.where(1. - self.not_dones[:last_idx])[0])
class Solver: def __init__(self, var_count, clause_count): self.var_count = var_count self.clause_count = clause_count self.clauses = [] self.unary_clauses = [] self.curr_level = 0 # Current depth of the decision tree self.max_level = 0 # Since variables are 1-indexed, size of these lists if (var_count + 1) # curr_assignment gives the latest assignment of a variable self.curr_assignment = [LiteralState.L_UNASSIGNED] * (var_count + 1) self.curr_literal_assignment = [LiteralState.L_UNASSIGNED ] * (2 * var_count + 1) # prev_assignment is used in PHASE SAVING self.prev_assignment = [-1] * (var_count + 1) # The level the variable was assigned at (if at all) self.assignment_level = [-1] * (var_count + 1) # A stack of all assigned variables in current path, most recently assigned variables are at top self.assigned_till_now = [] self.assignments_upto_level = [ 0 ] # How many assignments had happened upto a level? self.conflicts_upto_level = [ 0 ] # How many conflicts hence clauses learned upto a level? 
self.antecedent = [-1] * (var_count + 1) self.score2var = SortedSet() self.bcp_stack = [] # watch_map: literal -> list of clauses for which this literal is the watcher self.watch_map = {} # Used in MINISAT decision heuristic explained in decider() self.increment_value = 1.0 self.activity = [0.0] * (var_count + 1) # Used in restart optimisation explained in reset_state() self.restart_threshold = CONSTANTS.RESTART_LOWER_BOUND self.restart_upper_bound = CONSTANTS.RESTART_UPPER_BOUND_BASE # Statistics self.restart_count = 0 self.learnt_clauses_count = 0 self.decision_count = 0 self.assignments_count = 0 self.global_max_score = 0.0 def assign_variable(self, var: int, assignment: LiteralState): self.curr_assignment[var] = assignment if assignment != LiteralState.L_UNASSIGNED: self.prev_assignment[var] = assignment self.curr_literal_assignment[get_literal(var)] = assignment neg_assignment = LiteralState.L_UNASSIGNED if assignment == LiteralState.L_TRUE: neg_assignment = LiteralState.L_FALSE elif assignment == LiteralState.L_FALSE: neg_assignment = LiteralState.L_TRUE self.curr_literal_assignment[get_literal(-1 * var)] = neg_assignment def bump_var_score(self, var: int, increment_value=0.0): if increment_value > 0: self.score2var.discard((self.activity[var], var)) self.activity[var] += increment_value self.score2var.add((self.activity[var], var)) def print_clauses(self): print("{} variables, {} clauses".format(self.var_count, self.clause_count)) for clause_id, clause in enumerate(self.clauses): clause.print(clause_id) def print_curr_assignment(self): assignment = "State: " for var, state in enumerate(self.curr_assignment): if (var == 0): continue assignment += ", {}: {}".format(var, state.value) print(assignment) # This fn is used to add the given clause to the watchlist of given literal def watch_this_clause(self, lit, clause_id): if lit in self.watch_map: self.watch_map[lit].add(clause_id) else: self.watch_map[lit] = set([clause_id]) # Insert a new (input / learned) 
clause to the cnf def insert_clause(self, clause: Clause, first_watch, second_watch): self.clauses.append(clause) # Setup the two-watch mechanism, both these literals are guaranteed to be unassigned currently clause.first_watcher = first_watch clause.second_watcher = second_watch clause_id = len(self.clauses) - 1 self.watch_this_clause(clause.get_first_watcher(), clause_id) self.watch_this_clause(clause.get_second_watcher(), clause_id) # In MINISAT decision heusristic: # Score of a varible is the number of clauses in it # Since we are inserting a clause, increase the scores of variables in this literal for literal in clause.literals: var = get_variable(literal) self.bump_var_score(var, self.increment_value) # self.activity[var] += self.increment_value # Function used to assign a literal TRUE in a unary clause # These assignments are never reset hence not put in assigned_till_now[] def assert_unary_literal(self, lit): self.assignments_count += 1 var = get_variable(lit) # Set state of the underlying variable if is_negative(lit): self.assign_variable(var, LiteralState.L_FALSE) # self.curr_assignment[var] = LiteralState.L_FALSE else: self.assign_variable(var, LiteralState.L_TRUE) # self.curr_assignment[var] = LiteralState.L_TRUE self.assignment_level[var] = 0 # Always done at ground level # Function used to assign a literal TRUE in a non-unary clause # Note that current level is important here def assert_nonunary_literal(self, lit): self.assignments_count += 1 self.assigned_till_now.append(lit) var = get_variable(lit) if is_negative(lit): self.assign_variable(var, LiteralState.L_FALSE) # self.prev_assignment[var] = self.curr_assignment[var] = LiteralState.L_FALSE else: self.assign_variable(var, LiteralState.L_TRUE) # self.prev_assignment[var] = self.curr_assignment[var] = LiteralState.L_TRUE self.assignment_level[var] = self.curr_level """ Function to implement Boolean Constant Propagation using Two-watcher optimisation: bcp_stack contains all the literals which have 
been assigned false in current search path. Since we know these literals can now change the state of other clauses. A naive approach of bcp would be iterate every clause to find a unit/unsatisifed clause. If found, repreat the process again else, stop and start guessing some variables in decider() """ def bcp(self) -> (SolverState, int): # print("Running BCP with stack", self.bcp_stack) conflicting_clause_id = -1 while (self.bcp_stack): # Got a literal with FALSE assignment lit = self.bcp_stack.pop() assert self.curr_literal_assignment[lit] == LiteralState.L_FALSE # assert self.get_literal_status(lit) == LiteralState.L_FALSE if lit not in self.watch_map: self.watch_map[lit] = set() new_watch_list = copy.copy( self.watch_map[lit]) # Backup watch list of lit # Traverse only the watchlist of that clause to save computation for clause_id in self.watch_map[lit]: clause = self.clauses[clause_id] # This block determines which watcher (1st / 2nd) was lit first_watch = clause.get_first_watcher() second_watch = clause.get_second_watcher() lit_is_first = (lit == first_watch) other_watch = second_watch if lit_is_first else first_watch # Now that we know lit has been assigned FALSE, we need to find another watcher new_clause_state, new_watch_loc = clause.change_watch_location( self, lit_is_first, other_watch) # clause has one more literal FALSE, this might change a state if (new_clause_state == ClauseState.C_SATISFIED): pass elif (new_clause_state == ClauseState.C_UNIT): # If the clause had become unit, we have got another implication here self.assert_nonunary_literal(other_watch) var = get_variable(other_watch) self.antecedent[var] = clause_id self.bcp_stack.append(get_opposite_literal(other_watch)) elif (new_clause_state == ClauseState.C_CONFLICTING): # All the literals of this clause became false, we have a conflict, need to backtrack # If the conflict occured at ground level, we have a unsatisfiable cnf like (x) ^ (-x) if self.curr_level == 0: return 
SolverState.S_UNSATISFIED, conflicting_clause_id conflicting_clause_id = clause_id # Clear bcp_stack as a backtrack is coming, which will unassign several variables # As such some information in bcp_state is likely to become stale self.bcp_stack.clear() break elif (new_clause_state == ClauseState.C_UNRESOLVED): # The clause is still unresolved as we have found another watcher # Remove this clause from watch list of current lit new_watch_list.remove(clause_id) new_watcher = clause.literals[new_watch_loc] self.watch_this_clause(new_watcher, clause_id) # new_watch_list contains the clauses for which lit is still the watcher # Note that in case of backtrack, we dot need to revert the watchers in two-watcher method # since in backtracking, some variables will be unassigned, enforcing the two-watch invariant self.watch_map[lit].clear() self.watch_map[lit] = new_watch_list if (conflicting_clause_id >= 0): return SolverState.S_CONFLICT, conflicting_clause_id return SolverState.S_UNRESOLVED, conflicting_clause_id """ This function is for the PHASE-SAVING heuristic In decider() after the variable to be guessed has been selected, we then need it set it to TRUE of FALSE. Phase-saving says we should set it to our previous assignment if any. """ def get_lit_memoised(self, var: int) -> int: prev_state = self.prev_assignment[var] if (prev_state == LiteralState.L_TRUE): return get_literal(var) else: return get_literal(-1 * var) """ decide() function selects the next variable to be guessed and the guessed value. Based on MINISAT decision heuristic. Results in increment of current level. Score of a varible is the number of clauses in it. """ def decide(self) -> SolverState: # MINISAT based decision heuristic # print("Running decider") # self.print_curr_assignment() # print("Activity: ", self.activity) # Find an unassigned one with maximum score # Some inputs have unused variables, so we select only those with positive score. 
selected_lit = 0 unassigned_var_found = False while self.score2var: max_score, var = self.score2var.pop() self.global_max_score = max(self.global_max_score, max_score) if self.curr_assignment[var] == LiteralState.L_UNASSIGNED: unassigned_var_found = True selected_lit = self.get_lit_memoised(var) break if not unassigned_var_found: return SolverState.S_SATISFIED # print(selected_lit, selected_var, max_activity_till_now) assert selected_lit != 0 self.decision_count += 1 self.curr_level += 1 # We need to track this new assignment if (self.curr_level > self.max_level): # This branch is separate since we are at a new decision level, # so push_back is required instead of update self.max_level = self.curr_level self.assignments_upto_level.append(len(self.assigned_till_now)) self.conflicts_upto_level.append(self.learnt_clauses_count) else: self.assignments_upto_level[self.curr_level] = len( self.assigned_till_now) self.conflicts_upto_level[ self.curr_level] = self.learnt_clauses_count # Now we assign the literal as TRUE, and since put the (FALSE) opposite literal to bcp stack self.assert_nonunary_literal(selected_lit) self.bcp_stack.append(get_opposite_literal(selected_lit)) return SolverState.S_UNRESOLVED """ analyse_conflict() takes a conflicting clause and returns the level to backtrack to, and a learned clause We use the nearest UIP (Unique Implication Point) finding method as highlighted in Kroening's book. 
""" def analyze_conflict(self, conflicting_clause: Clause) -> (int, int): # print("Running analyse_conflict") curr_literals = [lit for lit in conflicting_clause.literals] learned_clause = Clause([]) backtrack_level = 0 # to be returned by this function to_resolve_count = 0 watch_lit = 0 # a watcher for the new learned literal marked = [False] * (self.var_count + 1) trail_index = len(self.assigned_till_now) - 1 resolve_lit = 0 resolve_var = 0 iter = 0 """ This loop outputs the learned clause, it works as follows: Invariant 1: curr_literals is the clause to be fused into learned_clause Invariant 2: learned caluse contains exactly one variable assigned at current level (UIP) All other literals are assigned before. """ while (iter == 0 or to_resolve_count > 0): iter += 1 for lit in curr_literals: var = get_variable(lit) if marked[var]: continue marked[var] = True if (self.assignment_level[var] == self.curr_level): to_resolve_count += 1 else: learned_clause.insert_literal(lit) if (self.assignment_level[var] > backtrack_level): # watch_lit: 2nd highest assigment level, first is UIP backtrack_level = self.assignment_level[var] watch_lit = len(learned_clause.literals) - 1 # Find a variable to be resolved by traversing the recently assigned literals first while (trail_index >= 0): resolve_lit = self.assigned_till_now[trail_index] resolve_var = get_variable(resolve_lit) trail_index -= 1 if marked[resolve_var]: break marked[resolve_var] = False to_resolve_count -= 1 if not to_resolve_count: # Just one literal remaining with current level assignment, we are done continue antecedent_id = self.antecedent[resolve_var] curr_literals = [ lit for lit in self.clauses[antecedent_id].literals if lit != resolve_lit ] # The learned clause becomes an unit clause after backtracking # This is because every other literal in the learned clause was assigned before # the backtrack level # resolve_lit is an UIP self.learnt_clauses_count += 1 opposite_resolv_lit = 
get_opposite_literal(resolve_lit) learned_clause.insert_literal(opposite_resolv_lit) self.increment_value /= CONSTANTS.VAR_DECAY_RATE if learned_clause.is_unary: # Not that we are inserting to bcp_stack without asserting UIP # Asserting will be done immediately after backtrack (see backtrack()) self.bcp_stack.append(resolve_lit) self.unary_clauses.append(learned_clause) else: self.bcp_stack.append(resolve_lit) self.insert_clause(learned_clause, watch_lit, len(learned_clause.literals) - 1) # for lit in learned_clause.literals: # var = get_variable(lit) # print("({}, {})".format(var, self.assignment_level[var])) return backtrack_level, opposite_resolv_lit # RESTART heuristic, reset all assignments except ground level and start afresh # Note that learned claused are not deleted only we start assignments from the beginning def reset_state(self): # print("Restart") """ The threshold system works as follows eg. (chainsaw graph) 1. We have a range [1, 10]. Threshold increases after every restart till it crosses the ub 2. 
At that point the threshold is rest and the range is also increased to let it go even higher """ self.restart_count += 1 self.restart_threshold = int(self.restart_threshold * CONSTANTS.THRESHOLD_MULTIPLIER) if (self.restart_threshold > self.restart_upper_bound): self.restart_threshold = CONSTANTS.RESTART_LOWER_BOUND self.restart_upper_bound = int(self.restart_upper_bound * CONSTANTS.THRESHOLD_MULTIPLIER) # Resets are similar to backtrack() function below, except that it resets evrything to ground level for var in range(1, self.var_count + 1): if (self.assignment_level[var] > 0): self.assign_variable(var, LiteralState.L_UNASSIGNED) # self.curr_assignment[var] = LiteralState.L_UNASSIGNED self.bump_var_score(var) self.bcp_stack.clear() self.assigned_till_now.clear() self.assignments_upto_level = [0] self.conflicts_upto_level = [0] self.curr_level = 0 self.max_level = 0 # Function to backtrack based on the output of analyse_conflict() def backtrack(self, k: int, uip_lit): # print("Running backtrack") # Invoke restart heuristic if too many clauses have been learnt after backtrack target level if k > 0 and (self.learnt_clauses_count - self.conflicts_upto_level[k] > self.restart_threshold): self.reset_state() return # Iterate over the variables assigned at level >= k + 1 and unassign them for index in range(self.assignments_upto_level[k + 1], len(self.assigned_till_now)): var = get_variable(self.assigned_till_now[index]) if (self.assignment_level[var] > k): self.assign_variable(var, LiteralState.L_UNASSIGNED) # self.curr_assignment[var] = LiteralState.L_UNASSIGNED self.bump_var_score(var) # analyse_function() returns an asserting clause with the UIP just ready for assignment # This helps to immediately put the learnt clause into practice self.assigned_till_now = self.assigned_till_now[:self. 
assignments_upto_level[ k + 1]] self.curr_level = k if k == 0: # We had learnt a unary clause self.assert_unary_literal(uip_lit) else: self.assert_nonunary_literal(uip_lit) self.antecedent[get_variable(uip_lit)] = len(self.clauses) - 1 # Function to verify output assignment if any def verify_assignment(self): non_true_clauses = [] all_clauses = self.clauses + self.unary_clauses # Every clause including learnt and unary must have atleast one TRUE literal for clause in all_clauses: true_literal_found = False for lit in clause.literals: if self.curr_literal_assignment[lit] == LiteralState.L_TRUE: # if (self.get_literal_status(lit) == LiteralState.L_TRUE): true_literal_found = True break if not true_literal_found: non_true_clauses.append(clause) if not non_true_clauses: print("AC, All clauses evaluate to true under given assignment") else: print("WA, {} unsatisfied clauses found".format( len(non_true_clauses))) """ Function implementing the standard CDCL framework: 1. [Outer Loop] Run BCP and decide() alternately, bcp first because of unary clauses 2. [INNER LOOP] Run till BCP gives UNRESOLVED result on which point guesswork must be done. 
If bcp encounters conflict a analyse, backtrack pair is done """ def run_cdcl(self) -> SolverState: result: SolverState while (True): while (True): result, conflicting_clause_id = self.bcp() # print("BCP result was {}".format(result)) if (result == SolverState.S_UNSATISFIED): return result if (result == SolverState.S_CONFLICT): assert conflicting_clause_id != -1 backtrack_level, uip_lit = self.analyze_conflict( self.clauses[conflicting_clause_id]) # print("Analyze result was k = {}, uip = {}".format(backtrack_level, uip_lit)) self.backtrack(backtrack_level, uip_lit) else: break result = self.decide() # print("Decide result was {}".format(result)) if (result == SolverState.S_UNSATISFIED or result == SolverState.S_SATISFIED): return result # Wrapper function to print the result of the CDCL framework def solve(self): # print("Solving") result: SolverState = self.run_cdcl() if (result == SolverState.S_SATISFIED): print("SATISFIABLE") self.verify_assignment() with open("assignment.txt", 'w') as assignment_file: for var, state in enumerate(self.curr_assignment): if (var == 0): assignment_file.write("State: ") continue assignment_file.write("{} ".format( -1 * var if state == LiteralState.L_FALSE else var)) else: print("UNSATISFIABLE") def print_statistics(self, solve_time): print("## Statistics: ") print("# Restarts: ", self.restart_count) print("# Learned clauses: ", self.learnt_clauses_count) print("# Decisions: ", self.decision_count) print("# Implications: ", self.assignments_count - self.decision_count) print("# Max score: ", self.global_max_score) print("# Time (s): ", solve_time)
class Scheduler(object):
    '''
    This scheduler executes Tasks taking into account their dependencies and worker locality.

    Worker assignment takes into account:
     * concurrency (how many tasks must a worker execute concurrently)
     * and worker locality (0 is indifferent, -1 is forbidden, 1+ increasing locality)
       as locality 0 is likely to be common, this is assumed throughout the scheduler to reduce
       the memory cost for scheduling

    The most important component in the computational complexity of the scheduler is the number
    of dependencies to track. Many-to-many dependencies should be kept to the thousands or tens
    of thousands (i.e. 100 * 100 tasks). Such issues can be resolved by introducing a
    'barrier task' as is done in bndl.compute (this reduces the number of dependencies to n+m
    instead of n*m).
    '''

    def __init__(self, tasks, done, workers, concurrency=1, attempts=1):
        '''
        Execute tasks in the given context and invoke done(task) when a task completes.

        :param tasks: iterable[task]
            Any iterable (including a generator); it is materialised once.
        :param done: callable(task)
            Invoked when a task completes. Must be thread safe. May be called multiple times
            if a task is rerun (e.g. in case a worker fails). done(None) is called to signal
            completion of the last task.
        :param: workers: sequence[Peer]
            Sequence of workers to execute on.
        :param: concurrency: int (defaults to 1)
            @see: bndl.execute.concurrency
        :param: attempts: int (defaults to 1)
            @see: bndl.execute.attempts
        :raises ValueError: if no tasks are given or task ids are not unique.
        :raises Exception: if no workers are given.
        '''
        # tasks is iterated several times below and its length is needed, so a one-shot
        # iterable (e.g. a generator) must be materialised first
        tasks = list(tasks)

        self.tasks = OrderedDict(
            (task.id, task)
            for task in sorted(tasks, key=lambda t: t.priority)
        )
        if not self.tasks:
            raise ValueError('Tasks must provide at least one task to execute')
        if len(self.tasks) < len(tasks):
            raise ValueError('Tasks must have a unique task ID')

        for task in tasks:
            task.add_listener(noop, self.task_done)

        self.done = done
        self.workers = {worker.name: worker for worker in workers}
        if not self.workers:
            raise Exception('No workers available')

        self.concurrency = concurrency
        # failed tasks are retried on error, but they are executed at most attempts
        self.max_attempts = attempts

        # task completion is (may be) executed on another thread, this lock serializes access
        # on the containers below and workers_idle
        self.lock = RLock()
        # a condition is used to signal that a worker is available or the scheduler is aborted
        self.condition = Condition(self.lock)

    def run(self):
        '''
        Schedule all tasks until none are executable or pending, the scheduler is aborted
        or an error occurs. The outcome is reported through self.done: the exception (if any),
        followed by done(None) in every case.
        '''
        logger.info('Executing job with %r tasks on %r workers',
                    len(self.tasks), len(self.workers))

        self._abort = False
        self._exc = None

        # containers for states a task can be in
        # executable tasks, sorted by task.priority
        self.executable = SortedSet(key=lambda task: task.priority)
        # blocked task -> dependencies which haven't succeeded yet
        self.blocked = defaultdict(set)

        # worker_name -> task -> locality > 0
        self.locality = {worker: {} for worker in self.workers.keys()}
        # task -> set[worker_name]
        self.forbidden = defaultdict(set)
        # worker_name -> SortedSet[task] in descending locality order
        self.executable_on = {
            worker: SortedSet(key=lambda task, worker=worker: -self.locality[worker].get(task, 0))
            for worker in self.workers.keys()
        }

        self.pending = set()  # tasks which are currently in progress
        self.succeeded = set()  # tasks which have been executed successfully
        self.failures = defaultdict(int)  # failure counts per task (task -> int)

        # keep a FIFO queue of workers ready
        # and a set of idle workers (ready, but no more tasks to execute)
        self.workers_ready = deque(self.workers.keys())
        self.workers_idle = set()
        self.workers_failed = set()

        # perform scheduling under lock
        try:
            with self.lock:
                logger.debug(
                    'Calculating which tasks are executable, which are blocked and if there is locality'
                )

                # record per task which workers are forbidden and which are preferred
                for task in self.tasks.values():
                    for worker, locality in task.locality(self.workers.values()) or ():
                        worker = worker.name
                        if locality < 0:
                            self.forbidden[task].add(worker)
                        elif locality > 0:
                            # the locality must be registered before the task is added, the
                            # sort key of executable_on reads it
                            self.locality[worker][task] = locality
                            self.executable_on[worker].add(task)

                # create list of executable tasks and set of blocked tasks
                for task in self.tasks.values():
                    if task.succeeded:
                        self.succeeded.add(task)
                        self.done(task)
                    elif task.dependencies:
                        remaining = set(dep for dep in task.dependencies
                                        if not dep.succeeded)
                        if remaining:
                            self.blocked[task] = remaining
                        else:
                            self.executable.add(task)
                    else:
                        self.executable.add(task)

                if not self.executable:
                    raise Exception('No tasks executable (all tasks have dependencies)')
                if not self.workers_ready:
                    raise Exception(
                        'No workers available (all workers are forbidden by all tasks)')

                logger.debug(
                    'Starting %r tasks (%r tasks blocked) on %r workers (%r tasks already done)',
                    len(self.executable), len(self.blocked),
                    len(self.workers_ready), len(self.succeeded))

                while True:
                    # wait for a worker to become available (signals task completion)
                    self.condition.wait_for(lambda: self.workers_ready or self._abort)

                    if self._abort:
                        # the abort flag can be set to True to break the loop
                        # (in case of emergency)
                        for task in self.tasks.values():
                            if task in self.pending:
                                task.cancel()
                        break

                    worker = self.workers_ready.popleft()

                    if worker in self.workers_failed:
                        # the worker is 'ready' (a task was 'completed'), but with an error
                        # or the worker was marked as failed because another task depended
                        # on an output on this worker and the dependency failed
                        continue

                    if not (self.executable or self.pending):
                        if logger.isEnabledFor(logging.DEBUG):
                            logger.debug(
                                'No more tasks to execute or pending (%r tasks blocked)',
                                sum(1 for _ in filter(None, self.blocked.values())))
                        break

                    task = self.select_task(worker)
                    if not task:
                        self.workers_idle.add(worker)
                        continue

                    # execute the task on the given worker; task_done is registered as
                    # listener in __init__ and the task is added to the pending set
                    try:
                        self.executable.remove(task)
                        self.executable_on[worker].discard(task)
                        self.pending.add(task)
                        if logger.isEnabledFor(logging.DEBUG):
                            logger.debug('%r executing on %r with locality %r',
                                         task, worker,
                                         self.locality[worker].get(task, 0))
                        task.execute(self, self.workers[worker])
                    except CancelledError:
                        pass
                    except AssertionError:
                        raise
                    except Exception as exc:
                        task.mark_failed(exc)
                        self.task_done(task)
        except Exception as exc:
            self._exc = exc

        if self._exc:
            logger.info('Failed after %r tasks with %s: %s',
                        len(self.succeeded), self._exc.__class__.__name__, self._exc)
            self.done(self._exc)
        elif self._abort:
            logger.info('Aborted after %r tasks', len(self.succeeded))
            self.done(Exception('Scheduler aborted'))
        else:
            logger.info('Completed %r tasks', len(self.succeeded))

        # always issue None (to facilitate e.g. iter(queue.get, None))
        self.done(None)

    def abort(self, exc=None):
        '''
        Request the scheduling loop to stop.

        :param exc: optional exception to report through done() as the cause.
        '''
        if exc is not None:
            self._exc = exc
        self._abort = True
        # wake up run() which may be waiting for a worker
        with self.lock:
            self.condition.notify_all()

    def select_task(self, worker):
        '''
        Select the next task to execute on the given worker.

        Tasks with locality > 0 for the worker are tried first (in the order of
        executable_on[worker]); otherwise the first executable task which is not forbidden
        on the worker is selected.

        :param worker: name of the worker
        :return: the selected task or None if there is nothing to execute on this worker
        '''
        if not self.executable:
            return None

        # select a task for the worker
        worker_queue = self.executable_on[worker]
        # iterate over a copy, the queue is pruned while scanning it
        for task in list(worker_queue):
            if task in self.pending or task in self.succeeded:
                # task executed by another worker
                worker_queue.remove(task)
            elif task in self.executable:
                return task
            elif self.blocked[task]:
                pass
            else:
                # task not executable
                logger.error('%r not executable, blocked, pending nor executed', task)

        # no task available with locality > 0
        # find task which is allowed to execute on this worker
        for task in self.executable:
            if worker not in self.forbidden[task]:
                return task

        return None

    def set_executable(self, task):
        '''
        Move a task to the executable queue, waking idle workers which may execute it.

        Tasks unknown to this scheduler and tasks already executable, pending or succeeded
        are ignored.

        :raises Exception: if the task is forbidden on every worker.
        '''
        if task.id not in self.tasks:
            return
        if task in self.executable or task in self.pending or task.succeeded:
            return

        # calculate for each worker which tasks are forbidden or which have locality
        for worker in self.workers.keys():
            # don't bother with 'failed' workers
            if worker in self.workers_failed:
                continue
            locality = self.locality[worker].get(task, 0)
            if locality >= 0:
                # make sure the worker isn't 'stuck' in the idle set
                if worker in self.workers_idle:
                    self.workers_idle.remove(worker)
                    for _ in range(self.concurrency):
                        self.workers_ready.append(worker)
                    self.condition.notify()
                # the task has a preference for this worker
                if locality > 0:
                    self.executable_on[worker].add(task)

        # check if there is a worker allowed to execute the task
        if len(self.forbidden[task]) == len(self.workers):
            raise Exception('%r cannot be executed on any available workers' % task)

        # add the task to the executable queue
        self.executable.add(task)

    def task_done(self, task):
        '''
        When a task completes, delete it from pending, add it to done and set dependent tasks
        as executable if this task was the last dependency. Reschedule failed tasks or abort
        scheduling if failed too often.
        '''
        if not task.done:
            return

        try:
            # nothing to do, scheduling was aborted
            if self._abort:
                return

            with self.lock:
                self.pending.discard(task)

                if task.failed:
                    self.task_failed(task)
                else:
                    if logger.isEnabledFor(logging.DEBUG):
                        logger.debug('%r was executed on %r', task, task.executed_on_last())
                    # add to executed and signal done
                    self.succeeded.add(task)
                    self.done(task)

                    # check for unblocking of dependents
                    for dependent in task.dependents:
                        blocked_by = self.blocked[dependent]
                        blocked_by.discard(task)
                        if not blocked_by and dependent:
                            if dependent in self.succeeded:
                                logger.debug(
                                    '%r unblocked because %r was executed, but already succeeded',
                                    dependent, task)
                            else:
                                logger.debug('%r unblocked because %r was executed',
                                             dependent, task)
                                self.set_executable(dependent)

                # the worker is ready for a next task, regardless of the outcome
                self.workers_ready.append(task.executed_on_last())
                self.condition.notify()
        except Exception as exc:
            logger.exception('Unable to handle task completion of %r on %r',
                             task, task.executed_on_last())
            self.abort(exc)

    def task_failed(self, task):
        '''
        Handle the failure of a task: mark workers as failed where the failure indicates so,
        abort if the task failed too often or no workers remain, otherwise block the task's
        dependents and reschedule the task.
        '''
        # in these cases we consider the task already re-scheduled
        if task in self.executable:
            logger.debug('%r failed with %s, but already marked as executable',
                         task, type(root_exc(task.exception())).__name__)
            return
        elif task in self.pending:
            logger.debug('%r failed with %s, but already pending',
                         task, type(root_exc(task.exception())).__name__)
            return

        exc = root_exc(task.exception())

        if isinstance(exc, DependenciesFailed):
            if logger.isEnabledFor(logging.INFO):
                logger.info(
                    '%r failed on %s because %r failed, rescheduling',
                    task, task.executed_on_last(),
                    ', '.join(worker + ': ' + ','.join(map(str, dependencies))
                              for worker, dependencies in exc.failures.items()))

            for worker, dependencies in exc.failures.items():
                for task_id in dependencies:
                    try:
                        dependency = self.tasks[task_id]
                    except KeyError as e:
                        logger.error(
                            'Received DependenciesFailed for unknown task with id %r',
                            task_id)
                        self.abort(e)
                    else:
                        # mark the worker as failed
                        executed_on_last = dependency.executed_on_last()
                        if not worker or worker == executed_on_last:
                            if worker == executed_on_last:
                                logger.info('Marking %r as failed for dependency %s of %s',
                                            worker, dependency, task)
                                self.workers_failed.add(worker)
                                self.workers_idle.discard(worker)
                            dependency.mark_failed(FailedDependency(worker))
                            self.task_failed(dependency)
                        else:
                            # this should only occur with really really short tasks where the
                            # failure of a task noticed by task b is already obsolete because
                            # the dependency was already restarted (because another task also
                            # issued DependenciesFailed)
                            logger.info(
                                'Received DependenciesFailed for task with id %r and worker %r '
                                'but the task is last executed on %r',
                                task_id, worker, executed_on_last)

        elif isinstance(exc, FailedDependency):
            self.succeeded.discard(task)
            worker = exc.worker_failed
            if worker:
                logger.info('%r marked as failed post-hoc, marking %r as failed',
                            task, worker)
                self.workers_failed.add(worker)
                self.workers_idle.discard(worker)

        elif isinstance(exc, NotConnected):
            # mark the worker as failed
            if logger.isEnabledFor(logging.INFO):
                logger.info('%r failed with NotConnected, marking %r as failed',
                            task, task.executed_on_last())
            self.workers_failed.add(task.executed_on_last())

        else:
            self.failures[task] = failures = self.failures[task] + 1
            if failures >= self.max_attempts:
                logger.warning('%r failed on %r after %r attempts ... aborting',
                               task, task.executed_on_last(), len(task.executed_on))
                # signal done (failed) to allow bubbling up the error and abort
                self.done(task)
                self.abort(task.exception())
                return
            elif task.executed_on_last():
                logger.info('%r failed on %r with %s: %s, rescheduling',
                            task, task.executed_on_last(), exc.__class__.__name__, exc)
                # don't retry on the worker on which the task just failed
                self.forbidden[task].add(task.executed_on_last())
            else:
                logger.info('%r failed before being executed with %s: %s, rescheduling',
                            task, exc.__class__.__name__, exc)

        # block its dependents until the task has been executed again
        for dependent in task.dependents:
            self.blocked[dependent].add(task)
            self.executable.discard(dependent)

        if len(self.workers_failed) == len(self.workers):
            self.abort(Exception('Unable to complete job, all workers failed'))

        if not self.blocked[task] and task not in self.executable and task not in self.pending:
            self.set_executable(task)
class Agenda(object):
    '''
    Agenda of candidate search operations (cluster merges and compositions) for a Parse.

    Operations wait in _agendaToScore until procAgenda() scores them. Scored operations are
    kept in _activeAgenda_score (and, ordered by score, in _scoreActiveAgenda) or in
    _inactiveAgenda_score when their score is below ParseParams.priorCutOff.
    _clustIdx_agenda indexes the scored operations by the cluster indices they refer to.
    '''

    def __init__(self, parse):
        self._parse = parse
        self._skipMC = False
        self._skipCompose = False
        self._mc_neighs = dict()  # merge op -> set of neighbour types seen so far
        self._compose_cnt = dict()  # compose op -> number of times proposed
        self._agendaToScore = set()  # ops waiting to be scored
        self._clustIdx_agenda = dict()  # cluster index -> set of scored ops
        self._inactiveAgenda_score = dict()  # op -> score (below the cut-off)
        self._activeAgenda_score = dict()  # op -> score
        self._scoreActiveAgenda = SortedSet()  # (float, SearchOp)
        self._minAbsCntObserved = ParseParams.minAbsCnt \
            * (ParseParams.minAbsCnt - 1) / 2

    def save_agenda(self, path):
        '''
        Save all objects necessary to recreate the current state of Agenda
        '''
        with open(path, 'wb') as f:
            pickle.dump({'saved_agenda': self}, f)

        return None

    @staticmethod
    def load_agenda(path, prs):
        '''
        Given a Parse object, load the saved state of an Agenda and attach
        it, returning the updated Parse object.

        Declared static: it takes no instance, so it works both as
        Agenda.load_agenda(path, prs) and when called on an instance.
        '''
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only load agenda files from a trusted source.
        with open(path, 'rb') as f:
            sav = pickle.load(f)

        prs.agenda = sav['saved_agenda']
        prs.agenda._parse = prs

        return prs

    def createAgenda(self, verbose=False):
        '''
        Populate the agenda for every cluster of type 'C' which is not a stop cluster.
        With verbose=True progress is printed in steps of 10%.
        '''
        if verbose:
            clust_cnt = len(Part.getClustPartRootNodeIds())
            milestones = set(range(1, 10))
            i = 0

        for clust_id in Part.getClustPartRootNodeIds():
            clust = Clust.getClust(clust_id)

            if clust.getType() != 'C' or clust.isStop():
                continue

            self.addAgendaForNewClust(clust_id, verbose)

            if verbose:
                i += 1
                done = math.floor(i * 10 / clust_cnt)

                if done in milestones:
                    milestones.remove(done)
                    print("{}% complete.".format(done * 10))

        return None

    def addAgendaForNewClust(self, newClustIdx, verbose=False):
        '''
        Propose operations for pairs of parts in the given cluster.
        '''
        part_node_ids = Part.getClustPartRootNodeIds()[newClustIdx]

        if len(part_node_ids) > 1:
            for node_id in part_node_ids:
                part_1 = Part.getPartByRootNodeId(node_id)

                for node_id2 in part_node_ids:
                    # NOTE(review): the break visits each unordered pair once only if
                    # part_node_ids iterates in ascending order - confirm the container
                    # returned by Part.getClustPartRootNodeIds() is sorted.
                    if node_id <= node_id2:
                        break

                    part_2 = Part.getPartByRootNodeId(node_id2)
                    self.addAgendaAfterMergeClust(part_1, part_2)

        return None

    def addAgendaAfterMergeClust(self, part_1, part_2):
        '''
        Propose operations following from two parts being in the same cluster.
        '''
        # First, check that these parts belong to the same cluster
        assert part_1._clustIdx == part_2._clustIdx

        clustIdx = part_1._clustIdx

        # If they have parents, check whether the parents are in the same cluster
        # If not, look at merging their clusters, and if so, look at composing
        # the clusters for part_1 and its parent.
        if part_1.getParPart() is not None and part_2.getParPart() is not None:
            clustIdx1 = part_1.getParPart()._clustIdx
            clustIdx2 = part_2.getParPart()._clustIdx

            if clustIdx1 != clustIdx2:
                self.addAgendaMC(clustIdx1, clustIdx2, 2 * clustIdx + 1)
            else:
                self.addAgendaAbs(clustIdx1, clustIdx)

        # Next, get the arguments (children) of each part
        # Compare each argument in A) with each argument in B) - if they have
        # different clusters, look at merging them, and if they have the same
        # look at composing the clusters for part_1 and its argument(s).
        kids_1 = part_1.getArguments()
        kids_2 = part_2.getArguments()

        for kid1 in kids_1.values():
            clustIdx1 = kid1._argPart._clustIdx

            for kid2 in kids_2.values():
                clustIdx2 = kid2._argPart._clustIdx

                if clustIdx1 != clustIdx2:
                    self.addAgendaMC(clustIdx1, clustIdx2, 2 * clustIdx + 1)
                else:
                    self.addAgendaAbs(clustIdx, clustIdx1)

        return None

    def addAgendaMC(self, clustIdx1, clustIdx2, neighType):
        '''
        Propose merging two distinct clusters of type 'C'. The op is queued for scoring once
        it has been proposed with enough neighbour types (ParseParams.minMCCnt).
        '''
        if not (self._skipMC or clustIdx1 == clustIdx2):
            type1 = Clust.getClust(clustIdx1).getType()
            type2 = Clust.getClust(clustIdx2).getType()

            if type2 == 'C' and type1 == 'C':
                op = SearchOp()
                op._op = SearchOp.OP_MERGE_CLUST
                # normalise the order so (a, b) and (b, a) are the same op
                op._clustIdx1 = min((clustIdx1, clustIdx2))
                op._clustIdx2 = max((clustIdx1, clustIdx2))

                if not self.moveAgendaToScore(op):
                    if op not in self._mc_neighs:
                        self._mc_neighs[op] = set()

                    if len(self._mc_neighs[op]) + 1 >= ParseParams.minMCCnt:
                        self._agendaToScore.add(op)
                        del self._mc_neighs[op]
                    else:
                        self._mc_neighs[op].add(neighType)

        return None

    def addAgendaAbs(self, parClustIdx, chdClustIdx):
        '''
        Propose composing a parent and a child cluster. The op is queued for scoring once it
        has been proposed often enough (_minAbsCntObserved).
        '''
        if not self._skipCompose:
            op = SearchOp()
            op._op = SearchOp.OP_COMPOSE
            op._parClustIdx = parClustIdx
            op._chdClustIdx = chdClustIdx

            if not self.moveAgendaToScore(op):
                if op not in self._compose_cnt:
                    self._compose_cnt[op] = 1

                if self._compose_cnt[op] + 1 >= self._minAbsCntObserved:
                    self._agendaToScore.add(op)
                    del self._compose_cnt[op]
                else:
                    self._compose_cnt[op] += 1

        return None

    def moveAgendaToScore(self, op):
        '''
        Move an already scored op back to the set of ops to (re)score.

        :return: True if the op is (now) waiting to be scored, False if the op is unknown.
        '''
        if op in self._agendaToScore:
            return True

        if op in self._activeAgenda_score:
            score = self._activeAgenda_score[op]
            self._scoreActiveAgenda.discard((score, op))
            del self._activeAgenda_score[op]
            self._agendaToScore.add(op)
            return True
        elif op in self._inactiveAgenda_score:
            del self._inactiveAgenda_score[op]
            self._agendaToScore.add(op)
            return True

        return False

    def procAgenda(self, verbose=False):
        '''
        Alternate between scoring the queued ops and executing the best scoring active op,
        until no active op remains.
        '''
        if verbose:
            print("Processing agenda with {} operations in queue.".format(
                len(self._agendaToScore)))

        ttlAgendaScored, ttlExecMC, ttlExecAbs = (0, 0, 0)
        i = 1

        while True:
            As = 0

            for op in self._agendaToScore:
                score = self._parse.scorer.scoreOp(op)
                if verbose:
                    print("<SCORE> {} score={}".format(op, score))
                As += 1

                # ops scoring this low are dropped
                if score < -200:
                    continue

                if verbose:
                    print("<Add Agenda> {} score={}".format(op, score))
                self.addAgenda(op, score)

            self._agendaToScore.clear()
            # running total over all rounds (was overwritten with As + 1)
            ttlAgendaScored += As

            if len(self._scoreActiveAgenda) == 0:
                break

            # the last element of the sorted set has the highest score
            score, op = next(reversed(self._scoreActiveAgenda))
            if verbose:
                print("Executing: {}, score={}".format(op, score))

            newClustIdx = self._parse.executor.executeOp(op)
            self.updateAgendaAfterExec(op, newClustIdx, verbose)

            if op._op == SearchOp.OP_COMPOSE:
                ttlExecAbs += 1
            elif op._op == SearchOp.OP_MERGE_CLUST:
                ttlExecMC += 1

            if verbose:
                print("Total op_compose: {}, Total op_merge_clust: {}".format(
                    ttlExecAbs, ttlExecMC))

            i += 1
            if verbose and i % 10 == 0:
                print("{} Processing agenda: {} loops".format(
                    datetime.now(), i))

        return None

    def addAgenda(self, op, score):
        '''
        Register a scored op: index it by both of its clusters and file it as active or
        inactive depending on ParseParams.priorCutOff.
        '''
        ci1, ci2 = (-1, -1)

        if op._op == SearchOp.OP_MERGE_CLUST:
            ci1 = op._clustIdx1
            ci2 = op._clustIdx2
        elif op._op == SearchOp.OP_COMPOSE:
            ci1 = op._parClustIdx
            ci2 = op._chdClustIdx

        self._clustIdx_agenda.setdefault(ci1, set()).add(op)
        self._clustIdx_agenda.setdefault(ci2, set()).add(op)

        if score < ParseParams.priorCutOff:
            self._inactiveAgenda_score[op] = score
        else:
            self._activeAgenda_score[op] = score
            self._scoreActiveAgenda.add((score, op))

        return None

    def updateAgendaAfterExec(self, op, newClustIdx, verbose=False):
        '''
        Remove the executed op and, if a cluster resulted (newClustIdx >= 0), update the
        agenda for the affected clusters.
        '''
        self.removeAgenda(op)

        if newClustIdx >= 0:
            if op._op == SearchOp.OP_MERGE_CLUST:
                self.updateAgendaAfterExecMC(op, newClustIdx, verbose)
            elif op._op == SearchOp.OP_COMPOSE:
                self.updateAgendaAfterExecAbs(op, newClustIdx, verbose=verbose)

        return None

    def addAgendaToScore(self, op):
        '''Queue an op for scoring.'''
        self._agendaToScore.add(op)

        return None

    def updateAgendaAfterExecMC(self, op, newClustIdx, verbose=False):
        '''
        After merging clusters, re-target every op of the cluster that was merged away to the
        new cluster and queue it for scoring again.
        '''
        assert op._op == SearchOp.OP_MERGE_CLUST

        # the old cluster is whichever of the two merged clusters isn't the new one
        oldClustIdx = op._clustIdx2
        if oldClustIdx == newClustIdx:
            oldClustIdx = op._clustIdx1

        while len(self._clustIdx_agenda[oldClustIdx]) > 0:
            oop = next(iter(self._clustIdx_agenda[oldClustIdx]))
            self.removeAgenda(oop)

            if oop._op == SearchOp.OP_MERGE_CLUST:
                ci1 = oop._clustIdx1
                ci2 = oop._clustIdx2

                if ci1 == oldClustIdx:
                    ci1 = newClustIdx
                if ci2 == oldClustIdx:
                    ci2 = newClustIdx

                # merging a cluster with itself is meaningless, drop such ops
                if ci1 != ci2:
                    # the op is updated in place (after removal from the agenda)
                    nop = oop
                    nop._clustIdx1 = min((ci1, ci2))
                    nop._clustIdx2 = max((ci1, ci2))
                    nop.genString()
                    self.addAgendaToScore(nop)
            elif oop._op == SearchOp.OP_COMPOSE:
                ci1 = oop._parClustIdx
                ci2 = oop._chdClustIdx

                if ci1 == oldClustIdx:
                    ci1 = newClustIdx
                if ci2 == oldClustIdx:
                    ci2 = newClustIdx

                nop = oop
                nop._parClustIdx = ci1
                nop._chdClustIdx = ci2
                nop.genString()
                self.addAgendaToScore(nop)

        del self._clustIdx_agenda[oldClustIdx]

        num_parts_old = len(Part.getClustPartRootNodeIds()[oldClustIdx])
        num_parts_new = len(Part.getClustPartRootNodeIds()[newClustIdx])

        if verbose:
            print("Updating agenda: {} possible operations.".format(
                num_parts_new * (num_parts_old)))

        for prnid in Part.getClustPartRootNodeIds()[newClustIdx]:
            p = Part.getPartByRootNodeId(prnid)

            for prnid2 in Part.getClustPartRootNodeIds()[oldClustIdx]:
                p2 = Part.getPartByRootNodeId(prnid2)
                self.addAgendaAfterMergeClust(p, p2)

        return None

    def updateAgendaAfterExecAbs(self, op, newClustIdx, oop=None, verbose=False):
        '''
        After composing two clusters, queue every op on the parent and child cluster for
        scoring again and propose ops for the new cluster. The oop parameter is unused and
        kept for compatibility with existing callers.
        '''
        if op._op == SearchOp.OP_COMPOSE:
            parClustIdx = op._parClustIdx
            chdClustIdx = op._chdClustIdx

            for clustIdx in (parClustIdx, chdClustIdx):
                while len(self._clustIdx_agenda[clustIdx]) > 0:
                    oop = next(iter(self._clustIdx_agenda[clustIdx]))
                    self.removeAgenda(oop)
                    self.addAgendaToScore(oop)

            self.addAgendaForNewClust(newClustIdx, verbose)

        return None

    def removeAgenda(self, op):
        '''
        Remove an op from the active / inactive agenda and from the per cluster index.
        '''
        if op in self._activeAgenda_score:
            score = self._activeAgenda_score[op]
            self._scoreActiveAgenda.discard((score, op))
            del self._activeAgenda_score[op]
        elif op in self._inactiveAgenda_score:
            del self._inactiveAgenda_score[op]

        if op._op == SearchOp.OP_MERGE_CLUST:
            self._clustIdx_agenda[op._clustIdx1].discard(op)
            self._clustIdx_agenda[op._clustIdx2].discard(op)
        elif op._op == SearchOp.OP_COMPOSE:
            self._clustIdx_agenda[op._parClustIdx].discard(op)
            self._clustIdx_agenda[op._chdClustIdx].discard(op)

        return None
class Display:
    """Buffered view of a terminal screen written through ``interface``.

    ``dimensions`` must unpack as ``(rows, columns)`` and also expose a
    ``columns`` attribute (e.g. a namedtuple).  Screen cells are addressed
    by a zero-based ``index`` into ``buffer``; device addresses are that
    index offset by one row of ``columns`` cells, which ``clear()`` treats
    as the status line.
    """

    def __init__(self, interface, dimensions):
        self.logger = logging.getLogger(__name__)

        self.interface = interface
        self.dimensions = dimensions

        (rows, columns) = self.dimensions

        # Local copy of the screen content and the set of indexes that
        # differ from what has been written to the device.
        self.buffer = bytearray(rows * columns)
        self.dirty = SortedSet()

        # Last known device address counter; None means "unknown".
        self.address_counter = None

        self.status_line = StatusLine(self)

        self.cursor_reverse = False
        self.cursor_blink = False

    def move_cursor(self, index=None, row=None, column=None, force_load=False):
        """Load the address counter.

        Returns the result of ``_load_address_counter``: True if the
        counter was (re)loaded, False if it already held the address.
        """
        address = self._calculate_address(index=index, row=row, column=column)

        # TODO: Verify that the address is within range - exclude status line.

        return self._load_address_counter(address, force_load)

    def buffered_write(self, byte, index=None, row=None, column=None):
        """Store ``byte`` in the buffer and mark the cell dirty.

        Either ``index`` or both ``row`` and ``column`` must be given.
        Returns False when the cell already holds ``byte`` (nothing is
        marked dirty), True otherwise.

        Raises:
            ValueError: if neither ``index`` nor ``row``/``column`` is given.
        """
        if index is None:
            if row is None or column is None:
                raise ValueError('Either index or row and column is required')

            index = self._get_index(row, column)

        # TODO: Verify that index is within range.

        if self.buffer[index] == byte:
            return False

        self.buffer[index] = byte

        self.dirty.add(index)

        return True

    def flush(self):
        """Write every dirty range of the buffer to the device."""
        for (start_index, end_index) in self._get_dirty_ranges():
            self._flush_range(start_index, end_index)

    def clear(self, clear_status_line=False):
        """Clear the screen.

        With ``clear_status_line`` the write starts at address 0 and also
        covers the extra leading row; otherwise it starts at ``columns``.
        """
        (rows, columns) = self.dimensions

        if clear_status_line:
            address = 0
            count = (rows + 1) * columns
        else:
            address = columns
            count = rows * columns

        # Repeat form: (pattern, repeat count).
        self._write((b'\x00', count), address=address)

        # Update the buffer and dirty indicators to reflect the cleared screen.
        for index in range(rows * columns):
            self.buffer[index] = 0x00

        self.dirty.clear()

        self.move_cursor(row=0, column=0, force_load=True)

    def toggle_cursor_blink(self):
        """Flip the ``cursor_blink`` flag."""
        self.cursor_blink = not self.cursor_blink

    def toggle_cursor_reverse(self):
        """Flip the ``cursor_reverse`` flag."""
        self.cursor_reverse = not self.cursor_reverse

    def _get_index(self, row, column):
        """Return the buffer index of the cell at ``row``/``column``."""
        return (row * self.dimensions.columns) + column

    def _calculate_address(self, index=None, row=None, column=None):
        """Return the device address for a buffer index or row/column pair.

        Raises:
            ValueError: if neither ``index`` nor ``row``/``column`` is given.
        """
        if index is not None:
            return self.dimensions.columns + index

        if row is not None and column is not None:
            return self.dimensions.columns + self._get_index(row, column)

        raise ValueError('Either index or row and column is required')

    def _calculate_address_after_write(self, address, count):
        """Return the address counter expected after writing ``count`` cells.

        Returns None (unknown) when ``address`` is None or when the write
        reaches the last screen cell.
        """
        if address is None:
            return None

        address += count

        (rows, columns) = self.dimensions

        # TODO: Determine the correct behavior here...
        # The position *after* the write is what matters; the original code
        # compared the stale pre-write counter, so a write running up to the
        # end of the screen left a counter pointing past it.
        if address >= self._calculate_address((rows * columns) - 1):
            return None

        return address

    def _read_address_counter(self):
        """Read the 16-bit address counter from the device."""
        hi = read_address_counter_hi(self.interface)
        lo = read_address_counter_lo(self.interface)

        return (hi << 8) | lo

    def _load_address_counter(self, address, force_load):
        """Load ``address`` into the device address counter.

        Only the high/low bytes that differ from the last known counter are
        sent, unless ``force_load`` is set.  Returns False when nothing had
        to be loaded, True otherwise.
        """
        if address == self.address_counter and not force_load:
            return False

        (hi, lo) = _split_address(address)
        (current_hi, current_lo) = _split_address(self.address_counter)

        if hi != current_hi or force_load:
            load_address_counter_hi(self.interface, hi)

        if lo != current_lo or force_load:
            load_address_counter_lo(self.interface, lo)

        self.address_counter = address

        return True

    def _get_dirty_ranges(self):
        """Return ``(start_index, end_index)`` ranges covering dirty cells.

        Currently a single range from the lowest to the highest dirty index.
        """
        if not self.dirty:
            return []

        # TODO: Implement multiple ranges with optimization.
        return [(self.dirty[0], self.dirty[-1])]

    def _flush_range(self, start_index, end_index):
        """Write ``buffer[start_index:end_index + 1]`` and mark it clean.

        Write errors are logged, not raised; the range is marked clean in
        either case.  Returns the resulting address counter.
        """
        if self.logger.isEnabledFor(logging.DEBUG):
            self.logger.debug(
                f'Flushing changes for range {start_index}-{end_index}')

        data = self.buffer[start_index:end_index + 1]

        address = self._calculate_address(start_index)

        try:
            self._write(data, address=address)
        except Exception as error:
            # TODO: This could leave the address_counter incorrect.
            self.logger.error(f'Write error: {error}', exc_info=error)

        for index in range(start_index, end_index + 1):
            self.dirty.discard(index)

        return self.address_counter

    def _write(self, data, address=None, restore_original_address=False):
        """Write ``data`` to the device, optionally at ``address``.

        ``data`` is either a bytes-like object or a ``(pattern, count)``
        tuple meaning ``pattern`` repeated ``count`` times.  With
        ``restore_original_address`` the address counter in effect before
        the write is reloaded afterwards.
        """
        if restore_original_address:
            original_address = self.address_counter

            if original_address is None:
                original_address = self._read_address_counter()

        if address is not None:
            self._load_address_counter(address, force_load=False)

        write_data(self.interface, data)

        # It is the data, not the address, that may be in repeat form.
        if isinstance(data, tuple):
            length = len(data[0]) * data[1]
        else:
            length = len(data)

        self.address_counter = self._calculate_address_after_write(
            address, length)

        if restore_original_address:
            self._load_address_counter(original_address, force_load=True)
class graph:
    """Graph model used to generate pseudo-random graphs with fixed properties.

    The graph is represented as a sorted set of (encoded) edges.
    The constructor can build any of the known special graph types, the
    usual arithmetic operators perform the analogous combinatorial
    operations, and the usual logical operators perform the analogous set
    operations.  Random edges can be added with ``addedges`` and the graph
    can be made connected with ``connect``.

    This class is abstract: subclasses must implement ``cod``, ``dec`` and
    ``mMax``.
    """

    # constructor
    def __init__(self, N=0, E=None, M=None, w=None, type=None):
        """Build an empty graph with N vertices and edge set E (if given).

        If ``type`` is given, a graph of that type is built instead.  The
        values handled are cycle, path, tree, forest (which needs ``M``,
        with ``0 <= M < N``), clique, star and wheel.  If ``w`` is given
        the graph is considered weighted, with weights produced by calling
        ``w()``.  ``graph(G)`` with ``G`` an existing graph is also allowed
        and copies it.

        Raises:
            StandardError: if both ``E`` and ``type`` are given, or if
                ``M`` is out of bounds for a forest.
        """
        if isinstance(N, graph) and E is None:
            E = [self.cod(e) for e in N]
            N = N.V
        if not ((E is None) or (type is None)):
            raise StandardError("Incompatible parameters specified.")
        self.V = N
        self.w = w
        if E and isinstance(E[0], list):
            E = SortedSet([self.cod(e) for e in E])
        if E is None:
            E = SortedSet([])
        if len(E) == 0 and N > 1:
            # Strings are compared by value; identity (`is`) only works by
            # accident of interning.
            if type == 'cycle':
                for i in xrange(N):
                    E.add(self.cod([i, (i + 1) % N]))
            elif type == 'path':
                for i in xrange(N - 1):
                    E.add(self.cod([i, (i + 1) % N]))
            elif type == 'tree':
                for i in xrange(1, N):
                    E.add(self.cod([randint(i), i]))
            elif type == 'forest':
                if not (0 <= M < N):
                    raise StandardError("Parameter M out of bounds.")
                for i in lsample(N - 1, M):
                    E.add(self.cod([randint(i + 1), i + 1]))
            elif type == 'clique':
                for i in xrange(N - 1):
                    for j in xrange(i + 1, N):
                        E.add(self.cod([i, j]))
            elif type == 'star':
                for i in xrange(1, N):
                    E.add(self.cod([0, i]))
            elif type == 'wheel':
                # NOTE(review): for i == N - 1 the second edge is
                # [N - 1, 0], i.e. a spoke again rather than the rim edge
                # back to node 1 - confirm whether that is intended.
                for i in xrange(1, N):
                    E.add(self.cod([0, i]))
                    E.add(self.cod([i, (i + 1) % N]))
            # possibly add: gear, caterpillar/lobster, BIPARTITE
        self.E = SortedSet(E)

    def _edge_lines(self):
        """Return the edges, in random order, as 1-based ``u v [w]`` lines."""
        Ed = list(self.E)
        shuffle(Ed)
        lines = []
        for e in Ed:
            de = self.dec(e)
            line = str(de[0] + 1) + ' ' + str(de[1] + 1)
            if self.w is not None:
                line += ' ' + str(self.w())
            lines.append(line)
        return lines

    # printing functions
    def __repr__(self):
        """Python representation of the graph."""
        return self.__class__.__name__ + '(' + str(self.V) + ',' + str(
            [e for e in self]) + ')'

    def __str__(self):
        """Contest-style representation: an ``N M`` header, then the edges."""
        header = str(self.N()) + ' ' + str(self.M())
        return '\n'.join([header] + self._edge_lines()).rstrip()

    def printedges(self):
        """Contest-style representation of the graph, without the first line."""
        return '\n'.join(self._edge_lines()).rstrip()

    # comparison functions
    def __lt__(self, other):
        """Proper-subset relation."""
        return self.E < other.E

    def __le__(self, other):
        """Subset relation."""
        return self.E <= other.E

    def __eq__(self, other):
        """Equality of the edge sets."""
        return self.E == other.E

    def __ne__(self, other):
        """Inequality of the edge sets."""
        return self.E != other.E

    def __gt__(self, other):
        """Proper-superset relation."""
        return self.E > other.E

    def __ge__(self, other):
        """Superset relation."""
        return self.E >= other.E

    # container functions
    def __len__(self):
        """Number of edges of the graph."""
        return len(self.E)

    def __getitem__(self, i):
        """Return the i-th edge of the graph (decoded)."""
        return self.dec(self.E[i])

    def __iter__(self):
        """Return an iterator over the edges of the graph."""
        return _generic_iter(self)

    def __contains__(self, e):
        """Check whether an edge (a list, or an already encoded id) is present."""
        if isinstance(e, list):
            e = self.cod(e)
        return (e in self.E)

    # disjoint union of graphs (+)
    def __add__(self, other):
        """Disjoint union of graphs."""
        G = self.__class__(self.V, self.E)
        G += other
        return G

    def __iadd__(self, other):
        """Disjoint union of graphs; a non-graph operand is added as an edge."""
        if isinstance(other, graph):
            # Shift the other graph's node labels past our own.
            self.E |= [self.cod([e[0] + self.V, e[1] + self.V]) for e in other]
            self.V += other.V
        else:
            self.add(other)
        return self

    # cartesian product of graphs (*)
    def __mul__(self, other):
        """Cartesian product of graphs."""
        G = self.__class__()
        # One copy of self per node of other ...
        for i in xrange(other.V):
            G += self
        # ... then link corresponding nodes of the copies along other's edges.
        for e in other:
            for i in xrange(self.V):
                G += [e[0] * self.V + i, e[1] * self.V + i]
        return G

    def __imul__(self, other):
        """Cartesian product of graphs."""
        G = self * other
        self.V = G.V
        self.E = G.E
        return self

    # intersection of graphs (&)
    def __and__(self, other):
        """Intersection of graphs."""
        G = self.__class__(self.V, self.E)
        G &= other
        return G

    def __iand__(self, other):
        """Intersection of graphs."""
        self.V = min(self.V, other.V)
        self.E = self.E & other.E
        return self

    # union of graphs (|)
    def __or__(self, other):
        """Union of graphs."""
        G = self.__class__(self.V, self.E)
        G |= other
        return G

    def __ior__(self, other):
        """Union of graphs."""
        self.V = max(self.V, other.V)
        self.E = self.E | other.E
        return self

    # complement graph (~)
    def __invert__(self):
        """Complement graph."""
        G = self.__class__(self.V, xrange(self.mMax()) - self.E)
        return G

    # abstract edge-encoding functions.
    # they must guarantee that:
    #  * 0 <= cod(e) < mMax()
    #  * cod(e) = cod(e') => e = e'
    #  * cod(e) does not depend on self
    #  * the valid edges for V=N are exactly mMax()
    def cod(self, e):
        """Encode an edge as a unique integer, independent of N and M, between 0 and mMax()."""
        raise NotImplementedError("Abstract class graph must be inherited.")

    def dec(self, n):
        """Decode the id of an edge."""
        raise NotImplementedError("Abstract class graph must be inherited.")

    def mMax(self):
        """Maximum number of edges a graph with N nodes can contain."""
        raise NotImplementedError("Abstract class graph must be inherited.")

    # size computation
    def N(self):
        """Return the number of nodes of the graph."""
        return self.V

    def M(self):
        """Return the number of edges of the graph."""
        return len(self.E)

    # adding and removing an edge
    def add(self, e):
        """Add an edge to the graph, growing the node count if needed."""
        if max(e[0], e[1]) >= self.V:
            self.V = max(e[0], e[1]) + 1
        self.E.add(self.cod(e))

    def discard(self, e):
        """Remove an edge from the graph, if present."""
        self.E.discard(self.cod(e))

    # add K new random edges among the candidates (edgerange-style object)
    def addedges(self, K, candidates=None):
        """Add K random edges to the graph, chosen among the candidates.

        ``candidates`` is an edgerange-like object or a set/list of edges;
        when omitted, every edge id below ``mMax()`` is a candidate.
        """
        if candidates is None:
            # mMax() acts as a sentinel so the scan below always terminates.
            self.E.add(self.mMax())
            new = lsample(self.mMax() - self.M() + 1, K)
            i = j = 0
            # Map the j-th sampled rank onto the j-th free edge id by
            # skipping over the ids already present.
            while j < K:
                if self.E[i] > new[j] + i:
                    new[j] += i
                    j += 1
                else:
                    i += 1
            self.E.remove(self.mMax())
            self.E |= new
        else:
            dup = SortedSet([])
            for e in self:
                if e in candidates:
                    dup.add(self.cod(e))
            new = lsample(len(candidates) - len(dup), K)
            dup.add(self.mMax())
            i = j = 0
            while j < K:
                if dup[i] > candidates[new[j] + i]:
                    new[j] = candidates[new[j] + i]
                    j += 1
                else:
                    i += 1
            self.E |= new

    # add edges until the graph is connected
    def connect(self):
        """Add the minimum number of edges needed to connect the graph."""
        # Union-find over the nodes; list() so the labels stay mutable.
        lbl = list(range(self.N()))
        rnk = [1 for i in lbl]

        def find(x):
            if x == lbl[x]:
                return x
            lbl[x] = find(lbl[x])
            return lbl[x]

        def union(x, y):
            lx = find(x)
            ly = find(y)
            if lx == ly:
                return
            if rnk[lx] < rnk[ly]:
                lx, ly = ly, lx
            lbl[ly] = lx
            rnk[lx] += rnk[ly]

        def bsearch(v, k):
            if len(v) == 1:
                return 0
            m = len(v) // 2
            if v[m] <= k:
                return m + bsearch(v[m:], k)
            else:
                if v[m - 1] <= k:
                    return m
                return bsearch(v[0:m], k)

        for e in self:
            union(e[0], e[1])
        comp = [[] for i in range(self.N())]
        for i in range(self.N()):
            # Group by the *root* label: lbl[i] alone may still point at an
            # intermediate node, which would split one component in two and
            # add edges that are not needed.
            comp[find(i)] += [i]
        # Each entry becomes [size, node, node, ...], largest first.
        comp = [[len(i)] + i for i in comp]
        comp.sort()
        comp.reverse()
        while comp and comp[-1] == [0]:
            comp.pop()
        # Prefix sums of the component sizes.
        kcomp = [i[0] for i in comp]
        for i in range(1, len(kcomp)):
            kcomp[i] += kcomp[i - 1]
        # Attach every further component to a node of an earlier one.
        for i in range(1, len(comp)):
            a = randint(kcomp[i - 1])
            a = bsearch(kcomp, a)
            a = choice(comp[a][1:])
            b = choice(comp[i][1:])
            self.add([a, b])

    # permute the nodes of the graph
    def shuffle(self):
        """Randomly permute the nodes of the graph among themselves."""
        lbl = list(range(self.V))
        shuffle(lbl)
        new = [self.cod([lbl[e[0]], lbl[e[1]]]) for e in self]
        self.E = SortedSet(new)
class History(object):
    """Time-ordered record of chunks and of the paths they modify.

    ``chunk_history`` is a Timeline of all chunks.  ``modification_history``
    maps a path to the Timeline of chunks that modify it.  Each chunk is
    expected to expose ``stamp``, ``modifications``, ``dependencies`` and
    ``dependents``.  Timelines are indexed in ascending stamp order, and
    ``get_floor``/``get_ceil`` return a ``(position, chunk)`` pair.
    """

    def __init__(self, history=None, modification_history=None):
        """Build a history from an iterable of chunks.

        If ``modification_history`` is omitted it is reconstructed from the
        chunks; otherwise the given mapping is used as-is.

        Raises:
            Exception: if a chunk depends on a path that no earlier chunk
                modifies.
        """
        # Dict var_name -> Timeline
        self.chunk_history = Timeline() if history is None else Timeline(
            history)

        if modification_history is None:
            self.modification_history = {}
            for c in self.chunk_history:
                for p in c.modifications:
                    if p not in self.modification_history:
                        self.modification_history[p] = Timeline()
                    self.modification_history[p].add(c)

                for p in c.dependencies:
                    if p not in self.modification_history:
                        raise Exception(
                            'Illegal sequence of operations was supplied! Referenced dependency {} does not exist at time {}'
                            .format(p, c.stamp))
                    self.modification_history[p][-1].dependents.add(c)
        else:
            self.modification_history = modification_history

        self.dirty_chunks = SortedSet()

    def __iter__(self):
        return iter(self.chunk_history)

    def __len__(self):
        """Number of paths with a modification history."""
        return len(self.modification_history)

    def get_time_stamp(self, before=None, after=None):
        """Return a free stamp before/after a stamp or chunk, or at the end.

        With ``before``/``after`` the result is the midpoint to the
        neighbouring chunk, or one unit beyond when there is no neighbour.
        With neither, it is the last stamp plus one (1 for an empty history).
        """
        if before is not None:
            stamp = before.stamp if isinstance(before, Chunk) else before
            pos, succ = self.chunk_history.get_ceil(stamp)
            if pos > 0:
                return 0.5 * (succ.stamp + self.chunk_history[pos - 1].stamp)
            return succ.stamp - 1
        elif after is not None:
            stamp = after.stamp if isinstance(after, Chunk) else after
            pos, succ = self.chunk_history.get_floor(stamp)
            if pos < len(self.chunk_history) - 1:
                return 0.5 * (succ.stamp + self.chunk_history[pos + 1].stamp)
            return succ.stamp + 1
        return self.chunk_history[-1].stamp + 1 if len(
            self.chunk_history) > 0 else 1

    @profile
    def _insert_modification(self, chunk, path):
        """Register ``chunk`` as a modifier of ``path``.

        Dependents of the preceding modifier that come after ``chunk`` and
        depend on something ``chunk`` modifies are handed to ``chunk`` and
        flagged dirty.
        """
        if path not in self.modification_history:
            self.modification_history[path] = Timeline()

        _, pred = self.modification_history[path].get_floor(chunk.stamp)
        if pred is not None:
            to_remove = set()
            for d in pred.dependents:
                # Fetch all dependents from predecessor which are going to
                # depend on the new chunk.
                # Save them as dependents and mark them as dirty
                if d.stamp > chunk.stamp:
                    dep_overlap_diff = d.dependencies.difference(
                        chunk.modifications)
                    # Is there at least one element overlap
                    if len(dep_overlap_diff) < len(d.dependencies):
                        chunk.dependents.add(d)
                        self.dirty_chunks.add(d)
                        # If there is no remaining overlap with pred
                        # anymore, remove d
                        if len(dep_overlap_diff.difference(
                                pred.modifications)) == len(dep_overlap_diff):
                            to_remove.add(d)
            pred.dependents -= to_remove
        self.modification_history[path].add(chunk)

    @profile
    def insert_chunk(self, chunk):
        """Insert ``chunk`` and wire up its dependencies and modifications.

        Raises:
            Exception: if a dependency has no history, or no history at or
                before the chunk's stamp.
        """
        for p in chunk.dependencies:
            if p not in self.modification_history:
                raise Exception(
                    'Chunk depends on attribute without history!\n Operation "{}" at {}\n Attribute: {}\n'
                    .format(chunk.operation.name, chunk.stamp, p))
            _, pred = self.modification_history[p].get_floor(chunk.stamp)
            if pred is None:
                raise Exception(
                    'Chunk at time {} executing "{}" depends on attributes with empty history! Attributes:\n {}'
                    .format(
                        chunk.stamp, chunk.operation.name, '\n '.join([
                            str(p) for p in chunk.dependencies
                            if p not in self.modification_history
                            or self.modification_history[p].get_floor(
                                chunk.stamp)[1] is None
                        ])))
            pred.dependents.add(chunk)

        for p in chunk.modifications:
            self._insert_modification(chunk, p)

        self.chunk_history.add(chunk)

    @profile
    def remove_chunk(self, chunk):
        """Remove ``chunk``, handing its dependents to the preceding modifiers.

        Raises:
            Exception: if the chunk founds the history of a path one of its
                dependents depends on, or if a dependency has no history.
        """
        for p in chunk.modifications:
            if self.modification_history[p][0] == chunk and any(
                    p in c.dependencies for c in chunk.dependents):
                raise Exception(
                    'Can not remove chunk at timestamp {} because it is the founding chunk in the history of {} and would create dangling dependencies.'
                    .format(chunk.stamp, p))

        for p in chunk.modifications:
            self.modification_history[p].discard(chunk)
            _, pred = self.modification_history[p].get_floor(chunk.stamp)
            # Copy dependents that depend on this variable to predecessor
            if pred is not None:
                pred.dependents.update(
                    {d for d in chunk.dependents if p in d.dependencies})

        for p in chunk.dependencies:
            pos, pred = self.modification_history[p].get_floor(chunk.stamp)
            if pred is None:
                raise Exception(
                    'Chunk depends on attribute with empty history!')
            # It can happen that this chunk modifies the variable it depends
            # on. In this case the actual predecessor is one step earlier.
            if pred == chunk:
                pos -= 1
                pred = self.modification_history[p][pos]
            pred.dependents.discard(chunk)

        self.chunk_history.remove(chunk)
        self.dirty_chunks.update(chunk.dependents)

    @profile
    def replace_chunk(self, c_old, c_new):
        """Replace ``c_old`` by ``c_new`` at the same stamp.

        ``c_new`` must modify at least everything ``c_old`` modifies.  The
        dependents of ``c_old`` are taken over and flagged dirty.

        Raises:
            Exception: if the stamps differ, if ``c_new`` does not cover the
                modifications of ``c_old``, or if a new dependency has no
                history at the stamp.
        """
        if c_old.stamp != c_new.stamp:
            raise Exception(
                'Can only replace chunk if stamps match. Stamps:\n Old: {:>8.3f}\n New: {:>8.3f}'
                .format(c_old.stamp, c_new.stamp))

        overlap = c_old.modifications.intersection(c_new.modifications)
        if len(overlap) != len(c_old.modifications):
            raise Exception(
                'Chunks can only be replaced by others with at least the same definition coverage. Missing variables:\n {}'
                .format('\n '.join(
                    sorted(c_old.modifications.difference(
                        c_new.modifications)))))

        new_deps = {
            p: self.modification_history[p].get_floor(c_new.stamp)[1]
            if p in self.modification_history else None
            for p in c_new.dependencies.difference(overlap)
        }
        if None in new_deps.values():
            # The template has two placeholders; the original passed only
            # one argument, so raising this error itself failed.
            raise Exception(
                'Replacement chunk at {} tries to depend on variables with insufficient histories. variables:\n {}'
                .format(
                    c_new.stamp, '\n '.join(
                        sorted(p for p, c in new_deps.items() if c is None))))

        for p in overlap:
            pos, _ = self.modification_history[p].get_floor(c_old.stamp)
            # If we are already here, we might as well remove old and
            # establish new deps
            if p in c_old.dependencies:
                self.modification_history[p][pos - 1].dependents.discard(c_old)

            if p in c_new.dependencies:
                self.modification_history[p][pos - 1].dependents.add(c_new)

            self.modification_history[p].remove(c_old)
            self.modification_history[p].add(c_new)

        c_new.dependents = c_old.dependents.copy()
        self.flag_dirty(*c_new.dependents)

        # Remove old, non-modified deps
        for p in c_old.dependencies.difference(overlap):
            self.modification_history[p].get_floor(
                c_old.stamp)[1].dependents.remove(c_old)

        # Insert additional modifications
        for p in c_new.modifications.difference(overlap):
            self._insert_modification(c_new, p)

        for c in new_deps.values():
            c.dependents.add(c_new)

        self.chunk_history.remove(c_old)
        self.chunk_history.add(c_new)

    def get_chunk_by_index(self, idx):
        return self.chunk_history[idx]

    def get_chunk(self, stamp):
        """Return the chunk with exactly this stamp, or None."""
        return self.get_chunk_pos(stamp)[0]

    def get_chunk_pos(self, stamp):
        """Return ``(chunk, pos)`` for this stamp, or ``(None, None)`` if the
        nearest earlier chunk has a different stamp."""
        pos, chunk = self.chunk_history.get_floor(stamp)
        return (chunk, pos) if chunk is None or chunk.stamp == stamp else (None,
                                                                           None)

    def flag_dirty(self, *chunks):
        self.dirty_chunks.update(chunks)

    def flag_clean(self, *chunks):
        for c in chunks:
            self.dirty_chunks.discard(c)

    def expand_dirty_set(self):
        """Add all transitive dependents of dirty chunks to the dirty set."""
        active_set = set(self.dirty_chunks)
        while len(active_set) > 0:
            a = active_set.pop()
            u = a.dependents.difference(self.dirty_chunks)
            active_set.update(u)
            self.dirty_chunks.update(u)

    def get_dirty(self):
        return self.dirty_chunks.copy()

    def get_subhistory(self, time):
        """Return a History holding only the chunks with stamp <= ``time``.

        Returns an empty History when nothing happened at or before ``time``.
        """
        # Timelines are ascending, so there is something to return only if
        # the first chunk is not after `time`.
        if len(self.chunk_history) > 0 and self.chunk_history[0].stamp <= time:
            chunks = self.chunk_history[:self.chunk_history.get_floor(time)[0]
                                        + 1]
            # get_floor returns (pos, chunk); slice up to and including pos.
            mod_history = {
                p: Timeline(h[:h.get_floor(time)[0] + 1])
                for p, h in self.modification_history.items()
                if len(h) > 0 and h[0].stamp <= time
            }
            return History(chunks, mod_history)
        return History()

    def get_history_of(self, *paths):
        """Return a Timeline of all chunks the given paths depend on."""
        out = set()
        remaining = set()
        for p in paths:
            if p in self.modification_history:
                remaining.update(self.modification_history[p])

        while len(remaining) > 0:
            chunk = remaining.pop()
            out.add(chunk)
            for p in chunk.dependencies:
                pos, dep = self.modification_history[p].get_floor(chunk.stamp)
                if dep == chunk:  # Catch if predecessor is chunk itself
                    dep = self.modification_history[p][pos - 1]
                if dep not in out:
                    remaining.add(dep)

        return Timeline(out)

    def str_history_of(self, p):
        """Format the modification history of path ``p``, one chunk per line.

        Raises:
            Exception: if ``p`` has no history.
        """
        if p not in self.modification_history:
            raise Exception('Path {} has no history.'.format(p))
        # NOTE(review): this reads chunk.op while insert_chunk reads
        # chunk.operation - confirm that Chunk really exposes both.
        return '\n'.join([
            '{:>8.3f} : {}'.format(chunk.stamp, str(chunk.op))
            for chunk in self.modification_history[p]
        ])

    def str_history(self):
        """Format the whole chunk history, one chunk per line."""
        return '\n'.join([
            '{:>8.3f} : {}'.format(chunk.stamp, str(chunk.op))
            for chunk in self.chunk_history
        ])

    def __eq__(self, other):
        if isinstance(other, History):
            return self.chunk_history == other.chunk_history
        return False
class SocialNetwork(object):
    """Evolving network of cooperators and defectors playing a PD game.

    The graph lives in ``self.g``.  Per-node values (fitness, degree) are
    kept in fixed-size numpy arrays addressed by each node's ``r_index``
    attribute; ``free_indexes`` holds the array slots not in use and
    ``node_set`` the ids of the live nodes in sorted order.
    """

    ID = 0
    strategies = [COOP, DEFE]

    def __init__(self,
                 fluct,
                 rep,
                 nt_seed,
                 nt_desc,
                 nt_randomseed,
                 coop_prob=JUST_COOPERATORS,
                 randomseed=None,
                 b=1,
                 n_per_gen=10,
                 e_per_gen=2,
                 epsilon=0.99,
                 max=1000,
                 tourn=0.01,
                 X=0.025,
                 K=sys.maxsize,
                 X2=0.025):
        """Create the network from the template graph ``nt_seed``.

        Raises:
            ValueError: if ``epsilon`` is greater than or equal to 1.0.
        """
        # this is for identification of the network
        self.id = self.__class__.ID
        self.__class__.ID += 1
        self.fluct = fluct
        self.rep = rep
        self.nt_desc = nt_desc
        self.nt_randomseed = nt_randomseed
        self.coop_prob = coop_prob

        # set the PD game
        self.T = b
        self.R = 1
        self.P = 0
        self.S = 0

        # seed for the network, this is useful to replicate exactly the same
        # experiment, particularly useful for debugging
        if randomseed is None:
            self.randomseed = time.time()
        else:
            print("WARNING: random seed is not null. Are you sure?")
            self.randomseed = randomseed
        random.seed(self.randomseed)

        # main parameters
        self.b = b
        self.n_per_gen = n_per_gen
        self.e_per_gen = e_per_gen
        if (epsilon >= 1.0):
            raise ValueError("""Epsilon cannot be bigger or equal to 1.0.
                             You can use epsilon that are similar to 1.0, e.g 0.999999999 """)
        else:
            self.epsilon = epsilon
        self.max = max
        self.tourn = tourn
        self.X = X
        self.K = K
        self.X2 = X2

        # counters
        self.gen = 0
        self.count = 0
        self.cooperators = 0
        self.removed_nodes = 0
        self.total_fit = 0
        self.total_efit = 0
        self.size = 0
        self.g = nx.Graph()

        # create auxiliary network structures to increase efficiency
        self._max = max + n_per_gen
        self.eps_fitness = np.empty(self._max)
        self.degrees = np.empty(self._max)
        self.fitness = np.empty(self._max)
        self.fitness_of = np.empty(self._max, dtype=np.int_)
        self.node_set = SortedSet()

        # initialize the auxiliary structures
        self.degrees.fill(0)
        self.fitness_of.fill(-1)
        self.free_indexes = list(range(self._max))

        # create the network
        self.__create_from_seed(nt_seed, coop_prob)

        # define the game the nodes are going to play
        self.game = PD(b, self.fitness)

        self.treatment = '_'.join(
            str(x) for x in (self.nt_desc, self.coop_prob, self.fluct, self.b,
                             self.X, self.K, self.X2))
        self.signature = str(self.id) + '_' + \
                         str(self.rep) + '(' + self.treatment + ')'

    def __create_from_seed(self, seed, coop_prob):
        """Populate the graph using ``seed`` as a template.

        Every node of the seed is added with the attributes the algorithm
        needs; it becomes a cooperator with probability ``coop_prob`` (a
        value from 0 to 1) and a defector otherwise.  Assumes it is called
        from the constructor, i.e. on a new SocialNetwork.
        """
        self.count = -1
        g = self.g

        # add nodes from the seed to the network
        for node in seed.nodes_iter(data=True):
            # define the attributes of the node
            node_id = node[0]
            if coop_prob == 1 or random.uniform(0, 1) < coop_prob:
                st = COOP
                self.cooperators += 1
            else:
                st = DEFE
            r_index = self.free_indexes.pop()

            # add the node
            g.add_node(node_id, st=st, nst=st, r_index=r_index)
            self.node_set.add(node_id)
            self.fitness_of[r_index] = node_id
            self.fitness[r_index] = 0

            # update parameters of the graph
            if node_id > self.count:
                self.count = node_id
            self.size += 1

        # the next id to hand out is one past the largest seed id
        self.count += 1

        # add edges from the seed to the network
        for e0, e1 in seed.edges_iter():
            g.add_edge(e0, e1)

        self.__remove_isolated_nodes()

    def _release_node(self, n):
        """Remove node ``n`` from the graph and free its array slot."""
        r_index = self.g.node[n]['r_index']
        self.fitness_of[r_index] = -1
        self.free_indexes.append(r_index)
        self.node_set.discard(n)
        self.g.remove_node(n)
        self.size -= 1

    def _isolated_nodes(self):
        """Return the nodes that currently have no neighbours."""
        return [n for n, adj in self.g.adj.items() if len(adj) == 0]

    def count_coop(self):
        """Count the live nodes whose current strategy ('st') is COOP."""
        node = self.g.node
        return sum(1 for n in self.node_set if node[n]['st'] == COOP)

    def __remove_isolated_nodes(self):
        for n in self._isolated_nodes():
            self._release_node(n)

    def add_node(self, st):
        """Add a node with strategy ``st`` to the network and return its id."""
        # calculate rest of the node attributes
        node_id = self.count
        r_index = self.free_indexes.pop()

        # add node
        self.g.add_node(node_id, st=st, nst=st, r_index=r_index, gen=self.gen)

        # update network structures
        self.node_set.add(node_id)
        self.fitness_of[r_index] = node_id
        self.fitness[r_index] = 0
        self.degrees[r_index] = 0

        # update network parameters
        if st == COOP:
            self.cooperators += 1
        self.size += 1
        self.count += 1

        return node_id

    def play_games_and_remove_isolated_nodes(self):
        """Play one round between all neighbours and drop isolated nodes.

        Recomputes ``fitness``, ``eps_fitness`` and ``degrees`` for every
        connected node and stores the totals in ``total_fit``,
        ``total_efit`` and ``total_degrees``.
        """
        g = self.g
        node = g.node
        node_set = self.node_set
        adjacency = self.g.adj
        f = self.fitness
        ef = self.eps_fitness
        eps = self.epsilon
        degrees = self.degrees
        f.fill(0)
        total_fit = 0
        total_efit = 0
        total_degrees = 0
        to_remove = []

        for n1 in node_set:
            adj = adjacency[n1]
            len_adj = len(adj)

            # make sure to remove the nodes that has no more edges
            if (len_adj == 0):
                to_remove.append(n1)
                self.removed_nodes += 1
            else:
                att1 = node[n1]
                r_index1 = att1['r_index']

                # update the strategy
                n1_e = att1['st'] = att1['nst']

                # play against all the neighbors
                for n2 in adj.keys():
                    # make sure to play just once, nodes should be in order
                    # make sure all the adjacent nodes are in order
                    if (n2 > n1):
                        att2 = node[n2]
                        if n1_e == att2['nst']:
                            if n1_e == COOP:
                                f[r_index1] += self.R
                                f[att2['r_index']] += self.R
                                total_fit += self.R + self.R
                            else:
                                f[r_index1] += self.P
                                f[att2['r_index']] += self.P
                                total_fit += self.P + self.P
                        else:
                            if n1_e == COOP:
                                f[r_index1] += self.S
                                f[att2['r_index']] += self.T
                                total_fit += self.S + self.T
                            else:
                                f[r_index1] += self.T
                                f[att2['r_index']] += self.S
                                total_fit += self.T + self.S

                # this epsilon is important to give some of the nodes
                # some chance to cooperate
                ef[r_index1] = 1 - eps + eps * f[r_index1]
                total_efit += ef[r_index1]

                # keep the degrees updates for PA
                degrees[r_index1] = len_adj
                total_degrees += degrees[r_index1]

        # set the class attribute
        self.total_fit = total_fit
        self.total_efit = total_efit
        self.total_degrees = total_degrees

        # population will collapse
        if self.size - len(to_remove) < self.e_per_gen:
            # The original called the undefined names count_coop(sn) here.
            print("population collapsed with",
                  self.count_coop(),
                  "cooperators and",
                  self.size - self.count_coop(),
                  "defectors")

        # remove nodes that didn't have any edges
        for n in to_remove:
            self._release_node(n)

    def update_strategies(self):
        """Let every node possibly imitate a random neighbour.

        The new strategy is stored in 'nst'.  Also advances ``gen`` and
        recounts ``cooperators``.
        """
        g = self.g
        self.gen += 1
        cooperators = 0

        for n1 in g.nodes_iter(data=True):
            neighbors_n1 = g.neighbors(n1[0])
            r_index1 = n1[1]['r_index']
            n2_index = random.choice(neighbors_n1)
            n2 = g.node[n2_index]

            # check that the strategies are actually different
            if n1[1]['st'] != n2['st']:
                r_n1 = self.fitness[r_index1]
                r_n2 = self.fitness[n2['r_index']]

                # Look to see if difference is less than a millionth of
                # largest value and then assume equivalence
                epsilon_fitness = max(r_n2, r_n1) / 1000000

                # if the neighbor has a bigger accumulated fitness
                if r_n2 > r_n1 + epsilon_fitness:
                    # probP = (neighbour_fitness - focal_node_fitness)
                    #         ----------------------------------------
                    #         b * max[k_focal_node, k_neighbour]
                    if random.random() < \
                            (1.0 * (r_n2 - r_n1)) / \
                            (self.b * max(len(neighbors_n1),
                                          len(g.neighbors(n2_index)))):
                        # update the strategy to a temporary vector
                        n1[1]['nst'] = n2['st']
                        # Poncela's formula gives too much weight to the
                        # number of nodes; an alternate version that would
                        # be worth testing:
                        #
                        #   P = neighbour_fitness    focal_node_fitness
                        #       ------------------ - ------------------
                        #       b * k_neighbour      b * k_focal_node

            # update cooperators counter
            if n1[1]['nst'] == COOP:
                cooperators += 1

        self.cooperators = cooperators

    def growth_initial(self, growth):
        """Top the network up to ``n_per_gen`` nodes on the first growth.

        Makes sure the first growth completes the nodes necessary to reach
        a consistent increment per generation.  Only applies to starting
        networks smaller than ``self.n_per_gen``.
        """
        if self.size < self.n_per_gen:
            temp = self.n_per_gen
            self.n_per_gen = self.n_per_gen - self.count
            try:
                growth(self)
            finally:
                # restore the regular increment even if growth() fails
                self.n_per_gen = temp

    def attrition(self, selection_method):
        """Remove the nodes picked by ``selection_method(self)``."""
        # it should be call losers
        winners = selection_method(self)

        # remove the winning nodes
        for winner in winners:
            # remove the node from the graph and update fitness arrays
            self._release_node(winner)

    # I have moved the removal of nodes with no edges to the play_games
    # phase to save optimize the code. The auxiliary method remove_isolated
    # has been created in order to produce real results.
    def remove_isolated(self, select_winners):
        """Remove every node without neighbours (``select_winners`` is unused)."""
        to_remove = self._isolated_nodes()
        self.removed_nodes += len(to_remove)

        if self.size - len(to_remove) < self.e_per_gen:
            print("population collapsed with",
                  self.count_coop(),
                  "cooperators and",
                  self.size - self.count_coop(),
                  "defectors")

        for n in to_remove:
            self._release_node(n)
class TaskCache:
    """
    Thread-safe object to provide functionality of a task queue with fast read, write and delete.

    A task is an ``(id, title, expiry_dt)`` triple.

    Let n be the number of tasks.
    Space complexity: O(n)
    Time complexity:
        add_task: O(log n)
        get_next_task: O(log n)
        remove_task: O(log n)
    """

    def __init__(self):
        self.__lock = RLock()
        self.__tasks_schedules = SortedSet()  # {(expiry_dt, id)}
        self.__tasks_dict = dict()  # {id: (title, expiry_dt)}

    def add_task(self, task: Task):
        """
        Add Task to the task queue.
        If a Task with the same id already exists, the existing record is replaced.
        Otherwise a new record is added under that id.
        :param task: Task
        :return:
        """
        task_id, title, expiry_dt = task

        with self.__lock:
            if task_id in self.__tasks_dict:
                # Drop the schedule entry stored under the old expiry.
                old_expiry_dt = self.__tasks_dict[task_id][1]
                self.__tasks_schedules.remove((old_expiry_dt, task_id))
            self.__tasks_dict[task_id] = (title, expiry_dt)
            self.__tasks_schedules.add((expiry_dt, task_id))

    def get_next_task(self) -> Optional[Task]:
        """
        Return the Task with the earliest expiry datetime without removing it.
        Returns None when there is no task in self.__tasks_schedules.
        :return: task: Task or None
        """
        with self.__lock:
            if not self.__tasks_schedules:
                return None
            _, task_id = self.__tasks_schedules[0]
            title, expiry_dt = self.__tasks_dict[task_id]
            return task_id, title, expiry_dt

    def task_done(self, task: Task):
        """
        Remove task from self.__tasks_schedules and self.__tasks_dict.
        Only the id of the task is used.
        :param task: Task
        :return:
        """
        self.remove_task(task[0])

    def remove_task(self, id: int):
        """
        Remove the task with this id from both self.__tasks_schedules and self.__tasks_dict.
        Unknown ids are ignored.
        :param id: id of the task to remove
        :return:
        """
        with self.__lock:
            try:
                _, expiry_dt = self.__tasks_dict[id]
            except KeyError:
                return
            self.__tasks_schedules.discard((expiry_dt, id))
            del self.__tasks_dict[id]

    def clear_all_tasks(self):
        """
        Clear all tasks from both self.__tasks_schedules and self.__tasks_dict
        :return:
        """
        with self.__lock:
            self.__tasks_schedules = SortedSet()
            self.__tasks_dict = dict()
def computePairs(self):
    """
    Build the list of (source set, target set) activity pairs and store it
    in ``self.pairs``.

    Reads ``self.relations``, which is indexed as
    ``self.relations[activity1][activity2]`` and compared against
    ``Relations.RIGHT_CAUSALITY`` and ``Relations.CHOICES``.

    Steps:
      1. collect every activity pair whose relation is RIGHT_CAUSALITY and
         every pair (or single activity) whose relation is CHOICES;
      2. repeatedly merge overlapping CHOICES tuples whose members are all
         pairwise in CHOICES relation;
      3. for every ordered combination of CHOICES tuples, append a
         (tuple1, tuple2) pair when every activity of the first is in
         RIGHT_CAUSALITY relation with every remaining activity of the second.

    Progress is printed to stdout along the way.

    NOTE(review): this block was recovered from text that had lost its
    indentation. The nesting of the debug prints and of the
    ``tuple(pair_choices2)`` conversion was reconstructed - confirm against
    the upstream source.
    """
    # Extract pairs of sets. Each set contains activities that have no
    # relation between them, and the activities of the two sets have to
    # directly succeed each other (Lemma 4).
    pairs_causality = []
    pairs_choices = []
    pairs = []
    # Extract all possible pairs of activities with a causality relation
    for activity1, relations1 in self.relations.items():
        for activity2, relation in relations1.items():
            if relation == Relations.RIGHT_CAUSALITY:
                pairs_causality.append((activity1, activity2))
            if relation == Relations.CHOICES:
                if activity1 == activity2:
                    # An activity in CHOICES relation with itself is kept as
                    # a one-element tuple.
                    pairs_choices.append((activity1, ))
                else:
                    pairs_choices.append((activity1, activity2))
    print(pairs_causality)
    # ``pairs`` aliases ``pairs_causality``: later appends extend both names.
    pairs = pairs_causality
    print(pairs_choices)
    # Find all possible sets of activities that are mutually in CHOICES
    # relation. ``j`` tracks the length of ``pairs_choices``, which grows
    # while the loop runs.
    i = 0
    j = len(pairs_choices)
    while i < j:
        seti = pairs_choices[i]
        for pair in pairs_choices:
            union = True
            if len(SortedSet(seti).intersection(SortedSet(pair))) != 0:
                # Merge only if every cross relation is CHOICES.
                for e1 in pair:
                    if union == False:
                        break
                    for e2 in seti:
                        if self.relations[e1][e2] != Relations.CHOICES:
                            union = False
                            break
                if union:
                    new_pair = SortedSet(seti) | SortedSet(pair)
                    if tuple(new_pair) not in pairs_choices:
                        pairs_choices.append(tuple(new_pair))
                        j = j + 1  # Re-evaluate the length
        i = i + 1
    print(pairs_choices)
    # Union: combine CHOICES tuples into (source, target) pairs.
    for pair_choices1 in pairs_choices:
        for pair_choices2 in pairs_choices:
            relation_between_pair = None
            makePair = True
            print("pair 1", pair_choices1)
            print("pair 2", pair_choices2)
            intersection = SortedSet(pair_choices1).intersection(
                pair_choices2)
            pair_choices2 = SortedSet(pair_choices2)
            if len(intersection) != 0:
                # Remove the shared activities from the second tuple
                for term in intersection:
                    pair_choices2.discard(term)
                if (len(pair_choices2) == 0):
                    continue
            pair_choices2 = tuple(pair_choices2)
            print("pair_choices2 with discarded term :", pair_choices2)
            for activity1 in pair_choices1:
                print(activity1)
                if makePair == False:
                    break
                for activity2 in pair_choices2:
                    print(activity2)
                    relation = self.relations[activity1][activity2]
                    # Every cross relation must be identical ...
                    if relation_between_pair != None and relation_between_pair != relation:
                        makePair = False
                        break
                    else:
                        relation_between_pair = relation
                    # ... and must be RIGHT_CAUSALITY.
                    if relation != Relations.RIGHT_CAUSALITY:
                        makePair = False
                        break
            if makePair == True:
                print("makepair true")
                print(pair_choices1)
                print(pair_choices2)
                # NOTE(review): given the checks above, makePair appears to
                # stay True only when every relation is RIGHT_CAUSALITY, so
                # the else branch looks unreachable - confirm.
                if relation_between_pair == Relations.RIGHT_CAUSALITY:
                    new_pair = (pair_choices1, pair_choices2)
                else:
                    new_pair = (pair_choices2, pair_choices1)
                pairs.append(new_pair)
            print("\n")
        print("\n")
    print(pairs)
    self.pairs = pairs
# NOTE(review): the triple quote below is present in the original text and is
# not closed anywhere in the visible part of the file - confirm whether it
# opened a commented-out region upstream.
'''
def test5():
    """
    Walk through the basic API of the ordered ``SortedSet`` container and of
    the built-in unordered ``set``, printing the result of each step.

    SortedSet reference:
    http://www.grantjenks.com/docs/sortedcontainers/sortedset.html
    """
    from sortedcontainers import SortedSet

    # ------------------------------------------------------------------
    # Ordered set: SortedSet
    # ------------------------------------------------------------------

    # Construction
    ordered = SortedSet([3, 1, 2, 5, 4])
    print(ordered)  # SortedSet([1, 2, 3, 4, 5])

    from operator import neg
    reverse_ordered = SortedSet([3, 1, 2, 5, 4], neg)
    print(reverse_ordered)  # SortedSet([5, 4, 3, 2, 1], key=<built-in function neg>)

    # Conversion to list / tuple / set
    print(list(ordered))  # [1, 2, 3, 4, 5]
    print(tuple(ordered))  # (1, 2, 3, 4, 5)
    print(set(ordered))  # {1, 2, 3, 4, 5}

    # Inserting and deleting elements
    ordered.discard(-1)  # discarding a missing element is silently ignored
    ordered.remove(1)  # remove() raises KeyError for a missing element; 1 exists, so it is removed
    ordered.discard(3)  # SortedSet([2, 4, 5])
    ordered.add(-10)  # SortedSet([-10, 2, 4, 5])

    # First and last element
    print(ordered[0])  # -10
    print(ordered[-1])  # 5

    # Iteration (ascending order)
    for item in ordered:
        print(item, end=", ")  # -10, 2, 4, 5,
    print()

    # Membership test
    print(2 in ordered)  # True

    # bisect_left() / bisect_right()
    print(ordered.bisect_left(4))  # index of the first element >= 4 -> 2
    print(ordered.bisect_right(4))  # index of the first element > 4 -> 3

    # Emptying the set
    ordered.clear()
    print(len(ordered))  # 0
    print(len(ordered) == 0)  # True

    # ------------------------------------------------------------------
    # Unordered set: set
    # ------------------------------------------------------------------

    # Definition: elements must be hashable, so a list cannot be an element
    mixed = {"hi", 2, ("we", 24)}
    empty = set()  # an empty set must be written set(); {} creates a dict

    # Operations between sets; each operator also has an "op=" in-place form
    print("---------------------------------------")
    left = {1, 2, 3}
    right = {3, 4, 5}
    print(left & right)  # intersection: elements in both sets
    print(left | right)  # union: elements in either set
    print(left - right)  # difference: elements in left but not in right
    print(left ^ right)  # symmetric difference: elements in exactly one set

    # Subset / superset relations
    print("---------------------------------------")
    first = {1, 2}
    second = {1, 2}
    print(first <= second)  # is first a subset of second? True
    print(first < second)  # is first a proper subset of second? False
    print(first >= second)  # is second a subset of first? True
    print(first > second)  # is second a proper subset of first? False

    # Mutating methods
    print("---------------------------------------")
    bag = {1, 2, 3, 5, 6}
    bag.add(4)  # add x if it is not already present
    bag.discard(1)  # remove x; no error when x is missing
    bag.remove(2)  # remove x; KeyError when x is missing
    for item in bag:  # iteration
        print(item, end=",")
    print()
    print(bag.pop())  # pop an arbitrary element; KeyError when the set is empty
    print(bag.copy())  # shallow copy; changing the copy does not affect bag
    print(len(bag))  # number of elements
    print(5 in bag)  # True when 5 is in bag, otherwise False
    print(5 not in bag)  # True when 5 is not in bag, otherwise False
    bag.clear()  # remove every element
def py_star(width, height, costs, startIndex, endIndex, diagonalOk):
    """
    Search a ``width`` x ``height`` grid for a cheapest path using A*.

    Cells are addressed by a flat index ``idx`` with ``x = idx % width`` and
    ``y = idx // width``. ``costs[idx]`` is the cost multiplier for stepping
    onto a cell; a negative value marks the cell as not passable. A diagonal
    step costs ``sqrt(2)`` times the cell cost, a straight step costs the
    cell cost.

    :param width: number of columns
    :param height: number of rows
    :param costs: flat sequence of ``width * height`` cell costs
    :param startIndex: flat index of the start cell
    :param endIndex: flat index of the end cell
    :param diagonalOk: when falsy, diagonal steps are skipped
    :return: tuple ``(path, closedlist)``. ``path`` is whatever
             ``constructPath`` returns when the target is reached, or
             ``[-1]`` when it is not; ``closedlist`` is the set of expanded
             cell indices
    :raises ValueError: on negative dimensions, a ``costs`` length mismatch,
                        or an out-of-range start/end index
    """
    if width < 0 or height < 0:
        raise ValueError("Width and height have to be positive!")
    if width * height != len(costs):
        raise ValueError("Width * height != len(costs)!")
    lastIndex = len(costs) - 1
    if not (0 <= startIndex <= lastIndex and 0 <= endIndex <= lastIndex):
        raise ValueError(
            f"Start and end indices have to be in the range [0, {len(costs)})!"
        )

    # find path from exit to start, this way when traversing the nodes from
    # the start every node points to the next one in the path
    startIndex, endIndex = endIndex, startIndex
    startPos = (startIndex % width, startIndex // width)
    # Floor division: the row must be an integer grid coordinate. True
    # division would give the heuristic a fractional target row.
    endPos = (endIndex % width, endIndex // width)

    # NOTE(review): positional Node arguments presumably map to
    # (idx, sureCost, heuristicCost, combinedCost, parent) - verify against
    # the Node definition.
    nodeMap = [
        Node(idx, math.inf, 0.0, math.inf, None)
        for idx in range(len(costs))
    ]
    endNode = nodeMap[endIndex]
    startNode = nodeMap[startIndex]
    startNode.sureCost = 0
    startNode.heuristicCost = heuristicCost(startPos, endPos, diagonalOk)
    startNode.combinedCost = startNode.sureCost + startNode.heuristicCost

    DIAG_COST = math.sqrt(2)

    # Open set ordered by combined (known + estimated) cost, cheapest first.
    openlist = SortedSet([startNode], key=lambda node: node.combinedCost)
    closedlist = set()
    while openlist:
        current = openlist.pop(0)
        if current == endNode:
            # call with end and start switched to get correct direction back
            return (constructPath(endNode, startNode), closedlist)
        closedlist.add(current.idx)
        curX, curY = posFromIndex(current.idx, width)
        for dx in range(-1, 2):
            for dy in range(-1, 2):
                # skip diagonal entries if diagonals are not viable
                # (this also skips dx == dy == 0, i.e. the current cell)
                if not diagonalOk and abs(dx) == abs(dy):
                    continue
                x, y = curX + dx, curY + dy
                # skip if the step would leave the rectangle: the product is
                # positive exactly when the coordinate is < 0 or > limit - 1
                if (x - width + 1) * x > 0 or (y - height + 1) * y > 0:
                    continue
                neighbor = nodeMap[current.idx + dx + dy * width]
                # skip previously visited nodes, including the current node
                if neighbor.idx in closedlist:
                    continue
                # skip if node is not passable
                if costs[neighbor.idx] < 0:
                    continue
                diagonalMove = (dx * dy) != 0
                stepFactor = DIAG_COST if diagonalMove else 1
                newSureCost = current.sureCost + stepFactor * costs[neighbor.idx]
                if newSureCost < neighbor.sureCost:
                    # Remove before mutating: changing the sort key of an
                    # element that is still stored would invalidate the
                    # ordered set.
                    openlist.discard(neighbor)
                    neighbor.sureCost = newSureCost
                    neighbor.heuristicCost = heuristicCost((x, y), endPos,
                                                           diagonalOk)
                    # combined cost for ordering of the open set
                    neighbor.combinedCost = (neighbor.sureCost +
                                             neighbor.heuristicCost)
                    neighbor.parent = current
                    openlist.add(neighbor)
    return ([-1], closedlist)
class ExpandUserIdentity:
    """
    Seed-expansion matcher that pairs vertices of a left graph ``lg`` with
    vertices of a right graph ``rg``.

    Starting from the seed pairs, every processed pair adds one "mark" to
    each pair formed by its neighbours. Pairs with at least two marks are
    queued in ``inactive_pairs`` and matched highest-ranked first; when the
    queue runs dry, new seeds are drawn from the neighbours of all matches.

    State kept on the instance:
      * ``matches`` / ``lNodeM`` / ``rNodeM`` - matched pairs (M) and the
        nodes they occupy on each side;
      * ``seeds`` - pairs still to be expanded (A);
      * ``used`` - ``"l|r"`` tokens of pairs that already spread marks (Z);
      * ``score_map`` - ``"l|r"`` token -> number of marks received;
      * ``bad_name`` - tokens rejected by the name-similarity threshold;
      * ``inactive_pairs`` - SortedSet of ``(lnode, rnode, marks)`` tuples.

    NOTE(review): the graph objects are used through ``vs``, ``neighbors``,
    ``degree`` and ``vcount`` - presumably igraph graphs; confirm.
    """

    def __init__(self, lg, rg, seeds_0, name_sim_threshold=61,
                 is_repeat=False, model=None, cache_files=None):
        """
        :param lg: left graph
        :param rg: right graph
        :param seeds_0: iterable of initial ``(lnode, rnode)`` matches
        :param name_sim_threshold: minimum name similarity a candidate pair
            needs when no model is used
        :param is_repeat: re-seed from already used pairs after each round
        :param model: optional classifier with a ``predict`` method; when
            given, ``cache_files`` must name the two feature caches
        :param cache_files: pair of pickle file names read by ``load_cache``
        """
        if is_repeat:
            print("With repeated seeds algorithm is selected")
        else:
            print('NO seeds repeat algorithm is selected ')
        self.lg = lg
        self.rg = rg
        self.seed_0_count = len(seeds_0)
        if is_repeat:
            print('WITH REPEAT!!!')
        self.with_repeat = is_repeat

        # M <- A_0   ps: A = A_0
        self.lNodeM = set()
        self.rNodeM = set()
        self.matches = set()
        for lnode, rnode in seeds_0:
            self.matches.add((lnode, rnode))
            self.lNodeM.add(lnode)
            self.rNodeM.add(rnode)
        self.seeds = list(seeds_0)
        self.used = set()
        # marks for every pair; mark count > r
        self.score_map = dict()
        self.bad_name = set()

        if model:
            print('WITH MODEL!!!')
            self.model = model
            self.has_model = True
            self.load_cache(cache_files)
            self.__get_top = self.__get_top_with_model
            # Rank by mark count, then by name similarity plus model verdict.
            self.inactive_pairs = SortedSet(
                key=lambda x: (x[2],
                               self.__name_similar(x[0], x[1]) / 100 +
                               self.__top_with_model(x)))
            self.__decide_seed = self.__decide_seed_with_model
        else:
            self.__get_top = self.__get_top_no_model
            # Rank by mark count, then prefer the smaller degree difference.
            self.inactive_pairs = SortedSet(
                key=lambda x: (x[2], -1 * self.f_deg_diff(x)))
            self.__decide_seed = self.__decide_seed_no_model
            self.has_model = False

        self.name_sim_threshold = name_sim_threshold
        print('name_sim_threshold', name_sim_threshold)

        # Number of right-graph vertices whose uid also occurs on the left.
        left_uids = {v['uid'] for v in lg.vs}
        self.n_common = sum(int(v['uid'] in left_uids) for v in rg.vs)

    def load_cache(self, cache_files, base_folder=None):
        """
        Load the two feature caches into ``f_set1s`` and ``f_set2s``.

        :param cache_files: sequence of two file names located in
            ``<base_folder>/data/generated``
        :param base_folder: project root; defaults to the original
            hard-coded location
        """
        if base_folder is None:
            base_folder = '/home/ildar/projects/pycharm/social_network_revealing/graphmatching/'
        folder_gen = os.path.join(base_folder, 'data', 'generated')
        # SECURITY: pickle.load executes arbitrary code from the file; only
        # load caches produced by a trusted source.
        with open(os.path.join(folder_gen, cache_files[0]), "rb") as fh:
            self.f_set1s = dict(pickle.load(fh))
        with open(os.path.join(folder_gen, cache_files[1]), "rb") as fh:
            self.f_set2s = dict(pickle.load(fh))
        print('Cache loaded', len(self.f_set1s), len(self.f_set2s))

    def to_str(self, ln, rn):
        """Encode a node pair as the token ``"<ln>|<rn>"``."""
        return '%d|%d' % (ln, rn)

    def untokenize(self, s):
        """Decode a ``"<ln>|<rn>"`` token back into ``(ln, rn)`` ints."""
        i = s.index('|')
        return int(s[:i]), int(s[i + 1:])

    def f_deg_diff(self, s):
        """Absolute degree difference of the pair ``(s[0], s[1])``."""
        return abs(self.lg.degree(s[0]) - self.rg.degree(s[1]))

    def __in_matched(self, lnode, rnode):
        """True when either node already takes part in a match."""
        return lnode in self.lNodeM or rnode in self.rNodeM

    def __add_match(self, lnode, rnode, seed_count):
        """Record ``(lnode, rnode)`` as matched; ``seed_count`` is unused."""
        self.matches.add((lnode, rnode))
        self.lNodeM.add(lnode)
        self.rNodeM.add(rnode)

    def __name_similar(self, li, ri):
        """Fuzzy token-set ratio between the two vertices' ``fname``."""
        return fuzz.token_set_ratio(self.lg.vs[li]['fname'],
                                    self.rg.vs[ri]['fname'])

    def __flush_marks(self, updates, old_marks):
        """
        Re-file pairs in ``inactive_pairs`` under their new mark count.

        The mark count is part of the stored tuple and of the sort key, so
        the stale tuple must be discarded before the new one is added.

        :param updates: iterable of ``((lnode, rnode), marks_count)``
        :param old_marks: token -> mark count the pair was last filed under
        """
        for seed, marks_count in updates:
            stale = (seed[0], seed[1], old_marks[self.to_str(*seed)])
            self.inactive_pairs.discard(stale)
            self.inactive_pairs.add((seed[0], seed[1], marks_count))

    def __spread_mark(self, lnode, rnode, seeds_collect=None, old_marks=None):
        """
        Add one mark to every eligible neighbouring pair of
        ``(lnode, rnode)``.

        A pair receiving its first mark is only counted in ``score_map``;
        from the second mark on it is scheduled for ``inactive_pairs``.

        :param seeds_collect: when given, updates are accumulated into this
            dict (pair -> new mark count) and the caller flushes them; when
            omitted, updates are flushed here
        :param old_marks: companion dict of ``seeds_collect`` (token -> mark
            count before the first update)
        """
        # Identity test on purpose: an empty dict passed by the caller must
        # still count as "caller collects".
        if seeds_collect is None:
            seeds_collect = {}
            old_marks = {}
            is_from_spread_marks = False
        else:
            is_from_spread_marks = True

        for l_neighbor in self.lg.neighbors(lnode):
            for r_neighbor in self.rg.neighbors(rnode):
                ID_str = self.__decide_seed(l_neighbor, r_neighbor)
                if not ID_str:
                    continue
                val = self.score_map.get(ID_str)
                if not val:
                    self.score_map[ID_str] = 1
                    continue
                if ID_str not in old_marks:
                    old_marks[ID_str] = val
                self.score_map[ID_str] += 1
                seeds_collect[(l_neighbor, r_neighbor)] = val + 1

        if is_from_spread_marks:
            return
        self.__flush_marks(seeds_collect.items(), old_marks)

    def __spread_marks(self):
        """Spread marks from every pending seed, then empty ``seeds``."""
        # for all pairs[i, j] of A do
        print('start __spread_marks')
        seeds_collect = {}
        old_marks = {}
        for seed in tqdm(self.seeds):
            self.used.add(self.to_str(*seed))
            self.__spread_mark(*seed, seeds_collect=seeds_collect,
                               old_marks=old_marks)
        self.__flush_marks(tqdm(seeds_collect.items()), old_marks)
        # A <- None
        self.seeds.clear()
        print("Seed are expanded")

    def __pop_unmatched(self):
        """
        Pop candidates from the end of ``inactive_pairs`` (largest sort key)
        until one is found whose nodes are both unmatched.

        :return: ``(lnode, rnode, marks)`` or None when the set runs empty
        """
        while self.inactive_pairs:
            s = self.inactive_pairs.pop()
            if not self.__in_matched(s[0], s[1]):
                return s
        return None

    def __get_top_no_model(self):
        """Best unmatched candidate (no-model ranking)."""
        return self.__pop_unmatched()

    def __get_top_with_model(self):
        """Best unmatched candidate (model ranking)."""
        return self.__pop_unmatched()

    def __decide_seed_no_model(self, lnode, rnode):
        """
        Return the pair token when the pair may receive marks, else False.

        Rejects matched nodes, pairs already used, and pairs whose name
        similarity is below the threshold (remembered in ``bad_name``).
        """
        # i,j not in V_1,V_2(M) and [i,j] not in Z
        if self.__in_matched(lnode, rnode):
            return False
        ID_str = self.to_str(lnode, rnode)
        if ID_str in self.used or ID_str in self.bad_name:
            return False
        if self.__name_similar(lnode, rnode) < self.name_sim_threshold:
            self.bad_name.add(ID_str)
            return False
        return ID_str

    def __decide_seed_with_model(self, lnode, rnode):
        """Return the pair token unless a node is matched or the pair used."""
        # i,j not in V_1,V_2(M) and [i,j] not in Z
        if self.__in_matched(lnode, rnode):
            return False
        ID_str = self.to_str(lnode, rnode)
        if ID_str in self.used:
            return False
        return ID_str

    def degs(self, node):
        """Degrees of all neighbours of ``node``."""
        return [v.degree() for v in node.neighbors()]

    def __top_with_model(self, s):
        """
        Ask the model whether the pair ``(s[0], s[1])`` is a match.

        The feature row is the concatenation of both cached feature lists
        plus the normalised degree difference.

        :return: result of ``self.model.predict(x) == 1``
        """
        lv = self.lg.vs[s[0]]
        rv = self.rg.vs[s[1]]
        # ``+`` builds a new list, so the cached features are not mutated.
        feature_set = self.f_set1s[lv['uid']] + self.f_set2s[rv['uid']]
        n_deg = lv.degree()
        m_deg = rv.degree()
        feature_set.append(abs(n_deg - m_deg) / max(n_deg, m_deg, 1))
        x = np.array(feature_set).reshape((1, -1))
        return self.model.predict(x) == 1

    def __extend_seeds_by_matches(self):
        """Refill ``seeds`` with eligible neighbour pairs of all matches."""
        # A <- all neighbors of M [i,j] not in Z, i,j not in V_1,V_2(M)
        print('__extend_seeds_by_matches')
        for lnode, rnode in tqdm(self.matches):
            # all neighbors of M
            for l_neighbor in self.lg.neighbors(lnode):
                for r_neighbor in self.rg.neighbors(rnode):
                    if not self.__decide_seed(l_neighbor, r_neighbor):
                        continue
                    self.seeds.append((l_neighbor, r_neighbor))
        print("Extended seed size: ", len(self.seeds))

    def __garbage_collect(self):
        """
        Drop bookkeeping for pairs that can no longer be matched.

        Runs only when both ``score_map`` and ``inactive_pairs`` have grown
        past their size thresholds.
        """
        if len(self.score_map) < 20000000 or len(
                self.inactive_pairs) < 10000000:
            return
        print("Garbage collector:")
        print("\talgorithm: time elapsed: %s" % (time.time() - self.s_time))
        print("\tSize score map: %d" % len(self.score_map))
        # Collect first, delete afterwards: removing entries from a dict
        # while iterating over it raises RuntimeError.
        stale_tokens = [
            s for s in tqdm(self.score_map)
            if self.__in_matched(*self.untokenize(s))
        ]
        for s in stale_tokens:
            del self.score_map[s]
        print("\tSize score map (cleared): %d" % len(self.score_map))
        print("\tSize inactive pairs: %d" % len(self.inactive_pairs))
        # Same reasoning for the sorted set.
        stale_pairs = [
            p for p in self.inactive_pairs if self.__in_matched(p[0], p[1])
        ]
        for p in stale_pairs:
            self.inactive_pairs.remove(p)
        print("\tSize inactive pairs (cleared): %d" % len(self.inactive_pairs))

    def __inter_result(self):
        """
        Count matches whose vertices carry the same ``name`` on both sides.

        :return: ``(correct, wrong)``
        """
        correct, wrong = 0, 0
        for ln, rn in self.matches:
            if self.lg.vs[ln]['name'] == self.rg.vs[rn]['name']:
                correct += 1
            else:
                wrong += 1
        return correct, wrong

    def __dist_sim(self, vl, vr):
        """p-value of the two-sample KS test on the neighbour degrees."""
        sl = [v.degree() for v in self.lg.vs[vl].neighbors()]
        sr = [v.degree() for v in self.rg.vs[vr].neighbors()]
        return ks_2samp(sl, sr).pvalue  # statistic

    def execute(self):
        """
        Run the matching until no seeds are left.

        Sets ``s_time`` at start and ``time_elapsed`` at the end; progress
        is printed to stdout.

        NOTE(review): the loop nesting was reconstructed from text that had
        lost its indentation - confirm against the upstream source.
        """
        self.s_time = time.time()
        iter_num = 0
        show_counter = 0
        show_bound = 50
        show_bound_match = 50
        used_used = set()
        round_num = 1
        # while |A| > 0 do
        while self.seeds:
            # while |A| > 0 do
            while self.seeds:
                iter_num += 1
                print("Iter num: %d\tseed size = %d" %
                      (iter_num, len(self.seeds)))
                # for all pairs[i, j] of A do
                self.__spread_marks()
                print('Done')
                # while there exists an unmatched pair with score at least r+1
                while self.inactive_pairs:
                    show_counter += 1
                    if show_counter % show_bound == 0:
                        print("In progress... (%d)" %
                              len(self.inactive_pairs))
                    # best candidate, skipping pairs that became matched
                    s = self.__get_top()
                    if not s:
                        break
                    elif show_counter % show_bound == 0:
                        print("[%d] select the unmatched pair [%d,%d]" %
                              (show_counter, s[0], s[1]))
                        print("score map size = %d" % len(self.score_map))
                    lnode, rnode, seed_count = s
                    # add [i,j] to M
                    self.__add_match(lnode, rnode, seed_count)
                    ID_not_active = self.to_str(lnode, rnode)
                    # if [i,j] not in Z
                    if ID_not_active not in self.used:
                        # add [i,j] to Z
                        self.used.add(ID_not_active)
                        # add one mark to all of its neighbouring pairs
                        self.__spread_mark(lnode, rnode)
                    if len(self.matches) % show_bound_match == 0:
                        print("Correct = %d, Wrong = %d" %
                              self.__inter_result())
                print("Finish with inactive_pairs")
                # A <- all neighbors of M [i,j] not in Z, i,j not in V_1,V_2(M)
                self.__extend_seeds_by_matches()
            if self.with_repeat:
                # Re-seed from used pairs that are still unmatched, were not
                # re-seeded before and have near-identical names.
                for s in self.used:
                    l_neighbor, r_neighbor = self.untokenize(s)
                    if not self.__in_matched(l_neighbor, r_neighbor) and s not in used_used and \
                            self.__name_similar(l_neighbor, r_neighbor) >= 99:
                        self.seeds.append((l_neighbor, r_neighbor))
                        used_used.add(s)
                print('Updated round %d, seed count = %d. used_used = %d' %
                      (round_num, len(self.seeds), len(used_used)))
            round_num += 1
        self.time_elapsed = time.time() - self.s_time

    def assure_folder_exists(self, path):
        """Create the parent directory of ``path`` if it is missing."""
        folder = os.path.dirname(path)
        print(os.path.abspath(folder))
        # An empty dirname means "current directory": nothing to create.
        if folder:
            os.makedirs(folder, exist_ok=True)

    def save_result(self):
        """
        Pickle the matches as a list of ``(left uid, right uid)`` tuples.

        :return: path of the written file
        """
        if self.seed_0_count > 100:
            repeat_name = 'seed_matches'
        else:
            repeat_name = 'repeat' if self.with_repeat else 'no_repeat'
        fname = '%.3d/matches_s_%.2d_th_%.3d_t_%s.pickle' % (
            self.name_sim_threshold, self.seed_0_count,
            self.name_sim_threshold, time.strftime("%m-%d_%H:%M:%S"))
        fname = os.path.join('matches', repeat_name, fname)
        self.assure_folder_exists(fname)
        lid_rid = [(self.lg.vs[lnode]['uid'], self.rg.vs[rnode]['uid'])
                   for lnode, rnode in self.matches]
        # Context manager so the data is flushed and the handle released.
        with open(fname, 'wb') as fh:
            pickle.dump(lid_rid, fh)
        return fname

    def check_result(self):
        """
        Print recall, precision and F1 of the current matches.

        A metric whose denominator is zero is reported as 0.0.
        """
        correct, wrong = self.__inter_result()
        msize = len(self.matches)
        recall = float(correct) / self.n_common if self.n_common else 0.0
        precision = float(correct) / msize if msize else 0.0
        denominator = precision + recall
        if denominator:
            f1_score = 2 * (precision * recall / denominator)
        else:
            f1_score = 0.0
        print("------RESULT-------")
        print("\tfor lN = %d, rN = %d, |seed_0| = %d" %
              (self.lg.vcount(), self.rg.vcount(), self.seed_0_count))
        print("\tmatched =", msize)
        print("\t\tcorrect = %d; wrong = %d" % (correct, wrong))
        print("\tRecall = %f" % recall)
        print("\tPrecision = %f" % precision)
        print("\tF1-score = %f" % f1_score)