def initScore_unbiased(sampleindex, D1index, k, sr, queries):
    """
    Unbiased benefit estimation.

    :param sampleindex: inverted index of the sample
    :param D1index: inverted index of the local database
    :param k: top-k restriction
    :param sr: sample rate
    :param queries: query pool (query -> local result size)
    :return: max-priority queue mapping each query to its unbiased benefit
    """
    query_pool = maxpq()
    # .items() works on both Python 2 and 3 (iteritems() is Py2-only).
    for q, l1 in queries.items():
        # Hoist the sample posting-list length; it is used several times.
        ls = len(sampleindex[q])
        if ls != 0:
            # Overlap between the sample postings and the local database.
            lcap = len(sampleindex[q].intersection(D1index[q]))
            est_score = lcap / sr
            if est_score > k:
                # Estimate exceeds the top-k cap: scale by the fraction
                # of the sample postings that fit under the cap.
                score = lcap * k / (ls * 1.0)
            else:
                score = est_score
        else:
            # Query matches nothing in the sample: no estimated benefit.
            score = 0
        query_pool[q] = score
    return query_pool
def initScore_biased(sampleindex, k, sr, Dratio, queries):
    """
    Biased benefit estimation.

    :param sampleindex: inverted index of sample
    :param k: top-k restriction
    :param sr: sample rate
    :param Dratio: local database rate
    :param queries: query pool (query -> local result size)
    :return: max-priority queue mapping each query to its biased benefit
    """
    query_pool = maxpq()
    # .items() works on both Python 2 and 3 (iteritems() is Py2-only).
    for q, l1 in queries.items():
        # Hoist the sample posting-list length; it is used twice.
        ls = len(sampleindex[q])
        if ls != 0:
            est_score = ls / sr
            if est_score > k:
                # Estimated result set exceeds the top-k cap: scale the
                # local size down by the fraction actually returned.
                score = k * l1 / (est_score * 1.0)
            else:
                score = l1
        else:
            # Query unseen in the sample: cap the benefit at k * Dratio.
            if l1 > k * Dratio:
                score = 1.0 * k * Dratio
            else:
                score = l1
        query_pool[q] = score
    return query_pool
def _immunize_deg(adj, num_nodes, queue=True): """Internal function. To immunize a graph, use `immunize`.""" # Takes O(n) time and O(n) space deg = {n: len(adj[n]) for n in adj} # Make sure we don't remove more nodes than there are available num_nodes = min(num_nodes, len(deg)) # If using a queue, we need to heapify it (which takes O(n)) if queue: deg = maxpq(deg) # Main loop removed = [] for _ in range(num_nodes): if queue: node = deg.pop() # Takes O(log n) else: node = max(deg, key=deg.get) # Takes O(n) del deg[node] # Takes O(degree[node]) for neigh in adj[node]: adj[neigh].remove(node) deg[neigh] -= 1 # Finally, finish udpating the graph, and store the node. del adj[node] removed.append(node) return removed
def __init__(self, protocol, fact, parents=None, barzer_svc=None):
    """Build a composite fact: wrap `fact`, instantiate its operator,
    create child facts through the protocol, and aggregate value and
    confidence from the children.

    :param protocol: owning protocol; used to create/update child facts
    :param fact: source fact providing text, operator name and sub-facts
    :param parents: optional parent facts, forwarded to the base class
    :param barzer_svc: optional barzer service, forwarded to the base class
    """
    super(ConvoCompositeFact, self).__init__(protocol, fact, parents, barzer_svc=barzer_svc)
    self.text = fact.text
    # Map the fact's operator name to an operator class and instantiate it.
    op = self.OPERATOR_MAP.get(fact.operator)
    if op:
        self.op = op()
    # NOTE(review): if `fact.operator` is missing from OPERATOR_MAP,
    # self.op is never assigned and the `self.op.calc(...)` call below
    # raises AttributeError — confirm the map covers every operator.
    self.pq = pqdict.maxpq()
    # Recursively materialize child facts through the protocol, with this
    # composite fact as their parent.
    self.set_children([
        protocol.create_or_update_fact(f, [self]) for f in fact.facts])
    # Aggregate value/confidence over the children via the operator.
    self.value = self.op.calc(children=self.get_children())
    self.confidence = self.op.confidence(children=self.get_children())
def __init__(self):
    """Interactively configure the crawler and build its collaborators.

    Prompts for the search query and the two crawl limits, then wires up
    the downloader, parser, relevance calculator, work queues and
    bookkeeping state.
    """
    self.query = input("Enter search query: ")
    # input() returns a string; the limits are counts, so convert them to
    # int up front — comparing a str limit against an int counter such as
    # webpages_crawled would raise TypeError later.
    self.webpages_limit = int(input(
        "Set total number of webpages to be crawled: "))
    self.limit = int(input(
        "Set limits on how many webpages be crawled from single site: "))
    # Max-priority queue of URLs ordered by relevance score.
    self.priority_queue = maxpq()
    self.queue = queue.Queue()
    self.downloader = Downloader()
    self.parser = Parser(self.query)
    self.calculator = Calculator(self.query)
    self.relevance = Relevance()
    self.webpages_crawled = 0
    self.logger = logging.getLogger(__name__)
    # Dedup set of already-crawled URLs.
    self.visited_urls = set()
    # Per-site crawl counters, checked against self.limit.
    self.sites_times = {}
def test_equality():
    # Two pqdicts built from the same items compare equal.
    first = pqdict(sample_items)
    second = pqdict(sample_items)
    assert first == second
    assert not (first != second)
    # Bumping a single priority makes them unequal.
    second[random.choice(sample_keys)] += 1
    assert first != second
    assert not (first == second)
    # A pqdict equals a plain dict holding identical key/value pairs.
    assert first == dict(sample_items)
    # TODO: FIX?
    # pqdicts evaluate as equal even if they have different
    # key functions and/or precedence functions
    assert first == maxpq(sample_items)
def test_equality(self):
    # Two pqdicts built from the same items compare equal.
    first = pqdict(sample_items)
    second = pqdict(sample_items)
    self.assertTrue(first == second)
    self.assertFalse(first != second)
    # Bumping a single priority makes them unequal.
    second[random.choice(sample_keys)] += 1
    self.assertTrue(first != second)
    self.assertFalse(first == second)
    # A pqdict equals a plain dict holding identical key/value pairs.
    self.assertEqual(first, dict(sample_items))
    # TODO: FIX?
    # pqdicts evaluate as equal even if they have different
    # key functions and/or precedence functions
    self.assertEqual(first, maxpq(sample_items))
def __init__(self, data, barzer_svc=None):
    """Build the protocol graph from raw `data`: wrap every terminal in a
    composite fact, register them as children, and seed the priority
    queue with each terminal's score.

    :param data: raw protocol description, parsed by Protocol()
    :param barzer_svc: optional barzer service; falls back to the default
    """
    protocol = Protocol(data)
    self.id = 'protocol'
    self.index = cg_index.Index()
    super(ConvoProtocol, self).__init__()
    # id -> ConvoCompositeFact for terminal facts only.
    self.terminals = {}
    # id -> fact; unassigned ids default to an empty set.
    self.facts = defaultdict(set)
    self.barzer_svc = barzer_svc or default_barzer_instance
    self.visited_facts = set()
    self.facts_to_update = deque()
    # Each terminal is wrapped in a composite fact parented to this protocol.
    for t in protocol.terminals:
        self.facts[t.id] = self.terminals[t.id] = ConvoCompositeFact(
            protocol=self, fact=t, parents=[self])
    self.set_children(self.terminals.values())
    # Max-priority queue of terminal facts keyed by their current score.
    self.pq = pqdict.maxpq()
    for t in self.terminals.values():
        self.pq[t] = t.score()
def test_maxpq():
    # Draining a max-priority queue yields values in descending order.
    pq = maxpq(A=5, B=8, C=7, D=3, E=9, F=12, G=1)
    drained = list(pq.popvalues())
    expected = [12, 9, 8, 7, 5, 3, 1]
    assert drained == expected
    # A maxpq orders entries with the greater-than comparator.
    assert pq.precedes == operator.gt
def test_maxpq(self):
    # Draining a max-priority queue yields values in descending order.
    pq = maxpq(A=5, B=8, C=7, D=3, E=9, F=12, G=1)
    drained = list(pq.popvalues())
    expected = [12, 9, 8, 7, 5, 3, 1]
    self.assertEqual(drained, expected)
    # A maxpq orders entries with the greater-than comparator.
    self.assertEqual(pq.precedes, operator.gt)
def _immunize_ci(adj, num_nodes, queue=True): """Internal function. To immunize a graph, use `immunize`.""" # Once populated, will take O(n) space ci = defaultdict(int) # Takes O(m) time for node in adj: excess_deg = len(adj[node]) - 1 for neigh in adj[node]: ci[node] += excess_deg * (len(adj[neigh]) - 1) # Make sure we don't remove more nodes than there are available num_nodes = min(num_nodes, len(ci)) # If using a queue, we need to heapify it (which takes O(n)) if queue: ci = maxpq(ci) # Main loop removed = [] for _ in range(num_nodes): if queue: node = ci.pop() # Takes O(log n) else: node = max(ci, key=ci.get) # Takes O(n) del ci[node] # Takes O(degree[node]) for neigh in adj[node]: adj[neigh].remove(node) # Compute the deltas. Takes O(degree^2[node]) deg = len(adj[node]) delta = defaultdict(int) count = defaultdict(int) for neigh in adj[node]: if len(adj[neigh]) > 0: delta[neigh] += \ ((deg - 1) * (len(adj[neigh]) - 1) + ci[neigh] // (len(adj[neigh]))) else: # Nodes of degree 1 decrease all the way to zero delta[neigh] += ci[neigh] for neigh2 in adj[neigh]: if neigh2 in adj[node]: delta[neigh2] += len(adj[neigh2]) else: delta[neigh2] += len(adj[neigh2]) - 1 count[neigh2] += 1 # Apply the changes at the same time. Takes O(degree^2[node]). for neigh2 in delta: # At the end of the previous loop, nodes of type 1 and 2 # already have the correct deltas, while nodes of type 3 are # missing a term. if neigh2 in adj[node] and count[neigh2] > 0: delta[neigh2] -= count[neigh2] # If dict, takes O(1). If heap, takes O(log n) ci[neigh2] = ci[neigh2] - delta[neigh2] # Finally, finish udpating the graph, and store the node. del adj[node] removed.append(node) return removed
def _immunize_xdeg(adj, num_nodes, queue=True): """Internal function. To immunize a graph, use `immunize`.""" # Takes O(m) time and O(2n) space to_be_squared, sum_squares = defaultdict(int), defaultdict(int) for node in adj: for neigh in adj[node]: # Note we keep the sum of squares, not its square, and square # it only when needed. to_be_squared[node] += len(adj[neigh]) - 1 sum_squares[node] += (len(adj[neigh]) - 1)**2 # Takes O(n) time and O(n) space # Remember to square the first term xdeg = {n: to_be_squared[n]**2 - sum_squares[n] for n in adj} # We actually don't need this again del sum_squares # Make sure we don't remove more nodes than there are available num_nodes = min(num_nodes, len(xdeg)) # If using a queue, we need to heapify it (which takes O(n)) if queue: xdeg = maxpq(xdeg) # Main loop # Takes O(m) time and O(m) space removed = [] for _ in range(num_nodes): if queue: node = xdeg.pop() # Takes O(log n) else: node = max(xdeg, key=xdeg.get) # Takes O(n) del xdeg[node] # Takes O(degree[node]) for neigh in adj[node]: adj[neigh].remove(node) # The following loop will compute the difference in xdeg of each # node, without changing any of the variables. The loop after that # will actually apply the changes. # # We do this as follows. For a node i, define s(i) to be the sum of # the excess degrees of its neighbors, i.e. s(i) == # to_be_squared[i]. Let deg be the degree of the target node. There # are four types of nodes that will be affected by the removal: # # 1. The nodes that are 1hop neighbors but not 2hop heighbors of # the target node will have their degree decrase by 1, and their # xdeg decreased by 2(s(i) - deg + 1)(deg - 1). # # 2. The nodes that are 2hop neighbors but not 1hop neighbors of # the target node will have their degree decreased by t(i), where # t(i) is the number of common neighbors they share with the target # node (i.e. the number of paths of length 2 between the target # node and i). 
Their xdeg will decrease by 2t(i)s(i) + t(i) - # t(i)**2 - 2p(i). Here, p(i) is the sum of the excess degrees of # the neighbors of i who are also neighbors of the target node. # # 3. The nodes that are both 1hop and 2hop neighbors will have # their degree decrease by t(i) + deg - 1. Their xdeg will decrease # by 2t(i)s(i) + t(i) - t(i)**2 - 2p(i) + 2(s(i) - deg + 1)(deg - # 1) - 2 t(i)(deg - 1). Note this is the sum of the changes for # 1hop and 2hop neighbors, plus the additional term 2 t(i)(deg - 1). # # 4. The target node itself will be removed: its degree and xdeg # will decrase to zero. deg = len(adj[node]) delta = defaultdict(int) count = defaultdict(int) # Compute the deltas. Takes O(degree^2[node]) for neigh in adj[node]: delta[neigh] += \ (2 # 2 * (to_be_squared[neigh] - deg + 1) # (s - deg + 1) * (deg - 1)) # (deg - 1) for neigh2 in adj[neigh]: # Each time r we visit a node i through a 2hop path, we are # adding 2s(i) + 1 - (2r + 1) - 2(p_r - 1), where p_r is # the degree of the node that led us to i. After visiting # t(i) times, this adds up to 2t(i)s(i) + t(i) - t(i)**2 - # 2p(i), as desired. Note that p_r is the degree BEFORE any # changes have been made to the network, but the degrees of # the neighbors of the target node already changed in the # previous loop, therefore p_r - 1 = len(adj[neigh]). delta[neigh2] += \ (2 * to_be_squared[neigh2] # 2s + 1 # + 1 - (2 * count[neigh2] + 1) # - (2r + 1) - 2 * len(adj[neigh])) # - 2(p_r - 1) # Increment the count r(i). At the end of this double loop, # we will have count[i] == t(i). count[neigh2] += 1 # Apply the changes at the same time. Takes O(degree^2[node]). for neigh2 in delta: # At the end of the previous loop, nodes of type 1 and 2 # already have the correct deltas, while nodes of type 3 are # missing a term. We can finally update xdeg. if neigh2 in adj[node] and count[neigh2] > 0: delta[neigh2] -= 2 * count[neigh2] * (deg - 1) # If dict, takes O(1). 
If heap, takes O(log n) xdeg[neigh2] = xdeg[neigh2] - delta[neigh2] # Update the s(i) of each node. Note this is simply computing # the changes in i's neighbors' degrees. Note nodes of type 3 # receive both updates. if neigh2 in adj[node]: to_be_squared[neigh2] -= deg - 1 if count[neigh2] > 0: to_be_squared[neigh2] -= count[neigh2] # Finally, finish udpating the graph, and store the node. del adj[node] del to_be_squared[node] removed.append(node) return removed
def test_maxpq(self):
    # popvalues() drains the maxpq in descending priority order.
    pq = maxpq(A=5, B=8, C=7, D=3, E=9, F=12, G=1)
    expected = [12, 9, 8, 7, 5, 3, 1]
    self.assertEqual(list(pq.popvalues()), expected)
    # The comparator of a maxpq is greater-than.
    self.assertEqual(pq.precedes, operator.gt)
def choose_edges_for_target_coord_num(network, n_coord):
    """
    Given a network and an array of required coordination number for each
    vertex, returns a list of edges that would (approximately) satisfy the
    given coordination number.

    Parameters
    ----------
    network: PoreNetwork
    n_coord: ndarray

    Returns
    -------
    out: ndarray
        Index array of the chosen throats

    Notes
    -----
    Implementation of algorithm as described in ROBERT M. SOK ET AL. 2002
    Section 4.1.2
    """
    WHITE = 0
    GRAY = 1
    BLACK = 2

    # np.int was removed in NumPy >= 1.24; the builtin int is equivalent.
    tube_marker = np.ones(network.nr_t, dtype=int) * GRAY
    n_avail = network.nr_nghs
    n_white = np.zeros(network.nr_p, dtype=int)

    assert np.all(n_coord < network.nr_nghs)

    def priority_tube(ti):
        # Priority from the remaining "needed" vs "free" slots of the two
        # end pores; tubes whose pores still need connections pop first.
        pi_1, pi_2 = network.edgelist[ti, :]
        ns_1 = n_coord[pi_1] - n_white[pi_1]
        ns_2 = n_coord[pi_2] - n_white[pi_2]
        fs_1 = n_avail[pi_1] - n_white[pi_1]
        fs_2 = n_avail[pi_2] - n_white[pi_2]
        Fs_1 = 1. - float(ns_1) / float(fs_1)
        Fs_2 = 1. - float(ns_2) / float(fs_2)
        return 1. / (1. + Fs_1 * Fs_2)

    # Initialize priority queue
    pq = maxpq()
    for ti in range(network.nr_t):
        pq[ti] = priority_tube(ti)

    while pq:
        # Pop tube and mark it as white
        ti, _ = pq.popitem()
        tube_marker[ti] = WHITE

        # update n_white
        pi_1, pi_2 = network.edgelist[ti, :]
        n_white[[pi_1, pi_2]] += 1

        # If the target coordination number is reached, mark other tubes
        # as black and delete. Use a distinct loop variable (tj) so the
        # popped tube index `ti` is not clobbered — the original reused
        # `ti` here, corrupting the priority-update loop below.
        for pi in (pi_1, pi_2):
            if n_white[pi] == n_coord[pi]:
                for tj in network.ngh_tubes[pi]:
                    if tube_marker[tj] == GRAY:
                        tube_marker[tj] = BLACK
                        del pq[tj]

        # Update priorities of adjacent GRAY tubes of the popped tube.
        for pi in (pi_1, pi_2):
            for tj in network.ngh_tubes[pi]:
                if tube_marker[tj] == GRAY:
                    pq[tj] = priority_tube(tj)

    assert np.all(n_white <= n_coord)
    # %-formatting print works identically on Python 2 and 3.
    print("Unsuccessful tubes %d" % np.sum(n_coord - n_white))
    print("Total number of tubes %d" % np.sum(n_white))
    return (tube_marker == WHITE).nonzero()[0]