Ejemplo n.º 1
0
def initScore_unbiased(sampleindex, D1index, k, sr, queries):
    """
    Unbiased benefit estimation.

    For each query the benefit is estimated from the overlap between the
    sample postings and the local-database postings of that query, scaled by
    the sample rate and capped by the top-k restriction.

    :param sampleindex: inverted index of sample (query -> postings; the
        postings must support ``len`` and ``intersection``)
    :param D1index: inverted index whose postings for a query are intersected
        with the sample postings (presumably the local database index)
    :param k: top-k restriction
    :param sr: sample rate
    :param queries: query pool; only its keys are used by this estimator
    :return: query pool (max-priority queue) with unbiased benefit
    """
    query_pool = maxpq()
    # Iterating the mapping directly yields its keys on both Python 2 and
    # Python 3 (``iteritems`` exists only on Python 2), and the per-query
    # values were never used here.
    for q in queries:
        sample_postings = sampleindex[q]
        ls = len(sample_postings)
        if ls != 0:
            # Number of sampled records that also match in D1index.
            lcap = len(sample_postings.intersection(D1index[q]))
            est_score = lcap / sr
            if est_score > k:
                # The query overflows the top-k limit: scale the overlap by
                # the fraction of results that can actually be returned.
                score = lcap * k / (ls * 1.0)
            else:
                score = est_score
        else:
            # No evidence in the sample: lowest possible priority.
            score = 0
        query_pool[q] = score
    return query_pool
Ejemplo n.º 2
0
def initScore_biased(sampleindex, k, sr, Dratio, queries):
    """
    Biased benefit estimation.

    The benefit of a query starts from the value stored for it in the query
    pool and is scaled down when the query is estimated (from the sample) to
    return more than ``k`` results.

    :param sampleindex: inverted index of sample
    :param k: top-k restriction
    :param sr: sample rate
    :param Dratio: local database rate
    :param queries: query pool mapping each query to a number (presumably its
        frequency in the local database -- confirm with the caller)
    :return: query pool (max-priority queue) with biased benefit
    """
    query_pool = maxpq()
    # ``items()`` is available on both Python 2 and Python 3 mappings, whereas
    # ``iteritems()`` only exists on Python 2.
    for q, l1 in queries.items():
        ls = len(sampleindex[q])
        if ls != 0:
            # Estimated number of matches, extrapolated from the sample.
            est_score = ls / sr
            if est_score > k:
                # Only a k / est_score fraction of the matches is reachable.
                score = k * l1 / (est_score * 1.0)
            else:
                score = l1
        else:
            # The query never occurs in the sample: cap the benefit at
            # k * Dratio.
            if l1 > k * Dratio:
                score = 1.0 * k * Dratio
            else:
                score = l1
        query_pool[q] = score
    return query_pool
Ejemplo n.º 3
0
def _immunize_deg(adj, num_nodes, queue=True):
    """Internal function. To immunize a graph, use `immunize`."""
    # Takes O(n) time and O(n) space
    deg = {n: len(adj[n]) for n in adj}

    # Make sure we don't remove more nodes than there are available
    num_nodes = min(num_nodes, len(deg))

    # If using a queue, we need to heapify it (which takes O(n))
    if queue:
        deg = maxpq(deg)

    # Main loop
    removed = []
    for _ in range(num_nodes):
        if queue:
            node = deg.pop()             # Takes O(log n)
        else:
            node = max(deg, key=deg.get) # Takes O(n)
            del deg[node]

        # Takes O(degree[node])
        for neigh in adj[node]:
            adj[neigh].remove(node)
            deg[neigh] -= 1

        # Finally, finish udpating the graph, and store the node.
        del adj[node]
        removed.append(node)

    return removed
Ejemplo n.º 4
0
    def __init__(self, protocol, fact, parents=None, barzer_svc=None):
        # Composite fact: delegates the common setup to the base class, then
        # creates (or reuses) one child fact per entry in ``fact.facts`` and
        # derives its own value and confidence from those children.
        super(ConvoCompositeFact, self).__init__(
            protocol, fact, parents, barzer_svc=barzer_svc)
        self.text = fact.text

        # ``self.op`` is only assigned here when the operator is present in
        # OPERATOR_MAP; otherwise whatever ``op`` already resolves to is used.
        operator_factory = self.OPERATOR_MAP.get(fact.operator)
        if operator_factory:
            self.op = operator_factory()

        # The priority queue is created before the children are attached.
        self.pq = pqdict.maxpq()
        child_facts = [
            protocol.create_or_update_fact(sub_fact, [self])
            for sub_fact in fact.facts
        ]
        self.set_children(child_facts)

        self.value = self.op.calc(children=self.get_children())
        self.confidence = self.op.confidence(children=self.get_children())
Ejemplo n.º 5
0
    def __init__(self):
        # Interactive setup: the three prompts are asked in this exact order
        # and the raw answers are stored unmodified.
        total_prompt = "Set total number of webpages to be crawled: "
        per_site_prompt = "Set limits on how many webpages be crawled from single site: "
        self.query = input("Enter search query: ")
        self.webpages_limit = input(total_prompt)
        self.limit = input(per_site_prompt)

        # Plain bookkeeping state.
        self.webpages_crawled = 0
        self.visited_urls = set()
        self.sites_times = {}

        # Work queues: a max-priority queue and a FIFO queue.
        self.priority_queue = maxpq()
        self.queue = queue.Queue()

        # Collaborators; the parser and the calculator receive the query.
        self.downloader = Downloader()
        self.parser = Parser(self.query)
        self.calculator = Calculator(self.query)
        self.relevance = Relevance()

        self.logger = logging.getLogger(__name__)
Ejemplo n.º 6
0
def test_equality():
    """Check ``==`` / ``!=`` between pqdicts and against a plain dict."""
    # Same items -> equal under both operators.
    left = pqdict(sample_items)
    right = pqdict(sample_items)
    assert left == right
    assert not left != right
    # Changing a single priority makes them differ.
    right[random.choice(sample_keys)] += 1
    assert not left == right
    assert left != right
    # A pqdict equals a regular dict holding the same key/value pairs.
    plain = dict(sample_items)
    assert left == plain
    # TODO: FIX?
    # Equality currently ignores the key function and the precedence
    # function, so a min-queue and a max-queue compare equal.
    reversed_order = maxpq(sample_items)
    assert left == reversed_order
Ejemplo n.º 7
0
 def test_equality(self):
     # Same items -> equal under both operators.
     left = pqdict(sample_items)
     right = pqdict(sample_items)
     self.assertTrue(left == right)
     self.assertFalse(left != right)
     # Changing a single priority makes them differ.
     right[random.choice(sample_keys)] += 1
     self.assertFalse(left == right)
     self.assertTrue(left != right)
     # A pqdict equals a regular dict holding the same key/value pairs.
     plain = dict(sample_items)
     self.assertEqual(left, plain)
     # TODO: FIX?
     # Equality currently ignores the key function and the precedence
     # function, so a min-queue and a max-queue compare equal.
     reversed_order = maxpq(sample_items)
     self.assertEqual(left, reversed_order)
 def test_equality(self):
     # Two queues built from identical items compare equal.
     first = pqdict(sample_items)
     second = pqdict(sample_items)
     self.assertTrue(first == second)
     self.assertFalse(first != second)
     # Bump one randomly chosen priority: equality must break.
     second[random.choice(sample_keys)] += 1
     self.assertFalse(first == second)
     self.assertTrue(first != second)
     # Comparison against an ordinary dict with the same pairs succeeds.
     as_dict = dict(sample_items)
     self.assertEqual(first, as_dict)
     # TODO: FIX?
     # Neither the key function nor the precedence function takes part in
     # the comparison, so a max-queue equals the default queue.
     max_queue = maxpq(sample_items)
     self.assertEqual(first, max_queue)
Ejemplo n.º 9
0
    def __init__(self, data, barzer_svc=None):
        # Root node of the conversation graph: parses ``data`` into a
        # Protocol, wraps every terminal in a ConvoCompositeFact and ranks
        # the terminals by score in a max-priority queue.
        parsed = Protocol(data)
        self.id = 'protocol'
        self.index = cg_index.Index()

        super(ConvoProtocol, self).__init__()
        self.terminals = {}
        self.facts = defaultdict(set)
        # Fall back to the module-level default service when none is given.
        self.barzer_svc = barzer_svc if barzer_svc else default_barzer_instance

        self.visited_facts = set()
        self.facts_to_update = deque()

        for terminal in parsed.terminals:
            node = ConvoCompositeFact(
                protocol=self, fact=terminal, parents=[self])
            # Register the node both as a fact and as a terminal.
            self.facts[terminal.id] = node
            self.terminals[terminal.id] = node

        self.set_children(self.terminals.values())
        self.pq = pqdict.maxpq()
        for node in self.terminals.values():
            self.pq[node] = node.score()
Ejemplo n.º 10
0
def test_maxpq():
    """A maxpq pops values largest-first and orders with ``operator.gt``."""
    priorities = dict(A=5, B=8, C=7, D=3, E=9, F=12, G=1)
    heap = maxpq(**priorities)
    drained = list(heap.popvalues())
    assert drained == [12, 9, 8, 7, 5, 3, 1]
    assert heap.precedes == operator.gt
Ejemplo n.º 11
0
 def test_maxpq(self):
     # Values must come out largest-first, ordered by ``operator.gt``.
     priorities = dict(A=5, B=8, C=7, D=3, E=9, F=12, G=1)
     heap = maxpq(**priorities)
     drained = list(heap.popvalues())
     self.assertEqual(drained, [12, 9, 8, 7, 5, 3, 1])
     self.assertEqual(heap.precedes, operator.gt)
Ejemplo n.º 12
0
def _immunize_ci(adj, num_nodes, queue=True):
    """Internal function. To immunize a graph, use `immunize`."""
    # Once populated, will take O(n) space
    ci = defaultdict(int)

    # Takes O(m) time
    for node in adj:
        excess_deg = len(adj[node]) - 1
        for neigh in adj[node]:
            ci[node] += excess_deg * (len(adj[neigh]) - 1)

    # Make sure we don't remove more nodes than there are available
    num_nodes = min(num_nodes, len(ci))

    # If using a queue, we need to heapify it (which takes O(n))
    if queue:
        ci = maxpq(ci)

    # Main loop
    removed = []
    for _ in range(num_nodes):
        if queue:
            node = ci.pop() # Takes O(log n)
        else:
            node = max(ci, key=ci.get) # Takes O(n)
            del ci[node]

        # Takes O(degree[node])
        for neigh in adj[node]:
            adj[neigh].remove(node)

        # Compute the deltas. Takes O(degree^2[node])
        deg = len(adj[node])
        delta = defaultdict(int)
        count = defaultdict(int)
        for neigh in adj[node]:
            if len(adj[neigh]) > 0:
                delta[neigh] += \
                      ((deg - 1) * (len(adj[neigh]) - 1)
                       + ci[neigh] // (len(adj[neigh])))
            else:
                # Nodes of degree 1 decrease all the way to zero
                delta[neigh] += ci[neigh]

            for neigh2 in adj[neigh]:
                if neigh2 in adj[node]:
                    delta[neigh2] += len(adj[neigh2])
                else:
                    delta[neigh2] += len(adj[neigh2]) - 1
                count[neigh2] += 1

        # Apply the changes at the same time. Takes O(degree^2[node]).
        for neigh2 in delta:
            # At the end of the previous loop, nodes of type 1 and 2
            # already have the correct deltas, while nodes of type 3 are
            # missing a term.
            if neigh2 in adj[node] and count[neigh2] > 0:
                delta[neigh2] -= count[neigh2]

            # If dict, takes O(1). If heap, takes O(log n)
            ci[neigh2] = ci[neigh2] - delta[neigh2]

        # Finally, finish udpating the graph, and store the node.
        del adj[node]
        removed.append(node)

    return removed
Ejemplo n.º 13
0
def _immunize_xdeg(adj, num_nodes, queue=True):
    """Internal function. To immunize a graph, use `immunize`.

    Greedily removes up to `num_nodes` nodes from `adj`, each time picking
    the node with the largest current "xdeg" score, and returns the removed
    nodes in removal order. For a node i the score is
    s(i)**2 - sum((deg(j) - 1)**2 for j in neighbors(i)), where s(i) is the
    sum of the excess degrees (degree minus one) of the neighbors of i.

    `adj` maps each node to a collection of its neighbors (presumably sets)
    and is modified in place: a removed node is deleted from `adj` and from
    the collections of its neighbors. When `queue` is true the scores are
    kept in a `maxpq`; otherwise a plain dict is scanned with `max` on every
    iteration.
    """
    # Takes O(m) time and O(2n) space
    to_be_squared, sum_squares = defaultdict(int), defaultdict(int)
    for node in adj:
        for neigh in adj[node]:
            # Note we keep the plain sum s(i) in to_be_squared, not its
            # square, and square it only when needed.
            to_be_squared[node] += len(adj[neigh]) - 1
            sum_squares[node] += (len(adj[neigh]) - 1)**2

    # Takes O(n) time and O(n) space
    # Remember to square the first term
    xdeg = {n: to_be_squared[n]**2 - sum_squares[n] for n in adj}

    # We actually don't need this again
    del sum_squares

    # Make sure we don't remove more nodes than there are available
    num_nodes = min(num_nodes, len(xdeg))

    # If using a queue, we need to heapify it (which takes O(n))
    if queue:
        xdeg = maxpq(xdeg)

    # Main loop
    # Takes O(m) time and O(m) space
    removed = []
    for _ in range(num_nodes):
        if queue:
            node = xdeg.pop()             # Takes O(log n)
        else:
            node = max(xdeg, key=xdeg.get) # Takes O(n)
            del xdeg[node]

        # Detach the target from its neighbors. Takes O(degree[node])
        for neigh in adj[node]:
            adj[neigh].remove(node)

        # The following loop will compute the difference in xdeg of each
        # node, without changing any of the variables. The loop after that
        # will actually apply the changes.
        #
        # We do this as follows. For a node i, define s(i) to be the sum of
        # the excess degrees of its neighbors, i.e. s(i) ==
        # to_be_squared[i]. Let deg be the degree of the target node. There
        # are four types of nodes that will be affected by the removal:
        #
        # 1. The nodes that are 1hop neighbors but not 2hop neighbors of
        # the target node will have their degree decrease by 1, and their
        # xdeg decreased by 2(s(i) - deg + 1)(deg - 1).
        #
        # 2. The nodes that are 2hop neighbors but not 1hop neighbors of
        # the target node will have their degree decreased by t(i), where
        # t(i) is the number of common neighbors they share with the target
        # node (i.e. the number of paths of length 2 between the target
        # node and i). Their xdeg will decrease by 2t(i)s(i) + t(i) -
        # t(i)**2 - 2p(i). Here, p(i) is the sum of the excess degrees of
        # the neighbors of i who are also neighbors of the target node.
        #
        # 3. The nodes that are both 1hop and 2hop neighbors will have
        # their degree decrease by t(i) + deg - 1. Their xdeg will decrease
        # by 2t(i)s(i) + t(i) - t(i)**2 - 2p(i) + 2(s(i) - deg + 1)(deg -
        # 1) - 2 t(i)(deg - 1). Note this is the sum of the changes for
        # 1hop and 2hop neighbors, plus the additional term 2 t(i)(deg - 1).
        #
        # 4. The target node itself will be removed: its degree and xdeg
        # will decrease to zero.
        deg = len(adj[node])
        delta = defaultdict(int)   # accumulated xdeg decrease per node
        count = defaultdict(int)   # 2hop visits per node so far, r(i)

        # Compute the deltas. Takes O(degree^2[node])
        for neigh in adj[node]:
            delta[neigh] += \
                  (2                                  # 2
                   * (to_be_squared[neigh] - deg + 1) # (s - deg + 1)
                   * (deg - 1))                       # (deg - 1)
            for neigh2 in adj[neigh]:
                # Each time r we visit a node i through a 2hop path, we are
                # adding 2s(i) + 1 - (2r + 1) - 2(p_r - 1), where p_r is
                # the degree of the node that led us to i. After visiting
                # t(i) times, this adds up to 2t(i)s(i) + t(i) - t(i)**2 -
                # 2p(i), as desired. Note that p_r is the degree BEFORE any
                # changes have been made to the network, but the degrees of
                # the neighbors of the target node already changed in the
                # previous loop, therefore p_r - 1 = len(adj[neigh]).
                delta[neigh2] += \
                      (2 * to_be_squared[neigh2]           # 2s
                       + 1                                 # + 1
                       - (2 * count[neigh2] + 1)           # - (2r + 1)
                       - 2 * len(adj[neigh]))              # - 2(p_r - 1)

                # Increment the count r(i). At the end of this double loop,
                # we will have count[i] == t(i).
                count[neigh2] += 1

        # Apply the changes at the same time. Takes O(degree^2[node]).
        for neigh2 in delta:
            # At the end of the previous loop, nodes of type 1 and 2
            # already have the correct deltas, while nodes of type 3 are
            # missing a term. We can finally update xdeg.
            if neigh2 in adj[node] and count[neigh2] > 0:
                delta[neigh2] -= 2 * count[neigh2] * (deg - 1)

            # If dict, takes O(1). If heap, takes O(log n)
            xdeg[neigh2] = xdeg[neigh2] - delta[neigh2]

            # Update the s(i) of each node. Note this is simply computing
            # the changes in i's neighbors' degrees. Note nodes of type 3
            # receive both updates.
            if neigh2 in adj[node]:
                to_be_squared[neigh2] -= deg - 1
            if count[neigh2] > 0:
                to_be_squared[neigh2] -= count[neigh2]

        # Finally, finish updating the graph, and store the node.
        del adj[node]
        del to_be_squared[node]
        removed.append(node)

    return removed
 def test_maxpq(self):
     # Values must come out largest-first, ordered by ``operator.gt``.
     priorities = dict(A=5, B=8, C=7, D=3, E=9, F=12, G=1)
     heap = maxpq(**priorities)
     expected_order = [12, 9, 8, 7, 5, 3, 1]
     self.assertEqual(list(heap.popvalues()), expected_order)
     self.assertEqual(heap.precedes, operator.gt)
Ejemplo n.º 15
0
def choose_edges_for_target_coord_num(network, n_coord):
    """
    Given a network and an array of required coordination number for each vertex, returns a list of edges that would
    (approximately) satisfy the given coordination number.

    Tubes are kept in a max-priority queue. The best tube is accepted
    (marked WHITE); once a pore reaches its target coordination number all
    of its still-undecided (GRAY) tubes are rejected (marked BLACK), and the
    priorities of the remaining GRAY tubes around the accepted tube are
    recomputed.

    Parameters
    ----------
    network: PoreNetwork
        Must provide ``nr_t``, ``nr_p``, ``nr_nghs``, ``edgelist`` and
        ``ngh_tubes``.
    n_coord: ndarray
        Target coordination number per pore; every entry must be strictly
        smaller than the pore's current number of neighbours.

    Returns
    -------
    out: ndarray
        Index array of the chosen throats

    Notes
    -----
    Implementation of algorithm as described in ROBERT M. SOK ET AL. 2002 Section 4.1.2

    Two summary lines are printed to standard output before returning.

    """

    WHITE = 0
    GRAY = 1
    BLACK = 2

    # ``np.int`` was merely an alias of the builtin ``int`` and has been
    # removed from recent NumPy releases; the builtin gives the same dtype.
    tube_marker = np.ones(network.nr_t, dtype=int)*GRAY
    n_avail = network.nr_nghs
    n_white = np.zeros(network.nr_p, dtype=int)
    assert np.all(n_coord < network.nr_nghs)
    # NOTE(review): a pore with n_coord == 0 never hits the saturation check
    # below (it is only evaluated after an increment), so the final assert
    # would fire for it -- presumably callers never request 0; confirm.

    def priority_tube(ti):
        """Return 1 / (1 + Fs_1 * Fs_2) for the two end pores of tube `ti`."""
        pi_1, pi_2 = network.edgelist[ti, :]

        # Tubes each end pore still needs ...
        ns_1 = n_coord[pi_1] - n_white[pi_1]
        ns_2 = n_coord[pi_2] - n_white[pi_2]

        # ... out of the tubes not yet accepted at that pore.
        fs_1 = n_avail[pi_1] - n_white[pi_1]
        fs_2 = n_avail[pi_2] - n_white[pi_2]

        Fs_1 = 1. - float(ns_1) / float(fs_1)
        Fs_2 = 1. - float(ns_2) / float(fs_2)

        return 1. / (1. + Fs_1 * Fs_2)

    # Initialize priority queue (``range`` works on Python 2 and 3 alike).
    pq = maxpq()
    for ti in range(network.nr_t):
        pq[ti] = priority_tube(ti)

    while pq:
        # Pop tube and mark it as white
        ti, _ = pq.popitem()
        tube_marker[ti] = WHITE

        # update n_white
        pi_1, pi_2 = network.edgelist[ti, :]
        n_white[[pi_1, pi_2]] += 1

        # If the target coordination number is reached, mark other tubes as
        # black and delete them from the queue. The neighbouring tube gets
        # its own name (``tj``): reusing ``ti`` here used to overwrite the
        # popped tube, so the priority refresh below ran around the wrong
        # tube.
        for pi in (pi_1, pi_2):
            if n_white[pi] == n_coord[pi]:
                for tj in network.ngh_tubes[pi]:
                    if tube_marker[tj] == GRAY:
                        tube_marker[tj] = BLACK
                        del pq[tj]

        # Update priorities of the GRAY tubes adjacent to the popped tube
        for pi in (pi_1, pi_2):
            for tj in network.ngh_tubes[pi]:
                if tube_marker[tj] == GRAY:
                    pq[tj] = priority_tube(tj)

    assert np.all(n_white <= n_coord)

    # %-formatting prints the same text under Python 2 and Python 3.
    print("Unsuccessful tubes %s" % np.sum(n_coord - n_white))
    print("Total number of tubes %s" % np.sum(n_white))

    return (tube_marker == WHITE).nonzero()[0]