Example #1
    def mergeKLists(self, lists):
        """
        :type lists: List[ListNode]
        :rtype: ListNode
        """
        from Queue import PriorityQueue, Empty
        q = PriorityQueue()
        for i, l in enumerate(lists):
            if l:
                q.put((l.val, i))

        try:
            got = q.get_nowait()
        except Empty:
            return None
        res = ListNode(got[0])
        lists[got[1]] = lists[got[1]].next
        if lists[got[1]]: q.put((lists[got[1]].val, got[1]))
        curr = res

        while True:
            try:
                got = q.get_nowait()
            except Empty:
                return res
            curr.next = ListNode(got[0])
            lists[got[1]] = lists[got[1]].next
            curr = curr.next
            if lists[got[1]]: q.put((lists[got[1]].val, got[1]))
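The pattern above is a k-way merge: seed the queue with the head of every list, pop the smallest value, then push the popped list's next element. The list index in the (value, index) tuple doubles as a tie-breaker, so equal values never fall through to comparing ListNode objects. A minimal self-contained sketch of the same idea, using plain Python lists instead of linked lists (Python 2, as in the examples here; on Python 3 the import becomes "from queue import PriorityQueue"):

from Queue import PriorityQueue

def merge_k_sorted(lists):
    q = PriorityQueue()
    positions = [0] * len(lists)
    for i, lst in enumerate(lists):
        if lst:
            q.put((lst[0], i))              # (value, source-list index)
    merged = []
    while not q.empty():
        val, i = q.get_nowait()
        merged.append(val)
        positions[i] += 1
        if positions[i] < len(lists[i]):    # push the next value from the same list
            q.put((lists[i][positions[i]], i))
    return merged

print merge_k_sorted([[1, 4, 7], [2, 5], [3, 6, 8]])    # [1, 2, 3, 4, 5, 6, 7, 8]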
Example #2
def queue_sort(queue, pathTuple, opened_nodes, method):
	q = queue
	if method != "depth_limited" or pathTuple[0] != 0:
		for path in opened_nodes:
			value = calculateValue(path, method)
			path.fnValue = value
			pathTuple = (value, path)
			q.put_nowait(pathTuple)
	printLine(q, method)
	if method == "hill_climbing":
		if not q.empty():
			first = q.get_nowait()
			q = PriorityQueue()
			q.put_nowait(first)
	if method == "beam":
		first = False
		second = False
		if not q.empty():
			first = q.get_nowait()
		if not q.empty():
			second = q.get_nowait()
		q = PriorityQueue()
		if first != False:
			q.put_nowait(first)
		if second != False:
			q.put_nowait(second)
	return q
Example #3
def main():
    args = sys.argv[1:]
    passes = 1000000
    if args:
        passes = int(args[0])

    print
    print "Using GrimHeaper..."
    print

    heap = BinaryHeap()

    print
    print "Creating heap with %d items..." % passes
    print

    started = time.time()
    for i in range(passes):
        heap.put(i)

    fill_time = time.time() - started
    print
    print "Heap filled with %d items after %.2fms" % (passes, fill_time)

    started = time.time()
    for i in range(passes):
        heap.pop()

    empty_time = time.time() - started
    print "Heap emptied with %d items after %.2fms" % (passes, empty_time)
    print
    print "Total time: %.2f" % (fill_time + empty_time)

    print
    print "Using Python's PriorityQueue..."
    print

    queue = PriorityQueue()

    print
    print "Creating queue with %d items..." % passes
    print

    started = time.time()
    for i in range(passes):
        queue.put(i)

    fill_time = time.time() - started
    print
    print "Queue filled with %d items after %.2fms" % (passes, fill_time)

    started = time.time()
    for i in range(passes):
        queue.get_nowait()

    empty_time = time.time() - started
    print "Queue emptied with %d items after %.2fms" % (passes, empty_time)
    print
    print "Total time: %.2f" % (fill_time + empty_time)
Example #4
class ThreadPool(object):

    def __init__(self, workersLimit, queueLimit=-1):
        self._jobs = PriorityQueue(queueLimit)
        self._running = False
        self._workers = []
        self._workersLimit = workersLimit

    def start(self):
        for _ in xrange(self._workersLimit):
            worker = self._createNewWorker()
            try:
                worker.start()
            except Exception:
                _logger.error('Worker has not been started properly: %r', worker)
            else:
                self._workers.append(worker)

        self._running = True

    def stop(self):
        self._running = False
        try:
            while True:
                self._jobs.get_nowait()

        except QueueEmptyError:
            pass

        for _ in self._workers:
            self._jobs.put_nowait((_LOW_PRIORITY, TerminateJob()))

        self._workers = []

    def _createNewWorker(self):
        return Worker(self._jobs)

    def putLowPriorityJob(self, job):
        if not self._running:
            _logger.error('Thread pool is not running. Trying to put new job: %r', job)
            return
        self._jobs.put_nowait((_LOW_PRIORITY, job))

    def putJob(self, job):
        if not self._running:
            _logger.error('Thread pool is not running. Trying to put new job: %r', job)
            return
        self._jobs.put_nowait((_DEFAULT_PRIORITY, job))

    def __repr__(self):
        return '%s(workers = %d; jobs = %d)' % (self.__class__.__name__, len(self._workers), self._jobs.qsize())
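ThreadPool stores each job as a (priority, job) tuple, so the queue drains jobs with the numerically smaller priority first. _DEFAULT_PRIORITY and _LOW_PRIORITY are not shown in this excerpt; the sketch below assumes plausible values (default lower than low priority) and uses plain strings as stand-in jobs to show the ordering (Python 2):

from Queue import PriorityQueue

_DEFAULT_PRIORITY = 0    # assumed value: default jobs should drain before low-priority ones
_LOW_PRIORITY = 10       # assumed value

jobs = PriorityQueue()
jobs.put_nowait((_LOW_PRIORITY, 'cleanup'))
jobs.put_nowait((_DEFAULT_PRIORITY, 'render'))
jobs.put_nowait((_DEFAULT_PRIORITY, 'network'))

while not jobs.empty():
    print jobs.get_nowait()    # (0, 'network'), (0, 'render'), (10, 'cleanup')

When two jobs share a priority, the tuple comparison falls through to the jobs themselves, which is why real job objects are often wrapped with a monotonically increasing counter as a tie-breaker.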
Example #5
def getAlphaBeta( game, player, other, reward=AlphaBeta(-100,100), depth=10, tab=0 ):
    '''
    alpha: minimum bound of the outcome -- currently, evaluate of best move possible by other
    beta: maximum bound of the outcome -- currently, evaluate of best move possible by player
    
    returns: (alpha, beta, evaluation, move)
    '''
    # evaluation is the current value of board, assuming no more moves in future
    # alpha == beta == finalvalue if we figure out the outcome.
    reward.evaluation = game.evaluate_cached( player )
    move = Move(-1,-1)
    # base case: can't play further, lost
    if (reward.evaluation==reward.beta):
#         print 'Player', player, 'won!!!'
        return AlphaBetaOfMove(AlphaBeta(reward.beta, reward.beta, reward.beta), move)
    # base case: can't evaluate further
    if (depth == 0):
        return AlphaBetaOfMove(reward, move )
    q = PriorityQueue()
    for m in game.next_moves( ):
            # g2 = copy.deepcopy( game )
            assert game.move( player, m)
            try:
                oponent_reward = AlphaBeta(-reward.beta, -reward.alpha, -reward.evaluation)
                oponent_reward = getAlphaBeta(game, other, player, reward=oponent_reward, depth=depth-1, tab=tab+1 ).alphabeta
                player_reward = AlphaBeta(-oponent_reward.beta, -oponent_reward.alpha, -oponent_reward.evaluation)
                q.put_nowait( AlphaBetaOfMove( player_reward, m ) )
            finally:
                assert game.unmove(player, m)
#             if( tab < 1 ):
#                 print ("\t"*tab), (player,row,col), (other,other_r,other_c), (other_alpha,other_beta,other_evaluation), (next_alpha,next_beta,evaluation), update
    if( q.empty() ):
        return AlphaBetaOfMove( reward, Move(-1,-1) )
    else:
        return q.get_nowait()
Example #6
 def req_proxy(self, url):
     from urlparse import urlparse
     netloc = urlparse(url).netloc
     busy_queue = PriorityQueue()
     lazy_queue = PriorityQueue()
     index = 0
     now = datetime.utcnow()
     while index < self.proxy_in_queue_count():
         index += 1
         proxy_url = self.get_proxy_from_queue()
         if not proxy_url:
             break
         if proxy_url in self.proxy_meta_map:
             proxy_meta = self.proxy_meta_map[proxy_url]
             if proxy_meta.last_used_time and (now - proxy_meta.last_used_time).total_seconds() < self.settings["interval_second"]:
                 busy_queue.put_nowait((proxy_meta.last_used_time, proxy_url))
                 continue
             if netloc in proxy_meta.latency and proxy_meta.latency[netloc][0] >= self.settings["max_unavailable_count"]:
                 import random
                 if random.randint(1, 10) > 1:
                     lazy_queue.put_nowait((proxy_meta.latency[netloc], proxy_url))
                     continue
             proxy_meta.last_used_time = now
             proxy_meta.master = netloc
             return proxy_meta.proxy
     while not lazy_queue.empty():
         _, proxy_url = lazy_queue.get_nowait()
         if proxy_url in self.proxy_meta_map:
             proxy_meta = self.proxy_meta_map[proxy_url]
             proxy_meta.last_used_time = now
             proxy_meta.master = netloc
             return proxy_meta.proxy
     return None
Example #7
    def __call__(self, graph, start_node, target_node):
        frontier = PriorityQueue()

        current_node = start_node
        distance_dict = defaultdict(lambda: infinity)
        distance_dict[current_node] = 0
        ancestors_dict = {}
        visited_set = set()

        while True:
            neighbors = graph.get_neighbors(current_node)

            current_distance = distance_dict[current_node]

            for neighbor in neighbors:
                if neighbor not in visited_set and (current_distance + 1) < distance_dict[neighbor]:
                    distance_dict[neighbor] = current_distance + 1
                    ancestors_dict[neighbor] = current_node
                    frontier.put((self.cost_function(distance_dict[neighbor], neighbor, target_node), neighbor))
                    self.nodes_expanded += 1

            visited_set.add(current_node)
            self.nodes_visited += 1

            if current_node == target_node:
                return list(reversed(find_ancestors(ancestors_dict, current_node, start_node)))
            else:
                try:
                    current_node = frontier.get_nowait()[1]
                except Empty:
                    break
Example #8
def consume_solution_queue(q):
    priority_queue = PriorityQueue()
    proposed_solutions = set()

    try:
        while True:
            fetch = True
            while fetch:
                try:
                    item = q.get_nowait()

                    if item[1] not in proposed_solutions:
                        priorized_item = (-1 * (item[0] / len(item[1])), item[0], item[1])
                        priority_queue.put(priorized_item)
                        proposed_solutions.add(item[1])
                except Empty:
                    fetch = False

            try:
                solution = priority_queue.get_nowait()
                print_solution(solution[1], solution[2])
            except Empty:
                pass

            time.sleep(2)
    except KeyboardInterrupt:
        pass
Example #9
 def a_star(self):
     # like BFS, but puts coords with lowest heuristic (path length + manhattan dist to goal) up front
     pq = PriorityQueue(maxsize=0)
     pq.put_nowait((self.manhattan_distance(self.currPos, self.goalPos), (self.currPos, [])))
     visited = set()
     bestPath = None
     bestHeur = None
     numNodes = 0
     while not pq.empty():
         priority, curr = pq.get_nowait()
         coord, path = curr
         visited.add(coord)
         if bestPath is not None and priority >= bestHeur:
             pass
         elif self.getChar(coord) == '%': # wall
             pass
         else: # recursive case
             if self.getChar(coord) == '.': # goal
                 print "Found a path:", path
                 if bestPath is None or len(path) < len(bestPath):
                     print "Is best path"
                     bestPath = path[:]
                     bestHeur = priority
             for adj, direction in self.adjacent(coord):
                 if adj not in visited and self.getChar(adj) != '%':
                     numNodes += 1
                     heur = len(path + direction) + self.manhattan_distance(adj, self.goalPos)
                     if bestPath is None or heur < bestHeur: # preselect based on heuristic
                         pq.put_nowait((heur, (adj, path + direction)))
     print "Num Nodes:", numNodes
     print self.debug(bestPath) # debug
     return bestPath
Example #10
 def a_star_penalize(self, forwardPenalty, turnPenalty):
     # part 1.2
     # using euclidean heuristic (not manhattan)
     pq = PriorityQueue(maxsize=0)
     pq.put_nowait((self.manhattan_distance(self.currPos, self.goalPos), (self.currPos, [])))
     visited = set()
     bestPath = None
     bestHeur = None
     numNodes = 0
     while not pq.empty():
         priority, curr = pq.get_nowait()
         coord, path = curr
         visited.add(coord)
         if bestPath is not None and priority >= bestHeur:
             pass
         elif self.getChar(coord) == '%': # wall
             pass
         else: # recursive case
             if self.getChar(coord) == '.': # goal
                 print "Found a path:", path
                 if bestPath is None or len(path) < len(bestPath):
                     bestPath = path[:]
             for adj, direction in self.adjacent(coord):
                 if adj not in visited and self.getChar(adj) != '%':
                     numNodes += 1
                     heur = self.calculate_penalty(path + direction, forwardPenalty, turnPenalty) + self.manhattan_distance(adj, self.goalPos) * forwardPenalty
                     if bestPath is None or heur < bestHeur: # preselect based on heuristic
                         pq.put_nowait((heur, (adj, path + direction)))
     print "Num Nodes:", numNodes
     print self.debug(bestPath) # debug
     return bestPath
Example #11
 def greedy(self):
     # like DFS, but puts coords closest to goal up front
     pq = PriorityQueue(maxsize=0)
     pq.put_nowait((self.manhattan_distance(self.currPos, self.goalPos), (self.currPos, [])))
     visited = set()
     bestPath = None
     numNodes = 0
     while not pq.empty():
         priority, curr = pq.get_nowait()
         coord, path = curr
         visited.add(coord)
         if bestPath is not None and len(path) >= len(bestPath):
             pass
         elif self.getChar(coord) == '%': # wall
             pass
         else: # recursive case
             if self.getChar(coord) == '.': # goal
                 print "Num Nodes:", numNodes
                 print self.debug(path)
                 return path # return on first path found
             for adj, direction in self.adjacent(coord):
                 if adj not in visited and self.getChar(adj) != '%':
                     numNodes += 1
                     heur = self.manhattan_distance(adj, self.goalPos)
                     if bestPath is None: # preselect based on heuristic
                         pq.put_nowait((heur, (adj, path + direction)))
     return [] # impossible
Example #12
def consume_solution_queue(q):
    priority_queue = PriorityQueue()
    proposed_solutions = set()

    try:
        while True:
            fetch = True
            while fetch:
                try:
                    item = q.get_nowait()

                    if item[1] not in proposed_solutions:
                        priorized_item = (-1 * (item[0] / len(item[1])),
                                          item[0], item[1])
                        priority_queue.put(priorized_item)
                        proposed_solutions.add(item[1])
                except Empty:
                    fetch = False

            try:
                solution = priority_queue.get_nowait()
                print_solution(solution[1], solution[2])
            except Empty:
                pass

            time.sleep(2)
    except KeyboardInterrupt:
        pass
Example #13
def a_star(graph, start, goal):
    extendedSet = set()
    if start == goal:
        return list(start)

    paths = PriorityQueue()
    for node in graph.get_connected_nodes(start):
        paths.put_nowait((path_length(graph, [start, node]) +
                          graph.get_heuristic(node, goal), [start, node]))
    extendedSet.add(start)

    while not paths.empty():
        path = paths.get_nowait()
        if path[1][-1] == goal:
            return path[1]
        elif path[1][-1] not in extendedSet:
            extendedSet.add(path[1][-1])
            cnodes = graph.get_connected_nodes(path[1][-1])
            for node in cnodes:
                if path[1].count(node) == 0:
                    epath = list(path[1])
                    epath.append(node)
                    paths.put_nowait((path_length(graph, epath) +
                                      graph.get_heuristic(node, goal), epath))
    return []
Example #14
class MultiQueue(object):
    """
    Simple priority-queue interface to push and pull tasks.
    The queue keeps items ordered by the first element of the tuple; no further ordering is guaranteed.
    """
    def __init__(self):
        self.queue = PriorityQueue()

    def empty(self):
        return self.queue.empty()

    def pull_nowait(self):
        try:
            task_data = self.queue.get_nowait()
        except Empty:
            return None
        (EnterTime, User, Task) = task_data
        self.queue.task_done()
        return (User, Task)

    def pull_wait(self, wait):
        try:
            task_data = self.queue.get(block=True, timeout=wait)
            (EnterTime, User, Task) = task_data
            self.queue.task_done()
            return (User, Task)
        except Empty:
            return None

    def push(self, User, Tasks):
        EnterTime = time()
        for task in Tasks:
            self.queue.put_nowait((EnterTime, User, task))
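Because push() stamps every task in one call with the same EnterTime, the queue drains tasks in submission order as long as the timestamps differ. A short usage sketch, assuming the MultiQueue class above with "from time import time" and "from Queue import PriorityQueue, Empty" in scope (Python 2):

mq = MultiQueue()
mq.push('alice', ['build', 'test'])
mq.push('bob', ['deploy'])

while not mq.empty():
    print mq.pull_nowait()    # ('alice', 'build'), ('alice', 'test'), ('bob', 'deploy')

Tasks pushed together share one timestamp, so ties fall through to the User and Task fields of the tuple; on most platforms the two push() calls get distinct time() values, which keeps the two batches in order.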
Example #15
    def predict(self, image):
        result_priority_queue = PriorityQueue()
        results = []

        bbs = self.align.getAllFaceBoundingBoxes(image)

        for bb_index, bb in enumerate(bbs):
            alignedFace = self.align.alignImg("affine", 96, image, bb)
            if alignedFace is None:
                continue

            phash = str(imagehash.phash(Image.fromarray(alignedFace)))
            if phash in self.trained_images:
                identity = self.trained_images[phash].identity
                result_priority_queue.put_nowait((-1.0, identity, bb_index))
            else:
                rep = self.net.forwardImage(alignedFace)
                if self.svm is not None:
                    result_proba_list = self.svm.predict_proba(rep)
                    identity = np.argmax(result_proba_list[0])
                    print str(result_proba_list[0]) + " " + str(bb)
                    for index, prob in enumerate(result_proba_list[0]):
                        result_priority_queue.put_nowait((prob * -1.0, self.identities[index], bb_index))
                else:
                    result_priority_queue.put_nowait((0.0, -1, bb_index))

        matched_identities = []
        matched_bb_indices = []
        threshold = 0.6

        while len(matched_identities) != len(bbs) and result_priority_queue.empty() is False:
            detectedFaceInfo = result_priority_queue.get_nowait()

            identity = detectedFaceInfo[1]
            probability = detectedFaceInfo[0] * -1.0
            bb_index = detectedFaceInfo[2]
            # print detectedFaceInfo

            if identity in matched_identities:
                # print "matched_bbs : " + str(matched_identities)
                continue

            matched_bb_indices.append(bb_index)
            matched_identities.append(identity)

            if probability < threshold:
                results.append((-1, bbs[bb_index], 0.0))
            else:
                results.append((identity, bbs[bb_index], probability))

                # print '+' + str(results[len(results) - 1])

        for bb_index, bb in enumerate(bbs):
            if bb_index in matched_bb_indices:
                continue

            results.append((-1, bb, 0.0))

        return results
Example #16
    def _get_backfill_events(self, txn, room_id, event_list, limit):
        logger.debug(
            "_get_backfill_events: %s, %s, %s",
            room_id, repr(event_list), limit
        )

        event_results = set()

        # We want to make sure that we do a breadth-first, "depth" ordered
        # search.

        query = (
            "SELECT depth, prev_event_id FROM event_edges"
            " INNER JOIN events"
            " ON prev_event_id = events.event_id"
            " AND event_edges.room_id = events.room_id"
            " WHERE event_edges.room_id = ? AND event_edges.event_id = ?"
            " AND event_edges.is_state = ?"
            " LIMIT ?"
        )

        queue = PriorityQueue()

        for event_id in event_list:
            depth = self._simple_select_one_onecol_txn(
                txn,
                table="events",
                keyvalues={
                    "event_id": event_id,
                },
                retcol="depth",
                allow_none=True,
            )

            if depth:
                queue.put((-depth, event_id))

        while not queue.empty() and len(event_results) < limit:
            try:
                _, event_id = queue.get_nowait()
            except Empty:
                break

            if event_id in event_results:
                continue

            event_results.add(event_id)

            txn.execute(
                query,
                (room_id, event_id, False, limit - len(event_results))
            )

            for row in txn.fetchall():
                if row[1] not in event_results:
                    queue.put((-row[0], row[1]))

        return event_results
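The "depth ordered" search above works because events are pushed as (-depth, event_id): negating the depth turns the min-ordered PriorityQueue into a max-by-depth queue, so the deepest events come back first. A minimal demonstration of that negation trick (Python 2; on Python 3 the module is named "queue"):

from Queue import PriorityQueue

queue = PriorityQueue()
for depth, event_id in [(3, '$a'), (7, '$b'), (5, '$c')]:
    queue.put((-depth, event_id))       # negate so the deepest event sorts first

while not queue.empty():
    print queue.get_nowait()            # (-7, '$b'), (-5, '$c'), (-3, '$a')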
Example #17
    def _get_backfill_events(self, txn, room_id, event_list, limit):
        logger.debug(
            "_get_backfill_events: %s, %s, %s",
            room_id, repr(event_list), limit
        )

        event_results = set()

        # We want to make sure that we do a breadth-first, "depth" ordered
        # search.

        query = (
            "SELECT depth, prev_event_id FROM event_edges"
            " INNER JOIN events"
            " ON prev_event_id = events.event_id"
            " AND event_edges.room_id = events.room_id"
            " WHERE event_edges.room_id = ? AND event_edges.event_id = ?"
            " AND event_edges.is_state = ?"
            " LIMIT ?"
        )

        queue = PriorityQueue()

        for event_id in event_list:
            depth = self._simple_select_one_onecol_txn(
                txn,
                table="events",
                keyvalues={
                    "event_id": event_id,
                },
                retcol="depth",
                allow_none=True,
            )

            if depth:
                queue.put((-depth, event_id))

        while not queue.empty() and len(event_results) < limit:
            try:
                _, event_id = queue.get_nowait()
            except Empty:
                break

            if event_id in event_results:
                continue

            event_results.add(event_id)

            txn.execute(
                query,
                (room_id, event_id, False, limit - len(event_results))
            )

            for row in txn:
                if row[1] not in event_results:
                    queue.put((-row[0], row[1]))

        return event_results
Example #18
def a_star(grid, start, end):
    visit_ctr = 0
    print "Starting at {}".format(start)

    start = a_cell(start)
    end = a_cell(end)

    # Priority to sort queue on heuristic values
    openset = PriorityQueue()
    closedset = set()
    openset.put((start.h + start.g, start))

    while not openset.empty():
        # Find the item in the open set with the lowest G + H score
        current = openset.get_nowait()[1]
        visit_ctr += 1
        # print "Visiting at {}".format(current.val)

        if current.val == end.val:
            print "Found {}".format(current.val)
            path = []
            while current.parent:
                path.append(current.val)
                current = current.parent
            path.append(current.val)
            return path, visit_ctr

        # Add it to the closed set
        closedset.add(current)

        for neighbor in get_neighbors(grid, current.val):

            cell = a_cell(neighbor)

            # If it is already in the closed set, skip it
            if cell in closedset:
                continue

            # Otherwise if it is already in the open set
            if in_queue(cell, openset):
                # Check if we beat the G score
                new_g = current.g + 1
                if cell.g > new_g:
                    # If so, update the cell to have a new parent
                    cell.g = new_g
                    cell.parent = current
            else:
                # Not in open set, calculate the G and H score for cell
                cell.g = current.g + 1
                cell.h = heuristic(cell.val, end.val)

                # Set the parent to our current item
                cell.parent = current

                # Add it to the set
                openset.put((cell.h + cell.g, cell))
Example #19
class DataSource(object):
    """
    DataSource acts as an abstract representation of the data source,
    though in reality it also pulls its data from the XOMBIE stream.
    Handles pushing data to possibly multiple listeners in a thread-safe manner.

    class variables:
        sources - a mapping from signal-names to all live data sources

    class methods:
        find - Either finds the existing data source for some signal name,
               or creates a new one for that signal

    instance variables:
        name  - the signal name that this data source tracks, in the format
                id-in-hex:message-name. For example, the identifier for
                the Tritium Motor Drive Command Motor current is
                "0x501:Motor Current"
        queue - the internal data queue that the data source uses to pull
                data from the stream in a thread-safe manner
        data  - the GraphData object that handles filtering (not used right now)
                and storing the data for use with collections

    method summary:
        push  - notifies all listeners that new data is pending and copies
                any data from the internal queue to the GraphData storage
        pull  - pulls all data from a queue into the internal data queue.
                Intended for initializing with accumulated data
    """
    def __init__(self, identifier, desc=None):
        self.name = identifier
        self.queue = PriorityQueue()
        self.data = GraphData([])
        self.descriptor = desc

        self.last_received = datetime.datetime(1993, 6, 20)

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, other):
        return self.name == other.name

    def put(self, point):
        "Add data from the stream to the internal data queue"
        time, datum = point
        self.queue.put(point)
        self.last_received = max(self.last_received, time)

    def pull(self):
        "Adds all of the data from the stream's queue to its internal queue"
        while not self.queue.empty():
            self.data.addPoint(self.queue.get_nowait())

    def __repr__(self):
        return "DataSource(%r)" % self.name
Example #20
def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '-c',
        '--config',
        default='/usr/local/factory/properties.json',
        help=('Specify path to the config file, '
              'default file: /usr/local/factory/properties.json'))

    args = parser.parse_args()

    with open(args.config) as f:
        properties = json.load(f)
        try:
            lights = properties['ui']['lights']
        except Exception:
            lights = []
            logging.warning("Can't find ui > lights entry in `%s'",
                            args.config)

        try:
            data = properties['ui']['display']['data']
        except Exception:
            data = []
            logging.warning("Can't find ui > display > data entry in `%s'",
                            args.config)

        items = lights + data
        queue = PriorityQueue(len(items))

        for item in items:
            if 'poll' in item:
                poll = item['poll']
                poll['interval'] = min(poll.get('interval', 0), 10000)
                queue.put((time.time(), poll))
            if 'init_cmd' in item:
                subprocess.call(item['init_cmd'], shell=True)

        if queue.empty():
            sys.exit(0)

        try:
            while True:
                (when, poll) = queue.get_nowait()
                if time.time() < when:  # not now
                    queue.put((when, poll))
                    sleep_time = when - time.time()
                    if sleep_time > 0:
                        time.sleep(sleep_time)
                else:
                    subprocess.call(poll['cmd'], shell=True)
                    queue.put(
                        (time.time() + (poll['interval'] / 1000.0), poll))
        except (KeyboardInterrupt, SystemExit):
            pass
Example #21
def outlierRejection(graph, K, percent=5.0, max_dist=5.0):
    """ 
    Examine graph and remove some top percentage of outliers 
    and those outside a certain radius. 
    """

    # iterate through all points
    pq = PriorityQueue()
    marked_keys = []
    for key, entry in graph["3Dmatches"].iteritems():

        X = entry["3Dlocs"]

        # mark and continue if too far away from the origin
        if np.linalg.norm(X) > max_dist:
            marked_keys.append(key)
            continue

        # project into each frame
        errors = []
        for frame, x in zip(entry["frames"], entry["2Dlocs"]):
            frame -= graph["frameOffset"]
            Rt = graph["motion"][frame]

            proj = fromHomogenous(K * Rt * toHomogenous(X))
            diff = proj - x

            err = np.sqrt(np.multiply(diff, diff).sum())
            #print (frame, err)

            errors.append(err)

        # get mean error and add to priority queue
        # (priority is reciprocal of error since this is a MinPQ)
        mean_error = np.array(errors).mean()
        pq.put_nowait((1.0 / mean_error, key))

    # remove worst keys
    N = max(
        0,
        int((percent / 100.0) * len(graph["3Dmatches"].keys())) -
        len(marked_keys))
    for i in range(N):
        score, key = pq.get_nowait()
        del graph["3Dmatches"][key]
        pq.task_done()

    # remove keys out of range
    for key in marked_keys:
        del graph["3Dmatches"][key]

    print "Removed %d outliers." % (N + len(marked_keys))
Example #22
class AsyncDatabaseManager(Thread):
    def __init__(self, directory):
        super(AsyncDatabaseManager, self).__init__()
        self.directory = directory
        if not os.path.exists(self.directory):
            open(self.directory, 'w').close()
        self.queue = PriorityQueue()
        self.event = Event()
        self.start()  # Threading module start

    def run(self):
        super(AsyncDatabaseManager, self).run()
        db = sqlite3.connect(self.directory)
        cursor = db.cursor()
        while True:
            if self.queue.empty():
                time.sleep(0.1)
                continue
            job, sql, arg, res = self.queue.get_nowait()
            if sql == '__close__':
                break
            cursor.execute(sql, arg)
            time.sleep(0.01)
            db.commit()
            if res:
                for rec in cursor:
                    res.put(rec)
                res.put('__last__')
        db.close()
        self.event.set()  # TODO: Question: Do I want the database to finish or end it when the app ends?

    def execute(self, sql, args=None, res=None, priority=2):
        self.queue.put_nowait((priority, sql, args, res))

    def select(self, sql, args=None, priority=2):
        '''
        :param: sql - command to execute
        :param: args - sql arguments
        :param: priority - 2 for system and 1 for user
        '''
        res = Queue()
        self.execute(sql, args, res, priority)
        while True:
            rec = res.get()
            if rec == '__last__':
                break
            yield rec

    def close(self):
        self.execute('__close__')
Example #23
class PrioritySet(object):
    def __init__(self):
        self.lock = RLock()
        self.set_ = set()
        self.queue = PriorityQueue()

    def __len__(self):
        with self.lock:
            return min(self.queue.qsize(), len(self.set_))

    def __iter__(self):
        with self.lock:
            return iter(list(self.set_))

    def add(self, priority, obj):
        item = (obj.__class__, obj.pk)
        LOG.debug('%s objects in queue'%len(self))
        with self.lock:
            if item not in self.set_:
                self.queue.put((priority, item))
                self.set_.add(item)
                return True

    def pop(self):
        LOG.debug("Trying to pop from queue")
        while 1:
            try:
                with self.lock:
                    priority, item = self.queue.get_nowait()
                    self.set_.remove(item)
            except Empty:
                LOG.debug("queue is empty")
                raise KeyError("pop from an empty set")

            except KeyError:
                LOG.error("item not in self.set_")
                continue
            except Exception:
                LOG.exception("An error occurred while getting an item from the queue")
            else:
                klass, pk = item
                try:
                    return klass.objects.get(pk=pk)
                except klass.DoesNotExist:
                    LOG.warn("%s no longer exists" % ((priority, item),))
                    if priority < 100:
                        with self.lock:
                            self.queue.put((priority+10, item))
                            self.set_.add(item)
                    continue
Example #24
def getAlphaBeta(game,
                 player,
                 other,
                 reward=AlphaBeta(-100, 100),
                 depth=10,
                 tab=0):
    '''
    alpha: minimum bound of the outcome -- currently, evaluate of best move possible by other
    beta: maximum bound of the outcome -- currently, evaluate of best move possible by player
    
    returns: (alpha, beta, evaluation, move)
    '''
    # evaluation is the current value of board, assuming no more moves in future
    # alpha == beta == finalvalue if we figure out the outcome.
    reward.evaluation = game.evaluate_cached(player)
    move = Move(-1, -1)
    # base case: can't play further, lost
    if (reward.evaluation == reward.beta):
        #         print 'Player', player, 'won!!!'
        return AlphaBetaOfMove(
            AlphaBeta(reward.beta, reward.beta, reward.beta), move)
    # base case: can't evaluate further
    if (depth == 0):
        return AlphaBetaOfMove(reward, move)
    q = PriorityQueue()
    for m in game.next_moves():
        # g2 = copy.deepcopy( game )
        assert game.move(player, m)
        try:
            oponent_reward = AlphaBeta(-reward.beta, -reward.alpha,
                                       -reward.evaluation)
            oponent_reward = getAlphaBeta(game,
                                          other,
                                          player,
                                          reward=oponent_reward,
                                          depth=depth - 1,
                                          tab=tab + 1).alphabeta
            player_reward = AlphaBeta(-oponent_reward.beta,
                                      -oponent_reward.alpha,
                                      -oponent_reward.evaluation)
            q.put_nowait(AlphaBetaOfMove(player_reward, m))
        finally:
            assert game.unmove(player, m)
#             if( tab < 1 ):
#                 print ("\t"*tab), (player,row,col), (other,other_r,other_c), (other_alpha,other_beta,other_evaluation), (next_alpha,next_beta,evaluation), update
    if (q.empty()):
        return AlphaBetaOfMove(reward, Move(-1, -1))
    else:
        return q.get_nowait()
Example #25
def outlierRejection(graph, K, percent=5.0, max_dist=5.0):
    """ 
    Examine graph and remove some top percentage of outliers 
    and those outside a certain radius. 
    """

    # iterate through all points
    pq = PriorityQueue()
    marked_keys = []
    for key, entry in graph["3Dmatches"].iteritems():

        X = entry["3Dlocs"]

        # mark and continue if too far away from the origin
        if np.linalg.norm(X) > max_dist:
            marked_keys.append(key)
            continue

        # project into each frame
        errors = []
        for frame, x in zip(entry["frames"], entry["2Dlocs"]):
            frame -= graph["frameOffset"]
            Rt = graph["motion"][frame]

            proj = fromHomogenous(K * Rt * toHomogenous(X))
            diff = proj - x

            err = np.sqrt(np.multiply(diff, diff).sum())
            #print (frame, err)

            errors.append(err)

        # get mean error and add to priority queue
        # (priority is reciprocal of error since this is a MinPQ)
        mean_error = np.array(errors).mean()
        pq.put_nowait((1.0 / mean_error, key))

    # remove worst keys
    N = max(0, int((percent/100.0) * len(graph["3Dmatches"].keys())) - len(marked_keys))
    for i in range(N):
        score, key = pq.get_nowait()
        del graph["3Dmatches"][key]
        pq.task_done()

    # remove keys out of range
    for key in marked_keys:
        del graph["3Dmatches"][key]

    print "Removed %d outliers." % (N + len(marked_keys))
Example #26
def ShortestPath(startNode, destinationNodes):
   # Dijkstra w/ priority queue.
   Infinity = 999999999999
   distance = defaultdict(lambda: Infinity)
   predecessor = defaultdict(lambda: None)
   queued = defaultdict(lambda: False)

   nextNodes = PriorityQueue()
   nextNodes.put_nowait((0, startNode))
   queued[startNode] = True
   distance[startNode] = startNode.weight

   while True:
      try:
         priority, node = nextNodes.get_nowait()
         queued[node] = False
      except Empty:
         break

      for neighbor in node.edges:
         alternate = distance[node] + neighbor.weight

         if alternate < distance[neighbor]:
            distance[neighbor] = alternate
            predecessor[neighbor] = node

            if not queued[neighbor]:
               nextNodes.put_nowait((alternate, neighbor))
               queued[neighbor] = True

   destinationDistances = [
    (distance[node], node) for node in destinationNodes]

   bestCost, bestDestination = sorted(destinationDistances)[0]

   # For the best destination node, construct the path taken to get there.

   path = [bestDestination]
   node = bestDestination

   while True:
      node = predecessor[node]
      if node is None:
         break
      path.insert(0, node)

   return bestCost, path
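The two examples around here run Dijkstra with node weights and a queued-flag per node. Below is a more conventional sketch of the same lazy pattern, written against a plain adjacency dict with edge weights (an assumed structure, not the node objects used above); stale queue entries are simply skipped when popped (Python 2):

from Queue import PriorityQueue, Empty

def shortest_distances(adj, start):
    # adj maps node -> list of (neighbor, edge_weight)
    dist = {start: 0}
    pq = PriorityQueue()
    pq.put_nowait((0, start))
    while True:
        try:
            d, node = pq.get_nowait()
        except Empty:
            break
        if d > dist.get(node, float('inf')):
            continue                      # stale entry; a shorter path was already found
        for neighbor, weight in adj[node]:
            alt = d + weight
            if alt < dist.get(neighbor, float('inf')):
                dist[neighbor] = alt
                pq.put_nowait((alt, neighbor))
    return dist

adj = {'a': [('b', 1), ('c', 4)], 'b': [('c', 2)], 'c': []}
print shortest_distances(adj, 'a')        # a: 0, b: 1, c: 3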
Example #27
def ShortestPath(startNode, endNode):
   # Dijkstra w/ priority queue.
   Infinity = 999999999999
   distance = defaultdict(lambda: Infinity)
   predecessor = defaultdict(lambda: None)
   queued = defaultdict(lambda: False)

   nextNodes = PriorityQueue()
   nextNodes.put_nowait((0, startNode))
   queued[startNode] = True
   distance[startNode] = startNode.weight

   while True:
      try:
         priority, node = nextNodes.get_nowait()
         queued[node] = False
      except Empty:
         break

      for neighbor in node.edges:
         alternate = distance[node] + neighbor.weight

         if alternate < distance[neighbor]:
            distance[neighbor] = alternate
            predecessor[neighbor] = node

            if not queued[neighbor]:
               nextNodes.put_nowait((alternate, neighbor))
               queued[neighbor] = True

   # Dijkstra done here. Now we process the results.

   cost = distance[endNode]

   # Construct the path taken to get there.

   path = [endNode]
   node = endNode

   while True:
      node = predecessor[node]
      if node is None:
         break
      path.insert(0, node)

   return cost, path
Example #28
	def distance(self, i, j):
		lon = self.pixel_lons[i]
		lat = self.pixel_lats[j]		

		point = Point(lat, lon)

		elements = PriorityQueue()
		elements.put_nowait((self.grid.distance(point), self.grid))
		# We iterate over the priority queue until the nearest element is a point. While it isn't we add its children to the queue.
		while True:
			(distance, elem) = elements.get_nowait()
			#print "Iterating (%d, %d) distance: %f" % (i, j, distance)
			if isinstance(elem, Point):
				return distance
			else:
				for child in elem.children:
					elements.put_nowait((child.distance(point), child))
Example #29
    def distance(self, i, j):
        lon = self.pixel_lons[i]
        lat = self.pixel_lats[j]

        point = Point(lat, lon)

        elements = PriorityQueue()
        elements.put_nowait((self.grid.distance(point), self.grid))
        # We iterate over the priority queue until the nearest element is a point. While it isn't we add its children to the queue.
        while True:
            (distance, elem) = elements.get_nowait()
            #print "Iterating (%d, %d) distance: %f" % (i, j, distance)
            if isinstance(elem, Point):
                return distance
            else:
                for child in elem.children:
                    elements.put_nowait((child.distance(point), child))
Example #30
class JobQueue(object):
    def __init__(self):
        self._priorityQueue = PriorityQueue()
    
    def put(self,job,priority_metric):
        self._priorityQueue.put_nowait((-priority_metric,job))
        
    def get(self):
        try:
            return self._priorityQueue.get_nowait()[1]
        except Exception:
            return None
    
    def __iter__(self):
        while True:
            job = self.get()
            if job is None:
                break
            yield job
Example #31
    def UCS(self):
        queue = PriorityQueue()
        queue.put([0, self.problem.tabuleiro, ''])
        visitados = []

        while not queue.empty():
            self.num_visited += 1
            if self.max_mem < queue.qsize():
                self.max_mem = queue.qsize()

            custo, node, caminho = queue.get_nowait()
            visitados.append(node)

            if self.problem.testeObjetivo(node):
                return caminho

            for suc, move in self.problem.sucessores(node):
                if suc not in visitados:
                    queue.put([custo + 1, suc, caminho + move])
Example #32
class MaxQueue(object):
    ''' 
    A priority queue sorted in descending order instead of ascending order
    
    If maxlength > 0, queue keeps only the maxlength entries with highest values
    not memory efficient in python since small memory is not reused
    
        # reduce length to maxlength if queue is too long
        if self.maxlength > 0 and len(self) > self.maxlength + self.length_tol:
            with self.decrease_length:
                print 'Reducing length of MaxQueue by removing values less than',       
                print 'Used memory before = ', self.memory_mon.usage()
                new_pq = PriorityQueue()
                for i in range(self.maxlength - 1):
                    new_pq.put(self.pq.get())
                print -self.pq.get()[0], 'from consideration'
                self.pq = new_pq
                print 'Used memory after = ', self.memory_mon.usage()
    '''
    def __init__(self, maxlength = 0, length_tol = 1000):
        self.pq = PriorityQueue()
        self.decrease_length = multiprocessing.Lock()
        self.maxlength = maxlength
        self.length_tol = length_tol
        
    def put(self,tup):
        # add new item to queue            
        self.pq.put((-tup[0],tup[1]))

    def get(self,block=True):
        tup = self.pq.get(block)
        return (-tup[0],tup[1])
        
    def get_nowait(self):
        tup = self.pq.get_nowait()
        return (-tup[0],tup[1])        

    def __len__(self):
        return self.pq.qsize()
        
    def empty(self):
        return self.pq.empty()
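A short usage sketch of MaxQueue (assumes the class above with PriorityQueue and multiprocessing imported; Python 2). Negating the first tuple element on put() and again on get() makes the underlying min-queue behave as a max-queue:

mq = MaxQueue()
mq.put((3, 'medium'))
mq.put((9, 'high'))
mq.put((1, 'low'))

while not mq.empty():
    print mq.get_nowait()    # (9, 'high'), (3, 'medium'), (1, 'low')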
Example #33
class MaxQueue(object):
    ''' 
    A priority queue sorted in descending order instead of ascending order
    
    If maxlength > 0, queue keeps only the maxlength entries with highest values
    not memory efficient in python since small memory is not reused
    
        # reduce length to maxlength if queue is too long
        if self.maxlength > 0 and len(self) > self.maxlength + self.length_tol:
            with self.decrease_length:
                print 'Reducing length of MaxQueue by removing values less than',       
                print 'Used memory before = ', self.memory_mon.usage()
                new_pq = PriorityQueue()
                for i in range(self.maxlength - 1):
                    new_pq.put(self.pq.get())
                print -self.pq.get()[0], 'from consideration'
                self.pq = new_pq
                print 'Used memory after = ', self.memory_mon.usage()
    '''
    def __init__(self, maxlength=0, length_tol=1000):
        self.pq = PriorityQueue()
        self.decrease_length = multiprocessing.Lock()
        self.maxlength = maxlength
        self.length_tol = length_tol

    def put(self, tup):
        # add new item to queue
        self.pq.put((-tup[0], tup[1]))

    def get(self, block=True):
        tup = self.pq.get(block)
        return (-tup[0], tup[1])

    def get_nowait(self):
        tup = self.pq.get_nowait()
        return (-tup[0], tup[1])

    def __len__(self):
        return self.pq.qsize()

    def empty(self):
        return self.pq.empty()
Example #34
def branch_and_bound(graph, start, goal):
    if start == goal:
        return list(start)

    paths = PriorityQueue()
    for node in graph.get_connected_nodes(start):
        paths.put_nowait((path_length(graph, [start, node]), [start, node]))

    while not paths.empty():
        path = paths.get_nowait()
        if path[1][-1] == goal:
            return path[1]
        else:
            cnodes = graph.get_connected_nodes(path[1][-1])
            for node in cnodes:
                if path[1].count(node) == 0:
                    epath = list(path[1])
                    epath.append(node)
                    paths.put_nowait((path_length(graph, epath), epath))
    return []
Example #35
    def getTwoBestClusters(self):
        best_clusters = PriorityQueue()
        cnt = 0

        for centroid, stats in self.clusters.iteritems():
            avg_mass = stats["total_mass"] / stats["size"]
            best_clusters.put_nowait((avg_mass, centroid))
            cnt += 1

        if cnt < 2:
            print "Warning. Only found %d clusters." % cnt
            return [], []

        avg_mass, cluster1 = best_clusters.get_nowait(); best_clusters.task_done()
        avg_mass, cluster2 = best_clusters.get_nowait(); best_clusters.task_done()

        centroids = [cluster1, cluster2]
        scores = [self.clusters[cluster1]["total_mass"],
                  self.clusters[cluster2]["total_mass"]]

        return centroids, scores
Example #36
class PriorityLock(object):
    def __init__(self):
        self._is_available = True
        self._mutex = Lock()
        self._waiter_queue = PriorityQueue()

    def acquire(self, priority=0):
        self._mutex.acquire()
        # First, just check the lock.
        if self._is_available:
            self._is_available = False
            self._mutex.release()
            return True
        condition = Condition()
        condition.acquire()
        self._waiter_queue.put((priority, condition))
        self._mutex.release()
        condition.wait()
        condition.release()
        return True

    def release(self):
        self._mutex.acquire()
        # Notify the next thread in line, if any.
        try:
            _, condition = self._waiter_queue.get_nowait()
        except Empty:
            self._is_available = True
        else:
            condition.acquire()
            condition.notify()
            condition.release()
        self._mutex.release()

    def __enter__(self):
        self.acquire()
        return self

    def __exit__(self, type, value, traceback):
        self.release()
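A rough usage sketch of PriorityLock (assumes the class above with Lock, Condition, PriorityQueue and Empty imported, plus the standard threading module; Python 2). While the lock is held, waiters queue up as (priority, condition) tuples, so the lowest-numbered waiter is woken first on release:

import time
from threading import Thread

lock = PriorityLock()

def worker(name, priority):
    lock.acquire(priority)
    try:
        print "%s acquired the lock" % name
    finally:
        lock.release()

lock.acquire()                       # hold the lock so both workers have to wait
threads = [Thread(target=worker, args=('low', 10)),
           Thread(target=worker, args=('high', 1))]
for t in threads:
    t.start()
time.sleep(0.2)                      # give both workers time to enqueue themselves
lock.release()                       # 'high' (priority 1) should be woken first
for t in threads:
    t.join()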
Example #37
    def ASTAR(self, heuristic):
        queue = PriorityQueue()
        queue.put([0, self.problem.tabuleiro, ''])
        visitados = []

        while not queue.empty():
            self.num_visited += 1
            if self.max_mem < queue.qsize():
                self.max_mem = queue.qsize()

            custo, node, caminho = queue.get_nowait()
            visitados.append(node)

            if self.problem.testeObjetivo(node):
                return caminho

            for suc, move in self.problem.sucessores(node):
                if suc not in visitados:
                    if heuristic == 1:
                        queue.put([self.heuristic_manhattan(suc) + custo + 1, suc, caminho + move])
                    else:
                        queue.put([self.heuristic_full_manhattan(suc) + custo + 1, suc, caminho + move])
Example #38
    def build_dict(self):

        from Queue import PriorityQueue
        token_count_dict = {}

        with open(self.input_file) as f:

            print "Creating Dictionary..."
            line_count = 0
            for line in f:
                token_list = re.findall(expression, line.lower())
                for token in token_list:
                    if token not in token_count_dict:
                        token_count_dict[token] = 1
                    else:
                        token_count_dict[token] += 1
                line_count += 1

            print "Lines in the Dataset: " + str(line_count)

        q = PriorityQueue()
        for t in token_count_dict:
            q.put([-token_count_dict[t], t])
        self.token_dict = {}
        #add special token
        self.token_dict[zero_token] = 0
        self.token_dict[unknown_token] = 1
        self.token_dict[start_token] = 2
        self.token_dict[end_token] = 3
        token_index = 4

        token_count_dict = {}

        #priority queue
        while (not q.empty()):
            get = q.get_nowait()
            self.token_dict[get[1]] = token_index
            token_index += 1
Example #39
class TaskChain(object):

    def __init__(self):
        self.task_chain = PriorityQueue()
        self.task_num = 0

    def put(self, task):
        try:
            self.task_chain.put_nowait((self.task_num, task))
            self.task_num += 1
        except Queue.Full as e:
            raise e

    def get(self):
        try:
            priority, task = self.task_chain.get_nowait()
            self.task_num -= 1
            return task
        except Queue.Empty as e:
            raise  e

    def size(self):
        return self.task_num
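TaskChain uses a monotonically increasing counter as the priority, which makes the PriorityQueue behave as a FIFO. A short usage sketch (assumes the class above with "import Queue" and "from Queue import PriorityQueue" in scope; Python 2):

chain = TaskChain()
for task in ['parse', 'transform', 'load']:
    chain.put(task)

print chain.get()     # 'parse'
print chain.get()     # 'transform'
print chain.size()    # 1

Note that task_num doubles as both the sequence counter and the size counter, so interleaving put() and get() calls can reuse sequence numbers; the sketch keeps all puts before any gets.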
Example #40
 def a_star_ghost(self):
     pq = PriorityQueue(maxsize=0)
     pq.put_nowait((self.manhattan_distance(self.currPos, self.goalPos), (self.currPos, [])))
     visited = set()
     bestPath = None
     bestHeur = None
     numNodes = 0
     backwardsPenalty = len(self.maze) * len(self.maze[0]) / 2 # backwards penalty to allow loitering
     while not pq.empty():
         priority, curr = pq.get_nowait()
         coord, path = curr
         visited.add(coord)
         if bestPath is not None and priority >= bestHeur:
             pass
         elif self.getChar(coord) == '%': # wall
             pass
         else: # recursive case
             if self.getChar(coord) == '.': # goal
                 print "Found a path:", path
                 if bestPath is None or len(path) < len(bestPath):
                     print "Is best path"
                     bestPath = path[:]
                     bestHeur = priority
             for adj, direction in self.adjacent(coord):
                 if self.getChar(adj) != '%':
                     numNodes += 1
                     heur = len(path + direction) + self.manhattan_distance(adj, self.goalPos)
                     if adj in visited:
                         heur += backwardsPenalty
                     if bestPath is None or heur < bestHeur: # preselect based on heuristic
                         if adj != self.getGhostPos(path + direction) and (adj != self.getGhostPos(path) and coord != self.getGhostPos(path + direction)):
                             # check that next step won't put pacman on same square as ghost, or won't cross paths with ghost
                             pq.put_nowait((heur, (adj, path + direction)))
     print "Num Nodes:", numNodes
     print self.debug(bestPath) # debug
     return bestPath
Example #41
    def find_feasible_schedule(para, rtn):
        """find a feasible schedule"""
        # todo: does not work for G6
        # todo: or, expand the time horizon and assign a bigM price for t > 96
        # todo: the following is simply a greedy algorithm
        # todo: offset to the cheapest hours
        # todo: needs testing for 3EAF, 2AOD, 4LF, 1CC

        rtn_casters = rtn.steel_rtn.casters

        task_start = [-100] + [-1] * para.num_tasks
        heat_ready_q2 = PriorityQueue()

        # get caster start asap by arranging heat priority
        # group_time_q = PriorityQueue()
        # for group_ in range(object.num_groups):
        # total_slot = 0
        # for heat in object.optmath.steel_rtn.group2heats[group_+1]:
        #         for s in object.process_sequence:
        #             total_slot += object.optmath.steel_rtn.task_length[object.optmath.steel_rtn.tasks[s][heat-1]]
        #     total_slot += max([object.optmath.steel_rtn.task_cleanup_length[object.optmath.steel_rtn.tasks[unit][group_]] for unit in casters])
        #     group_time_q.put_nowait((total_slot,group_+1))
        # equip_count = 0
        # num_eaf = object.optmath.steel_rtn.stage2units['1']['EAF']
        # while not group_time_q.empty():
        #     (total_slot, group) = group_time_q.get_nowait()
        #     for heat in object.optmath.steel_rtn.group2heats[group]:
        #         heat_ready_q2.put_nowait((math.floor(equip_count/num_eaf),heat-1))
        #         equip_count += 1

        for heat_ in range(para.num_heats):
            heat_ready_q2.put_nowait((0, heat_))
        for seq in [0, 2, 4]:
            heat_ready_q = heat_ready_q2
            heat_ready_q2 = PriorityQueue()
            task_type = para.heat_sequence[seq]
            equip_time = [0] * para.unit2num[task_type]
            equip_id = 0
            while not heat_ready_q.empty():
                (ready_t, heat_) = heat_ready_q.get_nowait()
                task = rtn.steel_rtn.tasks[task_type][heat_]
                equip_time[equip_id] = max(equip_time[equip_id], ready_t)
                task_start[task] = equip_time[equip_id]
                equip_time[equip_id] += rtn.steel_rtn.task_length[task]
                trans_time = rtn.steel_rtn.task_length[task + para.num_heats]
                heat_ready_q2.put_nowait((equip_time[equip_id] + trans_time, heat_))
                equip_id = (equip_id + 1) % len(equip_time)
        # casting
        heat_ready_list = [-1] * para.num_heats
        while not heat_ready_q2.empty():
            (read_t, heat_) = heat_ready_q2.get_nowait()
            heat_ready_list[heat_] = read_t
        group_ready_t = dict()
        caster_time = [0] * len(rtn_casters)
        for caster in rtn_casters:
            group_ready_t[caster] = PriorityQueue()
        # group ready time
        for group_ in range(para.num_groups):
            heats = rtn.steel_rtn.group2heats[group_ + 1]
            for caster in rtn_casters:
                read_t = [heat_ready_list[heat - 1] - rtn.steel_rtn.cast_heat_rel_slot[caster][heat] for heat
                          in heats]
                group_ready_t[caster].put_nowait((max(read_t), group_))
        scheduled_groups = []
        while len(scheduled_groups) < para.num_groups:
            caster_id = np.argmin(caster_time)
            (read_t, group_) = group_ready_t[rtn_casters[caster_id]].get_nowait()
            while group_ in scheduled_groups:
                (read_t, group_) = group_ready_t[rtn_casters[caster_id]].get_nowait()
            scheduled_groups.append(group_)
            schedule_time = max(read_t, caster_time[caster_id])
            schedule_task = rtn.steel_rtn.tasks[rtn_casters[caster_id]][group_]
            caster_time[caster_id] = schedule_time + rtn.steel_rtn.task_cleanup_length[schedule_task]
            task_start[schedule_task] = schedule_time

        task_time = [(-100, -100)] + [(-1, -1)] * rtn.steel_rtn.num_tasks  # task counts from 1
        for task in range(1, para.num_tasks + 1):
            if task_start[task] < 0:
                continue
            task_time[task] = (task_start[task], task_start[task] + 1)
        return task_time
Example #42
    def find_feasible_schedule(para, rtn):
        """find a feasible schedule"""
        # todo: does not work for G6
        # todo: or, expand the time horizon and assign a bigM price for t > 96
        # todo: the following is simply a greedy algorithm
        # todo: offset to the cheapest hours
        # todo: needs testing for 3EAF, 2AOD, 4LF, 1CC

        rtn_casters = rtn.steel_rtn.casters

        task_start = [-100] + [-1] * para.num_tasks
        heat_ready_q2 = PriorityQueue()

        # get caster start asap by arranging heat priority
        # group_time_q = PriorityQueue()
        # for group_ in range(object.num_groups):
        # total_slot = 0
        # for heat in object.optmath.steel_rtn.group2heats[group_+1]:
        #         for s in object.process_sequence:
        #             total_slot += object.optmath.steel_rtn.task_length[object.optmath.steel_rtn.tasks[s][heat-1]]
        #     total_slot += max([object.optmath.steel_rtn.task_cleanup_length[object.optmath.steel_rtn.tasks[unit][group_]] for unit in casters])
        #     group_time_q.put_nowait((total_slot,group_+1))
        # equip_count = 0
        # num_eaf = object.optmath.steel_rtn.stage2units['1']['EAF']
        # while not group_time_q.empty():
        #     (total_slot, group) = group_time_q.get_nowait()
        #     for heat in object.optmath.steel_rtn.group2heats[group]:
        #         heat_ready_q2.put_nowait((math.floor(equip_count/num_eaf),heat-1))
        #         equip_count += 1

        for heat_ in range(para.num_heats):
            heat_ready_q2.put_nowait((0, heat_))
        for seq in [0, 2, 4]:
            heat_ready_q = heat_ready_q2
            heat_ready_q2 = PriorityQueue()
            task_type = para.heat_sequence[seq]
            equip_time = [0] * para.unit2num[task_type]
            equip_id = 0
            while not heat_ready_q.empty():
                (ready_t, heat_) = heat_ready_q.get_nowait()
                task = rtn.steel_rtn.tasks[task_type][heat_]
                equip_time[equip_id] = max(equip_time[equip_id], ready_t)
                task_start[task] = equip_time[equip_id]
                equip_time[equip_id] += rtn.steel_rtn.task_length[task]
                trans_time = rtn.steel_rtn.task_length[task + para.num_heats]
                heat_ready_q2.put_nowait(
                    (equip_time[equip_id] + trans_time, heat_))
                equip_id = (equip_id + 1) % len(equip_time)
        # casting
        heat_ready_list = [-1] * para.num_heats
        while not heat_ready_q2.empty():
            (read_t, heat_) = heat_ready_q2.get_nowait()
            heat_ready_list[heat_] = read_t
        group_ready_t = dict()
        caster_time = [0] * len(rtn_casters)
        for caster in rtn_casters:
            group_ready_t[caster] = PriorityQueue()
        # group ready time
        for group_ in range(para.num_groups):
            heats = rtn.steel_rtn.group2heats[group_ + 1]
            for caster in rtn_casters:
                read_t = [
                    heat_ready_list[heat - 1] -
                    rtn.steel_rtn.cast_heat_rel_slot[caster][heat]
                    for heat in heats
                ]
                group_ready_t[caster].put_nowait((max(read_t), group_))
        scheduled_groups = []
        while len(scheduled_groups) < para.num_groups:
            caster_id = np.argmin(caster_time)
            (read_t,
             group_) = group_ready_t[rtn_casters[caster_id]].get_nowait()
            while group_ in scheduled_groups:
                (read_t,
                 group_) = group_ready_t[rtn_casters[caster_id]].get_nowait()
            scheduled_groups.append(group_)
            schedule_time = max(read_t, caster_time[caster_id])
            schedule_task = rtn.steel_rtn.tasks[rtn_casters[caster_id]][group_]
            caster_time[
                caster_id] = schedule_time + rtn.steel_rtn.task_cleanup_length[
                    schedule_task]
            task_start[schedule_task] = schedule_time

        task_time = [
            (-100, -100)
        ] + [(-1, -1)] * rtn.steel_rtn.num_tasks  # task counts from 1
        for task in range(1, para.num_tasks + 1):
            if task_start[task] < 0:
                continue
            task_time[task] = (task_start[task], task_start[task] + 1)
        return task_time
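
# A minimal, standalone sketch of the ready-time / round-robin pattern used in
# find_feasible_schedule above. The names (assign_stage, num_units, proc_len,
# trans_len) are illustrative assumptions, not part of the original model: heats
# are popped in ready-time order, assigned to the stage's units in rotation, and
# their post-transfer ready times seed the PriorityQueue of the next stage.
try:
    from queue import PriorityQueue   # Python 3
except ImportError:
    from Queue import PriorityQueue   # Python 2


def assign_stage(ready_q, num_units, proc_len, trans_len):
    """Return (start_times, next_stage_queue) for one processing stage."""
    unit_free = [0] * num_units      # earliest free time slot per unit
    next_q = PriorityQueue()         # ready times feeding the next stage
    starts = {}
    unit = 0
    while not ready_q.empty():
        ready_t, heat = ready_q.get_nowait()
        start = max(unit_free[unit], ready_t)
        starts[heat] = start
        unit_free[unit] = start + proc_len
        next_q.put_nowait((unit_free[unit] + trans_len, heat))
        unit = (unit + 1) % num_units    # round-robin over the stage's units
    return starts, next_q


# Usage: three heats, all ready at slot 0, processed on two units.
q = PriorityQueue()
for heat in range(3):
    q.put_nowait((0, heat))
starts, q_next = assign_stage(q, num_units=2, proc_len=4, trans_len=1)
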
Example #43
0
from Queue import PriorityQueue
from fractions import Fraction

nums = {}

seen = set()

pq = PriorityQueue()
frac = Fraction(1, 2), Fraction(1, 2)
pq.put_nowait(((frac[0] + frac[1]).denominator, (frac[0], frac[1])))
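
# Best-first search over pairs of unit fractions: the priority is the denominator of
# frac1 + frac2, so the pair whose sum has the smallest denominator is examined first.
# nums counts how many distinct pairs produce each sum denominator, and the loop stops
# once some denominator has been produced more than 5 times.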

while not pq.empty():
    weight, (frac1, frac2) = pq.get_nowait()
    hashing = frac1.denominator, frac2.denominator
    if hashing not in seen:
        seen.add(hashing)
        if weight not in nums:
            nums[weight] = 0

        nums[weight] += 1
        print '{} + {} = {}'.format(frac1, frac2, frac1 + frac2)
        if nums[weight] > 5:
            print weight
            print nums
            break
        fracp = Fraction(1, frac1.denominator + 1), frac2
        fracpp = frac1, Fraction(1, frac2.denominator + 1)
        pq.put_nowait(
            (((fracp[0] + fracp[1]).denominator), (fracp[0], fracp[1])))
        pq.put_nowait(
            (((fracpp[0] + fracpp[1]).denominator), (fracpp[0], fracpp[1])))
class AsyncoreReactor(object):
    _thread = None
    _is_live = False
    logger = logging.getLogger("Reactor")

    def __init__(self):
        self._timers = PriorityQueue()
        self._map = {}

    def start(self):
        self._is_live = True
        self._thread = threading.Thread(target=self._loop, name="hazelcast-reactor")
        self._thread.daemon = True
        self._thread.start()

    def _loop(self):
        self.logger.debug("Starting Reactor Thread")
        Future._threading_locals.is_reactor_thread = True
        while self._is_live:
            try:
                asyncore.loop(count=1000, timeout=0.01, map=self._map)
                self._check_timers()
            except select.error as err:
                # TODO: parse error type to catch only error "9"
                pass
            except:
                self.logger.exception("Error in Reactor Thread")
                # TODO: shutdown client
                return
        self.logger.debug("Reactor Thread exited.")

    def _check_timers(self):
        now = time.time()
        while not self._timers.empty():
            try:
                _, timer = self._timers.queue[0]
            except IndexError:
                return

            if timer.check_timer(now):
                self._timers.get_nowait()
            else:
                return

    def add_timer_absolute(self, timeout, callback):
        timer = Timer(timeout, callback, self._cleanup_timer)
        self._timers.put_nowait((timer.end, timer))
        return timer

    def add_timer(self, delay, callback):
        return self.add_timer_absolute(delay + time.time(), callback)

    def shutdown(self):
        for connection in self._map.values():
            try:
                connection.close(HazelcastError("Client is shutting down"))
            except OSError, connection:
                if connection.args[0] == socket.EBADF:
                    pass
                else:
                    raise
        self._map.clear()
        self._is_live = False
        self._thread.join()
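
# _check_timers above peeks at the smallest (deadline, timer) pair via
# PriorityQueue.queue[0] and only pops it once it is due. A minimal standalone
# sketch of that peek-then-pop pattern (check_timers and the string timer names
# are illustrative stand-ins, not the Hazelcast Timer API):
import time
try:
    from queue import PriorityQueue   # Python 3
except ImportError:
    from Queue import PriorityQueue   # Python 2

timers = PriorityQueue()
timers.put_nowait((time.time() + 0.05, "t1"))
timers.put_nowait((time.time() + 10.0, "t2"))


def check_timers(q):
    now = time.time()
    while not q.empty():
        deadline, name = q.queue[0]   # peek at the earliest deadline without removing it
        if deadline > now:
            return                    # the earliest timer is not due yet
        q.get_nowait()                # pop and "fire" the due timer
        print("fired " + name)


time.sleep(0.1)
check_timers(timers)   # fires t1 only; t2 stays queued
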
Example #45
0
class RandomDelayedAction(threading.Thread):
    def __init__(self):

        threading.Thread.__init__(self)

        # Job queue
        self._pq = PriorityQueue()
        self._pq_lock = threading.RLock()
        self._exec_lock = threading.RLock()

        if NO_OP:
            return

        # Capture ingress SYN/ACK traffic into queue in a separate process.
        self._pkt_queue = multiprocessing.Queue()
        pcap_p = multiprocessing.Process(target=_pcap_process, args=(self._pkt_queue,))
        pcap_p.daemon = True
        pcap_p.start()

        # Introduce packet delays based on real performance.
        self._pkt_in_profiler = DelayProfiler("./profile/%s/%s-pkt-in.csv" % (DELAY_PROFILE_TYPE, DELAY_PROFILE))
        self._flow_mod_profiler = DelayProfiler("./profile/%s/%s-flow-mod.csv" % (DELAY_PROFILE_TYPE, DELAY_PROFILE))

        # Part of the ovs overhead that has not been accounted for.
        self._unused_ovs_overhead = 0

        # Start loop that executes jobs and that processes tcpdump output.
        self.daemon = True
        self.start()

        print "*" * 80
        print 'Delayed Action, using profile "%s"-"%s".' % (DELAY_PROFILE_TYPE, DELAY_PROFILE)
        print "*" * 80

    def _get_delay(self, filter_obj):

        if NO_OP:
            return 0

        if isinstance(filter_obj, ofp_packet_in):
            return self._pkt_in_profiler.get_delay()

        elif isinstance(filter_obj, ofp_flow_mod):
            return self._flow_mod_profiler.get_delay()

        return 0

    def add_job(self, filter_obj, func, *args, **kwargs):

        delay = self._get_delay(filter_obj) - MAGIC_OVERHEAD

        if delay <= 0.002:
            return self._execute(func, *args, **kwargs)
        elif delay > 5:
            return  # Drop straight away

        current_time = time.time()

        # Compensate for OVS overhead, but only for packet-in events.
        if isinstance(filter_obj, ofp_packet_in):
            pkt_in = args[1]
            (src_port, dst_port) = _get_tcp_src_dst_ports(pkt_in.data)
            if src_port and dst_port:
                ovs_overhead = self._get_ovs_overhead(src_port, dst_port, current_time)
                ovs_overhead += self._unused_ovs_overhead
                delay = delay - ovs_overhead
                if delay <= 0:
                    # self._unused_ovs_overhead += 0.0 - delay #TODO: Should we do this?
                    return self._execute(func, *args, **kwargs)

        # Add event to job queue.
        with self._pq_lock:
            self._pq.put((delay + current_time, func, args, kwargs))

    def run(self):

        if NO_OP:
            return

        while True:

            # Peek
            current_time = time.time()
            try:
                with self._pq_lock:
                    (next_time, _, _, _) = self._pq.queue[0]
                if current_time < next_time:
                    raise IndexError

            except IndexError:
                time.sleep(0.001)
                continue

            # Pop
            try:
                with self._pq_lock:
                    (_, func, args, kwargs) = self._pq.get_nowait()
            except Empty:
                continue

            # Run the job.
            self._execute(func, *args, **kwargs)

    def _get_ovs_overhead(self, src_port, dst_port, current_time, max_attempt=5):
        """
        Repeatedly asks whether pcap has seen <src_port, dst_port>, stopping once the
        pair appears in the pcap history. Extracts the pcap timestamp; from the current
        time we can compute and return the overhead introduced by OVS.
        
        """
        # Average loop count is around 2.
        for _ in range(max_attempt):

            try:
                (timestamp, src, dst) = self._pkt_queue.get_nowait()
            except Empty:
                return 0  # What usually happens is pcap cannot keep up

            if src == src_port and dst == dst_port:
                return current_time - timestamp + 0.001  # Magic number

        return 0  # Almost never happens.

    def _execute(self, func, *args, **kwargs):

        try:
            with self._exec_lock:
                func(*args, **kwargs)
        except Exception, err:
            print >> sys.stderr, "DelayedAction exception:", err
            print >> sys.stderr, traceback.format_exc()
Example #46
0
def learnDT(learnDTNode, func, dist, initCons, params):
    # Step 1: Initialize decision tree, score, and worklist

    # The decision tree is represented by dt, which is a map of type
    #
    #  type params:
    #    I : internal node
    #    L : leaf node
    #
    #  types:
    #    dt : {int : (I * _DT_INTERNAL) | (L * _DT_LEAF) }
    dt = {}
    worklist = PriorityQueue()
    index = 0
    depth = 1
    (dtInternalData, dtInternalScore, dtLeafData,
     dtLeafScore) = learnDTNode(func, dist, initCons)
    gain = dtInternalScore - dtLeafScore
    worklist.put_nowait((-gain, dtInternalData, dtLeafData, index, depth))
    score = dtLeafScore
    size = 1

    # Step 2: Iterate through the worklist and construct internal nodes
    while True:
        # Step 2a: Get the next element (break if worklist is empty)
        if worklist.empty():
            break
        (minusGain, dtInternalData, dtLeafData, index,
         depth) = worklist.get_nowait()
        gain = -minusGain
        log('Internal node index: ' + str(index), INFO)

        # Step 2b: Get the internal data, and add to decision tree
        if dtInternalData is None:
            log('No internal data!', INFO)
            worklist.put_nowait(
                (2.0, dtInternalData, dtLeafData, index, depth))
            if gain < -1.5:
                log('No internal nodes remaining, ending!', INFO)
                break
            else:
                continue

        (dtInternalNode, lcons, rcons) = dtInternalData
        dt[index] = (dtInternalNode, _DT_INTERNAL)

        # Step 2c: Learn the left and right children
        (dtInternalDataLeft, dtInternalScoreLeft, dtLeafDataLeft,
         dtLeafScoreLeft) = learnDTNode(func, dist, lcons)
        (dtInternalDataRight, dtInternalScoreRight, dtLeafDataRight,
         dtLeafScoreRight) = learnDTNode(func, dist, rcons)
        gainLeft = dtInternalScoreLeft - dtLeafScoreLeft
        gainRight = dtInternalScoreRight - dtLeafScoreRight

        # Step 2d: Add children to worklist
        worklist.put_nowait((-gainLeft, dtInternalDataLeft, dtLeafDataLeft,
                             2 * index + 1, depth + 1))
        worklist.put_nowait((-gainRight, dtInternalDataRight, dtLeafDataRight,
                             2 * index + 2, depth + 1))

        # Step 2e: Compute score
        score += gain
        size += 2
        log('Current gain: ' + str(gain), INFO)
        log('Current score: ' + str(score), INFO)
        log('Current size: ' + str(size), INFO)

        # Step 2f: Check stopping conditions
        if not params.minGain is None and gain < params.minGain:
            log('Gain too small, ending!', INFO)
            break
        if not params.tgtScore is None and score >= params.tgtScore:
            log('Achieved target score, ending!', INFO)
            break
        if not params.maxSize is None and size >= params.maxSize:
            log('Reached maximum size, ending!', INFO)
            break
        if gain < -1.5:
            log('No internal nodes remaining, ending!', INFO)
            break

    # Step 3: Iterate through remaining nodes and construct leaf nodes
    while not worklist.empty():
        (minusGain, dtInternalData, dtLeafData, index,
         depth) = worklist.get_nowait()
        gain = -minusGain
        if dtInternalData is None and gain > 0.0:
            raise Exception('None node with non-zero gain: ' +
                            str(dtInternalData))
        log('Leaf node index: ' + str(index), INFO)
        dt[index] = (dtLeafData, _DT_LEAF)

    # Step 4: Construct the decision tree
    return DT(_learnDTHelper(dt, 0))
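
# learnDT pops the highest-gain node first by negating the gain before insertion,
# since PriorityQueue always returns the smallest key. A minimal sketch of that
# max-priority-via-negation idiom (the node labels are made-up placeholders):
try:
    from queue import PriorityQueue   # Python 3
except ImportError:
    from Queue import PriorityQueue   # Python 2

worklist = PriorityQueue()
for gain, node in [(0.4, 'a'), (1.7, 'b'), (0.9, 'c')]:
    worklist.put_nowait((-gain, node))   # store -gain so the best node sorts first

best_minus_gain, best_node = worklist.get_nowait()
print("%s %s" % (best_node, -best_minus_gain))   # -> b 1.7
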
Example #47
0
def parse(sequence, expects, timeout):
    q = PriorityQueue() # The unprocessed items end up in the queue.
                        # Parsing constantly fills it up, and cannot
                        # progress after it becomes empty.
    wait = defaultdict(list) # Items that wait for reduction. (start, rule) -> [item]
    fini = defaultdict(list) # Items that have been finished at (start, rule) -> [(r_badness, stop, value)]
    halt = time() + timeout # When we should give up.

    for rule in expects: # The queue is populated with initial starting states.
        if valid_compound(rule):
            q.put((0, 0, 0, rule, []))
    while not q.empty():
        if halt < time():
            raise Exception("timeout")
        badness, start, index, rule, matches = q.get_nowait()
        # Queue is filled up from the results of shifting.
        if ((isinstance(rule, Group) and len(rule) == len(matches)) or
            (isinstance(rule, Plus) and len(matches) >= 1) or
            (isinstance(rule, Star))):
            # If shifting results in a completely reduced construct, we want to reduce using it.
            # Reduction usually results in one or more shifts and it is stored
            # to allow worse reductions with the same rule again.
            if start == 0 and index == len(sequence):
                yield Reduction(rule, matches, badness)
                halt = time() + timeout # reset halt when we succeed.
                continue
            else:
                result = Reduction(rule, matches, badness)
                fini[(start, rule)].append((index, result))
                for g_badness, g_start, g_rule, g_matches in wait[(start, rule)]:
                    q.put((
                        g_badness + result.badness,
                        g_start,
                        index,
                        g_rule,
                        g_matches + [result]))
                if isinstance(rule, Group):
                    continue
        if index >= len(sequence): # Some rules may appear at positions where they cannot complete.
            continue
        subrule = rule.at(len(matches))
        subrules = ()
        match = subrule.match(sequence[index])
        if subrule.validate(sequence[index]) and match[1]:
            if isinstance(match[1], Keyword): # Operator inserted where Keyword matches.
                shift_badness = 1
                term = Operator(match[1], sequence[index])
            else:
                shift_badness = 10
                term = sequence[index]
            q.put((
                badness + shift_badness,
                start,
                index + 1,
                rule,
                matches + [term]))
        # Even if the rule matched a symbol or construct, it may match in other ways too
        if isinstance(subrule, ListRule):
            subrules = [(10, subrule)]
        elif isinstance(subrule, Context): # Larger constructs with many indirections 
                                           # are treated as worse results.
            subrules = [(100, d_rule) for d_rule in subrule.rules if valid_compound(d_rule)]
            for pre, ind_rule in subrule.indirect_rules:
                if valid_compound(ind_rule):
                    subrules.append((100 + len(pre)*10, ind_rule))

        # If there are rules that can reduce, we shift with them.
        # Otherwise we add a blank shift to parse the rule and initiate fini to fill up.
        for b_badness, subrule in subrules:
            if fini.has_key((index, subrule)):
                for stop, result in fini[(index, subrule)]:
                    q.put((
                        b_badness + badness + result.badness,
                        start,
                        stop,
                        rule,
                        matches + [result]))
            elif isinstance(rule, ListRule):
                q.put((0, index, index, subrule, []))
                # Avoid recursion
                fini[(start, rule)] = []
            # Even if fini contained items, at this point we're not sure if
            # fini still fills up, so we need to add a wait every time.
            wait[(index, subrule)].append((b_badness+badness, start, rule, matches))
Example #48
0
File: aws.py Project: santtu/freezr
class AWS(object):
    """Abstraction of an AWS state. Typically this is fed `INSTANCES`
    on startup (via `Mock`) and this maintains information on state
    changes on those instances later, for example, terminated
    instances are marked terminated etc."""

    STABLE_INSTANCE_STATES = ('running', 'stopped', 'terminated')

    def __init__(self, instances=[]):
        """Initialize AWS state mockup from given list of
        `instances`. Each `instance` record in `instances` is a
        dictionary. See global `INSTANCES` for example on how to
        configure."""

        self.log = logging.getLogger('freezr.systemtests.aws.AWS')
        self.instances = {}
        self.count = 0
        self.ops = PriorityQueue()
        for instance in deepcopy(instances):
            self.add_instance(instance)

    def add_instance(self, data):
        """Adds a single instance data. This will set meaningful
        defaults on any missing fields (including instance id, which
        is autogenerated if missing). If the initial instance state is
        in a transitioning state it'll be scheduled for later update
        automatically."""

        self.count += 1

        instance = {
            'id': 'i-%06d' % (self.count,),
            'region': DEFAULT_REGION,
            'root_device_type': DEFAULT_ROOT_DEVICE_TYPE,
            'instance_type': DEFAULT_INSTANCE_TYPE,
            'state': DEFAULT_STATE,
            'vpc_id': DEFAULT_VPC_ID,
            'tags': {},
            }

        instance.update(data)
        self.instances[instance['id']] = instance

        if instance['state'] not in self.STABLE_INSTANCE_STATES:
            self.later(10, self.instance_state_proceed, instance)

        self.log.debug('Added instance: %r', instance)

    def later(self, secs, fn, *args, **kwargs):
        """Schedule an operation a minimum of `secs` later, calling
        `fn` with args `args` and kwargs `kwargs`."""
        when = time() + secs
        op = (when, lambda: fn(*args, **kwargs))
        self.ops.put(op)
        self.log.debug("Added later %.1fs: %r", secs, op)

    def tick(self):
        """'Tick' the AWS state by checking whether there are any
        pending operations (see `later`) that should be run before
        proceeding."""
        self.log.debug("tick (%d ops)", self.ops.qsize())
        while not self.ops.empty():
            when, call = self.ops.get_nowait()

            # not yet?
            if time() < when:
                self.log.debug("Task due in %.1fs, put it back",
                               when - time())
                self.ops.put((when, call))
                return

            self.log.debug("Running task due for %.1fs: %r",
                           when, call)

            call()

    def get_instances(self):
        """Return a list of instances. The returned list elements try
        to mimic the behavior of `boto.ec2.instances.Instance` to the
        extent needed by freezr."""
        self.tick()
        self.log.debug("get_instances: %d instances",
                       len(self.instances))
        return [AttrDict(instance) for instance in self.instances.values()]

    def terminate_instance(self, id):
        self.tick()
        self.log.debug("terminate_instance: %r", id)
        instance = self.instances[id]
        assert instance['state'] == 'running'
        instance['state'] = 'terminating'
        self.later(10, self.instance_state_proceed, instance)

    def stop_instance(self, id):
        self.tick()
        self.log.debug("stop_instance: %r", id)
        instance = self.instances[id]
        assert instance['state'] == 'running'
        instance['state'] = 'stopping'
        self.later(10, self.instance_state_proceed, instance)

    def start_instance(self, id):
        self.tick()
        self.log.debug("start_instance: %r", id)
        instance = self.instances[id]
        assert instance['state'] == 'stopped'
        instance['state'] = 'pending'
        self.later(10, self.instance_state_proceed, instance)

    # operations on instances
    def instance_state_proceed(self, instance):
        """Given an instance that is in a transitioning state, move it
        to the matching stable state (e.g. "pending" -> "running",
        "terminating" -> "terminated", "stopping" -> "stopped")."""

        self.log.debug("instance_state_proceed: instance %s, state %s",
                       instance['id'], instance['state'])

        state = instance['state']

        if state == 'pending':
            state = 'running'
        elif state == 'stopping':
            state = 'stopped'
        elif state == 'terminating':
            state = 'terminated'

        instance['state'] = state
        self.log.debug("instance_state_proceed: final state %s", state)
for t in token_count_dict:
    q.put([-token_count_dict[t], t])

token_dict = {}
# add special token
token_dict[zero_token] = 0
token_dict[unknown_token] = 1
token_dict[start_token] = 2
token_dict[end_token] = 3
token_index = 4

token_count_dict = {}

# priority queue
while not q.empty():
    get = q.get_nowait()
    if token_index == max_dict_size:
        break
    token_dict[get[1]] = token_index
    token_index += 1

# -------------------------build data pair------------------------------
# write to file
with open(file_name) as f:
    line_count = 0
    last_exist = False
    last_list = []
    pair_count = 0
    total_token = 0

    # one way flag
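
# The vocabulary-building fragment above stores (-count, token) so that the most
# frequent tokens come out of the PriorityQueue first. A small standalone sketch
# of that idiom (the counts, special tokens and max_dict_size are illustrative):
try:
    from queue import PriorityQueue   # Python 3
except ImportError:
    from Queue import PriorityQueue   # Python 2

counts = {'the': 5, 'cat': 2, 'sat': 1, 'mat': 2}
q = PriorityQueue()
for token, count in counts.items():
    q.put_nowait((-count, token))      # negate so the highest counts pop first

token_dict = {'<pad>': 0, '<unk>': 1}  # reserved ids for special tokens
max_dict_size = 4
while not q.empty() and len(token_dict) < max_dict_size:
    _, token = q.get_nowait()
    token_dict[token] = len(token_dict)
# token_dict -> {'<pad>': 0, '<unk>': 1, 'the': 2, 'cat': 3} (ties broken alphabetically)
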
Example #50
0
class AbstractBaseFrontier(object, LoggingMixin):
    """
    A base class for implementing frontiers.

    Basically this class provides the different general methods and
    configuration parameters used for frontiers.
    """

    def __init__(self, settings, log_handler, front_end_queues, prioritizer,
        unique_hash='sha1'):
        """
        Initialize the frontier and instantiate the
        :class:`SQLiteSingleHostUriQueue`.

        By default the frontier will use the `sha1` hash function for the
        unique uri filter. For very large crawls you might want to use a
        larger hash function (`sha512`, e.g.)
        """
        LoggingMixin.__init__(self, log_handler, settings.LOG_LEVEL_MASTER)
        # front end queue
        self._prioritizer = prioritizer
        self._front_end_queues = front_end_queues
        # checkpointing
        self._checkpoint_interval = settings.FRONTIER_CHECKPOINTING
        self._uris_added = 0

        # the heap
        self._heap = PriorityQueue(maxsize=settings.FRONTIER_HEAP_SIZE)
        self._heap_min_size = settings.FRONTIER_HEAP_MIN

        # a list of uris currently being crawled.
        self._current_uris = dict()
        # dns cache
        self._dns_cache = DnsCache(settings)
        # unique uri filter
        self._unique_uri = UniqueUriFilter(unique_hash)
        for url in self._front_end_queues.all_uris():
            assert not self._unique_uri.is_known(url, add_if_unknown=True)

        # the sinks
        self._sinks = []

        # timezone
        self._timezone = settings.LOCAL_TIMEZONE
        self._logger.info("frontier::initialized")

    def add_sink(self, sink):
        """
        Add a sink to the frontier. A sink will be responsible for the long
        term storage of the crawled contents.
        """
        self._sinks.append(sink)

    def add_uri(self, curi):
        """
        Add the specified :class:`CrawlUri` to the frontier.

        `next_date` is a datetime object for the next time the uri should be
        crawled.

        Note: time-based crawling is never strict; it is generally used as some
        kind of prioritization.
        """
        if self._unique_uri.is_known(curi.url, add_if_unknown=True):
            # we already know this uri
            self._logger.debug("frontier::Trying to update a known uri... " + \
                    "(%s)" % (curi.url,))
            return

        self._logger.info("frontier::Adding '%s' to the frontier" % curi.url)
        self._front_end_queues.add_uri(self._uri_from_curi(curi))
        self._maybe_checkpoint()

    def update_uri(self, curi):
        """
        Update a given uri.
        """
        self._front_end_queues.update_uri(self._uri_from_curi(curi))
        self._maybe_checkpoint()

    def get_next(self):
        """
        Return the next uri scheduled for crawling.
        """
        if self._heap.qsize() < self._heap_min_size:
            self._update_heap()

        try:
            (_next_date, next_uri) = self._heap.get_nowait()
        except Empty:
            # heap is empty, there is nothing to crawl right now!
            # maybe log this in the future
            raise

        return self._crawluri_from_uri(next_uri)

    def close(self):
        """
        Close the underlying frontend queues.
        """
        self._front_end_queues.checkpoint()
        self._front_end_queues.close()

    def _add_to_heap(self, uri, next_date):
        """
        Add an URI to the heap that is ready to be crawled.
        """
        self._heap.put_nowait((next_date, uri))
        (url, _etag, _mod_date, _next_date, _prio) = uri
        self._current_uris[url] = uri
        self._logger.debug("frontier::Adding '%s' to the heap" % url)

    def _reschedule_uri(self, curi):
        """
        Return the `next_crawl_date` for :class:`CrawlUri`s.
        """
        (prio, delta) = self._prioritizer.calculate_priority(curi)
        now = datetime.now(self._timezone)
        return (prio, time.mktime((now + delta).timetuple()))

    def _ignore_uri(self, curi):
        """
        Ignore a :class:`CrawlUri` from now on.
        """
        self._front_end_queues.ignore_uri(curi.url, curi.status_code)

    def _uri_from_curi(self, curi):
        """
        Create the uri tuple from the :class:`CrawlUri` and calculate the
        priority.

        Overwrite this method in more specific frontiers.
        """
        etag = mod_date = None
        if curi.rep_header:
            if "Etag" in curi.rep_header:
                etag = curi.rep_header["Etag"]
            if "Last-Modified" in curi.rep_header:
                mod_date = time.mktime(deserialize_date_time(
                    curi.rep_header["Last-Modified"]).timetuple())
            if not mod_date and 'Date' in curi.rep_header:
                mod_date = time.mktime(deserialize_date_time(
                    curi.rep_header["Date"]).timetuple())

        if mod_date:
            # only reschedule if it has been crawled before
            (prio, next_crawl_date) = self._reschedule_uri(curi)
        else:
            (prio, next_crawl_date) = (1,
                    time.mktime(datetime.now(self._timezone).timetuple()))

        return (curi.url, etag, mod_date, next_crawl_date, prio)

    def _crawluri_from_uri(self, uri):
        """
        Convert an URI tuple to a :class:`CrawlUri`.

        Replace the hostname with the real IP in order to cache DNS queries.
        """
        (url, etag, mod_date, _next_date, prio) = uri

        parsed_url = urlparse(url)

        # dns resolution and caching
        port = parsed_url.port
        if not port:
            port = PROTOCOLS_DEFAULT_PORT[parsed_url.scheme]

        effective_netloc = self._dns_cache["%s:%s" % (parsed_url.hostname,
            port)]

        curi = CrawlUri(url)
        curi.effective_url = url.replace(parsed_url.netloc, "%s:%s" %
                effective_netloc)
        curi.current_priority = prio
        curi.req_header = dict()
        if etag:
            curi.req_header["Etag"] = etag
        if mod_date:
            mod_date_time = datetime.fromtimestamp(mod_date)
            curi.req_header["Last-Modified"] = serialize_date_time(
                    mod_date_time)

        curi.optional_vars = dict()
        if parsed_url.username and parsed_url.password:
            curi.optional_vars[CURI_SITE_USERNAME] = \
                parsed_url.username.encode()
            curi.optional_vars[CURI_SITE_PASSWORD] = \
                parsed_url.password.encode()

        return curi

    def _update_heap(self):
        """
        Abstract method. Implement this in the actual Frontier.

        The implementation should really only add uris to the heap if they can
        be downloaded right away.
        """
        pass

    def _maybe_checkpoint(self, force_checkpoint=False):
        """
        Periodically checkpoint the state db.
        """
        self._uris_added += 1
        if self._uris_added > self._checkpoint_interval or force_checkpoint:
            self._front_end_queues.checkpoint()
            self._uris_added = 0

    def process_successful_crawl(self, curi):
        """
        Called when an URI has been crawled successfully.

        `curi` is a :class:`CrawlUri`
        """
        self.update_uri(curi)

        if curi.optional_vars and CURI_EXTRACTED_URLS in curi.optional_vars:
            for url in curi.optional_vars[CURI_EXTRACTED_URLS].split("\n"):
                if len(url) > 5 and not self._unique_uri.is_known(url):
                    self.add_uri(CrawlUri(url))

        del self._current_uris[curi.url]

        for sink in self._sinks:
            sink.process_successful_crawl(curi)

    def process_not_found(self, curi):
        """
        Called when an URL was not found.

        This could mean that the URL has been removed from the server. If so,
        do something about it!

        Override this method in the actual frontier implementation.
        """
        del self._current_uris[curi.url]
        self._ignore_uri(curi)

        for sink in self._sinks:
            sink.process_not_found(curi)

    def process_redirect(self, curi):
        """
        Called when there were too many redirects for an URL, or the site has
        not been updated since the last visit.

        In the latter case, update the internal uri and increase the priority
        level.
        """
        del self._current_uris[curi.url]

        if curi.status_code in [301, 302]:
            # simply ignore the URL. The URL that is being redirected to is
            # extracted and added in the processing
            self._ignore_uri(curi)

        if curi.status_code == 304:
            # the page has not been modified since the last visit! Update it
            # NOTE: prio increasing happens in the prioritizer
            self.update_uri(curi)

        for sink in self._sinks:
            sink.process_redirect(curi)

    def process_server_error(self, curi):
        """
        Called when there was some kind of server error.

        Override this method in the actual frontier implementation.
        """
        del self._current_uris[curi.url]
        self._ignore_uri(curi)

        for sink in self._sinks:
            sink.process_server_error(curi)
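
# get_next() above drains the heap with get_nowait() and treats the Empty exception
# as "nothing to crawl right now". A tiny standalone sketch of that non-blocking
# pattern (the URLs and maxsize are illustrative values):
try:
    from queue import PriorityQueue, Empty   # Python 3
except ImportError:
    from Queue import PriorityQueue, Empty   # Python 2

heap = PriorityQueue(maxsize=2)
heap.put_nowait((1, 'http://example.com/a'))
heap.put_nowait((2, 'http://example.com/b'))
try:
    while True:
        next_date, url = heap.get_nowait()
        print(url)
except Empty:
    pass   # heap drained: nothing to crawl right now
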
Example #51
0
class MCMH(object):
    '''
    A generic searching algorithm that samples from the distribution of the
    scores in accordance with the algorithm's belief in the viability of that
    region.

    Supports asynchronous updating (i.e., it is possible to draw sequential
    samples without updating the object's knowledge about the distribution)

    Note that this has a min_dist for sampling, such that if the next
    largest sampled thumbnail by frameno is closer than min_dist, it will not
    draw that sample.
    '''
    def __init__(self, elements, search_interval, clip=None):
        '''
        elements: the number of elements to search over.
        search_interval: The number of frames between search frames plus the
                         start frame.
        clip: how much of the bookends of the region to ignore, as a fraction.

        NOTES:
            Search interval is the number of frames between the search frames.
            In this diagram, we have a search interval of 4, a search frame j,
            and a search step (not surfaced to mcmh) of 2.
            ...   j-1   j     j+1   j+2   j+3   j+4   j+5   j+6   ...
                        ^                        ^                  search frames
                        ^            ^                              search step frames
                        |----search interval-----|
            
            * indicates frames that will be processed during a local search.

        '''
        self.search_interval = search_interval
        self.clip = clip
        self.elements = elements
        self._lock = threading.Lock()
        self._setup()

    def _setup(self):
        '''
        Allocates all the required memory and things.
        '''
        N = self.elements
        c = self.clip
        intr = self.search_interval - 1

        start = int(c * N)
        stop = int(N - (c * N))
        search_frames = np.arange(start, stop, intr + 1).astype(int)

        self._tot = 0.  # sum of scores
        self.n_samples = 0.
        self._n = 0.  # total scored

        self._first = search_frames[0]
        self._last = search_frames[-1]
        # search frame to frame number dictionary
        self._sf2fno = {n: v for n, v in enumerate(search_frames)}
        # frame number to search frame dictionary
        self._fno2sf = {v: k for k, v in self._sf2fno.iteritems()}
        self._scores = []  # list of frames and scores, sorted by frameno.
        self._scored = [False] * len(
            search_frames)  # whether or not frame has been scored
        self._srt_scores = []  # list of scores, sorted by the score
        self._search_queue = PriorityQueue()
        self._sample_queue = range(len(search_frames))
        self.max_samps = len(search_frames)
        self._up_next = None  # for ensuring search intervals are produced.

    @property
    def _mean(self):
        return self._tot / max(self._n, 1.)

    def update(self, frameno, score):
        with self._lock:
            self._update(frameno, score)

    def _update(self, frameno, score):
        '''
        Updates the knowledge of the algorithm. A score of 'None'
        indicates there was a problem with this search frame.
        '''
        sf = self._fno2sf.get(frameno, None)
        if sf is None:
            # That is not a valid search frame.
            _log.warn('Invalid search frame.')
            return
        if self._scored[sf]:
            # you've already sampled this frame.
            _log.debug('Sample has already been scored.')
            return
        insort(self._scores, (sf, score))
        # we have to keep track of which scores were actually
        # updated in case we get a score of 0.
        self._scored[sf] = True
        if frameno < self._last:
            if self._scored[sf + 1]:
                # then you can search it!
                # add it to the search queue
                est = (self._get_score(sf) + self._get_score(sf + 1)) * 0.5
                self._search_queue.put((-est, sf))
        if frameno > self._first:
            if self._scored[sf - 1]:
                # then you can search it!
                # add it to the search queue
                est = (self._get_score(sf - 1) + self._get_score(sf)) * 0.5
                self._search_queue.put((-est, sf - 1))
        insort(self._srt_scores, score)
        self._tot += score
        self._n += 1
        self.n_samples += 1
        _log.debug('Sampling %.1f%% complete',
                   self.n_samples * 100. / self.max_samps)

    def get_search(self):
        '''
        Returns an interval to search.
        '''
        try:
            item = self._search_queue.get_nowait()
        except Empty:
            return
        sf = item[1]
        f1 = self._sf2fno[sf]
        f2 = self._sf2fno[sf + 1]
        s1 = self._get_score(sf)
        s2 = self._get_score(sf + 1)
        return (f1, s1, f2, s2)

    def get_sample(self):
        with self._lock:
            return self._get_sample()

    def _get_sample(self):
        '''
        Returns a frame to search.
        '''
        if self._up_next is not None:
            # then return that to complete a local search interval
            sample = self._up_next
            self._up_next = None
            return self._sf2fno[sample]
        if not len(self._sample_queue):
            _log.debug_n('Sampling complete.')
            return None  # there is nothing left to sample.
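        # Rejection sampling: a candidate search frame is drawn uniformly, then
        # accepted with probability equal to the rank of its interpolated score
        # among the observed scores, biasing samples toward higher-scoring regions.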
        while True:
            sf = int(np.random.choice(self._sample_queue))
            isc = self._interp_score(sf)
            rnk = (1 + float(bisect_left(self._srt_scores, isc))) / (
                1 + len(self._srt_scores))
            if np.random.rand() < rnk:
                # then take the sample
                break
        self._sample_queue.remove(sf)
        if (sf + 1) in self._sample_queue:
            self._up_next = sf + 1
            self._sample_queue.remove(sf + 1)
        return self._sf2fno[sf]

    def _find_lt(self, sf):
        'Find the closest earlier frameno to sf'
        i = bisect_left(self._scores, (sf, 0.))
        if i:
            return self._scores[i - 1]
        return (-1, self._mean)

    def _find_gt(self, sf):
        'Find closest later frameno to sf'
        i = bisect_right(self._scores, (sf, 0))
        if i != len(self._scores):
            return self._scores[i]
        return (len(self._scored), self._mean)

    def _get_score(self, sf):
        'Locate the leftmost value exactly equal to x'
        i = bisect_left(self._scores, (sf, -np.inf))
        if i != len(self._scores) and self._scores[i][0] == sf:
            sf, score = self._scores[i]
            return score
        _log.exception('Could not locate score for search frame %i' % sf)
        raise ValueError('Could not locate the score for %i' % sf)

    def _interp_score(self, sf):
        '''
        Returns the interpolated score for a search frame.
        '''
        x1, y1 = self._find_lt(sf)
        x2, y2 = self._find_gt(sf)
        x3 = sf
        m = float(y2 - y1) / float(x2 - x1)
        return m * (x3 - x1) + y1
Example #52
0
from Queue import PriorityQueue

A = []

next_a = 3
next_a_s = next_a * next_a
pq = PriorityQueue()
pq.put_nowait((2, (2, 1)))
while not pq.empty():
    ans, (a, n) = pq.get_nowait()
    # print 'checking: {}^{} = {}'.format(a, n, ans)
    if ans > next_a:
        pq.put_nowait((ans, (a, n)))
        pq.put_nowait((next_a_s, (next_a, 2)))
        next_a += 1
        next_a_s = next_a * next_a
    else:
        if len(str(ans)) > 1 and a == sum(map(int, str(ans))):
            A.append(ans)
            print '{}^{} = {}'.format(a, n, ans)
        pq.put_nowait((ans * a, (a, n + 1)))
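# The loop above enumerates powers a**n in increasing order of their value; whenever
# the smallest pending power exceeds the next base, that base is seeded into the queue
# starting at its square. Powers with more than one digit whose digit sum equals the
# base a are printed and collected in A.
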
    print MAP_NFILES_DIR[max_nfiles]
    nfiles = 0
    for k in MAP_NFILES_DIR:
        nfiles += k * len(MAP_NFILES_DIR[k])
    print 'we found',nfiles,'files in total'
    print 'average number of files per leaf:',nfiles * 1. / n_leaves

    # tmp files
    ntmpfiles = len( get_all_files(maindir,ext='.h5_tmp') )
    print 'we found',ntmpfiles,'temp files'
    if ntmpfiles > 0: print 'WATCHOUT FOR TMP FILES!!!!'

    # find modif date for all files, and pop out the most recent ones
    get_all_files_modif_date(maindir)
    print '******************************************************'
    if not trim and not trimdryrun:
        print 'most recent files are:'
        for k in range(5):
            t,f = MODIFQUEUE.get_nowait()
            print f,'(',time.ctime(-t),')'
    elif trim or trimdryrun:
        ntoomany = nfiles - 1000000
        print 'we have',ntoomany,'too many files.'
        for k in range(ntoomany):
            t,f = MODIFQUEUE.get_nowait()
            print f,'(',time.ctime(-t),')'
            if trim:
                os.remove(f)
    # done
    print '******************************************************'
Example #54
0
    print MAP_NFILES_DIR[max_nfiles]
    nfiles = 0
    for k in MAP_NFILES_DIR:
        nfiles += k * len(MAP_NFILES_DIR[k])
    print 'we found', nfiles, 'files in total'
    print 'average number of files per leaf:', nfiles * 1. / n_leaves

    # tmp files
    ntmpfiles = len(get_all_files(maindir, ext='.h5_tmp'))
    print 'we found', ntmpfiles, 'temp files'
    if ntmpfiles > 0: print 'WATCHOUT FOR TMP FILES!!!!'

    # find modif date for all files, and pop out the most recent ones
    get_all_files_modif_date(maindir)
    print '******************************************************'
    if not trim and not trimdryrun:
        print 'most recent files are:'
        for k in range(5):
            t, f = MODIFQUEUE.get_nowait()
            print f, '(', time.ctime(-t), ')'
    elif trim or trimdryrun:
        ntoomany = nfiles - 1000000
        print 'we have', ntoomany, 'too many files.'
        for k in range(ntoomany):
            t, f = MODIFQUEUE.get_nowait()
            print f, '(', time.ctime(-t), ')'
            if trim:
                os.remove(f)
    # done
    print '******************************************************'
Example #55
0
import Queue
from Queue import PriorityQueue


queue = PriorityQueue(maxsize = 100)
queue.put((1, 1, "item 1"))
queue.put((1, 1, "item 2"))
queue.put((1, 1, "item 3"))
queue.put((1, 1, "item 3"))

print queue.get()
print queue.get()
print queue.get()
try:
    2 / 0
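    # the ZeroDivisionError raised by 2 / 0 aborts the try block before
    # get_nowait() is reached, so the 'zero' branch below is what prints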
    print queue.get_nowait()
except Queue.Empty:
    print "empty"
except ZeroDivisionError:
    print 'zero'
except:
    print 'other'
Example #56
0
class AbstractBaseFrontier(object, LoggingMixin):
    """
    A base class for implementing frontiers.

    Basically this class provides the different general methods and
    configuration parameters used for frontiers.
    """
    def __init__(self,
                 settings,
                 log_handler,
                 front_end_queues,
                 prioritizer,
                 unique_hash='sha1'):
        """
        Initialize the frontier and instantiate the
        :class:`SQLiteSingleHostUriQueue`.

        By default the frontier will use the `sha1` hash function for the
        unique uri filter. For very large crawls you might want to use a
        larger hash function (`sha512`, e.g.)
        """
        LoggingMixin.__init__(self, log_handler, settings.LOG_LEVEL_MASTER)
        # front end queue
        self._prioritizer = prioritizer
        self._front_end_queues = front_end_queues
        # checkpointing
        self._checkpoint_interval = settings.FRONTIER_CHECKPOINTING
        self._uris_added = 0

        # the heap
        self._heap = PriorityQueue(maxsize=settings.FRONTIER_HEAP_SIZE)
        self._heap_min_size = settings.FRONTIER_HEAP_MIN

        # a list of uris currently being crawled.
        self._current_uris = dict()
        # dns cache
        self._dns_cache = DnsCache(settings)
        # unique uri filter
        self._unique_uri = UniqueUriFilter(unique_hash)
        for url in self._front_end_queues.all_uris():
            assert not self._unique_uri.is_known(url, add_if_unknown=True)

        # the sinks
        self._sinks = []

        # timezone
        self._timezone = settings.LOCAL_TIMEZONE
        self._logger.info("frontier::initialized")

    def add_sink(self, sink):
        """
        Add a sink to the frontier. A sink will be responsible for the long
        term storage of the crawled contents.
        """
        self._sinks.append(sink)

    def add_uri(self, curi):
        """
        Add the specified :class:`CrawlUri` to the frontier.

        `next_date` is a datetime object for the next time the uri should be
        crawled.

        Note: time-based crawling is never strict; it is generally used as some
        kind of prioritization.
        """
        if self._unique_uri.is_known(curi.url, add_if_unknown=True):
            # we already know this uri
            self._logger.debug("frontier::Trying to update a known uri... " + \
                    "(%s)" % (curi.url,))
            return

        self._logger.info("frontier::Adding '%s' to the frontier" % curi.url)
        self._front_end_queues.add_uri(self._uri_from_curi(curi))
        self._maybe_checkpoint()

    def update_uri(self, curi):
        """
        Update a given uri.
        """
        self._front_end_queues.update_uri(self._uri_from_curi(curi))
        self._maybe_checkpoint()

    def get_next(self):
        """
        Return the next uri scheduled for crawling.
        """
        if self._heap.qsize() < self._heap_min_size:
            self._update_heap()

        try:
            (_next_date, next_uri) = self._heap.get_nowait()
        except Empty:
            # heap is empty, there is nothing to crawl right now!
            # maybe log this in the future
            raise

        return self._crawluri_from_uri(next_uri)

    def close(self):
        """
        Close the underlying frontend queues.
        """
        self._front_end_queues.checkpoint()
        self._front_end_queues.close()

    def _crawl_now(self, uri):
        """
        Convenience method for crawling an uri right away.
        """
        self._add_to_heap(uri, 3000)

    def _add_to_heap(self, uri, next_date):
        """
        Add an URI to the heap that is ready to be crawled.
        """
        self._heap.put_nowait((next_date, uri))
        (url, _etag, _mod_date, _next_date, _prio) = uri
        self._current_uris[url] = uri
        self._logger.debug("frontier::Adding '%s' to the heap" % url)

    def _reschedule_uri(self, curi):
        """
        Return the `next_crawl_date` for :class:`CrawlUri`s.
        """
        (prio, delta) = self._prioritizer.calculate_priority(curi)
        now = datetime.now(self._timezone)
        return (prio, time.mktime((now + delta).timetuple()))

    def _ignore_uri(self, curi):
        """
        Ignore a :class:`CrawlUri` from now on.
        """
        self._front_end_queues.ignore_uri(curi.url, curi.status_code)

    def _uri_from_curi(self, curi):
        """
        Create the uri tuple from the :class:`CrawlUri` and calculate the
        priority.

        Overwrite this method in more specific frontiers.
        """
        etag = mod_date = None
        if curi.rep_header:
            if "Etag" in curi.rep_header:
                etag = curi.rep_header["Etag"]
            if "Last-Modified" in curi.rep_header:
                mod_date = time.mktime(
                    deserialize_date_time(
                        curi.rep_header["Last-Modified"]).timetuple())
            if not mod_date and 'Date' in curi.rep_header:
                mod_date = time.mktime(
                    deserialize_date_time(curi.rep_header["Date"]).timetuple())

        if mod_date:
            # only reschedule if it has been crawled before
            (prio, next_crawl_date) = self._reschedule_uri(curi)
        else:
            (prio,
             next_crawl_date) = (1,
                                 time.mktime(
                                     datetime.now(self._timezone).timetuple()))

        return (curi.url, etag, mod_date, next_crawl_date, prio)

    def _crawluri_from_uri(self, uri):
        """
        Convert an URI tuple to a :class:`CrawlUri`.

        Replace the hostname with the real IP in order to cache DNS queries.
        """
        (url, etag, mod_date, _next_date, prio) = uri

        parsed_url = urlparse(url)

        # dns resolution and caching
        port = parsed_url.port
        if not port:
            port = PROTOCOLS_DEFAULT_PORT[parsed_url.scheme]

        effective_netloc = self._dns_cache["%s:%s" %
                                           (parsed_url.hostname, port)]

        curi = CrawlUri(url)
        curi.effective_url = url.replace(parsed_url.netloc,
                                         "%s:%s" % effective_netloc)
        curi.current_priority = prio
        curi.req_header = dict()
        if etag:
            curi.req_header["Etag"] = etag
        if mod_date:
            mod_date_time = datetime.fromtimestamp(mod_date)
            curi.req_header["Last-Modified"] = serialize_date_time(
                mod_date_time)

        curi.optional_vars = dict()
        if parsed_url.username and parsed_url.password:
            curi.optional_vars[CURI_SITE_USERNAME] = \
                parsed_url.username.encode()
            curi.optional_vars[CURI_SITE_PASSWORD] = \
                parsed_url.password.encode()

        return curi

    def _update_heap(self):
        """
        Abstract method. Implement this in the actual Frontier.

        The implementation should really only add uris to the heap if they can
        be downloaded right away.
        """
        pass

    def _maybe_checkpoint(self, force_checkpoint=False):
        """
        Periodically checkpoint the state db.
        """
        self._uris_added += 1
        if self._uris_added > self._checkpoint_interval or force_checkpoint:
            self._front_end_queues.checkpoint()
            self._uris_added = 0

    def process_successful_crawl(self, curi):
        """
        Called when an URI has been crawled successfully.

        `curi` is a :class:`CrawlUri`
        """
        self.update_uri(curi)

        if curi.optional_vars and CURI_EXTRACTED_URLS in curi.optional_vars:
            for url in curi.optional_vars[CURI_EXTRACTED_URLS].split("\n"):
                if len(url) > 5 and not self._unique_uri.is_known(url):
                    self.add_uri(CrawlUri(url))

        del self._current_uris[curi.url]

        for sink in self._sinks:
            sink.process_successful_crawl(curi)

    def process_not_found(self, curi):
        """
        Called when an URL was not found.

        This could mean that the URL has been removed from the server. If so,
        do something about it!

        Override this method in the actual frontier implementation.
        """
        del self._current_uris[curi.url]
        self._ignore_uri(curi)

        for sink in self._sinks:
            sink.process_not_found(curi)

    def process_redirect(self, curi):
        """
        Called when there were too many redirects for an URL, or the site has
        not been updated since the last visit.

        In the latter case, update the internal uri and increase the priority
        level.
        """
        del self._current_uris[curi.url]

        if curi.status_code in [301, 302]:
            # simply ignore the URL. The URL that is being redirected to is
            # extracted and added in the processing
            self._ignore_uri(curi)

        if curi.status_code == 304:
            # the page has not been modified since the last visit! Update it
            # NOTE: prio increasing happens in the prioritizer
            self.update_uri(curi)

        for sink in self._sinks:
            sink.process_redirect(curi)

    def process_server_error(self, curi):
        """
        Called when there was some kind of server error.

        Override this method in the actual frontier implementation.
        """
        del self._current_uris[curi.url]
        self._ignore_uri(curi)

        for sink in self._sinks:
            sink.process_server_error(curi)
class AsyncoreReactor(object):
    _thread = None
    _is_live = False
    logger = logging.getLogger("Reactor")

    def __init__(self):
        self._timers = PriorityQueue()
        self._map = {}

    def start(self):
        self._is_live = True
        self._thread = threading.Thread(target=self._loop, name="hazelcast-reactor")
        self._thread.daemon = True
        self._thread.start()

    def _loop(self):
        self.logger.debug("Starting Reactor Thread")
        Future._threading_locals.is_reactor_thread = True
        while self._is_live:
            try:
                asyncore.loop(count=10000, timeout=0.1, map=self._map)
                self._check_timers()
            except select.error as err:
                # TODO: parse error type to catch only error "9"
                pass
            except:
                self.logger.exception("Error in Reactor Thread")
                # TODO: shutdown client
                return
        self.logger.debug("Reactor Thread exited.")

    def _check_timers(self):
        now = time.time()
        while not self._timers.empty():
            try:
                _, timer = self._timers.queue[0]
            except IndexError:
                return

            if timer.check_timer(now):
                self._timers.get_nowait()
            else:
                return

    def add_timer_absolute(self, timeout, callback):
        timer = Timer(timeout, callback, self._cleanup_timer)
        self._timers.put_nowait((timer.end, timer))
        return timer

    def add_timer(self, delay, callback):
        return self.add_timer_absolute(delay + time.time(), callback)

    def shutdown(self):
        for connection in self._map.values():
            try:
                connection.close(HazelcastError("Client is shutting down"))
            except OSError, connection:
                if connection.args[0] == socket.EBADF:
                    pass
                else:
                    raise
        self._map.clear()
        self._is_live = False
        self._thread.join()