Code Example #1
import queue
import sys
import threading
import time

# `log` (used for ODM_WARNING below) is the project's own logger and is
# assumed to be in scope.


def parallel_map(func, items, max_workers=1, single_thread_fallback=True):
    """
    Our own implementation of parallel processing, which handles
    CTRL+C gracefully and falls back to single-threaded processing
    if an error occurs.
    :param func: function to execute on each object
    :param items: list of objects
    """
    global error
    error = None

    def process_one(q):
        func(q)

    def worker():
        global error

        while True:
            (num, q) = pq.get()
            if q is None or error is not None:
                pq.task_done()
                break

            try:
                process_one(q)
            except Exception as e:
                error = e
            finally:
                pq.task_done()

    if max_workers > 1:
        use_single_thread = False
        pq = queue.PriorityQueue()
        threads = []
        for i in range(max_workers):
            t = threading.Thread(target=worker)
            t.start()
            threads.append(t)

        i = 1
        for t in items:
            pq.put((i, t.copy()))
            i += 1

        def stop_workers():
            for i in range(len(threads)):
                pq.put((-1, None))
            for t in threads:
                t.join()

        # block until all tasks are done
        try:
            while pq.unfinished_tasks > 0:
                time.sleep(0.5)
        except KeyboardInterrupt:
            print("CTRL+C terminating...")
            stop_workers()
            sys.exit(1)

        stop_workers()

        if error is not None and single_thread_fallback:
            # Try to reprocess using a single thread
            # in case this was a memory error
            log.ODM_WARNING("Failed to run process in parallel, retrying with a single thread...")
            use_single_thread = True
    else:
        use_single_thread = True

    if use_single_thread:
        # Boring, single thread processing
        for q in items:
            process_one(q)
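
A minimal usage sketch for parallel_map; the worker function and payloads here are hypothetical stand-ins. Note that in the parallel path the workers receive copies of the items (see t.copy() above), so the worker should act by side effect rather than by mutating its argument:

def process_item(item):
    # side-effecting work on one item (hypothetical)
    print("processing", item["file"])

payloads = [{"file": "img_%d.jpg" % i} for i in range(10)]
parallel_map(process_item, payloads, max_workers=4)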
Code Example #2
File: search.py Project: AchintyaChavan/University
def AStar_Search(edges, start, goal):

    frontier = Q.PriorityQueue()
    currentCost = {}
    path = {}

    initial = [m[1] for m in edges.keys() if m[0] == goal]

    for next in initial:

        cfg = (goal, next)

        #         print(next.getASVPositions())

        currentCost[cfg] = 0
        path[cfg] = None
        frontier.put((0, cfg, edges[cfg]))

#     j = 0

    while not frontier.empty():

        #         print(j)
        #         j = j + 1

        element = frontier.get()
        cfg = element[1]  #cfg[0] is current, cfg[1] is destination config
        node = element[2]

        if node is not None:

            #             print(cfg[0].getASVPositions())
            #             print(cfg[1].getASVPositions())

            if cfg[0].totalDistance(start) <= (0.5 * 2.) \
               or cfg[1].totalDistance(start) <= (0.5 * 2.):

                print("Solution reached")
                c = cfg
                route = []
                route.extend(edges[cfg])

                cost = sum(currentCost.values())

                while path[c] is not None:
                    route.insert(0, edges[c])
                    c = path[c]

                return cost, route

            adjacent = [
                n for n in edges.keys() if (n[0] == cfg[1] and n[1] != cfg[0])
            ]

            for next in adjacent:

                #                 if (next[1], next[0]) not in currentCost:

                #                 print(next[0].getASVPositions())
                #                 print(next[1].getASVPositions())

                estimatedCost = cfg[1].totalDistance(
                    next[1]) + currentCost[cfg]

                if (next not in currentCost
                        and (next[1], next[0]) not in currentCost) \
                        or estimatedCost < currentCost[next]:

                    #                     print('hello')
                    currentCost[next] = estimatedCost
                    priority = estimatedCost + heuristic(goal, next[0])
                    frontier.put((priority, next, edges[next]))
                    path[next] = cfg

#     print("nothing")
    return None, None
Code Example #3
 def __init__(self, title, occupancyGrid):
     CellBasedForwardSearch.__init__(self, title, occupancyGrid)
     self.greedyQueue = Queue.PriorityQueue()
Code Example #4
            if u not in g.nodes():
                g.add_node(u)
            temp[u] = 1
        score.append(temp)
        Graphs.append(g)
        for node in g.nodes():
            flags[i][node] = 0
            if g.degree(node) > DN[i]:
                DN[i] = g.degree(node)
    return Graphs
IL = []
H_measure = []
max_h_measure = []
K_Shell_measure = []
I_U_value = []
que = Q.PriorityQueue()
ques = []
seed = set()
seeds = []


def reset():
    global IL
    global H_measure
    global max_h_measure
    global K_Shell_measure
    global I_U_value
    global seeds
    global flags
    for i in range(m):
        flags.append(dict())
Code Example #5
            self.completed = True
            env.updateState(self.status, source, destination)
        return self.completed


#load road network, starting time, and paths for vehicles
road = np.array(pk.load(open("road", "rb")))
time = np.array(pk.load(open("time", "rb")))
paths = np.array(pk.load(open("vehicle", "rb")))
#store the total number of cars
n = len(time)

#create a city and place vehicles in the city with the given information in a priority queue
#the queue takes next instant the vehicle changes the road as priority
city = Environment(road)
vehicle = Q.PriorityQueue()
for i in range(n):
    vehicle.put(Agent(time[i], paths[i], i))

#create an empty output matrix (to be filled as vehicles complete their journey)
output = np.zeros((n, 5))

#while vehicles that have not completed their journey exist
while not vehicle.empty():
    #get the vehicle with the next time instance
    nxt = vehicle.get()
    #take an action and check whether it has completed its journey
    completed = nxt.takeAction(city)
    #if not, put it back in the queue
    if not completed:
        vehicle.put(nxt)
Code Example #6
def document_selection_task(topicId, topic_initial_info, per_topic_train_index_list, al_protocol, batch_size, collection_size='qrels'):

    per_topic_X, per_topic_y, _, _, per_topic_seed_one_counter, per_topic_seed_zero_counter = topic_initial_info[topicId]

    # this train_index_list will be used to calculate the accuracy of the classifier
    train_index_list = copy.deepcopy(per_topic_train_index_list)
    test_index_list = []
    # if everything is already in the train list there is nothing left to
    # predict, so no model training is needed -- return here
    if len(per_topic_train_index_list) == len(per_topic_y):
        return (per_topic_train_index_list, 1.0, 1.0, 1.0, 0, True) # 0 means no new document selected, as f1, precision, recall all reached 1.0, True means this topic is completed
    # print isPredictable.count(1)

    total_documents = len(per_topic_y)
    train_size_controller = len(per_topic_train_index_list)
    size_limit = train_size_controller + batch_size
    number_of_document_selected = batch_size

    # boundary checking
    if size_limit > len(per_topic_y):
        size_limit = len(per_topic_y)
        number_of_document_selected = len(per_topic_y) - len(per_topic_train_index_list)

    per_topic_initial_X_test = []
    per_topic_test_index_dictionary = {}
    test_index_counter = 0

    for train_index in xrange(0, total_documents):
        if train_index not in per_topic_train_index_list:
            per_topic_initial_X_test.append(per_topic_X[train_index])
            per_topic_test_index_dictionary[test_index_counter] = train_index
            test_index_counter = test_index_counter + 1

    predictableSize = len(per_topic_initial_X_test)
    isPredictable = [1] * predictableSize  # initially we will predict all


    # here the model is trained on the documents selected in the previous
    # iteration; when loopCounter == 0 it uses all the seed documents
    # collected at the beginning
    model = None
    if collection_size == 'qrels':
        model = LogisticRegression(solver=small_data_solver, C=small_data_C_parameter)

    model.fit(per_topic_X[per_topic_train_index_list], per_topic_y[per_topic_train_index_list])

    queueSize = isPredictable.count(1)
    queue = Queue.PriorityQueue(queueSize)

    # these are used for SPL
    randomArray = []

    for counter in xrange(0, predictableSize):
        if isPredictable[counter] == 1:
            # model.predict_proba returns a list of rows, so we need
            # index [0] as we have only one element in the list
            y_prob = model.predict_proba(per_topic_initial_X_test[counter])[0]
            val = 0
            if al_protocol == 'CAL':
                val = y_prob[1]
                queue.put(relevance(val, counter))
            elif al_protocol == 'SAL':
                val = calculate_entropy(y_prob[0], y_prob[1])
                queue.put(relevance(val, counter))
            elif al_protocol == 'SPL':
                randomArray.append(counter)



    if al_protocol == 'SPL':
        random.shuffle(randomArray)
        batch_counter = 0
        #for batch_counter in xrange(0, batch_size):
        #    if batch_counter > len(randomArray) - 1:
        #        break
        while True:
            if train_size_controller == size_limit:
                break

            itemIndex = randomArray[batch_counter]
            batch_counter = batch_counter + 1
            isPredictable[itemIndex] = 0
            per_topic_train_index_list.append(per_topic_test_index_dictionary[itemIndex])
            # test_index_list will be used for calculating the accuracy of the classifier
            test_index_list.append(per_topic_test_index_dictionary[itemIndex])
            train_size_controller = train_size_controller + 1


    else:
        while not queue.empty():
            if train_size_controller == size_limit:
                break
            item = queue.get()
            isPredictable[item.index] = 0  # not predictable

            per_topic_train_index_list.append(per_topic_test_index_dictionary[item.index])
            # test_index_list will be used for calculating the accuracy of the classifier
            test_index_list.append(per_topic_test_index_dictionary[item.index])
            train_size_controller = train_size_controller + 1

    f1score, precision, recall, _ = calculate_accuracy_classifier(per_topic_X, per_topic_y, train_index_list, test_index_list,collection_size)

    return (per_topic_train_index_list, f1score, precision, recall, number_of_document_selected, False)
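
The relevance(val, counter) objects queued above come from a helper class that is not shown in this snippet. For both CAL and SAL the highest-scoring document should be dequeued first, so a sketch of such a wrapper (hypothetical; the real definition lives elsewhere in the project) would invert the comparison to turn the min-oriented PriorityQueue into a max-queue:

class relevance(object):
    """Hypothetical priority wrapper: larger val = higher priority."""
    def __init__(self, val, index):
        self.priority = val
        self.index = index

    def __lt__(self, other):
        # Inverted so that the largest val is dequeued first.
        return self.priority > other.priority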
Code Example #7
N = raw_input('How many processes you have : ')
print N
ls = []
i = 1
while (True):
    if (int(i) > int(N)):
        break
    n = raw_input('Enter Name of process %d: ' % i)
    at = raw_input('Enter Arrival Time of process %d: ' % i)
    d = raw_input('Enter Duration of process %d: ' % i)
    obj = ProcessA(n, at, d)
    ls.append(obj)
    i += 1
print "Gantt Chart"
queueA = Q.PriorityQueue()
for x in ls:
    queueA.put(x)
var = queueA.get()
ob = ProcessR(var.name, var.arrivalTime, var.duration)
queueR = Q.PriorityQueue()
queueR.put(ob)
j = 0
while (not queueA.empty()):
    var = queueA.get()
    if (int(var.arrivalTime) == j):
        ob = ProcessR(var.name, var.arrivalTime, var.duration)
        queueR.put(ob)
    else:
        queueA.put(var)
        break
Code Example #8
'''
The example below demonstrates the use of PriorityQueue.
If the item is a tuple, its first member is the priority (lower values
mean higher priority). If the item is a number or string, items are
ordered by their numeric/ASCII value, with smaller values dequeued first.
'''

import Queue

Empty = Queue.Empty

#
# try it
queue = Queue.PriorityQueue(0)

# add items out of order
queue.put((20, "second"))
queue.put((10, "first"))
queue.put((30, "third"))

# print queue contents
try:
    while 1:
        print queue.get_nowait()
except Empty:
    pass
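
The same demo under Python 3, where the module is named queue and print is a function (a direct port of the snippet above):

import queue

q = queue.PriorityQueue()
q.put((20, "second"))
q.put((10, "first"))
q.put((30, "third"))

try:
    while True:
        print(q.get_nowait())  # (10, 'first'), then (20, 'second'), then (30, 'third')
except queue.Empty:
    pass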
Code Example #9
scores = []
Graphs = []
status = []
for i in range(m):
    scores.append([])
    graph = nx.Graph()
    for u in nodes:
        for v in nodes:
            if u == v:
                continue
            if random.randint(1, 1000) < 996:
                continue
            graph.add_edge(u, v, weight=round(random.uniform(0, 1), 2))
    Graphs.append(graph)
pq = []
PQ = Q.PriorityQueue()
infl = []
ttl_infl = []
for i in range(m):
    t2 = []
    t3 = []
    for u in range(max(nodes) + 1):
        t3.append(0)
        t1 = []
        for v in range(max(nodes) + 1):
            t1.append(0)
        t2.append(t1)
    infl.append(t2)
    ttl_infl.append(t3)
seed = set()
seeds = []
Code Example #10
def solve(road_segments, idsDepth=sys.maxsize):
    closed_list = []
    if initial_state == goal_state:
        return initial_state

    if routing_algo == 'bfs':
        fringe = q.Queue()
    elif routing_algo == 'dfs' or routing_algo == 'ids':
        fringe = q.LifoQueue()
    else:
        fringe = q.PriorityQueue()

    heurSegment, heurDistance, heurTime, heurCost = getHeuristic(
        initial_state, goal_state, cost_function)

    # dict of all the elements in the fringe plus the explored elements
    previouslyExists = {}

    # heurCost returns the heuristic as distance/time when the initial and goal state are the same.
    fringe.put(State(initial_state, 0, 0, 0, heurCost, None, cost_function))
    previouslyExists[initial_state] = 1

    while not fringe.empty():
        best = fringe.get()

        closed_list.append(best.cityName)
        if best.cityName == goal_state:
            return best

        if routing_algo == 'ids' and best.segment > idsDepth:
            continue

        if (best.cityName in adjacency_list.keys()):
            successor_list = adjacency_list[best.cityName]
            for i in successor_list:
                if i not in closed_list:
                    from_city = best.cityName
                    to_city = i
                    city_Pair = (from_city, to_city)
                    costToCurrent = best.cost
                    segmentSucc, distanceSucc, timeSucc, costSucc = getCostToSuccessor(
                        from_city, to_city, cost_function, city_Pair)
                    segmentHeur, distanceHeur, timeHeur, costHeur = getHeuristic(
                        to_city, goal_state, cost_function)

                    # costSucc generalizes the step cost: the right value is
                    # picked automatically for the chosen cost function, which
                    # avoids loops
                    totalCostToSuccessor = costToCurrent + costSucc

                    currentHash = to_city
                    flag = False
                    if currentHash in previouslyExists:
                        for succ in list(fringe.queue):
                            if to_city in succ.cityName:
                                if succ.cost > totalCostToSuccessor:
                                    fringe.queue.remove(succ)
                                else:
                                    flag = True
                    if not flag:
                        fringe.put(
                            State(to_city, best.segment + segmentSucc,
                                  best.distance + distanceSucc,
                                  best.time + timeSucc, costHeur, best,
                                  cost_function))
                        previouslyExists[currentHash] = 1

    return False
Code Example #11
 def __init__(self, start, goal):
     self.path=[]
     self.visited_queue=[]
     self.priority_queue=Q.PriorityQueue()
     self.start=start
     self.goal=goal
Code Example #12
File: conftest.py Project: jcass77/mopidy-pandora
def rq():
    return Queue.PriorityQueue()
Code Example #13
def astar(maze):
    # TODO: Write your code here
    cur_pos = maze.getStart()
    dimensions = maze.getDimensions()
    visited = [[False for x in range(dimensions[1])]
               for y in range(dimensions[0])]
    pellet_arr = [[0 for x in range(dimensions[1])]
                  for y in range(dimensions[0])]
    came_from = [[() for x in range(dimensions[1])]
                 for y in range(dimensions[0])]
    num_states_explored = 0
    frontier = queue.PriorityQueue()
    distance = heuristic(maze, cur_pos)
    frontier.put([distance, cur_pos])
    came_from[cur_pos[0]][cur_pos[1]] = None
    cost_so_far = {}
    cost_so_far[cur_pos] = 0
    prev = cur_pos
    prev_goal = cur_pos
    optimal = []
    prim_list = maze.getObjectives()
    prim_list.insert(0, cur_pos)
    obj_list = maze.getObjectives()
    pellets = 0
    cur_goal = closest_goal(cur_pos, obj_list)
    mst_sum = prim(cur_pos, prim_list)

    while not frontier.empty():
        cur_pos = frontier.get()
        location = cur_pos[1]  #set location to the coordinates

        neighbors = maze.getNeighbors(location[0], location[1])

        if location in obj_list:
            # if location is an objective, remove it from the objective list
            obj_list.remove(location)
            # clear the priority queue since we will move on to another objective
            frontier.queue.clear()
            if obj_list:
                # if obj_list is not empty, set the current goal to the closest goal
                cur_goal = closest_goal(location, obj_list)
                del prim_list[0]  # remove the first item from prim list
                prim_list.remove(location)  # remove the objective we found
                prim_list.insert(0, location)  # add the objective we found to the start
                mst_sum = prim(location, prim_list)  # recompute the minimum spanning tree sum

            prev = location
            pellets += 1  # increment pellets since we reached a goal
            while prev != prev_goal:
                # walk back through came_from until we reach the previous goal
                prev = came_from[prev[0]][prev[1]]
                optimal.append((prev[0], prev[1]))  # append prev to the optimal path
            prev_goal = location

            if pellets == len(maze.getObjectives()):
                # if pellets equals the number of objectives, append the
                # location to the optimal path and return
                optimal.append((location[0], location[1]))
                return (optimal, num_states_explored)

        if not visited[location[0]][location[1]]:
            # if we have not visited it before, mark it visited
            visited[location[0]][location[1]] = True
            num_states_explored += 1

        for i in neighbors:
            new_cost = cost_so_far[location] + 1  # each move costs 1 unit
            if i not in cost_so_far or new_cost < cost_so_far[i]:
                # either a new location or a cheaper path to a known one
                cost_so_far[i] = new_cost
                distance = new_cost + manhattan(i, cur_goal) + mst_sum
                # put the neighbor and its distance into the priority queue
                frontier.put([distance, i])
                came_from[i[0]][i[1]] = location

            if pellet_arr[i[0]][i[1]] < pellets:
                # compare the current pellet count with the count recorded at i
                pellet_arr[i[0]][i[1]] = pellets
                cost_so_far[i] = new_cost
                distance = new_cost + manhattan(i, cur_goal) + mst_sum
                frontier.put([distance, i])
                came_from[i[0]][i[1]] = location

    return [], 0
Code Example #14
 def __init__(self, start=State(), end=State(), graph=defaultdict(list)):
     super(GreedySearch, self).__init__(start, end, graph)
     self.heap = Q.PriorityQueue()
Code Example #15
    #		if any(n[0] == i or n[1] == i for n in featureIndex):
    #			continue
    #		for j in range(i+1, len(ellipses)):
    #			if any(n[0] == j or n[1] == j for n in featureIndex):
    #				continue
    #			distance, isCentroid = target(ellipses[i],ellipses[j], threshD)
    #			if isCentroid:
    #				flags[k]=1
    #				featureIndex[k] = (i,j)
    #				threshD = distance
    #		if flags[k] == 1:
    #			k += 1
    #		if k > 3:
    #			break

    featureQueue = Queue.PriorityQueue()

    # for each blob, find the best matching pair
    for i in range(0, len(ellipses)):
        threshD = THRESHD
        for j in range(i + 1, len(ellipses)):
            distance, isCentroid = target(ellipses[i], ellipses[j], threshD)
            if isCentroid:
                featureQueue.put((distance, (i, j)))
                threshD = distance

    #if featureQueue.qsize() > 4:
    #	THRESHD = (THRESHD - 0.25) if THRESHD > 5.0 else 5.0
    #elif featureQueue.qsize() < 4:
    #	THRESHD = (THRESHD + 0.25) if THRESHD < 25.0 else 25.0
Code Example #16
 def fib(n):
     q = Queue.PriorityQueue()
     q.get()
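
As written, fib never computes anything: q.get() on a freshly created, empty PriorityQueue blocks forever. A non-blocking variant would use get_nowait(), which raises Queue.Empty instead of hanging:

 def fib(n):
     q = Queue.PriorityQueue()
     try:
         q.get_nowait()
     except Queue.Empty:
         pass  # the queue is empty: nothing to retrieve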
Code Example #17
File: proxy_handler.py Project: honcho-cheng/XX-Net
    def fetch(self):
        response_status = self.response.status
        response_headers = dict(
            (k.title(), v) for k, v in self.response.getheaders())
        content_range = response_headers['Content-Range']
        #content_length = response_headers['Content-Length']
        start, end, length = tuple(
            int(x) for x in re.search(r'bytes (\d+)-(\d+)/(\d+)',
                                      content_range).group(1, 2, 3))
        if start == 0:
            response_status = 200
            response_headers['Content-Length'] = str(length)
            del response_headers['Content-Range']
        else:
            response_headers['Content-Range'] = 'bytes %s-%s/%s' % (start, end,
                                                                    length)
            response_headers['Content-Length'] = str(length - start)

        logging.info('>>>>>>>>>>>>>>> RangeFetch started(%r) %d-%d', self.url,
                     start, end)
        self.wfile.write(
            ('HTTP/1.1 %s\r\n%s\r\n' %
             (response_status, ''.join('%s: %s\r\n' % (k, v)
                                       for k, v in response_headers.items()))))

        data_queue = Queue.PriorityQueue()
        range_queue = Queue.PriorityQueue()
        range_queue.put((start, end, self.response))
        for begin in range(end + 1, length, self.maxsize):
            range_queue.put((begin, min(begin + self.maxsize - 1,
                                        length - 1), None))
        #thread.start_new_thread(self.__fetchlet, (range_queue, data_queue, 0))
        p = threading.Thread(target=self.__fetchlet,
                             args=(range_queue, data_queue, 0))
        p.daemon = True
        p.start()
        t0 = time.time()
        cur_threads = 1
        has_peek = hasattr(data_queue, 'peek')
        peek_timeout = 90
        self.expect_begin = start
        while self.expect_begin < length - 1:
            while cur_threads < self.threads and time.time(
            ) - t0 > cur_threads * config.AUTORANGE_MAXSIZE / 1048576:
                #thread.start_new_thread(self.__fetchlet, (range_queue, data_queue, cur_threads * config.AUTORANGE_MAXSIZE))
                p = threading.Thread(
                    target=self.__fetchlet,
                    args=(range_queue, data_queue,
                          cur_threads * config.AUTORANGE_MAXSIZE))
                p.daemon = True
                p.start()
                cur_threads += 1
            try:
                if has_peek:
                    begin, data = data_queue.peek(timeout=peek_timeout)
                    if self.expect_begin == begin:
                        data_queue.get()
                    elif self.expect_begin < begin:
                        time.sleep(0.1)
                        continue
                    else:
                        logging.error(
                            'RangeFetch Error: begin(%r) < expect_begin(%r), quit.',
                            begin, self.expect_begin)
                        break
                else:
                    begin, data = data_queue.get(timeout=peek_timeout)
                    if self.expect_begin == begin:
                        pass
                    elif self.expect_begin < begin:
                        data_queue.put((begin, data))
                        time.sleep(0.1)
                        continue
                    else:
                        logging.error(
                            'RangeFetch Error: begin(%r) < expect_begin(%r), quit.',
                            begin, self.expect_begin)
                        break
            except Queue.Empty:
                logging.error('data_queue peek timeout, break')
                break
            try:
                self.wfile.write(data)
                self.expect_begin += len(data)
            except Exception as e:
                logging.info('RangeFetch client connection aborted(%s).', e)
                break
        self._stopped = True
Code Example #18
        self.N = float(N or sum(self.itervalues()))
        self.missingfn = missingfn or (lambda k, N: 1. / N)
    
    def __call__(self, key):
        if key in self: return float(self[key]) / float(self.N)
        elif digitRegex.match(key): return 0.1
        else : return self.missingfn(key, self.N)

# the default segmenter does not use any probabilities, but you could ...
# Pw  = Pdist(opts.counts1w)
Pw = PdistUnigram(opts.counts2w)
PwJoint = PdistJoint(opts.counts2w)
old = sys.stdout
sys.stdout = codecs.lookup('utf-8')[-1](sys.stdout)
# ignoring the dictionary provided in opts.counts
pq = Q.PriorityQueue()
with open(opts.input) as f:
    for line in f:
        utf8line = unicode(line.strip(), 'utf-8')  
        input = [i for i in utf8line]
        chart = {}
        for j in range(1, min(1 + Pw.maxlen, len(input)) + 1):
            newWord = "".join(input[:j])
            prev_word = unicode("<S>", 'utf-8')
            joint_tuple = " ".join([prev_word, newWord])
            joint_prob = PwJoint(joint_tuple)
            
            unigramPrevProb = Pw(prev_word)

            if joint_prob is None:
                # print "Inside"
Code Example #19
def active_learning_multi_processing(topicId, al_protocol, al_classifier, document_collection, topic_seed_info, topic_complete_qrels_address, train_per_centage, use_pooled_budget, per_topic_budget_from_trec_qrels):
    train_index_list = topic_seed_info[topicId]
    #print topicId
    #print type(train_index_list)
    #print "train_index_list", train_index_list
    #print len(topic_complete_qrels[topicId][0]), len(topic_complete_qrels[topicId][1]), len(topic_complete_qrels[topicId][2])

    topic_complete_qrels = pickle.load(open(topic_complete_qrels_address + topicId + '.pickle', 'rb'))

    original_labels = topic_complete_qrels[0]
    predicted_label = topic_complete_qrels[1]

    original_predicted_merged_dict = {}
    original_labels_list = []
    for k, v in original_labels.iteritems():
        original_predicted_merged_dict[k] = v
        original_labels_list.append(v)
    #exit(0)

    #print "tmp_l1:",original_labels_list.count(1)

    predicted_labels_list = []
    for k, v in predicted_label.iteritems():
        original_predicted_merged_dict[k] = v
        predicted_labels_list.append(v)

    #print "tmp_l2:",predicted_labels_list.count(1)

    #print "sum", original_labels_list.count(1) + predicted_labels_list.count(1)

    original_predicted_merged_list = []
    for k in sorted(original_predicted_merged_dict.iterkeys()):
        #print k, original_predicted_merged_dict[k]
        original_predicted_merged_list.append(original_predicted_merged_dict[k])

    #print "again sum", original_predicted_merged_list.count(1)


    # y must be converted to an np.array, otherwise y[train_index_list]
    # does not work directly on a list
    y = np.array(original_predicted_merged_list)  # the complete labels of all documents in the collection
    # the cast is needed because y is an object array and would otherwise
    # throw: Unknown label type: 'unknown'
    y = y.astype('int')
    #print "numpy sum", np.count_nonzero(y)
    #print y

    #print y.shape
    #print train_index_list
    #print y[train_index_list]

    #exit(0)

    total_documents = len(y)
    total_document_set = set(np.arange(0, total_documents, 1))

    initial_X_test = []
    test_index_dictionary = {}
    test_index_counter = 0

    #print "Starting Test Set Generation:"
    #start = time.time()
    for train_index in xrange(0, total_documents):
        if train_index not in train_index_list:
            initial_X_test.append(document_collection[train_index])
            test_index_dictionary[test_index_counter] = train_index
            test_index_counter = test_index_counter + 1

    #print "Finshed Building Test Set:", time.time() - start

    predictableSize = len(initial_X_test)
    isPredictable = [1] * predictableSize  # initially we will predict all

    # initializing the train_size controller
    train_size_controller = len(train_index_list)
    loopCounter = 1  # loop starts from 1 because 0 is for seed_set
    topic_all_info = {}  # key is the loopCounter

    while True:
        #print "iteration:", loopCounter
        # here the model is trained on the documents selected in the previous
        # iteration; when loopCounter == 0 it uses all the seed documents
        # collected at the beginning
        if al_classifier == 'LR':
            model = LogisticRegression(solver=large_data_solver, C=large_data_C_parameter, max_iter=200)
        elif al_classifier == 'SVM':
            model = SVC(C=1.0, kernel='linear', degree=3, gamma='auto', probability = True)
        elif al_classifier == 'RF':
            model =  RandomForestClassifier(n_estimators=10, max_depth=10, random_state=0)
        elif al_classifier == 'RFN':
            model = RandomForestClassifier(n_estimators=10, max_depth=None, random_state=0)
        elif al_classifier == 'RFN100':
            model = RandomForestClassifier(n_estimators=100, max_depth=None, random_state=0)
        elif al_classifier == 'NB':
            model = MultinomialNB()
        elif al_classifier == 'Ada':
            # base model is decision tree
            # logistic regression will not help
            model = AdaBoostClassifier(n_estimators=50,
                                     learning_rate=1)
        elif al_classifier == 'Xgb':
            model = XGBClassifier(random_state=1, learning_rate=0.01)
        elif al_classifier == 'BagLR':
            LRmodel = LogisticRegression(solver=large_data_solver, C=large_data_C_parameter, max_iter=200)
            model = BaggingClassifier(LRmodel, n_estimators = 5, max_samples = 1) # If float, then draw max_samples * X.shape[0] samples. 1 means use all samples
        elif al_classifier == 'BagNB':
            model = BaggingClassifier(MultinomialNB(), n_estimators = 5, max_samples = 0.5) # If float, then draw max_samples * X.shape[0] samples. 1 means use all samples
        elif al_classifier == 'Vot':
            LRmodel = LogisticRegression(solver=large_data_solver, C=large_data_C_parameter, max_iter=200)
            NBmodel = MultinomialNB()
            model = VotingClassifier(estimators=[('lr', LRmodel), ('nb', NBmodel)], voting = 'soft')

        model.fit(document_collection[train_index_list], y[train_index_list])

        test_index_list = list(total_document_set - set(train_index_list))
        pooled_document_count = len(set(train_index_list).intersection(set(original_labels_list)))
        non_pooled_document_count = len(set(train_index_list).intersection(set(predicted_labels_list)))

        y_actual = None
        y_pred = None
        y_pred_all = []

        if isPredictable.count(1) != 0:
            y_pred = model.predict(document_collection[test_index_list])
            start = time.time()
            #print 'Started y_pred_all'
            y_actual = np.concatenate((y[train_index_list], y[test_index_list]), axis=None)
            y_pred_all = np.concatenate((y[train_index_list], y_pred), axis=None)
            '''
            for doc_index in xrange(0,total_documents):
                if doc_index in train_index_list:
                    y_pred_all.append(y[doc_index])
                else:
                    # result_index in test_set
                    # test_index_list is a list of doc_index
                    # test_Index_list [25, 9, 12]
                    # test_index_list[0] = 25 and its prediction in y_pred[0] --one to one mapping
                    # so find the index of doc_index in test_index_list using
                    pred_index = test_index_list.index(doc_index)
                    y_pred_all.append(y_pred[pred_index])
            '''
            #print "Finsh y_pred_all", time.time() - start

        else: # everything in trainset
            y_pred = y
            y_actual = y
            y_pred_all = y
            test_index_list = train_index_list

        f1score = f1_score(y_actual, y_pred_all, average='binary')
        precision = precision_score(y_actual, y_pred_all, average='binary')
        recall = recall_score(y_actual, y_pred_all, average='binary')

        #print f1score, precision, recall, len(train_index_list), len(test_index_list), len(y_pred_all)

        # save all info using (loopCounter - 1)
        # the list must be a deep copy, otherwise every entry would point to
        # the same reference at the final iteration
        topic_all_info[loopCounter - 1] = (topicId, f1score, precision, recall, copy.deepcopy(train_index_list), test_index_list, y_pred, pooled_document_count, non_pooled_document_count)

        # it means everything in the train list and we do not need to predict
        # so we do not need any training of the model
        # so break here
        if isPredictable.count(1) == 0:
            break
        #print isPredictable.count(1)
        # suppose the original budget is 5: when train_index_list reaches 5 we
        # cannot simply turn off active learning, because we still need the
        # model trained on those 5 documents to predict the rest; so we exit
        # at 5 + 1 rather than at 5, which is why the limit is set to
        # per_topic_budget_from_trec_qrels[topicId] + 1, where 1 is the batch_size
        # once everything within the pooled budget is in the train list,
        # no further training is needed, so break here
        if use_pooled_budget == 1 and per_topic_budget_from_trec_qrels[topicId] == len(train_index_list):
            break

        queueSize = isPredictable.count(1)
        queue = Queue.PriorityQueue(queueSize)

        # these are used for SPL
        randomArray = []

        for counter in xrange(0, predictableSize):
            if isPredictable[counter] == 1:
                # model.predict_proba returns a list of rows, so we need
                # index [0] as we have only one element in the list
                y_prob = model.predict_proba(initial_X_test[counter])[0]
                val = 0
                if al_protocol == 'CAL':
                    val = y_prob[1]
                    queue.put(relevance(val, counter))
                elif al_protocol == 'SAL':
                    val = calculate_entropy(y_prob[0], y_prob[1])
                    queue.put(relevance(val, counter))
                elif al_protocol == 'SPL':
                    randomArray.append(counter)

        if use_pooled_budget == 1:
            #print "use pooled budget"
            size_limit = math.ceil(train_per_centage[loopCounter] * per_topic_budget_from_trec_qrels[topicId])
            print "size limit:", size_limit, "total_docs:", per_topic_budget_from_trec_qrels[topicId]

        else:
            size_limit = math.ceil(train_per_centage[loopCounter] * total_documents)
            print "size limit:", size_limit, "total_docs:", total_documents
        if al_protocol == 'SPL':
            random.shuffle(randomArray)
            batch_counter = 0
            # for batch_counter in xrange(0, batch_size):
            #    if batch_counter > len(randomArray) - 1:
            #        break
            while True:
                if train_size_controller == size_limit:
                    break

                itemIndex = randomArray[batch_counter]
                isPredictable[itemIndex] = 0
                train_index_list.append(test_index_dictionary[itemIndex])
                train_size_controller = train_size_controller + 1
                batch_counter = batch_counter + 1


        else:
            while not queue.empty():
                if train_size_controller == size_limit:
                    break
                item = queue.get()
                isPredictable[item.index] = 0  # not predictable

                train_index_list.append(test_index_dictionary[item.index])
                train_size_controller = train_size_controller + 1

        loopCounter = loopCounter + 1
    return topic_all_info
Code Example #20
    def __init__(self, cameraMaxX, cameraMaxY, dummy=False):
        """ port = usb port of the arduino controling the motors
            set to "" on a computer without arduino
        """

        self.dummy = dummy

        # this variable is True when no gestures are being executed
        self.isIdle = True

        self.headDataUpdated = False

        self.tracking_disabled = False

        # TODO: hardcoded configs?
        gesture_files = {
            "neutral": "gestures/neutral_gestures.csv",
            "fear": "gestures/fear_gestures.csv",
            "longing": "gestures/longing_gestures.csv",
            "surprise": "gestures/surprise_gestures.csv",
            "shame": "gestures/shame_gestures.csv"
        }

        # contains a dictionary of emotions to sequences
        self.gestureNameToSeq = {}
        # reading gesture files
        for gesture_type, filename in gesture_files.iteritems():
            with open(filename, "r") as f:
                reader = csv.reader(f)
                reader.next()
                for row in reader:
                    self.gestureNameToSeq[gesture_type + "_" +
                                          row[0]] = (float(row[1]), row[2:])

        # Read the positions from the Positions.json file
        self.positions = {}

        # Read the positions from the json files
        fileList = ["Positions.json"]
        try:
            os.chdir("./Positions")
            for filename in glob.glob("*.json"):
                fileList.append("Positions/" + filename)
            os.chdir("..")
        except:
            pass

        for filename in fileList:
            self.loadPositionsFromFile(filename)

        self.timeInterval = 0.25  # (1/4 second)

        # Initialize the angles to the marionette's default (0 everywhere)
        marionette = Marionette()
        self.currentAngles = marionette.getAngles()
        self.currentTargetAngles = marionette.getAngles()
        self.targetReached = False

        # Get min/max head angle values
        self.headMinAngle = marionette.motor['H'].minAngle
        self.headMaxAngle = marionette.motor['H'].maxAngle

        # Head IMU angles:
        self.roll = 0
        self.pitch = 0
        self.yaw = 0
        self.pitchMax = 0
        self.pitchMin = 0
        self.yawMax = 0
        self.yawMin = 0

        # Angle delta above which a head motion will be triggered
        self.headMotionThreshold = 0.0

        # Max x and y in camera coordinates space
        self.cameraCoordMaxX = predictor.PROCESSING_SIZE
        self.cameraCoordMaxY = self.cameraCoordMaxX * cameraMaxY / cameraMaxX

        # Read the calibration file
        self.calibration = []
        filename = "IMUCameraCalibration.json"
        with open(filename, "r") as read_file:
            self.calibration = json.load(read_file)
            print self.calibration
            self.pitchMax = self.calibration["up"][1]
            self.pitchMin = self.calibration["down"][1]
            self.yawMax = self.calibration["left"][2]
            self.yawMin = self.calibration["right"][2]

        # motor name to Arduino motor id
        self.arduinoID = {}
        self.arduinoID['motorH'] = 'head'
        self.arduinoID['motorS'] = 'shoulder'
        self.arduinoID['motorHR'] = 0  # "Right head"
        self.arduinoID['motorHL'] = 1  # "Left head"
        self.arduinoID['motorSR'] = 9  # "Right shoulder"
        self.arduinoID['motorSL'] = 8  # "Left shoulder"
        self.arduinoID['motorAR'] = -1  # "Right arm"
        self.arduinoID['motorAL'] = 7  # "Left arm"
        self.arduinoID['motorWR'] = 2  # "Right hand"
        self.arduinoID['motorWL'] = 3  # "Left hand"
        self.arduinoID['motorFR'] = 5  # "Right foot"
        self.arduinoID['motorFL'] = 4  # "Left foot"
        self.arduinoID['motorEX'] = 'eyeX'  # "Eye horizontal"
        self.arduinoID['motorEY'] = 'eyeY'  # "Eye vertical"

        # Arduino motor id to index of the angle in the angles list
        # motor:       'S' 'SR' 'SL' 'AR' 'AL' 'H' 'HR' 'HL' 'FR' 'FL' 'WR' 'WL' 'EX' 'EY'
        # angle index:  0    1    2    3    4   5    6    7    8    9   10   11   12   13
        # arduino id:   s  m,9  m,8   -1  m,7   h  m,0  m,1  m,5  m,4  m,2  m,3      e
        self.arduinoIDToAngleIndex = {}
        self.arduinoIDToAngleIndex['h'] = 5
        self.arduinoIDToAngleIndex['s'] = 0
        self.arduinoIDToAngleIndex['0'] = 6  # "Right head"
        self.arduinoIDToAngleIndex['1'] = 7  # "Left head"
        self.arduinoIDToAngleIndex['9'] = 1  # "Right shoulder"
        self.arduinoIDToAngleIndex['8'] = 2  # "Left shoulder"
        #self.arduinoIDToAngleIndex['6'] = 3   # "Right arm"
        self.arduinoIDToAngleIndex['7'] = 4  # "Left arm"
        self.arduinoIDToAngleIndex['2'] = 10  # "Right hand"
        self.arduinoIDToAngleIndex['3'] = 11  # "Left hand"
        self.arduinoIDToAngleIndex['5'] = 8  # "Right foot"
        self.arduinoIDToAngleIndex['4'] = 9  # "Left foot"
        self.arduinoIDToAngleIndex['e,x'] = 12  # "Eye horizontal"
        self.arduinoIDToAngleIndex['e,y'] = 13  # "Eye vertical"

        # Thread related variables
        self.movementCount = long(0)
        self.busy_executing = False
        self.qMotorCmds = Queue.PriorityQueue()
        self.running = False
        self.arduino_thread = None
        self.start()
Code Example #21
def FCFS(plist,n):

	p=process()
	q=Q.PriorityQueue(maxsize=n)
	
	x=0 
	Pdone=0
	time_lap=0
	numer_completed=0
	templ=[]
	btl=dict()
	for y in plist:
		btl[y.pid]=y.bt
		pass
	print("in srt")
	while(1):
		
		print "this is  iteration number %d"%(x)
		if(x<n):
			print "going to push %d "%(x)
			
			while(plist[x].at<=time_lap):
				# print "pushed %d"%(x)
				q.put(plist[x])
				
				x+=1
				if(x>=n):
					break

			

		
		
		#print(q.empty())
		if(q.empty()==False):
			#time_lap+=1

			p=q.get()
			print "printing in q"
			p.disp()
			
			# non-preemptive FCFS: the process always runs to completion
			time_lap=time_lap+p.bt
			p.ct=time_lap
			p.wat=time_lap-btl[p.pid]-p.at
			if p.wat<0:
				p.wat=0
			p.tat=p.ct-p.at
			templ.append(p)
			Pdone+=1

		else :
			time_lap+=1
		if(Pdone==n):
			break

	print "PRnp schduling done !"
	return templ
	pass
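
For q.put(plist[x]) above to order anything, the process objects must be comparable; the process/ProcessA classes are not shown in this snippet. A minimal sketch (hypothetical, ordering by arrival time as FCFS requires):

class process(object):
	def __init__(self, pid=0, at=0, bt=0):
		self.pid, self.at, self.bt = pid, at, bt
		self.ct = self.wat = self.tat = 0

	def __lt__(self, other):
		# FCFS: the earlier arrival time wins
		return self.at < other.at

	def disp(self):
		print "pid=%d at=%d bt=%d" % (self.pid, self.at, self.bt)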
Code Example #22
def resolution(clauses,out_file):
    global globalvar
    global lineNum
    global maxQueue
    candidates = Q.PriorityQueue()
    lengthC1 = len(clauses)
    i = 0
    while i<lengthC1:
        j=i+1
        while j<lengthC1:
            if resolvable(clauses[i],clauses[j]):
                candidates.put(ResClauses(clauses[i],clauses[j]))
            j+=1
        i+=1
    iter = 1
    while not candidates.empty():
        curPair = candidates.get()
        list1 = curPair.clause1.clause_list
        list2 = curPair.clause2.clause_list
        resolvant = Clause()
        for p in list1:
            stripped = p
            stripped = stripped.strip("-")
            if(p == stripped and ("-"+p) in list2) or (p!=stripped and stripped in list2):
                print "Iteration %s, queue size %s, resolution on %s and %s" % (iter, candidates.qsize()+1,curPair.clause1.index,curPair.clause2.index)
                out_file.write("Iteration %s, queue size %s, resolution on %s and %s" % (iter, candidates.qsize()+1,curPair.clause1.index,curPair.clause2.index))
                out_file.write("\n")
                iter+=1
                if maxQueue < candidates.qsize()+1:
                    maxQueue=candidates.qsize()+1
                print "resolving %s and %s" %(curPair.clause1.getList(),curPair.clause2.getList())
                out_file.write("resolving %s and %s" %(curPair.clause1.getList(),curPair.clause2.getList()))
                out_file.write("\n")
                resolvant=resolve(curPair.clause1, curPair.clause2, p)
                if len(resolvant.clause_list)==0:
                    resolvant.parent1=curPair.clause1.index
                    resolvant.parent2=curPair.clause2.index
                    resolvant.index=globalvar
                    print "success empty clause found!"
                    print "Max Queue Size %d"% maxQueue
                    print "Total Number of iterations %d" %iter
                    out_file.write("\nSuccess empty clause found! \n")
                    out_file.write("Max Queue Size %d \n"% maxQueue)
                    out_file.write("Total Number of iterations %d" %iter)
                    return resolvant
                if(not clauseExists(clauses, resolvant)):
                    print "%d: %s generated from %d and %d" %(lineNum, resolvant.getList(),curPair.clause1.index,curPair.clause2.index)
                    print
                    out_file.write("%d: %s generated from %d and %d" %(lineNum, resolvant.getList(),curPair.clause1.index,curPair.clause2.index))
                    out_file.write("\n\n")
                    lineNum+=1
                    resolvant.parent1 = curPair.clause1.index
                    resolvant.parent2 = curPair.clause2.index
                    resolvant.index = globalvar
                    clauses.append(resolvant)
                    globalvar+=1
                    i=0
                    while i<globalvar-1:
                        if resolvable(clauses[i], resolvant):
                            candidates.put(ResClauses(resolvant, clauses[i]))
                        i+=1
                else:
                    print "Discard %s generated as it already exists in clauses" %resolvant.getList()
                    print
                    out_file.write("Discard %s generated as it already exists in clauses" %resolvant.getList())
                    out_file.write("\n")
                    out_file.write("\n")
    return False
Code Example #23
    money = np.random.randint(100000)
    #stock = 0
    price = startprice

    currentstock = stock[num]

    arrlock.acquire()
    stock[num] = money
    arrlock.release()

    sellqueue.put_nowait((-money, num))


threads = []
numpeople = 500
sells = Queue.PriorityQueue()
buys = Queue.PriorityQueue()
startprice = 100
arrlock = threading.Lock()
money = [np.random.randint(10000) for i in range(numpeople)]
stock = [np.random.randint(50) for i in range(numpeople)]

print sum(money), max(money), min(money)

lastprice = 10.
multiplier = .75
for roundi in range(100):
    offers = [
        np.random.normal(multiplier * lastprice, 5) for i in range(numpeople)
    ]
    sellprices = []
Code Example #24
File: PriorityQueue.py Project: johndpope/works
 def __init__(self):
     self.pq = Queue.PriorityQueue(maxsize=0)
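
Note: maxsize=0 (or any value <= 0) means the queue is unbounded, so this is equivalent to Queue.PriorityQueue() with no argument.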
Code Example #25
 def __init__(self):
     self.nodes = {}
     self.startNode = None
     self.goalNode = None
     self.queue = Queue.PriorityQueue()
Code Example #26
 def __init__(self):
     self.q = Q.PriorityQueue()
Code Example #27
import time

import Queue
import threading

numbers = [23, 23, 23, 23]
q = Queue.PriorityQueue()
threads = []
queue_condition = threading.Condition()
shared_queue = Queue.Queue()


def fibonacci(condition, priority):
    with condition:

        while shared_queue.empty():
            condition.wait()

        num = shared_queue.get()
        a, b = 0, 1
        for i in range(num):
            a, b = b, a + b
        q.put((priority, a))
    return


def queue_task(condition):
    with condition:
        for item in numbers:
            shared_queue.put(item)
        condition.notify_all()  # wake any threads waiting in fibonacci
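
The snippet ends before any threads are created. One way to wire it up (a sketch, not part of the original) is one producer running queue_task and one consumer per number running fibonacci, then draining q in priority order:

for priority in range(len(numbers)):
    t = threading.Thread(target=fibonacci, args=(queue_condition, priority))
    t.start()
    threads.append(t)

producer = threading.Thread(target=queue_task, args=(queue_condition,))
producer.start()
threads.append(producer)

for t in threads:
    t.join()

while not q.empty():
    print q.get()  # (priority, fib(23)) pairs, lowest priority first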
Code Example #28
def astar2d(start, goal, obstacle_map, epsilon=1):
    #print "Planning From", start, "to", goal

    #Error Check Trivial Paths
    if not isFree(start, obstacle_map):
        # print "Invalid Start"
        return [[], float('Inf'), 0]
    if not isFree(goal, obstacle_map):
        # print "Invalid Goal"
        return [[], float('Inf'), 0]
    if start == goal:
        # print "Astar Complete"
        return [[goal], 0.0, 0]

    #Initialize Distance Matrix
    [x, y] = obstacle_map.shape
    distances = {}

    for ii in range(x):
        for jj in range(y):
            distances[ii, jj] = float('inf')

    #Expand Start Node
    expanded_node = start
    distances[start[0], start[1]] = 0.0
    open_list = Queue.PriorityQueue()
    closed_list = []
    current_node = start
    num_expanded = 1

    for node in getNeighbors(start, obstacle_map):
        distances[node[0], node[1]] = min(
            distances[current_node[0], current_node[1]] + 1,
            distances[node[0], node[1]])
        if node == goal:
            #print "Astar Complete"
            #print distances[goal[0], goal[1]]
            return getPath(start, goal, distances,
                           obstacle_map) + [len(closed_list)]
        scored_node = (f(node, goal, distances, epsilon), num_expanded, node)
        #print scored_node
        open_list.put(scored_node)

    closed_list.append(start)

    while not open_list.empty():
        current_scored_node = open_list.get(block=False)
        current_node = current_scored_node[2]

        closed_list.append(current_node)
        #print "Expanding", current_node, f(current_node, goal, distances)
        num_expanded += 1
        for node in getNeighbors(current_node, obstacle_map, closed_list):
            distances[node[0], node[1]] = min(
                distances[current_node[0], current_node[1]] + 1,
                distances[node[0], node[1]])  #4-connected graph assumption

            if node == goal:
                #print "Astar Complete"
                #print distances[goal[0], goal[1]]
                return getPath(start, goal, distances,
                               obstacle_map) + [len(closed_list)]

            scored_node = (f(node, goal, distances,
                             epsilon), num_expanded, node)
            updatePQueue(scored_node, open_list)
    print "astar Failed"
Code Example #29
def bfs(grid, car_list):
    """
	Breadth First Search solver for the Rush Hour board game
	"""

    # Init vars etc.
    queue = Queue.PriorityQueue()
    queue.put((0, grid, car_list))
    pre_grid = {}
    pre_grid[str(grid)] = "finished"

    # Main loop
    while not queue.empty():

        # Take first grid from queue
        get_grid = queue.get()
        gridObj = Grid(get_grid[1], get_grid[2])

        # Check for all cars in the grid if there are moves possible
        for car in get_grid[2]:

            # Skip the placeholder car
            if car != 'placeholder':

                # Find the car number
                car_n = gridObj.retrieve_value(car[2], car[3])

                # Function for moving the car, checking for solution and creating new states
                def move_car(move):

                    # Check if move is possible
                    if gridObj.check_move_car(car_n, move):

                        # Create copy of grid and move the car in the new grid
                        newGridObj = Grid([x[:] for x in get_grid[1]],
                                          get_grid[2][:])
                        newGridObj.move_car(car_n, move)
                        newGridObjGridStr = str(newGridObj.grid)

                        # Check if grid has already existed
                        if newGridObjGridStr not in pre_grid:

                            cost = get_grid[0]

                            # Heuristic: Cost of blocking car is higher than blocking truck
                            for i in range(
                                    newGridObj.car_list[1][2] +
                                    newGridObj.car_list[1][1],
                                    len(newGridObj.grid)):
                                if newGridObj.retrieve_value(
                                        i, newGridObj.car_list[1][3]) != 0:
                                    if newGridObj.car_list[1][1] == 3:
                                        cost += 10
                                    if newGridObj.car_list[1][1] == 2:
                                        cost += 100

                            # Check for solution (clear path to endpoint)
                            if newGridObj.check_solution():

                                # Check if red car is at the endpoint
                                if newGridObj.car_list[1][2] != (
                                        len(newGridObj.grid[0]) - 2):

                                    # Add grid to queue to be further processed
                                    queue.put((cost, newGridObj.grid,
                                               newGridObj.car_list))
                                    pre_grid[newGridObjGridStr] = get_grid[
                                        1][:]

                                # Return and finish algorithm
                                else:
                                    pre_grid[newGridObjGridStr] = get_grid[
                                        1][:]
                                    return (newGridObj, pre_grid)

                            # Add state to queue to be further processed
                            queue.put(
                                (cost, newGridObj.grid, newGridObj.car_list))
                            pre_grid[newGridObjGridStr] = get_grid[1][:]

                # Try to move selected car both ways
                returned = move_car(1)
                if returned:
                    return returned

                returned = move_car(-1)
                if returned:
                    return returned

    return "No solution"
Code Example #30
File: run-tests.py Project: indigos33k3r/spark-2
def main():
    opts = parse_opts()
    if opts.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    should_test_modules = opts.testnames is None
    logging.basicConfig(stream=sys.stdout, level=log_level, format="%(message)s")
    LOGGER.info("Running PySpark tests. Output is in %s", LOG_FILE)
    if os.path.exists(LOG_FILE):
        os.remove(LOG_FILE)
    python_execs = opts.python_executables.split(',')
    LOGGER.info("Will test against the following Python executables: %s", python_execs)

    if should_test_modules:
        modules_to_test = []
        for module_name in opts.modules.split(','):
            if module_name in python_modules:
                modules_to_test.append(python_modules[module_name])
            else:
                print("Error: unrecognized module '%s'. Supported modules: %s" %
                      (module_name, ", ".join(python_modules)))
                sys.exit(-1)
        LOGGER.info("Will test the following Python modules: %s", [x.name for x in modules_to_test])
    else:
        testnames_to_test = opts.testnames.split(',')
        LOGGER.info("Will test the following Python tests: %s", testnames_to_test)

    task_queue = Queue.PriorityQueue()
    for python_exec in python_execs:
        # Check if the python executable has coverage installed when 'COVERAGE_PROCESS_START'
        # environmental variable is set.
        if "COVERAGE_PROCESS_START" in os.environ:
            _check_coverage(python_exec)

        python_implementation = subprocess_check_output(
            [python_exec, "-c", "import platform; print(platform.python_implementation())"],
            universal_newlines=True).strip()
        LOGGER.debug("%s python_implementation is %s", python_exec, python_implementation)
        LOGGER.debug("%s version is: %s", python_exec, subprocess_check_output(
            [python_exec, "--version"], stderr=subprocess.STDOUT, universal_newlines=True).strip())
        if should_test_modules:
            for module in modules_to_test:
                if python_implementation not in module.blacklisted_python_implementations:
                    for test_goal in module.python_test_goals:
                        heavy_tests = ['pyspark.streaming.tests', 'pyspark.mllib.tests',
                                       'pyspark.tests', 'pyspark.sql.tests', 'pyspark.ml.tests']
                        if any(map(lambda prefix: test_goal.startswith(prefix), heavy_tests)):
                            priority = 0
                        else:
                            priority = 100
                        task_queue.put((priority, (python_exec, test_goal)))
        else:
            for test_goal in testnames_to_test:
                task_queue.put((0, (python_exec, test_goal)))

    # Create the target directory before starting tasks to avoid races.
    target_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'target'))
    if not os.path.isdir(target_dir):
        os.mkdir(target_dir)

    def process_queue(task_queue):
        while True:
            try:
                (priority, (python_exec, test_goal)) = task_queue.get_nowait()
            except Queue.Empty:
                break
            try:
                run_individual_python_test(target_dir, test_goal, python_exec)
            finally:
                task_queue.task_done()

    start_time = time.time()
    for _ in range(opts.parallelism):
        worker = Thread(target=process_queue, args=(task_queue,))
        worker.daemon = True
        worker.start()
    try:
        task_queue.join()
    except (KeyboardInterrupt, SystemExit):
        print_red("Exiting due to interrupt")
        sys.exit(-1)
    total_duration = time.time() - start_time
    LOGGER.info("Tests passed in %i seconds", total_duration)

    for key, lines in sorted(SKIPPED_TESTS.items()):
        pyspark_python, test_name = key
        LOGGER.info("\nSkipped tests in %s with %s:" % (test_name, pyspark_python))
        for line in lines:
            LOGGER.info("    %s" % line.rstrip())