Ejemplo n.º 1
0
def find_path_astar(start, stop):
    """Search for a chain of article links from ``start`` to ``stop`` using A*.

    Every link followed costs 1 (``gscore``); the priority of a candidate is
    its ``gscore`` plus ``distance_heuristic(candidate, stop)``.  Candidates
    are kept in a ``MinHeap`` of ``(article, priority)`` tuples.

    When ``stop`` is extracted from the heap, ``unwrap_path(parent, start,
    stop)`` is called and the search ends.  The ancestor chain of every
    extracted article is printed as progress output.

    Args:
        start: Article the search begins at (must be hashable; it is used as
            a dict key).
        stop: Article the search is trying to reach.

    Returns:
        None.  The result of ``unwrap_path`` is not returned to the caller.

    Side effects:
        Deletes ``article_data[curr]`` for every article whose links were
        expanded without error.
        NOTE(review): presumably ``article_data`` is a page cache filled by
        ``get_page_links`` -- confirm; a missing key here raises ``KeyError``.
    """
    global article_data

    # Sets give O(1) membership tests; neither collection is ever iterated,
    # so ordering is irrelevant.
    visited = set()
    unvisited = {start}
    parent = {start: "None"}
    gscore = {start: 0}
    fscore = MinHeap()
    fscore.insert((start, distance_heuristic(start, stop)))

    while unvisited:
        curr = fscore.extract()[0]
        print(ancestor_chain(parent, curr))
        if curr == stop:
            unwrap_path(parent, start, stop)
            break

        # An article is re-inserted into the heap whenever a cheaper route to
        # it is found, so the heap can hold stale duplicates of articles that
        # were already expanded.  Skip those.
        if curr not in unvisited:
            continue
        unvisited.remove(curr)
        visited.add(curr)

        try:
            curr_links = get_page_links(curr)["parse"]["links"]
            for entry in curr_links:
                try:
                    # Only entries whose "exists" value is the empty string
                    # are followed; entries lacking the key raise KeyError
                    # and are skipped by the handler below.
                    if entry["exists"] != "":
                        continue
                    link = entry["*"]
                    if not is_article(link) or link in visited:
                        continue

                    tentative_gscore = gscore[curr] + 1
                    if link not in unvisited:
                        unvisited.add(link)
                    elif tentative_gscore >= gscore[link]:
                        # Already queued via a route that is at least as short.
                        continue

                    parent[link] = curr
                    gscore[link] = tentative_gscore
                    priority = tentative_gscore + distance_heuristic(link, stop)
                    fscore.insert((link, priority))
                except Exception:
                    # Best effort: a malformed entry or a failing helper must
                    # not abort the expansion of the remaining links.
                    # ``Exception`` (not a bare ``except``) lets
                    # KeyboardInterrupt and SystemExit propagate.
                    continue
        except Exception:
            # Fetching or walking the link list failed; move on to the next
            # candidate without touching ``article_data``.
            continue
        del article_data[curr]
Ejemplo n.º 2
0
from minheap import MinHeap
import sys

# Get the count for every job and state from the input file (sys.argv[1]).
# NOTE(review): `Counting` is not imported in the visible part of this file --
# confirm where it is defined.
count = Counting(sys.argv[1])
job_count, state_count, sum_count = count.count()

# Find the top 10 jobs: push every (job, count) pair into the heap, then pull
# out at most 10 entries.
job_heap = MinHeap(10)
for key, value in job_count.items():
    job_heap.add(key, value)

result = dict()
for _ in range(min(10, len(job_count))):
    key, value = job_heap.extract()
    # Drop any double quotes wrapping the occupation name.
    result[key.strip('"')] = value

# Order by count (descending); ties are broken alphabetically by name.  The
# second sort is stable, so the alphabetical order from the first survives
# among equal counts.
result = sorted(result.items(), key=lambda item: item[0])
result.sort(key=lambda item: item[1], reverse=True)

# Write the report to sys.argv[2]; the context manager guarantees the file is
# flushed and closed even if a write fails.
with open(sys.argv[2], 'w') as job_fhandle:
    job_fhandle.write('TOP_OCCUPATIONS;NUMBER_CERTIFIED_APPLICATIONS;PERCENTAGE\n')
    for key, value in result:
        # Share of all counted applications, as a percentage with one decimal.
        p = round(value / sum_count * 100.0, 1)
        s = key + ';' + str(value) + ';' + str(p) + '%' + '\n'
        job_fhandle.write(s)

# Find the top 10 states: same procedure as for the jobs above, fed from
# state_count and written to the file named by sys.argv[3].
# NOTE(review): the handle is opened without a context manager and no close()
# is visible in this portion of the file -- confirm it is closed further down.
state_heap = MinHeap(10)
state_fhandle = open(sys.argv[3],'w')
# Push every (state, count) pair into the heap.
for key,value in state_count.items():
    state_heap.add(key,value)