Example #1
import os
import operator

import content_extractor  # project-local module
import TextRank           # project-local module
import server             # project-local module

def main(url, log):
    # Pick an unused directory name, appending underscores until one is free.
    log1 = log
    while os.path.exists(log1):
        log1 += '_'
    os.makedirs(log1)

    content = content_extractor.get_content(url)

    # Log the page metadata alongside the extracted content.
    with open(log1 + '/metadata', "a", encoding="utf-8") as logFile:
        logFile.write("URL : " + url + "\n\n")
        logFile.write("Title : " + content['title'] + "\n\n")
        logFile.write("Meta Desc. : " + content['meta'] + "\n\n")
        logFile.write("Content : " + content['content'] + "\n\n")

    # Score terms in the extracted text and sort by descending TextRank score.
    d = TextRank.text_rank(content['content'])
    sortd = sorted(d.items(), key=operator.itemgetter(1), reverse=True)

    with open(log1 + '/textrank_result', "a") as logtext:
        logtext.write(str(sortd))

    # Weight each (term, score) pair by every entry of the module-level
    # source_probs list of per-source probabilities.
    final = []
    for term, score in sortd:
        for j, prob in enumerate(source_probs):
            final.append((term, j, score * prob))

    fsort = sorted(final, key=operator.itemgetter(2), reverse=True)
    with open(log1 + '/result', "a") as logres:
        logres.write(str(fsort))

    # Serve the ten highest-weighted results.
    server.run_server(fsort[:10])
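
A minimal usage sketch, assuming it runs in the same module so the source_probs global is visible to main(); the URL, weights, and log path below are placeholders, not values from the original project:

source_probs = [0.5, 0.3, 0.2]  # hypothetical per-source weights
main("https://example.com/article", "logs/run")  # placeholder URL and log directory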
Example #2
import content_extractor  # project-local module
import random_text        # project-local module
from googlesearch import search  # assumed source of search(query, stop=N): the `google` pip package

def get_wiki_article(search_term, verbose=False, search_list=default_list):
    content_list = []
    def_size = 0  # fallback size in case "random" precedes every real provider
    if verbose:
        print("Begin Search Algorithm for keyword :", search_term)
    for provider in search_list:
        if provider != "random":
            # Take the top web-search hit for "<search_term> <provider>".
            search_url_generator = search(search_term + " " + provider, stop=1)
            root_url = next(search_url_generator)
            if verbose:
                print("Looking at Encyclopedia Article :", root_url)
            term = content_extractor.get_content(root_url)
            def_size = len(term['content'])
            if verbose:
                print(term['meta'])
            content_list.append(term['content'])
        else:
            # Pad the corpus with random text matching the last article's length.
            print("Random Text generation")
            content_list.append(random_text.get_random_text(def_size))

    return content_list
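
A minimal usage sketch, passing an explicit provider list since default_list is defined elsewhere in the original module; the providers and keyword below are placeholders:

providers = ["wikipedia", "britannica", "random"]  # hypothetical provider keywords
articles = get_wiki_article("photosynthesis", verbose=True, search_list=providers)
print(len(articles), "texts collected")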