import os
import operator

import content_extractor
import TextRank
import server
import random_text
from google import search  # web-search helper; assumed to be the `google` (googlesearch) package


def main(url, log):
    # Pick a log directory name that does not exist yet by appending '_'
    # until a free name is found, then create it.
    log1 = log
    while True:
        if not os.path.exists(log1):
            os.makedirs(log1)
            break
        else:
            log1 = log1 + '_'

    # Fetch the page and record its metadata.
    content = content_extractor.get_content(url)
    logFile = open(log1 + '/metadata', "a")
    logFile.write("URL : " + url + "\n\n")
    logFile.write("Title : " + content['title'] + "\n\n")
    logFile.write("Meta Desc. : " + content['meta'] + "\n\n")
    logFile.write("Content : " + content['content'].encode("utf8") + "\n\n")
    logFile.close()

    # Rank keywords in the page content with TextRank and log the scores,
    # highest first.
    d = TextRank.text_rank(content['content'])
    sortd = sorted(d.iteritems(), key=operator.itemgetter(1), reverse=True)
    logtext = open(log1 + '/textrank_result', "a")
    logtext.write(str(sortd))
    logtext.close()

    # Weight each keyword score by the prior probability of every source
    # (source_probs is expected to be a module-level list, one probability
    # per source) and re-rank the (keyword, source index, score) triples.
    final = []
    for i in sortd:
        for j in range(len(source_probs)):
            final.append((i[0], j, i[1] * source_probs[j]))
    fsort = sorted(final, key=operator.itemgetter(2), reverse=True)
    logres = open(log1 + '/result', "a")
    logres.write(str(fsort))
    logres.close()

    # Serve the top 10 triples.
    server.run_server(fsort[:10])
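# Illustrative only (not part of the original module): main() relies on a
# module-level source_probs list giving one prior probability per source,
# for example something like
#
#     source_probs = [0.5, 0.3, 0.2]   # hypothetical values for three sources
#
# With that in place, a call such as
#
#     main("https://en.wikipedia.org/wiki/TextRank", "logs/run1")
#
# creates logs/run1 (appending '_' if the name is taken), writes the
# metadata, textrank_result and result files, and serves the top 10
# (keyword, source index, score) triples via server.run_server.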
def get_wiki_article(search_term, verbose=False, search_list=default_list):
    content_list = []
    def_size = 0  # length of the last fetched article; used to size random filler text
    if verbose:
        print "Begin Search Algorithm for keyword : ", search_term
    for provider in search_list:
        if provider != "random":
            # Query the web for "<search_term> <provider>" and take the top hit.
            search_url_generator = search(search_term + " " + provider, stop=1)
            root_url = search_url_generator.next()
            if verbose:
                print "Looking at Encyclopedia Article :", root_url
            term = content_extractor.get_content(root_url)
            def_size = len(term['content'])
            if verbose:
                print term['meta'].encode('utf-8', 'replace')
            content_list.append(term['content'])
        else:
            # Control entry: generate random text matching the length of the
            # most recently fetched article.
            print "Random Text generation"
            content_list.append(random_text.get_random_text(def_size))
    return content_list
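# A minimal usage sketch, assuming the module-level default_list of search
# providers is defined elsewhere in the project; the keyword below is a
# placeholder chosen for illustration.
if __name__ == "__main__":
    # Fetch one reference text per provider in default_list, plus random
    # filler text for any "random" entry.
    reference_texts = get_wiki_article("entropy", verbose=True)
    print "Collected %d reference texts" % len(reference_texts)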