def test_calculate_page_rank(self):
    """Smoke-test PageRank on self.graph: run it and dump the scores.

    Only verifies that run() completes; the printed pairs are for manual
    inspection.
    """
    # damping factor 0.85, convergence tolerance 1e-4
    pagerank = PageRank(self.graph, 0.85, 0.0001)
    pagerank_dict = pagerank.run()
    for k, v in pagerank_dict.items():
        # Fixed: original used the Python 2 print statement, inconsistent
        # with the print() calls elsewhere in this file.
        print(k, v)
    assert True
def test_pagerank(self):
    """Check PageRank power iteration against the known closed-form result."""
    # Column-stochastic link matrix for a 4-node web graph.
    M = mat([[0, 1 / 2, 0, 0],
             [1 / 3, 0, 0, 1 / 2],
             [1 / 3, 0, 1, 1 / 2],
             [1 / 3, 1 / 2, 0, 0]])
    # Uniform initial rank vector.
    R = array([1 / 4, 1 / 4, 1 / 4, 1 / 4]).reshape(-1, 1)
    pr = PageRank(M, R, damping=0.8, max_iter=100)
    R = pr.fit()
    R_true = array([15 / 148, 19 / 148, 95 / 148, 19 / 148]).reshape(-1, 1)
    # Fixed: compare with mean *absolute* error. The original asserted
    # mean(R - R_true) < 1e-3, where positive and negative errors cancel,
    # so a badly wrong vector could still pass.
    assert mean(abs(R - R_true)) < 1e-3
def calculateSolution(dswa, method, gt_solution):
    """Compute a solution for `dswa` with the requested approach.

    method: 'pagerank' (baseline) or 'unsupervised'.
    gt_solution is accepted for interface symmetry; it is not used here.
    Raises ValueError for an unknown method (the original fell through to a
    NameError on `calculated_solution`).
    """
    # BaselineApproach
    if method == 'pagerank':
        pagerank_approach = PageRank(dswa)
        print("Calculation solution...")
        calculated_solution = pagerank_approach.returnSolution(5)
    # UnsupevisedApproach
    elif method == 'unsupervised':
        unsupervisedApproach = Unsupervised_Approach(dswa)
        print("Calculation solution...")
        calculated_solution = unsupervisedApproach.returnSolution()
    else:
        raise ValueError("Unknown method: {}".format(method))
    print('--- Solution ---')
    print(calculated_solution)
    return calculated_solution
def and_rank3(r1, r2):
    # AND-combine two (entity, pagerank) lists by rank position:
    # keep only entities present in both lists, sort each restricted list
    # with snd_cmp, sum each entity's 1-based position across the two
    # orderings, and return the normalized result (reversed after a final
    # snd_cmp sort, so the ordering is flipped before normalization).
    # NOTE: Python 2 code — list.sort(cmp) and dict.iteritems().
    t1s = map(lambda x: x[0], r1)
    t2s = map(lambda x: x[0], r2)
    intersec = set(t1s).intersection(set(t2s))
    # Score lookup tables for each input list.
    map1, map2 = {}, {}
    for t1, fr1 in r1:
        map1[t1] = fr1
    for t2, fr2 in r2:
        map2[t2] = fr2
    # Restrict both lists to the common entities.
    r1_i, r2_i = [], []
    for e in intersec:
        r1_i.append((e, map1[e]))
        r2_i.append((e, map2[e]))
    r1_i.sort(snd_cmp)
    r2_i.sort(snd_cmp)
    # Sum of an entity's positions in the two orderings.
    sum_pos_rank = {}
    for e in intersec:
        sum_pos_rank[e] = 0.0
    for (e, pr), pos in zip(r1_i, range(1, len(r1_i) + 1)):
        sum_pos_rank[e] += float(pos)
    for (e, pr), pos in zip(r2_i, range(1, len(r2_i) + 1)):
        sum_pos_rank[e] += float(pos)
    final = []
    for e, val in sum_pos_rank.iteritems():
        final.append((e, val))
    final.sort(snd_cmp)
    final.reverse()
    return PageRank.normalize(final)
def or_rank(r1, r2):
    # OR-combine two (term, score) lists:
    #  - terms in both lists get the probabilistic-OR score fr1+fr2-fr1*fr2,
    #  - terms in only one list keep their original score,
    # then sort descending by score (Python 2 list.sort(cmp, key, reverse))
    # and normalize.
    # ret = not_rank(and_rank(not_rank(r1), not_rank(r2)))
    # ret = ret.sort(snd_cmp, None, True)
    # return ret
    t1s = map(lambda x: x[0], r1)
    t2s = map(lambda x: x[0], r2)
    newr = []
    # Terms common to both lists: probabilistic OR of the two scores.
    for t1, fr1 in r1:
        for t2, fr2 in r2:
            if t1 == t2:
                newr.append((t1, fr1 + fr2 - fr1 * fr2))
    # Terms appearing in exactly one list keep their score unchanged.
    diff12 = set(t1s).difference(set(t2s))
    diff21 = set(t2s).difference(set(t1s))
    for ent1 in diff12:
        for ent1_aux, float_rank in r1:
            if ent1 == ent1_aux:
                newr.append((ent1, float_rank))
    for ent2 in diff21:
        for ent2_aux, float_rank in r2:
            if ent2 == ent2_aux:
                newr.append((ent2, float_rank))
    newr.sort(snd_cmp, None, True)
    # print str(newr)
    return PageRank.normalize(newr)
def and_rank5_gold2(tags, ranker):
    # Rank nodes matching ALL tags: compute an offline rank over the OR of
    # the tags, compute the node set matching the AND of the tags, and keep
    # only offline-ranked nodes in that intersection.
    # first compute offline rank
    # OR tag formula: one single-atom conjunction per tag.
    tag_form = TagBooleanFormula()
    for tag in tags:
        and1 = TagBooleanConjunction()
        and1.addAtom(TagBooleanAtom(True, tag))
        tag_form.addTagAnd(and1)
    # filter graph by OR of tags
    ranker.filter(tag_form)
    if len(ranker.get_nodes()) > 0:
        ranker.rank(10)
        offline_rank = ranker.get_rank()
    else:
        offline_rank = []
    # AND tag formula: intersect the node sets of the individual tags.
    # NOTE(review): relies on `set(tags)` iterating its first element equal
    # to list(set(tags))[0] — true for a fresh set of the same elements in
    # CPython, but fragile; confirm intended.
    and_nodes = set([])
    for tag in set(tags):
        ranker.filter_one_tag(tag)
        if tag == list(set(tags))[0]:
            and_nodes = ranker.get_nodes()
        else:
            and_nodes = and_nodes.intersection(ranker.get_nodes())
    # now filter by intersection
    ret = []
    for name, pagerank in offline_rank:
        if name in and_nodes:
            ret.append((name, pagerank))
    return PageRank.normalize(ret)
def rank_author():
    """Rank authors by PageRank over the author citation network and write
    one "<author> <score>" line per author to 2014/results/author.txt."""
    ac_net = AuthorCitationNetwork()
    ac_net_m, ac_net_list = ac_net.make_matrix()
    print("Caculate pagerank...")
    ac_net_pgr = PageRank(ac_net_m)
    # m_d: convergence threshold for the power iteration.
    ac_net_pr = ac_net_pgr.caculate("author_iter.txt", m_d=1e-5)
    ac_net_results = list(zip(ac_net_list, ac_net_pr.tolist()))
    ac_net_results.sort(key=itemgetter(1), reverse=True)
    results = []
    for result in ac_net_results:
        # result[0] is the author record (field 1 used); result[1] is a
        # one-element score list.
        results.append(str(result[0][1]) + " " + str(result[1][0]) + "\n")
    print("Writing results...")
    # Fixed: use a context manager (the original leaked the handle) and
    # write the lines themselves — the original f.writelines(str(results))
    # dumped the repr of the whole list into the file.
    with open("2014/results/author.txt", 'w') as f:
        f.writelines(results)
def rank_paper():
    """Rank papers by PageRank over the paper citation network and write
    one "<title> <score>" line per paper to 2014/results/paper.txt."""
    pp_net = PaperCitationNetwork()
    pp_net_m, pp_net_list = pp_net.make_matrix()
    print("Caculating pagerank...")
    pp_net_pgr = PageRank(pp_net_m)
    # m_d: convergence threshold for the power iteration.
    pp_net_pr = pp_net_pgr.caculate("paper_iter.txt", m_d=1e-7)
    pp_net_results = list(zip(pp_net_list, pp_net_pr.tolist()))
    # NOTE(review): ascending sort here, unlike rank_author's reverse=True —
    # confirm which order is intended before changing.
    pp_net_results.sort(key=itemgetter(1))
    results = []
    for result in pp_net_results:
        results.append(str(result[0][1]) + " " + str(result[1][0]) + "\n")
    print("Writing results...")
    # Fixed: context manager closes the file (the original leaked the
    # handle) and the lines are written directly — the original
    # f.writelines(str(results)) dumped the list repr into the file.
    with open("2014/results/paper.txt", 'w') as f:
        f.writelines(results)
def __merge_rank_and_monolitic(self, tags):
    """Restrict the global rank to users carrying every tag in `tags`,
    truncate to the per-rank limit, then normalize and re-attach positions."""
    # Users common to all requested tags.
    shared_users = self.__map_tag_users[tags[0]]
    for extra_tag in tags[1:]:
        shared_users = shared_users.intersection(self.__map_tag_users[extra_tag])
    # Walk the precomputed global rank in order, keeping matches until the
    # configured cap is reached.
    limit = self.__max_per_rank
    selected = []
    for name, pagerank, pos in self.__rank:
        if name not in shared_users:
            continue
        selected.append((name, pagerank))
        if len(selected) >= limit:
            break
    return add_pos(PageRank.normalize(selected))
def and_rank2(r1, r2):
    """AND-combine two (term, rank) lists: restrict both lists to their
    common terms, normalize each restricted list, then delegate to
    and_rank for the final combination."""
    t1s = map(lambda x: x[0], r1)
    t2s = map(lambda x: x[0], r2)
    intersec = set(t1s).intersection(set(t2s))
    # Score lookup tables for each side.
    map1, map2 = {}, {}
    for t1, fr1 in r1:
        map1[t1] = fr1
    for t2, fr2 in r2:
        map2[t2] = fr2
    # (removed: the original initialized an unused `newr` list here)
    r1_int, r2_int = [], []
    for t in intersec:
        r1_int.append((t, map1[t]))
        r2_int.append((t, map2[t]))
    r1_int = PageRank.normalize(r1_int)
    r2_int = PageRank.normalize(r2_int)
    return and_rank(r1_int, r2_int)
def rank_venue():
    """Rank venues by PageRank over the venue citation network and write
    one "<id> <name> <score>" line per venue to 2014/results/venue.txt."""
    vn_net = VenueCitationNetwork()
    vn_net_m, vn_net_list = vn_net.make_matrix()
    print("Caculating pagerank...")
    vn_net_pgr = PageRank(vn_net_m)
    # m_d: convergence threshold for the power iteration.
    vn_net_pr = vn_net_pgr.caculate("venue_iter.txt", m_d=1e-7)
    vn_list = []
    for vn in vn_net_list:
        # Keep only the first two fields of each venue record.
        vn_list.append(vn[0:2])
    vn_net_results = list(zip(vn_list, vn_net_pr.tolist()))
    vn_net_results.sort(key=itemgetter(1), reverse=True)
    results = []
    for result in vn_net_results:
        results.append(
            str(result[0][0]) + " " + str(result[0][1]) + " " +
            str(result[1][0]) + "\n")
    # Fixed: context manager closes the file (the original leaked the
    # handle and printed the progress message only after writing).
    print("Writing results...")
    with open("2014/results/venue.txt", 'w') as f:
        f.writelines(results)
def main(args):
    # Pick a summarizer implementation by the 'alg' argument and print the
    # resulting summary sentences.
    summarizer = {
        'tfidf': TfIdf(),
        'cluster': Cluster(),
        'svd': SVD(),
        'pagerank': PageRank()
    }[args['alg']]
    # tf/df: term-frequency and document-frequency resources.
    summarizer.initialize(args['tf'], args['df'])
    summary = summarizer.summarize(args['doc'])
    for s in summary:
        # NOTE(review): the trailing comma suppresses the newline only under
        # Python 2's print statement; under Python 3 it builds and discards
        # a one-tuple — confirm which interpreter this targets.
        print(s),
def sample_generation(args):
    """Run one PageRank variant and save its sample rank vector to disk.

    args.pagerank selects the variant: "gpr" (global), "ptspr"
    (user-topic-sensitive, cf. main()'s ptspr branch) or "qtspr"
    (query-topic-sensitive). The topic-sensitive variants combine the
    ranked matrix with the "2-1" user/query topic distribution.
    """
    # Preprocessing Step
    print("Numpy Version Check")
    print(np.__version__)
    print("Scipy Version Check")
    print(scipy.__version__)
    data_dicts = preprocessing(transition_matrix_path=args.transition_matrix,
                               doc_topics_path=args.document_topic,
                               user_topic_path=args.user_topic_interest,
                               query_topic_path=args.query_topic_relation,
                               search_relevance_path=args.search_relevance)
    # GPR, PTSPR, QTSPR construction
    if args.pagerank == "gpr":
        pr = PageRank(trans_matrix=data_dicts['transition_matrix'],
                      dampening_factor=args.dampening_factor)
    elif args.pagerank == "ptspr" or args.pagerank == "qtspr":
        pr = TopicSensitivePageRank(
            trans_matrix=data_dicts['transition_matrix'],
            topic_matrix=data_dicts['doc_topic_matrix'],
            dampening_factor=args.dampening_factor,
            topic_factor=args.topic_factor)
    pr.converge()
    if args.pagerank == "gpr":
        np.savetxt("GPR.txt", pr.ranked_vector, delimiter=" ")
    elif args.pagerank == "ptspr":
        # Fixed: the output filenames of the ptspr/qtspr branches were
        # swapped — ptspr uses the *user* topic probabilities (as in
        # main()) and must be saved as PTSPR, not QTSPR.
        topic_prob = data_dicts['user_topic_probs']["2-1"]
        vector = (pr.ranked_matrix * topic_prob.reshape(12, 1)).view(
            np.ndarray).squeeze()
        np.savetxt("PTSPR-U2Q1.txt", vector, delimiter=" ")
    elif args.pagerank == "qtspr":
        topic_prob = data_dicts['query_topic_probs']["2-1"]
        vector = (pr.ranked_matrix * topic_prob.reshape(12, 1)).view(
            np.ndarray).squeeze()
        np.savetxt("QTSPR-U2Q1.txt", vector, delimiter=" ")
    print("===================== END =====================")
def main():
    """Demo entry point: build a sparse matrix from the small test graph
    and run five PageRank iterations, printing weights before and after."""
    google_file = "web-Google.txt"   # full dataset (not used by this demo)
    simple_test = "web-Matvii.txt"   # small hand-made test graph
    adjacency = create_sparse_matrix(simple_test)
    ranker = PageRank(adjacency)
    ranker.init_weights()
    print("Initial weights\n", ranker.weights)
    ranker.calculate_page_rank(5)
    print("Weights after 5 iterations\n", ranker.weights)
def runStates():
    """run pagerank on stateborders.csv"""
    # Build a border graph from the CSV (columns 0 and 2 hold the two
    # quoted state names) and print the PageRank results.
    # NOTE: Python 2 code (print statements).
    f = open('stateborders.csv')
    graph = PageRank()
    for line in f:
        columns = line.split(',')
        left = columns[0].strip('"')
        right = columns[2].strip('"')
        graph.addEdge(left, right)
    graph.printGraph()
    iterations, ranks = graph.getPageRank()
    print "Number of iterations:", iterations
    print returnSorted(ranks)
def and_rank(r1, r2):
    # AND-combine two (term, rank) lists: for terms present in both, the
    # combined score is the product of the two ranks; sort descending by
    # score (Python 2 list.sort(cmp, key, reverse)) and normalize.
    t1s = map(lambda x: x[0], r1)
    t2s = map(lambda x: x[0], r2)
    intersec = set(t1s).intersection(set(t2s))
    # Score lookup tables for each side.
    map1, map2 = {}, {}
    for t1, fr1 in r1:
        map1[t1] = fr1
    for t2, fr2 in r2:
        map2[t2] = fr2
    newr = []
    for t in intersec:
        newr.append((t, map1[t] * map2[t]))
    # newr = []
    # for t1, fr1 in r1:
    #     for t2, fr2 in r2:
    #         if t1 == t2:
    #             newr.append((t1, fr1 * fr2))
    newr.sort(snd_cmp, None, True)
    # print str(newr)
    return PageRank.normalize(newr)
def __init__(self):
    # Build the search GUI: PageRank backend, Tk window, result area,
    # query entry, action buttons, and four dropdowns with echo labels.
    # (This appears to duplicate Aplicacion.__init__ elsewhere in this
    # file; it references class attributes OptionList/OptionMod/OptionRank.)
    self.pagerank = PageRank()
    self.ranking = None
    self.res = None
    # Sorted author list taken from the ranker.
    lista = list(self.pagerank.autores)
    lista.sort()
    self.AutorList = lista
    self.raiz = Tk()
    self.raiz.geometry('950x500')
    self.raiz.title('Buscador')
    # Text area where results are shown.
    self.tinfo = scrolledtext.ScrolledText(self.raiz, width=50, height=30)
    self.tinfo.grid(column = 0, row = 6)
    # Entry where the query is typed.
    self.tentry = Entry(self.raiz, width=40)
    self.tentry.grid(column = 0, row = 5)
    # "Search" button.
    self.binfo = ttk.Button(self.raiz, text='Buscar', command=self.verinfo)
    self.binfo.grid(column = 1, row =5)
    # "Personalized search" button.
    self.bper = ttk.Button(self.raiz, text='Búsqueda personalizada', command=self.verper)
    self.bper.grid(column = 1, row =3)
    # "Show initial ranking" button.
    self.bpag = ttk.Button(self.raiz, text='Mostrar Ranking inicial', command=self.verpag)
    self.bpag.grid(column = 2, row =3)
    # "Quit" button.
    self.bsalir = ttk.Button(self.raiz, text='Salir', command=self.raiz.destroy)
    self.bsalir.grid(column = 2, row = 5)
    # Dropdown: which field to search in.
    self.variable = tk.StringVar(self.raiz)
    self.variable.set(self.OptionList[0])
    opt = tk.OptionMenu(self.raiz, self.variable, *self.OptionList)
    opt.config(width=30, font=('Helvetica', 12))
    opt.grid(column = 0, row = 1)
    # Combobox: author selection.
    self.variable2 = tk.StringVar(self.raiz)
    self.variable2.set(self.AutorList[0])
    opt2 = ttk.Combobox(self.raiz, textvariable = self.variable2, values = self.AutorList)
    opt2.config(width=30, font=('Helvetica', 12))
    opt2.grid(column = 0, row = 3)
    # Dropdown: retrieval model.
    self.variable3 = tk.StringVar(self.raiz)
    self.variable3.set(self.OptionMod[0])
    opt3 = tk.OptionMenu(self.raiz, self.variable3, *self.OptionMod)
    opt3.config(width=30, font=('Helvetica', 12))
    opt3.grid(column = 2, row = 1)
    # Dropdown: whether to display ranking weights (reuses the opt3 local).
    self.variable4 = tk.StringVar(self.raiz)
    self.variable4.set(self.OptionRank[0])
    opt3 = tk.OptionMenu(self.raiz, self.variable4, *self.OptionRank)
    opt3.config(width=30, font=('Helvetica', 12))
    opt3.grid(column = 1, row = 1)
    # Label echoing the selected search field (updated via trace).
    self.labelTest = tk.Label(text="", font=('Helvetica', 12), fg='red')
    self.labelTest.grid(column = 0, row = 2)
    self.variable.trace("w", self.callback)
    # Label echoing the selected author.
    self.labelTest2 = tk.Label(text="", font=('Helvetica', 12), fg='red')
    self.labelTest2.grid(column = 0, row = 4)
    self.variable2.trace("w", self.callback2)
    # Label echoing the selected model.
    self.labelTest3 = tk.Label(text="", font=('Helvetica', 12), fg='red')
    self.labelTest3.grid(column = 2, row = 2)
    self.variable3.trace("w", self.callback3)
    # Label echoing the show-weights choice.
    self.labelTest4 = tk.Label(text="", font=('Helvetica', 12), fg='red')
    self.labelTest4.grid(column = 1, row = 2)
    self.variable4.trace("w", self.callback4)
    self.tentry.focus_set()
    self.raiz.mainloop()
def rank(self, iterations=50, damping_factor=0.85, accurate=False):
    """Compute and cache PageRank scores for the current node/edge sets."""
    # Use the native implementation whenever any edges exist.
    use_native = len(self.__edges) > 0
    ranker = PageRank(list(self.__nodes), self.__edges, use_native,
                      damping_factor)
    self.__pagerank = ranker.ranking(-1, iterations)
def not_rank(rank):
    """Complement a (thing, score) ranking: each score becomes 1 - score
    and the list order is flipped, then the result is normalized."""
    inverted = [(thing, 1 - score) for thing, score in rank]
    inverted.reverse()
    return PageRank.normalize(inverted)
def __init__(self):
    """Load the 20-newsgroups corpus, build tf-idf / inverted-index /
    cluster / adjacency structures (cached as pickles), and compute or
    load the PageRank vector."""
    util.log("Loading data set...")
    self.newsgroup_data = fetch_20newsgroups(remove=('headers', 'footers'))
    # print(data_set.target.shape) # categories per document
    # print(data_set.filenames.shape) # filenames per document
    # Drop near-empty documents. Fixed: the original removed items from
    # the list while iterating it, which skips the element following each
    # removal; filtering into a new list visits every document.
    self.newsgroup_data.data = [
        doc for doc in self.newsgroup_data.data if len(doc) >= 5
    ]
    self.newsgroup_frame = pd.DataFrame.from_dict(
        {'text': self.newsgroup_data.data})
    #f = self.newsgroup_frame.text.str.contains('National Rifle Association')
    #ids = np.arange(len(self.newsgroup_frame))[f]
    #self.list_docs(ids)
    #return
    self.tfidf_matrix = TfIdfMatrix.from_data_set(self.newsgroup_data.data)
    self.inverted_index = InvertedIndex.from_tf_idf_matrix(
        self.tfidf_matrix)
    util.log("Clustering...")
    self.kmeans = KMeans(tfidf=self.tfidf_matrix.get_matrix(), k=100,
                         max_iterations=30, random_initial=False)
    # Reuse a cached cluster assignment if one exists.
    try:
        self.kmeans.load_cluster_vector('cluster_vector.pkl')
    except FileNotFoundError:
        self.kmeans.do_magic()
        self.kmeans.store_cluster_vector('cluster_vector.pkl')
    util.log("Finished.")
    r = self.kmeans.vector.ravel()
    u = np.unique(self.kmeans.vector)
    print(u)
    # Reuse a cached adjacency matrix if one exists. Fixed: the original
    # pkl.load(open(...)) leaked the file handle.
    try:
        with open('adjacency_matrix.pkl', 'rb') as f:
            self.adjacency_matrix = pkl.load(f)
    except FileNotFoundError:
        self.adjacency_matrix = AdjacencyMatrix.from_cluster_and_tf_idf_matrix(
            r, self.tfidf_matrix)
        with open('adjacency_matrix.pkl', 'wb') as f:
            pkl.dump(self.adjacency_matrix, f)
    # Reuse a precomputed PageRank vector if one exists.
    try:
        pr = PageRank(pickle='pr.pkl')
    except FileNotFoundError:
        util.log("No precomputed PageRank...")
        util.log("Calculating PR...")
        pr = PageRank(adjacency_matrix=self.adjacency_matrix.get_matrix(),
                      alpha=0.85, converge=0.00001)
        util.log("Finished PR")
        pr.store_rank_vector('pr.pkl')
    self.pr_vector = pr.get_pagerank(normalized=True)
def search_init(self):
    # Prepare search support: warm up the jieba tokenizer and construct
    # the PageRank-based result sorter used by search().
    jieba.initialize()
    self.pagerank = PageRank()
def main(args):
    # Run the selected PageRank variant (gpr / ptspr / qtspr), re-rank the
    # retrieval results for every query, and write a TREC-style run file
    # "<variant>_<cfg>.txt" with lines:
    #   <query_ID> Q0 <doc_id> <rank> <score> <cfg>
    # Preprocessing Step
    data_dicts = preprocessing(transition_matrix_path=args.transition_matrix,
                               doc_topics_path=args.document_topic,
                               user_topic_path=args.user_topic_interest,
                               query_topic_path=args.query_topic_relation,
                               search_relevance_path=args.search_relevance)
    # GPR, PTSPR, QTSPR construction
    if args.pagerank == "gpr":
        pr = PageRank(trans_matrix=data_dicts['transition_matrix'],
                      dampening_factor=args.dampening_factor)
    elif args.pagerank == "ptspr" or args.pagerank == "qtspr":
        pr = TopicSensitivePageRank(
            trans_matrix=data_dicts['transition_matrix'],
            topic_matrix=data_dicts['doc_topic_matrix'],
            dampening_factor=args.dampening_factor,
            topic_factor=args.topic_factor)
    # Time the power iteration until convergence.
    pr_start = time.time()
    pr.converge()
    pr_end = time.time()
    print("Power iteration - {} required time: {:.3f}seconds".format(
        args.pagerank, pr_end - pr_start))
    pr_result = []
    for query_ID in data_dicts['search_relevance_score'].keys():
        candidate_indices, retrieval_scores = data_dicts[
            'search_relevance_score'][query_ID]
        # ptspr combines with the user's topic distribution, qtspr with the
        # query's; gpr uses neither.
        user_topic_prob = data_dicts['user_topic_probs'][query_ID]
        query_topic_prob = data_dicts['query_topic_probs'][query_ID]
        if args.pagerank == "gpr":
            pr_indices, pr_scores = pr.ranking(candidate_indices,
                                               retrieval_scores,
                                               criterion=args.criterion)
        elif args.pagerank == "ptspr":
            pr_indices, pr_scores = pr.ranking(candidate_indices,
                                               retrieval_scores,
                                               user_topic_prob,
                                               criterion=args.criterion)
        elif args.pagerank == "qtspr":
            pr_indices, pr_scores = pr.ranking(candidate_indices,
                                               retrieval_scores,
                                               query_topic_prob,
                                               criterion=args.criterion)
        for idx in range(len(candidate_indices)):
            # Print function: assemble one run-file line
            # (doc ids and ranks are 1-based in the output).
            temp = [[]]
            temp[0].append(query_ID)
            temp[0].append("Q0")
            temp[0].append(str(pr_indices[idx] + 1))
            temp[0].append(str(idx + 1))
            temp[0].append(str(pr_scores[idx]))
            temp[0].append(args.cfg)
            pr_str = " ".join(temp[0])
            pr_result.append(pr_str)
    pr_result_text = "\n".join(pr_result)
    with open(args.pagerank + "_" + args.cfg + ".txt", "w") as f:
        f.write(pr_result_text)
    # Total wall time including ranking and output.
    pr_end = time.time()
    print("total {} required time : {:.3f}seconds".format(
        args.pagerank, pr_end - pr_start))
    print("===================== END =====================")
class SQLDB:
    """SQLite-backed page store with a word->page inverted index and a
    PageRank-based search front end.

    Tables:
      pages(id, journal, title, content, keys)
      dicts(word, id)  -- inverted index: word -> page id
    """

    def __init__(self, path, clean, commit_rate=50):
        # path: sqlite database file.
        # clean: if True, delete any existing database before opening.
        # commit_rate: auto-commit every N save() calls.
        self.db = None
        self.logger = logging.getLogger('SQLDB')
        if clean:
            if os.path.exists(path):
                os.remove(path)
                self.logger.info('Previous database has been deleted.')
        # Rebuild the schema when the file does not exist yet.
        rebuild = not os.path.exists(path)
        try:
            self.db = sqlite3.connect(path)
        except:
            # NOTE(review): bare except hides the actual failure; consider
            # catching sqlite3.Error and logging the exception.
            self.logger.error('Error while opening database.')
            exit(1)
        else:
            self.logger.info('Database opened successfully.')
        cursor = self.db.cursor()
        if rebuild:
            cursor.execute(
                'CREATE TABLE pages (id int primary key, journal text, title text, content text, keys text)'
            )
            cursor.execute('CREATE TABLE dicts (word text, id int)')
            self.index = 0
        else:
            # Continue numbering from the largest stored page id.
            self.index = cursor.execute(
                'SELECT max(id) from pages').fetchone()[0]
        # NOTE(review): message typo — "if" should presumably be "is"
        # (runtime string left unchanged here).
        self.logger.info('The size of database if {}'.format(self.index))
        self.commit_rate = commit_rate
        self.commit_count = 0

    def search_init(self):
        # Warm up the jieba tokenizer and build the PageRank sorter; call
        # before search().
        jieba.initialize()
        self.pagerank = PageRank()

    def commit(self, show=True):
        # Commit pending writes; optionally log it.
        self.db.commit()
        if show:
            self.logger.info('Database commit finished.')

    def flush(self):
        # Final commit and close of the connection.
        self.db.commit()
        self.db.close()
        self.logger.info('Database flushed.')

    def save(self, journal, title, content, words, keys, commit=False):
        # Insert one page plus its inverted-index words; auto-commits every
        # commit_rate saves (or immediately when commit=True).
        self.index += 1
        cursor = self.db.cursor()
        cursor.execute("INSERT INTO pages VALUES (?, ?, ?, ?, ?)",
                       [self.index, journal, title, content, "+".join(keys)])
        for word in words:
            cursor.execute("INSERT INTO dicts VALUES (?, ?)",
                           [word, self.index])
        info = '{} (Journal {})'.format(title, journal)
        self.logger.info('Saved {}, index will be {}.'.format(
            info, self.index))
        self.commit_count += 1
        if commit or self.commit_count % self.commit_rate == 0:
            self.commit()

    def search(self, keyword, sort=True):
        # search
        # Strip punctuation (ASCII and full-width/Chinese), tokenize with
        # jieba's search mode, then resolve matching page ids through the
        # inverted index. Placeholders are generated per key; the values
        # themselves go through parameter binding.
        keys = re.sub('[\s+\.\!\/_,$%^*(+\"\']+|[+——!,。?、~@#¥%……&*·():;【】“”]+', '', keyword)
        self.logger.info('Get request for {}.'.format(keys))
        keys = list(jieba.cut_for_search(keys))
        cursor = self.db.cursor()
        cursor.execute(
            'SELECT * from dicts WHERE word in ({})'.format(', '.join(
                '?' for _ in keys)), keys)
        arts = set()
        for art in cursor.fetchall():
            arts.add(art[1])
        arts = list(arts)
        cursor.execute(
            'SELECT * from pages WHERE id in ({})'.format(', '.join(
                '?' for _ in arts)), arts)
        arts = cursor.fetchall()
        results = self.pagerank.sort(keys, arts)
        for art in results:
            print('Title: {}'.format(art[2]))
        return
        # NOTE(review): everything below is unreachable because of the
        # unconditional return above — presumably debug code left in.
        # pagerank
        if sort:
            return self.pagerank.sort(keys, arts)
        return arts
class Aplicacion():
    """Tk GUI for an article search engine backed by a PageRank index."""

    # Search-field choices: all fields / title / abstract / keywords.
    OptionList = [
        "Todos los campos", "Título", "Abstract", "Palabras clave"]
    # Retrieval models: vector space / boolean.
    OptionMod = ["Vectorial", "Booleano"]
    # Whether to display ranking scores next to results.
    OptionRank = ["No mostrar Ranking", "Mostrar Ranking"]

    def __init__(self):
        # Ranking backend and current result state.
        self.pagerank = PageRank()
        self.ranking = None
        self.res = None
        # Sorted author list taken from the ranker.
        lista = list(self.pagerank.autores)
        lista.sort()
        self.AutorList = lista
        self.raiz = Tk()
        self.raiz.geometry('950x500')
        self.raiz.title('Buscador')
        # Text area where results are shown.
        self.tinfo = scrolledtext.ScrolledText(self.raiz, width=50, height=30)
        self.tinfo.grid(column = 0, row = 6)
        # Entry where the query is typed.
        self.tentry = Entry(self.raiz, width=40)
        self.tentry.grid(column = 0, row = 5)
        # "Search" button.
        self.binfo = ttk.Button(self.raiz, text='Buscar', command=self.verinfo)
        self.binfo.grid(column = 1, row =5)
        # "Personalized search" button.
        self.bper = ttk.Button(self.raiz, text='Búsqueda personalizada', command=self.verper)
        self.bper.grid(column = 1, row =3)
        # "Show initial ranking" button.
        self.bpag = ttk.Button(self.raiz, text='Mostrar Ranking inicial', command=self.verpag)
        self.bpag.grid(column = 2, row =3)
        # "Quit" button.
        self.bsalir = ttk.Button(self.raiz, text='Salir', command=self.raiz.destroy)
        self.bsalir.grid(column = 2, row = 5)
        # Dropdown: which field to search in.
        self.variable = tk.StringVar(self.raiz)
        self.variable.set(self.OptionList[0])
        opt = tk.OptionMenu(self.raiz, self.variable, *self.OptionList)
        opt.config(width=30, font=('Helvetica', 12))
        opt.grid(column = 0, row = 1)
        # Combobox: author selection.
        self.variable2 = tk.StringVar(self.raiz)
        self.variable2.set(self.AutorList[0])
        opt2 = ttk.Combobox(self.raiz, textvariable = self.variable2, values = self.AutorList)
        opt2.config(width=30, font=('Helvetica', 12))
        opt2.grid(column = 0, row = 3)
        # Dropdown: retrieval model.
        self.variable3 = tk.StringVar(self.raiz)
        self.variable3.set(self.OptionMod[0])
        opt3 = tk.OptionMenu(self.raiz, self.variable3, *self.OptionMod)
        opt3.config(width=30, font=('Helvetica', 12))
        opt3.grid(column = 2, row = 1)
        # Dropdown: whether to display weights (reuses the opt3 local).
        self.variable4 = tk.StringVar(self.raiz)
        self.variable4.set(self.OptionRank[0])
        opt3 = tk.OptionMenu(self.raiz, self.variable4, *self.OptionRank)
        opt3.config(width=30, font=('Helvetica', 12))
        opt3.grid(column = 1, row = 1)
        # Label echoing the selected search field (updated via trace).
        self.labelTest = tk.Label(text="", font=('Helvetica', 12), fg='red')
        self.labelTest.grid(column = 0, row = 2)
        self.variable.trace("w", self.callback)
        # Label echoing the selected author.
        self.labelTest2 = tk.Label(text="", font=('Helvetica', 12), fg='red')
        self.labelTest2.grid(column = 0, row = 4)
        self.variable2.trace("w", self.callback2)
        # Label echoing the selected model.
        self.labelTest3 = tk.Label(text="", font=('Helvetica', 12), fg='red')
        self.labelTest3.grid(column = 2, row = 2)
        self.variable3.trace("w", self.callback3)
        # Label echoing the show-weights choice.
        self.labelTest4 = tk.Label(text="", font=('Helvetica', 12), fg='red')
        self.labelTest4.grid(column = 1, row = 2)
        self.variable4.trace("w", self.callback4)
        self.tentry.focus_set()
        self.raiz.mainloop()

    # Trace callback: the search-field dropdown changed.
    def callback(self, *args):
        self.labelTest.configure(text="Has seleccionado {}".format(self.variable.get()))

    # Trace callback: the author selection changed.
    def callback2(self, *args):
        self.labelTest2.configure(text="Has seleccionado {}".format(self.variable2.get()))

    # Trace callback: the retrieval-model dropdown changed.
    def callback3(self, *args):
        self.labelTest3.configure(text="Has seleccionado {}".format(self.variable3.get()))

    # Trace callback: the show-weights choice changed; also re-renders the
    # current result list with or without scores.
    def callback4(self, *args):
        self.labelTest4.configure(text="Has seleccionado {}".format(self.variable4.get()))
        self.tinfo.delete("1.0", END)
        texto_info = ""
        if(not self.res):
            texto_info = "No hay articulos que coincidan con su búsqueda \n"
        else:
            if(isinstance(self.res, list)):
                if(self.variable4.get() == "No mostrar Ranking"):
                    for r in self.res:
                        texto_info += "- " + r.titulo + "\n"
                else:
                    for i in range(len(self.res)):
                        texto_info += "- " + self.res[i].titulo + " - " + str(self.ranking[i]) + "\n"
            else:
                texto_info = self.res
        self.tinfo.insert("1.0", texto_info)

    # "Buscar" button: run the typed query under the selected model — the
    # vector model goes through busquedapersonalizada (without feedback),
    # anything else through the boolean filter — then render the results.
    def verinfo(self):
        self.tinfo.delete("1.0", END)
        palabra = self.tentry.get()
        if(self.variable3.get() == "Vectorial"):
            self.res, self.ranking = self.pagerank.busquedapersonalizada(palabra, self.variable2.get(), False)
        else:
            self.res, self.ranking = self.pagerank.filtrar(palabra, self.variable.get())
        texto_info = ""
        if(not self.res):
            texto_info = "No hay articulos que coincidan con su búsqueda \n"
        else:
            if(isinstance(self.res, list)):
                if(self.variable4.get() == "No mostrar Ranking"):
                    for r in self.res:
                        texto_info += "- " + r.titulo + "\n"
                else:
                    for i in range(len(self.res)):
                        texto_info += "- " + self.res[i].titulo + " - " + str(self.ranking[i]) + "\n"
            else:
                texto_info = self.res
        self.tinfo.insert("1.0", texto_info)

    # "Búsqueda personalizada" button: same query, but with the relevance-
    # feedback flag enabled; then render the results.
    def verper(self):
        self.tinfo.delete("1.0", END)
        palabra = self.tentry.get()
        self.res, self.ranking = self.pagerank.busquedapersonalizada(palabra, self.variable2.get(), True)
        texto_info = ""
        if(not self.res):
            texto_info = "No hay articulos que coincidan con su búsqueda \n"
        else:
            if(isinstance(self.res, list)):
                if(self.variable4.get() == "No mostrar Ranking"):
                    for r in self.res:
                        texto_info += "- " + r.titulo + "\n"
                else:
                    for i in range(len(self.res)):
                        texto_info += "- " + self.res[i].titulo + " - " + str(self.ranking[i]) + "\n"
            else:
                texto_info = self.res
        self.tinfo.insert("1.0", texto_info)

    # "Mostrar Ranking inicial" button: list all documents ordered by
    # their PageRank score.
    def verpag(self):
        self.tinfo.delete("1.0", END)
        self.res, self.ranking = self.pagerank.ordenarresultados(self.pagerank.nodos.copy())
        texto_info = ""
        if(not self.res):
            texto_info = "No hay articulos que coincidan con su búsqueda \n"
        else:
            if(isinstance(self.res, list)):
                if(self.variable4.get() == "No mostrar Ranking"):
                    for r in self.res:
                        texto_info += "- " + r.titulo + "\n"
                else:
                    for i in range(len(self.res)):
                        texto_info += "- " + self.res[i].titulo + " - " + str(self.ranking[i]) + "\n"
            else:
                texto_info = self.res
        self.tinfo.insert("1.0", texto_info)
# Attach tournament seeds to each season's teams, then set up counters for
# scoring predictions against madresults.txt.
# Assumes seedsF lines are tab-separated: season tag, seed string, team id
# (first three characters) — TODO confirm against the seed file format.
seCur = "d"      # current season tag ("d" is a sentinel that matches nothing)
indCur = -1      # index of the current season in `seasons`
seSeed = seasons[0]
for line in seedsF:
    gal = line.split("\t")
    if seCur not in gal[0]:
        # New season: store the previous season's seeded dict (if any)
        # and advance to the next one.
        if indCur != -1:
            seasons[indCur] = deepcopy(seSeed)
        seCur = gal[0]
        indCur += 1
        seSeed = deepcopy(seasons[indCur])
    te = gal[2][0:3]
    # Seed number taken from characters 1-2 of the seed string
    # (presumably e.g. "W01" -> 1.0 — verify against the data).
    seSeed[te].seed = float(gal[1][1:3])
# Store the last season processed.
seasons[indCur] = deepcopy(seSeed)
# j = seasons[0]
pr = PageRank()
# m = pr.rank(j)
# new_dict = dict(zip(m.values(), m.keys()))
# sorted_arr = sorted(new_dict.keys())
# for n in reversed(sorted_arr):
#     print names[new_dict[n]] + " " + str(n)
mad = open("madresults.txt", "r")
# Prediction-scoring accumulators.
curseason = "0"
curInd = -1
correct = 0
wrong = 0
correctAvg = 0.0
overCorrect = 0
wrongAvg = 0.0
lowestWrong = 0.0
def runFootball():
    """run pagerank on NCAA_football.csv"""
    # Each CSV row holds: team1, score1, team2, score2. An edge from the
    # loser to the winner feeds PageRank; ties add edges both ways.
    # NOTE: Python 2 code (print statements).
    f = open('NCAA_football.csv')
    graph = PageRank()
    for line in f:
        columns = line.split(',')
        team1 = columns[0].strip()
        value1 = int(columns[1])
        team2 = columns[2].strip()
        value2 = int(columns[3])
        if value1 > value2:
            graph.addEdge(team2, team1)
        elif value1 < value2:
            graph.addEdge(team1, team2)
        else:
            graph.addEdge(team2, team1)
            graph.addEdge(team1, team2)
    graph.printGraph()
    iterations, ranks = graph.getPageRank()
    print "Number of iterations:", iterations
    print returnSorted(ranks)
def __init__(self):
    # Wire up the data source and the PageRank ranker used by this object.
    self.db = Datasource()
    self.pagerank = PageRank()
from pagerank import PageRank

# Small hand-built directed graph: node -> set of outgoing links.
# NOTE: Python 2 script (print statements).
graph = {}
graph["A"] = set(["B", "C", "E"])
graph["B"] = set(["C", "E"])
graph["C"] = set(["D"])
graph["D"] = set([])
graph["E"] = set([])

pr = PageRank(graph, .25, "test")
print "Itterations"
# presumably runs a fixed number of iterations — confirm in pagerank module
pr.runPageRankI(100)
print "\nConverge"
# presumably iterates until the change drops below epsilon — confirm
pr.runPageRankE(.000001)
from copy import deepcopy

# Load team names and one season of game results, then rank teams with
# PageRank and print a blended score.
# NOTE: Python 2 script (print statement at the end).

# Map team id -> team name from teamname.txt ("id\tname" per line).
names = dict()
e = open('teamname.txt', 'r')
for name in e:
    sp = name.split('\t')
    names[sp[0]] = sp[1][0:len(sp[1])-1]  # drop the trailing newline
f = open('2014se.txt', 'r')
seasons = []
seas = dict()  # team id -> Team, accumulated game by game
for line in f:
    # Tab-separated game record; fields 2/4 are the team ids and
    # fields 3/5 presumably their scores — confirm against the data file.
    gam = line.split('\t')
    if gam[2] in seas:
        team1 = seas[gam[2]]
    else:
        team1 = Team(names[gam[2]])
    if gam[4] in seas:
        team2 = seas[gam[4]]
    else:
        team2 = Team(names[gam[4]])
    team1.addGame(Game(gam[4], gam[3], gam[5]))
    team2.addGame(Game(gam[2], gam[5], gam[3]))
    seas[gam[2]] = deepcopy(team1)
    seas[gam[4]] = deepcopy(team2)
    # NOTE(review): appends a snapshot of the season after every game;
    # if a single end-of-season snapshot was intended this belongs after
    # the loop — confirm against the consumer of `seasons`.
    seasons.append(deepcopy(seas))
j = seas
pr = PageRank()
m = pr.rank(j)
for team in j.keys():
    # Blend PageRank with the team's own score total using fixed weights.
    print names[team] + '\t' + str(m[team]*169.6 + j[team].getScores()[0]*0.0701)