def wikigoogle(w, k, p):
    '''
    Input:
        - w: a word
        - k: number of results
        - p: pagerank eigenvector
    Output:
        - the list of the names of the k highest-pagerank Wikipedia
          articles containing the word w
    '''
    # Articles containing w (find_word presumably returns a list of
    # article titles keyed into p — TODO confirm against pagerank module).
    matching = pagerank.find_word(w)
    # Rank by pagerank score, best first, and keep the top k.
    ranked = sorted(matching, key=lambda title: p[title], reverse=True)
    return ranked[:k]
def power_method_biased(A1, i, r):
    '''
    Input:
        - A1: a matrix, as in power_method
        - i: number of iterations
        - r: bias label (a word); articles containing it get extra weight
    Output:
        - Approximate eigenvector of .55*A_1 + 0.15*A_2 + 0.3*A_r
    '''
    # Articles containing the bias word r.
    biased_articles = pagerank.find_word(r)
    # A_r has a 1 at (row, c) whenever c is a biased article and the link
    # matrix has a positive entry A1[row, c].
    # Loop variable renamed `row` so it no longer shadows the parameter r
    # (the original reused `r` inside the comprehension).
    # NOTE(review): unlike the make_Markov variant of this function, these
    # columns are not normalized — confirm that is intended.
    A_r = Mat(A1.D, {(row, c): 1
                     for c in biased_articles
                     for row in A1.D[0]
                     if A1[row, c] > 0})
    # A2: uniform "teleport" matrix, every entry 1/n.
    A2 = Mat(A1.D, {(row, c): 1 / len(A1.D[0])
                    for row in A1.D[0]
                    for c in A1.D[1]})
    # Weighted mix of link structure, teleportation, and bias.
    A = .55 * A1 + .15 * A2 + .30 * A_r
    return power_method(A, i)
def power_method_biased(A1, i, r):
    '''
    Input:
        - A1: a matrix, as in power_method
        - i: number of iterations
        - r: bias label (a word); articles containing it get extra weight
    Output:
        - Approximate eigenvector of .55*A_1 + 0.15*A_2 + 0.3*A_r
    '''
    # Articles containing the bias word r.
    list_r = pagerank.find_word(r)
    # Columns that any biased article links to (A1[row, c] > 0).
    # Loop variable renamed `row` so it no longer shadows the parameter r.
    list_links2r = [c for row in list_r for c in A1.D[0] if A1[row, c] > 0]
    # Bias matrix, column-normalized into a Markov matrix.
    A_r = make_Markov(Mat(A1.D, {(row, c): 1
                                 for row in list_r
                                 for c in list_links2r}))
    # A2: uniform "teleport" matrix, every entry 1/n.  The original line
    # read `0.15A2`, a syntax error with A2 undefined; reconstructed from
    # the sibling implementation of this function.
    A2 = Mat(A1.D, {(row, c): 1 / len(A1.D[0])
                    for row in A1.D[0]
                    for c in A1.D[1]})
    A = 0.55 * A1 + 0.15 * A2 + 0.3 * A_r
    return power_method(A, i)
# --- Fragment: tail of a power-method iteration.  The enclosing `def` is
# not visible in this chunk; `A1`, `i`, and the initial `v` come from that
# outer scope — TODO confirm against the full file. ---
for _ in range(i):
    # Link-matrix step.
    v1 = (A1 * v)
    # Teleport step: (uniform vector . v) spread over an all-ones vector,
    # where the uniform vector assigns 1/n to each of A1's column labels.
    v2 = (Vec(A1.D[1], {c: 1.0 / len(A1.D[1]) for c in A1.D[1]}) * v) * Vec(
        v.D, {c: 1 for c in v.D})
    # Damped combination (0.85 link / 0.15 teleport), then renormalize.
    v = 0.85 * v1 + 0.15 * v2
    v = normalized(v)
# Debug output for one known label.
print("Power_method, v['sport']: ", v['sport'])
# `return` belongs to the unseen enclosing function.
return v

## 4: (Task 12.12.4) Jordan
# Script-level task: reload the pagerank data and count the documents
# containing the word 'jordan'.
reload(pagerank)
A1 = pagerank.read_data()
number_of_docs_with_jordan = len(pagerank.find_word('jordan'))
print("Num jordan docs: ", number_of_docs_with_jordan)

## 5: (Task 12.12.5) Wikigoogle
def wikigoogle(w, k, p):
    '''
    Input:
        - w: a word
        - k: number of results
        - p: pagerank eigenvector
    Output:
        - the list of the names of the k highest-pagerank Wikipedia
          articles containing the word w
    '''
    # NOTE(review): this definition appears truncated in the extraction —
    # only the first statement of its body survives here; a complete
    # version of wikigoogle exists elsewhere in the file.
    related = pagerank.find_word(w)
def wikigoogle(w, k, p):
    '''
    Input:
        - w: a word
        - k: number of results
        - p: pagerank eigenvector
    Output:
        - the list of the names of the k highest-pagerank Wikipedia
          articles containing the word w
    '''
    # Articles containing w; sorted in place by pagerank score,
    # highest first, then truncated to the k best.
    articles = pagerank.find_word(w)
    articles.sort(key=lambda article: p[article], reverse=True)
    return articles[:k]