Ejemplo n.º 1
0
def wikigoogle(w, k, p):
    '''
    Return the best-ranked articles that contain a given word.

    Input:
        - w: a word
        - k: number of results
        - p: pagerank eigenvector, indexed by the entries that
          pagerank.find_word returns
    Output:
        - the list of the names of the k highest-pagerank Wikipedia
          articles containing the word w, highest pagerank first
    '''
    # sorted() builds a new list, so the list handed back by
    # pagerank.find_word keeps its original order for any other user of it.
    ranked = sorted(pagerank.find_word(w),
                    key=lambda title: p[title],
                    reverse=True)
    return ranked[:k]
Ejemplo n.º 2
0
def power_method_biased(A1, i, r):
    '''
    Run power_method on a mixture of A1, a constant matrix and a matrix
    biased by the label r.

    Input:
        - A1: a matrix, as in power_method
        - i: number of iterations
        - r: bias label
    Output:
        - Approximate eigenvector of 0.55*A_1 + 0.15*A_2 + 0.30*A_r
    '''
    biased_articles = pagerank.find_word(r)
    row_labels = A1.D[0]
    col_labels = A1.D[1]

    # A_r has a 1 at (row, c) for every c returned by find_word(r) and every
    # row label whose A1 entry in that position is positive.  The loop
    # variable is called `row` so that it does not shadow the parameter r.
    A_r = Mat(A1.D, {(row, c): 1
                     for c in biased_articles
                     for row in row_labels
                     if A1[row, c] > 0})

    # A_2 holds the same value in every position.  The value is computed once
    # instead of once per entry, and 1.0 keeps it a true division even where
    # `/` on two ints truncates (Python 2).
    uniform = 1.0 / len(row_labels)
    A_2 = Mat(A1.D, {(row, c): uniform
                     for row in row_labels
                     for c in col_labels})

    A = .55 * A1 + .15 * A_2 + .30 * A_r
    return power_method(A, i)
Ejemplo n.º 3
0
def power_method_biased(A1, i, r):
    '''
    Run power_method on a mixture of A1, A2 and a matrix biased by the
    label r.

    Input:
        - A1: a matrix, as in power_method
        - i: number of iterations
        - r: bias label
    Output:
        - Approximate eigenvector of 0.55*A_1 + 0.15*A_2 + 0.3*A_r
    '''
    articles_with_r = pagerank.find_word(r)

    # Labels c from A1.D[0] for which A1[article, c] is positive for some
    # article returned by find_word(r).  A label may appear more than once;
    # the repeats only rewrite the same key below.  The loop variable is
    # named `article` so that it does not shadow the parameter r.
    linked_labels = [c
                     for article in articles_with_r
                     for c in A1.D[0]
                     if A1[article, c] > 0]

    # NOTE(review): this relies on make_Markov returning the matrix.  If it
    # normalizes in place and returns None, A_r will be None -- confirm.
    A_r = make_Markov(Mat(A1.D, {(article, c): 1
                                 for article in articles_with_r
                                 for c in linked_labels}))

    # The original line read `0.15A2`, which is a syntax error; the missing
    # multiplication operator is restored here.
    # NOTE(review): A2 is not defined in this function; presumably it is a
    # module-level matrix -- confirm it is in scope.
    A = 0.55 * A1 + 0.15 * A2 + 0.3 * A_r
    return power_method(A, i)
Ejemplo n.º 4
0
    for _ in range(i):
        v1 = (A1 * v)
        v2 = (Vec(A1.D[1], {c: 1.0 / len(A1.D[1])
                            for c in A1.D[1]}) * v) * Vec(
                                v.D, {c: 1
                                      for c in v.D})
        v = 0.85 * v1 + 0.15 * v2
        v = normalized(v)
        print("Power_method, v['sport']: ", v['sport'])
    return v


## 4: (Task 12.12.4) Jordan
# Reload the pagerank module before using it.
# NOTE(review): `reload` is not defined in this excerpt -- presumably the
# Python 2 builtin or importlib.reload imported elsewhere; confirm.
reload(pagerank)
# NOTE(review): presumably the link matrix used by the power-method code --
# confirm against pagerank.read_data.
A1 = pagerank.read_data()
# Number of entries pagerank.find_word returns for the word 'jordan'.
number_of_docs_with_jordan = len(pagerank.find_word('jordan'))
print("Num jordan docs: ", number_of_docs_with_jordan)


## 5: (Task 12.12.5) Wikigoogle
def wikigoogle(w, k, p):
    '''
    Input:
        - w: a word
        - k: number of results
        - p: pagerank eigenvector
    Output:
        - the list of the names of the k highest-pagerank Wikipedia
          articles containing the word w
    '''
    # Collect the candidates for w from the pagerank module.
    # NOTE(review): the visible excerpt stops after this statement; the
    # ranking by p and the return of the top k entries are not shown here.
    related = pagerank.find_word(w)
Ejemplo n.º 5
0
def wikigoogle(w, k, p):
    '''
    Input:
        - w: a word
        - k: number of results
        - p: lookup giving a score for each entry returned by
          pagerank.find_word
    Output:
        - the first k entries of pagerank.find_word(w) after ordering them
          by descending score in p
    '''
    def score(title):
        return p[title]

    # The list returned by find_word is sorted in place, largest score first.
    matches = pagerank.find_word(w)
    matches.sort(key=score, reverse=True)
    top_k = matches[:k]
    return top_k