Ejemplo n.º 1
0
def findAnchors(Q, K, params, candidates=None):
    """Return the anchor indices found on a randomly projected, row-normalized Q.

    Q is row-normalized in place, projected down to ``params['new_dim']``
    dimensions with ``rp.Random_Projection``, and searched with
    ``gs.Projection_Find``.  Q is rescaled back to its original row sums
    before returning, including when the projection or the search raises.

    Args:
        Q: 2-D matrix supporting ``Q.sum(1)``, ``Q.T`` and row assignment
            (presumably a word co-occurrence matrix -- confirm with caller).
        K: number of anchors, forwarded to ``gs.Projection_Find``.
        params: mapping with keys ``'eps'`` (lower bound applied to the row
            sums), ``'seed'`` (used as the RNG seed only when > 0; otherwise
            the generator is created with ``None``) and ``'new_dim'``.
        candidates: row indices eligible as anchors; defaults to every row.

    Returns:
        The second element of the pair returned by ``gs.Projection_Find``.
    """
    eps = params['eps']
    # Random number generator for generating dimension reduction
    seed = params['seed'] if params['seed'] > 0 else None
    prng_W = RandomState(seed)
    new_dim = params['new_dim']

    # Identity test on purpose: ``candidates == None`` is element-wise for
    # NumPy arrays, which makes the ``if`` raise for any multi-element array.
    if candidates is None:
        candidates = np.arange(Q.shape[0])

    # row normalize Q; sums below eps are clamped so no row is divided by ~0
    row_sums = Q.sum(1)
    row_sums[row_sums < eps] = eps
    num_rows = Q.shape[0]
    for i in range(num_rows):
        Q[i, :] = Q[i, :] / float(row_sums[i])

    try:
        # Reduced dimension random projection method for recovering anchor words
        Q_red = rp.Random_Projection(Q.T, new_dim, prng_W).T
        _anchors, anchor_indices = gs.Projection_Find(Q_red, K, candidates)
    finally:
        # restore the original Q even when the steps above fail, so the
        # caller's matrix is never left in its normalized state
        for i in range(num_rows):
            Q[i, :] = Q[i, :] * float(row_sums[i])

    return anchor_indices
Ejemplo n.º 2
0
def find_anchors(Q, K, candidates, dim, seed):
    """Return the anchor indices found on a randomly projected, row-normalized Q.

    Q is row-normalized in place, projected down to ``dim`` dimensions with
    ``rp.Random_Projection``, and searched with ``gs.Projection_Find``.  Q is
    rescaled back to its original row sums before returning, including when
    the projection or the search raises.

    Args:
        Q: 2-D matrix supporting ``Q.sum(1)``, ``Q.T`` and row assignment
            (presumably a word co-occurrence matrix -- confirm with caller).
        K: number of anchors, forwarded to ``gs.Projection_Find``.
        candidates: row indices eligible as anchors, forwarded unchanged.
        dim: target dimension of the random projection.
        seed: seed handed to ``np.random.RandomState``.

    Returns:
        The second element of the pair returned by ``gs.Projection_Find``.
    """
    # Random number generator for generating dimension reduction
    prng_W = np.random.RandomState(seed)

    row_sums = Q.sum(1)
    num_rows = Q.shape[0]

    # Per-row scale factors.  A row whose sum is exactly zero keeps a factor
    # of 1: dividing it by zero would turn it into NaN, and multiplying NaN
    # by zero in the restore step could never bring the original row back.
    scales = []
    for i in range(num_rows):
        total = float(row_sums[i])
        scales.append(total if total != 0.0 else 1.0)

    # row normalize Q
    for i, scale in enumerate(scales):
        Q[i, :] = Q[i, :] / scale

    try:
        # Reduced dimension random projection method for recovering anchor words
        Q_red = rp.Random_Projection(Q.T, dim, prng_W).T
        _anchors, anchor_indices = gs.Projection_Find(Q_red, K, candidates)
    finally:
        # restore the original Q even when the steps above fail, so the
        # caller's matrix is never left in its normalized state
        for i, scale in enumerate(scales):
            Q[i, :] = Q[i, :] * scale

    return anchor_indices
Ejemplo n.º 3
0
def findAnchors(Q, K, params, candidates):
    """Return the anchor indices found on a randomly projected, row-normalized Q.

    Q is row-normalized in place, projected down to ``params.new_dim``
    dimensions with ``rp.Random_Projection``, and searched with
    ``gs.Projection_Find``.  Q is rescaled back to its original row sums
    before returning, including when the projection or the search raises.

    Args:
        Q: 2-D matrix supporting ``Q.sum(1)``, ``Q.T`` and row assignment
            (presumably a word co-occurrence matrix -- confirm with caller).
        K: number of anchors, forwarded to ``gs.Projection_Find``.
        params: object exposing ``seed`` (passed to ``RandomState``) and
            ``new_dim`` (target dimension of the projection).
        candidates: row indices eligible as anchors, forwarded unchanged.

    Returns:
        The second element of the pair returned by ``gs.Projection_Find``.
    """
    # Random number generator for generating dimension reduction
    prng_W = RandomState(params.seed)
    new_dim = params.new_dim

    row_sums = Q.sum(1)
    num_rows = Q.shape[0]

    # Per-row scale factors.  A row whose sum is exactly zero keeps a factor
    # of 1: dividing it by zero would turn it into NaN, and multiplying NaN
    # by zero in the restore step could never bring the original row back.
    scales = []
    for i in range(num_rows):
        total = float(row_sums[i])
        scales.append(total if total != 0.0 else 1.0)

    # row normalize Q
    for i, scale in enumerate(scales):
        Q[i, :] = Q[i, :] / scale

    try:
        # Reduced dimension random projection method for recovering anchor words
        Q_red = rp.Random_Projection(Q.T, new_dim, prng_W).T
        _anchors, anchor_indices = gs.Projection_Find(Q_red, K, candidates)
    finally:
        # restore the original Q even when the steps above fail, so the
        # caller's matrix is never left in its normalized state
        for i, scale in enumerate(scales):
            Q[i, :] = Q[i, :] * scale

    return anchor_indices
Ejemplo n.º 4
0
def findAnchors(Q, K, params, candidates):
    """Return the anchor indices found on row-normalized (optionally projected) Q.

    Q is row-normalized in place.  When ``params.lowerDim`` is ``None`` or
    not smaller than the number of columns of Q, the search runs on a copy
    of the normalized Q; otherwise Q is first reduced to ``params.lowerDim``
    dimensions with ``rp.Random_Projection``.  Q is rescaled by its original
    row sums before returning, including when the projection or the search
    raises.

    Args:
        Q: 2-D matrix supporting ``Q.sum(axis=1)``, ``Q.T``, ``Q.copy()`` and
            row assignment (presumably a word co-occurrence matrix --
            confirm with caller).
        K: number of anchors, forwarded to ``gs.Projection_Find``.
        params: object exposing ``lowerDim`` and ``seed``; ``seed`` is only
            read when a projection is actually performed.
        candidates: row indices eligible as anchors, forwarded unchanged.

    Returns:
        The second element of the pair returned by ``gs.Projection_Find``.
    """
    # row normalize Q; the tiny offset keeps the division defined for rows
    # whose sum is zero
    row_sums = Q.sum(axis=1)
    num_rows = Q.shape[0]
    for i in range(num_rows):
        Q[i, :] = Q[i, :] / float(row_sums[i] + 1e-100)

    try:
        # Reduced dimension random projection method for recovering anchor words
        if params.lowerDim is None or params.lowerDim >= Q.shape[1]:
            # No reduction requested (or it would not shrink Q): search a
            # copy so the search never operates on the caller's matrix.
            Q_red = Q.copy()
        else:
            # Random number generator for generating dimension reduction
            prng_W = RandomState(params.seed)
            Q_red = rp.Random_Projection(Q.T, params.lowerDim, prng_W).T
        _anchors, anchor_indices = gs.Projection_Find(Q_red, K, candidates)
    finally:
        # restore the original Q even when the steps above fail, so the
        # caller's matrix is never left in its normalized state
        for i in range(num_rows):
            Q[i, :] = Q[i, :] * float(row_sums[i])

    return anchor_indices
    R = rp.Random_Matrix(V, params.new_dim, prng)

    #only accept anchors that appear in a significant number of docs
    print "identifying candidate anchors"
    candidate_anchors = []
    for i in xrange(V):
        if len(np.nonzero(row_M[i, :])[1]) > params.anchor_thresh:
            candidate_anchors.append(i)
    print len(candidate_anchors), "candidates"

    Q = np.vstack(
        generate_Q_matrix(row_M,
                          col_M,
                          row_normalize=True,
                          projection_matrix=R.T))  #row-by-row generation
    _, anchors = gs.Projection_Find(Q, K, candidate_anchors)
    print "anchors are:", anchors
    anchor_file = file(outfile + '.anchors', 'w')
    print >> anchor_file, "\t".join(["topic id", "word id", "word"])
    for i, a in enumerate(anchors):
        print i, vocab[a]
        print >> anchor_file, "\t".join([str(x) for x in (i, a, vocab[a])])

    anchor_file.close()

    #recover topics
    row_sums = np.array(row_M.sum(1)).reshape(V)
    #generate Q_matrix rows for anchors
    Q_A = np.vstack(
        generate_Q_matrix(row_M,
                          col_M,