예제 #1
0
def makeWeights(options, X, Y, GX, GY):
    """Compute the weight matrix between the rows of X and the rows of Y.

    The base term U is computed from X and Y alone.  When ``options.alpha``
    is positive, a graph term Z is computed with the help of GX and GY and
    mixed in as ``W = (1 - alpha) * U + alpha * Z``.

    Args:
        options: object exposing ``normalize_projections``, ``weight_type``
            and ``alpha``.  ``weight_type`` must be one of ``'inner'``,
            ``'dist'``, ``'sqrdist'`` or ``'graph_min_dist'``.
        X, Y: feature data convertible to a matrix (one row per item --
            presumably; confirm against callers).
        GX, GY: graph matrices for the X and Y sides, or None.  They are
            needed whenever ``options.alpha > 0``.

    Returns:
        A tuple ``(W, U, Z)``.  Z is a 1x1 zero matrix when no graph term
        was computed.

    Raises:
        ValueError: if ``options.weight_type`` is not a recognised value.
    """
    # hack to compute scores using normalized projections
    if options.normalize_projections == 1:
        # note that normalize_rows works on arrays, not matrices.
        X = common.normalize_rows(X, 2)
        Y = common.normalize_rows(Y, 2)

    # np.asmatrix is the function the old np.mat alias pointed to; the alias
    # itself was removed in NumPy 2.0.
    X = np.asmatrix(X)
    Y = np.asmatrix(Y)

    # Convert each graph independently so that a missing graph stays None
    # instead of silently becoming matrix([[None]]).
    if GX is not None:
        GX = np.asmatrix(GX)
    if GY is not None:
        GY = np.asmatrix(GY)

    weight_type = options.weight_type
    alpha = options.alpha
    Z = np.asmatrix(0)

    if weight_type == 'inner':
        U = X * Y.T  # linear kernel
        if alpha > 0:  # TODO: add higher order graphs
            Z = GX * U * GY.T
            W = (1 - alpha) * U + alpha * Z
        else:
            W = U
        # Flip the scores so that the largest entry becomes 0.
        W = np.max(W) - W
    elif weight_type in ('dist', 'sqrdist'):
        # 'dist' uses common.dist's default metric; 'sqrdist' asks for the
        # squared Euclidean one.
        dist_kwargs = {} if weight_type == 'dist' else {'metric': 'sqeuclidean'}
        U = common.dist(X, Y, **dist_kwargs)
        if alpha > 0:  # TODO: add higher order graph
            Z = common.dist(GX * X, GY * Y, **dist_kwargs)
            W = (1 - alpha) * U + alpha * Z
        else:
            W = U
    elif weight_type == 'graph_min_dist':
        U = common.dist(X, Y)
        GX = np.array(GX)
        GY = np.array(GY)
        if alpha > 0:  # TODO: add higher order graph
            # The two extra return values are not used here.
            Z, _, _ = cy_getGraphMinDist(GX, GY, U)
        # Unlike the other branches, the mix is applied even when alpha <= 0
        # (Z is then the 1x1 zero matrix).
        W = (1 - alpha) * U + alpha * Z
    else:
        # Previously this path fell through with U unbound and crashed with
        # an UnboundLocalError at the saveWUZ call.
        raise ValueError("unknown weight_type: %r" % (weight_type,))

    saveWUZ(U, W, Z, options)
    return W, U, Z
예제 #2
0
    def setupFeatures(self, options=None):
        """Build the normalized feature representations for this object.

        When ``self.isPickled()`` is true, two ``MSK`` objects are built and
        stored: ``self.orthoMSK`` from the n-gram dictionary of
        ``self.words`` and ``self.contextMSK`` from ``self.repr``.  Both are
        L2-normalized, optionally after ``log(offset=1)``.  Otherwise
        ``self.features`` is replaced by
        ``common.normalize_rows(self.features)``.

        Args:
            options: object with a ``log_features`` attribute; the log
                transform is applied when it equals 1.  ``None`` (the
                default) means no log transform.
        """
        # normalize the features
        if self.isPickled():
            (orthoDD, orthoFeatures) = strings.to_ngram_dictionary(self.words, affix=True)
            # The signature allows options=None, so do not dereference it
            # blindly (this used to raise AttributeError).
            use_log = options is not None and options.log_features == 1

            def build_msk(data, feature_names):
                # Construct, optionally log-transform, then L2-normalize.
                msk = MSK(data, self.words, feature_names)
                if use_log:
                    msk = msk.log(offset=1)
                return msk.normalize(norm='l2')

            self.orthoMSK = build_msk(orthoDD, orthoFeatures)
            self.contextMSK = build_msk(self.repr, self.featureNames)
        else:
            self.features = common.normalize_rows(self.features)