Python sort Examples

Programming Language: Python

Namespace/Package Name: perm

Method/Function: sort

Examples at hotexamples.com: 2

Python sort - 2 examples found. These are the top-rated real-world Python examples of perm.sort, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: BilexiconUtil.py Project: vaswani/LEXICON_INDUCTION

def getScores(lex, source_words, target_words, weights):
    """Score a word matching against a gold lexicon at every cutoff.

    The (source, target) pairs are reordered by increasing ``weights``; each
    score is then accumulated over prefixes of that ordering, so entry ``k``
    of every score array describes the first ``k + 1`` pairs.

    Args:
        lex: gold lexicon. Only ``len(lex)``, ``word in lex`` and a call to
            ``is_valid_match(lex, source, target)`` are used here.
        source_words: source-side words, one per pair. Indexed with the
            permutation returned by ``perm.sort`` (presumably a numpy
            array -- confirm against callers).
        target_words: target-side words, same length as ``source_words``.
        weights: one weight per pair, used only to order the pairs.

    Returns:
        A ``common.Struct`` with attributes ``M`` (``len(lex)``),
        ``precision``, ``recall`` and ``F1`` (arrays with one entry per
        cutoff).

    Note:
        ``precision`` divides by the running count of pairs whose source
        word is in ``lex`` and ``recall`` divides by ``len(lex)``; neither
        division is guarded, so a zero denominator produces nan/inf entries
        (the pre-existing behaviour, left unchanged).
    """
    M = len(lex)
    # sort according to weights (increasing)
    (_, pi) = perm.sort(weights, reverse=False)
    source_words = source_words[pi]
    target_words = target_words[pi]
    N = len(source_words)
    assert N == len(target_words)

    C = np.zeros((N, 3))  # [1, exists in source, target matches]
    C[:, 0] = 1  # always 1
    for i, (source_word, target_word) in enumerate(zip(source_words, target_words)):
        # Test membership on the mapping itself: on Python 2 ``lex.keys()``
        # is a list, which made every lookup a linear scan of the lexicon.
        if source_word not in lex:
            continue
        C[i, 1] = 1  # word exists as a source word
        if is_valid_match(lex, source_word, target_word):
            C[i, 2] = 1  # (source, target) words are correctly matched according to dict

    C = np.cumsum(C, 0)  # cumulative sum per column
    scores = common.Struct()
    scores.M = M
    scores.precision = C[:, 2] / C[:, 1]
    scores.recall = C[:, 2] / M
    scores.F1 = F1(scores.precision, scores.recall)

    return scores  # C should allow computing precision/recall/F1 for any cutoff value.

Example #2

Show file

File: mcca.py Project: vaswani/LEXICON_INDUCTION

def find_matching(options, wordsX, wordsY):
    """Iteratively match the words of ``wordsY`` to those of ``wordsX``.

    Each iteration learns a CCA model on the trailing (already trusted) rows
    of the feature matrices, projects all words with it, solves a matching on
    the first ``M`` words and reorders both word sets by the resulting edge
    costs. The loop stops after ``options.T`` iterations or as soon as the
    matching returned for an iteration is the identity permutation.

    Args:
        options: run configuration. Read here: ``seed_length``, ``T``,
            ``step_size``, ``eta``, ``useCCAWeights``, ``alpha``,
            ``gold_lex``, ``title``, ``matchingFilename``, ``is_mock``.
            Written here: ``cca_weights`` and ``t``.
        wordsX: word container for one side. This function uses its
            ``words`` and ``features`` attributes and its ``isPickled``,
            ``ICD_representation``, ``materializeGraph`` and
            ``permuteFirstWords`` methods; it is reordered in place.
        wordsY: word container for the other side, used the same way.

    Returns:
        The tuple ``(wordsX, wordsY, sorted_edge_cost, cost)`` from the last
        executed iteration.

    Side effects:
        Writes progress to stderr/stdout, writes the final matching to
        ``options.matchingFilename`` via ``IO.writeString``, and mutates
        ``options``, ``wordsX`` and ``wordsY``.
    """
    # finds a permutation pi that best matches Y to X
    # The optimization procedure works as follows:
    # suppose there are 2000 words to be matched, 100 seed words and step size is 100
    # The seed is stored at the end (so X[i, :] matches Y[i, :] for i >= 2000, 0-based) in all iterations
    # at each iteration t (starting at t=0):
    # 1. compute the CCA on the last 100 + 100*t entries
    # 2. compute the CCA representation of all words
    # 3. perform a matching on the first N=2000 words to get pi_t
    # 4. sort the first 2000 matches in descending order.

    # initially, assume that pi is ID
    N = len(wordsX.words)
    M = N - options.seed_length  # The first M entries can be permuted. The rest are fixed
    GX = None
    GY = None

    options.cca_weights = None
    sorted_edge_cost = None

    fixed_point = False
    # NOTE(review): if options.T is 0 the loop body never runs, so `t` and
    # `cost` are unbound when they are used after the loop.
    for t in range(0, options.T):
        options.t = t
        # Index of the first row used to train the CCA model in this iteration.
        # NOTE(review): Nt turns negative once step_size * t > M, and a negative
        # slice start counts from the end -- confirm T/step_size keep Nt >= 0.
        Nt = M - options.step_size * t
        # STEP 0: when the feature dimension is high, ICD the seed and project the rest
        if wordsX.isPickled():
            wordsX.ICD_representation(Nt, options.eta)
            wordsY.ICD_representation(Nt, options.eta)

        # STEP 1: compute CCA model on the well matched portion of the matching (which includes the fixed seed)
        fixedX = wordsX.features[Nt:, :]
        fixedY = wordsY.features[Nt:, :]
        # From the second iteration on (sorted_edge_cost is set), optionally weight
        # the training rows by exp(-cost^2 / (2 * median(cost^2))).
        if options.useCCAWeights == 1 and sorted_edge_cost is not None:
            q = np.square(sorted_edge_cost[Nt:])
            bandwidth = np.median(q)
            options.cca_weights = np.exp(-q / (2 * bandwidth))  # exp is useful when dist is used
        # if options.noise_level > 0:
        #     fixedX += options.noise_level*common.randn(fixedX.shape)
        #     fixedY += options.noise_level*common.randn(fixedY.shape)

        print >> sys.stderr, colored("CCA dimensions =", "green"), len(fixedX)
        cca_model = CU.learn(fixedX, fixedY, options)
        print >> sys.stderr, len(cca_model.p), "Top 10 correlation coefficients:", cca_model.p[:10]
        # STEP 2: compute CCA representation of all samples
        print >> sys.stderr, "norms", norm(wordsX.features), norm(wordsY.features)
        Z = CU.project(options, cca_model, wordsX.features, wordsY.features)

        print >> sys.stderr, "Z", norm(Z.X), norm(Z.Y)

        # STEP 3: compute weight matrix and run matching (approximate) algorithm
        # NOTE(review): the comment says "approximate" but MU.exactMatch is called below.
        # The graphs are only materialized when alpha > 0; otherwise GX/GY keep
        # their previous value (None on the first iteration).
        if options.alpha > 0:
            GX = wordsX.materializeGraph()
            GY = wordsY.materializeGraph()
        print >> sys.stderr, colored("Computing matching weight matrix.", "green")

        W, U0, Z0 = MU.makeWeights(options, Z.X, Z.Y, GX, GY)
        print >> sys.stderr, "Matching."
        # Only the top-left M x M block is matched, so the seed rows stay fixed.
        (cost, pi_t, edge_cost) = MU.exactMatch(W[:M, :M])
        # STEP 4: sort the words, such that the best matches are at the end.
        # note that pi_t comes from the M x M block of W, so it only covers the
        # first M (non-seed) words
        (sorted_edge_cost, I) = perm.sort(edge_cost, reverse=True)
        # Pad with zeros for the N - M seed entries so the cost vector has length N.
        sorted_edge_cost = np.concatenate((sorted_edge_cost, np.zeros(N - M)))

        # NOTE(review): in the fixed-point branch the costs above are already
        # sorted by I but the words are not reordered, so sorted_edge_cost[:M]
        # may no longer line up with words[:M] below -- confirm this is intended.
        if perm.isID(pi_t):  # the best permutation is the identity
            fixed_point = True
        else:
            wordsX.permuteFirstWords(I)
            wordsY.permuteFirstWords(pi_t[I])
            # END OF ITERATION: output Matching
        print >> sys.stderr, "cost =", cost, "latent inner product = ", np.sum(Z.X.A * Z.Y.A)

        # MU.printMatching(wordsX.words[:M], wordsY.words[:M], sorted_edge_cost[:M], options.gold_lex)
        # When a gold lexicon is available, score the current matching against it.
        if options.gold_lex is not None:
            scores = BU.getScores(options.gold_lex, wordsX.words[:M], wordsY.words[:M], sorted_edge_cost[:M])
            BU.outputScores(scores, options.title)

        print "---------- ", "iteration = ", (t + 1), "/", options.T, "----------"
        sys.stdout.flush()
        if fixed_point:
            break

    # either we reached the maximum number of iterations, or a fixed point
    log(100, "Stopped after, ", (t + 1), "iterations. Fixed point =", fixed_point)
    IO.writeString(
        options.matchingFilename,
        MU.toString(wordsX.words[:M], wordsY.words[:M], sorted_edge_cost[:M], options.gold_lex),
    )
    if options.is_mock:
        # NOTE(review): unlike the log call above, this one passes no leading
        # numeric argument -- confirm against the signature of log().
        log("Hamming distance:", perm.hamming(wordsX.words, wordsY.words))
    return wordsX, wordsY, sorted_edge_cost, cost