Example #1
0
    def set_position(self, x, y):
        """Store in self.config.delta the offset from the current
        position (the first two entries of self.rect) to the target
        (x, y), scaling both points by self.config.grid when grid
        mode (self.grid()) is active.
        """
        target = [x, y]
        current = self.rect[:2]

        if self.grid():
            # Compare the two points in grid units, not raw coordinates.
            target = mul_lists(target, self.config.grid)
            current = mul_lists(current, self.config.grid)

        self.config.delta = sub_lists(target, current)
Example #2
0
def call_rank(ranking_index, flattened, n_large, start_words=[], model=None):
    """ranking_index: 0 = TFIDF; 1 = C-value; 2 = C-value + Unigrams; 3 = TF"""
    ranking_fn = ranking_fns[ranking_index]
    ranking_fn_name = ranking_fn_names[ranking_index]
    set_status('ranking with %s' % ranking_fn_name, model=model)
    if debug:
        print 'ranking with %s' % ranking_fn_name
    scored_phrases, phrase_frequencies = ranking_fn(flattened)
    set_status('ordering', model=model)
    if debug:
        print 'ordering'
    ordered_phrases = sorted(scored_phrases.iteritems(),
                             key=lambda p: p[1],
                             reverse=True)
    #    ordered_fname ='../phrase_lists/%s.phrases' % ranking_index
    #    print 'writing ordered phrases to file %s' % ordered_fname
    #    with open(ordered_fname, 'w') as f:
    #        for o in ordered_phrases[:n_large]:
    #            f.write('%s\n' % str(o))
    if debug:
        print 'mapping'
    ranked_phrases = [p[0] for p in ordered_phrases]

    if debug:
        print 'trimming large'
    large_phrases = ranked_phrases[:n_large]

    if start_words:
        if debug:
            print 'looking for start words', start_words
        found_start_words = []
        for start_word in start_words:
            matches = (ranked_phrase for ranked_phrase in ranked_phrases
                       if start_word in sub_lists(ranked_phrase, proper=False))
            try:
                word = matches.next()
                if word not in large_phrases:
                    found_start_words.append(word)
            except StopIteration:
                if debug:
                    print 'start word %s not found' % start_word
            if debug:
                print 'found start words', found_start_words

        top_phrases = found_start_words + large_phrases
    else:
        top_phrases = large_phrases

    filtered_frequencies = dict(
        (phrase, freq) for (phrase, freq) in phrase_frequencies.items()
        if phrase in top_phrases)
    return top_phrases, filtered_frequencies, scored_phrases
Example #3
0
def cnc(phrase_lists,
        c_value_threshold=0,
        include_unigrams=False,
        weight_by_length=True):
    """given a list of phrases, run the cnc algorithm and return a dictionary of word, c-value (ranking) pairs

    phrase_lists: iterable of phrases, each a sequence of words; phrases
        are keyed as tuples internally.
    c_value_threshold: minimum C-value for a phrase to be kept in the result.
    include_unigrams: use log2(length + 1) as the length weight instead of
        log2(length), so length-1 phrases get a nonzero weight.
    weight_by_length: when False, every phrase gets weight 1.

    Returns (phrase_scores, phrase_frequencies):
        phrase_scores: dict mapping phrase tuple -> C-value.
        phrase_frequencies: FreqDist of raw phrase occurrence counts.
    """
    # Bucket phrase frequency counts by phrase length so we can walk
    # longest-first below.
    frequency_dists_by_length = {}
    for phrase in phrase_lists:
        l = len(phrase)
        if l not in frequency_dists_by_length:
            frequency_dists_by_length[l] = FreqDist()
        frequency_dists_by_length[l].inc(tuple(phrase))

    # word -> C-value(word)
    phrase_scores = {}

    # word -> num occurrences(word)
    phrase_frequencies = FreqDist()

    # word -> (t(word), c(word))
    # t = summed frequency of the longer phrases this phrase was nested in,
    # c = how many such longer phrases were accepted (see loop below).
    sub_phrase_scores = {}

    # traverse from longest phrases to shortest
    for length, frequency_dist in sorted(frequency_dists_by_length.items(), \
                                         key=lambda pair: pair[0], reverse=True):
        # update global frequency counts with all counts of this length
        phrase_frequencies.update(frequency_dist)
        # within each phrase length, traverse from most common phrases to least
        # (relies on FreqDist.iteritems() yielding most-frequent-first —
        # TODO confirm against the NLTK version in use)
        for phrase, frequency in frequency_dist.iteritems():
            if phrase in sub_phrase_scores:
                # Discount by the mean frequency (t/c) of the accepted
                # longer phrases this phrase appeared inside.
                t, c = sub_phrase_scores[phrase]
                subtractive = 1.0 / c * t
            else:
                subtractive = 0
            if weight_by_length:
                if include_unigrams:
                    weight = log(length + 1, 2)
                else:
                    # log2(1) == 0, so unigrams are effectively zeroed out.
                    weight = log(length, 2)
            else:
                weight = 1
            c_value = weight * (frequency - subtractive)
            if c_value >= c_value_threshold:
                phrase_scores[phrase] = c_value
                # Credit this phrase's frequency against every sub-phrase,
                # so shorter phrases are discounted when processed later.
                for sub_phrase in utils.sub_lists(phrase):
                    if sub_phrase in sub_phrase_scores:
                        t, c = sub_phrase_scores[sub_phrase]
                    else:
                        t, c = 0, 0
                    sub_phrase_scores[sub_phrase] = t + frequency, c + 1
    return phrase_scores, phrase_frequencies
Example #4
0
def cnc(phrase_lists, c_value_threshold=0, include_unigrams=False, weight_by_length=True):
    """given a list of phrases, run the cnc algorithm and return a dictionary of word, c-value (ranking) pairs

    phrase_lists: iterable of phrases, each a sequence of words; phrases
        are keyed as tuples internally.
    c_value_threshold: minimum C-value for a phrase to be kept.
    include_unigrams: use log2(length + 1) as the length weight instead of
        log2(length), giving length-1 phrases a nonzero weight.
    weight_by_length: when False, every phrase gets weight 1.

    Returns (phrase_scores, phrase_frequencies): the phrase -> C-value dict
    and a FreqDist of raw phrase occurrence counts.
    """
    # Group phrase counts by phrase length; processed longest-first below.
    frequency_dists_by_length = {}
    for phrase in phrase_lists:
        l = len(phrase)
        if l not in frequency_dists_by_length:
            frequency_dists_by_length[l] = FreqDist()
        frequency_dists_by_length[l].inc(tuple(phrase))

    # word -> C-value(word)
    phrase_scores = {}

    # word -> num occurrences(word)
    phrase_frequencies = FreqDist()

    # word -> (t(word), c(word)): t = summed frequency of accepted longer
    # phrases containing the word, c = count of those longer phrases.
    sub_phrase_scores = {}

    # traverse from longest phrases to shortest
    for length, frequency_dist in sorted(frequency_dists_by_length.items(), key=lambda pair: pair[0], reverse=True):
        # update global frequency counts with all counts of this length
        phrase_frequencies.update(frequency_dist)
        # within each phrase length, traverse from most common phrases to least
        # (relies on FreqDist.iteritems() yielding most-frequent-first —
        # TODO confirm against the NLTK version in use)
        for phrase, frequency in frequency_dist.iteritems():
            if phrase in sub_phrase_scores:
                # Discount by the mean frequency (t/c) of accepted longer
                # phrases this phrase appeared inside.
                t, c = sub_phrase_scores[phrase]
                subtractive = 1.0 / c * t
            else:
                subtractive = 0
            if weight_by_length:
                if include_unigrams:
                    weight = log(length + 1, 2)
                else:
                    # log2(1) == 0, so unigrams are effectively zeroed out.
                    weight = log(length, 2)
            else:
                weight = 1
            c_value = weight * (frequency - subtractive)
            if c_value >= c_value_threshold:
                phrase_scores[phrase] = c_value
                # Credit this phrase's frequency to all of its sub-phrases
                # so they are discounted when processed later (shorter first).
                for sub_phrase in utils.sub_lists(phrase):
                    if sub_phrase in sub_phrase_scores:
                        t, c = sub_phrase_scores[sub_phrase]
                    else:
                        t, c = 0, 0
                    sub_phrase_scores[sub_phrase] = t + frequency, c + 1
    return phrase_scores, phrase_frequencies
Example #5
0
def call_rank(ranking_index, flattened, n_large, start_words=[], model=None):
    """ranking_index: 0 = TFIDF; 1 = C-value; 2 = C-value + Unigrams; 3 = TF"""
    ranking_fn = ranking_fns[ranking_index]
    ranking_fn_name = ranking_fn_names[ranking_index]
    set_status('ranking with %s' % ranking_fn_name, model=model)
    if debug:
        print 'ranking with %s' % ranking_fn_name
    scored_phrases, phrase_frequencies = ranking_fn(flattened)
    set_status('ordering', model=model)
    if debug:
        print 'ordering'
    ordered_phrases = sorted(scored_phrases.iteritems(),
                             key=lambda p: p[1], reverse=True)
#    ordered_fname ='../phrase_lists/%s.phrases' % ranking_index
#    print 'writing ordered phrases to file %s' % ordered_fname
#    with open(ordered_fname, 'w') as f:
#        for o in ordered_phrases[:n_large]:
#            f.write('%s\n' % str(o))
    if debug:
        print 'mapping'
    ranked_phrases = [p[0] for p in ordered_phrases]

    if debug:
        print 'trimming large'
    large_phrases = ranked_phrases[:n_large]

    if start_words:
        if debug:
            print 'looking for start words', start_words
        found_start_words = []
        for start_word in start_words:
            matches = (ranked_phrase for ranked_phrase in ranked_phrases if start_word in sub_lists(ranked_phrase, proper=False))
            try:
                word = matches.next()
                if word not in large_phrases:
                    found_start_words.append(word)
            except StopIteration:
                if debug:
                    print 'start word %s not found' % start_word
            if debug:
                print 'found start words', found_start_words

        top_phrases = found_start_words + large_phrases
    else:
        top_phrases = large_phrases

    filtered_frequencies = dict((phrase, freq) for (phrase, freq) in phrase_frequencies.items() if phrase in top_phrases)
    return top_phrases, filtered_frequencies, scored_phrases