import os
import re
import codecs
import subprocess as sub
from collections import defaultdict

# Project-internal names used below (the config dict `c`, set_env_lang_utf8,
# TwoLevelCountDict, getencoding, xc_load, tokenize_item, morpheme_tokenizer,
# LOG, and the xigt constants GLOSS_WORD_ID, POS_TIER_TYPE, ALIGNMENT) are
# assumed to be imported from elsewhere in the package; this file is an
# excerpt.


def info(self):
    """
    Print the feature statistics for the given model. (Assumes MaxEnt)
    """
    mallet = c['mallet']
    env = set_env_lang_utf8()

    info_bin = os.path.join(mallet, 'bin', 'classifier2info')
    info_p = sub.Popen([info_bin, '--classifier', self._model],
                       stdout=sub.PIPE, stdin=sub.PIPE, stderr=sub.PIPE,
                       env=env)

    cur_class = None
    feats = TwoLevelCountDict()

    # Go through and pick out what the features are for
    for line in info_p.stdout:
        content = line.decode(encoding='utf-8')

        # Set the current class if the section changes
        class_change = re.search('FEATURES FOR CLASS (.*)', content)
        if class_change:
            cur_class = class_change.group(1).strip()
            continue

        # Otherwise, let's catalog the features, skipping blank or
        # malformed lines.
        fields = content.split()
        if len(fields) != 2:
            continue
        word, prob = fields
        feats.add(cur_class, word, float(prob))

    # Now, print some info: each class, its <default> feature weight, and
    # its ten strongest 'nom'-prefixed features.
    for cur_class in feats.keys():
        print(cur_class, end='\t')
        print('%s:%.4f' % ('<default>', feats[cur_class]['<default>']), end='\t')
        top_10 = feats.top_n(cur_class, n=10, key2_re='^nom')
        print('\t'.join(['%s:%.4f' % (w, p) for w, p in top_10]))
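# ---------------------------------------------------------------------------
# NOTE: TwoLevelCountDict is a project-internal helper, not defined in this
# excerpt. The sketch below reconstructs only the interface that info(),
# get_prototypes(), and _process_file() rely on -- add(), keys(), two-level
# indexing, most_frequent(), and top_n(). It is illustrative, not the
# project's actual implementation; the underscore-prefixed names mark it as
# hypothetical.
# ---------------------------------------------------------------------------
class _CountDictSketch(defaultdict):
    """Inner level: maps key2 -> count, with frequency-ordered access."""

    def __init__(self):
        defaultdict.__init__(self, float)

    def most_frequent(self, minimum=1, num=None):
        """Return the keys with count >= minimum, most frequent first."""
        keys = [k for k, v in sorted(self.items(), key=lambda kv: kv[1],
                                     reverse=True)
                if v >= minimum]
        return keys[:num] if num is not None else keys


class _TwoLevelCountDictSketch(object):
    """Outer level: maps key1 -> (key2 -> count), e.g. POS tag -> word -> freq."""

    def __init__(self):
        self._d = defaultdict(_CountDictSketch)

    def add(self, key1, key2, n=1):
        """Increment the count stored under (key1, key2) by n."""
        self._d[key1][key2] += n

    def keys(self):
        return self._d.keys()

    def __getitem__(self, key1):
        return self._d[key1]

    def top_n(self, key1, n=10, key2_re=None):
        """Top-n (key2, count) pairs under key1, optionally filtered by regex."""
        pairs = sorted(self._d[key1].items(), key=lambda kv: kv[1], reverse=True)
        if key2_re:
            pairs = [(k, v) for k, v in pairs if re.search(key2_re, k)]
        return pairs[:n]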
def get_prototypes(tagged_path, proto_out, delimiter, ignoretags=[], unambiguous=False, maxproto=0):
    encoding = getencoding(tagged_path)

    tag_word_dict = TwoLevelCountDict()
    word_tag_dict = TwoLevelCountDict()
    proto_dict = defaultdict(set)

    # Count (tag, word) and (word, tag) co-occurrences over the tagged file.
    with codecs.open(tagged_path, "r", encoding=encoding) as tagged_file:
        for line in tagged_file:
            for token in line.split():
                match = re.search("(^.*)%s(.*?)$" % delimiter, token)
                # Skip tokens that don't match word<delimiter>tag.
                if match is None:
                    continue
                word, pos = match.groups()
                if pos not in ignoretags:
                    word = word.lower()
                    tag_word_dict.add(pos, word)
                    word_tag_dict.add(word, pos)

    numproto = 0

    # First, let's pick the maxproto most frequent words for a tag.
    for tag in tag_word_dict.keys():
        words = tag_word_dict[tag].most_frequent(minimum=1, num=None)

        found_words = 0
        for word in words:
            freq_tag = word_tag_dict[word].most_frequent(minimum=1)
            # Only keep the word as a prototype if this tag is also the
            # word's own most frequent tag.
            if freq_tag and freq_tag[0] == tag:
                proto_dict[freq_tag[0]].add(word)
                numproto += 1
                found_words += 1

                if maxproto and found_words == maxproto:
                    break

    print("%s Prototypes found." % numproto)

    # Now, write the proto file: one line per tag, with the tag followed by
    # its prototype words, tab-separated.
    with open(proto_out, "w") as proto_file:
        for tag in proto_dict:
            proto_file.write(tag)
            for word in proto_dict[tag]:
                proto_file.write("\t" + word.lower())  # LOWERCASE for testing
            proto_file.write("\n")
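# Example usage of get_prototypes() (illustrative; the file names and tag
# set here are hypothetical):
#
#     get_prototypes("corpus.tagged.txt", "protos.txt", delimiter="/",
#                    ignoretags=["PUNC"], maxproto=5)
#
# With delimiter="/", input lines are expected to look like:
#
#     The/DT dog/NN barked/VBD ./PUNC
#
# and each line of the resulting proto file is a tag followed by its
# tab-separated prototype words:
#
#     NN\tdog\tcat\thouse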
def _process_file(f):
    """
    Collect (POS tag -> word), (word -> POS tag), and (gram -> POS tag)
    count dictionaries from a single xigt file.
    """
    c = TwoLevelCountDict()
    d = TwoLevelCountDict()
    m = TwoLevelCountDict()

    print("Processing file {}".format(f))
    xc = xc_load(f)
    for inst in xc:
        LOG.info("Now on instance {}".format(inst.id))

        # Search for the gloss POS tier, if it exists.
        gpos = inst.find(alignment=GLOSS_WORD_ID, type=POS_TIER_TYPE)

        # If a gloss POS tier was found...
        if gpos:
            # Iterate through the projected tags.
            for gp in gpos:
                word = gp.igt.find(id=gp.attributes[ALIGNMENT])
                grams = tokenize_item(word, morpheme_tokenizer)

                # Add the (gram, POSTag) pair as something that was
                # encountered.
                for gram in grams:
                    m.add(gram.content.lower(), gp.value())

                c.add(gp.value(), word.value().lower())
                d.add(word.value().lower(), gp.value())

    return (c, d, m)
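# _process_file() returns per-file counts; a caller presumably merges the
# (c, d, m) triples across a corpus. A minimal merging sketch, assuming the
# TwoLevelCountDict interface outlined above (add() accepting an explicit
# count, as info() uses); the helper name is hypothetical and the project's
# real combine step may differ:
def _merge_counts(results):
    """Merge an iterable of (c, d, m) triples into corpus-wide totals."""
    totals = (TwoLevelCountDict(), TwoLevelCountDict(), TwoLevelCountDict())
    for triple in results:
        for total, part in zip(totals, triple):
            for k1 in part.keys():
                for k2 in part[k1].keys():
                    total.add(k1, k2, part[k1][k2])
    return totals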