class ConllEval(object):
    """
    Accumulator for CoNLL dependency-evaluation statistics (POS accuracy,
    unlabeled and labeled attachment accuracy), with a separate tally for
    short (< 10 word) sentences and per-POS dependency/head correctness.
    """

    def __init__(self):
        self.dep_acc_by_pos = TwoLevelCountDict()
        self.head_acc_by_pos = TwoLevelCountDict()
        self.long_sent_stats = CountDict()
        self.short_sent_stats = CountDict()
        self.fields = ["pos_acc", "ul_acc", "l_acc"]

    def add(self, k, sent):
        # Every count goes into long_sent_stats; sentences under 10 tokens
        # are additionally tallied in short_sent_stats.
        self.long_sent_stats.add(k)
        if len(sent) < 10:
            self.short_sent_stats.add(k)

    def pos_stats(self):
        for pos in sorted(set(self.dep_acc_by_pos.keys()).union(set(self.head_acc_by_pos.keys()))):
            print(",".join([pos,
                            str(self.dep_acc_by_pos.sub_distribution(pos).get(True, 0.0)),
                            str(self.head_acc_by_pos.sub_distribution(pos).get(True, 0.0))]))

    def acc(self, d, k):
        return d[k] / d["words"] * 100

    def long_stats(self):
        return [self.acc(self.long_sent_stats, k) for k in self.fields]

    def short_stats(self):
        return [self.acc(self.short_sent_stats, k) for k in self.fields]

    def short_ul(self):
        return self.acc(self.short_sent_stats, "ul_acc")

    def short_ul_count(self):
        return self.short_sent_stats.get("ul_acc", 0)

    def short_words(self):
        return self.short_sent_stats.get("words", 0)

    def long_ul(self):
        return self.acc(self.long_sent_stats, "ul_acc")

    def long_ul_count(self):
        return self.long_sent_stats.get("ul_acc", 0)

    def long_words(self):
        return self.long_sent_stats.get("words", 0)
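
# A minimal, self-contained sketch (hypothetical, not part of the class above)
# of the accuracy computation that acc() performs: each counter key holds a raw
# count, and accuracy is that count divided by the "words" total, times 100.
def _demo_attachment_accuracy():
    stats = {"words": 0, "ul_acc": 0, "l_acc": 0}
    gold_heads = [2, 0, 2]                    # hypothetical gold head indices
    pred_heads = [2, 0, 1]                    # hypothetical predicted head indices
    gold_labels = ["det", "root", "dobj"]
    pred_labels = ["det", "nsubj", "dobj"]
    for g_h, p_h, g_l, p_l in zip(gold_heads, pred_heads, gold_labels, pred_labels):
        stats["words"] += 1
        if g_h == p_h:
            stats["ul_acc"] += 1              # unlabeled attachment correct
            if g_l == p_l:
                stats["l_acc"] += 1           # labeled attachment also correct
    return {k: stats[k] / stats["words"] * 100 for k in ("ul_acc", "l_acc")}

# >>> _demo_attachment_accuracy()
# approximately {'ul_acc': 66.7, 'l_acc': 33.3}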
def info(self):
    """
    Print the feature statistics for the given model. (Assumes MaxEnt)
    """
    mallet = c['mallet']
    env = set_env_lang_utf8()

    info_bin = os.path.join(os.path.join(mallet, 'bin'), 'classifier2info')
    info_p = sub.Popen([info_bin, '--classifier', self._model],
                       stdout=sub.PIPE, stdin=sub.PIPE, stderr=sub.PIPE, env=env)

    cur_class = None
    feats = TwoLevelCountDict()

    # Go through and pick out what the features are for
    for line in info_p.stdout:
        content = line.decode(encoding='utf-8')

        class_change = re.search('FEATURES FOR CLASS (.*)', content)
        # Set the current class if the section changes
        if class_change:
            cur_class = class_change.group(1).strip()
            continue

        # Otherwise, let's catalog the features.
        word, prob = content.split()
        feats.add(cur_class, word, float(prob))

    # Now, print some info
    for cur_class in feats.keys():
        print(cur_class, end='\t')
        print('%s:%.4f' % ('<default>', feats[cur_class]['<default>']), end='\t')
        top_10 = feats.top_n(cur_class, n=10, key2_re='^nom')
        print('\t'.join(['%s:%.4f' % (w, p) for w, p in top_10]))
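
# A standalone sketch (hypothetical) of the same parsing logic using only the
# standard library, assuming the "FEATURES FOR CLASS <name>" header plus
# "feature weight" line format consumed above.
import re
from collections import defaultdict

def parse_classifier2info(text):
    feats = defaultdict(dict)
    cur_class = None
    for content in text.splitlines():
        class_change = re.search('FEATURES FOR CLASS (.*)', content)
        if class_change:
            cur_class = class_change.group(1).strip()
            continue
        if cur_class is None or not content.strip():
            continue
        word, prob = content.split()
        feats[cur_class][word] = float(prob)
    return feats

# Example with a fabricated two-class dump:
# sample = ("FEATURES FOR CLASS NOUN\n<default> 0.1\nnom-suffix 1.2\n"
#           "FEATURES FOR CLASS VERB\n<default> -0.3\nnom-suffix 0.4\n")
# parse_classifier2info(sample)["NOUN"]["nom-suffix"]  # -> 1.2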
import codecs
import re
from collections import defaultdict

# NOTE: getencoding() and TwoLevelCountDict are project-level utilities
# assumed to be importable from elsewhere in the package.

def get_prototypes(tagged_path, proto_out, delimeter, ignoretags=[], unambiguous=False, maxproto=0):
    encoding = getencoding(tagged_path)
    tagged_file = codecs.open(tagged_path, "r", encoding=encoding)

    tag_word_dict = TwoLevelCountDict()
    word_tag_dict = TwoLevelCountDict()
    proto_dict = defaultdict(set)

    # Count word/tag co-occurrences in both directions.
    for line in tagged_file:
        tokens = line.split()
        for token in tokens:
            word, pos = re.search("(^.*)%s(.*?)$" % delimeter, token).groups()
            if pos not in ignoretags:
                word = word.lower()
                tag_word_dict.add(pos, word)
                word_tag_dict.add(word, pos)

    numproto = 0

    # First, let's pick the maxproto most frequent words for a tag.
    for tag in tag_word_dict.keys():
        words = tag_word_dict[tag].most_frequent(minimum=1, num=None)

        found_words = 0
        for word in words:
            freq_tag = word_tag_dict[word].most_frequent(minimum=1)
            # Only keep the word as a prototype if its own most frequent tag
            # is the tag we are collecting prototypes for.
            if freq_tag and freq_tag[0] == tag:
                proto_dict[freq_tag[0]].add(word)
                numproto += 1
                found_words += 1
            if maxproto and found_words == maxproto:
                break

    print("%s Prototypes found." % numproto)

    # Now, set up the proto file for writing.
    proto_file = open(proto_out, "w")
    for tag in proto_dict:
        proto_file.write(tag)
        for word in proto_dict[tag]:
            proto_file.write("\t" + word.lower())  # LOWERCASE for testing
        proto_file.write("\n")
    proto_file.close()
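
# A minimal, self-contained sketch (hypothetical) of the prototype-selection
# idea above, using collections.Counter in place of the project's
# TwoLevelCountDict: a word counts as a prototype for a tag only if that tag
# is also the word's own most frequent tag.
from collections import Counter, defaultdict

def pick_prototypes(token_pos_pairs, maxproto=3):
    tag_words = defaultdict(Counter)
    word_tags = defaultdict(Counter)
    for word, pos in token_pos_pairs:
        tag_words[pos][word.lower()] += 1
        word_tags[word.lower()][pos] += 1

    protos = defaultdict(set)
    for tag, words in tag_words.items():
        found = 0
        for word, _ in words.most_common():
            best_tag, _ = word_tags[word].most_common(1)[0]
            if best_tag == tag:
                protos[tag].add(word)
                found += 1
            if maxproto and found == maxproto:
                break
    return protos

# >>> pick_prototypes([("the", "DET"), ("dog", "NOUN"), ("dog", "NOUN"),
# ...                  ("runs", "VERB"), ("run", "VERB"), ("run", "NOUN")])
# yields prototypes like {'DET': {'the'}, 'NOUN': {'dog'}, 'VERB': {'runs', 'run'}}
# ("run" is ambiguous, so it is only kept for its own most frequent tag).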