Example #1
# -------------------------------------------------------------------------------
# Name: Retrieve
# -------------------------------------------------------------------------------

import os
import re
import string
from collections import defaultdict

from lex.oed.projects.thesaurus.classifier.pickler.sensemanager import PickleLoader
from lex.oed.projects.thesaurus.classifier.tracer import trace_class, trace_instance, trace_sense
from lex.oed.projects.thesaurus.classifier.config import ThesaurusConfig


config = ThesaurusConfig()
parent_directories = [
    config.get("paths", "iteration1_dir"),
    config.get("paths", "iteration2_dir"),
]
letters = string.ascii_uppercase


while True:
    print """
===========================================================


Enter lemma (optionally followed by '-c' or '-u' to specify
    classified or unclassified):
"""
    lemma = raw_input(">>>")
    lemma = lemma.strip()
    if lemma.endswith(" -c"):
        # '-c' restricts the lookup to senses already classified
        lemma, mode = lemma[:-3].strip(), "classified"
    elif lemma.endswith(" -u"):
        # '-u' restricts the lookup to unclassified senses
        lemma, mode = lemma[:-3].strip(), "unclassified"
    else:
        mode = None
Example #2
from __future__ import division, print_function
from collections import defaultdict
import string
import numpy

from lex.oed.projects.thesaurus.classifier.config import ThesaurusConfig
from lex.oed.projects.thesaurus.classifier.bayes.bayesclassifier import BayesClassifier
from lex.oed.projects.thesaurus.classifier.compounds.bayescompounds import BayesCompounds

config = ThesaurusConfig()

#bayes = BayesClassifier(
#    resources_dir=config.get('paths', 'resources_dir'),
#)
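# Alternative set-up (the single-sense BayesClassifier) kept commented out
# above for reference; this script drives the compound-specific classifier.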
bayes = BayesCompounds(
    resources_dir=config.get('paths', 'resources_dir'),
)


def spool():
    for letter in string.ascii_lowercase:
        bayes.load_results(letter)
        for s in bayes.results.values():
            s.recover_probabilities()
            #ad = s.average_delta(total_probability=.95)
            #if s.confidence() >= 7 and s.num_features() < 10:
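            # show_probabilities() is assumed to be a display helper defined
            # elsewhere in this module.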
            show_probabilities(s)

def find_word(word):
    initial = word.lower()[0]
    bayes.load_results(initial, 'bias_high')
Example #3
#-------------------------------------------------------------------------------
# Name: CheckLevels
#-------------------------------------------------------------------------------

import os
import re
import string
from collections import defaultdict

import lex.oed.thesaurus.thesaurusdb as tdb
from lex.oed.projects.thesaurus.classifier.pickler.sensemanager import PickleLoader
from lex.oed.projects.thesaurus.classifier.config import ThesaurusConfig


config = ThesaurusConfig()
training_dir = config.get('paths', 'classified_dir')
parent_directories = [
    config.get('paths', 'iteration1_dir'),
    config.get('paths', 'iteration2_dir'),
]


def count_training():
    counts = {i: 0 for i in range(17)}
    pl = PickleLoader(training_dir)
    for sense in pl.iterate():
        for n in sense.thesaurus_nodes:
            thesclass = tdb.get_thesclass(n)
            counts[thesclass.level] += 1
    for i in range(17):
        # Report how many training senses were found at each thesaurus level
        print '%d\t%d' % (i, counts[i])
Example #4
from __future__ import division
import os
import numpy

from lex.oed.projects.thesaurus.classifier.config import ThesaurusConfig
from lex.oed.projects.thesaurus.classifier.bayes.classifiers_io import load_classifiers

config = ThesaurusConfig()
classifiers_dir = os.path.join(config.get('paths', 'resources_dir'), 'bayes', 'classifiers')


def variation(scores):
    mean = numpy.mean(scores)
    max_deviation = max([abs(max(scores)-mean), abs(min(scores)-mean)])
    return max_deviation / mean
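
# Worked example (illustrative numbers, not real classifier output): for
# scores [2.0, 3.0, 7.0] the mean is 4.0 and the largest absolute deviation
# from it is 3.0, so variation() returns 3.0 / 4.0 = 0.75; smaller values
# mean a keyword's scores cluster tightly around their mean.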

prior_probabilities, classifiers = load_classifiers(classifiers_dir, mode='raw')
keywords = [(keyword, variation(scores.values()))
            for keyword, scores in classifiers.items()
            if keyword.startswith('T')]

# Sort ascending so keywords whose scores deviate least from their mean come first
keywords.sort(key=lambda k: k[1])
for k in keywords[0:300]:
    print repr(k[0]), repr(k[1])