Example #1
0
def get_pmi_model(wc_dir, noun):
    """Load the PMI noun model stored for ``noun`` under ``wc_dir``.

    The file name is derived with ``PMINounModel.lemma_to_filename``.

    Returns the loaded ``features.PMINounModel``, or ``None`` when the
    model file cannot be stat'ed (typically because it does not exist).
    """
    model = features.PMINounModel()
    path = model.lemma_to_filename(noun, wc_dir)
    # os.path.exists() wraps os.stat() and reports an OSError as False.
    if not os.path.exists(path):
        return None
    model._load_from_pmi_file(path)
    return model
Example #2
0
def generate_cv_old_recursive(cv_entry, cv_entries_dict, input_dir,
                              output_dir):
    """Return the PMI model for ``cv_entry.concept``, building it if needed.

    A model already saved under ``output_dir`` is loaded and returned as
    is.  Otherwise the model is assembled with ``union_max`` from:

    * the models of the words in ``cv_entry.wv`` that ``get_pmi_model``
      finds under ``input_dir``;
    * the intersection of each word pair in ``cv_entry.wvi`` for which
      both models are found;
    * the model of each ``(parent, weight)`` in ``cv_entry.p``, scaled by
      ``weight``;

    and is then saved to ``output_dir`` before being returned.

    Raises ``KeyError`` when a parent is missing from ``cv_entries_dict``.
    """
    concept = cv_entry.concept

    # Reuse the model when it has already been written to disk.
    stored = features.PMINounModel()
    stored_path = stored.lemma_to_filename(concept, output_dir)
    try:
        os.stat(stored_path)
        stored._load_from_pmi_file(stored_path)
    except OSError:
        # Not usable from disk: fall through and compute it.
        pass
    else:
        return stored

    # Single-word components: keep only the words that have a model.
    word_models = (get_pmi_model(input_dir, word) for word in cv_entry.wv)
    components = [found for found in word_models if found]

    # Pair components: both words must have a model to contribute.
    for first, second in cv_entry.wvi:
        first_model = get_pmi_model(input_dir, first)
        second_model = get_pmi_model(input_dir, second)
        if not (first_model and second_model):
            continue
        components.append(first_model.intersection(second_model))

    # Parent concepts, each weighted before being merged in.
    # NOTE(review): the recursion goes through generate_cv, which is defined
    # outside this block, rather than through this function -- confirm that
    # this is intended.
    for parent, weight in cv_entry.p:
        parent_model = generate_cv(cv_entries_dict[parent], cv_entries_dict,
                                   input_dir, output_dir)
        parent_model.scale(weight)
        components.append(parent_model)

    combined = features.PMINounModel()
    combined.noun = concept
    for component in components:
        combined.union_max(component)

    combined.save_to_file(output_dir)
    return combined
Example #3
0
def get_pmi_model_OLD1(wc_dir, noun):
    """Return the PMI model for ``noun``, memoised in ``cached_pmi_models``.

    On a cache hit the stored model is returned without touching the disk.
    On a miss the model file for ``noun`` under ``wc_dir`` is loaded, stored
    in the module-level ``cached_pmi_models`` mapping and returned.

    Returns ``None`` when the model file cannot be stat'ed.  Such misses
    are not cached, so a later call checks the disk again.

    NOTE(review): the cache is keyed on ``noun`` only, so a hit ignores
    ``wc_dir`` -- confirm callers never mix directories.
    """
    # The ``in`` operator replaces dict.has_key(), which was removed in
    # Python 3 and is deprecated in Python 2.
    if noun in cached_pmi_models:
        return cached_pmi_models[noun]

    pmi_model = features.PMINounModel()
    filename = pmi_model.lemma_to_filename(noun, wc_dir)
    try:
        os.stat(filename)
    except OSError:
        return None
    pmi_model._load_from_pmi_file(filename)
    cached_pmi_models[noun] = pmi_model
    return pmi_model
Example #4
0
def get_feature_set(source_dir):
    """Collect every (section, feature) pair used by the models in a folder.

    Each file matching ``*.pmi.bz2`` directly inside ``source_dir`` is
    loaded into a ``features.PMINounModel``.  For every name in the model's
    ``sections``, the entries of the attribute with that name are recorded.
    Progress is printed to standard output.

    Returns a set of ``(section, feature)`` tuples; the set is empty when
    no file matches.
    """
    # Single-argument print(...) emits the same text under Python 2 and 3.
    print('Loading models...')
    feature_set = set()

    for n, pmi_filename in enumerate(glob.glob('%s/*.pmi.bz2' % source_dir)):
        print('Loading: %s %s' % (n, pmi_filename))
        pmi_model = features.PMINounModel()
        pmi_model._load_from_pmi_file(pmi_filename)
        for section in pmi_model.sections:
            # A set ignores duplicates, so no membership test is needed.
            feature_set.update(
                (section, feature)
                for feature in pmi_model.__dict__[section])
    return feature_set
Example #5
0
def generate_cv_entries(cv_entries_dict, input_dir, output_dir):
    """Generate the model of every concept not yet present in ``output_dir``.

    For each concept key in ``cv_entries_dict`` the expected model file name
    is derived with ``PMINounModel.lemma_to_filename``.  Concepts whose file
    can already be stat'ed are skipped; the others are passed to
    ``generate_cv`` together with the whole dictionary and both directories.
    """
    namer = features.PMINounModel()
    for concept in cv_entries_dict:
        target = namer.lemma_to_filename(concept, output_dir)
        # os.path.exists() wraps os.stat() and reports an OSError as False.
        if os.path.exists(target):
            continue
        generate_cv(cv_entries_dict[concept], cv_entries_dict, input_dir,
                    output_dir)
Example #6
0
def make_db(source_dir, c, feature_map_toidx, concept_map_toidx,
            min_high_fcount=40):
    """Insert the PMI values of the models in ``source_dir`` into ``words``.

    Each file matching ``*.pmi.bz2`` directly inside ``source_dir`` is
    loaded into a ``features.PMINounModel``.  Models whose ``high_fcount``
    is below ``min_high_fcount`` are skipped.  For every feature of every
    section of the remaining models, one row
    ``(concept_idx, feature_idx, pmi)`` is inserted into the ``words`` table.
    Progress is printed to standard output.

    Args:
        source_dir: directory containing the ``*.pmi.bz2`` files.
        c: object whose ``execute(sql, params)`` runs the insert; presumably
            a DB-API cursor -- verify against the caller.  Nothing is
            committed here.
        feature_map_toidx: mapping from ``(section, feature_str)`` to the
            feature index, where ``feature_str`` comes from
            ``featuremap.feature_to_str``.
        concept_map_toidx: mapping from a model's ``noun`` to the concept
            index.
        min_high_fcount: smallest ``high_fcount`` a model needs in order to
            be inserted.  Defaults to 40, the previously hard-coded value.

    Raises:
        KeyError: when a feature or concept that is about to be inserted is
            missing from its index mapping.
    """
    for n, pmi_filename in enumerate(glob.glob('%s/*.pmi.bz2' % source_dir)):
        # Single-argument print(...) emits the same text under Python 2 and 3.
        print('Loading: %s %s' % (n, pmi_filename))
        pmi_model = features.PMINounModel()
        pmi_model._load_from_pmi_file(pmi_filename)
        if pmi_model.high_fcount < min_high_fcount:
            continue
        concept = pmi_model.noun
        for section in pmi_model.sections:
            section_values = pmi_model.__dict__[section]
            for feature in section_values:
                pmi = section_values[feature]
                feature_str = featuremap.feature_to_str(feature)
                feature_idx = feature_map_toidx[(section, feature_str)]
                # Looked up per row on purpose: a model without features
                # must not raise for a concept missing from the mapping.
                concept_idx = concept_map_toidx[concept]

                c.execute('insert into words values (?, ?, ?)',
                          (concept_idx, feature_idx, pmi))
Example #7
0
def load_concept_model(concept):
    """Build the PMI noun model of ``concept`` from the module's ``base_dir``.

    Returns the constructed ``features.PMINounModel``, or ``None`` when the
    constructor raises ``IOError``.
    """
    try:
        loaded = features.PMINounModel(concept, base_dir)
    except IOError:
        return None
    return loaded
Example #8
0
def generalize_and_save_pmi(source_dir, dest_dir):
    """Generalize every ``*.pmi`` model in ``source_dir`` into ``dest_dir``.

    Each file matching ``*.pmi`` directly inside ``source_dir`` is loaded
    into a ``features.PMINounModel``, passed to ``generalize_pmi_nounmodel``
    and saved with ``save_to_file(dest_dir)``.  The helper's return value is
    ignored, so it presumably modifies the model in place -- verify against
    its definition.
    """
    pattern = '%s/*.pmi' % source_dir
    for path in glob.glob(pattern):
        model = features.PMINounModel()
        model._load_from_pmi_file(path)
        generalize_pmi_nounmodel(model)
        model.save_to_file(dest_dir)