from itertools import izip
import cPickle

import cv
import evaluation
import preproc
import pruning
import similarity


def onego_main():
    # Test on toyset.
    preproc.subset("../raw_data/train.csv", "../data/train.csv", 1, 200000)

    # Load toyset .csv -> X & Y
    X, Y = preproc.extract_XY("../data/train.csv")

    # Prune corpora
    label_counter = pruning.LabelCounter(Y)
    word_counter = pruning.WordCounter(X)
    label_counter.prune(no_below=2, no_above=1.0, max_n=None)
    word_counter.prune(no_below=2, no_above=0.4, max_n=None)  # assume balanced
    pruning.prune_corpora(X, Y, label_counter, word_counter)
    del word_counter  # free up memory

    # Transform X to tf-idf
    bin_word_counter = pruning.WordCounter(X, binary=True)
    similarity.transform_tfidf(X, bin_word_counter)
    del bin_word_counter  # free up memory

    # Load hierarchy (parents & children indices)
    parents_index = preproc.extract_parents(Y, "../raw_data/hierarchy.txt")
    children_index = preproc.inverse_index(parents_index)

    # CV-split X & Y (using default params)
    v_X, v_Y, t_X, t_Y = cv.prop_sample_CV(X=X, Y=Y)
    del X, Y  # free up memory

    # Obtain k-NN scores & pscores, predict, and calculate F1!
    k = 70
    w1, w2, w3, w4 = 3.4, 0.6, 0.8, 0.2
    alpha = 0.9
    cat_pns = evaluation.CategoryPNCounter()
    for d_i, labels_i in izip(v_X, v_Y):
        scores, pscores = similarity.cossim(d_i, t_X, k, t_Y,
                                            parents_index, children_index)
        ranks = similarity.optimized_ranks(scores, pscores, label_counter,
                                           w1, w2, w3, w4)
        predicted_labels = similarity.predict(ranks, alpha)
        cat_pns.fill_pns(predicted_labels, labels_i)
    cat_pns.calculate_cat_pr()
    MaF = cat_pns.calculate_MaF()
    print "MaF:", MaF
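
# An illustrative sketch of the Macro-F1 ("MaF") that onego_main() reports:
# per-category precision and recall from true-positive/false-positive/
# false-negative counts, F1 per category, then an unweighted mean over
# categories. The `counts` dict below is hypothetical; it is assumed that
# evaluation.CategoryPNCounter aggregates the equivalent statistics via
# fill_pns() and calculate_cat_pr().
def _macro_f1_sketch(counts):
    # counts: {category: (tp, fp, fn)}
    f1s = []
    for tp, fp, fn in counts.itervalues():
        precision = tp / float(tp + fp) if tp + fp else 0.0
        recall = tp / float(tp + fn) if tp + fn else 0.0
        f1 = (2 * precision * recall / (precision + recall)
              if precision + recall else 0.0)
        f1s.append(f1)
    return sum(f1s) / len(f1s) if f1s else 0.0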
def stage2():
    # Resume from stage1's pickled state
    with open("../working/X.dat", 'rb') as picklefile:
        X = cPickle.load(picklefile)

    # Transform X to tf-idf
    bin_word_counter = pruning.WordCounter(X, binary=True)
    similarity.transform_tfidf(X, bin_word_counter)
    del bin_word_counter  # free up memory

    ## Save state
    with open("../working/tX.dat", 'wb') as picklefile:
        cPickle.dump(X, picklefile, -1)
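
# A sketch of the remaining stage, mirroring the second half of onego_main().
# It assumes stage1 and stage2 have already run, so ../working/tX.dat holds
# the tf-idf-transformed X and ../working/Y.dat the pruned Y. stage1 does not
# pickle its label_counter, so it is rebuilt here from the pruned Y; pickling
# it in stage1 instead would be the alternative.
def stage3():
    with open("../working/tX.dat", 'rb') as picklefile:
        X = cPickle.load(picklefile)
    with open("../working/Y.dat", 'rb') as picklefile:
        Y = cPickle.load(picklefile)
    label_counter = pruning.LabelCounter(Y)

    # Load hierarchy (parents & children indices)
    parents_index = preproc.extract_parents(Y, "../raw_data/hierarchy.txt")
    children_index = preproc.inverse_index(parents_index)

    # CV-split X & Y (using default params)
    v_X, v_Y, t_X, t_Y = cv.prop_sample_CV(X=X, Y=Y)
    del X, Y  # free up memory

    # Obtain k-NN scores & pscores, predict, and calculate F1!
    k = 70
    w1, w2, w3, w4 = 3.4, 0.6, 0.8, 0.2
    alpha = 0.9
    cat_pns = evaluation.CategoryPNCounter()
    for d_i, labels_i in izip(v_X, v_Y):
        scores, pscores = similarity.cossim(d_i, t_X, k, t_Y,
                                            parents_index, children_index)
        ranks = similarity.optimized_ranks(scores, pscores, label_counter,
                                           w1, w2, w3, w4)
        predicted_labels = similarity.predict(ranks, alpha)
        cat_pns.fill_pns(predicted_labels, labels_i)
    cat_pns.calculate_cat_pr()
    MaF = cat_pns.calculate_MaF()
    print "MaF:", MaF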
def stage1():
    # Load toyset .csv -> X & Y
    X, Y = preproc.extract_XY("../data/train.csv")

    # Prune corpora
    label_counter = pruning.LabelCounter(Y)
    word_counter = pruning.WordCounter(X)
    label_counter.prune(no_below=2, no_above=1.0, max_n=None)
    word_counter.prune(no_below=2, no_above=0.4, max_n=None)  # assume balanced
    pruning.prune_corpora(X, Y, label_counter, word_counter)
    del word_counter  # free up memory

    ## Save state
    with open("../working/X.dat", 'wb') as picklefile:
        cPickle.dump(X, picklefile, -1)
    with open("../working/Y.dat", 'wb') as picklefile:
        cPickle.dump(Y, picklefile, -1)
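
# Hypothetical entry point: run the pipeline stage by stage (stage3 as
# sketched above), checkpointing to ../working/ between stages; onego_main()
# does the same end to end without checkpoints.
if __name__ == "__main__":
    stage1()
    stage2()
    stage3()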