Beispiel #1
0
def search(new_grid, Cs):
    """Score every value in ``Cs`` on each parameter combination of a grid.

    A default grid is overlaid with the entries of ``new_grid``. For each
    combination produced by ``params(grid)`` a dataset is built with
    ``constr`` and ``C_score`` is called once per value in ``Cs``.

    Args:
        new_grid: Mapping whose entries replace the default grid entries
            with the same key (presumably lists of candidate values, like
            the defaults -- confirm against ``params``).
        Cs: Iterable of values passed as the first argument to ``C_score``.

    Returns:
        A list with one ``(p, X.shape, scores)`` tuple per combination,
        where ``scores`` is a set of ``(C, C_score(...))`` pairs. The list
        is also printed before it is returned.
    """
    grid = {
        'segment': ['gender_female'],
        'files_n': [5],
        'lower_pop_bound': [3],
        'ngram_ns': [[1, 2]],
        'min_word_length': [2],
        'word_type': ['alphanumeric'],
        'use_urls': [True],
        'min_url_depth': [1],
    }
    # dict.update behaves the same on Python 2 and 3, unlike iteritems().
    grid.update(new_grid)

    res = []
    for p in params(grid):
        # constr also returns a features map, which is not needed here.
        X, y, X_test, y_test, _features_map = constr(
            segment=p['segment'],
            files_n=p['files_n'],
            lower_pop_bound=p['lower_pop_bound'],
            ngram_ns=p['ngram_ns'],
            min_word_length=p['min_word_length'],
            word_type=p['word_type'],
            use_urls=p['use_urls'],
            min_url_depth=p['min_url_depth'],
        )
        scores = {(C, C_score(C, X, y, X_test, y_test)) for C in Cs}
        res.append((p, X.shape, scores))

    # Parenthesised single-argument form prints identically on Python 2
    # and is valid syntax on Python 3.
    print(res)

    return res
Beispiel #2
0
def search(new_grid, Cs):
    """Run ``C_score`` for each value in ``Cs`` over a grid of settings.

    The defaults below are overridden key-by-key with ``new_grid``; every
    combination yielded by ``params(grid)`` is turned into a dataset via
    ``constr`` and scored once per value in ``Cs``.

    Args:
        new_grid: Mapping of grid keys to replacement entries (presumably
            lists of candidate values, as in the defaults -- confirm
            against ``params``).
        Cs: Iterable of values handed to ``C_score`` as its first argument.

    Returns:
        A list of ``(p, X.shape, scores)`` tuples, one per combination,
        where ``scores`` is a set of ``(C, C_score(...))`` pairs. The same
        list is printed before being returned.
    """
    grid = {
        'segment': ['gender_female'],
        'files_n': [5],
        'lower_pop_bound': [3],
        'ngram_ns': [[1, 2]],
        'min_word_length': [2],
        'word_type': ['alphanumeric'],
        'use_urls': [True],
        'min_url_depth': [1],
    }
    # Portable replacement for the Python-2-only iteritems() copy loop.
    grid.update(new_grid)

    res = []
    for p in params(grid):
        # The features map returned by constr is unused in this function.
        X, y, X_test, y_test, _features_map = constr(
            segment=p['segment'],
            files_n=p['files_n'],
            lower_pop_bound=p['lower_pop_bound'],
            ngram_ns=p['ngram_ns'],
            min_word_length=p['min_word_length'],
            word_type=p['word_type'],
            use_urls=p['use_urls'],
            min_url_depth=p['min_url_depth'],
        )
        scores = {(C, C_score(C, X, y, X_test, y_test)) for C in Cs}
        res.append((p, X.shape, scores))

    # print(...) with one argument gives the same output on Python 2 and
    # parses on Python 3, where the bare print statement is a SyntaxError.
    print(res)

    return res
Beispiel #3
0
import cPickle as pickle
from constr import constr

# Build the 'gender_female' dataset once with the chosen settings and cache
# the result on disk.
X, y, X_test, y_test, features_map = constr(
    segment='gender_female',
    files_n=47,
    lower_pop_bound=2,
    ngram_ns=[1, 2],
    min_word_length=1,
    word_type='alphanumeric',
    use_urls=True,
    min_url_depth=1,
)

# Use a context manager so the file is flushed and closed deterministically
# instead of relying on garbage collection of the anonymous file object.
with open('prepared/gender_female_allegedly_best.pickle', 'wb') as out_file:
    # HIGHEST_PROTOCOL selects the same protocol as the previous magic -1.
    pickle.dump((X, y, X_test, y_test, features_map), out_file,
                pickle.HIGHEST_PROTOCOL)