Example #1
from sklearn.linear_model import LogisticRegression

def classifier(df, vec, target):
    # Vectorize the text of each row, fit a logistic-regression classifier
    # on the labels in df[target], and report the training accuracy.
    X = vec.transform(iterText(df))
    Y = list(df[target])
    cla = LogisticRegression()
    cla.fit(X, Y)
    print('classifier training score', cla.score(X, Y))
    return cla
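classifier() streams the corpus through iterText(df), which is defined elsewhere in the project and not shown here. A minimal sketch of what such a generator might look like, assuming the raw documents live in a single 'text' column (the column name is an assumption, not taken from the source):

def iterText(df):
    # Hypothetical helper: yield one document string per DataFrame row
    # so the vectorizer can stream over the corpus without building a list.
    for text in df['text']:
        yield str(text)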
Example #2
def vectorizer(df):
    # MarisaTfidfVectorizer (defined elsewhere in the project) is a
    # TfidfVectorizer variant that stores its vocabulary in a MARISA trie
    # to keep memory usage down.
    # 1M max_features should fit in memory; OvA will be at most 184 classes,
    # so coef_ = 1M * 184 * 8 B ~ 1.5 GB fits in memory easily.
    vec = MarisaTfidfVectorizer(min_df=1,
                                stop_words=None,
                                max_features=1000000,
                                smooth_idf=True,
                                norm='l2',
                                sublinear_tf=True,
                                use_idf=True,
                                ngram_range=(1, 3))
    vec.fit(iterText(df))
    return vec
Example #3
def log_proba(df, vec, cla):
    # Vectorize the rows and return the per-class log-probabilities,
    # an array of shape (n_samples, n_classes).
    X = vec.transform(iterText(df))
    lp = cla.predict_log_proba(X)
    return lp
Example #4
def score(df, vec, cla, target):
    # Mean accuracy of the classifier on the rows of df against df[target].
    X = vec.transform(iterText(df))
    Y = list(df[target])
    sc = cla.score(X, Y)
    return sc
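Taken together, the helpers form a simple pipeline: fit the vectorizer on the training text, train the classifier, then evaluate on held-out rows. A minimal usage sketch, assuming the functions above (with their MarisaTfidfVectorizer and iterText dependencies) are importable from the same module, and using hypothetical 'text' and 'label' columns and toy data for illustration:

import pandas as pd

# Toy data; the 'text' and 'label' column names are assumptions.
train = pd.DataFrame({'text': ['free offer now', 'win a prize',
                               'meeting at noon', 'see you tomorrow'],
                      'label': ['spam', 'spam', 'ham', 'ham']})
test = pd.DataFrame({'text': ['free prize', 'lunch meeting'],
                     'label': ['spam', 'ham']})

vec = vectorizer(train)                # fit the TF-IDF vocabulary on training text
cla = classifier(train, vec, 'label')  # train and print the training accuracy
print('held-out accuracy', score(test, vec, cla, 'label'))
lp = log_proba(test, vec, cla)         # per-class log-probabilities for the test rows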