# Example #1
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.externals import joblib
from os.path import basename

"""
import os
os.chdir('C:/Users/ngaude/Documents/GitHub/kaggle/cdiscount/')
"""

########################
# Normalisation
########################

"""
normalize_file(ddir + 'test.csv',header(test=True))
normalize_file(ddir + 'validation.csv',header())
normalize_file(ddir + 'training_shuffled.csv',header())
"""

def score(df, vec, cla, target):
    """Return the classifier's accuracy on the rows of *df*.

    df:     dataframe whose *target* column holds the true labels.
    vec:    fitted vectorizer; its transform() consumes iterText(df).
    cla:    fitted classifier exposing score(X, Y).
    target: name of the label column in *df*.
    """
    features = vec.transform(iterText(df))
    labels = list(df[target])
    return cla.score(features, labels)

def vectorizer(df):
    # 1M max_features should fit in memory, 
    # OvA will be at max 184 classes, 
    # so we can fit coef_ =  1M*184*8B ~ 1GB in memory easily
# Example #2
from utils import ddir,normalize_file

# Normalise the raw CSV inputs in place before training/prediction.
# NOTE(review): only `ddir` and `normalize_file` are imported above;
# `header` is presumably also defined in utils — confirm and import it,
# otherwise these lines raise NameError.
normalize_file(ddir + 'test.csv',header(test=True))
normalize_file(ddir + 'training_shuffled.csv',header())
# Example #3
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.externals import joblib
from os.path import basename
"""
import os
os.chdir('C:/Users/ngaude/Documents/GitHub/kaggle/cdiscount/')
"""

########################
# Normalisation
########################
"""
normalize_file(ddir + 'test.csv',header(test=True))
normalize_file(ddir + 'validation.csv',header())
normalize_file(ddir + 'training_shuffled.csv',header())
"""


def score(df, vec, cla, target):
    """Compute mean accuracy of *cla* on *df*.

    Transforms the dataframe's text via the fitted vectorizer *vec*,
    takes the true labels from column *target*, and delegates to the
    classifier's own score() method.
    """
    design_matrix = vec.transform(iterText(df))
    true_labels = [y for y in df[target]]
    accuracy = cla.score(design_matrix, true_labels)
    return accuracy


def vectorizer(df):
    # 1M max_features should fit in memory,