예제 #1
0
from matplotlib.cbook import mkdirs
from sklearn.linear_model.logistic import LogisticRegression

from src.submission import create_submission
from src.utils import data_path, setup
import pandas as pd
from src.transformers import normalize_features, transform_normalized_time
from src.OneHotTransformer import OneHotTransformer, categorical
from src.validation import cross_validation
from sklearn.multiclass import OneVsRestClassifier
from sklearn.externals import joblib
from sklearn.svm import LinearSVC


# Project-specific initialisation imported from src.utils; it is handed the
# pandas module (presumably to configure pandas options - TODO confirm).
setup(pd)
# For now the only features used are: dates, day of week, pd district, x, y.


def transform_set(name, train=True):
    """Load the CSV identified by *name* and prepare its columns.

    The file location is resolved with ``data_path(name)`` and the file is
    read into a pandas DataFrame.  The 'Address' column is always removed;
    'Descript' and 'Resolution' are removed as well when ``train`` is truthy
    (presumably because only the training file has them - TODO confirm).
    'X' and 'Y' are replaced by the output of ``normalize_features``, every
    'Dates' value is mapped through ``transform_normalized_time``, and a
    ``OneHotTransformer`` is fitted on the resulting frame.

    NOTE(review): the visible body ends right after fitting the transformer
    and returns nothing; this excerpt looks truncated - check the full
    function before relying on its return value.
    """
    train_path = data_path(name)
    train_frame = pd.read_csv(train_path)
    if train:
        # Dropped only when ``train`` is set.
        del train_frame['Descript']
        del train_frame['Resolution']
    # Dropped regardless of ``train``.
    del train_frame['Address']

    train_frame['X'] = normalize_features(train_frame['X'])
    train_frame['Y'] = normalize_features(train_frame['Y'])
    train_frame['Dates'] = train_frame['Dates'].apply(transform_normalized_time)
    # The transformer receives the result of ``categorical(train_frame)`` and
    # the frame's full column index, then is fitted on the processed frame.
    transformer = OneHotTransformer(categorical(train_frame), train_frame.columns)
    transformer.fit(train_frame)
예제 #2
0
from matplotlib.cbook import mkdirs
from sklearn.linear_model.logistic import LogisticRegression

from src.submission import create_submission
from src.utils import data_path, setup
import pandas as pd
from src.transformers import normalize_features, transform_normalized_time
from src.OneHotTransformer import OneHotTransformer, categorical
from src.validation import cross_validation
from sklearn.multiclass import OneVsRestClassifier
from sklearn.externals import joblib
from sklearn.svm import LinearSVC

# Project-specific initialisation imported from src.utils; it is handed the
# pandas module (presumably to configure pandas options - TODO confirm).
setup(pd)
# For now the only features used are: dates, day of week, pd district, x, y.


def transform_set(name, train=True):
    """Load the CSV identified by *name* and prepare its columns.

    The file location is resolved with ``data_path(name)`` and the file is
    read into a pandas DataFrame.  The 'Address' column is always removed;
    'Descript' and 'Resolution' are removed as well when ``train`` is truthy
    (presumably because only the training file has them - TODO confirm).
    'X' and 'Y' are replaced by the output of ``normalize_features`` and
    every 'Dates' value is mapped through ``transform_normalized_time``.
    Finally a ``OneHotTransformer`` is constructed for the processed frame.

    NOTE(review): the visible body stops after constructing the transformer
    (it is neither fitted nor returned here); this excerpt looks truncated -
    check the full function before relying on its behaviour.
    """
    train_path = data_path(name)
    train_frame = pd.read_csv(train_path)
    if train:
        # Dropped only when ``train`` is set.
        del train_frame['Descript']
        del train_frame['Resolution']
    # Dropped regardless of ``train``.
    del train_frame['Address']

    train_frame['X'] = normalize_features(train_frame['X'])
    train_frame['Y'] = normalize_features(train_frame['Y'])
    train_frame['Dates'] = train_frame['Dates'].apply(
        transform_normalized_time)
    # The transformer receives the result of ``categorical(train_frame)`` and
    # the frame's full column index.
    transformer = OneHotTransformer(categorical(train_frame),
                                    train_frame.columns)
예제 #3
0
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

from src.utils import data_path, setup
import pandas as pd

# Script: fit a bag-of-words -> TF-IDF -> multinomial naive Bayes pipeline that
# predicts 'Category' from the free-text 'Descript' column of train.csv, then
# print a sample row, its predicted class probabilities and the class labels.
setup()

# Parentheses and commas are removed from the descriptions before
# vectorising.  Raw string: the old non-raw '[\(\),]' relied on invalid
# escape sequences, which newer Python versions warn about.
_DESCRIPT_PUNCTUATION = re.compile(r'[(),]')

train_path = data_path('train.csv')
train_frame = pd.read_csv(train_path)
train_frame['Descript'] = train_frame['Descript'].apply(
    lambda des: _DESCRIPT_PUNCTUATION.sub('', des))

# Every step is a (name, estimator) tuple; the last one used to be a list.
text_clf = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', MultinomialNB()),
])

text_clf = text_clf.fit(train_frame['Descript'], train_frame['Category'])

# ``DataFrame.ix`` was removed in pandas 1.0.  On the integer index produced
# by ``read_csv`` it was label-based, so ``.loc[0]`` selects the same row.
first_row = train_frame.loc[0]
print(first_row['Descript'], first_row['Category'])

# Class probabilities for every training description (one row per sample,
# one column per entry of ``text_clf.classes_``).
prediction = text_clf.predict_proba(train_frame['Descript'])

print(prediction[0])
print(text_clf.classes_)

# TODO: validate