Beispiel #1
0
def infer(img_list,model_path,flag):
    """Classify every image in ``img_list`` with a checkpointed ``Net`` and print the result.

    Args:
        img_list: iterable of image file paths. The name of each file's
            parent directory is printed as its label.
        model_path: path of a checkpoint readable by ``torch.load`` that
            holds the keys ``'model_state_dict'`` and ``'idx_to_class'``.
        flag: when truthy, ``hook_func`` is registered as a forward hook on
            every ``Basic_Conv`` and ``torch.nn.AdaptiveAvgPool2d`` sub-module.

    Side effects:
        Rebinds the module-level ``IMG_NAME`` to the path being processed
        (presumably read by ``hook_func`` -- TODO confirm), moves the model
        to the default CUDA device and prints one line per image.
    """
    import os  # local import: only needed for the label extraction below

    global IMG_NAME

    model = Net(3,10)
    para = sum([np.prod(list(p.size())) for p in model.parameters()])
    # NOTE(review): the "* 4" presumably converts a parameter count into
    # bytes (float32), so the printed figure is a size in MB -- confirm.
    print('Model {} : params: {:4f}M'.format(model._get_name(), para * 4 / 1000 / 1000))

    # load on the CPU first so the checkpoint does not depend on the device
    # it was saved from, then move the whole model to the GPU
    state_dict = torch.load(model_path,map_location='cpu')
    model.load_state_dict(state_dict['model_state_dict'])
    idx_to_class = state_dict['idx_to_class']
    model = model.cuda()
    model.eval()

    modules_for_plot = (Basic_Conv,torch.nn.AdaptiveAvgPool2d)
    if flag:
        for module in model.modules():
            if isinstance(module, modules_for_plot):
                module.register_forward_hook(hook_func)

    with torch.no_grad():
        for img_name in img_list:
            IMG_NAME = img_name
            # Image.open keeps the file open until the data is read; the
            # context manager releases the handle once the tensor is built.
            with Image.open(img_name) as img:
                batch = transform(img)[None,:,:,:].cuda()
            scores = model(batch)
            pred = torch.argmax(scores,1).item()
            class_name = idx_to_class[pred]
            # parent directory name, independent of the path separator and
            # safe for paths that have no directory component
            label = os.path.basename(os.path.dirname(img_name))
            print('file:{},label:{},pred:{}'.format(img_name,label,class_name))
Beispiel #2
0
def generic_translate_python(src, lib, lang_import, highlight,
                             pre_code='', post_code=''):
    '''Preprocess the Python source from the editor and run it with exec().

        src: source code in editor
        lib: string - name of the language specific lib (e.g. "library" in
             English, "biblio" in French); it is removed from sys.modules
             if present
        lang_import: import statement executed before the program,
             something like "from reeborg_en import *"
        highlight: if true, try to insert highlighting information
        pre_code: code placed before the user's program
        post_code: code placed after the user's program
    '''
    if lib in sys.modules:
        del sys.modules[lib]

    # the program runs in a copy of this module's namespace plus two helpers
    globals_ = dict(globals(), dir_py=dir_py, Help=Help)

    src = transform(src)
    exec(lang_import, globals_)

    if highlight:
        highlighted, problem = insert_highlight_info(src)
        if problem:
            # highlighting info could not be inserted: report it and run
            # the source without it
            exec("RUR.ui.highlight('{}')".format(problem), globals_)
            window.jQuery("#highlight-impossible").show()
        else:
            src = highlighted
    if hasattr(window.RUR, "__debug"):
        for message in ("processed source:", src):
            window.console.log(message)

    program = "".join(["help=Help\n", pre_code, "\n", src, "\n", post_code])
    exec(program, globals_)
Beispiel #3
0
def generic_translate_python(src, highlight, var_watch, pre_code='',
                             post_code=''):
    '''Preprocess the Python source from the editor and run it with exec().

        src: source code in editor
        highlight: determines if the code will be highlighted as it is run
        var_watch: determines if some variable watch will take place
        pre_code: code included with world definition and prepended to user code
        post_code: code included with world definition and appended to user code

        An Exception raised while inserting the highlight/watch information
        or while running the program is stored in window.RUR.__python_error
        rather than propagated.
    '''
    # name of the language specific lib (e.g. "library" in English,
    # "biblio" in French) and the matching import statement, something
    # like "from reeborg_en import *"
    lib = window.RUR.library_name
    lang_import = window.RUR.from_import

    # send everything written to stdout/stderr through our own writers
    sys.stdout.write = __write
    sys.stderr.write = __write_err
    if lib in sys.modules:
        del sys.modules[lib]

    # the program runs in a copy of this module's namespace plus a few extras
    globals_ = dict(globals())
    globals_.update({
        'dir_py': dir_py,
        'Help': Help,
        '_watch_': _watch_,
        '_v_': None,
        'previous_watch_values': {},
    })

    src = transform(src)
    exec(lang_import, globals_)

    if highlight or var_watch:
        try:
            annotated, problem = insert_highlight_info(
                src, highlight=highlight, var_watch=var_watch)
            if problem:
                window.RUR.toggle_highlight()
                window.jQuery("#highlight-impossible").show()
            else:
                src = annotated
        except Exception as e:
            window.RUR.__python_error = e
            window.console.log("problem with hightlight:", e)
            return
    if hasattr(window.RUR, "__debug"):
        for message in ("processed source:", src):
            window.console.log(message)

    # statement that records the names existing before the user's code runs
    snapshot = "system_default_vars = set(locals().keys())\n"
    program = "".join(
        ["help=Help\n", pre_code, "\n", snapshot, src, "\n", post_code])
    try:
        exec(program, globals_)
    except Exception as e:
        window.RUR.__python_error = e
Beispiel #4
0
def generic_translate_python(src,
                             lib,
                             lang_import,
                             highlight,
                             var_watch,
                             pre_code='',
                             post_code=''):
    '''Preprocess the Python source from the editor and run it with exec().

        src: source code in editor
        lib: string - name of the language specific lib
             (e.g. "library" in English, "biblio" in French);
             it is removed from sys.modules if present
        lang_import: import statement executed before the program,
             something like "from reeborg_en import *"
        highlight: if true, highlighting information is inserted
        var_watch: if true, variable watching information is inserted
        pre_code: code placed before the user's program
        post_code: code placed after the user's program

        An Exception raised while inserting the highlight/watch information
        or while running the program is stored in window.RUR.__python_error
        rather than propagated.
    '''
    # send everything written to stdout/stderr through our own writers
    sys.stdout.write = __write
    sys.stderr.write = __write_err
    if lib in sys.modules:
        del sys.modules[lib]

    # the program runs in a copy of this module's namespace plus a few extras
    globals_ = dict(globals())
    globals_.update({
        'dir_py': dir_py,
        'Help': Help,
        '_watch_': _watch_,
        '_v_': None,
        'previous_watch_values': {},
    })

    src = transform(src)
    exec(lang_import, globals_)

    if highlight or var_watch:
        try:
            annotated, problem = insert_highlight_info(
                src, highlight=highlight, var_watch=var_watch)
            if problem:
                window.RUR.ui.highlight()
                window.jQuery("#highlight-impossible").show()
            else:
                src = annotated
        except Exception as e:
            window.RUR.__python_error = e
            window.console.log("problem with hightlight:", e)
            return
    if hasattr(window.RUR, "__debug"):
        for message in ("processed source:", src):
            window.console.log(message)

    # statement that records the names existing before the user's code runs
    snapshot = "system_default_vars = set(locals().keys())\n"
    program = "".join(
        ["help=Help\n", pre_code, "\n", snapshot, src, "\n", post_code])
    try:
        exec(program, globals_)
    except Exception as e:
        window.RUR.__python_error = e
    def __init__(self, path, min_length=3, max_length=25, transform=None):
        """Load a tab-separated parallel corpus and build both vocabularies.

        Args:
            path: UTF-8 text file holding one ``source<TAB>target`` pair per
                line. Blank lines are ignored.
            min_length: smallest number of tokens (inclusive) each side of a
                pair must have for the pair to be kept.
            max_length: largest number of tokens (inclusive) each side may
                have.
            transform: optional callable applied to the source and to the
                target string before whitespace tokenisation.

        Raises:
            ValueError: if a non-blank line does not split into exactly two
                tab-separated fields.

        Every target sentence gets a leading ``"<s>"`` token, which counts
        towards the length limits. Vocabulary words are numbered in sorted
        order, so the same corpus always yields the same indices.
        """
        # read everything up front so the file handle is released right away
        with open(path, "r", encoding="utf-8") as handle:
            corpus = [line.strip() for line in handle]

        self.raw_X, self.raw_y = [], []
        for line in corpus:
            if not line:
                continue
            X, y = line.split("\t")
            if transform:
                X = transform(X)
                y = transform(y)
            X = X.split()
            y = ["<s>"] + y.split()
            if (
                min_length <= len(X) <= max_length
                and min_length <= len(y) <= max_length
            ):
                self.raw_X.append(X)
                self.raw_y.append(y)

        # sorted() instead of list(): iteration order of a set of strings
        # changes from one interpreter run to the next
        source_vocab = sorted({word for sent in self.raw_X for word in sent})
        target_vocab = sorted({word for sent in self.raw_y for word in sent})

        self.source2index = {"<PAD>": 0, "<UNK>": 1, "<s>": 2, "</s>": 3}
        for word in source_vocab:
            if word not in self.source2index:
                self.source2index[word] = len(self.source2index)

        self.target2index = {"<PAD>": 0, "<UNK>": 1, "<s>": 2, "</s>": 3}
        for word in target_vocab:
            if word not in self.target2index:
                self.target2index[word] = len(self.target2index)

        # reverse mapping: index -> target token
        self.index2target = {
            index: word for word, index in self.target2index.items()
        }
Beispiel #6
0
def generic_translate_python(src, lib, lang_import, highlight,
                             pre_code='', post_code=''):
    '''Preprocess the Python source from the editor and run it with exec().

        src: source code in editor
        lib: string - name of the language specific lib
             (e.g. "library" in English, "biblio" in French);
             it is removed from sys.modules if present
        lang_import: import statement executed before the program,
             something like "from reeborg_en import *"
        highlight: if true, highlighting information is inserted
        pre_code: code placed before the user's program
        post_code: code placed after the user's program

        An Exception raised while inserting the highlight information or
        while running the program is stored in window.RUR.__python_error
        rather than propagated.
    '''
    # send everything written to stdout/stderr through our own writers
    sys.stdout.write = __write
    sys.stderr.write = __write_err
    if lib in sys.modules:
        del sys.modules[lib]

    # the program runs in a copy of this module's namespace plus a few extras
    globals_ = dict(globals())
    globals_.update({
        'dir_py': dir_py,
        'Help': Help,
        '__watch': __watch,
        '__v': None,
        'previous_watch_values': {},
    })

    src = transform(src)
    exec(lang_import, globals_)

    if highlight:
        try:
            highlighted, problem = insert_highlight_info(src)
            if problem:
                window.RUR.ui.highlight()
                window.jQuery("#highlight-impossible").show()
            else:
                src = highlighted
        except Exception as e:
            window.RUR.__python_error = e
            window.console.log("problem with hightlight:", e)
            return
    if hasattr(window.RUR, "__debug"):
        for message in ("processed source:", src):
            window.console.log(message)

    # statement that records the names existing before the user's code runs
    snapshot = "system_default_vars = set(locals().keys())\n"
    program = "".join(
        ["help=Help\n", pre_code, "\n", snapshot, src, "\n", post_code])
    try:
        exec(program, globals_)
    except Exception as e:
        window.RUR.__python_error = e
Beispiel #7
0
from browser import window
from preprocess import transform
from common import _import_fr

# Hand this module's namespace to _import_fr before the code below runs
# (presumably it fills it with the French API names -- TODO confirm).
_import_fr(globals())

# Preprocess the text returned by window.library.getValue() and run it in
# this module's namespace.
src = transform(window.library.getValue())
try:
    exec(src)
except Exception as e:
    # keep the error on the RUR object instead of letting it propagate
    window.RUR.__python_error = e
# Logistic regression on the standardized spam features.
# print() is called with a single parenthesised string so that the script
# is valid on both Python 2 and Python 3.
print('Logistic Regression with Transformed Features:')

# print('refreshing dataset...')

# parse train and test text files
train_x = get_features('spam_train.txt')
train_y = get_classification('spam_train.txt')

test_x = get_features('spam_test.txt')
test_y = get_classification('spam_test.txt')

# print('transforming features...')

# standardize features
train_x = transform(train_x)
test_x = transform(test_x)

# add 1 y-intercept column
train_x = add_ones(train_x)
test_x = add_ones(test_x)

# print('calculating weights...')

# find W for logistic regression with gradient descent
w = logistic_regression(train_x, train_y)

# print('predicting...')

# make predictions
train_predictions = predict_y(train_x, w)
Beispiel #9
0
import pandas as pd
import sys

# make the parent directory importable so that ``preprocess`` is found
sys.path.append('../')
from preprocess import transform
try:
    from pandas.plotting import parallel_coordinates
except ImportError:
    # older pandas releases only provide the legacy module path
    from pandas.tools.plotting import parallel_coordinates
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt

# Draw a parallel-coordinates plot of five features of the training data,
# coloured by the 'Happy' target.
filename_train = '../data/train.csv'
train_dataset = transform(filename_train)
X = train_dataset['data']
y = train_dataset['target']

# column 0 is used as the row index; columns 2..6 are the plotted features
row_idx = X[:, 0]
X = X[:, 2:7]
col_name = ['Gender', 'Income', 'HouseholdStatus', 'EducationLevel', 'Party']
X = pd.DataFrame(data=X, index=row_idx, columns=col_name)
y = pd.DataFrame(data=y, index=row_idx, columns=['Happy'])

# drop rows with missing values, then keep the first 100 remaining rows
data = ((pd.concat([X, y], axis=1)).dropna())[:100]
#print(data)

# replace the numeric target with readable class names for the legend
mapping = {1: 'Happy', 0: 'Unhappy'}
data = data.replace({'Happy': mapping})

fig = plt.figure()
fig.suptitle("Parallel coordinates plot")
parallel_coordinates(data, 'Happy', color=['red', 'blue'])
plt.ylabel("The discrete numbers of the categories")
Beispiel #10
0
def _write_prediction_csv(path, user_ids, predictions):
    """Write one ``UserID,Happy`` row per prediction to the CSV file at ``path``."""
    # the context manager closes the file even if a row fails to format
    with open(path, "w") as out:
        out.write("UserID,Happy\n")
        for user_id, label in zip(user_ids, predictions):
            out.write("%d,%d\n" % (user_id, label))


def main():
    """Train and compare several classifiers, then write test-set predictions.

    Reads ``./data/train.csv`` through ``transform``, holds out 25% of it to
    report the accuracy of the sklearn and the hand-written logistic
    regression and naive Bayes classifiers plus an sklearn SVM and random
    forest, and finally writes the sklearn models' predictions for
    ``./data/test.csv`` to ``./predictions/*_predictions.csv``.
    """
    # load training data
    filename_train = './data/train.csv'
    train_dataset = transform(filename_train)

    X = train_dataset['data']
    y = train_dataset['target']

    # fill in missing data (optional)
    X_full = fill_missing(X, 'mode', False)

    X_full_train, X_full_test, y_train, y_test = train_test_split(X_full, y, test_size=0.25, random_state=0)

    ### use the logistic regression
    print('Train the logistic regression classifier')
    lr_model = LogisticRegression()
    start_time = time.time()
    lr_model.fit(X_full_train,y_train)
    elapsed_time = time.time() - start_time
    y_predict = lr_model.predict(X_full_test)
    print('The accuracy of the sklearn lr classifier: '+str(sum(y_test ==  y_predict)/y_test.shape[0])+' elapsed time: '+str(elapsed_time))
    clf = logisticRegression()
    start_time = time.time()
    clf.fit(X_full_train,y_train)
    elapsed_time = time.time() - start_time
    y_predict = clf.predict(X_full_test)
    print('The accuracy of my lr classifier: '+str(sum(y_test ==  y_predict)/y_test.shape[0])+' elapsed time: '+str(elapsed_time))

    ### use the naive bayes
    print('Train the naive bayes classifier')
    nb_model = MultinomialNB()
    start_time = time.time()
    nb_model.fit(X_full_train, y_train)
    elapsed_time = time.time() - start_time
    y_predict = nb_model.predict(X_full_test)
    print('The accuracy of the sklearn nb classifier: '+str(sum(y_test ==  y_predict)/y_test.shape[0])+' elapsed time: '+str(elapsed_time))
    clf = NaiveBayes()
    start_time = time.time()
    clf = clf.fit(X_full_train, y_train)
    elapsed_time = time.time() - start_time
    y_predict = clf.predict(X_full_test)
    print('The accuracy of my nb classifier: '+str(sum(y_test ==  y_predict)/y_test.shape[0])+' elapsed time: '+str(elapsed_time))

    ## use the svm
    print('Train the SVM classifier')
    svm_model = svm.SVC(kernel='linear', C=1).fit(X_full_train, y_train)
    print(('The accuracy of the sklearn SVM classifier: %f')%(svm_model.score(X_full_test, y_test)))

    ## use the random forest
    print('Train the random forest classifier')
    rf_model = RandomForestClassifier(n_estimators=500)
    rf_model.fit(X_full_train, y_train)
    print(('The accuracy of the sklearn random forest classifier: %f')%(rf_model.score(X_full_test, y_test)))

    ## get predictions
    df = pd.read_csv('./data/test.csv')
    # .values replaces DataFrame.as_matrix(), which newer pandas removed
    UserID = df.loc[:,'UserID'].values
    df = df.drop('UserID', axis=1)
    X_predict = df.values
    # replace every non-numeric column by integer codes, numbered in the
    # order of the dummy columns pandas generates for it
    # NOTE(review): these codes are derived from the test file alone;
    # verify they match the encoding ``transform`` applied to the training data.
    for n in range(df.shape[1]):
        if df.iloc[:,n].dtypes!=np.int64 and df.iloc[:,n].dtypes!=np.float64:
            categories = list(pd.get_dummies(X_predict[:,n]))
            for code, category in enumerate(categories):
                X_predict[:,n][X_predict[:,n]==category]=code
    X_full_predict = fill_missing(X_predict, 'mode', False)

    fitted_models = (
        ("./predictions/lr_predictions.csv", lr_model),
        ("./predictions/nb_predictions.csv", nb_model),
        ("./predictions/svm_predictions.csv", svm_model),
        ("./predictions/rf_predictions.csv", rf_model),
    )
    for out_path, fitted in fitted_models:
        y_predict = fitted.predict(X_full_predict)
        _write_prediction_csv(out_path, UserID, y_predict)
Beispiel #11
0
#! /usr/bin/env python3

from docx2python import docx2python
from docx2python.iterators import enum_at_depth
from pprint import pprint
from preprocess import clean, transform
from logic import gen_nonconflict_report
import json

# Parse the Word document, then clean and transform it.
f = docx2python('Avengers.docx')

# ready_f is a list of sets
ready_f = transform(clean(f))

nonconflicts, stats = gen_nonconflict_report(ready_f)

# Dump one record per entry of ``stats`` as indented JSON.
with open("results/statsv1.json", "w") as outfile:
    processed = [
        {
            'keys': key,
            'times': count,
            'non-conflicts': nonconflicts[key],
        }
        for key, count in stats.items()
    ]
    json.dump(processed, outfile, indent=4)

Beispiel #12
0
# pylint: skip-file
'''The purpose of this file is to enable code from the editor
   to be imported when using the REPL.'''

from browser import window
from preprocess import transform
from common import _import_en

# Hand this module's namespace to _import_en before the code below runs
# (presumably it fills it with the English API names -- TODO confirm).
_import_en(globals())

# Preprocess the text returned by window.editor.getValue() and run it in
# this module's namespace; any exception propagates to the importer.
src = transform(window.editor.getValue())
exec(src)
Beispiel #13
0
def generic_translate_python(src, highlight=False, var_watch=False, pre_code='',
                             post_code=''):
    '''Preprocess the Python source from the editor and run it with exec().

        src: source code in editor
        highlight: determines if the code will be highlighted as it is run
        var_watch: determines if some variable watch will take place
        pre_code: code included with world definition and prepended to user code
        post_code: code included with world definition and appended to user code

        An Exception raised while inserting the highlight/watch information
        or while running the program is stored in window.RUR.__python_error
        rather than propagated.
    '''
    # name of the language specific lib (e.g. "library" in English,
    # "biblio" in French) and the matching import statement, something
    # like "from reeborg_en import *"
    lib = window.RUR.library_name
    lang_import = window.RUR.from_import

    # send everything written to stdout/stderr through our own writers
    sys.stdout.write = __write
    sys.stderr.write = __write_err
    if lib in sys.modules:
        del sys.modules[lib]

    # the program runs in a copy of this module's namespace plus a few extras
    globals_ = dict(globals())
    globals_.update({
        'dir_py': dir_py,
        'Help': Help,
        '_watch_': _watch_,
        '_v_': None,
        'previous_watch_values': {},
    })

    src = transform(src)
    # Code copied from documentation shown in a browser sometimes contains
    # nonbreaking spaces where regular spaces are meant. They are assumed
    # never to be intended in source code (not necessarily true) and are
    # replaced, with a warning in the console.
    if '\xa0' in src:
        src = src.replace('\xa0', ' ')
        window.console.warn("Some nonbreaking spaces were replaced in the Python code.")
    exec(lang_import, globals_)

    if highlight or var_watch:
        try:
            annotated, problem = insert_highlight_info(
                src, highlight=highlight, var_watch=var_watch)
            if problem:
                window.RUR.toggle_highlight()
                window.jQuery("#highlight-impossible").show()
            else:
                src = annotated
        except Exception as e:
            window.RUR.__python_error = e
            window.console.log("problem with hightlight:", e)
            return
    if hasattr(window.RUR, "__debug"):
        for message in ("processed source:", src):
            window.console.log(message)

    # statement that records the names existing before the user's code runs
    snapshot = "system_default_vars = set(locals().keys())\n"
    program = "".join(
        ["help=Help\n", pre_code, "\n", snapshot, src, "\n", post_code])
    try:
        exec(program, globals_)
    except Exception as e:
        window.RUR.__python_error = e
def _save_predictions(path, user_ids, predictions):
    """Write one ``UserID,Happy`` row per prediction to the CSV file at ``path``."""
    # the context manager guarantees the rows are flushed and the file closed
    with open(path, 'w') as out:
        out.write('UserID,Happy\n')
        for user_id, label in zip(user_ids, predictions):
            out.write('%d,%s\n' % (user_id, label))


def main():
    """Train four sklearn classifiers and write their test-set predictions.

    Training and test features are stacked before ``fill_missing`` is
    applied, so both parts are imputed together, and then split again. Each
    model is fitted on 80% of the training rows, its score on the remaining
    20% is printed, and its predictions for ``./data/test.csv`` are written
    to ``./predictions/<model>_predictions.csv``.
    """
    # load training data
    filename_train = "./data/train.csv"
    filename_test = "./data/test.csv"
    df = pd.read_csv(filename_test, header=0)
    # .values replaces Series.as_matrix(), which newer pandas removed
    X_pre_userId = df['UserID'].values
    train_dataset = transform(filename_train)
    test_dataset = transform(filename_test)

    X = train_dataset['data']
    y = train_dataset['target']
    X_pre = test_dataset['data']
    num_train = X.shape[0]
    X = np.append(X, X_pre, 0)

    X_fill = fill_missing(X, 'most_frequent', False)
    # X_fill = fill_missing(X, 'most_frequent', True)
    # undo the stacking: the first num_train rows are the training data
    X_pre_fill = X_fill[num_train:]
    X_fill = X_fill[0:num_train]

    X_train, X_test, y_train, y_test = train_test_split(X_fill,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=4)
    print(y_train.shape, y_test.shape)

    ### use the logistic regression
    print('Train the logistic regression classifier')
    lr_model = LogisticRegression(random_state=4)
    lr_model.fit(X_train, y_train)
    print(lr_model.score(X_test, y_test))
    lr_pre = lr_model.predict(X_pre_fill)
    _save_predictions('./predictions/lr_predictions.csv', X_pre_userId, lr_pre)

    ### use the naive bayes
    print('Train the naive bayes classifier')
    nb_model = GaussianNB()
    nb_model.fit(X_train, y_train)
    print(nb_model.score(X_test, y_test))
    nb_pre = nb_model.predict(X_pre_fill)
    _save_predictions('./predictions/nb_predictions.csv', X_pre_userId, nb_pre)

    ## use the svm
    print('Train the SVM classifier')
    svm_model = svm.SVC(kernel='linear', random_state=0)
    svm_model.fit(X_train, y_train)
    print(svm_model.score(X_test, y_test))
    svm_pre = svm_model.predict(X_pre_fill)
    _save_predictions('./predictions/svm_predictions.csv', X_pre_userId, svm_pre)

    ## use the random forest
    print('Train the random forest classifier')
    rf_model = RandomForestClassifier(n_estimators=2600, random_state=4)
    rf_model = rf_model.fit(X_train, y_train)
    print(rf_model.score(X_test, y_test))
    rf_pre = rf_model.predict(X_pre_fill)
    _save_predictions('./predictions/rf_predictions.csv', X_pre_userId, rf_pre)
Beispiel #15
0
from browser import window
from preprocess import transform
from reeborg_fr import *  # NOQA
# Preprocess the text returned by window.editor.getValue() and run it in
# this module's namespace, where the names from the star import above are
# available; any exception propagates to the importer.
src = transform(window.editor.getValue())
exec(src)
Beispiel #16
0
def __generic_translate_python(src,
                               highlight=False,
                               var_watch=False,
                               pre_code='',
                               post_code=''):
    '''Preprocess the Python source from the editor and run it with exec().

        src: source code in editor
        highlight: determines if the code will be highlighted as it is run
        var_watch: determines if some variable watch will take place
        pre_code: code included with world definition and prepended to user code
        post_code: code included with world definition and appended to user code

        An Exception raised while inserting the highlight/watch information
        or while running the program is stored in window.RUR.__python_error
        rather than propagated. An unrecognised window.RUR.from_import
        raises an Exception.
    '''
    # imported here to keep these names out of the global namespace
    from preprocess import transform
    from highlight import insert_highlight_info

    # both output streams go through the same writer
    sys.stdout.write = __write
    sys.stderr.write = __write

    # reeborg_en and reeborg_fr define some attributes on window which may
    # have been redefined by importing the other language version (or
    # perhaps by running a Javascript version), and the user may have
    # edited library/biblio since the last run. Dropping these modules from
    # sys.modules forces a fresh import the next time something like
    #     from reeborg_en import *
    # is requested.
    for mod in ["reeborg_en", "reeborg_fr", "library", "biblio", "extra"]:
        if mod in sys.modules:
            del sys.modules[mod]

    # namespace in which the program runs
    globals_ = {
        '__help': __help,
        '__watch': __watch,
        '__previous_watch_values': {},
        'window': window,
        'console': console,
        'print_dir': print_dir,
    }

    src = transform(src)
    # Code copied from documentation shown in a browser sometimes contains
    # nonbreaking spaces where regular spaces are meant. They are assumed
    # never to be intended in source code (not necessarily true) and are
    # replaced, with a warning in the console.
    if '\xa0' in src:
        src = src.replace('\xa0', ' ')
        window.console.warn(
            "Some nonbreaking spaces were replaced in the Python code.")

    # Rather than executing the import statement, the names of the selected
    # language version are copied from a cached dict.
    requested = window.RUR.from_import
    if requested == "from reeborg_en import *":
        globals_.update(__REEBORG_EN)
    elif requested == "from reeborg_fr import *":
        globals_.update(__REEBORG_FR)
    else:
        raise Exception("unknown import %s" % requested)

    if highlight or var_watch:
        try:
            annotated, problem = insert_highlight_info(
                src, highlight=highlight, var_watch=var_watch)
            if problem:
                window.RUR.toggle_highlight()
                window.jQuery("#highlight-impossible").show()
            else:
                src = annotated
        except Exception as e:
            window.RUR.__python_error = e
            window.console.log("problem with hightlight:", e)
            return
    if hasattr(window.RUR, "__debug"):
        for message in ("processed source:", src):
            window.console.log(message)

    # the snapshot of existing names is only needed when watching variables
    system_vars = ("system_default_vars = set(locals().keys())\n"
                   if var_watch else "\n")
    program = "".join(
        ["help=__help\n", pre_code, "\n", system_vars, src, "\n", post_code])
    try:
        exec(program, globals_)
    except Exception as e:
        window.RUR.__python_error = e
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
try:
    from pandas.plotting import parallel_coordinates
except ImportError:
    # older pandas releases only provide the legacy module path
    from pandas.tools.plotting import parallel_coordinates

import sys

# make the parent directory importable so that ``preprocess`` is found
sys.path.append('../')
import preprocess

# Draw a parallel-coordinates plot of five features for the first 100
# training rows, coloured by the 'Happy' target, save it and show it.
filename = '../data/train.csv'

dataset = preprocess.transform(filename)
X = dataset['data'][:100]
X = X[['Gender', 'Income', 'HouseholdStatus', 'EducationLevel', 'Party']]
y = dataset['target'][:100]

total = pd.concat([X, y], axis=1)

# replace the numeric target with readable class names for the legend
mapping = {1: 'happy', 0: 'unhappy'}
total = total.replace({'Happy': mapping})

plt.figure()
parallel_coordinates(total, 'Happy', color=['red', 'blue'])
plt.title("Parallel Coordinates Plot")
plt.xlabel("Five dimensions")
plt.ylabel("The discrete numbers of the categories")
plt.savefig("parallel_coordinates")
plt.show()
def main():
    """Train several classifiers on the training data and print their scores.

    For every model a fresh split is requested from ``cross_validation``
    with a hand-tuned first argument (presumably the validation fraction --
    TODO confirm), the model is fitted on the training part, and its score
    on the validation part and its training time are printed.
    """
    # load training data
    filename_train = './data/train.csv'
    train_dataset = transform(filename_train)
    X = train_dataset['data']
    y = train_dataset['target']

    # fill in missing data (optional); the targets of the rows reported in
    # discard_row are removed so that X_full and y stay aligned
    X_full, discard_row = fill_missing(X, 'most_frequent', True)
    y = np.delete(y,discard_row)

    ### -------------------- use the logistic regression --------------------
    print('\n\nTrain the logistic regression classifier')
    train_X, train_y, valid_X, valid_y = cross_validation(0.08,X_full,y) #0.08
    # Sklearn package
    lr_model_time1 = time.time()
    lr_model = LogisticRegression()
    lr_model = lr_model.fit(train_X,train_y)
    lr_model_time2 = time.time()
    print("Sklearn LR validation score: {0}".format(lr_model.score(valid_X,valid_y)))
    print("Sklearn LR training time: %.3f s" % (lr_model_time2 - lr_model_time1))

    # Self-implemented
    train_X, train_y, valid_X, valid_y = cross_validation(0.15,X_full,y) #0.15
    self_lr_time1 = time.time()
    self_lr = LogitR()
    self_lr = self_lr.fit(train_X,train_y)
    self_lr_time2 = time.time()
    print("Self LR validation score: {0}".format(self_lr.score(valid_X,valid_y)))
    print("Self LR training time: %.3f s" % (self_lr_time2 - self_lr_time1))
    ### -------------------- use the logistic regression --------------------

    ### -------------------- use the naive bayes --------------------
    # Sklearn package
    print('\n\nTrain the naive bayes classifier')
    train_X, train_y, valid_X, valid_y = cross_validation(0.1,X_full,y) # Sklearn NB validation score: 0.6762589928057554
    nb_model_time1 = time.time()
    nb_model = BernoulliNB()
    nb_model.fit(train_X,train_y)
    nb_model_time2 = time.time()
    print("Sklearn NB validation score: {0}".format(nb_model.score(valid_X,valid_y)))
    print("SKlearn NB training time: %.3f s" % (nb_model_time2 - nb_model_time1))

    # Self-implemented
    train_X, train_y, valid_X, valid_y = cross_validation(0.118,X_full,y) # Self NB validation score: 0.576 # i  0.118
    self_nb_time1 = time.time()
    self_nb = NaiveBayes()
    self_nb = self_nb.fit(train_X,train_y)
    self_nb_time2 = time.time()
    # score on the held-out part, like every other model in this function
    print("Self NB validation score: {0}".format(self_nb.score(valid_X,valid_y)))
    print("Self NB training time: %.3f s" % (self_nb_time2 - self_nb_time1))
    ### -------------------- use the naive bayes --------------------

    ### -------------------- use svm --------------------
    print('\n\nTrain the SVM classifier')
    # linear, poly, rbf, or precomputed (or self-defined)?
    train_X, train_y, valid_X, valid_y = cross_validation(0.17,X_full,y) #0.17
    svm_model_time1 = time.time()
    svm_model = svm.SVC(kernel="linear")
    # scores noted while tuning:
    #   rbf score: 0.682; validation percentage: 0.113
    #   sigmoid score: 0.577; validation percentage: 0.23
    #   poly score: 0.685; validation percentage: 0.16
    #   linear score: 0.701 validation percentage: 0.17
    svm_model.fit(train_X,train_y)
    print("train_X:", train_X.shape)
    print("train_y:", train_y.shape)
    svm_model_time2 = time.time()
    print("Sklearn SVM validation score: {0}".format(svm_model.score(valid_X,valid_y)))
    print("Sklearn SVM training time: %.3f s" % (svm_model_time2 - svm_model_time1))
    ### -------------------- use svm --------------------

    ### -------------------- use random forest --------------------
    print('\n\nTrain the random forest classifier')
    train_X, train_y, valid_X, valid_y = cross_validation(0.151,X_full,y) # Sklearn RF validation score: 0.702 # i:  0.151
    rf_model_time1 = time.time()
    rf_model = RandomForestClassifier(n_estimators=29) # 29
    rf_model.fit(train_X,train_y)
    rf_model_time2 = time.time()
    print("Sklearn RF validation score: {0}".format(rf_model.score(valid_X,valid_y)))
    print("Sklearn RF training time: %.3f s" % (rf_model_time2 - rf_model_time1))
    ### -------------------- use random forest --------------------

    ## get predictions
    """ your code here """
Beispiel #19
0
from browser import window
from preprocess import transform
from reeborg_fr import *
# Preprocess the text returned by window.library.getValue() and run it in
# this module's namespace, where the names from the star import above are
# available; any exception propagates to the importer.
src = transform(window.library.getValue())
exec(src)