def create_classifier(path, model, **kwargs):
    """Fit ``model(**kwargs)`` on train.npy and report metrics on test.npy.

    Both arrays are read from ``path`` through ``dataset.from_npy``. In each
    array the last column is taken as the label and every other column as a
    feature. The function prints the fit duration, the fitted estimator, and
    the accuracy and F1 score on the test array, draws the confusion matrix,
    and returns the fitted estimator.

    ``model`` is any callable that accepts ``**kwargs`` and returns an object
    with ``fit`` and ``predict`` methods.
    """
    training = dataset.from_npy(path, "train.npy")
    held_out = dataset.from_npy(path, "test.npy")

    # Features are all columns but the last; labels are the last column.
    train_X, train_y = training[:, :-1], training[:, -1]
    test_X, test_y = held_out[:, :-1], held_out[:, -1]

    clf = model(**kwargs)

    # Only the fit call is timed.
    began = time.time()
    clf.fit(train_X, train_y)
    elapsed = time.time() - began

    print("Training took %0.3f seconds" % elapsed)
    print(clf)

    predicted = clf.predict(test_X)

    accuracy_text = str(accuracy_score(test_y, predicted))
    print("Accuracy: " + accuracy_text)
    f1_text = str(f1_score(test_y, predicted))
    print("F1 Score: " + f1_text)

    plot_confusion_matrix(confusion_matrix(test_y, predicted))
    plt.draw()

    return clf
Beispiel #2
0
def create_classifier(path, model, **kwargs):
    """Build, train and evaluate a classifier from arrays stored in ``path``.

    ``train.npy`` and ``test.npy`` are loaded with ``dataset.from_npy``; the
    final column of each array is used as the target and the preceding
    columns as the inputs. ``model`` is called with ``**kwargs`` to create the
    estimator, which is then fitted on the training array.

    Printed output: the time spent in ``fit``, the estimator itself, and the
    accuracy and F1 score of its predictions on the test array. A confusion
    matrix is plotted and drawn before the fitted estimator is returned.
    """
    train_data = dataset.from_npy(path, "train.npy")
    test_data = dataset.from_npy(path, "test.npy")

    # Last column -> labels, everything before it -> features.
    train_X, train_y = train_data[:, :-1], train_data[:, -1]
    test_X, test_y = test_data[:, :-1], test_data[:, -1]

    clf = model(**kwargs)

    # Measure wall-clock time spent inside fit().
    t0 = time.time()
    clf.fit(train_X, train_y)
    t1 = time.time()
    fit_seconds = t1 - t0

    print("Training took %0.3f seconds" % fit_seconds)
    print(clf)

    test_pred = clf.predict(test_X)

    print("Accuracy: " + str(accuracy_score(test_y, test_pred)))
    print("F1 Score: " + str(f1_score(test_y, test_pred)))

    matrix = confusion_matrix(test_y, test_pred)
    plot_confusion_matrix(matrix)
    plt.draw()

    return clf
def controller(folder):
    """Score a fixed set of classifiers on the dataset stored in ``folder``.

    ``X.npy`` and ``y.npy`` are loaded with ``dataset.from_npy`` from
    ``folder`` under the hard-coded dataset directory in the user's home.
    Each classifier is passed to ``get_accuracy`` and one line of the form
    ``<class name> : <score>`` is printed per classifier.
    """
    dataset_root = 'OneDrive\\RPI\\Summer Project\\URL Classifier\\Dataset'
    path = os.path.join(os.path.expanduser('~'), dataset_root, folder)

    X = dataset.from_npy(path, 'X.npy')
    y = dataset.from_npy(path, 'y.npy')

    # All candidates are instantiated with their default settings up front.
    candidates = [
        GaussianNB(),
        BernoulliNB(),
        LinearSVC(),
        DecisionTreeClassifier(),
        RandomForestClassifier(),
        ExtraTreesClassifier(),
        LogisticRegression(),
        SGDClassifier(),
    ]

    for clf in candidates:
        # NOTE(review): 10 is presumably the number of folds and the *100 a
        # fraction-to-percent conversion -- confirm against get_accuracy.
        score = get_accuracy(X, y, 10, clf) * 100
        print("%s : %s" % (type(clf).__name__, score))
Beispiel #4
0
def controller(folder):
    """Print a ``get_accuracy`` score for each of several classifiers.

    The feature array ``X.npy`` and target array ``y.npy`` are read via
    ``dataset.from_npy`` from ``folder`` inside the hard-coded dataset
    directory below the user's home. Output is one
    ``<class name> : <score>`` line per classifier.
    """
    home = os.path.expanduser('~')
    path = os.path.join(
        home, 'OneDrive\\RPI\\Summer Project\\URL Classifier\\Dataset', folder)

    X = dataset.from_npy(path, 'X.npy')
    y = dataset.from_npy(path, 'y.npy')

    # Default-configured estimators, built before any of them is scored.
    classifiers = [GaussianNB(), BernoulliNB(), LinearSVC(),
                   DecisionTreeClassifier(), RandomForestClassifier(),
                   ExtraTreesClassifier(), LogisticRegression(),
                   SGDClassifier()]

    for clf in classifiers:
        # NOTE(review): the literal 10 looks like a fold count and the
        # multiplication a percent scaling -- verify in get_accuracy.
        accuracy = get_accuracy(X, y, 10, clf) * 100
        clf_name = type(clf).__name__
        print("%s : %s" % (clf_name, accuracy))
Beispiel #5
0
    def initialize(self):
        """Create the window's widgets and load the saved voter.

        Lays out a URL entry, a "Submit" button and a status label on a
        grid, loads the voter from the hard-coded ``Trial_03`` directory and
        runs its ``confusion_matrix`` on ``test.npy`` from that directory.
        """
        # Use the grid geometry manager for this container.
        self.grid()

        # --- URL entry ---
        url_var = Tkinter.StringVar()
        self.entryVariable = url_var
        url_entry = Tkinter.Entry(self, textvariable=url_var)
        self.entry = url_entry
        # sticky='EW' makes the widget cling to both the east (right) and
        # west (left) edges of its cell, so it stretches horizontally when
        # the cell is wider than the widget it contains.
        # (N = north/top, S = south/bottom, E = east/right, W = west/left.)
        url_entry.grid(column=0, row=0, sticky='EW')
        # Pressing Return inside the entry invokes OnPressEnter.
        url_entry.bind("<Return>", self.OnPressEnter)
        url_var.set(u"Enter URL here.")

        # --- Classifier ---
        # The voter is loaded from the directory it was saved to; change
        # this path if the voter lives elsewhere.
        home = os.path.expanduser('~')
        path = os.path.join(
            home,
            'OneDrive\\RPI\\Summer Project\\URL Classifier\\Dataset\\Trial_03')
        self.voter = load_voter(path)
        held_out = dataset.from_npy(path, 'test.npy')
        # Last column is used as the labels, the rest as the features.
        features, labels = held_out[:, :-1], held_out[:, -1]
        self.voter.confusion_matrix(features, labels)

        # --- Submit button ---
        # No attribute is kept for the button: nothing reads or changes it
        # after creation.
        submit = Tkinter.Button(
            self, text=u"Submit", command=self.OnButtonClick)
        submit.grid(column=1, row=0)

        # --- Status label (white text on a blue background) ---
        self.labelVariable = Tkinter.StringVar()
        label_options = dict(textvariable=self.labelVariable,
                             anchor="w", fg="white", bg="blue")
        self.label = Tkinter.Label(self, **label_options)
        self.label.grid(column=0, row=1, columnspan=2, sticky='EW')
        self.labelVariable.set(u"Welcome to URL Classifier!")

        # Give column 0 a weight so it takes up extra horizontal space.
        self.grid_columnconfigure(0, weight=1)
        # Allow horizontal resizing only.
        self.resizable(width=True, height=False)
        # After a forced update, re-apply the current geometry string; the
        # intent is to stop the window from growing and shrinking on its own.
        self.update()
        current_geometry = self.geometry()
        self.geometry(current_geometry)
        # Start with keyboard focus in the entry and its text selected.
        self.entry.focus_set()
        self.entry.selection_range(0, Tkinter.END)
Beispiel #6
0
    def initialize(self):
        """Set up the GUI: entry box, submit button, status label, voter.

        Besides building the widgets, this loads the voter saved under the
        hard-coded ``Trial_03`` dataset directory and calls its
        ``confusion_matrix`` with the features and labels from ``test.npy``.
        """
        # Widgets in this container are placed with the grid manager.
        self.grid()

        # Text entry backed by a StringVar so its content can be read/set.
        self.entryVariable = Tkinter.StringVar()
        entry = Tkinter.Entry(self, textvariable=self.entryVariable)
        self.entry = entry
        # 'EW' = stick to the east (right) and west (left) sides of the
        # grid cell, i.e. fill the cell horizontally when the cell is larger
        # than the widget it contains. 'N' and 'S' would be top and bottom.
        entry.grid(column=0, row=0, sticky='EW')
        # Handler for the Return key.
        entry.bind("<Return>", self.OnPressEnter)
        self.entryVariable.set(u"Enter URL here.")

        # Load the classifier from the directory where the voter was saved.
        voter_dir = os.path.join(
            os.path.expanduser('~'),
            'OneDrive\\RPI\\Summer Project\\URL Classifier\\Dataset\\Trial_03')
        self.voter = load_voter(voter_dir)
        test_data = dataset.from_npy(voter_dir, 'test.npy')
        # All columns but the last are features; the last holds the labels.
        test_X = test_data[:, :-1]
        test_y = test_data[:, -1]
        self.voter.confusion_matrix(test_X, test_y)

        # Submit button. Only a local reference is needed because its value
        # is never read or altered later.
        submit_button = Tkinter.Button(self,
                                       text=u"Submit",
                                       command=self.OnButtonClick)
        submit_button.grid(column=1, row=0)

        # Status label: white text on a blue background, spanning both
        # columns of the second row.
        self.labelVariable = Tkinter.StringVar()
        self.label = Tkinter.Label(
            self,
            textvariable=self.labelVariable,
            anchor="w",
            fg="white",
            bg="blue",
        )
        self.label.grid(column=0, row=1, columnspan=2, sticky='EW')
        self.labelVariable.set(u"Welcome to URL Classifier!")

        # Column 0 gets weight 1 so it absorbs extra width on resize.
        self.grid_columnconfigure(0, weight=1)
        # Width may change, height may not.
        self.resizable(width=True, height=False)
        # Force a layout pass, then set the geometry to its current value;
        # the intent is to keep the window from resizing itself.
        self.update()
        geometry_now = self.geometry()
        self.geometry(geometry_now)
        # Focus the entry and select everything in it.
        self.entry.focus_set()
        self.entry.selection_range(0, Tkinter.END)
Beispiel #7
0
    trial = 'Trial_03'
    # trial = 'Lexical'
    path = os.path.join(
        os.path.expanduser('~'),
        'OneDrive\\RPI\\Summer Project\\URL Classifier\\Dataset', trial)

    # voter = create_voter()
    #
    # train = dataset.from_npy(path, 'train.npy')
    # train_X, train_y = train[:, :-1], train[:, -1]
    # voter.fit(train_X, train_y)

    # save_voter(voter, path)

    voter = load_voter(path)
    test = dataset.from_npy(path, 'test.npy')
    test_X, test_y = test[:, :-1], test[:, -1]
    voter.confusion_matrix(test_X, test_y)

    # for clf in voter._classifiers:
    #     print type(clf).__name__
    #     cm = confusion_matrix(test_y, clf.predict(test_X))
    #     plot_confusion_matrix(cm)
    #     plt.draw()
    #
    test_urls = [
        'https://www.youtube.com/watch?v=4WM6hB7l4Lc&list=PLQVvvaa0QuDd0flgGphKCej-9jp-QdzZ3&index=12&feature=iv&src_vid=81ZGOib7DTk&annotation_id=annotation_1856532697',
        'http://ld.mediaget.com/index.php?l=ru&amp;fu=http:/www.playground.ru/download/?cheat=grand_theft_auto_4_gta_iv_episodes_from_liberty_city_eflc_sohranenie_100-41709&amp;r=playground.ru&amp;f=grand_theft_auto_4_gta_iv_episodes_from_liberty_city_eflc__&%23x421;&%23x43e;&%23x445;&%23x440;&%23x430;&%23x43d;&%23x435;&%23x43d;&%23x438;&%23x435;_100%25',
        'https://raw.github.com/inquisb/shellcodeexec/master/windows/shellcodeexec.x32.exe',
        'http://www.ezthemes.com/site_advertisers/extrafindWD.exe'
    ]
    # controller('Lexical')
    # controller('Trial_03')
    trial = 'Trial_03'
    # trial = 'Lexical'
    path = os.path.join(os.path.expanduser('~'), 'OneDrive\\RPI\\Summer Project\\URL Classifier\\Dataset', trial)

    # voter = create_voter()
    #
    # train = dataset.from_npy(path, 'train.npy')
    # train_X, train_y = train[:, :-1], train[:, -1]
    # voter.fit(train_X, train_y)

    # save_voter(voter, path)

    voter = load_voter(path)
    test = dataset.from_npy(path, 'test.npy')
    test_X, test_y = test[:, :-1], test[:, -1]
    voter.confusion_matrix(test_X, test_y)

    # for clf in voter._classifiers:
    #     print type(clf).__name__
    #     cm = confusion_matrix(test_y, clf.predict(test_X))
    #     plot_confusion_matrix(cm)
    #     plt.draw()
    #
    test_urls = [
        'https://www.youtube.com/watch?v=4WM6hB7l4Lc&list=PLQVvvaa0QuDd0flgGphKCej-9jp-QdzZ3&index=12&feature=iv&src_vid=81ZGOib7DTk&annotation_id=annotation_1856532697',
        'http://ld.mediaget.com/index.php?l=ru&amp;fu=http:/www.playground.ru/download/?cheat=grand_theft_auto_4_gta_iv_episodes_from_liberty_city_eflc_sohranenie_100-41709&amp;r=playground.ru&amp;f=grand_theft_auto_4_gta_iv_episodes_from_liberty_city_eflc__&%23x421;&%23x43e;&%23x445;&%23x440;&%23x430;&%23x43d;&%23x435;&%23x43d;&%23x438;&%23x435;_100%25',
        'https://raw.github.com/inquisb/shellcodeexec/master/windows/shellcodeexec.x32.exe',
        'http://www.ezthemes.com/site_advertisers/extrafindWD.exe']
    for test_url in test_urls: