def create_classifier(path, model, **kwargs):
    """Train ``model`` on the saved training split and report test metrics.

    "train.npy" and "test.npy" are loaded through
    ``dataset.from_npy(path, ...)``.  In both arrays the last column is used
    as the label and every other column as a feature.

    Args:
        path: Location handed to ``dataset.from_npy`` (presumably the
            directory holding the ``.npy`` files -- confirm against
            ``dataset.from_npy``).
        model: Callable invoked as ``model(**kwargs)``; the object it returns
            must provide ``fit(X, y)`` and ``predict(X)``.
        **kwargs: Forwarded unchanged to ``model``.

    Returns:
        The fitted classifier.

    Side effects:
        Prints the training time, the classifier, its accuracy and F1 score,
        then draws the confusion matrix via ``plot_confusion_matrix`` and
        ``plt.draw()``.
    """
    train = dataset.from_npy(path, "train.npy")
    test = dataset.from_npy(path, "test.npy")

    # Split features (all columns but the last) from labels (last column).
    train_X, train_y = train[:, :-1], train[:, -1]
    test_X, test_y = test[:, :-1], test[:, -1]

    clf = model(**kwargs)

    # Time only the fit itself, not data loading or evaluation.
    start = time.time()
    clf.fit(train_X, train_y)
    elapsed = time.time() - start

    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print("Training took %0.3f seconds" % elapsed)
    print(clf)

    clf_pred = clf.predict(test_X)
    print("Accuracy: " + str(accuracy_score(test_y, clf_pred)))
    print("F1 Score: " + str(f1_score(test_y, clf_pred)))

    cm = confusion_matrix(test_y, clf_pred)
    plot_confusion_matrix(cm)
    plt.draw()

    return clf
def controller(folder):
    """Score a fixed set of classifiers on one dataset folder and print results.

    "X.npy" and "y.npy" are loaded through ``dataset.from_npy`` from
    ``~/OneDrive/RPI/Summer Project/URL Classifier/Dataset/<folder>``.  Each
    classifier is passed to ``get_accuracy(X, y, 10, clf)`` and the result,
    multiplied by 100, is printed next to the classifier's class name.

    Args:
        folder: Name of the sub-directory of the dataset root to evaluate.
    """
    # Every path component is passed separately so os.path.join inserts the
    # platform's own separator; on Windows this yields exactly the same path
    # as the former backslash-embedded literal.
    path = os.path.join(
        os.path.expanduser('~'),
        'OneDrive', 'RPI', 'Summer Project', 'URL Classifier', 'Dataset',
        folder)

    X = dataset.from_npy(path, 'X.npy')
    y = dataset.from_npy(path, 'y.npy')

    classifiers = [
        GaussianNB(),
        BernoulliNB(),
        LinearSVC(),
        DecisionTreeClassifier(),
        RandomForestClassifier(),
        ExtraTreesClassifier(),
        LogisticRegression(),
        SGDClassifier(),
    ]

    for clf in classifiers:
        # NOTE(review): the 10 is presumably the number of cross-validation
        # folds -- confirm against get_accuracy.
        accuracy = get_accuracy(X, y, 10, clf) * 100
        # One pre-formatted argument prints "<name> : <accuracy>" under both
        # the Python 2 print statement and the Python 3 print function.
        print("%s : %s" % (type(clf).__name__, accuracy))
def controller(folder):
    """Score a fixed set of classifiers on one dataset folder and print results.

    "X.npy" and "y.npy" are loaded through ``dataset.from_npy`` from
    ``~/OneDrive/RPI/Summer Project/URL Classifier/Dataset/<folder>``.  Each
    classifier is passed to ``get_accuracy(X, y, 10, clf)`` and the result,
    multiplied by 100, is printed next to the classifier's class name.

    Args:
        folder: Name of the sub-directory of the dataset root to evaluate.
    """
    # Every path component is passed separately so os.path.join inserts the
    # platform's own separator; on Windows this yields exactly the same path
    # as the former backslash-embedded literal.
    path = os.path.join(
        os.path.expanduser('~'),
        'OneDrive', 'RPI', 'Summer Project', 'URL Classifier', 'Dataset',
        folder)

    X = dataset.from_npy(path, 'X.npy')
    y = dataset.from_npy(path, 'y.npy')

    classifiers = [
        GaussianNB(),
        BernoulliNB(),
        LinearSVC(),
        DecisionTreeClassifier(),
        RandomForestClassifier(),
        ExtraTreesClassifier(),
        LogisticRegression(),
        SGDClassifier(),
    ]

    for clf in classifiers:
        # NOTE(review): the 10 is presumably the number of cross-validation
        # folds -- confirm against get_accuracy.
        accuracy = get_accuracy(X, y, 10, clf) * 100
        # One pre-formatted argument prints "<name> : <accuracy>" under both
        # the Python 2 print statement and the Python 3 print function.
        print("%s : %s" % (type(clf).__name__, accuracy))
def initialize(self):
    """Build the window's widgets and load the saved voter.

    Creates a URL entry (row 0, column 0), a "Submit" button (row 0,
    column 1) and a status label spanning both columns (row 1).  Also loads
    the voter through ``load_voter`` from the ``Trial_03`` dataset folder and
    calls its ``confusion_matrix`` on the "test.npy" split stored there.

    NOTE(review): ``resizable`` and ``geometry`` are called on ``self``, so
    the enclosing class is presumably a ``Tkinter.Tk``/``Toplevel``
    subclass -- confirm.
    """
    # Use the grid layout manager for this window.
    self.grid()

    ### Entry ###
    self.entryVariable = Tkinter.StringVar()
    self.entry = Tkinter.Entry(self, textvariable=self.entryVariable)
    # When a cell grows larger than the widget it contains, the widget can
    # be asked to stick to some edges of the cell
    # (N = north/top, S = south/bottom, E = east/right, W = west/left).
    # 'EW' makes the entry stick to both the left and the right edge.
    self.entry.grid(column=0, row=0, sticky='EW')
    # <Return> event handler
    self.entry.bind("<Return>", self.OnPressEnter)
    self.entryVariable.set(u"Enter URL here.")

    ### Classifier ###
    # Folder where the voter was saved.  Every component is passed
    # separately so os.path.join inserts the platform's own separator; on
    # Windows this yields exactly the same path as the former
    # backslash-embedded literal.
    path = os.path.join(
        os.path.expanduser('~'),
        'OneDrive', 'RPI', 'Summer Project', 'URL Classifier', 'Dataset',
        'Trial_03')
    self.voter = load_voter(path)

    # Last column of the test array is the label, the rest are features.
    test = dataset.from_npy(path, 'test.npy')
    test_X, test_y = test[:, :-1], test[:, -1]
    self.voter.confusion_matrix(test_X, test_y)

    ### Button ###
    # No reference to the button is kept on self because its value is never
    # read or altered later.
    button = Tkinter.Button(self, text=u"Submit",
                            command=self.OnButtonClick)
    button.grid(column=1, row=0)

    ### Label ###
    self.labelVariable = Tkinter.StringVar()
    # White text on a blue background
    self.label = Tkinter.Label(self, textvariable=self.labelVariable,
                               anchor="w", fg="white", bg="blue")
    self.label.grid(column=0, row=1, columnspan=2, sticky='EW')
    self.labelVariable.set(u"Welcome to URL Classifier!")

    # Enable resizing: column 0 takes up any extra horizontal space.
    self.grid_columnconfigure(0, weight=1)
    # Adding constraint - prevent vertical resizing.
    self.resizable(width=True, height=False)

    # Set window to not grow and shrink automatically.
    self.update()
    self.geometry(self.geometry())

    # Give the entry keyboard focus with its whole content selected.
    self.entry.focus_set()
    self.entry.selection_range(0, Tkinter.END)
def initialize(self):
    """Build the window's widgets and load the saved voter.

    Creates a URL entry (row 0, column 0), a "Submit" button (row 0,
    column 1) and a status label spanning both columns (row 1).  Also loads
    the voter through ``load_voter`` from the ``Trial_03`` dataset folder and
    calls its ``confusion_matrix`` on the "test.npy" split stored there.

    NOTE(review): ``resizable`` and ``geometry`` are called on ``self``, so
    the enclosing class is presumably a ``Tkinter.Tk``/``Toplevel``
    subclass -- confirm.
    """
    # Use the grid layout manager for this window.
    self.grid()

    ### Entry ###
    self.entryVariable = Tkinter.StringVar()
    self.entry = Tkinter.Entry(self, textvariable=self.entryVariable)
    # When a cell grows larger than the widget it contains, the widget can
    # be asked to stick to some edges of the cell
    # (N = north/top, S = south/bottom, E = east/right, W = west/left).
    # 'EW' makes the entry stick to both the left and the right edge.
    self.entry.grid(column=0, row=0, sticky='EW')
    # <Return> event handler
    self.entry.bind("<Return>", self.OnPressEnter)
    self.entryVariable.set(u"Enter URL here.")

    ### Classifier ###
    # Folder where the voter was saved.  Every component is passed
    # separately so os.path.join inserts the platform's own separator; on
    # Windows this yields exactly the same path as the former
    # backslash-embedded literal.
    path = os.path.join(
        os.path.expanduser('~'),
        'OneDrive', 'RPI', 'Summer Project', 'URL Classifier', 'Dataset',
        'Trial_03')
    self.voter = load_voter(path)

    # Last column of the test array is the label, the rest are features.
    test = dataset.from_npy(path, 'test.npy')
    test_X, test_y = test[:, :-1], test[:, -1]
    self.voter.confusion_matrix(test_X, test_y)

    ### Button ###
    # No reference to the button is kept on self because its value is never
    # read or altered later.
    button = Tkinter.Button(self, text=u"Submit",
                            command=self.OnButtonClick)
    button.grid(column=1, row=0)

    ### Label ###
    self.labelVariable = Tkinter.StringVar()
    # White text on a blue background
    self.label = Tkinter.Label(self, textvariable=self.labelVariable,
                               anchor="w", fg="white", bg="blue")
    self.label.grid(column=0, row=1, columnspan=2, sticky='EW')
    self.labelVariable.set(u"Welcome to URL Classifier!")

    # Enable resizing: column 0 takes up any extra horizontal space.
    self.grid_columnconfigure(0, weight=1)
    # Adding constraint - prevent vertical resizing.
    self.resizable(width=True, height=False)

    # Set window to not grow and shrink automatically.
    self.update()
    self.geometry(self.geometry())

    # Give the entry keyboard focus with its whole content selected.
    self.entry.focus_set()
    self.entry.selection_range(0, Tkinter.END)
# Dataset folder to evaluate; switch to 'Lexical' for the other folder.
trial = 'Trial_03'
# trial = 'Lexical'

# Every path component is passed separately so os.path.join inserts the
# platform's own separator; on Windows this yields exactly the same path as
# the former backslash-embedded literal.
path = os.path.join(
    os.path.expanduser('~'),
    'OneDrive', 'RPI', 'Summer Project', 'URL Classifier', 'Dataset',
    trial)

# Disabled: build, fit and save a fresh voter instead of loading a saved one.
# voter = create_voter()
#
# train = dataset.from_npy(path, 'train.npy')
# train_X, train_y = train[:, :-1], train[:, -1]
# voter.fit(train_X, train_y)
# save_voter(voter, path)

# Load the saved voter and run its confusion_matrix on the test split
# (last column = label, remaining columns = features).
voter = load_voter(path)
test = dataset.from_npy(path, 'test.npy')
test_X, test_y = test[:, :-1], test[:, -1]
voter.confusion_matrix(test_X, test_y)

# Disabled: per-classifier confusion matrices.
# for clf in voter._classifiers:
#     print type(clf).__name__
#     cm = confusion_matrix(test_y, clf.predict(test_X))
#     plot_confusion_matrix(cm)
#     plt.draw()
#

# Sample URLs for manual spot checks.
test_urls = [
    'https://www.youtube.com/watch?v=4WM6hB7l4Lc&list=PLQVvvaa0QuDd0flgGphKCej-9jp-QdzZ3&index=12&feature=iv&src_vid=81ZGOib7DTk&annotation_id=annotation_1856532697',
    'http://ld.mediaget.com/index.php?l=ru&fu=http:/www.playground.ru/download/?cheat=grand_theft_auto_4_gta_iv_episodes_from_liberty_city_eflc_sohranenie_100-41709&r=playground.ru&f=grand_theft_auto_4_gta_iv_episodes_from_liberty_city_eflc__&%23x421;&%23x43e;&%23x445;&%23x440;&%23x430;&%23x43d;&%23x435;&%23x43d;&%23x438;&%23x435;_100%25',
    'https://raw.github.com/inquisb/shellcodeexec/master/windows/shellcodeexec.x32.exe',
    'http://www.ezthemes.com/site_advertisers/extrafindWD.exe',
]
# controller('Lexical') # controller('Trial_03') trial = 'Trial_03' # trial = 'Lexical' path = os.path.join(os.path.expanduser('~'), 'OneDrive\\RPI\\Summer Project\\URL Classifier\\Dataset', trial) # voter = create_voter() # # train = dataset.from_npy(path, 'train.npy') # train_X, train_y = train[:, :-1], train[:, -1] # voter.fit(train_X, train_y) # save_voter(voter, path) voter = load_voter(path) test = dataset.from_npy(path, 'test.npy') test_X, test_y = test[:, :-1], test[:, -1] voter.confusion_matrix(test_X, test_y) # for clf in voter._classifiers: # print type(clf).__name__ # cm = confusion_matrix(test_y, clf.predict(test_X)) # plot_confusion_matrix(cm) # plt.draw() # test_urls = [ 'https://www.youtube.com/watch?v=4WM6hB7l4Lc&list=PLQVvvaa0QuDd0flgGphKCej-9jp-QdzZ3&index=12&feature=iv&src_vid=81ZGOib7DTk&annotation_id=annotation_1856532697', 'http://ld.mediaget.com/index.php?l=ru&fu=http:/www.playground.ru/download/?cheat=grand_theft_auto_4_gta_iv_episodes_from_liberty_city_eflc_sohranenie_100-41709&r=playground.ru&f=grand_theft_auto_4_gta_iv_episodes_from_liberty_city_eflc__&%23x421;&%23x43e;&%23x445;&%23x440;&%23x430;&%23x43d;&%23x435;&%23x43d;&%23x438;&%23x435;_100%25', 'https://raw.github.com/inquisb/shellcodeexec/master/windows/shellcodeexec.x32.exe', 'http://www.ezthemes.com/site_advertisers/extrafindWD.exe'] for test_url in test_urls: