def predict(inputX):
    """Return the classifier's predictions for ``inputX``.

    Calls ``loadModel()`` first and then hands the module-level
    ``summaries`` together with ``inputX`` to ``NBC.getPredictions``.

    NOTE(review): ``summaries`` is not assigned here; presumably
    ``loadModel()`` populates it -- confirm against its definition.
    """
    loadModel()
    return NBC.getPredictions(summaries, inputX)
def main(catalog):
    """Run the classifier over every file one level below ``catalog``.

    ``catalog`` is expected to contain sub-folders; each file inside a
    sub-folder is passed (as a full path) to ``NBC.NBC`` and the returned
    value is printed.

    Args:
        catalog: Path of the directory holding the test-set sub-folders.
    """
    for folder in os.listdir(catalog):
        tmp_path = os.path.join(catalog, folder)
        # Stray files at the top level (e.g. ".DS_Store") are not test
        # folders; listing them would raise NotADirectoryError.
        if not os.path.isdir(tmp_path):
            continue
        # List the joined path, not the bare folder name: the bare name is
        # resolved against the current working directory, not ``catalog``.
        for file_name in os.listdir(tmp_path):
            filepath = os.path.join(tmp_path, file_name)
            print(NBC.NBC(filepath))
def multi(train, test):
    """Predict a class for each row of ``test`` using a model built on ``train``.

    The training rows are grouped per class with ``NBC.separate_by_class``,
    ``lik`` is applied to that grouping, and every test row is scored with
    ``cal_p`` and then turned into a predicted class by ``predict``.

    Returns:
        A list with one prediction per row of ``test``, in order.
    """
    by_class = NBC.separate_by_class(train)  # class-wise instances
    likelihoods = lik(by_class)  # per-class statistics used for scoring

    # The prior probability of a class is the number of training instances
    # that have that class divided by all instances in the training set.
    # The class labels are hard-coded as 1, 2, 3, 4.
    priors = {
        label: len(by_class[label]) / len(train)
        for label in range(1, 5)
    }

    return [predict(cal_p(likelihoods, priors, row)) for row in test]
def __init__(self, N, ser_):
    """Set up an empty gesture store of ``N`` slots bound to ``ser_``.

    Args:
        N: Number of tests; also the length of the ``gest`` slot list.
        ser_: Serial communication line used for raw data.
    """
    self.numTests = N
    self.gest = [None] * N
    self.state = 0

    # Gesture bookkeeping, all empty at construction time.
    self.gestures = {}
    self.activeGestures = {}
    self.activeStates = {}
    self.Histogram = dict.fromkeys(self.activeGestures, 0)

    self.imugestures = []
    self.raw_gesture = []
    self.base_signals = {}

    # The classifier receives this instance so that it can use the base
    # signals / gesture library. It is created before ``self.ser`` is set,
    # exactly as in the original ordering.
    self.classifier = NBC.NBC(self)
    self.ser = ser_
def __init__(self, parent):
    """Create the frame inside ``parent``, build the UI and reset state.

    Args:
        parent: The containing window; its geometry and background colour
            are set here.
    """
    tk.Frame.__init__(self, parent)
    self.parent = parent
    parent.geometry("1920x700+300+300")
    parent["bg"] = "#121212"
    self.initUI()

    # --- state for the naive Bayes classifier ---
    self.file_for_detection = ""
    self.data_list = []
    # Maximum number of rows available for training in the selected file.
    self.total_count = 0
    # Create the NBC (naive Bayes classifier) object.
    self.nbc = NBC.NBC()

    # --- state for the LSTM model ---
    self.data_list_LSTM = []
    self.total_count_LSTM = 0
    self.lstm = None
    self.file_for_detection_LSTM = ""
def Experiment_2(skfold_data):
    """Plot mean accuracy against the smoothing factor.

    For each smoothing factor (0.0-0.9 in steps of 0.1, then 1-10 in steps
    of 1) every fold of ``skfold_data`` is used once as the test set and
    ``NBC.predictMAP`` is evaluated on it. The per-factor mean and standard
    deviation over the folds are printed and drawn as an error-bar plot.

    Args:
        skfold_data: The folds, as accepted by
            ``train_test.kfold_train_test``.
    """
    print("Running experiment 2...")

    m = list(np.arange(0, 1, 0.1)) + list(np.arange(1, 11, 1))

    Accuracy = []
    for raw_factor in m:
        factor = round(raw_factor, 2)
        fold_scores = []
        for fold in range(len(skfold_data)):
            fold_train, fold_test = train_test.kfold_train_test(
                skfold_data, fold)
            train_x, train_y, test_x, test_y = train_test.train_test_split(
                fold_train, fold_test)
            fold_scores.append(
                NBC.predictMAP(train_x, train_y, test_x, test_y, factor))
        Accuracy.append(fold_scores)

    # Average accuracy per smoothing factor.
    avgAccuracy2 = [np.average(scores) for scores in Accuracy]
    print("list of accuracies:")
    print(Accuracy)
    print("list of average accuracies")
    print(avgAccuracy2)

    # Standard deviation per smoothing factor.
    std = [np.std(scores) for scores in Accuracy]
    print("Standard Deviation: ")
    print(std)

    # Plot against the unrounded factors, as computed above.
    plt.errorbar(m, avgAccuracy2, std)
    plt.xlabel('smoothing factor')
    plt.ylabel('Average Accuracies')
    plt.show()
def getUser(test): user_agent = ("Script to get users' comments") errs = open("errors.log", "w") r = praw.Reddit(user_agent=user_agent) file_name = test #"bluedot951" currPath = os.getcwd() if(os.path.isfile(file_name)): users = open(file_name, "r").read().split("\n") users = users[0:len(users)-1] else: users = [file_name] print users #users = ["Hardekyn", "Fogram"] for user_name in users: print "Processing " + user_name numErrs = 0 while True: if numErrs > 2: break try: user = r.get_redditor(user_name) comms = user.get_comments(limit=None) commcount = 0 postcount = 0 w = open(currPath+"/"+file_name+"/"+user_name + ".log", "w") writeclique = open(currPath+"/"+file_name+"/"+user_name + ".clique", "a") clique = [] while(1): try: comm = comms.next() sentiment, neg, pos = NBC.classify(comm.body) mystr = str(int(comm.created_utc)*1000) + "|" title = comm.submission.title mystr += (title.replace('\n', '') if '\n' in title else title) + "|" mystr += comm.subreddit.display_name + "|" mystr += sentiment + "|" mystr += str(neg) + "|" mystr += str(pos) + "|" mystr += "comment" print(mystr) try: poster = comm.submission.author # print str(poster) if(poster != None): # print poster.name postername = poster.name # print(postername) # writeclique.write(postername + "\n") if postername not in clique: clique.append(postername) except AttributeError: pass try: w.write(mystr + "\n") except UnicodeEncodeError: pass commcount+=1 except StopIteration: break subs = user.get_submitted(limit=None) while(1): try: sub = subs.next() sentiment, neg, pos = NBC.classify(sub.selftext) mystr = str(int(sub.created_utc)*1000) + "|" mystr += sub.title + "|" mystr += sub.subreddit.display_name + "|" mystr += sentiment + "|" mystr += str(neg) + "|" mystr += str(pos) + "|" mystr += "post" print(mystr) try: w.write(mystr + "\n") except UnicodeEncodeError: pass postcount+=1 except StopIteration: break print "Comments: " + str(commcount) print "Posts: " + str(postcount) print "Total: " + 
str(commcount+postcount) for ele in clique: if(ele != user_name): print ele writeclique.write(ele + "\n") w.close() writeclique.close() except Exception, e: print e print "An error occured. Retrying..." errs.write(user_name + "\n") # time.sleep(30) numErrs += 1 continue break
return predictions """We can now train and test the multinomial Naive Bayes.""" seed(1) t = [] cl = {'A': 1, 'B': 2, 'E': 3, 'V': 4} with open("train.csv") as csv_file: absreader = csv.reader(csv_file, delimiter=',', quotechar='|') for row in absreader: x = list( map(int, row[:-1]) ) #originally the values are strings, so they are converted to int x.append(cl[row[-1]]) #appending class as an integer category t.append(x) scoresv, score_meanv = NBC.eval_algo(t, multi) print('Scores on validation set: %s' % scoresv) print('Mean Accuracy: %.3f%%' % score_meanv) """For the test data, we need to be able to predict the classes.""" with open('test.csv') as csv_file: test = [] #stores all test cases absreader = csv.reader(csv_file, delimiter=',', quotechar='|') for row in absreader: x = list( map(int, row) ) #originally the values are strings, so they are converted to int test.append(x) pr = multi(t, test) #get integer predictions p_act = NBC.class_con(p, cl) #get string predictions """Now we have to store them in a csv file.""" with open('sjha286.csv', 'w', newline='') as csvfile:
def Experiment_1(skfold_data):
    """Plot learning curves (accuracy vs. training-set size) for m=0 and m=1.

    For each smoothing factor ``m`` in ``(0, 1)`` and each fold used as the
    test set, the classifier is evaluated with ``NBC.predictMAP`` on the
    first 10%, 20%, ..., 100% of the remaining training data. The mean
    accuracies come from ``calAccuracy``; mean and standard deviation are
    printed and drawn as error bars against the training-set size.

    Args:
        skfold_data: The folds, as accepted by
            ``train_test.kfold_train_test``.
    """
    print("Running experiment 1...")
    # accuracy_mX[fold][k] is the accuracy of fold ``fold`` at the k-th
    # training-set size.
    accuracy_m0 = []
    accuracy_m1 = []

    # Sub-sample fractions are the same for every fold, so build them once.
    sampling_factor = np.arange(0.1, 1.1, 0.1)
    # Bound up front so the plotting code does not hit a NameError when
    # there are no folds.
    size_of_train = []

    for sm in (0, 1):  # smoothing factors
        for i in range(len(skfold_data)):  # fold i is the test set
            t_train, t_test = train_test.kfold_train_test(skfold_data, i)
            check_acc = []
            size_of_train = []
            for n in sampling_factor:
                # Presumably guards the int() truncation below against
                # float drift from arange (e.g. 0.30000000000000004).
                n = round(n, 2)
                sample_size_for_train = int(len(t_train) * n)
                size_of_train.append(sample_size_for_train)
                # Deterministic prefix of the training data (no random
                # sampling).
                sample_train = t_train[0:sample_size_for_train]
                train_x, train_y, test_x, test_y = (
                    train_test.train_test_split(sample_train, t_test))
                check_acc.append(
                    NBC.predictMAP(train_x, train_y, test_x, test_y, sm))
            if sm == 0:
                accuracy_m0.append(check_acc)
            else:
                accuracy_m1.append(check_acc)

    avgAccu0, avgAccu1 = calAccuracy(accuracy_m0, accuracy_m1)
    print("Average accuracies when m=0: ")
    print(avgAccu0)
    print("Average accuracies when m=1: ")
    print(avgAccu1)

    # The error bar for a training-set size is the spread ACROSS FOLDS at
    # that size, i.e. the standard deviation along axis 0. Taking np.std of
    # each fold's row instead measures the spread across sizes and yields
    # one value per fold, not per plotted point.
    sd_0 = list(np.std(accuracy_m0, axis=0)) if accuracy_m0 else []
    sd_1 = list(np.std(accuracy_m1, axis=0)) if accuracy_m1 else []
    print("standard deviation for m=0: ", sd_0)
    print("standard deviation for m=1: ", sd_1)

    # Error-bar plotting references:
    # https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.errorbar.html
    # https://pythonforundergradengineers.com/python-matplotlib-error-bars.html
    plt.errorbar(size_of_train, avgAccu0, sd_0, label='m=0')
    plt.errorbar(size_of_train, avgAccu1, sd_1, label='m=1')
    plt.legend(loc='lower right')
    plt.xlabel('train set size')
    plt.ylabel('Average Accuracies')
    plt.show()