Ejemplo n.º 1
0
def predict(inputX):
    """Return the classifier's predictions for the rows in ``inputX``.

    ``loadModel()`` is invoked first on every call; it presumably populates
    the module-level ``summaries`` consumed below (confirm against its
    definition).

    Args:
        inputX: Test instances, passed unchanged to ``NBC.getPredictions``.

    Returns:
        Whatever ``NBC.getPredictions(summaries, inputX)`` returns.
    """
    loadModel()
    return NBC.getPredictions(summaries, inputX)
Ejemplo n.º 2
0
def main(catalog):
    """Walk the test data set, classify every file with NBC, and print the result.

    ``catalog`` is expected to contain one sub-directory per group of files.
    Every file in every sub-directory is passed to ``NBC.NBC`` by full path and
    the returned value is printed.

    Args:
        catalog: Path of the directory holding the sub-directories to scan.
    """
    for folder in os.listdir(catalog):
        tmp_path = os.path.join(catalog, folder)
        # os.listdir() returns bare entry names, so the sub-directory must be
        # listed through its joined path, not through ``folder`` alone (which
        # would be resolved against the current working directory).
        for filename in os.listdir(tmp_path):
            filepath = os.path.join(tmp_path, filename)
            belongfolder = NBC.NBC(filepath)
            print(belongfolder)
Ejemplo n.º 3
0
def multi(train, test):
    """Train on ``train`` and return one predicted class per row of ``test``.

    Args:
        train: Training instances; grouped by class via
            ``NBC.separate_by_class``.
        test: Iterable of instances to classify.

    Returns:
        A list with the output of ``predict`` for every row of ``test``,
        in order.
    """
    # Class-wise instances, then the per-class likelihoods learned from them.
    data = NBC.separate_by_class(train)
    liks = lik(data)

    # The prior probability of a class is the number of training instances
    # that have that class divided by the number of all training instances.
    # The classes are 1, 2, 3 and 4.
    priors = {label: len(data[label]) / len(train) for label in range(1, 5)}

    # Score each test row and reduce the scores to a predicted class.
    return [predict(cal_p(liks, priors, row)) for row in test]
Ejemplo n.º 4
0
 def __init__(self, N, ser_):
     """Set up empty gesture state and attach a classifier.

     Args:
         N: Number of tests; also the length of the ``gest`` slot list.
         ser_: Serial communication line used for raw data.
     """
     self.numTests = N
     self.gest = [None] * N
     self.state = 0

     # Gesture bookkeeping; everything starts out empty.
     self.gestures = {}
     self.activeGestures = {}
     self.activeStates = {}
     # One zeroed counter per active gesture (none exist yet at this point).
     self.Histogram = dict.fromkeys(self.activeGestures, 0)

     self.imugestures = []
     self.raw_gesture = []
     self.base_signals = {}

     # The classifier is linked to this Gesture instance so that it can use
     # the base signals / gesture library.
     self.classifier = NBC.NBC(self)

     self.ser = ser_
Ejemplo n.º 5
0
       def __init__(self, parent):
           """Build the frame inside ``parent`` and reset the detection state.

           Sets the parent window's geometry and background colour, builds the
           UI via ``initUI()``, and then initialises the attributes used for
           the NBC and LSTM workflows.

           Args:
               parent: Parent widget; it is given a geometry and a ``"bg"``
                   option here, so presumably the top-level window.
           """
           tk.Frame.__init__(self, parent)

           self.parent = parent
           self.parent.geometry("1920x700+300+300")
           self.parent["bg"] = "#121212"
           # NOTE: the UI is built before the attributes below are assigned.
           self.initUI()

           self.file_for_detection=""

           self.data_list = []
           self.total_count = 0 # maximum number of rows available for training in the selected file

           # create the NBC (naive Bayes classifier) object
           self.nbc = NBC.NBC()

           # LSTM counterparts of the attributes above
           self.data_list_LSTM = []
           self.total_count_LSTM = 0

           # no LSTM object is created here; the attribute starts as None
           self.lstm = None
           self.file_for_detection_LSTM = ""
def Experiment_2(skfold_data):
    """Evaluate accuracy across a sweep of smoothing factors and plot it.

    The sweep consists of ``np.arange(0, 1, 0.1)`` followed by
    ``np.arange(1, 11, 1)``.  For every factor (rounded to two decimals) each
    fold index of ``skfold_data`` is used once as the test fold and the
    accuracy returned by ``NBC.predictMAP`` is recorded.  The per-factor
    accuracies, their averages and their standard deviations are printed and
    an error-bar plot is shown.

    Args:
        skfold_data: Fold collection accepted by
            ``train_test.kfold_train_test``.

    Returns:
        None.
    """
    print("Running experiment 2...")

    m = list(np.arange(0, 1, 0.1)) + list(np.arange(1, 11, 1))

    # Accuracy[j] holds one accuracy per fold for the j-th smoothing factor.
    Accuracy = []
    for raw_factor in m:
        smoothing = round(raw_factor, 2)
        fold_scores = []
        for fold_idx in range(len(skfold_data)):
            t_train, t_test = train_test.kfold_train_test(
                skfold_data, fold_idx)
            train_x, train_y, test_x, test_y = train_test.train_test_split(
                t_train, t_test)
            fold_scores.append(
                NBC.predictMAP(train_x, train_y, test_x, test_y, smoothing))
        Accuracy.append(fold_scores)

    # Average accuracy per smoothing factor.
    avgAccuracy2 = [np.average(fold_scores) for fold_scores in Accuracy]
    print("list of accuracies:")
    print(Accuracy)
    print("list of average accuracies")
    print(avgAccuracy2)

    # Standard deviation per smoothing factor.
    std = [np.std(fold_scores) for fold_scores in Accuracy]
    print("Standard Deviation: ")
    print(std)

    # Error-bar plot of average accuracy against smoothing factor.
    plt.errorbar(m, avgAccuracy2, std)
    plt.xlabel('smoothing factor')
    plt.ylabel('Average Accuracies')
    plt.show()
Ejemplo n.º 7
0
def getUser(test):
	"""Fetch Reddit users' comments and submissions, classify them, and log them.

	NOTE: this function is written in Python 2 syntax (print statements,
	``except Exception, e``, ``iterator.next()``).

	``test`` is either the path of an existing file with one user name per
	line, or a single user name. For every user, each comment body and each
	submission selftext is passed to ``NBC.classify`` and one pipe-separated
	line is written to ``<cwd>/<test>/<user_name>.log``:

	    created_utc*1000|title|subreddit|sentiment|neg|pos|comment-or-post

	Names of the authors of the submissions the user commented on (other than
	the user) are appended to ``<cwd>/<test>/<user_name>.clique``.

	The directory ``<cwd>/<test>`` is not created here. Each user is attempted
	at most three times; every failed attempt writes the user name to
	``errors.log``.

	Args:
		test: Path to a file of user names, or a single user name.
	"""
	user_agent = ("Script to get users' comments")
	# Receives the name of every user whose processing raised an exception.
	# NOTE(review): this handle is never closed in this function.
	errs = open("errors.log", "w")

	r = praw.Reddit(user_agent=user_agent)

	file_name = test # e.g. "bluedot951"

	currPath = os.getcwd()

	if(os.path.isfile(file_name)):
		# One user name per line. The last element of the split is dropped:
		# it is the empty string when the file ends with a newline (otherwise
		# this drops the last user name).
		users = open(file_name, "r").read().split("\n")
		users = users[0:len(users)-1]

	else:
		# Not a file: treat the argument itself as the only user name.
		users = [file_name]

	print users

	#users = ["Hardekyn", "Fogram"]

	for user_name in users:
		print "Processing " + user_name
		numErrs = 0

		# Retry loop: left via the trailing ``break`` on success, or once
		# three attempts have failed.
		while True:
			if numErrs > 2:
				break
			try:

				user = r.get_redditor(user_name)

				comms = user.get_comments(limit=None)

				commcount = 0
				postcount = 0

				# The .log file is truncated on every attempt; the .clique
				# file is opened in append mode.
				w = open(currPath+"/"+file_name+"/"+user_name + ".log", "w")
				writeclique = open(currPath+"/"+file_name+"/"+user_name + ".clique", "a")

				# Distinct authors of the submissions this user commented on.
				clique = []

				# Drain the comment iterator by hand until StopIteration.
				while(1):

					try:
						comm = comms.next()
						sentiment, neg, pos = NBC.classify(comm.body)

						# Build the pipe-separated record; newlines are
						# stripped from the title so the record stays on one
						# line.
						mystr = str(int(comm.created_utc)*1000) + "|"
						title = comm.submission.title
						mystr += (title.replace('\n', '') if '\n' in title else title) + "|"
						mystr += comm.subreddit.display_name + "|"
						mystr += sentiment + "|"
						mystr += str(neg) + "|"
						mystr += str(pos) + "|"
						mystr += "comment"

						print(mystr)


						try:
							poster = comm.submission.author

							# print str(poster)


							if(poster != None):
								# print poster.name
								postername = poster.name
								# print(postername)
								# writeclique.write(postername + "\n")
								if postername not in clique:
									clique.append(postername)

						except AttributeError:
							# Author information unavailable: skip this author.
							pass

						try:
							w.write(mystr + "\n")
						except UnicodeEncodeError:
							# The record is dropped from the log, but it is
							# still counted below.
							pass
						commcount+=1
					except StopIteration:
						break

				subs = user.get_submitted(limit=None)

				# Same procedure for the user's own submissions, classifying
				# the selftext. Unlike above, newlines in the title are kept.
				while(1):
					try:
						sub = subs.next()
						sentiment, neg, pos = NBC.classify(sub.selftext)

						mystr = str(int(sub.created_utc)*1000) + "|"
						mystr += sub.title + "|"
						mystr += sub.subreddit.display_name + "|"
						mystr += sentiment + "|"
						mystr += str(neg) + "|"
						mystr += str(pos) + "|"
						mystr += "post"

						print(mystr)
						try:
							w.write(mystr + "\n")
						except UnicodeEncodeError:
							# Dropped from the log, still counted below.
							pass
						postcount+=1

					except StopIteration:
						break

				print "Comments: " + str(commcount)
				print "Posts: " + str(postcount)
				print "Total: " + str(commcount+postcount)

				# Persist the clique, leaving out the user themselves.
				for ele in clique:
					if(ele != user_name):
						print ele
						writeclique.write(ele + "\n")

				w.close()
				writeclique.close()

			except Exception, e:
				# Any failure counts as one failed attempt and triggers a
				# retry. NOTE(review): ``w`` and ``writeclique`` are not
				# closed on this path.
				print e
				print "An error occured. Retrying..."
				errs.write(user_name + "\n")
				# time.sleep(30)
				numErrs += 1
				continue
			# Success: stop retrying this user.
			break
Ejemplo n.º 8
0
    return predictions


"""We can now train and test the multinomial Naive Bayes."""
# Seed the random number generator before evaluation (presumably so that the
# splits made by NBC.eval_algo are reproducible).
seed(1)
t = []  # training rows: integer features followed by the integer class
cl = {'A': 1, 'B': 2, 'E': 3, 'V': 4}  # class label -> integer category
with open("train.csv") as csv_file:
    absreader = csv.reader(csv_file, delimiter=',', quotechar='|')
    for row in absreader:
        # The values are read as strings, so they are converted to int.
        x = list(map(int, row[:-1]))
        # The last column is the class label; store it as its integer category.
        x.append(cl[row[-1]])
        t.append(x)
scoresv, score_meanv = NBC.eval_algo(t, multi)
print('Scores on validation set: %s' % scoresv)
print('Mean Accuracy: %.3f%%' % score_meanv)

# For the test data, we need to be able to predict the classes.
with open('test.csv') as csv_file:
    test = []  # stores all test cases
    absreader = csv.reader(csv_file, delimiter=',', quotechar='|')
    for row in absreader:
        # The values are read as strings, so they are converted to int.
        x = list(map(int, row))
        test.append(x)
pr = multi(t, test)  # integer predictions
# Convert the integer predictions just computed (``pr``) to string labels.
# The original passed ``p``, which is not defined in this script.
p_act = NBC.class_con(pr, cl)
"""Now we have to store them in a csv file."""
with open('sjha286.csv', 'w', newline='') as csvfile:
def Experiment_1(skfold_data):
    """Compare smoothing factors m=0 and m=1 over growing training subsets.

    For each smoothing factor and each fold index of ``skfold_data``, the
    fold's training split is truncated to a leading fraction taken from
    ``np.arange(0.1, 1.1, 0.1)`` and ``NBC.predictMAP`` is evaluated against
    the fold's test split.  The averaged accuracies returned by
    ``calAccuracy`` and the standard deviations are printed, then both curves
    are drawn as error-bar plots.

    NOTE(review): each standard deviation is computed over one fold's
    accuracies across all subset sizes, while the x-axis is the subset size.
    Confirm against ``calAccuracy`` that this is the intended axis.

    Args:
        skfold_data: Fold collection accepted by
            ``train_test.kfold_train_test``.

    Returns:
        None.
    """
    print("Running experiment 1...")
    accuracy_m0 = []
    accuracy_m1 = []

    # One pass per smoothing factor.
    for sm in (0, 1):
        # Fold ``fold_idx`` supplies the test split for this pass.
        for fold_idx in range(len(skfold_data)):
            t_train, t_test = train_test.kfold_train_test(
                skfold_data, fold_idx)

            size_of_train = []
            fold_scores = []
            for fraction in np.arange(0.1, 1.1, 0.1):
                fraction = round(fraction, 2)
                subset_len = int(len(t_train) * fraction)
                size_of_train.append(subset_len)
                # The subset is the leading slice of the training split, not
                # a random sample.
                train_x, train_y, test_x, test_y = train_test.train_test_split(
                    t_train[0:subset_len], t_test)
                fold_scores.append(
                    NBC.predictMAP(train_x, train_y, test_x, test_y, sm))

            # File this fold's accuracies under the matching smoothing factor.
            if sm == 0:
                accuracy_m0.append(fold_scores)
            else:
                accuracy_m1.append(fold_scores)

    avgAccu0, avgAccu1 = calAccuracy(accuracy_m0, accuracy_m1)
    print("Average accuracies when m=0: ")
    print(avgAccu0)
    print("Average accuracies when m=1: ")
    print(avgAccu1)

    # Standard deviation of every recorded accuracy list.
    sd_0 = [np.std(fold_scores) for fold_scores in accuracy_m0]
    sd_1 = [np.std(accuracy_m1[k]) for k in range(len(accuracy_m0))]

    print("standard deviation for m=0: ", sd_0)
    print("standard deviation for m=1: ", sd_1)

    # Error-bar plotting follows the matplotlib errorbar documentation:
    # https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.errorbar.html
    # https://pythonforundergradengineers.com/python-matplotlib-error-bars.html
    plt.errorbar(size_of_train, avgAccu0, sd_0, label='m=0')
    plt.errorbar(size_of_train, avgAccu1, sd_1, label='m=1')
    plt.legend(loc='lower right')
    plt.xlabel('train set size')
    plt.ylabel('Average Accuracies')
    plt.show()