def main():
    # preprocs = ["wav", "normalized", "bandpass", "highpass"]
    # coefficients = ["mfccs", "chroma", "mel", "contrast", "all"]
    # subsegmentLengths = ["0.2", "0.05", "0.01"]
    # chunkLengths = ["1", "2", "3"]
    preprocs = ["normalized"]
    coefficients = ["mel"]
    subsegmentLengths = ["0.2"]
    chunkLengths = ["2"]
    numNodes = 20
    for preproc in preprocs:
        for coefficientType in coefficients:
            for subsegmentLength in subsegmentLengths:
                for chunkLength in chunkLengths:
                    # argv[0] is a placeholder for the script name
                    argv = ["", preproc, coefficientType,
                            subsegmentLength, chunkLength]
                    X, Y = fetchDataMulti.getData(argv)
                    start = time.time()
                    knn.knn(X, Y)
                    svm.svm(X, Y)
                    print("elapsed:", time.time() - start)
    return
def __init__(self, data, parameters, nodes=10, connections=2):
    # define the model we are working with
    self.model = svm()
    self.alpha = 0.01
    # define the scheme and data
    self.nodes = nodes
    self.outdegree = connections
    self.X = data[0]
    self.y = data[1]
    self.examples = len(self.X)
    self.data_index = [0 for i in range(self.nodes)]
    # define performance metrics
    self.loss = [0]
    self.epochloss = []
    self.time_cost_processing = 0
    self.time_cost_comm = 0
    self.iteration = 0
    # define gradient and parameter information
    temp = np.zeros_like(parameters)
    self.updates = np.repeat(temp[np.newaxis, :], self.nodes, axis=0)
    self.grads = np.repeat(temp[np.newaxis, :], self.nodes, axis=0)
    self.params = np.repeat(parameters[np.newaxis, :], self.nodes, axis=0)
    # At instantiation, divide the data for each worker and decide which
    # nodes (workers) are sent updates from each worker
    self.divide_data()
    self.indices = self.get_worker_indices()
    # Get distribution of data (does not contain probabilities)
    self.get_distribution()
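A minimal sketch of the kind of per-node gradient step this class appears to support, assuming a linear hinge-loss model; the attribute names (`params`, `grads`, `alpha`, `data_index`) follow the constructor above, while `hinge_grad` and the partitioning details are illustrative assumptions:

import numpy as np

def hinge_grad(w, x, y, lam=1e-4):
    # subgradient of the regularized hinge loss max(0, 1 - y*w.x) + lam/2*||w||^2
    if y * np.dot(w, x) < 1:
        return lam * w - y * x
    return lam * w

def local_step(scheme, node):
    # one stochastic step on the node's next local example (simplified:
    # indexes the shared arrays rather than a per-node partition)
    i = scheme.data_index[node] % scheme.examples
    g = hinge_grad(scheme.params[node], scheme.X[i], scheme.y[i])
    scheme.grads[node] = g
    scheme.params[node] = scheme.params[node] - scheme.alpha * g
    scheme.data_index[node] = i + 1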
def main2(argv, n=5, ex_count=None, feat_count=None, repeats=1):
    # argv = [tree_hw_train_f, tree_hw_test_f]
    # argv = [tree_mad_train_f, tree_mad_test_f]
    total_acc = [0, 0, 0, 0]
    for k in range(repeats):
        tree_arr = []
        line_arr = tree.read_in_file(argv[0])
        test_arr = tree.read_in_file(argv[1])
        if not ex_count:
            ex_count = len(line_arr)
        for i in range(n):
            ex_arr = []
            for j in range(ex_count):
                ex_arr.append(random.choice(line_arr))
            tree_arr.append(tree.Tree(ex_arr, id_three.id3, feat_count=feat_count,
                                      measure_function=measure_function))
        whole_arr = build_vector_from_trees(tree_arr, line_arr, n)
        for x in whole_arr:
            x[0] = [1] + x[0]
        w = svm.svm(whole_arr, c=1, gamma=0.001, e=5)
        test_arr = build_vector_from_trees(tree_arr, test_arr, n)
        for x in test_arr:
            x[0] = [1] + x[0]
        accuracy = svm.evaluate_perceptron(test_arr, w)
        total_acc = [x + y for x, y in zip(total_acc, accuracy)]
        # print(accuracy)
    total_acc = [x / repeats for x in total_acc]
    print('Total Accuracy: ' + str(total_acc))
def main():
    data = []
    inp = []
    num_iter = int(input("Enter the number of iterations"))
    for i in range(0, num_iter):
        inp = make_granules.make_granules(i, data[:2])
        data = svm.svm(inp)
    print("clf is ", data[-1])
    print("***********************************************************************")
    print("ON THE FINAL DATA !!!!")
    df = read_data.read_data()
    number_of_cols = len(df.columns)
    X = df.values
    Y = X[:, -1]
    X = X[:, :-1]
    indices = np.argwhere(Y == 1)
    clf = data[-1]
    indices = indices.ravel()
    X = X[indices]
    predictions = clf.predict(X)
    correctly_done = np.sum(predictions)
    print("Correctly classified minority points ", correctly_done / len(indices))
    print("***********************************************************************")
def test_svm_predict(self):
    w, max_p, max_acc = svm.svm(lambda ll: ll == 1, self.train, self.valid,
                                params=self.params)
    predict = svm.svm_predict(self.test[1], [(1, w)])
    self.assertGreaterEqual(sum(predict == self.test[0]), 460)
def coreOperation(self, module):
    '''Main handler: dispatch to the selected model.'''
    try:
        if module == 'svm':
            precision, recall = svm()
            result = str(precision) + ',' + str(recall)
        elif module == 'lightgbm':
            precision, recall = lightgbm()
            result = str(precision) + ',' + str(recall)
            # if a != '' and b != '':
            #     result = add(a, b)  # other interfaces can be called here
            # if result:
            #     result = json.dumps({'code': 200, 'result': result, })
            # else:
            #     result = json.dumps({'code': 210, 'result': 'no result', })
        else:
            result = json.dumps({
                'code': 211,
                'result': 'wrong parameter',
            })
        self.write(result)
    except Exception:
        print('traceback.format_exc():\n%s' % traceback.format_exc())
        result = json.dumps({'code': 503, 'result': 'error'})
        self.write(result)
def run_svm(n_train=100, noisy=None):
    print("n_train = ", n_train)
    n_rep = 100  # number of replicates
    n_test = 100
    its = 0
    e_train = 0
    e_test = 0
    sn = 0
    for i in range(n_rep):
        x, y, w_f = mkdata(n_train + n_test, noisy)
        x_train = x[:, :n_train]
        y_train = y[:, :n_train]
        x_test = x[:, n_train:]
        y_test = y[:, n_train:]
        w_g, num = svm(x_train, y_train)
        sn += num
        x_test = add_bias(x_test)
        x_train = add_bias(x_train)
        e_train += np.where(y_train * (w_g.T @ x_train) < 0)[0].shape[0] / n_train
        e_test += np.where(y_test * (w_g.T @ x_test) < 0)[0].shape[0] / n_test
    print('E_train is %f, E_test is %f' % (e_train / n_rep, e_test / n_rep))
    print('Number of Support Vectors:', sn / n_rep)
    noisy_txt = ": noisy" if noisy else ""
    plotdata(x_train[1:], y_train, w_f, w_g, 'SVM' + noisy_txt)
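A plausible sketch of the `add_bias` helper assumed above: samples are stored column-wise (`x[:, :n_train]`), so the bias is a leading row of ones, which matches the `x_train[1:]` slice later passed to `plotdata`:

import numpy as np

def add_bias(x):
    # x has shape (d, n); prepend a row of ones so w[0] acts as the bias term
    return np.vstack((np.ones((1, x.shape[1])), x))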
def problem3_final():
    pairs = {
        "orig": ['astro/original/train.4', 'astro/original/test.4'],
        "scal": ['astro/scaled/train.4', 'astro/scaled/test.4'],
    }
    trains = []
    tests = []
    for k in pairs:
        tr, dim = build_set_from_file(pairs[k][0])
        te, dim2 = build_set_from_file(pairs[k][1])
        if dim != dim2:
            raise ValueError("dimensionality of training and test data must be equivalent")
        trains = trains + tr
        tests = tests + te
    stats = ["Accuracy \tC \t\tLearn Rt \tE(w)"]
    results = svm.svm(trains, dim, 30)
    for i in range(5):
        r = results[i]
        correct = test(tests, r['weight'], r['C'])
        stats.append("%s \t%.03f \t\t%.03f \t\t%.03f" % (correct, r['C'], r['r'], r['loss']))
    print "combined astro training sets vs combined astro testing sets at epoch = 30"
    for s in stats:
        print "\t%s" % s
def test_svm(self):
    w, max_p, max_acc = svm.svm(lambda ll: ll == 0, self.train, self.valid,
                                params=self.params)
    self.assertLessEqual(la.norm(w - self.results[1]), 1e-3)
    self.assertEqual(max_p, (1e-3, 1e-2))
    self.assertGreaterEqual(max_acc, 0.99)
def titanic_pipeline():
    train, test = loaddata()
    train_proc, test_proc = dataprocessing(train, test)
    train_feat, train_labels = featureengineering(train_proc, test_proc)
    rf_acc = randomforest(train_feat, train_labels)
    svm_acc = svm(train_feat, train_labels)
    lg_acc = logistic_regression(train_feat, train_labels)
    results(svm_acc, lg_acc, rf_acc)
def trainClassifiers(self):
    """
    Pre-creates objects for all classifiers so that prediction is fast.
    An instance dict of those models is created so that they can be
    indexed easily.
    arguments: none
    return: none
    """
    self.which = {0: vsm(), 1: nb(), 2: svm()}
    for i in self.which.keys():
        self.which[i].fit()
def implementation(dataset):
    table = {
        'Accuracy': [], 'Kappa statistics': [], 'Precision': [], 'Recall': [],
        'F_measure': [], 'MCC': [], 'ROC': [], 'PRC': [], 'Specificity': []
    }
    plot_table = {'Accuracy': [], 'Sensitivity': [], 'Specificity': []}

    ### NAIVE BAYES CLASSIFICATION ALGORITHM ---------------------------------
    ac, kp, ps, rc, fm, mc, ra, pa, sp = naive_bayes(dataset=dataset, test_size=0.20)
    table = add_to_dict(ac, kp, ps, rc, fm, mc, ra, pa, sp, table=table)
    plot_table = add_to_dict(ac, rc, sp, table=plot_table)
    print(ac, kp, ps, rc, fm, mc, ra, pa, sp)

    ### RANDOM FOREST CLASSIFICATION ALGORITHM -------------------------------
    ac, kp, ps, rc, fm, mc, ra, pa, sp = random_forest(dataset=dataset)
    table = add_to_dict(ac, kp, ps, rc, fm, mc, ra, pa, sp, table=table)
    plot_table = add_to_dict(ac, rc, sp, table=plot_table)

    ### SVM CLASSIFICATION ALGORITHM -----------------------------------------
    ac, kp, ps, rc, fm, mc, ra, pa, sp = svm(dataset=dataset)
    table = add_to_dict(ac, kp, ps, rc, fm, mc, ra, pa, sp, table=table)
    plot_table = add_to_dict(ac, rc, sp, table=plot_table)

    ### MLP CLASSIFICATION ALGORITHM -----------------------------------------
    ac, kp, ps, rc, fm, mc, ra, pa, sp = mlp(dataset=dataset)
    table = add_to_dict(ac, kp, ps, rc, fm, mc, ra, pa, sp, table=table)
    plot_table = add_to_dict(ac, rc, sp, table=plot_table)

    ### J48 CLASSIFICATION ---------------------------------------------------
    table = add_to_dict(0.928, 0.838, 0.930, 0.929, 0.929, 0.839, 0.975, 0.955,
                        0.924, table=table)
    plot_table = add_to_dict(0.928, 0.929, 0.924, table=plot_table)

    return table, plot_table
def svm_folds(k, path_to_json):
    kfold = KFold(k, path_to_json, '')
    stats = [None] * k
    for i in xrange(k):
        print '{}: Fold {} of {}'.format(datetime.now(), i + 1, k)
        # get train and test dataframes
        train_df, test_df = kfold.get_datasets(i)
        # create train and test datasets
        test_set = DatasetVironovaSVM(test_df, do_oversampling=False)
        train_set = DatasetVironovaSVM(train_df, do_oversampling=False)
        # get confusion matrix from SVM model
        cf = svm.svm(train_set, test_set)
        stats[i] = cf
    return stats
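The per-fold confusion matrices collected in `stats` can then be pooled; a minimal sketch, assuming each `cf` is a square numpy array with true labels on the rows:

import numpy as np

def pooled_accuracy(stats):
    # element-wise sum of the k fold confusion matrices
    total = np.sum(np.asarray(stats), axis=0)
    # correct predictions sit on the diagonal
    return np.trace(total) / total.sum()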
def compute(train_fn, test_fn, epoch):
    trains, dim = build_set_from_file(train_fn)
    tests, dim2 = build_set_from_file(test_fn)
    if dim != dim2:
        raise ValueError("dimensionality of training and test data must be equivalent")
    stats = ["Accuracy \tC \t\tLearn Rt \tE(w)"]
    results = svm.svm(trains, dim, epoch)
    for i in range(5):
        r = results[i]
        correct = test(tests, r['weight'], r['C'])
        stats.append("%s \t%.03f \t\t%.03f \t\t%.03f" % (correct, r['C'], r['r'], r['loss']))
    return stats
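A plausible sketch of the `test` helper called above, assuming each example is a (features, label) pair with labels in {-1, +1} and `weight` a list-like of the same dimensionality; only the signature is taken from the calls above, and `C` is accepted purely for interface compatibility in this sketch:

def test(examples, weight, C):
    # fraction of examples whose sign(w . x) matches the label
    correct = 0
    for x, y in examples:
        score = sum(w_i * x_i for w_i, x_i in zip(weight, x))
        if (1 if score >= 0 else -1) == y:
            correct += 1
    return float(correct) / len(examples)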
def go(url):
    having_ip_address(url)
    long_url(url)
    shorten_url(url)
    at_the_rate_symbol(url)
    double_slash_redirecting(url)
    prefix_suffix(url)
    having_sub_domain(url)
    ssl(url)
    domain_registration_length(url)
    favicon(url)
    port(url)
    https_token(url)
    request_url(url)
    url_of_anchor(url)
    link_in_tag(url)
    sfh(url)
    submitting_to_email(url)
    abnormal_url(url)
    redirect(url)
    on_mouse_over(url)
    right_click(url)
    popup_window(url)
    iframe(url)
    age_of_domain(url)
    dns_record(url)
    web_traffic(url)
    page_rank(url)
    google_index(url)
    links_pointing_to_page(url)
    statistical_report(url)
    print(data)
    r1 = model1.svm(data)
    r2 = model2.random_forest(data)
    r3 = model3.logistic_regression(data)
    if r1[0] + r2[0] + r3[0] == 3:
        return 'NORMAL URL'
    else:
        return 'PHISHING URL'
def main():
    #######################################################################
    # randomize(x_train, y_train)
    # Randomize the order of rows in the training data. This allows for
    # more integrated real-time plotting; given how the training data is
    # generated, all datapoints of one class would be plotted first (and
    # then all points of the other class) unless this function is applied
    # to allow both clusters to be plotted near-simultaneously.
    #######################################################################
    def randomize(x_train, y_train):
        merged = np.concatenate((x_train, y_train.T.reshape(-1, 1)), axis=1)
        np.random.shuffle(merged)
        return merged[:, 0:2], merged[:, 2]

    #######################################################################
    # gen_train()
    # Read in the training points, with the label +1 (blue) or -1 (red).
    #######################################################################
    def gen_train():
        for a, b, idx, predict in zip(x_train[:, 0], x_train[:, 1],
                                      y_train_idx[:, 0], y_train_idx[:, 1]):
            yield a, b, idx, predict

    #######################################################################
    # plot_train(gen_train)
    # Plot training points (read in with gen_train) based on class.
    #######################################################################
    def plot_train(gen_train):
        a, b, idx, predict = (i for i in gen_train)
        if predict == 1:
            xplt_train_c1.append(a)
            yplt_train_c1.append(b)
            plot_c1_train.set_data(xplt_train_c1, yplt_train_c1)
            return plot_c1_train
        else:
            xplt_train_c2.append(a)
            yplt_train_c2.append(b)
            plot_c2_train.set_data(xplt_train_c2, yplt_train_c2)
            return plot_c2_train

    #######################################################################
    # gen_test()
    # Read in the generated test points, with the labels +1/-1 as above.
    #######################################################################
    def gen_test():
        for c, d, idx, predict in zip(x_test[:, 0], x_test[:, 1],
                                      predictions_idx[:, 0],
                                      predictions_idx[:, 1]):
            yield c, d, idx, predict

    #######################################################################
    # plot_test(gen_test)
    # Plot the test points based on class.
    #######################################################################
    def plot_test(gen_test):
        c, d, idx, predict = (i for i in gen_test)
        if predict == 1:
            xplt_test_c1.append(c)
            yplt_test_c1.append(d)
            plot_c1_test.set_data(xplt_test_c1, yplt_test_c1)
            return plot_c1_test
        else:
            xplt_test_c2.append(c)
            yplt_test_c2.append(d)
            plot_c2_test.set_data(xplt_test_c2, yplt_test_c2)
            return plot_c2_test

    #######################################################################
    # clust_input()
    # Custom input for the training data cluster centers, i.e. where
    # the avg of each class' data points will be.
    #######################################################################
    def clust_input():
        default = [[3, 3], [7, 7]]
        while True:
            clust_custom = raw_input("Enter custom cluster centers? [Y/n] ")
            if clust_custom == "Y":
                c1_x = raw_input("X-coordinate of class 1's center: ")
                c1_y = raw_input("Y-coordinate of class 1's center: ")
                c2_x = raw_input("X-coordinate of class 2's center: ")
                c2_y = raw_input("Y-coordinate of class 2's center: ")
                try:
                    return [[float(x) for x in i]
                            for i in ([c1_x, c1_y], [c2_x, c2_y])]
                except ValueError:
                    if not c1_x or not c1_y or not c2_x or not c2_y:
                        confirm = raw_input("You forgot to enter a value. "
                                            "Continue with defaults? [Y/n] ")
                        if confirm == "Y":
                            return default
                        else:
                            print "Please try input again."
                    else:
                        print ("Sorry, one of the values was not a number. "
                               "Please try input again.")
            else:
                return default

    #######################################################################
    # cov_input()
    # Custom input for the covariance matrix, which (basically) affects
    # how spread out the training data will be.
    #######################################################################
    def cov_input():
        default = [[0.6, 0], [0, 0.6]]
        while True:
            cov_custom = raw_input("Enter a custom covariance matrix? [Y/n] ")
            if cov_custom == "Y":
                print "Enter your matrix values 1-4 in format:"
                print "[[1, 2]\n [3, 4]]"
                m1 = raw_input("1: ")
                m2 = raw_input("2: ")
                m3 = raw_input("3: ")
                m4 = raw_input("4: ")
                try:
                    return [[float(x) for x in i]
                            for i in ([m1, m2], [m3, m4])]
                except ValueError:
                    if not m1 or not m2 or not m3 or not m4:
                        confirm = raw_input("You forgot to enter a value. "
                                            "Continue with defaults? [Y/n] ")
                        if confirm == "Y":
                            return default
                        else:
                            print "Please try input again."
                    else:
                        print ("Sorry, one of the values was not a number. "
                               "Please try input again.")
            else:
                return default

    #######################################################################
    # nodes_input()
    # Custom input for number of nodes, i.e. data points for an individual
    # class. Because there are two classes for both the training and
    # testing data, you'll end up with n * 4 datapoints.
    #######################################################################
    def nodes_input():
        default = 100
        while True:
            node_custom = raw_input("Enter custom # of nodes? [Y/n] ")
            if node_custom == "Y":
                n = raw_input("Number of nodes: ")
                try:
                    return int(n)
                except ValueError:
                    print "Input was not a number; please try again."
            else:
                return default

    """ Example of data analysis/visualization with SVM. """
    print "If you just want a quick SVM demo, leave the following blank."
    cluster_ctrs = clust_input()
    cov_matrix = cov_input()
    n = nodes_input()

    # Generate and format datapoints.
    cluster_c1, cluster_c2 = [i for i in cluster_ctrs]
    print "Cluster centers:", cluster_c1, cluster_c2
    print "Covariance matrix", cov_matrix
    print "Number of nodes", n
    x_train_c1 = np.random.multivariate_normal(cluster_c1, cov_matrix, n)
    y_train_c1 = np.ones(n)
    x_train_c2 = np.random.multivariate_normal(cluster_c2, cov_matrix, n)
    y_train_c2 = np.ones(n) * -1
    x_train = np.vstack((x_train_c1, x_train_c2))
    y_train = np.hstack((y_train_c1, y_train_c2))

    # Finish preprocessing code.
    x_train, y_train = randomize(x_train, y_train)
    y_train_idx = np.array([(idx, predict)
                            for idx, predict in enumerate(y_train)])

    # Empty lists to hold plot train/test data.
    xplt_train_c1, yplt_train_c1 = [], []
    xplt_train_c2, yplt_train_c2 = [], []
    xplt_test_c1, yplt_test_c1 = [], []
    xplt_test_c2, yplt_test_c2 = [], []

    # Set up figure.
    fig = plt.figure()
    fig.patch.set_facecolor('white')
    ax = fig.add_subplot(111)
    fig_limit = np.sum(cluster_ctrs) / 2.0  # plot size scales to data
    ax.set_ylim(0, fig_limit)
    ax.set_xlim(0, fig_limit)
    plt.xlabel('X values')
    plt.ylabel('Y values')
    plt.title('Linear SVM Demo')
    plot_c1_train, = ax.plot([], [], 'bo', ms=10)
    plot_c2_train, = ax.plot([], [], 'ro', ms=10)
    plot_c1_test, = ax.plot([], [], 'b+', ms=10)
    plot_c2_test, = ax.plot([], [], 'r+', ms=10)
    plot_c1_train.set_label('Class 1, Train')
    plot_c2_train.set_label('Class 2, Train')
    plot_c1_test.set_label('Class 1, Test')
    plot_c2_test.set_label('Class 2, Test')
    plt.legend(loc=2, fontsize='small')

    # Build and train our SVM classifier.
    clf = svm()
    clf.fit(x_train, y_train)
    print "Weight vector: %s\nBias: %s" % (clf.w, clf.bias)

    # Generate test points & predictions.
    # n_test: # test points == # of training points
    # mean_test: test points centered at average of train. cluster centers
    # cov_test: minimal covariance, since need linearly separable data.
    n_test = n * 2
    mean_test = 0.5 * np.add(cluster_c1, cluster_c2)
    cov_test = [[1, 0], [0, 1]]
    a, b = np.random.multivariate_normal(mean_test, cov_test, n_test).T
    x_test = np.array([(a[i], b[i]) for i in xrange(n_test)])
    predictions = clf.predict(x_test)
    predictions_idx = np.array([(idx, predict) for idx, predict
                                in enumerate(clf.predict(x_test))])

    # Visualize results, and write to stdout.
    # animation.FuncAnimation's interval attribute is the # of milliseconds
    # between animation events, i.e. plotting points.
    anim_train = animation.FuncAnimation(fig, plot_train, gen_train,
                                         blit=False, interval=1, repeat=False)
    anim_test = animation.FuncAnimation(fig, plot_test, gen_test,
                                        blit=False, interval=1, repeat=False)
    clf.plot_boundary()
    plt.show()
    print "\n--TRAINING DATA--"
    print "x_train is:"
    print "Type:", type(x_train)
    print "Shape:\n", x_train.shape
    print "y_train is:"
    print "Type:", type(y_train)
    print "Shape:\n%s\n" % (y_train.shape)
    print "--TESTING DATA--"
    print "x_test is:"
    print "Type:", type(x_test)
    print "Shape:\n%s\n" % (x_test.shape)
def y_nn():
    with open('y_nn.txt') as f:
        y = list(map(int, f.read().split()))
    return y


if __name__ == '__main__':
    # y_result = y_nn()
    # print(y_result)
    X_pure_train, X_sentences_train, aspects_list_train, _ = load_text(
        'SentiRuEval_rest_markup_train.xml')
    # X_pure_test, X_sentences_test, aspects_list_test, X_p = load_text('SentiRuEval_rest_markup_test.xml')
    X_pure_test, X_sentences_test, X_p = load_hotel()
    y_result1, y_result2, y_result3 = svm(X_pure_train, X_sentences_train,
                                          aspects_list_train, X_pure_test,
                                          X_sentences_test)
    pair(X_p, y_result3)
    # y_result = lingvistic(X_pure_test)
    # y_result = freq(X_pure_train, X_pure_test)
    # print(len(X_pure_test), len(y_result1))
    # save_result(X_pure_test, y_result, 'SentiRuEval_result_rest_test_on_rest_2LSTM.xml')
    # save_result(X_pure_test, y_result1, 'SentiRuEval_result_rest_test_on_rest_CNN.xml')
    # save_result(X_pure_test, y_result2, 'SentiRuEval_result_rest_test_on_rest_RF_myw2v.xml')
    # save_result(X_pure_test, y_result3, 'SentiRuEval_result_rest_test_on_rest_GNB_myw2v.xml')
from logistic_regression import lr
from naive_bayes import nb
from svm import svm

lr()
nb()
svm()
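Each module exposes a zero-argument entry point. A minimal sketch of what such an `svm()` could look like, assuming scikit-learn; the stand-in dataset is an assumption, since the real module presumably loads its own data:

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

def svm():
    # stand-in dataset for illustration
    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = SVC(kernel='rbf').fit(X_train, y_train)
    print('SVM accuracy:', accuracy_score(y_test, clf.predict(X_test)))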
import pre_process
import logistic_reg
import svm
import random_forest

output_file = "D:/data/output_1.csv"
pre_process.pre_processing("D:/data/Epi.pkl")

# Module selection
choice = input(
    "choose your module : 1.Logistic Regression 2.SVM 3.Random Forest :")
if choice == '1':
    logistic_reg.logistic_reg("D:/data/output_3.pkl")
elif choice == '2':
    svm.svm("D:/data/output_3.pkl")
elif choice == '3':
    random_forest.random_forest("D:/data/output_3.pkl")
target[indices, 2] = 1.

train = iris[::2, 0:4]
traint = target[::2]
test = iris[1::2, 0:4]
testt = target[1::2]

output = np.zeros((np.shape(test)[0], 3))

import svm
reload(svm)

# Learn the full data
#svm0 = svm.svm(kernel='linear')
#svm0 = svm.svm(kernel='poly',C=0.1,degree=3)
svm0 = svm.svm(kernel='rbf')
svm0.train_svm(train, np.reshape(traint[:, 0], (np.shape(train[:, :2])[0], 1)))
output[:, 0] = svm0.classifier(test, soft=True).T

#svm1 = svm.svm(kernel='linear')
#svm1 = svm.svm(kernel='poly',C=0.1,degree=3)
svm1 = svm.svm(kernel='rbf')
svm1.train_svm(train, np.reshape(traint[:, 1], (np.shape(train[:, :2])[0], 1)))
output[:, 1] = svm1.classifier(test, soft=True).T

#svm2 = svm.svm(kernel='linear')
#svm2 = svm.svm(kernel='poly',C=0.1,degree=3)
svm2 = svm.svm(kernel='rbf')
svm2.train_svm(train, np.reshape(traint[:, 2], (np.shape(train[:, :2])[0], 1)))
output[:, 2] = svm2.classifier(test, soft=True).T
labeltrain0 = np.ones((np.shape(train0)[0], 1))
labeltrain1 = -np.ones((np.shape(train1)[0], 1))
labeltrain = np.concatenate((labeltrain0, labeltrain1), axis=0)
labeltest0 = np.ones((np.shape(test0)[0], 1))
labeltest1 = -np.ones((np.shape(test1)[0], 1))
labeltest = np.concatenate((labeltest0, labeltest1), axis=0)

pl.figure()
pl.plot(train0[:, 0], train0[:, 1], "o", color="0.75")
pl.plot(train1[:, 0], train1[:, 1], "s", color="0.25")

import svm
reload(svm)
svm = svm.svm(kernel='linear', C=0.1)
#svm = svm.svm(kernel='rbf')
#svm = svm.svm(kernel='poly',C=0.1,degree=4)

print np.shape(train), np.shape(labeltrain)
svm.train_svm(train, labeltrain)
pl.scatter(svm.X[:, 0], svm.X[:, 1], s=200, color='k')
predict = svm.classifier(test, soft=False)
correct = np.sum(predict == labeltest)
print correct, np.shape(predict)
print float(correct) / np.shape(predict)[0] * 100., "test accuracy"

# Classify points over 2D space to fit contour
x, y = np.meshgrid(np.linspace(-6, 6, 50), np.linspace(-6, 6, 50))
xx = np.reshape(np.ravel(x), (2500, 1))
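The snippet stops after building the grid; a sketch of the likely continuation, mirroring the `classifier(..., soft=True)` / `pl.contour` pattern used in the XOR example below (the level and line width are assumptions):

yy = np.reshape(np.ravel(y), (2500, 1))
grid = np.concatenate((xx, yy), axis=1)
# soft outputs over the grid trace the decision boundary at level 0
out = svm.classifier(grid, soft=True).T
pl.contour(x, y, out.reshape(np.shape(x)), (0,), linewidths=2)
pl.show()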
def modified_XOR(kernel, degree, C, sdev):
    import svm
    sv = svm.svm(kernel, degree=degree, C=C)

    m = 100
    X = sdev * np.random.randn(m, 2)
    X[m / 2:, 0] += 1.
    X[m / 4:m / 2, 1] += 1.
    X[3 * m / 4:, 1] += 1.
    targets = -np.ones((m, 1))
    targets[:m / 4, 0] = 1.
    targets[3 * m / 4:, 0] = 1.

    sv.train_svm(X, targets)

    Y = sdev * np.random.randn(m, 2)
    Y[m / 2:, 0] += 1.
    Y[m / 4:m / 2, 1] += 1.
    Y[3 * m / 4:m, 1] += 1.
    test = -np.ones((m, 1))
    test[:m / 4, 0] = 1.
    test[3 * m / 4:, 0] = 1.
    #test = (np.where(Y[:,0]*Y[:,1]>=0,1,-1)*np.ones((1,np.shape(Y)[0]))).T
    #print test.T

    output = sv.classifier(Y, soft=False)
    #print output.T
    #print test.T
    err1 = np.where((output == 1.) & (test == -1.))[0]
    err2 = np.where((output == -1.) & (test == 1.))[0]
    print kernel, C
    print "Class 1 errors ", len(err1), " from ", len(test[test == 1])
    print "Class 2 errors ", len(err2), " from ", len(test[test == -1])
    print "Test accuracy ", 1. - (float(len(err1) + len(err2))) / (
        len(test[test == 1]) + len(test[test == -1]))

    pl.ion()
    pl.figure()
    l1 = np.where(targets == 1)[0]
    l2 = np.where(targets == -1)[0]
    pl.plot(X[sv.sv, 0], X[sv.sv, 1], 'o', markeredgewidth=5)
    pl.plot(X[l1, 0], X[l1, 1], 'ko')
    pl.plot(X[l2, 0], X[l2, 1], 'wo')
    l1 = np.where(test == 1)[0]
    l2 = np.where(test == -1)[0]
    pl.plot(Y[l1, 0], Y[l1, 1], 'ks')
    pl.plot(Y[l2, 0], Y[l2, 1], 'ws')

    step = 0.1
    f0, f1 = np.meshgrid(
        np.arange(np.min(X[:, 0]) - 0.5, np.max(X[:, 0]) + 0.5, step),
        np.arange(np.min(X[:, 1]) - 0.5, np.max(X[:, 1]) + 0.5, step))
    out = sv.classifier(np.c_[np.ravel(f0), np.ravel(f1)], soft=True).T
    out = out.reshape(f0.shape)
    pl.contour(f0, f1, out, 2)
    pl.axis('off')
    pl.show()
def evaluateSVM():
    claResults.append(["SVM"])
    for data in claDatasets:
        # Import the dataset and separate X and y
        data_to_test = 'datasets/classification/' + data + '.csv'
        dataset = pd.read_csv(data_to_test)
        X_before, y_before = encodeData(dataset)
        # X_before = dataset.iloc[:, :-1].values
        # y_before = dataset.iloc[:, 38]

        count = 0
        avg_roc_auc = 0
        avg_accuracy = 0
        avg_precision = 0
        avg_recall = 0
        avg_f1score = 0
        fpr = 0
        tpr = 0
        threshold = 0
        for train, test in kfold.split(X_before):
            print("Test:", count + 1, " for", data)
            X_train, X_test = X_before[train], X_before[test]
            y_train, y_true = y_before[train], y_before[test]

            # feature scaling
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)

            # run SVM (bound to its own name so the imported class
            # is not shadowed)
            from svm import svm
            clf = svm(X_train, y_train, X_test, y_true)
            y_pred = clf.getPredictions()
            fpr, tpr, threshold = metrics.roc_curve(y_true, y_pred)
            roc_auc = metrics.auc(fpr, tpr)

            # get metrics
            avg_roc_auc += roc_auc
            avg_accuracy += clf.getAccuracy()
            avg_precision += metrics.precision_score(y_true, y_pred)
            avg_recall += metrics.recall_score(y_true, y_pred)
            avg_f1score += metrics.f1_score(y_true, y_pred)
            count += 1

        avg_roc_auc = avg_roc_auc / count
        avg_accuracy = avg_accuracy / count
        avg_precision = avg_precision / count
        avg_recall = avg_recall / count
        avg_f1score = avg_f1score / count
        claResults.append(['', data_to_test, float(avg_roc_auc),
                           float(avg_accuracy), float(avg_precision),
                           float(avg_recall), float(avg_f1score)])

        '''
        plt.title('Receiver Operating Characteristic')
        plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
        plt.legend(loc = 'lower right')
        plt.plot([0, 1], [0, 1],'r--')
        plt.xlim([0, 1])
        plt.ylim([0, 1])
        plt.ylabel('True Positive Rate')
        plt.xlabel('False Positive Rate')
        plt.show()
        '''
        print("\nSVM evaluation results")
        print("Average ROC AUC:", avg_roc_auc)
        print("Average accuracy:", avg_accuracy)
        print("Average precision:", avg_precision)
        print("Average recall:", avg_recall)
        print("Average f1 score:", avg_f1score)
def main(argv):
    print("start of main\n")

    # file handling
    if not os.path.isdir(FLAGS.data_dir):
        raise FileNotFoundError("data_dir doesn't exist: " + FLAGS.data_dir)
    html_dir = os.path.join(FLAGS.data_dir, FLAGS.html_folder)
    if not os.path.isdir(html_dir):
        raise FileNotFoundError("html_folder doesn't exist: " + FLAGS.html_folder)
    tfr_dir = os.path.join(FLAGS.data_dir, "TFR_" + CUR_TIME)
    os.mkdir(tfr_dir)
    train_dir = os.path.join(tfr_dir, 'train')
    # os.mkdir(train_dir)
    test_dir = os.path.join(tfr_dir, 'test')
    # os.mkdir(test_dir)
    shuf_dir = os.path.join(tfr_dir, 'shuffle')
    os.mkdir(shuf_dir)

    # logging
    log_file = os.path.join(tfr_dir, "log")
    set_logging(stream=True, fileh=True, filename=log_file)
    logging.info("\nall arguments:")
    for attr, value in sorted(FLAGS.__flags.items()):
        logging.info("{}={}".format(attr.upper(), value))
    logging.info("")

    # shuffle all data thoroughly;
    # all data is stored in several train and test json files
    all_data = []
    logging.info('')
    logging.info('reading all data')
    for category in os.listdir(html_dir):
        cat_dir = os.path.join(html_dir, category)
        if os.path.isdir(cat_dir):
            cat_id = CATEGORIES.index(category)
            for j_file in os.listdir(cat_dir):
                j_path = os.path.join(cat_dir, j_file)
                if os.path.isfile(j_path) and not j_file.startswith('.'):
                    # read a single html json file
                    pages = read_json(j_path)
                    for page in pages:
                        page['label'] = cat_id + 1
                    all_data.extend(pages)

    # shuffle all data
    logging.info("\nshuffling the whole dataset")
    shuffle(all_data)

    # convert every page string into FastText format
    fast_data = []
    for page in all_data:
        page_str = '__label__' + str(page['label']) + ', '
        # escape \n and add a space before commas
        page_str += page['html'].replace('\n', '\\n').replace(',', ' ,')
        # print(page_str)
        fast_data.append(page_str)
    all_data = fast_data

    logging.info("\nsplitting data into train and test set")
    train_num = math.floor(len(all_data) * FLAGS.train_ratio)
    train_set = all_data[:train_num]
    test_set = all_data[train_num:]

    if FLAGS.model == 'svm':
        svm.svm(FLAGS.num_cats, train_set, test_set)
    else:
        logging.info("\nwriting shuffled train data into json")
        with open(train_dir, 'w') as f:
            f.write('\n'.join(train_set))
        logging.info("\nwriting shuffled test data into json")
        with open(test_dir, 'w') as f:
            f.write('\n'.join(test_set))
        logging.info("train_num: {}".format(train_num))
        logging.info("test_num: {}".format(len(all_data) - train_num))

    print("\n end of main~~~")
# Creating dictionary from x_train
words, wordList = getWordList(x_train)

# Removing most frequent 100 words
for _ in range(100):
    words.pop(0)
wordList = [x for x, _ in words]

# Forming feature vector, calculating conditional probabilities, applying NBC
trainfv, trainfv0, trainfv1 = featureVector(wordList[:w], x_train, y_train)
testfv, testfv0, testfv1 = featureVector(wordList[:w], x_test, y_test)
# zoltemplr[i] = lr(trainfv,testfv)
zoltempsvm[i] = svm(trainfv, testfv)
# zoltempnbc[i] = nbc(trainfv,testfv)

avgzollr[r] = np.mean(zoltemplr)
avgzolsvm[r] = np.mean(zoltempsvm)
avgzolnbc[r] = np.mean(zoltempnbc)
stddevzollr[r] = np.std(zoltemplr)
stddevzolsvm[r] = np.std(zoltempsvm)
stddevzolnbc[r] = np.std(zoltempnbc)
stderrzollr[r] = stddevzollr[r] / math.sqrt(it)
stderrzolsvm[r] = stddevzolsvm[r] / math.sqrt(it)
stderrzolnbc[r] = stddevzolnbc[r] / math.sqrt(it)
print stderrzollr
def main():
    warnings.simplefilter("ignore", UserWarning)
    csv = pd.read_csv('Glass.csv', sep=',')
    # Column holding the class labels (used as the STRATIFY parameter,
    # which keeps the class proportions across the splits)
    classes = csv['Class']

    # Shuffle and split into two equal halves (1/2):
    # 50% becomes the training set
    # database[0] = training set
    # database[1] = remainder
    database = skms.train_test_split(csv, test_size=0.5, train_size=0.5,
                                     shuffle=True, stratify=classes)
    train = database[0]
    classes = database[1]['Class']

    # Split the remainder again ((1/2)/2):
    # 25% validation and 25% test
    # database[0]' = validation set
    # database[1]' = test set
    database = skms.train_test_split(database[1], test_size=0.5, train_size=0.5,
                                     shuffle=True, stratify=classes)
    validation = database[0]
    test = database[1]
    target_test = test['Class']

    # Features: drop the target column, i.e. separate it from the features
    features_test = test
    features_test = features_test.drop(['Class'], axis=1)

    # trained classifiers
    clfs = [None, None, None, None, None]
    # scores of the classifiers on the test set
    clfs_scores = [None, None, None, None, None, None, None, None]
    clfs[0] = knn.findBestKNN(train, validation)      # Euclidean KNN
    clfs[1] = dt.decision_tree(train, validation)     # full decision tree (no pruning)
    clfs[2] = nb.naive_bayes(train, validation)       # Bernoulli naive Bayes
    clfs[3] = svm.svm(train, validation)              # SVM with RBF kernel
    clfs[4] = mlp.my_little_poney(train, validation)  # MLP, constant learning rate

    clfs_scores[0] = testingClassifiers(clfs[0], features_test, target_test)  # KNN
    clfs_scores[1] = testingClassifiers(clfs[1], features_test, target_test)  # decision tree
    clfs_scores[2] = testingClassifiers(clfs[2], features_test, target_test)  # naive Bayes
    clfs_scores[3] = testingClassifiers(clfs[3], features_test, target_test)  # SVM
    clfs_scores[4] = testingClassifiers(clfs[4], features_test, target_test)  # MLP

    #temp_sum = VotingClassifier(estimators=[('knn', clfs[0]), ('dt', clfs[0]), ('nb', clfs[0]), ('svm', clfs[0]), ('mlp', clfs[0])], voting='hard')
    clfs_scores[5] = score(rule_of_sum(clfs, features_test, target_test), target_test)   # sum rule
    clfs_scores[6] = score(rule_of_prod(clfs, features_test, target_test), target_test)  # product rule
    clfs_scores[7] = score(borda_count(clfs, features_test, target_test), target_test)   # Borda count

    del classes, csv, database, test, target_test, features_test
    return clfs_scores
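A minimal sketch of what a sum-rule combiner like `rule_of_sum` could look like, assuming each trained classifier exposes scikit-learn's `predict_proba` and shares the same class ordering (the helper itself is not shown in the source):

import numpy as np

def rule_of_sum(clfs, features, targets):
    # average the class-probability estimates of all classifiers,
    # then pick the class with the highest combined score
    probs = np.mean([clf.predict_proba(features) for clf in clfs], axis=0)
    return clfs[0].classes_[np.argmax(probs, axis=1)]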
i_time = config.getint('sys', 'i_time')
with open(complete_file_path, 'w') as f:
    f.write("0," + str(i_time * 11))
with open(os.path.join(file_path, "pid.txt"), 'w') as f:
    f.write(str(os.getpid()))

from knn import knn
from ada_boost import ada_boost
from random_forest import random_forest
from logistic import logistic
from svm import svm
from decision_tree import c4_5, cart
from k_mean import k_mean
from xgboost_clf import xgboost
from gbdt_clf import gbdt
from net import net

print('knn:', knn(i_time=i_time))
print('AdaBoost:', ada_boost(i_time=i_time))
print('random forest', random_forest(i_time=i_time))
print('logistic regression:', logistic(i_time=i_time))
print('C4.5:', c4_5(i_time=i_time))
print('cart:', cart(i_time=i_time))
print('k_mean', k_mean(i_time=i_time))
print('xgboost', xgboost(i_time=i_time))
print('gbdt', gbdt(i_time=i_time))
print('SVM:', svm(i_time=i_time))
print('net', net(i_time=i_time))
import numpy as np
import pandas as pd
from svm import svm

df = pd.read_csv('data/pulsar_stars.csv')
npa = np.asarray(df)
# map the 0 labels in the last column to -1
for i in npa:
    if not i[8]:
        i[8] = -1

# columns 0-7 are features, column 8 is the label
train_X = npa[:10000, :8]
train_Y = npa[:10000, 8]
test_X = npa[10000:, :8]
test_Y = npa[10000:, 8]

w = svm(train_X, train_Y, .00001, 10)

errors, tot = 0, 0
for x, y in zip(test_X, test_Y):
    if y * np.dot(x, w) < 1:
        errors += 1
    tot += 1
print("testing error percentage: ", 100 * (errors / tot))
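The call above suggests a trainer of the form `svm(X, Y, learning_rate, epochs)` returning a weight vector whose margin is checked via `y * np.dot(x, w) < 1`. A minimal sub-gradient sketch consistent with that interface; the roles of the two numeric arguments and the regularization strength are assumptions:

import numpy as np

def svm(X, Y, lr, epochs, lam=1e-4):
    # primal hinge-loss SVM trained by stochastic sub-gradient descent
    w = np.zeros(X.shape[1])
    for _ in range(epochs):
        for x, y in zip(X, Y):
            if y * np.dot(x, w) < 1:       # margin violated
                w += lr * (y * x - lam * w)
            else:
                w -= lr * lam * w
    return w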
def handle_my_custom_event(json): def formatJam(teks): print('Jam nya adalah : ' + teks) formm = teks.split() satuan = formm[1] jamm = formm[0] jamm = jamm.split(':') jam2 = jamm[0] menit = jamm[1] detik = jamm[2] formatt = jam2 + menit + detik return int(formatt) nama = 'Aku adalah agias' myprofile = webdriver.FirefoxProfile( r'C:\Users\Aloysius\AppData\Roaming\Mozilla\Firefox\Profiles\fcbei8vp.teleScrape' ) PATH = "C:\Program Files (x86)\geckodriver.exe" driver = webdriver.Firefox(firefox_profile=myprofile, executable_path=PATH) target = 3 Saham = [ 'AALI', 'ABBA', 'ABDA', 'ABMM', 'ACES', 'ACST', 'ADES', 'ADHI', 'ADMF', 'ADMG', 'ADRO', 'AGAR', 'AGII', 'AGRO', 'AGRS', 'AHAP', 'AIMS', 'AISA', 'AKKU', 'AKPI', 'AKRA', 'AKSI', 'ALDO', 'ALKA', 'ALMI', 'ALTO', 'AMAG', 'AMAN', 'AMAR', 'AMFG', 'AMIN', 'AMOR', 'AMRT', 'ANDI', 'ANJT', 'ANTM', 'APEX', 'APIC', 'APII', 'APLI', 'APLN', 'ARGO', 'ARII', 'ARKA', 'ARMY', 'ARNA', 'ARTA', 'ARTI', 'ARTO', 'ASBI', 'ASDM', 'ASGR', 'ASII', 'ASJT', 'ASMI', 'ASPI', 'ASRI', 'ASRM', 'ASSA', 'ATAP', 'ATIC', 'AUTO', 'AYLS', 'BABP', 'BACA', 'BAJA', 'BALI', 'BANK', 'BAPA', 'BAPI', 'BATA', 'BAYU', 'BBCA', 'BBHI', 'BBKP', 'BBLD', 'BBMD', 'BBNI', 'BBRI', 'BBRM', 'BBSI', 'BBSS', 'BBTN', 'BBYB', 'BCAP', 'BCIC', 'BCIP', 'BDMN', 'BEBS', 'BEEF', 'BEKS', 'BELL', 'BESS', 'BEST', 'BFIN', 'BGTG', 'BHAT', 'BHIT', 'BIKA', 'BIMA', 'BINA', 'BIPI', 'BIPP', 'BIRD', 'BISI', 'BJBR', 'BJTM', 'BKDP', 'BKSL', 'BKSW', 'BLTA', 'BLTZ', 'BLUE', 'BMAS', 'BMRI', 'BMSR', 'BMTR', 'BNBA', 'BNBR', 'BNGA', 'BNII', 'BNLI', 'BOGA', 'BOLA', 'BOLT', 'BOSS', 'BPFI', 'BPII', 'BPTR', 'BRAM', 'BRIS', 'BRMS', 'BRNA', 'BRPT', 'BSDE', 'BSIM', 'BSSR', 'BSWD', 'BTEK', 'BTEL', 'BTON', 'BTPN', 'BTPS', 'BUDI', 'BUKK', 'BULL', 'BUMI', 'BUVA', 'BVIC', 'BWPT', 'BYAN', 'CAKK', 'CAMP', 'CANI', 'CARE', 'CARS', 'CASA', 'CASH', 'CASS', 'CBMF', 'CCSI', 'CEKA', 'CENT', 'CFIN', 'CINT', 'CITA', 'CITY', 'CLAY', 'CLEO', 'CLPI', 'CMNP', 'CMPP', 'CNKO', 'CNTX', 'COCO', 'COWL', 'CPIN', 'CPRI', 'CPRO', 'CSAP', 'CSIS', 'CSMI', 'CSRA', 'CTBN', 'CTRA', 'CTTH', 'DADA', 'DART', 'DAYA', 'DCII', 'DEAL', 'DEFI', 'DEWA', 'DFAM', 'DGIK', 'DGNS', 'DIGI', 'DILD', 'DIVA', 'DKFT', 'DLTA', 'DMAS', 'DMMX', 'DMND', 'DNAR', 'DNET', 'DOID', 'DPNS', 'DPUM', 'DSFI', 'DSNG', 'DSSA', 'DUCK', 'DUTI', 'DVLA', 'DWGL', 'DYAN', 'EAST', 'ECII', 'EDGE', 'EKAD', 'ELSA', 'ELTY', 'EMDE', 'EMTK', 'ENRG', 'ENVY', 'ENZO', 'EPAC', 'EPMT', 'ERAA', 'ERTX', 'ESIP', 'ESSA', 'ESTA', 'ESTI', 'ETWA', 'EXCL', 'FAPA', 'FAST', 'FASW', 'FILM', 'FINN', 'FIRE', 'FISH', 'FITT', 'FMII', 'FOOD', 'FORU', 'FORZ', 'FPNI', 'FREN', 'FUJI', 'GAMA', 'GDST', 'GDYR', 'GEMA', 'GEMS', 'GGRM', 'GGRP', 'GHON', 'GIAA', 'GJTL', 'GLOB', 'GLVA', 'GMFI', 'GMTD', 'GOLD', 'GOLL', 'GOOD', 'GPRA', 'GSMF', 'GTBO', 'GWSA', 'GZCO', 'HADE', 'HDFA', 'HDIT', 'HDTX', 'HEAL', 'HELI', 'HERO', 'HEXA', 'HITS', 'HKMU', 'HMSP', 'HOKI', 'HOME', 'HOMI', 'HOTL', 'HRME', 'HRTA', 'HRUM', 'IATA', 'IBFN', 'IBST', 'ICBP', 'ICON', 'IDPR', 'IFII', 'IFSH', 'IGAR', 'IIKP', 'IKAI', 'IKAN', 'IKBI', 'IMAS', 'IMJS', 'IMPC', 'INAF', 'INAI', 'INCF', 'INCI', 'INCO', 'INDF', 'INDO', 'INDR', 'INDS', 'INDX', 'INDY', 'INKP', 'INOV', 'INPC', 'INPP', 'INPS', 'INRU', 'INTA', 'INTD', 'INTP', 'IPCC', 'IPCM', 'IPOL', 'IPTV', 'IRRA', 'ISAT', 'ISSP', 'ITIC', 'ITMA', 'ITMG', 'JAST', 'JAWA', 'JAYA', 'JECC', 'JGLE', 'JIHD', 'JKON', 'JKSW', 'JMAS', 'JPFA', 'JRPT', 'JSKY', 'JSMR', 'JSPT', 'JTPE', 'KAEF', 'KARW', 'KAYU', 'KBAG', 'KBLI', 'KBLM', 'KBLV', 'KBRI', 'KDSI', 'KEEN', 'KEJU', 'KIAS', 'KICI', 'KIJA', 'KINO', 'KIOS', 'KJEN', 'KKGI', 'KLBF', 'KMDS', 
'KMTR', 'KOBX', 'KOIN', 'KONI', 'KOPI', 'KOTA', 'KPAL', 'KPAS', 'KPIG', 'KRAH', 'KRAS', 'KREN', 'LAND', 'LAPD', 'LCGP', 'LCKM', 'LEAD', 'LIFE', 'LINK', 'LION', 'LMAS', 'LMPI', 'LMSH', 'LPCK', 'LPGI', 'LPIN', 'LPKR', 'LPLI', 'LPPF', 'LPPS', 'LRNA', 'LSIP', 'LTLS', 'LUCK', 'MABA', 'MAGP', 'MAIN', 'MAMI', 'MAPA', 'MAPB', 'MAPI', 'MARI', 'MARK', 'MASA', 'MAYA', 'MBAP', 'MBSS', 'MBTO', 'MCAS', 'MCOR', 'MDIA', 'MDKA', 'MDKI', 'MDLN', 'MDRN', 'MEDC', 'MEGA', 'MERK', 'META', 'MFIN', 'MFMI', 'MGNA', 'MGRO', 'MICE', 'MIDI', 'MIKA', 'MINA', 'MIRA', 'MITI', 'MKNT', 'MKPI', 'MLBI', 'MLIA', 'MLPL', 'MLPT', 'MMLP', 'MNCN', 'MOLI', 'MPMX', 'MPOW', 'MPPA', 'MPRO', 'MRAT', 'MREI', 'MSIN', 'MSKY', 'MTDL', 'MTFN', 'MTLA', 'MTPS', 'MTRA', 'MTSM', 'MTWI', 'MYOH', 'MYOR', 'MYRX', 'MYTX', 'NASA', 'NATO', 'NELY', 'NFCX', 'NICK', 'NIKL', 'NIPS', 'NIRO', 'NISP', 'NOBU', 'NRCA', 'NUSA', 'NZIA', 'OASA', 'OCAP', 'OKAS', 'OMRE', 'OPMS', 'PADI', 'PALM', 'PAMG', 'PANI', 'PANR', 'PANS', 'PBID', 'PBRX', 'PBSA', 'PCAR', 'PDES', 'PEGE', 'PEHA', 'PGAS', 'PGJO', 'PGLI', 'PGUN', 'PICO', 'PJAA', 'PKPK', 'PLAN', 'PLAS', 'PLIN', 'PMJS', 'PMMP', 'PNBN', 'PNBS', 'PNGO', 'PNIN', 'PNLF', 'PNSE', 'POLA', 'POLI', 'POLL', 'POLU', 'POLY', 'POOL', 'PORT', 'POSA', 'POWR', 'PPGL', 'PPRE', 'PPRO', 'PRAS', 'PRDA', 'PRIM', 'PSAB', 'PSDN', 'PSGO', 'PSKT', 'PSSI', 'PTBA', 'PTDU', 'PTIS', 'PTPP', 'PTPW', 'PTRO', 'PTSN', 'PTSP', 'PUDP', 'PURA', 'PURE', 'PURI', 'PWON', 'PYFA', 'PZZA', 'RAJA', 'RALS', 'RANC', 'RBMS', 'RDTX', 'REAL', 'RELI', 'RICY', 'RIGS', 'RIMO', 'RISE', 'RMBA', 'ROCK', 'RODA', 'RONY', 'ROTI', 'RUIS', 'SAFE', 'SAME', 'SAMF', 'SAPX', 'SATU', 'SBAT', 'SCCO', 'SCMA', 'SCNP', 'SCPI', 'SDMU', 'SDPC', 'SDRA', 'SFAN', 'SGER', 'SGRO', 'SHID', 'SHIP', 'SIDO', 'SILO', 'SIMA', 'SIMP', 'SINI', 'SIPD', 'SKBM', 'SKLT', 'SKRN', 'SKYB', 'SLIS', 'SMAR', 'SMBR', 'SMCB', 'SMDM', 'SMDR', 'SMGR', 'SMKL', 'SMMA', 'SMMT', 'SMRA', 'SMRU', 'SMSM', 'SOCI', 'SOFA', 'SOHO', 'SONA', 'SOSS', 'SOTS', 'SPMA', 'SPTO', 'SQMI', 'SRAJ', 'SRIL', 'SRSN', 'SRTG', 'SSIA', 'SSMS', 'SSTM', 'STAR', 'STTP', 'SUGI', 'SULI', 'SUPR', 'SURE', 'SWAT', 'TALF', 'TAMA', 'TAMU', 'TARA', 'TAXI', 'TBIG', 'TBLA', 'TBMS', 'TCID', 'TCPI', 'TDPM', 'TEBE', 'TECH', 'TELE', 'TFAS', 'TFCO', 'TGKA', 'TGRA', 'TIFA', 'TINS', 'TIRA', 'TIRT', 'TKIM', 'TLKM', 'TMAS', 'TMPO', 'TNCA', 'TOBA', 'TOPS', 'TOTL', 'TOTO', 'TOWR', 'TOYS', 'TPIA', 'TPMA', 'TRAM', 'TRIL', 'TRIM', 'TRIN', 'TRIO', 'TRIS', 'TRJA', 'TRST', 'TRUK', 'TRUS', 'TSPC', 'TUGU', 'TURI', 'UANG', 'UCID', 'UFOE', 'ULTJ', 'UNIC', 'UNIQ', 'UNIT', 'UNSP', 'UNTR', 'UNVR', 'URBN', 'VICI', 'VICO', 'VINS', 'VIVA', 'VOKS', 'VRNA', 'WAPO', 'WEGE', 'WEHA', 'WICO', 'WIFI', 'WIIM', 'WIKA', 'WINS', 'WMUU', 'WOMF', 'WOOD', 'WOWS', 'WSBP', 'WSKT', 'WTON', 'YELO', 'YPAS', 'YULE', 'ZBRA', 'ZINC', 'ZONE' ] hari = 0 tanggal2 = [] tanggal = [] itemss = [] percakapan = [] # driver.get('https://web.telegram.org/#/im?p=@TheTradersGroup') driver.get('https://web.telegram.org/#/im?p=g579054022') time.sleep(20) temp = '' temp2 = '' first = True ptemp = '' wrapper = driver.find_element_by_xpath( '/html/body/div[1]/div[2]/div/div[2]/div[3]/div/div[2]/div[1]/div/div[1]/div[2]/div[2]' ) chat = wrapper.find_elements_by_xpath( ".//div[contains(@class, 'im_history_message_wrap')]") psn = len(chat) Stoped = False while True: joinn = False balas = '' penulis = '' last = penulis pesan2 = [""] jam = '' pesan2 = [] pesan = driver.find_element_by_xpath( "/html/body/div[1]/div[2]/div/div[2]/div[3]/div/div[2]/div[1]/div/div[1]/div[2]/div[2]/div[" + str(psn) + "]") 
driver.execute_script("arguments[0].scrollIntoView(true);", pesan) psn -= 1 if (len( pesan.find_elements_by_xpath( ".//a[@class='im_message_photo_thumb']")) > 0): # print('ini adalah foto') penulis = penulis + pesan.find_element_by_xpath( ".//a[contains(@class, 'im_message_author user_color_')]").text gambar = pesan.find_element_by_xpath( ".//img[@class='im_message_photo_thumb']").get_attribute('src') pesan4 = "photo" pesan2.insert(0, pesan4) if (len( pesan.find_elements_by_xpath( ".//div[@class='im_message_photo_caption']")) > 0): last = pesan.find_element_by_xpath( ".//div[@class='im_message_photo_caption']").text emo = pesan.find_elements_by_xpath( ".//span[@class='emoji emoji-spritesheet-0']") for x in emo: if (x.text.strip() != ''): last = last.replace(x.text.strip(), ' ') pesan4 = pesan4 + last pesan2.insert(0, last) jam = jam + pesan.find_element_by_xpath( ".//span[@ng-bind='::historyMessage.date | time']").text elif (len( pesan.find_elements_by_xpath( ".//span[@ng-switch-when='messageActionChatJoined']")) > 0): print('seseorang join') penulis = '' last = penulis pesan2 = [""] jam = '' joinn = True # print('Masuk2') elif (len( pesan.find_elements_by_xpath( ".//span[@class='im_message_date_split_text']")) > 0): if ((len( pesan.find_elements_by_xpath( ".//div[@class='im_message_date_split im_service_message_wrap' and @style='display: none;']" )) > 0)): if (len( pesan.find_elements_by_xpath( ".//div[@class='im_message_text']")) > 0): print('Ini juga sebenernya pesan biasa') penulis = penulis + pesan.find_element_by_xpath( ".//a[contains(@class, 'im_message_author user_color_')]" ).text last = pesan.find_element_by_xpath( ".//div[@class='im_message_text']").text emo = pesan.find_elements_by_xpath( ".//span[@class='emoji emoji-spritesheet-0']") for x in emo: if (x.text.strip() != ''): last = last.replace(x.text.strip(), ' ') pesan4 = last pesan2.insert(0, pesan4) try: print('Masuk kesini kali2') jam = jam + pesan.find_element_by_xpath( ".//span[@ng-bind='::historyMessage.date | time']" ).text if (jam == ''): jamm2 = pesan.find_element_by_xpath( ".//span[@class='im_message_date_text nocopy']" ) jam = jam + jamm2.get_attribute('data-content') except: print('Seperti nya masuk ke sini2') jamm2 = pesan.find_element_by_xpath( ".//span[@class='im_message_date_text nocopy']") jam = jam + jamm2.get_attribute('data-content') if (len( pesan.find_elements_by_xpath( ".//span[@my-short-message='replyMessage']")) > 0): balas = pesan.find_element_by_xpath( ".//span[@my-short-message='replyMessage']").text emo = pesan.find_elements_by_xpath( ".//span[@class='emoji emoji-spritesheet-0']") for x in emo: if (x.text.strip() != ''): balas = balas.replace(x.text.strip(), ' ') pesan2.insert(0, "Membalas : " + balas) else: jamm2 = pesan.find_element_by_xpath( ".//span[@class='im_message_date_text nocopy']") jam = jam + jamm2.get_attribute('data-content') print("ini adalah tanggal") tgl = pesan.find_element_by_xpath( ".//span[@class='im_message_date_split_text']").text tgl = tgl.replace(",", "") print(tgl) for k in range(len(tanggal2)): tanggal2[k]['Tanggal'] = tgl tanggal.extend(tanggal2) hari += 1 belum = (hari != target) print(len(tanggal)) tanggal2 = [] else: print('ini adalah pesan biasa') penulis = penulis + pesan.find_element_by_xpath( ".//a[contains(@class, 'im_message_author user_color_')]").text try: print('Masuk kesini kali') jam = jam + pesan.find_element_by_xpath( ".//span[@ng-bind='::historyMessage.date | time']").text if (jam == ''): jamm2 = pesan.find_element_by_xpath( 
".//span[@class='im_message_date_text nocopy']") jam = jam + jamm2.get_attribute('data-content') except: try: print('Seperti nya masuk ke sini') jamm2 = pesan.find_element_by_xpath( ".//span[@class='im_message_date_text nocopy']") jam = jam + jamm2.get_attribute('data-content') except: print('seseorang join') penulis = '' last = penulis pesan2 = [""] jam = '' joinn = True if (not joinn): last = pesan.find_element_by_xpath( ".//div[@class='im_message_text']").text emo = pesan.find_elements_by_xpath( ".//span[@class='emoji emoji-spritesheet-0']") for x in emo: if (x.text.strip() != ''): last = last.replace(x.text.strip(), ' ') pesan4 = last pesan2.insert(0, pesan4) if (len( pesan.find_elements_by_xpath( ".//span[@my-short-message='replyMessage']")) > 0): balas = pesan.find_element_by_xpath( ".//span[@my-short-message='replyMessage']").text emo = pesan.find_elements_by_xpath( ".//span[@class='emoji emoji-spritesheet-0']") for x in emo: if (x.text.strip() != ''): balas = balas.replace(x.text.strip(), ' ') pesan2.insert(0, "Membalas : " + balas) pesan3 = "\n".join(pesan2) masuk = False stop = [",", ".", "#", "?", "*", "-"] cek = pesan3 bahas = [] for x in stop: cek = cek.replace(x, " ") for x in pesan3: b = x.isascii() if not b: pesan3 = pesan3.replace(x, ' ') print('Jam sebelum berubah : ', '|' + str(jam) + '|') if ('M' not in str(jam)): jam2 = 0 elif (jam != 0): jam2 = formatJam(jam) else: jam2 = 0 print('Temp sebelum berubah : ', temp) if (temp == '' and not first): temp = 0 elif (isinstance(temp, str) and temp != ''): temp = formatJam(jam) elif (not isinstance(temp, int)): temp = 0 print('banding = ', temp, '<', jam2) print(joinn) if ((temp < jam2 or first) and not joinn): if (first): temp = jam2 temp2 = jam2 elif (temp2 == ''): temp2 = jam2 if any(word in cek.upper().split() for word in Saham): masuk = True if (penulis != ""): ptemp = penulis else: penulis = ptemp if (True): if (True): bahas = [ word for word in Saham if word in cek.upper().split() ] for x in penulis: c = x.isascii() if not c: penulis = penulis.replace(x, ' ') bahas = ",".join(bahas) print("Data : " + str(psn)) print("user : "******"Pesan : ", pesan3) print("Saham : " + bahas) predict = svm(pesan3) print("Label : ", predict) print("jam : " + jam) print( '=======================================================' ) ada = False print('Balas =', balas) print('Percakapan =', percakapan) # counttt = 0 if (not percakapan): print('Cakap1') percakapan.append([pesan4]) perindex = 1 ada = True elif (balas != ''): for a in percakapan: print('Sub percakapan =', a) perindex2 = percakapan.index(a) for b in a: # if(counttt==30): # print('############Batas#####################') # time.sleep(99) # counttt+=1 print('Sub a =', b + '||') if (balas in b and balas != ''): print('Cakap2') percakapan[perindex2].append(pesan4) perindex = perindex2 + 1 ada = True if (not ada): print('Cakap3') percakapan.append([pesan4]) perindex = percakapan.index([pesan4]) + 1 exist = penulis in ratee.User.values if (exist): df1 = ratee[ratee['User'] == penulis] hit = df1.iloc[0]['Hit'] miss = df1.iloc[0]['Miss'] rate = df1.iloc[0]['Rate'] # print(exist) else: hit = 'No Record' miss = 'No Record' rate = 'No Record' item = { 'User': penulis, 'Pesan': pesan3, 'Saham': bahas, 'Label': predict, 'Jam': jam, 'Hit': str(hit), 'Miss': str(miss), 'Rate': str(rate), 'Percakapan': str(perindex) } itemss.append(item) # if(not pesan3==''): # emit('my response', item) tanggal2.append(item) penulis = "" pesan2 = [] jam = "" print( '$$$$$$$$$$$$$$$$$$$$$$$$END OF 
FOR$$$$$$$$$$$$$$$$$$$$$$$$$$$$' ) if (first == True): Stoped = True else: Stoped = True if (Stoped): if (itemss): itemss.reverse() for it in itemss: emit('my response', it) itemss = [] # time.sleep(10) print('Temp sebelum masuk stop : ', temp) if (not first): if (temp2 != ''): temp = temp2 temp2 = '' # time.sleep(300) driver.execute_script("location.reload()") time.sleep(10) psn = 0 while psn == 0: wrapper = driver.find_element_by_xpath( '/html/body/div[1]/div[2]/div/div[2]/div[3]/div/div[2]/div[1]/div/div[1]/div[2]/div[2]' ) chat = wrapper.find_elements_by_xpath( ".//div[contains(@class, 'im_history_message_wrap')]") psn = len(chat) Stoped = False temp3 = 0 while temp3 != psn: temp3 = psn pesan = driver.find_element_by_xpath( "/html/body/div[1]/div[2]/div/div[2]/div[3]/div/div[2]/div[1]/div/div[1]/div[2]/div[2]/div[" + str(psn) + "]") driver.execute_script("arguments[0].scrollIntoView(true);", pesan) print( "************************************SCROLL***********************************************8" ) time.sleep(2) wrapper = driver.find_element_by_xpath( '/html/body/div[1]/div[2]/div/div[2]/div[3]/div/div[2]/div[1]/div/div[1]/div[2]/div[2]' ) chat = wrapper.find_elements_by_xpath( ".//div[contains(@class, 'im_history_message_wrap')]") psn = len(chat) print('Jam di akhir = ', jam) print('Temp di akhir = ', temp) first = False
def run(train_file, test_file):
    svm.svm(train_file, test_file)
def test10Fold():
    global allWords
    splits = tenFoldCrossValidation()
    # each entry pairs a display name with a factory for a fresh classifier;
    # the fold loop is identical for every model, so run it once per entry
    classifiers = [
        ("Naive Bayes", lambda: naiveBayes()),
        ("Random Forest", lambda: RandomForest(100)),
        ("Neural 5", lambda: neuralNetwork((5, ), 1000)),
        ("Neural 3", lambda: neuralNetwork((3, ), 1000)),
        ("SVM", lambda: svm()),
    ]
    for name, make_classifier in classifiers:
        count = 0
        total = 0
        print(name)
        for split in splits:
            clf = make_classifier()
            trainFeatures = []
            trainClasses = []
            testFeatures = []
            testClasses = []
            for example in split.train:
                trainFeatures.append(example.features)
                trainClasses.append(example.klass)
            for example in split.test:
                testFeatures.append(example.features)
                testClasses.append(example.klass)
            clf.train(trainFeatures, trainClasses)
            clf.test(testFeatures, testClasses)
            accuracy = clf.getCorrectCount() / len(testClasses)
            total = total + accuracy
            print("[INFO]\tFold ", str(count), " Accuracy:", str(accuracy))
            count = count + 1
        print("[INFO]\tAccuracy:", str(total / 10))
from sklearn.metrics import accuracy_score, f1_score
from svm import svm
from ae import ae
from DAE import dae
from idae import idae

noise = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
output = open('result', 'w')
output.write('models ' + 'acc ' + 'f1' + '\n')
# the same report is produced for each model, including the IDAE
# block that was cut off mid-way in the original
models = [("svm", svm), ("AE", ae), ("DAE", dae), ("IDAE", idae)]
for i in noise:
    for name, model in models:
        y_test, y_pre = model(i)
        print name
        print accuracy_score(y_test, y_pre)
        print f1_score(y_test, y_pre, average=None)
        print f1_score(y_test, y_pre, average='macro')
    '--acccoin',
    type=float,
    dest='acc_coin',
    default=0.15,
    help='Accuracy to determine if contour is a coin or not.')
parser.add_argument(
    '-ar',
    '--accrect',
    type=float,
    dest='acc_rect',
    default=0.01,
    help='Accuracy to determine if contour is a rectangle or not.')
arg = parser.parse_args()

if arg.svm is True:
    # check arguments
    svm(arg)
elif arg.new is True:
    if arg.image is None:
        print('Path of image is not set')
    elif arg.refa is None:
        print('Reference A is not set')
    elif arg.refb is None:
        print('Reference B is not set')
    else:
        new(arg)
elif arg.count is True:
    if arg.image is None:
        print('Path of image is not set')
    elif arg.refa is None:
        print('Reference A is not set')
    elif arg.refb is None:
nn.predict("dataset/001 - Dog bark/1-30226-A.ogg", le, "trained_cnn.h5") elif sys.argv[1] == "mlp": #convert into numpy array X, y, le = get_numpy_array(features_df) # split into training and testing data X_train, X_test, y_train, y_test = get_train_test(X, y) num_labels = y.shape[1] # create model architecture model = nn.create_mlp(num_labels) # train model print("Training..") nn.train(model, X_train, X_test, y_train, y_test, "trained_mlp.h5") # compute test loss and accuracy test_loss, test_accuracy = nn.compute(X_test, y_test, "trained_mlp.h5") print("Test loss", test_loss) print("Test accuracy", test_accuracy) # predicting using trained model with any test file in dataset nn.predict("dataset/001 - Dog bark/1-30226-A.ogg", le, "trained_mlp.h5") elif sys.argv[1] == "svm": svm.svm(features_df)
X = []
Y = []
for k in xrange(N_train):
    if random.randn() < 0:
        X.append([-1 + random.randn() * 0.5, -1 + random.randn() * 0.5])
        Y.append([-1])
    else:
        X.append([1 + random.randn() * 0.5, 1 + random.randn() * 0.5])
        Y.append([1])

N_test = 100
X_test = []
Y_test = []
for k in xrange(N_test):
    if random.randn() < 0:
        X_test.append([-1 + random.randn() * 0.5, -1 + random.randn() * 0.5])
        Y_test.append([-1])
    else:
        X_test.append([1 + random.randn() * 0.5, 1 + random.randn() * 0.5])
        Y_test.append([1])

s = svm(2 + 1, weighted=False)
s.train(X, Y, 0.1)
Y_pre = s.predict(X_test)
#for k in xrange(len(Y_pre)):
#    print Y_test[k][0]-Y_pre[k][0]
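A short usage follow-up computing test accuracy from the commented-out comparison above, assuming `predict` returns labels in the same nested-list format as `Y_test`:

correct = sum(1 for y_true, y_hat in zip(Y_test, Y_pre) if y_true[0] == y_hat[0])
print "test accuracy:", float(correct) / N_test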