def shuffle_test(gml_file, auth_time, N_getter, tempfile, _iter, window, _small):
    """Write estimated coefficients for the real and the randomized datasets.

    The coefficients estimated on the data as read from the graph are
    appended to ``tempfile`` first.  Then, ``_iter`` times, the activation
    times are shuffled, redistributed between the authorities and the active
    nodes, and the coefficients are estimated again and appended as one more
    line.

    Keyword arguments:
    gml_file --- the graph in .gml format
    auth_time --- list of activation times for authority nodes
    N_getter --- function object that calculates the values of A(x, y) and
                 N(x, y). Can be calc.getNs or calc.getNs2.
    tempfile --- name of the file the coefficients will be written to
                 (opened in append mode)
    _iter --- number of iterations - runs of the randomization and
              estimation steps
    window --- length of the window for one time step
    _small --- if True, the estimation will be run on the restricted graph,
               only with nodes that eventually become active

    Side effects: the "regtime" attribute of every active node is overwritten
    with a shuffled value on each iteration.
    """
    # Number of shuffled times handed to the authorities on every iteration.
    # NOTE(review): hard-coded; presumably the length of the original
    # auth_time list -- confirm before generalizing.
    n_authorities = 11

    print("...reading graph...")
    g = read(gml_file)

    # Split the vertices: ids of never-activated nodes vs. active nodes by id.
    nodes = dict()
    unactive_ids = []
    for node in g.vs:
        if int(node["regtime"]) == -1:
            unactive_ids.append(node["id"])
        else:
            nodes[node["id"]] = node

    # The context manager closes the file even when an estimation step raises.
    with open(tempfile, 'a') as f:
        # First line: coefficients for the non-randomized data.
        (A, N) = N_getter(unactive_ids, nodes, auth_time, window, False, _small)
        f.write(" ".join(str(num) for num in logreg(A, N)) + "\n")

        # Pool of all known activation times (nodes first, then authorities).
        all_time = [node["regtime"] for node in g.vs
                    if node["regtime"] != -1] + auth_time

        for j in range(0, _iter):
            print(j)
            all_time = permutation(all_time)
            auth_time = []
            i = 0
            for node in nodes.values():
                # Fill the authority times from the front of the shuffled pool
                # before handing the remaining times out to the nodes.
                while len(auth_time) != n_authorities and all_time[i] != -1:
                    auth_time.append(all_time[i])
                    i = i + 1
                if node["regtime"] == -1:
                    continue
                node["regtime"] = all_time[i]
                i = i + 1
            (A, N) = N_getter(unactive_ids, nodes, auth_time, window, False, _small)
            f.write(" ".join(str(num) for num in logreg(A, N)) + "\n")
def run_all():
    """Run every model and print a table of their scores, best first.

    Each model function is called with no arguments and its return value is
    used as that model's score; ``ensemble_methods`` returns three scores,
    unpacked as random forest, AdaBoost and gradient boosting.  Nothing is
    returned; the ranked table is only printed.
    """
    import pandas as pd

    from ds import decision_tree
    from knn import knn
    from logreg import logreg
    from sv import sv
    from ensemble_methods import ensemble_methods
    from xg import xgb

    print('Running All')
    acc_ds = decision_tree()
    acc_knn = knn()
    acc_log = logreg()
    acc_xg = xgb()
    acc_svc = sv()
    acc_rf, acc_ab, acc_gb = ensemble_methods()

    # Model Performance
    models = pd.DataFrame({
        'Model': [
            'XGBoost', 'Logistic Regression', 'KNN', 'Support Vector Machines',
            'Gradient Boosting', 'Random Forest', 'Decision Tree', 'ADABoost'
        ],
        'Score': [acc_xg, acc_log, acc_knn, acc_svc, acc_gb, acc_rf, acc_ds, acc_ab]
    })

    # Only the descending order is ever shown, so sort once.  The call form of
    # print works on both Python 2 and Python 3 for a single argument.
    print(models.sort_values(by='Score', ascending=False))
def _train(self, X, Y):
    """Fit logistic regression on the expanded features and return (w, b).

    X is expanded with makePhi using self.M, and the optimizer output is
    stored on self.result.  The first element of that result is read as a
    parameter vector: its leading entries, one per column of the expanded
    matrix, are the weights and the entry right after them is the bias.
    """
    design = makePhi(X, self.M)
    _, n_columns = design.shape
    self.result = logreg.logreg(
        design,
        Y,
        self.params['lamduh'],
        opt=scipy.optimize.fmin_bfgs,
        printInfo=self.printInfo,
    )
    solution = self.result[0]
    return solution[:n_columns], solution[n_columns]
def _train(self, X, Y):
    """Train on (X, Y) and return the weight vector and the bias.

    The raw return value of logreg.logreg is kept on self.result; its first
    element holds the weights (one per column of the makePhi expansion)
    followed by the bias term.
    """
    features = makePhi(X, self.M)
    _, width = features.shape
    self.result = logreg.logreg(
        features,
        Y,
        self.params['lamduh'],
        opt=scipy.optimize.fmin_bfgs,
        printInfo=self.printInfo,
    )
    params = self.result[0]
    weights = params[:width]
    bias = params[width]
    return weights, bias
def train(lamduh=0.1, basisfunc='lin', plot=False, optimizePrint=False, name='ls'):
    """Train logistic regression on a named dataset and measure its errors.

    Arguments:
    lamduh -- value passed to logreg.logreg as its third argument (printed
              as "lambda")
    basisfunc -- 'lin' (makePhi order M=1) or 'quad' (M=2)
    plot -- if True, plot the decision boundary over the training data and
            over the validation data
    optimizePrint -- forwarded to logreg.logreg as printInfo
    name -- dataset name; data is read from 'data/data_<name>_train.csv'
            and 'data/data_<name>_validate.csv'

    Returns numpy.array([training error, validation error]).

    Raises ValueError if basisfunc is neither 'lin' nor 'quad'.
    """
    # handle basis function
    if basisfunc == 'lin':
        M = 1
    elif basisfunc == 'quad':
        M = 2
    else:
        raise ValueError(
            'Value "%s" for basisfunc must be either "lin" or "quad"' % (basisfunc,))

    # parameters
    print('======Training======')
    print('lambda = ' + str(lamduh))

    # load data from csv files: first two columns are inputs, third is target
    train_data = loadtxt('data/data_' + name + '_train.csv')
    X = train_data[:, 0:2]
    Y = train_data[:, 2:3]

    # Carry out training.
    phi = makePhi(X, M)
    _, m = phi.shape
    result = logreg.logreg(phi, Y, lamduh, opt=scipy.optimize.fmin_bfgs,
                           printInfo=optimizePrint)
    # result[0] holds the m weights followed by the bias term.
    w = result[0][:m]
    b = result[0][m]

    # Define the predictLR(x) function, which uses trained parameters
    predictLR = makePredictor(w, b, M, mode='logreg')

    # get training error
    tErr = getError(X, Y, w, b, mode='logreg')

    # plot training results (raw string keeps the literal backslash in \lambda)
    if plot:
        plotDecisionBoundary(X, Y, predictLR, [0.5],
                             title=r'LR Train with ${\lambda}=%s$' % (lamduh))

    print('======Validation======')
    # load data from csv files
    validate = loadtxt('data/data_' + name + '_validate.csv')
    X = validate[:, 0:2]
    Y = validate[:, 2:3]

    # plot validation results
    if plot:
        plotDecisionBoundary(X, Y, predictLR, [0.5],
                             title=r'LR Validate with ${\lambda}=%s$' % (lamduh))

    # get validation error
    vErr = getError(X, Y, w, b, mode='logreg')

    # return training and validation error
    return numpy.array([tErr, vErr])
def main():
    """Score logistic regression for every PCA size and save the results.

    For k from len(args) down to 1 the data is reduced with
    filter_with_pca(X, k) and passed to logreg together with y.  The scores
    are saved, in that descending-k order, to 'log_reg_scores_pca', and the
    elapsed time is printed at the end.
    """
    started = time()
    args = get_max_norm_args()
    X, y = get_data('../data/pure_landmarks_gender.npy')

    # Alternative feature selection that was tried here instead of PCA:
    #   logreg(filter_points(X, args[:k]), y)
    scores = []
    for k in range(len(args), 0, -1):
        print(k, flush=True)
        reduced = filter_with_pca(X, k)
        scores.append(logreg(reduced, y))

    np.save('log_reg_scores_pca', np.array(scores))
    print(time() - started)
# training_data, validation_data, test_data = cPickle.load(f) # hiddenunits = [200, 300, 400, 500, 600] # learning_rates = [0.01, 0.03, 0.05, 0.07, 0.09, 0.1] # iterations = [1000, 2000, 5000, 10000, 20000 ] # f.close() usps_test_data = [[],[]] for j in range(10): for image in glob.glob("Numerals/" + str(j) + "/*.png"): img = Image.open(image) img = img.resize((new_height, new_width)) pix = numpy.array(img.getdata(), dtype = 'float64') pix = [1]*784 - (pix/([255]*784)) usps_test_data[0].append(pix) usps_test_data[1].append(j) # print numpy.array(usps_test_data[0][0]) # print training_data[0][0] # print numpy.array(usps_test_data[1]) print "LOGISTIC REGRESSION" logreg.logreg(training_data, validation_data, test_data, usps_test_data, 10, 0.0005) print "SINGLE LAYER NEURAL NETWORKS" neuralnetwork.neuralnetwork(training_data, validation_data, test_data, usps_test_data) print "CONVOLUTIONAL NEURAL NETWORKS" tensor.tensorflow(usps_test_data[0], usps_test_data[1])
def train(lamduh=0.1, basisfunc='lin', plot=False, optimizePrint=False, name='ls'):
    """Fit logistic regression on one dataset; return train/validation error.

    Arguments:
    lamduh -- value handed to logreg.logreg as its third argument (reported
              as "lambda" in the printed output)
    basisfunc -- 'lin' selects makePhi order M=1, 'quad' selects M=2
    plot -- when True, draw the decision boundary over both the training
            and the validation data
    optimizePrint -- passed through to logreg.logreg as printInfo
    name -- dataset name used to build the paths
            'data/data_<name>_train.csv' and 'data/data_<name>_validate.csv'

    Returns numpy.array([training error, validation error]).

    Raises ValueError if basisfunc is neither 'lin' nor 'quad'.
    """
    # handle basis function
    if basisfunc == 'lin':
        M = 1
    elif basisfunc == 'quad':
        M = 2
    else:
        raise ValueError(
            'Value "%s" for basisfunc must be either "lin" or "quad"'
            % (basisfunc,))

    # parameters
    print('======Training======')
    print('lambda = ' + str(lamduh))

    # load data from csv files: columns 0-1 are the inputs, column 2 the target
    train_set = loadtxt('data/data_' + name + '_train.csv')
    X = train_set[:, 0:2]
    Y = train_set[:, 2:3]

    # Carry out training.
    phi = makePhi(X, M)
    _, m = phi.shape
    result = logreg.logreg(phi, Y, lamduh,
                           opt=scipy.optimize.fmin_bfgs,
                           printInfo=optimizePrint)
    # The first element of the result is the m weights followed by the bias.
    w = result[0][:m]
    b = result[0][m]

    # Define the predictLR(x) function, which uses trained parameters
    predictLR = makePredictor(w, b, M, mode='logreg')

    # get training error
    tErr = getError(X, Y, w, b, mode='logreg')

    # plot training results; the raw string keeps the backslash of \lambda
    # literal without relying on an invalid escape sequence
    if plot:
        plotDecisionBoundary(X, Y, predictLR, [0.5],
                             title=r'LR Train with ${\lambda}=%s$' % (lamduh))

    print('======Validation======')
    # load data from csv files
    validate = loadtxt('data/data_' + name + '_validate.csv')
    X = validate[:, 0:2]
    Y = validate[:, 2:3]

    # plot validation results
    if plot:
        plotDecisionBoundary(X, Y, predictLR, [0.5],
                             title=r'LR Validate with ${\lambda}=%s$' % (lamduh))

    # get validation error
    vErr = getError(X, Y, w, b, mode='logreg')

    # return training and validation error
    return numpy.array([tErr, vErr])
def classification_start(self):
    """Run the selected classifier and show its console output in the GUI.

    v2 selects the algorithm (3: logistic regression, 2: SVM, 1: multinomial
    naive Bayes) and v1 selects the features (1: bag-of-words TF-IDF,
    2: Doc2Vec).  Everything printed while classifying and saving is captured
    and appended to the text box.  On success the predictions are saved via
    save_to_csv and the Hitung button is re-enabled with a command that plots
    them.

    NameError, ValueError, AssertionError and AttributeError raised along the
    way are reported in the text box with a message specific to the exception
    type instead of propagating.
    """
    try:
        self.Hitung.configure(state='disabled')
        self.MulaiKlasifikasi.configure(state='disabled')
        self.Scrolledtext1.configure(state='normal')

        captured = io.StringIO()
        with redirect_stdout(captured):
            # Names are looked up only inside the branch actually taken: a
            # missing feature matrix (or an unhandled selection, which leaves
            # the model unbound) surfaces as NameError and is reported below.
            if v2.get() == 3:
                if v1.get() == 1:
                    model = logreg(bow_tfidf_train, train, bow_tfidf_test, test)
                    csv_name = "BagOfWords Logistic Regression"
                    roc_name = "ROC BagOfWords Logistic Regression"
                elif v1.get() == 2:
                    model = logreg(d2v_train, train, d2v_test, test)
                    csv_name = "Doc2Vec Logistic Regression"
                    roc_name = "ROC Doc2Vec Logistic Regression"
                pred_res = model.logreg()
            elif v2.get() == 2:
                if v1.get() == 1:
                    model = SVM(bow_tfidf_train, train, bow_tfidf_test, test)
                    csv_name = "BagOfWords Support Vector Machine"
                    roc_name = "ROC BagOfWords Support Vector Machine"
                elif v1.get() == 2:
                    model = SVM(d2v_train, train, d2v_test, test)
                    csv_name = "Doc2Vec Support Vector Machine"
                    roc_name = "ROC Doc2Vec Support Vector Machine"
                pred_res = model.svm()
            elif v2.get() == 1:
                if v1.get() == 1:
                    model = MNB(bow_tfidf_train, train, bow_tfidf_test, test)
                    csv_name = "BagOfWords Multinomial Naive Bayes"
                    roc_name = "ROC BagOfWords Multinomial Naive Bayes"
                elif v1.get() == 2:
                    model = MNB(d2v_train, train, d2v_test, test)
                    csv_name = "Doc2Vec Multinomial Naive Bayes"
                    roc_name = "ROC Doc2Vec Multinomial Naive Bayes"
                pred_res = model.mnb()
            writer = save_to_csv(test)
            writer.save(pred_res, csv_name)

        console_text = captured.getvalue()
        self.Scrolledtext1.insert(END, console_text)
        self.Scrolledtext1.configure(state='disabled')
        self.Hitung.configure(
            state='normal', command=lambda: self.plot(pred_res, roc_name))
        self.MulaiKlasifikasi.configure(state='normal')
    except (NameError, ValueError, AssertionError, AttributeError) as error:
        # Pick the message for the exception type, checked in the same order
        # as separate except clauses would be.
        if isinstance(error, NameError):
            message = "Lakukan pembentukan VSM terlebih dahulu!\n"
        elif isinstance(error, ValueError):
            message = "Masukkan nilai learning rate dan jumlah iterasi. Bilangan yang diinput hanya berupa angka!\n"
        elif isinstance(error, AssertionError):
            message = "Range yang diterima: 0 < learning rate <= 1 dan jumlah iterasi > 0!\n"
        else:
            message = "Fitur tidak bisa bernilai negatif!\n"
        self.MulaiKlasifikasi.configure(state='normal')
        self.Scrolledtext1.configure(state='normal')
        self.Scrolledtext1.insert(END, message)
        self.Scrolledtext1.configure(state='disabled')