Exemple #1
0
def shuffle_test(gml_file, auth_time, N_getter, tempfile, _iter, window,
                 _small):
    """
  Generate a file with estimated coefficients for randomized instances of
  the dataset.

  The first line appended to the file holds the coefficients estimated on
  the data as read; each of the following `_iter` lines holds the
  coefficients estimated after one more random permutation of the
  activation times.

  Keyword arguments:
  gml_file --- the graph in .gml format
  auth_time --- list of activation times for authority nodes
  N_getter --- function object that calculates the values of A(x, y)
    and N(x, y). Can be calc.getNs or calc.getNs2.
  tempfile --- name of the file the coefficients will be appended to
  _iter --- number of iterations, i.e. runs of the randomization and
    estimation steps
  window --- length of the window for one time step
  _small --- if True, the estimation is run on the restricted dataset,
    only with nodes that eventually become active

  Side effects: overwrites the "regtime" attribute of every active node
  with the shuffled times and appends `_iter + 1` lines to `tempfile`.
  """
    print("...reading graph...")
    g = read(gml_file)

    # Split the vertices: ids of nodes that never activate (regtime == -1)
    # versus a lookup table of the nodes that do.
    nodes = dict()
    unactive_ids = []
    for node in g.vs:
        if int(node["regtime"]) == -1:
            unactive_ids.append(node["id"])
        else:
            nodes[node["id"]] = node

    # The context manager closes (and flushes) the output file even when
    # N_getter or logreg raises part-way through the run.
    with open(tempfile, 'a') as f:
        # Baseline estimate on the data as read.
        (A, N) = N_getter(unactive_ids, nodes, auth_time, window, False,
                          _small)
        f.write(" ".join(str(num) for num in logreg(A, N)) + "\n")

        # Pool of every activation time that gets shuffled: active nodes
        # first, then the authority nodes.
        all_time = [node["regtime"]
                    for node in g.vs if node["regtime"] != -1] + auth_time

        for j in range(0, _iter):
            print(j)

            all_time = permutation(all_time)
            auth_time = []

            i = 0
            for _id, node in nodes.items():
                # The first entries of the shuffled pool become the new
                # authority times.
                # NOTE(review): 11 is hard-coded, presumably the number of
                # authority nodes -- confirm. With fewer than 11 authority
                # times the pool looks too short and indexing would run
                # past its end.
                while len(auth_time) != 11 and all_time[i] != -1:
                    auth_time.append(all_time[i])
                    i = i + 1
                if node["regtime"] == -1:
                    continue
                nodes[_id]["regtime"] = all_time[i]
                i = i + 1

            (A, N) = N_getter(unactive_ids, nodes, auth_time, window, False,
                              _small)
            f.write(" ".join(str(num) for num in logreg(A, N)) + "\n")
Exemple #2
0
def run_all():
    """Run every model and print a table of their scores, best first.

    Each model function is imported locally, called without arguments, and
    its return value is used as that model's score. `ensemble_methods`
    returns three scores, unpacked as random forest, AdaBoost and gradient
    boosting. Nothing is returned; the table is printed.
    """
    import pandas as pd
    from ds import decision_tree
    from knn import knn
    from logreg import logreg
    from sv import sv
    from ensemble_methods import ensemble_methods
    from xg import xgb

    print('Running All')
    acc_ds = decision_tree()
    acc_knn = knn()
    acc_log = logreg()
    acc_xg = xgb()
    acc_svc = sv()
    acc_rf, acc_ab, acc_gb = ensemble_methods()

    # Model Performance

    models = pd.DataFrame({
        'Model': [
            'XGBoost', 'Logistic Regression', 'KNN', 'Support Vector Machines',
            'Gradient Boosting', 'Random Forest', 'Decision Tree', 'ADABoost'
        ],
        'Score':
        [acc_xg, acc_log, acc_knn, acc_svc, acc_gb, acc_rf, acc_ds, acc_ab]
    })
    # Function-call form of print is valid on both Python 2 and Python 3.
    # A single descending sort is enough; no intermediate ascending sort
    # is needed before it.
    print(models.sort_values(by='Score', ascending=False))
Exemple #3
0
 def _train(self, X, Y):
     """Fit the logistic-regression model and return ``(w, b)``.

     X is expanded with ``makePhi(X, self.M)``, then ``logreg.logreg`` is
     run on the expanded matrix with ``self.params['lamduh']`` and the BFGS
     optimizer. The raw optimizer output is kept in ``self.result``.

     Returns the first ``m`` entries of ``self.result[0]`` as ``w`` and
     entry ``m`` as ``b``, where ``m`` is the second dimension of the
     expanded matrix.
     """
     design = makePhi(X, self.M)
     _, n_weights = design.shape
     self.result = logreg.logreg(
         design,
         Y,
         self.params['lamduh'],
         opt=scipy.optimize.fmin_bfgs,
         printInfo=self.printInfo,
     )
     theta = self.result[0]
     return theta[:n_weights], theta[n_weights]
Exemple #4
0
 def _train(self, X, Y):
     """Train on (X, Y) and return the weight vector and the bias.

     The inputs are mapped through ``makePhi`` with order ``self.M`` and
     handed to ``logreg.logreg`` together with ``self.params['lamduh']``;
     ``scipy.optimize.fmin_bfgs`` is the optimizer. Whatever the optimizer
     returns is stored unchanged in ``self.result``.

     The first element of that result is split in two: its leading ``cols``
     entries are the weights, the entry at index ``cols`` is the bias
     (``cols`` being the column count of the mapped inputs).
     """
     features = makePhi(X, self.M)
     _rows, cols = features.shape
     self.result = logreg.logreg(features, Y, self.params['lamduh'],
                                 opt=scipy.optimize.fmin_bfgs,
                                 printInfo=self.printInfo)
     solution = self.result[0]
     weights = solution[:cols]
     bias = solution[cols]
     return weights, bias
Exemple #5
0
def train(lamduh=0.1, basisfunc='lin', plot=False, optimizePrint=False, name='ls'):
    """Fit logistic regression on one dataset and report its errors.

    Reads ``data/data_<name>_train.csv`` and
    ``data/data_<name>_validate.csv``. In both files columns 0-1 are used
    as the inputs X and column 2 as the target Y.

    Keyword arguments:
    lamduh --- value forwarded to logreg.logreg as its third argument
      (presumably the regularization strength -- confirm)
    basisfunc --- 'lin' selects basis order M=1, 'quad' selects M=2
    plot --- if true, plot the decision boundary on both datasets
    optimizePrint --- forwarded to logreg.logreg as printInfo
    name --- dataset tag used to build the two CSV file names

    Returns numpy.array([tErr, vErr]): the training and validation errors
    as computed by getError.

    Raises ValueError (a subclass of Exception) when basisfunc is neither
    'lin' nor 'quad'.
    """
    # handle basis function
    if basisfunc == 'lin':
        M = 1
    elif basisfunc == 'quad':
        M = 2
    else:
        # One-element tuple so a tuple-valued basisfunc cannot break the
        # %-formatting of the message itself.
        raise ValueError(
            'Value "%s" for basisfunc must be either "lin" or "quad"'
            % (basisfunc,))

    # parameters
    print('======Training======')
    print('lambda = ' + str(lamduh))

    # load data from csv files (named train_data so the function's own
    # name is not shadowed)
    train_data = loadtxt('data/data_' + name + '_train.csv')
    X = train_data[:, 0:2]
    Y = train_data[:, 2:3]

    # Carry out training.
    phi = makePhi(X, M)
    n, m = phi.shape
    result = logreg.logreg(phi, Y, lamduh, opt=scipy.optimize.fmin_bfgs,
                           printInfo=optimizePrint)
    # result[0] holds m weights followed by the bias term.
    w = result[0][:m]
    b = result[0][m]

    # Define the predictLR(x) function, which uses trained parameters
    predictLR = makePredictor(w, b, M, mode='logreg')

    # get training error
    tErr = getError(X, Y, w, b, mode='logreg')

    # plot training results
    if plot:
        # Raw string: "\l" is not a valid escape sequence in a normal
        # string literal; the resulting text is unchanged.
        plotDecisionBoundary(X, Y, predictLR, [0.5],
                             title=r'LR Train with ${\lambda}=%s$' % (lamduh,))

    print('======Validation======')
    # load data from csv files
    validate = loadtxt('data/data_' + name + '_validate.csv')
    X = validate[:, 0:2]
    Y = validate[:, 2:3]

    # plot validation results
    if plot:
        plotDecisionBoundary(X, Y, predictLR, [0.5],
                             title=r'LR Validate with ${\lambda}=%s$' % (lamduh,))

    # get validation error
    vErr = getError(X, Y, w, b, mode='logreg')

    # return training and validation error
    return numpy.array([tErr, vErr])
Exemple #6
0
def main():
    """Score logistic regression for a decreasing number of PCA components.

    Loads X and y from ``../data/pure_landmarks_gender.npy`` and, for every
    k from ``len(get_max_norm_args())`` down to 1, appends
    ``logreg(filter_with_pca(X, k), y)`` to a list of scores. The scores are
    saved with ``np.save`` under the name ``log_reg_scores_pca`` and the
    elapsed wall-clock time is printed at the end.
    """
    started_at = time()
    max_norm_args = get_max_norm_args()

    X, y = get_data('../data/pure_landmarks_gender.npy')

    scores = []
    # Only the number of max-norm arguments is used here; an earlier variant
    # filtered X with filter_points(X, args[:k]) instead of PCA.
    for n_kept in range(len(max_norm_args), 0, -1):
        print(n_kept, flush=True)
        scores.append(logreg(filter_with_pca(X, n_kept), y))

    np.save('log_reg_scores_pca', np.array(scores))

    print(time() - started_at)
# NOTE(review): training_data, validation_data and test_data are used at the
# bottom of this script but have no visible assignment here; the only trace
# is the commented-out cPickle load below, so they are presumably loaded
# elsewhere -- confirm before running.
# training_data, validation_data, test_data = cPickle.load(f)
# hiddenunits = [200, 300, 400, 500, 600]
# learning_rates = [0.01, 0.03, 0.05, 0.07, 0.09, 0.1]
# iterations = [1000, 2000, 5000, 10000, 20000 ]
# f.close()

# Build the USPS test set as a pair of parallel lists:
# usps_test_data[0] collects the pixel vectors, usps_test_data[1] the labels.
usps_test_data = [[],[]]
for j in range(10):
	# Every PNG found under Numerals/<j>/ is labelled with j.
	for image in glob.glob("Numerals/" + str(j) + "/*.png"):
		img = Image.open(image)
		# new_height / new_width are defined outside this fragment.
		# NOTE(review): the tuple is passed as (new_height, new_width);
		# confirm the order Image.resize expects if the two ever differ.
		img = img.resize((new_height, new_width))
		pix = numpy.array(img.getdata(), dtype = 'float64')
		# Element-wise 1 - pix/255 over 784 entries, i.e. the values are
		# scaled and inverted. Assumes each resized image yields exactly
		# 784 values (e.g. 28x28, single channel) -- TODO confirm.
		pix = [1]*784 - (pix/([255]*784))
		usps_test_data[0].append(pix)
		usps_test_data[1].append(j)
		

# print numpy.array(usps_test_data[0][0])
# print training_data[0][0]
# print numpy.array(usps_test_data[1])

# Run the three models in turn.
# NOTE(review): the meaning of the trailing 10 and 0.0005 is not visible
# here; check the logreg.logreg signature.
print "LOGISTIC REGRESSION"
logreg.logreg(training_data, validation_data, test_data, usps_test_data, 10, 0.0005)

print "SINGLE LAYER NEURAL NETWORKS"
neuralnetwork.neuralnetwork(training_data, validation_data, test_data, usps_test_data)

# Only the USPS pixel vectors and labels are passed to this model.
print "CONVOLUTIONAL NEURAL NETWORKS"
tensor.tensorflow(usps_test_data[0], usps_test_data[1])
Exemple #8
0
def train(lamduh=0.1,
          basisfunc='lin',
          plot=False,
          optimizePrint=False,
          name='ls'):
    """Train logistic regression on a named dataset; return both errors.

    The training set is read from ``data/data_<name>_train.csv`` and the
    validation set from ``data/data_<name>_validate.csv``. Columns 0-1 of
    each file are the inputs X, column 2 is the target Y.

    Keyword arguments:
    lamduh --- passed to logreg.logreg as its third argument (presumably
      the regularization strength -- confirm)
    basisfunc --- 'lin' for basis order M=1 or 'quad' for M=2
    plot --- if true, draw the decision boundary for both datasets
    optimizePrint --- passed to logreg.logreg as printInfo
    name --- dataset tag inserted into the CSV file names

    Returns numpy.array([tErr, vErr]), the training and validation errors
    reported by getError.

    Raises ValueError (a subclass of Exception) for any other basisfunc.
    """
    # handle basis function
    if basisfunc == 'lin':
        M = 1
    elif basisfunc == 'quad':
        M = 2
    else:
        # (basisfunc,) keeps the formatting safe when a tuple is passed.
        raise ValueError(
            'Value "%s" for basisfunc must be either "lin" or "quad"' %
            (basisfunc,))
    # parameters
    print('======Training======')
    print('lambda = ' + str(lamduh))
    # load data from csv files; the local is called train_set so that it
    # does not shadow this function's name
    train_set = loadtxt('data/data_' + name + '_train.csv')
    X = train_set[:, 0:2]
    Y = train_set[:, 2:3]

    # Carry out training.
    phi = makePhi(X, M)
    n, m = phi.shape
    result = logreg.logreg(phi,
                           Y,
                           lamduh,
                           opt=scipy.optimize.fmin_bfgs,
                           printInfo=optimizePrint)
    # The first m entries of result[0] are the weights, entry m is the bias.
    w = result[0][:m]
    b = result[0][m]

    # Define the predictLR(x) function, which uses trained parameters
    predictLR = makePredictor(w, b, M, mode='logreg')

    # get training error
    tErr = getError(X, Y, w, b, mode='logreg')

    # plot training results
    if plot:
        # Raw string literal: "\l" is an invalid escape in a plain string;
        # the title text itself is unchanged.
        plotDecisionBoundary(X,
                             Y,
                             predictLR, [0.5],
                             title=r'LR Train with ${\lambda}=%s$' %
                             (lamduh,))

    print('======Validation======')
    # load data from csv files
    validate = loadtxt('data/data_' + name + '_validate.csv')
    X = validate[:, 0:2]
    Y = validate[:, 2:3]

    # plot validation results
    if plot:
        plotDecisionBoundary(X,
                             Y,
                             predictLR, [0.5],
                             title=r'LR Validate with ${\lambda}=%s$' %
                             (lamduh,))

    # get validation error
    vErr = getError(X, Y, w, b, mode='logreg')

    # return training and validation error
    return numpy.array([tErr, vErr])
Exemple #9
0
 def classification_start(self):
     """Run the selected classifier and show its console output in the GUI.

     ``v2`` selects the algorithm (3: logistic regression, 2: support
     vector machine, 1: multinomial naive Bayes) and ``v1`` selects the
     feature set (1: bag-of-words TF-IDF, 2: Doc2Vec). Everything printed
     while the classifier runs and the predictions are saved to CSV is
     captured and appended to ``Scrolledtext1``. On success the ``Hitung``
     button is re-enabled and bound to plot the result.

     NameError, ValueError, AssertionError and AttributeError are each
     turned into a fixed message in the text box; in those cases the
     ``Hitung`` button stays disabled.
     """

     def report_failure(message):
         # Shared tail of every error handler: unlock the start button and
         # append the message to the (normally read-only) text box.
         self.MulaiKlasifikasi.configure(state='normal')
         self.Scrolledtext1.configure(state='normal')
         self.Scrolledtext1.insert(END, message)
         self.Scrolledtext1.configure(state='disabled')

     try:
         self.Hitung.configure(state='disabled')
         self.MulaiKlasifikasi.configure(state='disabled')
         self.Scrolledtext1.configure(state='normal')
         captured = io.StringIO()
         with redirect_stdout(captured):
             # The feature matrices are looked up only inside the branch
             # that uses them, so a missing one surfaces as NameError only
             # for the option actually selected.
             algorithm = v2.get()
             if algorithm == 3:
                 features = v1.get()
                 if features == 1:
                     model = logreg(bow_tfidf_train, train, bow_tfidf_test,
                                    test)
                     roc_name = "ROC BagOfWords Logistic Regression"
                     csv_name = "BagOfWords Logistic Regression"
                 elif features == 2:
                     model = logreg(d2v_train, train, d2v_test, test)
                     roc_name = "ROC Doc2Vec Logistic Regression"
                     csv_name = "Doc2Vec Logistic Regression"
                 pred_res = model.logreg()
             elif algorithm == 2:
                 features = v1.get()
                 if features == 1:
                     model = SVM(bow_tfidf_train, train, bow_tfidf_test,
                                 test)
                     roc_name = "ROC BagOfWords Support Vector Machine"
                     csv_name = "BagOfWords Support Vector Machine"
                 elif features == 2:
                     model = SVM(d2v_train, train, d2v_test, test)
                     roc_name = "ROC Doc2Vec Support Vector Machine"
                     csv_name = "Doc2Vec Support Vector Machine"
                 pred_res = model.svm()
             elif algorithm == 1:
                 features = v1.get()
                 if features == 1:
                     model = MNB(bow_tfidf_train, train, bow_tfidf_test,
                                 test)
                     roc_name = "ROC BagOfWords Multinomial Naive Bayes"
                     csv_name = "BagOfWords Multinomial Naive Bayes"
                 elif features == 2:
                     model = MNB(d2v_train, train, d2v_test, test)
                     roc_name = "ROC Doc2Vec Multinomial Naive Bayes"
                     csv_name = "Doc2Vec Multinomial Naive Bayes"
                 pred_res = model.mnb()
             save_to_csv(test).save(pred_res, csv_name)
         self.Scrolledtext1.insert(END, captured.getvalue())
         self.Scrolledtext1.configure(state='disabled')
         self.Hitung.configure(
             state='normal', command=lambda: self.plot(pred_res, roc_name))
         self.MulaiKlasifikasi.configure(state='normal')
     except NameError:
         report_failure("Lakukan pembentukan VSM terlebih dahulu!\n")
     except ValueError:
         report_failure(
             "Masukkan nilai learning rate dan jumlah iterasi. Bilangan yang diinput hanya berupa angka!\n"
         )
     except AssertionError:
         report_failure(
             "Range yang diterima: 0 < learning rate <= 1 dan jumlah iterasi > 0!\n"
         )
     except AttributeError:
         report_failure("Fitur tidak bisa bernilai negatif!\n")