Example #1
def main():
    # preprocs = ["wav","normalized","bandpass","highpass"]
    # coefficients = ["mfccs","chroma","mel","contrast","all"]
    # subsegmentLengths = ["0.2", "0.05", "0.01"]
    # chunkLengths = ["1","2","3"]

    preprocs = ["normalized"]
    coefficients = ["mel"]
    subsegmentLengths = ["0.2"]
    chunkLengths = ["2"]

    numNodes = 20

    for preproc in preprocs:
        for coefficientType in coefficients:
            for subsegmentLength in subsegmentLengths:
                for chunkLength in chunkLengths:
                    argv = []
                    argv.append("")
                    argv.append(preproc)
                    argv.append(coefficientType)
                    argv.append(subsegmentLength)
                    argv.append(chunkLength)

                    X, Y = fetchDataMulti.getData(argv)

                    start = time.time()

                    knn.knn(X, Y)
                    svm.svm(X, Y)

                    return
Example #2
    def __init__(self, data, parameters, nodes=10, connections=2):

        # define the model we are working with
        self.model = svm()
        self.alpha = 0.01

        # define the scheme and data
        self.nodes = nodes
        self.outdegree = connections
        self.X = data[0]
        self.y = data[1]
        self.examples = len(self.X)
        self.data_index = [0 for i in range(self.nodes)]

        # define performance metrics
        self.loss = [0]
        self.epochloss = []
        self.time_cost_processing = 0
        self.time_cost_comm = 0
        self.iteration = 0

        # define gradient and parameter information
        temp = np.zeros_like(parameters)
        self.updates = np.repeat(temp[np.newaxis, :], self.nodes, axis=0)
        self.grads = np.repeat(temp[np.newaxis, :], self.nodes, axis=0)
        self.params = np.repeat(parameters[np.newaxis, :], self.nodes, axis=0)

        # At instantiation, divide the data across the workers and decide
        # which nodes (workers) receive updates from each worker
        self.divide_data()
        self.indices = self.get_worker_indices()

        # Get the distribution of the data (does not contain probabilities)
        self.get_distribution()
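The constructor above calls two helpers, divide_data and get_worker_indices, that are not shown. A minimal standalone sketch of what get_worker_indices might do, assuming each worker sends its updates to `outdegree` distinct random peers (an assumption based on the comment, not on the source):

import numpy as np

def get_worker_indices(nodes, outdegree):
    # For every worker i, pick `outdegree` distinct peers (excluding i)
    # to receive its updates.
    rng = np.random.default_rng()
    return [rng.choice([j for j in range(nodes) if j != i],
                       size=outdegree, replace=False).tolist()
            for i in range(nodes)]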
Example #3
def main2(argv, n=5, ex_count=None, feat_count=None, repeats=1):
    # argv = [tree_hw_train_f, tree_hw_test_f]
    total_acc = [0, 0, 0, 0]

    # argv = [tree_mad_train_f, tree_mad_test_f]
    for k in range(repeats):
        tree_arr = []
        line_arr = tree.read_in_file(argv[0])
        test_arr = tree.read_in_file(argv[1])
        if not ex_count:
            ex_count = len(line_arr)

        for i in range(n):
            ex_arr = []
            for j in range(ex_count):
                ex_arr.append(random.choice(line_arr))

            tree_arr.append(tree.Tree(ex_arr, id_three.id3, feat_count=feat_count, measure_function=measure_function))

        whole_arr = build_vector_from_trees(tree_arr, line_arr, n)
        for x in whole_arr:
            x[0] = [1] + x[0]
        w = svm.svm(whole_arr, c=1, gamma=0.001, e=5)

        test_arr = build_vector_from_trees(tree_arr, test_arr, n)
        for x in test_arr:
            x[0] = [1] + x[0]
        accuracy = svm.evaluate_perceptron(test_arr, w)
        total_acc = [x + y for x, y in zip(total_acc, accuracy)]
        # print(accuracy)
    total_acc = [x/repeats for x in total_acc]
    print('Total Accuracy: ' + str(total_acc))
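The build_vector_from_trees helper is not shown. Judging from how its output is used (each element x holds a feature list at x[0] that later gets a bias term prepended), a plausible sketch, where t.predict(example) and example.label are assumed interfaces, is:

def build_vector_from_trees(tree_arr, line_arr, n):
    # Represent each example by the vector of the n tree predictions,
    # paired with its true label, for the downstream SVM.
    return [[[t.predict(example) for t in tree_arr], example.label]
            for example in line_arr]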
Example #4
def main():
    data = []
    inp = []
    num_iter = int(input("Enter the number of iterations: "))
    for i in range(0, num_iter):
        inp = make_granules.make_granules(i, data[:2])
        data = svm.svm(inp)

    print("clf is ", data[-1])

    print(
        "***********************************************************************"
    )
    print("ON THE FINAL DATA !!!!")
    df = read_data.read_data()
    number_of_cols = len(df.columns)

    X = df.values
    Y = X[:, -1]
    X = X[:, :-1]

    indices = np.argwhere(Y == 1)
    clf = data[-1]
    indices = indices.ravel()
    X = X[indices]

    predictions = clf.predict(X)
    correctly_done = np.sum(predictions)
    print("Correctly  classified minority points ",
          correctly_done / len(indices))
    print(
        "***********************************************************************"
    )
Example #5
 def test_svm_predict(self):
     w, max_p, max_acc = svm.svm(lambda ll: ll == 1,
                                 self.train,
                                 self.valid,
                                 params=self.params)
     predict = svm.svm_predict(self.test[1], [(1, w)])
     self.assertGreaterEqual(sum(predict == self.test[0]), 460)
Example #6
    def coreOperation(self, module):
        '''Main function'''
        try:
            if module == 'svm':
                precision, recall = svm()
                result = precision + ',' + recall
            elif module == 'lightgbm':
                precision, recall = lightgbm()

                result = str(precision) + ',' + str(recall)

            # if a != '' and b != '':
            #     result = add(a, b)  # other interfaces can be called here
            #     if result:
            #         result = json.dumps({'code': 200, 'result': result, })
            #     else:
            #         result = json.dumps({'code': 210, 'result': 'no result', })

            else:
                result = json.dumps({
                    'code': 211,
                    'result': 'wrong parameter',
                })
            self.write(result)
        except Exception:
            print('traceback.format_exc():\n%s' % traceback.format_exc())
            result = json.dumps({'code': 503, 'result': 'error'})
            self.write(result)
Example #7
def run_svm(n_train=100, noisy=None):
    print("n_train = ", n_train)
    n_rep = 100  #  number of replicates
    n_test = 100

    its = 0
    e_train = 0
    e_test = 0
    sn = 0

    for i in range(n_rep):
        x, y, w_f = mkdata(n_train + n_test, noisy)
        x_train = x[:, :n_train]
        y_train = y[:, :n_train]
        x_test = x[:, n_train:]
        y_test = y[:, n_train:]

        w_g, num = svm(x_train, y_train)
        sn += num

        x_test = add_bias(x_test)
        x_train = add_bias(x_train)

        e_train += np.where(y_train *
                            (w_g.T @ x_train) < 0)[0].shape[0] / n_train
        e_test += np.where(y_test * (w_g.T @ x_test) < 0)[0].shape[0] / n_test

    print('E_train is %f, E_test is %f' % (e_train / n_rep, e_test / n_rep))
    print('Number of Support Vectors:', sn / n_rep)
    noisy_txt = ": noisy" if noisy else ""
    plotdata(x_train[1:], y_train, w_f, w_g, 'SVM' + noisy_txt)
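The add_bias helper is not defined in this example. Since svm returns a weight vector that is dotted with the biased data (w_g.T @ x_train) and samples are stored column-wise (x[:, :n_train]), a consistent sketch is:

import numpy as np

def add_bias(x):
    # Prepend a row of ones to the (d, n) sample matrix so the first
    # component of w acts as the bias term in w.T @ x.
    return np.vstack((np.ones((1, x.shape[1])), x))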
Example #8
def problem3_final():
  pairs = {
    "orig": ['astro/original/train.4', 'astro/original/test.4'],
    "scal": ['astro/scaled/train.4', 'astro/scaled/test.4'],
  }
  
  trains = []
  tests = []

  for k in pairs:
    tr, dim = build_set_from_file(pairs[k][0])
    te, dim2 = build_set_from_file(pairs[k][1])

    if dim != dim2:
      raise ValueError("dimensionality of training and test data must be equivalent")

    trains = trains + tr
    tests = tests + te

  stats = ["Accuracy  \tC  \t\tLearn Rt  \tE(w)"]
  results = svm.svm(trains, dim, 30)
  for i in range(5):
    r = results[i]
    correct = test(tests, r['weight'], r['C'])
    stats.append("%s  \t%.03f  \t\t%.03f  \t\t%.03f" % (correct, r['C'], r['r'], r['loss']))

  print "combined astro training sets vs combined astro testing sets at epoch = 30"
  for s in stats:
    print "\t%s" % s
Example #9
 def test_svm(self):
     w, max_p, max_acc = svm.svm(lambda ll: ll == 0,
                                 self.train,
                                 self.valid,
                                 params=self.params)
     self.assertLessEqual(la.norm(w - self.results[1]), 1e-3)
     self.assertEqual(max_p, (1e-3, 1e-2))
     self.assertGreaterEqual(max_acc, 0.99)
Example #10
def titanic_pipeline():
    train, test = loaddata()
    train_proc, test_proc = dataprocessing(train, test)
    train_feat, train_labels = featureengineering(train_proc, test_proc)

    rf_acc = randomforest(train_feat, train_labels)
    svm_acc = svm(train_feat, train_labels)
    lg_acc = logistic_regression(train_feat, train_labels)

    results(svm_acc, lg_acc, rf_acc)
Example #11
 def trainClassifiers(self):
     """
     Function pre-creates objects for all classifiers, so that prediction is fast.
     An instance dict of those models is created so that they can be indexed easily.
     arguments: none
     return: none
     """
     
     self.which = {0:vsm(),1:nb(),2:svm()}
     for i in self.which.keys():
         self.which[i].fit()
Example #12
def implementation(dataset):
    table = {
        'Accuracy': [],
        'Kappa statistics': [],
        'Precision': [],
        'Recall': [],
        'F_measure': [],
        'MCC': [],
        'ROC': [],
        'PRC': [],
        'Specificity': []
    }
    plot_table = {'Accuracy': [], 'Sensitivity': [], 'Specificity': []}
    ### NAIVE BAYES CLASSIFICATION ALGORITHM ---------------------------------------------------------
    ac, kp, ps, rc, fm, mc, ra, pa, sp = naive_bayes(dataset=dataset,
                                                     test_size=0.20)
    table = add_to_dict(ac, kp, ps, rc, fm, mc, ra, pa, sp, table=table)
    plot_table = add_to_dict(ac, rc, sp, table=plot_table)
    print(ac, kp, ps, rc, fm, mc, ra, pa, sp)
    ### RANDOM FOREST CLASSIFICATION ALGORITHM -------------------------------------------------------
    ac, kp, ps, rc, fm, mc, ra, pa, sp = random_forest(dataset=dataset)
    table = add_to_dict(ac, kp, ps, rc, fm, mc, ra, pa, sp, table=table)
    plot_table = add_to_dict(ac, rc, sp, table=plot_table)

    ### SVM CLASSIFICATION ALGORITHM -----------------------------------------------------------------
    ac, kp, ps, rc, fm, mc, ra, pa, sp = svm(dataset=dataset)
    table = add_to_dict(ac, kp, ps, rc, fm, mc, ra, pa, sp, table=table)
    plot_table = add_to_dict(ac, rc, sp, table=plot_table)

    ### MLP CLASSIFICATION ALGORITHM -----------------------------------------------------------------
    ac, kp, ps, rc, fm, mc, ra, pa, sp = mlp(dataset=dataset)
    table = add_to_dict(ac, kp, ps, rc, fm, mc, ra, pa, sp, table=table)
    plot_table = add_to_dict(ac, rc, sp, table=plot_table)

    ### J48 CLASSIFICATION ---------------------------------------------------------------------------
    table = add_to_dict(0.928,
                        0.838,
                        0.930,
                        0.929,
                        0.929,
                        0.839,
                        0.975,
                        0.955,
                        0.924,
                        table=table)
    plot_table = add_to_dict(0.928, 0.929, 0.924, table=plot_table)

    return table, plot_table
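The add_to_dict helper takes a variable number of metric values plus a `table` keyword, and both call sites pass the metrics in the same order as the table's columns (9 for `table`, 3 for `plot_table`). A minimal sketch that relies on Python 3.7+ dict insertion order:

def add_to_dict(*metrics, table):
    # Append each metric to the matching column, in declaration order.
    for key, value in zip(table, metrics):
        table[key].append(value)
    return table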
Example #13
def svm_folds(k, path_to_json):
    kfold = KFold(k, path_to_json, '')
    stats = [None] * k
    for i in xrange(k):
        print '{}: Fold {} of {}'.format(datetime.now(), i + 1, k)
        # get train and test dataframes
        train_df, test_df = kfold.get_datasets(i)

        # create train and test datasets
        test_set = DatasetVironovaSVM(test_df, do_oversampling=False)
        train_set = DatasetVironovaSVM(train_df, do_oversampling=False)

        # get confusion matrix from SVM model
        cf = svm.svm(train_set, test_set)
        stats[i] = cf
    return stats
Example #14
def compute(train_fn, test_fn, epoch):
  trains, dim = build_set_from_file(train_fn)
  tests, dim2 = build_set_from_file(test_fn)

  if dim != dim2:
    raise ValueError("dimensionality of training and test data must be equivalent")

  stats = ["Accuracy  \tC  \t\tLearn Rt  \tE(w)"]
  results = svm.svm(trains, dim, epoch)
  for i in range(5):
    r = results[i]
    correct = test(tests, r['weight'], r['C'])
    stats.append("%s  \t%.03f  \t\t%.03f  \t\t%.03f" % (correct, r['C'], r['r'], r['loss']))

  return stats
Example #16
def go(url):
    having_ip_address(url)
    long_url(url)
    shorten_url(url)
    at_the_rate_symbol(url)
    double_slash_redirecting(url)
    prefix_suffix(url)
    having_sub_domain(url)
    ssl(url)
    domain_registration_length(url)
    favicon(url)
    port(url)
    https_token(url)
    request_url(url)
    url_of_anchor(url)
    link_in_tag(url)
    sfh(url)
    submitting_to_email(url)
    abnormal_url(url)
    redirect(url)
    on_mouse_over(url)
    right_click(url)
    popup_window(url)
    iframe(url)
    age_of_domain(url)
    dns_record(url)
    web_traffic(url)
    page_rank(url)
    google_index(url)
    links_pointing_to_page(url)
    statistical_report(url)
    print(data)
    r1 = model1.svm(data)
    r2 = model2.random_forest(data)
    r3 = model3.logistic_regression(data)
    if r1[0] + r2[0] + r3[0] == 3:
        return 'NORMAL URL'
    else:
        return 'PHISHING URL'
Example #17
def main():

    #######################################################################
    # randomize(x_train, y_train)
    # Randomize the order of rows in the training data. This is to allow for
    # more interleaved real-time plotting; given how the training data is
    # generated, all datapoints of one class will be plotted first (and
    # then all points of the other class), unless this function is applied
    # to allow both clusters to be plotted near-simultaneously.
    #######################################################################

    def randomize(x_train, y_train):
        merged = np.concatenate((x_train, y_train.T.reshape(-1, 1)), axis=1)
        np.random.shuffle(merged)
        return merged[:, 0:2], merged[:, 2]

    #######################################################################
    # gen_train()
    # Read in the training points, with the label +1 (blue) or -1 (red).
    #######################################################################

    def gen_train():
        for a, b, idx, predict in zip(x_train[:, 0], x_train[:, 1], 
            y_train_idx[:, 0],  y_train_idx[:, 1]):
            yield a, b, idx, predict

    #######################################################################
    # plot_train(gen_train)
    # Plot training points (read in with gen_train) based on class.
    #######################################################################

    def plot_train(gen_train):
        a, b, idx, predict = (i for i in gen_train)
        if predict == 1:
            xplt_train_c1.append(a)
            yplt_train_c1.append(b)
            plot_c1_train.set_data(xplt_train_c1, yplt_train_c1)
            return plot_c1_train
        else:
            xplt_train_c2.append(a)
            yplt_train_c2.append(b)
            plot_c2_train.set_data(xplt_train_c2, yplt_train_c2)
            return plot_c2_train

    #######################################################################
    # gen_test()
    # Read in the generated test points, with the labels +1/-1 as above.
    #######################################################################

    def gen_test():
        for c, d, idx, predict in zip(x_test[:, 0], x_test[:, 1],  
            predictions_idx[:, 0], predictions_idx[:, 1]):
            yield c, d, idx, predict

    #######################################################################
    # plot_test(gen_test)
    # Plot the test points based on class.
    #######################################################################
    
    def plot_test(gen_test):
        c, d, idx, predict = (i for i in gen_test)
        if predict == 1:
            xplt_test_c1.append(c)
            yplt_test_c1.append(d)
            plot_c1_test.set_data(xplt_test_c1, yplt_test_c1)
            return plot_c1_test
        else:
            xplt_test_c2.append(c)
            yplt_test_c2.append(d)
            plot_c2_test.set_data(xplt_test_c2, yplt_test_c2)
            return plot_c2_test

    #######################################################################
    # clust_input()
    # Custom input for the training data cluster centers, i.e. where
    # the avg of each class' data points will be.
    #######################################################################

    def clust_input():
        default = [[3, 3],[7, 7]]
        while True:
            clust_custom = raw_input("Enter custom cluster centers? [Y/n] ")
            if clust_custom == "Y":
                c1_x = raw_input("X-coordinate of class 1's center: ")
                c1_y = raw_input("Y-coordinate of class 1's center: ")
                c2_x = raw_input("X-coordinate of class 2's center: ")
                c2_y = raw_input("Y-coordinate of class 2's center: ")
                try:
                    return [[float(x) for x in i] 
                    for i in ([c1_x, c1_y],[c2_x, c2_y])]
                except ValueError:
                    if not c1_x or not c1_y or not c2_x or not c2_y:
                        confirm = raw_input("You forgot to enter a value. "
                                            "Continue with defaults? [Y/n] ")
                        if confirm == "Y":
                            return default
                        else:
                            print "Please try input again."
                    else:
                        print ("Sorry, one of the values was not a number. "
                            "Please try input again.")
            else:
                return default

    #######################################################################
    # cov_input()
    # Custom input for the covariance matrix, which (basically) affects
    # how spread out the training data will be.
    #######################################################################

    def cov_input():
        default = [[0.6, 0], [0, 0.6]]
        while True:
            cov_custom = raw_input("Enter a custom covariance matrix? [Y/n] ")
            if cov_custom == "Y":
                print "Enter your matrix values 1-4 in format:"
                print "[[1, 2]\n [3, 4]]"
                m1 = raw_input("1: ")
                m2 = raw_input("2: ")
                m3 = raw_input("3: ")
                m4 = raw_input("4: ")
                try:
                    return [[float(x) for x in i] for i in ([m1, m2],[m3, m4])]
                except ValueError:
                    if not m1 or not m2 or not m3 or not m4:
                        confirm = raw_input("You forgot to enter a value. "
                            "Continue with defaults? [Y/n] ")
                        if confirm == "Y":
                            return default
                        else:
                            print "Please try input again."
                    else:
                        print ("Sorry, one of the values was not a number. "
                                "Please try input again.")
            else:
                return default

    #######################################################################
    # nodes_input()
    # Custom input for number of nodes, i.e. data points for an individual
    # class. Because there are two classes for both the training and
    # testing data, you'll end up with n * 4 datapoints.
    #######################################################################

    def nodes_input():
        default = 100
        while True:
            node_custom = raw_input("Enter custom # of nodes? [Y/n] ")
            if node_custom == "Y":
                n = raw_input("Number of nodes: ")
                try:
                    return int(n)
                except ValueError:
                    print "Input was not a number; please try again."
            else:
                return default

    """ Example of data analysis/visualization with SVM. """
    
    print "If you just want a quick SVM demo, leave the following blank."
    cluster_ctrs = clust_input()
    cov_matrix = cov_input()
    n = nodes_input()

    # Generate and format datapoints.
    cluster_c1, cluster_c2 = [i for i in cluster_ctrs]
    print "Cluster centers:", cluster_c1, cluster_c2
    print "Covariance matrix", cov_matrix
    print "Number of nodes", n
    x_train_c1 = np.random.multivariate_normal(cluster_c1, cov_matrix, n)
    y_train_c1 = np.ones(n)
    x_train_c2 = np.random.multivariate_normal(cluster_c2, cov_matrix, n)
    y_train_c2 = np.ones(n) * -1
    x_train = np.vstack((x_train_c1, x_train_c2))
    y_train = np.hstack((y_train_c1, y_train_c2))

    # Finish preprocessing code.
    x_train, y_train = randomize(x_train, y_train)
    y_train_idx = np.array([(idx, predict)
        for idx, predict in enumerate(y_train)])

    # Empty lists to hold plot train/test data.
    xplt_train_c1, yplt_train_c1 = [], []
    xplt_train_c2, yplt_train_c2 = [], []
    xplt_test_c1, yplt_test_c1 = [], []
    xplt_test_c2, yplt_test_c2 = [], []

    # Set up figure.
    fig = plt.figure()
    fig.patch.set_facecolor('white')
    ax = fig.add_subplot(111)
    fig_limit = np.sum(cluster_ctrs) / 2.0 # plot size scales to data
    ax.set_ylim(0, fig_limit)
    ax.set_xlim(0, fig_limit)
    plt.xlabel('X values')
    plt.ylabel('Y values')
    plt.title('Linear SVM Demo')
    plot_c1_train, = ax.plot([], [], 'bo', ms=10)
    plot_c2_train, = ax.plot([], [], 'ro', ms=10)
    plot_c1_test, = ax.plot([], [], 'b+', ms=10)
    plot_c2_test, = ax.plot([], [], 'r+', ms=10)
    plot_c1_train.set_label('Class 1, Train')
    plot_c2_train.set_label('Class 2, Train')
    plot_c1_test.set_label('Class 1, Test')
    plot_c2_test.set_label('Class 2, Test')
    plt.legend(loc=2, fontsize='small')

    # Build and train our SVM classifier.
    clf = svm()
    clf.fit(x_train, y_train)
    print "Weight vector: %s\nBias: %s" % (clf.w, clf.bias)

    # Generate test points & predictions.
    # n_test: # test points == # of training points
    # mean_test: test points centered at average of train. cluster centers
    # cov_test: minimal covariance, since we need linearly separable data.
    n_test = n * 2
    mean_test = 0.5 * np.add(cluster_c1, cluster_c2)
    cov_test = [[1, 0], [0, 1]]
    a, b = np.random.multivariate_normal(mean_test, cov_test, n_test).T
    x_test = np.array([(a[i], b[i]) for i in xrange(n_test)])
    predictions = clf.predict(x_test)
    predictions_idx = np.array([(idx, predict)
        for idx, predict in enumerate(clf.predict(x_test))])

    # Visualize results, and write to stdout.
    # animation.FuncAnimation's interval attribute is the # of milliseconds
    # between animation events, i.e. plotting points.
    anim_train = animation.FuncAnimation(fig, plot_train, 
        gen_train, blit=False,interval=1, repeat=False)
    anim_test = animation.FuncAnimation(fig, plot_test, 
        gen_test, blit=False,interval=1, repeat=False)
    clf.plot_boundary()
    plt.show()
    print "\n--TRAINING DATA--"
    print "x_train is:"
    print "Type:", type(x_train)
    print "Shape:\n", x_train.shape
    print "y_train is:"
    print "Type:", type(y_train)
    print "Shape:\n%s\n" % (y_train.shape)
    print "--TESTING DATA--"
    print "x_test is:"
    print "Type:", type(x_test)
    print "Shape:\n%s\n" % (x_test.shape)
Example #18
def y_nn():
    with open('y_nn.txt') as f:
        y = list(map(int, f.read().split()))
        return y


if __name__ == '__main__':

    # y_result = y_nn()
    # print(y_result)

    X_pure_train, X_sentences_train, aspects_list_train, _ = load_text(
        'SentiRuEval_rest_markup_train.xml')
    #X_pure_test, X_sentences_test, aspects_list_test, X_p = load_text('SentiRuEval_rest_markup_test.xml')
    X_pure_test, X_sentences_test, X_p = load_hotel()

    y_result1, y_result2, y_result3 = svm(X_pure_train, X_sentences_train,
                                          aspects_list_train, X_pure_test,
                                          X_sentences_test)

    pair(X_p, y_result3)

    #y_result = lingvistic(X_pure_test)
    #y_result = freq(X_pure_train, X_pure_test)

    #print (len(X_pure_test), len(y_result1))

    #save_result(X_pure_test, y_result, 'SentiRuEval_result_rest_test_on_rest_2LSTM.xml')
    # save_result(X_pure_test, y_result1, 'SentiRuEval_result_rest_test_on_rest_CNN.xml')
    # save_result(X_pure_test, y_result2, 'SentiRuEval_result_rest_test_on_rest_RF_myw2v.xml')
    # save_result(X_pure_test, y_result3, 'SentiRuEval_result_rest_test_on_rest_GNB_myw2v.xml')
Example #19
from logistic_regression import lr
from naive_bayes import nb
from svm import svm

lr()
nb()
svm()
Example #20
import pre_process
import logistic_reg
import svm
import random_forest
output_file = "D:/data/output_1.csv"
pre_process.pre_processing("D:/data/Epi.pkl")

# Module selection

choice = input(
    "choose your module : 1.Logistic Regression  2.SVM  3.Random Forest :")
if choice == '1':
    logistic_reg.logistic_reg("D:/data/output_3.pkl")
elif choice == '2':
    svm.svm("D:/data/output_3.pkl")
elif choice == '3':
    random_forest.random_forest("D:/data/output_3.pkl")
Example #21
target[indices, 2] = 1.

train = iris[::2, 0:4]
traint = target[::2]
test = iris[1::2, 0:4]
testt = target[1::2]

output = np.zeros((np.shape(test)[0], 3))

import svm
reload(svm)

# Learn the full data
#svm0 = svm.svm(kernel='linear')
#svm0 = svm.svm(kernel='poly',C=0.1,degree=3)
svm0 = svm.svm(kernel='rbf')
svm0.train_svm(train, np.reshape(traint[:, 0], (np.shape(train[:, :2])[0], 1)))
output[:, 0] = svm0.classifier(test, soft=True).T

#svm1 = svm.svm(kernel='linear')
#svm1 = svm.svm(kernel='poly',C=0.1,degree=3)
svm1 = svm.svm(kernel='rbf')
svm1.train_svm(train, np.reshape(traint[:, 1], (np.shape(train[:, :2])[0], 1)))
output[:, 1] = svm1.classifier(test, soft=True).T

#svm2 = svm.svm(kernel='linear')
#svm2 = svm.svm(kernel='poly',C=0.1,degree=3)
svm2 = svm.svm(kernel='rbf')
svm2.train_svm(train, np.reshape(traint[:, 2], (np.shape(train[:, :2])[0], 1)))
output[:, 2] = svm2.classifier(test, soft=True).T
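The three machines yield one soft score per class in output. A plausible way to finish the one-vs-rest scheme (not shown in the source) is to take the highest-scoring class and compare against the one-hot targets:

predicted = np.argmax(output, axis=1)   # highest soft score wins
actual = np.argmax(testt, axis=1)       # testt is one-hot
print("%.3f test accuracy" % np.mean(predicted == actual))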
Example #22
    labeltrain0 = np.ones((np.shape(train0)[0], 1))
    labeltrain1 = -np.ones((np.shape(train1)[0], 1))
    labeltrain = np.concatenate((labeltrain0, labeltrain1), axis=0)
    labeltest0 = np.ones((np.shape(test0)[0], 1))
    labeltest1 = -np.ones((np.shape(test1)[0], 1))
    labeltest = np.concatenate((labeltest0, labeltest1), axis=0)

pl.figure()
pl.plot(train0[:, 0], train0[:, 1], "o", color="0.75")
pl.plot(train1[:, 0], train1[:, 1], "s", color="0.25")

import svm
reload(svm)

svm = svm.svm(kernel='linear', C=0.1)
#svm = svm.svm(kernel='rbf')
#svm = svm.svm(kernel='poly',C=0.1,degree=4)

print np.shape(train), np.shape(labeltrain)
svm.train_svm(train, labeltrain)
pl.scatter(svm.X[:, 0], svm.X[:, 1], s=200, color='k')

predict = svm.classifier(test, soft=False)
correct = np.sum(predict == labeltest)
print correct, np.shape(predict)
print float(correct) / np.shape(predict)[0] * 100., "test accuracy"

# Classify points over 2D space to fit contour
x, y = np.meshgrid(np.linspace(-6, 6, 50), np.linspace(-6, 6, 50))
xx = np.reshape(np.ravel(x), (2500, 1))
Example #23
def modified_XOR(kernel, degree, C, sdev):
    import svm
    sv = svm.svm(kernel, degree=degree, C=C)

    m = 100
    X = sdev * np.random.randn(m, 2)
    X[m / 2:, 0] += 1.
    X[m / 4:m / 2, 1] += 1.
    X[3 * m / 4:, 1] += 1.
    targets = -np.ones((m, 1))
    targets[:m / 4, 0] = 1.
    targets[3 * m / 4:, 0] = 1.

    sv.train_svm(X, targets)

    Y = sdev * np.random.randn(m, 2)
    Y[m / 2:, 0] += 1.
    Y[m / 4:m / 2, 1] += 1.
    Y[3 * m / 4:m, 1] += 1.
    test = -np.ones((m, 1))
    test[:m / 4, 0] = 1.
    test[3 * m / 4:, 0] = 1.

    #test = (np.where(Y[:,0]*Y[:,1]>=0,1,-1)*np.ones((1,np.shape(Y)[0]))).T
    #print test.T
    output = sv.classifier(Y, soft=False)
    #print output.T
    #print test.T
    err1 = np.where((output == 1.) & (test == -1.))[0]
    err2 = np.where((output == -1.) & (test == 1.))[0]
    print kernel, C
    print "Class 1 errors ", len(err1), " from ", len(test[test == 1])
    print "Class 2 errors ", len(err2), " from ", len(test[test == -1])
    print "Test accuracy ", 1. - (float(len(err1) + len(err2))) / (
        len(test[test == 1]) + len(test[test == -1]))

    pl.ion()
    pl.figure()
    l1 = np.where(targets == 1)[0]
    l2 = np.where(targets == -1)[0]
    pl.plot(X[sv.sv, 0], X[sv.sv, 1], 'o', markeredgewidth=5)
    pl.plot(X[l1, 0], X[l1, 1], 'ko')
    pl.plot(X[l2, 0], X[l2, 1], 'wo')
    l1 = np.where(test == 1)[0]
    l2 = np.where(test == -1)[0]
    pl.plot(Y[l1, 0], Y[l1, 1], 'ks')
    pl.plot(Y[l2, 0], Y[l2, 1], 'ws')

    step = 0.1
    f0, f1 = np.meshgrid(
        np.arange(np.min(X[:, 0]) - 0.5,
                  np.max(X[:, 0]) + 0.5, step),
        np.arange(np.min(X[:, 1]) - 0.5,
                  np.max(X[:, 1]) + 0.5, step))

    out = sv.classifier(np.c_[np.ravel(f0), np.ravel(f1)], soft=True).T

    out = out.reshape(f0.shape)
    pl.contour(f0, f1, out, 2)

    pl.axis('off')
    pl.show()
Example #26
def evaluateSVM():
    claResults.append(["SVM"])
    for data in claDatasets:
        #Import the Dataset and separate X and y
        data_to_test = 'datasets/classification/' + data + '.csv'
        dataset = pd.read_csv(data_to_test)
        X_before, y_before = encodeData(dataset)
#        X_before = dataset.iloc[:, :-1].values
#        y_before = dataset.iloc[:, 38]
        
        count = 0
        avg_roc_auc = 0
        avg_accuracy = 0
        avg_precision = 0
        avg_recall = 0
        avg_f1score = 0
        
        fpr = 0
        tpr = 0
        threshold = 0
       
        for train, test in kfold.split(X_before):
            print("Test:", count+1, " for", data)
            X_train, X_test = X_before[train], X_before[test]
            y_train, y_true = y_before[train], y_before[test]
            
            #feature scaling
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)
            
            # run SVM
            from svm import svm
            svm = svm(X_train, y_train, X_test, y_true)
            y_pred = svm.getPredictions()
            
            fpr, tpr, threshold = metrics.roc_curve(y_true, y_pred)
            roc_auc = metrics.auc(fpr, tpr)
            
            # get metrics
            avg_roc_auc += roc_auc
            avg_accuracy += svm.getAccuracy()
            avg_precision += metrics.precision_score(y_true, y_pred)
            avg_recall += metrics.recall_score(y_true, y_pred)
            avg_f1score += metrics.f1_score(y_true, y_pred)
            
            count += 1

        avg_roc_auc = avg_roc_auc / count
        avg_accuracy = avg_accuracy / count
        avg_precision = avg_precision / count
        avg_recall = avg_recall / count
        avg_f1score = avg_f1score / count
        
        claResults.append(['', data_to_test, float(avg_roc_auc), float(avg_accuracy),
                        float(avg_precision), float(avg_recall), float(avg_f1score)
                        ])
    '''
    plt.title('Receiver Operating Characteristic')
    plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
    plt.legend(loc = 'lower right')
    plt.plot([0, 1], [0, 1],'r--')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()
    '''
    
    print("\nSVM evaluation results")
    print("Average ROC AUC:", avg_roc_auc)
    print("Average accuracy:", avg_accuracy)
    print("Average precision:", avg_precision)
    print("Average recall:", avg_recall)
    print("Average f1 score:", avg_f1score)
Example #27
def main(argv):
    print("start of main\n")

    # file handling
    if not os.path.isdir(FLAGS.data_dir):
        raise FileNotFoundError("data_dir doesn't exist: " + FLAGS.data_dir)

    html_dir = os.path.join(FLAGS.data_dir, FLAGS.html_folder)
    if not os.path.isdir(html_dir):
        raise FileNotFoundError("html_folder doesn't exist: " +
                                FLAGS.html_folder)

    tfr_dir = os.path.join(FLAGS.data_dir, "TFR_" + CUR_TIME)
    os.mkdir(tfr_dir)
    train_dir = os.path.join(tfr_dir, 'train')
    # os.mkdir(train_dir)
    test_dir = os.path.join(tfr_dir, 'test')
    # os.mkdir(test_dir)
    shuf_dir = os.path.join(tfr_dir, 'shuffle')
    os.mkdir(shuf_dir)

    # logging
    log_file = os.path.join(tfr_dir, "log")
    set_logging(stream=True, fileh=True, filename=log_file)
    logging.info("\nall arguments:")
    for attr, value in sorted(FLAGS.__flags.items()):
        logging.info("{}={}".format(attr.upper(), value))
    logging.info("")

    # shuffle all data thoroughly
    # all data stored in several train and test json files.
    all_data = []
    logging.info('')
    logging.info('reading all data')
    for category in os.listdir(html_dir):
        cat_dir = os.path.join(html_dir, category)
        if os.path.isdir(cat_dir):
            cat_id = CATEGORIES.index(category)
            for j_file in os.listdir(cat_dir):
                j_path = os.path.join(cat_dir, j_file)
                if os.path.isfile(j_path) and not j_file.startswith('.'):
                    # read single html json file
                    pages = read_json(j_path)
                    for page in pages:
                        page['label'] = cat_id + 1
                    all_data.extend(pages)

    # shuffle all data
    logging.info("\nshuffling the whole dataset")
    shuffle(all_data)

    # convert every page string into FastText format
    fast_data = []
    for page in all_data:
        page_str = '__label__' + str(page['label']) + ', '
        # escape \n and add a space before each comma
        page_str += page['html'].replace('\n', '\\n').replace(',', ' ,')
        # print(page_str)

        fast_data.append(page_str)

    all_data = fast_data
    logging.info("\nsplitting data into train and test set")
    train_num = math.floor(len(all_data) * FLAGS.train_ratio)
    train_set = all_data[:train_num]
    test_set = all_data[train_num:]

    if FLAGS.model == 'svm':
        svm.svm(FLAGS.num_cats, train_set, test_set)
    else:

        logging.info("\nwriting shuffled train data into json")
        with open(train_dir, 'w') as f:
            f.write('\n'.join(train_set))

        logging.info("\nwriting shuffled test data into json")
        with open(test_dir, 'w') as f:
            f.write('\n'.join(test_set))

        logging.info("train_num: {}".format(train_num))
        logging.info("test_num: {}".format(len(all_data) - train_num))
    print("\n end of main~~~")
Example #28
        # Creating dictionary from x_train
        words, wordList = getWordList(x_train)

        # Removing most frequent 100 words
        for _ in range(100):
            words.pop(0)

        wordList = [x for x, _ in words]

        # Forming feature vector, calculating Conditional probabilities, applying NBC
        trainfv, trainfv0, trainfv1 = featureVector(wordList[:w], x_train,
                                                    y_train)
        testfv, testfv0, testfv1 = featureVector(wordList[:w], x_test, y_test)

        #        zoltemplr[i]  = lr(trainfv,testfv)
        zoltempsvm[i] = svm(trainfv, testfv)
#        zoltempnbc[i] = nbc(trainfv,testfv)

    avgzollr[r] = np.mean(zoltemplr)
    avgzolsvm[r] = np.mean(zoltempsvm)
    avgzolnbc[r] = np.mean(zoltempnbc)

    stddevzollr[r] = np.std(zoltemplr)
    stddevzolsvm[r] = np.std(zoltempsvm)
    stddevzolnbc[r] = np.std(zoltempnbc)

    stderrzollr[r] = stddevzollr[r] / math.sqrt(it)
    stderrzolsvm[r] = stddevzolsvm[r] / math.sqrt(it)
    stderrzolnbc[r] = stddevzolnbc[r] / math.sqrt(it)

print stderrzollr
Example #33
def main():
    warnings.simplefilter("ignore", UserWarning)
    csv = pd.read_csv('Glass.csv', sep=',')

    # Column used to stratify the splits (the STRATIFY parameter keeps the class proportions when the splits are made)
    classes = csv['Class']
    """
        realiza o shuffle e divided em 2 conjuntos de tamanho iguais (1/2) => 50% para o conjunto de teste
        database[0] = conjunto de teste
        database[1] = resto do conjunto
    """
    database = skms.train_test_split(csv,
                                     test_size=0.5,
                                     train_size=0.5,
                                     shuffle=True,
                                     stratify=classes)
    train = database[0]
    classes = database[1]['Class']
    """
        realiza uma segunda divisão sobre o resto do conjunto de dados ((1/2)/2) => 25% para o conjunto de validação e teste
        database[0]' = conjunto de validação
        database[1]' = conjutno de teste
    """
    database = skms.train_test_split(database[1],
                                     test_size=0.5,
                                     train_size=0.5,
                                     shuffle=True,
                                     stratify=classes)
    validation = database[0]
    test = database[1]

    target_test = test['Class']

    # Features
    features_test = test

    # Drop the Target column, i.e., separate it from the features
    features_test = features_test.drop(['Class'], axis=1)

    # trained classifiers
    clfs = [None, None, None, None, None]
    # classifier scores on the test set
    clfs_scores = [None, None, None, None, None, None, None, None]

    clfs[0] = knn.findBestKNN(train, validation)  # Euclidean KNN
    clfs[1] = dt.decision_tree(train,
                               validation)  # full decision tree (no pruning)
    clfs[2] = nb.naive_bayes(train, validation)  # Bernoulli Naive Bayes
    clfs[3] = svm.svm(train, validation)  # SVM with RBF kernel
    clfs[4] = mlp.my_little_poney(train, validation)  # MLP (constant learning rate)

    clfs_scores[0] = testingClassifiers(clfs[0], features_test,
                                        target_test)  #KNN
    clfs_scores[1] = testingClassifiers(clfs[1], features_test,
                                        target_test)  #Decision-Tree
    clfs_scores[2] = testingClassifiers(clfs[2], features_test,
                                        target_test)  #Naive-Bayes
    clfs_scores[3] = testingClassifiers(clfs[3], features_test,
                                        target_test)  #SVM
    clfs_scores[4] = testingClassifiers(clfs[4], features_test,
                                        target_test)  #MLP
    #temp_sum = VotingClassifier(estimators=[('knn', clfs[0]), ('dt', clfs[0]), ('nb', clfs[0]), ('svm', clfs[0]), ('mlp', clfs[0])], voting='hard')
    clfs_scores[5] = score(rule_of_sum(clfs, features_test, target_test),
                           target_test)  # sum rule
    clfs_scores[6] = score(rule_of_prod(clfs, features_test, target_test),
                           target_test)  # product rule
    clfs_scores[7] = score(borda_count(clfs, features_test, target_test),
                           target_test)  # Borda count

    del classes, csv, database, test, target_test, features_test
    return clfs_scores
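The combiners rule_of_sum, rule_of_prod, and borda_count are not shown. A hypothetical sketch of the sum rule, assuming the five classifiers are scikit-learn estimators exposing predict_proba (the target_test argument is accepted only to match the call site):

import numpy as np

def rule_of_sum(clfs, X, y=None):
    # Sum the per-class probabilities of every classifier and return the
    # class with the largest combined score.
    probs = sum(clf.predict_proba(X) for clf in clfs)
    return clfs[0].classes_[np.argmax(probs, axis=1)]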
Example #34
i_time = config.getint('sys','i_time')

with open(complete_file_path,'w') as f:
    f.write("0,"+str(i_time * 11))

with open(os.path.join(file_path,"pid.txt"),'w') as f:
    f.write(str(os.getpid()))

from knn import knn
from ada_boost import ada_boost
from random_forest import random_forest
from logistic import logistic
from svm import svm
from decision_tree import c4_5,cart
from k_mean import k_mean
from xgboost_clf import xgboost
from gbdt_clf import gbdt
from net import net

print('knn:',knn(i_time=i_time))
print('AdaBoost:',ada_boost(i_time=i_time))
print('random forest',random_forest(i_time=i_time))
print('logistic regression:',logistic(i_time=i_time))
print('C4.5:',c4_5(i_time=i_time))
print('cart:',cart(i_time=i_time))
print('k_mean',k_mean(i_time=i_time))
print('xgboost',xgboost(i_time=i_time))
print('gbdt',gbdt(i_time=i_time))
print('SVM:',svm(i_time=i_time))
print('net',net(i_time=i_time))
Example #35
import numpy as np
import pandas as pd
from svm import svm

df = pd.read_csv('data/pulsar_stars.csv')
npa = np.asarray(df)
for i in npa:
    if not i[8]:
        i[8] = -1

train_X = npa[:10000, :8]
train_Y = npa[:10000, 8]

test_X = npa[10000:, :8]
test_Y = npa[10000:, 8]

w = svm(train_X, train_Y, .00001, 10)

errors, tot = 0, 0
for x, y in zip(test_X, test_Y):
    if y * np.dot(x, w) < 1:
        errors += 1
    tot += 1

print("testing error percentage: ", 100 * (errors / tot))
Example #36
def handle_my_custom_event(json):
    def formatJam(teks):
        print('The time is: ' + teks)
        formm = teks.split()
        satuan = formm[1]
        jamm = formm[0]
        jamm = jamm.split(':')
        jam2 = jamm[0]
        menit = jamm[1]
        detik = jamm[2]
        formatt = jam2 + menit + detik
        return int(formatt)
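    # e.g. formatJam('10:15:30 AM') -> 101530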

    nama = 'I am agias'

    myprofile = webdriver.FirefoxProfile(
        r'C:\Users\Aloysius\AppData\Roaming\Mozilla\Firefox\Profiles\fcbei8vp.teleScrape'
    )
    PATH = "C:\Program Files (x86)\geckodriver.exe"
    driver = webdriver.Firefox(firefox_profile=myprofile, executable_path=PATH)

    target = 3
    Saham = [
        'AALI', 'ABBA', 'ABDA', 'ABMM', 'ACES', 'ACST', 'ADES', 'ADHI', 'ADMF',
        'ADMG', 'ADRO', 'AGAR', 'AGII', 'AGRO', 'AGRS', 'AHAP', 'AIMS', 'AISA',
        'AKKU', 'AKPI', 'AKRA', 'AKSI', 'ALDO', 'ALKA', 'ALMI', 'ALTO', 'AMAG',
        'AMAN', 'AMAR', 'AMFG', 'AMIN', 'AMOR', 'AMRT', 'ANDI', 'ANJT', 'ANTM',
        'APEX', 'APIC', 'APII', 'APLI', 'APLN', 'ARGO', 'ARII', 'ARKA', 'ARMY',
        'ARNA', 'ARTA', 'ARTI', 'ARTO', 'ASBI', 'ASDM', 'ASGR', 'ASII', 'ASJT',
        'ASMI', 'ASPI', 'ASRI', 'ASRM', 'ASSA', 'ATAP', 'ATIC', 'AUTO', 'AYLS',
        'BABP', 'BACA', 'BAJA', 'BALI', 'BANK', 'BAPA', 'BAPI', 'BATA', 'BAYU',
        'BBCA', 'BBHI', 'BBKP', 'BBLD', 'BBMD', 'BBNI', 'BBRI', 'BBRM', 'BBSI',
        'BBSS', 'BBTN', 'BBYB', 'BCAP', 'BCIC', 'BCIP', 'BDMN', 'BEBS', 'BEEF',
        'BEKS', 'BELL', 'BESS', 'BEST', 'BFIN', 'BGTG', 'BHAT', 'BHIT', 'BIKA',
        'BIMA', 'BINA', 'BIPI', 'BIPP', 'BIRD', 'BISI', 'BJBR', 'BJTM', 'BKDP',
        'BKSL', 'BKSW', 'BLTA', 'BLTZ', 'BLUE', 'BMAS', 'BMRI', 'BMSR', 'BMTR',
        'BNBA', 'BNBR', 'BNGA', 'BNII', 'BNLI', 'BOGA', 'BOLA', 'BOLT', 'BOSS',
        'BPFI', 'BPII', 'BPTR', 'BRAM', 'BRIS', 'BRMS', 'BRNA', 'BRPT', 'BSDE',
        'BSIM', 'BSSR', 'BSWD', 'BTEK', 'BTEL', 'BTON', 'BTPN', 'BTPS', 'BUDI',
        'BUKK', 'BULL', 'BUMI', 'BUVA', 'BVIC', 'BWPT', 'BYAN', 'CAKK', 'CAMP',
        'CANI', 'CARE', 'CARS', 'CASA', 'CASH', 'CASS', 'CBMF', 'CCSI', 'CEKA',
        'CENT', 'CFIN', 'CINT', 'CITA', 'CITY', 'CLAY', 'CLEO', 'CLPI', 'CMNP',
        'CMPP', 'CNKO', 'CNTX', 'COCO', 'COWL', 'CPIN', 'CPRI', 'CPRO', 'CSAP',
        'CSIS', 'CSMI', 'CSRA', 'CTBN', 'CTRA', 'CTTH', 'DADA', 'DART', 'DAYA',
        'DCII', 'DEAL', 'DEFI', 'DEWA', 'DFAM', 'DGIK', 'DGNS', 'DIGI', 'DILD',
        'DIVA', 'DKFT', 'DLTA', 'DMAS', 'DMMX', 'DMND', 'DNAR', 'DNET', 'DOID',
        'DPNS', 'DPUM', 'DSFI', 'DSNG', 'DSSA', 'DUCK', 'DUTI', 'DVLA', 'DWGL',
        'DYAN', 'EAST', 'ECII', 'EDGE', 'EKAD', 'ELSA', 'ELTY', 'EMDE', 'EMTK',
        'ENRG', 'ENVY', 'ENZO', 'EPAC', 'EPMT', 'ERAA', 'ERTX', 'ESIP', 'ESSA',
        'ESTA', 'ESTI', 'ETWA', 'EXCL', 'FAPA', 'FAST', 'FASW', 'FILM', 'FINN',
        'FIRE', 'FISH', 'FITT', 'FMII', 'FOOD', 'FORU', 'FORZ', 'FPNI', 'FREN',
        'FUJI', 'GAMA', 'GDST', 'GDYR', 'GEMA', 'GEMS', 'GGRM', 'GGRP', 'GHON',
        'GIAA', 'GJTL', 'GLOB', 'GLVA', 'GMFI', 'GMTD', 'GOLD', 'GOLL', 'GOOD',
        'GPRA', 'GSMF', 'GTBO', 'GWSA', 'GZCO', 'HADE', 'HDFA', 'HDIT', 'HDTX',
        'HEAL', 'HELI', 'HERO', 'HEXA', 'HITS', 'HKMU', 'HMSP', 'HOKI', 'HOME',
        'HOMI', 'HOTL', 'HRME', 'HRTA', 'HRUM', 'IATA', 'IBFN', 'IBST', 'ICBP',
        'ICON', 'IDPR', 'IFII', 'IFSH', 'IGAR', 'IIKP', 'IKAI', 'IKAN', 'IKBI',
        'IMAS', 'IMJS', 'IMPC', 'INAF', 'INAI', 'INCF', 'INCI', 'INCO', 'INDF',
        'INDO', 'INDR', 'INDS', 'INDX', 'INDY', 'INKP', 'INOV', 'INPC', 'INPP',
        'INPS', 'INRU', 'INTA', 'INTD', 'INTP', 'IPCC', 'IPCM', 'IPOL', 'IPTV',
        'IRRA', 'ISAT', 'ISSP', 'ITIC', 'ITMA', 'ITMG', 'JAST', 'JAWA', 'JAYA',
        'JECC', 'JGLE', 'JIHD', 'JKON', 'JKSW', 'JMAS', 'JPFA', 'JRPT', 'JSKY',
        'JSMR', 'JSPT', 'JTPE', 'KAEF', 'KARW', 'KAYU', 'KBAG', 'KBLI', 'KBLM',
        'KBLV', 'KBRI', 'KDSI', 'KEEN', 'KEJU', 'KIAS', 'KICI', 'KIJA', 'KINO',
        'KIOS', 'KJEN', 'KKGI', 'KLBF', 'KMDS', 'KMTR', 'KOBX', 'KOIN', 'KONI',
        'KOPI', 'KOTA', 'KPAL', 'KPAS', 'KPIG', 'KRAH', 'KRAS', 'KREN', 'LAND',
        'LAPD', 'LCGP', 'LCKM', 'LEAD', 'LIFE', 'LINK', 'LION', 'LMAS', 'LMPI',
        'LMSH', 'LPCK', 'LPGI', 'LPIN', 'LPKR', 'LPLI', 'LPPF', 'LPPS', 'LRNA',
        'LSIP', 'LTLS', 'LUCK', 'MABA', 'MAGP', 'MAIN', 'MAMI', 'MAPA', 'MAPB',
        'MAPI', 'MARI', 'MARK', 'MASA', 'MAYA', 'MBAP', 'MBSS', 'MBTO', 'MCAS',
        'MCOR', 'MDIA', 'MDKA', 'MDKI', 'MDLN', 'MDRN', 'MEDC', 'MEGA', 'MERK',
        'META', 'MFIN', 'MFMI', 'MGNA', 'MGRO', 'MICE', 'MIDI', 'MIKA', 'MINA',
        'MIRA', 'MITI', 'MKNT', 'MKPI', 'MLBI', 'MLIA', 'MLPL', 'MLPT', 'MMLP',
        'MNCN', 'MOLI', 'MPMX', 'MPOW', 'MPPA', 'MPRO', 'MRAT', 'MREI', 'MSIN',
        'MSKY', 'MTDL', 'MTFN', 'MTLA', 'MTPS', 'MTRA', 'MTSM', 'MTWI', 'MYOH',
        'MYOR', 'MYRX', 'MYTX', 'NASA', 'NATO', 'NELY', 'NFCX', 'NICK', 'NIKL',
        'NIPS', 'NIRO', 'NISP', 'NOBU', 'NRCA', 'NUSA', 'NZIA', 'OASA', 'OCAP',
        'OKAS', 'OMRE', 'OPMS', 'PADI', 'PALM', 'PAMG', 'PANI', 'PANR', 'PANS',
        'PBID', 'PBRX', 'PBSA', 'PCAR', 'PDES', 'PEGE', 'PEHA', 'PGAS', 'PGJO',
        'PGLI', 'PGUN', 'PICO', 'PJAA', 'PKPK', 'PLAN', 'PLAS', 'PLIN', 'PMJS',
        'PMMP', 'PNBN', 'PNBS', 'PNGO', 'PNIN', 'PNLF', 'PNSE', 'POLA', 'POLI',
        'POLL', 'POLU', 'POLY', 'POOL', 'PORT', 'POSA', 'POWR', 'PPGL', 'PPRE',
        'PPRO', 'PRAS', 'PRDA', 'PRIM', 'PSAB', 'PSDN', 'PSGO', 'PSKT', 'PSSI',
        'PTBA', 'PTDU', 'PTIS', 'PTPP', 'PTPW', 'PTRO', 'PTSN', 'PTSP', 'PUDP',
        'PURA', 'PURE', 'PURI', 'PWON', 'PYFA', 'PZZA', 'RAJA', 'RALS', 'RANC',
        'RBMS', 'RDTX', 'REAL', 'RELI', 'RICY', 'RIGS', 'RIMO', 'RISE', 'RMBA',
        'ROCK', 'RODA', 'RONY', 'ROTI', 'RUIS', 'SAFE', 'SAME', 'SAMF', 'SAPX',
        'SATU', 'SBAT', 'SCCO', 'SCMA', 'SCNP', 'SCPI', 'SDMU', 'SDPC', 'SDRA',
        'SFAN', 'SGER', 'SGRO', 'SHID', 'SHIP', 'SIDO', 'SILO', 'SIMA', 'SIMP',
        'SINI', 'SIPD', 'SKBM', 'SKLT', 'SKRN', 'SKYB', 'SLIS', 'SMAR', 'SMBR',
        'SMCB', 'SMDM', 'SMDR', 'SMGR', 'SMKL', 'SMMA', 'SMMT', 'SMRA', 'SMRU',
        'SMSM', 'SOCI', 'SOFA', 'SOHO', 'SONA', 'SOSS', 'SOTS', 'SPMA', 'SPTO',
        'SQMI', 'SRAJ', 'SRIL', 'SRSN', 'SRTG', 'SSIA', 'SSMS', 'SSTM', 'STAR',
        'STTP', 'SUGI', 'SULI', 'SUPR', 'SURE', 'SWAT', 'TALF', 'TAMA', 'TAMU',
        'TARA', 'TAXI', 'TBIG', 'TBLA', 'TBMS', 'TCID', 'TCPI', 'TDPM', 'TEBE',
        'TECH', 'TELE', 'TFAS', 'TFCO', 'TGKA', 'TGRA', 'TIFA', 'TINS', 'TIRA',
        'TIRT', 'TKIM', 'TLKM', 'TMAS', 'TMPO', 'TNCA', 'TOBA', 'TOPS', 'TOTL',
        'TOTO', 'TOWR', 'TOYS', 'TPIA', 'TPMA', 'TRAM', 'TRIL', 'TRIM', 'TRIN',
        'TRIO', 'TRIS', 'TRJA', 'TRST', 'TRUK', 'TRUS', 'TSPC', 'TUGU', 'TURI',
        'UANG', 'UCID', 'UFOE', 'ULTJ', 'UNIC', 'UNIQ', 'UNIT', 'UNSP', 'UNTR',
        'UNVR', 'URBN', 'VICI', 'VICO', 'VINS', 'VIVA', 'VOKS', 'VRNA', 'WAPO',
        'WEGE', 'WEHA', 'WICO', 'WIFI', 'WIIM', 'WIKA', 'WINS', 'WMUU', 'WOMF',
        'WOOD', 'WOWS', 'WSBP', 'WSKT', 'WTON', 'YELO', 'YPAS', 'YULE', 'ZBRA',
        'ZINC', 'ZONE'
    ]
    hari = 0
    tanggal2 = []
    tanggal = []
    itemss = []
    percakapan = []
    # driver.get('https://web.telegram.org/#/im?p=@TheTradersGroup')
    driver.get('https://web.telegram.org/#/im?p=g579054022')
    time.sleep(20)
    temp = ''
    temp2 = ''
    first = True
    ptemp = ''
    wrapper = driver.find_element_by_xpath(
        '/html/body/div[1]/div[2]/div/div[2]/div[3]/div/div[2]/div[1]/div/div[1]/div[2]/div[2]'
    )
    chat = wrapper.find_elements_by_xpath(
        ".//div[contains(@class, 'im_history_message_wrap')]")
    psn = len(chat)
    Stoped = False
    while True:
        joinn = False
        balas = ''
        penulis = ''
        last = penulis
        pesan2 = [""]
        jam = ''
        pesan2 = []
        pesan = driver.find_element_by_xpath(
            "/html/body/div[1]/div[2]/div/div[2]/div[3]/div/div[2]/div[1]/div/div[1]/div[2]/div[2]/div["
            + str(psn) + "]")
        driver.execute_script("arguments[0].scrollIntoView(true);", pesan)
        psn -= 1
        if (len(
                pesan.find_elements_by_xpath(
                    ".//a[@class='im_message_photo_thumb']")) > 0):
            # print('this is a photo')
            penulis = penulis + pesan.find_element_by_xpath(
                ".//a[contains(@class, 'im_message_author user_color_')]").text
            gambar = pesan.find_element_by_xpath(
                ".//img[@class='im_message_photo_thumb']").get_attribute('src')
            pesan4 = "photo"
            pesan2.insert(0, pesan4)
            if (len(
                    pesan.find_elements_by_xpath(
                        ".//div[@class='im_message_photo_caption']")) > 0):
                last = pesan.find_element_by_xpath(
                    ".//div[@class='im_message_photo_caption']").text
                emo = pesan.find_elements_by_xpath(
                    ".//span[@class='emoji  emoji-spritesheet-0']")
                for x in emo:
                    if (x.text.strip() != ''):
                        last = last.replace(x.text.strip(), ' ')
                pesan4 = pesan4 + last
                pesan2.insert(0, last)
            jam = jam + pesan.find_element_by_xpath(
                ".//span[@ng-bind='::historyMessage.date | time']").text
        elif (len(
                pesan.find_elements_by_xpath(
                    ".//span[@ng-switch-when='messageActionChatJoined']")) >
              0):
            print('someone joined')
            penulis = ''
            last = penulis
            pesan2 = [""]
            jam = ''
            joinn = True
            # print('entered branch 2')
        elif (len(
                pesan.find_elements_by_xpath(
                    ".//span[@class='im_message_date_split_text']")) > 0):
            if ((len(
                    pesan.find_elements_by_xpath(
                        ".//div[@class='im_message_date_split im_service_message_wrap' and @style='display: none;']"
                    )) > 0)):
                if (len(
                        pesan.find_elements_by_xpath(
                            ".//div[@class='im_message_text']")) > 0):
                    print('This is actually also a regular message')
                    penulis = penulis + pesan.find_element_by_xpath(
                        ".//a[contains(@class, 'im_message_author user_color_')]"
                    ).text
                    last = pesan.find_element_by_xpath(
                        ".//div[@class='im_message_text']").text
                    emo = pesan.find_elements_by_xpath(
                        ".//span[@class='emoji  emoji-spritesheet-0']")
                    for x in emo:
                        if (x.text.strip() != ''):
                            last = last.replace(x.text.strip(), ' ')
                    pesan4 = last
                    pesan2.insert(0, pesan4)
                    try:
                        print('Probably ended up here (2)')
                        jam = jam + pesan.find_element_by_xpath(
                            ".//span[@ng-bind='::historyMessage.date | time']"
                        ).text
                        if (jam == ''):
                            jamm2 = pesan.find_element_by_xpath(
                                ".//span[@class='im_message_date_text nocopy']"
                            )
                            jam = jam + jamm2.get_attribute('data-content')
                    except:
                        print('Seems it went here (2)')
                        jamm2 = pesan.find_element_by_xpath(
                            ".//span[@class='im_message_date_text nocopy']")
                        jam = jam + jamm2.get_attribute('data-content')
                    if (len(
                            pesan.find_elements_by_xpath(
                                ".//span[@my-short-message='replyMessage']")) >
                            0):
                        balas = pesan.find_element_by_xpath(
                            ".//span[@my-short-message='replyMessage']").text
                        emo = pesan.find_elements_by_xpath(
                            ".//span[@class='emoji  emoji-spritesheet-0']")
                        for x in emo:
                            if (x.text.strip() != ''):
                                balas = balas.replace(x.text.strip(), ' ')
                        pesan2.insert(0, "Membalas : " + balas)
            else:
                jamm2 = pesan.find_element_by_xpath(
                    ".//span[@class='im_message_date_text nocopy']")
                jam = jam + jamm2.get_attribute('data-content')
                print("ini adalah tanggal")
                tgl = pesan.find_element_by_xpath(
                    ".//span[@class='im_message_date_split_text']").text
                tgl = tgl.replace(",", "")
                print(tgl)
                for k in range(len(tanggal2)):
                    tanggal2[k]['Tanggal'] = tgl
                tanggal.extend(tanggal2)
                hari += 1
                belum = (hari != target)
                print(len(tanggal))
                tanggal2 = []
        else:
            print('this is a regular message')
            penulis = penulis + pesan.find_element_by_xpath(
                ".//a[contains(@class, 'im_message_author user_color_')]").text
            try:
                print('Probably ended up here')
                jam = jam + pesan.find_element_by_xpath(
                    ".//span[@ng-bind='::historyMessage.date | time']").text
                if (jam == ''):
                    jamm2 = pesan.find_element_by_xpath(
                        ".//span[@class='im_message_date_text nocopy']")
                    jam = jam + jamm2.get_attribute('data-content')
            except:
                try:
                    print('Seems it went here')
                    jamm2 = pesan.find_element_by_xpath(
                        ".//span[@class='im_message_date_text nocopy']")
                    jam = jam + jamm2.get_attribute('data-content')
                except:
                    print('someone joined')
                    penulis = ''
                    last = penulis
                    pesan2 = [""]
                    jam = ''
                    joinn = True
            if (not joinn):
                last = pesan.find_element_by_xpath(
                    ".//div[@class='im_message_text']").text
                emo = pesan.find_elements_by_xpath(
                    ".//span[@class='emoji  emoji-spritesheet-0']")
                for x in emo:
                    if (x.text.strip() != ''):
                        last = last.replace(x.text.strip(), ' ')
                pesan4 = last
                pesan2.insert(0, pesan4)
                if (len(
                        pesan.find_elements_by_xpath(
                            ".//span[@my-short-message='replyMessage']")) > 0):
                    balas = pesan.find_element_by_xpath(
                        ".//span[@my-short-message='replyMessage']").text
                    emo = pesan.find_elements_by_xpath(
                        ".//span[@class='emoji  emoji-spritesheet-0']")
                    for x in emo:
                        if (x.text.strip() != ''):
                            balas = balas.replace(x.text.strip(), ' ')
                    pesan2.insert(0, "Membalas : " + balas)
        pesan3 = "\n".join(pesan2)
        masuk = False
        stop = [",", ".", "#", "?", "*", "-"]
        cek = pesan3
        bahas = []
        for x in stop:
            cek = cek.replace(x, " ")
        for x in pesan3:
            b = x.isascii()
            if not b:
                pesan3 = pesan3.replace(x, ' ')
        print('Time before change : ', '|' + str(jam) + '|')
        if ('M' not in str(jam)):
            jam2 = 0
        else:
            jam2 = formatJam(jam)
        print('Temp before change : ', temp)
        if (temp == '' and not first):
            temp = 0
        elif (isinstance(temp, str) and temp != ''):
            # convert the stored timestamp string, not the current message's
            temp = formatJam(temp)
        elif (not isinstance(temp, int)):
            temp = 0
        print('compare = ', temp, '<', jam2)
        print(joinn)
        if ((temp < jam2 or first) and not joinn):
            if (first):
                temp = jam2
                temp2 = jam2
            elif (temp2 == ''):
                temp2 = jam2
            if any(word in cek.upper().split() for word in Saham):
                masuk = True
            if (penulis != ""):
                ptemp = penulis
            else:
                penulis = ptemp
            bahas = [
                word for word in Saham if word in cek.upper().split()
            ]
            for x in penulis:
                c = x.isascii()
                if not c:
                    penulis = penulis.replace(x, ' ')
            bahas = ",".join(bahas)
            print("Data : " + str(psn))
            print("user : " + penulis)
            print("Message : ", pesan3)
            print("Stocks : " + bahas)
            predict = svm(pesan3)
            print("Label : ", predict)
            print("time : " + jam)
            print(
                '======================================================='
            )
            ada = False
            print('Reply =', balas)
            print('Conversation =', percakapan)
            # counttt = 0
            if (not percakapan):
                print('Conversation case 1')
                percakapan.append([pesan4])
                perindex = 1
                ada = True
            elif (balas != ''):
                for a in percakapan:
                    print('Sub conversation =', a)
                    perindex2 = percakapan.index(a)
                    for b in a:
                        # if(counttt==30):
                        #     print('############Limit#####################')
                        #     time.sleep(99)
                        # counttt+=1
                        print('Sub a =', b + '||')
                        if (balas in b and balas != ''):
                            print('Conversation case 2')
                            percakapan[perindex2].append(pesan4)
                            perindex = perindex2 + 1
                            ada = True
            if (not ada):
                print('Conversation case 3')
                percakapan.append([pesan4])
                perindex = percakapan.index([pesan4]) + 1
            exist = penulis in ratee.User.values
            if (exist):
                df1 = ratee[ratee['User'] == penulis]
                hit = df1.iloc[0]['Hit']
                miss = df1.iloc[0]['Miss']
                rate = df1.iloc[0]['Rate']
                # print(exist)
            else:
                hit = 'No Record'
                miss = 'No Record'
                rate = 'No Record'
            item = {
                'User': penulis,
                'Pesan': pesan3,
                'Saham': bahas,
                'Label': predict,
                'Jam': jam,
                'Hit': str(hit),
                'Miss': str(miss),
                'Rate': str(rate),
                'Percakapan': str(perindex)
            }
            itemss.append(item)
            # if(not pesan3==''):
            #     emit('my response', item)
            tanggal2.append(item)
            penulis = ""
            pesan2 = []
            jam = ""
            print(
                '$$$$$$$$$$$$$$$$$$$$$$$$END OF FOR$$$$$$$$$$$$$$$$$$$$$$$$$$$$'
            )
            if (first):
                Stopped = True
        else:
            Stopped = True
        if (Stopped):
            if (itemss):
                itemss.reverse()
                for it in itemss:
                    emit('my response', it)
            itemss = []
            # time.sleep(10)
            print('Temp before entering stop : ', temp)
            if (not first):
                if (temp2 != ''):
                    temp = temp2
            temp2 = ''
            # time.sleep(300)
            driver.execute_script("location.reload()")
            time.sleep(10)
            psn = 0
            while psn == 0:
                wrapper = driver.find_element_by_xpath(
                    '/html/body/div[1]/div[2]/div/div[2]/div[3]/div/div[2]/div[1]/div/div[1]/div[2]/div[2]'
                )
                chat = wrapper.find_elements_by_xpath(
                    ".//div[contains(@class, 'im_history_message_wrap')]")
                psn = len(chat)
            Stopped = False
            temp3 = 0
            while temp3 != psn:
                temp3 = psn
                pesan = driver.find_element_by_xpath(
                    "/html/body/div[1]/div[2]/div/div[2]/div[3]/div/div[2]/div[1]/div/div[1]/div[2]/div[2]/div["
                    + str(psn) + "]")
                driver.execute_script("arguments[0].scrollIntoView(true);",
                                      pesan)
                print(
                    "************************************SCROLL***********************************************"
                )
                time.sleep(2)
                wrapper = driver.find_element_by_xpath(
                    '/html/body/div[1]/div[2]/div/div[2]/div[3]/div/div[2]/div[1]/div/div[1]/div[2]/div[2]'
                )
                chat = wrapper.find_elements_by_xpath(
                    ".//div[contains(@class, 'im_history_message_wrap')]")
                psn = len(chat)
        print('Time at end = ', jam)
        print('Temp at end = ', temp)
        first = False
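# formatJam is called above but not defined in this snippet. A minimal sketch
# of what it presumably does, assuming Telegram renders "HH:MM AM/PM"
# timestamps (the code only converts `jam` when it contains an 'M'); mapping
# the timestamp to minutes since midnight makes temp/jam2 comparable as ints:
def formatJam(jam):
    waktu, meridiem = jam.strip().split()          # e.g. "10:45", "PM"
    hh, mm = (int(x) for x in waktu.split(':'))
    if meridiem.upper() == 'PM' and hh != 12:
        hh += 12
    elif meridiem.upper() == 'AM' and hh == 12:
        hh = 0
    return hh * 60 + mm                            # sortable integer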
def run(train_file, test_file):
    svm.svm(train_file, test_file)
def test10Fold():
    global allWords
    splits = tenFoldCrossValidation()

    # All five classifiers share the same train/test/getCorrectCount
    # interface, so one helper replaces the five copied evaluation loops.
    def evaluate(name, make_model):
        print(name)
        total = 0
        for count, split in enumerate(splits):
            model = make_model()
            trainFeatures = [example.features for example in split.train]
            trainClasses = [example.klass for example in split.train]
            testFeatures = [example.features for example in split.test]
            testClasses = [example.klass for example in split.test]

            model.train(trainFeatures, trainClasses)
            model.test(testFeatures, testClasses)
            accuracy = model.getCorrectCount() / len(testClasses)
            total = total + accuracy
            print("[INFO]\tFold ", str(count), " Accuracy:", str(accuracy))

        print("[INFO]\tAccuracy:", str(total / 10))

    evaluate("Naive Bayes", naiveBayes)
    evaluate("Random Forest", lambda: RandomForest(100))
    evaluate("Neural 5", lambda: neuralNetwork((5, ), 1000))
    evaluate("Neural 3", lambda: neuralNetwork((3, ), 1000))
    evaluate("SVM", svm)
from sklearn.metrics import accuracy_score, f1_score
from svm import svm
from ae import ae
from DAE import dae
from idae import idae

noise = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
output = open('result', 'w')
output.write('models    ' + 'acc      ' + 'f1' + '\n')

for i in noise:
    y_test, y_pre = svm(i)
    print("svm")
    print(accuracy_score(y_test, y_pre))
    print(f1_score(y_test, y_pre, average=None))
    print(f1_score(y_test, y_pre, average='macro'))

    y_test, y_pre = ae(i)
    print("AE")
    print(accuracy_score(y_test, y_pre))
    print(f1_score(y_test, y_pre, average=None))
    print(f1_score(y_test, y_pre, average='macro'))

    y_test, y_pre = dae(i)
    print("DAE")
    print(accuracy_score(y_test, y_pre))
    print(f1_score(y_test, y_pre, average=None))
    print(f1_score(y_test, y_pre, average='macro'))

    y_test, y_pre = idae(i)
    print("IDAE")
    print(accuracy_score(y_test, y_pre))
    print(f1_score(y_test, y_pre, average=None))
    print(f1_score(y_test, y_pre, average='macro'))
Exemple #40
0
            '--acccoin',
            type=float,
            dest='acc_coin',
            default=0.15,
            help='Accuracy to determine if contour is a coin or not.')
        parser.add_argument(
            '-ar',
            '--accrect',
            type=float,
            dest='acc_rect',
            default=0.01,
            help='Accuracy to determine if contour is a rectangle or not.')
        arg = parser.parse_args()

        if arg.svm:  # check arguments
            svm(arg)
        elif arg.new:
            if arg.image is None:
                print('Path of image is not set')
            elif arg.refa is None:
                print('Reference A is not set')
            elif arg.refb is None:
                print('Reference B is not set')
            else:
                new(arg)
        elif arg.count:
            if arg.image is None:
                print('Path of image is not set')
            elif arg.refa is None:
                print('Reference A is not set')
            elif arg.refb is None:
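# For reference, argparse entry points like the one above can be exercised
# without a real command line by passing an argv list; only the two flags
# visible in this truncated snippet are used here (values are illustrative):
#   arg = parser.parse_args(['--acccoin', '0.2', '--accrect', '0.02'])
#   print(arg.acc_coin, arg.acc_rect)   # -> 0.2 0.02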
Exemple #41
0
        nn.predict("dataset/001 - Dog bark/1-30226-A.ogg", le,
                   "trained_cnn.h5")

    elif sys.argv[1] == "mlp":

        #convert into numpy array
        X, y, le = get_numpy_array(features_df)

        # split into training and testing data
        X_train, X_test, y_train, y_test = get_train_test(X, y)
        num_labels = y.shape[1]

        # create model architecture
        model = nn.create_mlp(num_labels)

        # train model
        print("Training..")
        nn.train(model, X_train, X_test, y_train, y_test, "trained_mlp.h5")

        # compute test loss and accuracy
        test_loss, test_accuracy = nn.compute(X_test, y_test, "trained_mlp.h5")
        print("Test loss", test_loss)
        print("Test accuracy", test_accuracy)

        # predicting using trained model with any test file in dataset
        nn.predict("dataset/001 - Dog bark/1-30226-A.ogg", le,
                   "trained_mlp.h5")

    elif sys.argv[1] == "svm":
        svm.svm(features_df)
Exemple #42
0
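# (snippet truncated above: `N_train` and `random` are defined earlier in the
# original file; `random.randn` suggests numpy's random module, e.g.
# `from numpy import random` -- an assumption, it is not shown here)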
X = []
Y = []

for k in range(N_train):
    if random.randn() < 0:
        X.append([-1 + random.randn() * 0.5, -1 + random.randn() * 0.5])
        Y.append([-1])
    else:
        X.append([1 + random.randn() * 0.5, 1 + random.randn() * 0.5])
        Y.append([1])
N_test = 100
X_test = []
Y_test = []

for k in range(N_test):
    if random.randn() < 0:
        X_test.append([-1 + random.randn() * 0.5, -1 + random.randn() * 0.5])
        Y_test.append([-1])
    else:
        X_test.append([1 + random.randn() * 0.5, 1 + random.randn() * 0.5])
        Y_test.append([1])

s = svm(2 + 1, weighted=False)

s.train(X, Y, 0.1)

Y_pre = s.predict(X_test)

#for k in range(len(Y_pre)):
#    print(Y_test[k][0] - Y_pre[k][0])
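# A quick sanity check on the toy test set (assumes, as the commented loop
# above suggests, that predict() returns one [label]-like row per example;
# sign agreement counts as correct for the +/-1 labels):
correct = sum(1 for y, p in zip(Y_test, Y_pre) if y[0] * p[0] > 0)
print("test accuracy:", correct / len(Y_test))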