Exemple #1
0
def classification(model, get_features=True, train_bow=True,
                   root_path='/home/mitya/Documents/CMake/mlschool/mlschool_01/',
                   features_folder='./arrays/'):
    """Fit ``model`` on bag-of-words features and score the test set.

    Args:
        model: Estimator exposing ``fit(X, y)`` (returning the fitted
            estimator) and ``predict_proba(X)``.
        get_features: When truthy, run ``transform_data`` for both the train
            and the test CSV before anything else.
        train_bow: When truthy, fit the vocabulary on the training CSV.
        root_path: Directory holding ``final_train.csv``, ``final_test.csv``,
            ``train_dataset/`` and ``test_dataset/``. Defaults to the
            location the function was originally hard-wired to.
        features_folder: Folder handed to every vocabulary call.

    Returns:
        Column 1 of ``predict_proba`` evaluated on the test features
        (presumably the positive-class probability - confirm against the
        label encoding).
    """
    import os

    csv_train = os.path.join(root_path, 'final_train.csv')
    csv_test = os.path.join(root_path, 'final_test.csv')

    # Read the labels first so a missing/corrupt CSV fails before the
    # expensive feature extraction starts.
    csv_reader = pd.read_csv(csv_train, sep=',')

    vocabulary = bow.bow('bag_of_words', 'kmeans', 10)

    if get_features:
        vocabulary.transform_data(
            csv_train, os.path.join(root_path, 'train_dataset/'), features_folder)
        vocabulary.transform_data(
            csv_test, os.path.join(root_path, 'test_dataset/'), features_folder)

    if train_bow:
        vocabulary.fit(csv_train, features_folder, 100)

    # NOTE(review): the numeric arguments (10, 100, 20, 10) are passed through
    # to the project's bow class unchanged; their meaning is not visible here.
    X = vocabulary.transform(csv_train, features_folder, 20)
    # Keep only as many labels as there are feature rows.
    y = csv_reader['image_label'].values[:X.shape[0]].ravel()

    # Fit before transforming the test set, matching the original order.
    fitted = model.fit(X, y)
    X_test = vocabulary.transform(csv_test, features_folder, 10)
    preds = fitted.predict_proba(X_test)[:, 1]

    # To build a submission, callers can load the test CSV, store ``preds`` in
    # its 'image_label' column and write it back out.
    return preds
def getBowNeighbors(img, ka=1000):
    """Score ``ka`` randomly drawn training images against ``img``.

    Each draw picks a file from 'train/data/', loads it with ``cv.imread`` and
    stores ``bow(img, loaded)`` under the file name. A file drawn more than
    once is scored again and keeps a single entry, so the result can hold
    fewer than ``ka`` names.

    Returns:
        The sampled file names sorted by ascending score.
    """
    path = 'train/data/'
    candidates = os.listdir(path)
    scores = {}

    for done in range(1, ka + 1):
        # Progress report every 100 draws.
        if done % 100 == 0:
            print('Done {0:d}/{1:d}'.format(done, ka))
        # NOTE(review): randint's upper bound is exclusive, so the last two
        # directory entries are never sampled and a folder with two or fewer
        # files raises - confirm whether the "- 2" is intentional.
        pick = candidates[np.random.randint(len(candidates) - 2)]
        scores[pick] = bow(img, cv.imread(path + pick))

    return sorted(scores, key=lambda name: scores[name])
Exemple #3
0
def extra(text, n, op):
    """Run the routine selected by ``op`` and return its result.

    ``op`` values 1 and 9 call ``bow`` / ``tfidf`` on the raw ``text``;
    values 2-8 call ``lexs``, ``luhn``, ``lsa``, ``textrank``, ``sumbasic``,
    ``klsum`` and ``reduction`` on a parser built from ``text``. Any other
    value yields ``None``.

    The parser is always constructed, even for the text-only routines, and
    relies on a module-level ``language`` name.
    """
    parser = PlaintextParser(text, Tokenizer(language))

    # (op code, deferred call) pairs, checked in order with ``==``.
    routines = (
        (1, lambda: bow(text, n)),
        (2, lambda: lexs(parser, n)),
        (3, lambda: luhn(parser, n)),
        (4, lambda: lsa(parser, n)),
        (5, lambda: textrank(parser, n)),
        (6, lambda: sumbasic(parser, n)),
        (7, lambda: klsum(parser, n)),
        (8, lambda: reduction(parser, n)),
        (9, lambda: tfidf(text, n)),
    )
    for code, run in routines:
        if op == code:
            return run()
    return None
Exemple #4
0
def bow_proxy(tu):
    """Call ``bow.bow`` with the items of ``tu`` as positional arguments."""
    positional = tuple(tu)
    return bow.bow(*positional)
Exemple #5
0



def bow_proxy(tu):
    """Unpack ``tu`` and forward its items positionally to ``bow.bow``."""
    unpacked = tuple(tu)
    return bow.bow(*unpacked)

if __name__ == "__main__":
    # One image folder per object class.
    cars_dir, cows_dir, bikes_dir = (
        "PNGImages/cars",
        "PNGImages/cows",
        "PNGImages/bikes",
    )
    image_dirs = [cars_dir, cows_dir, bikes_dir]

    # Generate descriptors: one task per folder (chunksize 1) on 3 workers,
    # blocking until every folder has been processed.
    p = Pool(3)
    pending = p.map_async(gen_dir, image_dirs, 1)
    pending.get()

    # Cluster a sample of the descriptors to obtain the centroids.
    files = descriptor.select_sample(cars_dir, cows_dir, bikes_dir)
    centers = bow.kmeans(files)
    wrapped_centers = [
        CentroidWrapper(center.tolist(), index)
        for index, center in enumerate(centers)
    ]

    # Build a kd-tree over the centroids and rebalance it if needed.
    # NOTE(review): dimensions=128 presumably matches the descriptor length -
    # confirm against gen_dir.
    tree = kdtree.create(point_list=wrapped_centers, dimensions=128)
    if not tree.is_balanced:
        tree = tree.rebalance()

    p.close()
    p.join()

    # Compute the bag-of-words representation for every folder.
    for image_dir in image_dirs:
        bow.bow(image_dir, tree)
 
    
Exemple #6
0
import euclidean

path = 'F:/Kuliah/STKI/3-11-2017 kuis/text files/'

# Preprocessed text of every .txt file found in `path`, keyed by file name.
articles = {}

for item in os.listdir(path):
    if item.endswith(".txt"):
        with open(os.path.join(path, item), 'r', encoding='utf-8') as handle:
            articles[item] = preprocessing.preprotext(handle.read())

# Bag-of-words representation of each article (built from its whitespace tokens).
list_of_bow = [bow.bow(text.split()) for text in articles.values()]

# Build the matrix from the bag-of-words list.
matrix_akhir = matrix.matrix(list_of_bow)

# Explicit subset of documents to load, and their preprocessed text by name.
dokumen = ['bk.txt', 'ed.txt', 'ot.txt', 'en.txt', 'lf.txt', 'bl.txt', 'tk.txt']
dokumens = {}

#----------------------------------------------------------------------

# Each listed document is read exactly once. The previous version wrapped this
# loop in a second loop over os.listdir(path), which re-read every document
# once per directory entry while shadowing the loop variable.
for item in dokumen:
    with open(os.path.join(path, item), 'r', encoding='utf-8') as handle:
        dokumens[item] = preprocessing.preprotext(handle.read())

#representasi bow
Exemple #7
0
import longsword, greatsword, bow, axe, mace

# Create a bow from the `bow` module and print its stats.
# NOTE(review): the three arguments are passed as strings, not numbers -
# confirm the weapon constructors expect that.
Quickshot = bow.bow('10', '25', '50')
Quickshot.print_stats()

# Create a mace from the `mace` module the same way and print its stats.
Bludgeoner = mace.mace('10', '10', '5')
Bludgeoner.print_stats()
Exemple #8
0
# Estimators that are fit on the training matrix and whose predictions are
# used as-is. Values are zero-argument factories, so nothing is constructed
# (and no estimator name is looked up) until an entry is actually selected.
_LABEL_ESTIMATORS = {
    # C SVM - recorded accuracy 0.44503
    "c_svm": lambda: SVC(),
    # C SVM L1 - recorded accuracy 0.44503
    "c_svm_l1": lambda: LinearSVC(penalty='l1', dual=False),
    # Logistic regression
    "log_reg": lambda: linear_model.LogisticRegression(),
    # KNN
    "knn": lambda: KNeighborsClassifier(n_neighbors=20),
    # Naive Bayes
    "naive_bayes": lambda: GaussianNB(),
    # AdaBoost over an SGD base estimator.
    # NOTE(review): SGDClassifier(n_iter=...) only exists in old scikit-learn
    # releases - confirm the pinned version before upgrading.
    "adaboost": lambda: AdaBoostClassifier(
        linear_model.SGDClassifier(n_iter=50),
        n_estimators=100, learning_rate=1, algorithm="SAMME"),
}

# Regressors whose continuous predictions are rounded with np.round.
_ROUNDED_REGRESSORS = {
    # OLS
    "ols": lambda: linear_model.LinearRegression(),
    # Ridge regression
    "ridge_reg": lambda: linear_model.Ridge(alpha=0.001),
    # Lasso
    "lasso": lambda: linear_model.Lasso(alpha=.15, max_iter=-1),
}


def _predict_train_and_test(model, X_tr, X_te, round_output=False):
    """Return ``(train predictions, test predictions)`` from a fitted model."""
    y_tr_p = model.predict(X_tr)
    y_te_p = model.predict(X_te)
    if round_output:
        y_tr_p = np.round(y_tr_p)
        y_te_p = np.round(y_te_p)
    return y_tr_p, y_te_p


def models(X_tr_n, y_tr, X_te_n, classifier):
    """Train the model named by ``classifier`` and predict the test set.

    Args:
        X_tr_n: Training feature matrix.
        y_tr: Training labels.
        X_te_n: Test feature matrix.
        classifier: One of "c_svm", "c_svm_l1", "log_reg", "c_svm_param",
            "knn", "naive_bayes", "ols", "ridge_reg", "lasso", "adaboost",
            "random_forest", "nn" or "bow".

    Returns:
        The test-set predictions, or ``False`` when ``classifier`` is not
        recognised (in which case no accuracy is reported).

    Side effects:
        Prints the best grid-search parameters for "c_svm_param" and
        "random_forest", and reports training accuracy via ``print_accuracy``.
    """
    factory = _LABEL_ESTIMATORS.get(classifier) or _ROUNDED_REGRESSORS.get(classifier)

    if factory is not None:
        model = factory()
        model.fit(X_tr_n, y_tr)
        y_tr_p, y_te_p = _predict_train_and_test(
            model, X_tr_n, X_te_n,
            round_output=classifier in _ROUNDED_REGRESSORS)

    elif classifier == "c_svm_param":
        # C SVM with searched parameters - recorded accuracy 0.50164.
        # grid_search is used as returning an already-fitted search object:
        # it is never fit here before predicting.
        model = grid_search(X_tr_n, y_tr)
        print("Best params = ")
        print(model.best_params_)
        y_tr_p, y_te_p = _predict_train_and_test(model, X_tr_n, X_te_n)

    elif classifier == "random_forest":
        # Random forest tuned by an exhaustive grid search.
        clf = RandomForestClassifier(n_jobs=3)
        param_grid = {"max_depth": [10, 20, 30],
                      "max_features": [50, 100, 200],
                      "n_estimators": [10, 50, 100]}
        model = GridSearchCV(clf, param_grid=param_grid)
        model.fit(X_tr_n, y_tr)
        print(model.best_params_)
        y_tr_p, y_te_p = _predict_train_and_test(model, X_tr_n, X_te_n)

    elif classifier == "nn":
        # NOTE(review): X_tr / X_te are not parameters of this function; this
        # branch relies on names defined elsewhere (presumably the
        # un-normalised data) - confirm they exist at module level.
        y_tr_p, y_te_p = keras_CNN(X_tr, y_tr, X_te)

    elif classifier == "bow":
        # Bag of words: features are rebuilt from the data read from disk,
        # so X_tr_n / X_te_n are not used by this branch.
        X_tr_full_res, _ = read_X_full_res('data/train')
        X_te_full_res, _ = read_X_full_res('data/test')

        bow_obj = bow(kmeans_K=100)
        X_bow_tr = bow_obj.fit_predict(X_tr_full_res)
        X_bow_te = bow_obj.predict(X_te_full_res)

        model = SVC()
        model.fit(X_bow_tr, y_tr)
        y_tr_p, y_te_p = _predict_train_and_test(model, X_bow_tr, X_bow_te)

    else:
        print("No Classifier selected")
        return False

    print_accuracy(y_tr, y_tr_p, "Training")

    return y_te_p
#A = [0, 0, 5, 3, 5, 2, 0, 1, 0, 0, 0]
#B = [0, 2, 1, 0, 1, 0, 3, 0, 1, 0, 0]
#
#print (euclidean(A,B))

# Preprocessed text of every .txt file in `path`, keyed by file name.
articles = {}
text_files = [name for name in os.listdir(path) if name.endswith(".txt")]
for name in text_files:
    with open(path + "/" + name, 'r', encoding="utf-8") as handle:
        articles[name] = preprocessing.preprotext(handle.read())

# Bag-of-words representation of each article.
list_of_bow = [bow.bow(text.split()) for text in articles.values()]

# Build the matrix from the bag-of-words list.
matrix_akhir = matrix.matrix(list_of_bow)

#print (matrix_akhir)
#print (euclidean(matrix_akhir[0], matrix_akhir[3]))

#jarak = {}
#for key, vektor in zip(articles.keys(), matrix_akhir):
#    jarak[key] = euclidean.euclidean(matrix_akhir[0], vektor)
#
#print (jarak)

#jarak = []
#