def recognize_symbol(features, img, fsize):
    """Return the label whose stored feature best matches the one in ``img``.

    The feature of ``img`` is computed with ``extract_feature(img, fsize)``
    and scored against every entry of ``features`` with ``compare_feature``.

    Args:
        features: Iterable of ``(value, feature)`` pairs to match against.
        img: Image handed to ``extract_feature``.
        fsize: Feature size handed to ``extract_feature``.

    Returns:
        The ``value`` of the pair with the lowest score, provided that score
        is strictly below 1. On equal scores the earliest pair wins. ``None``
        when no pair scores below 1 (including when ``features`` is empty).
    """
    # Get the feature of the image we want to recognize.
    feat = extract_feature(img, fsize)

    match = None
    # Acceptance threshold: a score of 1 or more never counts as a match.
    # (Named ``best_distance`` rather than ``min`` to avoid shadowing the
    # builtin.)
    best_distance = 1

    for val, candidate in features:
        # Compare the stored feature to our own.
        distance = compare_feature(candidate, feat)
        if distance < best_distance:
            # Strictly better than anything seen so far: remember it.
            match = val
            best_distance = distance

    return match
import time
import pandas as pd


def _read_grayscale(path):
    """Read the image at ``path`` with OpenCV and convert it BGR -> grayscale.

    Raises:
        OSError: If ``cv2.imread`` returns ``None``, which is how it reports
            a file it could not read (instead of raising).
    """
    image = cv2.imread(path)
    if image is None:
        # Fail here with the offending path rather than later inside
        # cv2.cvtColor with an opaque assertion error.
        raise OSError("Could not read image: " + path)
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)


# ------------------------------------------------------------------
# -------------- Import Images and Create DataFrame ----------------
# ------------------------------------------------------------------
# Base name shared by the original image and its labeled counterpart.
name = "gfrp"

# Import original image
# ------------------------------------------------------------------
img = _read_grayscale(name + '.tif')

# Feature Extraction
# ------------------------------------------------------------------
# NOTE(review): extract_feature is defined elsewhere; presumably it returns
# a DataFrame with one row per pixel of ``img`` -- confirm.
df = extract_feature(img)

# Import labeled image and add to dataframe
# ------------------------------------------------------------------
labeled = _read_grayscale('labeled_' + name + '.tif')
# Flatten the label image to 1-D so it can be stored as a single column.
labeled_array = labeled.reshape(-1)
df['Label'] = labeled_array

# ------------------------------------------------------------------
# ---------------------- Training and Validation -------------------
# ------------------------------------------------------------------
# Dependent variables
# ------------------------------------------------------------------
Y = df['Label'].values
except ValueError: print("Enter 0,1,2 to specify testing document") exit() except IndexError: TEST = 2 #def get_ from sklearn.tree import DecisionTreeClassifier train = corpus.load_corpus(all=True) statistic = analytics.load_analytics(train) heighest_probabilty = {} for i in statistic: heighest_probabilty[i] = max(statistic[i].items(),key=lambda x:x[1])[0] X_train_raw, Y_train_raw = extract_feature(data=train) #Global label_encoder to encode X values global_label_encoder,global_hot_encoder = set_encoder(Y_train_raw) print("Training Global Classifer ....") X_train,Y_train = encode_features(X_train_raw,Y_train_raw,global_label_encoder,global_hot_encoder) global_clf = DecisionTreeClassifier() global_clf.fit(X_train,Y_train) print("Completed") # print(train) # Identify the ambiguity classes amb_class = {} for i in train: for x,y in enumerate(i): #If the word only has one tagging, we don't need a classifier
from features import extract_feature, set_encoder, encode_features
from corpus import load_corpus
from sklearn.tree import DecisionTreeClassifier

# --- Training data ---------------------------------------------------
# Raw features and labels come from the default corpus.
training_corpus = load_corpus()
X_train_raw, Y_train_raw = extract_feature(data=training_corpus)

# The encoders are built from the training labels and reused for the
# test data below.
label_encoder, hot_encoder = set_encoder(Y_train_raw)
X_train, Y_train = encode_features(
    X_train_raw,
    Y_train_raw,
    label_encoder,
    hot_encoder,
)

# --- Model -----------------------------------------------------------
clf = DecisionTreeClassifier()
clf.fit(X_train, Y_train)

# --- Evaluation ------------------------------------------------------
# The test data is the corpus loaded with ``last=True``.
test_corpus = load_corpus(last=True)
X_test_raw, Y_test_raw = extract_feature(test_corpus)
X_test, Y_test = encode_features(
    X_test_raw,
    Y_test_raw,
    label_encoder,
    hot_encoder,
)

# Print the classifier's score on the test data.
test_score = clf.score(X_test, Y_test)
print(test_score)
''' if ((feature_no<14) & (preprocess=='preclean_Tweet')): continue ''' train_save_path = '..//features//train_feature_' + str( feature_no) + "_" + preprocess + ".pkl" test_save_path = '..//features//test_feature_' + str( feature_no) + "_" + preprocess + ".pkl" if (os.path.isfile(train_save_path) & os.path.isfile(test_save_path)): print('read feature') X = pickle.load(open(train_save_path, 'rb')) X_test = pickle.load(open(test_save_path, 'rb')) else: print('extract feature') X, X_test = extract_feature(preprocess, preprocessed_train_tweets, preprocessed_test_tweets, feature_no, train_save_path, test_save_path) print(X.shape) print(X_test.shape) ''' X1, X_test1=extract_feature(preprocessed_train_tweets,preprocessed_test_tweets,0,'','') X2, X_test2=extract_feature(preprocessed_train_tweets,preprocessed_test_tweets,2,'','') X=np.concatenate((X1.toarray(), X2), axis=1) X_test=np.concatenate((X_test1.toarray(),X_test2),axis=1) ''' # Model Training # Single Stage # Combine if ( not os.path.isfile(result_path + 'svm_OneStage_combine_' + str(feature_no) + "_" + preprocess + ".txt")