def encode(df, dump=fromPickle):
    """
    Takes in: dataframe from clean_col
    Returns: a dataframe that LabelEncodes the categorical variables
    """
    encoders = dict()
    for col in lblColumns:
        if col not in final_cols:
            continue
        le = LabelEncoder()
        if dump:
            fName = "%s/%s.npy" % (modelPath, col)
            if os.path.isfile(fName):
                le.classes_ = np.load(fName)
            else:
                le.fit(df[col])
                np.save(fName, le.classes_)
        else:
            le.fit(df[col])
        encoders[col] = le
        df[col] = le.transform(df[col])

    # Order columns with logprice as the last column
    df = df[final_cols]
    df = df.reset_index().drop('index', axis=1)
    return df
def data(fold=False):
    fname = df.zoo.download('http://dags.stanford.edu/data/iccv09Data.tar.gz')

    # extracting files one-by-one in memory is unfortunately WAY too slow
    # for this dataset. So we bite the bullet and extract the full tgz.
    where = _p.dirname(fname)
    imgdir = 'iccv09Data/images/'
    with _taropen(fname, 'r') as f:
        f.extractall(where)
        ids = [_p.basename(n)[:-4] for n in f.getnames() if n.startswith(imgdir)]

    X = [imread(_p.join(where, imgdir, i) + '.jpg') for i in ids]
    y = [_np.loadtxt(_p.join(where, 'iccv09Data/labels', i) + '.regions.txt', dtype=_np.int32)
         for i in ids]

    # I personally don't believe in the other label types.
    le = _np.array(['sky', 'tree', 'road', 'grass', 'water', 'building',
                    'mountain', 'foreground', 'object'])
    try:
        from sklearn.preprocessing import LabelEncoder
        le, classes = LabelEncoder(), le
        le.classes_ = classes
    except ImportError:
        pass

    if fold is False:
        return X, y, le

    lo, hi = fold*ntest(), (fold+1)*ntest()
    Xtr = X[:lo] + X[hi:]
    ytr = y[:lo] + y[hi:]
    Xte = X[lo:hi]
    yte = y[lo:hi]
    return (Xtr, ytr), (Xte, yte), le
def restore(self, model_path):
    '''
    Restore a saved multiencoder from path using npz file, by reconstructing
    the LabelEncoders with the classes. Restore the X header too.
    '''
    path = model_path + '/encoder.npz'
    h_path = model_path + '/header.npz'
    npzfile = np.load(path)
    h_npzfile = np.load(h_path)
    self.header = h_npzfile['header']
    self.encoders = {}
    for k, v in npzfile.items():
        le = LabelEncoder()
        le.classes_ = v
        self.encoders[k] = le
    self.columns = list(self.encoders.keys())
    return self
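# A minimal, hedged sketch of the saving side that restore() above assumes: one
# classes_ array per column is written to encoder.npz and the feature header to
# header.npz via np.savez. The function name and arguments are illustrative and
# not taken from the original class.
import numpy as np

def save_multiencoder(model_path, encoders, header):
    np.savez(model_path + '/encoder.npz',
             **{col: le.classes_ for col, le in encoders.items()})
    np.savez(model_path + '/header.npz', header=header)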
def learn_sentdist(clean_pcc, feature_list=None, label_features=None):
    """ Learning a classifier for the distance of arguments from a connective

    Runs a random forest. Prints out accuracy scores from a 5-fold cross validation.
    Returns the classifier and the label encoder that was used.

    :param clean_pcc: Cleaned PCC data, no NaNs
    :type clean_pcc: pd.DataFrame
    :param feature_list: list of features that shall be calculated with
        discourse_connective_text_featurizer
    :param label_features: list of features that have to be encoded as labels
    :return: trained classifier, score array and label encoder
    :rtype: tuple
    """
    print 'Calculating features...'
    # Taking our favorite featurizer
    featurizer = lambda sents, conn_pos: discourse_connective_text_featurizer(
        sents, conn_pos, feature_list=feature_list)
    features = sentdist_feature_dataframe(clean_pcc, featurizer)  # Got features of X
    print 'Calculated all features'

    # We need to encode the non-numerical labels
    le = LabelEncoder()
    # LabelEncoder only deals with 1 dim np.arrays
    le.fit(features[label_features].values.ravel())
    # Dealing with unknowns
    le.classes_ = np.append(le.classes_, '<unknown>')
    features = encode_label_features(features, le, label_features)

    print 'Cross validating classifier...'
    clf = RandomForestClassifier(min_samples_leaf=5, n_jobs=-1, verbose=0)
    scores = cross_val_score(clf, features, clean_pcc['sentence_dist'], cv=5)
    print 'Cross validated classifier\nscores: %s\nmean score: %f' % (str(scores), scores.mean())

    print 'Learning classifier on the whole data set...'
    clf.fit(features, clean_pcc['sentence_dist'])
    print 'Learned classifier on the whole data set'

    return clf, scores, le
def create_kaggle_submission(prob, ids_raw, score=None, threshold=0.0):
    """
    Given a model, load training data and predict on it. Includes id col.
    Note that this assumes only one prediction per user. (for now)
    """
    ids = []  # list of ids
    cts = []  # list of countries
    # threshold = 0.0
    le = LabelEncoder()
    le.classes_ = COUNTRY_CLASSES
    for i in xrange(len(list(ids_raw))):
        idx = ids_raw[i]
        if pd.isnull(idx):
            py.test.set_trace()
        valid = sorted([(j, k) for j, k in enumerate(prob[i]) if k >= threshold],
                       key=lambda x: x[1], reverse=True)
        valid_ids, valid_prob = zip(*valid)
        sub_countries = list([le.inverse_transform(x) for x in valid_ids])[:5]
        ids += [idx] * len(sub_countries)
        cts += sub_countries

    # spot check that it's not all 7's
    irene = [np.argmax(i) for i in prob]
    print pd.Series(irene).value_counts()

    # Generate submission
    sub = pd.DataFrame(np.column_stack((ids, cts)), columns=['id', 'country'])
    # datetime submission
    date_str = datetime.datetime.now().strftime('%y%m%d_%H%M')
    if score:
        sub.to_csv('submission_%.4f.csv' % score, index=False)
    else:
        sub.to_csv('submission_%s.csv' % date_str, index=False)
    return sub
if __name__ == '__main__':

    # TODO: ROS node initialization
    rospy.init_node('clustering', anonymous=True)

    # TODO: Create Subscribers
    pcl_sub = rospy.Subscriber("sensor_stick/point_cloud", pc2.PointCloud2,
                               pcl_callback, queue_size=1)

    # TODO: Create Publishers
    pcl_objects_pub = rospy.Publisher("/pcl_objects", PointCloud2, queue_size=1)
    pcl_table_pub = rospy.Publisher("/pcl_table", PointCloud2, queue_size=1)
    pcl_cluster_pub = rospy.Publisher("/pcl_cluster", PointCloud2, queue_size=1)
    pcl_detected_obj_pub = rospy.Publisher("/pcl_detected_obj", PointCloud2, queue_size=1)
    object_markers_pub = rospy.Publisher("/object_markers", Marker, queue_size=1)

    # TODO: Load Model From disk
    model = pickle.load(open('model.sav', 'rb'))
    clf = model['classifier']
    encoder = LabelEncoder()
    encoder.classes_ = model['classes']
    scaler = model['scaler']

    # Initialize color_list
    get_color_list.color_list = []

    # TODO: Spin while node is not shutdown
    while not rospy.is_shutdown():
        rospy.spin()
def analisarPendente():
    print(request.data)
    K.clear_session()
    data = json.loads(request.data)
    model = load_model('SmartGardenModelBinary.h5')
    dfTest = json_normalize(data)

    le_insumo = LabelEncoder()
    le_mes = OrdinalEncoder(categories=[[
        'jan', 'fev', 'mar', 'abr', 'mai', 'jun',
        'jul', 'ago', 'set', 'out', 'nov', 'dez'
    ]])
    le_aparencia = OrdinalEncoder(categories=[['murcha', 'amarelada', 'normal']])
    le_temp = OrdinalEncoder(categories=[['baixo', 'medio', 'alto']])
    le_umi = OrdinalEncoder(categories=[['baixo', 'medio', 'alto']])
    le_risco = OrdinalEncoder(categories=[['baixo', 'medio', 'alto']])

    le_insumo.classes_ = np.load('./pickle/insumo.npy', allow_pickle=True)
    le_mes.categories_ = np.load('./pickle/mes.npy', allow_pickle=True)
    le_aparencia.categories_ = np.load('./pickle/aparencia.npy', allow_pickle=True)
    le_temp.categories_ = np.load('./pickle/temp.npy', allow_pickle=True)
    le_umi.categories_ = np.load('./pickle/umi.npy', allow_pickle=True)
    le_risco.categories_ = np.load('./pickle/risco.npy', allow_pickle=True)

    insumo = le_insumo.transform(dfTest['insumo'])
    mes = le_mes.transform(dfTest[['mes']])
    aparencia = le_aparencia.transform(dfTest[['aparencia']])
    dfTest['insumo'] = insumo
    dfTest['mes'] = mes
    dfTest['aparencia'] = aparencia

    predictions = model.predict(dfTest.values)
    predictions[predictions >= 0.5] = 1
    predictions[predictions < 0.5] = 0

    nn_preds = pd.DataFrame(predictions, columns=[0, 1, 2, 0, 1, 2, 0, 1, 2])
    nn_preds['encGeral'] = (nn_preds.iloc[:, 0:3] == 1).idxmax(1)
    nn_preds['encUmi'] = (nn_preds.iloc[:, 3:6] == 1).idxmax(1)
    nn_preds['encTemp'] = (nn_preds.iloc[:, 6:9] == 1).idxmax(1)
    nn_preds['stRisco'] = le_risco.inverse_transform(nn_preds[['encGeral']])
    nn_preds['stUmi'] = le_umi.inverse_transform(nn_preds[['encUmi']])
    nn_preds['stTemp'] = le_temp.inverse_transform(nn_preds[['encTemp']])
    nn_preds = nn_preds[['stRisco', 'stUmi', 'stTemp']]

    nn_preds = nn_preds.to_json()
    nn_preds = json.loads(nn_preds)
    nn_preds['stRisco'] = nn_preds['stRisco']['0']
    nn_preds['stTemp'] = nn_preds['stTemp']['0']
    nn_preds['stUmi'] = nn_preds['stUmi']['0']

    return json.dumps(nn_preds)
          batch_size=32,
          epochs=10,
          verbose=2)

# save the trained model
model.save("mathsymbols.model")

# save label encoder (to reverse one-hot encoding)
np.save('classes.npy', label_encoder.classes_)

# In[14] (load the pre-trained model and predict the math symbol for an arbitrary image)
model2 = keras.models.load_model("mathsymbols.model")
print(model2.summary())

# restore the class name to integer encoder
label_encoder2 = LabelEncoder()
label_encoder2.classes_ = np.load('classes.npy')


def predict(img_path):
    newimg = keras.preprocessing.image.img_to_array(pil_image.open(img_path))
    newimg /= 255.0

    # do the prediction
    prediction = model2.predict(newimg.reshape(1, 32, 32, 3))

    # figure out which output neuron had the highest score, and reverse the one-hot encoding
    inverted = label_encoder2.inverse_transform(
        [np.argmax(prediction)])  # argmax finds highest-scoring output
    print("Prediction: %s, confidence: %.2f" % (inverted[0], np.max(prediction)))
# CQT Parameters
fminval = 36
fmin = librosa.midi_to_hz(fminval)
n_bins = 72

# Window Params
cf = "classes_windowed.npy"
lf = "labels_windowed.npy"
log_name_cqt = "cqt_win_knn_{0}_{1}_{2}".format(hop_length, fminval, n_bins)
files_path = "files_win"

pp.create_labels(audio_path=audio_path, output_classes=cf, output_labels=lf)

labels = np.load(resource_path + "\\labels\\" + lf)
labelencoder = LabelEncoder()
labelencoder.classes_ = np.load(resource_path + "\\labels\\" + cf)
classes = labelencoder.transform(labels)
print(labels.shape)
print(classes.shape)

# CREATE VECTORS
feature_vectors, files = pp.get_cqt_folder(path=audio_path)
pp.save_cqt_sk(feature_vectors, log_name_cqt + ".pl")
np.save(resource_path + "\\files\\" + files_path + ".npy", files)

scaled_feature_vectors = pickle.load(
    open(resource_path + "\\feature_vectors\\CQT_SK\\" + log_name_cqt + ".pl", "rb"))

# for cqt omit otherwise
scaled_feature_vectors = scaled_feature_vectors.reshape(
    len(scaled_feature_vectors), n_bins)
def load_model():
    # Load model architecture, weight and labels for character recognition
    json_file = open('model/ResNets_character_recognition_spyder_new.json')
    loaded_model_json = json_file.read()
    json_file.close()
    model = model_from_json(loaded_model_json)
    model.load_weights("License_character_recognition_spyder_new.h5")
    print("[INFO] Model loaded successfully...")
    return model


labels = LabelEncoder()
labels.classes_ = np.load('model/license_character_classes_Spyder.npy')
model = load_model()


def display_img(img_path):
    img = IPythonImage(filename=img_path)
    st.image(Image.open(img))


def sort_contours(cnts, reverse=False):
    i = 0
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    (cnts, boundingBoxes) = zip(*sorted(
        zip(cnts, boundingBoxes), key=lambda b: b[1][i], reverse=reverse))
    return cnts
if (len(tmp) > 0):
    extra_encoding_col.append(col)
    for el in tmp:
        lst_extra_encodings.append(el)

# Label Encoding
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
i = 0
for col in categorical_cols:
    label_encoder.fit(train[col])
    i += 1
    if (col == 'EngineVersion'):
        label_encoder.classes_ = np.append(
            label_encoder.classes_, '1.1.11602.0')  # count = 1 in test set
        label_encoder.classes_ = np.append(
            label_encoder.classes_, '1.1.12002.0')  # count = 1 in test set
    if (col == 'AppVersion'):
        label_encoder.classes_ = np.append(
            label_encoder.classes_, '4.18.1806.20015')  # count = 1 in test set
        label_encoder.classes_ = np.append(
            label_encoder.classes_, '4.11.15063.1154')  # count = 1 in test set
        label_encoder.classes_ = np.append(
            label_encoder.classes_, '4.12.17007.18021')  # count = 1 in test set
        label_encoder.classes_ = np.append(
            label_encoder.classes_, '4.13.17627.1000')  # count = 1 in test set
        label_encoder.classes_ = np.append(
            label_encoder.classes_, '4.9.10586.1177')  # count = 1 in test set
    if (col == 'OsVer'):
        cosine = F.linear(x_norm, w_norm, None)
        out = cosine * self.scale
        return out


resume = sys.argv[1]
encoder = sys.argv[2]
x_test = sys.argv[3]
train_folder = "/home/blcv/CODE/Kaggle/humpback_short_blażej/data/processed/train_bb_fastai2/"
test_df = "/home/blcv/CODE/Kaggle/humpback_whale_identification/data/processed/sample_submission.csv"
test_folder = "/home/blcv/CODE/Kaggle/humpback_short_blażej/data/processed/test_bb_fastai2/"
option_da = ['gray']  # []

# encode whale as integers
label_encoder = LabelEncoder()
label_encoder.classes_ = np.load(encoder)

X_test = pd.read_csv(x_test)
val_loader = getDataLoader(X_test, train_folder, 'val', option_da=option_da,
                           image_size=224, batch_size=64)

# model preparation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = 'se_resnext101_32x4d'
model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')
model.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
model.last_linear = nn.Sequential(*[
    nn.LayerNorm(model.last_linear.in_features, elementwise_affine=False),
    NormLinear(model.last_linear.in_features, 5004)
])
model = model.to(device)
model = nn.DataParallel(model)
def giveLE(self):
    encoder = LabelEncoder()
    encoder.classes_ = np.load('classes.npy')
    return encoder
def loadLabelEncoder(file):
    encoder = LabelEncoder()
    encoder.classes_ = np.load(file)
    return encoder
def get_label_encoder():
    """This returns the label encoder which contains label to integer mapping"""
    encoder = LabelEncoder()
    encoder.classes_ = np.load('label_encoder.npy')
    return encoder
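# Hedged sketch (not part of the snippets above) of how the 'classes.npy' /
# 'label_encoder.npy' files these loaders expect are typically produced: fit a
# LabelEncoder and persist only its classes_ array. File name and labels here
# are illustrative assumptions.
import numpy as np
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
encoder.fit(["bird", "cat", "dog"])              # fit on the training labels
np.save('label_encoder.npy', encoder.classes_)   # save the class names only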
def process_data(data_type='train', write_to_csv=False, return_df=True, include_sessions=False):
    train = pd.read_csv(TRAINING_DATA, header=0)
    test = pd.read_csv(TEST_DATA, header=0)
    train_countries = train['country_destination']
    train_ids = train['id']
    test_ids = test['id']
    train.drop(['id', 'country_destination'], axis=1, inplace=True)
    test.drop(['id'], axis=1, inplace=True)
    piv_train = train.shape[0]
    data = pd.concat((train, test), axis=0, ignore_index=True)

    # features to output into model training data
    nonnumeric_columns = [
        'gender',
        'signup_method',
        'signup_flow',
        'language',
        'affiliate_channel',
        'affiliate_provider',
        'first_affiliate_tracked',
        'signup_app',
        'first_device_type',
        'first_browser',
    ]

    # add_null_cols(data)
    add_date_cols(data)
    parse_age(data)
    data = add_categorical_cols(data, nonnumeric_columns, data_type)
    data = fill_in_na(data)

    vals = data.values
    X = vals[:piv_train]
    le = LabelEncoder()
    le.classes_ = COUNTRY_CLASSES
    y = le.fit_transform(train_countries)

    train_df = pd.DataFrame(X, columns=data.columns)
    train_df['id'] = train_ids
    train_df['country_destination'] = y

    X_kaggle = vals[piv_train:]
    test_df = pd.DataFrame(X_kaggle, columns=data.columns)
    test_df['id'] = test_ids

    if include_sessions:
        sessions_df = pd.read_csv('sessions_users.csv')
        train_df = train_df.merge(sessions_df, how='left', left_index='id', right_index='user_id')
        train_df.fillna(0, inplace=True)
        test_df = test_df.merge(sessions_df, how='left', left_index='id', right_index='user_id')
        test_df.fillna(0, inplace=True)

    if write_to_csv:
        if include_sessions:
            test_df.to_csv('test_sessions.csv', index=False)
            train_df.to_csv('train_sessions.csv', index=False)
            print 'Wrote train_sessions.csv, test_sessions.csv'
        else:
            test_df.to_csv('test.csv', index=False)
            train_df.to_csv('train.csv', index=False)
            print 'Wrote train.csv, test.csv'

    if return_df:
        return test_df, train_df
from Preprocess import extract_face, get_embedding
from tensorflow.python.keras.models import load_model
from sklearn.preprocessing import Normalizer, LabelEncoder
import argparse
import pickle
import numpy as np

in_encoder = Normalizer()
out_encoder = LabelEncoder()
out_encoder.classes_ = np.load('classes.npy')

facenet_model = load_model('facenet_keras.h5')
with open('SVCtrainedModel.pkl', 'rb') as f:
    model = pickle.load(f)

ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="Test Image Path")
# ap.add_argument("-n", "--name", required=True,
#                 help="Name of the person (same as the class name)")
args = vars(ap.parse_args())

random_face = extract_face(args['image'])
random_face_emd = in_encoder.transform(
    [get_embedding(facenet_model, random_face)])[0]
# random_face_name = args['name']

samples = np.expand_dims(random_face_emd, axis=0)
yhat_class = model.predict(samples)
yhat_prob = model.predict_proba(samples)
class_index = yhat_class[0]
def mark_your_attendance_out(request): detector = dlib.get_frontal_face_detector() predictor = dlib.shape_predictor( 'face_recognition_data/shape_predictor_68_face_landmarks.dat' ) #Add path to the shape predictor ######CHANGE TO RELATIVE PATH LATER svc_save_path = "face_recognition_data/svc.sav" with open(svc_save_path, 'rb') as f: svc = pickle.load(f) fa = FaceAligner(predictor, desiredFaceWidth=96) encoder = LabelEncoder() encoder.classes_ = np.load('face_recognition_data/classes.npy') faces_encodings = np.zeros((1, 128)) no_of_faces = len(svc.predict_proba(faces_encodings)[0]) count = dict() present = dict() log_time = dict() start = dict() for i in range(no_of_faces): count[encoder.inverse_transform([i])[0]] = 0 present[encoder.inverse_transform([i])[0]] = False vs = VideoStream(src=0).start() sampleNum = 0 while (True): frame = vs.read() frame = imutils.resize(frame, width=800) gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) faces = detector(gray_frame, 0) for face in faces: print("INFO : inside for loop") (x, y, w, h) = face_utils.rect_to_bb(face) face_aligned = fa.align(frame, gray_frame, face) cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 1) (pred, prob) = predict(face_aligned, svc) if (pred != [-1]): person_name = encoder.inverse_transform(np.ravel([pred]))[0] pred = person_name if count[pred] == 0: start[pred] = time.time() count[pred] = count.get(pred, 0) + 1 if count[pred] == 4 and (time.time() - start[pred]) > 1.5: count[pred] = 0 else: #if count[pred] == 4 and (time.time()-start) <= 1.5: present[pred] = True log_time[pred] = datetime.datetime.now() count[pred] = count.get(pred, 0) + 1 print(pred, present[pred], count[pred]) cv2.putText(frame, str(person_name) + str(prob), (x + 6, y + h - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) else: person_name = "unknown" cv2.putText(frame, str(person_name), (x + 6, y + h - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) #cv2.putText() # Before continuing to the next loop, I want to give it a little pause # waitKey of 100 millisecond #cv2.waitKey(50) #Showing the image in another window #Creates a window with window name "Face" and with the image img cv2.imshow("Mark Attendance- Out - Press q to exit", frame) #Before closing it we need to give a wait command, otherwise the open cv wont work # @params with the millisecond of delay 1 #cv2.waitKey(1) #To get out of the loop key = cv2.waitKey(50) & 0xFF if (key == ord("q")): break #Stoping the videostream vs.stop() # destroying all the windows cv2.destroyAllWindows() update_attendance_in_db_out(present) # return redirect('admin') return render(request, 'recognition/admin_dashboard.html')
'''
# convert Tag1 from strings to integers
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
train['Tag1_enc'] = le.fit_transform(train.Tag1)

# create a dummy column for each value of Tag1_enc (returns a sparse matrix)
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder()
tag1_dummies = ohe.fit_transform(train[['Tag1_enc']])

# adjust Tag1 on testing set since LabelEncoder errors on new values during a transform
test['Tag1'] = test['Tag1'].map(lambda s: '<unknown>' if s not in le.classes_ else s)
le.classes_ = np.append(le.classes_, '<unknown>')

# define X and y
X = tag1_dummies
y = train.OpenStatus

# apply the same encoding to the actual testing data and make predictions
test['Tag1_enc'] = le.transform(test.Tag1)
oos_tag1_dummies = ohe.transform(test[['Tag1_enc']])
nb.fit(X, y)
oos_pred_prob = nb.predict_proba(oos_tag1_dummies)[:, 1]

sub = pd.DataFrame({
    'id': test.index,
    'OpenStatus': oos_pred_prob
}).set_index('id')
sub.to_csv('sub4.csv')  # 0.652
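# A small, hedged generalization of the '<unknown>' trick used above: map any value
# the encoder has never seen to a sentinel class before calling transform(). The
# function name and sentinel string are illustrative, not from the original notebook.
import numpy as np

def transform_with_unknown(le, values, sentinel='<unknown>'):
    if sentinel not in le.classes_:
        le.classes_ = np.append(le.classes_, sentinel)
    known = set(le.classes_)
    safe = [v if v in known else sentinel for v in values]
    return le.transform(safe)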
if __name__ == '__main__':

    # TODO: ROS node initialization
    rospy.init_node('clustering', anonymous=True)

    # TODO: Create Subscribers
    pcl_sub = rospy.Subscriber("/sensor_stick/point_cloud", pc2.PointCloud2,
                               pcl_callback, queue_size=1)

    # TODO: Create Publishers
    pcl_objects_pub = rospy.Publisher("/pcl_objects", pc2.PointCloud2, queue_size=1)
    pcl_table_pub = rospy.Publisher("/pcl_table", pc2.PointCloud2, queue_size=1)
    pcl_cluster_pub = rospy.Publisher("/pcl_cluster", pc2.PointCloud2, queue_size=1)
    object_markers_pub = rospy.Publisher("/object_markers", Marker, queue_size=1)
    detected_objects_pub = rospy.Publisher("/detected_objects", DetectedObjectsArray, queue_size=1)

    # TODO: Load Model From disk
    model = pickle.load(open('model.sav', 'rb'))
    clf = model['classifier']
    encoder = LabelEncoder()
    encoder.classes_ = model['classes']
    scaler = model['scaler']

    # Initialize color_list
    get_color_list.color_list = []

    # TODO: Spin while node is not shutdown
    while not rospy.is_shutdown():
        rospy.spin()
import cv2 as cv
import numpy as np
from sklearn.preprocessing import LabelEncoder
import warnings

warnings.filterwarnings("ignore")

face_cascade = cv.CascadeClassifier(
    '../cascades/haarcascade_frontalface_alt2.xml')
recognizer = cv.face.LBPHFaceRecognizer_create()
recognizer.read('../Saved_models/model.yml')

le = LabelEncoder()
le.classes_ = np.load('../Saved_models/encoder.npy')

cap = cv.VideoCapture(0)
while True:
    # ...Capture frame by frame
    ret, frame = cap.read()
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)

    # ...Detecting faces and saving the final captured frame cropping face
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.5, minNeighbors=5)
    for x, y, w, h in faces:
        # print(x, y, w, h)
        roi_gray = gray[y:y + h, x:x + w]
        roi_color = frame[y:y + h, x:x + w]
        # img_item = 'face_detected.jpg'
from flask import Flask, render_template, url_for
from flask_wtf import FlaskForm
from wtforms import FileField
from flask_uploads import configure_uploads, AUDIO, UploadSet
import os
import numpy as np
import librosa
from tensorflow import keras
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder

# Load model and label encoder
model = keras.models.load_model("./saved_models/weights.best.basic_cnn.hdf5")
le = LabelEncoder()
le.classes_ = np.load("./saved_models/classes.npy")

# data dims
num_rows = 40
num_columns = 174
num_channels = 1

# CODE FOR FLASK APP
app = Flask(__name__, static_folder=os.path.join(os.getcwd(), "static"))
app.config['SECRET_KEY'] = "clishmaclaver"
app.config['UPLOADED_AUDIOSET_DEST'] = "static"

audioset = UploadSet("audioset", AUDIO)
configure_uploads(app, audioset)
def plate_reader(plate_pic): # remove warning message # remove warning message import os os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # required library import cv2 import numpy as np from local_utils import detect_lp from os.path import splitext,basename from keras.models import model_from_json from sklearn.preprocessing import LabelEncoder import glob def get_opencv_img_from_buffer(buffer, flags): bytes_as_np_array = np.frombuffer(buffer.read(), dtype=np.uint8) return cv2.imdecode(bytes_as_np_array, flags) def load_model(path): try: path = splitext(path)[0] with open('%s.json' % path, 'r') as json_file: model_json = json_file.read() model = model_from_json(model_json, custom_objects={}) model.load_weights('%s.h5' % path) print("Loading model successfully...") return model except Exception as e: print(e) wpod_net_path = "wpod-net.json" wpod_net = load_model(wpod_net_path) def preprocess_image(image_path,resize=False): # print(image_path) # img = cv2.imread(image_path) img = image_path img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img = img / 255 if resize: img = cv2.resize(img, (255,255)) return img def get_plate(image_path, Dmax=608, Dmin=256): vehicle = preprocess_image(image_path) ratio = float(max(vehicle.shape[:2])) / min(vehicle.shape[:2]) side = int(ratio * Dmin) bound_dim = min(side, Dmax) _ , LpImg, _, cor = detect_lp(wpod_net, vehicle, bound_dim, lp_threshold=0.5) return vehicle, LpImg, cor # test_image_path = "Plate_examples/usa_car_plate.jpg" test_image_path = get_opencv_img_from_buffer(plate_pic, cv2.IMREAD_UNCHANGED) vehicle, LpImg,cor = get_plate(test_image_path) # fig = plt.figure(figsize=(12,6)) # grid = gridspec.GridSpec(ncols=2,nrows=1,figure=fig) # fig.add_subplot(grid[0]) # plt.axis(False) # plt.imshow(vehicle) # grid = gridspec.GridSpec(ncols=2,nrows=1,figure=fig) # fig.add_subplot(grid[1]) # plt.axis(False) # plt.imshow(LpImg[0]) if (len(LpImg)): #check if there is at least one license image # Scales, calculates absolute values, and converts the result to 8-bit. 
plate_image = cv2.convertScaleAbs(LpImg[0], alpha=(255.0)) # convert to grayscale and blur the image gray = cv2.cvtColor(plate_image, cv2.COLOR_BGR2GRAY) blur = cv2.GaussianBlur(gray,(7,7),0) # Applied inversed thresh_binary binary = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV,11,2) kernel3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)) thre_mor = cv2.morphologyEx(binary, cv2.MORPH_DILATE, kernel3) # # visualize results # fig = plt.figure(figsize=(12,7)) # plt.rcParams.update({"font.size":18}) # grid = gridspec.GridSpec(ncols=2,nrows=3,figure = fig) # plot_image = [plate_image, gray, blur, binary,thre_mor] # # plot_image = [ gray] # plot_name = ["gray","blur","binary","dilation", 'thre_mor'] # for i in range(len(plot_image)): # fig.add_subplot(grid[i]) # plt.axis(False) # plt.title(plot_name[i]) # if i ==0: # plt.imshow(plot_image[i]) # else: # plt.imshow(plot_image[i],cmap="gray") # plt.savefig("threshding.png", dpi=300) # Create sort_contours() function to grab the contour of each digit from left to right # Create sort_contours() function to grab the contour of each digit from left to right def sort_contours(cnts,reverse = False): i = 0 boundingBoxes = [cv2.boundingRect(c) for c in cnts] (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes), key=lambda b: b[1][i], reverse=reverse)) return cnts cont, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # creat a copy version "test_roi" of plat_image to draw bounding box test_roi = plate_image.copy() # Initialize a list which will be used to append charater image crop_characters = [] # define standard width and height of character digit_w, digit_h = 20, 50 for c in sort_contours(cont): (x, y, w, h) = cv2.boundingRect(c) ratio = h/w if 1<=ratio<=3.5: # Only select contour with defined ratio if h/plate_image.shape[0]>=0.4: # Select contour which has the height larger than 40% of the plate # Draw bounding box arroung digit number cv2.rectangle(test_roi, (x, y), (x + w, y + h), (0, 255,0), 2) # Sperate number and gibe prediction curr_num = thre_mor[y:y+h,x:x+w] curr_num = cv2.resize(curr_num, dsize=(digit_w, digit_h)) _, curr_num = cv2.threshold(curr_num, 220, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) crop_characters.append(curr_num) print("Detect {} letters...".format(len(crop_characters))) # fig = plt.figure(figsize=(10,6)) # plt.axis(False) # plt.imshow(test_roi) #plt.savefig('grab_digit_contour.png',dpi=300) # fig = plt.figure(figsize=(14,4)) # grid = gridspec.GridSpec(ncols=len(crop_characters),nrows=1,figure=fig) # for i in range(len(crop_characters)): # fig.add_subplot(grid[i]) # plt.axis(False) # plt.imshow(crop_characters[i],cmap="gray") #plt.savefig("segmented_leter.png",dpi=300) # Load model architecture, weight and labels json_file = open('MobileNets_character_recognition.json', 'r') loaded_model_json = json_file.read() json_file.close() model = model_from_json(loaded_model_json) model.load_weights("License_character_recognition_weight.h5") print("[INFO] Model loaded successfully...") labels = LabelEncoder() labels.classes_ = np.load('license_character_classes.npy') print("[INFO] Labels loaded successfully...") # plt.savefig("threshding.png", dpi=300) # pre-processing input images and pedict with model def predict_from_model(image,model,labels): image = cv2.resize(image,(80,80)) image = np.stack((image,)*3, axis=-1) prediction = labels.inverse_transform([np.argmax(model.predict(image[np.newaxis,:]))]) return prediction # fig = plt.figure(figsize=(15,3)) # cols = 
len(crop_characters) # grid = gridspec.GridSpec(ncols=cols,nrows=1,figure=fig) final_string = '' for i,character in enumerate(crop_characters): # fig.add_subplot(grid[i]) title = np.array2string(predict_from_model(character,model,labels)) # plt.title('{}'.format(title.strip("'[]"),fontsize=20)) final_string+=title.strip("'[]") # plt.axis(False) # plt.imshow(character,cmap='gray') print("Achieved result: ", final_string) # plt.savefig('final_result.png', dpi=300) print(type(final_string)) return final_string
result = model.fit(image_gen.flow(trainX, trainY, batch_size=BATCH_SIZE), steps_per_epoch=len(trainX) // BATCH_SIZE, validation_data=(testX, testY), validation_steps=len(testX) // BATCH_SIZE, epochs=EPOCHS, callbacks=my_checkpointer) # Load model architecture, weight and labels json_file = open('/content/Plate_detect_and_recognize/MobileNets_character_recognition.json', 'r') loaded_model_json = json_file.read() json_file.close() model = model_from_json(loaded_model_json) model.load_weights("/content/Plate_detect_and_recognize/License_character_recognition_weight.h5") print("[INFO] Model loaded successfully...") labels = LabelEncoder() labels.classes_ = np.load('/content/Plate_detect_and_recognize/license_character_classes.npy') print("[INFO] Labels loaded successfully...") # pre-processing input images and pedict with model def predict_from_model(image,model,labels): image = cv2.resize(image,(80,80)) image = np.stack((image,)*3, axis=-1) prediction = labels.inverse_transform([np.argmax(model.predict(image[np.newaxis,:]))]) return prediction fig = plt.figure(figsize=(15,3)) cols = len(crop_characters) grid = gridspec.GridSpec(ncols=cols,nrows=1,figure=fig) final_string = '' for i,character in enumerate(crop_characters):
def train_model(params, device): embeddings = None if (params['bert_tokens']): train, val, test = createDatasetSplit(params) else: train, val, test, vocab_own = createDatasetSplit(params) params['embed_size'] = vocab_own.embeddings.shape[1] params['vocab_size'] = vocab_own.embeddings.shape[0] embeddings = vocab_own.embeddings if (params['auto_weights']): y_test = [ele[2] for ele in test] # print(y_test) encoder = LabelEncoder() encoder.classes_ = np.load(params['class_names'], allow_pickle=True) params['weights'] = class_weight.compute_class_weight( 'balanced', np.unique(y_test), y_test).astype('float32') #params['weights']=np.array([len(y_test)/y_test.count(encoder.classes_[0]),len(y_test)/y_test.count(encoder.classes_[1]),len(y_test)/y_test.count(encoder.classes_[2])]).astype('float32') batch_size_eval = min(params['batch_size'], 32) train_dataloader = combine_features(train, params, is_train=True) train_dataloader_eval = combine_features(train, params, is_train=True, batch_size=batch_size_eval) validation_dataloader = combine_features(val, params, is_train=False, batch_size=batch_size_eval) test_dataloader = combine_features(test, params, is_train=False, batch_size=batch_size_eval) model = select_model(params, embeddings) if (params["device"] == 'cuda'): model.cuda() optimizer = AdamW( model.parameters(), lr=params[ 'learning_rate'], # args.learning_rate - default is 5e-5, our notebook had 2e-5 eps=params['epsilon'] # args.adam_epsilon - default is 1e-8. ) # Number of training epochs (authors recommend between 2 and 4) # Total number of training steps is number of batches * number of epochs. total_steps = len(train_dataloader) * params['epochs'] # Create the learning rate scheduler. if (params['bert_tokens']): scheduler = get_linear_schedule_with_warmup( optimizer, num_warmup_steps=int(total_steps / 10), num_training_steps=total_steps) # Set the seed value all over the place to make this reproducible. fix_the_random(seed_val=params['random_seed']) # Store the average loss after each epoch so we can plot them. loss_values = [] best_val_fscore = 0 best_test_fscore = 0 best_val_roc_auc = 0 best_test_roc_auc = 0 best_val_precision = 0 best_test_precision = 0 best_val_recall = 0 best_test_recall = 0 for epoch_i in range(0, params['epochs']): print("") print('======== Epoch {:} / {:} ========'.format( epoch_i + 1, params['epochs'])) print('Training...') # Measure how long the training epoch takes. t0 = time.time() # Reset the total loss for this epoch. total_loss = 0 model.train() if params['bert_tokens']: tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=False) # For each batch of training data... for step, batch in tqdm(enumerate(train_dataloader)): # Progress update every 40 batches. if step % 40 == 0 and not step == 0: # Calculate elapsed time in minutes. elapsed = format_time(time.time() - t0) # `batch` contains three pytorch tensors: # [0]: input ids # [1]: attention vals # [2]: attention mask # [3]: labels b_input_ids = batch[0].to(device) b_att_val = batch[1].to(device) b_input_mask = batch[2].to(device) b_labels = batch[3].to(device) # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch) model.zero_grad() outputs = model(b_input_ids, attention_vals=b_att_val, attention_mask=b_input_mask, labels=b_labels, device=device) # The call to `model` always returns a tuple, so we need to pull the # loss value out of the tuple. 
loss = outputs[0] # Accumulate the training loss over all of the batches so that we can # calculate the average loss at the end. `loss` is a Tensor containing a # single value; the `.item()` function just returns the Python value # from the tensor. batch_loss = loss.item() total_loss += batch_loss # Perform a backward pass to calculate the gradients. loss.backward() # Clip the norm of the gradients to 1.0. # This is to help prevent the "exploding gradients" problem. torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) # Update parameters and take a step using the computed gradient. # The optimizer dictates the "update rule"--how the parameters are # modified based on their gradients, the learning rate, etc. optimizer.step() # Update the learning rate. if (params['bert_tokens']): scheduler.step() if (params['logging'] == 'neptune'): neptune.log_metric('batch_loss', batch_loss) # Calculate the average loss over the training data. avg_train_loss = total_loss / len(train_dataloader) if (params['logging'] == 'neptune'): neptune.log_metric('avg_train_loss', avg_train_loss) else: print('avg_train_loss', avg_train_loss) # Store the loss value for plotting the learning curve. loss_values.append(avg_train_loss) train_fscore, train_accuracy, train_precision, train_recall, train_roc_auc, _ = Eval_phase( params, 'train', model, train_dataloader_eval, device) val_fscore, val_accuracy, val_precision, val_recall, val_roc_auc, _ = Eval_phase( params, 'val', model, validation_dataloader, device) test_fscore, test_accuracy, test_precision, test_recall, test_roc_auc, logits_all_final = Eval_phase( params, 'test', model, test_dataloader, device) #Report the final accuracy for this validation run. if (params['logging'] == 'neptune'): neptune.log_metric('test_fscore', test_fscore) neptune.log_metric('test_accuracy', test_accuracy) neptune.log_metric('test_precision', test_precision) neptune.log_metric('test_recall', test_recall) neptune.log_metric('test_rocauc', test_roc_auc) neptune.log_metric('val_fscore', val_fscore) neptune.log_metric('val_accuracy', val_accuracy) neptune.log_metric('val_precision', val_precision) neptune.log_metric('val_recall', val_recall) neptune.log_metric('val_rocauc', val_roc_auc) neptune.log_metric('train_fscore', train_fscore) neptune.log_metric('train_accuracy', train_accuracy) neptune.log_metric('train_precision', train_precision) neptune.log_metric('train_recall', train_recall) neptune.log_metric('train_rocauc', train_roc_auc) if (val_fscore > best_val_fscore): print(val_fscore, best_val_fscore) best_val_fscore = val_fscore best_test_fscore = test_fscore best_val_roc_auc = val_roc_auc best_test_roc_auc = test_roc_auc best_val_precision = val_precision best_test_precision = test_precision best_val_recall = val_recall best_test_recall = test_recall if (params['bert_tokens']): save_bert_model(model, tokenizer, params) else: print("Saving model") save_normal_model(model, params) if (params['logging'] == 'neptune'): neptune.log_metric('best_val_fscore', best_val_fscore) neptune.log_metric('best_test_fscore', best_test_fscore) neptune.log_metric('best_val_rocauc', best_val_roc_auc) neptune.log_metric('best_test_rocauc', best_test_roc_auc) neptune.log_metric('best_val_precision', best_val_precision) neptune.log_metric('best_test_precision', best_test_precision) neptune.log_metric('best_val_recall', best_val_recall) neptune.log_metric('best_test_recall', best_test_recall) neptune.stop() else: print('best_val_fscore', best_val_fscore) print('best_test_fscore', best_test_fscore) 
print('best_val_rocauc', best_val_roc_auc) print('best_test_rocauc', best_test_roc_auc) print('best_val_precision', best_val_precision) print('best_test_precision', best_test_precision) print('best_val_recall', best_val_recall) print('best_test_recall', best_test_recall) del model torch.cuda.empty_cache() return 1
def plot_confusion_matrix(cm, classes, path, encoder_model, normalize=True, title='Confusion matrix', cmap=plt.cm.Blues ): """ This function prints and plots the confusion matrix. Normalization can be applied by setting `normalize=True`. Taken straight vom SKLEARN. :param cm: confusion matrix generated by sklearn :param classes: range with lenght of classes :param encoder_model: encoder_model :param path: saving path """ # Set figsize plt.figure(figsize=(5.8, 3.58)) # change font size according to number of classes if len(classes) == 120: mpl.rcParams.update({'font.size': 3}) else: mpl.rcParams.update({'font.size': 5}) print("plot confusion matrix") path = path + '/build/' if not os.path.exists(path): os.makedirs(path) # Decode the class names path_to_labels = os.path.join(Path(os.path.abspath(__file__)).parents[2], "labels/") encoder_path = os.path.join(path_to_labels, encoder_model) encoder = LabelEncoder() encoder.classes_ = np.load(encoder_path) classes = encoder.inverse_transform(classes) classes = [cl.replace('_', ' ') for cl in classes] # Check if normalize is True, then scale the colorbar accordingly if normalize: cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] plt.imshow(cm, interpolation='nearest', cmap=cmap) if len(classes) == 120: plt.title(title, fontsize=12) else: plt.title(title) plt.colorbar() tick_marks = np.arange(len(classes)) plt.xticks(tick_marks, classes, rotation=45, horizontalalignment='right') plt.yticks(tick_marks, classes) # print text if not 120 classes are given if len(classes) != 120: # Loop over data dimensions and create text annotations. fmt = '.2f' if normalize else 'd' thresh = cm.max() / 2. for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black") plt.tight_layout() plt.ylabel('True label') plt.xlabel('Predicted label') plt.savefig("{}/confusion_matrix.pdf".format(path), dpi=500, pad_inches=0, bbox_inches='tight') plt.clf() # reset rcParams mpl.rcParams.update(mpl.rcParamsDefault)
# TODO: Create Publishers
pcl_objects_pub = rospy.Publisher("/pcl_objects", PointCloud2, queue_size=1)
pcl_table_pub = rospy.Publisher("/pcl_table", PointCloud2, queue_size=1)
pcl_cluster_pub = rospy.Publisher('/pcl_cluster', PointCloud2, queue_size=1)
object_markers_pub = rospy.Publisher("/object_markers", Marker, queue_size=1)
detected_objects_pub = rospy.Publisher("/detected_objects", DetectedObjectsArray, queue_size=1)

# Initialize color_list
get_color_list.color_list = []

model = pickle.load(open('model.sav', 'rb'))
clf = model['classifier']
encoder = LabelEncoder()
encoder.classes_ = model["classes"]
scaler = model['scaler']

# TODO: Spin while node is not shutdown
while not rospy.is_shutdown():
    rospy.spin()
        print(e)


wpod_net_path = "wpod-net.json"
wpod_net = load_model(wpod_net_path)

# Load model architecture, weight and labels
json_file = open('MobileNets_char_rec_18052021.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)
model.load_weights("character_recognition_18052021.h5")
print("[INFO] Model loaded successfully...")

labels = LabelEncoder()
labels.classes_ = np.load('character_classes.npy')
print("[INFO] Labels loaded successfully...")


def preprocess_image(image_path, resize=False):
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img / 255
    if resize:
        img = cv2.resize(img, (224, 224))
    return img


def get_plate(image, Dmax=608, Dmin=608):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image / 255
import cleaning
import numpy as np
import glob
import pandas as pd
import pickle
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.metrics import confusion_matrix
from sklearn.feature_extraction.text import CountVectorizer

with open('tokenizer/tokenize_b.pickle', 'rb') as handle:
    tokenize = pickle.load(handle)

modelFileLoad1 = open('models/model_b', 'rb')
modelFileLoad2 = open('models/model_m1', 'rb')

encoder = LabelEncoder()
encoder.classes_ = np.load('labelencoder/encoder_m1.npy')

fit_model1 = pickle.load(modelFileLoad1)
fit_model2 = pickle.load(modelFileLoad2)

from IPython.display import display
from tabulate import tabulate


def analyze_message(value):
    col_names = [
        'Station name', 'Train name', 'Category', 'Platform number',
        'Is spam', 'If delay'
    ]
    output = pd.DataFrame(columns=col_names)
    # print(value)
    a, b, d, t = cleaning.clean(value)
        'classes.npy', label_encoder.classes_)  # save the label encoder under the name classes.npy

# In[18]: load the pre-trained model and predict the math symbol for an arbitrary image;
# the code below could be placed in a separate file
import keras.models  # import the keras models library

model2 = keras.models.load_model("mathsymbols.model")  # load the model that was saved earlier
print(model2.summary())  # print the model2 summary

# In[19]: restore the class name to integer encoder
label_encoder2 = LabelEncoder()  # create a second LabelEncoder instance
label_encoder2.classes_ = np.load('classes.npy')  # set classes_ from the exported classes file


def predict(img_path):  # prediction helper that takes an image path
    newimg = keras.preprocessing.image.img_to_array(
        pil_image.open(img_path))  # open the image at img_path and convert it to an array
    newimg /= 255.0  # scale the pixel values in newimg by 255

    # do the prediction
    prediction = model2.predict(
        newimg.reshape(1, 32, 32, 3))  # run model2.predict on the reshaped newimg
    # figure out which output neuron had the highest score, and reverse the one-hot encoding
            if (val == number):
                accuracy = accuracy
            else:
                accuracy -= 1
        else:
            print('No LP detected')
            accuracy -= 1
    print(accuracy)


# Preparing pretrained model for vehicle recognition
wpod_net_path = "wpod-net.json"
wpod_net = load_model(wpod_net_path)

# Preparing pretrained model for character recognition
json_file = open('MobileNets_character_recognition.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)
model.load_weights("License_character_recognition_weight.h5")
print("[INFO] Model loaded successfully...")

labels = LabelEncoder()
labels.classes_ = np.load('license_character_classes.npy')
print("[INFO] Labels loaded successfully...")

if __name__ == '__main__':
    Execute.sampleSingleLP()
def predict(experiment_id, inputs): """Predict the class for a text using a trained model from an experiment.""" # Get experiment config if experiment_id == 'latest': experiment_id = max(os.listdir(config.EXPERIMENTS_DIR)) experiment_dir = os.path.join(config.EXPERIMENTS_DIR, experiment_id) experiment_config = utils.load_json( os.path.join(experiment_dir, 'config.json')) args = Namespace(**experiment_config) # Tokenizers texts = [sample['text'] for sample in inputs] with open(os.path.join(experiment_dir, 'X_tokenizer.json'), 'r') as fp: X_tokenizer = tokenizer_from_json(json.load(fp)) y_tokenizer = LabelEncoder() y_tokenizer.classes_ = np.load(os.path.join(experiment_dir, 'y_tokenizer.npy'), allow_pickle=True) # Create dataset generator X_infer = np.array(X_tokenizer.texts_to_sequences(texts)) preprocessed_texts = X_tokenizer.sequences_to_texts(X_infer) y_filler = np.array([0] * len(X_infer)) inference_generator = data.DataGenerator(X=X_infer, y=y_filler, batch_size=args.batch_size, max_filter_size=max( args.filter_sizes)) # Load model model = models.TextCNN(embedding_dim=args.embedding_dim, vocab_size=len(X_tokenizer.word_index) + 1, num_filters=args.num_filters, filter_sizes=args.filter_sizes, hidden_dim=args.hidden_dim, dropout_p=args.dropout_p, num_classes=len(y_tokenizer.classes_)) model.summary(input_shape=(10, )) # build it model_path = os.path.join(experiment_dir, 'model/cp.ckpt') model.load_weights(model_path) # Conv output model conv_outputs_model = models.ConvOutputsModel( vocab_size=len(X_tokenizer.word_index) + 1, embedding_dim=args.embedding_dim, filter_sizes=args.filter_sizes, num_filters=args.num_filters) conv_outputs_model.summary(input_shape=(10, )) # build it # Set weights conv_outputs_model.layers[0].set_weights(model.layers[0].get_weights()) conv_layer_start_num = 1 for layer_num in range(conv_layer_start_num, conv_layer_start_num + len(args.filter_sizes)): conv_outputs_model.layers[layer_num].set_weights( model.layers[layer_num].get_weights()) # Predict results = [] y_prob = model.predict(x=inference_generator, verbose=1) conv_outputs = conv_outputs_model.predict(x=inference_generator, verbose=1) for index in range(len(X_infer)): results.append({ 'raw_input': texts[index], 'preprocessed_input': preprocessed_texts[index], 'probabilities': get_probability_distribution(y_prob[index], y_tokenizer.classes_), 'top_n_grams': get_top_n_grams(tokens=preprocessed_texts[index].split(' '), conv_outputs=conv_outputs, filter_sizes=args.filter_sizes) }) return results
def from_estimator( cls, estimator, X, *, grid_resolution=100, eps=1.0, plot_method="contourf", response_method="auto", xlabel=None, ylabel=None, ax=None, **kwargs, ): """Plot decision boundary given an estimator. Read more in the :ref:`User Guide <visualizations>`. .. versionadded:: 1.0 Parameters ---------- estimator : object Trained estimator used to plot the decision boundary. X : {array-like, sparse matrix, dataframe} of shape (n_samples, 2) Input data that should be only 2-dimensional. grid_resolution : int, default=100 Number of grid points to use for plotting decision boundary. Higher values will make the plot look nicer but be slower to render. eps : float, default=1.0 Extends the minimum and maximum values of X for evaluating the response function. plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf' Plotting method to call when plotting the response. Please refer to the following matplotlib documentation for details: :func:`contourf <matplotlib.pyplot.contourf>`, :func:`contour <matplotlib.pyplot.contour>`, :func:`pcolomesh <matplotlib.pyplot.pcolomesh>`. response_method : {'auto', 'predict_proba', 'decision_function', \ 'predict'}, default='auto' Specifies whether to use :term:`predict_proba`, :term:`decision_function`, :term:`predict` as the target response. If set to 'auto', the response method is tried in the following order: :term:`predict_proba`, :term:`decision_function`, :term:`predict`. xlabel : str, default=None The label used for the x-axis. If `None`, an attempt is made to extract a label from `X` if it is a dataframe, otherwise an empty string is used. ylabel : str, default=None The label used for the y-axis. If `None`, an attempt is made to extract a label from `X` if it is a dataframe, otherwise an empty string is used. ax : Matplotlib axes, default=None Axes object to plot on. If `None`, a new figure and axes is created. **kwargs : dict Additional keyword arguments to be passed to the `plot_method`. Returns ------- display : :class:`~sklearn.inspection.DecisionBoundaryDisplay` Object that stores the result. See Also -------- DecisionBoundaryDisplay : Decision boundary visualization. ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix given an estimator, the data, and the label. ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix given the true and predicted labels. Examples -------- >>> import matplotlib.pyplot as plt >>> from sklearn.datasets import load_iris >>> from sklearn.linear_model import LogisticRegression >>> from sklearn.inspection import DecisionBoundaryDisplay >>> iris = load_iris() >>> X = iris.data[:, :2] >>> classifier = LogisticRegression().fit(X, iris.target) >>> disp = DecisionBoundaryDisplay.from_estimator( ... classifier, X, response_method="predict", ... xlabel=iris.feature_names[0], ylabel=iris.feature_names[1], ... alpha=0.5, ... ) >>> disp.ax_.scatter(X[:, 0], X[:, 1], c=iris.target, edgecolor="k") <...> >>> plt.show() """ check_matplotlib_support(f"{cls.__name__}.from_estimator") if not grid_resolution > 1: raise ValueError("grid_resolution must be greater than 1. Got" f" {grid_resolution} instead.") if not eps >= 0: raise ValueError( f"eps must be greater than or equal to 0. Got {eps} instead.") possible_plot_methods = ("contourf", "contour", "pcolormesh") if plot_method not in possible_plot_methods: avaliable_methods = ", ".join(possible_plot_methods) raise ValueError( f"plot_method must be one of {avaliable_methods}. 
" f"Got {plot_method} instead.") x0, x1 = _safe_indexing(X, 0, axis=1), _safe_indexing(X, 1, axis=1) x0_min, x0_max = x0.min() - eps, x0.max() + eps x1_min, x1_max = x1.min() - eps, x1.max() + eps xx0, xx1 = np.meshgrid( np.linspace(x0_min, x0_max, grid_resolution), np.linspace(x1_min, x1_max, grid_resolution), ) X_for_pred = np.c_[xx0.ravel(), xx1.ravel()] if isinstance(X, pd.DataFrame): X_for_pred = pd.DataFrame(X_for_pred, columns=X.columns) pred_func = _check_boundary_response_method(estimator, response_method) response = pred_func(X_for_pred) if response_method == "predict": label_encoder = LabelEncoder() label_encoder.classes_ = estimator.classes_ response = label_encoder.transform(response) if response.ndim != 1: if response.shape[1] != 2: raise ValueError( "Multiclass classifiers are only supported when " "response_method='predict'") response = response[:, 1] if xlabel is not None: xlabel = xlabel else: xlabel = X.columns[0] if hasattr(X, "columns") else "" if ylabel is not None: ylabel = ylabel else: ylabel = X.columns[1] if hasattr(X, "columns") else "" display = DecisionBoundaryDisplay( xx0=xx0, xx1=xx1, response=response.reshape(xx0.shape), xlabel=xlabel, ylabel=ylabel, ) return display.plot(ax=ax, plot_method=plot_method, **kwargs)
def learn_main_arg_node(node_df, syntax_dict, node_dict, precalc_features=None, feature_list=None, label_features=None): """ Learn a classifier for a node being arg0 or arg1 :param node_df: node data with tree and node ids :type node_df: pd.DataFrame :param syntax_dict: to look up the syntax trees by their id :type syntax_dict: dict :param node_dict: to look up the nodes by their id :type node_dict: dict :param precalc_features: precalculated features to save computation time in development :param precalc_features: pd.DataFrame :param feature_list: Names of the features that shall be calculated :type feature_list: list :param label_features: Names of features that are discrete :type label_features: list :return: All data that is needed to classifiy new data with the classifiers LogisticRegression classifiers from scikit learn, the list of features and label features, as well as encoders for the labels and a binary encoder and a featurizer method {'logit_arg0_clf': logit_arg0_clf, 'logit_arg1_clf': logit_arg1_clf, 'feature_list': feature_list, 'label_features': label_features, 'label_encoder': le, 'binary_encoder': ohe, 'node_featurizer': featurizer} :rtype: dict """ def featurizer(node_df, syntax_dict, node_dict): return node_feature_dataframe(node_df, node_featurizer, syntax_dict=syntax_dict, node_dict=node_dict, feature_list=feature_list) if precalc_features is None: print 'Calculating features' features = featurizer(node_df, syntax_dict, node_dict) print 'done' else: features = precalc_features # We need to encode the non-numerical labels print 'Encoding labels...' le = LabelEncoder() # LabelEncoder only deals with 1 dim np.arrays le.fit(features[label_features].values.ravel()) # Dealing with unknowns le.classes_ = np.append(le.classes_, '<unknown>') encoded_features = encode_label_features(features, le, label_features) print 'Encoded label' # We need to binarize the data for logistic regression print 'Binarizing features for logistic regression...' ohe = OneHotEncoder(sparse=False) ohe.fit(encoded_features[label_features].values) logit_features = binarize_features(encoded_features, ohe, label_features) print 'Binarized features.' print 'Training classifiers for arg0 labeling' print '======================================' nr_of_nodes = float(len(node_df)) baseline = (nr_of_nodes - sum(node_df['is_arg0_node'])) / nr_of_nodes print 'Majority baseline: %f' % baseline print 'Cross validating Logistic regression classifier...' # C is the inverse of the regularization strength # http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html logit_arg0_clf = LogisticRegression(C=1.0) scores = cross_val_score(logit_arg0_clf, logit_features, node_df['is_arg0_node'], cv=5) print 'Cross validated Logistic Regression classifier\nscores: %s\nmean score: ' \ '%f' % (str(scores), scores.mean()) print '' print 'Training classifiers for arg1 labeling' print '======================================' baseline = (nr_of_nodes - sum(node_df['is_arg1_node'])) / nr_of_nodes print 'Majority baseline: %f' % baseline print 'Cross validating Logistic regression classifier...' # C is the inverse of the regularization strength logit_arg1_clf = LogisticRegression(C=1.0) scores = cross_val_score(logit_arg1_clf, logit_features, node_df['is_arg1_node'], cv=5) print 'Cross validated Logistic Regression classifier\nscores: %s\nmean score: ' \ '%f' % ( str(scores), scores.mean()) print 'Learning classifiers on the whole data set...' 
logit_arg0_clf.fit(logit_features, node_df['is_arg0_node']) logit_arg1_clf.fit(logit_features, node_df['is_arg1_node']) print 'Learned classifier on the whole data set' # ToDo: Design features (see Lin et al p. 17, Connective_syntactic!) # ToDo: Evaluate this method (remember not to count punctuation) # ToDo: Get baseline by labeling everything after the connective as # arg0, everything else as arg1 # ToDo: Get baseline for previous sentence by labeling the full sentence # as arg1. return_dict = {'logit_arg0_clf': logit_arg0_clf, 'logit_arg1_clf': logit_arg1_clf, 'feature_list': feature_list, 'label_features': label_features, 'label_encoder': le, 'binary_encoder': ohe, 'node_featurizer': featurizer} return return_dict
if __name__ == '__main__':
    input_dir = 'play'
    images = load_images(input_dir)
    cropped_images = list()
    for i in range(len(images)):
        cropped_images.append(detect_face(images[i]))

    face_model = load_model(os.path.join('model', 'facenet_keras.h5'))
    cropped_images = get_embedded_data(face_model, cropped_images)
    cropped_images = normalize(cropped_images)

    model = joblib.load(os.path.join('model', 'svm_model.sav'))
    pred_test = model.predict(cropped_images)
    pred_proba = model.predict_proba(cropped_images)

    label_encode = LabelEncoder()
    label_encode.classes_ = np.load(os.path.join('model', 'classes.npy'))
    predicted_names = label_encode.inverse_transform(pred_test)

    for i, image in enumerate(images):
        # plt.figure()
        plt.imshow(image)
        plt.title("Predicted: " + predicted_names[i] + " with " +
                  str(round(pred_proba[i][pred_test[i]] * 100, 2)) + "% confidence.")
        plt.show()
def model(): def load_model(path): try: path = splitext(path)[0] with open('%s.json' % path, 'r') as json_file: model_json = json_file.read() model = model_from_json(model_json, custom_objects={}) model.load_weights('%s.h5' % path) print("Loading model successfully...") return model except Exception as e: print(e) wpod_net_path = "wpod-net.json" wpod_net = load_model(wpod_net_path) def preprocess_image(image_path,resize=False): img = cv2.imread(image_path) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img = img / 255 if resize: img = cv2.resize(img, (224,224)) return img def get_plate(image_path): Dmax = 608 Dmin = 288 vehicle = preprocess_image(image_path) ratio = float(max(vehicle.shape[:2])) / min(vehicle.shape[:2]) side = int(ratio * Dmin) bound_dim = min(side, Dmax) _ , LpImg, _, cor = detect_lp(wpod_net, vehicle, bound_dim, lp_threshold=0.5) return vehicle, LpImg, cor test_image_path = "images/numberplate_image.png" vehicle, LpImg,cor = get_plate(test_image_path) # fig = plt.figure(figsize=(12,6)) # grid = gridspec.GridSpec(ncols=2,nrows=1,figure=fig) # fig.add_subplot(grid[0]) # plt.axis(False) # plt.imshow(vehicle) # grid = gridspec.GridSpec(ncols=2,nrows=1,figure=fig) # fig.add_subplot(grid[1]) # plt.axis(False) # plt.imshow(LpImg[0]) # plt.show() if (len(LpImg)): plate_image = cv2.convertScaleAbs(LpImg[0], alpha=(255.0)) gray = cv2.cvtColor(plate_image, cv2.COLOR_BGR2GRAY) blur = cv2.GaussianBlur(gray,(7,7),0) binary = cv2.threshold(blur, 180, 255,cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] kernel3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)) thre_mor = cv2.morphologyEx(binary, cv2.MORPH_DILATE, kernel3) # fig = plt.figure(figsize=(12,7)) # plt.rcParams.update({"font.size":18}) # grid = gridspec.GridSpec(ncols=2,nrows=3,figure = fig) # plot_image = [plate_image, gray, blur, binary,thre_mor] # plot_name = ["plate_image","gray","blur","binary","dilation"] # for i in range(len(plot_image)): # fig.add_subplot(grid[i]) # plt.axis(False) # plt.title(plot_name[i]) # if i ==0: # plt.imshow(plot_image[i]) # else: # plt.imshow(plot_image[i],cmap="gray") # plt.show() def sort_contours(cnts,reverse = False): i = 0 boundingBoxes = [cv2.boundingRect(c) for c in cnts] (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),key=lambda b: b[1][i], reverse=reverse)) return cnts #List approx and External approx can be changed here. #Code added to make contour into list if only external contour is detected. cont, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) if len(cont) <= 2: #IT NEEDS TO DEBUGGED FOR CASE OF VARIOUS CHARS. cont, _ = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) print("NOTE: USING LIST CONTOURS") #Code to Debug the Contours on the plate image. '''print(len(cont)) imagge = LpImg[0].copy() bxmi = sort_contours(cont) cv2.drawContours(imagge, bxmi, -1, (0,255,0), 3) cv2.imshow('Conto', imagge) cv2.waitKey(0) cv2.destroyAllWindows()''' test_roi = plate_image.copy() crop_characters = [] digit_w, digit_h = 30, 60 for c in sort_contours(cont): (x, y, w, h) = cv2.boundingRect(c) ratio = h/w #The ratio is used to make sure the number plate is not detected play with it in a way that numberplate is not detected. #Original value was set to 3.5 it was increased due to INDIAN numberplate fashion. 
        if 1 <= ratio <= 5:
            # The original threshold below was 0.5; 0.4 is more compatible here, and values
            # between 0.4 and 0.5 are worth trying. It checks the character height relative
            # to the plate height (the number/cropped-image shape ratio).
            if h / plate_image.shape[0] >= 0.4:
                # Draw a bounding box around the detected character
                cv2.rectangle(test_roi, (x, y), (x + w, y + h), (0, 255, 0), 2)

                # Crop the character, resize it and re-binarize it
                curr_num = thre_mor[y:y + h, x:x + w]
                curr_num = cv2.resize(curr_num, dsize=(digit_w, digit_h))
                _, curr_num = cv2.threshold(curr_num, 220, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
                crop_characters.append(curr_num)

    print("Detected {} letters...".format(len(crop_characters)))

    # fig = plt.figure(figsize=(10,6))
    # plt.axis(False)
    # plt.imshow(test_roi)
    # plt.show()

    # fig = plt.figure(figsize=(14,4))
    # grid = gridspec.GridSpec(ncols=len(crop_characters),nrows=1,figure=fig)
    # for i in range(len(crop_characters)):
    #     fig.add_subplot(grid[i])
    #     plt.axis(False)
    #     plt.imshow(crop_characters[i], cmap="gray")
    # plt.show()

    # Load the character-recognition model and the saved label classes
    json_file = open('MobileNets_character_recognition.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    model = model_from_json(loaded_model_json)
    model.load_weights("License_character_recognition_weight.h5")
    print("Model loaded successfully...")

    labels = LabelEncoder()
    labels.classes_ = np.load('license_character_classes.npy')
    print("Labels loaded successfully...")

    def predict_from_model(image, model, labels):
        image = cv2.resize(image, (80, 80))
        image = np.stack((image,) * 3, axis=-1)
        prediction = labels.inverse_transform([np.argmax(model.predict(image[np.newaxis, :]))])
        return prediction

    fig = plt.figure(figsize=(15, 3))
    cols = len(crop_characters)
    grid = gridspec.GridSpec(ncols=cols, nrows=1, figure=fig)

    final_string = ''
    for i, character in enumerate(crop_characters):
        fig.add_subplot(grid[i])
        title = np.array2string(predict_from_model(character, model, labels))
        plt.title('{}'.format(title.strip("'[]")), fontsize=20)
        final_string += title.strip("'[]")
        plt.axis(False)
        # plt.imshow(character, cmap='gray')

    print(final_string)

    today = str(date.today())
    data = {
        'number': final_string,
        'date_created': today
    }
    json_object = json.dumps(data, indent=2)

    try:
        with open("output.json", "w") as outfile:
            outfile.write(json_object)
        print('JSON object successfully exported')
        os.remove('file_modifier/file')
        checker()
    except Exception as e:
        print('Error working with json:', e)
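For reference, the JSON / weights / class files loaded above are the usual Keras persistence triple. A hedged sketch of how they would be written on the training side; `char_model` and `char_labels` are placeholder names, not from the original code:

import numpy as np
from sklearn.preprocessing import LabelEncoder

# Hypothetical training-side export matching the files loaded above.
le = LabelEncoder()
y_enc = le.fit_transform(char_labels)  # char_labels: list of character strings, e.g. ['A', 'B', '0', ...] (assumed)

with open('MobileNets_character_recognition.json', 'w') as f:
    f.write(char_model.to_json())                                     # architecture only
char_model.save_weights('License_character_recognition_weight.h5')   # weights only
np.save('license_character_classes.npy', le.classes_)                # class array for the LabelEncoder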
        cosine = F.linear(x_norm, w_norm, None)
        out = cosine * self.scale
        return out


resume = sys.argv[1]
encoder = sys.argv[2]
x_test = sys.argv[3]

train_folder = "/home/blcv/CODE/Kaggle/humpback_short_blażej/data/processed/train_bb_fastai2/"
test_df = "/home/blcv/CODE/Kaggle/humpback_whale_identification/data/processed/sample_submission.csv"
test_folder = "/home/blcv/CODE/Kaggle/humpback_short_blażej/data/processed/test_bb_fastai2/"
option_da = ['gray']  # []

# Restore the label encoder that maps whale ids to integers
label_encoder = LabelEncoder()
label_encoder.classes_ = np.load(encoder)

X_test = pd.read_csv(x_test)
val_loader = getDataLoader(X_test, train_folder, 'val', option_da=option_da,
                           image_size=224, batch_size=64)

# model preparation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = 'se_resnext101_32x4d'
model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')
model.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
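The snippet stops before the classifier head is swapped in and the checkpoint is applied. A hedged sketch of how that typically continues; the original code presumably attaches the cosine-scaled head defined above, but this sketch uses a plain nn.Linear for brevity, and the 'state_dict' checkpoint key and the loader's batch structure are assumptions:

import torch
import torch.nn as nn

# Hypothetical continuation: resize the head to the number of whale ids and load the fine-tuned weights.
num_classes = len(label_encoder.classes_)
model.last_linear = nn.Linear(model.last_linear.in_features, num_classes)

checkpoint = torch.load(resume, map_location=device)
model.load_state_dict(checkpoint['state_dict'])  # 'state_dict' key is an assumption
model = model.to(device).eval()

# Inference over the loader, mapping predicted integers back to whale ids
all_ids = []
with torch.no_grad():
    for images, _ in val_loader:  # batch structure assumed to be (images, labels)
        logits = model(images.to(device))
        preds = logits.argmax(dim=1).cpu().numpy()
        all_ids.extend(label_encoder.inverse_transform(preds))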
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder

x_train = np.load("./preprocessed/x_train.npy")
y_train = np.load("./preprocessed/y_train.npy")
x_test = np.load("./preprocessed/x_test.npy")
y_test = np.load("./preprocessed/y_test.npy")
yy = np.load("./preprocessed/yy.npy")

le = LabelEncoder()
le.classes_ = np.load("./preprocessed/classes.npy")

# MODEL
num_rows = 40
num_columns = 174
num_channels = 1

x_train = x_train.reshape(x_train.shape[0], num_rows, num_columns, num_channels)
x_test = x_test.reshape(x_test.shape[0], num_rows, num_columns, num_channels)

num_labels = yy.shape[1]
filter_size = 2


def construct_model():
    # Construct model
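    # The original body of construct_model is cut off above. What follows is a hedged,
    # illustrative completion only: a small CNN consistent with the
    # (num_rows, num_columns, num_channels) inputs and num_labels outputs defined above.
    # The layer sizes are assumptions, not the original architecture.
    model = Sequential()
    model.add(Conv2D(16, kernel_size=filter_size, activation='relu',
                     input_shape=(num_rows, num_columns, num_channels)))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Conv2D(32, kernel_size=filter_size, activation='relu'))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(GlobalAveragePooling2D())
    model.add(Dense(num_labels, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
    return model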
# convert Tag1 from strings to integers
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
train['Tag1_enc'] = le.fit_transform(train.Tag1)

# confirm that the conversion worked
train.Tag1.value_counts().head()
train.Tag1_enc.value_counts().head()

# create a dummy column for each value of Tag1_enc (returns a sparse matrix)
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder()
tag1_dummies = ohe.fit_transform(train[['Tag1_enc']])
tag1_dummies

# try a Naive Bayes model with tag1_dummies as the features
cross_val_score(nb, tag1_dummies, train.OpenStatus, scoring='log_loss', cv=10).mean()    # 0.650

# adjust Tag1 on the testing set, since LabelEncoder errors on values it did not see during fit
test['Tag1'] = test['Tag1'].map(lambda s: '<unknown>' if s not in le.classes_ else s)
import numpy as np
le.classes_ = np.append(le.classes_, '<unknown>')

# apply the same encoding to the actual testing data and make predictions
test['Tag1_enc'] = le.transform(test.Tag1)
oos_tag1_dummies = ohe.transform(test[['Tag1_enc']])
nb.fit(tag1_dummies, train.OpenStatus)
oos_pred_prob = nb.predict_proba(oos_tag1_dummies)[:, 1]
sub = pd.DataFrame({'id': test.index, 'OpenStatus': oos_pred_prob}).set_index('id')
sub.to_csv('sub5.csv')    # 0.649
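The unseen-value workaround above (map unknowns to a sentinel, append the sentinel to classes_) is worth wrapping in a helper if it recurs. A small hedged sketch; the helper name and example data are made up:

import numpy as np
from sklearn.preprocessing import LabelEncoder

def transform_with_unknown(le, values, unknown='<unknown>'):
    """Label-encode `values`, mapping anything the encoder has not seen to `unknown`.
    Hypothetical helper; it mutates the encoder by appending the sentinel class once."""
    if unknown not in le.classes_:
        le.classes_ = np.append(le.classes_, unknown)
    known = set(le.classes_)
    safe = [v if v in known else unknown for v in values]
    return le.transform(safe)

# Example usage on made-up data
le = LabelEncoder()
le.fit(['python', 'java', 'c++'])
print(transform_with_unknown(le, ['python', 'rust']))  # 'rust' falls back to '<unknown>'

Note that recent scikit-learn versions map string labels through a lookup table, so the appended sentinel does not need to keep classes_ sorted; very old versions used a sorted search in transform, where this append trick can misbehave.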
import click
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
from keras.models import load_model
from audio_much.core import build_beat_audio_feature_sequences
import librosa
import numpy as np
from sklearn.preprocessing import LabelEncoder
import collections

encoder = LabelEncoder()
encoder.classes_ = np.load('songs_training_data_classes.npy')
lstm_model = load_model('model_raw_22050_lstm_02.h5')


def do_prediction_lstm(lstm_model, song_path, target_sample_rate=44100):
    X, sample_rate = librosa.load(song_path, sr=target_sample_rate, res_type='kaiser_fast')
    hop_length = 512
    tempo, beats = librosa.beat.beat_track(y=X, sr=target_sample_rate, hop_length=hop_length)
    float_audio_segments = build_beat_audio_feature_sequences(X, target_sample_rate, tempo)
    timeseries_length = 16
    batch_size = float_audio_segments.shape[0] - timeseries_length
    features = np.zeros((batch_size, timeseries_length, 12), dtype=np.float64)
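    # do_prediction_lstm is cut off right after the feature buffer is allocated. What follows
    # is a hedged guess at the continuation, not the original code: fill each sliding window
    # with 12-dim beat features, score every window with the LSTM, and vote over the
    # per-window predictions with collections.Counter (which the import above suggests).
    # The exact shape of float_audio_segments is an assumption.
    for i in range(batch_size):
        features[i] = float_audio_segments[i:i + timeseries_length, :12]

    probs = lstm_model.predict(features)
    labels = encoder.inverse_transform(np.argmax(probs, axis=1))
    counts = collections.Counter(labels)
    return counts.most_common(1)[0][0]  # most frequently predicted class wins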