Esempio n. 1
0
def encode(df, dump=fromPickle):
    """
    Label-encode the categorical columns of a cleaned dataframe.

    Takes in: dataframe from clean_col. Every column named in the
    module-level ``lblColumns`` that also appears in ``final_cols`` is
    overwritten in place with its integer codes.

    dump: when truthy, the fitted classes of each column are cached as
    ``<modelPath>/<col>.npy`` and reused on later calls instead of refitting.

    Returns: a dataframe restricted to ``final_cols`` (in that order) with a
    fresh 0..n-1 index, where the categorical variables are LabelEncoded.
    """
    for col in lblColumns:
        if col not in final_cols:
            continue
        le = LabelEncoder()
        fName = "%s/%s.npy" % (modelPath, col)
        if dump and os.path.isfile(fName):
            # Classes fitted on object (string) columns are saved as a pickled
            # object array, which np.load rejects unless allow_pickle is set.
            # NOTE(review): acceptable only because this cache file is written
            # by this very function; never point modelPath at untrusted data.
            le.classes_ = np.load(fName, allow_pickle=True)
        else:
            le.fit(df[col])
            if dump:
                np.save(fName, le.classes_)
        df[col] = le.transform(df[col])
    # Keep only final_cols, in the order that list defines
    # (presumably with logprice last -- confirm against final_cols).
    df = df[final_cols]
    # drop=True discards the old index instead of materialising it as a
    # column that then has to be removed by name.
    return df.reset_index(drop=True)
Esempio n. 2
0
def data(fold=False):
    """
    Download, unpack and load the iccv09Data images with their region labels.

    With ``fold`` left at ``False`` the whole set is returned as ``X, y, le``.
    Otherwise ``fold`` selects the ``fold``-th contiguous slice of ``ntest()``
    samples as the test split and ``(Xtr, ytr), (Xte, yte), le`` is returned.

    ``le`` is a sklearn ``LabelEncoder`` whose ``classes_`` are the region
    names, or the plain array of names when sklearn cannot be imported.
    """
    archive = df.zoo.download('http://dags.stanford.edu/data/iccv09Data.tar.gz')

    # Reading members one at a time in memory is far too slow for this
    # dataset, so the complete tgz is unpacked next to the downloaded file.
    root = _p.dirname(archive)
    imgdir = 'iccv09Data/images/'

    with _taropen(archive, 'r') as tar:
        tar.extractall(root)
        ids = []
        for member in tar.getnames():
            if member.startswith(imgdir):
                # file name without directory and without its 4-char extension
                ids.append(_p.basename(member)[:-4])

    X = []
    for sample in ids:
        X.append(imread(_p.join(root, imgdir, sample) + '.jpg'))

    # Only the region labels are loaded; the other label types are
    # deliberately ignored.
    y = []
    for sample in ids:
        y.append(_np.loadtxt(_p.join(root, 'iccv09Data/labels', sample) + '.regions.txt', dtype=_np.int32))

    names = _np.array(['sky', 'tree', 'road', 'grass', 'water', 'building', 'mountain', 'foreground', 'object'])
    le = names
    try:
        from sklearn.preprocessing import LabelEncoder
        encoder = LabelEncoder()
        encoder.classes_ = names
        le = encoder
    except ImportError:
        # sklearn is optional: fall back to the bare array of names
        pass

    if fold is False:
        return X, y, le

    lo = fold*ntest()
    hi = (fold+1)*ntest()
    train = (X[:lo] + X[hi:], y[:lo] + y[hi:])
    test = (X[lo:hi], y[lo:hi])
    return train, test, le
Esempio n. 3
0
 def restore(self,model_path):
   '''
   Rebuild a saved multiencoder from the npz files found under model_path.

   Every array stored in encoder.npz becomes the classes_ of a fresh
   LabelEncoder, keyed by the array's name; the X header is read from the
   'header' entry of header.npz. Returns self so calls can be chained.
   '''
   encoder_archive = np.load(model_path + '/encoder.npz')
   header_archive = np.load(model_path + '/header.npz')
   self.header = header_archive['header']
   self.encoders = rebuilt = {}
   for name, classes in encoder_archive.items():
     rebuilt[name] = LabelEncoder()
     rebuilt[name].classes_ = classes
   self.columns = list(rebuilt.keys())
   return self
Esempio n. 4
0
def learn_sentdist(clean_pcc,
                   feature_list=None,
                   label_features=None):
    """ Learning a classifier for the distance of arguments from a connective

    Runs a random forest. Prints out accuracy scores from a 5-fold cross validation,
    then refits the classifier on the whole data set.
    :param clean_pcc: Cleaned PCC data, no NaNs; must contain a 'sentence_dist' column
    :type clean_pcc: pd.DataFrame
    :param feature_list: list of features that shall be calculated with discourse_connective_text_featurizer
    :param label_features: list of features that have to be encoded as labels
    :return: trained classifier, score array and label encoder
    :rtype: tuple
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('Calculating features...')

    # Taking our favorite featurizer, bound to the requested feature list
    def featurizer(sents, conn_pos):
        return discourse_connective_text_featurizer(sents, conn_pos,
                                                    feature_list=feature_list)

    features = sentdist_feature_dataframe(clean_pcc, featurizer)  # Got features of X
    print('Calculated all features')

    # We need to encode the non-numerical labels
    le = LabelEncoder()
    # LabelEncoder only deals with 1 dim np.arrays, so flatten all label columns
    le.fit(features[label_features].values.ravel())
    # Dealing with unknowns: reserve one extra class after the fitted ones
    le.classes_ = np.append(le.classes_, '<unknown>')
    features = encode_label_features(features, le, label_features)

    print('Cross validating classifier...')
    clf = RandomForestClassifier(min_samples_leaf=5, n_jobs=-1, verbose=0)
    scores = cross_val_score(clf, features, clean_pcc['sentence_dist'], cv=5)
    print('Cross validated classifier\nscores: %s\nmean score: %f' % (str(scores), scores.mean()))

    print('Learning classifier on the whole data set...')
    clf.fit(features, clean_pcc['sentence_dist'])
    print('Learned classifier on the whole data set')

    return clf, scores, le
Esempio n. 5
0
def create_kaggle_submission(prob, ids_raw, score=None, threshold=0.0):
    """
    Build and write a Kaggle submission with up to five countries per id.

    prob: per-row sequences of class probabilities; position j of a row
        refers to COUNTRY_CLASSES[j].
    ids_raw: the ids, indexable by position 0..len-1, aligned with prob.
    score: when truthy it is embedded in the output file name
        (submission_<score>.csv); otherwise a timestamp is used.
    threshold: classes with a probability below this value are left out.

    Returns the submission DataFrame with columns 'id' and 'country', rows
    for one id being ordered by decreasing probability.
    Raises ValueError when an id is null.
    """
    ids = []  # list of ids
    cts = []  # list of countries

    # Index -> country lookup; the same mapping LabelEncoder.inverse_transform
    # applies, without calling it once per scalar.
    classes = np.asarray(COUNTRY_CLASSES)

    for i in range(len(ids_raw)):
        idx = ids_raw[i]
        if pd.isnull(idx):
            # A null id cannot be submitted; fail loudly instead of dropping
            # into an interactive debugger.
            raise ValueError('null id at position %d' % i)
        valid = sorted(((j, k) for j, k in enumerate(prob[i]) if k >= threshold),
                       key=lambda x: x[1], reverse=True)
        if not valid:
            # nothing reached the threshold for this id
            continue
        # Only the five most probable classes are kept, so only those are decoded.
        top = [j for j, _ in valid[:5]]
        sub_countries = list(classes[top])

        ids += [idx] * len(sub_countries)
        cts += sub_countries

    # spot check that it's not all 7's
    irene = [np.argmax(i) for i in prob]
    print(pd.Series(irene).value_counts())

    # Generate submission
    sub = pd.DataFrame(np.column_stack((ids, cts)), columns=['id', 'country'])

    # datetime submission
    date_str = datetime.datetime.now().strftime('%y%m%d_%H%M')
    if score:
        sub.to_csv('submission_%.4f.csv' % score, index=False)
    else:
        sub.to_csv('submission_%s.csv' % date_str, index=False)
    return sub
Esempio n. 6
0
if __name__ == '__main__':

    # ROS node initialization
    rospy.init_node('clustering', anonymous=True)

    # Subscriber: every incoming point cloud is handed to pcl_callback
    pcl_sub = rospy.Subscriber("sensor_stick/point_cloud", pc2.PointCloud2, pcl_callback, queue_size=1)

    # Publishers for the intermediate clouds and the object markers
    pcl_objects_pub = rospy.Publisher("/pcl_objects", PointCloud2, queue_size=1)
    pcl_table_pub = rospy.Publisher("/pcl_table", PointCloud2, queue_size=1)
    pcl_cluster_pub = rospy.Publisher("/pcl_cluster", PointCloud2, queue_size=1)
    pcl_detected_obj_pub = rospy.Publisher("/pcl_detected_obj", PointCloud2, queue_size=1)
    object_markers_pub = rospy.Publisher("/object_markers", Marker, queue_size=1)

    # Load the trained model from disk; the context manager closes the file
    # even if unpickling fails.
    # NOTE(review): pickle runs arbitrary code on load -- only use a trusted model.sav.
    with open('model.sav', 'rb') as model_file:
        model = pickle.load(model_file)
    clf = model['classifier']
    encoder = LabelEncoder()
    encoder.classes_ = model['classes']
    scaler = model['scaler']

    # Initialize color_list
    get_color_list.color_list = []

    # Spin while node is not shutdown
    while not rospy.is_shutdown():
        rospy.spin()

Esempio n. 7
0
def analisarPendente():
    """Classify the JSON payload of the current request and return three labels.

    Reads ``request.data`` (presumably the Flask request object -- confirm),
    encodes the 'insumo', 'mes' and 'aparencia' fields, runs the Keras model
    stored in SmartGardenModelBinary.h5 and returns a JSON string with the
    keys 'stRisco', 'stUmi' and 'stTemp' for the first row of the payload.
    """
    print(request.data)
    # Reset the Keras backend session before loading the model again
    K.clear_session()

    data = json.loads(request.data)

    # The model is reloaded from disk on every call
    model = load_model('SmartGardenModelBinary.h5')

    # Flatten the JSON payload into a DataFrame
    dfTest = json_normalize(data)

    # One encoder per categorical field
    le_insumo = LabelEncoder()
    le_mes = OrdinalEncoder(categories=[[
        'jan', 'fev', 'mar', 'abr', 'mai', 'jun', 'jul', 'ago', 'set', 'out',
        'nov', 'dez'
    ]])
    le_aparencia = OrdinalEncoder(
        categories=[['murcha', 'amarelada', 'normal']])
    le_temp = OrdinalEncoder(categories=[['baixo', 'medio', 'alto']])
    le_umi = OrdinalEncoder(categories=[['baixo', 'medio', 'alto']])
    le_risco = OrdinalEncoder(categories=[['baixo', 'medio', 'alto']])

    # Restore the fitted state from disk instead of refitting.
    # NOTE(review): allow_pickle=True executes pickled content -- the .npy
    # files must come from a trusted source.
    le_insumo.classes_ = np.load('./pickle/insumo.npy', allow_pickle=True)
    le_mes.categories_ = np.load('./pickle/mes.npy', allow_pickle=True)
    le_aparencia.categories_ = np.load('./pickle/aparencia.npy',
                                       allow_pickle=True)

    le_temp.categories_ = np.load('./pickle/temp.npy', allow_pickle=True)
    le_umi.categories_ = np.load('./pickle/umi.npy', allow_pickle=True)
    le_risco.categories_ = np.load('./pickle/risco.npy', allow_pickle=True)

    # Encode the three categorical input columns
    insumo = le_insumo.transform(dfTest['insumo'])
    mes = le_mes.transform(dfTest[['mes']])
    aparencia = le_aparencia.transform(dfTest[['aparencia']])

    dfTest['insumo'] = insumo
    dfTest['mes'] = mes
    dfTest['aparencia'] = aparencia

    predictions = model.predict(dfTest.values)

    # Binarise the model outputs at 0.5
    predictions[predictions >= 0.5] = 1
    predictions[predictions < 0.5] = 0

    # Nine outputs = three groups of three; the column labels 0..2 repeat so
    # that idxmax below yields the position inside each group.
    nn_preds = pd.DataFrame(predictions, columns=[0, 1, 2, 0, 1, 2, 0, 1, 2])

    # Label of the first column equal to 1 in each group.
    # NOTE(review): when no column in a group is 1, idxmax presumably falls
    # back to the first label (0) -- confirm this is intended.
    nn_preds['encGeral'] = (nn_preds.iloc[:, 0:3] == 1).idxmax(1)
    nn_preds['encUmi'] = (nn_preds.iloc[:, 3:6] == 1).idxmax(1)
    nn_preds['encTemp'] = (nn_preds.iloc[:, 6:9] == 1).idxmax(1)

    # Map the group positions back to their string labels
    nn_preds['stRisco'] = le_risco.inverse_transform(nn_preds[['encGeral']])
    nn_preds['stUmi'] = le_umi.inverse_transform(nn_preds[['encUmi']])
    nn_preds['stTemp'] = le_temp.inverse_transform(nn_preds[['encTemp']])
    nn_preds = nn_preds[['stRisco', 'stUmi', 'stTemp']]

    # Round-trip through JSON to get plain dicts keyed by row label
    nn_preds = nn_preds.to_json()
    nn_preds = json.loads(nn_preds)

    # Keep only the value of row '0' for each output field
    nn_preds['stRisco'] = nn_preds['stRisco']['0']
    nn_preds['stTemp'] = nn_preds['stTemp']['0']
    nn_preds['stUmi'] = nn_preds['stUmi']['0']

    return json.dumps(nn_preds)
Esempio n. 8
0
          batch_size=32, epochs=10, verbose=2)

# Persist the trained model to disk
model.save("mathsymbols.model")

# Persist the label encoder's classes so predicted class indices can be
# mapped back to class names later
np.save('classes.npy', label_encoder.classes_)


# In[14] (load the pre-trained model and predict the math symbol for an arbitrary image)
model2 = keras.models.load_model("mathsymbols.model")
# NOTE(review): summary() normally prints by itself and returns None, so this
# presumably prints an extra "None" line -- confirm
print(model2.summary())

# Rebuild the class-name <-> integer encoder from the saved classes
label_encoder2 = LabelEncoder()
label_encoder2.classes_ = np.load('classes.npy')


def predict(img_path):
    """Classify the image at ``img_path`` with ``model2`` and print the result.

    The pixel values are divided by 255 and the image is fed to the model as
    a single sample of shape (1, 32, 32, 3). The class with the highest score
    is translated back to its name through ``label_encoder2``; the name and
    the score are printed. Nothing is returned.
    """
    pixels = keras.preprocessing.image.img_to_array(pil_image.open(img_path))
    pixels /= 255.0

    # a batch holding just this one image
    batch = pixels.reshape(1, 32, 32, 3)
    scores = model2.predict(batch)

    # index of the highest-scoring output neuron -> class name
    best = np.argmax(scores)
    label = label_encoder2.inverse_transform([best])[0]
    confidence = np.max(scores)
    print("Prediction: %s, confidence: %.2f" % (label, confidence))
Esempio n. 9
0
# CQT parameters: lowest pitch as a MIDI note number, its frequency in Hz,
# and the number of frequency bins
fminval = 36
fmin = librosa.midi_to_hz(fminval)
n_bins = 72

# Window parameters: file names for the windowed labels/classes and the run name

cf = "classes_windowed.npy"
lf = "labels_windowed.npy"
log_name_cqt = "cqt_win_knn_{0}_{1}_{2}".format(hop_length, fminval, n_bins)
files_path = "files_win"

# Generate the label files, then load them back and encode the labels
pp.create_labels(audio_path=audio_path, output_classes=cf, output_labels=lf)
# NOTE(review): the hard-coded backslashes make these paths Windows-only
labels = np.load(resource_path + "\\labels\\" + lf)
labelencoder = LabelEncoder()
labelencoder.classes_ = np.load(resource_path + "\\labels\\" + cf)
classes = labelencoder.transform(labels)
print(labels.shape)
print(classes.shape)

# Create the feature vectors and store them together with the file list
feature_vectors, files = pp.get_cqt_folder(path=audio_path)
pp.save_cqt_sk(feature_vectors, log_name_cqt + ".pl")
np.save(resource_path + "\\files\\" + files_path + ".npy", files)

# Load the stored feature vectors back.
# NOTE(review): the file object passed to pickle.load is never closed explicitly
scaled_feature_vectors = pickle.load(
    open(resource_path + "\\feature_vectors\\CQT_SK\\" + log_name_cqt + ".pl",
         "rb"))
# Reshape to one row of n_bins values per vector; needed for CQT features,
# omit otherwise
scaled_feature_vectors = scaled_feature_vectors.reshape(
    len(scaled_feature_vectors), n_bins)
Esempio n. 10
0

def load_model():
    """Build the character-recognition network and load its trained weights.

    The architecture is read from a JSON file and the weights from an .h5
    file; both paths are hard-coded. Returns the ready-to-use model.
    """
    # The context manager closes the file even when reading fails
    with open('model/ResNets_character_recognition_spyder_new.json') as json_file:
        loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)
    # NOTE(review): unlike the JSON file, the weights are loaded from the
    # working directory rather than model/ -- confirm this is intended
    model.load_weights("License_character_recognition_spyder_new.h5")
    print("[INFO] Model loaded successfully...")
    return model


# Module-level state: a label encoder whose classes are restored from disk
# (to map predicted class indices back to labels) and the recognition model.
labels = LabelEncoder()
labels.classes_ = np.load('model/license_character_classes_Spyder.npy')
model = load_model()


def display_img(img_path):
    """Show the image stored at ``img_path`` through ``st.image``."""
    # Image.open expects a path or a file object. The IPython display wrapper
    # that used to be passed in is neither, so the path is opened directly.
    # (assumes Image is PIL.Image -- confirm against the file's imports)
    st.image(Image.open(img_path))


def sort_contours(cnts, reverse=False):
    """Return the contours ordered by the first coordinate of their bounding box.

    cnts: sequence of contours as accepted by cv2.boundingRect.
    reverse: sort in descending instead of ascending order.

    Returns a tuple of contours; an empty tuple when ``cnts`` is empty
    (unpacking the result of zip() used to raise ValueError in that case).
    """
    if len(cnts) == 0:
        return ()
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    # sort the (contour, box) pairs on element 0 of the box
    ordered = sorted(zip(cnts, boundingBoxes),
                     key=lambda pair: pair[1][0], reverse=reverse)
    return tuple(contour for contour, _ in ordered)
Esempio n. 11
0
    if (len(tmp) > 0):
        extra_encoding_col.append(col)
    for el in tmp:
        lst_extra_encodings.append(el)

# Label Encoding
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()

i = 0
for col in categorical_cols:
    label_encoder.fit(train[col])
    i += 1
    if (col == 'EngineVersion'):
        label_encoder.classes_ = np.append(
            label_encoder.classes_, '1.1.11602.0')  # count = 1 in test set
        label_encoder.classes_ = np.append(
            label_encoder.classes_, '1.1.12002.0')  # count = 1 in test set
    if (col == 'AppVersion'):
        label_encoder.classes_ = np.append(
            label_encoder.classes_, '4.18.1806.20015')  # count = 1 in test set
        label_encoder.classes_ = np.append(
            label_encoder.classes_, '4.11.15063.1154')  # count = 1 in test set
        label_encoder.classes_ = np.append(
            label_encoder.classes_,
            '4.12.17007.18021')  # count = 1 in test set
        label_encoder.classes_ = np.append(
            label_encoder.classes_, '4.13.17627.1000')  # count = 1 in test set
        label_encoder.classes_ = np.append(
            label_encoder.classes_, '4.9.10586.1177')  # count = 1 in test set
    if (col == 'OsVer'):
Esempio n. 12
0
        cosine = F.linear(x_norm, w_norm, None)
        out = cosine * self.scale
        return out

# Command-line arguments, in order: value stored as `resume`, path of the
# saved label-encoder classes, path of the CSV describing the test samples
resume   = sys.argv[1]
encoder  = sys.argv[2]
x_test   = sys.argv[3]

# Hard-coded data locations and augmentation options
train_folder = "/home/blcv/CODE/Kaggle/humpback_short_blażej/data/processed/train_bb_fastai2/"
test_df     = "/home/blcv/CODE/Kaggle/humpback_whale_identification/data/processed/sample_submission.csv"
test_folder = "/home/blcv/CODE/Kaggle/humpback_short_blażej/data/processed/test_bb_fastai2/"
option_da = ['gray']# [] #


# Restore the label encoder from the saved classes
label_encoder = LabelEncoder()
label_encoder.classes_ = np.load(encoder)
# Load the sample table and build the loader in 'val' mode.
# NOTE(review): the loader reads from train_folder although the table is
# called X_test -- confirm this is intended
X_test = pd.read_csv(x_test)
val_loader = getDataLoader(X_test, train_folder, 'val', option_da = option_da, image_size = 224, batch_size = 64)

# model preparation: pretrained backbone with a replaced pooling layer and head
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = 'se_resnext101_32x4d'
model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')
model.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
# New head: non-affine LayerNorm followed by NormLinear with 5004 outputs
model.last_linear = nn.Sequential(*[nn.LayerNorm(model.last_linear.in_features, elementwise_affine = False),
                                        NormLinear(model.last_linear.in_features, 5004)])

model = model.to(device)
model = nn.DataParallel(model)
Esempio n. 13
0
 def giveLE(self):
     """Return a LabelEncoder whose classes are read from 'classes.npy'."""
     restored = LabelEncoder()
     # no fitting needed: the saved class array fully defines the mapping
     restored.classes_ = np.load('classes.npy')
     return restored
Esempio n. 14
0
def loadLabelEncoder(file):
    """Rebuild a LabelEncoder from a saved ``classes_`` array.

    file: anything ``np.load`` accepts (path or open file object).

    Returns the restored encoder. The encoder used to be built and then
    discarded, so callers always received None.
    """
    encoder = LabelEncoder()
    encoder.classes_ = np.load(file)
    return encoder
def get_label_encoder():
    """Return the label encoder holding the label-to-integer mapping.

    The mapping is not fitted here; it is restored from the class array
    saved in 'label_encoder.npy'.
    """
    restored = LabelEncoder()
    restored.classes_ = np.load('label_encoder.npy')
    return restored
Esempio n. 16
0
def process_data(data_type='train', write_to_csv=False, return_df=True,
    include_sessions=False):
    """
    Build model-ready train and test frames from the raw CSV files.

    Reads TRAINING_DATA and TEST_DATA, derives features on their
    concatenation through the module's helpers (date columns, parsed age,
    categorical columns, NA filling), label-encodes the destination country
    and splits the rows back into a train and a test frame.

    data_type: forwarded to add_categorical_cols.
    write_to_csv: when true, write the frames to train.csv / test.csv
        (train_sessions.csv / test_sessions.csv with include_sessions).
    return_df: when true, return ``(test_df, train_df)``; otherwise None.
    include_sessions: left-join the rows of sessions_users.csv on
        id == user_id and fill the gaps with 0.
    """
    train = pd.read_csv(TRAINING_DATA, header=0)
    test = pd.read_csv(TEST_DATA, header=0)

    # Keep the target and the ids aside; they must not become features
    train_countries = train['country_destination']
    train_ids = train['id']
    test_ids = test['id']
    train.drop(['id', 'country_destination'], axis=1, inplace=True)
    test.drop(['id'], axis=1, inplace=True)

    # Number of training rows: the split point inside the concatenated frame
    piv_train = train.shape[0]

    data = pd.concat((train, test), axis=0, ignore_index=True)
    # features to output into model training data
    nonnumeric_columns = [
        'gender',
        'signup_method',
        'signup_flow',
        'language',
        'affiliate_channel',
        'affiliate_provider',
        'first_affiliate_tracked',
        'signup_app',
        'first_device_type',
        'first_browser',
    ]

    add_date_cols(data)
    parse_age(data)

    data = add_categorical_cols(data, nonnumeric_columns, data_type)
    data = fill_in_na(data)

    vals = data.values
    X = vals[:piv_train]

    # fit_transform learns classes_ itself (the sorted unique countries), so
    # pre-assigning classes_ before it had no effect and was removed.
    # NOTE(review): code that decodes with COUNTRY_CLASSES relies on that
    # constant being in this same sorted order -- confirm.
    le = LabelEncoder()
    y = le.fit_transform(train_countries)

    train_df = pd.DataFrame(X, columns=data.columns)
    train_df['id'] = train_ids
    train_df['country_destination'] = y

    X_kaggle = vals[piv_train:]
    test_df = pd.DataFrame(X_kaggle, columns=data.columns)
    test_df['id'] = test_ids

    if include_sessions:
        sessions_df = pd.read_csv('sessions_users.csv')

        # Join on the key columns. left_index/right_index are boolean flags;
        # passing column names there either raises or silently joins on the
        # row index, depending on the pandas version.
        train_df = train_df.merge(sessions_df, how='left',
                left_on='id', right_on='user_id')
        train_df.fillna(0, inplace=True)

        test_df = test_df.merge(sessions_df, how='left',
                left_on='id', right_on='user_id')
        test_df.fillna(0, inplace=True)

    if write_to_csv:
        if include_sessions:
            test_df.to_csv('test_sessions.csv', index=False)
            train_df.to_csv('train_sessions.csv', index=False)
            print('Wrote train_sessions.csv, test_sessions.csv')
        else:
            test_df.to_csv('test.csv', index=False)
            train_df.to_csv('train.csv', index=False)
            print('Wrote train.csv, test.csv')
    if return_df:
        return test_df, train_df
from Preprocess import extract_face, get_embedding
from tensorflow.python.keras.models import load_model
from sklearn.preprocessing import Normalizer, LabelEncoder
import argparse
import pickle
import numpy as np

# Input normaliser, label encoder restored from saved classes, and the
# network used to compute face embeddings
in_encoder = Normalizer()
out_encoder = LabelEncoder()
out_encoder.classes_ = np.load('classes.npy')
facenet_model = load_model('facenet_keras.h5')

# Trained classifier.
# NOTE(review): pickle runs arbitrary code on load -- only use a trusted file
with open('SVCtrainedModel.pkl', 'rb') as f:
    model = pickle.load(f)

# Command line: the path of the image to classify is required
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="Test Image Path")
# ap.add_argument("-n", "--name", required=True,
# 	help="Name of the person (same as the class name)")
args = vars(ap.parse_args())

# Extract the face, compute its embedding and normalise it; transform() takes
# a list of samples, hence the wrapping list and the [0]
random_face = extract_face(args['image'])
random_face_emd = in_encoder.transform(
    [get_embedding(facenet_model, random_face)])[0]
# random_face_name = args['name']

# Classify the single embedding (expanded to a one-sample batch)
samples = np.expand_dims(random_face_emd, axis=0)
yhat_class = model.predict(samples)
yhat_prob = model.predict_proba(samples)

# Predicted class of the only sample
class_index = yhat_class[0]
Esempio n. 18
0
def mark_your_attendance_out(request):
    """Recognise faces from the webcam until 'q' is pressed, then record them.

    Frames are read from video source 0, faces are detected with dlib,
    aligned and classified with the pickled SVC. Every name the classifier
    knows starts as not present; recognised names are flagged in ``present``,
    which is finally handed to ``update_attendance_in_db_out``. Returns the
    rendered admin dashboard page.
    """

    detector = dlib.get_frontal_face_detector()

    predictor = dlib.shape_predictor(
        'face_recognition_data/shape_predictor_68_face_landmarks.dat'
    )  #Add path to the shape predictor ######CHANGE TO RELATIVE PATH LATER
    svc_save_path = "face_recognition_data/svc.sav"

    # NOTE(review): pickle runs arbitrary code on load -- trusted files only
    with open(svc_save_path, 'rb') as f:
        svc = pickle.load(f)
    fa = FaceAligner(predictor, desiredFaceWidth=96)
    encoder = LabelEncoder()
    encoder.classes_ = np.load('face_recognition_data/classes.npy')

    # Probe the classifier with a dummy 128-value encoding just to learn how
    # many classes it distinguishes
    faces_encodings = np.zeros((1, 128))
    no_of_faces = len(svc.predict_proba(faces_encodings)[0])
    # Per-name state: sighting counter, presence flag, time of the last
    # sighting, and the time at which the current count started
    count = dict()
    present = dict()
    log_time = dict()
    start = dict()
    for i in range(no_of_faces):
        count[encoder.inverse_transform([i])[0]] = 0
        present[encoder.inverse_transform([i])[0]] = False

    vs = VideoStream(src=0).start()

    sampleNum = 0

    while (True):

        # NOTE(review): the frame is used without checking that a frame was
        # actually returned
        frame = vs.read()

        frame = imutils.resize(frame, width=800)

        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        faces = detector(gray_frame, 0)

        for face in faces:
            print("INFO : inside for loop")
            (x, y, w, h) = face_utils.rect_to_bb(face)

            face_aligned = fa.align(frame, gray_frame, face)
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 1)

            (pred, prob) = predict(face_aligned, svc)

            # [-1] is treated as "no known person"
            if (pred != [-1]):

                person_name = encoder.inverse_transform(np.ravel([pred]))[0]
                pred = person_name
                if count[pred] == 0:
                    start[pred] = time.time()
                    count[pred] = count.get(pred, 0) + 1

                # Reset the counter when the fourth sighting comes more than
                # 1.5 s after the first; otherwise mark the person present.
                # NOTE(review): on a first sighting the counter is incremented
                # both above and in the else branch, and `present` becomes
                # True immediately -- confirm this is intended
                if count[pred] == 4 and (time.time() - start[pred]) > 1.5:
                    count[pred] = 0
                else:
                    #if count[pred] == 4 and (time.time()-start) <= 1.5:
                    present[pred] = True
                    log_time[pred] = datetime.datetime.now()
                    count[pred] = count.get(pred, 0) + 1
                    print(pred, present[pred], count[pred])
                # Label the face with the name and the probability
                cv2.putText(frame,
                            str(person_name) + str(prob), (x + 6, y + h - 6),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

            else:
                person_name = "unknown"
                cv2.putText(frame, str(person_name), (x + 6, y + h - 6),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

            #cv2.putText()
            # Before continuing to the next loop, I want to give it a little pause
            # waitKey of 100 millisecond
            #cv2.waitKey(50)

        # Show the annotated frame in a window with the given title
        cv2.imshow("Mark Attendance- Out - Press q to exit", frame)
        # A waitKey call is required for the window to refresh; it also polls
        # the keyboard (50 ms timeout)
        #cv2.waitKey(1)
        # Leave the loop when 'q' is pressed
        key = cv2.waitKey(50) & 0xFF
        if (key == ord("q")):
            break

    # Stop the video stream
    vs.stop()

    # Destroy all windows, then store the collected presence flags
    cv2.destroyAllWindows()
    update_attendance_in_db_out(present)
    # return redirect('admin')
    return render(request, 'recognition/admin_dashboard.html')
Esempio n. 19
0
'''

# Convert Tag1 from strings to integers
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
train['Tag1_enc'] = le.fit_transform(train.Tag1)

# Create a dummy column for each value of Tag1_enc (returns a sparse matrix)
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder()
tag1_dummies = ohe.fit_transform(train[['Tag1_enc']])

# Adjust Tag1 on the testing set, since LabelEncoder errors on new values
# during a transform: unseen tags are replaced by '<unknown>', which is then
# appended to the encoder's classes.
# NOTE(review): '<unknown>' gets a code the OneHotEncoder never saw while
# fitting -- confirm ohe.transform accepts it
test['Tag1'] = test['Tag1'].map(lambda s: '<unknown>'
                                if s not in le.classes_ else s)
le.classes_ = np.append(le.classes_, '<unknown>')

# Define X and y
X = tag1_dummies
y = train.OpenStatus

# Apply the same encoding to the actual testing data and make predictions;
# column 1 of predict_proba is kept as the submitted probability
test['Tag1_enc'] = le.transform(test.Tag1)
oos_tag1_dummies = ohe.transform(test[['Tag1_enc']])
nb.fit(X, y)
oos_pred_prob = nb.predict_proba(oos_tag1_dummies)[:, 1]
sub = pd.DataFrame({
    'id': test.index,
    'OpenStatus': oos_pred_prob
}).set_index('id')
sub.to_csv('sub4.csv')  # 0.652
if __name__ == '__main__':

    # ROS node initialization
    rospy.init_node('clustering', anonymous=True)

    # Subscriber: every incoming point cloud is handed to pcl_callback
    pcl_sub = rospy.Subscriber("/sensor_stick/point_cloud", pc2.PointCloud2, pcl_callback, queue_size=1)

    # Publishers for the intermediate clouds
    pcl_objects_pub = rospy.Publisher("/pcl_objects", pc2.PointCloud2, queue_size=1)
    pcl_table_pub = rospy.Publisher("/pcl_table", pc2.PointCloud2, queue_size=1)
    pcl_cluster_pub = rospy.Publisher("/pcl_cluster", pc2.PointCloud2, queue_size=1)

    # Publishers for the recognition results
    object_markers_pub = rospy.Publisher("/object_markers", Marker, queue_size=1)
    detected_objects_pub = rospy.Publisher("/detected_objects", DetectedObjectsArray, queue_size=1)

    # Load the trained model from disk; the context manager closes the file
    # even if unpickling fails.
    # NOTE(review): pickle runs arbitrary code on load -- only use a trusted model.sav.
    with open('model.sav', 'rb') as model_file:
        model = pickle.load(model_file)
    clf = model['classifier']
    encoder = LabelEncoder()
    encoder.classes_ = model['classes']
    scaler = model['scaler']

    # Initialize color_list
    get_color_list.color_list = []

    # Spin while node is not shutdown
    while not rospy.is_shutdown():
        rospy.spin()
Esempio n. 21
0
import cv2 as cv
import numpy as np
from sklearn.preprocessing import LabelEncoder

import warnings
warnings.filterwarnings("ignore")

# Haar-cascade face detector and the LBPH recogniser restored from disk
face_cascade = cv.CascadeClassifier(
    '../cascades/haarcascade_frontalface_alt2.xml')
recognizer = cv.face.LBPHFaceRecognizer_create()
recognizer.read('../Saved_models/model.yml')

# Label encoder restored from its saved classes
le = LabelEncoder()
le.classes_ = np.load('../Saved_models/encoder.npy')

# Capture from video device 0
cap = cv.VideoCapture(0)
while True:
    # Capture frame by frame.
    # NOTE(review): `ret` is not checked, so a failed read reaches cvtColor
    ret, frame = cap.read()
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)

    # Detect faces on the grayscale frame
    faces = face_cascade.detectMultiScale(gray,
                                          scaleFactor=1.5,
                                          minNeighbors=5)
    for x, y, w, h in faces:
        #print (x,y,w,h)
        # Crop the face region from both the grayscale and the colour frame
        roi_gray = gray[y:y + h, x:x + w]
        roi_color = frame[y:y + h, x:x + w]

        #img_item = 'face_detected.jpg'
Esempio n. 22
0
from flask import Flask, render_template, url_for
from flask_wtf import FlaskForm
from wtforms import FileField
from flask_uploads import configure_uploads, AUDIO, UploadSet
import os
import numpy as np
import librosa
from tensorflow import keras
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder

# Load the trained model and restore the label encoder from its saved classes
model = keras.models.load_model("./saved_models/weights.best.basic_cnn.hdf5")
le = LabelEncoder()
le.classes_ = np.load("./saved_models/classes.npy")

# Data dimensions (presumably the model's input shape -- confirm)
num_rows = 40
num_columns = 174
num_channels = 1

# CODE FOR FLASK APP

# Static files are served from ./static relative to the working directory
app = Flask(__name__, static_folder=os.path.join(os.getcwd(), "static"))
# NOTE(review): the secret key is hard-coded in source; load it from the
# environment or a config file before deploying
app.config['SECRET_KEY'] = "clishmaclaver"
app.config['UPLOADED_AUDIOSET_DEST'] = "static"

# Upload set restricted to audio files, stored in the destination set above
audioset = UploadSet("audioset", AUDIO)
configure_uploads(app, audioset)

    def plate_reader(plate_pic):
        """Read the characters of a licence plate from an encoded picture.

        plate_pic: binary file-like object (anything with ``read()``) that
            holds an encoded image.

        Steps: locate the plate with the WPOD network, binarise it, cut out
        character candidates by contour shape, classify every candidate with
        the character model and concatenate the predicted labels.

        Returns the recognised text as a string. An empty string is returned
        when no plate image is produced; this used to end in a NameError
        because the intermediate images were never created.
        """
        # Silence TensorFlow's logging; set before keras is imported below
        import os
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

        # required library
        import cv2
        import numpy as np
        from local_utils import detect_lp
        from os.path import splitext
        from keras.models import model_from_json
        from sklearn.preprocessing import LabelEncoder

        def get_opencv_img_from_buffer(buffer, flags):
            """Decode the bytes of a file-like object into an OpenCV image."""
            bytes_as_np_array = np.frombuffer(buffer.read(), dtype=np.uint8)
            return cv2.imdecode(bytes_as_np_array, flags)

        def load_model(path):
            """Load <path>.json + <path>.h5; prints the error and returns None on failure."""
            try:
                path = splitext(path)[0]
                with open('%s.json' % path, 'r') as json_file:
                    model_json = json_file.read()
                model = model_from_json(model_json, custom_objects={})
                model.load_weights('%s.h5' % path)
                print("Loading model successfully...")
                return model
            except Exception as e:
                print(e)

        def preprocess_image(img, resize=False):
            """Convert a BGR image to RGB scaled by 1/255, optionally resized to 255x255."""
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = img / 255
            if resize:
                img = cv2.resize(img, (255, 255))
            return img

        def get_plate(img, Dmax=608, Dmin=256):
            """Run plate detection; returns (vehicle image, plate images, coordinates)."""
            vehicle = preprocess_image(img)
            ratio = float(max(vehicle.shape[:2])) / min(vehicle.shape[:2])
            side = int(ratio * Dmin)
            bound_dim = min(side, Dmax)
            _, LpImg, _, cor = detect_lp(wpod_net, vehicle, bound_dim, lp_threshold=0.5)
            return vehicle, LpImg, cor

        def sort_contours(cnts, reverse=False):
            """Order contours by the first coordinate of their bounding box."""
            if len(cnts) == 0:
                # zip(*...) cannot be unpacked for an empty input
                return ()
            boundingBoxes = [cv2.boundingRect(c) for c in cnts]
            ordered = sorted(zip(cnts, boundingBoxes),
                             key=lambda b: b[1][0], reverse=reverse)
            return tuple(contour for contour, _ in ordered)

        def predict_from_model(image, model, labels):
            """Classify one character image; returns a one-element label array."""
            image = cv2.resize(image, (80, 80))
            # replicate the single channel three times
            image = np.stack((image,)*3, axis=-1)
            prediction = labels.inverse_transform([np.argmax(model.predict(image[np.newaxis, :]))])
            return prediction

        wpod_net = load_model("wpod-net.json")

        picture = get_opencv_img_from_buffer(plate_pic, cv2.IMREAD_UNCHANGED)
        _, LpImg, _ = get_plate(picture)

        if not len(LpImg):
            # no licence image at all: nothing to read
            return ''

        # Scales, calculates absolute values, and converts the result to 8-bit.
        plate_image = cv2.convertScaleAbs(LpImg[0], alpha=(255.0))

        # convert to grayscale and blur the image
        gray = cv2.cvtColor(plate_image, cv2.COLOR_BGR2GRAY)
        blur = cv2.GaussianBlur(gray, (7, 7), 0)

        # Applied inversed thresh_binary
        binary = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 2)

        kernel3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        thre_mor = cv2.morphologyEx(binary, cv2.MORPH_DILATE, kernel3)

        cont, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Character images, in the order given by sort_contours
        crop_characters = []

        # define standard width and height of character
        digit_w, digit_h = 20, 50

        for c in sort_contours(cont):
            (x, y, w, h) = cv2.boundingRect(c)
            ratio = h/w
            # Only select contours with a character-like height/width ratio
            # whose height is at least 40% of the plate
            if 1 <= ratio <= 3.5 and h/plate_image.shape[0] >= 0.4:
                # Separate the character and normalise it for prediction
                curr_num = thre_mor[y:y+h, x:x+w]
                curr_num = cv2.resize(curr_num, dsize=(digit_w, digit_h))
                _, curr_num = cv2.threshold(curr_num, 220, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
                crop_characters.append(curr_num)

        print("Detect {} letters...".format(len(crop_characters)))

        # Load model architecture, weight and labels
        with open('MobileNets_character_recognition.json', 'r') as json_file:
            loaded_model_json = json_file.read()
        model = model_from_json(loaded_model_json)
        model.load_weights("License_character_recognition_weight.h5")
        print("[INFO] Model loaded successfully...")

        labels = LabelEncoder()
        labels.classes_ = np.load('license_character_classes.npy')
        print("[INFO] Labels loaded successfully...")

        final_string = ''

        for character in crop_characters:
            # array2string renders the one-element label array like ['A'];
            # the quotes and brackets are stripped again
            title = np.array2string(predict_from_model(character, model, labels))
            final_string += title.strip("'[]")

        print("Achieved result: ", final_string)

        print(type(final_string))
        return final_string
# Train on augmented batches and validate on the held-out set
result = model.fit(image_gen.flow(trainX, trainY, batch_size=BATCH_SIZE), 
                   steps_per_epoch=len(trainX) // BATCH_SIZE, 
                   validation_data=(testX, testY), 
                   validation_steps=len(testX) // BATCH_SIZE, 
                   epochs=EPOCHS, callbacks=my_checkpointer)

# Load model architecture, weight and labels; the context manager closes the
# JSON file even when reading it fails
with open('/content/Plate_detect_and_recognize/MobileNets_character_recognition.json', 'r') as json_file:
    loaded_model_json = json_file.read()
model = model_from_json(loaded_model_json)
model.load_weights("/content/Plate_detect_and_recognize/License_character_recognition_weight.h5")
print("[INFO] Model loaded successfully...")

# Restore the label encoder from its saved classes
labels = LabelEncoder()
labels.classes_ = np.load('/content/Plate_detect_and_recognize/license_character_classes.npy')
print("[INFO] Labels loaded successfully...")

# pre-processing input images and pedict with model
def predict_from_model(image,model,labels):
    """Predict the label of a single image and return it decoded.

    The image is resized to 80x80, copied into three identical channels
    along a new last axis, given a leading batch axis and passed to
    ``model.predict``.  The index of the highest score is mapped back
    through ``labels.inverse_transform``.
    """
    resized = cv2.resize(image, (80, 80))
    three_channel = np.stack([resized, resized, resized], axis=-1)
    scores = model.predict(three_channel[np.newaxis, :])
    best_index = np.argmax(scores)
    return labels.inverse_transform([best_index])

# Create a 15x3 figure and a single-row grid with one column per entry of
# `crop_characters`.
fig = plt.figure(figsize=(15,3))
cols = len(crop_characters)
grid = gridspec.GridSpec(ncols=cols,nrows=1,figure=fig)

# Presumably accumulates the recognised characters -- the loop body that would
# fill it is not part of this excerpt.
final_string = ''
# NOTE(review): the body of this loop is missing from the visible source.
for i,character in enumerate(crop_characters):
Esempio n. 25
0
def train_model(params, device):
    """Train the model selected by ``params`` and report best-epoch metrics.

    The data splits come from ``createDatasetSplit(params)``.  After every
    epoch the model is evaluated on the train, validation and test loaders
    via ``Eval_phase``; whenever the validation F-score improves, the
    current metrics are remembered and the model is saved.

    :param params: configuration dictionary.  Keys read here:
        ``bert_tokens``, ``auto_weights``, ``batch_size``, ``device``,
        ``learning_rate``, ``epsilon``, ``epochs``, ``random_seed`` and
        ``logging``.  The keys ``embed_size``, ``vocab_size`` (non-BERT
        path) and ``weights`` (when ``auto_weights`` is set) are written.
    :param device: device the batch tensors are moved to and that is
        forwarded to the model call and to ``Eval_phase``.
    :return: always ``1``.
    """
    embeddings = None
    if params['bert_tokens']:
        train, val, test = createDatasetSplit(params)
    else:
        train, val, test, vocab_own = createDatasetSplit(params)
        params['embed_size'] = vocab_own.embeddings.shape[1]
        params['vocab_size'] = vocab_own.embeddings.shape[0]
        embeddings = vocab_own.embeddings
    if params['auto_weights']:
        # NOTE(review): the class weights are derived from the labels of the
        # *test* split -- confirm this is intended rather than the train split.
        y_test = [ele[2] for ele in test]
        # `classes` and `y` are passed by keyword: recent scikit-learn
        # releases no longer accept them positionally.
        params['weights'] = class_weight.compute_class_weight(
            class_weight='balanced',
            classes=np.unique(y_test),
            y=y_test).astype('float32')

    batch_size_eval = min(params['batch_size'], 32)
    train_dataloader = combine_features(train, params, is_train=True)
    train_dataloader_eval = combine_features(train,
                                             params,
                                             is_train=True,
                                             batch_size=batch_size_eval)
    validation_dataloader = combine_features(val,
                                             params,
                                             is_train=False,
                                             batch_size=batch_size_eval)
    test_dataloader = combine_features(test,
                                       params,
                                       is_train=False,
                                       batch_size=batch_size_eval)

    model = select_model(params, embeddings)

    if params["device"] == 'cuda':
        model.cuda()
    optimizer = AdamW(
        model.parameters(),
        lr=params[
            'learning_rate'],  # args.learning_rate - default is 5e-5, our notebook had 2e-5
        eps=params['epsilon']  # args.adam_epsilon  - default is 1e-8.
    )

    # Number of training epochs (authors recommend between 2 and 4)
    # Total number of training steps is number of batches * number of epochs.
    total_steps = len(train_dataloader) * params['epochs']

    # The learning-rate scheduler is only used on the BERT path.
    scheduler = None
    if params['bert_tokens']:
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=int(total_steps / 10),
            num_training_steps=total_steps)

    # Set the seed value all over the place to make this reproducible.
    fix_the_random(seed_val=params['random_seed'])
    # Store the average loss after each epoch so we can plot them.
    loss_values = []

    # The tokenizer does not change between epochs, so load it once instead
    # of once per epoch.  It is only needed when saving a BERT model.
    tokenizer = None
    if params['bert_tokens']:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                                  do_lower_case=False)

    best_val_fscore = 0
    best_test_fscore = 0

    best_val_roc_auc = 0
    best_test_roc_auc = 0

    best_val_precision = 0
    best_test_precision = 0

    best_val_recall = 0
    best_test_recall = 0

    for epoch_i in range(0, params['epochs']):
        print("")
        print('======== Epoch {:} / {:} ========'.format(
            epoch_i + 1, params['epochs']))
        print('Training...')

        # Reset the total loss for this epoch.
        total_loss = 0
        model.train()

        # For each batch of training data...
        for batch in tqdm(train_dataloader):
            # `batch` contains four pytorch tensors:
            #   [0]: input ids
            #   [1]: attention vals
            #   [2]: attention mask
            #   [3]: labels
            b_input_ids = batch[0].to(device)
            b_att_val = batch[1].to(device)
            b_input_mask = batch[2].to(device)
            b_labels = batch[3].to(device)

            # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)
            model.zero_grad()
            outputs = model(b_input_ids,
                            attention_vals=b_att_val,
                            attention_mask=b_input_mask,
                            labels=b_labels,
                            device=device)

            # The loss is the first element of the model output.
            loss = outputs[0]

            # Accumulate the training loss over all of the batches so that we
            # can calculate the average loss at the end of the epoch.
            batch_loss = loss.item()
            total_loss += batch_loss

            # Perform a backward pass to calculate the gradients.
            loss.backward()

            # Clip the norm of the gradients to 1.0.
            # This is to help prevent the "exploding gradients" problem.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            # Update parameters and take a step using the computed gradient.
            optimizer.step()
            # Update the learning rate.
            if params['bert_tokens']:
                scheduler.step()

            if params['logging'] == 'neptune':
                neptune.log_metric('batch_loss', batch_loss)

        # Calculate the average loss over the training data.
        avg_train_loss = total_loss / len(train_dataloader)
        if params['logging'] == 'neptune':
            neptune.log_metric('avg_train_loss', avg_train_loss)
        else:
            print('avg_train_loss', avg_train_loss)

        # Store the loss value for plotting the learning curve.
        loss_values.append(avg_train_loss)
        train_fscore, train_accuracy, train_precision, train_recall, train_roc_auc, _ = Eval_phase(
            params, 'train', model, train_dataloader_eval, device)
        val_fscore, val_accuracy, val_precision, val_recall, val_roc_auc, _ = Eval_phase(
            params, 'val', model, validation_dataloader, device)
        test_fscore, test_accuracy, test_precision, test_recall, test_roc_auc, logits_all_final = Eval_phase(
            params, 'test', model, test_dataloader, device)

        # Report the per-split metrics of this epoch (test, val, train).
        if params['logging'] == 'neptune':
            epoch_metrics = (
                ('test', test_fscore, test_accuracy, test_precision,
                 test_recall, test_roc_auc),
                ('val', val_fscore, val_accuracy, val_precision,
                 val_recall, val_roc_auc),
                ('train', train_fscore, train_accuracy, train_precision,
                 train_recall, train_roc_auc),
            )
            for split, fscore, accuracy, precision, recall, roc_auc in epoch_metrics:
                neptune.log_metric(split + '_fscore', fscore)
                neptune.log_metric(split + '_accuracy', accuracy)
                neptune.log_metric(split + '_precision', precision)
                neptune.log_metric(split + '_recall', recall)
                neptune.log_metric(split + '_rocauc', roc_auc)

        # Model selection is driven by the validation F-score; the test
        # metrics of that same epoch are recorded alongside it.
        if val_fscore > best_val_fscore:
            print(val_fscore, best_val_fscore)
            best_val_fscore = val_fscore
            best_test_fscore = test_fscore
            best_val_roc_auc = val_roc_auc
            best_test_roc_auc = test_roc_auc

            best_val_precision = val_precision
            best_test_precision = test_precision
            best_val_recall = val_recall
            best_test_recall = test_recall

            if params['bert_tokens']:
                save_bert_model(model, tokenizer, params)
            else:
                print("Saving model")
                save_normal_model(model, params)

    best_metrics = (
        ('best_val_fscore', best_val_fscore),
        ('best_test_fscore', best_test_fscore),
        ('best_val_rocauc', best_val_roc_auc),
        ('best_test_rocauc', best_test_roc_auc),
        ('best_val_precision', best_val_precision),
        ('best_test_precision', best_test_precision),
        ('best_val_recall', best_val_recall),
        ('best_test_recall', best_test_recall),
    )
    if params['logging'] == 'neptune':
        for metric_name, metric_value in best_metrics:
            neptune.log_metric(metric_name, metric_value)

        neptune.stop()
    else:
        for metric_name, metric_value in best_metrics:
            print(metric_name, metric_value)

    # Drop the model reference and release cached CUDA memory.
    del model
    torch.cuda.empty_cache()
    return 1
Esempio n. 26
0
def plot_confusion_matrix(cm, classes, path, encoder_model,
                          normalize=True,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues
                          ):
    """
    Plot a confusion matrix and save it as ``<path>/build/confusion_matrix.pdf``.

    Normalization can be applied by setting `normalize=True`; each row is
    then divided by its sum.  Rows whose sum is zero are left at 0 instead
    of producing NaN values.

    The figure is closed and the matplotlib rcParams are reset to their
    defaults when the function finishes, also when an error occurs.

    :param cm: confusion matrix (numpy array) generated by sklearn
    :param classes: encoded class labels, decoded with the stored encoder
    :param path: directory in which the ``build`` folder is created
    :param encoder_model: file name of the saved encoder classes inside the
        ``labels/`` directory
    :param normalize: whether to normalize each row of ``cm``
    :param title: title of the plot
    :param cmap: colormap passed to ``plt.imshow``
    """
    # Set figsize
    fig = plt.figure(figsize=(5.8, 3.58))
    try:
        # The 120-class case gets a smaller font and no per-cell text.
        many_classes = len(classes) == 120
        mpl.rcParams.update({'font.size': 3 if many_classes else 5})

        print("plot confusion matrix")

        out_dir = os.path.join(path, 'build')
        os.makedirs(out_dir, exist_ok=True)

        # Decode the class names
        path_to_labels = os.path.join(
            Path(os.path.abspath(__file__)).parents[2], "labels/")
        encoder_path = os.path.join(path_to_labels, encoder_model)
        encoder = LabelEncoder()
        encoder.classes_ = np.load(encoder_path)
        class_names = [cl.replace('_', ' ')
                       for cl in encoder.inverse_transform(classes)]

        if normalize:
            row_totals = cm.sum(axis=1)[:, np.newaxis]
            # Only divide where the row total is non-zero to avoid NaN cells.
            cm = np.divide(cm.astype('float'), row_totals,
                           out=np.zeros(cm.shape, dtype='float'),
                           where=row_totals != 0)

        plt.imshow(cm, interpolation='nearest', cmap=cmap)

        if many_classes:
            plt.title(title, fontsize=12)
        else:
            plt.title(title)

        plt.colorbar()
        tick_marks = np.arange(len(class_names))
        plt.xticks(tick_marks, class_names, rotation=45,
                   horizontalalignment='right')
        plt.yticks(tick_marks, class_names)

        # print text if not 120 classes are given
        if not many_classes:
            # Loop over data dimensions and create text annotations.
            fmt = '.2f' if normalize else 'd'
            thresh = cm.max() / 2.
            for i, j in itertools.product(range(cm.shape[0]),
                                          range(cm.shape[1])):
                plt.text(j, i, format(cm[i, j], fmt),
                         horizontalalignment="center",
                         color="white" if cm[i, j] > thresh else "black")

        plt.tight_layout()
        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.savefig(os.path.join(out_dir, 'confusion_matrix.pdf'),
                    dpi=500, pad_inches=0, bbox_inches='tight')
    finally:
        # Close the figure so repeated calls do not accumulate open figures,
        # and reset rcParams.
        plt.close(fig)
        mpl.rcParams.update(mpl.rcParamsDefault)
Esempio n. 27
0
    # Create the publishers (all with queue_size=1).
    pcl_objects_pub = rospy.Publisher("/pcl_objects",
                                      PointCloud2,
                                      queue_size=1)
    pcl_table_pub = rospy.Publisher("/pcl_table", PointCloud2, queue_size=1)
    pcl_cluster_pub = rospy.Publisher('/pcl_cluster',
                                      PointCloud2,
                                      queue_size=1)
    object_markers_pub = rospy.Publisher("/object_markers",
                                         Marker,
                                         queue_size=1)
    detected_objects_pub = rospy.Publisher("/detected_objects",
                                           DetectedObjectsArray,
                                           queue_size=1)

    # Initialize color_list
    get_color_list.color_list = []

    # Load the stored classifier bundle; the context manager closes the file.
    # NOTE(review): pickle must only be used on trusted files -- confirm that
    # 'model.sav' always comes from a trusted source.
    with open('model.sav', 'rb') as model_file:
        model = pickle.load(model_file)
    clf = model['classifier']
    encoder = LabelEncoder()
    encoder.classes_ = model["classes"]
    scaler = model['scaler']

    # Spin while node is not shutdown
    while not rospy.is_shutdown():
        rospy.spin()
Esempio n. 28
0
        print(e)


# Load the network described by wpod-net.json through `load_model`.
wpod_net_path = "wpod-net.json"
wpod_net = load_model(wpod_net_path)

# Load model architecture, weight and labels.
# The context manager closes the JSON file even if reading it fails.
with open('MobileNets_char_rec_18052021.json', 'r') as json_file:
    loaded_model_json = json_file.read()
model = model_from_json(loaded_model_json)
model.load_weights("character_recognition_18052021.h5")
print("[INFO] Model loaded successfully...")

# Rebuild the label encoder from the class array stored on disk.
labels = LabelEncoder()
labels.classes_ = np.load('character_classes.npy')
print("[INFO] Labels loaded successfully...")


def preprocess_image(image_path, resize=False):
    """Read an image file, convert it from BGR to RGB and divide it by 255.

    When ``resize`` is truthy the scaled image is additionally resized to
    224x224 before being returned.
    """
    bgr = cv2.imread(image_path)
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    scaled = rgb / 255
    if not resize:
        return scaled
    return cv2.resize(scaled, (224, 224))


def get_plate(image, Dmax=608, Dmin=608):
    """Convert ``image`` from BGR to RGB and divide it by 255.

    NOTE(review): only these two statements are visible in this excerpt;
    ``Dmax`` and ``Dmin`` are not used by them and nothing is returned, so
    the function appears to be truncated -- confirm against the full source.
    """
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image / 255
import cleaning
import numpy as np
import glob
import pandas as pd
import pickle
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.metrics import confusion_matrix
from sklearn.feature_extraction.text import CountVectorizer

# Load the pickled tokenizer.
with open('tokenizer/tokenize_b.pickle', 'rb') as handle:
    tokenize = pickle.load(handle)

# Rebuild the label encoder from the class array stored on disk.
encoder = LabelEncoder()
encoder.classes_ = np.load('labelencoder/encoder_m1.npy')

# Load both pickled models; the context managers close the files, which the
# previous bare `open` calls never did.
with open('models/model_b', 'rb') as modelFileLoad1:
    fit_model1 = pickle.load(modelFileLoad1)
with open('models/model_m1', 'rb') as modelFileLoad2:
    fit_model2 = pickle.load(modelFileLoad2)
from IPython.display import display
from tabulate import tabulate


def analyze_message(value):
    """Start analysing ``value`` by cleaning it with ``cleaning.clean``.

    An empty DataFrame with six named columns is prepared and the four
    results of ``cleaning.clean(value)`` are unpacked.

    NOTE(review): the remainder of the function is not visible in this
    excerpt, so how ``output`` is filled and what is returned cannot be
    stated here.
    """
    # Column layout of the result table.
    col_names = [
        'Station name', 'Train name', 'Category', 'Platform number', 'Is spam',
        'If delay'
    ]
    output = pd.DataFrame(columns=col_names)
    #print(value)
    a, b, d, t = cleaning.clean(value)
Esempio n. 30
0
    'classes.npy',
    label_encoder.classes_)  #Menyimpan label encoder dengan nama classes.npy

# In[18]:load the pre-trained model and predict the math symbol for an arbitrary image;
# the code below could be placed in a separate file
import keras.models  # Import the keras models module
model2 = keras.models.load_model(
    "mathsymbols.model"
)  # Create model2 by loading the model that was saved earlier
print(model2.summary())  # Print the summary of model2

# In[19]:restore the class name to integer encoder
label_encoder2 = LabelEncoder(
)  # Create a second label encoder instance
label_encoder2.classes_ = np.load(
    'classes.npy'
)  # Set classes_ from the class array that was exported earlier


def predict(img_path):  # Define predict, which takes the path of an image
    """Run ``model2`` on the image stored at ``img_path``.

    NOTE(review): the end of this function is not visible in this excerpt,
    so the decoding of ``prediction`` and the return value cannot be
    documented here.
    """
    newimg = keras.preprocessing.image.img_to_array(
        pil_image.open(img_path)
    )  # Open the image at img_path and convert it to an array
    newimg /= 255.0  # Divide the values in newimg by 255 in place

    # do the prediction
    prediction = model2.predict(
        newimg.reshape(1, 32, 32, 3)
    )  # Predict with model2 on newimg reshaped to (1, 32, 32, 3)

    # figure out which output neuron had the highest score, and reverse the one-hot encoding
Esempio n. 31
0
                if (val == number):
                    accuracy = accuracy
                else:
                    accuracy -= 1

            else:
                print('No LP detected')
                accuracy -= 1

        print(accuracy)


# Prepare the pretrained network described by wpod-net.json.
wpod_net_path = "wpod-net.json"
wpod_net = load_model(wpod_net_path)

# Prepare the pretrained character-recognition model: architecture from JSON,
# weights from the .h5 file.  The context manager closes the JSON file even
# if reading it fails.
with open('MobileNets_character_recognition.json', 'r') as json_file:
    loaded_model_json = json_file.read()
model = model_from_json(loaded_model_json)
model.load_weights("License_character_recognition_weight.h5")
print("[INFO] Model loaded successfully...")

# Rebuild the label encoder from the class array stored on disk.
labels = LabelEncoder()
labels.classes_ = np.load('license_character_classes.npy')
print("[INFO] Labels loaded successfully...")

if __name__ == '__main__':
    Execute.sampleSingleLP()
Esempio n. 32
0
def predict(experiment_id, inputs):
    """Run a trained experiment's TextCNN on ``inputs`` and explain the output.

    ``experiment_id`` names a sub-directory of ``config.EXPERIMENTS_DIR``;
    the value ``'latest'`` selects the maximum entry of that directory.
    ``inputs`` is a sequence of mappings that each carry a ``'text'`` key.

    Returns a list with one dict per input holding ``raw_input``,
    ``preprocessed_input``, ``probabilities`` and ``top_n_grams``.
    """
    # Locate the experiment and read its stored configuration
    if experiment_id == 'latest':
        experiment_id = max(os.listdir(config.EXPERIMENTS_DIR))
    experiment_dir = os.path.join(config.EXPERIMENTS_DIR, experiment_id)
    config_path = os.path.join(experiment_dir, 'config.json')
    args = Namespace(**utils.load_json(config_path))

    # Restore the text tokenizer and the label encoder
    texts = [sample['text'] for sample in inputs]
    x_tokenizer_path = os.path.join(experiment_dir, 'X_tokenizer.json')
    with open(x_tokenizer_path, 'r') as fp:
        X_tokenizer = tokenizer_from_json(json.load(fp))
    y_tokenizer = LabelEncoder()
    y_tokenizer_path = os.path.join(experiment_dir, 'y_tokenizer.npy')
    y_tokenizer.classes_ = np.load(y_tokenizer_path, allow_pickle=True)

    # Encode the texts and wrap them in a generator; the labels are dummies
    X_infer = np.array(X_tokenizer.texts_to_sequences(texts))
    preprocessed_texts = X_tokenizer.sequences_to_texts(X_infer)
    y_filler = np.array([0] * len(X_infer))
    largest_filter = max(args.filter_sizes)
    inference_generator = data.DataGenerator(X=X_infer,
                                             y=y_filler,
                                             batch_size=args.batch_size,
                                             max_filter_size=largest_filter)

    # Rebuild the classifier and load the stored weights
    model = models.TextCNN(embedding_dim=args.embedding_dim,
                           vocab_size=len(X_tokenizer.word_index) + 1,
                           num_filters=args.num_filters,
                           filter_sizes=args.filter_sizes,
                           hidden_dim=args.hidden_dim,
                           dropout_p=args.dropout_p,
                           num_classes=len(y_tokenizer.classes_))
    model.summary(input_shape=(10, ))  # build it
    model.load_weights(os.path.join(experiment_dir, 'model/cp.ckpt'))

    # Companion model that exposes the convolution outputs
    conv_outputs_model = models.ConvOutputsModel(
        vocab_size=len(X_tokenizer.word_index) + 1,
        embedding_dim=args.embedding_dim,
        filter_sizes=args.filter_sizes,
        num_filters=args.num_filters)
    conv_outputs_model.summary(input_shape=(10, ))  # build it

    # Copy layer 0 and then one layer per filter size from the classifier
    conv_outputs_model.layers[0].set_weights(model.layers[0].get_weights())
    first_conv_layer = 1
    for offset in range(len(args.filter_sizes)):
        layer_num = first_conv_layer + offset
        source_weights = model.layers[layer_num].get_weights()
        conv_outputs_model.layers[layer_num].set_weights(source_weights)

    # Predict with both models and assemble one record per input
    y_prob = model.predict(x=inference_generator, verbose=1)
    conv_outputs = conv_outputs_model.predict(x=inference_generator, verbose=1)
    return [
        {
            'raw_input': texts[index],
            'preprocessed_input': preprocessed_texts[index],
            'probabilities': get_probability_distribution(
                y_prob[index], y_tokenizer.classes_),
            'top_n_grams': get_top_n_grams(
                tokens=preprocessed_texts[index].split(' '),
                conv_outputs=conv_outputs,
                filter_sizes=args.filter_sizes),
        }
        for index in range(len(X_infer))
    ]
Esempio n. 33
0
    def from_estimator(
        cls,
        estimator,
        X,
        *,
        grid_resolution=100,
        eps=1.0,
        plot_method="contourf",
        response_method="auto",
        xlabel=None,
        ylabel=None,
        ax=None,
        **kwargs,
    ):
        """Plot decision boundary given an estimator.

        Read more in the :ref:`User Guide <visualizations>`.

        .. versionadded:: 1.0

        Parameters
        ----------
        estimator : object
            Trained estimator used to plot the decision boundary.

        X : {array-like, sparse matrix, dataframe} of shape (n_samples, 2)
            Input data that should be only 2-dimensional.

        grid_resolution : int, default=100
            Number of grid points to use for plotting decision boundary.
            Higher values will make the plot look nicer but be slower to
            render.

        eps : float, default=1.0
            Extends the minimum and maximum values of X for evaluating the
            response function.

        plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'
            Plotting method to call when plotting the response. Please refer
            to the following matplotlib documentation for details:
            :func:`contourf <matplotlib.pyplot.contourf>`,
            :func:`contour <matplotlib.pyplot.contour>`,
            :func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.

        response_method : {'auto', 'predict_proba', 'decision_function', \
                'predict'}, default='auto'
            Specifies whether to use :term:`predict_proba`,
            :term:`decision_function`, :term:`predict` as the target response.
            If set to 'auto', the response method is tried in the following order:
            :term:`predict_proba`, :term:`decision_function`, :term:`predict`.

        xlabel : str, default=None
            The label used for the x-axis. If `None`, an attempt is made to
            extract a label from `X` if it is a dataframe, otherwise an empty
            string is used.

        ylabel : str, default=None
            The label used for the y-axis. If `None`, an attempt is made to
            extract a label from `X` if it is a dataframe, otherwise an empty
            string is used.

        ax : Matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        **kwargs : dict
            Additional keyword arguments to be passed to the
            `plot_method`.

        Returns
        -------
        display : :class:`~sklearn.inspection.DecisionBoundaryDisplay`
            Object that stores the result.

        Raises
        ------
        ValueError
            If `grid_resolution` is not greater than 1, if `eps` is negative,
            if `plot_method` is not one of the supported methods, or if the
            response is two-dimensional with a number of columns other
            than 2.

        See Also
        --------
        DecisionBoundaryDisplay : Decision boundary visualization.
        ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix
            given an estimator, the data, and the label.
        ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix
            given the true and predicted labels.

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.linear_model import LogisticRegression
        >>> from sklearn.inspection import DecisionBoundaryDisplay
        >>> iris = load_iris()
        >>> X = iris.data[:, :2]
        >>> classifier = LogisticRegression().fit(X, iris.target)
        >>> disp = DecisionBoundaryDisplay.from_estimator(
        ...     classifier, X, response_method="predict",
        ...     xlabel=iris.feature_names[0], ylabel=iris.feature_names[1],
        ...     alpha=0.5,
        ... )
        >>> disp.ax_.scatter(X[:, 0], X[:, 1], c=iris.target, edgecolor="k")
        <...>
        >>> plt.show()
        """
        check_matplotlib_support(f"{cls.__name__}.from_estimator")

        if not grid_resolution > 1:
            raise ValueError("grid_resolution must be greater than 1. Got"
                             f" {grid_resolution} instead.")

        if not eps >= 0:
            raise ValueError(
                f"eps must be greater than or equal to 0. Got {eps} instead.")

        possible_plot_methods = ("contourf", "contour", "pcolormesh")
        if plot_method not in possible_plot_methods:
            available_methods = ", ".join(possible_plot_methods)
            raise ValueError(
                f"plot_method must be one of {available_methods}. "
                f"Got {plot_method} instead.")

        x0, x1 = _safe_indexing(X, 0, axis=1), _safe_indexing(X, 1, axis=1)

        x0_min, x0_max = x0.min() - eps, x0.max() + eps
        x1_min, x1_max = x1.min() - eps, x1.max() + eps

        xx0, xx1 = np.meshgrid(
            np.linspace(x0_min, x0_max, grid_resolution),
            np.linspace(x1_min, x1_max, grid_resolution),
        )
        X_for_pred = np.c_[xx0.ravel(), xx1.ravel()]
        # Keep the column names so estimators fitted on a dataframe see the
        # same feature names at prediction time.
        if isinstance(X, pd.DataFrame):
            X_for_pred = pd.DataFrame(X_for_pred, columns=X.columns)

        pred_func = _check_boundary_response_method(estimator, response_method)
        response = pred_func(X_for_pred)

        # Class labels returned by `predict` must be mapped to integers before
        # they can be plotted.  This also has to happen when "auto" resolved
        # to `predict`, and must be skipped for estimators without `classes_`
        # (accessing the attribute would otherwise raise AttributeError).
        used_predict = (response_method == "predict"
                        or getattr(pred_func, "__name__", None) == "predict")
        if used_predict and hasattr(estimator, "classes_"):
            label_encoder = LabelEncoder()
            label_encoder.classes_ = estimator.classes_
            response = label_encoder.transform(response)

        if response.ndim != 1:
            if response.shape[1] != 2:
                raise ValueError(
                    "Multiclass classifiers are only supported when "
                    "response_method='predict'")
            # Two-column response: plot the second column.
            response = response[:, 1]

        # Fall back to the dataframe column names (or "") for missing labels.
        if xlabel is None:
            xlabel = X.columns[0] if hasattr(X, "columns") else ""

        if ylabel is None:
            ylabel = X.columns[1] if hasattr(X, "columns") else ""

        display = DecisionBoundaryDisplay(
            xx0=xx0,
            xx1=xx1,
            response=response.reshape(xx0.shape),
            xlabel=xlabel,
            ylabel=ylabel,
        )
        return display.plot(ax=ax, plot_method=plot_method, **kwargs)
Esempio n. 34
0
def learn_main_arg_node(node_df,
                        syntax_dict,
                        node_dict,
                        precalc_features=None,
                          feature_list=None,
                          label_features=None):
    """ Learn classifiers for a node being arg0 or arg1

    Features are computed (or taken from ``precalc_features``), the label
    features are label-encoded and one-hot encoded, and two logistic
    regression classifiers are cross-validated (cv=5) and then fitted on the
    whole data set. Progress and scores are printed along the way.

    :param node_df: node data with tree and node ids; must contain the
        columns 'is_arg0_node' and 'is_arg1_node'
    :type node_df: pd.DataFrame
    :param syntax_dict: to look up the syntax trees by their id
    :type syntax_dict: dict
    :param node_dict: to look up the nodes by their id
    :type node_dict: dict
    :param precalc_features: precalculated features to save computation time in development
    :type precalc_features: pd.DataFrame
    :param feature_list: Names of the features that shall be calculated
    :type feature_list: list
    :param label_features: Names of features that are discrete
    :type label_features: list
    :return: All data that is needed to classify new data with the classifiers:
    LogisticRegression classifiers from scikit learn, the list of features and label
    features, as well as encoders for the labels and a binary encoder and a featurizer method
             {'logit_arg0_clf': logit_arg0_clf,
              'logit_arg1_clf': logit_arg1_clf,
              'feature_list': feature_list,
              'label_features': label_features,
              'label_encoder': le,
              'binary_encoder': ohe,
              'node_featurizer': featurizer}
    :rtype: dict
    """

    # Closure over `feature_list`; it is also returned so callers can
    # featurize new data with the same feature list.
    def featurizer(node_df, syntax_dict, node_dict):
        return node_feature_dataframe(node_df, node_featurizer,
                                      syntax_dict=syntax_dict,
                                      node_dict=node_dict,
                                      feature_list=feature_list)

    if precalc_features is None:
        print 'Calculating features'
        features = featurizer(node_df, syntax_dict, node_dict)
        print 'done'
    else:
        features = precalc_features

    # We need to encode the non-numerical labels
    print 'Encoding labels...'
    le = LabelEncoder()
    # LabelEncoder only deals with 1 dim np.arrays
    le.fit(features[label_features].values.ravel())
    # Dealing with unknowns: an extra '<unknown>' class is appended after fitting.
    # NOTE(review): the appended entry is not guaranteed to keep classes_
    # sorted -- confirm the encoder still transforms as intended.
    le.classes_ = np.append(le.classes_, '<unknown>')
    encoded_features = encode_label_features(features, le, label_features)
    print 'Encoded label'
    # We need to binarize the data for logistic regression
    print 'Binarizing features for logistic regression...'
    ohe = OneHotEncoder(sparse=False)
    ohe.fit(encoded_features[label_features].values)
    logit_features = binarize_features(encoded_features, ohe, label_features)
    print 'Binarized features.'

    print 'Training classifiers for arg0 labeling'
    print '======================================'
    nr_of_nodes = float(len(node_df))
    # Share of nodes that are not arg0 nodes, printed as the majority baseline.
    baseline = (nr_of_nodes - sum(node_df['is_arg0_node'])) / nr_of_nodes
    print 'Majority baseline: %f' % baseline
    print 'Cross validating Logistic regression classifier...'
    # C is the inverse of the regularization strength
    # http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
    logit_arg0_clf = LogisticRegression(C=1.0)
    scores = cross_val_score(logit_arg0_clf, logit_features,
                             node_df['is_arg0_node'], cv=5)
    print 'Cross validated Logistic Regression classifier\nscores: %s\nmean score: ' \
          '%f' % (str(scores), scores.mean())

    print ''
    print 'Training classifiers for arg1 labeling'
    print '======================================'
    # Share of nodes that are not arg1 nodes, printed as the majority baseline.
    baseline = (nr_of_nodes - sum(node_df['is_arg1_node'])) / nr_of_nodes
    print 'Majority baseline: %f' % baseline
    print 'Cross validating Logistic regression classifier...'
    # C is the inverse of the regularization strength
    logit_arg1_clf = LogisticRegression(C=1.0)
    scores = cross_val_score(logit_arg1_clf, logit_features,
                             node_df['is_arg1_node'], cv=5)
    print 'Cross validated Logistic Regression classifier\nscores: %s\nmean score: ' \
          '%f' % (
              str(scores), scores.mean())

    # The cross-validation above only reports scores; the classifiers that
    # are returned are fitted here on all rows.
    print 'Learning classifiers on the whole data set...'
    logit_arg0_clf.fit(logit_features, node_df['is_arg0_node'])
    logit_arg1_clf.fit(logit_features, node_df['is_arg1_node'])
    print 'Learned classifier on the whole data set'


    # ToDo: Design features (see Lin et al p. 17, Connective_syntactic!)

    # ToDo: Evaluate this method (remember not to count punctuation)
    # ToDo: Get baseline by labeling everything after the connective as
    # arg0, everything else as arg1
    # ToDo: Get baseline for previous sentence by labeling the full sentence
    #  as arg1.

    return_dict = {'logit_arg0_clf': logit_arg0_clf,
                   'logit_arg1_clf': logit_arg1_clf,
                   'feature_list': feature_list,
                   'label_features': label_features,
                   'label_encoder': le,
                   'binary_encoder': ohe,
                   'node_featurizer': featurizer}
    return return_dict
Esempio n. 35
0
if __name__ == '__main__':
    # Directory the images are loaded from
    input_dir = 'play'

    images = load_images(input_dir)
    # Run face detection on every loaded image
    cropped_images = [detect_face(picture) for picture in images]

    # Turn the detection results into embeddings and normalize them
    face_model = load_model(os.path.join('model', 'facenet_keras.h5'))
    cropped_images = get_embedded_data(face_model, cropped_images)

    cropped_images = normalize(cropped_images)

    # Classify the embeddings with the stored model
    model = joblib.load(os.path.join('model', 'svm_model.sav'))

    pred_test = model.predict(cropped_images)
    pred_proba = model.predict_proba(cropped_images)

    # Decode the predicted labels with the stored class array
    label_encode = LabelEncoder()
    label_encode.classes_ = np.load(os.path.join('model', 'classes.npy'))
    predicted_names = label_encode.inverse_transform(pred_test)

    # Show every image with its predicted name and confidence
    for idx, picture in enumerate(images):
        # plt.figure()
        plt.imshow(picture)
        confidence = round(pred_proba[idx][pred_test[idx]] * 100, 2)
        caption = ("Predicted: " + predicted_names[idx] + " with " +
                   str(confidence) + "% confidence.")
        plt.title(caption)
        plt.show()
Esempio n. 36
0
def model():
    """Recognise the licence plate in the bundled test image and export it.

    Steps performed, in order:

    1. Load the WPOD-NET plate detector from ``wpod-net.json`` /
       ``wpod-net.h5``.
    2. Run ``detect_lp`` on ``images/numberplate_image.png``.
    3. Binarise the first detected plate and crop character-shaped contours.
    4. Classify each crop with the MobileNets character model and decode the
       predicted class index through the classes stored in
       ``license_character_classes.npy``.
    5. Write ``{"number": ..., "date_created": ...}`` to ``output.json``,
       delete ``file_modifier/file`` and call ``checker()``.

    Returns:
        None. Results are reported through stdout, ``output.json`` and a
        matplotlib figure holding one titled subplot per character. If
        ``detect_lp`` returns no plate image, a message is printed and the
        function returns early without writing anything.
    """

    def load_model(path):
        """Build a Keras model from ``<stem>.json`` and ``<stem>.h5``.

        Any failure is printed and ``None`` is returned implicitly.
        """
        try:
            path = splitext(path)[0]
            with open('%s.json' % path, 'r') as json_file:
                model_json = json_file.read()
            model = model_from_json(model_json, custom_objects={})
            model.load_weights('%s.h5' % path)
            print("Loading model successfully...")
            return model
        except Exception as e:
            print(e)

    wpod_net_path = "wpod-net.json"
    wpod_net = load_model(wpod_net_path)

    def preprocess_image(image_path, resize=False):
        """Read an image, convert BGR to RGB and scale pixel values by 1/255."""
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img / 255
        if resize:
            img = cv2.resize(img, (224, 224))
        return img

    def get_plate(image_path):
        """Run the plate detector; return (vehicle image, plate images, coords)."""
        Dmax = 608
        Dmin = 288
        vehicle = preprocess_image(image_path)
        # Bound the detector input size by the image aspect ratio, capped at Dmax.
        ratio = float(max(vehicle.shape[:2])) / min(vehicle.shape[:2])
        side = int(ratio * Dmin)
        bound_dim = min(side, Dmax)
        _, LpImg, _, cor = detect_lp(wpod_net, vehicle, bound_dim, lp_threshold=0.5)
        return vehicle, LpImg, cor

    test_image_path = "images/numberplate_image.png"
    _, LpImg, _ = get_plate(test_image_path)

    # Without a plate none of the intermediate images below exist; stop here
    # instead of failing later on unbound local variables.
    if len(LpImg) == 0:
        print("No license plate detected in {}".format(test_image_path))
        return

    # Scale the plate back to 8-bit, then grey -> blur -> inverted Otsu
    # threshold -> dilation.
    plate_image = cv2.convertScaleAbs(LpImg[0], alpha=(255.0))
    gray = cv2.cvtColor(plate_image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (7, 7), 0)
    binary = cv2.threshold(blur, 180, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    kernel3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    thre_mor = cv2.morphologyEx(binary, cv2.MORPH_DILATE, kernel3)

    def sort_contours(cnts, reverse=False):
        """Return the contours ordered by the x coordinate of their bounding box."""
        boundingBoxes = [cv2.boundingRect(c) for c in cnts]
        (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
                                            key=lambda pair: pair[1][0],
                                            reverse=reverse))
        return cnts

    # Start with external contours only; the retrieval mode can be changed here.
    cont, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if len(cont) <= 2:
        # Too few external contours: fall back to the full contour list.
        # Author's note: this path still needs debugging for plates with
        # varied characters.
        cont, _ = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        print("NOTE: USING LIST CONTOURS")

    crop_characters = []
    digit_w, digit_h = 30, 60
    for c in sort_contours(cont):
        (x, y, w, h) = cv2.boundingRect(c)
        ratio = h / w
        # The height/width bound rejects contours shaped like the whole plate.
        # Author's note: the upper bound was raised from 3.5 to 5 for Indian
        # number plates.
        # The second check keeps contours at least 0.4 of the plate height
        # (originally 0.5; values between the two can be tuned).
        if 1 <= ratio <= 5 and h / plate_image.shape[0] >= 0.4:
            curr_num = thre_mor[y:y + h, x:x + w]
            curr_num = cv2.resize(curr_num, dsize=(digit_w, digit_h))
            _, curr_num = cv2.threshold(curr_num, 220, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            crop_characters.append(curr_num)

    print("Detect {} letters...".format(len(crop_characters)))

    # Character classifier: architecture from JSON, weights from HDF5.
    with open('MobileNets_character_recognition.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    char_model = model_from_json(loaded_model_json)
    char_model.load_weights("License_character_recognition_weight.h5")
    print(" Model loaded successfully...")

    labels = LabelEncoder()
    labels.classes_ = np.load('license_character_classes.npy')
    print(" Labels loaded successfully...")

    def predict_from_model(image, model, labels):
        """Classify one character crop and return the decoded label array."""
        image = cv2.resize(image, (80, 80))
        # Replicate the single channel three times before predicting.
        image = np.stack((image,) * 3, axis=-1)
        prediction = labels.inverse_transform([np.argmax(model.predict(image[np.newaxis, :]))])
        return prediction

    fig = plt.figure(figsize=(15, 3))
    cols = len(crop_characters)
    grid = gridspec.GridSpec(ncols=cols, nrows=1, figure=fig)

    recognised = []
    for i, character in enumerate(crop_characters):
        fig.add_subplot(grid[i])
        title = np.array2string(predict_from_model(character, char_model, labels))
        # array2string renders e.g. "['A']"; strip the brackets and quotes.
        char = title.strip("'[]")
        # fontsize belongs to plt.title; it used to be passed to str.format,
        # where it was silently ignored.
        plt.title(char, fontsize=20)
        recognised.append(char)
        plt.axis(False)
    final_string = ''.join(recognised)

    print(final_string)
    today = str(date.today())

    data = {
        'number': final_string,
        'date_created': today
    }

    json_object = json.dumps(data, indent=2)

    try:
        with open("output.json", "w") as outfile:
            outfile.write(json_object)
        print('JSON Object sucessfully exported')
        os.remove('file_modifier/file')
        checker()
    except Exception as exc:
        # Keep the best-effort behaviour, but let KeyboardInterrupt/SystemExit
        # propagate and report what actually failed.
        print('Error working with json')
        print(exc)
Esempio n. 37
0
        cosine = F.linear(x_norm, w_norm, None)
        out = cosine * self.scale
        return out


# Positional command-line arguments, read without validation (a missing
# argument raises IndexError):
#   1 -> `resume`  (not used in the visible part of the script; presumably a
#                   checkpoint path — TODO confirm)
#   2 -> `encoder` path to the .npy file holding the label-encoder classes
#   3 -> `x_test`  path to the CSV file loaded below as X_test
resume = sys.argv[1]
encoder = sys.argv[2]
x_test = sys.argv[3]

# Hard-coded, machine-specific data locations.
train_folder = "/home/blcv/CODE/Kaggle/humpback_short_blażej/data/processed/train_bb_fastai2/"
test_df = "/home/blcv/CODE/Kaggle/humpback_whale_identification/data/processed/sample_submission.csv"
test_folder = "/home/blcv/CODE/Kaggle/humpback_short_blażej/data/processed/test_bb_fastai2/"
# Options forwarded to getDataLoader as `option_da`; the trailing comment
# shows the empty-list alternative.
option_da = ['gray']  # [] #

# Rebuild the label encoder from its saved classes instead of refitting it.
label_encoder = LabelEncoder()
label_encoder.classes_ = np.load(encoder)
# Load the table named by the third argument and wrap it in a 'val' loader.
# NOTE(review): the loader is given train_folder although the frame is called
# X_test and test_folder is defined above — confirm this is intended.
X_test = pd.read_csv(x_test)
val_loader = getDataLoader(X_test,
                           train_folder,
                           'val',
                           option_da=option_da,
                           image_size=224,
                           batch_size=64)

# model preparation
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = 'se_resnext101_32x4d'
# Look the constructor up by name in the pretrainedmodels namespace.
model = pretrainedmodels.__dict__[model_name](num_classes=1000,
                                              pretrained='imagenet')
# Replace the model's avg_pool attribute with an adaptive 1x1 average pool.
model.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
Esempio n. 38
0
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder

# Load the arrays stored under ./preprocessed.
# NOTE(review): `np` is not imported in the visible import block — confirm
# numpy is imported elsewhere in the file.
x_train = np.load("./preprocessed/x_train.npy")
y_train = np.load("./preprocessed/y_train.npy")
x_test = np.load("./preprocessed/x_test.npy")
y_test = np.load("./preprocessed/y_test.npy")
# Only yy.shape[1] is read below (as the number of labels); presumably the
# one-hot encoded label matrix — TODO confirm.
yy = np.load("./preprocessed/yy.npy")
# Restore the label encoder from its saved classes rather than refitting it.
le = LabelEncoder()
le.classes_ = np.load("./preprocessed/classes.npy")

# MODEL

# Per-sample input dimensions used for the reshape below.
num_rows = 40
num_columns = 174
num_channels = 1

# Reshape both splits to (samples, num_rows, num_columns, num_channels).
x_train = x_train.reshape(x_train.shape[0], num_rows, num_columns, num_channels)
x_test = x_test.reshape(x_test.shape[0], num_rows, num_columns, num_channels)

# Number of output labels, taken from the second axis of yy.
num_labels = yy.shape[1]
filter_size = 2

def construct_model():
	# Construct model 
Esempio n. 39
0
# convert Tag1 from strings to integers
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
train['Tag1_enc'] = le.fit_transform(train.Tag1)

# confirm that the conversion worked
train.Tag1.value_counts().head()
train.Tag1_enc.value_counts().head()

# create a dummy column for each value of Tag1_enc (returns a sparse matrix)
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder()
tag1_dummies = ohe.fit_transform(train[['Tag1_enc']])
tag1_dummies

# try a Naive Bayes model with tag1_dummies as the features
cross_val_score(nb, tag1_dummies, train.OpenStatus, scoring='log_loss', cv=10).mean()   # 0.650

# adjust Tag1 on testing set since LabelEncoder errors on new values during a transform
# Build the set of known tags once: testing membership against the classes
# array inside the lambda rescanned the whole array for every test row.
known_tags = set(le.classes_)
test['Tag1'] = test['Tag1'].map(lambda s: s if s in known_tags else '<unknown>')
import numpy as np
# NOTE(review): '<unknown>' is appended after the fitted classes, so classes_
# may no longer be sorted; some scikit-learn versions look labels up with a
# binary search that assumes sorted classes_ — confirm how '<unknown>' is
# encoded on the installed version.
le.classes_ = np.append(le.classes_, '<unknown>')

# apply the same encoding to the actual testing data and make predictions
test['Tag1_enc'] = le.transform(test.Tag1)
oos_tag1_dummies = ohe.transform(test[['Tag1_enc']])
nb.fit(tag1_dummies, train.OpenStatus)
# keep the second column of predict_proba as the submitted OpenStatus probability
oos_pred_prob = nb.predict_proba(oos_tag1_dummies)[:, 1]
sub = pd.DataFrame({'id':test.index, 'OpenStatus':oos_pred_prob}).set_index('id')
sub.to_csv('sub5.csv')  # 0.649
Esempio n. 40
0
import click
import os

# Set before the keras/librosa imports below so it is already in the
# environment when those libraries load.
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort — confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

from keras.models import load_model
from audio_much.core import build_beat_audio_feature_sequences
import librosa
import numpy as np
from sklearn.preprocessing import LabelEncoder
import collections

# Module-level state created at import time.
# Restore the label encoder from its saved classes instead of refitting it.
encoder = LabelEncoder()
encoder.classes_ = np.load('songs_training_data_classes.npy')

# Keras model loaded once from its HDF5 file in the working directory.
lstm_model = load_model('model_raw_22050_lstm_02.h5')


def do_prediction_lstm(lstm_model, song_path, target_sample_rate=44100):
    X, sample_rate = librosa.load(song_path,
                                  sr=target_sample_rate,
                                  res_type='kaiser_fast')
    hop_length = 512
    tempo, beats = librosa.beat.beat_track(y=X,
                                           sr=target_sample_rate,
                                           hop_length=hop_length)
    float_audio_segments = build_beat_audio_feature_sequences(
        X, target_sample_rate, tempo)
    timeseries_length = 16
    batch_size = float_audio_segments.shape[0] - timeseries_length
    features = np.zeros((batch_size, timeseries_length, 12), dtype=np.float64)