def dbProcess(self, password, process='s', model='s', host='localhost',
              database='postgres', user='******', port=5432, dbase='imagedata_l'):
    """Connect to Postgres and either save or fetch image feature data.

    Parameters
    ----------
    password : str
        Database password.
    process : str
        's' saves the in-memory image data via ``self.dbSave``;
        'f' fetches all rows of table ``dbase`` via ``self.dbFetch``.
    model : str
        Feature-model code, forwarded to ``dbSave`` when saving.
    host, database, user, port :
        Postgres connection parameters, forwarded to ``PostgresDB``.
    dbase : str
        Table to read when ``process == 'f'``.

    Returns
    -------
    list[tuple] | None
        For 'f': a list of ``(image_id, numpy_array)`` pairs, where the
        stored feature string is deserialized into an array; ``None``
        otherwise (including after a successful save).
    """
    # Fix: previously passed database=DATABASE_NAME (a module global),
    # silently ignoring the `database` argument. The parameter is now honored,
    # matching the other dbProcess variant in this file.
    db = PostgresDB(password=password, host=host, database=database,
                    user=user, port=port)
    conn = db.connect()
    if process == 's':
        self.dbSave(conn, model)
        print('Data saved successfully to the Database!')
        return None
    if process == 'f':
        recs = self.dbFetch(conn, dbase)
        # NOTE(review): eval() executes arbitrary code if the table contents
        # are ever attacker-controlled; prefer ast.literal_eval or a JSON
        # column. Kept because the stored format is repr()-style nested lists.
        return [(rec[0], np.asarray(eval(rec[1]))) for rec in recs]
    return None
def dbProcess(self, password, process='s', model='s', host='localhost',
              database='postgres', user='******', port=5432):
    """Open a database connection, then save ('s') or fetch-and-flatten ('f').

    For a fetch, each row's stored structure is flattened according to the
    feature model: 'm' yields one flat float list per image, 's' yields a
    list of float rows, and 'l'/'h' yield a plain float list.
    Returns the flattened records for 'f'; None after a save.
    """
    conn = PostgresDB(password=password, host=host, database=database,
                      user=user, port=port).connect()
    if process == 's':
        self.dbSave(conn, model)
        print('Data saved successfully to the Database!')
    elif process == 'f':
        recs = self.dbFetch(conn, model)
        recs_flt = []
        # Flatten each record according to the feature model's layout.
        if model == 'm':
            print(recs)
            recs_flt = [(rec[0], [float(x) for y in rec[1] for x in y])
                        for rec in recs]
        elif model == 's':
            recs_flt = [(rec[0], [[float(x) for x in y] for y in rec[1]])
                        for rec in recs]
        elif model in ('l', 'h'):
            recs_flt = [(rec[0], [float(x) for x in rec[1]])
                        for rec in recs]
        return recs_flt
def get_model(args):
    """Instantiate the embedding model selected by ``args.model``.

    Parameters
    ----------
    args :
        Parsed CLI namespace; reads ``model``, ``modelPath``, and, for the
        'nle' model, ``db_cred`` and ``njobs``.

    Returns
    -------
    The constructed model ('fasttext' -> FastTextModel,
    'nle' -> NLEModel with its indexes loaded).

    Raises
    ------
    ValueError
        If ``args.model`` is neither 'fasttext' nor 'nle'.
    """
    if args.model == 'fasttext':
        model = FastTextModel(args.modelPath)
    elif args.model == 'nle':
        db = PostgresDB(util.read_db_config(args.db_cred))
        model = NLEModel(args.modelPath, args.njobs, False, db)
        model.load_indexes()
    else:
        # Fix: an unknown model previously fell through and crashed at the
        # return with UnboundLocalError; fail fast with a clear message.
        raise ValueError(
            "Unknown model type: {0!r} (expected 'fasttext' or 'nle')".format(args.model))
    return model
def similarity(self, feature, technique, dbase, k, image, label=""):
    """Rank every image in table `dbase` by Euclidean distance to `image`
    and display the nearest matches via `dispImages`.

    feature:   feature-model code; used to rebuild the raw-feature table
               name ('imagedata_' + feature) when the query image is not
               already present in `dbase`.
    technique: dimensionality-reduction technique code, forwarded to
               `queryImageNotLabel` and `dispImages`.
    dbase:     table assumed to hold (imageid, serialized vector) rows —
               TODO confirm schema against the table-creation code.
    k:         number of similar images to display.
    image:     imageid of the query image.
    label:     optional label filter, forwarded to helpers.
    """
    # NOTE(review): connection parameters are hard-coded here, unlike other
    # DB helpers in this file that take them as keyword arguments.
    db = PostgresDB(password="******", database="mwdb")
    conn = db.connect()
    if conn is None:
        print("Can not connect to database")
        exit()
    # print(dbase)
    cursor = conn.cursor()
    # WARNING: the table name is concatenated into the SQL text; this is
    # safe only while `dbase` is program-controlled, never user input.
    cursor.execute("SELECT * FROM " + dbase)
    data = cursor.fetchall()
    # Column 0 is taken as the image id, column 1 as the serialized vector.
    image_id = [rec[0] for rec in data]
    similarity = {}
    if image in image_id:
        image_index = image_id.index(image)
        # print(image_index)
        # eval() rebuilds the stored nested-list string; unsafe if the rows
        # could ever be attacker-controlled — consider ast.literal_eval.
        image_data = np.asarray(eval(data[image_index][1]))
    else:
        # Query image is not in the (label-filtered) table: fetch its raw
        # features and project them with the same technique/label instead.
        print("Not Same Label")
        dbase = 'imagedata_' + feature
        label = label.replace(" ", "_")
        image_data = self.dbFetch(conn, dbase, "WHERE imageid = '{0}'".format(image))
        image_data = self.queryImageNotLabel(image_data, feature, technique, label)
        # Distance of the query to itself (zero) so it also ranks first.
        similarity[image] = self.euclidean_distance(image_data, image_data)
    # print (image_id)
    for i in range(len(image_id)):
        image_cmp = np.asarray(eval(data[i][1]))
        # print(data[i][0])
        # print(data[i][1])
        # if self.metrics:
        #     # similarity[row[0]] = 1- self.cosine_similarity(image, result)
        #     similarity[image_id[i]] = 1 - st.pearsonr(image,image_cmp)[0]
        #     # similarity[row[0]] = mean_squared_error(image,result)
        #     # similarity[row[0]] = 0 - self.psnr(image,result)
        # else:
        similarity[image_id[i]] = self.euclidean_distance(
            image_data, image_cmp)
    # print(similarity)
    # Ascending distance: most similar images come first.
    similarity = sorted(similarity.items(), key=lambda x: x[1], reverse=False)
    # print(similarity)
    self.dispImages(similarity, feature, technique, 11, k, label)
def run():
    """CLI entry point: parse arguments, build the selected embedding model,
    train it on the input samples, and save the result.

    Raises
    ------
    Exception
        If ``--model`` names an unknown model type.
    """
    # Fix: corrected user-facing typos in the help strings
    # ('Calcuclate' -> 'Calculate', 'safe' -> 'save', 'to us' -> 'to use').
    parser = argparse.ArgumentParser(description='Calculate embeddings')
    parser.add_argument('input', help='Input containing osm samples')
    parser.add_argument('output', help='File to save embeddings in')
    parser.add_argument('--model', help='Embedding model to use',
                        default="NLE", type=str)
    parser.add_argument('--ftmodel', help='Path to fasttext model',
                        type=str, default="")
    parser.add_argument('--db_cred', help='Credentials for database',
                        type=str, default="")
    parser.add_argument('--njobs', help='Number of threads to use',
                        default=1, type=int)
    args = parser.parse_args()

    # Create the requested model. The inconsistent casing of the accepted
    # values ('fasttext' vs 'NLE') is preserved for CLI compatibility.
    if args.model == "fasttext":
        model = FastTextModel(args.ftmodel)
    elif args.model == 'NLE':
        db = PostgresDB(read_db_config(args.db_cred))
        model = NLEModel(args.output, args.njobs, db)
    else:
        raise Exception("Model not found")

    data = read_samples(args.input)
    # Train and persist; the model name is appended to the output path so
    # different model types do not overwrite each other.
    model.train(data)
    model.save_model(args.output + "_" + args.model)
from dimReduction import dimReduction
from PostgresDB import PostgresDB

# Directory used when the user presses Enter without typing a path.
_DEFAULT_DIR = 'C:\\Users\\pylak\\Documents\\Fall 2019\\MWDB\\Project\\Dataset\\'

home_dir = input(
    "Please enter the home directory for the images "
    "(Default: C:\\Users\\pylak\\Documents\\Fall 2019\\MWDB\\Project\\Dataset\\) : "
)
if home_dir == '':
    home_dir = _DEFAULT_DIR

reducer = dimReduction(home_dir, '*.jpg')
connection = PostgresDB(password='******', host='localhost', database='mwdb',
                        user='******', port=5432).connect()

# Reduce the 'm' features with PCA and persist them; the returned
# latent-semantic matrices are not needed by this script.
_, _, _, _ = reducer.saveDim('m', 'pca', 'imagedata_m', 10,
                             password="******", database="mwdb")

bin_matrix, feature_matrix = reducer.binMat(connection, 'imagedata_m_pca')
# code to print
# print(bin_matrix[0][0])
reducer.imgViz(bin_matrix)

# Print every latent semantic with its subject/weight pairs.
for semantic_idx, subjects in enumerate(feature_matrix):
    print('\nLatent Semantic {x}'.format(x=semantic_idx + 1))
    for subject in subjects:
        print('Subject: ', subject[0])
        print('Weight: ', subject[1])
def saveDim(self, feature, model, dbase, k, password='******', host='localhost',
            database='mwdb', user='******', port=5432, label=None, meta=False,
            negative_handle='n'):
    """Fetch image features, reduce them to k latent dimensions with the
    chosen technique, store the reduced vectors in a derived table, and
    return the sorted latent-semantic matrices.

    feature:         feature-model code ('m', 's', 'l', 'h'); 's' (SIFT)
                     rows are re-encoded through k-means clustering first.
    model:           reduction technique: 'nmf', 'lda', 'pca', or 'svd'.
    dbase:           source table name; the destination table name is
                     derived from it ('<dbase>_<model>[_<label>]').
    k:               number of latent dimensions to keep.
    password..port:  Postgres connection parameters.
    label:           optional label; restricts images to those listed by
                     imageDB.CSV(label) and suffixes the derived names.
    meta:            when True, (re)creates the image metadata table.
    negative_handle: for 'm' features, 'h' histograms the data, anything
                     else normalizes it (keeps values non-negative for NMF).
    Returns (imgs_sort, feature_sort).
    """
    imageDB = imageProcess(self.dirpath)
    # Pull the raw (image_id, features) records from the source table.
    imgs = imageDB.dbProcess(password=password, process='f', model=feature, dbase=dbase)
    # `no_clusters` is presumably a module-level constant — TODO confirm.
    kmeans_model = 'kmeans_' + str(no_clusters)
    technique_model = feature + '_' + model
    if label is not None:
        filteredImage = imageDB.CSV(label)
        label = label.replace(" ", "_")
        # Derived names carry the technique and label so artifacts from
        # different runs do not collide.
        dbase += '_' + model + '_' + label
        kmeans_model += '_' + label
        technique_model += '_' + label
    else:
        dbase += '_' + model
    # print(technique_model)
    imgs_data = []
    imgs_meta = []
    # Manual index loop because filtered entries are deleted from `imgs`
    # in place (the list shrinks while we walk it).
    i = -1
    while i < len(imgs) - 1:
        # print (x[1].shape)
        i += 1
        if label is not None and imgs[i][0] not in filteredImage:
            # print("label")
            # Drop images outside the label filter and revisit this index.
            del imgs[i]
            i -= 1
            continue
        if feature != "s":
            # Dense feature matrix: one flattened row per image.
            imgs_data.append(imgs[i][1].reshape((-1)))
        else:
            # SIFT: variable-length keypoint descriptors, pooled together.
            imgs_data.extend(imgs[i][1])
            # print (image_cmp.shape)
        imgs_meta.append(imgs[i][0])
        # print(i)
    # print(len(imgs))
    # Handle Negative Value of NMF
    if feature == 'm':
        if negative_handle == 'h':
            imgs_data = self.hist(imgs_data)
        else:
            imgs_data = self.normalize(imgs_data)
    imgs_data = np.asarray(imgs_data)
    # print(imgs_data.shape)
    # print(imgs_meta)
    # imgs_meta = [x[0] if x[0] in filteredImage for x in imgs]
    imgs_zip = list(zip(imgs_meta, imgs_data))
    db = PostgresDB(password=password, host=host, database=database,
                    user=user, port=port)
    conn = db.connect()
    if meta:
        imageDB.createInsertMeta(conn)
    model = model.lower()
    if feature == "s":
        # Re-encode SIFT descriptors as a fixed-length bag-of-visual-words
        # matrix; shrink the cluster count if there are too few descriptors.
        if imgs_data.shape[0] < no_clusters:
            Kmeans = KMeans_SIFT(imgs_data.shape[0] // 2)
        else:
            Kmeans = KMeans_SIFT(no_clusters)
        clusters = Kmeans.kmeans_process(imgs_data)
        # print (imgs_zip)
        imgs_data = Kmeans.newMatrixSift(imgs, clusters, kmeans_model)
        imgs_zip = list(zip(imgs_meta, imgs_data))
    # Dispatch on the reduction technique. Each branch produces:
    #   imgs_red     - the images projected into the k-dim latent space,
    #   imgs_sort    - images ranked per latent semantic,
    #   feature_sort - features ranked per latent semantic.
    if model == 'nmf':
        w, h = self.nmf(imgs_data, k, technique_model)
        imgs_red = np.dot(imgs_data, h.T).tolist()
        print(np.asarray(w).shape)
        print(np.asarray(h).shape)
        imgs_sort = self.imgSort(w.T, imgs_meta)
        feature_sort = self.imgFeatureSort(h, imgs_zip)
    elif model == 'lda':
        w, h = self.lda(imgs_data, k, technique_model)
        imgs_red = np.dot(imgs_data, h.T).tolist()
        print(np.asarray(w).shape)
        print(np.asarray(h).shape)
        imgs_sort = self.imgSort(w.T, imgs_meta)
        feature_sort = self.imgFeatureSort(h, imgs_zip)
    elif model == 'pca':
        data, U, Vt = self.pca(imgs_data, k, technique_model)
        imgs_red = data.tolist()
        imgs_sort = self.imgSort(U.T, imgs_meta)
        feature_sort = self.imgFeatureSort(Vt, imgs_zip)
    elif model == 'svd':
        # print(imgs_data.shape)
        data, U, Vt = self.svd(imgs_data, k, technique_model)
        imgs_red = data.tolist()
        # print(im)
        # U[:,:self.k].dot(Sigma[:self.k, :self.k]).dot(V[:self.k,:])
        imgs_sort = self.imgSort(U.T, imgs_meta)
        feature_sort = self.imgFeatureSort(Vt, imgs_zip)
    # print("=======================")
    # print(imgs_sort)
    # print("=======================")
    # print(feature_sort)
    # Process the reduced Images: pair each id with its reduced vector and
    # persist them in the derived table.
    imgs_red = list(zip(imgs_meta, imgs_red))
    # print (np.asarray(imgs_sort).shape)
    # print(img_sort)
    # print (np.asarray(feature_sort).shape)
    # imgs_red = self.convString(imgs_red)
    print(imgs_red)
    self.createInsertDB(dbase, imgs_red, conn)
    return imgs_sort, feature_sort
def saveDim(self, feature, model, dbase, k, password='******', host='localhost',
            database='postgres', user='******', port=5432, label=None, meta=True,
            negative_handle='n'):
    """Fetch image features, reduce them to k latent dimensions with the
    chosen technique, store the reduced vectors in a derived table, and
    return the latent-semantic matrices plus the raw factor matrices.

    feature:         feature-model code ('m', 's', 'l', 'h'); SIFT features
                     ('s'), and 'm' features under nmf/lda, are re-encoded
                     through k-means clustering first.
    model:           reduction technique: 'nmf', 'lda', 'pca', or 'svd'.
    dbase:           source table name; the destination table name is
                     derived from it ('<dbase>_<model>[_<label>]').
    k:               number of latent dimensions to keep.
    password..port:  Postgres connection parameters.
    label:           optional label; filters images via imageDB.CSV and
                     suffixes the derived names.
    meta:            when True (default), (re)creates the metadata table.
    negative_handle: retained for interface compatibility; the negative-
                     value handling it selected is commented out below.
    Returns (imgs_sort, feature_sort, U, Vt) where U/Vt are the left/right
    factor matrices of the chosen decomposition (w/h for nmf and lda).
    """
    imageDB = imageProcess(self.dirpath)
    # Pull the raw (image_id, features) records from the source table.
    imgs = imageDB.dbProcess(password=password, process='f', model=feature, dbase=dbase)
    # `no_clusters` is presumably a module-level constant — TODO confirm.
    kmeans_model = 'kmeans_' + str(no_clusters) + '_' + feature
    technique_model = feature + '_' + model
    db = PostgresDB(password=password, host=host, database=database,
                    user=user, port=port)
    conn = db.connect()
    if meta:
        imageDB.createInsertMeta(conn)
    if label is not None:
        # Label filtering is done in SQL here (unlike the other saveDim
        # variant, which filters the fetched list in Python).
        imgs = imageDB.CSV(conn, dbase, label)
        label = label.replace(" ", "_")
        dbase += '_' + model + '_' + label
        kmeans_model += '_' + label
        technique_model += '_' + label
    else:
        dbase += '_' + model
    # print(technique_model)
    imgs_data = []
    imgs_meta = []
    for img in imgs:
        if feature == "s" or (feature == "m" and model in ("nmf", "lda")):
            # Descriptor rows are pooled for the k-means recoding below.
            imgs_data.extend(img[1])
        else:
            # Dense feature matrix: one flattened row per image.
            imgs_data.append(img[1].reshape((-1)))
            # print (image_cmp.shape)
        imgs_meta.append(img[0])
        # print(i)
    # print(len(imgs))
    # print(imgs_meta)
    # print(len(imgs_meta))
    #Handle Negative Value of NMF
    # if feature == 'm' and (model == 'lda' or model == 'nmf'):
    #     print ("Normalize")
    #     if negative_handle == 'h':
    #         imgs_data = self.hist(imgs_data)
    #     else:
    #         imgs_data = self.normalize(imgs_data)
    imgs_data = np.asarray(imgs_data)
    # print(imgs_data.shape)
    # print(imgs_data.shape)
    # print(imgs_meta)
    # imgs_meta = [x[0] if x[0] in filteredImage for x in imgs]
    imgs_zip = list(zip(imgs_meta, imgs_data))
    model = model.lower()
    if feature == "s" or (feature == "m" and model in ("nmf", "lda")):
        # Re-encode pooled descriptors as a fixed-length bag-of-visual-words
        # matrix; shrink the cluster count if there are too few rows.
        if imgs_data.shape[0] < no_clusters:
            Kmeans = KMeans_SIFT(imgs_data.shape[0] // 2)
        else:
            Kmeans = KMeans_SIFT(no_clusters)
        clusters = Kmeans.kmeans_process(imgs_data)
        # print (imgs_zip)
        imgs_data = Kmeans.newMatrixSift(imgs, clusters, kmeans_model)
        imgs_zip = list(zip(imgs_meta, imgs_data))
    # Dispatch on the reduction technique. Each branch produces:
    #   imgs_red     - images projected into the k-dim latent space,
    #   imgs_sort    - images ranked per latent semantic,
    #   feature_sort - features ranked per latent semantic,
    #   U, Vt        - the factor matrices returned to the caller.
    if model == 'nmf':
        w, h = self.nmf(imgs_data, k, technique_model)
        imgs_red = np.dot(imgs_data, h.T).tolist()
        # print(np.asarray(w).shape)
        # print(np.asarray(h).shape)
        imgs_sort = self.imgSort(w.T, imgs_meta)
        feature_sort = self.imgFeatureSort(h, imgs_zip)
        U = w
        Vt = h
    elif model == 'lda':
        w, h = self.lda(imgs_data, k, technique_model)
        imgs_red = np.dot(imgs_data, h.T).tolist()
        # print(np.asarray(w).shape)
        # print(np.asarray(h).shape)
        imgs_sort = self.imgSort(w.T, imgs_meta)
        feature_sort = self.imgFeatureSort(h, imgs_zip)
        U = w
        Vt = h
    elif model == 'pca':
        data, U, Vt = self.pca(imgs_data, k, technique_model)
        imgs_red = data.tolist()
        imgs_sort = self.imgSort(U.T, imgs_meta)
        feature_sort = self.imgFeatureSort(Vt, imgs_zip)
    elif model == 'svd':
        # print(imgs_data.shape)
        data, U, Vt = self.svd(imgs_data, k, technique_model)
        imgs_red = data.tolist()
        # print(im)
        # U[:,:self.k].dot(Sigma[:self.k, :self.k]).dot(V[:self.k,:])
        # print(U.T.shape)
        # print(imgs_meta.shape)
        imgs_sort = self.imgSort(U.T, imgs_meta)
        feature_sort = self.imgFeatureSort(Vt, imgs_zip)
    # print("=======================")
    # print(imgs_sort)
    # print("=======================")
    # print(feature_sort)
    # Process the reduced Images: pair each id with its reduced vector and
    # persist them in the derived table.
    imgs_red = list(zip(imgs_meta, imgs_red))
    # print (np.asarray(imgs_sort).shape)
    # print(img_sort)
    # print (np.asarray(feature_sort).shape)
    self.createInsertDB(dbase, imgs_red, conn)
    return imgs_sort, feature_sort, U, Vt
from dimReduction import dimReduction
from PostgresDB import PostgresDB

# Directory used when the user presses Enter without typing a path.
_DEFAULT_DIR = 'C:\\Users\\pylak\\Documents\\Fall 2019\\MWDB\\Project\\Phase1\\Hands_test2\\'

home_dir = input(
    "Please enter the home directory for the images "
    "(Default: C:\\Users\\pylak\\Documents\\Fall 2019\\MWDB\\Project\\Phase1\\Hands_test2\\) : "
)
if home_dir == '':
    home_dir = _DEFAULT_DIR

reducer = dimReduction(home_dir, '*.jpg')
connection = PostgresDB(password='******', host='localhost', database='postgres',
                        user='******', port=5432).connect()

# Build the binary/feature matrices from the stored PCA reduction and
# print them for inspection.
bin_matrix, feature_matrix = reducer.binMat(connection, 'imagedata_m_pca')
# code to print
print(bin_matrix[0][0])
print(feature_matrix)