import os
import time
import datetime

import numpy as np
import torch

# funcH is this project's helper module (removeLastLine, getElapsedTimeFormatted);
# its exact import path is project-specific and assumed here.
import funcH


def runTrainDs(model, optimizer, dsLoad_train_train):
    """Run one training epoch; return the running accuracy and the order in
    which sample ids were seen (the loader may shuffle)."""
    print("running --> runTrainDs", datetime.datetime.now().strftime("%H:%M:%S"))
    t = time.time()
    running_acc = []
    idShuffle = []
    cnt = 0
    crit = torch.nn.CrossEntropyLoss()  # create the criterion once, not per batch
    for sample in dsLoad_train_train:
        images = sample['image'].cuda()
        labels = sample['label'].cuda()
        ids = sample['id'].cuda()
        cnt += len(labels)

        optimizer.zero_grad()
        outputs = model(images)
        loss = crit(outputs, labels)
        loss.backward()
        optimizer.step()

        _, indices = outputs.max(1)
        # np.float was removed from recent NumPy releases; plain float is equivalent
        acc = (indices == labels).cpu().numpy().astype(float)
        running_acc += acc.tolist()
        idShuffle += ids.tolist()

    tr_acc_run = float(np.mean(running_acc))
    elapsed = time.time() - t
    funcH.removeLastLine()
    print('runTrainDs completed (', funcH.getElapsedTimeFormatted(elapsed),
          '), ended at ', datetime.datetime.now().strftime("%H:%M:%S"))
    return tr_acc_run, idShuffle
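# A minimal, self-contained sketch of the batch contract runTrainDs expects:
# the loader must yield dicts with 'image', 'label' and 'id' tensors. Every
# name below is illustrative (a toy dataset standing in for the project's own).
from torch.utils.data import DataLoader, Dataset


class _ToyDictDataset(Dataset):
    """Yields dict samples shaped like the ones runTrainDs consumes."""
    def __init__(self, n=32, n_classes=5):
        self.x = torch.randn(n, 3, 224, 224)
        self.y = torch.randint(0, n_classes, (n,))

    def __len__(self):
        return len(self.y)

    def __getitem__(self, i):
        return {'image': self.x[i], 'label': self.y[i], 'id': torch.tensor(i)}

# usage (assumes a CUDA device, as does runTrainDs itself):
#   loader = DataLoader(_ToyDictDataset(), batch_size=8, shuffle=True)
#   tr_acc, id_order = runTrainDs(model, optimizer, loader)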
# Variant of runTrainDs (evidently from a sibling module) that returns only the
# running accuracy. Its loop body was missing from the source dump; it is
# reconstructed here from the fuller variant above.
def runTrainDs(model, optimizer, dsLoad_train):
    print("running --> runTrainDs", datetime.datetime.now().strftime("%H:%M:%S"))
    t = time.time()
    running_acc = []
    crit = torch.nn.CrossEntropyLoss()
    for sample in dsLoad_train:
        images = sample['image'].cuda()
        labels = sample['label'].cuda()
        optimizer.zero_grad()
        outputs = model(images)
        loss = crit(outputs, labels)
        loss.backward()
        optimizer.step()
        _, indices = outputs.max(1)
        running_acc += (indices == labels).cpu().numpy().astype(float).tolist()
    tr_acc_run = float(np.mean(running_acc))
    elapsed = time.time() - t
    funcH.removeLastLine()
    print('runTrainDs completed (', funcH.getElapsedTimeFormatted(elapsed),
          '), ended at ', datetime.datetime.now().strftime("%H:%M:%S"))
    return tr_acc_run
# Variant of runValidDs that does not collect features (return_feats and
# layerSize are accepted for interface compatibility only). Its loop body was
# missing from the source dump; reconstructed from the fuller variant below.
def runValidDs(model, dsLoad_valid_test, return_feats=True, layerSize=512, dataIdentStr=""):
    predictions = []
    labels_all = []
    epoc_acc = []
    print("running --> runValidDs(", dataIdentStr, "return_feats=", str(return_feats),
          ", layerSize=", str(layerSize), ")", datetime.datetime.now().strftime("%H:%M:%S"))
    t = time.time()
    with torch.no_grad():  # evaluation only; no gradients needed
        for sample in dsLoad_valid_test:
            images = sample['image'].cuda()
            labels = sample['label'].cuda()
            outputs = model(images)
            _, indices = outputs.max(1)
            epoc_acc += (indices == labels).cpu().numpy().astype(float).tolist()
            predictions += indices.tolist()
            labels_all += labels.tolist()
    elapsed = time.time() - t
    acc = float(np.mean(epoc_acc))
    funcH.removeLastLine()
    print('runValidDs(return_feats=', str(return_feats), ') completed (',
          funcH.getElapsedTimeFormatted(elapsed), '), ended at ',
          datetime.datetime.now().strftime("%H:%M:%S"))
    return acc, predictions, labels_all
def runValidDs(model, dsLoad_valid_test, return_feats=True, layerSize=512, dataIdentStr=""):
    """Evaluate the model on a loader; optionally also collect the
    penultimate-layer ('avgpool') features, layerSize values per sample."""
    epoc_acc = []
    features_avgPool = []
    predictions = []
    labels_all = []
    print("running --> runValidDs(", dataIdentStr, "return_feats=", str(return_feats),
          ", layerSize=", str(layerSize), ")", datetime.datetime.now().strftime("%H:%M:%S"))
    t = time.time()
    with torch.no_grad():  # evaluation only; no gradients needed
        for sample in dsLoad_valid_test:
            images = sample['image'].cuda()
            labels = sample['label'].cuda()
            outputs = model(images)
            _, indices = outputs.max(1)
            epoc_acc += (indices == labels).cpu().numpy().astype(float).tolist()
            if return_feats:
                feats = extract_features(layerSize=layerSize, model=model,
                                         feature_layer_string='avgpool', images=images)
                features_avgPool += feats
            predictions += indices.tolist()
            labels_all += labels.tolist()
    elapsed = time.time() - t
    acc = float(np.mean(epoc_acc))
    funcH.removeLastLine()
    print('runValidDs(return_feats=', str(return_feats), ') completed (',
          funcH.getElapsedTimeFormatted(elapsed), '), ended at ',
          datetime.datetime.now().strftime("%H:%M:%S"))
    return acc, predictions, labels_all, features_avgPool
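# extract_features() is called above (and in the variant further below) but is
# not defined in this dump. A plausible sketch, assuming a torchvision-style
# model that exposes its pooling layer under the submodule name passed in
# feature_layer_string (e.g. 'avgpool' in ResNets) -- an illustration of the
# idea, not the project's actual implementation:
def extract_features(layerSize, model, feature_layer_string, images):
    captured = []

    def _hook(module, module_in, module_out):
        # e.g. flatten a (N, layerSize, 1, 1) avgpool output to (N, layerSize)
        captured.append(module_out.detach().view(module_out.size(0), -1).cpu())

    layer = dict(model.named_modules())[feature_layer_string]
    handle = layer.register_forward_hook(_hook)
    with torch.no_grad():
        model(images)  # forward pass only, to trigger the hook
    handle.remove()
    feats = captured[0].numpy()
    assert feats.shape[1] == layerSize, "unexpected feature width"
    return feats.tolist()  # per-sample feature vectors as lists, so that += concatenates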
def iterate_1(model, ds_loader, num_ftrs, ep, epochTo, epochStartTime, path_dict):
    """End-of-epoch evaluation: score the model on the train/validation/test
    splits and save labels and predictions to <result_fold>/epXXX.npy."""
    model.eval()  # set model to evaluation mode
    acc_tra, pred_tra, labels_tra, _ = runValidDs(model, ds_loader["train_te"],
                                                  return_feats=False, layerSize=num_ftrs,
                                                  dataIdentStr="train")
    acc_val, pred_val, labels_val, _ = runValidDs(model, ds_loader["valid"],
                                                  return_feats=False, layerSize=num_ftrs,
                                                  dataIdentStr="validation")
    acc_tes, pred_tes, labels_tes, _ = runValidDs(model, ds_loader["test"],
                                                  return_feats=False, layerSize=num_ftrs,
                                                  dataIdentStr="test")
    result_row = np.array([ep, acc_tra, acc_val, acc_tes])
    print('ep={:d}/{:d}, acc_tra={:0.5f}, acc_val={:0.2f}, acc_tes={:0.2f}'.format(
        ep, epochTo, acc_tra, acc_val, acc_tes))
    print('Epoch done in (', funcH.getElapsedTimeFormatted(time.time() - epochStartTime),
          '), ended at ', datetime.datetime.now().strftime("%H:%M:%S"))
    print('*-*-*-*-*-*-*')
    results_dict = {
        "labels_tra": labels_tra, "labels_val": labels_val, "labels_tes": labels_tes,
        "pred_tra": pred_tra, "pred_val": pred_val, "pred_tes": pred_tes,
    }
    resultFileNameFull = os.path.join(path_dict["result_fold"], "ep{:03d}.npy".format(ep))
    np.save(resultFileNameFull, results_dict, allow_pickle=True)
    return result_row
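# Because results_dict is a plain Python dict saved through np.save, reading it
# back needs allow_pickle plus .item() to recover the dict (the path below is
# the same per-epoch file iterate_1 writes):
#   d = np.load(os.path.join(path_dict["result_fold"], "ep000.npy"), allow_pickle=True).item()
#   pred_tes, labels_tes = d["pred_tes"], d["labels_tes"]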
def iterate_1(featTrInit, labelsTrInit, predictionsTr, k, labelSaveFileName, ep, epochTo,
              trAccInit, epochStartTime, clusterModel='KMeans', initialLabelVec=None):
    """Clustering variant of the end-of-epoch step: cluster the training
    features into k groups, score them against labels and predictions (NMI and
    accuracy; the *_nz variants exclude zero-labeled samples) and save the
    result. calc_stats_on_iterate is a project-local helper."""
    labelsTrInit = np.asarray(labelsTrInit, dtype=int)
    predictionsTr = (np.asarray(predictionsTr, dtype=int) if initialLabelVec is None
                     else np.asarray(initialLabelVec, dtype=int))
    nmi_lab, acc_lab, nmi_lab_nz, acc_lab_nz, \
        nmi_pred, acc_pred, nmi_pred_nz, acc_pred_nz, predClusters = \
        calc_stats_on_iterate(featTrInit, labelsTrInit, predictionsTr, k, clusterModel)
    np.savez(labelSaveFileName, labelsTrInit=labelsTrInit, predClusters=predClusters,
             acc_lab=acc_lab, acc_lab_nonzero=acc_lab_nz, predictionsTr=predictionsTr)
    resultRow = np.array([ep, trAccInit, nmi_lab, nmi_lab_nz, acc_lab, acc_lab_nz,
                          nmi_pred, nmi_pred_nz, acc_pred, acc_pred_nz])
    print('ep={:d}/{:d}, trAccInit={:0.5f} - '
          'nmi_lab={:0.2f}, nmi_lab_nonzero={:0.2f}, acc_lab={:0.2f}, acc_lab_nonzero={:0.2f}, '
          'nmi_pred={:0.2f}, nmi_pred_nonzero={:0.2f}, acc_pred={:0.2f}, acc_pred_nonzero={:0.2f}'.format(
              ep, epochTo, trAccInit, nmi_lab, nmi_lab_nz, acc_lab, acc_lab_nz,
              nmi_pred, nmi_pred_nz, acc_pred, acc_pred_nz))
    # elapsed time of epoch
    print('Epoch done in (', funcH.getElapsedTimeFormatted(time.time() - epochStartTime),
          '), ended at ', datetime.datetime.now().strftime("%H:%M:%S"))
    print('*-*-*-*-*-*-*')
    return predClusters, resultRow
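# The np.savez archive written above reads back as a dict-like NpzFile. Note
# that np.savez appends ".npz" when the name lacks it:
#   arch = np.load(labelSaveFileName if labelSaveFileName.endswith(".npz")
#                  else labelSaveFileName + ".npz")
#   predClusters = arch["predClusters"]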
# Module-style imports implied by the method below (it comes from a class-based
# module that aliases numpy/time/datetime differently from the functions above).
from time import time
from datetime import datetime
from os.path import isfile
from numpy import load as np_load, save as np_save
from umap import UMAP
from sklearn.manifold import LocallyLinearEmbedding, TSNE, Isomap


def learn_manifold(self, X, manifold_out_file_name=None):
    """Project X onto a lower-dimensional manifold with the configured learner,
    re-using a cached result from disk when one exists."""
    self.debug_string_out.clear()
    self.print_and_remember("Learning manifold(" + self.manifold_learner + ")" + str(datetime.now()))
    learn_time = time()
    if manifold_out_file_name is not None and isfile(manifold_out_file_name):
        # a previously learned manifold exists; load it instead of refitting
        manifold_feats = np_load(manifold_out_file_name, allow_pickle=True)
        self.print_and_remember("Manifold loaded(" + manifold_out_file_name + ")")
    elif self.manifold_learner == 'UMAP':
        manifold_feats = UMAP(random_state=0,
                              metric=self.dist_metric,
                              n_components=self.manifold_dimension,
                              n_neighbors=self.num_of_neighbours,
                              min_dist=float(self.min_dist)).fit_transform(X)
    elif self.manifold_learner == 'LLE':
        manifold_feats = LocallyLinearEmbedding(n_components=self.manifold_dimension,
                                                n_neighbors=self.num_of_neighbours).fit_transform(X)
    elif self.manifold_learner == 'tSNE':
        manifold_feats = TSNE(n_components=self.manifold_dimension,
                              random_state=0, verbose=0).fit_transform(X)
    elif self.manifold_learner == 'isomap':
        manifold_feats = Isomap(n_components=self.manifold_dimension,
                                n_neighbors=self.num_of_neighbours).fit_transform(X)
    else:
        # previously an unknown learner fell through and raised a NameError below
        raise ValueError("unknown manifold_learner: " + str(self.manifold_learner))
    self.print_and_remember("Time to learn manifold: " +
                            str(funcH.getElapsedTimeFormatted(time() - learn_time)))
    if manifold_out_file_name is not None:
        np_save(manifold_out_file_name, manifold_feats, allow_pickle=True)
        self.print_and_remember("Manifold saved(" + manifold_out_file_name + ")")
    return manifold_feats, self.debug_string_out
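# Hypothetical call site for learn_manifold (the owning class and its
# configuration attributes are assumptions inferred from the attributes read above):
#   obj.manifold_learner = 'UMAP'; obj.manifold_dimension = 2
#   obj.dist_metric = 'euclidean'; obj.num_of_neighbours = 15; obj.min_dist = 0.1
#   feats_2d, log_lines = obj.learn_manifold(X, manifold_out_file_name="umap_2d.npy")
# A second run with the same file name loads the cached .npy instead of refitting.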
def runValidDs(model, dsLoad_train_featExtract, return_feats=True, layerSize=512):
    """Feature-extraction variant of runValidDs: additionally returns, in
    loader order, the sample ids, labels and predictions alongside the features."""
    idSorted = []
    epoc_acc = []
    features_avgPool = []
    labels_avgPool = []
    predictions_avgPool = []
    print("running --> runValidDs(return_feats=", str(return_feats),
          ", layerSize=", str(layerSize), ")", datetime.datetime.now().strftime("%H:%M:%S"))
    t = time.time()
    with torch.no_grad():
        for sample in dsLoad_train_featExtract:
            images = sample['image'].cuda()
            labels = sample['label'].cuda()
            ids = sample['id'].cuda()
            outputs = model(images)
            _, indices = outputs.max(1)
            epoc_acc += (indices == labels).cpu().numpy().astype(float).tolist()
            if return_feats:
                feats = extract_features(layerSize=layerSize, model=model,
                                         feature_layer_string='avgpool', images=images)
                features_avgPool += feats
            labels_avgPool += labels.tolist()
            predictions_avgPool += indices.tolist()
            idSorted += ids.tolist()
    elapsed = time.time() - t
    val_acc_epoch = float(np.mean(epoc_acc))
    funcH.removeLastLine()
    print('runValidDs(return_feats=', str(return_feats), ') completed (',
          funcH.getElapsedTimeFormatted(elapsed), '), ended at ',
          datetime.datetime.now().strftime("%H:%M:%S"))
    return val_acc_epoch, idSorted, features_avgPool, labels_avgPool, predictions_avgPool
# Imports implied by the clustering method below; getElapsedTimeFormatted,
# removeLastLine, get_cluster_centroids and analyzeClusterDistribution are
# project-local helpers imported into this module's namespace.
from pandas import DataFrame as pd_df
from numpy import array as np_array, unique as np_unique
from sklearn.cluster import KMeans, SpectralClustering
from sklearn.mixture import GaussianMixture


def fit(self, X, post_analyze_distribution=False, verbose=1, random_state=0):
    """Fit the configured clustering model to X, retrying with a relaxed
    tolerance and more iterations while the clustering degenerates (a single
    cluster, or too many 1-sample bins)."""
    df = pd_df(np_array(X))
    curTol = 0.0001 if self.cluster_model == 'KMeans' else 0.01
    max_iter = 300 if self.cluster_model == 'KMeans' else 200
    numOf_1_sample_bins = 1
    unique_clust_cnt = 1
    expCnt = 0
    while (unique_clust_cnt == 1 or numOf_1_sample_bins - expCnt > 0) and expCnt < self.max_try_cnt:
        t = time()
        if expCnt > 0:
            if numOf_1_sample_bins > 0:
                print("running ", self.cluster_model, " for the ", str(expCnt),
                      " time due to numOf_1_sample_bins(", str(numOf_1_sample_bins), ")")
            if unique_clust_cnt == 1:
                print("running ", self.cluster_model, " for the ", str(expCnt),
                      " time due to unique_clust_cnt==1")
        if verbose > 0:
            print('Clustering the featVec(', X.shape, ') with n_clusters(', str(self.n_clusters),
                  ') and model = ', self.cluster_model, ", curTol(", str(curTol),
                  "), max_iter(", str(max_iter), "), at ", datetime.now().strftime("%H:%M:%S"))
        self.kluster_centers = None
        self.predictedKlusters = None
        if self.cluster_model == 'KMeans':
            # defaults for KMeans are max_iter=300, tol=1e-4
            self.trained_model = KMeans(init='k-means++', n_clusters=self.n_clusters, n_init=20,
                                        tol=curTol, max_iter=max_iter,
                                        random_state=random_state).fit(df)
            self.predictedKlusters = self.trained_model.labels_.astype(float)
            self.kluster_centers = self.trained_model.cluster_centers_.astype(float)
        elif self.cluster_model == 'GMM_full':
            # defaults for GaussianMixture are max_iter=100, tol=1e-3
            self.trained_model = GaussianMixture(n_components=self.n_clusters,
                                                 covariance_type='full', tol=curTol,
                                                 random_state=random_state,
                                                 max_iter=max_iter, reg_covar=1e-4).fit(df)
            # predict() takes the argmax of the responsibilities, replacing the
            # original call into the private _e_step API
            self.predictedKlusters = self.trained_model.predict(df)
        elif self.cluster_model == 'GMM_diag':
            self.trained_model = GaussianMixture(n_components=self.n_clusters,
                                                 covariance_type='diag', tol=curTol,
                                                 random_state=random_state,
                                                 max_iter=max_iter, reg_covar=1e-4).fit(df)
            self.predictedKlusters = self.trained_model.predict(df)
        elif self.cluster_model == 'Spectral':
            sc = SpectralClustering(n_clusters=self.n_clusters,
                                    affinity=self.spectral_affinity,
                                    random_state=random_state)
            self.trained_model = sc.fit(X)
            self.predictedKlusters = self.trained_model.labels_
        else:
            raise ValueError("unknown cluster_model: " + str(self.cluster_model))
        self.kluster_centroids = get_cluster_centroids(X, self.predictedKlusters,
                                                       kluster_centers=self.kluster_centers,
                                                       verbose=0)
        if post_analyze_distribution:
            numOf_1_sample_bins, histSortedInv = analyzeClusterDistribution(
                self.predictedKlusters, self.n_clusters, verbose=verbose)
            unique_clust_cnt = len(np_unique(self.predictedKlusters))
            # relax the stopping criteria for the next attempt
            curTol = curTol * 10
            max_iter = max_iter + 50
            expCnt = expCnt + 1
        else:
            expCnt = self.max_try_cnt
        elapsed = time() - t
        if verbose > 0:
            print('Clustering done in (', getElapsedTimeFormatted(elapsed), '), ended at ',
                  datetime.now().strftime("%H:%M:%S"))
    removeLastLine()
    if verbose > 0:
        print('Clustering completed with (', np_unique(self.predictedKlusters).shape,
              ') clusters, expCnt(', str(expCnt), ')')
    # elif 'OPTICS' in clusterModel:
    #     N = featVec.shape[0]
    #     min_cluster_size = int(np.ceil(N / (n_clusters * 4)))
    #     pars = clusterModel.split('_')  # 'OPTICS_hamming_dbscan', 'OPTICS_russellrao_xi'
    #     # metricsAvail = np.sort(['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice',
    #     #                         'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski',
    #     #                         'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener',
    #     #                         'sokalsneath', 'sqeuclidean', 'yule',
    #     #                         'cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'])
    #     # cluster_methods_avail = ['xi', 'dbscan']
    #     clust = ClusterOPT(min_samples=50, xi=.05, min_cluster_size=min_cluster_size,
    #                        metric=pars[1], cluster_method=pars[2])
    #     clust.fit(featVec)
    #     predictedKlusters = cluster_optics_dbscan(reachability=clust.reachability_,
    #                                               core_distances=clust.core_distances_,
    #                                               ordering=clust.ordering_, eps=0.5)
    #     n1 = np.unique(predictedKlusters)
    #     print(clusterModel, ' found ', str(n1), ' uniq clusters')
    #     predictedKlusters = predictedKlusters + 1
    return self
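# Hypothetical usage of fit(), assuming the method sits on a wrapper class
# (here called Clusterer; the class name and constructor are assumptions --
# only the attributes fit() reads are taken from the code above):
#   import numpy as np
#   cl = Clusterer(n_clusters=10, cluster_model='KMeans', max_try_cnt=5,
#                  spectral_affinity='rbf')
#   cl.fit(np.random.rand(500, 64), post_analyze_distribution=True, verbose=1)
#   print(np.unique(cl.predictedKlusters))  # cluster ids per sample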