def runTrainDs(model, optimizer, dsLoad_train_train):
    print("running --> runTrainDs", datetime.datetime.now().strftime("%H:%M:%S"))
    t = time.time()
    running_acc = []
    idShuffle = []
    cnt = 0
    criterion = torch.nn.CrossEntropyLoss()  # create the loss once, not per batch
    for sample in dsLoad_train_train:
        images = sample['image'].cuda()
        labels = sample['label'].cuda()
        ids = sample['id'].cuda()
        cnt += len(labels)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        _, indices = outputs.max(1)
        acc = (indices == labels).cpu().numpy().astype(float)  # np.float was removed in NumPy 1.24
        running_acc.extend(acc.tolist())
        idShuffle = idShuffle + ids.tolist()

    tr_acc_run = float(np.mean(running_acc))
    elapsed = time.time() - t

    funcH.removeLastLine()
    print('runTrainDs completed (', funcH.getElapsedTimeFormatted(elapsed), '), ended at ', datetime.datetime.now().strftime("%H:%M:%S"))

    return tr_acc_run, idShuffle
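# A minimal usage sketch for the trainer above, assuming a CUDA device and a
# Dataset whose samples are dicts with 'image', 'label' and 'id' keys (the
# dataset/loader names below are illustrative, not from the original):
#   import torch, torchvision
#   model = torchvision.models.resnet18(num_classes=10).cuda()
#   optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
#   train_loader = torch.utils.data.DataLoader(my_dict_dataset, batch_size=32, shuffle=True)
#   tr_acc, ids_seen = runTrainDs(model, optimizer, train_loader)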
Example #2
def runTrainDs(model, optimizer, dsLoad_train):
    print("running --> runTrainDs", datetime.datetime.now().strftime("%H:%M:%S"))
    t = time.time()
    tr_acc_run = 0
    elapsed = time.time() - t
    funcH.removeLastLine()
    print('runTrainDs completed (', funcH.getElapsedTimeFormatted(elapsed), '), ended at ', datetime.datetime.now().strftime("%H:%M:%S"))
    return tr_acc_run
Example #3
def runValidDs(model, dsLoad_valid_test, return_feats=True, layerSize=512, dataIdentStr=""):
    predictions = []
    labels_all = []
    print("running --> runValidDs(", dataIdentStr, "return_feats=", str(return_feats), ", layerSize=", str(layerSize), ")", datetime.datetime.now().strftime("%H:%M:%S"))
    t = time.time()
    elapsed = time.time() - t
    acc = 0
    funcH.removeLastLine()
    print('runValidDs(return_feats=', str(return_feats), ') completed (', funcH.getElapsedTimeFormatted(elapsed), '), ended at ', datetime.datetime.now().strftime("%H:%M:%S"))
    return acc, predictions, labels_all
Example #4
def runValidDs(model,
               dsLoad_valid_test,
               return_feats=True,
               layerSize=512,
               dataIdentStr=""):
    epoc_acc = []
    features_avgPool = []
    predictions = []
    labels_all = []

    print("running --> runValidDs(", dataIdentStr, "return_feats=",
          str(return_feats), ", layerSize=", str(layerSize), ")",
          datetime.datetime.now().strftime("%H:%M:%S"))
    t = time.time()

    for sample in dsLoad_valid_test:
        images = sample['image'].cuda()
        labels = sample['label'].cuda()
        # ids = sample['id'].cuda()
        outputs = model(images)

        _, indices = outputs.max(1)
        acc = (indices == labels).cpu().numpy().astype(float)  # np.float was removed in NumPy 1.24
        epoc_acc += acc.tolist()

        if return_feats:
            feats = extract_features(layerSize=layerSize,
                                     model=model,
                                     feature_layer_string='avgpool',
                                     images=images)
            features_avgPool = features_avgPool + feats
        predictions += indices.tolist()
        labels_all += labels.tolist()

    elapsed = time.time() - t
    acc = float(np.mean(epoc_acc))

    funcH.removeLastLine()
    print('runValidDs(return_feats=', str(return_feats), ') completed (',
          funcH.getElapsedTimeFormatted(elapsed), '), ended at ',
          datetime.datetime.now().strftime("%H:%M:%S"))

    return acc, predictions, labels_all, features_avgPool
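# `extract_features` is not defined in any of these examples; below is a
# hypothetical sketch of what it likely does, assuming a torchvision-style model
# (e.g. a ResNet) whose 'avgpool' layer emits (N, layerSize, 1, 1) maps. A
# forward hook is one common way to capture intermediate activations:
import torch

def extract_features(layerSize, model, feature_layer_string, images):
    captured = []

    def hook(module, inputs, output):
        # flatten pooled maps (N, C, 1, 1) to (N, layerSize) vectors on the CPU
        captured.append(output.detach().reshape(output.size(0), -1).cpu())

    layer = dict(model.named_modules())[feature_layer_string]
    handle = layer.register_forward_hook(hook)
    try:
        with torch.no_grad():
            model(images)
    finally:
        handle.remove()  # always release the hook, even if forward() raises
    feats = captured[0]
    assert feats.shape[1] == layerSize, "unexpected feature width"
    # callers concatenate with `features_avgPool + feats`, so return a list
    return feats.tolist()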
Example #5
def iterate_1(model, ds_loader, num_ftrs, ep, epochTo, epochStartTime,
              path_dict):
    model.eval()  # Set model to evaluation mode
    acc_tra, pred_tra, labels_tra, _ = runValidDs(model,
                                                  ds_loader["train_te"],
                                                  return_feats=False,
                                                  layerSize=num_ftrs,
                                                  dataIdentStr="train")
    acc_val, pred_val, labels_val, _ = runValidDs(model,
                                                  ds_loader["valid"],
                                                  return_feats=False,
                                                  layerSize=num_ftrs,
                                                  dataIdentStr="validation")
    acc_tes, pred_tes, labels_tes, _ = runValidDs(model,
                                                  ds_loader["test"],
                                                  return_feats=False,
                                                  layerSize=num_ftrs,
                                                  dataIdentStr="test")

    result_row = np.array([ep, acc_tra, acc_val, acc_tes])
    print('ep={:d}/{:d}, acc_tra={:0.5f}, acc_val={:0.2f}, acc_tes={:0.2f}'.
          format(ep, epochTo, acc_tra, acc_val, acc_tes))
    print('Epoch done in (',
          funcH.getElapsedTimeFormatted(time.time() - epochStartTime),
          '), ended at ',
          datetime.datetime.now().strftime("%H:%M:%S"))
    print('*-*-*-*-*-*-*')

    results_dict = {
        "labels_tra": labels_tra,
        "labels_val": labels_val,
        "labels_tes": labels_tes,
        "pred_tra": pred_tra,
        "pred_val": pred_val,
        "pred_tes": pred_tes,
    }
    resultFileNameFull = os.path.join(path_dict["result_fold"],
                                      "ep{:03d}.npy".format(ep))
    np.save(resultFileNameFull, results_dict, allow_pickle=True)

    return result_row
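# Note: np.save on a dict pickles it into a 0-d object array, so reloading the
# per-epoch results needs .item() -- a minimal sketch (file name illustrative):
#   results = np.load("ep000.npy", allow_pickle=True).item()
#   pred_tes, labels_tes = results["pred_tes"], results["labels_tes"]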
def iterate_1(featTrInit, labelsTrInit, predictionsTr, k, labelSaveFileName, ep, epochTo, trAccInit, epochStartTime,
              clusterModel='KMeans', initialLabelVec=None):
    labelsTrInit = np.asarray(labelsTrInit, dtype=int)
    predictionsTr = np.asarray(predictionsTr, dtype=int) if initialLabelVec is None else np.asarray(initialLabelVec, dtype=int)

    (nmi_lab, acc_lab, nmi_lab_nz, acc_lab_nz,
     nmi_pred, acc_pred, nmi_pred_nz, acc_pred_nz,
     predClusters) = calc_stats_on_iterate(featTrInit, labelsTrInit,
                                           predictionsTr, k, clusterModel)

    np.savez(labelSaveFileName, labelsTrInit=labelsTrInit, predClusters=predClusters, acc_lab=acc_lab, acc_lab_nonzero=acc_lab_nz, predictionsTr=predictionsTr)

    resultRow = np.array([ep, trAccInit, nmi_lab, nmi_lab_nz, acc_lab, acc_lab_nz, nmi_pred, nmi_pred_nz, acc_pred, acc_pred_nz])
    print('ep={:d}/{:d}, trAccInit={:0.5f} - '
          'nmi_lab={:0.2f}, nmi_lab_nonzero={:0.2f}, acc_lab={:0.2f}, acc_lab_nonzero={:0.2f}, '
          'nmi_pred={:0.2f}, nmi_pred_nonzero={:0.2f}, acc_pred={:0.2f}, acc_pred_nonzero={:0.2f} '.format(
            ep, epochTo, trAccInit, nmi_lab, nmi_lab_nz, acc_lab, acc_lab_nz, nmi_pred, nmi_pred_nz, acc_pred, acc_pred_nz))
    # elapsed time of the epoch
    print('Epoch done in (', funcH.getElapsedTimeFormatted(time.time() - epochStartTime), '), ended at ', datetime.datetime.now().strftime("%H:%M:%S"))
    print('*-*-*-*-*-*-*')

    return predClusters, resultRow
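# Unlike the pickled dict above, np.savez output reloads as a keyed NpzFile
# (np.savez appends '.npz' to the file name if it is missing), e.g.:
#   data = np.load(labelSaveFileName)  # add '.npz' here if the name lacks it
#   predClusters, acc_lab = data["predClusters"], data["acc_lab"]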
Example #7
    def learn_manifold(self, X, manifold_out_file_name=None):
        self.debug_string_out.clear()
        self.print_and_remember("Learning manifold(" + self.manifold_learner +
                                ")" + str(datetime.now()))
        learn_time = time()

        if manifold_out_file_name is not None and isfile(
                manifold_out_file_name):  # check whether the learned manifold already exists
            manifold_feats = np_load(manifold_out_file_name, allow_pickle=True)
            self.print_and_remember("Manifold loaded(" +
                                    manifold_out_file_name + ")")
        elif self.manifold_learner == 'UMAP':
            manifold_feats = UMAP(random_state=0,
                                  metric=self.dist_metric,
                                  n_components=self.manifold_dimension,
                                  n_neighbors=self.num_of_neighbours,
                                  min_dist=float(
                                      self.min_dist)).fit_transform(X)
        elif self.manifold_learner == 'LLE':
            manifold_feats = LocallyLinearEmbedding(
                n_components=self.manifold_dimension,
                n_neighbors=self.num_of_neighbours).fit_transform(X)
        elif self.manifold_learner == 'tSNE':
            manifold_feats = TSNE(n_components=self.manifold_dimension,
                                  random_state=0,
                                  verbose=0).fit_transform(X)
        elif self.manifold_learner == 'isomap':
            manifold_feats = Isomap(
                n_components=self.manifold_dimension,
                n_neighbors=self.num_of_neighbours).fit_transform(X)
        else:
            # without this guard, an unknown learner name would leave
            # manifold_feats undefined and raise a confusing NameError below
            raise ValueError("unknown manifold_learner: " +
                             str(self.manifold_learner))
        self.print_and_remember(
            "Time to learn manifold: " +
            str(funcH.getElapsedTimeFormatted(time() - learn_time)))
        if manifold_out_file_name is not None:
            np_save(manifold_out_file_name, manifold_feats, allow_pickle=True)
            self.print_and_remember("Manifold saved(" +
                                    manifold_out_file_name + ")")
        return manifold_feats, self.debug_string_out
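# Imports this method assumes, inferred from the call sites above (the numpy
# aliases are a guess consistent with the names used):
#   from os.path import isfile
#   from time import time
#   from datetime import datetime
#   from numpy import load as np_load, save as np_save
#   from umap import UMAP
#   from sklearn.manifold import LocallyLinearEmbedding, TSNE, Isomap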
def runValidDs(model, dsLoad_train_featExtract, return_feats=True, layerSize=512):
    cnt = 0
    idSorted = []
    epoc_acc = []

    features_avgPool = []
    labels_avgPool = []
    predictions_avgPool = []

    print("running --> runValidDs(return_feats=", str(return_feats), ", layerSize=", str(layerSize), ")", datetime.datetime.now().strftime("%H:%M:%S"))
    t = time.time()

    for sample in dsLoad_train_featExtract:
        images = sample['image'].cuda()
        labels = sample['label'].cuda()
        ids = sample['id'].cuda()
        outputs = model(images)

        _, indices = outputs.max(1)
        acc = (indices == labels).cpu().numpy().astype(float)  # np.float was removed in NumPy 1.24
        epoc_acc.extend(acc.tolist())

        cnt += 1
        if return_feats:
            feats = extract_features(layerSize=layerSize, model=model, feature_layer_string='avgpool', images=images)
            features_avgPool = features_avgPool + feats
            labels_avgPool = labels_avgPool + labels.tolist()
            predictions_avgPool = predictions_avgPool + indices.tolist()
        idSorted = idSorted + ids.tolist()

    elapsed = time.time() - t
    val_acc_epoch = float(np.mean(epoc_acc))

    funcH.removeLastLine()
    print('runValidDs(return_feats=', str(return_feats), ') completed (', funcH.getElapsedTimeFormatted(elapsed), '), ended at ', datetime.datetime.now().strftime("%H:%M:%S"))

    return val_acc_epoch, idSorted, features_avgPool, labels_avgPool, predictions_avgPool
Example #9
    def fit(self,
            X,
            post_analyze_distribution=False,
            verbose=1,
            random_state=0):
        df = pd_df(np_array(X))

        curTol = 0.0001 if self.cluster_model == 'KMeans' else 0.01
        max_iter = 300 if self.cluster_model == 'KMeans' else 200

        numOf_1_sample_bins = 1
        unique_clust_cnt = 1
        expCnt = 0
        while (unique_clust_cnt == 1 or
               numOf_1_sample_bins - expCnt > 0) and expCnt < self.max_try_cnt:
            t = time()
            if expCnt > 0:
                if numOf_1_sample_bins > 0:
                    print("running ", self.cluster_model, " for the ",
                          str(expCnt), " time due to numOf_1_sample_bins(",
                          str(numOf_1_sample_bins), ")")
                if unique_clust_cnt == 1:
                    print("running ", self.cluster_model, " for the ",
                          str(expCnt), " time due to unique_clust_cnt==1")
            if verbose > 0:
                print('Clustering the featVec(', X.shape, ') with n_clusters(',
                      str(self.n_clusters),
                      ') and model = ', self.cluster_model, ", curTol(",
                      str(curTol), "), max_iter(", str(max_iter), "), at ",
                      datetime.now().strftime("%H:%M:%S"))
            self.kluster_centers = None
            self.predictedKlusters = None

            if self.cluster_model == 'KMeans':
                # default vals for kmeans --> max_iter=300, 1e-4
                self.trained_model = KMeans(init='k-means++',
                                            n_clusters=self.n_clusters,
                                            n_init=20,
                                            tol=curTol,
                                            max_iter=max_iter,
                                            random_state=random_state).fit(df)
                self.predictedKlusters = self.trained_model.labels_.astype(
                    float)
                self.kluster_centers = self.trained_model.cluster_centers_.astype(
                    float)
            elif self.cluster_model == 'GMM_full':
                # default vals for gmm --> max_iter=100, 1e-3
                self.trained_model = GaussianMixture(
                    n_components=self.n_clusters,
                    covariance_type='full',
                    tol=curTol,
                    random_state=random_state,
                    max_iter=max_iter,
                    reg_covar=1e-4).fit(df)
                # argmax of the E-step responsibilities is exactly what
                # predict() returns, without relying on the private _e_step API
                self.predictedKlusters = self.trained_model.predict(df)
            elif self.cluster_model == 'GMM_diag':
                self.trained_model = GaussianMixture(
                    n_components=self.n_clusters,
                    covariance_type='diag',
                    tol=curTol,
                    random_state=random_state,
                    max_iter=max_iter,
                    reg_covar=1e-4).fit(df)
                self.predictedKlusters = self.trained_model.predict(df)
            elif self.cluster_model == 'Spectral':
                sc = SpectralClustering(n_clusters=self.n_clusters,
                                        affinity=self.spectral_affinity,
                                        random_state=random_state)
                self.trained_model = sc.fit(X)
                self.predictedKlusters = self.trained_model.labels_

            self.kluster_centroids = get_cluster_centroids(
                X,
                self.predictedKlusters,
                kluster_centers=self.kluster_centers,
                verbose=0)

            if post_analyze_distribution:
                numOf_1_sample_bins, histSortedInv = analyzeClusterDistribution(
                    self.predictedKlusters, self.n_clusters, verbose=verbose)
                unique_clust_cnt = len(np_unique(self.predictedKlusters))
                curTol = curTol * 10
                max_iter = max_iter + 50
                expCnt = expCnt + 1
            else:
                expCnt = self.max_try_cnt

            elapsed = time() - t
            if verbose > 0:
                print('Clustering done in (', getElapsedTimeFormatted(elapsed),
                      '), ended at ',
                      datetime.now().strftime("%H:%M:%S"))
        if verbose > 0:
            removeLastLine()  # only undo the progress line when one was printed
            print('Clustering completed with (',
                  np_unique(self.predictedKlusters).shape,
                  ') clusters,  expCnt(', str(expCnt), ')')
        # elif 'OPTICS' in clusterModel:
        #     N = featVec.shape[0]
        #     min_cluster_size = int(np.ceil(N / (n_clusters * 4)))
        #     pars = clusterModel.split('_')  # 'OPTICS_hamming_dbscan', 'OPTICS_russellrao_xi'
        #     #  metricsAvail = np.sort(['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',
        #     #                'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener',
        #     #                'sokalsneath', 'sqeuclidean', 'yule',
        #     #                'cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'])
        #     #  cluster_methods_avail = ['xi', 'dbscan']
        #     clust = ClusterOPT(min_samples=50, xi=.05, min_cluster_size=min_cluster_size, metric=pars[1], cluster_method=pars[2])
        #     clust.fit(featVec)
        #     predictedKlusters = cluster_optics_dbscan(reachability=clust.reachability_,
        #                                                core_distances=clust.core_distances_,
        #                                                ordering=clust.ordering_, eps=0.5)
        #     n1 = np.unique(predictedKlusters)
        #     print(clusterModel, ' found ', str(n1), ' uniq clusters')
        #     predictedKlusters = predictedKlusters + 1

        return self
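# A minimal usage sketch; the owning class and its constructor are not shown in
# this example, so 'Clusterer' and its arguments below are illustrative:
#   cl = Clusterer(cluster_model='KMeans', n_clusters=32, max_try_cnt=5)
#   cl.fit(feature_matrix, post_analyze_distribution=True, verbose=1)
#   cluster_ids = cl.predictedKlusters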