Example #1
def main():
    # Training settings

    batch_size = 10
    num_epochs = 1

    learning_rate = 0.1
    log_interval = 2
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    transform = ToTensor()
    circles = [Circle(n, r, transform=transform) for n, r in zip((2, 3, 5), (1, 2, 3))]
    k = len(circles)
    train_dataset = ConcatDataset(circles)
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    # ds = [Circle(1000, 2, transform=transform), Circle(1200, 6, transform=transform)]
    # train_dataset = ConcatDataset(ds)
    # test_dataset = ConcatDataset([Circle(32, 3, transform=transform), Circle(1, 0, transform=transform)])

    # train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    # test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
    model = Net()
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    scheduler = StepLR(optimizer, step_size=1, gamma=0.1)
    for epoch in range(num_epochs):
        # (model, device, train_loader, optimizer, epoch, num_clusters)
        train(model, device, train_loader, optimizer, epoch, k, log_interval)
        # test(model, test_loader)
        scheduler.step()
    N = eval(model, device, train_loader)
    L = laplacian(N)
    # print(L.shape)

    eigval, eigvec = torch.linalg.eigh(L)  # torch.symeig was removed in recent PyTorch; eigh also returns ascending eigenpairs
    EigMat = eigvec[:, 1:3]

    # if torch.cuda.is_available():
    #     device = torch.device('cuda:0')
    # else:
    #     device = torch.device('cpu')

    cluster_ids_EigMat_new, cluster_centers = kmeans(EigMat, num_clusters=3, distance='euclidean', device=device)
    # plot
    plt.figure(figsize=(4, 3), dpi=160)
    plt.scatter(EigMat[:, 0], EigMat[:, 1], c=cluster_ids_EigMat_new, cmap='cool')
    #     plt.scatter(y[:, 0], y[:, 1], c=cluster_ids_y, cmap='cool', marker='X')
    plt.scatter(
        cluster_centers[:, 0], cluster_centers[:, 1],
        c='white',
        alpha=0.6,
        edgecolors='black',
        linewidths=2
    )
    plt.axis([-1, 1, -1, 1])
    plt.tight_layout()
    plt.show()
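
The pipeline above is spectral clustering: build an affinity from the learned embeddings, take the low eigenvectors of its Laplacian, and run k-means on them. A minimal self-contained sketch of that recipe, with a stand-in laplacian helper and random data in place of the example's Net/Circle setup:

import torch
from kmeans_pytorch import kmeans

def laplacian(A):
    # unnormalized graph Laplacian L = D - A for a symmetric affinity matrix
    return torch.diag(A.sum(dim=1)) - A

x = torch.randn(300, 2)
A = torch.exp(-torch.cdist(x, x) ** 2)   # Gaussian affinity (an assumption)
L = laplacian(A)
eigval, eigvec = torch.linalg.eigh(L)    # eigenvalues in ascending order
emb = eigvec[:, 1:3]                     # skip the trivial first eigenvector
cluster_ids, centers = kmeans(X=emb, num_clusters=3, distance='euclidean')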
Example #2
    def forward(self, qk):
        qk = qk.detach().to(torch.float16)
        batch_size, seq_len, dim, device = *qk.shape, qk.device # [6,12,384,64]

        W_R = torch.empty(batch_size, dim, dim, device=device)
        nn.init.orthogonal_(W_R)
        
        W_R = W_R.to(torch.float16)
        R = torch.matmul(qk, W_R).reshape(-1, dim)
        K = int(seq_len ** 0.5)
        
        cluster_idx, centroid = kmeans(X=R, num_clusters=K, distance='cosine', device=device)
        
        cluster_idx = cluster_idx.reshape(batch_size, seq_len).unsqueeze(1).expand(-1, self.n_heads, -1)
        result = torch.zeros(batch_size, self.n_heads, seq_len, seq_len, device=device)
        r1 = result.to(torch.long) + cluster_idx.unsqueeze(-1).to(device)   # [0, 0, 0, 0, ...]
        r2 = result.to(torch.long) + cluster_idx.unsqueeze(-2).to(device)  # [0, 1, 2, 3, ...]

        result = (r1 == r2).to(torch.float32)
        result = 10000. * result - 10000. 
        
        return result.detach()
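
The r1/r2 trick above just tests, pairwise, whether two positions fall in the same cluster: same-cluster pairs get a 0 additive bias and everything else -10000. A toy check of that construction:

import torch

idx = torch.tensor([[0, 0, 1, 1]])               # [batch=1, seq=4] cluster ids
same = idx.unsqueeze(-1) == idx.unsqueeze(-2)    # [1, 4, 4] same-cluster pairs
mask = 10000. * same.float() - 10000.
print(mask[0])   # 0 within a cluster, -10000 across clusters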
Example #3
    def attn_mask(self, x):
        attention_map_stretched = x.view(x.shape[2] * x.shape[3], 1)
        var = torch.randn(attention_map_stretched.shape,
                          device='cuda') * 0.0001
        attention_mask, _ = kmeans(attention_map_stretched + var,
                                   num_clusters=2,
                                   device=torch.device('cuda:0'))
        attention_mask = attention_mask.view(x.shape).type(
            torch.cuda.FloatTensor)
        return attention_mask
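
Note that kmeans_pytorch returns the cluster ids on the CPU, which is why the snippet casts back to a CUDA tensor afterwards. A device-free sketch of the same binarization idea:

import torch
from kmeans_pytorch import kmeans

amap = torch.rand(1, 1, 8, 8)            # a single-channel attention map
flat = amap.view(-1, 1)
jitter = torch.randn_like(flat) * 1e-4   # break ties, as in the snippet
ids, _ = kmeans(X=flat + jitter, num_clusters=2, distance='euclidean')
binary_mask = ids.view(amap.shape).float()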
Example #4
def update_kmeans(class_data, batch_size, sample_size, pretrained):
    class_features = get_features(class_data, batch_size, sample_size,
                                  pretrained)
    _, center_features = kmeans(X=class_features,
                                num_clusters=sample_size,
                                distance='euclidean',
                                device=torch.device('cuda:' +
                                                    str(args.gpu_id)))
    center_features = center_features.cuda()
    dist = dist_matrix(center_features, class_features)
    inds = torch.argmin(dist, dim=1)
    return inds
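
The pattern here is exemplar selection: run k-means, then for each center keep the index of the closest real sample. A sketch with torch.cdist standing in for the project's dist_matrix helper:

import torch
from kmeans_pytorch import kmeans

feats = torch.randn(100, 16)
_, centers = kmeans(X=feats, num_clusters=5, distance='euclidean')
d = torch.cdist(centers, feats)   # [5, 100] center-to-sample distances
inds = torch.argmin(d, dim=1)     # one exemplar index per center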
Example #5
def coexist_multiclass(to_class):
    path = '/home/ubuntu/data/Workspace/Soobin/wide_total/'
    path = glob.glob(path + '*')
    tot_cnt = len(path)
    print(tot_cnt)
    length = int(tot_cnt / 50)
    for attempt in range(24, 50):
        csv = []
        if attempt == 49:
            st_idx = length * attempt
            end_idx = tot_cnt
        else:
            st_idx = length * attempt
            end_idx = length * (attempt + 1)
        print(st_idx, "  ", end_idx)
        transformation = np.zeros((end_idx - st_idx, 20000 * 3),
                                  dtype='float32')

        for i, idx in enumerate(list(range(st_idx, end_idx))):
            im = cv2.imread(path[idx])
            im = np.array(cv2.resize(im, dsize=(200, 100)), dtype='float32')
            # h, edges = np.histogramdd(im.reshape(-1,3),8,normed=True,range=[(0,255),(0,255),(0,255)])
            transform = np.fft.fft2(im)
            transformation[i] = np.abs(transform.flatten())
        res = torch.from_numpy(transformation)
        print("start clustering")
        labels, _ = kmeans(X=res,
                           num_clusters=to_class,
                           distance='euclidean',
                           device=torch.device('cuda:1'))
        cluster_map = pandas.DataFrame()
        cluster_map['cluster'] = labels

        for i, idx in enumerate(list(range(st_idx, end_idx))):
            im_path = path[idx]
            im = cv2.imread(im_path)
            name = path[idx].split('/')[7]
            cluster = cluster_map['cluster'][i]
            savepath = '/home/ubuntu/data/Workspace/Soobin/attempt' + str(
                attempt) + '/img'
            savepath = savepath + str(cluster) + '/'
            os.makedirs(savepath, exist_ok=True)
            cv2.imwrite(savepath + name, im)
            # item = []
            # item.append(path)
            # item.append(name)
            # item.append(str(cluster))
            # csv.append(item)
        # csv_file = pandas.DataFrame(csv, columns=['file_path', 'file_name','cluster'])
        # csv_file.to_csv('/home/ubuntu/data/Workspace/Soobin/attempt%d/'%(attempt)+'cluster.csv', index=False, encoding='cp949')
    return
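
Each image is represented by the magnitude of its 2-D FFT, flattened to a 60000-dim vector (matching the 20000 * 3 row size above), which is what makes the rows of transformation comparable under euclidean k-means. The per-image feature step in isolation:

import numpy as np

im = np.random.rand(100, 200, 3).astype('float32')   # stand-in for a resized image
feature = np.abs(np.fft.fft2(im)).flatten()          # 100 * 200 * 3 = 60000 values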
Example #6
def main():
    batch_train = 60000
    batch_test = 1000
    attempt_kmeans = 3  # number of k-means attempt
    centers = range(2, 31, 2)  # number of centroids to be tested
    mnist_root = '../../../data'

    # CUDA
    cuda_flag = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_flag else "cpu")
    device_cpu = torch.device("cpu")
    dataloader_kwargs = {'pin_memory': True} if cuda_flag else {}
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    # dataset
    train_dataset = Reduced_MNIST(root=mnist_root, train=True)
    test_dataset = Reduced_MNIST(root=mnist_root, train=False)
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_train,
                              shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_test, shuffle=True)

    data_train, index_train = next(iter(train_loader))
    # plt.imshow((data_train * 0.3081 + 0.1307).view(data_train.__len__(), 28, 28).detach().numpy()[0], cmap='gray')
    print(index_train)
    data_test, index_test = next(iter(test_loader))
    # plt.imshow((data_test * 0.3081 + 0.1307).view(data_test.__len__(), 28, 28).detach().numpy()[0], cmap='gray')
    print(index_test)
    sse_all = []
    sse_temp = np.zeros(attempt_kmeans)

    # execute k-means clustering
    with torch.no_grad():
        data = data_train
        for i_centers in centers:
            for i_loop in range(attempt_kmeans):
                cluster_index, cluster_centers = kmeans(X=data,
                                                        num_clusters=i_centers,
                                                        distance='euclidean',
                                                        device=device)
                sse = eval_kmeans(data, cluster_index, cluster_centers, device)
                print(
                    str(cluster_centers.shape[0]) + " centers, sse:" +
                    "{:.4f}".format(sse))
                sse_temp[i_loop] = sse
            sse_all.append(sse_temp.mean())
            sse_temp = np.zeros(attempt_kmeans)
    print(str([round(num, 3) for num in sse_all]).replace(".", ","))
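
eval_kmeans is assumed to compute the sum of squared errors (SSE) to the assigned centers, the quantity this elbow-method sweep averages over attempts. A plausible minimal version, under that assumption:

import torch

def eval_kmeans(data, cluster_index, cluster_centers, device=None):
    # SSE: squared distance from each sample to its assigned center
    assigned = cluster_centers[cluster_index]
    return ((data - assigned) ** 2).sum().item()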
Example #7
def update_kmeans(class_data, batch_size, sample_size, pretrained):
    class_features = get_features(class_data, batch_size, sample_size,
                                  pretrained)
    _, center_features = kmeans(X=class_features,
                                num_clusters=sample_size,
                                distance='euclidean',
                                device=torch.device('cuda:3'))
    center_features = center_features.cuda()
    '''
    print('device of class features: %d'%class_features.get_device())
    print('device of center features: %d'%center_features.get_device())
    print(center_features.shape)
    '''
    dist = dist_matrix(center_features, class_features)
    inds = torch.argmin(dist, dim=1)
    return inds
Example #8
def get_clusters_from_latent(args, g_ema, device, mean_latent, t_dict_list,
                             yaml_config, layer_channel_dims, latent, noise):
    print("get clusters")
    with torch.no_grad():
        g_ema.eval()
        slice_latent = latent[0, :]
        print(slice_latent.size())
        slice_latent = slice_latent.unsqueeze(0)
        sample, activation_maps = g_ema([slice_latent],
                                        input_is_latent=True,
                                        noise=noise,
                                        transform_dict_list=t_dict_list,
                                        return_activation_maps=True)
        print(len(activation_maps))
        feature_cluster_dict = {}
        for index, activations in enumerate(activation_maps):
            true_index = index + 1
            classifier = FeatureClassifier(true_index)
            classifier_str = args.classifier_ckpts + "/" + str(
                true_index) + "/classifier" + str(true_index) + "_final.pt"
            classifier_state_dict = torch.load(classifier_str)
            classifier.load_state_dict(classifier_state_dict)
            classifier.to(device)
            layer_activation_maps = activation_maps[index]
            a_map_array = list(torch.split(layer_activation_maps, 1, 1))
            dict_list = []
            latent_list = []
            for i, map in enumerate(a_map_array):
                map = map.to(device)
                feat_vec, class_prob = classifier(map)
                activation_dict = {"class_index": i, "feat_vec": feat_vec}
                # dict_list.append(activation_dict)
                latent_list.append(feat_vec)
            cluster_ids_x, cluster_centers = kmeans(
                X=torch.stack(latent_list),
                num_clusters=cluster_layer_dict[true_index],
                distance='euclidean',
                device=torch.device('cuda'))
            for i, id in enumerate(cluster_ids_x):
                cluster_dict = {
                    "feature_index": int(i),
                    "cluster_index": int(id)
                }
                dict_list.append(cluster_dict)
            feature_cluster_dict[true_index] = dict_list
        with open(r'cluster_dict.yaml', 'w') as file:
            documents = yaml.dump(feature_cluster_dict, file)
Example #9
    def get_cluster_idx(self, i_node, clustering_ratio):

        weights = i_node['layer'].weight.clone()

        if clustering_ratio <= 0: return []
        n = len(weights)

        out_channels = weights.size()[0]

        weights = weights.view(-1, out_channels)

        sim_dict = {}

        i_u, i_s, i_vh = torch.svd(weights)

        n_to_cluster = n - int(clustering_ratio * n)
        # print(i_u.size())
        # print(i_s.size())
        # print(i_vh.size())
        i_sv = torch.matmul(i_vh, torch.diag(i_s))

        # kmeans
        cluster_ids_x, cluster_centers = kmeans(X=i_sv,
                                                num_clusters=n_to_cluster,
                                                distance='cosine')

        group_to_mindist = 1e6 * np.ones(n_to_cluster)
        group_to_id = np.ones(n_to_cluster)
        for idx in range(len(cluster_ids_x)):
            i_cluster = cluster_ids_x[idx]

            i_center = cluster_centers[i_cluster, :]
            i_val = i_sv[idx, :]
            dist = torch.dist(i_val, i_center)

            if group_to_mindist[i_cluster] > dist:
                group_to_mindist[i_cluster] = dist
                group_to_id[i_cluster] = idx

        indices = {x for x in range(n)} - set(group_to_id)

        return list(indices)
Example #10
def tot_dat():
    for j in range(0, 20):
        topcon = create_tempfeatures(j)
        print("start clustering")
        labels, _ = kmeans(X=topcon,
                           num_clusters=5,
                           distance='euclidean',
                           device=torch.device('cuda:0'))
        cluster_map = pandas.DataFrame()
        cluster_map['cluster'] = labels
        csv = []  # must exist before the DataFrame below; presumably filled by topcon_imwrite
        for i in range(0, len(cluster_map['cluster'])):
            topcon_imwrite(cluster_map['cluster'][i], j, i)
        csv_file = pandas.DataFrame(
            csv, columns=['file_path', 'file_name', 'cluster'])
        csv_file.to_csv('./topcon_candidates_5class_higher/attempt%d/' % (j) +
                        'cluster.csv',
                        index=False,
                        encoding='cp949')
        print("one attempt done!")
Example #11
    def get_cluster_exemplars(self, features, num_clusters):
        logging.info("total number of features: "+str(len(features)))
        indices = torch.randperm(len(features))[:3000]
        subsample = features[indices]
        cluster_ids_x, cluster_centers = kmeans(
            X=subsample, num_clusters=num_clusters, distance='euclidean',
            device=self._device()
        )

        original_idx_map = {}
        ret_features = []
        for cluster_number, centroid in enumerate(cluster_centers):
            
            cluster_features, cluster_to_original_idxs = self.get_all_points_in_cluster(subsample, cluster_ids_x, cluster_number)
            selected_feature, selected_feature_idx = self.get_point_nearest_centroid(centroid, cluster_features, cluster_to_original_idxs)
            ret_features.append(selected_feature)
            
            # maps back to idx in entire dataset of features 
            original_idx_map[str(cluster_number)] = selected_feature_idx
        
        return torch.stack(ret_features), original_idx_map
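
The two helpers this method leans on are not shown; plausible shapes for them (an assumption, not the project's actual code) would be:

import torch

def get_all_points_in_cluster(X, cluster_ids, cluster_number):
    # rows of X assigned to this cluster, plus their indices into X
    idxs = (cluster_ids == cluster_number).nonzero().flatten()
    return X[idxs], idxs

def get_point_nearest_centroid(centroid, cluster_features, original_idxs):
    # the cluster member closest to the centroid, and its original index
    j = torch.argmin(((cluster_features - centroid) ** 2).sum(dim=1))
    return cluster_features[j], int(original_idxs[j])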
Example #12
    def select_round_workers_actvSAMP(self, workers, poisoned_workers, clients, kwargs):
        clients_distribution = []
        for client_idx in range(len(clients)):
            _client_distribution = clients[client_idx].get_client_distribution()
            clients_distribution.append(_client_distribution)
        data_size, dims, num_clusters = len(clients), len(clients_distribution[0]), kwargs["NUM_WORKERS_PER_ROUND"]
        clients_distribution = torch.from_numpy(np.array(clients_distribution))
        cluster_ids_x, cluster_centers = kmeans(
            X=clients_distribution, num_clusters=num_clusters, distance='euclidean')

        clusters = []
        for cluster_name in range(num_clusters):
            _cluster = []
            _cluster.extend([i for i in range(len(cluster_ids_x)) if cluster_ids_x[i] == cluster_name])
            clusters.append(_cluster)

        chosen_workers = []
        for cluster in clusters:
            chosen_workers.append((random.sample(cluster, 1))[0])
        return chosen_workers
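
In short: cluster the clients' label distributions and sample one client per cluster, so each round's committee covers distinct data profiles. A toy run of that selection:

import random
import torch
from kmeans_pytorch import kmeans

dists = torch.rand(20, 10)   # 20 clients, 10-class label histograms
ids, _ = kmeans(X=dists, num_clusters=4, distance='euclidean')
# assumes no cluster ends up empty, which k-means on real data can't guarantee
chosen = [random.choice((ids == c).nonzero().flatten().tolist())
          for c in range(4)]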
Example #13
def kmeans_content(text_list, tokenizer, model, num_clusters=20):
    presentence_embedding, text_list, labels = get_embedding(
        text_list, [], tokenizer, model)
    presentence_embedding = torch.tensor(presentence_embedding,
                                         dtype=torch.float)  # float, not long: integer truncation would destroy the embeddings

    cluster_ids_x, cluster_centers = kmeans(X=presentence_embedding,
                                            num_clusters=num_clusters,
                                            distance='euclidean',
                                            device=torch.device('cpu'),
                                            tol=1e-8)
    klist = bulid_pre_dict(text_list, cluster_ids_x.tolist())
    # klist={}

    # for i,c in enumerate (cluster_ids_x.tolist()):
    #     # print(i,c,text_list)
    #     if klist.get(c):
    #         klist[c].append(text_list[i])
    #     else:
    #         klist[c]=[text_list[i]]
    # # pprint.pprint(klist)
    return klist
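
bulid_pre_dict presumably does what the commented-out block sketches: group sentences by cluster id. A self-contained version of that grouping, callable as build_cluster_dict(text_list, cluster_ids_x.tolist()):

def build_cluster_dict(text_list, cluster_ids):
    # {cluster_id: [sentences assigned to that cluster]}
    klist = {}
    for text, cid in zip(text_list, cluster_ids):
        klist.setdefault(cid, []).append(text)
    return klist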
Example #14
def update_robust_kmeans(class_data, batch_size, sample_size, pretrained):
    # get the distance to the feature mean and filter out the outliers
    class_features = get_features(class_data, batch_size, sample_size,
                                  pretrained)
    average_feature = torch.mean(class_features, dim=0, keepdim=True)
    dist = dist_matrix(class_features, average_feature).squeeze()
    sorted_inds = torch.argsort(dist, descending=False)

    class_size = class_data.size(0)
    candidate_size = int(class_size * 9 / 10)
    candidate_inds = sorted_inds[:candidate_size]
    candidate_features = class_features[candidate_inds]

    _, center_features = kmeans(X=candidate_features,
                                num_clusters=sample_size,
                                distance='euclidean',
                                device=torch.device('cuda:' +
                                                    str(args.gpu_id)))
    center_features = center_features.cuda()
    dist = dist_matrix(center_features, candidate_features)
    inds = torch.argmin(dist, dim=1)
    final_inds = candidate_inds[inds]

    return final_inds
Example #15
        print('Selected random dbids')

        c.execute(temp_table)
        c.executemany(temp_insert, dbid_tups)
        c.execute(select_stmt)

        def batch_enumerate(bs):
            batch = c.fetchmany(bs)
            while (len(batch)):
                yield batch
                batch = c.fetchmany(bs)

        batches = batch_enumerate(bsize)

        for i, batch in enumerate(batches):
            sub_lst = list(map(deserialize, batch))
            sub_dbids, sub_tensors = zip(*sub_lst)
            dbid_lst.extend(sub_dbids)
            tensor_lst.extend(sub_tensors)

            print('Batch {} completed. Processed {}. {}'.format(
                i + 1, len(dbid_lst), datetime.now()))

    x = torch.stack(tensor_lst)
    cluster_ids, cluster_centers = kmeans(X=x,
                                          num_clusters=100,
                                          distance='cosine',
                                          device=torch.device('cpu'))

    torch.save(cluster_centers, 'centroids.tensor')
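
Once the centroids are saved, new vectors can be assigned to the nearest centroid with the library's companion helper, kmeans_predict. A sketch of that follow-up step:

import torch
from kmeans_pytorch import kmeans_predict

cluster_centers = torch.load('centroids.tensor')   # the file written above
new_x = torch.randn(8, cluster_centers.shape[1])
ids = kmeans_predict(new_x, cluster_centers,
                     distance='cosine', device=torch.device('cpu'))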
Example #16
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        output_0 = self.relu(x)
        #pdb.set_trace()
      
        #crow_pool = gem(output_0)
        #pdb.set_trace()
        
        x = self.maxpool(output_0)
        #x = norm(x, dim=1)
        #version -1: CBAM
        mask = torch.ones(x.size()[0], 64, 48)
        for i in range(x.size()[0]):
            input_mask = x[i].reshape(64, -1).permute(1, 0).reshape(-1, 64)
            cluster_ids_x, cluster_centers = kmeans(X=input_mask,
                                                    num_clusters=2,
                                                    distance='euclidean',
                                                    tqdm_flag=False,
                                                    device=torch.device('cuda:0'))

            idx = cluster_ids_x.reshape(64, 48)
            mask[i] = 1 - (idx ^ (idx[32, 24].unsqueeze(0).unsqueeze(1)))
            
        mask = mask.unsqueeze(1).float().cuda()

        #version 2: gem+SAM
        #version 3: CROW+SAM

        # version0: constrain on feature map
        '''
        att = self.compress(output_0)
        att = self.cov(att)
        #pdb.set_trace()
        att = self.relu(att)
        #mask = self.sig(att)
        mask = torch.where(att>self.selective_0, torch.FloatTensor([1.0]).cuda(),torch.FloatTensor([0.0]).cuda())
        '''
        #x  = self.cbam_0(x)
        output_1 = self.layer1(x)
        
       
        # version1: generate the mask 
        '''
        y_cov_1 = 0 
        for i in [0,60,3,12,19,22,15,29,45,35,50,51,54,55,58,62]:
            y_cov_1 = y_cov_1 + output_0[:,i,:,:]
        y_cov_2 = torch.sum(output_0, dim =1) - y_cov_1    
        y_cov = (y_cov_1/16).unsqueeze(1) #* self.selective_0    
       
        y_cov = torch.tanh(y_cov)
   
        mask = y_cov.view(y_cov.size()[0], 1, 128,96)
        '''
        
        # version2: NLB generate the mask
        
        '''
        y_cov_1 = [] 
        for i in [3,12,29,35,54,58,62]:
            y_cov_1.append((output_0[:,i,:,:]))
               
        
        #pdb.set_trace()
        y_cov_1 = torch.stack(y_cov_1, dim=1)
        y_cov_1 = y_cov_1 * y_cov_1
        y_cov_1 = torch.sum(y_cov_1, dim=1)
        mask  = y_cov_1/torch.sum(y_cov_1.view(y_cov_1.size()[0], -1), dim=1).unsqueeze(1).unsqueeze(2)
        mask = mask.view(mask.size()[0], 1, 128,96)
        '''
        '''
        y_cov_1 = self.cov(y_cov_1)  
        y_cov_1 = torch.sum(y_cov_1, dim =1)        
        y_cov = (y_cov_1/11).unsqueeze(1) #* self.selective_0           
        y_cov = torch.tanh(y_cov)   
        '''
        '''
        x_compress = self.compress(y_cov_1)
        x_out = self.cov(x_compress)
        mask = torch.tanh(x_out) 
        
        mask = mask.view(mask.size()[0], 1, 128,96)
        '''

        
        #x_cov = F.softmax(x_cov.view(x_cov.size()[0], 1, -1), dim=-1)
        output_1 = output_1*F.interpolate(mask, size=[64, 48], mode="bilinear") +  output_1

        output_2 = self.layer2(output_1)
        #output_2  = self.cbam_1( output_2)
        output_2 = output_2*F.interpolate(mask, size=[32, 24], mode="bilinear") + output_2
        
        
        output_3 = self.layer3(output_2)
        #output_3  = self.cbam_2(output_3)
        output_3 = output_3*F.interpolate(mask, size=[16, 12], mode="bilinear") + output_3

        output_4 = self.layer4(output_3)
        #output_4  = self.cbam_3(output_4)
        output_4 = output_4 * F.interpolate(mask, size=[8, 6], mode="bilinear") + output_4

        '''
        att = self.down_0(output_1)+self.in_cha_0(output_2)
        att = self.down_1(att)+self.in_cha_1(output_3)
        att = self.down_2(att)+self.in_cha_2(output_4)
        spa_mask = spatial_optimize(att).cuda()

        x = output_4*spa_mask
        '''

        return output_4, output_4, mask  # x_cov.view(x_cov.size()[0], 1, 128, 96), output_4
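
The XOR line is the core of the mask: a pixel is kept (1) iff its 2-means id matches the id of the map's center pixel, which the code assumes belongs to the foreground. In isolation:

import torch

idx = torch.randint(0, 2, (64, 48))   # per-pixel cluster ids from 2-means
center_id = idx[32, 24]               # assumed-foreground reference pixel
mask = 1 - (idx ^ center_id)          # 1 where ids agree, 0 elsewhere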
Example #17
def get_clusters_from_generated_greedy(args, g_ema, device, mean_latent,
                                       t_dict_list, yaml_config,
                                       layer_channel_dims):
    print("get clusters")
    with torch.no_grad():
        g_ema.eval()

        latent_ll = []
        feature_ll = []
        feature_cluster_sum_dict = {}
        feature_cluster_dict = {}

        for i in tqdm(range(args.n_layers)):
            true_index = i + 1
            latent_list = []
            feature_list = []
            latent_ll.append(latent_list)
            feature_ll.append(feature_list)
            feature_cluster_sum_dict[true_index] = {}
            for j in tqdm(range(layer_channel_dims[true_index])):
                feature_cluster_sum_dict[true_index][j] = []

        for i in tqdm(range(args.num_samples)):
            print("processing sample: " + str(i))
            sample_z = torch.randn(1, args.latent, device=device)
            sample, activation_maps = g_ema([sample_z],
                                            truncation=args.truncation,
                                            truncation_latent=mean_latent,
                                            transform_dict_list=t_dict_list,
                                            return_activation_maps=True)
            for index, activations in enumerate(activation_maps):
                true_index = index + 1
                classifier = FeatureClassifier(true_index)
                classifier_str = args.classifier_ckpts + "/" + str(
                    true_index) + "/classifier" + str(true_index) + "_final.pt"
                classifier_state_dict = torch.load(classifier_str)
                classifier.load_state_dict(classifier_state_dict)
                classifier.to(device)
                layer_activation_maps = activation_maps[index]
                a_map_array = list(torch.split(layer_activation_maps, 1, 1))
                for j, map in enumerate(a_map_array):
                    map = map.to(device)
                    feat_vec, class_prob = classifier(map)
                    latent_ll[index].append(feat_vec)
                    feature_ll[index].append(j)

        for i in tqdm(range(args.n_layers)):
            true_index = i + 1
            print("generating clusters for layer: " + str(i))
            cluster_ids_x, cluster_centers = kmeans(
                X=torch.stack(latent_ll[i]),
                num_clusters=cluster_layer_dict[true_index],
                distance='euclidean',
                device=torch.device('cuda'))
            for j, id in enumerate(cluster_ids_x):
                feature_cluster_sum_dict[true_index][feature_ll[i][j]].append(
                    id)

            dict_list = []
            for j in tqdm(range(layer_channel_dims[true_index])):
                cluster_id = max(feature_cluster_sum_dict[true_index][j])
                cluster_dict = {
                    "feature_index": int(j),
                    "cluster_index": int(cluster_id)
                }
                dict_list.append(cluster_dict)
            feature_cluster_dict[true_index] = dict_list

    with open(r'cluster_dict.yaml', 'w') as file:
        documents = yaml.dump(feature_cluster_dict, file)
Example #18
def getDataPair(data):
    x = torch.Tensor([
        data['latitude'].values,
        data['longitude'].values,
        data['month'].values,
        data['day'].values,
        data['n_killed'].values,
        data['n_injured'].values,
        data['n_guns_involved'].values,
    ]).transpose(0, 1)
    return x


train_data = data.iloc[:int(data.shape[0] * 0.9), :]
test_data = data.iloc[int(data.shape[0] * 0.9):, :]

train_x = getDataPair(train_data)
test_x = getDataPair(test_data)

cluster_ids_x, cluster_centers = kmeans(X=train_x,
                                        num_clusters=NUM_CLUSTERS,
                                        distance='euclidean')

df = pd.DataFrame(cluster_centers.data.tolist(),
                  columns=[
                      'lat', 'lng', 'month', 'day', 'n_killed', 'n_injured',
                      'n_guns_involved'
                  ])
fpath = os.path.join(os.path.join(BASE_DIR, "media"),
                     f"cluster-{NUM_CLUSTERS}.csv")
df.to_csv(fpath, index=False)
Example #19
def run_kmeans(imageTensor):
    # not named `kmeans`: that would shadow the imported kmeans and recurse forever
    num_clusters = 64
    return kmeans(X=imageTensor,
                  num_clusters=num_clusters,
                  distance='euclidean')
Example #20
# cluster_ids_x, cluster_centers = kmeans(
#     X=batch_1, num_clusters=num_clusters, distance='euclidean', device=device
# )
num_clusters = 16
cluster_centers = []
for batch_id in range(num_batch):
    print(batch_id, num_batch)
    sift_descriptors = descriptors[batch_id*des_bs : (batch_id+1)*des_bs]
    sift_descriptors = np.array(list(itertools.chain.from_iterable(sift_descriptors)))
#         kmeans = KMeans(n_clusters=16, mode='euclidean', verbose=1)
    # kmeans_clusters = KMeans(n_clusters=k).fit(sift_descriptors)
    sift_descriptors = torch.FloatTensor(sift_descriptors).cuda()
#     kmeans.fit(sift_descriptors, centroids = kmeans.centroids)
    cluster_ids_x, cluster_centers = kmeans(
        X=sift_descriptors, num_clusters=num_clusters,
        cluster_centers=cluster_centers,
        distance='euclidean', device=device
    )


#     prtv_score = torch.load('prtv_score.pt')


for test_id in range(1000): 
    print(test_id)
    index_ids = idx_list[test_id]
    test_img_id = test_list[test_id][0]
    test_img_name = test_img_id+'.jpg'
    test_img_path = os.path.join(test_root, test_img_name)
    test_img = cv2.imread(test_img_path)
    test_img = cv2.resize(test_img, (224,224))
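
Passing the previous cluster_centers back into kmeans is a warm start: newer kmeans_pytorch versions accept a cluster_centers argument as the initial centers (an empty list means fresh initialization), so each descriptor batch refines the last batch's result. A compact sketch of that loop, on CPU with random stand-in descriptors:

import torch
from kmeans_pytorch import kmeans

centers = []   # empty list -> fresh initialization on the first batch
for batch in torch.randn(3, 500, 128):   # three mini-batches of descriptors
    ids, centers = kmeans(X=batch, num_clusters=16,
                          cluster_centers=centers,
                          distance='euclidean')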
Example #21
import torch
import numpy as np
from kmeans_pytorch import kmeans
from Sphere_Data import Sphere, ToTensor
import matplotlib.pyplot as plt
# data
# train_dataset = Sphere([100,150,200],[1, 2, 3], transform=ToTensor())

data_size, dims, num_clusters = 1000, 2, 3

x = np.random.randn(data_size, dims) / 6

x = torch.from_numpy(x)
# KMEANS = kmeans(x, 3)

# kmeans
cluster_ids_x, cluster_centers = kmeans(X=x,
                                        num_clusters=num_clusters,
                                        distance='euclidean')

fig, ax = plt.subplots(figsize=(9, 7))
ax.set_title('Encoded Data', fontsize=18, fontweight='demi')
ax.scatter(x[:, 0], x[:, 1], c=cluster_ids_x, s=None, cmap=None)
plt.show()
Example #22
    presentence_embedding = torch.from_numpy(presentence_embedding)  # from a numpy array
    # print( "presentence_embedding",presentence_embedding.size())
    return presentence_embedding


# training

tt = tkitText.Text()

text_list = tt.sentence_segmentation_v1(text)
presentence_embedding = get_embedding(text_list, tokenizer, model)
num_clusters = 10
# print('x',x )
# # # kmeans
cluster_ids_x, cluster_centers = kmeans(
    X=presentence_embedding, num_clusters=num_clusters, distance='euclidean', device=torch.device('cpu'),tol=1e-8
)
print('cluster_ids_x', cluster_ids_x)
print("cluster_centers", cluster_centers)

output_dir = './'
# torch.save(cluster_centers, os.path.join(output_dir, 'Kmeanpytroch_model.bin'))

cluster_centers = torch.load(os.path.join(output_dir, 'Kmeanpytroch_model.bin'))
Example #23
def kmeans(data: torch.Tensor, nr_clusters: int, nr_iterations: int = 20, distance: str = 'euclidean', device=None, verbose=False):
    if device is None:
        device = data.device

    from kmeans_pytorch import kmeans
    return kmeans(X=data, num_clusters=nr_clusters, distance=distance, device=device, tqdm_flag=verbose, iter_limit=nr_iterations)
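
Usage of the wrapper; with device=None the clustering runs on whatever device the data already lives on:

import torch

data = torch.randn(256, 8)
ids, centers = kmeans(data, nr_clusters=4, nr_iterations=10)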
Example #24
def k_means(imagesTensor):
    num_clusters = 64
    imagesTensor = pixelsForm(imagesTensor)
    return kmeans(X=imagesTensor,
                  num_clusters=num_clusters,
                  distance='euclidean')

def qbc(n_model, n_train, batch_size, idx_ratio, dataset):
    # parameters
    n_cluster = 20
    dataset = dataset.lower()  # 'reduced_f_mnist', 'reduced_mnist','unreduced_f_mnist','unreduced_mnist',
    text = (('n_model: ' + str(n_model)) + (', n_train: ' + str(n_train)) + (', batch_size: ' + str(batch_size))
            + (', idx_ratio: ' + str(idx_ratio)) + (', n_cluster: ' + str(n_cluster)) + (', dataset: ' + dataset))
    print(text)

    # paths
    model_path = os.path.join(dr(dr(abspath(__file__))), 'results', dataset)
    csv_path = os.path.join(model_path, 'xgb_qbc.csv')

    # CUDA
    cuda_flag = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_flag else "cpu")
    device_cpu = torch.device("cpu")
    dataloader_kwargs = {'pin_memory': True} if cuda_flag else {}
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    # load dataset
    if dataset == 'reduced_f_mnist':
        data_train, target_train = datasets_preset.provide_reduced_f_mnist(train=True)
        data_test, target_test = datasets_preset.provide_reduced_f_mnist(train=False)
    elif dataset == 'reduced_mnist':
        data_train, target_train = datasets_preset.provide_reduced_mnist(train=True)
        data_test, target_test = datasets_preset.provide_reduced_mnist(train=False)
    elif dataset == 'unreduced_f_mnist':
        data_train, target_train = datasets_preset.provide_unreduced_f_mnist(train=True)
        data_test, target_test = datasets_preset.provide_unreduced_f_mnist(train=False)
    elif dataset == 'unreduced_mnist':
        data_train, target_train = datasets_preset.provide_unreduced_mnist(train=True)
        data_test, target_test = datasets_preset.provide_unreduced_mnist(train=False)
    else:
        raise ValueError('unknown dataset: ' + dataset)

    # execute kmeans-clustering for entire training dataset
    cluster_index, cluster_centers = kmeans(X=torch.from_numpy(data_train),
                                            num_clusters=n_cluster, distance='cosine', device=device)
    # show clustering result, document data per cluster
    n_data_cr = np.zeros(n_cluster, dtype=int)
    idx_data_cr = []
    for i_cluster in range(n_cluster):
        n_data_cr[i_cluster] = np.sum(cluster_index.numpy() == i_cluster)
        idx_data_cr.append(np.argwhere(cluster_index == i_cluster).numpy())
        print("Cluster " + str(i_cluster) + ": " + str(n_data_cr[i_cluster])
              + " data, or " + "{:.4f}".format(n_data_cr[i_cluster] / cluster_index.__len__() * 100) + "%")
    print("Cluster data size variance: " + "{:.4f}".format(n_data_cr.var() ** 0.5) + ", (smaller is better)")

    # to document training process, create directory, etc
    train_text = [str(x) for x in range(batch_size, n_train + 1, batch_size)]
    dir_name = 'run_'
    dir_number = 1
    while os.path.exists(os.path.join(model_path, (dir_name + '{:03d}'.format(dir_number)))):
        dir_number += 1
    run_path = os.path.join(model_path, (dir_name + '{:03d}'.format(dir_number)))
    os.makedirs(run_path)  # make run_* dir
    f = open(os.path.join(run_path, 'info.txt'), 'w+')  # write .txt file
    f.write(text)
    f.close()

    # create models and index library
    models = []
    tree_method = "auto"  # "gpu_hist" if cuda_flag else "auto"
    print('Tree creation method: ' + tree_method)
    idx_library = [np.array([]).astype(int) for x in range(n_model)]
    for i_model in range(n_model):
        xgbc = XGBClassifier(max_depth=8, objective='multi:softmax', n_estimators=1, n_jobs=32,
                             reg_lambda=1, gamma=2, learning_rate=1, num_class=10, tree_method=tree_method)
        models.append(xgbc)
    print(str(n_model) + " xgboost models created")

    # training and test process, 1st batch
    output_list_test = np.zeros((n_model, data_test.__len__())).astype(int)  # n_models x n_data
    for i_model in range(n_model):
        random_index = np.array(random.sample(range(data_train.__len__()), k=batch_size))
        idx_library[i_model] = np.append(idx_library[i_model], random_index)
        models[i_model].fit(data_train[random_index], target_train[random_index])
        output_list_test[i_model, :] = models[i_model].predict(data_test)

    # Document first batch
    acc_models = qbc_preset.each_model_acc(output_list_test, target_test)
    acc_committee = qbc_preset.committee_vote(output_list_test, target_test)  # committee vote
    train_text[0] = train_text[0] + ' '.join([";" + "{:.4f}".format(elem) for elem in acc_models])
    train_text[0] = train_text[0] + '; ' + "{:.3f}".format(acc_committee * 100) + '%'  # committee vote
    print("First batch added!")
    print("Batch " + str(0) + ": average acc of models is " + "{:.3f}".format(acc_models.mean() * 100) + "%")
    print("Batch " + str(0) + ": acc of committee is " + "{:.3f}".format(acc_committee * 100) + "%")
    print("Library sizes, after first batch:" + str([np.unique(idx_library[i_model]).shape for x in range(n_model)]))
    pickle.dump(models, open(os.path.join(run_path, ('models_batch_' + "{0:0=3d}".format(0) + '.pkl')), 'wb'))
    pickle.dump(idx_library, open(os.path.join(run_path, ('indices_batch_' + "{0:0=3d}".format(0) + '.pkl')), 'wb'))

    # training process, n-th batch
    for i_batch in range(1, train_text.__len__()):
        print("Starting Batch " + str(i_batch))
        output_list_train = np.zeros((n_model, data_train.__len__())).astype(int)

        # calculate entropy & acc of current data
        for i_model in range(n_model):
            output_list_train[i_model, :] = models[i_model].predict(data_train)
        acc_models = qbc_preset.each_model_acc(output_list_train, target_train)
        acc_target = qbc_preset.each_target_acc(output_list_train, target_train)
        entropy = qbc_preset.vote_entropy_xgb(output_list_train, target_train)
        # qbc_preset.get_entropy_acc(entropy, output_list_train, target_train)
        # show entropy, show committee acc, 3 highest guess, entropy value, show 8 of it?
        # qbc_preset.show_entropy_result(acc_models, entropy, output_list, data_train, target_train)
        # qbc_preset.plot_ugly(output_list_train, data_train, target_train)
        print("Library sizes:" + str([np.unique(idx_library[i_model]).shape for x in range(n_model)])) 
        index_1 = np.random.choice(range(n_model))
        index_2 = np.random.choice(np.setdiff1d(range(0, n_model), index_1))
        print("Overlap size:" + str(np.intersect1d(idx_library[index_1], idx_library[index_2]).__len__()) +
              ", overlap ideal: " + str(int((idx_library[index_2].__len__() - batch_size)
                                            * (idx_ratio[0] + idx_ratio[1]))) +
              ", library size: " + str(idx_library[index_2].__len__()) + ", dataset: " + dataset
              + ", idx_ratio: " + str(idx_ratio))

        # train and test for each model and each batch
        for i_model in range(n_model):
            # indexes
            idx_library[i_model] = \
                qbc_preset.get_next_indices(idx_library[i_model], entropy, idx_data_cr, batch_size,
                                            idx_ratio, data_train.__len__())
            # train model
            models[i_model].fit(data_train[idx_library[i_model]], target_train[idx_library[i_model]])
            # test model
            output_list_test[i_model, :] = models[i_model].predict(data_test)
            print('Model ' + str(i_model))

        # check committee vote
        acc_models = qbc_preset.each_model_acc(output_list_test, target_test)
        acc_committee = qbc_preset.committee_vote(output_list_test, target_test)  # committee vote method
        print("Batch " + str(i_batch) + ": average acc of models is " + "{:.3f}".format(acc_models.mean() * 100) + "%")
        print("Batch " + str(i_batch) + ": acc of committee is " + "{:.3f}".format(acc_committee * 100) + "%")

        # Document training progress
        train_text[i_batch] = train_text[i_batch] + ' '.join([";" + "{:.4f}".format(elem) for elem in acc_models])
        train_text[i_batch] = train_text[i_batch] + '; ' + "{:.3f}".format(
            acc_committee * 100) + '%'  # committee vote method
        # save models and indices
        pickle.dump(models, open(os.path.join(run_path, ('models_batch_' + "{0:0=3d}".format(i_batch) + '.pkl')), 'wb'))
        pickle.dump(idx_library,
                    open(os.path.join(run_path, ('indices_batch_' + "{0:0=3d}".format(i_batch) + '.pkl')), 'wb'))

    # write text to csv
    title = ["New Vote, Results for n_model = " + str(n_model) + ", idx_ratio:  " + str(idx_ratio)
             + ", n_cluster: " + str(n_cluster) + ", with highest entropy, avg and var documented"]
    with open(csv_path, mode='a+') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)
    # loop through train_text
    for i_text in range(0, train_text.__len__()):
        text = train_text[i_text].split(";")
        mean = statistics.mean([float(i) for i in text[1:-2]])
        var = statistics.variance([float(i) for i in text[1:-2]]) ** 0.5
        text.append("{:.3f}".format(mean * 100) + "%")
        text.append("{:.3f}".format(var * 100) + "%")
        with open(csv_path, mode='a+') as test_file:
            test_writer = csv.writer(test_file, delimiter=';')
            test_writer.writerow(text)
Example #26
def project(
        G,
        target_image: torch.Tensor,  # [C,H,W], dynamic range [0,255]; W & H must match G output resolution
        target_text,
        *,
        num_steps=300,
        w_avg_samples=8192,
        initial_learning_rate=0.02,
        initial_latent=None,
        initial_noise_factor=0.01,
        lr_rampdown_length=0.10,
        lr_rampup_length=0.5,
        noise_ramp_length=0.75,
        latent_range=2.0,
        max_noise=0.5,
        min_threshold=0.6,
        use_vgg=True,
        use_clip=True,
        use_pixel=True,
        use_penalty=True,
        use_center=True,
        regularize_noise_weight=1e5,
        kmeans=True,
        kmeans_clusters=64,
        verbose=False,
        device: torch.device):
    if target_image is not None:
        assert target_image.shape == (G.img_channels, G.img_resolution,
                                      G.img_resolution)
    else:
        use_vgg = False
        use_pixel = False

    # import clip lazily so the dependency is only needed when use_clip is set
    if use_clip:
        import clip

    def logprint(*args):
        if verbose:
            print(*args)

    G = copy.deepcopy(G).eval().requires_grad_(False).to(
        device)  # type: ignore

    # Compute w stats.
    logprint(
        f'Computing W midpoint and stddev using {w_avg_samples} samples...')
    z_samples = np.random.RandomState(123).randn(w_avg_samples, G.z_dim)
    labels = None
    if (G.mapping.c_dim):
        labels = torch.from_numpy(0.5 * np.random.RandomState(123).randn(
            w_avg_samples, G.mapping.c_dim)).to(device)
    w_samples = G.mapping(torch.from_numpy(z_samples).to(device),
                          labels)  # [N, L, C]
    w_samples = w_samples.cpu().numpy().astype(np.float32)  # [N, L, C]
    w_samples_1d = w_samples[:, :1, :].astype(np.float32)

    w_avg = np.mean(w_samples, axis=0, keepdims=True)  # [1, L, C]
    w_std = (np.sum((w_samples - w_avg)**2) / w_avg_samples)**0.5

    kmeans_latents = None
    if initial_latent is not None:
        w_avg = initial_latent
    else:
        if kmeans and use_clip and target_text is not None:
            from kmeans_pytorch import kmeans
            # data
            data_size, dims, num_clusters = w_avg_samples, G.z_dim, kmeans_clusters
            x = w_samples_1d
            x = torch.from_numpy(x)

            # kmeans
            logprint(
                f'Performing kmeans clustering using {w_avg_samples} latents into {kmeans_clusters} clusters...'
            )
            cluster_ids_x, cluster_centers = kmeans(X=x,
                                                    num_clusters=num_clusters,
                                                    distance='euclidean',
                                                    device=device)
            #logprint(f'\nGenerating images from kmeans latents...')
            kmeans_latents = torch.tensor(cluster_centers,
                                          dtype=torch.float32,
                                          device=device,
                                          requires_grad=True)

    # Setup noise inputs.
    noise_bufs = {
        name: buf
        for (name, buf) in G.synthesis.named_buffers() if 'noise_const' in name
    }

    # Load VGG16 feature detector.
    if use_vgg:
        url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt'
        with dnnlib.util.open_url(url) as f:
            vgg16 = torch.jit.load(f).eval().to(device)

    # Load CLIP
    if use_clip:
        model, transform = clip.load("ViT-B/32", device=device)

    # Features for target image.
    if target_image is not None:
        target_images = target_image.unsqueeze(0).to(device).to(torch.float32)
        small_target = F.interpolate(target_images, size=(64, 64), mode='area')
        if use_center:
            center_target = F.interpolate(target_images,
                                          size=(448, 448),
                                          mode='area')[:, :, 112:336, 112:336]
        target_images = F.interpolate(target_images,
                                      size=(256, 256),
                                      mode='area')
        target_images = target_images[:, :, 16:240,
                                      16:240]  # 256 -> 224, center crop

    if use_vgg:
        vgg_target_features = vgg16(target_images,
                                    resize_images=False,
                                    return_lpips=True)
        if use_center:
            vgg_target_center = vgg16(center_target,
                                      resize_images=False,
                                      return_lpips=True)

    if use_clip:
        if target_image is not None:
            with torch.no_grad():
                clip_target_features = model.encode_image(
                    ((target_images / 255.0) - image_mean[None, :, None, None])
                    / image_std[None, :, None, None]).float()
                if use_center:
                    clip_target_center = model.encode_image(
                        ((center_target / 255.0) -
                         image_mean[None, :, None, None]) /
                        image_std[None, :, None, None]).float()

    if kmeans_latents is not None and use_clip and target_text is not None:
        scores, kmeans_images = score_images(G,
                                             model,
                                             target_text,
                                             kmeans_latents.repeat(
                                                 [1, G.mapping.num_ws, 1]),
                                             device=device)
        ind = np.argpartition(scores, 4)[:4]
        w_avg = torch.median(kmeans_latents[ind], dim=0,
                             keepdim=True)[0].repeat([1, G.mapping.num_ws, 1])

    w_opt = torch.tensor(w_avg,
                         dtype=torch.float32,
                         device=device,
                         requires_grad=True)  # pylint: disable=not-callable
    w_avg_tensor = w_opt.clone()
    w_out = torch.zeros([num_steps] + list(w_opt.shape[1:]),
                        dtype=torch.float32,
                        device=device)
    optimizer = torch.optim.AdamW([w_opt] + list(noise_bufs.values()),
                                  betas=(0.9, 0.999),
                                  lr=initial_learning_rate)

    # Init noise.
    for buf in noise_bufs.values():
        buf[:] = torch.randn_like(buf)
        buf.requires_grad = True

    for step in range(num_steps):
        # Learning rate schedule.
        t = step / num_steps
        w_noise_scale = max_noise * w_std * initial_noise_factor * max(
            0.0, 1.0 - t / noise_ramp_length)**2
        lr_ramp = min(1.0, (1.0 - t) / lr_rampdown_length)
        lr_ramp = 0.5 - 0.5 * np.cos(lr_ramp * np.pi)
        lr_ramp = lr_ramp * min(1.0, t / lr_rampup_length)
        lr = initial_learning_rate * lr_ramp
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        # Synth images from opt_w.
        w_noise = torch.randn_like(w_opt) * w_noise_scale
        ws = w_opt + w_noise
        synth_images = G.synthesis(torch.clamp(ws, -latent_range,
                                               latent_range),
                                   noise_mode='const')

        # Downsample image to 256x256 if it's larger than that. CLIP was built for 224x224 images.
        synth_images = (torch.clamp(synth_images, -1, 1) + 1) * (255 / 2)
        small_synth = F.interpolate(synth_images, size=(64, 64), mode='area')
        if use_center:
            center_synth = F.interpolate(synth_images,
                                         size=(448, 448),
                                         mode='area')[:, :, 112:336, 112:336]
        synth_images = F.interpolate(synth_images,
                                     size=(256, 256),
                                     mode='area')

        # Features for synth images.
        synth_images = synth_images[:, :, 16:240,
                                    16:240]  # 256 -> 224, center crop

        dist = 0

        if use_vgg:
            vgg_synth_features = vgg16(synth_images,
                                       resize_images=False,
                                       return_lpips=True)
            vgg_dist = (vgg_target_features -
                        vgg_synth_features).square().sum()
            if use_center:
                vgg_synth_center = vgg16(center_synth,
                                         resize_images=False,
                                         return_lpips=True)
                vgg_dist += (vgg_target_center -
                             vgg_synth_center).square().sum()
            vgg_dist *= 6
            dist += F.relu(vgg_dist * vgg_dist - min_threshold)

        if use_clip:
            clip_synth_image = (
                (synth_images / 255.0) - image_mean[None, :, None, None]
            ) / image_std[None, :, None, None]
            clip_synth_features = model.encode_image(clip_synth_image).float()
            adj_center = 2.0

            if use_center:
                clip_synth_center_image = (
                    (center_synth / 255.0) - image_mean[None, :, None, None]
                ) / image_std[None, :, None, None]
                adj_center = 1.0
                clip_synth_center = model.encode_image(
                    clip_synth_center_image).float()

            if target_image is not None:
                clip_dist = (clip_target_features -
                             clip_synth_features).square().sum()
                if use_center:
                    clip_dist += (clip_target_center -
                                  clip_synth_center).square().sum()
                dist += F.relu(0.5 + adj_center * clip_dist - min_threshold)

            if target_text is not None:
                clip_text = 1 - model(clip_synth_image,
                                      target_text)[0].sum() / 100
                if use_center:
                    clip_text += 1 - model(clip_synth_center_image,
                                           target_text)[0].sum() / 100
                dist += 2 * F.relu(adj_center * clip_text * clip_text -
                                   min_threshold / adj_center)

        if use_pixel:
            pixel_dist = (target_images - synth_images).abs().sum() / 2000000.0
            if use_center:
                pixel_dist += (center_target -
                               center_synth).abs().sum() / 2000000.0
            pixel_dist += (small_target -
                           small_synth).square().sum() / 800000.0
            pixel_dist /= 4
            dist += F.relu(lr_ramp * pixel_dist - min_threshold)

        if use_penalty:
            l1_penalty = (w_opt - w_avg_tensor).abs().sum() / 5000.0
            dist += F.relu(lr_ramp * l1_penalty - min_threshold)

        # Noise regularization.
        reg_loss = 0.0
        for v in noise_bufs.values():
            noise = v[None, None, :, :]  # must be [1,1,H,W] for F.avg_pool2d()
            while True:
                reg_loss += (noise *
                             torch.roll(noise, shifts=1, dims=3)).mean()**2
                reg_loss += (noise *
                             torch.roll(noise, shifts=1, dims=2)).mean()**2
                if noise.shape[2] <= 8:
                    break
                noise = F.avg_pool2d(noise, kernel_size=2)
        #print(vgg_dist, clip_dist, pixel_dist, l1_penalty, reg_loss * regularize_noise_weight)
        loss = dist + reg_loss * regularize_noise_weight

        # Step
        optimizer.zero_grad(set_to_none=True)
        loss.backward()
        optimizer.step()
        logprint(
            f'step {step+1:>4d}/{num_steps}: dist {dist:<4.2f} loss {float(loss):<5.2f}'
        )
        with torch.no_grad():
            torch.clamp(w_opt, -latent_range, latent_range, out=w_opt)
        # Save projected W for each optimization step.
        w_out[step] = w_opt.detach()[0]
        # Normalize noise.
        with torch.no_grad():
            for buf in noise_bufs.values():
                buf -= buf.mean()
                buf *= buf.square().mean().rsqrt()

    return w_out
Example #27
import torch
from kmeans_pytorch import kmeans
import open3d as o3d
import numpy as np

# data
num_clusters = 10

cloud = o3d.io.read_point_cloud("/home/llg/dataset_paper/camp001_l3.ply")
cloud_xyz = np.array(cloud.points)
x = torch.from_numpy(cloud_xyz)

# kmeans
cluster_ids_x, cluster_centers = kmeans(X=x,
                                        num_clusters=num_clusters,
                                        distance='euclidean',
                                        device=torch.device('cuda:0'))
Example #28
def get_anchor(all_data, anchor_num, device):
    cluster_ids_x, cluster_centers = kmeans(
        X=all_data, num_clusters=anchor_num, distance='euclidean', device=device
    )
    return cluster_centers
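
Example call for the helper above, with random stand-in data:

import torch

anchors = get_anchor(torch.randn(1000, 4), anchor_num=16, device=torch.device('cpu'))
print(anchors.shape)   # [16, 4]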
Example #29
# Load img
img_path = config.trainDataPath
fileList = utilits.getAllName(img_path)
# This is a small experiment, so only one image is used.
file = fileList[0]
img = plt.imread(file)
imgCV = cv2.imread(file)

imgWriteable = np.array(img)

imgWriteable = imgWriteable.reshape(-1, 3)
imgTensor = torch.from_numpy(imgWriteable)

labels, clusterCenters = kmeans(X=imgTensor,
                                num_clusters=config.K,
                                distance='euclidean',
                                device=torch.device('cuda:0'))
imgTensor = imgTensor.view((config.imgSize[0], config.imgSize[1], 3))

colorFeatureList = imgprocess.regionColorFeatures(imgTensor, labels)
textureFeatureList = imgprocess.regionTextureFeatures(imgCV, labels)
edgeFeatureList = imgprocess.regionEdgeFeatures(imgCV, labels)
spatialFeatureList = imgprocess.regionSpatialFeatures(labels)
featureList = torch.cat((colorFeatureList, textureFeatureList, edgeFeatureList,
                         spatialFeatureList),
                        dim=1)

num_sample = len(featureList)

X = featureList.cuda() if config.use_cuda else featureList
Example #30
        # if os.path.exists(os.path.join(output_path, sample_name)) == False:
        #     os.mkdir(os.path.join(output_path, sample_name))
        if os.path.exists(os.path.join(output_path,
                                       sample_name + ".pickle")):
            print("{} is processed before.".format(sample_name))
            continue

        print("Processing: {}".format(sample_name))

        #Calculate clusters
        # features_AB = torch.randn(10000, 2, dtype=torch.float32) / 6 + .5
        features_AB = features_AB.squeeze(0).transpose(0, 1).reshape(
            2, -1).transpose(0, 1).contiguous().type(torch.float32)
        cluster_ids_x, cluster_centers = kmeans(X=features_AB,
                                                num_clusters=num_clusters,
                                                distance='euclidean',
                                                device=torch.device('cuda'),
                                                tol=0.0000005)
        cluster_ids_x = cluster_ids_x.reshape(-1, 1, 32, 32)

        now = datetime.now()
        data_obj = {
            # 'features_L': features_L.squeeze(0),
            'clusters': cluster_ids_x,
            'centers': cluster_centers,
            'number_of_objects': S,
            'class_name': sample_name,
            'timeofday': now.strftime("%d/%m/%y %H:%M")
        }

        # #View the labels