def t_sen_new(feature_name, layer, isNorm):
    # get_data_new is defined elsewhere in the source module
    feats = get_data_new(feature_name, layer)
    print("calculating t-SNE...")
    fea = TSNE(n_components=2).fit_transform(feats)
    if isNorm:
        # min-max normalize each axis to [0, 1]
        fea = (fea - fea.min(0)) / (fea.max(0) - fea.min(0))
    print("plotting...")
    plt.scatter(fea[:1680, 0],
                fea[:1680, 1],
                label="legitimate",
                c="seagreen",
                s=19.5)
    #plt.scatter(fea[:7201, 0], fea[:7201, 1], label="legitimate", c="seagreen", s=19.5)
    #7201
    plt.scatter(fea[1680:, 0],
                fea[1680:, 1],
                label="malicious",
                c="orangered",
                s=19.5)
    #plt.scatter(fea[7201:, 0], fea[7201:, 1], label="malicious", c="orangered", s=19.5)

    # plt.scatter(fea[1000:, 0], fea[1000:, 1], label="bad")
    plt.legend(loc='upper right', fontsize='large')
    plt.tight_layout()
    #plt.savefig(layer + "_1000.bmp",format='bmp', dpi=1000)
    plt.savefig(layer + "1600_ALL", format='jpeg', dpi=600)
    plt.show()
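# get_data_new and the 1680-row class split come from the surrounding project;
# a minimal self-contained sketch of the same pattern on synthetic data
# (class sizes here are hypothetical):
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

rng = np.random.default_rng(0)
X = np.vstack([rng.normal(0, 1, (100, 32)),   # stand-in "legitimate" features
               rng.normal(3, 1, (100, 32))])  # stand-in "malicious" features
fea = TSNE(n_components=2).fit_transform(X)
fea = (fea - fea.min(0)) / (fea.max(0) - fea.min(0))  # min-max normalize
plt.scatter(fea[:100, 0], fea[:100, 1], label="legitimate", c="seagreen", s=19.5)
plt.scatter(fea[100:, 0], fea[100:, 1], label="malicious", c="orangered", s=19.5)
plt.legend(loc='upper right', fontsize='large')
plt.tight_layout()
plt.show()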
Example #2
    def plot_quantized_embeddings(self):
        output_image_path = os.path.join(self.args.output_image_path,
                                         self.args.val_set,
                                         self.args.load_iteration)
        os.makedirs(output_image_path, exist_ok=True)
        embs = self.model.get_vqvae_embeddings()

        # a freshly initialized codebook (64 codes x 128 dims) for comparison
        _embedding = nn.Embedding(64, 128)
        _embedding.weight.data.uniform_(-1 / 64, 1 / 64)
        embs_init = _embedding.weight.data.cpu()
        # embs = torch.cat((embs,embs_init),dim=0)

        # print(embs[:,:5])
        '''
        print('UMAP...shape_{}'.format(embs.shape))
        proj = umap.UMAP(n_neighbors=5, min_dist=0.2, metric='cosine').fit_transform(embs)
        x_min, x_max = proj.min(0), proj.max(0)
        proj = (proj - x_min) / (x_max - x_min)
        plt.subplot(1,2,1)
        plt.scatter(proj[:,0], proj[:,1], alpha=0.3)
        plt.title('UMAP-{}x{}'.format(embs.shape[0],embs.shape[1]))
        '''
        print('t-SNE...final {} | init {}'.format(embs.shape, embs_init.shape))
        proj = TSNE(n_components=2,
                    init='pca',
                    perplexity=50,
                    random_state=self.args.seed).fit_transform(embs)
        x_min, x_max = proj.min(0), proj.max(0)
        proj = (proj - x_min) / (x_max - x_min)
        #plt.subplot(1,2,2)
        plt.scatter(proj[:, 0], proj[:, 1], alpha=0.3)
        plt.title('tSNE-{}x{}'.format(embs.shape[0], embs.shape[1]))
        plt.savefig(os.path.join(output_image_path, 'vqvae.png'))
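# The codebook size (64 x 128) and hyperparameters above come from the project
# config; a minimal sketch showing how init='pca' plus random_state pins down a
# reproducible projection (synthetic stand-in data):
import numpy as np
from sklearn.manifold import TSNE

embs = np.random.RandomState(0).randn(64, 128)  # stand-in for the VQ codebook
# perplexity must stay below the number of samples (50 < 64 here)
proj = TSNE(n_components=2, init='pca', perplexity=50,
            random_state=0).fit_transform(embs)
proj = (proj - proj.min(0)) / (proj.max(0) - proj.min(0))
print(proj.shape)  # (64, 2)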
Example #3
    def run(self):
        self.log("Starting ReduceEmbeddingDimensionality")
        vectorizer = get_vectorizer(self._vectorizer_name)
        paper_matrix = vectorizer.paper_matrix
        X = 0.5 * paper_matrix['abstract'] + 0.5 * paper_matrix['title']
        self.log(X.shape)
        points = TSNE(n_components=3, verbose=True).fit_transform(X)
        points = scale(points)
        dois = paper_matrix['index_arr']
        id_map = paper_matrix['id_map']
        result = dict()
        category_memberships = CategoryMembership.objects.filter(
            paper__in=dois)
        for membership in self.progress(category_memberships):
            doi = membership.paper.pk
            matrix_index = id_map[doi]

            category_pk = membership.category.pk
            category_score = membership.score

            if doi not in result:
                result[doi] = {
                    'doi': doi,
                    'title': membership.paper.title,
                    'point': points[matrix_index].tolist(),
                    'top_category': category_pk,
                    'published_at': json.dumps(membership.paper.published_at,
                                               cls=DjangoJSONEncoder),
                    'top_category_score': category_score
                }
            elif result[doi]['top_category_score'] <= category_score:
                result[doi]['top_category'] = category_pk
                result[doi]['top_category_score'] = category_score
        output = {
            'papers': list(result.values()),
            'means': points.mean(axis=0).tolist(),
            'max': points.max(axis=0).tolist(),
            'min': points.min(axis=0).tolist()
        }
        if settings.DEBUG:
            with open('../web/assets/embeddings_3d.json', 'w+') as f:
                json.dump(output, f)
        else:
            s3_bucket_client = S3BucketClient(
                aws_access_key=settings.AWS_ACCESS_KEY_ID,
                aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
                endpoint_url=settings.AWS_S3_ENDPOINT_URL,
                bucket=settings.AWS_STORAGE_BUCKET_NAME)
            s3_bucket_client.upload_as_json(settings.AWS_EMBEDDINGS_FILE_PATH,
                                            output)

        Paper.objects.all().update(visualized=True)

        self.log("ReduceEmbeddingDimensionality finished")
Example #4
def t_sne(X, y):
    X = np.asarray(X)
    y = np.asarray(y)
    from sklearn.manifold import TSNE
    import matplotlib.pyplot as plt

    import time
    start = time.time()
    X_tsne = TSNE(n_components=2).fit_transform(X)
    end = time.time()
    print(end - start)

    x_min, x_max = X_tsne.min(0), X_tsne.max(0)
    X_norm = (X_tsne - x_min) / (x_max - x_min)  # min-max normalize to [0, 1]
    plt.figure(figsize=(8, 8))
    for i in range(X_norm.shape[0]):
        plt.text(X_norm[i, 0],
                 X_norm[i, 1],
                 str(y[i]),
                 color=plt.cm.Set1(y[i]),
                 fontdict={
                     'weight': 'bold',
                     'size': 9
                 })
    plt.xticks([])
    plt.yticks([])
    plt.show()
Example #5
 def plot_static_embeddings(self, output_path):
     # filter the samples by speakers sampled
     # hack code
     small_indexes = [
         index for index in self.indexes
         if index[0][:len('p000')] in self.sampled_speakers
     ]
     random.shuffle(small_indexes)
     small_indexes = small_indexes[:self.args.max_samples]
     # generate the tensor and dataloader for evaluation
     tensor = [
         self.pkl_data[key][t:t + self.config.segment_size]
         for key, t, _, _, _ in small_indexes
     ]
     speakers = [key[:len('p000')] for key, _, _, _, _ in small_indexes]
     # add the dimension for channel
     tensor = torch.from_numpy(np.array(tensor)).unsqueeze(dim=1)
     dataset = TensorDataset(tensor)
     dataloader = DataLoader(dataset,
                             batch_size=20,
                             shuffle=False,
                             num_workers=0)
     all_embs = []
     # run the model
     for data in dataloader:
         # cc() appears to be a project helper that moves tensors to the compute device
         data = cc(data[0])
         embs = self.model.get_static_embeddings(data)
         all_embs = all_embs + embs.detach().cpu().numpy().tolist()
     all_embs = np.array(all_embs)
     print(all_embs.shape)
     # TSNE
     embs_2d = TSNE(n_components=2, init='pca',
                    perplexity=50).fit_transform(all_embs)
     x_min, x_max = embs_2d.min(0), embs_2d.max(0)
     embs_norm = (embs_2d - x_min) / (x_max - x_min)
     # plot to figure
     female_cluster = [
         i for i, speaker in enumerate(speakers)
         if self.speaker2gender[speaker] == 'F'
     ]
     male_cluster = [
         i for i, speaker in enumerate(speakers)
         if self.speaker2gender[speaker] == 'M'
     ]
     colors = np.array(
         [self.speaker_index[speaker] for speaker in speakers])
     plt.scatter(embs_norm[female_cluster, 0],
                 embs_norm[female_cluster, 1],
                 c=colors[female_cluster],
                 marker='x')
     plt.scatter(embs_norm[male_cluster, 0],
                 embs_norm[male_cluster, 1],
                 c=colors[male_cluster],
                 marker='o')
     plt.savefig(output_path)
     return
Example #6
        def coords_func(lib, opts, args):
            if opts.type:
                seq_features_file = config["blackbird"]["seq_features"].get("unicode")
                seq_features = pickle.load(open(seq_features_file, "rb"))  # cPickle in the Python 2 original

                keys = list(seq_features.keys())  # materialize for index access below

                if opts.type == "mean":
                    features = np.empty((len(seq_features), 20))

                    for idx, key in enumerate(seq_features):
                        length = seq_features[key].shape[1]
                        features[idx, :] = seq_features[key][:, int(0.1 * length):int(0.9 * length)].mean(axis=1)
                elif opts.type == "lstm":
                    print("Loading network...")
                    model = LSTMSeq2Seq(config["blackbird"]["lstm"]["arch"].get("unicode"),
                                        config["blackbird"]["lstm"]["weights"].get("unicode"),
                                        config["blackbird"]["lstm"]["output"].get())
                    # Pad sequences
                    maxlen = 150
                    padded_seq_features = np.empty((len(seq_features), maxlen, 20))
                    for idx, key in enumerate(seq_features):
                        padded_seq_features[idx, :, :] = sequence.pad_sequences(seq_features[key], maxlen=maxlen, dtype="float32").T

                    print("Getting vectors...")
                    features = model.predict(padded_seq_features)
                else:
                    print("Provide a valid --type [mean, lstm]")
                    sys.exit(1)

                print("Reducing dimensions...")
                features_2d = TSNE(n_components=2).fit_transform(features)

                print("Writing to db...")
                conn = sqlite3.connect(config["blackbird"]["db"].get("unicode"))
                cur = conn.cursor()
                cur.execute("DELETE FROM coords")

                to_insert = []
                for idx in range(features_2d.shape[0]):
                    to_insert.append((keys[idx],
                                      features_2d[idx, 0],
                                      features_2d[idx, 1]))
                cur.executemany("INSERT INTO coords VALUES (?, ?, ?)", to_insert)
                conn.commit()
                conn.close()

                # Fill leftovers
                ids_to_fill = []
                for item in lib.items():
                    if item.id not in keys:
                        ids_to_fill.append(item.id)

                self.fill(ids_to_fill, features_2d.min(axis=0), features_2d.max(axis=0))
            else:
                print("Provide a valid --type [mean, lstm]")
Example #7
 def calculate_projection(self):
     self._perform_svd()
     if self.method == SKLEARN:
         projection_vectors = SKLEARN_TSNE(n_components=2, perplexity=40, verbose=2).fit_transform(self.data_vectors)
     elif self.method == MAATEN:
         projection_vectors = MATTENS_TSNE(self.data_vectors, no_dims=2,
                                           initial_dims=self.data_vectors.shape[1],
                                           perplexity=40.0)
     else:
         projection_vectors = UMAP_PROJECTION(n_neighbors=5, min_dist=0.3).fit_transform(self.data_vectors)
     projection_vectors -= projection_vectors.min(axis=0)
     projection_vectors /= projection_vectors.max(axis=0)
     self.projection_vectors = projection_vectors
Example #8
    def plot_speaker_embeddings(self):

        output_image_path = os.path.join(self.args.output_image_path,
                                         self.args.val_set,
                                         self.args.load_iteration)
        os.makedirs(output_image_path, exist_ok=True)

        speakers = []
        utts = []
        # in_test
        # self.samples = ['252', '240', '237', '341', '274', '236', '272', '329', '271', '301']
        # out_test
        # self.samples = ['232', '305', '227', '238', '263', '339', '376', '318', '286', '312']
        for speaker in self.samples:
            speakers += [speaker] * len(self.indexes[speaker])
            utts += self.indexes[speaker]

        use_spec = 'dmel' if self.args.model_type == 'AdaVAEd' else 'mel'
        dataset = EvaluateDateset(os.path.join(self.args.data_dir, self.args.dataset), 
                                  speakers, utts, segment_size=None, 
                                  load_spectrogram=use_spec)

        dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=4, pin_memory=True)
        embs = []

        for data in dataloader:
            spec = cc(data['spectrogram'])
            emb = self.model.get_speaker_embeddings(spec)
            embs += emb.detach().cpu().numpy().tolist() 
            print('Evaluate: {}/{}'.format(len(embs),len(dataloader)),end='\r') 

        embs = np.array(embs)
        norms = np.sqrt(np.sum(embs ** 2, axis=1, keepdims=True))
        embs = embs / norms 

        # t-SNE
        print('\nt-SNE...')
        embs_2d = TSNE(n_components=2, init='pca', perplexity=50).fit_transform(embs)
        x_min, x_max = embs_2d.min(0), embs_2d.max(0)
        embs_2d = (embs_2d - x_min) / (x_max - x_min)

        # plot to figure
        female_cluster = [i for i, speaker in enumerate(speakers) if self.speaker_infos[speaker][1] == 'F']
        male_cluster = [i for i, speaker in enumerate(speakers) if self.speaker_infos[speaker][1] == 'M']
        colors = np.array([self.samples_index[speaker] for speaker in speakers])
        plt.scatter(embs_2d[female_cluster, 0], embs_2d[female_cluster, 1],  c=colors[female_cluster], marker='x') 
        plt.scatter(embs_2d[male_cluster, 0], embs_2d[male_cluster, 1], c=colors[male_cluster], marker='o') 
        plt.savefig(os.path.join(output_image_path,'speaker.png'))
        plt.clf()
        plt.cla()
        plt.close()
        return
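# The per-gender marker trick above, reduced to a self-contained sketch with
# synthetic embeddings (speaker metadata here is made up):
import numpy as np
import matplotlib.pyplot as plt

embs_2d = np.random.rand(40, 2)            # stand-in for normalized t-SNE output
genders = np.array(['F', 'M'] * 20)
colors = np.arange(40) % 10                # hypothetical per-speaker color indices

female = np.where(genders == 'F')[0]
male = np.where(genders == 'M')[0]
plt.scatter(embs_2d[female, 0], embs_2d[female, 1], c=colors[female], marker='x')
plt.scatter(embs_2d[male, 0], embs_2d[male, 1], c=colors[male], marker='o')
plt.savefig('speakers.png')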
Example #9
    def show_result(self, labels):
        print("Visualizing clustering result...")
        x_tsne = TSNE().fit_transform(self.train_set)
        x_min, x_max = x_tsne.min(0), x_tsne.max(0)
        # normalize
        x_norm = (x_tsne - x_min) / (x_max - x_min)

        plt.figure(figsize=(8, 8))
        for i in range(x_norm.shape[0]):
            plt.text(x_norm[i, 0],
                     x_norm[i, 1],
                     str(labels[i]),
                     color=plt.cm.Set1(labels[i]))
        plt.xticks([])
        plt.yticks([])
        plt.show()
Example #10
def tsne_show(data, labels, title=''):
    print("start tsne analysis...")
    sorted_ftrs = []
    uniq_labels = np.array(sorted(list(set(labels))))
    all_cls_ft_num = np.zeros(len(uniq_labels))
    # color_list = list(colors._colors_full_map.values())
    colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)
    by_hsv = sorted(
        (tuple(mcolors.rgb_to_hsv(mcolors.to_rgba(color)[:3])), name)
        for name, color in colors.items())
    color_list = [name for hsv, name in by_hsv]

    for i, lb in enumerate(uniq_labels):
        corr_ftrs = data[labels == lb]
        all_cls_ft_num[i] = len(corr_ftrs)
        sorted_ftrs.extend(corr_ftrs)
    sorted_ftrs = np.array(sorted_ftrs)

    ft_tsne = TSNE(init='pca').fit_transform(sorted_ftrs[:3000])
    ft_min, ft_max = ft_tsne.min(0), ft_tsne.max(0)
    ft_norm = (ft_tsne - ft_min) / (ft_max - ft_min)
    j = 0
    gd_cls = 0
    lst_gd_cls = 0
    cls_ft_num = all_cls_ft_num[j]
    plt.figure(figsize=(10, 10))

    for i in range(ft_norm.shape[0]):
        if i >= cls_ft_num:  # crossed into the next class
            j += 1
            cls_ft_num += all_cls_ft_num[j]
        if all_cls_ft_num[j] >= 20:  # only plot classes with at least 20 samples
            if j > lst_gd_cls:
                gd_cls += 1
            plt.scatter(ft_norm[i, 0], ft_norm[i, 1],
                        color=color_list[3 * gd_cls])
            lst_gd_cls = gd_cls
        if gd_cls >= 50:
            break
        # plt.scatter(ft_norm[i, 0], ft_norm[i, 1], color=color_list[j])
        # if j >= 20:
        #     break
    print(j)
    plt.title(title + 'resnet >=20 * 50')
    plt.show()
Example #11
def distMat2colors(dm, lab=False):

    # tsne into 3dim
    from sklearn.manifold import TSNE
    dm_3dim = TSNE(n_components=3, metric="precomputed").fit_transform(dm)
    n = dm_3dim.shape[0]
    #print(dm_3dim)

    # embedding is transformed to fit in CIELAB
    from skimage.color import lab2rgb

    #rgb1 = lab2rgb(dm_3dim.reshape(n, 1, 3))
    #print(rgb1.reshape(n, 3))

    #b, t = dm_3dim.min(0), dm_3dim.max(0)
    b, t = dm_3dim.min(), dm_3dim.max()
    #valid_lab = (dm_3dim - b) * np.array([99.99, 254.99, 254.99]) / (t - b) - np.array([0, 127, 127]) # range of value = 100, +-127, +-127
    valid_lab = (dm_3dim - b) * np.array([100.0, 100, 100]) / (
        t - b)  # scale all channels to [0, 100], inside the valid CIELAB range
    #valid_lab = (np.array([1, 2, 2]) * (dm_3dim - b) / (t - b)) - np.array([0, 1, 1]) # range of value = [0,1] for each

    if lab:
        # return CIELAB for each elem
        # return valid_lab
        print("lab[0-1]")
        print(valid_lab)
        print("3dim")
        print(dm_3dim)
        return valid_lab
    else:
        # return RGB
        rgb2 = lab2rgb(valid_lab.reshape(n, 1, 3)).reshape(n, 3)
        print("rgb2")
        print(rgb2)
        print("rgb2[0-255]")
        print(rgb2 * 255)
        return rgb2 * 255  # scale [0, 1] floats to the 8-bit range
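# A note on metric="precomputed": TSNE then expects a square distance matrix,
# and recent scikit-learn versions also require init='random' in that mode.
# A minimal sketch with synthetic distances:
import numpy as np
from scipy.spatial.distance import pdist, squareform
from sklearn.manifold import TSNE

X = np.random.rand(60, 8)
dm = squareform(pdist(X))  # square, symmetric, zero diagonal
dm_3dim = TSNE(n_components=3, metric="precomputed", init="random",
               perplexity=20).fit_transform(dm)
print(dm_3dim.shape)       # (60, 3)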
Example #12
    def _tsne_visualize(self, embeds, labels):
        fig, ax = plt.subplots()  # plt.subplots returns (figure, axes)

        tsne_vecs = TSNE(n_components=2).fit_transform(embeds)
        tsne_vecs /= tsne_vecs.max()

        axes = plt.gca()
        axes.set_xlim([-1.1, 1.1])
        axes.set_ylim([-1.1, 1.1])

        random.seed(0)
        colors = {
            int(label): "#" +
            ''.join([random.choice('0123456789ABCDEF') for j in range(6)])
            for label in np.unique(labels)
        }

        # add dots in plot
        for (x, y), label in zip(tsne_vecs, labels):
            plt.plot(x,
                     y,
                     color=colors[int(label)],
                     marker='o',
                     linestyle='dashed',
                     linewidth=2,
                     markersize=4)

        # add colorized legend
        handles = [
            mpatches.Patch(color=color, label=f"id_{label}")
            for label, color in colors.items()
        ]
        legend = plt.legend(handles=handles, loc='upper right')
        for legend_text, color in zip(legend.get_texts(), colors.values()):
            plt.setp(legend_text, color=color)

        return fig
Example #13
#%%
dense1_layer_model = Model(inputs=model.input, outputs=model.get_layer('Conv4_3').output)  # also try Conv1_2

dense1_output = dense1_layer_model.predict(test_data[5:6, :, :, :])
f, ax = plt.subplots(3, 4, figsize=(30, 10))
f.subplots_adjust(wspace=0.1, hspace=0.1)
for i in range(11):
    ax[i // 4, i % 4].imshow(dense1_output[0,:,:,i], cmap='gray')
    ax[i // 4, i % 4].axis('off')
ax[2,3].imshow(np.squeeze(test_data[5:6, :, :, :]), cmap='gray')
ax[2,3].axis('off')


#%%
fc_layer_model = Model(inputs=model.input, outputs=model.get_layer('fc3').output)  # also try fc1, fc2
fc_output = fc_layer_model.predict(test_data)
from sklearn.manifold import TSNE
X_tsne = TSNE(n_components=3).fit_transform(fc_output)

#%%
x_min, x_max = X_tsne.min(0), X_tsne.max(0)
X_norm = (X_tsne - x_min) / (x_max - x_min)
plt.figure(figsize=(8, 8))
for i in range(X_norm.shape[0]):
    plt.text(X_norm[i, 0], X_norm[i, 1], str(true_label[i]), color=plt.cm.Set1(true_label[i]),
             fontdict={'weight': 'bold', 'size': 9})
plt.xticks([])
plt.yticks([])
plt.show()
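# model, test_data and true_label come from the surrounding notebook. The
# intermediate-feature trick itself is plain Keras; a minimal sketch with a toy
# model (layer names here are hypothetical):
import numpy as np
from tensorflow.keras import Input, Model, layers

inp = Input(shape=(28, 28, 1))
x = layers.Conv2D(8, 3, name='conv')(inp)
x = layers.Flatten()(x)
out = layers.Dense(10, name='fc')(x)
model = Model(inp, out)

feature_model = Model(inputs=model.input, outputs=model.get_layer('conv').output)
features = feature_model.predict(np.random.rand(4, 28, 28, 1))
print(features.shape)  # (4, 26, 26, 8)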
Example #14
image_names = [image_names[i] for i in indx]
data_vectors = data_vectors[indx, :]

## load subset of images, resize
imdata = []
for im in image_names:
    temp = Image.open(im)
    temp.thumbnail([100, 100])
    imdata.append(np.array(temp))
imdata = np.array(imdata)

## run tsne on fasttext pca vectors
embeddings = TSNE(init='pca', verbose=2,
                  random_state=200).fit_transform(data_vectors)
embeddings -= embeddings.min(axis=0)
embeddings /= embeddings.max(axis=0)

## plot scatter t-sne
plt.figure(figsize=(17, 9))
plt.scatter(embeddings[:, 0], embeddings[:, 1], c=indx)
cb = plt.colorbar(fraction=0.05, pad=0.0125)
plt.xticks([])
plt.yticks([])

# plot images as scatter t-sne
plt.figure(figsize=(24, 12))
plt.gca().set_facecolor("black")
for pos, img in zip(embeddings, imdata):
    # 0.03 + pos * 0.94 keeps thumbnails inside the axes margins
    ab = AnnotationBbox(OffsetImage(img),
                        0.03 + pos * 0.94,
                        xycoords="axes fraction",
                        frameon=False)  # closing arguments assumed; the source is cut off here
    plt.gca().add_artist(ab)
Example #15
    def plot_transfer_embeddings(self):

        output_image_path = os.path.join(self.args.output_image_path,
                                         self.args.val_set,
                                         self.args.load_iteration)
        os.makedirs(output_image_path, exist_ok=True)

        speakers = []
        utts = []
        # in_test
        # self.samples = ['252', '240', '237', '341', '274', '236', '272', '329', '271', '301']
        # out_test
        # self.samples = ['232', '305', '227', '238', '263', '339', '376', '318', '286', '312']
        for speaker in self.samples:
            speakers += [speaker] * len(self.indexes[speaker])
            utts += self.indexes[speaker]

        dataset = TransferDateset(os.path.join(self.args.data_dir,
                                               self.args.dataset),
                                  speakers,
                                  utts,
                                  self.indexes,
                                  segment_size=None)
        dataloader = DataLoader(dataset,
                                batch_size=1,
                                shuffle=False,
                                num_workers=4,
                                pin_memory=True)

        embs = []
        embs_tran = []

        for data in dataloader:
            spec_tar = cc(data['tar'])
            spec_tar_d = cc(data['tar_dmel'])
            spec_src = cc(data['src'])

            emb = self.model.get_speaker_embeddings(spec_tar)

            with torch.no_grad():
                mu, emb, spec_tran = self.model.patch(spec_src, spec_tar)

                if self.args.model_type == 'AdaVAEGAN':
                    spec_residual = self.patch_model(mu, emb)
                    spec_tran = spec_tran + spec_residual

                emb_tran = self.model.get_speaker_embeddings(spec_tran)

            embs += emb.detach().cpu().numpy().tolist()
            embs_tran += emb_tran.detach().cpu().numpy().tolist()

            print('Evaluate: {}/{}'.format(len(embs), len(dataloader)),
                  end='\r')

        embs_all = embs + embs_tran

        embs_all = np.array(embs_all)
        norms = np.sqrt(np.sum(embs_all**2, axis=1, keepdims=True))
        embs_all = embs_all / norms

        # t-SNE
        print('\nt-SNE...')
        embs_2d = TSNE(n_components=2, init='pca',
                       perplexity=50).fit_transform(embs_all)
        x_min, x_max = embs_2d.min(0), embs_2d.max(0)
        embs_2d = (embs_2d - x_min) / (x_max - x_min)

        embs_2d_src = embs_2d[:len(embs)]
        embs_2d_tran = embs_2d[len(embs):]
        # plot to figure
        female_cluster = [
            i for i, speaker in enumerate(speakers)
            if self.speaker_infos[speaker][0] == 'F'
        ]
        male_cluster = [
            i for i, speaker in enumerate(speakers)
            if self.speaker_infos[speaker][0] == 'M'
        ]
        colors = np.array(
            [self.samples_index[speaker] for speaker in speakers])
        # plt.scatter(embs_2d_src[female_cluster, 0], embs_2d_src[female_cluster, 1],  c=colors[female_cluster], marker='s')
        # plt.scatter(embs_2d_src[male_cluster, 0], embs_2d_src[male_cluster, 1], c=colors[male_cluster], marker='o')

        plt.scatter(embs_2d_tran[female_cluster, 0],
                    embs_2d_tran[female_cluster, 1],
                    c=colors[female_cluster],
                    marker='x')
        plt.scatter(embs_2d_tran[male_cluster, 0],
                    embs_2d_tran[male_cluster, 1],
                    c=colors[male_cluster],
                    marker='o')

        plt.savefig(os.path.join(output_image_path, 'transfer.png'))
        plt.clf()
        plt.cla()
        plt.close()
        return
Example #16
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
# initialize network
net.init(sess)

# prepare t-SNE
with open('mukai_dataset.pickle', mode='rb') as f:
    data_set = pickle.load(f)  # values already divided by 100
X_reduced = TSNE(n_components=2, random_state=0,
                 perplexity=perp).fit_transform(data_set)
X = torch.tensor(data_set, requires_grad=True, dtype=torch.double)
Y = torch.tensor(X_reduced, requires_grad=True, dtype=torch.double)

ini_set = {
    'area_min': X_reduced.min(axis=0),
    'area_max': X_reduced.max(axis=0)
}
with open(mb + '/initial_setting.pickle', mode='wb') as f:
    pickle.dump(ini_set, f)

# server mode
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind(("127.0.0.1", port))
while True:
    print("waitnig... port:{}".format(port))
    s.listen(1)
    cli, addr = s.accept()
    print('connected from ', str(addr))
    data = np.array([])
    for _ in range(113):
        new = cli.recv(8 * 2048)
Example #17
from numpy import array, dot, diag, nan_to_num
from numpy.random import randn

import sys

features = 'CADD1,CADD2,RecA,EssA,CADD3,CADD4,RecB,EssB,Path'.split(',')

df_data = pd.read_csv("dida_posey_to_predict.csv")
df_data.head()

combination = sys.argv[1]
X = array(df_data[features])
X = X[:, [c == '1' for c in combination]]

X = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))

if len(X.T) > 2:
    X = TSNE(n_components=2, init="pca").fit_transform(X)
    X = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
    X = nan_to_num(X)

df_data_vs = df_data.copy(False)
df_data_vs['x'] = X[:, 0]
df_data_vs['y'] = X[:, 1] if len(X.T) > 1 else 0
df_data_vs = df_data_vs.drop(columns='Pair')  # positional axis argument was removed in pandas 2.0

with open("exports/p_file_" + combination + ".csv", "w") as out:
    out.write('id,x,y\n')
    for line in array(df_data_vs):
        out.write(','.join(map(str, line[[0, -2, -1]])) + '\n')
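# The column-selection trick above in isolation: a CLI-style bitmask string
# turned into a boolean column mask (values here are made up):
import numpy as np

X = np.arange(12).reshape(3, 4)
combination = "1010"                    # e.g. sys.argv[1]
mask = [c == '1' for c in combination]  # boolean list keeps matching columns
print(X[:, mask])                       # columns 0 and 2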
Example #18
        alpha = 0.5
    hit = np.append(train.rowmeta['Tissue'] == tissue,
                    valid.rowmeta['Tissue'] == tissue)
    ax.plot(T[hit, 0],
            T[hit, 1],
            linestyle='None',
            linewidth=0,
            marker='o',
            markerfacecolor=color,
            markeredgecolor=color,
            markersize=2,
            markeredgewidth=0,
            alpha=alpha,
            zorder=zorder,
            label=tissue)
ax.set_xlim(T.min(0)[0], 1.5 * (T.max(0)[0] - T.min(0)[0]) - T.max(0)[0])
#ax.set_ylim(T.min(0)[1], T.max(0)[1]+1*(T.max(0)[1]-T.min(0)[1]))
ax.legend(loc='best',
          ncol=2,
          numpoints=1,
          markerscale=2,
          fontsize=8,
          labelspacing=0.1)
ax.tick_params(axis='both',
               which='major',
               bottom=False,
               top=False,
               labelbottom=False,
               labeltop=False,
               left=False,
               right=False,
               labelleft=False)  # final arguments assumed; the source is cut off here
Example #19
    def plot_segment_embeddings(self):

        output_image_path = os.path.join(self.args.output_image_path,
                                         self.args.val_set,
                                         self.args.load_iteration)
        os.makedirs(output_image_path, exist_ok=True)

        speakers = []
        utts = []
        for speaker in self.samples:
            speakers += [speaker] * len(self.indexes[speaker])
            utts += self.indexes[speaker]

        dataset = EvaluateDateset(
            os.path.join(self.args.data_dir, self.args.dataset),
            speakers,
            utts,
            segment_size=self.config['data_loader']['segment_size'],
            load_spectrogram='dmel')

        dataloader = DataLoader(dataset,
                                batch_size=128,
                                shuffle=False,
                                num_workers=0,
                                pin_memory=True)
        batchiter = infinite_iter(dataloader)

        embs = []
        speakers = []
        # run the model
        while len(embs) < self.args.n_segments:
            data = next(batchiter)
            speakers += data['speaker']
            data = cc(data['spectrogram'].permute(0, 2, 1))
            emb = self.model.get_speaker_embeddings(data)
            embs += emb.detach().cpu().numpy().tolist()
            print('Evaluate: {}/{}'.format(len(embs), self.args.n_segments),
                  end='\r')

        embs = np.array(embs)
        norms = np.sqrt(np.sum(embs**2, axis=1, keepdims=True))
        embs = embs / norms

        # t-SNE
        print('\nt-SNE...')
        embs_2d = TSNE(n_components=2, init='pca',
                       perplexity=50).fit_transform(embs)
        x_min, x_max = embs_2d.min(0), embs_2d.max(0)
        embs_2d = (embs_2d - x_min) / (x_max - x_min)

        # plot to figure
        female_cluster = [
            i for i, speaker in enumerate(speakers)
            if self.speaker_infos[speaker][1] == 'F'
        ]
        male_cluster = [
            i for i, speaker in enumerate(speakers)
            if self.speaker_infos[speaker][1] == 'M'
        ]
        colors = np.array(
            [self.samples_index[speaker] for speaker in speakers])
        plt.scatter(embs_2d[female_cluster, 0],
                    embs_2d[female_cluster, 1],
                    c=colors[female_cluster],
                    marker='x')
        plt.scatter(embs_2d[male_cluster, 0],
                    embs_2d[male_cluster, 1],
                    c=colors[male_cluster],
                    marker='o')
        plt.savefig(os.path.join(output_image_path, 'segment.png'))
        plt.clf()
        plt.cla()
        plt.close()
        return
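# infinite_iter is a project helper that is not shown; a plausible minimal
# implementation (an assumption, not the repo's actual code):
def infinite_iter(dataloader):
    # cycle through the DataLoader forever so callers can keep pulling
    # next(batchiter) until enough segments are collected
    while True:
        for batch in dataloader:
            yield batch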
Example #20
from sklearn.manifold import TSNE
import numpy as np
import json

with open('mv_data.json') as f:
    j = json.loads(f.read())


def distance(a, b):
    return np.linalg.norm(a.reshape((40, 40))-b.reshape((40, 40)))


data = np.array(list(map(lambda x: np.array(x).flatten(),
                         map(lambda x: x['viewMatrix'], j['papers']))))
embed = TSNE(metric=distance).fit_transform(data)
embed -= embed.min(axis=0)
embed /= embed.max(axis=0)
embed *= 2
embed -= 1

with open('tsne.json', 'w') as f:
    f.write(json.dumps(embed.tolist()))
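# Note on callable metrics: TSNE calls the function once per pair of 1-D rows,
# so this is far slower than the built-in metrics. A compact sketch of the same
# pattern with synthetic data:
import numpy as np
from sklearn.manifold import TSNE

def distance(a, b):
    return np.linalg.norm(a - b)  # plain Euclidean, evaluated in Python per pair

data = np.random.rand(50, 16)
embed = TSNE(metric=distance, perplexity=10).fit_transform(data)
embed -= embed.min(axis=0)
embed /= embed.max(axis=0)
embed = embed * 2 - 1  # rescale from [0, 1] to [-1, 1], as above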
Example #21
def convert_to_dict(clusters_to_filter, ru_idfs, fi_idfs, start_time):
    print(start_time)
    if isinstance(clusters_to_filter, dict):
        clusters_to_filter = clusters_to_filter.values()

    clusters_to_save = filter_interesting_clusters(clusters_to_filter)
    json_formatted = []

    cdata = [c.center / c.norm for c in clusters_to_save]
    if len(cdata) < 5:
        return json_formatted

    t_sne_space = TSNE(n_components=2, metric='cosine').fit_transform(cdata)
    # normalize T-SNE space to -1 to 1
    minimums = t_sne_space.min(axis=0)
    maximums = t_sne_space.max(axis=0)
    for v in t_sne_space:
        v[0] = 2 * (v[0] - minimums[0]) / (maximums[0] - minimums[0]) - 1
        v[1] = 2 * (v[1] - minimums[1]) / (maximums[1] - minimums[1]) - 1

    for cluster_index in range(len(clusters_to_save)):
        c = clusters_to_save[cluster_index]

        idfs = ru_idfs if c.lang == 'ru' else fi_idfs

        # TODO remove temporary filtering
        # (an earlier cutoff here was 1405555200000, i.e. 17/07/2014 00:00:00)
        if (c.created_at <
                (start_time - len(c.hourly_growth_rate) * 3600 * 1000)):
            continue
        #if (c.created_at < 1503014400000): # 18/08/2017 00:00:00
        #continue

        if len(c.hourly_growth_rate) < 1:
            continue

        start_idx = max(int((c.created_at - start_time) / 3600 / 1000), 1)
        for i in range(start_idx, len(c.hourly_growth_rate)):
            update = {}
            # timestamp
            update['t'] = int(c.first_growth_time / 1000) + i * 60 * 60

            # start with a new cluster event
            if i == start_idx:

                total_sentiment = c.hourly_accum_sentiment[-1]

                tags = c.hourly_tags[-1]

                if tags is not None:
                    tags = [
                        tag_label_overrides.get(t, t.title()) for t in tags
                    ]

                #get_keywords(c, idfs)[:4],
                update['n'] = {
                    c.id: {
                        's': round(c.hourly_growth_rate[i]),
                        'k': c.hourly_keywords[i],
                        'lang': c.lang,
                        'sentiment': round(c.hourly_sentiment[i], 3),
                        'sentiment_total': round(total_sentiment, 3),
                        'tags': tags if tags is not None else [],
                        't_sne': [float(t_sne_space[cluster_index][0]),
                                  float(t_sne_space[cluster_index][1])]
                    }
                }
            elif i == len(c.hourly_growth_rate):
                # insert a negative number at the end to mark the end of the cluster
                update['u'] = {c.id: {'s': -1}}
            else:
                update['u'] = {
                    c.id: {
                        's': int(round(c.hourly_growth_rate[i])),
                        'sentiment': round(c.hourly_sentiment[i], 3),
                        'sentiment_accum': round(c.hourly_accum_sentiment[i],
                                                 3),
                        'k': c.hourly_keywords[i - 1]
                    }
                }

            json_formatted.append(update)

    json_formatted.sort(key=lambda update: update['t'])
    return json_formatted
Example #22
                x = conv_net(x)
                x = x.contiguous().view(x.shape[0], -1)
                x = fc_net[0](x)
                x = fc_net[1](x)
                x = fc_net[2](x)
                if opt.layer_idx >= 1:
                    x = fc_net[3](x)
                    x = fc_net[4](x)
                x = torch.nn.functional.softmax(x, dim=-1)
                features.append(copy.deepcopy(x.detach()))
                labels.append(copy.deepcopy(y))
            features = torch.cat(features, dim=0)
            labels = torch.cat(labels, dim=0)
            Y = TSNE(init='pca').fit_transform(features[:800].numpy())
            labels = labels[:800].numpy()

        #for i in range(26):
        #    plt.scatter(Y[labels==i, 0], Y[labels==i, 1], 20, color=(float(i)/26, 0, 0))
        letters = list(string.ascii_letters[-26:])
        Y = (Y - Y.min(0)) / (Y.max(0) - Y.min(0))
        #plt.legend(string.ascii_letters[-26:])
        #plt.scatter(Y[:, 0], Y[:, 1], 5, c=labels, cmap='Spectral')
        #plt.colorbar(boundaries=np.arange(27)-0.5).set_ticks(np.arange(26))
        for i in range(len(labels)):
            c = plt.cm.rainbow(float(labels[i]) / 26)
            plt.text(Y[i, 0], Y[i, 1], s=letters[labels[i]], color=c)
        plt.savefig(os.path.join(tsne_dir, 'tsne_%d.jpg' % opt.layer_idx), dpi=300)
        plt.show()
        print('Results are saved as {}'.format(
            os.path.join(tsne_dir, 'tsne_%d.jpg' % opt.layer_idx)))
Example #23
    ax_hsv.set_title("HSV Channels")
    ax_hsv.set_xlabel("H Channel")
    ax_hsv.set_ylabel("S Channel")
    ax_hsv.set_zlabel("V Channel")
    ax_hsv.legend(custom_markers, ["No Skin", "Skin"])

    # Using dimensionality reduction to transform a 6D dataset to a 3D dataset.
    tsne = TSNE(n_components=3).fit_transform(color_data)

    # Fit a decision boundary in the t-SNE space.
    logreg.fit(tsne, labels)
    intercept = logreg.intercept_[0]
    coeff = logreg.coef_[0]

    tmp = np.linspace(tsne.min(), tsne.max(), 50)
    x, y = np.meshgrid(tmp, tmp)

    # Plot the t-SNE-reduced RGB+HSV dataset in 3D.
    fig = plt.figure()
    ax_tsne = fig.add_subplot(111, projection="3d")
    ax_tsne.plot_surface(x, y, z(x, y), alpha=0.2)

    for i, c, m in zip(range(2), ("r", "b"), ("o", "^")):
        xs = tsne[:, 0][labels == i]
        ys = tsne[:, 1][labels == i]
        zs = tsne[:, 2][labels == i]
        ax_tsne.scatter(xs, ys, zs, c=c, marker=m)

    ax_tsne.set_title("3D t-SNE")
    ax_tsne.set_xlabel("X")
Example #24
    def main_train(self):
        with tf.Graph().as_default():
            with tf.Session() as sess:
                img_data = facenet.get_dataset(self.datadir)
                path, label = facenet.get_image_paths_and_labels(img_data)
                print("label")
                print(label)
                print('Classes: %d' % len(img_data))
                print('Images: %d' % len(path))

                facenet.load_model(self.modeldir)
                images_placeholder = tf.get_default_graph().get_tensor_by_name(
                    "input:0")
                embeddings = tf.get_default_graph().get_tensor_by_name(
                    "embeddings:0")
                phase_train_placeholder = tf.get_default_graph(
                ).get_tensor_by_name("phase_train:0")
                embedding_size = embeddings.get_shape()[1]

                print('Extracting features of images for model')
                batch_size = 10000
                image_size = 160
                nrof_images = len(path)
                nrof_batches_per_epoch = int(
                    math.ceil(1.0 * nrof_images / batch_size))
                emb_array = np.zeros((nrof_images, embedding_size))
                #print(nrof_batches_per_epoch)
                #for i in range(nrof_batches_per_epoch):
                start_index = 0 * batch_size
                end_index = min((0 + 1) * batch_size, nrof_images)
                paths_batch = path[start_index:end_index]
                images = facenet.load_data(paths_batch, False, False,
                                           image_size)
                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                emb_array[start_index:end_index, :] = sess.run(
                    embeddings, feed_dict=feed_dict)
                print("emb_array[0]")
                print(emb_array[0])
                class_names = [cls.name.replace('_', ' ') for cls in img_data]
                classifier_file_name = os.path.expanduser(
                    self.classifier_filename)
                print('emb_array')
                print(emb_array)
                X_embedded = TSNE(n_components=2).fit_transform(emb_array)
                X_embedded -= X_embedded.min(axis=0)
                X_embedded /= X_embedded.max(axis=0)
                print("X_embedded")
                print(X_embedded)

                #for i in range(0, nrof_images-1):
                #    plt.plot(X_embedded[i, 0], X_embedded[i, 1],'bo')
                plt.legend(bbox_to_anchor=(1, 1))
                plt.show()
                out_dim = round(math.sqrt(nrof_images))
                out_res = 160
                to_plot = np.square(out_dim)
                grid = np.dstack(
                    np.meshgrid(np.linspace(0, 1, out_dim),
                                np.linspace(0, 1, out_dim))).reshape(-1, 2)
                cost_matrix = cdist(grid, X_embedded,
                                    "sqeuclidean").astype(np.float32)
                cost_matrix = cost_matrix * (100000 / cost_matrix.max())
                print(cost_matrix)
                #rids, cids = solve_dense(costs)
                #print(rids)
                print("zaczalem to robic")
                #row_ind, col_ind = linear_sum_assignment(cost_matrix)
                row_asses, col_asses, _ = lapjv(cost_matrix)
                #print("To cos")
                #print (col_asses)
                print("teraz to!")
                #print (row_ind)
                #print (col_ind)
                #for r,c in zip(row_ind, col_asses):
                #    print(r,c) # Row/column pairings
                grid_jv = grid[col_asses]
                out = np.ones((out_dim * out_res, out_dim * out_res, 3))
                print(grid_jv)

                for pos, img in zip(grid_jv, images[0:to_plot]):
                    h_range = int(np.floor(pos[0] * (out_dim - 1) * out_res))
                    w_range = int(np.floor(pos[1] * (out_dim - 1) * out_res))
                    out[h_range:h_range + out_res,
                        w_range:w_range + out_res] = image.img_to_array(img)
                print(out)
                im = image.array_to_img(out)
                im.save("obrazekV2.jpg", quality=100)
Example #25
 emb_array = np.zeros((nrof_images, embedding_size))
 start_index = 0 * batch_size
 end_index = min((0 + 1) * batch_size, nrof_images)
 paths_batch = path[start_index:end_index]
 images = facenet.load_data(paths_batch, False, False, image_size)
 feed_dict = {images_placeholder: images, phase_train_placeholder: False}
 emb_array[start_index:end_index, :] = sess.run(embeddings,
                                                feed_dict=feed_dict)
 print("emb_array[0]")
 print(emb_array[0])
 class_names = [cls.name.replace('_', ' ') for cls in img_data]
 print('emb_array')
 print(emb_array)
 X_embedded = TSNE(n_components=2).fit_transform(emb_array)
 X_embedded -= X_embedded.min(axis=0)
 X_embedded /= X_embedded.max(axis=0)
 print("X_embedded")
 print(X_embedded)
 for i in range(0, nrof_images - 1):
     plt.plot(X_embedded[i, 0], X_embedded[i, 1], 'bo')
 plt.legend(bbox_to_anchor=(1, 1))
 plt.show()
 out_dim = round(math.sqrt(nrof_images))
 out_res = 160
 to_plot = np.square(out_dim)
 grid = np.dstack(
     np.meshgrid(np.linspace(0, 1, out_dim),
                 np.linspace(0, 1, out_dim))).reshape(-1, 2)
 cost_matrix = cdist(grid, X_embedded, "sqeuclidean").astype(np.float32)
 cost_matrix = cost_matrix * (100000 / cost_matrix.max())
 print(cost_matrix)
Example #26
embed2 = TSNE(n_components=2).fit_transform(feas_normalized)

height = 128
width = 64
embed2 = embed2 - embed2.min(axis=0)
# np.median(np.abs(np.diff(embed2, axis=0)), axis=0)
space = 64 + 16
embed2 *= space
embed2 = embed2.astype(int)
extend = np.array([
    height,
    width,
])

shape = tuple((embed2.max(axis=0).astype(int) + extend).tolist()) + (3, )
print('res shape', shape)
res = np.ones(shape).astype(np.uint8) * 255

for ind in range(feas.shape[0]):
    img_name = feask[ind]
    img = cv2.imread(img_name)
    img2 = cvb.resize_keep_ar(img, height, width)
    if not (img2.shape[0] <= height and img2.shape[1] <= width):
        img2 = cvb.resize_keep_ar(img, width, width)
    assert (img2.shape[0] <= height and img2.shape[1] <= width)
    # if img2.shape[0] < height:
    #     img2 = np.concatenate((img2, np.ones((height - img2.shape[0], img2.shape[1], 3)) * 255), axis=0)
    # if img2.shape[1] < width:
    #     img2 = np.concatenate((img2, np.ones((img2.shape[0], width - img2.shape[1], 3)) * 255), axis=1)
    # assert img2.shape[0] == height and img2.shape[1] == width