Example #1
def run(input_file, output_file, max_common_frames, n_clusters):
    input = np.load(input_file)

    #print(np.linalg.norm(input[0, 10:]))

    # Columns 0-9: frame_id, track_id, 8 unnamed detection fields; columns 10+: appearance features

    t_ = time.time()

    print("Input shape:", input.shape)
    ids = list(np.unique(input[:, 1]))

    print(time.time() - t_)
    t_ = time.time()

    print("Total number of ids:", len(ids))
    ids_by_frames = {}
    for row in input:
        ids_by_frames.setdefault(row[0], []).append(row[1])
    n_ids_by_frames = {k: len(v) for k, v in ids_by_frames.items()}
    plt.bar(n_ids_by_frames.keys(), n_ids_by_frames.values(), color='g')
    #plt.show()

    print("Maximum number of ids on the same frame:",
          max(n_ids_by_frames.values()))
    ff = []
    for f, nid in n_ids_by_frames.items():
        if nid > n_clusters:
            ff.append(f)
    print("Delete frames with n_detections > n_clusters:", len(ff), ff)
    input = np.array([x for x in input if x[0] not in ff])
    min_len_tracklet = 10
    print("Delete tracklets with n_detections < ", min_len_tracklet)
    lens = []
    to_remove = []
    for i in ids:
        t = input[input[:, 1] == i][:, 0]
        if t.shape[0] < min_len_tracklet:
            to_remove.append(i)
        else:
            lens.append(t.shape[0])
    for i in to_remove:
        ids.remove(i)
        input = input[~(input[:, 1] == i)]
    print(input.shape, "detections x features")
    print("Mean len of tracklets (in frames):", np.mean(lens))

    random_data = []
    data = []
    for i in ids:
        group = input[:, 1] == i
        n_frames = input[group].shape[0]
        d = np.zeros(input[0, 10:].shape[0] + 2)
        d[0] = input[group][:, 0].min(axis=0)
        d[1] = i
        d[2:] = input[group][:, 10:].mean(axis=0)
        d[2:] = d[2:] / np.linalg.norm(d[2:]) * n_frames
        data.append(d)

        x = np.random.random(128)
        x = x / np.linalg.norm(x)
        random_data.append(list(d[:2]) + list(x))

    data = np.array(data)
    data = data[data[:, 0].argsort()]
    ids = list(data[:, 1])

    random_data = np.array(random_data)

    common_frames = np.zeros((len(ids), len(ids)))
    if run_common_frames:
        print("Computing common frames matrix...")
        for i in range(len(ids)):
            for j in range(i, len(ids)):
                n_common_frames = len(
                    set(list(input[input[:, 1] == ids[i]][:, 0])).intersection(
                        list(input[input[:, 1] == ids[j]][:, 0])))
                common_frames[i, j] = n_common_frames
        print("Saved common frames matrix")
        np.save("common_frames.npy", common_frames)
    else:
        print("Loaded common frames matrix")
        common_frames = np.load("common_frames.npy")
    #print(common_frames, common_frames.shape)

    print("Computing 'cannot link' constraints with max_common_frames = ",
          max_common_frames)
    must_link = []
    cannot_link = [(i, j) for i in range(len(ids))
                   for j in range(i + 1, len(ids))
                   if common_frames[i, j] > max_common_frames]
    #print(cannot_link)
    print("Number of constraints", len(cannot_link))

    # Centroid initialization: recompute ids-by-frame on the filtered input
    ids_by_frames = {}
    for row in input:
        ids_by_frames.setdefault(row[0], []).append(row[1])
    n_ids_by_frames = {k: len(v) for k, v in ids_by_frames.items()}
    ref_frame = max(n_ids_by_frames, key=lambda key: n_ids_by_frames[key])
    centers_ids_init = ids_by_frames[ref_frame]
    centers_init = [list(data[:, 1]).index(i) for i in centers_ids_init]

    ids = list(np.unique(data[:, 1]))
    if (len(ids) < n_clusters):
        n_clusters = len(ids)
    clusters, centers = cop_kmeans(dataset=data[:, 2:],
                                   initialization=centers_init,
                                   k=n_clusters,
                                   ml=must_link,
                                   cl=cannot_link,
                                   spherical=True)
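    # spherical=True appears to be a fork-specific extension of cop-kmeans
    # (assumption: not part of the upstream API) that treats points and
    # centroids as unit vectors, i.e. k-means under cosine distance.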

    #We then compute a clustering with random unit features and only the constraints (to compare)
    #random_clusters, random_centers = cop_kmeans(dataset=random_data, k=n_clusters, ml=must_link, cl=cannot_link, spherical=True)

    #print("Adjusted Rand Index between constrained k-means clustering and a constrained but random clustering", adjusted_rand_score(clusters, random_clusters))

    out_clusters = clusters

    output = input[:, :10]
    output[:, 1] = np.array(
        [out_clusters[ids.index(int(x))] for x in input[:, 1]])
    #print("Output:", output)
    print("Output saved to:", output_file)
    np.save(output_file, output)
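
A minimal driver sketch for this example (file names are hypothetical; note
that run() reads a module-level run_common_frames flag that is not defined
inside the function):

run_common_frames = True  # compute and cache common_frames.npy on the first run
run("tracklets.npy", "tracklets_clustered.npy", max_common_frames=0, n_clusters=10)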
Example #2
    def fit(self,
            x=None,
            y=None,
            shuffle_=None,
            pretrain_epochs=100,
            batch_size_au=256,
            maxiter_DC=7000,
            update_interval=140,
            n_clusters=2,
            seed_value=42,
            verbose=None,
            file_out=None,
            N_no_mod=None):
        '''
        Fit the model.
        '''
        if x is None:
            autoencoder, encoder = self.autoencoderConv1D(self.signal_shape)
            autoencoder.compile(optimizer='adadelta', loss='mse')
            clustering_layer = ClusteringLayer(n_clusters, name='clustering')(
                encoder.output)
            model = Model(inputs=encoder.input,
                          outputs=[clustering_layer, autoencoder.output])
            model.compile(loss=['kld', 'mse'],
                          loss_weights=[0.3, 1],
                          optimizer='adam')
            return model

        # Set the Python built-in pseudo-random generator to a fixed value
        random.seed(seed_value)
        # Set the numpy pseudo-random generator to a fixed value
        np.random.seed(seed_value)
        # Set the tensorflow pseudo-random generator to a fixed value
        # (layers with their own randomness, e.g. dropout, may need per-layer seeds)
        tf.set_random_seed(seed_value)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)

        nmi_f = normalized_mutual_info_score
        ari_f = adjusted_rand_score

        if shuffle_:
            x, y = shuffle(x, y, random_state=seed_value)

        # Baseline1 raw data
        #kmeans = KMeans(n_clusters=n_clusters, n_init=20, n_jobs=10, random_state = seed_value)
        #y_pred_kmeans = kmeans.fit_predict(x.reshape((x.shape[0], x.shape[1])))

        # baseline with cop-kmeans: must-link every pair among the first 900 samples
        must_link = list(itertools.combinations(np.arange(0, 900), 2))
        y_pred_kmeans, centers = cop_kmeans(dataset=x, k=2, ml=must_link)

        if file_out:
            file_out.write('Acc. k-means : ' +
                           str(self.accuracy(y, np.array(y_pred_kmeans))) +
                           '\n')

        batch_size = batch_size_au

        autoencoder, encoder = self.autoencoderConv1D(self.signal_shape)

        autoencoder.summary()

        autoencoder.compile(optimizer='adadelta', loss='mse')
        autoencoder.fit(x, x, batch_size=batch_size, epochs=pretrain_epochs)

        # Baseline 2
        #kmeans = KMeans(n_clusters=n_clusters, n_init=20, n_jobs=10, random_state=42)
        #y_pred_kmeans = kmeans.fit_predict(encoder.predict(x))
        y_pred_kmeans, centers = cop_kmeans(dataset=encoder.predict(x),
                                            k=2,
                                            ml=must_link)

        if file_out:
            file_out.write('Acc. Autoencoder : ' +
                           str(self.accuracy(y, np.array(y_pred_kmeans))) +
                           '\n')

        # build the clustering layer
        clustering_layer = ClusteringLayer(n_clusters,
                                           name='clustering')(encoder.output)

        model = Model(inputs=encoder.input,
                      outputs=[clustering_layer, autoencoder.output])

        model.compile(loss=['kld', 'mse'],
                      loss_weights=[0.1, 1],
                      optimizer='adam')
        model.summary()

        # Initialize the clustering layer's centers from the cop-kmeans result
        # (k=2 clusters; the 5 assumes a 5-dimensional encoder output)
        centers = np.array(centers).reshape((2, 5))
        model.get_layer(name='clustering').set_weights([centers])

        loss = 0
        index = 0
        maxiter = maxiter_DC
        index_array = np.arange(x.shape[0])

        # change the batch size to the number of samples
        #batch_size = x.shape[0]

        # computing an auxiliary target distribution
        def target_distribution(q):
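            # DEC-style target: p_ij = (q_ij^2 / f_j) / sum_j'(q_ij'^2 / f_j'),
            # where f_j = sum_i q_ij; squaring sharpens confident assignments.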
            weight = q**2 / q.sum(0)
            return (weight.T / weight.sum(1)).T

        for ite in range(int(maxiter)):
            if ite % update_interval == 0:
                q, _ = model.predict(x, verbose=0)
                # update the auxiliary target distribution p
                p = target_distribution(q)

                p[:N_no_mod] = p[:N_no_mod]  # no-op as written
                # evaluate the clustering performance
                y_pred = q.argmax(1)
                if y is not None:
                    acc = self.accuracy(y, y_pred)
                    ari = ari_f(y, y_pred)
                    nmi = nmi_f(y, y_pred)
                    if verbose:
                        print(
                            'Iter %d: acc = %.5f, nmi = %.5f, ari = %.5f' %
                            (ite, acc, nmi, ari), ' ; loss=', loss)

            idx = index_array[index * batch_size:min((index + 1) *
                                                     batch_size, x.shape[0])]
            loss = model.train_on_batch(x=x[idx], y=[p[idx], x[idx]])
            index = index + 1 if (index + 1) * batch_size <= x.shape[0] else 0

        return model
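
The must_link list above pins the first 900 samples into one cluster before
pretraining. A minimal, self-contained sketch of that trick on toy data (the
import path and all values here are assumed):

import itertools
import numpy as np
from copkmeans.cop_kmeans import cop_kmeans  # import path assumed

toy = np.random.rand(1000, 5)                     # hypothetical dataset
ml = list(itertools.combinations(range(900), 2))  # force samples 0..899 together
labels, centers = cop_kmeans(dataset=toy, k=2, ml=ml)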
Example #3
    # Pairwise geographic distance matrix. The original snippet was truncated
    # mid-expression; the outer loop over r1 is reconstructed here.
    D_lat_lng = np.array([
        [calculate_geo_distance(r1.lat, r1.lng, r2.lat, r2.lng)
         for _, r2 in lat_lng_df.iterrows()]
        for _, r1 in lat_lng_df.iterrows()
    ])
    D_lat_lng_scale = scaler.fit_transform(D_lat_lng)
    D_lat_lng_scale = pd.DataFrame(D_lat_lng_scale).fillna(
        np.nanmean(D_lat_lng_scale)).values

    # calculate topic distance between statements
    persons_1 = list(map(preprocess, list(df['Statement'])))
    persons_2 = list(map(preprocess, list(df['Statement'])))
    D_statement = - affinity_computation(persons_1, persons_2,
                                        n_components=30, min_df=2, max_df=0.8,
                                        weighting='tfidf', projection='svd')
    std_topic = D_statement.std()

    # clustering
    D_final = (D_statement) + (10 * std_topic * D_tz) + (std_topic * D_lat_lng_scale) # final distance
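    # Weighting (D_tz is a time-zone distance built earlier in the original,
    # truncated script): the time-zone term gets 10x the topic std so pods stay
    # time-zone compatible, while geo distance is a weaker tie-breaker at 1x.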
    X_mds = MDS(n_components=30).fit_transform(D_final)
    # cannot_link was built earlier in the original (truncated) script
    clusters_kmean, centers_kmean = cop_kmeans(dataset=X_mds, k=200, cl=cannot_link)
    output_df = df[selected_cols].copy()  # .copy() avoids SettingWithCopyWarning
    output_df['pod_number'] = clusters_kmean

    # rearrange
    df_rearrange = []
    pod_num = 1
    for _, df_tz in output_df.groupby('timezone'):
        for _, df_pod_num in df_tz.groupby('pod_number'):
            df_pod_num = df_pod_num.copy()
            df_pod_num['pod_number'] = pod_num
            df_rearrange.append(df_pod_num)
            pod_num += 1
    df_rearrange = pd.concat(df_rearrange)[selected_cols]
    df_rearrange.to_csv('pod_matching_rearrange_mds.csv', index=False)
Example #4
# This snippet starts mid-script: precip_sample, bicontin_sample,
# label_list_train, and processed_no_test are defined earlier, and must_link
# starts as an empty list.
for i in range(precip_sample + 1, 153):
    if label_list_train[i] == 1:
        must_link.append((precip_sample, i))

# This is the same idea
for i in range(bicontin_sample + 1, 153):
    if label_list_train[i] == -1:
        must_link.append((bicontin_sample, i))

# We do not want anything with different labels to be linked
cannot_link = [(precip_sample, bicontin_sample)]

random.seed(18)
# This command runs the method; clusters are the label assignments
clusters, centers = cop_kmeans(dataset=processed_no_test,
                               k=2,
                               ml=must_link,
                               cl=cannot_link)
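
# Note: cop_kmeans returns (None, None) when the constraints are unsatisfiable,
# so robust code should check `clusters is not None` before indexing into it.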

# Remember, this is an unsupervised process: no real label information goes in.
# The algorithm returns two clusters, and we have to inspect them to see which
# cluster labels correspond to our initial labels.
precip_cluster_label = clusters[precip_sample]
bicontin_cluster_label = clusters[bicontin_sample]

for i in range(0, 205):
    if clusters[i] == precip_cluster_label:
        clusters[i] = 1
    else:
        clusters[i] = -1
Example #5
def cop_kmean():
    # input_matrix = numpy.random.rand(100, 500)
    documents = read_txt()
    input_matrix = []
    for i in documents:
        input_matrix = build_matrix(i, input_matrix)

    # must_link = [(1,2),(8,9),(0,1),(3,7),(0,3),(0,4),(11,12),(24,26),(25,27),(50,51),(53,54)]
    # cannot_link = [(7,13),(0,5),(0,8),(0,24),(0,27),(0,50),(0,53)]
    must_link = [(0, 1), (0, 2), (0, 3), (0, 7), (5, 8), (14, 18), (37, 38)]
    cannot_link = [(0, 4), (0, 5), (0, 6), (0, 8), (7, 71), (7, 16), (0, 31),
                   (0, 37), (7, 123), (0, 123), (7, 18)]
    clusters, centers = cop_kmeans(dataset=input_matrix,
                                   k=4,
                                   ml=must_link,
                                   cl=cannot_link)
    print(clusters)
    print(input_matrix)
    print(centers)
    print(len(clusters), '--', len(input_matrix))

    # test model
    # tests = ['h', 'a', 'qq', 'cx', 'hvh']
    tests = [
        'a', 'b', 'abbca', 'ccbba', 'dabdda', 'm', 'ammp', 'non', 'mmdn',
        'oaa', 'o', 'wwp', 'xp', 'xnp', 'ppopn', 'w', 'xzzmz', 'ywzzyz',
        'wywyzyy', 'zww'
    ]
    result = []
    for test in tests:
        vect = word2vector(test)
        temp = input_matrix.index(vect)
        # print(vect)
        group_num = clusters[temp]
        # print(test_result)

        line = [
            test,
            str(get_score(centers, vect, group_num)),
            str(group_num),
            str(temp),
            str(vect)
        ]
        result.append(line)
    write_excel_xls(book_name_xls, sheet_name_xls, value_title)
    write_excel_xls_append(book_name_xls, result)
    # score_a_file(input_matrix, clusters, centers)

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')  # Axes3D(fig) alone no longer attaches axes in recent matplotlib
    cluster_set = [[], [], [], []]
    for ind in range(len(input_matrix)):
        cluster_set[clusters[ind]].append(input_matrix[ind])
    cluster_arr = tuple(cluster_set)
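    # cluster_set[c] now holds the raw vectors assigned to cluster c (k=4)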
    ax.scatter([i[0] for i in cluster_arr[0]], [i[1] for i in cluster_arr[0]],
               [i[2] for i in cluster_arr[0]],
               c='r',
               label='first cluster')
    ax.scatter([i[0] for i in cluster_arr[1]], [i[1] for i in cluster_arr[1]],
               [i[2] for i in cluster_arr[1]],
               c='b',
               label='second cluster')
    ax.scatter([i[0] for i in cluster_arr[2]], [i[1] for i in cluster_arr[2]],
               [i[2] for i in cluster_arr[2]],
               c='g',
               label='third cluster')
    ax.scatter([i[0] for i in cluster_arr[3]], [i[1] for i in cluster_arr[3]],
               [i[2] for i in cluster_arr[3]],
               c='y',
               label='fourth cluster')
    print('Items per cluster:', len(cluster_set[3]),
          len(cluster_set[2]), len(cluster_set[1]), len(cluster_set[0]))
    # ax.scatter(centers[0][2], centers[0][3], centers[0][5], marker='*', c='r')
    # ax.scatter(centers[1][2], centers[1][3], centers[1][5], marker='1', c='b')
    # ax.scatter(centers[2][2], centers[2][3], centers[2][5], marker='P', c='g')
    # ax.scatter(centers[3][2], centers[3][3], centers[3][5], marker='x', c='y')
    # print(cluster_arr[0])
    ax.legend(loc='best')
    ax.set_zlabel('high risk', fontdict={'size': 13, 'color': 'black'})
    ax.set_ylabel('medium risk', fontdict={'size': 13, 'color': 'black'})
    ax.set_xlabel('normal event', fontdict={'size': 13, 'color': 'black'})
    plt.savefig('fig.png', bbox_inches='tight')
    plt.show()
Example #6
def run(input_file, output_file, max_common_frames, n_clusters, version):
    input = np.load(input_file)
    # Columns 0-9: frame_id, track_id, 8 unnamed detection fields; columns 10+: appearance features

    t_ = time.time()

    print("Input shape:", input.shape)
    ids = list(np.unique(input[:, 1]))

    ######## CLEANUP

    print("Total number of ids:", len(ids))
    ids_by_frames = {}
    for row in input:
        ids_by_frames.setdefault(row[0], []).append(row[1])
    n_ids_by_frames = {k: len(v) for k, v in ids_by_frames.items()}

    #plt.bar(n_ids_by_frames.keys(), n_ids_by_frames.values(), color='g')
    #plt.show()

    print("Maximum number of ids on the same frame:",
          max(n_ids_by_frames.values()))
    ff = []
    for f, nid in n_ids_by_frames.items():
        if nid > n_clusters:
            ff.append(f)
    print("Delete frames with n_detections > n_clusters:", len(ff), ff)
    input = np.array([x for x in input if x[0] not in ff])

    min_len_tracklet = 0

    lens = []
    to_remove = []
    for i in ids:
        t = input[input[:, 1] == i][:, 0]
        if t.shape[0] < min_len_tracklet:
            to_remove.append(i)
        else:
            lens.append(t.shape[0])
    for i in to_remove:
        ids.remove(i)
        input = input[~(input[:, 1] == i)]
    print("Delete tracklets with n_detections < ", min_len_tracklet, " : ",
          len(to_remove))
    print(input.shape, "detections x features")
    print("Mean len of tracklets (in frames):", np.mean(lens))

    ######## END CLEANUP

    random_data = []
    data = []

    nn_frames = []
    for i in ids:
        group = input[:, 1] == i
        n_frames = input[group].shape[0]
        nn_frames.append(n_frames)
        d = np.zeros(input[0, 10:].shape[0] + 2)
        d[0] = input[group][:, 0].min(axis=0)
        d[1] = i
        d[2:] = input[group][:, 10:].mean(axis=0)
        d[2:] = d[2:] / np.linalg.norm(d[2:]) * n_frames
        data.append(d)

        x = np.random.random(128)
        x = x / np.linalg.norm(x)
        random_data.append(list(d[:2]) + list(x))

    data = np.array(data)
    data = data[data[:, 0].argsort()]  # sort by ascending frame_idx
    ids = list(data[:, 1])

    plt.hist(nn_frames, bins=100)
    #plt.show()

    common_frames = np.zeros((len(ids), len(ids)))
    if run_common_frames:
        print("Computing common frames matrix...")
        for i in range(len(ids)):
            for j in range(i, len(ids)):
                n_common_frames = len(
                    set(list(input[input[:, 1] == ids[i]][:, 0])).intersection(
                        list(input[input[:, 1] == ids[j]][:, 0])))
                common_frames[i, j] = n_common_frames
        print("Saved common frames matrix")
        np.save("common_frames.npy", common_frames)
    else:
        print("Loaded common frames matrix")
        common_frames = np.load("common_frames.npy")
    #print(common_frames, common_frames.shape)

    print("Computing 'cannot link' constraints with max_common_frames = ",
          max_common_frames)
    must_link = []
    cannot_link = [(i, j) for i in range(len(ids))
                   for j in range(i + 1, len(ids))
                   if common_frames[i, j] > max_common_frames]
    #print(cannot_link)
    print("Number of constraints", len(cannot_link))

    # CENTROID INITIALIZATION
    init_mode = 2

    ids_by_frames = {}
    for row in input:
        ids_by_frames.setdefault(row[0], []).append(row[1])

    if init_mode == 1 or True:  # NOTE: `or True` makes this branch always run
        # Find the frame with the most players detected simultaneously, and
        # initialize the clusters with the tracklets detected on that frame.
        n_ids_by_frames = {k: len(v) for k, v in ids_by_frames.items()}
        ref_frame = max(n_ids_by_frames, key=lambda key: n_ids_by_frames[key])
        print(ref_frame, ids_by_frames[ref_frame],
              [list(data[:, 1]).index(i) for i in ids_by_frames[ref_frame]])

    if init_mode == 2:
        # Find the frame maximizing the summed lengths of the tracklets
        # detected on it, and initialize the clusters with those tracklets.
        len_by_frames = {}
        for row in input:
            len_by_frames.setdefault(row[0], []).append(
                np.linalg.norm(data[data[:, 1] == row[1]].reshape(-1)[2:]))
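        # Since d[2:] was rescaled above to have norm n_frames, the norm here
        # is exactly the tracklet's length in frames.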
        sum_len_by_frames = {k: sum(v) for k, v in len_by_frames.items()}
        ref_frame = max(sum_len_by_frames,
                        key=lambda key: sum_len_by_frames[key])

    centers_ids_init = ids_by_frames[ref_frame]

    centers_init = [list(data[:, 1]).index(i) for i in centers_ids_init]

    print(ref_frame, centers_ids_init, centers_init)

    ids = list(np.unique(data[:, 1]))
    if (len(ids) < n_clusters):
        n_clusters = len(ids)

    clusters, centers = cop_kmeans(dataset=data[:, 2:],
                                   initialization=centers_init,
                                   k=n_clusters,
                                   ml=must_link,
                                   cl=cannot_link,
                                   spherical=True)

    if clusters is None:
        print("Error: impossible clustering")
        exit()

    print([clusters[ids.index(int(x))] for x in centers_ids_init])

    #clusters = [np.random.randint(0,n_clusters) for i in range(len(ids))] #RANDOM CLUSTERING

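    # Hardcoded track_id -> cluster label(s) map, apparently from a manual
    # labeling pass; it is unused below (its two consumers are commented out
    # right after the dict).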
    clusters_ = {
        2.0: [1.0],
        584.0: [1.0],
        644.0: [1.0],
        910.0: [1.0],
        1060.0: [1.0],
        1435.0: [1.0],
        1593.0: [1.0],
        1732.0: [1.0],
        2021.0: [1.0],
        2149.0: [1.0],
        2273.0: [1.0],
        2455.0: [1.0],
        2550.0: [1.0],
        2671.0: [1.0],
        2680.0: [1.0, 6.0],
        21.0: [2.0],
        67.0: [2.0],
        103.0: [2.0],
        270.0: [2.0],
        346.0: [2.0],
        399.0: [2.0],
        666.0: [2.0],
        1129.0: [2.0],
        1382.0: [2.0],
        1658.0: [2.0],
        1714.0: [2.0],
        2029.0: [2.0],
        2087.0: [2.0],
        2348.0: [2.0],
        2785.0: [2.0],
        224.0: [3.0],
        283.0: [3.0],
        316.0: [3.0],
        386.0: [3.0],
        936.0: [3.0],
        1299.0: [3.0],
        1374.0: [3.0],
        1462.0: [3.0],
        1567.0: [3.0],
        1636.0: [3.0],
        1700.0: [3.0],
        1860.0: [3.0],
        2432.0: [3.0],
        2594.0: [3.0],
        2643.0: [3.0],
        2711.0: [3.0],
        4.0: [4.0],
        575.0: [4.0],
        676.0: [4.0],
        756.0: [4.0],
        888.0: [4.0],
        950.0: [4.0],
        1156.0: [4.0, 6.0],
        1918.0: [4.0, 8.0],
        2086.0: [4.0],
        2163.0: [4.0],
        2358.0: [4.0],
        2553.0: [4.0],
        3.0: [5.0],
        180.0: [5.0],
        235.0: [5.0],
        304.0: [5.0],
        401.0: [5.0],
        893.0: [5.0],
        1428.0: [5.0],
        1558.0: [5.0],
        2032.0: [5.0],
        2743.0: [5.0],
        1.0: [6.0],
        42.0: [6.0],
        140.0: [6.0],
        181.0: [6.0],
        510.0: [6.0],
        560.0: [6.0],
        686.0: [6.0],
        819.0: [6.0],
        961.0: [6.0],
        1058.0: [6.0],
        1403.0: [6.0],
        1990.0: [6.0],
        2059.0: [6.0],
        2253.0: [6.0],
        2416.0: [6.0],
        2516.0: [6.0],
        2748.0: [6.0],
        5.0: [7.0],
        108.0: [7.0],
        262.0: [7.0],
        344.0: [7.0],
        438.0: [7.0],
        504.0: [7.0],
        578.0: [7.0],
        832.0: [7.0],
        941.0: [7.0],
        1038.0: [7.0],
        1407.0: [7.0],
        1547.0: [7.0],
        1783.0: [7.0],
        1866.0: [7.0],
        1910.0: [7.0],
        2222.0: [7.0],
        2458.0: [7.0],
        2570.0: [7.0],
        2645.0: [7.0],
        6.0: [8.0],
        54.0: [8.0],
        89.0: [8.0],
        261.0: [8.0],
        334.0: [8.0],
        409.0: [8.0],
        440.0: [8.0],
        637.0: [8.0],
        932.0: [8.0],
        1102.0: [8.0],
        1173.0: [8.0],
        1406.0: [8.0],
        1508.0: [8.0],
        1644.0: [8.0],
        2007.0: [8.0],
        2166.0: [8.0],
        2480.0: [8.0],
        2525.0: [8.0],
        46.0: [9.0],
        661.0: [9.0],
        894.0: [9.0],
        1029.0: [9.0],
        1325.0: [9.0],
        1831.0: [9.0],
        1973.0: [9.0],
        2268.0: [9.0],
        2139.0: [10.0],
        1355.0: [11.0]
    }

    #output = np.array([x for x in input[:,:10] if x[1] in clusters_.keys()])
    #output[:,1] = np.array([clusters_[x][0] for x in output[:,1]])

    output = input[:, :10]
    output[:, 1] = np.array([clusters[ids.index(int(x))] for x in output[:, 1]])

    print("Output saved to:", output_file)
    np.save(output_file, output)
Example #7
def run(input_file, output_file, max_common_frames, n_clusters, version):
    input = np.load(input_file)
    # Columns 0-9: frame_id, track_id, 8 unnamed detection fields; columns 10+: appearance features

    t_ = time.time()

    print("Input shape:", input.shape)
    ids = list(np.unique(input[:, 1]))

    ######## CLEANUP

    print("Total number of ids:", len(ids))
    ids_by_frames = {}
    for row in input:
        ids_by_frames.setdefault(row[0], []).append(row[1])
    n_ids_by_frames = {k: len(v) for k, v in ids_by_frames.items()}

    #plt.bar(n_ids_by_frames.keys(), n_ids_by_frames.values(), color='g')
    #plt.show()

    print("Maximum number of ids on the same frame:",
          max(n_ids_by_frames.values()))
    ff = []
    for f, nid in n_ids_by_frames.items():
        if nid > n_clusters:
            ff.append(f)
    print("Delete frames with n_detections > n_clusters:", len(ff), ff)
    input = np.array([x for x in input if x[0] not in ff])
    min_len_tracklet = 10

    lens = []
    to_remove = []
    for i in ids:
        t = input[input[:, 1] == i][:, 0]
        if t.shape[0] < min_len_tracklet:
            to_remove.append(i)
        else:
            lens.append(t.shape[0])
    for i in to_remove:
        ids.remove(i)
        input = input[~(input[:, 1] == i)]
    print("Delete tracklets with n_detections < ", min_len_tracklet, " : ",
          len(to_remove))
    print(input.shape, "detections x features")
    print("Mean len of tracklets (in frames):", np.mean(lens))

    ######## END CLEANUP

    random_data = []
    data = []

    nn_frames = []
    for i in ids:
        group = input[:, 1] == i
        n_frames = input[group].shape[0]
        nn_frames.append(n_frames)
        d = np.zeros(input[0, 10:].shape[0] + 2)
        d[0] = input[group][:, 0].min(axis=0)
        d[1] = i
        d[2:] = input[group][:, 10:].mean(axis=0)
        d[2:] = d[2:] / np.linalg.norm(d[2:]) * n_frames
        data.append(d)

        x = np.random.random(128)
        x = x / np.linalg.norm(x)
        random_data.append(list(d[:2]) + list(x))

    data = np.array(data)
    data = data[data[:, 0].argsort()]  # sort by ascending frame_idx
    ids = list(data[:, 1])

    plt.hist(nn_frames, bins=100)
    #plt.show()

    common_frames = np.zeros((len(ids), len(ids)))
    if run_common_frames:
        print("Computing common frames matrix...")
        for i in range(len(ids)):
            for j in range(i, len(ids)):
                n_common_frames = len(
                    set(list(input[input[:, 1] == ids[i]][:, 0])).intersection(
                        list(input[input[:, 1] == ids[j]][:, 0])))
                common_frames[i, j] = n_common_frames
        print("Saved common frames matrix")
        np.save("common_frames.npy", common_frames)
    else:
        print("Loaded common frames matrix")
        common_frames = np.load("common_frames.npy")

    print("Computing 'cannot link' constraints with max_common_frames = ",
          max_common_frames)
    must_link = []
    cannot_link = [(i, j) for i in range(len(ids))
                   for j in range(i + 1, len(ids))
                   if common_frames[i, j] > max_common_frames]
    print("Number of constraints", len(cannot_link))

    if version == 1:  # SPECTRAL CLUSTERING
        if run_similarity_matrix:
            print("Computing similarity matrix...")

            similarity_matrix = np.zeros((len(ids), len(ids)))
            for i in range(len(ids)):
                for j in range(i + 1, len(ids)):
                    similarity_matrix[i, j] = (1.0 + np.max(
                        cosine_similarity(input[input[:, 1] == ids[i]][:, 10:],
                                          input[input[:, 1] == ids[j]]
                                          [:, 10:]).reshape(-1))) / 2.0
                    """if (i,j) in cannot_link:
                    similarity_matrix[i, j] = 0.0"""

                    similarity_matrix[j, i] = similarity_matrix[i, j]

            print("Saved similarity matrix")
            np.save("similarity_matrix.npy", similarity_matrix)
        else:
            print("Loaded similarity matrix")
            similarity_matrix = np.load("similarity_matrix.npy")

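        # `octave` is presumably an oct2py bridge; TCCRP is an external Octave
        # routine, and the arguments here look like placeholders.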
        clusters = octave.TCCRP([0, 1, 2, 3], [1, 2, 3, 5],
                                [1, 1, 1, [1, 2, 3]])

        print(clusters)

    if version == 0:  # SPHERICAL K-MEANS

        # CENTROID INITIALIZATION
        ids_by_frames = {}
        for row in input:
            ids_by_frames.setdefault(row[0], []).append(row[1])
        n_ids_by_frames = {k: len(v) for k, v in ids_by_frames.items()}
        ref_frame = max(n_ids_by_frames, key=lambda key: n_ids_by_frames[key])
        centers_ids_init = ids_by_frames[ref_frame]
        centers_init = [list(data[:, 1]).index(i) for i in centers_ids_init]
        print(data[:, 1])
        print(ref_frame, centers_ids_init, centers_init)

        ids = list(np.unique(data[:, 1]))
        if (len(ids) < n_clusters):
            n_clusters = len(ids)

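        # A hardcoded extra seed (data row 13) is appended to the frame-based
        # initialization below.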
        clusters, centers = cop_kmeans(dataset=data[:, 2:],
                                       initialization=centers_init + [13],
                                       k=n_clusters,
                                       ml=must_link,
                                       cl=cannot_link,
                                       spherical=True)

        if clusters is None:
            print("Error: impossible clustering")
            exit()

    output = input[:, :10]
    output[:, 1] = np.array([clusters[ids.index(int(x))] for x in input[:, 1]])

    print("Output saved to:", output_file)
    np.save(output_file, output)
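
A hypothetical driver for this variant (flags and file names assumed;
version=0 selects the spherical k-means path, version=1 the spectral path):

run_common_frames = True       # module-level flags read inside run()
run_similarity_matrix = True
run("tracklets.npy", "tracklets_clustered.npy",
    max_common_frames=0, n_clusters=11, version=0)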