예제 #1
0
    def _fit_transform(self, X, decompose_graph):
        """Fit the neighbor graph on ``X`` and embed its geodesic distances.

        A k-neighbors distance graph is built when ``self.radius`` is None,
        otherwise a radius-neighbors distance graph. The graph is then
        embedded with a precomputed-kernel ``KernelPCA``, either as a whole
        or, when ``decompose_graph`` is truthy, one connected component at a
        time.

        Attributes written on ``self``:
            nbrs_, training_data_, kernel_pca_, distance_graph: always.
            num_connected_comps, connected_comps, dist_matrices, embeddings:
                only when ``decompose_graph`` is truthy (one list entry per
                connected component).
            dist_matrix_, embedding_: only when ``decompose_graph`` is falsy.
        """
        X = check_array(X, accept_sparse='csr')

        self.nbrs_ = NearestNeighbors(
            n_neighbors=self.n_neighbors,
            algorithm=self.neighbors_algorithm,
            n_jobs=self.n_jobs,
        )
        self.nbrs_.fit(X)
        self.training_data_ = self.nbrs_._fit_X

        self.kernel_pca_ = KernelPCA(
            n_components=self.n_components,
            kernel="precomputed",
            eigen_solver=self.eigen_solver,
            tol=self.tol,
            max_iter=self.max_iter,
            n_jobs=self.n_jobs,
        )

        # Pick the neighborhood definition: fixed k, or fixed radius.
        if self.radius is not None:
            self.distance_graph = radius_neighbors_graph(
                self.nbrs_,
                radius=self.radius,
                mode='distance',
                n_jobs=self.n_jobs,
            )
        else:
            self.distance_graph = kneighbors_graph(
                self.nbrs_,
                self.n_neighbors,
                mode='distance',
                n_jobs=self.n_jobs,
            )

        if not decompose_graph:
            # Single embedding for the whole graph.
            self.dist_matrix_ = graph_shortest_path(
                self.distance_graph,
                method=self.path_method,
                directed=False,
            )
            kernel = -0.5 * (self.dist_matrix_ ** 2)
            self.embedding_ = self.kernel_pca_.fit_transform(kernel)
            return

        # One embedding per connected component of the neighbor graph.
        components = sparse.csgraph.connected_components(self.distance_graph)
        self.num_connected_comps = components[0]
        component_labels = components[1]
        self.connected_comps = []
        self.dist_matrices = []
        self.embeddings = []
        for comp_id in range(self.num_connected_comps):
            members = component_labels == comp_id
            # Restrict the graph to the rows and columns of this component.
            subgraph = self.distance_graph[members, :][:, members]
            self.connected_comps.append(subgraph)

            sub_dist = graph_shortest_path(
                subgraph,
                method=self.path_method,
                directed=False,
            )
            self.dist_matrices.append(sub_dist)

            kernel = -0.5 * (sub_dist ** 2)
            # NOTE: the same KernelPCA instance is refit for every component.
            self.embeddings.append(self.kernel_pca_.fit_transform(kernel))
예제 #2
0
 def _geodesic_distance(self, X):
     """Return all-pairs shortest-path distances, with zeros mapped to inf.

     Every zero in the ``graph_shortest_path`` result is replaced by
     ``np.inf`` and the diagonal is then reset to 0.
     """
     geodesic = graph_shortest_path(X)
     no_path = geodesic == 0
     # The original author notes the result is a float64 array, so storing
     # np.inf keeps its dtype unchanged.
     geodesic[no_path] = np.inf
     # The diagonal was overwritten by the step above; restore it.
     np.fill_diagonal(geodesic, 0)
     return geodesic
예제 #3
0
def isomap(data, n_components=2, n_neighbors=6):
    """Embed ``data`` with Isomap: neighbor graph, geodesics, then MDS.

    :param data: input passed to ``distance_mat``.
    :param n_components: forwarded to ``mds`` as its second argument.
    :param n_neighbors: neighborhood size forwarded to ``distance_mat``.
    :return: whatever ``mds`` returns for the scaled squared geodesics.
    """
    neighbor_distances = distance_mat(data, n_neighbors)
    geodesic = graph_shortest_path(neighbor_distances, directed=False)
    # -1/2 * D^2 is the matrix handed to the MDS step.
    scaled_squared = -0.5 * (geodesic**2)
    return mds(scaled_squared, n_components)
예제 #4
0
def redux(X, name):
    """Iteratively place the rows of ``X`` in 2-D using graph distances.

    Starts from a random 2-D layout and runs 500 fixed-step update sweeps.
    Each sweep computes one update vector per point from the current layout
    and the shortest-path distances on a 10-nearest-neighbor graph, then
    applies all updates at once.

    :param X: samples; one output row is produced per ``len(X)``.
    :param name: label shown on the tqdm progress bar.
    :return: array of shape ``(len(X), 2)`` with the final layout.
    """
    knn_graph = neigh.kneighbors_graph(X, 10)
    graph_dist = gsp.graph_shortest_path(knn_graph)

    n_points = len(X)
    Y = np.random.rand(n_points, 2) * 100
    gradients = np.zeros_like(Y)

    # Step size must stay very small, otherwise the layout diverges to NaN.
    learning_rate = .00005
    for _ in tqdm(range(500), leave=False, desc=name):
        for i in range(n_points):
            # Offsets from every point to point i (row i is all zeros).
            offsets = Y[i] - Y
            # normalize() leaves an all-zero row as zeros.
            unit_offsets = prep.normalize(offsets)
            targets = graph_dist[i, :]

            gradients[i] = offsets.sum(axis=0) - targets.T @ unit_offsets

        # Apply the whole sweep at once so every point used the same Y.
        Y = Y - learning_rate * gradients

    return Y
예제 #5
0
def gen_triplets_from_knn(data, indices, num_neighbors=50):
    """Order candidate triplets by shortest-path distance on a kNN graph.

    For every row ``(a, b, c)`` of ``indices`` the output row is
    ``(a, near, far)``, where ``near`` is whichever of ``b`` and ``c`` has
    the smaller shortest-path distance to ``a`` and ``far`` is the other.
    When the two distances are equal the input order ``(a, b, c)`` is kept.
    (The previous arithmetic formulation wrote the truncated mean of ``b``
    and ``c`` into both columns in that case, which is not a valid triplet.)

    :param data: samples handed to ``kneighbors_graph``.
    :param indices: integer array of shape ``(num_triplets, 3)``.
    :param num_neighbors: number of neighbors for the kNN graph.
    :return: integer array of shape ``(num_triplets, 3)``.
    """
    print('Generating the knn graph')
    sys.stdout.flush()
    kng = kneighbors_graph(data, num_neighbors, mode='distance', n_jobs=8)
    print('Computing the shortest path metric on the knn graph')
    sys.stdout.flush()
    sp_dist_matrix = graph_shortest_path(kng, method='auto', directed=False)
    del kng  # the sparse graph is no longer needed once distances exist

    anchors = indices[:, 0]
    first = indices[:, 1]
    second = indices[:, 2]

    d_first = sp_dist_matrix[anchors, first]
    d_second = sp_dist_matrix[anchors, second]
    # Swap only when the first candidate is strictly farther away.
    swap = d_first > d_second

    triplet_set = np.zeros((indices.shape[0], 3), dtype=int)
    triplet_set[:, 0] = anchors
    triplet_set[:, 1] = np.where(swap, second, first)
    triplet_set[:, 2] = np.where(swap, first, second)

    return triplet_set
예제 #6
0
파일: isomap.py 프로젝트: lwileczek/isomap
def make_adjacency(data, dist_func="euclidean", eps=1):
    """Step one of ISOMAP: weighted adjacency, then shortest-path distances.

    Pairwise distances between the COLUMNS of ``data`` are computed with
    SciPy's ``cdist``. Two points are neighbors when their distance is
    strictly below ``eps``; neighbor entries keep their distance and every
    other entry is set to ``inf``. The resulting matrix is passed to
    ``graph_shortest_path``.

    https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html

    INPUT
    ------
      data - (ndarray) 2-D array with one data point per column
      dist_func - (str) the distance metric to use. See SciPy cdist for the
                  list of options
      eps - (int/float) epsilon defining the local region, i.e. two points
            are connected if they are within epsilon of each other.

    OUTPUT
    ------
      short - (ndarray) result of ``graph_shortest_path`` on the weighted
          adjacency matrix (all-pairs shortest-path distances).
    """
    _, n_points = data.shape
    pairwise = cdist(data.T, data.T, metric=dist_func)
    within_eps = pairwise < eps

    # Start fully disconnected, then copy in the distances of true neighbors.
    weighted_adj = np.full((n_points, n_points), np.inf)
    weighted_adj[within_eps] = pairwise[within_eps]

    return graph_shortest_path(weighted_adj)
예제 #7
0
def get_rank_high(data, k_neighbours=15, knn_sym=True):
    """Rank points of the original dataset by geodesic (graph) distance.

    A kNN distance graph is built (optionally symmetrised by taking the
    element-wise maximum with its transpose) and all-pairs shortest paths
    are computed on it. If the graph has more than one connected component,
    every pair of points lying in different components is assigned
    ``10 * max(shortest-path distance)`` so that such pairs rank last.

    :param data: samples handed to ``kneighbors_graph``.
    :param k_neighbours: number of neighbors per point.
    :param knn_sym: symmetrise the kNN graph when truthy.
    :return: the result of ``get_ranking`` on the distance matrix.
    """
    KNN = kneighbors_graph(data,
                           k_neighbours,
                           mode='distance',
                           include_self=False).toarray()
    if knn_sym:
        KNN = np.maximum(KNN, KNN.T)

    n_components, labels = csgraph.connected_components(KNN)
    print(n_components)
    D_high = graph_shortest_path(KNN)

    # The count is always >= 1 for a non-empty graph, so only a count above
    # one signals a disconnected graph.
    if n_components > 1:
        max_dist = np.max(D_high) * 10
        # True wherever the row point and the column point are in different
        # components; the mask is symmetric.
        cross_component = labels[:, None] != labels[None, :]
        D_high[cross_component] = max_dist

    Rank_high = get_ranking(D_high)

    return Rank_high
예제 #8
0
def test_FloydWarshall():
    """Compare the 'FW' solver with the slow reference implementation."""
    weights = generate_graph(20)

    for is_directed in (True, False):
        actual = graph_shortest_path(weights, is_directed, 'FW')
        # The reference gets its own copy of the input matrix.
        expected = FloydWarshallSlow(weights.copy(), is_directed)

        assert_array_almost_equal(actual, expected)
def test_floyd_warshall():
    """Compare the 'FW' solver with the slow reference implementation."""
    weights = generate_graph(20)

    for is_directed in (True, False):
        actual = graph_shortest_path(weights, is_directed, 'FW')
        # The reference gets its own copy of the input matrix.
        expected = floyd_warshall_slow(weights.copy(), is_directed)

        assert_array_almost_equal(actual, expected)
예제 #10
0
def test_Dijkstra():
    """Compare the 'D' solver with the slow Floyd-Warshall reference."""
    weights = generate_graph(20)

    for is_directed in (True, False):
        actual = graph_shortest_path(weights, is_directed, 'D')
        # The reference gets its own copy of the input matrix.
        expected = FloydWarshallSlow(weights.copy(), is_directed)

        assert_array_almost_equal(actual, expected)
예제 #11
0
def test_dijkstra():
    """Compare the 'D' solver with the slow Floyd-Warshall reference."""
    weights = generate_graph(20)

    for is_directed in (True, False):
        actual = graph_shortest_path(weights, is_directed, 'D')
        # The reference gets its own copy of the input matrix.
        expected = floyd_warshall_slow(weights.copy(), is_directed)

        assert_array_almost_equal(actual, expected)
예제 #12
0
 def __compute_geodesics(self, dataset):
     """
     Return the distance matrix D for ``dataset``, where D_ij is the
     shortest-path distance between x_i and x_j on the neighbor graph
     built by ``k_nearest`` with the user-specified ``self.k``.
     """
     neighbor_graph = k_nearest(dataset, self.k)
     geodesics = sg.graph_shortest_path(neighbor_graph)
     return geodesics
예제 #13
0
def residual_variance(X, X_m, n_neighbors=20):
    """Return ``1 - r**2`` between graph distances of ``X`` and distances of ``X_m``.

    ``r`` is the Spearman correlation between the flattened shortest-path
    distances on the kNN graph of ``X`` and the flattened pairwise
    Euclidean distances of ``X_m``.

    :param X: samples used to build the kNN distance graph.
    :param X_m: samples whose pairwise Euclidean distances are compared.
    :param n_neighbors: number of neighbors for the kNN graph.
    """
    knn_dense = kneighbors_graph(
        X,
        n_neighbors=n_neighbors,
        mode='distance',
        n_jobs=mp.cpu_count(),
    ).toarray()
    geodesic_high = graph_shortest_path(knn_dense, method='D', directed=False)
    euclid_low = pairwise_distances(X_m, X_m, metric='euclidean')

    rho, _ = spearmanr(geodesic_high.flatten(), euclid_low.flatten())
    return 1 - rho**2.0
def isomap(df, p, k):
    """Run Isomap on a DataFrame and warn when the neighbor graph is split.

    :param df: object exposing ``to_numpy()`` (e.g. a pandas DataFrame).
    :param p: number of neighbors for both kNN graphs.
    :param k: forwarded to ``MDS`` as its second argument.
    :return: the result of ``MDS`` on the shortest-path distances.
    """
    points = df.to_numpy()
    weighted_knn = kneighbors_graph(points, p, mode='distance')
    connectivity = kneighbors_graph(points, p, mode='connectivity').toarray()

    geodesic = graph_shortest_path(weighted_knn, directed=False, method='FW')
    embedding = MDS(geodesic, k, True, False)

    # Report (but do not fail on) a disconnected neighbor graph.
    n_parts = len(Graph(connectivity).connected_components())
    if n_parts != 1:
        print("The graph is disconnected. Therefore we will have", n_parts,
              "separated graphs")
    return embedding
예제 #15
0
def gen_knn_graph_with_sp(data, num_neighbors):
    """
    Description: Build a kNN distance graph and return its shortest-path distances.
    :param data: samples handed to ``kneighbors_graph``.
    :param num_neighbors: number of neighbors per sample.
    :return: result of ``graph_shortest_path`` on the undirected kNN graph.
    """
    knn_graph = neighbors.kneighbors_graph(
        data, num_neighbors, mode='distance', n_jobs=8)
    return graph_shortest_path(knn_graph, method='auto', directed=False)
예제 #16
0
def cal_ontology_emb(dim=20, mi=0):
    """Embed the cell-ontology graph read from a tab-separated edge list.

    Each line of the ontology file holds two tab-separated labels; the pair
    becomes an undirected, unit-weight edge. Labels are indexed in sorted
    order.

    :param dim: embedding dimension forwarded to the embedding routine.
    :param mi: selects the pipeline:
        0 - shortest paths + ``svd_emb`` (returned ``sp`` is negated),
        1 - shortest paths + ``DCA_vector`` (returned ``sp`` is negated),
        2 - ``RandomWalkRestart`` + ``svd_emb``,
        3 - ``RandomWalkRestart`` + ``DCA_vector``.
    :return: ``(X, l2i, i2l, sp)`` - embedding, label->index map,
        index->label map, and the node-by-node matrix that was embedded.
    :raises ValueError: if ``mi`` is not one of 0, 1, 2, 3.
    """
    # Fail fast: previously an unsupported value surfaced only at the return
    # statement, as an UnboundLocalError, after the file had been parsed.
    if mi not in (0, 1, 2, 3):
        raise ValueError('mi must be one of 0, 1, 2, 3; got %r' % (mi,))

    lset = set()
    s2p = {}
    # The context manager closes the file even if a line fails to parse.
    with open(
            '/oak/stanford/groups/rbaltman/swang91/Sheng_repo/data/SingleCell/cl.ontology'
    ) as fin:
        for line in fin:
            s, p = line.strip().split('\t')
            s2p.setdefault(s, set()).add(p)
            lset.add(s)
            lset.add(p)

    lset = np.sort(list(lset))
    nl = len(lset)
    l2i = dict(zip(lset, range(nl)))
    i2l = dict(zip(range(nl), lset))

    # Symmetric 0/1 adjacency matrix of the ontology graph.
    A = np.zeros((nl, nl))
    for s in s2p:
        for p in s2p[s]:
            A[l2i[s], l2i[p]] = 1
            A[l2i[p], l2i[s]] = 1

    use_shortest_path = mi in (0, 1)
    if use_shortest_path:
        sp = graph_shortest_path(A, method='FW', directed=False)
    else:
        sp = RandomWalkRestart(A, 0.8)

    if mi in (0, 2):
        X = svd_emb(sp, dim=dim)
    else:
        X = DCA_vector(sp, dim=dim)[0]

    # Negate only after the embedding has been computed from the distances.
    if use_shortest_path:
        sp *= -1.
    return X, l2i, i2l, sp
예제 #17
0
파일: lab1.py 프로젝트: xyuany/DD2434_MLadv
def isomap(X, k=5):
    """Isomap: neighbor graph -> shortest-path distances -> ``mds``.

    The shortest-path matrix is passed to ``mds`` as is; this function does
    not square or double-center it.

    :param X: input forwarded to ``isomap_distance``.
    :param k: neighborhood size forwarded to ``isomap_distance``.
    :return: the result of ``mds`` on the shortest-path matrix.
    """
    # Graph weighted by Euclidean length between neighbors.
    neighbor_graph = isomap_distance(X, k)

    from sklearn.utils.graph_shortest_path import graph_shortest_path

    geodesic = graph_shortest_path(neighbor_graph)
    return mds(geodesic)
예제 #18
0
def isomap(z, n_dim, n_neighbor=None):
    """Isomap embedding via classical MDS on shortest-path distances.

    :param z: 2-D array; its first dimension is the number of samples.
    :param n_dim: number of leading eigen-directions to keep.
    :param n_neighbor: forwarded to ``affinity_mat``.
    :return: array of shape ``(num_samples, n_dim)``; column j is the j-th
        leading eigenvector scaled by the square root of its eigenvalue.
    """
    n, _ = z.shape
    geodesic = graph_shortest_path(affinity_mat(z, n_neighbor=n_neighbor))

    # Centering matrix H = I - (1/n) * 1 1^T.
    centering = np.eye(n) - (1 / n) * np.ones((n, n))
    gram = -0.5 * centering.dot(geodesic**2).dot(centering)

    values, vectors = np.linalg.eigh(gram)
    # eigh returns ascending eigenvalues; take the n_dim largest, descending.
    top = values.argsort()[::-1][:n_dim]
    top_values = values[top]
    top_vectors = vectors[:, top]

    return np.dot(top_vectors, np.diag(top_values**(1 / 2)))
예제 #19
0
def cal_ontology_emb(dim=20, mi=0, DATA_DIR = '../../OnClass_data/'):
    """Embed the cell-ontology graph stored under ``DATA_DIR``.

    Reads ``DATA_DIR + 'cell_ontology/cl.ontology'``; each line holds two
    tab-separated labels that become an undirected, unit-weight edge.
    Labels are indexed in sorted order.

    :param dim: embedding dimension forwarded to the embedding routine.
    :param mi: selects the pipeline:
        0 - shortest paths + ``svd_emb`` (returned ``sp`` is negated),
        1 - shortest paths + ``DCA_vector`` (returned ``sp`` is negated),
        2 - ``RandomWalkRestart`` + ``svd_emb``,
        3 - ``RandomWalkRestart`` + ``DCA_vector``.
    :param DATA_DIR: directory prefix (string-concatenated, so it must end
        with a path separator).
    :return: ``(X, l2i, i2l, sp)`` - embedding, label->index map,
        index->label map, and the node-by-node matrix that was embedded.
    :raises ValueError: if ``mi`` is not one of 0, 1, 2, 3.
    """
    # Fail fast: previously an unsupported value surfaced only at the return
    # statement, as an UnboundLocalError, after the file had been parsed.
    if mi not in (0, 1, 2, 3):
        raise ValueError('mi must be one of 0, 1, 2, 3; got %r' % (mi,))

    lset = set()
    s2p = {}
    # The context manager closes the file even if a line fails to parse.
    with open(DATA_DIR + 'cell_ontology/cl.ontology') as fin:
        for line in fin:
            s, p = line.strip().split('\t')
            s2p.setdefault(s, set()).add(p)
            lset.add(s)
            lset.add(p)

    lset = np.sort(list(lset))
    nl = len(lset)
    l2i = dict(zip(lset, range(nl)))
    i2l = dict(zip(range(nl), lset))

    # Symmetric 0/1 adjacency matrix of the ontology graph.
    A = np.zeros((nl, nl))
    for s in s2p:
        for p in s2p[s]:
            A[l2i[s], l2i[p]] = 1
            A[l2i[p], l2i[s]] = 1

    use_shortest_path = mi in (0, 1)
    if use_shortest_path:
        sp = graph_shortest_path(A, method='FW', directed=False)
    else:
        sp = RandomWalkRestart(A, 0.8)

    if mi in (0, 2):
        X = svd_emb(sp, dim=dim)
    else:
        X = DCA_vector(sp, dim=dim)[0]

    # Negate only after the embedding has been computed from the distances.
    if use_shortest_path:
        sp *= -1.
    return X, l2i, i2l, sp
예제 #20
0
def prob_to_dist_func(prob):
    """
    Turn a square probability matrix into shortest-path distances.

    Probabilities are mapped to distances with ``-log``, completed with
    ``graph_shortest_path``, and then restricted to the nodes whose row sum
    is positive.

    Returns ``(dist_func, keep_idxs)``: the restricted distance matrix and
    the indices of the rows/columns that were kept.
    """
    n_rows, n_cols = prob.shape
    assert n_rows == n_cols, "a square matrix is required"

    # Indices of nodes with a positive total outgoing probability.
    row_totals = np.sum(prob, 1)
    keep_idxs = np.nonzero(row_totals > 0)[0]

    # Probabilities -> distances, then fill in via shortest paths.
    completed = graph_shortest_path(-np.log(prob))

    # Keep only the retained nodes on both axes.
    dist_func = completed[keep_idxs, :][:, keep_idxs]

    return dist_func, keep_idxs
def get_dist_manifold(data, k_neighbours=20, knn_sym=True):
    """
    Compute geodesic distances of the dataset: estimate a kNN distance
    graph and take shortest paths on it. When the graph has more than one
    connected component, it is first passed through ``connect_knn`` together
    with the full Euclidean distance matrix.
    """
    sparse_knn = kneighbors_graph(data,
                                  k_neighbours,
                                  mode='distance',
                                  include_self=False)
    KNN = sparse_knn.toarray()
    if knn_sym:
        # Symmetrise: keep an edge if either endpoint lists the other.
        KNN = np.maximum(KNN, KNN.T)

    n_components, labels = csgraph.connected_components(KNN)

    if n_components > 1:
        print('Connecting', n_components)
        euclidean = pairwise_distances(data, metric='euclidean')
        KNN = connect_knn(KNN, euclidean, n_components, labels)

    return graph_shortest_path(KNN)
 def find_geodesic_distance_matrix(self):
     """Compute shortest-path distances on the kNN graph of ``self.X``.

     ``self.X`` is used column-wise: it is transposed before being handed
     to the neighbor search, so each column is one sample. When
     ``self.n_neighbors`` is None it is set to the number of samples.
     The result is stored in ``self.geodesic_dist_matrix``; nothing is
     returned.
     """
     # ----- find k-nearest neighbor graph (distance matrix):
     if self.n_neighbors is None:
         n_samples = self.X.shape[1]
         self.n_neighbors = n_samples
     knn = KNN(
         n_neighbors=self.n_neighbors + 1,
         algorithm='kd_tree',
         n_jobs=self.n_jobs)  # +1 because the point itself is also counted
     knn.fit(X=self.X.T)
     # https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.NearestNeighbors.html#sklearn.neighbors.NearestNeighbors.kneighbors_graph
     # Per the original author's note: the call below returns an
     # n_samples x n_samples matrix with 0 on the diagonal and wherever two
     # points are not directly connected in the kNN graph; if K=n_samples,
     # only the diagonal is zero.
     Euclidean_distance_matrix = knn.kneighbors_graph(
         X=self.X.T, n_neighbors=self.n_neighbors,
         mode='distance')  # --> gives Euclidean distances
     # ----- find geodesic distance graph:
     # https://scikit-learn.org/stable/modules/generated/sklearn.utils.graph_shortest_path.graph_shortest_path.html
     self.geodesic_dist_matrix = graph_shortest_path(
         dist_matrix=Euclidean_distance_matrix,
         method="auto",
         directed=False)
예제 #23
0
 def creat_graph_and_calc_dist_verb(A):
     """
     Treat ``A`` as an undirected adjacency matrix and return its
     all-pairs shortest-path distances.
     """
     shortest = gp.graph_shortest_path(A, method='auto', directed=False)
     return shortest
예제 #24
0
def cal_ontology_emb(
        dim=20,
        mi=0,
        use_pretrain=None,
        ontology_nlp_file='../../OnClass_data/cell_ontology/cl.ontology.nlp',
        ontology_file='../../OnClass_data/cell_ontology/cl.ontology'):
    """Embed the cell-ontology graph, optionally via an on-disk cache.

    If ``use_pretrain`` is given and both ``use_pretrain + 'X.npy'`` and
    ``use_pretrain + 'sp.npy'`` exist, all four results are loaded from the
    ``use_pretrain`` prefix and nothing is recomputed. Otherwise the
    embedding is computed from the ontology files and, when
    ``use_pretrain`` is given, saved under that prefix.

    Edge weights: a three-column ontology line supplies its own weight; a
    two-column line takes its weight from ``ontology_nlp_file`` when that
    file lists the pair (labels upper-cased there), else 1.

    :param dim: embedding dimension forwarded to the embedding routine.
    :param mi: selects the pipeline (only used when computing):
        0 - shortest paths + ``svd_emb`` (returned ``sp`` is negated),
        1 - shortest paths + ``DCA_vector`` (returned ``sp`` is negated),
        2 - ``RandomWalkRestart`` + ``svd_emb``,
        3 - ``RandomWalkRestart`` + ``DCA_vector``.
    :param use_pretrain: cache path prefix, or None to disable caching.
    :param ontology_nlp_file: tab-separated ``s, p, weight`` file, or None.
    :param ontology_file: tab-separated edge list (2 or 3 columns).
    :return: ``(X, l2i, i2l, sp)``.
    :raises ValueError: when computing and ``mi`` is not one of 0, 1, 2, 3.
    """
    cache_ready = (use_pretrain is not None
                   and os.path.isfile(use_pretrain + 'X.npy')
                   and os.path.isfile(use_pretrain + 'sp.npy'))
    if cache_ready:
        # NOTE(security): allow_pickle=True executes pickled objects; only
        # point use_pretrain at cache files this code wrote itself.
        X = np.load(use_pretrain + 'X.npy')
        i2l = np.load(use_pretrain + 'i2l.npy', allow_pickle=True).item()
        l2i = np.load(use_pretrain + 'l2i.npy', allow_pickle=True).item()
        sp = np.load(use_pretrain + 'sp.npy', allow_pickle=True)
        return X, l2i, i2l, sp

    # Fail fast: previously an unsupported value surfaced only later, as an
    # UnboundLocalError, after both files had been parsed.
    if mi not in (0, 1, 2, 3):
        raise ValueError('mi must be one of 0, 1, 2, 3; got %r' % (mi,))

    # Optional text-derived weights, stored symmetrically.
    cl_nlp = collections.defaultdict(dict)
    if ontology_nlp_file is not None:
        with open(ontology_nlp_file) as fin:
            for line in fin:
                s, p, wt = line.upper().strip().split('\t')
                cl_nlp[s][p] = float(wt)
                cl_nlp[p][s] = float(wt)

    lset = set()
    s2p = {}
    with open(ontology_file) as fin:
        for line in fin:
            w = line.strip().split('\t')
            s = w[0]
            p = w[1]
            if len(w) == 2:
                # Membership tests (not indexing) so the defaultdict does
                # not grow while it is only being queried.
                if p in cl_nlp and s in cl_nlp[p]:
                    wt = cl_nlp[p][s]
                else:
                    wt = 1.
            else:
                wt = float(w[2])
            s2p.setdefault(s, {})[p] = wt
            lset.add(s)
            lset.add(p)

    lset = np.sort(list(lset))
    nl = len(lset)
    l2i = dict(zip(lset, range(nl)))
    i2l = dict(zip(range(nl), lset))

    # Symmetric weighted adjacency matrix of the ontology graph.
    A = np.zeros((nl, nl))
    for s in s2p:
        for p in s2p[s]:
            A[l2i[s], l2i[p]] = s2p[s][p]
            A[l2i[p], l2i[s]] = s2p[s][p]

    use_shortest_path = mi in (0, 1)
    if use_shortest_path:
        sp = graph_shortest_path(A, method='FW', directed=False)
    else:
        sp = RandomWalkRestart(A, 0.8)

    if mi in (0, 2):
        X = svd_emb(sp, dim=dim)
    else:
        X = DCA_vector(sp, dim=dim)[0]

    # Negate only after the embedding has been computed from the distances.
    if use_shortest_path:
        sp *= -1.

    if use_pretrain is not None:
        np.save(use_pretrain + 'X.npy', X)
        np.save(use_pretrain + 'i2l.npy', i2l)
        np.save(use_pretrain + 'l2i.npy', l2i)
        np.save(use_pretrain + 'sp.npy', sp)
    return X, l2i, i2l, sp
예제 #25
0
def get_shortest_paths(weighted_matrix: np.ndarray, inf: float = 1e6) -> np.ndarray:
    """Return all-pairs shortest paths for a positive directed or undirected graph.

    The weights are first converted with ``make_distance_matrix`` (which
    receives ``inf``), then handed to ``graph_shortest_path``.
    """
    distances = make_distance_matrix(weighted_matrix, inf)
    return graph_shortest_path(distances)
예제 #26
0
    edges = get_edges_weights(distances)
    G.add_edges_from(edges)

    nx.draw(G)
    nx.draw_networkx(G, node_size=25, edge_color='white', with_labels=False)

    # --- Exporting to CSV --- #
    Edge = namedtuple('Edge', ['source', 'target', 'weight'])
    edges = []
    for i in range(distances.shape[0]):
        for j in range(distances.shape[1]):
            edges.append(Edge(i + 1, j + 1, distances[i, j]))
    edge_df = pd.DataFrame(edges)

    # --- Projecting data into 2 dimensions via PCA--- #
    graph = graph_shortest_path(distances)
    sc = StandardScaler()
    pc = PCA(2)
    projected = pc.fit_transform(sc.fit_transform(graph))
    plt.scatter(projected[:, 0], projected[:, 1], s=5, alpha=.5)

    # --- Showing numpy array as image --- #
    img1 = data[0, :].reshape(64, 64)
    plt.imshow(img1, cmap='gray')

    # --- Full ISOMAP --- #
    A = make_affinity_matrix(data, e=22.5)
    weighted_A = make_weighted_matrix(A)
    distances = make_distance_matrix(weighted_A)
    graph = graph_shortest_path(distances)
    tau = make_tau_matrix(graph)
예제 #27
0

# Explore the maze breadth-first. An IndexError raised during the search is
# deliberately ignored; the exploration history gathered so far is used below.
crawler = Crawler()
try:
    crawler.breadth_first_search()
except IndexError:
    pass

# Rebuild the maze from the crawl history, then derive the mask used for the
# grid graph.
maze = plot_maze(crawler.history)
maze2 = condition_maze(maze)

# Each single-element unpacking requires exactly one matching cell.
# NOTE(review): 2 presumably marks the oxygen cell and 3 the start - confirm.
[x_oxy], [y_oxy] = np.where(maze == 2)
[x_start], [y_start] = np.where(maze == 3)

# Grid connectivity restricted to the masked cells, as a dense array, then
# all-pairs shortest paths on it.
graph = image.grid_to_graph(*maze2.shape, mask=maze2, return_as=np.ndarray)
shortest_paths = graph_shortest_path(graph)

# The largest shortest-path length anywhere in the graph is reported.
print("Time-to-oxygen = {} minutes".format(np.max(np.unique(shortest_paths))))

# ij_to_g = ij_to_graph_index(maze2)

# print("SHORTEST PATH ISSSSSSSS!")
# print(shortest_paths[ij_to_g[(x_start, y_start)], ij_to_g[(x_oxy, y_oxy)]])

# ic = IntcodeComputer(allow_pausing=True)
# ic.code[0] = 2
# ic.run(0)
# ic.resume(0)
# ic.resume(0)
# while ic.continue_flag:
#   ic.resume(next_input)
예제 #28
0
def MMfeaturesBoot(Location, filename, summary, slots_offered):
    """Fit choice-model coefficients by repeated ``update_beta`` steps.

    Starts from random coefficients, checks a connectivity condition on the
    item-item graph (padding the data with synthetic rows when it fails),
    then iterates ``update_beta`` until the coefficients stop changing or
    the iteration count exceeds 500. The predicted probabilities and the
    final coefficients are written to CSV files.

    :param Location: path prefix for the output files (string-concatenated).
    :param filename: file-name stem appended to ``Location``.
    :param summary: table indexed with ``.loc``; must contain the columns
        ``'NO_PURCHASE'`` + ``slots_offered`` and their ``'C_'``-prefixed
        counterparts.
    :param slots_offered: list of slot column names.
    :return: first row of the coefficient DataFrame (``beta_df.iloc[0]``).
    """
    # Initial coefficients: a leading 0 followed by random values, one per
    # slot plus three extra (named Discount/Eco/Gr when saved below).
    beta_coef = np.append([0], np.random.rand(len(slots_offered) + 3))
    features_df, disc_cols, eco_cols, gr_cols = get_active_features(
        summary, slots_offered)
    beta_ext = expand_beta(beta_coef, len(disc_cols), len(eco_cols),
                           len(gr_cols))
    design_df = get_design_matrix(features_df.columns.tolist(), slots_offered)
    # 'C_'-prefixed columns form the assortment matrix; the unprefixed ones
    # form the choice matrix. Missing values become 0 in both.
    assortment_df = summary.loc[:, [
        'C_' + col for col in ['NO_PURCHASE'] + slots_offered
    ]].fillna(0)
    choice_df = summary.loc[:,
                            [col for col in ['NO_PURCHASE'] +
                             slots_offered]].fillna(0)
    design = design_df.values
    features = features_df.values
    assortment = assortment_df.values
    choice = choice_df.values

    # Column index of every entry equal to 1 in the choice matrix.
    # NOTE(review): C[i] is later used as "the choice of row i", which
    # assumes exactly one 1 per row - confirm against the data.
    C = np.where(choice == 1)[1]
    membership = assortment
    nprods = assortment.shape[1]
    ## check if the MM algorithm would converge by testing if the item-item graph
    # is strongly connected
    # Edge list of the item-item graph: from the chosen item of each row to
    # every other item with a nonzero assortment entry in that row.
    row = []
    col = []
    data = []
    for i in range(membership.shape[0]):
        assort = list(np.nonzero(membership[i, :])[0])
        try:
            assort.remove(C[i])
        except ValueError:
            # The chosen item is not in this row's assortment: report the
            # row and stop building the graph (later rows are skipped).
            print(i, C[i], assort)
            break
        row += len(assort) * [C[i]]
        col += assort
        data += len(assort) * [1]

    dist_matrix = csr_matrix((data, (row, col)), shape=(nprods, nprods))
    Z = graph_shortest_path.graph_shortest_path(
        dist_matrix, method='D')  # Dijkstra's algorithm
    I = np.eye(nprods)
    # I + Z has a zero entry exactly where an off-diagonal shortest-path
    # entry is 0, which is taken here as "no path between the two items".
    if np.count_nonzero(I + Z) < nprods**2:
        # condition for convergence of MM algo not met
        sys.stderr.write(
            'Warning: Convergence condition for MM algorithm not met...adding noise to the data matrix...\n'
        )
        # All unordered pairs of items, excluding item 0.
        pairs = [
            pair for pair in combinations(np.delete(np.arange(nprods), 0), 2)
        ]
        npairs = len(pairs)
        pairs = np.array(pairs)
        # Every pair appears twice so that each of its items is chosen once.
        pairs = np.tile(pairs, (2, 1))
        # Z is reused: it now holds one synthetic assortment row per pair.
        Z = np.zeros((len(pairs), nprods))
        for i, pair in enumerate(pairs):
            Z[i, pair] = 1
        assortment = np.vstack((assortment, Z))
        # Chosen item per synthetic row: the first item of the pair in the
        # first copy, the second item in the second copy.
        d = np.append(pairs[:npairs, 0], pairs[npairs:, 1])
        choicenew = np.zeros((Z.shape[0], nprods))
        choicenew[np.arange(Z.shape[0]), d] = 1
        choice = np.vstack((choice, choicenew))
        # Synthetic feature rows: 1 in the first nprods columns, 0 elsewhere.
        featuresnew = np.zeros((Z.shape[0], features.shape[1]))
        featuresnew[:, np.arange(nprods)] = 1
        features = np.vstack((features, featuresnew))
    i = 0
    while True:
        i += 1
        # Copies of the current state, handed to update_beta and kept for
        # the convergence comparison.
        beta = np.copy(beta_coef)
        beta_ext_cp = np.copy(beta_ext)
        beta_coef, Q = update_beta(design, features, disc_cols, eco_cols,
                                   gr_cols, assortment, choice, beta_ext_cp,
                                   beta, slots_offered)
        # Sum over rows of log(sum of Q over the chosen entries).
        log_likeli = sum(np.log(sum(Q * choice, 1)))
        beta_ext = expand_beta(beta_coef, len(disc_cols), len(eco_cols),
                               len(gr_cols))
        print('Iteration=', i, 'loglikelihood =', log_likeli, 'beta_disc',
              beta_coef[-3], 'beta_eco', beta_coef[-2], 'beta_gr',
              beta_coef[-1])
        # Stop when all coefficients except the last moved by less than 1e-6
        # (Euclidean norm), or after more than 500 iterations.
        if np.linalg.norm(beta_coef[:-1] - beta[:-1]) < 10**-6 or i > 500:
            predict_prob_df = pd.DataFrame(Q,
                                           columns=['NO_PURCHASE'] +
                                           slots_offered)
            beta_df = pd.DataFrame([np.array(beta_coef)],
                                   columns=['NO_PURCHASE'] + slots_offered +
                                   ['Discount', 'Eco', 'Gr'])
            predict_prob_df.to_csv(Location + filename +
                                   'predprobfeatures.csv')
            beta_df.to_csv(Location + filename + 'betafeatures.csv')
            # Drop local references to the large intermediates; beta_df is
            # kept because it is returned below.
            del summary, predict_prob_df, design_df, features_df, assortment_df, choice_df, design, features, assortment, choice
            break
    return beta_df.iloc[0]
from sklearn.neighbors import NearestNeighbors
from sklearn.utils.graph_shortest_path import graph_shortest_path
import networkx as nx
import pickle
feat = 'upc'
k = 3
# Fit with k + 1 neighbours: per the original note, the first neighbour of
# every point is the point itself.
nbrs = NearestNeighbors(
    n_neighbors=k + 1, metric='cosine', algorithm='brute').fit(x_new_stack_T)
# Sparse kNN graph whose stored entries are cosine distances.
knnmatrix = nbrs.kneighbors_graph(x_new_stack_T, mode='distance')
# Clamp any negative stored distance to zero.
knnmatrix.data[np.where(knnmatrix.data < 0)] = 0
# Shortest-path edge weight from v_i to v_j
# (doc: https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/graph_shortest_path.pyx)
sp = graph_shortest_path(knnmatrix, directed=False)
G = nx.Graph(knnmatrix)
# Weighted shortest paths as node lists, for every source/target pair; use
# len() on a path to get its length in nodes.
spl = nx.shortest_path(G, weight='weight')

# Persist the results. Each file is opened in a `with` block so the handle
# is flushed and closed right after the dump instead of being left to the
# garbage collector.
_suffix = '_' + feat + '_k_' + str(k) + '.pickle.dump'
with open('knn' + _suffix, 'wb') as _out:
    pickle.dump(knnmatrix, _out)  # used to smooth out features
with open('sp_all' + _suffix, 'wb') as _out:
    pickle.dump(sp, _out)
with open('spl_all' + _suffix, 'wb') as _out:
    pickle.dump(spl, _out)
##
knnmatrix_all = pickle.load(
예제 #30
0
        if pair != -1:
            if pair not in thresh_g:
                thresh_g.node[n]["Pair"] = -1
                thresh_g.node[n]["Pair ID"] = -1
                n_missing += 1

    mg = MetaGraph(thresh_g, weight="max_norm_weight")
    meta = mg.meta

    adj = mg.adj.copy()
    # colsums = np.sum(adj, axis=0)
    # colsums[colsums == 0] = 1
    # adj = adj / colsums[np.newaxis, :]
    adj = pass_to_ranks(adj)
    if use_spl:
        adj = graph_shortest_path(adj)
    if plus_c:
        adj += np.min(adj)

    if embed == "lse":
        latent = lse(adj, None, ptr=False)
    elif embed == "ase":
        latent = ase(adj, None, ptr=False)

    rot_latent, diff = procrustes_match(latent, meta)
    rot_latent = latent
    n_components = latent.shape[1]

    plot_df = pd.DataFrame(data=rot_latent)
    plot_df["Class"] = mg["Class 1"]
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
            l2i = npzfile['l2i'].item()
            i2l = npzfile['i2l'].item()
            cls2cls = npzfile['cls2cls']
            test_Y = npzfile['test_Y']
            ntest = len(test_Y)
            ncls = nseen + len(unseen_l)
            seen_l = np.array(range(nseen))

            cls2cls = np.zeros((ncls, ncls))
            fin = open(DATA_DIR + '/cell_ontology/cl.ontology')
            for line in fin:
                w = line.strip().split('\t')  #w[1] is parent of w[0]
                cls2cls[int(l2i[w[0]]), int(l2i[w[1]])] = 1
                cls2cls[int(l2i[w[1]]), int(l2i[w[0]])] = 1
            fin.close()
            sp = graph_shortest_path(cls2cls, method='FW', directed=False)

            pname = translate_paramter([nn_nhidden, keep_prob, KNN])
            pred_Y_all = np.load(our_output_dir + '/' + dname + '/' +
                                 str(iter) + '/' + str(unseen_ratio) + '/' +
                                 pname + 'pred_Y_all.npy')
            #res = evaluate(pred_Y_all, test_Y, unseen_l, nseen, Y_ind = test_Y_ind, Y_net = onto_net, write_screen = False, metrics = metrics, prefix = str(KNN))
            Y_truth_bin_mat = ConvertLabels(test_Y, ncls)
            class_auc_macro = np.full(ncls, np.nan)
            class_auprc_macro = np.full(ncls, np.nan)

            for i in unseen_l:
                if len(np.unique(Y_truth_bin_mat[:, i])) == 2:
                    class_auc_macro[i] = roc_auc_score(Y_truth_bin_mat[:, i],
                                                       pred_Y_all[:, i])
                    for cutoff in cutoffs:
예제 #32
0
def gen_triplets_from_knn_in_batches(data,
                                     random_triplet_indices,
                                     num_neighbors=50,
                                     batch_size=10000):
    """
    Description: Order candidate triplets by shortest-path distance on a
    kNN graph, processing the candidates in batches.

    For every row ``(a, b, c)`` of ``random_triplet_indices`` the output row
    is ``(a, near, far)``, where ``near`` is whichever of ``b`` and ``c``
    has the smaller shortest-path distance to ``a``. When the two distances
    are equal the input order is kept. (The previous arithmetic formulation
    wrote the truncated mean of ``b`` and ``c`` into both columns in that
    case, and relied on ``np.int``, which NumPy 1.24 removed.)

    :param data: samples handed to ``kneighbors_graph``.
    :param random_triplet_indices: integer array of shape (num_triplets, 3).
    :param num_neighbors: number of neighbors for the kNN graph.
    :param batch_size: number of triplets processed per batch.
    :return: integer array of shape (num_triplets, 3).
    """

    kng = kneighbors_graph(data, num_neighbors, mode='distance', n_jobs=8)
    sp_dist_matrix = graph_shortest_path(kng, method='auto', directed=False)
    del kng  # the sparse graph is no longer needed once distances exist

    num_triplets = random_triplet_indices.shape[0]
    triplet_set = np.zeros((num_triplets, 3), dtype=int)

    # Slices that run past the end are clamped, so the last (possibly
    # shorter) batch needs no special case.
    for start in range(0, num_triplets, batch_size):
        stop = start + batch_size
        batch = random_triplet_indices[start:stop, :]
        anchors = batch[:, 0]
        first = batch[:, 1]
        second = batch[:, 2]

        d_first = sp_dist_matrix[anchors, first]
        d_second = sp_dist_matrix[anchors, second]
        # Swap only when the first candidate is strictly farther away.
        swap = d_first > d_second

        triplet_set[start:stop, 0] = anchors
        triplet_set[start:stop, 1] = np.where(swap, second, first)
        triplet_set[start:stop, 2] = np.where(swap, first, second)

    return triplet_set
x_new_te = sparse.lil_matrix(sparse.csr_matrix(XTE)[:,list(range(upcStart-1,nextStart-1))])
# Stack train and test rows, then transpose so that each row is one feature.
# To inspect boundary elements: print(sparse.csr_matrix(x_new_stack_T)[0])
x_new_stack_T = vstack([x_new_tr,x_new_te]).T
from sklearn.neighbors import NearestNeighbors
from sklearn.utils.graph_shortest_path import graph_shortest_path
import networkx as nx
import pickle
feat='upc'
k=3
# Fit with k + 1 neighbours: per the original note, the first neighbour of
# every point is the point itself.
nbrs = NearestNeighbors(n_neighbors=k+1,metric='cosine',algorithm='brute').fit(x_new_stack_T)
# Sparse kNN graph whose stored entries are cosine distances.
knnmatrix = nbrs.kneighbors_graph(x_new_stack_T,mode='distance')
# Clamp any negative stored distance to zero.
knnmatrix.data[np.where(knnmatrix.data<0)]=0
# Shortest-path edge weight from v_i to v_j
# (doc: https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/graph_shortest_path.pyx)
sp = graph_shortest_path(knnmatrix,directed=False)
G = nx.Graph(knnmatrix)
# Weighted shortest paths as node lists, for every source/target pair; use
# len() on a path to get its length in nodes.
spl = nx.shortest_path(G, weight='weight')

# Persist and reload the results. Files are opened in `with` blocks so every
# handle is flushed and closed before the same file is read back.
_suffix = '_'+feat+'_k_'+str(k)+'.pickle.dump'
with open('knn'+_suffix,'wb') as _fh:
    pickle.dump(knnmatrix,_fh) # used to smooth out features
with open('sp_all'+_suffix,'wb') as _fh:
    pickle.dump(sp,_fh)
with open('spl_all'+_suffix,'wb') as _fh:
    pickle.dump(spl,_fh)

with open('knn'+_suffix,'rb') as _fh:
    knnmatrix_all = pickle.load(_fh)
with open('sp_all'+_suffix,'rb') as _fh:
    sp_all = pickle.load(_fh)
# Read back the file written above; the previous code looked for a '.txt'
# name that this script never creates.
with open('spl_all'+_suffix,'rb') as _fh:
    spl_all = pickle.load(_fh)

cosine_dist_all = pairwise_distances(x_new_stack_T, metric="cosine")
with open('cosine_dist_all_'+feat+'.pickle.dump','wb') as _fh:
    pickle.dump(cosine_dist_all,_fh)