Example no. 1
0
 def sparsedataload(data_set_name, train_test_validate_designation, data_file_extension, native_binary=True):
     """Load a sparse data set via AdamMesser.ImportSparse and return it densified.

     :param data_set_name: base name of the data set to load.
     :param train_test_validate_designation: which split to load ('train', 'test', 'valid', ...).
     :param data_file_extension: extension of the text-formatted input file.
         NOTE(review): currently unused -- ImportSparse is called without a file
         name in either branch; confirm whether the extension should be forwarded.
     :param native_binary: when True, ask ImportSparse for the binary-formatted file.
     :return: the loaded data as a dense matrix (via csc_matrix.todense).
     """
     if native_binary:
         loaded_sparse_data = AdamMesser.ImportSparse(data_set_name, settype=train_test_validate_designation,
                                                      binary_formatted_input_file=True)
     else:
         # Dead local removed: the original built a file name string here
         # (data_set_name + '_' + designation + '.' + extension) but never used it.
         loaded_sparse_data = AdamMesser.ImportSparse(data_set_name, settype=train_test_validate_designation)
     return csc_matrix.todense(loaded_sparse_data)
def print_matrix(H):
    """Print a matrix to stdout, densifying sparse CSC input first.

    :param H: matrix-like object; csc_matrix instances are converted to
        dense form before printing, anything else is printed as-is.
    :return: 0 always.
    """
    printable = csc_matrix.todense(H) if isinstance(H, csc_matrix) else H
    print(printable)
    return 0
Example no. 3
0
    def cosine_dis(word1, word2):
        """
        Calculate the cosine *distance* between the character-count vectors
        of two words.

        NOTE(review): despite the original docstring saying "similarity",
        scipy.spatial.distance.cosine returns the cosine distance
        (1 - cosine similarity): 0.0 for identical direction, up to 2.0
        for opposite vectors.

        :param word1: (str) First word.
        :param word2: (str) Second word.
        :return: (float) cosine distance between the two char-count vectors.
        """
        from scipy.sparse import csc_matrix
        from scipy.spatial.distance import cosine
        from sklearn.feature_extraction.text import CountVectorizer

        # Count individual characters so both words share one vocabulary.
        vectorizer = CountVectorizer(analyzer='char')
        word_vec = vectorizer.fit_transform([word1, word2])
        # Densify: row 0 is word1's counts, row 1 is word2's.
        word_vec = csc_matrix.todense(word_vec)
        word_dis = cosine(word_vec[0], word_vec[1])

        return word_dis
Example no. 4
0
def main(args):
    """Train a multi-view graph autoencoder (MagCAE) and evaluate link prediction.

    args is an argparse-style namespace providing: dataset, output, epochs,
    agg (embedding aggregation mode), p (hidden-size factor), tr (training
    ratio, reported only), lam (loss weight) and loss (loss-function mode).
    Relies on project-level helpers defined elsewhere: load_data,
    mask_test_edges, preprocess_graph, sparse_to_tuple, GAE, Decoder,
    aggregate_embeddings, loss_function, cal_attr_sim, evaluate.
    """

    dataset = args.dataset
    emb_output_dir = args.output  # NOTE(review): read but never used below
    epochs = args.epochs
    agg = args.agg
    p = args.p
    tr = args.tr
    lam = args.lam
    lose_func = args.loss

    # Preprocess dataset
    adj, views_features = load_data(dataset, num_views=3)
    adj_orig = adj
    # Remove the diagonal (self-loops) from the original adjacency matrix.
    adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()
    # Calculate pairwise simlarity.
    views_sim_matrix = {}
    views_feature_matrix = {}

    # Densify each view's feature matrix for the attribute-similarity step.
    for view in list(views_features.keys()):
        feature_matrix = csc_matrix.todense(views_features[view])
        views_feature_matrix.update({view:feature_matrix})

    kernal = "rbf"  # NOTE(review): assigned but never used in this function
    if lose_func == 'all':
        attr_sim = cal_attr_sim(views_feature_matrix, dataset)
    else:
        attr_sim = 0

    # split nodes to train, valid and test datasets, 
    # remove test edges from train adjacent matrix. 
    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(dataset, adj)

    print("Masking edges Done!")
    adj = adj_train
    nx_G = nx.from_numpy_array(adj.toarray())
    num_nodes = adj.shape[0]
    adj_norm = preprocess_graph(adj)

    # Convert each view's features to sparse-tuple form and record, per view,
    # the feature count ([2][1] of the shape) and the number of non-zeros.
    views_features_num = {}
    views_features_nonzero = {}
    for view in list(views_features.keys()):
        views_features[view] = sparse_to_tuple(views_features[view].tocoo())
        views_features_num.update({view:views_features[view][2][1]})
        views_features_nonzero.update({view:views_features[view][1].shape[0]})

    # Build model: one GAE per view, hidden sizes scaled by the factor p.
    MagCAE = {}
    for view in list(views_features.keys()):
        x,y = views_features[view][2][0], views_features[view][2][1]
        model = GAE(y, views_features_nonzero[view], adj_norm, math.ceil(2*p*y), math.ceil(p*y))
        MagCAE.update({view:model})

    # Loss function and optimizer.
    # loss weight taken by each nodes to the total loss.
    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) /adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(adj.shape[0] * adj.shape[0] - adj.sum())*2
    optimizer = tf.keras.optimizers.Adam()

    # Reconstruction target: training adjacency plus self-loops, converted to
    # a dense tensor and flattened to 1-D for the loss.
    adj_targ = adj_train + sp.eye(adj_train.shape[0])
    adj_targ = sparse_to_tuple(adj_targ)

    indices= np.array(adj_targ[0])
    values = np.array(adj_targ[1])
    dense_shape = np.array(adj_targ[2])
    sparse_targ = tf.SparseTensor(indices = indices,
                                    values = values,
                                    dense_shape = dense_shape)
    sparse_targ = tf.cast(sparse_targ, dtype=tf.float32)

    adj_targ = tf.sparse.to_dense(sparse_targ)
    adj_targ = tf.reshape(adj_targ,[-1])
    # Train and Evaluate Model
    # Training Loop:
    # In each epoch: views - > view_embedding -> aggregate embedding -> total loss ->  update gradients
    decoder = Decoder(100)

    for epoch in range(epochs):
        loss = 0
        start = time.time()

        with tf.GradientTape() as tape:
            ag_embedding ={}


            # Encode each view independently with its own GAE.
            for VAE in list(MagCAE.keys()):
                v_embedding, a_hat = MagCAE[VAE](views_features[VAE])
                ag_embedding.update({VAE:v_embedding})

            # aggregate embeddings
            embedding, aggregator = aggregate_embeddings(ag_embedding, agg)
            # reconstruct a_hat
            a_hat = decoder(embedding)
            loss += loss_function(a_hat, adj_targ, pos_weight, norm, attr_sim, embedding, num_nodes, lam, lose_func)

        # BUG(review): 'variables' is only assigned when agg ==
        # "weighted_concat"; any other agg mode raises NameError at
        # tape.gradient below.
        if agg == "weighted_concat":
            variables = MagCAE['view1'].trainable_variables + MagCAE['view2'].trainable_variables + MagCAE['view3'].trainable_variables + aggregator.trainable_variables

        gradients = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))

        # Evaluate on validate set
        embedding = np.array(embedding)
        roc_cur, ap_cur, _, _ = evaluate(val_edges, val_edges_false, adj_orig, embedding)

        print("Epoch {}: Val_Roc {:.4f}, Val_AP {:.4f}, Time Consumed {:.2f} sec\n".format(epoch+1, roc_cur, ap_cur, time.time()-start))

    print("Training Finished!")

    # Evaluation Result on test Edges
    test_embedding= {}
    for VAE in list(MagCAE.keys()):
        v_embedding, a_hat = MagCAE[VAE](views_features[VAE])
        test_embedding.update({VAE:v_embedding})

    # aggregate embeddings
    embedding, aggregator = aggregate_embeddings(test_embedding, agg)
    embedding = np.array(embedding) # embedding is a tensor, convert to np array.

    # reconstruct a_hat
    test_roc, test_ap, fpr, tpr = evaluate(test_edges, test_edges_false, adj_orig, embedding)
    print("MagCAE test result on {}".format(dataset))
    print("Test Roc: {}, Test AP: {}, P: {}, Training Ratio: {}, Lambda: {}.".format(test_roc, test_ap, p, tr, lam))
Example no. 5
0
                           nclu1:nclu1 + patch_size_hr - 1]
        # Extract the matching low-res feature patch and the local
        # aggregation-count patch for the same window.
        patch_feat = feat_lr[nrlu2:nrlu2 + patch_size_feat - 1,
                             nclu2:nclu2 + patch_size_feat - 1]
        aggr_times_local = aggr_time[nrlu1:nrlu1 + patch_size_hr - 1,
                                     nclu1:nclu1 + patch_size_hr - 1]

        # Operate only on non-zero (observed) pixels of the high-res patch.
        idx_nnz = (patch_hr[:] != 0)
        # NOTE(review): operator precedence makes this
        # patch_hr - (mean / dim_hr); (patch_hr - mean) / dim_hr looks more
        # likely intended -- confirm against the original algorithm.
        patch_hr_data = patch_hr[:] - np.mean(patch_hr[idx_nnz]) / dim_hr
        patch_feat = patch_feat[:] / dim_feat

        # Stack observed high-res values with the feature patch and solve the
        # sparse-coding problem against the concatenated dictionary.
        patch_data = [patch_hr_data[idx_nnz], patch_feat]
        dict_temp = np.concatenate(
            (dictionary.dict_hr[idx_nnz, :], dictionary.dict_lr), axis=1)

        if params.train_method == 'omp':
            alpha = spams.OMP(patch_data, dict_temp)  # params.solve_param)
        elif params.train_method == 'lasso':
            alpha = spams.Lasso(patch_data, D=dict_temp)
        # BUG(review): 'aplha' is a typo for 'alpha' -- this line raises
        # NameError as written.
        patch_recov = dictionary.dict_hr * csc_matrix.todense(aplha)
        # NOTE(review): np.shape([patch_size_hr, patch_size_hr]) evaluates to
        # (2,), not (patch_size_hr, patch_size_hr) -- this reshape looks wrong.
        patch_recov = np.reshape(patch_recov,
                                 np.shape([patch_size_hr, patch_size_hr
                                           ])) * dim_hr + local_mean
        # NOTE(review): np.divide is called with a single argument and
        # np.multiply with a tuple -- this call cannot run as written;
        # the intent was presumably a running average with aggr_times_local.
        patch_recov = np.divide(
            np.multiply((patch_recov + patch_hr, aggr_times_local),
                        (1 + aggr_times_local)))
        # Blend the recovered patch into the running output/aggregates.
        aggr_times_local = aggr_times_local + 1
        aggr_times_local[nrlu1:nrlu1 + patch_size_hr - 1,
                         nclu1:nclu1 + patch_size_hr - 1] = aggr_times_local
        img_out[nrlu1:nrlu1 + patch_size_hr - 1,
                nclu1:nclu1 + patch_size_hr - 1] = patch_recov
def bose_Hamiltonian(**args):
    """Assemble the bosonic Hamiltonian in parallel across MPI ranks.

    Keyword args (read on the root rank):
        DIM_H: Hilbert-space dimension.
        mat_type: 'Sparse' (default) or 'Dense' output representation.
        Remaining kwargs are forwarded unchanged to ham.evaluate_ham.

    Returns the assembled Hamiltonian on rank 0 (a csc_matrix, or a dense
    matrix when mat_type == 'Dense'); all other ranks return None.
    """
    if COMM.rank == 0:

        # np.int was removed in NumPy 1.24; the builtin int is equivalent here.
        DIM_H = int(args.get("DIM_H"))

        # Output representation: sparse unless explicitly requested dense.
        mat_type = args.get("mat_type")
        if mat_type is None:
            mat_type = 'Sparse'

    ############### MPI VERSION

    # Root splits the row indices into one chunk per rank.
    if COMM.rank == 0:
        jobs = list(range(DIM_H))
        jobs = split(jobs, COMM.size)
    else:
        jobs = None

    jobs = COMM.scatter(jobs, root=0)

    # Each rank evaluates its share of matrix elements as (rows, cols, values).
    XX = []
    YY = []
    AA = []

    for i in jobs:
        res = ham.evaluate_ham(i, **args)
        XX.append(res[0])
        YY.append(res[1])
        AA.append(res[2])

    # Gather the per-rank triplet lists back on the root rank.
    XX0 = MPI.COMM_WORLD.gather(XX, root=0)
    YY0 = MPI.COMM_WORLD.gather(YY, root=0)
    AA0 = MPI.COMM_WORLD.gather(AA, root=0)

    if COMM.rank == 0:

        # Flatten twice: rank-level lists of job-level lists -> flat triplets.
        X0 = [item for sublist in XX0 for item in sublist]
        Y0 = [item for sublist in YY0 for item in sublist]
        A0 = [item for sublist in AA0 for item in sublist]

        X1 = [item for sublist in X0 for item in sublist]
        Y1 = [item for sublist in Y0 for item in sublist]
        A1 = [item for sublist in A0 for item in sublist]

        Hamiltonian = csc_matrix((A1, (X1, Y1)),
                                 shape=(DIM_H, DIM_H),
                                 dtype=np.double)
        ff.print_matrix(Hamiltonian)

        if mat_type == 'Dense':

            Hamiltonian = csc_matrix.todense(Hamiltonian)

        return Hamiltonian
Example no. 7
0
# Load label vectors for the train and validation splits (project helper;
# defined elsewhere along with dense_/sparse_ train/valid data and
# matrix_delete).  NOTE: Python 2 syntax (print statement below).
train_y = UCIDataWrangling.textdataload(data_set_name_to_use, 'train', 'labels')
valid_y = UCIDataWrangling.textdataload(data_set_name_to_use, 'valid', 'labels')

from sklearn.random_projection import johnson_lindenstrauss_min_dim as jlmd
jlmd_start_time = default_timer()
num_samples = dense_trainData.shape[0]
train_number_of_features = dense_trainData.shape[1]
valid_number_of_features = dense_validData.shape[1]
# If train/valid disagree on feature count, trim the wider matrix (both its
# dense and sparse forms) down to the common width so downstream code sees
# consistent shapes.
if train_number_of_features != valid_number_of_features:
    num_features = min(dense_trainData.shape[1], dense_validData.shape[1])
    print '\n' + 'Analyzing how many of {0} possible features to keep out of training data set.'.format(num_features)
    if train_number_of_features < valid_number_of_features:
        dense_validData = matrix_delete(dense_validData,
                                        numpy.s_[train_number_of_features:valid_number_of_features], axis=1)
        # Round-trip through a dense copy to drop columns from the sparse form.
        temp_validData = csc_matrix.todense(sparse_validData)
        temp_validData = matrix_delete(temp_validData,
                                       numpy.s_[train_number_of_features:valid_number_of_features], axis=1)
        sparse_validData = csc_matrix(temp_validData)
    else:
        dense_trainData = matrix_delete(dense_trainData,
                                        numpy.s_[valid_number_of_features:train_number_of_features], axis=1)
        # Same round-trip for the training matrices.
        temp_trainData = csc_matrix.todense(sparse_trainData)
        temp_trainData = matrix_delete(temp_trainData,
                                       numpy.s_[valid_number_of_features:train_number_of_features], axis=1)
        sparse_trainData = csc_matrix(temp_trainData)
else:
    num_features = train_number_of_features

# Re-establish how many features are possible prior to running Johnson-Lindenstrauss algorithm
train_number_of_features = dense_trainData.shape[1]
def bose_Hamiltonian_parity_fast(**args):
    """Build the bosonic Hamiltonian in the reflection-parity basis.

    The Hilbert space splits into a reflection-symmetric sector of size
    ``sim_sec_len`` and an antisymmetric sector; matrix elements are
    accumulated separately per sector and assembled into a single
    block matrix of total dimension ``DIM_H``.

    Keyword args:
        DIM_H: total Hilbert-space dimension.
        BASE_bin: binary configuration of each basis state, indexed by state.
        mat_type: 'Dense' to densify the result; anything else keeps it sparse.
        parity_index: per-state table b_p; column 0 marks the sector
            representative (negative = skip), columns 1-2 hold the paired
            state indices, column 3 the index within the antisymmetric sector.
        sim_sec_len: size of the symmetric sector.
        Remaining kwargs are forwarded to ham.evaluate_ham / ff.get_index.

    Returns the Hamiltonian as a csc_matrix (dense when mat_type == 'Dense').
    """
    # np.int was removed in NumPy 1.24; the builtin int is equivalent here.
    DIM_H = int(args.get("DIM_H"))
    BASE_bin = args.get("BASE_bin")
    mat_type = args.get("mat_type")
    b_p_inp = args.get("parity_index")
    b_p = np.asarray(b_p_inp)

    len_sym = args.get("sim_sec_len")
    len_b_p = len(b_p)

    # Triplet (row, col, value) accumulators for the symmetric (_s) and
    # antisymmetric (_a) sectors.
    X0_s = []
    Y0_s = []
    A0_s = []

    X0_a = []
    Y0_a = []
    A0_a = []

    for i in range(len_b_p):

        # Negative marker in column 0: not a sector representative, skip.
        if b_p[i, 0] < 0:
            continue

        # Matrix elements generated from the state and its reflection partner.
        X_1, Y_1, A_1 = ham.evaluate_ham(b_p[i, 1], **args)
        X_2, Y_2, A_2 = ham.evaluate_ham(b_p[i, 2], **args)

        X = [item for sublist in [X_1, X_2] for item in sublist]
        Y = [item for sublist in [Y_1, Y_2] for item in sublist]
        A = [item for sublist in [A_1, A_2] for item in sublist]

        ##.... SYM SEC
        for j in range(len(A)):

            state_X_0 = BASE_bin[X[j]]
            state_Y_0 = BASE_bin[Y[j]]

            # Reflected (reversed) configurations and their basis indices.
            state_X_rev = state_X_0[::-1]
            state_Y_rev = state_Y_0[::-1]

            ind_X = X[j]
            ind_X_rev = ff.get_index(state_X_rev, **args)

            ind_Y = Y[j]
            ind_Y_rev = ff.get_index(state_Y_rev, **args)

            # Column indices of the sector representatives.
            ind_col_X = b_p[min(ind_X, ind_X_rev), 0]
            ind_col_Y = b_p[min(ind_Y, ind_Y_rev), 0]

            # Normalization: a generic pair-pair element is divided by 2;
            # self-reflected states carry sqrt(2) factors instead.
            coef_s = 2

            if ind_X == ind_X_rev:

                ind_col_X = b_p[ind_X, 0]
                coef_s = 2 * np.sqrt(2)

            X0_s.append(ind_col_X)

            if ind_Y == ind_Y_rev:

                ind_col_Y = b_p[ind_Y, 0]
                coef_s = 2 / np.sqrt(2)

                if ind_X == ind_X_rev:

                    coef_s = 2

            Y0_s.append(ind_col_Y)
            A0_s.append(A[j] / coef_s)

        ##.... A_SYM SEC
        # NOTE(review): this loop runs over len(A_1) but indexes the combined
        # lists X/Y/A (length len(A_1) + len(A_2)) -- confirm this asymmetry
        # is intentional.
        for j in range(len(A_1)):

            state_X_0 = BASE_bin[X[j]]
            state_Y_0 = BASE_bin[Y[j]]

            state_X_rev = state_X_0[::-1]
            state_Y_rev = state_Y_0[::-1]

            ind_X = X[j]
            ind_X_rev = ff.get_index(state_X_rev, **args)

            ind_Y = Y[j]
            ind_Y_rev = ff.get_index(state_Y_rev, **args)

            # Column 3 holds the index within the antisymmetric sector.
            ind_col_X = b_p[min(ind_X, ind_X_rev), 3]
            ind_col_Y = b_p[min(ind_Y, ind_Y_rev), 3]

            # Sign from the antisymmetric combination; self-reflected states
            # do not contribute to this sector.
            if Y[j] > ind_Y_rev:
                coef_a = -1

            elif Y[j] < ind_Y_rev:
                coef_a = 1

            else:
                continue

            # Antisymmetric block is offset by len_sym along both axes.
            X0_a.append(ind_col_X + len_sym)
            Y0_a.append(ind_col_Y + len_sym)
            A0_a.append(A[j] * coef_a)

    # Combine both sectors into one triplet list and assemble the matrix.
    X = [item for sublist in [X0_a, X0_s] for item in sublist]
    Y = [item for sublist in [Y0_a, Y0_s] for item in sublist]
    A = [item for sublist in [A0_a, A0_s] for item in sublist]

    Hamiltonian = csc_matrix((A, (X, Y)),
                             shape=(DIM_H, DIM_H),
                             dtype=np.double)

    if mat_type == 'Dense':

        Hamiltonian = csc_matrix.todense(Hamiltonian)

    return Hamiltonian