def HIV_CV(DB, data_type, list_ID, list_y, list_SMILES, dict_id2smile, n_folds):
    """Build cross-validation folds for the HIV (binary-label) dataset.

    Parameters
    ----------
    DB : str
        Dataset name; cached matrices and the fold summary live under
        'data/<DB>/'.
    data_type : str
        'kernel'   -> per-class hierarchical clustering on a molecule kernel,
        'features' -> per-class k-means on molecule features,
        'standard' -> label-stratified shuffled K-fold.
    list_ID, dict_id2smile
        Unused here; kept so all *_CV functions share one signature.
    list_y : np.ndarray
        Binary labels (0/1), one per sample.
    list_SMILES : list
        SMILES strings used to (re)build the kernel/feature matrix on a
        cache miss.
    n_folds : int
        Number of folds.

    Returns
    -------
    list of np.ndarray
        One array of sample indices per fold.
    """
    if data_type == 'kernel':
        # Load the molecule kernel from cache, or build and cache it.
        if not os.path.isfile('data/' + DB + '/' + DB + '_K.npy'):
            K = mol_build_K(list_SMILES)
            np.save('data/' + DB + '/' + DB + '_K', K)
        else:
            K = np.load('data/' + DB + '/' + DB + '_K.npy')
        list_assignment = np.zeros(K.shape[0])
        # Cluster each class separately so every fold contains both labels.
        for y in [0, 1]:
            indices = np.where(list_y == y)[0]
            K_local = K[indices, :]
            K_local = K_local[:, indices]
            local_assignment = Khierarchical_cluster(K_local, n_folds)
            list_assignment[indices] = local_assignment
    elif data_type == 'features':
        # Load the molecule feature matrix from cache, or build and cache it.
        if not os.path.isfile('data/' + DB + '/' + DB + '_X.npy'):
            X = mol_build_X(list_SMILES)
            np.save('data/' + DB + '/' + DB + '_X', X)
        else:
            X = np.load('data/' + DB + '/' + DB + '_X.npy')
        list_assignment = np.zeros(X.shape[0])
        for y in [0, 1]:
            indices = np.where(list_y == y)[0]
            X_local = X[indices, :]
            local_assignment = Xkmeans_cluster(X_local, n_folds)
            list_assignment[indices] = local_assignment
    elif data_type == 'standard':
        if not os.path.isfile('data/' + DB + '/' + DB + '_X.npy'):
            X = mol_build_X(list_SMILES)
            np.save('data/' + DB + '/' + DB + '_X', X)
        else:
            X = np.load('data/' + DB + '/' + DB + '_X.npy')
        list_assignment = np.zeros(X.shape[0])
        # Stratified folds with a fixed seed for reproducibility.
        skf = model_selection.StratifiedKFold(n_folds, shuffle=True,
                                              random_state=92)
        skf.get_n_splits(X, list_y)
        for ifold, (train_index, test_index) in enumerate(skf.split(X, list_y)):
            list_assignment[test_index] = ifold
    c = collections.Counter(list_assignment)
    print(c)
    folds = [np.where(list_assignment == cle)[0] for cle in list(c.keys())]
    # FIX: use a context manager — the original opened this file and never
    # closed it, leaking the handle and risking unflushed output.
    with open('data/' + DB + '/' + DB + '_folds.txt', 'w') as fo:
        for ifold in range(n_folds):
            fo.write("ifold" + str(ifold) + '\n')
            fo.write(str(collections.Counter(list_y[folds[ifold]])) + '\n')
            print(ifold, collections.Counter(list_y[folds[ifold]]))
            fo.write('\n')
    return folds
def SecondaryStructure_CV(DB, data_type, list_ID, list_y, list_FASTA,
                          dict_id2fasta, n_folds):
    """Build cross-validation folds for the secondary-structure dataset.

    Parameters
    ----------
    DB : str
        Dataset name; cached matrices and the fold summary live under
        'data/<DB>/'.
    data_type : str
        'kernel'   -> hierarchical clustering on a precomputed kernel,
        'features' -> k-means on a precomputed feature matrix,
        'standard' -> plain shuffled K-fold over sample indices.
    list_ID : list
        Sample identifiers; only its length is used ('standard' mode).
    list_y : sequence
        Per-sample sequences of labels (each entry is itself iterable —
        labels are flattened per fold before counting).
    list_FASTA, dict_id2fasta
        Unused here; kept so all *_CV functions share one signature.
    n_folds : int
        Number of folds.

    Returns
    -------
    list of np.ndarray
        One array of sample indices per fold.
    """
    if data_type == 'kernel':
        # Kernel must already exist on disk; it is not rebuilt here.
        if not os.path.isfile('data/' + DB + '/' + DB + '_K.npy'):
            print('data/' + DB + '/' + DB + '_K.npy', 'does not exist')
        else:
            K = np.load('data/' + DB + '/' + DB + '_K.npy')
            list_assignment = Khierarchical_cluster(K, n_folds)
    elif data_type == 'features':
        # Feature matrix must already exist on disk; it is not rebuilt here.
        if not os.path.isfile('data/' + DB + '/' + DB + '_X.npy'):
            print('data/' + DB + '/' + DB + '_X.npy', 'does not exist')
        else:
            X = np.load('data/' + DB + '/' + DB + '_X.npy')
            list_assignment = Xkmeans_cluster(X, n_folds)
    elif data_type == 'standard':
        # Dummy X: KFold only needs the number of samples.
        X = np.zeros((len(list_ID), 1))
        list_assignment = np.zeros(X.shape[0])
        skf = model_selection.KFold(n_folds, shuffle=True, random_state=92)
        skf.get_n_splits(X)
        for ifold, (train_index, test_index) in enumerate(skf.split(X)):
            list_assignment[test_index] = ifold
    # FIX: removed a live `import pdb; pdb.Pdb().set_trace()` left here from
    # debugging — it dropped every call into an interactive debugger.
    c = collections.Counter(list_assignment)
    folds = [np.where(list_assignment == cle)[0] for cle in list(c.keys())]
    with open('data/' + DB + '/' + DB + '_folds.txt', 'w') as fo:
        for ifold in range(n_folds):
            # Labels are per-position: flatten each sample's label sequence
            # before counting. Built once, written and printed identically.
            summary = "ifold " + str(ifold) + '\t' + str(
                collections.Counter(
                    [el for ll in list_y[folds[ifold]] for el in ll]))
            fo.write(summary)
            fo.write('\n')
            print(summary)
    return folds
def AtomizationEnergy_CV(DB, data_type, list_ID, list_y, list_SMILES,
                         dict_id2smile, n_folds):
    """Split the atomization-energy dataset into n_folds CV folds.

    Fold assignment depends on `data_type`: hierarchical clustering on a
    molecule kernel ('kernel'), k-means on molecule features ('features'),
    or a shuffled KFold with a fixed seed ('standard'). Matrices are
    cached as .npy files under 'data/<DB>/'. A fold-size summary is
    written to 'data/<DB>/<DB>_folds.txt'.

    Returns a list of index arrays, one per fold.
    """
    kernel_path = 'data/' + DB + '/' + DB + '_K.npy'
    feature_path = 'data/' + DB + '/' + DB + '_X.npy'
    if data_type == 'kernel':
        # Reuse the cached kernel when present, otherwise build and cache it.
        if os.path.isfile(kernel_path):
            K = np.load(kernel_path)
        else:
            K = mol_build_K(list_SMILES)
            np.save('data/' + DB + '/' + DB + '_K', K)
        list_assignment = Khierarchical_cluster(K, n_folds)
    elif data_type == 'features':
        if os.path.isfile(feature_path):
            X = np.load(feature_path)
        else:
            X = mol_build_X(list_SMILES)
            np.save('data/' + DB + '/' + DB + '_X', X)
        list_assignment = Xkmeans_cluster(X, n_folds)
    elif data_type == 'standard':
        if os.path.isfile(feature_path):
            X = np.load(feature_path)
        else:
            X = mol_build_X(list_SMILES)
            np.save('data/' + DB + '/' + DB + '_X', X)
        list_assignment = np.zeros(X.shape[0])
        # Seeded shuffle so fold membership is reproducible across runs.
        splitter = model_selection.KFold(n_folds, shuffle=True,
                                         random_state=92)
        splitter.get_n_splits(X)
        for fold_id, (_, test_index) in enumerate(splitter.split(X)):
            list_assignment[test_index] = fold_id
    fold_sizes = collections.Counter(list_assignment)
    print(fold_sizes)
    folds = [np.where(list_assignment == key)[0] for key in fold_sizes]
    summary_file = open('data/' + DB + '/' + DB + '_folds.txt', 'w')
    summary_file.write(str(fold_sizes) + '\n')
    summary_file.close()
    return folds
def PCBA_CV(DB, data_type, list_ID, list_y, list_SMILES, dict_id2smile,
            n_folds):
    """Build cross-validation folds for the PCBA family of datasets.

    Parameters
    ----------
    DB : str
        Dataset name ('PCBA', 'PCBA10', 'PCBA100', or a single-assay
        variant); cached data and the fold summary live under 'data/<DB>/'.
    data_type : str
        'kernel'   -> clustering on a molecule kernel (global for PCBA,
                      per-class otherwise),
        'features' -> k-means on molecule features (same global/per-class
                      split),
        'standard' -> KFold for the multi-label PCBA* sets, otherwise a
                      label-stratified KFold.
    list_ID, list_y
        NOTE(review): in 'standard' mode both are re-loaded from pickled
        files on disk, overwriting the arguments — presumably because the
        caller passes placeholders there; confirm against call sites.
    list_SMILES : list
        SMILES strings used to (re)build the kernel/feature matrix on a
        cache miss.
    dict_id2smile
        Unused here; kept so all *_CV functions share one signature.
    n_folds : int
        Number of folds.

    Returns
    -------
    list of np.ndarray
        One array of sample indices per fold.
    """
    if data_type == 'kernel':
        if not os.path.isfile('data/' + DB + '/' + DB + '_K.npy'):
            K = mol_build_K(list_SMILES)
            np.save('data/' + DB + '/' + DB + '_K', K)
        else:
            K = np.load('data/' + DB + '/' + DB + '_K.npy')
        if DB == 'PCBA':
            # Multi-label full set: cluster globally (labels can't be
            # stratified per class here).
            list_assignment = Khierarchical_cluster(K, n_folds)
        else:
            # Binary single-assay variant: cluster each class separately.
            list_assignment = np.zeros(K.shape[0])
            for y in [0, 1]:
                indices = np.where(list_y == y)[0]
                K_local = K[indices, :]
                K_local = K_local[:, indices]
                local_assignment = Khierarchical_cluster(K_local, n_folds)
                list_assignment[indices] = local_assignment
    elif data_type == 'features':
        if not os.path.isfile('data/' + DB + '/' + DB + '_X.npy'):
            X = mol_build_X(list_SMILES)
            np.save('data/' + DB + '/' + DB + '_X', X)
        else:
            X = np.load('data/' + DB + '/' + DB + '_X.npy')
        if DB == 'PCBA':
            list_assignment = Xkmeans_cluster(X, n_folds)
        else:
            list_assignment = np.zeros(X.shape[0])
            for y in [0, 1]:
                indices = np.where(list_y == y)[0]
                X_local = X[indices, :]
                local_assignment = Xkmeans_cluster(X_local, n_folds)
                list_assignment[indices] = local_assignment
    elif data_type == 'standard':
        # FIX: the original leaked both pickle file handles
        # (pickle.load(open(...))); use context managers instead.
        with open('data/' + DB + '/' + DB + '_list_ID.data', 'rb') as f:
            list_ID = pickle.load(f)
        with open('data/' + DB + '/' + DB + '_list_y.data', 'rb') as f:
            list_y = np.array(pickle.load(f))
        # Dummy X: the splitters only need the number of samples.
        X = np.zeros((len(list_ID), 1))
        list_assignment = np.zeros(X.shape[0])
        if DB not in ['PCBA', 'PCBA10', 'PCBA100']:
            # Binary labels: stratify so folds keep the class balance.
            skf = model_selection.StratifiedKFold(n_folds, shuffle=True,
                                                  random_state=92)
            skf.get_n_splits(X, list_y)
            for ifold, (train_index, test_index) in \
                    enumerate(skf.split(X, list_y)):
                list_assignment[test_index] = ifold
        else:
            # Multi-label matrices can't be stratified: plain KFold.
            skf = model_selection.KFold(n_folds, shuffle=True,
                                        random_state=92)
            skf.get_n_splits(X)
            for ifold, (train_index, test_index) in enumerate(skf.split(X)):
                list_assignment[test_index] = ifold
    c = collections.Counter(list_assignment)
    print(c)
    folds = [np.where(list_assignment == cle)[0] for cle in list(c.keys())]
    # FIX: use a context manager — the original never closed this file.
    with open('data/' + DB + '/' + DB + '_folds.txt', 'w') as fo:
        for ifold in range(n_folds):
            fo.write("ifold" + str(ifold) + '\n')
            if DB in ['PCBA', 'PCBA10', 'PCBA100']:
                # Multi-label: report per-class label counts in each fold.
                for iclass in range(list_y.shape[1]):
                    fo.write("iclass " + str(iclass) + ' ' +
                             str(collections.Counter(
                                 list_y[folds[ifold], iclass])) + '\n')
                    print("iclass " + str(iclass) + ' ' +
                          str(collections.Counter(
                              list_y[folds[ifold], iclass])))
            else:
                fo.write(str(collections.Counter(list_y[folds[ifold]])) +
                         '\n')
                print(ifold, collections.Counter(list_y[folds[ifold]]))
            fo.write('\n')
    return folds