Example #1
0
def CD(prot_sub, iter_num=1000):
    """
    Coordinate Descent Solver:
    return inferred protein, Z, r^2, model, error

    Fits X ~ diag(zeta) . S . protein by alternating two updates:
      1. with the protein levels fixed, every peptide (row of S) gets the
         least-squares scale ``zeta`` that maps its predicted profile onto
         its measured profile in X;
      2. with ``zeta`` fixed, the protein levels are re-estimated by linear
         least squares.
    Negative values produced by either update are replaced by 1e-4.

    Parameters
    ----------
    prot_sub : dict
        'S' is the (Num_pep x Num_prot) matrix, 'X' the data matrix with one
        column per condition.
    iter_num : int, optional
        Maximum number of iterations.  ``None`` or a value below 1 falls
        back to 1000.  The argument is honoured as given; it is no longer
        overridden depending on ``sys.argv``.

    Returns
    -------
    protein : ndarray, shape (Num_prot, Num_condition)
        Inferred protein levels.
    opt : dict
        'model' : reconstruction diag(zeta) . S . protein
        'rsq'   : 1 - SS_res / SS_tot
        'Z'     : diagonal matrix of the peptide coefficients zeta
        'error' : norm of the last change of the protein estimate
    """
    S = prot_sub['S']
    X = prot_sub['X']

    if iter_num is None or iter_num < 1:
        iter_num = 1000

    Num_prot = S.shape[1]
    Num_condition = X.shape[1]

    # Random initial condition for the protein levels.
    protein = np.random.randn(Num_prot, Num_condition)

    for _ in range(iter_num):
        Prot = protein  # previous estimate, kept for the convergence check

        # Per-peptide least-squares scale between prediction and data.
        dat = S.dot(protein).T
        zeta = np.sum(dat * X.T, axis=0) / np.sum(dat * dat, axis=0)
        zeta[zeta < 0] = 1e-4
        Zeta = np.diag(zeta)

        # Protein update with the peptide scales held fixed.
        protein = np.linalg.lstsq(Zeta.dot(S), X)[0]
        protein[protein < 0] = 1e-4

        error = np.linalg.norm(Prot - protein)
        if error < 1e-8:
            break

    Model = Zeta.dot(S).dot(protein)
    # NOTE(review): hf.norm_mean(X, 0) is presumably X centred along axis 0,
    # which makes the denominator the total sum of squares -- confirm.
    X_norm = hf.norm_mean(X, 0)
    # Sum of *squared* residuals: the square goes inside the sum, otherwise
    # residuals of opposite sign cancel and R^2 is overestimated.
    Rsq = 1 - np.sum(np.square(X - Model)) / np.sum(X_norm**2)

    opt = dict()
    opt['model'] = Model
    opt['rsq'] = Rsq
    opt['Z'] = Zeta
    opt['error'] = error
    return protein, opt
Example #2
0
def QP(prot_sub):
    """
    Quadratic Programming Solver:
    Return the inferred protein level, Z, r^2

    The unknowns are stacked in one vector w = [vec(U); L], where U is the
    (K x N) protein matrix and L holds one coefficient per row of X.  The
    solver minimises (1/2) w' P w with P = AA' AA and
    AA = [kron(S, I_N), -block_diag(*X)'], subject to w >= 0 and sum(w) = 1.
    Z is then taken as the element-wise reciprocal of L.

    Parameters
    ----------
    prot_sub : dict
        'S' is the matrix relating the rows of X to proteins, 'X' the data
        matrix with one column per condition.

    Returns
    -------
    protein : ndarray, shape (S.shape[1], X.shape[1])
        Inferred protein levels.
    opt : dict
        'model' : rescaled reconstruction diag(Z) . S . protein
        'rsq'   : 1 - SS_res / SS_tot
        'Z'     : diagonal matrix built from 1 / L
    """
    S = prot_sub['S']
    X = prot_sub['X']
    K = S.shape[1]
    N = X.shape[1]

    # Quadratic form of the homogeneous system.
    A1 = np.kron(S, np.eye(N))
    A2 = -block_diag(*X).T
    A1 = matrix(A1)
    A2 = matrix(A2)
    AA = matrix([[A1], [A2]])
    P = matrix(np.dot(AA.T, AA))
    dim = P.size[0]
    q = matrix(np.zeros(dim))

    # Inequality -w <= 0 (non-negativity) and equality sum(w) = 1.
    G = matrix(-np.eye(dim))
    h = matrix(np.zeros(dim), (dim, 1))
    A = matrix(np.ones(dim), (1, dim))
    b = matrix(1.0)
    sol = solvers.qp(P, q, G, h, A, b)
    w = np.array(sol['x'])

    # Orient the solution so that most entries are positive.
    if np.sum(w < 0) > np.sum(w > 0):
        w = -1 * w

    # Split the stacked solution back into protein levels and coefficients.
    U_hat = np.array(w[0:K * N]).reshape(K, N)
    L_hat = w[K * N:].ravel()
    Z_hat = 1 / L_hat
    protein = U_hat

    X_rec = np.diag(Z_hat).dot(S).dot(U_hat)
    # NOTE(review): the reconstruction is multiplied by median(X_rec / X);
    # confirm this is the intended direction of the rescaling.
    Model = X_rec * np.median(X_rec / X)
    # NOTE(review): hf.norm_mean(X, 0) is presumably X centred along axis 0,
    # which makes the denominator the total sum of squares -- confirm.
    X_norm = hf.norm_mean(X, 0)
    # Sum of *squared* residuals: the square goes inside the sum, otherwise
    # residuals of opposite sign cancel and R^2 is overestimated.
    Rsq = 1 - np.sum(np.square(X - Model)) / np.sum(X_norm**2)

    opt = dict()
    opt['model'] = Model
    opt['rsq'] = Rsq
    opt['Z'] = np.diag(Z_hat)
    return protein, opt
Example #3
0
def SVD(prot_sub):
    """
    SVD solver:
    Return the inferred protein level, Z, r^2

    Builds the homogeneous system A w = 0 with
    A = [kron(S, I_N), -block_diag(*X)'] and w = [vec(U); L], and takes as
    solution the last right-singular vector returned by the SVD of A.  Z is
    the element-wise reciprocal of L.

    Parameters
    ----------
    prot_sub : dict
        'S' is the matrix relating the rows of X to proteins, 'X' the data
        matrix with one column per condition.

    Returns
    -------
    protein : ndarray, shape (S.shape[1], X.shape[1])
        Inferred protein levels.
    opt : dict
        'model'         : rescaled reconstruction diag(Z) . S . protein
        'rsq'           : 1 - SS_res / SS_tot
        'Z'             : diagonal matrix built from 1 / L
        'V'             : the singular vector used as solution
        'eigen_spacing' : relative gap between the two smallest singular
                          values, (s[-2] - s[-1]) / (s[-2] + s[-1])
    """
    S = prot_sub['S']
    X = prot_sub['X']
    N = X.shape[1]
    K = S.shape[1]

    A1 = np.kron(S, np.eye(N))
    A2 = block_diag(*X).T
    A = np.concatenate((A1, -A2), axis=1)

    # Alternative: svd(A, full_matrices=True, lapack_driver='gesvd')
    _, s, V = np.linalg.svd(A, full_matrices=True)
    w = V[-1, :]  # last row of V^H: direction of the smallest singular value
    eigen_spacing = (s[-2] - s[-1]) / (s[-2] + s[-1])

    # A singular vector is defined up to sign; orient it so that most
    # entries are positive.
    if np.sum(w < 0) > np.sum(w > 0):
        w = -1 * w

    # Split the stacked solution back into protein levels and coefficients.
    U_hat = w[0:K * N].reshape(K, N)
    L_hat = w[K * N:]
    Z_hat = 1 / L_hat
    protein = U_hat

    X_rec = np.diag(Z_hat).dot(S).dot(U_hat)
    # NOTE(review): the reconstruction is multiplied by median(X_rec / X);
    # confirm this is the intended direction of the rescaling.
    Model = X_rec * np.median(X_rec / X)
    # NOTE(review): hf.norm_mean(X, 0) is presumably X centred along axis 0,
    # which makes the denominator the total sum of squares -- confirm.
    X_norm = hf.norm_mean(X, 0)
    # Sum of *squared* residuals: the square goes inside the sum, otherwise
    # residuals of opposite sign cancel and R^2 is overestimated.
    Rsq = 1 - np.sum(np.square(X - Model)) / np.sum(X_norm**2)

    opt = dict()
    opt['model'] = Model
    opt['rsq'] = Rsq
    opt['Z'] = np.diag(Z_hat)
    opt['V'] = w  # last singular vector
    opt['eigen_spacing'] = eigen_spacing
    return protein, opt
# Inferred protein levels per solver, keyed by the problem index as a string.
CD_homo_P, SVD_homo_P, QP_homo_P = {}, {}, {}

# Ground-truth protein levels of every simulated problem, keyed the same way.
U_true_set = {}
for each in range(num_problem):
    # Simulate one problem: peptide coefficients are 1 plus an Exp(1) draw,
    # protein levels are uniform on [0, 1), and the noise added to the clean
    # data scales with the signal through sigma_n.
    Z_true = 1 + np.array([random.expovariate(1) for _ in range(M)])
    U_true = np.random.rand(K, N)
    X_clean = np.dot(np.dot(np.diag(Z_true), S), U_true)
    X = X_clean + (X_clean * sigma_n) * np.array(mtl.randn(M, N))

    U_true_set[str(each)] = U_true
    prot_sub = {'S': S, 'X': X}
    # NOTE(review): return value is discarded; presumably this reports the
    # null-space dimension of the problem -- confirm against hf.
    hf.null_sp_dim(prot_sub)

    # SVD solver
    SVD_solution = Solvers.SVD(prot_sub)
    SVD_protein, SVD_opt = SVD_solution
    SVD_homo_P[str(each)] = SVD_protein

    # QP solver
    QP_solution = Solvers.QP(prot_sub)
    QP_protein, QP_opt = QP_solution
    QP_homo_P[str(each)] = QP_protein

    # CD solver, capped at 1000 iterations
    CD_solution = Solvers.CD(prot_sub, 1000)
    CD_protein, CD_opt = CD_solution
    CD_homo_P[str(each)] = CD_protein
import HIquant_functions as hf
import Solvers
from cvxopt import solvers, matrix, lapack
import matplotlib.pyplot as plt
import HIquant_test_path_set as ts
from sklearn.ensemble import RandomForestRegressor
import seaborn as sns
os.getcwd()

print('Importing data from the directory below ...')

# Load the selected test data set and report how long the import took.
st = time.time()
test_path = ts.test_set['TMT2_MQ_Phospho']
# test_path = '/Users/chentianchi/Desktop/alex_all_proteoform_IL4_hiquant_input.txt'

(Mat_A, Mat_S, Mat_CC, Mat_nC, Dat_data, Dat_textdata, Dat_proteins,
 Data_isunique, Num_peptides, Num_proteins) = hf.GetData(test_path)
ed = time.time()
print('Time:', ed - st)

# Bare expressions kept from the interactive session (they only display a
# value when run cell by cell).
Mat_nC
Num_proteins
Num_peptides
Dat_data

## 2. Compile a list of proteins having unique peptides and common peptides.
# First column of the rows flagged as unique (Data_isunique == 1).
proteins_uniq_pep = Dat_textdata[Data_isunique == 1].iloc[:, 0].tolist()
num_uniq_proteins = len(np.unique(proteins_uniq_pep))
print('The number of proteins with unique peptides is :', num_uniq_proteins)
print('Number of proteins ONLY with common peptides:', Num_proteins - num_uniq_proteins)