def fullScan(model,trait,chr):
    """Scan every marker of one chromosome for one trait and write the results.

    Phenotypes and covariates come from ``getPheno``; the variance components
    come from ``extractVC``.  The phenotypic covariance matrix is built as
    ``V = Vg * K + Ve * I`` from the relationship matrix ``K`` read from disk.
    For each marker, individuals whose additive or dominance score is 'NA'
    are dropped, the matching sub-matrix of ``V`` is inverted, and
    ``markerTest`` is called.  ``result[0]``, ``result[1]`` and ``result[2]``
    are written one line per marker to the pvalues / beta / dfMarker files.

    Args:
        model: label used only as a prefix of the output file names.
        trait: trait name passed to ``getPheno`` and ``extractVC`` and used in
            the output file names.
        chr: chromosome identifier (shadows the builtin; name kept for
            callers).  Passed to ``extractVC`` and used via ``str(chr)`` in
            the genotype and output file names.

    Returns:
        None.  Results are written to files under ``outFolder``.
    """
    ### no magic number! here are constants
    nInd = 1846

    phenoFolder = 'X:/DATA/Public/shang/Ginnie-20170118T190718Z/Ginnie/data/'
    phenoFileName = 'Traits_S0S1.txt'
    genoFolder = 'X:/DATA/Public/shang/Ginnie-20170118T190718Z/Ginnie/data/'
    outFolder = 'X:/DATA/Public/shang/Ginnie-20170118T190718Z/Ginnie/outputs/'
    VCdir = 'X:/DATA/Public/shang/Ginnie-20170118T190718Z/Ginnie/Data_prep/leave_oneGmatrix_all_families.giv/asremlCode_estimate_VC/code_for_asreml/'

    covList = ['F.exp']  # a list of covariate names
    phenoR = getPheno(phenoFolder, phenoFileName, trait, covList)
    pheno = phenoR[1]
    covariate = phenoR[2]

    VarComps = extractVC(VCdir, trait, chr)  # Vg and Verr for trait from asreml
    print(VarComps)

    ########################## read in cov matrix
    # Dense relationship matrix: square numeric, no header or indicator
    # columns.  Its sort order is enforced outside of this program to match
    # the order of the genotype scores.
    K = np.loadtxt('X:/DATA/Public/shang/Ginnie-20170118T190718Z/Ginnie/data/Gmatrix_all_families.txt.gz')

    Vg = float(VarComps[0])
    Ve = float(VarComps[1])
    ######################### error variance matrix (Ve on the diagonal)
    error = np.zeros((nInd, nInd), float)
    np.fill_diagonal(error, Ve)
    ######################### genetic covariance plus error
    V = Vg * K + error

    ######################### genotype
    chrTag = "_chr" + str(chr)
    # shape[0] is marker number, shape[1] is individual number
    add = np.loadtxt(genoFolder + 'ZeaSyn6_numeric_add_trans_shang' + chrTag + '.txt', skiprows=1, dtype=str)
    dom = np.loadtxt(genoFolder + 'ZeaSyn6_numeric_dom_trans_shang' + chrTag + '.txt', skiprows=1, dtype=str)

    outPrefix = outFolder + model + trait
    # The context manager closes all three files even if a marker fails;
    # buffering=1 keeps the output line-buffered so partial results survive.
    with open(outPrefix + 'pvalues' + chrTag + '.txt', 'w', 1) as f_pvalue, \
            open(outPrefix + 'beta' + chrTag + '.txt', 'w', 1) as f_beta, \
            open(outPrefix + 'dfMarker' + chrTag + '.txt', 'w', 1) as f_dfMarker:
        count = 1
        for index in range(add.shape[0]):
            thisAdd = add[index, :]
            thisDom = dom[index, :]

            keepAddI = {i for i, score in enumerate(thisAdd) if score != 'NA'}
            keepDomI = {i for i, score in enumerate(thisDom) if score != 'NA'}
            # Sorted so the individual order is deterministic and identical
            # for every filtered vector and for the V sub-matrix.
            nonMissI = sorted(keepAddI & keepDomI)

            addFilter = [float(thisAdd[i]) for i in nonMissI]
            domFilter = [float(thisDom[i]) for i in nonMissI]
            covFilter = [covariate[i] for i in nonMissI]
            phenoFilter = [float(pheno[i][0]) for i in nonMissI]
            # Rows and columns of V for the retained individuals only.
            Vsub = np.asmatrix(V[np.ix_(nonMissI, nonMissI)])

            print("Vshape", Vsub.shape)
            print("Do inverse OMG")
            inverseV = inv(Vsub)
            print("Inverse done")
            result = markerTest(addFilter, domFilter, covFilter, phenoFilter, nInd, inverseV, nonMissI)
            p = result[0]
            beta = result[1]
            dfMarker = result[2]
            print("p:", p)
            print("beta", beta)

            # markerTest may hand back a plain int instead of a matrix for
            # beta; in that case it is written as a single field.
            if isinstance(beta, int):
                betaFields = [str(beta)]
            else:
                betaFields = beta.tolist()[0]

            f_pvalue.write(str(p) + "\n")
            f_beta.write("\t".join(str(x) for x in betaFields) + "\n")
            f_dfMarker.write(str(dfMarker) + "\n")
            count = count + 1
            print("count", count)
def fullScan(model, trait, DAratio, unitTime, rep):
    """Scan every marker of one simulated replicate and write the results.

    Phenotypes and covariates come from ``getPheno`` (column ``"rep<rep>"``
    of the replicate phenotype file).  The phenotypic covariance matrix is
    built as ``V = Vg * K + Ve * I`` from the hard-coded variance components
    and the relationship matrix ``K`` read from disk.  For each marker,
    individuals whose additive or dominance score is 'NA' are dropped, the
    matching sub-matrix of ``V`` is inverted, and ``markerTest`` is called.
    ``result[0]``, ``result[1]`` and ``result[2]`` are written one line per
    marker to the pvalues / beta / dfMarker files.

    Args:
        model: label used only as a prefix of the output file names.
        trait: trait name used in the input and output file names.
        DAratio: value embedded in the file names after ``_DAratio_``.
        unitTime: value embedded in the file names after ``_unit_``.
        rep: replicate identifier; used for the phenotype column name and in
            the file names.  Converted with ``str()`` everywhere, so both
            ``3`` and ``"3"`` work.

    Returns:
        None.  Results are written to files under ``outFolder``.
    """
    ### no magic number! here are constants
    nInd = 1846

    phenoFolder = '/home/sxue2/thirdProject/data/'
    genoFolder = '/home/sxue2/thirdProject/data/'
    outFolder = '/home/sxue2/thirdProject/outputs/'

    # Shared pieces of every file name.  str() lets numeric arguments be
    # passed without a TypeError during concatenation.
    runTag = trait + "_DAratio_" + str(DAratio) + "_unit_" + str(unitTime)
    repTag = '_rep_' + str(rep)
    phenoFileName = 'allPhenoRep_' + runTag + '.txt'

    covList = ['F.exp']  # a list of covariate names
    repName = "rep" + str(rep)
    phenoR = getPheno(phenoFolder, phenoFileName, repName, covList)
    pheno = phenoR[1]
    covariate = phenoR[2]

    VarComps = [301.1166177, 2.706935e+01]  # Vg and Verr for trait from asreml
    ########################## read in cov matrix
    # Dense relationship matrix: square numeric, no header or indicator
    # columns.  Its sort order is enforced outside of this program to match
    # the order of the genotype scores.
    K = np.loadtxt('/home/sxue2/thirdProject/data/Gmatrix_all_families.txt.gz')

    Vg = float(VarComps[0])
    Ve = float(VarComps[1])
    ######################### error variance matrix (Ve on the diagonal)
    error = np.zeros((nInd, nInd), float)
    np.fill_diagonal(error, Ve)
    ######################### genetic covariance plus error
    V = Vg * K + error

    ######################### genotype
    add = np.loadtxt(genoFolder + 'add_' + runTag + repTag + '.txt',
                     skiprows=1,
                     dtype=str)
    dom = np.loadtxt(genoFolder + 'dom_' + runTag + repTag + '.txt',
                     skiprows=1,
                     dtype=str)

    outPrefix = outFolder + model + runTag + repTag
    # The context manager closes all three files even if a marker fails;
    # buffering=1 keeps the output line-buffered so partial results survive.
    with open(outPrefix + 'pvalues.txt', 'w', 1) as f_pvalue, \
            open(outPrefix + 'beta.txt', 'w', 1) as f_beta, \
            open(outPrefix + 'dfMarker.txt', 'w', 1) as f_dfMarker:
        count = 1
        for index in range(add.shape[0]):
            thisAdd = add[index, :]
            thisDom = dom[index, :]

            keepAddI = {i for i, score in enumerate(thisAdd) if score != 'NA'}
            keepDomI = {i for i, score in enumerate(thisDom) if score != 'NA'}
            # Sorted so the individual order is deterministic and identical
            # for every filtered vector and for the V sub-matrix.
            nonMissI = sorted(keepAddI & keepDomI)

            addFilter = [float(thisAdd[i]) for i in nonMissI]
            domFilter = [float(thisDom[i]) for i in nonMissI]
            covFilter = [covariate[i] for i in nonMissI]
            phenoFilter = [float(pheno[i][0]) for i in nonMissI]
            # Rows and columns of V for the retained individuals only.
            Vsub = np.asmatrix(V[np.ix_(nonMissI, nonMissI)])

            print("Vshape", Vsub.shape)
            print("Do inverse OMG")
            inverseV = inv(Vsub)
            print("Inverse done")
            result = markerTest(addFilter, domFilter, covFilter, phenoFilter,
                                nInd, inverseV, nonMissI)
            p = result[0]
            beta = result[1]
            dfMarker = result[2]
            print("p:", p)
            print("beta", beta)

            # markerTest may hand back a plain int instead of a matrix for
            # beta; in that case it is written as a single field.
            if isinstance(beta, int):
                betaFields = [str(beta)]
            else:
                betaFields = beta.tolist()[0]

            f_pvalue.write(str(p) + "\n")
            f_beta.write("\t".join(str(x) for x in betaFields) + "\n")
            f_dfMarker.write(str(dfMarker) + "\n")
            count = count + 1
            print("count", count)
# Example no. 3
# 0
def fullScan(model, trait):
    """Scan every marker for one trait under one of four covariance models.

    Phenotypes and covariates come from ``getPheno``.  For each marker,
    individuals whose additive or dominance score is 'NA' are dropped and
    ``markerTest`` is called with the full covariance matrix ``V`` plus the
    list of retained individual indices.  ``result[0]`` and ``result[1]`` are
    written one line per marker to ``<model>pvalues.txt`` and
    ``<model>beta.txt``.

    Models:
        model1: ``V`` is diagonal (500 on the diagonal); no covariates.
        model2: ``V`` is diagonal; covariate ``F.exp``.
        model3: ``V`` is diagonal (500); covariates ``F.exp`` and
            ``Geigen1`` .. ``Geigen10``.
        model4: ``V = Vg * K + Ve * I`` with ``K`` read from disk;
            covariate ``F.exp``.

    Args:
        model: one of "model1" .. "model4"; also the output file prefix.
        trait: trait name passed to ``getPheno``.

    Returns:
        None.  Results are written to files under ``outFolder``.

    Raises:
        ValueError: if ``model`` is not one of the four supported names.
    """
    knownModels = ("model1", "model2", "model3", "model4")
    if model not in knownModels:
        # Fail before any file is read or created; previously an unknown
        # model crashed inside the loop with an unbound ``result``.
        raise ValueError("unknown model %r; expected one of %s" %
                         (model, ", ".join(knownModels)))

    ### no magic number! here are constants
    nInd = 1846

    phenoFolder = 'X:/DATA/Public/shang/Ginnie-20170118T190718Z/Ginnie/data/'
    phenoFileName = 'Traits_S0S1.txt'
    genoFolder = 'X:/DATA/Public/shang/Ginnie-20170118T190718Z/Ginnie/data/'
    outFolder = 'X:/DATA/Public/shang/Ginnie-20170118T190718Z/Ginnie/outputs/'

    covList = ['F.exp']  # a list of covariate names
    phenoR = getPheno(phenoFolder, phenoFileName, trait, covList)
    pheno = phenoR[1]
    covariate = phenoR[2]
    if model == "model3":
        # Load the eigenvector covariates once, before the loop.  The
        # original re-read the file for every marker and only swapped
        # ``covariate`` after the first marker had been filtered, so marker 1
        # was tested with F.exp alone.  ``pheno`` deliberately stays from the
        # F.exp-only call, as in the original.
        covList = [
            'F.exp', 'Geigen1', 'Geigen2', 'Geigen3', 'Geigen4', 'Geigen5',
            'Geigen6', 'Geigen7', 'Geigen8', 'Geigen9', 'Geigen10'
        ]  # a list of covariate names
        covariate = getPheno(phenoFolder, phenoFileName, trait, covList)[2]

    ######################### covariance matrix (loop-invariant, built once)
    # assumes markerTest does not modify V in place -- TODO confirm
    if model == "model4":
        VarComps = [5492, 7053]  # Vg and Verr for trait from asreml
        # Dense relationship matrix: square numeric, no header or indicator
        # columns.  Its sort order is enforced outside of this program to
        # match the order of the genotype scores.
        K = np.loadtxt(
            'X:/DATA/Public/shang/Ginnie-20170118T190718Z/Ginnie/data/Gmatrix_all_families.txt.gz'
        )
        Vg = float(VarComps[0])
        Ve = float(VarComps[1])
        error = np.zeros((nInd, nInd), float)
        np.fill_diagonal(error, Ve)
        V = Vg * K + error
    else:
        # NOTE(review): the original model2 branch read an ``error`` matrix
        # that was never defined on that path (UnboundLocalError).  It is
        # assumed here to use the same fixed residual variance as model1 and
        # model3 -- confirm.
        fixedVe = 500
        V = np.zeros((nInd, nInd), float)
        np.fill_diagonal(V, fixedVe)

    ######################### genotype
    # shape[0] is marker number, shape[1] is individual number
    add = np.loadtxt(genoFolder + 'ZeaSyn6_numeric_add_90trans.txt',
                     skiprows=1,
                     dtype=str)
    dom = np.loadtxt(genoFolder + 'ZeaSyn6_numeric_dom_90trans.txt',
                     skiprows=1,
                     dtype=str)

    # The context manager closes both files even if a marker fails;
    # buffering=1 keeps the output line-buffered so partial results survive.
    with open(outFolder + model + 'pvalues.txt', 'w', 1) as f_pvalue, \
            open(outFolder + model + 'beta.txt', 'w', 1) as f_beta:
        count = 1
        for index in range(add.shape[0]):
            thisAdd = add[index, :]
            thisDom = dom[index, :]

            keepAddI = {i for i, score in enumerate(thisAdd) if score != 'NA'}
            keepDomI = {i for i, score in enumerate(thisDom) if score != 'NA'}
            # Sorted so the individual order is deterministic and identical
            # for every filtered vector.
            nonMissI = sorted(keepAddI & keepDomI)

            addFilter = [float(thisAdd[i]) for i in nonMissI]
            domFilter = [float(thisDom[i]) for i in nonMissI]
            phenoFilter = [float(pheno[i][0]) for i in nonMissI]

            if model == "model1":
                # NOTE(review): the result of subsetV is never used; the full
                # V is what gets passed to markerTest below.  Call kept as in
                # the original -- confirm whether it can be dropped.
                inverseV = subsetV(V, nonMissI)
                covFilter = []  # model1 is fitted without covariates
            else:
                covFilter = [covariate[i] for i in nonMissI]

            result = markerTest(addFilter, domFilter, covFilter, phenoFilter,
                                nInd, V, nonMissI)
            p = result[0]
            beta = result[1]
            print("p:", p)
            print("beta", beta)

            # markerTest may hand back a plain int instead of a matrix for
            # beta; in that case it is written as a single field.
            if isinstance(beta, int):
                betaFields = [str(beta)]
            else:
                betaFields = beta.tolist()[0]

            f_pvalue.write(str(p) + "\n")
            f_beta.write("\t".join(str(x) for x in betaFields) + "\n")

            count = count + 1
            print("count", count)