Example #1
0
def worx(platform):
    r"""Parse the CDF file for ``platform`` and pickle its probe layout.

    The folder ``.\Gene Expressions\GPL`` is searched for the first entry
    whose name, with its last four characters removed, equals ``platform``.
    That path (or an empty string when nothing matches or the folder cannot
    be listed) is handed to a ``cdfParser.cdfParse`` instance.  The parser's
    ``getPMLocs()`` and ``getProbeSets()`` results are then written with
    ``Catche.spickle`` to ``PMLoc.pickle`` and ``ProbeSets.pickle`` inside
    ``.\Gene Expressions\CurrentCel``.

    Args:
        platform: platform identifier compared against the file names in the
            GPL folder (minus their four-character suffix).
    """
    platPath = ''
    try:
        for candidate in os.listdir(r'.\Gene Expressions\GPL'):
            if candidate[:-4] == platform:
                platPath = r'.\Gene Expressions\GPL/' + candidate
                break
    except OSError:
        # Best effort, as before: an unreadable GPL folder leaves platPath
        # empty and the parser is still invoked with ''.
        pass

    cdfP = cdfParser.cdfParse()
    # Single-argument print keeps the Python 2 output unchanged.
    print('Reading .CDF file: %s ...' % (platform,))
    cdfP.parse(platPath)
    print('Done')

    PMLoc = cdfP.getPMLocs()
    Catche.spickle(r'.\Gene Expressions\CurrentCel/PMLoc.pickle', PMLoc)
    Catche.spickle(r'.\Gene Expressions\CurrentCel/ProbeSets.pickle',
                   cdfP.getProbeSets())
Example #2
0
def adjust(iters):
    r"""Adjust the cached ``PMIntense`` values of one CEL file and re-pickle.

    The ``.CEL.gz`` files in ``.\Gene Expressions\CurrentCel`` are listed and
    the ``iters``-th one selects which ``PMIntense<name>.pickle`` to load.
    From the loaded values the function derives:

    * ``m2``    - ``mode`` of the values left of the overall mode,
    * ``sigma`` - sqrt(2) times the root mean squared deviation from ``m2``
      of the values left of ``m2``,
    * ``alpha`` - reciprocal of the ``mode`` of the values right of ``m2``.

    Each value ``v`` is shifted to ``a = v - m2 - alpha * sigma**2`` and then
    replaced by ``a + sigma * normpdf(a / sigma) / normcdf(a / sigma)``.
    The fallbacks are ``1`` when ``sigma`` is zero and ``2`` when the CDF
    evaluates to zero.  The result is written to ``BAIntense<name>.pickle``.

    ``mode``, ``leftOf``, ``rightOf``, ``normpdf`` and ``normcdf`` are helpers
    defined elsewhere in the module.

    Args:
        iters: index into the directory's ``.CEL.gz`` listing.
    """
    names = [entry[:-7]
             for entry in os.listdir(r'.\Gene Expressions\CurrentCel')
             if entry[-7:] == r'.CEL.gz']

    geoMat = Catche.opickle(r'.\Gene Expressions\CurrentCel\PMIntense' +
                            names[iters] + r'.pickle')
    vector = list(geoMat)

    m1 = mode(vector)
    m2 = mode(leftOf(vector, m1))

    # Spread estimate from the values left of m2.  The accumulator starts as
    # a float so the division below cannot floor under Python 2.
    leftZ = leftOf(vector, m2)
    n = len(leftZ)
    zSum = 0.0
    for lZ in leftZ:
        zSum += (lZ - m2)**2
    if n > 1:
        sigma = math.sqrt(zSum / (n - 1)) * math.sqrt(2.0)
    else:
        sigma = math.sqrt(zSum) * math.sqrt(2.0)

    # 1.0 keeps this a true division even if mode() returns an int.
    alpha = 1.0 / mode(rightOf(vector, m2))

    offset = alpha * (sigma**2)
    a = [val - m2 - offset for val in vector]
    print('pass 1')

    if sigma == 0:
        # Degenerate spread: every value falls back to 1.
        y = [1] * len(a)
    else:
        y = []
        for val in a:
            z = val / sigma
            cdf = normcdf(z)
            if cdf == 0:
                y.append(2)
            else:
                y.append(val + sigma * normpdf(z) / cdf)
    print('pass 2')

    # Write back element by element so geoMat keeps its container type.
    for i, col in enumerate(y):
        geoMat[i] = col

    print('pass 3')
    Catche.spickle(
        r'.\Gene Expressions\CurrentCel/BAIntense' + names[iters] + r'.pickle',
        geoMat)
Example #3
0
def adjust(iters):
    r"""Adjust one cached ``PMIntense`` pickle and save it as ``BAIntense``.

    ``iters`` indexes the ``.CEL.gz`` files found in
    ``.\Gene Expressions\CurrentCel``.  The matching
    ``PMIntense<name>.pickle`` is loaded, every value is corrected using the
    ``m2`` / ``sigma`` / ``alpha`` estimates computed below, and the corrected
    values are written back into the loaded container and pickled as
    ``BAIntense<name>.pickle``.  ``mode``, ``leftOf``, ``rightOf``,
    ``normpdf`` and ``normcdf`` come from elsewhere in the module.
    """
    celDir = os.listdir(r'.\Gene Expressions\CurrentCel')
    names = [entry[:-7] for entry in celDir if entry[-7:] == r'.CEL.gz']

    geoMat = Catche.opickle(r'.\Gene Expressions\CurrentCel\PMIntense'+names[iters]+r'.pickle')
    vector = [row for row in geoMat]

    # Mode of everything, then the mode of what lies left of it.
    m1 = mode(vector)
    m2 = mode(leftOf(vector, m1))

    # sigma: spread of the values left of m2, scaled by sqrt(2).
    leftZ = leftOf(vector, m2)
    n = len(leftZ)
    zSum = sum((lZ - m2) ** 2 for lZ in leftZ)
    spread = zSum / (n - 1) if n > 1 else zSum
    sigma = math.sqrt(spread) * math.sqrt(2.0)

    # alpha: reciprocal of the mode of the values right of m2.
    alpha = 1 / mode(rightOf(vector, m2))

    a = [val - m2 - alpha * (sigma ** 2) for val in vector]
    print('pass 1')

    def corrected(val):
        # Fallbacks: 1 when there is no spread, 2 when the CDF is zero.
        if sigma == 0:
            return 1
        if normcdf(val / sigma) == 0:
            return 2
        return val + sigma * normpdf(val / sigma) / normcdf(val / sigma)

    y = [corrected(val) for val in a]
    print('pass 2')

    for idx in range(len(y)):
        geoMat[idx] = y[idx]

    print('pass 3')
    Catche.spickle(r'.\Gene Expressions\CurrentCel/BAIntense'+names[iters]+r'.pickle', geoMat)
Example #4
0
def GetExec():
    """List the sequence records found in the current working directory.

    Files whose extension starts with ``.fa`` are read as FASTA and those
    starting with ``.gb`` as GenBank; every other file is ignored.  A file is
    re-parsed with ``SeqIO.parse`` when it has no ``<name>.pickle`` cache or
    when ``fmt.filemtime`` reports a time later than the one stored in
    ``lastChecked.txt``.  Otherwise the cached entry is loaded with
    ``mP.opickle``.  ``lastChecked.txt`` is rewritten with the current time
    on every call.  A FASTA file named ``my_seq`` is never parsed.

    Returns:
        ``[newList, listdata]`` where ``newList`` holds ``[record, id]``
        pairs and ``listdata`` maps a running index to
        ``(id, sequence length, file name)``.

    Raises:
        IOError: if ``lastChecked.txt`` cannot be opened.
        ValueError: if its first line is not a float.
    """
    Recs = os.listdir(os.getcwd())
    newList = []
    listdata = dict()
    j = 0

    # Read the previous stamp, then store "now"; the with-blocks guarantee
    # the handles are closed even when parsing the stamp fails.
    with open('lastChecked.txt', 'r') as ftime:
        prevTime = float(ftime.readline())
    with open('lastChecked.txt', 'w') as ftime:
        ftime.write(str(time.time()))

    # Only the first three characters of the extension decide the format.
    formats = {".fa": 'fasta', ".gb": 'genbank'}

    for rec in Recs:
        (name, ext) = os.path.splitext(rec)
        seqFormat = formats.get(ext[:3])
        if seqFormat is None:
            continue

        ListFile = name + ".pickle"
        stale = (not os.path.isfile(ListFile)
                 or float(fmt.filemtime(rec)) > prevTime)

        if not stale:
            rHoward = mP.opickle(ListFile)
            listdata[j] = str(rHoward[0]), rHoward[1], rHoward[2]
            newList.append([rHoward[3], rHoward[0]])
            j += 1
            continue

        if seqFormat == 'fasta' and name == "my_seq":
            continue

        for v in SeqIO.parse(rec, seqFormat):
            newList.append([v, v.id])
            listdata[j] = str(v.id), len(v.seq), str(name) + str(ext)
            rHoward = [str(v.id), len(v.seq), str(name) + str(ext), v]
            # NOTE(review): the cache is rewritten per record, so a
            # multi-record file ends up cached as its last record only.
            # Left as is because the pickle layout may be read elsewhere.
            mP.spickle(ListFile, rHoward)
            j += 1

    return [newList, listdata]
Example #5
0
def GetExec():
    """Collect sequence records from the files in the working directory.

    Every file with an extension longer than two characters (other than
    ``.pickle``) is checked against its ``<name>.pickle`` cache.  When the
    cache is missing or the file is newer than the time stored in
    ``lastChecked.txt``, ``.fa*`` files (except ``my_seq``) are parsed as
    FASTA and ``.gb*`` files as GenBank, and each record is pickled.
    Otherwise the cached entry is loaded.  ``lastChecked.txt`` is refreshed
    on every call.

    Returns ``[newList, listdata]``: ``[record, id]`` pairs and a dict
    mapping a running index to ``(id, length, file name)``.
    """
    Recs = os.listdir(os.getcwd())
    newList = []
    listdata = {}

    stampIn = open('lastChecked.txt', 'r')
    prevTime = float(stampIn.readline())
    stampIn.close()
    stampOut = open('lastChecked.txt', 'w')
    stampOut.write(str(time.time()))
    stampOut.close()

    for entry in Recs:
        name, ext = os.path.splitext(entry)
        if len(ext) <= 2 or ext == '.pickle':
            continue

        ListFile = name + ".pickle"
        prefix = ext[:3]
        upToDate = (os.path.isfile(ListFile)
                    and not float(fmt.filemtime(entry)) > prevTime)

        if upToDate:
            # Serve the entry from the cache instead of re-parsing.
            if prefix in (".gb", ".fa"):
                rHoward = mP.opickle(ListFile)
                listdata[len(listdata)] = str(rHoward[0]), rHoward[1], rHoward[2]
                newList.append([rHoward[3], rHoward[0]])
            continue

        if prefix == ".fa":
            if name == "my_seq":
                continue
            kind = 'fasta'
        elif prefix == ".gb":
            kind = 'genbank'
        else:
            continue

        fileName = str(name) + str(ext)
        for v in list(SeqIO.parse(entry, kind)):
            newList.append([v, v.id])
            # The next free key equals the number of entries stored so far.
            listdata[len(listdata)] = str(v.id), len(v.seq), fileName
            mP.spickle(ListFile, [str(v.id), len(v.seq), fileName, v])

    return [newList, listdata]
Example #6
0
def funccall(x):
    r"""Extract the intensities at the cached ``PMLoc`` positions of a CEL file.

    The ``x``-th ``.CEL.gz`` file in ``.\Gene Expressions\CurrentCel`` is
    opened through ``gzip.GzipFile`` and read with ``CelFileReader.read``.
    For every ``loc`` in ``PMLoc.pickle`` the value
    ``intensities[loc[1]][loc[0]]`` is collected, and the resulting list is
    pickled as ``PMIntense<name>.pickle`` in the same folder.

    Args:
        x: index into the directory's ``.CEL.gz`` listing.

    Returns:
        ``x`` unchanged.
    """
    names = [entry
             for entry in os.listdir(r'.\Gene Expressions\CurrentCel')
             if entry[-7:] == '.CEL.gz']

    h = gzip.GzipFile(r'.\Gene Expressions\CurrentCel/' + names[x])
    try:
        g = CelFileReader.read(h).intensities
        PMLoc = Catche.opickle(r'.\Gene Expressions\CurrentCel/PMLoc.pickle')
        PMIntense = [g[loc[1]][loc[0]] for loc in PMLoc]
    finally:
        # The original never closed the archive handle.
        h.close()

    Catche.spickle(
        r'.\Gene Expressions\CurrentCel/PMIntense' + names[x][:-7] +
        r'.pickle', PMIntense)
    return x
Example #7
0
def countSort(xe):
    r"""Bucket-sort one ``BAIntense`` pickle and record the element order.

    The ``xe``-th ``.CEL.gz`` name in ``.\Gene Expressions\CurrentCel``
    selects ``BAIntense<name>.pickle``.  Every value is truncated with
    ``int()`` and grouped by that integer key.  Two parallel lists are
    produced and pickled together as ``[final, finRank]``:

    * ``final``   - the integer keys in ascending order,
    * ``finRank`` - for each position in ``final``, the index the value had
      in the input; equal keys list the highest input index first.

    Buckets are keyed by the values themselves, so any integer range works.
    Sizing them by element count would raise ``IndexError`` for a value
    >= ``len(arrayX)`` and misplace negative values.

    Args:
        xe: index into the directory's ``.CEL.gz`` listing.
    """
    names = [entry[:-7]
             for entry in os.listdir(r'.\Gene Expressions\CurrentCel')
             if entry[-7:] == r'.CEL.gz']

    arrayX = Catche.opickle(r'.\Gene Expressions\CurrentCel/BAIntense'+names[xe]+r'.pickle')

    # key -> input indexes holding that key, in input order.
    buckets = {}
    for j, val in enumerate(arrayX):
        buckets.setdefault(int(val), []).append(j)

    final = []
    finRank = []
    for key in sorted(buckets):
        # Last-seen index first within a bucket.
        for j in reversed(buckets[key]):
            final.append(key)
            finRank.append(j)

    # NOTE(review): the output folder is spelled 'GeneExpressions' (no space)
    # while the input folder is 'Gene Expressions'.  Kept byte-for-byte
    # because other code may read SortAndRank from this path - confirm.
    Catche.spickle(r'.\GeneExpressions\CurrentCel/SortAndRank'+names[xe]+r'.pickle',[final,finRank])
Example #8
0
File: PDB.py Project: fxb22/BioGUI
def GetExec():
    """List the ``.pdb`` files in the working directory with model counts.

    Each ``.pdb`` file is appended to ``newList`` as ``[file name, cwd]``.
    The structure is parsed with ``PDBParser`` (warnings silenced) when no
    ``<name>.pickle`` cache exists or ``fmt.filemtime`` reports a time later
    than the one stored in ``lastChecked.txt``.  Otherwise the cached entry
    is loaded with ``mP.opickle``.  ``lastChecked.txt`` is rewritten with the
    current time on every call.  An ``IOError`` raised while handling one
    file is printed and that file is skipped.

    Returns:
        ``[newList, listdata]`` where ``listdata`` maps a running index to
        ``(name, number of models, file path)``.
    """
    Recs = os.listdir(os.getcwd())
    newList = []
    listdata = dict()
    j = 0
    p = PDBParser(PERMISSIVE=1)

    with open('lastChecked.txt', 'r') as ftime:
        pT = float(ftime.readline())
    with open('lastChecked.txt', 'w') as ftime:
        ftime.write(str(time.time()))

    for rec in Recs:
        try:
            (name, ext) = os.path.splitext(rec)
            if ext != ".pdb":
                continue

            cacheFile = name + ".pickle"
            newList.append([rec, os.getcwd()])
            if not os.path.isfile(cacheFile) or float(fmt.filemtime(rec)) > pT:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    pdbRec = p.get_structure(name, rec)
                models = pdbRec.get_list()
                listdata[j] = str(name), len(models), os.getcwd()+'/'+str(name) + str(ext)
                # NOTE(review): the cache stores the bare file name while the
                # fresh entry above carries the full path - confirm intent.
                rHoward = [str(name), len(models), str(name) + str(ext)]
                mP.spickle(cacheFile, rHoward)
            else:
                rHoward = mP.opickle(cacheFile)
                listdata[j] = str(rHoward[0]), rHoward[1], rHoward[2]

            j += 1

        except IOError as e:
            print(e)

    # Hand the collected listing back; falling off the end would discard it.
    return [newList, listdata]
Example #9
0
File: PDB.py Project: cwt1/BioGUI
def GetExec():
    """Scan the working directory for ``.pdb`` files and cache model counts.

    For every ``.pdb`` file, ``[file name, cwd]`` is appended to ``newList``
    and ``listdata[j]`` receives ``(name, model count, path)``.  The entry is
    freshly parsed with ``PDBParser`` when the ``<name>.pickle`` cache is
    missing or older than the stamp in ``lastChecked.txt``, and read from the
    cache otherwise.  ``lastChecked.txt`` is refreshed on every call, and an
    ``IOError`` on a single file is printed and skipped.  The function
    builds these structures but does not return them.
    """
    Recs = os.listdir(os.getcwd())
    newList = []
    listdata = dict()
    j = 0
    p = PDBParser(PERMISSIVE=1)

    stampIn = open('lastChecked.txt', 'r')
    pT = float(stampIn.readline())
    stampIn.close()
    stampOut = open('lastChecked.txt', 'w')
    stampOut.write(str(time.time()))
    stampOut.close()

    for entry in Recs:
        try:
            name, ext = os.path.splitext(entry)
            if ext == ".pdb":
                cachePath = name + ".pickle"
                newList.append([entry, os.getcwd()])
                upToDate = (os.path.isfile(cachePath)
                            and not float(fmt.filemtime(entry)) > pT)
                if upToDate:
                    rHoward = mP.opickle(cachePath)
                    listdata[j] = str(rHoward[0]), rHoward[1], rHoward[2]
                else:
                    # Parse quietly; PDBParser warnings are suppressed.
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore")
                        pdbRec = p.get_structure(name, entry)
                    models = pdbRec.get_list()
                    fileName = str(name) + str(ext)
                    listdata[j] = str(name), len(
                        models), os.getcwd() + '/' + fileName
                    mP.spickle(cachePath, [str(name), len(models), fileName])

                j += 1

        except IOError as e:
            print(e)
Example #10
0
def GetExec():
    r"""List the ``.tgz`` archives in the working directory with their metadata.

    First ``.\CurrentCel`` is emptied and removed on a best-effort basis.
    Then every ``.tgz`` file in the working directory is appended to
    ``newList`` and described in ``listdata``.  When no ``<name>.pickle``
    cache exists, or ``fmt.filemtime`` reports a time later than the one in
    ``lastChecked.txt``, the archive is opened: its first member is parsed as
    XML to obtain the title text, its second member's name (minus the last
    ten characters) is taken as the platform, and the member count minus two
    is stored as the sample count.  Otherwise the cached entry is loaded.
    ``lastChecked.txt`` is rewritten with the current time on every call.

    ``.tar`` files are recognised but not listed.

    Returns:
        ``[newList, listdata]`` where ``listdata`` maps a running index to
        ``(name without its last four characters, title, platform, count)``.
    """
    # Best-effort cleanup.  The directory is removed only after all files
    # are gone; removing it inside the loop fails while files remain.
    try:
        for names in os.listdir(r'.\CurrentCel/'):
            os.remove(r'.\CurrentCel/'+names)
        os.removedirs(r'.\CurrentCel')
    except OSError:
        # A missing or locked folder must not stop the listing.
        pass

    Recs = os.listdir(os.getcwd())
    newList = []
    listdata = dict()
    j = 0

    with open('lastChecked.txt', 'r') as ftime:
        prevTime = float(ftime.readline())
    with open('lastChecked.txt', 'w') as ftime:
        ftime.write(str(time.time()))

    for i in Recs:
        (nameLeft, ext) = os.path.splitext(i)
        if ext != '.tgz':
            # '.tar' archives used to be opened and their '.CEL.gz' members
            # counted, but the count was never stored, so they are skipped.
            continue

        newList.append([i])
        geoListFile = nameLeft + ".pickle"
        if not os.path.isfile(geoListFile) or float(fmt.filemtime(i)) > prevTime:
            filelib = tarfile.TarFile.gzopen(i)
            try:
                nameHolder = filelib.getnames()
                # Relies on member order: [0] is the XML, [1] the platform.
                PForm = nameHolder[1][:-10]
                xmlMember = filelib.extractfile(nameHolder[0])
                minimal = minidom.parse(xmlMember).childNodes[0]
            finally:
                # The original kept every archive open.
                filelib.close()
            titleText = minimal.childNodes[-2].childNodes[3].childNodes[0].toxml()
            listdata[j] = str(nameLeft[:-4]),titleText, PForm, len(nameHolder)-2
            rHoward = [nameLeft,titleText,PForm,len(nameHolder)-2]
            mP.spickle(geoListFile,rHoward)
        else:
            rHoward = mP.opickle(geoListFile)
            listdata[j] = str(rHoward[0][:-4]),rHoward[1],rHoward[2],rHoward[3]

        j += 1

    return [newList,listdata]
Example #11
0
import Catche as mP

na = [
    'A', 'C', 'G', 'T', 'R', 'Y', 'S', 'W', 'K', 'M', 'B', 'D', 'H', 'V', 'N',
    '.'
]

a = dict()
alpha = 1
for i in na:
    for j in na:
        a[i + j] = alpha
        alpha += 1

seqIORec = SeqIO.read(
    r'C:\Users\francis\Documents\Monguis\BioGui\Nucleic Acids\NC_003074.gbk',
    'genbank')
trace = 0
outcome = []

while trace < len(seqIORec.seq) - 1:
    temp = str(seqIORec.seq[trace:trace + 2])
    outcome.append(a[temp])
    trace += 2
if len(seqIORec.seq) % 2 == 1:
    temp = str(seqIORec.seq[trace] + '.')
    outcome.append(a[temp])
    trace += 2
mP.spickle('plainseq.pickle', seqIORec.seq)
mP.spickle('modseq.pickle', outcome)
Example #12
0
            diff -= 1
        pool.map(cS.countSort,baggage)

    i = 0
    while i < lenX:
        sorts = Catche.opickle(r'.\GeneExpressions\CurrentCel/SortAndRank' + names[i] + r'.pickle')
        sortedVals.append(sorts[0])
        rankerVals.append(sorts[1])
        print 'ranked and sorted'
        i += 1
    sortedVals = nP.array(sortedVals)
    meanVals = nP.mean(sortedVals, axis=0)
    for i,n in enumerate(names):
        out.append([])
        for mV in meanVals:
            out[i+1].append(0)
        for j,mV in enumerate(meanVals):
            out[i+1][rankerVals[i][j]] = mV
    print 'done qn'
        
    i = 0
    while i < lenX:
        Catche.spickle(r'.\GeneExpressions\CurrentCel/RMAPreSum' + names[i] + r'.pickle',out[i + 1])
        i += 1
    #print out[0][:5]
    #print out[1][:5]
    #print out[2][:5]
    #print out[-1][:5]
    #print out[0][:20]
    #return out
Example #13
0
    i = 0
    while i < lenX:
        sorts = Catche.opickle(r'.\GeneExpressions\CurrentCel/SortAndRank' +
                               names[i] + r'.pickle')
        sortedVals.append(sorts[0])
        rankerVals.append(sorts[1])
        print 'ranked and sorted'
        i += 1
    sortedVals = nP.array(sortedVals)
    meanVals = nP.mean(sortedVals, axis=0)
    for i, n in enumerate(names):
        out.append([])
        for mV in meanVals:
            out[i + 1].append(0)
        for j, mV in enumerate(meanVals):
            out[i + 1][rankerVals[i][j]] = mV
    print 'done qn'

    i = 0
    while i < lenX:
        Catche.spickle(
            r'.\GeneExpressions\CurrentCel/RMAPreSum' + names[i] + r'.pickle',
            out[i + 1])
        i += 1
    #print out[0][:5]
    #print out[1][:5]
    #print out[2][:5]
    #print out[-1][:5]
    #print out[0][:20]
    #return out
Example #14
0
# Summarise the RMAPreSum values probe set by probe set.
# `names`, `i`, `ProbeSet`, `geoMat` and `getMedPol` come from earlier code.
rollingSum = 0
valMat = [
    Catche.opickle(r'.\Gene Expressions\CurrentCel\RMAPreSum' + n + r'.pickle')
    for n in names
]
print('begin sum')
while i < len(ProbeSet):
    print(i)
    upper = ProbeSet[i][1]

    # One list per sample holding the values in [rollingSum, upper).
    rHoMat = []
    for n in range(len(names)):
        column = []
        k = rollingSum
        while k < upper:
            column.append(valMat[n][k])
            k += 1
        rHoMat.append(column)

    temp = getMedPol(rHoMat)
    for n, na in enumerate(names):
        if not na == geoMat[n][0]:
            # Row label does not match the sample name; nothing is appended.
            print('??????????????????????????????')
            continue
        geoMat[n].append(temp[n])

    rollingSum = upper
    i += 1
print('done rmaSum')

# Final row: the first field of every ProbeSet entry.
geoMat.append([pname[0] for pname in ProbeSet])
Catche.spickle(r'.\Gene Expressions\CurrentCel/RMAFinal.pickle', geoMat)
Example #15
0
# Per-probe-set summarisation of the RMAPreSum pickles.
# `names`, `i`, `ProbeSet`, `geoMat` and `getMedPol` are set up by earlier code.
rollingSum = 0
valMat = []
for sample in names:
    presumPath = r'.\Gene Expressions\CurrentCel\RMAPreSum' + sample + r'.pickle'
    valMat.append(Catche.opickle(presumPath))
print('begin sum')
while i < len(ProbeSet):
    print(i)

    # Gather, for each sample, the values from rollingSum up to the bound
    # stored in this ProbeSet entry.
    rHoMat = [[] for _ in names]
    k = rollingSum
    while k < ProbeSet[i][1]:
        for n in range(len(names)):
            rHoMat[n].append(valMat[n][k])
        k += 1

    temp = getMedPol(rHoMat)
    for n in range(len(names)):
        if names[n] == geoMat[n][0]:
            geoMat[n].append(temp[n])
        else:
            print('??????????????????????????????')

    # The next probe set starts where this one ended.
    rollingSum = ProbeSet[i][1]
    i += 1
print('done rmaSum')

# Append one extra row holding the first field of every ProbeSet entry.
labels = []
for pname in ProbeSet:
    labels.append(pname[0])
geoMat.append(labels)
Catche.spickle(r'.\Gene Expressions\CurrentCel/RMAFinal.pickle',geoMat)

        
Example #16
0
from Bio import SeqIO
import Catche as mP

# Symbols of the pair alphabet; "." is the filler for an odd-length sequence.
na = "A C G T R Y S W K M B D H V N .".split()

# Assign consecutive codes, starting at 1, to every ordered symbol pair.
a = dict()
alpha = 1
for first in na:
    for pair in [first + second for second in na]:
        a[pair] = alpha
        alpha += 1

seqIORec = SeqIO.read(r"C:\Users\francis\Documents\Monguis\BioGui\Nucleic Acids\NC_003074.gbk", "genbank")

# Work on the plain string form of the sequence and encode it pair by pair.
text = str(seqIORec.seq)
outcome = []
for pairIndex in range(len(text) // 2):
    start = 2 * pairIndex
    outcome.append(a[text[start : start + 2]])
if len(text) % 2 == 1:
    # Trailing unpaired symbol is combined with the "." filler.
    outcome.append(a[text[-1] + "."])

mP.spickle("plainseq.pickle", seqIORec.seq)
mP.spickle("modseq.pickle", outcome)