예제 #1
0
파일: arepa.py 프로젝트: biobakery/arepa
def get_mappingfile(strTaxID, fApprox=True, strDir=c_strDirMapping):
    """Locate a gene-ID mapping file for the given taxonomy ID.

    Lookup order:
      1. The manual mapping table (c_strFileManualMapping), keyed by the
         first two words of the organism name returned by taxid2org.
      2. Files in strDir whose names start with "<taxid>_", trying each
         candidate ID in turn (only strTaxID itself when fApprox is false,
         otherwise every ID returned by org2taxid(..., True)).

    Args:
        strTaxID: Taxonomy ID string; a falsy value yields None.
        fApprox: When true, expand the ID through org2taxid before globbing.
        strDir: Directory searched by the glob fallback.

    Returns:
        Path of the first mapping file found, or None when nothing matches.
    """
    if not strTaxID:
        return None

    # Always start from an empty result so the glob fallback below is
    # reachable even when the manual mapping file is empty.
    astrMapOut = []

    if not sfle.isempty(c_strFileManualMapping):
        # Each non-comment line is "<organism>\t<mapping file name>".
        with open(c_strFileManualMapping) as fileManual:
            pHash = {
                k: v
                for k, v in (a.split('\t')
                             for a in sfle.readcomment(fileManual))
            }
        strOrg = " ".join(taxid2org(strTaxID).split(" ")[:2])
        strManual = pHash.get(strOrg)
        if strManual:
            # Manual entries are resolved against the default mapping
            # directory, not strDir (kept from the original code).
            astrMapOut = [sfle.d(c_strDirMapping, strManual)]

    if not astrMapOut:
        # give an un-prioritized list
        astrIDs = org2taxid(taxid2org(strTaxID),
                            True) if fApprox else [strTaxID]
        for strID in astrIDs:
            astrGlob = glob.glob(sfle.d(strDir, strID + "_*"))
            if astrGlob:
                astrMapOut = astrGlob
                break

    # Return at function level so a manual-mapping hit is actually reported.
    return astrMapOut[0] if astrMapOut else None
def MaAsLin(filePCL):
    """Register the SCons steps that run MaAsLin on one PCL file.

    Optional side files are looked up next to the PCL by swapping its
    suffix for sfle.c_strSufR, c_strSufRC and sArgsExt. Arguments read
    from the args file are forwarded to Maaslin.R, except the flags listed
    in lsIgnore (and the value following each), which are replaced by the
    -I / -i options built from the side files that exist.

    Args:
        filePCL: SCons File node of the input PCL file.
    """
    #Build input file name if they exist or give ""
    strBase = filePCL.get_abspath().replace(sfle.c_strSufPCL, "")
    strR, strRC, strArgs = ((strBase + s)
                            for s in (sfle.c_strSufR, c_strSufRC, sArgsExt))
    fileR, fileRC, fileArgs = ((File(s) if os.path.exists(s) else "")
                               for s in (strR, strRC, strArgs))

    ## Read in an args file if it exists
    lsArgs = []
    if fileArgs:
        # Flatten the space-delimited args file into one token list and
        # close the handle as soon as it has been read.
        with open(fileArgs.get_abspath(), 'r') as fileArgsIn:
            lsArgsTmp = [
                s for lsLine in csv.reader(fileArgsIn, delimiter=" ")
                for s in lsLine
            ]
        fSkip = False
        for s in lsArgsTmp:
            if s in lsIgnore:
                # Drop the ignored flag and remember to drop its value too.
                fSkip = True
                continue
            if fSkip:
                fSkip = False
                continue
            lsArgs.append(s)

    lsInputArgs = ["-I", [fileR]] if fileR else []
    lsInputArgs.extend(["-i", [fileRC]] if fileRC else [])
    lsArgs.extend(lsInputArgs)

    strBase = os.path.basename(strBase)
    fileTSVFile = File(
        sfle.d(fileDirTmp,
               sfle.rebase(filePCL, sfle.c_strSufPCL, sfle.c_strSufTSV)))
    strT = File(
        sfle.d(
            os.path.join(fileDirOutput.get_abspath(), strBase,
                         strBase + sfle.c_strSufTXT)))

    #Transpose PCL
    sfle.spipe(pE, filePCL, c_fileProgTranspose, fileTSVFile)
    #Run MaAsLin
    sfle.op(pE, c_fileProgMaaslin,
            lsArgs + [[True, strT], [False, fileTSVFile]])
    if fileArgs:
        Depends(c_fileProgMaaslin, fileArgs)
    Default(strT)
예제 #3
0
def funcDownloadRAW(alistTs):
    """Register a download for every raw sample file in alistTs.

    Each entry's string form is expected to start with a GSM accession
    followed by a ".": the URL is built from c_strURLSupp, the accession
    with its last three characters replaced by "nnn/", the accession
    itself, and the full file name.
    """

    def _nnnModify(strID):
        # ("GSM<digits minus last three>nnn/", "GSM<digits>")
        strGSM = re.findall(r'GSM\d+', strID)[0]
        return strGSM[:-3] + "nnn/", strGSM

    for fileCEL in alistTs:
        strName = str(fileCEL)
        strDirNNN, strGSM = _nnnModify(strName.split(".")[0])
        sfle.download(pE, sfle.d(c_strURLSupp, strDirNNN, strGSM, strName))
예제 #4
0
파일: arepa.py 프로젝트: biobakery/arepa
def path_repo():
    """Return the repository directory (directly under the arepa root)
    that contains the current working directory.

    The arepa root prefix is cut off the cwd, the remainder is reduced to
    its first path component, and the root is prepended again before the
    result is passed through sfle.d(..., "").
    """
    strRoot = path_arepa()
    strRel = os.getcwd()[len(strRoot):]
    # Strip trailing components until only the leading one is left.
    strHead = os.path.split(strRel)[0]
    while strHead:
        strRel = strHead
        strHead = os.path.split(strRel)[0]
    return sfle.d(strRoot + strRel, "")
예제 #5
0
def funcIDsTXT(target, source, env):
    """SCons action: list the series directory on the FTP host and write
    every GSE (optionally GSE-GPL) identifier found to the target file,
    one per line.
    """
    strT, astrSs = sfle.ts(target, source)
    astrIDs = []
    for strEntry in sfle.ftpls(c_strHost, sfle.d(c_strPath, c_strID)):
        mtch = re.search(r'(GSE\d+(?:-GPL\d+)?)', strEntry)
        if mtch:
            astrIDs.append(mtch.group(1))
    with open(strT, "w") as fileOut:
        # Newline-separated list with a trailing newline.
        fileOut.write("\n".join(astrIDs) + "\n")
    return None
예제 #6
0
파일: arepa.py 프로젝트: biobakery/arepa
def _taxdump():
    """Return the (taxid -> organism, organism -> taxid) lookup tables.

    The tables are module-level caches filled on first use from
    "taxdump.txt" in the arepa tmp directory; each line holds
    "<organism>\\t<taxid>". A missing or unreadable file leaves the
    tables empty. Access is serialized through s_lockTaxdump.

    Returns:
        Tuple (s_hashTaxID2Org, s_hashOrg2TaxID).
    """
    global s_lockTaxdump, s_hashTaxID2Org, s_hashOrg2TaxID

    s_lockTaxdump.acquire()
    try:
        if (s_hashTaxID2Org is None) or (s_hashOrg2TaxID is None):
            s_hashTaxID2Org = {}
            s_hashOrg2TaxID = {}
            strTaxIDs = sfle.d(path_arepa(), sfle.c_strDirTmp, "taxdump.txt")
            try:
                with open(strTaxIDs) as fileTaxIDs:
                    for strLine in fileTaxIDs:
                        strOrg, strID = strLine.strip().split("\t")
                        s_hashTaxID2Org[strID] = strOrg
                        s_hashOrg2TaxID[strOrg] = strID
            except IOError:
                # Best effort: without a taxdump the tables stay empty.
                pass
    finally:
        # Release even if a malformed line raised, so later callers
        # do not block forever on the lock.
        s_lockTaxdump.release()

    return (s_hashTaxID2Org, s_hashOrg2TaxID)
예제 #7
0
def getGPL(target, source, env):
    """SCons action: fetch the platform annotation file and record the GPL ID.

    Targets: [annotation file, platform file]. Sources: [metadata CSV].
    The GPL ID is the second part of c_strID when it has the form
    "<x>-<y>"; otherwise it is the "platform_id" column of the first row
    of the metadata CSV. The annotation is downloaded with wget only when
    the ID is listed in c_fileAnnot; otherwise an empty file is touched.
    """
    astrTs, astrSs = ([f.get_abspath() for f in a] for a in (target, source))
    strAnnot, strPlatform = astrTs[:2]
    strRMeta = astrSs[0]
    # Read the metadata inside a context manager so the handle is closed;
    # an empty CSV still raises IndexError as before.
    with open(strRMeta) as fileRMeta:
        pid = list(csv.DictReader(fileRMeta))[0]["platform_id"]
    astrIDParts = c_strID.split("-")
    strGPLID = astrIDParts[1] if len(astrIDParts) == 2 else pid
    listGPL = [
        v.replace(".annot.gz", "") for v in sfle.readcomment(c_fileAnnot)
    ]
    if strGPLID in listGPL:
        #Annotation file exist, download
        sfle.ex([
            "wget",
            sfle.d(c_strURLGPL, strGPLID + ".annot.gz"), "-O", strAnnot
        ])
    else:
        #Annotation file does not exist, skip download
        sfle.ex(["touch", strAnnot])
    #Make platform file containing gpl identifier
    with open(strPlatform, "w") as outputf:
        outputf.write(strGPLID)
예제 #8
0
 def glob(self, fn):
     """Return the sorted self.fin() results for every node matching
     pattern fn under self.fileDirInput."""
     pattern = sfle.d(self.fileDirInput, fn)
     found = [self.fin(node) for node in self.lenv.Glob(pattern)]
     found.sort()
     return found
예제 #9
0
 def glob_tmp(self, fn):
     """Return the sorted string forms of every node matching pattern fn
     under self.fileDirTmp."""
     pattern = sfle.d(self.fileDirTmp, fn)
     names = [str(node) for node in self.lenv.Glob(pattern)]
     names.sort()
     return names
예제 #10
0
파일: arepa.py 프로젝트: biobakery/arepa
def level():
    """Return how deep the current working directory sits below the
    repository directory given by path_repo().

    The depth is the number of times os.path.split can strip a trailing
    component from the cwd (relative to the repository path) while a
    non-empty head remains.
    """
    strRel = os.getcwd()[len(path_repo()):]
    iDepth = 0
    strHead = os.path.split(strRel)[0]
    while strHead:
        iDepth += 1
        strHead = os.path.split(strHead)[0]
    return iDepth


# Location of SConstruct.py inside the arepa source directory.
c_strProgSConstruct = sfle.d(path_arepa(), sfle.c_strDirSrc, "SConstruct.py")

#===============================================================================
# Gene ID conversion
#===============================================================================

#Constants
# Directory "uniprotko" under GeneMapper's etc directory; used as the
# default search directory for mapping files.
c_strDirMapping = sfle.d(path_arepa(), "GeneMapper", sfle.c_strDirEtc,
                         "uniprotko")
# File "manual_mapping.txt" under GeneMapper's etc directory.
c_strFileManualMapping = sfle.d(path_arepa(), "GeneMapper", sfle.c_strDirEtc,
                                "manual_mapping.txt")


def genemapper():
    """Return the location of SConscript_genemapping.py in the arepa
    source directory."""
    strScript = "SConscript_genemapping.py"
    return sfle.d(path_arepa(), sfle.c_strDirSrc, strScript)
예제 #11
0
import sys
import metadata
import glob


def test(iLevel, strID, hashArgs):
    """Return True only for directory level 1; strID and hashArgs are
    not consulted."""
    fMatch = (iLevel == 1)
    return fMatch


# Stop here when a name "testing" is defined in this namespace
# (presumably the caller only wants test() above -- confirm with the driver).
if "testing" in locals():
    sys.exit()

pE = DefaultEnvironment()

# Dataset identifier as reported by arepa.cwd().
c_strID = arepa.cwd()
# Input "mpidbc" in the repository tmp directory.
c_fileInputC = sfle.d(pE, arepa.path_repo(), sfle.c_strDirTmp, "mpidbc")
# Per-dataset outputs, all named after c_strID.
c_fileIDPKL = sfle.d(pE, c_strID + ".pkl")
c_fileIDDAB = sfle.d(pE, c_strID + ".dab")
# NOTE(review): unlike its siblings this call does not pass pE -- confirm
# whether that is intentional.
c_fileIDQUANT = sfle.d(c_strID + ".quant")
c_fileIDRawDAT = sfle.d(pE, c_strID + "_00raw.dat")
c_fileIDDAT = sfle.d(pE, c_strID + ".dat")

# Helper programs: unpickle.py comes from the arepa source directory,
# the c2* scripts from this repository's source directory.
c_fileProgUnpickle = sfle.d(pE, arepa.path_arepa(), sfle.c_strDirSrc,
                            "unpickle.py")
c_fileProgC2Metadata = sfle.d(pE, arepa.path_repo(), sfle.c_strDirSrc,
                              "c2metadata.py")
c_fileProgC2DAT = sfle.d(pE, arepa.path_repo(), sfle.c_strDirSrc, "c2dat.py")
# Manual curation file for this dataset under etc/manual_curation/.
c_fileInputManCurTXT = sfle.d(pE, arepa.path_repo(), sfle.c_strDirEtc,
                              "manual_curation/", c_strID + sfle.c_strSufTXT)

c_fileInputSConscriptGM = sfle.d(pE, arepa.path_arepa(), sfle.c_strDirSrc,
예제 #12
0
import gzip


def test(iLevel, strID, hashArgs):
    """Return True for a level-2 directory whose ID starts with "GDS";
    hashArgs is not consulted."""
    if iLevel != 2:
        return False
    return strID.startswith("GDS")


# Stop here when a name "testing" is defined in this namespace
# (presumably the caller only wants test() above -- confirm with the driver).
if "testing" in locals():
    sys.exit()

pE = DefaultEnvironment()

# Dataset identifier; its first two "-"-separated parts are taken as the
# GDS and GPL accessions.
c_strID = arepa.cwd()
c_strGDS, c_strGPL = c_strID.split("-")[:2]

# Shared SConscripts from the arepa source directory.
c_fileInputSConscript = sfle.d(pE, arepa.path_arepa(), sfle.c_strDirSrc,
                               "SConscript_pcl-dab.py")
c_fileRSConscript = sfle.d(pE, arepa.path_arepa(), sfle.c_strDirSrc,
                           "SConscript_rpackage.py")
# SOFT file of the parent GDS, one directory up.
c_fileInputSOFTGZ = sfle.d(pE, "../" + c_strGDS + ".soft.gz")
c_fileInputManCurTXT = sfle.d(pE, arepa.path_repo(), sfle.c_strDirEtc,
                              "manual_curation/", c_strID + ".txt")
# First non-comment line of etc/preprocess.
# NOTE(review): the [0] index raises IndexError if readcomment returns an
# empty list; no fallback is provided here.
c_filePPfun = sfle.d(pE, arepa.path_repo(), sfle.c_strDirEtc, "preprocess")
c_strPPfun = sfle.readcomment(c_filePPfun)[0]

# Per-dataset files in the current directory.
c_fileTaxa = sfle.d(pE, "taxa.txt")
c_fileStatus = sfle.d(pE, "status.txt")
c_filePlatform = sfle.d(pE, "platform.txt")
c_fileIDMap = sfle.d(pE, c_strID + ".map")
c_fileIDMapRaw = sfle.d(pE, c_strID + "_raw.map")
c_fileIDPKL = sfle.d(pE, c_strID + ".pkl")
c_fileGPLTXTGZ = sfle.d(pE, c_strGPL + ".annot.gz")
예제 #13
0
import sys


def test(iLevel, strID, hashArgs):
    """Return True for a level-1 directory whose ID starts with "GSE";
    hashArgs is not consulted."""
    if iLevel != 1:
        return False
    return strID.startswith("GSE")


# Stop here when a name "testing" is defined in this namespace
# (presumably the caller only wants test() above -- confirm with the driver).
if "testing" in locals():
    sys.exit()
pE = DefaultEnvironment()

# Dataset identifier and the FTP host/path whose listing is read for it.
c_strID = arepa.cwd()
c_strHost = "ftp.ncbi.nih.gov"
c_strPath = "pub/geo/DATA/SeriesMatrix/"

# Output file named "<ID>.txt".
c_fileIDTXT = sfle.d(pE, c_strID + ".txt")

# Bring the exported "hashArgs" variable into this script's namespace.
Import("hashArgs")

#==============================================================================
# Fetch platform count and recurse
#==============================================================================


def funcIDsTXT(target, source, env):
    strT, astrSs = sfle.ts(target, source)
    astrFiles = sfle.ftpls(c_strHost, sfle.d(c_strPath, c_strID))
    astrFiles = [
        mtch.group(1) for mtch in [
            _f for _f in (re.search(r'(GSE\d+(?:-GPL\d+)?)', s)
                          for s in astrFiles) if _f
예제 #14
0
def funcGeneIDMapping(pE,
                      fileDATin,
                      strGeneFrom=None,
                      fileLOGout=None,
                      strMAPin=None,
                      aiCOL=[0, 1],
                      iSkip=0,
                      iLevel=2):
    """Register a gene-ID mapping step for fileDATin and return its node(s).

    Args:
        pE: SCons environment.
        fileDATin: Input data file; the output is named
            "<base>" + c_strMapped + "<counter>" + "<ext>".
        strGeneFrom: Source gene ID type. When omitted it is looked up in
            m_hashGeneIDs by organism name (first iLevel words); when still
            unknown, "-x" is passed to the mapper instead of "-f".
        fileLOGout: Optional log file, passed with "-l".
        strMAPin: Mapping file, or a value convertible to int, in which case
            it is treated as a taxonomy ID and the mapping file is searched.
        aiCOL: Column list; passed to the mapper as str(aiCOL) with "-c".
            The default list is shared between calls but is not mutated here.
        iSkip: Value passed with "-s".
        iLevel: Number of leading words of the organism name used for the
            m_hashGeneIDs lookup.

    Returns:
        The result of pE.Depends(...) on the mapper operation.
    """
    # A numeric strMAPin is really a taxonomy ID, not a mapping file.
    try:
        int(strMAPin)
        strTaxID, strMAPin = strMAPin, None
    except (ValueError, TypeError):
        strTaxID = None
    # Try to find taxid
    if not strTaxID:
        astrMatch = re.findall(r'taxid_([0-9]+)', c_strID)
        strTaxID = astrMatch[0].strip() if astrMatch else None
    if not strGeneFrom:
        #if strGeneFrom is not specified, try to retrieve it from
        #the "manual_geneids.txt" file in the respective etc directory, else None
        #this problem tends to arise in mixed microbial network data
        if strTaxID and m_hashGeneIDs:
            strOrg = " ".join(arepa.taxid2org(strTaxID).split(" ")[:iLevel])
            strGeneFrom = m_hashGeneIDs.get(strOrg)
    if not (strMAPin):
        # Automatically generated map: looked up in c_strDirData when the
        # dataset ID equals the repository name, else by bare file name.
        strDirAuto = c_strDirData if (c_strID == c_strPathRepo) else ""
        strAutoMAPtmp = sfle.d(strDirAuto, c_strID + c_strSufMap)
        strAutoMAP = strAutoMAPtmp if os.path.exists(strAutoMAPtmp) else None
        # Use manual mapping files, files that are deeper down have priority
        #else try automatically generated mapping file
        afileMaps = Glob(sfle.d(c_strPathTopMapping, "*" + c_strSufMap))
        strTopMAP = str(afileMaps[0]) if afileMaps else None
        strMAPManTmp = sfle.d(c_strDirManMap, c_strID + c_strSufMap)
        # Priority: per-dataset manual map, then top-level manual map,
        # then the automatically generated map.
        strMAPin = (strMAPManTmp if os.path.exists(strMAPManTmp) else
                    None) or strTopMAP or strAutoMAP
        # Use provided mapping files
        if not (strMAPin) and strTaxID:
            for fileMAPname in Glob(
                    sfle.d(c_strPathUniprotKO, "*" + c_strSufMap)):
                # The tax ID must be delimited by non-digits on both sides
                # so that e.g. "123" does not match inside "1234".
                if re.search(r'\D' + strTaxID + r'\D', str(fileMAPname)):
                    strMAPin = str(fileMAPname)
                    break
        # Else ask arepa to figure out an appropriate mapping file
        if not (strMAPin) and strTaxID:
            strMAPin = arepa.get_mappingfile(strTaxID)
        # Else use BridgeDB files
        if not (strMAPin) and strTaxID:
            for strSpeciesName in list(c_pHashBridgeDB.keys()):
                if strSpeciesName in arepa.taxid2org(strTaxID):
                    strMAPin = c_pHashBridgeDB[strSpeciesName]
                    break

    # Output name: input base + mapped marker + running counter + extension.
    strBase, strExt = os.path.splitext(str(fileDATin))
    strCount = funcCounter(g_iterCounter)
    strT = strBase + c_strMapped + strCount + strExt

    # Assemble the mapper command line piece by piece.
    aastrPrefix = [[str(fileDATin)], [True, strT], "-c", str(aiCOL)]
    astrGeneFrom = ["-f", strGeneFrom] if strGeneFrom else ["-x"]
    astrGeneTo = ["-t", c_astrGeneTo[0]]
    astrSkip = ["-s", iSkip]
    afileLOGout = ["-l", [True, str(fileLOGout)]] if fileLOGout else []
    astrMapIn = ["-m", [strMAPin]] if strMAPin else []

    aastrArgs = aastrPrefix + astrGeneFrom + astrGeneTo + astrSkip + afileLOGout + astrMapIn

    # NOTE(review): strMAPin can still be None at this point; Precious and
    # in_directory are called with it regardless -- confirm they accept None.
    pE.Precious(strMAPin)
    return pE.Depends(
        sfle.op(pE, c_funcGeneMapper, aastrArgs),
        sfle.scons_child(
            pE, c_strPathGeneMapper, None, None,
            None, ([strMAPin] if not (sfle.in_directory(
                strMAPin, os.path.abspath(arepa.cwd()))) else None)))
예제 #15
0
"""

import sys
import csv
import pickle
import sfle
import arepa
import os
import metadata
import glob
from subprocess import call as ex

# Column list, skip count and number of columns used by this script.
c_aiCOL = [0]
c_iSkip = 2
c_iCOL = len(c_aiCOL)
# Per-dataset files, all named after c_strID.
c_fileIDNormPCL = sfle.d(pE, c_strID + "_01norm.pcl")
c_fileIDPCL = sfle.d(pE, c_strID + ".pcl")
c_fileIDDAB = sfle.d(pE, c_strID + ".dab")
c_fileIDQUANT = sfle.d(pE, c_strID + ".quant")
c_fileIDPKL = sfle.d(pE, c_strID + ".pkl")
c_fileStatus = sfle.d(pE, "status.txt")
c_fileIDMap = sfle.d(pE, c_strID + ".map")
c_fileIDMapRaw = sfle.d(pE, c_strID + "_raw.map")
# Directory "manual_mapping" under the repository's etc directory.
c_strDirManMap = sfle.d(arepa.path_repo(), sfle.c_strDirEtc, "manual_mapping")

# Intermediate mapped PCL files.
c_fileIDMappedPCL = sfle.d(pE, c_strID + "_00mapped.pcl")
c_fileIDMappedPCL2 = sfle.d(pE, c_strID + "_01mapped.pcl")

# Flag file "sleipnir" in the repository's etc directory.
c_fileFlagSleipnir = sfle.d(pE, arepa.path_repo(), sfle.c_strDirEtc,
                            "sleipnir")
예제 #16
0
 def fsrc(self, fn):
     """Return the ``path`` attribute of the File node for fn under
     self.fileDirSrc."""
     node = self.lenv.File(sfle.d(self.fileDirSrc, fn))
     return node.path
예제 #17
0
annot2map.py: 
 
parse mapping files
begins with !platform_table_begin 
ends with !platform_table_end 
"""

import sfle
import glob
import csv
import sys
import re
import arepa
import gzip

# File "mapping" in the repository's etc directory; each non-comment line
# has the form "<header regex> % <gene ID type>".
c_fileMapping = sfle.d(arepa.path_repo(), sfle.c_strDirEtc, "mapping")

# Read the mapping file once and close it, instead of opening it twice
# and leaving both handles open.
with open(c_fileMapping) as _fileMapping:
    _astrMappingLines = list(sfle.readcomment(_fileMapping))

# Header pattern -> gene ID type. Entries from the mapping file take
# precedence; the built-in table is used only when the file has none.
c_hashHead = {
    k: v
    for (k, v) in ([y.strip() for y in x.split("%")]
                   for x in _astrMappingLines)
} if _astrMappingLines else {
    "^ID .*? platform": "Affy",
    "Entrez Gene Symbol": "HGNC",
    "Uniprot .*? Symbol": "Uniprot/TrEMBL",
    "^(Entrez)? UniGene Symbol": "UniGene",
    "Entrez Unigene Identifier": "UniGene_ID",
    "GenBank Accession": "GB_ACC",
    "Entrez Gene identifier": "Entrez Gene",
    "GenBank Identifier": "GenBank",
}

# Command line: [1] gzipped annotation input, [2] output file (both optional).
iArg = len(sys.argv)
strFileAnnotGZ = sys.argv[1] if iArg > 1 else None
strFileOut = sys.argv[2] if iArg > 2 else None
예제 #18
0
 def fin(self, fn):
     """Return the string form of the File node for fn.

     A str is resolved under self.fileDirInput; anything else is handed
     to self.lenv.File unchanged.
     """
     target = sfle.d(self.fileDirInput, fn) if isinstance(fn, str) else fn
     return str(self.lenv.File(target))
예제 #19
0
import metadata
import re


def test(iLevel, strID, hashArgs):
    """Return True for a level-2 directory whose ID starts with "GSE";
    hashArgs is not consulted."""
    if iLevel != 2:
        return False
    return strID.startswith("GSE")


# Stop here when a name "testing" is defined in this namespace
# (presumably the caller only wants test() above -- confirm with the driver).
if "testing" in locals():
    sys.exit()

pE = DefaultEnvironment()

# Dataset identifier and suffix used for the R package output.
c_strID = arepa.cwd()
c_strSufRPackage = "_rpackage"
# Lists kept in the repository tmp directory.
c_fileGPL = sfle.d(pE, arepa.path_repo(), sfle.c_strDirTmp, "gpl.txt")
c_fileAnnot = sfle.d(pE, arepa.path_repo(), sfle.c_strDirTmp, "annot.txt")
# Shared scripts from the arepa source directory.
c_fileInputSConscript = sfle.d(pE, arepa.path_arepa(), sfle.c_strDirSrc,
                               "SConscript_pcl-dab.py")
c_fileRSConscript = sfle.d(pE, arepa.path_arepa(), sfle.c_strDirSrc,
                           "SConscript_rpackage.py")
c_fileProgUnpickle = sfle.d(pE, arepa.path_arepa(), sfle.c_strDirSrc,
                            "unpickle.py")
# Repository-local R script and manual curation file.
c_fileInputGSER = sfle.d(pE, arepa.path_repo(), sfle.c_strDirSrc, "gse.R")
c_fileInputManCurTXT = sfle.d(pE, arepa.path_repo(), sfle.c_strDirEtc,
                              "manual_curation/", c_strID + ".txt")

# Configuration files "preprocess" and "raw" in the repository's etc directory.
c_filePPfun = sfle.d(pE, arepa.path_repo(), sfle.c_strDirEtc, "preprocess")
c_fileRunRaw = sfle.d(pE, arepa.path_repo(), sfle.c_strDirEtc, "raw")

c_fileIDPKL = sfle.d(pE, c_strID + ".pkl")
예제 #20
0
파일: arepa.py 프로젝트: biobakery/arepa
def genemapper():
    """Return the location of SConscript_genemapping.py in the arepa
    source directory."""
    strScript = "SConscript_genemapping.py"
    return sfle.d(path_arepa(), sfle.c_strDirSrc, strScript)
예제 #21
0

# Stop here when a name "testing" is defined in this namespace
# (presumably set by a driver that only probes the script -- confirm).
if "testing" in locals():
    sys.exit()

pE = DefaultEnvironment()

# Dataset identifier with the "-RAW" marker removed.
c_strID = arepa.cwd().replace("-RAW", "")
# FTP location of the supplementary sample files.
c_strURLGEO = 'ftp.ncbi.nih.gov'
c_strURLGEOsupp = 'pub/geo/DATA/supplementary/samples/'
c_strURLSupp = 'ftp://' + c_strURLGEO + '/' + c_strURLGEOsupp
# Inputs located one directory up.
c_strFileGSM = "../GSM.txt"
c_strFilePCL = "../" + c_strID + ".pcl"

# Non-comment lines of GSM.txt, used as the list of download targets.
c_listTs = sfle.readcomment(c_strFileGSM)
c_fileProgReadCel = sfle.d(pE, arepa.path_repo(), sfle.c_strDirSrc,
                           "readCel.R")
c_fileProgProcessRaw = sfle.d(pE, arepa.path_repo(), sfle.c_strDirSrc,
                              "preprocessRaw.R")
# RData file names: input keeps the "-RAW" marker, output drops it.
c_strInputRData = arepa.cwd() + ".RData"
c_strOutputRData = c_strInputRData.replace("-RAW", "")

# First non-comment line of etc/preprocess, or "affy::rma" when the file
# yields no lines (note: the file is read twice by this expression).
c_filePPfun = sfle.d(pE, arepa.path_repo(), sfle.c_strDirEtc, "preprocess")
c_strPPfun   = sfle.readcomment( c_filePPfun )[0] if \
     sfle.readcomment( c_filePPfun ) else "affy::rma"

# Metadata tables located one directory up.
c_fileExpTable = sfle.d(pE, "../" + c_strID + "_exp_metadata.txt")
c_fileCondTable = sfle.d(pE, "../" + c_strID + "_cond_metadata.txt")

# Bring the exported "hashArgs" variable into this script's namespace.
Import("hashArgs")

#Download CEL files (if they exist)
예제 #22
0
파일: arepa.py 프로젝트: biobakery/arepa
def path_arepa():

    return sfle.d(os.path.abspath(sfle.d(os.path.dirname(__file__), "..")), "")
예제 #23
0
 def fout(self, fn, mult=None):
     """Return the string form of the File node for fn under
     self.fileDirOutput.

     When mult is truthy, fn is used as a format template and a list with
     one path per element of mult is returned instead.
     """
     if not mult:
         return str(self.lenv.File(sfle.d(self.fileDirOutput, fn)))
     return [self.fout(fn.format(m)) for m in mult]
예제 #24
0
    hashRet.get(i) for i in [c_strCount, c_strQuery, c_strWebEnv]
]

#===========================================================================
# Iteratively download temporary xml files
#===========================================================================


def discrete_list(num, increment):
    """Return the 1-based start offsets needed to page through num items.

    Args:
        num: Total number of items (int).
        increment: Page size (int).

    Returns:
        A list of num // increment + 1 strings: "1", str(1 + increment),
        str(1 + 2 * increment), ...
    """
    # Floor division keeps the count an int; "/" yields a float on
    # Python 3 and makes range() raise TypeError.
    iTries = (num // increment) + 1
    return [str(1 + (a * increment)) for a in range(iTries)]


# Start offsets (as strings) for each page of results.
count_list = discrete_list(int(id_count), c_iIncrement)
# One temporary XML output file per page, numbered from 1.
astrOutput = [
    sfle.d(sfle.c_strDirTmp, geo_id + str(i) + c_strSufXML)
    for i in range(1,
                   len(count_list) + 1)
]
# (output file, start offset, query key, web env) for every page.
query_list = list(
    zip(astrOutput, count_list, [query_key] * len(count_list),
        [web_env] * len(count_list)))

# Download
for astr in query_list:
    strT, strCount, strQuery, strWeb = astr
    # curl flags: -g disables URL globbing, -f fails on HTTP errors,
    # -z makes the request conditional on the existing file's time.
    # The ">" element presumably relies on sfle.ex running the joined
    # command through a shell -- confirm.
    sfle.ex([
        "curl", "-g", "-f", "-z", '"' + strT + '"',
        '"' + c_strURLSum % (strCount, strQuery, strWeb) + '"', ">", strT
    ])
예제 #25
0
 def ftmp(self, fn, mult=None):
     """Return the string form of the File node for fn under
     self.fileDirTmp.

     When mult is truthy, fn is used as a format template and a list with
     one path per element of mult is returned instead.
     """
     if not mult:
         return str(self.lenv.File(sfle.d(self.fileDirTmp, fn)))
     return [self.ftmp(fn.format(m)) for m in mult]
예제 #26
0
	Maintainer: $arepa_maintainer
	Description: Automatically generated R package by arepa 
	Depends: R (>= 2.10.0), affy
	Suggests: survival
	License: MIT
	URL: http://huttenhower.sph.harvard.edu/arepa
"""

import sfle 
import arepa
import sys  
import pickle

# Default NAMESPACE content; the adjacent string literals are concatenated
# into a single shell-quoted exportPattern(...) line.
c_strNAMESPACE		= r"'exportPattern("'"^[[:alpha:]]+"'")'"

# unpickle.py helper from the arepa source directory.
c_fileProgUnpickle	= sfle.d( pE, arepa.path_arepa(), sfle.c_strDirSrc, "unpickle.py" )

def funcCheckRStructure( pE, strDatasetName, filePKL, fileNAMESPACE, fileDESCRIPTION, fileManMaster, strNAMESPACE = c_strNAMESPACE ):
	'''
	Completes necessary components for R package building 
	Assumes that data/ and man/ directories have the corresponding data and manual files per dataset
	
	Input: 
	fileNAMESPACE = pointer to NAMESPACE file to be tracked 
	fileManMaster = pointer to the master manual file in man/ 
	'''

	def _makeDescription( target, source, env ):
		strT, astrSs = sfle.ts( target, source )
		pHash = pickle.load(open(astrSs[0]))
		pHashDescription	= { "Package": strDatasetName.replace("-", "."), "Type": "Package", "Title": pHash.get("title"), 
예제 #27
0
import os
import sys
import sfle
import pickle
import itertools
import re
import time
import sfle

# Running counter used to number successive mapped output files.
g_iterCounter = itertools.count(0)

c_strID = arepa.cwd()
c_strPathRepo = arepa.name_repo()
c_strSufMap = ".map"
c_strMapped = "_mapped"
c_strDirData = sfle.d(arepa.path_repo(), sfle.c_strDirData)
c_strDirManMap = sfle.d(arepa.path_repo(), sfle.c_strDirEtc, "manual_mapping")
# Target gene ID types: the non-comment lines of etc/geneid, falling back
# to arepa's default gene name when the file provides none. The fallback
# must sit outside the readcomment() call; inside it the "or" was applied
# to the (always truthy) path and could never take effect.
c_astrGeneTo = (sfle.readcomment(
    sfle.d(arepa.path_arepa(), sfle.c_strDirEtc, "geneid"))
                or [arepa.genemap_genename()])
# GeneMapper locations.
c_strPathGeneMapper = sfle.d(arepa.path_arepa(), "GeneMapper")
c_strFileUnzipLog = sfle.d(c_strPathGeneMapper, sfle.c_strDirTmp, "unzip.log")
c_strFileCompileLog = sfle.d(c_strPathGeneMapper, sfle.c_strDirTmp,
                             "compile.log")
c_strPathTopMapping = sfle.d(c_strPathGeneMapper, sfle.c_strDirEtc,
                             "manual_mapping")
c_strPathUniprotKO = sfle.d(c_strPathGeneMapper, sfle.c_strDirEtc, "uniprotko")
c_fileProgMakeUnique = sfle.d(arepa.path_arepa(), sfle.c_strDirSrc,
                              "makeunique.py")
c_funcGeneMapper = sfle.d(c_strPathGeneMapper, sfle.c_strDirSrc,
                          "bridgemapper.py")
예제 #28
0
__author__ = "Timothy Tickle and Curtis Huttenhower"
__copyright__ = "Copyright 2012"
__credits__ = ["Timothy Tickle", "Curtis Huttenhower"]
__maintainer__ = "Timothy Tickle"
__email__ = "*****@*****.**"

import argparse
import os
import sfle
import sys

# Suffix of the optional read-config file that accompanies a PCL file.
c_strSufRC = ".read.config"

# Source directory next to this script, and the Maaslin.R program in it.
c_fileDirSrc = Dir(
    sfle.d(os.path.dirname(sfle.current_file()), sfle.c_strDirSrc))
c_fileProgMaaslin = File(sfle.d(c_fileDirSrc, "Maaslin.R"))
# Suffix of the optional per-dataset arguments file.
sArgsExt = ".args"
#Commandline to ignore
lsIgnore = ["-i", "-I", "--input_config", "--input_process"]


def MaAsLin(filePCL):
    #Build input file name if they exist or give ""
    strBase = filePCL.get_abspath().replace(sfle.c_strSufPCL, "")
    strR, strRC, strArgs = ((strBase + s)
                            for s in (sfle.c_strSufR, c_strSufRC, sArgsExt))
    fileR, fileRC, fileArgs = ((File(s) if os.path.exists(s) else "")
                               for s in (strR, strRC, strArgs))

    ## Read in an args file if it exists
예제 #29
0
import arepa
import gzip
import os
import re
import sfle
import sys

def test( iLevel, strID, hashArgs ):
	"""Return True for a level-1 directory whose ID starts with "GDS";
	hashArgs is not consulted."""
	if iLevel != 1:
		return False
	return strID.startswith( "GDS" )
# Stop here when a name "testing" is defined in this namespace
# (presumably the caller only wants test() above -- confirm with the driver).
if "testing" in locals( ):
	sys.exit( )

pE = DefaultEnvironment( )

# Dataset identifier and the files named after it.
c_strID				= arepa.cwd( )
c_fileIDTXT			= sfle.d( pE, c_strID + ".txt" )
c_fileIDSOFTGZ		= sfle.d( pE, c_strID + ".soft.gz" )

# Bring the exported "hashArgs" variable into this script's namespace.
Import( "hashArgs" )

#===============================================================================
# Download SOFT file
#===============================================================================

# URL = hashArgs["c_strURLGDS"] followed by the SOFT file's base name.
sfle.download( pE, hashArgs["c_strURLGDS"] + os.path.basename( str(c_fileIDSOFTGZ) ) )

def funcGPLsTXT( target, source, env ):
	strT, astrSs = sfle.ts( target, source )
	setstrGPLs = set()
	for strLine in gzip.open( astrSs[0] ):
		mtch = re.search( r'^!dataset_platform\s*=\s*(\S+)', strLine )
예제 #30
0
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

SConscript_dat-dab.py: 

shared code across modules 
handles all behavior pertaining to 
mapping dat to dab and generating quant files 
"""

import sfle
import arepa

# Flag file "sleipnir" in the arepa etc directory; funcDAB reads its first
# line and compares it with "True".
c_fileFlagSleipnir = sfle.d(pE, arepa.path_arepa(), sfle.c_strDirEtc,
                            "sleipnir")


def funcDAB(pE, fileOutDAB, afileInDAT):

    astrSleipnir = sfle.readcomment(c_fileFlagSleipnir)
    bSleipnir = (astrSleipnir[0] == "True")
    print("sleipnir", ("On" if bSleipnir else "Off"))

    def _funcDAB(target, source, env):
        strT, astrSs = sfle.ts(target, source)
        strOut, strMap = astrSs[:2]
        return sfle.ex(("Dat2Dab", "-o", strT, "-i",
                        (strOut if sfle.isempty(strMap) else strMap)))

    if bSleipnir: