def funcIDNormPCL(target, source, env, iMaxLines=100000):
    """Run Normalizer on a PCL input, or copy only its first three lines.

    The second source is used when ``sfle.readcomment`` returns more than
    ``c_iSkip`` entries for it; otherwise the first source is used.  When
    the chosen input has fewer than ``iMaxLines`` lines (per ``sfle.lc``)
    it is piped through ``Normalizer -t pcl -T medmult``; otherwise only
    ``head -n 3`` of it is written to the target.

    Args:
        target: SCons target(s), unpacked by ``sfle.ts``.
        source: SCons source(s), unpacked by ``sfle.ts``; at least two.
        env: SCons environment (unused here).
        iMaxLines: Line-count threshold at which normalization is skipped.

    Returns:
        Whatever ``sfle.ex`` returns (presumably the command's exit
        status -- confirm against sfle).
    """
    strOut, astrIn = sfle.ts(target, source)

    if len(sfle.readcomment(astrIn[1])) > c_iSkip:
        strIn = astrIn[1]
    else:
        strIn = astrIn[0]

    if sfle.lc(strIn) < iMaxLines:
        strCommand = "Normalizer -t pcl -T medmult < " + strIn
    else:
        # Input too large: emit just the leading three lines instead.
        strCommand = "head -n 3 < " + strIn
    return sfle.ex(strCommand, strOut)
def funcMergeMap(target, source, env):
    """Merge a raw ID map with a taxon mapping file, or copy it unchanged.

    The first three sources are, in order: a taxa file, the merge program,
    and the raw ID map.  The first entry that ``sfle.readcomment`` returns
    for the taxa file is passed to ``arepa.get_mappingfile``; when that
    yields a truthy mapping file the merge program is run as
    ``[program, raw, mapping, target]``, otherwise the raw map is copied
    to the target with ``cp``.

    Returns:
        Whatever ``sfle.ex`` returns for the command that was run.
    """
    strOut, astrIn = sfle.ts(target, source)
    strTaxaFile, strMerger, strRawIDs = astrIn[:3]

    astrTaxa = sfle.readcomment(strTaxaFile)
    strMapping = ""
    if astrTaxa:
        strMapping = arepa.get_mappingfile(astrTaxa[0])

    if strMapping:
        return sfle.ex([strMerger, strRawIDs, strMapping, strOut])
    # No taxon (or no mapping file for it): pass the raw map through.
    return sfle.ex(["cp", strRawIDs, strOut])
def funcRawMap(target, source, env):
    """Build a raw probe map from a platform annotation or a GPL text file.

    The first four sources are, in order: the gzipped platform annotation,
    a text file naming the platform, the annotation-to-map program and the
    GPL-to-text program.  If the annotation file is not empty (per
    ``sfle.isempty``) the annotation-to-map program is run on it;
    otherwise the GPL-to-text program is run on
    ``c_strGPLPath + <platform id>``.

    Returns:
        Whatever ``ex`` returns for the command that was run.
    """
    strT, astrSs = sfle.ts(target, source)
    strGPLTXTGZ, strPlatformTXT, strProgAnnot2Map, strProgGPL2TXT = astrSs[:4]

    # Read the platform ID through a context manager so the handle is
    # closed deterministically instead of being leaked.
    with open(strPlatformTXT) as filePlatform:
        strGPLID = (sfle.readcomment(filePlatform) or [""])[0]

    # NOTE(review): this function calls a bare ``ex`` while its siblings
    # call ``sfle.ex``; left as-is because the name may be imported
    # elsewhere in the file -- confirm it is in scope.
    if not sfle.isempty(str(strGPLTXTGZ)):
        return ex([strProgAnnot2Map, strGPLTXTGZ, strT])
    return ex([strProgGPL2TXT, c_strGPLPath + strGPLID, strT])
def funcRawProcess(target, source, env):
    """Pipe an R script into ``R --no-save``, retrying with a fallback input.

    The first four sources are, in order: the script fed to R on stdin,
    an RData file, the experiment metadata and the condition metadata.
    The first attempt passes the RData file as the leading ``--args``
    value; if that attempt returns a truthy (non-zero) status, a second
    attempt is made with ``c_strFilePCL`` in its place.

    Returns:
        The status of the first attempt when it is falsy, otherwise the
        status of the fallback attempt (both as returned by ``sfle.ex``).
    """
    strOut, astrIn = sfle.ts(target, source)
    strScript, strRData, strExpMeta, strCondMeta = astrIn[:4]

    iStatus = sfle.ex((
        sfle.cat(strScript),
        "| R --no-save",
        "--args",
        strRData,
        strOut,
        c_strPPfun,
        strExpMeta,
        strCondMeta,
    ))
    if not iStatus:
        return iStatus

    # First attempt failed: rerun against c_strFilePCL instead of the RData.
    return sfle.ex((
        sfle.cat(strScript),
        " | R --no-save --args",
        c_strFilePCL,
        strOut,
        c_strPPfun,
        strExpMeta,
        strCondMeta,
    ))
def funcGPLsTXT(target, source, env):
    """Write one ``<c_strID>-<platform>`` line per platform found in a SOFT file.

    Scans the gzipped first source for lines of the form
    ``!dataset_platform = <ID>`` and writes each distinct platform ID,
    prefixed with ``c_strID`` and a dash, to the target file (one per
    line, followed by a trailing newline).

    Returns:
        None.
    """
    strT, astrSs = sfle.ts(target, source)

    # gzip.open defaults to binary mode, so lines are bytes on Python 3 and
    # a str pattern would raise TypeError.  Match with a bytes pattern and
    # decode only the captured ID, which also avoids decoding the whole
    # file with a locale-dependent codec.
    rePlatform = re.compile(br'^!dataset_platform\s*=\s*(\S+)')
    setstrGPLs = set()
    with gzip.open(astrSs[0], "rb") as fileIn:  # closed deterministically
        for bLine in fileIn:
            mtch = rePlatform.search(bLine)
            if mtch:
                setstrGPLs.add(mtch.group(1).decode("utf-8", "replace"))

    # Sorted so the output does not depend on set iteration order, which
    # varies between runs under hash randomization.
    with open(strT, "w") as fileOut:
        fileOut.write("%s\n" % "\n".join(
            "-".join((c_strID, s)) for s in sorted(setstrGPLs)))
    return None
def _makeDescription(target, source, env):
    """Write an R package DESCRIPTION-style file from pickled metadata.

    Loads a metadata mapping from the pickle in the first source and
    writes ``Key: value`` lines to the target.  The package name is
    ``strDatasetName`` (taken from the enclosing scope) with dashes
    replaced by dots; the title comes from the pickled ``title`` entry and
    the remaining fields from ``arepa`` constants or fixed strings.

    Returns:
        None.
    """
    strT, astrSs = sfle.ts(target, source)

    # Pickles must be read in binary mode (text mode fails on Python 3),
    # and the handle is closed as soon as loading finishes.
    # NOTE(security): pickle.load can execute arbitrary code; this is only
    # safe if the metadata pickle is produced by this pipeline itself.
    with open(astrSs[0], "rb") as filePickle:
        pHash = pickle.load(filePickle)

    pHashDescription = {
        "Package": strDatasetName.replace("-", "."),
        "Type": "Package",
        # A missing title would otherwise be None and break the string
        # concatenation below; _makeMasterMan uses the same fallback.
        "Title": pHash.get("title") or "",
        "Version": arepa.c_strVersion,
        "Author": ", ".join(arepa.c_astrAuthors),
        "Date": arepa.c_strDate,
        "Maintainer": arepa.c_strMaintainer,
        "Depends": "R (>= 2.10.0), affy",
        "Suggests": "survival",
        "URL": arepa.c_strURL,
        "License": arepa.c_strLicense,
        "Description": "ARepA generated package",
    }

    with open(strT, "w") as outputf:
        for k, v in pHashDescription.items():
            outputf.write(k + ": " + v + "\n")
def funcIDsTXT(target, source, env):
    """List remote entries and write the GSE identifiers found in them.

    Retrieves a listing via ``sfle.ftpls(c_strHost, sfle.d(c_strPath,
    c_strID))``, extracts the first ``GSE<digits>`` (optionally followed
    by ``-GPL<digits>``) from each entry that contains one, and writes the
    identifiers to the target, one per line, in listing order.

    Returns:
        None.
    """
    strOut, _ = sfle.ts(target, source)

    astrIDs = []
    for strEntry in sfle.ftpls(c_strHost, sfle.d(c_strPath, c_strID)):
        mtchID = re.search(r'(GSE\d+(?:-GPL\d+)?)', strEntry)
        if mtchID:
            astrIDs.append(mtchID.group(1))

    with open(strOut, "w") as fileOut:
        fileOut.write("%s\n" % "\n".join(astrIDs))
    return None
def _makeMasterMan(target, source, env):
    """Write ``\\name``, ``\\title`` and ``\\description`` entries to the target.

    Loads a metadata mapping from the pickle in the first source.  The
    name is ``arepa.cwd()`` with ``-package`` appended; the title and
    description come from the pickled ``title`` and ``gloss`` entries,
    each falling back to an empty string when missing or falsy.

    Returns:
        None.
    """
    strT, astrSs = sfle.ts(target, source)

    # Pickles must be read in binary mode (text mode fails on Python 3),
    # and the handle is closed as soon as loading finishes.
    # NOTE(security): pickle.load can execute arbitrary code; this is only
    # safe if the metadata pickle is produced by this pipeline itself.
    with open(astrSs[0], "rb") as filePickle:
        pHash = pickle.load(filePickle)

    def _metaStr(strDescription, strContent):
        """Format one ``\\<tag>{<content>}`` entry."""
        # NOTE(review): content is written verbatim; if this file is
        # consumed as R documentation, characters such as '%' or braces
        # in the metadata may need escaping -- confirm.
        return "\\" + strDescription + "{" + strContent + "}"

    strDataAccession = arepa.cwd() + "-package"
    strDataTitle = pHash.get("title") or ""
    strDataGloss = pHash.get("gloss") or ""
    aastrOut = [
        ("name", strDataAccession),
        ("title", strDataTitle),
        ("description", strDataGloss),
    ]

    with open(strT, "w") as outputf:
        for strDescription, strContent in aastrOut:
            outputf.write(_metaStr(strDescription, strContent) + "\n")
def funcRet(target, source, env, setstrInclude=setstrInclude, setstrExclude=setstrExclude):
    """Invoke the child SConscript once per accepted ID row in the sources.

    Each source is read as a tab-delimited file.  Rows that are empty or
    whose first column is empty are skipped, as are IDs that are not in a
    non-empty ``setstrInclude`` or that are in ``setstrExclude``.  For
    every remaining row ``env["sconscript_child"]`` is called with the
    target, the first source, the environment and ``c_strID + "-RAW"``.

    Args:
        target: SCons target(s).
        source: SCons source(s); each is parsed as a tab-delimited ID list.
        env: SCons environment providing the ``sconscript_child`` callable.
        setstrInclude: If non-empty, only these IDs are accepted.
        setstrExclude: IDs to reject; a falsy value means no exclusions.

    Returns:
        None.
    """
    strT, astrSs = sfle.ts(target, source)
    for strS in astrSs:
        # Context manager so each input handle is closed rather than leaked.
        with open(strS) as fileIn:
            for astrLine in csv.reader(fileIn, csv.excel_tab):
                if not (astrLine and astrLine[0]):
                    continue
                strID = astrLine[0]
                if setstrInclude and (strID not in setstrInclude):
                    continue
                # Guarded like setstrInclude so a None exclude set means
                # "exclude nothing" instead of raising TypeError.
                if setstrExclude and (strID in setstrExclude):
                    continue
                env["sconscript_child"](target, source[0], env, c_strID + "-RAW")
def _compileR(target, source, env):
    """Run ``R CMD build`` on ``strDirectory`` and write a marker file.

    ``strDirectory`` comes from the enclosing scope.  The directory is
    first made mode 755, then built; afterwards the fixed text
    "R package compiled OK" is written to the target.

    Returns:
        None.
    """
    strMarker, _ = sfle.ts(target, source)

    # NOTE(review): the results of both commands are ignored, so the
    # marker is written even if the build fails -- confirm this is
    # intentional best-effort behavior.
    for astrCommand in (
        ["chmod", "755", strDirectory],
        ["R", "CMD", "build", strDirectory],
    ):
        sfle.ex(astrCommand)

    with open(strMarker, "w") as outputf:
        outputf.write("R package compiled OK")
def funcIDQUANT(target, source, env):
    """Write a fixed, tab-separated row of seven bin values to the target.

    The values are -1.5, -0.5, 0.5, 1.5, 2.5, 3.5 and 4.5, emitted through
    a shell ``echo`` redirected to the target path.

    Returns:
        Whatever ``sfle.ex`` returns for the echo command.
    """
    strOut, astrIn = sfle.ts(target, source)

    # The line count of the first source is computed but never used; the
    # call is retained so behavior stays identical.
    sfle.lc(astrIn[0])

    strCommand = "echo '-1.5\t-0.5\t0.5\t1.5\t2.5\t3.5\t4.5' >" + strOut
    return sfle.ex(strCommand)
def funcIDDAB(target, source, env):
    """Pipe the first source through Distancer, or emit a placeholder.

    When the first source has more than three lines (per ``sfle.lc``) its
    contents are piped into ``Distancer -o <target>``.  Otherwise a bare
    ``echo`` is run with the target passed as the second argument of
    ``sfle.ex`` (presumably the output redirect -- confirm against sfle).

    Returns:
        Whatever ``sfle.ex`` returns for the command that was run.
    """
    strOut, astrIn = sfle.ts(target, source)
    strIn = astrIn[0]

    if sfle.lc(strIn) > 3:
        return sfle.ex((sfle.cat(strIn), " | Distancer -o", strOut))
    return sfle.ex("echo", strOut)
def funcIDKNNPCL(target, source, env, iMaxLines=40000):
    """Run KNNImputer on the first source, or copy only its first three lines.

    When the input has fewer than ``iMaxLines`` lines (per ``sfle.lc``) it
    is fed to ``KNNImputer`` on stdin; otherwise only ``head -n 3`` of it
    is written to the target.

    Args:
        target: SCons target(s), unpacked by ``sfle.ts``.
        source: SCons source(s), unpacked by ``sfle.ts``.
        env: SCons environment (unused here).
        iMaxLines: Line-count threshold at which imputation is skipped.

    Returns:
        Whatever ``sfle.ex`` returns for the command that was run.
    """
    strOut, astrIn = sfle.ts(target, source)
    strIn = astrIn[0]

    if sfle.lc(strIn) < iMaxLines:
        strCommand = "KNNImputer < " + strIn
    else:
        # Input too large: emit just the leading three lines instead.
        strCommand = "head -n 3 < " + strIn
    return sfle.ex(strCommand, strOut)
def _funcDAB(target, source, env):
    """Convert a data file to the target with ``Dat2Dab``.

    The first two sources are the unmapped output and the mapped file.
    The mapped file is used as ``-i`` input unless ``sfle.isempty``
    reports it empty, in which case the unmapped output is used.

    Returns:
        Whatever ``sfle.ex`` returns for the Dat2Dab command.
    """
    strOut, astrIn = sfle.ts(target, source)
    strUnmapped, strMapped = astrIn[:2]

    strInput = strMapped
    if sfle.isempty(strMapped):
        strInput = strUnmapped
    return sfle.ex(("Dat2Dab", "-o", strOut, "-i", strInput))