Ejemplo n.º 1
0
def bridgemapper(istm, ostm, strMap, strCols, strFrom, strTo, ostmLog, iSkip):
    """
    Map the identifiers found in selected columns of a tab-delimited stream.

    Every row of istm is read into memory and folded into an md5 checksum.
    The values found in the requested columns of the non-header rows are
    handed to convertGeneIds(), and the resulting lookup is used to rewrite
    those cells in place.  Cells with no (or an empty) mapping become "".
    Finally the rows are written to ostm; any row that contains an empty
    cell is dropped (this applies to every row, header rows included).

    istm     -- iterable of tab-delimited input lines
    ostm     -- writable stream receiving the tab-delimited output
    strMap   -- path of the mapping file; mapping is only attempted when it
                exists and has nonzero size
    strCols  -- column list wrapped in one leading and one trailing character
                (e.g. "[0,2]"); indices are zero-based
    strFrom  -- passed through to convertGeneIds (presumably the source
                identifier type -- confirm against convertGeneIds)
    strTo    -- passed through to convertGeneIds (presumably the target
                identifier type -- confirm against convertGeneIds)
    ostmLog  -- if truthy, the metadata (checksum and "mapped" flag) is
                saved to it with save_text()
    iSkip    -- number of leading rows to leave unmapped
    """
    # Drop the first and last character (the enclosing brackets) before
    # splitting the comma-separated column indices.
    strCols = strCols[1:-1]
    aiCols = [int(s) for s in re.split(r'\s*,\s*', strCols)] if strCols else []

    # Open a blank metadata object and initialize
    pMeta = metadata.open()
    pMeta.set("mapped", False)

    aastrData = []
    setstrIn = set()
    for astrLine in csv.reader(istm, csv.excel_tab):
        aastrData.append(astrLine)
        pMeta.update_md5sum("\t".join(astrLine))
        # Skip exactly the rows the mapping loop below skips (indices
        # 0 .. iSkip-1).  Counting parsed records rather than physical lines
        # keeps the two loops aligned even for quoted multi-line fields.
        if len(aastrData) <= iSkip:
            continue
        for iCol in aiCols:
            if iCol < len(astrLine):
                setstrIn.add(astrLine[iCol])
            else:
                sys.stderr.write(
                    " +++ ERROR in GeneMapper +++ Number of requested columns "
                    "to map is larger than the number of columns in the input "
                    "data file.\n")
    pMeta.store_checksum()

    hashMap = None
    # Make sure mapping file exists and has nonzero file size
    if strMap and os.path.exists(strMap) and (os.stat(strMap)[6] > 0):
        hashMap = convertGeneIds(setstrIn, strMap, strFrom, strTo)
    else:
        # NOTE: the file checked here is the mapping file (strMap).
        sys.stderr.write(
            "+++ ERROR in GeneMapper +++ Input file does not exist or is "
            "empty. Return empty file. \n")

    if hashMap:
        # Only rewrite the data when at least one identifier was mapped.
        if any(hashMap.values()):
            pMeta.set("mapped", True)
            for astrRow in aastrData[iSkip:]:
                for iCol in aiCols:
                    if iCol < len(astrRow):
                        # Unmapped identifiers are blanked so the row is
                        # filtered out when writing below.
                        astrRow[iCol] = hashMap.get(astrRow[iCol]) or ""
    else:
        sys.stderr.write(
            "+++Error in GeneMapper +++ Empty mapping. "
            "Return original file. \n")

    csvw = csv.writer(ostm, csv.excel_tab)
    # make sure that if the mapping is empty for one of the columns, delete the entire row
    for astrLine in aastrData:
        if all(astrLine):
            csvw.writerow(astrLine)
    if ostmLog:
        pMeta.save_text(ostmLog)
Ejemplo n.º 2
0
def _passthrough(istm, ostm, ostmLog, fMapped):
    """
    Copy the tab-delimited rows of istm to ostm unchanged.

    An md5 checksum of the rows is accumulated in a fresh metadata object;
    when ostmLog is truthy the "mapped" flag is set to fMapped and the
    metadata (checksum included) is saved to ostmLog.
    """
    pMeta = metadata.open()
    csvw = csv.writer(ostm, csv.excel_tab)
    for astrLine in csv.reader(istm, csv.excel_tab):
        csvw.writerow(astrLine)
        pMeta.update_md5sum("\t".join(astrLine))
    pMeta.store_checksum()
    if ostmLog:
        pMeta.set("mapped", fMapped)
        pMeta.save_text(ostmLog)


def _main():
    """
    Command-line entry point: decide whether the input needs mapping.

    The input is copied through untouched, with "mapped" recorded as True,
    when the source and target identifier types are equal or the input has
    at least iMaxLines lines.  It is copied through with "mapped" recorded
    as False when no mapping file is given or the mapping file is empty.
    Otherwise bridgemapper() performs the mapping, optionally after
    gene_sniffer() has replaced the source identifier type.
    """
    args = argp.parse_args()
    iLC = sfle.lc(args.istm.name)

    if (args.strFrom == args.strTo) or (iLC >= args.iMaxLines):
        # if the two gene identifier types are the same, or if the line count
        # exceeds iMaxLines, return the input file.  The metadata object that
        # accumulated the checksum is the one saved, so the checksum is kept.
        _passthrough(args.istm, args.ostm, args.ostmLog, True)
    elif not args.strMap or not os.stat(str(args.strMap)).st_size:
        # if there is no map file specified (or it is empty)
        _passthrough(args.istm, args.ostm, args.ostmLog, False)
    else:
        # if gene sniffer flag is on, try to guess the best possible gene identifier
        if args.fSniffer:
            args.strFrom = gene_sniffer(args.istm, args.strCols)

        bridgemapper(args.istm, args.ostm, args.strMap, args.strCols,
                     args.strFrom, args.strTo, args.ostmLog, args.iSkip)
Ejemplo n.º 3
0
def makeunique(istm, ostm, strSplit, iCols, iSkip, ostmLog):
    '''
    Splits up compressed elements (e.g. a///b c///d) into their cartesian
    products (e.g. a c, a d, b c, b d); then gets rid of duplicate elements in
    the set of unordered tuples (equivalence under permutation).

    istm     -- iterable of tab-delimited input lines
    ostm     -- writable stream receiving the tab-delimited output
    strSplit -- separator of compressed names; if falsy, no splitting is done
    iCols    -- number of leading columns treated as names; the remaining
                columns are carried along as values
    iSkip    -- number of leading rows copied through untouched as headers
    ostmLog  -- if truthy, the metadata ("mapped" flag) is saved to it

    A row is kept only if its names are pairwise distinct and the same
    unordered set of names has not been seen in an earlier row.
    '''
    aastrMatIn = list(csv.reader(istm, csv.excel_tab))
    aastrHeaders, aastrDataIn = aastrMatIn[:iSkip], aastrMatIn[iSkip:]

    # open a blank metadata object
    pMeta = metadata.open()

    if strSplit:
        aastrSplit = []
        for astrRow in aastrDataIn:
            astrNames, astrVals = astrRow[:iCols], astrRow[iCols:]
            # Choice: keep rows without values.
            # strSplit is non-empty here, so a name containing it is itself
            # non-empty; no separate "any non-empty name" check is needed.
            if any(strSplit in strName for strName in astrNames):
                aastrParts = [[s.strip() for s in strName.split(strSplit)]
                              for strName in astrNames]
                aastrSplit.extend(
                    list(astrCombo) + astrVals
                    for astrCombo in itertools.product(*aastrParts))
            else:
                aastrSplit.append(astrRow)
    else:
        aastrSplit = aastrDataIn

    pNames = set()
    aastrUnique = []
    for astrRow in aastrSplit:
        astrNames = astrRow[:iCols]
        setNames = frozenset(astrNames)
        # Reject rows with a repeated name, and rows whose unordered name
        # set was already emitted.
        if len(setNames) == len(astrNames) and setNames not in pNames:
            pNames.add(setNames)
            aastrUnique.append(astrRow)

    pMeta.set("mapped", any(aastrUnique))

    # write output
    csvw = csv.writer(ostm, csv.excel_tab)
    for astrRow in aastrHeaders + aastrUnique:
        csvw.writerow(astrRow)

    # save metadata
    if ostmLog:
        pMeta.save_text(ostmLog)
Ejemplo n.º 4
0
import arepa
import csv
import sys
import metadata

c_strCurated = "curated"

# Exactly two or three command-line arguments are accepted after the
# script name: the ID pickle, the per-experiment table and, optionally,
# the per-condition table.
if len(sys.argv) not in (3, 4):
    raise Exception(
        "usage: pkl2metadata.py <ID.pkl> <per-exp.txt> [per-cond.txt]")

c_fileIDpkl = sys.argv[1]
c_fileExpTable = sys.argv[2]
if len(sys.argv) > 3:
    c_fileCondTable = sys.argv[3]
else:
    c_fileCondTable = None

# NOTE(review): the handle is passed straight to metadata.open and is never
# closed here; whether metadata.open reads it eagerly is not visible from
# this file.
hashMeta = metadata.open(open(c_fileIDpkl, "r"))


def writeTable(hMeta, astrKeys, outputf, bIter=False):
    hMeta = {k: hMeta[k] for k in astrKeys}
    csvw = csv.writer(open(outputf, "w"), csv.excel_tab)
    if bIter:
        astrHeader = hMeta.get("sample_name") or hMeta.get("")
        _astrKeys = [x for x in astrKeys if hMeta.get(x) != astrHeader]
        csvw.writerow(["sample_name"] + _astrKeys)
        for iSample, strSample in enumerate(astrHeader):
            csvw.writerow( [str(x).replace("\n", " ") for x in [ strSample ] + [hMeta.get(s)[iSample] for s in \
            _astrKeys]])
    else:
        csvw.writerow(astrKeys)
        csvw.writerow(
Ejemplo n.º 5
0
             strMethods, strAuthors, strPMIDs, strTaxAs, strTaxBs, strTypes,
             strDBs, strIDs, strConfs):

    metadatum(pMetadata.taxid, [strTaxAs, strTaxBs], 1)
    metadatum(pMetadata.pmid, [strPMIDs], 1)
    metadatum(pMetadata.type, [strTypes.lower()], 2)
    metadatum(pMetadata.technique, [strMethods], 2)


# Command line: <id> [status-file] [manual-file]; the data itself arrives on
# standard input.
if len(sys.argv) < 2:
    raise Exception("Usage: c2metadata.py <id> < <mpidbc>")

strTarget = sys.argv[1]
strStatus = sys.argv[2] if len(sys.argv) > 2 else None
strManual = sys.argv[3] if len(sys.argv) > 3 else None

pMetadata = metadata.open()
cfile.read(sys.stdin, c_iColumns, strTarget, callback, pMetadata)

if strStatus:
    # The first row of the status file is a (key, "True"/"False") pair.
    # The file is closed as soon as it has been read.
    with open(strStatus) as fileStatus:
        strMapped, strBool = list(csv.reader(fileStatus, csv.excel_tab))[0]
    fMapped = (strBool == "True")
    pMetadata.set(strMapped, fMapped)

if strManual:
    # Each row of the manual file is a (key, value) pair stored verbatim.
    with open(strManual) as fileManual:
        for strKey, strVal in csv.reader(fileManual, csv.excel_tab):
            pMetadata.set(strKey, strVal)

pMetadata.save(sys.stdout)