def main():
    """Parse command-line flags and run the PCBC "all annotations" update.

    Flags:
        --dryrun      Forwarded as ``dryrun`` to both update calls. When set,
                      ``thisCodeInSynapse`` is not called and ``executed`` is
                      passed as None.
        --storetable  Additionally call ``update_annots_table_synapse``.
        --projectId   Forwarded as ``projectId`` to the table update; it is
                      None when the flag is omitted.
    """
    import argparse

    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--dryrun',
        action="store_true",
        default=False,
        help="Run without making changes [default: %(default)s]")

    parser.add_argument(
        '--storetable',
        action="store_true",
        default=False,
        help="Also run the annotation table update [default: %(default)s]")

    parser.add_argument(
        '--projectId',
        type=str,
        help="Project ID forwarded to the table update (used with "
             "--storetable)")

    args = parser.parse_args()
    syn = synapseclient.login(silent=True)

    annots = PCBCAnnotations.PCBCAllAnnotations(syn)
    updater = PCBCAnnotations.PCBCAllAnnotationTableUpdate(syn, annots)

    # The script is only registered via thisCodeInSynapse on a real run, so a
    # dry run passes executed=None.
    # NOTE(review): presumably thisCodeInSynapse stores this script in Synapse
    # under the given parent and returns its ID -- confirm in synapseHelpers.
    my_synapse_id = None
    if not args.dryrun:
        my_synapse_id = synapseHelpers.thisCodeInSynapse(parentId='syn2758110',
                                                         syn=syn)

    updater.update_annots_synapse(executed=my_synapse_id, dryrun=args.dryrun)

    if args.storetable:
        # The return value was never used, so it is no longer bound to a name.
        updater.update_annots_table_synapse(projectId=args.projectId,
                                            dryrun=args.dryrun)
def main():
    """Command-line entry point for the PCBC annotation update.

    Recognized flags:
        --dryrun      Passed through as ``dryrun`` to every update call; it
                      also skips the ``thisCodeInSynapse`` call, leaving
                      ``executed`` as None.
        --storetable  When given, ``update_annots_table_synapse`` is called
                      after the annotation update.
        --projectId   Passed as ``projectId`` to the table update (None if
                      not supplied).
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--dryrun', action="store_true",
                        default=False,
                        help="Run without making changes [default: %(default)s]")
    parser.add_argument('--storetable', action="store_true", default=False,
                        help="Also run the annotation table update "
                             "[default: %(default)s]")
    parser.add_argument('--projectId', type=str,
                        help="Project ID forwarded to the table update "
                             "(used with --storetable)")
    args = parser.parse_args()

    syn = synapseclient.login(silent=True)

    annots = PCBCAnnotations.PCBCAnnotations(syn)
    table_update = PCBCAnnotations.PCBCAnnotationTableUpdate(syn, annots)

    # thisCodeInSynapse is only invoked on a real run; a dry run hands
    # executed=None to the update.
    # NOTE(review): presumably the helper registers this script in Synapse and
    # returns its ID -- confirm in synapseHelpers.
    my_synapse_id = None
    if not args.dryrun:
        my_synapse_id = synapseHelpers.thisCodeInSynapse(parentId='syn2758110', syn=syn)

    table_update.update_annots_synapse(executed=my_synapse_id, dryrun=args.dryrun)

    if args.storetable:
        # Result intentionally not bound: the original assigned it to an
        # unused local.
        table_update.update_annots_table_synapse(projectId=args.projectId,
                                                 dryrun=args.dryrun)
    # Fragment of a loop body: the enclosing loop and the definitions of
    # `name`, `files`, `filteredMeta`, `mp`, `platform` and `dataSubType` are
    # not visible here.
    # Skip this output when isUptodate() reports nothing needs rebuilding.
    if isUptodate(name, files, args.parentId):
        print " is up to date"
        continue
    # Branch on one of the distinct fileType values. Set ordering is
    # arbitrary, so this presumably assumes all rows share a single fileType
    # -- TODO confirm.
    if list(set(filteredMeta.fileType))[0] in ["seg", "bed"]:
        # Read every input as a tab-separated table and stack them row-wise.
        # NOTE(review): if `mp` is a process pool, the lambda may not be
        # picklable -- confirm what `mp` is.
        dfs = mp.map(lambda f: pd.read_csv(f.path, sep="\t"), files)
        df = pd.concat(dfs, axis=0)
        # NOTE(review): plain string concatenation; args.filepath must already
        # end with a path separator.
        df.to_csv(args.filepath + name, sep="\t", index=False)
        # Sample count is the number of distinct values in the Sample column;
        # the feature count is fixed at 0 for these file types.
        nSamples = len(set(df.Sample))
        nFeatures = 0
    else:  # All other fileTypes
        # Read every input with its first column as the index and join them
        # column-wise.
        dfs = mp.map(lambda f: pd.read_csv(f.path, sep="\t", index_col=0), files)
        df = pd.concat(dfs, axis=1)
        df.to_csv(args.filepath + name, sep="\t")
        # Rows are counted as features, columns as samples.
        nFeatures, nSamples = df.shape
    print "Created", name, df.shape
    # Wrap the written file as a Synapse File entity under args.parentId
    entity = synapseclient.File(args.filepath + name, parentId=args.parentId)
    # Set annotations: fixed values plus the distinct metadata values taken
    # from filteredMeta
    entity.platform = platform
    entity.dataSubType = dataSubType
    entity.acronym = "PANCAN"
    entity.dataProducer = "TCGA"
    entity.disease = "cancer"
    entity.center = list(set(filteredMeta.center))
    entity.centerTitle = list(set(filteredMeta.centerTitle))
    entity.fileType = list(set(filteredMeta.fileType))
    entity.platformTitle = list(set(filteredMeta.platformTitle))
    entity.nSamples = nSamples
    entity.nFeatures = nFeatures
    # Store with provenance: the input files as `used`, and the result of
    # thisCodeInSynapse() as `executed`.
    entity = syn.store(entity, used=files, executed=thisCodeInSynapse(parentId=args.parentId))
    # Fragment: `filteredMeta`, `files`, `name`, `mp`, `platform` and
    # `dataSubType` are defined outside the visible code.
    # Branch on one of the distinct fileType values. Set ordering is
    # arbitrary, so this presumably assumes a single fileType per group
    # -- TODO confirm.
    if list(set(filteredMeta.fileType))[0] in ['seg', 'bed']:
        # Load each input as a tab-separated table and concatenate row-wise.
        # NOTE(review): a lambda may not be picklable if `mp` is a process
        # pool -- confirm what `mp` is.
        dfs = mp.map(lambda f: pd.read_csv(f.path, sep='\t'), files)
        df = pd.concat(dfs, axis=0)
        # NOTE(review): args.filepath + name is plain string concatenation, so
        # args.filepath must end with a path separator.
        df.to_csv(args.filepath + name, sep='\t', index=False)
        # Distinct values of the Sample column give the sample count; the
        # feature count is set to 0 for these file types.
        nSamples = len(set(df.Sample))
        nFeatures = 0
    else:  # All other fileTypes
        # Load each input indexed by its first column and concatenate
        # column-wise.
        dfs = mp.map(lambda f: pd.read_csv(f.path, sep='\t', index_col=0),
                     files)
        df = pd.concat(dfs, axis=1)
        df.to_csv(args.filepath + name, sep='\t')
        # Rows are counted as features, columns as samples.
        nFeatures, nSamples = df.shape
    print 'Created', name, df.shape
    # Wrap the written file as a Synapse File entity under args.parentId
    entity = synapseclient.File(args.filepath + name, parentId=args.parentId)
    # Set annotations: fixed values plus the distinct metadata values taken
    # from filteredMeta
    entity.platform = platform
    entity.dataSubType = dataSubType
    entity.acronym = 'PANCAN'
    entity.dataProducer = 'TCGA'
    entity.disease = 'cancer'
    entity.center = list(set(filteredMeta.center))
    entity.centerTitle = list(set(filteredMeta.centerTitle))
    entity.fileType = list(set(filteredMeta.fileType))
    entity.platformTitle = list(set(filteredMeta.platformTitle))
    entity.nSamples = nSamples
    entity.nFeatures = nFeatures
    # Store with provenance: the input files as `used`, and the result of
    # thisCodeInSynapse() as `executed`.
    entity = syn.store(entity,
                       used=files,
                       executed=thisCodeInSynapse(parentId=args.parentId))
Esempio n. 5
0
    to a specific platform."""
    old_whitelist  = syn.get(WHITELISTID, version=version)
    whitelist = pd.read_csv(whitelistEntity.path, sep='\t')
    oldToRemove = set(whitelist.ix[whitelist.Do_not_use & (whitelist.platform==platform), 
                                'aliquot_barcode'])
    return oldToRemove
    

# Script setup: log in to Synapse, load the whitelist table, list the input
# files and register this script for provenance.
# NOTE(review): the line below is disabled code that would have created a
# Pool of size 8 -- confirm whether it can be removed.
#mp = Pool(8)
syn = synapseclient.login(silent=True)

# Fetch the whitelist entity and read its file as a tab-separated table.
# The loop that follows filters it on the Do_not_use and platform columns and
# takes aliquot_barcode values from it.
whitelistEntity = syn.get(WHITELISTID)
whitelist = pd.read_csv(whitelistEntity.path, sep='\t')
# Run QUERY_STR as a chunked query and convert the results with query2df; the
# loop that follows iterates over its rows and reads id, name, platform and
# fileType from each.
inputFiles = synapseHelpers.query2df(syn.chunkedQuery(QUERY_STR))

# Passed as `executed=` when results are stored in the loop that follows.
# NOTE(review): presumably registers this script under syn1774100 -- confirm.
code=synapseHelpers.thisCodeInSynapse(parentId='syn1774100')
for i, row in inputFiles.iterrows():
    print row.id, row['name'],
    inputFileEntity = syn.get(row.id)
    outFileName = row['name'][:-4]+'_whitelisted'+row['name'][-4:]
    
    toRemove = set(whitelist.ix[whitelist.Do_not_use & (whitelist.platform == row['platform']), 
                                'aliquot_barcode'])

    if isUptodate(outFileName, [inputFileEntity], toRemove, row['platform']):
        print ' is up to date - but update provenance'
        e = syn.get(getFileIdFromName(outFileName), downloadFile=False)
        syn.store(e, used=[inputFileEntity, whitelistEntity], executed=code)
        continue
    if row.fileType =='bed5':  #Do the filtering for bed files
        df = pd.read_csv(inputFileEntity.path, sep='\t')