def main(uargs):
    """Download sample files for a whole project or for a single sample.

    API credentials are read from the profile ``uargs['--profile']`` of the
    config file ``uargs['--config']``.

    :param uargs: mapping of parsed command-line arguments; the keys read
        here are ``--config``, ``--profile``, ``--user``, ``project``,
        ``sample`` and ``<Id>``. ``uargs['--config']`` is rewritten in
        place to its absolute, user-expanded path.
    :raises KeyError: if the requested profile is not in the config file.
    """
    uargs['--config'] = os.path.abspath(os.path.expanduser(uargs['--config']))
    conf = ConfigObj(uargs['--config'], configspec=get_configspec())
    try:
        conf_args = conf[uargs['--profile']]
    except KeyError:
        msg = 'Profile "{}" not found in config file'
        # Call form of raise: the ``raise KeyError, msg`` statement form is
        # a SyntaxError on Python 3 and means the same thing on Python 2.
        raise KeyError(msg.format(uargs['--profile']))

    # selecting config args, in the positional order passed to BaseSpaceAPI
    keys = ['clientKey', 'clientSecret', 'apiServer',
            'apiVersion', 'appSessionId', 'accessToken']
    myAPI = BaseSpaceAPI(*[conf_args[key] for key in keys])

    # setting user; the returned object is not used below, but the lookup is
    # kept so the same API request is still issued before any download.
    user_id = uargs['--user'] if uargs['--user'] is not None else 'current'
    myAPI.getUserById(user_id)

    # all samples for project
    if uargs['project']:
        for sample in myAPI.getSamplesByProject(uargs['<Id>']):
            file_download(myAPI, sample.Id)

    # single sample
    if uargs['sample']:
        file_download(myAPI, uargs['<Id>'])
def main():
    """Download the BAM or FASTQ files of the projects named on the command line.

    Projects that cannot be matched against the current user's projects are
    reported through ``warning``. When more than one project was matched,
    a grand total of the sizes returned by the download function is printed.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-p', '--profile',
        default="DEFAULT",
        help="the .basespacepy.cfg profile to load")
    cli.add_argument(
        '-d', '--dry',
        action='store_true', default=False,
        help="dry run; return size of selected items")
    cli.add_argument(
        '-f', '--force',
        action='store_true', default=False,
        help="force overwrite; otherwise cat counters on new filenames")
    cli.add_argument(
        '-j', '--project',
        required=True, nargs="+",
        help="project to download; can accept multiple values")
    cli.add_argument(
        '-t', '--type',
        choices=['b', 'f', 'bam', 'fastq'], default='f',
        help='type of file to download')
    opts = cli.parse_args()

    api = BaseSpaceAPI(profile=opts.profile, timeout=500)
    current_user = api.getUserById('current')
    query = QueryParameters.QueryParameters({'Limit': 1024})
    all_projects = current_user.getProjects(api, query)

    # Pick the per-project download routine from the requested file type.
    if opts.type in ('b', 'bam'):
        fetch = downloadProjectBam
    elif opts.type in ('f', 'fastq'):
        fetch = downloadProjectFastq

    matched = stringsToBSObj(all_projects, opts.project)

    # Anything requested but not present among the matches is reported.
    matched_names = set(str(proj) for proj in matched)
    for missing in set(opts.project) - matched_names:
        warning("cannot find " + str(missing))

    grand_total = 0
    for proj in matched:
        grand_total += fetch(proj, api, opts.dry, force=opts.force)

    if len(matched) > 1:
        print(humanFormat(grand_total) + "\tTotal")
def Main():
    """Download every file of every sample of one project.

    Reads ``key``, ``secret``, ``token``, ``directory`` and ``project`` from
    the object returned by ``ParseArg()``. Progress is written to stderr and
    each sample is stored in its own sub-folder of ``directory``.

    Exits with status 1 when no project with the requested name is found
    (the previous code exited with 0, which hid the failure from callers).
    """
    def _err(message):
        # Equivalent of the Python 2 only ``print >> sys.stderr, message``;
        # works on both Python 2 and Python 3.
        sys.stderr.write(message + "\n")

    args = ParseArg()
    folder = args.directory
    BaseSpaceUrl = 'https://api.basespace.illumina.com/'
    myAPI = BaseSpaceAPI(args.key, args.secret, BaseSpaceUrl,
                         "v1pre3", "AA", args.token)

    user = myAPI.getUserById('current')
    _err("\nUser name: %s\n" % str(user))

    # for/else: the else branch runs only when no project name matched.
    for p in myAPI.getProjectByUser():
        if p.Name == args.project:
            _err(" Find project %s with ID: %s. " % (p.Name, p.Id))
            Project = p
            break
    else:
        _err(" Could not find project %s, from user %s, please check your token."
             % (args.project, str(user)))
        sys.exit(1)

    Samples = Project.getSamples(myAPI)
    _err("Samples for this project: " + str(Samples))

    for s in Samples:
        _err(" Downloading files in sample " + str(s))
        subfolder = os.path.join(folder, str(s))
        if not os.path.exists(subfolder):
            os.makedirs(subfolder)
        for f in s.getFiles(myAPI):
            _err(" " + str(f))
            f.downloadFile(myAPI, subfolder)
def Main():
    """Download every file of every sample of one project.

    Reads ``key``, ``secret``, ``token``, ``directory`` and ``project`` from
    the object returned by ``ParseArg()``. Progress is written to stderr and
    each sample is stored in its own sub-folder of ``directory``.

    Exits with status 1 when no project with the requested name is found
    (the previous code exited with 0, which hid the failure from callers).
    """
    def _err(message):
        # Equivalent of the Python 2 only ``print >> sys.stderr, message``;
        # works on both Python 2 and Python 3.
        sys.stderr.write(message + "\n")

    args = ParseArg()
    folder = args.directory
    BaseSpaceUrl = 'https://api.basespace.illumina.com/'
    myAPI = BaseSpaceAPI(args.key, args.secret, BaseSpaceUrl,
                         "v1pre3", "AA", args.token)

    user = myAPI.getUserById('current')
    _err("\nUser name: %s\n" % str(user))

    # for/else: the else branch runs only when no project name matched.
    for p in myAPI.getProjectByUser():
        if p.Name == args.project:
            _err(" Find project %s with ID: %s. " % (p.Name, p.Id))
            Project = p
            break
    else:
        _err(" Could not find project %s, from user %s, please check your token."
             % (args.project, str(user)))
        sys.exit(1)

    Samples = Project.getSamples(myAPI)
    _err("Samples for this project: " + str(Samples))

    for s in Samples:
        _err(" Downloading files in sample " + str(s))
        subfolder = os.path.join(folder, str(s))
        if not os.path.exists(subfolder):
            os.makedirs(subfolder)
        for f in s.getFiles(myAPI):
            _err(" " + str(f))
            f.downloadFile(myAPI, subfolder)
def download_Project(project_Name, output_folder):
    """Download all sample files of the matching project(s) and push them to S3.

    A project matches when ``str(project_Name)`` occurs as a substring of the
    project's string form (split on ``-``). Each downloaded file is handed to
    ``s3_upload`` under the key ``<project_Name>/<file>``.

    :param project_Name: name (or name fragment) of the project to download
    :param output_folder: local directory receiving the files; created if missing
    """
    # initialize an authentication object using the key and secret from your app.
    # The following module-level names must be filled in with your own values
    # before this function is called:
    #   client_key = <my key>
    #   client_secret = <my secret>
    #   AppSessionId = <my appSession id>
    #   BaseSpaceUrl = 'https://api.basespace.illumina.com/'
    #   version = 'v1pre3'
    #   accessToken = <my acceseToken>
    myAPI = BaseSpaceAPI(client_key, client_secret, BaseSpaceUrl, version,
                         AppSessionId, AccessToken=accessToken)

    # Retrieve current user; the text before the first ':' of its string
    # form is what gets passed on as the user id.
    user = str(myAPI.getUserById('current'))
    id_name = user.split(':')

    # Retrieve all the projects associated with that user
    projects = myAPI.getProjectByUser(
        id_name[0], queryPars=QueryParameters({'Limit': '100'}))

    project_found = False
    for project in projects:
        nameProject_id = str(project).split('-')
        if str(project_Name) not in str(nameProject_id):
            continue
        project_found = True

        # The second '-'-separated field is parsed as "<label>=<id>".
        # NOTE(review): this relies on the exact str() format of a project
        # object and breaks if the project name itself contains '-'.
        id_project = nameProject_id[1].split('=')[1]
        samples = myAPI.getSamplesByProject(
            id_project, queryPars=QueryParameters({'Limit': '100'}))
        print("There are " + str(len(samples)) + " samples in the requested project ("
              + str(project_Name) + " - ID_PROJECT " + str(id_project) + ")")

        if not os.path.exists(output_folder):
            os.makedirs(output_folder)

        print(time.ctime() + " START DOWNLOADING")
        # ``sample`` rather than ``file``: avoids shadowing the Python 2 builtin.
        for sample in samples:
            for fastq in sample.getFiles(myAPI):
                fastq.downloadFile(myAPI, output_folder)
                print(time.ctime() + " FILE " + str(fastq) + " DOWNLOADED")
                path_file = join(output_folder, str(fastq))
                path_S3 = join(str(project_Name), str(fastq))
                # presumably "bmi-ngs" is the destination bucket - confirm
                # against s3_upload's signature
                s3_upload(path_file, "bmi-ngs", path_S3)
        print(time.ctime() + " DOWNLOAD COMPLETED")

    if not project_found:
        print("Project Not Found")
def main():
    """Download the runs named on the command line (or every run if none is given).

    Run names that cannot be matched against the current user's runs are
    reported through ``warning``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-p', '--profile',
        default="DEFAULT",
        help="the .basespacepy.cfg profile to load")
    cli.add_argument(
        '-d', '--dry',
        action='store_true', default=False,
        help="dry run; return size of selected items")
    cli.add_argument(
        '-f', '--force',
        action='store_true', default=False,
        help="force overwrite; otherwise cat counters on new filenames")
    cli.add_argument(
        '-r', '--run',
        default=[], nargs="+",
        help="run name to download; can accept multiple values")
    cli.add_argument(
        '--file',
        default=[], nargs="+",
        help="specific file(s) to pull from each run; can accept multiple values")
    opts = cli.parse_args()

    api = BaseSpaceAPI(profile=opts.profile, timeout=500)
    current_user = api.getUserById('current')
    query = QueryParameters.QueryParameters({'Limit': 1024})
    all_runs = current_user.getRuns(api, query)

    # No --run given means "take everything"; the lookup is still performed
    # first, exactly as before.
    selected = stringsToBSObj(all_runs, opts.run)
    if not opts.run:
        selected = all_runs

    selected_names = set(str(item) for item in selected)
    for missing in set(opts.run) - selected_names:
        warning("cannot find " + str(missing))

    grand_total = 0
    for run in selected:
        # downloadRun strips entries from the list it is given, so every
        # call receives its own fresh copy of the requested file names.
        grand_total += downloadRun(run, api, opts.dry,
                                   files=list(opts.file),
                                   force=opts.force)
def main():
    """Write sample and file metadata of the requested projects to TSV files.

    For every matched project ``downloadProjectMetadata`` supplies a sample
    table and a file table; the tables are stacked across projects and written
    to ``fullSampleMetadata.<timestamp>.txt`` and
    ``fullFileMetadata.<timestamp>.txt`` in the current directory.
    Projects that cannot be matched are reported through ``warning``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--profile', default="DEFAULT",
                        help="the .basespacepy.cfg profile to load")
    parser.add_argument('-j', '--project', required=True, nargs="+",
                        help="project to download; can accept multiple values")
    args = parser.parse_args()

    myAPI = BaseSpaceAPI(profile=args.profile, timeout=500)
    user = myAPI.getUserById('current')
    qp = QueryParameters.QueryParameters({'Limit': 1024})
    projects = user.getProjects(myAPI, qp)

    userProjs = stringsToBSObj(projects, args.project)
    for lostProj in set(args.project) - set(str(x) for x in userProjs):
        warning("cannot find " + str(lostProj))

    # DataFrame.append was removed in pandas 2.0 (and copied the whole frame
    # on every call); collect the pieces and concatenate once instead.
    # assumes downloadProjectMetadata returns two DataFrames - TODO confirm
    sampleFrames = []
    fileFrames = []
    for project in userProjs:
        smout, fmout = downloadProjectMetadata(project, myAPI)
        sampleFrames.append(smout)
        fileFrames.append(fmout)

    # pd.concat refuses an empty list, so fall back to an empty frame to keep
    # the "no project matched" output identical to the previous behaviour.
    fullSampleMetadata = pd.concat(sampleFrames) if sampleFrames else pd.DataFrame()
    fullFileMetadata = pd.concat(fileFrames) if fileFrames else pd.DataFrame()

    thisInstant = str(datetime.datetime.today()).replace(' ', ';')
    fullSampleMetadata.to_csv('fullSampleMetadata.' + thisInstant + '.txt',
                              sep='\t', header=True, index=False)
    fullFileMetadata.to_csv('fullFileMetadata.' + thisInstant + '.txt',
                            sep='\t', header=True, index=False)
def upload(clientKey=None, clientSecret=None, accessToken=None, appResultId=None, fileNameRegexesInclude=list(), fileNameRegexesOmit=list(), inputDirectory='\\.', dryRun=False, numRetries=3):
    '''
    Uploads files to an existing App Result.

    Provide an App Result identifier, and optionally regexes to include or omit files
    based on their names (path not included). Omission takes precedence over inclusion.
    TODO: creating the App Result itself is not implemented here.

    :param clientKey the Illumina developer app client key
    :param clientSecret the Illumina developer app client secret
    :param accessToken the Illumina developer app access token
    :param appResultId the BaseSpace App Result identifier
    :param fileNameRegexesInclude a list of regexes on which to include files based on name
    :param fileNameRegexesOmit a list of regexes on which to omit files based on name (takes precedence over include)
    :param inputDirectory the root input directory
    :param dryRun true to only list the files that would be uploaded
    :param numRetries the number of attempts for a single upload API call
    '''
    # NOTE(review): the inputDirectory default is the two characters
    # backslash + dot (unchanged, only spelled as a valid escape); it was
    # presumably meant to be '.' - confirm before relying on the default.
    appSessionId = ''
    apiServer = 'https://api.basespace.illumina.com/'  # or 'https://api.cloud-hoth.illumina.com/'
    apiVersion = 'v1pre3'
    sleepTime = 1.0  # seconds to wait between attempts

    # init the API
    if clientKey is not None:
        myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId, accessToken)
    else:
        myAPI = BaseSpaceAPI(profile='DEFAULT', clientKey=clientKey, clientSecret=clientSecret, AccessToken=accessToken)

    # get the current user (result unused; the request itself is kept)
    myAPI.getUserById('current')

    # get the app result
    appResult = myAPI.getAppResultById(Id=appResultId)
    print("Uploading files to the App Result: " + str(appResult))

    # Filter file names based on the include or omit regexes
    includePatterns = [re.compile(pattern) for pattern in fileNameRegexesInclude]
    omitPatterns = [re.compile(pattern) for pattern in fileNameRegexesOmit]

    def keepFile(name):
        # No include pattern means "include everything"; omission always wins.
        if includePatterns and not any(p.match(name) for p in includePatterns):
            return False
        return not any(p.match(name) for p in omitPatterns)

    # walk the input directory structure
    for root, dirs, files in os.walk(inputDirectory):
        # Remote directory: the walked path with the input root removed.
        directory = root.replace(inputDirectory, "")
        for fileName in files:
            if not keepFile(fileName):
                continue
            localPath = os.path.join(root, fileName)
            if AppResults.isBinaryContent(fileName):
                contentType = 'application/octet-stream'
            else:
                contentType = 'text/plain'
            print("Uploading file: %s" % localPath)

            # Honour the dryRun argument. The previous code ignored it and
            # read a module-level ``options.dryRun`` instead.
            if dryRun:
                continue

            # Try up to numRetries times, re-raising the last server error
            # once every attempt has failed.
            retryIdx = 0
            retryException = None
            while retryIdx < numRetries:
                try:
                    appResult.uploadFile(api=myAPI, localPath=localPath, fileName=fileName, directory=directory, contentType=contentType)
                except BaseSpaceException.ServerResponseException as e:
                    retryIdx += 1
                    time.sleep(sleepTime)
                    retryException = e
                else:
                    break
            if retryIdx == numRetries:
                raise retryException
    print("Upload complete")
import sys, os, glob, logging from argparse import ArgumentParser from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI from BaseSpacePy.model.QueryParameters import QueryParameters as qp list_options = qp({'Limit': 1024}) logging.basicConfig( level=logging.INFO, format='[%(asctime)s] %(message)s', datefmt='%d/%m/%Y %H:%M:%S', ) bs = BaseSpaceAPI() user = bs.getUserById('current') logging.info("User Name: %s", user) projects = bs.getProjectByUser(list_options) project_list = [project.Name for project in projects] cli = ArgumentParser() cli.add_argument( 'project', nargs='?', help= 'Which project to download files from. When not specified, list projects instead.' ) cli.add_argument( '--dry-run', '-n', action='store_true',
# Walk-through of basic read-only queries. print() is called with a single
# argument throughout so the script behaves the same on Python 2 and 3.
myAPI = BaseSpaceAPI(profile="DEFAULT")

# First, let's grab the genome with id=4
myGenome = myAPI.getGenomeById("4")
print("\nThe Genome is " + str(myGenome))
print("We can get more information from the genome object")
print("Id: " + myGenome.Id)
print("Href: " + myGenome.Href)
print("DisplayName: " + myGenome.DisplayName)

# Get a list of all genomes
allGenomes = myAPI.getAvailableGenomes()
print("\nGenomes \n" + str(allGenomes))

# Let's have a look at the current user
user = myAPI.getUserById("current")
print("\nThe current user is \n" + str(user))

# Now list the projects for this user
myProjects = myAPI.getProjectByUser()
print("\nThe projects for this user are \n" + str(myProjects))

# We can also achieve this by making a call using the 'user instance'
myProjects2 = user.getProjects(myAPI)
print("\nProjects retrieved from the user instance \n" + str(myProjects2))

# List the runs available for the current user
runs = user.getRuns(myAPI)
print("\nThe runs for this user are \n" + str(runs))

# In the same manner we can get a list of accessible user runs
def download(clientKey=None, clientSecret=None, accessToken=None, appResultId=None, fileNameRegexesInclude=list(), fileNameRegexesOmit=list(), outputDirectory='\\.', createBsDir=True, force=False, numRetries=3):
    '''
    Downloads App Result files.

    Provide an App Result identifier, and optionally regexes to include or omit files
    based on their names (path not included). Omission takes precedence over inclusion.

    Nothing is downloaded when the module-level ``options.dryRun`` is true.

    :param clientKey the Illumina developer app client key
    :param clientSecret the Illumina developer app client secret
    :param accessToken the Illumina developer app access token
    :param appResultId the BaseSpace App Result identifier
    :param fileNameRegexesInclude a list of regexes on which to include files based on name
    :param fileNameRegexesOmit a list of regexes on which to omit files based on name (takes precedence over include)
    :param outputDirectory the root output directory
    :param createBsDir true to recreate the path structure within BaseSpace, false otherwise
    :param force use the force: overwrite existing files if true, false otherwise
    :param numRetries the number of attempts for a single download API call
    '''
    # NOTE(review): the outputDirectory default is the two characters
    # backslash + dot (unchanged, only spelled as a valid escape); it was
    # presumably meant to be '.' - confirm before relying on the default.
    appSessionId = ''
    apiServer = 'https://api.basespace.illumina.com/'  # or 'https://api.cloud-hoth.illumina.com/'
    apiVersion = 'v1pre3'
    fileLimit = 10000
    sleepTime = 1.0  # seconds to wait between attempts

    # init the API
    if clientKey is not None:
        myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId, accessToken)
    else:
        myAPI = BaseSpaceAPI(profile='DEFAULT')

    # get the current user (result unused; the request itself is kept)
    myAPI.getUserById('current')

    appResult = myAPI.getAppResultById(Id=appResultId)
    print("Retrieving files from the App Result: " + str(appResult))

    # Get all the files from the AppResult
    filesToDownload = appResult.getFiles(myAPI, queryPars=qp({'Limit': fileLimit}))

    # Filter file names based on the include or omit regexes
    includePatterns = [re.compile(pattern) for pattern in fileNameRegexesInclude]
    omitPatterns = [re.compile(pattern) for pattern in fileNameRegexesOmit]

    def keepFile(name):
        # No include pattern means "include everything"; omission always wins.
        if includePatterns and not any(p.match(name) for p in includePatterns):
            return False
        return not any(p.match(name) for p in omitPatterns)

    filesToDownload = [f for f in filesToDownload if keepFile(str(f))]
    numFiles = len(filesToDownload)
    print("Will download %d files." % numFiles)

    for position, appResultFile in enumerate(filesToDownload, 1):
        print('Downloading (%d/%d): %s' % (position, numFiles, str(appResultFile)))
        print("File Path: %s" % appResultFile.Path)
        if options.dryRun:
            continue

        relativePath = str(appResultFile.Path)
        if not createBsDir:
            relativePath = os.path.basename(relativePath)
        # Look for an existing copy under outputDirectory, which is where
        # downloadFile is told to write. The previous check used the bare
        # relative path and therefore only worked from inside that directory.
        outputPath = os.path.join(outputDirectory, relativePath)
        if os.path.exists(outputPath):
            if not force:
                print("Skipping existing file: %s" % outputPath)
                continue
            print("Overwritting: %s" % outputPath)
        else:
            print("Downloading to: %s" % outputPath)

        # Try up to numRetries times, re-raising the last server error once
        # every attempt has failed.
        retryIdx = 0
        retryException = None
        while retryIdx < numRetries:
            try:
                appResultFile.downloadFile(myAPI, outputDirectory, createBsDir=createBsDir)
            except BaseSpaceException.ServerResponseException as e:
                retryIdx += 1
                time.sleep(sleepTime)
                retryException = e
            else:
                break
        if retryIdx == numRetries:
            raise retryException
    print("Download complete.")
#################### Storing BaseSpaceApi using python's pickle module #################################
"""
It may sometimes be useful to preserve certain api objects across a series of http requests from the same user-session.
Here we demonstrate how the Python pickle module may be used to achieve this end.

The example will be for an instance of BaseSpaceAPI, but the same technique may be used for BaseSpaceAuth.
In fact, a single instance of BaseSpaceAuth would be enough for a single App and could be shared by all http-requests,
as the identity of this object is only given by the client_key and client_secret.

(There is, of course, no problem in having multiple identical BaseSpaceAuth instances).
"""

# Get current user
user = BSapi.getUserById('current')
print(user)
print(BSapi)

#### Here some work goes on

# Now we wish to store the API object for the next time we get a request in this session.
# Make a file to store the BaseSpaceAPI instance in; for easy identification we name it after an id
# that can be used to identify the session again.
mySessionId = BSapi.appSessionId + '.pickle'
# Pickle data is bytes: open the file in binary mode, and let the context
# manager close it even if dump() raises.
with open(mySessionId, 'wb') as f:
    Pickle.dump(BSapi, f)

# Imagine the current request is done; we simulate this by deleting the api instance
BSapi = None
def download(clientKey=None, clientSecret=None, accessToken=None, runId=None, runName=None, outputDirectory='\\.', createBsDir=True):
    '''
    Downloads run-level files.

    Run Id and run name should not be specified together.

    All files for a given run will be downloaded based on either the unique run ID, or
    the first run found with matching experiment name.

    Nothing is downloaded when the module-level ``options.dryRun`` is true.
    Exits with status 1 when no run could be selected.

    :param clientKey the Illumina developer app client key
    :param clientSecret the Illumina developer app client secret
    :param accessToken the Illumina developer app access token
    :param runId the BaseSpace run identifier
    :param runName the BaseSpace run experiment name
    :param outputDirectory the root output directory
    :param createBsDir true to recreate the path structure within BaseSpace, false otherwise
    '''
    # NOTE(review): the outputDirectory default is the two characters
    # backslash + dot (unchanged, only spelled as a valid escape); it was
    # presumably meant to be '.' - confirm before relying on the default.
    appSessionId = ''
    apiServer = 'https://api.basespace.illumina.com/'  # or 'https://api.cloud-hoth.illumina.com/'
    apiVersion = 'v1pre3'
    fileLimit = 1024
    runLimit = 100

    # init the API
    if clientKey is not None:
        myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId, accessToken)
    else:
        myAPI = BaseSpaceAPI(profile='DEFAULT')

    # get the current user (result unused; the request itself is kept)
    myAPI.getUserById('current')

    expName = None
    if runId:
        run = myAPI.getRunById(Id=runId)
        runFiles = Runs.__get_files_to_download(myAPI, run.Id, fileLimit)
        expName = run.ExperimentName
    else:
        runs = myAPI.getAccessibleRunsByUser(qp({'Limit': runLimit}))
        for run in runs:
            runId = run.Id
            if runName and runName == run.ExperimentName:
                expName = run.ExperimentName
                # NOTE(review): this goes through ``Samples`` with two
                # arguments while the run-id branch above uses ``Runs`` with
                # a file limit - looks like a copy/paste slip; confirm which
                # helper is intended.
                runFiles = Samples.__get_files_to_download(myAPI, runId)
                if 0 < len(runFiles):
                    break

    if not expName:
        if runName:
            print('Could not find a run with name: %s' % runName)
        else:
            print('Could not find a run for user')
        sys.exit(1)

    numFiles = len(runFiles)
    print("Will download files from %d ." % numFiles)

    # Every file of the run lands in one directory named after the experiment.
    outDir = os.path.join(outputDirectory, expName)
    for position, runFile in enumerate(runFiles, 1):
        # Show the file being fetched; the previous code formatted an empty
        # str() here, so the progress line never named the file.
        print('Downloading (%d/%d): %s' % (position, numFiles, str(runFile)))
        print("BaseSpace File Path: %s" % runFile.Path)
        print("Destination File Path: %s" % os.path.join(outDir, runFile.Name))
        if not options.dryRun:
            runFile.downloadFile(myAPI, outDir, createBsDir=createBsDir)
    print("Download complete.")
def download(clientKey=None, clientSecret=None, accessToken=None, sampleId=None, projectId=None, sampleName=None, projectName=None, outputDirectory='\\.', createBsDir=True):
    '''
    Downloads sample-level files.

    Project Id and project name should
    not be specified together; similarly sample Id and sample name should not be
    specified together.

    1. If only a project Id or only a project name is given, all files for all
    samples will be downloaded within that project. If additionally a sample Id or
    sample name is given, then only the first matching sample within the project will
    be downloaded.
    2. If only a sample Id is given, then all files for that sample will be downloaded.
    3. If only a sample name is given, then all files within the first project
    containing a sample with matching name will be downloaded.

    Nothing is downloaded when the module-level ``options.dryRun`` is true.

    :param clientKey the Illumina developer app client key
    :param clientSecret the Illumina developer app client secret
    :param accessToken the Illumina developer app access token
    :param sampleId the BaseSpace sample identifier
    :param projectId the BaseSpace project identifier
    :param sampleName the BaseSpace sample name
    :param projectName the BaseSpace project name
    :param outputDirectory the root output directory
    :param createBsDir true to recreate the path structure within BaseSpace, false otherwise
    '''
    # NOTE(review): the outputDirectory default is the two characters
    # backslash + dot (unchanged, only spelled as a valid escape); it was
    # presumably meant to be '.' - confirm before relying on the default.
    appSessionId = ''
    apiServer = 'https://api.basespace.illumina.com/'  # or 'https://api.cloud-hoth.illumina.com/'
    apiVersion = 'v1pre3'
    projectLimit = 1024
    sampleLimit = 1024
    sampleFileLimit = 1024

    # init the API
    if clientKey is not None:
        myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId, accessToken)
    else:
        myAPI = BaseSpaceAPI(profile='DEFAULT')

    # get the current user (result unused; the request itself is kept)
    myAPI.getUserById('current')

    sampleToFiles = {}
    if projectId is not None:
        sampleToFiles = Samples.__get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit, sampleFileLimit)
    else:
        # Page through the user's projects until one yields files.
        offset = 0
        while True:
            myProjects = myAPI.getProjectByUser(qp({'Limit': projectLimit, 'Offset': offset}))
            if len(myProjects) == 0:
                break
            for project in myProjects:
                sys.stderr.write("project.Name: " + str(project.Name) + " projectName: " + str(projectName) + '\n')
                if projectName is not None and project.Name != projectName:
                    continue
                sampleToFiles = Samples.__get_files_to_download(myAPI, project.Id, sampleId, sampleName, sampleLimit, sampleFileLimit)
                if 0 < len(sampleToFiles):
                    break
            if 0 < len(sampleToFiles):
                break
            offset += projectLimit

    # Distinct loop names: the previous comprehension rebound the sampleId
    # parameter (list-comprehension variables leak on Python 2).
    numFiles = sum(len(sampleToFiles[key]) for key in sampleToFiles)
    print("Will download files from %d ." % numFiles)

    position = 0
    for currentSampleId in sampleToFiles:
        for sampleFile in sampleToFiles[currentSampleId]:
            position += 1
            print('Downloading (%d/%d): %s' % (position, numFiles, str(sampleFile)))
            print("BaseSpace File Path: %s" % sampleFile.Path)
            print("Sample Id: %s" % currentSampleId)
            if options.dryRun:
                continue
            # With createBsDir each sample gets its own sub-directory.
            if createBsDir:
                sampleOutputDirectory = os.path.join(outputDirectory, currentSampleId)
            else:
                sampleOutputDirectory = outputDirectory
            sampleFile.downloadFile(myAPI, sampleOutputDirectory, createBsDir=createBsDir)
    print("Download complete.")
def main():
    """List the user's runs and, when ``--run`` is given, download that run's files.

    Each downloaded file is verified against its S3 etag when the etag has
    the length of an MD5 digest (32 characters), otherwise against the file
    size reported by the API. All progress output goes to stderr.

    Without ``--run`` the function returns after listing the runs; an unknown
    run name ends the program with exit status 1.
    """
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('-k', '--key', dest='key', required=True, type=str,
                        help='specify client key')
    parser.add_argument('-s', '--secret', dest='secret', required=True, type=str,
                        help='specify client secret')
    parser.add_argument('-t', '--token', dest='token', required=True, type=str,
                        help='specify access token')
    parser.add_argument('-r', '--run', dest='run', required=False, type=str,
                        help='specify the run name to download')
    parser.add_argument('-d', '--directory', dest='directory', required=False,
                        type=str, default='./',
                        help='specify download directory. '
                             'The default is the current directory')
    parser.add_argument('--offset', dest='offset', required=False, type=int,
                        default=0,
                        help='specify the starting offset to read. '
                             'The default is 0')
    parser.add_argument('-n', '--num_items', dest='num_items', required=False,
                        type=int, default=10,
                        help='specify the maximum number of items to return '
                             '(max 1024). The default is 10')
    parser.add_argument('-e', '--excluded_path', dest='excluded_path',
                        required=False, type=str, default=None,
                        help='specify files to skip (comma separated). '
                             'If file paths contain '
                             'this(ese) strings, it will be skipped.) '
                             'The default is None')
    args = parser.parse_args()

    run_name = args.run
    download_directory = args.directory
    if args.excluded_path:
        excluded_file_path_strings = args.excluded_path.split(',')
    else:
        excluded_file_path_strings = []
    num_items = args.num_items
    offset = args.offset

    base_space_url = 'https://api.basespace.illumina.com/'
    my_bs_api = BaseSpaceAPI(args.key, args.secret, base_space_url,
                             'v1pre3', '', args.token)
    user = my_bs_api.getUserById('current')
    print('User: {}'.format(str(user)), sep='', file=sys.stderr)
    runs = user.getRuns(my_bs_api, queryPars=qp({'Limit': num_items}))
    print('Run(s): {}'.format(runs), sep='', file=sys.stderr)

    # --run is optional: without it there is nothing to download. The
    # previous code carried on and crashed with a NameError on ``run``.
    if not run_name:
        return

    # First run whose name matches; report a clear error instead of the
    # IndexError the previous list-indexing lookup raised.
    run = next((candidate for candidate in runs if candidate.Name == run_name), None)
    if run is None:
        print('Could not find run: {}'.format(run_name), file=sys.stderr)
        sys.exit(1)

    print('Total size ({}): {} GB'.format(run.Name, run.TotalSize / 1000000000),
          sep='', file=sys.stderr)
    print('Offset: {}'.format(offset), sep='', file=sys.stderr)
    print('Number of items to return: {}'.format(num_items), sep='', file=sys.stderr)

    for f in run.getFiles(my_bs_api, queryPars=qp({'Limit': num_items, 'Offset': offset})):
        file_path = f.Path
        if any(fragment in file_path for fragment in excluded_file_path_strings):
            print('Skipping file: {}'.format(file_path), file=sys.stderr)
            continue

        print('Downloading file: {}'.format(file_path), '...', sep=' ', end='', file=sys.stderr)
        try:
            f.downloadFile(my_bs_api, download_directory, createBsDir=True)
            etag = f.getFileS3metadata(my_bs_api)['etag']
            local_path = download_directory + '/' + file_path
            if len(etag) == 32:
                # A 32-character etag is compared as an MD5 hex digest.
                f_md5 = md5_hash(local_path)
                if f_md5 == etag:
                    print(' done (md5 correct)!', file=sys.stderr)
                else:
                    print(' error (md5 incorrect)!', f.Id, etag, f_md5, file=sys.stderr)
            else:
                # Otherwise only the size can be checked.
                if f.Size == os.path.getsize(local_path):
                    print(' done (file size correct)!', file=sys.stderr)
                else:
                    print(' error (file size incorrect)!', f.Id, etag, file=sys.stderr)
        except Exception as e:
            # Deliberate best effort: report the failure and continue with
            # the remaining files.
            print(' error ({})!!'.format(e), file=sys.stderr)
def download(clientKey=None, clientSecret=None, accessToken=None, appResultId=None, fileNameRegexesInclude=list(), fileNameRegexesOmit=list(), outputDirectory='\\.', createBsDir=True, force=False, numRetries=3):
    '''
    Downloads App Result files.

    Provide an App Result identifier, and optionally regexes to include or omit files
    based on their names (path not included). Omission takes precedence over inclusion.

    Nothing is downloaded when the module-level ``options.dryRun`` is true.

    :param clientKey the Illumina developer app client key
    :param clientSecret the Illumina developer app client secret
    :param accessToken the Illumina developer app access token
    :param appResultId the BaseSpace App Result identifier
    :param fileNameRegexesInclude a list of regexes on which to include files based on name
    :param fileNameRegexesOmit a list of regexes on which to omit files based on name (takes precedence over include)
    :param outputDirectory the root output directory
    :param createBsDir true to recreate the path structure within BaseSpace, false otherwise
    :param force use the force: overwrite existing files if true, false otherwise
    :param numRetries the number of attempts for a single download API call
    '''
    # NOTE(review): the outputDirectory default is the two characters
    # backslash + dot (unchanged, only spelled as a valid escape); it was
    # presumably meant to be '.' - confirm before relying on the default.
    appSessionId = ''
    apiServer = 'https://api.basespace.illumina.com/'  # or 'https://api.cloud-hoth.illumina.com/'
    apiVersion = 'v1pre3'
    fileLimit = 10000
    sleepTime = 1.0  # seconds to wait between attempts

    # init the API
    if clientKey is not None:
        myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId, accessToken)
    else:
        myAPI = BaseSpaceAPI(profile='DEFAULT')

    # get the current user (result unused; the request itself is kept)
    myAPI.getUserById('current')

    appResult = myAPI.getAppResultById(Id=appResultId)
    print("Retrieving files from the App Result: " + str(appResult))

    # Get all the files from the AppResult
    filesToDownload = appResult.getFiles(myAPI, queryPars=qp({'Limit': fileLimit}))

    # Filter file names based on the include or omit regexes
    includePatterns = [re.compile(pattern) for pattern in fileNameRegexesInclude]
    omitPatterns = [re.compile(pattern) for pattern in fileNameRegexesOmit]

    def keepFile(name):
        # No include pattern means "include everything"; omission always wins.
        if includePatterns and not any(p.match(name) for p in includePatterns):
            return False
        return not any(p.match(name) for p in omitPatterns)

    filesToDownload = [f for f in filesToDownload if keepFile(str(f))]
    numFiles = len(filesToDownload)
    print("Will download %d files." % numFiles)

    for position, appResultFile in enumerate(filesToDownload, 1):
        print('Downloading (%d/%d): %s' % (position, numFiles, str(appResultFile)))
        print("File Path: %s" % appResultFile.Path)
        if options.dryRun:
            continue

        relativePath = str(appResultFile.Path)
        if not createBsDir:
            relativePath = os.path.basename(relativePath)
        # Look for an existing copy under outputDirectory, which is where
        # downloadFile is told to write. The previous check used the bare
        # relative path and therefore only worked from inside that directory.
        outputPath = os.path.join(outputDirectory, relativePath)
        if os.path.exists(outputPath):
            if not force:
                print("Skipping existing file: %s" % outputPath)
                continue
            print("Overwritting: %s" % outputPath)
        else:
            print("Downloading to: %s" % outputPath)

        # Try up to numRetries times, re-raising the last server error once
        # every attempt has failed.
        retryIdx = 0
        retryException = None
        while retryIdx < numRetries:
            try:
                appResultFile.downloadFile(myAPI, outputDirectory, createBsDir=createBsDir)
            except BaseSpaceException.ServerResponseException as e:
                retryIdx += 1
                time.sleep(sleepTime)
                retryException = e
            else:
                break
        if retryIdx == numRetries:
            raise retryException
    print("Download complete.")
import glob
import logging
import os
import sys

from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI
from BaseSpacePy.model.QueryParameters import QueryParameters as qp

# Query options shared by every listing call below.
listOptions = qp({'Limit': 1024})

logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(message)s',
    datefmt='%d/%m/%Y %H:%M:%S',
)

myAPI = BaseSpaceAPI()

# The project name is the first command-line argument.
p = sys.argv[1]

user = myAPI.getUserById('current')
logging.info("User Name: %s", str(user))

projects = myAPI.getProjectByUser(listOptions)
project_list = [project.Name for project in projects]

# Locate the requested project by name; list.index raises ValueError when
# the name is unknown, in which case the available names are logged.
try:
    idx = project_list.index(p)
    project = projects[idx]
except ValueError:
    message = '"%s" is not in your projects. Available projects are:\n%s' % (
        p, '\n'.join(project_list))
    logging.error(message)
    sys.exit(1)

# fastq files already present in the working directory
downloaded = glob.glob('*fastq.gz')

logging.info("Retrieving samples from project %s", p)
samples = project.getSamples(myAPI, listOptions)
def main(uargs):
    """Print the projects, samples and runs available through BaseSpace.

    API credentials are read from the ``--profile`` section of the
    ``--config`` file. Two tab-separated tables are written to stdout: one
    row per sample (prefixed with the columns of its project) and one row
    per run of the selected user.

    :param uargs: mapping of command-line options; the keys read here are
        ``--config``, ``--profile`` and ``--user``. ``uargs['--config']`` is
        overwritten in place with its absolute, user-expanded path.
    :raises KeyError: if the requested profile is not present in the config
        file, or if the profile lacks one of the required credential keys.
    """
    uargs['--config'] = os.path.abspath(os.path.expanduser(uargs['--config']))
    conf = ConfigObj(uargs['--config'], configspec=get_configspec())
    try:
        conf_args = conf[uargs['--profile']]
    except KeyError:
        msg = 'Profile "{}" not found in config file'
        # Call form of ``raise``: same behaviour on Python 2, and also valid
        # syntax on Python 3 (unlike ``raise KeyError, msg``).
        raise KeyError(msg.format(uargs['--profile']))

    # Select the config values in the positional order BaseSpaceAPI is
    # called with below.
    keys = ['clientKey', 'clientSecret', 'apiServer',
            'apiVersion', 'appSessionId', 'accessToken']
    conf_args = [conf_args[x] for x in keys]
    myAPI = BaseSpaceAPI(*conf_args)

    # Use the explicitly requested user, otherwise the authenticated one.
    user_id = uargs['--user'] if uargs['--user'] is not None else 'current'
    user = myAPI.getUserById(user_id)

    # Projects table: one output row per sample; a project without samples
    # therefore produces no row.
    projects = myAPI.getProjectByUser()
    print("## The projects for this user are:")
    project_headers = ['ProjectID', 'Name', 'UserOwnedBy', 'DateCreated']
    sample_headers = ['SampleID', 'NumReadsRaw', 'NumReadsPF',
                      'IsPairedEnd', 'Status']
    print('\t'.join(project_headers + sample_headers))
    for project in projects:
        project_data = [project.Id, project.Name,
                        project.UserOwnedBy, project.DateCreated]
        samples = myAPI.getSamplesByProject(project.Id)
        for sample in samples:
            sample_data = [sample.Id, sample.NumReadsRaw, sample.NumReadsPF,
                           sample.IsPairedEnd, sample.Status]
            print('\t'.join([str(x) for x in project_data + sample_data]))

    # Runs table.
    print("## The runs for this user are:")
    print('\t'.join(['RunID', 'ExperimentName', 'UserOwnedBy',
                     'DateCreated', 'Status']))
    runs = user.getRuns(myAPI)
    for run in runs:
        data = [run.Id, run.ExperimentName, run.UserOwnedBy,
                run.DateCreated, run.Status]
        print('\t'.join([str(x) for x in data]))
# NOTE: every print below uses the single-argument call form, which produces
# the same output on Python 2 and is also valid syntax on Python 3.
myAPI = BaseSpaceAPI(client_key, client_secret, BaseSpaceUrl, version,
                     AppSessionId, AccessToken=accessToken)

# First, let's grab the genome with id=4
myGenome = myAPI.getGenomeById('4')
print("\nThe Genome is " + str(myGenome))
print("We can get more information from the genome object")
print('Id: ' + myGenome.Id)
print('Href: ' + myGenome.Href)
print('DisplayName: ' + myGenome.DisplayName)

# Get a list of all genomes
allGenomes = myAPI.getAvailableGenomes()
print("\nGenomes \n" + str(allGenomes))

# Let's have a look at the current user
user = myAPI.getUserById('current')
print("\nThe current user is \n" + str(user))

# Now list the projects for this user
myProjects = myAPI.getProjectByUser('current')
print("\nThe projects for this user are \n" + str(myProjects))

# We can also achieve this by making a call using the 'user instance'
myProjects2 = user.getProjects(myAPI)
print("\nProjects retrieved from the user instance \n" + str(myProjects2))

# List the runs available for the current user
runs = user.getRuns(myAPI)
print("\nThe runs for this user are \n" + str(runs))

# In the same manner we can get a list of accessible user runs
def download(clientKey=None, clientSecret=None, accessToken=None, sampleId=None,
             projectId=None, sampleName=None, projectName=None,
             outputDirectory='\\.', createBsDir=True, dryRun=None):
    '''
    Downloads sample-level files.

    Project Id and project name should not be specified together; similarly
    sample Id and sample name should not be specified together.

    1. If only a project Id or only a project name is given, all files for all
       samples will be downloaded within that project. If additionally a
       sample Id or sample name is given, then only the first matching sample
       within the project will be downloaded.
    2. If only a sample Id is given, then all files for that sample will be
       downloaded.
    3. If only a sample name is given, then all files within the first project
       containing a sample with matching name will be downloaded.

    :param clientKey the Illumina developer app client key; when None the
        'DEFAULT' BaseSpacePy profile is used instead of explicit credentials
    :param clientSecret the Illumina developer app client secret
    :param accessToken the Illumina developer app access token
    :param sampleId the BaseSpace sample identifier
    :param projectId the BaseSpace project identifier
    :param sampleName the BaseSpace sample name
    :param projectName the BaseSpace project name
    :param outputDirectory the root output directory
    :param createBsDir true to recreate the path structure within BaseSpace,
        false otherwise
    :param dryRun true to only report what would be downloaded. When None
        (the default) the module-level ``options.dryRun`` is consulted, as
        before; if no such global exists the files are downloaded.
    '''
    # NOTE(review): the outputDirectory default is the two characters
    # backslash + dot, exactly as before (now spelled without an invalid
    # escape sequence). It looks like '.' was intended - confirm with callers
    # before changing the value.
    appSessionId = ''
    apiServer = 'https://api.basespace.illumina.com/'  # or 'https://api.cloud-hoth.illumina.com/'
    apiVersion = 'v1pre3'
    projectLimit = 100
    sampleLimit = 1024
    sampleFileLimit = 1024

    # Resolve the dry-run flag up front, so a missing global ``options`` no
    # longer surfaces as a NameError in the middle of the download loop.
    if dryRun is None:
        try:
            dryRun = options.dryRun
        except NameError:
            dryRun = False

    # init the API
    if clientKey is not None:
        myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion,
                             appSessionId, accessToken)
    else:
        myAPI = BaseSpaceAPI(profile='DEFAULT')

    # get the current user; the result is unused, but the call is kept since
    # it contacts the API (presumably an early credentials check - confirm).
    user = myAPI.getUserById('current')

    sampleToFiles = {}
    if projectId is not None:
        sampleToFiles = Samples.__get_files_to_download(
            myAPI, projectId, sampleId, sampleName, sampleLimit, sampleFileLimit)
    else:
        # No project Id: scan the user's projects (optionally restricted by
        # name) and stop at the first one that yields any files.
        myProjects = myAPI.getProjectByUser(qp({'Limit': projectLimit}))
        for project in myProjects:
            if projectName is not None and project.Name != projectName:
                continue
            sampleToFiles = Samples.__get_files_to_download(
                myAPI, project.Id, sampleId, sampleName, sampleLimit, sampleFileLimit)
            if sampleToFiles:
                break

    numFiles = sum(len(sampleFiles) for sampleFiles in sampleToFiles.values())
    print("Will download %d files." % numFiles)

    fileNumber = 0
    for currentSampleId in sampleToFiles:
        for sampleFile in sampleToFiles[currentSampleId]:
            fileNumber += 1
            print('Downloading (%d/%d): %s' % (fileNumber, numFiles, str(sampleFile)))
            print("BaseSpace File Path: %s" % sampleFile.Path)
            print("Sample Id: %s" % currentSampleId)
            if dryRun:
                continue
            if createBsDir:
                # One sub-directory per sample under the root output directory.
                sampleOutputDirectory = os.path.join(outputDirectory, currentSampleId)
            else:
                sampleOutputDirectory = outputDirectory
            sampleFile.downloadFile(myAPI, sampleOutputDirectory, createBsDir=createBsDir)
    print("FASTQ file downloading complete.")
def download_basespace_files(config_file_path=None, client_key=None, client_secret=None, access_token=None,
                             project_id_list=None, project_name_list=None, sample_id_list=None,
                             sample_name_list=None, dry_run=False, output_directory=None,
                             recreate_basespace_dir_tree=True):
    """Download the files of the selected BaseSpace samples to a local directory.

    Credentials come from the arguments, falling back to the defaults of the
    config file (keys ``clientkey``, ``clientsecret``, ``accesstoken``; the
    optional ``appsessionid``, ``apiserver`` and ``apiversion`` keys are read
    from the config file only). Projects are narrowed by name and/or id, then
    the samples of those projects likewise; when no filter is given at a
    level, everything at that level is selected.

    :param config_file_path: path of an INI-style config file, or None.
    :param client_key: app client key; overrides the config file.
    :param client_secret: app client secret; overrides the config file.
    :param access_token: app access token; overrides the config file.
    :param project_id_list: project ids as strings of digits; entries in any
        other format are reported on stderr and ignored.
    :param project_name_list: project names to select.
    :param sample_id_list: sample ids as strings of digits; entries in any
        other format are reported on stderr and ignored.
    :param sample_name_list: sample names to select.
    :param dry_run: if true, list the files but neither create the output
        directory nor download anything.
    :param output_directory: destination directory; defaults to the current
        working directory.
    :param recreate_basespace_dir_tree: passed to ``downloadFile`` as
        ``createBsDir``.

    Exits the process with status 1 when a required credential is missing.
    """
    # Check input parameters / load from config file / defaults
    project_id_list = project_id_list or []
    project_name_list = project_name_list or []
    sample_id_list = sample_id_list or []
    sample_name_list = sample_name_list or []

    if not output_directory:
        output_directory = os.getcwd()
        print_stderr("Output directory not specified; using current directory ({})".format(output_directory))
    else:
        output_directory = os.path.abspath(output_directory)
    if not dry_run:
        safe_makedir(output_directory)

    config_dict = {}
    if config_file_path:
        config_parser = ConfigParser()
        config_parser.read(config_file_path)
        # Public accessor for the parser-wide defaults (was the private
        # ``_defaults`` attribute).
        config_dict = config_parser.defaults()

    if not client_key:
        client_key = config_dict.get('clientkey')
    if not client_secret:
        client_secret = config_dict.get('clientsecret')
    if not access_token:
        access_token = config_dict.get('accesstoken')

    required_params = (("client_key", client_key),
                       ("client_secret", client_secret),
                       ("access_token", access_token))
    missing_params = [name for name, value in required_params if not value]
    if missing_params:
        # The message now has a placeholder for the missing names; previously
        # they were passed to format() but never rendered.
        print_stderr('Error: Required parameters not supplied either in config '
                     'file ({}) or via arguments: {}'.format(config_file_path, ', '.join(missing_params)))
        sys.exit(1)

    app_session_id = config_dict.get("appsessionid") or ""
    api_server = config_dict.get("apiserver") or "https://api.basespace.illumina.com"
    api_version = config_dict.get("apiversion") or "v1pre3"

    # Get the API connection object
    myAPI = BaseSpaceAPI(clientKey=client_key, clientSecret=client_secret,
                         apiServer=api_server, version=api_version,
                         appSessionId=app_session_id, AccessToken=access_token)
    basespace_projects = myAPI.getProjectByUser(qp({'Limit': 1024}))
    user = myAPI.getUserById('current')

    # Ids must consist of digits only; compiled once for both id filters.
    digit_pattern = re.compile(r'^\d+$')

    # If user specified projects, get them by name or id
    project_objects = []
    if project_name_list:
        project_objects.extend(_select_from_object(filter_list=project_name_list,
                                                   search_list=basespace_projects,
                                                   key_attr="Name",
                                                   obj_type="project",
                                                   user=user))
    if project_id_list:
        project_filtered_id_list = []
        for project_id in project_id_list:
            if not digit_pattern.match(project_id):
                print_stderr('Error: Invalid format for user-specified project id '
                             '"{}": project ids are strictly numeric. Did you mean '
                             'to pass this as a project name?'.format(project_id))
            else:
                project_filtered_id_list.append(project_id)
        project_objects.extend(_select_from_object(filter_list=project_filtered_id_list,
                                                   search_list=basespace_projects,
                                                   key_attr="Id",
                                                   obj_type="project",
                                                   user=user))
    if not (project_name_list or project_id_list):
        # Get all projects if none are specified by user
        project_objects = basespace_projects

    # NOTE(review): projects are listed with an explicit Limit of 1024, but the
    # sample and file listings below pass no query parameters - confirm that
    # the API default page size does not truncate large projects/samples.
    basespace_samples = []
    for project_obj in project_objects:
        basespace_samples.extend(project_obj.getSamples(myAPI))

    sample_objects = []
    if sample_name_list:
        sample_objects.extend(_select_from_object(filter_list=sample_name_list,
                                                  search_list=basespace_samples,
                                                  key_attr="Name",
                                                  obj_type="sample",
                                                  user=user))
    if sample_id_list:
        sample_filtered_id_list = []
        for sample_id in sample_id_list:
            if not digit_pattern.match(sample_id):
                print_stderr('Error: Invalid format for user-specified sample id '
                             '"{}": sample ids are strictly numeric. Did you mean '
                             'to pass this as a sample name?'.format(sample_id))
            else:
                sample_filtered_id_list.append(sample_id)
        sample_objects.extend(_select_from_object(filter_list=sample_filtered_id_list,
                                                  search_list=basespace_samples,
                                                  key_attr="Id",
                                                  obj_type="sample",
                                                  user=user))
    if not (sample_name_list or sample_id_list):
        # Get all samples if none are specified by user
        sample_objects = basespace_samples

    files_to_download = []
    for sample_obj in sample_objects:
        files_to_download.extend(sample_obj.getFiles(myAPI))

    if files_to_download:
        print_stderr("Found {} files to download: ".format(len(files_to_download)))
        for file_obj in files_to_download:
            print_stderr("\t- {}".format(file_obj))
        print_stderr('Downloading files to output directory {}'.format(output_directory))
        if recreate_basespace_dir_tree:
            print_stderr("Recreating BaseSpace project directory tree for file.")
        if dry_run:
            print_stderr("-> Dry run: not downloading any data.")
        for i, file_obj in enumerate(files_to_download):
            print_stderr('[{}/{}] Downloading file "{}"'.format(i+1, len(files_to_download), file_obj))
            if not dry_run:
                file_obj.downloadFile(api=myAPI, localDir=output_directory,
                                      createBsDir=recreate_basespace_dir_tree)
        print_stderr('Download completed; files are located in "{}"'.format(output_directory))
    else:
        print_stderr("Error: no files found to download.")
def download_basespace_files(config_file_path=None,
                             client_key=None,
                             client_secret=None,
                             access_token=None,
                             project_id_list=None,
                             project_name_list=None,
                             sample_id_list=None,
                             sample_name_list=None,
                             dry_run=False,
                             output_directory=None,
                             recreate_basespace_dir_tree=True):
    """Download the files of the selected BaseSpace samples to a local directory.

    Credentials come from the arguments, falling back to the defaults of the
    config file (keys ``clientkey``, ``clientsecret``, ``accesstoken``; the
    optional ``appsessionid``, ``apiserver`` and ``apiversion`` keys are read
    from the config file only). Projects are narrowed by name and/or id, then
    the samples of those projects likewise; when no filter is given at a
    level, everything at that level is selected.

    :param config_file_path: path of an INI-style config file, or None.
    :param client_key: app client key; overrides the config file.
    :param client_secret: app client secret; overrides the config file.
    :param access_token: app access token; overrides the config file.
    :param project_id_list: project ids as strings of digits; entries in any
        other format are reported on stderr and ignored.
    :param project_name_list: project names to select.
    :param sample_id_list: sample ids as strings of digits; entries in any
        other format are reported on stderr and ignored.
    :param sample_name_list: sample names to select.
    :param dry_run: if true, list the files but neither create the output
        directory nor download anything.
    :param output_directory: destination directory; defaults to the current
        working directory.
    :param recreate_basespace_dir_tree: passed to ``downloadFile`` as
        ``createBsDir``.

    Exits the process with status 1 when a required credential is missing.
    """
    # Check input parameters / load from config file / defaults
    project_id_list = project_id_list or []
    project_name_list = project_name_list or []
    sample_id_list = sample_id_list or []
    sample_name_list = sample_name_list or []

    if not output_directory:
        output_directory = os.getcwd()
        print_stderr(
            "Output directory not specified; using current directory ({})".
            format(output_directory))
    else:
        output_directory = os.path.abspath(output_directory)
    if not dry_run:
        safe_makedir(output_directory)

    config_dict = {}
    if config_file_path:
        config_parser = ConfigParser()
        config_parser.read(config_file_path)
        # Public accessor for the parser-wide defaults (was the private
        # ``_defaults`` attribute).
        config_dict = config_parser.defaults()

    if not client_key:
        client_key = config_dict.get('clientkey')
    if not client_secret:
        client_secret = config_dict.get('clientsecret')
    if not access_token:
        access_token = config_dict.get('accesstoken')

    required_params = (
        ("client_key", client_key),
        ("client_secret", client_secret),
        ("access_token", access_token),
    )
    missing_params = [name for name, value in required_params if not value]
    if missing_params:
        # The message now has a placeholder for the missing names; previously
        # they were passed to format() but never rendered.
        print_stderr(
            'Error: Required parameters not supplied either in config '
            'file ({}) or via arguments: {}'.format(
                config_file_path, ', '.join(missing_params)))
        sys.exit(1)

    app_session_id = config_dict.get("appsessionid") or ""
    api_server = config_dict.get(
        "apiserver") or "https://api.basespace.illumina.com"
    api_version = config_dict.get("apiversion") or "v1pre3"

    # Get the API connection object
    myAPI = BaseSpaceAPI(clientKey=client_key,
                         clientSecret=client_secret,
                         apiServer=api_server,
                         version=api_version,
                         appSessionId=app_session_id,
                         AccessToken=access_token)
    basespace_projects = myAPI.getProjectByUser(qp({'Limit': 1024}))
    user = myAPI.getUserById('current')

    # Ids must consist of digits only; compiled once for both id filters.
    digit_pattern = re.compile(r'^\d+$')

    # If user specified projects, get them by name or id
    project_objects = []
    if project_name_list:
        project_objects.extend(
            _select_from_object(filter_list=project_name_list,
                                search_list=basespace_projects,
                                key_attr="Name",
                                obj_type="project",
                                user=user))
    if project_id_list:
        project_filtered_id_list = []
        for project_id in project_id_list:
            if not digit_pattern.match(project_id):
                print_stderr(
                    'Error: Invalid format for user-specified project id '
                    '"{}": project ids are strictly numeric. Did you mean '
                    'to pass this as a project name?'.format(project_id))
            else:
                project_filtered_id_list.append(project_id)
        project_objects.extend(
            _select_from_object(filter_list=project_filtered_id_list,
                                search_list=basespace_projects,
                                key_attr="Id",
                                obj_type="project",
                                user=user))
    if not (project_name_list or project_id_list):
        # Get all projects if none are specified by user
        project_objects = basespace_projects

    # NOTE(review): projects are listed with an explicit Limit of 1024, but the
    # sample and file listings below pass no query parameters - confirm that
    # the API default page size does not truncate large projects/samples.
    basespace_samples = []
    for project_obj in project_objects:
        basespace_samples.extend(project_obj.getSamples(myAPI))

    sample_objects = []
    if sample_name_list:
        sample_objects.extend(
            _select_from_object(filter_list=sample_name_list,
                                search_list=basespace_samples,
                                key_attr="Name",
                                obj_type="sample",
                                user=user))
    if sample_id_list:
        sample_filtered_id_list = []
        for sample_id in sample_id_list:
            if not digit_pattern.match(sample_id):
                print_stderr(
                    'Error: Invalid format for user-specified sample id '
                    '"{}": sample ids are strictly numeric. Did you mean '
                    'to pass this as a sample name?'.format(sample_id))
            else:
                sample_filtered_id_list.append(sample_id)
        sample_objects.extend(
            _select_from_object(filter_list=sample_filtered_id_list,
                                search_list=basespace_samples,
                                key_attr="Id",
                                obj_type="sample",
                                user=user))
    if not (sample_name_list or sample_id_list):
        # Get all samples if none are specified by user
        sample_objects = basespace_samples

    files_to_download = []
    for sample_obj in sample_objects:
        files_to_download.extend(sample_obj.getFiles(myAPI))

    if files_to_download:
        print_stderr("Found {} files to download: ".format(
            len(files_to_download)))
        for file_obj in files_to_download:
            print_stderr("\t- {}".format(file_obj))
        print_stderr('Downloading files to output directory {}'.format(
            output_directory))
        if recreate_basespace_dir_tree:
            print_stderr(
                "Recreating BaseSpace project directory tree for file.")
        if dry_run:
            print_stderr("-> Dry run: not downloading any data.")
        for i, file_obj in enumerate(files_to_download):
            print_stderr('[{}/{}] Downloading file "{}"'.format(
                i + 1, len(files_to_download), file_obj))
            if not dry_run:
                file_obj.downloadFile(api=myAPI,
                                      localDir=output_directory,
                                      createBsDir=recreate_basespace_dir_tree)
        print_stderr('Download completed; files are located in "{}"'.format(
            output_directory))
    else:
        print_stderr("Error: no files found to download.")