def Main():
    """Download every file of every sample in one BaseSpace project.

    Credentials, the project name and the output directory come from
    ``ParseArg()``. The project is looked up by exact name among the projects
    returned by ``getProjectByUser()``; each sample's files are downloaded
    into ``<directory>/<sample>``. Progress is reported on stderr.

    Exits with status 1 when no project has the requested name.
    """
    args = ParseArg()
    folder = args.directory
    BaseSpaceUrl = 'https://api.basespace.illumina.com/'
    myAPI = BaseSpaceAPI(args.key, args.secret, BaseSpaceUrl, "v1pre3", "AA",
                         args.token)

    user = myAPI.getUserById('current')
    # sys.stderr.write works on both Python 2 and 3, unlike ``print >>``.
    sys.stderr.write("\nUser name: %s\n\n" % (str(user)))

    # Pick the first project whose name matches exactly.
    Project = None
    for p in myAPI.getProjectByUser():
        if p.Name == args.project:
            sys.stderr.write(" Find project %s with ID: %s. \n" % (p.Name, p.Id))
            Project = p
            break

    if Project is None:
        sys.stderr.write(
            " Could not find project %s, from user %s, please check your token.\n"
            % (args.project, str(user)))
        # A missing project is a failure, so report it through the exit code.
        sys.exit(1)

    Samples = Project.getSamples(myAPI)
    sys.stderr.write("Samples for this project: " + str(Samples) + "\n")
    for s in Samples:
        sys.stderr.write(" Downloading files in sample " + str(s) + "\n")
        # One sub-directory per sample, named after str(sample).
        subfolder = os.path.join(folder, str(s))
        if not os.path.exists(subfolder):
            os.makedirs(subfolder)
        for f in s.getFiles(myAPI):
            sys.stderr.write(" " + str(f) + "\n")
            f.downloadFile(myAPI, subfolder)
def Main():
    """Download every file of every sample in one BaseSpace project.

    Credentials, the project name and the output directory come from
    ``ParseArg()``. The project is looked up by exact name among the projects
    returned by ``getProjectByUser()``; each sample's files are downloaded
    into ``<directory>/<sample>``. Progress is reported on stderr.

    Exits with status 1 when no project has the requested name.
    """
    args = ParseArg()
    folder = args.directory
    BaseSpaceUrl = 'https://api.basespace.illumina.com/'
    myAPI = BaseSpaceAPI(args.key, args.secret, BaseSpaceUrl, "v1pre3", "AA",
                         args.token)

    user = myAPI.getUserById('current')
    # sys.stderr.write works on both Python 2 and 3, unlike ``print >>``.
    sys.stderr.write("\nUser name: %s\n\n" % (str(user)))

    # Pick the first project whose name matches exactly.
    Project = None
    for p in myAPI.getProjectByUser():
        if p.Name == args.project:
            sys.stderr.write(" Find project %s with ID: %s. \n" % (p.Name, p.Id))
            Project = p
            break

    if Project is None:
        sys.stderr.write(
            " Could not find project %s, from user %s, please check your token.\n"
            % (args.project, str(user)))
        # A missing project is a failure, so report it through the exit code.
        sys.exit(1)

    Samples = Project.getSamples(myAPI)
    sys.stderr.write("Samples for this project: " + str(Samples) + "\n")
    for s in Samples:
        sys.stderr.write(" Downloading files in sample " + str(s) + "\n")
        # One sub-directory per sample, named after str(sample).
        subfolder = os.path.join(folder, str(s))
        if not os.path.exists(subfolder):
            os.makedirs(subfolder)
        for f in s.getFiles(myAPI):
            sys.stderr.write(" " + str(f) + "\n")
            f.downloadFile(myAPI, subfolder)
def download_Project(project_Name, output_folder):
    """Download all sample files of matching projects and upload them to S3.

    Every project of the current user whose string form contains
    ``project_Name`` is processed: its samples are listed, each sample file is
    downloaded into ``output_folder`` and then passed to ``s3_upload`` with
    the bucket ``"bmi-ngs"`` and the key ``<project_Name>/<file>``.

    :param project_Name: text to look for in the project description
    :param output_folder: local directory for the downloads (created if absent)
    """
    # The authentication values below are read from names defined outside this
    # function; fill them in with your own values:
    #   client_key = <my key>
    #   client_secret = <my secret>
    #   AppSessionId = <my appSession id>
    #   BaseSpaceUrl = 'https://api.basespace.illumina.com/'
    #   version = 'v1pre3'
    #   accessToken = <my acceseToken>
    myAPI = BaseSpaceAPI(client_key, client_secret, BaseSpaceUrl, version,
                         AppSessionId, AccessToken=accessToken)

    # Retrieve the current user; the text before the first ':' of its string
    # form is used as the user id.
    user = myAPI.getUserById('current')
    user_id = str(user).split(':')[0]

    # Retrieve the projects associated with that user.
    projects = myAPI.getProjectByUser(
        user_id, queryPars=QueryParameters({'Limit': '100'}))

    wanted = str(project_Name)
    project_found = False
    for project in projects:
        # Match against the whole description instead of its '-'-separated
        # pieces, so project names that themselves contain '-' can be found.
        if wanted not in str(project):
            continue
        project_found = True
        # Use the Id attribute rather than parsing it out of str(project).
        id_project = project.Id
        samples = myAPI.getSamplesByProject(
            id_project, queryPars=QueryParameters({'Limit': '100'}))
        print("There are " + str(len(samples)) + " samples in the requested project ("
              + wanted + " - ID_PROJECT " + str(id_project) + ")")
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)
        print(time.ctime() + " START DOWNLOADING")
        for sample in samples:
            for fastq in sample.getFiles(myAPI):
                fastq.downloadFile(myAPI, output_folder)
                print(time.ctime() + " FILE " + str(fastq) + " DOWNLOADED")
                path_file = join(output_folder, str(fastq))
                path_S3 = join(wanted, str(fastq))
                s3_upload(path_file, "bmi-ngs", path_S3)
        print(time.ctime() + " DOWNLOAD COMPLETED")

    if not project_found:
        print("Project Not Found")
def download_basespace_files(config_file_path=None, client_key=None, client_secret=None,
                             access_token=None, project_id_list=None, project_name_list=None,
                             sample_id_list=None, sample_name_list=None, dry_run=False,
                             output_directory=None, recreate_basespace_dir_tree=True):
    """Download the files of selected BaseSpace samples.

    Credentials given as arguments take precedence; anything missing is read
    from the defaults section of ``config_file_path`` (keys ``clientkey``,
    ``clientsecret``, ``accesstoken`` and optionally ``appsessionid``,
    ``apiserver``, ``apiversion``). The process exits with status 1 when a
    required credential is still missing.

    Projects are selected by name and/or numeric id (all projects when
    neither list is given); samples within those projects are selected the
    same way. Every file of every selected sample is downloaded into
    ``output_directory`` (current directory by default). With ``dry_run`` the
    files are only listed. All messages go through ``print_stderr``.
    """
    # Check input parameters / load from config file / defaults
    project_id_list = project_id_list or []
    project_name_list = project_name_list or []
    sample_id_list = sample_id_list or []
    sample_name_list = sample_name_list or []

    if not output_directory:
        output_directory = os.getcwd()
        print_stderr("Output directory not specified; using current directory ({})".format(
            output_directory))
    else:
        output_directory = os.path.abspath(output_directory)
    if not dry_run:
        safe_makedir(output_directory)

    config_dict = {}
    if config_file_path:
        config_parser = ConfigParser()
        config_parser.read(config_file_path)
        # Public accessor for the defaults mapping.
        config_dict = config_parser.defaults()
        if not client_key:
            client_key = config_dict.get('clientkey')
        if not client_secret:
            client_secret = config_dict.get('clientsecret')
        if not access_token:
            access_token = config_dict.get('accesstoken')

    if not (client_key and client_secret and access_token):
        missing_params = []
        if not client_key:
            missing_params.append("client_key")
        if not client_secret:
            missing_params.append("client_secret")
        if not access_token:
            missing_params.append("access_token")
        # Two placeholders, so the missing parameter names are actually shown.
        print_stderr('Error: Required parameters not supplied either in config '
                     'file ({}) or via arguments: {}'.format(config_file_path,
                                                             ', '.join(missing_params)))
        sys.exit(1)

    app_session_id = config_dict.get("appsessionid") or ""
    api_server = config_dict.get("apiserver") or "https://api.basespace.illumina.com"
    api_version = config_dict.get("apiversion") or "v1pre3"

    # Get the API connection object
    myAPI = BaseSpaceAPI(clientKey=client_key, clientSecret=client_secret,
                         apiServer=api_server, version=api_version,
                         appSessionId=app_session_id, AccessToken=access_token)
    basespace_projects = myAPI.getProjectByUser(qp({'Limit': 1024}))
    user = myAPI.getUserById('current')

    digit_pattern = re.compile(r'^\d+$')

    def _numeric_ids_only(id_list, obj_type):
        # Keep the ids made only of digits; report every other id on stderr.
        kept = []
        for obj_id in id_list:
            if digit_pattern.match(obj_id):
                kept.append(obj_id)
            else:
                print_stderr('Error: Invalid format for user-specified {0} id '
                             '"{1}": {0} ids are strictly numeric. Did you mean '
                             'to pass this as a {0} name?'.format(obj_type, obj_id))
        return kept

    # If user specified projects, get them by name or id
    project_objects = []
    if project_name_list:
        project_objects.extend(_select_from_object(filter_list=project_name_list,
                                                   search_list=basespace_projects,
                                                   key_attr="Name",
                                                   obj_type="project",
                                                   user=user))
    if project_id_list:
        project_objects.extend(_select_from_object(
            filter_list=_numeric_ids_only(project_id_list, "project"),
            search_list=basespace_projects,
            key_attr="Id",
            obj_type="project",
            user=user))
    if not (project_name_list or project_id_list):
        # Get all projects if none are specified by user
        project_objects = basespace_projects

    basespace_samples = []
    for project_obj in project_objects:
        basespace_samples.extend(project_obj.getSamples(myAPI))

    sample_objects = []
    if sample_name_list:
        sample_objects.extend(_select_from_object(filter_list=sample_name_list,
                                                  search_list=basespace_samples,
                                                  key_attr="Name",
                                                  obj_type="sample",
                                                  user=user))
    if sample_id_list:
        sample_objects.extend(_select_from_object(
            filter_list=_numeric_ids_only(sample_id_list, "sample"),
            search_list=basespace_samples,
            key_attr="Id",
            obj_type="sample",
            user=user))
    if not (sample_name_list or sample_id_list):
        # Get all samples if none are specified by user
        sample_objects = basespace_samples

    files_to_download = []
    for sample_obj in sample_objects:
        files_to_download.extend(sample_obj.getFiles(myAPI))

    if not files_to_download:
        print_stderr("Error: no files found to download.")
        return

    total = len(files_to_download)
    print_stderr("Found {} files to download: ".format(total))
    for file_obj in files_to_download:
        print_stderr("\t- {}".format(file_obj))
    print_stderr('Downloading files to output directory {}'.format(output_directory))
    if recreate_basespace_dir_tree:
        print_stderr("Recreating BaseSpace project directory tree for file.")
    if dry_run:
        print_stderr("-> Dry run: not downloading any data.")
    for i, file_obj in enumerate(files_to_download):
        print_stderr('[{}/{}] Downloading file "{}"'.format(i + 1, total, file_obj))
        if not dry_run:
            file_obj.downloadFile(api=myAPI, localDir=output_directory,
                                  createBsDir=recreate_basespace_dir_tree)
    if dry_run:
        # Nothing was written, so do not claim a completed download.
        print_stderr('Dry run completed; no files were downloaded.')
    else:
        print_stderr('Download completed; files are located in "{}"'.format(output_directory))
# Fetch the genome whose id is '4' and show its description.
myGenome = myAPI.getGenomeById('4')
print("\nThe Genome is " + str(myGenome))
print("We can get more information from the genome object")
print('Id: ' + myGenome.Id)
print('Href: ' + myGenome.Href)
print('DisplayName: ' + myGenome.DisplayName)

# Every genome the API reports as available.
allGenomes = myAPI.getAvailableGenomes()
print("\nGenomes \n" + str(allGenomes))

# The user the API identifies as 'current'.
user = myAPI.getUserById('current')
print("\nThe current user is \n" + str(user))

# Projects requested through the API object ...
myProjects = myAPI.getProjectByUser('current')
print("\nThe projects for this user are \n" + str(myProjects))

# ... and the same request made through the user object.
myProjects2 = user.getProjects(myAPI)
print("\nProjects retrieved from the user instance \n" + str(myProjects2))

# Runs of the current user, requested through the user object.
runs = user.getRuns(myAPI)
print("\nThe runs for this user are \n" + str(runs))

# The identical request is issued a second time and printed under another
# heading.
runs = user.getRuns(myAPI)
print("\nRuns retrieved from user instance \n" + str(runs))
import sys
import os
import glob
import logging

from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI
from BaseSpacePy.model.QueryParameters import QueryParameters as qp

# Query parameters shared by the listing calls below.
listOptions = qp({'Limit': 1024})

logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(message)s',
    datefmt='%d/%m/%Y %H:%M:%S',
)

myAPI = BaseSpaceAPI()

# Name of the project to work on: first command-line argument.
p = sys.argv[1]

user = myAPI.getUserById('current')
logging.info("User Name: %s", str(user))

projects = myAPI.getProjectByUser(listOptions)
project_list = [project.Name for project in projects]

# Select the first project with the requested name, or list the available
# names and stop with exit status 1.
if p in project_list:
    idx = project_list.index(p)
    project = projects[idx]
else:
    message = '"%s" is not in your projects. Available projects are:\n%s' % \
        (p, '\n'.join(project_list))
    logging.error(message)
    sys.exit(1)

# fastq.gz files already present in the current directory
downloaded = glob.glob('*fastq.gz')

logging.info("Retrieving samples from project %s", p)
samples = project.getSamples(myAPI, listOptions)
from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI from BaseSpacePy.model.QueryParameters import QueryParameters as qp list_options = qp({'Limit': 1024}) logging.basicConfig( level=logging.INFO, format='[%(asctime)s] %(message)s', datefmt='%d/%m/%Y %H:%M:%S', ) bs = BaseSpaceAPI() user = bs.getUserById('current') logging.info("User Name: %s", user) projects = bs.getProjectByUser(list_options) project_list = [project.Name for project in projects] cli = ArgumentParser() cli.add_argument( 'project', nargs='?', help= 'Which project to download files from. When not specified, list projects instead.' ) cli.add_argument( '--dry-run', '-n', action='store_true', help='Only show which files would be downloaded without downloading them.') cli.add_argument('--dir',
# Look up genome "4" and print what the genome object exposes.
myGenome = myAPI.getGenomeById("4")
print("\nThe Genome is %s" % str(myGenome))
print("We can get more information from the genome object")
print("Id: " + myGenome.Id)
print("Href: " + myGenome.Href)
print("DisplayName: " + myGenome.DisplayName)

# All genomes reported as available by the API.
allGenomes = myAPI.getAvailableGenomes()
print("\nGenomes \n%s" % str(allGenomes))

# The user registered as "current".
user = myAPI.getUserById("current")
print("\nThe current user is \n%s" % str(user))

# Projects listed through the API object.
myProjects = myAPI.getProjectByUser()
print("\nThe projects for this user are \n%s" % str(myProjects))

# Projects listed through the user object instead.
myProjects2 = user.getProjects(myAPI)
print("\nProjects retrieved from the user instance \n%s" % str(myProjects2))

# Runs listed through the user object.
runs = user.getRuns(myAPI)
print("\nThe runs for this user are \n%s" % str(runs))

# The same call is repeated and its result printed under a second heading.
runs = user.getRuns(myAPI)
print("\nRuns retrieved from user instance \n%s" % str(runs))
def download_basespace_files(config_file_path=None,
                             client_key=None,
                             client_secret=None,
                             access_token=None,
                             project_id_list=None,
                             project_name_list=None,
                             sample_id_list=None,
                             sample_name_list=None,
                             dry_run=False,
                             output_directory=None,
                             recreate_basespace_dir_tree=True):
    """Download the files of selected BaseSpace samples.

    Credentials given as arguments take precedence; anything missing is read
    from the defaults section of ``config_file_path`` (keys ``clientkey``,
    ``clientsecret``, ``accesstoken`` and optionally ``appsessionid``,
    ``apiserver``, ``apiversion``). The process exits with status 1 when a
    required credential is still missing.

    Projects are selected by name and/or numeric id (all projects when
    neither list is given); samples within those projects are selected the
    same way. Every file of every selected sample is downloaded into
    ``output_directory`` (current directory by default). With ``dry_run`` the
    files are only listed. All messages go through ``print_stderr``.
    """
    # Check input parameters / load from config file / defaults
    project_id_list = project_id_list or []
    project_name_list = project_name_list or []
    sample_id_list = sample_id_list or []
    sample_name_list = sample_name_list or []

    if not output_directory:
        output_directory = os.getcwd()
        print_stderr(
            "Output directory not specified; using current directory ({})".
            format(output_directory))
    else:
        output_directory = os.path.abspath(output_directory)
    if not dry_run:
        safe_makedir(output_directory)

    config_dict = {}
    if config_file_path:
        config_parser = ConfigParser()
        config_parser.read(config_file_path)
        # Public accessor for the defaults mapping.
        config_dict = config_parser.defaults()
        if not client_key:
            client_key = config_dict.get('clientkey')
        if not client_secret:
            client_secret = config_dict.get('clientsecret')
        if not access_token:
            access_token = config_dict.get('accesstoken')

    if not (client_key and client_secret and access_token):
        missing_params = []
        if not client_key:
            missing_params.append("client_key")
        if not client_secret:
            missing_params.append("client_secret")
        if not access_token:
            missing_params.append("access_token")
        # Two placeholders, so the missing parameter names are actually shown.
        print_stderr(
            'Error: Required parameters not supplied either in config '
            'file ({}) or via arguments: {}'.format(
                config_file_path, ', '.join(missing_params)))
        sys.exit(1)

    app_session_id = config_dict.get("appsessionid") or ""
    api_server = config_dict.get(
        "apiserver") or "https://api.basespace.illumina.com"
    api_version = config_dict.get("apiversion") or "v1pre3"

    # Get the API connection object
    myAPI = BaseSpaceAPI(clientKey=client_key,
                         clientSecret=client_secret,
                         apiServer=api_server,
                         version=api_version,
                         appSessionId=app_session_id,
                         AccessToken=access_token)
    basespace_projects = myAPI.getProjectByUser(qp({'Limit': 1024}))
    user = myAPI.getUserById('current')

    digit_pattern = re.compile(r'^\d+$')

    def _numeric_ids_only(id_list, obj_type):
        # Keep the ids made only of digits; report every other id on stderr.
        kept = []
        for obj_id in id_list:
            if digit_pattern.match(obj_id):
                kept.append(obj_id)
            else:
                print_stderr(
                    'Error: Invalid format for user-specified {0} id '
                    '"{1}": {0} ids are strictly numeric. Did you mean '
                    'to pass this as a {0} name?'.format(obj_type, obj_id))
        return kept

    # If user specified projects, get them by name or id
    project_objects = []
    if project_name_list:
        project_objects.extend(
            _select_from_object(filter_list=project_name_list,
                                search_list=basespace_projects,
                                key_attr="Name",
                                obj_type="project",
                                user=user))
    if project_id_list:
        project_objects.extend(
            _select_from_object(filter_list=_numeric_ids_only(
                project_id_list, "project"),
                                search_list=basespace_projects,
                                key_attr="Id",
                                obj_type="project",
                                user=user))
    if not (project_name_list or project_id_list):
        # Get all projects if none are specified by user
        project_objects = basespace_projects

    basespace_samples = []
    for project_obj in project_objects:
        basespace_samples.extend(project_obj.getSamples(myAPI))

    sample_objects = []
    if sample_name_list:
        sample_objects.extend(
            _select_from_object(filter_list=sample_name_list,
                                search_list=basespace_samples,
                                key_attr="Name",
                                obj_type="sample",
                                user=user))
    if sample_id_list:
        sample_objects.extend(
            _select_from_object(filter_list=_numeric_ids_only(
                sample_id_list, "sample"),
                                search_list=basespace_samples,
                                key_attr="Id",
                                obj_type="sample",
                                user=user))
    if not (sample_name_list or sample_id_list):
        # Get all samples if none are specified by user
        sample_objects = basespace_samples

    files_to_download = []
    for sample_obj in sample_objects:
        files_to_download.extend(sample_obj.getFiles(myAPI))

    if not files_to_download:
        print_stderr("Error: no files found to download.")
        return

    total = len(files_to_download)
    print_stderr("Found {} files to download: ".format(total))
    for file_obj in files_to_download:
        print_stderr("\t- {}".format(file_obj))
    print_stderr('Downloading files to output directory {}'.format(
        output_directory))
    if recreate_basespace_dir_tree:
        print_stderr("Recreating BaseSpace project directory tree for file.")
    if dry_run:
        print_stderr("-> Dry run: not downloading any data.")
    for i, file_obj in enumerate(files_to_download):
        print_stderr('[{}/{}] Downloading file "{}"'.format(
            i + 1, total, file_obj))
        if not dry_run:
            file_obj.downloadFile(api=myAPI,
                                  localDir=output_directory,
                                  createBsDir=recreate_basespace_dir_tree)
    if dry_run:
        # Nothing was written, so do not claim a completed download.
        print_stderr('Dry run completed; no files were downloaded.')
    else:
        print_stderr('Download completed; files are located in "{}"'.format(
            output_directory))
def download(clientKey=None, clientSecret=None, accessToken=None, sampleId=None, projectId=None,
             sampleName=None, projectName=None, outputDirectory='\\.', createBsDir=True):
    '''
    Downloads sample-level files.

    Project Id and project name should
    not be specified together; similarly sample Id and sample name should not be
    specified together.

    1. If only a project Id or only a project name is given, all files for all
    samples will be downloaded within that project.  If additionally a sample Id or
    sample name is given, then only the first matching sample within the project
    will be downloaded.
    2. If only a sample Id is given, then all files for that sample will be downloaded.
    3. If only a sample name is given, then all files within the first project
    containing a sample with matching name will be downloaded.

    When no project Id is given, only the first 100 projects returned for the
    user are searched, and the search stops at the first project that yields
    files. Nothing is downloaded when options.dryRun is set.

    :param clientKey the Illumina developer app client key
    :param clientSecret the Illumina developer app client secret
    :param accessToken the Illumina developer app access token
    :param sampleId the BaseSpace sample identifier
    :param projectId the BaseSpace project identifier
    :param sampleName the BaseSpace sample name
    :param projectName the BaseSpace project name
    :param outputDirectory the root output directory
    :param createBsDir true to recreate the path structure within BaseSpace, false otherwise
    '''
    # NOTE(review): the default outputDirectory is the two-character string
    # backslash + dot. It was written as an invalid escape sequence; it is now
    # spelled explicitly with the same value. It probably should be '.' - confirm.
    appSessionId = ''
    apiServer = 'https://api.basespace.illumina.com/'  # or 'https://api.cloud-hoth.illumina.com/'
    apiVersion = 'v1pre3'
    projectLimit = 100
    sampleLimit = 1024
    sampleFileLimit = 1024

    # init the API: explicit credentials when given, else the DEFAULT profile
    if clientKey is not None:
        myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId,
                             accessToken)
    else:
        myAPI = BaseSpaceAPI(profile='DEFAULT')

    # get the current user (result unused; the request itself is kept)
    myAPI.getUserById('current')

    sampleToFiles = {}
    if projectId is not None:
        sampleToFiles = Samples.__get_files_to_download(myAPI, projectId, sampleId, sampleName,
                                                        sampleLimit, sampleFileLimit)
    else:
        myProjects = myAPI.getProjectByUser(qp({'Limit': projectLimit}))
        for project in myProjects:
            if projectName is not None and project.Name != projectName:
                continue
            sampleToFiles = Samples.__get_files_to_download(myAPI, project.Id, sampleId,
                                                            sampleName, sampleLimit,
                                                            sampleFileLimit)
            # Stop at the first project that yields something to download.
            if sampleToFiles:
                break

    numFiles = sum(len(sampleToFiles[key]) for key in sampleToFiles)
    print("will download files from %d ." % numFiles)

    i = 0
    for currentSampleId in sampleToFiles:
        for sampleFile in sampleToFiles[currentSampleId]:
            print('Downloading (%d/%d): %s' % ((i + 1), numFiles, str(sampleFile)))
            print("BaseSpace File Path: %s" % sampleFile.Path)
            print("Sample Id: %s" % currentSampleId)
            # NOTE(review): `options` is not a parameter; it is read from the
            # enclosing scope.
            if not options.dryRun:
                if createBsDir:
                    sampleOutputDirectory = os.path.join(outputDirectory, currentSampleId)
                else:
                    sampleOutputDirectory = outputDirectory
                sampleFile.downloadFile(myAPI, sampleOutputDirectory, createBsDir=createBsDir)
            i += 1
    print("FASTQ file downloading complete.")
# Without a config file, put the app's credentials here:
clientKey = ""
clientSecret = ""
appSessionId = ""
apiServer = 'https://api.basespace.illumina.com/'  # or 'https://api.cloud-hoth.illumina.com/'
apiVersion = 'v1pre3'

# Build the API object from the DEFAULT profile unless a client key was
# filled in above, in which case the explicit app values are used.
if not clientKey:
    myAPI = BaseSpaceAPI(profile='DEFAULT')
else:
    myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion,
                         appSessionId)

user = myAPI.getUserById('current')
myProjects = myAPI.getProjectByUser('current')

# Print the AppResults and the samples of each project.
for singleProject in myProjects:
    print("# %s" % str(singleProject))
    appResults = singleProject.getAppResults(myAPI)
    print(" The App results for project %s are \n\t%s"
          % (str(singleProject), str(appResults)))
    samples = singleProject.getSamples(myAPI)
    print(" The samples for project %s are \n\t%s"
          % (str(singleProject), str(samples)))

# appResults still refers to the value from the last loop iteration above;
# print the Id of each of those AppResults.
for a in appResults:
    print("# " + a.Id)
def main(uargs):
    """Print tab-separated tables of the user's projects/samples and runs.

    :param uargs: mapping of command-line options; reads ``--config``,
        ``--profile`` and ``--user``. ``uargs['--config']`` is replaced in
        place by its expanded absolute path.
    :raises KeyError: when the requested profile is not in the config file.
    """
    uargs['--config'] = os.path.abspath(os.path.expanduser(uargs['--config']))
    conf = ConfigObj(uargs['--config'], configspec=get_configspec())
    try:
        conf_args = conf[uargs['--profile']]
    except KeyError:
        msg = 'Profile "{}" not found in config file'
        # Call form of raise: valid on both Python 2 and Python 3.
        raise KeyError(msg.format(uargs['--profile']))

    # selecting config args, in the positional order passed to BaseSpaceAPI
    keys = ['clientKey', 'clientSecret', 'apiServer',
            'apiVersion', 'appSessionId', 'accessToken']
    conf_args = [conf_args[x] for x in keys]
    myAPI = BaseSpaceAPI(*conf_args)

    # setting user
    if uargs['--user'] is not None:
        user = myAPI.getUserById(uargs['--user'])
    else:
        user = myAPI.getUserById('current')

    # user projects: one output row per (project, sample) pair
    projects = myAPI.getProjectByUser()
    print("## The projects for this user are:")
    project_headers = ['ProjectID', 'Name', 'UserOwnedBy', 'DateCreated']
    sample_headers = ['SampleID', 'NumReadsRaw', 'NumReadsPF', 'IsPairedEnd', 'Status']
    print('\t'.join(project_headers + sample_headers))
    for project in projects:
        project_data = [project.Id, project.Name,
                        project.UserOwnedBy, project.DateCreated]
        samples = myAPI.getSamplesByProject(project.Id)
        for sample in samples:
            sample_data = [sample.Id, sample.NumReadsRaw,
                           sample.NumReadsPF, sample.IsPairedEnd,
                           sample.Status]
            print('\t'.join(str(x) for x in project_data + sample_data))
            #files = myAPI.getSampleFilesById(sample.Id)
            #for f in files:
                #fo = myAPI.getFileById(f.Id)
            #    myAPI.fileDownload(f.Id, '.')

    # user runs
    print("## The runs for this user are:")
    print('\t'.join(['RunID', 'ExperimentName', 'UserOwnedBy', 'DateCreated', 'Status']))
    runs = user.getRuns(myAPI)
    for run in runs:
        data = [run.Id, run.ExperimentName, run.UserOwnedBy,
                run.DateCreated, run.Status]
        print('\t'.join(str(x) for x in data))
# Retrieve genome '4' and print its description and attributes.
myGenome = myAPI.getGenomeById('4')
print("\nThe Genome is {}".format(str(myGenome)))
print("We can get more information from the genome object")
print('Id: ' + myGenome.Id)
print('Href: ' + myGenome.Href)
print('DisplayName: ' + myGenome.DisplayName)

# List of all genomes the API reports as available.
allGenomes = myAPI.getAvailableGenomes()
print("\nGenomes \n{}".format(str(allGenomes)))

# The user known to the API as 'current'.
user = myAPI.getUserById('current')
print("\nThe current user is \n{}".format(str(user)))

# Projects obtained from the API object.
myProjects = myAPI.getProjectByUser()
print("\nThe projects for this user are \n{}".format(str(myProjects)))

# Projects obtained from the user object.
myProjects2 = user.getProjects(myAPI)
print("\nProjects retrieved from the user instance \n{}".format(str(myProjects2)))

# Runs obtained from the user object.
runs = user.getRuns(myAPI)
print("\nThe runs for this user are \n{}".format(str(runs)))

# The same request again, printed under a different heading.
runs = user.getRuns(myAPI)
print("\nRuns retrieved from user instance \n{}".format(str(runs)))
def download(clientKey=None, clientSecret=None, accessToken=None, sampleId=None, projectId=None,
             sampleName=None, projectName=None, outputDirectory='\\.', createBsDir=True):
    '''
    Downloads sample-level files.

    Project Id and project name should
    not be specified together; similarly sample Id and sample name should not be
    specified together.

    1. If only a project Id or only a project name is given, all files for all
    samples will be downloaded within that project.  If additionally a sample Id or
    sample name is given, then only the first matching sample within the project
    will be downloaded.
    2. If only a sample Id is given, then all files for that sample will be downloaded.
    3. If only a sample name is given, then all files within the first project
    containing a sample with matching name will be downloaded.

    When no project Id is given, the user's projects are fetched page by page
    (1024 per request) until a project yields files or a page comes back
    empty. Nothing is downloaded when options.dryRun is set.

    :param clientKey the Illumina developer app client key
    :param clientSecret the Illumina developer app client secret
    :param accessToken the Illumina developer app access token
    :param sampleId the BaseSpace sample identifier
    :param projectId the BaseSpace project identifier
    :param sampleName the BaseSpace sample name
    :param projectName the BaseSpace project name
    :param outputDirectory the root output directory
    :param createBsDir true to recreate the path structure within BaseSpace, false otherwise
    '''
    # NOTE(review): the default outputDirectory is the two-character string
    # backslash + dot. It was written as an invalid escape sequence; it is now
    # spelled explicitly with the same value. It probably should be '.' - confirm.
    appSessionId = ''
    apiServer = 'https://api.basespace.illumina.com/'  # or 'https://api.cloud-hoth.illumina.com/'
    apiVersion = 'v1pre3'
    projectLimit = 1024
    sampleLimit = 1024
    sampleFileLimit = 1024

    # init the API: explicit credentials when given, else the DEFAULT profile
    if clientKey is not None:
        myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId,
                             accessToken)
    else:
        myAPI = BaseSpaceAPI(profile='DEFAULT')

    # get the current user (result unused; the request itself is kept)
    myAPI.getUserById('current')

    sampleToFiles = {}
    if projectId is not None:
        sampleToFiles = Samples.__get_files_to_download(myAPI, projectId, sampleId, sampleName,
                                                        sampleLimit, sampleFileLimit)
    else:
        offset = 0
        while True:
            myProjects = myAPI.getProjectByUser(qp({'Limit': projectLimit, 'Offset': offset}))
            if len(myProjects) == 0:
                break
            for project in myProjects:
                sys.stderr.write("project.Name: " + str(project.Name) + " projectName: "
                                 + str(projectName) + '\n')
                if projectName is not None and project.Name != projectName:
                    continue
                sampleToFiles = Samples.__get_files_to_download(myAPI, project.Id, sampleId,
                                                                sampleName, sampleLimit,
                                                                sampleFileLimit)
                if sampleToFiles:
                    break
            # Stop paging as soon as one project yielded files.
            if sampleToFiles:
                break
            offset += projectLimit

    numFiles = sum(len(sampleToFiles[key]) for key in sampleToFiles)
    # Single-argument print calls behave the same on Python 2 and Python 3.
    print("Will download files from %d ." % numFiles)

    i = 0
    for currentSampleId in sampleToFiles:
        for sampleFile in sampleToFiles[currentSampleId]:
            print('Downloading (%d/%d): %s' % ((i + 1), numFiles, str(sampleFile)))
            print("BaseSpace File Path: %s" % sampleFile.Path)
            print("Sample Id: %s" % currentSampleId)
            # NOTE(review): `options` is not a parameter; it is read from the
            # enclosing scope.
            if not options.dryRun:
                if createBsDir:
                    sampleOutputDirectory = os.path.join(outputDirectory, currentSampleId)
                else:
                    sampleOutputDirectory = outputDirectory
                sampleFile.downloadFile(myAPI, sampleOutputDirectory, createBsDir=createBsDir)
            i += 1
    print("Download complete.")
class BaseSpace(object):
    """Helper around ``BaseSpaceAPI`` for downloading all files of a project.

    Credentials are read from the JSON file ``~/.abstar/basespace_credentials``
    (keys ``client_id``, ``client_secret``, ``access_token``, ``version`` and
    ``api_server``). The project is fixed at construction time by id or by
    name; when neither is given, ``download`` asks the user to pick one.
    """

    def __init__(self, project_id=None, project_name=None, get_all_projects=False):
        """Connect to BaseSpace and resolve the project to work on.

        ``project_id`` wins over ``project_name``. A project name is resolved
        to its id immediately, and the process exits with status 1 when no
        project has that name. ``get_all_projects`` is accepted but not used.
        """
        super(BaseSpace, self).__init__()
        # BaseSpace credentials
        creds = self._get_credentials()
        self.client_key = creds['client_id']
        self.client_secret = creds['client_secret']
        self.access_token = creds['access_token']
        self.version = creds['version']
        self.api_server = creds['api_server']
        self.api = BaseSpaceAPI(self.client_key, self.client_secret,
                                self.api_server, self.version,
                                AccessToken=self.access_token)
        self.params = qp(pars={'Limit': 1024, 'SortDir': 'Desc'})
        if project_id is not None:
            self.project_id = project_id
            self.project_name = None
        elif project_name is not None:
            self.project_name = project_name
            self.project_id = self._get_project_id_from_name(project_name)
        else:
            self.project_id = None
            self.project_name = None
            # self.project_id, self.project_name = self._user_selected_project_id()
        self._runs = None

    @property
    def runs(self):
        """Runs accessible to the user, fetched on first access and cached."""
        if self._runs is None:
            self._runs = self.api.getAccessibleRunsByUser(
                queryPars=self.params)
        return self._runs

    def _get_credentials(self):
        """Return the parsed JSON content of the credentials file."""
        # BaseSpace credentials file should be in JSON format
        cred_file = os.path.expanduser('~/.abstar/basespace_credentials')
        # Context manager so the handle is closed once the file is parsed.
        with open(cred_file, 'r') as cred_handle:
            return json.load(cred_handle)

    def _get_project_id_from_name(self, project_name=None):
        """Return the id of the first project whose name matches.

        ``project_name`` defaults to ``self.project_name``. Project names are
        compared after ASCII-encoding them with unencodable characters
        dropped. Prints a message and exits with status 1 when nothing
        matches.
        """
        if project_name is None:
            project_name = self.project_name
        projects = self.api.getProjectByUser(queryPars=self.params)
        for project in projects:
            name = project.Name.encode('ascii', 'ignore')
            if name == project_name:
                return project.Id
        # Report the name that was asked for, not the last project inspected.
        print('No projects matched the given project name ({})'.format(project_name))
        sys.exit(1)

    def _user_selected_project_id(self):
        """Interactively ask for a project; return ``(id, ascii_name)``.

        Projects are listed 25 at a time. An answer that is not a valid
        project number (for example ``next``) shows the following page.
        """
        projects = self.api.getProjectByUser(queryPars=self.params)
        self.print_basespace_project()
        offset = 0
        while True:
            first = offset * 25
            for i, project in enumerate(projects[first:first + 25]):
                project_name = project.Name.encode('ascii', 'ignore')
                print('[ {} ] {}'.format(i + first, project_name))
            print('')
            project_index = raw_input(
                "Select the project number (or 'next' to see more projects): ")
            try:
                project_index = int(project_index)
                chosen = projects[project_index]
            except (ValueError, IndexError):
                # Not a usable project number: move on to the next page.
                # Narrow except so Ctrl-C still interrupts the prompt.
                offset += 1
                continue
            return chosen.Id, chosen.Name.encode('ascii', 'ignore')

    def _get_projects(self, start=0):
        """Print the first 25 project names and return all fetched projects.

        ``start`` is accepted but not used.
        """
        projects = self.api.getProjectByUser(queryPars=self.params)
        self.print_basespace_project()
        for i, project in enumerate(projects[:25]):
            project_name = project.Name.encode('ascii', 'ignore')
            print('[ {} ] {}'.format(i, project_name))
        print('')
        return projects

    def _get_samples(self, project_id):
        """Return every sample of ``project_id``, fetched 1024 at a time."""
        samples = []
        offset = 0
        while True:
            query_params = qp(pars={
                'Limit': 1024,
                'SortDir': 'Asc',
                'Offset': offset * 1024
            })
            # Honour the argument; the only visible caller passes
            # self.project_id.
            s = self.api.getSamplesByProject(project_id,
                                             queryPars=query_params)
            if not s:
                break
            samples.extend(s)
            offset += 1
        return samples

    def _get_files(self):
        """Return the files of every sample of the current project."""
        files = []
        samples = self._get_samples(self.project_id)
        for sample in samples:
            files.extend(
                self.api.getFilesBySample(sample.Id, queryPars=self.params))
        return files

    def download(self, direc):
        """Download every file of the project into ``direc``.

        Asks the user to choose a project first when none was configured.
        Returns the number of files handled.
        """
        if all([self.project_id is None, self.project_name is None]):
            self.project_id, self.project_name = self._user_selected_project_id()
        files = self._get_files()
        self.print_download_info(files)
        start = time.time()
        for i, f in enumerate(files):
            # self.log.write('[ {} ] {}\n'.format(i, str(f)))
            logger.info('[ {} ] {}'.format(i, str(f)))
            f.downloadFile(self.api, direc)
        end = time.time()
        self.print_completed_download_info(start, end)
        return len(files)

    def print_basespace_project(self):
        """Print the banner shown above the project selection list."""
        print('')
        print('')
        print('========================================')
        print('BaseSpace Project Selection')
        print('========================================')
        print('')

    def print_download_info(self, files):
        """Log the download banner and the number of files found."""
        logger.info('')
        logger.info('')
        logger.info('========================================')
        logger.info('Downloading files from BaseSpace')
        logger.info('========================================')
        logger.info('')
        logger.info('Identified {0} files for download.'.format(len(files)))
        logger.info('')

    def print_completed_download_info(self, start, end):
        """Log the elapsed time between ``start`` and ``end`` in seconds."""
        logger.info('')
        logger.info('Download completed in {0} seconds'.format(end - start))