def __get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit=1024, sampleFileLmit=1024):
    '''
    Collects the files of every sample of a project that passes the optional filters.

    :param myAPI: object providing getSamplesByProject and getSampleFilesById
    :param projectId: Id of the project whose samples are listed
    :param sampleId: when not None, only the sample with this Id is kept
    :param sampleName: when not None, only samples with this Name are kept
    :param sampleLimit: value sent as the 'Limit' query parameter when listing samples
    :param sampleFileLmit: value sent as the 'Limit' query parameter when listing a sample's files
    :returns: dict mapping each kept sample Id to the result of getSampleFilesById for it
    '''
    sampleToFiles = {}
    samples = myAPI.getSamplesByProject(Id=projectId, queryPars=qp({'Limit': sampleLimit}))
    for sample in samples:
        # None is a singleton: test identity rather than "None != x"
        if sampleId is not None and sampleId != sample.Id:
            continue
        if sampleName is not None and sampleName != sample.Name:
            continue
        sampleToFiles[sample.Id] = myAPI.getSampleFilesById(Id=sample.Id, queryPars=qp({'Limit': sampleFileLmit}))
    return sampleToFiles
def getFiles(self,api, myQp={}):
    '''
    Returns a list of File objects

    :param api: A BaseSpaceAPI instance
    :param myQp: Query parameters to sort and filter the file list by; passed to qp()
        before the request. NOTE(review): the default is a single dict object shared by
        every call that omits this argument.
    :returns: The result of api.getFilesBySample for this object's Id
    '''
    self.isInit()
    return api.getFilesBySample(self.Id,queryPars=qp(myQp))
    # NOTE(review): in SOURCE the assignments below come after the return statement, so as
    # written they never run. They look like attribute initialisation that belongs to
    # another method of the class (presumably its constructor) -- confirm against the file.
    self.Name = None # str
    self.HrefFiles = None # str
    self.DateCreated = None # datetime
    self.SampleNumber = None # int
    self.Id = None # str
    self.Href = None # str
    self.UserOwnedBy = None # UserCompact
    self.ExperimentName = None # str
    self.Run = None # RunCompact
    self.HrefGenome = None # str
    self.IsPairedEnd = None # int
    self.Read1 = None # int
    self.Read2 = None # int
    self.NumReadsRaw = None # int
    self.NumReadsPF = None # int
    self.References = None # dict
    self.SampleId = None # str
    self.Status = None # str
    self.StatusSummary = None # str
def __init__(self, project_id=None, project_name=None, get_all_projects=False):
    '''
    Builds the API client from the stored credentials and records the project to work on.

    :param project_id: explicit project id; wins over project_name when both are given
    :param project_name: project name, resolved to an id with _get_project_id_from_name
    :param get_all_projects: accepted but not read by this constructor
    '''
    super(BaseSpace, self).__init__()

    # BaseSpace credentials: copy each entry onto the attribute of the matching name
    credentials = self._get_credentials()
    for attribute, key in (('client_key', 'client_id'),
                           ('client_secret', 'client_secret'),
                           ('access_token', 'access_token'),
                           ('version', 'version'),
                           ('api_server', 'api_server')):
        setattr(self, attribute, credentials[key])

    self.api = BaseSpaceAPI(self.client_key, self.client_secret,
                            self.api_server, self.version,
                            AccessToken=self.access_token)
    self.params = qp(pars={'Limit': 1024, 'SortDir': 'Desc'})

    if project_id is None and project_name is not None:
        # Only a name was supplied: store it, then look the id up
        self.project_name = project_name
        self.project_id = self._get_project_id_from_name(project_name)
    else:
        # Either an explicit id was supplied or nothing was; the name stays unset
        self.project_id = project_id
        self.project_name = None
        # Interactive selection through _user_selected_project_id() is disabled here.
    self._runs = None
def getAppResults(self, api, myQp=None, statuses=None):
    '''
    Returns a list of AppResult objects.

    :param api: An instance of BaseSpaceAPI
    :param myQp: (Optional) dict of query parameters, wrapped with qp() before the request
    :param statuses: An optional list of statuses
    :returns: The result of api.getAppResultsByProject for this object's Id
    '''
    self.isInit()
    # Fresh containers per call: a dict/list literal in the signature would be one
    # object shared by every call that relies on the default.
    if myQp is None:
        myQp = {}
    if statuses is None:
        statuses = []
    return api.getAppResultsByProject(self.Id, queryPars=qp(myQp), statuses=statuses)
def getFiles(self, api, myQp=None):
    '''
    Returns a list of file objects

    :param api: An instance of BaseSpaceAPI
    :param myQp: (Optional) dict of query parameters for sorting and filtering the file list,
        wrapped with qp() before the request
    :returns: The result of api.getAppResultFiles for this object's Id
    '''
    self.isInit()
    # Fresh dict per call instead of one dict literal shared through the signature
    if myQp is None:
        myQp = {}
    return api.getAppResultFiles(self.Id, queryPars=qp(myQp))
def _get_samples(self, project_id):
    '''
    Gathers the samples of a project page by page.

    Pages of 1024 items are requested in ascending order until a request comes back empty.

    :param project_id: id handed to self.api.getSamplesByProject
    :returns: list with the items of every non-empty page, in request order
    '''
    page_size = 1024
    collected = []
    page_index = 0
    while True:
        page_params = qp(pars={'Limit': page_size, 'SortDir': 'Asc', 'Offset': page_index * page_size})
        page = self.api.getSamplesByProject(project_id, queryPars=page_params)
        if not page:
            # An empty page marks the end of the listing
            return collected
        collected.extend(page)
        page_index += 1
def getSamples(self, api, queryPars=None):
    '''
    Lists the Sample objects that belong to this Run.

    :param api: An instance of BaseSpaceAPI
    :param queryPars: Optional QueryParameters object for sorting and filtering;
        qp() is used when it is left out
    :returns: The result of api.getRunSamplesById for this object's Id
    '''
    self.isInit()
    parameters = qp() if queryPars is None else queryPars
    return api.getRunSamplesById(self.Id, queryPars=parameters)
def getFiles(self, api, queryPars=None):
    '''
    Lists the file objects attached to this AppResult.

    :param api: An instance of BaseSpaceAPI
    :param queryPars: Optional QueryParameters object for sorting and filtering;
        qp() is used when it is left out
    :returns: The result of api.getAppResultFiles for this object's Id
    '''
    self.isInit()
    parameters = qp() if queryPars is None else queryPars
    return api.getAppResultFiles(self.Id, queryPars=parameters)
def getAppSessionInputsById(self, Id, queryPars=None):
    '''
    Returns a dictionary of input properties from the provided AppSessions, keyed by input Name

    A property counts as an input when its Name starts with "Input." followed by at least
    one character; the key is the Name with that prefix removed.

    :param Id: The AppSessionId, forwarded to getAppSessionPropertiesById
    :param queryPars: Optional query-parameter object; qp() is used when omitted
    :returns: dict mapping the stripped input name to the property object
    '''
    if queryPars is None:
        queryPars = qp()
    props = self.getAppSessionPropertiesById(Id, queryPars)
    inputs = {}
    for prop in props.Items:
        # Raw string: "\." in a plain literal is an invalid escape sequence
        match = re.search(r"^Input\.(.+)", prop.Name)
        if match is not None:
            inputs[match.group(1)] = prop
    return inputs
def getAppSessionPropertiesById(self, Id, queryPars=None):
    '''
    Fetches the Properties of one AppSession.

    :param Id: The AppSessionId, substituted into the request path
    :param queryPars: Optional query-parameter object; qp() is used when omitted
    :returns: The result of __singleRequest__ for PropertiesResponse
    '''
    pars = qp() if queryPars is None else queryPars
    endpoint = '/appsessions/{Id}/properties'.replace('{Id}', Id)
    pars.validate()
    query = pars.getParameterDict()
    return self.__singleRequest__(PropertiesResponse.PropertiesResponse, endpoint, 'GET', query, {}, verbose=0)
def getAvailableGenomes(self, queryPars=None):
    """
    Returns a list of all available genomes

    :param queryPars: An (optional) object of type QueryParameters for custom sorting and filtering;
        a new qp() is created for the call when omitted
    :returns: The result of __listRequest__ for GenomeV1
    """
    # A qp() written in the signature is evaluated once at definition time and then
    # shared by every call; build the default per call instead.
    if queryPars is None:
        queryPars = qp()
    resourcePath = "/genomes"
    method = "GET"
    queryPars.validate()
    queryParams = queryPars.getParameterDict()
    headerParams = {}
    return self.__listRequest__(GenomeV1.GenomeV1, resourcePath, method, queryParams, headerParams, verbose=0)
def getAvailableGenomes(self, queryPars=None):
    '''
    Returns a list of all available genomes

    :param queryPars: An (optional) object of type QueryParameters for custom sorting and filtering;
        a new qp() is created for the call when omitted
    :returns: The result of __listRequest__ for GenomeV1
    '''
    # A qp() written in the signature is evaluated once at definition time and then
    # shared by every call; build the default per call instead.
    if queryPars is None:
        queryPars = qp()
    resourcePath = '/genomes'
    method = 'GET'
    queryPars.validate()
    queryParams = queryPars.getParameterDict()
    headerParams = {}
    return self.__listRequest__(GenomeV1.GenomeV1, resourcePath, method, queryParams, headerParams, verbose=0)
def getSamplePropertiesById(self, Id, queryPars=None):
    '''
    Fetches the Properties of one Sample.

    :param Id: The id of the sample, substituted into the request path
    :param queryPars: Optional query-parameter object; qp() is used when omitted
    :returns: The result of __singleRequest__ for PropertiesResponse
    '''
    pars = qp() if queryPars is None else queryPars
    endpoint = '/samples/{Id}/properties'.replace('{Id}', Id)
    pars.validate()
    query = pars.getParameterDict()
    return self.__singleRequest__(PropertiesResponse.PropertiesResponse, endpoint, 'GET', query, {}, verbose=0)
def getAppResultFiles(self, Id, queryPars=None):
    '''
    Returns a list of File object for the AppResult with id = Id

    :param Id: The id of the appresult.
    :param queryPars: An (optional) object of type QueryParameters for custom sorting and filtering;
        a new qp() is created for the call when omitted
    :returns: The result of __listRequest__ for File
    '''
    # Build the default per call; a qp() in the signature is one instance shared by all calls
    if queryPars is None:
        queryPars = qp()
    resourcePath = '/appresults/{Id}/files'
    method = 'GET'
    queryPars.validate()
    queryParams = queryPars.getParameterDict()
    headerParams = {}
    resourcePath = resourcePath.replace('{Id}', Id)
    return self.__listRequest__(File.File, resourcePath, method, queryParams, headerParams, verbose=0)
def getProjectByUser(self, Id, queryPars=None):
    '''
    Returns a list available projects for a User with the specified Id

    :param Id: The id of the user; when None the '{Id}' placeholder is left in the path as is
    :param queryPars: An (optional) object of type QueryParameters for custom sorting and filtering;
        a new qp() is created for the call when omitted
    :returns: The result of __listRequest__ for Project
    '''
    # Build the default per call; a qp() in the signature is one instance shared by all calls
    if queryPars is None:
        queryPars = qp()
    resourcePath = '/users/{Id}/projects'
    method = 'GET'
    queryPars.validate()
    queryParams = queryPars.getParameterDict()
    headerParams = {}
    if Id is not None:
        resourcePath = resourcePath.replace('{Id}', Id)
    return self.__listRequest__(Project.Project, resourcePath, method, queryParams, headerParams)
def getAccessibleRunsByUser(self, Id, queryPars=None):
    '''
    Returns a list of accessible runs for the User with id=Id

    :param Id: An user id
    :param queryPars: An (optional) object of type QueryParameters for custom sorting and filtering;
        a new qp() is created for the call when omitted
    :returns: The result of __listRequest__ for RunCompact
    '''
    # Build the default per call; a qp() in the signature is one instance shared by all calls
    if queryPars is None:
        queryPars = qp()
    resourcePath = '/users/{Id}/runs'
    method = 'GET'
    queryPars.validate()
    queryParams = queryPars.getParameterDict()
    headerParams = {}
    resourcePath = resourcePath.replace('{Id}', Id)
    return self.__listRequest__(RunCompact.RunCompact, resourcePath, method, queryParams, headerParams)
def getProjectById(self, Id, queryPars=None):
    '''
    Requests a single project.

    :param Id: The Id of the project, substituted into the request path
    :param queryPars: Optional query-parameter object; qp() is used when omitted
    :returns: The result of __singleRequest__ for ProjectResponse
    '''
    pars = qp() if queryPars is None else queryPars
    endpoint = '/projects/{Id}'.replace('{Id}', Id)
    pars.validate()
    query = pars.getParameterDict()
    return self.__singleRequest__(ProjectResponse.ProjectResponse, endpoint, 'GET', query, {})
def getFilesBySample(self, Id, queryPars=None):
    '''
    Returns a list of File objects associated with sample with Id

    :param Id: A Sample id
    :param queryPars: An (optional) object of type QueryParameters for custom sorting and filtering;
        a new qp() is created for the call when omitted
    :returns: The result of __listRequest__ for File
    '''
    # Build the default per call; a qp() in the signature is one instance shared by all calls
    if queryPars is None:
        queryPars = qp()
    resourcePath = '/samples/{Id}/files'
    method = 'GET'
    queryPars.validate()
    queryParams = queryPars.getParameterDict()
    headerParams = {}
    resourcePath = resourcePath.replace('{Id}', Id)
    return self.__listRequest__(File.File, resourcePath, method, queryParams, headerParams, verbose=0)
def getSamplesByProject(self, Id, queryPars=None):
    """
    Returns a list of samples associated with a project with Id

    :param Id: The id of the project
    :param queryPars: An (optional) object of type QueryParameters for custom sorting and filtering;
        a new qp() is created for the call when omitted
    :returns: The result of __listRequest__ for Sample
    """
    # Build the default per call; a qp() in the signature is one instance shared by all calls
    if queryPars is None:
        queryPars = qp()
    resourcePath = "/projects/{Id}/samples"
    method = "GET"
    queryPars.validate()
    queryParams = queryPars.getParameterDict()
    headerParams = {}
    resourcePath = resourcePath.replace("{Id}", Id)
    return self.__listRequest__(Sample.Sample, resourcePath, method, queryParams, headerParams)
def getRunPropertiesById(self, Id, queryPars=None):
    '''
    Requests the Properties of one run.

    :param Id: The Id of the run, substituted into the request path
    :param queryPars: Optional query-parameter object; qp() is used when omitted
    :returns: The result of __singleRequest__ for PropertiesResponse
    '''
    pars = qp() if queryPars is None else queryPars
    endpoint = '/runs/{Id}/properties'.replace('{Id}', Id)
    pars.validate()
    query = pars.getParameterDict()
    return self.__singleRequest__(PropertiesResponse.PropertiesResponse, endpoint, 'GET', query, {})
def getAppResultPropertiesById(self, Id, queryPars=None):
    '''
    Fetches the Properties of the AppResult with the given Id.

    :param Id: The Id of the AppResult, substituted into the request path
    :param queryPars: Optional query-parameter object; qp() is used when omitted
    :returns: The result of __singleRequest__ for PropertiesResponse
    '''
    pars = qp() if queryPars is None else queryPars
    endpoint = '/appresults/{Id}/properties'.replace('{Id}', Id)
    pars.validate()
    query = pars.getParameterDict()
    return self.__singleRequest__(PropertiesResponse.PropertiesResponse, endpoint, 'GET', query, {})
def getFileById(self, Id, queryPars=None):
    '''
    Fetches one file object.

    :param Id: The id of the file, substituted into the request path
    :param queryPars: Optional query-parameter object; qp() is used when omitted
    :returns: The result of __singleRequest__ for FileResponse
    '''
    pars = qp() if queryPars is None else queryPars
    endpoint = '/files/{Id}'.replace('{Id}', Id)
    pars.validate()
    query = pars.getParameterDict()
    return self.__singleRequest__(FileResponse.FileResponse, endpoint, 'GET', query, {}, verbose=0)
def getSamplesByProject(self, Id, queryPars=None):
    '''
    Lists the samples that belong to a project.

    :param Id: The id of the project, substituted into the request path
    :param queryPars: Optional QueryParameters object for sorting and filtering;
        qp() is used when omitted
    :returns: The result of __listRequest__ for Sample
    '''
    pars = qp() if queryPars is None else queryPars
    pars.validate()
    query = pars.getParameterDict()
    endpoint = '/projects/{Id}/samples'.replace('{Id}', Id)
    return self.__listRequest__(Sample.Sample, endpoint, 'GET', query, {}, verbose=0)
def getAppSessionPropertyByName(self, Id, queryPars=None, name=''):
    '''
    Fetches the items of one multi-value Property of an AppSession.

    Note - this method (and REST API) is supported for ONLY multi-value Properties.

    :param Id: The AppSessionId, substituted into the request path
    :param queryPars: Optional query-parameter object; qp() is used when omitted
    :param name: Name of the multi-value property to retrieve
    :returns: The result of __singleRequest__ for MultiValuePropertyResponse
    '''
    pars = qp() if queryPars is None else queryPars
    # Id is substituted before Name, on the same template
    endpoint = '/appsessions/{Id}/properties/{Name}/items'.replace('{Id}', Id).replace('{Name}', name)
    pars.validate()
    query = pars.getParameterDict()
    return self.__singleRequest__(MultiValuePropertyResponse.MultiValuePropertyResponse, endpoint, 'GET', query, {}, verbose=0)
def getAppResultsByProject(self, Id, queryPars=None, statuses=None):
    '''
    Returns a list of AppResult object associated with the project with Id

    :param Id: The project id
    :param queryPars: An (optional) object of type QueryParameters for custom sorting and filtering;
        a new qp() is created for the call when omitted
    :param statuses: An (optional) list of AppResult statuses to filter by; sent comma-joined
        as the 'Statuses' query parameter when non-empty
    :returns: The result of __listRequest__ for AppResult
    '''
    # Build the default per call. A qp() in the signature is one instance shared by all
    # calls, and this method writes into the parameter dict obtained from it below.
    if queryPars is None:
        queryPars = qp()
    resourcePath = '/projects/{Id}/appresults'
    method = 'GET'
    queryPars.validate()
    queryParams = queryPars.getParameterDict()
    if statuses:
        queryParams['Statuses'] = ",".join(statuses)
    headerParams = {}
    resourcePath = resourcePath.replace('{Id}', Id)
    return self.__listRequest__(AppResult.AppResult, resourcePath, method, queryParams, headerParams, verbose=0)
def filterVariantSet(self, Id, Chrom, StartPos, EndPos, Format, queryPars=None):
    '''
    List the variants in a set of variants. Maximum returned records is 1000

    :param Id: The id of the variant file
    :param Chrom: The chromosome of interest
    :param StartPos: The start position of the sequence of interest
    :param EndPos: The end position of the sequence of interest
    :param Format: Set to 'vcf' to get the results as lines in VCF format
    :param queryPars: An (optional) object of type QueryParameters for custom sorting and filtering;
        when omitted, a new qp(pars={'SortBy':'Position'}) is created for the call
    :returns: The result of __listRequest__ for Variant
    '''
    # Build the default per call. A qp(...) in the signature is one instance shared by all
    # calls, and this method writes StartPos/EndPos/Format into its parameter dict below.
    if queryPars is None:
        queryPars = qp(pars={'SortBy':'Position'})
    resourcePath = '/variantset/{Id}/variants/chr{Chrom}'
    method = 'GET'
    queryPars.validate()
    queryParams = queryPars.getParameterDict()
    headerParams = {}
    queryParams['StartPos'] = StartPos
    queryParams['EndPos'] = EndPos
    queryParams['Format'] = Format
    # Chrom is substituted before Id, as in the request template order used so far
    resourcePath = resourcePath.replace('{Chrom}', Chrom)
    resourcePath = resourcePath.replace('{Id}', Id)
    return self.__listRequest__(Variant.Variant, resourcePath, method, queryParams, headerParams, verbose=0)
def filterVariantSet(self, Id, Chrom, StartPos, EndPos, Format, queryPars=None):
    """
    List the variants in a set of variants. Maximum returned records is 1000

    :param Id: The id of the variant file
    :param Chrom: The chromosome of interest
    :param StartPos: The start position of the sequence of interest
    :param EndPos: The end position of the sequence of interest
    :param Format: Set to 'vcf' to get the results as lines in VCF format
    :param queryPars: An (optional) object of type QueryParameters for custom sorting and filtering;
        when omitted, a new qp(pars={"SortBy": "Position"}) is created for the call
    :returns: The result of __listRequest__ for Variant
    """
    # Build the default per call. A qp(...) in the signature is one instance shared by all
    # calls, and this method writes StartPos/EndPos/Format into its parameter dict below.
    if queryPars is None:
        queryPars = qp(pars={"SortBy": "Position"})
    resourcePath = "/variantset/{Id}/variants/chr{Chrom}"
    method = "GET"
    queryPars.validate()
    queryParams = queryPars.getParameterDict()
    headerParams = {}
    queryParams["StartPos"] = StartPos
    queryParams["EndPos"] = EndPos
    queryParams["Format"] = Format
    # Chrom is substituted before Id, as in the request template order used so far
    resourcePath = resourcePath.replace("{Chrom}", Chrom)
    resourcePath = resourcePath.replace("{Id}", Id)
    return self.__listRequest__(Variant.Variant, resourcePath, method, queryParams, headerParams, verbose=0)
def call_sdk(self):
    '''Calls getAppResultPropertiesById on self.myAPI with self.appresult_id and qp(self.qp).'''
    request = self.myAPI.getAppResultPropertiesById
    target_id = self.appresult_id
    return request(target_id, qp(self.qp))
def main():
    '''
    Command-line entry point.

    Prints the current user and their runs on stderr. When a run name is given with -r, the
    files of that run are downloaded below the download directory and each one is checked:
    against its S3 etag through md5_hash when the etag is 32 characters long, otherwise
    against the size reported for the file. Without -r only the listing is printed.
    Exits with status 1 when the requested run is not among the listed runs.
    '''
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('-k', '--key', dest='key', required=True, type=str,
                        help='specify client key')
    parser.add_argument('-s', '--secret', dest='secret', required=True, type=str,
                        help='specify client secret')
    parser.add_argument('-t', '--token', dest='token', required=True, type=str,
                        help='specify access token')
    parser.add_argument('-r', '--run', dest='run', required=False, type=str,
                        help='specify the run name to download')
    # Help texts use adjacent string literals: a backslash continuation inside a literal
    # drags the next line's indentation into the text.
    parser.add_argument('-d', '--directory', dest='directory', required=False, type=str, default='./',
                        help='specify download directory. '
                             'The default is the current directory')
    parser.add_argument('--offset', dest='offset', required=False, type=int, default=0,
                        help='specify the starting offset to read. '
                             'The default is 0')
    parser.add_argument('-n', '--num_items', dest='num_items', required=False, type=int, default=10,
                        help='specify the maximum number of items to return '
                             '(max 1024). The default is 10')
    parser.add_argument('-e', '--excluded_path', dest='excluded_path', required=False, type=str, default=None,
                        help='specify files to skip (comma separated). '
                             'If file paths contain '
                             'this(ese) strings, it will be skipped.) '
                             'The default is None')
    args = parser.parse_args()

    client_key = args.key
    client_secret = args.secret
    client_token = args.token
    run_name = args.run
    download_directory = args.directory
    if args.excluded_path:
        # Drop empty entries: '' is a substring of every path, so a trailing comma
        # would otherwise exclude every file.
        excluded_file_path_strings = [s for s in args.excluded_path.split(',') if s]
    else:
        excluded_file_path_strings = []
    num_items = args.num_items
    offset = args.offset

    base_space_url = 'https://api.basespace.illumina.com/'
    my_bs_api = BaseSpaceAPI(client_key, client_secret, base_space_url, 'v1pre3', '', client_token)
    user = my_bs_api.getUserById('current')
    print('User: {}'.format(str(user)), sep='', file=sys.stderr)
    runs = user.getRuns(my_bs_api, queryPars=qp({'Limit': num_items}))
    print('Run(s): {}'.format(runs), sep='', file=sys.stderr)

    if not run_name:
        # Listing only: no run was requested
        return

    # First run whose Name matches; report a clear error instead of an IndexError
    run = next((candidate for candidate in runs if candidate.Name == run_name), None)
    if run is None:
        print('Error: run "{}" was not found among the listed runs'.format(run_name), file=sys.stderr)
        sys.exit(1)

    print('Total size ({}): {} GB'.format(run.Name, run.TotalSize / 1000000000), sep='', file=sys.stderr)
    print('Offset: {}'.format(offset), sep='', file=sys.stderr)
    print('Number of items to return: {}'.format(num_items), sep='', file=sys.stderr)

    for f in run.getFiles(my_bs_api, queryPars=qp({'Limit': num_items, 'Offset': offset})):
        file_path = f.Path
        if any(i in file_path for i in excluded_file_path_strings):
            print('Skipping file: {}'.format(file_path), file=sys.stderr)
            continue
        print('Downloading file: {}'.format(file_path), '...', sep=' ', end='', file=sys.stderr)
        try:
            f.downloadFile(my_bs_api, download_directory, createBsDir=True)
            etag = f.getFileS3metadata(my_bs_api)['etag']
            file_path = download_directory + '/' + file_path
            if len(etag) == 32:
                # A 32-character etag is compared with the md5 of the local file
                f_md5 = md5_hash(file_path)
                if f_md5 == etag:
                    print(' done (md5 correct)!', file=sys.stderr)
                else:
                    print(' error (md5 incorrect)!', f.Id, etag, f_md5, file=sys.stderr)
            else:
                # Any other etag length: fall back to a size comparison
                if f.Size == os.path.getsize(file_path):
                    print(' done (file size correct)!', file=sys.stderr)
                else:
                    print(' error (file size incorrect)!', f.Id, etag, file=sys.stderr)
        except Exception as e:
            # Best effort per file: report the failure and carry on with the next file
            print(' error ({})!!'.format(e), file=sys.stderr)
def call_sdk(self):
    '''Calls getAppSessionPropertiesById on self.myAPI with self.appsession_id and qp(self.qp).'''
    request = self.myAPI.getAppSessionPropertiesById
    target_id = self.appsession_id
    return request(target_id, qp(self.qp))
def call_sdk(self):
    '''
    Calls getAppSessionPropertyByName on self.myAPI with self.appsession_id, qp(self.qp)
    and self.property_name, in that positional order.
    '''
    request = self.myAPI.getAppSessionPropertyByName
    target_id = self.appsession_id
    query = qp(self.qp)
    return request(target_id, query, self.property_name)
import sys, os, glob, logging
from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI
from BaseSpacePy.model.QueryParameters import QueryParameters as qp

# Query parameters reused by the listing calls below: a Limit of 1024
listOptions = qp({'Limit': 1024})
logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(message)s', datefmt='%d/%m/%Y %H:%M:%S')
# NOTE(review): no credentials are passed here; presumably they come from a local
# BaseSpace configuration -- confirm.
myAPI = BaseSpaceAPI()
# First command-line argument: the project name looked up below
p = sys.argv[1]
user = myAPI.getUserById('current')
logging.info("User Name: %s" % str(user))
projects = myAPI.getProjectByUser(listOptions)
project_list = [project.Name for project in projects]
# Pick the project whose Name equals the first argument; otherwise log the available
# project names and exit with status 1.
try:
    idx = project_list.index(sys.argv[1])
    project = projects[idx]
except ValueError:
    message = '"%s" is not in your projects. Available projects are:\n%s' % \
        (sys.argv[1], '\n'.join(project_list))
    logging.error(message)
    sys.exit(1)
downloaded = glob.glob('*fastq.gz') # get already downloaded fastq
logging.info("Retrieving samples from project %s" % sys.argv[1])
samples = project.getSamples(myAPI, listOptions)
def call_sdk(self):
    '''Calls getFilePropertiesById on self.myAPI with self.file_id and qp(self.qp).'''
    request = self.myAPI.getFilePropertiesById
    target_id = self.file_id
    return request(target_id, qp(self.qp))
def _filter_numeric_ids(id_list, obj_type):
    '''Return the entries of id_list made only of digits; report every other entry on stderr.'''
    digit_pattern = re.compile(r'^\d+$')
    numeric_ids = []
    for candidate in id_list:
        if digit_pattern.match(candidate):
            numeric_ids.append(candidate)
        else:
            print_stderr('Error: Invalid format for user-specified {0} id '
                         '"{1}": {0} ids are strictly numeric. Did you mean '
                         'to pass this as a {0} name?'.format(obj_type, candidate))
    return numeric_ids


def download_basespace_files(config_file_path=None, client_key=None, client_secret=None, access_token=None,
                             project_id_list=None, project_name_list=None, sample_id_list=None,
                             sample_name_list=None, dry_run=False, output_directory=None,
                             recreate_basespace_dir_tree=True):
    '''
    Downloads the files of the selected BaseSpace samples into a local directory.

    Credentials come from the arguments first; any that are missing are read from the
    defaults of the config file (keys clientkey, clientsecret, accesstoken). The keys
    appsessionid, apiserver and apiversion are read from the same place, with built-in
    fallbacks. Projects are selected by name and/or id, or all of the user's projects when
    neither list is given; samples are selected the same way within those projects.

    :param config_file_path: optional path of a config file read with ConfigParser
    :param client_key: client key; overrides the config file
    :param client_secret: client secret; overrides the config file
    :param access_token: access token; overrides the config file
    :param project_id_list: project ids to select; non-numeric entries are reported and skipped
    :param project_name_list: project names to select
    :param sample_id_list: sample ids to select; non-numeric entries are reported and skipped
    :param sample_name_list: sample names to select
    :param dry_run: when true the output directory is not created and nothing is downloaded
    :param output_directory: download target; the current directory when not given
    :param recreate_basespace_dir_tree: passed to downloadFile as createBsDir

    Exits with status 1 when client_key, client_secret or access_token is still missing.
    '''
    # Check input parameters / load from config file / defaults
    if not project_id_list:
        project_id_list = []
    if not project_name_list:
        project_name_list = []
    if not sample_id_list:
        sample_id_list = []
    if not sample_name_list:
        sample_name_list = []
    if not output_directory:
        output_directory = os.getcwd()
        print_stderr("Output directory not specified; using current directory ({})".format(output_directory))
    else:
        output_directory = os.path.abspath(output_directory)
    if not dry_run:
        safe_makedir(output_directory)

    config_dict = {}
    if config_file_path:
        config_parser = ConfigParser()
        config_parser.read(config_file_path)
        # Public accessor for the defaults mapping instead of the private _defaults
        config_dict = config_parser.defaults()
    if not client_key:
        client_key = config_dict.get('clientkey')
    if not client_secret:
        client_secret = config_dict.get('clientsecret')
    if not access_token:
        access_token = config_dict.get('accesstoken')
    if not (client_key and client_secret and access_token):
        missing_params = []
        if not client_key:
            missing_params.append("client_key")
        if not client_secret:
            missing_params.append("client_secret")
        if not access_token:
            missing_params.append("access_token")
        # The message now has a placeholder for the missing names; before, the list was
        # passed to format() but never shown.
        print_stderr('Error: Required parameters not supplied either in config '
                     'file ({}) or via arguments: {}'.format(config_file_path, ', '.join(missing_params)))
        sys.exit(1)
    app_session_id = config_dict.get("appsessionid") or ""
    api_server = config_dict.get("apiserver") or "https://api.basespace.illumina.com"
    api_version = config_dict.get("apiversion") or "v1pre3"

    # Get the API connection object
    myAPI = BaseSpaceAPI(clientKey=client_key, clientSecret=client_secret,
                         apiServer=api_server, version=api_version,
                         appSessionId=app_session_id, AccessToken=access_token)
    basespace_projects = myAPI.getProjectByUser(qp({'Limit' : 1024}))
    user = myAPI.getUserById('current')

    # If user specified projects, get them by name or id
    project_objects = []
    if project_name_list:
        project_objects.extend(_select_from_object(filter_list=project_name_list,
                                                   search_list=basespace_projects,
                                                   key_attr="Name",
                                                   obj_type="project",
                                                   user=user))
    if project_id_list:
        project_filtered_id_list = _filter_numeric_ids(project_id_list, "project")
        project_objects.extend(_select_from_object(filter_list=project_filtered_id_list,
                                                   search_list=basespace_projects,
                                                   key_attr="Id",
                                                   obj_type="project",
                                                   user=user))
    if not (project_name_list or project_id_list):
        # Get all projects if none are specified by user
        project_objects = basespace_projects

    basespace_samples = []
    for project_obj in project_objects:
        basespace_samples.extend(project_obj.getSamples(myAPI))

    sample_objects = []
    if sample_name_list:
        sample_objects.extend(_select_from_object(filter_list=sample_name_list,
                                                  search_list=basespace_samples,
                                                  key_attr="Name",
                                                  obj_type="sample",
                                                  user=user))
    if sample_id_list:
        sample_filtered_id_list = _filter_numeric_ids(sample_id_list, "sample")
        sample_objects.extend(_select_from_object(filter_list=sample_filtered_id_list,
                                                  search_list=basespace_samples,
                                                  key_attr="Id",
                                                  obj_type="sample",
                                                  user=user))
    if not (sample_name_list or sample_id_list):
        # Get all samples if none are specified by user
        sample_objects = basespace_samples

    files_to_download = []
    for sample_obj in sample_objects:
        files_to_download.extend(sample_obj.getFiles(myAPI))

    if files_to_download:
        print_stderr("Found {} files to download: ".format(len(files_to_download)))
        for file_obj in files_to_download:
            print_stderr("\t- {}".format(file_obj))
        print_stderr('Downloading files to output directory {}'.format(output_directory))
        if recreate_basespace_dir_tree:
            print_stderr("Recreating BaseSpace project directory tree for file.")
        if dry_run:
            print_stderr("-> Dry run: not downloading any data.")
        for i, file_obj in enumerate(files_to_download):
            print_stderr('[{}/{}] Downloading file "{}"'.format(i+1, len(files_to_download), file_obj))
            if not dry_run:
                file_obj.downloadFile(api=myAPI, localDir=output_directory,
                                      createBsDir=recreate_basespace_dir_tree)
        print_stderr('Download completed; files are located in "{}"'.format(output_directory))
    else:
        print_stderr("Error: no files found to download.")
def call_sdk(self):
    '''Calls getSamplePropertiesById on self.myAPI with self.sample_id and qp(self.qp).'''
    request = self.myAPI.getSamplePropertiesById
    target_id = self.sample_id
    return request(target_id, qp(self.qp))
#!/usr/bin/env python2.7 from __future__ import print_function import sys, os, glob, logging from argparse import ArgumentParser from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI from BaseSpacePy.model.QueryParameters import QueryParameters as qp list_options = qp({'Limit': 1024}) logging.basicConfig( level=logging.INFO, format='[%(asctime)s] %(message)s', datefmt='%d/%m/%Y %H:%M:%S', ) bs = BaseSpaceAPI() user = bs.getUserById('current') logging.info("User Name: %s", user) projects = bs.getProjectByUser(list_options) project_list = [project.Name for project in projects] cli = ArgumentParser() cli.add_argument( 'project', nargs='?', help= 'Which project to download files from. When not specified, list projects instead.' )
def _filter_numeric_ids(id_list, obj_type):
    '''Return the entries of id_list made only of digits; report every other entry on stderr.'''
    digit_pattern = re.compile(r'^\d+$')
    numeric_ids = []
    for candidate in id_list:
        if digit_pattern.match(candidate):
            numeric_ids.append(candidate)
        else:
            print_stderr(
                'Error: Invalid format for user-specified {0} id '
                '"{1}": {0} ids are strictly numeric. Did you mean '
                'to pass this as a {0} name?'.format(obj_type, candidate))
    return numeric_ids


def download_basespace_files(config_file_path=None,
                             client_key=None,
                             client_secret=None,
                             access_token=None,
                             project_id_list=None,
                             project_name_list=None,
                             sample_id_list=None,
                             sample_name_list=None,
                             dry_run=False,
                             output_directory=None,
                             recreate_basespace_dir_tree=True):
    '''
    Downloads the files of the selected BaseSpace samples into a local directory.

    Credentials come from the arguments first; any that are missing are read from the
    defaults of the config file (keys clientkey, clientsecret, accesstoken). The keys
    appsessionid, apiserver and apiversion are read from the same place, with built-in
    fallbacks. Projects are selected by name and/or id, or all of the user's projects when
    neither list is given; samples are selected the same way within those projects.

    :param config_file_path: optional path of a config file read with ConfigParser
    :param client_key: client key; overrides the config file
    :param client_secret: client secret; overrides the config file
    :param access_token: access token; overrides the config file
    :param project_id_list: project ids to select; non-numeric entries are reported and skipped
    :param project_name_list: project names to select
    :param sample_id_list: sample ids to select; non-numeric entries are reported and skipped
    :param sample_name_list: sample names to select
    :param dry_run: when true the output directory is not created and nothing is downloaded
    :param output_directory: download target; the current directory when not given
    :param recreate_basespace_dir_tree: passed to downloadFile as createBsDir

    Exits with status 1 when client_key, client_secret or access_token is still missing.
    '''
    # Check input parameters / load from config file / defaults
    if not project_id_list:
        project_id_list = []
    if not project_name_list:
        project_name_list = []
    if not sample_id_list:
        sample_id_list = []
    if not sample_name_list:
        sample_name_list = []
    if not output_directory:
        output_directory = os.getcwd()
        print_stderr(
            "Output directory not specified; using current directory ({})".
            format(output_directory))
    else:
        output_directory = os.path.abspath(output_directory)
    if not dry_run:
        safe_makedir(output_directory)

    config_dict = {}
    if config_file_path:
        config_parser = ConfigParser()
        config_parser.read(config_file_path)
        # Public accessor for the defaults mapping instead of the private _defaults
        config_dict = config_parser.defaults()
    if not client_key:
        client_key = config_dict.get('clientkey')
    if not client_secret:
        client_secret = config_dict.get('clientsecret')
    if not access_token:
        access_token = config_dict.get('accesstoken')
    if not (client_key and client_secret and access_token):
        missing_params = []
        if not client_key:
            missing_params.append("client_key")
        if not client_secret:
            missing_params.append("client_secret")
        if not access_token:
            missing_params.append("access_token")
        # The message now has a placeholder for the missing names; before, the list was
        # passed to format() but never shown.
        print_stderr(
            'Error: Required parameters not supplied either in config '
            'file ({}) or via arguments: {}'.format(config_file_path,
                                                    ', '.join(missing_params)))
        sys.exit(1)
    app_session_id = config_dict.get("appsessionid") or ""
    api_server = config_dict.get(
        "apiserver") or "https://api.basespace.illumina.com"
    api_version = config_dict.get("apiversion") or "v1pre3"

    # Get the API connection object
    myAPI = BaseSpaceAPI(clientKey=client_key,
                         clientSecret=client_secret,
                         apiServer=api_server,
                         version=api_version,
                         appSessionId=app_session_id,
                         AccessToken=access_token)
    basespace_projects = myAPI.getProjectByUser(qp({'Limit': 1024}))
    user = myAPI.getUserById('current')

    # If user specified projects, get them by name or id
    project_objects = []
    if project_name_list:
        project_objects.extend(
            _select_from_object(filter_list=project_name_list,
                                search_list=basespace_projects,
                                key_attr="Name",
                                obj_type="project",
                                user=user))
    if project_id_list:
        project_filtered_id_list = _filter_numeric_ids(project_id_list, "project")
        project_objects.extend(
            _select_from_object(filter_list=project_filtered_id_list,
                                search_list=basespace_projects,
                                key_attr="Id",
                                obj_type="project",
                                user=user))
    if not (project_name_list or project_id_list):
        # Get all projects if none are specified by user
        project_objects = basespace_projects

    basespace_samples = []
    for project_obj in project_objects:
        basespace_samples.extend(project_obj.getSamples(myAPI))

    sample_objects = []
    if sample_name_list:
        sample_objects.extend(
            _select_from_object(filter_list=sample_name_list,
                                search_list=basespace_samples,
                                key_attr="Name",
                                obj_type="sample",
                                user=user))
    if sample_id_list:
        sample_filtered_id_list = _filter_numeric_ids(sample_id_list, "sample")
        sample_objects.extend(
            _select_from_object(filter_list=sample_filtered_id_list,
                                search_list=basespace_samples,
                                key_attr="Id",
                                obj_type="sample",
                                user=user))
    if not (sample_name_list or sample_id_list):
        # Get all samples if none are specified by user
        sample_objects = basespace_samples

    files_to_download = []
    for sample_obj in sample_objects:
        files_to_download.extend(sample_obj.getFiles(myAPI))

    if files_to_download:
        print_stderr("Found {} files to download: ".format(
            len(files_to_download)))
        for file_obj in files_to_download:
            print_stderr("\t- {}".format(file_obj))
        print_stderr('Downloading files to output directory {}'.format(
            output_directory))
        if recreate_basespace_dir_tree:
            print_stderr(
                "Recreating BaseSpace project directory tree for file.")
        if dry_run:
            print_stderr("-> Dry run: not downloading any data.")
        for i, file_obj in enumerate(files_to_download):
            print_stderr('[{}/{}] Downloading file "{}"'.format(
                i + 1, len(files_to_download), file_obj))
            if not dry_run:
                file_obj.downloadFile(api=myAPI,
                                      localDir=output_directory,
                                      createBsDir=recreate_basespace_dir_tree)
        print_stderr('Download completed; files are located in "{}"'.format(
            output_directory))
    else:
        print_stderr("Error: no files found to download.")
def download(clientKey=None, clientSecret=None, accessToken=None,
             sampleId=None, projectId=None, sampleName=None,
             projectName=None, outputDirectory='\\.', createBsDir=True):
    '''
    Downloads sample-level files.

    Project Id and project name should not be specified together; similarly
    sample Id and sample name should not be specified together.

    1. If a project Id is given, only that project is searched.  The sample
       Id / sample name (when given) are passed on to
       ``Samples.__get_files_to_download``, which selects the samples.
    2. If no project Id is given, the current user's projects are listed
       (optionally restricted to those whose name equals ``projectName``) and
       searched in order; the search stops at the first project that yields
       at least one sample.

    NOTE: the dry-run switch is read from a module-level ``options`` object
    (``options.dryRun``); it is not a parameter of this function.

    :param clientKey the Illumina developer app client key; when None the
        'DEFAULT' profile is used to build the API object instead
    :param clientSecret the Illumina developer app client secret
    :param accessToken the Illumina developer app access token
    :param sampleId the BaseSpace sample identifier
    :param projectId the BaseSpace project identifier
    :param sampleName the BaseSpace sample name
    :param projectName the BaseSpace project name
    :param outputDirectory the root output directory
    :param createBsDir true to recreate the path structure within BaseSpace
        (files are then placed in a per-sample sub-directory of the output
        directory), false otherwise
    '''
    # NOTE(review): the default for outputDirectory is the two-character
    # string "backslash, dot" (value unchanged, only the escape is now
    # spelled validly).  It was probably meant to be '.' -- confirm before
    # changing, since callers may rely on the current value.
    appSessionId = ''
    apiServer = 'https://api.basespace.illumina.com/'  # or 'https://api.cloud-hoth.illumina.com/'
    apiVersion = 'v1pre3'
    projectLimit = 100
    sampleLimit = 1024
    sampleFileLimit = 1024

    # init the API
    if clientKey is not None:
        myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId, accessToken)
    else:
        myAPI = BaseSpaceAPI(profile='DEFAULT')

    # The returned user object is not used; the call itself is kept because
    # it hits the API (presumably an early credentials check -- confirm).
    myAPI.getUserById('current')

    sampleToFiles = {}
    if projectId is not None:
        sampleToFiles = Samples.__get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit, sampleFileLimit)
    else:
        myProjects = myAPI.getProjectByUser(qp({'Limit': projectLimit}))
        for project in myProjects:
            if projectName is not None and project.Name != projectName:
                continue
            sampleToFiles = Samples.__get_files_to_download(myAPI, project.Id, sampleId, sampleName, sampleLimit, sampleFileLimit)
            # Stop at the first project that produced any sample.
            if sampleToFiles:
                break

    # Flatten to (sample id, file) pairs so one counter spans all samples.
    pending = [(ownerId, sampleFile)
               for ownerId in sampleToFiles
               for sampleFile in sampleToFiles[ownerId]]
    numFiles = len(pending)
    print("will download files from %d ." % numFiles)
    for position, (ownerId, sampleFile) in enumerate(pending, 1):
        print('Downloading (%d/%d): %s' % (position, numFiles, str(sampleFile)))
        print("BaseSpace File Path: %s" % sampleFile.Path)
        print("Sample Id: %s" % ownerId)
        if not options.dryRun:
            if createBsDir:
                sampleOutputDirectory = os.path.join(outputDirectory, ownerId)
            else:
                sampleOutputDirectory = outputDirectory
            sampleFile.downloadFile(myAPI, sampleOutputDirectory, createBsDir=createBsDir)
    print("FASTQ file downloading complete.")
def call_sdk(self):
    """Call ``getProjectPropertiesById`` on ``self.myAPI``.

    Passes ``self.project_id`` and ``self.qp`` wrapped by the module-level
    ``qp`` callable (distinct from the ``self.qp`` attribute), and returns
    whatever the API call returns.
    """
    fetch_properties = self.myAPI.getProjectPropertiesById
    target_project = self.project_id
    query_pars = qp(self.qp)
    return fetch_properties(target_project, query_pars)
def call_sdk(self):
    """Call ``filterVariantSet`` on ``self.myAPI``.

    Positional arguments, in order: ``self.file_id``, ``self.chrom``,
    ``self.start_pos``, ``self.end_pos``, ``self.format`` and ``self.qp``
    wrapped by the module-level ``qp`` callable.  Returns whatever the API
    call returns.
    """
    filter_variants = self.myAPI.filterVariantSet
    call_args = [
        self.file_id,
        self.chrom,
        self.start_pos,
        self.end_pos,
        self.format,
    ]
    call_args.append(qp(self.qp))
    return filter_variants(*call_args)
def download(clientKey=None, clientSecret=None, accessToken=None,
             appResultId=None, fileNameRegexesInclude=None,
             fileNameRegexesOmit=None, outputDirectory='\\.',
             createBsDir=True, force=False, numRetries=3):
    '''
    Downloads App Result files.

    Provide an App Result identifier, and optionally regexes to include or
    omit files.  Each regex is applied with ``match`` (anchored at the start)
    to ``str(file)``.  Omission takes precedence over inclusion.

    NOTE: the dry-run switch is read from a module-level ``options`` object
    (``options.dryRun``); it is not a parameter of this function.

    :param clientKey the Illumina developer app client key; when None the
        'DEFAULT' profile is used to build the API object instead
    :param clientSecret the Illumina developer app client secret
    :param accessToken the Illumina developer app access token
    :param appResultId the BaseSpace App Result identifier
    :param fileNameRegexesInclude a list of regexes on which to include
        files; None or empty means "include everything"
    :param fileNameRegexesOmit a list of regexes on which to omit files
        (takes precedence over include); None or empty means "omit nothing"
    :param outputDirectory the root output directory
    :param createBsDir true to recreate the path structure within BaseSpace,
        false otherwise
    :param force use the force: overwrite existing files if true, false
        otherwise
    :param numRetries the maximum number of download attempts for a single
        file; values below 1 are treated as 1
    :raises BaseSpaceException.ServerResponseException if every attempt for a
        file fails
    '''
    # NOTE(review): the default for outputDirectory is the two-character
    # string "backslash, dot" (value unchanged, only the escape is now
    # spelled validly).  It was probably meant to be '.' -- confirm.
    appSessionId = ''
    apiServer = 'https://api.basespace.illumina.com/'  # or 'https://api.cloud-hoth.illumina.com/'
    apiVersion = 'v1pre3'
    fileLimit = 10000
    sleepTime = 1.0

    # init the API
    if clientKey is not None:
        myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId, accessToken)
    else:
        myAPI = BaseSpaceAPI(profile='DEFAULT')

    # The returned user object is not used; the call itself is kept because
    # it hits the API (presumably an early credentials check -- confirm).
    myAPI.getUserById('current')

    appResult = myAPI.getAppResultById(Id=appResultId)
    print("Retrieving files from the App Result: " + str(appResult))

    # Get all the files from the AppResult
    filesToDownload = appResult.getFiles(myAPI, queryPars=qp({'Limit': fileLimit}))

    # Filter file names based on the include or omit regexes
    includePatterns = [re.compile(pattern) for pattern in (fileNameRegexesInclude or ())]
    omitPatterns = [re.compile(pattern) for pattern in (fileNameRegexesOmit or ())]

    def keepFile(name):
        # No include patterns means everything is included by default.
        included = (not includePatterns
                    or any(pattern.match(name) for pattern in includePatterns))
        omitted = any(pattern.match(name) for pattern in omitPatterns)
        return included and not omitted

    filesToDownload = [f for f in filesToDownload if keepFile(str(f))]
    numFiles = len(filesToDownload)
    print("Will download %d files." % numFiles)

    # Always try at least once, so a non-positive numRetries cannot skip the
    # download and then fail with "raise None".
    maxAttempts = max(1, numRetries)

    for position, appResultFile in enumerate(filesToDownload, 1):
        print('Downloading (%d/%d): %s' % (position, numFiles, str(appResultFile)))
        print("File Path: %s" % appResultFile.Path)
        if options.dryRun:
            continue

        relativePath = str(appResultFile.Path)
        if not createBsDir:
            relativePath = os.path.basename(relativePath)
        # The file is written beneath outputDirectory, so the existence check
        # must look there rather than relative to the current directory.
        outputPath = os.path.join(outputDirectory, relativePath)

        if os.path.exists(outputPath):
            if force:
                print("Overwritting: %s" % outputPath)
            else:
                print("Skipping existing file: %s" % outputPath)
                continue
        else:
            print("Downloading to: %s" % outputPath)

        for attempt in range(1, maxAttempts + 1):
            try:
                appResultFile.downloadFile(myAPI, outputDirectory, createBsDir=createBsDir)
            except BaseSpaceException.ServerResponseException:
                if attempt == maxAttempts:
                    # Out of attempts: propagate the last server error.
                    raise
                time.sleep(sleepTime)
            else:
                break
    print("Download complete.")
def call_sdk(self):
    """Call ``getRunPropertiesById`` on ``self.myAPI``.

    Passes ``self.run_id`` and ``self.qp`` wrapped by the module-level ``qp``
    callable (distinct from the ``self.qp`` attribute), and returns whatever
    the API call returns.
    """
    fetch_properties = self.myAPI.getRunPropertiesById
    target_run = self.run_id
    query_pars = qp(self.qp)
    return fetch_properties(target_run, query_pars)