Esempio n. 1
0
    def download(clientKey=None, clientSecret=None, accessToken=None, runId=None, runName=None, outputDirectory='\\.', createBsDir=True):
        '''
        Downloads run-level files.

        Run Id and run name should not be specified together.

        When a run ID is given, that run's files are downloaded. Otherwise the
        accessible runs (up to an internal limit of 100) are scanned for runs whose
        experiment name equals the run name; the scan stops at the first matching
        run that has at least one file.

        Files are written below <outputDirectory>/<experiment name>. Progress is
        printed to standard output. If no run matches, a message is printed and the
        process exits with status 1.

        :param clientKey the Illumina developer app client key; when None, the API is
                         initialised from the 'DEFAULT' profile instead
        :param clientSecret the Illumina developer app client secret
        :param accessToken the Illumina developer app access token
        :param runId the BaseSpace run identifier
        :param runName the BaseSpace run experiment name
        :param outputDirectory the root output directory
        :param createBsDir true to recreate the path structure within BaseSpace, false otherwise
        '''
        # NOTE(review): the default outputDirectory is a backslash followed by a
        # dot, not the current directory; it is kept as-is for compatibility but
        # was presumably meant to be '.' -- confirm with callers.
        appSessionId = ''
        apiServer = 'https://api.basespace.illumina.com/' # or 'https://api.cloud-hoth.illumina.com/'
        apiVersion = 'v1pre3'
        fileLimit = 1024
        runLimit = 100

        # init the API
        if clientKey is not None:
            myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId, accessToken)
        else:
            myAPI = BaseSpaceAPI(profile='DEFAULT')

        # The returned user is not used below; the request itself is kept so the
        # sequence of API calls is unchanged.
        myAPI.getUserById('current')

        expName = None
        runFiles = []
        if runId:
            run = myAPI.getRunById(Id=runId)
            runFiles = Runs.__get_files_to_download(myAPI, run.Id, fileLimit)
            expName = run.ExperimentName
        else:
            runs = myAPI.getAccessibleRunsByUser(qp({'Limit' : runLimit}))
            for run in runs:
                if runName and runName == run.ExperimentName:
                    expName = run.ExperimentName
                    # Use the same Runs helper and file limit as the run-ID
                    # branch; this lookup previously went through the Samples
                    # class with a different argument list.
                    runFiles = Runs.__get_files_to_download(myAPI, run.Id, fileLimit)
                    if len(runFiles) > 0:
                        break
            if not expName:
                if runName:
                    print('Could not find a run with name: %s' % runName)
                else:
                    print('Could not find a run for user')
                sys.exit(1)

        numFiles = len(runFiles)
        print("Will download %d files." % numFiles)
        for i, runFile in enumerate(runFiles):
            outDir = os.path.join(outputDirectory, expName)
            print('Downloading (%d/%d): %s' % ((i + 1), numFiles, str(runFile)))
            print("BaseSpace File Path: %s" % runFile.Path)
            print("Destination File Path: %s" % os.path.join(outDir, runFile.Name))
            # NOTE(review): `options` is not defined in this function; presumably
            # a module-level parsed-arguments object -- confirm.
            if not options.dryRun:
                runFile.downloadFile(myAPI, outDir, createBsDir=createBsDir)
        print("Download complete.")
Esempio n. 2
0
class BaseSpace(object):
    """Wrapper around ``BaseSpaceAPI`` for downloading the files of a project.

    Credentials are loaded from a JSON file in the user's home directory. The
    project can be fixed at construction time by id or by name; if neither is
    given, it is chosen interactively the first time ``download`` is called.
    """

    # Number of projects shown per page in the interactive listing.
    _PAGE_SIZE = 25

    def __init__(self,
                 project_id=None,
                 project_name=None,
                 get_all_projects=False):
        """Load credentials, build the API client and resolve the project.

        :param project_id: BaseSpace project id; takes precedence over
            ``project_name`` when both are given.
        :param project_name: project name, resolved to an id through the API.
            The process exits with status 1 if no project has that name.
        :param get_all_projects: accepted but not used by this class.
        """
        super(BaseSpace, self).__init__()
        # BaseSpace credentials
        creds = self._get_credentials()
        self.client_key = creds['client_id']
        self.client_secret = creds['client_secret']
        self.access_token = creds['access_token']
        self.version = creds['version']
        self.api_server = creds['api_server']
        self.api = BaseSpaceAPI(self.client_key,
                                self.client_secret,
                                self.api_server,
                                self.version,
                                AccessToken=self.access_token)
        self.params = qp(pars={'Limit': 1024, 'SortDir': 'Desc'})
        if project_id is not None:
            self.project_id = project_id
            self.project_name = None
        elif project_name is not None:
            self.project_name = project_name
            self.project_id = self._get_project_id_from_name(project_name)
        else:
            # Left unset here; download() prompts for a project when needed.
            self.project_id = None
            self.project_name = None
        self._runs = None

    @property
    def runs(self):
        """Runs accessible to the user, fetched on first access and cached."""
        if self._runs is None:
            self._runs = self.api.getAccessibleRunsByUser(
                queryPars=self.params)
        return self._runs

    @staticmethod
    def _ascii_name(text):
        """Return ``text`` with non-ASCII characters dropped, as a native str.

        On Python 2 this is exactly ``text.encode('ascii', 'ignore')``; on
        Python 3 the encoded bytes are decoded again so the result can be
        compared with, and formatted like, ordinary strings.
        """
        encoded = text.encode('ascii', 'ignore')
        if isinstance(encoded, str):
            return encoded
        return encoded.decode('ascii')

    def _get_credentials(self):
        """Load and return the credentials mapping from the JSON file.

        :raises IOError: if the credentials file cannot be opened.
        :raises ValueError: if the file does not contain valid JSON.
        """
        # BaseSpace credentials file should be in JSON format
        cred_file = os.path.expanduser('~/.abstar/basespace_credentials')
        with open(cred_file, 'r') as cred_handle:
            return json.load(cred_handle)

    def _get_project_id_from_name(self, project_name=None):
        """Return the id of the first project whose name matches.

        :param project_name: name to look up; defaults to
            ``self.project_name`` when omitted.
        :returns: the ``Id`` of the matching project.

        Prints a message and exits with status 1 when nothing matches.
        """
        if project_name is None:
            project_name = self.project_name
        projects = self.api.getProjectByUser(queryPars=self.params)
        for project in projects:
            if self._ascii_name(project.Name) == project_name:
                return project.Id
        # Report the name that was asked for, not the last project inspected.
        print('No projects matched the given project name ({})'.format(
            project_name))
        sys.exit(1)

    def _user_selected_project_id(self):
        """Interactively choose a project and return ``(id, name)``.

        Projects are listed one page at a time. Entering a listed number
        selects that project; any other input (for example 'next', or a
        number outside the list) shows the next page, wrapping back to the
        first page after the last one. Exits with status 1 if the user has
        no projects, since nothing could ever be selected.
        """
        try:
            read_line = raw_input  # Python 2
        except NameError:
            read_line = input  # Python 3
        projects = self.api.getProjectByUser(queryPars=self.params)
        if not projects:
            print('No BaseSpace projects were found.')
            sys.exit(1)
        self.print_basespace_project()
        page_size = self._PAGE_SIZE
        offset = 0
        while True:
            first = offset * page_size
            for i, project in enumerate(projects[first:first + page_size]):
                print('[ {} ] {}'.format(i + first,
                                         self._ascii_name(project.Name)))
            print('')
            answer = read_line(
                "Select the project number (or 'next' to see more projects): ")
            try:
                chosen = projects[int(answer)]
            except (ValueError, IndexError):
                # Not a usable project number: move on to the next page.
                offset += 1
                if offset * page_size >= len(projects):
                    offset = 0
                continue
            return chosen.Id, self._ascii_name(chosen.Name)

    def _get_projects(self, start=0):
        """Print the first page of the user's projects and return them all.

        :param start: currently unused; the listing always begins at the
            first project.
        :returns: the full project list returned by the API.
        """
        projects = self.api.getProjectByUser(queryPars=self.params)
        self.print_basespace_project()
        for i, project in enumerate(projects[:self._PAGE_SIZE]):
            print('[ {} ] {}'.format(i, self._ascii_name(project.Name)))
        print('')
        return projects

    def _get_samples(self, project_id):
        """Return every sample of ``project_id``, fetched page by page.

        Requests pages of 1024 samples in ascending order until the API
        returns an empty page.
        """
        page_size = 1024
        samples = []
        page = 0
        while True:
            query_params = qp(pars={
                'Limit': page_size,
                'SortDir': 'Asc',
                'Offset': page * page_size
            })
            batch = self.api.getSamplesByProject(project_id,
                                                 queryPars=query_params)
            if not batch:
                break
            samples.extend(batch)
            page += 1
        return samples

    def _get_files(self):
        """Return the files of every sample in the current project."""
        files = []
        for sample in self._get_samples(self.project_id):
            files.extend(
                self.api.getFilesBySample(sample.Id, queryPars=self.params))
        return files

    def download(self, direc):
        """Download all files of the selected project into ``direc``.

        Prompts for a project first if none was set on construction.

        :param direc: destination directory passed to each file's
            ``downloadFile``.
        :returns: the number of files that were downloaded.
        """
        if self.project_id is None and self.project_name is None:
            self.project_id, self.project_name = (
                self._user_selected_project_id())
        files = self._get_files()
        self.print_download_info(files)
        start = time.time()
        for i, f in enumerate(files):
            logger.info('[ {} ] {}'.format(i, str(f)))
            f.downloadFile(self.api, direc)
        end = time.time()
        self.print_completed_download_info(start, end)
        return len(files)

    def print_basespace_project(self):
        """Print the banner shown above the interactive project listing."""
        print('')
        print('')
        print('========================================')
        print('BaseSpace Project Selection')
        print('========================================')
        print('')

    def print_download_info(self, files):
        """Log the download banner and the number of files identified."""
        logger.info('')
        logger.info('')
        logger.info('========================================')
        logger.info('Downloading files from BaseSpace')
        logger.info('========================================')
        logger.info('')
        logger.info('Identified {0} files for download.'.format(len(files)))
        logger.info('')

    def print_completed_download_info(self, start, end):
        """Log the elapsed time between ``start`` and ``end`` in seconds."""
        logger.info('')
        logger.info('Download completed in {0} seconds'.format(end - start))