Esempio n. 1
0
def main():
    """Export sample and file metadata for the requested BaseSpace projects.

    Command-line options:
      -p/--profile  the .basespacepy.cfg profile to load (default "DEFAULT")
      -j/--project  one or more projects to export (required)

    The requested project strings are matched against the current user's
    projects by ``stringsToBSObj``; a warning is emitted for every requested
    value whose string form is not among the matched objects.  The two tables
    returned by ``downloadProjectMetadata`` for each matched project are
    concatenated and written to ``fullSampleMetadata.<timestamp>.txt`` and
    ``fullFileMetadata.<timestamp>.txt`` (tab-separated, with header, no
    index) in the current working directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p',
                        '--profile',
                        default="DEFAULT",
                        help="the .basespacepy.cfg profile to load")
    parser.add_argument('-j',
                        '--project',
                        required=True,
                        nargs="+",
                        help="project to download; can accept multiple values")

    args = parser.parse_args()
    myAPI = BaseSpaceAPI(profile=args.profile, timeout=500)
    user = myAPI.getUserById('current')
    qp = QueryParameters.QueryParameters({'Limit': 1024})

    projects = user.getProjects(myAPI, qp)
    userProjs = stringsToBSObj(projects, args.project)
    foundNames = {str(x) for x in userProjs}
    for lostProj in set(args.project) - foundNames:
        warning("cannot find " + str(lostProj))

    # Collect the per-project tables and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copied the whole
    # accumulated frame on every iteration.
    sampleFrames = []
    fileFrames = []
    for project in userProjs:
        smout, fmout = downloadProjectMetadata(project, myAPI)
        sampleFrames.append(smout)
        fileFrames.append(fmout)
    # pd.concat raises ValueError on an empty list, so fall back to an empty
    # frame when no project matched (empty output files are still written).
    fullSampleMetadata = pd.concat(sampleFrames) if sampleFrames else pd.DataFrame()
    fullFileMetadata = pd.concat(fileFrames) if fileFrames else pd.DataFrame()

    # Timestamp used as a filename infix; the space between date and time is
    # replaced by ';'.
    thisInstant = str(datetime.datetime.today()).replace(' ', ';')
    fullSampleMetadata.to_csv('fullSampleMetadata.' + thisInstant + '.txt',
                              sep='\t',
                              header=True,
                              index=False)
    fullFileMetadata.to_csv('fullFileMetadata.' + thisInstant + '.txt',
                            sep='\t',
                            header=True,
                            index=False)
import sys, os, glob, logging
from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI
from BaseSpacePy.model.QueryParameters import QueryParameters as qp

# Query parameters reused for every listing call below ('Limit' set to 1024).
listOptions = qp({'Limit': 1024})

# Log at INFO level with a "[dd/mm/YYYY HH:MM:SS] message" layout.
logging.basicConfig(level=logging.INFO,
                    format='[%(asctime)s] %(message)s',
                    datefmt='%d/%m/%Y %H:%M:%S')
# NOTE(review): no credentials are passed here; presumably BaseSpacePy reads
# them from its default profile/config file -- confirm.
myAPI = BaseSpaceAPI()

# The project name is the first command-line argument; this raises IndexError
# when the script is run without arguments.
p = sys.argv[1]
user = myAPI.getUserById('current')
logging.info("User Name: %s" % str(user))
projects = myAPI.getProjectByUser(listOptions)
project_list = [project.Name for project in projects]

# Look the requested project up by exact name; list.index raises ValueError
# when it is absent, in which case the available names are logged and the
# script exits with status 1.
try:
    idx = project_list.index(sys.argv[1])
    project = projects[idx]
except ValueError:
    message = '"%s" is not in your projects. Available projects are:\n%s' % \
              (sys.argv[1],
               '\n'.join(project_list))
    logging.error(message)
    sys.exit(1)

# Names in the current directory matching '*fastq.gz'
# (treated as already-downloaded FASTQ files).
downloaded = glob.glob('*fastq.gz')  # get already downloaded fastq

logging.info("Retrieving samples from project %s" % sys.argv[1])
samples = project.getSamples(myAPI, listOptions)
exception message: 'Forbidden: App credentials do not match AppSession application'.
"""
"""
NOTE: You will need to provide the credentials for your app (available in the developer portal).
You can do this with a master config file (preferred), or by filling in values below.
"""
# If you're not using a config file, fill in your app's credentials here:
clientKey = ""
clientSecret = ""
appSessionId = ""
apiServer = 'https://api.basespace.illumina.com/'  # or 'https://api.cloud-hoth.illumina.com/'
apiVersion = 'v1pre3'

# First we initialize a BaseSpace API object, either from the credentials
# above (when clientKey is non-empty) or from the 'DEFAULT' profile.
if clientKey:
    myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion,
                         appSessionId)
else:
    myAPI = BaseSpaceAPI(profile='DEFAULT')

# Using the API object we can request the AppSession object.
# NOTE(review): getAppSession() is called without an id; presumably it uses
# the appSessionId the API object was configured with -- confirm.
myAppSession = myAPI.getAppSession()
print myAppSession

# An app session contains a reference to one or more appLaunchObjects which reference the data module
# the user launched the app on. This can be a list of projects, samples, or a mixture of objects
print "\nType of data the app was triggered on can be seen in 'references'"
print myAppSession.References

# We can also get a handle to the user who started the AppSession
print "\nWe can get a handle for the user who triggered the app\n" + str(
    myAppSession.UserCreatedBy)
from __future__ import print_function

import sys, os, glob, logging
from argparse import ArgumentParser

from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI
from BaseSpacePy.model.QueryParameters import QueryParameters as qp

# Query parameters reused for listing calls ('Limit' set to 1024).
list_options = qp({'Limit': 1024})

# Log at INFO level with a "[dd/mm/YYYY HH:MM:SS] message" layout.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(message)s',
    datefmt='%d/%m/%Y %H:%M:%S',
)
# NOTE(review): no credentials are passed here; presumably BaseSpacePy reads
# them from its default profile/config file -- confirm.
bs = BaseSpaceAPI()

# The API is queried at import time, before the command line is parsed, so
# the project names are already known when arguments are handled.
user = bs.getUserById('current')
logging.info("User Name: %s", user)
projects = bs.getProjectByUser(list_options)
project_list = [project.Name for project in projects]

# Command-line interface: the positional 'project' is optional (nargs='?').
cli = ArgumentParser()
cli.add_argument(
    'project',
    nargs='?',
    help=
    'Which project to download files from. When not specified, list projects instead.'
)
cli.add_argument(
    '--dry-run',
Esempio n. 5
0
def _filter_numeric_ids(id_list, obj_type):
    """Return the entries of ``id_list`` that consist only of digits.

    Every rejected entry is reported through ``print_stderr``; ``obj_type``
    ("project" or "sample") is only used to word that message.
    """
    digit_pattern = re.compile(r'^\d+$')
    valid_ids = []
    for candidate in id_list:
        if digit_pattern.match(candidate):
            valid_ids.append(candidate)
        else:
            print_stderr(
                'Error: Invalid format for user-specified {0} id '
                '"{1}": {0} ids are strictly numeric. Did you mean '
                'to pass this as a {0} name?'.format(obj_type, candidate))
    return valid_ids


def download_basespace_files(config_file_path=None,
                             client_key=None,
                             client_secret=None,
                             access_token=None,
                             project_id_list=None,
                             project_name_list=None,
                             sample_id_list=None,
                             sample_name_list=None,
                             dry_run=False,
                             output_directory=None,
                             recreate_basespace_dir_tree=True):
    """Download the files of selected BaseSpace samples.

    Credentials come from the explicit arguments and, for any that are
    missing, from the DEFAULT section of ``config_file_path`` (options
    ``clientkey``, ``clientsecret``, ``accesstoken``; optional
    ``appsessionid``, ``apiserver``, ``apiversion``).  If any of the three
    credentials is still missing, an error is printed and the process exits
    with status 1.

    Selection: projects are chosen by name and/or numeric id (all of the
    user's projects when neither list is given); within those projects,
    samples are chosen by name and/or numeric id (all samples when neither
    list is given).  Non-numeric ids are reported and ignored.

    :param config_file_path: optional INI file with credentials/settings
    :param client_key: BaseSpace app client key
    :param client_secret: BaseSpace app client secret
    :param access_token: BaseSpace access token
    :param project_id_list: project ids (strings of digits) to include
    :param project_name_list: project names to include
    :param sample_id_list: sample ids (strings of digits) to include
    :param sample_name_list: sample names to include
    :param dry_run: when true, list the files but do not create the output
        directory or download anything
    :param output_directory: destination directory; defaults to the current
        working directory
    :param recreate_basespace_dir_tree: passed to ``downloadFile`` as
        ``createBsDir``
    """
    # Check input parameters / load from config file / defaults
    project_id_list = project_id_list or []
    project_name_list = project_name_list or []
    sample_id_list = sample_id_list or []
    sample_name_list = sample_name_list or []
    if not output_directory:
        output_directory = os.getcwd()
        print_stderr(
            "Output directory not specified; using current directory ({})".
            format(output_directory))
    else:
        output_directory = os.path.abspath(output_directory)
    if not dry_run:
        safe_makedir(output_directory)
    config_dict = {}
    if config_file_path:
        config_parser = ConfigParser()
        config_parser.read(config_file_path)
        # Public accessor for the DEFAULT section (same mapping the private
        # ``_defaults`` attribute exposes); option names are lower-cased.
        config_dict = config_parser.defaults()
        if not client_key: client_key = config_dict.get('clientkey')
        if not client_secret: client_secret = config_dict.get('clientsecret')
        if not access_token: access_token = config_dict.get('accesstoken')
    if not (client_key and client_secret and access_token):
        missing_params = []
        if not client_key: missing_params.append("client_key")
        if not client_secret: missing_params.append("client_secret")
        if not access_token: missing_params.append("access_token")
        # The message needs two placeholders: with a single one the list of
        # missing parameters was silently dropped.
        print_stderr(
            'Error: Required parameters not supplied either in config '
            'file ({}) or via arguments: {}.'.format(
                config_file_path, ', '.join(missing_params)))
        sys.exit(1)
    app_session_id = config_dict.get("appsessionid") or ""
    api_server = config_dict.get(
        "apiserver") or "https://api.basespace.illumina.com"
    api_version = config_dict.get("apiversion") or "v1pre3"
    # Get the API connection object
    myAPI = BaseSpaceAPI(clientKey=client_key,
                         clientSecret=client_secret,
                         apiServer=api_server,
                         version=api_version,
                         appSessionId=app_session_id,
                         AccessToken=access_token)
    basespace_projects = myAPI.getProjectByUser(qp({'Limit': 1024}))
    user = myAPI.getUserById('current')

    # If user specified projects, get them by name or id
    project_objects = []
    if project_name_list:
        project_objects.extend(
            _select_from_object(filter_list=project_name_list,
                                search_list=basespace_projects,
                                key_attr="Name",
                                obj_type="project",
                                user=user))
    if project_id_list:
        project_objects.extend(
            _select_from_object(
                filter_list=_filter_numeric_ids(project_id_list, "project"),
                search_list=basespace_projects,
                key_attr="Id",
                obj_type="project",
                user=user))
    if not (project_name_list or project_id_list):
        # Get all projects if none are specified by user
        project_objects = basespace_projects

    basespace_samples = []
    for project_obj in project_objects:
        # NOTE(review): no query parameters are passed here, so the API's
        # default page size applies -- confirm large projects are not
        # truncated.
        basespace_samples.extend(project_obj.getSamples(myAPI))

    # If user specified samples, get them by name or id
    sample_objects = []
    if sample_name_list:
        sample_objects.extend(
            _select_from_object(filter_list=sample_name_list,
                                search_list=basespace_samples,
                                key_attr="Name",
                                obj_type="sample",
                                user=user))
    if sample_id_list:
        sample_objects.extend(
            _select_from_object(
                filter_list=_filter_numeric_ids(sample_id_list, "sample"),
                search_list=basespace_samples,
                key_attr="Id",
                obj_type="sample",
                user=user))
    if not (sample_name_list or sample_id_list):
        # Get all samples if none are specified by user
        sample_objects = basespace_samples

    files_to_download = []
    for sample_obj in sample_objects:
        files_to_download.extend(sample_obj.getFiles(myAPI))

    if not files_to_download:
        print_stderr("Error: no files found to download.")
        return

    total_files = len(files_to_download)
    print_stderr("Found {} files to download: ".format(total_files))
    for file_obj in files_to_download:
        print_stderr("\t- {}".format(file_obj))
    print_stderr('Downloading files to output directory {}'.format(
        output_directory))
    if recreate_basespace_dir_tree:
        print_stderr(
            "Recreating BaseSpace project directory tree for file.")
    if dry_run:
        print_stderr("-> Dry run: not downloading any data.")
    for position, file_obj in enumerate(files_to_download, 1):
        print_stderr('[{}/{}] Downloading file "{}"'.format(
            position, total_files, file_obj))
        if not dry_run:
            file_obj.downloadFile(api=myAPI,
                                  localDir=output_directory,
                                  createBsDir=recreate_basespace_dir_tree)
    print_stderr('Download completed; files are located in "{}"'.format(
        output_directory))
    def download(clientKey=None, clientSecret=None, accessToken=None, sampleId=None, projectId=None, sampleName=None, projectName=None, outputDirectory='\\.', createBsDir=True):
        '''
        Downloads sample-level files.

        Project Id and project name should
        not be specified together; similarly sample Id and sample name should not be
        specified together.

        1. If only a project Id or only a project name is given, all files for all
        samples will be downloaded within that project.  If additionally a sample Id or
        sample name is given, then only the first matching sample within the project
        will be downloaded.
        2. If only a sample Id is given, then all files for that sample will be downloaded.
        3. If only a sample name is given, then all files within the first project
        containing a sample with matching name will be downloaded.

        Nothing is downloaded when the module-level ``options.dryRun`` flag is
        set; the files are only listed.

        :param clientKey the Illumina developer app client key
        :param clientSecret the Illumina developer app client secret
        :param accessToken the Illumina developer app access token
        :param sampleId the BaseSpace sample identifier
        :param projectId the BaseSpace project identifier
        :param sampleName the BaseSpace sample name
        :param projectName the BaseSpace project name
        :param outputDirectory the root output directory
        :param createBsDir true to recreate the path structure within BaseSpace, false otherwise
        '''
        # NOTE(review): the outputDirectory default is the two-character string
        # backslash-dot.  It is now spelled with an escaped backslash (same
        # value, no invalid-escape warning); '.' was probably intended --
        # confirm before changing the default.
        appSessionId = ''
        apiServer = 'https://api.basespace.illumina.com/' # or 'https://api.cloud-hoth.illumina.com/'
        apiVersion = 'v1pre3'
        projectLimit = 100
        sampleLimit = 1024
        sampleFileLimit = 1024

        # init the API: explicit credentials when given, otherwise the DEFAULT profile
        if clientKey is not None:
            myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId, accessToken)
        else:
            myAPI = BaseSpaceAPI(profile='DEFAULT')

        # get the current user; the result is unused but the API call is kept
        # (NOTE(review): presumably it doubles as a credentials check)
        myAPI.getUserById('current')

        sampleToFiles = {}
        if projectId is not None:
            sampleToFiles = Samples.__get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit, sampleFileLimit)
        else:
            # No project id: scan the user's projects (optionally restricted to
            # projectName) and stop at the first one that yields any files.
            for project in myAPI.getProjectByUser(qp({'Limit' : projectLimit})):
                if projectName is not None and project.Name != projectName:
                    continue
                sampleToFiles = Samples.__get_files_to_download(myAPI, project.Id, sampleId, sampleName, sampleLimit, sampleFileLimit)
                if len(sampleToFiles) > 0:
                    break

        numFiles = sum(len(sampleFiles) for sampleFiles in sampleToFiles.values())
        # The previous message ("will download files from %d .") reported the
        # file count as if it were a count of something else.
        print("Will download %d files." % numFiles)
        fileNumber = 0
        for currentSampleId, sampleFiles in sampleToFiles.items():
            for sampleFile in sampleFiles:
                fileNumber += 1
                print('Downloading (%d/%d): %s' % (fileNumber, numFiles, str(sampleFile)))
                print("BaseSpace File Path: %s" % sampleFile.Path)
                print("Sample Id: %s" % currentSampleId)
                if options.dryRun:
                    continue
                # With createBsDir each sample gets its own sub-directory
                # under the output root.
                if createBsDir:
                    sampleOutputDirectory = os.path.join(outputDirectory, currentSampleId)
                else:
                    sampleOutputDirectory = outputDirectory
                sampleFile.downloadFile(myAPI, sampleOutputDirectory, createBsDir=createBsDir)
        print("FASTQ file downloading complete.")
Esempio n. 7
0
def main(uargs):
    """Print the current user's projects (with their samples) and runs.

    ``uargs`` is a mapping of command-line options; the keys read here are
    ``'--config'`` (path to the config file), ``'--profile'`` (section of
    that file holding the credentials) and ``'--user'`` (user id, or None
    for the current user).  ``uargs['--config']`` is replaced in place by
    its absolute, user-expanded path.

    Output is two tab-separated tables on stdout: one row per sample,
    prefixed with its project's columns, and one row per run.

    Raises KeyError when the requested profile is not in the config file.
    """
    uargs['--config'] = os.path.abspath(os.path.expanduser(uargs['--config']))
    conf = ConfigObj(uargs['--config'], configspec=get_configspec())

    try:
        profile = conf[uargs['--profile']]
    except KeyError:
        msg = 'Profile "{}" not found in config file'
        # Call form of raise: valid on both Python 2 and Python 3.
        raise KeyError(msg.format(uargs['--profile']))

    # selecting config args (positional order expected by BaseSpaceAPI)
    keys = ['clientKey', 'clientSecret', 'apiServer', 'apiVersion', 'appSessionId', 'accessToken']
    conf_args = [profile[x] for x in keys]
    myAPI = BaseSpaceAPI(*conf_args)

    # setting user
    user_id = uargs['--user'] if uargs['--user'] is not None else 'current'
    user = myAPI.getUserById(user_id)

    # user projects
    # Single-argument print(...) calls produce the same output under
    # Python 2's print statement and Python 3's print function.
    projects = myAPI.getProjectByUser()
    print("## The projects for this user are:")
    project_headers = ['ProjectID','Name','UserOwnedBy','DateCreated']
    sample_headers = ['SampleID','NumReadsRaw','NumReadsPF','IsPairedEnd','Status']
    print('\t'.join(project_headers + sample_headers))
    for project in projects:
        project_data = [project.Id,
                        project.Name,
                        project.UserOwnedBy,
                        project.DateCreated]

        # One output row per sample; projects without samples print nothing.
        for sample in myAPI.getSamplesByProject(project.Id):
            sample_data = [sample.Id,
                           sample.NumReadsRaw,
                           sample.NumReadsPF,
                           sample.IsPairedEnd,
                           sample.Status]
            print('\t'.join([str(x) for x in project_data + sample_data]))

            #files = myAPI.getSampleFilesById(sample.Id)
            #for f in files:
                #fo = myAPI.getFileById(f.Id)
            #    myAPI.fileDownload(f.Id, '.')

    # user runs
    print("## The runs for this user are:")
    print('\t'.join(['RunID','ExperimentName','UserOwnedBy',
                     'DateCreated', 'Status']))
    for run in user.getRuns(myAPI):
        data = [run.Id,
                run.ExperimentName,
                run.UserOwnedBy,
                run.DateCreated,
                run.Status]
        print('\t'.join([str(x) for x in data]))
Esempio n. 8
0
def main():
    """List the current user's BaseSpace runs and optionally download one.

    The client key, client secret and access token are required options.
    The user and the first ``--num_items`` runs are always printed to
    stderr.  When ``--run`` is given, the files of the run with that exact
    name (``--num_items`` files starting at ``--offset``) are downloaded
    into ``--directory`` with the BaseSpace directory structure recreated.
    Files whose path contains any of the comma-separated ``--excluded_path``
    strings are skipped.  Each download is verified by MD5 when the S3 etag
    is 32 characters long, otherwise by file size.

    Exits with an error message when ``--run`` names a run that is not in
    the retrieved list.
    """
    parser = argparse.ArgumentParser(description='')

    parser.add_argument('-k', '--key', dest='key', required=True,
                        type=str,
                        help='specify client key')
    parser.add_argument('-s', '--secret', dest='secret', required=True,
                        type=str,
                        help='specify client secret')
    parser.add_argument('-t', '--token', dest='token', required=True,
                        type=str,
                        help='specify access token')
    parser.add_argument('-r', '--run', dest='run', required=False,
                        type=str,
                        help='specify the run name to download')
    parser.add_argument('-d', '--directory', dest='directory', required=False,
                        type=str,
                        default='./',
                        help='specify download directory. \
                        The default is the current directory')
    parser.add_argument('--offset', dest='offset', required=False,
                        type=int,
                        default=0,
                        help='specify the starting offset to read. \
                              The default is 0')
    parser.add_argument('-n', '--num_items', dest='num_items', required=False,
                        type=int,
                        default=10,
                        help='specify the maximum number of items to return \
                              (max 1024). The default is 10')
    parser.add_argument('-e', '--excluded_path', dest='excluded_path', required=False,
                        type=str,
                        default=None,
                        help='specify files to skip (comma separated). \
                              If file paths contain \
                              this(ese) strings, it will be skipped.)\
                              The default is None')

    args = parser.parse_args()

    run_name = args.run
    download_directory = args.directory
    num_items = args.num_items
    offset = args.offset

    if args.excluded_path:
        excluded_file_path_strings = args.excluded_path.split(',')
    else:
        excluded_file_path_strings = []

    base_space_url = 'https://api.basespace.illumina.com/'

    my_bs_api = BaseSpaceAPI(args.key,
                             args.secret,
                             base_space_url,
                             'v1pre3',
                             '',
                             args.token)

    user = my_bs_api.getUserById('current')
    print('User: {}'.format(str(user)),
          sep='', file=sys.stderr)

    runs = user.getRuns(my_bs_api, queryPars=qp({'Limit': num_items}))
    print('Run(s): {}'.format(runs), sep='', file=sys.stderr)

    if not run_name:
        # Listing only: nothing to download.
        return

    # First run whose name matches exactly.  Previously an unknown name
    # surfaced as a bare IndexError from indexing an empty list.
    run = next((candidate for candidate in runs
                if candidate.Name == run_name), None)
    if run is None:
        sys.exit('Error: run "{}" not found; available runs: {}'.format(
            run_name, ', '.join(str(candidate.Name) for candidate in runs)))

    print('Total size ({}): {} GB'.format(run.Name,
                                          run.TotalSize / 1000000000),
          sep='', file=sys.stderr)
    print('Offset: {}'.format(offset),
          sep='', file=sys.stderr)
    print('Number of items to return: {}'.format(num_items),
          sep='', file=sys.stderr)

    run_files = run.getFiles(my_bs_api,
                             queryPars=qp({'Limit': num_items,
                                           'Offset': offset}))
    for f in run_files:
        file_path = f.Path

        if any(excluded in file_path
               for excluded in excluded_file_path_strings):
            print('Skipping file: {}'.format(file_path),
                  file=sys.stderr)
            continue

        print('Downloading file: {}'.format(file_path),
              '...', sep=' ', end='', file=sys.stderr)

        # Best effort per file: any failure is reported and the loop moves
        # on to the next file instead of aborting the whole run.
        try:
            f.downloadFile(my_bs_api, download_directory, createBsDir=True)

            etag = f.getFileS3metadata(my_bs_api)['etag']
            # Location of the downloaded file: the BaseSpace path below the
            # download directory (leading '/' stripped so join keeps the
            # directory prefix).
            local_path = os.path.join(download_directory,
                                      file_path.lstrip('/'))

            if len(etag) == 32:
                # A 32-character etag is compared against the file's MD5.
                f_md5 = md5_hash(local_path)

                if f_md5 == etag:
                    print(' done (md5 correct)!',
                          file=sys.stderr)
                else:
                    print(' error (md5 incorrect)!',
                          f.Id,
                          etag,
                          f_md5,
                          file=sys.stderr)

            elif f.Size == os.path.getsize(local_path):
                # Other etag lengths are not compared; fall back to size.
                print(' done (file size correct)!',
                      file=sys.stderr)
            else:
                print(' error (file size incorrect)!',
                      f.Id,
                      etag,
                      file=sys.stderr)

        except Exception as e:
            print(' error ({})!!'.format(e),
                  file=sys.stderr)
Esempio n. 9
0
import json
import os
import unittest

from BaseSpacePy.api.AppLaunchHelpers import AppSessionMetaDataRaw, AppSessionMetaDataSDK, LaunchSpecification, \
    LaunchPayload
from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI
from BaseSpacePy.model.AppSessionResponse import AppSessionResponse

# Module-level API handle, created at import time.
# NOTE(review): no credentials are passed; presumably BaseSpacePy reads them
# from its default profile/config file -- confirm.
api = BaseSpaceAPI()

# Directory containing this file; the JSON fixture is expected next to it.
mydir = os.path.dirname(os.path.abspath(__file__))
app_session_path = os.path.join(mydir, "appsession.json")
# Name and id of the app these fixtures refer to.
app_name = "BWA Whole Genome Sequencing v1.0"
app_id = "279279"
# Property descriptors for the app: each entry gives a property's 'Type' and
# its fully qualified 'Name' (every name carries the "Input." prefix).
app_properties = [
    {'Type': 'string', 'Name': 'Input.AnnotationSource'},
    {'Type': 'string[]', 'Name': 'Input.FlagPCRDuplicates-id'},
    {'Type': 'string', 'Name': 'Input.genome-id'},
    {'Type': 'string', 'Name': 'Input.GQX-id'},
    {'Type': 'project', 'Name': 'Input.project-id'},
    {'Type': 'sample', 'Name': 'Input.sample-id'},
    {'Type': 'string', 'Name': 'Input.StrandBias-id'}
]
app_property_names = [
    'AnnotationSource',
    'FlagPCRDuplicates-id',
    'genome-id',
    'GQX-id',
    'project-id',
    'sample-id',
Esempio n. 10
0
    def download(clientKey=None,
                 clientSecret=None,
                 accessToken=None,
                 appResultId=None,
                 fileNameRegexesInclude=None,
                 fileNameRegexesOmit=None,
                 outputDirectory='\\.',
                 createBsDir=True,
                 force=False,
                 numRetries=3):
        '''
        Downloads App Result files.

        Provide an App Result identifier, and optionally regexes to include or omit files
        based on their names (path not included).  Omission takes precedence over inclusion.
        Regexes are applied with ``re.match`` (anchored at the start) to ``str(file)``.

        Nothing is downloaded when the module-level ``options.dryRun`` flag is
        set; the files are only listed.

        :param clientKey the Illumina developer app client key
        :param clientSecret the Illumina developer app client secret
        :param accessToken the Illumina developer app access token
        :param appResultId the BaseSpace App Result identifier
        :param fileNameRegexesInclude a list of regexes on which to include files based on name (None or empty: include all)
        :param fileNameRegexesOmit a list of regexes on which to omit files based on name (takes precedence over include)
        :param outputDirectory the root output directory
        :param createBsDir true to recreate the path structure within BaseSpace, false otherwise
        :param force use the force: overwrite existing files if true, false otherwise
        :param numRetries the number of attempts for a single download API call (at least one attempt is always made)
        :raises BaseSpaceException.ServerResponseException when every attempt for a file fails
        '''
        # NOTE(review): the outputDirectory default is the two-character string
        # backslash-dot.  It is now spelled with an escaped backslash (same
        # value, no invalid-escape warning); '.' was probably intended --
        # confirm before changing the default.
        appSessionId = ''
        apiServer = 'https://api.basespace.illumina.com/'  # or 'https://api.cloud-hoth.illumina.com/'
        apiVersion = 'v1pre3'
        fileLimit = 10000
        sleepTime = 1.0

        # init the API: explicit credentials when given, otherwise the DEFAULT profile
        if clientKey is not None:
            myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer,
                                 apiVersion, appSessionId, accessToken)
        else:
            myAPI = BaseSpaceAPI(profile='DEFAULT')

        # get the current user; the result is unused but the API call is kept
        # (NOTE(review): presumably it doubles as a credentials check)
        myAPI.getUserById('current')

        appResult = myAPI.getAppResultById(Id=appResultId)
        print("Retrieving files from the App Result: " + str(appResult))

        # Get all the files from the AppResult
        candidateFiles = appResult.getFiles(myAPI,
                                            queryPars=qp({'Limit':
                                                          fileLimit}))

        # Filter file names based on the include or omit regexes
        includePatterns = [
            re.compile(pattern) for pattern in (fileNameRegexesInclude or ())
        ]
        omitPatterns = [
            re.compile(pattern) for pattern in (fileNameRegexesOmit or ())
        ]

        def keepFile(name):
            # No include patterns means "include everything"; an omit match
            # always wins.
            included = (not includePatterns
                        or any(p.match(name) for p in includePatterns))
            omitted = any(p.match(name) for p in omitPatterns)
            return included and not omitted

        filesToDownload = [f for f in candidateFiles if keepFile(str(f))]
        numFiles = len(filesToDownload)

        print("Will download %d files." % numFiles)
        for position, appResultFile in enumerate(filesToDownload, 1):
            print('Downloading (%d/%d): %s' % (
                position, numFiles, str(appResultFile)))
            print("File Path: %s" % appResultFile.Path)
            if options.dryRun:
                continue

            relativePath = str(appResultFile.Path)
            if not createBsDir:
                relativePath = os.path.basename(relativePath)
            # The file lands under outputDirectory, so the existence check
            # must look there too; checking the bare relative path tested the
            # current working directory instead.
            outputPath = os.path.join(outputDirectory, relativePath)
            if os.path.exists(outputPath):
                if force:
                    print("Overwritting: %s" % outputPath)
                else:
                    print("Skipping existing file: %s" % outputPath)
                    continue
            else:
                print("Downloading to: %s" % outputPath)

            # Retry on server errors, pausing between attempts; re-raise the
            # last error once the attempts are used up.  max(1, ...) keeps
            # numRetries <= 0 from skipping the download and ending in
            # "raise None".
            lastError = None
            for _attempt in range(max(1, numRetries)):
                try:
                    appResultFile.downloadFile(myAPI,
                                               outputDirectory,
                                               createBsDir=createBsDir)
                except BaseSpaceException.ServerResponseException as e:
                    lastError = e
                    time.sleep(sleepTime)
                else:
                    break
            else:
                raise lastError
        print("Download complete.")