Exemple #1
0
def Main():
    """Download all files of every sample in one BaseSpace project.

    The credentials (``key``, ``secret``, ``token``), the project name
    (``project``) and the output directory (``directory``) are read from the
    object returned by ``ParseArg()``.  The project is looked up by exact
    name among the projects returned by ``getProjectByUser()``.  The files of
    each sample are downloaded into a sub-directory of the output directory
    named ``str(sample)``, created on demand.  Progress goes to stderr.

    Exits with status 1 when no project with the requested name is found.
    """
    args = ParseArg()
    output_root = args.directory
    myAPI = BaseSpaceAPI(args.key, args.secret,
                         'https://api.basespace.illumina.com/', "v1pre3",
                         "AA", args.token)
    user = myAPI.getUserById('current')
    sys.stderr.write("\nUser name: %s\n\n" % (str(user)))

    for candidate in myAPI.getProjectByUser():
        if candidate.Name == args.project:
            sys.stderr.write("  Find project %s with ID: %s. \n" %
                             (candidate.Name, candidate.Id))
            project = candidate
            break
    else:
        sys.stderr.write(
            "  Could not find project %s, from user %s, please check your token.\n"
            % (args.project, str(user)))
        # Not finding the project is a failure; report it in the exit status.
        sys.exit(1)

    samples = project.getSamples(myAPI)
    sys.stderr.write("Samples for this project: " + str(samples) + "\n")
    for sample in samples:
        sys.stderr.write("  Downloading files in sample " + str(sample) + "\n")
        # os.path.join copes with an empty or slash-terminated root directory.
        subfolder = os.path.join(output_root, str(sample))
        if not os.path.exists(subfolder):
            os.makedirs(subfolder)
        for sample_file in sample.getFiles(myAPI):
            sys.stderr.write("    " + str(sample_file) + "\n")
            sample_file.downloadFile(myAPI, subfolder)
Exemple #2
0
def Main():
    """Fetch every file of every sample belonging to one BaseSpace project.

    ``ParseArg()`` supplies the credentials (``key``, ``secret``, ``token``),
    the project name (``project``) and the target directory (``directory``).
    The project is matched by exact name against ``getProjectByUser()``.
    Each sample gets its own sub-directory, named ``str(sample)``, below the
    target directory; it is created when missing.  Progress is written to
    stderr.

    Exits with status 1 when the requested project does not exist.
    """
    args = ParseArg()
    folder = args.directory
    BaseSpaceUrl = 'https://api.basespace.illumina.com/'
    myAPI = BaseSpaceAPI(args.key, args.secret, BaseSpaceUrl, "v1pre3", "AA",
                         args.token)
    user = myAPI.getUserById('current')
    sys.stderr.write("\nUser name: %s\n\n" % (str(user)))

    matches = [p for p in myAPI.getProjectByUser() if p.Name == args.project]
    if not matches:
        sys.stderr.write(
            "  Could not find project %s, from user %s, please check your token.\n"
            % (args.project, str(user)))
        # A failed lookup must not look like success to the calling shell.
        sys.exit(1)
    # Same choice as a break-on-first-match loop: the first project wins.
    Project = matches[0]
    sys.stderr.write("  Find project %s with ID: %s. \n" %
                     (Project.Name, Project.Id))

    Samples = Project.getSamples(myAPI)
    sys.stderr.write("Samples for this project: " + str(Samples) + "\n")
    for s in Samples:
        sys.stderr.write("  Downloading files in sample " + str(s) + "\n")
        # Joining with os.path avoids '/<sample>' when ``folder`` is empty.
        subfolder = os.path.join(folder, str(s))
        if not os.path.exists(subfolder):
            os.makedirs(subfolder)
        for f in s.getFiles(myAPI):
            sys.stderr.write("    " + str(f) + "\n")
            f.downloadFile(myAPI, subfolder)
Exemple #3
0
def download_Project(project_Name, output_folder):
    """Download the files of matching BaseSpace projects and upload them to S3.

    Every project of the current user whose ``str(project)`` contains
    ``project_Name`` is processed: all files of all its samples are
    downloaded into ``output_folder`` (created when missing) and each file is
    then passed to ``s3_upload`` with bucket ``"bmi-ngs"`` and key
    ``<project_Name>/<file>``.  Prints "Project Not Found" when nothing
    matches.

    The BaseSpace credentials are not parameters.  The names ``client_key``,
    ``client_secret``, ``AppSessionId``, ``BaseSpaceUrl``, ``version`` and
    ``accessToken`` must be defined in the enclosing scope, for example::

        client_key    = <my key>
        client_secret = <my secret>
        AppSessionId  = <my appSession id>
        BaseSpaceUrl  = 'https://api.basespace.illumina.com/'
        version       = 'v1pre3'
        accessToken   = <my accessToken>

    :param project_Name: text to look for in the project description
    :param output_folder: local directory receiving the downloaded files
    """
    myAPI = BaseSpaceAPI(client_key, client_secret, BaseSpaceUrl, version, AppSessionId, AccessToken=accessToken)

    # The first ':'-separated field of str(user) is passed on as the user id.
    user_fields = str(myAPI.getUserById('current')).split(':')

    # Retrieve the projects associated with that user
    projects = myAPI.getProjectByUser(user_fields[0], queryPars=QueryParameters({'Limit': '100'}))
    project_found = False
    for project in projects:
        if str(project_Name) not in str(project):
            continue
        project_found = True
        # Read the id from the object instead of splitting str(project) on
        # '-', which mis-parses any project whose name contains a hyphen.
        id_project = project.Id
        samples = myAPI.getSamplesByProject(id_project, queryPars=QueryParameters({'Limit': '100'}))
        print("There are "+str(len(samples))+" samples in the requested project ("+str(project_Name)+" - ID_PROJECT "+str(id_project)+")")

        if not os.path.exists(output_folder):
            os.makedirs(output_folder)

        print(time.ctime()+" START DOWNLOADING")
        for sample in samples:
            for fastq in sample.getFiles(myAPI):
                fastq.downloadFile(myAPI, output_folder)
                print(time.ctime()+" FILE "+str(fastq)+" DOWNLOADED")
                path_file = join(output_folder, str(fastq))
                path_S3 = join(str(project_Name), str(fastq))
                s3_upload(path_file, "bmi-ngs", path_S3)
        print(time.ctime()+" DOWNLOAD COMPLETED")
    if not project_found:
        print("Project Not Found")
def download_basespace_files(config_file_path=None, client_key=None, client_secret=None, access_token=None,
                             project_id_list=None, project_name_list=None, sample_id_list=None, sample_name_list=None,
                             dry_run=False, output_directory=None, recreate_basespace_dir_tree=True):
    """Download sample files from BaseSpace, optionally filtered by project and sample.

    Credentials given as arguments take precedence; missing ones are read
    from the default section of the config file (keys ``clientkey``,
    ``clientsecret``, ``accesstoken``).  The optional config keys
    ``appsessionid``, ``apiserver`` and ``apiversion`` override the built-in
    connection defaults.  The process exits with status 1 when a credential
    is still missing.

    Projects are selected by name and/or id (all projects when neither is
    given), then samples within those projects the same way.  Ids that are
    not strictly numeric are reported on stderr and ignored.

    :param config_file_path: path of an INI-style config file, or None
    :param client_key: BaseSpace app client key
    :param client_secret: BaseSpace app client secret
    :param access_token: BaseSpace access token
    :param project_id_list: project ids (strings of digits) to restrict to
    :param project_name_list: project names to restrict to
    :param sample_id_list: sample ids (strings of digits) to restrict to
    :param sample_name_list: sample names to restrict to
    :param dry_run: when true, list the files but download nothing and do
        not create the output directory
    :param output_directory: destination directory; defaults to the current
        working directory
    :param recreate_basespace_dir_tree: forwarded to ``downloadFile`` as
        ``createBsDir``
    """
    # Normalise the optional filters so they can be tested and iterated.
    project_id_list = project_id_list or []
    project_name_list = project_name_list or []
    sample_id_list = sample_id_list or []
    sample_name_list = sample_name_list or []

    if output_directory:
        output_directory = os.path.abspath(output_directory)
    else:
        output_directory = os.getcwd()
        print_stderr("Output directory not specified; using current directory ({})".format(output_directory))
    if not dry_run:
        safe_makedir(output_directory)

    config_dict = {}
    if config_file_path:
        config_parser = ConfigParser()
        config_parser.read(config_file_path)
        # Public accessor for the default-section mapping.
        config_dict = config_parser.defaults()
        client_key = client_key or config_dict.get('clientkey')
        client_secret = client_secret or config_dict.get('clientsecret')
        access_token = access_token or config_dict.get('accesstoken')

    credentials = (("client_key", client_key),
                   ("client_secret", client_secret),
                   ("access_token", access_token))
    missing_params = [name for name, value in credentials if not value]
    if missing_params:
        # Name the missing parameters: the old message passed two arguments
        # to one placeholder and silently dropped this list.
        print_stderr('Error: Required parameters ({}) not supplied either in config '
                     'file ({}) or via arguments.'.format(', '.join(missing_params),
                                                          config_file_path))
        sys.exit(1)

    app_session_id = config_dict.get("appsessionid") or ""
    api_server = config_dict.get("apiserver") or "https://api.basespace.illumina.com"
    api_version = config_dict.get("apiversion") or "v1pre3"
    # Get the API connection object
    myAPI = BaseSpaceAPI(clientKey=client_key, clientSecret=client_secret,
                         apiServer=api_server, version=api_version,
                         appSessionId=app_session_id, AccessToken=access_token)
    basespace_projects = myAPI.getProjectByUser(qp({'Limit' : 1024}))
    user = myAPI.getUserById('current')

    digit_pattern = re.compile(r'^\d+$')

    def _numeric_ids(id_list, obj_type):
        """Return the strictly numeric ids of id_list; report the others on stderr."""
        valid_ids = []
        for obj_id in id_list:
            if digit_pattern.match(obj_id):
                valid_ids.append(obj_id)
            else:
                print_stderr('Error: Invalid format for user-specified {0} id '
                             '"{1}": {0} ids are strictly numeric. Did you mean '
                             'to pass this as a {0} name?'.format(obj_type, obj_id))
        return valid_ids

    # If user specified projects, get them by name or id
    project_objects = []
    if project_name_list:
        project_objects.extend(_select_from_object(filter_list=project_name_list,
                                                   search_list=basespace_projects,
                                                   key_attr="Name",
                                                   obj_type="project",
                                                   user=user))
    if project_id_list:
        project_objects.extend(_select_from_object(filter_list=_numeric_ids(project_id_list, "project"),
                                                   search_list=basespace_projects,
                                                   key_attr="Id",
                                                   obj_type="project",
                                                   user=user))
    if not (project_name_list or project_id_list):
        # Get all projects if none are specified by user
        project_objects = basespace_projects

    basespace_samples = []
    for project_obj in project_objects:
        basespace_samples.extend(project_obj.getSamples(myAPI))

    sample_objects = []
    if sample_name_list:
        sample_objects.extend(_select_from_object(filter_list=sample_name_list,
                                                  search_list=basespace_samples,
                                                  key_attr="Name",
                                                  obj_type="sample",
                                                  user=user))
    if sample_id_list:
        sample_objects.extend(_select_from_object(filter_list=_numeric_ids(sample_id_list, "sample"),
                                                  search_list=basespace_samples,
                                                  key_attr="Id",
                                                  obj_type="sample",
                                                  user=user))
    if not (sample_name_list or sample_id_list):
        # Get all samples if none are specified by user
        sample_objects = basespace_samples

    files_to_download = []
    for sample_obj in sample_objects:
        files_to_download.extend(sample_obj.getFiles(myAPI))

    if not files_to_download:
        print_stderr("Error: no files found to download.")
        return

    total_files = len(files_to_download)
    print_stderr("Found {} files to download: ".format(total_files))
    for file_obj in files_to_download:
        print_stderr("\t- {}".format(file_obj))
    print_stderr('Downloading files to output directory {}'.format(output_directory))
    if recreate_basespace_dir_tree:
        print_stderr("Recreating BaseSpace project directory tree for file.")
    if dry_run:
        print_stderr("-> Dry run: not downloading any data.")
    for position, file_obj in enumerate(files_to_download, 1):
        print_stderr('[{}/{}] Downloading file "{}"'.format(position, total_files, file_obj))
        if not dry_run:
            file_obj.downloadFile(api=myAPI, localDir=output_directory,
                                  createBsDir=recreate_basespace_dir_tree)
    print_stderr('Download completed; files are located in "{}"'.format(output_directory))
# Walk-through of basic BaseSpace browsing calls; expects an initialised
# ``myAPI`` object.  Every print uses a single parenthesised argument so the
# statements run unchanged on Python 2 and Python 3.

# First, let's grab the genome with id=4
myGenome = myAPI.getGenomeById('4')
print("\nThe Genome is " + str(myGenome))
print("We can get more information from the genome object")
print('Id: ' + myGenome.Id)
print('Href: ' + myGenome.Href)
print('DisplayName: ' + myGenome.DisplayName)

# Get a list of all genomes
allGenomes = myAPI.getAvailableGenomes()
print("\nGenomes \n" + str(allGenomes))

# Let's have a look at the current user
user = myAPI.getUserById('current')
print("\nThe current user is \n" + str(user))

# Now list the projects for this user
myProjects = myAPI.getProjectByUser('current')
print("\nThe projects for this user are \n" + str(myProjects))

# We can also achieve this by making a call using the 'user instance'
myProjects2 = user.getProjects(myAPI)
print("\nProjects retrieved from the user instance \n" + str(myProjects2))

# List the runs available for the current user
runs = user.getRuns(myAPI)
print("\nThe runs for this user are \n" + str(runs))

# The same call again, printed under a different heading
runs = user.getRuns(myAPI)
print("\nRuns retrieved from user instance \n" + str(runs))
import sys, os, glob, logging
from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI
from BaseSpacePy.model.QueryParameters import QueryParameters as qp

# Query options shared by the list calls below: ask for up to 1024 items.
listOptions = qp({'Limit': 1024})

logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(message)s',
    datefmt='%d/%m/%Y %H:%M:%S',
)
myAPI = BaseSpaceAPI()

# The project to work on is the first command-line argument.
p = sys.argv[1]
user = myAPI.getUserById('current')
logging.info("User Name: %s" % str(user))
projects = myAPI.getProjectByUser(listOptions)
project_list = [project.Name for project in projects]

# Resolve the requested name to its project object, or stop with the list
# of valid names.
try:
    idx = project_list.index(p)
except ValueError:
    message = '"%s" is not in your projects. Available projects are:\n%s' % (
        p, '\n'.join(project_list))
    logging.error(message)
    sys.exit(1)
else:
    project = projects[idx]

# FASTQ archives already present in the working directory
downloaded = glob.glob('*fastq.gz')

logging.info("Retrieving samples from project %s" % p)
samples = project.getSamples(myAPI, listOptions)
from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI
from BaseSpacePy.model.QueryParameters import QueryParameters as qp

# Query options reused by the list call below: request up to 1024 items.
list_options = qp({'Limit': 1024})

# Timestamped log lines at INFO level and above.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(message)s',
    datefmt='%d/%m/%Y %H:%M:%S',
)
# No arguments are passed, so the connection settings come from wherever
# BaseSpaceAPI looks by default (not visible in this snippet).
bs = BaseSpaceAPI()

# Log the current user and collect the names of their projects.
user = bs.getUserById('current')
logging.info("User Name: %s", user)
projects = bs.getProjectByUser(list_options)
project_list = [project.Name for project in projects]

# Command-line interface: an optional positional project name and a
# dry-run switch.
cli = ArgumentParser()
cli.add_argument(
    'project',
    nargs='?',
    help=
    'Which project to download files from. When not specified, list projects instead.'
)
cli.add_argument(
    '--dry-run',
    '-n',
    action='store_true',
    help='Only show which files would be downloaded without downloading them.')
cli.add_argument('--dir',
# Basic BaseSpace browsing calls; expects an initialised ``myAPI`` object.
# Each print takes one parenthesised argument, which behaves the same on
# Python 2 and Python 3.

# Fetch the genome with id 4 and show a few of its attributes
myGenome = myAPI.getGenomeById("4")
print("\nThe Genome is " + str(myGenome))
print("We can get more information from the genome object")
print("Id: " + myGenome.Id)
print("Href: " + myGenome.Href)
print("DisplayName: " + myGenome.DisplayName)

# Get a list of all genomes
allGenomes = myAPI.getAvailableGenomes()
print("\nGenomes \n" + str(allGenomes))

# Let's have a look at the current user
user = myAPI.getUserById("current")
print("\nThe current user is \n" + str(user))

# Now list the projects for this user
myProjects = myAPI.getProjectByUser()
print("\nThe projects for this user are \n" + str(myProjects))

# We can also achieve this by making a call using the 'user instance'
myProjects2 = user.getProjects(myAPI)
print("\nProjects retrieved from the user instance \n" + str(myProjects2))

# List the runs available for the current user
runs = user.getRuns(myAPI)
print("\nThe runs for this user are \n" + str(runs))

# The same call again, printed under a different heading
runs = user.getRuns(myAPI)
print("\nRuns retrieved from user instance \n" + str(runs))
Exemple #9
0
def download_basespace_files(config_file_path=None,
                             client_key=None,
                             client_secret=None,
                             access_token=None,
                             project_id_list=None,
                             project_name_list=None,
                             sample_id_list=None,
                             sample_name_list=None,
                             dry_run=False,
                             output_directory=None,
                             recreate_basespace_dir_tree=True):
    """Download sample files from BaseSpace, optionally filtered by project and sample.

    Credentials passed as arguments win; any that are missing are looked up
    in the default section of the config file (keys ``clientkey``,
    ``clientsecret``, ``accesstoken``).  The optional config keys
    ``appsessionid``, ``apiserver`` and ``apiversion`` replace the built-in
    connection defaults.  The process exits with status 1 when a credential
    is still missing.

    Projects are narrowed down by name and/or id (all projects when neither
    is given) and samples inside them likewise.  Ids that are not strictly
    numeric are reported on stderr and skipped.

    :param config_file_path: path of an INI-style config file, or None
    :param client_key: BaseSpace app client key
    :param client_secret: BaseSpace app client secret
    :param access_token: BaseSpace access token
    :param project_id_list: project ids (strings of digits) to restrict to
    :param project_name_list: project names to restrict to
    :param sample_id_list: sample ids (strings of digits) to restrict to
    :param sample_name_list: sample names to restrict to
    :param dry_run: when true, list the files but download nothing and do
        not create the output directory
    :param output_directory: destination directory; defaults to the current
        working directory
    :param recreate_basespace_dir_tree: forwarded to ``downloadFile`` as
        ``createBsDir``
    """
    # Normalise the optional filters so they can be tested and iterated.
    project_id_list = project_id_list or []
    project_name_list = project_name_list or []
    sample_id_list = sample_id_list or []
    sample_name_list = sample_name_list or []

    if output_directory:
        output_directory = os.path.abspath(output_directory)
    else:
        output_directory = os.getcwd()
        print_stderr(
            "Output directory not specified; using current directory ({})".
            format(output_directory))
    if not dry_run:
        safe_makedir(output_directory)

    config_dict = {}
    if config_file_path:
        config_parser = ConfigParser()
        config_parser.read(config_file_path)
        # Public accessor for the default-section mapping.
        config_dict = config_parser.defaults()
        client_key = client_key or config_dict.get('clientkey')
        client_secret = client_secret or config_dict.get('clientsecret')
        access_token = access_token or config_dict.get('accesstoken')

    credentials = (("client_key", client_key),
                   ("client_secret", client_secret),
                   ("access_token", access_token))
    missing_params = [name for name, value in credentials if not value]
    if missing_params:
        # Name the missing parameters: the old message passed two arguments
        # to one placeholder and silently dropped this list.
        print_stderr(
            'Error: Required parameters ({}) not supplied either in config '
            'file ({}) or via arguments.'.format(', '.join(missing_params),
                                                 config_file_path))
        sys.exit(1)

    app_session_id = config_dict.get("appsessionid") or ""
    api_server = config_dict.get(
        "apiserver") or "https://api.basespace.illumina.com"
    api_version = config_dict.get("apiversion") or "v1pre3"
    # Get the API connection object
    myAPI = BaseSpaceAPI(clientKey=client_key,
                         clientSecret=client_secret,
                         apiServer=api_server,
                         version=api_version,
                         appSessionId=app_session_id,
                         AccessToken=access_token)
    basespace_projects = myAPI.getProjectByUser(qp({'Limit': 1024}))
    user = myAPI.getUserById('current')

    digit_pattern = re.compile(r'^\d+$')

    def _numeric_ids(id_list, obj_type):
        """Return the strictly numeric ids of id_list; report the others on stderr."""
        valid_ids = []
        for obj_id in id_list:
            if digit_pattern.match(obj_id):
                valid_ids.append(obj_id)
            else:
                print_stderr(
                    'Error: Invalid format for user-specified {0} id '
                    '"{1}": {0} ids are strictly numeric. Did you mean '
                    'to pass this as a {0} name?'.format(obj_type, obj_id))
        return valid_ids

    # If user specified projects, get them by name or id
    project_objects = []
    if project_name_list:
        project_objects.extend(
            _select_from_object(filter_list=project_name_list,
                                search_list=basespace_projects,
                                key_attr="Name",
                                obj_type="project",
                                user=user))
    if project_id_list:
        project_objects.extend(
            _select_from_object(filter_list=_numeric_ids(
                project_id_list, "project"),
                                search_list=basespace_projects,
                                key_attr="Id",
                                obj_type="project",
                                user=user))
    if not (project_name_list or project_id_list):
        # Get all projects if none are specified by user
        project_objects = basespace_projects

    basespace_samples = []
    for project_obj in project_objects:
        basespace_samples.extend(project_obj.getSamples(myAPI))

    sample_objects = []
    if sample_name_list:
        sample_objects.extend(
            _select_from_object(filter_list=sample_name_list,
                                search_list=basespace_samples,
                                key_attr="Name",
                                obj_type="sample",
                                user=user))
    if sample_id_list:
        sample_objects.extend(
            _select_from_object(filter_list=_numeric_ids(
                sample_id_list, "sample"),
                                search_list=basespace_samples,
                                key_attr="Id",
                                obj_type="sample",
                                user=user))
    if not (sample_name_list or sample_id_list):
        # Get all samples if none are specified by user
        sample_objects = basespace_samples

    files_to_download = []
    for sample_obj in sample_objects:
        files_to_download.extend(sample_obj.getFiles(myAPI))

    if not files_to_download:
        print_stderr("Error: no files found to download.")
        return

    total_files = len(files_to_download)
    print_stderr("Found {} files to download: ".format(total_files))
    for file_obj in files_to_download:
        print_stderr("\t- {}".format(file_obj))
    print_stderr('Downloading files to output directory {}'.format(
        output_directory))
    if recreate_basespace_dir_tree:
        print_stderr("Recreating BaseSpace project directory tree for file.")
    if dry_run:
        print_stderr("-> Dry run: not downloading any data.")
    for position, file_obj in enumerate(files_to_download, 1):
        print_stderr('[{}/{}] Downloading file "{}"'.format(
            position, total_files, file_obj))
        if not dry_run:
            file_obj.downloadFile(api=myAPI,
                                  localDir=output_directory,
                                  createBsDir=recreate_basespace_dir_tree)
    print_stderr('Download completed; files are located in "{}"'.format(
        output_directory))
    def download(clientKey=None, clientSecret=None, accessToken=None, sampleId=None, projectId=None, sampleName=None, projectName=None, outputDirectory='\\.', createBsDir=True):
        '''
        Downloads sample-level files.

        Project Id and project name should
        not be specified together; similarly sample Id and sample name should not be
        specified together.

        1. If only a project Id or only a project name is given, all files for all
        samples will be downloaded within that project.  If additionally a sample Id or
        sample name is given, then only the first matching sample within the project
        will be downloaded.
        2. If only a sample Id is given, then all files for that sample will be downloaded.
        3. If only a sample name is given, then all files within the first project
        containing a sample with matching name will be downloaded.

        When no client key is given the API is built from the 'DEFAULT' profile.
        Without a project Id, only the first 100 projects returned for the user
        are searched.  Nothing is downloaded while ``options.dryRun`` is true;
        ``options`` is not a parameter and must exist in the enclosing module.

        :param clientKey the Illumina developer app client key
        :param clientSecret the Illumina developer app client secret
        :param accessToken the Illumina developer app access token
        :param sampleId the BaseSpace sample identifier
        :param projectId the BaseSpace project identifier
        :param sampleName the BaseSpace sample name
        :param projectName the BaseSpace project name
        :param outputDirectory the root output directory
        :param createBsDir true to recreate the path structure within BaseSpace, false otherwise
        '''
        # NOTE(review): the default outputDirectory is a backslash followed by a
        # dot (value unchanged, now written with a valid escape); '.' was
        # probably intended - confirm before changing the default.
        appSessionId = ''
        apiServer = 'https://api.basespace.illumina.com/' # or 'https://api.cloud-hoth.illumina.com/'
        apiVersion = 'v1pre3'
        projectLimit = 100
        sampleLimit = 1024
        sampleFileLimit = 1024

        # init the API
        if clientKey is not None:
            myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId, accessToken)
        else:
            myAPI = BaseSpaceAPI(profile='DEFAULT')

        # The returned user is not used; the request itself is kept
        # (presumably an early credentials check - TODO confirm).
        myAPI.getUserById('current')

        sampleToFiles = {}
        if projectId is not None:
            sampleToFiles = Samples.__get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit, sampleFileLimit)
        else:
            for project in myAPI.getProjectByUser(qp({'Limit' : projectLimit})):
                if projectName is not None and project.Name != projectName:
                    continue
                sampleToFiles = Samples.__get_files_to_download(myAPI, project.Id, sampleId, sampleName, sampleLimit, sampleFileLimit)
                # Stop at the first project that yields any file.
                if sampleToFiles:
                    break

        numFiles = sum(len(sampleFiles) for sampleFiles in sampleToFiles.values())
        print("will download files from %d ." % numFiles)
        fileNumber = 0
        # Distinct loop names keep the sampleId parameter intact.
        for fileSampleId, sampleFiles in sampleToFiles.items():
            for sampleFile in sampleFiles:
                fileNumber += 1
                print('Downloading (%d/%d): %s' % (fileNumber, numFiles, str(sampleFile)))
                print("BaseSpace File Path: %s" % sampleFile.Path)
                print("Sample Id: %s" % fileSampleId)
                if not options.dryRun:
                    if createBsDir:
                        sampleOutputDirectory = os.path.join(outputDirectory, fileSampleId)
                    else:
                        sampleOutputDirectory = outputDirectory
                    sampleFile.downloadFile(myAPI, sampleOutputDirectory, createBsDir=createBsDir)
        print("FASTQ file downloading complete.")
Exemple #11
0
# If you're not using a config file, fill in your app's credentials here:
clientKey = ""
clientSecret = ""
appSessionId = ""
apiServer = 'https://api.basespace.illumina.com/'  # or 'https://api.cloud-hoth.illumina.com/'
apiVersion = 'v1pre3'

# First we will initialize a BaseSpace API object using our app information
# and the appSessionId; with no client key the 'DEFAULT' profile is used.
if clientKey:
    myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion,
                         appSessionId)
else:
    myAPI = BaseSpaceAPI(profile='DEFAULT')

user = myAPI.getUserById('current')
myProjects = myAPI.getProjectByUser('current')

# Start empty so the code after the loop still works for a user without
# any project.
appResults = []
samples = []

# Let's list all the AppResults and samples for these projects
for singleProject in myProjects:
    print("# " + str(singleProject))
    appResults = singleProject.getAppResults(myAPI)
    print("    The App results for project " + str(singleProject) +
          " are \n\t" + str(appResults))
    samples = singleProject.getSamples(myAPI)
    print("    The samples for project " + str(singleProject) +
          " are \n\t" + str(samples))

# We'll take a further look at the files belonging to the sample and
# analyses from the last project in the loop above
for a in appResults:
    print("# " + a.Id)
def main(uargs):
    """Print tab-separated tables of projects/samples and of runs.

    The connection settings are read from the profile ``uargs['--profile']``
    of the config file ``uargs['--config']``.  The path is expanded and made
    absolute, and ``uargs['--config']`` is updated in place.  One row is
    printed per sample of every project returned by ``getProjectByUser()``,
    followed by one row per run of the selected user.

    :param uargs: mapping of command-line options; the keys ``--config``,
        ``--profile`` and ``--user`` are read
    :raises KeyError: when the profile is missing from the config file, or
        when the profile lacks one of the required settings
    """
    config_path = os.path.abspath(os.path.expanduser(uargs['--config']))
    uargs['--config'] = config_path
    conf = ConfigObj(config_path, configspec=get_configspec())

    profile = uargs['--profile']
    try:
        conf_args = conf[profile]
    except KeyError:
        msg = 'Profile "{}" not found in config file'
        raise KeyError(msg.format(profile))

    # The settings are handed to BaseSpaceAPI positionally, in this order.
    keys = ['clientKey', 'clientSecret', 'apiServer', 'apiVersion', 'appSessionId', 'accessToken']
    myAPI = BaseSpaceAPI(*[conf_args[key] for key in keys])

    # setting user
    requested_user = uargs['--user']
    user = myAPI.getUserById('current' if requested_user is None else requested_user)

    # user projects
    # NOTE(review): the project list is requested without the selected user,
    # unlike the runs table below - confirm this is intended.
    projects = myAPI.getProjectByUser()
    print("## The projects for this user are:")
    project_headers = ['ProjectID', 'Name', 'UserOwnedBy', 'DateCreated']
    sample_headers = ['SampleID', 'NumReadsRaw', 'NumReadsPF', 'IsPairedEnd', 'Status']
    print('\t'.join(project_headers + sample_headers))
    for project in projects:
        project_data = [project.Id,
                        project.Name,
                        project.UserOwnedBy,
                        project.DateCreated]
        for sample in myAPI.getSamplesByProject(project.Id):
            sample_data = [sample.Id,
                           sample.NumReadsRaw,
                           sample.NumReadsPF,
                           sample.IsPairedEnd,
                           sample.Status]
            print('\t'.join([str(x) for x in project_data + sample_data]))

    # user runs
    print("## The runs for this user are:")
    print('\t'.join(['RunID', 'ExperimentName', 'UserOwnedBy',
                     'DateCreated', 'Status']))
    for run in user.getRuns(myAPI):
        run_data = [run.Id,
                    run.ExperimentName,
                    run.UserOwnedBy,
                    run.DateCreated,
                    run.Status]
        print('\t'.join([str(x) for x in run_data]))
Exemple #13
0
# Tour of the basic browsing calls; expects an initialised ``myAPI`` object.

# Look up the genome with id 4 and show some of its attributes
myGenome = myAPI.getGenomeById('4')
print("\nThe Genome is %s" % str(myGenome))
print("We can get more information from the genome object")
print('Id: ' + myGenome.Id)
print('Href: ' + myGenome.Href)
print('DisplayName: ' + myGenome.DisplayName)

# Every genome that is available
allGenomes = myAPI.getAvailableGenomes()
print("\nGenomes \n%s" % str(allGenomes))

# The user the API object is acting for
user = myAPI.getUserById('current')
print("\nThe current user is \n%s" % str(user))

# Projects, asked for through the API object ...
myProjects = myAPI.getProjectByUser()
print("\nThe projects for this user are \n%s" % str(myProjects))

# ... and through the user instance
myProjects2 = user.getProjects(myAPI)
print("\nProjects retrieved from the user instance \n%s" % str(myProjects2))

# Runs available to the current user
runs = user.getRuns(myAPI)
print("\nThe runs for this user are \n%s" % str(runs))

# The same request once more, printed under a different heading
runs = user.getRuns(myAPI)
print("\nRuns retrieved from user instance \n%s" % str(runs))
    def download(clientKey=None, clientSecret=None, accessToken=None, sampleId=None, projectId=None, sampleName=None, projectName=None, outputDirectory='\\.', createBsDir=True):
        '''
        Downloads sample-level files.

        Project Id and project name should
        not be specified together; similarly sample Id and sample name should not be
        specified together.

        1. If only a project Id or only a project name is given, all files for all
        samples will be downloaded within that project.  If additionally a sample Id or
        sample name is given, then only the first matching sample within the project
        will be downloaded.
        2. If only a sample Id is given, then all files for that sample will be downloaded.
        3. If only a sample name is given, then all files within the first project
        containing a sample with matching name will be downloaded.

        When no client key is given the API is built from the 'DEFAULT' profile.
        Without a project Id, the user's projects are fetched page by page (1024
        per request) until one yields files or a page comes back empty.  Nothing
        is downloaded while ``options.dryRun`` is true; ``options`` is not a
        parameter and must exist in the enclosing module.

        :param clientKey the Illumina developer app client key
        :param clientSecret the Illumina developer app client secret
        :param accessToken the Illumina developer app access token
        :param sampleId the BaseSpace sample identifier
        :param projectId the BaseSpace project identifier
        :param sampleName the BaseSpace sample name
        :param projectName the BaseSpace project name
        :param outputDirectory the root output directory
        :param createBsDir true to recreate the path structure within BaseSpace, false otherwise
        '''
        # NOTE(review): the default outputDirectory is a backslash followed by a
        # dot (value unchanged, now written with a valid escape); '.' was
        # probably intended - confirm before changing the default.
        appSessionId = ''
        apiServer = 'https://api.basespace.illumina.com/' # or 'https://api.cloud-hoth.illumina.com/'
        apiVersion = 'v1pre3'
        projectLimit = 1024
        sampleLimit = 1024
        sampleFileLimit = 1024

        # init the API
        if clientKey is not None:
            myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId, accessToken)
        else:
            myAPI = BaseSpaceAPI(profile='DEFAULT')

        # The returned user is not used; the request itself is kept
        # (presumably an early credentials check - TODO confirm).
        myAPI.getUserById('current')

        sampleToFiles = {}
        if projectId is not None:
            sampleToFiles = Samples.__get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit, sampleFileLimit)
        else:
            offset = 0
            # Page through the projects until files are found or a page is empty.
            while not sampleToFiles:
                myProjects = myAPI.getProjectByUser(qp({'Limit' : projectLimit, 'Offset' : offset}))
                if len(myProjects) == 0:
                    break
                for project in myProjects:
                    sys.stderr.write("project.Name: " + str(project.Name)  + " projectName: " + str(projectName) + '\n')
                    if projectName is not None and project.Name != projectName:
                        continue
                    sampleToFiles = Samples.__get_files_to_download(myAPI, project.Id, sampleId, sampleName, sampleLimit, sampleFileLimit)
                    if sampleToFiles:
                        break
                offset += projectLimit

        numFiles = sum(len(sampleFiles) for sampleFiles in sampleToFiles.values())
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Will download files from %d ." % numFiles)
        fileNumber = 0
        # Distinct loop names keep the sampleId parameter intact.
        for fileSampleId, sampleFiles in sampleToFiles.items():
            for sampleFile in sampleFiles:
                fileNumber += 1
                print('Downloading (%d/%d): %s' % (fileNumber, numFiles, str(sampleFile)))
                print("BaseSpace File Path: %s" % sampleFile.Path)
                print("Sample Id: %s" % fileSampleId)
                if not options.dryRun:
                    if createBsDir:
                        sampleOutputDirectory = os.path.join(outputDirectory, fileSampleId)
                    else:
                        sampleOutputDirectory = outputDirectory
                    sampleFile.downloadFile(myAPI, sampleOutputDirectory, createBsDir=createBsDir)
        print("Download complete.")
Exemple #15
0
class BaseSpace(object):
    def __init__(self,
                 project_id=None,
                 project_name=None,
                 get_all_projects=False):
        """Build a BaseSpace API client and record which project to use.

        Credentials are loaded through ``_get_credentials`` and used to
        construct ``self.api``.

        Args:
            project_id: Id of the project to work with. Takes precedence
                over ``project_name`` when both are given.
            project_name: Name of the project to work with; it is resolved
                to an Id through ``_get_project_id_from_name``.
            get_all_projects: Accepted but not used by this constructor.
        """
        super(BaseSpace, self).__init__()
        # BaseSpace credentials
        creds = self._get_credentials()
        self.client_key = creds['client_id']
        self.client_secret = creds['client_secret']
        self.access_token = creds['access_token']
        self.version = creds['version']
        self.api_server = creds['api_server']
        self.api = BaseSpaceAPI(self.client_key,
                                self.client_secret,
                                self.api_server,
                                self.version,
                                AccessToken=self.access_token)
        # Default query parameters shared by the listing calls below.
        self.params = qp(pars={'Limit': 1024, 'SortDir': 'Desc'})
        if project_id is not None:
            self.project_id = project_id
            self.project_name = None
        elif project_name is not None:
            # The lookup reads self.project_name, so it must be set first.
            # The helper is called without arguments: passing the name
            # positionally raised TypeError against its (self)-only signature.
            self.project_name = project_name
            self.project_id = self._get_project_id_from_name()
        else:
            # Nothing selected yet; download() prompts the user when both
            # attributes are still None.
            self.project_id = None
            self.project_name = None
        # Cache for the ``runs`` property; None means "not fetched yet".
        self._runs = None

    @property
    def runs(self):
        if self._runs is None:
            self._runs = self.api.getAccessibleRunsByUser(
                queryPars=self.params)
        return self._runs

    def _get_credentials(self):
        # BaseSpace credentials file should be in JSON format
        cred_file = os.path.expanduser('~/.abstar/basespace_credentials')
        cred_handle = open(cred_file, 'r')
        return json.load(cred_handle)

    def _get_project_id_from_name(self):
        projects = self.api.getProjectByUser(queryPars=self.params)
        for project in projects:
            name = project.Name.encode('ascii', 'ignore')
            if name == self.project_name:
                return project.Id
        print('No projects matched the given project name ({})'.format(name))
        sys.exit(1)

    def _user_selected_project_id(self):
        projects = self.api.getProjectByUser(queryPars=self.params)
        self.print_basespace_project()
        offset = 0
        while True:
            for i, project in enumerate(projects[offset * 25:(offset * 25) +
                                                 25]):
                project_name = project.Name.encode('ascii', 'ignore')
                print('[ {} ] {}'.format(i + (offset * 25), project_name))
            print('')
            project_index = raw_input(
                "Select the project number (or 'next' to see more projects): ")
            try:
                project_index = int(project_index)
                return projects[project_index].Id, projects[
                    project_index].Name.encode('ascii', 'ignore')
            except:
                offset += 1
        return projects[project_index].Id, projects[project_index].Name.encode(
            'ascii', 'ignore')

    def _get_projects(self, start=0):
        projects = self.api.getProjectByUser(queryPars=self.params)
        self.print_basespace_project()
        for i, project in enumerate(projects[:25]):
            project_name = project.Name.encode('ascii', 'ignore')
            print('[ {} ] {}'.format(i, project_name))
        print('')
        return projects

    def _get_samples(self, project_id):
        """Return all samples of a project, fetched page by page.

        Args:
            project_id: Id of the project whose samples are listed. When
                None, ``self.project_id`` is used.

        Returns:
            A list with the samples of every page, in the order returned
            by ``self.api.getSamplesByProject`` (requested with
            ``SortDir`` ``Asc``).
        """
        # Honour the argument; it was previously ignored in favour of
        # self.project_id. Keep that attribute as the fallback.
        if project_id is None:
            project_id = self.project_id
        page_size = 1024
        samples = []
        page = 0
        while True:
            query_params = qp(pars={
                'Limit': page_size,
                'SortDir': 'Asc',
                'Offset': page * page_size
            })
            batch = self.api.getSamplesByProject(project_id,
                                                 queryPars=query_params)
            # An empty page means everything has been read.
            if not batch:
                break
            samples.extend(batch)
            page += 1
        return samples

    def _get_files(self):
        files = []
        samples = self._get_samples(self.project_id)
        for sample in samples:
            files.extend(
                self.api.getFilesBySample(sample.Id, queryPars=self.params))
        return files

    def download(self, direc):
        """Download every file of the selected project.

        When neither ``self.project_id`` nor ``self.project_name`` is set,
        the user is first asked to choose a project.

        Args:
            direc: Passed to each file's ``downloadFile`` call as the
                download location.

        Returns:
            The number of files that were handed to ``downloadFile``.
        """
        nothing_selected = (self.project_id is None
                            and self.project_name is None)
        if nothing_selected:
            selection = self._user_selected_project_id()
            self.project_id, self.project_name = selection
        files = self._get_files()
        self.print_download_info(files)
        started_at = time.time()
        position = 0
        for remote_file in files:
            logger.info('[ {} ] {}'.format(position, str(remote_file)))
            remote_file.downloadFile(self.api, direc)
            position += 1
        finished_at = time.time()
        self.print_completed_download_info(started_at, finished_at)
        return len(files)

    def print_basespace_project(self):
        print('')
        print('')
        print('========================================')
        print('BaseSpace Project Selection')
        print('========================================')
        print('')

    def print_download_info(self, files):
        """Log the download banner followed by the number of files found.

        Args:
            files: Collection of files about to be downloaded; only its
                length is used.
        """
        rule = '========================================'
        header = ('', '', rule, 'Downloading files from BaseSpace', rule, '')
        for line in header:
            logger.info(line)
        file_count = len(files)
        logger.info('Identified {0} files for download.'.format(file_count))
        logger.info('')

    def print_completed_download_info(self, start, end):
        """Log how long the download took.

        Args:
            start: Value taken when the download began.
            end: Value taken when the download finished; the logged
                duration is ``end - start``.
        """
        elapsed = end - start
        logger.info('')
        logger.info('Download completed in {0} seconds'.format(elapsed))