Example #1
0
    def upload_part(self,
                    data,
                    index=None,
                    display_progress=False,
                    report_progress_fn=None,
                    **kwargs):
        """
        Upload *data* as one part of the associated file.

        :param data: Data to be uploaded in this part
        :type data: str or mmap object
        :param index: Index of part to be uploaded; must be in [1, 10000]
        :type index: integer
        :param display_progress: Whether to print "." to stderr when done
        :type display_progress: boolean
        :param report_progress_fn: Optional: a function to call that takes in two arguments (self, # bytes transmitted)
        :type report_progress_fn: function or None
        :raises: :exc:`dxpy.exceptions.DXFileError` if *index* is given and is not in the correct range, :exc:`requests.exceptions.HTTPError` if upload fails

        The MD5 digest and the length of *data* are sent along with the
        request for an upload URL. If no value for *index* is given,
        *index* defaults to 1, which probably only makes sense if this
        is the only part to be uploaded. Any extra keyword arguments are
        forwarded to the API call that requests the upload URL.
        """
        part_spec = {}
        if index is not None:
            part_spec["index"] = int(index)

        digest = hashlib.md5()
        if not (hasattr(data, 'seek') and hasattr(data, 'tell')):
            # Not a seekable buffer: hash the whole value in one call.
            digest.update(data)
        else:
            # Seekable buffer: hash it chunk by chunk starting at the current
            # position, then restore that position so the upload below sees
            # the buffer exactly as it was handed to us.
            start_offset = data.tell()
            chunk = data.read(MD5_READ_CHUNK_SIZE)
            while chunk:
                digest.update(chunk)
                chunk = data.read(MD5_READ_CHUNK_SIZE)
            data.seek(start_offset)

        part_spec["md5"] = digest.hexdigest()
        part_spec["size"] = len(data)

        def get_upload_url_and_headers():
            # Called once per attempt from inside a retry loop. To keep the
            # total number of retries from growing geometrically, the retry
            # budget of the nested API call shrinks on every invocation.
            if 'max_retries' in kwargs:
                if kwargs['max_retries'] > 0:
                    kwargs['max_retries'] -= 1
            else:
                kwargs['max_retries'] = dxpy.DEFAULT_RETRIES

            if "timeout" not in kwargs:
                kwargs["timeout"] = FILE_REQUEST_TIMEOUT

            upload_info = dxpy.api.file_upload(self._dxid, part_spec, **kwargs)
            return upload_info["url"], _validate_headers(upload_info.get("headers", {}))

        # A fresh pre-authenticated upload URL (and its headers) is needed for
        # every upload attempt. DXHTTPRequest retries under retryable
        # conditions, so it is given the callback above instead of a fixed URL
        # and asks for a new one each time it attempts the request.
        dxpy.DXHTTPRequest(get_upload_url_and_headers,
                           data,
                           method='PUT',
                           auth=None,
                           timeout=FILE_REQUEST_TIMEOUT,
                           always_retry=True,
                           prepend_srv=False,
                           jsonify_data=False)

        self._num_uploaded_parts += 1

        if display_progress:
            warn(".")

        if report_progress_fn is not None:
            report_progress_fn(self, len(data))
 def tearDownClass(cls):
     """Destroy the project ``cls.project_id``, then every ID listed in ``cls.ids_to_destroy``."""
     dxpy.api.project_destroy(cls.project_id)
     for doomed_id in cls.ids_to_destroy:
         destroy_route = "/" + doomed_id + "/destroy"
         dxpy.DXHTTPRequest(destroy_route, {})
Example #3
0
def cp_to_noexistent_destination(args, dest_path, dx_dest, dest_proj):
    ''' Copy the source to a destination that does not currently
    exist. This involves creating the target file/folder.

    Exactly one source is accepted, because the copy doubles as a
    rename. If the source resolves to a folder, a new folder is created
    at *dest_path* and the contents of the source folder are cloned
    into it. If the source resolves to data objects, they are cloned
    into the parent folder of *dest_path* and each newly copied object
    is renamed to the last component of *dest_path*.

    :param args: Parsed command-line arguments; *args.sources* and *args.all* are read
    :param dest_path: Destination path inside the destination container
    :type dest_path: string
    :param dx_dest: Handler for the destination container; only its *list_folder* method is used
    :param dest_proj: ID of the destination project or container
    :type dest_proj: string
    :raises: :exc:`DXCLIError` if the number of sources is not exactly one, if the parent folder of *dest_path* is reported as not found, or if the source and destination resolve to the same container

    Any other failure is handed to err_exit().
    '''
    # Destination folder path is new => renaming
    if len(args.sources) != 1:
        # Can't copy and rename more than one object
        raise DXCLIError('The destination folder does not exist')

    # Split dest_path into the parent folder (which must already exist) and
    # the new name to give the copy.
    last_slash_pos = get_last_pos_of_char('/', dest_path)
    if last_slash_pos == 0:
        dest_folder = '/'
    else:
        dest_folder = dest_path[:last_slash_pos]
    # Turn an escaped slash (backslash followed by '/') in the final path
    # component back into a plain '/'. The backslash is written as '\\' so
    # that the literal does not rely on an invalid escape sequence.
    dest_name = dest_path[last_slash_pos + 1:].replace('\\/', '/')

    try:
        dx_dest.list_folder(folder=dest_folder, only='folders')
    except dxpy.DXAPIError as details:
        if details.code == requests.codes['not_found']:
            raise DXCLIError('The destination folder does not exist')
        else:
            raise
    except:
        err_exit()

    # Clone and rename either the data object or the folder.
    # src_results is None if the source could not be resolved to an object.
    src_proj, src_path, src_results = try_call(resolve_existing_path,
                                               args.sources[0],
                                               allow_mult=True, all_mult=args.all)

    if src_proj == dest_proj:
        if is_hashid(args.sources[0]):
            # This is the only case in which the source project is
            # purely assumed, so give a better error message.
            raise DXCLIError(fill('Error: You must specify a source project for ' + args.sources[0]))
        else:
            raise DXCLIError(fill('A source path and the destination path resolved to the ' +
                                'same project or container.  Please specify different source ' +
                                'and destination containers, e.g.') +
                             '\n  dx cp source-project:source-id-or-path dest-project:dest-path')

    if src_results is None:
        # The source is a folder: create the destination folder and clone
        # the source folder's subfolders and objects into it.
        try:
            contents = dxpy.api.project_list_folder(src_proj,
                                                    {"folder": src_path, "includeHidden": True})
            dxpy.api.project_new_folder(dest_proj, {"folder": dest_path})
            exists = dxpy.api.project_clone(src_proj,
                                            {"folders": contents['folders'],
                                             "objects": [result['id'] for result in contents['objects']],
                                             "project": dest_proj,
                                             "destination": dest_path})['exists']
            if exists:
                print(fill('The following objects already existed in the destination ' +
                           'container and were not copied:') + '\n ' + '\n '.join(exists))
        except:
            err_exit()
    else:
        # The source is one or more data objects: clone them into the parent
        # folder, then rename every object that was actually copied.
        try:
            exists = dxpy.api.project_clone(src_proj,
                                            {"objects": [result['id'] for result in src_results],
                                             "project": dest_proj,
                                             "destination": dest_folder})['exists']
            if exists:
                print(fill('The following objects already existed in the destination ' +
                           'container and were not copied:') + '\n ' + '\n '.join(exists))
            for result in src_results:
                # Objects that already existed were left alone, so they keep
                # their current name.
                if result['id'] not in exists:
                    dxpy.DXHTTPRequest('/' + result['id'] + '/rename',
                                       {"project": dest_proj,
                                        "name": dest_name})
        except:
            err_exit()
Example #4
0
def interactive_help(in_class, param_desc, prompt):
    '''
    :param in_class: Class of the value being requested (compared against the data object classes and against names such as "boolean", "string", "int", "float" and "hash")
    :type in_class: string
    :param param_desc: Description of the input parameter; its "class" field decides whether an array is being filled in, and its optional "type" field restricts the data objects offered
    :type param_desc: dict
    :param prompt: Prompt to show when asking the user to type a value directly
    :type prompt: string
    :returns: A list of strings holding the value(s) chosen or typed by the user

    Prints help for the parameter and then asks the user for a value.
    For data object classes a menu is shown that lets the user browse
    data in a project or pick an output field of a previously-run job;
    the menu is repeated until a choice is made or the user returns to
    the plain prompt. For all other classes a short hint is printed and
    the user's input is read from the prompt.
    '''
    is_array = param_desc['class'].startswith("array:")
    print_param_help(param_desc)
    print('')
    array_help_str = ', or <ENTER> to finish the list of inputs'

    if in_class not in dx_data_classes:
        # Plain (non data object) value: print a hint and read it directly.
        if in_class == 'boolean':
            if is_array:
                print(fill('Enter "true", "false"' + array_help_str))
            else:
                print(fill('Enter "true" or "false"'))
        elif in_class == 'string' and is_array:
            print(fill('Enter a nonempty string' + array_help_str))
        elif (in_class == 'float' or in_class == 'int') and is_array:
            print(fill('Enter a number' + array_help_str))
        elif in_class == 'hash':
            print(fill('Enter a quoted JSON hash'))
        result = raw_input(prompt)
        if in_class == 'string':
            # Strings are taken verbatim; everything else is split shell-style.
            return [result]
        else:
            return shlex.split(result)

    # Class is some sort of data object
    if dxpy.WORKSPACE_ID is not None:
        proj_name = None
        try:
            proj_name = dxpy.DXHTTPRequest(
                '/' + dxpy.WORKSPACE_ID + '/describe', {})['name']
        except Exception:
            # Showing the working directory is best-effort only.
            pass
        if proj_name is not None:
            print('Your current working directory is ' + proj_name + ':' +
                  os.environ.get('DX_CLI_WD', '/'))

    while True:
        print('Pick an option to find input data:')
        try:
            opt_num = pick([
                'List and choose from available data in the current project',
                'List and choose from available data in the DNAnexus Reference Genomes project',
                'Select another project to list and choose available data',
                'Select an output from a previously-run job (current project only)',
                'Return to original prompt (specify an ID or path directly)'
            ])
        except KeyboardInterrupt:
            # ^C in the menu behaves like "Return to original prompt".
            opt_num = 4

        # Options 0-2 only differ in which project is searched.
        if opt_num == 0:
            query_project = dxpy.WORKSPACE_ID
        elif opt_num == 1:
            query_project = dxpy.find_one_project(name="Reference Genomes",
                                                  public=True,
                                                  level="VIEW")['id']
        elif opt_num == 2:
            project_generator = dxpy.find_projects(level='VIEW',
                                                   describe=True,
                                                   explicit_perms=True)
            print('\nProjects to choose from:')
            project_choice = paginate_and_pick(
                project_generator,
                (lambda result: result['describe']['name']))
            # paginate_and_pick reports "nothing to pick" / "nothing picked"
            # with sentinel strings (see the other call sites below); indexing
            # such a string with ['id'] would raise, so go back to the menu.
            if project_choice == 'none found':
                print('No projects found')
                continue
            elif project_choice == 'none picked':
                continue
            query_project = project_choice['id']

        if opt_num in range(3):
            result_generator = dxpy.find_data_objects(
                classname=in_class,
                typename=param_desc.get('type'),
                describe=True,
                project=query_project)
            print('\nAvailable data:')
            result_choice = paginate_and_pick(
                result_generator,
                (lambda result: get_ls_l_desc(result['describe'])))
            if result_choice == 'none found':
                print('No compatible data found')
                continue
            elif result_choice == 'none picked':
                continue
            else:
                return [
                    result_choice['project'] + ':' + result_choice['id']
                ]
        elif opt_num == 3:
            # Select from previous jobs in current project
            result_generator = dxpy.find_jobs(project=dxpy.WORKSPACE_ID,
                                              describe=True,
                                              parent_job="none")
            print('')
            print('Previously-run jobs to choose from:')
            result_choice = paginate_and_pick(
                result_generator,
                (lambda result: get_find_jobs_string(result['describe'],
                                                     has_children=False,
                                                     single_result=True)),
                filter_fn=(
                    lambda result: result['describe']['state'] not in [
                        'unresponsive', 'terminating', 'terminated',
                        'failed'
                    ]))
            if result_choice == 'none found':
                print('No jobs found')
                continue
            elif result_choice == 'none picked':
                continue
            else:
                job_desc = result_choice['describe']
                if 'output' in job_desc and job_desc['output'] is not None:
                    # The job already has output: offer its field names.
                    keys = list(job_desc['output'].keys())
                else:
                    # No output yet: fall back to the output spec of the
                    # executable that the job ran. The 'applet' key is only
                    # read when 'app' is absent, so a result that carries
                    # just 'app' no longer raises KeyError.
                    # NOTE(review): these keys are looked up on the find_jobs
                    # result itself, not on its 'describe' hash -- confirm
                    # that is where they live.
                    if 'app' in result_choice:
                        exec_id = result_choice['app']
                    else:
                        exec_id = result_choice['applet']
                    exec_handler = dxpy.get_handler(exec_id)
                    exec_desc = exec_handler.describe()
                    if 'outputSpec' not in exec_desc:
                        # This if block will either continue, return, or raise
                        print('No output spec found for the executable')
                        try:
                            field = raw_input(
                                'Output field to use (^C or <ENTER> to cancel): '
                            )
                            if field == '':
                                continue
                            else:
                                return [result_choice['id'] + ':' + field]
                        except KeyboardInterrupt:
                            continue
                    else:
                        keys = list(exec_desc['outputSpec'].keys())
                if len(keys) > 1:
                    print('\nOutput fields to choose from:')
                    field_choice = pick(keys)
                    return [result_choice['id'] + ':' + keys[field_choice]]
                elif len(keys) == 1:
                    print('Using the only output field: ' + keys[0])
                    return [result_choice['id'] + ':' + keys[0]]
                else:
                    # Nothing to choose from; show the menu again.
                    print('No available output fields')
        else:
            print(fill('Enter an ID or path (<TAB> twice for compatible ' +
                       in_class + 's in current directory)' +
                       (array_help_str if is_array else '')))
            return shlex.split(raw_input(prompt))
Example #5
0
def resolve_existing_path(path,
                          expected=None,
                          ask_to_resolve=True,
                          expected_classes=None,
                          allow_mult=False,
                          describe=None,
                          all_mult=False,
                          allow_empty_string=True):
    '''
    :param path: Path or hash ID to resolve
    :type path: string
    :param expected: Passed through to resolve_path
    :param ask_to_resolve: Whether the matches should be narrowed down, interactively if necessary (if false, the full list of matches is returned without any picking)
    :type ask_to_resolve: boolean
    :param expected_classes: If given and *path* names a hash ID, the ID must start with one of these strings
    :type expected_classes: list of strings or None
    :param allow_mult: Whether to allow the user to select multiple results from the same path
    :type allow_mult: boolean
    :param describe: Input hash to describe call for the results; it is copied, never modified
    :type describe: dict or None
    :param all_mult: Whether to return all matching results without prompting (only applicable if allow_mult == True)
    :type all_mult: boolean
    :param allow_empty_string: If false, a ResolutionError will be raised if *path* is an empty string. Use this when resolving the empty string could result in unexpected behavior.
    :type allow_empty_string: boolean
    :returns: A tuple (project, folder path, results). *results* is a LIST when ask_to_resolve is False or allow_mult is True, a single result otherwise, and None when the path refers to a folder (or may refer to one)
    :raises: :exc:`ResolutionError` if the request path was invalid, or a single result was requested, several were found, and standard output is not a TTY

    Returns either a list of results or a single result (depending on
    how many is expected; if only one, then an interactive picking of
    a choice will be initiated if standard output is a tty, or else an
    error is thrown).

    TODO: Always treats the path as a glob pattern.

    Each result is of the form {"id": id, "describe": describe hash}.

    TODO: Allow arbitrary flags for the describe hash.

    NOTE: if expected_classes is provided and conflicts with the class
    of the hash ID, it will return None for all fields.
    '''
    # Work on a private copy: the code below adds and removes the "project"
    # hint, which must neither leak into the caller's dict nor persist from
    # one call to the next. Non-dict values are passed through untouched.
    if describe is None:
        describe = {}
    elif isinstance(describe, dict):
        describe = dict(describe)

    project, folderpath, entity_name = resolve_path(
        path, expected, allow_empty_string=allow_empty_string)

    if entity_name is None:
        # Definitely a folder (or project)
        # FIXME? Should I check that the folder exists if expected="folder"?
        return project, folderpath, entity_name

    if is_hashid(entity_name):
        if expected_classes is not None and not any(
                entity_name.startswith(klass) for klass in expected_classes):
            return None, None, None

        # Add a project hint to the describe call unless one was supplied.
        if 'project' not in describe:
            if project != dxpy.WORKSPACE_ID:
                describe['project'] = project
            elif dxpy.WORKSPACE_ID is not None:
                describe['project'] = dxpy.WORKSPACE_ID
        try:
            desc = dxpy.DXHTTPRequest('/' + entity_name + '/describe',
                                      describe)
        except Exception as details:
            if 'project' not in describe:
                raise ResolutionError(str(details))
            # Now try it without the hint
            del describe['project']
            try:
                desc = dxpy.DXHTTPRequest('/' + entity_name + '/describe',
                                          describe)
            except Exception as retry_details:
                raise ResolutionError(str(retry_details))
        result = {"id": entity_name, "describe": desc}
        if ask_to_resolve and not allow_mult:
            return project, folderpath, result
        else:
            return project, folderpath, [result]

    if project is None:
        raise ResolutionError(
            'Could not resolve "' + path +
            '" to a project context.  Please either set a default project using dx select or cd, or add a colon (":") after your project ID or name'
        )

    msg = 'Object of name ' + unicode(
        entity_name) + ' could not be resolved in folder ' + unicode(
            folderpath) + ' of project ID ' + str(project)
    # Probably an object
    if is_job_id(project):
        # The following will raise if no results could be found
        results = resolve_job_ref(project, entity_name, describe=describe)
    else:
        try:
            results = list(
                dxpy.find_data_objects(project=project,
                                       folder=folderpath,
                                       name=entity_name,
                                       name_mode='glob',
                                       recurse=False,
                                       describe=describe,
                                       visibility='either'))
        except Exception as details:
            # Only ordinary errors are reported as resolution failures;
            # KeyboardInterrupt and SystemExit propagate unchanged.
            raise ResolutionError(str(details))

    if len(results) == 0:
        # Could not find it as a data object.  If anything, it's a
        # folder.

        if '/' in entity_name:
            # Then there's no way it's supposed to be a folder
            raise ResolutionError(msg)

        # This is the only possibility left.  Leave the
        # error-checking for later.
        possible_folder = folderpath + '/' + entity_name
        possible_folder, skip = clean_folder_path(possible_folder,
                                                  'folder')
        return project, possible_folder, None

    # Caller wants ALL results; just return the whole thing
    if not ask_to_resolve:
        return project, None, results

    if len(results) > 1:
        if allow_mult and (all_mult or is_glob_pattern(entity_name)):
            return project, None, results
        if not sys.stdout.isatty():
            raise ResolutionError('The given path "' + path +
                                  '" resolves to ' + str(len(results)) +
                                  ' data objects')
        print('The given path "' + path +
              '" resolves to the following data objects:')
        choice = pick([get_ls_l_desc(result['describe'])
                       for result in results],
                      allow_mult=allow_mult)
        if allow_mult and choice == '*':
            return project, None, results
        return project, None, ([results[choice]]
                               if allow_mult else results[choice])

    # Exactly one match.
    return project, None, ([results[0]] if allow_mult else results[0])
Example #6
0
def resolve_job_ref(job_id, name, describe=None):
    '''
    :param job_id: ID of the job whose output is being referenced
    :type job_id: string
    :param name: Name of the output field of the job
    :type name: string
    :param describe: Input hash to the describe call made for each referenced object; it is copied, never modified, and its "project" entry is set to the project of the job
    :type describe: dict or None
    :returns: A list of results of the form {"id": id, "describe": describe hash}, one per object linked from the output field
    :raises: :exc:`ResolutionError` if the job cannot be described, is not in the "done" state, has no output field called *name*, the field is an empty array or does not hold data object links, or a linked object cannot be described
    '''
    # Copy so that setting "project" below neither changes the caller's dict
    # nor persists in a shared default between calls.
    describe = {} if describe is None else dict(describe)

    try:
        job_desc = dxpy.DXHTTPRequest('/' + job_id + '/describe', {})
    except Exception as details:
        raise ResolutionError(str(details))
    describe['project'] = job_desc['project']
    if job_desc['state'] != 'done':
        raise ResolutionError(
            'the job ' + job_id + ' is ' + job_desc['state'] +
            ', and it must be in the done state for its outputs to be accessed'
        )

    output_field = job_desc['output'].get(name, None)
    if output_field is None:
        raise ResolutionError('Could not find "' + name +
                              '" as an output field name of ' + job_id +
                              '; available fields are: ' +
                              ', '.join(job_desc['output'].keys()))

    # Collect the IDs of the linked objects.
    if isinstance(output_field, list):
        if len(output_field) == 0:
            raise ResolutionError('Found "' + name +
                                  '" as an output field name of ' +
                                  job_id + ', but it is an empty array')
        # Only the first element is inspected to decide whether the array
        # holds data object links.
        if not isinstance(output_field[0],
                          dict) or '$dnanexus_link' not in output_field[0]:
            raise ResolutionError(
                'Found "' + name + '" as an output field name of ' +
                job_id + ', but it is an array of non-data objects')
        ids = [link['$dnanexus_link'] for link in output_field]
    elif isinstance(output_field, dict) and '$dnanexus_link' in output_field:
        ids = [output_field['$dnanexus_link']]
    else:
        raise ResolutionError('Found "' + name +
                              '" as an output field name of ' + job_id +
                              ', but it is not of a data object class')

    try:
        return [{
            "id": out_id,
            "describe": dxpy.DXHTTPRequest('/' + out_id + '/describe',
                                           describe)
        } for out_id in ids]
    except Exception as details:
        raise ResolutionError(str(details))
Example #7
0
def cp(args):
    '''
    :param args: Parsed command-line arguments; *args.destination*, *args.sources* and *args.all* are read
    :raises: :exc:`DXCLIError` if the destination has no folder path, no sources were given, a source resolves to the same container as the destination, or no project could be determined for a source

    Clones the given sources into the destination folder of another
    project or container. If the destination folder cannot be listed,
    the work is delegated to cp_to_noexistent_destination(), which
    treats the destination as the new name for a single source.
    '''
    dest_proj, dest_path, _none = try_call(resolve_path,
                                           args.destination,
                                           expected='folder')
    if dest_path is None:
        raise DXCLIError('Cannot copy to a hash ID')
    dx_dest = dxpy.get_handler(dest_proj)
    try:
        # check if the destination exists
        dx_dest.list_folder(folder=dest_path, only='folders')
    except Exception:
        # Any ordinary failure to list the folder is taken to mean that it
        # does not exist. KeyboardInterrupt and SystemExit are deliberately
        # not caught here, so that ^C aborts instead of starting a copy.
        cp_to_noexistent_destination(args, dest_path, dx_dest, dest_proj)
        return

    # The destination exists, we need to copy all of the sources to it.
    if not args.sources:
        raise DXCLIError('No sources provided to copy to another project')
    src_objects = []
    src_folders = []
    for source in args.sources:
        src_proj, src_folderpath, src_results = try_call(resolve_existing_path,
                                                         source,
                                                         allow_mult=True,
                                                         all_mult=args.all)
        if src_proj == dest_proj:
            if is_hashid(source):
                # This is the only case in which the source project is
                # purely assumed, so give a better error message.
                raise DXCLIError(
                    fill('Error: You must specify a source project for ' +
                         source))
            else:
                raise DXCLIError(
                    fill(
                        'Error: A source path and the destination path resolved '
                        + 'to the same project or container. Please specify ' +
                        'different source and destination containers, e.g.') +
                    '\n  dx cp source-project:source-id-or-path dest-project:dest-path'
                )

        if src_proj is None:
            raise DXCLIError(
                fill(
                    'Error: A source project must be specified or a current ' +
                    'project set in order to clone objects between projects'))

        if src_results is None:
            # The source resolved to a folder rather than to data objects.
            src_folders.append(src_folderpath)
        else:
            src_objects.extend(result['id'] for result in src_results)

    # NOTE(review): the clone request below is issued against the container
    # of the LAST source only; sources that resolve to different containers
    # are not cloned separately -- confirm that this is intended.
    try:
        exists = dxpy.DXHTTPRequest(
            '/' + src_proj + '/clone', {
                "objects": src_objects,
                "folders": src_folders,
                "project": dest_proj,
                "destination": dest_path
            })['exists']
        if exists:
            print(
                fill(
                    'The following objects already existed in the destination container '
                    + 'and were left alone:') + '\n ' + '\n '.join(exists))
    except:
        err_exit()
Example #8
0
def resolve_job_ref(job_id, name, describe=None):
    '''
    :param job_id: ID of the job whose output is being referenced
    :type job_id: string
    :param name: Name of the output field of the job, optionally followed by "." and an integer index into an array field (for example "reads.0")
    :type name: string
    :param describe: Input hash to the describe call made for each referenced object; it is copied, never modified, and its "project" entry is set to the project of the job
    :type describe: dict or None
    :returns: A list of results of the form {"id": id, "describe": describe hash}, one per object linked from the output field (or from the indexed element)
    :raises: :exc:`ResolutionError` if the job cannot be described, is not in the "done" state, has no such output field, the index does not apply or is out of range, the field is an empty array or does not hold data object links, or a linked object cannot be described
    '''
    # Copy so that setting "project" below neither changes the caller's dict
    # nor persists in a shared default between calls.
    describe = {} if describe is None else dict(describe)

    try:
        job_desc = dxpy.api.job_describe(job_id)
    except Exception as details:
        raise ResolutionError(str(details))
    describe['project'] = job_desc['project']
    if job_desc['state'] != 'done':
        raise ResolutionError(
            'the job ' + job_id + ' is ' + job_desc['state'] +
            ', and it must be in the done state for its outputs to be accessed'
        )

    # A trailing ".<integer>" selects one element of an array field. If the
    # part after the last "." is not an integer, the whole string is kept as
    # the field name.
    index = None
    str_index = None
    if '.' in name:
        actual_name, str_index = name.rsplit('.', 1)
        try:
            index = int(str_index)
        except ValueError:
            pass
        else:
            name = actual_name

    output_field = job_desc['output'].get(name, None)
    # Report a missing field before looking at the index; otherwise an
    # indexed reference to a nonexistent field would be reported as
    # 'Found ... but it is not an array'.
    if output_field is None:
        raise ResolutionError('Could not find "' + name +
                              '" as an output field name of ' + job_id +
                              '; available fields are: ' +
                              ', '.join(job_desc['output'].keys()))

    if index is not None:
        if not isinstance(output_field, list):
            raise ResolutionError(
                'Found "' + name + '" as an output field name of ' + job_id +
                ', but it is not an array and cannot be indexed')
        if index < 0 or index >= len(output_field):
            raise ResolutionError('Found "' + name +
                                  '" as an output field name of ' + job_id +
                                  ', but the specified index ' + str_index +
                                  ' is out of range')
        output_field = output_field[index]

    # Collect the IDs of the linked objects.
    if isinstance(output_field, list):
        if len(output_field) == 0:
            raise ResolutionError('Found "' + name +
                                  '" as an output field name of ' +
                                  job_id + ', but it is an empty array')
        # Only the first element is inspected to decide whether the array
        # holds data object links.
        if not isinstance(output_field[0],
                          dict) or '$dnanexus_link' not in output_field[0]:
            raise ResolutionError(
                'Found "' + name + '" as an output field name of ' +
                job_id + ', but it is an array of non-data objects')
        ids = [link['$dnanexus_link'] for link in output_field]
    elif isinstance(output_field, dict) and '$dnanexus_link' in output_field:
        ids = [output_field['$dnanexus_link']]
    else:
        raise ResolutionError('Found "' + name +
                              '" as an output field name of ' + job_id +
                              ', but it is not of a data object class')

    try:
        return [{
            "id": out_id,
            "describe": dxpy.DXHTTPRequest('/' + out_id + '/describe',
                                           describe)
        } for out_id in ids]
    except Exception as details:
        raise ResolutionError(str(details))