Esempio n. 1
0
def get_project(project_name):
    '''Resolve *project_name* (a project ID or a glob pattern) to a handler.

    The value is first handed to ``dxpy.DXProject``.  If that raises
    ``dxpy.DXError`` the value is treated as a name and searched for with
    ``dxpy.find_projects`` (glob matching, level "VIEW").

    Returns the single matching project handler.  When zero or several
    projects match, a message is printed to stderr and the process exits
    with status 1.
    '''
    # Fast path: the argument may already be a project ID.
    try:
        return dxpy.DXProject(project_name)
    except dxpy.DXError:
        pass

    matches = list(dxpy.find_projects(name=project_name,
                                      name_mode='glob',
                                      return_handler=True,
                                      level="VIEW"))
    if len(matches) == 1:
        return matches[0]

    # Anything other than exactly one hit is fatal.
    if not matches:
        message = 'Did not find project {0}.'
    else:
        message = 'Found more than 1 project matching {0}.'
    print(message.format(project_name), file=sys.stderr)
    sys.exit(1)
Esempio n. 2
0
def main():
  """Run APP on recently created projects of an org that still hold raw data.

  Reads ``days_ago`` and ``org`` from the parsed command line, searches for
  projects billed to that org and created within the given number of days,
  and for every project whose "/raw_data" folder lists at least three
  objects launches ``APP`` and prints the project name and ID.
  """
  parser = get_parser()
  args = parser.parse_args()
  org = args.org
  # Value handed to created_after, e.g. "-7d" for days_ago == 7.
  created_after = "-{}d".format(args.days_ago)

  if not org.startswith("org-"):
    parser.error("Argument --org must be passed a value prefixed with 'org-'.")

  # Each search result is a dict; its "id" entry is the project ID.
  for result in dxpy.find_projects(created_after=created_after, billed_to=org):
    proj_id = result["id"]
    proj = dxpy.DXProject(proj_id)
    # Quick filter: a project without a /raw_data folder is skipped.
    try:
      listing = proj.list_folder("/raw_data")
    except dxpy.exceptions.ResourceNotFound:
      continue
    # A cleaned project only keeps RunInfo.xml and runParameters.xml, so
    # fewer than three objects means it has already been cleaned.
    if len(listing["objects"]) >= 3:
      APP.run(app_input={}, project=proj_id, folder=RAW_DATA_FOLDER)
      print(proj.name + " (" + proj_id + ")")
Esempio n. 3
0
def get_project(project_name):
    """Look up a DNAnexus project by ID or by (glob) name.

    The caller is expected to be logged in already so that the dx API
    queries return results.

    ``project_name`` is tried as a project ID first; when ``dxpy.DXProject``
    rejects it with ``dxpy.DXError`` a name search (glob mode, level "VIEW")
    is performed instead.  Exactly one match is returned as a handler;
    otherwise a message goes to stderr and the process exits with status 1.
    """
    try:
        # Succeeds when project_name is accepted as a project ID.
        return dxpy.DXProject(project_name)
    except dxpy.DXError:
        pass

    candidates = list(dxpy.find_projects(name=project_name,
                                         name_mode='glob',
                                         return_handler=True,
                                         level="VIEW"))
    how_many = len(candidates)
    if how_many < 1:
        print('Did not find project {0}.'.format(project_name),
              file=sys.stderr)
        sys.exit(1)
    if how_many > 1:
        print('Found more than 1 project matching {0}.'.format(project_name),
              file=sys.stderr)
        sys.exit(1)
    return candidates[0]
Esempio n. 4
0
def locate_or_create_dx_project(project_name, billTo=None, skip_share=False):
    '''Find the project matching *project_name*, creating it if none exists.

    :param project_name: name (glob pattern) searched with level 'CONTRIBUTE'
    :param billTo: optional value stored as 'billTo' when a project is created
    :param skip_share: when exactly ``False`` (the default) the project is
        shared with SHARE_WITH at "VIEW" level before being returned
    :returns: a ``dxpy.DXProject`` handler

    If more than one project matches, a message is printed and the process
    exits with status 1.
    '''
    matches = list(dxpy.find_projects(name=project_name,
                                      name_mode='glob',
                                      return_handler=True,
                                      level='CONTRIBUTE'))

    if len(matches) > 1:
        # Single-argument print(...) produces the same output whether print
        # is a statement (Python 2) or a function (Python 3).
        print('Found more than 1 project matching ' + project_name + '.')
        print('Please provide a unique project!')
        sys.exit(1)

    if matches:
        project = matches[0]
    else:
        # No match: create the project.
        project_params = {'name': project_name, 'summary': 'VGP Data Project'}
        if billTo:
            project_params['billTo'] = billTo
        project = dxpy.DXProject(dxpy.api.project_new(project_params)['id'])

    # Identity test kept on purpose: only a literal False triggers sharing.
    if skip_share is False:
        project.invite(SHARE_WITH, "VIEW")

    return project
Esempio n. 5
0
def resolve_container_id_or_name(raw_string,
                                 is_error=False,
                                 unescape=True,
                                 multi=False):
    '''
    :param raw_string: A potential project or container ID or name
    :type raw_string: string
    :param is_error: Whether to raise an exception if the project or container ID cannot be resolved
    :type is_error: boolean
    :param unescape: Whether the string needs to be unescaped first (TODO: External link to section on escaping characters.)
    :type unescape: boolean
    :param multi: Whether to return a list of all matching IDs instead of a single ID
    :type multi: boolean
    :returns: Project or container ID if found or else None (a possibly empty list of IDs if *multi* is True)
    :rtype: string or None, or list of strings
    :raises: :exc:`ResolutionError` if *is_error* is True and the project or container could not be resolved,
             if the project search fails, or if several projects match and no interactive choice is possible

    Attempt to resolve *raw_string* to a project or container ID.

    '''
    # Bind the working name on both paths; with unescape=False the raw
    # string is used verbatim.
    string = unescape_name_str(raw_string) if unescape else raw_string

    if is_container_id(string):
        return ([string] if multi else string)

    if string in cached_project_names:
        return ([cached_project_names[string]]
                if multi else cached_project_names[string])

    try:
        results = list(
            dxpy.find_projects(name=string, describe=True, level='VIEW'))
    except Exception as details:
        raise ResolutionError(str(details))

    if len(results) == 1:
        # Only unambiguous names are cached.
        cached_project_names[string] = results[0]['id']
        return ([results[0]['id']] if multi else results[0]['id'])
    elif len(results) == 0:
        if is_error:
            raise ResolutionError('Could not find a project named "' + string +
                                  '"')
        return ([] if multi else None)
    elif not multi:
        if INTERACTIVE_CLI:
            print('Found multiple projects with name "' + string + '"')
            choice = pick([
                '{id} ({level})'.format(id=result['id'], level=result['level'])
                for result in results
            ])
            return results[choice]['id']
        else:
            raise ResolutionError(
                'Found multiple projects with name "' + string +
                '"; please use a project ID to specify the desired project')
    else:
        # len(results) > 1 and multi
        return [result['id'] for result in results]
Esempio n. 6
0
 def findNewProjects(self):
     """
     List DNAnexus project names that self.getProjects() does not report.

     Names starting with 'PIPELINE' or ending with 'resources' are left
     out.  A summary line with the count and dxpy.whoami() is printed.
     :return: list of project name strings (order not defined).
     """
     handlers = list(dxpy.find_projects(return_handler=True))
     remote_names = [handler.name for handler in handlers]
     unseen = set(remote_names) - set(self.getProjects())
     fresh = [
         name for name in unseen
         if not (name.startswith('PIPELINE') or name.endswith('resources'))
     ]
     print("Found {} new projects on DNAnexus, for {}".format(len(fresh), dxpy.whoami()))
     return fresh
Esempio n. 7
0
    def test_find_projects(self):
        """Check the 'level' and 'describe' fields of dxpy.find_projects results.

        A plain search must include the current project at ADMINISTER level
        and no result may carry a 'describe' entry.  A search with
        describe=True must include self.second_proj_id at ADMINISTER level,
        described with the name 'test project 2'.
        """
        dxproject = dxpy.DXProject()
        seen = False
        for entry in list(dxpy.find_projects()):
            if entry["id"] == dxproject.get_id():
                self.assertEqual(entry["level"], 'ADMINISTER')
                seen = True
            self.assertNotIn('describe', entry)
        self.assertTrue(seen)

        seen = False
        for entry in list(dxpy.find_projects(level='VIEW', describe=True)):
            if entry["id"] != self.second_proj_id:
                continue
            self.assertEqual(entry["level"], 'ADMINISTER')
            seen = True
            self.assertIn('describe', entry)
            self.assertEqual(entry['describe']['name'], 'test project 2')
        self.assertTrue(seen)
Esempio n. 8
0
    def find_projects(self):
        """Check the 'level' and 'describe' fields of dxpy.find_projects results.

        A plain search must include the current project at ADMINISTER level
        and no result may carry a 'describe' entry.  A search with
        describe=True must include 'project-0000000000000000000000pb' at
        ADMINISTER level, and every result must be described with the name
        'public-test-project'.
        """
        dxproject = dxpy.DXProject()
        seen = False
        for entry in list(dxpy.find_projects()):
            if entry["id"] == dxproject.get_id():
                self.assertEqual(entry["level"], 'ADMINISTER')
                seen = True
            self.assertNotIn('describe', entry)
        self.assertTrue(seen)

        seen = False
        for entry in list(dxpy.find_projects(level='VIEW', describe=True)):
            if entry["id"] == 'project-0000000000000000000000pb':
                self.assertEqual(entry["level"], 'ADMINISTER')
                seen = True
            # NOTE(review): these two checks run for every result, not only
            # the public project -- confirm that is intended.
            self.assertIn('describe', entry)
            self.assertEqual(entry['describe']['name'], 'public-test-project')
        self.assertTrue(seen)
def get_002_projects():
    """
    Collect handlers for every project whose name matches "002_*".

    Returns:
        list: one dx.DXProject handler per search result
    """
    matching = dx.find_projects(name="002_*", name_mode="glob")
    return [dx.DXProject(found["id"]) for found in matching]
Esempio n. 10
0
def locate_or_create_dx_project(project_name):
    '''Find the project matching *project_name*, creating it if none exists.

    :param project_name: name (glob pattern) searched with level 'CONTRIBUTE'
    :returns: a ``dxpy.DXProject`` handler

    If more than one project matches, a message is printed and the process
    exits with status 1.
    '''
    matches = list(dxpy.find_projects(name=project_name, name_mode='glob',
                                      return_handler=True, level='CONTRIBUTE'))

    if len(matches) > 1:
        # Single-argument print(...) produces the same output whether print
        # is a statement (Python 2) or a function (Python 3).
        print('Found more than 1 project matching ' + project_name + '.')
        print('Please provide a unique project!')
        sys.exit(1)

    if matches:
        return matches[0]

    # No match: create the project.
    new_id = dxpy.api.project_new({'name': project_name,
                                   'summary': 'FALCON Unzip Assembly'})['id']
    return dxpy.DXProject(new_id)
Esempio n. 11
0
def resolve_container_id_or_name(raw_string, is_error=False, multi=False):
    '''
    :param raw_string: A potential project or container ID or name
    :type raw_string: string
    :param is_error: Whether to raise an exception if the project or
            container ID cannot be resolved
    :type is_error: boolean
    :param multi: Whether to return a list of all matching IDs instead
            of a single ID
    :type multi: boolean
    :returns: Project or container ID if found or else None (a possibly
            empty list of IDs if *multi* is True)
    :rtype: string or None, or list of strings
    :raises: :exc:`ResolutionError` if *is_error* is True and the
            project or container could not be resolved, if the project
            search fails, or if several projects match and no
            interactive choice is possible

    Unescapes and attempts to resolve *raw_string* to a project or
    container ID.

    '''
    name = unescape_name_str(raw_string)

    def shaped(container_id):
        # Single results are wrapped in a list only when multi is set.
        return [container_id] if multi else container_id

    if is_container_id(name):
        return shaped(name)

    if name in cached_project_names:
        return shaped(cached_project_names[name])

    try:
        found = list(dxpy.find_projects(name=name, describe=True, level='VIEW'))
    except Exception as details:
        raise ResolutionError(str(details))

    if not found:
        if is_error:
            raise ResolutionError('Could not find a project named "' + name + '"')
        return [] if multi else None

    if len(found) == 1:
        # Only unambiguous names are cached.
        only_id = found[0]['id']
        cached_project_names[name] = only_id
        return shaped(only_id)

    # From here on several projects share the name.
    if multi:
        return [entry['id'] for entry in found]

    if not INTERACTIVE_CLI:
        raise ResolutionError('Found multiple projects with name "' + name + '"; please use a project ID to specify the desired project')

    print('Found multiple projects with name "' + name + '"')
    labels = ['{id} ({level})'.format(id=entry['id'], level=entry['level'])
              for entry in found]
    choice = pick(labels)
    return found[choice]['id']
Esempio n. 12
0
def resolve_container_id_or_name(raw_string, is_error=False, unescape=True, multi=False):
    '''
    :param raw_string: A potential project or container ID or name
    :type raw_string: string
    :param is_error: Whether to raise an exception if the project or container ID cannot be resolved
    :type is_error: boolean
    :param unescape: Whether the string needs to be unescaped first (TODO: External link to section on escaping characters.)
    :type unescape: boolean
    :param multi: Whether to return a list of all matching IDs instead of a single ID
    :type multi: boolean
    :returns: Project or container ID if found or else None (a possibly empty list of IDs if *multi* is True)
    :rtype: string or None, or list of strings
    :raises: :exc:`ResolutionError` if *is_error* is True and the project or container could not be resolved,
             if the project search fails, or if several projects match and stdout is not a terminal

    Attempt to resolve *raw_string* to a project or container ID.

    '''
    # Bind the working name on both paths; with unescape=False the raw
    # string is used verbatim.
    string = unescape_name_str(raw_string) if unescape else raw_string

    if is_container_id(string):
        return ([string] if multi else string)

    if string in cached_project_names:
        return ([cached_project_names[string]] if multi else cached_project_names[string])

    try:
        results = list(dxpy.find_projects(name=string, describe=True, level='VIEW'))
    except Exception as details:
        raise ResolutionError(str(details))

    if len(results) == 1:
        # Only unambiguous names are cached.
        cached_project_names[string] = results[0]['id']
        return ([results[0]['id']] if multi else results[0]['id'])
    elif len(results) == 0:
        if is_error:
            raise ResolutionError('Could not find a project named "' + string + '"')
        return ([] if multi else None)
    elif not multi:
        if sys.stdout.isatty():
            print('Found multiple projects with name "' + string + '"')
            # A real list (not a lazy map object) is handed to pick().
            choice = pick([result['id'] + ' (' + result['level'] + ')' for result in results])
            return results[choice]['id']
        else:
            raise ResolutionError('Found multiple projects with name "' + string + '"; please use a project ID to specify the desired project')
    else:
        # len(results) > 1 and multi: return a list, like the other multi
        # branches, rather than a map iterator.
        return [result['id'] for result in results]
Esempio n. 13
0
def main():
    """Download every DNAnexus project tagged with a given sequencing run.

    Reads ``seq_run_name``, ``skip_projects``, ``download_dir`` and
    ``billing_account_id`` from the parsed command line, searches for
    projects whose "seq_run_name" property equals the run name (optionally
    restricted to the billing account), and starts one
    ``download_project.py`` child process per project that is not listed in
    ``skip_projects``.  After all children have been started, their captured
    stdout and stderr are printed one process at a time.

    Raises:
        Exception: if a billing account is given that starts with neither
            the user prefix nor the org prefix from ``gbsc_dnanexus.utils``.
    """
    parser = get_parser()
    args = parser.parse_args()

    seq_run_name = args.seq_run_name
    # An unset skip list is treated as "skip nothing" so the membership
    # test below cannot fail on None.
    skip_projects = args.skip_projects or ()
    download_dir = args.download_dir
    billing_account_id = args.billing_account_id
    if billing_account_id:
        valid_prefixes = (gbsc_dnanexus.utils.DX_USER_PREFIX,
                          gbsc_dnanexus.utils.DX_ORG_PREFIX)
        if not billing_account_id.startswith(valid_prefixes):
            raise Exception(
                "Error - The DNAnexus Billing account, set by the --billing-account argument, must start with {user} or {org}. Instead, got {value}"
                .format(user=gbsc_dnanexus.utils.DX_USER_PREFIX,
                        org=gbsc_dnanexus.utils.DX_ORG_PREFIX,
                        value=billing_account_id))
    else:
        billing_account_id = None  # must be None rather than the empty string in order to work in dxpy.find_projects.

    dx_projects = dxpy.find_projects(billed_to=billing_account_id,
                                     properties={"seq_run_name": seq_run_name})
    # dx_projects is a generator of dicts of the form:
    #  {u'permissionSources': [u'user-nathankw'], u'public': False, u'id': u'project-BzqVkxj08kVZbPXk54X0P2JY', u'level': u'ADMINISTER'}

    popens = []
    for proj in dx_projects:
        proj_id = proj["id"]
        if proj_id in skip_projects:
            continue
        # Pass an argument vector instead of a shell string so that values
        # containing spaces or shell metacharacters reach the child intact.
        argv = [
            "download_project.py",
            "--dx-project-id", str(proj_id),
            "--download-dir", str(download_dir),
        ]
        print("Running command {cmd}.".format(cmd=" ".join(argv)))
        popens.append(
            subprocess.Popen(argv,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE))
    # All children are already running; collect their output in start order.
    for p in popens:
        stdout, stderr = p.communicate()
        print(stdout)
        print(stderr)
        print("\n\n")
Esempio n. 14
0
def get_project(project_name):
    '''Resolve *project_name* (a project ID or an exact name) to a handler.

    The value is tried as a project ID first; when ``dxpy.DXProject``
    raises ``dxpy.DXError`` a name search with level "VIEW" is done.

    Returns the matching handler, or None (after printing a message) when
    nothing matches.  Raises ``Exception`` when several projects match.
    '''
    # Fast path: the argument may already be a project ID.
    try:
        return dxpy.DXProject(project_name)
    except dxpy.DXError:
        pass

    matches = list(dxpy.find_projects(name=project_name, return_handler=True, level="VIEW"))
    if len(matches) > 1:
        raise Exception('Found more than 1 project matching {0}'.format(project_name))
    if matches:
        return matches[0]

    print('Did not find project {0}'.format(project_name))
    return None
def main():
  """Copy run and lane metadata from the LIMS onto DNAnexus project properties.

  For every project billed to the given billing account whose name starts
  with "16" (except "160715_HEK-ZNF_Controls"), the project name is split
  with ``lane_reg`` into run name and lane, the matching run info is
  fetched through the ``scgpm_lims`` connection, and the resulting values
  are merged into the project's properties.  Existing properties whose
  value is empty or whitespace-only are removed before the merge.
  """
  parser = get_parser()
  uhts_conn = scgpm_lims.Connection()
  args = parser.parse_args()

  billing_account = args.billing_account
  if not billing_account.startswith("org-"):
    billing_account = "org-" + billing_account

  # Each search result is a dict; its "id" entry is the project ID.
  found_projects = list(dxpy.find_projects(billed_to=billing_account))
  for found in found_projects:
    dx_proj = dxpy.DXProject(found["id"])
    dx_proj_name = dx_proj.name
    # Names not starting with "16" are not sequencing results projects.
    is_sequencing_project = dx_proj_name.startswith("16")
    if not is_sequencing_project or dx_proj_name == "160715_HEK-ZNF_Controls":
      continue
    print(dx_proj_name)

    uhts_run_name, lane, _ = lane_reg.split(dx_proj_name)
    runinfo = uhts_conn.getruninfo(run=uhts_run_name)["run_info"]
    laneinfo = runinfo["lanes"][lane]
    merge_props = {
      "seq_run_name": uhts_run_name,
      "seq_lane_index": lane,
      "seq_instrument": runinfo["sequencing_instrument"],
      "paired_end": str(runinfo["paired_end"]),
      "sequencer_type": runinfo["platform_name"],
      "lab": laneinfo["lab"],
      "queue": laneinfo["queue"],
      # Keep only the first whitespace-separated element of the sample name.
      "library_name": laneinfo["sample_name"].split()[0],
    }

    dx_properties = dx_proj.describe(input_params={"properties": True})["properties"]
    # Collect the blank-valued keys first, then delete them, so the dict is
    # not modified while it is being iterated.
    blank_keys = [key for key, value in dx_properties.items() if not value.strip()]
    for key in blank_keys:
      del dx_properties[key]

    dx_properties.update(merge_props)
    dxpy.api.project_set_properties(object_id=dx_proj.id, input_params={"properties": dx_properties})
def get_project(project_name):
    '''Find the project matching *project_name*, creating it if none exists.

    :param project_name: name (glob pattern) passed to ``dxpy.find_projects``
    :returns: a ``dxpy.DXProject`` handler

    If more than one project matches, a message is printed and the process
    exits with status 1.
    '''
    matches = list(dxpy.find_projects(name=project_name,
                                      name_mode='glob',
                                      return_handler=True))

    if len(matches) > 1:
        # Single-argument print(...) produces the same output whether print
        # is a statement (Python 2) or a function (Python 3).
        print('Found more than 1 project matching ' + project_name + '.')
        print('Please provide a unique project!')
        sys.exit(1)

    if matches:
        return matches[0]

    # No match: create the project.
    new_id = dxpy.api.project_new({
        'name': project_name,
        'summary': 'ChIP-Seq Pipeline'
    })['id']
    return dxpy.DXProject(new_id)
Esempio n. 17
0
def interactive_help(in_class, param_desc, prompt):
    '''Interactively help the user supply a value for one input parameter.

    :param in_class: class of the input; data object classes (members of
        dx_data_classes) get a menu for browsing projects and job outputs,
        other classes get a short hint followed by a plain prompt
    :param param_desc: parameter description; its 'class' entry is read to
        detect arrays, and its 'type' entry (if any) filters data objects
    :param prompt: prompt string passed to input()
    :returns: list of strings chosen or typed by the user
    '''
    is_array = param_desc['class'].startswith("array:")
    print_param_help(param_desc)
    print()
    array_help_str = ', or <ENTER> to finish the list of inputs'
    if in_class in dx_data_classes:
        # Class is some sort of data object
        if dxpy.WORKSPACE_ID is not None:
            proj_name = None
            try:
                proj_name = dxpy.api.project_describe(dxpy.WORKSPACE_ID)['name']
            except Exception:
                # Best effort: the working-directory banner is simply
                # skipped if the project cannot be described.  Ctrl-C is
                # no longer swallowed here.
                pass
            if proj_name is not None:
                print('Your current working directory is ' + proj_name + ':' + dxpy.config.get('DX_CLI_WD', '/'))
        while True:
            print('Pick an option to find input data:')
            try:
                opt_num = pick(['List and choose from available data in the current project',
                                'List and choose from available data in the DNAnexus Reference Genomes Files project',
                                'Select another project to list and choose available data',
                                'Select an output from a previously-run job (current project only)',
                                'Return to original prompt (specify an ID or path directly)'])
            except KeyboardInterrupt:
                # Ctrl-C in the menu behaves like the last option.
                opt_num = 4
            if opt_num == 0:
                query_project = dxpy.WORKSPACE_ID
            elif opt_num == 1:
                region = None
                if dxpy.WORKSPACE_ID:
                    region = dxpy.describe(dxpy.WORKSPACE_ID).get("region")
                query_project = dxpy.find_one_project(name="Reference Genome Files:*", public=True, billed_to="org-dnanexus_apps", level="VIEW", name_mode="glob", region=region)['id']
            elif opt_num == 2:
                project_generator = dxpy.find_projects(level='VIEW', describe=True, explicit_perms=True)
                print('\nProjects to choose from:')
                query_project = paginate_and_pick(project_generator, (lambda result: result['describe']['name']))['id']
            if opt_num in range(3):
                # Options 0-2 all end with picking a data object from
                # query_project.
                result_generator = dxpy.find_data_objects(classname=in_class,
                                                          typename=param_desc.get('type'),
                                                          describe=dict(fields=get_ls_l_desc_fields()),
                                                          project=query_project)
                print('\nAvailable data:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_ls_l_desc(result['describe'])))
                if result_choice == 'none found':
                    print('No compatible data found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    return [result_choice['project'] + ':' + result_choice['id']]
            elif opt_num == 3:
                # Select from previous jobs in current project
                result_generator = dxpy.find_jobs(project=dxpy.WORKSPACE_ID,
                                                  describe=True,
                                                  parent_job="none")
                print()
                print('Previously-run jobs to choose from:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_find_executions_string(result['describe'],
                                                                                             has_children=False,
                                                                                             single_result=True)),
                                                  filter_fn=(lambda result: result['describe']['state'] not in ['unresponsive', 'terminating', 'terminated', 'failed']))
                if result_choice == 'none found':
                    print('No jobs found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    job_desc = result_choice['describe']
                    if job_desc.get('output') is not None:
                        # list() so the keys can be indexed below.
                        keys = list(job_desc['output'].keys())
                    else:
                        # NOTE(review): 'app'/'applet' are looked up on the
                        # search result itself, not on its 'describe' hash --
                        # confirm that is where they live.
                        # 'applet' is only read when 'app' is absent, so a
                        # result with 'app' but no 'applet' no longer fails.
                        if 'app' in result_choice:
                            executable_id = result_choice['app']
                        else:
                            executable_id = result_choice['applet']
                        exec_handler = dxpy.get_handler(executable_id)
                        exec_desc = exec_handler.describe()
                        if 'outputSpec' not in exec_desc:
                            # This if block will either continue, return, or raise
                            print('No output spec found for the executable')
                            try:
                                field = input('Output field to use (^C or <ENTER> to cancel): ')
                                if field == '':
                                    continue
                                else:
                                    return [result_choice['id'] + ':' + field]
                            except KeyboardInterrupt:
                                continue
                        else:
                            keys = list(exec_desc['outputSpec'].keys())
                    if len(keys) > 1:
                        print('\nOutput fields to choose from:')
                        field_choice = pick(keys)
                        return [result_choice['id'] + ':' + keys[field_choice]]
                    elif len(keys) == 1:
                        print('Using the only output field: ' + keys[0])
                        return [result_choice['id'] + ':' + keys[0]]
                    else:
                        # Nothing to select; fall through to the menu again.
                        print('No available output fields')
            else:
                print(fill('Enter an ID or path (<TAB> twice for compatible ' + in_class + 's in current directory)' + (array_help_str if is_array else '')))
                return shlex.split(input(prompt))
    else:
        # Non-data classes: print a hint where one applies, then prompt.
        if in_class == 'boolean':
            if is_array:
                print(fill('Enter "true", "false"' + array_help_str))
            else:
                print(fill('Enter "true" or "false"'))
        elif in_class == 'string' and is_array:
            print(fill('Enter a nonempty string' + array_help_str))
        elif (in_class == 'float' or in_class == 'int') and is_array:
            print(fill('Enter a number' + array_help_str))
        elif in_class == 'hash':
            print(fill('Enter a quoted JSON hash'))
        result = input(prompt)
        if in_class == 'string':
            # Strings are returned verbatim, without shell-style splitting.
            return [result]
        else:
            return shlex.split(result)
Esempio n. 18
0
def path_completer(text, expected=None, classes=None, perm_level=None,
                   include_current_proj=False, typespec=None):
    '''
    :param text: String to tab-complete to a path matching the syntax project-name:folder/entity_or_folder_name
    :type text: string
    :param expected: "folder", "entity", "project", or None (no restriction) as to the types of answers to look for
    :type expected: string
    :param classes: if expected="entity", the possible data object classes that are acceptable
    :type classes: list of strings
    :param perm_level: the minimum permissions level required, e.g. "VIEW" or "CONTRIBUTE"
    :type perm_level: string
    :param include_current_proj: Indicate whether the current project's name should be a potential result
    :type include_current_proj: boolean
    :param typespec: passed through to get_data_matches when looking for data objects

    Returns a list of completions for *text*, restricted by the
    requested parameters.
    '''

    colon_pos = get_last_pos_of_char(':', text)
    slash_pos = get_last_pos_of_char('/', text)
    delim_pos = max(colon_pos, slash_pos)

    def collect(dxproj, pos, folderpath):
        # Folder matches, plus data object matches unless only folders
        # are wanted, for one project.
        found = []
        found += get_folder_matches(text, pos, dxproj, folderpath)
        if expected == 'folder':
            return found
        if classes is None:
            found += get_data_matches(text, pos, dxproj,
                                      folderpath, typespec=typespec)
        else:
            for classname in classes:
                found += get_data_matches(text, pos, dxproj,
                                          folderpath, classname,
                                          typespec)
        return found

    matches = []

    # Project names are only offered while no ':' or '/' has been typed,
    # and for empty text only when a project is explicitly expected.
    no_delimiter_yet = colon_pos < 0 and slash_pos < 0
    if no_delimiter_yet and (text != "" or expected == 'project'):
        projects = list(dxpy.find_projects(describe=True, level=perm_level))
        is_match = startswith(text)
        # The current workspace is left out unless include_current_proj.
        names = [
            escape_completion_name_str(project['describe']['name']) + ':'
            for project in projects
            if project['id'] != dxpy.WORKSPACE_ID or include_current_proj
        ]
        matches.extend(name for name in names if is_match(name))

    if expected == 'project':
        return matches

    # Attempt to tab-complete to a folder or data object name
    if colon_pos < 0 and slash_pos >= 0:
        # No project given: the path is relative to dxpy.WORKSPACE_ID.
        if dxpy.WORKSPACE_ID is not None:
            dxproj = dxpy.get_handler(dxpy.WORKSPACE_ID)
            folderpath, _ = clean_folder_path(text)
            matches += collect(dxproj, slash_pos, folderpath)
    else:
        # The project part may resolve to several projects; try each one.
        proj_ids, folderpath, _ = resolve_path(text, multi_projects=True)
        for proj in proj_ids:
            matches += collect(dxpy.get_handler(proj), delim_pos, folderpath)
    return matches
Esempio n. 19
0
def _pick_job_output_field(job_result):
    '''
    Ask the user which output field of a previously-run job to use.

    :param job_result: the job entry the user picked; must contain 'id' and 'describe'
    :type job_result: dict
    :returns: a one-element list of the form ["<job id>:<field name>"], or None if no field was selected (the caller should prompt again)
    '''
    job_desc = job_result['describe']
    if job_desc.get('output') is not None:
        # Materialize the keys: a dict view cannot be indexed on Python 3.
        field_names = list(job_desc['output'].keys())
    else:
        # The job has no output yet, so fall back to the output spec of
        # the executable it ran.  Each lookup is evaluated lazily;
        # dict.get(key, default) computes its default eagerly, which
        # raised KeyError whenever 'applet' was absent even though 'app'
        # was present.
        # NOTE(review): the other job fields read in this module ('state',
        # 'output') live under 'describe', so presumably 'app'/'applet' do
        # too; both locations are consulted -- confirm against the
        # find_jobs result format.
        executable_id = (job_result.get('app') or job_result.get('applet') or
                         job_desc.get('app') or job_desc['applet'])
        exec_desc = dxpy.get_handler(executable_id).describe()
        if 'outputSpec' not in exec_desc:
            print('No output spec found for the executable')
            try:
                field = input('Output field to use (^C or <ENTER> to cancel): ')
            except KeyboardInterrupt:
                return None
            if field == '':
                return None
            return [job_result['id'] + ':' + field]
        # NOTE(review): assumes outputSpec is a mapping keyed by field
        # name -- confirm.
        field_names = list(exec_desc['outputSpec'].keys())

    if len(field_names) > 1:
        print('\nOutput fields to choose from:')
        field_choice = pick(field_names)
        return [job_result['id'] + ':' + field_names[field_choice]]
    if len(field_names) == 1:
        print('Using the only output field: ' + field_names[0])
        return [job_result['id'] + ':' + field_names[0]]
    print('No available output fields')
    return None


def interactive_help(in_class, param_desc, prompt):
    '''
    Print help for an input parameter and interactively obtain its value.

    For data object classes (those in dx_data_classes) the user is
    offered a menu for browsing projects or previous jobs; the menu is
    shown again until a value is chosen or the user falls back to typing
    an ID or path.  For all other classes a short hint is printed and
    the value is read directly from the prompt.

    :param in_class: class of a single value of the parameter (e.g. "file", "boolean", "string")
    :type in_class: string
    :param param_desc: description of the parameter; 'class' is required, 'type' is used if present
    :type param_desc: dict
    :param prompt: prompt shown when reading a value typed by the user
    :type prompt: string
    :returns: list of strings: a "project:object-id" or "job-id:field" reference, the raw input (for strings), or the shlex-split input
    '''
    is_array = param_desc['class'].startswith("array:")
    print_param_help(param_desc)
    print()
    array_help_str = ', or <ENTER> to finish the list of inputs'

    if in_class not in dx_data_classes:
        # Plain (non data object) value: print a hint, then read it.
        if in_class == 'boolean':
            if is_array:
                print(fill('Enter "true", "false"' + array_help_str))
            else:
                print(fill('Enter "true" or "false"'))
        elif in_class == 'string' and is_array:
            print(fill('Enter a nonempty string' + array_help_str))
        elif (in_class == 'float' or in_class == 'int') and is_array:
            print(fill('Enter a number' + array_help_str))
        elif in_class == 'hash':
            print(fill('Enter a quoted JSON hash'))
        result = input(prompt)
        if in_class == 'string':
            # Strings are taken verbatim (no shell-style splitting).
            return [result]
        return shlex.split(result)

    # Class is some sort of data object
    if dxpy.WORKSPACE_ID is not None:
        proj_name = None
        try:
            proj_name = dxpy.api.project_describe(dxpy.WORKSPACE_ID)['name']
        except Exception:
            # Showing the working directory is best-effort only, but do
            # not swallow KeyboardInterrupt/SystemExit.
            pass
        if proj_name is not None:
            print('Your current working directory is ' + proj_name + ':' + dxpy.config.get('DX_CLI_WD', '/'))

    options = ['List and choose from available data in the current project',
               'List and choose from available data in the DNAnexus Reference Genomes project',
               'Select another project to list and choose available data',
               'Select an output from a previously-run job (current project only)',
               'Return to original prompt (specify an ID or path directly)']
    while True:
        print('Pick an option to find input data:')
        try:
            opt_num = pick(options)
        except KeyboardInterrupt:
            # ^C at the menu behaves like "Return to original prompt".
            opt_num = 4

        if opt_num == 0:
            query_project = dxpy.WORKSPACE_ID
        elif opt_num == 1:
            query_project = dxpy.find_one_project(name="Reference Genome Files", public=True, billed_to="org-dnanexus", level="VIEW")['id']
        elif opt_num == 2:
            project_generator = dxpy.find_projects(level='VIEW', describe=True, explicit_perms=True)
            print('\nProjects to choose from:')
            project_choice = paginate_and_pick(project_generator, (lambda result: result['describe']['name']))
            # paginate_and_pick reports "nothing chosen" with string
            # sentinels; indexing those with ['id'] would raise TypeError.
            if project_choice == 'none found':
                print('No projects found')
                continue
            elif project_choice == 'none picked':
                continue
            query_project = project_choice['id']

        if opt_num in (0, 1, 2):
            result_generator = dxpy.find_data_objects(classname=in_class,
                                                      typename=param_desc.get('type'),
                                                      describe=True,
                                                      project=query_project)
            print('\nAvailable data:')
            result_choice = paginate_and_pick(result_generator,
                                              (lambda result: get_ls_l_desc(result['describe'])))
            if result_choice == 'none found':
                print('No compatible data found')
                continue
            elif result_choice == 'none picked':
                continue
            return [result_choice['project'] + ':' + result_choice['id']]
        elif opt_num == 3:
            # Select from previous jobs in current project
            result_generator = dxpy.find_jobs(project=dxpy.WORKSPACE_ID,
                                              describe=True,
                                              parent_job="none")
            print()
            print('Previously-run jobs to choose from:')
            result_choice = paginate_and_pick(result_generator,
                                              (lambda result: get_find_executions_string(result['describe'],
                                                                                         has_children=False,
                                                                                         single_result=True)),
                                              filter_fn=(lambda result: result['describe']['state'] not in ['unresponsive', 'terminating', 'terminated', 'failed']))
            if result_choice == 'none found':
                print('No jobs found')
                continue
            elif result_choice == 'none picked':
                continue
            job_output_ref = _pick_job_output_field(result_choice)
            if job_output_ref is not None:
                return job_output_ref
            # No field selected: show the menu again.
        else:
            print(fill('Enter an ID or path (<TAB> twice for compatible ' + in_class + 's in current directory)' + (array_help_str if is_array else '')))
            return shlex.split(input(prompt))
import traceback
import logging
from time import sleep

# Set up default logging configuration and create this module's logger.
logging.basicConfig()
logger = logging.getLogger(__name__)

# Project and analysis IDs kept for reference only; none of them is active.
# E3_PROJECT_ID = 'project-BKp5K980bpZZ096Xp1XQ02fZ'
# FRIP_DEV_PROJECT_ID = 'project-F3BpKqj07z6y979Z4X36P6z9'
# FRIP_PROJECT_ID = 'project-F3Bvp4004vxZxbpZBBJGPyYy'
# TEST_ANALYSIS_ID = 'analysis-F2v67b80bpZV0p9q788kgBGp'
# TEST_ANALYSIS_ID = 'analysis-F3BZ8v8036977yg98x815zB3'
# Folder path for accession logs (presumably a folder inside a DNAnexus
# project -- TODO confirm where it is used).
ACCESSION_OUTPUT_FOLDER = "/accession_log/"

# Resolved when this module is imported: the ID of the first project named
# 'ENCODE - ChIP Production' yielded by dxpy.find_projects.  Only the first
# result is consumed, so additional projects with the same name are ignored;
# presumably next() raises StopIteration if the search yields nothing --
# confirm that find_projects returns an iterator.
APPLETS_PROJECT_ID = next(dxpy.find_projects(
    name='ENCODE - ChIP Production',
    return_handler=True)).get_id()
# Starts out empty; presumably filled in elsewhere as a cache of applets
# -- TODO confirm.
APPLETS = {}

# Per-assay settings keyed by assay type ('tf' or 'histone').  Each entry
# holds the names of two stages ('pre_stage', 'final_stage') and of an
# applet ('applet'); presumably these identify stages of the peak-calling
# workflow -- verify against the code that reads them.
ASSAY_SPECIFICS = {
    'tf': {
        'pre_stage': 'SPP Peaks',
        'final_stage': 'Final IDR peak calls',
        'applet': 'encode_idr',
    },
    'histone': {
        'pre_stage': 'ENCODE Peaks',
        'final_stage': 'Final narrowpeaks',
        'applet': 'overlap_peaks',
    }
}
    def _set_dxproject_id(self, latest_project=False):
        """
        Locate the DNAnexus project described by this instance and record its details.

        The lookup strategy is chosen from the attributes that are set, in this order:

          1. ``self.dx_project_id`` is set: the project is addressed directly by ID (the
             "project-" prefix is added to the stored ID when it is missing).
          2. ``self.dx_project_name`` is set: a single project is searched for by that name
             together with whichever project properties are set (library_name, seq_run_name,
             seq_lane_index). The search allows zero results but not more than one.
          3. Otherwise: projects are searched for by those project properties alone.

        The searches in 2 and 3 are restricted to projects billed to ``self.billing_account_id``.

        When a project is found, self.dx_project, self.dx_project_id, self.dx_project_name,
        self.dx_project_props and self.library_name are updated from it. When no project is
        found the method returns without setting them, which may indicate that the sequencing
        hasn't finished yet.

        Args:
            latest_project: `bool`. Only relevant to the property-only search (3). True means
                that when several projects match, the choice is delegated to
                gbsc_dnanexus.utils.select_newest_project() instead of raising.

        Returns:
            `None`. Results are stored on the instance.

        Raises:
            `DxMultipleProjectsWithSameLibraryName`: The property-only search matched more than
                one project and `latest_project` is false.

            `DxMissingLibraryNameProperty`: The properties of the project that was found do not
                include 'library_name'.
        """
        # Gather whichever identifying properties were supplied.
        search_props = {
            key: value
            for key, value in (
                ("library_name", self.library_name),
                ("seq_run_name", self.uhts_run_name),
                ("seq_lane_index", self.sequencing_lane),
            )
            if value
        }
        if "seq_lane_index" in search_props:
            # The lane index is always sent as a string.
            search_props["seq_lane_index"] = str(search_props["seq_lane_index"])

        project = None
        if self.dx_project_id:
            if not self.dx_project_id.startswith("project-"):
                self.dx_project_id = "project-" + self.dx_project_id
            project = dxpy.DXProject(dxid=self.dx_project_id)
        elif self.dx_project_name:
            hit = dxpy.find_one_project(name=self.dx_project_name,
                                        properties=search_props,
                                        billed_to=self.billing_account_id,
                                        zero_ok=True,
                                        more_ok=False)
            if hit:
                project = dxpy.DXProject(dxid=hit["id"])
        else:
            # Fall back to searching by library_name and any other properties given.
            hits = list(
                dxpy.find_projects(properties=search_props,
                                   billed_to=self.billing_account_id))
            candidate_ids = [hit["id"] for hit in hits]
            if len(candidate_ids) == 1:
                project = dxpy.DXProject(dxid=candidate_ids[0])
            elif candidate_ids:
                if not latest_project:
                    raise DxMultipleProjectsWithSameLibraryName(
                        "Error - Multiple DNAnexus projects have the same value for the library_name property value of {library_name}. The projects are {dx_proj_ids}."
                        .format(library_name=self.library_name,
                                dx_proj_ids=candidate_ids))
                # NOTE(review): the result is used below as a project handler
                # (.id, .name) -- confirm select_newest_project returns one.
                project = gbsc_dnanexus.utils.select_newest_project(
                    dx_project_ids=candidate_ids)

        if not project:
            # Nothing found: leave the instance untouched.
            return

        self.dx_project = project
        self.dx_project_id = project.id
        self.dx_project_name = project.name
        # Ask only for the properties field of the project description.
        describe_input = {"fields": {"properties": True}}
        described = dxpy.api.project_describe(object_id=project.id,
                                              input_params=describe_input)
        self.dx_project_props = described["properties"]
        try:
            self.library_name = self.dx_project_props["library_name"]
        except KeyError:
            raise DxMissingLibraryNameProperty(
                "DNAnexus project {} is missing the library_name property.".format(
                    self.dx_project_name))
Esempio n. 22
0
def path_completer(text, expected=None, classes=None, perm_level=None,
                   include_current_proj=False, typespec=None, visibility=None):
    '''
    :param text: String to tab-complete to a path matching the syntax project-name:folder/entity_or_folder_name
    :type text: string
    :param expected: "folder", "entity", "project", or None (no restriction) as to the types of answers to look for
    :type expected: string
    :param classes: if expected="entity", the possible data object classes that are acceptable
    :type classes: list of strings
    :param perm_level: the minimum permissions level required, e.g. "VIEW" or "CONTRIBUTE"
    :type perm_level: string
    :param include_current_proj: Indicate whether the current project's name should be a potential result
    :type include_current_proj: boolean
    :param typespec: passed through unchanged to get_data_matches when looking for data objects
    :param visibility: Visibility with which to restrict the completion (one of "either", "visible", or "hidden") (default behavior is dependent on *text*)

    Returns a list of matches to the text and restricted by the
    requested parameters.

    Completion is best-effort: errors raised while querying a project
    are suppressed and whatever matches were gathered so far are
    returned.
    '''

    colon_pos = get_last_pos_of_char(':', text)
    slash_pos = get_last_pos_of_char('/', text)
    delim_pos = max(colon_pos, slash_pos)

    # First get projects if necessary
    matches = []

    def add_matches(dxproj, pos, folderpath):
        # Append folder matches and, unless only folders are wanted, data
        # object matches found under folderpath in dxproj.  Results are
        # added to ``matches`` as they arrive so that anything collected
        # before a failure is kept.
        matches.extend(get_folder_matches(text, pos, dxproj, folderpath))
        if expected == 'folder':
            return
        if classes is not None:
            for classname in classes:
                matches.extend(get_data_matches(text, pos, dxproj,
                                                folderpath, classname=classname,
                                                typespec=typespec,
                                                visibility=visibility))
        else:
            matches.extend(get_data_matches(text, pos, dxproj,
                                            folderpath, typespec=typespec,
                                            visibility=visibility))

    if expected == 'project' and colon_pos > 0 and colon_pos == len(text) - 1:
        if dxpy.find_one_project(zero_ok=True, name=text[:colon_pos]) is not None:
            return [text + " "]

    if colon_pos < 0 and slash_pos < 0:
        # Might be tab-completing a project, but don't ever include
        # whatever's set as dxpy.WORKSPACE_ID unless expected == "project"
        # Also, don't bother if text=="" and expected is NOT "project"
        # Also, add space if expected == "project"
        if text != "" or expected == 'project':
            results = dxpy.find_projects(describe=True, level=perm_level)
            if not include_current_proj:
                results = [r for r in results if r['id'] != dxpy.WORKSPACE_ID]
            matches += [escape_colon(r['describe']['name'])+':' for r in results if r['describe']['name'].startswith(text)]

    if expected == 'project':
        return matches

    # Attempt to tab-complete to a folder or data object name
    if colon_pos < 0 and slash_pos >= 0:
        # Not tab-completing a project, and the project is unambiguous
        # (use dxpy.WORKSPACE_ID)
        if dxpy.WORKSPACE_ID is not None:
            # try-catch block in case dxpy.WORKSPACE_ID is garbage.
            # Exception (rather than a bare except) keeps the completer
            # best-effort without swallowing KeyboardInterrupt/SystemExit.
            try:
                dxproj = dxpy.get_handler(dxpy.WORKSPACE_ID)
                folderpath, _entity_name = clean_folder_path(text)
                add_matches(dxproj, slash_pos, folderpath)
            except Exception:
                pass
    else:
        # project is given by a path, but attempt to resolve to an
        # object or folder anyway
        try:
            proj_ids, folderpath, _entity_name = resolve_path(text, multi_projects=True)
        except ResolutionError as details:
            # ``unicode`` only exists on Python 2; fall back to ``str`` so
            # that reporting the error does not itself raise NameError on
            # Python 3.
            try:
                message = unicode(details)
            except NameError:
                message = str(details)
            sys.stderr.write("\n" + fill(message))
            return matches
        for proj in proj_ids:
            # protects against dxpy.WORKSPACE_ID being garbage
            try:
                add_matches(dxpy.get_handler(proj), delim_pos, folderpath)
            except Exception:
                pass
    return matches
Esempio n. 23
0
def path_completer(text, expected=None, classes=None, perm_level=None,
                   include_current_proj=False, typespec=None, visibility=None):
    '''
    :param text: String to tab-complete to a path matching the syntax project-name:folder/entity_or_folder_name
    :type text: string
    :param expected: "folder", "entity", "project", or None (no restriction) as to the types of answers to look for
    :type expected: string
    :param classes: if expected="entity", the possible data object classes that are acceptable
    :type classes: list of strings
    :param perm_level: the minimum permissions level required, e.g. "VIEW" or "CONTRIBUTE"
    :type perm_level: string
    :param include_current_proj: Indicate whether the current project's name should be a potential result
    :type include_current_proj: boolean
    :param typespec: passed through unchanged to get_data_matches when looking for data objects
    :param visibility: Visibility with which to restrict the completion (one of "either", "visible", or "hidden") (default behavior is dependent on *text*)

    Returns a list of matches to the text and restricted by the
    requested parameters.

    Completion is best-effort: errors raised while querying a project
    are suppressed and whatever matches were gathered so far are
    returned.
    '''

    colon_pos = get_last_pos_of_char(':', text)
    slash_pos = get_last_pos_of_char('/', text)
    delim_pos = max(colon_pos, slash_pos)

    # First get projects if necessary
    matches = []

    def add_matches(dxproj, pos, folderpath):
        # Append folder matches and, unless only folders are wanted, data
        # object matches found under folderpath in dxproj.  Results are
        # added to ``matches`` as they arrive so that anything collected
        # before a failure is kept.
        matches.extend(get_folder_matches(text, pos, dxproj, folderpath))
        if expected == 'folder':
            return
        if classes is not None:
            for classname in classes:
                matches.extend(get_data_matches(text, pos, dxproj,
                                                folderpath, classname=classname,
                                                typespec=typespec,
                                                visibility=visibility))
        else:
            matches.extend(get_data_matches(text, pos, dxproj,
                                            folderpath, typespec=typespec,
                                            visibility=visibility))

    if expected == 'project' and colon_pos > 0 and colon_pos == len(text) - 1:
        if dxpy.find_one_project(zero_ok=True, name=text[:colon_pos]) is not None:
            return [text + " "]

    if colon_pos < 0 and slash_pos < 0:
        # Might be tab-completing a project, but don't ever include
        # whatever's set as dxpy.WORKSPACE_ID unless expected == "project"
        # Also, don't bother if text=="" and expected is NOT "project"
        # Also, add space if expected == "project"
        if text != "" or expected == 'project':
            results = dxpy.find_projects(describe=True, level=perm_level)
            if not include_current_proj:
                results = [r for r in results if r['id'] != dxpy.WORKSPACE_ID]
            matches += [escape_colon(r['describe']['name'])+':' for r in results if r['describe']['name'].startswith(text)]

    if expected == 'project':
        return matches

    # Attempt to tab-complete to a folder or data object name
    if colon_pos < 0 and slash_pos >= 0:
        # Not tab-completing a project, and the project is unambiguous
        # (use dxpy.WORKSPACE_ID)
        if dxpy.WORKSPACE_ID is not None:
            # try-catch block in case dxpy.WORKSPACE_ID is garbage.
            # Exception (rather than a bare except) keeps the completer
            # best-effort without swallowing KeyboardInterrupt/SystemExit.
            try:
                dxproj = dxpy.get_handler(dxpy.WORKSPACE_ID)
                folderpath, _entity_name = clean_folder_path(text)
                add_matches(dxproj, slash_pos, folderpath)
            except Exception:
                pass
    else:
        # project is given by a path, but attempt to resolve to an
        # object or folder anyway
        try:
            proj_ids, folderpath, _entity_name = resolve_path(text, multi_projects=True)
        except ResolutionError as details:
            sys.stderr.write("\n" + fill(str(details)))
            return matches
        for proj in proj_ids:
            # protects against dxpy.WORKSPACE_ID being garbage
            try:
                add_matches(dxpy.get_handler(proj), delim_pos, folderpath)
            except Exception:
                pass
    return matches