def get_project(project_name):
    '''Resolve *project_name* (a project ID or a glob-style name) to a project handler.

    The value is first handed to ``dxpy.DXProject``; if that raises
    ``dxpy.DXError`` it is used as a glob name pattern instead. The pattern
    must match exactly one project at VIEW level, otherwise a message is
    written to stderr and the process exits with status 1.
    '''
    # A DXError here is taken to mean the value is not a project ID.
    try:
        return dxpy.DXProject(project_name)
    except dxpy.DXError:
        pass

    matches = list(dxpy.find_projects(name=project_name, name_mode='glob',
                                      return_handler=True, level="VIEW"))
    if len(matches) == 1:
        return matches[0]

    if len(matches) < 1:
        message = 'Did not find project {0}.'.format(project_name)
    else:
        message = 'Found more than 1 project matching {0}.'.format(project_name)
    print(message, file=sys.stderr)
    sys.exit(1)
def main():
    """Run APP on recently created projects of an org that still hold raw data.

    Projects billed to ``--org`` and created within the last ``days_ago``
    days are scanned. A project is skipped when it has no ``/raw_data``
    folder or when that folder lists fewer than three objects. For every
    remaining project APP is launched and the project name and ID are printed.
    """
    parser = get_parser()
    args = parser.parse_args()

    # Relative time expression of the form "-<N>d" used as the created_after filter.
    created_after = "-" + str(args.days_ago) + "d"

    org = args.org
    if not org.startswith("org-"):
        parser.error("Argument --org must be passed a value prefixed with 'org-'.")

    # Each search hit is a dict; only its "id" entry is used here.
    for hit in dxpy.find_projects(created_after=created_after, billed_to=org):
        project_id = hit["id"]
        project = dxpy.DXProject(project_id)

        # Quick filter: a missing folder means there is nothing to clean.
        try:
            listing = project.list_folder("/raw_data")
        except dxpy.exceptions.ResourceNotFound:
            continue

        # Fewer than three objects is treated as "already cleaned": after
        # cleaning only RunInfo.xml and runParameters.xml should remain.
        if len(listing["objects"]) < 3:
            continue

        APP.run(app_input={}, project=project_id, folder=RAW_DATA_FOLDER)
        print(project.name + " (" + project_id + ")")
def get_project(project_name):
    """Look up a DNAnexus project by ID or by glob-style name.

    The caller is expected to be logged in already so that the dx API
    queries return results. Exactly one project must match a name lookup;
    zero or several matches print a message to stderr and exit with status 1.
    """
    # ID attempt first; DXError is taken to mean "not a project ID".
    try:
        by_id = dxpy.DXProject(project_name)
    except dxpy.DXError:
        pass
    else:
        return by_id

    found = list(dxpy.find_projects(name=project_name, name_mode='glob',
                                    return_handler=True, level="VIEW"))
    count = len(found)
    if count < 1:
        print('Did not find project {0}.'.format(project_name), file=sys.stderr)
        sys.exit(1)
    if count > 1:
        print('Found more than 1 project matching {0}.'.format(project_name), file=sys.stderr)
        sys.exit(1)
    return found[0]
def locate_or_create_dx_project(project_name, billTo=None, skip_share=False):
    '''Find the project matching *project_name*, creating it if none exists.

    :param project_name: Project name, used as a glob pattern for the search
                         and as the literal name when a project is created
    :param billTo: Optional billing account set on a newly created project
    :param skip_share: Only the literal value ``False`` triggers the VIEW
                       invitation to SHARE_WITH
    :returns: A ``dxpy.DXProject`` handler for the found or created project

    Exits with status 1 when more than one project matches.
    '''
    matches = list(dxpy.find_projects(name=project_name, name_mode='glob',
                                      return_handler=True, level='CONTRIBUTE'))

    if len(matches) > 1:
        # Single-argument print() calls are valid on both Python 2 and 3.
        print('Found more than 1 project matching ' + project_name + '.')
        print('Please provide a unique project!')
        sys.exit(1)

    if matches:
        project = matches[0]
    else:
        project_params = {'name': project_name, 'summary': 'VGP Data Project'}
        if billTo:
            project_params['billTo'] = billTo
        project = dxpy.DXProject(dxpy.api.project_new(project_params)['id'])

    # Identity test kept on purpose: other falsy values (None, 0) do not share.
    if skip_share is False:
        project.invite(SHARE_WITH, "VIEW")

    return project
def resolve_container_id_or_name(raw_string, is_error=False, unescape=True, multi=False):
    '''
    :param raw_string: A potential project or container ID or name
    :type raw_string: string
    :param is_error: Whether to raise an exception if the project or container ID cannot be resolved
    :type is_error: boolean
    :param unescape: Whether unescaping the string is required (TODO: External link to section on escaping characters.)
    :type unescape: boolean
    :param multi: Whether to return a list of all matching IDs instead of a single ID
    :type multi: boolean
    :returns: Project or container ID if found or else None (a possibly empty list of IDs when *multi* is True)
    :rtype: string, list of strings, or None
    :raises: :exc:`ResolutionError` if *is_error* is True and the project or container could not be resolved, if the project search itself fails, or if several projects match in a non-interactive, non-multi call

    Attempt to resolve *raw_string* to a project or container ID.
    '''
    # With unescape=False the raw string is used as-is; previously the name
    # was left unbound in that case and the next line raised UnboundLocalError.
    string = unescape_name_str(raw_string) if unescape else raw_string

    if is_container_id(string):
        return [string] if multi else string

    if string in cached_project_names:
        cached_id = cached_project_names[string]
        return [cached_id] if multi else cached_id

    try:
        results = list(dxpy.find_projects(name=string, describe=True, level='VIEW'))
    except Exception as details:
        raise ResolutionError(str(details))

    if len(results) == 1:
        # Only unambiguous names are cached.
        cached_project_names[string] = results[0]['id']
        return [results[0]['id']] if multi else results[0]['id']
    elif len(results) == 0:
        if is_error:
            raise ResolutionError('Could not find a project named "' + string + '"')
        return [] if multi else None
    elif not multi:
        if INTERACTIVE_CLI:
            print('Found multiple projects with name "' + string + '"')
            choice = pick(['{id} ({level})'.format(id=result['id'], level=result['level'])
                           for result in results])
            return results[choice]['id']
        else:
            raise ResolutionError('Found multiple projects with name "' + string + '"; please use a project ID to specify the desired project')
    else:
        # len(results) > 1 and multi
        return [result['id'] for result in results]
def findNewProjects(self):
    """
    List DNAnexus project names that ``self.getProjects()`` does not report.

    Names starting with 'PIPELINE' or ending with 'resources' are left out.
    A summary line with the count and ``dxpy.whoami()`` is printed.

    :return: list of project name strings (order is not defined, the
             difference is computed with sets).
    """
    handlers = list(dxpy.find_projects(return_handler=True))
    remote_names = set([handler.name for handler in handlers])
    unseen = list(remote_names - set(self.getProjects()))

    new_dx_projects = []
    for name in unseen:
        if name.startswith('PIPELINE') or name.endswith('resources'):
            continue
        new_dx_projects.append(name)

    print("Found {} new projects on DNAnexus, for {}".format(len(new_dx_projects), dxpy.whoami()))
    return new_dx_projects
def test_find_projects(self):
    """Check find_projects output with and without ``describe``.

    Without arguments the default project must be listed at ADMINISTER
    level and no result may carry a 'describe' entry. With ``describe=True``
    the project ``self.second_proj_id`` must be listed at ADMINISTER level
    with its described name equal to 'test project 2'.
    """
    default_project = dxpy.DXProject()

    # Plain search: no describe payload expected on any hit.
    seen_default = False
    for hit in list(dxpy.find_projects()):
        if hit["id"] == default_project.get_id():
            self.assertEqual(hit["level"], 'ADMINISTER')
            seen_default = True
        self.assertFalse('describe' in hit)
    self.assertTrue(seen_default)

    # Described search: the second project must come back with its name.
    seen_second = False
    for hit in list(dxpy.find_projects(level='VIEW', describe=True)):
        if hit["id"] != self.second_proj_id:
            continue
        self.assertEqual(hit["level"], 'ADMINISTER')
        seen_second = True
        self.assertTrue('describe' in hit)
        self.assertEqual(hit['describe']['name'], 'test project 2')
    self.assertTrue(seen_second)
def find_projects(self):
    """Check find_projects output against the default and a fixed public project.

    Without arguments the default project must be listed at ADMINISTER
    level and no result may carry a 'describe' entry. With ``describe=True``
    the project 'project-0000000000000000000000pb' must be listed at
    ADMINISTER level with described name 'public-test-project'.
    """
    default_project = dxpy.DXProject()

    # Plain search: no describe payload expected on any hit.
    seen_default = False
    for hit in list(dxpy.find_projects()):
        if hit["id"] == default_project.get_id():
            self.assertEqual(hit["level"], 'ADMINISTER')
            seen_default = True
        self.assertFalse('describe' in hit)
    self.assertTrue(seen_default)

    # Described search: the fixed public project must come back with its name.
    seen_public = False
    for hit in list(dxpy.find_projects(level='VIEW', describe=True)):
        if hit["id"] != 'project-0000000000000000000000pb':
            continue
        self.assertEqual(hit["level"], 'ADMINISTER')
        seen_public = True
        self.assertTrue('describe' in hit)
        self.assertEqual(hit['describe']['name'], 'public-test-project')
    self.assertTrue(seen_public)
def get_002_projects():
    """
    Return handlers for all projects whose name matches the glob ``002_*``.

    Returns:
        list: ``dx.DXProject`` handler objects (not bare ID strings), one
        per search result, in the order the search yields them.
    """
    return [
        dx.DXProject(hit["id"])
        for hit in dx.find_projects(name="002_*", name_mode="glob")
    ]
def locate_or_create_dx_project(project_name):
    '''Find the project matching *project_name*, creating it if none exists.

    :param project_name: Project name, used as a glob pattern for the search
                         and as the literal name when a project is created
    :returns: A ``dxpy.DXProject`` handler for the found or created project

    Exits with status 1 when more than one project matches.
    '''
    matches = list(dxpy.find_projects(name=project_name, name_mode='glob',
                                      return_handler=True, level='CONTRIBUTE'))

    if len(matches) > 1:
        # Single-argument print() calls are valid on both Python 2 and 3.
        print('Found more than 1 project matching ' + project_name + '.')
        print('Please provide a unique project!')
        sys.exit(1)

    if matches:
        return matches[0]

    new_project = dxpy.api.project_new({'name': project_name,
                                        'summary': 'FALCON Unzip Assembly'})
    return dxpy.DXProject(new_project['id'])
def resolve_container_id_or_name(raw_string, is_error=False, multi=False):
    '''
    :param raw_string: A potential project or container ID or name
    :type raw_string: string
    :param is_error: Whether to raise an exception if the project or container ID cannot be resolved
    :type is_error: boolean
    :param multi: Whether to return a list of all matching IDs instead of a single ID
    :type multi: boolean
    :returns: Project or container ID if found or else None (a list of IDs, possibly empty, when *multi* is True)
    :rtype: string, list of strings, or None
    :raises: :exc:`ResolutionError` if *is_error* is True and the project or container could not be resolved

    Unescapes *raw_string*, then resolves it in order: as a container ID,
    from the name cache, and finally through a project search by name.
    '''
    name = unescape_name_str(raw_string)

    def _shape(container_id):
        # A single ID is wrapped in a list only in multi mode.
        return [container_id] if multi else container_id

    if is_container_id(name):
        return _shape(name)
    if name in cached_project_names:
        return _shape(cached_project_names[name])

    try:
        hits = list(dxpy.find_projects(name=name, describe=True, level='VIEW'))
    except Exception as details:
        raise ResolutionError(str(details))

    if len(hits) == 0:
        if is_error:
            raise ResolutionError('Could not find a project named "' + name + '"')
        return [] if multi else None

    if len(hits) == 1:
        # Only unambiguous names are remembered.
        only_id = hits[0]['id']
        cached_project_names[name] = only_id
        return _shape(only_id)

    # From here on several projects share the name.
    if multi:
        return [hit['id'] for hit in hits]

    if not INTERACTIVE_CLI:
        raise ResolutionError('Found multiple projects with name "' + name + '"; please use a project ID to specify the desired project')

    print('Found multiple projects with name "' + name + '"')
    labels = ['{id} ({level})'.format(id=hit['id'], level=hit['level']) for hit in hits]
    choice = pick(labels)
    return hits[choice]['id']
def resolve_container_id_or_name(raw_string, is_error=False, unescape=True, multi=False):
    '''
    :param raw_string: A potential project or container ID or name
    :type raw_string: string
    :param is_error: Whether to raise an exception if the project or container ID cannot be resolved
    :type is_error: boolean
    :param unescape: Whether unescaping the string is required (TODO: External link to section on escaping characters.)
    :type unescape: boolean
    :param multi: Whether to return a list of all matching IDs instead of a single ID
    :type multi: boolean
    :returns: Project or container ID if found or else None (a possibly empty list of IDs when *multi* is True)
    :rtype: string, list of strings, or None
    :raises: :exc:`ResolutionError` if *is_error* is True and the project or container could not be resolved, if the project search itself fails, or if several projects match while stdout is not a terminal and *multi* is False

    Attempt to resolve *raw_string* to a project or container ID.
    '''
    # With unescape=False the raw string is used as-is; previously the name
    # was left unbound in that case and the next line raised UnboundLocalError.
    string = unescape_name_str(raw_string) if unescape else raw_string

    if is_container_id(string):
        return [string] if multi else string

    if string in cached_project_names:
        cached_id = cached_project_names[string]
        return [cached_id] if multi else cached_id

    try:
        results = list(dxpy.find_projects(name=string, describe=True, level='VIEW'))
    except Exception as details:
        raise ResolutionError(str(details))

    if len(results) == 1:
        # Only unambiguous names are cached.
        cached_project_names[string] = results[0]['id']
        return [results[0]['id']] if multi else results[0]['id']
    elif len(results) == 0:
        if is_error:
            raise ResolutionError('Could not find a project named "' + string + '"')
        return [] if multi else None
    elif not multi:
        if sys.stdout.isatty():
            print('Found multiple projects with name "' + string + '"')
            # Real lists instead of map(): on Python 3 map() is a lazy
            # iterator, which has no length and cannot be indexed.
            choice = pick([result['id'] + ' (' + result['level'] + ')' for result in results])
            return results[choice]['id']
        else:
            raise ResolutionError('Found multiple projects with name "' + string + '"; please use a project ID to specify the desired project')
    else:
        # len(results) > 1 and multi; a list keeps the return type the same
        # as the other multi branches on both Python 2 and 3.
        return [result['id'] for result in results]
def main():
    """Download every project tagged with a given sequencing run name.

    Projects are searched by the ``seq_run_name`` property (optionally
    restricted to a billing account). For each project not listed in
    ``--skip-projects`` a ``download_project.py`` child process is started;
    all children run concurrently and their captured stdout/stderr are
    printed once each one finishes.

    Raises:
        Exception: if a billing account is given that starts with neither
            the user prefix nor the org prefix.
    """
    parser = get_parser()
    args = parser.parse_args()
    seq_run_name = args.seq_run_name
    # An unset option would make the membership test below raise TypeError.
    skip_projects = args.skip_projects or ()
    download_dir = args.download_dir
    billing_account_id = args.billing_account_id

    if billing_account_id:
        allowed_prefixes = (gbsc_dnanexus.utils.DX_USER_PREFIX,
                            gbsc_dnanexus.utils.DX_ORG_PREFIX)
        if not billing_account_id.startswith(allowed_prefixes):
            raise Exception(
                "Error - The DNAnexus Billing account, set by the --billing-account argument, must start with with {user} or {org}. Instead, got {value}"
                .format(user=gbsc_dnanexus.utils.DX_USER_PREFIX,
                        org=gbsc_dnanexus.utils.DX_ORG_PREFIX,
                        value=billing_account_id))
    else:
        # Must be None rather than the empty string in order to work in
        # dxpy.find_projects.
        billing_account_id = None

    # Each search hit is a dict; only its "id" entry is used here.
    dx_projects = dxpy.find_projects(billed_to=billing_account_id,
                                     properties={"seq_run_name": seq_run_name})

    popens = []
    for proj in dx_projects:
        proj_id = proj["id"]
        if proj_id in skip_projects:
            continue
        cmd = "download_project.py --dx-project-id {proj_id} --download-dir {download_dir}".format(
            proj_id=proj_id, download_dir=download_dir)
        print("Running command {cmd}.".format(cmd=cmd))
        # Argument list without a shell: values containing spaces or shell
        # metacharacters reach the child unchanged. str() mirrors the
        # coercion str.format applied when the command was a single string.
        argv = ["download_project.py",
                "--dx-project-id", str(proj_id),
                "--download-dir", str(download_dir)]
        popens.append(
            subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE))

    for p in popens:
        stdout, stderr = p.communicate()
        print(stdout)
        print(stderr)
        print("\n\n")
def get_project(project_name):
    '''Look up a project by ID or by name.

    Returns the project handler, or None (after printing a notice) when no
    project has that name. Raises ``Exception`` when several projects match.
    '''
    # ID attempt first; DXError is taken to mean "not a project ID".
    try:
        return dxpy.DXProject(project_name)
    except dxpy.DXError:
        pass

    candidates = list(dxpy.find_projects(name=project_name, return_handler=True, level="VIEW"))
    if len(candidates) == 0:
        print('Did not find project {0}'.format(project_name))
        return None
    if len(candidates) != 1:
        raise Exception('Found more than 1 project matching {0}'.format(project_name))
    return candidates[0]
def main():
    """Copy run and lane metadata from the LIMS onto DNAnexus project properties.

    For every project billed to the given account whose name starts with
    "16" (except "160715_HEK-ZNF_Controls"), the name is split with
    ``lane_reg`` into run name and lane, the run is looked up through the
    LIMS connection, and the resulting values are merged into the project's
    properties. Existing properties whose value is blank are dropped first.
    """
    parser = get_parser()
    uhts_conn = scgpm_lims.Connection()
    args = parser.parse_args()

    billing_account = args.billing_account
    if not billing_account.startswith("org-"):
        billing_account = "org-" + billing_account

    # Each search hit is a dict; only its "id" entry is used here.
    for hit in list(dxpy.find_projects(billed_to=billing_account)):
        dx_proj = dxpy.DXProject(hit["id"])
        dx_proj_name = dx_proj.name

        # Names not starting with "16" are not sequencing results projects.
        if not dx_proj_name.startswith("16"):
            continue
        if dx_proj_name == "160715_HEK-ZNF_Controls":
            continue
        print(dx_proj_name)

        uhts_run_name, lane, rest = lane_reg.split(dx_proj_name)
        runinfo = uhts_conn.getruninfo(run=uhts_run_name)["run_info"]
        laneinfo = runinfo["lanes"][lane]

        merge_props = {
            "seq_run_name": uhts_run_name,
            "seq_lane_index": lane,
            "seq_instrument": runinfo["sequencing_instrument"],
            "paired_end": str(runinfo["paired_end"]),
            "sequencer_type": runinfo["platform_name"],
            "lab": laneinfo["lab"],
            "queue": laneinfo["queue"],
            # Take the first whitespace-separated element.
            "library_name": laneinfo["sample_name"].split()[0],
        }

        dx_properties = dx_proj.describe(input_params={"properties": True})["properties"]
        # Collect blank-valued names first so the dict is not resized while
        # it is being iterated.
        blank_names = [name for name in dx_properties if not dx_properties[name].strip()]
        for name in blank_names:
            dx_properties.pop(name)

        dx_properties.update(merge_props)
        dxpy.api.project_set_properties(object_id=dx_proj.id,
                                        input_params={"properties": dx_properties})
def get_project(project_name):
    '''Find the project matching *project_name*, creating it if none exists.

    :param project_name: Project name, used as a glob pattern for the search
                         and as the literal name when a project is created
    :returns: A ``dxpy.DXProject`` handler for the found or created project

    Exits with status 1 when more than one project matches.
    '''
    matches = list(dxpy.find_projects(name=project_name, name_mode='glob',
                                      return_handler=True))

    if len(matches) > 1:
        # Single-argument print() calls are valid on both Python 2 and 3.
        print('Found more than 1 project matching ' + project_name + '.')
        print('Please provide a unique project!')
        sys.exit(1)

    if matches:
        return matches[0]

    new_project = dxpy.api.project_new({
        'name': project_name,
        'summary': 'ChIP-Seq Pipeline'
    })
    return dxpy.DXProject(new_project['id'])
def interactive_help(in_class, param_desc, prompt):
    '''
    :param in_class: Class name of the input being requested; compared against dx_data_classes and the literals 'boolean', 'string', 'float', 'int' and 'hash'
    :type in_class: string
    :param param_desc: Input parameter description; its 'class' entry is required and its 'type' entry is optional
    :type param_desc: dict
    :param prompt: Prompt text passed to input() when a value is typed in directly
    :type prompt: string
    :returns: The chosen or typed value(s): ["project:id"] for a picked data object, ["job-id:field"] for a picked job output, otherwise the typed text (as a single element for strings, shlex-split for everything else)
    :rtype: list of strings

    Prints help for the parameter and interactively guides the user to a
    value. For data object classes a menu is shown repeatedly until a
    value is chosen or the user returns to the direct prompt.
    '''
    is_array = param_desc['class'].startswith("array:")
    print_param_help(param_desc)
    print()
    array_help_str = ', or <ENTER> to finish the list of inputs'
    if in_class in dx_data_classes:
        # Class is some sort of data object
        if dxpy.WORKSPACE_ID is not None:
            proj_name = None
            try:
                proj_name = dxpy.api.project_describe(dxpy.WORKSPACE_ID)['name']
            except Exception:
                # Best effort only: the working-directory hint is optional.
                # Ctrl-C is no longer swallowed here.
                pass
            if proj_name is not None:
                print('Your current working directory is ' + proj_name + ':' + dxpy.config.get('DX_CLI_WD', '/'))

        while True:
            print('Pick an option to find input data:')
            try:
                opt_num = pick(['List and choose from available data in the current project',
                                'List and choose from available data in the DNAnexus Reference Genomes Files project',
                                'Select another project to list and choose available data',
                                'Select an output from a previously-run job (current project only)',
                                'Return to original prompt (specify an ID or path directly)'])
            except KeyboardInterrupt:
                # Ctrl-C in the menu means "return to original prompt".
                opt_num = 4

            if opt_num == 0:
                query_project = dxpy.WORKSPACE_ID
            elif opt_num == 1:
                region = None
                if dxpy.WORKSPACE_ID:
                    region = dxpy.describe(dxpy.WORKSPACE_ID).get("region")
                query_project = dxpy.find_one_project(name="Reference Genome Files:*", public=True,
                                                      billed_to="org-dnanexus_apps", level="VIEW",
                                                      name_mode="glob", region=region)['id']
            elif opt_num == 2:
                project_generator = dxpy.find_projects(level='VIEW', describe=True, explicit_perms=True)
                print('\nProjects to choose from:')
                project_choice = paginate_and_pick(project_generator,
                                                   (lambda result: result['describe']['name']))
                # paginate_and_pick signals "nothing chosen" with these
                # sentinel strings (see their use below); indexing one of
                # them with ['id'] would raise TypeError.
                if project_choice == 'none found':
                    print('No projects found')
                    continue
                elif project_choice == 'none picked':
                    continue
                query_project = project_choice['id']

            if opt_num in range(3):
                result_generator = dxpy.find_data_objects(classname=in_class,
                                                          typename=param_desc.get('type'),
                                                          describe=dict(fields=get_ls_l_desc_fields()),
                                                          project=query_project)
                print('\nAvailable data:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_ls_l_desc(result['describe'])))
                if result_choice == 'none found':
                    print('No compatible data found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    return [result_choice['project'] + ':' + result_choice['id']]
            elif opt_num == 3:
                # Select from previous jobs in current project
                result_generator = dxpy.find_jobs(project=dxpy.WORKSPACE_ID,
                                                  describe=True,
                                                  parent_job="none")
                print()
                print('Previously-run jobs to choose from:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_find_executions_string(result['describe'],
                                                                                             has_children=False,
                                                                                             single_result=True)),
                                                  filter_fn=(lambda result: result['describe']['state'] not in ['unresponsive', 'terminating', 'terminated', 'failed']))
                if result_choice == 'none found':
                    print('No jobs found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    if 'output' in result_choice['describe'] and result_choice['describe']['output'] is not None:
                        # list(): dict views cannot be indexed on Python 3.
                        keys = list(result_choice['describe']['output'].keys())
                    else:
                        # Look up 'applet' only when 'app' is absent; as a
                        # .get() default it was evaluated unconditionally
                        # and raised KeyError for jobs without that key.
                        # NOTE(review): these keys may live under
                        # result_choice['describe'] - confirm.
                        if 'app' in result_choice:
                            executable_id = result_choice['app']
                        else:
                            executable_id = result_choice['applet']
                        exec_handler = dxpy.get_handler(executable_id)
                        exec_desc = exec_handler.describe()
                        if 'outputSpec' not in exec_desc:
                            # This if block will either continue, return, or raise
                            print('No output spec found for the executable')
                            try:
                                field = input('Output field to use (^C or <ENTER> to cancel): ')
                                if field == '':
                                    continue
                                else:
                                    return [result_choice['id'] + ':' + field]
                            except KeyboardInterrupt:
                                continue
                        else:
                            keys = list(exec_desc['outputSpec'].keys())
                    if len(keys) > 1:
                        print('\nOutput fields to choose from:')
                        field_choice = pick(keys)
                        return [result_choice['id'] + ':' + keys[field_choice]]
                    elif len(keys) == 1:
                        print('Using the only output field: ' + keys[0])
                        return [result_choice['id'] + ':' + keys[0]]
                    else:
                        # Nothing to pick; the menu is shown again.
                        print('No available output fields')
            else:
                print(fill('Enter an ID or path (<TAB> twice for compatible ' + in_class + 's in current directory)' + (array_help_str if is_array else '')))
                return shlex.split(input(prompt))
    else:
        if in_class == 'boolean':
            if is_array:
                print(fill('Enter "true", "false"' + array_help_str))
            else:
                print(fill('Enter "true" or "false"'))
        elif in_class == 'string' and is_array:
            print(fill('Enter a nonempty string' + array_help_str))
        elif (in_class == 'float' or in_class == 'int') and is_array:
            print(fill('Enter a number' + array_help_str))
        elif in_class == 'hash':
            print(fill('Enter a quoted JSON hash'))
        result = input(prompt)
        if in_class == 'string':
            # Strings are returned verbatim, without shell-style splitting.
            return [result]
        else:
            return shlex.split(result)
def path_completer(text, expected=None, classes=None, perm_level=None,
                   include_current_proj=False, typespec=None):
    '''
    :param text: String to tab-complete to a path matching the syntax project-name:folder/entity_or_folder_name
    :type text: string
    :param expected: "folder", "entity", "project", or None (no restriction) as to the types of answers to look for
    :type expected: string
    :param classes: if expected="entity", the possible data object classes that are acceptable
    :type classes: list of strings
    :param perm_level: the minimum permissions level required, e.g. "VIEW" or "CONTRIBUTE"
    :type perm_level: string
    :param include_current_proj: Indicate whether the current project's name should be a potential result
    :type include_current_proj: boolean
    :param typespec: passed through unchanged to get_data_matches

    Returns a list of completions for *text*: project names (each followed
    by a colon) first, then folder and data object matches.
    '''
    colon_pos = get_last_pos_of_char(':', text)
    slash_pos = get_last_pos_of_char('/', text)
    delim_pos = max(colon_pos, slash_pos)

    matches = []

    def _add_path_matches(dxproj, folderpath, pos):
        # Folder matches always; data object matches unless only folders
        # were asked for.
        matches.extend(get_folder_matches(text, pos, dxproj, folderpath))
        if expected == 'folder':
            return
        if classes is None:
            matches.extend(get_data_matches(text, pos, dxproj, folderpath, typespec=typespec))
            return
        for classname in classes:
            matches.extend(get_data_matches(text, pos, dxproj, folderpath, classname, typespec))

    # Project names are only candidates while no ':' or '/' has been typed.
    # The project set as dxpy.WORKSPACE_ID is left out unless
    # include_current_proj is true, and nothing is listed for empty text
    # unless a project is explicitly expected.
    if colon_pos < 0 and slash_pos < 0:
        if text != "" or expected == 'project':
            visible = [hit for hit in list(dxpy.find_projects(describe=True, level=perm_level))
                       if hit['id'] != dxpy.WORKSPACE_ID or include_current_proj]
            accepts = startswith(text)
            labels = [escape_completion_name_str(hit['describe']['name']) + ':' for hit in visible]
            matches += [label for label in labels if accepts(label)]

    if expected == 'project':
        return matches

    # Attempt to tab-complete to a folder or data object name
    if colon_pos < 0 and slash_pos >= 0:
        # No project given in the text: use dxpy.WORKSPACE_ID when it is set.
        if dxpy.WORKSPACE_ID is not None:
            dxproj = dxpy.get_handler(dxpy.WORKSPACE_ID)
            folderpath, entity_name = clean_folder_path(text)
            _add_path_matches(dxproj, folderpath, slash_pos)
    else:
        # Project is ambiguous, but attempt to resolve to an object or folder
        proj_ids, folderpath, entity_name = resolve_path(text, multi_projects=True)
        for proj in proj_ids:
            _add_path_matches(dxpy.get_handler(proj), folderpath, delim_pos)

    return matches
def interactive_help(in_class, param_desc, prompt):
    '''
    :param in_class: Class name of the input being requested; compared against dx_data_classes and the literals 'boolean', 'string', 'float', 'int' and 'hash'
    :type in_class: string
    :param param_desc: Input parameter description; its 'class' entry is required and its 'type' entry is optional
    :type param_desc: dict
    :param prompt: Prompt text passed to input() when a value is typed in directly
    :type prompt: string
    :returns: The chosen or typed value(s): ["project:id"] for a picked data object, ["job-id:field"] for a picked job output, otherwise the typed text (as a single element for strings, shlex-split for everything else)
    :rtype: list of strings

    Prints help for the parameter and interactively guides the user to a
    value. For data object classes a menu is shown repeatedly until a
    value is chosen or the user returns to the direct prompt.
    '''
    is_array = param_desc['class'].startswith("array:")
    print_param_help(param_desc)
    print()
    array_help_str = ', or <ENTER> to finish the list of inputs'
    if in_class in dx_data_classes:
        # Class is some sort of data object
        if dxpy.WORKSPACE_ID is not None:
            proj_name = None
            try:
                proj_name = dxpy.api.project_describe(dxpy.WORKSPACE_ID)['name']
            except Exception:
                # Best effort only: the working-directory hint is optional.
                # Ctrl-C is no longer swallowed here.
                pass
            if proj_name is not None:
                print('Your current working directory is ' + proj_name + ':' + dxpy.config.get('DX_CLI_WD', '/'))

        while True:
            print('Pick an option to find input data:')
            try:
                opt_num = pick(['List and choose from available data in the current project',
                                'List and choose from available data in the DNAnexus Reference Genomes project',
                                'Select another project to list and choose available data',
                                'Select an output from a previously-run job (current project only)',
                                'Return to original prompt (specify an ID or path directly)'])
            except KeyboardInterrupt:
                # Ctrl-C in the menu means "return to original prompt".
                opt_num = 4

            if opt_num == 0:
                query_project = dxpy.WORKSPACE_ID
            elif opt_num == 1:
                query_project = dxpy.find_one_project(name="Reference Genome Files", public=True,
                                                      billed_to="org-dnanexus", level="VIEW")['id']
            elif opt_num == 2:
                project_generator = dxpy.find_projects(level='VIEW', describe=True, explicit_perms=True)
                print('\nProjects to choose from:')
                project_choice = paginate_and_pick(project_generator,
                                                   (lambda result: result['describe']['name']))
                # paginate_and_pick signals "nothing chosen" with these
                # sentinel strings (see their use below); indexing one of
                # them with ['id'] would raise TypeError.
                if project_choice == 'none found':
                    print('No projects found')
                    continue
                elif project_choice == 'none picked':
                    continue
                query_project = project_choice['id']

            if opt_num in range(3):
                result_generator = dxpy.find_data_objects(classname=in_class,
                                                          typename=param_desc.get('type'),
                                                          describe=True,
                                                          project=query_project)
                print('\nAvailable data:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_ls_l_desc(result['describe'])))
                if result_choice == 'none found':
                    print('No compatible data found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    return [result_choice['project'] + ':' + result_choice['id']]
            elif opt_num == 3:
                # Select from previous jobs in current project
                result_generator = dxpy.find_jobs(project=dxpy.WORKSPACE_ID,
                                                  describe=True,
                                                  parent_job="none")
                print()
                print('Previously-run jobs to choose from:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_find_executions_string(result['describe'],
                                                                                             has_children=False,
                                                                                             single_result=True)),
                                                  filter_fn=(lambda result: result['describe']['state'] not in ['unresponsive', 'terminating', 'terminated', 'failed']))
                if result_choice == 'none found':
                    print('No jobs found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    if 'output' in result_choice['describe'] and result_choice['describe']['output'] is not None:
                        # list(): dict views cannot be indexed on Python 3.
                        keys = list(result_choice['describe']['output'].keys())
                    else:
                        # Look up 'applet' only when 'app' is absent; as a
                        # .get() default it was evaluated unconditionally
                        # and raised KeyError for jobs without that key.
                        # NOTE(review): these keys may live under
                        # result_choice['describe'] - confirm.
                        if 'app' in result_choice:
                            executable_id = result_choice['app']
                        else:
                            executable_id = result_choice['applet']
                        exec_handler = dxpy.get_handler(executable_id)
                        exec_desc = exec_handler.describe()
                        if 'outputSpec' not in exec_desc:
                            # This if block will either continue, return, or raise
                            print('No output spec found for the executable')
                            try:
                                field = input('Output field to use (^C or <ENTER> to cancel): ')
                                if field == '':
                                    continue
                                else:
                                    return [result_choice['id'] + ':' + field]
                            except KeyboardInterrupt:
                                continue
                        else:
                            keys = list(exec_desc['outputSpec'].keys())
                    if len(keys) > 1:
                        print('\nOutput fields to choose from:')
                        field_choice = pick(keys)
                        return [result_choice['id'] + ':' + keys[field_choice]]
                    elif len(keys) == 1:
                        print('Using the only output field: ' + keys[0])
                        return [result_choice['id'] + ':' + keys[0]]
                    else:
                        # Nothing to pick; the menu is shown again.
                        print('No available output fields')
            else:
                print(fill('Enter an ID or path (<TAB> twice for compatible ' + in_class + 's in current directory)' + (array_help_str if is_array else '')))
                return shlex.split(input(prompt))
    else:
        if in_class == 'boolean':
            if is_array:
                print(fill('Enter "true", "false"' + array_help_str))
            else:
                print(fill('Enter "true" or "false"'))
        elif in_class == 'string' and is_array:
            print(fill('Enter a nonempty string' + array_help_str))
        elif (in_class == 'float' or in_class == 'int') and is_array:
            print(fill('Enter a number' + array_help_str))
        elif in_class == 'hash':
            print(fill('Enter a quoted JSON hash'))
        result = input(prompt)
        if in_class == 'string':
            # Strings are returned verbatim, without shell-style splitting.
            return [result]
        else:
            return shlex.split(result)
import traceback
import logging
from time import sleep

# Configure the root logger with the library defaults, then create the
# logger used by this module.
logging.basicConfig()
logger = logging.getLogger(__name__)

# E3_PROJECT_ID = 'project-BKp5K980bpZZ096Xp1XQ02fZ'
# FRIP_DEV_PROJECT_ID = 'project-F3BpKqj07z6y979Z4X36P6z9'
# FRIP_PROJECT_ID = 'project-F3Bvp4004vxZxbpZBBJGPyYy'
# TEST_ANALYSIS_ID = 'analysis-F2v67b80bpZV0p9q788kgBGp'
# TEST_ANALYSIS_ID = 'analysis-F3BZ8v8036977yg98x815zB3'\

# Folder path for accession logs (its consumers are outside this chunk).
ACCESSION_OUTPUT_FOLDER = "/accession_log/"

# Resolved when the module is imported: the ID of the first project the
# search returns for the name 'ENCODE - ChIP Production'.
# NOTE(review): next() raises StopIteration if the search returns nothing,
# and this line performs an API query at import time - confirm both are
# acceptable for importers of this module.
APPLETS_PROJECT_ID = next(dxpy.find_projects(
    name='ENCODE - ChIP Production',
    return_handler=True)).get_id()

# Starts empty; presumably filled in elsewhere in the file - TODO confirm.
APPLETS = {}

# Per-assay settings keyed by assay kind ('tf', 'histone'): the names of
# the pre and final stages and the applet name used for that assay.
ASSAY_SPECIFICS = {
    'tf': {
        'pre_stage': 'SPP Peaks',
        'final_stage': 'Final IDR peak calls',
        'applet': 'encode_idr',
    },
    'histone': {
        'pre_stage': 'ENCODE Peaks',
        'final_stage': 'Final narrowpeaks',
        'applet': 'overlap_peaks',
    }
}
def _set_dxproject_id(self, latest_project=False):
    """
    Locate the DNAnexus project described by the instance attributes and record it on self.

    Lookup strategy, first applicable wins:

      1. ``self.dx_project_id`` is set: a handler is built straight from the ID
         (the "project-" prefix is added if missing). No search is performed.
      2. ``self.dx_project_name`` is set: a single-project search by name,
         billing account and whichever of the properties library_name,
         seq_run_name and seq_lane_index are set on the instance.
      3. Otherwise: a search by billing account and those properties only.

    When a project is found, ``self.dx_project``, ``self.dx_project_id``,
    ``self.dx_project_name``, ``self.dx_project_props`` and
    ``self.library_name`` are updated from it. When none is found, nothing
    is changed, which may indicate that the sequencing hasn't finished yet.

    Args:
        latest_project: `bool`. Only consulted in strategy 3: True selects
            the newest of several matching projects instead of raising.

    Returns:
        None. The outcome is stored on the instance.

    Raises:
        `DxMultipleProjectsWithSameLibraryName`: strategy 3 found several
            projects and `latest_project` is False.
        `DxMissingLibraryNameProperty`: the project found has no
            'library_name' property.
    """
    # Only attributes that are set take part in the property filter.
    dx_project_props = {}
    for prop_name, value in (("library_name", self.library_name),
                             ("seq_run_name", self.uhts_run_name)):
        if value:
            dx_project_props[prop_name] = value
    if self.sequencing_lane:
        dx_project_props["seq_lane_index"] = str(self.sequencing_lane)

    # The empty string doubles as the "nothing found" marker.
    dx_proj = ""
    if self.dx_project_id:
        if not self.dx_project_id.startswith("project-"):
            self.dx_project_id = "project-" + self.dx_project_id
        dx_proj = dxpy.DXProject(dxid=self.dx_project_id)
    elif self.dx_project_name:
        hit = dxpy.find_one_project(properties=dx_project_props,
                                    billed_to=self.billing_account_id,
                                    zero_ok=True,
                                    more_ok=False,
                                    name=self.dx_project_name)
        if hit:
            dx_proj = dxpy.DXProject(dxid=hit["id"])
    else:
        # Try to find by library_name and potential uhts_run_name.
        hits = list(dxpy.find_projects(properties=dx_project_props,
                                       billed_to=self.billing_account_id))
        if len(hits) == 1:
            dx_proj = dxpy.DXProject(dxid=hits[0]["id"])
        elif len(hits) > 1:
            dx_proj_ids = [hit["id"] for hit in hits]
            if not latest_project:
                raise DxMultipleProjectsWithSameLibraryName(
                    "Error - Multiple DNAnexus projects have the same value for the library_name property value of {library_name}. The projects are {dx_proj_ids}."
                    .format(library_name=self.library_name, dx_proj_ids=dx_proj_ids))
            dx_proj = gbsc_dnanexus.utils.select_newest_project(dx_project_ids=dx_proj_ids)

    if not dx_proj:
        return

    self.dx_project = dx_proj
    self.dx_project_id = dx_proj.id
    self.dx_project_name = dx_proj.name
    described = dxpy.api.project_describe(object_id=dx_proj.id,
                                          input_params={"fields": {"properties": True}})
    self.dx_project_props = described["properties"]
    try:
        self.library_name = self.dx_project_props["library_name"]
    except KeyError:
        msg = "DNAnexus project {} is missing the library_name property.".format(
            self.dx_project_name)
        raise DxMissingLibraryNameProperty(msg)
def path_completer(text, expected=None, classes=None, perm_level=None,
                   include_current_proj=False, typespec=None, visibility=None):
    '''
    :param text: String to tab-complete to a path matching the syntax project-name:folder/entity_or_folder_name
    :type text: string
    :param expected: "folder", "entity", "project", or None (no restriction) as to the types of answers to look for
    :type expected: string
    :param classes: if expected="entity", the possible data object classes that are acceptable
    :type classes: list of strings
    :param perm_level: the minimum permissions level required, e.g. "VIEW" or "CONTRIBUTE"
    :type perm_level: string
    :param include_current_proj: Indicate whether the current project's name should be a potential result
    :type include_current_proj: boolean
    :param typespec: passed through unchanged to get_data_matches
    :param visibility: Visibility with which to restrict the completion (one of "either", "visible", or "hidden") (default behavior is dependent on *text*)

    Returns a list of matches to the text and restricted by the requested parameters.

    Failures while listing a project's folders or data are ignored, so a
    completion attempt yields whatever matches were collected up to then.
    A path that cannot be resolved is reported on stderr.
    '''
    colon_pos = get_last_pos_of_char(':', text)
    slash_pos = get_last_pos_of_char('/', text)
    delim_pos = max(colon_pos, slash_pos)

    # First get projects if necessary
    matches = []
    if expected == 'project' and colon_pos > 0 and colon_pos == len(text) - 1:
        # The text is already "name:"; if such a project exists, finish the
        # completion by appending a space.
        if dxpy.find_one_project(zero_ok=True, name=text[:colon_pos]) is not None:
            return [text + " "]

    if colon_pos < 0 and slash_pos < 0:
        # Might be tab-completing a project, but don't ever include
        # whatever's set as dxpy.WORKSPACE_ID unless expected == "project"
        # Also, don't bother if text=="" and expected is NOT "project"
        # Also, add space if expected == "project"
        if text != "" or expected == 'project':
            results = dxpy.find_projects(describe=True, level=perm_level)
            if not include_current_proj:
                results = [r for r in results if r['id'] != dxpy.WORKSPACE_ID]
            matches += [escape_colon(r['describe']['name']) + ':'
                        for r in results if r['describe']['name'].startswith(text)]

    if expected == 'project':
        return matches

    # Attempt to tab-complete to a folder or data object name
    if colon_pos < 0 and slash_pos >= 0:
        # Not tab-completing a project, and the project is unambiguous
        # (use dxpy.WORKSPACE_ID)
        if dxpy.WORKSPACE_ID is not None:
            # Best effort in case dxpy.WORKSPACE_ID is garbage. Exception
            # (not a bare except) so Ctrl-C and SystemExit still propagate.
            try:
                dxproj = dxpy.get_handler(dxpy.WORKSPACE_ID)
                folderpath, _ = clean_folder_path(text)
                matches += get_folder_matches(text, slash_pos, dxproj, folderpath)
                if expected != 'folder':
                    if classes is not None:
                        for classname in classes:
                            matches += get_data_matches(text, slash_pos, dxproj,
                                                        folderpath, classname=classname,
                                                        typespec=typespec,
                                                        visibility=visibility)
                    else:
                        matches += get_data_matches(text, slash_pos, dxproj,
                                                    folderpath, typespec=typespec,
                                                    visibility=visibility)
            except Exception:
                pass
    else:
        # project is given by a path, but attempt to resolve to an
        # object or folder anyway
        try:
            proj_ids, folderpath, _ = resolve_path(text, multi_projects=True)
        except ResolutionError as details:
            # `unicode` exists only on Python 2; on Python 3 the bare name
            # raised NameError inside this handler. Fall back to str there.
            try:
                to_text = unicode
            except NameError:
                to_text = str
            sys.stderr.write("\n" + fill(to_text(details)))
            return matches

        for proj in proj_ids:
            # protects against dxpy.WORKSPACE_ID being garbage
            try:
                dxproj = dxpy.get_handler(proj)
                matches += get_folder_matches(text, delim_pos, dxproj, folderpath)
                if expected != 'folder':
                    if classes is not None:
                        for classname in classes:
                            matches += get_data_matches(text, delim_pos, dxproj,
                                                        folderpath, classname=classname,
                                                        typespec=typespec,
                                                        visibility=visibility)
                    else:
                        matches += get_data_matches(text, delim_pos, dxproj,
                                                    folderpath, typespec=typespec,
                                                    visibility=visibility)
            except Exception:
                pass
    return matches
def _add_path_matches(matches, text, delim_pos, dxproj, folderpath,
                      expected, classes, typespec, visibility):
    '''
    Extend *matches* in place with the folder completions and, unless only
    folders are expected, the data object completions found for *folderpath*
    in *dxproj*.

    *matches* is mutated (instead of a new list being returned) so that
    anything collected before a failing lookup is still kept by the caller.
    '''
    matches += get_folder_matches(text, delim_pos, dxproj, folderpath)
    if expected == 'folder':
        return

    if classes is None:
        matches += get_data_matches(text, delim_pos, dxproj, folderpath,
                                    typespec=typespec,
                                    visibility=visibility)
        return

    for classname in classes:
        matches += get_data_matches(text, delim_pos, dxproj, folderpath,
                                    classname=classname,
                                    typespec=typespec,
                                    visibility=visibility)


def path_completer(text, expected=None, classes=None, perm_level=None,
                   include_current_proj=False, typespec=None, visibility=None):
    '''
    :param text: String to tab-complete to a path matching the syntax project-name:folder/entity_or_folder_name
    :type text: string
    :param expected: "folder", "entity", "project", or None (no restriction) as to the types of answers to look for
    :type expected: string
    :param classes: if expected="entity", the possible data object classes that are acceptable
    :type classes: list of strings
    :param perm_level: the minimum permissions level required, e.g. "VIEW" or "CONTRIBUTE"
    :type perm_level: string
    :param include_current_proj: Indicate whether the current project's name should be a potential result
    :type include_current_proj: boolean
    :param typespec: forwarded unchanged to the data object lookup (get_data_matches) to restrict the results
    :param visibility: Visibility with which to restrict the completion (one of "either", "visible", or "hidden") (default behavior is dependent on *text*)

    Returns a list of matches to the text and restricted by the
    requested parameters.

    Lookup failures while listing folders or data objects are ignored on
    purpose (completion is best effort); a path that cannot be resolved is
    reported on stderr and the matches gathered so far are returned.
    '''
    colon_pos = get_last_pos_of_char(':', text)
    slash_pos = get_last_pos_of_char('/', text)
    delim_pos = max(colon_pos, slash_pos)

    matches = []

    # "project-name:" typed in full while a project is expected: if such a
    # project exists, finish the completion by appending a space.
    if expected == 'project' and colon_pos > 0 and colon_pos == len(text) - 1:
        if dxpy.find_one_project(zero_ok=True, name=text[:colon_pos]) is not None:
            return [text + " "]

    if colon_pos < 0 and slash_pos < 0:
        # Might be tab-completing a project, but don't ever include
        # whatever's set as dxpy.WORKSPACE_ID unless include_current_proj
        # is set. Also, don't bother if text == "" and expected is NOT
        # "project".
        if text != "" or expected == 'project':
            results = dxpy.find_projects(describe=True, level=perm_level)
            if not include_current_proj:
                results = [r for r in results if r['id'] != dxpy.WORKSPACE_ID]
            matches += [escape_colon(r['describe']['name']) + ':'
                        for r in results
                        if r['describe']['name'].startswith(text)]

    if expected == 'project':
        return matches

    # Attempt to tab-complete to a folder or data object name
    if colon_pos < 0 and slash_pos >= 0:
        # Not tab-completing a project, and the project is unambiguous
        # (use dxpy.WORKSPACE_ID)
        if dxpy.WORKSPACE_ID is not None:
            # Best effort in case dxpy.WORKSPACE_ID is garbage. Only
            # Exception is caught so that KeyboardInterrupt/SystemExit
            # still propagate.
            try:
                dxproj = dxpy.get_handler(dxpy.WORKSPACE_ID)
                folderpath, _ = clean_folder_path(text)
                _add_path_matches(matches, text, slash_pos, dxproj, folderpath,
                                  expected, classes, typespec, visibility)
            except Exception:
                pass
        return matches

    # project is given by a path, but attempt to resolve to an
    # object or folder anyway
    try:
        proj_ids, folderpath, _ = resolve_path(text, multi_projects=True)
    except ResolutionError as details:
        sys.stderr.write("\n" + fill(str(details)))
        return matches

    for proj in proj_ids:
        # Best effort per project: protects against dxpy.WORKSPACE_ID (or
        # any resolved ID) being garbage without swallowing
        # KeyboardInterrupt/SystemExit.
        try:
            dxproj = dxpy.get_handler(proj)
            _add_path_matches(matches, text, delim_pos, dxproj, folderpath,
                              expected, classes, typespec, visibility)
        except Exception:
            pass
    return matches