def setUpClass(cls):
    """Start (or attach to) the DNAnexus job the tests inspect, then wait for it.

    When ``RUN_JOB_ON_DX`` is truthy, the app named ``app_name`` (alias
    ``9.9.7``) is launched in the project called ``project_name`` with four
    fixed input files, writing to ``/purge/<app_name>``.  Otherwise the
    handler of a hard-coded, previously-run job is used.  Either way the
    handler is stored on ``cls.job`` and this call waits until it is done.

    Exits the process with status 1 when a job has to be launched but
    ``project_name`` is empty.
    """
    if RUN_JOB_ON_DX:
        if not project_name:
            # Parenthesised single-argument print behaves identically on
            # Python 2 and Python 3 (the bare statement is Python-2 only).
            print("'PROJ_NAME' environment variable must be defined!")
            sys.exit(1)

        working_project_id = dxpy.find_one_project(more_ok=False, name=project_name)["id"]
        run_args = {
            "project": working_project_id,
            "name": "vcfscope-measure on chr21",
            "folder": "/purge/" + app_name,
        }

        # NOTE(review): the links use ``project_id`` (defined outside this
        # function), not ``working_project_id`` -- presumably the project that
        # holds the fixed input files; confirm.
        input_hash = {
            "vcfgz": dxpy.dxlink("file-BkkjFkj098Gb2jZ1Yx533JFv", project_id),
            "bam": dxpy.dxlink("file-Bkkjj5Q098Gkvkb3Xx5Pxj1J", project_id),
            "bai": dxpy.dxlink("file-Bkkjj5Q098GzYx2bG5YJ3z34", project_id),
            "region": dxpy.dxlink("file-Bkkj22Q098Gz5yK1Q955G5gX", project_id),
        }

        app = dxpy.DXApp(name=app_name, alias="9.9.7")
        cls.job = app.run(input_hash, **run_args)
    else:
        job_id = "job-F1JpY9Q0pVj0BgpYBp14f31Q"
        cls.job = dxpy.DXJob(job_id)

    cls.job.wait_on_done()
def canonical_project(self):
    """Resolve ``self.project`` to the dxid of a single DNAnexus project.

    A value that already is a valid project dxid is returned as-is.
    Anything else is treated as a project name and searched for at
    ``VIEW`` level.

    Raises:
        MultipleObjectsSameNameError: If the search raises ``DXSearchError``,
            which is reported as more than one project sharing the name.
        ProjectNotFoundError: If the search finds no project with that name.
    """
    already_dxid = utils.is_valid_dxid(self.project, 'project')
    if already_dxid:
        return self.project

    with _wrap_dx_calls():
        try:
            match = dxpy.find_one_project(more_ok=False,
                                          zero_ok=True,
                                          level='VIEW',
                                          name=self.project)
        except DXSearchError as search_err:
            too_many = 'Found more than one project for given name: {!r}'.format(self.project)
            raise MultipleObjectsSameNameError(too_many, search_err)

    # zero_ok=True makes "no match" come back as None instead of raising
    if match is None:
        none_found = 'Found no projects for name: {!r}'.format(self.project)
        raise ProjectNotFoundError(none_found)

    return match['id']
def resolve_project(project_name, level=None):
    """Return a ``dxpy.DXProject`` for the project named exactly *project_name*.

    Args:
        project_name: Project name, matched with ``name_mode='exact'``.
        level: Forwarded to ``dxpy.find_one_project`` as ``level``.

    Returns:
        A ``dxpy.DXProject`` built from the ID of the project that was found.

    On any lookup error a message is printed and the process exits with
    status 1.
    """
    # NOTE(review): dxpy documents ``more_ok=True`` as the default, so
    # duplicate names may not be rejected despite the message below -- confirm
    # whether ``more_ok=False`` is wanted.
    try:
        project = dxpy.find_one_project(name=project_name,
                                        name_mode='exact',
                                        level=level,
                                        return_handler=False)
    except Exception:
        # Was a bare ``except:``, which also trapped KeyboardInterrupt and
        # SystemExit and reported them as a missing project.
        print('Could not find 1 and only 1 project named {0}.'.format(project_name))
        sys.exit(1)

    return dxpy.DXProject(project['id'])
def get_project(projectName, level=None):
    '''Returns the DXProject by name or errors out if not found.

    :param projectName: project name, matched with ``name_mode='exact'``
    :param level: forwarded to ``dxpy.find_one_project`` as ``level``
    :returns: a ``dxpy.DXProject`` built from the ID of the project found

    On any lookup error a message is printed and the process exits with
    status 1.
    '''
    try:
        project = dxpy.find_one_project(name=projectName,
                                        name_mode='exact',
                                        level=level,
                                        return_handler=False)
    except Exception:
        # Was a bare ``except:``; ``Exception`` keeps the print-and-exit
        # behaviour for lookup errors but no longer hides Ctrl-C/SystemExit.
        print("Could not find 1 and only 1 project named '" + projectName + "'.")
        sys.exit(1)

    return dxpy.DXProject(project['id'])
def resolve_applets_project():
    """Return the ID of the project named ``ENCODE_CHIP_SEQ_PROJECT_NAME``.

    The name is matched exactly.  If the lookup fails, a message is printed
    and the process exits with status 1.
    """
    try:
        project = dxpy.find_one_project(name=ENCODE_CHIP_SEQ_PROJECT_NAME,
                                        name_mode='exact',
                                        return_handler=False)
    except Exception:
        print('Could not find 1 and only 1 project named {0}.'.format(
            ENCODE_CHIP_SEQ_PROJECT_NAME))
        # The original fell through to ``return project['id']`` with
        # ``project`` unbound, raising UnboundLocalError.  Exit instead;
        # ``raise SystemExit(1)`` is what ``sys.exit(1)`` does, without
        # needing ``sys`` in scope.
        raise SystemExit(1)

    return project['id']
def get_project(projectName, level=None):
    '''Returns the DXProject by name or errors out if not found.

    :param projectName: project name, matched with ``name_mode='exact'``
    :param level: forwarded to ``dxpy.find_one_project`` as ``level``
    :returns: a ``dxpy.DXProject`` built from the ID of the project found

    On any lookup error a message is printed and the process exits with
    status 1.
    '''
    try:
        project = dxpy.find_one_project(name=projectName,
                                        name_mode='exact',
                                        level=level,
                                        return_handler=False)
    except Exception:
        # A bare ``except:`` would also swallow KeyboardInterrupt/SystemExit
        # and misreport them as a missing project.
        print("Could not find 1 and only 1 project named '" + projectName + "'.")
        sys.exit(1)

    return dxpy.DXProject(project['id'])
def resolve_project(project_name, level=None):
    """Return a ``dxpy.DXProject`` for the project named exactly *project_name*.

    Args:
        project_name: Project name, matched with ``name_mode='exact'``.
        level: Forwarded to ``dxpy.find_one_project`` as ``level``.

    Returns:
        A ``dxpy.DXProject`` built from the ID of the project that was found.

    On any lookup error a message is printed and the process exits with
    status 1.
    """
    try:
        project = dxpy.find_one_project(name=project_name,
                                        name_mode='exact',
                                        level=level,
                                        return_handler=False)
    except Exception:
        # Was a bare ``except:`` (also caught Ctrl-C/SystemExit).  The stray
        # builtin ``format()`` wrapper around the argument is dropped; the
        # printed text is unchanged.
        print('Could not find 1 and only 1 project named %s; ' % (project_name,))
        sys.exit(1)

    return dxpy.DXProject(project['id'])
def resolve_project(identifier, privs='r'):
    """Resolve a project name or ID to a handler, optionally requiring write access.

    The identifier is first looked up as an exact project name at ``VIEW``
    level; if nothing matches it is handed to ``dxpy.get_handler`` as an ID.

    Args:
        identifier: Project name or ID.
        privs: ``'w'`` rejects projects whose access level is ``VIEW``; any
            other value performs no access check.

    Returns:
        The project handler.

    Raises:
        ValueError: If the identifier cannot be resolved, or if ``privs`` is
            ``'w'`` and the project's level is ``VIEW``.
    """
    project = dxpy.find_one_project(name=identifier,
                                    level='VIEW',
                                    name_mode='exact',
                                    return_handler=True,
                                    zero_ok=True)
    if project is None:
        try:
            project = dxpy.get_handler(identifier)
        except Exception:
            # Narrowed from a bare ``except:`` so Ctrl-C is not turned into
            # a ValueError.
            logging.error('Could not find a unique project with name or id %s', identifier)
            raise ValueError(identifier)

    # Describe once and reuse the level for both the log line and the check.
    access_level = project.describe()['level']
    logging.debug('Project %s access level is %s', project.name, access_level)

    if privs == 'w' and access_level == 'VIEW':
        logging.error('Output project %s is read-only', identifier)
        raise ValueError(identifier)

    return project
def resolve_project(identifier, privs="r"):
    """Resolve a project name or ID to a handler, optionally requiring write access.

    The identifier is first looked up as an exact project name at ``VIEW``
    level; if nothing matches it is handed to ``dxpy.get_handler`` as an ID.

    Args:
        identifier: Project name or ID.
        privs: ``"w"`` rejects projects whose access level is ``VIEW``; any
            other value performs no access check.

    Returns:
        The project handler.

    Raises:
        ValueError: If the identifier cannot be resolved, or if ``privs`` is
            ``"w"`` and the project's level is ``VIEW``.
    """
    project = dxpy.find_one_project(name=identifier,
                                    level="VIEW",
                                    name_mode="exact",
                                    return_handler=True,
                                    zero_ok=True)
    if project is None:
        try:
            project = dxpy.get_handler(identifier)
        except Exception:
            # Narrowed from a bare ``except:`` so Ctrl-C is not turned into
            # a ValueError.
            logging.error("Could not find a unique project with name or id %s", identifier)
            raise ValueError(identifier)

    # Describe once and reuse the level for both the log line and the check.
    access_level = project.describe()["level"]
    logging.debug("Project %s access level is %s", project.name, access_level)

    if privs == "w" and access_level == "VIEW":
        logging.error("Output project %s is read-only", identifier)
        raise ValueError(identifier)

    return project
def find_reference_file_by_name(reference_name, project_name):
    '''Looks up a reference file by name in the project that holds common tools.

    From Joe Dale's code.

    Results are memoised in ``REFERENCE_FILES`` keyed by
    ``(reference_name, project id)``.  Each call prints the resolved file ID,
    prefixed with ``*`` when it came from the cache.

    :param reference_name: exact name of the file to find (searched recursively)
    :param project_name: exact name of the project to search in
    :returns: a ``dxpy.dxlink`` to the file handler
    '''
    project = dxpy.find_one_project(name=project_name,
                                    name_mode='exact',
                                    return_handler=False)
    cache_key = (reference_name, project['id'])

    cached = '*'
    if cache_key not in REFERENCE_FILES:
        REFERENCE_FILES[cache_key] = dxpy.find_one_data_object(classname="file",
                                                               name=reference_name,
                                                               project=project['id'],
                                                               recurse=True,
                                                               zero_ok=False,
                                                               more_ok=False,
                                                               return_handler=True)
        cached = ''

    reference_file = REFERENCE_FILES[cache_key]
    # Single-argument print(): same output on Python 2 and Python 3.
    print(cached + "Resolved %s to %s" % (reference_name, reference_file.get_id()))
    return dxpy.dxlink(reference_file)
def find_reference_file_by_name(reference_name, project_name):
    '''Looks up a reference file by name in the project that holds common tools.

    From Joe Dale's code.

    Results are memoised in ``REFERENCE_FILES`` keyed by
    ``(reference_name, project id)``, so the file search only runs the first
    time a given name/project pair is requested.

    :param reference_name: exact name of the file to find (searched recursively)
    :param project_name: exact name of the project to search in
    :returns: a ``dxpy.dxlink`` to the file handler
    '''
    project = dxpy.find_one_project(name=project_name,
                                    name_mode='exact',
                                    return_handler=False)
    cache_key = (reference_name, project['id'])

    # The ``cached`` marker local was only read by a commented-out debug
    # print, so both were removed.
    if cache_key not in REFERENCE_FILES:
        REFERENCE_FILES[cache_key] = dxpy.find_one_data_object(classname="file",
                                                               name=reference_name,
                                                               project=project['id'],
                                                               recurse=True,
                                                               zero_ok=False,
                                                               more_ok=False,
                                                               return_handler=True)

    return dxpy.dxlink(REFERENCE_FILES[cache_key])
def __dx_find_one_project(self):
    """Look up the DNAnexus project that matches ``self.runfolder``.

    Returns:
        The ID of the matching project, or None when the search raises
        ``DXSearchError``.
    """
    try:
        # The runfolder name is used as a regexp against project names.
        # more_ok=False and zero_ok=False are set so that anything other
        # than exactly one match is reported as DXSearchError.
        match = dxpy.find_one_project(zero_ok=False,
                                      more_ok=False,
                                      name_mode='regexp',
                                      name=self.runfolder)
    except dxpy.exceptions.DXSearchError:
        # Search failed: log it and report "no project" to the caller
        self.logger.debug(
            f'0 or >1 DNAnexus projects found for {self.runfolder}')
        return None

    project_id = match['id']
    self.logger.debug(
        f'{self.runfolder} DNAnexus project: {project_id}')
    return project_id
def __init__(self, project, ref_genome="1kg_v37", url_duration=ONE_YEAR):
    """
    :param project: a ``dxpy.DXProject``, a string starting with
        ``"project-"`` (used as a project ID), or any other string (used as
        a project name and looked up with ``dxpy.find_one_project``)
    :param ref_genome: stored unchanged as ``self.genome``
    :param url_duration: number of seconds for which the generated URL will be valid
    """
    # Normalise every accepted form of ``project`` to a DXProject handler
    if not isinstance(project, dxpy.DXProject):
        if project.startswith("project-"):
            project = dxpy.DXProject(project)
        else:
            found = dxpy.find_one_project(name=project)
            project = dxpy.DXProject(found["id"])
    assert isinstance(project, dxpy.DXProject)
    self.project = project

    # Root "Global" element, named after the project
    root = Element('Global')
    root.set("name", project.name)
    root.set("version", "1")
    self.Global = root

    self.url_duration = url_duration
    self.genome = ref_genome
def main():
    """Build the ENCODE Bismark DNA-ME workflow for one experiment.

    Resolves the working project, makes sure the experiment folder exists,
    locates the replicate (fastq) inputs, creates a new workflow (either in
    the public project when ``--export`` is given, or in the experiment
    folder of the working project) and hands it to ``populate_workflow``.

    Exits the process when no replicate names were supplied or when none of
    them can be found.
    """
    args = get_args()

    ## resolve projects
    project = resolve_project(ENCODE_DNA_ME_PROJECT_NAME)
    print('Project: ' + project.describe()['name'])
    print('Experiment to analyze: ' + args.experiment)
    if not project_has_folder(project, '/' + args.experiment):
        project.new_folder('/' + args.experiment)

    #TODO get all replicate ids from encoded DB from ENCSR (args.experiment)
    #TODO error out if ENCSR not found, status not complete etc.
    if args.test:
        source_id = project.get_id()
    else:
        source_id = resolve_project(ENCODE_SNAPSHOT_PROJECT, level='VIEW').get_id()

    def require_replicates(names, label):
        # Find the named replicates or exit(1); ``label`` is spliced into the
        # message ('' / ' for pair1' / ' for pair2').
        found = find_replicates(names, source_id, project, args.experiment, args.test)
        if not found:
            print("No replicates" + label + " found in project: " + project.name)
            print("Looking for " + ", ".join(names))
            sys.exit(1)
        return found

    ## resolve replicates/fastq inputs
    paired = args.paired
    if not paired:
        # ``not x`` also covers None, where ``len(x) < 1`` raised TypeError
        if not args.replicates:
            sys.exit('Need to have at least 1 replicate file (unpaired) use -r or --replicates')
        replicates = require_replicates(args.replicates, '')
        dx_reps = {'reads': [dxpy.dxlink(r) for r in replicates]}
        rnames = '-'.join([r.split('.')[0] for r in args.replicates])
    else:
        if not args.pair1 or not args.pair2:
            sys.exit("Need to have at least 1 replicate in pair1 (--r1/--pair1) and pair2 (--r2/--pair2")
        pair1reps = require_replicates(args.pair1, ' for pair1')
        pair2reps = require_replicates(args.pair2, ' for pair2')
        dx_reps = {
            'pair1_reads': [dxpy.dxlink(r) for r in pair1reps],
            'pair2_reads': [dxpy.dxlink(r) for r in pair2reps],
        }
        rnames = '-'.join([r.split('.')[0] for r in args.pair1 + args.pair2])

    gender = args.gender
    organism = args.organism
    #TODO determine paired or gender from ENCSR metadata

    # Now create a new workflow ()
    spec_name = args.experiment + '-' + rnames
    title_root = 'dx_dna_me_'
    name_root = 'ENCODE Bismark DNA-ME pipeline: '
    desc = 'The ENCODE Bismark pipeline for WGBS shotgun methylation analysis for experiment'
    if paired:
        title_root = title_root + '_paired_end'
        name_root = name_root + '(paired-end)'
    else:
        title_root = title_root + '_single_end'
        name_root = name_root + '(single-end)'

    if args.export:
        project_id = dxpy.find_one_project(name=ENCODE_PUBLIC_PROJECT,
                                           name_mode='exact',
                                           return_handler=False)['id']
        wf = dxpy.new_dxworkflow(title=title_root,
                                 name=name_root,
                                 description=desc,
                                 folder=PUBLIC_FOLDER,
                                 project=project_id)
    else:
        # project_id was assigned but never used here; use it instead of
        # calling project.get_id() a second time.
        project_id = project.get_id()
        wf = dxpy.new_dxworkflow(title='dx_dna_me_' + spec_name,
                                 name='ENCODE Bismark DNA-ME pipeline: ' + spec_name,
                                 description='The ENCODE Bismark pipeline for WGBS shotgun methylation analysis for experiment' + args.experiment,
                                 folder='/' + args.experiment,
                                 project=project_id)

    # Always passes the working project's id, also when exporting
    populate_workflow(wf, dx_reps, args.experiment, paired, gender, organism, project.id, args.export)
def main():
    """Build the ENCODE long RNA-seq workflow for one experiment.

    Resolves the working project, makes sure the experiment folder exists,
    finds the replicate files by exact name (copying them into the
    experiment folder unless ``--test`` is given), creates a new workflow
    (in the public project when ``--export`` is given, otherwise in the
    experiment folder) and hands it to ``populate_workflow``.

    Exits the process when no replicate names were supplied or when none of
    them can be found.
    """
    args = get_args()
    # ``not x`` also covers None, where ``len(x) < 1`` raised TypeError
    if not args.replicates:
        sys.exit('Need to have at least 1 replicate file.')

    # NOTE(review): a long-RNA-seq script resolving
    # ENCODE_DNA_ME_PROJECT_NAME looks like a copy/paste leftover -- confirm
    # this is the intended project constant.
    project = resolve_project(ENCODE_DNA_ME_PROJECT_NAME)
    print('Project: ' + project.describe()['name'])
    print('Experiment to analyze: ' + args.experiment)
    if not project_has_folder(project, '/' + args.experiment):
        project.new_folder('/' + args.experiment)

    #TODO get all replicate ids from encoded DB from ENCSR (args.experiment)
    #TODO error out if ENCSR not found, status not complete etc.
    if args.test:
        source_id = project.get_id()
    else:
        source_id = resolve_project(ENCODE_SNAPSHOT_PROJECT, level='VIEW').get_id()

    replicates = []
    for rep in args.replicates:
        replicates.extend(dxpy.find_data_objects(classname='file',
                                                 name=rep,
                                                 name_mode='exact',
                                                 project=source_id,
                                                 return_handler=False))

    if not args.test:
        replicates = copy_files(replicates, project.get_id(), "/" + args.experiment)

    if not replicates:
        print("No replicates found in project: " + project.name)
        print("Looking for " + ", ".join(args.replicates))
        sys.exit(1)

    inputs = {
        'rnd_seed': 12345,
        'paired': args.paired,
        'gender': args.gender,
        'organism': args.organism,
        'library_id': args.library,
        'nthreads': args.nthreads,
    }
    #TODO determine paired or gender from ENCSR metadata

    # Now create a new workflow ()
    inputs['spec_name'] = args.experiment + '-' + '-'.join(
        [r.split('.')[0] for r in args.replicates])
    title_root = 'dx_long_rna_seq_'
    name_root = 'ENCODE Long RNA Seq: '
    desc = 'The ENCODE RNA Seq pipeline for long RNAs'
    if args.paired:
        title_root = title_root + '_paired_end '
        name_root = name_root + '(paired-end) '
        inputs['stranded'] = True
    else:
        title_root = title_root + '_single_end '
        name_root = name_root + '(single-end) '
        inputs['stranded'] = False

    if args.export:
        project_id = dxpy.find_one_project(name=ENCODE_PUBLIC_PROJECT,
                                           name_mode='exact',
                                           return_handler=False)['id']
        wf = dxpy.new_dxworkflow(title=title_root,
                                 name=name_root,
                                 description=desc,
                                 folder=PUBLIC_FOLDER,
                                 project=project_id)
    else:
        # project_id was assigned but never used here; use it instead of
        # calling project.get_id() a second time.
        project_id = project.get_id()
        wf = dxpy.new_dxworkflow(title=title_root + inputs['spec_name'],
                                 name=name_root + inputs['spec_name'],
                                 description=desc + ' for experiment:' + args.experiment,
                                 folder='/' + args.experiment,
                                 project=project_id)

    # Always passes the working project's id, also when exporting
    populate_workflow(wf, replicates, args.experiment, inputs, project.id, args.export)
def main():
    """Build the ENCODE long RNA-seq workflow for one experiment.

    Resolves the working project, makes sure the experiment folder exists,
    finds the replicate files by exact name (copying them into the
    experiment folder unless ``--test`` is given), creates a new workflow
    (in the public project when ``--export`` is given, otherwise in the
    experiment folder) and hands it to ``populate_workflow``.

    Exits the process when no replicate names were supplied or when none of
    them can be found.
    """
    args = get_args()
    # ``not x`` also covers None, where ``len(x) < 1`` raised TypeError
    if not args.replicates:
        sys.exit('Need to have at least 1 replicate file.')

    # NOTE(review): a long-RNA-seq script resolving
    # ENCODE_DNA_ME_PROJECT_NAME looks like a copy/paste leftover -- confirm
    # this is the intended project constant.
    project = resolve_project(ENCODE_DNA_ME_PROJECT_NAME)
    print('Project: ' + project.describe()['name'])
    print('Experiment to analyze: ' + args.experiment)
    if not project_has_folder(project, '/'+args.experiment):
        project.new_folder('/'+args.experiment)

    #TODO get all replicate ids from encoded DB from ENCSR (args.experiment)
    #TODO error out if ENCSR not found, status not complete etc.
    if args.test:
        source_id = project.get_id()
    else:
        source_id = resolve_project(ENCODE_SNAPSHOT_PROJECT, level='VIEW').get_id()

    replicates = []
    for rep in args.replicates:
        replicates.extend(dxpy.find_data_objects(classname='file', name=rep,
                                                 name_mode='exact', project=source_id,
                                                 return_handler=False))

    if not args.test:
        replicates = copy_files(replicates, project.get_id(), "/"+args.experiment)

    if not replicates:
        print("No replicates found in project: " + project.name)
        print("Looking for " + ", ".join(args.replicates))
        sys.exit(1)

    inputs = {
        'rnd_seed': 12345,
        'paired': args.paired,
        'gender': args.gender,
        'organism': args.organism,
        'library_id': args.library,
        'nthreads': args.nthreads,
    }
    #TODO determine paired or gender from ENCSR metadata

    # Now create a new workflow ()
    inputs['spec_name'] = args.experiment+'-'+'-'.join([r.split('.')[0] for r in args.replicates])
    title_root = 'dx_long_rna_seq_'
    name_root = 'ENCODE Long RNA Seq: '
    desc = 'The ENCODE RNA Seq pipeline for long RNAs'
    if args.paired:
        title_root = title_root + '_paired_end '
        name_root = name_root + '(paired-end) '
        inputs['stranded'] = True
    else:
        title_root = title_root + '_single_end '
        name_root = name_root + '(single-end) '
        inputs['stranded'] = False

    if args.export:
        project_id = dxpy.find_one_project(name=ENCODE_PUBLIC_PROJECT, name_mode='exact',
                                           return_handler=False)['id']
        wf = dxpy.new_dxworkflow(title=title_root,
                                 name=name_root,
                                 description=desc,
                                 folder=PUBLIC_FOLDER,
                                 project=project_id)
    else:
        # project_id was assigned but never used here; use it instead of
        # calling project.get_id() a second time.
        project_id = project.get_id()
        wf = dxpy.new_dxworkflow(title=title_root+inputs['spec_name'],
                                 name=name_root+inputs['spec_name'],
                                 description=desc+' for experiment:' + args.experiment,
                                 folder='/'+args.experiment,
                                 project=project_id)

    # Always passes the working project's id, also when exporting
    populate_workflow(wf, replicates, args.experiment, inputs, project.id, args.export)
def main():
    """Build the ENCODE Bismark DNA-ME workflow for one experiment.

    Resolves the working project, makes sure the experiment folder exists,
    locates the replicate (fastq) inputs, creates a new workflow (either in
    the public project when ``--export`` is given, or in the experiment
    folder of the working project) and hands it to ``populate_workflow``.

    Exits the process when no replicate names were supplied or when none of
    them can be found.
    """
    args = get_args()

    ## resolve projects
    project = resolve_project(ENCODE_DNA_ME_PROJECT_NAME)
    print('Project: ' + project.describe()['name'])
    print('Experiment to analyze: ' + args.experiment)
    if not project_has_folder(project, '/' + args.experiment):
        project.new_folder('/' + args.experiment)

    #TODO get all replicate ids from encoded DB from ENCSR (args.experiment)
    #TODO error out if ENCSR not found, status not complete etc.
    if args.test:
        source_id = project.get_id()
    else:
        source_id = resolve_project(ENCODE_SNAPSHOT_PROJECT,
                                    level='VIEW').get_id()

    def require_replicates(names, label):
        # Find the named replicates or exit(1); ``label`` is spliced into the
        # message ('' / ' for pair1' / ' for pair2').
        found = find_replicates(names, source_id, project, args.experiment,
                                args.test)
        if not found:
            print("No replicates" + label + " found in project: " + project.name)
            print("Looking for " + ", ".join(names))
            sys.exit(1)
        return found

    ## resolve replicates/fastq inputs
    paired = args.paired
    if not paired:
        # ``not x`` also covers None, where ``len(x) < 1`` raised TypeError
        if not args.replicates:
            sys.exit(
                'Need to have at least 1 replicate file (unpaired) use -r or --replicates'
            )
        replicates = require_replicates(args.replicates, '')
        dx_reps = {'reads': [dxpy.dxlink(r) for r in replicates]}
        rnames = '-'.join([r.split('.')[0] for r in args.replicates])
    else:
        if not args.pair1 or not args.pair2:
            sys.exit(
                "Need to have at least 1 replicate in pair1 (--r1/--pair1) and pair2 (--r2/--pair2"
            )
        pair1reps = require_replicates(args.pair1, ' for pair1')
        pair2reps = require_replicates(args.pair2, ' for pair2')
        dx_reps = {
            'pair1_reads': [dxpy.dxlink(r) for r in pair1reps],
            'pair2_reads': [dxpy.dxlink(r) for r in pair2reps]
        }
        rnames = '-'.join([r.split('.')[0] for r in args.pair1 + args.pair2])

    gender = args.gender
    organism = args.organism
    #TODO determine paired or gender from ENCSR metadata

    # Now create a new workflow ()
    spec_name = args.experiment + '-' + rnames
    title_root = 'dx_dna_me_'
    name_root = 'ENCODE Bismark DNA-ME pipeline: '
    desc = 'The ENCODE Bismark pipeline for WGBS shotgun methylation analysis for experiment'
    if paired:
        title_root = title_root + '_paired_end'
        name_root = name_root + '(paired-end)'
    else:
        title_root = title_root + '_single_end'
        name_root = name_root + '(single-end)'

    if args.export:
        project_id = dxpy.find_one_project(name=ENCODE_PUBLIC_PROJECT,
                                           name_mode='exact',
                                           return_handler=False)['id']
        wf = dxpy.new_dxworkflow(title=title_root,
                                 name=name_root,
                                 description=desc,
                                 folder=PUBLIC_FOLDER,
                                 project=project_id)
    else:
        # project_id was assigned but never used here; use it instead of
        # calling project.get_id() a second time.
        project_id = project.get_id()
        wf = dxpy.new_dxworkflow(
            title='dx_dna_me_' + spec_name,
            name='ENCODE Bismark DNA-ME pipeline: ' + spec_name,
            description=
            'The ENCODE Bismark pipeline for WGBS shotgun methylation analysis for experiment'
            + args.experiment,
            folder='/' + args.experiment,
            project=project_id)

    # Always passes the working project's id, also when exporting
    populate_workflow(wf, dx_reps, args.experiment, paired, gender, organism,
                      project.id, args.export)
def path_completer(text, expected=None, classes=None, perm_level=None,
                   include_current_proj=False, typespec=None, visibility=None):
    '''
    :param text: String to tab-complete to a path matching the syntax project-name:folder/entity_or_folder_name
    :type text: string
    :param expected: "folder", "entity", "project", or None (no restriction) as to the types of answers to look for
    :type expected: string
    :param classes: if expected="entity", the possible data object classes that are acceptable
    :type classes: list of strings
    :param perm_level: the minimum permissions level required, e.g. "VIEW" or "CONTRIBUTE"
    :type perm_level: string
    :param include_current_proj: Indicate whether the current project's name should be a potential result
    :type include_current_proj: boolean
    :param typespec: forwarded unchanged to ``get_data_matches``
    :param visibility: Visibility with which to restrict the completion (one of "either", "visible", or "hidden") (default behavior is dependent on *text*)

    Returns a list of matches to the text and restricted by the
    requested parameters.  Lookup errors for an individual project are
    swallowed so that completion stays best-effort.
    '''
    colon_pos = get_last_pos_of_char(':', text)
    slash_pos = get_last_pos_of_char('/', text)
    delim_pos = max(colon_pos, slash_pos)

    matches = []

    def add_matches(pos, dxproj, folderpath):
        # Append folder matches and, unless only folders are wanted, data
        # object matches for one project.  Extends ``matches`` in place so
        # that results gathered before a failure are kept.
        matches.extend(get_folder_matches(text, pos, dxproj, folderpath))
        if expected == 'folder':
            return
        if classes is None:
            matches.extend(get_data_matches(text, pos, dxproj, folderpath,
                                            typespec=typespec,
                                            visibility=visibility))
            return
        for classname in classes:
            matches.extend(get_data_matches(text, pos, dxproj, folderpath,
                                            classname=classname,
                                            typespec=typespec,
                                            visibility=visibility))

    # First get projects if necessary
    if expected == 'project' and colon_pos > 0 and colon_pos == len(text) - 1:
        if dxpy.find_one_project(zero_ok=True, name=text[:colon_pos]) is not None:
            return [text + " "]

    if colon_pos < 0 and slash_pos < 0:
        # Might be tab-completing a project, but don't ever include
        # whatever's set as dxpy.WORKSPACE_ID unless expected == "project"
        # Also, don't bother if text=="" and expected is NOT "project"
        # Also, add space if expected == "project"
        if text != "" or expected == 'project':
            results = dxpy.find_projects(describe=True, level=perm_level)
            if not include_current_proj:
                results = [r for r in results if r['id'] != dxpy.WORKSPACE_ID]
            matches.extend(escape_colon(r['describe']['name']) + ':'
                           for r in results
                           if r['describe']['name'].startswith(text))

    if expected == 'project':
        return matches

    # Attempt to tab-complete to a folder or data object name
    if colon_pos < 0 and slash_pos >= 0:
        # Not tab-completing a project, and the project is unambiguous
        # (use dxpy.WORKSPACE_ID)
        if dxpy.WORKSPACE_ID is not None:
            # try-catch block in case dxpy.WORKSPACE_ID is garbage.
            # ``Exception`` rather than a bare except, so Ctrl-C still works.
            try:
                dxproj = dxpy.get_handler(dxpy.WORKSPACE_ID)
                folderpath, _entity_name = clean_folder_path(text)
                add_matches(slash_pos, dxproj, folderpath)
            except Exception:
                pass
    else:
        # project is given by a path, but attempt to resolve to an
        # object or folder anyway
        try:
            proj_ids, folderpath, _entity_name = resolve_path(text, multi_projects=True)
        except ResolutionError as details:
            sys.stderr.write("\n" + fill(unicode(details)))
            return matches
        for proj in proj_ids:
            # protects against dxpy.WORKSPACE_ID being garbage
            try:
                dxproj = dxpy.get_handler(proj)
                add_matches(delim_pos, dxproj, folderpath)
            except Exception:
                pass
    return matches
def interactive_help(in_class, param_desc, prompt):
    '''
    Print help for one input parameter and interactively obtain its value.

    For data-object classes (those in ``dx_data_classes``) the user is
    offered a menu: browse the current project, the public reference genome
    project, another project, the outputs of a previous job, or go back to
    typing an ID/path.  For all other classes a short hint is printed and
    the user is prompted directly.

    :param in_class: class of the parameter (with any "array:" prefix already removed, judging by how it is compared here -- TODO confirm with caller)
    :param param_desc: parameter description; ``param_desc['class']`` and ``param_desc.get('type')`` are read
    :param prompt: prompt string passed to ``input``
    :returns: list of strings entered or selected by the user
    '''
    is_array = param_desc['class'].startswith("array:")
    print_param_help(param_desc)
    print()
    array_help_str = ', or <ENTER> to finish the list of inputs'

    if in_class in dx_data_classes:
        # Class is some sort of data object
        if dxpy.WORKSPACE_ID is not None:
            proj_name = None
            try:
                proj_name = dxpy.api.project_describe(dxpy.WORKSPACE_ID)['name']
            except Exception:
                # Best effort only: the working-directory hint is optional
                pass
            if proj_name is not None:
                print('Your current working directory is ' + proj_name + ':' + dxpy.config.get('DX_CLI_WD', '/'))

        while True:
            print('Pick an option to find input data:')
            try:
                opt_num = pick(['List and choose from available data in the current project',
                                'List and choose from available data in the DNAnexus Reference Genomes project',
                                'Select another project to list and choose available data',
                                'Select an output from a previously-run job (current project only)',
                                'Return to original prompt (specify an ID or path directly)'])
            except KeyboardInterrupt:
                opt_num = 4

            if opt_num == 0:
                query_project = dxpy.WORKSPACE_ID
            elif opt_num == 1:
                query_project = dxpy.find_one_project(name="Reference Genome Files",
                                                      public=True,
                                                      billed_to="org-dnanexus",
                                                      level="VIEW")['id']
            elif opt_num == 2:
                project_generator = dxpy.find_projects(level='VIEW', describe=True, explicit_perms=True)
                print('\nProjects to choose from:')
                project_choice = paginate_and_pick(project_generator,
                                                   (lambda result: result['describe']['name']))
                # paginate_and_pick can return these sentinel strings (they
                # are checked for below); subscripting one with ['id'] raised
                # TypeError, so go back to the menu instead.
                if project_choice in ('none found', 'none picked'):
                    continue
                query_project = project_choice['id']

            if opt_num in range(3):
                result_generator = dxpy.find_data_objects(classname=in_class,
                                                          typename=param_desc.get('type'),
                                                          describe=True,
                                                          project=query_project)
                print('\nAvailable data:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_ls_l_desc(result['describe'])))
                if result_choice == 'none found':
                    print('No compatible data found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    return [result_choice['project'] + ':' + result_choice['id']]
            elif opt_num == 3:
                # Select from previous jobs in current project
                result_generator = dxpy.find_jobs(project=dxpy.WORKSPACE_ID,
                                                  describe=True,
                                                  parent_job="none")
                print()
                print('Previously-run jobs to choose from:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_find_executions_string(result['describe'],
                                                                                             has_children=False,
                                                                                             single_result=True)),
                                                  filter_fn=(lambda result: result['describe']['state'] not in ['unresponsive', 'terminating', 'terminated', 'failed']))
                if result_choice == 'none found':
                    print('No jobs found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    if 'output' in result_choice['describe'] and result_choice['describe']['output'] is not None:
                        # list(): dict views cannot be indexed on Python 3
                        keys = list(result_choice['describe']['output'].keys())
                    else:
                        # ``.get('app', result_choice['applet'])`` evaluated
                        # the fallback eagerly and raised KeyError whenever
                        # 'applet' was absent, even if 'app' was present.
                        if 'app' in result_choice:
                            executable_id = result_choice['app']
                        else:
                            executable_id = result_choice['applet']
                        exec_handler = dxpy.get_handler(executable_id)
                        exec_desc = exec_handler.describe()
                        if 'outputSpec' not in exec_desc:
                            # This if block will either continue, return, or raise
                            print('No output spec found for the executable')
                            try:
                                field = input('Output field to use (^C or <ENTER> to cancel): ')
                                if field == '':
                                    continue
                                else:
                                    return [result_choice['id'] + ':' + field]
                            except KeyboardInterrupt:
                                continue
                        else:
                            keys = list(exec_desc['outputSpec'].keys())
                    if len(keys) > 1:
                        print('\nOutput fields to choose from:')
                        field_choice = pick(keys)
                        return [result_choice['id'] + ':' + keys[field_choice]]
                    elif len(keys) == 1:
                        print('Using the only output field: ' + keys[0])
                        return [result_choice['id'] + ':' + keys[0]]
                    else:
                        print('No available output fields')
            else:
                print(fill('Enter an ID or path (<TAB> twice for compatible ' + in_class + 's in current directory)' + (array_help_str if is_array else '')))
                return shlex.split(input(prompt))
    else:
        if in_class == 'boolean':
            if is_array:
                print(fill('Enter "true", "false"' + array_help_str))
            else:
                print(fill('Enter "true" or "false"'))
        elif in_class == 'string' and is_array:
            print(fill('Enter a nonempty string' + array_help_str))
        elif (in_class == 'float' or in_class == 'int') and is_array:
            print(fill('Enter a number' + array_help_str))
        elif in_class == 'hash':
            print(fill('Enter a quoted JSON hash'))
        result = input(prompt)
        if in_class == 'string':
            # Strings are taken verbatim; everything else is shell-split
            return [result]
        else:
            return shlex.split(result)
def path_completer(text, expected=None, classes=None, perm_level=None,
                   include_current_proj=False, typespec=None, visibility=None):
    '''
    :param text: String to tab-complete to a path matching the syntax project-name:folder/entity_or_folder_name
    :type text: string
    :param expected: "folder", "entity", "project", or None (no restriction) as to the types of answers to look for
    :type expected: string
    :param classes: if expected="entity", the possible data object classes that are acceptable
    :type classes: list of strings
    :param perm_level: the minimum permissions level required, e.g. "VIEW" or "CONTRIBUTE"
    :type perm_level: string
    :param include_current_proj: Indicate whether the current project's name should be a potential result
    :type include_current_proj: boolean
    :param typespec: forwarded unchanged to ``get_data_matches``
    :param visibility: Visibility with which to restrict the completion (one of "either", "visible", or "hidden") (default behavior is dependent on *text*)

    Returns a list of matches to the text and restricted by the
    requested parameters.  Lookup errors for an individual project are
    swallowed so that completion stays best-effort.
    '''
    colon_pos = get_last_pos_of_char(':', text)
    slash_pos = get_last_pos_of_char('/', text)
    delim_pos = max(colon_pos, slash_pos)

    matches = []

    def add_matches(pos, dxproj, folderpath):
        # Append folder matches and, unless only folders are wanted, data
        # object matches for one project.  Extends ``matches`` in place so
        # that results gathered before a failure are kept.
        matches.extend(get_folder_matches(text, pos, dxproj, folderpath))
        if expected == 'folder':
            return
        if classes is None:
            matches.extend(get_data_matches(text, pos, dxproj, folderpath,
                                            typespec=typespec,
                                            visibility=visibility))
            return
        for classname in classes:
            matches.extend(get_data_matches(text, pos, dxproj, folderpath,
                                            classname=classname,
                                            typespec=typespec,
                                            visibility=visibility))

    # First get projects if necessary
    if expected == 'project' and colon_pos > 0 and colon_pos == len(text) - 1:
        if dxpy.find_one_project(zero_ok=True, name=text[:colon_pos]) is not None:
            return [text + " "]

    if colon_pos < 0 and slash_pos < 0:
        # Might be tab-completing a project, but don't ever include
        # whatever's set as dxpy.WORKSPACE_ID unless expected == "project"
        # Also, don't bother if text=="" and expected is NOT "project"
        # Also, add space if expected == "project"
        if text != "" or expected == 'project':
            results = dxpy.find_projects(describe=True, level=perm_level)
            if not include_current_proj:
                results = [r for r in results if r['id'] != dxpy.WORKSPACE_ID]
            matches.extend(escape_colon(r['describe']['name']) + ':'
                           for r in results
                           if r['describe']['name'].startswith(text))

    if expected == 'project':
        return matches

    # Attempt to tab-complete to a folder or data object name
    if colon_pos < 0 and slash_pos >= 0:
        # Not tab-completing a project, and the project is unambiguous
        # (use dxpy.WORKSPACE_ID)
        if dxpy.WORKSPACE_ID is not None:
            # try-catch block in case dxpy.WORKSPACE_ID is garbage.
            # ``Exception`` rather than a bare except, so Ctrl-C still works.
            try:
                dxproj = dxpy.get_handler(dxpy.WORKSPACE_ID)
                folderpath, _entity_name = clean_folder_path(text)
                add_matches(slash_pos, dxproj, folderpath)
            except Exception:
                pass
    else:
        # project is given by a path, but attempt to resolve to an
        # object or folder anyway
        try:
            proj_ids, folderpath, _entity_name = resolve_path(text, multi_projects=True)
        except ResolutionError as details:
            sys.stderr.write("\n" + fill(str(details)))
            return matches
        for proj in proj_ids:
            # protects against dxpy.WORKSPACE_ID being garbage
            try:
                dxproj = dxpy.get_handler(proj)
                add_matches(delim_pos, dxproj, folderpath)
            except Exception:
                pass
    return matches
def interactive_help(in_class, param_desc, prompt):
    '''
    Print help for one input parameter and interactively obtain its value.

    For data-object classes (those in ``dx_data_classes``) the user is
    offered a menu: browse the current project, the public reference genome
    project (restricted to the current workspace's region when one is set),
    another project, the outputs of a previous job, or go back to typing an
    ID/path.  For all other classes a short hint is printed and the user is
    prompted directly.

    :param in_class: class of the parameter (with any "array:" prefix already removed, judging by how it is compared here -- TODO confirm with caller)
    :param param_desc: parameter description; ``param_desc['class']`` and ``param_desc.get('type')`` are read
    :param prompt: prompt string passed to ``input``
    :returns: list of strings entered or selected by the user
    '''
    is_array = param_desc['class'].startswith("array:")
    print_param_help(param_desc)
    print()
    array_help_str = ', or <ENTER> to finish the list of inputs'

    if in_class in dx_data_classes:
        # Class is some sort of data object
        if dxpy.WORKSPACE_ID is not None:
            proj_name = None
            try:
                proj_name = dxpy.api.project_describe(dxpy.WORKSPACE_ID)['name']
            except Exception:
                # Best effort only: the working-directory hint is optional
                pass
            if proj_name is not None:
                print('Your current working directory is ' + proj_name + ':' + dxpy.config.get('DX_CLI_WD', '/'))

        while True:
            print('Pick an option to find input data:')
            try:
                opt_num = pick(['List and choose from available data in the current project',
                                'List and choose from available data in the DNAnexus Reference Genomes Files project',
                                'Select another project to list and choose available data',
                                'Select an output from a previously-run job (current project only)',
                                'Return to original prompt (specify an ID or path directly)'])
            except KeyboardInterrupt:
                opt_num = 4

            if opt_num == 0:
                query_project = dxpy.WORKSPACE_ID
            elif opt_num == 1:
                region = None
                if dxpy.WORKSPACE_ID:
                    region = dxpy.describe(dxpy.WORKSPACE_ID).get("region")
                query_project = dxpy.find_one_project(name="Reference Genome Files:*",
                                                      public=True,
                                                      billed_to="org-dnanexus_apps",
                                                      level="VIEW",
                                                      name_mode="glob",
                                                      region=region)['id']
            elif opt_num == 2:
                project_generator = dxpy.find_projects(level='VIEW', describe=True, explicit_perms=True)
                print('\nProjects to choose from:')
                project_choice = paginate_and_pick(project_generator,
                                                   (lambda result: result['describe']['name']))
                # paginate_and_pick can return these sentinel strings (they
                # are checked for below); subscripting one with ['id'] raised
                # TypeError, so go back to the menu instead.
                if project_choice in ('none found', 'none picked'):
                    continue
                query_project = project_choice['id']

            if opt_num in range(3):
                result_generator = dxpy.find_data_objects(classname=in_class,
                                                          typename=param_desc.get('type'),
                                                          describe=dict(fields=get_ls_l_desc_fields()),
                                                          project=query_project)
                print('\nAvailable data:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_ls_l_desc(result['describe'])))
                if result_choice == 'none found':
                    print('No compatible data found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    return [result_choice['project'] + ':' + result_choice['id']]
            elif opt_num == 3:
                # Select from previous jobs in current project
                result_generator = dxpy.find_jobs(project=dxpy.WORKSPACE_ID,
                                                  describe=True,
                                                  parent_job="none")
                print()
                print('Previously-run jobs to choose from:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_find_executions_string(result['describe'],
                                                                                             has_children=False,
                                                                                             single_result=True)),
                                                  filter_fn=(lambda result: result['describe']['state'] not in ['unresponsive', 'terminating', 'terminated', 'failed']))
                if result_choice == 'none found':
                    print('No jobs found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    if 'output' in result_choice['describe'] and result_choice['describe']['output'] is not None:
                        # list(): dict views cannot be indexed on Python 3
                        keys = list(result_choice['describe']['output'].keys())
                    else:
                        # ``.get('app', result_choice['applet'])`` evaluated
                        # the fallback eagerly and raised KeyError whenever
                        # 'applet' was absent, even if 'app' was present.
                        if 'app' in result_choice:
                            executable_id = result_choice['app']
                        else:
                            executable_id = result_choice['applet']
                        exec_handler = dxpy.get_handler(executable_id)
                        exec_desc = exec_handler.describe()
                        if 'outputSpec' not in exec_desc:
                            # This if block will either continue, return, or raise
                            print('No output spec found for the executable')
                            try:
                                field = input('Output field to use (^C or <ENTER> to cancel): ')
                                if field == '':
                                    continue
                                else:
                                    return [result_choice['id'] + ':' + field]
                            except KeyboardInterrupt:
                                continue
                        else:
                            keys = list(exec_desc['outputSpec'].keys())
                    if len(keys) > 1:
                        print('\nOutput fields to choose from:')
                        field_choice = pick(keys)
                        return [result_choice['id'] + ':' + keys[field_choice]]
                    elif len(keys) == 1:
                        print('Using the only output field: ' + keys[0])
                        return [result_choice['id'] + ':' + keys[0]]
                    else:
                        print('No available output fields')
            else:
                print(fill('Enter an ID or path (<TAB> twice for compatible ' + in_class + 's in current directory)' + (array_help_str if is_array else '')))
                return shlex.split(input(prompt))
    else:
        if in_class == 'boolean':
            if is_array:
                print(fill('Enter "true", "false"' + array_help_str))
            else:
                print(fill('Enter "true" or "false"'))
        elif in_class == 'string' and is_array:
            print(fill('Enter a nonempty string' + array_help_str))
        elif (in_class == 'float' or in_class == 'int') and is_array:
            print(fill('Enter a number' + array_help_str))
        elif in_class == 'hash':
            print(fill('Enter a quoted JSON hash'))
        result = input(prompt)
        if in_class == 'string':
            # Strings are taken verbatim; everything else is shell-split
            return [result]
        else:
            return shlex.split(result)
def _set_dxproject_id(self, latest_project=False):
    """
    Locates the DNAnexus project described by this instance and caches its details on `self`.

    The lookup strategy depends on which attributes are set, checked in this order:

      1. `self.dx_project_id` is set: the ID is normalized to start with "project-" and a
         project handler is built directly from it; no search is performed.
      2. `self.dx_project_name` is set: a single project is searched for by that name,
         restricted by the property filter described below and by
         `self.billing_account_id`. Zero matches are tolerated; dxpy is asked to treat
         multiple matches as an error (`more_ok=False`).
      3. Otherwise: projects are searched for by the property filter and
         `self.billing_account_id` only.

    The property filter is built from whichever of `self.library_name` ("library_name"),
    `self.uhts_run_name` ("seq_run_name") and `self.sequencing_lane` ("seq_lane_index",
    stringified) are truthy.

    When a project is found, the following attributes are (re)assigned: `self.dx_project`,
    `self.dx_project_id`, `self.dx_project_name`, `self.dx_project_props` and
    `self.library_name`. When no project is found the method returns without touching them.

    Args:
        latest_project: `bool`. Only consulted by strategy 3. When several projects match,
            True delegates the choice to `gbsc_dnanexus.utils.select_newest_project`
            instead of raising.

    Returns:
        `None` in all cases; results are communicated through the attributes listed above.

    Raises:
        `DxMultipleProjectsWithSameLibraryName`: Strategy 3 matched more than one project and
            `latest_project` is False.
        `DxMissingLibraryNameProperty`: The located project has no 'library_name' property.
    """
    # Assemble the property filter from whichever identifying attributes were supplied.
    search_props = {}
    candidate_props = (
        ("library_name", self.library_name),
        ("seq_run_name", self.uhts_run_name),
        ("seq_lane_index", self.sequencing_lane),
    )
    for prop_key, prop_value in candidate_props:
        if prop_value:
            # The lane index is stored as a string property on the project.
            search_props[prop_key] = str(prop_value) if prop_key == "seq_lane_index" else prop_value

    project = ""
    if self.dx_project_id:
        # Accept IDs given with or without the "project-" prefix.
        id_prefix = "project-"
        if not self.dx_project_id.startswith(id_prefix):
            self.dx_project_id = id_prefix + self.dx_project_id
        project = dxpy.DXProject(dxid=self.dx_project_id)
    elif self.dx_project_name:
        match = dxpy.find_one_project(
            properties=search_props,
            billed_to=self.billing_account_id,
            zero_ok=True,
            more_ok=False,
            name=self.dx_project_name)
        if match:
            project = dxpy.DXProject(dxid=match["id"])
    else:
        # Fall back to searching by library_name and potentially uhts_run_name / lane.
        matches = list(
            dxpy.find_projects(properties=search_props,
                               billed_to=self.billing_account_id))
        match_ids = [found["id"] for found in matches]
        if len(match_ids) == 1:
            project = dxpy.DXProject(dxid=match_ids[0])
        elif match_ids:
            if not latest_project:
                raise DxMultipleProjectsWithSameLibraryName(
                    "Error - Multiple DNAnexus projects have the same value for the library_name property value of {library_name}. The projects are {dx_proj_ids}."
                    .format(library_name=self.library_name,
                            dx_proj_ids=match_ids))
            # NOTE(review): the code below reads `.id` and `.name` from this result, so
            # select_newest_project presumably returns a project handler - confirm.
            project = gbsc_dnanexus.utils.select_newest_project(
                dx_project_ids=match_ids)

    if not project:
        # Nothing matched; leave the instance attributes untouched.
        return

    self.dx_project = project
    self.dx_project_id = project.id
    self.dx_project_name = project.name
    describe_fields = {"fields": {"properties": True}}
    self.dx_project_props = dxpy.api.project_describe(
        object_id=project.id, input_params=describe_fields)["properties"]
    try:
        self.library_name = self.dx_project_props["library_name"]
    except KeyError:
        raise DxMissingLibraryNameProperty(
            "DNAnexus project {} is missing the library_name property.".format(
                self.dx_project_name))
def main():
    """Create a dxWDL release inside a DNAnexus project.

    Command-line options:
        --folder   Release folder that already exists. When omitted, a new
                   timestamped folder under /releases is created, the
                   prerequisites are built and the jar files are uploaded.
        --project  Name of the project that holds the release
                   (default: "dxWDL").

    Steps: resolve the project, prepare the release folder, resolve the
    "dxWDLrt" runtime asset and every entry of `required_libs` in that
    folder, embed the resulting IDs into a copy of the dxWDL script written
    to /tmp/dxWDL, and hand off to `upload_script`.

    Raises:
        IndexError: One or more required libraries were not found in the
            release folder. The message names the missing libraries.
    """
    argparser = argparse.ArgumentParser(description="Create a release for dxWDL")
    argparser.add_argument("--folder", help="Release folder that already exists")
    argparser.add_argument("--project", help="Project where to place release",
                           default="dxWDL")
    args = argparser.parse_args()

    # Resolve the project by name; more than one match is an error.
    print("resolving project {}".format(args.project))
    project = dxpy.find_one_project(name=args.project, more_ok=False,
                                    return_handler=True)

    # Create the release folder, if needed.
    if args.folder is None:
        folder = time.strftime("/releases/%Y-%m-%d/%H%M%S")
        project.new_folder(folder, parents=True)
        make_prerequisits(project, folder)
        print("Uploading jar files")
        upload_libs(project, folder)
    else:
        folder = args.folder

    # Figure out what the current version is.
    version_id = release_version()
    print('version_id="{}"'.format(version_id))

    print("resolving dxWDL runtime asset")
    asset = dxpy.search.find_one_data_object(classname="record",
                                             project=project.get_id(),
                                             name="dxWDLrt",
                                             folder=folder,
                                             return_handler=True,
                                             more_ok=False)
    print("assetId={}".format(asset.get_id()))

    print("resolving jar files -- validation ")
    objs = [{"name": lib, "folder": folder} for lib in required_libs]
    descs = list(dxpy.search.resolve_data_objects(objs, project=project.get_id()))

    # resolve_data_objects returns one result list per requested object; an
    # empty list means that library is absent from the folder. Report all
    # missing libraries by name instead of failing on descs[i][0] with a bare
    # "list index out of range". IndexError is kept so the exception type is
    # the same as before.
    missing_libs = [spec["name"] for spec, found in zip(objs, descs) if not found]
    if missing_libs:
        raise IndexError(
            "Required libraries not found in folder {}: {}".format(
                folder, ", ".join(str(name) for name in missing_libs)))

    lib_object_ids = []
    for d in descs:
        # As before, the first match is used when a name resolves to several objects.
        print(d[0])
        lib_object_ids.append(d[0]["id"])
    print(lib_object_ids)

    # Embed configuration information into the dxWDL script by replacing the
    # placeholder assignments with the resolved values.
    print("Embedding configuration into dxWDL script")
    with open(os.path.join(top_dir, 'dxWDL'), 'r') as fd:
        script = fd.read()
    script = script.replace('version_id = None\n',
                            'version_id = "{}"\n'.format(version_id))
    script = script.replace('asset_id = None\n',
                            'asset_id = "{}"\n'.format(asset.get_id()))
    script = script.replace('project_id = None\n',
                            'project_id = "{}"\n'.format(project.get_id()))
    script = script.replace('lib_object_ids = None\n',
                            'lib_object_ids = {}\n'.format(lib_object_ids))

    # Clear leftovers from a previous run before writing the new script.
    # NOTE(review): upload_script presumably picks the script up from
    # /tmp/dxWDL - the fixed path is kept for that reason; confirm.
    rm_silent('/tmp/dxWDL')
    rm_silent('/tmp/dxWDL_latest')
    with open('/tmp/dxWDL', 'w') as fd:
        fd.write(script)
    upload_script(project, folder)