def run_analysis(analysis): """Launch analysis (outermost task, calls subtasks that monitor and run preprocessing, execution, postprocessing) """ logger.debug("analysis_manager.tasks run_analysis called") # updating status of analysis to running analysis = Analysis.objects.filter(uuid=analysis.uuid)[0] analysis_status = AnalysisStatus.objects.get(analysis=analysis) analysis.set_status(Analysis.RUNNING_STATUS) # DOWNLOADING # GETTING LIST OF DOWNLOADED REMOTE FILES datainputs = analysis.workflow_data_input_maps.all() download_tasks = [] for files in datainputs: cur_node_uuid = files.data_uuid cur_fs_uuid = Node.objects.get(uuid=cur_node_uuid).file_uuid # Adding downloading task if file is not local if not is_local(cur_fs_uuid): # getting the current file_uuid from the given node_uuid task_id = import_file.subtask((cur_fs_uuid, False, )) download_tasks.append(task_id) # PREPROCESSING task_id = run_analysis_preprocessing.subtask((analysis, )) download_tasks.append(task_id) result_chord, result_set = progress_chord(download_tasks)( chord_execution.subtask(analysis=analysis, )) # saving preprocessing taskset analysis_status.preprocessing_taskset_id = result_set.task_id analysis_status.save()
def _refinery_file_import(analysis_uuid):
    """Check on the status of the files being imported into Refinery.
    Fail the task appropriately if we cannot retrieve the status.
    """
    analysis = _get_analysis(analysis_uuid)
    analysis_status = _get_analysis_status(analysis_uuid)
    if not analysis_status.refinery_import_task_group_id:
        logger.info("Starting analysis '%s'", analysis)
        analysis.set_status(Analysis.RUNNING_STATUS)
        logger.info("Starting input file import tasks for analysis '%s'",
                    analysis)
        refinery_import_tasks = []
        if analysis.is_tool_based:
            tool = _get_workflow_tool(analysis_uuid)
            input_file_uuid_list = tool.get_input_file_uuid_list()
        else:
            input_file_uuid_list = analysis.get_input_file_uuid_list()
        for input_file_uuid in input_file_uuid_list:
            refinery_import_task = import_file.subtask((input_file_uuid, ))
            refinery_import_tasks.append(refinery_import_task)
        refinery_import_taskset = TaskSet(
            tasks=refinery_import_tasks).apply_async()
        refinery_import_taskset.save()
        analysis_status.refinery_import_task_group_id = \
            refinery_import_taskset.taskset_id
        analysis_status.save()
        run_analysis.retry(countdown=RETRY_INTERVAL)

    # check if all files were successfully imported into Refinery
    refinery_import_taskset = get_taskset_result(
        analysis_status.refinery_import_task_group_id)
    if not refinery_import_taskset.ready():
        logger.debug("Input file import pending for analysis '%s'", analysis)
        run_analysis.retry(countdown=RETRY_INTERVAL)
    elif not refinery_import_taskset.successful():
        error_msg = "Analysis '{}' failed during file import".format(analysis)
        logger.error(error_msg)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis.send_email()
        refinery_import_taskset.delete()
        return
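# `get_taskset_result` is called by _refinery_file_import above but is not
# defined in this listing. A plausible minimal implementation (an assumption,
# mirroring the TaskSetResult.restore() calls made directly in the
# run_analysis versions further down, and assuming the Celery 3.x API used
# throughout this module) would be:
from celery.result import TaskSetResult


def get_taskset_result(task_group_id):
    """Restore the saved result group for a previously started TaskSet."""
    return TaskSetResult.restore(task_group_id)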
def _get_galaxy_download_tasks(analysis):
    """Get file import tasks for Galaxy analysis results"""
    logger.debug("Preparing to download analysis results from Galaxy")
    task_list = []
    # retrieving list of files to download for workflow
    dl_files = analysis.workflow_dl_files
    # creating dictionary based on files to download predetermined by
    # workflow w/ keep operators
    dl_dict = {}
    for dl in dl_files.all():
        temp_dict = {}
        temp_dict['filename'] = dl.filename
        temp_dict['pair_id'] = dl.pair_id
        dl_dict[str(dl.step_id)] = temp_dict
    galaxy_instance = analysis.workflow.workflow_engine.instance
    try:
        download_list = galaxy_instance.get_history_file_list(
            analysis.history_id)
    except galaxy.client.ConnectionError as exc:
        error_msg = (
            "Error downloading Galaxy history files for analysis '%s': %s")
        logger.error(error_msg, analysis.name, exc.message)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis.galaxy_cleanup()
        return task_list
    # Iterating through files in current galaxy history
    for results in download_list:
        # download file if result state is "ok"
        if results['state'] == 'ok':
            file_type = results["type"]
            curr_file_id = results['name']
            if curr_file_id in dl_dict:
                curr_dl_dict = dl_dict[curr_file_id]
                result_name = curr_dl_dict['filename'] + '.' + file_type
                # size of file defined by galaxy
                file_size = results['file_size']
                # Determine whether galaxy results should be downloaded
                # through http or copied directly; HTML files are retrieved
                # as zip archives via the dataset URL
                if galaxy_instance.local_download and file_type != 'html':
                    download_url = results['file_name']
                else:
                    download_url = urlparse.urljoin(
                        galaxy_instance.base_url, '/'.join([
                            'datasets', str(results['dataset_id']),
                            'display?to_ext=txt'
                        ]))
                # workaround to set the correct file type for zip archives of
                # FastQC HTML reports produced by Galaxy dynamically
                if file_type == 'html':
                    file_type = 'zip'
                # TODO: when changing permanent=True, fix update of % download
                # of file
                filestore_uuid = create(source=download_url,
                                        filetype=file_type)
                # adding history files to django model
                temp_file = AnalysisResult(analysis_uuid=analysis.uuid,
                                           file_store_uuid=filestore_uuid,
                                           file_name=result_name,
                                           file_type=file_type)
                temp_file.save()
                analysis.results.add(temp_file)
                analysis.save()
                # downloading analysis results into file_store
                # only download files if size is greater than 0
                if file_size > 0:
                    task_id = import_file.subtask(
                        (filestore_uuid, False, file_size))
                    task_list.append(task_id)
    return task_list
def _get_galaxy_download_task_ids(analysis):
    """Get file import tasks for Galaxy analysis results"""
    logger.debug("Preparing to download analysis results from Galaxy")
    task_id_list = []
    # retrieving list of files to download for workflow
    tool = _get_workflow_tool(analysis.uuid)
    tool.create_analysis_output_node_connections()
    galaxy_instance = analysis.workflow.workflow_engine.instance
    try:
        download_list = tool.get_galaxy_dataset_download_list()
    except galaxy.client.ConnectionError as exc:
        error_msg = (
            "Error downloading Galaxy history files for analysis '%s': %s")
        logger.error(error_msg, analysis.name, exc.message)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis.galaxy_cleanup()
        return task_id_list
    # Iterating through files in current galaxy history
    for results in download_list:
        # download file if result state is "ok"
        if results['state'] == 'ok':
            file_extension = results["type"]
            result_name = "{}.{}".format(results['name'], file_extension)
            # size of file defined by galaxy
            file_size = results['file_size']
            # Determine whether galaxy results should be downloaded through
            # http or copied directly; HTML files are retrieved as zip
            # archives via the dataset URL
            if galaxy_instance.local_download and file_extension != 'html':
                download_url = results['file_name']
            else:
                download_url = urlparse.urljoin(
                    galaxy_instance.base_url, '/'.join([
                        'datasets', str(results['dataset_id']),
                        'display?to_ext=txt'
                    ]))
            file_store_item = FileStoreItem(source=download_url)
            # workaround to set the correct file type for zip archives of
            # FastQC HTML reports produced by Galaxy dynamically
            if file_extension == 'html':
                file_extension = 'zip'
            # assign file type manually since it cannot be inferred from
            # source
            try:
                extension = FileExtension.objects.get(name=file_extension)
            except (FileExtension.DoesNotExist,
                    FileExtension.MultipleObjectsReturned) as exc:
                logger.warn("Could not assign type to file '%s' using "
                            "extension '%s': %s",
                            file_store_item, file_extension, exc)
            else:
                file_store_item.filetype = extension.filetype
            file_store_item.save()
            # adding history files to django model
            temp_file = AnalysisResult(analysis_uuid=analysis.uuid,
                                       file_store_uuid=file_store_item.uuid,
                                       file_name=result_name,
                                       file_type=file_extension)
            temp_file.save()
            analysis.results.add(temp_file)
            analysis.save()
            # downloading analysis results into file_store
            # only download files if size is greater than 0
            if file_size > 0:
                task_id = import_file.subtask(
                    (file_store_item.uuid, False, file_size))
                task_id_list.append(task_id)
    return task_id_list
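# Hypothetical usage sketch (not from the codebase): the subtask list returned
# by _get_galaxy_download_task_ids() is meant to be applied as a task group,
# in the same way the run_analysis versions below drive their galaxy_export
# phase. The helper name `_start_galaxy_export` is an assumption; TaskSet and
# the AnalysisStatus field mirror the surrounding code (Celery 3.x API).
from celery.task.sets import TaskSet


def _start_galaxy_export(analysis, analysis_status):
    """Kick off result downloads and record the task group id (sketch)."""
    galaxy_export_tasks = _get_galaxy_download_task_ids(analysis)
    galaxy_export_taskset = TaskSet(tasks=galaxy_export_tasks).apply_async()
    galaxy_export_taskset.save()
    analysis_status.galaxy_export_task_group_id = \
        galaxy_export_taskset.taskset_id
    analysis_status.save()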
def run_analysis(analysis_uuid): """Manage analysis execution""" RETRY_INTERVAL = 5 # seconds try: analysis = Analysis.objects.get(uuid=analysis_uuid) except (Analysis.DoesNotExist, Analysis.MultipleObjectsReturned) as exc: logger.error("Can not retrieve analysis with UUID '%s': '%s'", analysis_uuid, exc) run_analysis.update_state(state=celery.states.FAILURE) return # if cancelled by user if analysis.failed(): return try: analysis_status = AnalysisStatus.objects.get(analysis=analysis) except (AnalysisStatus.DoesNotExist, AnalysisStatus.MultipleObjectsReturned) as exc: logger.error("Can not retrieve status for analysis '%s': '%s'", analysis, exc) run_analysis.update_state(state=celery.states.FAILURE) return if not analysis_status.refinery_import_task_group_id: logger.info("Starting analysis '%s'", analysis) analysis.set_status(Analysis.RUNNING_STATUS) logger.info("Starting input file import tasks for analysis '%s'", analysis) refinery_import_tasks = [] for input_file_uuid in analysis.get_input_file_uuid_list(): refinery_import_task = import_file.subtask( (input_file_uuid, False, )) refinery_import_tasks.append(refinery_import_task) refinery_import = TaskSet(tasks=refinery_import_tasks).apply_async() refinery_import.save() analysis_status.refinery_import_task_group_id = \ refinery_import.taskset_id analysis_status.save() run_analysis.retry(countdown=RETRY_INTERVAL) # check if all files were successfully imported into Refinery refinery_import = TaskSetResult.restore( analysis_status.refinery_import_task_group_id) if not refinery_import.ready(): logger.debug("Input file import pending for analysis '%s'", analysis) run_analysis.retry(countdown=RETRY_INTERVAL) elif not refinery_import.successful(): logger.error("Analysis '%s' failed during file import", analysis) analysis.set_status(Analysis.FAILURE_STATUS) analysis.send_email() refinery_import.delete() return # import files into Galaxy and start analysis if not analysis_status.galaxy_import_task_group_id: logger.debug("Starting analysis execution in Galaxy") try: analysis.prepare_galaxy() except (requests.exceptions.ConnectionError, galaxy.client.ConnectionError): logger.error("Analysis '%s' failed during preparation in Galaxy", analysis) analysis.set_status(Analysis.FAILURE_STATUS) analysis.send_email() refinery_import.delete() return galaxy_import_tasks = [ start_galaxy_analysis.subtask((analysis_uuid, )), ] galaxy_import = TaskSet(tasks=galaxy_import_tasks).apply_async() galaxy_import.save() analysis_status.galaxy_import_task_group_id = \ galaxy_import.taskset_id analysis_status.set_galaxy_history_state(AnalysisStatus.PROGRESS) run_analysis.retry(countdown=RETRY_INTERVAL) # check if data files were successfully imported into Galaxy galaxy_import = TaskSetResult.restore( analysis_status.galaxy_import_task_group_id) if not galaxy_import.ready(): logger.debug("Analysis '%s' pending in Galaxy", analysis) run_analysis.retry(countdown=RETRY_INTERVAL) elif not galaxy_import.successful(): logger.error("Analysis '%s' failed in Galaxy", analysis) analysis.set_status(Analysis.FAILURE_STATUS) analysis_status.set_galaxy_history_state(AnalysisStatus.ERROR) analysis.send_email() refinery_import.delete() galaxy_import.delete() analysis.galaxy_cleanup() return # check if analysis has finished running in Galaxy try: percent_complete = analysis.galaxy_progress() except RuntimeError: analysis_status.set_galaxy_history_state(AnalysisStatus.ERROR) analysis.send_email() refinery_import.delete() galaxy_import.delete() analysis.galaxy_cleanup() return except 
galaxy.client.ConnectionError: analysis_status.set_galaxy_history_state(AnalysisStatus.UNKNOWN) run_analysis.retry(countdown=RETRY_INTERVAL) else: # workaround to avoid moving the progress bar backward if analysis_status.galaxy_history_progress < percent_complete: analysis_status.galaxy_history_progress = percent_complete analysis_status.save() if percent_complete < 100: analysis_status.set_galaxy_history_state(AnalysisStatus.PROGRESS) run_analysis.retry(countdown=RETRY_INTERVAL) else: analysis_status.set_galaxy_history_state(AnalysisStatus.OK) # retrieve analysis results from Galaxy if not analysis_status.galaxy_export_task_group_id: galaxy_export_tasks = get_galaxy_download_tasks(analysis) logger.info("Starting downloading of results from Galaxy for analysis " "'%s'", analysis) galaxy_export = TaskSet(tasks=galaxy_export_tasks).apply_async() galaxy_export.save() analysis_status.galaxy_export_task_group_id = galaxy_export.taskset_id analysis_status.save() run_analysis.retry(countdown=RETRY_INTERVAL) # check if analysis results have finished downloading from Galaxy galaxy_export = TaskSetResult.restore( analysis_status.galaxy_export_task_group_id) if not galaxy_export.ready(): logger.debug("Results download pending for analysis '%s'", analysis) run_analysis.retry(countdown=RETRY_INTERVAL) # all tasks must have succeeded or failed elif not galaxy_export.successful(): logger.error("Analysis '%s' failed while downloading results from " "Galaxy", analysis) analysis.set_status(Analysis.FAILURE_STATUS) analysis.send_email() refinery_import.delete() galaxy_import.delete() galaxy_export.delete() analysis.galaxy_cleanup() return # attach workflow outputs back to dataset isatab graph if analysis.workflow.type == Workflow.ANALYSIS_TYPE: analysis.attach_outputs_dataset() elif analysis.workflow.type == Workflow.DOWNLOAD_TYPE: analysis.attach_outputs_downloads() else: logger.warning("Unknown workflow type '%s' in analysis '%s'", analysis.workflow.type, analysis.name) analysis.set_status(Analysis.SUCCESS_STATUS) analysis.rename_results() analysis.send_email() logger.info("Analysis '%s' finished successfully", analysis) analysis.galaxy_cleanup() refinery_import.delete() galaxy_import.delete() galaxy_export.delete() # Update file count and file size of the corresponding data set analysis.data_set.file_count = analysis.data_set.get_file_count() analysis.data_set.file_size = analysis.data_set.get_file_size() analysis.data_set.save()
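# Hypothetical dispatch sketch (not from the codebase): run_analysis above is
# a Celery task keyed by the analysis UUID, so a caller that has just created
# the Analysis would presumably ensure an AnalysisStatus exists (the task
# fails early without one) and then queue the task asynchronously. The helper
# name `start_analysis` is an assumption.
def start_analysis(analysis):
    """Queue run_analysis for an Analysis, creating its status record."""
    AnalysisStatus.objects.get_or_create(analysis=analysis)
    run_analysis.delay(analysis.uuid)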
def get_galaxy_download_tasks(analysis):
    """Get file import tasks for Galaxy analysis results"""
    logger.debug("Preparing to download analysis results from Galaxy")
    # retrieving list of files to download for workflow
    dl_files = analysis.workflow_dl_files
    # creating dictionary based on files to download predetermined by
    # workflow w/ keep operators
    dl_dict = {}
    for dl in dl_files.all():
        temp_dict = {}
        temp_dict['filename'] = dl.filename
        temp_dict['pair_id'] = dl.pair_id
        dl_dict[str(dl.step_id)] = temp_dict
    task_list = []
    galaxy_instance = analysis.workflow.workflow_engine.instance
    try:
        download_list = galaxy_instance.get_history_file_list(
            analysis.history_id)
    except galaxy.client.ConnectionError as exc:
        error_msg = "Error downloading Galaxy history files for analysis " \
                    "'%s': %s"
        logger.error(error_msg, analysis.name, exc.message)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis.galaxy_cleanup()
        return task_list
    # Iterating through files in current galaxy history
    for results in download_list:
        # download file if result state is "ok"
        if results['state'] == 'ok':
            file_type = results["type"]
            curr_file_id = results['name']
            if curr_file_id in dl_dict:
                curr_dl_dict = dl_dict[curr_file_id]
                result_name = curr_dl_dict['filename'] + '.' + file_type
                # size of file defined by galaxy
                file_size = results['file_size']
                # Determine whether galaxy results should be downloaded
                # through http or copied directly; HTML files are retrieved
                # as zip archives via the dataset URL
                if galaxy_instance.local_download and file_type != 'html':
                    download_url = results['file_name']
                else:
                    download_url = urlparse.urljoin(
                        galaxy_instance.base_url,
                        '/'.join(['datasets', str(results['dataset_id']),
                                  'display?to_ext=txt']))
                # workaround to set the correct file type for zip archives of
                # FastQC HTML reports produced by Galaxy dynamically
                if file_type == 'html':
                    file_type = 'zip'
                # TODO: when changing permanent=True, fix update of % download
                # of file
                filestore_uuid = create(
                    source=download_url, filetype=file_type, permanent=False)
                # adding history files to django model
                temp_file = AnalysisResult(
                    analysis_uuid=analysis.uuid,
                    file_store_uuid=filestore_uuid,
                    file_name=result_name,
                    file_type=file_type)
                temp_file.save()
                analysis.results.add(temp_file)
                analysis.save()
                # downloading analysis results into file_store
                # only download files if size is greater than 0
                if file_size > 0:
                    # local download, force copying into the file_store
                    # instead of symlinking
                    if galaxy_instance.local_download:
                        task_id = import_file.subtask(
                            (filestore_uuid, False, True, file_size,))
                    else:
                        task_id = import_file.subtask(
                            (filestore_uuid, False, False, file_size,))
                    task_list.append(task_id)
    return task_list
def run_analysis(analysis_uuid): """Manage analysis execution""" RETRY_INTERVAL = 5 # seconds try: analysis = Analysis.objects.get(uuid=analysis_uuid) except (Analysis.DoesNotExist, Analysis.MultipleObjectsReturned) as exc: logger.error("Can not retrieve analysis with UUID '%s': '%s'", analysis_uuid, exc) run_analysis.update_state(state=celery.states.FAILURE) return # if cancelled by user if analysis.failed(): return try: analysis_status = AnalysisStatus.objects.get(analysis=analysis) except (AnalysisStatus.DoesNotExist, AnalysisStatus.MultipleObjectsReturned) as exc: logger.error("Can not retrieve status for analysis '%s': '%s'", analysis, exc) run_analysis.update_state(state=celery.states.FAILURE) return if not analysis_status.refinery_import_task_group_id: logger.info("Starting analysis '%s'", analysis) analysis.set_status(Analysis.RUNNING_STATUS) logger.info("Starting input file import tasks for analysis '%s'", analysis) refinery_import_tasks = [] for input_file_uuid in analysis.get_input_file_uuid_list(): refinery_import_task = import_file.subtask((input_file_uuid, )) refinery_import_tasks.append(refinery_import_task) refinery_import = TaskSet(tasks=refinery_import_tasks).apply_async() refinery_import.save() analysis_status.refinery_import_task_group_id = \ refinery_import.taskset_id analysis_status.save() run_analysis.retry(countdown=RETRY_INTERVAL) # check if all files were successfully imported into Refinery refinery_import = TaskSetResult.restore( analysis_status.refinery_import_task_group_id) if not refinery_import.ready(): logger.debug("Input file import pending for analysis '%s'", analysis) run_analysis.retry(countdown=RETRY_INTERVAL) elif not refinery_import.successful(): error_msg = "Analysis '{}' failed during file import".format(analysis) logger.error(error_msg) analysis.set_status(Analysis.FAILURE_STATUS, error_msg) analysis.send_email() refinery_import.delete() return # import files into Galaxy and start analysis if not analysis_status.galaxy_import_task_group_id: logger.debug("Starting analysis execution in Galaxy") try: analysis.prepare_galaxy() except (requests.exceptions.ConnectionError, galaxy.client.ConnectionError): error_msg = "Analysis '{}' failed during preparation in " \ "Galaxy".format(analysis) logger.error(error_msg) analysis.set_status(Analysis.FAILURE_STATUS, error_msg) analysis.send_email() refinery_import.delete() return galaxy_import_tasks = [ start_galaxy_analysis.subtask((analysis_uuid, )), ] galaxy_import = TaskSet(tasks=galaxy_import_tasks).apply_async() galaxy_import.save() analysis_status.galaxy_import_task_group_id = \ galaxy_import.taskset_id analysis_status.set_galaxy_history_state(AnalysisStatus.PROGRESS) run_analysis.retry(countdown=RETRY_INTERVAL) # check if data files were successfully imported into Galaxy galaxy_import = TaskSetResult.restore( analysis_status.galaxy_import_task_group_id) if not galaxy_import.ready(): logger.debug("Analysis '%s' pending in Galaxy", analysis) run_analysis.retry(countdown=RETRY_INTERVAL) elif not galaxy_import.successful(): error_msg = "Analysis '{}' failed in Galaxy".format(analysis) logger.error(error_msg) analysis.set_status(Analysis.FAILURE_STATUS, error_msg) analysis_status.set_galaxy_history_state(AnalysisStatus.ERROR) analysis.send_email() refinery_import.delete() galaxy_import.delete() analysis.galaxy_cleanup() return # check if analysis has finished running in Galaxy try: percent_complete = analysis.galaxy_progress() except RuntimeError: analysis_status.set_galaxy_history_state(AnalysisStatus.ERROR) 
analysis.send_email() refinery_import.delete() galaxy_import.delete() analysis.galaxy_cleanup() return except galaxy.client.ConnectionError: analysis_status.set_galaxy_history_state(AnalysisStatus.UNKNOWN) run_analysis.retry(countdown=RETRY_INTERVAL) else: # workaround to avoid moving the progress bar backward if analysis_status.galaxy_history_progress < percent_complete: analysis_status.galaxy_history_progress = percent_complete analysis_status.save() if percent_complete < 100: analysis_status.set_galaxy_history_state(AnalysisStatus.PROGRESS) run_analysis.retry(countdown=RETRY_INTERVAL) else: analysis_status.set_galaxy_history_state(AnalysisStatus.OK) # retrieve analysis results from Galaxy if not analysis_status.galaxy_export_task_group_id: galaxy_export_tasks = get_galaxy_download_tasks(analysis) logger.info( "Starting downloading of results from Galaxy for analysis " "'%s'", analysis) galaxy_export = TaskSet(tasks=galaxy_export_tasks).apply_async() galaxy_export.save() analysis_status.galaxy_export_task_group_id = galaxy_export.taskset_id analysis_status.save() run_analysis.retry(countdown=RETRY_INTERVAL) # check if analysis results have finished downloading from Galaxy galaxy_export = TaskSetResult.restore( analysis_status.galaxy_export_task_group_id) if not galaxy_export.ready(): logger.debug("Results download pending for analysis '%s'", analysis) run_analysis.retry(countdown=RETRY_INTERVAL) # all tasks must have succeeded or failed elif not galaxy_export.successful(): error_msg = "Analysis '%s' failed while downloading results from " \ "Galaxy".format(analysis) logger.error(error_msg) analysis.set_status(Analysis.FAILURE_STATUS, error_msg) analysis.send_email() refinery_import.delete() galaxy_import.delete() galaxy_export.delete() analysis.galaxy_cleanup() return # attach workflow outputs back to dataset isatab graph if analysis.workflow.type == Workflow.ANALYSIS_TYPE: analysis.attach_outputs_dataset() elif analysis.workflow.type == Workflow.DOWNLOAD_TYPE: analysis.attach_outputs_downloads() else: logger.warning("Unknown workflow type '%s' in analysis '%s'", analysis.workflow.type, analysis.name) analysis.set_status(Analysis.SUCCESS_STATUS) analysis.rename_results() analysis.send_email() logger.info("Analysis '%s' finished successfully", analysis) analysis.galaxy_cleanup() refinery_import.delete() galaxy_import.delete() galaxy_export.delete() # Update file count and file size of the corresponding data set analysis.data_set.file_count = analysis.data_set.get_file_count() # FIXME: line below is causing analyses to be marked as failed # analysis.data_set.file_size = analysis.data_set.get_file_size() analysis.data_set.save()
def download_history_files(analysis): """Download entire histories from galaxy. Getting files out of history to file store. """ logger.debug("analysis_manger.download_history_files called") # retrieving list of files to download for workflow #TODO: handle Django exceptions analysis = Analysis.objects.get(uuid=analysis.uuid) dl_files = analysis.workflow_dl_files ### creating dictionary based on files to download predetermined by workflow w/ keep operators dl_dict = {} for dl in dl_files.all(): temp_dict = {} temp_dict['filename'] = dl.filename temp_dict['pair_id'] = dl.pair_id dl_dict[str(dl.step_id)] = temp_dict task_list = [] # gets current galaxy connection connection = analysis.get_galaxy_connection() try: download_list = connection.get_history_file_list(analysis.history_id) except RuntimeError as exc: error_msg = "Post-processing failed: " + \ "error downloading Galaxy history files for analysis '{}': {}" \ .format(analysis.name, exc.message) logger.error(error_msg) if not isinstance(exc, (ConnectionError, TimeoutError, AuthError)): analysis.set_status(Analysis.FAILURE_STATUS, error_msg) try: analysis.delete_galaxy_library() analysis.delete_galaxy_workflow() analysis.delete_galaxy_history() except RuntimeError: logger.error("Cleanup failed for analysis '{}'".format( analysis.name)) return task_list # Iterating through files in current galaxy history for results in download_list: # download file if result state is "ok" if results['state'] == 'ok': file_type = results["type"] curr_file_id = results['name'] if curr_file_id in dl_dict: curr_dl_dict = dl_dict[curr_file_id] result_name = curr_dl_dict['filename'] + '.' + file_type # size of file defined by galaxy file_size = results['file_size'] # Determing tag if galaxy results should be download through http or copying files directly local_download = analysis.workflow.workflow_engine.instance.local_download # to retrieve HTML files as zip archives via dataset URL if local_download and file_type != 'html': download_url = results['file_name'] else: download_url = connection.make_url(str( results['dataset_id']), is_data=True, key=False) # workaround to set the correct file type for zip archives of # reports produced by FASTQC if file_type == 'html': file_type = 'zip' # getting file_store_uuid, # TODO: when changing permanent=True, fix update of % download of file filestore_uuid = create(source=download_url, filetype=file_type, permanent=False) # adding history files to django model temp_file = AnalysisResult(analysis_uuid=analysis.uuid, file_store_uuid=filestore_uuid, file_name=result_name, file_type=file_type) temp_file.save() analysis.results.add(temp_file) analysis.save() # downloading analysis results into file_store # only download files if size is greater than 1 if file_size > 0: #task_id = import_file.subtask((filestore_uuid, True, False, file_size,)) # local download, force copying into the file_store instead of symlinking if local_download: task_id = import_file.subtask(( filestore_uuid, False, True, file_size, )) else: task_id = import_file.subtask(( filestore_uuid, False, False, file_size, )) task_list.append(task_id) return task_list
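# Hypothetical usage sketch for download_history_files() (not from the
# codebase): in the chord-based flow of the first run_analysis version above,
# a task list like this one is passed to progress_chord() together with a
# callback subtask that runs the next stage. The callback task name
# `chord_postprocessing` and the helper name below are assumptions;
# progress_chord is used exactly as in that earlier version.
def start_postprocessing_downloads(analysis):
    """Run result downloads as a chord and return its result set (sketch)."""
    download_tasks = download_history_files(analysis)
    result_chord, result_set = progress_chord(download_tasks)(
        chord_postprocessing.subtask(analysis=analysis, ))
    return result_set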