def _get_galaxy_download_tasks(analysis):
    """Get file import tasks for Galaxy analysis results.

    Returns a list of ``import_file`` subtask signatures, one per non-empty
    "ok" dataset in the analysis's Galaxy history whose name matches a
    workflow-designated download file. On a Galaxy connection error the
    analysis is marked as failed, Galaxy state is cleaned up, and the
    (possibly empty) task list collected so far is returned.
    """
    logger.debug("Preparing to download analysis results from Galaxy")
    task_list = []

    # Map step_id -> {filename, pair_id} for the files the workflow marked
    # for download (keep operators)
    dl_dict = {}
    for dl in analysis.workflow_dl_files.all():
        dl_dict[str(dl.step_id)] = {
            'filename': dl.filename,
            'pair_id': dl.pair_id,
        }

    galaxy_instance = analysis.workflow.workflow_engine.instance
    try:
        download_list = galaxy_instance.get_history_file_list(
            analysis.history_id)
    except galaxy.client.ConnectionError as exc:
        error_msg = (
            "Error downloading Galaxy history files for analysis '%s': %s")
        logger.error(error_msg, analysis.name, exc.message)
        # fixed: store the formatted message, not the raw '%s' template
        analysis.set_status(Analysis.FAILURE_STATUS,
                            error_msg % (analysis.name, exc.message))
        analysis.galaxy_cleanup()
        return task_list

    # Iterate through files in the current Galaxy history
    for results in download_list:
        # only download files whose result state is "ok"
        if results['state'] != 'ok':
            continue
        file_type = results["type"]
        curr_file_id = results['name']
        if curr_file_id not in dl_dict:
            continue
        curr_dl_dict = dl_dict[curr_file_id]
        # fixed: the extension was dropped by a broken line continuation
        # (dangling '+ file_type' statement)
        result_name = curr_dl_dict['filename'] + '.' + file_type
        # size of file as reported by Galaxy
        file_size = results['file_size']
        # Download via HTTP unless Galaxy is local; HTML files are always
        # retrieved as zip archives via the dataset URL
        if galaxy_instance.local_download and file_type != 'html':
            download_url = results['file_name']
        else:
            download_url = urlparse.urljoin(
                galaxy_instance.base_url, '/'.join([
                    'datasets', str(results['dataset_id']),
                    'display?to_ext=txt']))
        # workaround to set the correct file type for zip archives of
        # FastQC HTML reports produced by Galaxy dynamically
        if file_type == 'html':
            file_type = 'zip'
        # TODO: when changing permanent=True, fix update of % download
        # of file
        filestore_uuid = create(source=download_url, filetype=file_type)
        # register the history file with the analysis
        temp_file = AnalysisResult(
            analysis_uuid=analysis.uuid, file_store_uuid=filestore_uuid,
            file_name=result_name, file_type=file_type)
        temp_file.save()
        analysis.results.add(temp_file)
        analysis.save()
        # only schedule an import for non-empty files
        if file_size > 0:
            task_list.append(
                import_file.subtask((filestore_uuid, False, file_size)))

    return task_list
def _get_galaxy_download_task_ids(analysis):
    """Get file import tasks for Galaxy analysis results.

    Creates a ``FileStoreItem`` and an ``AnalysisResult`` for every "ok"
    dataset in the tool's Galaxy download list and returns a list of
    ``import_file`` subtask signatures for the non-empty ones. On a Galaxy
    connection error the analysis is marked as failed, Galaxy state is
    cleaned up, and the (possibly empty) list is returned.
    """
    logger.debug("Preparing to download analysis results from Galaxy")
    task_id_list = []

    # retrieve the list of files to download for the workflow
    tool = _get_workflow_tool(analysis.uuid)
    tool.create_analysis_output_node_connections()
    galaxy_instance = analysis.workflow.workflow_engine.instance
    try:
        download_list = tool.get_galaxy_dataset_download_list()
    except galaxy.client.ConnectionError as exc:
        error_msg = (
            "Error downloading Galaxy history files for analysis '%s': %s")
        logger.error(error_msg, analysis.name, exc.message)
        # fixed: store the formatted message, not the raw '%s' template
        analysis.set_status(Analysis.FAILURE_STATUS,
                            error_msg % (analysis.name, exc.message))
        analysis.galaxy_cleanup()
        return task_id_list

    # Iterate through files in the current Galaxy history
    for results in download_list:
        # only download files whose result state is "ok"
        if results['state'] != 'ok':
            continue
        file_extension = results["type"]
        result_name = "{}.{}".format(results['name'], file_extension)
        # size of file as reported by Galaxy
        file_size = results['file_size']
        # Download via HTTP unless Galaxy is local; HTML files are always
        # retrieved as zip archives via the dataset URL
        if galaxy_instance.local_download and file_extension != 'html':
            download_url = results['file_name']
        else:
            download_url = urlparse.urljoin(
                galaxy_instance.base_url, '/'.join([
                    'datasets', str(results['dataset_id']),
                    'display?to_ext=txt']))
        file_store_item = FileStoreItem(source=download_url)
        # workaround to set the correct file type for zip archives of
        # FastQC HTML reports produced by Galaxy dynamically
        if file_extension == 'html':
            file_extension = 'zip'
        # assign the file type manually since it cannot be inferred from
        # the source URL
        try:
            extension = FileExtension.objects.get(name=file_extension)
        except (FileExtension.DoesNotExist,
                FileExtension.MultipleObjectsReturned) as exc:
            # fixed: logger.warn is a deprecated alias of logger.warning
            logger.warning(
                "Could not assign type to file '%s' using "
                "extension '%s': %s", file_store_item, file_extension, exc)
        else:
            file_store_item.filetype = extension.filetype
        file_store_item.save()
        # register the history file with the analysis
        temp_file = AnalysisResult(
            analysis_uuid=analysis.uuid,
            file_store_uuid=file_store_item.uuid,
            file_name=result_name, file_type=file_extension)
        temp_file.save()
        analysis.results.add(temp_file)
        analysis.save()
        # only schedule an import for non-empty files
        if file_size > 0:
            task_id_list.append(
                import_file.subtask(
                    (file_store_item.uuid, False, file_size)))

    return task_id_list
def download_history_files(analysis):
    """Download entire histories from Galaxy into the file store.

    Re-fetches the Analysis by UUID, then creates an ``AnalysisResult`` for
    every "ok" history dataset matching a workflow-designated download file
    and returns a list of ``import_file`` subtask signatures for the
    non-empty ones. On a download error, non-transient failures mark the
    analysis as failed and attempt Galaxy cleanup.
    """
    logger.debug("analysis_manger.download_history_files called")
    # TODO: handle Django exceptions
    analysis = Analysis.objects.get(uuid=analysis.uuid)

    # Map step_id -> {filename, pair_id} for the files the workflow marked
    # for download (keep operators)
    dl_dict = {}
    for dl in analysis.workflow_dl_files.all():
        dl_dict[str(dl.step_id)] = {
            'filename': dl.filename,
            'pair_id': dl.pair_id,
        }

    task_list = []
    # current Galaxy connection
    connection = analysis.get_galaxy_connection()
    try:
        download_list = connection.get_history_file_list(analysis.history_id)
    except RuntimeError as exc:
        error_msg = (
            "Post-processing failed: error downloading Galaxy history files "
            "for analysis '{}': {}".format(analysis.name, exc.message))
        logger.error(error_msg)
        # transient connection problems do not fail the analysis
        if not isinstance(exc, (ConnectionError, TimeoutError, AuthError)):
            analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
            try:
                analysis.delete_galaxy_library()
                analysis.delete_galaxy_workflow()
                analysis.delete_galaxy_history()
            except RuntimeError:
                logger.error("Cleanup failed for analysis '{}'".format(
                    analysis.name))
        return task_list

    # hoisted out of the loop: constant for the whole analysis
    local_download = analysis.workflow.workflow_engine.instance.local_download

    # Iterate through files in the current Galaxy history
    for results in download_list:
        # only download files whose result state is "ok"
        if results['state'] != 'ok':
            continue
        file_type = results["type"]
        curr_file_id = results['name']
        if curr_file_id not in dl_dict:
            continue
        curr_dl_dict = dl_dict[curr_file_id]
        # fixed: the extension was dropped by a broken line continuation
        # (dangling '+ file_type' statement)
        result_name = curr_dl_dict['filename'] + '.' + file_type
        # size of file as reported by Galaxy
        file_size = results['file_size']
        # Local Galaxy installs are read directly from disk; HTML files are
        # always retrieved as zip archives via the dataset URL
        if local_download and file_type != 'html':
            download_url = results['file_name']
        else:
            download_url = connection.make_url(
                str(results['dataset_id']), is_data=True, key=False)
        # workaround to set the correct file type for zip archives of
        # reports produced by FASTQC
        if file_type == 'html':
            file_type = 'zip'
        # TODO: when changing permanent=True, fix update of % download of
        # file
        filestore_uuid = create(
            source=download_url, filetype=file_type, permanent=False)
        # register the history file with the analysis
        temp_file = AnalysisResult(
            analysis_uuid=analysis.uuid, file_store_uuid=filestore_uuid,
            file_name=result_name, file_type=file_type)
        temp_file.save()
        analysis.results.add(temp_file)
        analysis.save()
        # only schedule an import for non-empty files; for local downloads
        # force a copy into the file store instead of symlinking
        if file_size > 0:
            task_list.append(import_file.subtask(
                (filestore_uuid, False, local_download, file_size,)))

    return task_list
def get_galaxy_download_tasks(analysis):
    """Get file import tasks for Galaxy analysis results.

    Returns a list of ``import_file`` subtask signatures, one per non-empty
    "ok" dataset in the analysis's Galaxy history whose name matches a
    workflow-designated download file. On a Galaxy connection error the
    analysis is marked as failed, Galaxy state is cleaned up, and the
    (possibly empty) task list collected so far is returned.
    """
    logger.debug("Preparing to download analysis results from Galaxy")

    # Map step_id -> {filename, pair_id} for the files the workflow marked
    # for download (keep operators)
    dl_dict = {}
    for dl in analysis.workflow_dl_files.all():
        dl_dict[str(dl.step_id)] = {
            'filename': dl.filename,
            'pair_id': dl.pair_id,
        }

    task_list = []
    galaxy_instance = analysis.workflow.workflow_engine.instance
    try:
        download_list = galaxy_instance.get_history_file_list(
            analysis.history_id)
    except galaxy.client.ConnectionError as exc:
        error_msg = "Error downloading Galaxy history files for analysis " \
                    "'%s': %s"
        logger.error(error_msg, analysis.name, exc.message)
        # fixed: store the formatted message, not the raw '%s' template
        analysis.set_status(Analysis.FAILURE_STATUS,
                            error_msg % (analysis.name, exc.message))
        analysis.galaxy_cleanup()
        return task_list

    # Iterate through files in the current Galaxy history
    for results in download_list:
        # only download files whose result state is "ok"
        if results['state'] != 'ok':
            continue
        file_type = results["type"]
        curr_file_id = results['name']
        if curr_file_id not in dl_dict:
            continue
        curr_dl_dict = dl_dict[curr_file_id]
        # fixed: the extension was dropped by a broken line continuation
        # (dangling '+ file_type' statement)
        result_name = curr_dl_dict['filename'] + '.' + file_type
        # size of file as reported by Galaxy
        file_size = results['file_size']
        # Download via HTTP unless Galaxy is local; HTML files are always
        # retrieved as zip archives via the dataset URL
        if galaxy_instance.local_download and file_type != 'html':
            download_url = results['file_name']
        else:
            download_url = urlparse.urljoin(
                galaxy_instance.base_url, '/'.join(
                    ['datasets', str(results['dataset_id']),
                     'display?to_ext=txt']))
        # workaround to set the correct file type for zip archives of
        # FastQC HTML reports produced by Galaxy dynamically
        if file_type == 'html':
            file_type = 'zip'
        # TODO: when changing permanent=True, fix update of % download
        # of file
        filestore_uuid = create(
            source=download_url, filetype=file_type, permanent=False)
        # register the history file with the analysis
        temp_file = AnalysisResult(
            analysis_uuid=analysis.uuid, file_store_uuid=filestore_uuid,
            file_name=result_name, file_type=file_type)
        temp_file.save()
        analysis.results.add(temp_file)
        analysis.save()
        # only schedule an import for non-empty files; for local downloads
        # force a copy into the file store instead of symlinking
        if file_size > 0:
            task_list.append(import_file.subtask(
                (filestore_uuid, False, galaxy_instance.local_download,
                 file_size,)))

    return task_list
def download_history_files(analysis):
    """Download entire histories from Galaxy into the file store.

    NOTE(review): this is a second definition of ``download_history_files``;
    if both live in the same module, this one shadows the earlier one —
    confirm whether the earlier copy can be removed.

    Re-fetches the Analysis by UUID, then creates an ``AnalysisResult`` for
    every "ok" history dataset matching a workflow-designated download file
    and returns a list of ``import_file`` subtask signatures for the
    non-empty ones. On a download error, non-transient failures mark the
    analysis as failed and attempt Galaxy cleanup.
    """
    logger.debug("analysis_manger.download_history_files called")
    # TODO: handle Django exceptions
    analysis = Analysis.objects.get(uuid=analysis.uuid)

    # Map step_id -> {filename, pair_id} for the files the workflow marked
    # for download (keep operators)
    dl_dict = {}
    for dl in analysis.workflow_dl_files.all():
        dl_dict[str(dl.step_id)] = {
            'filename': dl.filename,
            'pair_id': dl.pair_id,
        }

    task_list = []
    # current Galaxy connection
    connection = analysis.get_galaxy_connection()
    try:
        download_list = connection.get_history_file_list(analysis.history_id)
    except RuntimeError as exc:
        error_msg = (
            "Post-processing failed: error downloading Galaxy history files "
            "for analysis '{}': {}".format(analysis.name, exc.message))
        logger.error(error_msg)
        # transient connection problems do not fail the analysis
        if not isinstance(exc, (ConnectionError, TimeoutError, AuthError)):
            analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
            try:
                analysis.delete_galaxy_library()
                analysis.delete_galaxy_workflow()
                analysis.delete_galaxy_history()
            except RuntimeError:
                logger.error(
                    "Cleanup failed for analysis '{}'".format(analysis.name))
        return task_list

    # hoisted out of the loop: constant for the whole analysis
    local_download = analysis.workflow.workflow_engine.instance.local_download

    # Iterate through files in the current Galaxy history
    for results in download_list:
        # only download files whose result state is "ok"
        if results['state'] != 'ok':
            continue
        file_type = results["type"]
        curr_file_id = results['name']
        if curr_file_id not in dl_dict:
            continue
        curr_dl_dict = dl_dict[curr_file_id]
        # fixed: the extension was dropped by a broken line continuation
        # (dangling '+ file_type' statement)
        result_name = curr_dl_dict['filename'] + '.' + file_type
        # size of file as reported by Galaxy
        file_size = results['file_size']
        # Local Galaxy installs are read directly from disk; HTML files are
        # always retrieved as zip archives via the dataset URL
        if local_download and file_type != 'html':
            download_url = results['file_name']
        else:
            download_url = connection.make_url(
                str(results['dataset_id']), is_data=True, key=False)
        # workaround to set the correct file type for zip archives of
        # reports produced by FASTQC
        if file_type == 'html':
            file_type = 'zip'
        # TODO: when changing permanent=True, fix update of % download of
        # file
        filestore_uuid = create(
            source=download_url, filetype=file_type, permanent=False)
        # register the history file with the analysis
        temp_file = AnalysisResult(
            analysis_uuid=analysis.uuid, file_store_uuid=filestore_uuid,
            file_name=result_name, file_type=file_type)
        temp_file.save()
        analysis.results.add(temp_file)
        analysis.save()
        # only schedule an import for non-empty files; for local downloads
        # force a copy into the file store instead of symlinking
        if file_size > 0:
            task_list.append(import_file.subtask(
                (filestore_uuid, False, local_download, file_size,)))

    return task_list