Example #1
def run_analysis(analysis):
    """Launch analysis (outermost task, calls subtasks that monitor and run
    preprocessing, execution, postprocessing)
    """
    logger.debug("analysis_manager.tasks run_analysis called")
    # updating status of analysis to running
    analysis = Analysis.objects.filter(uuid=analysis.uuid)[0]
    analysis_status = AnalysisStatus.objects.get(analysis=analysis)
    analysis.set_status(Analysis.RUNNING_STATUS)
    # DOWNLOADING
    # GETTING LIST OF REMOTE FILES TO DOWNLOAD
    datainputs = analysis.workflow_data_input_maps.all()
    download_tasks = []
    for files in datainputs:
        cur_node_uuid = files.data_uuid
        cur_fs_uuid = Node.objects.get(uuid=cur_node_uuid).file_uuid
        # Adding downloading task if file is not local
        if not is_local(cur_fs_uuid):
            # file is remote: queue a file store import task for it
            task_id = import_file.subtask((cur_fs_uuid, False, ))
            download_tasks.append(task_id)
    # PREPROCESSING
    task_id = run_analysis_preprocessing.subtask((analysis, ))
    download_tasks.append(task_id)
    result_chord, result_set = progress_chord(download_tasks)(
        chord_execution.subtask(analysis=analysis, ))
    # saving preprocessing taskset
    analysis_status.preprocessing_taskset_id = result_set.task_id
    analysis_status.save()
Example #2
def run_analysis(analysis):
    '''Launch analysis (outermost task, calls subtasks that monitor and run
    preprocessing, execution, postprocessing)

    '''
    logger.debug("analysis_manager.tasks run_analysis called")
    # updating status of analysis to running
    analysis = Analysis.objects.filter(uuid=analysis.uuid)[0]
    analysis_status = AnalysisStatus.objects.get(analysis=analysis)
    analysis.set_status(Analysis.RUNNING_STATUS)
    # DOWNLOADING
    # GETTING LIST OF REMOTE FILES TO DOWNLOAD
    datainputs = analysis.workflow_data_input_maps.all()
    download_tasks = []
    for files in datainputs:
        cur_node_uuid = files.data_uuid
        cur_fs_uuid = Node.objects.get(uuid=cur_node_uuid).file_uuid
        # Adding downloading task if file is not local
        if not is_local(cur_fs_uuid):
            # file is remote: queue a file store import task for it
            task_id = import_file.subtask((
                cur_fs_uuid,
                False,
            ))
            download_tasks.append(task_id)
    # PREPROCESSING
    task_id = run_analysis_preprocessing.subtask((analysis, ))
    download_tasks.append(task_id)
    result_chord, result_set = progress_chord(download_tasks)(
        chord_execution.subtask(analysis=analysis, ))
    # saving preprocessing taskset
    analysis_status.preprocessing_taskset_id = result_set.task_id
    analysis_status.save()
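Examples #1 and #2 drive the download-then-preprocess step through the project's progress_chord and chord_execution helpers. For reference only, a minimal self-contained sketch of the underlying Celery chord primitive, with every task name below a made-up stand-in rather than project code, could look like this:

from celery import Celery, chord

app = Celery('sketch', broker='redis://localhost', backend='redis://localhost')

@app.task
def download(file_uuid):
    # stand-in for import_file: fetch one remote file into the file store
    return file_uuid

@app.task
def after_downloads(results):
    # stand-in for chord_execution: runs once every header task has finished
    return len(results)

def launch_downloads(file_uuids):
    # the header tasks run in parallel; the callback fires with their results
    return chord(download.s(uuid) for uuid in file_uuids)(after_downloads.s())

Judging from the code above, progress_chord additionally returns the task set so that its task_id can be stored on AnalysisStatus for later polling.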
Example #3
def _refinery_file_import(analysis_uuid):
    """
    Check on the status of the files being imported into Refinery.
    Fail the task appropriately if we cannot retrieve the status.
    """
    analysis = _get_analysis(analysis_uuid)
    analysis_status = _get_analysis_status(analysis_uuid)

    if not analysis_status.refinery_import_task_group_id:
        logger.info("Starting analysis '%s'", analysis)
        analysis.set_status(Analysis.RUNNING_STATUS)
        logger.info("Starting input file import tasks for analysis '%s'",
                    analysis)
        refinery_import_tasks = []

        if analysis.is_tool_based:
            tool = _get_workflow_tool(analysis_uuid)
            input_file_uuid_list = tool.get_input_file_uuid_list()
        else:
            input_file_uuid_list = analysis.get_input_file_uuid_list()

        for input_file_uuid in input_file_uuid_list:
            refinery_import_task = import_file.subtask((input_file_uuid, ))
            refinery_import_tasks.append(refinery_import_task)
        refinery_import_taskset = TaskSet(
            tasks=refinery_import_tasks).apply_async()
        refinery_import_taskset.save()
        analysis_status.refinery_import_task_group_id = \
            refinery_import_taskset.taskset_id
        analysis_status.save()
        run_analysis.retry(countdown=RETRY_INTERVAL)

    # check if all files were successfully imported into Refinery
    refinery_import_taskset = get_taskset_result(
        analysis_status.refinery_import_task_group_id)
    if not refinery_import_taskset.ready():
        logger.debug("Input file import pending for analysis '%s'", analysis)
        run_analysis.retry(countdown=RETRY_INTERVAL)

    elif not refinery_import_taskset.successful():
        error_msg = "Analysis '{}' failed during file import".format(analysis)
        logger.error(error_msg)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis.send_email()
        refinery_import_taskset.delete()
        return
Example #4
def _get_galaxy_download_tasks(analysis):
    """Get file import tasks for Galaxy analysis results"""
    logger.debug("Preparing to download analysis results from Galaxy")
    task_list = []

    # retrieving list of files to download for workflow
    dl_files = analysis.workflow_dl_files
    # creating dictionary based on files to download predetermined by workflow
    # w/ keep operators
    dl_dict = {}
    for dl in dl_files.all():
        temp_dict = {}
        temp_dict['filename'] = dl.filename
        temp_dict['pair_id'] = dl.pair_id
        dl_dict[str(dl.step_id)] = temp_dict
    galaxy_instance = analysis.workflow.workflow_engine.instance

    try:
        download_list = galaxy_instance.get_history_file_list(
            analysis.history_id)
    except galaxy.client.ConnectionError as exc:
        error_msg = (
            "Error downloading Galaxy history files for analysis '%s': %s")
        logger.error(error_msg, analysis.name, exc.message)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis.galaxy_cleanup()
        return task_list
    # Iterating through files in current galaxy history
    for results in download_list:
        # download file if result state is "ok"
        if results['state'] == 'ok':
            file_type = results["type"]
            curr_file_id = results['name']
            if curr_file_id in dl_dict:
                curr_dl_dict = dl_dict[curr_file_id]
                result_name = curr_dl_dict['filename'] + '.' + file_type
                # size of file defined by galaxy
                file_size = results['file_size']
                # Determine whether Galaxy results should be downloaded over
                # HTTP or copied directly; HTML files are retrieved as zip
                # archives via the dataset URL
                if galaxy_instance.local_download and file_type != 'html':
                    download_url = results['file_name']
                else:
                    download_url = urlparse.urljoin(
                        galaxy_instance.base_url, '/'.join([
                            'datasets',
                            str(results['dataset_id']), 'display?to_ext=txt'
                        ]))
                # workaround to set the correct file type for zip archives of
                # FastQC HTML reports produced by Galaxy dynamically
                if file_type == 'html':
                    file_type = 'zip'
                # TODO: when changing permanent=True, fix update of % download
                # of file
                filestore_uuid = create(source=download_url,
                                        filetype=file_type)
                # adding history files to django model
                temp_file = AnalysisResult(analysis_uuid=analysis.uuid,
                                           file_store_uuid=filestore_uuid,
                                           file_name=result_name,
                                           file_type=file_type)
                temp_file.save()
                analysis.results.add(temp_file)
                analysis.save()
                # downloading analysis results into file_store
                # only download files if size is greater than 0
                if file_size > 0:
                    task_id = import_file.subtask(
                        (filestore_uuid, False, file_size))
                    task_list.append(task_id)

    return task_list
Example #5
def _get_galaxy_download_task_ids(analysis):
    """Get file import tasks for Galaxy analysis results"""
    logger.debug("Preparing to download analysis results from Galaxy")
    task_id_list = []

    # retrieving list of files to download for workflow
    tool = _get_workflow_tool(analysis.uuid)
    tool.create_analysis_output_node_connections()

    galaxy_instance = analysis.workflow.workflow_engine.instance

    try:
        download_list = tool.get_galaxy_dataset_download_list()
    except galaxy.client.ConnectionError as exc:
        error_msg = (
            "Error downloading Galaxy history files for analysis '%s': %s")
        logger.error(error_msg, analysis.name, exc.message)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis.galaxy_cleanup()
        return task_id_list
    # Iterating through files in current galaxy history
    for results in download_list:
        # download file if result state is "ok"
        if results['state'] == 'ok':
            file_extension = results["type"]
            result_name = "{}.{}".format(results['name'], file_extension)

            # size of file defined by galaxy
            file_size = results['file_size']
            # Determine whether Galaxy results should be downloaded over
            # HTTP or copied directly; HTML files are retrieved as zip
            # archives via the dataset URL
            if galaxy_instance.local_download and file_extension != 'html':
                download_url = results['file_name']
            else:
                download_url = urlparse.urljoin(
                    galaxy_instance.base_url, '/'.join([
                        'datasets',
                        str(results['dataset_id']), 'display?to_ext=txt'
                    ]))

            file_store_item = FileStoreItem(source=download_url)

            # workaround to set the correct file type for zip archives of
            # FastQC HTML reports produced by Galaxy dynamically
            if file_extension == 'html':
                file_extension = 'zip'
            # assign file type manually since it cannot be inferred from source
            try:
                extension = FileExtension.objects.get(name=file_extension)
            except (FileExtension.DoesNotExist,
                    FileExtension.MultipleObjectsReturned) as exc:
                logger.warn(
                    "Could not assign type to file '%s' using "
                    "extension '%s': %s", file_store_item, file_extension, exc)
            else:
                file_store_item.filetype = extension.filetype

            file_store_item.save()

            # adding history files to django model
            temp_file = AnalysisResult(analysis_uuid=analysis.uuid,
                                       file_store_uuid=file_store_item.uuid,
                                       file_name=result_name,
                                       file_type=file_extension)
            temp_file.save()
            analysis.results.add(temp_file)
            analysis.save()

            # downloading analysis results into file_store
            # only download files if size is greater than 0
            if file_size > 0:
                task_id = import_file.subtask(
                    (file_store_item.uuid, False, file_size))
                task_id_list.append(task_id)

    return task_id_list
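The download URL assembled above joins the Galaxy base URL with a datasets/<dataset_id>/display?to_ext=txt path. A standalone check of that construction, using a made-up base URL and dataset id, behaves as follows:

import urlparse  # Python 2 module used in the examples; urllib.parse on Python 3

base_url = 'https://galaxy.example.org/'
dataset_id = 'f2db41e1fa331b3e'
download_url = urlparse.urljoin(
    base_url, '/'.join(['datasets', dataset_id, 'display?to_ext=txt']))
print(download_url)
# https://galaxy.example.org/datasets/f2db41e1fa331b3e/display?to_ext=txt

Note that urljoin resolves a relative path against the base URL minus its last path segment when the base does not end in a slash, so the exact result depends on how galaxy_instance.base_url is stored.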
Example #6
def run_analysis(analysis_uuid):
    """Manage analysis execution"""
    RETRY_INTERVAL = 5  # seconds

    try:
        analysis = Analysis.objects.get(uuid=analysis_uuid)
    except (Analysis.DoesNotExist, Analysis.MultipleObjectsReturned) as exc:
        logger.error("Can not retrieve analysis with UUID '%s': '%s'",
                     analysis_uuid, exc)
        run_analysis.update_state(state=celery.states.FAILURE)
        return

    # if cancelled by user
    if analysis.failed():
        return

    try:
        analysis_status = AnalysisStatus.objects.get(analysis=analysis)
    except (AnalysisStatus.DoesNotExist,
            AnalysisStatus.MultipleObjectsReturned) as exc:
        logger.error("Can not retrieve status for analysis '%s': '%s'",
                     analysis, exc)
        run_analysis.update_state(state=celery.states.FAILURE)
        return

    if not analysis_status.refinery_import_task_group_id:
        logger.info("Starting analysis '%s'", analysis)
        analysis.set_status(Analysis.RUNNING_STATUS)
        logger.info("Starting input file import tasks for analysis '%s'",
                    analysis)
        refinery_import_tasks = []
        for input_file_uuid in analysis.get_input_file_uuid_list():
            refinery_import_task = import_file.subtask(
                    (input_file_uuid, False, ))
            refinery_import_tasks.append(refinery_import_task)
        refinery_import = TaskSet(tasks=refinery_import_tasks).apply_async()
        refinery_import.save()
        analysis_status.refinery_import_task_group_id = \
            refinery_import.taskset_id
        analysis_status.save()
        run_analysis.retry(countdown=RETRY_INTERVAL)

    # check if all files were successfully imported into Refinery
    refinery_import = TaskSetResult.restore(
            analysis_status.refinery_import_task_group_id)
    if not refinery_import.ready():
        logger.debug("Input file import pending for analysis '%s'", analysis)
        run_analysis.retry(countdown=RETRY_INTERVAL)
    elif not refinery_import.successful():
        logger.error("Analysis '%s' failed during file import", analysis)
        analysis.set_status(Analysis.FAILURE_STATUS)
        analysis.send_email()
        refinery_import.delete()
        return

    # import files into Galaxy and start analysis
    if not analysis_status.galaxy_import_task_group_id:
        logger.debug("Starting analysis execution in Galaxy")
        try:
            analysis.prepare_galaxy()
        except (requests.exceptions.ConnectionError,
                galaxy.client.ConnectionError):
            logger.error("Analysis '%s' failed during preparation in Galaxy",
                         analysis)
            analysis.set_status(Analysis.FAILURE_STATUS)
            analysis.send_email()
            refinery_import.delete()
            return
        galaxy_import_tasks = [
            start_galaxy_analysis.subtask((analysis_uuid, )),
        ]
        galaxy_import = TaskSet(tasks=galaxy_import_tasks).apply_async()
        galaxy_import.save()
        analysis_status.galaxy_import_task_group_id = \
            galaxy_import.taskset_id
        analysis_status.set_galaxy_history_state(AnalysisStatus.PROGRESS)
        run_analysis.retry(countdown=RETRY_INTERVAL)

    # check if data files were successfully imported into Galaxy
    galaxy_import = TaskSetResult.restore(
            analysis_status.galaxy_import_task_group_id)
    if not galaxy_import.ready():
        logger.debug("Analysis '%s' pending in Galaxy", analysis)
        run_analysis.retry(countdown=RETRY_INTERVAL)
    elif not galaxy_import.successful():
        logger.error("Analysis '%s' failed in Galaxy", analysis)
        analysis.set_status(Analysis.FAILURE_STATUS)
        analysis_status.set_galaxy_history_state(AnalysisStatus.ERROR)
        analysis.send_email()
        refinery_import.delete()
        galaxy_import.delete()
        analysis.galaxy_cleanup()
        return

    # check if analysis has finished running in Galaxy
    try:
        percent_complete = analysis.galaxy_progress()
    except RuntimeError:
        analysis_status.set_galaxy_history_state(AnalysisStatus.ERROR)
        analysis.send_email()
        refinery_import.delete()
        galaxy_import.delete()
        analysis.galaxy_cleanup()
        return
    except galaxy.client.ConnectionError:
        analysis_status.set_galaxy_history_state(AnalysisStatus.UNKNOWN)
        run_analysis.retry(countdown=RETRY_INTERVAL)
    else:
        # workaround to avoid moving the progress bar backward
        if analysis_status.galaxy_history_progress < percent_complete:
            analysis_status.galaxy_history_progress = percent_complete
            analysis_status.save()
        if percent_complete < 100:
            analysis_status.set_galaxy_history_state(AnalysisStatus.PROGRESS)
            run_analysis.retry(countdown=RETRY_INTERVAL)
        else:
            analysis_status.set_galaxy_history_state(AnalysisStatus.OK)

    # retrieve analysis results from Galaxy
    if not analysis_status.galaxy_export_task_group_id:
        galaxy_export_tasks = get_galaxy_download_tasks(analysis)
        logger.info("Starting downloading of results from Galaxy for analysis "
                    "'%s'", analysis)
        galaxy_export = TaskSet(tasks=galaxy_export_tasks).apply_async()
        galaxy_export.save()
        analysis_status.galaxy_export_task_group_id = galaxy_export.taskset_id
        analysis_status.save()
        run_analysis.retry(countdown=RETRY_INTERVAL)

    # check if analysis results have finished downloading from Galaxy
    galaxy_export = TaskSetResult.restore(
            analysis_status.galaxy_export_task_group_id)
    if not galaxy_export.ready():
        logger.debug("Results download pending for analysis '%s'", analysis)
        run_analysis.retry(countdown=RETRY_INTERVAL)
    # all tasks must have succeeded or failed
    elif not galaxy_export.successful():
        logger.error("Analysis '%s' failed while downloading results from "
                     "Galaxy", analysis)
        analysis.set_status(Analysis.FAILURE_STATUS)
        analysis.send_email()
        refinery_import.delete()
        galaxy_import.delete()
        galaxy_export.delete()
        analysis.galaxy_cleanup()
        return

    # attach workflow outputs back to dataset isatab graph
    if analysis.workflow.type == Workflow.ANALYSIS_TYPE:
        analysis.attach_outputs_dataset()
    elif analysis.workflow.type == Workflow.DOWNLOAD_TYPE:
        analysis.attach_outputs_downloads()
    else:
        logger.warning("Unknown workflow type '%s' in analysis '%s'",
                       analysis.workflow.type, analysis.name)

    analysis.set_status(Analysis.SUCCESS_STATUS)
    analysis.rename_results()
    analysis.send_email()
    logger.info("Analysis '%s' finished successfully", analysis)
    analysis.galaxy_cleanup()
    refinery_import.delete()
    galaxy_import.delete()
    galaxy_export.delete()

    # Update file count and file size of the corresponding data set
    analysis.data_set.file_count = analysis.data_set.get_file_count()
    analysis.data_set.file_size = analysis.data_set.get_file_size()
    analysis.data_set.save()
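run_analysis above polls each TaskSet by saving its taskset_id on AnalysisStatus and calling retry() until the restored result reports ready(). TaskSet and TaskSetResult belong to the older Celery API; a minimal sketch of the same store-and-poll pattern written against the current group/GroupResult primitives, with all task names below made up for illustration, might be:

from celery import Celery, group
from celery.result import GroupResult

app = Celery('sketch', broker='redis://localhost', backend='redis://localhost')
RETRY_INTERVAL = 5  # seconds

@app.task
def import_file(file_uuid):
    # stand-in for the real file store import task
    return file_uuid

@app.task(bind=True, max_retries=None)
def poll_imports(self, group_id):
    # re-schedule this task until every import in the saved group has finished
    result = GroupResult.restore(group_id)
    if result is None or not result.ready():
        raise self.retry(countdown=RETRY_INTERVAL)
    return result.successful()

def start_imports(file_uuids):
    # launch the imports, persist the group result so it can be restored
    # later, and hand its id to the polling task (run_analysis stores the id
    # on AnalysisStatus instead)
    result = group(import_file.s(uuid) for uuid in file_uuids).apply_async()
    result.save()
    return poll_imports.delay(result.id)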
Example #7
def get_galaxy_download_tasks(analysis):
    """Get file import tasks for Galaxy analysis results"""
    logger.debug("Preparing to download analysis results from Galaxy")

    # retrieving list of files to download for workflow
    dl_files = analysis.workflow_dl_files
    # creating dictionary based on files to download predetermined by workflow
    # w/ keep operators
    dl_dict = {}
    for dl in dl_files.all():
        temp_dict = {}
        temp_dict['filename'] = dl.filename
        temp_dict['pair_id'] = dl.pair_id
        dl_dict[str(dl.step_id)] = temp_dict
    task_list = []
    galaxy_instance = analysis.workflow.workflow_engine.instance
    try:
        download_list = galaxy_instance.get_history_file_list(
            analysis.history_id)
    except galaxy.client.ConnectionError as exc:
        error_msg = "Error downloading Galaxy history files for analysis " \
                    "'%s': %s"
        logger.error(error_msg, analysis.name, exc.message)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis.galaxy_cleanup()
        return task_list
    # Iterating through files in current galaxy history
    for results in download_list:
        # download file if result state is "ok"
        if results['state'] == 'ok':
            file_type = results["type"]
            curr_file_id = results['name']
            if curr_file_id in dl_dict:
                curr_dl_dict = dl_dict[curr_file_id]
                result_name = curr_dl_dict['filename'] + '.' + file_type
                # size of file defined by galaxy
                file_size = results['file_size']
                # Determine whether Galaxy results should be downloaded over
                # HTTP or copied directly; HTML files are retrieved as zip
                # archives via the dataset URL
                if galaxy_instance.local_download and file_type != 'html':
                    download_url = results['file_name']
                else:
                    download_url = urlparse.urljoin(
                            galaxy_instance.base_url, '/'.join(
                                    ['datasets', str(results['dataset_id']),
                                     'display?to_ext=txt']))
                # workaround to set the correct file type for zip archives of
                # FastQC HTML reports produced by Galaxy dynamically
                if file_type == 'html':
                    file_type = 'zip'
                # TODO: when changing permanent=True, fix update of % download
                # of file
                filestore_uuid = create(
                    source=download_url, filetype=file_type, permanent=False)
                # adding history files to django model
                temp_file = AnalysisResult(
                    analysis_uuid=analysis.uuid,
                    file_store_uuid=filestore_uuid,
                    file_name=result_name, file_type=file_type)
                temp_file.save()
                analysis.results.add(temp_file)
                analysis.save()
                # downloading analysis results into file_store
                # only download files if size is greater than 0
                if file_size > 0:
                    # local download, force copying into the file_store instead
                    # of symlinking
                    if galaxy_instance.local_download:
                        task_id = import_file.subtask(
                            (filestore_uuid, False, True, file_size,))
                    else:
                        task_id = import_file.subtask(
                            (filestore_uuid, False, False, file_size,))
                    task_list.append(task_id)

    return task_list
Example #8
def run_analysis(analysis_uuid):
    """Manage analysis execution"""
    RETRY_INTERVAL = 5  # seconds

    try:
        analysis = Analysis.objects.get(uuid=analysis_uuid)
    except (Analysis.DoesNotExist, Analysis.MultipleObjectsReturned) as exc:
        logger.error("Can not retrieve analysis with UUID '%s': '%s'",
                     analysis_uuid, exc)
        run_analysis.update_state(state=celery.states.FAILURE)
        return

    # if cancelled by user
    if analysis.failed():
        return

    try:
        analysis_status = AnalysisStatus.objects.get(analysis=analysis)
    except (AnalysisStatus.DoesNotExist,
            AnalysisStatus.MultipleObjectsReturned) as exc:
        logger.error("Can not retrieve status for analysis '%s': '%s'",
                     analysis, exc)
        run_analysis.update_state(state=celery.states.FAILURE)
        return

    if not analysis_status.refinery_import_task_group_id:
        logger.info("Starting analysis '%s'", analysis)
        analysis.set_status(Analysis.RUNNING_STATUS)
        logger.info("Starting input file import tasks for analysis '%s'",
                    analysis)
        refinery_import_tasks = []
        for input_file_uuid in analysis.get_input_file_uuid_list():
            refinery_import_task = import_file.subtask((input_file_uuid, ))
            refinery_import_tasks.append(refinery_import_task)
        refinery_import = TaskSet(tasks=refinery_import_tasks).apply_async()
        refinery_import.save()
        analysis_status.refinery_import_task_group_id = \
            refinery_import.taskset_id
        analysis_status.save()
        run_analysis.retry(countdown=RETRY_INTERVAL)

    # check if all files were successfully imported into Refinery
    refinery_import = TaskSetResult.restore(
        analysis_status.refinery_import_task_group_id)
    if not refinery_import.ready():
        logger.debug("Input file import pending for analysis '%s'", analysis)
        run_analysis.retry(countdown=RETRY_INTERVAL)
    elif not refinery_import.successful():
        error_msg = "Analysis '{}' failed during file import".format(analysis)
        logger.error(error_msg)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis.send_email()
        refinery_import.delete()
        return

    # import files into Galaxy and start analysis
    if not analysis_status.galaxy_import_task_group_id:
        logger.debug("Starting analysis execution in Galaxy")
        try:
            analysis.prepare_galaxy()
        except (requests.exceptions.ConnectionError,
                galaxy.client.ConnectionError):
            error_msg = "Analysis '{}' failed during preparation in " \
                        "Galaxy".format(analysis)
            logger.error(error_msg)
            analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
            analysis.send_email()
            refinery_import.delete()
            return
        galaxy_import_tasks = [
            start_galaxy_analysis.subtask((analysis_uuid, )),
        ]
        galaxy_import = TaskSet(tasks=galaxy_import_tasks).apply_async()
        galaxy_import.save()
        analysis_status.galaxy_import_task_group_id = \
            galaxy_import.taskset_id
        analysis_status.set_galaxy_history_state(AnalysisStatus.PROGRESS)
        run_analysis.retry(countdown=RETRY_INTERVAL)

    # check if data files were successfully imported into Galaxy
    galaxy_import = TaskSetResult.restore(
        analysis_status.galaxy_import_task_group_id)
    if not galaxy_import.ready():
        logger.debug("Analysis '%s' pending in Galaxy", analysis)
        run_analysis.retry(countdown=RETRY_INTERVAL)
    elif not galaxy_import.successful():
        error_msg = "Analysis '{}' failed in Galaxy".format(analysis)
        logger.error(error_msg)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis_status.set_galaxy_history_state(AnalysisStatus.ERROR)
        analysis.send_email()
        refinery_import.delete()
        galaxy_import.delete()
        analysis.galaxy_cleanup()
        return

    # check if analysis has finished running in Galaxy
    try:
        percent_complete = analysis.galaxy_progress()
    except RuntimeError:
        analysis_status.set_galaxy_history_state(AnalysisStatus.ERROR)
        analysis.send_email()
        refinery_import.delete()
        galaxy_import.delete()
        analysis.galaxy_cleanup()
        return
    except galaxy.client.ConnectionError:
        analysis_status.set_galaxy_history_state(AnalysisStatus.UNKNOWN)
        run_analysis.retry(countdown=RETRY_INTERVAL)
    else:
        # workaround to avoid moving the progress bar backward
        if analysis_status.galaxy_history_progress < percent_complete:
            analysis_status.galaxy_history_progress = percent_complete
            analysis_status.save()
        if percent_complete < 100:
            analysis_status.set_galaxy_history_state(AnalysisStatus.PROGRESS)
            run_analysis.retry(countdown=RETRY_INTERVAL)
        else:
            analysis_status.set_galaxy_history_state(AnalysisStatus.OK)

    # retrieve analysis results from Galaxy
    if not analysis_status.galaxy_export_task_group_id:
        galaxy_export_tasks = get_galaxy_download_tasks(analysis)
        logger.info(
            "Starting downloading of results from Galaxy for analysis "
            "'%s'", analysis)
        galaxy_export = TaskSet(tasks=galaxy_export_tasks).apply_async()
        galaxy_export.save()
        analysis_status.galaxy_export_task_group_id = galaxy_export.taskset_id
        analysis_status.save()
        run_analysis.retry(countdown=RETRY_INTERVAL)

    # check if analysis results have finished downloading from Galaxy
    galaxy_export = TaskSetResult.restore(
        analysis_status.galaxy_export_task_group_id)
    if not galaxy_export.ready():
        logger.debug("Results download pending for analysis '%s'", analysis)
        run_analysis.retry(countdown=RETRY_INTERVAL)
    # all tasks must have succeeded or failed
    elif not galaxy_export.successful():
        error_msg = "Analysis '%s' failed while downloading results from  " \
                    "Galaxy".format(analysis)
        logger.error(error_msg)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis.send_email()
        refinery_import.delete()
        galaxy_import.delete()
        galaxy_export.delete()
        analysis.galaxy_cleanup()
        return

    # attach workflow outputs back to dataset isatab graph
    if analysis.workflow.type == Workflow.ANALYSIS_TYPE:
        analysis.attach_outputs_dataset()
    elif analysis.workflow.type == Workflow.DOWNLOAD_TYPE:
        analysis.attach_outputs_downloads()
    else:
        logger.warning("Unknown workflow type '%s' in analysis '%s'",
                       analysis.workflow.type, analysis.name)

    analysis.set_status(Analysis.SUCCESS_STATUS)
    analysis.rename_results()
    analysis.send_email()
    logger.info("Analysis '%s' finished successfully", analysis)
    analysis.galaxy_cleanup()
    refinery_import.delete()
    galaxy_import.delete()
    galaxy_export.delete()

    # Update file count and file size of the corresponding data set
    analysis.data_set.file_count = analysis.data_set.get_file_count()
    # FIXME: line below is causing analyses to be marked as failed
    # analysis.data_set.file_size = analysis.data_set.get_file_size()
    analysis.data_set.save()
Example #9
def download_history_files(analysis):
    """Download entire histories from galaxy.
    Getting files out of history to file store.

    """
    logger.debug("analysis_manger.download_history_files called")

    # retrieving list of files to download for workflow
    #TODO: handle Django exceptions
    analysis = Analysis.objects.get(uuid=analysis.uuid)
    dl_files = analysis.workflow_dl_files

    # creating dictionary based on files to download predetermined by workflow
    # w/ keep operators
    dl_dict = {}
    for dl in dl_files.all():
        temp_dict = {}
        temp_dict['filename'] = dl.filename
        temp_dict['pair_id'] = dl.pair_id
        dl_dict[str(dl.step_id)] = temp_dict

    task_list = []
    # gets current galaxy connection
    connection = analysis.get_galaxy_connection()
    try:
        download_list = connection.get_history_file_list(analysis.history_id)
    except RuntimeError as exc:
        error_msg = "Post-processing failed: " + \
            "error downloading Galaxy history files for analysis '{}': {}" \
            .format(analysis.name, exc.message)
        logger.error(error_msg)
        if not isinstance(exc, (ConnectionError, TimeoutError, AuthError)):
            analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
            try:
                analysis.delete_galaxy_library()
                analysis.delete_galaxy_workflow()
                analysis.delete_galaxy_history()
            except RuntimeError:
                logger.error("Cleanup failed for analysis '{}'".format(
                    analysis.name))
        return task_list

    # Iterating through files in current galaxy history
    for results in download_list:
        # download file if result state is "ok"
        if results['state'] == 'ok':
            file_type = results["type"]
            curr_file_id = results['name']

            if curr_file_id in dl_dict:
                curr_dl_dict = dl_dict[curr_file_id]
                result_name = curr_dl_dict['filename'] + '.' + file_type
                # size of file defined by galaxy
                file_size = results['file_size']

                # Determine whether Galaxy results should be downloaded over
                # HTTP or copied directly
                local_download = analysis.workflow.workflow_engine.instance.local_download

                # to retrieve HTML files as zip archives via dataset URL
                if local_download and file_type != 'html':
                    download_url = results['file_name']
                else:
                    download_url = connection.make_url(
                        str(results['dataset_id']), is_data=True, key=False)

                # workaround to set the correct file type for zip archives of
                # reports produced by FASTQC
                if file_type == 'html':
                    file_type = 'zip'

                # getting file_store_uuid,
                # TODO: when changing permanent=True, fix update of % download of file
                filestore_uuid = create(source=download_url,
                                        filetype=file_type,
                                        permanent=False)

                # adding history files to django model
                temp_file = AnalysisResult(analysis_uuid=analysis.uuid,
                                           file_store_uuid=filestore_uuid,
                                           file_name=result_name,
                                           file_type=file_type)
                temp_file.save()
                analysis.results.add(temp_file)
                analysis.save()

                # downloading analysis results into file_store
                # only download files if size is greater than 0
                if file_size > 0:
                    #task_id = import_file.subtask((filestore_uuid, True, False, file_size,))
                    # local download, force copying into the file_store instead of symlinking
                    if local_download:
                        task_id = import_file.subtask((
                            filestore_uuid,
                            False,
                            True,
                            file_size,
                        ))
                    else:
                        task_id = import_file.subtask((
                            filestore_uuid,
                            False,
                            False,
                            file_size,
                        ))
                    task_list.append(task_id)

    return task_list
def download_history_files(analysis):
    """Download entire histories from galaxy.
    Getting files out of history to file store.

    """
    logger.debug("analysis_manger.download_history_files called")

    # retrieving list of files to download for workflow
    #TODO: handle Django exceptions
    analysis = Analysis.objects.get(uuid=analysis.uuid)
    dl_files = analysis.workflow_dl_files

    # creating dictionary based on files to download predetermined by workflow
    # w/ keep operators
    dl_dict = {}
    for dl in dl_files.all():
        temp_dict = {}
        temp_dict['filename'] = dl.filename
        temp_dict['pair_id'] = dl.pair_id
        dl_dict[str(dl.step_id)] = temp_dict

    task_list = []
    # gets current galaxy connection
    connection = analysis.get_galaxy_connection()
    try:
        download_list = connection.get_history_file_list(analysis.history_id)
    except RuntimeError as exc:
        error_msg = "Post-processing failed: " + \
            "error downloading Galaxy history files for analysis '{}': {}" \
            .format(analysis.name, exc.message)
        logger.error(error_msg)
        if not isinstance(exc, (ConnectionError, TimeoutError, AuthError)):
            analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
            try:
                analysis.delete_galaxy_library()
                analysis.delete_galaxy_workflow()
                analysis.delete_galaxy_history()
            except RuntimeError:
                logger.error(
                    "Cleanup failed for analysis '{}'".format(analysis.name))
        return task_list

    # Iterating through files in current galaxy history
    for results in download_list:
        # download file if result state is "ok"
        if results['state'] == 'ok':
            file_type = results["type"]
            curr_file_id = results['name']

            if curr_file_id in dl_dict:
                curr_dl_dict = dl_dict[curr_file_id]
                result_name = curr_dl_dict['filename'] + '.' + file_type
                # size of file defined by galaxy
                file_size = results['file_size']

                # Determine whether Galaxy results should be downloaded over
                # HTTP or copied directly
                local_download = analysis.workflow.workflow_engine.instance.local_download

                # to retrieve HTML files as zip archives via dataset URL
                if local_download and file_type != 'html':
                    download_url = results['file_name']
                else:
                    download_url = connection.make_url(
                        str(results['dataset_id']), is_data=True, key=False)

                # workaround to set the correct file type for zip archives of
                # reports produced by FASTQC
                if file_type == 'html':
                    file_type = 'zip'

                # getting file_store_uuid,
                # TODO: when changing permanent=True, fix update of % download of file 
                filestore_uuid = create(
                    source=download_url,
                    filetype=file_type,
                    permanent=False
                )

                # adding history files to django model 
                temp_file = AnalysisResult(
                    analysis_uuid=analysis.uuid, file_store_uuid=filestore_uuid,
                    file_name=result_name, file_type=file_type)
                temp_file.save()
                analysis.results.add(temp_file) 
                analysis.save()
                
                # downloading analysis results into file_store
                # only download files if size is greater than 0
                if file_size > 0:
                    #task_id = import_file.subtask((filestore_uuid, True, False, file_size,))
                    # local download, force copying into the file_store instead of symlinking
                    if local_download:
                        task_id = import_file.subtask(
                            (filestore_uuid, False, True, file_size,))
                    else:
                        task_id = import_file.subtask(
                            (filestore_uuid, False, False, file_size,))
                    task_list.append(task_id)

    return task_list
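For completeness: assuming run_analysis is registered as a Celery task, which its calls to retry() and update_state() imply, a caller would start an analysis asynchronously and then watch the AnalysisStatus record that the task keeps updating. A hypothetical usage sketch:

# newer examples take the analysis UUID, older ones the Analysis instance
run_analysis.delay(analysis.uuid)

# poll the status record that run_analysis keeps up to date
status = AnalysisStatus.objects.get(analysis=analysis)
print(status.refinery_import_task_group_id, status.galaxy_history_progress)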