Example #1
0
def _get_galaxy_download_tasks(analysis):
    """Get file import tasks for Galaxy analysis results.

    Looks up the files the workflow marked for download, retrieves the list
    of datasets in the Galaxy history of the analysis and, for every dataset
    in state "ok" whose name matches one of the marked files, creates a file
    store item plus an AnalysisResult that is attached to the analysis.

    Returns a list of import_file subtasks, one for each matching dataset
    with a file size greater than zero.  If the history file list cannot be
    retrieved because of a Galaxy connection error, the analysis is marked
    as failed, Galaxy cleanup is triggered and the (empty) list is returned.
    """
    logger.debug("Preparing to download analysis results from Galaxy")
    task_list = []

    # files to download are predetermined by the workflow w/ keep operators;
    # index them by step ID (as string) because that is what gets compared
    # against the Galaxy dataset names below
    dl_dict = {}
    for dl in analysis.workflow_dl_files.all():
        dl_dict[str(dl.step_id)] = {
            'filename': dl.filename,
            'pair_id': dl.pair_id,
        }
    galaxy_instance = analysis.workflow.workflow_engine.instance

    try:
        download_list = galaxy_instance.get_history_file_list(
            analysis.history_id)
    except galaxy.client.ConnectionError as exc:
        # format the message once so that the log entry and the analysis
        # status both contain the analysis name and the error details
        error_msg = (
            "Error downloading Galaxy history files for analysis '%s': %s"
            % (analysis.name, exc.message))
        logger.error(error_msg)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis.galaxy_cleanup()
        return task_list

    # Iterating through files in current galaxy history
    for results in download_list:
        # only datasets in state "ok" are downloaded
        if results['state'] != 'ok':
            continue
        file_type = results["type"]
        curr_file_id = results['name']
        # skip datasets the workflow did not mark for download
        if curr_file_id not in dl_dict:
            continue
        result_name = dl_dict[curr_file_id]['filename'] + '.' + file_type
        # size of file defined by galaxy
        file_size = results['file_size']
        # Results are either copied directly (local download) or fetched via
        # the dataset URL; HTML files always go through the dataset URL so
        # that they are retrieved as zip archives
        if galaxy_instance.local_download and file_type != 'html':
            download_url = results['file_name']
        else:
            download_url = urlparse.urljoin(
                galaxy_instance.base_url,
                '/'.join(['datasets', str(results['dataset_id']),
                          'display?to_ext=txt']))
        # workaround to set the correct file type for zip archives of
        # FastQC HTML reports produced by Galaxy dynamically
        if file_type == 'html':
            file_type = 'zip'
        # TODO: when changing permanent=True, fix update of % download
        # of file
        filestore_uuid = create(source=download_url, filetype=file_type)
        # adding history files to django model
        temp_file = AnalysisResult(analysis_uuid=analysis.uuid,
                                   file_store_uuid=filestore_uuid,
                                   file_name=result_name,
                                   file_type=file_type)
        temp_file.save()
        analysis.results.add(temp_file)
        analysis.save()
        # downloading analysis results into file_store:
        # empty files (size 0) are registered above but not imported
        if file_size > 0:
            task_id = import_file.subtask(
                (filestore_uuid, False, file_size))
            task_list.append(task_id)

    return task_list
Example #2
0
def _get_galaxy_download_task_ids(analysis):
    """Get file import tasks for Galaxy analysis results.

    Obtains the workflow tool for the analysis, lets it create the analysis
    output node connections and asks it for the list of Galaxy datasets to
    download.  For every dataset in state "ok" a FileStoreItem and an
    AnalysisResult are saved and the result is attached to the analysis.

    Returns a list of import_file subtasks, one for each such dataset with a
    file size greater than zero.  If the dataset list cannot be retrieved
    because of a Galaxy connection error, the analysis is marked as failed,
    Galaxy cleanup is triggered and the (empty) list is returned.
    """
    logger.debug("Preparing to download analysis results from Galaxy")
    task_id_list = []

    # retrieving list of files to download for workflow
    tool = _get_workflow_tool(analysis.uuid)
    tool.create_analysis_output_node_connections()

    galaxy_instance = analysis.workflow.workflow_engine.instance

    try:
        download_list = tool.get_galaxy_dataset_download_list()
    except galaxy.client.ConnectionError as exc:
        # format the message once so that the log entry and the analysis
        # status both contain the analysis name and the error details
        error_msg = (
            "Error downloading Galaxy history files for analysis '%s': %s"
            % (analysis.name, exc.message))
        logger.error(error_msg)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis.galaxy_cleanup()
        return task_id_list

    # Iterating through files in current galaxy history
    for results in download_list:
        # only datasets in state "ok" are downloaded
        if results['state'] != 'ok':
            continue
        file_extension = results["type"]
        result_name = "{}.{}".format(results['name'], file_extension)

        # size of file defined by galaxy
        file_size = results['file_size']
        # Results are either copied directly (local download) or fetched via
        # the dataset URL; HTML files always go through the dataset URL so
        # that they are retrieved as zip archives
        if galaxy_instance.local_download and file_extension != 'html':
            download_url = results['file_name']
        else:
            download_url = urlparse.urljoin(
                galaxy_instance.base_url,
                '/'.join(['datasets', str(results['dataset_id']),
                          'display?to_ext=txt']))

        file_store_item = FileStoreItem(source=download_url)

        # workaround to set the correct file type for zip archives of
        # FastQC HTML reports produced by Galaxy dynamically
        if file_extension == 'html':
            file_extension = 'zip'
        # assign file type manually since it cannot be inferred from source;
        # an unknown or ambiguous extension is only logged and the item is
        # saved without an explicitly assigned file type
        try:
            extension = FileExtension.objects.get(name=file_extension)
        except (FileExtension.DoesNotExist,
                FileExtension.MultipleObjectsReturned) as exc:
            logger.warning(
                "Could not assign type to file '%s' using "
                "extension '%s': %s", file_store_item, file_extension, exc)
        else:
            file_store_item.filetype = extension.filetype

        file_store_item.save()

        # adding history files to django model
        temp_file = AnalysisResult(analysis_uuid=analysis.uuid,
                                   file_store_uuid=file_store_item.uuid,
                                   file_name=result_name,
                                   file_type=file_extension)
        temp_file.save()
        analysis.results.add(temp_file)
        analysis.save()

        # downloading analysis results into file_store:
        # empty files (size 0) are registered above but not imported
        if file_size > 0:
            task_id = import_file.subtask(
                (file_store_item.uuid, False, file_size))
            task_id_list.append(task_id)

    return task_id_list
Example #3
0
def download_history_files(analysis):
    """Prepare file store import tasks for an analysis' Galaxy history files.

    Re-fetches the analysis by UUID, retrieves the file list of its Galaxy
    history and, for every file in state "ok" whose name matches a file the
    workflow marked for download, creates a file store entry and an
    AnalysisResult attached to the analysis.

    Returns a list of import_file subtasks, one per matching file with a
    size greater than zero.  The list is empty if the history file list
    could not be retrieved.
    """
    logger.debug("analysis_manger.download_history_files called")

    # TODO: handle Django exceptions
    analysis = Analysis.objects.get(uuid=analysis.uuid)

    # files to download are predetermined by the workflow w/ keep operators,
    # indexed here by step ID (as string)
    wanted_files = {}
    for dl_file in analysis.workflow_dl_files.all():
        entry = {'filename': dl_file.filename, 'pair_id': dl_file.pair_id}
        wanted_files[str(dl_file.step_id)] = entry

    import_tasks = []
    galaxy_connection = analysis.get_galaxy_connection()
    try:
        history_files = galaxy_connection.get_history_file_list(
            analysis.history_id)
    except RuntimeError as exc:
        failure_msg = (
            "Post-processing failed: "
            "error downloading Galaxy history files for analysis '{}': {}"
        ).format(analysis.name, exc.message)
        logger.error(failure_msg)
        # connection, timeout and auth errors are only logged; the analysis
        # is left untouched
        if isinstance(exc, (ConnectionError, TimeoutError, AuthError)):
            return import_tasks
        # any other error fails the analysis and removes its Galaxy objects
        analysis.set_status(Analysis.FAILURE_STATUS, failure_msg)
        try:
            analysis.delete_galaxy_library()
            analysis.delete_galaxy_workflow()
            analysis.delete_galaxy_history()
        except RuntimeError:
            cleanup_msg = "Cleanup failed for analysis '{}'".format(
                analysis.name)
            logger.error(cleanup_msg)
        return import_tasks

    for dataset in history_files:
        # only files in state "ok" are considered
        if dataset['state'] != 'ok':
            continue
        file_type = dataset["type"]
        dataset_name = dataset['name']
        # skip files the workflow did not mark for download
        if dataset_name not in wanted_files:
            continue

        result_name = wanted_files[dataset_name]['filename'] + '.' + file_type
        # size of file as reported by Galaxy
        file_size = dataset['file_size']

        # decides between copying files directly and downloading via HTTP
        workflow_engine = analysis.workflow.workflow_engine
        local_download = workflow_engine.instance.local_download

        # HTML files always go through the dataset URL so that they are
        # retrieved as zip archives
        if local_download and file_type != 'html':
            source = dataset['file_name']
        else:
            source = galaxy_connection.make_url(
                str(dataset['dataset_id']), is_data=True, key=False)

        # workaround to set the correct file type for zip archives of
        # reports produced by FASTQC
        if file_type == 'html':
            file_type = 'zip'

        # TODO: when changing permanent=True, fix update of % download of
        # file
        filestore_uuid = create(
            source=source, filetype=file_type, permanent=False)

        # register the history file as a result of the analysis
        result = AnalysisResult(
            analysis_uuid=analysis.uuid,
            file_store_uuid=filestore_uuid,
            file_name=result_name,
            file_type=file_type)
        result.save()
        analysis.results.add(result)
        analysis.save()

        # empty files (size 0) are registered above but not imported
        if file_size > 0:
            # local download: force copying into the file_store instead of
            # symlinking
            force_copy = True if local_download else False
            import_tasks.append(import_file.subtask(
                (filestore_uuid, False, force_copy, file_size)))

    return import_tasks
Example #4
0
def get_galaxy_download_tasks(analysis):
    """Get file import tasks for Galaxy analysis results.

    Looks up the files the workflow marked for download, retrieves the list
    of datasets in the Galaxy history of the analysis and, for every dataset
    in state "ok" whose name matches one of the marked files, creates a
    non-permanent file store item plus an AnalysisResult that is attached to
    the analysis.

    Returns a list of import_file subtasks, one for each matching dataset
    with a file size greater than zero.  If the history file list cannot be
    retrieved because of a Galaxy connection error, the analysis is marked
    as failed, Galaxy cleanup is triggered and the (empty) list is returned.
    """
    logger.debug("Preparing to download analysis results from Galaxy")

    # files to download are predetermined by the workflow w/ keep operators;
    # index them by step ID (as string) because that is what gets compared
    # against the Galaxy dataset names below
    dl_dict = {}
    for dl in analysis.workflow_dl_files.all():
        dl_dict[str(dl.step_id)] = {
            'filename': dl.filename,
            'pair_id': dl.pair_id,
        }
    task_list = []
    galaxy_instance = analysis.workflow.workflow_engine.instance
    try:
        download_list = galaxy_instance.get_history_file_list(
            analysis.history_id)
    except galaxy.client.ConnectionError as exc:
        # format the message once so that the log entry and the analysis
        # status both contain the analysis name and the error details
        error_msg = (
            "Error downloading Galaxy history files for analysis '%s': %s"
            % (analysis.name, exc.message))
        logger.error(error_msg)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis.galaxy_cleanup()
        return task_list

    # Iterating through files in current galaxy history
    for results in download_list:
        # only datasets in state "ok" are downloaded
        if results['state'] != 'ok':
            continue
        file_type = results["type"]
        curr_file_id = results['name']
        # skip datasets the workflow did not mark for download
        if curr_file_id not in dl_dict:
            continue
        result_name = dl_dict[curr_file_id]['filename'] + '.' + file_type
        # size of file defined by galaxy
        file_size = results['file_size']
        # Results are either copied directly (local download) or fetched via
        # the dataset URL; HTML files always go through the dataset URL so
        # that they are retrieved as zip archives
        if galaxy_instance.local_download and file_type != 'html':
            download_url = results['file_name']
        else:
            download_url = urlparse.urljoin(
                galaxy_instance.base_url,
                '/'.join(['datasets', str(results['dataset_id']),
                          'display?to_ext=txt']))
        # workaround to set the correct file type for zip archives of
        # FastQC HTML reports produced by Galaxy dynamically
        if file_type == 'html':
            file_type = 'zip'
        # TODO: when changing permanent=True, fix update of % download
        # of file
        filestore_uuid = create(
            source=download_url, filetype=file_type, permanent=False)
        # adding history files to django model
        temp_file = AnalysisResult(
            analysis_uuid=analysis.uuid,
            file_store_uuid=filestore_uuid,
            file_name=result_name, file_type=file_type)
        temp_file.save()
        analysis.results.add(temp_file)
        analysis.save()
        # downloading analysis results into file_store:
        # empty files (size 0) are registered above but not imported
        if file_size > 0:
            # local download, force copying into the file_store instead
            # of symlinking
            force_copy = bool(galaxy_instance.local_download)
            task_id = import_file.subtask(
                (filestore_uuid, False, force_copy, file_size,))
            task_list.append(task_id)

    return task_list
def download_history_files(analysis):
    """Build import tasks for the Galaxy history files of an analysis.

    The analysis is reloaded by UUID.  Each file of its Galaxy history that
    is in state "ok" and whose name matches a file the workflow marked for
    download gets a file store entry and an AnalysisResult attached to the
    analysis.

    Returns a list of import_file subtasks (one per matching file whose size
    is greater than zero); the list is empty when the history file list
    could not be retrieved.
    """
    logger.debug("analysis_manger.download_history_files called")

    # TODO: handle Django exceptions
    analysis = Analysis.objects.get(uuid=analysis.uuid)

    # files predetermined by the workflow w/ keep operators, by step ID
    files_by_step = {
        str(item.step_id): {
            'filename': item.filename,
            'pair_id': item.pair_id,
        }
        for item in analysis.workflow_dl_files.all()
    }

    tasks = []
    # current galaxy connection
    conn = analysis.get_galaxy_connection()
    try:
        file_list = conn.get_history_file_list(analysis.history_id)
    except RuntimeError as exc:
        details = "error downloading Galaxy history files for analysis " \
                  "'{}': {}".format(analysis.name, exc.message)
        message = "Post-processing failed: " + details
        logger.error(message)
        ignorable = (ConnectionError, TimeoutError, AuthError)
        if not isinstance(exc, ignorable):
            # fail the analysis and try to remove its Galaxy objects
            analysis.set_status(Analysis.FAILURE_STATUS, message)
            try:
                analysis.delete_galaxy_library()
                analysis.delete_galaxy_workflow()
                analysis.delete_galaxy_history()
            except RuntimeError:
                logger.error(
                    "Cleanup failed for analysis '{}'".format(analysis.name))
        return tasks

    # walk through the files of the current galaxy history
    for item in file_list:
        # files that are not in state "ok" are ignored
        if item['state'] != 'ok':
            continue
        extension = item["type"]
        wanted = files_by_step.get(item['name'])
        # files not marked for download are ignored
        if wanted is None:
            continue

        name_with_ext = wanted['filename'] + '.' + extension
        # size of file defined by galaxy
        size = item['file_size']

        # tells whether results are copied directly or downloaded via http
        instance = analysis.workflow.workflow_engine.instance
        use_local_copy = instance.local_download

        # HTML files are retrieved as zip archives via the dataset URL
        if not use_local_copy or extension == 'html':
            location = conn.make_url(
                str(item['dataset_id']), is_data=True, key=False)
        else:
            location = item['file_name']

        # workaround to set the correct file type for zip archives of
        # reports produced by FASTQC
        if extension == 'html':
            extension = 'zip'

        # TODO: when changing permanent=True, fix update of % download of
        # file
        store_uuid = create(
            source=location, filetype=extension, permanent=False)

        # add the history file to the django model
        analysis_result = AnalysisResult(
            analysis_uuid=analysis.uuid,
            file_store_uuid=store_uuid,
            file_name=name_with_ext,
            file_type=extension,
        )
        analysis_result.save()
        analysis.results.add(analysis_result)
        analysis.save()

        # files of size 0 are registered above but never imported
        if size <= 0:
            continue
        # local download: force copying into the file_store instead of
        # symlinking
        copy_flag = True if use_local_copy else False
        subtask = import_file.subtask((store_uuid, False, copy_flag, size,))
        tasks.append(subtask)

    return tasks