Example 1
def create_analysis(validated_analysis_config):
    """
    Create an Analysis instance from a properly validated analysis_config
    :param validated_analysis_config: a dict including the necessary
    information to create an Analysis that has been validated prior by
    `analysis_manager.utils.validate_analysis_config`
    :return: an Analysis instance
    :raises: RuntimeError
    """
    common_analysis_objects = fetch_objects_required_for_analysis(
        validated_analysis_config)
    current_workflow = common_analysis_objects["current_workflow"]
    data_set = common_analysis_objects["data_set"]
    user = common_analysis_objects["user"]

    try:
        tool = tool_manager.models.WorkflowTool.objects.get(
            uuid=validated_analysis_config["tool_uuid"])
    except (tool_manager.models.WorkflowTool.DoesNotExist,
            tool_manager.models.WorkflowTool.MultipleObjectsReturned) as e:
        raise RuntimeError("Couldn't fetch Tool from UUID: {}".format(e))

    analysis = Analysis.objects.create(
        uuid=str(uuid.uuid4()),
        summary="Galaxy workflow execution for: {}".format(tool.name),
        name="{} - {} - {}".format(
            tool.get_tool_name(),
            get_aware_local_time().strftime("%Y/%m/%d %H:%M:%S"),
            tool.get_owner_username().title()),
        project=user.profile.catch_all_project,
        data_set=data_set,
        workflow=current_workflow,
        time_start=timezone.now())
    analysis.set_owner(user)
    return analysis
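For context, here is a minimal usage sketch. Only the "tool_uuid" key is read directly by create_analysis above; every other key shown below is an assumption about what fetch_objects_required_for_analysis and validate_analysis_config expect, and the UUID values are placeholders.

# Hypothetical usage; key names other than "tool_uuid" are assumed.
analysis_config = {
    "tool_uuid": "<workflow-tool-uuid>",  # read directly by create_analysis
    "user_id": 1,                         # assumed: resolved to `user`
    "study_uuid": "<study-uuid>",         # assumed: resolved to `data_set`
    "workflow_uuid": "<workflow-uuid>",   # assumed: resolved to `current_workflow`
}
# Validate first; create_analysis expects a pre-validated config and raises
# RuntimeError if the WorkflowTool lookup fails.
validate_analysis_config(analysis_config)
analysis = create_analysis(analysis_config)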
Example 2
def _create_analysis_name(current_workflow):
    """
    Create a string representation of an Analysis
    :param current_workflow: The <Workflow> associated with said Analysis
    :return: String comprised of the workflow's name and a timestamp
    """
    return "{} {}".format(
        current_workflow.name,
        get_aware_local_time().strftime("%Y-%m-%d @ %H:%M:%S"))
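For illustration, the output of this helper for a stub workflow; the stub below is an assumption, since only the .name attribute of the <Workflow> is used.

class FakeWorkflow:
    """Stand-in for a <Workflow>; only .name is read by the helper."""
    name = "RNA-seq differential expression"

# Yields e.g. "RNA-seq differential expression 2014-01-31 @ 13:45:10",
# with the timestamp coming from get_aware_local_time().
print(_create_analysis_name(FakeWorkflow()))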
Example 3
def createBaseWorkflow(workflow_name):
    """Creates base template workflow"""
    return {
        "a_galaxy_workflow": "true",
        "annotation": "",
        "format-version": "0.1",
        "name": workflow_name + "-" + str(get_aware_local_time()),
        "steps": {},
    }
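A quick sketch of building on the returned skeleton. The step dictionary below follows Galaxy's workflow format-version 0.1, but its exact fields are an assumption, not something defined in this snippet.

workflow = createBaseWorkflow("hello-world")
# Populate the (initially empty) steps dict; field names here are assumed
# from Galaxy's workflow JSON format.
workflow["steps"]["0"] = {
    "id": 0,
    "name": "Input dataset",
    "type": "data_input",
    "inputs": [],
    "input_connections": {},
    "annotation": "",
}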
Example 4
def create_noderelationship(request):
    """ajax function for creating noderelationships based on multiple node sets
    """
    logger.debug("analysis_manager.views create_noderelationship called")
    logger.debug(json.dumps(request.POST, indent=4))
    if request.is_ajax():
        nr_name = request.POST.getlist('name')[0]
        nr_description = request.POST.getlist('description')[0]
        # getting nodeset uuids
        node_set_uuid1 = request.POST.getlist('node_set_uuid1')[0]
        node_set_uuid2 = request.POST.getlist('node_set_uuid2')[0]
        # getting instances of current nodeset
        curr_node_set1 = NodeSet.objects.get(uuid=node_set_uuid1)
        curr_node_set2 = NodeSet.objects.get(uuid=node_set_uuid2)
        # fields to match on
        diff_fields = request.POST.getlist('fields[]')
        if len(diff_fields) < 1:
            logger.error('create_noderelationship: failed because no field '
                         'was selected for defining Node Relationships')
        # get study and assay UUIDs
        assay_uuid = request.POST.getlist('assay_uuid')[0]
        study_uuid = request.POST.getlist('study_uuid')[0]
        # TODO: catch if study or data set don't exist
        study = Study.objects.get(uuid=study_uuid)
        assay = Assay.objects.get(uuid=assay_uuid)
        # Need to deal w/ limits on current solr queries
        # solr results
        curr_node_dict1 = curr_node_set1.solr_query_components
        curr_node_dict1 = json.loads(curr_node_dict1)
        curr_node_dict2 = curr_node_set2.solr_query_components
        curr_node_dict2 = json.loads(curr_node_dict2)
        # getting list of node uuids based on input solr query
        node_set_solr1 = get_solr_results(
            curr_node_set1.solr_query,
            selected_mode=curr_node_dict1['documentSelectionBlacklistMode'],
            selected_nodes=curr_node_dict1['documentSelection'])
        node_set_solr2 = get_solr_results(
            curr_node_set2.solr_query,
            selected_mode=curr_node_dict2['documentSelectionBlacklistMode'],
            selected_nodes=curr_node_dict2['documentSelection'])
        # all fields from the first solr query
        all_fields = node_set_solr1['responseHeader']['params']['fl']
        # actual documents retrieved from the solr response
        node_set_results1 = node_set_solr1['response']['docs']
        node_set_results2 = node_set_solr2['response']['docs']
        # match between 2 nodesets for a given column
        nodes_set_match, match_info = match_nodesets(node_set_results1,
                                                     node_set_results2,
                                                     diff_fields, all_fields)

        logger.debug("MAKING RELATIONSHIPS NOW")
        logger.debug(json.dumps(nodes_set_match, indent=4))
        logger.debug(nodes_set_match)
        # TODO: need to include names, descriptions, summary
        if nr_name.strip() == '':
            nr_name = "{} - {} {}".format(curr_node_set1.name,
                                          curr_node_set2.name,
                                          str(get_aware_local_time()))
        if nr_description.strip() == '':
            nr_description = "{} - {} {}".format(curr_node_set1.name,
                                                 curr_node_set2.name,
                                                 str(get_aware_local_time()))
        new_relationship = NodeRelationship(node_set_1=curr_node_set1,
                                            node_set_2=curr_node_set2,
                                            study=study,
                                            assay=assay,
                                            name=nr_name,
                                            summary=nr_description)
        new_relationship.save()

        for i, match in enumerate(nodes_set_match, start=1):
            node1 = Node.objects.get(uuid=match['uuid_1'])
            node2 = Node.objects.get(uuid=match['uuid_2'])
            new_pair = NodePair(node1=node1, node2=node2, group=i)
            new_pair.save()
            new_relationship.node_pairs.add(new_pair)

        return HttpResponse(json.dumps(match_info, indent=4),
                            content_type='application/json')
    return HttpResponseBadRequest()  # non-AJAX requests are not supported
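The POST fields this view expects can be read off the request.POST.getlist() calls above. A minimal sketch using Django's test client; the URL is an assumption, the UUIDs and field names are placeholders, and the X-Requested-With header is required so that request.is_ajax() passes.

from django.test import Client

client = Client()
response = client.post(
    "/analysis_manager/node_relationship/new/",  # assumed URL
    {
        "name": "tumor vs. normal",
        "description": "",
        "node_set_uuid1": "<nodeset-1-uuid>",
        "node_set_uuid2": "<nodeset-2-uuid>",
        "fields[]": ["<field-to-match-on>"],  # list values map to getlist()
        "study_uuid": "<study-uuid>",
        "assay_uuid": "<assay-uuid>",
    },
    HTTP_X_REQUESTED_WITH="XMLHttpRequest",
)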
Example 5
def run(request):
    """Run analysis, return URL of the analysis status page
    Needs refactoring
    """
    logger.debug("Received request to start analysis")
    if not request.is_ajax():
        return HttpResponseBadRequest()  # 400
    allowed_methods = ['POST']
    if request.method not in allowed_methods:
        return HttpResponseNotAllowed(allowed_methods)  # 405

    analysis_config = json.loads(request.body)
    try:
        workflow_uuid = analysis_config['workflowUuid']
        study_uuid = analysis_config['studyUuid']
        node_set_uuid = analysis_config['nodeSetUuid']
        node_group_uuid = analysis_config['nodeGroupUuid']
        node_relationship_uuid = analysis_config['nodeRelationshipUuid']
        custom_name = analysis_config['name']
    except KeyError:
        return HttpResponseBadRequest()  # 400
    # must provide workflow and study UUIDs,
    # and either node set UUID or node relationship UUID
    if not (workflow_uuid and study_uuid and
            (node_set_uuid or node_relationship_uuid or node_group_uuid)):
        return HttpResponseBadRequest()  # 400

    # single-input workflow based node group
    if node_group_uuid:
        try:
            curr_node_group = NodeGroup.objects.get(uuid=node_group_uuid)
        except NodeGroup.DoesNotExist:
            logger.error("Node Group with UUID '{}' does not exist".format(
                node_group_uuid))
            return HttpResponse(status=404)
        except NodeGroup.MultipleObjectsReturned:
            logger.error("Node Group with UUID '{}' returned multiple "
                         "objects".format(node_group_uuid))
            return HttpResponse(status=500)

        try:
            curr_workflow = Workflow.objects.get(uuid=workflow_uuid)
        except Workflow.DoesNotExist:
            logger.error("Workflow with UUID '{}' does not exist".format(
                workflow_uuid))
            return HttpResponse(status=404)
        except Workflow.MultipleObjectsReturned:
            logger.error("Workflow with UUID '{}' returned multiple objects"
                         .format(workflow_uuid))
            return HttpResponse(status=500)

        try:
            study = Study.objects.get(uuid=study_uuid)
        except Study.DoesNotExist:
            logger.error("Study with UUID '{}' does not exist".format(
                study_uuid))
            return HttpResponse(status=404)
        except Study.MultipleObjectsReturned:
            logger.error("Study with UUID '{}' returned multiple objects"
                         .format(study_uuid))
            return HttpResponse(status=500)

        investigation_links = InvestigationLink.objects.filter(
            investigation__uuid=study.investigation.uuid).order_by(
                "version")
        if not investigation_links:
            logger.error("InvestigationLink with UUID '{}' with does not "
                         "exist".format(study.investigation.uuid))
            return HttpResponse(status='404')

        data_set = investigation_links.reverse()[0].data_set
        logger.info("Associating analysis with data set %s (%s)",
                    data_set, data_set.uuid)

        # ANALYSIS MODEL
        # How to create a simple analysis object
        if not custom_name:
            temp_name = curr_workflow.name + " " + get_aware_local_time()\
                .strftime("%Y-%m-%d @ %H:%M:%S")
        else:
            temp_name = custom_name

        summary_name = "None provided."
        analysis = Analysis.objects.create(
            summary=summary_name,
            name=temp_name,
            project=request.user.get_profile().catch_all_project,
            data_set=data_set,
            workflow=curr_workflow,
            time_start=timezone.now()
        )
        analysis.set_owner(request.user)

        # getting distinct workflow inputs
        try:
            workflow_data_inputs = curr_workflow.data_inputs.all()[0]
        except IndexError:
            logger.error("Workflow with UUID '{}' has an index "
                         "error with inputs".format(workflow_uuid.uuid))
            return HttpResponse(status='500')

        # NEED TO GET LIST OF FILE_UUIDS from node_group_uuid fields
        count = 0
        for node_file in curr_node_group.nodes.all():
            count += 1
            temp_input = WorkflowDataInputMap.objects.create(
                workflow_data_input_name=workflow_data_inputs.name,
                data_uuid=node_file.uuid,
                pair_id=count
            )
            analysis.workflow_data_input_maps.add(temp_input)
            analysis.save()

    # single-input workflow
    if node_set_uuid:
        # TODO: handle DoesNotExist exception
        curr_node_set = NodeSet.objects.get(uuid=node_set_uuid)
        curr_node_dict = curr_node_set.solr_query_components
        curr_node_dict = json.loads(curr_node_dict)
        # solr results
        solr_uuids = get_solr_results(
            curr_node_set.solr_query,
            only_uuids=True,
            selected_mode=curr_node_dict['documentSelectionBlacklistMode'],
            selected_nodes=curr_node_dict['documentSelection']
        )
        # retrieving workflow based on input workflow_uuid
        # TODO: handle DoesNotExist exception
        curr_workflow = Workflow.objects.get(uuid=workflow_uuid)

        # TODO: catch if study or data set don't exist
        study = Study.objects.get(uuid=study_uuid)
        data_set = InvestigationLink.objects.filter(
            investigation__uuid=study.investigation.uuid).order_by(
                "version").reverse()[0].data_set

        logger.info("Associating analysis with data set %s (%s)",
                    data_set, data_set.uuid)

        # ANALYSIS MODEL
        # How to create a simple analysis object
        if not custom_name:
            temp_name = curr_workflow.name + " " + get_aware_local_time()\
                .strftime("%Y-%m-%d @ %H:%M:%S")
        else:
            temp_name = custom_name

        summary_name = "None provided."
        analysis = Analysis.objects.create(
            summary=summary_name,
            name=temp_name,
            project=request.user.get_profile().catch_all_project,
            data_set=data_set,
            workflow=curr_workflow,
            time_start=timezone.now()
        )
        analysis.set_owner(request.user)

        # getting distinct workflow inputs
        workflow_data_inputs = curr_workflow.data_inputs.all()[0]

        # NEED TO GET LIST OF FILE_UUIDS from solr query
        count = 0
        for file_uuid in solr_uuids:
            count += 1
            temp_input = WorkflowDataInputMap.objects.create(
                workflow_data_input_name=workflow_data_inputs.name,
                data_uuid=file_uuid,
                pair_id=count
            )
            analysis.workflow_data_input_maps.add(temp_input)
            analysis.save()

    # dual-input workflow
    if node_relationship_uuid:
        # Input list for running analysis
        ret_list = []
        # retrieving workflow based on input workflow_uuid
        curr_workflow = Workflow.objects.get(uuid=workflow_uuid)

        # TODO: catch if study or data set don't exist
        study = Study.objects.get(uuid=study_uuid)
        data_set = InvestigationLink.objects.filter(
            investigation__uuid=study.investigation.uuid).order_by(
                "version").reverse()[0].data_set

        # Get node relationship model
        curr_relationship = NodeRelationship.objects.get(
            uuid=node_relationship_uuid)
        # Iterating over node pairs
        input_keys = []
        base_input = {}
        # defining inputs used for analysis
        for workflow_inputs in curr_workflow.input_relationships.all():
            base_input[workflow_inputs.set1] = {}
            base_input[workflow_inputs.set2] = {}
            input_keys.append(workflow_inputs.set1)
            input_keys.append(workflow_inputs.set2)

        # creating an instance of input data pairing for analysis,
        # i.e. [{u'exp_file':
        # {'node_uuid': u'3d061699-6bc8-11e2-9b55-406c8f1d5108', 'pair_id': 1},
        # u'input_file':
        # {'node_uuid': u'3d180d11-6bc8-11e2-9bc7-406c8f1d5108', 'pair_id': 1}}
        # ]
        count = 1
        for curr_pair in curr_relationship.node_pairs.all():
            temp_pair = copy.deepcopy(base_input)
            logger.debug("Temp Pair: %s", temp_pair)
            logger.debug("Current Pair: %s", curr_pair)
            if curr_pair.node2:
                temp_pair[input_keys[0]]['node_uuid'] = curr_pair.node1.uuid
                temp_pair[input_keys[0]]['pair_id'] = count
                temp_pair[input_keys[1]]['node_uuid'] = curr_pair.node2.uuid
                temp_pair[input_keys[1]]['pair_id'] = count
                ret_list.append(temp_pair)
                logger.debug("Temp Pair: %s", temp_pair)
                count += 1

        logger.info("Associating analysis with data set %s (%s)",
                    data_set, data_set.uuid)

        # ANALYSIS MODEL
        # How to create a simple analysis object
        if not custom_name:
            temp_name = curr_workflow.name + " " + get_aware_local_time()\
                .strftime("%Y-%m-%d @ %H:%M:%S")
        else:
            temp_name = custom_name

        summary_name = "None provided."

        analysis = Analysis.objects.create(
            summary=summary_name,
            name=temp_name,
            project=request.user.get_profile().catch_all_project,
            data_set=data_set,
            workflow=curr_workflow,
            time_start=timezone.now()
        )
        analysis.set_owner(request.user)

        # getting distinct workflow inputs
        workflow_data_inputs = curr_workflow.data_inputs.all()

        logger.debug("ret_list")
        logger.debug(json.dumps(ret_list, indent=4))

        # ANALYSIS MODEL
        # Updating Refinery Models for updated workflow input
        # (galaxy workflow input id & node_uuid)
        count = 0
        for samp in ret_list:
            count += 1
            for input_name, node_info in samp.items():
                temp_input = WorkflowDataInputMap.objects.create(
                    workflow_data_input_name=input_name,
                    data_uuid=node_info["node_uuid"],
                    pair_id=count)
                analysis.workflow_data_input_maps.add(temp_input)
                analysis.save()

    # create the analysis status record (objects.create() already saves it)
    analysis_status = AnalysisStatus.objects.create(analysis=analysis)

    # call function via analysis_manager
    run_analysis.delay(analysis.uuid)

    return HttpResponse(reverse('analysis-status', args=(analysis.uuid,)))
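The JSON body this view expects follows from the KeyError block above: all six keys must be present (a missing key yields a 400), and exactly one of the three node selector UUIDs should be non-empty. A sketch, with an assumed URL and placeholder UUIDs:

import json
from django.test import Client

client = Client()
payload = {
    "workflowUuid": "<workflow-uuid>",
    "studyUuid": "<study-uuid>",
    "nodeSetUuid": "<nodeset-uuid>",  # exercises the single-input NodeSet path
    "nodeGroupUuid": None,
    "nodeRelationshipUuid": None,
    "name": "",  # empty: falls back to "<workflow name> <timestamp>"
}
response = client.post(
    "/analysis_manager/run/",  # assumed URL
    data=json.dumps(payload),
    content_type="application/json",
    HTTP_X_REQUESTED_WITH="XMLHttpRequest",  # required by request.is_ajax()
)
# On success the response body is the URL of the analysis status page.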