def create_analysis(validated_analysis_config):
    """Create an Analysis instance from a properly validated
    analysis_config
    :param validated_analysis_config: a dict including the necessary
    information to create an Analysis, previously validated by
    `analysis_manager.utils.validate_analysis_config`
    :return: an Analysis instance
    :raises: RuntimeError
    """
    common_analysis_objects = fetch_objects_required_for_analysis(
        validated_analysis_config
    )
    current_workflow = common_analysis_objects["current_workflow"]
    data_set = common_analysis_objects["data_set"]
    user = common_analysis_objects["user"]

    try:
        tool = tool_manager.models.WorkflowTool.objects.get(
            uuid=validated_analysis_config["tool_uuid"]
        )
    except (tool_manager.models.WorkflowTool.DoesNotExist,
            tool_manager.models.WorkflowTool.MultipleObjectsReturned) as e:
        raise RuntimeError("Couldn't fetch Tool from UUID: {}".format(e))

    analysis = Analysis.objects.create(
        uuid=str(uuid.uuid4()),
        summary="Galaxy workflow execution for: {}".format(tool.name),
        name="{} - {} - {}".format(
            tool.get_tool_name(),
            get_aware_local_time().strftime("%Y/%m/%d %H:%M:%S"),
            tool.get_owner_username().title()
        ),
        project=user.profile.catch_all_project,
        data_set=data_set,
        workflow=current_workflow,
        time_start=timezone.now()
    )
    analysis.set_owner(user)
    return analysis

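# Usage sketch for create_analysis (illustrative only): the dict below is a
# hypothetical shape -- only the "tool_uuid" key is read directly here; any
# remaining keys are consumed by fetch_objects_required_for_analysis() and by
# the prior call to analysis_manager.utils.validate_analysis_config().
#
#   validated_config = {
#       "tool_uuid": "<WorkflowTool UUID>",
#       # ...other keys required by fetch_objects_required_for_analysis()...
#   }
#   analysis = create_analysis(validated_config)
#   print(analysis.name)  # e.g. "FastQC - 2016/03/01 14:05:32 - Alice"
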
def _create_analysis_name(current_workflow):
    """Create a string representative of an Analysis
    :param current_workflow: The <Workflow> associated with said Analysis
    :return: String comprised of the workflow's name and a timestamp
    """
    return "{} {}".format(
        current_workflow.name,
        get_aware_local_time().strftime("%Y-%m-%d @ %H:%M:%S")
    )

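# Example output (workflow name and time are hypothetical):
#
#   _create_analysis_name(workflow)
#   # -> "RNA-seq workflow 2016-03-01 @ 14:05:32"
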
def createBaseWorkflow(workflow_name):
    """Creates base template workflow"""
    return {
        "a_galaxy_workflow": "true",
        "annotation": "",
        "format-version": "0.1",
        "name": workflow_name + "-" + str(get_aware_local_time()),
        "steps": {},
    }

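# Example of the returned template; the timestamp comes from
# str(get_aware_local_time()) and is illustrative here:
#
#   createBaseWorkflow("my-workflow")
#   # -> {
#   #        "a_galaxy_workflow": "true",
#   #        "annotation": "",
#   #        "format-version": "0.1",
#   #        "name": "my-workflow-2016-03-01 14:05:32.123456+00:00",
#   #        "steps": {},
#   #    }
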
def create_noderelationship(request):
    """AJAX function for creating node relationships based on multiple
    node sets
    """
    logger.debug("analysis_manager.views create_noderelationship called")
    logger.debug(json.dumps(request.POST, indent=4))

    if request.is_ajax():
        nr_name = request.POST.getlist('name')[0]
        nr_description = request.POST.getlist('description')[0]
        # getting node set UUIDs
        node_set_uuid1 = request.POST.getlist('node_set_uuid1')[0]
        node_set_uuid2 = request.POST.getlist('node_set_uuid2')[0]
        # getting instances of the current node sets
        curr_node_set1 = NodeSet.objects.get(uuid=node_set_uuid1)
        curr_node_set2 = NodeSet.objects.get(uuid=node_set_uuid2)
        # fields to match on
        diff_fields = request.POST.getlist('fields[]')
        if len(diff_fields) < 1:
            logger.error('create_noderelationship: failed because no field '
                         'was selected for defining Node Relationships')
        # get study and assay UUIDs
        assay_uuid = request.POST.getlist('assay_uuid')[0]
        study_uuid = request.POST.getlist('study_uuid')[0]
        # TODO: catch if study or data set don't exist
        study = Study.objects.get(uuid=study_uuid)
        assay = Assay.objects.get(uuid=assay_uuid)
        # Need to deal w/ limits on current Solr queries
        # Solr results
        curr_node_dict1 = json.loads(curr_node_set1.solr_query_components)
        curr_node_dict2 = json.loads(curr_node_set2.solr_query_components)
        # getting lists of node UUIDs based on the input Solr queries
        node_set_solr1 = get_solr_results(
            curr_node_set1.solr_query,
            selected_mode=curr_node_dict1['documentSelectionBlacklistMode'],
            selected_nodes=curr_node_dict1['documentSelection']
        )
        node_set_solr2 = get_solr_results(
            curr_node_set2.solr_query,
            selected_mode=curr_node_dict2['documentSelectionBlacklistMode'],
            selected_nodes=curr_node_dict2['documentSelection']
        )
        # all fields from the first Solr query
        all_fields = node_set_solr1['responseHeader']['params']['fl']
        # actual documents retrieved from the Solr response
        node_set_results1 = node_set_solr1['response']['docs']
        node_set_results2 = node_set_solr2['response']['docs']
        # match between the two node sets for a given column
        nodes_set_match, match_info = match_nodesets(
            node_set_results1, node_set_results2, diff_fields, all_fields
        )
        logger.debug("MAKING RELATIONSHIPS NOW")
        logger.debug(json.dumps(nodes_set_match, indent=4))
        # TODO: need to include names, descriptions, summary
        if nr_name.strip() == '':
            nr_name = "{} - {} {}".format(curr_node_set1.name,
                                          curr_node_set2.name,
                                          str(get_aware_local_time()))
        if nr_description.strip() == '':
            nr_description = "{} - {} {}".format(curr_node_set1.name,
                                                 curr_node_set2.name,
                                                 str(get_aware_local_time()))
        new_relationship = NodeRelationship(node_set_1=curr_node_set1,
                                            node_set_2=curr_node_set2,
                                            study=study,
                                            assay=assay,
                                            name=nr_name,
                                            summary=nr_description)
        new_relationship.save()

        for i in range(len(nodes_set_match)):
            node1 = Node.objects.get(uuid=nodes_set_match[i]['uuid_1'])
            node2 = Node.objects.get(uuid=nodes_set_match[i]['uuid_2'])
            new_pair = NodePair(node1=node1, node2=node2, group=i + 1)
            new_pair.save()
            new_relationship.node_pairs.add(new_pair)

        return HttpResponse(json.dumps(match_info, indent=4),
                            content_type='application/json')

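# Sketch of the AJAX POST parameters this view reads (all values are
# hypothetical placeholders; 'fields[]' holds the attribute names used to
# match nodes between the two node sets):
#
#   name:            "Tumor vs. Normal"
#   description:     ""                      # blank -> auto-generated above
#   node_set_uuid1:  "<NodeSet UUID>"
#   node_set_uuid2:  "<NodeSet UUID>"
#   fields[]:        ["<attribute field name>"]
#   study_uuid:      "<Study UUID>"
#   assay_uuid:      "<Assay UUID>"
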
def run(request):
    """Run analysis, return URL of the analysis status page
    Needs re-factoring
    """
    logger.debug("Received request to start analysis")
    if not request.is_ajax():
        return HttpResponseBadRequest()  # 400
    allowed_methods = ['POST']
    if request.method not in allowed_methods:
        return HttpResponseNotAllowed(allowed_methods)  # 405

    analysis_config = json.loads(request.body)
    try:
        workflow_uuid = analysis_config['workflowUuid']
        study_uuid = analysis_config['studyUuid']
        node_set_uuid = analysis_config['nodeSetUuid']
        node_group_uuid = analysis_config['nodeGroupUuid']
        node_relationship_uuid = analysis_config['nodeRelationshipUuid']
        custom_name = analysis_config['name']
    except KeyError:
        return HttpResponseBadRequest()  # 400
    # must provide workflow and study UUIDs, and either a node set,
    # node group, or node relationship UUID
    if not (workflow_uuid and study_uuid and
            (node_set_uuid or node_relationship_uuid or node_group_uuid)):
        return HttpResponseBadRequest()  # 400

    # single-input workflow based on a node group
    if node_group_uuid:
        try:
            curr_node_group = NodeGroup.objects.get(uuid=node_group_uuid)
        except NodeGroup.DoesNotExist:
            logger.error("Node Group with UUID '{}' does not exist".format(
                node_group_uuid))
            return HttpResponse(status=404)
        except NodeGroup.MultipleObjectsReturned:
            logger.error("Node Group with UUID '{}' returned multiple "
                         "objects".format(node_group_uuid))
            return HttpResponse(status=500)
        try:
            curr_workflow = Workflow.objects.get(uuid=workflow_uuid)
        except Workflow.DoesNotExist:
            logger.error("Workflow with UUID '{}' does not exist".format(
                workflow_uuid))
            return HttpResponse(status=404)
        except Workflow.MultipleObjectsReturned:
            logger.error("Workflow with UUID '{}' returns multiple objects"
                         .format(workflow_uuid))
            return HttpResponse(status=500)
        try:
            study = Study.objects.get(uuid=study_uuid)
        except Study.DoesNotExist:
            logger.error("Study with UUID '{}' does not exist".format(
                study_uuid))
            return HttpResponse(status=404)
        except Study.MultipleObjectsReturned:
            logger.error("Study with UUID '{}' returns multiple objects"
                         .format(study_uuid))
            return HttpResponse(status=500)
        investigation_links = InvestigationLink.objects.filter(
            investigation__uuid=study.investigation.uuid).order_by("version")
        if not investigation_links:
            logger.error("InvestigationLink for Investigation with UUID '{}' "
                         "does not exist".format(study.investigation.uuid))
            return HttpResponse(status=404)
        data_set = investigation_links.reverse()[0].data_set
        logger.info("Associating analysis with data set %s (%s)",
                    data_set, data_set.uuid)
        # ANALYSIS MODEL
        # How to create a simple analysis object
        if not custom_name:
            temp_name = curr_workflow.name + " " + get_aware_local_time()\
                .strftime("%Y-%m-%d @ %H:%M:%S")
        else:
            temp_name = custom_name
        summary_name = "None provided."
        analysis = Analysis.objects.create(
            summary=summary_name,
            name=temp_name,
            project=request.user.get_profile().catch_all_project,
            data_set=data_set,
            workflow=curr_workflow,
            time_start=timezone.now()
        )
        analysis.set_owner(request.user)
        # getting distinct workflow inputs
        try:
            workflow_data_inputs = curr_workflow.data_inputs.all()[0]
        except IndexError:
            logger.error("Workflow with UUID '{}' has no data inputs"
                         .format(workflow_uuid))
            return HttpResponse(status=500)
        # need to get the list of file UUIDs from the node group's nodes
        count = 0
        for node_file in curr_node_group.nodes.all():
            count += 1
            temp_input = WorkflowDataInputMap.objects.create(
                workflow_data_input_name=workflow_data_inputs.name,
                data_uuid=node_file.uuid,
                pair_id=count
            )
            analysis.workflow_data_input_maps.add(temp_input)
            analysis.save()

    # single-input workflow
    if node_set_uuid:
        # TODO: handle DoesNotExist exception
        curr_node_set = NodeSet.objects.get(uuid=node_set_uuid)
        curr_node_dict = json.loads(curr_node_set.solr_query_components)
        # Solr results
        solr_uuids = get_solr_results(
            curr_node_set.solr_query,
            only_uuids=True,
            selected_mode=curr_node_dict['documentSelectionBlacklistMode'],
            selected_nodes=curr_node_dict['documentSelection']
        )
        # retrieving workflow based on input workflow_uuid
        # TODO: handle DoesNotExist exception
        curr_workflow = Workflow.objects.filter(uuid=workflow_uuid)[0]
        # TODO: catch if study or data set don't exist
        study = Study.objects.get(uuid=study_uuid)
        data_set = InvestigationLink.objects.filter(
            investigation__uuid=study.investigation.uuid).order_by(
            "version").reverse()[0].data_set
        logger.info("Associating analysis with data set %s (%s)",
                    data_set, data_set.uuid)
        # ANALYSIS MODEL
        # How to create a simple analysis object
        if not custom_name:
            temp_name = curr_workflow.name + " " + get_aware_local_time()\
                .strftime("%Y-%m-%d @ %H:%M:%S")
        else:
            temp_name = custom_name
        summary_name = "None provided."
        analysis = Analysis.objects.create(
            summary=summary_name,
            name=temp_name,
            project=request.user.get_profile().catch_all_project,
            data_set=data_set,
            workflow=curr_workflow,
            time_start=timezone.now()
        )
        analysis.set_owner(request.user)
        # getting distinct workflow inputs
        workflow_data_inputs = curr_workflow.data_inputs.all()[0]
        # need to get the list of file UUIDs from the Solr query
        count = 0
        for file_uuid in solr_uuids:
            count += 1
            temp_input = WorkflowDataInputMap.objects.create(
                workflow_data_input_name=workflow_data_inputs.name,
                data_uuid=file_uuid,
                pair_id=count
            )
            analysis.workflow_data_input_maps.add(temp_input)
            analysis.save()

    # dual-input workflow
    if node_relationship_uuid:
        # input list for running the analysis
        ret_list = []
        # retrieving workflow based on input workflow_uuid
        curr_workflow = Workflow.objects.get(uuid=workflow_uuid)
        # TODO: catch if study or data set don't exist
        study = Study.objects.get(uuid=study_uuid)
        data_set = InvestigationLink.objects.filter(
            investigation__uuid=study.investigation.uuid).order_by(
            "version").reverse()[0].data_set
        # get the node relationship model
        curr_relationship = NodeRelationship.objects.get(
            uuid=node_relationship_uuid)
        # iterating over node pairs
        input_keys = []
        base_input = {}
        # defining inputs used for the analysis
        for workflow_inputs in curr_workflow.input_relationships.all():
            base_input[workflow_inputs.set1] = {}
            base_input[workflow_inputs.set2] = {}
            input_keys.append(workflow_inputs.set1)
            input_keys.append(workflow_inputs.set2)
        # creating an instance of the input data pairing for the analysis,
        # i.e.
        # [{u'exp_file':
        #   {'node_uuid': u'3d061699-6bc8-11e2-9b55-406c8f1d5108',
        #    'pair_id': 1},
        #   u'input_file':
        #   {'node_uuid': u'3d180d11-6bc8-11e2-9bc7-406c8f1d5108',
        #    'pair_id': 1}}]
        count = 1
        for curr_pair in curr_relationship.node_pairs.all():
            temp_pair = copy.deepcopy(base_input)
            logger.debug("Temp Pair: %s", temp_pair)
            logger.debug("Current Pair: %s", curr_pair)
            if curr_pair.node2:
                temp_pair[input_keys[0]]['node_uuid'] = curr_pair.node1.uuid
                temp_pair[input_keys[0]]['pair_id'] = count
                temp_pair[input_keys[1]]['node_uuid'] = curr_pair.node2.uuid
                temp_pair[input_keys[1]]['pair_id'] = count
                ret_list.append(temp_pair)
                logger.debug("Temp Pair: %s", temp_pair)
                count += 1
        logger.info("Associating analysis with data set %s (%s)",
                    data_set, data_set.uuid)
        # ANALYSIS MODEL
        # How to create a simple analysis object
        if not custom_name:
            temp_name = curr_workflow.name + " " + get_aware_local_time()\
                .strftime("%Y-%m-%d @ %H:%M:%S")
        else:
            temp_name = custom_name
        summary_name = "None provided."
        analysis = Analysis.objects.create(
            summary=summary_name,
            name=temp_name,
            project=request.user.get_profile().catch_all_project,
            data_set=data_set,
            workflow=curr_workflow,
            time_start=timezone.now()
        )
        analysis.set_owner(request.user)
        # getting distinct workflow inputs
        workflow_data_inputs = curr_workflow.data_inputs.all()
        logger.debug("ret_list")
        logger.debug(json.dumps(ret_list, indent=4))
        # ANALYSIS MODEL
        # updating Refinery models for the updated workflow input
        # (Galaxy workflow input id & node_uuid)
        count = 0
        for samp in ret_list:
            count += 1
            for k in samp:
                temp_input = WorkflowDataInputMap.objects.create(
                    workflow_data_input_name=k,
                    data_uuid=samp[k]["node_uuid"],
                    pair_id=count)
                analysis.workflow_data_input_maps.add(temp_input)
                analysis.save()

    # keeping a new reference to analysis_status
    analysis_status = AnalysisStatus.objects.create(analysis=analysis)
    analysis_status.save()
    # call function via analysis_manager
    run_analysis.delay(analysis.uuid)

    return HttpResponse(reverse('analysis-status', args=(analysis.uuid,)))
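
# Sketch of the JSON body run() expects (values are hypothetical): all six
# keys must be present (a missing key returns 400), workflowUuid and
# studyUuid must be non-empty, and at least one of nodeSetUuid,
# nodeGroupUuid, or nodeRelationshipUuid must be set.
#
#   {
#       "workflowUuid": "<Workflow UUID>",
#       "studyUuid": "<Study UUID>",
#       "nodeSetUuid": "<NodeSet UUID>",
#       "nodeGroupUuid": null,
#       "nodeRelationshipUuid": null,
#       "name": "My custom analysis name"
#   }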