def search_with_dataset(dataset_path, query=None, **kwargs):
    """Search the datamart using a dataset"""
    if not isfile(dataset_path):
        user_msg = 'The dataset file could not be found.'
        return err_resp(user_msg)

    search_url = get_nyu_url() + '/search'

    # --------------------------------
    # Behavioral logging
    # --------------------------------
    if 'user_workspace' in kwargs:
        log_data = dict(feature_id=f'POST|by-dataset|{search_url}',
                        activity_l1=bl_static.L1_DATA_PREPARATION,
                        activity_l2=bl_static.L2_DATA_SEARCH,
                        path=search_url)

        LogEntryMaker.create_datamart_entry(kwargs['user_workspace'], log_data)
    # --------------------------------

    # --------------------------------
    # Query the datamart
    # --------------------------------
    try:
        with open(dataset_path, 'rb') as dataset_p:
            search_files = dict(data=dataset_p)
            if query:
                search_files['query'] = query

            try:
                response = requests.post(\
                                search_url,
                                files=search_files,
                                timeout=settings.DATAMART_LONG_TIMEOUT)
            except requests.exceptions.Timeout as err_obj:
                return err_resp('Request timed out. responded with: %s' % err_obj)

    except IOError as err_obj:
        user_msg = (f'Failed to search with the dataset file.'
                    f' Technical: {err_obj}')
        return err_resp(user_msg)

    if response.status_code != 200:
        print(str(response))
        print(response.text)
        return err_resp(('NYU Datamart internal server error.'
                         ' status_code: %s') % response.status_code)

    json_results = response.json()['results']

    if not json_results:
        return err_resp('No datasets found. (%s)' % \
                        (get_timestamp_string_readable(time_only=True),))

    print('num results: ', len(json_results))

    return ok_resp(json_results)
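
# Minimal usage sketch for the NYU dataset-based search above (in the original
# codebase this function lives in the NYU datamart module; a same-named ISI variant
# exists elsewhere). The CSV path is a placeholder and `user_workspace` is assumed
# to be a UserWorkspace already loaded for the logged-in user; the optional keyword
# query is omitted here.
def _example_nyu_search_with_dataset(user_workspace):
    """Hypothetical helper showing how search_with_dataset might be called."""
    search_info = search_with_dataset(
        '/ravens_volume/test_data/example/tables/learningData.csv',  # placeholder path
        user_workspace=user_workspace)

    if not search_info.success:
        print('search failed:', search_info.err_msg)
        return

    # result_obj is the list of result dicts returned by the datamart
    for dataset in search_info.result_obj[:3]:
        print(dataset)
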
def log_preprocess_call(user, json_data, session_id=''):
    """Note: The preprocess call also does problem discovery."""
    # --------------------------------
    # Behavioral logging
    # --------------------------------

    # Check the request for an l1_activity, default to DATA_PREPARATION
    #
    activity_l1_val = json_data[bl_static.KEY_L1_ACTIVITY] \
                        if bl_static.KEY_L1_ACTIVITY in json_data \
                        else bl_static.L1_DATA_PREPARATION

    # Check the request for an l2_activity, default to DATA_EXPLORE
    #
    activity_l2_val = json_data[bl_static.KEY_L2_ACTIVITY] \
                        if bl_static.KEY_L2_ACTIVITY in json_data \
                        else bl_static.L2_DATA_EXPLORE

    log_data = dict(session_key=session_id,
                    feature_id=rook_static.PREPROCESS_DATA,
                    activity_l1=activity_l1_val,
                    activity_l2=activity_l2_val)

    LogEntryMaker.create_system_entry(user, log_data)

    # Log the discovery activity
    #
    log_data2 = dict(session_key=session_id,
                     feature_id=rook_static.PROBLEM_DISCOVERY,
                     activity_l1=bl_static.L1_PROBLEM_DEFINITION,
                     activity_l2=activity_l2_val)

    LogEntryMaker.create_system_entry(user, log_data2)
def view_hello(request):
    """gRPC: Call from UI as a heartbeat"""
    user_info = get_authenticated_user(request)
    if not user_info.success:
        return JsonResponse(get_json_error(user_info.err_msg))

    # --------------------------------
    # Behavioral logging
    # --------------------------------
    log_data = dict(session_key=get_session_key(request),
                    feature_id=ta2_static.HELLO,
                    activity_l1=bl_static.L1_SYSTEM_ACTIVITY,
                    activity_l2=bl_static.L2_APP_LAUNCH)

    LogEntryMaker.create_ta2ta3_entry(user_info.result_obj, log_data)

    # note: this is just a heartbeat, so no params are sent
    #

    # Begin to log D3M call
    #
    call_entry = None
    if ServiceCallEntry.record_d3m_call():
        call_entry = ServiceCallEntry.get_dm3_entry(\
                        request_obj=request,
                        call_type='Hello',
                        request_msg=('no params for this call'))

    # Let's call the TA2!
    #
    resp_info = ta2_hello()
    if not resp_info.success:
        return JsonResponse(get_json_error(resp_info.err_msg))

    json_str = resp_info.result_obj

    # Convert JSON str to python dict - err catch here
    #  - let it blow up for now--should always return JSON
    json_format_info = json_loads(json_str)
    if not json_format_info.success:
        return JsonResponse(get_json_error(json_format_info.err_msg))

    # Save D3M log
    #
    if call_entry:
        call_entry.save_d3m_response(json_format_info.result_obj)

    json_info = get_json_success('success!', data=json_format_info.result_obj)

    return JsonResponse(json_info, safe=False)
def view_list_primitives(request):
    """gRPC: Call from UI with a ListPrimitivesRequest"""
    user_info = get_authenticated_user(request)
    if not user_info.success:
        return JsonResponse(get_json_error(user_info.err_msg))

    # --------------------------------
    # (2) Begin to log D3M call
    # --------------------------------
    call_entry = None
    if ServiceCallEntry.record_d3m_call():
        call_entry = ServiceCallEntry.get_dm3_entry(\
                        request_obj=request,
                        call_type='ListPrimitives',
                        request_msg='no params for this call')

    # --------------------------------
    # (2a) Behavioral logging
    # --------------------------------
    log_data = dict(session_key=get_session_key(request),
                    feature_id=ta2_static.LIST_PRIMITIVES,
                    activity_l1=bl_static.L1_SYSTEM_ACTIVITY,
                    activity_l2=bl_static.L2_ACTIVITY_BLANK)

    LogEntryMaker.create_ta2ta3_entry(user_info.result_obj, log_data)

    # Let's call the TA2!
    #
    search_info = list_primitives()
    #print('search_info', search_info)
    if not search_info.success:
        return JsonResponse(get_json_error(search_info.err_msg))

    # Convert JSON str to python dict - err catch here
    #
    json_format_info = json_loads(search_info.result_obj)
    if not json_format_info.success:
        return JsonResponse(get_json_error(json_format_info.err_msg))

    # Save D3M log
    #
    if call_entry:
        call_entry.save_d3m_response(json_format_info.result_obj)

    json_info = get_json_success('success!', data=json_format_info.result_obj)

    return JsonResponse(json_info, safe=False)
def view_end_search_solutions(request):
    """gRPC: Call from UI with an EndSearchSolutionsRequest"""
    print('view_end_search_solutions 1')
    user_info = get_authenticated_user(request)
    if not user_info.success:
        return JsonResponse(get_json_error(user_info.err_msg))

    user = user_info.result_obj

    print('view_end_search_solutions 2')
    req_body_info = get_request_body(request)
    if not req_body_info.success:
        return JsonResponse(get_json_error(req_body_info.err_msg))

    print('view_end_search_solutions 3')
    # --------------------------------
    # Behavioral logging
    # --------------------------------
    log_data = dict(session_key=get_session_key(request),
                    feature_id=ta2_static.END_SEARCH_SOLUTIONS,
                    activity_l1=bl_static.L1_SYSTEM_ACTIVITY,
                    activity_l2=bl_static.L2_ACTIVITY_BLANK)

    LogEntryMaker.create_ta2ta3_entry(user, log_data)

    print('view_end_search_solutions 4')
    # Let's call the TA2 and end the session!
    #
    params = dict(user=user)
    search_info = end_search_solutions(req_body_info.result_obj, **params)
    if not search_info.success:
        return JsonResponse(get_json_error(search_info.err_msg))

    # The session is over, write the log entries files
    #
    #LogEntryMaker.write_user_log_from_request(request)

    # User is done at this point!
    # Write out the log and delete it....
    user_workspace = None
    ws_info = get_latest_user_workspace(request)
    if ws_info.success:
        user_workspace = ws_info.result_obj

    ResetUtil.write_and_clear_behavioral_logs(user, user_workspace)

    json_info = get_json_success('success!', data=search_info.result_obj)

    return JsonResponse(json_info, safe=False)
def write_and_clear_behavioral_logs(user, user_workspace):
    """Write out any behavioral log files and delete
    the entries from the database"""
    if not isinstance(user, User):
        return err_resp('user was not a User object')

    if user_workspace and not isinstance(user_workspace, UserWorkspace):
        return err_resp('user_workspace was not a UserWorkspace object')

    # Write out any behavioral logs for the workspace
    #
    if user_workspace:
        log_info = LogEntryMaker.write_user_log(user_workspace)
        if log_info.success:
            print('log written: ', log_info.result_obj)
        else:
            print('log writing failed: ', log_info.err_msg)

    # clear behavioral logs for current user
    #
    log_clear = BehavioralLogFormatter.delete_logs_for_user(user)

    if log_clear.success:
        print('\n'.join(log_clear.result_obj))
    else:
        print(log_clear.err_msg)
def view_stop_search_solutions(request):
    """gRPC: Call from UI with a StopSearchSolutions"""
    user_info = get_authenticated_user(request)
    if not user_info.success:
        return JsonResponse(get_json_error(user_info.err_msg))

    req_body_info = get_request_body(request)
    if not req_body_info.success:
        return JsonResponse(get_json_error(req_body_info.err_msg))

    # Begin to log D3M call
    #
    call_entry = None
    if ServiceCallEntry.record_d3m_call():
        call_entry = ServiceCallEntry.get_dm3_entry(\
                        request_obj=request,
                        call_type=ta2_static.STOP_SEARCH_SOLUTIONS,
                        request_msg=req_body_info.result_obj)

    # --------------------------------
    # Behavioral logging
    # --------------------------------
    log_data = dict(session_key=get_session_key(request),
                    feature_id=ta2_static.STOP_SEARCH_SOLUTIONS,
                    activity_l1=bl_static.L1_SYSTEM_ACTIVITY,
                    activity_l2=bl_static.L2_ACTIVITY_BLANK)

    LogEntryMaker.create_ta2ta3_entry(user_info.result_obj, log_data)

    # Let's call the TA2!
    #
    search_info = stop_search_solutions(req_body_info.result_obj)
    #print('search_info', search_info)
    if not search_info.success:
        return JsonResponse(get_json_error(search_info.err_msg))

    # Convert JSON str to python dict - err catch here
    #  - let it blow up for now--should always return JSON
    json_dict = json.loads(search_info.result_obj, object_pairs_hook=OrderedDict)

    # Save D3M log
    #
    if call_entry:
        call_entry.save_d3m_response(json_dict)

    json_info = get_json_success('success!', data=json_dict)

    return JsonResponse(json_info, safe=False)
def view_create_log_entry(request, is_verbose=False):
    """Make log entry endpoint"""
    user_info = get_authenticated_user(request)
    if not user_info.success:
        user_msg = 'Can only log entries when user is logged in.'
        return JsonResponse(get_json_error(user_msg))

    user = user_info.result_obj

    session_key = get_session_key(request)

    # ----------------------------------------
    # Get the log data
    # ----------------------------------------
    json_info = get_request_body_as_json(request)
    if not json_info.success:
        return JsonResponse(get_json_error(json_info.err_msg))

    log_data = json_info.result_obj
    log_data.update(dict(session_key=session_key))

    # Default L2 to unknown
    #
    if not bl_static.KEY_L2_ACTIVITY in log_data:
        log_data[bl_static.KEY_L2_ACTIVITY] = bl_static.L2_ACTIVITY_BLANK

    # Note: this form is also used by the LogEntryMaker
    #  - redundant but ok for now, want to return form errors
    #    in a separate field
    #
    f = BehavioralLogEntryForm(log_data)
    if not f.is_valid():
        print('nope: %s' % f.errors)
        user_msg = 'Error found in log entry.'
        return JsonResponse(get_json_error(user_msg, errors=f.errors))

    log_create_info = LogEntryMaker.create_log_entry(\
                            user, log_data['type'], log_data)

    if not log_create_info.success:
        return JsonResponse(get_json_error(log_create_info.err_msg))

    user_msg = 'Log entry saved!'

    if is_verbose:
        return JsonResponse(get_json_success(\
                                user_msg,
                                data=log_create_info.result_obj.to_dict()))

    return JsonResponse(get_json_success(user_msg))
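
# Hedged sketch: the same data the endpoint above accepts can be written directly
# through LogEntryMaker, which is what the view does after form validation. The
# entry type 'SYSTEM' and the literal keys 'l1_activity'/'l2_activity' are
# illustrative assumptions; the real key names come from the bl_static constants
# (KEY_L1_ACTIVITY / KEY_L2_ACTIVITY).
def _example_direct_log_entry(user, session_key):
    """Hypothetical example of writing a behavioral log entry without the view."""
    log_data = {'session_key': session_key,
                'feature_id': 'EXPLORE_VIEW_PLOTS',
                'l1_activity': 'DATA_PREPARATION',   # assumed literal for bl_static.KEY_L1_ACTIVITY
                'l2_activity': 'DATA_EXPLORE'}       # assumed literal for bl_static.KEY_L2_ACTIVITY

    entry_info = LogEntryMaker.create_log_entry(user, 'SYSTEM', log_data)
    if not entry_info.success:
        print('log entry failed:', entry_info.err_msg)
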
def get_partials_datasets(request):
    """Create the ICE/partials datasets for the current workspace"""
    # request body
    req_body_info = get_request_body_as_json(request)
    if not req_body_info.success:
        return JsonResponse(get_json_error(req_body_info.err_msg))
    req_info = req_body_info.result_obj

    # workspace
    user_workspace_info = get_latest_user_workspace(request)
    if not user_workspace_info.success:
        return JsonResponse(get_json_error(user_workspace_info.err_msg))
    user_workspace = user_workspace_info.result_obj

    # user
    user_info = get_authenticated_user(request)
    if not user_info.success:
        return JsonResponse(get_json_error(user_info.err_msg))

    activity_l1 = bl_static.L1_PROBLEM_DEFINITION
    activity_l2 = bl_static.L2_ACTIVITY_BLANK

    log_data = dict(session_key=get_session_key(request),
                    feature_id='PARTIALS_APP',
                    activity_l1=activity_l1,
                    activity_l2=activity_l2)

    LogEntryMaker.create_system_entry(user_workspace.user, log_data)

    try:
        response = create_partials_datasets(req_info, user_workspace.id)
    except Exception:
        print("caught traceback when creating ICE datasets:", flush=True)
        print(traceback.format_exc(), flush=True)
        response = {
            KEY_SUCCESS: False,
            KEY_MESSAGE: "Internal error while creating ICE datasets."
        }

    return JsonResponse(response)
def datamart_search(query_dict=None, dataset_path=None, **kwargs):
    """Search the ISI datamart"""
    if query_dict is None and dataset_path is None:
        return err_resp('Either a query or dataset path must be supplied.')

    if query_dict is not None and not isinstance(query_dict, dict):
        user_msg = ('There is something wrong with the search parameters.'
                    ' Please try again. (expected a dictionary)')
        return err_resp(user_msg)

    search_url = get_isi_url() + '/search'

    # --------------------------------
    # Behavioral logging
    # --------------------------------
    if 'user' in kwargs:
        log_data = dict(feature_id=f'POST|{search_url}',
                        activity_l1=bl_static.L1_DATA_PREPARATION,
                        activity_l2=bl_static.L2_DATA_SEARCH,
                        path=search_url)

        LogEntryMaker.create_datamart_entry(kwargs['user'], log_data)
    # --------------------------------

    # --------------------------------
    # Query the datamart
    # --------------------------------
    query_json = None
    if query_dict:
        formatted_json_info = json_dumps(query_dict)
        if not formatted_json_info.success:
            return err_resp('Failed to convert query to JSON. %s' % \
                            formatted_json_info.err_msg)
        query_json = formatted_json_info.result_obj
        print(f'formatted query: {query_json}')

    if dataset_path:
        limit = kwargs.get('limit', 20)
        if not isinstance(limit, int):
            user_msg = ('The results limit must be an'
                        ' integer (datamart_search)')
            return err_resp(user_msg)

        if not USE_CACHED_SEARCH:
            try:
                with open(dataset_path, 'rb') as dataset_p:
                    try:
                        response = requests.post(
                            search_url,
                            params={'max_return_docs': limit},
                            json={'query_json': query_json},
                            files={'data': dataset_p},
                            verify=False,
                            timeout=settings.DATAMART_LONG_TIMEOUT)
                    except requests.exceptions.Timeout as err_obj:
                        return err_resp(
                            'Request timed out. responded with: %s' % err_obj)
            except IOError as err_obj:
                user_msg = (f'Failed to search with the dataset file.'
                            f' Technical: {err_obj}')
                return err_resp(user_msg)
    else:
        raise NotImplementedError(
            'Augmentations on results without a dataset path'
            ' are not implemented by ISI.')

    if not USE_CACHED_SEARCH:
        if response.status_code != 200:
            return err_resp(response.reason)

        response_json = response.json()
        if response_json['code'] != "0000":
            return err_resp(response_json['message'])
    else:
        import json
        print('loading file')
        with open('/datamart_endpoints/cached_isi_search_response.json', 'r') as cached_file:
            response_json = json.load(cached_file)

    json_results = response_json['search_results']['results']

    #num_datasets = len(response['data'])
    #print('num_datasets', num_datasets)
    #print('iterating through....')
    sorted_data = sorted(json_results,  # response['data'],
                         key=lambda k: k['score'],
                         reverse=True)

    #print([ds['score'] for ds in sorted_data])

    return ok_resp(sorted_data[:limit])
def search_with_dataset(dataset_path, query=None, **kwargs):
    """Search the datamart using a dataset"""
    if not isfile(dataset_path):
        user_msg = 'The dataset file could not be found.'
        return err_resp(user_msg)

    search_url = get_isi_url() + '/search'

    # --------------------------------
    # Behavioral logging
    # --------------------------------
    if 'user_workspace' in kwargs:
        log_data = dict(feature_id=f'POST|by-dataset|{search_url}',
                        activity_l1=bl_static.L1_DATA_PREPARATION,
                        activity_l2=bl_static.L2_DATA_SEARCH,
                        path=search_url)

        LogEntryMaker.create_datamart_entry(kwargs['user_workspace'], log_data)
    # --------------------------------

    # --------------------------------
    # Query the datamart
    # --------------------------------
    query_json = None
    if query:
        formatted_json_info = json_dumps(query)
        if not formatted_json_info.success:
            return err_resp('Failed to convert query to JSON. %s' % \
                            formatted_json_info.err_msg)
        query_json = formatted_json_info.result_obj
        print(f'formatted query: {query_json}')

    limit = kwargs.get('limit', 20)
    if not isinstance(limit, int):
        user_msg = ('The results limit must be an'
                    ' integer (datamart_search)')
        return err_resp(user_msg)

    if not USE_CACHED_SEARCH:
        try:
            with open(dataset_path, 'rb') as dataset_p:
                try:
                    response = requests.post(
                        search_url,
                        params={'max_return_docs': limit},
                        json={'query_json': query_json},
                        files={'data': dataset_p},
                        verify=False,
                        timeout=settings.DATAMART_LONG_TIMEOUT)
                except requests.exceptions.Timeout as err_obj:
                    return err_resp(
                        'Request timed out. responded with: %s' % err_obj)
        except IOError as err_obj:
            user_msg = (f'Failed to search with the dataset file.'
                        f' Technical: {err_obj}')
            return err_resp(user_msg)

        if response.status_code != 200:
            print(str(response))
            print(response.text)
            return err_resp(('ISI Datamart internal server error.'
                             ' status_code: %s') % response.status_code)

        response_json = response.json()
    else:
        import json
        print('loading file')
        with open('/datamart_endpoints/cached_isi_search_response.json', 'r') as cached_file:
            response_json = json.load(cached_file)

    #print('response_json', response_json)
    if not 'results' in response_json:
        return err_resp('No datasets found. (%s)' % \
                        (get_timestamp_string_readable(time_only=True),))

    json_results = response_json['results']

    print('num results: ', len(json_results))

    return ok_resp(json_results)
def run_describe_solution(self, pipeline_id, solution_id, msg_cnt=-1):
    """sync: Run a DescribeSolution call for each solution_id"""
    print(f'run_describe_solution 1. pipeline_id: {pipeline_id}')
    # ----------------------------------
    # Create the input
    # ----------------------------------
    req_params = {ta2_static.KEY_SOLUTION_ID: solution_id}
    json_str_info = json_dumps(req_params)
    if not json_str_info.success:
        self.add_err_msg(json_str_info.err_msg)
        return

    json_str_input = json_str_info.result_obj

    # --------------------------------
    # (2) Save request
    # --------------------------------
    stored_request = StoredRequest(\
                        user=self.user_object,
                        search_id=self.search_id,
                        pipeline_id=pipeline_id,
                        workspace='(not specified)',
                        request_type=ta2_static.DESCRIBE_SOLUTION,
                        is_finished=False,
                        request=req_params)
    stored_request.save()

    # --------------------------------
    # (2a) Behavioral logging
    # --------------------------------
    log_data = dict(session_key=self.session_key,
                    feature_id=ta2_static.DESCRIBE_SOLUTION,
                    activity_l1=bl_static.L1_MODEL_SELECTION,
                    activity_l2=bl_static.L2_MODEL_SUMMARIZATION,
                    other=req_params)

    LogEntryMaker.create_ta2ta3_entry(self.user_object, log_data)

    print(f'run_describe_solution 2. stored_request.pipeline_id:'
          f' {stored_request.pipeline_id}')

    # ----------------------------------
    # Run Describe Solution
    # ----------------------------------
    describe_info = describe_solution(json_str_input)
    if not describe_info.success:
        self.add_err_msg(describe_info.err_msg)
        StoredResponse.add_err_response(\
                            stored_request,
                            describe_info.err_msg)
        return

    # ----------------------------------
    # Parse the DescribeSolutionResponse
    # ----------------------------------
    describe_data_info = json_loads(describe_info.result_obj)
    if not describe_data_info.success:
        self.add_err_msg(describe_data_info.err_msg)
        StoredResponse.add_err_response(\
                            stored_request,
                            describe_data_info.err_msg)
        return

    # -----------------------------------------------
    # Add the pipeline id to the result
    # -----------------------------------------------
    describe_data = describe_data_info.result_obj
    describe_data[ta2_static.KEY_PIPELINE_ID] = pipeline_id
    describe_data[ta2_static.KEY_SEARCH_ID] = self.search_id
    describe_data[ta2_static.KEY_SOLUTION_ID] = solution_id
    describe_data.move_to_end(ta2_static.KEY_PIPELINE_ID, last=False)

    # params = dict()
    # if not stored_request.pipeline_id:
    #     params['pipeline_id'] = describe_data[KEY_PIPELINE_ID]

    stored_info = StoredResponse.add_success_response(\
                        stored_request,
                        describe_data,
                        pipeline_id=pipeline_id)

    if not stored_info.success:
        print('stored info fail!', stored_info.err_msg)

    print(f'run_describe_solution 3. stored_info.result_obj.pipeline_id:'
          f' {stored_info.result_obj.pipeline_id}')
    print(f'run_describe_solution 4. stored_request.pipeline_id:'
          f' {stored_request.pipeline_id}')

    # -----------------------------------------
    # Tracking this in the behavioral log,
    # e.g. checking time lapse between creation
    #   of solution and if user investigates this model,
    #   later, if at all
    # -----------------------------------------
    log_data = dict(session_key=self.session_key,
                    feature_id=ta2_static.DESCRIBE_SOLUTION_RESPONSE,
                    activity_l1=bl_static.L1_MODEL_SELECTION,
                    activity_l2=bl_static.L2_MODEL_SEARCH,
                    other=describe_data)

    LogEntryMaker.create_ta2ta3_entry(self.user_object, log_data)

    # -----------------------------------------------
    # send responses back to WebSocket
    # -----------------------------------------------
    ws_msg = WebsocketMessage.get_success_message(\
                'DescribeSolution',
                'it worked',
                msg_cnt=msg_cnt,
                data=describe_data)

    print('ws_msg: %s' % ws_msg)
    #print('ws_msg', ws_msg.as_dict())

    ws_msg.send_message(self.websocket_id)
def run_get_search_solution_results(self):
    """Run SearchSolutions against a TA2"""
    # -----------------------------------
    # (1) make GRPC request object
    # -----------------------------------
    params_dict = dict(searchId=self.search_id)
    params_info = json_dumps(params_dict)
    if not params_info.success:
        self.send_websocket_err_msg(\
                ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                params_info.err_msg)
        return

    try:
        grpc_req = Parse(params_info.result_obj,
                         core_pb2.GetSearchSolutionsResultsRequest())
    except ParseError as err_obj:
        err_msg = ('GetSearchSolutionsResultsRequest: Failed to'
                   ' convert JSON to gRPC: %s') % (err_obj)
        self.send_websocket_err_msg(\
                ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                err_msg)
        return

    # --------------------------------
    # (2) Save the request to the db
    # --------------------------------
    stored_request = StoredRequest(\
                        user=self.user_object,
                        search_id=self.search_id,
                        workspace='(not specified)',
                        request_type=ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                        is_finished=False,
                        request=params_dict)
    stored_request.save()

    # --------------------------------
    # (2a) Behavioral logging
    # --------------------------------
    log_data = dict(session_key=self.session_key,
                    feature_id=ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                    activity_l1=bl_static.L1_MODEL_SELECTION,
                    activity_l2=bl_static.L2_MODEL_SEARCH,
                    other=params_dict)

    LogEntryMaker.create_ta2ta3_entry(self.user_object, log_data)

    # --------------------------------
    # (3) Make the gRPC request
    # --------------------------------
    core_stub, err_msg = TA2Connection.get_grpc_stub()
    if err_msg:
        return err_resp(err_msg)

    msg_cnt = 0
    try:
        # -----------------------------------------
        # Iterate through the streaming responses
        # Note: The StoredResponse.id becomes the pipeline id
        # -----------------------------------------
        for reply in core_stub.GetSearchSolutionsResults(\
                grpc_req, timeout=settings.TA2_GRPC_LONG_TIMEOUT):
            msg_cnt += 1

            # -----------------------------------------------
            # Parse the response into JSON + store response
            # -----------------------------------------------
            msg_json_str = message_to_json(reply)

            msg_json_info = json_loads(msg_json_str)
            if not msg_json_info.success:
                user_msg = 'Failed to convert response to JSON: %s' % \
                           msg_json_info.err_msg
                self.send_websocket_err_msg(\
                        ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                        user_msg)

                StoredResponse.add_stream_err_response(\
                        stored_request, user_msg)
                # Wait for next response....
                continue

            result_json = msg_json_info.result_obj

            # TA2s (specifically NYU) respond once when trying a new pipeline,
            #   with a message missing a solutionId; the same process responds
            #   again once the solution contains a solutionId
            print('results json from TA2')
            print(result_json)
            if not result_json.get('solutionId'):
                continue

            if ta2_static.KEY_SOLUTION_ID not in result_json:
                user_msg = '"%s" not found in response to JSON: %s' % \
                           (ta2_static.KEY_SOLUTION_ID, result_json)
                StoredResponse.add_stream_err_response(\
                        stored_request, user_msg)
                self.send_websocket_err_msg(\
                        ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                        user_msg)
                # Wait for next response....
                continue

            # Solution id used for DescribeSolution...
            #
            solution_id = result_json[ta2_static.KEY_SOLUTION_ID]

            # -----------------------------------------
            # Looks good, save the response
            # -----------------------------------------
            stored_resp_info = StoredResponse.add_stream_success_response(\
                                    stored_request, result_json)

            # -----------------------------------------
            # Tracking this in the behavioral log,
            # e.g. checking time lapse between creation
            #   of solution and if user investigates this model,
            #   later, if at all
            # -----------------------------------------
            log_data = dict(session_key=self.session_key,
                            feature_id=ta2_static.GET_SEARCH_SOLUTIONS_RESULTS_RESPONSE,
                            activity_l1=bl_static.L1_MODEL_SELECTION,
                            activity_l2=bl_static.L2_MODEL_SEARCH,
                            other=result_json)

            LogEntryMaker.create_ta2ta3_entry(self.user_object, log_data)

            # -----------------------------------------
            # Make sure the response was saved (probably won't happen)
            # -----------------------------------------
            if not stored_resp_info.success:
                # Not good but probably won't happen
                # send a message to the user...
                #
                user_msg = 'Failed to store response from %s: %s' % \
                           (ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                            msg_json_info.err_msg)

                StoredResponse.add_stream_err_response(\
                        stored_request, user_msg)

                self.send_websocket_err_msg(\
                        ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                        user_msg)
                # Wait for the next response...
                continue

            # ---------------------------------------------
            # Looks good!  Get the StoredResponse
            #  - This id will be used as the pipeline id
            # ---------------------------------------------
            stored_response = stored_resp_info.result_obj
            stored_response.use_id_as_pipeline_id()

            StoredResponse.add_stream_success_response(\
                    stored_response, stored_response)

            # -----------------------------------------------
            # send responses back to WebSocket
            # -----------------------------------------------
            ws_msg = WebsocketMessage.get_success_message(\
                        ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                        'it worked',
                        msg_cnt=msg_cnt,
                        data=stored_response.as_dict())

            print('ws_msg: %s' % ws_msg)
            #print('ws_msg', ws_msg.as_dict())

            ws_msg.send_message(self.websocket_id)

            stored_response.mark_as_sent_to_user()
            print('msg received #%d' % msg_cnt)

            # -----------------------------------------------
            # continue the process describe/score/etc
            # -----------------------------------------------

            # DescribeSolution - run sync
            #
            self.run_describe_solution(stored_response.pipeline_id,
                                       solution_id,
                                       msg_cnt)

            # FitSolution - run async
            #
            print('PRE run_fit_solution')
            self.run_fit_solution(stored_response.pipeline_id, solution_id)
            print('POST run_fit_solution')

            print('PRE run_score_solution')
            self.run_score_solution(stored_response.pipeline_id, solution_id)
            print('POST run_score_solution')

        # -----------------------------------------------
        # All results arrived, send message to UI
        # -----------------------------------------------
        ws_msg = WebsocketMessage.get_success_message(\
                    ta2_static.ENDGetSearchSolutionsResults,
                    {'searchId': self.search_id,
                     'message': 'it worked'})

        print('ws_msg: %s' % ws_msg)
        ws_msg.send_message(self.websocket_id)

    except grpc.RpcError as err_obj:
        stored_request.set_error_status(str(err_obj))
        return

    except Exception as err_obj:
        stored_request.set_error_status(str(err_obj))
        return

    StoredRequestUtil.set_finished_ok_status(stored_request.id)
def make_search_solutions_call(all_params, websocket_id, user_id, **kwargs):
    """Return the result of a SearchSolutions call.
    If successful, an async process is kicked off"""
    if not websocket_id:
        return err_resp('websocket_id must be set')

    print('make_search_solutions_call 1')
    param_check = SearchSolutionsHelper.check_params(all_params)
    if not param_check.success:
        return param_check

    print('make_search_solutions_call 2')
    try:
        user_obj = User.objects.get(pk=user_id)
    except User.DoesNotExist:
        user_msg = 'No user found for id: %s' % user_id
        return err_resp(user_msg)

    search_solution_params = all_params[ta2_static.KEY_SEARCH_SOLUTION_PARAMS]

    # --------------------------------
    # (2) Logging
    # --------------------------------
    stored_request = StoredRequest(\
                        user=user_obj,
                        # search_id=self.search_id,
                        workspace='(not specified)',
                        request_type=ta2_static.SEARCH_SOLUTIONS,
                        is_finished=False,
                        request=search_solution_params)
    stored_request.save()

    # --------------------------------
    # (2a) Behavioral logging
    # --------------------------------
    session_key = kwargs.get(SESSION_KEY, None)

    log_data = dict(session_key=session_key,
                    feature_id=ta2_static.SEARCH_SOLUTIONS,
                    activity_l1=bl_static.L1_MODEL_SELECTION,
                    activity_l2=bl_static.L2_MODEL_SEARCH,
                    other=search_solution_params)

    LogEntryMaker.create_ta2ta3_entry(user_obj, log_data)

    # 11/6/2019 - late night hack, these variables shouldn't be here
    #  - introduced somewhere in the .js when setting a problem
    #
    search_solution_params.pop('id', None)
    search_solution_params.pop('session_key', None)

    # Run SearchSolutions against the TA2
    #
    search_info = search_solutions(search_solution_params)
    if not search_info.success:
        StoredResponse.add_err_response(stored_request, search_info.err_msg)
        return search_info

    print('make_search_solutions_call 2')
    search_info_json = json_loads(search_info.result_obj)
    if not search_info_json.success:
        StoredResponse.add_err_response(stored_request, search_info_json.err_msg)
        return search_info_json

    search_info_data = search_info_json.result_obj
    print('search_info_data', json_dumps(search_info_data)[1])

    print('make_search_solutions_call 3')
    if not ta2_static.KEY_SEARCH_ID in search_info_data:
        user_msg = 'searchId not found in the SearchSolutionsResponse'
        StoredResponse.add_err_response(stored_request, user_msg)
        return err_resp(user_msg)

    search_id = search_info_data['searchId']

    StoredResponse.add_success_response(stored_request,
                                        search_info_data,
                                        search_id=search_id)

    # Async task to run GetSearchSolutionsResults
    #
    extra_params = {SESSION_KEY: session_key}
    SearchSolutionsHelper.kick_off_solution_results.delay(\
            search_id,
            websocket_id,
            user_id,
            all_search_params=all_params,
            **extra_params)

    # Back to the UI, looking good
    #
    return ok_resp(search_info_data)
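
# Hedged usage sketch for make_search_solutions_call. The literal key
# 'searchSolutionParams' stands in for ta2_static.KEY_SEARCH_SOLUTION_PARAMS, and the
# inner fields only loosely follow the TA3TA2 SearchSolutions request shape; treat the
# whole payload as illustrative, not canonical. The session_key kwarg assumes the
# SESSION_KEY constant is the literal 'session_key'.
def _example_kick_off_search(websocket_id, user_id):
    """Hypothetical example of starting a solution search."""
    all_params = {
        'searchSolutionParams': {      # assumed literal for KEY_SEARCH_SOLUTION_PARAMS
            'userAgent': 'TwoRavens',
            'timeBoundSearch': 5,      # placeholder value, minutes
            # problem / template / inputs omitted for brevity
        },
    }

    call_info = make_search_solutions_call(all_params,
                                            websocket_id,
                                            user_id,
                                            session_key='example-session-key')
    if call_info.success:
        print('searchId:', call_info.result_obj.get('searchId'))
    else:
        print('search failed:', call_info.err_msg)
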
def view_R_route(request, app_name_in_url):
    """Route TwoRavens calls to Rook

        orig: TwoRavens -> Rook
        view: TwoRavens -> Django 2ravens -> Rook

    This is a bit messy. Still trying to handle two UI calls:
        - old ones, form POSTs sent with solaJSON key
        - new ones, straight JSON requests
    """
    # -----------------------------
    # get the app info
    # -----------------------------
    rook_app_info = RAppInfo.get_appinfo_from_url(app_name_in_url)
    if rook_app_info is None:
        raise Http404(('unknown rook app: "{0}" (please add "{0}" to '
                       ' "tworaven_apps/R_services/app_names.py")').format(\
                       app_name_in_url))

    # -----------------------------
    # Used for logging
    # -----------------------------
    user_workspace_info = get_latest_user_workspace(request)
    if not user_workspace_info.success:
        return JsonResponse(get_json_error(user_workspace_info.err_msg))

    user_workspace = user_workspace_info.result_obj

    # -----------------------------
    # additional params
    # -----------------------------
    raven_data_text = {}     # default
    additional_params = {}   # params to add to a JSON call, e.g. for PARTIALS_APP

    # -----------------------------
    # look for the "solaJSON" variable in the POST
    # -----------------------------
    if request.POST and UI_KEY_SOLA_JSON in request.POST:
        # this is a POST with a JSON string under the key solaJSON key
        raven_data_text = request.POST[UI_KEY_SOLA_JSON]
    else:
        # See if the body is JSON format
        raven_data_info = get_request_body_as_json(request)
        if not raven_data_info.success:
            err_msg = ("Neither key '%s' found in POST"
                       " nor JSON in request.body") % UI_KEY_SOLA_JSON
            return JsonResponse(dict(status="ERROR", message=err_msg))

        raven_data_text = raven_data_info.result_obj

    # Retrieve post data and attempt to insert django session id
    # (if none exists)
    #
    # retrieve session key
    session_key = get_session_key(request)

    if isinstance(raven_data_text, str):
        blank_session_str = '%s":""' % ROOK_ZESSIONID
        if raven_data_text.find(blank_session_str) > -1:
            # was converting to JSON, but now just simple text substitution
            #
            updated_session_str = '%s":"%s"' % (ROOK_ZESSIONID, session_key)
            raven_data_text = raven_data_text.replace(blank_session_str,
                                                      updated_session_str)
        elif raven_data_text.find(ROOK_ZESSIONID) == -1:
            print('MAJOR ISSUE: NOT SESSION AT ALL (R_services.views.py)')

    elif isinstance(raven_data_text, dict):
        # We have a dict, make sure it gets a session
        if ROOK_ZESSIONID in raven_data_text:
            if raven_data_text[ROOK_ZESSIONID] in [None, '']:
                raven_data_text[ROOK_ZESSIONID] = session_key
        elif ROOK_ZESSIONID not in raven_data_text:
            raven_data_text[ROOK_ZESSIONID] = session_key

        # Add the additional params
        raven_data_text.update(additional_params)

        try:
            raven_data_text = json.dumps(raven_data_text)
        except TypeError:
            return JsonResponse(\
                    dict(success=False,
                         message='Failed to convert data to JSON'))

    # print('raven_data_text', raven_data_text)
    app_data = json.loads(raven_data_text)

    # --------------------------------
    # Behavioral logging
    # --------------------------------
    print('rook_app_info.name:', rook_app_info.name)
    feature_id = rook_app_info.name
    if rook_app_info.name == app_names.EXPLORE_APP:
        activity_l1 = bl_static.L1_DATA_PREPARATION
        activity_l2 = bl_static.L2_DATA_EXPLORE
    elif rook_app_info.name == app_names.PLOTDATA_APP:
        feature_id = 'EXPLORE_VIEW_PLOTS'
        activity_l1 = bl_static.L1_DATA_PREPARATION
        activity_l2 = bl_static.L2_DATA_EXPLORE
    else:
        activity_l1 = bl_static.L1_PROBLEM_DEFINITION
        activity_l2 = bl_static.L2_ACTIVITY_BLANK

    log_data = dict(session_key=session_key,
                    feature_id=feature_id,
                    activity_l1=activity_l1,
                    activity_l2=activity_l2)

    LogEntryMaker.create_system_entry(user_workspace.user, log_data)

    # Call R services
    #
    rook_svc_url = rook_app_info.get_rook_server_url()
    print('rook_svc_url', rook_svc_url)
    try:
        rservice_req = requests.post(rook_svc_url, json=app_data)
    except ConnectionError:
        err_msg = 'R Server not responding: %s' % rook_svc_url
        resp_dict = dict(message=err_msg)
        return JsonResponse(resp_dict)

    print('status code from rook call: %s' % rservice_req.status_code)
    # print('rook text: %s' % rservice_req.text)

    return HttpResponse(rservice_req.text)
def solution_export3(user, raven_json, **kwargs):
    """Send a SolutionExportRequest to the SolutionExport command"""
    if not isinstance(user, User):
        err_msg = '"user" must be a User object'
        return err_resp(err_msg)

    if not isinstance(raven_json, dict):
        err_msg = 'raven_dict must be a python dict'
        return err_resp(err_msg)

    if not ta2_static.KEY_SEARCH_ID in raven_json:
        err_msg = (f'Key: "{ta2_static.KEY_SEARCH_ID}" not found in the'
                   f' "raven_json" dict. (solution_export3)')
        return err_resp(err_msg)

    search_id = raven_json.pop(ta2_static.KEY_SEARCH_ID)  # not needed for GRPC call

    session_key = kwargs.get(SESSION_KEY, '')

    # --------------------------------
    # Convert dict to string
    # --------------------------------
    raven_json_info = json_dumps(raven_json)
    if not raven_json_info.success:
        return err_resp(raven_json_info.err_msg)

    raven_json_str = raven_json_info.result_obj

    # --------------------------------
    # convert the JSON string to a gRPC request
    # --------------------------------
    try:
        req = Parse(raven_json_str,
                    core_pb2.SolutionExportRequest())
    except ParseError as err_obj:
        err_msg = 'Failed to convert JSON to gRPC: %s' % (err_obj)
        return err_resp(err_msg)

    # In test mode, return canned response
    #
    if settings.TA2_STATIC_TEST_MODE:
        resp = core_pb2.SolutionExportResponse()
        return ok_resp(message_to_json(resp))

    core_stub, err_msg = TA2Connection.get_grpc_stub()
    if err_msg:
        return err_resp(err_msg)

    # --------------------------------
    # Save the request to the db
    # --------------------------------
    stored_request = StoredRequest(\
                        user=user,
                        search_id=search_id,
                        workspace='(not specified)',
                        request_type=ta2_static.SOLUTION_EXPORT,
                        is_finished=False,
                        request=raven_json)
    stored_request.save()

    # --------------------------------
    # Behavioral logging
    # --------------------------------
    log_data = dict(session_key=session_key,
                    feature_id=ta2_static.SOLUTION_EXPORT,
                    activity_l1=bl_static.L1_MODEL_SELECTION,
                    activity_l2=bl_static.L2_MODEL_EXPORT,
                    other=raven_json)

    LogEntryMaker.create_ta2ta3_entry(user, log_data)

    # --------------------------------
    # Send the gRPC request
    # --------------------------------
    try:
        reply = core_stub.SolutionExport(\
                    req,
                    timeout=settings.TA2_GRPC_SHORT_TIMEOUT)
    except Exception as err_obj:
        user_msg = f'Error: {err_obj}'
        StoredResponse.add_err_response(stored_request, user_msg)
        return err_resp(user_msg)

    # --------------------------------
    # Convert the reply to JSON and send it back
    # --------------------------------
    resp_json_str = message_to_json(reply)

    resp_json_dict_info = json_loads(resp_json_str)
    if not resp_json_dict_info.success:
        user_msg = (f'Failed to convert GRPC response to JSON:'
                    f' {resp_json_dict_info.err_msg}')
        StoredResponse.add_err_response(stored_request, user_msg)
        return err_resp(user_msg)

    StoredResponse.add_success_response(stored_request,
                                        resp_json_dict_info.result_obj)

    return ok_resp(resp_json_str)
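
# Hedged sketch of calling solution_export3. The function pops the searchId before
# building the gRPC message, so the remaining keys must match the SolutionExportRequest
# fields; 'solutionId' and 'rank' reflect my reading of the TA3TA2 API and should be
# verified against the core_pb2 version actually in use. All values are placeholders.
def _example_solution_export(user):
    """Hypothetical example of exporting a ranked solution."""
    raven_json = {
        'searchId': '17',                   # assumed literal for ta2_static.KEY_SEARCH_ID
        'solutionId': 'abc-123-solution',   # placeholder id from GetSearchSolutionsResults
        'rank': 1,
    }

    export_info = solution_export3(user, raven_json,
                                   session_key='example-session-key')
    if not export_info.success:
        print('export failed:', export_info.err_msg)
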
def datamart_materialize(user_workspace, search_result):
    """Materialize an NYU dataset!"""
    LOGGER.info('-- attempt to materialize NYU dataset --')
    if not isinstance(user_workspace, UserWorkspace):
        return err_resp('user_workspace must be a UserWorkspace')

    if not isinstance(search_result, dict):
        return err_resp('search_result must be a python dictionary')

    print('\nsearch_result', search_result)
    print('\nsearch_result.keys()', search_result.keys())
    if not dm_static.KEY_NYU_DATAMART_ID in search_result:
        user_msg = (f'"search_result" did not contain'
                    f' "{dm_static.KEY_NYU_DATAMART_ID}" key')
        return err_resp(user_msg)

    # -----------------------------------------
    # Build the folder path where the .zip will
    #  be unbundled
    # -----------------------------------------
    LOGGER.info('(1) build path')
    datamart_id = search_result[dm_static.KEY_NYU_DATAMART_ID]

    dest_folderpath_info = DatamartJobUtilNYU.get_output_folderpath(\
                                user_workspace,
                                datamart_id,
                                dir_type=dm_static.KEY_MATERIALIZE)

    # Failed to get/create the output folder
    #
    if not dest_folderpath_info.success:
        return err_resp(dest_folderpath_info.err_msg)

    # Set the output folder
    #
    dest_folderpath = dest_folderpath_info.result_obj

    # Set the output file path
    #
    dest_filepath = join(dest_folderpath,
                         'tables',
                         'learningData.csv')

    LOGGER.info('(2) Download file')

    # -----------------------------------------
    # Has the file already been downloaded?
    # -----------------------------------------
    print('dest_filepath', dest_filepath)
    LOGGER.info('(2a) Has the file already been downloaded?')
    if isfile(dest_filepath):
        LOGGER.info('Yes, already downloaded')

        # Get preview rows
        #
        preview_info = read_file_rows(dest_filepath,
                                      dm_static.NUM_PREVIEW_ROWS)
        if not preview_info.success:
            user_msg = (f'Failed to retrieve preview rows.'
                        f' {preview_info.err_msg}')
            return err_resp(user_msg)

        info_dict = DatamartJobUtilNYU.format_materialize_response(\
                        datamart_id,
                        dm_static.DATAMART_NYU_NAME,
                        dest_filepath,
                        preview_info)

        return ok_resp(info_dict)

    # -----------------------------------------
    # Download the file
    # -----------------------------------------
    LOGGER.info('(2b) File not yet downloaded. Attempting download')

    if not 'id' in search_result:
        user_msg = 'search_result did not contain the key "id"'
        return err_resp(user_msg)

    download_url = (f'{get_nyu_url()}/download/'
                    f'{search_result[dm_static.KEY_NYU_DATAMART_ID]}')

    # ----------------------------
    # Behavioral logging
    # ----------------------------
    log_data = dict(feature_id=f'GET|{download_url}',
                    activity_l1=bl_static.L1_DATA_PREPARATION,
                    activity_l2=bl_static.L2_DATA_DOWNLOAD,
                    path=download_url)

    LogEntryMaker.create_datamart_entry(user_workspace, log_data)

    # ----------------------------
    # Download the file!
    # ----------------------------
    try:
        response = requests.get(\
                        download_url,
                        params={'format': 'd3m'},
                        verify=False,
                        stream=True,
                        timeout=settings.DATAMART_LONG_TIMEOUT)

    except requests.exceptions.Timeout as err_obj:
        return err_resp('Request timed out. responded with: %s' % err_obj)

    if response.status_code != 200:
        user_msg = (f'Materialize failed. Status code:'
                    f' {response.status_code}. response: {response.text}')
        return err_resp(user_msg)

    save_info = DatamartJobUtilNYU.save_datamart_file(\
                    dest_folderpath,
                    response,
                    expected_filepath=dest_filepath)
    if not save_info.success:
        return err_resp(save_info.err_msg)
    save_info = save_info.result_obj

    # ----------------------------
    # Get preview rows
    # ----------------------------
    preview_info = read_file_rows(save_info[dm_static.KEY_DATA_PATH],
                                  dm_static.NUM_PREVIEW_ROWS)
    if not preview_info.success:
        user_msg = (f'Failed to retrieve preview rows.'
                    f' {preview_info.err_msg}')
        return err_resp(user_msg)

    info_dict = DatamartJobUtilNYU.format_materialize_response(\
                    datamart_id,
                    dm_static.DATAMART_NYU_NAME,
                    dest_filepath,
                    preview_info,
                    **save_info)

    return ok_resp(info_dict)
def view_pebbles_home(request):
    """Serve up the workspace, the current home page.
    Include global js settings"""
    if not request.user.is_authenticated:
        return HttpResponseRedirect(reverse('login'))

    app_config = AppConfiguration.get_config()
    if app_config is None:
        return HttpResponseRedirect(reverse('view_no_domain_config_error'))

    user_info = get_authenticated_user(request)
    if not user_info.success:
        return JsonResponse(get_json_error(user_info.err_msg))
    user = user_info.result_obj

    # Is this D3M Mode?  If so, make sure:
    #  (1) there is D3M config information
    #  (2) user is logged in
    #
    if app_config.is_d3m_domain():
        # (1) Is there a valid D3M config?
        d3m_config_info = get_latest_d3m_config()
        if not d3m_config_info:
            return HttpResponseRedirect(reverse('view_list_dataset_choices_html'))
            # return HttpResponseRedirect(reverse('view_d3m_config_error'))

        session_key = get_session_key(request)
    else:
        session_key = '(event-data-no-session-key)'

    dinfo = dict(title='TwoRavens',
                 session_key=session_key,
                 DEBUG=settings.DEBUG,
                 ALLOW_SOCIAL_AUTH=settings.ALLOW_SOCIAL_AUTH,
                 CSRF_COOKIE_NAME=settings.CSRF_COOKIE_NAME,
                 app_config=app_config.convert_to_dict(),
                 #
                 TA2_STATIC_TEST_MODE=settings.TA2_STATIC_TEST_MODE,
                 TA2_TEST_SERVER_URL=settings.TA2_TEST_SERVER_URL,
                 #
                 TA2_D3M_SOLVER_ENABLED=pybool_to_js(settings.TA2_D3M_SOLVER_ENABLED),
                 TA2_WRAPPED_SOLVERS=settings.TA2_WRAPPED_SOLVERS,
                 #
                 TA3_GRPC_USER_AGENT=settings.TA3_GRPC_USER_AGENT,
                 TA3TA2_API_VERSION=TA3TA2Util.get_api_version(),
                 DISPLAY_DATAMART_UI=settings.DISPLAY_DATAMART_UI,
                 WEBSOCKET_PREFIX=settings.WEBSOCKET_PREFIX)

    log_data = dict(session_key=session_key,
                    feature_id=bl_static.FID_START_RAVENS_PEBBLES_PAGE,
                    activity_l1=bl_static.L1_DATA_PREPARATION,
                    activity_l2=bl_static.L2_DATA_OPEN)

    LogEntryMaker.create_system_entry(user, log_data)

    #print('-' * 40)
    #print(dinfo['app_config'])

    return render(request,
                  'index.html',
                  dinfo)
def datamart_augment(user_workspace, dataset_path, task_data, **kwargs):
    """Augment the file via the NYU API"""
    if not isinstance(user_workspace, UserWorkspace):
        return err_resp('user_workspace must be a UserWorkspace')

    # Make sure the source file exists
    #
    if not isfile(dataset_path):
        user_msg = f'Original data file not found: {dataset_path}'
        return err_resp(user_msg)

    # Make sure the NYU datamart id is in the task_data
    #
    if not dm_static.KEY_NYU_DATAMART_ID in task_data:
        user_msg = (f'"task_data" did not contain'
                    f' "{dm_static.KEY_NYU_DATAMART_ID}" key')
        return err_resp(user_msg)

    # used for folder naming
    #
    datamart_id = task_data[dm_static.KEY_NYU_DATAMART_ID]

    # ---------------------------------
    # The augment url...
    # ---------------------------------
    augment_url = f"{ get_nyu_url() }/augment"

    # ----------------------------
    # Behavioral logging
    # ----------------------------
    log_data = dict(feature_id=f'POST|{augment_url}',
                    activity_l1=bl_static.L1_DATA_PREPARATION,
                    activity_l2=bl_static.L2_DATA_AUGMENT,
                    path=augment_url)

    LogEntryMaker.create_datamart_entry(user_workspace, log_data)
    # ----------------------------

    # ---------------------------------
    # Ready the query parameters
    # ---------------------------------
    data_params = dict(data=open(dataset_path, 'rb'),
                       task=json.dumps(task_data))

    # ---------------------------------
    # Make the augment request
    # ---------------------------------
    try:
        response = requests.post(augment_url,
                                 files=data_params,
                                 stream=True,
                                 allow_redirects=True,
                                 verify=False,
                                 timeout=settings.DATAMART_LONG_TIMEOUT)
    except requests.exceptions.Timeout as err_obj:
        return err_resp('Request timed out. responded with: %s' % err_obj)

    # Any errors?
    #
    if response.status_code != 200:
        user_msg = (f'NYU Datamart internal server error. Status code:'
                    f' "{response.status_code}".'
                    f' <hr />Technical: {response.content}')
        # print(response.content)
        return err_resp(user_msg)

    # Write the augmented file
    #
    dest_folderpath_info = DatamartJobUtilNYU.get_output_folderpath(\
                                user_workspace,
                                datamart_id,
                                dir_type=dm_static.KEY_AUGMENT)

    if not dest_folderpath_info.success:
        return err_resp(dest_folderpath_info.err_msg)

    augment_folderpath = dest_folderpath_info.result_obj

    # Set the output file
    #
    dest_filepath = join(augment_folderpath,
                         'tables',
                         'learningData.csv')

    save_info = DatamartJobUtilNYU.save_datamart_file(\
                    augment_folderpath,
                    response,
                    expected_filepath=dest_filepath)
    if not save_info.success:
        return err_resp(save_info.err_msg)
    save_info = save_info.result_obj

    # -----------------------------------------
    # Retrieve preview rows and return response
    # -----------------------------------------

    # preview rows
    #
    preview_info = read_file_rows(save_info[dm_static.KEY_DATA_PATH],
                                  dm_static.NUM_PREVIEW_ROWS)
    if not preview_info.success:
        user_msg = (f'Failed to retrieve preview rows.'
                    f' {preview_info.err_msg}')
        return err_resp(user_msg)

    # Format/return response
    #
    info_dict = DatamartJobUtilNYU.format_materialize_response(\
                    datamart_id,
                    dm_static.DATAMART_NYU_NAME,
                    save_info[dm_static.KEY_DATA_PATH],
                    preview_info,
                    **save_info)

    return ok_resp(info_dict)
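
# Hedged sketch tying the NYU search and augment helpers together (both live in the
# NYU datamart module in the original codebase; same-named ISI variants exist
# elsewhere). The augment endpoint expects the full search-result dict, which carries
# the datamart id, as its task description. The query shape {'keywords': [...]}, the
# file path, and the choice of the top-ranked result are illustrative assumptions.
def _example_nyu_augment(user_workspace):
    """Hypothetical example of searching the NYU datamart and augmenting with a hit."""
    search_info = datamart_search(query_dict={'keywords': ['poverty']},  # assumed query shape
                                  user=user_workspace.user)
    if not search_info.success:
        print('search failed:', search_info.err_msg)
        return

    chosen_result = search_info.result_obj[0]   # pick the top-ranked hit

    augment_info = datamart_augment(
        user_workspace,
        '/ravens_volume/test_data/example/tables/learningData.csv',  # placeholder path
        chosen_result)

    if augment_info.success:
        print('augmented file info:', augment_info.result_obj)
    else:
        print('augment failed:', augment_info.err_msg)
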
def run_process(self):
    """(1) Run ProduceSolution"""
    if self.has_error():
        return
    # ----------------------------------
    # Create the input
    # ----------------------------------
    json_str_info = json_dumps(self.produce_params)
    if not json_str_info.success:
        self.add_err_msg(json_str_info.err_msg)
        return

    json_str_input = json_str_info.result_obj

    # --------------------------------
    # (2) Save the request to the db
    # --------------------------------
    req_type = ta2_static.PRODUCE_SOLUTION

    stored_request = StoredRequest(\
                        user=self.user_object,
                        request_type=req_type,
                        pipeline_id=self.pipeline_id,
                        search_id=self.search_id,
                        is_finished=False,
                        request=self.produce_params)
    stored_request.save()

    # --------------------------------
    # (2a) Behavioral logging
    # --------------------------------
    log_data = dict(session_key=self.session_key,
                    feature_id=ta2_static.PRODUCE_SOLUTION,
                    activity_l1=bl_static.L1_MODEL_SELECTION,
                    activity_l2=bl_static.L2_MODEL_EXPLANATION,
                    other=self.produce_params)

    LogEntryMaker.create_ta2ta3_entry(self.user_object, log_data)

    # ----------------------------------
    # Run ProduceSolution
    # ----------------------------------
    produce_info = produce_solution(json_str_input)

    if not produce_info.success:
        StoredResponse.add_err_response(stored_request,
                                        produce_info.err_msg)
        self.send_websocket_err_msg(ta2_static.PRODUCE_SOLUTION,
                                    produce_info.err_msg)
        return

    # ----------------------------------
    # Parse the ProduceSolutionResponse
    # ----------------------------------
    response_info = json_loads(produce_info.result_obj)
    if not response_info.success:
        StoredResponse.add_err_response(stored_request,
                                        response_info.err_msg)
        self.send_websocket_err_msg(ta2_static.PRODUCE_SOLUTION,
                                    response_info.err_msg)
        return

    result_json = response_info.result_obj

    # ----------------------------------
    # Get the requestId
    # ----------------------------------
    if not ta2_static.KEY_REQUEST_ID in result_json:
        user_msg = (' "%s" not found in response to JSON: %s') % \
                   (ta2_static.KEY_REQUEST_ID, result_json)
        #
        StoredResponse.add_err_response(stored_request, user_msg)
        #
        self.send_websocket_err_msg(ta2_static.PRODUCE_SOLUTION, user_msg)
        return

    # Store success response
    #
    StoredResponse.add_success_response(stored_request, result_json)

    print('produce 3')
    self.run_get_produce_solution_responses(result_json[ta2_static.KEY_REQUEST_ID])
def datamart_materialize(user_workspace, search_result):
    """Materialize the dataset"""
    LOGGER.info('-- attempt to materialize ISI dataset --')
    if not isinstance(user_workspace, UserWorkspace):
        return err_resp('user_workspace must be a UserWorkspace')

    if not isinstance(search_result, dict):
        return err_resp('search_result must be a python dictionary')

    if dm_static.KEY_ISI_DATAMART_ID not in search_result:
        user_msg = (f'"search_result" did not contain'
                    f' "{dm_static.KEY_ISI_DATAMART_ID}" key')
        return err_resp(user_msg)

    # -----------------------------------------
    # Format output file path
    # -----------------------------------------
    LOGGER.info('(1) build path')
    datamart_id = search_result[dm_static.KEY_ISI_DATAMART_ID]

    dest_filepath_info = DatamartJobUtilISI.get_output_filepath(\
                                user_workspace,
                                datamart_id,
                                dir_type='materialize')

    if not dest_filepath_info.success:
        return err_resp(dest_filepath_info.err_msg)

    dest_filepath = dest_filepath_info.result_obj

    LOGGER.info('(2) Download file')

    # -----------------------------------------
    # Has the file already been downloaded?
    # -----------------------------------------
    print('dest_filepath', dest_filepath)
    if isfile(dest_filepath):
        LOGGER.info('(2a) file already downloaded')

        # Get preview rows
        #
        preview_info = read_file_rows(dest_filepath,
                                      dm_static.NUM_PREVIEW_ROWS)
        if not preview_info.success:
            user_msg = (f'Failed to retrieve preview rows.'
                        f' {preview_info.err_msg}')
            return err_resp(user_msg)

        info_dict = DatamartJobUtilISI.format_materialize_response(\
                        datamart_id,
                        dm_static.DATAMART_ISI_NAME,
                        dest_filepath,
                        preview_info)

        return ok_resp(info_dict)

    # -----------------------------------------
    # Download the file
    # -----------------------------------------
    # can this be streamed to a file?
    LOGGER.info('(2b) attempting download')

    # ----------------------------
    # Behavioral logging
    # ----------------------------
    isi_materialize_url = get_isi_url() + f'/download/{datamart_id}'

    log_data = dict(feature_id=f'GET|{isi_materialize_url}',
                    activity_l1=bl_static.L1_DATA_PREPARATION,
                    activity_l2=bl_static.L2_DATA_DOWNLOAD,
                    path=isi_materialize_url)

    LogEntryMaker.create_datamart_entry(user_workspace, log_data)

    try:
        print('isi_materialize_url', isi_materialize_url)
        response = requests.get(\
                        isi_materialize_url,
                        params={'id': datamart_id, 'format': 'd3m'},
                        verify=False,
                        timeout=settings.DATAMART_LONG_TIMEOUT)

    except requests.exceptions.Timeout as err_obj:
        return err_resp('Request timed out. responded with: %s' % err_obj)

    if response.status_code != 200:
        user_msg = (f'Materialize failed. Status code:'
                    f' {response.status_code}. response: {response.text}')
        return err_resp(user_msg)

    LOGGER.info('(3) Download complete. Save file')

    # -----------------------------------------
    # Save the downloaded file
    # -----------------------------------------
    save_info = DatamartJobUtilISI.save_datamart_file(\
                    dest_filepath,
                    response)
    if not save_info.success:
        return err_resp(save_info.err_msg)
    save_info = save_info.result_obj

    # -----------------------------------------
    # Retrieve preview rows and return response
    # -----------------------------------------
    LOGGER.info('(4) File saved')

    # preview rows
    #
    preview_info = read_file_rows(save_info[dm_static.KEY_DATA_PATH],
                                  dm_static.NUM_PREVIEW_ROWS)
    if not preview_info.success:
        user_msg = (f'Failed to retrieve preview rows.'
                    f' {preview_info.err_msg}')
        return err_resp(user_msg)

    # Format/return response
    #
    info_dict = DatamartJobUtilISI.format_materialize_response(
        datamart_id,
        dm_static.DATAMART_ISI_NAME,
        save_info[dm_static.KEY_DATA_PATH],
        preview_info,
        **save_info)

    return ok_resp(info_dict)
def datamart_augment(user_workspace, data_path, search_result,
                     exact_match=False, **kwargs):
    """Augment the dataset via the ISI API"""
    if not isinstance(user_workspace, UserWorkspace):
        return err_resp('user_workspace must be a UserWorkspace')

    if not isfile(data_path):
        user_msg = f'Original data file not found: {data_path}'
        return err_resp(user_msg)

    # ----------------------------
    # mock call
    # ----------------------------
    # 291780000
    """
    right_data = '291770000'
    left_columns = '[[2]]'
    right_columns = '[[6]]'
    exact_match = True
    data_path = '/Users/ramanprasad/Documents/github-rp/TwoRavens/ravens_volume/test_data/TR1_Greed_Versus_Grievance/TRAIN/dataset_TRAIN/tables/learningData.csv'
    """
    # ----------------------------

    LOGGER.info('(1) build path')
    datamart_id = search_result[dm_static.KEY_ISI_DATAMART_ID]

    dest_filepath_info = DatamartJobUtilISI.get_output_filepath(
        user_workspace,
        f'{datamart_id}-{get_timestamp_string()}',
        dir_type=dm_static.KEY_AUGMENT)

    if not dest_filepath_info.success:
        return err_resp(dest_filepath_info.err_msg)

    augment_filepath = dest_filepath_info.result_obj

    augment_url = get_isi_url() + '/augment'

    # ----------------------------
    # Behavioral logging
    # ----------------------------
    log_data = dict(feature_id=f'POST|{augment_url}',
                    activity_l1=bl_static.L1_DATA_PREPARATION,
                    activity_l2=bl_static.L2_DATA_AUGMENT,
                    path=augment_url)

    LogEntryMaker.create_datamart_entry(user_workspace, log_data)
    # ----------------------------

    try:
        response = requests.post(
            augment_url,
            data={'task': json.dumps(search_result),
                  'format': 'd3m'},
            files={'data': open(data_path, 'r')},
            verify=False,
            timeout=settings.DATAMART_VERY_LONG_TIMEOUT)
    except requests.exceptions.Timeout as err_obj:
        return err_resp('Request timed out. responded with: %s' % err_obj)

    if response.status_code != 200:
        user_msg = (f'ISI Augment response failed with status code: '
                    f'{response.status_code}.')
        return err_resp(user_msg)

    save_info = DatamartJobUtilISI.save_datamart_file(\
                    augment_filepath,
                    response)
    if not save_info.success:
        return err_resp(save_info.err_msg)
    save_info = save_info.result_obj

    # -----------------------------------------
    # Retrieve preview rows and return response
    # -----------------------------------------

    # preview rows
    #
    preview_info = read_file_rows(save_info[dm_static.KEY_DATA_PATH],
                                  dm_static.NUM_PREVIEW_ROWS)
    if not preview_info.success:
        user_msg = (f'Failed to retrieve preview rows.'
                    f' {preview_info.err_msg}')
        return err_resp(user_msg)

    # Format/return response
    #
    info_dict = DatamartJobUtilISI.format_materialize_response(\
                    datamart_id,
                    dm_static.DATAMART_ISI_NAME,
                    save_info[dm_static.KEY_DATA_PATH],
                    preview_info,
                    **save_info)

    return ok_resp(info_dict)
def run_process(self):
    """(1) Run ScoreSolution"""
    if self.has_error():
        return
    # ----------------------------------
    # Create the input
    # ----------------------------------
    LOGGER.info('ScoreSolutionHelper.run_process 2')
    json_str_info = json_dumps(self.score_params)
    if not json_str_info.success:
        self.add_err_msg(json_str_info.err_msg)
        return

    json_str_input = json_str_info.result_obj

    # ----------------------------------
    # (2) Save the request
    # ----------------------------------
    stored_request = StoredRequest(\
                        user=self.user_object,
                        search_id=self.search_id,
                        pipeline_id=self.pipeline_id,
                        workspace='(not specified)',
                        request_type=ta2_static.SCORE_SOLUTION,
                        is_finished=False,
                        request=self.score_params)
    stored_request.save()

    # --------------------------------
    # (2a) Behavioral logging
    # --------------------------------
    log_data = dict(session_key=self.session_key,
                    feature_id=ta2_static.SCORE_SOLUTION,
                    activity_l1=bl_static.L1_MODEL_SELECTION,
                    activity_l2=bl_static.L2_MODEL_SUMMARIZATION,
                    other=self.score_params)

    LogEntryMaker.create_ta2ta3_entry(self.user_object, log_data)

    # ----------------------------------
    # Run ScoreSolution
    # ----------------------------------
    LOGGER.info('run ScoreSolution: %s', json_str_input)
    fit_info = score_solution(json_str_input)
    if not fit_info.success:
        print('ScoreSolution err_msg: ', fit_info.err_msg)
        StoredResponse.add_err_response(stored_request,
                                        fit_info.err_msg)
        self.send_websocket_err_msg(ta2_static.SCORE_SOLUTION,
                                    fit_info.err_msg)
        return

    # ----------------------------------
    # Parse the ScoreSolutionResponse
    # ----------------------------------
    response_info = json_loads(fit_info.result_obj)
    if not response_info.success:
        print('ScoreSolution grpc err_msg: ', response_info.err_msg)
        StoredResponse.add_err_response(stored_request,
                                        response_info.err_msg)
        self.send_websocket_err_msg(ta2_static.SCORE_SOLUTION,
                                    response_info.err_msg)
        return

    result_json = response_info.result_obj

    # ----------------------------------
    # Get the requestId
    # ----------------------------------
    if not ta2_static.KEY_REQUEST_ID in result_json:
        user_msg = (' "%s" not found in response to JSON: %s') % \
                   (ta2_static.KEY_REQUEST_ID, result_json)
        StoredResponse.add_err_response(stored_request, user_msg)
        self.send_websocket_err_msg(ta2_static.SCORE_SOLUTION, user_msg)
        return

    StoredResponse.add_success_response(stored_request, result_json)

    self.run_get_score_solution_responses(result_json[ta2_static.KEY_REQUEST_ID])
def datamart_search(query_dict=None, dataset_path=None, **kwargs):
    """Search the NYU datamart"""
    if query_dict is None and dataset_path is None:
        return err_resp('Either a query or dataset path must be supplied.')

    if query_dict is not None and not isinstance(query_dict, dict):
        user_msg = ('There is something wrong with the search parameters.'
                    ' Please try again. (expected a dictionary)')
        return err_resp(user_msg)

    search_url = get_nyu_url() + '/search'

    # --------------------------------
    # Behavioral logging
    # --------------------------------
    if 'user' in kwargs:
        log_data = dict(feature_id=f'POST|{search_url}',
                        activity_l1=bl_static.L1_DATA_PREPARATION,
                        activity_l2=bl_static.L2_DATA_SEARCH,
                        path=search_url)

        LogEntryMaker.create_datamart_entry(kwargs['user'], log_data)
    # --------------------------------

    # --------------------------------
    # Query the datamart
    # --------------------------------
    if dataset_path:
        try:
            with open(dataset_path, 'rb') as dataset_p:
                try:
                    response = requests.post(
                        search_url,
                        json=query_dict,
                        files=dict(data=dataset_p),
                        timeout=settings.DATAMART_LONG_TIMEOUT)
                except requests.exceptions.Timeout as err_obj:
                    return err_resp(
                        'Request timed out. responded with: %s' % err_obj)
        except IOError as err_obj:
            user_msg = (f'Failed to search with the dataset file.'
                        f' Technical: {err_obj}')
            return err_resp(user_msg)
    else:
        try:
            response = requests.post(
                search_url,
                json=query_dict,
                stream=True,
                timeout=settings.DATAMART_LONG_TIMEOUT)
        except requests.exceptions.Timeout as err_obj:
            return err_resp('Request timed out. responded with: %s' % err_obj)

    if response.status_code != 200:
        print(str(response))
        print(response.text)
        return err_resp(('NYU Datamart internal server error.'
                         ' status_code: %s') % response.status_code)

    json_results = response.json()['results']

    if not json_results:
        return err_resp('No datasets found. (%s)' % \
                        (get_timestamp_string_readable(time_only=True),))

    # print('num results: ', len(json_results))

    return ok_resp(json_results)
def run_get_score_solution_responses(self, request_id):
    """(2) Run GetScoreSolutionResults"""
    if self.has_error():
        return

    if not request_id:
        self.send_websocket_err_msg(ta2_static.GET_SCORE_SOLUTION_RESULTS,
                                    'request_id must be set')
        return

    # -----------------------------------
    # (1) make GRPC request object
    # -----------------------------------
    params_dict = {ta2_static.KEY_REQUEST_ID: request_id}
    params_info = json_dumps(params_dict)

    try:
        grpc_req = Parse(params_info.result_obj,
                         core_pb2.GetScoreSolutionResultsRequest())
    except ParseError as err_obj:
        err_msg = ('Failed to convert JSON to gRPC: %s') % (err_obj)
        self.send_websocket_err_msg(ta2_static.GET_SCORE_SOLUTION_RESULTS,
                                    err_msg)
        return

    # --------------------------------
    # (2) Save the request to the db
    # --------------------------------
    stored_request = StoredRequest(\
                        user=self.user_object,
                        request_type=ta2_static.GET_SCORE_SOLUTION_RESULTS,
                        search_id=self.search_id,
                        pipeline_id=self.pipeline_id,
                        is_finished=False,
                        request=params_dict)
    stored_request.save()

    # --------------------------------
    # (2a) Behavioral logging
    # --------------------------------
    log_data = dict(session_key=self.session_key,
                    feature_id=ta2_static.GET_SCORE_SOLUTION_RESULTS,
                    activity_l1=bl_static.L1_MODEL_SELECTION,
                    activity_l2=bl_static.L2_MODEL_SUMMARIZATION,
                    other=params_dict)

    LogEntryMaker.create_ta2ta3_entry(self.user_object, log_data)

    # --------------------------------
    # (3) Make the gRPC request
    # --------------------------------
    core_stub, err_msg = TA2Connection.get_grpc_stub()
    if err_msg:
        return err_resp(err_msg)

    msg_cnt = 0
    try:
        # -----------------------------------------
        # Iterate through the streaming responses
        # Note: The StoredResponse.id becomes the pipeline id
        # -----------------------------------------
        for reply in core_stub.GetScoreSolutionResults(\
                grpc_req, timeout=settings.TA2_GRPC_LONG_TIMEOUT):
            msg_cnt += 1

            stored_response = None  # to hold a StoredResponse object

            # -----------------------------------------------
            # Parse the response into JSON + store response
            # -----------------------------------------------
            msg_json_str = message_to_json(reply)
            msg_json_info = json_loads(msg_json_str)

            if not msg_json_info.success:
                err_msg = ('Failed to convert response to JSON: %s') % \
                          (msg_json_info.err_msg,)
                StoredResponse.add_stream_err_response(
                    stored_request, err_msg)
                self.send_websocket_err_msg(\
                        ta2_static.GET_SCORE_SOLUTION_RESULTS,
                        err_msg)
                # Wait for next response....
                continue

            result_json = msg_json_info.result_obj

            # -----------------------------------------
            # Looks good, save the response
            # -----------------------------------------
            stored_resp_info = StoredResponse.add_stream_success_response(\
                                    stored_request, result_json)

            # -----------------------------------------
            # Make sure the response was saved (probably won't happen)
            # -----------------------------------------
            if not stored_resp_info.success:
                # Not good but probably won't happen
                # send a message to the user...
                #
                self.send_websocket_err_msg(\
                        ta2_static.GET_SCORE_SOLUTION_RESULTS,
                        stored_resp_info.err_msg)
                #
                StoredResponse.add_stream_err_response(\
                        stored_request, stored_resp_info.err_msg)
                #
                continue

            # ---------------------------------------------
            # Looks good!  Get the StoredResponse
            #  - send responses back to WebSocket
            # ---------------------------------------------
            stored_response = stored_resp_info.result_obj
            stored_response.set_pipeline_id(self.pipeline_id)

            # ---------------------------------------------
            # If progress is complete,
            #  send response back to WebSocket
            # ---------------------------------------------
            progress_val = get_dict_value(\
                                result_json,
                                [ta2_static.KEY_PROGRESS,
                                 ta2_static.KEY_PROGRESS_STATE])

            if (not progress_val.success) or \
               (progress_val.result_obj != ta2_static.KEY_PROGRESS_COMPLETED):
                user_msg = 'GetScoreSolutionResultsResponse is not yet complete'
                LOGGER.info(user_msg)
                # wait for next message...
                continue

            ws_msg = WebsocketMessage.get_success_message(\
                        ta2_static.GET_SCORE_SOLUTION_RESULTS,
                        'it worked',
                        msg_cnt=msg_cnt,
                        data=stored_response.as_dict())

            LOGGER.info('ws_msg: %s' % ws_msg)
            #print('ws_msg', ws_msg.as_dict())

            ws_msg.send_message(self.websocket_id)
            #
            stored_response.mark_as_sent_to_user()

    except grpc.RpcError as err_obj:
        stored_request.set_error_status(str(err_obj))
        return

    except Exception as err_obj:
        stored_request.set_error_status(str(err_obj))
        return

    StoredRequestUtil.set_finished_ok_status(stored_request.id)