def search_with_dataset(dataset_path, query=None, **kwargs):
        """Search the NYU datamart by uploading a dataset file.

        dataset_path -- path of the dataset file to POST to the datamart
        query -- optional query sent alongside the dataset file
        kwargs -- 'user_workspace' (optional) enables behavioral logging

        Returns ok_resp(results list) on success, err_resp(message) otherwise.
        """
        if not isfile(dataset_path):
            return err_resp('The dataset file could not be found.')

        search_url = get_nyu_url() + '/search'

        # --------------------------------
        # Behavioral logging
        # --------------------------------
        if 'user_workspace' in kwargs:
            log_data = dict(feature_id=f'POST|by-dataset|{search_url}',
                            activity_l1=bl_static.L1_DATA_PREPARATION,
                            activity_l2=bl_static.L2_DATA_SEARCH,
                            path=search_url)
            LogEntryMaker.create_datamart_entry(kwargs['user_workspace'],
                                                log_data)

        # --------------------------------
        # Query the datamart
        # --------------------------------
        try:
            with open(dataset_path, 'rb') as dataset_p:
                search_files = dict(data=dataset_p)
                if query:
                    search_files['query'] = query
                try:
                    response = requests.post(
                        search_url,
                        files=search_files,
                        timeout=settings.DATAMART_LONG_TIMEOUT)
                except requests.exceptions.Timeout as err_obj:
                    return err_resp('Request timed out. responded with: %s' %
                                    err_obj)
        except IOError as err_obj:
            return err_resp(f'Failed to search with the dataset file.'
                            f'  Technical: {err_obj}')

        # Anything but a 200 is treated as a datamart-side failure
        if response.status_code != 200:
            print(str(response))
            print(response.text)
            return err_resp(('NYU Datamart internal server error.'
                             ' status_code: %s') % response.status_code)

        json_results = response.json()['results']

        if not json_results:
            return err_resp('No datasets found. (%s)' %
                            (get_timestamp_string_readable(time_only=True),))

        print('num results: ', len(json_results))

        return ok_resp(json_results)
Beispiel #2
0
def log_preprocess_call(user, json_data, session_id=''):
    """Write behavioral-log entries for a preprocess call.

    Note: The preprocess call also does problem discovery, so two
    entries are created: one for the preprocess itself and one for
    the problem-discovery activity.

    user -- user the log entries are attributed to
    json_data -- request data; may override the default L1/L2 activities
    session_id -- session key stored with each entry
    """
    # --------------------------------
    # Behavioral logging
    # --------------------------------
    # Check the request for an l1_activity, default to DATA_PREPARATION
    #
    activity_l1_val = json_data.get(bl_static.KEY_L1_ACTIVITY,
                                    bl_static.L1_DATA_PREPARATION)

    # Check the request for an l2_activity, default to DATA_EXPLORE
    # (original comment was a copy-paste of the L1 comment)
    #
    activity_l2_val = json_data.get(bl_static.KEY_L2_ACTIVITY,
                                    bl_static.L2_DATA_EXPLORE)

    log_data = dict(session_key=session_id,
                    feature_id=rook_static.PREPROCESS_DATA,
                    activity_l1=activity_l1_val,
                    activity_l2=activity_l2_val)

    LogEntryMaker.create_system_entry(user, log_data)

    # Log the discovery activity
    #
    log_data2 = dict(session_key=session_id,
                     feature_id=rook_static.PROBLEM_DISCOVERY,
                     activity_l1=bl_static.L1_PROBLEM_DEFINITION,
                     activity_l2=activity_l2_val)

    LogEntryMaker.create_system_entry(user, log_data2)
Beispiel #3
0
def view_hello(request):
    """gRPC: Heartbeat endpoint called from the UI.

    Authenticates the user, writes a behavioral-log entry, optionally
    records the D3M service call, then forwards a Hello call to the TA2
    and returns its parsed JSON response.
    """
    user_info = get_authenticated_user(request)
    if not user_info.success:
        return JsonResponse(get_json_error(user_info.err_msg))


    # --------------------------------
    # Behavioral logging
    # --------------------------------
    log_data = dict(session_key=get_session_key(request),
                    feature_id=ta2_static.HELLO,
                    activity_l1=bl_static.L1_SYSTEM_ACTIVITY,
                    activity_l2=bl_static.L2_APP_LAUNCH)

    LogEntryMaker.create_ta2ta3_entry(user_info.result_obj, log_data)


    # note: this is just a heartbeat, so no params are sent
    #

    # Begin to log D3M call
    #
    call_entry = None
    if ServiceCallEntry.record_d3m_call():
        call_entry = ServiceCallEntry.get_dm3_entry(\
                        request_obj=request,
                        call_type='Hello',
                        request_msg=('no params for this call'))

    # Let's call the TA2!
    #
    resp_info = ta2_hello()
    if not resp_info.success:
        return JsonResponse(get_json_error(resp_info.err_msg))

    json_str = resp_info.result_obj

    # Convert JSON str to python dict - err catch here
    #  - let it blow up for now--should always return JSON
    json_format_info = json_loads(json_str)
    if not json_format_info.success:
        return JsonResponse(get_json_error(json_format_info.err_msg))


    # Save D3M log
    # (only when a call_entry was created above)
    if call_entry:
        call_entry.save_d3m_response(json_format_info.result_obj)

    json_info = get_json_success('success!',
                                 data=json_format_info.result_obj)

    return JsonResponse(json_info, safe=False)
Beispiel #4
0
def view_list_primitives(request):
    """gRPC: Call from UI with a ListPrimitivesRequest.

    Authenticates the user, records the D3M call and a behavioral-log
    entry, then asks the TA2 for its primitive list and returns the
    parsed JSON to the UI.
    """
    user_info = get_authenticated_user(request)
    if not user_info.success:
        return JsonResponse(get_json_error(user_info.err_msg))

    # --------------------------------
    # (2) Begin to log D3M call
    # --------------------------------
    call_entry = None
    if ServiceCallEntry.record_d3m_call():
        call_entry = ServiceCallEntry.get_dm3_entry(
            request_obj=request,
            call_type='ListPrimitives',
            request_msg='no params for this call')

    # --------------------------------
    # (2a) Behavioral logging
    # --------------------------------
    log_data = dict(session_key=get_session_key(request),
                    feature_id=ta2_static.LIST_PRIMITIVES,
                    activity_l1=bl_static.L1_SYSTEM_ACTIVITY,
                    activity_l2=bl_static.L2_ACTIVITY_BLANK)

    LogEntryMaker.create_ta2ta3_entry(user_info.result_obj, log_data)

    # Ask the TA2 for its primitive listing
    #
    primitives_info = list_primitives()
    if not primitives_info.success:
        return JsonResponse(get_json_error(primitives_info.err_msg))

    # Parse the JSON string reply; bail out on a parse error
    #
    parsed_info = json_loads(primitives_info.result_obj)
    if not parsed_info.success:
        return JsonResponse(get_json_error(parsed_info.err_msg))

    # Save D3M log
    #
    if call_entry:
        call_entry.save_d3m_response(parsed_info.result_obj)

    json_info = get_json_success('success!', data=parsed_info.result_obj)

    return JsonResponse(json_info, safe=False)
Beispiel #5
0
def view_end_search_solutions(request):
    """gRPC: Call from UI with a EndSearchSolutionsRequest.

    Ends the TA2 search session, then writes out and clears the user's
    behavioral logs via ResetUtil.
    """
    print('view_end_search_solutions 1')
    user_info = get_authenticated_user(request)
    if not user_info.success:
        return JsonResponse(get_json_error(user_info.err_msg))
    user = user_info.result_obj

    print('view_end_search_solutions 2')
    req_body_info = get_request_body(request)
    if not req_body_info.success:
        return JsonResponse(get_json_error(req_body_info.err_msg))

    print('view_end_search_solutions 3')

    # --------------------------------
    # Behavioral logging
    # --------------------------------
    log_data = dict(session_key=get_session_key(request),
                    feature_id=ta2_static.END_SEARCH_SOLUTIONS,
                    activity_l1=bl_static.L1_SYSTEM_ACTIVITY,
                    activity_l2=bl_static.L2_ACTIVITY_BLANK)

    LogEntryMaker.create_ta2ta3_entry(user, log_data)
    print('view_end_search_solutions 4')

    # Let's call the TA2 and end the session!
    #
    params = dict(user=user)
    search_info = end_search_solutions(req_body_info.result_obj,
                                       **params)

    if not search_info.success:
        return JsonResponse(get_json_error(search_info.err_msg))

    # The session is over, write the log entries files
    #
    #LogEntryMaker.write_user_log_from_request(request)
    # User is done at this point!
    # Write out the log and delete it....
    # NOTE(review): a missing workspace is tolerated here; the logs are
    # then cleared without being written to a file first
    user_workspace = None
    ws_info = get_latest_user_workspace(request)
    if ws_info.success:
        user_workspace = ws_info.result_obj
    ResetUtil.write_and_clear_behavioral_logs(user, user_workspace)


    json_info = get_json_success('success!', data=search_info.result_obj)
    return JsonResponse(json_info, safe=False)
Beispiel #6
0
    def write_and_clear_behavioral_logs(user, user_workspace):
        """Flush a user's behavioral logs to file, then purge them.

        user -- must be a User instance
        user_workspace -- optional UserWorkspace; when present the log
            entries are written out before deletion

        Returns err_resp(...) on bad arguments; otherwise progress is
        reported via print and nothing is returned.
        """
        if not isinstance(user, User):
            return err_resp('user was not a User object')

        if user_workspace and not isinstance(user_workspace, UserWorkspace):
            return err_resp('user_workspace was not a UserWorkspace object')

        # Write out any behavioral logs for the workspace
        #
        if user_workspace:
            write_info = LogEntryMaker.write_user_log(user_workspace)
            if write_info.success:
                print('log written: ', write_info.result_obj)
            else:
                print('log writing failed: ', write_info.err_msg)

        # clear behavioral logs for current user
        #
        clear_info = BehavioralLogFormatter.delete_logs_for_user(user)
        if clear_info.success:
            print('\n'.join(clear_info.result_obj))
        else:
            print(clear_info.err_msg)
Beispiel #7
0
def view_stop_search_solutions(request):
    """gRPC: Call from UI with a StopSearchSolutions.

    Authenticates, logs the D3M call and a behavioral-log entry, then
    forwards the request body to the TA2 and returns its JSON reply.
    """
    user_info = get_authenticated_user(request)
    if not user_info.success:
        return JsonResponse(get_json_error(user_info.err_msg))

    req_body_info = get_request_body(request)
    if not req_body_info.success:
        return JsonResponse(get_json_error(req_body_info.err_msg))

    # Begin to log D3M call
    #
    call_entry = None
    if ServiceCallEntry.record_d3m_call():
        call_entry = ServiceCallEntry.get_dm3_entry(\
                        request_obj=request,
                        call_type=ta2_static.STOP_SEARCH_SOLUTIONS,
                        request_msg=req_body_info.result_obj)

    # --------------------------------
    # Behavioral logging
    # --------------------------------
    log_data = dict(session_key=get_session_key(request),
                    feature_id=ta2_static.STOP_SEARCH_SOLUTIONS,
                    activity_l1=bl_static.L1_SYSTEM_ACTIVITY,
                    activity_l2=bl_static.L2_ACTIVITY_BLANK)

    LogEntryMaker.create_ta2ta3_entry(user_info.result_obj, log_data)

    # Let's call the TA2!
    #
    search_info = stop_search_solutions(req_body_info.result_obj)
    #print('search_info', search_info)
    if not search_info.success:
        return JsonResponse(get_json_error(search_info.err_msg))

    # Convert JSON str to python dict - err catch here
    #  - let it blow up for now--should always return JSON
    # (OrderedDict preserves the TA2's key ordering in the response)
    json_dict = json.loads(search_info.result_obj, object_pairs_hook=OrderedDict)

    # Save D3M log
    #
    if call_entry:
        call_entry.save_d3m_response(json_dict)

    json_info = get_json_success('success!', data=json_dict)
    return JsonResponse(json_info, safe=False)
Beispiel #8
0
def view_create_log_entry(request, is_verbose=False):
    """Make log entry endpoint.

    Validates the POSTed JSON with BehavioralLogEntryForm and, if
    valid, stores it via LogEntryMaker.

    request -- Django request; body is JSON describing the log entry
    is_verbose -- when True, echo the saved entry back in the response
    """
    user_info = get_authenticated_user(request)
    if not user_info.success:
        user_msg = 'Can only log entries when user is logged in.'
        return JsonResponse(get_json_error(user_msg))

    user = user_info.result_obj

    session_key = get_session_key(request)

    # ----------------------------------------
    # Get the log data
    # ----------------------------------------
    json_info = get_request_body_as_json(request)
    if not json_info.success:
        return JsonResponse(get_json_error(json_info.err_msg))

    log_data = json_info.result_obj
    log_data.update(dict(session_key=session_key))

    # Default L2 to unknown
    #
    if bl_static.KEY_L2_ACTIVITY not in log_data:
        log_data[bl_static.KEY_L2_ACTIVITY] = bl_static.L2_ACTIVITY_BLANK

    # Note: this form is also used by the LogEntryMaker
    #   - redundant but ok for now, want to return form errors
    #       in a separate field
    #
    f = BehavioralLogEntryForm(log_data)
    if not f.is_valid():
        print('nope: %s' % f.errors)
        user_msg = 'Error found in log entry.'
        return JsonResponse(get_json_error(user_msg, errors=f.errors))


    log_create_info = LogEntryMaker.create_log_entry(
        user,
        log_data['type'],
        log_data)

    if not log_create_info.success:
        return JsonResponse(get_json_error(log_create_info.err_msg))

    user_msg = 'Log entry saved!'

    if is_verbose:
        return JsonResponse(get_json_success(
            user_msg,
            data=log_create_info.result_obj.to_dict()))

    return JsonResponse(get_json_success(user_msg))
def get_partials_datasets(request):
    """Create ICE/partials datasets for the user's latest workspace.

    Reads the JSON request body, resolves the latest user workspace,
    writes a behavioral-log entry, then delegates to
    create_partials_datasets. Internal failures are converted into a
    JSON error payload rather than a 500.
    """
    # request body
    req_body_info = get_request_body_as_json(request)
    if not req_body_info.success:
        return JsonResponse(get_json_error(req_body_info.err_msg))
    req_info = req_body_info.result_obj

    # workspace
    user_workspace_info = get_latest_user_workspace(request)
    if not user_workspace_info.success:
        return JsonResponse(get_json_error(user_workspace_info.err_msg))
    user_workspace = user_workspace_info.result_obj

    # user
    # NOTE(review): only used as an auth gate; the log entry below is
    # attributed to user_workspace.user instead
    user_info = get_authenticated_user(request)
    if not user_info.success:
        return JsonResponse(get_json_error(user_info.err_msg))

    activity_l1 = bl_static.L1_PROBLEM_DEFINITION
    activity_l2 = bl_static.L2_ACTIVITY_BLANK

    log_data = dict(session_key=get_session_key(request),
                    feature_id='PARTIALS_APP',
                    activity_l1=activity_l1,
                    activity_l2=activity_l2)

    LogEntryMaker.create_system_entry(user_workspace.user, log_data)

    try:
        response = create_partials_datasets(req_info, user_workspace.id)

    # broad catch is deliberate: log the traceback and return a
    # friendly error payload instead of letting the view 500
    except Exception:
        print("caught traceback when creating ICE datasets:", flush=True)
        print(traceback.format_exc(), flush=True)
        response = {
            KEY_SUCCESS: False,
            KEY_MESSAGE: "Internal error while creating ICE datasets."
        }

    return JsonResponse(response)
Beispiel #10
0
    def datamart_search(query_dict=None, dataset_path=None, **kwargs):
        """Search the ISI datamart by query and/or dataset file.

        query_dict -- optional dict of search parameters
        dataset_path -- optional path of a dataset file to upload;
            currently required (query-only search raises
            NotImplementedError)
        kwargs -- 'user' enables behavioral logging;
            'limit' (int, default 20) caps the number of results

        Returns ok_resp(results sorted by descending score) or
        err_resp(message).
        """
        if query_dict is None and dataset_path is None:
            return err_resp('Either a query or dataset path must be supplied.')

        if query_dict is not None and not isinstance(query_dict, dict):
            user_msg = ('There is something wrong with the search parameters.'
                        ' Please try again. (expected a dictionary)')
            return err_resp(user_msg)

        search_url = get_isi_url() + '/search'

        # --------------------------------
        # Behavioral logging
        # --------------------------------
        if 'user' in kwargs:
            log_data = dict(feature_id=f'POST|{search_url}',
                            activity_l1=bl_static.L1_DATA_PREPARATION,
                            activity_l2=bl_static.L2_DATA_SEARCH,
                            path=search_url)

            LogEntryMaker.create_datamart_entry(kwargs['user'], log_data)
        # --------------------------------

        # --------------------------------
        # Query the datamart
        # --------------------------------

        # Serialize the query dict to a JSON string, if given
        query_json = None
        if query_dict:
            formatted_json_info = json_dumps(query_dict)
            if not formatted_json_info.success:
                return err_resp('Failed to convert query to JSON. %s' %
                                formatted_json_info.err_msg)
            query_json = formatted_json_info.result_obj

        print(f'formatted query: {query_json}')

        if dataset_path:
            limit = kwargs.get('limit', 20)
            if not isinstance(limit, int):
                user_msg = ('The results limit must be an'
                            ' integer (datamart_search)')
                return err_resp(user_msg)

            if not USE_CACHED_SEARCH:
                try:
                    with open(dataset_path, 'rb') as dataset_p:
                        try:
                            response = requests.post(
                                search_url,
                                params={'max_return_docs': limit},
                                json={'query_json': query_json},
                                files={'data': dataset_p},
                                verify=False,
                                timeout=settings.DATAMART_LONG_TIMEOUT)

                        except requests.exceptions.Timeout as err_obj:
                            return err_resp(
                                'Request timed out. responded with: %s' %
                                err_obj)

                except IOError as err_obj:
                    user_msg = (f'Failed to search with the dataset file.'
                                f'  Technical: {err_obj}')
                    return err_resp(user_msg)

        else:
            raise NotImplementedError(
                'Augmentations on results without a dataset path are not implemented by ISI.'
            )

        if not USE_CACHED_SEARCH:
            if response.status_code != 200:
                # bug fix: requests.Response is not subscriptable;
                # the HTTP reason phrase is the .reason attribute
                return err_resp(response.reason)

            response_json = response.json()

            if response_json['code'] != "0000":
                return err_resp(response_json['message'])

        else:
            import json
            print('loading file')
            # use a context manager so the cached file is closed promptly
            # (previously leaked an open file handle)
            with open('/datamart_endpoints/cached_isi_search_response.json',
                      'r') as cached_file:
                response_json = json.load(cached_file)

        json_results = response_json['search_results']['results']

        # Highest-scoring datasets first
        sorted_data = sorted(json_results,
                             key=lambda k: k['score'],
                             reverse=True)

        return ok_resp(sorted_data[:limit])
Beispiel #11
0
    def search_with_dataset(dataset_path, query=None, **kwargs):
        """Search the ISI datamart using a dataset file.

        dataset_path -- path of the dataset file to upload
        query -- optional query dict, serialized to JSON for the request
        kwargs -- 'user_workspace' enables behavioral logging;
            'limit' (int, default 20) is the max number of documents

        Returns ok_resp(results list) or err_resp(message).
        """
        if not isfile(dataset_path):
            return err_resp('The dataset file could not be found.')

        search_url = get_isi_url() + '/search'

        # --------------------------------
        # Behavioral logging
        # --------------------------------
        if 'user_workspace' in kwargs:
            log_data = dict(feature_id=f'POST|by-dataset|{search_url}',
                            activity_l1=bl_static.L1_DATA_PREPARATION,
                            activity_l2=bl_static.L2_DATA_SEARCH,
                            path=search_url)

            LogEntryMaker.create_datamart_entry(kwargs['user_workspace'],
                                                log_data)
        # --------------------------------

        # --------------------------------
        # Query the datamart
        # --------------------------------

        # Serialize the query to a JSON string, if given
        query_json = None
        if query:
            formatted_json_info = json_dumps(query)
            if not formatted_json_info.success:
                return err_resp('Failed to convert query to JSON. %s' %
                                formatted_json_info.err_msg)
            query_json = formatted_json_info.result_obj

        print(f'formatted query: {query_json}')

        limit = kwargs.get('limit', 20)
        if not isinstance(limit, int):
            user_msg = ('The results limit must be an'
                        ' integer (datamart_search)')
            return err_resp(user_msg)

        if not USE_CACHED_SEARCH:
            try:
                with open(dataset_path, 'rb') as dataset_p:
                    try:
                        response = requests.post(
                            search_url,
                            params={'max_return_docs': limit},
                            json={'query_json': query_json},
                            files={'data': dataset_p},
                            verify=False,
                            timeout=settings.DATAMART_LONG_TIMEOUT)

                    except requests.exceptions.Timeout as err_obj:
                        return err_resp(
                            'Request timed out. responded with: %s' % err_obj)

            except IOError as err_obj:
                user_msg = (f'Failed to search with the dataset file.'
                            f'  Technical: {err_obj}')
                return err_resp(user_msg)

            if response.status_code != 200:
                print(str(response))
                print(response.text)
                return err_resp(('ISI Datamart internal server error.'
                                 ' status_code: %s') % response.status_code)

            response_json = response.json()
        else:
            import json
            print('loading file')
            # use a context manager so the cached file is closed promptly
            # (previously leaked an open file handle)
            with open('/datamart_endpoints/cached_isi_search_response.json',
                      'r') as cached_file:
                response_json = json.load(cached_file)

        #print('response_json', response_json)

        if 'results' not in response_json:
            return err_resp('No datasets found. (%s)' %
                            (get_timestamp_string_readable(time_only=True),))

        json_results = response_json['results']

        print('num results: ', len(json_results))

        return ok_resp(json_results)
    def run_describe_solution(self, pipeline_id, solution_id, msg_cnt=-1):
        """sync: Run a DescribeSolution call for each solution_id.

        Saves the request/response as StoredRequest/StoredResponse rows,
        writes behavioral-log entries before and after the call, and
        pushes the describe data to the client over the websocket.

        pipeline_id -- id attached to the result and the StoredRequest
        solution_id -- TA2 solution to describe
        msg_cnt -- message counter forwarded in the websocket reply
        """
        print(f'run_describe_solution 1. pipeline_id: {pipeline_id}')
        # ----------------------------------
        # Create the input
        # ----------------------------------
        req_params = {ta2_static.KEY_SOLUTION_ID: solution_id}
        json_str_info = json_dumps(req_params)
        if not json_str_info.success:
            self.add_err_msg(json_str_info.err_msg)
            return

        json_str_input = json_str_info.result_obj

        # --------------------------------
        # (2) Save request
        # --------------------------------
        stored_request = StoredRequest(\
                        user=self.user_object,
                        search_id=self.search_id,
                        pipeline_id=pipeline_id,
                        workspace='(not specified)',
                        request_type=ta2_static.DESCRIBE_SOLUTION,
                        is_finished=False,
                        request=req_params)
        stored_request.save()

        # --------------------------------
        # (2a) Behavioral logging
        # --------------------------------
        log_data = dict(session_key=self.session_key,
                        feature_id=ta2_static.DESCRIBE_SOLUTION,
                        activity_l1=bl_static.L1_MODEL_SELECTION,
                        activity_l2=bl_static.L2_MODEL_SUMMARIZATION,
                        other=req_params)

        LogEntryMaker.create_ta2ta3_entry(self.user_object, log_data)

        print(
            f'run_describe_solution 2. stored_request.pipeline_id: {stored_request.pipeline_id}'
        )

        # ----------------------------------
        # Run Describe Solution
        # ----------------------------------
        describe_info = describe_solution(json_str_input)
        if not describe_info.success:
            self.add_err_msg(describe_info.err_msg)
            StoredResponse.add_err_response(\
                                stored_request,
                                describe_info.err_msg)
            return

        # ----------------------------------
        # Parse the DescribeSolutionResponse
        # ----------------------------------
        describe_data_info = json_loads(describe_info.result_obj)
        if not describe_data_info.success:
            self.add_err_msg(describe_data_info.err_msg)
            StoredResponse.add_err_response(\
                                stored_request,
                                describe_data_info.err_msg)
            return

        # -----------------------------------------------
        # Add the pipline id to the result
        # -----------------------------------------------
        describe_data = describe_data_info.result_obj

        describe_data[ta2_static.KEY_PIPELINE_ID] = pipeline_id
        describe_data[ta2_static.KEY_SEARCH_ID] = self.search_id
        describe_data[ta2_static.KEY_SOLUTION_ID] = solution_id
        # move_to_end requires an OrderedDict, so json_loads must
        # return one; puts the pipeline id first in the payload
        describe_data.move_to_end(ta2_static.KEY_PIPELINE_ID, last=False)

        # params = dict()
        # if not stored_request.pipeline_id:
        #    params['pipeline_id'] = describe_data[KEY_PIPELINE_ID]

        stored_info = StoredResponse.add_success_response(\
                                            stored_request,
                                            describe_data,
                                            pipeline_id=pipeline_id)

        if not stored_info.success:
            print('stored info fail!', stored_info.err_msg)

        # NOTE(review): stored_info.result_obj is dereferenced here even
        # when stored_info.success is False — confirm result_obj is
        # always set, otherwise this print can raise
        print(
            f'run_describe_solution 3. stored_info.result_obj.pipeline_id: {stored_info.result_obj.pipeline_id}'
        )

        print(
            f'run_describe_solution 4. stored_request.pipeline_id: {stored_request.pipeline_id}'
        )

        # -----------------------------------------
        # Tracking this in the behavioral log,
        #  e.g. checking time lapse between creation
        #   of solution and if user investigates this model,
        #  later, if at all
        # -----------------------------------------
        log_data = dict(session_key=self.session_key,
                        feature_id=ta2_static.DESCRIBE_SOLUTION_RESPONSE,
                        activity_l1=bl_static.L1_MODEL_SELECTION,
                        activity_l2=bl_static.L2_MODEL_SEARCH,
                        other=describe_data)

        LogEntryMaker.create_ta2ta3_entry(self.user_object, log_data)

        # -----------------------------------------------
        # send responses back to WebSocket
        # ---------------------------------------------
        ws_msg = WebsocketMessage.get_success_message(\
                    'DescribeSolution',
                    'it worked',
                    msg_cnt=msg_cnt,
                    data=describe_data)

        print('ws_msg: %s' % ws_msg)
        #print('ws_msg', ws_msg.as_dict())

        ws_msg.send_message(self.websocket_id)
    def run_get_search_solution_results(self):
        """Run SearchSolutions against a TA2"""

        # -----------------------------------
        # (1) make GRPC request object
        # -----------------------------------
        params_dict = dict(searchId=self.search_id)
        params_info = json_dumps(params_dict)
        if not params_info.success:
            self.send_websocket_err_msg(\
                    ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                    params_info.err_msg)
            return

        try:
            grpc_req = Parse(params_info.result_obj,
                             core_pb2.GetSearchSolutionsResultsRequest())
        except ParseError as err_obj:
            err_msg = ('GetSearchSolutionsResultsRequest: Failed to'
                       ' convert JSON to gRPC: %s') % (err_obj)
            self.send_websocket_err_msg(\
                    ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                    params_info.err_msg)
            return

        # --------------------------------
        # (2) Save the request to the db
        # --------------------------------
        stored_request = StoredRequest(\
                        user=self.user_object,
                        search_id=self.search_id,
                        workspace='(not specified)',
                        request_type=ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                        is_finished=False,
                        request=params_dict)
        stored_request.save()

        # --------------------------------
        # (2a) Behavioral logging
        # --------------------------------
        log_data = dict(session_key=self.session_key,
                        feature_id=ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                        activity_l1=bl_static.L1_MODEL_SELECTION,
                        activity_l2=bl_static.L2_MODEL_SEARCH,
                        other=params_dict)

        LogEntryMaker.create_ta2ta3_entry(self.user_object, log_data)

        # --------------------------------
        # (3) Make the gRPC request
        # --------------------------------
        core_stub, err_msg = TA2Connection.get_grpc_stub()
        if err_msg:
            return err_resp(err_msg)

        msg_cnt = 0
        try:
            # -----------------------------------------
            # Iterate through the streaming responses
            # Note: The StoredResponse.id becomes the pipeline id
            # -----------------------------------------
            for reply in core_stub.GetSearchSolutionsResults(\
                    grpc_req, timeout=settings.TA2_GRPC_LONG_TIMEOUT):

                msg_cnt += 1

                # -----------------------------------------------
                # Parse the response into JSON + store response
                # -----------------------------------------------
                msg_json_str = message_to_json(reply)
                msg_json_info = json_loads(msg_json_str)

                if not msg_json_info.success:
                    user_msg = 'Failed to convert response to JSON: %s' % \
                               msg_json_info.err_msg

                    self.send_websocket_err_msg(\
                                    ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                                    user_msg)

                    StoredResponse.add_stream_err_response(\
                                        stored_response, user_msg)
                    # Wait for next response....
                    continue

                result_json = msg_json_info.result_obj

                # TA2s (specifically NYU) responds once when trying a new pipeline, with a message missing a solutionId
                # the same process responds again once the solution contains a solutionId
                print('results json from TA2')
                print(result_json)

                if not result_json.get('solutionId'):
                    continue

                if ta2_static.KEY_SOLUTION_ID not in result_json:
                    user_msg = '"%s" not found in response to JSON: %s' % \
                               (ta2_static.KEY_SOLUTION_ID, result_json)

                    StoredResponse.add_stream_err_response(\
                                        stored_response, user_msg)

                    self.send_websocket_err_msg(\
                                    ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                                    user_msg)

                    # Wait for next response....
                    continue

                # Solution id used for DescribeSolution...
                #
                solution_id = result_json[ta2_static.KEY_SOLUTION_ID]

                # -----------------------------------------
                # Looks good, save the response
                # -----------------------------------------
                stored_resp_info = StoredResponse.add_stream_success_response(\
                                    stored_request, result_json)

                # -----------------------------------------
                # Tracking this in the behavioral log,
                #  e.g. checking time lapse between creation
                #   of solution and if user investigates this model,
                #  later, if at all
                # -----------------------------------------
                log_data = dict(session_key=self.session_key,
                                feature_id=ta2_static.
                                GET_SEARCH_SOLUTIONS_RESULTS_RESPONSE,
                                activity_l1=bl_static.L1_MODEL_SELECTION,
                                activity_l2=bl_static.L2_MODEL_SEARCH,
                                other=result_json)

                LogEntryMaker.create_ta2ta3_entry(self.user_object, log_data)

                # -----------------------------------------
                # Make sure the response was saved (probably won't happen)
                # -----------------------------------------
                if not stored_resp_info.success:
                    # Not good but probably won't happen
                    # send a message to the user...
                    #
                    user_msg = 'Failed to store response from %s: %s' % \
                                (ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                                 msg_json_info.err_msg)

                    StoredResponse.add_stream_err_response(\
                                        stored_response, user_msg)

                    self.send_websocket_err_msg(\
                                    ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                                    user_msg)

                    # Wait for the next response...
                    continue

                # ---------------------------------------------
                # Looks good!  Get the StoredResponse
                # - This id will be used as the pipeline id
                # ---------------------------------------------
                stored_response = stored_resp_info.result_obj
                stored_response.use_id_as_pipeline_id()

                StoredResponse.add_stream_success_response(\
                                    stored_response, stored_response)

                # -----------------------------------------------
                # send responses back to WebSocket
                # ---------------------------------------------
                ws_msg = WebsocketMessage.get_success_message(\
                            ta2_static.GET_SEARCH_SOLUTIONS_RESULTS,
                            'it worked',
                            msg_cnt=msg_cnt,
                            data=stored_response.as_dict())

                print('ws_msg: %s' % ws_msg)
                #print('ws_msg', ws_msg.as_dict())

                ws_msg.send_message(self.websocket_id)

                stored_response.mark_as_sent_to_user()
                print('msg received #%d' % msg_cnt)
                # -----------------------------------------------
                # continue the process describe/score/etc
                # -----------------------------------------------

                # DescribeSolution - run sync
                #
                self.run_describe_solution(stored_response.pipeline_id,
                                           solution_id, msg_cnt)

                # FitSolution - run async
                #
                print('PRE run_fit_solution')
                self.run_fit_solution(stored_response.pipeline_id, solution_id)
                print('POST run_fit_solution')

                print('PRE run_score_solution')
                self.run_score_solution(stored_response.pipeline_id,
                                        solution_id)
                print('POST run_score_solution')

            # -----------------------------------------------
            # All results arrived, send message to UI
            # -----------------------------------------------
            ws_msg = WebsocketMessage.get_success_message( \
                ta2_static.ENDGetSearchSolutionsResults,
                {'searchId': self.search_id, 'message': 'it worked'})

            print('ws_msg: %s' % ws_msg)
            ws_msg.send_message(self.websocket_id)

        except grpc.RpcError as err_obj:
            stored_request.set_error_status(str(err_obj))
            return

        except Exception as err_obj:
            stored_request.set_error_status(str(err_obj))
            return

        StoredRequestUtil.set_finished_ok_status(stored_request.id)
    def make_search_solutions_call(all_params, websocket_id, user_id,
                                   **kwargs):
        """Return the result of a SearchSolutions call.

        If successful, an async task (kick_off_solution_results) is
        started which streams GetSearchSolutionsResults messages back
        over the websocket identified by `websocket_id`.

        :param all_params: dict holding the search parameters under
            ta2_static.KEY_SEARCH_SOLUTION_PARAMS
        :param websocket_id: id of the websocket used for async replies
        :param user_id: pk of the Django User making the request
        :param kwargs: may contain SESSION_KEY used for behavioral logging
        :return: ok_resp(search info dict) or err_resp(error message)
        """
        if not websocket_id:
            return err_resp('websocket_id must be set')

        param_check = SearchSolutionsHelper.check_params(all_params)
        if not param_check.success:
            return param_check

        try:
            user_obj = User.objects.get(pk=user_id)
        except User.DoesNotExist:
            user_msg = 'No user found for id: %s' % user_id
            return err_resp(user_msg)

        search_solution_params = all_params[
            ta2_static.KEY_SEARCH_SOLUTION_PARAMS]

        # --------------------------------
        # (2) Logging
        # --------------------------------
        stored_request = StoredRequest(\
                        user=user_obj,
                        workspace='(not specified)',
                        request_type=ta2_static.SEARCH_SOLUTIONS,
                        is_finished=False,
                        request=search_solution_params)
        stored_request.save()

        # --------------------------------
        # (2a) Behavioral logging
        # --------------------------------
        session_key = kwargs.get(SESSION_KEY, None)

        log_data = dict(session_key=session_key,
                        feature_id=ta2_static.SEARCH_SOLUTIONS,
                        activity_l1=bl_static.L1_MODEL_SELECTION,
                        activity_l2=bl_static.L2_MODEL_SEARCH,
                        other=search_solution_params)

        LogEntryMaker.create_ta2ta3_entry(user_obj, log_data)

        # 11/6/2019 - these variables shouldn't be here
        #    - introduced somewhere in the .js when setting a problem
        #
        search_solution_params.pop('id', None)
        search_solution_params.pop('session_key', None)

        # Run SearchSolutions against the TA2
        #
        search_info = search_solutions(search_solution_params)
        if not search_info.success:
            StoredResponse.add_err_response(stored_request,
                                            search_info.err_msg)
            return search_info

        search_info_json = json_loads(search_info.result_obj)
        if not search_info_json.success:
            StoredResponse.add_err_response(stored_request,
                                            search_info_json.err_msg)
            return search_info_json
        search_info_data = search_info_json.result_obj

        # NOTE: previously printed `json_dumps(search_info_data)[1]`;
        # json_dumps returns a response object used via .success/.result_obj
        # elsewhere, so indexing it was broken debug output.
        print('search_info_data', search_info_data)

        if ta2_static.KEY_SEARCH_ID not in search_info_data:
            user_msg = 'searchId not found in the SearchSolutionsResponse'
            StoredResponse.add_err_response(stored_request, user_msg)
            return err_resp(user_msg)

        # Use the shared constant (was the literal 'searchId') for
        # consistency with the membership check above
        search_id = search_info_data[ta2_static.KEY_SEARCH_ID]

        StoredResponse.add_success_response(stored_request,
                                            search_info_data,
                                            search_id=search_id)
        # Async task to run GetSearchSolutionsResults
        #
        extra_params = {SESSION_KEY: session_key}

        SearchSolutionsHelper.kick_off_solution_results.delay(\
                        search_id, websocket_id, user_id,
                        all_search_params=all_params,
                        **extra_params)

        # Back to the UI, looking good
        #
        return ok_resp(search_info_data)
Beispiel #15
0
def view_R_route(request, app_name_in_url):
    """Route TwoRavens calls to Rook.

        orig: TwoRavens -> Rook
        view: TwoRavens -> Django 2ravens -> Rook

    Handles two kinds of UI calls:
    - old ones, form POSTs sent with a solaJSON key
    - new ones, straight JSON requests

    :param request: Django HttpRequest
    :param app_name_in_url: short name used to look up the Rook app
    :return: HttpResponse with the Rook service's raw text, or a
        JsonResponse describing the error
    :raises Http404: if `app_name_in_url` is not a registered Rook app
    """
    # -----------------------------
    # get the app info
    # -----------------------------
    rook_app_info = RAppInfo.get_appinfo_from_url(app_name_in_url)
    if rook_app_info is None:
        raise Http404(('unknown rook app: "{0}" (please add "{0}" to '
                       ' "tworaven_apps/R_services/app_names.py")').format(\
                       app_name_in_url))

    # -----------------------------
    # Used for logging
    # -----------------------------
    user_workspace_info = get_latest_user_workspace(request)
    if not user_workspace_info.success:
        return JsonResponse(get_json_error(user_workspace_info.err_msg))

    user_workspace = user_workspace_info.result_obj

    # -----------------------------
    # additional params
    # -----------------------------
    raven_data_text = {}    # default
    additional_params = {}  # params to add to a JSON call, e.g. for PARTIALS_APP

    # -----------------------------
    # look for the "solaJSON" variable in the POST
    # -----------------------------
    if request.POST and UI_KEY_SOLA_JSON in request.POST:
        # this is a POST with a JSON string under the solaJSON key
        raven_data_text = request.POST[UI_KEY_SOLA_JSON]
    else:
        # See if the body is JSON format
        raven_data_info = get_request_body_as_json(request)
        if not raven_data_info.success:
            err_msg = ("Neither key '%s' found in POST"
                       " nor JSON in request.body") % UI_KEY_SOLA_JSON
            return JsonResponse(dict(status="ERROR",
                                     message=err_msg))

        raven_data_text = raven_data_info.result_obj

    # Retrieve the django session key and attempt to insert it into the
    # outgoing data (if none exists)
    #
    session_key = get_session_key(request)

    if isinstance(raven_data_text, str):

        blank_session_str = '%s":""' % ROOK_ZESSIONID
        if raven_data_text.find(blank_session_str) > -1:
            # was converting to JSON, but now just simple text substitution
            #
            updated_session_str = '%s":"%s"' % (ROOK_ZESSIONID, session_key)
            raven_data_text = raven_data_text.replace(blank_session_str,
                                                      updated_session_str)
        elif raven_data_text.find(ROOK_ZESSIONID) == -1:
            print('MAJOR ISSUE: NOT SESSION AT ALL (R_services.views.py)')

    elif isinstance(raven_data_text, dict):
        # We have a dict; make sure it carries a session id.
        # (The original `if KEY in d: ... elif KEY not in d:` pair
        #  collapsed to: set the key when missing or blank.)
        if raven_data_text.get(ROOK_ZESSIONID) in [None, '']:
            raven_data_text[ROOK_ZESSIONID] = session_key

        # Add the additional params
        raven_data_text.update(additional_params)

        try:
            raven_data_text = json.dumps(raven_data_text)
        except TypeError:
            return JsonResponse(\
                        dict(success=False,
                             message='Failed to convert data to JSON'))

    app_data = json.loads(raven_data_text)

    # --------------------------------
    # Behavioral logging
    # --------------------------------
    print('rook_app_info.name:', rook_app_info.name)
    feature_id = rook_app_info.name
    if rook_app_info.name == app_names.EXPLORE_APP:
        activity_l1 = bl_static.L1_DATA_PREPARATION
        activity_l2 = bl_static.L2_DATA_EXPLORE

    elif rook_app_info.name == app_names.PLOTDATA_APP:
        feature_id = 'EXPLORE_VIEW_PLOTS'
        activity_l1 = bl_static.L1_DATA_PREPARATION
        activity_l2 = bl_static.L2_DATA_EXPLORE
    else:
        activity_l1 = bl_static.L1_PROBLEM_DEFINITION
        activity_l2 = bl_static.L2_ACTIVITY_BLANK

    log_data = dict(session_key=session_key,
                    feature_id=feature_id,
                    activity_l1=activity_l1,
                    activity_l2=activity_l2)

    LogEntryMaker.create_system_entry(user_workspace.user, log_data)

    # Call R services
    #
    rook_svc_url = rook_app_info.get_rook_server_url()
    print('rook_svc_url', rook_svc_url)
    try:
        rservice_req = requests.post(rook_svc_url,
                                     json=app_data)
    except requests.exceptions.ConnectionError:
        # BUG FIX: the builtin ConnectionError does NOT catch
        # requests.exceptions.ConnectionError (which derives from
        # RequestException/IOError), so a downed R server previously
        # surfaced as an unhandled exception instead of this message.
        err_msg = 'R Server not responding: %s' % rook_svc_url
        resp_dict = dict(message=err_msg)
        return JsonResponse(resp_dict)

    print('status code from rook call: %s' % rservice_req.status_code)

    # NOTE(review): no timeout is set on this request; confirm whether a
    # settings-based timeout should be added (left unchanged here).
    return HttpResponse(rservice_req.text)
Beispiel #16
0
def solution_export3(user, raven_json, **kwargs):
    """Send a SolutionExportRequest to the SolutionExport command.

    :param user: Django User instance making the request
    :param raven_json: python dict containing the request; must include
        ta2_static.KEY_SEARCH_ID (popped before the gRPC call)
    :param kwargs: may contain SESSION_KEY used for behavioral logging
    :return: ok_resp(JSON string of the gRPC reply) or err_resp(message)
    """
    if not isinstance(user, User):
        err_msg = '"user" must be a User object'
        return err_resp(err_msg)

    if not isinstance(raven_json, dict):
        err_msg = 'raven_dict must be a python dict'
        return err_resp(err_msg)

    if ta2_static.KEY_SEARCH_ID not in raven_json:
        err_msg = (f'Key: "{ta2_static.KEY_SEARCH_ID}" not found in the'
                   f' "raven_json" dict.  (solution_export3)')
        return err_resp(err_msg)

    search_id = raven_json.pop(
        ta2_static.KEY_SEARCH_ID)  # not needed for GRPC call

    session_key = kwargs.get(SESSION_KEY, '')

    # --------------------------------
    # Convert dict to string
    # --------------------------------
    raven_json_info = json_dumps(raven_json)
    if not raven_json_info.success:
        return err_resp(raven_json_info.err_msg)

    raven_json_str = raven_json_info.result_obj

    # --------------------------------
    # convert the JSON string to a gRPC request
    # --------------------------------
    try:
        req = Parse(raven_json_str, core_pb2.SolutionExportRequest())
    except ParseError as err_obj:
        err_msg = 'Failed to convert JSON to gRPC: %s' % (err_obj)
        return err_resp(err_msg)

    # In test mode, return canned response
    #
    if settings.TA2_STATIC_TEST_MODE:
        resp = core_pb2.SolutionExportResponse()

        return ok_resp(message_to_json(resp))

    core_stub, err_msg = TA2Connection.get_grpc_stub()
    if err_msg:
        return err_resp(err_msg)

    # --------------------------------
    # Save the request to the db
    # --------------------------------
    stored_request = StoredRequest(\
                    user=user,
                    search_id=search_id,
                    workspace='(not specified)',
                    request_type=ta2_static.SOLUTION_EXPORT,
                    is_finished=False,
                    request=raven_json)
    stored_request.save()

    # --------------------------------
    # Behavioral logging
    # --------------------------------
    log_data = dict(session_key=session_key,
                    feature_id=ta2_static.SOLUTION_EXPORT,
                    activity_l1=bl_static.L1_MODEL_SELECTION,
                    activity_l2=bl_static.L2_MODEL_EXPORT,
                    other=raven_json)

    LogEntryMaker.create_ta2ta3_entry(user, log_data)

    # --------------------------------
    # Send the gRPC request
    # --------------------------------
    try:
        reply = core_stub.SolutionExport(\
                            req,
                            timeout=settings.TA2_GRPC_SHORT_TIMEOUT)
    except Exception as err_obj:
        # broad catch kept deliberately: any gRPC/transport failure is
        # recorded and reported to the caller rather than raised
        user_msg = f'Error: {err_obj}'
        StoredResponse.add_err_response(stored_request, user_msg)

        return err_resp(user_msg)

    # --------------------------------
    # Convert the reply to JSON and send it back
    # --------------------------------
    resp_json_str = message_to_json(reply)

    resp_json_dict_info = json_loads(resp_json_str)
    if not resp_json_dict_info.success:
        user_msg = (f'Failed to convert GRPC response to JSON:'
                    f' {resp_json_dict_info.err_msg}')
        StoredResponse.add_err_response(stored_request, user_msg)
        return err_resp(user_msg)

    StoredResponse.add_success_response(stored_request,
                                        resp_json_dict_info.result_obj)

    return ok_resp(resp_json_str)
    def datamart_materialize(user_workspace, search_result):
        """Materialize an NYU dataset!

        Downloads the dataset (unless already on disk) into the user
        workspace and returns preview rows plus file-path info.

        :param user_workspace: UserWorkspace used to build output paths
        :param search_result: dict; must contain
            dm_static.KEY_NYU_DATAMART_ID
        :return: ok_resp(info dict) or err_resp(error message)
        """
        LOGGER.info('-- atttempt to materialize NYU dataset --')
        if not isinstance(user_workspace, UserWorkspace):
            return err_resp('user_workspace must be a UserWorkspace')

        if not isinstance(search_result, dict):
            return err_resp('search_result must be a python dictionary')

        print('\nsearch_result', search_result)
        print('\nsearch_result.keys()', search_result.keys())
        if dm_static.KEY_NYU_DATAMART_ID not in search_result:
            user_msg = (f'"search_result" did not contain'
                        f' "{dm_static.KEY_NYU_DATAMART_ID}" key')
            return err_resp(user_msg)

        # -----------------------------------------
        # Build the folder path where the .zip will
        #   be unbundled
        # -----------------------------------------
        LOGGER.info('(1) build path')
        datamart_id = search_result[dm_static.KEY_NYU_DATAMART_ID]

        dest_folderpath_info = DatamartJobUtilNYU.get_output_folderpath(\
                                        user_workspace,
                                        datamart_id,
                                        dir_type=dm_static.KEY_MATERIALIZE)

        # Failed to get/create the output folder
        #
        if not dest_folderpath_info.success:
            return err_resp(dest_folderpath_info.err_msg)

        # Set the output folder
        #
        dest_folderpath = dest_folderpath_info.result_obj

        # Set the output file path
        #
        dest_filepath = join(dest_folderpath, 'tables', 'learningData.csv')

        LOGGER.info('(2) Download file')

        # -----------------------------------------
        # Has the file already been downloaded?
        # -----------------------------------------
        print('dest_filepath', dest_filepath)

        LOGGER.info('(2a) Has the file already been downloaded?')
        if isfile(dest_filepath):
            LOGGER.info('Yes, already downloaded')

            # Get preview rows
            #
            preview_info = read_file_rows(dest_filepath,
                                          dm_static.NUM_PREVIEW_ROWS)
            if not preview_info.success:
                user_msg = (f'Failed to retrieve preview rows.'
                            f' {preview_info.err_msg}')
                return err_resp(user_msg)

            info_dict = DatamartJobUtilNYU.format_materialize_response(\
                            datamart_id, dm_static.DATAMART_NYU_NAME,
                            dest_filepath, preview_info)

            return ok_resp(info_dict)

        # -----------------------------------------
        # Download the file
        # -----------------------------------------
        LOGGER.info('(2b) File not yet downloaded. Attempting download')

        # (removed a stale check for an unused "id" key here; the key the
        #  download URL actually uses, KEY_NYU_DATAMART_ID, is validated
        #  at the top of this method)

        download_url = (f'{get_nyu_url()}/download/'
                        f'{search_result[dm_static.KEY_NYU_DATAMART_ID]}')

        # ----------------------------
        # Behavioral logging
        # ----------------------------
        log_data = dict(feature_id=f'GET|{download_url}',
                        activity_l1=bl_static.L1_DATA_PREPARATION,
                        activity_l2=bl_static.L2_DATA_DOWNLOAD,
                        path=download_url)

        LogEntryMaker.create_datamart_entry(user_workspace, log_data)

        # ----------------------------
        # Download the file!
        # ----------------------------
        try:
            # NOTE(review): verify=False disables TLS certificate
            # verification — confirm this is intended for the NYU endpoint
            response = requests.get(\
                        download_url,
                        params={'format': 'd3m'},
                        verify=False,
                        stream=True,
                        timeout=settings.DATAMART_LONG_TIMEOUT)
        except requests.exceptions.Timeout as err_obj:
            return err_resp('Request timed out. responded with: %s' % err_obj)

        if response.status_code != 200:
            user_msg = (f'Materialize failed.  Status code:'
                        f' {response.status_code}.  response: {response.text}')
            return err_resp(user_msg)

        save_info = DatamartJobUtilNYU.save_datamart_file(\
                                    dest_folderpath,
                                    response,
                                    expected_filepath=dest_filepath)

        if not save_info.success:
            return err_resp(save_info.err_msg)
        save_info = save_info.result_obj

        # ----------------------------
        # Get preview rows
        # ----------------------------
        preview_info = read_file_rows(save_info[dm_static.KEY_DATA_PATH],
                                      dm_static.NUM_PREVIEW_ROWS)
        if not preview_info.success:
            user_msg = (f'Failed to retrieve preview rows.'
                        f' {preview_info.err_msg}')
            return err_resp(user_msg)

        info_dict = DatamartJobUtilNYU.format_materialize_response( \
            datamart_id,
            dm_static.DATAMART_NYU_NAME,
            dest_filepath,
            preview_info,
            **save_info)

        return ok_resp(info_dict)
Beispiel #18
0
def view_pebbles_home(request):
    """Render the workspace (current home page) with global JS settings.

    Redirects unauthenticated users to login; in D3M mode a valid D3M
    config is required, otherwise the user is sent to the dataset
    choice page.
    """
    if not request.user.is_authenticated:
        return HttpResponseRedirect(reverse('login'))

    app_config = AppConfiguration.get_config()
    if app_config is None:
        return HttpResponseRedirect(reverse('view_no_domain_config_error'))

    user_info = get_authenticated_user(request)
    if not user_info.success:
        return JsonResponse(get_json_error(user_info.err_msg))
    user = user_info.result_obj

    # D3M mode requires (1) valid D3M config info and (2) a session key
    #
    if app_config.is_d3m_domain():
        if not get_latest_d3m_config():
            # no valid D3M config; let the user pick a dataset
            return HttpResponseRedirect(reverse('view_list_dataset_choices_html'))
        session_key = get_session_key(request)
    else:
        session_key = '(event-data-no-session-key)'

    # Settings passed through to the client-side JS
    #
    page_settings = dict(
        title='TwoRavens',
        session_key=session_key,
        DEBUG=settings.DEBUG,
        ALLOW_SOCIAL_AUTH=settings.ALLOW_SOCIAL_AUTH,
        CSRF_COOKIE_NAME=settings.CSRF_COOKIE_NAME,
        app_config=app_config.convert_to_dict(),
        #
        TA2_STATIC_TEST_MODE=settings.TA2_STATIC_TEST_MODE,
        TA2_TEST_SERVER_URL=settings.TA2_TEST_SERVER_URL,
        #
        TA2_D3M_SOLVER_ENABLED=pybool_to_js(settings.TA2_D3M_SOLVER_ENABLED),
        TA2_WRAPPED_SOLVERS=settings.TA2_WRAPPED_SOLVERS,
        #
        TA3_GRPC_USER_AGENT=settings.TA3_GRPC_USER_AGENT,
        TA3TA2_API_VERSION=TA3TA2Util.get_api_version(),
        DISPLAY_DATAMART_UI=settings.DISPLAY_DATAMART_UI,
        WEBSOCKET_PREFIX=settings.WEBSOCKET_PREFIX)

    # Behavioral logging: record that the pebbles page was opened
    #
    LogEntryMaker.create_system_entry(
        user,
        dict(session_key=session_key,
             feature_id=bl_static.FID_START_RAVENS_PEBBLES_PAGE,
             activity_l1=bl_static.L1_DATA_PREPARATION,
             activity_l2=bl_static.L2_DATA_OPEN))

    return render(request, 'index.html', page_settings)
    def datamart_augment(user_workspace, dataset_path, task_data, **kwargs):
        """Augment the file via the NYU API.

        Posts the source dataset plus the augment task to the NYU
        datamart, saves the augmented file into the user workspace, and
        returns preview rows plus file-path info.

        :param user_workspace: UserWorkspace used to build output paths
        :param dataset_path: path to the source data file
        :param task_data: dict; must contain dm_static.KEY_NYU_DATAMART_ID
        :return: ok_resp(info dict) or err_resp(error message)
        """
        if not isinstance(user_workspace, UserWorkspace):
            return err_resp('user_workspace must be a UserWorkspace')

        # Make sure the soure file exists
        #
        if not isfile(dataset_path):
            user_msg = f'Original data file not found: {dataset_path}'
            return err_resp(user_msg)

        # Make sure the NYU datamart id is in the task_data
        #
        if dm_static.KEY_NYU_DATAMART_ID not in task_data:
            user_msg = (f'"task_data" did not contain'
                        f' "{dm_static.KEY_NYU_DATAMART_ID}" key')
            return err_resp(user_msg)

        # used for folder naming
        #
        datamart_id = task_data[dm_static.KEY_NYU_DATAMART_ID]

        # ---------------------------------
        # The augment url...
        # ---------------------------------
        augment_url = f"{ get_nyu_url() }/augment"

        # ----------------------------
        # Behavioral logging
        # ----------------------------
        log_data = dict(feature_id=f'POST|{augment_url}',
                        activity_l1=bl_static.L1_DATA_PREPARATION,
                        activity_l2=bl_static.L2_DATA_AUGMENT,
                        path=augment_url)

        LogEntryMaker.create_datamart_entry(user_workspace, log_data)
        # ----------------------------

        # ---------------------------------
        # Make the augment request.
        # The source file is opened in a context manager so the handle
        # is always closed (it was previously opened and leaked).
        # ---------------------------------
        try:
            with open(dataset_path, 'rb') as dataset_p:
                data_params = dict(data=dataset_p,
                                   task=json.dumps(task_data))
                try:
                    # NOTE(review): verify=False disables TLS certificate
                    # verification — confirm intended for the NYU endpoint
                    response = requests.post(augment_url,
                                             files=data_params,
                                             stream=True,
                                             allow_redirects=True,
                                             verify=False,
                                             timeout=settings.DATAMART_LONG_TIMEOUT)
                except requests.exceptions.Timeout as err_obj:
                    return err_resp('Request timed out. responded with: %s' % err_obj)
        except IOError as err_obj:
            # mirrors the error handling used by search_with_dataset
            user_msg = (f'Failed to augment with the dataset file.'
                        f'  Technical: {err_obj}')
            return err_resp(user_msg)

        # Any errors?
        #
        if response.status_code != 200:
            user_msg = (f'NYU Datamart internal server error. Status code:'
                        f' "{response.status_code}".'
                        f' <hr />Technical: {response.content}')
            return err_resp(user_msg)

        # Write the augmented file
        #
        dest_folderpath_info = DatamartJobUtilNYU.get_output_folderpath(\
                                        user_workspace,
                                        datamart_id,
                                        dir_type=dm_static.KEY_AUGMENT)

        if not dest_folderpath_info.success:
            return err_resp(dest_folderpath_info.err_msg)

        augment_folderpath = dest_folderpath_info.result_obj

        # Set the output file
        #
        dest_filepath = join(augment_folderpath, 'tables', 'learningData.csv')

        save_info = DatamartJobUtilNYU.save_datamart_file(\
                                    augment_folderpath,
                                    response,
                                    expected_filepath=dest_filepath)

        if not save_info.success:
            return err_resp(save_info.err_msg)
        save_info = save_info.result_obj

        # -----------------------------------------
        # Retrieve preview rows and return response
        # -----------------------------------------
        preview_info = read_file_rows(save_info[dm_static.KEY_DATA_PATH],
                                      dm_static.NUM_PREVIEW_ROWS)
        if not preview_info.success:
            user_msg = (f'Failed to retrieve preview rows.'
                        f' {preview_info.err_msg}')
            return err_resp(user_msg)

        # Format/return reponse
        #
        info_dict = DatamartJobUtilNYU.format_materialize_response(\
                        datamart_id,
                        dm_static.DATAMART_NYU_NAME,
                        save_info[dm_static.KEY_DATA_PATH],
                        preview_info,
                        **save_info)

        return ok_resp(info_dict)
    def run_process(self):
        """(1) Run ProduceSolution.

        Serializes self.produce_params, stores/logs the request, calls
        produce_solution, and on success kicks off the streaming
        GetProduceSolutionResults handler with the returned requestId.
        Errors are stored and sent over the websocket; nothing is
        returned.
        """
        if self.has_error():
            return
        # ----------------------------------
        # Create the input
        # ----------------------------------
        json_str_info = json_dumps(self.produce_params)
        if not json_str_info.success:
            self.add_err_msg(json_str_info.err_msg)
            return

        json_str_input = json_str_info.result_obj

        # --------------------------------
        # (2) Save the request to the db
        # --------------------------------
        req_type = ta2_static.PRODUCE_SOLUTION

        stored_request = StoredRequest(\
                        user=self.user_object,
                        request_type=req_type,
                        pipeline_id=self.pipeline_id,
                        search_id=self.search_id,
                        is_finished=False,
                        request=self.produce_params)
        stored_request.save()

        # --------------------------------
        # (2a) Behavioral logging
        # --------------------------------
        log_data = dict(session_key=self.session_key,
                        feature_id=ta2_static.PRODUCE_SOLUTION,
                        activity_l1=bl_static.L1_MODEL_SELECTION,
                        activity_l2=bl_static.L2_MODEL_EXPLANATION,
                        other=self.produce_params)

        LogEntryMaker.create_ta2ta3_entry(self.user_object, log_data)

        # ----------------------------------
        # Run ProduceSolution
        # (comment previously said "FitSolution" -- it calls
        #  produce_solution)
        # ----------------------------------
        produce_info = produce_solution(json_str_input)
        if not produce_info.success:
            StoredResponse.add_err_response(stored_request,
                                            produce_info.err_msg)

            self.send_websocket_err_msg(ta2_static.PRODUCE_SOLUTION,
                                        produce_info.err_msg)
            return

        # ----------------------------------
        # Parse the ProduceSolutionResponse
        # ----------------------------------
        response_info = json_loads(produce_info.result_obj)
        if not response_info.success:
            StoredResponse.add_err_response(stored_request,
                                            response_info.err_msg)

            self.send_websocket_err_msg(ta2_static.PRODUCE_SOLUTION,
                                        response_info.err_msg)
            return

        result_json = response_info.result_obj

        # ----------------------------------
        # Get the requestId
        # ----------------------------------
        if ta2_static.KEY_REQUEST_ID not in result_json:
            user_msg = (' "%s" not found in response to JSON: %s') % \
                        (ta2_static.KEY_REQUEST_ID, result_json)
            #
            StoredResponse.add_err_response(stored_request,
                                            user_msg)
            #
            self.send_websocket_err_msg(ta2_static.PRODUCE_SOLUTION, user_msg)
            return

        # Store success response
        #
        StoredResponse.add_success_response(stored_request, result_json)

        self.run_get_produce_solution_responses(result_json[ta2_static.KEY_REQUEST_ID])
Beispiel #21
0
    def datamart_materialize(user_workspace, search_result):
        """Materialize (download) an ISI datamart dataset to the workspace.

        Parameters:
            user_workspace: UserWorkspace -- used for behavioral logging and
                to build the output file path
            search_result: dict -- a prior ISI search result; must contain
                the `dm_static.KEY_ISI_DATAMART_ID` key

        Returns:
            ok_resp(info_dict) with the saved file path and preview rows,
            or err_resp(user_msg) on any failure.
        """
        LOGGER.info('-- attempt to materialize ISI dataset --')
        if not isinstance(user_workspace, UserWorkspace):
            return err_resp('user_workspace must be a UserWorkspace')

        if not isinstance(search_result, dict):
            return err_resp('search_result must be a python dictionary')

        if dm_static.KEY_ISI_DATAMART_ID not in search_result:
            user_msg = (f'"search_result" did not contain'
                        f' "{dm_static.KEY_ISI_DATAMART_ID}" key')
            return err_resp(user_msg)

        # -----------------------------------------
        # Format output file path
        # -----------------------------------------
        LOGGER.info('(1) build path')
        datamart_id = search_result[dm_static.KEY_ISI_DATAMART_ID]

        dest_filepath_info = DatamartJobUtilISI.get_output_filepath(
            user_workspace,
            datamart_id,
            dir_type='materialize')

        if not dest_filepath_info.success:
            return err_resp(dest_filepath_info.err_msg)

        dest_filepath = dest_filepath_info.result_obj

        LOGGER.info('(2) Download file')

        # -----------------------------------------
        # Has the file already been downloaded?
        # If so, reuse it instead of hitting the service again.
        # -----------------------------------------
        LOGGER.info('dest_filepath: %s', dest_filepath)
        if isfile(dest_filepath):
            LOGGER.info('(2a) file already downloaded')

            # Get preview rows from the cached file
            preview_info = read_file_rows(dest_filepath,
                                          dm_static.NUM_PREVIEW_ROWS)
            if not preview_info.success:
                user_msg = (f'Failed to retrieve preview rows.'
                            f' {preview_info.err_msg}')
                return err_resp(user_msg)

            info_dict = DatamartJobUtilISI.format_materialize_response(
                datamart_id, dm_static.DATAMART_ISI_NAME,
                dest_filepath, preview_info)

            return ok_resp(info_dict)

        # -----------------------------------------
        # Download the file
        # -----------------------------------------
        LOGGER.info('(2b) attempting download')

        # ----------------------------
        # Behavioral logging
        # ----------------------------
        isi_materialize_url = get_isi_url() + f'/download/{datamart_id}'

        log_data = dict(feature_id=f'GET|{isi_materialize_url}',
                        activity_l1=bl_static.L1_DATA_PREPARATION,
                        activity_l2=bl_static.L2_DATA_DOWNLOAD,
                        path=isi_materialize_url)

        LogEntryMaker.create_datamart_entry(user_workspace, log_data)

        try:
            LOGGER.info('isi_materialize_url: %s', isi_materialize_url)
            # NOTE(review): verify=False disables TLS certificate checks;
            # confirm this is intentional for the ISI endpoint
            response = requests.get(
                isi_materialize_url,
                params={'id': datamart_id, 'format': 'd3m'},
                verify=False,
                timeout=settings.DATAMART_LONG_TIMEOUT)
        except requests.exceptions.Timeout as err_obj:
            return err_resp('Request timed out. responded with: %s' % err_obj)

        if response.status_code != 200:
            user_msg = (f'Materialize failed.  Status code:'
                        f' {response.status_code}.  response: {response.text}')
            return err_resp(user_msg)

        LOGGER.info('(3) Download complete.  Save file')

        # -----------------------------------------
        # Save the downloaded file
        # -----------------------------------------
        save_info = DatamartJobUtilISI.save_datamart_file(
            dest_filepath,
            response)

        if not save_info.success:
            return err_resp(save_info.err_msg)
        save_info = save_info.result_obj

        # -----------------------------------------
        # Retrieve preview rows and return response
        # -----------------------------------------
        LOGGER.info('(4) File saved')

        # preview rows
        preview_info = read_file_rows(save_info[dm_static.KEY_DATA_PATH],
                                      dm_static.NUM_PREVIEW_ROWS)
        if not preview_info.success:
            user_msg = (f'Failed to retrieve preview rows.'
                        f' {preview_info.err_msg}')
            return err_resp(user_msg)

        # Format/return response
        info_dict = DatamartJobUtilISI.format_materialize_response(
            datamart_id, dm_static.DATAMART_ISI_NAME,
            save_info[dm_static.KEY_DATA_PATH], preview_info, **save_info)

        return ok_resp(info_dict)
Beispiel #22
0
    def datamart_augment(user_workspace,
                         data_path,
                         search_result,
                         exact_match=False,
                         **kwargs):
        """Augment a local dataset using an ISI datamart search result.

        Parameters:
            user_workspace: UserWorkspace -- used for logging and output paths
            data_path: str -- path to the original data file to augment
            search_result: dict -- ISI search result; must contain the
                `dm_static.KEY_ISI_DATAMART_ID` key
            exact_match: bool -- accepted but not referenced in this body;
                TODO(review): confirm whether it should be sent to the service

        Returns:
            ok_resp(info_dict) with the augmented file path and preview rows,
            or err_resp(user_msg) on any failure.
        """
        if not isinstance(user_workspace, UserWorkspace):
            return err_resp('user_workspace must be a UserWorkspace')

        if not isfile(data_path):
            user_msg = f'Original data file not found: {data_path}'
            return err_resp(user_msg)

        # Validate search_result before indexing into it
        # (previously an unvalidated access could raise KeyError;
        #  now consistent with datamart_materialize)
        if not isinstance(search_result, dict):
            return err_resp('search_result must be a python dictionary')

        if dm_static.KEY_ISI_DATAMART_ID not in search_result:
            user_msg = (f'"search_result" did not contain'
                        f' "{dm_static.KEY_ISI_DATAMART_ID}" key')
            return err_resp(user_msg)

        # ----------------------------
        LOGGER.info('(1) build path')
        datamart_id = search_result[dm_static.KEY_ISI_DATAMART_ID]

        dest_filepath_info = DatamartJobUtilISI.get_output_filepath(
            user_workspace,
            f'{datamart_id}-{get_timestamp_string()}',
            dir_type=dm_static.KEY_AUGMENT)

        if not dest_filepath_info.success:
            return err_resp(dest_filepath_info.err_msg)

        augment_filepath = dest_filepath_info.result_obj

        augment_url = get_isi_url() + '/augment'

        # ----------------------------
        # Behavioral logging
        # ----------------------------
        log_data = dict(feature_id=f'POST|{augment_url}',
                        activity_l1=bl_static.L1_DATA_PREPARATION,
                        activity_l2=bl_static.L2_DATA_AUGMENT,
                        path=augment_url)

        LogEntryMaker.create_datamart_entry(user_workspace, log_data)
        # ----------------------------

        # ----------------------------
        # Make the augment request
        # ----------------------------
        try:
            # "with" guarantees the data file handle is closed
            # (it was previously opened inline and leaked)
            with open(data_path, 'r') as data_file:
                # NOTE(review): verify=False disables TLS certificate checks;
                # confirm this is intentional for the ISI endpoint
                response = requests.post(
                    augment_url,
                    data={
                        'task': json.dumps(search_result),
                        'format': 'd3m'
                    },
                    files={'data': data_file},
                    verify=False,
                    timeout=settings.DATAMART_VERY_LONG_TIMEOUT)

        except requests.exceptions.Timeout as err_obj:
            return err_resp('Request timed out. responded with: %s' % err_obj)

        except IOError as err_obj:
            user_msg = (f'Failed to read the data file.'
                        f'  Technical: {err_obj}')
            return err_resp(user_msg)

        if response.status_code != 200:
            user_msg = (f'ISI Augment response failed with status code: '
                        f'{response.status_code}.')
            return err_resp(user_msg)

        save_info = DatamartJobUtilISI.save_datamart_file(
            augment_filepath,
            response)

        if not save_info.success:
            return err_resp(save_info.err_msg)
        save_info = save_info.result_obj

        # -----------------------------------------
        # Retrieve preview rows and return response
        # -----------------------------------------
        preview_info = read_file_rows(save_info[dm_static.KEY_DATA_PATH],
                                      dm_static.NUM_PREVIEW_ROWS)
        if not preview_info.success:
            user_msg = (f'Failed to retrieve preview rows.'
                        f' {preview_info.err_msg}')
            return err_resp(user_msg)

        # Format/return response
        info_dict = DatamartJobUtilISI.format_materialize_response(
            datamart_id,
            dm_static.DATAMART_ISI_NAME,
            save_info[dm_static.KEY_DATA_PATH],
            preview_info,
            **save_info)

        return ok_resp(info_dict)
Beispiel #23
0
    def run_process(self):
        """(1) Run ScoreSolution"""
        if self.has_error():
            return

        def _abort(err_msg, console_prefix=None):
            # Record the error against the stored request and push it
            # to the websocket client; optionally echo to the console.
            if console_prefix is not None:
                print(console_prefix, err_msg)
            StoredResponse.add_err_response(stored_request, err_msg)
            self.send_websocket_err_msg(ta2_static.SCORE_SOLUTION, err_msg)

        # ----------------------------------
        # Serialize the score parameters to JSON
        # ----------------------------------
        LOGGER.info('ScoreSolutionHelper.run_process 2')
        params_json = json_dumps(self.score_params)
        if not params_json.success:
            self.add_err_msg(params_json.err_msg)
            return

        grpc_input = params_json.result_obj

        # ----------------------------------
        # (2) Persist the request before sending it
        # ----------------------------------
        stored_request = StoredRequest(\
                        user=self.user_object,
                        search_id=self.search_id,
                        pipeline_id=self.pipeline_id,
                        workspace='(not specified)',
                        request_type=ta2_static.SCORE_SOLUTION,
                        is_finished=False,
                        request=self.score_params)
        stored_request.save()

        # --------------------------------
        # (2a) Behavioral logging
        # --------------------------------
        log_data = dict(session_key=self.session_key,
                        feature_id=ta2_static.SCORE_SOLUTION,
                        activity_l1=bl_static.L1_MODEL_SELECTION,
                        activity_l2=bl_static.L2_MODEL_SUMMARIZATION,
                        other=self.score_params)

        LogEntryMaker.create_ta2ta3_entry(self.user_object, log_data)

        # ----------------------------------
        # Call ScoreSolution
        # ----------------------------------
        LOGGER.info('run ScoreSolution: %s', grpc_input)
        score_info = score_solution(grpc_input)
        if not score_info.success:
            _abort(score_info.err_msg, console_prefix='ScoreSolution err_msg: ')
            return

        # ----------------------------------
        # Parse the ScoreSolutionResponse
        # ----------------------------------
        response_info = json_loads(score_info.result_obj)
        if not response_info.success:
            _abort(response_info.err_msg,
                   console_prefix='ScoreSolution grpc err_msg: ')
            return

        result_json = response_info.result_obj

        # ----------------------------------
        # The response must include a requestId
        # ----------------------------------
        if ta2_static.KEY_REQUEST_ID not in result_json:
            user_msg = (' "%s" not found in response to JSON: %s') % \
                        (ta2_static.KEY_REQUEST_ID, result_json)
            _abort(user_msg)
            return

        # Store the success response and start streaming results
        StoredResponse.add_success_response(stored_request, result_json)

        self.run_get_score_solution_responses(
            result_json[ta2_static.KEY_REQUEST_ID])
    def datamart_search(query_dict=None, dataset_path=None, **kwargs):
        """Search the NYU datamart.

        Parameters:
            query_dict: dict | None -- query to send as JSON
            dataset_path: str | None -- optional dataset file to search with
            kwargs: may contain 'user' for behavioral logging

        At least one of query_dict / dataset_path must be supplied.

        Returns:
            ok_resp(list_of_results) or err_resp(user_msg).
        """
        if query_dict is None and dataset_path is None:
            return err_resp('Either a query or dataset path must be supplied.')

        if query_dict is not None and not isinstance(query_dict, dict):
            user_msg = ('There is something wrong with the search parameters.'
                        ' Please try again. (expected a dictionary)')
            return err_resp(user_msg)

        search_url = get_nyu_url() + '/search'

        # --------------------------------
        # Behavioral logging
        # --------------------------------
        if 'user' in kwargs:
            log_data = dict(feature_id=f'POST|{search_url}',
                            activity_l1=bl_static.L1_DATA_PREPARATION,
                            activity_l2=bl_static.L2_DATA_SEARCH,
                            path=search_url)

            LogEntryMaker.create_datamart_entry(kwargs['user'], log_data)
        # --------------------------------

        # --------------------------------
        # Query the datamart
        # --------------------------------
        if dataset_path:
            try:
                with open(dataset_path, 'rb') as dataset_p:
                    try:
                        response = requests.post(
                            search_url,
                            json=query_dict,
                            files=dict(data=dataset_p),
                            timeout=settings.DATAMART_LONG_TIMEOUT)

                    except requests.exceptions.Timeout as err_obj:
                        return err_resp(
                            'Request timed out. responded with: %s' % err_obj)

            except IOError as err_obj:
                user_msg = (f'Failed to search with the dataset file.'
                            f'  Technical: {err_obj}')
                return err_resp(user_msg)

        else:
            try:
                response = requests.post(
                    search_url,
                    json=query_dict,
                    stream=True,
                    timeout=settings.DATAMART_LONG_TIMEOUT)
            except requests.exceptions.Timeout as err_obj:
                return err_resp('Request timed out. responded with: %s' %
                                err_obj)

        # Bug fix: the status check previously ran only for the
        # query-only branch, so a failed dataset-path search fell
        # through to response.json() below.
        if response.status_code != 200:
            print(str(response))
            print(response.text)
            return err_resp(('NYU Datamart internal server error.'
                             ' status_code: %s') % response.status_code)

        json_results = response.json()['results']

        if not json_results:
            return err_resp('No datasets found. (%s)' % \
                            (get_timestamp_string_readable(time_only=True),))

        return ok_resp(json_results)
Beispiel #25
0
    def run_get_score_solution_responses(self, request_id):
        """(2) Run GetScoreSolutionResults.

        Streams GetScoreSolutionResults replies from the TA2 for the given
        `request_id`, persisting each reply as a StoredResponse and pushing
        completed results to the websocket client.
        """
        if self.has_error():
            return

        if not request_id:
            self.send_websocket_err_msg(ta2_static.GET_SCORE_SOLUTION_RESULTS,
                                        'request_id must be set')
            return

        # -----------------------------------
        # (1) make GRPC request object
        # -----------------------------------
        params_dict = {ta2_static.KEY_REQUEST_ID: request_id}
        params_info = json_dumps(params_dict)

        try:
            grpc_req = Parse(params_info.result_obj,
                             core_pb2.GetScoreSolutionResultsRequest())
        except ParseError as err_obj:
            err_msg = ('Failed to convert JSON to gRPC: %s') % (err_obj)
            self.send_websocket_err_msg(ta2_static.GET_SCORE_SOLUTION_RESULTS,
                                        err_msg)
            return

        # --------------------------------
        # (2) Save the request to the db
        # --------------------------------
        stored_request = StoredRequest(\
                        user=self.user_object,
                        request_type=ta2_static.GET_SCORE_SOLUTION_RESULTS,
                        search_id=self.search_id,
                        pipeline_id=self.pipeline_id,
                        is_finished=False,
                        request=params_dict)
        stored_request.save()

        # --------------------------------
        # (2a) Behavioral logging
        # --------------------------------
        log_data = dict(session_key=self.session_key,
                        feature_id=ta2_static.GET_SCORE_SOLUTION_RESULTS,
                        activity_l1=bl_static.L1_MODEL_SELECTION,
                        activity_l2=bl_static.L2_MODEL_SUMMARIZATION,
                        other=params_dict)

        LogEntryMaker.create_ta2ta3_entry(self.user_object, log_data)

        # --------------------------------
        # (3) Make the gRPC request
        # --------------------------------
        core_stub, err_msg = TA2Connection.get_grpc_stub()
        if err_msg:
            return err_resp(err_msg)

        msg_cnt = 0
        try:
            # -----------------------------------------
            # Iterate through the streaming responses
            # Note: The StoredResponse.id becomes the pipeline id
            # -----------------------------------------
            for reply in core_stub.GetScoreSolutionResults(\
                    grpc_req, timeout=settings.TA2_GRPC_LONG_TIMEOUT):

                msg_cnt += 1

                stored_response = None  # to hold a StoredResponse object

                # -----------------------------------------------
                # Parse the response into JSON + store response
                # -----------------------------------------------
                msg_json_str = message_to_json(reply)
                msg_json_info = json_loads(msg_json_str)

                if not msg_json_info.success:
                    # Bug fix: this branch previously referenced the
                    # undefined names "err_obj" and "user_msg" (NameError);
                    # use the parse failure's own error message instead.
                    err_msg = ('Failed to convert gRPC response to JSON: %s') % \
                               (msg_json_info.err_msg,)
                    StoredResponse.add_stream_err_response(
                        stored_request, err_msg)

                    self.send_websocket_err_msg(\
                            ta2_static.GET_SCORE_SOLUTION_RESULTS,
                            err_msg)
                    # Wait for next response....
                    continue

                result_json = msg_json_info.result_obj

                # -----------------------------------------
                # Looks good, save the response
                # -----------------------------------------
                stored_resp_info = StoredResponse.add_stream_success_response(\
                                    stored_request, result_json)

                # -----------------------------------------
                # Make sure the response was saved (probably won't happen)
                # -----------------------------------------
                if not stored_resp_info.success:
                    # Not good but probably won't happen
                    # send a message to the user...
                    #
                    self.send_websocket_err_msg(\
                                    ta2_static.GET_SCORE_SOLUTION_RESULTS,
                                    stored_resp_info.err_msg)
                    #
                    StoredResponse.add_stream_err_response(\
                                    stored_request, stored_resp_info.err_msg)
                    #
                    continue

                # ---------------------------------------------
                # Looks good!  Get the StoredResponse
                # - send responses back to WebSocket
                # ---------------------------------------------
                stored_response = stored_resp_info.result_obj
                stored_response.set_pipeline_id(self.pipeline_id)

                # ---------------------------------------------
                # If progress is complete,
                #  send response back to WebSocket
                # ---------------------------------------------
                progress_val = get_dict_value(\
                                result_json,
                                [ta2_static.KEY_PROGRESS,
                                 ta2_static.KEY_PROGRESS_STATE])

                if (not progress_val.success) or \
                   (progress_val.result_obj != ta2_static.KEY_PROGRESS_COMPLETED):
                    user_msg = 'GetScoreSolutionResultsResponse is not yet complete'
                    LOGGER.info(user_msg)
                    # wait for next message...
                    continue

                ws_msg = WebsocketMessage.get_success_message(\
                            ta2_static.GET_SCORE_SOLUTION_RESULTS,
                            'it worked',
                            msg_cnt=msg_cnt,
                            data=stored_response.as_dict())

                LOGGER.info('ws_msg: %s' % ws_msg)

                ws_msg.send_message(self.websocket_id)

        except Exception as err_obj:
            # Covers grpc.RpcError too (it subclasses Exception); the two
            # previous handlers were identical, so they are merged here.
            # Broad catch is deliberate: mark the request failed, don't crash.
            stored_request.set_error_status(str(err_obj))
            return

        StoredRequestUtil.set_finished_ok_status(stored_request.id)