def process_ts_query(self):
        """
        Run the time-series query for a single measure document.

        Validates input, runs the conjunctive query to find the measure
        document's id, re-queries grouped by event_date at the configured
        interval, and converts the resulting aggregation into a TimeSeries
        dict.

        Returns a rest.* response: ok() with the time series dict,
        bad_request() on invalid input or failure, not_found() when no
        matching document exists.
        """
        valid_input = self.validate_input()
        if not valid_input:
            err_json = dict()
            err_json['message'] = "Please enter valid query params. Fields must exist for the given project. " \
                                  "If not sure, please access http://mydigurl/projects/<project_name>/fields API for " \
                                  "reference"
            return rest.bad_request(err_json)

        resp = self.cquery.process()
        if resp is None or resp[1] == 400:
            return resp
        try:
            resp = resp[0]
            hits = resp['hits']['hits']
            if hits and 'doc_id' in hits[0]['_source']:
                docid = hits[0]['_source']['doc_id']
                # Re-query for this document's values bucketed by event_date.
                argmap = {
                    'measure/value': docid,
                    '_group-by': 'event_date',
                    '_interval': self.agg,
                }
                self.myargs = argmap
                newquery = ConjunctiveQueryProcessor(self.project_name,
                                                     self.config,
                                                     self.project_root_name,
                                                     self.es,
                                                     myargs=self.myargs)
                resp = newquery.process()
                if resp[1] == 400:
                    return resp
                resp = resp[0]
                logger.debug("Response for query is {}".format(resp))
                # Date handling only applies to top-level fields typed "date".
                is_date_aggregation = ("." not in self.field and
                                       self.config[self.field]['type'] == "date")
                ts, dims = DigOutputProcessor(resp['aggregations'][self.field],
                                              self.agg_field,
                                              is_date_aggregation).process()
                ts_obj = TimeSeries(
                    ts,
                    dict(),
                    dims,
                    percent_change=self.percent_change,
                    impute_method=self.impute_method,
                    impute_values=self.impute_values).to_dict()
                return rest.ok(ts_obj)
            else:
                return rest.not_found("Time series not found")
        except Exception:
            logger.exception(
                "Exception encountered while performing time series query")
            return rest.bad_request("Enter valid query")
# Beispiel #2
# 0
 def process(self):
     """
     Run the conjunctive query.

     Validates input, builds the ES query, executes it (scrolled form when
     the requested window exceeds ES's 10000-result limit), filters the
     response, resolves nested documents when a nested query is present,
     and returns the result as json or json_lines.
     """
     valid_input = self.validate_input()
     if not valid_input:
         err_json = {}
         err_json[
             'message'] = "Please enter valid query params. Fields must exist for the given project. If not sure, please access http://mydigurl/projects/<project_name>/fields API for reference"
         return rest.bad_request(err_json)
     query = self._build_query("must")
     logger.debug('Query {}'.format(query))
     # ES caps from+size at 10000; the boolean presumably switches
     # es_search into a scrolled/deep-paging mode — confirm against the
     # es_search API.
     deep_paging = self.num_results + self.fr > 10000
     res = self.es.es_search(self.project_name,
                             self.project_root_name,
                             query,
                             deep_paging,
                             ignore_no_index=True)
     # isinstance, not type()==, so RequestError subclasses are caught too.
     if isinstance(res, RequestError):
         logger.warning('problem with query\n  {}'.format(str(res)))
         return rest.bad_request(str(res))
     res_filtered = self.filter_response(res, self.field_names)
     resp = {}
     if self.nested_query is not None and len(
             res_filtered['hits']['hits']) > 0:
         res_filtered = self.setNestedDocuments(res_filtered)
         logger.debug('Completed setting nested documents')
     if self.group_by is None:
         if self.verbosity == "minimal":
             if self.field_names is None:
                 self.field_names = ','.join(self.config_fields)
             resp = self.minify_response(res_filtered, self.field_names)
         elif self.verbosity == "full":
             resp = res_filtered['hits']['hits']
         else:
             resp = res_filtered
     else:
         resp = res_filtered
     logger.debug('Minifying documents complete')
     if self.response_format == "json_lines":
         logger.debug("Returning json lines response")
         return Response(self.create_json_lines_response(resp),
                         mimetype='application/x-jsonlines')
     return rest.ok(resp)
    def process(self):
        """
        Run the conjunctive query (legacy variant).

        Validates input, builds and executes the ES query, filters the
        response, resolves nested documents when present, and returns json
        or newline-joined json_lines.
        """
        valid_input = self.validate_input()
        if not valid_input:
            err_json = {}
            err_json['message'] = "Please enter valid query params. Fields must exist for the given project. If not sure, please access http://mydigurl/projects/<project_name>/fields API for reference"
            return rest.bad_request(err_json)
        query = self._build_query("must")
        res = self.es.search(self.project_name, self.project_root_name, query, ignore_no_index=True)
        res_filtered = self.filter_response(res, self.field_names)
        resp = {}
        print(query)  # debug; was a Python-2 print statement (syntax error on Py3)
        if self.nested_query is not None and len(res_filtered['hits']['hits']) > 0:
            res_filtered = self.setNestedDocuments(res_filtered)
        if self.response_format == "json_lines":
            return rest.ok('\n'.join(str(x) for x in res_filtered['hits']['hits']))
        elif self.group_by is None:
            if self.verbosity == "minimal":
                if self.field_names is None:
                    self.field_names = ','.join(self.config_fields)
                resp = self.minify_response(res_filtered, self.field_names)
            elif self.verbosity == "full":
                resp = res_filtered['hits']['hits']
            else:
                resp = res_filtered
        else:
            resp = res_filtered

        return rest.ok(resp)
    def process_ts_query(self):
        """
        Run the time-series query (legacy variant).

        Validates input, locates the measure document via the conjunctive
        query, re-queries grouped by event_date at the configured interval,
        and returns the resulting TimeSeries dict.

        Returns rest.ok() with the time series, rest.not_found() when no
        document matches, rest.bad_request() on invalid input or error.
        """
        valid_input = self.validate_input()
        if not valid_input:
            err_json = {}
            err_json[
                'message'] = "Please enter valid query params. Fields must exist for the given project. If not sure, please access http://mydigurl/projects/<project_name>/fields API for reference"
            return rest.bad_request(err_json)

        resp = self.cquery.process()[0]
        try:
            hits = resp['hits']['hits']
            if len(hits) > 0 and 'doc_id' in hits[0]['_source']:
                docid = hits[0]['_source']['doc_id']
                # Re-query for this document's values bucketed by event_date.
                argmap = {
                    'measure/value': docid,
                    '_group-by': 'event_date',
                    '_interval': self.agg,
                }
                self.request.args = argmap
                newquery = ConjunctiveQueryProcessor(self.request,
                                                     self.project_name,
                                                     self.config,
                                                     self.project_root_name,
                                                     self.es)
                resp = newquery.process()[0]
                print(resp)  # debug; was a Python-2 print statement (syntax error on Py3)
                ts, dims = DigOutputProcessor(resp['aggregations'][self.field],
                                              self.agg_field).process()
                ts_obj = TimeSeries(ts, dict(), dims).to_dict()
                return rest.ok(ts_obj)
            else:
                return rest.not_found("Time series not found")
        except Exception:
            # format_exc() produces the same text as joining
            # traceback.format_exception(*sys.exc_info()) by hand.
            print(traceback.format_exc())
            return rest.bad_request("Enter valid query")
# Beispiel #5
# 0
 def process_event_query(self):
     """
     Run the event (date-bucketed) time-series query.

     Validates input, runs the conjunctive query, and converts the field
     aggregation into a TimeSeries dict. The date/non-date handling that
     was previously duplicated across two branches is collapsed into a
     single computed flag.

     Returns rest.ok() with the time series, rest.not_found() when the
     aggregation has no buckets, rest.bad_request()/rest.internal_error()
     on failure.
     """
     valid_input = self.validate_input()
     if not valid_input:
         err_json = dict()
         err_json['message'] = "Please enter valid query params. Fields must exist for the given project. " \
                               "If not sure, please access http://mydigurl/projects/<project_name>/fields " \
                               "API for reference"
         return rest.bad_request(err_json)
     try:
         resp = self.cquery.process()
         if resp[1] == 400:
             logger.warning(
                 "Request generated 4xx response. Check request again")
             return resp
         resp = resp[0]
         if resp is not None and len(
                 resp['aggregations'][self.field]['buckets']) > 0:
             # Date handling only applies to top-level fields typed "date";
             # both former branches were otherwise identical.
             is_date_aggregation = ("." not in self.field and
                                    self.config[self.field]['type'] == "date")
             ts, dims = DigOutputProcessor(
                 resp['aggregations'][self.field], self.agg_field,
                 is_date_aggregation).process()
             ts_obj = TimeSeries(
                 ts, {},
                 dims,
                 percent_change=self.percent_change,
                 impute_method=self.impute_method).to_dict()
             return rest.ok(ts_obj)
         else:
             return rest.not_found("No Time series found for query")
     except Exception:
         logger.exception(
             "Exception encountered while performing Event query")
         return rest.internal_error("Internal Error occured")
    def process_event_query(self):
        """
        Validate the request, run the conjunctive query, and convert the
        field aggregation into a TimeSeries dict wrapped in a rest.ok
        response (legacy variant without error checking).
        """
        if not self.validate_input():
            return rest.bad_request({
                'message': "Please enter valid query params. Fields must exist for the given project. If not sure, please access http://mydigurl/projects/<project_name>/fields API for reference"
            })

        resp = self.cquery.process()[0]
        processor = DigOutputProcessor(resp['aggregations'][self.field],
                                       self.agg_field)
        ts, dims = processor.process()
        series = TimeSeries(ts, {}, dims)
        return rest.ok(series.to_dict())
 def delete(self):
     """Reject DELETE on the collection; projects are deleted individually."""
     # redundant with projects/project_name/delete
     return rest.bad_request(
         'cannot delete from projects endpoint. you should call projects/{your_project_name}')
    def post(self):
        """Create a new project from a JSON payload.

        Expects a body with 'project_name' and a non-empty 'sources' list.
        Creates the project directory and config file, registers/updates the
        domain search service, and records the project in MongoDB.

        Returns rest.created on success, rest.ok when the domain already
        existed, rest.bad_request for invalid payloads, rest.internal_error
        on failure (with best-effort rollback of in-memory and on-disk state).
        """
        input = request.get_json(force=True)
        logger.info('/projects received: %s' % (input))
        project_name = input.get('project_name', '')
        # Reject empty names and names of 256+ characters.
        if len(project_name) == 0 or len(project_name) >= 256:
            return rest.bad_request('Invalid project name.')
        if project_name in data['projects']:
            #msg = 'You cannot post an existing project to the /projects endpoint. For updates, post to projects/{your_project_name}'
            msg = 'You cannot post an existing project to the /projects endpoint.'
            return rest.bad_request(msg)
        project_sources = input.get('sources', [])
        if len(project_sources) == 0:
            return rest.bad_request('Invalid sources.')

        logger.info('/projects project_name: %s' % (project_name))
        logger.info('/projects project_sources: %s' % (project_sources))

        try:
            # create project data structure, folders & files
            project_dir_path = _get_project_dir_path(project_name)

            # Serialize creation per project name; released in the finally.
            project_lock.acquire(project_name)
            logger.info('/projects creating directory: %s' % (project_dir_path))
            os.makedirs(project_dir_path)
            data['projects'][project_name] = {'sources': {}}
            data['projects'][project_name]['project_name'] = project_name
            data['projects'][project_name]['sources'] = project_sources
            with open(os.path.join(project_dir_path, 'project_config.json'), 'w') as f:
                f.write(json.dumps(data['projects'][project_name], indent=4, default=json_encode))
            # we should try to create a service for domain "sources:type" 
            # (or update it if timerange defined by "sources:start_date" and "sources:end_date" is bigger than existing)
            # ret: 0 = created, 1 = domain existed already, otherwise failure.
            ret, domain_name, ingestion_id, job_id, err = check_domain_service(project_sources, project_name)
            data['projects'][project_name]['domain'] = domain_name
            if ret==0:
                msg = 'project %s created.' % project_name
                logger.info(msg)
                # store job infos
                data['projects'][project_name]['ingestion_id'] = ingestion_id
                data['projects'][project_name]['job_id'] = job_id
                data['projects'][project_name]['status'] = 'indexing'
                # insert into mongoDB
                logger.info('Project %s (before mongodb insertion) dict keys are %s' % (project_name, data['projects'][project_name].keys()))
                db_projects.insert_one(data['projects'][project_name])
                logger.info('Project %s (after mongodb insertion) dict keys are %s' % (project_name, data['projects'][project_name].keys()))
                # How come data['projects'][project_name] has an '_id' field now???
                # NOTE(review): insert_one appears to mutate the passed dict,
                # adding the generated '_id'; stripped here so the in-memory
                # copy stays JSON-serializable — confirm against pymongo docs.
                if '_id' in data['projects'][project_name]:
                    del data['projects'][project_name]['_id']
                logger.info('Project %s (after mongodb insertion and cleaning) dict keys are %s' % (project_name, data['projects'][project_name].keys()))
                try:
                    # The return value is computed first; restart_apache()
                    # then runs in the finally before the response leaves.
                    return rest.created(msg)
                finally:
                    restart_apache()
            elif ret==1:
                msg = 'domain for project %s was already previously created. %s' % (project_name, err)
                logger.info(msg)
                # what should we return in this case
                return rest.ok(msg) 
            else:
                # we should remove project_name
                del data['projects'][project_name]
                msg = 'project %s creation failed while creating search service: %s' % (project_name, err)
                logger.info(msg)
                return rest.internal_error(msg)
        except Exception as e:
            # try to remove project_name
            try:
                del data['projects'][project_name]
            except:
                pass
            # try to remove data files too
            try:
                shutil.rmtree(os.path.join(_get_project_dir_path(project_name)))
            except:
                pass
            msg = 'project {} creation failed: {} {}'.format(project_name, e, sys.exc_info()[0])
            logger.error(msg)
            return rest.internal_error(msg)
        finally:
            # Always release the per-project lock, success or failure.
            project_lock.release(project_name)
 def delete(self, domain_name):
     """Refuse to delete a domain."""
     # Should we allow it?
     message = 'Deleting a domain is not allowed.'
     return rest.bad_request(message)
 def post(self, domain_name):
     """Refuse POST to a specific domain; domains come from projects."""
     message = 'You cannot post a domain, you should post a project using a domain.'
     return rest.bad_request(message)
 def post(self):
     """Refuse POST to the domains collection; domains come from projects."""
     message = 'You cannot post to this endpoint. Domains are created from projects.'
     return rest.bad_request(message)
 def post(self, project_name):
     """Refuse POST to an existing project; updates are not supported."""
     message = 'A project update is not allowed.'
     return rest.bad_request(message)
    def post(self):
        """Create a new project from a JSON payload (duplicate copy).

        Expects a body with 'project_name' and a non-empty 'sources' list.
        Creates the project directory and config file, registers/updates the
        domain search service, and records the project in MongoDB.

        Returns rest.created on success, rest.ok when the domain already
        existed, rest.bad_request for invalid payloads, rest.internal_error
        on failure (with best-effort rollback of in-memory and on-disk state).
        """
        input = request.get_json(force=True)
        logger.info('/projects received: %s' % (input))
        project_name = input.get('project_name', '')
        # Reject empty names and names of 256+ characters.
        if len(project_name) == 0 or len(project_name) >= 256:
            return rest.bad_request('Invalid project name.')
        if project_name in data['projects']:
            #msg = 'You cannot post an existing project to the /projects endpoint. For updates, post to projects/{your_project_name}'
            msg = 'You cannot post an existing project to the /projects endpoint.'
            return rest.bad_request(msg)
        project_sources = input.get('sources', [])
        if len(project_sources) == 0:
            return rest.bad_request('Invalid sources.')

        logger.info('/projects project_name: %s' % (project_name))
        logger.info('/projects project_sources: %s' % (project_sources))

        try:
            # create project data structure, folders & files
            project_dir_path = _get_project_dir_path(project_name)

            # Serialize creation per project name; released in the finally.
            project_lock.acquire(project_name)
            logger.info('/projects creating directory: %s' % (project_dir_path))
            os.makedirs(project_dir_path)
            data['projects'][project_name] = {'sources': {}}
            data['projects'][project_name]['project_name'] = project_name
            data['projects'][project_name]['sources'] = project_sources
            with open(os.path.join(project_dir_path, 'project_config.json'), 'w') as f:
                f.write(json.dumps(data['projects'][project_name], indent=4, default=json_encode))
            # we should try to create a service for domain "sources:type" 
            # (or update it if timerange defined by "sources:start_date" and "sources:end_date" is bigger than existing)
            # ret: 0 = created, 1 = domain existed already, otherwise failure.
            ret, domain_name, ingestion_id, job_id, err = check_domain_service(project_sources, project_name)
            data['projects'][project_name]['domain'] = domain_name
            if ret==0:
                msg = 'project %s created.' % project_name
                logger.info(msg)
                # store job infos
                data['projects'][project_name]['ingestion_id'] = ingestion_id
                data['projects'][project_name]['job_id'] = job_id
                data['projects'][project_name]['status'] = 'indexing'
                # insert into mongoDB
                logger.info('Project %s (before mongodb insertion) dict keys are %s' % (project_name, data['projects'][project_name].keys()))
                db_projects.insert_one(data['projects'][project_name])
                logger.info('Project %s (after mongodb insertion) dict keys are %s' % (project_name, data['projects'][project_name].keys()))
                # How come data['projects'][project_name] has an '_id' field now???
                # NOTE(review): insert_one appears to mutate the passed dict,
                # adding the generated '_id'; stripped here so the in-memory
                # copy stays JSON-serializable — confirm against pymongo docs.
                if '_id' in data['projects'][project_name]:
                    del data['projects'][project_name]['_id']
                logger.info('Project %s (after mongodb insertion and cleaning) dict keys are %s' % (project_name, data['projects'][project_name].keys()))
                try:
                    # The return value is computed first; restart_apache()
                    # then runs in the finally before the response leaves.
                    return rest.created(msg)
                finally:
                    restart_apache()
            elif ret==1:
                msg = 'domain for project %s was already previously created. %s' % (project_name, err)
                logger.info(msg)
                # what should we return in this case
                return rest.ok(msg) 
            else:
                # we should remove project_name
                del data['projects'][project_name]
                msg = 'project %s creation failed while creating search service: %s' % (project_name, err)
                logger.info(msg)
                return rest.internal_error(msg)
        except Exception as e:
            # try to remove project_name
            try:
                del data['projects'][project_name]
            except:
                pass
            # try to remove data files too
            try:
                shutil.rmtree(os.path.join(_get_project_dir_path(project_name)))
            except:
                pass
            msg = 'project {} creation failed: {} {}'.format(project_name, e, sys.exc_info()[0])
            logger.error(msg)
            return rest.internal_error(msg)
        finally:
            # Always release the per-project lock, success or failure.
            project_lock.release(project_name)
 def delete(self, domain_name):
     """Domain deletion is not supported."""
     # Should we allow it?
     return rest.bad_request('Deleting a domain is not allowed.')
 def post(self, domain_name):
     """Domains cannot be posted directly; create them through a project."""
     reason = 'You cannot post a domain, you should post a project using a domain.'
     return rest.bad_request(reason)
 def post(self):
     """POST is not accepted here; domains are created via projects."""
     reason = 'You cannot post to this endpoint. Domains are created from projects.'
     return rest.bad_request(reason)
 def post(self, project_name):
     """Project updates via POST are not supported."""
     reason = 'A project update is not allowed.'
     return rest.bad_request(reason)
 def delete(self):
     """Refuse collection-level DELETE; use the per-project endpoint."""
     # redundant with projects/project_name/delete
     return rest.bad_request(
         'cannot delete from projects endpoint. you should call projects/{your_project_name}')