def process_ts_query(self):
    """
    Main entry point for time-series queries. Validates the input, runs the conjunctive
    query, and if the first hit carries a doc_id, re-issues a grouped query over
    event_date to build the time series. Returns the time series as a JSON response.
    """
    valid_input = self.validate_input()
    if not valid_input:
        err_json = dict()
        err_json['message'] = "Please enter valid query params. Fields must exist for the given project. " \
                              "If not sure, please access http://mydigurl/projects/<project_name>/fields API for " \
                              "reference"
        return rest.bad_request(err_json)
    resp = self.cquery.process()
    # process() returns a (body, status_code) tuple; propagate failures unchanged.
    if resp is None or resp[1] == 400:
        return resp
    try:
        resp = resp[0]
        if len(resp['hits']['hits']) > 0 and 'doc_id' in resp['hits']['hits'][0]['_source']:
            docid = resp['hits']['hits'][0]['_source']['doc_id']
            # Re-query, grouping the measure values for this document by event_date.
            argmap = dict()
            argmap['measure/value'] = docid
            argmap['_group-by'] = 'event_date'
            argmap['_interval'] = self.agg
            self.myargs = argmap
            newquery = ConjunctiveQueryProcessor(self.project_name, self.config,
                                                 self.project_root_name, self.es,
                                                 myargs=self.myargs)
            resp = newquery.process()
            if resp[1] == 400:
                return resp
            resp = resp[0]
            logger.debug("Response for query is {}".format(resp))
            # Top-level date fields are treated as date aggregations.
            is_date_aggregation = "." not in self.field and self.config[self.field]['type'] == "date"
            ts, dims = DigOutputProcessor(resp['aggregations'][self.field],
                                          self.agg_field, is_date_aggregation).process()
            ts_obj = TimeSeries(ts, dict(), dims,
                                percent_change=self.percent_change,
                                impute_method=self.impute_method,
                                impute_values=self.impute_values).to_dict()
            return rest.ok(ts_obj)
        else:
            return rest.not_found("Time series not found")
    except Exception:
        logger.exception("Exception encountered while performing time series query")
        return rest.bad_request("Enter valid query")
def process(self): """ This is the main function in this class. This calls several functions to validate input, set match clauses, set filter clauses set sort clauses and finally resolve any nested documents if needed. Finally this function returns the data as a json or json_lines joined by a '\n' """ valid_input = self.validate_input() if not valid_input: err_json = {} err_json[ 'message'] = "Please enter valid query params. Fields must exist for the given project. If not sure, please access http://mydigurl/projects/<project_name>/fields API for reference" return rest.bad_request(err_json) query = self._build_query("must") res = None logger.debug('Query {}'.format(query)) if self.num_results + self.fr > 10000: res = self.es.es_search(self.project_name, self.project_root_name, query, True, ignore_no_index=True) else: res = self.es.es_search(self.project_name, self.project_root_name, query, False, ignore_no_index=True) if type(res) == RequestError: logger.warning('problem with query\n {}'.format(str(res))) return rest.bad_request(str(res)) res_filtered = self.filter_response(res, self.field_names) resp = {} if self.nested_query is not None and len( res_filtered['hits']['hits']) > 0: res_filtered = self.setNestedDocuments(res_filtered) logger.debug('Completed setting nested documents') if self.group_by is None: if self.verbosity == "minimal": if self.field_names is None: self.field_names = ','.join(self.config_fields) resp = self.minify_response(res_filtered, self.field_names) elif self.verbosity == "full": resp = res_filtered['hits']['hits'] else: resp = res_filtered else: resp = res_filtered logger.debug('Minifying documents complete') if self.response_format == "json_lines": logger.debug("Returning json lines response") return Response(self.create_json_lines_response(resp), mimetype='application/x-jsonlines') return rest.ok(resp)
def process(self):
    """
    Main entry point for conjunctive queries. Validates the input, builds the match,
    filter and sort clauses, resolves nested documents if needed, and returns the data
    as JSON or as json_lines joined by '\n'.
    """
    valid_input = self.validate_input()
    if not valid_input:
        err_json = dict()
        err_json['message'] = "Please enter valid query params. Fields must exist for the given project. " \
                              "If not sure, please access http://mydigurl/projects/<project_name>/fields API for " \
                              "reference"
        return rest.bad_request(err_json)
    query = self._build_query("must")
    logger.debug('Query {}'.format(query))
    res = self.es.search(self.project_name, self.project_root_name, query,
                         ignore_no_index=True)
    res_filtered = self.filter_response(res, self.field_names)
    if self.nested_query is not None and len(res_filtered['hits']['hits']) > 0:
        res_filtered = self.setNestedDocuments(res_filtered)
    if self.response_format == "json_lines":
        return rest.ok('\n'.join(str(x) for x in res_filtered['hits']['hits']))
    elif self.group_by is None:
        if self.verbosity == "minimal":
            if self.field_names is None:
                self.field_names = ','.join(self.config_fields)
            resp = self.minify_response(res_filtered, self.field_names)
        elif self.verbosity == "full":
            resp = res_filtered['hits']['hits']
        else:
            resp = res_filtered
    else:
        resp = res_filtered
    return rest.ok(resp)
def process_ts_query(self):
    """
    Main entry point for time-series queries. Validates the input, runs the conjunctive
    query, and if the first hit carries a doc_id, re-issues a grouped query over
    event_date to build the time series. Returns the time series as a JSON response.
    """
    valid_input = self.validate_input()
    if not valid_input:
        err_json = dict()
        err_json['message'] = "Please enter valid query params. Fields must exist for the given project. " \
                              "If not sure, please access http://mydigurl/projects/<project_name>/fields API for " \
                              "reference"
        return rest.bad_request(err_json)
    resp = self.cquery.process()[0]
    try:
        if len(resp['hits']['hits']) > 0 and 'doc_id' in resp['hits']['hits'][0]['_source']:
            docid = resp['hits']['hits'][0]['_source']['doc_id']
            # Re-query, grouping the measure values for this document by event_date.
            argmap = dict()
            argmap['measure/value'] = docid
            argmap['_group-by'] = 'event_date'
            argmap['_interval'] = self.agg
            self.request.args = argmap
            newquery = ConjunctiveQueryProcessor(self.request, self.project_name,
                                                 self.config, self.project_root_name,
                                                 self.es)
            resp = newquery.process()[0]
            print(resp)
            ts, dims = DigOutputProcessor(resp['aggregations'][self.field],
                                          self.agg_field).process()
            ts_obj = TimeSeries(ts, dict(), dims).to_dict()
            return rest.ok(ts_obj)
        else:
            return rest.not_found("Time series not found")
    except Exception:
        # Print the full traceback for debugging before rejecting the request.
        traceback.print_exc()
        return rest.bad_request("Enter valid query")
def process_event_query(self): """ This is the main function in this class. This calls several functions to validate input, set match clauses, set filter clauses set sort clauses and finally resolve any nested documents if needed. Finally this function returns the data as a json or json_lines joined by a '\n' """ valid_input = self.validate_input() if not valid_input: err_json = dict() err_json['message'] = "Please enter valid query params. Fields must exist for the given project. " \ "If not sure, please access http://mydigurl/projects/<project_name>/fields " \ "API for reference" return rest.bad_request(err_json) try: resp = self.cquery.process() if resp[1] == 400: logger.warning( "Request generated 4xx response. Check request again") return resp else: resp = resp[0] if resp is not None and len( resp['aggregations'][self.field]['buckets']) > 0: if "." not in self.field and self.config[ self.field]['type'] == "date": ts, dims = DigOutputProcessor( resp['aggregations'][self.field], self.agg_field, True).process() ts_obj = TimeSeries( ts, {}, dims, percent_change=self.percent_change, impute_method=self.impute_method).to_dict() else: ts, dims = DigOutputProcessor( resp['aggregations'][self.field], self.agg_field, False).process() ts_obj = TimeSeries( ts, {}, dims, percent_change=self.percent_change, impute_method=self.impute_method).to_dict() return rest.ok(ts_obj) else: return rest.not_found("No Time series found for query") except Exception as e: logger.exception( "Exception encountered while performing Event query") return rest.internal_error("Internal Error occured")
def process_event_query(self):
    """
    Main entry point for event queries. Validates the input, runs the conjunctive query,
    and converts the resulting aggregation buckets into a time series. Returns the time
    series as a JSON response.
    """
    valid_input = self.validate_input()
    if not valid_input:
        err_json = dict()
        err_json['message'] = "Please enter valid query params. Fields must exist for the given project. " \
                              "If not sure, please access http://mydigurl/projects/<project_name>/fields API for " \
                              "reference"
        return rest.bad_request(err_json)
    resp = self.cquery.process()[0]
    ts, dims = DigOutputProcessor(resp['aggregations'][self.field],
                                  self.agg_field).process()
    ts_obj = TimeSeries(ts, {}, dims).to_dict()
    return rest.ok(ts_obj)
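# For reference, the aggregations consumed by the time-series and event handlers above
# are Elasticsearch (date-)histogram results. A minimal sketch of the bucket shape that
# DigOutputProcessor presumably flattens into (timestamp, value) pairs; the values below
# are made up for illustration and are not taken from the project:
sample_aggregation = {
    'buckets': [
        {'key_as_string': '2017-01-01T00:00:00.000Z', 'key': 1483228800000, 'doc_count': 12},
        {'key_as_string': '2017-02-01T00:00:00.000Z', 'key': 1485907200000, 'doc_count': 7},
    ]
}
# e.g. ts might come out as [('2017-01-01T00:00:00.000Z', 12), ('2017-02-01T00:00:00.000Z', 7)],
# which TimeSeries(ts, {}, dims).to_dict() then serializes for the HTTP response.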
def delete(self):
    # Redundant with projects/{project_name}/delete.
    msg = 'Cannot delete from the /projects endpoint; call projects/{your_project_name} instead.'
    return rest.bad_request(msg)
def post(self):
    input_json = request.get_json(force=True)
    logger.info('/projects received: %s' % input_json)
    project_name = input_json.get('project_name', '')
    if len(project_name) == 0 or len(project_name) >= 256:
        return rest.bad_request('Invalid project name.')
    if project_name in data['projects']:
        msg = 'You cannot post an existing project to the /projects endpoint. ' \
              'For updates, post to projects/{your_project_name}.'
        return rest.bad_request(msg)
    project_sources = input_json.get('sources', [])
    if len(project_sources) == 0:
        return rest.bad_request('Invalid sources.')
    logger.info('/projects project_name: %s' % project_name)
    logger.info('/projects project_sources: %s' % project_sources)
    try:
        # Create the project data structure, folders and files.
        project_dir_path = _get_project_dir_path(project_name)
        project_lock.acquire(project_name)
        logger.info('/projects creating directory: %s' % project_dir_path)
        os.makedirs(project_dir_path)
        data['projects'][project_name] = {'sources': {}}
        data['projects'][project_name]['project_name'] = project_name
        data['projects'][project_name]['sources'] = project_sources
        with open(os.path.join(project_dir_path, 'project_config.json'), 'w') as f:
            f.write(json.dumps(data['projects'][project_name], indent=4, default=json_encode))
        # Try to create a service for the domain "sources:type" (or update it if the time
        # range defined by "sources:start_date" and "sources:end_date" is bigger than the
        # existing one).
        ret, domain_name, ingestion_id, job_id, err = check_domain_service(project_sources, project_name)
        data['projects'][project_name]['domain'] = domain_name
        if ret == 0:
            msg = 'project %s created.' % project_name
            logger.info(msg)
            # Store job info.
            data['projects'][project_name]['ingestion_id'] = ingestion_id
            data['projects'][project_name]['job_id'] = job_id
            data['projects'][project_name]['status'] = 'indexing'
            # Insert into MongoDB. Note that pymongo's insert_one mutates the document
            # passed to it by adding the generated '_id' field, which is why it is
            # removed again below.
            logger.info('Project %s (before mongodb insertion) dict keys are %s' % (project_name, data['projects'][project_name].keys()))
            db_projects.insert_one(data['projects'][project_name])
            logger.info('Project %s (after mongodb insertion) dict keys are %s' % (project_name, data['projects'][project_name].keys()))
            if '_id' in data['projects'][project_name]:
                del data['projects'][project_name]['_id']
            logger.info('Project %s (after mongodb insertion and cleaning) dict keys are %s' % (project_name, data['projects'][project_name].keys()))
            # The finally clause runs restart_apache() after the response is built.
            try:
                return rest.created(msg)
            finally:
                restart_apache()
        elif ret == 1:
            msg = 'domain for project %s was already previously created. %s' % (project_name, err)
            logger.info(msg)
            # What should we return in this case?
            return rest.ok(msg)
        else:
            # Creating the search service failed; roll back the project entry.
            del data['projects'][project_name]
            msg = 'project %s creation failed while creating search service: %s' % (project_name, err)
            logger.info(msg)
            return rest.internal_error(msg)
    except Exception as e:
        # Best-effort rollback: remove the project entry and its data files.
        try:
            del data['projects'][project_name]
        except Exception:
            pass
        try:
            shutil.rmtree(_get_project_dir_path(project_name))
        except Exception:
            pass
        msg = 'project {} creation failed: {} {}'.format(project_name, e, sys.exc_info()[0])
        logger.error(msg)
        return rest.internal_error(msg)
    finally:
        project_lock.release(project_name)
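# project_lock in post() above is acquired and released per project name rather than
# globally. Its implementation is not part of this listing; below is a minimal sketch of
# such a named-lock registry, assuming the standard threading module. The class name and
# structure are illustrative assumptions, not the project's actual code.
import threading

class NamedLock(object):
    """Lazily creates one threading.Lock per name so unrelated projects don't block each other."""

    def __init__(self):
        self._registry_guard = threading.Lock()
        self._locks = {}

    def acquire(self, name):
        # Create the per-name lock on first use, then block until it is held.
        with self._registry_guard:
            lock = self._locks.setdefault(name, threading.Lock())
        lock.acquire()

    def release(self, name):
        self._locks[name].release()

project_lock = NamedLock()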
def delete(self, domain_name):
    # Should we allow it?
    return rest.bad_request('Deleting a domain is not allowed.')
def post(self, domain_name):
    return rest.bad_request('You cannot post a domain; you should post a project using a domain.')
def post(self):
    return rest.bad_request('You cannot post to this endpoint. Domains are created from projects.')
def post(self, project_name):
    return rest.bad_request('A project update is not allowed.')
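# The delete/post stubs above reject direct writes; the only supported write path is the
# POST /projects handler shown earlier, which expects a JSON body containing
# 'project_name' and a non-empty 'sources' list. A hypothetical client call, assuming the
# `requests` library and a locally running service (host, port and the source fields are
# illustrative, not taken from the project):
import requests

payload = {
    'project_name': 'my_project',
    'sources': [{'type': 'news', 'start_date': '2017-01-01', 'end_date': '2017-06-30'}],
}
resp = requests.post('http://localhost:5000/projects', json=payload)
print(resp.status_code, resp.text)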