def process_ts_query(self):
    """
    This is the main function in this class. It calls several functions to
    validate input, set match clauses, set filter clauses, set sort clauses,
    and finally resolve any nested documents if needed. The data is returned
    as JSON or as json_lines joined by '\n'.
    """
    valid_input = self.validate_input()
    if not valid_input:
        err_json = dict()
        err_json['message'] = "Please enter valid query params. Fields must exist for the given project. " \
                              "If not sure, please access http://mydigurl/projects/<project_name>/fields API for " \
                              "reference"
        return rest.bad_request(err_json)
    resp = self.cquery.process()
    if resp is None or resp[1] == 400:
        return resp
    try:
        resp = resp[0]
        if len(resp['hits']['hits']) > 0 and 'doc_id' in resp['hits']['hits'][0]['_source'].keys():
            # the top hit is a measure document: issue a follow-up query
            # grouped by event_date at the requested interval
            docid = resp['hits']['hits'][0]['_source']['doc_id']
            argmap = dict()
            argmap['measure/value'] = docid
            argmap['_group-by'] = 'event_date'
            argmap['_interval'] = self.agg
            self.myargs = argmap
            newquery = ConjunctiveQueryProcessor(self.project_name,
                                                 self.config,
                                                 self.project_root_name,
                                                 self.es,
                                                 myargs=self.myargs)
            resp = newquery.process()
            if resp[1] == 400:
                return resp
            else:
                resp = resp[0]
            logger.debug("Response for query is {}".format(resp))
            is_date_aggregation = "." not in self.field and self.config[self.field]['type'] == "date"
            ts, dims = DigOutputProcessor(resp['aggregations'][self.field],
                                          self.agg_field,
                                          is_date_aggregation).process()
            ts_obj = TimeSeries(ts,
                                dict(),
                                dims,
                                percent_change=self.percent_change,
                                impute_method=self.impute_method,
                                impute_values=self.impute_values).to_dict()
            return rest.ok(ts_obj)
        else:
            return rest.not_found("Time series not found")
    except Exception as e:
        logger.exception("Exception encountered while performing time series query")
        return rest.bad_request("Enter valid query")
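# --- Hedged usage sketch (illustration only, not part of this module). ---
# Shows how a client might call the time-series endpoint that routes to
# process_ts_query() above. The host, route, project name, and interval value
# are assumptions; only the param names '_group-by' and '_interval' (and the
# internally set 'measure/value') come from the code above.
import requests

def fetch_time_series_sketch(base_url='http://mydigurl',  # hypothetical host
                             project='my_project'):       # hypothetical project
    params = {
        '_group-by': 'event_date',  # grouping the handler also sets internally
        '_interval': 'month',       # assumed interval value; feeds self.agg
    }
    # Hypothetical route; consult the blueprint that registers this resource.
    resp = requests.get('{}/projects/{}/ts'.format(base_url, project), params=params)
    resp.raise_for_status()
    return resp.json()  # on success: the TimeSeries.to_dict() payload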
def delete(self, project_name):
    if project_name not in data['projects']:
        return rest.not_found()
    try:
        project_lock.acquire(project_name)
        # get the corresponding domain
        domain_name = data['projects'][project_name]['domain']
        # delete the ingestion_id row from the hbase updates table
        # should we delete corresponding files on HDFS?
        # delete hbase table sha1_infos?
        ingestion_id = data['projects'][project_name]['ingestion_id']
        from happybase.connection import Connection
        conn = Connection(config['image']['hbase_host'])
        table = conn.table(config['image']['hbase_table_updates'])
        table.delete(ingestion_id,
                     columns=['info:lopq_codes_path', 'info:lopq_model_pkl'])

        # remove project:
        # - from current data dict
        del data['projects'][project_name]
        # - files associated with project
        shutil.rmtree(os.path.join(_get_project_dir_path(project_name)))
        # - from mongodb
        db_projects.delete_one({'project_name': project_name})
        msg = 'project {} has been deleted'.format(project_name)
        logger.info(msg)

        # if it's the last project of a domain, should we remove the domain?
        # for now assume one project per domain and delete it too.
        # stop and remove the docker container
        docker_name = data['domains'][domain_name]['docker_name']
        subproc = sub.Popen("sudo docker stop {}; sudo docker rm {}".format(docker_name, docker_name),
                            shell=True)

        # clean up ports list
        data['ports'].remove(data['domains'][domain_name]['port'])

        # remove domain:
        # - from current data dict
        del data['domains'][domain_name]
        # - files associated with domain
        shutil.rmtree(os.path.join(_get_domain_dir_path(domain_name)))
        # - from mongodb
        db_domains.delete_one({'domain_name': domain_name})
        # should we also clean up things in HDFS?
        msg2 = 'domain {} has been deleted'.format(domain_name)
        logger.info(msg2)

        # regenerate apache conf from scratch for domains that are still active
        reset_apache_conf()
        return rest.deleted(msg + ' ' + msg2)
    except Exception as e:
        logger.error('deleting project %s: %s' % (project_name, e))
        return rest.internal_error('deleting project %s error, halted.' % project_name)
    finally:
        project_lock.remove(project_name)
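# --- Minimal sketch, assuming a reachable HBase Thrift server. ---
# Isolates the HBase cleanup that delete() performs above: removing the two
# LOPQ-related columns of the ingestion row from the updates table. Host,
# table name, and row key below are placeholders; the column names come from
# the code above.
from happybase import Connection

def delete_ingestion_row(hbase_host, updates_table, ingestion_id):
    conn = Connection(hbase_host)  # default Thrift port 9090
    try:
        table = conn.table(updates_table)
        # delete only these two columns; the rest of the row is untouched
        table.delete(ingestion_id,
                     columns=['info:lopq_codes_path', 'info:lopq_model_pkl'])
    finally:
        conn.close()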
def process_event_query(self):
    """
    This is the main function in this class. It calls several functions to
    validate input, set match clauses, set filter clauses, set sort clauses,
    and finally resolve any nested documents if needed. The data is returned
    as JSON or as json_lines joined by '\n'.
    """
    valid_input = self.validate_input()
    if not valid_input:
        err_json = dict()
        err_json['message'] = "Please enter valid query params. Fields must exist for the given project. " \
                              "If not sure, please access http://mydigurl/projects/<project_name>/fields " \
                              "API for reference"
        return rest.bad_request(err_json)
    try:
        resp = self.cquery.process()
        if resp[1] == 400:
            logger.warning("Request generated 4xx response. Check request again")
            return resp
        else:
            resp = resp[0]
        if resp is not None and len(resp['aggregations'][self.field]['buckets']) > 0:
            # the two original branches differed only in this flag
            is_date_aggregation = "." not in self.field and self.config[self.field]['type'] == "date"
            ts, dims = DigOutputProcessor(resp['aggregations'][self.field],
                                          self.agg_field,
                                          is_date_aggregation).process()
            ts_obj = TimeSeries(ts, {}, dims,
                                percent_change=self.percent_change,
                                impute_method=self.impute_method).to_dict()
            return rest.ok(ts_obj)
        else:
            return rest.not_found("No Time series found for query")
    except Exception as e:
        logger.exception("Exception encountered while performing Event query")
        return rest.internal_error("Internal Error occurred")
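# --- Hedged sketch of the bucket flattening assumed inside DigOutputProcessor. ---
# For a date aggregation, Elasticsearch date_histogram buckets carry
# 'key_as_string', 'key' (epoch millis) and 'doc_count'; a (timestamp, count)
# series can be derived as below. This illustrates the expected input shape,
# not the processor's actual implementation.
def buckets_to_series(agg_result):
    return [(b['key_as_string'], b['doc_count'])
            for b in agg_result.get('buckets', [])]

if __name__ == '__main__':
    sample = {'buckets': [
        {'key_as_string': '2017-01-01', 'key': 1483228800000, 'doc_count': 12},
        {'key_as_string': '2017-02-01', 'key': 1485907200000, 'doc_count': 7},
    ]}
    print(buckets_to_series(sample))  # [('2017-01-01', 12), ('2017-02-01', 7)]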
def process_ts_query(self):
    '''
    This is the main function in this class. It calls several functions to
    validate input, set match clauses, set filter clauses, set sort clauses,
    and finally resolve any nested documents if needed. The data is returned
    as JSON or as json_lines joined by '\n'.
    '''
    resp = self.cquery.process()[0]
    try:
        if len(resp['hits']['hits']) > 0 and 'doc_id' in resp['hits']['hits'][0]['_source'].keys():
            docid = resp['hits']['hits'][0]['_source']['doc_id']
            argmap = dict()
            argmap['measure/value'] = docid
            argmap['_group-by'] = 'event_date'
            argmap['_interval'] = self.agg
            self.request.args = argmap
            newquery = ConjunctiveQueryProcessor(self.request,
                                                 self.project_name,
                                                 self.config,
                                                 self.project_root_name,
                                                 self.es)
            resp = newquery.process()[0]
            logger.debug("Response for query is {}".format(resp))
            ts, dims = DigOutputProcessor(resp['aggregations'][self.field]).process()
            dimensions = ["DATE", dims]
            ts_obj = TimeSeries(ts, dict(), dimensions).to_dict()
            return rest.ok(ts_obj)
        else:
            return rest.not_found("Time series not found")
    except Exception as e:
        logger.exception("Exception encountered while performing time series query")
        return rest.bad_request("Enter valid query for measure document")
def get(self, domain_name):
    if domain_name not in data['domains']:
        return rest.not_found()
    return data['domains'][domain_name]
def get(self, project_name):
    if project_name not in data['projects']:
        return rest.not_found()
    check_project_indexing_finished(project_name)
    logger.info('Getting project %s, dict keys are %s' % (project_name, data['projects'][project_name].keys()))
    return data['projects'][project_name]