def process_ts_query(self):
        """
        This is the main function in this class. This calls several functions to validate input, set match clauses, set filter clauses
        set sort clauses and finally resolve any nested documents if needed. Finally this function returns the data as a json or json_lines
        joined by a '\n'
        """
        valid_input = self.validate_input()
        if not valid_input:
            err_json = dict()
            err_json['message'] = "Please enter valid query params. Fields must exist for the given project. " \
                                  "If not sure, please access http://mydigurl/projects/<project_name>/fields API for " \
                                  "reference"
            return rest.bad_request(err_json)

        resp = self.cquery.process()
        if resp is None or resp[1] == 400:
            return resp
        try:
            resp = resp[0]
            if len(resp['hits']['hits']) > 0 and 'doc_id' in resp['hits']['hits'][0]['_source']:
                docid = resp['hits']['hits'][0]['_source']['doc_id']
                argmap = dict()
                argmap['measure/value'] = docid
                argmap['_group-by'] = 'event_date'
                argmap['_interval'] = self.agg
                self.myargs = argmap
                newquery = ConjunctiveQueryProcessor(self.project_name,
                                                     self.config,
                                                     self.project_root_name,
                                                     self.es,
                                                     myargs=self.myargs)
                resp = newquery.process()
                if resp[1] == 400:
                    return resp
                else:
                    resp = resp[0]
                logger.debug("Response for query is {}".format(resp))
                isDateAggregation = ("." not in self.field
                                     and self.config[self.field]['type'] == "date")
                ts, dims = DigOutputProcessor(resp['aggregations'][self.field],
                                              self.agg_field,
                                              isDateAggregation).process()
                ts_obj = TimeSeries(
                    ts,
                    dict(),
                    dims,
                    percent_change=self.percent_change,
                    impute_method=self.impute_method,
                    impute_values=self.impute_values).to_dict()
                return rest.ok(ts_obj)
            else:
                return rest.not_found("Time series not found")
        except Exception:
            logger.exception(
                "Exception encountered while performing time series query")
            return rest.bad_request("Enter valid query")
    def delete(self, project_name):
        if project_name not in data['projects']:
            return rest.not_found()
        try:
            project_lock.acquire(project_name)
            # - get corresponding domain
            domain_name = data['projects'][project_name]['domain']
            # - delete ingestion_id row from hbase updates table
            # should we delete corresponding files on HDFS?
            # delete hbase table sha1_infos?
            ingestion_id = data['projects'][project_name]['ingestion_id']
            from happybase.connection import Connection
            conn = Connection(config['image']['hbase_host'])
            table = conn.table(config['image']['hbase_table_updates'])
            table.delete(ingestion_id, columns=['info:lopq_codes_path', 'info:lopq_model_pkl'])
            # remove project:
            # - from current data dict
            del data['projects'][project_name]
            # - files associated with project
            shutil.rmtree(os.path.join(_get_project_dir_path(project_name)))
            # - from mongodb
            db_projects.delete_one({'project_name':project_name})
            msg = 'project {} has been deleted'.format(project_name)
            logger.info(msg)
            # if it's the last project from a domain, should we remove the domain?
            # for now assume one project per domain and delete too
            # stop and remove docker container
            docker_name = data['domains'][domain_name]['docker_name']
            subproc = sub.Popen("sudo docker stop {}; sudo docker rm {}".format(docker_name, docker_name), shell=True)
            # cleanup ports list
            data['ports'].remove(data['domains'][domain_name]['port'])
            # remove domain:
            # - from current data dict
            del data['domains'][domain_name]
            # - files associated with project
            shutil.rmtree(os.path.join(_get_domain_dir_path(domain_name)))
            # - from mongodb
            db_domains.delete_one({'domain_name':domain_name})
            # should we also clean up things in HDFS?...
            msg2 = 'domain {} has been deleted'.format(domain_name)
            logger.info(msg2)
            # regenerate apache conf from scratch for domains that are still active.
            reset_apache_conf()

            return rest.deleted(msg+' '+msg2)
        except Exception as e:
            logger.error('deleting project %s: %s' % (project_name, str(e)))
            return rest.internal_error('deleting project %s error, halted.' % project_name)
        finally:
            project_lock.remove(project_name)
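
# A small sketch (not from the original code) of wrapping the lock pattern
# used in delete() above: project_lock.acquire() paired with
# project_lock.remove() in a finally block. It assumes project_lock exposes
# exactly those two methods.
from contextlib import contextmanager

@contextmanager
def locked_project(project_name):
    # acquire before the critical section; always release, even on error
    project_lock.acquire(project_name)
    try:
        yield project_name
    finally:
        project_lock.remove(project_name)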
 def process_event_query(self):
     """
     This is the main function in this class. This calls several functions to validate input, set match clauses, set filter clauses
     set sort clauses and finally resolve any nested documents if needed. Finally this function returns the data as a json or json_lines
     joined by a '\n'
     """
     valid_input = self.validate_input()
     if not valid_input:
         err_json = dict()
         err_json['message'] = "Please enter valid query params. Fields must exist for the given project. " \
                               "If not sure, please access http://mydigurl/projects/<project_name>/fields " \
                               "API for reference"
         return rest.bad_request(err_json)
     try:
         resp = self.cquery.process()
         if resp[1] == 400:
             logger.warning(
                 "Request generated 4xx response. Check request again")
             return resp
         else:
             resp = resp[0]
         if resp is not None and len(
                 resp['aggregations'][self.field]['buckets']) > 0:
             if "." not in self.field and self.config[
                     self.field]['type'] == "date":
                 ts, dims = DigOutputProcessor(
                     resp['aggregations'][self.field], self.agg_field,
                     True).process()
                 ts_obj = TimeSeries(
                     ts, {},
                     dims,
                     percent_change=self.percent_change,
                     impute_method=self.impute_method).to_dict()
             else:
                 ts, dims = DigOutputProcessor(
                     resp['aggregations'][self.field], self.agg_field,
                     False).process()
                 ts_obj = TimeSeries(
                     ts, {},
                     dims,
                     percent_change=self.percent_change,
                     impute_method=self.impute_method).to_dict()
             return rest.ok(ts_obj)
         else:
             return rest.not_found("No Time series found for query")
     except Exception as e:
         logger.exception(
             "Exception encountered while performing Event query")
         return rest.internal_error("Internal error occurred")
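
# Illustration (not from the original code) of the TimeSeries call used in
# both branches above. The (timestamp, value) shape of `ts` and the dimension
# labels are assumptions about what DigOutputProcessor.process() returns;
# TimeSeries itself comes from this codebase and is not imported here.
ts = [('2017-01-01', 12), ('2017-02-01', 7), ('2017-03-01', 9)]  # assumed shape
dims = ['DATE', 'count']                                         # hypothetical labels
ts_obj = TimeSeries(ts, {}, dims,
                    percent_change=None,   # assumed to be optional
                    impute_method=None).to_dict()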
    def process_ts_query(self):
        '''
        Main entry point for this class. Builds the match, filter, and sort
        clauses and resolves nested documents where needed (input validation
        is currently commented out below). Returns the result either as a
        single JSON object or as JSON lines joined by '\n'.
        '''
        # valid_input = self.validate_input()
        # if not valid_input:
        #     err_json = {}
        #     err_json['message'] = "Please enter valid query params. Fields must exist for the given project. If not sure, please access http://mydigurl/projects/<project_name>/fields API for reference"
        #     return rest.bad_request(err_json)

        resp = self.cquery.process()[0]
        try:
            if len(resp['hits']['hits']) > 0 and 'doc_id' in resp['hits']['hits'][0]['_source']:
                docid = resp['hits']['hits'][0]['_source']['doc_id']
                argmap = {}
                argmap['measure/value'] = docid
                argmap['_group-by'] = 'event_date'
                argmap['_interval'] = self.agg
                #meta = resp['hits']['hits'][0]['_source']['measure']['metadata']
                self.request.args = argmap
                newquery = ConjunctiveQueryProcessor(self.request,
                                                     self.project_name,
                                                     self.config,
                                                     self.project_root_name,
                                                     self.es)
                resp = newquery.process()[0]
                logger.debug("Response for query is {}".format(resp))
                ts, dims = DigOutputProcessor(
                    resp['aggregations'][self.field]).process()
                dimensions = ["DATE", dims]
                ts_obj = TimeSeries(ts, dict(), dimensions).to_dict()
                return rest.ok(ts_obj)
            else:
                return rest.not_found("Time series not found")
        except Exception:
            logger.exception(
                "Exception encountered while performing time series query")
            return rest.bad_request("Enter valid query for measure document")
 def get(self, domain_name):
     if domain_name not in data['domains']:
         return rest.not_found()
     return data['domains'][domain_name]
 def get(self, project_name):
     if project_name not in data['projects']:
         return rest.not_found()
     check_project_indexing_finished(project_name)
     logger.info('Getting project %s, dict keys are %s' % (project_name, data['projects'][project_name].keys()))
     return data['projects'][project_name]
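
# Client-side sketch (not part of the original resources) for the two GET
# handlers above. The /projects/<project_name> pattern appears in error
# messages earlier in this file; the /domains/<domain_name> route is assumed
# from the domain resource's get() handler.
import requests

project = requests.get('http://mydigurl/projects/my_project').json()
domain = requests.get('http://mydigurl/domains/my_domain').json()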