Example #1
class ESLayers(object):
    """Implementation of Elastic Search as layers backend"""
    def __init__(self):
        self.es = ElasticSearch(settings.ELASTIC_SEARCH_URLS)

    def _transform(self, layer, version, layer_name):
        """Add some meta data fields which are ES specific"""
        layer = dict(layer)  # copy
        label = layer['label']
        del layer['label']
        return {
            'id': '%s/%s/%s' % (version, layer_name, label),
            'version': version,
            'name': layer_name,
            'label': label,
            'layer': layer
        }

    def bulk_put(self, layers, version, layer_name, root_label):
        """Store all layer objects"""
        self.es.bulk_index(
            settings.ELASTIC_SEARCH_INDEX, 'layer',
            map(lambda l: self._transform(l, version, layer_name), layers))

    def get(self, name, label, version):
        """Find the layer that matches these parameters"""
        try:
            result = self.es.get(settings.ELASTIC_SEARCH_INDEX, 'layer',
                                 version + '/' + name + '/' + label)

            return result['_source']['layer']
        except ElasticHttpNotFoundError:
            return None
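
A minimal usage sketch for orientation: it assumes settings.ELASTIC_SEARCH_URLS and settings.ELASTIC_SEARCH_INDEX point at a reachable cluster, and the layer dicts below are made up (each must carry a 'label' key, which _transform() strips into metadata).

layers = ESLayers()
layers.bulk_put([{'label': '1234-1', 'title': 'Section 1'},     # illustrative layer objects
                 {'label': '1234-2', 'title': 'Section 2'}],
                'v1', 'toc', '1234')
# get() rebuilds the version/name/label id and returns only the stored 'layer' payload
print(layers.get('toc', '1234-1', 'v1'))    # -> {'title': 'Section 1'}, or None if missing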
Example #2
def delete(config, tree_names, all, force):
    """Delete indices and their catalog entries.
    
    This deletes the indices that have the format version of the copy of DXR
    this runs under.
    
    """
    es = ElasticSearch(config.es_hosts)
    if all:
        echo('Deleting catalog...')
        es.delete_index(config.es_catalog_index)
        # TODO: Delete tree indices as well.
    else:
        for tree_name in tree_names:
            frozen_id = '%s/%s' % (FORMAT, tree_name)
            try:
                frozen = es.get(config.es_catalog_index, TREE, frozen_id)
            except ElasticHttpNotFoundError:
                raise ClickException('No tree "%s" in catalog.' % tree_name)
            # Delete the index first. That way, if that fails, we can still
            # try again; we won't have lost the catalog entry. Refresh is
            # infrequent enough that we wouldn't avoid a race around a
            # catalogued but deleted instance the other way around.
            try:
                es.delete_index(frozen['_source']['es_alias'])
            except ElasticHttpNotFoundError:
                # It's already gone. Fine. Just remove the catalog entry.
                pass
            es.delete(config.es_catalog_index, TREE, frozen_id)
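
The ordering above matters: the index is deleted before its catalog entry, so a failed index deletion can simply be retried without losing the catalog record. A stripped-down sketch of the same idiom, with the DXR-specific pieces (config, FORMAT, TREE) replaced by plain arguments:

from pyelasticsearch import ElasticSearch, ElasticHttpNotFoundError

def delete_tree_entry(es, catalog_index, doc_type, entry_id):
    """Illustrative helper, not part of DXR: drop the index, then its catalog entry."""
    entry = es.get(catalog_index, doc_type, entry_id)
    try:
        es.delete_index(entry['_source']['es_alias'])
    except ElasticHttpNotFoundError:
        pass  # the index is already gone; still remove the catalog entry
    es.delete(catalog_index, doc_type, entry_id)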
Example #3
class ESDiffs(object):
    """Implementation of Elastic Search as diff backend"""
    def __init__(self):
        self.es = ElasticSearch(settings.ELASTIC_SEARCH_URLS)

    @staticmethod
    def to_id(label, old, new):
        return "%s/%s/%s" % (label, old, new)

    def put(self, label, old_version, new_version, diff):
        """Store a diff between two versions of a regulation node"""
        struct = {
            'label': label,
            'old_version': old_version,
            'new_version': new_version,
            'diff': diff
        }
        self.es.index(settings.ELASTIC_SEARCH_INDEX, 'diff', struct,
                      id=self.to_id(label, old_version, new_version))

    def get(self, label, old_version, new_version):
        """Find the associated diff"""
        try:
            result = self.es.get(settings.ELASTIC_SEARCH_INDEX, 'diff',
                                 self.to_id(label, old_version, new_version))
            return result['_source']['diff']
        except ElasticHttpNotFoundError:
            return None
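
A usage sketch mirroring the layers backend; the label, version strings and diff payload are illustrative only.

diffs = ESDiffs()
diffs.put('1234-2', '2011-1', '2012-1', {'op': 'modified', 'text': 'new text'})
# the document id is label/old/new, so the same triple retrieves the stored diff
print(diffs.get('1234-2', '2011-1', '2012-1'))    # -> the stored diff dict, or None if absent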
Example #4
class ESLayers(object):
    """Implementation of Elastic Search as layers backend"""
    def __init__(self):
        self.es = ElasticSearch(settings.ELASTIC_SEARCH_URLS)

    def _transform(self, layer, version, layer_name):
        """Add some meta data fields which are ES specific"""
        layer = dict(layer)     # copy
        label = layer['label']
        del layer['label']
        return {
            'id': '%s/%s/%s' % (version, layer_name, label),
            'version': version,
            'name': layer_name,
            'label': label,
            'layer': layer
        }

    def bulk_put(self, layers, version, layer_name, root_label):
        """Store all layer objects"""
        self.es.bulk_index(
            settings.ELASTIC_SEARCH_INDEX, 'layer',
            map(lambda l: self._transform(l, version, layer_name),
                layers))

    def get(self, name, label, version):
        """Find the layer that matches these parameters"""
        try:
            result = self.es.get(settings.ELASTIC_SEARCH_INDEX, 'layer',
                                 version + '/' + name + '/' + label)

            return result['_source']['layer']
        except ElasticHttpNotFoundError:
            return None
Example #5
class ESNotices(object):
    """Implementation of Elastic Search as notice backend"""
    def __init__(self):
        self.es = ElasticSearch(settings.ELASTIC_SEARCH_URLS)

    def put(self, doc_number, notice):
        """Store a single notice"""
        self.es.index(settings.ELASTIC_SEARCH_INDEX, 'notice', notice,
                      id=doc_number)

    def get(self, doc_number):
        """Find the associated notice"""
        try:
            result = self.es.get(settings.ELASTIC_SEARCH_INDEX, 'notice',
                                 doc_number)

            return result['_source']
        except ElasticHttpNotFoundError:
            return None

    def listing(self, part=None):
        """All notices or filtered by cfr_part"""
        if part:
            query = {'match': {'cfr_part': part}}
        else:
            query = {'match_all': {}}
        query = {'fields': ['effective_on', 'fr_url', 'publication_date'],
                 'query': query}
        notices = []
        results = self.es.search(query, doc_type='notice', size=100,
                                 index=settings.ELASTIC_SEARCH_INDEX)
        for notice in results['hits']['hits']:
            notice['fields']['document_number'] = notice['_id']
            notices.append(notice['fields'])
        return notices
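
A small sketch against the same settings module; the notice body is made up, and listing() only sees new documents after the index refreshes.

notices = ESNotices()
notices.put('2013-12345', {'cfr_part': '1234',
                           'effective_on': '2014-01-01',
                           'fr_url': 'http://example.com/notice',
                           'publication_date': '2013-11-01'})
print(notices.get('2013-12345'))       # the stored notice, or None
print(notices.listing(part='1234'))    # listed fields plus an added 'document_number'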
Example #6
class ESDiffs(object):
    """Implementation of Elastic Search as diff backend"""
    def __init__(self):
        self.es = ElasticSearch(settings.ELASTIC_SEARCH_URLS)

    @staticmethod
    def to_id(label, old, new):
        return "%s/%s/%s" % (label, old, new)

    def put(self, label, old_version, new_version, diff):
        """Store a diff between two versions of a regulation node"""
        struct = {
            'label': label,
            'old_version': old_version,
            'new_version': new_version,
            'diff': diff
        }
        self.es.index(settings.ELASTIC_SEARCH_INDEX,
                      'diff',
                      struct,
                      id=self.to_id(label, old_version, new_version))

    def get(self, label, old_version, new_version):
        """Find the associated diff"""
        try:
            result = self.es.get(settings.ELASTIC_SEARCH_INDEX, 'diff',
                                 self.to_id(label, old_version, new_version))
            return result['_source']['diff']
        except ElasticHttpNotFoundError:
            return None
Example #7
def export_serie(id, output):

    result = []
    es = ElasticSearch(CONTEXT["datahub-store"])

    series = es.get(CONTEXT["datahub-index"], "_all", id)["_source"]["data"]["series"][0]["data"]

    for key, value in series:
        date = datetime.datetime.strptime(key, "%Y-%m-%d")
        output.write("%s/%s/%s,%s\n" % (date.month, date.day, date.year, value))
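
A possible way to call it, assuming CONTEXT is configured and the id refers to an existing datahub document; the id below is made up.

with open('serie.csv', 'w') as out:
    export_serie('AbC123XX', out)    # writes one "month/day/year,value" row per data point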
Example #8
class ESBase(object):
    """Shared code for Elastic Search storage models"""
    def __init__(self):
        self.es = ElasticSearch(settings.ELASTIC_SEARCH_URLS)

    def safe_fetch(self, doc_type, id):
        """Attempt to retrieve a document from Elastic Search.
        :return: Found document, if it exists, otherwise None"""
        try:
            result = self.es.get(settings.ELASTIC_SEARCH_INDEX, doc_type, id)
            return result['_source']
        except ElasticHttpNotFoundError:
            return None
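
ESBase factors out the fetch-or-return-None pattern used by the other backends. A sketch of how a backend could lean on it (the subclass is illustrative, not from the source project):

class ESDiffsViaBase(ESBase):
    """Illustrative subclass: the diff backend's get() reduces to safe_fetch()."""
    def get(self, label, old_version, new_version):
        doc = self.safe_fetch('diff', '%s/%s/%s' % (label, old_version, new_version))
        return doc['diff'] if doc else None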
Example #10
class ESRegulations(object):
    """Implementation of Elastic Search as regulations backend"""
    def __init__(self):
        self.es = ElasticSearch(settings.ELASTIC_SEARCH_URLS)

    def get(self, label, version):
        """Find the regulation label + version"""
        try:
            result = self.es.get(settings.ELASTIC_SEARCH_INDEX, 'reg_tree',
                                 version + '/' + label)

            reg_node = result['_source']
            del reg_node['regulation']
            del reg_node['version']
            del reg_node['label_string']
            del reg_node['id']
            return reg_node
        except ElasticHttpNotFoundError:
            return None

    def _transform(self, reg, version):
        """Add some meta data fields which are ES specific"""
        node = dict(reg)  # copy
        node['version'] = version
        node['label_string'] = '-'.join(node['label'])
        node['regulation'] = node['label'][0]
        node['id'] = version + '/' + node['label_string']
        node['root'] = len(node['label']) == 1
        return node

    def bulk_put(self, regs, version, root_label):
        """Store all reg objects"""
        self.es.bulk_index(settings.ELASTIC_SEARCH_INDEX, 'reg_tree',
                           map(lambda r: self._transform(r, version), regs))

    def listing(self, label=None):
        """List regulation version-label pairs that match this label (or are
        root, if label is None)"""
        if label is None:
            query = {'match': {'root': True}}
        else:
            query = {'match': {'label_string': label}}
        query = {'fields': ['label_string', 'version'], 'query': query}
        result = self.es.search(query,
                                index=settings.ELASTIC_SEARCH_INDEX,
                                doc_type='reg_tree',
                                size=100)
        return sorted((res['fields']['version'], res['fields']['label_string'])
                      for res in result['hits']['hits'])
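
A minimal round trip, assuming the same settings module; the regulation nodes are made up.

regs = ESRegulations()
regs.bulk_put([{'label': ['1234'], 'text': 'root node'},          # illustrative nodes
               {'label': ['1234', '1'], 'text': 'child node'}],
              'v1', '1234')
print(regs.get('1234-1', 'v1'))    # ES-specific metadata fields stripped back out
print(regs.listing('1234'))        # [(version, label_string), ...] once the index has refreshed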
Example #11
class GetLastUpdate():

    def __init__(self):
        self.es = ElasticSearch(config.DATABASE_URL)

    def get(self, query):
        query = query.split(',')
        last_update = 0
        for q in query:
            l = self.es.get(q, 'seq', 'last_seq')['_source']
            if last_update == 0 or l['time'] < last_update:
                last_update = l['time']
        lu = {}
        lu['last_update'] = last_update
        return json.dumps({"results": lu})
Example #12
class ESRegulations(object):
    """Implementation of Elastic Search as regulations backend"""
    def __init__(self):
        self.es = ElasticSearch(settings.ELASTIC_SEARCH_URLS)

    def get(self, label, version):
        """Find the regulation label + version"""
        try:
            result = self.es.get(settings.ELASTIC_SEARCH_INDEX, 'reg_tree',
                                 version + '/' + label)

            reg_node = result['_source']
            del reg_node['regulation']
            del reg_node['version']
            del reg_node['label_string']
            del reg_node['id']
            return reg_node
        except ElasticHttpNotFoundError:
            return None

    def _transform(self, reg, version):
        """Add some meta data fields which are ES specific"""
        node = dict(reg)    # copy
        node['version'] = version
        node['label_string'] = '-'.join(node['label'])
        node['regulation'] = node['label'][0]
        node['id'] = version + '/' + node['label_string']
        node['root'] = len(node['label']) == 1
        return node

    def bulk_put(self, regs, version, root_label):
        """Store all reg objects"""
        self.es.bulk_index(settings.ELASTIC_SEARCH_INDEX, 'reg_tree',
                           map(lambda r: self._transform(r, version), regs))

    def listing(self, label=None):
        """List regulation version-label pairs that match this label (or are
        root, if label is None)"""
        if label is None:
            query = {'match': {'root': True}}
        else:
            query = {'match': {'label_string': label}}
        query = {'fields': ['label_string', 'version'], 'query': query}
        result = self.es.search(query, index=settings.ELASTIC_SEARCH_INDEX,
                                doc_type='reg_tree', size=100)
        return sorted((res['fields']['version'], res['fields']['label_string'])
                      for res in result['hits']['hits'])
Example #13
class ESNotices(object):
    """Implementation of Elastic Search as notice backend"""
    def __init__(self):
        self.es = ElasticSearch(settings.ELASTIC_SEARCH_URLS)

    def put(self, doc_number, notice):
        """Store a single notice"""
        self.es.index(settings.ELASTIC_SEARCH_INDEX,
                      'notice',
                      notice,
                      id=doc_number)

    def get(self, doc_number):
        """Find the associated notice"""
        try:
            result = self.es.get(settings.ELASTIC_SEARCH_INDEX, 'notice',
                                 doc_number)

            return result['_source']
        except ElasticHttpNotFoundError:
            return None

    def listing(self, part=None):
        """All notices or filtered by cfr_part"""
        if part:
            query = {'match': {'cfr_parts': part}}
        else:
            query = {'match_all': {}}
        query = {
            'fields': ['effective_on', 'fr_url', 'publication_date'],
            'query': query
        }
        notices = []
        results = self.es.search(query,
                                 doc_type='notice',
                                 size=100,
                                 index=settings.ELASTIC_SEARCH_INDEX)
        for notice in results['hits']['hits']:
            notice['fields']['document_number'] = notice['_id']
            notices.append(notice['fields'])
        return notices
Example #14
class ElasticConnector(Connector):
    """
    Class for connectors that are operate with elasticsearch database
  """
    MAX_SIZE = 1000

    def __init__(self, database, host='http://localhost:9200/'):
        self.client = ElasticSearch(host)
        self.index = database
        self.create_index()

    def query_to_id(self, query):
        """
      Returns id representation of a specified query
      This is a temporary method as a replacement of elasticsearch query search
    """
        return "_".join(str(k) + "_" + str(v)
                        for k, v in query.items()).replace("/", "_")

    def create_index(self):
        """
      Creates specified index or catches an exception if it has already been created
    """
        try:
            self.client.create_index(self.index)
        except Exception as e:
            pass

    def set_dynamic_mapping(self, collection):
        """
      Sets dynamic mapping for a specified document type
    """
        self.client.put_mapping(self.index, collection, {'dynamic': True})

    def save_block(self, block):
        """
      Saves operation info in a database
    """
        super().save_block(block)
        collection = block.get_collection()
        dictionary = block.to_dict()
        query = block.get_query()
        self.update_by_query(collection, query, block)

    def update_by_query(self, collection, query, document):
        """
      Sets dynamic mapping for a specified collection,
      then creates a new id for a document depending on query for it.
      Saves a new object in a database as a new one
    """
        try:
            self.set_dynamic_mapping(collection)
            document_id = document.get_id()
            document_body = document.to_dict()
            if "_id" in document_body.keys():
                del document_body['_id']
            self.client.index(self.index,
                              collection,
                              document_body,
                              id=self.query_to_id(query))
        except Exception as e:
            print(e)
            pass

    def find_last_block(self):
        """
      Finds last block index as a value field of a document 
      in a status collection with specified id
    """
        try:
            document = self.client.get(self.index, 'status',
                                       'height_all_tsx')['_source']
            return document['value']
        except ElasticHttpNotFoundError as e:
            return 0

    def update_last_block(self, last_block):
        """
      Updates last block index as a value field of a document 
      in a status collection with specified id
    """
        self.client.index(self.index,
                          'status', {'value': last_block},
                          id='height_all_tsx')

    def save_instance(self, instance):
        """
      Saves account or comment object
    """
        self.update_by_query(instance.get_collection(), instance.get_query(),
                             instance)

    def get_instances_to_update(self, collection):
        """
      Finds and returns all dictionaries with objects that should be updated
    """
        hits = self.client.search("need_update:true",
                                  index=self.index,
                                  doc_type=collection,
                                  size=self.MAX_SIZE)['hits']['hits']
        return [{**hit['_source'], **{"_id": hit["_id"]}} for hit in hits]

    def update_instances(self, collection, instances):
        """
      Resets need_update flag for all instances in a list by their ids in _id field
    """
        for instance in instances:
            self.client.update(self.index,
                               collection,
                               instance["_id"],
                               doc={'need_update': False})
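
A possible smoke test for the connector, assuming a node on localhost:9200 and that the Connector base class needs no extra initialisation; the index name is arbitrary.

connector = ElasticConnector('operations')
connector.update_last_block(12345)
print(connector.find_last_block())    # -> 12345 (0 when nothing has been stored yet)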
Example #15
    'age': 31,
    'title': 'Programmer'
}, {
    'id': 3,
    'name': 'Freddy Coder抽',
    'age': 29,
    'title': 'Office Assistant'
}]

es.bulk((es.index_op(doc, id=doc.pop('id')) for doc in docs),
        index='test',
        doc_type='test')

es.refresh('test')

res1 = es.get('test', 'test', 1)

# Full-text match; note that Chinese and English are tokenized differently.
# https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html

res8 = es.search(index='test',
                 size=2,
                 query={"query": {
                     "query_string": {
                         "query": "抽"
                     }
                 }})

# Prefix query; only lowercase terms are accepted.
res12 = es.search(index='test', query={"query": {"prefix": {"title": "p"}}})
Example #16
class Elastic(DataLayer):
    """ElasticSearch data layer."""

    serializers = {
        'integer': int,
        'datetime': parse_date
    }

    def init_app(self, app):
        app.config.setdefault('ELASTICSEARCH_URL', 'http://localhost:9200/')
        app.config.setdefault('ELASTICSEARCH_INDEX', 'eve')
        self.es = ElasticSearch(app.config['ELASTICSEARCH_URL'])
        self.index = app.config['ELASTICSEARCH_INDEX']

    def _get_field_mapping(self, schema):
        """Get mapping for given field schema."""
        if schema['type'] == 'datetime':
            return {'type': 'date'}
        elif schema['type'] == 'string' and schema.get('unique'):
            return {'type': 'string', 'index': 'not_analyzed'}
        elif schema['type'] == 'string':
            return {'type': 'string'}

    def put_mapping(self, app):
        """Put mapping for elasticsearch for current schema.

        It's not called automatically now, but rather left for user to call it whenever it makes sense.
        """
        for resource, resource_config in app.config['DOMAIN'].items():
            properties = {}
            properties[config.DATE_CREATED] = self._get_field_mapping({'type': 'datetime'})
            properties[config.LAST_UPDATED] = self._get_field_mapping({'type': 'datetime'})

            for field, schema in resource_config['schema'].items():
                field_mapping = self._get_field_mapping(schema)
                if field_mapping:
                    properties[field] = field_mapping

            datasource = (resource, )  # TODO: config.SOURCES not available yet (self._datasource_ex(resource))
            mapping = {}
            mapping[datasource[0]] = {'properties': properties}
            self.es.put_mapping(self.index, datasource[0], mapping)

    def find(self, resource, req, sub_resource_lookup):
        """
        TODO: implement sub_resource_lookup
        """
        query = {
            'query': {
                'query_string': {
                    'query': request.args.get('q', '*'),
                    'default_field': request.args.get('df', '_all'),
                    'default_operator': 'AND'
                }
            }
        }

        if not req.sort and self._default_sort(resource):
            req.sort = self._default_sort(resource)

        # skip sorting when there is a query to use score
        if req.sort and 'q' not in request.args:
            query['sort'] = []
            sort = ast.literal_eval(req.sort)
            for (key, sortdir) in sort:
                sort_dict = dict([(key, 'asc' if sortdir > 0 else 'desc')])
                query['sort'].append(sort_dict)

        if req.where:
            where = json.loads(req.where)
            if where:
                query['filter'] = {
                    'term': where
                }

        if req.max_results:
            query['size'] = req.max_results

        if req.page > 1:
            query['from'] = (req.page - 1) * req.max_results

        source_config = config.SOURCES[resource]
        if 'facets' in source_config:
            query['facets'] = source_config['facets']

        try:
            args = self._es_args(resource)
            args['es_fields'] = self._fields(resource)
            return self._parse_hits(self.es.search(query, **args), resource)
        except es_exceptions.ElasticHttpError:
            return ElasticCursor()

    def find_one(self, resource, **lookup):
        args = self._es_args(resource)
        args['es_fields'] = self._fields(resource)

        if config.ID_FIELD in lookup:
            try:
                hit = self.es.get(id=lookup[config.ID_FIELD], **args)
            except es_exceptions.ElasticHttpNotFoundError:
                return

            if not hit['exists']:
                return

            doc = hit.get('fields', hit.get('_source', {}))
            doc['_id'] = hit.get('_id')
            convert_dates(doc, self._dates(resource))
            return doc
        else:
            query = {
                'query': {
                    'constant_score': {
                        'filter': {
                            'term': lookup
                        }
                    }
                }
            }

            try:
                args['size'] = 1
                docs = self._parse_hits(self.es.search(query, **args), resource)
                return docs.first()
            except es_exceptions.ElasticHttpNotFoundError:
                return None

    def find_list_of_ids(self, resource, ids, client_projection=None):
        args = self._es_args(resource)
        args['es_fields'] = self._fields(resource)
        return self._parse_hits(self.es.multi_get(ids, **args), resource)

    def insert(self, resource, doc_or_docs, **kwargs):
        ids = []
        kwargs.update(self._es_args(resource))
        for doc in doc_or_docs:
            doc.update(self.es.index(doc=doc, id=doc.get('_id'), **kwargs))
            ids.append(doc['_id'])
        self.es.refresh(self.index)
        return ids

    def update(self, resource, id_, updates):
        args = self._es_args(resource, refresh=True)
        return self.es.update(id=id_, doc=updates, **args)

    def replace(self, resource, id_, document):
        args = self._es_args(resource, refresh=True)
        args['overwrite_existing'] = True
        return self.es.index(document=document, id=id_, **args)

    def remove(self, resource, id_=None):
        args = self._es_args(resource, refresh=True)
        if id_:
            return self.es.delete(id=id_, **args)
        else:
            try:
                return self.es.delete_all(**args)
            except es_exceptions.ElasticHttpNotFoundError:
                return

    def _parse_hits(self, hits, resource):
        """Parse hits response into documents."""
        return ElasticCursor(hits, self._dates(resource))

    def _es_args(self, resource, refresh=None):
        """Get index and doctype args."""
        datasource = self._datasource(resource)
        args = {
            'index': self.index,
            'doc_type': datasource[0],
            }
        if refresh:
            args['refresh'] = refresh
        return args

    def _fields(self, resource):
        """Get projection fields for given resource."""
        datasource = self._datasource(resource)
        keys = datasource[2].keys()
        return ','.join(keys)

    def _default_sort(self, resource):
        datasource = self._datasource(resource)
        return datasource[3]

    def _dates(self, resource):
        dates = [config.LAST_UPDATED, config.DATE_CREATED]
        datasource = self._datasource(resource)
        schema = config.DOMAIN[datasource[0]]['schema']
        for field, field_schema in schema.items():
            if field_schema['type'] == 'datetime':
                dates.append(field)
        return dates
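
To use this layer it would normally be passed to Eve as the data layer; a sketch, assuming an Eve settings file with a DOMAIN is in place (put_mapping() is not called automatically, per its docstring).

from eve import Eve

app = Eve(data=Elastic)          # Eve instantiates the layer and calls init_app(app)
with app.app_context():
    app.data.put_mapping(app)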
Example #17
class GetGrowing():

    def __init__(self):
        self.count_fake = 0
        self.es = ElasticSearch(config.DATABASE_URL)
        self.overflow = 1000000

    def fake_suffix(self):
        self.count_fake += 1
        return 'X'*(5-min(len(str(self.count_fake)), 4))+str(self.count_fake)

    def create_fake_request(self, original_request, member_of_campaign,
                            status='upcoming', total=None):
        fake_request = {}
        fake_request['status'] = status
        fake_request['member_of_campaign'] = member_of_campaign

        for f in ['pwg', 'priority', 'total_events', 'time_event']:
            fake_request[f] = original_request[f]

        if total is not None:
            fake_request['total_events'] = total
        else:
            fake_request['total_events'] = 0
            
        fake_request['prepid'] = '-'.join([original_request['pwg'],
                                           member_of_campaign,
                                           self.fake_suffix()])
        return fake_request

    def get(self, campaign):
        arg_list = campaign.split(',')
        # get all chained campaigns which contain selected CAMPAIGN
        # reduction to only cc
        while True:
            again = False
            for arg in arg_list:
                if not arg.startswith('chain'):
                    # this is a flow or a campaign: does not matter for the query
                    ccs = [s['_source'] for s in
                           self.es.search(('campaigns:%s' % arg),
                                          index='chained_campaigns',
                                          size=self.overflow)['hits']['hits']]
                    arg_list.extend(map(lambda cc: cc['prepid'], ccs))
                    # arg is going to be duplicated
                    arg_list.remove(arg)
                    again = True
                    break
            if not again:
                break
        # arg_list contains only chained campaigns
        steps = []  # what are the successive campaigns
        all_cr = []  # what are the chained requests to look at
        all_cc = {}
        # unique it
        arg_list = list(set(arg_list))
        # collect all cr
        for a_cc in arg_list:
            try:
                mcm_cc = self.es.get('chained_campaigns',
                                     'chained_campaign', a_cc)['_source']
            except Exception:
                # try to see if that's a flow
                # TODO: patch for this exception
                return '%s does not exist' % (a_cc)

            all_cc[a_cc] = mcm_cc  # keep it in mind
            all_cr.extend([s['_source'] for s in
                           self.es.search(('member_of_campaign:%s' % a_cc),
                                          index='chained_requests',
                                          size=self.overflow)['hits']['hits']])
            these_steps = map(lambda s: s[0], mcm_cc['campaigns'])
            if len(steps) == 0:
                steps = these_steps
            else:
                # concatenate to existing steps
                # add possible steps at the beginning
                connection = 0
                while not steps[connection] in these_steps:
                    connection += 1
                new_start = these_steps.index(steps[connection])
                if new_start != 0:
                    # they do not start at the same campaign
                    for where in range(new_start):
                        steps.insert(where, these_steps[where])
                # verify strict overlapping
                # ==> does not function properly and limits the flexibility
                for check in range(new_start, len(these_steps)):
                    if these_steps[check] not in steps:
                        steps.append(these_steps[check])

        # preload all requests !!!
        all_requests = {}
        for step in steps:
            for r in [s['_source'] for s in
                      self.es.search(('member_of_campaign:%s' % step),
                                     index='requests',
                                     size=self.overflow)['hits']['hits']]:
                all_requests[r['prepid']] = r
        req_copy = dict(all_requests)
        # avoid double counting
        already_counted = set()
        # the list of requests to be emitted to d3js
        list_of_request_for_ramunas = []
        for cr in all_cr:
            upcoming = 0
            if len(cr['chain']) == 0:
                # crap data
                continue
            stop_at = cr['step']
            stop_at = len(cr['chain'])-1
            for (r_i, r) in enumerate(cr['chain']):
                if r_i > stop_at:
                    # this is a reserved request, will count as upcoming later
                    continue
                mcm_r = all_requests[r]
                try:
                    del req_copy[r]
                except KeyError:
                    pass
                upcoming = int(mcm_r['total_events']*abs(mcm_r['efficiency']))

                if r in already_counted:
                    continue
                else:
                    already_counted.add(r)

                # add it to emit
                def pop(mcm_r):
                    for member in mcm_r.keys():
                        if member not in ['prepid', 'pwg', 'efficiency',
                                          'total_events', 'status', 'priority',
                                          'member_of_campaign', 'time_event']:
                            mcm_r.pop(member)
                    return mcm_r

                if mcm_r['status'] == 'done':
                    if (not len(mcm_r['output_dataset'])
                        or mcm_r['total_events'] == -1):
                        mcm_r['total_events'] = 0
                    else:
                        mcm_r['total_events'] = mcm_r['completed_events']
                if mcm_r['status'] == 'submitted':
                    try:
                        if not len(mcm_r['reqmgr_name']):
                            mcm_r['total_events'] = 0
                    except KeyError:
                        pass

                if mcm_r['status'] == 'submitted':
                    mcm_r_fake_done = copy.deepcopy(mcm_r)
                    mcm_r_fake_done['status'] = 'done'
                    mcm_r_fake_done['total_events'] = mcm_r['completed_events']
                    mcm_r_fake_subm = copy.deepcopy(mcm_r)
                    mcm_r_fake_subm['total_events'] = max(
                        [0, mcm_r['total_events'] - mcm_r['completed_events']])
                    list_of_request_for_ramunas.append(pop(mcm_r_fake_subm))
                    list_of_request_for_ramunas.append(pop(mcm_r_fake_done))
                else:
                    if mcm_r['total_events'] == -1:
                        mcm_r['total_events'] = 0
                    list_of_request_for_ramunas.append(pop(mcm_r))
            for noyet in all_cc[cr[
                    'member_of_campaign']]['campaigns'][stop_at+1:]:
                # create a fake request with the proper member of campaign
                processing_r = all_requests[cr['chain'][stop_at]]
                fake_one = self.create_fake_request(processing_r, noyet[0],
                                                    total=upcoming)
                list_of_request_for_ramunas.append(fake_one)
        # add req that does not belong to chain (from org campaign)
        for r in req_copy:
            r = req_copy[r]
            if r['member_of_campaign'] == campaign:
                if r['status'] == 'done':
                    if (not len(r['output_dataset'])
                        or r['total_events'] == -1):
                        r['total_events'] = 0
                    else:
                        r['total_events'] = r['completed_events']
                if r['total_events'] == -1:
                    r['total_events'] = 0
                list_of_request_for_ramunas.append(r)
        return json.dumps({"results": list_of_request_for_ramunas})
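
As with the other classes in this service, a call would presumably look like the line below; the campaign name is made up.

print(GetGrowing().get('Summer2014Campaign'))    # JSON string: {"results": [...]}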
Example #18
        'UBERON:0001911': 'mammary gland',
        'UBERON:0001630': 'muscle organ',
        'UBERON:0000007': 'pituitary gland',
        'UBERON:0002370': 'thymus',
        'UBERON:0000478': 'extraembryonic structure'
    }

s = connection.search(query, index=index, size=20000)

results = s['hits']['hits']
terms = []

# Loop compares the terms from the different indexes
for result in results:
    try:
        s1 = connection.get(index_compared, 'basic', result['_id'])
        for k in result['_source'].viewkeys() & s1['_source'].viewkeys():
            # Check for differences between documents across the two indexes
            if result['_source'][k] != s1['_source'][k]:
                if k == 'organs' or k == 'systems':
                    # I am worried only about CL terms.
                    if 'CL' in result['_id']:
                        # I don't want any duplicates
                        if result['_source'] not in terms:
                            terms.append(result['_source'])
    except:
        print result['_id']

# Initializes the graph structure and appends edges for each term
G = nx.DiGraph()
for term in results:
Example #19
class Datahub():
    
    context = None
    _es = None
    index_name = None
    user_id = None
    
    def __init__(self,context,index=None,user_id=None):
        self.context = context
        self._es = ElasticSearch(self.context['datahub-store'])
        self.index_name = index if index is not None else self.context['datahub-index']
        self.user_id = user_id
        
    def _get_user_id(self):
        if self.user_id is None:
            return "public"
        else:
            return "user_%s" % (self.user_id)  
        
    def query(self,name,type_name="_all",es_from=0):
        if self.user_id is None:
            q = '%s AND owner:public' % name
        else:
            q = '%s AND (owner:public OR owner:"%s")' % (name,self._get_user_id())
        results= self._es.search(q,index=self.index_name,doc_type=type_name,es_from=es_from)
        
        if results['hits']['total'] > 0:
    
            return (map(lambda s : (s['_id'],s['_source']),results['hits']['hits']),results['hits']['total'],results['took'])
        else:
            return None,results['hits']['total'],results['took']
        
    def get(self,key,type_name="_all"):
        result={}
        try:
            result = self._es.get(self.index_name,type_name,id=key)
            if result['_source']['owner'] == 'public' or result['_source']['owner'] == self._get_user_id():
                return result['_source']
            else:
                return None
        
        except ElasticHttpNotFoundError:
            return None
       
       
    def index(self,name,display_type,data,category="private",source="private",zone="private",description=""):
        
        serie = {
         "name" : name,
         "owner" : self._get_user_id(),
         "display": display_type,
         "zone": zone,
         "category" : category,
         "source" : source,
         'description' : description,
         "data" : data
         }
        
        id = self._es.index(self.index_name, display_type, serie)

        return id['_id']
    
    def get_user_series(self):
        series = []
        q = 'owner:%s' % (self._get_user_id())
        results = self._es.search(q,index=self.index_name,doc_type='_all',es_from=0,size=999)
        
        series = map(lambda serie : '%s;%s;%s' % (serie['_id'],serie['_source']['name'],cjson.encode(serie['_source']['data']['series'][0]['data'])),results['hits']['hits'])
        
        return series
        
        
Example #20
class ElasticSearchDataStore(datastore.DataStore):
    """Implements the API.""" 
    def __init__(self, index_list):
        # Connect to the Elasticsearch server.
        self.client = ElasticSearch('http://%s:%s/' % (ELASTICSEARCH_SERVER_IP,
                                                       ELASTICSEARCH_PORT))
        # TODO Refactor this to not need the index list at this stage.
        self.index_list = index_list

    def search(self, sketch, query, filters):
        """Search ElasticSearch. This will take a query string from the UI
        together with a filter definition. Based on this it will send the
        search request to elasticsearch and get the results back.

        Args:
            sketch -- string, sketch ID
            query -- string, query string
            filters -- dict, Dictionary containing filters to apply 

        Returns:
            Set of event documents in JSON format
        """

        if filters.get("time_start", None):
            query = {
                "query": {
                    "query_string": {
                        "query": query
                    }
                },
                "filter": {
                    "range": {
                        "datetime": {
                            "gte": filters['time_start'],
                            "lte": filters['time_end']
                        }
                    }
                },
                "sort": {
                    "datetime": "asc"
                }
            }
        elif filters.get("star", None):
            query = {
                "query": {
                    "match_all": {}
                },
                "filter": {
                    "nested": {
                        "path": "timesketch_label", "filter": {
                        "bool": {
                            "must": [
                                {
                                    "term": {
                                        "timesketch_label.name": "__ts_star"
                                    }
                                },
                                {
                                    "term": {
                                        "timesketch_label.sketch": str(sketch)
                                    }
                                }
                            ]
                        }
                        }
                    }
                },
                "sort": {
                    "datetime": "asc"
                }
            }
        else:
            query = {
                "query": {
                    "query_string": {
                        "query": query
                    }
                },
                "sort": {
                    "datetime": "asc"
                }
            }

        return self.client.search(query, index=self.index_list,
                                  doc_type="plaso_event", size=500)


    def get_single_event(self, event_id):
        """Get singel event document form elasticsearch

        Args:
            event_id -- string, event ID

        Returns:
            Event document as JSON
        """
        return self.client.get(index=self.index_list[0],
            doc_type="plaso_event",id=event_id)

    def add_label_to_event(self, event, sketch, user, label, toggle=False):
        """Add label to a event document in ElasticSearch.

        Args:
            event -- string, event ID
            sketch -- string, sketch ID
            user -- string, user ID
            label -- string, the label to apply
            toggle -- Bool, Toggle label or create a new one

        Returns:
            HTTP status code

        In order for this to work, we need to add a mapping for this nested
        document. This needs to be done when the index is first created.
        mapping = {
            "plaso_event": {
                "properties": {
                    "timesketch_label": {
                        "type": "nested"
                    }
                }
            }
        }
        """

        doc = self.client.get(self.index_list, "plaso_event", event)
        try:
            doc['_source']['timesketch_label']
        except KeyError:
            doc = {"timesketch_label": []}
            self.client.update(self.index_list, "plaso_event", event, doc=doc)

        if toggle:
            script_string = "if(ctx._source.timesketch_label.contains"\
                            "(timesketch_label)) {ctx._source.timesketch_label"\
                            ".remove(timesketch_label)} else {ctx._source."\
                            "timesketch_label += timesketch_label}"
        else:
            script_string = "if( ! ctx._source.timesketch_label.contains"\
                            "(timesketch_label)) {ctx._source.timesketch_label"\
                            "+= timesketch_label}"
        script = {
            "script": script_string,
            "params": {
                "timesketch_label": {
                    "name": label, "user": user, "sketch": sketch
                }
            }
        }
        self.client.update(self.index_list, "plaso_event", event, script)
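
A usage sketch; the index name, sketch id and the module-level ELASTICSEARCH_SERVER_IP / ELASTICSEARCH_PORT settings are assumed to exist.

datastore = ElasticSearchDataStore(index_list=['sketch_1_index'])
result = datastore.search(sketch='1', query='username:root', filters={})
for event in result['hits']['hits']:
    print(event['_id'], event['_source'].get('datetime'))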
Example #21
#!/usr/bin/env python
#coding:utf-8

from pyelasticsearch import ElasticSearch
from pyelasticsearch import bulk_chunks

es = ElasticSearch('http://localhost:9200/')

def create_index(es):
    # _index, _type, _source, _id
    es.index('contacts','person',{'name': 'Joe Tester', 'age': 25, 'title': 'QA Master'},id=1)

    docs = [{'id': 2, 'name': 'Jessica Coder', 'age': 32, 'title': 'Programmer'},\
            {'id': 3, 'name': 'Freddy Tester', 'age': 29, 'title': 'Office Assistant'}]

    es.bulk((es.index_op(doc, id=doc.pop('id')) for doc in docs),\
            index='contacts',\
            doc_type='person')

    es.refresh('contacts')

def delete_index(es):
    es.delete_index('contacts')

#create_index(es)
print es.get('contacts', 'person', 2)
print es.search('name:joe OR name:freddy', index='contacts')
Example #22
from pyelasticsearch import ElasticSearch
from pprint import pprint

es = ElasticSearch('http://localhost:9200/')

pprint(es.get('pin','bm', 1))

pprint(es.search('tags:blog', size=2, index='pin'))
pprint(es.search('tags:blog AND genomics', index='pin'))

Example #23
class GetHistorical():
    '''
    Used to return list of point for historical plots
    '''

    def __init__(self):
        self.es = ElasticSearch(config.DATABASE_URL)
        # normally es will crop results to 20
        # and a million rows is more than we have in db
        self.overflow = 1000000

    def select_dataset(self, ds1, ds2):
        '''
        This selection is from statsMonitoring.py
        '''
        t1=ds1.split('/')[1:]
        t2=ds2.split('/')[1:]
        if len(t1[1]) > len(t2[1]):
            return 1
        else:
            def tierP(t):
                tierPriority=[
                    '/RECO',
                    'SIM-RECO',
                    'DIGI-RECO',
                    'AOD',
                    'SIM-RAW-RECO',
                    'DQM' ,
                    'GEN-SIM',
                    'RAW-RECO',
                    'USER',
                    'ALCARECO']
                for (p, tier) in enumerate(tierPriority):
                    if tier in t:
                        return p
                return t
            p1 = tierP(t1[2])
            p2 = tierP(t2[2])
            decision = (p1 > p2)
            if t1[2] == 'AODSIM' and t2[2] == 'MINIAODSIM':
                decision = True
            return decision * 2 - 1
            
    def db_query(self, input):
        '''
        Query DB and return array of raw documents
        '''

        iterable = []

        # try to query for campaign and get list of requests
        req_arr = [s['_source'] for s in
                   self.es.search(('member_of_campaign:%s' % input),
                                  index='requests',
                                  size=self.overflow)['hits']['hits']]

        # if empty, assume input is a request
        if not len(req_arr):
            try:
                req_arr = [self.es.get('requests',
                                       'request', input)['_source']]
            except:
                # if an exception is thrown, this may be a workflow
                iterable = [input]

        # iterate over array and collect details
        for req in req_arr:
            try:
                dataset_list = req['output_dataset']
                if len(dataset_list):
                    dataset_list.sort(cmp=self.select_dataset)
                    ds = dataset_list[0]
                else:
                    ds = None

                for reqmgr in req['reqmgr_name']:
                    i = {}
                    i['expected'] = req['total_events']
                    i['name'] = reqmgr
                    i['output_dataset'] = ds
                    i['priority'] = req['priority']
                    i['pwg'] = req['pwg']
                    i['request'] = True
                    i['status'] = req['status']
                    iterable.append(i)
            except:
                pass

        # iterate over workflows and yield documents 
        for i in iterable:
            if 'request' in i:
                try:
                    yield [i['request'], self.es.get(
                            'stats', 'stats', i['name'])['_source'], i]
                except:
                    yield [True, None, i]
            else:
                try:
                    yield [False, self.es.get(
                            'stats', 'stats', i)['_source'], None]
                except:
                    yield [False, None, None]


    def rm_useless(self, arr):
        '''
        Compressing data: remove first probe of resubmissions and points that
        are equal to previous measurement
        '''
        r = []
        prev = {'e': -1, 'x': -1}
        for (x, a) in enumerate(arr):
            if ((a['e'] != prev['e'] or a['x'] != prev['x'])
                and (a['e'] != 0 or x == 0)):
                r.append(a)
                prev = a
        return r

    def prepare_response(self, query, probe, p_min, p_max, status_i, pwg_i):
        stop = False
        r = []
        status = {}
        pwg = {}

        for q in query:
            
            # Process the db documents
            for (is_request, document, details) in self.db_query(q):

                # skip empty documents
                if document is None:
                    continue

                # filter out requests
                if is_request:

                    def get_filter_dict(doc, arr, inp):
                        if doc not in arr:
                            arr[doc] = False
                            if inp is None:
                                arr[doc] = True
                            else:
                                for i in inp:
                                    if i == doc:
                                        arr[doc] = True
                                        break
                        return arr

                    # generate status dict
                    status = get_filter_dict(details['status'], status,
                                             status_i)

                    # generate pwg dict
                    pwg = get_filter_dict(details['pwg'], pwg, pwg_i)

                    # pwg filtering 
                    if not (pwg_i is None or details['pwg'] in pwg_i):
                        continue
                    # status filtering
                    if not (status_i is None or details['status'] in status_i):
                        continue
                    # priority filtering
                    if (details['priority'] < p_min or (
                            details['priority'] > p_max and p_max != -1)):
                        continue

                    # skip requests with not desired output dataset
                    if (document['pdmv_dataset_name'] !=
                        details['output_dataset']):
                        if details['output_dataset'] is not None and document['pdmv_dataset_name'] != 'None Yet':
                            continue

                # create an array of requests to be processed
                response = {}
                response['data'] = []
                response['request'] = document['pdmv_prep_id']

                # taskchain handling
                if not is_request and (document['pdmv_type'] == 'TaskChain'):
                    # load taskchain instead of normal req
                    for t in document['pdmv_monitor_taskchain']:
                        res = {}
                        res['request'] = t['dataset']
                        res['data'] = []
                        for record in t['monitor']:
                            if len(record['pdmv_monitor_time']):
                                data = {}
                                data['e'] = record['pdmv_evts_in_DAS']
                                data['t'] = time.mktime(time.strptime(
                                        record['pdmv_monitor_time']))*1000
                                data['x'] = document['pdmv_expected_events']
                            res['data'].append(data)
                        r.append(res)
                    re = {}
                    re['data'] = r  
                    re['status'] = {}
                    re['pwg'] = {}
                    re['taskchain'] = True
                    stop = True
                        
                else:
                    if 'pdmv_monitor_history' in document:
                        for record in document['pdmv_monitor_history']:
                            if len(record['pdmv_monitor_time']):
                                data = {}
                                if details is None or details['output_dataset'] is not None:
                                    # a is events in das
                                    data['e'] = record['pdmv_evts_in_DAS']
                                else:
                                    # if the output in mcm is not specified yet,
                                    # treat as this has not produced anything
                                    # ensures present=historical
                                    data['e'] = 0
                                data['t'] = time.mktime(time.strptime(
                                        record['pdmv_monitor_time']))*1000

                                # x is expected events
                                if is_request:
                                    data['x'] = details['expected']
                                else:
                                    data['x'] = document[
                                        'pdmv_expected_events']
                                response['data'].append(data)
                    r.append(response)

        if stop:
            return re

        # Step 1: Get accumulated requests
        tmp = {}
        for x in r:
            s = x['request']
            if s not in tmp:
                tmp[s] = {}
                tmp[s]['data'] = []
            tmp[s]['data'] += x['data']
            tmp[s]['data'] = sorted(tmp[s]['data'], key=lambda e: e['t'])
            tmp[s]['data'] = self.rm_useless(tmp[s]['data'])
        
        # Step 2: Get and sort timestamps
        times = []
        for t in tmp:
            times += (x['t'] for x in tmp[t]['data'])
        times = sorted(set(times))

        if len(times) > (probe-1):
            skiper = len(times) / (probe-1)
        else:
            skiper = -1

        filter_times  = []
        i = 0
        for (x, t) in enumerate(times):
            if i < skiper and x < len(times) - 1 and x != 0:
                i += 1
            else:
                filter_times.append(t)
                i = 0
        
        # Step 3 & 4: Cycle through requests and add data points
        data = []
        for ft in filter_times:
            d = {'e': 0, 't': ft, 'x': 0}
            for t in tmp:
                prevx = {'e': 0, 'x': 0}
                for (i, x) in enumerate(tmp[t]['data']):
                    if x['t'] > ft:
                        d['e'] += prevx['e']
                        d['x'] += prevx['x']
                        break
                    elif x['t'] == ft or i == len(tmp[t]['data'])-1:
                        d['e'] += x['e']
                        d['x'] += x['x']
                        break
                    else:
                        prevx = x
            data.append(d)
        
        re = {}
        re['data'] = data
        re['status'] = status
        re['pwg'] = pwg
        re['taskchain'] = False
        return re

    def get(self, query, probe=100, priority_min=0, priority_max=-1,
            status=None, pwg=None):
        return json.dumps({"results": self.prepare_response(
                    query.split(','), probe, priority_min, priority_max,
                    status, pwg)})
Example #24
class GetLifetime():

    def __init__(self):
        self.es = ElasticSearch(config.DATABASE_URL)
        # normally es will crop results
        # and a million rows is more than we have in db
        self.overflow = 1000000

    def db_query(self, input):
        """
        Query DB and return array of raw documents
        """
        iterable = []
        try:
            # check if the input is a campaign
            req_arr = [s['_source'] for s in
                       self.es.search(('member_of_campaign:%s' % input),
                                      index='requests',
                                      size=self.overflow)['hits']['hits']]

            for r in req_arr:
                res = ([s['name'] for s in
                        self.es.get('requests', 'request',
                                    r['prepid'])['_source']['reqmgr_name']])
                for e in res:
                    iterable.append(e)
        except:
            pass

        if not len(iterable):
            try:
                # check if the input is a request
                iterable = [s['name'] for s in
                            self.es.get('requests', 'request',
                                        input)['_source']['reqmgr_name']]
            except:
                # input can be a reqmgr_name
                iterable = [input]

        for i in iterable:
            try:
                yield self.es.get('stats', 'stats', i)['_source']
            except:
                yield None

    def rm_useless(self, arr):
        r = []
        prev = {'a': -1, 'e': -1, 'x': -1}
        for a in arr:
            if a['a'] != prev['a'] or a['e'] != prev['e'] or a['x'] != prev['x']:
                r.append(a)
                prev = a
        return r

    def prepare_response(self, query):
        #print "Start"
        #prev = int(round(time.time() * 1000))
        #print prev
        r = []

        # Process the db documents
        for d in self.db_query(query):

            if d is None:
                continue

            response = {}
            response['campaign'] = d['pdmv_campaign']
            response['data'] = []
            response['input'] = query
            response['priority'] = d['pdmv_priority']
            response['pwg'] = '#HaveToQueryRequest'
            response['request'] = d['pdmv_prep_id']
            response['status'] = '#HaveToQueryRequest'
            response['title'] = d['pdmv_prep_id'] + d['pdmv_dataset_name']

            if 'pdmv_monitor_history' in d:
                for record in d['pdmv_monitor_history']:
                    if len(record['pdmv_monitor_time']):
                        data = {}
                        data['a'] = record['pdmv_evts_in_DAS'] + record['pdmv_open_evts_in_DAS']
                        data['e'] = record['pdmv_evts_in_DAS']
                        data['t'] = time.mktime(time.strptime(record['pdmv_monitor_time']))*1000
                        data['x'] = d['pdmv_expected_events']
                        response['data'].append(data)
            r.append(response)
        
        #print "Data prepared"
        #print int(round(time.time() * 1000)) - prev
        #prev = int(round(time.time() * 1000))

        # Step 1: Get accumulated requests
        tmp = {}
        for x in r:
            s = x['request']
            try:
                tmp[s] += x['data']
            except KeyError:
                tmp[s] = x['data']
            tmp[s] = self.rm_useless(tmp[s])

        #for name in tmp:
        #    tmp[name] = sorted(tmp[name], key=lambda e: e['t'])

        #print "Accum request"
        #print int(round(time.time() * 1000)) - prev
        #prev = int(round(time.time() * 1000))


        # Step 2: Get and sort timestamps
        times = []
        for t in tmp:
            times += (x['t'] for x in tmp[t])
        times = sorted(set(times))
        #print "Sorted times"
        #print int(round(time.time() * 1000)) - prev
        #prev = int(round(time.time() * 1000))

        '''
        Step 3 & Step 4
        data = []
        for t in times:
            dummy = {'a':0, 'e':0, 'x':0, 't': t}
            for name in tmp:
                pre = {'a':0, 'e':0, 'x':0}
                for i in xrange(len(tmp[name])):
                    if tmp[name][i]['t'] == t:
                        dummy['a'] += tmp[name][i]['a']
                        dummy['e'] += tmp[name][i]['e']
                        dummy['x'] += tmp[name][i]['x']
                        break
                    elif tmp[name][i]['t'] > t:
                        dummy['a'] += pre['a']
                        dummy['e'] += pre['e']
                        dummy['x'] += pre['x']
                        break
                    elif tmp[name][i]['t'] < t:
                        pre = tmp[name][i]
            data.append(dummy)
        '''

        # Step 3: Create dummy points for each request by carrying the last
        # known point forward so every request has a value at every timestamp
        tmp2 = {}
        for t in tmp:
            nxw = []
            cur_index = 0
            dummy = {'a': 0, 'e': 0, 'x': 0}
            listed = sorted(tmp[t], key=lambda e: e['t'])
            for a in times:
                if cur_index < len(listed) and a == listed[cur_index]['t']:
                    dummy = listed[cur_index]
                    cur_index += 1
                # copy before re-stamping the time, otherwise the same dict
                # (already appended on earlier iterations) gets mutated
                point = dict(dummy)
                point['t'] = a
                nxw.append(point)
            tmp2[t] = nxw
        #print "Dummy points"
        #print int(round(time.time() * 1000)) - prev
        #prev = int(round(time.time() * 1000))

        # down-sample the timeline to roughly 20 evenly spaced points
        skiper = len(times) / 20

        # Step 4: Generating data points
        data = []
        i = 0

        for (x, t) in enumerate(times):
            if i < skiper and x < len(times) - 1 and x != 0:
                i += 1
            else:
                i = 0
                d = {'a': 0, 'e':0, 't': t, 'x': 0}
                for m in tmp2:
                    d['a'] += tmp2[m][x]['a']
                    d['e'] += tmp2[m][x]['e']
                    d['x'] += tmp2[m][x]['x']
                data.append(d)
            
        #print "Data points"
        #print int(round(time.time() * 1000)) - prev
        #print len(data)
        return data

    def get(self, query):
        return json.dumps({"results": self.prepare_response(query)})
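Step 3 above forward-fills each request's last known point so that every request contributes a value at every global timestamp. A small self-contained sketch of that alignment (function and variable names are illustrative, not part of the original code):

def align_series(points, times):
    """Forward-fill a series of {'a', 'e', 'x', 't'} points so it has one
    entry per timestamp in times, carrying the last seen values forward."""
    listed = sorted(points, key=lambda e: e['t'])
    aligned, cur, last = [], 0, {'a': 0, 'e': 0, 'x': 0}
    for t in times:
        if cur < len(listed) and listed[cur]['t'] == t:
            last = listed[cur]
            cur += 1
        point = dict(last)   # copy so previously appended entries stay intact
        point['t'] = t
        aligned.append(point)
    return aligned

# e.g. align_series([{'a': 1, 'e': 1, 'x': 5, 't': 10}], [5, 10, 15]) yields
# zeroed counters at t=5 and carries the t=10 point forward to t=15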
Example #25
0
class GetChain():

    def __init__(self):
        self.countDummy = 0
        self.es = ElasticSearch(config.DATABASE_URL)
        self.overflow = 1000000

    def fakeId(self):
        self.countDummy += 1
        return 'X'*(5-len('%d' % (self.countDummy)))+'%d' % (self.countDummy)

    def __createDummyRequest(self, req, memberOfCampaign, status='upcoming',
                             total=None):
        fake_r = {}
        fake_r['status'] = status
        fake_r['member_of_campaign'] = memberOfCampaign
        for member in ['pwg', 'priority', 'total_events', 'time_event']:
            fake_r[member] = req[member]
        if total is not None:
            fake_r['total_events'] = total
        fake_r['prepid'] = '-'.join([req['pwg'],
                                     memberOfCampaign, self.fakeId()])
        fake_r['cloned_from'] = req['prepid']
        return fake_r

    def get(self, campaign):
        arg_list = campaign.split(',')
        # Get all chained campaigns which contain selected CAMPAIGN
        # reduce the list to chained campaigns only
        while True:
            again = False
            for arg in arg_list:
                if not arg.startswith('chain'):
                    # this is a flow or a campaign: it does not matter for the query
                    ccs = [s['_source'] for s in
                           self.es.search(('campaigns:%s' % arg),
                                          index='chained_campaigns',
                                          size=self.overflow)['hits']['hits']]
                    arg_list.extend(map(lambda cc: cc['prepid'], ccs))
                    arg_list.remove(arg)
                    again = True
                    break
            if not again:
                break
        #  arg_list contains only chained campaigns
        steps = []  # what are the successive campaigns
        all_cr = []  # what are the chained requests to look at
        all_cc = {}
        # unique it
        arg_list = list(set(arg_list))
        # collect all crs
        for a_cc in arg_list:
            try:
                mcm_cc = self.es.get('chained_campaigns',
                                     'chain_campaign', a_cc)['_source']
            except Exception:
                # try to see if that's a flow
                return '%s does not exist' % (a_cc)
            all_cc[a_cc] = mcm_cc  # keep it in mind
            all_cr.extend([s['_source'] for s in
                           self.es.search(('member_of_campaign:%s' % a_cc),
                                          index='chained_requests',
                                          size=self.overflow)['hits']['hits']])
            these_steps = map(lambda s: s[0], mcm_cc['campaigns'])
            if len(steps) == 0:
                steps = these_steps
            else:
                # concatenate to existing steps
                # add possible steps at the beginning
                connection = 0
                while not steps[connection] in these_steps:
                    connection += 1
                new_start = these_steps.index(steps[connection])
                if new_start != 0:
                    # they do not start at the same campaign
                    for where in range(new_start):
                        steps.insert(where, these_steps[where])
                # verify strict overlapping
                # ==> this check does not work properly and limits flexibility
                for check in range(new_start, len(these_steps)):
                    if these_steps[check] not in steps:
                        steps.append(these_steps[check])
        # preload all requests !!!
        all_requests = {}
        for step in steps:
            for r in [s['_source'] for s in
                      self.es.search(('member_of_campaign:%s' % step),
                                     index='requests',
                                     size=self.overflow)['hits']['hits']]:
                all_requests[r['prepid']] = r
        # avoid double counting
        already_counted = set()
        # the list of requests to be emitted to d3js
        list_of_request_for_ramunas = []
        for cr in all_cr:
            upcoming = 0
            if len(cr['chain']) == 0:
                # crap data
                continue
            # walk the full chain rather than stopping at cr['step']
            stop_at = len(cr['chain']) - 1
            for (r_i, r) in enumerate(cr['chain']):
                if r_i > stop_at:
                    # this is a reserved request, will count as upcoming later
                    continue
                mcm_r = all_requests[r]
                upcoming = mcm_r['total_events']
                if r in already_counted:
                    continue
                else:
                    already_counted.add(r)

                # add it to emit
                def pop(mcm_r):
                    for member in mcm_r.keys():
                        if member not in ['prepid', 'pwg', 'priority',
                                          'total_events', 'status',
                                          'member_of_campaign', 'time_event']:
                            mcm_r.pop(member)
                    return mcm_r

                if mcm_r['status'] == 'submitted':
                    mcm_r_fake_done = copy.deepcopy(mcm_r)
                    mcm_r_fake_done['status'] = 'done'
                    mcm_r_fake_done['total_events'] = mcm_r['completed_events']
                    mcm_r_fake_subm = copy.deepcopy(mcm_r)
                    mcm_r_fake_subm['total_events'] = max(
                        [0, mcm_r['total_events'] - mcm_r['completed_events']])
                    list_of_request_for_ramunas.append(pop(mcm_r_fake_subm))
                    list_of_request_for_ramunas.append(pop(mcm_r_fake_done))
                else:
                    list_of_request_for_ramunas.append(pop(mcm_r))
            for noyet in all_cc[cr[
                    'member_of_campaign']]['campaigns'][stop_at+1:]:
                # create a fake request with the proper member of campaign
                processing_r = all_requests[cr['chain'][stop_at]]
                fake_one = self.__createDummyRequest(processing_r, noyet[0],
                                                     total=upcoming)
                list_of_request_for_ramunas.append(fake_one)
        return json.dumps({"results": list_of_request_for_ramunas})
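Both classes above repeat the same search pattern against the pyelasticsearch client these examples appear to use: a Lucene query string, an explicit index, and a very large size so the result set is not cropped. A small helper capturing that pattern (the function name, the example query and the campaign value are illustrative):

from pyelasticsearch import ElasticSearch

def search_sources(es, index, query, size=1000000):
    """Run a query-string search and return only the _source documents."""
    hits = es.search(query, index=index, size=size)['hits']['hits']
    return [hit['_source'] for hit in hits]

# e.g., with the same setup as the classes above:
# es = ElasticSearch(config.DATABASE_URL)
# requests = search_sources(es, 'requests', 'member_of_campaign:Summer12')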