Example #1
def es_deindex(self):
    conn = ES(settings.ES_SERVERS, basic_auth=settings.ES_AUTH)
    try:
        conn.delete(index=self.tenant.slug,
                    doc_type=self.Meta.document_type,
                    id=self.pk)  # the original's `meta.id` is undefined here; the model pk is assumed
    except Exception:  # best-effort deindex: ignore missing docs and connection errors
        pass
Example #2
class KVStore(KVStoreBase):
    def __init__(self, *args, **kwargs):
        super(KVStore, self).__init__(*args, **kwargs)
        self.connection = ES(settings.THUMBNAIL_ELASTIC_SEARCH_SERVERS)

    def _get_raw(self, key):
        try:
            value = self.connection.get(settings.THUMBNAIL_ELASTIC_SEARCH_INDEX,
                                        settings.THUMBNAIL_ELASTIC_SEARCH_DOCUMENT_TYPE,
                                        key)
            return value['_source']['value']
        except Exception:  # treat any lookup failure as a cache miss
            return None

    def _set_raw(self, key, value):
        ret = self.connection.index({"value": value}, 
                                    settings.THUMBNAIL_ELASTIC_SEARCH_INDEX,
                                    settings.THUMBNAIL_ELASTIC_SEARCH_DOCUMENT_TYPE,
                                    key)
        return ret['ok']
    
    def _delete_raw(self, *keys):
        rets = []
        for key in keys:
            try:
                ret = self.connection.delete(settings.THUMBNAIL_ELASTIC_SEARCH_INDEX,
                                             settings.THUMBNAIL_ELASTIC_SEARCH_DOCUMENT_TYPE,
                                             key)
                rets.append(ret['ok'])
            except Exception:  # count any failure as a missed delete
                rets.append(False)
        return rets

    def _find_keys_raw(self, prefix):
        search = Search(query=PrefixQuery("_id", prefix), size=1000, start=0, fields=[])
        results = self.connection.search(search, 
                                         indexes=[settings.THUMBNAIL_ELASTIC_SEARCH_INDEX,], 
                                         doc_types=[settings.THUMBNAIL_ELASTIC_SEARCH_DOCUMENT_TYPE,])
        return [hit['_id'] for hit in results['hits']['hits']]
Example #3
class DocManager(object):
    """The DocManager class creates a connection to the backend engine and
    adds/removes documents; in the case of a rollback, it searches for them.

    The reason for storing id/doc pairs, as opposed to docs alone, is so that
    multiple updates to the same doc reflect the most up-to-date version,
    rather than multiple, slightly different versions of a doc.

    We use Elastic's native fields for _id and ns, but we also store them
    as fields in the document, due to compatibility issues.
    """

    def __init__(self, url, auto_commit=True, unique_key='_id'):
        """Verify Elastic URL and establish a connection.
        """

        if verify_url(url) is False:
            raise SystemError("Invalid Elastic URL: %s" % url)
        self.elastic = ES(server=url)
        self.auto_commit = auto_commit
        self.doc_type = 'string'  # default type is string, change if needed
        self.unique_key = unique_key
        if auto_commit:
            self.run_auto_commit()

    def stop(self):
        """ Stops the instance
        """
        self.auto_commit = False

    def upsert(self, doc):
        """Update or insert a document into Elastic

        If you'd like to have different types of document in your database,
        you can store the doc type as a field in Mongo and set doc_type to
        that field. (e.g. doc_type = doc['_type'])

        """

        doc_type = self.doc_type
        index = doc['ns']
        doc[self.unique_key] = str(doc[self.unique_key])
        doc_id = doc[self.unique_key]

        try:
            self.elastic.index(bsjson.dumps(doc), index, doc_type, doc_id)
        except ValueError:
            logging.info("Could not update %s" % (doc,))
        self.elastic.refresh()

    def remove(self, doc):
        """Removes documents from Elastic

        The input is a python dictionary that represents a mongo document.
        """
        try:
            self.elastic.delete(doc['ns'], self.doc_type, str(doc[self.unique_key]))
        except (NotFoundException, TypeMissingException, IndexMissingException):
            pass

    def _remove(self):
        """For test purposes only. Removes all documents in test.test
        """
        try:
            self.elastic.delete('test.test', 'string', '')
        except (NotFoundException, TypeMissingException, IndexMissingException):
            pass

    def search(self, start_ts, end_ts):
        """Called to query Elastic for documents in a time range.
        """
        res = ESRange('_ts', from_value=start_ts, to_value=end_ts)
        results = self.elastic.search(RangeQuery(res))
        return results

    def _search(self):
        """For test purposes only. Performs search on Elastic with empty query.
        Does not have to be implemented.
        """
        results = self.elastic.search(MatchAllQuery())
        return results

    def commit(self):
        """This function is used to force a refresh/commit.
        """
        retry_until_ok(self.elastic.refresh)

    def run_auto_commit(self):
        """Periodically commits to the Elastic server.
        """
        self.elastic.refresh()

        if self.auto_commit:
            Timer(1, self.run_auto_commit).start()  # threading.Timer: re-run in 1s

    def get_last_doc(self):
        """Returns the last document stored in the Elastic engine.
        """

        result = self.elastic.search(MatchAllQuery(), size=1, sort='_ts:desc')
        for item in result:
            return item
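A minimal usage sketch for the class above; the URL, namespace, and timestamp values are illustrative assumptions:

dm = DocManager('http://localhost:9200', auto_commit=False)
# every doc needs an 'ns' (target index) and the unique key ('_id' by default)
dm.upsert({'_id': 1, 'ns': 'test.test', 'name': 'Joe', '_ts': 5000})
results = dm.search(start_ts=0, end_ts=10000)  # docs whose _ts falls in the range
dm.remove({'_id': 1, 'ns': 'test.test'})
dm.stop()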
Example #4
from utils_pyes import create_and_add_mapping

es = ES()  # defaults to localhost:9200

index_name = "my_index"
type_name = "my_type"

create_and_add_mapping(es, index_name, type_name)

es.index(doc={"name": "Joe Tester", "parsedtext": "Joe Testere nice guy", "uuid": "11111", "position": 1},
         index=index_name, doc_type=type_name, id=1)
es.index(doc={"name": "data1", "value": "value1"}, index=index_name, doc_type=type_name + "2", id=1, parent=1)
es.index(doc={"name": "Bill Baloney", "parsedtext": "Bill Testere nice guy", "uuid": "22222", "position": 2},
         index=index_name, doc_type=type_name, id=2, bulk=True)
es.index(doc={"name": "data2", "value": "value2"}, index=index_name, doc_type=type_name + "2", id=2, parent=2,
         bulk=True)
es.index(doc={"name": "Bill Clinton", "parsedtext": """Bill is not
        nice guy""", "uuid": "33333", "position": 3}, index=index_name, doc_type=type_name, id=3, bulk=True)

es.force_bulk()  # flush any queued bulk operations

es.update(index=index_name, doc_type=type_name, id=2, script='ctx._source.position += 1')
es.update(index=index_name, doc_type=type_name, id=2, script='ctx._source.position += 1', bulk=True)

es.delete(index=index_name, doc_type=type_name, id=1, bulk=True)
es.delete(index=index_name, doc_type=type_name, id=3)

es.force_bulk()
es.indices.refresh(index_name)

es.indices.delete_index(index_name)
Example #5
class ESIndexerBase(object):
    ES_HOST = ES_HOST
    ES_INDEX_NAME = ES_INDEX_NAME
    ES_INDEX_TYPE = 'gene'

    def __init__(self):
        self.conn = ES(self.ES_HOST, default_indexes=[self.ES_INDEX_NAME],
                       timeout=10.0)
        self.step = 10000

    def create_index(self):
        try:
            print self.conn.open_index(self.ES_INDEX_NAME)
        except IndexMissingException:
            print self.conn.create_index(self.ES_INDEX_NAME)

    def delete_index_type(self, index_type):
        '''Delete all data under a given index_type (drops its mapping).'''
        index_name = self.ES_INDEX_NAME
#        index_type = self.ES_INDEX_TYPE
        #Check if index_type exists
        mapping = self.conn.get_mapping(index_type, index_name)
        if index_name not in mapping or index_type not in mapping[index_name]:
            print 'Error: index type "%s" does not exist in index "%s".' % (index_type, index_name)
            return
        path = '/%s/%s' % (index_name, index_type)
        if ask('Confirm to delete all data under "%s":' % path) == 'Y':
            return self.conn.delete_mapping(index_name, index_type)

    def index(self, doc, index_type, id=None):
        '''add a doc to the index. If id is not None, the existing doc will be
           updated.
        '''
#        index_type = self.ES_INDEX_TYPE
        return self.conn.index(doc, self.ES_INDEX_NAME, index_type, id=id)

    def delete_index(self, index_type, id):
        '''delete a doc from the index based on passed id.'''
#        index_type = self.ES_INDEX_TYPE
        return self.conn.delete(self.ES_INDEX_NAME, index_type, id)

    def optimize(self):
        return self.conn.optimize(self.ES_INDEX_NAME, wait_for_merge=True)

    def get_field_mapping(self):
        import dataload
        reload(dataload)
        dataload.register_sources()
        return dataload.get_mapping()

    def build_index(self, doc_d, update_mapping=False, bulk=True):
        index_name = self.ES_INDEX_NAME
        index_type = self.ES_INDEX_TYPE

        #Test if index exists
        try:
            print "Opening index...", self.conn.open_index(index_name)
        except NotFoundException:
            print 'Error: index "%s" does not exist. Create it first.' % index_name
            return -1

        try:
            cur_mapping = self.conn.get_mapping(index_type, index_name)
            empty_mapping = False
        except ElasticSearchException:
            #if no existing mapping available for index_type
            #force update_mapping to True
            empty_mapping = True
            update_mapping = True

#        empty_mapping = not cur_mapping[index_name].get(index_type, {})
#        if empty_mapping:
#            #if no existing mapping available for index_type
#            #force update_mapping to True
#            update_mapping = True

        if update_mapping:
            print "Updating mapping...",
            if not empty_mapping:
                print "\n\tRemoving existing mapping...",
                print self.conn.delete_mapping(index_name, index_type)
            _mapping = self.get_field_mapping()
            print self.conn.put_mapping(index_type,
                                        _mapping,
                                        [index_name])
        print "Building index..."
        t0 = time.time()
        for doc_id, doc in doc_d.items():
            self.conn.index(doc, index_name, index_type, doc_id, bulk=bulk)
        print self.conn.flush()
        print self.conn.refresh()
        print "Done[%s]" % timesofar(t0)

    def query(self, qs, fields='symbol,name', **kwargs):
        _q = StringQuery(qs)
        res = self.conn.search(_q, fields=fields, **kwargs)
        return res
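A hedged usage sketch for the indexer above, assuming ES_HOST/ES_INDEX_NAME are configured and a 'gene' mapping already exists (so get_field_mapping() is not triggered); the doc ids and fields are illustrative:

indexer = ESIndexerBase()
indexer.create_index()
docs = {'1017': {'symbol': 'EGFR', 'name': 'epidermal growth factor receptor'}}
indexer.build_index(docs, update_mapping=False, bulk=False)
print indexer.query('EGFR')           # StringQuery over symbol,name
indexer.delete_index('gene', '1017')  # deletes one doc by id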
Example #6
class ElasticSearchServer(ESDBRequests):
    """
    An object representing the Elasticsearch server; use it to list, create,
    delete and connect to indices. (This class mirrors an older CouchDB
    helper, hence some leftover naming.)
    """

    def __init__(self, indices, types, dburl='http://localhost:9200',
                 usePYCurl=False, ckey=None, cert=None, capath=None):
        """
        Set up a connection to the Elasticsearch server
        """
        check_server_url(dburl)
        # PYCurl TODO
        # Same with cert and key
        self.url = dburl
        self.ESconn = ES(dburl)
        self.ckey = ckey
        self.cert = cert
        check_name(indices)
        check_name(types)
        self.indices = indices
        self.types = types

    def listDatabases(self):
        "List all the databases the server hosts"
        # TODO: CouchDB leftover; an Elasticsearch port should list indices instead
        return self.get('/_all_dbs')

    def createDatabase(self, schema):
        """
        Create the index if it does not already exist and register the
        type mapping built from `schema`.
        """
        self.ESconn.indices.create_index_if_missing(self.indices)
        self.ESconn.indices.put_mapping(self.types, {'properties': schema}, [self.indices])

    def insertDoc(self, doc, _id):
        """ TODO """
        self.ESconn.index(doc, self.indices, self.types, _id)

    def deleteDoc(self,  _id):
        self.ESconn.delete(self.indices, self.types, _id)

    def termBoolQuery(self, query):
        """ query - dict
            must:
                key = key in the database
                value = searchable value
            should
                key = key in the database
                value = searchable value
            must_not
                key = key in the database
                value = searchable value
        """
        queryMust = []
        queryShould = []
        queryMustNot = []
        for item in ["must", "should", "must_not"]:
            if item in query:
                for dictVals in query[item]:
                    for dictKey in dictVals:
                        tempq = TermQuery(dictKey, dictVals[dictKey])
                        if item == "must":
                            queryMust.append(tempq)
                        elif item == "should":
                            queryShould.append(tempq)
                        elif item == "must_not":
                            queryMustNot.append(tempq)
        bool_query = BoolQuery(must=queryMust or None,
                               should=queryShould or None,
                               must_not=queryMustNot or None)

        search = Search(bool_query)
        results = self.ESconn.search(search, self.indices)
        response = {"status_code": 200, "message": "Successful", "content": []}
        response["content"] = [result for result in results]
        return response
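A small usage sketch for termBoolQuery above; the index, type, and field values are illustrative assumptions:

server = ElasticSearchServer(indices='jobs', types='job',
                             dburl='http://localhost:9200')
query = {
    'must': [{'state': 'running'}],         # all of these must match
    'must_not': [{'owner': 'cmsdataops'}],  # none of these may match
}
response = server.termBoolQuery(query)
print response['message'], len(response['content'])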
Example #7
class Elastic(object):

    def init_app(self, app):
        self.conn = ES(app.config['ELASTIC_URL'], timeout=2)
        # multi_create() below mirrors writes to these remote connections
        self.remote_conns = [ES(url) for url in app.config['REMOTE_ELASTIC_URL']]

    def search(self, start=0, size=20, doc_types='resource', indices='order_index', sort=None, **kwargs):
        # build term filters, with a range filter for complete_time
        filters = []
        for k, v in kwargs.items():
            if k != 'complete_time':
                filters.append(TermFilter(k, v))
            elif v != '':
                ct = kwargs['complete_time']
                if len(ct) == 2:
                    filters.append(RangeFilter(ESRange('complete_time', from_value=ct[0], to_value=ct[1])))
                else:
                    filters.append(RangeFilter(ESRange('complete_time', from_value=ct[0])))

        _filter = None
        if filters:
            _filter = ANDFilter(filters)

        bq = MatchAllQuery()
        # filtered
        q = FilteredQuery(bq, _filter)

        # sort
        if sort:
            sf = SortFactory()
            for s in sort:
                sf.add(s)
            s = Search(q, sort=sf)
        else:
            s = Search(q)

        # result
        return self.conn.search(s, indices=indices, doc_types=doc_types, start=start, size=size)

    def delete(self, index='order_index', doc_type='resource', id=''):
        return self.conn.delete(index=index, doc_type=doc_type, id=id)

    def create(self, index='order_index', doc_type='resource', doc=None):
        # try:
        #     self.delete(index, doc_type, doc['id'])
        # except NotFoundException:
        #     pass
        try:
            return self.conn.index(doc, index, doc_type, id=doc['id'])
        except Exception:  # swallow connection failures
            pass

    def multi_create(self, index='order_index', doc_type='resource', doc=None):
        """If the cache is synced to remote servers, use celery."""
        try:
            ret = self.conn.index(doc, index, doc_type, id=doc['id'])
        except Exception:  # swallow connection failures
            ret = None

        try:
            # mirror the write to every remote connection
            for rconn in self.remote_conns:
                rconn.index(doc, index, doc_type, id=doc['id'])
        except Exception:
            print '--------sync cache to remote error------'
        return ret
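A brief usage sketch for the search helper above; the Flask config keys and filter values are illustrative assumptions:

elastic = Elastic()
elastic.init_app(app)  # app: a Flask app with ELASTIC_URL / REMOTE_ELASTIC_URL set
# one term filter plus a date range on complete_time
results = elastic.search(start=0, size=10, status='done',
                         complete_time=('2013-01-01', '2013-12-31'))
for hit in results:
    print hit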
Example #8
class ElasticCatalog(object):
    default_indexes = {
        'zelastic_doc_id': {
            'type': 'string',
            'index': 'not_analyzed'
        }
    }

    def __init__(self, connection_string, elastic_name, storage, bulk=False,
                 bulk_size=400):
        self.conn = ES(connection_string, bulk_size=bulk_size)
        self.bulk_size = bulk_size
        self.name = elastic_name
        self.storage = storage
        self.bulk = bulk

    def update_mapping(self, name):
        meta = self.storage.meta(name)
        indexes = meta['indexes']
        properties = self.default_indexes.copy()
        try:
            self.conn.create_index(self.name)
        except IndexAlreadyExistsException:
            pass
        # map storage-level types to Elastic mapping definitions
        type_map = {
            'str': {'type': 'string', 'index': 'not_analyzed'},
            'full': {'type': 'string', 'index': 'analyzed'},
            'bool': {'type': 'boolean'},
            'int': {'type': 'integer'},
            'datetime': {'type': 'date'},
            'date': {'type': 'date'},
            'float': {'type': 'float'},
        }
        for index_name, _type in indexes.items():
            index = type_map.get(_type)
            if index is not None:
                properties[index_name] = index
        self.conn.indices.put_mapping(
            doc_type=name,
            mapping={
                'ignore_conflicts': True,
                'properties': properties
            },
            indices=[self.name])

    def id(self, container_name, key):
        return '%s-%s' % (container_name, key)

    def index(self, container_name, doc, key):
        # need to add data to the index that isn't actually persisted
        data = {
            'zelastic_doc_id': key
        }
        meta = self.storage.meta(container_name)
        indexes = meta['indexes']
        for index in indexes.keys():
            if index in doc:
                data[index] = doc[index]
        self.conn.index(
            data,
            self.name,
            container_name,
            self.id(container_name, key),
            bulk=self.bulk)

    def delete(self, container_name, key):
        self.conn.delete(
            self.name,
            container_name,
            self.id(container_name, key),
            bulk=self.bulk)

    def delete_all(self, container_name):
        self.conn.delete_mapping(
            self.name,
            container_name)

    def search(self, container_name, query, **kwargs):
        return self.conn.search(
            query,
            indexes=[self.name],
            doc_types=[container_name],
            **kwargs)

    def getFacets(self, container_name, field, size=100):
        return self.conn.search_raw({
            "facets": {
                field: {
                    "terms": {
                        "all_terms": True,
                        "field": field,
                        "size": size,
                        "order": "term"
                    }
                }
            }
        }, indexes=[self.name], doc_types=[container_name])
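A hedged sketch of driving the catalog above; the storage stub, index types, and StringQuery usage are illustrative assumptions:

class DictStorage(object):
    """Minimal stand-in for the storage contract the catalog expects."""
    def meta(self, name):
        return {'indexes': {'title': 'full', 'year': 'int'}}

catalog = ElasticCatalog('127.0.0.1:9200', 'zelastic', DictStorage())
catalog.update_mapping('books')
catalog.index('books', {'title': 'Dune', 'year': 1965}, 'book-1')
print catalog.search('books', StringQuery('dune'))  # pyes.query.StringQuery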