Esempio n. 1
0
 def index_document_by_node(self, node):
     """
     Store a single ``Node`` in the ES index.

     The document is PUT to ``<idx_name>/node/<id>`` where the id is the
     node's UTF-8 encoded path passed through ``uenc`` twice. The outcome
     is only logged; nothing is returned.

     :param node: the ``Node`` instance to index.
     :raises TypeError: if ``node`` is not a ``Node`` instance.
     """
     if not isinstance(node, Node):
         raise TypeError(u'node is not of type domain.node.Node')

     # Only file nodes report a size; every other type stores ''.
     size = node.get_size() if node.type == Node.FILE_TYPE else ''

     index_name = self.idx_name
     # dual url encoding as ES decodes it
     encoded_path = uenc(uenc(node.path.encode('utf-8')))
     put_url = '{idx_name}/node/{id}'.format(
         idx_name=index_name, id=encoded_path)

     # NOTE(review): the parent is not .encode('utf-8')-ed before uenc,
     # unlike the path above -- confirm what get_parent() returns.
     document = {
         'name': node.name,
         'parent': uenc(node.get_parent()),
         'date_modified': node.date_modified,
         'size': size,
         'type': node.type
     }
     response = self.es_service.conn.put(put_url, data=document)

     # NOTE(review): only status 201 counts as success here; confirm
     # whether re-indexing an existing document reports another status.
     if response['status'] == 201:
         self.app.logger.debug(u'Indexed {name}'.format(name=node.name))
     else:
         self.app.logger.error(
             u'Couldn\'t index document: {doc} to ES'.format(doc=node.path))
Esempio n. 2
0
 def images(self, **kwargs):
     """
     BrooklynMuseumAPI.images: perform a collection.getImages request.

     :param kwargs: query parameters for the call; both ``item_type`` and
         ``item_id`` must be present.
     :returns: the result of ``self.parse_response`` applied to the raw
         response body.
     :raises TypeError: if ``item_type`` or ``item_id`` is missing.
     """
     missing = [key for key in ('item_type', 'item_id') if key not in kwargs]
     if missing:
         # A bare ``raise`` with no active exception only produces a
         # message-less interpreter error; name the real problem instead.
         raise TypeError(
             'images() requires the keyword argument(s): %s'
             % ', '.join(missing))

     req_url = '%s?%s&method=collection.getImages&%s' % (
         self.url, uenc(self._params), uenc(kwargs))
     rsp = urllib2.urlopen(req_url)
     try:
         body = rsp.read()
     finally:
         # Release the connection even if reading the body fails.
         rsp.close()
     return self.parse_response(body)
Esempio n. 3
0
 def do_folder_sync(self, node_id):
     """
     Bring the ES index in line with what one folder holds on disk.

     The folder is loaded from ``node_id``. The ``index_id`` values of its
     ``folders`` and ``files`` are compared with the ids of the indexed
     documents whose ``parent`` is this folder. Documents without a
     counterpart on disk are deleted, nodes missing from the index are
     indexed, and finally the index is flushed.

     Two queries are issued: a count first, whose total is then used as
     the ``size`` of the actual search. The reason is that a huge size
     value decreases Elasticsearch query performance by a huge margin.

     When no folder can be loaded for ``node_id`` an error is logged and
     nothing else is done.
     """
     folder = Folder.get_instance(node_id, decode=True)
     if not folder:
         self.app.logger.error(
             'No folder found by passing node id: {node_id}'.format(
                 node_id=node_id
             ))
         return

     index_id = uenc(folder.path.encode('utf-8'))

     def children_query():
         # Built anew for every request: documents parented by this folder.
         return {"bool": {"must": [{"term": {"parent": index_id}}]}}

     # First query: how many documents are there to fetch?
     count_result = self.es_service.conn.get(
         self.count_url, data={"query": children_query()})
     max_size = int(count_result['hits']['total'])

     # Second query: fetch exactly that many hits, ids only.
     search_url = u'{idx_name}/node/_search'.format(idx_name=self.idx_name)
     search_body = {
         "from": 0,
         "size": max_size,
         "fields": [],
         "query": children_query()
     }
     results = self.es_service.conn.get(search_url, data=search_body)

     indexed_ids = {hit['_id'] for hit in results['hits']['hits']}
     on_disk = {}
     for child in folder.folders + folder.files:
         on_disk[child.index_id] = child
     on_disk_ids = set(on_disk.keys())

     stale_ids = indexed_ids - on_disk_ids
     missing_ids = on_disk_ids - indexed_ids

     for stale_id in stale_ids:
         self.delete_document_by_id(stale_id)
     for missing_id in missing_ids:
         self.index_document_by_node(on_disk[missing_id])
     self.flush_index()
Esempio n. 4
0
 def search(self, **kwargs):
     """
     Perform a ``records`` search request.

     :param kwargs: search parameters; they are encoded with ``uenc`` and
         appended to the query string after the API key.
     :returns: a ``DigitalNZResponse`` built from this client and the raw
         response body.
     """
     req_url = '%s/records/v%s.%s?api_key=%s&%s' % (
         self.base_url,
         self.version,
         self.format,
         self.api_key,
         uenc(kwargs))
     rsp = urllib2.urlopen(req_url)
     try:
         body = rsp.read()
     finally:
         # Release the connection even if reading the body fails.
         rsp.close()
     return DigitalNZResponse(self, body)
Esempio n. 5
0
 def delete_document_by_id(self, document_id):
     """
     Delete a document from ES by its id string.

     :param document_id: id string of the document to delete.
     :returns: ``document_id`` when the delete result reports status 200;
         otherwise the failure is logged as an error and ``None`` is
         returned.
     """
     # dual url encoding as ES decodes it
     # NOTE(review): only one uenc call is applied here -- presumably the
     # incoming id is already encoded once; confirm against the callers.
     target = '{idx_name}/node/{id}'.format(
         idx_name=self.idx_name, id=uenc(document_id))
     response = self.es_service.conn.delete(target)
     if response['status'] == 200:
         return document_id
     self.app.logger.error(
         u'Couldn\'t delete document: {doc} from ES'.format(doc=target))
     return None
Esempio n. 6
0
 def custom_search(self, title=None, **kwargs):
     """
     Perform a request against the custom search named ``title``.

     :param title: name of the custom search; it becomes part of the
         request path and is required despite its ``None`` default.
     :param kwargs: search parameters; they are encoded with ``uenc`` and
         appended to the query string after the API key.
     :returns: a ``DigitalNZResponse`` built from this client and the raw
         response body.
     :raises TypeError: if ``title`` is ``None``.
     """
     if title is None:
         # A bare ``raise`` with no active exception only produces a
         # message-less interpreter error; name the real problem instead.
         raise TypeError('custom_search() requires a title')

     req_url = '%s/custom_searches/v%s/%s.%s?api_key=%s&%s' % (
         self.base_url,
         self.version,
         title,
         self.format,
         self.api_key,
         uenc(kwargs))
     rsp = urllib2.urlopen(req_url)
     try:
         body = rsp.read()
     finally:
         # Release the connection even if reading the body fails.
         rsp.close()
     return DigitalNZResponse(self, body)
Esempio n. 7
0
 def index_folders_and_files(self, folder=None):
     """
     Bulk-index every folder returned by ``find_all_folders`` and index
     the files of each one through ``index_files``.

     The url encoded path of a folder is used as its id in the
     ElasticSearch index.

     :param folder: passed straight to ``self.find_all_folders``.
     :returns: the sum of the ``index_files`` results plus the number of
         folders.
     """
     folders = self.find_all_folders(folder)
     file_count = 0
     bulk_payload = []
     for entry in folders:
         doc_id = uenc(entry['path'].encode('utf-8'))
         parent_id = uenc(entry['parent'].encode('utf-8'))
         # Each folder contributes an action line followed by its source.
         bulk_payload.append({'index': {'_id': doc_id}})
         bulk_payload.append({
             'name': entry['name'],
             'parent': parent_id,
             'date_modified': entry['date_modified'],
             'type': Node.FOLDER_TYPE,
             'size': ''
         })
         file_count += self.index_files(entry)
     self.es_service.bulk_insert(self.bulk_insert_url, bulk_payload)
     return file_count + len(folders)
Esempio n. 8
0
 def search(self, **kwargs):
     """
     BrooklynMuseumAPI.search: perform a collection.search request.

     :param kwargs: query parameters for the call; they are encoded with
         ``uenc`` and appended to the query string.
     :returns: the result of ``self.parse_response`` applied to the raw
         response body.
     """
     req_url = '%s?%s&method=collection.search&%s' % (
         self.url, uenc(self._params), uenc(kwargs))
     rsp = urllib2.urlopen(req_url)
     try:
         body = rsp.read()
     finally:
         # Release the connection even if reading the body fails.
         rsp.close()
     return self.parse_response(body)