コード例 #1
0
ファイル: user_data_service.py プロジェクト: olavgg/py-sth
 def do_folder_sync(self, node_id):
     """
     Reconcile the ES documents of one folder with its content on disk.

     The folder is resolved with ``Folder.get_instance(node_id,
     decode=True)``. Documents whose ``parent`` term equals the folder's
     index id but whose ids are not among the nodes in ``folder.folders``
     and ``folder.files`` are deleted; nodes on disk that have no document
     are indexed. The index is flushed afterwards. When no folder is
     found, an error is logged and nothing else is done.

     Two requests are sent on purpose: the first only reads the total hit
     count, the second uses that count as its ``size``. Per the original
     author, a huge fixed ``size`` value degrades Elasticsearch query
     performance considerably.
     """
     folder = Folder.get_instance(node_id, decode=True)
     if not folder:
         self.app.logger.error(
             'No folder found by passing node id: {node_id}'.format(
                 node_id=node_id
             ))
         return

     index_id = uenc(folder.path.encode('utf-8'))

     def children_query():
         # Build a fresh dict for every request so the two payloads never
         # share mutable state.
         return {"bool": {"must": [{"term": {"parent": index_id}}]}}

     # First request: how many documents have this folder as parent?
     count_response = self.es_service.conn.get(
         self.count_url, data={"query": children_query()})
     total_hits = int(count_response['hits']['total'])

     # Second request: fetch exactly that many hits, ids only.
     search_url = u'{idx_name}/node/_search'.format(idx_name=self.idx_name)
     search_payload = {
         "from": 0,
         "size": total_hits,
         "fields": [],
         "query": children_query(),
     }
     search_response = self.es_service.conn.get(
         search_url, data=search_payload)
     indexed_ids = {hit['_id'] for hit in search_response['hits']['hits']}

     # Map every node currently on disk by its index id.
     nodes_on_disk = {}
     for node in folder.folders + folder.files:
         nodes_on_disk[node.index_id] = node
     on_disk_ids = set(nodes_on_disk)

     stale_ids = indexed_ids - on_disk_ids
     missing_ids = on_disk_ids - indexed_ids

     for stale_id in stale_ids:
         self.delete_document_by_id(stale_id)
     for missing_id in missing_ids:
         self.index_document_by_node(nodes_on_disk[missing_id])
     self.flush_index()
コード例 #2
0
ファイル: user_data_service.py プロジェクト: olavgg/py-sth
 def index_files(self, folder):
     """
     Bulk-index the files of a folder.

     ``folder`` must provide a ``'path'`` entry; it is resolved with
     ``Folder.get_instance(folder['path'], user=self.user)``. For every
     file of that folder two entries are queued: an ``index`` action
     carrying the file's index id, followed by the document body. The
     queue is sent through ``self.es_service.bulk_insert`` only when it is
     not empty.

     Returns the number of queued bulk entries, i.e. two per file, or 0
     when the folder has no files.
     """
     parent = Folder.get_instance(folder['path'], user=self.user)
     bulk_entries = []
     for entry in parent.files:
         action = {'index': {'_id': entry.index_id}}
         document = {
             'name': entry.name,
             'parent': parent.index_id,
             'date_modified': entry.date_modified,
             'size': entry.get_size(),
             'type': entry.type
         }
         # The action line and its document always travel as a pair.
         bulk_entries.extend([action, document])
     if bulk_entries:
         self.es_service.bulk_insert(self.bulk_insert_url, bulk_entries)
     return len(bulk_entries)