コード例 #1
0
ファイル: user_data_service.py プロジェクト: olavgg/py-sth
 def do_full_sync(self):
     """
     Do a full sync of the user's filesystem against the ES index.

     The sync runs in two passes, with realtime indexing disabled for
     their duration:

     1. Every folder found on disk is looked up in ES with a single
        ``_mget`` request. Folders that already have a document are
        re-synced with ``do_folder_sync``; the remaining ones are indexed
        with ``index_folders_and_files``.
     2. All folder documents are fetched from ES and every document whose
        id does not match the ``index_id`` of a folder on disk (e.g. the
        folder was renamed) is removed with
        ``delete_document_by_parent_id`` and, when that call did not
        report the id itself, ``delete_document_by_id``.

     If the ``_mget`` response status is not 200, an error is logged and
     the sync stops before the second pass. Realtime indexing is
     re-enabled on every exit path, including the aborted one and
     exceptions; the index is optimized and flushed only after a
     completed sync.

     Returns None.
     """
     self.disable_realtime_indexing()
     try:
         disk_folders = Folder.find_all_folders(self.user)

         # Pass 1: ask ES which of the on-disk folders are already indexed.
         es_data = {"docs": [{
             "_index": self.idx_name,
             "_type": "node",
             "_id": folder.index_id,
             "fields": ["_id"]
         } for folder in disk_folders]}
         results = self.es_service.conn.get('_mget', data=es_data)
         if results['status'] != 200:
             self.app.logger.error(
                 u'Couldn\'t fetch documents. Full sync stopped.')
             return

         # A set keeps the per-folder membership test O(1).
         indexed_ids = set(
             doc['_id'] for doc in results['docs'] if doc['exists'])
         for folder in disk_folders:
             if folder.index_id in indexed_ids:
                 self.app.logger.debug(u'Syncing folder: {f}'.format(
                     f=folder.sys_path))
                 self.do_folder_sync(folder.index_id)
             else:
                 self.app.logger.debug(u'Created folder: {f}'.format(
                     f=folder.sys_path))
                 self.index_folders_and_files(folder=folder)

         # Pass 2: remove folder documents that no longer exist on disk.
         folder_query = {
             "bool": {
                 "must": [{"term": {"type": Node.FOLDER_TYPE}}]
             }
         }
         # Count first so the search below can request every folder
         # document in a single page.
         count_response = self.es_service.conn.get(
             self.count_url, data={"query": folder_query})
         max_size = int(count_response['hits']['total'])
         search_url = '{idx_name}/_search'.format(idx_name=self.idx_name)
         es_hits = self.es_service.conn.get(search_url, data={
             "from": 0,
             "size": max_size,
             "fields": [],
             "query": folder_query
         })['hits']['hits']

         es_folder_ids = set(hit['_id'] for hit in es_hits)
         disk_folder_ids = set(folder.index_id for folder in disk_folders)
         deleted_ids = []
         for stale_id in es_folder_ids - disk_folder_ids:
             deleted_ids += self.delete_document_by_parent_id(stale_id)
             # Only delete the folder document itself when the call above
             # did not already report its id.
             if stale_id not in deleted_ids:
                 deleted_ids.append(
                     self.delete_document_by_id(stale_id))
         self.app.logger.debug(
             u'Deleted nodes with id like:\n {name}'.format(
                 name=deleted_ids))
     finally:
         # Never leave realtime indexing switched off, whether the sync
         # completed, was aborted, or raised.
         self.enable_realtime_indexing()
     self.optimize_index()
     self.flush_index()