def do_full_sync(self):
    """Do a full sync of the user's filesystem with the search index.

    Finds all folders on the user's disk and compares them one by one
    against Elasticsearch: already-indexed folders are re-synced, new
    folders are indexed. Then fetches every folder document from ES and
    deletes any that no longer exist on disk (covers renamed/removed
    folders). Realtime indexing is suspended for the duration so it
    cannot race with the bulk work, and is re-enabled at the end.
    """
    self.disable_realtime_indexing()

    disk_folders = Folder.find_all_folders(self.user)

    # Multi-get every disk folder by its index id to learn which of
    # them already exist in ES. "fields": ["_id"] keeps the response
    # minimal — we only need existence, not document bodies.
    es_data = {
        "docs": [
            {
                "_index": self.idx_name,
                "_type": "node",
                "_id": folder.index_id,
                "fields": ["_id"],
            }
            for folder in disk_folders
        ]
    }
    results = self.es_service.conn.get('_mget', data=es_data)
    if results['status'] != 200:
        # Without the existence map we cannot tell "sync" from
        # "create", so abort the whole run.
        self.app.logger.error(
            u"Couldn't fetch documents. Full sync stopped.")
        return

    # Ids already present in the index; a set gives O(1) membership
    # tests in the loop below.
    indexed_ids = {d['_id'] for d in results['docs'] if d['exists']}
    for folder in disk_folders:
        if folder.index_id in indexed_ids:
            self.app.logger.debug(
                u'Syncing folder: {f}'.format(f=folder.sys_path))
            self.do_folder_sync(folder.index_id)
        else:
            self.app.logger.debug(
                u'Created folder: {f}'.format(f=folder.sys_path))
            self.index_folders_and_files(folder=folder)

    # Fetch every folder document currently in ES. First ask for the
    # total count so a single search request can page them all in one
    # go ("size": total).
    folder_query = {
        "query": {
            "bool": {
                "must": [{"term": {"type": Node.FOLDER_TYPE}}]
            }
        }
    }
    max_size = int(
        self.es_service.conn.get(
            self.count_url, data=folder_query)['hits']['total'])
    search_url = '{idx_name}/_search'.format(idx_name=self.idx_name)
    search_body = {"from": 0, "size": max_size, "fields": []}
    search_body.update(folder_query)
    es_docs = self.es_service.conn.get(
        search_url, data=search_body)['hits']['hits']

    # Anything indexed in ES but absent from disk has been deleted or
    # renamed on the filesystem and must be removed from the index.
    es_folder_ids = {doc['_id'] for doc in es_docs}
    disk_ids = {folder.index_id for folder in disk_folders}
    deleted_ids = []
    for doc_to_delete in es_folder_ids - disk_ids:
        # Remove the folder's children first; the helper returns the
        # list of ids it removed (which may already include the folder
        # itself — hence the membership check before the direct delete).
        deleted_ids += self.delete_document_by_parent_id(doc_to_delete)
        if doc_to_delete not in deleted_ids:
            deleted_ids.append(
                self.delete_document_by_id(doc_to_delete))
    self.app.logger.debug(
        u'Deleted nodes with id like:\n {name}'.format(name=deleted_ids))

    self.enable_realtime_indexing()
    self.optimize_index()
    self.flush_index()