def _synchronize_locally_created(self, doc_pair, local_client, remote_client):
    """Push a locally created file/folder to the remote server.

    Resolves the remote parent (from the DAO state or, as a fallback, the
    xattr-stored remote id), handles the case where the local item already
    carries a remote reference (untrash / move / rename reconciliation),
    then creates the remote folder or streams the file up.  If the parent
    is read-only the pair is rolled back or marked unsynchronized.

    :param doc_pair: the DAO state row for the locally created item
    :param local_client: client for the local filesystem
    :param remote_client: client for the remote file-system API
    :raises ValueError: when the parent folder is not bound to any remote
        folder (illegal state that must be resolved first)
    """
    name = os.path.basename(doc_pair.local_path)
    if not doc_pair.folderish and is_office_temp_file(name) and doc_pair.error_count == 0:
        # Might be an Office temp file: delay it (error mechanism re-queues
        # with a delay).  Recording the error count ensures the pair is not
        # ignored forever — next time error_count > 0 and we proceed.
        self.increase_error(doc_pair, 'Can be Office Temp')
        return
    remote_ref = local_client.get_remote_id(doc_pair.local_path)
    # Find the parent pair to find the ref of the remote folder to
    # create the document in.
    parent_pair = self._dao.get_state_from_local(doc_pair.local_parent_path)
    log.trace('Entered _synchronize_locally_created, parent_pair = %r', parent_pair)
    if parent_pair is None:
        # Parent unknown to the DAO: try to recover its remote id from the
        # extended attributes stored on the local parent folder.
        log.trace("Fallback to xattr")
        if local_client.exists(doc_pair.local_parent_path):
            parent_ref = local_client.get_remote_id(doc_pair.local_parent_path)
            parent_pair = self._get_normal_state_from_remote_ref(parent_ref)
    if parent_pair is None or parent_pair.remote_ref is None:
        # Illegal state: report the error and let's wait for the
        # parent folder issue to get resolved first.
        if parent_pair is not None and parent_pair.pair_state == 'unsynchronized':
            # Parent itself is unsynchronized: propagate that to the child.
            self._dao.unsynchronize_state(doc_pair, 'PARENT_UNSYNC')
            self._handle_unsynchronized(local_client, doc_pair)
            return
        raise ValueError(
            "Parent folder of %s, %s is not bound to a remote folder"
            % (doc_pair.local_path, doc_pair.local_parent_path))
    if remote_ref is not None and '#' in remote_ref:
        # The local file already carries a remote reference.
        # TODO Decide what to do
        log.warn("This document %r has remote_ref %s", doc_pair, remote_ref)
        # Get the remote doc and verify it is not already synced elsewhere
        # (a missed move?).  If same hash, don't do anything and reconcile.
        remote_doc_client = self._engine.get_remote_doc_client()
        # remote_ref format appears to be "...#<doc uid>" — the uid is the
        # last '#'-separated token.
        uid = remote_ref.split('#')[-1]
        info = remote_doc_client.get_info(uid, raise_if_missing=False, use_trash=False)
        if info:
            if info.state == 'deleted':
                # The remote doc is in the trash: restore it instead of
                # creating a duplicate, then reconcile location and name.
                log.debug("Untrash from the client: %r", doc_pair)
                remote_parent_path = parent_pair.remote_parent_path + '/' + parent_pair.remote_ref
                remote_doc_client.undelete(uid)
                fs_item_info = remote_client.get_info(remote_ref)
                # Handle document move: re-parent the restored doc if it is
                # not already under the expected remote parent.
                if not fs_item_info.path.startswith(remote_parent_path):
                    fs_item_info = remote_client.move(fs_item_info.uid, parent_pair.remote_ref)
                # Handle document rename.
                if fs_item_info.name != doc_pair.local_name:
                    fs_item_info = remote_client.rename(fs_item_info.uid, doc_pair.local_name)
                self._dao.update_remote_state(
                    doc_pair, fs_item_info,
                    remote_parent_path=remote_parent_path, versionned=False)
                # Handle document modification - update the doc_pair with a
                # fresh DB read, then delegate content sync.
                doc_pair = self._dao.get_state_from_id(doc_pair.id)
                self._synchronize_locally_modified(doc_pair, local_client, remote_client)
                return
            # Document exists on the server (not trashed): if it is already
            # under the same remote parent, don't create a duplicate.
            if info.parent_uid == parent_pair.remote_ref:
                log.warning("Document is already on the server should not create: %r | %r",
                            doc_pair, info)
                self._dao.synchronize_state(doc_pair)
                return
    parent_ref = parent_pair.remote_ref
    if parent_pair.remote_can_create_child:
        fs_item_info = None
        remote_parent_path = parent_pair.remote_parent_path + '/' + parent_pair.remote_ref
        if doc_pair.folderish:
            log.debug("Creating remote folder '%s' in folder '%s'", name,
                      parent_pair.remote_name)
            fs_item_info = remote_client.make_folder(parent_ref, name)
            remote_ref = fs_item_info.uid
        else:
            # TODO Check if the file is already on the server with the good digest
            log.debug("Creating remote document '%s' in folder '%s'", name,
                      parent_pair.remote_name)
            info = local_client.get_info(doc_pair.local_path)
            if info.size != doc_pair.size:
                # Size has changed (copy must still be running): postpone
                # the upload instead of pushing a partial file.
                doc_pair.local_digest = UNACCESSIBLE_HASH
                self._dao.update_local_state(doc_pair, info, versionned=False, queue=False)
                self._postpone_pair(doc_pair, 'Unaccessible hash')
                return
            if doc_pair.local_digest == UNACCESSIBLE_HASH:
                # Digest could not be computed earlier; retry now.
                doc_pair.local_digest = info.get_digest()
                log.trace("Creation of postponed local file: %r", doc_pair)
                self._dao.update_local_state(doc_pair, info, versionned=False, queue=False)
                if doc_pair.local_digest == UNACCESSIBLE_HASH:
                    # Still unreadable: postpone again.
                    self._postpone_pair(doc_pair, 'Unaccessible hash')
                    return
            # CSPII-9040: Add delay to let Office finish temp file juggling
            time.sleep(2)
            fs_item_info = remote_client.stream_file(
                parent_ref, local_client._abspath(doc_pair.local_path), filename=name)
            remote_ref = fs_item_info.uid
            self._dao.update_last_transfer(doc_pair.id, "upload")
            self._update_speed_metrics()
        self._dao.update_remote_state(
            doc_pair, fs_item_info,
            remote_parent_path=remote_parent_path, versionned=False)
        log.trace("Put remote_ref in %s", remote_ref)
        try:
            # Stamp the local file with its new remote id (xattr).
            local_client.set_remote_id(doc_pair.local_path, remote_ref)
        except (NotFound, IOError, OSError):
            # File has been moved during creation: re-read its current
            # location and stamp/sync there instead.
            new_pair = self._dao.get_state_from_id(doc_pair.id)
            if new_pair.local_path != doc_pair.local_path:
                local_client.set_remote_id(new_pair.local_path, remote_ref)
                self._synchronize_locally_moved(new_pair, local_client, remote_client,
                                                update=False)
                return
        self._synchronize_if_not_remotely_dirty(doc_pair, local_client, remote_client,
                                                remote_info=fs_item_info)
    else:
        # Remote parent is read-only: we cannot create the child remotely.
        child_type = 'folder' if doc_pair.folderish else 'file'
        log.warning("Won't synchronize %s '%s' created in"
                    " local folder '%s' since it is readonly",
                    child_type, doc_pair.local_name, parent_pair.local_name)
        if doc_pair.folderish:
            doc_pair.remote_can_create_child = False
        if self._engine.local_rollback():
            # Rollback policy: delete the local copy and forget the pair.
            local_client.delete(doc_pair.local_path)
            self._dao.remove_state(doc_pair)
        else:
            # Keep the local copy but mark the pair unsynchronized and
            # notify the UI about the read-only parent.
            log.debug("Set pair unsynchronized: %r", doc_pair)
            self._dao.unsynchronize_state(doc_pair, 'READONLY')
            self._engine.newReadonly.emit(doc_pair.local_name, parent_pair.remote_name)
            self._handle_unsynchronized(local_client, doc_pair)
def handle_watchdog_event(self, evt):
    """Entry point for filesystem events coming from the watchdog observer.

    Dispatches the event: root events, ignored/temp files, known pairs,
    unknown moves, and unknown created/modified files (including the
    Windows-specific delete+create reparent dance and move-then-copy
    detection).  All exceptions are caught and logged so the watcher
    thread never dies on a single bad event.

    :param evt: a watchdog event with event_type ('moved', 'deleted',
        'created', 'modified'), src_path and — for moves — dest_path
    """
    log.trace("watchdog event: %r", evt)
    self._metrics['last_event'] = current_milli_time()
    self._action = Action("Handle watchdog event")
    if evt.event_type == 'moved':
        log.debug("Handling watchdog event [%s] on %s to %s", evt.event_type,
                  evt.src_path, evt.dest_path)
    else:
        log.debug("Handling watchdog event [%s] on %r", evt.event_type, evt.src_path)
    try:
        src_path = normalize_event_filename(evt.src_path)
        rel_path = self.client.get_path(src_path)
        if len(rel_path) == 0 or rel_path == '/':
            # Event on the sync root itself: special handling.
            self.handle_watchdog_root_event(evt)
            return
        file_name = os.path.basename(src_path)
        parent_path = os.path.dirname(src_path)
        parent_rel_path = self.client.get_path(parent_path)
        doc_pair = self._dao.get_state_from_local(rel_path)
        # Don't care about an ignored file, unless it is moved.
        if (self.client.is_ignored(parent_rel_path, file_name)
                and evt.event_type != 'moved'):
            return
        if self.client.is_temp_file(file_name):
            return
        if doc_pair is not None:
            # Known pair.
            if doc_pair.pair_state == 'unsynchronized':
                log.debug("Ignoring %s as marked unsynchronized", doc_pair.local_path)
                # Forget the pair when it is deleted, or moved to a
                # non-Office-temp name.
                if (evt.event_type == 'deleted'
                        or evt.event_type == 'moved'
                        and not is_office_temp_file(os.path.basename(evt.dest_path))):
                    log.debug('Removing pair state for deleted or moved event: %r',
                              doc_pair)
                    self._dao.remove_state(doc_pair)
                return
            self._handle_watchdog_event_on_known_pair(doc_pair, evt, rel_path)
            return
        if evt.event_type == 'deleted':
            # Delete of something we never tracked: nothing to do.
            log.debug('Unknown pair deleted: %s', rel_path)
            return
        if (evt.event_type == 'moved'):
            dest_filename = os.path.basename(evt.dest_path)
            if (self.client.is_ignored(parent_rel_path, dest_filename)):
                return
            # Ignore normalization of the filename on the file system
            # See https://jira.nuxeo.com/browse/NXDRIVE-188
            if evt.dest_path == normalize_event_filename(evt.src_path):
                log.debug('Ignoring move from %r to normalized name: %r',
                          evt.src_path, evt.dest_path)
                return
            # From here on, work with the destination of the move.
            src_path = normalize_event_filename(evt.dest_path)
            rel_path = self.client.get_path(src_path)
            local_info = self.client.get_info(rel_path, raise_if_missing=False)
            doc_pair = self._dao.get_state_from_local(rel_path)
            # If the file exists but not the pair:
            if local_info is not None and doc_pair is None:
                # Check if it is a pair that we lost track of, via the
                # remote id stored on the file.
                if local_info.remote_ref is not None:
                    doc_pair = self._dao.get_normal_state_from_remote(local_info.remote_ref)
                    if doc_pair is not None and not self.client.exists(doc_pair.local_path):
                        log.debug("Pair re-moved detected for %r", doc_pair)
                        # Can be a move inside a folder that has also moved.
                        self._handle_watchdog_event_on_known_pair(doc_pair, evt, rel_path)
                        return
                rel_parent_path = self.client.get_path(os.path.dirname(src_path))
                if rel_parent_path == '':
                    rel_parent_path = '/'
                self._dao.insert_local_state(local_info, rel_parent_path)
                # An event can be missed inside a newly created folder as
                # watchdog will put the listener in place after it; rescan.
                if local_info.folderish:
                    self.scan_pair(rel_path)
                    doc_pair = self._dao.get_state_from_local(rel_path)
                    self._schedule_win_folder_scan(doc_pair)
            return
        # If the pair is modified and not known, consider it as created.
        if evt.event_type == 'created' or evt.event_type == 'modified':
            # If doc_pair is not None it means the creation has been
            # caught by a scan.
            # As Windows sends a delete / create event for a reparent.
            # Ignore .*.nxpart ?
            '''
            for deleted in deleted_files:
                if deleted.local_digest == digest:
                    # Move detected
                    log.info('Detected a file movement %r', deleted)
                    deleted.update_state('moved', deleted.remote_state)
                    deleted.update_local(self.client.get_info(
                        rel_path))
                    continue
            '''
            local_info = self.client.get_info(rel_path, raise_if_missing=False)
            if local_info is None:
                # File vanished between the event and now.
                log.trace("Event on a disappeared file: %r %s %s", evt, rel_path, file_name)
                return
            # This might be a move but Windows doesn't emit this event...
            if local_info.remote_ref is not None:
                moved = False
                from_pair = self._dao.get_normal_state_from_remote(local_info.remote_ref)
                if from_pair is not None:
                    if from_pair.processor > 0 or from_pair.local_path == rel_path:
                        # First condition: the pair is being processed.
                        # Second condition: a race condition (same path).
                        log.trace("Ignore creation or modification as the coming pair is being processed: %r",
                                  rel_path)
                        return
                    # If it is not at the origin anymore: magic
                    # teleportation, only on Windows?
                    if not self.client.exists(from_pair.local_path):
                        log.debug('Move from %r to %r', from_pair.local_path, rel_path)
                        from_pair.local_state = 'moved'
                        self._dao.update_local_state(from_pair, self.client.get_info(rel_path))
                        moved = True
                    else:
                        # Possible move-then-copy case, NXDRIVE-471:
                        # compare creation times of both locations.
                        doc_pair_full_path = self.client._abspath(rel_path)
                        doc_pair_creation_time = self.get_creation_time(doc_pair_full_path)
                        from_pair_full_path = self.client._abspath(from_pair.local_path)
                        from_pair_creation_time = self.get_creation_time(from_pair_full_path)
                        log.trace('doc_pair_full_path=%s, doc_pair_creation_time=%s, from_pair_full_path=%s, version=%d',
                                  doc_pair_full_path, doc_pair_creation_time,
                                  from_pair_full_path, from_pair.version)
                        # If the file at the original location is newer, it
                        # was moved to the new location earlier then copied
                        # back (what else can it be?).
                        if (not from_pair_creation_time <= doc_pair_creation_time) and evt.event_type == 'created':
                            log.trace("Found moved file: from_pair: %f doc_pair:%f for %s",
                                      from_pair_creation_time, doc_pair_creation_time,
                                      doc_pair_full_path)
                            log.trace("Creation time are: from: %f | new: %f : boolean: %d",
                                      from_pair_creation_time, doc_pair_creation_time,
                                      (not from_pair_creation_time < doc_pair_creation_time))
                            from_pair.local_state = 'moved'
                            self._dao.update_local_state(from_pair, self.client.get_info(rel_path))
                            # The original location now holds a fresh copy:
                            # track it as a new local state without the old
                            # remote id.
                            self._dao.insert_local_state(
                                self.client.get_info(from_pair.local_path),
                                os.path.dirname(from_pair.local_path))
                            self.client.remove_remote_id(from_pair.local_path)
                            moved = True
                if self._windows:
                    # On Windows a reparent arrives as delete + create:
                    # reconcile with the recorded delete events under lock.
                    self._win_lock.acquire()
                    try:
                        if local_info.remote_ref in self._delete_events:
                            log.debug('Found creation in delete event, handle move instead')
                            # Should be cleaned
                            if not moved:
                                doc_pair = self._delete_events[local_info.remote_ref][1]
                                doc_pair.local_state = 'moved'
                                self._dao.update_local_state(doc_pair, self.client.get_info(rel_path))
                            del self._delete_events[local_info.remote_ref]
                            return
                    finally:
                        self._win_lock.release()
                if from_pair is not None:
                    if moved:
                        # Stop the process here.
                        return
                    # Same remote id at two live locations: a copy-paste.
                    log.debug('Copy paste from %r to %r', from_pair.local_path, rel_path)
            self._dao.insert_local_state(local_info, parent_rel_path)
            # An event can be missed inside a newly created folder as
            # watchdog will put the listener in place after it; rescan.
            if local_info.folderish:
                self.scan_pair(rel_path)
                doc_pair = self._dao.get_state_from_local(rel_path)
                self._schedule_win_folder_scan(doc_pair)
            return
        log.debug('Unhandled case: %r %s %s', evt, rel_path, file_name)
    except Exception:
        # Never let the watcher thread die on one bad event.
        log.error('Watchdog exception', exc_info=True)
    finally:
        self._end_action()
def test_office_temp_file(self):
    """Check is_office_temp_file() against known temp-file name patterns.

    Covers PowerPoint temp files (classic and Office 365/2013), Office
    2007+ 8-hex-digit names (with and without .tmp), Office 2013 short
    hex names, and Office 97 '~...tmp' names, plus normal file names
    that must NOT be detected.
    """
    # Names that MUST be detected as Office temp files.
    positive = [
        # Powerpoint temp file
        "pptED23.tmp",
        "ppt21AD.tmp",
        # Powerpoint temp file by Office 365 / 2013
        "ppt1.tmp",
        "ppt23F.tmp",
        # Office temp file 2007+ (8 hex digits, optional .tmp)
        "A239FDCA",
        "12345678",
        "9ABCDEF0",
        "A239FDCA.tmp",
        "12345678.tmp",
        "9ABCDEF0.tmp",
        # Office temp file 2013
        "C199633.tmp",
        "BCD574.tmp",
        # Office 97
        "~A2D9FDCA1.tmp",
        "~Whatever is here.tmp",
    ]
    # Names that must NOT be detected.
    negative = [
        # Normal
        "plop",
        # Not valid Powerpoint temp names
        "ppt.tmp",
        "pptzDER.tmp",
        "ppt1DER.tmp",
        # Not valid 8-hex-digit names (bad digit, bad length, bad suffix)
        "A2Z9FDCA",
        "A239FDZA",
        "A2Z9FDCA.tmp",
        "A239FDZA.tmp",
        "A2D9FDCA1",
        "A2D9FDCA1.tmp",
        "9ABCDEF0.tep",
        # Office 97 lookalikes
        "~A2D9FDCA1.tm",
        "Whatever is here.tmp",
    ]
    # Keep strict equality with True/False (not assertTrue/assertFalse)
    # so a truthy non-bool return value would still be caught; pass the
    # file name as msg so a failure identifies the offending pattern.
    for file_name in positive:
        self.assertEqual(is_office_temp_file(file_name), True, file_name)
    for file_name in negative:
        self.assertEqual(is_office_temp_file(file_name), False, file_name)
def _handle_watchdog_event_on_known_pair(self, doc_pair, evt, rel_path):
    """Handle a watchdog event for a pair already tracked in the DAO.

    'moved' events are handled inline (Office/TextEdit temp-file
    filtering, Office save-by-replace substitution, canceled-move and
    real-move detection, Windows folder-scan queue update).  All other
    event types are processed under DAO state acquisition so concurrent
    processors don't step on the same pair.

    :param doc_pair: the known DAO state row for this path
    :param evt: the watchdog event
    :param rel_path: path of evt.src_path relative to the sync root
    """
    log.trace("watchdog event %r on known pair: %r", evt, doc_pair)
    if (evt.event_type == 'moved'):
        # Ignore a move to an Office tmp file.
        dest_filename = os.path.basename(evt.dest_path)
        if is_office_temp_file(dest_filename):
            log.debug('Ignoring Office tmp file: %r', evt.dest_path)
            return
        # Ignore normalization of the filename on the file system
        # See https://jira.nuxeo.com/browse/NXDRIVE-188
        if evt.dest_path == normalize_event_filename(evt.src_path):
            log.debug('Ignoring move from %r to normalized name: %r',
                      evt.src_path, evt.dest_path)
            return
        # Work with the destination of the move from here on.
        src_path = normalize_event_filename(evt.dest_path)
        rel_path = self.client.get_path(src_path)
        # Office's weird save-by-replacement handling: the temp file is
        # moved over the real document.
        if is_office_file(dest_filename):
            pair = self._dao.get_state_from_local(rel_path)
            remote_ref = self.client.get_remote_id(rel_path)
            if pair is not None and pair.remote_ref == remote_ref:
                local_info = self.client.get_info(rel_path, raise_if_missing=False)
                if local_info is not None:
                    digest = local_info.get_digest()
                    # Drop the event if the digest hasn't changed; can be
                    # the case if only file permissions have been updated.
                    if not doc_pair.folderish and pair.local_digest == digest:
                        log.trace('Dropping watchdog event [%s] as digest has not changed for %s',
                                  evt.event_type, rel_path)
                        # If the pairs are the same, don't drop the state:
                        # it can happen in case of a server rename on a
                        # document.
                        if doc_pair.id != pair.id:
                            self._dao.remove_state(doc_pair)
                        return
                    # Content changed: mark the surviving pair modified and
                    # drop the substituted one.
                    pair.local_digest = digest
                    pair.local_state = 'modified'
                    self._dao.update_local_state(pair, local_info)
                    self._dao.remove_state(doc_pair)
                    log.debug("Office substitution file: remove pair(%r) mark(%r) as modified",
                              doc_pair, pair)
                    return
        local_info = self.client.get_info(rel_path, raise_if_missing=False)
        if local_info is not None:
            if is_text_edit_tmp_file(local_info.name):
                # macOS TextEdit temp-file juggling: ignore.
                log.debug('Ignoring move to TextEdit tmp file %r for %r',
                          local_info.name, doc_pair)
                return
            old_local_path = None
            rel_parent_path = self.client.get_path(os.path.dirname(src_path))
            if rel_parent_path == '':
                rel_parent_path = '/'
            # Ignore inner movement: compare against the remote name and
            # remote parent to decide whether this is a real move.
            remote_parent_ref = self.client.get_remote_id(rel_parent_path)
            if (doc_pair.remote_name == local_info.name
                    and doc_pair.remote_parent_ref == remote_parent_ref):
                # The pair was moved but it has been canceled manually:
                # back at its remote name/parent, so just mark it synced.
                log.debug("The pair was moved but it has been canceled manually,"
                          " setting state to 'synchronized': %r", doc_pair)
                doc_pair.local_state = 'synchronized'
            elif not (local_info.name == doc_pair.local_name
                      and doc_pair.remote_parent_ref == remote_parent_ref):
                # Name or parent changed: a genuine local move.
                log.debug("Detect move for %r (%r)", local_info.name, doc_pair)
                if doc_pair.local_state != 'created':
                    doc_pair.local_state = 'moved'
                    old_local_path = doc_pair.local_path
                    self._dao.update_local_state(doc_pair, local_info, versionned=True)
            self._dao.update_local_state(doc_pair, local_info, versionned=False)
            if self._windows and old_local_path is not None and self._windows_folder_scan_delay > 0:
                # Keep the Windows delayed-folder-scan queue pointing at
                # the new location (protected by the Windows lock).
                self._win_lock.acquire()
                try:
                    if old_local_path in self._folder_scan_events:
                        log.debug('Update queue of folders to scan: move from %r to %r',
                                  old_local_path, rel_path)
                        del self._folder_scan_events[old_local_path]
                        self._folder_scan_events[rel_path] = (
                            mktime(local_info.last_modification_time.timetuple()),
                            doc_pair)
                finally:
                    self._win_lock.release()
        return
    # Non-move events: acquire the pair so no processor works on it
    # concurrently, then delegate.
    acquired_pair = None
    try:
        acquired_pair = self._dao.acquire_state(self._thread_id, doc_pair.id)
        if acquired_pair is not None:
            self._handle_watchdog_event_on_known_acquired_pair(acquired_pair, evt, rel_path)
        else:
            # Another thread holds the pair: skip the update.
            log.trace("Don't update as in process %r", doc_pair)
    except sqlite3.OperationalError:
        # DB is locked/busy: cannot acquire, log and move on.
        log.exception("Don't update as cannot acquire %r", doc_pair)
    finally:
        self._dao.release_state(self._thread_id)
    if acquired_pair is not None:
        # Re-queue the (possibly updated) state now that it is released.
        refreshed_pair = self._dao.get_state_from_id(acquired_pair.id)
        if refreshed_pair is not None:
            log.trace("Re-queuing acquired, released and refreshed state %r",
                      refreshed_pair)
            self._dao._queue_pair_state(refreshed_pair.id,
                                        refreshed_pair.folderish,
                                        refreshed_pair.pair_state,
                                        pair=refreshed_pair)