def create(self, ref, doc_type, name=None, properties=None):
    """Create a new document of *doc_type* under the document *ref*.

    The name is sanitized with safe_filename() before being sent to the
    Document.Create automation operation.
    """
    # Strip characters that are invalid on the local file system
    safe_name = safe_filename(name)
    return self.execute(
        "Document.Create",
        op_input="doc:" + ref,
        type=doc_type,
        name=safe_name,
        properties=properties,
    )
def upload(self, batch_id, file_path, filename=None, file_index=0,
           mime_type=None):
    """Upload a file through an Automation batch

    Uses poster.httpstreaming to stream the upload
    and not load the whole file in memory.
    """
    # NOTE(review): return value discarded — presumably FileAction
    # registers itself globally as the "current action"; confirm.
    FileAction("Upload", file_path, filename)
    # Request URL
    if self.is_new_upload_api_available():
        url = self.rest_api_url + self.batch_upload_path + '/' + batch_id + '/' + str(file_index)
    else:
        # Backward compatibility with old batch upload API
        url = self.automation_url.encode('ascii') + self.batch_upload_url

    # HTTP headers
    if filename is None:
        filename = os.path.basename(file_path)
    file_size = os.path.getsize(file_path)
    if mime_type is None:
        mime_type = guess_mime_type(filename)
    # Quote UTF-8 filenames even though JAX-RS does not seem to be able
    # to retrieve them as per: https://tools.ietf.org/html/rfc5987
    filename = safe_filename(filename)
    quoted_filename = urllib2.quote(filename.encode('utf-8'))
    headers = {
        "X-File-Name": quoted_filename,
        "X-File-Size": file_size,
        "X-File-Type": mime_type,
        "Content-Type": "application/octet-stream",
        "Content-Length": file_size,
    }
    if not self.is_new_upload_api_available():
        # Old API carries batch identification in headers instead of the URL
        headers.update({"X-Batch-Id": batch_id, "X-File-Idx": file_index})
    headers.update(self._get_common_headers())

    # Request data
    input_file = open(file_path, 'rb')
    # Use file system block size if available for streaming buffer
    fs_block_size = self.get_upload_buffer(input_file)
    data = self._read_data(input_file, fs_block_size)

    # Execute request
    cookies = self._get_cookies()
    log.trace("Calling %s with headers %r and cookies %r for file %s",
              url, headers, cookies, file_path)
    req = urllib2.Request(url, data, headers)
    try:
        resp = self.streaming_opener.open(req, timeout=self.blob_timeout)
    except Exception as e:
        log_details = self._log_details(e)
        # A server "Unable to find batch" error means the batch expired:
        # surface it as InvalidBatchException so callers can start over
        if isinstance(log_details, tuple):
            _, _, _, error = log_details
            if error and error.startswith("Unable to find batch"):
                raise InvalidBatchException()
        raise e
    finally:
        input_file.close()
    self.end_action()
    return self._read_response(resp, url)
def conflict_resolver(self, row_id, emit=True):
    """Try to auto-resolve the conflict on state *row_id*.

    The pair is considered resolvable (and is marked synchronized) when
    local and remote digests match, the parents match, and the remote
    name (sanitized) equals the local name. Otherwise, when *emit* is
    True, a newConflict signal is emitted for manual resolution.

    :param row_id: primary key of the state row in the DAO
    :param emit: whether to emit newConflict for unresolvable pairs
    """
    pair = self._dao.get_state_from_id(row_id)
    if not pair:
        log.trace('Conflict resolver: empty pair, skipping')
        return

    try:
        local_client = self.get_local_client()
        parent_ref = local_client.get_remote_id(pair.local_parent_path)
        same_digests = local_client.is_equal_digests(
            pair.local_digest, pair.remote_digest, pair.local_path)
        log.warning(
            'Conflict resolver: names=%r(%r|%r) digests=%r(%s|%s)'
            ' parents=%r(%s|%s) [emit=%r]',
            pair.remote_name == pair.local_name,
            pair.remote_name, pair.local_name,
            same_digests, pair.local_digest, pair.remote_digest,
            pair.remote_parent_ref == parent_ref,
            pair.remote_parent_ref, parent_ref,
            emit,
        )
        if (same_digests
                and pair.remote_parent_ref == parent_ref
                and safe_filename(pair.remote_name) == pair.local_name):
            self._dao.synchronize_state(pair)
        elif emit:
            # Raise conflict only if not resolvable
            self.newConflict.emit(row_id)
    except Exception:
        # Fixed: was a bare "except:", which would also trap
        # SystemExit/KeyboardInterrupt and prevent a clean shutdown
        log.exception('Conflict resolver error')
def _abspath_deduped(self, parent, orig_name, old_name=None): """Absolute path on the operating system with deduplicated names""" # make name safe by removing invalid chars name = safe_filename(orig_name) # decompose the name into actionable components name, suffix = os.path.splitext(name) for _ in range(1000): os_path = self._abspath(os.path.join(parent, name + suffix)) if old_name == (name + suffix): return os_path, name + suffix if not os.path.exists(os_path): return os_path, name + suffix if self._disable_duplication: raise ValueError("De-duplication is disabled") # the is a duplicated file, try to come with a new name log.trace("dedup: %s exist try next", os_path) m = re.match(DEDUPED_BASENAME_PATTERN, name) if m: short_name, increment = m.groups() name = u"%s__%d" % (short_name, int(increment) + 1) else: name = name + u'__1' log.trace("Deduplicate a name: %s", name, exc_info=True) raise ValueError("Failed to de-duplicate '%s' under '%s'" % (orig_name, parent))
def _find_remote_child_match_or_create(self, parent_pair, child_info):
    """Match a remote child against an existing local state, or insert one.

    Returns (child_pair, created) where *created* is True when a brand
    new state row was inserted for this remote child.
    """
    local_path = path_join(parent_pair.local_path,
                           safe_filename(child_info.name))
    remote_parent_path = parent_pair.remote_parent_path + '/' + parent_pair.remote_ref
    # Try to get the local definition if not linked
    child_pair = self._dao.get_state_from_local(local_path)
    # Case of duplication (the file can exists in with a __x) or local rename
    if child_pair is None and parent_pair is not None and self._local_client.exists(parent_pair.local_path):
        # Scan siblings for one carrying the same remote id (xattr)
        for child in self._local_client.get_children_info(parent_pair.local_path):
            if self._local_client.get_remote_id(child.path) == child_info.uid:
                if '__' in child.name:
                    log.debug('Found a deduplication case: %r on %r',
                              child_info, child.path)
                else:
                    log.debug('Found a local rename case: %r on %r',
                              child_info, child.path)
                child_pair = self._dao.get_state_from_local(child.path)
                break
    if child_pair is not None:
        if child_pair.remote_ref is not None and child_pair.remote_ref != child_info.uid:
            # Existing pair is bound to a different remote doc: leave it
            # alone and fall through to inserting a fresh state row
            log.debug("Got an existing pair with different id: %r | %r",
                      child_pair, child_info)
        else:
            if (child_pair.folderish == child_info.folderish
                    and self._local_client.is_equal_digests(
                        child_pair.local_digest, child_info.digest,
                        child_pair.local_path,
                        remote_digest_algorithm=child_info.digest_algorithm)):
                # Local rename
                if child_pair.local_path != local_path:
                    child_pair.local_state = 'moved'
                    child_pair.remote_state = 'unknown'
                    local_info = self._local_client.get_info(child_pair.local_path)
                    self._dao.update_local_state(child_pair, local_info)
                    self._dao.update_remote_state(
                        child_pair, child_info,
                        remote_parent_path=remote_parent_path)
                else:
                    self._dao.update_remote_state(
                        child_pair, child_info,
                        remote_parent_path=remote_parent_path)
                    # Use version+1 as we just update the remote info
                    synced = self._dao.synchronize_state(
                        child_pair, version=child_pair.version + 1)
                    if not synced:
                        # Try again, might happen that it has been modified locally and remotely
                        child_pair = self._dao.get_state_from_id(child_pair.id)
                        if (child_pair.folderish == child_info.folderish
                                and self._local_client.is_equal_digests(
                                    child_pair.local_digest, child_info.digest,
                                    child_pair.local_path,
                                    remote_digest_algorithm=child_info.digest_algorithm)):
                            self._dao.synchronize_state(child_pair)
                            child_pair = self._dao.get_state_from_id(child_pair.id)
                            # Can be updated in previous call
                            synced = child_pair.pair_state == 'synchronized'
                    if synced:
                        self._engine.stop_processor_on(child_pair.local_path)
                    # Push the remote_Id
                    log.debug('Set remote ID on %r / %r == %r', child_pair,
                              child_pair.local_path, child_pair.local_path)
                    self._local_client.set_remote_id(child_pair.local_path,
                                                     child_info.uid)
                    if child_pair.folderish:
                        self._dao.queue_children(child_pair)
            else:
                # Same location but different content: flag for download
                child_pair.remote_state = 'modified'
                self._dao.update_remote_state(
                    child_pair, child_info,
                    remote_parent_path=remote_parent_path)
            child_pair = self._dao.get_state_from_id(child_pair.id,
                                                     from_write=True)
            return child_pair, False
    row_id = self._dao.insert_remote_state(child_info, remote_parent_path,
                                           local_path, parent_pair.local_path)
    child_pair = self._dao.get_state_from_id(row_id, from_write=True)
    return child_pair, True
def _abspath_deduped(self, parent, orig_name, old_name=None):
    """Absolute path on the operating system with deduplicated names

    Returns (absolute_path, final_name). If the target name already
    exists, a "__N" suffix is appended to the base name and incremented
    until a free slot is found (up to 1000 attempts).

    :param parent: parent folder's relative path
    :param orig_name: item (file or folder) name to de-duplicate
    :param old_name: current name of the item when renaming in place
    :raise ValueError: if no free name was found within the attempt limit
    """
    # make name safe by removing invalid chars
    name = safe_filename(orig_name)
    # decompose the name into actionable components (base + extension)
    name, suffix = os.path.splitext(name)
    for _ in range(1000):
        os_path = self._abspath(os.path.join(parent, name + suffix))
        # Renaming an item onto its own current name is always allowed
        if old_name == (name + suffix):
            return os_path, name + suffix
        if not os.path.exists(os_path):
            return os_path, name + suffix
        # Removed dead commented-out code:
        # "#raise ValueError(\"SHOULD NOT DUPLICATE NOW\")"
        # there is a duplicated file, try to come up with a new name
        m = re.match(DEDUPED_BASENAME_PATTERN, name)
        if m:
            # Name already carries a "__N" marker: bump the counter
            short_name, increment = m.groups()
            name = u"%s__%d" % (short_name, int(increment) + 1)
        else:
            name = name + u'__1'
    raise ValueError("Failed to de-duplicate '%s' under '%s'" % (
        orig_name, parent))
def _find_remote_child_match_or_create(self, parent_pair, child_info):
    """Match a remote child against an existing local state, or insert one.

    Returns (child_pair, created) where *created* is True when a brand
    new state row was inserted for this remote child.
    """
    local_path = path_join(parent_pair.local_path,
                           safe_filename(child_info.name))
    remote_parent_path = parent_pair.remote_parent_path + '/' + parent_pair.remote_ref
    # Try to get the local definition if not linked
    child_pair = self._dao.get_state_from_local(local_path)
    if child_pair is not None:
        # Should compare to xattr remote uid
        if child_pair.remote_ref is not None:
            # Already bound to some remote doc: do not reuse this pair
            child_pair = None
        else:
            self._dao.update_remote_state(child_pair, child_info,
                                          remote_parent_path)
            if (child_pair.folderish == child_info.folderish
                    and self._local_client.is_equal_digests(
                        child_pair.local_digest, child_info.digest,
                        child_pair.local_path,
                        remote_digest_algorithm=child_info.digest_algorithm)):
                # Use version+1 as we just update the remote info
                if not self._dao.synchronize_state(
                        child_pair, version=child_pair.version + 1):
                    # Try again, it might have been modified both locally
                    # and remotely in the meantime
                    child_pair = self._dao.get_state_from_id(child_pair.id)
                    if (child_pair.folderish == child_info.folderish
                            and self._local_client.is_equal_digests(
                                child_pair.local_digest, child_info.digest,
                                child_pair.local_path,
                                remote_digest_algorithm=child_info.digest_algorithm)):
                        self._dao.synchronize_state(child_pair)
            # Push the remote_Id
            self._local_client.set_remote_id(local_path, child_info.uid)
            if child_pair.folderish:
                self._dao.queue_children(child_pair)
            child_pair = self._dao.get_state_from_id(child_pair.id,
                                                     from_write=True)
            return child_pair, False
    row_id = self._dao.insert_remote_state(child_info, remote_parent_path,
                                           local_path, parent_pair.local_path)
    child_pair = self._dao.get_state_from_id(row_id, from_write=True)
    return child_pair, True
def _abspath_deduped(self, parent, orig_name, old_name=None): """Absolute path on the operating system with deduplicated names""" # make name safe by removing invalid chars name = safe_filename(orig_name) # decompose the name into actionable components name, suffix = os.path.splitext(name) for _ in range(1000): os_path = self._abspath(os.path.join(parent, name + suffix)) if old_name == (name + suffix): return os_path, name + suffix if not os.path.exists(os_path): return os_path, name + suffix if self._disable_duplication: raise ValueError("De-duplication is disabled") # the is a duplicated file, try to come with a new name log.trace("dedup: %s exist try next", os_path) m = re.match(DEDUPED_BASENAME_PATTERN, name) if m: short_name, increment = m.groups() name = u"%s__%d" % (short_name, int(increment) + 1) else: name = name + u"__1" log.trace("Deduplicate a name: %s", name, exc_info=True) raise ValueError("Failed to de-duplicate '%s' under '%s'" % (orig_name, parent))
def _abspath_deduped(self, parent, orig_name, old_name=None):
    # type: (Text, Text, Optional[Text]) -> Tuple[Text, Text]
    """
    Absolute path on the operating system with deduplicated names.

    Returns (absolute_path, final_name). If the target name already
    exists, a "__N" suffix is appended to the base name and incremented
    until a free slot is found (up to 1000 attempts).

    :raises DuplicationDisabledError: when de-duplication is disabled
        and the name already exists
    :raises DuplicationError: when no free name was found within the
        attempt limit
    """
    # Make name safe by removing invalid chars
    name = safe_filename(orig_name)
    # Decompose the name into actionable components (base + extension)
    name, suffix = os.path.splitext(name)
    for _ in range(1000):
        os_path = self.abspath(os.path.join(parent, name + suffix))
        # Renaming an item onto its own current name is always allowed
        if old_name == (name + suffix):
            return os_path, name + suffix
        if not os.path.exists(os_path):
            return os_path, name + suffix
        if self._disable_duplication:
            raise DuplicationDisabledError('De-duplication is disabled')
        # there is a duplicated file, try to come up with a new name
        m = re.match(DEDUPED_BASENAME_PATTERN, name)
        if m:
            # Name already carries a "__N" marker: bump the counter
            short_name, increment = m.groups()
            name = u"%s__%d" % (short_name, int(increment) + 1)
        else:
            name = name + u'__1'
        log.trace('De-duplicate %r to %r', os_path, name)
    raise DuplicationError('Failed to de-duplicate %r under %r'
                           % (orig_name, parent))
def rename(self, ref, to_name):
    # type: (Text, Text) -> FileInfo
    """ Rename a local file or folder.

    Handles the case-only rename on case-insensitive file systems by
    going through a temporary path, and de-duplicates the target name
    otherwise. Returns the refreshed FileInfo of the renamed item.
    """
    new_name = safe_filename(to_name)
    source_os_path = self.abspath(ref)
    parent = ref.rsplit(u'/', 1)[0]
    old_name = ref.rsplit(u'/', 1)[1]
    parent = u'/' if parent == '' else parent
    locker = self.unlock_ref(source_os_path, is_abs=True)
    try:
        # Check if only case renaming
        if (old_name != new_name
                and old_name.lower() == new_name.lower()
                and not self.is_case_sensitive()):
            # Must use a temp rename as FS is not case sensitive
            temp_path = os.path.join(tempfile.gettempdir(),
                                     unicode(uuid.uuid4()))
            os.rename(source_os_path, temp_path)
            source_os_path = temp_path
            # Try the os rename part
            target_os_path = self.abspath(os.path.join(parent, new_name))
        else:
            target_os_path, new_name = self._abspath_deduped(
                parent, new_name, old_name)
        if old_name != new_name:
            os.rename(source_os_path, target_os_path)
        if sys.platform == 'win32':
            # Reset file attributes (128 == FILE_ATTRIBUTE_NORMAL)
            # See http://msdn.microsoft.com/en-us/library/aa365535%28v=vs.85%29.aspx
            ctypes.windll.kernel32.SetFileAttributesW(
                unicode(target_os_path), 128)
        new_ref = self.get_children_ref(parent, new_name)
        return self.get_info(new_ref)
    finally:
        # Restore the lock state captured by unlock_ref
        self.lock_ref(source_os_path, locker & 2, is_abs=True)
def upload(self, batch_id, file_path, filename=None, file_index=0,
           mime_type=None):
    """Upload a file through an Automation batch

    Uses poster.httpstreaming to stream the upload
    and not load the whole file in memory.
    """
    self.current_action = FileAction("Upload", file_path, filename)
    # Request URL
    url = self.automation_url.encode('ascii') + self.batch_upload_url

    # HTTP headers
    if filename is None:
        filename = os.path.basename(file_path)
    file_size = os.path.getsize(file_path)
    if mime_type is None:
        mime_type = guess_mime_type(filename)
    # Quote UTF-8 filenames even though JAX-RS does not seem to be able
    # to retrieve them as per: https://tools.ietf.org/html/rfc5987
    filename = safe_filename(filename)
    quoted_filename = urllib2.quote(filename.encode('utf-8'))
    headers = {
        "X-Batch-Id": batch_id,
        "X-File-Idx": file_index,
        "X-File-Name": quoted_filename,
        "X-File-Size": file_size,
        "X-File-Type": mime_type,
        "Content-Type": "application/octet-stream",
        "Content-Length": file_size,
    }
    headers.update(self._get_common_headers())

    # Request data
    input_file = open(file_path, 'rb')
    # Use file system block size if available for streaming buffer
    fs_block_size = self.get_upload_buffer(input_file)
    log.trace(
        "Using file system block size"
        " for the streaming upload buffer: %u bytes", fs_block_size)
    data = self._read_data(input_file, fs_block_size)

    # Execute request
    cookies = self._get_cookies()
    log.trace("Calling %s with headers %r and cookies %r for file %s",
              url, headers, cookies, file_path)
    req = urllib2.Request(url, data, headers)
    try:
        resp = self.streaming_opener.open(req, timeout=self.blob_timeout)
    except Exception as e:
        self._log_details(e)
        raise
    finally:
        input_file.close()
    self.end_action()
    return self._read_response(resp, url)
def create(self, ref, doc_type, name=None, properties=None):
    """Create a document of *doc_type* under the parent document *ref*.

    The name is passed through safe_filename() so it is valid on the
    local file system when synchronized back.
    """
    return self.execute(
        "Document.Create",
        op_input="doc:" + ref,
        type=doc_type,
        name=safe_filename(name),
        properties=properties)
def upload(self, batch_id, file_path, filename=None, file_index=0): """Upload a file through an Automation batch Uses poster.httpstreaming to stream the upload and not load the whole file in memory. """ # Request URL url = self.automation_url.encode('ascii') + self.batch_upload_url # HTTP headers if filename is None: filename = os.path.basename(file_path) file_size = os.path.getsize(file_path) ctype, _ = mimetypes.guess_type(filename) if ctype: mime_type = ctype else: mime_type = "application/octet-stream" # Quote UTF-8 filenames even though JAX-RS does not seem to be able # to retrieve them as per: https://tools.ietf.org/html/rfc5987 filename = safe_filename(filename) quoted_filename = urllib2.quote(filename.encode('utf-8')) headers = { "X-Batch-Id": batch_id, "X-File-Idx": file_index, "X-File-Name": quoted_filename, "X-File-Size": file_size, "X-File-Type": mime_type, "Content-Type": "application/octet-stream", "Content-Length": file_size, } headers.update(self._get_common_headers()) # Request data input_file = open(file_path, 'rb') # Use file system block size if available for streaming buffer if sys.platform != 'win32': fs_block_size = os.fstatvfs(input_file.fileno()).f_bsize else: fs_block_size = DEFAULT_STREAMING_BUFFER_SIZE log.trace("Using file system block size" " for the streaming upload buffer: %u bytes", fs_block_size) data = self._read_data(input_file, fs_block_size) # Execute request cookies = self._get_cookies() log.trace("Calling %s with headers %r and cookies %r for file %s", url, headers, cookies, file_path) req = urllib2.Request(url, data, headers) try: resp = self.streaming_opener.open(req, timeout=self.blob_timeout) except Exception as e: self._log_details(e) raise finally: input_file.close() return self._read_response(resp, url)
def _abspath_safe(self, parent, orig_name):
    """Return the absolute OS path of *orig_name* under *parent*,
    with characters invalid on the file system stripped out.
    """
    # Strip characters that are not allowed on the file system
    safe_name = safe_filename(orig_name)
    # Split and re-join base + extension (mirrors _abspath_deduped)
    base, ext = os.path.splitext(safe_name)
    return self._abspath(os.path.join(parent, base + ext))
def _abspath_deduped(self, parent, orig_name, old_name=None,
                     limit=DEDUPED_LIMITED_COUNT, disable_duplication=None):
    """
    Return the absolute path on the operating system with deduplicated names.

    :param parent: parent folder's relative path
    :param orig_name: item (file or folder) name to be deduplicated
    :param old_name: current name of the item when renaming in place
    :param limit: max number of times the orig_name can exist
        (including original name)
    :param disable_duplication: when not None, overrides the instance's
        _disable_duplication setting for this call only
    :raise LimitExceededError: if max number of duplicates (including the
        original) has been reached, or if deduplication is disabled and a
        case-identical sibling exists.
        ValueError if *limit* is below 2.
    :return: full path and name (base + extension) of deduplicated item
    """
    if limit < 2:
        raise ValueError("limit must be 2 or greater")
    # HACK override "disable_duplication" option for each invokation
    # CSPII-10932 field self._disable_duplication can be configured with the
    # "--disable-duplication" command-line argument
    __disable_duplication = self._disable_duplication
    if disable_duplication is not None:
        __disable_duplication = disable_duplication
    # make name safe by removing invalid chars
    name = safe_filename(orig_name)
    # decompose the name into actionable components
    # CSPII-11017: avoid directories with names like 2.0.16, etc.
    # which are deduped as 2.0__1.16 (instead of 2.0.16__1):
    # only split off an extension for regular files, not directories
    if os.path.isfile(self._abspath(os.path.join(parent, name))):
        name, suffix = os.path.splitext(name)
    else:
        suffix = ''
    children = os.listdir(self._abspath(parent))
    for _ in range(limit):
        os_path = self._abspath(os.path.join(parent, name + suffix))
        # Renaming an item onto its own current name is always allowed
        if old_name == (name + suffix):
            return os_path, name + suffix
        if not os.path.exists(os_path):
            return os_path, name + suffix
        # if the path exists but the child name differs in case, then allow
        # deduplication, as the server allows multiple docs where name
        # (title) differs in case only, but the file system does not
        is_case_match = (name + suffix) in children
        if __disable_duplication and is_case_match:
            raise LimitExceededError(None, "De-duplication is disabled")
        # there is a duplicated file, try to come up with a new name
        log.trace("dedup: %s exist try next", os_path)
        m = re.match(DEDUPED_BASENAME_PATTERN, name)
        if m:
            # Name already carries a "__N" marker: bump the counter
            name, increment = m.groups()
        else:
            increment = '0'
        name = u"%s__%d" % (name, int(increment) + 1)
        log.trace("Deduplicate a name: %s", name, exc_info=True)
    # Attempt limit reached: report the last candidate tried
    name, _ = os.path.splitext(os.path.basename(os_path))
    raise LimitExceededError((os_path, name+suffix),
                             "Failed to de-duplicate '%s' under '%s'" % (
                                 orig_name, parent))
def _abspath_safe(self, parent, orig_name):
    # type: (Text, Text) -> Text
    """ Return the absolute OS path for the sanitized *orig_name* under *parent*. """
    # Drop characters the file system forbids
    cleaned = safe_filename(orig_name)
    stem, ext = os.path.splitext(cleaned)
    joined = os.path.join(parent, stem + ext)
    return self.abspath(joined)
def _find_remote_child_match_or_create(self, parent_pair, child_info):
    """Match a remote child against an existing local state, or insert one.

    Returns (child_pair, created) where *created* is True when a brand
    new state row was inserted for this remote child.
    """
    local_path = path_join(parent_pair.local_path,
                           safe_filename(child_info.name))
    remote_parent_path = parent_pair.remote_parent_path + '/' + parent_pair.remote_ref
    # Try to get the local definition if not linked
    child_pair = self._dao.get_state_from_local(local_path)
    # Case of duplication: the file can exists in with a __x
    if child_pair is None and parent_pair is not None and self._local_client.exists(parent_pair.local_path):
        for child in self._local_client.get_children_info(parent_pair.local_path):
            # Skip any file without __ as it cannot be a deduplicate
            if '__' not in child.name:
                continue
            if self._local_client.get_remote_id(child.path) == child_info.uid:
                log.debug("Found a deduplication case: %r on %s",
                          child_info, child.path)
                child_pair = self._dao.get_state_from_local(child.path)
                break
    if child_pair is not None:
        if child_pair.remote_ref is not None and child_pair.remote_ref != child_info.uid:
            # Bound to a different remote doc: discard the match and
            # fall through to inserting a fresh state row
            log.debug("Got an existing pair with different id: %r | %r",
                      child_pair, child_info)
            child_pair = None
        else:
            if (child_pair.folderish == child_info.folderish
                    and self._local_client.is_equal_digests(
                        child_pair.local_digest, child_info.digest,
                        child_pair.local_path,
                        remote_digest_algorithm=child_info.digest_algorithm)):
                self._dao.update_remote_state(
                    child_pair, child_info,
                    remote_parent_path=remote_parent_path)
                # Use version+1 as we just update the remote info
                synced = self._dao.synchronize_state(
                    child_pair, version=child_pair.version + 1)
                if not synced:
                    # Try again, might happen that it has been modified locally and remotely
                    child_pair = self._dao.get_state_from_id(child_pair.id)
                    if (child_pair.folderish == child_info.folderish
                            and self._local_client.is_equal_digests(
                                child_pair.local_digest, child_info.digest,
                                child_pair.local_path,
                                remote_digest_algorithm=child_info.digest_algorithm)):
                        self._dao.synchronize_state(child_pair)
                        child_pair = self._dao.get_state_from_id(child_pair.id)
                        # Can be updated in previous call
                        synced = child_pair.pair_state == 'synchronized'
                if synced:
                    self._engine.stop_processor_on(child_pair.local_path)
                # Push the remote_Id
                log.debug("set remote id on: %r / %s == %s", child_pair,
                          child_pair.local_path, local_path)
                self._local_client.set_remote_id(local_path, child_info.uid)
                if child_pair.folderish:
                    self._dao.queue_children(child_pair)
            else:
                # Same location but different content: flag for download
                child_pair.remote_state = 'modified'
                self._dao.update_remote_state(
                    child_pair, child_info,
                    remote_parent_path=remote_parent_path)
            child_pair = self._dao.get_state_from_id(child_pair.id,
                                                     from_write=True)
            return child_pair, False
    row_id = self._dao.insert_remote_state(child_info, remote_parent_path,
                                           local_path, parent_pair.local_path)
    child_pair = self._dao.get_state_from_id(row_id, from_write=True)
    return child_pair, True
def _find_remote_child_match_or_create(self, parent_pair, child_info):
    """Match a remote child against an existing local state, or insert one.

    Returns (child_pair, created) where *created* is True when a brand
    new state row was inserted for this remote child.
    """
    local_path = path_join(parent_pair.local_path,
                           safe_filename(child_info.name))
    remote_parent_path = parent_pair.remote_parent_path + '/' + parent_pair.remote_ref
    # Try to get the local definition if not linked
    child_pair = self._dao.get_state_from_local(local_path)
    # Case of duplication: the file can exists in with a __x
    if child_pair is None and parent_pair is not None and self._local_client.exists(parent_pair.local_path):
        for child in self._local_client.get_children_info(parent_pair.local_path):
            # Skip any file without __ as it cannot be a deduplicate
            if '__' not in child.name:
                continue
            if self._local_client.get_remote_id(child.path) == child_info.uid:
                log.debug("Found a deduplication case: %r on %s",
                          child_info, child.path)
                child_pair = self._dao.get_state_from_local(child.path)
                break
    if child_pair is not None:
        if child_pair.remote_ref is not None:
            # Already bound to some remote doc: discard the match and
            # fall through to inserting a fresh state row
            child_pair = None
        else:
            if (child_pair.folderish == child_info.folderish
                    and self._local_client.is_equal_digests(
                        child_pair.local_digest, child_info.digest,
                        child_pair.local_path,
                        remote_digest_algorithm=child_info.digest_algorithm)):
                self._dao.update_remote_state(
                    child_pair, child_info,
                    remote_parent_path=remote_parent_path)
                # Use version+1 as we just update the remote info
                synced = self._dao.synchronize_state(
                    child_pair, version=child_pair.version + 1)
                if not synced:
                    # Try again, might happen that it has been modified locally and remotely
                    child_pair = self._dao.get_state_from_id(child_pair.id)
                    if (child_pair.folderish == child_info.folderish
                            and self._local_client.is_equal_digests(
                                child_pair.local_digest, child_info.digest,
                                child_pair.local_path,
                                remote_digest_algorithm=child_info.digest_algorithm)):
                        self._dao.synchronize_state(child_pair)
                        child_pair = self._dao.get_state_from_id(child_pair.id)
                        # Can be updated in previous call
                        synced = child_pair.pair_state == 'synchronized'
                if synced:
                    self._engine.stop_processor_on(child_pair.local_path)
                # Push the remote_Id
                self._local_client.set_remote_id(local_path, child_info.uid)
                if child_pair.folderish:
                    self._dao.queue_children(child_pair)
            else:
                # Same location but different content: flag for download
                child_pair.remote_state = 'modified'
                self._dao.update_remote_state(
                    child_pair, child_info,
                    remote_parent_path=remote_parent_path)
            child_pair = self._dao.get_state_from_id(child_pair.id,
                                                     from_write=True)
            return child_pair, False
    row_id = self._dao.insert_remote_state(child_info, remote_parent_path,
                                           local_path, parent_pair.local_path)
    child_pair = self._dao.get_state_from_id(row_id, from_write=True)
    return child_pair, True
def _scan_pair(self, remote_path):
    """Scan a single remote path, creating its state row if needed.

    Falls back to a full remote scan when the remote item is not located
    under its recorded parent path.
    """
    if remote_path is None:
        return
    remote_path = str(remote_path)
    if self._dao.is_filter(remote_path):
        # Skip if filter
        return
    # Strip any trailing slash
    if remote_path[-1:] == '/':
        remote_path = remote_path[0:-1]
    remote_ref = os.path.basename(remote_path)
    parent_path = os.path.dirname(remote_path)
    if parent_path == '/':
        parent_path = ''
    # If pair is present already
    try:
        child_info = self._client.get_info(remote_ref)
    except NotFound:
        # The folder has been deleted
        return
    doc_pair = self._dao.get_state_from_remote_with_path(
        remote_ref, parent_path)
    if doc_pair is not None:
        self._do_scan_remote(doc_pair, child_info)
        return
    log.debug('parent_path: %r\t%r\t%r', parent_path,
              os.path.basename(parent_path), os.path.dirname(parent_path))
    parent_pair = self._dao.get_state_from_remote_with_path(
        os.path.basename(parent_path), os.path.dirname(parent_path))
    log.debug("scan_pair: parent_pair: %r", parent_pair)
    if parent_pair is None:
        return
    local_path = path_join(parent_pair.local_path,
                           safe_filename(child_info.name))
    remote_parent_path = parent_pair.remote_parent_path + '/' + parent_pair.remote_ref
    if os.path.dirname(child_info.path) == remote_parent_path:
        # Remote item lives where we expect it: register and scan it
        row_id = self._dao.insert_remote_state(
            child_info, remote_parent_path, local_path,
            parent_pair.local_path)
        doc_pair = self._dao.get_state_from_id(row_id, from_write=True)
        if child_info.folderish:
            self._do_scan_remote(doc_pair, child_info)
    else:
        log.debug('Remote scan_pair: %r is not available', remote_path)
        self._scan_remote()
def _abspath_deduped(self, parent, orig_name, old_name=None,
                     limit=DEDUPED_LIMITED_COUNT):
    """
    Return the absolute path on the operating system with deduplicated names.

    :param parent: parent folder's relative path
    :param orig_name: item (file or folder) name to be deduplicated
    :param old_name: current name of the item when renaming in place
    :param limit: max number of times the orig_name can exist
        (including original name)
    :raise LimitExceededError: if max number of duplicates (including the
        original) has been reached.
        ValueError if deduplication is disabled or *limit* is below 2.
    :return: full path and name (base + extension) of deduplicated item
    """
    if limit < 2:
        raise ValueError("limit must be 2 or greater")
    # make name safe by removing invalid chars
    name = safe_filename(orig_name)
    # decompose the name into actionable components
    # CSPII-11017: avoid directories with names like 2.0.16, etc. which are deduped as 2.0__1.16
    # (instead of 2.0.16__1): only split an extension off regular files
    if os.path.isfile(self._abspath(os.path.join(parent, name))):
        name, suffix = os.path.splitext(name)
    else:
        suffix = ''
    for _ in range(limit):
        os_path = self._abspath(os.path.join(parent, name + suffix))
        # Renaming an item onto its own current name is always allowed
        if old_name == (name + suffix):
            return os_path, name + suffix
        if not os.path.exists(os_path):
            return os_path, name + suffix
        if self._disable_duplication:
            raise ValueError("De-duplication is disabled")
        # there is a duplicated file, try to come up with a new name
        log.trace("dedup: %s exist try next", os_path)
        m = re.match(DEDUPED_BASENAME_PATTERN, name)
        if m:
            # Name already carries a "__N" marker: bump the counter
            short_name, increment = m.groups()
            name = u"%s__%d" % (short_name, int(increment) + 1)
        else:
            name += u'__1'
        log.trace("Deduplicate a name: %s", name, exc_info=True)
    # Attempt limit reached: report the last candidate tried
    name, _ = os.path.splitext(os.path.basename(os_path))
    raise LimitExceededError((os_path, name+suffix),
                             "Failed to de-duplicate '%s' under '%s'" % (
                                 orig_name, parent))
def _update_remotely(self, doc_pair, local_client, remote_client,
                     is_renaming):
    """Refresh the local file from its remote content.

    Downloads the remote blob to a temporary file, then replaces the
    local file (renaming it as well when *is_renaming* is True) and
    refreshes the stored state/digest.
    """
    os_path = local_client._abspath(doc_pair.local_path)
    if is_renaming:
        new_os_path = os.path.join(os.path.dirname(os_path),
                                   safe_filename(doc_pair.remote_name))
        log.debug("Replacing local file '%s' by '%s'.",
                  os_path, new_os_path)
    else:
        new_os_path = os_path
        log.debug("Updating content of local file '%s'.", os_path)
    self.tmp_file = self._download_content(local_client, remote_client,
                                           doc_pair, new_os_path)
    # Delete original file and rename tmp file
    remote_id = local_client.get_remote_id(doc_pair.local_path)
    local_client.delete_final(doc_pair.local_path)
    if remote_id is not None:
        # Carry the remote id xattr over to the replacement file
        local_client.set_remote_id(local_client.get_path(self.tmp_file),
                                   doc_pair.remote_ref)
    updated_info = local_client.rename(
        local_client.get_path(self.tmp_file), doc_pair.remote_name)
    doc_pair.local_digest = updated_info.get_digest()
    self._dao.update_last_transfer(doc_pair.id, "download")
    self._refresh_local_state(doc_pair, updated_info)
def _scan_pair(self, remote_path):
    """Scan a single remote path, creating its state row if needed.

    Falls back to a full remote scan when the remote item is not located
    under its recorded parent path.
    """
    if remote_path is None:
        return
    remote_path = str(remote_path)
    if self._dao.is_filter(remote_path):
        # Skip if filter
        return
    # Strip any trailing slash
    if remote_path[-1:] == '/':
        remote_path = remote_path[0:-1]
    remote_ref = os.path.basename(remote_path)
    parent_path = os.path.dirname(remote_path)
    if parent_path == '/':
        parent_path = ''
    # If pair is present already
    try:
        child_info = self._client.get_info(remote_ref)
    except NotFound:
        # The folder has been deleted
        return
    doc_pair = self._dao.get_state_from_remote_with_path(remote_ref,
                                                         parent_path)
    if doc_pair is not None:
        log.debug("Remote scan_pair: %s", doc_pair.local_path)
        self._do_scan_remote(doc_pair, child_info)
        log.debug("Remote scan_pair ended: %s", doc_pair.local_path)
        return
    log.debug("parent_path: '%s'\t'%s'\t'%s'", parent_path,
              os.path.basename(parent_path), os.path.dirname(parent_path))
    parent_pair = self._dao.get_state_from_remote_with_path(
        os.path.basename(parent_path), os.path.dirname(parent_path))
    log.debug("scan_pair: parent_pair: %r", parent_pair)
    if parent_pair is None:
        return
    local_path = path_join(parent_pair.local_path,
                           safe_filename(child_info.name))
    remote_parent_path = parent_pair.remote_parent_path + '/' + parent_pair.remote_ref
    if os.path.dirname(child_info.path) == remote_parent_path:
        # Remote item lives where we expect it: register and scan it
        row_id = self._dao.insert_remote_state(
            child_info, remote_parent_path, local_path,
            parent_pair.local_path)
        doc_pair = self._dao.get_state_from_id(row_id, from_write=True)
        if child_info.folderish:
            log.debug("Remote scan_pair: %s", doc_pair.local_path)
            self._do_scan_remote(doc_pair, child_info)
            log.debug("Remote scan_pair ended: %s", doc_pair.local_path)
    else:
        log.debug("Remote scan_pair: %s is not available, Do full scan",
                  remote_path)
        self._scan_remote()
def _conflict_resolver(self, row_id, emit=True):
    """Try to auto-resolve the conflict on state *row_id*.

    The pair is marked synchronized when the sanitized remote name,
    the digests and the parents all match. Otherwise, when *emit* is
    True, a newConflict signal is emitted for manual resolution.

    :param row_id: primary key of the state row in the DAO
    :param emit: whether to emit newConflict for unresolvable pairs
    """
    try:
        pair = self._dao.get_state_from_id(row_id)
        local_client = self.get_local_client()
        parent_ref = local_client.get_remote_id(pair.local_parent_path)
        log.warn("conflict_resolver: name: %d digest: %d(%s/%s) parents: %d(%s/%s)",
                 pair.remote_name == pair.local_name,
                 local_client.is_equal_digests(pair.local_digest,
                                               pair.remote_digest,
                                               pair.local_path),
                 pair.local_digest, pair.remote_digest,
                 pair.remote_parent_ref == parent_ref,
                 pair.remote_parent_ref, parent_ref)
        if (safe_filename(pair.remote_name) == pair.local_name
                and local_client.is_equal_digests(pair.local_digest,
                                                  pair.remote_digest,
                                                  pair.local_path)
                and pair.remote_parent_ref == parent_ref):
            self._dao.synchronize_state(pair)
        elif emit:
            # Raise conflict only if not resolvable
            self.newConflict.emit(row_id)
    except Exception:
        # Fixed: was "pass", which silently swallowed every failure and
        # hid resolver bugs; keep the broad catch so the caller is never
        # crashed, but record the error
        log.exception('Conflict resolver error')
def rename(self, ref, to_name):
    """Rename a local file or folder

    Return the actualized info object.

    Handles the case-insensitive-filesystem case (rename differing only by
    case) by going through an intermediate hidden temporary name, since a
    direct rename would be a no-op on such filesystems.
    """
    import tempfile
    new_name = safe_filename(to_name)
    source_os_path = self._abspath(ref)
    parent = ref.rsplit(u'/', 1)[0]
    old_name = ref.rsplit(u'/', 1)[1]
    parent = u'/' if parent == '' else parent
    locker = self.unlock_ref(ref)
    try:
        # Check if only case renaming
        if (old_name != new_name and old_name.lower() == new_name.lower()
                and not self.is_case_sensitive()):
            # Must use a temp rename as FS is not case sensitive
            # NOTE: os.tempnam is deprecated (emits RuntimeWarning, removed
            # in Python 3); tempfile.mktemp gives the same "unique path that
            # does not exist yet" semantics.
            temp_path = tempfile.mktemp(
                prefix=LocalClient.CASE_RENAME_PREFIX + old_name + '_',
                dir=self._abspath(parent))
            if AbstractOSIntegration.is_windows():
                import ctypes
                # FILE_ATTRIBUTE_HIDDEN = 2: hide the transient file
                ctypes.windll.kernel32.SetFileAttributesW(
                    unicode(temp_path), 2)
            os.rename(source_os_path, temp_path)
            source_os_path = temp_path
            # Try the os rename part
            target_os_path = self._abspath(os.path.join(parent, new_name))
        else:
            target_os_path, new_name = self._abspath_deduped(
                parent, new_name, old_name)
        if old_name != new_name:
            os.rename(source_os_path, target_os_path)
        if AbstractOSIntegration.is_windows():
            import ctypes
            # FILE_ATTRIBUTE_NORMAL = 128: reset attributes on the target
            # See http://msdn.microsoft.com/en-us/library/aa365535%28v=vs.85%29.aspx
            ctypes.windll.kernel32.SetFileAttributesW(
                unicode(target_os_path), 128)
        new_ref = self.get_children_ref(parent, new_name)
        return self.get_info(new_ref)
    finally:
        # Restore only the "remote" bit of the previous lock state
        self.lock_ref(ref, locker & 2)
def rename(self, ref, to_name):
    """Rename a local file or folder

    Return the actualized info object.

    On case-insensitive filesystems a rename differing only by case is
    done through an intermediate hidden temporary name.
    """
    import tempfile
    new_name = safe_filename(to_name)
    source_os_path = self._abspath(ref)
    parent = ref.rsplit(u'/', 1)[0]
    old_name = ref.rsplit(u'/', 1)[1]
    parent = u'/' if parent == '' else parent
    locker = self.unlock_ref(ref)
    try:
        # Check if only case renaming
        if (old_name != new_name and old_name.lower() == new_name.lower()
                and not self.is_case_sensitive()):
            # Must use a temp rename as FS is not case sensitive
            # NOTE: os.tempnam is deprecated (emits RuntimeWarning, removed
            # in Python 3); tempfile.mktemp gives the same "unique path that
            # does not exist yet" semantics.
            temp_path = tempfile.mktemp(prefix='.ren_' + old_name + '_',
                                        dir=self._abspath(parent))
            if AbstractOSIntegration.is_windows():
                import ctypes
                # FILE_ATTRIBUTE_HIDDEN = 2: hide the transient file
                ctypes.windll.kernel32.SetFileAttributesW(
                    unicode(temp_path), 2)
            os.rename(source_os_path, temp_path)
            source_os_path = temp_path
            # Try the os rename part
            target_os_path = self._abspath(os.path.join(parent, new_name))
        else:
            target_os_path, new_name = self._abspath_deduped(parent, new_name, old_name)
        if old_name != new_name:
            os.rename(source_os_path, target_os_path)
        if AbstractOSIntegration.is_windows():
            import ctypes
            # FILE_ATTRIBUTE_NORMAL = 128: reset attributes on the target
            # See http://msdn.microsoft.com/en-us/library/aa365535%28v=vs.85%29.aspx
            ctypes.windll.kernel32.SetFileAttributesW(
                unicode(target_os_path), 128)
        new_ref = self.get_children_ref(parent, new_name)
        return self.get_info(new_ref)
    finally:
        # Restore only the "remote" bit of the previous lock state
        self.lock_ref(ref, locker & 2)
def execute_with_blob(self, command, blob_content, filename, **params):
    """Execute an Automation operation with a blob input

    Beware that the whole content is loaded in memory when calling this.
    """
    self._check_params(command, params)
    url = self.automation_url.encode('ascii') + command
    # Create data by hand :(
    boundary = "====Part=%s=%s===" % (str(time.time()).replace('.', '='),
                                      random.randint(0, 1000000000))
    headers = {
        "Accept": "application/json+nxentity, */*",
        "Content-Type": ('multipart/related;boundary="%s";'
                         'type="application/json+nxrequest";'
                         'start="request"') % boundary,
    }
    headers.update(self._get_common_headers())
    # First MIME part: the JSON-encoded operation parameters
    container = MIMEMultipart("related",
                              type="application/json+nxrequest",
                              start="request")
    d = {'params': params}
    json_data = json.dumps(d)
    json_part = MIMEBase("application", "json+nxrequest")
    json_part.add_header("Content-ID", "request")
    json_part.set_payload(json_data)
    container.attach(json_part)
    # Second MIME part: the blob itself, typed from the filename extension
    ctype, _ = mimetypes.guess_type(filename)
    if ctype:
        maintype, subtype = ctype.split('/', 1)
    else:
        maintype, subtype = "application", "octet-stream"
    blob_part = MIMEBase(maintype, subtype)
    blob_part.add_header("Content-ID", "input")
    blob_part.add_header("Content-Transfer-Encoding", "binary")
    # Quote UTF-8 filenames even though JAX-RS does not seem to be able
    # to retrieve them as per: https://tools.ietf.org/html/rfc5987
    filename = safe_filename(filename)
    quoted_filename = urllib2.quote(filename.encode('utf-8'))
    content_disposition = ("attachment; filename*=UTF-8''%s" % quoted_filename)
    blob_part.add_header("Content-Disposition", content_disposition)
    blob_part.set_payload(blob_content)
    container.attach(blob_part)
    # Assemble the multipart body manually so the boundary in the HTTP
    # header matches exactly (MIMEMultipart.as_string would pick its own)
    data = (
        "--%s\r\n"
        "%s\r\n"
        "--%s\r\n"
        "%s\r\n"
        "--%s--"
    ) % (
        boundary,
        json_part.as_string(),
        boundary,
        blob_part.as_string(),
        boundary,
    )
    cookies = self._get_cookies()
    log.trace("Calling %s with headers %r and cookies %r for file %s",
              url, headers, cookies, filename)
    req = urllib2.Request(url, data, headers)
    try:
        resp = self.opener.open(req, timeout=self.blob_timeout)
    except Exception as e:
        # Log server-side details before re-raising to the caller
        self._log_details(e)
        raise
    return self._read_response(resp, url)
def _synchronize_remotely_modified(self, doc_pair, local_client, remote_client):
    """Propagate a remote modification (content, rename and/or move) locally.

    When the digests differ the content is re-downloaded; when they agree
    only the metadata change (rename/move) is applied locally, avoiding a
    transfer. A move into a filtered parent is treated as a deletion.
    """
    self.tmp_file = None
    try:
        is_renaming = safe_filename(doc_pair.remote_name) != doc_pair.local_name
        if (not local_client.is_equal_digests(doc_pair.local_digest, doc_pair.remote_digest,
                                              doc_pair.local_path)
                and doc_pair.local_digest is not None):
            # Content changed remotely: download and replace the local file
            self._update_remotely(doc_pair, local_client, remote_client, is_renaming)
        else:
            # digest agree so this might be a renaming and/or a move,
            # and no need to transfer additional bytes over the network
            is_move, new_parent_pair = self._is_remote_move(doc_pair)
            if remote_client.is_filtered(doc_pair.remote_parent_path):
                # A move to a filtered parent ( treat it as deletion )
                self._synchronize_remotely_deleted(doc_pair, local_client, remote_client)
                return
            if not is_move and not is_renaming:
                log.debug("No local impact of metadata update on"
                          " document '%s'.", doc_pair.remote_name)
            else:
                file_or_folder = 'folder' if doc_pair.folderish else 'file'
                if (is_move or is_renaming) and doc_pair.folderish:
                    # Lock the folder so local watchers do not race the move
                    self._engine.set_local_folder_lock(doc_pair.local_path)
                if is_move:
                    # move and potential rename
                    moved_name = doc_pair.remote_name if is_renaming else doc_pair.local_name
                    # NXDRIVE-471: log
                    old_path = doc_pair.local_path
                    new_path = new_parent_pair.local_path + '/' + moved_name
                    if old_path == new_path:
                        # Source and target identical: the move detection was
                        # wrong; mark synchronized instead of moving onto itself
                        log.debug("WRONG GUESS FOR MOVE: %r", doc_pair)
                        self._is_remote_move(doc_pair, debug=True)
                        self._dao.synchronize_state(doc_pair)
                    log.debug("DOC_PAIR(%r): old_path[%d][%r]: %s, new_path[%d][%r]: %s",
                              doc_pair, local_client.exists(old_path),
                              local_client.get_remote_id(old_path), old_path,
                              local_client.exists(new_path),
                              local_client.get_remote_id(new_path), new_path)
                    ## end of add log
                    log.debug("Moving local %s '%s' to '%s'.",
                              file_or_folder,
                              local_client._abspath(doc_pair.local_path),
                              local_client._abspath(new_parent_pair.local_path + '/' + moved_name))
                    # May need to add a lock for move
                    updated_info = local_client.move(doc_pair.local_path,
                                                     new_parent_pair.local_path,
                                                     name=moved_name)
                    new_parent_path = new_parent_pair.remote_parent_path + "/" + new_parent_pair.remote_ref
                    self._dao.update_remote_parent_path(doc_pair, new_parent_path)
                elif is_renaming:
                    # renaming
                    log.debug("Renaming local %s '%s' to '%s'.",
                              file_or_folder,
                              local_client._abspath(doc_pair.local_path),
                              doc_pair.remote_name)
                    updated_info = local_client.rename(
                        doc_pair.local_path, doc_pair.remote_name)
                if is_move or is_renaming:
                    # Should call a DAO method
                    new_path = os.path.dirname(updated_info.path)
                    self._dao.update_local_parent_path(doc_pair,
                                                       os.path.basename(updated_info.path),
                                                       new_path)
                    self._refresh_local_state(doc_pair, updated_info)
        self._handle_readonly(local_client, doc_pair)
        self._dao.synchronize_state(doc_pair)
    except (IOError, WindowsError) as e:
        # File locked by another process: retried later by the caller
        log.warning(
            "Delaying local update of remotely modified content %r due to"
            " concurrent file access (probably opened by another"
            " process).", doc_pair)
        raise e
    finally:
        # Best-effort cleanup of the temporary download file, if any
        if self.tmp_file is not None:
            try:
                if os.path.exists(self.tmp_file):
                    os.remove(self.tmp_file)
            except (IOError, WindowsError):
                pass
        if doc_pair.folderish:
            # Release folder lock in any case
            self._engine.release_folder_lock()
def _find_remote_child_match_or_create(self, parent_pair, child_info):
    """Match a remote child to an existing local pair, or create a new pair.

    Returns a (child_pair, created) tuple where created is True when a new
    state row was inserted, False when an existing pair was updated, and
    (None, None) when the parent is a duplicate-in-error to be ignored.
    """
    if not parent_pair.local_path:
        # The parent folder has an empty local_path,
        # it probably means that it has been put in error as a duplicate
        # by a processor => ignoring this child.
        log.debug('Ignoring child %r of a duplicate folder in error %r',
                  child_info, parent_pair)
        return None, None
    local_path = path_join(parent_pair.local_path, safe_filename(child_info.name))
    remote_parent_path = parent_pair.remote_parent_path + '/' + parent_pair.remote_ref
    # Try to get the local definition if not linked
    child_pair = self._dao.get_state_from_local(local_path)
    # Case of duplication (the file can exists in with a __x) or local rename
    if child_pair is None and parent_pair is not None and self._local_client.exists(
            parent_pair.local_path):
        # Search siblings by stored remote id to catch dedup suffixes and
        # local renames that broke the direct path lookup above
        for child in self._local_client.get_children_info(
                parent_pair.local_path):
            if self._local_client.get_remote_id(
                    child.path) == child_info.uid:
                if '__' in child.name:
                    log.debug('Found a deduplication case: %r on %r',
                              child_info, child.path)
                else:
                    log.debug('Found a local rename case: %r on %r',
                              child_info, child.path)
                child_pair = self._dao.get_state_from_local(child.path)
                break
    if child_pair is not None:
        if child_pair.remote_ref is not None and child_pair.remote_ref != child_info.uid:
            # Same local path bound to a different remote doc: fall through
            # to creation of a fresh pair below
            log.debug("Got an existing pair with different id: %r | %r",
                      child_pair, child_info)
        else:
            if (child_pair.folderish == child_info.folderish
                    and self._local_client.is_equal_digests(
                        child_pair.local_digest, child_info.digest,
                        child_pair.local_path,
                        remote_digest_algorithm=child_info.digest_algorithm
                    )):
                # Local rename
                if child_pair.local_path != local_path:
                    child_pair.local_state = 'moved'
                    child_pair.remote_state = 'unknown'
                    local_info = self._local_client.get_info(
                        child_pair.local_path)
                    self._dao.update_local_state(child_pair, local_info)
                    self._dao.update_remote_state(
                        child_pair, child_info,
                        remote_parent_path=remote_parent_path)
                else:
                    self._dao.update_remote_state(
                        child_pair, child_info,
                        remote_parent_path=remote_parent_path)
                    # Use version+1 as we just update the remote info
                    synced = self._dao.synchronize_state(
                        child_pair, version=child_pair.version + 1)
                    if not synced:
                        # Try again, might happen that it has been modified locally and remotely
                        child_pair = self._dao.get_state_from_id(
                            child_pair.id)
                        if (child_pair.folderish == child_info.folderish
                                and self._local_client.is_equal_digests(
                                    child_pair.local_digest, child_info.digest,
                                    child_pair.local_path,
                                    remote_digest_algorithm=child_info.digest_algorithm)):
                            self._dao.synchronize_state(child_pair)
                            child_pair = self._dao.get_state_from_id(
                                child_pair.id)
                            synced = child_pair.pair_state == 'synchronized'
                    # Can be updated in previous call
                    if synced:
                        self._engine.stop_processor_on(
                            child_pair.local_path)
                    # Push the remote_Id
                    log.debug('Set remote ID on %r / %r == %r',
                              child_pair, child_pair.local_path,
                              child_pair.local_path)
                    self._local_client.set_remote_id(
                        child_pair.local_path, child_info.uid)
                    if child_pair.folderish:
                        self._dao.queue_children(child_pair)
            else:
                # Type or content mismatch: flag for a remote refresh
                child_pair.remote_state = 'modified'
                self._dao.update_remote_state(
                    child_pair, child_info,
                    remote_parent_path=remote_parent_path)
            child_pair = self._dao.get_state_from_id(child_pair.id,
                                                     from_write=True)
            return child_pair, False
    # No usable existing pair: insert a brand new remote state row
    row_id = self._dao.insert_remote_state(child_info, remote_parent_path,
                                           local_path, parent_pair.local_path)
    child_pair = self._dao.get_state_from_id(row_id, from_write=True)
    return child_pair, True
def execute_with_blob(self, command, blob_content, filename, **params):
    """Execute an Automation operation with a blob input.

    The whole multipart body (params JSON + blob) is built in memory
    before being posted, so large blobs are costly here.
    """
    self._check_params(command, params)
    # First MIME part: the JSON-encoded operation parameters
    container = MIMEMultipart("related",
                              type="application/json+nxrequest",
                              start="request")
    d = {'params': params}
    json_data = json.dumps(d)
    json_part = MIMEBase("application", "json+nxrequest")
    json_part.add_header("Content-ID", "request")
    json_part.set_payload(json_data)
    container.attach(json_part)
    # Second MIME part: the blob, typed from the filename extension
    ctype, _ = mimetypes.guess_type(filename)
    if ctype:
        maintype, subtype = ctype.split('/', 1)
    else:
        maintype, subtype = "application", "octet-stream"
    blob_part = MIMEBase(maintype, subtype)
    blob_part.add_header("Content-ID", "input")
    blob_part.add_header("Content-Transfer-Encoding", "binary")
    # Quote UTF-8 filenames even though JAX-RS does not seem to be able
    # to retrieve them as per: https://tools.ietf.org/html/rfc5987
    filename = safe_filename(filename)
    quoted_filename = urllib.quote(filename.encode('utf-8'))
    content_disposition = ("attachment; filename*=UTF-8''%s" % quoted_filename)
    blob_part.add_header("Content-Disposition", content_disposition)
    blob_part.set_payload(blob_content)
    container.attach(blob_part)
    # Create data by hand :(
    boundary = "====Part=%s=%s===" % (str(time.time()).replace('.', '='),
                                      random.randint(0, 1000000000))
    headers = {
        "Accept": "application/json+nxentity, */*",
        "Content-Type": ('multipart/related;boundary="%s";'
                         'type="application/json+nxrequest";'
                         'start="request"') % boundary,
    }
    headers.update(self._get_common_headers())
    # TODO: find a way to stream the parts without loading them all in
    # memory as a byte string
    # The code http://atlee.ca/software/poster/ might provide some
    # guidance to implement this although it cannot be reused directly
    # as we need tighter control on the headers of the multipart
    data = (
        "--%s\r\n"
        "%s\r\n"
        "--%s\r\n"
        "%s\r\n"
        "--%s--"
    ) % (
        boundary,
        json_part.as_string(),
        boundary,
        blob_part.as_string(),
        boundary,
    )
    url = self.automation_url.encode('ascii') + command
    cookies = list(self.cookie_jar) if self.cookie_jar is not None else []
    log.trace("Calling '%s' with cookies %r for file '%s'",
              url, cookies, filename)
    req = urllib2.Request(url, data, headers)
    try:
        resp = self.opener.open(req, timeout=self.blob_timeout)
    except Exception as e:
        # Log server-side details before re-raising to the caller
        self._log_details(e)
        raise
    info = resp.info()
    s = resp.read()
    content_type = info.get('content-type', '')
    cookies = list(self.cookie_jar) if self.cookie_jar is not None else []
    if content_type.startswith("application/json"):
        # JSON response: decode it (None for an empty body)
        log.trace("Response for '%s' with cookies %r and json payload: %r",
                  url, cookies, s)
        return json.loads(s) if s else None
    else:
        # Non-JSON response: return the raw bytes
        log.trace("Response for '%s' with cookies %r and content-type: %r",
                  url, cookies, content_type)
        return s
def _prepare_edit(self, server_url, doc_id, filename, user=None, download_url=None):
    """Download a document for direct edit and tag its folder with xattrs.

    Creates a per-document local folder, downloads the content into it,
    records server URL / user / digest info as remote ids on the folder,
    then moves the temp download to its final name. Returns the final
    file path, or None when no engine matches or the download fails.
    """
    engine = self._get_engine(server_url, user=user)
    if engine is None:
        # TO_REVIEW Display an error message
        log.debug("No engine found for %s(%s)", server_url, doc_id)
        return
    # Get document info
    remote_client = engine.get_remote_doc_client()
    # Avoid any link with the engine, remote_doc are not cached so we can do that
    remote_client.check_suspended = self.stop_client
    info = remote_client.get_info(doc_id)
    # Create local structure
    dir_path = os.path.join(self._folder, doc_id)
    if not os.path.exists(dir_path):
        os.mkdir(dir_path)
    log.trace('Raw filename: %r', filename)
    filename = safe_filename(urllib2.unquote(filename))
    log.trace('Unquoted filename = %r', filename)
    decoded_filename = force_decode(filename)
    if decoded_filename is None:
        decoded_filename = filename
    else:
        # Always use utf-8 encoding for xattr
        filename = decoded_filename.encode('utf-8')
    log.debug("Editing %r ('nxdriveeditname' xattr: %r)",
              decoded_filename, filename)
    file_path = os.path.join(dir_path, decoded_filename)
    # Download the file
    url = None
    if download_url is not None:
        url = server_url
        if not url.endswith('/'):
            url += '/'
        url += download_url
    tmp_file = self._download_content(engine, remote_client, info, file_path,
                                      url=url)
    if tmp_file is None:
        log.debug("Download failed")
        return
    # Set the remote_id
    dir_path = self._local_client.get_path(os.path.dirname(file_path))
    self._local_client.set_remote_id(dir_path, doc_id)
    self._local_client.set_remote_id(dir_path, server_url, "nxdriveedit")
    if user is not None:
        self._local_client.set_remote_id(dir_path, user, "nxdriveedituser")
    if info.digest is not None:
        self._local_client.set_remote_id(dir_path, info.digest,
                                         "nxdriveeditdigest")
        # Set digest algorithm if not sent by the server
        digest_algorithm = info.digest_algorithm
        if digest_algorithm is None:
            digest_algorithm = guess_digest_algorithm(info.digest)
        self._local_client.set_remote_id(dir_path, digest_algorithm,
                                         "nxdriveeditdigestalgorithm")
    self._local_client.set_remote_id(dir_path, filename, "nxdriveeditname")
    # Rename to final filename
    # Under Windows first need to delete target file if exists, otherwise will get a 183 WindowsError
    if sys.platform == 'win32' and os.path.exists(file_path):
        os.unlink(file_path)
    os.rename(tmp_file, file_path)
    return file_path
def _update_remote_states(self):
    """Incrementally update the state of documents from a change summary"""
    summary = self._get_changes()
    if summary['hasTooManyChanges']:
        # Server gave up enumerating: schedule a full remote scan instead
        log.debug("Forced full scan by server")
        remote_path = '/'
        self._dao.add_path_to_scan(remote_path)
        self._dao.update_config('remote_need_full_scan', remote_path)
        return
    # Fetch all events and consider the most recent first
    sorted_changes = sorted(summary['fileSystemChanges'],
                            key=lambda x: x['eventDate'], reverse=True)
    n_changes = len(sorted_changes)
    if n_changes > 0:
        log.debug("%d remote changes detected", n_changes)
        self._metrics['last_changes'] = n_changes
        self._metrics['empty_polls'] = 0
        self.changesFound.emit(n_changes)
    else:
        self._metrics['empty_polls'] = self._metrics['empty_polls'] + 1
        self.noChangesFound.emit()
    # Scan events and update the related pair states
    refreshed = set()
    delete_queue = []
    for change in sorted_changes:
        # Check if synchronization thread was suspended
        # TODO In case of pause or stop: save the last event id
        self._interact()
        eventId = change.get('eventId')
        remote_ref = change['fileSystemItemId']
        processed = False
        for refreshed_ref in refreshed:
            if refreshed_ref.endswith(remote_ref):
                processed = True
                break
        if processed:
            # A more recent version was already processed
            continue
        fs_item = change.get('fileSystemItem')
        new_info = self._client.file_to_info(fs_item) if fs_item else None
        log.trace("Processing event: %r", change)
        # Possibly fetch multiple doc pairs as the same doc can be synchronized at 2 places,
        # typically if under a sync root and locally edited.
        # See https://jira.nuxeo.com/browse/NXDRIVE-125
        doc_pairs = self._dao.get_states_from_remote(remote_ref)
        if not doc_pairs:
            # Relax constraint on factory name in FileSystemItem id to
            # match 'deleted' or 'securityUpdated' events.
            # See https://jira.nuxeo.com/browse/NXDRIVE-167
            doc_pair = self._dao.get_first_state_from_partial_remote(remote_ref)
            if doc_pair is not None:
                doc_pairs = [doc_pair]
        updated = False
        if doc_pairs:
            for doc_pair in doc_pairs:
                doc_pair_repr = doc_pair.local_path if doc_pair.local_path is not None else doc_pair.remote_name
                if eventId == 'deleted':
                    if fs_item is None:
                        # Deferred: actual deletions are applied after the loop
                        log.debug("Push doc_pair '%s' in delete queue",
                                  doc_pair_repr)
                        delete_queue.append(doc_pair)
                    else:
                        log.debug("Ignore delete on doc_pair '%s' as a fsItem is attached",
                                  doc_pair_repr)
                        # To ignore completely put updated to true
                        updated = True
                        break
                elif fs_item is None:
                    if eventId == 'securityUpdated':
                        log.debug("Security has been updated for"
                                  " doc_pair '%s' denying Read access,"
                                  " marking it as deleted", doc_pair_repr)
                        self._dao.delete_remote_state(doc_pair)
                    else:
                        log.debug("Unknown event: '%s'", eventId)
                else:
                    remote_parent_factory = doc_pair.remote_parent_ref.split('#', 1)[0]
                    new_info_parent_factory = new_info.parent_uid.split('#', 1)[0]
                    # Specific cases of a move on a locally edited doc
                    if (eventId == 'documentMoved'
                            and remote_parent_factory == COLLECTION_SYNC_ROOT_FACTORY_NAME):
                        # If moved from a non sync root to a sync root, break to creation case
                        # (updated is False).
                        # If moved from a sync root to a non sync root, break to noop
                        # (updated is True).
                        break
                    elif (eventId == 'documentMoved'
                          and new_info_parent_factory == COLLECTION_SYNC_ROOT_FACTORY_NAME):
                        # If moved from a sync root to a non sync root, delete from local sync root
                        log.debug("Marking doc_pair '%s' as deleted",
                                  doc_pair_repr)
                        self._dao.delete_remote_state(doc_pair)
                    else:
                        # Make new_info consistent with actual doc pair parent path for a doc member of a
                        # collection (typically the Locally Edited one) that is also under a sync root.
                        # Indeed, in this case, when adapted as a FileSystemItem, its parent path will be the one
                        # of the sync root because it takes precedence over the collection,
                        # see AbstractDocumentBackedFileSystemItem constructor.
                        consistent_new_info = new_info
                        if remote_parent_factory == COLLECTION_SYNC_ROOT_FACTORY_NAME:
                            new_info_parent_uid = doc_pair.remote_parent_ref
                            new_info_path = (doc_pair.remote_parent_path + '/'
                                             + remote_ref)
                            consistent_new_info = RemoteFileInfo(
                                new_info.name, new_info.uid,
                                new_info_parent_uid, new_info_path,
                                new_info.folderish,
                                new_info.last_modification_time,
                                new_info.last_contributor,
                                new_info.digest, new_info.digest_algorithm,
                                new_info.download_url, new_info.can_rename,
                                new_info.can_delete, new_info.can_update,
                                new_info.can_create_child)
                        # Perform a regular document update on a document
                        # that has been updated, renamed or moved
                        log.debug("Refreshing remote state info"
                                  " for doc_pair '%s', eventId = %s (force_recursion:%d)",
                                  doc_pair_repr, eventId,
                                  (eventId == "securityUpdated"))
                        remote_parent_path = doc_pair.remote_parent_path
                        # if (new_info.digest != doc_pair.local_digest or
                        #     safe_filename(new_info.name) != doc_pair.local_name
                        #     or new_info.parent_uid != doc_pair.remote_parent_ref):
                        # Force remote state update in case of a locked / unlocked event since lock info is not
                        # persisted, so not part of the dirty check
                        lock_update = eventId == 'documentLocked' or eventId == 'documentUnlocked'
                        if doc_pair.remote_state != 'created':
                            if (new_info.digest != doc_pair.remote_digest or \
                                    safe_filename(new_info.name) != doc_pair.remote_name or \
                                    new_info.parent_uid != doc_pair.remote_parent_ref or \
                                    eventId == 'securityUpdated' or lock_update):
                                doc_pair.remote_state = 'modified'
                            remote_parent_path = os.path.dirname(new_info.path)
                        else:
                            remote_parent_path = os.path.dirname(new_info.path)
                            # TODO Add modify local_path and local_parent_path if needed
                        self._dao.update_remote_state(doc_pair, new_info,
                                                      remote_parent_path=remote_parent_path,
                                                      force_update=lock_update)
                        if doc_pair.folderish:
                            log.trace("Force scan recursive on %r : %d",
                                      doc_pair, (eventId == "securityUpdated"))
                            self._force_remote_scan(doc_pair, consistent_new_info,
                                                    remote_path=new_info.path,
                                                    force_recursion=(eventId == "securityUpdated"),
                                                    moved=(eventId == "documentMoved"))
                        if lock_update:
                            # Re-read the pair and re-apply readonly flags
                            # after a lock/unlock event
                            doc_pair = self._dao.get_state_from_id(doc_pair.id)
                            try:
                                self._handle_readonly(self._local_client, doc_pair)
                            except (OSError, IOError) as ex:
                                log.trace("Can't handle readonly for %r (%r)",
                                          doc_pair, ex)
                                pass
                updated = True
                refreshed.add(remote_ref)
        if new_info and not updated:
            # Handle new document creations
            created = False
            parent_pairs = self._dao.get_states_from_remote(new_info.parent_uid)
            for parent_pair in parent_pairs:
                child_pair, new_pair = (self._find_remote_child_match_or_create(
                    parent_pair, new_info))
                if new_pair:
                    log.debug("Marked doc_pair '%s' as remote creation",
                              child_pair.remote_name)
                if child_pair.folderish and new_pair:
                    log.debug('Remote recursive scan of the content of %s',
                              child_pair.remote_name)
                    remote_path = child_pair.remote_parent_path + "/" + new_info.uid
                    self._force_remote_scan(child_pair, new_info, remote_path)
                created = True
                refreshed.add(remote_ref)
                break
            if not created:
                log.debug("Could not match changed document to a bound local folder: %r",
                          new_info)
    # Sort by path the deletion to only mark parent
    sorted_deleted = sorted(delete_queue,
                            key=lambda x: x.local_path, reverse=False)
    delete_processed = []
    for delete_pair in sorted_deleted:
        # Mark as deleted
        skip = False
        for processed in delete_processed:
            # Skip children of an already-deleted parent
            path = processed.local_path
            if path[-1] != "/":
                path = path + "/"
            if delete_pair.local_path.startswith(path):
                skip = True
                break
        if skip:
            continue
        # Verify the file is really deleted
        if self._client.get_fs_item(delete_pair.remote_ref) is not None:
            continue
        delete_processed.append(delete_pair)
        log.debug("Marking doc_pair '%r' as deleted", delete_pair)
        self._dao.delete_remote_state(delete_pair)