def stat(self, path, full_path=False, with_hash=False):
    """
    Format a filesystem stat the same way the server returns remote stats.
    :param path: local path (starting from basepath)
    :param full_path: optionally pass a full path instead
    :param with_hash: add a hash of the file content to the result
    :return: dict() with an fstat-like result:
        {
            'size': 1231,
            'mtime': 1214365,
            'mode': 0255,
            'inode': 3255,
            'hash': '1F3R4234RZEdgFGD'
        }
    """
    if not path:
        return False
    if not full_path:
        path = self.basepath + path
    if not os.path.exists(path):
        return False
    stat_result = os.stat(path)
    s = dict()
    s['size'] = stat_result.st_size
    s['mtime'] = stat_result.st_mtime
    s['mode'] = stat_result.st_mode
    s['inode'] = stat_result.st_ino
    if with_hash:
        if stat.S_ISREG(stat_result.st_mode):
            # Use a context manager so the file handle is not leaked.
            with open(path, 'rb') as fd:
                s['hash'] = hashfile(fd, hashlib.md5())
        elif stat.S_ISDIR(stat_result.st_mode):
            s['hash'] = 'directory'
    return s
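# `hashfile` is called throughout these functions but never defined in this
# section. A minimal sketch of what it presumably does, assuming it takes an
# open file object plus a hashlib hasher and returns a hex digest (the 64 KB
# chunk size is an assumption, not taken from the source):
def hashfile(fd, hasher, blocksize=65536):
    """Hash a file object in fixed-size chunks to keep memory usage flat."""
    buf = fd.read(blocksize)
    while len(buf) > 0:
        hasher.update(buf)
        buf = fd.read(blocksize)
    return hasher.hexdigest()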
def hashrow(self):
    """
    :return: a dict containing SQL code and values to be executed later
    """
    base = self.brow[0]
    row = self.brow[1]
    #logging.info(brow)
    path = row[1]  #unicodedata.normalize('NFC', r[1])
    if os.path.exists(base + path):
        with open(base + path, 'rb') as fd:
            t = (
                os.path.getsize(base + path),
                hashfile(fd, hashlib.md5()),
                os.path.getmtime(base + path),
                pickle.dumps(os.stat(base + path)),
                row[1]
            )
        return {
            "sql": "UPDATE ajxp_index SET bytesize=?, md5=?, mtime=?, stat_result=? WHERE node_path=? AND md5='HASHME'",
            "values": t
        }
    else:
        # The file no longer exists: drop its row from the index. The WHERE
        # clause matches on node_path, so bind the relative path (same value
        # the UPDATE above binds), wrapped in a tuple as sqlite3 expects.
        return {
            "sql": "DELETE FROM ajxp_index WHERE node_path=? AND md5='HASHME'",
            "values": (path,)
        }
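# The dicts returned by hashrow() are meant to be executed later in a batch.
# A hypothetical consumer (function name and batching strategy are
# assumptions, not taken from the source) could run them in one transaction:
import sqlite3

def flush_hashrows(db_path, pending_rows):
    """Execute queued hashrow() results inside a single sqlite3 transaction."""
    conn = sqlite3.connect(db_path)
    try:
        for job in pending_rows:
            conn.execute(job["sql"], job["values"])
        conn.commit()
    finally:
        conn.close()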
def get_node_md5(self, node_path):
    conn = sqlite3.connect(self.db)
    conn.row_factory = sqlite3.Row
    c = conn.cursor()
    for row in c.execute("SELECT md5 FROM ajxp_index WHERE node_path LIKE ?", (node_path,)):
        md5 = row['md5']
        c.close()
        conn.close()
        return md5
    c.close()
    conn.close()
    # No indexed hash found: hash the file content directly. hashfile expects
    # an open file object, not a path string, so open the file first.
    with open(self.base + node_path, 'rb') as fd:
        return hashfile(fd, hashlib.md5())
def get_node_md5(self, node_path):
    node_path = self.normpath(node_path)
    conn = sqlite3.connect(self.db)
    conn.row_factory = sqlite3.Row
    c = conn.cursor()
    for row in c.execute("SELECT md5 FROM ajxp_index WHERE node_path LIKE ?", (node_path,)):
        md5 = row['md5']
        c.close()
        conn.close()
        return md5
    c.close()
    conn.close()
    with open(self.base + node_path, 'rb') as fd:
        return hashfile(fd, hashlib.md5())
def get_node_md5(self, node_path):
    """ WARNING NOT USED """
    node_path = self.normpath(node_path)
    with ClosingCursor(self.db, timeout=self.timeout) as c:
        for row in c.execute("SELECT md5 FROM ajxp_index WHERE node_path LIKE ?", (node_path,)):
            # The context manager closes the cursor on exit, so no explicit
            # c.close() is needed before returning.
            return row['md5']
    with open(self.base + node_path, 'rb') as fd:
        return hashfile(fd, hashlib.md5())
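# ClosingCursor is referenced above but not defined in this section. A
# minimal sketch of the assumed helper: a context manager that opens a
# sqlite3 connection with a Row factory, yields a cursor, and closes both on
# exit (the original implementation may differ):
import sqlite3

class ClosingCursor(object):
    def __init__(self, db, timeout=5.0):
        self.db = db
        self.timeout = timeout

    def __enter__(self):
        self.conn = sqlite3.connect(self.db, timeout=self.timeout)
        self.conn.row_factory = sqlite3.Row
        self.cursor = self.conn.cursor()
        return self.cursor

    def __exit__(self, exc_type, exc_value, traceback):
        self.cursor.close()
        self.conn.close()
        return False  # never swallow exceptions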
def updateOrInsert(self, src_path, is_directory, skip_nomodif, force_insert=False):
    search_key = self.remove_prefix(src_path)
    if is_directory:
        hash_key = 'directory'
    else:
        # Use a context manager so the file handle is not leaked.
        with open(src_path, 'rb') as fd:
            hash_key = hashfile(fd, hashlib.md5())
    node_id = False
    conn = sqlite3.connect(self.db)
    if not force_insert:
        conn.row_factory = sqlite3.Row
        c = conn.cursor()
        node_id = None
        for row in c.execute("SELECT node_id FROM ajxp_index WHERE node_path=?", (search_key,)):
            node_id = row['node_id']
            break
        c.close()
    if not node_id:
        t = (
            search_key,
            os.path.getsize(src_path),
            hash_key,
            os.path.getmtime(src_path),
            pickle.dumps(os.stat(src_path))
        )
        logging.debug("Real insert %s" % search_key)
        conn.execute("INSERT INTO ajxp_index (node_path,bytesize,md5,mtime,stat_result) VALUES (?,?,?,?,?)", t)
    else:
        if skip_nomodif:
            bytesize = os.path.getsize(src_path)
            t = (
                bytesize,
                hash_key,
                os.path.getmtime(src_path),
                pickle.dumps(os.stat(src_path)),
                search_key,
                bytesize,
                hash_key
            )
            logging.debug("Real update %s if not the same" % search_key)
            conn.execute("UPDATE ajxp_index SET bytesize=?, md5=?, mtime=?, stat_result=? WHERE node_path=? AND bytesize!=? AND md5!=?", t)
        else:
            t = (
                os.path.getsize(src_path),
                hash_key,
                os.path.getmtime(src_path),
                pickle.dumps(os.stat(src_path)),
                search_key
            )
            logging.debug("Real update %s" % search_key)
            conn.execute("UPDATE ajxp_index SET bytesize=?, md5=?, mtime=?, stat_result=? WHERE node_path=?", t)
    conn.commit()
    conn.close()
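# The ajxp_index table itself is never created in this section. An assumed
# schema, inferred from the columns these statements read and write (column
# types are guesses; the real migration may differ):
AJXP_INDEX_SCHEMA = """
CREATE TABLE IF NOT EXISTS ajxp_index (
    node_id     INTEGER PRIMARY KEY AUTOINCREMENT,
    node_path   TEXT,
    bytesize    INTEGER,
    md5         TEXT,
    mtime       REAL,
    stat_result BLOB
);
"""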
def get_node_md5(self, node_path):
    """ WARNING NOT USED """
    node_path = self.normpath(node_path)
    conn = sqlite3.connect(self.db, timeout=self.timeout)
    conn.row_factory = sqlite3.Row
    c = conn.cursor()
    for row in c.execute("SELECT md5 FROM ajxp_index WHERE node_path LIKE ?", (node_path,)):
        md5 = row['md5']
        c.close()
        conn.close()
        return md5
    c.close()
    conn.close()
    with open(self.base + node_path, 'rb') as fd:
        return hashfile(fd, hashlib.md5())
def stat(self, path, full_path=False, with_hash=False):
    if not path:
        return False
    if not full_path:
        path = self.basepath + path
    if not os.path.exists(path):
        return False
    stat_result = os.stat(path)
    s = dict()
    s['size'] = stat_result.st_size
    s['mtime'] = stat_result.st_mtime
    s['mode'] = stat_result.st_mode
    s['inode'] = stat_result.st_ino
    if with_hash:
        if stat.S_ISREG(stat_result.st_mode):
            with open(path, 'rb') as fd:
                s['hash'] = hashfile(fd, hashlib.md5())
        elif stat.S_ISDIR(stat_result.st_mode):
            s['hash'] = 'directory'
    return s
def updateOrInsert(self, src_path, is_directory, skip_nomodif, force_insert=False):
    search_key = self.remove_prefix(src_path)
    if is_directory:
        hash_key = 'directory'
    else:
        if not os.path.exists(src_path):
            # Nothing to index: without this early return a vanished file
            # would fall through and be recorded with the 'directory' hash.
            return
        try:
            with open(src_path, 'rb') as fd:
                hash_key = hashfile(fd, hashlib.md5())
        except Exception as e:
            logging.exception(e)
            return
    node_id = False
    if self.prevent_atomic_commit:
        conn = self.transaction_conn
    else:
        conn = sqlite3.connect(self.db)
    if not force_insert:
        conn.row_factory = sqlite3.Row
        c = conn.cursor()
        node_id = None
        for row in c.execute("SELECT node_id FROM ajxp_index WHERE node_path=?", (search_key,)):
            node_id = row['node_id']
            break
        c.close()
    if not node_id:
        t = (
            search_key,
            os.path.getsize(src_path),
            hash_key,
            os.path.getmtime(src_path),
            pickle.dumps(os.stat(src_path))
        )
        logging.debug("Real insert %s" % search_key)
        c = conn.cursor()
        del_element = None
        existing_id = None
        if hash_key == 'directory':
            existing_id = self.find_windows_folder_id(src_path)
            if existing_id:
                del_element = self.find_deleted_element(c, self.last_seq_id, os.path.basename(src_path), node_id=existing_id)
        else:
            del_element = self.find_deleted_element(c, self.last_seq_id, os.path.basename(src_path), md5=hash_key)
        if del_element:
            logging.info("This can be a move or a Windows update: " + src_path)
            t = (
                del_element['node_id'],
                del_element['source'],
                os.path.getsize(src_path),
                hash_key,
                os.path.getmtime(src_path),
                pickle.dumps(os.stat(src_path))
            )
            c.execute("INSERT INTO ajxp_index (node_id,node_path,bytesize,md5,mtime,stat_result) "
                      "VALUES (?,?,?,?,?,?)", t)
            c.execute("UPDATE ajxp_index SET node_path=? WHERE node_path=?", (search_key, del_element['source']))
        else:
            if hash_key == 'directory' and existing_id:
                self.clear_windows_folder_id(src_path)
            c.execute("INSERT INTO ajxp_index (node_path,bytesize,md5,mtime,stat_result) VALUES (?,?,?,?,?)", t)
            if hash_key == 'directory':
                self.set_windows_folder_id(c.lastrowid, src_path)
    else:
        if skip_nomodif:
            bytesize = os.path.getsize(src_path)
            t = (
                bytesize,
                hash_key,
                os.path.getmtime(src_path),
                pickle.dumps(os.stat(src_path)),
                search_key,
                bytesize,
                hash_key
            )
            logging.debug("Real update %s if not the same" % search_key)
            conn.execute("UPDATE ajxp_index SET bytesize=?, md5=?, mtime=?, stat_result=? WHERE node_path=? AND bytesize!=? AND md5!=?", t)
        else:
            t = (
                os.path.getsize(src_path),
                hash_key,
                os.path.getmtime(src_path),
                pickle.dumps(os.stat(src_path)),
                search_key
            )
            logging.debug("Real update %s" % search_key)
            conn.execute("UPDATE ajxp_index SET bytesize=?, md5=?, mtime=?, stat_result=? WHERE node_path=?", t)
    if not self.prevent_atomic_commit:
        conn.commit()
        conn.close()
def updateOrInsert(self, src_path, is_directory, skip_nomodif, force_insert=False):
    search_key = self.remove_prefix(src_path)
    size = 0
    mtime = 0
    try:
        # Name the result src_stat so the stat module is not shadowed.
        src_stat = os.stat(src_path)
        if is_directory:
            hash_key = 'directory'
        else:
            size = src_stat.st_size
            mtime = src_stat.st_mtime
            if self.prevent_atomic_commit:
                hash_key = "HASHME"  # Will be hashed when the transaction ends (see hashrow)
            else:
                with open(src_path, 'rb') as fd:
                    hash_key = hashfile(fd, hashlib.md5())
    except IOError:
        # Skip the file: it may still be being copied, or be held open.
        logging.debug('Skipping file %s, as it is being copied / kept open!' % src_path)
        return
    except Exception as e:
        logging.exception(e)
        return
    while True:
        try:
            node_id = False
            if self.prevent_atomic_commit:
                conn = self.transaction_conn
            else:
                conn = sqlite3.connect(self.db, timeout=self.timeout)
            conn.row_factory = sqlite3.Row
            if not force_insert:
                c = conn.cursor()
                node_id = None
                for row in c.execute("SELECT node_id FROM ajxp_index WHERE node_path=?", (search_key,)):
                    node_id = row['node_id']
                    break
                c.close()
            if not node_id:
                t = (search_key, size, hash_key, mtime, pickle.dumps(src_stat))
                logging.debug("Real insert %s" % search_key)
                c = conn.cursor()
                del_element = None
                existing_id = None
                if hash_key == 'directory':
                    existing_id = self.find_windows_folder_id(src_path)
                    if existing_id:
                        del_element = self.find_deleted_element(c, self.last_seq_id, os.path.basename(src_path), node_id=existing_id)
                else:
                    del_element = self.find_deleted_element(c, self.last_seq_id, os.path.basename(src_path), md5=hash_key)
                if del_element:
                    logging.info("This can be a move or a Windows update: " + src_path)
                    t = (del_element['node_id'], del_element['source'], size, hash_key, mtime, pickle.dumps(src_stat))
                    c.execute("INSERT INTO ajxp_index (node_id,node_path,bytesize,md5,mtime,stat_result) "
                              "VALUES (?,?,?,?,?,?)", t)
                    c.execute("UPDATE ajxp_index SET node_path=? WHERE node_path=?", (search_key, del_element['source']))
                else:
                    if hash_key == 'directory' and existing_id:
                        self.clear_windows_folder_id(src_path)
                    c.execute("INSERT INTO ajxp_index (node_path,bytesize,md5,mtime,stat_result) VALUES (?,?,?,?,?)", t)
                    if hash_key == 'directory':
                        self.set_windows_folder_id(c.lastrowid, src_path)
            else:
                if skip_nomodif:
                    t = (size, hash_key, mtime, pickle.dumps(src_stat), search_key, hash_key)
                    logging.debug("Real update if not the same (size %d)" % size)
                    conn.execute("UPDATE ajxp_index SET bytesize=?, md5=?, mtime=?, stat_result=? WHERE node_path=? AND md5!=?", t)
                else:
                    t = (size, hash_key, mtime, pickle.dumps(src_stat), search_key)
                    logging.debug("Real update %s" % search_key)
                    conn.execute("UPDATE ajxp_index SET bytesize=?, md5=?, mtime=?, stat_result=? WHERE node_path=?", t)
            if not self.prevent_atomic_commit:
                conn.commit()
                conn.close()
            break
        except sqlite3.OperationalError:
            # The database is locked by another writer; back off and retry.
            time.sleep(.1)
        except IOError:
            return
def updateOrInsert(self, src_path, is_directory, skip_nomodif, force_insert=False):
    search_key = self.remove_prefix(src_path)
    if is_directory:
        hash_key = 'directory'
    else:
        if not os.path.exists(src_path):
            # Nothing to index: without this early return a vanished file
            # would be recorded with the 'directory' hash.
            return
        try:
            if self.prevent_atomic_commit:
                hash_key = "HASHME"  # Will be hashed when the transaction ends
            else:
                with open(src_path, 'rb') as fd:
                    hash_key = hashfile(fd, hashlib.md5())
        except IOError:
            # Skip the file: it may still be being copied, or be held open.
            logging.debug('Skipping file %s, as it is being copied / kept open!' % src_path)
            return
        except Exception as e:
            logging.exception(e)
            return
    while True:
        try:
            node_id = False
            if self.prevent_atomic_commit:
                conn = self.transaction_conn
            else:
                conn = sqlite3.connect(self.db, timeout=self.timeout)
            conn.row_factory = sqlite3.Row
            if not force_insert:
                c = conn.cursor()
                node_id = None
                for row in c.execute("SELECT node_id FROM ajxp_index WHERE node_path=?", (search_key,)):
                    node_id = row['node_id']
                    break
                c.close()
            if not node_id:
                t = (
                    search_key,
                    os.path.getsize(src_path),
                    hash_key,
                    os.path.getmtime(src_path),
                    pickle.dumps(os.stat(src_path))
                )
                logging.debug("Real insert %s" % search_key)
                c = conn.cursor()
                del_element = None
                existing_id = None
                if hash_key == 'directory':
                    existing_id = self.find_windows_folder_id(src_path)
                    if existing_id:
                        del_element = self.find_deleted_element(c, self.last_seq_id, os.path.basename(src_path), node_id=existing_id)
                else:
                    del_element = self.find_deleted_element(c, self.last_seq_id, os.path.basename(src_path), md5=hash_key)
                if del_element:
                    logging.info("This can be a move or a Windows update: " + src_path)
                    t = (
                        del_element['node_id'],
                        del_element['source'],
                        os.path.getsize(src_path),
                        hash_key,
                        os.path.getmtime(src_path),
                        pickle.dumps(os.stat(src_path))
                    )
                    c.execute("INSERT INTO ajxp_index (node_id,node_path,bytesize,md5,mtime,stat_result) "
                              "VALUES (?,?,?,?,?,?)", t)
                    c.execute("UPDATE ajxp_index SET node_path=? WHERE node_path=?", (search_key, del_element['source']))
                else:
                    if hash_key == 'directory' and existing_id:
                        self.clear_windows_folder_id(src_path)
                    c.execute("INSERT INTO ajxp_index (node_path,bytesize,md5,mtime,stat_result) VALUES (?,?,?,?,?)", t)
                    if hash_key == 'directory':
                        self.set_windows_folder_id(c.lastrowid, src_path)
            else:
                if skip_nomodif:
                    bytesize = os.path.getsize(src_path)
                    t = (
                        bytesize,
                        hash_key,
                        os.path.getmtime(src_path),
                        pickle.dumps(os.stat(src_path)),
                        search_key,
                        hash_key
                    )
                    logging.debug("Real update %s if not the same" % search_key)
                    conn.execute("UPDATE ajxp_index SET bytesize=?, md5=?, mtime=?, stat_result=? WHERE node_path=? AND md5!=?", t)
                else:
                    t = (
                        os.path.getsize(src_path),
                        hash_key,
                        os.path.getmtime(src_path),
                        pickle.dumps(os.stat(src_path)),
                        search_key
                    )
                    logging.debug("Real update %s" % search_key)
                    conn.execute("UPDATE ajxp_index SET bytesize=?, md5=?, mtime=?, stat_result=? WHERE node_path=?", t)
            if not self.prevent_atomic_commit:
                conn.commit()
                conn.close()
            break
        except sqlite3.OperationalError:
            # The database is locked by another writer; back off and retry.
            time.sleep(.1)
def download(self, path, local, callback_dict=None):
    """
    Download the content of a server file to a local file.
    :param path: node path on the server
    :param local: local path on the filesystem
    :param callback_dict: a dict() that can be updated with progress data
    :return: Server response
    """
    orig = self.stat(path)
    if not orig:
        raise PydioSdkException('download', path, _('Original file was not found on server'))

    url = self.url + '/download' + self.urlencode_normalized((self.remote_folder + path))
    local_tmp = local + '.pydio_dl'
    headers = None
    write_mode = 'wb'
    dl = 0
    if not os.path.exists(os.path.dirname(local)):
        os.makedirs(os.path.dirname(local))
    elif os.path.exists(local_tmp):
        # A .pydio_dl already exists: maybe it's a chunk of the original?
        # Compare its md5 against the corresponding chunk of the remote file.
        current_size = os.path.getsize(local_tmp)
        with open(local_tmp, 'rb') as fd:
            chunk_local_hash = hashfile(fd, hashlib.md5())
        chunk_remote_stat = self.stat(path, True, partial_hash=[0, current_size])
        if chunk_local_hash == chunk_remote_stat['hash']:
            # Resume the download from where the partial file stops.
            headers = {'range': 'bytes=%i-%i' % (current_size, chunk_remote_stat['size'])}
            write_mode = 'ab'  # append in binary mode; 'a+' would corrupt data on Windows
            dl = current_size
            if callback_dict:
                callback_dict['bytes_sent'] = float(current_size)
                callback_dict['total_bytes_sent'] = float(current_size)
                callback_dict['total_size'] = float(chunk_remote_stat['size'])
                callback_dict['transfer_rate'] = 0
                dispatcher.send(signal=TRANSFER_CALLBACK_SIGNAL, send=self, change=callback_dict)
        else:
            os.unlink(local_tmp)
    try:
        with open(local_tmp, write_mode) as fd:
            start = time.time()  # wall-clock time; time.clock() measures CPU time on Unix
            r = self.perform_request(url=url, stream=True, headers=headers)
            total_length = r.headers.get('content-length')
            if total_length is None:  # no content-length header
                fd.write(r.content)
            else:
                previous_done = 0
                for chunk in r.iter_content(1024 * 8):
                    if self.interrupt_tasks:
                        raise PydioSdkException("interrupt", path=path, detail=_('Task interrupted by user'))
                    dl += len(chunk)
                    fd.write(chunk)
                    done = int(50 * dl / int(total_length))
                    if done != previous_done:
                        transfer_rate = dl // (time.time() - start)
                        logging.debug("\r[%s%s] %s bps" % ('=' * done, ' ' * (50 - done), transfer_rate))
                        dispatcher.send(signal=TRANSFER_RATE_SIGNAL, send=self, transfer_rate=transfer_rate)
                        if callback_dict:
                            callback_dict['bytes_sent'] = float(len(chunk))
                            callback_dict['total_bytes_sent'] = float(dl)
                            callback_dict['total_size'] = float(total_length)
                            callback_dict['transfer_rate'] = transfer_rate
                            dispatcher.send(signal=TRANSFER_CALLBACK_SIGNAL, send=self, change=callback_dict)
                        previous_done = done
        if not os.path.exists(local_tmp):
            raise PydioSdkException('download', local, _('File not found after download'))
        stat_result = os.stat(local_tmp)
        if not orig['size'] == stat_result.st_size:
            os.unlink(local_tmp)
            raise PydioSdkException('download', path, _('File is not correct after download'))
        # On Windows, os.rename fails if the target exists, so remove it first.
        is_system_windows = platform.system().lower().startswith('win')
        if is_system_windows and os.path.exists(local):
            os.unlink(local)
        os.rename(local_tmp, local)
        return True
    except PydioSdkException as pe:
        if pe.operation == 'interrupt':
            raise pe
        if os.path.exists(local_tmp):
            os.unlink(local_tmp)
        raise pe
    except Exception as e:
        if os.path.exists(local_tmp):
            os.unlink(local_tmp)
        raise PydioSdkException('download', path, _('Error while downloading file: %s') % e.message)
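# A hypothetical call site (the sdk object and paths are assumptions, not
# taken from the source), showing how callback_dict exposes progress data
# while the transfer runs:
progress = {}
sdk.download('/docs/report.pdf', '/home/user/sync/docs/report.pdf', callback_dict=progress)
# During and after the transfer, progress holds values such as:
# {'bytes_sent': 8192.0, 'total_bytes_sent': 524288.0,
#  'total_size': 1048576.0, 'transfer_rate': 262144}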