def work(self):
    """Garbage-collect old change events (and the inodes they deleted).

    Runs as an inlineCallbacks generator: determines the oldest event id
    every *other* node has processed, then drops our events up to the
    last SYN record at or before that point.
    """
    if self.running and not self.fs.single_node:
        # re-run later; other nodes may catch up in the meantime
        self.restart = True

    @inlineCallbacks
    def do_work(db):
        # Returns the inode numbers of 'd' (delete) events that are now
        # safe to purge; empty list if nothing can be cleaned yet.
        inums = []
        try:
            all_done,n_nodes = yield db.DoFn("select min(event),count(*) from node where root=${root} and id != ${node}", root=self.fs.root_id, node=self.fs.node_id)
        except NoData:
            pass
        else:
            if not n_nodes:
                return
            trace('inodeclean',"%d nodes",n_nodes)
            try:
                # newest SYN that every other node has already seen
                last_syn, = yield db.DoFn("select id from event where node=${node} and typ = 's' and id <= ${evt} order by id desc limit 1", node=self.fs.node_id, evt=all_done)
            except NoData:
                pass
            else:
                # remove old inode entries
                trace('inodeclean',"upto %d",last_syn)
                yield db.DoSelect("select inode from event where node=${node} and typ = 'd' and id < ${id}", id=last_syn, node=self.fs.node_id, _callback=inums.append, _empty=True)
                yield db.Do("delete from event where node=${node} and id < ${id}", id=last_syn, node=self.fs.node_id, _empty=True)
        returnValue( inums )
    inums = yield self.fs.db(do_work, DB_RETRIES) # more deadlock prevention
    if inums:
        # purge inode rows in a separate transaction
        trace('inodeclean',"%d inodes",len(inums))
        yield self.fs.db(lambda db: db.Do("delete from inode where id in (%s)" % ",".join((str(x) for x in inums)), _empty=True), DB_RETRIES)
def _fclose(self): with self.lock: if not self.file: return trace('fs',"%d: close file", self.inum) self.file.close() self.file = None
def client_disconnected(self, client):
    """Forget *client*; log only when it was actually registered."""
    was_known = True
    try:
        self._clients.remove(client)
    except (ValueError, KeyError):
        # unknown client (list or set container) -- nothing to do
        was_known = False
    if was_known:
        trace("remote", "Disconnected client from %d", self.node_id)
def server_no_echo(self):
    """Echo-timeout handler: the peer did not answer our ping in time."""
    self.echo_timer = None
    if "ping" in tracers:
        trace("ping", "%s: Echo timeout", str(self.node_id) if self.node_id else "?")
    else:
        # NOTE(review): disconnect_retry() is only reached when "ping"
        # tracing is disabled -- confirm that a timeout is deliberately a
        # log-only no-op while ping tracing is active.
        self.disconnect_retry()
        trace("remote", "%s: Echo timeout", str(self.node_id) if self.node_id else "?")
def cb2(_):
    # Write-completion callback. Closure: `self`, `offset`, `end` and
    # `buf` come from the enclosing write path.
    self.node.mtime = nowtuple()
    if self.node.size < end:
        # the write extended the file
        trace('rw',"%d: end now %d", self.node.inum, end)
        self.node.size = end
    self.node.changes.add(offset,end)
    self.writes += 1
    # FUSE-style write result: number of bytes written
    return len(buf)
def server_disconnected(self, server):
    """Handle loss of *server*: clear our link state and schedule a retry.

    Ignored when *server* is not the link we currently track.
    """
    if self._server is not server:
        return
    self._server = None
    timer = self.echo_timer
    if timer is not None:
        timer.cancel()
        self.echo_timer = None
    trace("remote", "Disconnected server to %d", self.node_id)
    self.queue_retry()
def remote_exec(self, node, name, *a, **k):
    """Forward a call of method *name* (args *a*, kwargs *k*) to *node*.

    Raises NoLink when *node* is not reachable via the current topology.
    """
    if node not in self.fs.topology:
        # Bug fix: the old trace referenced an undefined name `caller`
        # and supplied one placeholder too many, so logging this error
        # path would itself blow up.
        trace("remote", "NoLink remote %s %s %s %s", node, name, repr(a), repr(k))
        raise NoLink(node, "remote exec")
    # TODO: cache calls to stuff like reading from a file
    # TODO: prevent cycles
    return self.fs.call_node(node, name, *a, **k)
def _save_done(self, db):
    """Release the save lock held by *db*; wake the next queued saver."""
    if self._saving != db:
        # somebody else holds (or nobody holds) the lock -- refuse
        trace('fs',"%s: save unlock error: %s %s",self,self._saving,db)
        raise RuntimeError("inode _save releasing")
    trace('fs',"%s: save unlock: %s",self,db)
    self._saving = None
    if self._saveq:
        waiter = self._saveq.pop(0)
        waiter.callback(None)
def chk(res=None):
    """Test whether all data are here; if so, don't continue"""
    # Closure: `todo` (wanted range) and `notfound` (single-element
    # accumulator list) come from the enclosing request loop.
    trace('cache',"%s: Callback %s: check ! %s - %s",self.inum, res, todo,self.available)
    if res is not None:
        # intersect all "not found" ranges reported so far
        if notfound[0] is None:
            notfound[0] = res
        else:
            notfound[0] &= res
    # True when nothing in `todo` is still missing from `available`
    return not (todo - self.available)
def done(r):
    # Work-pass completion callback. Closure: `self` comes from the
    # enclosing background-job method.
    self.workerDefer = None
    if self.restart:
        if self.running:
            # a re-run was requested while we worked
            self.trigger(True)
        else:
            trace('background',"Stopped %s (shutdown)", self.__class__.__name__)
    else:
        trace('background',"Stopped %s", self.__class__.__name__)
def work(self):
    """Sync root data"""
    # Grab-and-reset the accumulated counters first so that deltas
    # arriving while the DB update runs are not lost.
    d_inode = self.delta_inode; self.delta_inode = 0
    d_dir = self.delta_dir; self.delta_dir = 0
    d_block = self.delta_block; self.delta_block = 0
    if d_inode or d_block or d_dir:
        trace('rootup',"sync i%d b%d d%d", d_inode, d_block, d_dir)
        yield self.fs.db(lambda db: db.Do("update root set nfiles=nfiles+${inodes}, nblocks=nblocks+${blocks}, ndirs=ndirs+${dirs}", root=self.fs.root_id, inodes=d_inode, blocks=d_block, dirs=d_dir), DB_RETRIES)
    returnValue( None )
def startService(self):
    """Startup. Part of IService."""
    trace('background', "StartService %s", self.__class__.__name__)
    super(BackgroundJob, self).startService()
    # If a run was requested before startup, honour it now.
    if not self.restart or self.workerCall:
        return
    if self.restart > 1:
        self.run()      # run immediately
    else:
        self.trigger()  # run via the usual timer
def work(self):
    """Collect the set of peer nodes and (re)build the routing topology."""
    nodes = set()
    if self.running:
        trace('nodecollect',"Start collecting")
        self.restart = True
    else:
        trace('nodecollect',"Not collecting")
        return # don't need this when shutting down

    @inlineCallbacks
    def do_work(db):
        # Gather all sibling node ids of our root, then compute next hops.
        yield db.DoSelect("select id from node where id != ${node} and root = ${root}",root=self.fs.root_id, node=self.fs.node_id, _empty=1, _callback=nodes.add)
        if not nodes:
            trace('nodecollect',"No other nodes: shutting down collector")
            self.fs.single_node = True
            return
        self.fs.single_node = False
        trace('nodecollect',"%d other nodes",len(nodes))
        # TODO: create a topology map
        ## now create a topology map: how do I reach X from here?
        topo = yield self.get_paths(db)
        topo = next_hops(topo, self.fs.node_id)
        trace('nodecollect',"topology %s",repr(topo))
        returnValue( topo )
    topo = yield self.fs.db(do_work, DB_RETRIES)
    # NOTE(review): in the single-node case do_work returns None, which
    # the tuple unpacking below would reject -- confirm that path cannot
    # reach here (or is handled by the caller).

    # drop obsolete nodes
    for k in self.fs.remote.keys():
        if k not in nodes:
            trace('nodecollect',"drop node %s",k)
            del self.fs.remote[k]
    #self.fs.topology = topo
    self.fs.topology,self.fs.neighbors = topo
    # add new nodes
    self.fs.missing_neighbors = set()
    for k in nodes:
        if k not in self.fs.remote:
            trace('nodecollect',"add node %s",k)
            d = self.fs.remote[k].connect_retry() # yes, this works, .remote auto-extends
            def pr(r):
                # NoLink is expected: node exists but cannot be reached
                r.trap(NoLink)
                trace('error',"Node %s found, but not connected",k)
            d.addErrback(pr)
            def lerr(r):
                log.err(r,"Problem adding node")
            d.addErrback(lerr)
            yield d
        if k not in self.fs.topology:
            self.fs.missing_neighbors.add(k)
    trace('nodecollect',"Done.")
    self.fs.cleaner.trigger()
    self.fs.copier.trigger()
def now(self, node, event, db):
    """Record that this inode's cache state needs to be written immediately."""
    trace('cacherecord', "update inode %d now (%d)", node.inum, event)
    pending = node.write_event
    if pending is None or pending < event:
        node.write_event = event
        # re-queue the note if the surrounding transaction rolls back
        db.call_rolledback(self.note, node)
    # take the node off the lazy-write queue; we save it right now
    if node in self.caches:
        self.caches.remove(node)
    node._save(db, event=event)
def d_size(self, old, new):
    """A file size changed."""
    # convert byte sizes to block counts, rounding up
    blocks_before = (old + BLOCKSIZE - 1) // BLOCKSIZE
    blocks_after = (new + BLOCKSIZE - 1) // BLOCKSIZE
    delta = blocks_after - blocks_before
    if delta == 0:
        return
    trace('rootup', "#Size: %d", delta)
    self.delta_block += delta
    self.trigger()
def chk(res):
    # Read-result check. Closure: `length` and `offset` come from the
    # surrounding read request.
    lr = len(res)
    if lr < length:
        # Short read: forget the tail we did not get.
        trace('error',"%d: read %d @ %d: only got %d", self.inum, length,offset,lr)
        self.available.delete(offset+lr,offset+length)
        self.known.delete(offset+lr,offset+length)
        # TODO: it's not necessarily gone, we just don't know where.
        # Thus, replace with "unknown" instead. Unfortunately we don't have that. Yet.
        self.fs.changer.note(self)
    return res
def note(self, node, event=None):
    """Record that this inode's cache state needs to be written."""
    if not event:
        trace('cacherecord', "update inode %d", node.inum)
    else:
        trace('cacherecord', "update inode %d (%d)", node.inum, event)
        current = node.write_event
        # only ever move the event marker forward
        if current is None or current < event:
            node.write_event = event
    self.caches.add(node)
    self.trigger()
def server_connected(self, server):
    """A server link to this node is up: adopt it and start echo pings."""
    if self._server is not None and self._server is not server:
        # only one active server link per node: drop the stale one
        self._server.disconnect()
    trace("remote", "Connected server to %d", self.node_id)
    self._server = server
    # reset the reconnect backoff on success
    self.retry_timeout = INITIAL_RETRY
    if self.echo_timer is not None:
        self.echo_timer.cancel()
    self.echo_timer = reactor.callLater(ECHO_TIMER, self.server_echo)
    self.fs.copier.trigger()
def remote_data(self, offset, data):
    """\
    Data arrives.

    Write the received bytes at *offset* and mark the range as present.
    """
    trace('cache',"%s: recv %d @ %d: known %s", self.inum,len(data),offset, self.known)
    if not self.inum:
        # inode is gone; drop the data
        return
    if self.file_closer:
        # keep the backing file open while data keeps streaming in
        self.file_closer.reset(5)
    yield self.write(offset,data)
    yield self.has(offset,offset+len(data))
    trace('cache',"%s: done; known %s",self.inum,self.known)
def do_work(db):
    # Event recorder transaction body. Closure: `d` (list of queued
    # (event, inode, data) records) and `self` come from the enclosing
    # event recorder.
    if not d:
        # nothing queued: write a SYN marker against the root inode
        s = yield db.Do("insert into `event`(`inode`,`node`,`typ`,`range`) values(${inode},${node},${event},${data})", inode=self.fs.root_inum,node=self.fs.node_id,data=None,event='s' )
        trace('eventrecord',"write SYN %d",s)
    else:
        for event,inode,data in d:
            if not inode.inum:
                # inode vanished before we got to it
                continue
            s = yield db.Do("insert into `event`(`inode`,`node`,`typ`,`range`) values(${inode},${node},${event},${data})", inode=inode.inum,node=self.fs.node_id,data=data,event=event )
            inode['event'] = s
        trace('eventrecord',"wrote %d records",len(d))
        self.restart = True # unconditional. That's OK, we want a SYN afterwards.
def trim(self, end, do_file=True):
    """Forget cached/known data beyond *end*; optionally shrink the file."""
    r = Range()
    if end > 0:
        r.add(0,end)
    trace('fs' if do_file else 'cache',"%s: trim to %d",self.inum,end)
    # clip both range trackers to [0, end)
    self.known &= r
    self.available &= r
    self.fs.changer.note(self)
    if do_file:
        yield self.have_file("trim")
        try:
            # NOTE(review): `_trim` is invoked with 0, not `end` --
            # confirm _trim's parameter semantics (offset vs. length).
            yield deferToThread(self._trim,0)
        finally:
            # always re-arm the file-close timeout
            self.timeout_file()
def _have_file(self, reason): with self.lock: if self.file: return False ipath=self._file_path() try: self.file = open(ipath,"r+") trace('fs',"%d: open file %s for %s", self.inum,ipath,reason) except EnvironmentError as e: if e.errno != errno.ENOENT: raise self.file = open(ipath,"w+") trace('fs',"%d: open file %s for %s (new)", self.inum,ipath,reason) return True
def do_work2(db):
    """Drain the copy-run queue. Closure: `entries` and `self` come from
    the enclosing copier method.

    `entries` rows are (event id, inode number, type) where type is
    'd' (delete backing file), 'f' (fetch contents via worker pool) or
    't' (trim cache to the inode's size).
    """
    queue = DeferredQueue()
    defs = []
    # roughly one worker per five entries, capped at self.nworkers
    nworkers = len(entries)//5+1
    if nworkers > self.nworkers:
        nworkers = self.nworkers
    for i in range(nworkers):
        d = self.fetch(db,queue)
        d.addErrback(log.err,"fetch()")
        defs.append(d)
    workers = set()
    for id,inum,typ in entries:
        if not self.running:
            break
        trace('copyrun',"%d: %s",inum,typ)
        self.last_entry = id
        if typ == 'd':
            def dt(inum):
                path = build_path(self.fs.store,inum, create=False)
                try:
                    os.unlink(path)
                except EnvironmentError as e:
                    # already gone is fine
                    if e.errno != errno.ENOENT:
                        raise
            yield deferToThread(dt,inum)
        else:
            inode = SqlInode(self.fs,inum)
            yield inode._load(db)
            if typ == 'f':
                if inum in workers:
                    # Bug fix: the old trace passed an extra `typ`
                    # argument for a single-placeholder format string,
                    # which would raise at logging time.
                    trace('copyrun',"%d: in workers",inum)
                    continue
                workers.add(inum)
                queue.put((id,inode))
            elif typ == 't':
                if inode.cache:
                    yield inode.cache.trim(inode.size)
            else:
                raise RuntimeError("Typ '%s' not found (inode %d)" % (typ,inum))
            continue
    # tell each worker to shut down
    for i in range(nworkers):
        queue.put(None)
    yield DeferredList(defs)
def call_node(self, dest, name, *a, **k):
    """Invoke remote method *name* on node *dest*, routing via topology.

    Raises NoLink when *dest* is unreachable.
    """
    if dest in self.missing_neighbors:
        raise NoLink(dest, "missing")
    try:
        hop = self.topology[dest]
        link = self.remote[hop]
    except KeyError:
        trace("error", "NoLink! %s %s %s %s", dest, name, repr(a), repr(k))
        self.missing_neighbors.add(dest)
        raise NoLink(dest, "missing 2")
    if dest == hop:
        # direct neighbor: dispatch locally on the link object
        return getattr(link, "do_" + name)(*a, **k)
    # otherwise ask the next hop to forward the call
    return link.remote_exec(hop, name, *a, **k)
def connect_timer(self):
    """Retry-timer callback: start a connection attempt unless one is
    already active (or no peer node is assigned)."""
    self.retry_timer = None
    busy = self._server or self._connector or self.node_id is None
    if busy:
        if self.node_id:
            trace("remote", "connect_timer: inprogress to node %d", self.node_id)
        return
    d = self.connect()
    def on_nolink(r):
        # expected failure: peer unreachable; back off and retry later
        r.trap(NoLink)
        trace("remote", "connect to node %d failed: %s", self.node_id, r.getErrorMessage())
        self.queue_retry()
    d.addErrback(on_nolink)
    d.addErrback(lambda r: log.err(r, "Connection timer"))
def connect(self):
    """\
    Try to connect to this node's remote side.

    Looks up the connection method in the `updater` table, imports the
    matching sqlfuse.connect.* module and lets its NodeClient connect.
    Only direct neighbors are connected to; routed nodes are skipped.
    """
    assert self.node_id is not None
    if self._server:
        return # already done
    if self._connector: # in progress: wait for it
        trace("remote", "Chain connect to node %d", self.node_id)
        yield triggeredDefer(self._connector)
        return
    if self.node_id not in self.fs.topology:
        trace("remote", "Not connecting to node %d: no topo", self.node_id)
        return
    if self.fs.topology[self.node_id] != self.node_id:
        # reachable only through another hop -- no direct link wanted
        trace("remote", "Not connecting to node %d: via %s", self.node_id, self.fs.topology[self.node_id])
        return
    trace("remote", "Connecting to node %d", self.node_id)
    try:
        with self.fs.db() as db:
            try:
                m, = yield db.DoFn(
                    "select method from updater where src=${src} and dest=${dest}",
                    src=self.fs.node_id,
                    dest=self.node_id,
                )
            except NoData:
                raise NoLink(self.node_id, "No Data")
        # load the transport module named in the DB row
        m = __import__("sqlfuse.connect." + m, fromlist=("NodeClient",))
        m = m.NodeClient(self)
        self._connector = m.connect()
        # Do this to avoid having a single Deferred both in the inline
        # callback chain and as a possible cancellation point
        yield triggeredDefer(self._connector)
        if self._server is None:
            raise NoLink(self.node_id, "No _server")
    except NoLink:
        raise
    except Exception as e:
        # no connection
        if isinstance(e, (err.ConnectionRefusedError, NoConnection)):
            # expected transient failure: just retry quietly
            trace("remote", "No link to %d, retrying", self.node_id)
        else:
            f = failure.Failure()
            log.err(f, "Connecting remote")
        self.queue_retry()
    finally:
        self._connector = None
def _save(self,db):
    """Persist this inode's cache record: insert on first save, else update."""
    if self.inum is None:
        return
    event = self.write_event
    self.write_event = None
    # restore the pending event marker if the transaction rolls back
    db.call_rolledback(setattr,self,'write_event',event)
    if self.cache_id is None:
        trace('cacherecord',"new for inode %d: ev=%s range=%s",self.inum, event or "-", str(self.known))
        # only mention the event column when we actually have an event
        ev1=",event" if event else ""
        ev2=",${event}" if event else ""
        self.cache_id = yield db.Do("insert into cache(cached,inode,node"+ev1+") values (${data},${inode},${node}"+ev2+")", inode=self.inum, node=self.fs.node_id, data=self.known.encode(), event=event)
    else:
        trace('cacherecord',"old for inode %d: ev=%s range=%s",self.inum, event or "-", str(self.known))
        ev=",event=${event}" if event else ""
        yield db.Do("update cache set cached=${data}"+ev+" where id=${cache}", cache=self.cache_id, data=self.known.encode(), event=event, _empty=True)
def do_work(db):
    # Event-cleanup transaction body (closure over `self`): find the
    # oldest event every other node has processed, then delete our
    # events up to the last SYN at or before that point.
    inums = []
    try:
        all_done,n_nodes = yield db.DoFn("select min(event),count(*) from node where root=${root} and id != ${node}", root=self.fs.root_id, node=self.fs.node_id)
    except NoData:
        pass
    else:
        if not n_nodes:
            return
        trace('inodeclean',"%d nodes",n_nodes)
        try:
            # newest SYN record that every other node has already seen
            last_syn, = yield db.DoFn("select id from event where node=${node} and typ = 's' and id <= ${evt} order by id desc limit 1", node=self.fs.node_id, evt=all_done)
        except NoData:
            pass
        else:
            # remove old inode entries
            trace('inodeclean',"upto %d",last_syn)
            yield db.DoSelect("select inode from event where node=${node} and typ = 'd' and id < ${id}", id=last_syn, node=self.fs.node_id, _callback=inums.append, _empty=True)
            yield db.Do("delete from event where node=${node} and id < ${id}", id=last_syn, node=self.fs.node_id, _empty=True)
    # inode numbers from 'd' events, now safe for the caller to purge
    returnValue( inums )
def server_echo(self):
    """Periodic keepalive: ping the peer and arm the echo timeout."""
    # if no pong arrives within ECHO_TIMEOUT, server_no_echo fires
    self.echo_timer = reactor.callLater(ECHO_TIMEOUT, self.server_no_echo)
    def get_echo(r):
        # pong received: cancel the timeout, schedule the next ping
        if self.echo_timer:
            self.echo_timer.cancel()
        if "ping" in tracers:
            trace("ping", "%s: got echo", str(self.node_id) if self.node_id else "?")
        self.echo_timer = reactor.callLater(ECHO_TIMER, self.server_echo)
    def get_echo_error(r):
        # ping failed outright: stop the timer and tear the link down
        if self.echo_timer:
            self.echo_timer.cancel()
            self.echo_timer = None
        log.err(r, "Echo " + (str(self.node_id) if self.node_id else "?"))
        self.disconnect_retry()
    if "ping" in tracers:
        trace("ping", "%s: send echo", str(self.node_id) if self.node_id else "?")
    d = self.do_echo("ping")
    d.addCallbacks(get_echo, get_echo_error)
def run(self, dummy=None):
    """Background loop. Started via timer from trigger()."""
    self.workerCall = None
    if self.workerDefer:
        # a work pass is already active: ask it to go again when done
        if not self.restart:
            self.restart = True
        return
    self.restart = False
    trace('background',"Starting %s", self.__class__.__name__)
    self.workerDefer = maybeDeferred(self.work)
    def done(r):
        # work pass finished: restart if requested and still running
        self.workerDefer = None
        if self.restart:
            if self.running:
                self.trigger(True)
            else:
                trace('background',"Stopped %s (shutdown)", self.__class__.__name__)
        else:
            trace('background',"Stopped %s", self.__class__.__name__)
    self.workerDefer.addErrback(lambda e: log.err(e,"Running "+self.__class__.__name__))
    self.workerDefer.addBoth(done)