def parse_section(section): """retrieve peers sequence encoded by section name (as urls or regexen, depending on section type) """ sl = section.split() st = sl.pop(0) sl = [unescape(u) for u in sl] if st == 'peersrx': sl = [re.compile(u) for u in sl] return sl
def _checkpt_param(cls, chkpt, prm, xtimish=True):
    """use config backend to lookup a parameter
    belonging to checkpoint @chkpt"""
    cprm = getattr(gconf, 'checkpoint_' + prm, None)
    if not cprm:
        return
    chkpt_mapped, val = cprm.split(':', 1)
    if unescape(chkpt_mapped) != chkpt:
        return
    if xtimish:
        val = cls.deserialize_xtime(val)
    return val
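A minimal sketch of the value format this lookup expects: the config attribute checkpoint_&lt;prm&gt; holds "&lt;escaped checkpoint label&gt;:&lt;value&gt;". The gconf stub, label, and xtime value here are hypothetical, and the xtime deserialization step is left out:

from urllib.parse import unquote as unescape  # stand-in for gsyncd's unescape

class GconfStub(object):
    # hypothetical backend value: "<escaped checkpoint label>:<serialized xtime>"
    checkpoint_target = 'mark1:1385111412.0'

gconf = GconfStub()

def checkpt_param(chkpt, prm):
    cprm = getattr(gconf, 'checkpoint_' + prm, None)
    if not cprm:
        return
    chkpt_mapped, val = cprm.split(':', 1)
    if unescape(chkpt_mapped) != chkpt:
        return
    return val

print(checkpt_param('mark1', 'target'))  # -> '1385111412.0'
print(checkpt_param('mark2', 'target'))  # -> None (label does not match)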
def process_change(self, change, done, retry):
    pfx = gauxpfx()
    clist = []
    entries = []
    datas = set()

    # basic crawl stats: files and bytes
    files_pending = {'count': 0, 'purge': 0, 'bytes': 0, 'files': []}

    try:
        f = open(change, "r")
        clist = f.readlines()
        f.close()
    except IOError:
        raise

    def edct(op, **ed):
        dct = {}
        dct['op'] = op
        for k in ed:
            if k == 'stat':
                # only uid/gid/mode are propagated from the stat result
                st = ed[k]
                dst = dct['stat'] = {}
                dst['uid'] = st.st_uid
                dst['gid'] = st.st_gid
                dst['mode'] = st.st_mode
            else:
                dct[k] = ed[k]
        return dct

    # regular file update: bytes & count
    def _update_reg(entry, size):
        if entry not in files_pending['files']:
            files_pending['count'] += 1
            files_pending['bytes'] += size
            files_pending['files'].append(entry)

    # updates for directories, symlinks etc.
    def _update_rest():
        files_pending['count'] += 1

    # entry count
    def entry_update(entry, size, mode):
        if stat.S_ISREG(mode):
            _update_reg(entry, size)
        else:
            _update_rest()

    # purge count
    def purge_update():
        files_pending['purge'] += 1

    for e in clist:
        e = e.strip()
        et = e[self.IDX_START:self.IDX_END]
        ec = e[self.IDX_END:].split(' ')
        if et in self.TYPE_ENTRY:
            ty = ec[self.POS_TYPE]
            en = unescape(os.path.join(pfx, ec[self.POS_ENTRY1]))
            gfid = ec[self.POS_GFID]
            # definitely need a better way to bucketize entry ops
            if ty in ['UNLINK', 'RMDIR']:
                purge_update()
                entries.append(edct(ty, gfid=gfid, entry=en))
                continue
            go = os.path.join(pfx, gfid)
            st = lstat(go)
            if isinstance(st, int):
                # lstat failed with an errno: the file is already gone
                if ty == 'RENAME':
                    entries.append(edct('UNLINK', gfid=gfid, entry=en))
                else:
                    logging.debug('file %s got purged in the interim' % go)
                continue
            entry_update(go, st.st_size, st.st_mode)
            if ty in ['CREATE', 'MKDIR', 'MKNOD']:
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
            elif ty == 'LINK':
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
            elif ty == 'SYMLINK':
                rl = errno_wrap(os.readlink, [en], [ENOENT])
                if isinstance(rl, int):
                    continue
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid,
                                    link=rl))
            elif ty == 'RENAME':
                e2 = unescape(os.path.join(pfx, ec[self.POS_ENTRY2]))
                entries.append(edct(ty, gfid=gfid, entry=en, entry1=e2,
                                    stat=st))
            else:
                logging.warning('ignoring %s [op %s]' % (gfid, ty))
        elif et in self.TYPE_GFID:
            go = os.path.join(pfx, ec[0])
            st = lstat(go)
            if isinstance(st, int):
                logging.debug('file %s got purged in the interim' % go)
                continue
            entry_update(go, st.st_size, st.st_mode)
            datas.update([go])
    logging.debug('entries: %s' % repr(entries))
    if not retry:
        self.update_cumulative_stats(files_pending)
    # sync namespace
    if entries:
        self.slave.server.entry_ops(entries)
    # sync data
    if self.syncdata(datas):
        if done:
            self.master.server.changelog_done(change)
        return True
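The slicing in process_change assumes each changelog record starts with a short type tag followed by space-separated fields. A sketch of how one entry record would be split; the framing constants mirror the self.IDX_*/self.POS_* usage above, but the concrete values and the sample record are illustrative, not the actual changelog wire format:

# hypothetical framing constants; the record text is made up
IDX_START, IDX_END = 0, 2
TYPE_ENTRY = 'E '
POS_GFID, POS_TYPE, POS_ENTRY1 = 0, 1, 2

rec = 'E 0fa2d6ff-0000-0000-0000-c0ffee0041bc CREATE file1'
et = rec[IDX_START:IDX_END]       # 'E ' -> an entry (namespace) operation
ec = rec[IDX_END:].split(' ')     # remaining space-separated fields
if et == TYPE_ENTRY:
    print(ec[POS_GFID], ec[POS_TYPE], ec[POS_ENTRY1])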
# variant without retry handling: tracks purged and hardlinked gfids
# instead of per-file crawl stats
def process_change(self, change, done):
    clist = []
    entries = []
    purges = set()
    links = set()
    datas = set()
    pfx = gauxpfx()
    try:
        f = open(change, "r")
        clist = f.readlines()
        f.close()
    except IOError:
        raise

    def edct(op, **ed):
        dct = {}
        dct["op"] = op
        for k in ed:
            if k == "stat":
                # only uid/gid/mode are propagated from the stat result
                st = ed[k]
                dst = dct["stat"] = {}
                dst["uid"] = st.st_uid
                dst["gid"] = st.st_gid
                dst["mode"] = st.st_mode
            else:
                dct[k] = ed[k]
        return dct

    for e in clist:
        e = e.strip()
        et = e[self.IDX_START:self.IDX_END]
        ec = e[self.IDX_END:].split(" ")
        if et in self.TYPE_ENTRY:
            ty = ec[self.POS_TYPE]
            en = unescape(os.path.join(pfx, ec[self.POS_ENTRY1]))
            gfid = ec[self.POS_GFID]
            # definitely need a better way to bucketize entry ops
            if ty in ["UNLINK", "RMDIR"]:
                entries.append(edct(ty, gfid=gfid, entry=en))
                purges.update([os.path.join(pfx, gfid)])
                continue
            if ty != "RENAME":
                go = os.path.join(pfx, gfid)
                st = self.lstat(go)
                if isinstance(st, int):
                    logging.debug("file %s got purged in the interim" % go)
                    continue
            if ty in ["CREATE", "MKDIR", "MKNOD"]:
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
            elif ty == "LINK":
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
                links.update([os.path.join(pfx, gfid)])
            elif ty == "SYMLINK":
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid,
                                    link=os.readlink(en)))
            elif ty == "RENAME":
                e2 = unescape(os.path.join(pfx, ec[self.POS_ENTRY2]))
                entries.append(edct(ty, gfid=gfid, entry=en, entry1=e2))
            else:
                # unknown entry op: ignore
                pass
        elif et in self.TYPE_GFID:
            da = os.path.join(pfx, ec[0])
            st = self.lstat(da)
            if isinstance(st, int):
                logging.debug("file %s got purged in the interim" % da)
                continue
            datas.update([da])
    logging.debug("entries: %s" % repr(entries))
    # sync namespace
    if entries:
        self.slave.server.entry_ops(entries)
    # sync data
    if self.syncdata(datas - (purges - links)):
        if done:
            self.master.server.changelog_done(change)
        return True
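The set arithmetic in this variant's final syncdata call skips data-sync candidates whose files were purged, unless a hardlink was also recorded for the same gfid (the file is then still reachable). A tiny worked example with made-up gfid paths:

datas = {'.gfid/g1', '.gfid/g2', '.gfid/g3'}   # gfids with pending data changes
purges = {'.gfid/g2', '.gfid/g3'}              # gfids seen in UNLINK/RMDIR records
links = {'.gfid/g3'}                           # gfids that also gained a hardlink

# g2 is gone for good -> skip; g3 survives via its new link -> sync
print(datas - (purges - links))                # {'.gfid/g1', '.gfid/g3'}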