Example #1
 def parse_section(section):
     sl = section.split()
     st = sl.pop(0)
     sl = [unescape(u) for u in sl]
     if st == 'peersrx':
         sl = [re.compile(u) for u in sl]
     return sl
Example #2
 def parse_section(section):
     sl = section.split()
     st = sl.pop(0)
     sl = [unescape(u) for u in sl]
     if st == 'peersrx':
         sl = [re.compile(u) for u in sl]
     return sl
Example #3
 def parse_section(section):
     """retrieve peers sequence encoded by section name
        (as urls or regexen, depending on section type)
     """
     sl = section.split()
     st = sl.pop(0)
     sl = [unescape(u) for u in sl]
     if st == 'peersrx':
         sl = [re.compile(u) for u in sl]
     return sl
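
A quick way to see what the two section shapes produce is a small standalone driver. The identity unescape stub and the sample section names below are illustrative assumptions; the real unescape comes from the surrounding module.

import re

def unescape(u):
    # identity stand-in for the module's unescape helper (assumption)
    return u

def parse_section(section):
    sl = section.split()
    st = sl.pop(0)
    sl = [unescape(u) for u in sl]
    if st == 'peersrx':
        sl = [re.compile(u) for u in sl]
    return sl

# 'peers ...' sections come back as plain url strings,
# 'peersrx ...' sections as compiled regex objects
print(parse_section('peers host1::vol host2::vol'))
print(parse_section('peersrx .*host1.* .*host2.*'))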
Example #4
 def _checkpt_param(cls, chkpt, prm, xtimish=True):
     """use config backend to lookup a parameter belonging to
        checkpoint @chkpt"""
     cprm = getattr(gconf, "checkpoint_" + prm, None)
     if not cprm:
         return
     chkpt_mapped, val = cprm.split(":", 1)
     if unescape(chkpt_mapped) != chkpt:
         return
     if xtimish:
         val = cls.deserialize_xtime(val)
     return val
Example #5
 def _checkpt_param(cls, chkpt, prm, xtimish=True):
     """use config backend to lookup a parameter belonging to
        checkpoint @chkpt"""
     cprm = getattr(gconf, 'checkpoint_' + prm, None)
     if not cprm:
         return
     chkpt_mapped, val = cprm.split(':', 1)
     if unescape(chkpt_mapped) != chkpt:
         return
     if xtimish:
         val = cls.deserialize_xtime(val)
     return val
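
The lookup pattern behind _checkpt_param can be sketched on its own: the attribute name is derived from the parameter, the stored value has the form 'checkpoint-name:value', and the value half is returned only when the name half matches the requested checkpoint. The gconf stand-in, identity unescape, and sample values below are assumptions for illustration.

class gconf(object):
    # stand-in config backend (assumption): one attribute per parameter
    checkpoint_start = 'mycheckpoint:1398159318'

def unescape(s):
    # identity stand-in (assumption)
    return s

def checkpt_param(chkpt, prm):
    cprm = getattr(gconf, 'checkpoint_' + prm, None)
    if not cprm:
        return
    chkpt_mapped, val = cprm.split(':', 1)
    if unescape(chkpt_mapped) != chkpt:
        return
    return val

print(checkpt_param('mycheckpoint', 'start'))  # -> '1398159318'
print(checkpt_param('otherone', 'start'))      # -> None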
Example #6
    def process_change(self, change, done, retry):
        pfx = gauxpfx()
        clist = []
        entries = []
        datas = set()

        # basic crawl stats: files and bytes
        files_pending = {'count': 0, 'purge': 0, 'bytes': 0, 'files': []}
        try:
            f = open(change, "r")
            clist = f.readlines()
            f.close()
        except IOError:
            raise

        def edct(op, **ed):
            dct = {}
            dct['op'] = op
            for k in ed:
                if k == 'stat':
                    st = ed[k]
                    dst = dct['stat'] = {}
                    dst['uid'] = st.st_uid
                    dst['gid'] = st.st_gid
                    dst['mode'] = st.st_mode
                else:
                    dct[k] = ed[k]
            return dct

        # regular file update: bytes & count
        def _update_reg(entry, size):
            if entry not in files_pending['files']:
                files_pending['count'] += 1
                files_pending['bytes'] += size
                files_pending['files'].append(entry)

        # updates for directories, symlinks etc..
        def _update_rest():
            files_pending['count'] += 1

        # entry count
        def entry_update(entry, size, mode):
            if stat.S_ISREG(mode):
                _update_reg(entry, size)
            else:
                _update_rest()

        # purge count
        def purge_update():
            files_pending['purge'] += 1

        for e in clist:
            e = e.strip()
            et = e[self.IDX_START:self.IDX_END]
            ec = e[self.IDX_END:].split(' ')
            if et in self.TYPE_ENTRY:
                ty = ec[self.POS_TYPE]
                en = unescape(os.path.join(pfx, ec[self.POS_ENTRY1]))
                gfid = ec[self.POS_GFID]
                # definitely need a better way bucketize entry ops
                if ty in ['UNLINK', 'RMDIR']:
                    purge_update()
                    entries.append(edct(ty, gfid=gfid, entry=en))
                    continue
                go = os.path.join(pfx, gfid)
                st = lstat(go)
                if isinstance(st, int):
                    if ty == 'RENAME':
                        entries.append(edct('UNLINK', gfid=gfid, entry=en))
                    else:
                        logging.debug('file %s got purged in the interim' % go)
                    continue
                entry_update(go, st.st_size, st.st_mode)
                if ty in ['CREATE', 'MKDIR', 'MKNOD']:
                    entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
                elif ty == 'LINK':
                    entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
                elif ty == 'SYMLINK':
                    rl = errno_wrap(os.readlink, [en], [ENOENT])
                    if isinstance(rl, int):
                        continue
                    entries.append(
                        edct(ty, stat=st, entry=en, gfid=gfid, link=rl))
                elif ty == 'RENAME':
                    e2 = unescape(os.path.join(pfx, ec[self.POS_ENTRY2]))
                    entries.append(
                        edct(ty, gfid=gfid, entry=en, entry1=e2, stat=st))
                else:
                    logging.warning('ignoring %s [op %s]' % (gfid, ty))
            elif et in self.TYPE_GFID:
                go = os.path.join(pfx, ec[0])
                st = lstat(go)
                if isinstance(st, int):
                    logging.debug('file %s got purged in the interim' % go)
                    continue
                entry_update(go, st.st_size, st.st_mode)
                datas.update([go])
        logging.debug('entries: %s' % repr(entries))
        if not retry:
            self.update_cumulative_stats(files_pending)
        # sync namespace
        if (entries):
            self.slave.server.entry_ops(entries)
        # sync data
        if self.syncdata(datas):
            if done:
                self.master.server.changelog_done(change)
            return True
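
The edct helper used by these process_change variants flattens a stat result into a plain dict keyed by 'op'. Here is a self-contained sketch of that transformation; the sample op name, entry path, and gfid are placeholders.

import os

def edct(op, **ed):
    dct = {'op': op}
    for k in ed:
        if k == 'stat':
            # keep only the ownership and mode bits of the stat result
            st = ed[k]
            dst = dct['stat'] = {}
            dst['uid'] = st.st_uid
            dst['gid'] = st.st_gid
            dst['mode'] = st.st_mode
        else:
            dct[k] = ed[k]
    return dct

st = os.lstat('.')
print(edct('CREATE', stat=st, entry='<path>', gfid='<gfid>'))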
Example #7
    def process_change(self, change, done):
        clist = []
        entries = []
        purges = set()
        links = set()
        datas = set()
        pfx = gauxpfx()
        try:
            f = open(change, "r")
            clist = f.readlines()
            f.close()
        except IOError:
            raise

        def edct(op, **ed):
            dct = {}
            dct["op"] = op
            for k in ed:
                if k == "stat":
                    st = ed[k]
                    dst = dct["stat"] = {}
                    dst["uid"] = st.st_uid
                    dst["gid"] = st.st_gid
                    dst["mode"] = st.st_mode
                else:
                    dct[k] = ed[k]
            return dct

        for e in clist:
            e = e.strip()
            et = e[self.IDX_START : self.IDX_END]
            ec = e[self.IDX_END :].split(" ")
            if et in self.TYPE_ENTRY:
                ty = ec[self.POS_TYPE]
                en = unescape(os.path.join(pfx, ec[self.POS_ENTRY1]))
                gfid = ec[self.POS_GFID]
                # definitely need a better way bucketize entry ops
                if ty in ["UNLINK", "RMDIR"]:
                    entries.append(edct(ty, gfid=gfid, entry=en))
                    purges.update([os.path.join(pfx, gfid)])
                    continue
                if ty != "RENAME":
                    go = os.path.join(pfx, gfid)
                    st = self.lstat(go)
                    if isinstance(st, int):
                        logging.debug("file %s got purged in the interim" % go)
                        continue
                if ty in ["CREATE", "MKDIR", "MKNOD"]:
                    entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
                elif ty == "LINK":
                    entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
                    links.update([os.path.join(pfx, gfid)])
                elif ty == "SYMLINK":
                    entries.append(edct(ty, stat=st, entry=en, gfid=gfid, link=os.readlink(en)))
                elif ty == "RENAME":
                    e2 = unescape(os.path.join(pfx, ec[self.POS_ENTRY2]))
                    entries.append(edct(ty, gfid=gfid, entry=en, entry1=e2))
                else:
                    pass
            elif et in self.TYPE_GFID:
                da = os.path.join(pfx, ec[0])
                st = self.lstat(da)
                if isinstance(st, int):
                    logging.debug("file %s got purged in the interim" % da)
                    continue
                datas.update([da])
        logging.debug("entries: %s" % repr(entries))
        # sync namespace
        if entries:
            self.slave.server.entry_ops(entries)
        # sync data
        if self.syncdata(datas - (purges - links)):
            if done:
                self.master.server.changelog_done(change)
            return True
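
The argument to syncdata in this variant, datas - (purges - links), appears intended to skip data sync for gfids that were purged during the interval unless a hard link to them was also recorded. A tiny sketch of that set arithmetic with placeholder gfid paths:

datas = {'.gfid/g1', '.gfid/g2', '.gfid/g3'}
purges = {'.gfid/g2', '.gfid/g3'}   # unlinked/rmdir'd in this changelog
links = {'.gfid/g3'}                # but g3 also gained a hard link

# g2 was purged with no surviving link, so it is dropped;
# g3 was purged but also linked, so it still gets synced
print(datas - (purges - links))     # -> {'.gfid/g1', '.gfid/g3'}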
Example #8
    def process_change(self, change, done, retry):
        pfx = gauxpfx()
        clist = []
        entries = []
        datas = set()

        # basic crawl stats: files and bytes
        files_pending = {'count': 0, 'purge': 0, 'bytes': 0, 'files': []}
        try:
            f = open(change, "r")
            clist = f.readlines()
            f.close()
        except IOError:
            raise

        def edct(op, **ed):
            dct = {}
            dct['op'] = op
            for k in ed:
                if k == 'stat':
                    st = ed[k]
                    dst = dct['stat'] = {}
                    dst['uid'] = st.st_uid
                    dst['gid'] = st.st_gid
                    dst['mode'] = st.st_mode
                else:
                    dct[k] = ed[k]
            return dct

        # regular file update: bytes & count
        def _update_reg(entry, size):
            if entry not in files_pending['files']:
                files_pending['count'] += 1
                files_pending['bytes'] += size
                files_pending['files'].append(entry)
        # updates for directories, symlinks etc..
        def _update_rest():
            files_pending['count'] += 1

        # entry count
        def entry_update(entry, size, mode):
            if stat.S_ISREG(mode):
                _update_reg(entry, size)
            else:
                _update_rest()
        # purge count
        def purge_update():
            files_pending['purge'] += 1

        for e in clist:
            e = e.strip()
            et = e[self.IDX_START:self.IDX_END]
            ec = e[self.IDX_END:].split(' ')
            if et in self.TYPE_ENTRY:
                ty = ec[self.POS_TYPE]
                en = unescape(os.path.join(pfx, ec[self.POS_ENTRY1]))
                gfid = ec[self.POS_GFID]
                # definitely need a better way bucketize entry ops
                if ty in ['UNLINK', 'RMDIR']:
                    purge_update()
                    entries.append(edct(ty, gfid=gfid, entry=en))
                    continue
                go = os.path.join(pfx, gfid)
                st = lstat(go)
                if isinstance(st, int):
                    if ty == 'RENAME':
                        entries.append(edct('UNLINK', gfid=gfid, entry=en))
                    else:
                        logging.debug('file %s got purged in the interim' % go)
                    continue
                entry_update(go, st.st_size, st.st_mode)
                if ty in ['CREATE', 'MKDIR', 'MKNOD']:
                    entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
                elif ty == 'LINK':
                    entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
                elif ty == 'SYMLINK':
                    rl = errno_wrap(os.readlink, [en], [ENOENT])
                    if isinstance(rl, int):
                        continue
                    entries.append(edct(ty, stat=st, entry=en, gfid=gfid, link=rl))
                elif ty == 'RENAME':
                    e2 = unescape(os.path.join(pfx, ec[self.POS_ENTRY2]))
                    entries.append(edct(ty, gfid=gfid, entry=en, entry1=e2, stat=st))
                else:
                    logging.warning('ignoring %s [op %s]' % (gfid, ty))
            elif et in self.TYPE_GFID:
                go = os.path.join(pfx, ec[0])
                st = lstat(go)
                if isinstance(st, int):
                    logging.debug('file %s got purged in the interim' % go)
                    continue
                entry_update(go, st.st_size, st.st_mode)
                datas.update([go])
        logging.debug('entries: %s' % repr(entries))
        if not retry:
            self.update_cumulative_stats(files_pending)
        # sync namespace
        if (entries):
            self.slave.server.entry_ops(entries)
        # sync data
        if self.syncdata(datas):
            if done:
                self.master.server.changelog_done(change)
            return True