def running_pid(proc_required=True, context=None):
    """
    Return a list of (pid, context, exitpath) tuples, one for each
    running crawler (per ps(1)), or [] if no crawler is running
    """
    cfg = CrawlConfig.add_config()
    rval = []
    if proc_required:
        result = pidcmd()
        for line in result.split("\n"):
            if 'crawl start' not in line:
                continue
            pid = int(line.split()[0])
            pfpath = "%s/%d" % (CrawlConfig.pid_dir(), pid)
            if os.path.exists(pfpath):
                (ctx, xpath) = util.contents(pfpath).strip().split()
                rval.append((pid, ctx, xpath))
            elif not os.path.exists(pfpath + '.DEFUNCT'):
                # the crawler is running but the pid file has been lost,
                # so recreate it from the configuration
                ctx = context or cfg.get('crawler', 'context')
                xpath = cfg.get_d('crawler', 'exitpath', '%s.exit' % ctx)
                make_pidfile(pid, ctx, xpath)
                rval.append((pid, ctx, xpath))
            # if pfpath + '.DEFUNCT' exists, the crawler is shutting
            # down, so we don't want to recreate the pid file
    else:
        # trust the pid files alone, skipping .DEFUNCT markers, whose
        # names would not parse as pids anyway
        pid_l = [x for x in glob.glob("%s/*" % CrawlConfig.pid_dir())
                 if not x.endswith('.DEFUNCT')]
        for pid_n in pid_l:
            pid = int(os.path.basename(pid_n))
            (ctx, xpath) = util.contents(pid_n).strip().split()
            rval.append((pid, ctx, xpath))
    return rval
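# Usage sketch (illustrative, not part of the original source; assumes
# it lives in this module with a crawler configuration available):
def _demo_running_pid():
    # each entry is a (pid, context, exitpath) tuple
    for (pid, ctx, xpath) in running_pid():
        print("crawler %d: context=%s exitpath=%s" % (pid, ctx, xpath))
    # with proc_required=False, the pid files are trusted without
    # consulting ps(1)
    if not running_pid(proc_required=False):
        print("no crawler pid files found")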
def __init__(self, *args, **kwargs):
    """
    Set piddir for the object from the configuration, then call the
    parent's constructor.
    """
    self.piddir = CrawlConfig.pid_dir()
    super(CrawlDaemon, self).__init__(*args, **kwargs)
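# Toy illustration (not hpssic code): attributes assigned before the
# super() call are visible to the parent constructor, which is why
# self.piddir is set before delegating above.
class _Base(object):
    def __init__(self):
        print("parent sees piddir = %s" % getattr(self, 'piddir', None))

class _Child(_Base):
    def __init__(self):
        self.piddir = '/tmp/crawler_pids'    # hypothetical directory
        super(_Child, self).__init__()       # prints the value set above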
def make_pidfile(pid, context, exitpath, just_check=False):
    """
    Generate a pid file in the pid directory (per CrawlConfig.pid_dir()),
    creating the directory if necessary.
    """
    piddir = CrawlConfig.pid_dir()
    if not os.path.exists(piddir):
        # a freshly created directory cannot contain a conflicting pid
        # file, so the scan below is skipped
        os.mkdir(piddir)
    else:
        # refuse to proceed if a live (non-DEFUNCT) pid file already
        # claims this context
        pf_l = [x for x in glob.glob("%s/*" % piddir)
                if not x.endswith('.DEFUNCT')]
        for pf_n in pf_l:
            data = util.contents(pf_n)
            if 0 == len(data):
                continue
            (ctx, xp) = data.strip().split()
            if ctx == context:
                raise StandardError("The pidfile for context %s exists" %
                                    context)
    pfname = "%s/%d" % (piddir, pid)
    if just_check:
        return pfname
    with open(pfname, 'w') as f:
        f.write("%s %s\n" % (context, exitpath))
    return pfname
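# Usage sketch (illustrative; 'demo' and the exit path are hypothetical
# values, not from the original source):
def _demo_make_pidfile():
    # just_check=True computes the pid file path without writing it
    pfname = make_pidfile(os.getpid(), 'demo', '/tmp/crawl.demo.exit',
                          just_check=True)
    print("pid file would be %s" % pfname)
    # writing it for real raises StandardError if a non-DEFUNCT pid
    # file already claims the 'demo' context
    make_pidfile(os.getpid(), 'demo', '/tmp/crawl.demo.exit')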
def clean_defunct_pidfiles(context):
    """
    Remove .DEFUNCT pid files for *context*
    """
    # cfg is unused here; add_config() is called for its side effect of
    # loading the configuration
    cfg = CrawlConfig.add_config()
    pdir = CrawlConfig.pid_dir()
    for path in glob.glob(os.path.join(pdir, '*.DEFUNCT')):
        c = util.contents(path)
        if context in c:
            os.unlink(path)
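# Lifecycle sketch (illustrative; 'demo' is a hypothetical context, and
# the rename mirrors the .DEFUNCT convention the comments in
# running_pid() describe -- the actual shutdown path is an assumption):
def _demo_pidfile_lifecycle():
    pfname = make_pidfile(os.getpid(), 'demo', '/tmp/crawl.demo.exit')
    # shutdown is assumed to mark the pid file defunct by renaming it
    os.rename(pfname, pfname + '.DEFUNCT')
    # once shutdown completes, the marker can be cleaned up
    clean_defunct_pidfiles('demo')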