Ejemplo n.º 1
0
def running_pid(proc_required=True, context=None):
    """
    Return a list of (pid, context, exitpath) tuples, one per crawler found,
    or [] if none is found.

    If *proc_required* is true (the default), the output of pidcmd() is
    scanned for lines containing 'crawl start', and the first
    whitespace-delimited field of each such line is taken as the pid. When a
    pid file exists for that pid, context and exitpath are read from it. When
    neither the pid file nor its '.DEFUNCT' counterpart exists, the pid file
    is recreated through make_pidfile() using *context* (falling back to the
    configured 'crawler'/'context' value) and the configured
    'crawler'/'exitpath' value ('<context>.exit' is passed to get_d(),
    presumably as the default).

    If *proc_required* is false, the process list is not consulted; the
    result is built from the pid files found in CrawlConfig.pid_dir().
    '.DEFUNCT' files are skipped because their names are not plain integers
    and they do not describe a crawler that should be reported.
    """
    cfg = CrawlConfig.add_config()

    rval = []
    if proc_required:
        result = pidcmd()
        for line in result.split("\n"):
            if 'crawl start' not in line:
                continue
            pid = int(line.split()[0])
            pfpath = "%s/%d" % (CrawlConfig.pid_dir(), pid)
            if os.path.exists(pfpath):
                (ctx, xpath) = util.contents(pfpath).strip().split()
                rval.append((pid, ctx, xpath))
            elif not os.path.exists(pfpath + '.DEFUNCT'):
                # crawler is running but the pid file has been lost
                ctx = context or cfg.get('crawler', 'context')
                xpath = cfg.get_d('crawler', 'exitpath', '%s.exit' % ctx)
                make_pidfile(pid, ctx, xpath)
                rval.append((pid, ctx, xpath))
            # if pfpath + '.DEFUNCT' exists, the crawler is shutting down
            # so we don't want to recreate the pid file.
    else:
        for pid_n in glob.glob("%s/*" % CrawlConfig.pid_dir()):
            if pid_n.endswith('.DEFUNCT'):
                # "<pid>.DEFUNCT" would make int() below raise ValueError
                continue
            pid = int(os.path.basename(pid_n))
            (ctx, xpath) = util.contents(pid_n).strip().split()
            rval.append((pid, ctx, xpath))

    return rval
Ejemplo n.º 2
0
def running_pid(proc_required=True, context=None):
    """
    Return a list of (pid, context, exitpath) tuples, one per crawler found,
    or [] if none is found.

    If *proc_required* is true (the default), the output of pidcmd() is
    scanned for lines containing 'crawl start', and the first
    whitespace-delimited field of each such line is taken as the pid. When a
    pid file exists for that pid, context and exitpath are read from it. When
    neither the pid file nor its '.DEFUNCT' counterpart exists, the pid file
    is recreated through make_pidfile() using *context* (falling back to the
    configured 'crawler'/'context' value) and the configured
    'crawler'/'exitpath' value ('<context>.exit' is passed to get_d(),
    presumably as the default).

    If *proc_required* is false, the process list is not consulted; the
    result is built from the pid files found in CrawlConfig.pid_dir().
    '.DEFUNCT' files are skipped because their names are not plain integers
    and they do not describe a crawler that should be reported.
    """
    cfg = CrawlConfig.add_config()

    rval = []
    if proc_required:
        result = pidcmd()
        for line in result.split("\n"):
            if 'crawl start' not in line:
                continue
            pid = int(line.split()[0])
            pfpath = "%s/%d" % (CrawlConfig.pid_dir(), pid)
            if os.path.exists(pfpath):
                (ctx, xpath) = util.contents(pfpath).strip().split()
                rval.append((pid, ctx, xpath))
            elif not os.path.exists(pfpath + '.DEFUNCT'):
                # crawler is running but the pid file has been lost
                ctx = context or cfg.get('crawler', 'context')
                xpath = cfg.get_d('crawler', 'exitpath', '%s.exit' % ctx)
                make_pidfile(pid, ctx, xpath)
                rval.append((pid, ctx, xpath))
            # if pfpath + '.DEFUNCT' exists, the crawler is shutting down
            # so we don't want to recreate the pid file.
    else:
        for pid_n in glob.glob("%s/*" % CrawlConfig.pid_dir()):
            if pid_n.endswith('.DEFUNCT'):
                # "<pid>.DEFUNCT" would make int() below raise ValueError
                continue
            pid = int(os.path.basename(pid_n))
            (ctx, xpath) = util.contents(pid_n).strip().split()
            rval.append((pid, ctx, xpath))

    return rval
Ejemplo n.º 3
0
 def __init__(self, *args, **kwargs):
     """
     Initialize the daemon object.

     The value returned by CrawlConfig.pid_dir() is stored on the instance
     as *piddir* first; all positional and keyword arguments are then
     handed unchanged to the parent class constructor.
     """
     pid_directory = CrawlConfig.pid_dir()
     self.piddir = pid_directory
     super(CrawlDaemon, self).__init__(*args, **kwargs)
Ejemplo n.º 4
0
def make_pidfile(pid, context, exitpath, just_check=False):
    """
    Generate a pid file in the pid directory, creating the directory if
    necessary.

    The pid directory is the one returned by CrawlConfig.pid_dir(). The pid
    file is named after *pid* and holds the single line
    "<context> <exitpath>".

    Arguments:
        pid: integer process id, used as the pid file name
        context: context name written as the first field of the pid file
        exitpath: value written as the second field of the pid file
        just_check: if true, set up the directory and run the
            duplicate-context check, but do not write the pid file

    Returns the path of the pid file.

    Raises StandardError if an existing pid file (other than a '.DEFUNCT'
    one) already names *context* in its first field.
    """
    piddir = CrawlConfig.pid_dir()
    if not os.path.exists(piddir):
        # A directory created just now cannot hold a conflicting pid file,
        # so the scan below only runs when the directory already existed.
        os.mkdir(piddir)
    else:
        for pf_n in glob.glob("%s/*" % piddir):
            if pf_n.endswith('.DEFUNCT'):
                continue
            fields = util.contents(pf_n).split()
            if not fields:
                # Empty or whitespace-only pid file: nothing to compare.
                continue
            # Only the first field (the context) matters here, so a pid
            # file with a missing or extra field no longer aborts the scan.
            if fields[0] == context:
                raise StandardError("The pidfile for context %s exists" %
                                    context)

    pfname = "%s/%d" % (piddir, pid)
    if just_check:
        return pfname

    with open(pfname, 'w') as f:
        f.write("%s %s\n" % (context, exitpath))

    return pfname
Ejemplo n.º 5
0
 def __init__(self, *args, **kwargs):
     """
     Initialize the daemon object.

     The value returned by CrawlConfig.pid_dir() is stored on the instance
     as *piddir* first; all positional and keyword arguments are then
     handed unchanged to the parent class constructor.
     """
     pid_directory = CrawlConfig.pid_dir()
     self.piddir = pid_directory
     super(CrawlDaemon, self).__init__(*args, **kwargs)
Ejemplo n.º 6
0
def make_pidfile(pid, context, exitpath, just_check=False):
    """
    Generate a pid file in the pid directory, creating the directory if
    necessary.

    The pid directory is the one returned by CrawlConfig.pid_dir(). The pid
    file is named after *pid* and holds the single line
    "<context> <exitpath>".

    Arguments:
        pid: integer process id, used as the pid file name
        context: context name written as the first field of the pid file
        exitpath: value written as the second field of the pid file
        just_check: if true, set up the directory and run the
            duplicate-context check, but do not write the pid file

    Returns the path of the pid file.

    Raises StandardError if an existing pid file (other than a '.DEFUNCT'
    one) already names *context* in its first field.
    """
    piddir = CrawlConfig.pid_dir()
    if not os.path.exists(piddir):
        # A directory created just now cannot hold a conflicting pid file,
        # so the scan below only runs when the directory already existed.
        os.mkdir(piddir)
    else:
        for pf_n in glob.glob("%s/*" % piddir):
            if pf_n.endswith('.DEFUNCT'):
                continue
            fields = util.contents(pf_n).split()
            if not fields:
                # Empty or whitespace-only pid file: nothing to compare.
                continue
            # Only the first field (the context) matters here, so a pid
            # file with a missing or extra field no longer aborts the scan.
            if fields[0] == context:
                raise StandardError("The pidfile for context %s exists" %
                                    context)

    pfname = "%s/%d" % (piddir, pid)
    if just_check:
        return pfname

    with open(pfname, 'w') as f:
        f.write("%s %s\n" % (context, exitpath))

    return pfname
Ejemplo n.º 7
0
def clean_defunct_pidfiles(context):
    """
    Remove .DEFUNCT pid files for *context*

    Every file matching '*.DEFUNCT' in the directory returned by
    CrawlConfig.pid_dir() is read, and the file is unlinked when *context*
    occurs in it as a complete whitespace-delimited word. A plain substring
    test is deliberately avoided: it would also remove the files of any
    other context whose name merely contains *context*.
    """
    # The returned configuration object is not used in this function.
    # NOTE(review): the call is kept in case loading the configuration is a
    # prerequisite for pid_dir() -- confirm, and drop it if it is not.
    CrawlConfig.add_config()
    pdir = CrawlConfig.pid_dir()
    for path in glob.glob(os.path.join(pdir, '*.DEFUNCT')):
        if context in util.contents(path).split():
            os.unlink(path)
Ejemplo n.º 8
0
def clean_defunct_pidfiles(context):
    """
    Remove .DEFUNCT pid files for *context*

    Every file matching '*.DEFUNCT' in the directory returned by
    CrawlConfig.pid_dir() is read, and the file is unlinked when *context*
    occurs in it as a complete whitespace-delimited word. A plain substring
    test is deliberately avoided: it would also remove the files of any
    other context whose name merely contains *context*.
    """
    # The returned configuration object is not used in this function.
    # NOTE(review): the call is kept in case loading the configuration is a
    # prerequisite for pid_dir() -- confirm, and drop it if it is not.
    CrawlConfig.add_config()
    pdir = CrawlConfig.pid_dir()
    for path in glob.glob(os.path.join(pdir, '*.DEFUNCT')):
        if context in util.contents(path).split():
            os.unlink(path)