def __init__(self, argv=sys.argv):
    """Parse the command line and set up database, site, group, queue and log.

    The option parser returned by get_option_parser() is extended with
    -l/--log, -q/--queue, -Q/--no-queue, -c/--cnf and -L/--language.
    The site, sid, group and gid options read below are assumed to be
    declared by that base parser (not visible here).

    The site and group are resolved in this order: explicit option by
    name, explicit option by id, then ids embedded in the program name
    when it looks like ``<letter><sid>[-<gid>[-<tid>]]`` with the letter
    being one of ``g t d h H c``.

    Args:
        argv: Argument list handed to the option parser.

    Sets:
        self.cnf, self.db, self.site, self.group, self.opts, self.args,
        self.QUEUE_FILE, self.LOG_FILE, and self.LANG when a language is
        given on the command line or carried by the resolved group.

    Raises:
        ValueError: If no MySQL cnf file is found, if no site can be
            resolved, or if a requested group or group id is unknown.
    """
    op = get_option_parser()
    op.add_option("-l", "--log", dest="log", metavar="FILE", help="log file")
    op.add_option("-q", "--queue", dest="queue", metavar="FILE", help="queue file")
    op.add_option(
        "-Q", "--no-queue", dest="noqueue", action="store_true", help="do not use backup queue", default=False
    )
    op.add_option("-c", "--cnf", dest="cnf", metavar="FILE", help="MySQL cnf file")
    op.add_option("-L", "--language", dest="lang", metavar="STR", help="language code")
    opts, args = op.parse_args(argv)

    if opts.lang is not None:
        self.LANG = opts.lang

    # Locate the MySQL cnf file: an explicit -c wins, otherwise ~/.my.cnf.
    if opts.cnf:
        if not os.path.exists(opts.cnf):
            raise ValueError("cnf file not found: " + opts.cnf)
        self.cnf = os.path.realpath(opts.cnf)
    else:
        self.cnf = os.path.expanduser("~/.my.cnf")
        if not os.path.exists(self.cnf):
            raise ValueError("no cnf file found; use -c option")
    db = Database(self.cnf)

    # The program name may encode site/group ids, e.g. "g12-3".
    # NOTE(review): the name is read from sys.argv[0] even when a custom
    # argv is passed in; confirm that this is intended.
    prog_name = sys.argv[0].split("/")[-1]
    m = re.match(r"^[gtdhHc](?:([0-9]+)(?:-([0-9]+)(?:-([0-9]+))?)?)$", prog_name)
    if m:
        # Unmatched optional groups are None and stay None; the third
        # (task) id is parsed but not used in this method.
        sid, gid, _tid = [int(x) if x else None for x in m.groups()]
    else:
        sid = gid = None

    # Resolve the site: by name, then by explicit id, then by embedded id.
    site = None
    if opts.site:
        site = db.getSite(opts.site)
    if site is None and opts.sid:
        site = db.getSite(sid=opts.sid)
    if site is None and sid:
        site = db.getSite(sid=sid)
    if site is None:
        raise ValueError("invalid site or site is not specified")

    # Resolve the group the same way; a group is optional, but an
    # explicitly requested one must exist.
    group = None
    if opts.group:
        group = site.getGroup(opts.group)
        if group is None:
            raise ValueError("invalid group: " + opts.group)
    if group is None and opts.gid:
        group = site.getGroup(gid=opts.gid)
        if group is None:
            # %s rather than %d: optparse yields strings unless the option
            # declares a numeric type, and %d on a string would raise
            # TypeError instead of this ValueError. Output is the same
            # for integer ids.
            raise ValueError("invalid group id: %s" % opts.gid)
    if group is None and gid:
        group = site.getGroup(gid=gid)
        if group is None:
            raise ValueError("invalid group id: %s" % gid)

    # A language given on the command line takes precedence over the group's.
    if group and opts.lang is None and group.lang:
        self.LANG = group.lang

    # Set up the backup queue unless it was disabled with -Q.
    if opts.noqueue:
        WorkerBaseQ.__init__(self)
        self.QUEUE_FILE = None
    else:
        if opts.queue:
            queue_file = opts.queue
        elif m:
            # Start from the program name and append whichever ids it
            # does not already carry.
            queue_file = m.group(0)
            if not sid:
                queue_file += "%d" % site.id
            if not gid and group:
                queue_file += "-%d" % group.id
            queue_file += ".q"
        else:
            queue_file = os.path.basename(sys.argv[0]) + "-%d" % site.id
            if group:
                queue_file += "-%d" % group.id
            queue_file += ".q"
        WorkerBaseQ.__init__(self, queue_file)
        self.QUEUE_FILE = queue_file

    if opts.log:
        self.LOG_FILE = opts.log
    else:
        self.LOG_FILE = os.path.basename(sys.argv[0]) + ".log"

    # Try to get group info from the top URL stored in the queue.
    if not group:
        topurl = self.top()
        if hasattr(topurl, "gid"):
            group = site.getGroup(gid=topurl.gid)

    self.db = db
    self.site = site
    self.group = group
    self.opts = opts
    self.args = args
# Script setup: imports, module-level configuration and command-line parsing.
# - `python` is the path of the running interpreter (sys.executable).
# - `db` is a Database built from '~/.my.cnf'.
#   NOTE(review): the path is passed with a literal '~' and no
#   os.path.expanduser call is visible here; confirm Database expands it.
# - HTMLDIR and SGMDIR come from mycurl.MYCURL_POOL_HTML and
#   mycurl.MYCURL_POOL_SGM; VALIDATOR is a hard-coded filesystem path.
# - The parser from mycurl.get_option_parser() gains -L, -d, -u, -b and -t,
#   and has its -g and -s options removed, before sys.argv is parsed.
# NOTE(review): `sys` is used here but its import is not visible in this part
# of the file; confirm it is imported earlier.
# NOTE(review): parse_args() receives the full sys.argv, so the program name
# presumably ends up as the first positional in `args`; confirm that later
# code expects this.
import os import re import mycurl from mycurl.db import Database import MySQLdb import time, datetime python = sys.executable db = Database('~/.my.cnf') HTMLDIR = mycurl.MYCURL_POOL_HTML SGMDIR = mycurl.MYCURL_POOL_SGM VALIDATOR = '/ldc/projects/GALE/tools/bin/validateWebSgm' op = mycurl.get_option_parser() op.add_option("-L", "--language", dest="language", metavar="STR", help="language", nargs=1) op.add_option("-d", "--first-date", dest="firstdate", metavar="STR", help="first post date (YYYY-MM-DD)", nargs=1) op.add_option("-u", "--last-date", dest="lastdate", metavar="STR", help="last post date (YYYY-MM-DD)", nargs=1) op.add_option("-b", "--base-dir", dest="base", metavar="DIR", help="webcol base directory", nargs=1) op.add_option("-t", "--test", dest="test", action="store_true", default=False, help="just test") op.remove_option("-g") op.remove_option("-s") opts, args = op.parse_args(sys.argv) if opts.base: