def init(self, parses_filename, output_dir):
  self.output_dir = output_dir
  reader = sling.RecordReader(parses_filename)
  self.category_name_to_qid = {}               # category name -> qid
  self.category_frame = {}                     # category qid -> frame
  self.category_parses = {}                    # category qid -> parses
  self.signature_to_parse = defaultdict(list)  # signature -> parse
  self.store = sling.Store()
  self.num_parses = 0
  for index, (qid, value) in enumerate(reader):
    if (index + 1) % 20000 == 0:
      log.info("%d categories read" % index)
    qid = qid.decode('utf-8')
    frame = self.store.parse(value)
    self.category_name_to_qid[frame.name] = qid
    self.category_frame[qid] = frame
    self.category_parses[qid] = []
    for parse in frame("parse"):
      element = Parse(self.num_parses, qid, frame, parse)
      signature = util.full_parse_signature(parse)
      self.signature_to_parse[signature].append(element)
      self.category_parses[qid].append(element)
      self.num_parses += 1
  self.store.lockgc()
  self.store.freeze()
  self.store.unlockgc()

def build_knowledge_base():
  # Merge categories from wikipedias.
  if flags.arg.merge_categories:
    log.info("Merge wikipedia categories")
    wf = wiki.WikiWorkflow("category-merging")
    wf.merge_wikipedia_categories()
    workflow.run(wf.wf)

  # Invert categories.
  if flags.arg.invert_categories:
    log.info("Invert categories")
    wf = wiki.WikiWorkflow("category-inversion")
    wf.invert_wikipedia_categories()
    workflow.run(wf.wf)

  # Extract link graph.
  if flags.arg.extract_wikilinks:
    log.info("Extract link graph")
    wf = wiki.WikiWorkflow("link-graph")
    wf.extract_links()
    workflow.run(wf.wf)

  # Fuse items.
  if flags.arg.fuse_items:
    log.info("Fuse items")
    wf = wiki.WikiWorkflow("fuse-items")
    wf.fuse_items()
    workflow.run(wf.wf)

  # Build knowledge base repository.
  if flags.arg.build_kb:
    log.info("Build knowledge base repository")
    wf = wiki.WikiWorkflow("knowledge-base")
    wf.build_knowledge_base()
    workflow.run(wf.wf)

def load_kb(task):
  kb = sling.Store()
  kb.load(task.input("kb").name)
  log.info("Knowledge base read")
  kb.freeze()
  log.info("Knowledge base frozen")
  return kb

def parse_wikipedia():
  # Convert wikipedia pages to SLING documents.
  if flags.arg.parse_wikipedia:
    for language in flags.arg.languages:
      log.info("Parse " + language + " wikipedia")
      wf = wiki.WikiWorkflow(language + "-wikipedia-parsing")
      wf.parse_wikipedia(language=language)
      workflow.run(wf.wf)

def run(self):
  log.info("job queue", self.name, "ready to execute jobs")
  while True:
    job = self.pending.get()
    try:
      self.execute(job)
    except Exception as e:
      log.info("Error executing job", job.id, ":", e)
      traceback.print_exc()
    finally:
      self.pending.task_done()

def save_workflow_log(path):
  global active
  if not active: return False
  if path is None or len(path) == 0: return False
  if not os.path.exists(path): return False
  logfn = path + "/" + time.strftime("%Y%m%d-%H%M%S") + ".json"
  logfile = open(logfn, "w")
  logfile.write(statistics())
  logfile.close()
  log.info("workflow stats saved in " + logfn)
  return True

def fuse_items():
  # Merge categories from wikipedias.
  if flags.arg.merge_categories:
    log.info("Merge wikipedia categories")
    wf = wiki.WikiWorkflow("category-merging")
    wf.merge_wikipedia_categories()
    workflow.run(wf.wf)

  # Invert categories.
  if flags.arg.invert_categories:
    log.info("Invert categories")
    wf = wiki.WikiWorkflow("category-inversion")
    wf.invert_wikipedia_categories()
    workflow.run(wf.wf)

  # Compute item popularity.
  if flags.arg.compute_item_popularity:
    log.info("Compute item popularity")
    wf = wiki.WikiWorkflow("item-popularity")
    wf.compute_item_popularity()
    workflow.run(wf.wf)

  # Fuse items.
  if flags.arg.fuse_items:
    log.info("Fuse items")
    wf = wiki.WikiWorkflow("fuse-items")
    wf.fuse_items()
    workflow.run(wf.wf)

def build_knowledge_base():
  # Build knowledge base repository.
  if flags.arg.build_kb:
    log.info("Build knowledge base repository")
    wf = wiki.WikiWorkflow("knowledge-base")
    wf.build_knowledge_base()
    workflow.run(wf.wf)

  # Extract item names from wikidata and wikipedia.
  if flags.arg.extract_names:
    for language in flags.arg.languages:
      log.info("Extract " + language + " names")
      wf = wiki.WikiWorkflow(language + "-name-extraction")
      wf.extract_names(language=language)
      workflow.run(wf.wf)

  # Build name table.
  if flags.arg.build_nametab:
    for language in flags.arg.languages:
      log.info("Build " + language + " name table")
      wf = wiki.WikiWorkflow(language + "-name-table")
      wf.build_name_table(language=language)
      workflow.run(wf.wf)

  # Build phrase table.
  if flags.arg.build_phrasetab:
    for language in flags.arg.languages:
      log.info("Build " + language + " phrase table")
      wf = wiki.WikiWorkflow(language + "-phrase-table")
      wf.build_phrase_table(language=language)
      workflow.run(wf.wf)

def extract_named_entities():
  # Extract Wikipedia link graph.
  if flags.arg.extract_wikilinks:
    log.info("Extract Wikipedia link graph")
    wf = entity.EntityWorkflow("wiki-links")
    wf.extract_wikilinks()
    workflow.run(wf.wf)

  # Extract IDF table.
  if flags.arg.build_idf:
    wf = entity.EntityWorkflow("idf-table")
    for language in flags.arg.languages:
      log.info("Build " + language + " IDF table")
      wf.build_idf(language=language)
    workflow.run(wf.wf)

  # Fuse NER items.
  if flags.arg.fuse_ner_items:
    log.info("Fuse NER items")
    wf = entity.EntityWorkflow("fuse-ner-items")
    wf.fuse_items()
    workflow.run(wf.wf)

  # Build NER knowledge base.
  if flags.arg.build_ner_kb:
    log.info("Build NER knowledge base")
    wf = entity.EntityWorkflow("ner-knowledge-base")
    wf.build_knowledge_base()
    workflow.run(wf.wf)

def run_workflow(wf):
  # In dryrun mode the workflow is just dumped without running it.
  if flags.arg.dryrun:
    print(wf.wf.dump())
    return

  # Start workflow.
  log.info("start workflow")
  wf.wf.start()

  # Wait until workflow completes. Poll every second to make the workflow
  # interruptible.
  done = False
  while not done:
    done = wf.wf.wait(1000)

def import_wiki():
  if flags.arg.import_wikidata or flags.arg.import_wikipedia:
    wf = wiki.WikiWorkflow("wiki-import")

    # Import wikidata.
    if flags.arg.import_wikidata:
      log.info("Import wikidata")
      wf.wikidata()

    # Import wikipedia(s).
    if flags.arg.import_wikipedia:
      for language in flags.arg.languages:
        log.info("Import " + language + " wikipedia")
        wf.wikipedia(language=language)

    workflow.run(wf.wf)

def silver_annotation():
  # Extract IDF table.
  if flags.arg.build_idf:
    wf = silver.SilverWorkflow("idf-table")
    for language in flags.arg.languages:
      log.info("Build " + language + " IDF table")
      wf.build_idf(language=language)
    workflow.run(wf.wf)

  # Run silver-labeling of Wikipedia documents.
  if flags.arg.silver_annotation:
    for language in flags.arg.languages:
      log.info("Silver-label " + language + " wikipedia")
      wf = silver.SilverWorkflow(language + "-silver")
      wf.silver_annotation(language=language)
      workflow.run(wf.wf)

def refresh_task_list():
  global last_task_timestamp, tasks
  ts = os.stat(flags.arg.tasklist).st_mtime
  if ts == last_task_timestamp: return
  try:
    tasklist = {}
    store = sling.Store()
    for t in store.load(flags.arg.tasklist):
      tasklist[t.name] = Task(t)
    tasks = tasklist
  except:
    log.info("Error loading task list")
    traceback.print_exc(file=sys.stdout)
    return
  last_task_timestamp = ts
  log.info("Loaded", len(tasks), "tasks")

def read(self, parses_filename):
  reader = sling.RecordReader(parses_filename)
  self.category_name_to_qid = {}                      # category name -> qid
  self.category_frame = {}                            # category qid -> frame
  self.full_signature_to_parse = defaultdict(list)    # signature -> parse
  self.coarse_signature_to_parse = defaultdict(list)  # signature -> parse
  store = sling.Store()
  for index, (qid, value) in enumerate(reader):
    if index > 0 and index % 20000 == 0:
      log.info("%d categories read" % index)
    frame = store.parse(value)
    self.category_name_to_qid[frame.name] = qid
    self.category_frame[qid] = frame
    for parse in frame("parse"):
      element = (qid, frame, parse)
      full_signature = util.full_parse_signature(parse)
      self.full_signature_to_parse[full_signature].append(element)
      coarse_signature = util.coarse_parse_signature(parse)
      self.coarse_signature_to_parse[coarse_signature].append(element)

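# Usage sketch (not part of the original code): look up parses that share a
# signature with a given parse, using the maps built by read() above. The
# 'parses' object and 'some_parse' below are hypothetical; when no parse has
# the same full signature, fall back to the coarser signature.
def similar_parses(parses, some_parse):
  matches = parses.full_signature_to_parse[util.full_parse_signature(some_parse)]
  if len(matches) == 0:
    matches = parses.coarse_signature_to_parse[util.coarse_parse_signature(some_parse)]
  return [(qid, frame.name) for qid, frame, parse in matches]
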
def __init__(self, kb, extractor):
  self.kb = kb
  self.extractor = extractor
  self.unique_properties = set()
  self.date_properties = set()

  # Collect unique-valued and date-valued properties. The former will be used
  # to compute CONFLICT counts, and the latter need to be processed in a
  # special manner while matching existing facts.
  constraint_role = kb["P2302"]
  unique = kb["Q19474404"]  # single-value constraint
  w_time = kb["/w/time"]
  for prop in kb["/w/entity"]("role"):
    if prop.target == w_time:
      self.date_properties.add(prop)
    for constraint_type in prop(constraint_role):
      if constraint_type == unique or constraint_type["is"] == unique:
        self.unique_properties.add(prop)
  log.info("%d unique-valued properties" % len(self.unique_properties))
  log.info("%d date-valued properties" % len(self.date_properties))

def train_embeddings():
  # Extract vocabulary for word embeddings.
  if flags.arg.extract_vocabulary:
    for language in flags.arg.languages:
      log.info("Extract " + language + " vocabulary")
      wf = embedding.EmbeddingWorkflow(language + "-vocabulary")
      wf.extract_vocabulary(language=language)
      workflow.run(wf.wf)

  # Train word embeddings.
  if flags.arg.train_word_embeddings:
    for language in flags.arg.languages:
      log.info("Train " + language + " word embeddings")
      wf = embedding.EmbeddingWorkflow(language + "-word-embeddings")
      wf.train_word_embeddings(language=language)
      workflow.run(wf.wf)

  # Extract vocabulary for fact and category embeddings.
  if flags.arg.extract_fact_lexicon:
    log.info("Extract fact and category lexicons")
    wf = embedding.EmbeddingWorkflow("fact-lexicon")
    wf.extract_fact_lexicon()
    workflow.run(wf.wf)

  # Extract facts from knowledge base.
  if flags.arg.extract_facts:
    log.info("Extract facts from knowledge base")
    wf = embedding.EmbeddingWorkflow("fact-extraction")
    wf.extract_facts()
    workflow.run(wf.wf)

  # Train fact and category embeddings.
  if flags.arg.train_fact_embeddings:
    log.info("Train fact and category embeddings")
    wf = embedding.EmbeddingWorkflow("fact-embeddings")
    wf.train_fact_embeddings()
    workflow.run(wf.wf)

def run(self, task):
  filename = task.input("input").name
  store = sling.Store()
  log.info("Load store from", filename)
  store.load(filename)
  log.info("Coalesce store")
  store.coalesce()
  log.info("Snapshot store")
  store.snapshot(filename)

def run(self, task):
  # Get task parameters.
  name = task.param("shortname")
  baseurl = task.param("url")
  ratelimit = task.param("ratelimit", 0)
  chunksize = task.param("chunksize", 64 * 1024)
  priority = task.param("priority", 0)
  outputs = task.outputs("output")
  log.info("Download " + name + " from " + baseurl)

  for output in outputs:
    # Make sure directory exists.
    directory = os.path.dirname(output.name)
    if not os.path.exists(directory): os.makedirs(directory)

    # Do not overwrite existing file unless flag is set.
    if not flags.arg.overwrite and os.path.exists(output.name):
      raise Exception("file already exists: " + output.name +
                      " (use --overwrite to overwrite existing files)")

    # Hold off on low-priority tasks.
    if priority > 0: time.sleep(priority)

    # Wait until we are below the rate limit.
    global download_concurrency
    if ratelimit > 0:
      while download_concurrency >= ratelimit: time.sleep(10)
      download_concurrency += 1

    # Compute url.
    if len(outputs) > 1:
      url = baseurl + "/" + os.path.basename(output.name)
    else:
      url = baseurl

    # Download from url to file.
    if ratelimit > 0: log.info("Start download of " + output.name)
    conn = urlopen(url)
    last_modified = time.mktime(
        time.strptime(conn.headers['last-modified'],
                      "%a, %d %b %Y %H:%M:%S GMT"))
    total_bytes = "bytes_downloaded"
    bytes = name + "_bytes_downloaded"
    with open(output.name, 'wb') as f:
      while True:
        chunk = conn.read(chunksize)
        if not chunk: break
        f.write(chunk)
        task.increment(total_bytes, len(chunk))
        task.increment(bytes, len(chunk))
    os.utime(output.name, (last_modified, last_modified))
    if ratelimit > 0: download_concurrency -= 1
    log.info(name + " downloaded")

def load_kb(task):
  if type(task) is str:
    filename = task  # assume filename
  else:
    filename = task.input("kb").name
  if filename in _kb_cache:
    log.info("Retrieving cached KB")
    return _kb_cache[filename]
  else:
    kb = sling.Store()
    kb.load(filename)
    log.info("Knowledge base read")
    kb.lockgc()
    kb.freeze()
    kb.unlockgc()
    log.info("Knowledge base frozen")
    _kb_cache[filename] = kb
    return kb

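# Usage sketch (not part of the original code): load_kb() accepts either a
# task with a "kb" input or a plain filename, and caches the frozen store so
# repeated calls are cheap. The file path and QID below are assumptions.
kb = load_kb("local/data/e/wiki/kb.sling")
item = kb["Q935"]          # look up an item frame by id
for name in item("name"):  # iterate all 'name' slots of the frame
  print(name)
assert load_kb("local/data/e/wiki/kb.sling") is kb  # second call hits the cache
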
def __init__(self, kb, extractor):
  self.kb = kb
  self.extractor = extractor
  self.unique_properties = set()
  self.date_properties = set()
  self.location_properties = set()

  # Collect unique-valued, date-valued, and location-valued properties.
  # Unique-valued properties will be used to compute CONFLICT counts, while
  # date-valued properties need to be processed in a special manner while
  # matching existing facts.
  constraint_role = kb["P2302"]
  unique = kb["Q19474404"]  # single-value constraint
  w_time = kb["/w/time"]
  w_item = kb["/w/item"]
  p_subproperty_of = kb["P1647"]
  p_location = kb["P276"]
  for prop in kb["/w/entity"]("role"):
    if prop.target == w_time:
      self.date_properties.add(prop)
    if prop.target == w_item:
      for role, value in prop:
        if role == p_subproperty_of:
          if kb.resolve(value) == p_location:
            self.location_properties.add(prop)
    for constraint_type in prop(constraint_role):
      if constraint_type == unique or constraint_type["is"] == unique:
        self.unique_properties.add(prop)
  log.info("%d unique-valued properties" % len(self.unique_properties))
  log.info("%d date-valued properties" % len(self.date_properties))
  log.info("%d location-valued properties" % len(self.location_properties))

  # Set closure properties.
  self.closure_properties = {}
  self.p_subclass = kb["P279"]
  self.p_parent_org = kb["P749"]
  p_located_in = kb["P131"]
  for p in self.location_properties:
    self.closure_properties[p] = p_located_in

  # 'Educated at' -> 'Part of'.
  self.closure_properties[kb["P69"]] = kb["P361"]

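# Usage sketch (not part of the original code): how the property sets and the
# closure map built above might be consulted for a candidate fact. 'matcher'
# is an instance of the class whose __init__ is shown above; 'prop' is a
# hypothetical property frame.
def describe_property(matcher, prop):
  if prop in matcher.unique_properties:
    print(prop.id, "is single-valued; a differing existing value is a CONFLICT")
  if prop in matcher.date_properties:
    print(prop.id, "is date-valued and needs special matching")
  closure = matcher.closure_properties.get(prop)
  if closure is not None:
    print(prop.id, "existing values may also be matched via", closure.id)
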
def fuse_items():
  # Merge categories from wikipedias.
  if flags.arg.merge_categories:
    log.info("Merge wikipedia categories")
    wf = wiki.WikiWorkflow("category-merging")
    wf.merge_wikipedia_categories()
    run_workflow(wf)

  # Invert categories.
  if flags.arg.invert_categories:
    log.info("Invert categories")
    wf = wiki.WikiWorkflow("category-inversion")
    wf.invert_wikipedia_categories()
    run_workflow(wf)

  # Fuse items.
  if flags.arg.fuse_items:
    log.info("Fuse items")
    wf = wiki.WikiWorkflow("fuse-items")
    wf.fuse_items()
    run_workflow(wf)

def run(self, task):
  # Get task parameters.
  name = task.param("shortname")
  url = task.param("url")
  ratelimit = task.param("ratelimit", 0)
  chunksize = task.param("chunksize", 64 * 1024)
  output = task.output("output")
  log.info("Download " + name + " from " + url)

  # Make sure directory exists.
  directory = os.path.dirname(output.name)
  if not os.path.exists(directory): os.makedirs(directory)

  # Do not overwrite existing file.
  if os.path.exists(output.name):
    raise Exception("file already exists: " + output.name)

  # Wait until we are below the rate limit.
  global download_concurrency
  if ratelimit > 0:
    while download_concurrency >= ratelimit: time.sleep(10)
    download_concurrency += 1

  # Download from url to file.
  if ratelimit > 0: log.info("Start download of " + url)
  conn = urllib2.urlopen(url)
  total_bytes = "bytes_downloaded"
  bytes = name + "_bytes_downloaded"
  with open(output.name, 'wb') as f:
    while True:
      chunk = conn.read(chunksize)
      if not chunk: break
      f.write(chunk)
      task.increment(total_bytes, len(chunk))
      task.increment(bytes, len(chunk))
  if ratelimit > 0: download_concurrency -= 1
  log.info(name + " downloaded")

def build_alias_tables():
  # Extract item names from wikidata and wikipedia.
  if flags.arg.extract_names:
    for language in flags.arg.languages:
      log.info("Extract " + language + " names")
      wf = wiki.WikiWorkflow(language + "-name-extraction")
      wf.extract_names(language=language)
      workflow.run(wf.wf)

  # Build name table.
  if flags.arg.build_nametab:
    for language in flags.arg.languages:
      log.info("Build " + language + " name table")
      wf = wiki.WikiWorkflow(language + "-name-table")
      wf.build_name_table(language=language)
      workflow.run(wf.wf)

  # Build phrase table.
  if flags.arg.build_phrasetab:
    for language in flags.arg.languages:
      log.info("Build " + language + " phrase table")
      wf = wiki.WikiWorkflow(language + "-phrase-table")
      wf.build_phrase_table(language=language)
      workflow.run(wf.wf)

def extract_wikimedia():
  for language in flags.arg.languages:
    log.info("Extract " + language + " Wikipedia images")
    wf = WikiMediaWorkflow(language + "-wikimedia")
    wf.extract_media(language=language)
    run(wf.wf)

def twitter_profiles():
  log.info("Extract twitter profiles")
  wf = TwitterWorkflow("twitter-profiles")
  wf.extract_twitter()
  run(wf.wf)

def run(self, task):
  # Get parameters.
  twitterdb = task.input("twitterdb").name

  # Load knowledge base.
  log.info("Load knowledge base")
  kb = sling.Store()
  kb.load(task.input("kb").name)
  p_id = kb["id"]
  p_is = kb["is"]
  p_twitter = kb["P2002"]
  p_image = kb["P18"]
  p_media = kb["media"]
  p_stated_in = kb["P248"]
  n_twitter = kb["Q918"]
  kb.freeze()

  # Open output file.
  fout = open(task.output("output").name, "w")

  # Find all items with twitter usernames.
  dbsession = requests.session()
  for item in kb:
    # Find twitter username for item.
    task.increment("items")
    imageurls = []
    for twitter in item(p_twitter):
      username = kb.resolve(twitter)
      task.increment("twitter_users")

      # Fetch twitter profile from database.
      dburl = twitterdb + "/" + urllib.parse.quote(username)
      r = dbsession.get(dburl)
      if r.status_code == 404:
        task.increment("unknown_users")
        continue
      r.raise_for_status()
      profile = r.json()

      # Ignore if twitter profile does not exist.
      if "error" in profile:
        task.increment("deleted_users")
        continue

      # Ignore if there is no profile image.
      if profile["default_profile_image"]:
        task.increment("missing_profile_images")
        continue

      # Get profile image url.
      imageurl = profile["profile_image_url"]

      # Get url for original image by removing "_normal".
      imageurl = ''.join(imageurl.rsplit("_normal", 1))

      # Ignore known bad images.
      if imageurl in bad_images:
        task.increment("bad_profile_images")
        continue

      # Add twitter profile image to item.
      imageurls.append(imageurl)

    if len(imageurls) > 0:
      # Create item frame with twitter profile.
      store = sling.Store(kb)
      slots = [(p_id, item.id)]
      for imageurl in imageurls:
        image = store.frame([(p_is, imageurl), (p_stated_in, n_twitter)])
        slots.append((p_media, image))
      frame = store.frame(slots)
      fout.write(frame.data(utf8=True))
      fout.write("\n")
      task.increment("profile_images")
      if p_image not in item:
        task.increment("imaged_items")

  fout.close()

      self._text(" (%0.4f)" % other_parse.score)
      self._br()
    self._end("td")
    self._end("tr")
    self._end("table")


if __name__ == "__main__":
  flags.define("--port",
               help="port number for the HTTP server",
               default=8001,
               type=int,
               metavar="PORT")
  flags.define("--parses",
               help="Recordio of category parses",
               default="local/data/e/wikicat/parses-with-match-statistics.rec",
               type=str,
               metavar="FILE")
  flags.define("--output",
               help="Output dir where Wikibot recordios will be generated.",
               default="local/data/e/wikicat/",
               type=str,
               metavar="DIR")
  flags.parse()

  log.info('Reading parses from %s' % flags.arg.parses)
  browser_globals.init(flags.arg.parses, flags.arg.output)
  server_address = ('', flags.arg.port)
  httpd = HTTPServer(server_address, Browser)
  log.info('Starting HTTP Server on port %d' % flags.arg.port)
  httpd.serve_forever()

def run(self, task):
  # Get parameters.
  language = task.param("language")

  # Load knowledge base.
  log.info("Load knowledge base")
  kb = sling.Store()
  kb.load(task.input("kb").name)

  n_infobox = kb["/wp/infobox"]
  n_page_item = kb["/wp/page/item"]
  n_file = kb["/wp/info/file"]
  n_media = kb["/wp/media"]

  image_fields = [
    (kb["/wp/info/image"], kb["/wp/info/caption"]),
    (kb["/wp/info/cover"], kb["/wp/info/caption"]),
    (kb["/wp/info/logo"], kb["/wp/info/logo_caption"]),
    (kb["/wp/info/photo"], kb["/wp/info/photo_caption"]),
    (kb["/wp/info/flag_image"], kb["/wp/info/flag_caption"]),
  ]

  p_media = kb["media"]
  p_id = kb["id"]
  p_is = kb["is"]
  p_imported_from = kb["P143"]
  p_media_legend = kb["P2096"]

  image_properties = [
    kb["P18"],   # image
    kb["P154"],  # logo image
    kb["P41"],   # flag image
  ]

  lang = kb["/lang/" + language]
  wikipedia_item = lang["/lang/wikilang/wikipedia"]
  docschema = sling.DocumentSchema(kb)
  kb.freeze()

  # Fetch media titles for Wikipedia from yesterday.
  log.info("Fetch local media titles")
  yesterday = (date.today() - timedelta(days=1)).strftime("%Y%m%d")
  mediaurl = "https://dumps.wikimedia.org/other/mediatitles/%s/" \
             "%swiki-%s-all-media-titles.gz" % (yesterday, language, yesterday)
  r = urllib.request.urlopen(mediaurl)
  mediatitles = set(gzip.decompress(r.read()).decode().split('\n'))
  task.increment("local_media_files", len(mediatitles))

  # Open output file.
  fout = open(task.output("output").name, "w")

  # Process input articles.
  for res in task.inputs("input"):
    log.info("Extract media files from", res.name)
    for _, data in sling.RecordReader(res.name):
      # Read article into store.
      store = sling.Store(kb)
      doc = store.parse(data)
      task.increment("documents")

      # Find first infobox.
      infobox = None
      for theme in doc(docschema.document_theme):
        if theme.isa(n_infobox):
          infobox = theme
          break
      if infobox is None: continue
      task.increment("infoboxes")

      # Find images in infobox.
      imagelist = []
      for n_image, n_caption in image_fields:
        image = infobox[n_image]
        caption = infobox[n_caption]
        if image is None: continue

        # Get image for repeated image field.
        if type(image) is sling.Frame:
          group = image
          image = group[n_file]
          caption = group[n_caption]
          if image is None: continue

        if "{" in image or "[" in image:
          # Structured annotations.
          annotations = sling.lex(image, store=store, schema=docschema)
          for theme in annotations.themes:
            if theme.isa(n_media):
              image = theme[p_is]
              if image is not None:
                imagelist.append((image, None))
                task.increment("structured_annotations")
        else:
          # Image filename.
          imagelist.append((image, caption))
      if len(imagelist) == 0: continue

      # Process list of images for item.
      known_images = 0
      image_frames = []
      item = doc[n_page_item]
      if item is None: continue
      for image, caption in imagelist:
        # Disregard direct URLs for now.
        if image.startswith("http://") or \
           image.startswith("https://") or \
           image.startswith("//"):
          task.increment("url_images")
          continue

        # Trim image name. Remove File: prefix.
        colon = image.find(':')
        if colon > 0 and colon < 10: image = image[colon + 1:]
        image = titlecase(image.strip()).replace('_', ' ')
        if len(image) == 0 or image in default_images:
          task.increment("empty_images")
          continue
        if image.endswith("&lrm;"): image = image[:-5]
        frag = image.find('#')
        if frag > 0: image = image[:frag]
        image = html.unescape(image)
        image = urllib.parse.unquote(image)

        # Discard media files with unknown or ignored extensions.
        dot = image.rfind('.')
        ext = image[dot:].lower() if dot > 0 else None
        if ext in ignored_extensions:
          task.increment("ignored_image_format")
          continue
        if ext not in known_extensions:
          log.info("unknown format:", item.id, image)
          task.increment("unknown_image_format")
          continue

        # Get item from KB and check if image is already known.
        task.increment("images")
        known = False
        for prop in image_properties:
          for img in item(prop):
            img = kb.resolve(img)
            if img == image:
              known = True
              known_images += 1
        if known:
          task.increment("known_images")
          continue
        task.increment("new_images")

        # Check if image is in local Wikipedia or Wikimedia Commons.
        fn = image.replace(' ', '_')
        if fn in mediatitles:
          urlbase = "https://upload.wikimedia.org/wikipedia/" + language
          task.increment("local_images")
        else:
          urlbase = "https://upload.wikimedia.org/wikipedia/commons"
          task.increment("commons_images")
          if known_images == 0: task.increment("commons_imaged_items")

        # Compute URL for image.
        md5 = md5hash(fn)
        fn = fn.replace("?", "%3F")
        fn = fn.replace("+", "%2B")
        fn = fn.replace("&", "%26")
        url = "%s/%s/%s/%s" % (urlbase, md5[0], md5[0:2], fn)

        # Create frame for item with media image.
        slots = [
          (p_is, url),
          (p_imported_from, wikipedia_item),
        ]
        if caption != None:
          capdoc = sling.lex(caption, store=store, schema=docschema)
          captxt = capdoc.phrase(0, len(capdoc.tokens))
          slots.append((p_media_legend, captxt))
        image_frames.append(store.frame(slots))

      # Create item frame with extra image info.
      if len(image_frames) == 0: continue
      slots = [(p_id, item.id)]
      for image_frame in image_frames:
        slots.append((p_media, image_frame))
      frame = store.frame(slots)
      fout.write(frame.data(utf8=True))
      fout.write("\n")
      if known_images == 0: task.increment("imaged_items")

  fout.close()

if __name__ == '__main__':
  # Parse command-line arguments.
  flags.parse()
  if flags.arg.build_wiki:
    flags.arg.import_wikidata = True
    flags.arg.import_wikipedia = True
    flags.arg.parse_wikipedia = True
    flags.arg.merge_categories = True
    flags.arg.invert_categories = True
    flags.arg.compute_item_popularity = True
    flags.arg.fuse_items = True
    flags.arg.build_kb = True
    flags.arg.extract_names = True
    flags.arg.build_nametab = True
    flags.arg.build_phrasetab = True

  # Run workflows.
  workflow.startup()
  download_corpora()
  import_wiki()
  parse_wikipedia()
  fuse_items()
  build_knowledge_base()
  train_embeddings()
  extract_named_entities()
  workflow.shutdown()

  # Done.
  log.info("Done")

def main():
  # Parse command-line arguments. Load modules for commands before parsing
  # flags to allow each of these to register more flags.
  for arg in sys.argv:
    if arg.startswith("-"): continue
    for cmd in commands:
      if arg == cmd.name:
        if cmd.package is not None:
          importlib.import_module(cmd.package)
        if cmd.load is not None:
          for pkg in cmd.load:
            importlib.import_module(pkg)
        break
  flags.parse()

  # Output version information.
  if flags.arg.version:
    sling.which()
    sys.exit(0)

  # List commands.
  if flags.arg.list:
    print("commands:")
    for cmd in commands:
      if not cmd.internal:
        print("  %-30s %s" % (cmd.name, cmd.help))
    sys.exit(0)

  # Run command in background if requested.
  if flags.arg.spawn:
    # Build command.
    cmd = []
    for arg in sys.argv:
      if arg != "--spawn": cmd.append(arg)
    cmd.append("--flushlog")

    # Output to log file.
    logfn = flags.arg.logdir + "/" + time.strftime("%Y%m%d-%H%M%S") + ".log"
    logfile = open(logfn, "w")

    # Start background job.
    process = subprocess.Popen(cmd,
                               stdin=None,
                               stdout=logfile,
                               stderr=subprocess.STDOUT,
                               bufsize=1,
                               shell=False,
                               close_fds=True)
    print("Running process", process.pid, "in background logging to", logfn)
    sys.exit(0)

  # Start up workflow system.
  workflow.startup()

  # Run commands.
  for cmd in commands:
    if cmd.name not in flags.arg.COMMAND: continue
    if cmd.package:
      # Load module with command.
      module = importlib.import_module(cmd.package)

      # Run command.
      if cmd.function is not None:
        log.info("Execute command " + cmd.name)
        getattr(module, cmd.function)()

    # Add triggered commands.
    if cmd.triggers is not None:
      for trigger in cmd.triggers:
        flags.arg.COMMAND.append(trigger)

  # Done.
  workflow.shutdown()
  log.info("Done")