def main(_):
  """Load augmented SQuAD examples and compute the output path.

  Args:
    _: unused; absorbs the positional argument the app runner passes to main.

  Side effects: starts and stops the workflow system; the actual write of the
  examples is still disabled (see the commented-out block below).
  """
  workflow.startup()
  squad_augmented_data = get_examples(ARGS.squad_dir, ARGS.input_filename)
  if squad_augmented_data is None:
    # Bug fix: the original said `continue` here, which is a SyntaxError
    # outside a loop (this function has no loop, unlike the NQ variant).
    # Shut down the workflow system before bailing out so startup/shutdown
    # stay paired.
    workflow.shutdown()
    return
  path = get_full_filename(ARGS.squad_dir, ARGS.output_filename)
  # NOTE(review): writer intentionally left disabled, as in the original.
  # If re-enabled, it should iterate squad_augmented_data, not
  # nq_augmented_data — the commented code below was copied from the NQ
  # variant and still references the wrong variable.
  # with gzip.GzipFile(fileobj=tf.gfile.Open(path, "w")) as output_file:
  #   for idx in nq_augmented_data.keys():
  #     json_line = nq_augmented_data[idx]
  #     output_file.write((json.dumps(json_line) + "\n").encode('utf-8'))
  workflow.shutdown()
def main(_):
  """Re-serialize augmented Natural Questions shards as gzipped JSON lines.

  Args:
    _: unused; absorbs the positional argument the app runner passes to main.

  Walks every (task, shard) pair for each processed mode, reads the augmented
  examples, and writes one JSON object per line to a gzip file. Shards with no
  data are skipped.
  """
  workflow.startup()
  max_tasks = {"train": 25, "dev": 5}
  max_shards = {"train": 6, "dev": 16}
  # Parse all shards in each mode. Processing is sequential for now and can
  # be parallelized later. NOTE(review): only "train" is processed, and tasks
  # start at 12 — presumably a restart offset; confirm before reuse.
  for mode in ["train"]:
    shard_coords = (
        (task, shard)
        for task in range(12, max_tasks[mode])
        for shard in range(0, max_shards[mode]))
    for task_id, shard_id in shard_coords:
      nq_augmented_data = get_examples(ARGS.nq_dir, mode, task_id, shard_id)
      if nq_augmented_data is None:
        continue
      path = get_full_filename(ARGS.output_data_dir, mode, task_id, shard_id)
      with gzip.GzipFile(fileobj=tf.gfile.Open(path, "w")) as output_file:
        # One JSON document per line, utf-8 encoded.
        for json_line in nq_augmented_data.values():
          output_file.write((json.dumps(json_line) + "\n").encode('utf-8'))
  workflow.shutdown()
if __name__ == '__main__':
  # Parse command-line arguments.
  flags.parse()

  # --build_wiki is shorthand for enabling every stage of the wiki pipeline.
  if flags.arg.build_wiki:
    for stage in ("import_wikidata", "import_wikipedia", "parse_wikipedia",
                  "merge_categories", "invert_categories",
                  "compute_item_popularity", "fuse_items", "build_kb",
                  "extract_names", "build_nametab", "build_phrasetab"):
      setattr(flags.arg, stage, True)

  # Run workflows.
  workflow.startup()
  download_corpora()
  import_wiki()
  parse_wikipedia()
  fuse_items()
  build_knowledge_base()
  train_embeddings()
  extract_named_entities()
  workflow.shutdown()

  # Done.
  log.info("Done")
def main():
  """Command-line driver: resolve commands, handle meta-flags, run workflows.

  Flow: pre-import command modules (so they can register flags), parse flags,
  handle --version/--list/--spawn, then execute each requested command via
  the workflow system.
  """
  # Parse command-line arguments. Load modules for commands before parsing
  # flags to allow each of these to register more flags.
  for arg in sys.argv:
    # Skip flag arguments; only bare words can name a command.
    if arg.startswith("-"): continue
    for cmd in commands:
      if arg == cmd.name:
        if cmd.package is not None:
          importlib.import_module(cmd.package)
        if cmd.load is not None:
          for pkg in cmd.load:
            importlib.import_module(pkg)
        break
  flags.parse()

  # Output version information.
  if flags.arg.version:
    sling.which()
    sys.exit(0)

  # List commands. Internal commands are hidden from the listing.
  if flags.arg.list:
    print("commands:")
    for cmd in commands:
      if not cmd.internal:
        print(" %-30s %s" % (cmd.name, cmd.help))
    sys.exit(0)

  # Run command in background if requested.
  if flags.arg.spawn:
    # Build command: re-run ourselves with --spawn stripped (so the child
    # does not spawn again) and --flushlog appended.
    cmd = []
    for arg in sys.argv:
      if arg != "--spawn":
        cmd.append(arg)
    cmd.append("--flushlog")

    # Output to timestamped log file under the configured log directory.
    logfn = flags.arg.logdir + "/" + time.strftime(
        "%Y%m%d-%H%M%S") + ".log"
    logfile = open(logfn, "w")

    # Start background job; both stdout and stderr go to the log file.
    process = subprocess.Popen(cmd,
                               stdin=None,
                               stdout=logfile,
                               stderr=subprocess.STDOUT,
                               bufsize=1,
                               shell=False,
                               close_fds=True)
    print("Running process", process.pid, "in background logging to", logfn)
    sys.exit(0)

  # Start up workflow system.
  workflow.startup()

  # Run commands. Note: flags.arg.COMMAND is mutated below while this loop
  # checks membership in it, so a triggered command only runs if it appears
  # later than its trigger in `commands` — order-dependent by design
  # (presumably; confirm against the command table).
  for cmd in commands:
    if cmd.name not in flags.arg.COMMAND: continue
    if cmd.package:
      # Load module with command.
      module = importlib.import_module(cmd.package)

      # Run command.
      if cmd.function is not None:
        log.info("Execute command " + cmd.name)
        getattr(module, cmd.function)()

    # Add triggered commands.
    if cmd.triggers is not None:
      for trigger in cmd.triggers:
        flags.arg.COMMAND.append(trigger)

  # Done.
  workflow.shutdown()
  log.info("Done")