def main(params: Parameters):
    # create_cas_from_apf(TEST_APF_PATH, TEST_SGM_PATH, OUTPUT_DIR_PATH)
    corpus_paths = params.arbitrary_list("corpus_paths")
    output_xmi_dir_path = params.creatable_directory("cached_xmi_path")
    type_system_path = params.existing_file("type_system_path")
    cas_xmi_template_path = params.existing_file("cas_xmi_template_path")

    # Load Typesystem
    with type_system_path.open('rb') as file:
        typesystem = load_typesystem(file)

    # Load xmi_template
    with cas_xmi_template_path.open('rb') as cas_xmi_file:
        cas_template = load_cas_from_xmi(cas_xmi_file, typesystem=typesystem)

    for ace_corpus_path in corpus_paths:
        print('Processing apf files from: ' + ace_corpus_path)
        start_time = time.perf_counter()
        for filename in os.listdir(ace_corpus_path):
            if filename.endswith(".apf.xml"):
                print("Processing " + filename)
                create_cas_from_apf(
                    apf_filename=filename,
                    apf_path=ace_corpus_path + filename,
                    source_sgm_path=ace_corpus_path + filename.replace(".apf.xml", ".sgm"),
                    output_dir_path=output_xmi_dir_path,
                    typesystem=typesystem,
                    cas_template=cas_template,
                )
        elapsed_time = time.perf_counter() - start_time
        print(f"Processing Completed. Time elapsed: {elapsed_time:0.4f} seconds")
def from_parameters(params: Parameters) -> KeyValueSource[str, str]:
    """
    Construct a zip file key-value source from parameters.

    The "path" parameter should be the zip file to be opened.

    Currently we assume the zip file contains a file with the IDs, which it will
    if it was created by the default CharSink.zip_character_sink(). Support for
    custom key functions will be added in the future.
    """
    return KeyValueSource.zip_character_source(params.existing_file("path"))
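
# Hedged usage sketch (added for illustration; not part of the original sources).
# It assumes Parameters/KeyValueSource come from vistautils and that
# Parameters.from_mapping accepts a plain dict; adjust if this project builds its
# Parameters differently (e.g. from a YAML parameter file).
def _example_zip_source_usage(zip_path: str) -> None:
    example_params = Parameters.from_mapping({"path": zip_path})
    # from_parameters (defined above) opens the zip as a string-to-string source.
    source = from_parameters(example_params)
    # The resulting KeyValueSource can then be passed wherever a character
    # key-value source is expected.
    ...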
def main(params: Parameters): input_file_path = params.existing_file("input_file") output_file_path = params.creatable_file("output_file") logging.info("Reading from input file: %s", str(input_file_path.absolute())) with input_file_path.open() as input_file: nums = [int(x.strip()) for x in input_file if x.strip() != ""] nums.sort() output_file_path.write_text("\n".join(immutableset([str(x) for x in nums])))
def main(params: Parameters): graph_def_file = params.existing_file("graph_def_file") checkpoint_glob = params.string("checkpoint_glob") vocab_file = params.existing_file("vocab_file") sentences_file = params.existing_file("sentences_file") output_file = params.creatable_file("output_file") do_profiling = params.optional_boolean_with_default("profile", False) with tensorflow.contrib.tfprof.ProfileContext( os.getcwd(), trace_steps=range(2, 10), dump_steps=range(1, 10, 2), enabled=params.optional_boolean_with_default("profile", False) ): lm = LM1B.load(graph_def_file=graph_def_file, checkpoint_file=checkpoint_glob, vocab=vocab_file) start_time = None num_tokens_processed = 0 with open(sentences_file, 'r', newline='') as inp: csv_input = csv.reader(inp, delimiter='\t') with open(output_file, 'w', newline='') as out: csv_output = csv.writer(out, delimiter='\t') for line in csv_input: tokens = line[0].split(' ') output_row = list(line) output_row.insert(0, lm.log_probability_of_sentence(tokens)) csv_output.writerow(output_row) # we delay till after the first sentence to avoid counting startup time if num_tokens_processed == 0: start_time = time.time() num_tokens_processed += len(tokens) elapsed_time = time.time() - start_time print(f"Processed {num_tokens_processed - 1} sentences in {elapsed_time} " f"seconds, {num_tokens_processed / elapsed_time} tokens per second. First sentence not " f"included in time calculation.")
def main(params: Parameters): input_file_path = params.existing_file("input_file") output_file_path = params.creatable_file("output_file") x = params.integer("x") logging.info("Reading from input file: %s", str(input_file_path.absolute())) with input_file_path.open() as input_file: with output_file_path.open("w") as output_file: for num in input_file: output_file.write(f"{int(num)*x}\n") logging.info("Writing to output file: %s", str(input_file_path.absolute())) # Pause so that we can examine the job on the SAGA cluster time.sleep(30)
def main(params: Parameters):
    # List of the six ACE corpus /adj/ folders (one for each type: bc, bn, cts, nw, un, wl)
    corpus_paths = params.arbitrary_list("corpus_paths")
    # Path to the project config file template (json file)
    json_template_path = params.existing_file("json_template_path")
    # Path to the cached_annotation_ser directory
    annotation_ser_path = params.existing_directory("annotation_ser_path")
    # Path to the cached_xmi directory
    cached_xmi_path = params.existing_directory("cached_xmi_path")
    # Path to the target corpus (narrowed ACE corpus)
    cached_ace_data_path = params.creatable_directory("cached_ace_data_path")
    # List of users (strings)
    user_list = params.arbitrary_list("user_list")
    # List of event types (format: "EVENT_TYPE.SUBTYPE" strings)
    event_list = params.arbitrary_list("event_list")
    # Output directory path where configured projects are moved to (use an empty directory)
    output_dir_path = params.creatable_directory("output_dir_path")

    flatten_ace_data(corpus_paths, cached_ace_data_path)
    complete_map = get_complete_project_to_doc_mapping(cached_ace_data_path)

    for user in user_list:
        for event_type in event_list:
            # "All" expands to every event type found in the corpus mapping
            if event_type == "All":
                for event in complete_map:
                    configure_and_generate_project(
                        json_template_path=json_template_path,
                        event_name=event,
                        user_name=user,
                        event_doc_map=complete_map,
                        cached_ser_path=annotation_ser_path,
                        cached_xmi_path=cached_xmi_path,
                        output_dir_path=output_dir_path)
            else:
                configure_and_generate_project(
                    json_template_path=json_template_path,
                    event_name=event_type,
                    user_name=user,
                    event_doc_map=complete_map,
                    cached_ser_path=annotation_ser_path,
                    cached_xmi_path=cached_xmi_path,
                    output_dir_path=output_dir_path)
def main(params: Parameters):
    conda_script_generator = CondaJobScriptGenerator.from_parameters(params)
    entry_point = params.string("entry_point")
    work_dir = params.optional_creatable_directory("working_directory") or Path(os.getcwd())
    stdout_file = params.string("log_file") or work_dir / "___stdout.log"
    shell_script = conda_script_generator.generate_shell_script(
        entry_point_name=entry_point,
        param_file=params.existing_file("job_param_file"),
        working_directory=work_dir,
        stdout_file=stdout_file,
    )
    params.creatable_file("conda_script_path").write_text(  # type: ignore
        shell_script, encoding="utf-8")
    if params.boolean("echo_template", default=False):
        print(shell_script)
def configure_logging_from(params: Parameters, *, log_params=False) -> None:
    """
    Configures logging from parameters.

    This will examine the 'logging' namespace of the provided parameters. If that namespace
    has a 'config_file' parameter, logging will be configured based on the parameter file it
    points to. Otherwise, if 'logging.root_level' is specified, the logging level of the root
    logger will be set to its value. For reference, the standard values are CRITICAL, FATAL,
    ERROR, WARNING, INFO, and DEBUG.
    """
    if "logging.config_file" in params:
        logging.config.fileConfig(str(params.existing_file("logging.config_file")))
    else:
        _config_logging_from_params(params)

    if log_params:
        log.info(str(params))
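
# Hedged illustration (added): a parameter mapping exercising the 'logging'
# namespace documented above, setting the root logger to INFO. It assumes
# Parameters.from_mapping exists (vistautils-style) and that a nested mapping is
# addressable as the dotted 'logging.root_level' parameter.
def _example_logging_setup() -> None:
    example_params = Parameters.from_mapping({"logging": {"root_level": "INFO"}})
    configure_logging_from(example_params)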
def _doc_id_binary_source_from_params(params: Parameters) -> KeyValueSource[str, bytes]:
    return KeyValueSource.binary_from_doc_id_to_file_map(params.existing_file("path"))