def main():
    save_dir = "../data"
    image_file = "../data/test/karyotype.bmp"
    model_path = "../model/default_inference.h5"
    Pipeline.run(image_file=image_file, save_dir=save_dir, model_path=model_path)
def test_pipeline_produced_expected_data() -> bool:
    delete_existing_outputs(STORAGE_CONFIG)

    filename = os.path.basename(EXPECTED_FILE)
    pipeline = Pipeline(PIPELINE_CONFIG, STORAGE_CONFIG)
    pipeline.run(EXAMPLE_FILE)

    # Retrieve the output data file
    loc_id = pipeline.config.pipeline_definition.location_id
    datastream = DSUtil.get_datastream_name(config=pipeline.config)
    root: str = pipeline.storage._root
    output_file = os.path.join(root, loc_id, datastream, filename)

    # Assert that the basename of the processed file and expected file match
    assert os.path.isfile(output_file)

    # Compare data and optionally attributes to ensure everything matches.
    ds_out: xr.Dataset = xr.open_dataset(output_file)
    ds_exp: xr.Dataset = xr.open_dataset(EXPECTED_FILE)

    xr.testing.assert_allclose(ds_out, ds_exp)
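# Hedged note on the test above: assert_allclose compares values within tolerance
# only. If dataset/variable names and attributes should also match (as the in-test
# comment hints), xarray's assert_identical is the stricter check. The helper name
# and signature below are illustrative assumptions, not part of the project:
import xarray as xr

def assert_outputs_match(output_file, expected_file):
    # Open both datasets and require exact equality of values, names, and attrs.
    ds_out = xr.open_dataset(output_file)
    ds_exp = xr.open_dataset(expected_file)
    xr.testing.assert_identical(ds_out, ds_exp)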
def main():
    inputs = {
        'channel_id': CHANNEL_ID,
        'search_word': 'incredible',
        'limit': 20,
    }
    steps = [
        Preflight(),
        GetVideoList(),  # one step per line for readability (a trailing comma after the last item is recommended)
        InitializeYT(),
        DownloadCaptions(),
        ReadCaption(),
        Search(),
        DownloadVideos(),
        EditVideo(),
        Postflight(),
    ]
    utils = Utils()

    p = Pipeline(steps)
    p.run(inputs, utils)
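# The example above constructs a Pipeline from a list of step objects and runs them
# with shared inputs and utilities. A minimal sketch of a step-runner class that
# would satisfy this usage follows; the process(data, inputs, utils) interface is an
# assumption for illustration, not necessarily the project's actual implementation.
class Pipeline:
    def __init__(self, steps):
        self.steps = steps

    def run(self, inputs, utils):
        data = None
        for step in self.steps:
            # Each step receives the previous step's output plus the shared
            # inputs dict and utility object, and returns data for the next step.
            data = step.process(data, inputs, utils)
        return data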
""") cursor.execute(""" CREATE USER vsmith WITH PASSWORD 'temppass123' NOSUPERUSER IN GROUP data_analyst; """) db_conn.commit() print("CREATED DB GROUP AND USERS") return db_conn @pipeline.task(depends_on=create_db_users_and_groups) def close_db_connection(db_conn): """After the work is done, close the database connection.""" db_conn.close() def get_table_row_count(db_conn, table_name): """Get basic table row count.""" cursor = db_conn.cursor() cursor.execute("SELECT COUNT(1) FROM {}".format(table_name)) return cursor.fetchone()[0] if __name__ == '__main__': pipeline.run()
def main():
    config = Config()
    parser = argparse.ArgumentParser(
        description='Code for building the Gutenberg Dialog Dataset')
    parser.add_argument('-dg', '--dialog_gap',
                        default=config.dialog_gap,
                        help='Min. number of characters between two dialogs ' +
                             '(default: %(default)s)',
                        metavar='', type=int)
    parser.add_argument(
        '-isn', '--include_surrounding_narratives',
        default=config.include_surrounding_narratives,
        help='Whether to include surrounding narratives in the output dataset',
        action='store_true')
    parser.add_argument('-mnl', '--max_narrative_length',
                        default=config.max_narrative_length,
                        help='Max. number of words in 1 narrative ' +
                             '(default: %(default)s)',
                        metavar='', type=int)
    parser.add_argument(
        '-minl', '--min_intermediate_narrative_length',
        default=config.min_intermediate_narrative_length,
        help='Min. number of words in 1 intermediate narrative '
             '(a narrative which occurs in-line with dialog) ' +
             '(default: %(default)s)',
        metavar='', type=int)
    parser.add_argument('-mul', '--max_utterance_length',
                        default=config.max_utterance_length,
                        help='Max. number of words in 1 utterance ' +
                             '(default: %(default)s)',
                        metavar='', type=int)
    parser.add_argument('-mb', '--max_books',
                        default=config.max_books,
                        help='Limit the number of books in final dataset ' +
                             '(default: %(default)s)',
                        metavar='', type=int)
    parser.add_argument('-md', '--min_delimiters',
                        default=config.min_delimiters,
                        help='Min delimiters / 10000 words needed in a book ' +
                             '(default: %(default)s)',
                        metavar='', type=int)
    parser.add_argument('-mdd', '--min_double_delim',
                        default=config.min_double_delim,
                        help='Double delimiter threshold (romance languages) ' +
                             '(default: %(default)s)',
                        metavar='', type=int)
    parser.add_argument('-kl', '--kl_threshold',
                        default=config.kl_threshold,
                        help='KL divergence threshold for filtering books ' +
                             '(default: %(default)s)',
                        metavar='', type=int)
    parser.add_argument('-st', '--size_threshold',
                        default=config.size_threshold,
                        help='#words threshold for filtering with KL ' +
                             '(default: %(default)s)',
                        metavar='', type=int)
    parser.add_argument('-cd', '--clean_dialogs',
                        default=config.clean_dialogs,
                        help='Whether to run pre-processing on dialogs',
                        action='store_true')
    parser.add_argument('-vt', '--vocab_threshold',
                        default=config.vocab_threshold,
                        help='Ratio of unknown words allowed in a dialog ' +
                             '(default: %(default)s)',
                        metavar='', type=int)
    parser.add_argument('-l', '--languages',
                        default=config.languages,
                        help='Comma separated language codes ' +
                             'for which to build datasets',
                        metavar='', type=str)
    parser.add_argument('-d', '--download',
                        default=config.download,
                        help='Whether to run download step',
                        action='store_true')
    parser.add_argument('-f1', '--pre_filter',
                        default=config.pre_filter,
                        help='Whether to run pre-filter step',
                        action='store_true')
    parser.add_argument('-e', '--extract',
                        default=config.extract,
                        help='Whether to run extracting step',
                        action='store_true')
    parser.add_argument('-f2', '--post_filter',
                        default=config.post_filter,
                        help='Whether to run post filter step',
                        action='store_true')
    parser.add_argument('-c', '--create_dataset',
                        default=config.create_dataset,
                        help='Whether to run create dataset step',
                        action='store_true')
    parser.add_argument('-a', '--run_all',
                        default=config.run_all,
                        help='Whether to run all steps',
                        action='store_true')
    parser.add_argument('-dir', '--directory',
                        default=config.directory,
                        help='Directory where the language folders are',
                        metavar='', type=str)
    parser.parse_args(namespace=config)

    p = Pipeline(config)
    p.run()
    keywords = {}
    for title in titles:
        for word in title.split():
            if word and word not in exclude_words:
                if word not in keywords:
                    keywords[word] = 0
                keywords[word] += 1
    return keywords


@pipeline.task(depends_on=build_keyword_dictionary)
def extract_top_keywords(keywords):
    top_keywords = []
    for word, count in sorted(keywords.items(),
                              key=lambda item: item[1],
                              reverse=True):
        top_keywords.append((word, count))
    return top_keywords[:100]


@pipeline.task(depends_on=extract_top_keywords)
def save_final_csv_file(keywords):
    output_csv_file = open('top_keywords.csv', 'w', newline='')
    return csv_helper.build_csv_file(keywords,
                                     file=output_csv_file,
                                     header=['keyword', 'count'])


output = pipeline.run()
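# The last two examples register tasks with @pipeline.task(depends_on=...) and then
# call pipeline.run() to execute them and collect each task's output. A minimal
# sketch of a pipeline class supporting that decorator interface is shown below;
# the attribute names and the simple registration-order execution are assumptions
# for illustration, not the actual library used in those examples.
class Pipeline:
    def __init__(self):
        self.tasks = []         # tasks in registration order
        self.depends_on = {}    # task -> the task whose output it consumes

    def task(self, depends_on=None):
        def register(func):
            self.tasks.append(func)
            if depends_on is not None:
                self.depends_on[func] = depends_on
            return func
        return register

    def run(self):
        # Map each task to its output, feeding dependents their parent's result.
        # ASSUMPTION: dependencies are registered before their dependents, so
        # registration order suffices; a full implementation would sort the DAG.
        outputs = {}
        for func in self.tasks:
            parent = self.depends_on.get(func)
            outputs[func] = func() if parent is None else func(outputs[parent])
        return outputs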