def produce(t2wml_project_path: str, project_name: str, input_folder_path: str, output_folder_path: str):
    """Run t2wml on every sheet of every ``.csv`` file in a folder and flatten the results.

    For each ``.csv`` file found directly inside ``input_folder_path``,
    ``run_t2wml`` is invoked once per name returned by ``get_sheet_names``.
    Afterwards each sub-directory of ``<output_folder_path>/t2wml-output``
    that contains a ``results.tsv`` has that file moved up one level and
    renamed to ``<sub-directory name>.tsv``, and the sub-directory is deleted.

    Side effect: the process working directory is changed to the ``backend``
    directory located next to the installed ``pandas`` package and is NOT
    restored afterwards. Relative path arguments are therefore resolved
    against that directory.

    Args:
        t2wml_project_path: Folder holding the t2wml projects; the spec is read
            from ``<t2wml_project_path>/<project_name>/<project_name>.yaml``.
        project_name: Name of the project; also forwarded to ``run_t2wml``.
        input_folder_path: Folder scanned (non-recursively) for ``.csv`` files.
        output_folder_path: Folder expected to contain
            ``consolidated-wikifier.csv``; results go to its ``t2wml-output``
            sub-folder.

    Raises:
        OSError: If the ``backend`` directory, the input folder or the
            ``t2wml-output`` folder cannot be accessed.
    """
    # set up the environment
    # Build the path with os.path instead of a "/"-based string replace, which
    # silently does nothing when the platform separator is not "/".
    site_packages = os.path.dirname(os.path.dirname(pd.__file__))
    virtual_env = os.path.join(site_packages, "backend")
    os.chdir(virtual_env)
    # Imported only after the chdir, as in the original code; presumably the
    # module is resolved relative to the backend directory -- TODO confirm.
    from driver import run_t2wml

    # set up the folders
    yaml_file = os.path.join(t2wml_project_path, "{}/{}.yaml".format(project_name, project_name))
    wikifier_file = os.path.join(output_folder_path, "consolidated-wikifier.csv")
    data_file_folder = input_folder_path
    output_directory = os.path.join(output_folder_path, "t2wml-output")

    for filename in os.listdir(data_file_folder):
        if not filename.endswith(".csv"):
            continue
        data_file_path = os.path.join(data_file_folder, filename)
        print("processing", filename)
        for sheet_name in get_sheet_names(data_file_path):
            run_t2wml(data_file_path, wikifier_file, yaml_file, output_directory, sheet_name,
                      filetype="tsv", project_name=project_name)

    # move all files from folder
    for each_file in os.listdir(output_directory):
        full_path = os.path.join(output_directory, each_file)
        if not os.path.isdir(full_path):
            continue
        file_path = os.path.join(full_path, "results.tsv")
        if os.path.isfile(file_path):
            shutil.move(file_path, full_path + ".tsv")
            # NOTE(review): the folder is removed only once its results file
            # has been moved out; confirm that folders without a results.tsv
            # are meant to be left in place.
            shutil.rmtree(full_path)
from driver import run_t2wml

# Local checkout of the ethiopia-experiment repository; every input, spec and
# output location below is derived from it.
ethiopia_repo_path = '/Users/amandeep/Github/ethiopia-experiment'

# NOTE(review): unlike the other paths, this one is not prefixed with
# ethiopia_repo_path -- confirm that an absolute '/restricted/...' path is intended.
wikified_output_path = '/restricted/wikifier.csv'

sparql_endpoint = 'https://dsbox02.isi.edu:8899/bigdata/namespace/wdq/sparql'
output_directory = '{}/restricted/food_prices/output'.format(ethiopia_repo_path)

# Run t2wml once per (data file, spec) pair: first the price spec, then the
# markets spec. All runs share the wikifier file, output folder and endpoint.
for data_file_path, t2wml_spec in (
    (
        '{}/restricted/food_prices/input/wfp_food_prices_ethiopia-item-name.csv'.format(
            ethiopia_repo_path),
        '{}/restricted/food_prices/t2wml_spec_food_prices_price.yaml'.format(
            ethiopia_repo_path),
    ),
    (
        '{}/restricted/food_prices/input/wfp_food_prices_ethiopia-item-name-market.csv'.format(
            ethiopia_repo_path),
        '{}/restricted/food_prices/t2wml_spec_food_prices_markets.yaml'.format(
            ethiopia_repo_path),
    ),
):
    run_t2wml(data_file_path, wikified_output_path, t2wml_spec,
              output_directory, sparql_endpoint=sparql_endpoint)