def produce(t2wml_project_path: str, project_name: str, input_folder_path: str,
            output_folder_path: str) -> None:
    """Run t2wml on every CSV file in a folder and flatten the results.

    For each ``*.csv`` file in ``input_folder_path``, ``run_t2wml`` is called
    once per sheet name returned by ``get_sheet_names``. Afterwards every
    sub-directory of ``<output_folder_path>/t2wml-output`` is collapsed: its
    ``results.tsv`` (if present) is moved to ``<sub-directory>.tsv`` next to
    it, and the sub-directory itself is removed.

    Args:
        t2wml_project_path: Folder containing ``<project_name>/<project_name>.yaml``.
        project_name: Project name; selects the YAML file and is forwarded to
            ``run_t2wml``.
        input_folder_path: Folder scanned (non-recursively) for ``.csv`` files.
        output_folder_path: Folder holding ``consolidated-wikifier.csv`` and
            receiving the ``t2wml-output`` directory.

    Raises:
        OSError: If the ``backend`` directory or ``input_folder_path`` cannot
            be accessed.

    Note:
        The process working directory is changed to the ``backend`` directory
        located next to the installed ``pandas`` package and is not restored.
    """
    # Resolve caller-supplied paths BEFORE changing directory; otherwise
    # relative paths would be interpreted relative to the backend folder.
    t2wml_project_path = os.path.abspath(t2wml_project_path)
    input_folder_path = os.path.abspath(input_folder_path)
    output_folder_path = os.path.abspath(output_folder_path)

    # set up the environment: "backend" is a sibling of the pandas package.
    # Built with os.path so it does not depend on "/" being the separator.
    site_packages = os.path.dirname(os.path.dirname(pd.__file__))
    virtual_env = os.path.join(site_packages, "backend")
    os.chdir(virtual_env)
    # Imported only after the chdir, as in the original ordering.
    from driver import run_t2wml

    # set up the folders
    yaml_file = os.path.join(t2wml_project_path, project_name,
                             "{}.yaml".format(project_name))
    wikifier_file = os.path.join(output_folder_path,
                                 "consolidated-wikifier.csv")
    output_directory = os.path.join(output_folder_path, "t2wml-output")

    for filename in os.listdir(input_folder_path):
        if not filename.endswith(".csv"):
            continue
        data_file_path = os.path.join(input_folder_path, filename)
        print("processing", filename)
        for sheet_name in get_sheet_names(data_file_path):
            run_t2wml(data_file_path,
                      wikifier_file,
                      yaml_file,
                      output_directory,
                      sheet_name,
                      filetype="tsv",
                      project_name=project_name)

    # Nothing to flatten when no output directory exists (e.g. no CSV input).
    if not os.path.isdir(output_directory):
        return

    # move all files from folder
    for entry in os.listdir(output_directory):
        entry_path = os.path.join(output_directory, entry)
        if not os.path.isdir(entry_path):
            continue
        results_path = os.path.join(entry_path, "results.tsv")
        if os.path.isfile(results_path):
            shutil.move(results_path, entry_path + ".tsv")
        # The sub-directory is removed whether or not it held results.tsv.
        shutil.rmtree(entry_path)
# Example #2
0
from driver import run_t2wml

# Script: run t2wml twice over the Ethiopia food-price data, once with the
# "price" spec and once with the "markets" spec.
ethiopia_repo_path = '/Users/amandeep/Github/ethiopia-experiment'
# NOTE(review): unlike every other path here, this one is rooted at "/" and
# not under ethiopia_repo_path -- confirm that is intended.
wikified_output_path = '/restricted/wikifier.csv'
sparql_endpoint = 'https://dsbox02.isi.edu:8899/bigdata/namespace/wdq/sparql'
output_directory = '{}/restricted/food_prices/output'.format(
    ethiopia_repo_path)

# (spec template, data-file template) pairs, processed in this order.
_runs = (
    ('{}/restricted/food_prices/t2wml_spec_food_prices_price.yaml',
     '{}/restricted/food_prices/input/wfp_food_prices_ethiopia-item-name.csv'),
    ('{}/restricted/food_prices/t2wml_spec_food_prices_markets.yaml',
     '{}/restricted/food_prices/input/wfp_food_prices_ethiopia-item-name-market.csv'),
)

for _spec_template, _data_template in _runs:
    data_file_path = _data_template.format(ethiopia_repo_path)
    t2wml_spec = _spec_template.format(ethiopia_repo_path)
    run_t2wml(
        data_file_path,
        wikified_output_path,
        t2wml_spec,
        output_directory,
        sparql_endpoint=sparql_endpoint,
    )