from pathlib import Path
import json
from typing import Dict

from sample import Sample


def _load_keyword_samples(candidate_dir: Path) -> list:
    """Load the samples stored under each keyword sub-directory.

    Every sub-directory of *candidate_dir* is treated as one keyword. Each
    ``*.json`` file directly inside it is passed to
    ``Sample.get_id_and_sample``.

    Directories and files are visited in sorted path order. Directory
    listing order is otherwise filesystem-dependent, and the caller feeds
    each inner list to ``dict()``, where the last pair seen for a key wins.
    Sorting makes the surviving duplicate reproducible across runs.

    Args:
        candidate_dir: Directory whose sub-directories hold the JSON files.

    Returns:
        One list per keyword sub-directory, each holding the values returned
        by ``Sample.get_id_and_sample`` for that sub-directory's files.
    """
    keyword_dirs = sorted(
        entry for entry in candidate_dir.glob("*") if entry.is_dir()
    )
    return [
        [
            Sample.get_id_and_sample(file_path)
            for file_path in sorted(keyword_dir.glob("*.json"))
        ]
        for keyword_dir in keyword_dirs
    ]


if __name__ == "__main__":
    root = Path("../tweet-data/merged")
    biden_dir = root / "biden"
    trump_dir = root / "trump"
    # Sibling of ``root`` named "<root name>_no_dups".
    save_dir = root.parent / (root.stem + "_no_dups")

    # remove samples duplicated within the same keywords
    biden_kw_data_lists = _load_keyword_samples(biden_dir)
    trump_kw_data_lists = _load_keyword_samples(trump_dir)

    # dict() keeps a single entry per key, so pairs that repeat a key inside
    # one keyword collapse into one.
    # NOTE(review): assumes get_id_and_sample returns an (id, sample) pair
    # with a hashable id -- confirm against sample.py.
    biden_kw_data_dicts = [
        dict(keyword_data) for keyword_data in biden_kw_data_lists
    ]
    trump_kw_data_dicts = [
        dict(keyword_data) for keyword_data in trump_kw_data_lists
    ]