def main(argv):
    """Plot per-user class balances for the SS-DA datasets.

    For each dataset, loads every user's full data (train+valid+test),
    computes the class balance across users, and saves a bar plot to
    ``class_balance_<dataset>.pdf``.
    """
    # Don't bother using the GPU for this
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

    # ACM doesn't like Type 3 fonts
    # https://tex.stackexchange.com/q/18687
    plt.rc('pdf', fonttype=42)
    plt.rc('ps', fonttype=42)

    # We only want to plot this one dataset
    list_of_datasets = ["wisdm_ar", "ucihar", "uwave", "ucihhar"]
    dataset_names_nice = ["WISDM AR", "UCI HAR", "uWave", "UCI HHAR"]

    # Get only the ones used in the SS-DA experiments
    dataset_which = [
        [1, 3, 4, 2, 25, 7, 21, 2, 1, 0, 11, 15, 25, 29, 30, 31, 32, 7, 8],
        # [2, 7, 12, 12, 9, 14, 18, 6, 7, 17, 11, 13, 16, 18, 18, 19, 23, 24, 25],
        # [2, 3, 4, 2, 1, 2, 3, 1, 4, 7, 5, 6, 7, 8],
        # [1, 3, 4, 0, 1, 4, 5, 2, 3, 5, 3, 5, 6, 7, 8],
        # Just do for the first two adaptation problems in SS-DA experiments
        [2, 11, 7, 13],
        [2, 5, 3, 5],
        [1, 3, 3, 5],
    ]
    ymaxs = [70, 35, 16, 28]
    # We mostly care about WISDM AR and don't have enough space for all of them
    first_ns = [None, None, None, None]

    # Iterate the parallel per-dataset lists in lockstep rather than
    # indexing each one with range(len(...)).
    for dataset_name, dataset_name_nice, which, ymax, first_n in zip(
            list_of_datasets, dataset_names_nice, dataset_which, ymaxs,
            first_ns):
        # Get class balance for all users
        user_source_pairs = []

        for user in datasets.get_dataset_users(dataset_name):
            # Note: train_on_everything=True means the training dataset
            # consists of all train/valid/test data.
            sources, _ = load_da(dataset_name, str(user), "",
                train_on_everything=True)

            # We load them one at a time
            assert len(sources) == 1
            source = sources[0]
            user_source_pairs.append((user, source))

        balance_data = compute_class_balances(dataset_name, user_source_pairs)

        # Plot it
        class_labels = datasets.get_dataset(dataset_name).class_labels
        generate_plot(dataset_name_nice, class_labels, balance_data,
            filename="class_balance_" + dataset_name + ".pdf",
            which=which, ymax=ymax, first_n=first_n)
def main(argv):
    """Print the number of training samples per user for every dataset."""
    counts = collections.defaultdict(dict)

    for dataset_name in datasets.list_datasets():
        # Note: test=False so we only look at the training samples, which is
        # what we will vary in the vary-amount-of-target-data experiments
        for user in datasets.get_dataset_users(dataset_name):
            sources, _ = load_da(dataset_name, str(user), "", test=False)
            counts[dataset_name][user] = count_training_samples(sources)

    print_dictionary(counts, "dataset_target_training_sample_counts")
def main(argv):
    """Print per-user statistics for the training split of every dataset."""
    # Don't bother using the GPU for this
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

    # Note: test=False so we only look at the training samples, where
    # train=80% of training set, test=20% of training set, i.e. the
    # validation set
    test = False

    for dataset_name in datasets.list_datasets():
        for user in datasets.get_dataset_users(dataset_name):
            sources, _ = load_da(dataset_name, str(user), "", test=test)
            # Datasets are loaded one at a time
            assert len(sources) == 1
            print_stats(dataset_name + "_" + str(user), sources[0], test=test)
def main(argv):
    """Print the class balance of each user's training data, per dataset."""
    # Don't bother using the GPU for this
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

    for dataset_name in datasets.list_datasets():
        user_source_pairs = []

        for user in datasets.get_dataset_users(dataset_name):
            # Note: test=False so we only look at the training samples, where
            # train=80% of training set, test=20% of training set, i.e. the
            # validation set
            sources, _ = load_da(dataset_name, str(user), "", test=False)

            # Datasets are loaded one at a time
            assert len(sources) == 1
            user_source_pairs.append((user, sources[0]))

        print_class_balances(dataset_name, user_source_pairs)
pairs.append((dataset_name, str(source_user), str(target_user))) return pairs if __name__ == "__main__": # Sources-target pairs for training pairs = [] uids = [] for name in datasets.list_datasets(): # Tune on "watch_noother" not "watch" if name == "watch": continue users = datasets.get_dataset_users(name) # Since sources-target aren't stored in filename anymore (too long), we # would run into folder name conflicts if we didn't append a unique ID # to each sources-target pair uid = 0 # Make this repeatable random.seed(42) # Allows extra max_users for some datasets without changin uid's # # TODO get rid of all this confusing code once we decide what number # to set max_users to. If we don't need to change max_users, then # we can just increment uid's like before. bonus_uid = 0