def cv(task: Task, variant: Variant, model: nn.Module, config: Dict[str, Any]) -> None:
    """Run cross validation (for hyperparameter selection)."""
    logging.info("Running cross validation for {}, {}".format(task.name, variant.name))

    # get data. cv uses train data only.
    train_data, _ = data.get(task)
    labels, y_np = train_data
    labels_np = np.array(labels)
    x_np = data.features(task, variant, labels)

    # run k-fold cross validation
    folder = model_selection.KFold(n_splits=5, shuffle=True)
    overall_y_hat = np.zeros_like(y_np)
    for i, (train_index, test_index) in enumerate(folder.split(x_np)):
        # logging.info("Fold {}".format(i))
        x_train, x_test = x_np[train_index], x_np[test_index]
        y_train, y_test = y_np[train_index], y_np[test_index]
        labels_train, labels_test = labels_np[train_index], labels_np[test_index]
        centering = train(model, x_train, y_train, config)
        y_test_hat = test(model, x_test, y_test, centering, config)
        # Uncomment the next line to report on individual folds.
        # metrics.report(y_test_hat, y_test, labels_test, data.TASK_LABELS[task])

        # Save results into overall aggregate.
        overall_y_hat[test_index] = y_test_hat

    # Report on overall results.
    metrics.report(overall_y_hat, y_np, labels, data.TASK_LABELS[task])
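# A minimal, self-contained sketch (not part of the pipeline; the trivial
# threshold "model" is hypothetical) of the out-of-fold aggregation pattern
# cv() uses: each fold writes its predictions into the held-out slots, so one
# metrics pass at the end covers every training datum exactly once.
def _oof_aggregation_demo() -> None:
    rng = np.random.default_rng(0)
    x = rng.random((100, 4))
    y = (x.sum(axis=1) > 2).astype(int)
    oof = np.zeros_like(y)
    for train_index, test_index in model_selection.KFold(n_splits=5, shuffle=True).split(x):
        threshold = x[train_index].sum(axis=1).mean()  # "train" a trivial model
        oof[test_index] = (x[test_index].sum(axis=1) > threshold).astype(int)
    assert oof.shape == y.shape  # exactly one out-of-fold prediction per datum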
def train_and_test(
    task: Task, variant: Variant, model: nn.Module, config: Dict[str, Any]
) -> Tuple[float, float, Dict[str, float], Dict[int, Dict[str, Any]]]:
    """Run a final train + test run over a task."""
    logging.info("Running train+test for {}, {}".format(task.name, variant.name))

    # get data. train+test uses both splits.
    train_data, test_data = data.get(task)

    # train. dedupe labels so each unique label is featurized exactly once.
    labels_train, y_train_np = train_data
    train_label_to_y = {}
    for label, y in zip(labels_train, y_train_np):
        train_label_to_y[label] = y
    labels_train_unique = sorted(set(labels_train))
    y_train = [train_label_to_y[label] for label in labels_train_unique]
    x_train_np = data.features(task, variant, labels_train_unique)
    centering = train(model, x_train_np, y_train, config)

    # test. same dedupe.
    labels_test, y_test_np = test_data
    test_label_to_y = {}
    for label, y in zip(labels_test, y_test_np):
        test_label_to_y[label] = y
    labels_test_unique = sorted(set(labels_test))
    y_test = [test_label_to_y[label] for label in labels_test_unique]
    x_test_np = data.features(task, variant, labels_test_unique)
    y_test_hat = test(model, x_test_np, y_test, centering, config)

    return metrics.report(y_test_hat, y_test, labels_test_unique, data.TASK_LABELS[task])
def __init__(self, task: Task, train: bool, seq_len: int = 20) -> None:
    """
    Args:
        task: task to use
        train: True for train, False for test
        seq_len: sequence length. Set to 2 higher than you need for tokens to
            account for [CLS] and [SEP]
    """
    self.seq_len = seq_len

    # load labels and y data
    train_data, test_data = get(task)
    split_data = train_data if train else test_data
    self.labels, self.y = split_data
    assert len(self.labels) == len(self.y)

    # load X index
    # line_mapping maps from word1/word2 label to sentence index in sentence list.
    self.line_mapping: Dict[str, int] = {}
    task_short = TASK_SHORTHAND[task]
    with open("data/sentences/index.csv", "r") as f:
        reader = csv.DictReader(f)
        for i, row in enumerate(reader):
            if row["task"] == task_short:
                self.line_mapping[row["uids"]] = i
                # TODO: check that i lines up and isn't off by one

    with open("data/sentences/sentences.txt", "r") as f:
        self.sentences = [line.strip() for line in f.readlines()]

    # show some samples. This is a really great idiom that huggingface does. Baking
    # little visible sanity checks like this into your code is just... *does gesture
    # where you kiss your fingers and throw them away from your mouth as if
    # describing great food.*
    n_sample = 5
    print("{} Samples:".format(n_sample))
    for i in random.sample(range(len(self.labels)), n_sample):
        label = self.labels[i]
        sentence = self.sentences[self.line_mapping[label]]
        print('- {}: "{}"'.format(label, sentence))

    print("Loading tokenizer...")
    self.tokenizer = BertTokenizer.from_pretrained(
        "bert-large-uncased", do_lower_case=True, do_basic_tokenize=True
    )
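# The TODO above flags a possible off-by-one between index.csv rows and
# sentences.txt lines. A hypothetical sanity-check method (assumed for
# illustration, not in the repo) that would catch a misalignment at load time:
def _check_line_mapping(self) -> None:
    """Assert every label maps to an in-range sentence index."""
    for label in self.labels:
        assert label in self.line_mapping, "{} missing from index.csv".format(label)
        assert 0 <= self.line_mapping[label] < len(self.sentences)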
def train_and_test(
    task: Task, variant: Variant, model: nn.Module, config: Dict[str, Any]
) -> Tuple[float, float, Dict[str, float], Dict[int, Dict[str, Any]]]:
    """Run a final train + test run over a task."""
    logging.info("Running train+test for {}, {}".format(task.name, variant.name))

    # get data. train+test uses both splits.
    train_data, test_data = data.get(task)

    # train
    labels_train, y_train_np = train_data
    x_train_np = data.features(task, variant, labels_train)
    centering = train(model, x_train_np, y_train_np, config)

    # test
    labels_test, y_test_np = test_data
    x_test_np = data.features(task, variant, labels_test)
    y_test_hat = test(model, x_test_np, y_test_np, centering, config)

    return metrics.report(y_test_hat, y_test_np, labels_test, data.TASK_LABELS[task])
def main() -> None:
    # settings. (onetime use so no flags.)
    perdatum_path = "data/results/Bert-situated-AP-perdatum.txt"
    task = data.Task.Situated_AffordancesProperties

    # load per-datum output
    with open(perdatum_path, "r") as f:
        perdatum = util.str2np(f.read())

    # get test data: labels and groundtruth y-values
    _, test_data = data.get(task)
    labels, y = test_data
    y = y.squeeze()

    # the per-datum output records whether y_hat == y. we want to recover y_hat
    # so we can pass it back into metrics to easily re-compute everything we
    # need. there's probably a vectorized op that can do this, but oh well.
    y_hat = np.zeros_like(y)
    for i in range(len(y)):
        y_hat[i] = y[i] if perdatum[i] else 1 - y[i]

    # sanity check
    assert len(labels) == len(y_hat)
    assert len(y) == len(y_hat)

    _, _, _, category_cms, _ = metrics.report(y_hat, y, labels, data.TASK_LABELS[task])

    # write out
    task_short = data.TASK_SHORTHAND[task]
    for i in [0, 1]:
        # e.g., "O" for objects, "P" for properties
        cat_short = task_short[i]
        out_path = "data/results/{}-{}-{}.txt".format("Bert", task_short, cat_short)
        print("Writing {} results to {}".format(cat_short, out_path))
        with open(out_path, "w") as f:
            for item, cm in category_cms[i]["per-item"].items():
                f.write("{} {}\n".format(item, util.np2str(cm)))
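# For the "probably a vectorized op" note in main(): np.where recovers y_hat in
# one shot. A sketch, assuming binary 0/1 labels and that perdatum casts cleanly
# to a boolean mask:
def _recover_y_hat(perdatum: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Keep y where the prediction was correct; flip the label where it wasn't."""
    return np.where(perdatum.astype(bool), y, 1 - y)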
def baseline(
    func: Callable[[List[str], np.ndarray, List[str], Tuple[int, ...]], np.ndarray],
    name: str,
    shortname: str,
) -> str:
    # settings
    tasks = [
        (Task.Abstract_ObjectsProperties, ["object", "property"]),
        (Task.Situated_ObjectsProperties, ["object", "property"]),
        (Task.Situated_ObjectsAffordances, ["object", "affordance"]),
        (Task.Situated_AffordancesProperties, ["affordance", "property"]),
    ]

    nums = []
    for task, mf1_labs in tasks:
        logging.info("Running {} baseline for {}".format(name, task.name))
        train_data, test_data = data.get(task)
        labels_train, y_train = train_data
        labels_test, y_test = test_data
        y_test_hat = func(labels_train, y_train, labels_test, y_test.shape)
        _, _, macro_f1s, _, per_datum = metrics.report(
            y_test_hat, y_test, labels_test, data.TASK_LABELS[task]
        )
        for mf1_lab in mf1_labs:
            nums.append(macro_f1s[mf1_lab])

        # write full results to file
        path = os.path.join(
            "data",
            "results",
            "{}-{}-perdatum.txt".format(shortname, TASK_MEDIUMHAND[task]),
        )
        with open(path, "w") as f:
            f.write(util.np2str(per_datum) + "\n")
        logging.info("")

    return name + "," + ",".join(["{:.2f}".format(num) for num in nums])
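# A hypothetical func matching the Callable signature baseline() expects
# ((labels_train, y_train, labels_test, test_shape) -> y_test_hat). It is not
# one of the repo's baselines; it's shown only to make the contract concrete:
def majority_baseline(
    labels_train: List[str],
    y_train: np.ndarray,
    labels_test: List[str],
    shape: Tuple[int, ...],
) -> np.ndarray:
    """Predict the most frequent training class for every test datum (assumes
    binary 0/1 labels)."""
    majority = int(np.mean(y_train) >= 0.5)
    return np.full(shape, majority)

# Hypothetical usage: csv_row = baseline(majority_baseline, "Majority", "maj")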
def create() -> None:
    """This was run once to build the set that was annotated by humans."""
    phase = "round1"
    for task in Task:
        _, _, test = data.get(task)
        labels, _, gold_2d = test
        gold = gold_2d.squeeze()
        index = random.sample(range(len(labels)), 100)
        selected_labels = np.array(labels)[index].tolist()
        selected_gold = gold[index].tolist()

        label_path = os.path.join(
            "data", "human", "{}-{}-labels.txt".format(TASK_MEDIUMHAND[task], phase)
        )
        with open(label_path, "w") as f:
            for label in selected_labels:
                f.write(label + "\n")

        gold_path = os.path.join(
            "data", "human", "{}-{}-gold.txt".format(TASK_MEDIUMHAND[task], phase)
        )
        with open(gold_path, "w") as f:
            for gold in selected_gold:
                f.write(str(gold) + "\n")
def __init__(self, task: Task, train: bool, seq_len: int = 20) -> None:
    self.seq_len = seq_len

    # load labels and y data
    train_data, test_data = get(task)
    split_data = train_data if train else test_data
    self.labels, self.y = split_data
    assert len(self.labels) == len(self.y)

    # load X index
    # line_mapping maps from word1/word2 label to sentence index in sentence list.
    self.line_mapping: Dict[str, int] = {}
    task_short = TASK_SHORTHAND[task]
    with open("data/sentences/index.csv", "r") as f:
        reader = csv.DictReader(f)
        for i, row in enumerate(reader):
            if row["task"] == task_short:
                self.line_mapping[row["uids"]] = i
                # TODO: check that i lines up and isn't off by one

    with open("data/sentences/sentences.txt", "r") as f:
        self.sentences = [line.strip() for line in f.readlines()]

    '''
    n_sample = 5
    print("{} Samples:".format(n_sample))
    for i in random.sample(range(len(self.labels)), n_sample):
        label = self.labels[i]
        sentence = self.sentences[self.line_mapping[label]]
        print('- {}: "{}"'.format(label, sentence))
    '''

    print("Loading tokenizer...")
    self.tokenizer = BertTokenizer.from_pretrained(
        "bert-large-uncased", do_lower_case=True, do_basic_tokenize=True
    )
def __init__(
    self,
    task: Task,
    image_preprocesser: Any,
    train: bool,
    gan_imgs: bool = False,
    random_imgs: bool = False,
    text_only: bool = False,
    dev: bool = True,
) -> None:
    """
    Args:
        task: task to use
        image_preprocesser: transform applied to each loaded image (e.g., the
            preprocess function returned by clip.load())
        train: True for train, False for test
        gan_imgs: True to use generated images from
            data/situated_sentence_images/ instead of MSCOCO images
        random_imgs: True to shuffle the image/sentence pairing (control)
        text_only: True to skip loading images entirely
        dev: passed through to get()
    """
    # load labels and y data
    self.text_only = text_only
    train_data, test_data = get(task, dev)
    split_data = train_data if train else test_data
    labels, y_list = split_data
    self.label_to_y = {}
    for label, y in zip(labels, y_list):
        self.label_to_y[label] = y

    # load X index
    # line_mapping maps from word1/word2 label to sentence index in sentence list.
    self.line_mapping = {}
    self.line_mapping_r = {}
    task_short = TASK_SHORTHAND[task]
    with open("data/sentences/index.csv", "r") as f:
        reader = csv.DictReader(f)
        for i, row in enumerate(reader):
            if row["task"] == task_short:
                self.line_mapping[row["uids"]] = i
                self.line_mapping_r[i] = row["uids"]
                # TODO: check that i lines up and isn't off by one

    # using a sorted list to keep deterministic ordering
    self.task_idxs = sorted(set(self.line_mapping[label] for label in labels))

    with open("data/sentences/sentences.txt", "r") as f:
        all_sentences = [line.strip() for line in f.readlines()]
    self.sent_idx_to_dataset_id = {}
    self.sentences = []
    for i, sent in enumerate(all_sentences):
        if i in self.task_idxs:
            self.sentences.append(sent)
            self.sent_idx_to_dataset_id[i] = len(self.sentences) - 1
    self.tokenized_sents = clip.tokenize(self.sentences)

    # Load map from sentence index to image names and get list of image names
    if not text_only:
        if gan_imgs:
            self.images = [
                f"data/situated_sentence_images/{task_idx}.png"
                for task_idx in self.task_idxs
            ]
        else:
            sent_idx_to_image = pkl.load(
                open("data/clip/sent_idx_to_image.pkl", "rb")
            )[task]
            self.images = [
                "data/mscoco/images/{}".format(sent_idx_to_image[task_idx])
                for task_idx in self.task_idxs
            ]
        if random_imgs:
            random.shuffle(self.images)

    # show some samples. This is a really great idiom that huggingface does. Baking
    # little visible sanity checks like this into your code is just... *does gesture
    # where you kiss your fingers and throw them away from your mouth as if
    # describing great food.*
    n_sample = 5
    print("{} Samples:".format(n_sample))
    for i in random.sample(range(len(self.task_idxs)), n_sample):
        label = self.line_mapping_r[self.task_idxs[i]]
        sentence = self.sentences[i]
        if not text_only:
            image = self.images[i]
            print('- {}: "{}", "{}"'.format(label, sentence, image))
        else:
            print('- {}: "{}"'.format(label, sentence))

    if not text_only:
        self.image_preprocesser = image_preprocesser
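# A hypothetical __getitem__ pairing with the state built above. The indexing
# scheme, the y lookup via line_mapping_r, and the PIL loading (assumes
# `from PIL import Image` at module top) are illustrative assumptions, not the
# repo's actual method:
def __getitem__(self, i: int) -> Any:
    label = self.line_mapping_r[self.task_idxs[i]]
    y = self.label_to_y[label]
    tokens = self.tokenized_sents[i]
    if self.text_only:
        return tokens, y
    image = self.image_preprocesser(Image.open(self.images[i]).convert("RGB"))
    return tokens, image, y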