def from_files(cls, reviews_file: str, labels_file: str, vocab: Vocabulary = None):
    """Build a dataset from a reviews file and a parallel labels file.

    Line ``i`` of ``reviews_file`` is paired with line ``i`` of
    ``labels_file``. Each review line is split on single spaces into a list
    of tokens, and each label line is parsed with ``int``.

    Args:
        reviews_file: Path to a text file with one review per line.
        labels_file: Path to a text file with one integer label per line.
        vocab: Vocabulary handed to the constructor. When ``None``, one is
            built from ``reviews_file`` with ``Vocabulary.from_text_files``,
            using ``Vocabulary.UNK`` and ``SSTClassificationDataset.PAD`` as
            special tokens.

    Returns:
        The result of ``cls(examples, vocab)``, where ``examples`` is a list
        of ``{"review": list[str], "label": int}`` dictionaries.

    Raises:
        ValueError: If ``labels_file`` has fewer lines than ``reviews_file``,
            or if a paired label line cannot be parsed as an integer.
    """
    with open(reviews_file, "r") as reviews, open(labels_file, "r") as labels:
        review_lines = reviews.readlines()
        label_lines = labels.readlines()

    # Every review needs a label. Fail with a message naming both files
    # instead of an anonymous IndexError from positional indexing.
    if len(label_lines) < len(review_lines):
        raise ValueError(
            f"{labels_file!r} has {len(label_lines)} lines but "
            f"{reviews_file!r} has {len(review_lines)}; "
            "every review needs a label"
        )

    # zip stops at the last review, so surplus label lines are ignored.
    examples = [
        {
            # review is text, stored as a list of tokens
            "review": review.strip("\n").split(" "),
            "label": int(label.strip("\n")),
        }
        for review, label in zip(review_lines, label_lines)
    ]

    # Initialize a vocabulary from the reviews only when none was supplied.
    # The identity check keeps a caller-provided vocabulary even if it is
    # falsy (e.g. reports a length of zero).
    if vocab is None:
        vocab = Vocabulary.from_text_files(
            [reviews_file],
            special_tokens=(Vocabulary.UNK, SSTClassificationDataset.PAD),
        )
    return cls(examples, vocab)