def save_vocab(fields):
    """Collect the vocabularies attached to a mapping of fields.

    Args:
        fields: A mapping from field name to field object. Entries that are
            ``None`` or that carry no ``vocab`` instance attribute are skipped.

    Returns:
        A list of ``(field name, vocab)`` pairs, in the iteration order of
        ``fields``.

    Side effects:
        Each collected vocab has its ``stoi`` attribute replaced in place by
        a plain ``dict`` copy of the original mapping.
    """
    vocab = []
    for k, f in fields.items():
        # Guard against None entries: reading ``__dict__`` on None would raise.
        if f is not None and 'vocab' in f.__dict__:
            # Rebuild as a plain dict (drops any dict-subclass behaviour such
            # as a default factory).
            f.vocab.stoi = dict(f.vocab.stoi)
            vocab.append((k, f.vocab))
    return vocab
def save_fields_to_vocab(fields):
    """
    Gather the Vocab objects held by Field objects.

    fields: a mapping of field name -> Field object (entries may be None).

    returns: a list of (field name, vocab) pairs for every non-None field
             that has a `vocab` instance attribute. Nothing is written to
             disk here; presumably the caller serializes the result
             (e.g. to `vocab.pt`).

    Note: the `stoi` mapping of each collected vocab is replaced in place
    by a plain `dict` copy.
    """
    pairs = []
    for name, field in fields.items():
        # Skip placeholders and fields that never built a vocabulary.
        if field is None or 'vocab' not in field.__dict__:
            continue
        field.vocab.stoi = dict(field.vocab.stoi)
        pairs.append((name, field.vocab))
    return pairs
def file_stats(dataset, return_vocab=True, test_file=False, pd_data=False, return_len=False):
    """Filter a sentiment dataset and print summary statistics about it.

    Records whose ``'sentiment'`` is ``'conflict'`` are always dropped. For
    non-test data, records whose tokenized sentence has more than 100 tokens
    are dropped as well, and the kept records are tallied per sentiment
    class (positive / negative / neutral).

    Args:
        dataset: Path of a JSON file when ``return_vocab`` is true; otherwise
            an already-loaded iterable of records. Every record is indexed
            with the keys ``'sentence'`` and ``'sentiment'``.
        return_vocab: When true, ``dataset`` is read from disk and the kept
            records are part of the return value.
        test_file: When true, the length filter and the per-class tally are
            skipped.
        pd_data: When true (and ``return_vocab`` is true), the decoded file
            content is JSON-decoded a second time.
        return_len: When true, the maximum token count of the kept sentences
            is part of the return value.

    Returns:
        ``(records, max_len)`` if both ``return_vocab`` and ``return_len`` are
        true, ``records`` if only ``return_vocab`` is true, ``max_len`` if
        only ``return_len`` is true, and ``None`` otherwise.
    """
    max_tokens = 100  # longest sentence (in tokens) kept for non-test data

    if return_vocab:
        # Context manager so the file handle is closed deterministically.
        with open(dataset) as handle:
            data = json.loads(handle.read())
        if pd_data:
            # TODO: find out why this payload needs decoding twice and fix it
            # where the file is produced.
            data = json.loads(data)
    else:
        data = dataset

    vocab = []
    class_counts = {'positive': 0, 'negative': 0, 'neutral': 0}
    max_len = 0

    for d in data:
        sentiment = d['sentiment']
        if sentiment == 'conflict':
            continue

        # Tokenize once per record; the result is needed for both the length
        # filter and the running maximum.
        n_tokens = len(spacy_tokenizer(d['sentence']))

        # ``not`` rather than ``~``: bitwise inversion of a bool is always
        # truthy (~False == -1, ~True == -2), which made the test-file path
        # unreachable.
        if not test_file and n_tokens > max_tokens:
            continue

        vocab.append(d)
        max_len = max(max_len, n_tokens)
        if not test_file and sentiment in class_counts:
            class_counts[sentiment] += 1

    print("Total number of Aspects: ", len(vocab))
    if not test_file:
        print("Count of Sentiment class: ", "pos ", class_counts['positive'],
              ", neg ", class_counts['negative'], ", neu ", class_counts['neutral'])
    print("Max length of sentence: ", max_len)

    if return_vocab and return_len:
        return vocab, max_len
    elif return_vocab:
        return vocab
    elif return_len:
        return max_len
def save_fields_to_vocab(fields):
    """
    Pair each field name with that field's vocabulary.

    fields: a dictionary mapping field names to Field objects; a value may
            be None
    returns: a list of (field name, vocab) tuples, one for every non-None
             field that has a `vocab` instance attribute
    """
    collected = []
    for name, field in fields.items():
        has_vocab = field is not None and 'vocab' in field.__dict__
        if not has_vocab:
            continue
        # NOTE(review): this self-assignment is a no-op when `stoi` is a plain
        # attribute; kept to preserve behavior. Confirm whether a copy such as
        # dict(...) was intended here.
        field.vocab.stoi = field.vocab.stoi
        collected.append((name, field.vocab))
    return collected