예제 #1
0
 def save_vocab(fields):
     """
     Collect the vocabularies attached to a mapping of fields.

     fields: a dictionary whose keys are field names and whose values are
             field objects (entries may be None)
     returns: a list of (field name, vocab) pairs, in iteration order, for
              every field that carries a `vocab` instance attribute

     Side effect: each collected vocab has its `stoi` mapping replaced by a
     plain `dict` copy (presumably so that the vocab serializes cleanly --
     confirm against the caller).
     """
     vocab = []
     for k, f in fields.items():
         # Skip empty slots and fields whose vocabulary was never set;
         # a None entry would otherwise raise AttributeError on __dict__.
         if f is None or 'vocab' not in f.__dict__:
             continue
         f.vocab.stoi = dict(f.vocab.stoi)
         vocab.append((k, f.vocab))
     return vocab
예제 #2
0
def save_fields_to_vocab(fields):
    """
    Gather the Vocab objects held by Field objects.

    Walks `fields` (field name -> Field or None) and returns a list of
    (field name, vocab) pairs for every field that has a `vocab` instance
    attribute. Each returned vocab has its `stoi` mapping replaced by a
    plain `dict` copy. Nothing is written to disk here.
    """
    pairs = []
    for name, field in fields.items():
        # Ignore missing fields and fields without a vocabulary.
        if field is None or 'vocab' not in field.__dict__:
            continue
        field_vocab = field.vocab
        field_vocab.stoi = dict(field_vocab.stoi)
        pairs.append((name, field_vocab))
    return pairs
예제 #3
0
def file_stats(dataset, return_vocab=True, test_file=False, pd_data=False, return_len=False):
	"""
	Collect the non-'conflict' records of a dataset and print summary statistics.

	dataset: when `return_vocab` is true, a path to a JSON file holding the
		records; otherwise an already-loaded iterable of records. Every
		record is indexed with the keys 'sentiment' and 'sentence'.
	return_vocab: load `dataset` from disk and include the kept records in
		the return value.
	test_file: when true, keep every non-'conflict' record (no length
		filter) and skip the per-class counts.
	pd_data: when true (and `return_vocab` is true), the decoded JSON is
		passed through `json.loads` a second time.
	return_len: include the maximum token count in the return value.

	returns: `(records, max_len)` if both `return_vocab` and `return_len`
		are set, `records` or `max_len` if only one is set, otherwise None.
	"""
	# Non-test sentences with more tokens than this are dropped.
	max_tokens = 100

	if return_vocab:
		# Context manager so the file handle is closed deterministically.
		with open(dataset) as handle:
			data = json.load(handle)
		if pd_data:
			# To-do : Why do we need to json.loads() twice? Resolve this issue.
			data = json.loads(data)
	else:
		data = dataset

	vocab = []
	counts = {'positive': 0, 'negative': 0, 'neutral': 0}
	max_len = 0

	for d in data:
		sentiment = d['sentiment']
		if sentiment == 'conflict':
			continue

		# Tokenize once per record; the result feeds both the filter and max_len.
		n_tokens = len(spacy_tokenizer(d['sentence']))

		# `not`, not `~`: bitwise inversion of a bool is always truthy.
		if not test_file:
			if n_tokens > max_tokens:
				continue
			if sentiment in counts:
				counts[sentiment] += 1

		vocab.append(d)
		max_len = max(max_len, n_tokens)

	print("Total number of Aspects: ", len(vocab))
	if not test_file:
		print("Count of Sentiment class: ", "pos ",counts['positive'], ", neg ", counts['negative'], ", neu ",counts['neutral'])
	print("Max length of sentence: ", max_len)

	if return_vocab and return_len:
		return vocab, max_len
	elif return_vocab:
		return vocab
	elif return_len:
		return max_len
예제 #4
0
def save_fields_to_vocab(fields):
    """
    fields: a dictionary whose keys are field names and whose values are
            Field objects (entries may be None)
    returns: a list of (field name, vocab) pairs for the fields that have a
             vocabulary

    The vocab objects are returned as-is; in particular their `stoi`
    mapping is not copied or converted.
    """
    # Only fields with a `vocab` instance attribute are included; None
    # entries and fields without a vocabulary are skipped.
    return [
        (name, field.vocab)
        for name, field in fields.items()
        if field is not None and 'vocab' in field.__dict__
    ]