Ejemplo n.º 1
0
def Unique_add_file_to_vocab(file, vocab):
    """Fold the cleaned tokens of one file into ``vocab``.

    The file is read with ``load_file``, the result is passed through
    ``clean_file``, and whatever that produces is handed to
    ``vocab.update``. ``vocab`` is modified in place; nothing is returned.

    Args:
        file: Value forwarded to ``load_file`` (presumably a file path --
            confirm against ``load_file``).
        vocab: Object exposing an ``update`` method. The original
            "updating counts" note suggests a ``collections.Counter``;
            verify against the caller.
    """
    vocab.update(clean_file(load_file(file)))
Ejemplo n.º 2
0
def add_file_to_vocab(file, vocab):
    """Add the cleaned tokens of ``file`` to ``vocab`` in place.

    Args:
        file: Value forwarded to ``load_file`` (presumably a file path --
            confirm against ``load_file``).
        vocab: Object exposing an ``update`` method; presumably a
            ``collections.Counter`` of token counts -- verify against the
            caller.

    Returns:
        None. The only effect is the call to ``vocab.update``.
    """
    cleaned_tokens = clean_file(load_file(file))
    # Updating counts: the vocabulary object is mutated, not replaced.
    vocab.update(cleaned_tokens)
Ejemplo n.º 3
0
def file_to_line(dir):
    """Return the cleaned tokens of one file joined into a single line.

    Args:
        dir: Value forwarded to ``load_file``. Despite the name it is
            presumably the path of a single file -- confirm against
            ``load_file``.

    Returns:
        The items produced by ``clean_file`` joined with a single space.
    """
    separator = ' '
    return separator.join(clean_file(load_file(dir)))
Ejemplo n.º 4
0
from os import listdir
#from nltk.corpus import stopwords
from Dataprocessing import load_file
from Dataprocessing import clean_file
from Createvocab import save_file
import pickle as pkl

print("In Savedata.py")
print("Creating .pkl files")

# Load the vocabulary file and split it into its individual entries
# (whitespace-separated, assuming load_file returns the file text as a str).
# Exactly one vocabulary file is active; swap the name below to switch:
#   '1600vocab.txt'   -- 1600 review data (active)
#   '10000vocab.txt'  -- 10000 review data
#   '25000vocab.txt'  -- 25000 review data
vocab = load_file('1600vocab.txt').split()


def file_to_line(dir):
    """Load a file, clean it, and return its tokens as one string.

    Args:
        dir: Value forwarded to ``load_file``. Despite the name it is
            presumably the path of a single file -- confirm against
            ``load_file``.

    Returns:
        The items produced by ``clean_file`` joined with a single space.
    """
    cleaned_tokens = clean_file(load_file(dir))
    line = ' '.join(cleaned_tokens)
    return line


def convert_files(directory):
    lines = []
Ejemplo n.º 5
0
def Total_tokens(path):
    """Return every token of a file without any cleaning.

    The content returned by ``load_file`` is split with ``split()`` and no
    arguments, i.e. on runs of whitespace when that content is a ``str``.

    Args:
        path: Value forwarded to ``load_file`` (presumably a file path --
            confirm against ``load_file``).

    Returns:
        The list produced by ``split()``, duplicates included.
    """
    return load_file(path).split()