# Locate the training files: one list of paths for the positive class and one
# for the negative class.
def _list_training_files(directory):
    """Return `directory + name` for each entry of `directory` that is a file.

    Entries for which `os.path.isfile` is false (e.g. sub-directories) are
    skipped. The returned paths are built by plain string concatenation, so
    `directory` is expected to end with a path separator.
    """
    paths = []
    for name in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, name)):
            paths.append(directory + name)
    return paths


positiveFiles = _list_training_files('./Data/IMDBData/train/pos/')
negativeFiles = _list_training_files('./Data/IMDBData/train/neg/')

# Build the text pre-processor and the sentiment analyzer used by the rest of
# this script. All analyzer arguments are values defined outside this section.
processor = PreProcessor()
analyzer = SentimentAnalyzer(
    MAX_SEQUENCE_LENGTH,
    BATCH_SIZE,
    LSTM_UNITS,
    LEARNING_RATE,
    wordMap,
    wordVectors,
)

# Read every training file, clean its lines with the pre-processor, and
# collect the cleaned output per class.
def _clean_documents(paths, samples, log_prefix):
    """Clean each file in `paths` and append the results to `samples`.

    Each file is opened as UTF-8 text, all of its lines are passed to
    `processor.cleanTextList`, and whatever that returns is added to
    `samples` in place. One progress line (`log_prefix` followed by the
    path) is printed per file.
    """
    for path in paths:
        with open(path, "r", encoding="utf8") as handle:
            raw_lines = handle.readlines()
            samples.extend(processor.cleanTextList(raw_lines))
            print(log_prefix + path)


negativeSamples = []
positiveSamples = []
_clean_documents(positiveFiles, positiveSamples, "Cleaned positive document: ")
_clean_documents(negativeFiles, negativeSamples, "Cleaned negative document: ")

# Train the analyzer on the cleaned positive and negative samples.
# NOTE(review): the last argument looks like the checkpoint save path —
# confirm against SentimentAnalyzer.TrainModel.
analyzer.TrainModel(
    ITERATIONS,
    SAVE_FREQUENCY,
    positiveSamples,
    negativeSamples,
    "IMDBSA/model/pretrained_lstm.ckpt",
)

# Load a trained analyzer
# analyzer.LoadModel("models")

# Test the analyzer
# analyzer.TestModel(positiveSamples, [1 for x in positiveSamples])