# -*- coding: utf-8 -*-
import argparse
import os

import numpy as np
from keras import layers
from keras.models import Sequential
from six.moves import range

from constants import INVERT
from data_gen import LazyDataLoader
from utils import colors
from utils import get_chars_and_ctable

# Loader built with its default arguments; only its statistics are read here.
DATA_LOADER = LazyDataLoader()

INPUT_MAX_LEN, OUTPUT_MAX_LEN, TRAINING_SIZE = DATA_LOADER.statistics()

chars, ctable = get_chars_and_ctable()

# The arrays are expected to have been written beforehand; fail early with an
# actionable message instead of letting np.load raise on a missing file.
if not os.path.exists('x_y.npz'):
    raise Exception('Please run the vectorization script before.')

print('Loading data from prefetch...')
# np.load on an .npz archive returns an NpzFile that keeps the underlying file
# open. Each array is read into memory when it is indexed, so the archive can
# be closed as soon as the four arrays have been pulled out.
with np.load('x_y.npz') as data:
    x_train = data['x_train']
    x_val = data['x_val']
    y_train = data['y_train']
    y_val = data['y_val']
# Command-line driven encoding step: build the vocabulary from the training
# file, then collect every (input, target) pair yielded by the loader.
#
# NOTE(review): `build_vocabulary` and `tqdm` are used below but are not
# imported in the visible part of this file -- confirm where they come from.
# NOTE(review): DATA_LOADER and chars are also assigned earlier in this file;
# they are rebound here.
parser = argparse.ArgumentParser('Data Encoding Tool.')
parser.add_argument('--training_filename', type=str,
                    help='Result of run_data_processing.py. '
                         'Something like: /home/premy/BreachCompilationAnalysis/edit-distances/1.csv',
                    required=True)
# parser.add_argument('--encoding_output_folder', type=str, help='Will be used for training')

arg_p = parser.parse_args()

print('Building vocabulary...')

build_vocabulary(arg_p.training_filename)

print('Vectorization...')

DATA_LOADER = LazyDataLoader(arg_p.training_filename)

# Only the record count is needed here; the first two statistics are ignored.
_, _, training_records_count = DATA_LOADER.statistics()

# TOKEN_INDICES = get_token_indices()

chars, c_table = get_chars_and_ctable()

inputs = []
targets = []
print('Generating data...')
for _ in tqdm(range(training_records_count), desc='Generating inputs and targets'):
    x_, y_ = DATA_LOADER.next()
    # Pairs are stored exactly as the loader returns them; no padding is
    # applied in this loop.
    inputs.append(x_)
    targets.append(y_)