from lib import helpers

helpers.printCurrentTime("start ./amazon_statistics.py")

import logging
logging.basicConfig(
    format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# CONFIG

from pyhocon import ConfigFactory

config = ConfigFactory.parse_file('./application.conf')
amazon_dump_dir = config.get_string('amazon-dump.dir')
movie_reviews_file = config.get_string('amazon-dump.files.reviews.movies')

text_min_len = config.get_int('amazon-dump.statistics.review.text.min-len')
statistic_measures_cache_dir = config.get_string(
    'amazon-dump.statistics.measures.cache-dir')
reviews_count_file = config.get_string(
    'amazon-dump.statistics.measures.reviews-count.cache-file')
number_of_reviews_by_asin_file = config.get_string(
    'amazon-dump.statistics.measures.number-of-reviews-by-asin.cache-file')
number_of_reviews_by_person_file = config.get_string(
    'amazon-dump.statistics.measures.number-of-reviews-by-person.cache-file')
bow_by_asin_file = config.get_string(
    'amazon-dump.statistics.measures.bow-by-asin.cache-file')

import pickle
import os
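
# The rest of amazon_statistics.py is not shown here. Below is a minimal sketch
# of the cache-or-compute pattern that the configured pickle cache files
# suggest; load_or_compute and count_reviews_by_asin are hypothetical names,
# not the original implementation.

def load_or_compute(cache_path, compute):
    """Return the pickled object at cache_path, computing and caching it if absent."""
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as cached:
            return pickle.load(cached)
    value = compute()
    with open(cache_path, 'wb') as cached:
        pickle.dump(value, cached)
    return value

# Hypothetical usage: cache the per-ASIN review counts.
# number_of_reviews_by_asin = load_or_compute(
#     os.path.join(statistic_measures_cache_dir, number_of_reviews_by_asin_file),
#     lambda: count_reviews_by_asin(os.path.join(amazon_dump_dir, movie_reviews_file)))
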
from lib import helpers

helpers.printCurrentTime("start ./amazon_meta_data.py")





import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)


# CONFIG

from pyhocon import ConfigFactory

config = ConfigFactory.parse_file('./application.conf')
amazon_dump_dir = config.get_string('amazon-dump.dir')
movie_reviews_file = config.get_string('amazon-dump.files.reviews.movies')
movie_meta_data_file = config.get_string('amazon-dump.files.meta-data.movies')

text_min_len = config.get_int('amazon-dump.statistics.review.text.min-len')
statistic_measures_cache_dir = config.get_string('amazon-dump.statistics.measures.cache-dir')
reviews_count_file = config.get_string('amazon-dump.statistics.measures.reviews-count.cache-file')
number_of_reviews_by_asin_file = config.get_string('amazon-dump.statistics.measures.number-of-reviews-by-asin.cache-file')
number_of_reviews_by_person_file = config.get_string('amazon-dump.statistics.measures.number-of-reviews-by-person.cache-file')
bow_by_asin_file = config.get_string('amazon-dump.statistics.measures.bow-by-asin.cache-file')
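
# amazon_meta_data.py is cut off at this point. A hedged sketch of how the
# gzipped meta-data dump could be streamed, assuming the SNAP/McAuley format in
# which every line is a Python dict literal; if the file is JSON-lines instead,
# swap ast.literal_eval for json.loads.

import ast
import gzip
import os

def iter_meta_data(path):
    """Yield one product record per line of the gzipped meta-data dump."""
    with gzip.open(path, 'rt') as dump:
        for line in dump:
            yield ast.literal_eval(line)

# for product in iter_meta_data(os.path.join(amazon_dump_dir, movie_meta_data_file)):
#     ...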



from lib import helpers

helpers.printCurrentTime("start ./train_lsi.py")





import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)


# CONFIG

from pyhocon import ConfigFactory

config = ConfigFactory.parse_file('./application.conf')
amazon_dump_dir = config.get_string('amazon-dump.dir')
movie_reviews_file = config.get_string('amazon-dump.files.reviews.movies')
movie_meta_data_file = config.get_string('amazon-dump.files.meta-data.movies')

bow_cache_dir = config.get_string('amazon-dump.bow.cache-dir')
dictionary_cache_file = config.get_string('amazon-dump.bow.dictionary.cache-file')
corpus_cache_file = config.get_string('amazon-dump.bow.corpus.cache-file')
tfidf_cache_file = config.get_string('amazon-dump.bow.tfidf.cache-file')
lsi_cache_file = config.get_string('amazon-dump.bow.lsi.cache-file')
lsi_num_topics = config.get_int('amazon-dump.bow.lsi.num-topics')
lda_cache_file = config.get_string('amazon-dump.bow.lda.cache-file')
lda_num_topics = config.get_int('amazon-dump.bow.lda.num-topics')
hdp_cache_file = config.get_string('amazon-dump.bow.hdp.cache-file')
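
# The LSI training code of train_lsi.py is truncated above. A sketch of the
# step the config keys point at, assuming the dictionary, corpus and TF-IDF
# model were cached under bow_cache_dir by the earlier scripts; the variable
# names mirror what run_evaluation.py imports (lsi, corpus_tfidf_lsi).

import os
from gensim import corpora, models

# Load the cached bag-of-words artefacts.
dictionary = corpora.Dictionary.load(os.path.join(bow_cache_dir, dictionary_cache_file))
corpus = corpora.MmCorpus(os.path.join(bow_cache_dir, corpus_cache_file))
tfidf = models.TfidfModel.load(os.path.join(bow_cache_dir, tfidf_cache_file))
corpus_tfidf = tfidf[corpus]

# Train the LSI model, or reuse a cached one if present.
lsi_path = os.path.join(bow_cache_dir, lsi_cache_file)
if os.path.exists(lsi_path):
    lsi = models.LsiModel.load(lsi_path)
else:
    lsi = models.LsiModel(corpus_tfidf, id2word=dictionary, num_topics=lsi_num_topics)
    lsi.save(lsi_path)
corpus_tfidf_lsi = lsi[corpus_tfidf]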
from lib import helpers

helpers.printCurrentTime("start ./train_doc2vec_concat.py")





import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)


from pyhocon import ConfigFactory

config = ConfigFactory.parse_file('./application.conf')
amazon_dump_dir = config.get_string('amazon-dump.dir')
movie_reviews_file = config.get_string('amazon-dump.files.reviews.movies')
movie_meta_data_file = config.get_string('amazon-dump.files.meta-data.movies')

model_size = config.get_int('amazon-dump.doc2vec.model.training.size')
model_window = config.get_int('amazon-dump.doc2vec.model.training.window')
model_min_count = config.get_int('amazon-dump.doc2vec.model.training.min_count')
model_workers = config.get_int('amazon-dump.doc2vec.model.training.workers')
model_cache_dir = config.get_string('amazon-dump.doc2vec.model.cache-dir')


from gensim.models import doc2vec

import gzip
import nltk
import os
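
# The actual training step of train_doc2vec_concat.py is missing above. A
# minimal sketch using the configured hyper-parameters; building the
# texts_by_asin mapping (one concatenated review text per ASIN) is the part of
# the script not shown, and the cache file name below is hypothetical.

def train_doc2vec_concat_model(texts_by_asin):
    """Train a Doc2Vec model on one concatenated review text per ASIN.

    texts_by_asin is assumed to be a dict {asin: "all review texts joined"}.
    nltk.word_tokenize requires the punkt tokenizer data to be installed.
    """
    documents = [
        doc2vec.TaggedDocument(words=nltk.word_tokenize(text.lower()), tags=[asin])
        for asin, text in texts_by_asin.items()
    ]
    return doc2vec.Doc2Vec(
        documents,
        vector_size=model_size,   # spelled `size` in gensim < 4.0
        window=model_window,
        min_count=model_min_count,
        workers=model_workers,
    )

# doc2vec_model_concat = train_doc2vec_concat_model(texts_by_asin)
# doc2vec_model_concat.save(os.path.join(model_cache_dir, 'doc2vec_concat.model'))
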
from lib import helpers

helpers.printCurrentTime("start ./run_evaluation.py")





from load_perceptual_space import perceptual_space, amazon_ids
from train_doc2vec import doc2vec_model, doc2vec_labels
#from train_doc2vec_concat import doc2vec_model_concat
from train_dictionary_corpus import dictionary, corpus
from train_tfidf import tfidf, corpus_tfidf
from train_lsi import lsi, corpus_tfidf_lsi
#from train_lda import lda, corpus_lda
from amazon_statistics import number_of_reviews_by_asin, bow_by_asin





# Initialize all the models I need.
# The imports above already build or load every model as a module-level side
# effect; the bare names below appear to be deliberate no-ops that make the
# dependency explicit, with a print to confirm each model is available.

perceptual_space
print("perceptual_space")

doc2vec_model
print("doc2vec_model")

tfidf
print("tfidf")
from lib import helpers

helpers.printCurrentTime("start ./train_perceptual_space.py")





import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)


# CONFIG

from pyhocon import ConfigFactory

config = ConfigFactory.parse_file('./application.conf')
perceptual_space_dir = config.get_string('perceptual-space.file')

content = []

with open(perceptual_space_dir, 'r') as f:
  content = f.readlines()



perceptual_space = {}
amazon_ids = []
amazon_titles = []

for line in content[1:]:  # skip the header line, which holds the column labels
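    # The body of this loop is missing from the dump. A hedged guess, assuming
    # each data line is tab-separated as "<amazon_id>\t<title>\t<dim1>\t<dim2>...";
    # adjust the split and column positions to the actual file layout.
    fields = line.rstrip('\n').split('\t')
    amazon_id, title = fields[0], fields[1]
    amazon_ids.append(amazon_id)
    amazon_titles.append(title)
    perceptual_space[amazon_id] = [float(value) for value in fields[2:]]
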
from lib import helpers

helpers.printCurrentTime("start ./train_dictionary_corpus.py")





import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)


# CONFIG

from pyhocon import ConfigFactory

config = ConfigFactory.parse_file('./application.conf')
amazon_dump_dir = config.get_string('amazon-dump.dir')
movie_reviews_file = config.get_string('amazon-dump.files.reviews.movies')
movie_meta_data_file = config.get_string('amazon-dump.files.meta-data.movies')

bow_cache_dir = config.get_string('amazon-dump.bow.cache-dir')
dictionary_cache_file = config.get_string('amazon-dump.bow.dictionary.cache-file')
corpus_cache_file = config.get_string('amazon-dump.bow.corpus.cache-file')
tfidf_cache_file = config.get_string('amazon-dump.bow.tfidf.cache-file')
lsi_cache_file = config.get_string('amazon-dump.bow.lsi.cache-file')
lsi_num_topics = config.get_int('amazon-dump.bow.lsi.num-topics')
lda_cache_file = config.get_string('amazon-dump.bow.lda.cache-file')
lda_num_topics = config.get_int('amazon-dump.bow.lda.num-topics')
hdp_cache_file = config.get_string('amazon-dump.bow.hdp.cache-file')
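
# train_dictionary_corpus.py is truncated above. A sketch of the dictionary and
# corpus construction the config keys imply, assuming the tokenised review
# texts are available as a list of token lists (producing that list is the part
# of the script not shown here).

import os
from gensim import corpora

def build_dictionary_and_corpus(tokenized_reviews):
    """Build and cache a gensim Dictionary and bag-of-words corpus.

    tokenized_reviews is assumed to be a list of token lists, one per review,
    so it can be iterated twice.
    """
    dictionary = corpora.Dictionary(tokenized_reviews)
    dictionary.save(os.path.join(bow_cache_dir, dictionary_cache_file))

    corpus = [dictionary.doc2bow(tokens) for tokens in tokenized_reviews]
    corpora.MmCorpus.serialize(os.path.join(bow_cache_dir, corpus_cache_file), corpus)
    return dictionary, corpus
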
from lib import helpers

helpers.printCurrentTime("start ./train_tfid.py")

import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)


# CONFIG

from pyhocon import ConfigFactory

config = ConfigFactory.parse_file('./application.conf')
amazon_dump_dir = config.get_string('amazon-dump.dir')
movie_reviews_file = config.get_string('amazon-dump.files.reviews.movies')
movie_meta_data_file = config.get_string('amazon-dump.files.meta-data.movies')

bow_cache_dir = config.get_string('amazon-dump.bow.cache-dir')
dictionary_cache_file = config.get_string('amazon-dump.bow.dictionary.cache-file')
corpus_cache_file = config.get_string('amazon-dump.bow.corpus.cache-file')
tfidf_cache_file = config.get_string('amazon-dump.bow.tfidf.cache-file')
lsi_cache_file = config.get_string('amazon-dump.bow.lsi.cache-file')
lsi_num_topics = config.get_int('amazon-dump.bow.lsi.num-topics')
lda_cache_file = config.get_string('amazon-dump.bow.lda.cache-file')
lda_num_topics = config.get_int('amazon-dump.bow.lda.num-topics')
hdp_cache_file = config.get_string('amazon-dump.bow.hdp.cache-file')


# BAG OF WORDS AS DICTIONARY

import os
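
# The remainder of train_tfidf.py is cut off here. A sketch of the TF-IDF step
# the config points at, assuming the bag-of-words corpus was cached by
# train_dictionary_corpus.py; the names tfidf and corpus_tfidf mirror what
# run_evaluation.py imports from this module.

from gensim import corpora, models

# Load the cached bag-of-words corpus and fit (or reuse) the TF-IDF model.
corpus = corpora.MmCorpus(os.path.join(bow_cache_dir, corpus_cache_file))

tfidf_path = os.path.join(bow_cache_dir, tfidf_cache_file)
if os.path.exists(tfidf_path):
    tfidf = models.TfidfModel.load(tfidf_path)
else:
    tfidf = models.TfidfModel(corpus)
    tfidf.save(tfidf_path)

corpus_tfidf = tfidf[corpus]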