# The classifier built later receives vocab_choice. This binds a second name
# to the same object as term_prob (no copy is made), so the item assignments
# into term_prob performed below remain visible through vocab_choice.
vocab_choice = term_prob

# Keep only the vocabulary keys, then rebind the large training-time
# structures to empty containers so the originals can be reclaimed once
# nothing else references them.
all_terms_list = all_terms.keys()
all_terms, all_terms_prob = {}, {}
vectorised_train_documents_tf = []

#Perform feature selection on terms
from tools.cooccurence_utils import feature_selection

# Entry i of term_prob is paired with entry num_labels + i. Each pair is
# handed to feature_selection together, stored under keys 0 and 1 of a single
# reused scratch dict, and whatever sits under those keys after the call is
# written back into term_prob.
# A percent-based variant of the call (n_features=0, percent=feature_percent)
# was previously tried here; this pass uses a fixed n_features=5000.
temp = {}
for i in range(num_labels):
    temp[0], temp[1] = term_prob[i], term_prob[num_labels + i]
    feature_selection(temp, feature_list=all_terms_list, n_features=5000)
    term_prob[i], term_prob[num_labels + i] = temp[0], temp[1]

all_terms_list = []
for i in range(2 * num_labels):
    term_freq[i] = {k: v for k, v in term_freq[i].items() if k in term_prob[i]}
    all_terms_list.extend(term_prob[i].keys())
all_terms_list = set(all_terms_list)

print len(all_terms_list)

print "Generating term-weights complete and it took : ", print_time(start_time)
start_time = time.time()

#Find cooccurences for all classes and complements
#Clear memory for unused variables
cooccurences_by_class = []
term_freq = []

print "Calculating correlation-coefficients complete and it took : ", print_time(start_time)
start_time = time.time()

#Perform feature selection on terms
from tools.cooccurence_utils import feature_selection

# Percent-based selection: n_features=0 together with percent=feature_percent.
# Rather than passing the whole term_prob table in one call, entry i and entry
# num_labels + i are selected together: both are placed under keys 0 and 1 of
# a reused scratch dict, feature_selection is called on that dict, and the
# values found under those keys afterwards are stored back into term_prob.
temp = {}
for i in range(num_labels):
    temp[0], temp[1] = term_prob[i], term_prob[num_labels + i]
    feature_selection(temp, feature_list=all_terms_list, n_features=0, percent=feature_percent)
    term_prob[i], term_prob[num_labels + i] = temp[0], temp[1]




#----------------Classification--------------------------

# Build the classifier from the correlation coefficients, the term table
# aliased as vocab_choice, and the priors, then predict on test_docs using
# the classifier's predict_multilabelBR method.
# NOTE(review): corcoeff is not assigned anywhere in the visible part of this
# script before this point -- confirm it is computed earlier.
classifier = CopulaClassifier(corcoeff, vocab_choice, priors)
predictions = classifier.predict_multilabelBR(test_docs)

print "The Classification is complete and it took", print_time(start_time)
#print "Avg time taken per doc: ", (print_time(start_time)/float(len(test_docs)))
# Restart the timer for whatever stage follows.
start_time = time.time()

    #Find Correlation Coefficient Values
    # cor_type "J" computes the coefficients from term_freq with one third of
    # coorelation_boost; cor_type "P" computes them from term_prob with the
    # full coorelation_boost.
    # NOTE(review): under Python 2 without `from __future__ import division`,
    # coorelation_boost/3 truncates when coorelation_boost is an int --
    # confirm that this is intended.
    if cor_type == "J":
        corcoeff = cooccurence_main.calc_corcoeff(cooccurences_by_class, term_freq, cor_type, boost = coorelation_boost/3)
    elif cor_type == "P":
        corcoeff = cooccurence_main.calc_corcoeff(cooccurences_by_class, term_prob, cor_type, boost = coorelation_boost)
    else:
        # Without this branch corcoeff would be left unassigned (or keep a
        # stale value) and the failure would only surface later, when the
        # classifier is constructed, far away from the actual cause.
        raise ValueError("Unsupported cor_type: %r (expected 'J' or 'P')" % (cor_type,))

    #Clear memory for unused variables
    # Rebinding to empty lists lets the large inputs be reclaimed once nothing
    # else references them; they are not read again in the visible code.
    cooccurences_by_class = []
    term_freq = []

    #"""
    #Perform feature selection on terms
    # Runs feature_selection once over the whole term_prob table, with
    # n_features=0 and percent=feature_percent.
    # NOTE(review): `all_term` is not defined anywhere in the visible source,
    # while `all_terms` is used elsewhere in this file -- this looks like a
    # typo that would raise NameError; confirm against the enclosing scope
    # before changing it. The surrounding `#"""` lines are commented-out
    # triple quotes, presumably used to switch this step on and off.
    from tools.cooccurence_utils import feature_selection
    feature_selection(term_prob, feature_list = all_term.keys(), n_features = 0, percent = feature_percent)
    #"""

    print "Calculating correlation-coefficients complete and it took : ", print_time(start_time)
    # Restart the timer for the classification stage.
    start_time = time.time()




    #----------------Classification--------------------------

    # Build the classifier from the correlation coefficients, vocab_choice and
    # the priors, then predict on test_docs using the classifier's
    # predict_multiclass method.
    classifier = CopulaClassifier(corcoeff, vocab_choice, priors)
    predictions = classifier.predict_multiclass(test_docs)

    print "The Classification is complete and it took", print_time(start_time)
    # Restart the timer for whatever stage follows.
    start_time = time.time()