# Alias, not a copy: the item assignments made to term_prob further down are
# therefore also visible through vocab_choice.
vocab_choice = term_prob

# Clear memory for unused variables. The term names are captured from
# all_terms before the dict itself is dropped.
all_terms_list = all_terms.keys()
all_terms = {}
all_terms_prob = {}
vectorised_train_documents_tf = []

# Perform feature selection on terms, one label at a time.
from tools.cooccurence_utils import feature_selection

temp = {}
for i in range(num_labels):
    # Slot 0 holds entry i and slot 1 holds entry num_labels + i
    # (presumably a class and its complement -- TODO confirm).
    temp[0], temp[1] = term_prob[i], term_prob[num_labels + i]
    # Percent-based alternative, currently disabled:
    #feature_selection(temp, feature_list = all_terms_list, n_features = 0, percent = feature_percent)
    # The return value is ignored; the outcome is read back out of temp.
    feature_selection(temp, feature_list=all_terms_list, n_features=5000)
    term_prob[i], term_prob[num_labels + i] = temp[0], temp[1]

# Restrict the frequency tables to the terms still present in term_prob and
# collect the distinct surviving terms.
all_terms_list = set()
for i in range(2 * num_labels):
    term_freq[i] = dict(
        (k, v) for k, v in term_freq[i].items() if k in term_prob[i]
    )
    all_terms_list.update(term_prob[i].keys())

print(len(all_terms_list))
# Under Python 2 (which this script targets) the trailing comma keeps the
# next print on the same line, so label and elapsed time share one line.
print("Generating term-weights complete and it took : "),
print(print_time(start_time))
start_time = time.time()

#Find cooccurences for all classes and complements
# Clear memory for unused variables
cooccurences_by_class, term_freq = [], []

# Under Python 2 (which this script targets) the trailing comma keeps the
# next print on the same line, so label and elapsed time share one line.
print("Calculating correlation-coefficients complete and it took : "),
print(print_time(start_time))
start_time = time.time()

# Perform feature selection on terms, one label at a time.
from tools.cooccurence_utils import feature_selection

# Whole-table alternative, currently disabled:
#feature_selection(term_prob, feature_list = all_terms_list, n_features = 0, percent = feature_percent)
temp = {}
for i in range(num_labels):
    # Slot 0 holds entry i and slot 1 holds entry num_labels + i
    # (presumably a class and its complement -- TODO confirm).
    temp[0], temp[1] = term_prob[i], term_prob[num_labels + i]
    # The return value is ignored; the outcome is read back out of temp.
    feature_selection(
        temp,
        feature_list=all_terms_list,
        n_features=0,
        percent=feature_percent,
    )
    term_prob[i], term_prob[num_labels + i] = temp[0], temp[1]

#----------------Classification--------------------------
classifier = CopulaClassifier(corcoeff, vocab_choice, priors)
predictions = classifier.predict_multilabelBR(test_docs)

print("The Classification is complete and it took"),
print(print_time(start_time))
#print "Avg time taken per doc: ", (print_time(start_time)/float(len(test_docs)))
start_time = time.time()
# Find Correlation Coefficient Values.
# "J" works from the term frequencies with a third of the boost; "P" works
# from the term probabilities with the full boost.
# NOTE(review): corcoeff is not assigned here for any other cor_type, yet it
# is used below -- confirm cor_type is validated upstream.
# NOTE(review): under Python 2, coorelation_boost/3 floors when
# coorelation_boost is an int -- confirm it is a float.
if cor_type == "J":
    corcoeff = cooccurence_main.calc_corcoeff(
        cooccurences_by_class,
        term_freq,
        cor_type,
        boost=coorelation_boost/3,
    )
elif cor_type == "P":
    corcoeff = cooccurence_main.calc_corcoeff(
        cooccurences_by_class,
        term_prob,
        cor_type,
        boost=coorelation_boost,
    )

# Clear memory for unused variables
cooccurences_by_class, term_freq = [], []

# The commented-out triple-quote markers presumably act as a toggle:
# removing their leading '#' would turn the enclosed lines into a string
# literal and disable them.
#"""
# Perform feature selection on terms
from tools.cooccurence_utils import feature_selection

# NOTE(review): all_term is not defined in the visible code; it may be a typo
# for all_terms -- verify before relying on this call.
feature_selection(
    term_prob,
    feature_list=all_term.keys(),
    n_features=0,
    percent=feature_percent,
)
#"""

# Under Python 2 (which this script targets) the trailing comma keeps the
# next print on the same line, so label and elapsed time share one line.
print("Calculating correlation-coefficients complete and it took : "),
print(print_time(start_time))
start_time = time.time()

#----------------Classification--------------------------
classifier = CopulaClassifier(corcoeff, vocab_choice, priors)
predictions = classifier.predict_multiclass(test_docs)

print("The Classification is complete and it took"),
print(print_time(start_time))
start_time = time.time()