# Percentage of the total term features to keep.
feature_percent = 17

# ---------------- Begin Program --------------------------

# --- Corpus data ---
from sklearn.datasets import fetch_rcv1

rcv1_info = fetch_rcv1()
# Keep only the two pieces used below: the target matrix (converted with
# .toarray()) and the sample ids.
sklearn_labelMatrix = rcv1_info.target.toarray()
sklearn_docIDs = rcv1_info.sample_id
# Rebind the name to an empty list so this script no longer holds a
# reference to the fetched object.
rcv1_info = []

from tools.getRCV1V2 import getRCV1V2

# NOTE(review): the corpus location is a machine-specific absolute path, and
# the meaning of testset=0 is defined by tools.getRCV1V2 -- confirm there.
rcv1_data = getRCV1V2(
    "/Volumes/Files/Work/Research/Information Retrieval/1) Data/Reuters/RCV/RCV/RCV1-V2/Raw Data/",
    testset=0,
)

# --- Feature extraction ---
from sklearn.feature_extraction.text import (
    TfidfVectorizer,
    CountVectorizer,
)
from nltk.corpus import stopwords

# English stop-word list from the NLTK stopwords corpus.
stop_words = stopwords.words("english")

import tools.cooccurence_main as cooccurence_main
from tools.text_processing import (
    tokenize,
    get_TF,
    get_TFIDF,
    freqToProbability,
)
import numpy

# --- Classification ---
from tools.CopulaClassifier import CopulaClassifier

# --- Evaluation ---
from sklearn.metrics import f1_score, precision_score, recall_score
# NOTE(review): the comment describing this setting lies before the visible
# part of the file. The name looks like a misspelling of "correlation"; it is
# kept as-is because code outside this view may refer to it.
coorelation_boost = 8

# Percentage of the total term features to keep.
feature_percent = 4

# -------------------------- Begin Program --------------------------

# --- Corpus data ---
from sklearn.datasets import fetch_rcv1

rcv1_info = fetch_rcv1()
# Keep only the two pieces used below: the target matrix (converted with
# .toarray()) and the sample ids.
sklearn_labelMatrix = rcv1_info.target.toarray()
sklearn_docIDs = rcv1_info.sample_id
# Rebind the name to an empty list so this script no longer holds a
# reference to the fetched object.
rcv1_info = []

# NOTE(review): both corpus locations are machine-specific absolute paths, and
# the meaning of testset=1 is defined by the tools loaders -- confirm there.
RCV1V2Path = "/Volumes/Files/Work/Research/Information Retrieval/1) Data/Reuters/RCV/RCV/RCV1-V2/Raw Data/"

from tools.getRCV1V2 import getRCV1V2

rcv1v2_data = getRCV1V2(RCV1V2Path, testset=1)

from tools.getRCV1 import getRCV1

RCV1Path = "/Volumes/Files/Work/Research/Information Retrieval/1) Data/Reuters/RCV/RCV/rcv1/Data/"
# The RCV1 loader takes both the RCV1 path and the RCV1-V2 path.
rcv1_data = getRCV1(RCV1Path, RCV1V2Path, testset=1)

# --- Feature extraction ---
from sklearn.feature_extraction.text import (
    TfidfVectorizer,
    CountVectorizer,
)
from nltk.corpus import stopwords

# English stop-word list from the NLTK stopwords corpus.
stop_words = stopwords.words("english")

import tools.cooccurence_main as cooccurence_main
from tools.text_processing import (
    tokenize,
    get_TF,
    get_TFIDF,
    freqToProbability,
)
import numpy

# --- Classification ---
from tools.CopulaClassifier import CopulaClassifier