import os from brnn import *#BiDirectionalRNN, sent_to_glove, clip, RNN, relu from utils import get_filtered_questions, clean_no_stopwords, clean, get_data_for_cognitive_classifiers from sklearn.externals import joblib from sklearn.model_selection import train_test_split from maxent import features from svm_glove import TfidfEmbeddingVectorizer from sklearn.decomposition import PCA from sklearn.neural_network import MLPClassifier from sklearn.metrics import accuracy_score, confusion_matrix CURSOR_UP_ONE = '\x1b[1A' ERASE_LINE = '\x1b[2K' domain = pickle.load(open(os.path.join(os.path.dirname(__file__), 'resources/domain.pkl'), 'rb')) domain = { k : set(clean_no_stopwords(' '.join(list(domain[k])), stem=False)) for k in domain.keys() } domain_names = domain.keys() keywords = set() for k in domain: keywords = keywords.union(set(list(map(str.lower, map(str, list(domain[k])))))) mapping_cog = {'Remember': 0, 'Understand': 1, 'Apply': 2, 'Analyse': 3, 'Evaluate': 4, 'Create': 5} mapping_cog2 = { v : k for k, v in mapping_cog.items()} # transformation for BiRNN. This should actually become a part of the RNN for better code maintainability INPUT_SIZE = 300 NUM_QUESTIONS = 1000 filename = 'glove.6B.%dd.txt' %INPUT_SIZE if not os.path.exists(os.path.join(os.path.dirname(__file__), 'resources/GloVe/%s_saved.pkl' %filename.split('.txt')[0])):
# `else` arm of a conditional whose `if` lies outside this view: the textbook
# name is taken from the first command-line argument (sys.argv[1]).
#
# The statements that follow:
#   * rebind `stopwords` to a plain set built from `stopwords.words('english')`
#     (the original object behind the name is no longer reachable afterwards);
#   * unpickle resources/domain.pkl, located next to this file, into `domain`;
#   * add each domain word, passed through `clean_no_stopwords(word,
#     lemmatize=False, stem=False, as_list=False)`, to `keywords`;
#   * subtract `keywords` from `stopwords` -- presumably so that domain terms
#     are not filtered out as stopwords later (TODO confirm against callers);
#   * for textbook 'ADA', load its section text and start with an empty
#     `questions` list; for 'OS', concatenate the section text of 'OS', 'OS2',
#     'OS3' and 'OS4' into `contents`.
#
# NOTE(review): the file object handed to pickle.load() is never closed
# explicitly; consider wrapping the open() call in a `with` block.
else: textbook = sys.argv[1] print('Loading corpus data') stopwords = set(stopwords.words('english')) domain = pickle.load( open(os.path.join(os.path.dirname(__file__), 'resources/domain.pkl'), 'rb')) keywords = set() for k in domain: for word in domain[k]: keywords.add( clean_no_stopwords(word, lemmatize=False, stem=False, as_list=False)) #keywords = keywords.union(set(list(map(str.lower, map(str, list(domain[k])))))) stopwords = stopwords - keywords if textbook == 'ADA': contents = get_cleaned_section_text(textbook, 'section') questions = [] elif textbook == 'OS': contents = get_cleaned_section_text('OS', 'section') contents.extend(get_cleaned_section_text('OS2', 'section')) contents.extend(get_cleaned_section_text('OS3', 'section')) contents.extend(get_cleaned_section_text('OS4', 'section'))
import os import pickle import nltk import nltk.corpus from nltk import MaxentClassifier, classify from utils import clean_no_stopwords, get_data_for_cognitive_classifiers domain = pickle.load( open(os.path.join(os.path.dirname(__file__), 'resources/domain.pkl'), 'rb')) domain = { k: set(clean_no_stopwords(' '.join(list(domain[k])), stem=False)) for k in domain.keys() } inverted_domain = {} for k in domain: for v in domain[k]: inverted_domain[v] = k domain_names = domain.keys() keywords = set() for k in domain: keywords = keywords.union( set(list(map(str.lower, map(str, list(domain[k])))))) mapping_cog = { 'Remember': 0,