Example #1
import os
import pickle

from brnn import *  # BiDirectionalRNN, sent_to_glove, clip, RNN, relu
from utils import get_filtered_questions, clean_no_stopwords, clean, get_data_for_cognitive_classifiers
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split
from maxent import features
from svm_glove import TfidfEmbeddingVectorizer
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

CURSOR_UP_ONE = '\x1b[1A'
ERASE_LINE = '\x1b[2K'
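# CURSOR_UP_ONE and ERASE_LINE are ANSI escape sequences for redrawing a status line in
# place. A minimal usage sketch (illustrative only; the loop below is not part of the
# original example):
for _i in range(1, 4):
    if _i > 1:
        print(CURSOR_UP_ONE + ERASE_LINE, end='')  # move the cursor up one line and clear it
    print('Progress: %d/3' % _i)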

domain = pickle.load(open(os.path.join(os.path.dirname(__file__), 'resources/domain.pkl'), 'rb'))
domain = {k: set(clean_no_stopwords(' '.join(list(domain[k])), stem=False)) for k in domain.keys()}
domain_names = domain.keys()

keywords = set()
for k in domain:
    keywords = keywords.union(set(list(map(str.lower, map(str, list(domain[k]))))))

mapping_cog = {'Remember': 0, 'Understand': 1, 'Apply': 2, 'Analyse': 3, 'Evaluate': 4, 'Create': 5}
mapping_cog2 = {v: k for k, v in mapping_cog.items()}

# Transformation for the BiRNN. This should ideally become part of the RNN itself for better code maintainability
INPUT_SIZE = 300
NUM_QUESTIONS = 1000
filename = 'glove.6B.%dd.txt' % INPUT_SIZE

if not os.path.exists(os.path.join(os.path.dirname(__file__), 'resources/GloVe/%s_saved.pkl' % filename.split('.txt')[0])):
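    # The body of this cache check is cut off in the listing above. What follows is a
    # hypothetical sketch of a typical GloVe caching step; the glove_path, cache_path and
    # word_vectors names and the word -> vector dict layout are assumptions, not taken
    # from the original project.
    import numpy as np

    glove_path = os.path.join(os.path.dirname(__file__), 'resources/GloVe/%s' % filename)
    cache_path = os.path.join(os.path.dirname(__file__),
                              'resources/GloVe/%s_saved.pkl' % filename.split('.txt')[0])

    word_vectors = {}
    with open(glove_path, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            word_vectors[parts[0]] = np.array(parts[1:], dtype=np.float32)  # INPUT_SIZE-dim vector

    with open(cache_path, 'wb') as f:
        pickle.dump(word_vectors, f)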
Example #2
    else:
        textbook = sys.argv[1]

    print('Loading corpus data')
    stopwords = set(stopwords.words('english'))

    domain = pickle.load(
        open(os.path.join(os.path.dirname(__file__), 'resources/domain.pkl'),
             'rb'))

    keywords = set()
    for k in domain:
        for word in domain[k]:
            keywords.add(
                clean_no_stopwords(word,
                                   lemmatize=False,
                                   stem=False,
                                   as_list=False))

        #keywords = keywords.union(set(list(map(str.lower, map(str, list(domain[k]))))))
    stopwords = stopwords - keywords

    if textbook == 'ADA':
        contents = get_cleaned_section_text(textbook, 'section')
        questions = []

    elif textbook == 'OS':
        contents = get_cleaned_section_text('OS', 'section')
        contents.extend(get_cleaned_section_text('OS2', 'section'))
        contents.extend(get_cleaned_section_text('OS3', 'section'))
        contents.extend(get_cleaned_section_text('OS4', 'section'))
Example #3
import os
import pickle

import nltk
import nltk.corpus
from nltk import MaxentClassifier, classify

from utils import clean_no_stopwords, get_data_for_cognitive_classifiers

domain = pickle.load(
    open(os.path.join(os.path.dirname(__file__), 'resources/domain.pkl'),
         'rb'))

domain = {
    k: set(clean_no_stopwords(' '.join(list(domain[k])), stem=False))
    for k in domain.keys()
}
inverted_domain = {}
for k in domain:
    for v in domain[k]:
        inverted_domain[v] = k

domain_names = domain.keys()

keywords = set()
for k in domain:
    keywords = keywords.union(
        set(list(map(str.lower, map(str, list(domain[k]))))))
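
# A quick illustration of how inverted_domain can be used: map a cleaned token back to
# the knowledge-dimension category it was listed under. 'recursion' is a made-up probe
# word, not necessarily present in domain.pkl.
probe = 'recursion'
if probe in inverted_domain:
    print('%s -> %s' % (probe, inverted_domain[probe]))
else:
    print('%s is not a domain keyword' % probe)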

mapping_cog = {
    'Remember': 0,