Example #1
	def __init__(self, start, end, dataType, N):
		self.LETTERS = ['A', 'B', 'C', 'D']
		self.dataType = dataType
		self.fullTest = self.validationSet()
		# keep only the questions in the requested [start, end) slice
		self.test = [q for i, q in enumerate(self.fullTest) if start <= i < end]
		self.correct = 0
		self.incorrect = 0
		self.answerReport = []
		self.searchAnswerReport = []
		self.timeReport = []
		self.N = N

		# instantiate mindmaps: load the cached copy if one exists
		if os.path.isfile(cache + 'mindmaps.p'):
			self.mindmaps = utils.loadData(cache + 'mindmaps.p')
		else:
			self.mindmaps = {}
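# The load-or-initialize pattern above leans on QAUtils.loadData/saveData.
# A minimal sketch, assuming they are thin pickle wrappers (the real QAUtils
# module is not shown in these excerpts):
import pickle

def loadData(path):
	# unpickle a cached object from disk
	with open(path, 'rb') as f:
		return pickle.load(f)

def saveData(data, path):
	# pickle an object to disk; argument order matches utils.saveData(keywords, path) below
	with open(path, 'wb') as f:
		pickle.dump(data, f)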
Example #2
def getSearchFromFile():
	'''Opens the local cached copy of search results and concatenates the result snippets'''
	searchResults = utils.loadData(cache + 'searchResults.p')
	searchObject = json.loads(searchResults)
	# join the snippet of every result item into one document
	snippetDoc = ''
	for item in searchObject['items']:
		snippetDoc += item['snippet']
	return snippetDoc
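# The cached pickle is expected to hold a JSON string shaped like a Google
# Custom Search response; a hedged illustration (the field values are made up):
import json

sampleResults = json.dumps({
	'items': [
		{'snippet': 'Photosynthesis is the process by which plants...'},
		{'snippet': 'Green plants absorb carbon dioxide and release oxygen...'},
	]
})
# json.loads(sampleResults)['items'] is the list getSearchFromFile iterates over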
Example #3
import searchText as scraper
import util
import QAUtils as utils
from Models import Test
import pickle, os, time

cache = '../Dropbox/ScienceQASharedCache/'

# Get local copy of freebase
if os.path.isfile(cache + 'FB_relations.p'): freebaseRelations = utils.loadData(cache + 'FB_relations.p')
else:
	freebaseRelations = {}

# Setup for worker pool
poolWorkerNum = 200
poolIterations = 2
poolRedundancies = False

# Get all keywords
eightGradeExam = Test(start=0, end=8132, dataType='val', N=6)

keywords = eightGradeExam.getSecondOrderKeywords()

# save second order keywords
utils.saveData(keywords, cache + 'SecondOrderKeywords.p')
print('Keywords saved.')

# Filter keywords already in local freebaseRelations
keywords = [kw for kw in keywords if kw not in freebaseRelations]
print('Number of second order keywords left: {}'.format(len(keywords)))
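# The pool parameters above presumably drive a bulk Freebase lookup helper
# elsewhere in the project; a hedged sketch using a plain thread pool
# (fetchOne and fetchRelations are hypothetical names, not the project's API):
from multiprocessing.dummy import Pool

def fetchRelations(keywords, fetchOne, workers=poolWorkerNum):
	# issue many keyword lookups concurrently and collect them into a dict
	with Pool(workers) as pool:
		results = pool.map(fetchOne, keywords)
	return dict(zip(keywords, results))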
Example #4
#  - spacy word2vec cosine distance between question and answer (own and average of four)
#  - spacy word2vec cosine distance between answer option and other options (own and average of four)
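# Hedged sketch of the word2vec distance described above, using spacy's
# built-in vector similarity (the model name is an assumption):
import spacy
nlp = spacy.load('en_core_web_md')

def qaSimilarity(question, answer):
    # cosine similarity between the averaged word vectors of the two texts
    return nlp(question).similarity(nlp(answer))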
print('- Basic formatting')
trainX = extractor.basicFormatFeatures(trainPairedQA)
valX = extractor.basicFormatFeatures(valPairedQA)
print(trainX.shape)


# Feature measuring proximity of a given Q-A pair to authoritative texts
#  - Q-A combined into a single statement then search carried out to see distance to closest sentence in text
#  - Authoritative text from wikipedia and CK12 free online textbooks for elementary school children
#  - Two measures given--one requiring relatively strict matches, one allowing loose matches
#  - return both absolute value as well as average of other 3 answers
print('- Text match features')
if os.path.isfile(cache + 'trainX'):
    train_textMatch = utils.loadData(cache + 'trainX')
else:
    train_textMatch = extractor.getTextMatchFeatures(trainPairedQA, kList=[100, 10, 100, 1000, 3])
    utils.saveData(train_textMatch, cache + 'trainX')
trainX = extractor.concat(trainX, train_textMatch)
print(trainX.shape)

# if os.path.isfile(cache + 'valX'): 
#     valX = extractor.concat(valX, utils.loadData(cache + 'valX'))
# else:
#     valX = extractor.getTextMatchFeatures(valPairedQA, kList=[100, 10, 100, 1000, 3])
# print(trainX.shape)
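# The strict and loose match measures themselves live inside the extractor and
# are not shown here; a hedged toy illustration of the idea (names hypothetical):
def sentenceMatchScore(statement, sentence, loose=False):
    sWords = set(statement.lower().split())
    tWords = set(sentence.lower().split())
    overlap = len(sWords & tWords)
    if loose:
        # loose match: reward any word overlap at all
        return float(overlap > 0)
    # strict match: fraction of the statement's words found in the sentence
    return overlap / max(len(sWords), 1)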

# Features from the keyword graph from the Aristo paper
#  - size of question graph, size of answer graph, coherence score of answers, coherence score
#    of question keywords, number of pruned words for each Q-A pair
print('- Keyword graph features')
Example #5
import os, sys
import QAUtils as utils
from whoosh.fields import Schema, TEXT
from whoosh.index import create_in
from whoosh.qparser import QueryParser

# 0. Set global parameters
cache = '../Dropbox/ScienceQASharedCache/'

# 1. Get corpus
corpus = utils.loadData(cache + 'allTextLines')[:100]

# 2. Index using whoosh
# index 'content' for searching and keep a stored copy for display
schema = Schema(content=TEXT, stored_content=TEXT(stored=True))
if not os.path.exists(cache + 'IRindex'):
	os.mkdir(cache + 'IRindex')
ix = create_in(cache + 'IRindex', schema)  # create_in returns a ready-to-use index

writer = ix.writer()
for i, line in enumerate(corpus):
	sys.stdout.write('\rAdding line {} of {} to index'.format(i+1, len(corpus)))
	sys.stdout.flush()
	writer.add_document(content=line, stored_content=line)
writer.commit()

# Try out a search and print the stored text of the top hits
with ix.searcher() as searcher:
	query = QueryParser('content', ix.schema).parse('Turkey')
	results = searcher.search(query)
	for hit in results:
		print(hit['stored_content'])
Example #6
import util
import pickle
import copy
import time
import os
import QAUtils as utils

cache = '../Dropbox/ScienceQASharedCache/'

regentsDataPath = cache + 'Regents_Train.tsv'
trainData = cache + 'training_set.tsv'
validationData = cache + 'validation_set.tsv'

# second order keywords
if os.path.isfile(cache + 'keywords.p'): localKeywords = utils.loadData(cache + 'keywords.p')
else: localKeywords = {}

class WordGraph:
	def __init__(self, question, N):
		# print('Question:', question)
		self.graph = {}
		self.N = N
		self.questionKeywords = util.getKeywords(question)
		# print('Question keywords extracted:', self.questionKeywords)

		self.importance = {kw: 1/len(self.questionKeywords) for kw in self.questionKeywords}
		# self.importance = util.getImportanceDict(question)
		# print('Keyword importance:', self.importance)

		self.secondOrderKeywords = localKeywords[question] if question in localKeywords else self.bestWords()
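# Hedged usage sketch (assumes the rest of the class, e.g. bestWords(), is
# defined as in the full source; the question text is made up):
wg = WordGraph('Which gas do green plants absorb during photosynthesis?', N=6)
print(wg.questionKeywords)
print(wg.secondOrderKeywords)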