# Example #1
def createLSIPredictionFileSubTaskA(filePath, dictionary, numFeatures=200, withStops=True, fileTag=''):
	"""Generate an LDA similarity prediction file for SemEval SubTask A.

	For each test question parsed from *filePath*, builds a bag-of-words
	corpus from its related comments, trains an LDA model on that corpus,
	scores each comment against the question, and writes one TSV row per
	comment: threadId, comment_id, 0, similarity score, relevance label.

	Args:
		filePath: path to a test file in the format understood by elementParser.
		dictionary: gensim-style Dictionary providing doc2bow().
		numFeatures: number of LDA topics (default 200).
		withStops: if True keep stopwords; otherwise filter them from the question.
		fileTag: optional tag spliced into the output file name.
	"""
	testQuestions = elementParser(filePath)
	head, tail = os.path.split(filePath)
	tail = tail.split('.')[0]
	if(len(fileTag) > 0):
		fileTag = '-' + fileTag + '-'
	if(withStops):
		predFile = tail + '-lda' + str(numFeatures) + '-with-stops' + fileTag + '.pred'
	else:
		predFile = tail + '-lda' + str(numFeatures) + fileTag + '.pred'
	modelPath = prepModelFolder()
	# FIX: write the prediction file into the model folder, consistent with
	# createW2VPredictionFileSubTaskA (modelPath was previously computed but unused).
	predFile = modelPath + predFile
	with open(predFile, 'w') as tsvfile:
		writer = csv.writer(tsvfile, delimiter='\t')
		for t_question in testQuestions:
			t_question['question'] = filterPunctuation(t_question['question'])
			corpus = []
			for rel_comment in t_question['comments']:
				rel_comment['comment'] = filterPunctuation(rel_comment['comment'])
				# FIX: tokenize the current comment; the original referenced
				# `doc`, which is not assigned until after this loop.
				# NOTE(review): str has no word_tokenize(); using .split() like
				# the W2V variant — confirm whether NLTK tokenization was intended.
				corpus.append(dictionary.doc2bow(rel_comment['comment'].lower().split()))
			lda, index = generateLDAModel(corpus, dictionary, numFeatures)
			if(withStops):
				doc = t_question['question']
			else:
				# FIX: split into words before stopword filtering; the original
				# iterated over the string's characters, so whole-word stopwords
				# could never match (compare the W2V variant).
				t_question['question'] = ' '.join([i for i in t_question['question'].lower().split() if i not in stops])
				doc = t_question['question']
			vec_bow = dictionary.doc2bow(doc.lower().split())
			vec_lda = lda[vec_bow]
			sims = index[vec_lda]
			for idx, quest in enumerate(t_question['comments']):
				quest['simVal'] = sims[idx]
				# FIX: the original wrote fields of an undefined `row`; the
				# loop variable here is `quest`.
				writer.writerow([t_question['threadId'], quest['comment_id'], 0, quest['simVal'], quest['comment_rel']])
# Example #2
def createW2VPredictionFileSubTaskA(filePath, model, withStops=True):
	"""Write a word2vec cosine-similarity prediction file for SubTask A.

	Every related comment of each test question is scored by the cosine
	similarity between its averaged word vector and the question's vector,
	and one TSV row is emitted per comment: threadId, comment_id, 0,
	similarity score, relevance label.

	Args:
		filePath: path to a test file in the format understood by elementParser.
		model: word-embedding model passed through to generateQuestionVector.
		withStops: if True keep stopwords; otherwise strip them first.
	"""
	questions = elementParser(filePath)
	_, baseName = os.path.split(filePath)
	baseName = baseName.split('.')[0]
	suffix = '-w2v-with-stops.pred' if withStops else '-w2v.pred'
	predFile = prepModelFolder() + baseName + suffix
	with open(predFile, "w") as outFile:
		tsvWriter = csv.writer(outFile, delimiter="\t")
		for question in questions:
			question['question'] = filterPunctuation(question['question'])
			if withStops:
				question['W2V_qVec1'] = generateQuestionVector(model, question['question'], DIM)
			else:
				question['relQNoStops'] = " ".join(w for w in question['question'].lower().split() if w not in stops)
				question['W2V_qVec1'] = generateQuestionVector(model, question['relQNoStops'], DIM)
			commentVectors = []
			for comment in question['comments']:
				comment['comment'] = filterPunctuation(comment['comment'])
				if withStops:
					comment['W2V_cVec1'] = generateQuestionVector(model, comment['comment'], DIM)
				else:
					comment['relCNoStops'] = " ".join(w for w in comment['comment'].lower().split() if w not in stops)
					comment['W2V_cVec1'] = generateQuestionVector(model, comment['relCNoStops'], DIM)
				commentVectors.append(comment['W2V_cVec1'])
			scores = cosineSimilarity(question['W2V_qVec1'], commentVectors)
			for pos, comment in enumerate(question['comments']):
				comment['simVal'] = scores[pos]
				tsvWriter.writerow([question['threadId'], comment['comment_id'], 0, comment['simVal'], comment['comment_rel']])
def QuestionCreator(filePaths=()):
    """Collect the 'question' text of every element parsed from *filePaths*.

    Args:
        filePaths: iterable of file paths understood by elementParser.

    Returns:
        list of question strings, in file order.
    """
    # FIX: the default was a mutable list ([]); an immutable empty tuple
    # avoids the shared-mutable-default pitfall and is backward compatible.
    parsed = []
    for filePath in filePaths:
        parsed += elementParser(filePath)
    return [row['question'] for row in parsed]
# Cache the QTL object list: load the pickle when present, otherwise build
# it from the raw JSON files and persist it for the next run.
# NOTE(review): pickle.load on a local cache file — fine for self-produced
# data, never for untrusted input.
if(Path("../tmp/QTL_List.p").is_file()):
	# FIX: the existence check used "QTL_list.p" (lowercase l) while load/dump
	# used "QTL_List.p", so on case-sensitive filesystems the cache never hit
	# and the list was rebuilt on every run.
	with open("../tmp/QTL_List.p", "rb") as f:
		QTL_List = pickle.load(f)
else:
	QTL_List = []
	for filePath in QTLfilePaths:
		QTL_List += createObjectListFromJson(filePath)
	if not os.path.isdir('../tmp'):
		os.makedirs('../tmp')
	# FIX: context manager closes the handle deterministically (the original
	# open(...) was never closed).
	with open("../tmp/QTL_List.p", "wb") as f:
		pickle.dump(QTL_List, f)


# Cache the parsed element list: load the pickle when present, otherwise
# parse the source files and persist the result.
if(Path("../tmp/thisList.p").is_file()):
	# FIX: context managers close the pickle file handles deterministically;
	# the original open(...) calls were never closed.
	with open("../tmp/thisList.p", "rb") as f:
		thisList = pickle.load(f)
else:
	thisList = []
	for filePath in filePaths:
		thisList += elementParser(filePath)
	if not os.path.isdir('../tmp'):
		os.makedirs('../tmp')
	with open("../tmp/thisList.p", "wb") as f:
		pickle.dump(thisList, f)

# Cache the SubTask A element list: load the pickle when present, otherwise
# parse the SubTask A source files and persist the result.
if(Path("../tmp/subTaskAList.p").is_file()):
	# FIX: context managers close the pickle file handles deterministically;
	# the original open(...) calls were never closed.
	with open("../tmp/subTaskAList.p", "rb") as f:
		subTaskAList = pickle.load(f)
else:
	subTaskAList = []
	for filePath in filePathsSubTaskA:
		subTaskAList += elementParser(filePath)
	if not os.path.isdir('../tmp'):
		os.makedirs('../tmp')
	with open("../tmp/subTaskAList.p", "wb") as f:
		pickle.dump(subTaskAList, f)