#!/usr/bin/python
import os
import helper

# author: maksym hontar
# running notes: run from folder in terminal: ./index_vocabulary_build_matrix.py
#
# Evaluates a hard-coded two-word boolean query (word1 <AND|OR> word2)
# against the incident matrix that `helper` reads from disk, combining the
# two words' rows element by element.

# Load the incident matrix. `with` guarantees the file is closed even when
# helper.readIncidentMatrixFromFile raises.
with open(helper.getFilePath(helper.incident_matrix_file_name), 'r') as incident_matrix_file:
    incident_matrix = helper.readIncidentMatrixFromFile(incident_matrix_file)

# The query: <word1> <boolExpression> <word2>.
word1 = 'man'
boolExpression = 'AND'
word2 = 'stupid'

# len() is kept for values that come from `helper`, because their concrete
# type is not visible here; plain truthiness is used for the literal strings.
if len(incident_matrix):
    if word1 and word2 and boolExpression:
        # Element-wise result of the query; stays empty when either word is
        # missing from the matrix or the operator is neither 'AND' nor 'OR'.
        res = []
        if word1 in incident_matrix and word2 in incident_matrix:
            word_list1 = incident_matrix[word1]
            word_list2 = incident_matrix[word2]
            if len(word_list1) and len(word_list2):
                # The operator does not change during the scan, so it is
                # checked once instead of once per element.
                # NOTE(review): zip stops at the shorter row; the rows are
                # presumably the same length -- confirm against the helper.
                if boolExpression == 'AND':
                    res = [left & right for left, right in zip(word_list1, word_list2)]
                elif boolExpression == 'OR':
                    res = [left | right for left, right in zip(word_list1, word_list2)]

        # NOTE(review): the nesting level of the statements below is inferred;
        # the visible source has lost its indentation.
        books_names = []
        for i in range(len(res)):
            # NOTE(review): the body of this loop is cut off in the visible
            # source. `pass` is only a placeholder that keeps the script
            # syntactically valid -- restore the original body here.
            pass
# NOTE(review): the commented-out code below is kept from the original; its
# first line is the tail of something that starts before the visible source.
#     return file.read().split(new_line_char);
# def fileSize(filePath):
#     statinfo = os.stat(filePath);
#     return str(statinfo.st_size);
# #instantiate an array
# file_paths = [];
# for a_file_name in file_names:
#     file_paths.append(getFilePath(a_file_name))

# Reads the stored vocabulary, then reads every file listed in
# helper.file_paths and replaces its newline characters with spaces.
if len(helper.file_paths):
    # Read the vocabulary file and split it on helper.new_line_char.
    # `with` closes the file even if the read raises.
    with open(helper.getFilePath(helper.voc_file_name), 'r') as voc_file:
        vocabulary = voc_file.read().split(helper.new_line_char)
    # A parenthesised single argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('vocabulary count before reading: ' + str(len(vocabulary)))

    for a_file_path in helper.file_paths:
        # open() raises on failure instead of returning a falsy object, so
        # the original `if file:` guard was always true and has been dropped.
        # The name `file` is kept in case later, unseen code refers to it.
        with open(a_file_path, 'r') as file:
            # files reading block
            print("files is being read " + a_file_path + '. file size: ' + helper.fileSize(a_file_path))
            file_content = file.read()

        # replace all \n with spaces
        file_content = file_content.replace(helper.new_line_char, helper.space_char)
        # NOTE(review): the visible source ends here; the loop body may
        # continue beyond this point in the full file.