def boolean_queries_with_proximity(query, proximity):
    """Proximity boolean retrieval.

    Return the IDs of documents that contain every indexed query term AND in
    which at least one pair of distinct terms occurs within `proximity`
    positions of each other.

    :param query: whitespace-separated query terms
    :param proximity: maximum allowed distance between two term positions
    :return: list of matching document IDs
    """
    # load the persisted index structures
    posting = Data.read_dataStruct_from_file(Contants.POSTING_LIST_FILE_NAME)
    wordsIndex = Data.read_dataStruct_from_file(Contants.WORD_INDEX_FILE_NAME)

    # normalize terms once and keep only those present in the posting list
    wordList = [w for w in (Index.applyFilters(word) for word in query.split())
                if w in posting]
    setlist = [set(posting[word]) for word in wordList]
    if not setlist:
        # BUG FIX: set.intersection(*[]) raises TypeError; no indexed term
        # means no matching documents.
        return []

    # documents containing every query term
    DocIDList = set.intersection(*setlist)

    answer = []
    # BUG FIX: the original removed elements from wordList while iterating
    # it, which skips term pairs once the query has 4+ indexed terms.
    # Iterate over index pairs instead.
    for i, word1 in enumerate(wordList):
        for word2 in wordList[i + 1:]:
            for DocID in DocIDList:
                if DocID in answer:
                    continue
                positions1 = wordsIndex[word1][0][DocID]
                positions2 = wordsIndex[word2][0][DocID]
                if any(abs(p1 - p2) <= proximity
                       for p1 in positions1 for p2 in positions2):
                    answer.append(DocID)
    return list(answer)
def createPostingList(sortTepDic):
    """Derive the posting list (term -> list of document IDs) from the term
    index and persist it to disk."""
    # entry[0] maps DocId -> positions; iterating it yields the DocIds
    posting = {term: list(entry[0]) for term, entry in sortTepDic.items()}
    Data.write_dataStruct_to_file(Contants.POSTING_LIST_FILE_NAME, posting)
def createTermIndex(): sortTepDic = SortedDict() # Structure for each term # sortTepDic['term']=({'DocId1':['Pos1','Pos2'],'DocId2':['Pos1','Pos2']},'termFreq','DocFreq') for root, dirs, files in os.walk(Contants.DATA_DIRECTORY_NAME, topdown=True): for name in files: file_name = os.path.join(root, name) # 'r' when the file will only be read # 'w' for only writing (an existing file with the same name will be erased) # 'a' opens the file for appending; any data written to the file is automatically added to the end. # 'r+' opens the file for both reading and writing. mode = "r" file_object = open(file_name, mode) DocId = os.path.split(file_name)[1] wordPos = 0 for word in file_object.read().split(): wordPos = wordPos + 1 # increment word location lamma = applyFilters(word) if lamma: if lamma not in sortTepDic: sortTepDic[lamma] = [{DocId: [wordPos]}, 1, 1] # add a new term else: sortTepDic[lamma][1] = sortTepDic[lamma][1] + 1 # increment the term frequency if DocId in sortTepDic[lamma][0]: sortTepDic[lamma][0][DocId].append( wordPos ) # add new word position for the existing document else: sortTepDic[lamma][0][DocId] = [wordPos] # add a new document ID and he word position sortTepDic[lamma][2] = sortTepDic[lamma][2] + 1 # increment the document frequecy # covert lists to tuples for key in sortTepDic.keys(): for DocId in sortTepDic[key][0]: sortTepDic[key][0][DocId] = tuple(sortTepDic[key][0][DocId]) sortTepDic[key] = tuple(sortTepDic[key]) Data.write_dataStruct_to_file(Contants.WORD_INDEX_FILE_NAME, sortTepDic) createLexicon(sortTepDic) createPostingList(sortTepDic)
async def ready(self):
    """Callback fired once the bot is connected; set presence and announce."""
    print("ready")
    presence = Data.get_activity(self, self.name)
    await self.set_presence(presence)
    line = Data.one_liner
    await self.send("online log", line.replace("%", "Rich Uncle Pennybags"))
    return
def test_compare(self):
    """Equality must reflect both the constructor value and the attribute set."""
    first = Data("foo")
    second = Data("foo")
    third = Data("bar")
    fourth = Data("bar")
    # fourth differs from third only by this extra attribute
    fourth.addAttribute(BoolAttribute("testAttr", True))
    self.assertEqual(first, second)
    self.assertNotEqual(first, third)
    self.assertNotEqual(third, fourth)
def run_whileloop(while_loop_body):
    """Execute a WHILE..REPEAT loop: run the condition code, and keep running
    the body as long as the condition leaves TRUE on the data stack."""
    condition_code = take_tokens("WHILE", "REPEAT", while_loop_body)
    body_code = while_loop_body
    while True:
        # re-evaluate the condition on a fresh copy each pass
        consume_tokens(copy(condition_code))
        if Data.pop() != TRUE:
            break
        consume_tokens(copy(body_code))
async def ready(self):
    """Callback run when the bot comes online: set presence, then announce."""
    print("ready")
    presence = Data.get_activity(self, self.name)
    await self.set_presence(presence)
    # announce thyself
    line = Data.one_liner  # this starts to get annoying while testing
    await self.send("online log", line.replace("%", "Providence"))
    return
def run_misc_worker(*args, **kwargs):
    """Entry point for a misc poller worker: build its Data backend and run
    it, logging any crash with a full traceback."""
    logger = config.get_logger(kwargs['log_path'], kwargs['name'])
    try:
        backend = Data(logger, kwargs['redis_host'], kwargs['redis_port'],
                       kwargs['redis_db'])
        worker = MiscWorker(logger, kwargs['name'], backend, None,
                            kwargs['config_path'])
        logger.info('Starting poller worker: {0}'.format(kwargs['name']))
        worker.run(args, kwargs)
    except Exception as e:
        # top-level boundary: log and swallow so the host process survives
        logger.error('ERROR: Exception in run_misc_worker: {0}\r\n{1}'.format(
            e, traceback.format_exc()))
def boolean_queries(query):
    """AND-style boolean retrieval.

    Return the IDs of documents that contain every (filtered) query term
    found in the posting list.

    :param query: whitespace-separated query terms
    :return: list of matching document IDs (empty if no term is indexed)
    """
    posting = Data.read_dataStruct_from_file(Contants.POSTING_LIST_FILE_NAME)
    setlist = [set(posting[term])
               for term in (Index.applyFilters(word) for word in query.split())
               if term in posting]
    if not setlist:
        # BUG FIX: set.intersection(*[]) raises TypeError when no query term
        # is present in the posting list; return no matches instead.
        return []
    return list(set.intersection(*setlist))
def wlidCard_queries_using_permuterm_index(query):
    """Wildcard retrieval via the permuterm index: collect the documents of
    every vocabulary term one of whose rotations contains the standardized
    query."""
    posting = Data.read_dataStruct_from_file(Contants.POSTING_LIST_FILE_NAME)
    standardized = Permuterm.standardize_wildcard_query(query)
    matches = []
    for term in posting:
        rotations = Permuterm.create_permuterm_indexes(term)
        # any() replicates the original first-match break
        if any(standardized in rotation for rotation in rotations):
            matches.extend(posting[term])
    return set(matches)
def __init__(self, mode, params_dict, exps=None):
    """Initialize the measurement container and immediately start a new
    experiment via new_exp(mode, params_dict).

    :param mode: experiment mode, forwarded to new_exp — semantics defined there
    :param params_dict: experiment parameters, forwarded to new_exp
    :param exps: optional parent experiments collection
    """
    self.exps = exps
    self.exp_param = ExpParam.Experiment_param()
    # self.results = Results.Results()
    self.measurements = {}
    self.data = Data.Data(self)
    # view objects are created lazily elsewhere; None until then
    self.navigation_chronograms = None
    self.time_zoom_chronograms = None
    self.mini_PCHs = None
    self.file_name = None
    self.comment = ""
    self.defaultBinSize_s = 0.01  # default : 10ms
    self.new_exp(mode, params_dict)
async def send_bug_report(self, exc, **kwargs):  # sourcery skip
    """Post a formatted bug report for a known exception name to the
    'bot bugs' channel.

    :param exc: str, the reported exception
    :param kwargs: any arguments
    :raises ExceptionNotFound: if exc is not a recognized exception name
    """
    # sourcery will be skipped bc this function will grow with more exceptions
    report_channel = self.get_channel(Data.get_channel("bot bugs"))
    if exc == "MemberNotFound":
        report = f"MemberNotFound: could not find member '{kwargs['name']}' " \
                 f"in guild '{kwargs['guild']}'. Command was invoked by user {kwargs['author']}"
    elif exc == "KeyError":
        report = f"KeyError: member {kwargs['name']} (id {kwargs['key']}) does not exist in {kwargs['data']}"
    else:
        raise ExceptionNotFound(exc)
    await report_channel.send(report)
    return
def boolean_queries_implement_using_lists(query):
    """Intersect the posting lists of the first two indexed query terms with
    the classic two-pointer merge.

    Assumes posting lists are stored in sorted order — TODO confirm against
    createPostingList's output ordering.

    :param query: whitespace-separated query terms
    :return: list of document IDs common to the first two terms' postings
    """
    posting = Data.read_dataStruct_from_file(Contants.POSTING_LIST_FILE_NAME)
    lists = []
    for word in query.split():
        word = Index.applyFilters(word)
        if word in posting:
            lists.append(posting[word])
    # BUG FIX: the original indexed p[0]/p[1] unconditionally and raised an
    # uncaught IndexError when fewer than two query terms were indexed.
    if not lists:
        return []
    if len(lists) == 1:
        return list(lists[0])
    first, second = lists[0], lists[1]
    i = j = 0
    answer = []
    # bounds-checked merge instead of the original's try/except IndexError
    # loop-termination trick
    while i < len(first) and j < len(second):
        if first[i] == second[j]:
            answer.append(first[i])
            i += 1
            j += 1
        elif first[i] < second[j]:
            i += 1
        else:
            j += 1
    return answer
def trailing_wildCard_queries_using_tree(query): query=query[:-1] print query indexedWords=Data.read_dataStruct_from_file(Contants.WORD_INDEX_FILE_NAME) bt=BinaryTree.balancedTree(indexedWords) que=Queue() if bt.root.left: que.put(bt.root.left) if bt.root.left: que.put(bt.root.right) while not que.empty(): node=que.get() if node: if query in node.value: BinaryTree.DepthFirstSearchPrintNode(node) else: if node.right: que.put(node.left) if node.left: que.put(node.right)
'''
Created on Sep 13, 2015

@author: Sheece Gardezi
'''
from core import Index
from core import Data
from core import Contants
from core import BinaryTree
from core import Queries

if __name__ == '__main__':
    # Index.createTermIndex()
    # load the index structures previously persisted by Index.createTermIndex()
    indexedWords = Data.read_dataStruct_from_file(
        Contants.WORD_INDEX_FILE_NAME)
    lexicons = Data.read_dataStruct_from_file(Contants.LEXICON_FILE_NAME)
    posting = Data.read_dataStruct_from_file(Contants.POSTING_LIST_FILE_NAME)
    bt = BinaryTree.balancedTree(indexedWords)
    # BinaryTree.DepthFirstSearchPrintNodes(bt)
    #print(posting)
    # example usage
    query = 'four dell'
    proximity = 700
    print Queries.boolean_queries(query)
    print Queries.boolean_queries_implement_using_lists(query)
    print Queries.boolean_queries_with_proximity(query, proximity)
    query = 'bi*sh'
    print Queries.wlidCard_queries_using_permuterm_index(query)
def run_doloop(word_list):
    """Run a DO loop: pop the bounds from the data stack and execute the body
    once per index, substituting the current iterator value into each pass."""
    start, stop = Data.pop(), Data.pop()
    for index in range(start, stop):
        # each pass gets its own copy with the iterator value resolved
        consume_tokens(_resolve_iterator(index, copy(word_list)))
'''
Created on Sep 13, 2015

@author: Sheece Gardezi
'''
from core import Index
from core import Data
from core import Contants
from core import BinaryTree
from core import Queries

if __name__ == '__main__':
    # Index.createTermIndex()
    # load the index structures previously persisted by Index.createTermIndex()
    indexedWords=Data.read_dataStruct_from_file(Contants.WORD_INDEX_FILE_NAME)
    lexicons=Data.read_dataStruct_from_file(Contants.LEXICON_FILE_NAME)
    posting=Data.read_dataStruct_from_file(Contants.POSTING_LIST_FILE_NAME)
    bt=BinaryTree.balancedTree(indexedWords)
    # BinaryTree.DepthFirstSearchPrintNodes(bt)
    #print(posting)
    # example usage
    query='four dell'
    proximity=700
    print Queries.boolean_queries(query)
    print Queries.boolean_queries_implement_using_lists(query)
    print Queries.boolean_queries_with_proximity(query,proximity)
    query='bi*sh'
    print Queries.wlidCard_queries_using_permuterm_index(query)
    Queries.trailing_wildCard_queries_using_tree('del*')
def test_balanced_tree():
    # Test stub: loads the persisted word index (the input balancedTree
    # consumes) but performs no assertions yet.
    wordsIndex=Data.read_dataStruct_from_file(Contants.WORD_INDEX_FILE_NAME)
def createLexicon(sortTepDic):
    """Persist the lexicon — the full list of index terms — to disk."""
    Data.write_dataStruct_to_file(Contants.LEXICON_FILE_NAME,
                                  list(sortTepDic.keys()))
def __init__(self, data: dict):
    """Wrap the raw response mapping and expose its ID token.

    :param data: mapping that must contain an 'idToken' key (KeyError
        otherwise); presumably a decoded auth response — confirm with caller
    """
    # keep the wrapped Data view on the instance and reuse it locally
    data = self.data = Data(data)
    self.id_token: str = data['idToken']