def getScore(self, keyWords, avoids):
    """Compute a weighted score for this page and store it in ``self.score``.

    For each of the five word sections (link, title, header, spec, norm) the
    number of entries returned by ``ListTool.getNonUniques(keyWords, section)``
    is multiplied by that section's multiplier and added to the score. The
    same calculation is then done with ``avoids`` and subtracted.

    ``getNonUniques`` is defined elsewhere; presumably it returns the words
    present in both lists -- TODO confirm.

    Args:
        keyWords: Words whose matches raise the score.
        avoids: Words whose matches lower the score.

    Returns:
        None. The result is written to ``self.score``.
    """
    matcher = ListTool()
    sections = (self.link, self.title, self.header, self.spec, self.norm)

    def countMatches(words):
        # One match count per section, in link/title/header/spec/norm order.
        return [len(matcher.getNonUniques(words, section)) for section in sections]

    wanted = countMatches(keyWords)
    weights = (self.lMulti, self.tMulti, self.hMulti, self.sMulti, self.nMulti)

    # Start from the first term (not from 0) so the arithmetic is performed in
    # exactly the same order as a plain chain of "+=" statements.
    total = wanted[0] * weights[0]
    for hits, weight in zip(wanted[1:], weights[1:]):
        total += hits * weight

    unwanted = countMatches(avoids)
    for hits, weight in zip(unwanted, weights):
        total -= hits * weight

    self.score = total
class OwnThread(Thread):
    """Thread that drives a group of child connections through two message rounds.

    ``run`` starts every connection, collects one "links" message per
    connection (``messengerLinks``), then one "confirm" message per connection
    (``messengerConfirm``), and finally waits for every child to finish.
    """

    # Class-level defaults. ``connections`` and ``listTool`` are rebound per
    # instance in ``__init__``; ``responseUrls`` is rebound on the instance the
    # first time ``messengerLinks`` stores a merged list.
    connections = []
    listTool = None
    responseUrls = []

    def __init__(self, connections):
        """Remember the connections to manage and create the list helper."""
        Thread.__init__(self)
        self.connections = connections
        self.listTool = ListTool()

    def run(self):
        """Start all connections, run both message rounds, then wait for the children."""
        for conn in self.connections:
            conn.startConnection()
        self.messengerLinks()
        self.messengerConfirm()
        for conn in self.connections:
            conn.waitForChild()

    def _pollPending(self):
        """Poll every connection whose ``hadMessage`` flag is falsy.

        The poll result is stored in the connection's ``hasMessage`` attribute.
        ``pollMessage(1)`` is defined elsewhere; the argument is presumably a
        timeout -- TODO confirm.
        """
        for conn in self.connections:
            if conn.hadMessage:
                continue
            conn.hasMessage = conn.pollMessage(1)

    def messengerLinks(self):
        """Collect URL messages until as many messages as connections were read.

        Each message is merged into ``self.responseUrls`` and a reply computed
        by ``getNonUniques`` is sent back on the same connection.
        """
        expected = len(self.connections)
        received = 0
        while received < expected:
            self._pollPending()
            for conn in self.connections:
                if not conn.hasMessage:
                    continue
                urls = conn.getMessage()
                received += 1
                self.responseUrls = self.listTool.addOnlyUniqueFromList(urls, self.responseUrls)
                # NOTE(review): the reply is computed after ``urls`` was merged
                # into ``responseUrls`` -- confirm this order is intended.
                conn.sendMessage(self.listTool.getNonUniques(urls, self.responseUrls))

    def messengerConfirm(self):
        """Collect one final message per connection into ``endResult``.

        All ``hadMessage`` flags are cleared first so every connection is
        polled again.
        """
        expected = len(self.connections)
        received = 0
        for conn in self.connections:
            conn.hadMessage = False
        while received < expected:
            self._pollPending()
            for conn in self.connections:
                if not conn.hasMessage:
                    continue
                result = conn.getMessage()
                received += 1
                conn.endResult = result
def startSubProcesses(self):
    """Run the connection manager over the configured sites and keep its results.

    The good and bad site lists are combined with
    ``ListTool.addOnlyUniqueFromList`` and handed to a ``ConnectionManager``
    together with the keyword/avoid lists and the search configuration. The
    manager's thread is started and joined, its results are parsed, and they
    are stored in ``self.resultList``.
    """
    manager = ConnectionManager()
    listTool = ListTool()
    sitesList = listTool.addOnlyUniqueFromList(
        self.__sites.goodSites,
        self.__sites.badSites,
    )

    associations = self.__associations
    conf = self.__conf
    manager.initializeConnection(
        associations.keywordsList,
        associations.avoidsList,
        sitesList,
        conf.siteToSearchList,
        conf.pagesToSearch,
        conf.searchParamsList,
    )

    # Block until the manager's thread has finished before reading results.
    manager.startThread()
    manager.join()
    manager.parseResults()
    self.resultList = manager.getResults()
class Associations:
    """Container for the keyword list and the avoid-word list."""

    # Class-level defaults; every instance gets its own lists in ``__init__``.
    keywordsList = None  # 2D format [val,occ]
    avoidsList = None  # 2D format [val,occ]
    listTool = None

    def __init__(self):
        """Start with empty keyword/avoid lists and a fresh ``ListTool``."""
        self.keywordsList, self.avoidsList = [], []
        self.listTool = ListTool()

    def setParams(self, keywords, avoids, fKeywords, fAvoids):
        """Replace both lists with the merged regular and ``f``-prefixed inputs.

        Merging is delegated to ``ListTool.addOnlyUniqueFromList``. The ``f``
        prefix presumably means "forced" -- TODO confirm.
        """
        merge = self.listTool.addOnlyUniqueFromList
        self.keywordsList = merge(keywords, fKeywords)
        self.avoidsList = merge(avoids, fAvoids)

    def addKeyword(self, word, occurance, isForced):
        """Append a new ``Keywords`` entry built from the given values."""
        entry = Keywords(word, occurance, isForced)
        self.keywordsList.append(entry)

    def addAvoids(self, word, occurance, isForced):
        """Append a new ``Avoids`` entry built from the given values."""
        entry = Avoids(word, occurance, isForced)
        self.avoidsList.append(entry)
def parseResults(self):
    """Tokenise the link/title text and clean all five word lists.

    ``self.link`` and ``self.title`` are split on single spaces. Then, for
    each of ``link``, ``title``, ``header``, ``spec`` and ``norm``, every word
    that does not match the allowed-character pattern or is longer than
    ``self.maxLen`` is dropped, and the remainder is passed through
    ``ListTool.removeDuplicates``.

    The filter builds a new list instead of popping from the list being
    iterated. Popping during iteration skips the element that follows each
    removed one, which let invalid words slip through.
    """
    lt = ListTool()
    # Pattern text is kept unchanged. NOTE(review): inside the character
    # class "+-:" is a range ('+' through ':'), so '/' is accepted as well,
    # and "!-\"" is a range too -- confirm whether literal '-' was intended.
    allowed = re.compile("^[a-zA-Z0-9.,+-:;!-\"']*$")
    maxLen = self.maxLen

    def clean(words):
        # Keep only well-formed, short-enough words, then de-duplicate.
        kept = [
            word for word in words
            if allowed.match(word) is not None and len(word) <= maxLen
        ]
        return lt.removeDuplicates(kept)

    self.link = clean(self.link.split(' '))
    self.title = clean(self.title.split(' '))
    self.header = clean(self.header)
    self.spec = clean(self.spec)
    self.norm = clean(self.norm)
def createMasterInspectionXML(self, delChildXMLs = False):
    """Merge the inspection XMLs found in ``results/`` into the master XML.

    The files in ``results/`` are listed, filtered through
    ``ListTool.popByWord`` with ``self.masterInspectionPath`` (presumably to
    exclude the master file itself -- TODO confirm), read into inspections,
    sorted, and written to ``self.masterInspectionPath``.

    Args:
        delChildXMLs: When true, delete the source XML files after the master
            file has been written.

    Exits the interpreter via ``exit()`` after printing a message when no
    inspections could be read.
    """
    listTool = ListTool()
    fileTool = OSTool()
    sorter = Sorter()
    inspector = Inspector()

    childXmls = listTool.popByWord(
        fileTool.getFilesInDir('results/'),
        self.masterInspectionPath,
    )

    inspections = inspector.getInspections(childXmls)
    if len(inspections) == 0:
        print('No files read.')
        exit()

    ordered = sorter.sortInspectionList(inspections)
    XMLWriter().writeMIXML(ordered, self.masterInspectionPath)

    if not delChildXMLs:
        return
    for childXml in childXmls:
        fileTool.deleteFile(childXml)
def sortInspectionList(self, li):
    """Return the inspections ordered by ``int(obj.score)``, highest first.

    Items with equal scores keep their original relative order. Every item is
    kept regardless of its score, and the input list is not modified.

    Args:
        li: List of objects exposing a ``score`` attribute convertible with
            ``int()``; may be ``None`` or empty.

    Returns:
        A new list sorted by descending score; ``[]`` for ``None`` or empty
        input.

    Raises:
        ValueError / TypeError: If a ``score`` cannot be converted by ``int()``.
    """
    if not li:
        return []
    # sorted() is stable even with reverse=True, so ties stay in input order,
    # matching a "pick the first maximum repeatedly" selection sort.
    return sorted(li, key=lambda obj: int(obj.score), reverse=True)
def getScore(self):
    """Score every inspection in ``self.xmlInsp`` against ``self.keyWords``.

    For each inspection the file ``data.fil`` is loaded with ``PageLoader``.
    The score is the sum, over the link/title/header/special/normal word
    lists, of the number of entries returned by
    ``ListTool.getNonUniques(self.keyWords, words)`` times that section's
    multiplier. The result is stored on the inspection's ``score`` attribute.

    Returns:
        ``self.xmlInsp`` when every file was readable. ``False`` as soon as a
        file is not readable (a message is printed first); inspections after
        that file are left unscored.
    """
    matcher = ListTool()
    for inspection in self.xmlInsp:
        page = PageLoader(inspection.fil)
        if not page.isReadable():
            print('Abort. XMLInspections data corrupted. File not readable:', inspection.fil)
            return False
        page.read()

        sections = (
            page.linkWords,
            page.titleWords,
            page.headerWords,
            page.specialWords,
            page.normalWords,
        )
        hitCounts = [len(matcher.getNonUniques(self.keyWords, words)) for words in sections]
        weights = (self.lMulti, self.tMulti, self.hMulti, self.sMulti, self.nMulti)

        # Fold starting from the first term so the additions happen in the
        # same order as a plain chain of "+=" statements.
        total = hitCounts[0] * weights[0]
        for hits, weight in zip(hitCounts[1:], weights[1:]):
            total += hits * weight
        inspection.score = total
    return self.xmlInsp
def __init__(self, connections):
    """Initialise the thread base class and store the connections to manage.

    Args:
        connections: The connection objects kept on ``self.connections``.
    """
    # The Thread base class must be initialised before the thread can be started.
    Thread.__init__(self)
    self.listTool = ListTool()
    self.connections = connections
def __init__(self):
    """Start with empty keyword and avoid lists and a fresh ``ListTool``."""
    self.listTool = ListTool()
    self.keywordsList, self.avoidsList = [], []