def _getBlockInfo(self, freqs, densities):
    """Summarize the density and frequency distributions of a block.

    Only strictly positive samples are considered. Returns a dict with the
    medians ('densityAverage', 'freqAverage') and upper limits
    ('densityUpperLimit', 'freqUpperLimit') of both distributions.
    """
    summary = {}

    positive_densities = [d for d in densities if d > 0.0]
    _low, med, upper = getMedianDistributionInfo(positive_densities)
    summary['densityAverage'] = med
    # Truncate (not round) the upper limit to two decimal places.
    summary['densityUpperLimit'] = int(100 * upper) / 100.00

    positive_freqs = [f for f in freqs if f > 0]
    _low, med, upper = getMedianDistributionInfo(positive_freqs)
    summary['freqAverage'] = med
    # Never report an upper limit below the mandatory minimum quantity.
    summary['freqUpperLimit'] = max(math.ceil(upper),
                                    settings.MANDATORY_TOKEN_MIN_QUANTITY)

    return summary
def _termInfo(self):
    """Compute the term metric for ``self.lemma`` in the analyzed text and
    the reference corpus.

    For every reference document containing the lemma, computes either its
    raw frequency or a WDF-style log-normalized frequency (when
    ``self.useWdfIdf`` is set). Side effects: sets ``self.rawScore`` (raw
    frequency in the text) and ``self.upperLimit`` (upper limit of the raw
    reference frequencies, floored at MANDATORY_TOKEN_MIN_QUANTITY).

    Returns:
        (termFreq, lowerLimit, upperLimit) — the metric for the analyzed
        text and the limits of the reference metric distribution.
    """
    # BUG FIX: removed the pointless `try: ... except Exception as ex:
    # raise ex` wrapper — it handled nothing and re-raising the caught
    # exception loses/obscures the original traceback origin.
    info = []
    rawInfo = []
    # Lemma frequency in each reference document.
    for bData in self.referencedLemmas:
        if self.lemma in bData:
            fdist = FreqDist(bData)
            freq = fdist[self.lemma]
            rawInfo.append(freq)
            lenTokenList = len(bData)
            if self.useWdfIdf:
                # WDF: log-damped frequency normalized by document length.
                metric = math.log(freq * 1.0 + 1.0, 2) / math.log(
                    lenTokenList + 1, 2)
            else:
                metric = freq
            if DISPLAY:
                app_logger.info(
                    u'[%s] Apariciones: %s Len: %s Max: %s Metric: %s' %
                    (self.lemma, fdist[self.lemma], len(bData),
                     max(fdist.values()), metric))
            info.append(metric)

    # Lemma frequency in the analyzed text.
    freq = self.fdistLemmas[self.lemma]
    _lowerLimit, _median, upperLimit = getMedianDistributionInfo(rawInfo)
    self.rawScore = int(freq)
    self.upperLimit = max(settings.MANDATORY_TOKEN_MIN_QUANTITY,
                          int(upperLimit))
    lenTokenList = len(self.textLemmas)
    if self.useWdfIdf:
        termFreq = math.log(freq * 1.0 + 1.0, 2) / math.log(
            lenTokenList + 1, 2)
    else:
        termFreq = freq

    # Limits of the reference metric distribution.
    lowerLimit, _median, upperLimit = getMedianDistributionInfo(info)
    if not self.useWdfIdf:
        lowerLimit = math.ceil(lowerLimit)
        upperLimit = math.ceil(upperLimit)
    return termFreq, lowerLimit, upperLimit
def _getScore(self):
    """Score the unique-token density of the text against the best documents.

    Returns (spamScore, lowerLimitScore), both as percentages relative to
    the upper limit of the reference density distribution. Side effects:
    sets self.lowerLimit, self.rawScore and self.upperLimit.
    """
    densities = []
    for document in self.bestSeoDocuments:
        # Skip documents that are too short to give a meaningful density.
        if document.getLenRawTokens() > settings.DOCUMENT_MIN_CHARACTERS:
            uniqueTokenCount = len(set(document.getTextTokens(lemmatize=True)))
            density = uniqueTokenCount * 1.0 / document.getLenRawTokens()
            if DISPLAY:
                app_logger.info(u'Density %s' % density)
            densities.append(density)

    lowerLimit, median, upperLimit = getMedianDistributionInfo(densities)

    uniqueTokenCount = len(set(self.textSeoDocument.getTextTokens(lemmatize=True)))
    score = uniqueTokenCount * 1.0 / self.textSeoDocument.getLenRawTokens()

    # Percentage of the upper limit, capped at 100 and truncated to 2 decimals.
    spamScore = min(100, score * 100 / (upperLimit))
    spamScore = int(spamScore * 100) / 100.0
    lowerLimitScore = max(0, (lowerLimit) * 100.0 / (upperLimit))

    self.lowerLimit = int(max(0, lowerLimit))
    self.rawScore = int(score)
    self.upperLimit = int(upperLimit)

    if DISPLAY:
        app_logger.info(u'Text Density %s' % score)
        app_logger.info(u'Media Density %s' % median)
    return spamScore, lowerLimitScore
def _getScore(self):
    """Score the proof terms of the text against each reference document.

    Returns (proofTermsScore, lowerLimitScore) as percentages of the upper
    limit of the per-document statistic distribution. Side effects: sets
    self.lowerLimit, self.rawScore and self.upperLimit.
    """
    if DISPLAY:
        print('Ranked vs WebData')
    # Statistic of the analyzed text against the ranked tokens.
    proofTermsScore = self._stadistic(self.rankedDataTokens, self.textLemmas)

    # Same statistic for every reference document.
    perDocumentStats = []
    for dataTokens in self.referencedLemmas:
        if DISPLAY:
            app_logger.info('-' * 20)
            app_logger.info('Ranked vs BestData')
        perDocumentStats.append(
            self._stadistic(self.rankedDataTokens, dataTokens))

    lowerLimit, median, upperLimit = getMedianDistributionInfo(perDocumentStats)
    self.lowerLimit = int(max(0, lowerLimit))
    self.rawScore = int(proofTermsScore)
    self.upperLimit = int(upperLimit)

    if DISPLAY:
        app_logger.info(u'Mediana %s' % median)
        app_logger.info(u'LowerLimit %s' % lowerLimit)

    # Normalize to a percentage of the upper limit, truncated to 2 decimals.
    proofTermsScore = (proofTermsScore) * 100 / (upperLimit)
    proofTermsScore = int(proofTermsScore * 100) / 100.0
    lowerLimitScore = max(0, (lowerLimit) * 100.0 / (upperLimit))
    return proofTermsScore, lowerLimitScore
def _getScore(self):
    """Score how many mandatory terms appear in the text's mandatory block.

    Compares the overlap between the text's mandatory-field tokens and the
    mandatory terms against the same overlap across the top library
    documents. Returns (mandatoryBlockTokensScore, lowerLimitScoreLen).
    Side effects: sets self.lowerLimit, self.rawScore, self.upperLimit.
    """
    mandatoryTermSet = set(self.mandatoryTerms)

    overlapSizes = []
    for document in self.seoLibrary.seoDocuments[
            0:settings.MANDATORY_DOCUMENTS_LIMIT]:
        docTokens = _getMandatoryBlockTokens(document,
                                             self.mandatoryField,
                                             unique=True)
        overlapSizes.append(len(set(docTokens) & mandatoryTermSet))

    lowerLimit, _median, upperLimit = getMedianDistributionInfo(overlapSizes)
    self.lowerLimit = int(max(0, lowerLimit))

    textTokens = _getMandatoryBlockTokens(self.textSeoDocument,
                                          self.mandatoryField,
                                          unique=True)
    self.rawScore = int(len(set(textTokens) & mandatoryTermSet))
    self.upperLimit = int(upperLimit)

    # Guard against a zero upper limit when normalizing to a percentage.
    mandatoryBlockTokensScore = self.rawScore * 100.00 / max(1.00, upperLimit)
    lowerLimitScoreLen = max(0, (lowerLimit))
    return mandatoryBlockTokensScore, lowerLimitScoreLen
def _getScore(self, documentMinWords=settings.DOCUMENT_MIN_CHARACTERS):
    """Score the content length (raw tokens minus title tokens) of the text
    against the distribution of the same measure over the best documents.

    Args:
        documentMinWords: minimum raw-token length for a reference document
            to be included in the distribution.

    Returns:
        (proofTermsLengthScore, lowerLimitScore) as percentages of the
        distribution's upper limit. Side effects: sets self.lowerLimit,
        self.rawScore and self.upperLimit.
    """
    result = []
    for seoDocument in self.bestSeoDocuments:
        if seoDocument.getLenRawTokens() > documentMinWords:
            if DISPLAY:
                app_logger.info(u'Numero tokens %s' %
                                seoDocument.getLenRawTokens())
            # Reference measure: raw length excluding the title tokens.
            result.append(max(0.0,
                              seoDocument.getLenRawTokens() * 1.0 -
                              len(seoDocument.getTitleTokens())))

    lowerLimit, median, upperLimit = getMedianDistributionInfo(result)
    self.lowerLimit = int(max(0, lowerLimit))
    self.rawScore = max(0,
                        int(self.textSeoDocument.getLenRawTokens()) -
                        len(self.textSeoDocument.getTitleTokens()))
    self.upperLimit = int(upperLimit)

    # BUG FIX: compare like with like. The reference distribution excludes
    # title tokens, so the test document must be scored on the same basis
    # (self.rawScore) rather than on its full raw length, which inflated
    # the score. Also guard the division for an empty/zero distribution.
    proofTermsLengthScore = self.rawScore * 100.00 / max(1.0, upperLimit)
    proofTermsLengthScore = int(proofTermsLengthScore * 100) / 100.0
    lowerLimitScore = max(0, 100 * (lowerLimit) / max(1.0, upperLimit))

    if DISPLAY:
        app_logger.info(u'Median Length %s' % median)
        app_logger.info(u'Lower Limit %s' % lowerLimitScore)
        app_logger.info(u'Score Final Length %s' % proofTermsLengthScore)
    return proofTermsLengthScore, lowerLimitScore
def _getMandatoryTermsDocumentScores(self, testSeoDocument):
    """Median percentage of mandatory tokens/blocks of the library documents
    that ``testSeoDocument`` also covers.

    For each library document and each mandatory field, the mandatory terms
    present in that document's field are checked against the test
    document's same field; per-document token and block coverage
    percentages are aggregated by their medians.

    Returns:
        (scoreTokens, scoreBlocks) — median coverage percentages, each
        floored at 1.
    """
    mandatoryFields = [
        'uriTokens', 'titleTokens', 'h1Tokens', 'h2Tokens',
        'strongTokens', 'altTokens', 'metaDescriptionTokens'
    ]
    mandatoryTermSet = set(self.mandatoryTerms)
    # PERF: the test document's tokens per field are invariant across both
    # loops — precompute them once instead of re-extracting them for every
    # intersecting token of every library document.
    testTokensByField = {
        field: set(_getMandatoryBlockTokens(testSeoDocument, field,
                                            unique=True))
        for field in mandatoryFields
    }

    tokenScoreList = []
    blocksScoreList = []
    for seoDocument in self.seoLibrary.seoDocuments:
        scoreTokens = 0
        totalTokens = 0
        scoreBlocks = 0
        totalBlocks = 0
        for mandatoryField in mandatoryFields:
            tokens = _getMandatoryBlockTokens(seoDocument,
                                              mandatoryField,
                                              unique=True)
            intersection = list(set(tokens) & mandatoryTermSet)
            if intersection:
                totalBlocks += 1
                testTokens = testTokensByField[mandatoryField]
                scoreTokensStart = scoreTokens
                for token in intersection:
                    totalTokens += 1
                    if token in testTokens:
                        scoreTokens += 1
                # The block counts as covered if any of its mandatory
                # tokens was matched by the test document.
                if scoreTokensStart < scoreTokens:
                    scoreBlocks += 1
        tokenScoreList.append(
            int(scoreTokens * 100.00 / max(totalTokens, 1)))
        blocksScoreList.append(
            int(scoreBlocks * 100.00 / max(totalBlocks, 1)))

    _lowerLimit, median, _upperLimit = getMedianDistributionInfo(
        tokenScoreList)
    scoreTokens = int(max(1, median))
    _lowerLimit, median, _upperLimit = getMedianDistributionInfo(
        blocksScoreList)
    scoreBlocks = int(max(1, median))
    return scoreTokens, scoreBlocks
def _getScore(self):
    """Score the test document's mandatory token/block coverage against the
    distributions over the top library documents.

    Documents with a zero block score are excluded from the reference
    distributions (a zero indicates the page is not ranking on content
    SEO). Returns a pair of dicts: ({'tokens', 'blocks'} scores,
    {'tokens', 'blocks'} lower limits). Side effects: sets the
    lowerLimit/rawScore/upperLimit attributes for both tokens and blocks.
    """
    tokensScore = []
    blocksScore = []
    for seoDocument in self.seoLibrary.seoDocuments[
            0:settings.MANDATORY_DOCUMENTS_LIMIT]:
        tokenScore, blockScore = self._getMandatoryTermsDocumentScores(
            seoDocument)
        if blockScore > 0:
            # A document scoring 0 blocks is assumed not to be ranking on
            # content SEO, so it is excluded from the reference.
            tokensScore.append(tokenScore)
            blocksScore.append(blockScore)

    tokenScore, blockScore = self._getMandatoryTermsDocumentScores(
        self.textSeoDocument)

    lowerLimit, _median, upperLimit = getMedianDistributionInfo(
        tokensScore)
    self.lowerLimitTokens = int(max(0, lowerLimit))
    self.rawScoreTokens = int(tokenScore)
    self.upperLimitTokens = int(upperLimit)

    # BUG FIX: the original passed the scalar `blockScore` (the test
    # document's score) instead of the list `blocksScore` (the reference
    # distribution), producing meaningless block limits.
    lowerLimit, _median, upperLimit = getMedianDistributionInfo(blocksScore)
    self.lowerLimitBlock = int(max(0, lowerLimit))
    self.rawScoreBlock = int(blockScore)
    self.upperLimitBlock = int(upperLimit)

    mandatoryTokensScore = int(tokenScore * 100.00 /
                               (max(1, self.upperLimitTokens)))
    mandatoryTokensLowerLimit = max(0, self.lowerLimitTokens)
    mandatoryBlocksScore = int(blockScore * 100.00 /
                               (max(1, self.upperLimitBlock)))
    mandatoryBlocksLowerLimit = max(0, self.lowerLimitBlock)

    return {
        'tokens': mandatoryTokensScore,
        'blocks': mandatoryBlocksScore
    }, {
        'tokens': mandatoryTokensLowerLimit,
        'blocks': mandatoryBlocksLowerLimit
    }
def _getScore(self):
    """Score the text's readability against the best documents.

    Returns (readabilityScore, lowerLimitScore) as percentages of the
    reference distribution's upper limit. Side effects: sets
    self.lowerLimit, self.rawScore and self.upperLimit.
    """
    referenceScores = [
        self._getDocumentScore(document)
        for document in self.bestSeoDocuments
    ]
    lowerLimit, _median, upperLimit = getMedianDistributionInfo(
        referenceScores)

    self.lowerLimit = int(max(0, lowerLimit))
    self.rawScore = int(self._getDocumentScore(self.textSeoDocument))
    self.upperLimit = int(upperLimit)

    # Percentage of the upper limit, truncated to two decimal places.
    readabilityScore = self._getDocumentScore(
        self.textSeoDocument) * 100.00 / (upperLimit)
    readabilityScore = int(readabilityScore * 100) / 100.0
    lowerLimitScore = max(0, 100 * (lowerLimit) / (upperLimit))
    return readabilityScore, lowerLimitScore
def _getScore(self):
    """Score the text's meta-keyword count against the best documents.

    Only reference documents that declare at least one meta keyword
    contribute to the distribution. Returns
    (metaKeyWordsScore, lowerLimitScore) as percentages of the
    distribution's upper limit. Side effects: sets self.lowerLimit (always
    0), self.rawScore and self.upperLimit.
    """
    # IMPROVEMENT: dropped the `counter` local — it was incremented but
    # never read anywhere in the method.
    result = []
    for seoDocument in self.bestSeoDocuments:
        lenMetaKeyWords = len(
            seoDocument.getMetaKeywordsTokens(unique=False))
        if lenMetaKeyWords > 0:
            result.append(lenMetaKeyWords * 1.0)

    lowerLimit, _median, upperLimit = getMedianDistributionInfo(result)
    self.lowerLimit = 0
    self.rawScore = int(
        len(self.textSeoDocument.getMetaKeywordsTokens(unique=False)))
    self.upperLimit = int(upperLimit)

    # ROBUSTNESS: guard the division — when no reference document declares
    # meta keywords the distribution is empty and upperLimit may be 0.
    metaKeyWordsScore = self.rawScore * 100.00 / max(1.0, upperLimit)
    metaKeyWordsScore = int(metaKeyWordsScore * 100) / 100.0
    lowerLimitScore = max(0, 100 * (lowerLimit) / max(1.0, upperLimit))
    return metaKeyWordsScore, lowerLimitScore
def _getScore(self):
    """Score the length of the text's mandatory block against the library.

    Builds the distribution of mandatory-field token counts over all
    library documents (non-empty fields only) and scores the text's count
    as a percentage of the upper limit. Returns
    (mandatoryBlockLengthScore, lowerLimitScoreLen). Side effects: sets
    self.lowerLimit, self.rawScore and self.upperLimit.
    """
    fieldLengths = []
    for document in self.seoLibrary.seoDocuments:
        fieldTokens = _getMandatoryBlockTokens(document,
                                               self.mandatoryField,
                                               unique=False)
        if fieldTokens:
            fieldLengths.append(len(fieldTokens))

    lowerLimit, _median, upperLimit = getMedianDistributionInfo(fieldLengths)
    self.lowerLimit = int(lowerLimit)

    textFieldTokens = _getMandatoryBlockTokens(self.textSeoDocument,
                                               self.mandatoryField,
                                               unique=False)
    self.rawScore = int(len(textFieldTokens))
    self.upperLimit = int(upperLimit)

    mandatoryBlockLengthScore = self.rawScore * 100.00 / (upperLimit)
    lowerLimitScoreLen = max(0, (lowerLimit))
    return mandatoryBlockLengthScore, lowerLimitScoreLen