예제 #1
0
class Main:
    """Report news sources that have no stored links from the last three days.

    Constructing the object creates the log and database handlers and
    immediately runs the check (``run``).
    """

    def __init__(self):
        self.logHandler = LogHandler("Main")
        self.serverHandler = ServerDatabaseHandler()
        self.run()

    def run(self):
        """Print each ``category  ->  source`` pair whose query returns nothing.

        For every source of every category produced by
        ``createNewsSourceByPresent`` the current month's ``links_YYYYMM``
        table is queried for rows dated within the last 3 days; a falsy
        result is reported on stdout.  The database connection is closed
        when the scan ends, including when it ends with an exception.
        """
        present = datetime.now()
        self.yearMonth = str(present.strftime('%Y%m'))
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("[%s] Starting" % str(datetime.now())[:19])
        sys.stdout.flush()

        try:
            # Build the category -> sources mapping for the current moment.
            self.newsSources = createNewsSourceByPresent(present)

            for category, sources in self.newsSources.items():
                for sourceList in sources:
                    source = sourceList[0]
                    # NOTE(review): the statement is assembled with string
                    # formatting; this is only safe while category/source
                    # come from trusted configuration - confirm.
                    rows = self.serverHandler.executeQuery(
                        "SELECT id FROM `links_%s` WHERE date BETWEEN DATE_SUB(NOW(), INTERVAL 3 DAY) AND NOW() and category='%s' and source='%s'"
                        % (self.yearMonth, category, source))
                    if not rows:
                        # Two spaces on each side of the arrow reproduce the
                        # output of the former `print a, " -> ", b` statement.
                        print("%s  ->  %s" % (category, source))
        finally:
            # Release the connection even if a query above raised.
            self.serverHandler.closeConnection()

        print("Finished\n")
        sys.stdout.flush()
예제 #2
0
class Main:
    """Report news sources that have no stored links from the last three days.

    Constructing the object creates the log and database handlers and
    immediately runs the check (``run``).
    """

    def __init__(self):
        self.logHandler = LogHandler("Main")
        self.serverHandler = ServerDatabaseHandler()
        self.run()

    def run(self):
        """Print each ``category  ->  source`` pair whose query returns nothing.

        Every source of every category returned by
        ``createNewsSourceByPresent`` is looked up in the current month's
        ``links_YYYYMM`` table, restricted to the last 3 days; sources whose
        query result is falsy are printed.  The database connection is
        closed on the way out, whether or not an exception occurred.
        """
        present = datetime.now()
        self.yearMonth = str(present.strftime('%Y%m'))
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("[%s] Starting" % str(datetime.now())[:19])
        sys.stdout.flush()

        try:
            # Build the category -> sources mapping for the current moment.
            self.newsSources = createNewsSourceByPresent(present)

            for category, sources in self.newsSources.items():
                for sourceList in sources:
                    source = sourceList[0]
                    # NOTE(review): the statement is assembled with string
                    # formatting; this is only safe while category/source
                    # come from trusted configuration - confirm.
                    rows = self.serverHandler.executeQuery("SELECT id FROM `links_%s` WHERE date BETWEEN DATE_SUB(NOW(), INTERVAL 3 DAY) AND NOW() and category='%s' and source='%s'" % (self.yearMonth, category, source))
                    if not rows:
                        # Two spaces on each side of the arrow reproduce the
                        # output of the former `print a, " -> ", b` statement.
                        print("%s  ->  %s" % (category, source))
        finally:
            # Release the connection even if a query above raised.
            self.serverHandler.closeConnection()

        print("Finished\n")
        sys.stdout.flush()
예제 #3
0
	def __init__(self):		
		
		#=======================================================================
		# Configuration
		#=======================================================================
		#Alinmamasi gereken kelimeler
		self.blackWordListOne = []
		self.blackWordListTwo = []
		
		self.present = datetime.now().date()
		self.yesterday = self.present - timedelta(days=1)
		self.yearMonth = str(self.present.strftime('%Y%m'))
		self.presentLastMonth = self.present - timedelta(days=30)
		self.yearLastMonth = str(self.presentLastMonth.strftime('%Y%m'))
		
		self.serverHandler = ServerDatabaseHandler()
		print datetime.now()
		
		self.serverHandler.executeQuery("""CREATE TABLE IF NOT EXISTS `trendWords` (
  `id` int(11) NOT NULL,
  `date` date DEFAULT NULL,
  `category` varchar(50) DEFAULT NULL,
  `text` varchar(255) CHARACTER SET utf32 COLLATE utf32_unicode_ci DEFAULT NULL,
  `newscount` int(11) DEFAULT NULL
) ENGINE=InnoDB  DEFAULT CHARSET=latin1 AUTO_INCREMENT=1 ;""")
		try:
			self.serverHandler.executeQuery("ALTER TABLE `trendWords` ADD PRIMARY KEY (`id`)")
		except:
			pass
		
		lsCategories = ["gundem", "videohaber", "spor", "koseyazilari", "kulturvesanat", "teknoloji"]
		lsDataTypes = ["guncel"]
		for category in lsCategories:
			for dataType in lsDataTypes:
				self.category = category
				self.dataType = dataType
				print self.category, self.dataType
				mostCommonWordsList = self.getMostCommonWordsFromMetaContent()
				for i in mostCommonWordsList:
					text = i[0]
					text = text.replace("'", "''")
					count = i[1]
					if 0 == self.serverHandler.executeQuery("SELECT COUNT(*) FROM `trendWords` WHERE category='%s' and text='%s' and date=CURRENT_DATE()"%(self.category, text))[0][0]:
						self.serverHandler.executeQuery("INSERT INTO `trendWords` VALUES(NULL, CURRENT_DATE(), '%s', '%s', %d)"%(self.category, text, count))
					print "\t", self.category, text, count
		print ""
		print ""
예제 #4
0
class Main:
	"""Collect the day's most frequent capitalised two-word phrases per category.

	Constructing the object does all the work: it makes sure the
	``trendWords`` table exists, extracts the phrases for every category from
	today's link titles/descriptions and inserts the ones not stored yet.
	"""

	# Step budget of the phrase-selection loop; every examined phrase costs
	# one step and every accepted phrase one more.
	MAX_SELECTION_STEPS = 100
	# A phrase must occur at least this many times to be reported.
	MIN_OCCURRENCES = 3

	def __init__(self):
		"""Ensure the ``trendWords`` table exists and store today's trend words."""
		#=======================================================================
		# Configuration
		#=======================================================================
		# Words that must not be picked up (both lists start empty).
		self.blackWordListOne = []
		self.blackWordListTwo = []

		self.present = datetime.now().date()
		self.yesterday = self.present - timedelta(days=1)
		self.yearMonth = str(self.present.strftime('%Y%m'))
		# "Last month" is approximated as 30 days before today.
		self.presentLastMonth = self.present - timedelta(days=30)
		self.yearLastMonth = str(self.presentLastMonth.strftime('%Y%m'))

		self.serverHandler = ServerDatabaseHandler()
		# Single-argument print(...) behaves the same on Python 2 and 3.
		print(datetime.now())

		# NOTE(review): `id` is NOT NULL but not declared AUTO_INCREMENT, while
		# the INSERT below passes NULL for it - confirm the intended schema.
		self.serverHandler.executeQuery("""CREATE TABLE IF NOT EXISTS `trendWords` (
  `id` int(11) NOT NULL,
  `date` date DEFAULT NULL,
  `category` varchar(50) DEFAULT NULL,
  `text` varchar(255) CHARACTER SET utf32 COLLATE utf32_unicode_ci DEFAULT NULL,
  `newscount` int(11) DEFAULT NULL
) ENGINE=InnoDB  DEFAULT CHARSET=latin1 AUTO_INCREMENT=1 ;""")
		try:
			self.serverHandler.executeQuery("ALTER TABLE `trendWords` ADD PRIMARY KEY (`id`)")
		except Exception:
			# Best effort: presumably this fails when the key already exists.
			# Only ordinary errors are ignored, so the SystemExit raised by
			# closeProcess (and KeyboardInterrupt) still propagate.
			pass

		lsCategories = ["gundem", "videohaber", "spor", "koseyazilari", "kulturvesanat", "teknoloji"]
		lsDataTypes = ["guncel"]
		for category in lsCategories:
			for dataType in lsDataTypes:
				self.category = category
				self.dataType = dataType
				print("%s %s" % (self.category, self.dataType))
				for text, count in self.getMostCommonWordsFromMetaContent():
					# Double single quotes so the text can sit inside a SQL
					# string literal.
					# NOTE(review): the statements are still built by string
					# formatting from scraped text; a parameterised query
					# would be safer if executeQuery supports one.
					text = text.replace("'", "''")
					if 0 == self.serverHandler.executeQuery("SELECT COUNT(*) FROM `trendWords` WHERE category='%s' and text='%s' and date=CURRENT_DATE()"%(self.category, text))[0][0]:
						self.serverHandler.executeQuery("INSERT INTO `trendWords` VALUES(NULL, CURRENT_DATE(), '%s', '%s', %d)"%(self.category, text, count))
					# No space after the tab: the former print statement did
					# not emit one after a whitespace-terminated item.
					print("\t%s %s %s" % (self.category, text, count))
		print("")
		print("")

	def isUpper(self, text):
		"""Return True if ``text`` parses as an integer or starts upper-case.

		Turkish letters found within the first two positions are replaced by
		ASCII stand-ins before the check.  Any error (for example an empty
		string) yields False; it is logged when a ``logHandler`` attribute is
		available.
		"""
		try:
			wordDictU = {'İ':'I', 'Ö':'O', 'Ü':'U', 'Ç':'C', 'Ş':'S', 'Ğ':'G'}
			wordDictL = {'ı':'i', 'ö':'o', 'ü':'u', 'ç':'c', 'ş':'s', 'ğ':'g'}

			for table in (wordDictU, wordDictL):
				for turkish, plain in table.items():
					if text[0:2].find(turkish) != -1:
						text = text.replace(turkish, plain)

			# Numbers count as "upper" so they can be part of a phrase.
			try:
				int(text)
				return True
			except ValueError:
				pass

			return text[0].isupper()
		except Exception:
			# This class never sets logHandler itself, so only log when some
			# other code has attached one; the check must not crash here.
			logHandler = getattr(self, "logHandler", None)
			if logHandler is not None:
				logHandler.logger("isUpper", text)
			return False

	def getMostCommonWordsFromMetaContent(self):
		"""Return today's most frequent capitalised two-word phrases.

		Reads ``self.category``, ``self.yearMonth``, ``self.present`` and the
		two blacklists.  The result is an iterator of ``(phrase, count)``
		pairs ordered by descending count.
		"""
		# Drop blacklisted words up front; filtering (instead of removing
		# while iterating) also catches consecutive blacklisted words.
		words = [word for word in self._fetchMetaWords()
				if word not in self.blackWordListOne]

		phraseCounts = {}
		position = 0
		total = len(words)
		while position < total:
			# Gather the maximal run of capitalised words starting here.
			run = []
			while position < total and self.isUpper(words[position]):
				run.append(words[position])
				position += 1
			# Step over the word that ended the run (or the lone
			# non-capitalised word when the run is empty).
			position += 1

			phrase = self._cleanPhrase(' '.join(run))
			if not phrase:
				continue
			# Discard phrases containing any second-level blacklist entry.
			if any(blackWord in phrase for blackWord in self.blackWordListTwo):
				continue
			phraseCounts[phrase] = phraseCounts.get(phrase, 0) + 1

		return self._selectTopPhrases(phraseCounts)

	def _fetchMetaWords(self):
		"""Return the whitespace-separated words of today's titles and descriptions."""
		if self.category == "gundem":
			query = "SELECT title, description FROM `links_%s` WHERE tweetCount+facebookCount > 10 AND `date` = '%s' AND category = '%s'"
		else:
			query = "SELECT title, description FROM `links_%s` WHERE `date` = '%s' AND category = '%s'"
		rows = self.serverHandler.executeQuery(query % (self.yearMonth, self.present, self.category))

		words = []
		for row in rows or ():
			for field in (row[0], row[1]):
				# Skip NULL/empty columns instead of failing on them.
				if field:
					words.extend(field.split())
		return words

	def _cleanPhrase(self, phrase):
		"""Normalise a capitalised run; return '' when it has to be discarded."""
		if not phrase:
			return ''

		# A dot or comma inside the phrase makes it meaningless; skip it.
		inner = phrase[2:-2]
		if '.' in inner or ',' in inner:
			return ''

		# For two-word phrases cut each word at its first apostrophe.
		parts = phrase.split()
		if len(parts) == 2:
			for mark in ("'", "’"):
				parts = [part.split(mark)[0] for part in parts]
			phrase = ' '.join(part for part in parts if part)

		# Remove a trailing comma, if any.
		if phrase.endswith(','):
			phrase = phrase[:-1]
		return phrase

	def _selectTopPhrases(self, phraseCounts):
		"""Pick the frequent two-word phrases from ``phraseCounts`` (consumed)."""
		selected = {}
		steps = 0
		while phraseCounts:
			steps += 1
			if steps > self.MAX_SELECTION_STEPS:
				break

			phrase = max(phraseCounts, key=phraseCounts.get)
			occurrences = phraseCounts[phrase]
			# The maximum is below the threshold, so nothing left qualifies.
			if occurrences < self.MIN_OCCURRENCES:
				break

			parts = phrase.split()
			if len(parts) == 2:
				selected[' '.join(parts)] = occurrences
				# An accepted phrase costs a second step, as it always has.
				# NOTE(review): confirm the double charge is intended.
				steps += 1

			del phraseCounts[phrase]

		return reversed(sorted(selected.items(), key=operator.itemgetter(1)))

	def closeProcess(self, arg1, signal):
		"""Terminate the process with exit status 1; both arguments are ignored.

		NOTE(review): the two-argument signature suggests this is registered
		as a signal handler - confirm against the caller.
		"""
		sys.exit(1)
예제 #5
0
    def __init__(self):
        """Create the log and database handlers, then call ``run`` immediately."""
        # The handlers are assigned before run() is invoked.
        self.logHandler = LogHandler("Main")
        self.serverHandler = ServerDatabaseHandler()
        self.run()
예제 #6
0
    def __init__(self):
        """Create the log and database handlers, then call ``run`` immediately."""
        # The handlers are assigned before run() is invoked.
        self.logHandler = LogHandler("Main")
        self.serverHandler = ServerDatabaseHandler()
        self.run()