Esempio n. 1
0
 def send_msg(self, msg):
     """Send ``msg`` to the top-news Kafka topic.

     The broker list and topic name are read from the ``TopNewsKafka``
     configuration on every call and a fresh ``KafkaApi`` is built from
     them.  Failures while sending are logged and swallowed; nothing is
     returned.

     :param msg: payload handed unchanged to ``KafkaApi.send_msg``.
     """
     broker = get_config(TopNewsKafka, 'KAFKA_BROKER_SERVERS')
     topic = get_config(TopNewsKafka, 'TOP_NEWS_KAFKA_TOPIC')
     kafka = KafkaApi(broker, topic)
     try:
         # ``async`` is a reserved word since Python 3.7, so writing
         # ``async=True`` literally is a SyntaxError there.  Passing the
         # keyword through a dict sends the very same argument and parses
         # on both Python 2 and Python 3.
         kafka.send_msg(msg, **{'async': True})
     except Exception as ex:
         self.logger.error(
             "Exception: Failed to send kafka msg to api client")
         self.logger.error(ex)
     else:
         # Logged outside the ``try`` and with lazy %-args so that a
         # formatting problem (e.g. ``msg`` being a tuple) can never be
         # misreported as a failed Kafka send.
         self.logger.info(
             "Succssfully to send msg to api client, msg=%s", msg)
Esempio n. 2
0
    def updateStartTime(self):
        """Move the ``start=`` time in ``job.properties`` into the future.

        Reads ``job.properties`` from the CONFIGS package path, takes the
        value of the first ``start=`` entry and replaces every occurrence
        of that value in the file with "now + SUBMIT_DELAY_TIME minutes",
        formatted as ``%Y-%m-%dT%H:%M+0800``.

        The new content is computed completely before the file is reopened
        for writing, so a missing ``start=`` entry or a configuration error
        can no longer leave a truncated, empty ``job.properties`` behind.

        Errors are logged, never raised; nothing is returned.
        """
        job_file = os.path.join(self._getOoziePath(self.PkgType.CONFIGS),
                                'job.properties')
        try:
            with open(job_file, 'r') as fh:
                content = fh.read()

            # e.g. 'start=2016-06-08T10:10+0800' -> '2016-06-08T10:10+0800'
            match = re.search(r'start=(\S*)', content)
            start_time = match.group(1) if match else ''
            if not start_time:
                # An empty value must be rejected as well: str.replace('', x)
                # would insert x between every character of the file.
                raise ValueError(
                    "no non-empty 'start=' entry found in %s" % job_file)

            # NOTE(review): the '+0800' offset is hard-coded while the time
            # comes from the host's local clock -- presumably the host runs
            # in UTC+8; confirm.
            future = datetime.datetime.now() + datetime.timedelta(
                minutes=get_config(OozieConf, 'SUBMIT_DELAY_TIME'))
            replace_time = future.strftime('%Y-%m-%dT%H:%M+0800')
            updated = content.replace(start_time, replace_time)

            # Only now truncate and rewrite the file.
            with open(job_file, 'w') as fh:
                fh.write(updated)

            self.logger.info(
                "The job %s start time updated from %s to %s" %
                (self.oozie_job.name, start_time, replace_time))

        except Exception as ex:
            self.logger.error(ex)
            self.logger.error(
                "Failed to update the coordinator start time for job %s" %
                self.oozie_job.app)
Esempio n. 3
0
    def import2redis(self):
        """Load the generated top-news file into the Redis ranking set.

        Each line of the file named by ``TopNews.NEWS_DATA_FILE`` is
        expected to be ``<newsid>\\t<score>``.  If the file has content, any
        existing ``RANK_TOPN_NEWS`` key is deleted first, then every line is
        added with ``zadd`` using the score rounded to 3 decimals.

        :return: ``False`` if the file is empty or a line fails to import;
            ``None`` otherwise (including when the file does not exist).

        NOTE(review): the old key is deleted before the import starts, so a
        failure part-way through leaves a partially filled ranking.
        """
        host = get_config(ChannenNewsRedis, 'HOST')
        port = get_config(ChannenNewsRedis, 'PORT')
        data_file = get_config(TopNews, 'NEWS_DATA_FILE')
        redisConn = RedisClient(host, port).getConnection()

        if not os.path.exists(data_file):
            # Previously skipped without any trace; make the skip visible.
            self.logger.error(
                "Exception: could not find the news data file %s" %
                data_file)
            return None

        with open(data_file) as f:
            lines = f.readlines()

        if len(lines) == 0:
            self.logger.info(
                "Exception: get empty result from top_news.txt, skip import2redis"
            )
            return False

        self.logger.info("Get %s generated top news" % len(lines))
        rank_key = NewsRedisKey.RANK_TOPN_NEWS.keyPattern
        key_exist = redisConn.keys(rank_key)
        if len(key_exist) != 0:
            self.logger.info(
                "clear all the cached news before update top news")
            redisConn.delete(rank_key)
            self.logger.info("the rank news key has alredy deleted")

        for data in lines:
            # Reset per line: the error handler below reads ``newsid``, which
            # would otherwise be unbound (first line malformed) or left over
            # from the previous line.
            newsid = None
            try:
                fields = data.strip('\n').split('\t')
                newsid, score = fields[0], fields[1]
                member = str(News(newsid, NewsType.NEWS))
                redisConn.zadd(rank_key, member, round(float(score), 3))
                #TODO  set expire time?
            except Exception as ex:
                self.logger.error(
                    "Exception: import news into redis failed, newsid=%s"
                    % newsid)
                self.logger.error(ex)
                return False
Esempio n. 4
0
    def exec_hive(self):
        """Run the configured Hive batch script through ``sh``.

        The script path is ``HQL_PATH`` concatenated with the
        ``TopNews.BATCH_FILE`` configuration value.

        :return: the value returned by ``os.system`` (``0`` on success), or
            ``1`` when the batch file does not exist and nothing was run.
        """
        batch = HQL_PATH + get_config(TopNews, 'BATCH_FILE')
        if not os.path.exists(batch):
            self.logger.error("Exception: could not find the batch file %s" %
                              batch)
            # Previously execution fell through to os.system(cmd) with
            # ``cmd`` never assigned, raising UnboundLocalError.  Report the
            # failure through the normal non-zero return value instead.
            return 1

        result = os.system('sh %s' % batch)
        if result == 0:
            self.logger.info(
                "hive job for generate top100 hot news successfully")
        else:
            self.logger.error("hive job execute failed")

        return result
Esempio n. 5
0
 def __init__(self):
     """Initialise the base client from the configured Oozie URL.

     Passes the ``OozieConf.OOZIE_URL`` configuration value and the
     literal ``'v1'`` (presumably the REST API version -- confirm against
     the base class) to the parent constructor.
     """
     oozie_url = get_config(OozieConf, 'OOZIE_URL')
     parent = super(OozieRSClientV1, self)
     parent.__init__(oozie_url, 'v1')