Beispiel #1
0
class TagCountBolt(Bolt):
    outputs = ['cls', 'tag', 'date', 'hour']

    def initialize(self, conf, ctx):
        self.counter = 0
        self.pid = os.getpid()
        self.total = 0
        self.classifier = Classifier()
        self.directory = str(os.getcwd()) + "/Tweet_Images"
        if not os.path.exists(self.directory):
            os.makedirs(self.directory)
            #self.logger.info("------CREATED FOLDER--------")

    def _increment(self, word, inc_by):
        self.counter[word] += inc_by
        self.total += inc_by

    def process(self, tup):
        data = json.loads(tup.values[0].encode('utf-8'))
        self.logger.info(data)
        if 'img_url' in data:
            path = "{}/{}.jpg".format(self.directory, self.counter)
            try:
                urllib.urlretrieve(data['img_url'], path)
                self.counter = self.counter + 1
                self.classifier.load_image(path)
                predicted_class = self.classifier.classify()
                #self.logger.info("\n [INFO_BOLT_PREDICTION] : "+ " ".join(predicted_class))
                if len(data['hash']) > 0:
                    tags = [
                        str(li['text']) for li in data['hash']
                        if li['text'][0:1] != "\\"
                    ]
                    #self.logger.info("\n [INFO_BOLT_TAGS] : "+ " ".join(tags))

                    now = datetime.datetime.now()
                    now_date = "{:04}-{:02}-{:02}".format(
                        now.year, now.month, now.day)
                    for cls in predicted_class:
                        if len(tags) > 0:
                            for tag in tags:
                                self.emit([cls, tag, now_date, str(now.hour)])
                                #self.logger.info("{0}/{1}".format(cls,tag))

                os.remove(path)

            except (KeyboardInterrupt, Exception):
                self.logger.info(Exception)

        else:
            self.logger.info("NO IMG URL!!!")
            #self.logger.info(json.dumps(data))

        if self.counter % 10 == 0:
            self.logger.info("Processed [{:,}] tweets".format(self.counter))
Beispiel #2
0
# -*- coding: utf-8 -*-

from classify import Classifier
import urllib

clf = Classifier()
clf.load_image("/home/hari/Documents/Big_data/bits-please/car.jpg")
predicted_class = clf.classify()
print (" ".join(predicted_class))