Ejemplo n.º 1
0
 def execute(self, top_num, bolt_num, rid, tuple_batch, collector,
             mmp_list):
     """Accumulate the batch into ``self.counter`` and ack the running total.

     For every tuple in ``tuple_batch.tuple_list`` the element at index 1
     of its ``tup`` payload is added to ``self.counter``.  A new batch
     carrying the incoming timestamp and a single ``('result', total)``
     tuple is then handed to ``collector.ack`` and the counter is reset
     to zero.  ``mmp_list`` is not used by this method.
     """
     result_batch = TupleBatch(tuple_batch.timestamp)
     for incoming in tuple_batch.tuple_list:
         payload = incoming.tup
         self.counter += payload[1]
     result_batch.add_tuple(Tuple(('result', self.counter)))
     collector.ack(top_num, bolt_num, result_batch, rid)
     # Start from zero again for the next batch.
     self.counter = 0
Ejemplo n.º 2
0
 def execute(self, top_num, bolt_num, rid, tuple_batch, collector,
             mmp_list):
     """Split each incoming line into words and emit them downstream.

     Every tuple payload has its newline characters removed and is then
     split on single spaces; each resulting piece becomes its own
     ``Tuple`` in a new batch that keeps the incoming timestamp.  The
     batch is emitted to ``bolt_num + 1``.  ``mmp_list`` is not used by
     this method.
     """
     out_batch = TupleBatch(tuple_batch.timestamp)
     for incoming in tuple_batch.tuple_list:
         line = incoming.tup.replace("\n", "")
         for token in line.split(' '):
             out_batch.add_tuple(Tuple(token))
     collector.emit(top_num, bolt_num + 1, out_batch, rid)
Ejemplo n.º 3
0
 def execute(self, top_num, bolt_num, rid, tuple_batch, collector, mmp_list):
     """Emit one ``(url, weight)`` tuple per URL listed after the first field.

     Each tuple payload is treated as a tab-separated line.  Newlines are
     stripped, the first field is skipped (presumably the source page --
     confirm against the spout), and every remaining field is emitted
     with the weight ``1 / (number_of_remaining_fields + 1)``.  The new
     batch keeps the incoming timestamp and is emitted to
     ``bolt_num + 1``.  ``mmp_list`` is not used by this method.
     """
     new_tuple_batch = TupleBatch(tuple_batch.timestamp)
     for big_tup in tuple_batch.tuple_list:
         fields = big_tup.tup.replace("\n", "").split('\t')
         # Everything after the first field; a slice replaces the
         # index-based filter.
         urls = fields[1:]
         # The weight is identical for every URL of this line, so compute
         # it once.  The divisor is at least 1, so this cannot divide by
         # zero.
         weight = len(urls) + 1
         share = 1/weight
         for url in urls:
             new_tuple_batch.add_tuple(Tuple((url, share)))
     collector.emit(top_num, bolt_num + 1, new_tuple_batch, rid)
Ejemplo n.º 4
0
 def execute(self, top_num, bolt_num, rid, tuple_batch, collector, mmp_list):
     """Sum ranks per URL for this batch and ack the aggregated result.

     Each tuple payload is unpacked as ``(url, rank)`` and folded into
     ``self.ranks``.  Once the whole batch is consumed, every
     ``(url, total)`` pair in ``self.ranks`` is added to a new batch that
     keeps the incoming timestamp, the batch is passed to
     ``collector.ack``, and ``self.ranks`` is emptied.  ``mmp_list`` is
     not used by this method.
     """
     out_batch = TupleBatch(tuple_batch.timestamp)
     for incoming in tuple_batch.tuple_list:
         url, rank = incoming.tup
         if url in self.ranks:
             # Add what was already accumulated for this URL.
             rank += self.ranks[url]
         self.ranks[url] = rank
     for key, total in self.ranks.items():
         out_batch.add_tuple(Tuple((key, total)))
     collector.ack(top_num, bolt_num, out_batch, rid)
     # Drop the per-batch state once it has been acked.
     self.ranks.clear()
Ejemplo n.º 5
0
 def execute(self, top_num, bolt_num, rid, tuple_batch, collector,
             mmp_list):
     """Count occurrences of each payload in the batch and ack the counts.

     The ``tup`` payload of every incoming tuple is used as a key in
     ``self.counts`` and its count is incremented by one.  Afterwards
     each ``(word, count)`` pair is added to a new batch that keeps the
     incoming timestamp, the batch is passed to ``collector.ack``, and
     ``self.counts`` is emptied.  ``mmp_list`` is not used by this
     method.
     """
     out_batch = TupleBatch(tuple_batch.timestamp)
     for incoming in tuple_batch.tuple_list:
         key = incoming.tup
         self.counts[key] = self.counts.get(key, 0) + 1
     for key, total in self.counts.items():
         out_batch.add_tuple(Tuple((key, total)))
     collector.ack(top_num, bolt_num, out_batch, rid)
     # Drop the per-batch state once it has been acked.
     self.counts.clear()
Ejemplo n.º 6
0
 def start_top(self):
     """Drain the current topology's spout, emitting tuples in batches.

     Tuples are pulled from the spout of
     ``self.topology_list[self.topology_num]`` until it returns a falsy
     value.  They are wrapped in ``Tuple`` and collected in a
     ``TupleBatch`` stamped with ``time.time()``; whenever a batch holds
     at least ``CRANE_BATCH_SIZE`` tuples it is emitted and a fresh batch
     is started.  The batch in progress when the spout runs dry is
     emitted as well.  Finally ``self.monitor_thread`` is started.
     """
     topology = self.topology_list[self.topology_num]
     print(self.prefix, topology.name, " starting...")
     batch = TupleBatch(time.time())
     while True:
         raw = topology.spout.next_tup()
         if not raw:
             # Spout exhausted: flush whatever is pending and stop.
             self.emit(batch, self.topology_num)
             break
         batch.add_tuple(Tuple(raw))
         if len(batch.tuple_list) < CRANE_BATCH_SIZE:
             continue
         # Batch is full: ship it and begin a new one.
         self.emit(batch, self.topology_num)
         batch = TupleBatch(time.time())
     print(self.prefix + 'All tuples transmitted. Spout closed down.')
     self.monitor_thread.start()
Ejemplo n.º 7
0
class URL(object):
    """Mutable view of a split URL, addressable by component name or index.

    The components are the fields of ``urllib.parse.SplitResult``.  They
    are stored in the instance ``__dict__`` (a ``_Dict``), which also
    maintains two derived entries: ``basename`` (from ``path``) and
    ``subdomain`` (from ``netloc``).  Iterating an instance yields the
    component values in field order, which is what lets ``str(url)`` hand
    the instance to ``urlunsplit``.
    """

    class _Dict(dict):
        """Instance dictionary that refreshes derived entries on assignment."""

        # Keys computed by __setitem__; URL.__setitem__ refuses to set them.
        _extra = {'basename', 'subdomain'}

        def regular(self, key):
            """Return True if *key* is stored and is not a derived key."""
            return key in self and key not in self._extra

        def __setitem__(self, key, value):
            """Store *value* under *key*, updating derived entries first.

            Setting ``'path'`` also sets ``'basename'``; setting
            ``'netloc'`` also sets ``'subdomain'`` (the leading label when
            the host splits into exactly three dot-separated parts from
            the right, otherwise the empty string).
            """
            # Compare by value: identity comparison against a literal only
            # works when the key happens to be the interned object, and it
            # raises SyntaxWarning on Python 3.8+.
            if key == 'path':
                super().__setitem__('basename',
                                    os.path.basename(value) if value else '')
            elif key == 'netloc':
                domain = value.rsplit('.', 2)
                super().__setitem__('subdomain',
                                    domain[0] if len(domain) == 3 else '')

            return super().__setitem__(key, value)

    # NOTE(review): ``Tuple`` is defined outside this class.  The code below
    # relies on it supporting ``keys * values`` (result is passed to
    # ``dict()``), ``.index(value, fallback)`` and ``.get(index)`` --
    # confirm against its definition.
    _keys = Tuple(SplitResult._fields)
    # Class-level defaults for the derived attributes; __init__ assigns
    # 'path' and 'netloc', which populates them per instance.
    basename = None
    subdomain = None

    def __init__(self, url=None, cut=None):
        """Build a URL from a string, a mapping, or a sequence of components.

        Args:
            url: A ``str`` (parsed with ``urlsplit``), a ``dict`` of
                component names to values, any other non-``None`` object
                (combined with the field names via ``_keys * url``), or
                ``None`` for an all-empty URL.
            cut: Optional component name (``str``) or position (``int``).
                Components from that position onwards are set to the
                empty string; by default nothing is cut.
        """
        self.__dict__ = self._Dict()
        _keys = self._keys
        if isinstance(url, str):
            data = _keys * urlsplit(url)
        elif isinstance(url, dict):
            data = url
        elif url is not None:
            data = _keys * url
        else:
            data = []

        # Position of the first component to blank out.
        index = len(_keys)
        if isinstance(cut, str):
            index = _keys.index(cut, index)
        elif isinstance(cut, int):
            index = cut

        _dict = dict(data)
        for k in _keys[:index]:
            self._set(k, _dict.get(k, ''))
        for k in _keys[index:]:
            self._set(k)

    def _set(self, key, value=''):
        """Store *value* under *key* via ``_Dict.__setitem__``."""
        self.__dict__[key] = value

    def _regular(self, key):
        """Return True if *key* is a stored, non-derived component."""
        return self.__dict__.regular(key)

    def _key(self, key):
        """Return *key* itself if it is a ``str``, else look it up in ``_keys``."""
        return key if isinstance(key, str) else self._keys.get(key)

    def __getitem__(self, key):
        """Return the component named by, or positioned at, *key*."""
        return self.__dict__[self._key(key)]

    def __setitem__(self, key, value):
        """Set a component; derived or unknown keys are silently ignored."""
        key = self._key(key)
        if self._regular(key):
            self._set(key, value)

    def __len__(self):
        """Return the number of URL components."""
        return len(self._keys)

    def __iter__(self):
        """Yield the component values in field order."""
        for k in self._keys:
            yield self[k]

    def __str__(self):
        """Return the URL reassembled with ``urlunsplit``."""
        return urlunsplit(self)

    def format(self, *args, **kw):
        """Apply ``str.format`` to the string form of the URL."""
        return str(self).format(*args, **kw)

    def __repr__(self):
        """Return ``ClassName(field='value', ...)`` for all components."""
        _name = self.__class__.__name__
        data = (f"{k}='{self[k]}'" for k in self._keys)
        return f"{_name}({', '.join(data)})"