def run(self):
     """
     Generate the concord Computation class and run
     via serve_computation
     """
     attrs = {
         'metadata': self._get_metadata(),
         'init': self._get_init(),
         'process_timer': self._get_process_timer(),
         'process_record': self._get_process_record(),
     }
     computation = type(self.name, (ComputationWrapper,), attrs)
     serve_computation(computation())
def serve_test_computation(handler):
    logger.info("About to serve computation and service")

    def tryGetEnv(key):
        try:
            return os.environ[key]
        except Exception as e:
            logger.error('Error getting os.environ[%s]' % key)
            logger.fatal(e)

    zookeeper_url = tryGetEnv('integration_test_zookeeper_url')
    test_id = tryGetEnv('integration_test_id')
    node_id = tryGetEnv('integration_test_node_id')


    handler.concord = ZookeeperContext(zookeeper_url,
                                       test_id,
                                       node_id)
    logger.info("Defering further init: concord.computation.serve_computation")
    serve_computation(handler)

    def metadata(self):
        return Metadata(name=self.name,
                        istreams=[],
                        ostreams=["nothing_going_here"])


class Gatherer(Computation):
    def process_record(self, ctx, record):
        self.concord_logger.info("{}:{}".format(record.key, record.data))

    def metadata(self):
        return Metadata(name="gatherer",
                        istreams=["nothing_going_here"],
                        ostreams=[])

    def init(self, ctx):
        pass

    def process_timer(self, ctx, key, timer):
        pass


if __name__ == "__main__":
    import sys
    if len(sys.argv) > 1:
        serve_computation(MultiInstance(sys.argv[1]))
    else:
        serve_computation(Gatherer())
import json
import sys
import unicodedata
import logging
import concord
from concord.computation import (
    Computation,
    Metadata,
    serve_computation
)
logging.basicConfig()
log = logging.getLogger('CoinbasePricePrinter')
log.setLevel(logging.DEBUG)

class CoinbasePricePrinter(Computation):
    def init(self, ctx):
        log.info("Price Printer initialized")
    def process_record(self, ctx, record):
        r = json.loads(record.data)
        price = r.get('price', 'no-price-avail')
        log.info('Price: %s', price)
    def metadata(self):
        return Metadata(name='coinbase-price-printer',
                        istreams=['btcusd'], ostreams=[])

serve_computation(CoinbasePricePrinter())
Example #5
        urls = []
    return urls

class MedicalDevicesUrlGenerator(Computation):
    def init(self, ctx):
        self.concord_logger.info("MedicalDevicesUrlGenerator init")
        ctx.set_timer('loop', time_millis())
    def destroy(self):
        self.concord_logger.info("MedicalDevicesUrlGenerator destroyed")
    def process_timer(self, ctx, key, time):
        urls = raw_urls()
        for url in urls:
            # check in the cache if we have already processed this url
            h = url_hash(url)
            if len(ctx.get_state(h)) == 0:
                url_b = bytes(url)
                ctx.set_state(h, url_b)
                ctx.produce_record("m-device-urls", h, url_b)

        delay_ms = 1000 * 60 * 10  # 10 minutes
        ctx.set_timer(key, time_millis() + delay_ms)
    def process_record(self, ctx, record):
        raise Exception('process_record not implemented')
    def metadata(self):
        return Metadata(
            name='m-devices',
            istreams=[],
            ostreams=['m-device-urls'])

serve_computation(MedicalDevicesUrlGenerator())
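
# Note: the generator above also calls url_hash() (and raw_urls(), only the
# tail of which survived the snippet). A minimal sketch of such a helper,
# assuming any stable hex digest of the URL is an acceptable key for
# ctx.get_state / ctx.set_state -- illustrative, not the original code:
import hashlib

def url_hash(url):
    # derive a compact, deterministic state key from the URL
    return hashlib.sha1(url.encode('utf-8')).hexdigest()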
Example #6
class CoinbaseSource(Computation):
    def __init__(self):
        self.queue = Queue()

    def init(self, ctx):
        ctx.set_timer('loop', time_millis() + 1000)  # start in 1 sec
        log.info("Coinbase initialized")

    def process_timer(self, ctx, key, time):
        while not self.queue.empty():
            ctx.produce_record('btcusd', 'empty', self.queue.get())
        ctx.set_timer(key, time_millis() + 1000)  # every sec

    def metadata(self):
        return Metadata(name='coinbase-indx', istreams=[], ostreams=['btcusd'])


def gen_coinbase_source():
    ret = CoinbaseSource()
    factory = WebSocketClientFactory("wss://ws-feed.exchange.coinbase.com")
    factory.queue = ret.queue
    factory.close_cb = reactor.stop
    factory.protocol = ExchangeProtocol
    connectWS(factory)
    Thread(target=reactor.run, args=(False, )).start()
    return ret


serve_computation(gen_coinbase_source())
import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

class RSSParser(Computation):
    def __init__(self):
        pass

    def init(self, ctx):
        self.count = 1
        logger.info("Source initialized")

    def process_record(self, ctx, record):
        j = json.loads(record.data)
        if j['summary'].find('free') != -1:
            logger.info("Someone is giving away free stuff! url = " + j['url'])
        #logger.info("receiving = " + record.data)
        self.count += 1
        #logger.info("threads parsed: " + str(self.count))

    def metadata(self):
        return Metadata(
            name='rssfeedparser',
            istreams=['rssfeeds'],
            ostreams=[])

logger.info("Main")
serve_computation(RSSParser())
Example #8
    # This deployment can't be killed with the `concord kill -a` command,
    # because a timer must fire before the kill is processed.
    def init(self, ctx):
        self.started = False
        self.concord_logger.info("init")
        ctx.set_timer("start_cycle", int(round(time.time() * 1000)))

    def destroy(self):
        pass

    def process_timer(self, ctx, key, timer):
        self.concord_logger.info("process_timer")
        ctx.produce_record("cycle", "something",
                           "=====start this cycle------>>>")
        if not self.started:
            self.started = True
            ctx.set_timer("start_cycle", int(round(time.time() * 1000)))

    def process_record(self, ctx, record):
        self.concord_logger.info("{}:{}:{}".format(record.key, record.data,
                                                   record.time))
        time.sleep(2)
        ctx.produce_record("cycle", "something", "444")

    def metadata(self):
        return Metadata(name='cyclical',
                        istreams=[("cycle", StreamGrouping.GROUP_BY)],
                        ostreams=["cycle"])


serve_computation(Cyclical())
        
        # Update cache with freshest metric data
        self.providerCtr[provider] = (clicks, impressions)

    # metadata takes no arguments.
    # returns: An object of type 'Metadata'. This object has three named fields, name, istreams,
    # and ostreams. The data in this object will be used by the framework to determine what streams
    # this operator subscribes and/or publishes to.
    def metadata(self):
        # istreams is an array that expects tuples of a type: (string, StreamGrouping).
        # In this example there are multiple istreams, therefore this computation is performing
        # a type of stream join.
        
        # The GROUP_BY enum instructs the framework that an aggregation by key is to be performed.
        # This ensures that records with the same key are always sent to the same instance of
        # this operator.

        # By aggregating two streams by key we can be assured that records from any stream emitted
        # on a particular provider will always be sent to the same instance of CtrCalculator.
        return Metadata(
            name='ctr-calculator',
            istreams=[('impressions', StreamGrouping.GROUP_BY),
                      ('clicks', StreamGrouping.GROUP_BY)],
            ostreams=['click_through_rate'])
    

# Initializes the thrift server so the concord proxy can communicate with this process via thrift RPC.
# Makes 'registerWithScheduler' RPC which begins a series of events that will place this operator into
# a concord topology.
serve_computation(CtrCalculator())
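
# Note: the CtrCalculator snippet above is truncated; only the tail of its
# process_record (the cache update) survived. The sketch below shows one way
# the record handler could be written, assuming record.key carries the
# provider name and self.providerCtr maps provider -> (clicks, impressions).
# The names mirror the fragment above, but the body is illustrative, not the
# original implementation.
from concord.computation import (Computation, Metadata, StreamGrouping,
                                 serve_computation)

class CtrCalculatorSketch(Computation):
    def init(self, ctx):
        self.providerCtr = {}

    def process_timer(self, ctx, key, time):
        pass

    def process_record(self, ctx, record):
        clicks, impressions = self.providerCtr.get(record.key, (0, 0))
        if record.stream == 'clicks':
            clicks += 1
        elif record.stream == 'impressions':
            impressions += 1
        if impressions > 0:
            ctx.produce_record('click_through_rate', record.key,
                               str(float(clicks) / impressions))
        # Update cache with freshest metric data
        self.providerCtr[record.key] = (clicks, impressions)

    def metadata(self):
        return Metadata(
            name='ctr-calculator-sketch',
            istreams=[('impressions', StreamGrouping.GROUP_BY),
                      ('clicks', StreamGrouping.GROUP_BY)],
            ostreams=['click_through_rate'])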
import json
import sys
import unicodedata
import logging
import concord
from concord.computation import (Computation, Metadata, serve_computation)

logging.basicConfig()
log = logging.getLogger('CoinbasePricePrinter')
log.setLevel(logging.DEBUG)


class CoinbasePricePrinter(Computation):
    def init(self, ctx):
        log.info("Price Printer initialized")

    def process_record(self, ctx, record):
        r = json.loads(record.data)
        price = r.get('price', 'no-price-avail')
        log.info('Price: %s', price)

    def metadata(self):
        return Metadata(name='coinbase-price-printer',
                        istreams=['btcusd'],
                        ostreams=[])


serve_computation(CoinbasePricePrinter())
    def __init__(self):
        self.dict = {}
        self.pidx = 0 # print index

    def init(self, ctx):
        self.concord_logger.info("Counter initialized")

    def destroy(self):
        self.concord_logger.info("Source destroyed")

    def process_timer(self, ctx, key, time):
        raise Exception('process_timer not implemented')

    def process_record(self, ctx, record):
        self.pidx += 1
        if record.key in self.dict:
            self.dict[record.key] += 1
        else:
            self.dict[record.key] = 1

        if (self.pidx % 10000) == 0:
            self.concord_logger.info(self.dict)

    def metadata(self):
        return Metadata(
            name='word-counter',
            istreams=[('words', StreamGrouping.GROUP_BY)],
            ostreams=[])

serve_computation(WordCounter())
                logger.info('Dumping matches!')
                output = '\n'.join([str(x) for x in self.batch])
                logger.debug(output)
                self.batch = []

    def watcher(self, children):
        """ Callback that is triggered when data is posted to zookeeper
        path '/regex'"""
        logger.info('detected new node %s' % children)
        for child in children:
            if child not in self.expressions:
                fn = partial(DistGrep.new_regex, self, child)
                self.zk_client.DataWatch('/regex/%s' % child, func=fn)

    def new_regex(self, child, data, stats):
        if data is None and stats is None:
            return True
        self.expressions[child] = data
        logger.info('Child reporting %s' % child)
        logger.info('registering new regex %s' % data)

    def metadata(self):
        """ Called when scheduler is initializing computation for launch.
        Must return object of type concord.Metadata"""
        return Metadata(name='dist-grep',
                        istreams=['logs'],
                        ostreams=[])

logger.info("Inside of Main")
serve_computation(DistGrep())
        d = feedparser.parse(completeurl)
        for a in d['entries']:
            #logger.info("reading from rss obj")
            summary = a['summary_detail']['value']
            link = a['links'][0]['href']
            #logger.info("creating json data")
            data = {}
            data['url'] = link
            data['summary'] = summary
            json_data = json.dumps(data)
            #logger.info("created json data")
            ctx.produce_record("rssfeeds", "content", json_data.encode('utf-8'))
            #logger.info("sent message")
            self.count += 1
        #logger.info("sent " + str(self.count) + " items")
        ctx.set_timer('loop', time_millis() + 1000)


    def process_record(self, ctx, record):
        raise Exception('process_record not implemented')

    def metadata(self):
        return Metadata(
            name='rssfeedgenerator',
            istreams=[],
            ostreams=['rssfeeds'])

logger.info("Main")
serve_computation(RSSGenerator())
Example #15
                except Exception as e:
                    print("Exception closing readers ", e)
                raise StopIteration
            except Exception as e:
                self.bad_records_parsed += 1
                print("Unhandled error in url parsing, skipping record: ", e)


class MedicalDevicesParser(Computation):
    def init(self, ctx):
        pass

    def destroy(self):
        pass

    def process_timer(self, ctx, key, time):
        pass

    def process_record(self, ctx, record):
        for obj in MedicalDeviceIterator(str(record.data)):
            try:
                ctx.produce_record("m-devices-json", bytes(obj.id), bytes(obj.to_json()))
            except Exception as e:
                print "Exception producing record", e

    def metadata(self):
        return Metadata(name="m-device-parser", istreams=["m-device-urls"], ostreams=["m-devices-json"])


serve_computation(MedicalDevicesParser())
    def __init__(self):
        self.words = ['foo', 'bar', 'baz', 'fiz', 'buzz']

    def sample(self):
        """returns a random word"""
        import random
        return random.choice(self.words)

    def init(self, ctx):
        self.concord_logger.info("Source initialized")
        ctx.set_timer('loop', time_millis())

    def process_timer(self, ctx, key, time):
        # stream, key, value. empty value, no need for val
        for _ in range(0, 1024):
            ctx.produce_record("words", self.sample(), '-')

        # schedule the next batch of records in 5 seconds
        ctx.set_timer("main_loop", time_millis() + 5000)

    def process_record(self, ctx, record):
        raise Exception('process_record not implemented')

    def metadata(self):
        return Metadata(
            name='word-source',
            istreams=[],
            ostreams=['words'])

serve_computation(WordSource())
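
# Note: time_millis() is used by this and several other snippets on this page,
# but its definition was trimmed. A common helper in Concord examples (an
# assumption about the omitted code, matching the inline
# int(round(time.time() * 1000)) pattern used elsewhere on this page):
import time

def time_millis():
    # current wall-clock time in milliseconds, as ctx.set_timer expects
    return int(round(time.time() * 1000))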
    def init(self, ctx):
        self.concord_logger.info("Operator initialized")
        if self.prune_time > 0:
            ctx.set_timer('loop', int(time.time() * 1000))

    def process_timer(self, ctx, key, time):
        """ Prune the cache of expired items every 'prune_time' seconds.
        Otherwise this would only happen when mutating the cache"""
        self.cache.expire()
        ctx.set_timer('cleanup_loop', int((time.time() + self.prune_time) * 1000))

    def process_record(self, ctx, record):
        """ With GROUP_BY routing strategy, it is guaranteed that the same
        key will be sent to the same operator, regardless of scaling"""
        if record.stream == 'bids':
            self.cache[record.key] = record.data
        elif record.stream == 'imps':
            bid = self.cache[record.key]
            if bid is not None:
                ctx.produce_record('winningbids', record.key, '-')

    def metadata(self):
        return Metadata(
            name='filter-winning-bids',
            istreams=[('bids', StreamGrouping.GROUP_BY),
                      ('imps', StreamGrouping.GROUP_BY)],
            ostreams=['winningbids'])

serve_computation(FilterWinningBids(5000000, 60))
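
# Note: the FilterWinningBids snippet above is missing its constructor; it is
# instantiated as FilterWinningBids(5000000, 60), which suggests a maximum
# cache size and a prune interval in seconds. A minimal sketch of such a
# constructor, assuming an expiring map like cachetools.TTLCache backs
# self.cache (an assumption -- the original backing store is not shown):
from cachetools import TTLCache

class FilterWinningBidsInitSketch(object):
    def __init__(self, cache_size, prune_time):
        # bids expire from the cache after prune_time seconds
        self.cache = TTLCache(maxsize=cache_size, ttl=prune_time)
        self.prune_time = prune_time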
Example #18
        import random
        return random.choice(self.sentences)

    def init(self, ctx):
        self.concord_logger.info("Source initialized")
        ctx.set_timer('loop', time_millis())

    def destroy(self):
        self.concord_logger.info("Source destroyed")

    def process_timer(self, ctx, key, time):
        # stream, key, value. empty value, no need for val
        iterations = 10000
        while iterations > 0:
            iterations -= 1
            ctx.produce_record("sentences", self.sample(), '-')

        # schedule the next batch immediately
        ctx.set_timer("main_loop", time_millis())

    def process_record(self, ctx, record):
        raise Exception('process_record not implemented')

    def metadata(self):
        return Metadata(name='sentence-generator',
                        istreams=[],
                        ostreams=['sentences'])


serve_computation(SentenceGenerator())
from concord.computation import (Computation, Metadata, serve_computation)
from concord.internal.thrift.ttypes import StreamGrouping


class SentenceSplitter(Computation):
    def init(self, ctx):
        self.concord_logger.info("Splitter initialized")

    def destroy(self):
        self.concord_logger.info("Source destroyed")

    def process_timer(self, ctx, key, time):
        raise Exception('process_timer not implemented')

    def process_record(self, ctx, record):
        for word in record.key.split(" "):
            ctx.produce_record('words', word, '-')

    def metadata(self):
        return Metadata(name='sentence-splitter',
                        istreams=['sentences'],
                        ostreams=['words'])


serve_computation(SentenceSplitter())
Example #20
        sniff_on_connection_fail=True,
        # and also every 60 seconds
        sniffer_timeout=60
    )
    return es

class MDeviceIndexer(Computation):
    def init(self, ctx):
        self.es = get_elastic_search_connection()
        # ignore 400 caused by IndexAlreadyExistsException when creating an index
        self.es.indices.create(index='concord', ignore=400)
    def destroy(self): pass
    def process_timer(self, ctx, key, time): pass
    def process_record(self, ctx, record):
        try:
            res = self.es.index(index="concord",
                                doc_type="mdevice",
                                id=record.key,
                                body=record.data)
            if not res['created']:
                print "Error saving to elastic search: ", res
        except Exception as e:
            print "Couldn't index record: ", e
    def metadata(self):
        return Metadata(
            name='m-device-es',
            istreams=['m-devices-json'],
            ostreams=[])

serve_computation(MDeviceIndexer())
Example #21
def new_time(ctx, offset_in_millis):
    current_time = time.time()
    current_millis = current_time * 1000
    rounded_time = int(round(current_millis)) + offset_in_millis
    ctx.set_timer("init", rounded_time)


class First(Computation):
    def init(self, ctx):
        self.concord_logger.info("Counter initialized")
        new_time(ctx, 3000)

    def destroy(self):
        self.concord_logger.info("Source destroyed")

    def process_timer(self, ctx, key, timer):
        self.concord_logger.info("process timer")
        ctx.produce_record("outstream", "hello world", "!")
        ctx.produce_record("outstream", "hello world", "!!!")
        ctx.produce_record("outstream", "hello world", "???")
        new_time(ctx, 3000)

    def process_record(self, ctx, record):
        self.concord_logger("process record")

    def metadata(self):
        return Metadata(name='first', istreams=[], ostreams=["outstream"])


serve_computation(First())
Example #22
    def onMessage(self, payload, *args, **kwargs):
        self.factory.queue.put(payload)
    def onClose(self, wasClean, code, reason):
        log.info("websocket closed because", reason)
        self.factory.close_cb()

class CoinbaseSource(Computation):
    def __init__(self):
        self.queue = Queue()
    def init(self, ctx):
        ctx.set_timer('loop', time_millis() + 1000) # start in 1 sec
        log.info("Coinbase initialized")
    def process_timer(self, ctx, key, time):
        while not self.queue.empty():
            ctx.produce_record('btcusd', 'empty', self.queue.get())
        ctx.set_timer(key, time_millis() + 1000) # every sec
    def metadata(self):
        return Metadata(name='coinbase-indx', istreams=[], ostreams=['btcusd'])

def gen_coinbase_source():
    ret = CoinbaseSource()
    factory = WebSocketClientFactory("wss://ws-feed.exchange.coinbase.com")
    factory.queue = ret.queue
    factory.close_cb = reactor.stop
    factory.protocol = ExchangeProtocol
    connectWS(factory)
    Thread(target=reactor.run, args=(False,)).start()
    return ret

serve_computation(gen_coinbase_source())
            u"These tools and their built-in counterparts also work well with the high-speed functions in the operator module. For example, the multiplication operator can be mapped across two vectors to form an efficient dot-product: sum(imap(operator.mul, vector1, vector2)).",
        ]

    def sample(self):
        """returns a random word"""
        import random
        return random.choice(self.sentences)

    def init(self, ctx):
        self.concord_logger.info("Source initialized")
        ctx.set_timer('loop', time_millis())

    def process_timer(self, ctx, key, time):
        # produce_record(stream, key, value): key is the loop index, value a random sentence
        for i in range(0, 1024):
            ctx.produce_record("sentences", str(i), self.sample())

        # schedule the next batch in 5 seconds
        ctx.set_timer("main_loop", time_millis() + 5000)

    def process_record(self, ctx, record):
        raise Exception('process_record not implemented')

    def metadata(self):
        return Metadata(
            name='sentence-source',
            istreams=[],
            ostreams=['sentences'])

serve_computation(SentenceSource())
Example #25
    Metadata,
    StreamGrouping,
    serve_computation
)

class Something(Computation):

    def init(self, ctx):
        self.concord_logger.info("Counter initialized")
        ctx.set_timer("init", int(round(time.time() * 1000)))

    def destroy(self):
        self.concord_logger.info("Source destroyed")

    def process_timer(self, ctx, key, timer):
        self.concord_logger.info("process timer")
        ctx.set_timer("process timer", int(round(time.time() * 1000)))

    def process_record(self, ctx, record):
      self.concord_logger("process record")

    def metadata(self):
        return Metadata(
            name='something',
            istreams=[],
            ostreams=["outputstream"])

serve_computation(Something())
Example #26
import sys
import concord
import time
from concord.computation import (Computation, Metadata, StreamGrouping,
                                 serve_computation)


class Final(Computation):
    def init(self, ctx):
        self.concord_logger.info("initialized")

    def destroy(self):
        self.concord_logger.info("Source destroyed")

    def process_timer(self, ctx, key, timer):
        pass

    def process_record(self, ctx, record):
        self.concord_logger.info("process record")
        self.concord_logger.info("=====================")
        for key, val in record.__dict__.items():
            self.concord_logger.info("{}:{}".format(key, val))

    def metadata(self):
        return Metadata(name='final',
                        istreams=[("outstream", StreamGrouping.GROUP_BY)],
                        ostreams=[])


serve_computation(Final())