Example #1
class WordCount(Topology):
    word_spout = KafkaSpout.spec()
    process_bolt = ProcessBolt.spec(inputs=[word_spout], par=2)
    senti_bolt = SentimentBolt.spec(
        inputs={process_bolt: Grouping.fields('tweet')}, par=2)
    entity_bolt = EntityBolt.spec(
        inputs={senti_bolt: Grouping.fields('tweet')}, par=2)
Example #2
class WordCount(Topology):
    """组装topo结构"""
    word_spout = SentenceSpout.spec()
    split_bolt = SentenceSplitterBolt.spec(
        inputs={word_spout: Grouping.fields("sentence")}, par=2)
    count_bolt = SentenceCountBolt.spec(
        inputs={split_bolt: Grouping.fields('word', 'num')}, par=2)
Example #3
class WordCount(Topology):
    product_spout = ProductSpout.spec()
    user_profile_spout = UserProfileSpout.spec()

    product_bolt = ProductBolt.spec(
        inputs={product_spout: Grouping.fields('product')}, par=2)
    profile_bolt = ProductBolt.spec(
        inputs={user_profile_spout: Grouping.fields('profile')}, par=2)
Example #4
class TopWordFinderTopologyPartA(Topology):
    # TODO:
    # Task: wire up the topology
    # Make sure you use the following names for each component
    # RandomSentenceSpout -> "spout"
    spout = RandomSentenceSpout.spec()
    # SplitSentenceBolt -> "split"
    split = SplitSentenceBolt.spec(inputs={spout: Grouping.fields('word')})
    # WordCountBolt -> "count"
    count = WordCountBolt.spec(inputs={split: Grouping.fields('word')})
Example #5
class NearestVectors(Topology):
    vector_spout = IndexSpout.spec()
    vector_bolt = VectorMapBolt.spec(inputs={vector_spout: Grouping.SHUFFLE},
                                     par=8)
    index_bolt = PairProcessBolt.spec(inputs={vector_bolt: Grouping.SHUFFLE},
                                      par=8)
    sum_bolt = VectorSumBolt.spec(
        inputs={index_bolt: Grouping.fields('vector_id')}, par=8)
    final_bolt = NearestBolt.spec(inputs={sum_bolt: Grouping.fields('final')},
                                  par=8)
Example #6
class WordCount(Topology):
    redditStream_spout = streamRedditSpout.spec()
    titleSplit_bolt = titleSplitterBolt.spec(
        inputs={redditStream_spout: Grouping.fields('redditTitle',
                                                    'redditLink')},
        par=2)
    matchKeywords_bolt = matchKeywordsBolt.spec(
        inputs={titleSplit_bolt: Grouping.fields('splitTitle', 'redditTitle',
                                                 'redditLink')},
        par=2)
Example #7
class TopWordFinderTopologyPartA(Topology):
    config = {'coursera.datafile': 'resources/data.txt'}

    # TODO:
    # Task: wire up the topology
    # Make sure you use the following names for each component
    # FileReaderSpout -> "spout"
    # a spout takes no inputs; pass the data-file setting as component config
    spout = FileReaderSpout.spec(config=config)
    # SplitSentenceBolt -> "split"
    split = SplitSentenceBolt.spec(inputs={spout: Grouping.fields('word')})
    # WordCountBolt -> "count"
    count = WordCountBolt.spec(inputs={split: Grouping.fields('word')})
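The 'coursera.datafile' entry above is ordinary Storm configuration, so it reaches each component through the conf dict passed at startup. As a rough illustration only (the real FileReaderSpout from the assignment is not shown in this listing, and the field name 'word' is assumed to match the grouping used above), a streamparse spout could read it like this:

from streamparse import Spout


class FileReaderSpout(Spout):
    # field name kept as 'word' to match Grouping.fields('word') above;
    # this whole class is an illustrative sketch, not the assignment's code
    outputs = ['word']

    def initialize(self, storm_conf, context):
        # custom topology/component config keys arrive in storm_conf
        datafile = storm_conf.get('coursera.datafile', 'resources/data.txt')
        self._lines = iter(open(datafile))

    def next_tuple(self):
        line = next(self._lines, None)
        if line is not None:
            self.emit([line.strip()])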
Example #8
class WordCount(Topology):
    # spout 1
    word_spout = WordSpout.spec()

    # bolt 1: processes the words
    count_bolt = WordCountBolt.spec(
        inputs={word_spout: Grouping.fields('word')}, par=2)

    # bolt 2
    count_bolt2 = WordCountBolt2.spec(
        inputs={count_bolt: Grouping.fields('word')}, par=2)

    # bolt 3
    count_bolt3 = WordCountBolt3.spec(
        inputs={count_bolt: Grouping.fields('word')}, par=2)
Example #9
class TopWordFinderTopologyPartC(Topology):
    config = {'coursera.datafile': 'resources/data.txt'}

    # TODO:
    # Task: wire up the topology
    # Make sure you use the following names for each component
    # FileReaderSpout -> "spout"
    # SplitSentenceBolt -> "split"
    # WordCountBolt -> "count"
    # NormalizerBolt -> "normalize"
    # TopNFinderBolt -> "top-n"

    # NOTE: will have to manually kill Topology after submission
    sentence_spout = FileReaderSpout.spec(name='spout')
    split_bolt = SplitSentenceBolt.spec(name='split', inputs=[sentence_spout])
    normalize_bolt = NormalizerBolt.spec(name='normalize', inputs=[split_bolt])
    count_bolt = WordCountBolt.spec(
        name='count', inputs={normalize_bolt: Grouping.fields('word')})
    topnfinder_bolt = TopNFinderBolt.spec(
        name='top-n', inputs={count_bolt: Grouping.fields('word')})
Example #10
class PixelCount(Topology):
    pixel_spout = JavaSpout.spec(
        name="pixel-spout",
        full_class_name="pixelcount.spouts.PixelSpout",
        args_list=[],
        outputs=["pixel"])
    pixel_deserializer = PixelDeserializerBolt.spec(
        name='pixel-deserializer-bolt', inputs=[pixel_spout])
    pixel_counter = PixelCounterBolt.spec(
        name='pixel-count-bolt',
        inputs={pixel_deserializer: Grouping.fields('url')},
        config={"topology.tick.tuple.freq.secs": 1})
Example #11
class TopWordFinderTopologyPartA(Topology):
    # TODO:
    # Task: wire up the topology
    # Make sure you use the following names for each component
    # RandomSentenceSpout -> "spout"
    # SplitSentenceBolt -> "split"
    # WordCountBolt -> "count"

    # NOTE: will have to manually kill Topology after submission

    sentence_spout = RandomSentenceSpout.spec(name='spout')
    split_bolt = SplitSentenceBolt.spec(name='split', inputs=[sentence_spout])
    count_bolt = WordCountBolt.spec(
        name='count', inputs={split_bolt: Grouping.fields('word')})
Example #12
class TweetProcess(Topology):
    tweet_spout = TweetSpout.spec()

    amazon_bolt = ProcessAmazon.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=1)
    apple_bolt = ProcessApple.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=1)
    facebook_bolt = ProcessFacebook.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=1)
    google_bolt = ProcessGoogle.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=1)
    lyft_bolt = ProcessLyft.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=1)
    microsoft_bolt = ProcessMicrosoft.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=1)
    twitter_bolt = ProcessTwitter.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=1)
    uber_bolt = ProcessUber.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=1)
Example #13
class WordCount(Topology):
    word_spout = WordSpout.spec()
    count_bolt = WordCountBolt.spec(
        inputs={word_spout: Grouping.fields('word')}, par=2)
Example #14
class WordCount(Topology):
    tweet_spout = TweetSpout.spec()
    count_bolt = AMSBolt.spec(inputs={tweet_spout: Grouping.fields("tweet")},
                              par=1)
Example #15
class WordCount(Topology):
    tweet_spout = Tweets.spec()
    count_bolt = WordCounter.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=55555)
Example #16
class OutputDebugTopology(Topology):
    files_spout = FilesMailSpout.spec(
        name="files-mails")

    tokenizer = Tokenizer.spec(
        name="tokenizer",
        inputs=[files_spout],
        par=1)

    attachments = Attachments.spec(
        name="attachments",
        inputs={tokenizer['attachments']: Grouping.fields('sha256_random')},
        par=2)

    urls_body = UrlsHandlerBody.spec(
        name="urls-handler-body",
        inputs={tokenizer['body']: Grouping.fields('sha256_random')})

    urls_attachments = UrlsHandlerAttachments.spec(
        name="urls-handler-attachments",
        inputs={attachments: Grouping.fields('sha256_random')})

    phishing = Phishing.spec(
        name="phishing",
        inputs={
            tokenizer['mail']: Grouping.fields('sha256_random'),
            attachments: Grouping.fields('sha256_random'),
            urls_body: Grouping.fields('sha256_random'),
            urls_attachments: Grouping.fields('sha256_random')})

    forms = Forms.spec(
        name="forms",
        inputs={tokenizer['body']: Grouping.fields('sha256_random')})

    json = JsonMaker.spec(
        name="json",
        inputs={
            tokenizer['mail']: Grouping.fields('sha256_random'),
            phishing: Grouping.fields('sha256_random'),
            attachments: Grouping.fields('sha256_random'),
            forms: Grouping.fields('sha256_random'),
            urls_body: Grouping.fields('sha256_random'),
            urls_attachments: Grouping.fields('sha256_random')})

    output_debug = OutputDebug.spec(
        name="output-debug",
        inputs=[json])
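The tokenizer['attachments'], tokenizer['body'], and tokenizer['mail'] lookups above select named output streams of the tokenizer component. For that to work, the bolt declares one Stream per name. A minimal sketch of the declaration side, assuming streamparse's Stream class and with made-up field lists (the real Tokenizer declares its own fields):

from streamparse import Bolt, Stream


class Tokenizer(Bolt):
    # one Stream per named output; field lists here are illustrative only
    outputs = [
        Stream(fields=['sha256_random', 'mail'], name='mail'),
        Stream(fields=['sha256_random', 'body'], name='body'),
        Stream(fields=['sha256_random', 'with_attachments'],
               name='attachments'),
    ]

    def process(self, tup):
        sha256_random = tup.values[0]
        # route results to a specific named stream via the stream= keyword
        self.emit([sha256_random, tup.values[1]], stream='mail')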
Example #17
class SurpriseNumberTwitter(Topology):
    tweet_spout = TweetSpout.spec()
    count_bolt = AMSBolt.spec(inputs={tweet_spout: Grouping.fields("tweet")},
                              par=1)
Example #18
class Tagger(Topology):
    tweet_spout = TweetSpout.spec()
    count_bolt = TagCountBolt.spec(inputs=[tweet_spout], par=2)
    cassandra_bolt = SaveCountBolt.spec(
        inputs={count_bolt: Grouping.fields(['cls', 'tag'])}, par=2)
    cassandra_trend_bolt = TrendCountBolt.spec(
        inputs={count_bolt: Grouping.fields(['cls', 'tag'])}, par=2)
Example #19
class WordCount(Topology):
    # spec() sets the specification of a spout or bolt; it can take
    # name, par, and config arguments.
    word_spout = WordSpout.spec()
    # par is the number of parallel processes (executors) for that component.
    count_bolt = WordCountBolt.spec(
        inputs={word_spout: Grouping.fields('word')}, par=2)
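For context on what those spec() calls wire together, the components of a topology like this follow the streamparse Spout/Bolt pattern. A sketch of what WordSpout and WordCountBolt typically look like (the actual class bodies are not part of this listing):

from collections import Counter
from itertools import cycle

from streamparse import Bolt, Spout


class WordSpout(Spout):
    outputs = ['word']

    def initialize(self, stormconf, context):
        # an endless stream of words to emit
        self.words = cycle(['dog', 'cat', 'zebra', 'elephant'])

    def next_tuple(self):
        self.emit([next(self.words)])


class WordCountBolt(Bolt):
    outputs = ['word', 'count']

    def initialize(self, conf, ctx):
        self.counts = Counter()

    def process(self, tup):
        # the fields grouping on 'word' guarantees the same word always
        # lands on the same one of the two parallel bolt tasks
        word = tup.values[0]
        self.counts[word] += 1
        self.emit([word, self.counts[word]])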
Example #20
class Twitter(Topology):
    tweet_spout = TweetSpout.spec()
    tweet_bolt = TweetBolt.spec(inputs={tweet_spout: Grouping.fields("lang")},
                                par=2)
Example #21
class Processing(Topology):
    consumer_spout = ConsumerSpout.spec(par=5)
    processing_bolt = ProcessingBolt.spec(
        inputs={consumer_spout: Grouping.fields('sentences')}, par=5)
Example #22
class OutputElasticsearchTopology(Topology):

    files_spout = FilesMailSpout.spec(
        name="files-mails")

    tokenizer = Tokenizer.spec(
        name="tokenizer",
        inputs=[files_spout],
        par=1)

    attachments = Attachments.spec(
        name="attachments",
        inputs={tokenizer['attachments']: Grouping.fields('sha256_random')},
        par=1)

    urls = Urls.spec(
        name="urls",
        inputs={
            attachments: Grouping.fields('sha256_random'),
            tokenizer['body']: Grouping.fields('sha256_random')})

    phishing = Phishing.spec(
        name="phishing",
        inputs={
            attachments: Grouping.fields('sha256_random'),
            tokenizer['mail']: Grouping.fields('sha256_random'),
            urls: Grouping.fields('sha256_random')})

    network = Network.spec(
        name="network",
        inputs={tokenizer['network']: Grouping.fields('sha256_random')},
        par=1)

    raw_mail = RawMail.spec(
        name="raw_mail",
        inputs={tokenizer['raw_mail']: Grouping.fields('sha256_random')},
        par=1)

    json_maker = JsonMaker.spec(
        name="json_maker",
        inputs={
            attachments: Grouping.fields('sha256_random'),
            network: Grouping.fields('sha256_random'),
            phishing: Grouping.fields('sha256_random'),
            raw_mail: Grouping.fields('sha256_random'),
            tokenizer['mail']: Grouping.fields('sha256_random'),
            urls: Grouping.fields('sha256_random')})

    output_elasticsearch = OutputElasticsearch.spec(
        name="output-elasticsearch",
        inputs=[json_maker])
Example #23
class WordCount(Topology):
    event_spout = EventSpout.spec()
    count_bolt = DirectionBolt.spec(
        inputs={event_spout: Grouping.fields('array_event_id')}, par=2)