class WordCount(Topology):
    """Tweet-analysis topology: Kafka -> preprocess -> sentiment + entity bolts.

    NOTE(review): the class name says "WordCount" but the pipeline processes
    tweets — confirm whether the name is a leftover from a template.
    """

    # Source stream: tweets consumed from Kafka.
    word_spout = KafkaSpout.spec()
    # Preprocessing; list-style inputs use the default (shuffle) grouping.
    process_bolt = ProcessBolt.spec(inputs=[word_spout], par=2)
    # Sentiment analysis, fields-grouped on 'tweet' so the same tweet value
    # is always routed to the same executor.
    senti_bolt = SentimentBolt.spec(
        inputs={process_bolt: Grouping.fields('tweet')}, par=2)
    # Entity extraction runs downstream of sentiment, same grouping key.
    entity_bolt = EntityBolt.spec(
        inputs={senti_bolt: Grouping.fields('tweet')}, par=2)
class WordCount(Topology):
    """Assemble the topology structure: sentence spout -> splitter -> counter."""

    # Emits whole sentences.
    word_spout = SentenceSpout.spec()
    # Splits sentences into words; fields-grouped on the 'sentence' field.
    split_bolt = SentenceSplitterBolt.spec(
        inputs={word_spout: Grouping.fields("sentence")}, par=2)
    # Counts words; grouped on ('word', 'num') so identical pairs land on
    # the same counter instance.
    count_bolt = SentenceCountBolt.spec(
        inputs={split_bolt: Grouping.fields('word', 'num')}, par=2)
class WordCount(Topology):
    """Two independent streams: product events and user-profile events."""

    product_spout = ProductSpout.spec()
    user_profile_spout = UserProfileSpout.spec()
    # Product stream, grouped on 'product'.
    product_bolt = ProductBolt.spec(
        inputs={product_spout: Grouping.fields('product')}, par=2)
    # NOTE(review): this reuses ProductBolt for the profile stream — looks
    # like a copy-paste; a UserProfileBolt may have been intended. Confirm.
    profile_bolt = ProductBolt.spec(
        inputs={user_profile_spout: Grouping.fields('profile')}, par=2)
class TopWordFinderTopologyPartA(Topology):
    """Assignment topology: random sentences -> split -> word count.

    The required component names ("spout", "split", "count") are taken from
    the class attribute names below.
    """
    # TODO:
    # Task: wire up the topology
    # Make sure you use the following names for each component
    # RandomSentenceSpout -> "spout"
    spout = RandomSentenceSpout.spec()
    # SplitSentenceBolt -> "split"
    # NOTE(review): fields-grouping the spout stream on 'word' assumes the
    # spout declares a 'word' output field — confirm against the spout class.
    split = SplitSentenceBolt.spec(inputs={spout: Grouping.fields('word')})
    # WordCountBolt -> "count": same-word tuples always reach the same counter.
    count = WordCountBolt.spec(inputs={split: Grouping.fields('word')})
class NearestVectors(Topology):
    """Nearest-vector search pipeline, parallelism 8 at every stage."""

    # Emits vectors from an index.
    vector_spout = IndexSpout.spec()
    # Map stage — shuffle grouping spreads load evenly.
    vector_bolt = VectorMapBolt.spec(inputs={vector_spout: Grouping.SHUFFLE}, par=8)
    # Pairwise processing, also shuffle-grouped.
    index_bolt = PairProcessBolt.spec(inputs={vector_bolt: Grouping.SHUFFLE}, par=8)
    # Partial sums must be keyed by vector id so each id is summed in one place.
    sum_bolt = VectorSumBolt.spec(
        inputs={index_bolt: Grouping.fields('vector_id')}, par=8)
    # Final nearest-neighbour selection, grouped on 'final'.
    final_bolt = NearestBolt.spec(inputs={sum_bolt: Grouping.fields('final')}, par=8)
class WordCount(Topology):
    """Reddit stream topology: titles are split, then matched against keywords."""

    # Live Reddit submissions.
    redditStream_spout = streamRedditSpout.spec()
    # Tokenize titles; grouped on (title, link) pair.
    titleSplit_bolt = titleSplitterBolt.spec(inputs={
        redditStream_spout: Grouping.fields('redditTitle', 'redditLink')
    }, par=2)
    # Keyword matching over the split titles.
    matchKeywords_bolt = matchKeywordsBolt.spec(inputs={
        titleSplit_bolt: Grouping.fields('splitTitle', 'redditTitle', 'redditLink')
    }, par=2)
class TopWordFinderTopologyPartA(Topology):
    """Word-count topology: read a file, split sentences, count words.

    Required component names ("spout", "split", "count") come from the
    class attribute names below.
    """

    # Topology-level configuration; streamparse applies this to the topology.
    config = {'coursera.datafile': 'resources/data.txt'}

    # TODO:
    # Task: wire up the topology
    # Make sure you use the following names for each component
    # FileReaderSpout -> "spout"
    # BUG FIX: the original passed ``inputs=config`` — spouts take no
    # ``inputs`` argument, and the dict is the topology config, not an input
    # stream (compare TopWordFinderTopologyPartC, which wires it correctly).
    spout = FileReaderSpout.spec()
    # SplitSentenceBolt -> "split": fields-grouped on 'word'.
    split = SplitSentenceBolt.spec(inputs={spout: Grouping.fields('word')})
    # WordCountBolt -> "count": same word always reaches the same counter.
    count = WordCountBolt.spec(inputs={split: Grouping.fields('word')})
class WordCount(Topology):
    """Word-count topology with two second-stage bolts fanned out from bolt 1."""

    # spout1: emits words.
    word_spout = WordSpout.spec()
    # bolt1: first counting stage, fields-grouped on 'word'.
    count_bolt = WordCountBolt.spec(
        inputs={word_spout: Grouping.fields('word')}, par=2)  # process
    # bolt2: consumes bolt1's output.
    count_bolt2 = WordCountBolt2.spec(
        inputs={count_bolt: Grouping.fields('word')}, par=2)
    # bolt3: also consumes bolt1 (not bolt2) — NOTE(review): confirm this
    # fan-out from bolt1 is intended rather than a chain through bolt2.
    count_bolt3 = WordCountBolt3.spec(
        inputs={count_bolt: Grouping.fields('word')}, par=2)
class TopWordFinderTopologyPartC(Topology): config = {'coursera.datafile': 'resources/data.txt'} # TODO: # Task: wire up the topology # Make sure you use the following names for each component # FileReaderSpout -> "spout" # SplitSentenceBolt -> "split" # WordCountBolt -> "count" # NormalizerBolt -> "normalize" # TopNFinderBolt -> "top-n" # NOTE: will have to manually kill Topology after submission sentence_spout = FileReaderSpout.spec(name='spout') split_bolt = SplitSentenceBolt.spec(name='split', inputs=[sentence_spout]) normalize_bolt = NormalizerBolt.spec(name='normalize', inputs=[split_bolt]) count_bolt = WordCountBolt.spec( name='count', inputs={normalize_bolt: Grouping.fields('word')}) topnfinder_bolt = TopNFinderBolt.spec( name='top-n', inputs={count_bolt: Grouping.fields('word')})
class PixelCount(Topology):
    """Counts pixel events: JVM spout -> deserializer -> counter."""

    # JVM-side spout wrapped for this Python topology; emits a 'pixel' stream.
    pixel_spout = JavaSpout.spec(
        name="pixel-spout",
        full_class_name="pixelcount.spouts.PixelSpout",
        args_list=[],
        outputs=["pixel"])
    # Decode raw pixel tuples; shuffle grouping via list-style inputs.
    pixel_deserializer = PixelDeserializerBolt.spec(
        name='pixel-deserializer-bolt', inputs=[pixel_spout])
    # Count per URL; the tick-tuple config gives the bolt a 1s timer for
    # periodic flushing.
    pixel_counter = PixelCounterBolt.spec(
        name='pixel-count-bolt',
        inputs={pixel_deserializer: Grouping.fields('url')},
        config={"topology.tick.tuple.freq.secs": 1})
class TopWordFinderTopologyPartA(Topology):
    """Assignment part A: random sentences -> split -> word count."""

    # TODO:
    # Task: wire up the topology
    # Make sure you use the following names for each component
    # RandomSentenceSpout -> "spout"
    # SplitSentenceBolt -> "split"
    # WordCountBolt -> "count"
    # NOTE: will have to manually kill Topology after submission
    sentence_spout = RandomSentenceSpout.spec(name='spout')
    # List-style inputs use the default (shuffle) grouping.
    split_bolt = SplitSentenceBolt.spec(name='split', inputs=[sentence_spout])
    # Fields-grouped on 'word' so the same word always hits the same counter.
    count_bolt = WordCountBolt.spec(
        name='count', inputs={split_bolt: Grouping.fields('word')})
class TweetProcess(Topology):
    """Fans one tweet stream out to eight per-company processing bolts.

    Every bolt receives the full stream, fields-grouped on 'tweet', each with
    a single executor (par=1).
    """

    tweet_spout = TweetSpout.spec()
    amazon_bolt = ProcessAmazon.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=1)
    apple_bolt = ProcessApple.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=1)
    facebook_bolt = ProcessFacebook.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=1)
    google_bolt = ProcessGoogle.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=1)
    lyft_bolt = ProcessLyft.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=1)
    microsoft_bolt = ProcessMicrosoft.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=1)
    twitter_bolt = ProcessTwitter.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=1)
    uber_bolt = ProcessUber.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=1)
class WordCount(Topology):
    """Minimal word-count topology: word spout feeding a counting bolt."""

    word_spout = WordSpout.spec()
    # Fields grouping on 'word' keeps each word's count on one executor.
    count_bolt = WordCountBolt.spec(
        inputs={word_spout: Grouping.fields('word')}, par=2)
class WordCount(Topology):
    """Tweet stream into an AMS (sketch) bolt.

    NOTE(review): class is named "WordCount" but wires tweets into AMSBolt —
    likely a template leftover; confirm the intended name.
    """

    tweet_spout = TweetSpout.spec()
    # Single-executor sketch bolt, fields-grouped on 'tweet'.
    count_bolt = AMSBolt.spec(inputs={tweet_spout: Grouping.fields("tweet")}, par=1)
class WordCount(Topology):
    """Counts words from a tweet stream."""

    tweet_spout = Tweets.spec()
    # NOTE(review): par=55555 requests 55,555 parallel executors — almost
    # certainly a typo (a small value like 2 or 5 was probably intended).
    # Confirm before deploying.
    count_bolt = WordCounter.spec(
        inputs={tweet_spout: Grouping.fields('tweet')}, par=55555)
class OutputDebugTopology(Topology):
    """Mail-analysis topology with a debug output sink.

    Mails are tokenized into named sub-streams (``tokenizer['mail']``,
    ``['body']``, ``['attachments']``); every downstream bolt is
    fields-grouped on 'sha256_random' so all tuples for one mail stay
    together.
    """

    # Reads mail files from disk.
    files_spout = FilesMailSpout.spec(
        name="files-mails")
    # Single tokenizer instance; fans out per-mail streams.
    tokenizer = Tokenizer.spec(
        name="tokenizer",
        inputs=[files_spout],
        par=1)
    # Attachment extraction from the tokenizer's 'attachments' stream.
    attachments = Attachments.spec(
        name="attachments",
        inputs={tokenizer['attachments']: Grouping.fields('sha256_random')},
        par=2)
    # URL extraction from mail bodies.
    urls_body = UrlsHandlerBody.spec(
        name="urls-handler-body",
        inputs={tokenizer['body']: Grouping.fields('sha256_random')})
    # URL extraction from attachments.
    urls_attachments = UrlsHandlerAttachments.spec(
        name="urls-handler-attachments",
        inputs={attachments: Grouping.fields('sha256_random')})
    # Phishing scoring joins mail, attachments, and both URL streams.
    phishing = Phishing.spec(
        name="phishing",
        inputs={
            tokenizer['mail']: Grouping.fields('sha256_random'),
            attachments: Grouping.fields('sha256_random'),
            urls_body: Grouping.fields('sha256_random'),
            urls_attachments: Grouping.fields('sha256_random')})
    # Form extraction from mail bodies.
    forms = Forms.spec(
        name="forms",
        inputs={tokenizer['body']: Grouping.fields('sha256_random')})
    # Joins every analysis stream into a single JSON document per mail.
    # NOTE: the attribute name shadows the stdlib ``json`` module inside this
    # class body — harmless here, but keep in mind if the class grows logic.
    json = JsonMaker.spec(
        name="json",
        inputs={
            tokenizer['mail']: Grouping.fields('sha256_random'),
            phishing: Grouping.fields('sha256_random'),
            attachments: Grouping.fields('sha256_random'),
            forms: Grouping.fields('sha256_random'),
            urls_body: Grouping.fields('sha256_random'),
            urls_attachments: Grouping.fields('sha256_random')})
    # Debug sink; shuffle grouping via list-style inputs.
    output_debug = OutputDebug.spec(
        name="output-debug",
        inputs=[json])
class SurpriseNumberTwitter(Topology):
    """Feeds tweets into an AMS sketch bolt (surprise-number estimation)."""

    tweet_spout = TweetSpout.spec()
    # Single executor: the AMS sketch state must live in one place.
    count_bolt = AMSBolt.spec(inputs={tweet_spout: Grouping.fields("tweet")}, par=1)
class Tagger(Topology):
    """Counts tweet tags and persists counts/trends to Cassandra."""

    tweet_spout = TweetSpout.spec()
    # Tag counting; list-style inputs use the default (shuffle) grouping.
    count_bolt = TagCountBolt.spec(inputs=[tweet_spout], par=2)
    # NOTE(review): Grouping.fields(['cls','tag']) passes a single list
    # argument; streamparse's fields() is usually called with separate
    # strings — fields('cls', 'tag'). Confirm the list form is accepted by
    # the streamparse version in use.
    cassandra_bolt = SaveCountBolt.spec(
        inputs={count_bolt: Grouping.fields(['cls','tag'])}, par=2)
    # Trend aggregation over the same keyed stream.
    cassandra_trend_bolt = TrendCountBolt.spec(
        inputs={count_bolt: Grouping.fields(['cls','tag'])}, par=2)
class WordCount(Topology): word_spout = WordSpout.spec( ) # spec sets the specification of a spout or bolt. can take a name, par, and config argument. count_bolt = WordCountBolt.spec( inputs={word_spout: Grouping.fields('word')}, par=2 ) # par refers to the number of parallel processes for that operation.
class Twitter(Topology):
    """Routes tweets to processing bolts partitioned by language."""

    tweet_spout = TweetSpout.spec()
    # Fields grouping on "lang" sends all tweets of one language to the
    # same executor.
    tweet_bolt = TweetBolt.spec(inputs={tweet_spout: Grouping.fields("lang")}, par=2)
class Processing(Topology):
    """Consumes sentences and processes them, five executors per stage."""

    consumer_spout = ConsumerSpout.spec(par=5)
    # Fields-grouped on 'sentences' so identical sentences co-locate.
    processing_bolt = ProcessingBolt.spec(
        inputs={consumer_spout: Grouping.fields('sentences')}, par=5)
class OutputElasticsearchTopology(Topology):
    """Mail-analysis topology that indexes results into Elasticsearch.

    The tokenizer fans each mail out into named sub-streams
    (``tokenizer['mail']``, ``['body']``, ``['attachments']``, ``['network']``,
    ``['raw_mail']``); all downstream bolts are fields-grouped on
    'sha256_random' so one mail's tuples stay on one executor.
    """

    # Reads mail files from disk.
    files_spout = FilesMailSpout.spec(
        name="files-mails")
    # Single tokenizer instance.
    tokenizer = Tokenizer.spec(
        name="tokenizer",
        inputs=[files_spout],
        par=1)
    # Attachment extraction.
    attachments = Attachments.spec(
        name="attachments",
        inputs={tokenizer['attachments']: Grouping.fields('sha256_random')},
        par=1)
    # URL extraction joins body and attachment streams.
    urls = Urls.spec(
        name="urls",
        inputs={
            attachments: Grouping.fields('sha256_random'),
            tokenizer['body']: Grouping.fields('sha256_random')})
    # Phishing scoring joins attachments, mail metadata, and URLs.
    phishing = Phishing.spec(
        name="phishing",
        inputs={
            attachments: Grouping.fields('sha256_random'),
            tokenizer['mail']: Grouping.fields('sha256_random'),
            urls: Grouping.fields('sha256_random')})
    # Network-level indicators.
    network = Network.spec(
        name="network",
        inputs={tokenizer['network']: Grouping.fields('sha256_random')},
        par=1)
    # Raw mail passthrough.
    raw_mail = RawMail.spec(
        name="raw_mail",
        inputs={tokenizer['raw_mail']: Grouping.fields('sha256_random')},
        par=1)
    # Joins every analysis stream into one JSON document per mail.
    json_maker = JsonMaker.spec(
        name="json_maker",
        inputs={
            attachments: Grouping.fields('sha256_random'),
            network: Grouping.fields('sha256_random'),
            phishing: Grouping.fields('sha256_random'),
            raw_mail: Grouping.fields('sha256_random'),
            tokenizer['mail']: Grouping.fields('sha256_random'),
            urls: Grouping.fields('sha256_random')})
    # Elasticsearch sink; shuffle grouping via list-style inputs.
    output_elasticsearch = OutputElasticsearch.spec(
        name="output-elasticsearch",
        inputs=[json_maker])
class WordCount(Topology):
    """Routes events to direction-analysis bolts keyed by event id.

    NOTE(review): the class name "WordCount" does not match the event/
    direction pipeline — likely a template leftover; confirm.
    """

    event_spout = EventSpout.spec()
    # Fields grouping on 'array_event_id' keeps one event's tuples together.
    count_bolt = DirectionBolt.spec(
        inputs={event_spout: Grouping.fields('array_event_id')}, par=2)