sc.addPyFile('magichour.zip') from magichour.api.dist.events.eventGen import event_gen_fp_growth, event_gen_word2vec logLineURI = 'hdfs://namenode/magichour/tbird.500.templateEvalRDD' outputPath = 'hdfs://namenode/magichour/tbird.500.eventsRDD' minSupport = 0.2 numPartitions = 10 windowLen = 120 log_lines = sc.pickleFile(logLineURI) print 'FP Growth' fp_growth_events = event_gen_fp_growth(sc, log_lines, minSupport, numPartitions, windowLen) for event in fp_growth_events: print event print 'Word2Vec' word2vec_events = event_gen_word2vec(sc, log_lines, window_size=60) for event in word2vec_events: print event # Save to hdfs if outputPath: sc.parallelize(word2vec_events).saveAsPickleFile(outputPath, batchSize=1)
################## minSupport = 0.2 numPartitions = 10 windowLen = 120 # print 'FP Growth' # fp_growth_events = event_gen_fp_growth(sc, matched_logline_rdd, minSupport, numPartitions, windowLen) # for event in fp_growth_events: # print event template_lookup = {} for template in templates: template_lookup[template.id] = template.raw_str print '*********** Word2Vec ***********' word2vec_events = event_gen_word2vec(sc, matched_logline_rdd, window_size=60) for event in word2vec_events: print '--------Event %d-----------' % event.id for template_id in event.template_ids: try: print template_lookup[template_id] except: print 'Unknown Template: ', template_id # Save the event definitions locally pickle.dump(word2vec_events, open('word2vec_events.pkl', 'wb')) pickle.dump(event, open('event.pkl', 'wb')) ################## ## Event Eval ##
################## minSupport = 0.2 numPartitions = 10 windowLen = 120 # print 'FP Growth' # fp_growth_events = event_gen_fp_growth(sc, matched_logline_rdd, minSupport, numPartitions, windowLen) # for event in fp_growth_events: # print event template_lookup = {} for template in templates: template_lookup[template.id] = template.raw_str print '*********** Word2Vec ***********' word2vec_events = event_gen_word2vec(sc, matched_logline_rdd, window_size=60) for event in word2vec_events: print '--------Event %d-----------'%event.id for template_id in event.template_ids: try: print template_lookup[template_id] except: print 'Unknown Template: ', template_id # Save the event definitions locally pickle.dump(word2vec_events, open('word2vec_events.pkl', 'wb')) pickle.dump(event, open('event.pkl', 'wb')) ################## ## Event Eval ##