# Evaluate previously generated event definitions against a pickled RDD of
# log lines. `sc` is an existing SparkContext supplied by the environment.

# Register the packaged magichour code with the SparkContext. The magichour
# imports are kept after this call, matching the original statement order.
sc.addPyFile('magichour.zip')

from magichour.api.dist.events.eventEval import event_eval_rdd
# NOTE(review): DistributedLogLine is not referenced by name below;
# presumably it is imported so the pickled records can be resolved -- confirm.
from magichour.api.local.util.namedtuples import DistributedLogLine

# HDFS locations of the two pickled inputs.
logLineURI = 'hdfs://namenode/magichour/tbird.500.templateEvalRDD'
eventDefURI = 'hdfs://namenode/magichour/tbird.500.eventsRDD'

# The log lines stay distributed as an RDD; the event definitions are
# collected to the driver as a local list before being handed to the
# evaluator.
rddlogLines = sc.pickleFile(logLineURI)
eventDefs = sc.pickleFile(eventDefURI).collect()

# Fourth argument of event_eval_rdd (presumably a window length in seconds,
# going by the name -- confirm against event_eval_rdd).
windowSeconds = 500

test = event_eval_rdd(sc, rddlogLines, eventDefs, windowSeconds)

# Materialise the result on the driver; the returned list is not stored.
test.collect()
# Build a template-id -> raw template string lookup, generate events with
# word2vec, print and pickle them locally, then evaluate the events against
# the matched log lines and store the result on HDFS.
# `templates`, `template_lookup`, `sc`, `matched_logline_rdd`,
# `event_gen_word2vec`, `event_eval_rdd` and `pickle` come from earlier in
# the file.

for template in templates:
    template_lookup[template.id] = template.raw_str

print('*********** Word2Vec ***********')
word2vec_events = event_gen_word2vec(sc, matched_logline_rdd, window_size=60)
for event in word2vec_events:
    print('--------Event %d-----------' % event.id)
    for template_id in event.template_ids:
        # Only a missing id is an expected condition here; any other error
        # should surface instead of being reported as an unknown template.
        try:
            print(template_lookup[template_id])
        except KeyError:
            # Single-argument form that emits the same text as the
            # two-item print statement (label, one space, the id).
            print('%s %s' % ('Unknown Template: ', template_id))

# Save the event definitions locally
# Context managers make sure both pickle files are flushed and closed.
with open('word2vec_events.pkl', 'wb') as events_file:
    pickle.dump(word2vec_events, events_file)

# NOTE(review): `event` is the loop variable left over from the loop above,
# i.e. the last event printed; if word2vec_events is empty and `event` was
# not bound earlier this raises NameError. Confirm this dump is still wanted.
with open('event.pkl', 'wb') as event_file:
    pickle.dump(event, event_file)

##################
##  Event Eval  ##
##################
windowSeconds = 500
found_events = event_eval_rdd(sc, matched_logline_rdd, word2vec_events, windowSeconds)

# NOTE(review): found_events is used by two actions below (save, then take);
# unless it is persisted elsewhere Spark will presumably evaluate it twice.
event_output_URI = 'hdfs:///magichour/events'
found_events.saveAsPickleFile(event_output_URI)

# Pull at most 10000 results back to the driver and show the first ten.
found_events_local = found_events.take(10000)
print(found_events_local[:10])
# Populate the template lookup, generate word2vec events, print and pickle
# them locally, then run event evaluation and write the result to HDFS.
# Names such as `templates`, `template_lookup`, `sc`, `matched_logline_rdd`,
# `event_gen_word2vec`, `event_eval_rdd` and `pickle` are defined earlier in
# the file.

for template in templates:
    template_lookup[template.id] = template.raw_str

print('*********** Word2Vec ***********')
word2vec_events = event_gen_word2vec(sc, matched_logline_rdd, window_size=60)
for event in word2vec_events:
    print('--------Event %d-----------' % event.id)
    for template_id in event.template_ids:
        # Catch only the missing-key case; a bare except would also hide
        # unrelated failures behind the "Unknown Template" message.
        try:
            print(template_lookup[template_id])
        except KeyError:
            # Produces the same output as printing the label and the id as
            # two print-statement items (label, one space, the id).
            print('%s %s' % ('Unknown Template: ', template_id))

# Save the event definitions locally
# `with` closes each file deterministically once the dump has finished.
with open('word2vec_events.pkl', 'wb') as events_file:
    pickle.dump(word2vec_events, events_file)

# NOTE(review): `event` here is whatever the loop above bound last; with an
# empty word2vec_events (and no earlier binding) this raises NameError.
# Confirm whether dumping only the last event is intentional.
with open('event.pkl', 'wb') as event_file:
    pickle.dump(event, event_file)

##################
##  Event Eval  ##
##################
windowSeconds = 500
found_events = event_eval_rdd(sc, matched_logline_rdd, word2vec_events, windowSeconds)

# NOTE(review): two actions (saveAsPickleFile and take) run on found_events;
# presumably it is recomputed for the second one unless persisted -- verify.
event_output_URI = 'hdfs:///magichour/events'
found_events.saveAsPickleFile(event_output_URI)

# Fetch up to 10000 results to the driver and print the first ten of them.
found_events_local = found_events.take(10000)
print(found_events_local[:10])