def handle(self, *args, **options):
    """Re-classify every task and move those whose group name has changed.

    Initialises the settings, changes into the configured ``WorkDir`` and
    loads the classifier from ``TaskClassesFile``.  Each ``Task`` name is
    then passed to ``tasks.classify``; when the returned group name differs
    from the name of the task's current group, the task is assigned to the
    ``TaskGroup`` with that name (created if it does not exist) and saved.
    A one-line summary with the processed/updated counts is printed last.

    ``args`` and ``options`` are accepted but not used.
    """
    opts = settings.init()
    os.chdir(opts['WorkDir'])
    tasks.init(opts['TaskClassesFile'])

    total = updated = 0
    # TaskGroup rows already fetched or created during this run, keyed by
    # name, so a group shared by many tasks is looked up only once.
    groups_by_name = {}

    # select_related loads each task's group in the same query; without it
    # reading t.taskGroup.name issues one extra query per task.
    for t in Task.objects.select_related('taskGroup'):
        group_name = tasks.classify(t.name)
        total += 1
        if group_name == t.taskGroup.name:
            continue

        group = groups_by_name.get(group_name)
        if group is None:
            group = TaskGroup.objects.get_or_create(name=group_name)[0]
            groups_by_name[group_name] = group

        t.taskGroup = group
        t.save()
        updated += 1

    # Single parenthesised argument: prints the same text under the Python 2
    # print statement and is also valid as a Python 3 function call.
    print("Processed %d tasks, updated %d" % (total, updated))
""" Watchers for task result file in jobtracker history dir, parses them and imports into DB. """ import os import sys import logging import time from counters.lib import parser from counters.lib import data from counters.lib import watcher from counters.lib import tasks from counters.lib import settings opts = settings.init () os.chdir (opts['WorkDir']) tasks.init (opts['TaskClassesFile']) JT_History = opts['JobTrackerHistoryDir'] watch_interval = int (opts['WatcherInterval']) logging.basicConfig (format="%(asctime)s: %(message)s", level=logging.INFO) w = watcher.HadoopWatcher (JT_History, JT_History + "/done", opts['WatcherStateFile']) importer = data.CounterDataImporter () iteration = 0
#!/usr/bin/env python2.7
"""Command-line entry point that hands ``sys.argv`` to Django's management runner."""
import os
import sys

from counters.lib import settings


def main():
    """Prepare the environment and dispatch the requested management command."""
    # Only sets the variable when the caller has not already provided one.
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "hdpstat.settings")

    # Imported after the settings module name is in the environment,
    # keeping the original statement order.
    from django.core.management import execute_from_command_line

    settings.init()
    execute_from_command_line(sys.argv)


if __name__ == "__main__":
    main()