def _final_std(self):
    """
    Finish capturing of stdout and stderr

    Collects whatever output is still returned by ``communicate()`` on the
    current subprocess, optionally echoes it to this process' own streams
    and, when log saving is enabled, stores the complete stdout/stderr as
    two ``LogEntry`` objects that are attached to the current task.

    A ``ValueError`` raised anywhere in this sequence is silently ignored.
    """
    current = self.current_task

    try:
        tail_out, tail_err = self._current_sub.communicate()

        if self.verbose:
            # echo the remaining output, stderr first
            sys.stderr.write(tail_err)
            sys.stdout.write(tail_out)

        # previously buffered output plus the final remainder
        full_out = self._std['stdout'] + tail_out
        full_err = self._std['stderr'] + tail_err

        if not self._save_log_to_db:
            return

        err_entry = LogEntry(
            'worker',
            'stderr from running task',
            full_err
        )
        out_entry = LogEntry(
            'worker',
            'stdout from running task',
            full_out
        )
        self.project.logs.add(err_entry)
        self.project.logs.add(out_entry)

        current.stdout = out_entry
        current.stderr = err_entry

    except ValueError:
        pass
def HandleLine(self, line):
    """
    Parse one log line and add a sample to every signal that matches it.

    Args:
      line: str, The line from the log file to parse

    Returns:
      None
    """
    entry = LogEntry(line)

    for signal_key in self.signal:
        series = self.signal[signal_key]
        source_binary = signal_key[0]
        instance_name = signal_key[1]
        search_path = signal_key[2]

        scale = 1.0
        is_boolean = False

        # A trailing "-b X" (X being a number) marks a boolean signal: X is
        # drawn when the value is True and zero is drawn when it is False.
        if len(search_path) >= 2 and search_path[-2] == '-b':
            scale, is_boolean = float(search_path[-1]), True
            search_path = search_path[:-2]

        # A trailing "-m X" sets the factor applied to the plotted value.
        if len(search_path) >= 2 and search_path[-2] == '-m':
            scale = float(search_path[-1])
            search_path = search_path[:-2]

        # Only lines coming from the right binary ...
        if not (source_binary == entry.name):
            continue
        # ... and the right structure instance are of interest.
        if not entry.msg.startswith(instance_name + ': '):
            continue

        # Walk down the parsed structure along the search path so that
        # arbitrarily deeply nested members can be reached.
        _, _, node = entry.ParseStruct()
        for step in search_path:
            node = node[step]

        if not is_boolean:
            series.Add(entry.time, float(node) * scale)
        elif node == 'T':
            series.Add(entry.time, scale)
        else:
            series.Add(entry.time, 0)
def testreport_lots_of_fails(self):
    # 900 FAIL entries with timestamps 0..899; the last address octet is
    # stamp // 3, so 300 distinct addresses fail three times in a row each.
    # The report is expected to contain 300 entries.
    detector = BadLoginDetector(3, 3)
    for stamp in range(900):
        raw = '[%d][1.1.1.%d][FAIL]' % (stamp, stamp // 3)
        detector.process(LogEntry.fromstring(raw))
    flagged = detector.report()
    self.assertEqual(len(flagged), 300)
def testprocess_fails_far_apart(self):
    # 1000 successful logins from 1.1.1.1, with three failures from 2.2.2.2
    # spliced in at timestamps 100, 200 and 300. With BadLoginDetector(3, 200)
    # every single call to process() is expected to return a true value.
    failures = (
        (100, '[100][2.2.2.2][FAIL]'),
        (200, '[200][2.2.2.2][FAIL]'),
        (300, '[300][2.2.2.2][FAIL]'),
    )
    lines = ['[%d][1.1.1.1][SUCCESS]' % stamp for stamp in range(1000)]
    for position, text in failures:
        lines[position] = text

    detector = BadLoginDetector(3, 200)
    for raw in lines:
        outcome = detector.process(LogEntry.fromstring(raw))
        self.assertTrue(outcome)
def testreport_onefail_too_far_apart(self):
    # 1000 successful logins from 1.1.1.1, with three failures from 2.2.2.2
    # spliced in at timestamps 100, 200 and 300. With BadLoginDetector(3, 150)
    # the final report is expected to be empty.
    failures = (
        (100, '[100][2.2.2.2][FAIL]'),
        (200, '[200][2.2.2.2][FAIL]'),
        (300, '[300][2.2.2.2][FAIL]'),
    )
    lines = ['[%d][1.1.1.1][SUCCESS]' % stamp for stamp in range(1000)]
    for position, text in failures:
        lines[position] = text

    detector = BadLoginDetector(3, 150)
    for raw in lines:
        detector.process(LogEntry.fromstring(raw))

    self.assertEqual(detector.report(), [])
def testreport_twofails_same_ip(self):
    # 1000 successful logins from 1.1.1.1, with four failures from 2.2.2.2
    # spliced in at timestamps 100, 200, 300 and 400. With
    # BadLoginDetector(3, 1000) the report is expected to list the failing
    # address exactly once.
    failures = (
        (100, '[100][2.2.2.2][FAIL]'),
        (200, '[200][2.2.2.2][FAIL]'),
        (300, '[300][2.2.2.2][FAIL]'),
        (400, '[400][2.2.2.2][FAIL]'),
    )
    lines = ['[%d][1.1.1.1][SUCCESS]' % stamp for stamp in range(1000)]
    for position, text in failures:
        lines[position] = text

    detector = BadLoginDetector(3, 1000)
    for raw in lines:
        detector.process(LogEntry.fromstring(raw))

    self.assertEqual(detector.report(), ['2.2.2.2'])
def testprocess_somefails(self):
    # 1000 successful logins from 1.1.1.1, with three failures from 2.2.2.2
    # spliced in at timestamps 100, 200 and 300. With
    # BadLoginDetector(3, 1000) only the entry stamped 300 is expected to
    # make process() return a false value; all others must return true.
    failures = (
        (100, '[100][2.2.2.2][FAIL]'),
        (200, '[200][2.2.2.2][FAIL]'),
        (300, '[300][2.2.2.2][FAIL]'),
    )
    lines = ['[%d][1.1.1.1][SUCCESS]' % stamp for stamp in range(1000)]
    for position, text in failures:
        lines[position] = text

    detector = BadLoginDetector(3, 1000)
    for raw in lines:
        entry = LogEntry.fromstring(raw)
        # decide on the expectation first, then feed the entry in
        expect_rejection = entry.time == 300
        outcome = detector.process(entry)
        if expect_rejection:
            self.assertFalse(outcome)
        else:
            self.assertTrue(outcome)
def main():
    """
    Insert the log file named on the command line into the database.

    Parses the arguments, loads the configuration from the given settings
    file, then sets up the database and logging. Returns 0 after the entry
    has been inserted, or 1 (after logging an error) when the file does not
    exist.
    """
    args = initParser()
    config = initConfig(args.settings_file)
    db = initDatabase(config)
    initLogging(config)

    # Bail out early when there is nothing to import.
    if not exists(args.filename):
        logging.error("%s does not exist. Exiting..." % args.filename)
        return 1

    db.insert_entry(LogEntry(args.filename))
    return 0
def run(self):
    """
    Start the worker to execute tasks until it is shut down

    The method keeps looping until ``scheduler.state`` equals ``'down'``.
    In every iteration it advances the scheduler (while the worker is in
    one of ``self._running_states``), queues new tasks when the scheduler
    is idle, executes a pending ``self.command``, refreshes the heartbeat
    timestamp ``self.seen``, sleeps for ``self.sleep`` seconds, enforces
    ``self.walltime`` and mirrors the scheduler's current task and task
    count onto the worker.

    A pymongo ``ConnectionFailure`` / ``AutoReconnect`` interrupts the
    main loop; after ``self.reconnect_time`` seconds the outer loop runs
    again and first tries to reconnect the project and drop all tasks
    that are not the current one. A ``KeyboardInterrupt`` shuts the
    scheduler down.
    """
    scheduler = self._scheduler
    project = self._project

    # `last` is the time of the most recent heartbeat
    last = time.time()
    last_n_tasks = 0
    self.seen = last

    def task_test(x):
        # A task qualifies when it is ready and, if this worker is
        # restricted to certain generators, when it comes from one of them.
        return x.ready and (not self.generators or (
            hasattr(x.generator, 'name') and
            x.generator.name in self.generators))

    print 'up and running ...'

    try:
        # `reconnect` is only set again by the pymongo error handler below,
        # so the outer loop runs once unless the DB connection was lost.
        reconnect = True
        while reconnect:
            reconnect = False
            try:
                if len(scheduler.tasks) > 0:
                    # must have been a DB connection problem, attempt reconnection
                    print 'attempt reconnection'
                    self._project.reconnect()

                    print 'remove all pending tasks'
                    # remove all pending tasks as much as possible
                    # (iterate over a copy since entries are deleted on the way)
                    for t in list(scheduler.tasks.values()):
                        if t is not scheduler.current_task:
                            if t.worker == self:
                                # still assigned to us: reset the task
                                t.state = 'created'
                                t.worker = None

                            del scheduler.tasks[t.__uuid__]

                    # see, if we can salvage the currently running task
                    # unless it has been cancelled and is running with another worker
                    # NOTE(review): `t` is dereferenced without a None check;
                    # confirm current_task cannot be None at this point.
                    t = scheduler.current_task
                    if t.worker == self and t.state == 'running':
                        print 'continuing current task'
                        # seems like the task is still ours to finish
                        pass
                    else:
                        print 'current task has been captured. releasing.'
                        scheduler.stop_current()

                # the main worker loop
                while scheduler.state != 'down':
                    state = self.state
                    # check the state of the worker
                    if state in self._running_states:
                        scheduler.advance()
                        if scheduler.is_idle:
                            # try to claim up to `self.prefetch` batches of tasks
                            for _ in range(self.prefetch):
                                # the arguments ask the task store to switch
                                # 'state' from 'created' to 'queued' for a task
                                # accepted by `task_test`; the result is handed
                                # to the scheduler
                                tasklist = scheduler(
                                    project.storage.tasks.modify_test_one(
                                        task_test, 'state', 'created', 'queued'))

                                for task in tasklist:
                                    task.worker = self
                                    print 'queued a task [%s] from generator `%s`' % (
                                        task.__class__.__name__,
                                        task.generator.name if task.generator else '---')

                            self.n_tasks = len(scheduler.tasks)

                    # handle commands
                    # todo: Place all commands in a separate store and consume ?!?
                    command = self.command

                    # NOTE(review): 'shutdown' is tested in its own `if`, the
                    # remaining commands form a separate if/elif chain.
                    if command == 'shutdown':
                        # someone wants us to shutdown
                        scheduler.shut_down()

                    if command == 'kill':
                        # someone wants us to shutdown immediately. No waiting
                        scheduler.shut_down(False)

                    elif command == 'release':
                        scheduler.release_queued_tasks()

                    elif command == 'halt':
                        self._stop_current('halted')

                    elif command == 'cancel':
                        self._stop_current('cancelled')

                    elif command and command.startswith('!'):
                        # everything after the '!' is split on single spaces
                        # and executed as an external command; its output is
                        # stored as a log entry
                        result = subprocess.check_output(command[1:].split(' '))
                        project.logs.add(
                            LogEntry(
                                'command', 'called `%s` on worker' % command[1:], result
                            )
                        )

                    # any non-empty command is cleared once it has been seen
                    if command:
                        self.command = None

                    if time.time() - last > self.heartbeat:
                        # heartbeat
                        last = time.time()
                        self.seen = last

                    time.sleep(self.sleep)
                    if self.walltime and time.time() - self.__time__ > self.walltime:
                        # we have reached the set walltime and will shutdown
                        print 'hit walltime of %s' % DT(self.walltime).length
                        scheduler.shut_down()

                    # only write `current` / `n_tasks` when they actually changed
                    if scheduler.current_task is not self._last_current:
                        self.current = scheduler.current_task
                        self._last_current = self.current

                    n_tasks = len(scheduler.tasks)
                    if n_tasks != last_n_tasks:
                        self.n_tasks = n_tasks
                        last_n_tasks = n_tasks

            except (pymongo.errors.ConnectionFailure,
                    pymongo.errors.AutoReconnect) as e:
                print 'pymongo connection error', e
                print 'try reconnection after %d seconds' % self.reconnect_time
                # lost connection to DB, try to reconnect after some time
                time.sleep(self.reconnect_time)
                reconnect = True

    except KeyboardInterrupt:
        scheduler.shut_down()
        pass
def advance(self):
    """
    Advance checking if tasks are completed or failed

    Needs to be called in regular intervals. Usually by the main
    worker instance

    When there is no current task, the next task found in ``self.tasks``
    (if any) becomes the current one and is started. Otherwise the
    subprocess of the current task is polled; once it has exited, the
    captured output is finalized, the task state is set according to the
    return code and the presence of the expected output files, and the
    task is removed from ``self.tasks`` and recorded in
    ``self._done_tasks``.
    """
    if self.current_task is None:
        if len(self.tasks) > 0:
            # NOTE(review): `itervalues()` is the Python 2 dict API --
            # presumably `self.tasks` is a dict; confirm.
            t = next(self.tasks.itervalues())
            self.current_task = t
            self._start_job(t)
    else:
        task = self.current_task
        # get current outputs
        # (poll() yields None while the subprocess is still running)
        return_code = self._current_sub.poll()

        # update current stdout and stderr by 1024 bytes
        self._advance_std()

        if return_code is not None:
            # finish std catching
            self._final_std()

            if return_code == 0:
                # success
                all_files_present = True
                # see first if we have all claimed files for worker output staging transfer
                for f in task.targets:
                    if isinstance(f, Transfer):
                        if not os.path.exists(self.replace_prefix(f.source.url)):
                            # log every missing source file; the loop goes on
                            # so that all missing files get reported
                            log = LogEntry(
                                'worker',
                                'execution error',
                                'failed to create file before staging %s' % f.source.short,
                                objs={'file': f, 'task': task}
                            )
                            self.project.logs.add(log)
                            all_files_present = False

                if all_files_present:
                    try:
                        task.fire('success', self)
                        task.state = 'success'
                        print 'task succeeded'
                        if self._cleanup_successful:
                            print 'removing worker dir'
                            # go to an existing folder before we delete
                            os.chdir(self.path)
                            script_location = self.current_task_dir
                            if script_location is not None:
                                shutil.rmtree(script_location)
                    except IOError:
                        # an IOError anywhere in the success handling
                        # (including the cleanup) marks the task as failed
                        task.state = 'fail'
                else:
                    task.state = 'fail'
            else:
                # failed
                log = LogEntry(
                    'worker',
                    'task failed',
                    'see log files',
                    objs={'task': task}
                )
                self.project.logs.add(log)
                # NOTE(review): the state is set to 'failed' here and
                # overwritten with 'fail' right after the 'fail' event has
                # been fired -- confirm the intermediate value is intended.
                task.state = 'failed'

                try:
                    task.fire('fail', self)
                except IOError:
                    pass

                task.state = 'fail'

            # the task is finished either way: forget it and reset the
            # bookkeeping of the current task
            del self.tasks[task.__uuid__]
            self._done_tasks.add(task.__uuid__)
            self._initialize_current()
def testprocess_all_success(self):
    # 1000 successful logins from a single address (timestamps 0..999):
    # process() is expected to return a true value for every entry.
    detector = BadLoginDetector(3, 1000)
    for stamp in range(1000):
        raw = '[%d][1.1.1.1][SUCCESS]' % stamp
        outcome = detector.process(LogEntry.fromstring(raw))
        self.assertTrue(outcome)
def testprocess_allfails_far_apart(self):
    # 1000 failed logins from a single address (timestamps 0..999): with
    # BadLoginDetector(3, 2) process() is still expected to return a true
    # value for every entry.
    detector = BadLoginDetector(3, 2)
    for stamp in range(1000):
        raw = '[%d][1.1.1.1][FAIL]' % stamp
        outcome = detector.process(LogEntry.fromstring(raw))
        self.assertTrue(outcome)
with open("ignore.list") as ignorelistfile: ignorelist = [i.strip() for i in ignorelistfile.readlines()] for line in f: lineNmbr += 1 print lineNmbr, "\r", # Skip lines that may show AccessLogFilter initialized message. We need to get better at doing this. # 2013-02-15 17:01:18,140 INFO [main] [atlassian.confluence.util.AccessLogFilter] init AccessLogFilter initialized. Format is: <user> <url> <starting memory free (kb)> +- <difference in free mem (kb)> <query time (ms)> <remote address> if "init AccessLogFilter initialized" in line: continue #print line logentry = LogEntry(line) #print logentry if firstTimestamp == None: firstTimestamp = logentry.getTimestamp() lastTimestamp = logentry.getTimestamp() # Skip empty url because these URLs get redirected to Dashboard URL like homepage.action. In our case it's HOME wiki. if (logentry.relativeurl == ''): continue wikiurl = WikiUrl(logentry.relativeurl, logentry.userid, logentry.datetimestamp, logentry.ipaddress) #print wikiurl # Here are few entries that we don't want to log into our database as they are duplicate ones. # Example: a) http://<wiki base url>/display