def id2project(self, pid):
    """Resolve ``pid`` to project(s), trying several identifier kinds in turn.

    The lookups are attempted in this order and the first hit wins:
    numeric primary key, textid, project ``rid``, owner profile ``rid``,
    project name.

    Returns a one-element list for a single match, the queryset of projects
    when ``pid`` matches an owner profile ``rid``, or ``None`` when nothing
    matched.
    """
    # 1. numeric primary key (a non-numeric pid simply skips this step)
    try:
        return [Project.objects.get(pk=int(pid))]
    except (Project.DoesNotExist, ValueError):
        pass

    # 2. textid
    by_textid = Project.get_by_textid(pid)
    if by_textid is not None:
        return [by_textid]

    # 3. project rid
    try:
        return [Project.objects.get(rid=pid)]
    except Project.DoesNotExist:
        pass

    # 4. rid of the owner's profile: may yield several projects
    owned = Project.objects.filter(owner__profile__rid=pid)
    if owned.count():
        return owned

    # 5. project name (first match only)
    by_name = Project.objects.filter(name=pid).first()
    return [by_name] if by_name is not None else None
def prepare(opts):
    """Ensure the benchmark indicators exist and reset them to ``OK``.

    ``opts`` is a mapping with the keys:
      * ``textid``   - textid of the project to work on
      * ``num``      - how many indicators to prepare
      * ``template`` - format string; ``template.format(n)`` gives the
                       indicator name for ``n`` in ``range(num)``

    Missing indicators are created. Every indicator is printed as it is
    saved, and a summary line with timing is printed at the end.
    """
    project = Project.get_by_textid(opts['textid'])
    created = 0
    processed = 0

    started = time.time()
    for n in range(opts['num']):
        iname = opts['template'].format(n)
        try:
            indicator = project.get_indicator(iname)
        except ObjectDoesNotExist:
            indicator = Indicator.create(project=project, idname=iname)
            created += 1
        indicator.status = 'OK'
        indicator.details = 'prepare...'
        indicator.save()
        print(indicator)
        processed += 1
    passed = time.time() - started

    # The elapsed time can be 0 (nothing to do, coarse clock); avoid
    # ZeroDivisionError when computing the rate.
    rate = processed / passed if passed > 0 else 0.0
    print("Prepared {} indicators (created {}) in {:.2f} seconds ({:.2f} i/sec)".format(
        processed, created, passed, rate))
def clone(data, email):
    """Re-target an exported data dict to another e-mail, in place.

    Replaces ``data['email']`` with ``email`` and gives every entry of
    ``data['Project']`` a freshly generated textid (stored as a one-element
    ``ProjectTextID`` list). Returns the same, mutated ``data`` object.
    """
    previous = data['email']
    print("clone {} > {}".format(previous, email))
    data['email'] = email

    for project in data['Project']:
        project['ProjectTextID'] = [{'textid': Project.gentextid()}]

    return data
def geti_benchmark(self, options):
    """Benchmark ``Project.get_indicator`` on randomly chosen indicators.

    ``options`` is a mapping with the keys:
      * ``textid``   - textid of the project to query
      * ``iter``     - number of lookups to perform
      * ``num``      - upper bound for the random indicator index
      * ``template`` - format string producing an indicator name from an index

    Prints the project and a summary line with the iteration count, the
    elapsed time and the lookup rate.
    """
    random.seed()
    project = Project.get_by_textid(options['textid'])
    print("Project:", project)

    # Count iterations explicitly: the summary must not depend on a loop
    # variable that is unbound when no iteration runs.
    iterations = 0
    started = time.time()
    for _ in range(options['iter']):
        # NOTE(review): randint is inclusive on both ends, so index
        # options['num'] itself can be requested - confirm it exists.
        name = options['template'].format(random.randint(1, options['num']))
        project.get_indicator(name)
        iterations += 1
    elapsed = time.time() - started

    # Guard against a zero elapsed time (no iterations / coarse clock).
    rate = iterations / elapsed if elapsed > 0 else 0.0
    print("{} iterations in {:.2f} seconds ({:.2f} i/sec)".format(
        iterations, elapsed, rate))
def bench(q, n, opts):
    """Benchmark worker: repeatedly update random indicators of a project.

    Parameters:
      q    - queue-like object; the resulting stats dict is ``put`` on it
      n    - worker number (used for sharding and in the printed output)
      opts - mapping with the keys ``textid``, ``template``, ``num``,
             ``shard``, ``quiet``, ``tries`` and ``seconds``

    With ``opts['shard']`` set, worker ``n`` only touches indexes in
    ``[num * n, num * n + num)``; otherwise every worker uses ``[0, num)``.
    The loop ends after ``opts['tries']`` updates when that value is truthy,
    otherwise once more than ``opts['seconds']`` seconds have passed.

    The stats dict put on ``q`` has the keys ``OK``, ``n``, ``passed``
    (elapsed seconds) and ``processed``.
    """
    project = Project.get_by_textid(opts['textid'])
    stats = {'OK': 0, 'n': n}
    processed = 0

    started = time.time()

    if opts['shard']:
        startn = opts['num'] * n
        stopn = startn + opts['num']
    else:
        startn = 0
        stopn = opts['num']

    while True:
        idx = random.randrange(startn, stopn)
        iname = opts['template'].format(idx)

        indicator = project.get_indicator(iname)
        indicator.status = 'OK'
        indicator.details = 'benchmark (try: {})'.format(processed)
        indicator.save()

        if not opts['quiet']:
            print("{:02d}: {}".format(n, indicator))

        processed += 1
        stats['OK'] += 1

        # Stop condition: the limit is read from opts like every other
        # option (the function has no `args` of its own).
        if opts['tries']:
            if processed >= opts['tries']:
                break
        elif time.time() - started > opts['seconds']:
            break

    stats['passed'] = time.time() - started
    stats['processed'] = processed
    q.put(stats)
def main():
    """Command-line entry point of the okerr indicator processor.

    Parses the options (also read from /etc/okerr/process.conf), configures
    logging, switches to the requested user and then runs exactly one mode:

      --check   exit 0 if the lockfile is locked by a process, 1 otherwise
      --kill    send SIGTERM to the pid stored in the lockfile, then exit
      --unlock  unlock old locked indicators and return
      --id      lock and process a single indicator (iname@textid)
      --single  run the processing loop once
      default   run the main code as a daemon (-d) or in the foreground

    Several branches terminate the process through ``sys.exit``.
    """
    cflist = ['/etc/okerr/process.conf']

    parser = configargparse.ArgumentParser(
        description='okerr indicator local processor.',
        default_config_files=cflist)

    parser.add_argument('--single', dest='single', action='store_true',
                        default=False, help='single run')
    parser.add_argument('--unlock', dest='unlock', action='store_true',
                        default=False, help='unlock all locked indicators')
    parser.add_argument('--lockfile', dest='lockfile',
                        default='/var/run/lock/okerr-process.pid')
    parser.add_argument('--ci', type=int, default=None, help='force ci')
    parser.add_argument('--nomail', default=False, action='store_true',
                        help='do not send any mail')
    parser.add_argument('--check', dest='check', action='store_true',
                        default=False, help='check lockfile')
    parser.add_argument('--kill', dest='kill', action='store_true',
                        default=False, help='kill by lockfile')
    parser.add_argument('--user', default='okerr')
    parser.add_argument('--id', dest='id',
                        help='run this indicator (iname@textid)', default=None)
    parser.add_argument('-d', dest='daemon', action='store_true',
                        default=False, help='daemon mode')

    debug_group = parser.add_argument_group('Debugging')
    debug_group.add_argument('--nokeepalive', action='store_true', default=False,
                             help='Do not update keepalive indicators (for debug)')
    debug_group.add_argument('--stderr', action='store_true', default=False,
                             help='log to stderr')
    debug_group.add_argument('--lifetime', metavar='SECONDS', default=None,
                             type=int, help='suicide after this time')
    debug_group.add_argument('-q', dest='quiet', action='store_true',
                             default=False, help='quiet mode')
    debug_group.add_argument('-v', dest='verbose', action='store_true',
                             default=False, help='verbose mode')

    args = parser.parse_args()

    send_mail = not args.nomail

    if args.verbose:
        log.setLevel(logging.DEBUG)
        log.debug('Verbose mode')

    if args.stderr:
        log.addHandler(logging.StreamHandler(sys.stderr))
    else:
        print("No logging to STDERR, use --stderr")
        log.info("no logging to stderr, use --stderr")

    # Drop privileges: switch to the uid/gid/groups of the requested user.
    pwnam = pwd.getpwnam(args.user)
    req_uid = pwnam.pw_uid
    req_gid = pwnam.pw_gid
    req_groups = [g.gr_gid for g in grp.getgrall() if args.user in g.gr_mem]

    if os.getuid() != req_uid:
        # gid and groups must be changed before the uid is given up
        os.setgid(req_gid)
        os.setgroups(req_groups)
        log.info("switch to user {} u: {} g: {}".format(
            args.user, req_uid, [req_gid] + req_groups))
        os.setuid(req_uid)

    ci = myci() if args.ci is None else args.ci

    if args.check:
        if not args.quiet:
            # The message is formatted explicitly: passing the path as an
            # extra logging argument without a placeholder makes the logging
            # module report a formatting error instead of the message.
            log.debug("check lockfile {}".format(args.lockfile))
        daemon = MyDaemon(args.lockfile)
        pid = daemon.lockedpidfile()
        if pid:
            if not args.quiet:
                log.debug("pidfile {} locked by pid {}".format(args.lockfile, pid))
            sys.exit(0)
        else:
            if not args.quiet:
                log.debug("pidfile not locked")
            sys.exit(1)

    if args.kill:
        try:
            with open(args.lockfile, 'r') as pf:
                pid = int(pf.read().strip())
        except ValueError:
            log.debug("bad value in pidfile")
            sys.exit(1)
        except IOError as e:
            # an unreadable/missing lockfile is not treated as a failure
            log.debug("IOError while read lockfile: {}".format(str(e)))
            sys.exit(0)

        log.debug("kill process {}".format(pid))
        try:
            os.kill(pid, signal.SIGTERM)
        except OSError as e:
            log.debug("IOError while killing: {}".format(str(e)))

        # NOTE: the lockfile itself is not removed here.
        sys.exit(0)

    if args.unlock:
        unlockold()
        return

    if args.id:
        print("process indicator {}".format(args.id))
        now = timezone.now()
        pid = os.getpid()
        iname, textid = args.id.split('@')
        p = Project.get_by_textid(textid)
        if p is None:
            # avoid an AttributeError on a wrong textid
            print("no such project")
            return
        i = p.get_indicator(iname)
        if i is None:
            print("no such indicator")
            return
        # (re)lock the indicator for this process before updating it
        i.lockpid = pid
        i.lockat = now
        updatei(i)
        return

    if args.single:
        log.info('process started in single mode')
        loop(ci, send_mail)
    else:
        if os.geteuid() == 0:
            msg = 'you should not run okerr processor as root!'
            log.error(msg)
            sys.stderr.write("{}\n".format(msg))
            return

        if args.daemon:
            # NOTE(review): the daemon presumably runs the main code itself
            # once started - confirm against MyDaemon.
            daemon = MyDaemon(args.lockfile)
            if daemon.start():
                log.debug("daemon started")
            else:
                log.debug("already started")
                sys.exit(1)
        else:
            log.debug("foreground mode")
            lockfh = lockpidfile(args.lockfile)
            if lockfh is None:
                if not args.quiet:
                    log.error("pidfile {} already locked".format(args.lockfile))
                sys.exit(1)

            # lockfh exists only in foreground mode, so the main code and the
            # lockfile cleanup belong to this branch.
            maincode(ci, send_mail, args.lifetime,
                     keepalive=(not args.nokeepalive))
            lockfh.close()
            os.unlink(args.lockfile)
def cmd_qsum(update, ctx):
    """Telegram command handler: send a quick summary of project(s).

    With a textid as first command argument, only that project is reported,
    provided a profile linked to this chat is a member of it. Without
    arguments, all projects of the profiles linked to this chat are reported.

    For every project one message is sent with the indicator counters and up
    to five indicators in ERR state. If the server of a project does not
    answer, an apology is sent for that project and the remaining projects
    are still reported.
    """
    bot = ctx.bot
    args = ctx.args

    reg_command(update, ctx)

    chat_id = update.message.chat_id
    textid = args[0] if args else None
    projects = []

    if textid:
        project = Project.get_by_textid(textid)
        # A missing project and a project without access get the same reply.
        has_access = project is not None and any(
            project.member(profile.user)
            for profile in Profile.objects.filter(telegram_chat_id=chat_id))
        if not has_access:
            bot.send_message(chat_id=chat_id, text="No such project")
            return
        projects.append(project)
    else:
        log.info('list all projects for #{}'.format(chat_id))
        try:
            # all available projects, without duplicates
            for profile in Profile.objects.filter(telegram_chat_id=chat_id):
                for p in profile.projects():
                    if p not in projects:
                        projects.append(p)
        except Exception as e:
            log.error('exc: {}'.format(e))

    log.info("will list {} projects".format(len(projects)))

    if not projects:
        log.info("no projects!")
        bot.send_message(chat_id=chat_id, text="No projects",
                         reply_markup=get_reply_markup(chat_id))
        return

    # Message header template (Markdown), shared by all projects.
    tpl = u'''
Project *{}* ({})
Total {} (maintenance: {}, silent: {}, ERR: {})
'''

    for p in projects:
        rs = RemoteServer(ci=p.ci)
        data = rs.api_admin_qsum(p.get_textid())
        log.info("show project {}".format(p.get_textid()))

        if data is None:
            log.error('api_admin_qsum for {} / {} returned None'.format(
                rs.name, p.get_textid()))
            bot.send_message(
                chat_id=chat_id,
                parse_mode=telegram.ParseMode.MARKDOWN,
                reply_markup=get_reply_markup(chat_id),
                text='Server {} for project {} unavailable at moment. Sorry. Try again later.'
                     .format(rs.name, p.get_textid()))
            # Other projects may live on other servers: keep going.
            continue

        counters = data['cnt']
        msg = tpl.format(data['project'], data['textid'],
                         counters['total'], counters['maintenance'],
                         counters['silent'], counters['ERR'])

        for i in data['ERR'][:5]:
            try:
                link = rs.reverse('okerr:ilocator',
                                  {'pid': data['textid'], 'iid': i['name']})
                msg += u'[{}]({}) = {} ({}) {} ago\n'.format(
                    md_escape(i['name']), link,
                    i['status'], md_escape(i['details']), i['age'])
            except Exception as e:
                # best effort: a broken entry must not prevent the summary
                log.error('exc: {}'.format(e))

        if len(data['ERR']) > 5:
            msg += '(Only first 5 of {} shown)\n'.format(len(data['ERR']))

        bot.send_message(chat_id=chat_id,
                         parse_mode=telegram.ParseMode.MARKDOWN,
                         reply_markup=get_reply_markup(chat_id),
                         text=msg)
def process_tproc_reply(channel, data):
    """Handle one check result (``data``) reported by a remote machine.

    ``data`` carries the reporting machine under ``_machine`` (``name``,
    ``ip``), the indicator address (``name``, ``textid``) and the result
    (``code``, ``status``, ``details``, ``mtime``, ``code_message``).

    * code 200: the result is applied when its ``mtime`` matches the
      indicator, an update is expected, and either the status changed or the
      expected time (minus ``settings.MQ_PROCESS_TIME``) has been reached;
      otherwise a kill message with the reason is sent via ``send_kill``.
    * code 5xx: an alert is raised and the indicator is marked as a problem.
    * any other code: the failure is logged on the indicator and it is
      rescheduled ``settings.MQ_RETRY_TIME`` seconds from now.

    Raises TProcNoProject / TProcNoIndicator when the project or the
    indicator does not exist.
    """
    machine = data['_machine']
    name = machine['name']
    remoteip = machine['ip']
    now = timezone.now()

    project = Project.get_by_textid(data['textid'])
    if project is None:
        raise TProcNoProject(name=data['name'], textid=data['textid'])

    try:
        i = project.get_indicator(data['name'])
    except Indicator.DoesNotExist:
        raise TProcNoIndicator(name=data['name'], textid=data['textid'])

    code = int(data['code'])

    if code != 200:
        # Failure: both kinds are logged the same way and remember the machine.
        log.info('apply_tproc_fail {} {} {}:"{}" {}@{} = {}'.format(
            name, remoteip, data['code'], data['code_message'],
            data['name'], data['textid'], data['status']))
        i.last_fail_machine = name

        if 500 <= code < 600:
            i.alert('Permanent error ({}): {}.'.format(
                data['code'], data['code_message']))
            i.problem = True
            i.usave()
            log.error("Permanent error with {} (problem: {})".format(i, i.problem))
        else:
            i.log('Temprorary internal error ({}): {}. Do not worry.'.format(
                data['code'], data['code_message']))
            i.scheduled = timezone.now() + datetime.timedelta(
                seconds=settings.MQ_RETRY_TIME)
            i.usave()
        return

    # Successful check: reject results that do not fit the indicator state.
    if data['mtime'] != dt2unixtime(i.mtime):
        send_kill(
            channel, data['_machine'],
            "Too old mtime {} ({})".format(
                data['mtime'], dt2unixtime(i.mtime) - data['mtime']))
        return

    if not i.expected:
        send_kill(
            channel, data['_machine'],
            "Not expected update for {}@{}".format(data['name'], data['textid']))
        return

    # Same status and still before the processing window: too early.
    if data['status'] == i.status \
            and now < i.expected - datetime.timedelta(seconds=settings.MQ_PROCESS_TIME):
        send_kill(
            channel, data['_machine'],
            "Too early. now {} < exp: {}".format(
                now.strftime('%H:%M:%S'), i.expected.strftime('%H:%M:%S')))
        return

    log.info('GET {}@{} = {} ({}) {}'.format(
        data['name'], data['textid'], data['status'], data['details'], name))

    backlog = int(time.time() - i.expected.timestamp())
    if backlog > 30:
        log.debug("backlog: {}".format(dhms(backlog)))

    i.apply_tproc(data, name, location=None, throttled=data.get('_throttled'))
    log.debug("rescheduled: exp: {} sch: {}".format(
        shorttime(i.expected), shorttime(i.scheduled)))
    i.usave()