def writeAdminRunnerOpMsg(self, msg):
  """Append a timestamped operator message to the operator log.

  Args:
    msg: message string to record.
  """
  timestamp = time.strftime("%Y/%m/%d %H:%M:%S", E.getLocaltime(time.time()))
  stamped = "@ %s: %s\n" % (timestamp, msg)
  self.oplog.write(stamped)
  # Flush right away so messages survive even when the log stays smaller
  # than the stdio buffer.
  self.oplog.flush()
def HealthzHandler(self):
  '''Probe E.getLocaltime so a wedged adminrunner can be restarted.

  When the adminrunner runs on Python 2.1 and 2.2, E.getLocaltime has
  failed after the adminrunner has been running for several hours.  This
  handler tries to run E.getLocaltime so loop_AdminRunner.py can restart
  the adminrunner if E.getLocaltime fails.

  Returns:
    self.HEALTHZ_OK when the probe succeeds, '' otherwise.
  '''
  try:
    E.getLocaltime(time.time())
  except:  # deliberately broad: any failure means "unhealthy"
    return ''
  return self.HEALTHZ_OK
def __init__(self, enthome, box_keys_dir="", license_keys_dir="", install_state='ACTIVE', startup_mode=0):
  # Purpose: load global enterprise config, open the operator log, and
  # construct the manager objects (license/user/log/crawlqueue) plus the
  # monitoring-graph table used by the admin UI.
  #
  # Args:
  #   enthome: enterprise home directory (stored as self.entHome).
  #   box_keys_dir / license_keys_dir: passed through to LicenseManager.
  #   install_state: e.g. 'ACTIVE' or 'INSTALL'; in 'INSTALL' mode
  #     parameter validation is skipped (see below).
  #   startup_mode: flag stored on self; semantics defined by callers —
  #     presumably non-zero means "starting up"; confirm against callers.
  self.entHome = enthome
  self.install_state = install_state
  self.globalLock = threading.Lock()
  # Init the global params
  self.mach_param_cache = machine_param_cache.MachineParamCache()
  self.globalParams = entconfig.EntConfig(self.entHome, 1) # Allow writes
  if not self.globalParams.Load():
    logging.error(
        "Cannot load the global parameters - think about reinstall")
  # Validation is skipped during install, when params may not be final yet.
  if install_state != 'INSTALL':
    if not self.globalParams.ValidateAll():
      logging.error("Global parameters were not validated correctly")
  # The operator log file: one file per adminrunner start, named with the
  # local start timestamp.
  oplogname = "%s/AdminRunner.OPERATOR.%s" % (
      self.globalParams.var("LOGDIR"),
      time.strftime("%Y_%m_%d_%H_%M_%S", E.getLocaltime(time.time())))
  self.oplog = open(oplogname, "w")
  self.lm = license_manager.LicenseManager(self, box_keys_dir,
                                           license_keys_dir)
  self.um = user_manager.UserManager(self)
  self.logmanager = log_manager.LogManager(self)
  self.crawlqueuemanager = crawlqueue_manager.CrawlQueueManager(self)
  self.startup_mode = startup_mode # startup in startup mode
  self.startup_time = int(time.time())
  # Table of admin-UI graphs: key -> graph_data(output file, monitored
  # variables, last_modified).  last_modified=-1 marks "never generated".
  self.graphs = {
      'SUM_URLS_TOTAL':
          self.graph_data(file_name="sumurlstotalgraph.png",
                          vars=["num_urls_in_index_total[24h]",
                                "num_urls_available_total[24h]"],
                          last_modified=-1),
      'SUM_URLS_CRAWLED':
          self.graph_data(file_name="sumurlscrawledgraph.png",
                          vars=["num_urls_in_index_total[24h]"],
                          last_modified=-1),
      'SUM_URLS_AVAILABLE':
          self.graph_data(file_name="sumurlsavailablegraph.png",
                          vars=["num_urls_available_total[24h]"],
                          last_modified=-1),
      'QUERIES_PER_MINUTE':
          self.graph_data(file_name="queriesperminutegraph.png",
                          vars=["gws_searches_per_minute[24h]"],
                          last_modified=-1),
      'SUM_URLS_TOTAL_THUMBNAIL':
          self.graph_data(file_name="sumurlstotalthumb.png",
                          vars=["num_urls_in_index_total[24h]",
                                "num_urls_available_total[24h]"],
                          last_modified=-1),
      'QUERIES_PER_MINUTE_THUMBNAIL':
          self.graph_data(file_name="queriesperminutethumb.png",
                          vars=["gws_searches_per_minute[24h]"],
                          last_modified=-1),
      'TMP_GRAPH':
          self.graph_data(file_name="tmp_graph.png", vars=[],
                          last_modified=-1),
      'ERROR_TEXT':
          self.graph_data(file_name="errortext.png", vars=[],
                          last_modified=-1),
  }
  # Guards concurrent access to self.graphs.
  self.graphs_lock_ = threading.Lock()
def writeAdminRunnerOpMsg(self, msg):
  """Write a timestamped operator message to self.oplog and flush it."""
  msg = "@ %s: %s\n" % (time.strftime("%Y/%m/%d %H:%M:%S",
                                      E.getLocaltime(time.time())), msg)
  self.oplog.write(msg)
  self.oplog.flush() # in case of log(s) smaller than buffer size
def send(cfg, to, problem, subject, msgText, logSubject):
  """Compose and send a notification or problem email via mailnotify.py.

  Args:
    cfg: adminrunner config object (getInstallState / getGlobalParam /
         writeAdminRunnerOpMsg).
    to: comma-separated recipients; when empty, falls back to the
        configured PROBLEM_EMAIL or NOTIFICATION_EMAIL.
    problem: true -> problem mail (problem prefix/recipients), else
        notification mail.
    subject: subject text, appended to the "<box id>: <prefix>" header.
    msgText: optional body text; a footer with date and box IP is added.
    logSubject: when true, log the subject and record it in the op log.
  """
  # No mails in install mode
  if cfg.getInstallState() == "INSTALL":
    return
  smtpHost = cfg.getGlobalParam("SMTP_SERVER")
  fromEmail = cfg.getGlobalParam("OUTGOING_EMAIL_SENDER")
  if not to:
    if problem:
      to = cfg.getGlobalParam("PROBLEM_EMAIL")
    else:
      to = cfg.getGlobalParam("NOTIFICATION_EMAIL")
  # if to is empty, we still go through the whole process, but bail out right
  # before actually sending the email. we want all the side effects to still
  # happen (logging, testing hooks, etc..)
  # if "*EMAIL" is None, commands.mkarg() will throw a TypeError exception
  if not to:
    to = ""
  else:
    to = string.strip(to)
  # Subject prefix differs for problem vs. notification mail; the box id
  # prefix is common to both (hoisted from the duplicated branches).
  if problem:
    prefix = cfg.getGlobalParam("EMAIL_PROBLEM_PREFIX")
  else:
    prefix = cfg.getGlobalParam("EMAIL_NOTIFICATION_PREFIX")
  allSubject = "%s: %s" % (
      cfg.getGlobalParam("ENT_LICENSE_INFORMATION").get("ENT_BOX_ID", ""),
      prefix)
  allSubject = allSubject + subject
  # Log the subject
  if logSubject:
    logging.info('Sending mail: To: %s; Subject: %s' % (repr(to),
                                                        repr(subject)))
    cfg.writeAdminRunnerOpMsg(subject)
  dateString = time.strftime("%Y/%m/%d %H:%M:%S",
                             E.getLocaltime(time.time()))
  if msgText:
    msgText = msgText + "\n"
  else:
    msgText = ""
  ipAddr = cfg.getGlobalParam("EXTERNAL_WEB_IP")
  body = "%s [%s @%s from %s]" % (msgText, M.MSG_MAIL_AUTOGENERATED,
                                  dateString, ipAddr)
  # if we have a testing hook, call that
  # (identity comparison is the correct test against the None sentinel)
  if TEST_MAIL_HANDLER_HOOK is not None:
    logging.info("calling TEST_MAIL_HANDLER_HOOK instead of mailnotify")
    to_list = filter(None, map(string.strip, string.split(to, ",")))
    TEST_MAIL_HANDLER_HOOK(smtpHost, to_list, fromEmail, allSubject, body)
    return
  # if to is empty, bail out
  if not (to and smtpHost):
    return
  # We execute this and not call the function directly because mailnotify
  # uses signal and we might call this from a secondary thread.
  E.execute([E.LOCALHOST],
            ". %s; cd %s/local/google3/enterprise/legacy/util; "\
            "./mailnotify.py %s %s %s %s %s" % (
                cfg.getGlobalParam('ENTERPRISE_BASHRC'),
                E.getEnterpriseHome(),
                commands.mkarg(smtpHost),
                commands.mkarg(allSubject),
                commands.mkarg(fromEmail),
                commands.mkarg(to),
                commands.mkarg(body)),
            None, 0)
def send(cfg, to, problem, subject, msgText, logSubject):
  # Purpose: build a subject ("<box id>: <prefix><subject>") and body, log
  # it, then ship the mail through mailnotify.py on localhost.  `problem`
  # selects the problem vs. notification prefix and fallback recipients.
  # No mails in install mode
  if cfg.getInstallState() == "INSTALL":
    return
  smtpHost = cfg.getGlobalParam("SMTP_SERVER")
  fromEmail = cfg.getGlobalParam("OUTGOING_EMAIL_SENDER")
  allSubject = None
  if not to:
    if problem:
      to = cfg.getGlobalParam("PROBLEM_EMAIL")
    else:
      to = cfg.getGlobalParam("NOTIFICATION_EMAIL")
  # if to is empty, we still go through the whole process, but bail out right
  # before actually sending the email. we want all the side effects to still
  # happen (logging, testing hooks, etc..)
  # if "*EMAIL" is None, commands.mkarg() will throw a TypeError exception
  if not to:
    to = ""
  else:
    to = string.strip(to)
  if problem:
    allSubject = "%s: %s" % (
        cfg.getGlobalParam("ENT_LICENSE_INFORMATION").get(
            "ENT_BOX_ID", ""),
        cfg.getGlobalParam("EMAIL_PROBLEM_PREFIX"))
  else:
    allSubject = "%s: %s" % (
        cfg.getGlobalParam("ENT_LICENSE_INFORMATION").get(
            "ENT_BOX_ID", ""),
        cfg.getGlobalParam("EMAIL_NOTIFICATION_PREFIX"))
  allSubject = allSubject + subject
  # Log the subject
  if logSubject:
    logging.info('Sending mail: To: %s; Subject: %s' % (repr(to),
                                                        repr(subject)))
    cfg.writeAdminRunnerOpMsg(subject)
  dateString = time.strftime("%Y/%m/%d %H:%M:%S",
                             E.getLocaltime(time.time()))
  if msgText:
    msgText = msgText + "\n"
  else:
    msgText = ""
  ipAddr = cfg.getGlobalParam("EXTERNAL_WEB_IP")
  # Body carries an autogenerated-mail footer with timestamp and box IP.
  body = "%s [%s @%s from %s]" % (msgText, M.MSG_MAIL_AUTOGENERATED,
                                  dateString, ipAddr)
  # if we have a testing hook, call that
  if TEST_MAIL_HANDLER_HOOK != None:
    logging.info("calling TEST_MAIL_HANDLER_HOOK instead of mailnotify")
    to_list = filter(None, map(string.strip, string.split(to, ",")))
    TEST_MAIL_HANDLER_HOOK(smtpHost, to_list, fromEmail, allSubject, body)
    return
  # if to is empty, bail out
  if not (to and smtpHost):
    return
  # We execute this and not call the function directly because mailnotify
  # uses signal and we might call this from a secondary thread.
  E.execute([E.LOCALHOST],
            ". %s; cd %s/local/google3/enterprise/legacy/util; "\
            "./mailnotify.py %s %s %s %s %s" % (
                cfg.getGlobalParam('ENTERPRISE_BASHRC'),
                E.getEnterpriseHome(),
                commands.mkarg(smtpHost),
                commands.mkarg(allSubject),
                commands.mkarg(fromEmail),
                commands.mkarg(to),
                commands.mkarg(body)),
            None, 0)
def sync(self, entry):
  '''Run TableCrawler to sync database source.'''
  # Returns "1" if a crawl for this source is already in flight, "0"
  # after kicking off a background TableCrawlerThread.
  if DatabaseHandler.crawling_sources.has_key(entry):
    return "1"
  config = self.cfg.globalParams
  cl = cli.CommandLine()
  cl.Add(servertype_prod.UlimitPrefix(config))
  # The table crawler and table server use similar commandline parameters,
  # so to reduce code, we borrow the tableserver commandline parameters
  # and change them, instead of reconstructing them again.
  binary_name = servertype.GetBinaryName('enttableserver')
  googlebin=os.path.join(config.var('MAINDIR'), 'bin')
  # change PWD to bin directory
  cl.Add('cd %s && ' % googlebin)
  libdir=os.path.join(config.var('MAINDIR'), 'bin', '%s_libs' % binary_name)
  # Build a ':'-joined list of JDBC jars; each entry is expanded against
  # os.environ (the jar paths contain %(VAR)s placeholders).
  jdbcjars=''
  for jar in servertype_prod.THIRD_PARTY_JDBC_JARS:
    realpath=(jar % os.environ)
    jdbcjars=('%s:%s' % (jdbcjars, realpath))
  classpath = ('%s/bin/TableCrawler.jar:'
               '%s/third_party/java/saxon/saxon.jar') % (
      config.var('MAINDIR'), config.var('MAINDIR'))
  cl.Add(servertype_prod.JavaServerExecutablePrefix(
      config, 'enttableserver',
      ('-classpath %s '
       '-Djava.security.manager '
       '-Djava.security.policy==%s/bin/java.policy '
       '-Djavax.xml.transform.TransformerFactory='
       'com.icl.saxon.TransformerFactoryImpl '
       '-Xbootclasspath/a:%s -Djava.library.path=%s '
       '-Dswigdeps=%s/TableCrawler_swigdeps.so') %
      (classpath, config.var('MAINDIR'), jdbcjars, libdir, libdir),
      no_loop=1, run_as_class=1, java_max_heap_mb=900))
  cl.Add('--dbinfo=%s' % config.var('DATABASES'))
  if config.var('DATABASE_STYLESHEET_DIR'):
    cl.Add('--stylesheet_dir=%s' % config.var('DATABASE_STYLESHEET_DIR'))
  cl.Add('--tablename=%s' % entry)
  # bug 67413, use public doctype instead of system doctype
  cl.Add('--doctype_public="-//Google//DTD GSA Feeds//EN"')
  cl.Add('--doctype_system=gsafeed.dtd')
  cl.Add(servertype.mkarg("--bnsresolver_use_svelte=false"))
  if config.var('GFS_ALIASES'):
    cl.Add(servertype.mkarg("--gfs_aliases=%s" % config.var('GFS_ALIASES')))
  # bug 63082 timestamp the feed file name to separate feeds on same source
  date_string = time.strftime('%Y%m%d_%H%M%S', E.getLocaltime(time.time()))
  feedfile = os.path.join(config.var('FEEDS_DIR'),
                          '%s_%s.xml' % (entry, date_string))
  logdir = self.cfg.getGlobalParam('DATABASE_LOGS_DIR')
  if not os.path.exists(logdir):
    os.mkdir(logdir)
  logfile = '%s.log' % os.path.join(logdir, entry)
  lastfile = '%s.last' % os.path.join(logdir, entry)
  if os.path.exists(lastfile):
    # possible incremental crawl: the .last file's mtime records when the
    # previous successful crawl finished.
    filemtime = time.localtime(os.path.getmtime(lastfile))
    cl.Add('--lastvisit="%s"' % time.strftime('%Y/%m/%d %H:%M:%S %Z',
                                              filemtime))
    cl.Add('--lastvisitformat="yyyy/MM/dd HH:mm:ss z"')
  cl.Add('--feedfile=%s' % feedfile)
  # feed using the static, externally visible port
  srv_mngr = config.GetServerManager()
  # NOTE(review): `set` shadows the builtin of the same name; harmless in
  # this scope but worth renaming in a behavior-changing pass.
  set = srv_mngr.Set('entfrontend')
  feedergateservers = set.BackendHostPorts('feedergate')
  cl.Add("--feedergate_servers=%s" %
         serverflags.MakeHostPortsArg(feedergateservers))
  # serve database tuples thru frontend
  cl.Add('--serveprefix=googledb://')
  # Rewrite the borrowed TableServer command line into the TableCrawler
  # class invocation (see comment near the top of this method).
  tablecrawler_command = cl.ToString().replace('TableServer', 'TableCrawler')
  tablecrawler_command = tablecrawler_command.replace(
      ' TableCrawler ', ' com.google.enterprise.database.TableCrawler ')
  # >& redirects stdout+stderr; touch the .last marker only on success.
  tablecrawler_command = ('%s >& %s && touch %s' %
                          (tablecrawler_command, logfile, lastfile))
  logging.info('TableCrawler commandline=%s' % tablecrawler_command)
  syncthread = TableCrawlerThread(group=DatabaseHandler.crawling_sources,
                                  source=entry, log=logfile, last=lastfile,
                                  cmd=tablecrawler_command, db_handler=self)
  syncthread.start()
  return "0"
def __init__(self, enthome, box_keys_dir="", license_keys_dir="", install_state='ACTIVE', startup_mode=0):
  """Load global config, open the operator log, and build the managers.

  Args:
    enthome: enterprise home directory (stored as self.entHome).
    box_keys_dir / license_keys_dir: passed through to LicenseManager.
    install_state: e.g. 'ACTIVE' or 'INSTALL'; in 'INSTALL' mode
      parameter validation is skipped.
    startup_mode: stored on self; semantics defined by callers.
  """
  # NOTE: keyword-default spacing normalized (PEP 8: no spaces around '='
  # in defaults) and the stray trailing semicolon removed, matching the
  # other copy of this constructor in the file.
  self.entHome = enthome
  self.install_state = install_state
  self.globalLock = threading.Lock()
  # Init the global params
  self.mach_param_cache = machine_param_cache.MachineParamCache()
  self.globalParams = entconfig.EntConfig(self.entHome, 1) # Allow writes
  if not self.globalParams.Load():
    logging.error(
        "Cannot load the global parameters - think about reinstall")
  # Skip validation during install, when params may not be final yet.
  if install_state != 'INSTALL':
    if not self.globalParams.ValidateAll():
      logging.error("Global parameters were not validated correctly")
  # The operator log file: one per adminrunner start, timestamped.
  oplogname = "%s/AdminRunner.OPERATOR.%s" % (
      self.globalParams.var("LOGDIR"),
      time.strftime("%Y_%m_%d_%H_%M_%S", E.getLocaltime(time.time())))
  self.oplog = open(oplogname, "w")
  self.lm = license_manager.LicenseManager(self, box_keys_dir,
                                           license_keys_dir)
  self.um = user_manager.UserManager(self)
  self.logmanager = log_manager.LogManager(self)
  self.crawlqueuemanager = crawlqueue_manager.CrawlQueueManager(self)
  self.startup_mode = startup_mode # startup in startup mode
  self.startup_time = int(time.time())
  # Admin-UI graphs: key -> graph_data(output file, monitored variables,
  # last_modified).  last_modified=-1 marks "never generated".
  self.graphs = {
      'SUM_URLS_TOTAL':
          self.graph_data(file_name="sumurlstotalgraph.png",
                          vars=["num_urls_in_index_total[24h]",
                                "num_urls_available_total[24h]"],
                          last_modified=-1),
      'SUM_URLS_CRAWLED':
          self.graph_data(file_name="sumurlscrawledgraph.png",
                          vars=["num_urls_in_index_total[24h]"],
                          last_modified=-1),
      'SUM_URLS_AVAILABLE':
          self.graph_data(file_name="sumurlsavailablegraph.png",
                          vars=["num_urls_available_total[24h]"],
                          last_modified=-1),
      'QUERIES_PER_MINUTE':
          self.graph_data(file_name="queriesperminutegraph.png",
                          vars=["gws_searches_per_minute[24h]"],
                          last_modified=-1),
      'SUM_URLS_TOTAL_THUMBNAIL':
          self.graph_data(file_name="sumurlstotalthumb.png",
                          vars=["num_urls_in_index_total[24h]",
                                "num_urls_available_total[24h]"],
                          last_modified=-1),
      'QUERIES_PER_MINUTE_THUMBNAIL':
          self.graph_data(file_name="queriesperminutethumb.png",
                          vars=["gws_searches_per_minute[24h]"],
                          last_modified=-1),
      'TMP_GRAPH':
          self.graph_data(file_name="tmp_graph.png", vars=[],
                          last_modified=-1),
      'ERROR_TEXT':
          self.graph_data(file_name="errortext.png", vars=[],
                          last_modified=-1),
  }
  # Guards concurrent access to self.graphs.
  self.graphs_lock_ = threading.Lock()