def fulltext(args):
    # we must initialize all node types to import fulltexts
    init.full_init()
    nid_mod_or_all = args.nid_mod_or_all.lower()
    remove_versioning()

    if nid_mod_or_all == "all":
        import_count = utils.search.import_fulltexts(args.overwrite)
        logg.info("loaded fulltexts for %s nodes", import_count)
    elif nid_mod_or_all.startswith("mod"):
        mod_n, mod_i = [int(x) for x in nid_mod_or_all.split(" ")[1:]]
        import_count = utils.search.import_fulltexts(args.overwrite, mod_n, mod_i)
        logg.info("loaded fulltexts for %s nodes with id mod %s == %s", import_count, mod_n, mod_i)
    else:
        nid = int(nid_mod_or_all)
        node = q(Node).get(nid)
        if node is None:
            logg.warn("node # %s not found!", nid)
            return
        imported = utils.search.import_node_fulltext(node, args.overwrite)
        if imported:
            logg.info("loaded fulltext for node # %s", nid)
        else:
            logg.info("nothing imported for node # %s", nid)
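# Hypothetical invocation sketch (not part of the original source): the attribute names
# "nid_mod_or_all" and "overwrite" come from the function above. "all" imports every
# fulltext, "mod <n> <i>" only imports nodes whose id % n == i (handy for splitting the
# work across several worker processes), and a plain number imports a single node.
# Assumes the usual manage-script environment (init, db session, logging) is available.
import argparse

fulltext(argparse.Namespace(nid_mod_or_all="mod 4 1", overwrite=False))
fulltext(argparse.Namespace(nid_mod_or_all="815", overwrite=True))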
def run(host=None, http_port=None, redis_sessions=False, force_test_db=None, loglevel=None, automigrate=False):
    """Serve mediaTUM from the Athana HTTP Server and start FTP and Z3950, if requested"""
    # init.full_init() must be done as early as possible to init logging etc.
    from core import init
    init.full_init(force_test_db=force_test_db, root_loglevel=loglevel, automigrate=automigrate)

    # init all web components
    from core import webconfig
    from core import athana
    webconfig.initContexts()

    @athana.request_finished
    def request_finished_db_session(*args):
        from core import db
        db.session.close()

    # start main web server, Z39.50 and FTP, if configured
    if config.get('z3950.activate', '').lower() == 'true':
        z3950port = int(config.get("z3950.port", "2021"))
    else:
        z3950port = None

    athana.setThreads(int(config.get("host.threads", "8")))

    if redis_sessions:
        print("WARNING: using experimental persistent redis session support, only for testing!!!")
        athana.USE_PERSISTENT_SESSIONS = True

    # record the start time so other tools can see when the server was (re)started
    import datetime
    with open("/tmp/mediatum.started", "w") as wf:
        wf.write(datetime.datetime.now().isoformat())
        wf.write("\n")

    athana.run(host or config.get("host.host", "0.0.0.0"), int(http_port or config.get("host.port", "8081")), z3950port)
def main():
    """
    Create or append to a logfile named yyyy-mm.log as an excerpt of mediatum.log.
    Lines that begin with the given period, contain the string 'INFO' and contain
    one of the strings 'GET', 'POST' or 'HEAD' are excerpted.

    usage:
    find /home/congkhacdung/logrotated/ -type f -iname 'mediatum.*.log' | sort | xargs cat | python bin/stats.py --skip-ip 127.0.0.1 --skip-ip 129.187.87.37 2018 2
    """
    parser = argparse.ArgumentParser(description='Extract info needed for statistics.')
    parser.add_argument('--skip-ip', dest='skip_ip', action='append', default=[], help='ip to skip')
    parser.add_argument('year', type=int, help='year')
    parser.add_argument('month', type=int, help='month')

    args = parser.parse_args()
    period = "{:4}-{:0>2}".format(args.year, args.month)
    skip_ip = args.skip_ip
    outdir = os.path.join(config.get("logging.save", config.get("logging.path", "/tmp")))

    # keep only lines from the requested period that contain INFO and a GET/POST/HEAD request
    match = re.compile('^({period}.{{17}}).*(INFO).{{2}}(.*(?:GET|POST|HEAD).*)'.format(period=period)).match
    lines = sys.stdin
    lines = imap(match, lines)
    lines = ifilter(None, lines)
    lines = imap(operator.methodcaller('groups'), lines)

    # drop lines coming from the given ips; only filter if ips were actually given,
    # because an empty pattern would match (and thus drop) every line
    if skip_ip:
        skip_ip_pattern = map("([^0-9.]{}[^0-9.])".format, skip_ip)
        skip_ip_pattern = '|'.join(skip_ip_pattern)
        match = re.compile(skip_ip_pattern).match
        lines = ifilter(lambda g: not match(g[2]), lines)

    lines = imap(operator.concat, lines, repeat(("\n",)))
    lines = imap("".join, lines)

    with tempfile.NamedTemporaryFile(dir=outdir) as tmpfile:
        tmpfile.writelines(lines)
        tmpfile.flush()
        init.full_init()
        buildStatAll([], period, tmpfile.name)
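# Illustrative check of the extraction regex used in main() above; the sample log line
# format is an assumption for demonstration, not taken from an actual mediatum.log.
import re

_match = re.compile(
    '^({period}.{{17}}).*(INFO).{{2}}(.*(?:GET|POST|HEAD).*)'.format(period="2018-02")).match

_sample = '2018-02-01 08:15:30,123 athana  INFO  129.187.87.99 "GET /node/12345 HTTP/1.1" 200'
_m = _match(_sample)
if _m:
    # groups: (timestamp prefix, 'INFO', request part containing GET/POST/HEAD)
    timestamp, level, request = _m.groups()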
def init(self, line):
    args = parse_argstring(self.init, line)
    new_state = args.type
    if new_state == "basic":
        initmodule.basic_init()
        if not self.init_state:
            self.init_state = INIT_STATES[new_state]
    else:
        initmodule.full_init()
        self.init_state = INIT_STATES[new_state]
def init(self, line):
    args = parse_argstring(self.init, line)
    new_state = args.state
    if new_state == "basic":
        initmodule.basic_init(**INIT_ARGS)
    elif new_state == "full":
        # drop reassignment warnings because we want to reassign node classes when plugins are loaded later, for example
        warnings.filterwarnings("ignore", "Reassigning polymorphic.*")
        initmodule.full_init(**INIT_ARGS)
    else:
        print("current init state is: " + initmodule.get_current_init_state())
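# Hypothetical IPython usage, assuming the method above is registered as a %init line magic
# (the parse_argstring call suggests it is). The state names match the branches above;
# any other value just prints the current init state:
#
#   %init basic    # initmodule.basic_init(**INIT_ARGS)
#   %init full     # initmodule.full_init(**INIT_ARGS)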
def run(host=None, http_port=None, redis_sessions=False, force_test_db=None, loglevel=None, automigrate=False):
    """Serve mediaTUM from the Athana HTTP Server and start FTP and Z3950, if requested"""
    # init.full_init() must be done as early as possible to init logging etc.
    from core import init
    init.full_init(force_test_db=force_test_db, root_loglevel=loglevel, automigrate=automigrate)

    # init all web components
    from core import webconfig
    from core import athana
    webconfig.initContexts()

    @athana.request_finished
    def request_finished_db_session(*args):
        from core import db
        db.session.close()

    # start main web server, Z39.50 and FTP, if configured
    if config.get('z3950.activate', '').lower() == 'true':
        z3950port = int(config.get("z3950.port", "2021"))
    else:
        z3950port = None

    athana.setThreads(int(config.get("host.threads", "8")))

    if redis_sessions:
        print("WARNING: using experimental persistent redis session support, only for testing!!!")
        athana.USE_PERSISTENT_SESSIONS = True

    # if a pid file path is given in mediatum.cfg (section 'paths'), the process id is written to that file
    if "paths.pidfile" in config.settings:
        with open(config.settings["paths.pidfile"], "w") as wf:
            wf.write(str(_os.getpid()))
            wf.write("\n")

    athana.run(host or config.get("host.host", "0.0.0.0"), int(http_port or config.get("host.port", "8081")), z3950port)
def run(host=None, http_port=None, force_test_db=None, loglevel=None, automigrate=False):
    """Serve mediaTUM from the Athana HTTP Server and start FTP and Z3950, if requested"""
    # init.full_init() must be done as early as possible to init logging etc.
    from core import init
    init.full_init(force_test_db=force_test_db, root_loglevel=loglevel, automigrate=automigrate)

    # init all web components
    from core import webconfig
    from core import athana
    from core.request_handler import request_finished as _request_finished
    webconfig.initContexts()

    @_request_finished
    def request_finished_db_session(*args):
        from core import db
        db.session.close()

    athana.setThreads(int(config.get("host.threads", "8")))

    # if a pid file path is given in mediatum.cfg (section 'paths'), the process id is written to that file
    if "paths.pidfile" in config.settings:
        with open(config.settings["paths.pidfile"], "w") as wf:
            wf.write(str(_os.getpid()))
            wf.write("\n")

    athana.run(host or config.get("host.host", "0.0.0.0"), int(http_port or config.get("host.port", "8081")))
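# Hypothetical mediatum.cfg excerpt enabling the pid file handling above; the section and
# key name follow the "paths.pidfile" lookup in the code, the file path is only an example:
#
#   [paths]
#   pidfile = /var/run/mediatum/mediatum.pid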
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import sys
sys.path += ["../", "."]

from core.init import full_init
full_init()

import core.tree as tree
import core.acl as acl
import re
import string
import core.xmlnode as xmlnode
import schema.schema as metadatatypes

rootaccess = acl.getRootAccess()

path = []
node = tree.getRoot()
lastnodes = []
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import sys

from core import config, webconfig, init
from core import athana

init.full_init()

### init all web components
webconfig.initContexts()

### scheduler thread
import core.schedules
try:
    core.schedules.startThread()
except:
    msg = "Error starting scheduler thread: %s %s" % (str(sys.exc_info()[0]), str(sys.exc_info()[1]))
    core.schedules.OUT(msg, logger='backend', print_stdout=True, level='error')

### full text search thread
if config.get("config.searcher", "").startswith("fts"):
    import core.search.ftsquery
import os
import sys
sys.path += ['../..', '../', '.']

import core.config as config
import logging
import datetime
import time
import urllib2
import re
from lxml import etree
from math import ceil

from core.init import full_init
full_init(prefer_config_filename="sitemap.log")

from core import Node
from core import db
from contenttypes import Collections

q = db.query

# XXX: alias handling must be fixed before switching this on
USE_ALIASES = False

PING_GOOGLE = True
PING_URL_ENCODED = 'http://www.google.com/webmasters/tools/ping?sitemap=http%3A%2F%2Fmediatum.ub.tum.de%2Fsitemap-index.xml'


class Sitemap:
        if line[0:period_len] > period:
            break
        pos = line.find("INFO")
        if pos < 0:
            continue
        if line[pos:].find('"GET') < 0 and line[pos:].find('"POST') < 0 and line[pos:].find('"HEAD') < 0:
            continue
        if line[pos:].find('127.0.0.1:') > 0 or line[pos:].find('129.187.87.37:') > 0:
            continue
        fout.write(line[0:24] + line[pos:pos + 4] + line[pos + 6:])
    fin.close()
    fout.close()


init.full_init()

args = sys.argv
period = time.strftime("%Y-%m")
fname = None
create_new_logfile = True

if len(args) >= 2 and args[1] == "-nolog":
    create_new_logfile = False
    args = args[1:]

if len(args) == 2:  # period given
    period = args[1]

if len(args) == 3:  # period and filename given
    period = args[1]
    fname = args[2]
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

__author__ = "Andrew Darrohn [email protected]"

import os
import sys
sys.path += ["../..", "../", "."]

from core.init import full_init
full_init()

import core.users as users
import core.acl as acl
import core.tree as tree
import core.config as config
import logging
import datetime
import time
import urllib2
import re
from lxml import etree
from math import ceil

USE_ALIASES = False

PING_GOOGLE = True