Ejemplo n.º 1
0
def fulltext(args):
    """Import fulltexts for all nodes, for a modulo slice of node ids or for one node.

    `args.nid_mod_or_all` selects the mode (it is lower-cased first):
    "all", "mod <n> <i>" or a single node id.
    `args.overwrite` is passed through to the import functions.
    """
    # all node types have to be initialized before fulltexts can be imported
    init.full_init()

    selector = args.nid_mod_or_all.lower()

    remove_versioning()

    if selector == "all":
        count = utils.search.import_fulltexts(args.overwrite)
        logg.info("loaded fulltexts for %s nodes", count)
        return

    if selector.startswith("mod"):
        # the two numbers follow the leading "mod" token, separated by single spaces
        mod_n, mod_i = [int(part) for part in selector.split(" ")[1:]]
        count = utils.search.import_fulltexts(args.overwrite, mod_n, mod_i)
        logg.info("loaded fulltexts for %s nodes with id mod %s == %s", count, mod_n, mod_i)
        return

    # anything else must be a single node id
    nid = int(selector)
    node = q(Node).get(nid)
    if node is None:
        logg.warn("node # %s not found!", nid)
        return

    if utils.search.import_node_fulltext(node, args.overwrite):
        logg.info("loaded fulltext for node # %s", nid)
    else:
        logg.info("nothing imported for node # %s", nid)
Ejemplo n.º 2
0
def run(host=None, http_port=None, redis_sessions=False, force_test_db=None, loglevel=None, automigrate=False):
    """Initialize mediaTUM and serve it from the Athana HTTP server (plus Z39.50, if activated in the config).

    `host` and `http_port` override the config values `host.host` / `host.port`.
    `redis_sessions` switches on Athana's persistent session flag.
    `force_test_db`, `loglevel` and `automigrate` are handed to `init.full_init()`.
    """
    # full_init() has to run as early as possible because it sets up logging etc.
    from core import init
    init.full_init(force_test_db=force_test_db, root_loglevel=loglevel, automigrate=automigrate)

    # web components
    from core import webconfig
    from core import athana
    webconfig.initContexts()

    @athana.request_finished
    def request_finished_db_session(*args):
        # close the database session after every finished request
        from core import db
        db.session.close()

    # Z39.50 gets a port only when it is activated in the config
    z3950_enabled = config.get('z3950.activate', '').lower() == 'true'
    z3950port = int(config.get("z3950.port", "2021")) if z3950_enabled else None

    athana.setThreads(int(config.get("host.threads", "8")))
    if redis_sessions:
        print("WARNING: using experimental persistent redis session support, only for testing!!!")
        athana.USE_PERSISTENT_SESSIONS = True

    # leave a marker file containing the start time
    import datetime
    with open("/tmp/mediatum.started", "w") as marker:
        marker.write(datetime.datetime.now().isoformat() + "\n")

    bind_host = host or config.get("host.host", "0.0.0.0")
    bind_port = int(http_port or config.get("host.port", "8081"))
    athana.run(bind_host, bind_port, z3950port)
Ejemplo n.º 3
0
def main():
    """
    Extract the access log lines of one month from stdin and build the statistics from them.

    Lines are kept when they begin with the period (yyyy-mm) and contain the string 'INFO'
    followed by one of the strings 'GET', 'POST' or 'HEAD'. Lines whose request part matches
    one of the --skip-ip addresses are dropped. The excerpt is written to a temporary file
    in the logging directory, which is then passed to buildStatAll().

    usage: find /home/congkhacdung/logrotated/ -type f -iname 'mediatum.*.log' | sort | xargs cat | python bin/stats.py --skip-ip 127.0.0.1 --skip-ip 129.187.87.37 2018 2
    """
    parser = argparse.ArgumentParser(description='Extract info needed for statistics.')
    parser.add_argument('--skip-ip', dest='skip_ip', action='append', default=[], help='ip to skip')
    parser.add_argument('year', type=int, help='year')
    parser.add_argument('month', type=int, help='month')
    args = parser.parse_args()
    period = "{:4}-{:0>2}".format(args.year, args.month)
    skip_ip = args.skip_ip

    outdir = os.path.join(config.get("logging.save", config.get("logging.path", "/tmp")))
    match_line = re.compile('^({period}.{{17}}).*(INFO).{{2}}(.*(?:GET|POST|HEAD).*)'.format(period=period)).match
    lines = sys.stdin
    lines = imap(match_line, lines)
    lines = ifilter(None, lines)
    lines = imap(operator.methodcaller('groups'), lines)

    # Filter only when addresses were given: an empty pattern matches every string,
    # which would silently drop all lines.
    if skip_ip:
        # re.escape keeps the dots of an address literal instead of "any character"
        skip_ip_pattern = '|'.join("([^0-9.]{}[^0-9.])".format(re.escape(ip)) for ip in skip_ip)
        # NOTE(review): match() anchors at the start of the request part, as before --
        # confirm whether search() was intended.
        match_skipped = re.compile(skip_ip_pattern).match
        lines = ifilter(lambda g: not match_skipped(g[2]), lines)

    # re-assemble every group tuple into one newline-terminated line
    lines = imap(operator.concat, lines, repeat(("\n",)))
    lines = imap("".join, lines)

    with tempfile.NamedTemporaryFile(dir=outdir) as tmpfile:
        tmpfile.writelines(lines)
        # flush so that the file can be read by name while it is still open
        tmpfile.flush()
        init.full_init()
        buildStatAll([], period, tmpfile.name)
Ejemplo n.º 4
0
def fulltext(args):
    """Load fulltexts for every node, for nodes selected by id modulo, or for a single node.

    The lower-cased `args.nid_mod_or_all` decides what is imported:
    "all", "mod <n> <i>" or one node id.
    `args.overwrite` is forwarded to the import functions unchanged.
    """
    # fulltext import needs all node types, so run the full initialization first
    init.full_init()

    target = args.nid_mod_or_all.lower()

    remove_versioning()

    if target == "all":
        imported_nodes = utils.search.import_fulltexts(args.overwrite)
        logg.info("loaded fulltexts for %s nodes", imported_nodes)
        return

    if target.startswith("mod"):
        # skip the leading "mod" token, the remaining two tokens are the numbers
        tokens = target.split(" ")[1:]
        mod_n, mod_i = [int(token) for token in tokens]
        imported_nodes = utils.search.import_fulltexts(args.overwrite, mod_n,
                                                       mod_i)
        logg.info("loaded fulltexts for %s nodes with id mod %s == %s",
                  imported_nodes, mod_n, mod_i)
        return

    # single node given by its id
    nid = int(target)
    node = q(Node).get(nid)
    if node is None:
        logg.warn("node # %s not found!", nid)
        return

    was_imported = utils.search.import_node_fulltext(node, args.overwrite)
    if was_imported:
        logg.info("loaded fulltext for node # %s", nid)
    else:
        logg.info("nothing imported for node # %s", nid)
Ejemplo n.º 5
0
 def init(self, line):
     """Run the initialization named in `line` and record the resulting init state.

     "basic" runs the basic initialization; every other value runs the full one.
     """
     requested = parse_argstring(self.init, line).type
     if requested != "basic":
         initmodule.full_init()
         self.init_state = INIT_STATES[requested]
         return

     initmodule.basic_init()
     # an already recorded state is kept; it is only set when none is stored yet
     if not self.init_state:
         self.init_state = INIT_STATES[requested]
Ejemplo n.º 6
0
 def init(self, line):
     """Perform the initialization selected by the parsed `line` and remember its state.

     The value "basic" triggers the basic initialization, anything else the full one.
     """
     parsed = parse_argstring(self.init, line)
     wanted = parsed.type
     basic_requested = wanted == "basic"

     if basic_requested:
         initmodule.basic_init()
     else:
         initmodule.full_init()

     # a full init always records its state, a basic init only when nothing is recorded yet
     if not basic_requested or not self.init_state:
         self.init_state = INIT_STATES[wanted]
Ejemplo n.º 7
0
 def init(self, line):
     """Run the basic or full initialization, or print the current init state.

     The parsed `state` argument selects the action: "basic", "full", or anything else
     to only print the current state.
     """
     requested = parse_argstring(self.init, line).state

     if requested == "basic":
         initmodule.basic_init(**INIT_ARGS)
         return

     if requested == "full":
         # node classes are reassigned on purpose when plugins are loaded later, so hide that warning
         warnings.filterwarnings("ignore", "Reassigning polymorphic.*")
         initmodule.full_init(**INIT_ARGS)
         return

     print("current init state is: " + initmodule.get_current_init_state())
Ejemplo n.º 8
0
 def init(self, line):
     """Initialize to the requested state or, without a known state, report the current one.

     `line` is parsed for a `state` argument; "basic" and "full" run the matching
     initialization with INIT_ARGS, every other value prints the current init state.
     """
     parsed = parse_argstring(self.init, line)
     target_state = parsed.state

     if target_state not in ("basic", "full"):
         print("current init state is: " + initmodule.get_current_init_state())
     elif target_state == "basic":
         initmodule.basic_init(**INIT_ARGS)
     else:
         # reassignment warnings are dropped because node classes get reassigned when plugins are loaded later
         warnings.filterwarnings("ignore", "Reassigning polymorphic.*")
         initmodule.full_init(**INIT_ARGS)
Ejemplo n.º 9
0
def main():
    """
    Extract the access log lines of one month from stdin and build the statistics from them.

    Lines are kept when they begin with the period (yyyy-mm) and contain the string 'INFO'
    followed by one of the strings 'GET', 'POST' or 'HEAD'. Lines whose request part matches
    one of the --skip-ip addresses are dropped. The excerpt is written to a temporary file
    in the logging directory, which is then passed to buildStatAll().

    usage: find /home/congkhacdung/logrotated/ -type f -iname 'mediatum.*.log' | sort | xargs cat | python bin/stats.py --skip-ip 127.0.0.1 --skip-ip 129.187.87.37 2018 2
    """
    parser = argparse.ArgumentParser(
        description='Extract info needed for statistics.')
    parser.add_argument('--skip-ip',
                        dest='skip_ip',
                        action='append',
                        default=[],
                        help='ip to skip')
    parser.add_argument('year', type=int, help='year')
    parser.add_argument('month', type=int, help='month')
    args = parser.parse_args()
    period = "{:4}-{:0>2}".format(args.year, args.month)
    skip_ip = args.skip_ip

    outdir = os.path.join(
        config.get("logging.save", config.get("logging.path", "/tmp")))
    match_line = re.compile(
        '^({period}.{{17}}).*(INFO).{{2}}(.*(?:GET|POST|HEAD).*)'.format(
            period=period)).match
    lines = sys.stdin
    lines = imap(match_line, lines)
    lines = ifilter(None, lines)
    lines = imap(operator.methodcaller('groups'), lines)

    # Filter only when addresses were given: an empty pattern matches every
    # string, which would silently drop all lines.
    if skip_ip:
        # re.escape keeps the dots of an address literal instead of "any character"
        skip_ip_pattern = '|'.join(
            "([^0-9.]{}[^0-9.])".format(re.escape(ip)) for ip in skip_ip)
        # NOTE(review): match() anchors at the start of the request part, as
        # before -- confirm whether search() was intended.
        match_skipped = re.compile(skip_ip_pattern).match
        lines = ifilter(lambda g: not match_skipped(g[2]), lines)

    # re-assemble every group tuple into one newline-terminated line
    lines = imap(operator.concat, lines, repeat(("\n", )))
    lines = imap("".join, lines)

    with tempfile.NamedTemporaryFile(dir=outdir) as tmpfile:
        tmpfile.writelines(lines)
        # flush so that the file can be read by name while it is still open
        tmpfile.flush()
        init.full_init()
        buildStatAll([], period, tmpfile.name)
Ejemplo n.º 10
0
def run(host=None, http_port=None, redis_sessions=False, force_test_db=None, loglevel=None, automigrate=False):
    """Initialize mediaTUM and serve it from the Athana HTTP server (plus Z39.50, if activated in the config).

    `host` and `http_port` override the config values `host.host` / `host.port`.
    `redis_sessions` switches on Athana's persistent session flag.
    `force_test_db`, `loglevel` and `automigrate` are handed to `init.full_init()`.
    """
    # full_init() has to run as early as possible because it sets up logging etc.
    from core import init
    init.full_init(force_test_db=force_test_db, root_loglevel=loglevel, automigrate=automigrate)

    # web components
    from core import webconfig
    from core import athana
    webconfig.initContexts()

    @athana.request_finished
    def request_finished_db_session(*args):
        # close the database session after every finished request
        from core import db
        db.session.close()

    # Z39.50 gets a port only when it is activated in the config
    z3950_enabled = config.get('z3950.activate', '').lower() == 'true'
    z3950port = int(config.get("z3950.port", "2021")) if z3950_enabled else None

    athana.setThreads(int(config.get("host.threads", "8")))
    if redis_sessions:
        print("WARNING: using experimental persistent redis session support, only for testing!!!")
        athana.USE_PERSISTENT_SESSIONS = True

    # when the setting paths.pidfile exists, the id of this process is written to that file
    if "paths.pidfile" in config.settings:
        with open(config.settings["paths.pidfile"], "w") as pidfile:
            pidfile.write(str(_os.getpid()) + "\n")

    bind_host = host or config.get("host.host", "0.0.0.0")
    bind_port = int(http_port or config.get("host.port", "8081"))
    athana.run(bind_host, bind_port, z3950port)
Ejemplo n.º 11
0
def run(host=None,
        http_port=None,
        force_test_db=None,
        loglevel=None,
        automigrate=False):
    """Initialize mediaTUM and serve it from the Athana HTTP server.

    `host` and `http_port` override the config values `host.host` / `host.port`.
    `force_test_db`, `loglevel` and `automigrate` are handed to `init.full_init()`.
    """
    # full_init() has to run as early as possible because it sets up logging etc.
    from core import init
    init.full_init(force_test_db=force_test_db,
                   root_loglevel=loglevel,
                   automigrate=automigrate)

    # web components
    from core import webconfig
    from core import athana
    from core.request_handler import request_finished as _request_finished
    webconfig.initContexts()

    @_request_finished
    def request_finished_db_session(*args):
        # close the database session after every finished request
        from core import db
        db.session.close()

    thread_count = int(config.get("host.threads", "8"))
    athana.setThreads(thread_count)

    # when the setting paths.pidfile exists, the id of this process is written to that file
    if "paths.pidfile" in config.settings:
        with open(config.settings["paths.pidfile"], "w") as pidfile:
            pidfile.write(str(_os.getpid()) + "\n")

    bind_host = host or config.get("host.host", "0.0.0.0")
    bind_port = int(http_port or config.get("host.port", "8081"))
    athana.run(bind_host, bind_port)
Ejemplo n.º 12
0
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
import sys

# Add the parent and the current directory to the module search path so that
# the `core` and `schema` packages imported below can be found.
sys.path += ["../", "."]

# The full initialization runs before the remaining project modules are imported.
from core.init import full_init
full_init()

import core.tree as tree
import core.acl as acl
import re
import string
import core.xmlnode as xmlnode
import schema.schema as metadatatypes

# Access object returned by acl.getRootAccess(), fetched once at import time.
rootaccess = acl.getRootAccess()

# Module-level state: `node` starts at the tree root, `path` and `lastnodes` start empty.
path = []
node = tree.getRoot()
lastnodes = []

Ejemplo n.º 13
0
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import sys

from core import config, webconfig, init
from core import athana

# The full initialization has to run before the web components are set up.
init.full_init()

### init all web components
webconfig.initContexts()

### scheduler thread
import core.schedules
try:
    core.schedules.startThread()
except Exception:
    # Best effort: a failing scheduler is only reported and must not abort the start-up.
    # Catching Exception (not a bare except) lets SystemExit and KeyboardInterrupt propagate.
    msg = "Error starting scheduler thread: %s %s" % (str(sys.exc_info()[0]), str(sys.exc_info()[1]))
    core.schedules.OUT(msg, logger='backend', print_stdout=True, level='error')

### full text search thread
# The fts query module is only imported when the configured searcher name starts with "fts".
# NOTE(review): presumably the import itself starts the search thread -- confirm in core.search.ftsquery.
if config.get("config.searcher", "").startswith("fts"):
    import core.search.ftsquery
Ejemplo n.º 14
0
import os
import sys
sys.path += ['../..', '../', '.']

import core.config as config
import logging
import datetime
import time
import urllib2
import re
from lxml import etree
from math import ceil

# The full initialization runs before Node, db and Collections are imported below.
from core.init import full_init
full_init(prefer_config_filename="sitemap.log")

from core import Node
from core import db
from contenttypes import Collections

# Shorthand for db.query
q = db.query

# XXX: alias handling must be fixed before switching this on
USE_ALIASES = False
PING_GOOGLE = True
# Ping URL with the sitemap address already URL-encoded
# (the parameter decodes to http://mediatum.ub.tum.de/sitemap-index.xml).
PING_URL_ENCODED = 'http://www.google.com/webmasters/tools/ping?sitemap=http%3A%2F%2Fmediatum.ub.tum.de%2Fsitemap-index.xml'


class Sitemap:
Ejemplo n.º 15
0
        if line[0:period_len] > period:
            break
        pos = line.find("INFO")
        if pos < 0:
            continue
        if line[pos:].find('"GET') < 0 and line[pos:].find('"POST') < 0 and line[pos:].find('"HEAD') < 0:
            continue
        if line[pos:].find('127.0.0.1:') > 0 or line[pos:].find('129.187.87.37:') > 0:
            continue
        fout.write(line[0:24] + line[pos:pos+4] + line[pos+6:])

    fin.close()
    fout.close()


init.full_init()

# Defaults: current month as period, no file name, and a new logfile is created.
period = time.strftime("%Y-%m")
fname = None
create_new_logfile = True

# Command line: [-nolog] [period [filename]]
args = sys.argv
if args[1:2] == ["-nolog"]:
    create_new_logfile = False
    # drop one leading element so that period/filename are found at index 1 and 2 again
    args = args[1:]

if len(args) == 2:
    # only the period is given
    period = args[1]
elif len(args) == 3:
    # period and filename are given
    period, fname = args[1], args[2]
Ejemplo n.º 16
0
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
__author__ = "Andrew Darrohn [email protected]"

import os
import sys

# Add the current directory and its two parent directories to the module search path
# so that the `core` package can be imported.
sys.path += ["../..", "../", "."]

from core.init import full_init

# The full initialization runs before the remaining core modules are imported below.
full_init()

import core.users as users
import core.acl as acl
import core.tree as tree
import core.config as config
import logging
import datetime
import time
import urllib2
import re
from lxml import etree
from math import ceil

USE_ALIASES = False
PING_GOOGLE = True
Ejemplo n.º 17
0
import os
import sys
sys.path += ['../..', '../', '.']

import core.config as config
import logging
import datetime
import time
import urllib2
import re
from lxml import etree
from math import ceil

# The full initialization runs before Node, db and Collections are imported below.
from core.init import full_init
full_init(prefer_config_filename="sitemap.log")

from core import Node
from core import db
from contenttypes import Collections

# Shorthand for db.query
q = db.query

# XXX: alias handling must be fixed before switching this on
USE_ALIASES = False
PING_GOOGLE = True
# Ping URL with the sitemap address already URL-encoded
# (the parameter decodes to http://mediatum.ub.tum.de/sitemap-index.xml).
PING_URL_ENCODED = 'http://www.google.com/webmasters/tools/ping?sitemap=http%3A%2F%2Fmediatum.ub.tum.de%2Fsitemap-index.xml'


class Sitemap:
    """