def parse(self, load_config_file_path):
    """Parse an external load config file and merge its settings into ``self.config``.

    The file is imported as a module through ``load_module``; every public,
    non-callable, truthy module attribute is treated as a setting.

    Args:
        load_config_file_path: Path of the load config file to import.

    Special-cased attributes:
        parser_factory: stored as-is (it is a callable, so it is handled
            before the callable filter).
        navigable_objects: replaces ``self.config["load_filters"]`` with the
            result of ``LoadFilters.set_load_filters``.
        words_to_index: treated as a path; the file is read into a set of
            stripped lines.
        plain_text_obj / store_words_and_ids / pos_tagger: append one extra
            load filter to ``self.config["load_filters"]``.

    Any other attribute is copied verbatim into ``self.config``.
    """
    load_config_file = load_module("external_load_config", load_config_file_path)
    for name in dir(load_config_file):
        if name == "parser_factory":
            self.config["parser_factory"] = getattr(load_config_file, name)
            continue
        if name.startswith("__"):
            continue
        value = getattr(load_config_file, name)
        # callable() replaces isinstance(..., collections.Callable): that alias
        # was removed from the collections namespace in Python 3.10.
        if callable(value) or not value:
            continue

        if name == "navigable_objects":
            self.config["load_filters"] = LoadFilters.set_load_filters(navigable_objects=value)
        # NOTE(review): the test above is deliberately not part of the chain
        # below, so navigable_objects also reaches the final ``else`` and is
        # stored under its own key, exactly as before - confirm this is intended.
        if name == "words_to_index":
            with open(value) as fh:
                self.config["words_to_index"] = {line.strip() for line in fh}
        elif name in ("plain_text_obj", "store_words_and_ids", "pos_tagger"):
            if "load_filters" not in self.config:
                # Copy the defaults so the append below never mutates the
                # shared LoadFilters.DefaultLoadFilters object.
                self.config["load_filters"] = list(LoadFilters.DefaultLoadFilters)
            if name == "plain_text_obj":
                extra_filter = LoadFilters.store_in_plain_text(*value)
            elif name == "store_words_and_ids":
                extra_filter = LoadFilters.store_words_and_philo_ids
            else:
                extra_filter = LoadFilters.pos_tagger(value)
            self.config["load_filters"].append(extra_filter)
        else:
            self.config[name] = value
Exemple #2
0
    def __init__(self, **loader_options):
        """Set up loader state and record the load configuration on disk.

        Creates the working directories, writes ``load_config.py`` into
        ``loader_options["data_destination"]``, optionally saves a predefined
        ``web_config.cfg`` next to it, and resets the bookkeeping attributes.

        ``load_config.py`` is either a copy of the user-supplied load config
        with the options it did not define appended, or, when no load config
        was supplied, a dump of all options.

        Args:
            **loader_options: Loader settings. Keys read here:
                ``default_object_level``, ``token_regex``, ``data_destination``,
                ``load_config``, ``theme`` and, optionally, ``web_config``.

        Raises:
            KeyError: if one of the mandatory keys is missing.
        """
        self.parse_pool = None
        self.default_object_level = loader_options["default_object_level"]
        self.token_regex = loader_options["token_regex"]

        # NOTE(review): destination, workdir and textdir are read but not
        # assigned in this method - presumably set on the class beforehand.
        # os.makedirs replaces a shell "mkdir -p" so that paths containing
        # spaces or shell metacharacters are handled correctly.
        os.makedirs(self.destination, exist_ok=True)
        os.mkdir(self.workdir)
        os.mkdir(self.textdir)

        load_config_path = os.path.join(loader_options["data_destination"], "load_config.py")
        # Loading these from a load_config would crash the parser for a number of reasons...
        values_to_ignore = [
            "load_filters",
            "post_filters",
            "parser_factory",
            "data_destination",
            "db_destination",
            "dbname",
        ]
        if loader_options["load_config"]:
            shutil.copy(loader_options["load_config"], load_config_path)
            config_obj = load_module("external_load_config", loader_options["load_config"])
            # callable() replaces isinstance(..., collections.Callable): that
            # alias was removed from the collections namespace in Python 3.10.
            already_configured_values = {
                attribute
                for attribute in dir(config_obj)
                if not attribute.startswith("__") and not callable(getattr(config_obj, attribute))
            }
            write_mode = "a"
            header_lines = ["\n\n## The values below were also used for loading ##"]
        else:
            already_configured_values = set()
            write_mode = "w"
            header_lines = [
                "#!/usr/bin/env python3",
                '"""This is a dump of the default configuration used to load this database,',
                "including non-configurable options. You can use this file to reload",
                'the current database using the -l flag. See load documentation for more details"""\n\n',
            ]

        with open(load_config_path, write_mode) as load_config_copy:
            for line in header_lines:
                print(line, file=load_config_copy)
            for option, value in loader_options.items():
                if option in already_configured_values or option in values_to_ignore or option == "web_config":
                    continue
                print("%s = %s\n" % (option, repr(value)), file=load_config_copy)

        if "web_config" in loader_options:
            web_config_path = os.path.join(loader_options["data_destination"], "web_config.cfg")
            print("\nSaving predefined web_config.cfg file to %s..." % web_config_path)
            with open(web_config_path, "w") as w:
                w.write(loader_options["web_config"])
            self.predefined_web_config = True
        else:
            self.predefined_web_config = False

        self.theme = loader_options["theme"]

        self.filenames = []
        self.raw_files = []
        self.deleted_files = []
        self.metadata_fields = []
        self.metadata_hierarchy = []
        self.metadata_types = {}
        self.normalized_fields = []
        self.metadata_fields_not_found = []
        self.sort_order = ""
Exemple #3
0
#!/usr/bin/env python3
"""Bootstrap Web app"""


import os.path

from philologic.runtime import WebConfig
from philologic.runtime import WSGIHandler
from philologic.runtime import access_control
from philologic.utils import load_module

# Directory holding this script: it is handed to WebConfig and its last
# component is used as the database name.
path = os.path.abspath(os.path.dirname(__file__))
config = WebConfig(path)
global_config = load_module("philologic4", config.global_config_location)
dbname = path.strip().split("/")[-1]

config = WebConfig(path)
config_location = os.path.join("app/assets/css/split/", os.path.basename(config.theme))
# The theme is always served from config_location; it only has to be copied
# there when it is not already that very file and no copy can be reused
# (an existing copy is reused only in production mode).
theme = config_location
if not (
    os.path.realpath(os.path.abspath(config.theme)) == os.path.realpath(os.path.abspath(config_location))
    or (os.path.exists(config_location) and config.production)
):
    os.system("cp %s %s" % (config.theme, config_location))


css_files = [
    "app/assets/css/bootstrap.min.css",
    "app/assets/css/split/style.css",
    "app/assets/css/split/searchForm.css",
Exemple #4
0
#!/usr/bin/env python3
"""CLI parser for philoload4 command"""

import collections
import os
import sys
from glob import glob
from argparse import ArgumentParser

from philologic.loadtime import Loader, LoadFilters, Parser, PlainTextParser, PostFilters
from philologic.utils import pretty_print, load_module

# Global PhiloLogic settings: the PHILOLOGIC_CONFIG environment variable
# overrides the default location when it is set.
CONFIG_PATH = os.environ.get("PHILOLOGIC_CONFIG", "/etc/philologic/philologic4.cfg")
CONFIG_FILE = load_module("philologic4", CONFIG_PATH)

if CONFIG_FILE.url_root is None:
    print("url_root variable is not set in /etc/philologic/philologic4.cfg",
          file=sys.stderr)
    print(
        "See https://github.com/ARTFL-Project/PhiloLogic4/blob/master/docs/installation.md.",
        file=sys.stderr)
    exit()
elif CONFIG_FILE.web_app_dir is None:
    print("web_app_dir variable is not set in /etc/philologic/philologic4.cfg",
          file=sys.stderr)
    print(
        "See https://github.com/ARTFL-Project/PhiloLogic4/blob/master/docs/installation.md.",
        file=sys.stderr)
    exit()
elif CONFIG_FILE.database_root is None:
Exemple #5
0
    def __init__(self, **loader_options):
        """Set up loader state and record the load configuration on disk.

        Copies the relevant options onto the instance, collects the parser
        options, prepares the data directory through ``self.setup_dir``,
        writes ``load_config.py`` into ``loader_options["data_destination"]``
        and optionally saves a predefined ``web_config.cfg`` next to it.

        ``load_config.py`` is either a copy of the user-supplied load config
        with the options it did not define appended, or, when no load config
        was supplied, a dump of all options.

        Args:
            **loader_options: Loader settings. Keys read here: ``debug``,
                ``default_object_level``, ``post_filters``, ``words_to_index``,
                ``token_regex``, ``data_destination``, ``load_config``,
                ``theme``, any name listed in ``PARSER_OPTIONS`` that is
                present, and, optionally, ``web_config``.

        Raises:
            KeyError: if one of the mandatory keys is missing.
        """
        self.omax = [1, 1, 1, 1, 1, 1, 1, 1, 1]
        self.parse_pool = None
        self.types = OBJECT_TYPES
        self.tables = DEFAULT_TABLES
        self.sort_by_word = SORT_BY_WORD
        self.sort_by_id = SORT_BY_ID
        self.debug = loader_options["debug"]
        self.default_object_level = loader_options["default_object_level"]
        self.post_filters = loader_options["post_filters"]
        self.words_to_index = loader_options["words_to_index"]
        self.token_regex = loader_options["token_regex"]

        # Only parser options that were actually supplied are forwarded.
        self.parser_config = {
            option: loader_options[option] for option in PARSER_OPTIONS if option in loader_options
        }

        self.setup_dir(loader_options["data_destination"])
        load_config_path = os.path.join(loader_options["data_destination"], "load_config.py")
        # Loading these from a load_config would crash the parser for a number of reasons...
        values_to_ignore = [
            "load_filters",
            "post_filters",
            "parser_factory",
            "data_destination",
            "db_destination",
            "dbname",
        ]
        if loader_options["load_config"]:
            shutil.copy(loader_options["load_config"], load_config_path)
            config_obj = load_module("external_load_config", loader_options["load_config"])
            # callable() replaces isinstance(..., collections.Callable): that
            # alias was removed from the collections namespace in Python 3.10.
            already_configured_values = {
                attribute
                for attribute in dir(config_obj)
                if not attribute.startswith("__") and not callable(getattr(config_obj, attribute))
            }
            write_mode = "a"
            header_lines = ["\n\n## The values below were also used for loading ##"]
        else:
            already_configured_values = set()
            write_mode = "w"
            header_lines = [
                "#!/usr/bin/env python3",
                '"""This is a dump of the default configuration used to load this database,',
                "including non-configurable options. You can use this file to reload",
                'the current database using the -l flag. See load documentation for more details"""\n\n',
            ]

        with open(load_config_path, write_mode) as load_config_copy:
            for line in header_lines:
                print(line, file=load_config_copy)
            for option, value in loader_options.items():
                if option in already_configured_values or option in values_to_ignore or option == "web_config":
                    continue
                print("%s = %s\n" % (option, repr(value)), file=load_config_copy)

        if "web_config" in loader_options:
            web_config_path = os.path.join(loader_options["data_destination"], "web_config.cfg")
            print("\nSaving predefined web_config.cfg file to %s..." % web_config_path)
            with open(web_config_path, "w") as w:
                w.write(loader_options["web_config"])
            self.predefined_web_config = True
        else:
            self.predefined_web_config = False

        self.theme = loader_options["theme"]

        self.metadata_fields = []
        self.metadata_hierarchy = []
        self.metadata_types = {}
        self.normalized_fields = []
        self.metadata_fields_not_found = []
def _expand_allowed_ips(raw_ips):
    """Expand the ``allowed_ips`` entries of an access config into a set of strings.

    A four-part entry whose last part is ``N-M`` is expanded to one address
    per value from N to M inclusive. A last part of the form ``N-`` is an
    open-ended range and is expanded from N up to 254 (the upper bound of 255
    is exclusive). Every other entry is kept verbatim.
    """
    expanded = set()
    for ip in raw_ips:
        octets = ip.split(".")
        last_octet_range = re.fullmatch(r"(\d+)-(\d*)", octets[3]) if len(octets) == 4 else None
        if last_octet_range is None:
            expanded.add(ip)
            continue
        first, last = last_octet_range.groups()
        stop = int(last) + 1 if last else 255
        prefix = ".".join(octets[:3])
        expanded.update(f"{prefix}.{last_num}" for last_num in range(int(first), stop))
    return expanded


def check_access(environ, config):
    """Decide whether the requesting client may access the database.

    Checks run in this order: the access file must be configured, exist and
    load; addresses matching one of the module-level ``ip_ranges`` patterns
    are granted access; otherwise a non-blocked client is granted access when
    its domain matches ``domain_list`` or its address starts with one of the
    ``allowed_ips`` entries.

    Args:
        environ: Request environment, passed to ``get_client_info``.
        config: Object exposing ``db_path`` and ``access_file``.

    Returns:
        The result of ``make_token(incoming_address, db)`` when access is
        granted, otherwise an empty tuple.
    """
    db = DB(config.db_path + "/data/")
    incoming_address, match_domain = get_client_info(environ)

    if config.access_file:
        if os.path.isabs(config.access_file):
            access_file = config.access_file
        else:
            access_file = os.path.join(config.db_path, "data", config.access_file)
        if not os.path.isfile(access_file):
            print(
                f"ACCESS FILE DOES NOT EXIST. UNAUTHORIZED ACCESS TO: {incoming_address} from domain {match_domain}",
                file=sys.stderr,
            )
            return ()
    else:
        print("UNAUTHORIZED ACCESS TO: %s from domain %s" % (incoming_address, match_domain), file=sys.stderr)
        return ()

    # Load access config file. If loading fails, don't grant access.
    try:
        access_config = load_module("access_config", access_file)
    except Exception as e:
        print("ACCESS ERROR", repr(e), file=sys.stderr)
        print("UNAUTHORIZED ACCESS TO: %s from domain %s" % (incoming_address, match_domain), file=sys.stderr)
        return ()

    # Let's first check if the IP is local and grant access if it is.
    for ip_range in ip_ranges:
        if ip_range.search(incoming_address):
            return make_token(incoming_address, db)

    # Each list is optional in the access config: a missing or malformed one
    # is treated as empty. ``except Exception`` keeps that best-effort
    # behaviour without swallowing KeyboardInterrupt/SystemExit the way a
    # bare ``except:`` does.
    try:
        domain_list = set(access_config.domain_list)
    except Exception:
        domain_list = []

    try:
        allowed_ips = _expand_allowed_ips(access_config.allowed_ips)
    except Exception as e:
        print(repr(e), file=sys.stderr)
        allowed_ips = []

    try:
        blocked_ips = set(access_config.blocked_ips)
    except Exception:
        blocked_ips = []

    if incoming_address not in blocked_ips:
        if match_domain in domain_list or any(domain in match_domain for domain in domain_list):
            return make_token(incoming_address, db)
        # Entries are compared as literal prefixes. Interpolating them into a
        # regex made every "." a wildcard, so "1.2.3" also matched "1.213.0.0".
        # NOTE(review): a full address still admits longer ones sharing its
        # prefix ("10.0.0.1" admits "10.0.0.12"), as before - confirm intent.
        if any(incoming_address.startswith(allowed_ip) for allowed_ip in allowed_ips):
            print("PASS", file=sys.stderr)
            return make_token(incoming_address, db)

    # If no token returned, we block access.
    print("UNAUTHORIZED ACCESS TO: %s from domain %s" % (incoming_address, match_domain), file=sys.stderr)
    return ()
#!/usr/bin/env python3
"""CLI parser for philoload4 command"""

import collections
import os
import sys
from glob import glob
from argparse import ArgumentParser

from philologic.loadtime import Loader, LoadFilters, Parser, PlainTextParser, PostFilters
from philologic.utils import pretty_print, load_module

# Load global config
CONFIG_PATH = os.getenv("PHILOLOGIC_CONFIG", "/etc/philologic/philologic4.cfg")
CONFIG_FILE = load_module("philologic4", CONFIG_PATH)

# Refuse to start when a mandatory setting is unset or absent. The message
# names the file that was actually loaded (PHILOLOGIC_CONFIG may override the
# default path), and the process exits with a non-zero status so that calling
# scripts can detect the failure. Only the first missing setting is reported.
for _required_setting in ("url_root", "web_app_dir", "database_root"):
    if getattr(CONFIG_FILE, _required_setting, None) is None:
        print(f"{_required_setting} variable is not set in {CONFIG_PATH}", file=sys.stderr)
        print("See https://github.com/ARTFL-Project/PhiloLogic4/blob/master/docs/installation.md.", file=sys.stderr)
        sys.exit(1)


class LoadOptions:
Exemple #8
0
#!/usr/bin/env python3
"""Bootstrap Web app"""

import os.path

from philologic.runtime import WebConfig
from philologic.runtime import WSGIHandler
from philologic.runtime import access_control
from philologic.utils import load_module

# Directory holding this script: it is handed to WebConfig and its last
# component is used as the database name.
path = os.path.abspath(os.path.dirname(__file__))
config = WebConfig(path)
global_config = load_module("philologic4", config.global_config_location)
dbname = path.strip().split("/")[-1]

config = WebConfig(path)
config_location = os.path.join("app/assets/css/split/", os.path.basename(config.theme))
# The theme is always served from config_location; it only has to be copied
# there when it is not already that very file and no copy can be reused
# (an existing copy is reused only in production mode).
theme = config_location
if not (
    os.path.realpath(os.path.abspath(config.theme)) == os.path.realpath(os.path.abspath(config_location))
    or (os.path.exists(config_location) and config.production)
):
    os.system("cp %s %s" % (config.theme, config_location))

css_files = [
    "app/assets/css/bootstrap.min.css",
    "app/assets/css/split/style.css",
    "app/assets/css/split/searchForm.css",
Exemple #9
0
    def __init__(self, **loader_options):
        """Set up loader state and record the load configuration on disk.

        Copies the relevant options onto the instance, collects the parser
        options, prepares the data directory through ``self.setup_dir``,
        writes ``load_config.py`` into ``loader_options["data_destination"]``,
        optionally saves a predefined ``web_config.cfg`` next to it, and
        resets the bookkeeping attributes.

        ``load_config.py`` is either a copy of the user-supplied load config
        with the options it did not define appended, or, when no load config
        was supplied, a dump of all options.

        Args:
            **loader_options: Loader settings. Keys read here: ``debug``,
                ``default_object_level``, ``post_filters``, ``words_to_index``,
                ``token_regex``, ``data_destination``, ``load_config``,
                ``theme``, any name listed in ``PARSER_OPTIONS`` that is
                present, and, optionally, ``web_config``.

        Raises:
            KeyError: if one of the mandatory keys is missing.
        """
        self.omax = [1, 1, 1, 1, 1, 1, 1, 1, 1]
        self.parse_pool = None
        self.types = OBJECT_TYPES
        self.tables = DEFAULT_TABLES
        self.sort_by_word = SORT_BY_WORD
        self.sort_by_id = SORT_BY_ID
        self.debug = loader_options["debug"]
        self.default_object_level = loader_options["default_object_level"]
        self.post_filters = loader_options["post_filters"]
        self.words_to_index = loader_options["words_to_index"]
        self.token_regex = loader_options["token_regex"]

        # Only parser options that were actually supplied are forwarded.
        self.parser_config = {
            option: loader_options[option] for option in PARSER_OPTIONS if option in loader_options
        }

        self.setup_dir(loader_options["data_destination"])
        load_config_path = os.path.join(loader_options["data_destination"], "load_config.py")
        # Loading these from a load_config would crash the parser for a number of reasons...
        values_to_ignore = [
            "load_filters",
            "post_filters",
            "parser_factory",
            "data_destination",
            "db_destination",
            "dbname",
        ]
        if loader_options["load_config"]:
            shutil.copy(loader_options["load_config"], load_config_path)
            config_obj = load_module("external_load_config", loader_options["load_config"])
            # callable() replaces isinstance(..., collections.Callable): that
            # alias was removed from the collections namespace in Python 3.10.
            already_configured_values = {
                attribute
                for attribute in dir(config_obj)
                if not attribute.startswith("__") and not callable(getattr(config_obj, attribute))
            }
            write_mode = "a"
            header_lines = ["\n\n## The values below were also used for loading ##"]
        else:
            already_configured_values = set()
            write_mode = "w"
            header_lines = [
                "#!/usr/bin/env python3",
                '"""This is a dump of the default configuration used to load this database,',
                "including non-configurable options. You can use this file to reload",
                'the current database using the -l flag. See load documentation for more details"""\n\n',
            ]

        with open(load_config_path, write_mode) as load_config_copy:
            for line in header_lines:
                print(line, file=load_config_copy)
            for option, value in loader_options.items():
                if option in already_configured_values or option in values_to_ignore or option == "web_config":
                    continue
                print("%s = %s\n" % (option, repr(value)), file=load_config_copy)

        if "web_config" in loader_options:
            web_config_path = os.path.join(loader_options["data_destination"], "web_config.cfg")
            print("\nSaving predefined web_config.cfg file to %s..." % web_config_path)
            with open(web_config_path, "w") as w:
                w.write(loader_options["web_config"])
            self.predefined_web_config = True
        else:
            self.predefined_web_config = False

        self.theme = loader_options["theme"]

        self.filenames = []
        self.raw_files = []
        self.deleted_files = []
        self.metadata_fields = []
        self.metadata_hierarchy = []
        self.metadata_types = {}
        self.normalized_fields = []
        self.metadata_fields_not_found = []
        self.sort_order = ""