Ejemplo n.º 1
0
 def setUpClass(cls):
     """Prepare the ORM, the Django test environment and the test databases.

     The runner and the value returned by ``setup_databases()`` are stored on
     the class as ``dj_runner`` and ``old_config``. When the class defines a
     ``ManagerClass`` attribute, an instance of it is stored as ``manager``.
     """
     iepy.setup()

     # Keep the runner on the class so it stays reachable after set-up.
     runner = DiscoverRunner()
     cls.dj_runner = runner
     runner.setup_test_environment()
     cls.old_config = runner.setup_databases()

     # A manager instance is only built when the test case asks for one.
     if not hasattr(cls, 'ManagerClass'):
         return
     cls.manager = cls.ManagerClass()
Ejemplo n.º 2
0
    def upgrade(self):
        """Upgrade the IEPY instance at ``self.folder_path`` to the installed version.

        Exits the process with status 1 when the instance folder does not
        exist or when ``iepy.setup`` raises ``ValueError`` (the error is
        printed first). Returns without doing anything else when
        ``settings.IEPY_VERSION`` already equals ``iepy.__version__``.
        Otherwise stores the old version, fetches it through
        ``download_old_iepy_version()`` and runs ``_run_steps()``.
        """
        if not os.path.exists(self.folder_path):
            print("Error: instance folder does not exist")
            sys.exit(1)

        try:
            actual_path = iepy.setup(self.folder_path, _safe_mode=True)
        except ValueError as err:
            print(err)
            sys.exit(1)
        else:
            # Only touch the paths when setup succeeded. Doing this in a
            # ``finally`` clause would read the unbound ``actual_path`` on
            # failure and replace the clean exit with an UnboundLocalError.
            self.folder_path = actual_path
            self.abs_folder_path = os.path.abspath(self.folder_path)

        # Imported after iepy.setup() has run.
        # NOTE(review): presumably setup configures Django settings - confirm.
        from django.conf import settings
        self.old_version = settings.IEPY_VERSION
        if settings.IEPY_VERSION == iepy.__version__:
            print("Iepy instance '{}' is already up to date.".format(
                self.folder_path))
            return
        print("Upgrading iepy instance '{}' from {} to {}".format(
            self.folder_path, self.old_version, iepy.__version__))
        self.creating = False
        self.old_version_path = self.download_old_iepy_version()
        self._run_steps()
Ejemplo n.º 3
0
    def upgrade(self):
        """Bring the IEPY instance at ``self.folder_path`` up to the installed version.

        The process exits with status 1 if the instance folder is missing or
        if ``iepy.setup`` raises ``ValueError`` (whose message is printed).
        When ``settings.IEPY_VERSION`` equals ``iepy.__version__`` a notice is
        printed and the method returns. Otherwise the old version is recorded,
        ``download_old_iepy_version()`` is called and ``_run_steps()`` runs.
        """
        if not os.path.exists(self.folder_path):
            print("Error: instance folder does not exist")
            sys.exit(1)

        try:
            actual_path = iepy.setup(self.folder_path, _safe_mode=True)
        except ValueError as err:
            print(err)
            sys.exit(1)
        else:
            # Runs only on success: a ``finally`` clause here would reference
            # ``actual_path`` before assignment when setup fails, turning the
            # intended exit into an UnboundLocalError.
            self.folder_path = actual_path
            self.abs_folder_path = os.path.abspath(self.folder_path)

        # Imported after iepy.setup() has run.
        # NOTE(review): presumably setup configures Django settings - confirm.
        from django.conf import settings
        self.old_version = settings.IEPY_VERSION
        if settings.IEPY_VERSION == iepy.__version__:
            print("Iepy instance '{}' is already up to date.".format(self.folder_path))
            return
        print("Upgrading iepy instance '{}' from {} to {}".format(
            self.folder_path, self.old_version, iepy.__version__))
        self.creating = False
        self.old_version_path = self.download_old_iepy_version()
        self._run_steps()
Ejemplo n.º 4
0
Birthdate corpus preprocessing script

Usage:
    preprocess.py
    preprocess.py -h | --help | --version

Options:
  -h --help             Show this screen
  --version             Version number
"""
import logging

from docopt import docopt

import iepy
iepy.setup(__file__)
from iepy.data.db import DocumentManager
from iepy.preprocess.stanford_preprocess import StanfordPreprocess
from iepy.preprocess.pipeline import PreProcessPipeline
from iepy.preprocess.segmenter import SyntacticSegmenterRunner


if __name__ == '__main__':
    logger = logging.getLogger(u'preprocess')
    logger.setLevel(logging.INFO)
    logging.basicConfig(level=logging.INFO, format='%(message)s')
    opts = docopt(__doc__, version=iepy.__version__)
    docs = DocumentManager()
    pipeline = PreProcessPipeline([
        StanfordPreprocess(),
        SyntacticSegmenterRunner(increment=True)
Ejemplo n.º 5
0
"""
IEPY database loader from csv file

Usage:
    csv_to_iepy.py <filename>
    csv_to_iepy.py -h | --help

The <filename> argument can be a .csv file or a .csv.gz file containing the
corpus in two columns: 'freebase_mid' and 'description'.

Options:
  -h --help             Show this screen
  --version             Version number
"""

import logging

from docopt import docopt

import iepy
iepy.setup(__file__)
from iepy.utils import csv_to_iepy

if __name__ == "__main__":
    # Message-only log lines at INFO level.
    logging.basicConfig(format='%(message)s', level=logging.INFO)
    # The command line is parsed against this module's docstring.
    opts = docopt(__doc__, version=iepy.__version__)
    # Hand the <filename> argument straight to the loader.
    csv_to_iepy(opts["<filename>"])
Ejemplo n.º 6
0
"""
IEPY DB Abstraction level.

The goal of this module is to provide some thin abstraction between
the chosen database engine and ORM and the IEPY core and tools.
"""

from collections import defaultdict, namedtuple
from functools import lru_cache
from random import shuffle
import logging

import iepy
iepy.setup()

from iepy.data.models import (IEDocument, IEDocumentMetadata, TextSegment,
                              Relation, Entity, EntityKind, EntityOccurrence,
                              EvidenceLabel, EvidenceCandidate)

from iepy.preprocess import segmenter
from iepy.preprocess.pipeline import PreProcessSteps

# Named tuple type with two fields: `segments` and `documents`.
IEPYDBConnector = namedtuple('IEPYDBConnector', 'segments documents')

# Number of entities that will be cached on get_entity function.
ENTITY_CACHE_SIZE = 20  # reasonable compromise

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class DocumentManager(object):
Ejemplo n.º 7
0
 def migrate_db(self):
     """Set IEPY up on the instance folder and issue the ``migrate`` command."""
     instance_dir = self.abs_folder_path

     # Work from inside the instance folder and point IEPY at it first.
     os.chdir(instance_dir)
     iepy.setup(instance_dir)

     migrate_argv = ["", "migrate"]
     django_command_line(migrate_argv)
Ejemplo n.º 8
0
Archivo: db.py Proyecto: 52nlp/iepy
"""
IEPY DB Abstraction level.

The goal of this module is to provide some thin abstraction between
the chosen database engine and ORM and the IEPY core and tools.
"""

from collections import defaultdict, namedtuple
from functools import lru_cache
from random import shuffle
import logging

import iepy
iepy.setup()

from iepy.data.models import (
    IEDocument, IEDocumentMetadata,
    TextSegment, Relation,
    Entity, EntityKind, EntityOccurrence,
    EvidenceLabel, EvidenceCandidate
)

from iepy.preprocess import segmenter
from iepy.preprocess.pipeline import PreProcessSteps


# Named tuple type with two fields: `segments` and `documents`.
IEPYDBConnector = namedtuple('IEPYDBConnector', 'segments documents')

# Number of entities that will be cached on get_entity function.
ENTITY_CACHE_SIZE = 20  # reasonable compromise
Ejemplo n.º 9
0
def execute_from_command_line(argv=None):
    """Create a new IEPY instance folder from command-line arguments.

    ``argv`` is parsed with docopt against the module docstring. When
    ``--download-third-party-data`` is given, only that download is performed.
    Otherwise the folder named by ``<folder_path>`` is created with a ``bin``
    subfolder holding copies of the helper scripts, a stub ``rules.py``, an
    ``extractor_config.json`` and a ``<folder_name>_settings.py`` file. The
    ``migrate`` and ``createsuperuser`` commands are then issued through
    ``django_command_line``.

    The database name is asked for interactively; an empty answer selects the
    folder name. Exits with status 1 if the target folder already exists.
    """
    opts = docopt(__doc__, argv=argv, version=iepy.__version__)
    folder_path = opts["<folder_path>"]

    if opts["--download-third-party-data"]:
        download_third_party_data()
        return

    abs_folder_path = os.path.abspath(folder_path)
    if os.path.exists(folder_path):
        print("Error: folder already exists")
        sys.exit(1)

    files_to_copy = [
        os.path.join(THIS_FOLDER, "csv_to_iepy.py"),
        os.path.join(THIS_FOLDER, "preprocess.py"),
        os.path.join(THIS_FOLDER, "iepy_runner.py"),
        os.path.join(THIS_FOLDER, "iepy_rules_runner.py"),
        os.path.join(THIS_FOLDER, "manage.py"),
    ]

    # Create folders
    bin_folder = os.path.join(folder_path, "bin")

    os.mkdir(folder_path)
    os.mkdir(bin_folder)

    for filepath in files_to_copy:
        filename = os.path.basename(filepath)
        destination = os.path.join(bin_folder, filename)
        shutil.copyfile(filepath, destination)

    # Create empty rules file
    rules_filepath = os.path.join(folder_path, "rules.py")
    with open(rules_filepath, "w") as filehandler:
        filehandler.write("# Write here your rules\n")
        filehandler.write("# RELATION = 'your relation here'\n")

    # Create extractor config
    extractor_config_filepath = os.path.join(folder_path, "extractor_config.json")
    with open(extractor_config_filepath, "w") as filehandler:
        json.dump(defaults.extractor_config, filehandler, indent=4)

    # Create the settings file
    print("Initializing database")
    # Take the name from the normalized absolute path: splitting the raw
    # argument on os.sep gives an empty name for "instance/" and ignores
    # alternative separators.
    folder_name = os.path.basename(abs_folder_path)

    database_name = input("Database name [{}]: ".format(folder_name))
    if not database_name:
        database_name = folder_name
    database_path = os.path.join(abs_folder_path, database_name)
    new_settings_filepath = "{}_settings.py".format(folder_name)
    settings_filepath = os.path.join(folder_path, new_settings_filepath)
    settings_data = get_settings_string(database_path)
    with open(settings_filepath, "w") as filehandler:
        filehandler.write(settings_data)

    # Setup IEPY with the new instance
    os.chdir(abs_folder_path)
    iepy.setup(abs_folder_path)
    django_command_line(["", "migrate"])

    # Setup the database user
    print("\nCreating database user")
    django_command_line(["", "createsuperuser"])

    print("\n IEPY instance ready to use at '{}'".format(abs_folder_path))
Ejemplo n.º 10
0
                                           as is. Should be used with --trained-extractor
  --tune-for=<tune-for>                    Predictions tuning. Options are high-prec
                                           or high-recall [default: high-prec]
  --extractor-config=<config.json>         Sets the extractor config
  --version                                Version number
  -h --help                                Show this screen
"""

import os
import json
import logging
from docopt import docopt
from sys import exit

import iepy
INSTANCE_PATH = iepy.setup(__file__)

from iepy.extraction.active_learning_core import ActiveLearningCore, HIPREC, HIREC
from iepy.data.db import CandidateEvidenceManager
from iepy.data.models import Relation
from iepy.extraction.terminal import TerminalAdministration
from iepy.data import output


def print_all_relations():
    """Print a header line followed by every ``Relation``, one per line."""
    print("All available relations:")
    render_row = "  {}".format  # each relation is indented by two spaces
    for rel in Relation.objects.all():
        print(render_row(rel))


def load_labeled_evidences(relation, evidences):
Ejemplo n.º 11
0
def execute_from_command_line(argv=None):
    """Create a new IEPY instance folder from command-line arguments.

    ``argv`` is parsed with docopt against the module docstring. When
    ``--download-third-party-data`` is given, only that download is performed.
    Otherwise the folder named by ``<folder_path>`` is created with a ``bin``
    subfolder holding copies of the helper scripts, a stub ``rules.py``, an
    ``extractor_config.json`` and a ``<folder_name>_settings.py`` file. The
    ``migrate`` and ``createsuperuser`` commands are then issued through
    ``django_command_line``.

    The database name is asked for interactively; an empty answer selects the
    folder name. Exits with status 1 if the target folder already exists.
    """
    # NOTE(review): the version is hard-coded here; confirm whether it should
    # come from the package instead.
    opts = docopt(__doc__, argv=argv, version=0.1)
    folder_path = opts["<folder_path>"]

    if opts["--download-third-party-data"]:
        download_third_party_data()
        return

    abs_folder_path = os.path.abspath(folder_path)
    if os.path.exists(folder_path):
        print("Error: folder already exists")
        sys.exit(1)

    files_to_copy = [
        os.path.join(THIS_FOLDER, "csv_to_iepy.py"),
        os.path.join(THIS_FOLDER, "preprocess.py"),
        os.path.join(THIS_FOLDER, "iepy_runner.py"),
        os.path.join(THIS_FOLDER, "iepy_rules_runner.py"),
        os.path.join(THIS_FOLDER, "manage.py"),
    ]

    # Create folders
    bin_folder = os.path.join(folder_path, "bin")

    os.mkdir(folder_path)
    os.mkdir(bin_folder)

    for filepath in files_to_copy:
        filename = os.path.basename(filepath)
        destination = os.path.join(bin_folder, filename)
        shutil.copyfile(filepath, destination)

    # Create empty rules file
    rules_filepath = os.path.join(folder_path, "rules.py")
    with open(rules_filepath, "w") as filehandler:
        filehandler.write("# Write here your rules\n")
        filehandler.write("# RELATION = 'your relation here'\n")

    # Create extractor config
    extractor_config_filepath = os.path.join(folder_path,
                                             "extractor_config.json")
    with open(extractor_config_filepath, "w") as filehandler:
        json.dump(defaults.extractor_config, filehandler, indent=4)

    # Create the settings file
    print("Initializing database")
    # Take the name from the normalized absolute path: splitting the raw
    # argument on os.sep gives an empty name for "instance/" and ignores
    # alternative separators.
    folder_name = os.path.basename(abs_folder_path)

    database_name = input("Database name [{}]: ".format(folder_name))
    if not database_name:
        database_name = folder_name
    database_path = os.path.join(abs_folder_path, database_name)
    new_settings_filepath = "{}_settings.py".format(folder_name)
    settings_filepath = os.path.join(folder_path, new_settings_filepath)
    settings_data = get_settings_string(database_path)
    with open(settings_filepath, "w") as filehandler:
        filehandler.write(settings_data)

    # Setup IEPY with the new instance
    os.chdir(abs_folder_path)
    iepy.setup(abs_folder_path)
    django_command_line(["", "migrate"])

    # Setup the database user
    print("\nCreating database user")
    django_command_line(["", "createsuperuser"])

    print("\n IEPY instance ready to use at '{}'".format(abs_folder_path))
Ejemplo n.º 12
0
  --classifier=<classifier_path>     Load an already trained classifier
  --no-questions                     Won't generate questions to answer and will try to predict as is. Should be used with --classifier
  -h --help                          Show this screen
  --tune-for=<tune-for>              Predictions tuning. Options are high-prec or high-recall [default: high-prec]
  --extractor-config=<config.json>   Sets the extractor config
  --version                          Version number
"""

import os
import json
import logging
from docopt import docopt
from sys import exit

import iepy
INSTANCE_PATH = iepy.setup(__file__)

from iepy.extraction.active_learning_core import ActiveLearningCore, HIPREC, HIREC
from iepy.data.db import CandidateEvidenceManager
from iepy.data.models import Relation
from iepy.extraction.terminal import TerminalAdministration
from iepy.data import output


def print_all_relations():
    """Print a header line followed by every ``Relation``, one per line."""
    print("All available relations:")
    render_row = "  {}".format  # each relation is indented by two spaces
    for rel in Relation.objects.all():
        print(render_row(rel))


def load_labeled_evidences(relation, evidences):
Ejemplo n.º 13
0
# Run the iepy setup at import time so that individual test modules can be
# executed on their own.
# NOTE(review): the meaning of `_safe_mode=True` is defined by iepy.setup,
# which is not visible here - confirm before relying on it.
import iepy
iepy.setup(_safe_mode=True)