def get_nih(db):
    nih_df = pd.read_csv(from_root("temp\\test_data\\nih_cleaned.csv"))
    db.insert(nih_df, "tmp_nih", "dbo")

    nih_metamap_df = annotate(nih_df)
    db.insert(nih_metamap_df, "tmp_nih_metamap", "dbo")
    write_df(from_root("temp\\test_data\\nih_metamap.csv"), nih_metamap_df)
    def __init__(self, cfg_filepath: str):
        set_deterministic_seed()
        with open(from_root(cfg_filepath), "r") as file:
            self.cfg = DictObject(json.load(file))

        for sub_dirname in ("logs", "checkpoints", "debug"):
            os.makedirs(self.from_out(sub_dirname), exist_ok=True)

        self.model = create_resnet(self.cfg)
        self.model = self.model.to(self.cfg.model.device)

        self.train_loader, self.infer_train_loader, self.infer_val_loader, \
            train_mean, train_std = create_dataset(self.cfg)
        self.model.normalize.set_parameters(train_mean, train_std)

        self.logger = SummaryWriter(log_dir=self.from_out("logs"))

        self.optimizer = create_optimizer(self.model, self.logger, self.cfg)
        self.scheduler = optim.lr_scheduler.MultiStepLR(
            self.optimizer,
            self.cfg.scheduler.milestones,
            gamma=self.cfg.scheduler.gamma)

        if isinstance(self.model, ResNet_Softmax):
            self.criterion = lambda logits, targets: F.nll_loss(
                logits, targets, reduction="mean")
        else:
            assert isinstance(self.model, ResNet_Gaussian)
            self.criterion = lambda sqr_distances, targets: mmc_loss(
                sqr_distances, targets, reduction="mean")

        Tracker.reset(self.cfg.optimizer.n_epochs)
        if self.cfg.load_checkpoint is not None:
            self.load_checkpoint(self.cfg.load_checkpoint)
    def test_log_domain(self) -> None:
        images = [
            Image.open(from_root("test_data/log_bilinear_4x/1.png")),
            Image.open(from_root("test_data/log_bilinear_4x/2.png"))
        ]
        transform = transforms.ToTensor()
        x = torch.stack([transform(image) for image in images], dim=0)

        expected = F.interpolate(x, scale_factor=4, mode="bilinear")
        expected = torch.log(expected)

        actual = torch.log(x)
        actual = log_bilinear_4x(actual)
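        # The assertions below check that log_bilinear_4x applied to log(x)
        # matches log(F.interpolate(x, scale_factor=4, mode="bilinear")), i.e.
        # that 4x bilinear upsampling is carried out in the log domain.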

        self.assert_no_nan(actual)
        self.assert_tensors_almost_equal(expected, actual, 1e-6)
def classify_culture(db, to_module, l1ml_module, l1s_module, l2_module):
    culture_df = db.extract(from_root("temp\\sql\\test_culture.sql"))
    keys = ["test_key", "result_key"]

    culture_to_results = to_module.classify(culture_df)
    culture_l1ml_results = l1ml_module.classify(culture_df)
    culture_l1s_results = l1s_module.classify(culture_df)
    culture_l2_results = l2_module.classify(culture_df)

    culture_results = culture_to_results\
        .merge(culture_l1ml_results, how="inner", on=keys)\
        .merge(culture_l1s_results, how="inner", on=keys)\
        .merge(culture_l2_results, how="inner", on=keys)

    db.insert(culture_results, "tmp_culture_predictions", "dbo")
    write_df(from_root("temp\\predictions\\culture.csv"), culture_results)
def classify_random(db, to_module, l1ml_module, l1s_module, l2_module):
    random_df = db.extract(from_root("temp\\sql\\test_random.sql"))
    keys = ["test_key", "result_key"]

    random_to_results = to_module.classify(random_df)
    random_l1ml_results = l1ml_module.classify(random_df)
    random_l1s_results = l1s_module.classify(random_df)
    random_l2_results = l2_module.classify(random_df)

    random_results = random_to_results\
        .merge(random_l1ml_results, how="inner", on=keys)\
        .merge(random_l1s_results, how="inner", on=keys)\
        .merge(random_l2_results, how="inner", on=keys)

    db.insert(random_results, "tmp_random_predictions", "dbo")
    write_df(from_root("temp\\predictions\\random.csv"), random_results)
def classify_nih(db, tp_module, to_module, l1ml_module, l1s_module, l2_module):
    nih_df = db.extract(from_root("temp\\sql\\test_nih.sql"))
    keys = ["test_key", "result_key"]

    nih_tp_results = tp_module.classify(nih_df)
    nih_to_results = to_module.classify(nih_df)
    nih_l1ml_results = l1ml_module.classify(nih_df)
    nih_l1s_results = l1s_module.classify(nih_df)
    nih_l2_results = l2_module.classify(nih_df)

    nih_results = nih_tp_results\
        .merge(nih_to_results, how="inner", on=keys)\
        .merge(nih_l1ml_results, how="inner", on=keys)\
        .merge(nih_l1s_results, how="inner", on=keys)\
        .merge(nih_l2_results, how="inner", on=keys)

    db.insert(nih_results, "tmp_nih_predictions", "dbo")
    write_df(from_root("temp\\predictions\\nih.csv"), nih_results)
    def test_linear_domain(self) -> None:
        images = [
            Image.open(from_root("test_data/log_bilinear_4x/1.png")),
            Image.open(from_root("test_data/log_bilinear_4x/2.png"))
        ]
        transform = transforms.ToTensor()
        x = torch.stack([transform(image) for image in images], dim=0)

        os.makedirs(from_root("test_results/log_bilinear_4x"), exist_ok=True)

        expected = F.interpolate(x, scale_factor=4, mode="bilinear")
        torchvision.utils.save_image(
            expected, from_root("test_results/log_bilinear_4x/expected.png"))

        actual = torch.log(x)
        actual = log_bilinear_4x(actual)
        actual = torch.exp(actual)
        torchvision.utils.save_image(
            actual, from_root("test_results/log_bilinear_4x/actual.png"))

        self.assert_no_nan(actual)
        self.assert_tensors_almost_equal(expected, actual, 1e-6)
    def init_surrogate_model(self, surrogate_cfg_filepath: str) -> None:
        with open(from_root(surrogate_cfg_filepath), "r") as file:
            cfg = DictObject(json.load(file))

        self.surrogate_model = create_resnet(cfg)
        self.surrogate_model = self.surrogate_model.to(cfg.model.device)

        best_epoch_filepath = os.path.join(from_root(cfg.out_dirpath),
                                           "checkpoints/best_epoch.txt")
        with open(best_epoch_filepath, "r") as file:
            epoch = int(file.read())

        checkpoint_filepath = os.path.join(
            from_root(cfg.out_dirpath), f"checkpoints/checkpoint_{epoch}.pth")
        checkpoint = torch.load(checkpoint_filepath,
                                map_location=model_device(
                                    self.surrogate_model))
        self.surrogate_model.load_state_dict(checkpoint["model_state_dict"])

        # Freeze the surrogate: evaluation mode, and no gradient updates to its
        # parameters.
        self.surrogate_model.eval()
        for param in self.surrogate_model.parameters():
            param.requires_grad = False
def load_modules():
    # Test performed
    tp_module = TestPerformedModule.load_from_file(
        from_root("temp\\pkl\\test_performed_module.pkl"))

    # Test outcome
    to_module = TestOutcomeModule.load_from_file(
        from_root("temp\\pkl\\test_outcome_module.pkl"))

    # Level 1 (machine learning)
    l1ml_module = Level1MLModule.load_from_file(
        from_root("temp\\pkl\\level_1_ml_module.pkl"))

    # Level 1 (symbolic)
    l1s_module = Level1SymbolicModule(to_module).load_from_file(
        from_root("temp\\pkl\\level_1_symbolic_module.pkl"))

    # Level 2
    l2_module = Level2Module(l1ml_module).load_from_file(
        from_root("temp\\pkl\\level_2_module.pkl"))

    print("Finished loading modules.")
    return tp_module, to_module, l1ml_module, l1s_module, l2_module
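
# A minimal usage sketch for load_modules(), based on the classify_* helpers
# defined in this file (assumes Database.get_instance() as used elsewhere here):
#
#     db = Database.get_instance()
#     tp_module, to_module, l1ml_module, l1s_module, l2_module = load_modules()
#     classify_nih(db, tp_module, to_module, l1ml_module, l1s_module, l2_module)
#     classify_culture(db, to_module, l1ml_module, l1s_module, l2_module)
#     classify_random(db, to_module, l1ml_module, l1s_module, l2_module)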
    def __init__(self, cfg_filepath: str):
        set_deterministic_seed()
        with open(from_root(cfg_filepath), "r") as file:
            self.cfg = DictObject(json.load(file))

        for sub_dirname in ("logs", "checkpoints", "debug"):
            os.makedirs(self.from_out(sub_dirname), exist_ok=True)

        self.model = RefineNet_4Cascaded()
        self.model = self.model.cuda()
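        # NOTE: the checkpoint path below is left as a "TODO" placeholder in
        # this snippet; point it at the pretrained backbone weights before use.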
        state_dict = torch.load("TODO", map_location=model_device(self.model))
        self.model.backbone.load_state_dict(state_dict)

        self.train_loader = load_pascal_voc_train(16)
        self.infer_train_loader = load_pascal_voc_infer("train", 16)
        self.infer_val_loader = load_pascal_voc_infer("val", 16)
Example #11
    def __init__(self, cfg_filepath: str, data_loader: DataLoader,
                 adversary: Optional[Adversary], out_name: str,
                 visualize_adversary: int):
        set_deterministic_seed()
        with open(from_root(cfg_filepath), "r") as file:
            self.cfg = DictObject(json.load(file))

        os.makedirs(self.from_out("inference"), exist_ok=True)
        os.makedirs(self.from_out(f"inference_debug/{out_name}"),
                    exist_ok=True)

        self.model = create_resnet(self.cfg)
        self.model = self.model.to(self.cfg.model.device)
        self.load_best_epoch()

        self.loader = data_loader
        self.adversary = adversary
        self.out_name = out_name
        self.visualize_adversary = visualize_adversary

        Tracker.reset()
Example #12
def train_test_performed(db):
    tp_df = db.extract(from_root("temp\\sql\\train_test_performed.sql"))
    tp_module = TestPerformedModule(organisms=True)
    tp_module.retrain(tp_df)
    tp_module.save_to_file(from_root("temp\\pkl\\test_performed_module.pkl"))
import multiprocessing
import os
from typing import Union

import torchvision
from torch.utils.data import DataLoader, ConcatDataset, Subset
from torchvision import transforms

from root import from_root
from src.misc.utils import read_lines

DATA_DIRPATH = from_root("data/svhn")
SPLIT_DIRPATH = from_root("splits/svhn")
SVHN_TRAIN_MEAN = [0.4310, 0.4303, 0.4464]
SVHN_TRAIN_STD = [0.1965, 0.1983, 0.1994]


def load_svhn_infer(split: str, batch_size: int,
                    n_workers: Union[str, int]) -> DataLoader:
    if split not in {"train", "val", "test"}:
        raise ValueError("Split must be 'train', 'val', or 'test'!")
    if batch_size <= 0:
        raise ValueError("Batch_size must be positive!")
    if isinstance(n_workers, str) and n_workers != "n_cores":
        raise ValueError("If n_workers is a string, it must be 'n_cores'!")
    if isinstance(n_workers, int) and n_workers < 0:
        raise ValueError("If n_workers is an int, it must be non-negative!")

    transform = transforms.ToTensor()

    if split == "train":
def get_culture(db):
    culture_df = db.extract(from_root("temp\\sql\\get_test_culture.sql"))
    culture_df = culture_df.sample(n=100)

    db.insert(culture_df, "tmp_culture", "dbo")
    write_df(from_root("temp\\test_data\\culture.csv"), culture_df)
def get_random(db):
    random_df = db.extract(from_root("temp\\sql\\get_test_random.sql"))
    random_df = random_df.sample(n=100)

    db.insert(random_df, "tmp_random", "dbo")
    write_df(from_root("temp\\test_data\\random.csv"), random_df)
import os
import ssl

import torch
import torchvision

from root import from_root
from src.datasets.load_imagenet import IMAGENET_TRAIN_MEAN, IMAGENET_TRAIN_STD

OUT_FILEPATH = from_root("weights/softmax_resnet50_imagenet/pretrained.pth")


def main():
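    # Work around SSL certificate errors when torchvision downloads the
    # pretrained weights (disables certificate verification for this process).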
    ssl._create_default_https_context = ssl._create_unverified_context
    torch_model = torchvision.models.resnet50(pretrained=True)

    new_state_dict = {
        "normalize.mean": torch.tensor(IMAGENET_TRAIN_MEAN).reshape(1, -1, 1, 1),
        "normalize.std": torch.tensor(IMAGENET_TRAIN_STD).reshape(1, -1, 1, 1)
    }

    for key, value in torch_model.state_dict().items():
        breadcrumbs = key.split(".")

        # change "layer1" to "stage2"
        if breadcrumbs[0].startswith("layer"):
            index = int(breadcrumbs[0][5:])
            breadcrumbs[0] = "stage" + str(index + 1)

        # change "downsample" to "projection"
        if len(breadcrumbs) >= 3 and breadcrumbs[2] == "downsample":
            breadcrumbs[2] = "projection"
def annotate(df, observations=False, options=""):
    """
    Returns MetaMap annotations for the given DataFrame.
    :param df: the DataFrame containing the result_full_descriptions to annotate
    - required columns: {"test_key", "result_key", "obs_seq_nbr" (if
      observations is True), "result_full_description"}
    :param observations: True if the data is given at the observation level,
    False if the data is given at the test level
    :param options: MetaMap options; input string of form "-y -D" or "-yD"
    :return: a DataFrame containing the MetaMap annotations
    - columns: {"test_key", "result_key", "obs_seq_nbr" (if observations is
      True), "tags", "candidates"}
    """
    proc = subprocess.Popen(
        ["java", "-jar", from_root("libs\\MetaMapBuild.jar")], shell=False)

    # initialize variables to suppress PyCharm warnings
    gateway = None
    metamap = None
    api = None

    try:
        connected = False
        for i in range(5):
            try:
                gateway = JavaGateway()
                metamap = gateway.entry_point
                api = metamap.getApi()

                connected = True
                print(f"Connected to Java server on attempt {i + 1}")
                break
            except Exception:
                time.sleep(1)

        if not connected:
            raise Exception("Error connecting to Java server")

        tags = []
        candidates = []

        if options.strip():
            # append the -c flag to the caller-supplied MetaMap options
            api.setOptions(options + " -c")
        else:
            # no additional options supplied; use only the -c flag
            api.setOptions("-c")

        for description in df["result_full_description"]:
            result = api.processCitationsFromString(description).get(0)

            # parse result from MetaMap to tags and candidates
            tags.append(metamap.formatOneResultToString(result, "tags"))
            candidates.append(
                metamap.formatOneResultToString(result, "candidates"))

        keys = get_keys(observations)

        return_value = df.loc[:, keys]
        return_value["tags"] = tags
        return_value["candidates"] = candidates

        return return_value
    finally:
        if gateway is not None:
            gateway.shutdown()
        proc.terminate()
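
# A minimal usage sketch for annotate(), based on its docstring (assumes pandas
# is imported as pd; the description text is a hypothetical placeholder):
#
#     df = pd.DataFrame({
#         "test_key": [1],
#         "result_key": [1],
#         "result_full_description": ["example result description"],
#     })
#     annotations = annotate(df)  # test-level data, so observations=False
#     print(annotations[["tags", "candidates"]])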
def annotate(df, observations=False, options=""):
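    """
    Fault-tolerant variant of annotate(): processes descriptions row by row and
    logs per-row MetaMap errors to pre-tagging_log.txt instead of aborting.
    """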
    proc = subprocess.Popen(
        ["java", "-jar", from_root("libs\\MetaMapBuild.jar")], shell=False)

    try:
        attempts = 0
        successful = False

        while attempts < 5:
            try:
                gateway = JavaGateway()
                metamap = gateway.entry_point
                api = metamap.getApi()
                print("Connected to Java server on attempt " +
                      str(attempts + 1))
                successful = True
                break
            except Exception:
                time.sleep(1)
                attempts += 1

        if not successful:
            raise Exception("Error connecting to Java server")

        tags = []
        candidates = []

        if options.strip():
            # append the -c flag to the caller-supplied MetaMap options
            api.setOptions(options + " -c")
        else:
            # no additional options supplied; use only the -c flag
            api.setOptions("-c")

        f = open(from_root("pre-tagging_log.txt"), "w")
        connected = True
        for index, row in df.iterrows():
            description = row["result_full_description"]
            test_key = row["test_key"]
            result_key = row["result_key"]
            if description is "":
                tags.append("{}")
                candidates.append("{}")
            else:
                if connected is False:
                    f.write("test_key: " + str(test_key) + " ")
                    f.write("result_key: " + str(result_key) + " ")
                    f.write("MetaMap Connection Error \n")
                    tags.append("MetaMap Connection Errors")
                    candidates.append("MetaMap Connection Errors")
                else:
                    try:
                        result = api.processCitationsFromString(
                            description).get(0)
                    except Py4JJavaError as e:
                        error_msg = e.java_exception.getMessage()
                        f.write("test_key: " + str(test_key) + " ")
                        f.write("result_key: " + str(result_key) + " ")
                        if "Index 0 out-of-bounds for length 0" in error_msg:
                            f.write("Memory Error \n")
                            tags.append("Memory Error")
                            candidates.append("Memory Error")
                            continue
                        if "Connection refused" in error_msg:
                            f.write("MetaMap Connection Error \n")
                            tags.append("MetaMap Connection Error")
                            candidates.append("MetaMap Connection Error")
                            print(
                                "Stopped tagging the remaining rows due to a "
                                "MetaMap server connection error."
                            )
                            connected = False
                            continue
                    except Exception:
                        tags.append("Other Errors")
                        candidates.append("Other Errors")
                        f.write("test_key: " + str(test_key) + " ")
                        f.write("result_key: " + str(result_key) + " ")
                        f.write("Other Error \n")
                        continue
                    # parse result from MetaMap to tags and candidates
                    tags.append(metamap.formatOneResultToString(
                        result, "tags"))
                    candidates.append(
                        metamap.formatOneResultToString(result, "candidates"))
        print("MetaMap server connection is: ", metamap.isConnectAPI())
        if metamap.isConnectAPI() == "true":
            api.disconnect()
            if metamap.isConnectAPI() == "false":
                print("disconnect API from MetaMap server")
        return_value = df.loc[:, ["test_key", "result_key"]]
        return_value["tags"] = tags
        return_value["candidates"] = candidates

    finally:
        if f is not None:
            f.close()
        if gateway is not None:
            gateway.shutdown()
        proc.terminate()

    return return_value
Example #19
    org_false_results = tp_org_false_results\
        .merge(to_org_false_results, how="outer", on=["test_key", "result_key"])

    retall_results = l1s_retall_results\
        .merge(l2_retall_results, how="outer", on=["test_key", "result_key"])

    write_df(from_root("results\\predictions.csv"), results)
    write_df(from_root("results\\predictions_org_false.csv"), org_false_results)
    write_df(from_root("results\\predictions_retall.csv"), retall_results)

    db.insert(results, "predictions", "dbo")

    print("Finished writing results to CSV and database.")


if __name__ == "__main__":
    print("Started executing script.\n")
    start_time = datetime.now()

    logger = logging.getLogger(__name__)
    set_params(logger, from_root("log\\test.log"))

    try:
        main()
    except Exception as e:
        logger.exception("test.py: Fatal error")
        sys.exit(1)

    print(f"\nExecution time: {datetime.now() - start_time}")
    print("Finished executing script.")
    # Symbolic
    l1s_module = Level1SymbolicModule(to_module)
    l1s_module.retrain(l1_df)
    l1s_module.save_to_file(from_root("pkl\\level_1_symbolic_module.pkl"))

    # ==========================================================================
    # Level 2

    l2_df = db.extract(from_root("sql\\train\\level_2.sql"))
    l2_module = Level2Module(l1s_module)
    l2_module.retrain(l2_df)
    l2_module.save_to_file(from_root("pkl\\level_2_module.pkl"))


if __name__ == "__main__":
    print("Started executing script.\n")
    start_time = datetime.now()

    logger = logging.getLogger(__name__)
    set_params(logger, from_root("log\\train.log"))

    try:
        main()
    except Exception as e:
        logger.exception("train.py: Fatal error")
        sys.exit(1)

    print(f"\nExecution time: {datetime.now() - start_time}")
    print("Finished executing script.")
Example #21
import matplotlib.pyplot as plt

from datetime import datetime

from io_.db import Database
from io_.fs import write_plot
from modules.level_1_ml_module import Level1MLModule
from modules.level_1_symbolic_module import Level1SymbolicModule
from modules.level_2_module import Level2Module
from modules.test_outcome_module import TestOutcomeModule
from modules.test_performed_module import TestPerformedModule
from root import from_root
from util.logger import set_params
from util.timer import timer

TP_SQL = from_root("sql\\train\\test_performed.sql")
TO_SQL = from_root("sql\\train\\test_outcome.sql")
L1_SQL = from_root("sql\\train\\level_1.sql")
L2_SQL = from_root("sql\\train\\level_2.sql")

SIZES = list(range(2000, 100000 + 1, 2000))
ORGANISMS = True

SAVE_TO = from_root("results\\complexity.png")


def main():
    db = Database.get_instance()

    # ==========================================================================
    # Test performed
Example #22
import os
import random

import torchvision

from root import from_root
from src.misc.utils import set_deterministic_seed, write_lines

DATA_DIRPATH = from_root("data/mnist")
SPLIT_DIRPATH = from_root("splits/mnist")
TRAIN_SIZE = 50000


def main() -> None:
    set_deterministic_seed()

    dataset = torchvision.datasets.MNIST(DATA_DIRPATH, train=True, download=True)
    size = len(dataset)

    indices = list(range(size))
    random.shuffle(indices)

    train_indices = indices[:TRAIN_SIZE]
    write_lines(os.path.join(SPLIT_DIRPATH, "train.txt"), train_indices)

    val_indices = indices[TRAIN_SIZE:]
    write_lines(os.path.join(SPLIT_DIRPATH, "val.txt"), val_indices)


if __name__ == "__main__":
    main()
import multiprocessing
import os
from typing import Union

import torchvision
from torch.utils.data import DataLoader, Subset
from torchvision import transforms

from root import from_root
from src.misc.utils import read_lines

DATA_DIRPATH = from_root("data/cifar10")
SPLIT_DIRPATH = from_root("splits/cifar10")
CIFAR10_TRAIN_MEAN = [0.4913, 0.4820, 0.4464]
CIFAR10_TRAIN_STD = [0.2470, 0.2434, 0.2616]


def load_cifar10_train(batch_size: int, n_workers: Union[str,
                                                         int]) -> DataLoader:
    if batch_size <= 0:
        raise ValueError("Batch_size must be positive!")
    if isinstance(n_workers, str) and n_workers != "n_cores":
        raise ValueError("If n_workers is a string, it must be 'n_cores'!")
    if isinstance(n_workers, int) and n_workers < 0:
        raise ValueError("If n_workers is an int, it must be non-negative!")

    transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor()
    ])
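
    # The remainder of this snippet is truncated. A typical continuation
    # (hypothetical) would build torchvision.datasets.CIFAR10(DATA_DIRPATH,
    # train=True, transform=transform), restrict it to the indices listed in
    # SPLIT_DIRPATH via read_lines and Subset, and wrap the result in a
    # DataLoader with the validated batch_size and n_workers.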
Example #24
def train_test_outcome(db):
    to_df = db.extract(from_root("temp\\sql\\train_test_outcome.sql"))
    to_module = TestOutcomeModule(organisms=True)
    to_module.retrain(to_df)
    to_module.save_to_file(from_root("temp\\pkl\\test_outcome_module.pkl"))
    return to_module
def main():
    db = Database.get_instance()

    # ==========================================================================
    # Test performed

    tp_df = db.extract(from_root("sql\\train\\test_performed.sql"))

    tp_module = TestPerformedModule()
    tp_module.retrain(tp_df)
    tp_module.save_to_file(from_root("pkl\\test_performed_module.pkl"))

    tp_module_org_false = TestPerformedModule(organisms=False)
    tp_module_org_false.retrain(tp_df)
    tp_module_org_false.save_to_file(
        from_root("pkl\\test_performed_organisms_false_module.pkl"))

    # ==========================================================================
    # Test outcome

    to_df = db.extract(from_root("sql\\train\\test_outcome.sql"))

    to_module = TestOutcomeModule()
    to_module.retrain(to_df)
    to_module.save_to_file(from_root("pkl\\test_outcome_module.pkl"))

    to_module_org_false = TestOutcomeModule(organisms=False)
    to_module_org_false.retrain(to_df)
    to_module_org_false.save_to_file(
        from_root("pkl\\test_outcome_organisms_false_module.pkl"))

    # ==========================================================================
    # Level 1

    l1_df = db.extract(from_root("sql\\train\\level_1.sql"))

    # Machine learning
    l1ml_module = Level1MLModule()
    l1ml_module.retrain(l1_df)
    l1ml_module.save_to_file(from_root("pkl\\level_1_ml_module.pkl"))

    # Symbolic
    l1s_module = Level1SymbolicModule(to_module)
    l1s_module.retrain(l1_df)
    l1s_module.save_to_file(from_root("pkl\\level_1_symbolic_module.pkl"))

    # ==========================================================================
    # Level 2

    l2_df = db.extract(from_root("sql\\train\\level_2.sql"))
    l2_module = Level2Module(l1s_module)
    l2_module.retrain(l2_df)
    l2_module.save_to_file(from_root("pkl\\level_2_module.pkl"))
Example #26
def train_level_1_machine_learning(db):
    l1ml_df = db.extract(from_root("temp\\sql\\train_level_1_ml.sql"))
    l1ml_module = Level1MLModule()
    l1ml_module.retrain(l1ml_df)
    l1ml_module.save_to_file(from_root("temp\\pkl\\level_1_ml_module.pkl"))
    return l1ml_module
Example #27
def main():
    # ==========================================================================
    # Load the DataFrames to classify

    db = Database.get_instance()

    tp_df = db.extract(from_root("sql\\test\\test_performed.sql"))
    to_df = db.extract(from_root("sql\\test\\test_outcome.sql"))
    l1_df = db.extract(from_root("sql\\test\\level_1.sql"))
    l2_df = db.extract(from_root("sql\\test\\level_2.sql"))

    print("Finished loading the DataFrames.")

    # ==========================================================================
    # Load modules

    tp_module = TestPerformedModule.load_from_file(
        from_root("pkl\\test_performed_module.pkl"))

    to_module = TestOutcomeModule.load_from_file(
        from_root("pkl\\test_outcome_module.pkl"))

    l1ml_module = Level1MLModule.load_from_file(
        from_root("pkl\\level_1_ml_module.pkl"))

    l1s_module = Level1SymbolicModule(to_module).load_from_file(
        from_root("pkl\\level_1_symbolic_module.pkl"))

    l2_module = Level2Module(l1ml_module).load_from_file(
        from_root("pkl\\level_2_module.pkl"))

    tp_module_org_false = TestPerformedModule.load_from_file(
        from_root("pkl\\test_performed_organisms_false_module.pkl"))

    to_module_org_false = TestOutcomeModule.load_from_file(
        from_root("pkl\\test_outcome_organisms_false_module.pkl"))

    print("Finished loading modules.")

    # ==========================================================================
    # Classify the DataFrames

    tp_results = tp_module.classify(tp_df)
    to_results = to_module.classify(to_df)
    l1ml_results = l1ml_module.classify(l1_df)
    l1s_results = l1s_module.classify(l1_df)
    l2_results = l2_module.classify(l2_df)

    tp_org_false_results = tp_module_org_false.classify(tp_df)
    to_org_false_results = to_module_org_false.classify(to_df)

    l1s_retall_results = l1s_module.classify(l1_df, return_all=True)
    l2_retall_results = l2_module.classify(l2_df, return_all=True)

    print("Finished classifying the DataFrames.")

    # ==========================================================================
    # Write final prediction results to CSV and database

    results = tp_results\
        .merge(to_results, how="outer", on=["test_key", "result_key"])\
        .merge(l1ml_results, how="outer", on=["test_key", "result_key"])\
        .merge(l1s_results, how="outer", on=["test_key", "result_key"])\
        .merge(l2_results, how="outer", on=["test_key", "result_key"])

    org_false_results = tp_org_false_results\
        .merge(to_org_false_results, how="outer", on=["test_key", "result_key"])

    retall_results = l1s_retall_results\
        .merge(l2_retall_results, how="outer", on=["test_key", "result_key"])

    write_df(from_root("results\\predictions.csv"), results)
    write_df(from_root("results\\predictions_org_false.csv"), org_false_results)
    write_df(from_root("results\\predictions_retall.csv"), retall_results)

    db.insert(results, "predictions", "dbo")

    print("Finished writing results to CSV and database.")
Example #28
def train_level_1_symbolic(db, to_module):
    l1s_df = db.extract(from_root("sql\\train\\level_1.sql"))
    l1s_module = Level1SymbolicModule(to_module)
    l1s_module.retrain(l1s_df)
    l1s_module.save_to_file(
        from_root("temp\\pkl\\level_1_symbolic_module.pkl"))
Example #29
    def from_out(self, relative_path: str) -> str:
        return os.path.join(from_root(self.cfg.out_dirpath), relative_path)
Example #30
def train_level_2(db, l1ml_module):
    l2_df = db.extract(from_root("sql\\train\\level_2.sql"))
    l2_module = Level2Module(l1ml_module)
    l2_module.retrain(l2_df)
    l2_module.save_to_file(from_root("temp\\pkl\\level_2_module.pkl"))