Example #1
import os
import time
import urllib.parse
from datetime import datetime

import h5py
from pymongo import MongoClient

from batchlib.base import BatchJobOnContainer
from batchlib.mongo.utils import ASSAY_ANALYSIS_RESULTS, ASSAY_METADATA, create_plate_doc
from batchlib.util import get_logger, get_commit_id
from batchlib.util.io import read_table

logger = get_logger('Workflow.BatchJob.DbResultWriter')

# essential tables to be saved in DB
TABLES_TO_SAVE = ['wells/default', 'images/default']


def _table_object(column_names, table):
    # MongoDB field names cannot contain '.', so sanitize the column names first
    column_names = [cn.replace('.', '_') for cn in column_names]
    return [dict(zip(column_names, table_row)) for table_row in table]
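
# Illustration (not part of the original file): _table_object pairs each row with
# the sanitized column names, e.g.
#   _table_object(['well.name', 'score'], [('A01', 1.5)])
#   -> [{'well_name': 'A01', 'score': 1.5}]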


def _get_analysis_tables(in_file):
    with h5py.File(in_file, 'r') as f:
        tables = []
        for table_name in TABLES_TO_SAVE:
            column_names, table = read_table(f, table_name)
            tables.append({
Example #2
import argparse
import urllib.parse

import pymongo
from pymongo import MongoClient

from batchlib.mongo.utils import ASSAY_METADATA, ASSAY_ANALYSIS_RESULTS
from batchlib.util import get_logger

logger = get_logger('MongoDB Migrator')


def create_indexes(db):
    logger.info(
        f'Creating indexes on {ASSAY_METADATA} and {ASSAY_ANALYSIS_RESULTS}')
    assay_metadata = db[ASSAY_METADATA]
    assay_results = db[ASSAY_ANALYSIS_RESULTS]
    # create necessary indexes
    assay_metadata.create_index([('name', pymongo.ASCENDING)], unique=True)
    # create unique compound index on (workflow_name, plate_name, batchlib_version), i.e. reject result objects
    # for which those 3 values already exist in the collection
    assay_results.create_index([
        ('workflow_name', pymongo.ASCENDING),
        ('plate_name', pymongo.ASCENDING),
        ('batchlib_version', pymongo.ASCENDING),
    ],
                               unique=True)
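    # Illustration (not part of the original file): with this compound unique index
    # in place, inserting a second result document that repeats the same
    # (workflow_name, plate_name, batchlib_version) triple makes insert_one raise
    # pymongo.errors.DuplicateKeyError, so duplicate results are rejected.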


def update_well_assessment(plate_name, well_assessments):
    # TODO: implement when we have this info in parseable format
Example #3
import os
import numpy as np
from concurrent import futures
from functools import partial
from tqdm import tqdm

from batchlib.base import BatchJobOnContainer
from batchlib.util import get_logger, open_file, normalize_percentile

logger = get_logger('Workflow.BatchJob.StardistPrediction')


def limit_gpu_memory(fraction, allow_growth=False):
    import tensorflow as tf
    from keras import backend as K
    config = tf.ConfigProto()
    if fraction is not None:
        config.gpu_options.per_process_gpu_memory_fraction = fraction
    config.gpu_options.allow_growth = bool(allow_growth)
    session = tf.Session(config=config)
    K.tensorflow_backend.set_session(session)
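
# Illustration (not part of the original file): this helper uses the TF1-style API
# (tf.ConfigProto, keras.backend.tensorflow_backend); a typical call before loading
# the StarDist model might be
#   limit_gpu_memory(0.4, allow_growth=True)
# which caps the process at roughly 40% of GPU memory and lets the allocation grow on demand.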


class StardistPrediction(BatchJobOnContainer):
    """
    """
    def __init__(self,
                 model_root,
                 model_name,
                 input_key,
                 output_key,
Example #4
from batchlib.mongo.result_writer import DbResultWriter
from batchlib.outliers.outlier import get_outlier_predicate
from batchlib.preprocessing import get_barrel_corrector, get_serum_keys, Preprocess
from batchlib.segmentation import SeededWatershed
from batchlib.segmentation.stardist_prediction import StardistPrediction
from batchlib.segmentation.torch_prediction import TorchPrediction
from batchlib.segmentation.unet import UNet2D
from batchlib.segmentation.voronoi_ring_segmentation import ErodeSegmentation  # , VoronoiRingSegmentation
from batchlib.reporting import (SlackSummaryWriter, export_tables_for_plate,
                                WriteBackgroundSubtractedImages)
from batchlib.util import get_logger, open_file, read_table, has_table
from batchlib.util.logger import setup_logger
from batchlib.util.plate_visualizations import all_plots

logger = get_logger('Workflow.CellAnalysis')

DEFAULT_PLOT_NAMES = [
    'ratio_of_q0.5_of_means', 'ratio_of_q0.5_of_sums', 'robust_z_score_sums',
    'robust_z_score_means'
]

# these are the default min serum intensities that are used for QC, if we DO NOT have
# empty wells.
# the intensity thresholds are derived from 3 * mad background, see
# https://github.com/hci-unihd/antibodies-analysis-issues/issues/84#issuecomment-632658726
DEFAULT_MIN_SERUM_INTENSITIES = {
    'serum_IgG': 301.23,
    'serum_IgA': 392.76,
    'serum_IgM': None
}
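
# Sketch (not from the original code): the issue linked above describes a 3 * MAD
# background rule; one common robust form of such a threshold is
#   bg_median = np.median(background_values)
#   mad = np.median(np.abs(background_values - bg_median))
#   threshold = bg_median + 3 * mad
# where background_values is a hypothetical array of background pixel intensities
# and np is numpy.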
Example #5
import argparse
import urllib.parse

from pymongo import MongoClient

from batchlib.mongo.utils import ASSAY_METADATA
from batchlib.util import get_logger
from batchlib.util.cohort_parser import CohortIdParser

logger = get_logger('CohortImporter')


def import_cohort_ids(db):
    # parse excel files containing the cohort id for each well
    cohort_id_parser = CohortIdParser()

    # get metadata collection
    assay_metadata = db[ASSAY_METADATA]
    # iterate over all plates in the DB
    for plate_doc in assay_metadata.find({}):
        plate_name = plate_doc['name']
        logger.info(f'Importing cohort ids for plate: {plate_name}')

        plate_cohorts = cohort_id_parser.get_cohorts_for_plate(plate_name)

        if not plate_cohorts:
            logger.warning(
                f"No cohort metadata for plate: {plate_name}. Check your cohort excel files."
            )
            continue
Example #6
import os
from math import ceil

import numpy as np
import torch
from tqdm import tqdm

from batchlib.base import BatchJobOnContainer
from batchlib.segmentation.unet import UNet2D
from batchlib.util import get_logger, open_file, files_to_jobs, standardize, DelayedKeyboardInterrupt

logger = get_logger('Workflow.BatchJob.TorchPrediction')


# TODO
# - to optimize gpu throughput further could use torch.parallel / torch.data_parallel
#   or dask.delayed to parallelize the input loading and output writing
class TorchPrediction(BatchJobOnContainer):
    """
    """
    def __init__(self,
                 input_key,
                 output_key,
                 model_path,
                 model_class=None,
                 model_kwargs={},
                 input_channel=None,
                 **super_kwargs):
        self.input_channel = input_channel
        input_ndim = 2 if self.input_channel is None else 3
        super().__init__(input_key=input_key,
Example #7
import argparse
import urllib.parse

from pymongo import MongoClient

from batchlib.mongo.utils import ASSAY_METADATA
from batchlib.util import get_logger
from batchlib.util.elisa_results_parser import ElisaResultsParser

logger = get_logger('ElisaImporter')


def import_elisa_results(db):
    # create elisa results
    elisa_results_parser = ElisaResultsParser()
    # get metadata collection
    assay_metadata = db[ASSAY_METADATA]

    # iterate over all plates
    for plate_doc in assay_metadata.find({}):
        plate_name = plate_doc['name']

        should_replace = False
        for well in plate_doc["wells"]:
            cohort_id = well.get("cohort_id", None)
            if cohort_id is None:
                continue
            # make sure cohort_id matching is not case sensitive
            cohort_id = cohort_id.lower()

            if cohort_id in elisa_results_parser.elisa_results:
Example #8
from concurrent import futures

import numpy as np
import skimage.morphology as morph
from scipy import ndimage as ndi
from skimage.segmentation import watershed
from tqdm.auto import tqdm

from batchlib.base import BatchJobOnContainer
from batchlib.util import open_file, seg_to_edges, get_logger, in_file_to_image_name

logger = get_logger('Workflow.BatchJob.VoronoiRingSegmentation')


class VoronoiRingSegmentation(BatchJobOnContainer):
    """
    """
    def validate_params(self, ring_width, radius_factor):
        have_width = ring_width is not None
        have_fraction = radius_factor is not None
        if have_width == have_fraction:
            raise ValueError("Need exactly one of ring_width or radius_factor")

        if have_width:
            logger.info(
                f"{self.name}: using fixed width {ring_width} for dilation")
        else:
            logger.info(
                f"{self.name}: using radius fraction {radius_factor} for dilation"
            )
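
        # Illustration (not part of the original file): exactly one of the two
        # parameters may be set, e.g. validate_params(ring_width=5, radius_factor=None)
        # passes, while giving both (or neither) raises the ValueError above.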
Example #9
import argparse
import urllib.parse
import os
from pymongo import MongoClient

from batchlib.mongo.utils import ASSAY_METADATA
from batchlib.outliers.outlier import OutlierPredicate
from batchlib.util import get_logger

logger = get_logger('OutlierImporter')


def import_outliers(db, outlier_dir):
    assert outlier_dir is not None
    # get metadata collection
    assay_metadata = db[ASSAY_METADATA]

    # iterate over all plates
    for plate_doc in assay_metadata.find({}):
        plate_name = plate_doc['name']
        outlier_predicate = OutlierPredicate(outlier_dir, plate_name)

        should_replace = False
        for well in plate_doc['wells']:
            for im in well['images']:
                im_file = im['name']
                outlier_current = outlier_predicate(im_file)
                outlier_previous = im['outlier']
                if outlier_current != outlier_previous:
                    # outlier status changed -> update and replace
                    logger.info(
Example #10
import csv
import glob
import os

from batchlib.util import get_logger

logger = get_logger('Workflow.Outliers')


def get_outlier_predicate(config):
    if not hasattr(config, 'misc_folder') or config.misc_folder is None:
        raise ValueError(
            "Invalid config passed to 'get_outlier_predicate', needs 'misc_folder' attribute"
        )
    outliers_dir = os.path.join(config.misc_folder, 'tagged_outliers')
    if not os.path.exists(outliers_dir):
        raise ValueError(
            f"The outliers directory {outliers_dir} does not exist")

    logger.info(
        f"Trying to parse 'plate_name' from the input folder: {config.input_folder}"
    )
    plate_name = plate_name_from_input_folder(config.input_folder,
                                              outliers_dir)
    if plate_name is not None:
        logger.info(f"plate_name found: {plate_name}")
    else:
        logger.warning(
            f"Did not find outliers for {config.input_folder}. Outlier detection will be skipped"
        )
        # no plate name was given and it cannot be parsed from the config.input_folder
Example #11
import argparse
import urllib.parse

from pymongo import MongoClient

from batchlib.mongo.utils import ASSAY_METADATA
from batchlib.util import get_logger

logger = get_logger('Workflow.PlateMetadataRepository')

TEST_NAMES = [
    'ELISA IgG', 'ELISA IgA', 'ELISA IgM', 'mpBio IgG', 'mpBio IgM', 'Luminex',
    'NT', 'Roche', 'Abbot', 'Rapid test IgM', 'Rapid test IgG', 'IF IgG',
    'IF IgA', 'days_after_onset'
]


class PlateMetadataRepository:
    """
    Simple MongoDB API used to get the positive (cohort: C) and control (cohort: B) wells for a given plate,
    as well as ELISA test results if available.

    """
    def __init__(self, db):
        self.assay_metadata_collection = db[ASSAY_METADATA]
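
    # Hypothetical usage (not part of the original file); the client, database and
    # plate names below are placeholders:
    #   repo = PlateMetadataRepository(MongoClient(host)['covid'])
    #   wells = repo._get_wells('some_plate_name')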

    def _get_wells(self, plate_name):
        plate_doc = self.assay_metadata_collection.find_one(
            {"name": plate_name})
        if plate_doc is None:
            logger.info(f"No plate for name {plate_name} was found in the DB")
Example #12
import pandas as pd
from tqdm import tqdm

from batchlib.mongo.plate_metadata_repository import TEST_NAMES
from batchlib.util import (get_logger, read_table, open_file, has_table,
                           image_name_to_site_name, image_name_to_well_name)
from batchlib.util.cohort_parser import get_cohort_class, get_cohort

SUPPORTED_TABLE_FORMATS = {'excel': '.xlsx',
                           'csv': '.csv',
                           'tsv': '.tsv'}
DEFAULT_SCORE_PATTERNS = ('IgG_robust_z_score_means', 'IgG_ratio_of_q0.5_of_means',
                          'IgA_robust_z_score_means', 'IgA_ratio_of_q0.5_of_means',
                          'IgM_robust_z_score_means', 'IgM_ratio_of_q0.5_of_means')

logger = get_logger('Workflow.TableExporter')


def _round_column(col, decim=2):
    def _round(x):
        if isinstance(x, float):
            return round(x, decim)
        return x

    return col.apply(_round)
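
# Illustration (not part of the original file): applied to a pandas column, float
# entries are rounded and everything else passes through unchanged, e.g.
#   _round_column(pd.Series([1.2345, 'n/a'])).tolist()  ->  [1.23, 'n/a']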


def format_to_extension(format_):
    if format_ not in SUPPORTED_TABLE_FORMATS:
        supported_formats = list(SUPPORTED_TABLE_FORMATS.keys())
        raise ValueError(f"Format {format_} is not supported, expect one of {supported_formats}")