import os
import time
import urllib.parse
from datetime import datetime

import h5py
from pymongo import MongoClient

from batchlib.base import BatchJobOnContainer
from batchlib.mongo.utils import ASSAY_ANALYSIS_RESULTS, ASSAY_METADATA, create_plate_doc
from batchlib.util import get_logger, get_commit_id
from batchlib.util.io import read_table

logger = get_logger('Workflow.BatchJob.DbResultWriter')

# essential tables to be saved in DB
TABLES_TO_SAVE = ['wells/default', 'images/default']


def _table_object(column_names, table):
    # document attributes cannot contain '.'
    column_names = [cn.replace('.', '_') for cn in column_names]
    return [dict(zip(column_names, table_row)) for table_row in table]


def _get_analysis_tables(in_file):
    with h5py.File(in_file, 'r') as f:
        tables = []
        for table_name in TABLES_TO_SAVE:
            column_names, table = read_table(f, table_name)
            tables.append({
import argparse
import urllib.parse

import pymongo
from pymongo import MongoClient

from batchlib.mongo.utils import ASSAY_METADATA, ASSAY_ANALYSIS_RESULTS
from batchlib.util import get_logger

logger = get_logger('MongoDB Migrator')


def create_indexes(db):
    logger.info(f'Creating indexes on {ASSAY_METADATA} and {ASSAY_ANALYSIS_RESULTS}')

    assay_metadata = db[ASSAY_METADATA]
    assay_results = db[ASSAY_ANALYSIS_RESULTS]

    # create necessary indexes
    assay_metadata.create_index([('name', pymongo.ASCENDING)], unique=True)

    # create unique compound index on (workflow_name, plate_name, batchlib_version), i.e. reject result objects
    # for which those 3 values already exist in the collection
    assay_results.create_index([
        ('workflow_name', pymongo.ASCENDING),
        ('plate_name', pymongo.ASCENDING),
        ('batchlib_version', pymongo.ASCENDING),
    ], unique=True)


def update_well_assessment(plate_name, well_assessments):
    # TODO: implement when we have this info in parseable format
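# A minimal usage sketch (hypothetical host, credentials and database name; the real
# entry point is not shown in this excerpt): urllib.parse.quote_plus is imported above
# for escaping user and password in the connection URI.
#
#   user = urllib.parse.quote_plus('covid19')
#   password = urllib.parse.quote_plus('secret')
#   client = MongoClient(f'mongodb://{user}:{password}@localhost:27017/')
#   create_indexes(client['covid'])
#
# Once the unique compound index exists, inserting a second result document with the same
# (workflow_name, plate_name, batchlib_version) raises pymongo.errors.DuplicateKeyError.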
import os

import numpy as np
from concurrent import futures
from functools import partial
from tqdm import tqdm

from batchlib.base import BatchJobOnContainer
from batchlib.util import get_logger, open_file, normalize_percentile

logger = get_logger('Workflow.BatchJob.StardistPrediction')


def limit_gpu_memory(fraction, allow_growth=False):
    # configure the keras / tensorflow session so that only a fraction of the gpu memory is used
    import tensorflow as tf
    from keras import backend as K
    config = tf.ConfigProto()
    if fraction is not None:
        config.gpu_options.per_process_gpu_memory_fraction = fraction
    config.gpu_options.allow_growth = bool(allow_growth)
    session = tf.Session(config=config)
    K.tensorflow_backend.set_session(session)


class StardistPrediction(BatchJobOnContainer):
    """ Run prediction with a pretrained StarDist model for all images in the container.
    """
    def __init__(self, model_root, model_name, input_key, output_key,
from batchlib.mongo.result_writer import DbResultWriter
from batchlib.outliers.outlier import get_outlier_predicate
from batchlib.preprocessing import get_barrel_corrector, get_serum_keys, Preprocess
from batchlib.segmentation import SeededWatershed
from batchlib.segmentation.stardist_prediction import StardistPrediction
from batchlib.segmentation.torch_prediction import TorchPrediction
from batchlib.segmentation.unet import UNet2D
from batchlib.segmentation.voronoi_ring_segmentation import ErodeSegmentation  # , VoronoiRingSegmentation
from batchlib.reporting import (SlackSummaryWriter, export_tables_for_plate,
                                WriteBackgroundSubtractedImages)
from batchlib.util import get_logger, open_file, read_table, has_table
from batchlib.util.logger import setup_logger
from batchlib.util.plate_visualizations import all_plots

logger = get_logger('Workflow.CellAnalysis')

DEFAULT_PLOT_NAMES = [
    'ratio_of_q0.5_of_means',
    'ratio_of_q0.5_of_sums',
    'robust_z_score_sums',
    'robust_z_score_means'
]

# these are the default min serum intensities that are used for QC, if we DO NOT have
# empty wells.
# the intensity thresholds are derived from 3 * mad background, see
# https://github.com/hci-unihd/antibodies-analysis-issues/issues/84#issuecomment-632658726
DEFAULT_MIN_SERUM_INTENSITIES = {
    'serum_IgG': 301.23,
    'serum_IgA': 392.76,
    'serum_IgM': None
}
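# Hedged illustration (not part of the workflow code): the thresholds above are lower
# bounds on serum intensity used for QC when no empty wells are available; the None entry
# (serum_IgM) is read here as "no intensity-based QC for that channel". The helper below
# is hypothetical and only shows how such a check could look:
#
#   def below_min_intensity(serum_key, median_intensity,
#                           thresholds=DEFAULT_MIN_SERUM_INTENSITIES):
#       threshold = thresholds.get(serum_key)
#       return threshold is not None and median_intensity < threshold
#
#   below_min_intensity('serum_IgG', 250.0)  # -> True, would be flagged by QC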
import argparse
import urllib.parse

from pymongo import MongoClient

from batchlib.mongo.utils import ASSAY_METADATA
from batchlib.util import get_logger
from batchlib.util.cohort_parser import CohortIdParser

logger = get_logger('CohortImporter')


def import_cohort_ids(db):
    # parse excel files containing the cohort id for each well
    cohort_id_parser = CohortIdParser()

    # get metadata collection
    assay_metadata = db[ASSAY_METADATA]

    # iterate over all plates in the DB
    for plate_doc in assay_metadata.find({}):
        plate_name = plate_doc['name']
        logger.info(f'Importing cohort ids for plate: {plate_name}')

        plate_cohorts = cohort_id_parser.get_cohorts_for_plate(plate_name)
        if not plate_cohorts:
            logger.warning(f"No cohort metadata for plate: {plate_name}. Check your cohort excel files.")
            continue
import os
from math import ceil

import numpy as np
import torch
from tqdm import tqdm

from batchlib.base import BatchJobOnContainer
from batchlib.segmentation.unet import UNet2D
from batchlib.util import get_logger, open_file, files_to_jobs, standardize, DelayedKeyboardInterrupt

logger = get_logger('Workflow.BatchJob.TorchPrediction')


# TODO
# - to optimize gpu throughput further could use torch.parallel / torch.data_parallel
#   or dask.delayed to parallelize the input loading and output writing
class TorchPrediction(BatchJobOnContainer):
    """ Run prediction with a trained pytorch model (e.g. UNet2D) for all images in the container.
    """
    def __init__(self, input_key, output_key, model_path,
                 model_class=None, model_kwargs={},
                 input_channel=None, **super_kwargs):
        self.input_channel = input_channel
        input_ndim = 2 if self.input_channel is None else 3
        super().__init__(input_key=input_key,
import argparse
import urllib.parse

from pymongo import MongoClient

from batchlib.mongo.utils import ASSAY_METADATA
from batchlib.util import get_logger
from batchlib.util.elisa_results_parser import ElisaResultsParser

logger = get_logger('ElisaImporter')


def import_elisa_results(db):
    # parse the elisa results
    elisa_results_parser = ElisaResultsParser()

    # get metadata collection
    assay_metadata = db[ASSAY_METADATA]

    # iterate over all plates
    for plate_doc in assay_metadata.find({}):
        plate_name = plate_doc['name']
        should_replace = False
        for well in plate_doc["wells"]:
            cohort_id = well.get("cohort_id", None)
            if cohort_id is None:
                continue

            # make sure cohort_id matching is not case sensitive
            cohort_id = cohort_id.lower()
            if cohort_id in elisa_results_parser.elisa_results:
from concurrent import futures

import numpy as np
import skimage.morphology as morph
from scipy import ndimage as ndi
from skimage.segmentation import watershed
from tqdm.auto import tqdm

from batchlib.base import BatchJobOnContainer
from batchlib.util import open_file, seg_to_edges, get_logger, in_file_to_image_name

logger = get_logger('Workflow.BatchJob.VoronoiRingSegmentation')


class VoronoiRingSegmentation(BatchJobOnContainer):
    """ Compute ring-shaped segments around seed objects, dilating either by a fixed
    ring width or by a width derived from a fraction of the seed radius.
    """
    def validate_params(self, ring_width, radius_factor):
        have_width = ring_width is not None
        have_fraction = radius_factor is not None
        if not (have_width != have_fraction):
            raise ValueError("Need exactly one of ring_width or radius_factor")
        if have_width:
            logger.info(f"{self.name}: using fixed width {ring_width} for dilation")
        else:
            logger.info(f"{self.name}: using radius fraction {radius_factor} for dilation")
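    # Parameter validation sketch: exactly one of ring_width / radius_factor may be set.
    #   validate_params(ring_width=5, radius_factor=None)     -> fixed-width dilation
    #   validate_params(ring_width=None, radius_factor=0.5)   -> radius-fraction dilation
    #   validate_params(ring_width=5, radius_factor=0.5)      -> ValueError
    #   validate_params(ring_width=None, radius_factor=None)  -> ValueError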
import argparse
import os
import urllib.parse

from pymongo import MongoClient

from batchlib.mongo.utils import ASSAY_METADATA
from batchlib.outliers.outlier import OutlierPredicate
from batchlib.util import get_logger

logger = get_logger('OutlierImporter')


def import_outliers(db, outlier_dir):
    assert outlier_dir is not None

    # get metadata collection
    assay_metadata = db[ASSAY_METADATA]

    # iterate over all plates
    for plate_doc in assay_metadata.find({}):
        plate_name = plate_doc['name']
        outlier_predicate = OutlierPredicate(outlier_dir, plate_name)

        should_replace = False
        for well in plate_doc['wells']:
            for im in well['images']:
                im_file = im['name']
                outlier_current = outlier_predicate(im_file)
                outlier_previous = im['outlier']
                if outlier_current != outlier_previous:
                    # outlier status changed -> update and replace
                    logger.info(
import csv
import glob
import os

from batchlib.util import get_logger

logger = get_logger('Workflow.Outliers')


def get_outlier_predicate(config):
    if not hasattr(config, 'misc_folder') or config.misc_folder is None:
        raise ValueError("Invalid config passed to 'get_outlier_predicate', needs 'misc_folder' attribute")

    outliers_dir = os.path.join(config.misc_folder, 'tagged_outliers')
    if not os.path.exists(outliers_dir):
        raise ValueError(f"The outliers directory {outliers_dir} does not exist")

    logger.info(f"Trying to parse 'plate_name' from the input folder: {config.input_folder}")
    plate_name = plate_name_from_input_folder(config.input_folder, outliers_dir)
    if plate_name is not None:
        logger.info(f"plate_name found: {plate_name}")
    else:
        logger.warning(f"Did not find outliers for {config.input_folder}. Outlier detection will be skipped")
        # no plate name was given and it cannot be parsed from the config.input_folder
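# Usage sketch connecting the two outlier entry points (hypothetical paths; based on the
# usage visible in the outlier importer, where an OutlierPredicate is built per plate and
# called with an image file name to obtain its tagged outlier status):
#
#   from argparse import Namespace
#   config = Namespace(misc_folder='/path/to/misc', input_folder='/path/to/plate_folder')
#   predicate = get_outlier_predicate(config)  # assumed to return such a per-image predicate
#   is_outlier = predicate(image_name)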
import argparse
import urllib.parse

from pymongo import MongoClient

from batchlib.mongo.utils import ASSAY_METADATA
from batchlib.util import get_logger

logger = get_logger('Workflow.PlateMetadataRepository')

TEST_NAMES = [
    'ELISA IgG', 'ELISA IgA', 'ELISA IgM',
    'mpBio IgG', 'mpBio IgM',
    'Luminex', 'NT', 'Roche', 'Abbot',
    'Rapid test IgM', 'Rapid test IgG',
    'IF IgG', 'IF IgA',
    'days_after_onset'
]


class PlateMetadataRepository:
    """ Simple Mongo API used to get the positive (cohort: C) and control (cohort: B) wells
    for a given plate, as well as Elisa test results if available.
    """
    def __init__(self, db):
        self.assay_metadata_collection = db[ASSAY_METADATA]

    def _get_wells(self, plate_name):
        plate_doc = self.assay_metadata_collection.find_one({"name": plate_name})
        if plate_doc is None:
            logger.info(f"No plate for name {plate_name} was found in the DB")
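# Usage sketch (hypothetical connection details and database name): the repository wraps a
# pymongo database handle; only the private _get_wells helper is visible in this excerpt,
# the public query methods follow further down in the file.
#
#   client = MongoClient('mongodb://localhost:27017/')
#   repo = PlateMetadataRepository(client['covid'])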
import pandas as pd
from tqdm import tqdm

from batchlib.mongo.plate_metadata_repository import TEST_NAMES
from batchlib.util import (get_logger, read_table, open_file, has_table,
                           image_name_to_site_name, image_name_to_well_name)
from batchlib.util.cohort_parser import get_cohort_class, get_cohort

SUPPORTED_TABLE_FORMATS = {'excel': '.xlsx', 'csv': '.csv', 'tsv': '.tsv'}

DEFAULT_SCORE_PATTERNS = ('IgG_robust_z_score_means', 'IgG_ratio_of_q0.5_of_means',
                          'IgA_robust_z_score_means', 'IgA_ratio_of_q0.5_of_means',
                          'IgM_robust_z_score_means', 'IgM_ratio_of_q0.5_of_means')

logger = get_logger('Workflow.TableExporter')


def _round_column(col, decim=2):
    def _round(x):
        if isinstance(x, float):
            return round(x, decim)
        return x
    return col.apply(_round)


def format_to_extension(format_):
    if format_ not in SUPPORTED_TABLE_FORMATS:
        supported_formats = list(SUPPORTED_TABLE_FORMATS.keys())
        raise ValueError(f"Format {format_} is not supported, expect one of {supported_formats}")
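# Usage sketch for the two helpers above (the return of format_to_extension is cut off in
# this excerpt; it is assumed to map the format name to its extension after the check):
#
#   col = pd.Series([1.23456, 'n.a.', 2.5])
#   _round_column(col).tolist()   # -> [1.23, 'n.a.', 2.5]
#   format_to_extension('excel')  # -> '.xlsx' (assumed)
#   format_to_extension('json')   # raises ValueError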