def __init__(
    self,
    index_file: Path = None,
    lineage_column: str = 'mlst',
    verbose: bool = True
):
    """Load an optional genotype index and remember the lineage column.

    :param index_file: tab-separated file with a header row; when falsy
        (e.g. ``None``) no index is loaded and ``self.index`` is ``None``.
    :param lineage_column: stored unchanged as ``self.lineage_column``.
    :param verbose: log at INFO level when true, otherwise at ERROR level.
    """
    PoreLogger.__init__(
        self, level=logging.INFO if verbose else logging.ERROR
    )

    if index_file:
        self.index = pandas.read_csv(index_file, sep='\t', header=0)

        if 'idx' not in self.index.columns:
            self.logger.info(
                'Adding Mash index column "idx" to genotype index'
            )
            # No "idx" column in the file: number the rows 0..n-1 and
            # label that row index "idx".
            self.index.index = list(range(len(self.index)))
            self.index.index.name = 'idx'
        else:
            self.logger.info(
                'Using column "idx" as Mash index column in genotype index'
            )
            # set_index returns a new frame indexed by "idx"; it has to
            # replace the frame itself. Assigning it to `.index` (as was
            # done before) hands pandas a whole DataFrame as row labels.
            self.index = self.index.set_index('idx')
    else:
        self.index = None

    self.lineage_column = lineage_column
def __init__(
    self,
    fastx: Path,
    sketch: Path,
    prefix: str = 'sketchy',
    outdir: Path = Path('sketchy_out'),
    verbose: bool = False,
):
    """Store the run settings, log them and create the output directory.

    :param fastx: input read file; its absolute path is logged.
    :param sketch: stored as ``self.sketch``.
    :param prefix: stored as ``self.prefix`` and logged.
    :param outdir: output directory, created together with any missing
        parents; an already existing directory is not an error.
    :param verbose: log at INFO level when true, otherwise at ERROR level.
    """
    log_level = logging.INFO if verbose else logging.ERROR
    PoreLogger.__init__(self, level=log_level, name='Compute')

    self.fastx, self.sketch = fastx, sketch
    self.prefix, self.outdir = prefix, outdir
    self.verbose = verbose

    info = self.logger.info
    info(f'Sketchy wrapper v{__version__}')
    info(f'Prefix: {self.prefix}')
    info(f'Fastq file: {self.fastx.absolute()}')
    info(f'Output directory: {self.outdir.absolute()}')

    self.outdir.mkdir(parents=True, exist_ok=True)
def __init__(self, survey_directory: Path):
    """Create a ``SurveyData`` container and read a survey directory into it.

    :param survey_directory: directory handed to ``SurveyData.read``.
    """
    PoreLogger.__init__(self)

    # Placeholder string for missing values.
    self.missing = '-'

    container = SurveyData()
    self.survey_data = container

    self.logger.info(f'Parse survey directory: {survey_directory}')
    container.read(survey_directory)
def __init__(self):
    """Initialise empty containers; nothing is read here."""
    PoreLogger.__init__(self)

    # Data frames for MASH outputs (updated); `interim` additionally
    # carries data.
    self.inter = pandas.DataFrame()
    self.interim = pandas.DataFrame()

    # Counters: one for the prime lineage, plus per-lineage mappings for
    # genotype and susceptibility counters.
    self.lineage = Counter()
    self.genotype = {}
    self.susceptibility = {}

    self.continuous = []

    # Captures the text between "start_time=" and a following "Z".
    self.start_time_regex = r'start_time=(.*)Z'
def __init__(
    self,
    sssh: Path,
    index: Path,
    key: Path,
    stable: int = None,
    ssh: Path = None,
    verbose: bool = False
):
    """Load the evaluation inputs and, when available, merge them.

    :param sssh: ranked sum of shared hashes file, read via ``read_sssh``.
    :param index: genotype feature index file, read via
        ``read_feature_index``.
    :param key: genotype feature key file, read via ``read_feature_key``.
    :param stable: stored unchanged as ``self.stable``.
    :param ssh: sum of shared hashes file, passed to ``read_ssh``; the
        merged ``ssh_features`` table and the ``reads`` / ``ranks`` counts
        are only set when ``read_ssh`` returns something other than None.
    :param verbose: raise the logger level to INFO when true.
    """
    PoreLogger.__init__(self, name="Evaluate")
    if verbose:
        self.logger.setLevel(level=logging.INFO)

    # Fixed evaluation settings.
    self.top_feature_values = 5
    self.read_limit = 1000
    self.preference_threshold = 0.6
    self.na_color = 'darkgray'
    self.stable = stable

    log = self.logger.info
    log("Loading data for evaluations from Sketchy Rust")
    log(f"Ranked sum of shared hashes: {sssh}")
    log(f"Sum of shared hashes: {ssh}")
    log(f"Genotype feature index: {index}")
    log(f"Genotype feature key: {key}")

    # Key to headers and categories.
    self.feature_key = self.read_feature_key(file=key)

    feature_index, feature_data = self.read_feature_index(file=index)
    self.feature_index = feature_index
    self.feature_data = feature_data

    self.ssh = self.read_ssh(file=ssh)
    self.sssh = self.read_sssh(file=sssh)

    self.features = self.feature_index.columns.tolist()

    if self.ssh is None:
        # Nothing to merge without sum-of-shared-hashes data.
        return

    # Merge ssh and feature data (used for the heatmap), ordered by read
    # and then by rank.
    merged = self.ssh.join(self.feature_data, how='inner')
    self.ssh_features = merged.sort_values(['read', 'rank'])

    # Number of distinct reads and ranks in the merged table.
    self.reads = len(self.ssh_features['read'].unique())
    self.ranks = len(self.ssh_features['rank'].unique())
def __init__(self):
    """Set up logging and start with an empty data frame."""
    PoreLogger.__init__(self)

    # Empty frame; omitting the argument equals passing None explicitly.
    empty_frame: pandas.DataFrame = pandas.DataFrame()
    self.data: pandas.DataFrame = empty_frame
def __init__(self):
    """Set up logging; no watcher is active yet."""
    PoreLogger.__init__(self)

    # Holds the active watcher; None until one is assigned.
    self.watcher = None