def process_method(method, properties, body) -> dict:
    """Run the analysis job described by a queued message.

    ``body`` is decoded as UTF-8 JSON. The resulting metadata supplies the
    job id, the S3 keys of the meta and counts files, the optional
    subsampling settings and the requested database version.

    ``method`` and ``properties`` are accepted but not used in this function.

    :param body: raw message payload (bytes) holding the JSON job metadata.
    :return: whatever ``statistical_analysis`` or ``non_statistical_analysis``
        returns for the job.
    """
    job = json.loads(body.decode('utf-8'))
    job_id = job['job_id']

    app_logger.info('New Job Queued: {}'.format(job_id))

    # Input tables are fetched before any other option is interpreted.
    meta = read_data_from_s3(job['file_meta'], s3_bucket_name, index_column_first=False)
    counts = read_data_from_s3(job['file_counts'], s3_bucket_name, index_column_first=True)

    subsampler = None
    if job.get('subsampling', False):
        use_log = bool(job['log'])
        num_pc = int(job['num_pc'])
        # A missing or falsy 'num_cells' is passed on as None.
        num_cells = int(job['num_cells']) if job.get('num_cells', False) else None
        subsampler = Subsampler(use_log, num_pc, num_cells)

    # Any version that is not available locally falls back to 'latest'.
    requested_version = job.get('database_version', 'latest')
    accepted_versions = list_local_versions() + ['latest']
    if requested_version not in accepted_versions:
        requested_version = 'latest'

    app = cpdb_app.create_app(verbose=False, database_file=find_database_for(requested_version))

    # A truthy 'iterations' value selects the statistical method.
    run_job = statistical_analysis if job['iterations'] else non_statistical_analysis
    return run_job(app, meta, counts, job_id, job, subsampler)
def test_non_statistical_method_subsampled_data_test__threshold__01__precision_3__num_pc_4__num_cells_4(self):
    # Scenario: 'test_subsampled' dataset, threshold 0.1, precision 3,
    # subsampler built with (False, 4, 4) and a fixed debug seed.
    dataset_name = 'test_subsampled'
    project = 'test_data'
    min_threshold = 0.1
    precision = 3

    # debug_seed=0 is passed as a fixed seed value.
    seeded_subsampler = Subsampler(False, 4, 4, debug_seed=0)

    self._method_call(dataset_name, project, min_threshold, precision, seeded_subsampler)
def cpdb_method_analysis_launcher(
        self,
        raw_meta: pd.DataFrame,
        counts: pd.DataFrame,
        counts_data: str,
        threshold: float,
        result_precision: int,
        subsampler: Subsampler = None,
) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame):
    """Prepare the inputs and run the non-statistical analysis method.

    The threshold is checked first, then the meta table is preprocessed and
    the counts are validated against it. When a subsampler is given, the
    counts are subsampled and the meta table is filtered to the labels
    still present in the counts columns. Interaction, gene and complex
    data are then loaded from the database repositories and handed to
    ``cpdb_analysis_method.call``.

    :param subsampler: optional subsampler; ``None`` skips subsampling.
    :return: ``(means, significant_means, deconvoluted)``.
    :raises ThresholdValueException: if ``threshold`` is below 0 or above 1.
    """
    threshold_out_of_range = threshold < 0 or threshold > 1
    if threshold_out_of_range:
        raise ThresholdValueException(threshold)

    meta = method_preprocessors.meta_preprocessor(raw_meta)
    counts = self._counts_validations(counts, meta)

    if subsampler is not None:
        counts = subsampler.subsample(counts)
        # Keep only meta rows whose index label is still a counts column.
        kept_labels = list(counts)
        meta = meta.filter(items=kept_labels, axis=0)

    # Reference data is loaded in a fixed order: interactions, genes,
    # complex compositions, expanded complexes.
    repositories = self.database_manager
    interactions = repositories.get_repository('interaction').get_all_expanded(include_gene=False)
    genes = repositories.get_repository('gene').get_all_expanded()
    complex_composition = repositories.get_repository('complex').get_all_compositions()
    complex_expanded = repositories.get_repository('complex').get_all_expanded()

    results = cpdb_analysis_method.call(
        meta,
        counts,
        counts_data,
        interactions,
        genes,
        complex_expanded,
        complex_composition,
        self.separator,
        threshold,
        result_precision,
    )
    means, significant_means, deconvoluted = results

    return means, significant_means, deconvoluted
def analysis(meta_filename: str,
             counts_filename: str,
             counts_data: str,
             project_name: str,
             threshold: float,
             result_precision: int,
             output_path: str,
             output_format: str,
             means_result_name: str,
             significant_means_result_name: str,
             deconvoluted_result_name: str,
             verbose: bool,
             database: Optional[str],
             subsampling: bool,
             subsampling_log: bool,
             subsampling_num_pc: int,
             subsampling_num_cells: Optional[int]
             ):
    """Run the non-statistical analysis through the local method launcher.

    A ``Subsampler`` is built only when ``subsampling`` is truthy. Known
    input/validation errors are logged as errors, an empty result is logged
    as a warning, and any other ``Exception`` is logged as
    ``'Unexpected error'`` (with a traceback on stdout when ``verbose``).
    None of those are re-raised.

    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately NOT caught, so
    an interrupt or an explicit exit request is never reported as an
    "unexpected error" and silently swallowed.
    """

    def _describe(error) -> str:
        # Build "<error>[: <description>.][ <hint>.]"; either attribute may
        # be missing or empty on a given exception.
        description = getattr(error, 'description', None)
        hint = getattr(error, 'hint', None)

        message = str(error)
        if description or hint:
            message += ':'
        if description:
            message += ' {}.'.format(description)
        if hint:
            message += ' {}.'.format(hint)
        return message

    try:
        subsampler = Subsampler(subsampling_log,
                                subsampling_num_pc,
                                subsampling_num_cells,
                                verbose) if subsampling else None

        launcher = LocalMethodLauncher(cpdb_app.create_app(verbose, database))
        launcher.cpdb_analysis_local_method_launcher(meta_filename,
                                                     counts_filename,
                                                     counts_data,
                                                     project_name,
                                                     threshold,
                                                     output_path,
                                                     output_format,
                                                     means_result_name,
                                                     significant_means_result_name,
                                                     deconvoluted_result_name,
                                                     result_precision,
                                                     subsampler,
                                                     )
    except (ReadFileException, ParseMetaException, ParseCountsException, ThresholdValueException,
            AllCountsFilteredException) as e:
        app_logger.error(_describe(e))
    except EmptyResultException as e:
        app_logger.warning(_describe(e))
    except Exception:
        # Was a bare ``except:``, which also trapped KeyboardInterrupt and
        # SystemExit; only genuine errors are handled here.
        app_logger.error('Unexpected error')
        if verbose:
            traceback.print_exc(file=sys.stdout)
def analysis_scanpy(adata,
                    var_names,
                    obs_names,
                    obs_key,
                    var_key=None,
                    gene_id_format=None,
                    project_name='',
                    threshold=0.1,
                    result_precision='3',
                    output_path='',
                    output_format='csv',
                    means_result_name='means',
                    significant_means_result_name='significant_means',
                    deconvoluted_result_name='deconvoluted',
                    verbose=True,
                    database='latest',
                    subsampling=False,
                    subsampling_log=True,
                    subsampling_num_pc=100,
                    subsampling_num_cells=None,
                    write=False,
                    add_to_uns=True):
    """Run the non-statistical analysis on ``adata`` via the scanpy launcher.

    A ``Subsampler`` is built only when ``subsampling`` is truthy. On
    success the launcher's return value is returned unchanged. Known
    input/validation errors are logged as errors, an empty result is logged
    as a warning, and any other ``Exception`` is logged as
    ``'Unexpected error'`` (with a traceback on stdout when ``verbose``).
    In all of those cases the function returns ``None``.

    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately NOT caught, so
    interrupting a long run actually stops it instead of being logged as an
    "unexpected error" and swallowed.
    """

    def _describe(error) -> str:
        # Build "<error>[: <description>.][ <hint>.]"; either attribute may
        # be missing or empty on a given exception.
        description = getattr(error, 'description', None)
        hint = getattr(error, 'hint', None)

        message = str(error)
        if description or hint:
            message += ':'
        if description:
            message += ' {}.'.format(description)
        if hint:
            message += ' {}.'.format(hint)
        return message

    try:
        subsampler = Subsampler(subsampling_log,
                                subsampling_num_pc,
                                subsampling_num_cells,
                                verbose) if subsampling else None

        launcher = LocalMethodLauncher(cpdb_app.create_app(verbose, database))
        out = launcher.cpdb_analysis_local_method_launcher_scanpy(adata,
                                                                  var_names,
                                                                  obs_names,
                                                                  obs_key,
                                                                  var_key,
                                                                  gene_id_format,
                                                                  project_name,
                                                                  threshold,
                                                                  output_path,
                                                                  output_format,
                                                                  means_result_name,
                                                                  significant_means_result_name,
                                                                  deconvoluted_result_name,
                                                                  result_precision,
                                                                  subsampler,
                                                                  write,
                                                                  add_to_uns)
        return out
    except (ReadFileException, ParseMetaException, ParseCountsException, ThresholdValueException,
            AllCountsFilteredException) as e:
        app_logger.error(_describe(e))
    except EmptyResultException as e:
        app_logger.warning(_describe(e))
    except Exception:
        # Was a bare ``except:``, which also trapped KeyboardInterrupt and
        # SystemExit; only genuine errors are handled here.
        app_logger.error('Unexpected error')
        if verbose:
            traceback.print_exc(file=sys.stdout)
def cpdb_statistical_analysis_launcher(
        self,
        raw_meta: pd.DataFrame,
        counts: pd.DataFrame,
        counts_data: str,
        iterations: int,
        threshold: float,
        threads: int,
        debug_seed: int,
        result_precision: int,
        pvalue: float,
        subsampler: Subsampler = None,
) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame):
    """Prepare the inputs and run the statistical analysis method.

    A thread count below 1 is replaced by ``self.default_threads`` (and
    logged). The threshold is then checked, the meta table is preprocessed
    and the counts are validated against it. When a subsampler is given,
    the counts are subsampled and the meta table is filtered to the labels
    still present in the counts columns. Interaction, gene and complex
    data are loaded from the database repositories and handed to
    ``cpdb_statistical_analysis_method.call``.

    :param subsampler: optional subsampler; ``None`` skips subsampling.
    :return: ``(pvalues, means, significant_means, deconvoluted)``.
    :raises ThresholdValueException: if ``threshold`` is below 0 or above 1.
    """
    if threads < 1:
        core_logger.info('Using Default thread number: %s' % self.default_threads)
        threads = self.default_threads

    threshold_out_of_range = threshold < 0 or threshold > 1
    if threshold_out_of_range:
        raise ThresholdValueException(threshold)

    meta = method_preprocessors.meta_preprocessor(raw_meta)
    counts = self._counts_validations(counts, meta)

    if subsampler is not None:
        counts = subsampler.subsample(counts)
        # Keep only meta rows whose index label is still a counts column.
        kept_labels = list(counts)
        meta = meta.filter(items=kept_labels, axis=0)

    # Reference data is loaded in a fixed order: interactions, genes,
    # complex compositions, expanded complexes.
    repositories = self.database_manager
    interactions = repositories.get_repository('interaction').get_all_expanded(include_gene=False)
    genes = repositories.get_repository('gene').get_all_expanded()
    complex_composition = repositories.get_repository('complex').get_all_compositions()
    complex_expanded = repositories.get_repository('complex').get_all_expanded()

    results = cpdb_statistical_analysis_method.call(
        meta,
        counts,
        counts_data,
        interactions,
        genes,
        complex_expanded,
        complex_composition,
        iterations,
        threshold,
        threads,
        debug_seed,
        result_precision,
        pvalue,
        self.separator,
    )
    # The method yields its results in a different order than this
    # launcher returns them.
    deconvoluted, means, pvalues, significant_means = results

    return pvalues, means, significant_means, deconvoluted
def statistical_analysis(meta_filename: str,
                         counts_filename: str,
                         counts_data='ensembl',
                         project_name='',
                         threshold=0.1,
                         result_precision='3',
                         output_path='',
                         output_format='csv',
                         means_result_name='means',
                         significant_means_result_name='significant_means',
                         deconvoluted_result_name='deconvoluted',
                         verbose=True,
                         database='latest',
                         subsampling=False,
                         subsampling_log=True,
                         subsampling_num_pc=100,
                         subsampling_num_cells=None,
                         debug_seed='-1',
                         pvalue=0.05,
                         pvalues_result_name='pvalues',
                         iterations=1000,
                         threads=4) -> None:
    """Run the statistical analysis through the local method launcher.

    ``database`` is first resolved with ``choose_database``; that call is
    outside the error handling, so anything it raises propagates. A
    ``Subsampler`` is built only when ``subsampling`` is truthy. Known
    input/validation errors are logged as errors, an empty result is logged
    as a warning, and any other ``Exception`` is logged as
    ``'Unexpected error'`` (with a traceback on stdout when ``verbose``).
    None of those are re-raised.

    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately NOT caught, so
    interrupting a long run actually stops it instead of being logged as an
    "unexpected error" and swallowed.
    """

    def _describe(error) -> str:
        # Build "<error>[: <description>.][ <hint>.]"; either attribute may
        # be missing or empty on a given exception.
        description = getattr(error, 'description', None)
        hint = getattr(error, 'hint', None)

        message = str(error)
        if description or hint:
            message += ':'
        if description:
            message += ' {}.'.format(description)
        if hint:
            message += ' {}.'.format(hint)
        return message

    database = choose_database(None, None, value=database)

    try:
        subsampler = Subsampler(subsampling_log,
                                subsampling_num_pc,
                                subsampling_num_cells,
                                verbose) if subsampling else None

        launcher = LocalMethodLauncher(cpdb_app.create_app(verbose, database))
        launcher.cpdb_statistical_analysis_local_method_launcher(meta_filename,
                                                                 counts_filename,
                                                                 counts_data,
                                                                 project_name,
                                                                 iterations,
                                                                 threshold,
                                                                 output_path,
                                                                 output_format,
                                                                 means_result_name,
                                                                 pvalues_result_name,
                                                                 significant_means_result_name,
                                                                 deconvoluted_result_name,
                                                                 debug_seed,
                                                                 threads,
                                                                 result_precision,
                                                                 pvalue,
                                                                 subsampler,
                                                                 )
    except (ReadFileException, ParseMetaException, ParseCountsException, ThresholdValueException,
            AllCountsFilteredException) as e:
        app_logger.error(_describe(e))
    except EmptyResultException as e:
        app_logger.warning(_describe(e))
    except Exception:
        # Was a bare ``except:``, which also trapped KeyboardInterrupt and
        # SystemExit; only genuine errors are handled here.
        app_logger.error('Unexpected error')
        if verbose:
            traceback.print_exc(file=sys.stdout)