예제 #1
0
 def module_dbg_logger(msg: str):
     """Log ``msg`` at DEBUG level if debug output is enabled for this module.

     The message is emitted only when ``CONFIG_GLOBAL["LOG_DBG_MODULES"]``
     contains either this module's name (``module_name_simple``) or the
     wildcard ``"*"``, and ``CONFIG_GLOBAL["LOG_LEVEL"]`` equals ``"debug"``.

     While the message is emitted the root logger is temporarily switched to
     DEBUG; afterwards it is set to INFO.

     Args:
         msg: Text to log; it is prefixed with ``"<module name>: "``.
     """
     # Same evaluation order as before: module filter first, then level.
     if not ({module_name_simple, "*"}
             & set(CONFIG_GLOBAL["LOG_DBG_MODULES"])):
         return
     if CONFIG_GLOBAL["LOG_LEVEL"] != "debug":
         return

     logger = getLogger()
     logger.setLevel(logging.DEBUG)
     try:
         # Lazy %-formatting: a non-string ``msg`` no longer raises here.
         debug("%s: %s", module_name_simple, msg)
     finally:
         # Always leave DEBUG again, even if emitting the record failed.
         # NOTE: the level is reset to INFO (not to whatever it was before),
         # which is what this function has always done.
         logger.setLevel(logging.INFO)
예제 #2
0
    def setup(self, args):
        """Populate this settings object from args, environment and config file.

        Steps, in order: load ``.env`` from the current working directory,
        read the config file named by ``args.config_file``, merge everything
        via ``self.build_settings``, normalise the path and log-level
        settings, configure ``colorlog``, derive the simulation round,
        product and sector from ``args.schema_path`` and finally fetch the
        definitions, pattern and schema.

        Args:
            args: Parsed command line arguments; ``config_file`` and
                ``schema_path`` are read here, the rest is handed to
                ``self.build_settings``.

        Raises:
            ValueError: If ``args.schema_path`` has fewer than three parts.
        """
        # Environment variables from a .env file in the working directory.
        load_dotenv(Path.cwd() / '.env')

        # Merge settings from args, os.environ and the config file.
        file_config = self.read_config(args.config_file)
        self.build_settings(args, os.environ, file_config)

        # Turn path settings into expanded Path objects; the unchecked path
        # falls back to the working directory when it is not configured.
        self.UNCHECKED_PATH = (
            Path.cwd() if self.UNCHECKED_PATH is None
            else Path(self.UNCHECKED_PATH).expanduser()
        )
        if self.CHECKED_PATH is not None:
            self.CHECKED_PATH = Path(self.CHECKED_PATH).expanduser()

        self.LOG_LEVEL = self.LOG_LEVEL.upper()
        if self.LOG_PATH is not None:
            self.LOG_PATH = Path(self.LOG_PATH).expanduser()

        # Configure coloured logging at the requested level.
        log_format = ' %(log_color)s%(levelname)-8s : %(message)s%(reset)s'
        colorlog.basicConfig(level=self.LOG_LEVEL, format=log_format)

        # The first three components of the schema path identify the
        # simulation round, the product and the sector.
        self.SCHEMA_PATH = Path(args.schema_path)
        simulation_round, product, sector = self.SCHEMA_PATH.parts[:3]
        self.SIMULATION_ROUND = simulation_round
        self.PRODUCT = product
        self.SECTOR = sector

        # Fetch definitions, pattern and schema from the protocol locations.
        self.DEFINITIONS = fetch_definitions(
            self.PROTOCOL_LOCATIONS.split(), self.SCHEMA_PATH)
        self.PATTERN = fetch_pattern(
            self.PROTOCOL_LOCATIONS.split(), self.SCHEMA_PATH)
        self.SCHEMA = fetch_schema(
            self.PROTOCOL_LOCATIONS.split(), self.SCHEMA_PATH)

        # Dump the resulting settings for debugging.
        colorlog.debug(self)
예제 #3
0
파일: models.py 프로젝트: Mazda35/gdsctools
    def init(self):
        """Precompute the per-drug lookups and dummy variables used by ANOVA.

        Populates:

        * ``self.ic50_dict`` -- for each drug in ``self.ic50.drugIds`` a
          dictionary with the non-NA IC50 values (``'Y'``), the matching
          identifiers from the index of ``self.ic50.df`` (``'indices'``)
          and the row positions of those identifiers (``'real_indices'``);
        * ``self.tissue_dict``, ``self.msi_dict``, ``self.media_dict`` --
          for each drug the factor values restricted to those identifiers
          (MSI only if ``self.features.found_msi``, media only if
          ``self.settings.include_media_factor``);
        * ``self._tissue_dummies`` -- dummy columns for the tissue factor
          (case-insensitively ordered), optionally the media dummies, plus
          ``'Intercept'``, ``'C(msi)[T.1]'`` and ``'feature'`` columns
          filled with ones.

        It also empties ``self.individual_anova`` and sets
        ``self._init_called`` to ``True``. On the first call, when
        ``self.verbose`` is set, the included factors are logged.
        """
        # Some preprocessing to speed up data access in ANOVA: one entry per
        # (drug, identifier) pair with the NA values already removed.
        ic50_parse = self.ic50.df.copy().unstack().dropna()

        # Row position of every identifier. Only the first occurrence is
        # kept, which is what ``list.index`` returned, but the lookup is now
        # constant time instead of a scan per identifier.
        row_position = {}
        for position, cosmic_id in enumerate(self.ic50.df.index):
            row_position.setdefault(cosmic_id, position)

        # Dictionary version of the data, accessed per drug. 'indices' holds
        # the identifiers with a measured IC50 (not positions from 0 to N),
        # 'real_indices' the corresponding row positions.
        self.ic50_dict = {}
        for drug in self.ic50.drugIds:
            measured = ic50_parse.loc[drug]
            self.ic50_dict[drug] = {
                'indices': measured.index,
                'Y': measured.values,
            }
        for entry in self.ic50_dict.values():
            entry['real_indices'] = [
                row_position[cosmic_id] for cosmic_id in entry['indices']
            ]

        # save the tissues
        self._autoset_tissue_factor()

        # and MSI (Microsatellite instability) status of the samples.
        self._autoset_msi_factor()

        # and (growth) media factor
        self._autoset_media_factor()

        # dictionaries to speed up code, filled for each drug once for all.
        self.msi_dict = {}
        self.tissue_dict = {}
        self.media_dict = {}
        for drug_name in self.ic50.drugIds:
            # NOTE: these are identifiers, not positions from 0 to N.
            indices = self.ic50_dict[drug_name]['indices']

            # MSI, media and tissue are small enough to be stored entirely.
            if self.features.found_msi:
                self.msi_dict[drug_name] = self.msi_factor.loc[indices]

            if self.settings.include_media_factor:
                self.media_dict[drug_name] = self.media_factor.loc[indices]

            self.tissue_dict[drug_name] = self.tissue_factor.loc[indices]

        # some preprocessing for the OLS computation.
        # We create the dummies for the tissue factor once for all.
        # To agree with the R convention the columns are ordered
        # case-insensitively (a<B==b<c) instead of Python's A<B<C<a<b<c.
        tissue_dummies = pd.get_dummies(self.tissue_factor)
        ordered = sorted(tissue_dummies.columns, key=lambda s: s.lower())
        # Reorder the data itself before relabelling; assigning the sorted
        # labels onto the unsorted columns would attach names to the wrong
        # data whenever the two orders differ.
        tissue_dummies = tissue_dummies.reindex(columns=ordered)
        tissue_dummies.columns = ['C(tissue)[T.' + x + ']' for x in ordered]
        self._tissue_dummies = tissue_dummies

        if self.settings.include_media_factor:
            self._media_dummies = pd.get_dummies(self.media_factor)
            columns = self._media_dummies.columns
            columns = ['C(media)[T.' + x + ']' for x in columns]
            self._media_dummies.columns = columns
            for col in columns:
                self._tissue_dummies[col] = self._media_dummies[col]

        n_rows = len(self._tissue_dummies)
        self._tissue_dummies['C(msi)[T.1]'] = [1] * n_rows
        self._tissue_dummies['feature'] = [1] * n_rows
        self._tissue_dummies.insert(0, 'Intercept', [1] * n_rows)

        # NOTE: the first tissue/media category is not dropped here; the code
        # that used to drop it (as the regression reference) is disabled.

        # reset the buffer.
        self.individual_anova = {}

        if self.verbose and self._init_called is False:
            active_modes = self._get_analysis_mode()
            for factor in ['tissue', 'media', 'msi', 'feature']:
                if factor in active_modes:
                    logger.debug(factor.upper() + " FACTOR : included")
                else:
                    logger.debug(factor.upper() + " FACTOR : NOT included")
        self._init_called = True
예제 #4
0
def main():
    """Emit one debug message through ``colorlog``."""
    message = "Which logging format do we have?"
    colorlog.debug(message)