Exemple #1
0
def load_table_dict(config):
    fn_pkl = get_save_path(config) + '/' + get_file_name(config) + '.pkl'
    fn_xlsx = get_save_path(config) + '/' + get_file_name(config) + '.xlsx'
    if os.path.isfile(fn_pkl):
        table_dict = load_table_dict_pkl(config)
    elif os.path.isfile(fn_xlsx):
        table_dict = load_table_dict_xlsx(config)
    return table_dict
Exemple #2
0
 def is_result_exist(self, config, configs_child):
     fn = get_save_path(config) + '/' + \
         get_file_name(config) + '.*'
     if glob.glob(fn):
         return True
     else:
         return False
Exemple #3
0
def load_table_dict_pkl(config):
    fn = get_save_path(config) + '/' + get_file_name(config) + '.pkl'
    if os.path.isfile(fn):
        f = open(fn, 'rb')
        table_dict = pickle.load(f)
        f.close()
        return table_dict
    else:
        raise IOError(f'No such file: {fn}')
Exemple #4
0
def load_table_dict(config):
    fn = get_save_path(config) + '/' + get_file_name(config) + '.xlsx'
    if os.path.isfile(fn):
        df = pd.read_excel(fn)
        tmp_dict = df.to_dict()
        table_dict = {}
        for key in tmp_dict:
            curr_dict = tmp_dict[key]
            table_dict[key] = list(curr_dict.values())
        return table_dict
    else:
        raise IOError(f'No such file: {fn}')
Exemple #5
0
def cpg_proc_special_clock_linreg_dev(
    data,
    annotations,
    attributes,
    file,
    params=None,
):
    if os.path.isfile(file):

        head, tail = os.path.split(file)
        fn = os.path.splitext(tail)[0]
        ext = os.path.splitext(tail)[1]

        config_root = Config(data=copy.deepcopy(data),
                             experiment=Experiment(
                                 type=DataType.cpg,
                                 task=Task.clock,
                                 method=Method.linreg,
                                 params=copy.deepcopy(params)),
                             annotations=copy.deepcopy(annotations),
                             attributes=copy.deepcopy(attributes),
                             is_run=True,
                             is_root=True)
        root = Node(name=str(config_root), config=config_root)

        config_child = Config(data=copy.deepcopy(data),
                              experiment=Experiment(type=DataType.cpg,
                                                    task=Task.table,
                                                    method=Method.special,
                                                    params={'file_name': fn}),
                              annotations=copy.deepcopy(annotations),
                              attributes=copy.deepcopy(attributes),
                              is_run=False,
                              is_root=False)

        Node(name=str(config_child), config=config_child, parent=root)

        build_tree(root)

        new_file = get_save_path(config_child) + '/' + \
             get_file_name(config_child) + ext

        copyfile(file, new_file)

        calc_tree(root)

    else:
        raise FileNotFoundError(f'File {file} not found.')
Exemple #6
0
 def save(self, config, configs_child):
     fn = get_save_path(config) + '/' + \
         get_file_name(config)
     save_figure(fn, config.experiment_data['fig'])
Exemple #7
0
 def save(self, config, configs_child):
     fn = get_save_path(config) + '/' + \
         get_file_name(config)
     save_table_dict(fn, config.metrics)
Exemple #8
0
def betas_clock_special(
    data,
    annotations,
    attributes,
    file,
    method_params=None,
):
    """
        Producing epigentic clock, using best CpGs which are provided in input file.

        Epigentic clock represents as table:
        Each row corresponds to clocks, which are built on all CpGs from the previous rows including the current row.
        Columns:

        * item: CpG id.
        * aux: gene, on which CpG is mapped.
        * R2: determination coefficient of linear regression between real and predicted target observable.
          A statistical measure of how well the regression line approximates the data points.
        * r: correlation coefficient of linear regression between real and predicted target observable.
        * evs: explained variance regression score.
        * mae: mean absolute error regression loss.
        * rmse: root mean square error

        Possible parameters of experiment:

        * ``'type'``: type of clocks. \n
          Possible options: \n
          ``'all'``: iterative building of clocks starting from one element in the model,
          ending with ``'size'`` elements in the model. \n
          ``'single '``: building of clocks only with ``'size'`` elements in the model. \n
          ``'deep'``: iterative building of clocks starting from one element in the model,
          ending with ``'size'`` elements in the model, but choosing all possible combinations from ``'size'`` elements.
        * ``'part'``: the proportion of considered number of subject in the test set. From ``0.0`` to ``1.0``.
        * ``'size'``: maximum number of exogenous variables in a model.
        * ``'runs'`` number of bootstrap runs in model

        :param data: pdm.Data instance, which specifies information about dataset.
        :param annotations: pdm.Annotations instance, which specifies subset of CpGs.
        :param attributes: pdm.Attributes instance, which specifies information about subjects.
        :param method_params: parameters of experiment.
     """

    if os.path.isfile(file):

        head, tail = os.path.split(file)
        fn = os.path.splitext(tail)[0]
        ext = os.path.splitext(tail)[1]

        config_root = Config(data=copy.deepcopy(data),
                             experiment=Experiment(
                                 data=DataType.betas,
                                 task=Task.clock,
                                 method=Method.linreg,
                                 method_params=copy.deepcopy(method_params)),
                             annotations=copy.deepcopy(annotations),
                             attributes=copy.deepcopy(attributes),
                             is_run=True,
                             is_root=True)
        root = Node(name=str(config_root), config=config_root)

        config_child = Config(data=copy.deepcopy(data),
                              experiment=Experiment(
                                  data=DataType.betas,
                                  task=Task.table,
                                  method=Method.special,
                                  method_params={'file_name': fn}),
                              annotations=copy.deepcopy(annotations),
                              attributes=copy.deepcopy(attributes),
                              is_run=False,
                              is_root=False)

        Node(name=str(config_child), config=config_child, parent=root)

        build_tree(root)

        new_file = get_save_path(config_child) + '/' + \
            get_file_name(config_child) + ext

        copyfile(file, new_file)

        calc_tree(root)

    else:
        raise FileNotFoundError(f'File {file} not found.')