Ejemplo n.º 1
0
    def to_json(self, save_to_file=None):
        """Serialize the estimator configuration as JSON.

        The configuration comes from ``self.to_dict()``. Entries that are not JSON-able are adapted first:
        `model` is dropped, the callables in `bs_stats` / `conv_stats` are replaced by their ``repr`` strings,
        and a `grouper` object is replaced by its ``group`` attribute.

        Args:
            save_to_file (str or Path): optional. A path with suffix ``.json`` is used as the output file; a path
                without suffix is treated as a directory and ``config.json`` is written inside it.

        Returns:
            the return value of ``dump_json(config_dict, indent=0)`` if `save_to_file` is not given, else None
        """

        config_dict = self.to_dict()
        config_dict.pop('model', None)

        for stats_key in ('bs_stats', 'conv_stats'):
            stats = config_dict.get(stats_key)
            if stats is None:
                continue
            # accept a mapping (name -> callable) as well as an iterable of (name, callable) pairs
            pairs = stats.items() if hasattr(stats, 'items') else stats
            # repr() has to be called: the bound method `func.__repr__` itself is not JSON-able
            config_dict[stats_key] = {key: repr(func) for key, func in pairs}

        grouper = config_dict.get('grouper')
        if hasattr(grouper, 'group'):
            # only unwrap grouper objects; None or an already plain grouper is kept as is
            config_dict['grouper'] = grouper.group

        if not save_to_file:
            return dump_json(config_dict, indent=0)

        from pathlib import Path
        path = Path(save_to_file)
        if path.suffix == '.json':
            # a named json file
            check_dir(path.parent)
            dump_json(obj=config_dict, path=path, indent=2)
        elif path.suffix == '':
            # a directory
            check_dir(path)
            dump_json(obj=config_dict, path=str(path) + '/config.json', indent=2)
        else:
            logging.error('Unrecognized saving path', error_type=NameError)
Ejemplo n.º 2
0
    def to_pickle(self, output_dir, bs_record=True, conv_record=True):
        """Dump the fitting results into one pickled dict (convenient for small datasets).

        The dict always contains ``'summary'`` (``self.summary``). ``'bs_record'`` and ``'conv_record'`` are
        added when requested and when the corresponding ``self.bs_record()`` / ``self.conv_record()`` call
        returns a dict; any other return type is reported through ``logging.error`` with ``TypeError``.

        Args:
             output_dir (str): path of the pickle file to write, should have suffix of ``.pkl``
             bs_record (bool): whether to include the bootstrap record, default True
             conv_record (bool): whether to include the convergence record, default True
        """
        check_dir(Path(output_dir).parent)
        payload = {'summary': self.summary}

        if bs_record:
            bs_data = self.bs_record()
            if not isinstance(bs_data, dict):
                logging.error('bs_record is not a loaded dict of pd.DataFrame',
                              error_type=TypeError)
            else:
                payload['bs_record'] = bs_data

        if conv_record:
            conv_data = self.conv_record()
            if not isinstance(conv_data, dict):
                logging.error(
                    'conv_record is not a loaded dict of pd.DataFrame',
                    error_type=TypeError)
            else:
                payload['conv_record'] = conv_data

        dump_pickle(obj=payload, path=output_dir)
Ejemplo n.º 3
0
    def __init__(self,
                 estimator,
                 bootstrap_num,
                 bs_record_num,
                 bs_method,
                 grouper=None,
                 bs_stats=None,
                 record_full=False):
        """Store the bootstrap configuration on the instance.

        For ``bs_method == 'stratified'`` the grouper is required to be a dict, or a `Grouper` whose ``group``
        attribute is a dict; anything else is reported through ``logging.error`` with ``TypeError``.
        For other methods ``self.grouper`` is not assigned here.
        """

        self.bs_method = bs_method
        if bs_method == 'stratified':
            from ..data.grouper import Grouper
            # unwrap a Grouper instance to its underlying group definition
            groups = grouper.group if isinstance(grouper, Grouper) else grouper
            if not isinstance(groups, dict):
                logging.error(
                    'Unsupported grouper type for stratified bootstrap',
                    error_type=TypeError)
            else:
                self.grouper = groups

        self.estimator = estimator
        self.bootstrap_num = bootstrap_num
        self.bs_record_num = bs_record_num
        self.bs_stats = bs_stats
        self.record_full = record_full
Ejemplo n.º 4
0
def seq_variance(seq_table, grouper):
    """Row-wise mean and standard deviation of `seq_table` within the group(s) given by `grouper`.

    Args:
        seq_table: table passed on to the grouper to obtain the sub-table(s)
        grouper: a `Grouper` instance, or a list / pd.Series / dict handed to `get_group`

    Returns:
        if the grouping yields a single pd.DataFrame, one pd.DataFrame with columns ('mean', 'sd')
        if the grouping yields a dict of tables, two pd.DataFrame (mean, sd) with one column per group
        None otherwise
    """

    from .grouper import Grouper

    if isinstance(grouper, Grouper):
        sub_tables = grouper.get_table(target=seq_table)
    elif isinstance(grouper, (list, pd.Series, dict)):
        from .grouper import get_group
        sub_tables = get_group(seq_table, grouper)
    else:
        logging.error("Unknown types of grouper", TypeError)
        sub_tables = None

    if isinstance(sub_tables, dict):
        # one column per group, statistics taken along axis 1 of each sub-table
        group_mean = {name: table.mean(axis=1) for name, table in sub_tables.items()}
        group_sd = {name: table.std(axis=1) for name, table in sub_tables.items()}
        return pd.DataFrame(group_mean), pd.DataFrame(group_sd)

    if isinstance(sub_tables, pd.DataFrame):
        stats = {'mean': sub_tables.mean(axis=1), 'sd': sub_tables.std(axis=1)}
        return pd.DataFrame(stats)

    return None
Ejemplo n.º 5
0
    def func(target, input_samples, reduce_method='median', remove_empty=True):
        """Divide the non-input columns of `target` by a per-row base computed from the input columns.

        Args:
            target (pd.DataFrame): table whose columns include `input_samples`
            input_samples (list): column names used to compute the per-row base
            reduce_method (str or callable): 'med'/'median' (np.nanmedian) or 'mean'/'avg' (np.nanmean),
                matched case-insensitively; a callable is applied to ``target[input_samples]`` directly
            remove_empty (bool): if True, rows whose resulting values sum to 0 or less are dropped

        Returns:
            pd.DataFrame with the non-input columns, restricted to rows whose base is greater than 0
        """

        method_mapper = {
            'med': np.nanmedian,
            'median': np.nanmedian,
            'mean': np.nanmean,
            'avg': np.nanmean
        }
        if callable(reduce_method):
            base = reduce_method(target[input_samples])
        else:
            # look up with the same lower-cased key used for validation, so 'Median' works as well
            reducer = method_mapper.get(str(reduce_method).lower())
            if reducer is None:
                logging.error('Unknown reduce_method', ValueError)
                return None
            base = reducer(target[input_samples], axis=1)

        mask = base > 0  # rows with a non-positive base cannot be used as denominator
        is_reacted_col = ~target.columns.isin(input_samples)
        reacted_frac = target.loc[mask, is_reacted_col].divide(base[mask], axis=0)
        if remove_empty:
            return reacted_frac.loc[reacted_frac.sum(axis=1) > 0]
        return reacted_frac
Ejemplo n.º 6
0
def main():
    """Run batch fitting as configured by the module-level `args`.

    Steps: read the tables via `read_table`, resolve the grouper for stratified bootstrapping, fit with
    `BatchFitter`, write the summary and model to `args.output_dir`, then archive the `seqs` folder.
    """

    import shutil
    import subprocess

    from k_seq.estimate import BatchFitter
    from k_seq.model.kinetic import BYOModel

    work_table, x_data, sigma, seq_data = read_table()
    if args.bs_method.lower() == 'stratified':
        try:
            grouper = getattr(seq_data.grouper, args.stratified_grouper).group
        except Exception:
            # `Exception` instead of a bare except so KeyboardInterrupt / SystemExit are not swallowed
            logging.error('Can not find grouper for stratified bootstrapping',
                          error_type=ValueError)
            sys.exit(1)
    else:
        grouper = None

    logging.info(f'exclude_zero: {args.exclude_zero}')
    logging.info(f'inverse_weight: {args.inverse_weight}')
    logging.info(f'fit_top_n: {args.fit_top_n}')
    logging.info(f'large_data: {args.large_data}')
    logging.info(f'convergence: {args.convergence_num > 0}')
    logging.info(f'bootstrap: {args.bootstrap_num > 0}')

    # NOTE(review): `large_dataset` is hard-coded to True although `args.large_data` exists - confirm intent
    batch_fitter = BatchFitter(y_dataframe=work_table,
                               x_data=x_data,
                               sigma=sigma,
                               bounds=[[0, 0], [np.inf, 1]],
                               metrics={'kA': kA},
                               model=BYOModel.reacted_frac(broadcast=False),
                               exclude_zero=args.exclude_zero,
                               grouper=grouper,
                               bootstrap_num=args.bootstrap_num,
                               bs_record_num=args.bs_record_num,
                               bs_method=args.bs_method,
                               bs_stats={},
                               conv_reps=args.convergence_num,
                               conv_init_range=((0, 10), (0, 1)),
                               conv_stats={},
                               large_dataset=True,
                               note=args.note,
                               rnd_seed=args.seed)
    stream_to = args.output_dir if args.large_data else None
    batch_fitter.fit(parallel_cores=args.core_num,
                     point_estimate=True,
                     bootstrap=args.bootstrap_num > 0,
                     convergence_test=args.convergence_num > 0,
                     stream_to=stream_to,
                     overwrite=args.overwrite)

    batch_fitter.summary(save_to=f'{args.output_dir}/fit_summary.csv')
    batch_fitter.save_model(output_dir=args.output_dir,
                            results=True,
                            bs_record=False,
                            tables=True)

    # zip seq info: same `tar` invocation as before, but without going through a shell so that
    # output paths containing spaces or shell metacharacters are handled safely
    # NOTE(review): the archive is named `seq.tar.gz`; confirm this matches the name expected by the loaders
    output_dir = str(args.output_dir)
    seqs_dir = os.path.join(output_dir, 'seqs')
    if os.path.isdir(seqs_dir):
        try:
            tar_status = subprocess.run(
                ['tar', '-czf', 'seq.tar.gz', 'seqs'], cwd=output_dir)
        except OSError:
            # `tar` is not available; keep the uncompressed folder
            tar_status = None
        if tar_status is not None and tar_status.returncode == 0:
            # only remove the folder once it has been archived successfully
            shutil.rmtree(seqs_dir)
Ejemplo n.º 7
0
 def format_stat(res):
     """Convert a statistic to a plain value: pd.Series becomes a dict; int, float, bool and dict pass through.

     Any other type is reported through ``logging.error`` with ``TypeError``.
     """
     if isinstance(res, pd.Series):
         return res.to_dict()
     if isinstance(res, (int, float, bool, dict)):
         return res
     logging.error('Unrecognized return value for bs_stats',
                   error_type=TypeError)
Ejemplo n.º 8
0
 def target(self, value):
     """Assign the target table; a pd.DataFrame target also refreshes ``self.mask`` via ``self.get_mask``.

     ``None`` only clears ``self._target``. Any other type is reported through ``logging.error`` with
     ``TypeError``.
     """
     if isinstance(value, pd.DataFrame):
         # keep the mask in sync with the new target
         self._target = value
         self.mask = self.get_mask(target=value)
     elif value is None:
         self._target = None
     else:
         logging.error("target can only be pd.DataFrame or SeqTable",
                       error_type=TypeError)
Ejemplo n.º 9
0
Archivo: simu.py Proyecto: ynshen/k-seq
 def get_uncertainty_accuracy(self, param, pred_type='bs_ci95'):
     """Dispatch to the accuracy method matching `pred_type` and return its result for `param`.

     Accepted values: 'bs_ci95'/'bootstrap_ci95', 'bs_sd'/'bootstrap_sd', 'rep_sd'. Anything else is
     reported through ``logging.error``.
     """
     handlers = (
         (('bs_ci95', 'bootstrap_ci95'), '_get_bs_ci95_accuracy'),
         (('bs_sd', 'bootstrap_sd'), '_get_bs_sd_accuracy'),
         (('rep_sd',), '_get_rep_sd_accuracy'),
     )
     for aliases, method_name in handlers:
         if pred_type in aliases:
             # the method is looked up only for the matching alias group
             return getattr(self, method_name)(param=param)
     logging.error(
         "Unknown pred_type, choose from 'bs_ci95', 'bs_sd', 'rep_sd'")
Ejemplo n.º 10
0
    def add_curve(data, plot_args):
        """Evaluate `model` on `xs` with the parameters in `data` and draw the curve on `ax`.

        Args:
            data (dict or pd.Series): keyword parameters passed to `model`
            plot_args (dict): extra keyword arguments forwarded to ``ax.plot``
        """
        if isinstance(data, dict):
            model_kwargs = data
        elif isinstance(data, pd.Series):
            model_kwargs = data.to_dict()
        else:
            logging.error(
                'Unknown parameter input type, should be pd.Series or dict',
                error_type=TypeError)

        curve = model(xs, **model_kwargs)
        ax.plot(xs, curve, marker=None, **plot_args)
Ejemplo n.º 11
0
def get_file_list(file_root,
                  pattern=None,
                  file_list=None,
                  black_list=None,
                  full_path=True):
    """Return the files directly under `file_root` whose names contain `pattern`; folders are not included

    Args:
        file_root (str, Path, or list of them): root directory/directories to search
        pattern (str): optional, only names containing this (glob) pattern are kept; all files if None
        file_list (list of str): optional, only includes the files with names in the file_list if exists
        black_list (list of str): optional, file names included in black_list will be excluded
        full_path (bool): return ``pathlib.Path`` objects if True (default), only the file names if False

    Returns:
        list of pathlib.Path (full_path=True) or list of str (file names)
    """

    glob_pattern = '*' if pattern is None else '*{}*'.format(pattern)
    if black_list is None:
        black_list = []

    if isinstance(file_root, (str, Path)):
        roots = [file_root]
    elif isinstance(file_root, (list, tuple)):
        roots = file_root
    else:
        logging.error('file_root should be a string or list of string',
                      error_type=TypeError)
        return []

    files = []
    for root_path in roots:
        for file in Path(root_path).glob(glob_pattern):
            # glob also yields directories; keep regular files only, as documented
            if file.is_file() and file.name not in black_list:
                files.append(file)

    if file_list is not None:
        files = [file for file in files if file.name in file_list]

    if full_path:
        return files
    return [file.name for file in files]
Ejemplo n.º 12
0
    def __iter__(self):
        """Return a generator over the sub-tables defined by this grouper.

        For ``self.type == 0`` each entry of ``self.group`` selects from ``self.target`` (via ``.loc`` when
        ``self.axis == 0``, via ``[]`` otherwise); for other types each key of ``self.group`` is passed to
        ``self.get_table``.
        """
        if self.target is None:
            logging.error(
                'seq_data.target is None, please assign before iteration',
                error_type=ValueError)

        if self.type != 0:
            return (self.get_table(group) for group in self.group.keys())
        if self.axis == 0:
            return (self.target.loc[ix] for ix in self.group)
        return (self.target[ix] for ix in self.group)
Ejemplo n.º 13
0
 def split(self, target=None, remove_zero=False):
     """Apply the grouper to `target` (``self.target`` if not given).

     Returns the result of ``self.get_table`` directly when ``self.type == 0``; otherwise a dict mapping
     each key of ``self.group`` to its ``self.get_table`` result.
     """
     table = self.target if target is None else target
     if table is None:
         logging.error("Please indicate target seq_table to group")

     if self.type != 0:
         return {
             group: self.get_table(target=table,
                                   group=group,
                                   remove_zero=remove_zero)
             for group in self.group.keys()
         }
     return self.get_table(target=table, remove_zero=remove_zero)
Ejemplo n.º 14
0
 def bs_method(self, bs_method):
     """Validate the bootstrap method name and store its canonical form in ``self._bs_method``.

     Accepted aliases are mapped to one of 'pct_res', 'data' or 'stratified'. Unknown names are reported
     through ``logging.error`` with ``NotImplementedError``.
     """
     implemented_methods = {
         'pct_res': 'pct_res',
         'resample percent residues': 'pct_res',
         'rel_res': 'pct_res',
         'resample data points': 'data',
         'data': 'data',
         'stratified': 'stratified',
     }
     if bs_method in implemented_methods:
         # store the canonical name so later comparisons do not need to know every alias
         self._bs_method = implemented_methods[bs_method]
     else:
         logging.error(f'Bootstrap method {bs_method} is not implemented',
                       error_type=NotImplementedError)
Ejemplo n.º 15
0
def multinomial(p, N, seed=None):
    """Draw multinomial counts for probabilities `p` and total number(s) of draws `N`.

    `p` is normalized by its sum when the sum is not exactly 1. A list / np.ndarray / pd.Series `N` gives
    one draw per entry (stacked into an array); a numeric `N` gives a single draw. When `seed` is given it
    is applied to the global numpy random state.
    """
    from scipy.stats import multinomial as multinomial_dist

    if seed is not None:
        np.random.seed(seed)

    total = np.sum(p)
    if total != 1:
        p = np.array(p) / np.sum(p)

    if isinstance(N, (list, np.ndarray, pd.Series)):
        draws = [multinomial_dist.rvs(n=int(n), p=p) for n in N]
        return np.array(draws)
    if is_numeric(N):
        return multinomial_dist.rvs(n=int(N), p=p)
    logging.error("Unknown N type", error_type=TypeError)
Ejemplo n.º 16
0
def first_order(c, k, A, alpha, t, broadcast=False):
    """Evaluate ``A * (1 - exp(-alpha * t * k * c))`` for arrays of `c`, `k` and `A`.

    Scalars (as decided by `check_scalar`) are wrapped into length-1 arrays first.

    Args:
        c, k, A: scalar or array-like parameters
        alpha, t: multiplied into the exponent
        broadcast (bool): if True, compute every (A, k, c) combination with dimensions ordered (A, k, c);
            if False, `k` and `A` are paired element-wise, with dimensions ordered (k/A, c)

    Returns:
        np.ndarray with length-1 dimensions squeezed out; entries where ``c < 0`` are set to 1
    """

    def _as_array(value):
        # a scalar becomes a length-1 array so the outer products below work uniformly
        if check_scalar(value):
            return np.array([to_scalar(value)])
        return np.array(value)

    c = _as_array(c)
    k = _as_array(k)
    A = _as_array(A)

    if broadcast:
        # dim  param
        #  0     A
        #  1     k
        #  2     c
        flat = np.outer(A, (1 - np.exp(-alpha * t * np.outer(k, c))))
        y = flat.reshape((len(A), len(k), len(c)))
        y[:, :, c < 0] = 1
        n_dims = 3
    else:
        # dim param
        #  0   k, A
        #  1    c
        if len(k) != len(A):
            logging.error(
                'k and A should have same length when broadcasting is disabled',
                error_type=ValueError)

        y = np.expand_dims(A, -1) * (1 - np.exp(-alpha * t * np.outer(k, c)))
        y[:, c < 0] = 1
        n_dims = 2

    # drop the leading dimensions that only exist because a scalar was wrapped
    dim_to_squeeze = tuple(dim for dim in range(n_dims) if y.shape[dim] == 1)
    return np.squeeze(y, axis=dim_to_squeeze)
Ejemplo n.º 17
0
Archivo: pool.py Proyecto: ynshen/k-seq
    def __init__(self,
                 count_model,
                 kinetic_model=None,
                 param_table=None,
                 note=None,
                 **params):
        """Initialize a pool model with given kinetic model and count model
        Args:
            count_model (`ModelBase` or `callable`): model for sequencing counts
            kinetic_model (`ModelBase` or `callable`): model for pool kinetics, no reaction if not given
            param_table: optional table; each of its columns is added to `params` under the column name
            note: stored as ``self.note``
            **params: parameters stored as ``self.params``
        """
        def _static_pool(p0):
            """Static pool with no reaction"""
            return p0

        super().__init__()

        # a ModelBase subclass contributes its `func`; any other callable is used directly
        if kinetic_model is None:
            self.kinetic_model = _static_pool
        elif isclass(kinetic_model) and issubclass(kinetic_model, ModelBase):
            self.kinetic_model = kinetic_model.func
        elif not callable(kinetic_model):
            logging.error('model should be a ModelBase subclass or a callable',
                          error_type=TypeError)
        else:
            self.kinetic_model = kinetic_model

        if isclass(count_model) and issubclass(count_model, ModelBase):
            self.count_model = count_model.func
        elif not callable(count_model):
            logging.error('model should be a ModelBase subclass or a callable',
                          error_type=TypeError)
        else:
            self.count_model = count_model

        self.kinetic_params = get_func_params(self.kinetic_model,
                                              required_only=True)
        self.count_params = get_func_params(self.count_model,
                                            required_only=True)

        if param_table is not None:
            table_params = {}
            for col in param_table.columns:
                table_params[col] = param_table[col]
            params.update(table_params)
        self.params = params
        self.note = note
Ejemplo n.º 18
0
    def get_FitResult(self, seq=None):
        """Load the `FitResults` of one sequence from its saved JSON record.

        ``self._bs_record`` maps each sequence to where its result is stored: either a ``(tg_ix, hash_ix)``
        pair (hierarchical format, ``seqs/<tg_ix>.tar.gz`` containing ``<hash_ix>.json``) or a single hash
        (old format, ``seqs/<hash>.json`` or a member of ``seqs.tar.gz``), all under ``self.result_path``.

        Args:
            seq: the sequence to load; if None the whole sequence-to-record mapping is returned

        Returns:
            the mapping if `seq` is None, otherwise the loaded `FitResults`
        """

        from .least_squares import FitResults

        if self._bs_record is None:
            logging.error('No bootstrap or convergence test record found',
                          error_type=TypeError)
            return None
        seq_to_hash = self._bs_record

        if seq is None:
            return seq_to_hash

        record = seq_to_hash[seq]
        result = None
        if isinstance(record, (list, tuple)):
            # new hierarchical format
            tg_ix, hash_ix = record
            result = FitResults.from_json(json_path=f'{hash_ix}.json',
                                          tarfile=self.result_path.joinpath(
                                              'seqs', f'{tg_ix}.tar.gz'))
        else:
            # old format
            json_file = self.result_path.joinpath('seqs', f"{record}.json")
            tar_file = self.result_path.joinpath('seqs.tar.gz')
            if json_file.exists():
                logging.info(f"load result from {record}.json")
                result = FitResults.from_json(json_file)
            elif tar_file.exists():
                try:
                    result = FitResults.from_json(
                        json_path=f'seqs/{record}.json', tarfile=tar_file)
                except Exception:
                    # the archive may use a different member layout; `Exception` rather than a bare
                    # except so KeyboardInterrupt / SystemExit still propagate
                    result = FitResults.from_json(
                        json_path=f'results/seqs/{record}.json',
                        tarfile=tar_file)

        if result is None:
            # neither a json file nor an archive was found for this sequence
            logging.error(f'No saved fitting result found for {seq}',
                          error_type=FileNotFoundError)
            return None

        if result.data.x_data is None and self.data.y_dataframe is not None:
            # add from data attribute
            result.data.x_data = self.data.x_data
            result.data.y_data = self.data.y_dataframe.loc[seq]

        return result
Ejemplo n.º 19
0
    def func(target, norm_factor):
        """Multiply each column of `target` (cast to float) by its entry in `norm_factor`.

        Columns without an entry in `norm_factor` are skipped with a warning.
        """

        if not isinstance(target, pd.DataFrame):
            logging.error('name needs to be pd.DataFrame')

        sample_list = []
        for sample in target.columns:
            if sample not in norm_factor.keys():
                logging.warning(
                    f'Sample {sample} is not in spike-in norm_factor, skip this sample'
                )
                continue
            sample_list.append(sample)

        # column-wise scaling; `col.name` is the sample the factor is looked up by
        return target[sample_list].apply(
            lambda col: col.astype('float') * norm_factor[col.name], axis=0)
Ejemplo n.º 20
0
    def __init__(self, group, target=None, axis=1):
        """Initialize a Grouper instance
        Args:
            group (str, list-like or dict): a list-like creates a Type 0 Grouper (single group), a single
                string is treated as a group with one member, and a dict creates a Type 1 Grouper
                (multiple groups)
            target (pd.DataFrame): optional, target seq_table
            axis (0 or 1): axis to apply the grouper
        """

        if isinstance(group, str):
            # a single name is one member; list(str) would split it into characters
            self.type = 0
            self.group = [group]
        elif isinstance(group, (list, np.ndarray, pd.Series)):
            self.type = 0
            self.group = list(group)
        elif isinstance(group, dict):
            self.type = 1
            self.group = {key: list(members) for key, members in group.items()}
        else:
            logging.error('group should be list-like or dictionary')
        self.target = target
        self.axis = axis
Ejemplo n.º 21
0
def read_table_files(file_path, col_name=None, header=1):
    """Read common seq_table files
    - .xls or .xlsx: first sheet will be read
    - .csv: read as comma separated file
    - .tsv: read as tab separated file

    Args:
        file_path (str or Path): file to read, the type is decided by its suffix (case-insensitive)
        col_name: optional, column(s) to return; the whole table is returned if None
        header (int): row number (0-based) passed to pandas as the header row, default 1

    Returns:
        the selected column(s) as returned by ``df[col_name]``, or the whole pd.DataFrame
    """
    from pathlib import Path
    import pandas as pd

    file_path = Path(file_path)
    # Path.suffix includes the leading dot, e.g. '.csv'
    suffix = file_path.suffix.lower()
    if suffix in ('.xls', '.xlsx'):
        df = pd.read_excel(io=file_path, sheet_name=0, header=header)
    elif suffix == '.csv':
        df = pd.read_csv(file_path, header=header)
    elif suffix == '.tsv':
        df = pd.read_csv(file_path, header=header, sep='\t')
    else:
        logging.error('File type not identified', error_type=TypeError)
        return None

    if col_name is None:
        return df
    return df[col_name]
Ejemplo n.º 22
0
 def get_table(self, group=None, target=None, axis=None, remove_zero=False):
     """Slice `target` with the members of this grouper via `slice_table`.

     `target` and `axis` fall back to ``self.target`` / ``self.axis`` when not given. For
     ``self.type == 0`` the `group` argument is ignored and ``self.group`` is used as keys; otherwise
     ``self.group[group]`` is used.
     """
     table = self.target if target is None else target
     if table is None:
         logging.error("Please indicate target seq_table to group")
     slice_axis = self.axis if axis is None else axis

     if self.type == 0:
         keys = self.group
     else:
         if group is None:
             logging.error('Please indicate the group')
         keys = self.group[group]

     return slice_table(table=table,
                        keys=keys,
                        axis=slice_axis,
                        remove_empty=remove_zero)
Ejemplo n.º 23
0
def table_object_to_dataframe(obj, table_name=None):
    """Resolve `obj` (a file path, `pd.DataFrame` or `SeqData`) to a table.

    A ``.csv`` path is read with the first column as index and returned directly; a ``.pkl`` / ``.pickle``
    path is loaded with `read_pickle` and then handled like an in-memory object. For a `SeqData`,
    ``obj.table.original`` is returned unless `table_name` names another attribute of ``obj.table``.
    """
    from pathlib import Path, PosixPath
    import pandas as pd
    from ..data.seq_data import SeqData

    if isinstance(obj, (str, Path, PosixPath)):
        path = Path(obj)
        if not path.is_file():
            logging.error(f'{obj} is not a valid file',
                          error_type=FileNotFoundError)
        elif path.suffix == '.csv':
            return pd.read_csv(path, index_col=0)
        elif path.suffix in ['.pkl', '.pickle']:
            # continue below with whatever object was pickled
            obj = read_pickle(obj)
        else:
            logging.error(f'{obj} is not a valid file',
                          error_type=FileNotFoundError)

    if isinstance(obj, pd.DataFrame):
        return obj
    if isinstance(obj, SeqData):
        if table_name is None:
            return obj.table.original
        return getattr(obj.table, table_name)
    logging.error('SeqTable should be a `pd.DataFrame` or `SeqData`',
                  error_type=TypeError)
Ejemplo n.º 24
0
    def __init__(self, data, data_unit=None, sample_list=None, seq_list=None, data_note=None, use_sparse=True,
                 seq_metadata=None, sample_metadata=None,
                 grouper=None, x_values=None, x_unit=None, note=None, dataset_metadata=None):
        """Create a SeqData: metadata scope, the original `SeqTable`, optional x values and groupers.

        Args:
            data, sample_list, seq_list, data_unit, data_note, use_sparse: passed to `SeqTable` to build
                ``self.table.original``
            seq_metadata, sample_metadata, dataset_metadata: optional metadata stored under ``self.metadata``
            grouper (dict): optional, keyword arguments for ``GrouperCollection.add``
            x_values (dict, pd.Series, list or np.ndarray): optional x value for each sample; a list/array is
                matched to the samples of the original table by position
            x_unit: unit of the x values, only kept when `x_values` is given
            note: note stored in the metadata
        """

        # initialize metadata
        from datetime import datetime
        self.metadata = AttrScope(created_time=datetime.now(), note=note)
        # add metadata
        if dataset_metadata is not None:
            self.metadata.add(dataset_metadata)
        if sample_metadata is not None:
            self.metadata.samples = AttrScope(sample_metadata)
        if seq_metadata is not None:
            self.metadata.seqs = AttrScope(seq_metadata)
        logging.info('SeqData created')

        # add original seq_table
        self.table = AttrScope(original=SeqTable(data, columns=sample_list, index=seq_list,
                                                 unit=data_unit, note=data_note, use_sparse=use_sparse))
        # add x values
        self.x_values = None
        self.x_unit = None
        if x_values is not None:
            samples = self.table.original.samples
            if isinstance(x_values, (dict, pd.Series)):
                x_series = pd.Series(x_values)
            elif isinstance(x_values, (list, np.ndarray)):
                x_series = pd.Series(x_values, index=samples)
            else:
                logging.error('Unknown type for x_values', error_type=TypeError)
                x_series = None
            if x_series is not None:
                # align to the table's samples; samples without an x value are dropped instead of raising
                self.x_values = x_series.reindex(samples).dropna()
                self.x_unit = x_unit

        if grouper is not None:
            from .grouper import GrouperCollection
            self.grouper = GrouperCollection()
            self.grouper.add(**grouper)

        self.update_analysis()
Ejemplo n.º 25
0
def get_func_params(func, required_only=True):
    """Get the name of arguments for a function (callable), or the arguments in __init__ for a Class (self not included)

    Args:
        func (`callable`): the function
        required_only (bool): if exclude arguments with default values

    Returns: a list of arguments name in order
    """
    from inspect import Parameter, signature

    if not callable(func):
        logging.error('func is not a callable', error_type=TypeError)

    sign = signature(func)
    if required_only:
        # `Parameter.empty` marks "no default"; comparing against the return annotation only worked
        # by accident for functions without a return annotation
        return [
            key for key, param in sign.parameters.items()
            if param.default is Parameter.empty
        ]
    return list(sign.parameters.keys())
Ejemplo n.º 26
0
    def filter_axis(self, filter, axis=0, remove_empty=False, inplace=False):
        """Keep only the entries listed in `filter` along the given axis, using `slice_table`.

        Args:
            filter: keys passed to `slice_table`
            axis: 'sample'/'samples'/'observation'/1 or 'seq'/'seqs'/'sequence'/'sequences'/0
                (strings are case-insensitive)
            remove_empty (bool): passed to `slice_table`
            inplace (bool): if True, rows/columns absent from the sliced table are also dropped from this
                table itself (assumes this object is a pd.DataFrame - TODO confirm)

        Returns:
            the sliced table returned by `slice_table`
        """

        allowed_axis = {
            'sample': 1,
            'samples': 1,
            'observation': 1,
            1: 1,
            'seq': 0,
            'sequence': 0,
            'sequences': 0,
            'seqs': 0,
            0: 0
        }
        if isinstance(axis, str):
            axis = axis.lower()
        if axis not in allowed_axis:
            logging.error("Unknown axis, please use 'sample'/1 or 'sequence'/0", error_type=ValueError)
        else:
            axis = allowed_axis[axis]

        sliced = slice_table(self, axis=axis, keys=filter, remove_empty=remove_empty)
        if inplace:
            # reindex() returns a new object, so drop the removed labels in place instead
            self.drop(index=self.index.difference(sliced.index),
                      columns=self.columns.difference(sliced.columns),
                      inplace=True)
        return sliced
Ejemplo n.º 27
0
Archivo: simu.py Proyecto: ynshen/k-seq
    def get_est_results(self, param, pred_type='point_est'):
        """Return a pd.DataFrame with the estimate ('pred') and 'truth' of `param` for ``self.seq_list``.

        `pred_type` selects the column of ``self.results``: the point estimate (`param` itself), or
        ``self._bs_prefix + param`` followed by '_mean' or '_50%'.
        NOTE(review): the 'rep_mean' aliases read the same column as 'bs_mean' - confirm this is intended.
        """
        if pred_type in ('pe', 'point_est', 'point est', 'point_estimation',
                         'point estimation'):
            column = param
        elif pred_type in ('mean', 'bs_mean', 'bootstrap_mean'):
            column = self._bs_prefix + param + '_mean'
        elif pred_type in ('median', 'bs_median', 'bootstrap_median'):
            column = self._bs_prefix + param + '_50%'
        elif pred_type in ('rep_mean', 'replicate_mean'):
            column = self._bs_prefix + param + '_mean'
        else:
            logging.error(
                "Unknown pred_type, choose from 'point_est', 'bs_mean', 'bs_median', 'rep_mean'",
                ValueError)

        pred = self.results[column]
        truth = self.truth[param]
        selected = {
            'pred': pred[self.seq_list],
            'truth': truth[self.seq_list]
        }
        return pd.DataFrame(selected)
Ejemplo n.º 28
0
    def spike_in_amount(self, spike_in_amount):
        """Store `spike_in_amount` as a pd.Series in ``self._spike_in_amount`` and refresh the norm factors.

        A pd.Series is stored as is, a dict is converted, and a list / np.ndarray is matched by position to
        the columns of ``self.base_table`` (its length must equal the number of columns).
        ``self._update_norm_factors()`` is called afterwards if the instance has `spike_in_members`.
        """

        if isinstance(spike_in_amount, pd.Series):
            self._spike_in_amount = spike_in_amount
        elif isinstance(spike_in_amount, dict):
            self._spike_in_amount = pd.Series(spike_in_amount)
        elif isinstance(spike_in_amount, (list, np.ndarray)):
            # values without keys are assigned to the samples in column order
            sample_names = self.base_table.columns
            if len(sample_names) == len(spike_in_amount):
                self._spike_in_amount = pd.Series(data=spike_in_amount,
                                                  index=sample_names)
            else:
                logging.error(
                    'Length of spike_in_amount does not match sample number',
                    ValueError)
        else:
            logging.error(
                'Unknown spike_in_amount type, it should be list-like, pd.Series, or dict'
            )

        if hasattr(self, 'spike_in_members'):
            self._update_norm_factors()
Ejemplo n.º 29
0
    def filter(cls,
               target,
               axis=None,
               remove_empty=False,
               reverse=False,
               **kwargs):
        """Apply this filter class to `target` directly and return the sliced table.

        The mask comes from ``cls._get_mask`` (inverted when `reverse` is True) and selects index labels for
        ``axis == 0`` or column labels otherwise; the selection itself is done by `slice_table`.
        """

        mask = cls._get_mask(target=target, axis=axis, **kwargs)
        if reverse:
            mask = ~mask
        if axis is None:
            logging.error('Please indicate axis to filter',
                          error_type=ValueError)

        on_rows = axis == 0
        labels = target.index[mask] if on_rows else target.columns[mask]
        return slice_table(table=target,
                           keys=labels,
                           axis=0 if on_rows else 1,
                           remove_empty=remove_empty)
Ejemplo n.º 30
0
Archivo: simu.py Proyecto: ynshen/k-seq
        def generate_params(param_input):
            """Turn one parameter specification into `uniq_seq_num` values.

            Accepted inputs:
              - list / np.ndarray / pd.Series: returned as is when its length equals `uniq_seq_num`,
                otherwise resampled with replacement to that size
              - generator: the next `uniq_seq_num` items are taken
              - callable: first called as ``param_input(size=uniq_seq_num)``; if that call raises TypeError it
                is called without arguments and sampled `uniq_seq_num` times
            """

            from types import GeneratorType

            list_like = (list, np.ndarray, pd.Series)

            if isinstance(param_input, list_like):
                if len(param_input) == uniq_seq_num:
                    return param_input
                logging.info(
                    'Size fo input param list and expected uniq_seq_num does not match, '
                    'resample to given uniq_seq_num with replacement')
                return np.random.choice(param_input,
                                        replace=True,
                                        size=uniq_seq_num)

            if isinstance(param_input, GeneratorType):
                # assume only generate one realization
                return [next(param_input) for _ in range(uniq_seq_num)]

            if not callable(param_input):
                logging.error("Unknown input to draw a distribution value",
                              error_type=TypeError)
                return None

            # Only the call itself is guarded: a TypeError reported for an unsupported return value
            # must not be mistaken for "the callable does not accept `size`".
            try:
                param_output = param_input(size=uniq_seq_num)
            except TypeError:
                # if can not pass uniq_seq_num, assume generate single samples
                param_output = param_input()
                if isinstance(param_output, GeneratorType):
                    return [next(param_output) for _ in range(uniq_seq_num)]
                if isinstance(param_output, (float, int)):
                    return [param_input() for _ in range(uniq_seq_num)]
                logging.error("Unknown callable return type for distribution",
                              error_type=TypeError)
                return None

            if isinstance(param_output, list_like):
                return param_output
            if isinstance(param_output, GeneratorType):
                # presumably the generator yields a whole realization at once - TODO confirm
                return next(param_output)
            logging.error("Unknown input to draw a distribution value",
                          error_type=TypeError)
            return None