Ejemplo n.º 1
0
    def mc(self):
        """Fetch the R ``mc`` object and return it as a Struct keyed by column name.

        Returns:
            Struct: mapping of R column name -> column values (each entry of
            the transposed array is one column of the R object).
        """
        r_obj = r['mc']
        colnames = r.colnames(r_obj)
        # Transpose so iterating ``obj`` yields columns rather than rows.
        obj = array(r_obj).T
        # NOTE: removed a leftover debug ``print(colnames)`` call.
        return Struct(dict(zip(colnames, obj)))
Ejemplo n.º 2
0
    def model(self):
        """Fetch the R ``model`` object as a Struct of column name -> values.

        R missing-value markers (``NARealType``) are replaced with ``None``.
        """
        def _na_to_none(values):
            # Map R NA sentinels to Python None, keep all other values.
            cleaned = []
            for value in values:
                cleaned.append(None if isinstance(value, NARealType) else value)
            return cleaned

        model_obj = r['model']
        names = r.colnames(model_obj)
        columns = [_na_to_none(column) for column in model_obj]
        return Struct(dict(zip(names, columns)))
Ejemplo n.º 3
0
    def parstat(self):
        """Return per-parameter statistics from the R ``ParStat`` object.

        Returns:
            Struct: parameter name -> dict of statistic name -> value.
        """
        r_obj = r['ParStat']
        # BUG FIX: column names must be read from the R object itself.
        # The original called ``r.colnames`` on the transposed numpy array,
        # which is not an R object and does not carry the column names
        # (compare the correct pattern used by the other ``parstat``).
        colnames = r.colnames(r_obj)
        obj = array(r_obj).T
        properties = [
            "xopt", "x1per", "x99per", "x10per", "x90per", "xminus", "xplus",
            "mode", "mean", "sd", "skewness", "kurtosis"
        ]

        # One dict of statistics per parameter (row of the transposed array).
        parameter_values = [
            dict(zip(properties, obj[i])) for i in range(len(colnames))
        ]
        return Struct(dict(zip(colnames, parameter_values)))
Ejemplo n.º 4
0
    def run(self, data_object):
        """Read canned dataset from R to a pandas dataframe

        Returns:
            data_object (DataObject): DataObject instance
            terminate (bool): should we terminate the DAG? true or false

        """
        dataset = self.node_config["dataset"]
        # Lazy %-style args: the message is only formatted if INFO is enabled.
        logging.info("Reading %s from R", dataset)

        try:
            from rpy2.robjects.packages import importr, data
        except ImportError:  # pragma: no cover
            raise ImportError(
                "This example needs Rpy2."
                "Please refer to the R requirements in the README"
            )
        datasets = importr("datasets")
        # Load the named dataset into an R environment so R code can see it.
        r_env = data(datasets).fetch(dataset)

        import rpy2.robjects as robjects

        # why we do this:
        # > data(euro)
        # > euro
        # ATS         BEF         DEM         ESP         FIM         FRF         IEP         ITL         LUF         NLG         PTE
        # 13.760300   40.339900    1.955830  166.386000    5.945730    6.559570    0.787564 1936.270000   40.339900    2.203710  200.482000
        #
        # > as.data.frame(euro)
        #        euro
        # ATS   13.760300
        # BEF   40.339900
        # DEM    1.955830
        # FIX: named ``r_df`` instead of ``data`` so we no longer shadow the
        # ``data`` function imported from rpy2.robjects.packages above.
        r_df = robjects.r("as.data.frame(%s)" % dataset)

        # at time of writing, rpy2's R dataframe to pandas dataframe was not fully supported
        # However, as python list() seems to work for FloatVector, StrVector, and FactorVector, let's use it
        from rpy2.robjects import r

        colnames = r.colnames(r_df)
        pandas_data = {}
        # convert each column of the R dataframe in turn
        for i, colname in enumerate(colnames):
            pandas_data[colname] = list(r_df[i])
        # Unfortunately, some datasets have rownames that should be an ID column (e.g., see mtcars where rownames=names of the cars).
        # This is the best we can do: pull it out as an additional column for each and every dataset
        pandas_data["row_names"] = list(r_df.rownames)

        df = pd.DataFrame(pandas_data)
        data_object.add(self, df)
        # An empty dataframe means there is nothing downstream to process.
        terminate = df.empty
        return data_object, terminate
Ejemplo n.º 5
0
def _parse_assayData(assayData, assay):
    """Parse Rpy2 assayData (Environment object)

    assayData: Rpy2 Environment object.
    assay: An assay name indicating the data to be loaded.

    Return a parsed expression dataframe (Pandas).
    """
    # Enable the rpy2 <-> pandas converters before touching R objects.
    pandas2ri.activate()
    # Expression matrix for the requested assay (rpy2 matrix object).
    expr_matrix = assayData[assay]
    values = pandas2ri.ri2py(expr_matrix)
    row_labels = pandas2ri.ri2py(r.rownames(expr_matrix))
    col_labels = pandas2ri.ri2py(r.colnames(expr_matrix))
    return pd.DataFrame(values, index=row_labels, columns=col_labels)
Ejemplo n.º 6
0
    def parstat(self):
        """Build a 2-D table of parameter statistics from the R ``ParStat`` object.

        Returns a list of rows: the first row is a header ("" followed by the
        statistic names); each following row holds one parameter name and its
        statistic values, in header order.
        """
        r_obj = r['ParStat']
        obj = array(r_obj).T
        colnames = r.colnames(r_obj)

        properties = [
            "xopt", "x1per", "x99per", "x10per", "x90per", "xminus", "xplus",
            "mode", "mean", "sd", "skewness", "kurtosis"
        ]

        table = [[""] + properties]
        for i, param_name in enumerate(colnames):
            # Statistics for this parameter, keyed by statistic name.
            stats = dict(zip(properties, obj[i]))
            table.append([param_name] + [stats[prop] for prop in properties])
        return table
Ejemplo n.º 7
0
# Extract required arguments.
data = pd.read_table(snakemake.input.data, index_col=0)  # Input Gene-by-Sample raw count data.
condition = pd.read_table(snakemake.input.condition, index_col=0, names=['condition'])  # Input condition file which indicates to which condition each sample belongs.
logger.info('%d(genes) x %d(samples) data matrix and %d sample conditions are given.' % (data.shape[0], data.shape[1], len(condition.index)))
logger.debug('Headers: %s...' % ' '.join(data.columns[:3]))
logger.debug('Gene identifiers: %s...' % ' '.join(data.index[:3]))

# Keep only samples present in both the count matrix and the condition table,
# preserving the column order of the count matrix.
intersecting_samples = [sample for sample in data.columns if sample in condition.index]
data = data[intersecting_samples]

# Condition labels aligned to the filtered sample order above.
condition = list(condition.loc[intersecting_samples].condition.values)
logger.info('%d samples will be used for DEG discovery.' % len(intersecting_samples))


# Convert the pandas dataframe to an R numeric matrix for EBSeq.
r_data_matrix = r['data.matrix'](pandas2ri.py2ri(data))
r_samples = r.colnames(r_data_matrix)
r_conditions = ro.FactorVector(condition)

logger.debug('Computing size factors.')
# Size factors for normalization, via EBSeq's MedianNorm — see EBSeq docs.
r_size_factors = ebseq.MedianNorm(r_data_matrix)

logger.info('Discovering DEGs.')
logger.info('Running EBTest.')
num_iteration = 0
while True:
    # Increase iteration numbers if the conditons are not met.
    # Hopefully most of the tie, 10 iterations will be enough for convergence.
    num_iteration += 10
    r_eb_out = ebseq.EBTest(Data=r_data_matrix, Conditions=r_conditions, sizeFactors=r_size_factors, maxround=num_iteration)
    logger.info('Running GetDEResults. (FDR cutoff = %.3f)' % cutoff)