Beispiel #1
0
    def __call__(self, content):
        """Display ``content`` through the ``less`` pager.

        Args:
            content: Text (``str``) or raw data (``bytes``) to page. ``str``
                input is encoded with the default codec before being sent.

        Raises:
            TypeError: If ``content`` is neither ``str`` nor ``bytes``.

        A ``KeyboardInterrupt`` raised while the pager is running is swallowed
        on purpose, so Ctrl-C simply leaves the pager.
        """
        from subprocess import Popen, PIPE
        from sys import stdout

        if not isinstance(content, (str, bytes)):
            raise TypeError('Unknown input type for paging.')

        payload = content.encode() if isinstance(content, str) else content

        try:
            # less flags: -F quit if the text fits on one screen, -R keep
            # colour escapes, -S chop long lines, -X skip terminal init,
            # -K exit on Ctrl-C.
            cmd = ['less', '-F', '-R', '-S', '-X', '-K']
            proc = Popen(cmd, stdin=PIPE, stdout=stdout)

            # Use the public pipe API instead of the private
            # Popen._stdin_write helper. The pager may exit before it has
            # consumed everything, which surfaces here as BrokenPipeError.
            try:
                proc.stdin.write(payload)
            except BrokenPipeError:
                pass
            finally:
                try:
                    # close() flushes buffered data, so it can hit the same
                    # broken pipe as write().
                    proc.stdin.close()
                except BrokenPipeError:
                    pass

            proc.wait()
        except KeyboardInterrupt:
            pass
Beispiel #2
0
def _r_vector(values):
    """Render an iterable as an R ``c(...)`` literal, using ``repr`` for each item.

    Unlike ``'c' + str(tuple(values))`` this never emits the trailing comma
    Python adds to one-element tuples (``c('x',)`` is not valid R).
    """
    return 'c(' + ', '.join(repr(value) for value in values) + ')'


def gsva(
    expression: Union[ExpressionWithControls, Profile], gene_sets_path: str, method: str = 'gsva',
    single_sample=False, permutations=1000, mx_diff=True, cores=1, _cache=True, limit_to_gene_sets=False,
    verbose=False
):
    """
    Run GSVA in an R subprocess and return the result as a DataFrame.

    The expression data is written to a temporary CSV file, an R script
    calling ``gsva.with_probabilities`` (sourced from ``gsva.R`` located next
    to this module) is piped to the R process, and the CSV written by that
    script is read back.

    Excerpt from GSVA documentation:
        An important argument of the gsva() function is the flag mx.diff which is set to TRUE by default.

        Under this default setting, GSVA enrichment scores are calculated using Equation 5, and therefore, are
        more amenable by analysis techniques that assume the data to be normally distributed.  When setting
        mx.diff=FALSE , then Equation 4 is employed, calculating enrichment in an analogous way to classical
        GSEA which typically provides a bimodal distribution of GSVA enrichment scores for each gene.

    Args:
        expression: expression data; must be a ``Profile`` when ``single_sample`` is set.
        gene_sets_path: path of an RDS file, loaded in R with ``readRDS``.
        method: forwarded to R as ``method``.
        single_sample: build a two-column (condition/control) frame from the profile
            and use the 'gene_permutation' procedure instead of 'bayes'.
        permutations: see the review note in the body; currently never forwarded to R.
        mx_diff: forwarded to R as ``mx.diff`` (``T``/``F``).
        cores: forwarded to R as ``cores``.
        _cache: store the result in ``GSVA_CACHE``.
        limit_to_gene_sets: ``False`` or an iterable rendered as an R vector.
        verbose: stream the R process output through ``display`` (stdout) and ``warn`` (stderr).

    Returns:
        DataFrame read from the result CSV; an empty DataFrame if that file is empty.
    """

    if not single_sample and permutations:
        # `warn` returns None, so the former `raise warn(...)` always ended in
        # "TypeError: exceptions must derive from BaseException".
        warn('permutations are not supported when not single_sample')

    # NOTE(review): the key ignores single_sample, mx_diff, permutations and
    # limit_to_gene_sets, so calls differing only in those share a cache entry,
    # and the lookup happens even when _cache is False - confirm this is intended.
    key = (expression.hashable, method, gene_sets_path)

    if key in GSVA_CACHE:
        return GSVA_CACHE[key]

    if single_sample:
        assert isinstance(expression, Profile)
        joined = DataFrame(
            concat([expression.top.up, expression.top.down]),
        )
        joined.columns = ['condition']
        joined['control'] = 0
        joined.index = joined.index.astype(str)

        expression_classes = Series(['case', 'control'])
        expression = joined

    else:
        joined = DataFrame(expression.joined)
        joined.index = joined.index.astype(str)

        # positional (0..n-1) mask of columns containing at least one null
        nulls = joined.isnull().any(axis=0).reset_index(drop=True)
        if nulls.any():
            print(f'Following columns contain nulls and will be skipped: {list(joined.columns[nulls])}')
        joined = joined[joined.columns[~nulls]]

        expression_classes = expression.classes.loc[~nulls]
        expression = joined

    mx_diff = 'T' if mx_diff else 'F'
    procedure = 'gene_permutation' if single_sample else 'bayes'
    cwd = Path(__file__).parent

    gene_sets_limit = _r_vector(limit_to_gene_sets) if limit_to_gene_sets is not False else 'F'
    # NOTE(review): `procedure` is only ever 'gene_permutation' or 'bayes', so this
    # condition is never true and `permutations` is never passed to R. Left as-is
    # because the right fix depends on the signature in gsva.R - confirm.
    permutations_arg = ', permutations = ' + str(permutations) if procedure == 'permutations' else ''

    with NamedTemporaryFile(mode='w', prefix=gsva_tmp_dir) as f_expression, NamedTemporaryFile(prefix=gsva_tmp_dir) as f_result:
        expression.to_csv(f_expression)
        # R opens the file by name; without a flush the buffered tail of the
        # CSV may not be on disk yet.
        f_expression.flush()

        script = f"""
        source("{cwd}/gsva.R")
        expression = read.csv('{f_expression.name}', row.names=1)
        expression_classes = {_r_vector(expression_classes)}
        gene_sets = readRDS('{gene_sets_path}')

        result = gsva.with_probabilities(
            expression, expression_classes, gene_sets, '{procedure}',
            method = '{method}', mx.diff={mx_diff}, include_control=F, cores={cores},
            limit_to_gene_sets={gene_sets_limit}, progress=F
            {permutations_arg}
        )
        write.csv(result, '{f_result.name}')
        """

        process = Popen(vanilla_R, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        if verbose:
            from helpers.streams import handle_streams
            from signature_scoring.evaluation import display

            # Feed the script through the public pipe API (not the private
            # Popen._stdin_write) and close stdin so that R sees EOF.
            try:
                process.stdin.write(script.encode())
            except BrokenPipeError:
                pass
            try:
                process.stdin.close()
            except BrokenPipeError:
                pass

            handlers = {'out': display, 'err': warn}
            handle_streams(process, handlers)
        else:
            # communicate() drains stdout/stderr while waiting; a bare wait()
            # can deadlock once R fills one of the pipe buffers.
            process.communicate(script.encode())

        try:
            result = read_csv(f_result.name, index_col=0)
        except EmptyDataError:
            result = DataFrame()

    if _cache:
        GSVA_CACHE[key] = result
    return result