Example no. 1
def mergesort(filename, output=None, key=None, maxitems=1e6, progress=True):
    """Given an input file sort it by performing a merge sort on disk.

    :param filename: Either a filename as a ``str`` or a ``py._path.local.LocalPath`` instance.
    :type filename:  ``str`` or ``py._path.local.LocalPath``

    :param output: An optional output filename as a ``str`` or a ``py._path.local.LocalPath`` instance.
    :type output:  ``str`` or ``py._path.local.LocalPath`` or ``None``

    :param key: An optional key to sort the data on.
    :type key:  ``function`` or ``None``

    :param maxitems: Maximum number of items to hold in memory at a time.
    :type maxitems:  ``int``

    :param progress: Whether or not to display a progress bar
    :type progress: ``bool``

    This uses ``py._path.local.LocalPath.make_numbered_dir`` to create temporary scratch space to work
    with when splitting the input file into sorted chunks. The merge sort is then processed iteratively
    in-memory using the ``~merge`` function, which is almost identical to ``~heapq.merge`` but adds
    support for an optional key function.
    """

    p = filename if isinstance(filename, LocalPath) else LocalPath(filename)
    output = p if output is None else output
    if not isinstance(output, LocalPath):
        output = LocalPath(output)
    key = key if key is not None else lambda x: x

    scratch = LocalPath.make_numbered_dir(prefix="mergesort-")

    nlines = sum(1 for line in p.open("r"))

    # Compute a reasonable chunksize < maxitems
    chunksize = first(ifilter(lambda x: x < maxitems, imap(lambda x: nlines / (2**x), count(1))))

    # Split the file up into n sorted files
    if progress:
        bar = ProgressBar("Split/Sorting Data", max=(nlines / chunksize))
    for i, items in enumerate(ichunks(chunksize, jsonstream(p))):
        with scratch.ensure("{0:d}.json".format(i)).open("w") as f:
            f.write("\n".join(map(dumps, sorted(items, key=key))))
        if progress:
            bar.next()
    if progress:
        bar.finish()

    q = scratch.listdir("*.json")
    with output.open("w") as f:
        if progress:
            bar = ProgressBar("Merge/Sorting Data", max=nlines)
        for item in merge(*imap(jsonstream, q), key=key):
            f.write("{0:s}\n".format(dumps(item)))
            if progress:
                bar.next()
        if progress:
            bar.finish()
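
A minimal usage sketch for the function above, assuming a newline-delimited JSON input file; the filenames, the "id" sort key and the chunk size are illustrative, not taken from the original project:

# Sort records by their "id" field, holding at most ~500k items in memory at a time.
mergesort("events.json", output="events.sorted.json",
          key=lambda record: record["id"], maxitems=5e5, progress=False)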
Example no. 2
    def kick(self, sock, source, name, nick, reason=None):
        user = models.User.objects.filter(sock=sock).first()

        channel = models.Channel.objects.filter(name=name).first()
        if channel is None:
            return ERR_NOSUCHCHANNEL(name)

        if not user.oper and user not in channel.operators:
            return ERR_CHANOPRIVSNEEDED(channel.name)

        if nick not in imap(attrgetter("nick"), channel.users):
            return ERR_USERNOTINCHANNEL(nick, channel.name)

        nick = models.User.objects.filter(nick=nick).first()

        self.notify(
            channel.users[:],
            Message(u"KICK",
                    channel.name,
                    nick.nick,
                    reason or nick.nick,
                    prefix=user.prefix))

        nick.channels.remove(channel)
        nick.save()

        channel.users.remove(nick)
        if nick in channel.operators:
            channel.operators.remove(nick)
        if nick in channel.voiced:
            channel.voiced.remove(nick)
        channel.save()

        if not channel.users:
            channel.delete()
Example no. 3
    def kick(self, sock, source, name, nick, reason=None):
        user = models.User.objects.filter(sock=sock).first()

        channel = models.Channel.objects.filter(name=name).first()
        if channel is None:
            return ERR_NOSUCHCHANNEL(name)

        if not user.oper and user not in channel.operators:
            return ERR_CHANOPRIVSNEEDED(channel.name)

        if nick not in imap(attrgetter("nick"), channel.users):
            return ERR_USERNOTINCHANNEL(nick, channel.name)

        nick = models.User.objects.filter(nick=nick).first()

        self.notify(
            channel.users[:],
            KICK(channel.name, nick.nick, reason or nick.nick, prefix=user.prefix)
        )

        nick.channels.remove(channel)
        nick.save()

        channel.users.remove(nick)
        if nick in channel.operators:
            channel.operators.remove(nick)
        if nick in channel.voiced:
            channel.voiced.remove(nick)
        channel.save()

        if not channel.users:
            channel.delete()
Example no. 4
    def environment(self, vars_):
        """Set up environment variables to trigger analysis dumps from clang.

        We'll store all the harvested metadata in the plugin's temporary folder.

        """
        tree = self.tree
        plugin_folder = os.path.dirname(__file__)
        flags = [
            '-load',
            os.path.join(plugin_folder,
                         'libclang-index-plugin.so'), '-add-plugin',
            'dxr-index', '-plugin-arg-dxr-index', tree.source_folder
        ]
        flags_str = " ".join(imap('-Xclang {}'.format, flags))

        env = {
            'CC': "clang %s" % flags_str,
            'CXX': "clang++ %s" % flags_str,
            'DXR_CLANG_FLAGS': flags_str,
            'DXR_CXX_CLANG_OBJECT_FOLDER': tree.object_folder,
            'DXR_CXX_CLANG_TEMP_FOLDER': self._temp_folder,
        }
        env['DXR_CC'] = env['CC']
        env['DXR_CXX'] = env['CXX']
        return merge(vars_, env)
Example no. 5
    def environment(self, vars_):
        """Set up environment variables to trigger analysis dumps from clang.

        We'll store all the harvested metadata in the plugin's temporary folder.

        """
        tree = self.tree
        plugin_folder = os.path.dirname(__file__)
        flags = [
            '-load', os.path.join(plugin_folder, 'libclang-index-plugin.so'),
            '-add-plugin', 'dxr-index',
            '-plugin-arg-dxr-index', tree.source_folder
        ]
        flags_str = " ".join(imap('-Xclang {}'.format, flags))

        env = {
            'CC': "clang %s" % flags_str,
            'CXX': "clang++ %s" % flags_str,
            'DXR_CLANG_FLAGS': flags_str,
            'DXR_CXX_CLANG_OBJECT_FOLDER': tree.object_folder,
            'DXR_CXX_CLANG_TEMP_FOLDER': self._temp_folder,
        }
        env['DXR_CC'] = env['CC']
        env['DXR_CXX'] = env['CXX']
        return merge(vars_, env)
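
A small sketch of the flag string this builds, with an assumed plugin path used purely for illustration:

flags = ['-load', '/opt/dxr/libclang-index-plugin.so', '-add-plugin', 'dxr-index']
flags_str = " ".join('-Xclang {0}'.format(f) for f in flags)
# flags_str == '-Xclang -load -Xclang /opt/dxr/libclang-index-plugin.so '
#              '-Xclang -add-plugin -Xclang dxr-index'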
Example no. 6
def load_config(config):
    if "basicauth" not in config:
        raise ConfigError("Basic Auth not configured!")

    for param in ("passwd",):
        if param not in config["basicauth"]:
            raise ConfigError("Basic Auth not configured! Missing: {0}".format(repr(param)))

    config = config["basicauth"]

    realm = config.get("realm", "kdb")
    hasher = config.get("hasher", "sha")

    if hasher not in HASHERS:
        raise ConfigError("Unsupported hasher: {0}".format(repr(hasher)))

    with open(config["passwd"], "r") as f:
        users = dict(imap(rpartial(str.split, ":"), imap(str.strip, f)))

    return users, realm, hasher
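
A hedged sketch of how this loader might be driven, assuming an htpasswd-style passwd file with one "user:hash" entry per line as the parsing above implies; the path and values are illustrative:

# /etc/kdb/passwd (illustrative contents):
#   alice:5f4dcc3b5aa765d61d8327deb882cf99
#   bob:098f6bcd4621d373cade4e832627b4f6
config = {"basicauth": {"passwd": "/etc/kdb/passwd", "realm": "kdb", "hasher": "sha"}}
users, realm, hasher = load_config(config)
# users == {"alice": "5f4dcc3b...", "bob": "098f6bcd..."}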
Example no. 7
    def annotations_by_line(self):
        icon = "background-image: url('{0}/static/icons/warning.png');".format(
            self.tree.config.www_root)  # TODO: DRY
        getter = itemgetter('msg', 'opt', 'span')
        for msg, opt, span in imap(getter, self.condensed.get('warnings', [])):
            if opt:
                msg = "{0}[{1}]".format(msg, opt)
            annotation = {
                'title': msg,
                'class': "note note-warning",
                'style': icon
            }
            yield annotation, span
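
A brief illustration of the data this generator consumes and yields, assuming each warning dict carries the 'msg', 'opt' and 'span' keys the itemgetter implies; the values are made up:

from operator import itemgetter

condensed = {'warnings': [
    {'msg': 'unused variable', 'opt': '-Wunused-variable', 'span': (12, 4)},
]}
msg, opt, span = itemgetter('msg', 'opt', 'span')(condensed['warnings'][0])
title = "{0}[{1}]".format(msg, opt) if opt else msg
# title == 'unused variable[-Wunused-variable]', paired with span == (12, 4)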
Example no. 8
    def annotations_by_line(self):
        icon = "background-image: url('{0}/static/icons/warning.png');".format(
            self.tree.config.www_root)  # TODO: DRY
        getter = itemgetter('msg', 'opt', 'span')
        for msg, opt, span in imap(getter, self.condensed.get('warnings', [])):
            if opt:
                msg = "{0}[{1}]".format(msg, opt)
            annotation = {
                'title': msg,
                'class': "note note-warning",
                'style': icon
            }
            yield annotation, span
Example no. 9
def jsonstream(filename, encoding="utf-8"):
    """Stream every line in the given file interpreting each line as JSON.

    :param filename: A ``str`` filename, a ``py._path.local.LocalPath`` instance or an open ``file`` instance.
    :type filename:  ``str``, ``py._path.local.LocalPath`` or ``file``.

    :param encoding: A ``str`` indicating the charset/encoding to use.
    :type encoding:  ``str``

    This is a wrapper around ``stream`` that passes each line through ``loads``, essentially treating
    each line as a piece of valid JSON.
    """

    return imap(loads, stream(filename, encoding=encoding))
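
A minimal consumption sketch, assuming the file holds one JSON document per line; the filename and the "id" field are illustrative:

# Each iteration decodes one line of the file into a Python object.
for record in jsonstream("events.json"):
    print(record["id"])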
Example no. 10
    def replace(self, task_or_uuid, new_task):
        src_task = self.task(task_or_uuid)
        tasks = imap(lambda t: self.task(new_task) if t == src_task else t, self)
        self._atomic_write(tasks)
Example no. 11
def perform_analysis(analysis, debug=False):
    logger.info('Started %s analysis', analysis.analysis_name)
    with log_durations(logger.debug,
                       'Loading dataframe for %s' % analysis.analysis_name):
        df = get_analysis_df(analysis.case_query, analysis.control_query,
                             analysis.modifier_query)
    debug and df.to_csv("%s.analysis_df.csv" % analysis.analysis_name)

    logger.info('Matching sources: %d' %
                df.groupby(['series_id', 'platform_id']).ngroups)

    # Remove single-class sources
    query = df.groupby(['series_id', 'platform_id'
                        ]).sample_class.agg(lambda x: set(x)) >= {0, 1}
    df = filter_sources(df, query, 'as single-class')

    # Check for minimum number of samples
    if analysis.min_samples:
        counts = df.groupby(['series_id', 'platform_id'
                             ]).sample_class.value_counts().unstack()
        query = (counts[0] >= analysis.min_samples) & (counts[1] >=
                                                       analysis.min_samples)
        df = filter_sources(df, query, 'by min samples')

    # Check number of sources
    sources = df.groupby(['series_id', 'platform_id']).ngroups
    if sources <= 1:
        logger.error("FAIL Can't perform meta-analysis on %s" %
                     ('single source' if sources else 'no data'))
        return

    # Calculating stats
    analysis.series_count = len(df.series_id.unique())
    analysis.platform_count = len(df.platform_id.unique())
    analysis.sample_count = len(df.sample_id.unique())
    analysis.series_ids = df.series_id.unique().tolist()
    analysis.platform_ids = df.platform_id.unique().tolist()
    analysis.sample_ids = df.sample_id.unique().tolist()
    # analysis.save(update_fields=['series_count', 'platform_count', 'sample_count',
    #                              'series_ids', 'platform_ids', 'sample_ids'])
    logger.info('Stats: %d sources, %d series, %d platforms, %d samples' %
                (sources, analysis.series_count, analysis.platform_count,
                 analysis.sample_count))

    # Load GSE data, make and concat all fold change analyses results.
    # NOTE: we are doing load_gse() lazily here to avoid loading all matrices at once.
    logger.info('Loading data and calculating fold changes for %s',
                analysis.analysis_name)
    with log_durations(logger.debug,
                       'Load/fold for %s' % analysis.analysis_name):
        gses = (load_gse(df, series_id)
                for series_id in sorted(df.series_id.unique()))
        fold_changes = pd.concat(imap(get_fold_change_analysis, gses))
        debug and fold_changes.to_csv("%s.fc.csv" % debug)

    logger.info('Meta-Analyzing %s', analysis.analysis_name)
    with log_durations(logger.debug,
                       'Meta analysis for %s' % analysis.analysis_name):
        balanced = getFullMetaAnalysis(fold_changes, debug=debug).reset_index()
        debug and balanced.to_csv("%s.meta.csv" % debug)

    # logger.info('Inserting %s analysis results', analysis.analysis_name)
    # with log_durations(logger.debug, 'Saving results of %s' % analysis.analysis_name):#, \
    #         # transaction.atomic():
    #     balanced['analysis'] = analysis
    #     balanced.columns = balanced.columns.map(lambda x: x.replace(".", "_").lower())
    # field_names = [f.name for f in MetaAnalysis._meta.fields if f.name != 'id']
    # rows = balanced[field_names].T.to_dict().values()
    # Delete old values in case we are recalculating the analysis
    # MetaAnalysis.objects.filter(analysis=analysis).delete()
    # MetaAnalysis.objects.bulk_create(MetaAnalysis(**row) for row in rows)

    logger.info('DONE %s analysis', analysis.analysis_name)
    return balanced
Example no. 12
def process_function(props):
    # Compute FuncSig based on args:
    input_args = tuple(
        ifilter(bool, imap(str.lstrip, props['args'][1:-1].split(","))))
    props['type'] = c_type_sig(input_args, props['type'])
    return props
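
A worked illustration of the argument-parsing step, using a made-up C-style argument string and a plain comprehension in place of the ifilter/imap pipeline:

args = "(int a, char *b, )"
input_args = tuple(s.lstrip() for s in args[1:-1].split(",") if s.lstrip())
# input_args == ('int a', 'char *b') -- the empty trailing element is dropped by the bool filter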
Example no. 13
def perform_analysis(conn, analysis, debug=False):
    cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

    logger.info('Started %s analysis', analysis.analysis_name)
    with log_durations(logger.debug, 'Loading dataframe for %s' % analysis.analysis_name):
        df = get_analysis_df(conn, analysis.case_query, analysis.control_query, analysis.modifier_query)
    debug and df.to_csv("%s.analysis_df.csv" % analysis.analysis_name)

    logger.info('Matching sources: %d' % df.groupby(['series_id', 'platform_id']).ngroups)

    # Remove single-class sources
    query = df.groupby(['series_id', 'platform_id']).sample_class.agg(lambda x: set(x)) >= {0, 1}
    df = filter_sources(df, query, 'as single-class')

    # Check for minimum number of samples
    if analysis.min_samples:
        counts = df.groupby(['series_id', 'platform_id']).sample_class.value_counts().unstack()
        query = (counts[0] >= analysis.min_samples) & (counts[1] >= analysis.min_samples)
        df = filter_sources(df, query, 'by min samples')

    # Check number of sources
    sources = df.groupby(['series_id', 'platform_id']).ngroups
    if sources <= 1:
        logger.error("FAIL Can't perform meta-analysis on %s"
                     % ('single source' if sources else 'no data'))
        return

    # Calculating stats
    analysis.series_count = len(df.series_id.unique())
    analysis.platform_count = len(df.platform_id.unique())
    analysis.sample_count = len(df.sample_id.unique())
    analysis.series_ids = df.series_id.unique().tolist()
    analysis.platform_ids = df.platform_id.unique().tolist()
    analysis.sample_ids = df.sample_id.unique().tolist()
    # analysis.save(update_fields=['series_count', 'platform_count', 'sample_count',
    #                              'series_ids', 'platform_ids', 'sample_ids'])
    logger.info('Stats: %d sources, %d series, %d platforms, %d samples'
                % (sources, analysis.series_count, analysis.platform_count, analysis.sample_count))

    # Load GSE data, make and concat all fold change analyses results.
    # NOTE: we are doing load_gse() lazily here to avoid loading all matrices at once.
    logger.info('Loading data and calculating fold changes for %s', analysis.analysis_name)
    with log_durations(logger.debug, 'Load/fold for %s' % analysis.analysis_name):
        gses = (load_gse(cursor, df, series_id) for series_id in sorted(df.series_id.unique()))
        fold_changes = pd.concat(imap(get_fold_change_analysis, gses))
        debug and fold_changes.to_csv("%s.fc.csv" % debug)

    logger.info('Meta-Analyzing %s', analysis.analysis_name)
    with log_durations(logger.debug, 'Meta analysis for %s' % analysis.analysis_name):
        balanced = getFullMetaAnalysis(fold_changes, debug=debug).reset_index()
        debug and balanced.to_csv("%s.meta.csv" % debug)

    # logger.info('Inserting %s analysis results', analysis.analysis_name)
    # with log_durations(logger.debug, 'Saving results of %s' % analysis.analysis_name):#, \
    #         # transaction.atomic():
    #     balanced['analysis'] = analysis
    #     balanced.columns = balanced.columns.map(lambda x: x.replace(".", "_").lower())
        # field_names = [f.name for f in MetaAnalysis._meta.fields if f.name != 'id']
        # rows = balanced[field_names].T.to_dict().values()
        # Delete old values in case we are recalculating the analysis
        # MetaAnalysis.objects.filter(analysis=analysis).delete()
        # MetaAnalysis.objects.bulk_create(MetaAnalysis(**row) for row in rows)

    logger.info('DONE %s analysis', analysis.analysis_name)
    return balanced
Example no. 14
def perform_analysis(analysis,
                     debug=False,
                     impute=False,
                     nperm=0,
                     mygene_filter=None):
    """
    Returns a tuple of sample_df, fold_change, balanced_permutations, permutations
    """
    logger.info('Started %s analysis', analysis.analysis_name)
    # from multiprocessing import Pool
    # pool = Pool(processes=4)

    with log_durations(logger.debug,
                       'Loading dataframe for %s' % analysis.analysis_name):
        df = get_analysis_df(analysis.case_query, analysis.control_query,
                             analysis.modifier_query)
    debug and df.to_csv("%s.analysis_df.csv" % analysis.analysis_name)

    logger.info('Matching sources: %d' %
                df.groupby(['series_id', 'platform_id']).ngroups)

    # Remove single-class sources
    query = df.groupby([
        'series_id', 'platform_id'
    ]).sample_class.agg(lambda x: set(x)).map(lambda x: x >= {0, 1})
    df = filter_sources(df, query, 'as single-class')

    # Check for minimum number of samples
    if analysis.min_samples:
        counts = df.groupby(['series_id', 'platform_id'
                             ]).sample_class.value_counts().unstack()
        query = (counts[0] >= analysis.min_samples) & (counts[1] >=
                                                       analysis.min_samples)
        df = filter_sources(df, query, 'by min samples')

    # Check number of sources
    sources = df.groupby(['series_id', 'platform_id']).ngroups
    if sources <= 1:
        logger.error("FAIL Can't perform meta-analysis on %s" %
                     ('single source' if sources else 'no data'))
        return df, None, None, None

    # Calculating stats
    analysis.series_count = len(df.series_id.unique())
    analysis.platform_count = len(df.platform_id.unique())
    analysis.sample_count = len(df.sample_id.unique())
    analysis.series_ids = df.series_id.unique().tolist()
    analysis.platform_ids = df.platform_id.unique().tolist()
    analysis.sample_ids = df.sample_id.unique().tolist()
    # analysis.save(update_fields=['series_count', 'platform_count', 'sample_count',
    #                              'series_ids', 'platform_ids', 'sample_ids'])
    logger.info('Stats: %d sources, %d series, %d platforms, %d samples' %
                (sources, analysis.series_count, analysis.platform_count,
                 analysis.sample_count))

    # Load GSE data, make and concat all fold change analyses results.
    # NOTE: we are doing load_gse() lazily here to avoid loading all matrices at once.
    logger.info('Loading data and calculating fold change for %s',
                analysis.analysis_name)
    with log_durations(logger.debug,
                       'Load/fold for %s' % analysis.analysis_name):
        gses = (load_gse(df, series_id, impute)
                for series_id in sorted(df.series_id.unique()))
        debugs = [debug] * df.series_id.nunique()
        nperms = [nperm] * df.series_id.nunique()
        mygene_filters = [mygene_filter] * df.series_id.nunique()

        # start a pool with 4 processes
        fold_change = pd.concat(
            imap(get_gene_fold_change, gses, debugs, nperms, mygene_filters))
        # fold_change = pd.concat(pool.imap(multi_run_wrapper, zip(gses, debugs, nperms)))
        debug and fold_change.to_csv("%s.fc.csv" % debug)

    # Start meta-analysis
    logger.info('Meta-Analyzing %s', analysis.analysis_name)
    with log_durations(logger.debug,
                       'Meta analysis for %s' % analysis.analysis_name):
        # logger.info('Meta analysis of real data for %s' % analysis.analysis_name)
        with log_durations(
                logger.debug,
                'meta analysis of real data for %s' % analysis.analysis_name):
            balanced = get_full_meta(fold_change.query("""perm == 0"""),
                                     debug=debug)
            debug and balanced.to_csv("%s.meta.csv" % debug)
        # logger.info('Meta-Analyzing of permutations for %s', analysis.analysis_name)
        with log_durations(
                logger.debug, 'meta analysis of permutations for %s' %
                analysis.analysis_name):
            permutations = pd.DataFrame()
            fold_change = fold_change.reset_index().sort('perm').set_index(
                'perm')
            for i in range(nperm):
                perm = i + 1
                # logger.info('Meta analysis of permutation %s for %s' % (perm, analysis.analysis_name))
                with log_durations(
                        logger.debug,
                        'meta analysis of permutation %s / %s for %s' %
                    (perm, nperm, analysis.analysis_name)):
                    # balanced_perm = get_full_meta(fold_change.query("""perm == %s"""%perm), debug=debug)
                    balanced_perm = get_full_meta(fold_change.ix[perm],
                                                  debug=debug)
                    permutation = balanced_perm[['random_TE', 'fixed_TE']]
                    permutation['perm'] = perm
                    permutations = pd.concat([permutations, permutation])
        balanced_permutations = get_balanced_permutations(
            balanced, permutations)

    logger.info('DONE %s analysis', analysis.analysis_name)
    return df, fold_change, balanced_permutations, permutations
Example no. 15
def perform_analysis(analysis, debug=False, impute=False, nperm=0, mygene_filter=None):
    """
    Returns a tuple of sample_df, fold_change, balanced_permutations, permutations
    """
    logger.info("Started %s analysis", analysis.analysis_name)
    # from multiprocessing import Pool
    # pool = Pool(processes=4)

    with log_durations(logger.debug, "Loading dataframe for %s" % analysis.analysis_name):
        df = get_analysis_df(analysis.case_query, analysis.control_query, analysis.modifier_query)
    debug and df.to_csv("%s.analysis_df.csv" % analysis.analysis_name)

    logger.info("Matching sources: %d" % df.groupby(["series_id", "platform_id"]).ngroups)

    # Remove single-class sources
    query = df.groupby(["series_id", "platform_id"]).sample_class.agg(lambda x: set(x)).map(lambda x: x >= {0, 1})
    df = filter_sources(df, query, "as single-class")

    # Check for minimum number of samples
    if not df.empty and analysis.min_samples:
        counts = df.groupby(["series_id", "platform_id"]).sample_class.value_counts().unstack()
        query = (counts[0] >= analysis.min_samples) & (counts[1] >= analysis.min_samples)
        df = filter_sources(df, query, "by min samples")

    # Check number of sources
    sources = df.groupby(["series_id", "platform_id"]).ngroups
    if sources <= 1:
        logger.error("FAIL Can't perform meta-analysis on %s" % ("single source" if sources else "no data"))
        return df, None, None, None

    # Calculating stats
    analysis.series_count = len(df.series_id.unique())
    analysis.platform_count = len(df.platform_id.unique())
    analysis.sample_count = len(df.sample_id.unique())
    analysis.series_ids = df.series_id.unique().tolist()
    analysis.platform_ids = df.platform_id.unique().tolist()
    analysis.sample_ids = df.sample_id.unique().tolist()
    # analysis.save(update_fields=['series_count', 'platform_count', 'sample_count',
    #                              'series_ids', 'platform_ids', 'sample_ids'])
    logger.info(
        "Stats: %d sources, %d series, %d platforms, %d samples"
        % (sources, analysis.series_count, analysis.platform_count, analysis.sample_count)
    )

    # Load GSE data, make and concat all fold change analyses results.
    # NOTE: we are doing load_gse() lazily here to avoid loading all matrices at once.
    logger.info("Loading data and calculating fold change for %s", analysis.analysis_name)
    with log_durations(logger.debug, "Load/fold for %s" % analysis.analysis_name):
        gses = (load_gse(df, series_id, impute) for series_id in sorted(df.series_id.unique()))
        debugs = [debug] * df.series_id.nunique()
        nperms = [nperm] * df.series_id.nunique()
        mygene_filters = [mygene_filter] * df.series_id.nunique()

        # start a pool with 4 processes
        fold_change = pd.concat(imap(get_gene_fold_change, gses, debugs, nperms, mygene_filters))
        # fold_change = pd.concat(pool.imap(multi_run_wrapper, zip(gses, debugs, nperms)))
        debug and fold_change.to_csv("%s.fc.csv" % debug)

    # Start meta-analysis
    logger.info("Meta-Analyzing %s", analysis.analysis_name)
    with log_durations(logger.debug, "Meta analysis for %s" % analysis.analysis_name):
        # logger.info('Meta analysis of real data for %s' % analysis.analysis_name)
        with log_durations(logger.debug, "meta analysis of real data for %s" % analysis.analysis_name):
            balanced = get_full_meta(fold_change.query("""perm == 0"""), debug=debug)
            if balanced is None:
                logger.error("FAIL Got empty meta-analysis")
                return df, fold_change, None, None
            debug and balanced.to_csv("%s.meta.csv" % debug)

        # logger.info('Meta-Analyzing of permutations for %s', analysis.analysis_name)
        with log_durations(logger.debug, "meta analysis of permutations for %s" % analysis.analysis_name):
            permutations = pd.DataFrame()
            fold_change = fold_change.reset_index().sort("perm").set_index("perm")
            for i in range(nperm):
                perm = i + 1
                # logger.info('Meta analysis of permutation %s for %s' % (perm, analysis.analysis_name))
                with log_durations(
                    logger.debug, "meta analysis of permutation %s / %s for %s" % (perm, nperm, analysis.analysis_name)
                ):
                    # balanced_perm = get_full_meta(fold_change.query("""perm == %s"""%perm), debug=debug)
                    balanced_perm = get_full_meta(fold_change.ix[perm], debug=debug)
                    permutation = balanced_perm[["random_TE", "fixed_TE"]]
                    permutation["perm"] = perm
                    permutations = pd.concat([permutations, permutation])
        balanced_permutations = get_balanced_permutations(analysis, balanced, permutations)

    logger.info("DONE %s analysis", analysis.analysis_name)
    return df, fold_change, balanced_permutations, permutations
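
A small pandas sketch of the single-class filter used in the variants above, with made-up series/platform ids; only groups containing both class 0 and class 1 survive:

import pandas as pd

df = pd.DataFrame({
    'series_id':    [1, 1, 2, 2],
    'platform_id':  [10, 10, 20, 20],
    'sample_class': [0, 1, 0, 0],
})
query = (df.groupby(['series_id', 'platform_id'])
           .sample_class.agg(lambda x: set(x))
           .map(lambda x: x >= {0, 1}))
# query is True for (1, 10) and False for (2, 20), so the single-class source is filtered out.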
Example no. 16
def process_function(props):
    # Compute FuncSig based on args:
    input_args = tuple(ifilter(
        bool, imap(str.lstrip, props['args'][1:-1].split(","))))
    props['type'] = c_type_sig(input_args, props['type'])
    return props