Example #1
    def load(self, max_num_line=0):
        self.srcdata = [[] for _ in range(len(self.bucketsize))]
        self.trgdata = [[] for _ in range(len(self.bucketsize))]
        self.mtadata = [[] for _ in range(len(self.bucketsize))]

        srcstream, trgstream, mtastream = open(self.srcfile, 'r'), open(
            self.trgfile, 'r'), open(self.mtafile, 'r')
        num_line = 0
        for (s, t, m) in zip(srcstream, trgstream, mtastream):
            s = [int(x) for x in s.split()]
            t = [constants.BOS] + [int(x) for x in t.split()] + [constants.EOS]
            m = [float(x) for x in m.split()]

            num_line += self.insert(s, t, m)
            if num_line >= max_num_line and max_num_line > 0: break
            if num_line % 500000 == 0:
                print("Read line {}".format(num_line))
        # if validate is True, merge all buckets into one
        if self.validate:
            self.srcdata = np.array(merge(*self.srcdata))
            self.trgdata = np.array(merge(*self.trgdata))
            self.mtadata = np.array(merge(*self.mtadata))

            self.start = 0
            self.size = len(self.srcdata)
        else:
            self.srcdata = list(map(np.array, self.srcdata))
            self.trgdata = list(map(np.array, self.trgdata))
            self.mtadata = list(map(np.array, self.mtadata))

            self.allocation = list(map(len, self.srcdata))
            self.p = np.array(self.allocation) / sum(self.allocation)
        srcstream.close()
        trgstream.close()
        mtastream.close()
Example #2
def generate_sql_list_with_params(list_params, name_value):
    """ Генерируем итеративный кусок кода для sql.
    :param list_params: список со значениями
    :param name_value: название сравниваемой переменно
    :return: кусок sql-кода (name_value=1 OR name_value=2 OR name_value=3 ...)
    """

    # if the value is a string, wrap it in extra quotes
    ifstr = lambda p: "'%s'" % p if isinstance(p, str) else "%s" % p

    if type(list_params) == list:
        if len(list_params) > 1:
            cur_part = ""
            for param in list_params[:-1]:
                cur_part = funcy.merge(
                    cur_part, " %s=%s OR" % (name_value, ifstr(param)))
            cur_part = funcy.merge(
                cur_part, " %s=%s" % (name_value, ifstr(list_params[-1])))
            return "(" + cur_part + ")"
        elif len(list_params) == 1:
            return " %s=%s " % (name_value, ifstr(list_params[0]))
        else:
            return None
    else:
        return None
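A brief usage sketch with hypothetical values, calling the function defined above (it assumes `funcy` is importable); it shows the fragment that gets returned:

print(generate_sql_list_with_params([1, 2, 3], "status"))
# -> ( status=1 OR status=2 OR status=3)
print(generate_sql_list_with_params(["new"], "state"))
# ->  state='new'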
Example #3
    def __call__(self, inputs, latches=None, *, lift=None):
        inputs_l = project(inputs, self.left.inputs)
        omap_l, lmap_l = self.left(inputs_l, latches=latches, lift=lift)

        inputs_r = project(inputs, self.right.inputs)
        inputs_r.update(omap_l)  # <--- Cascade setup happens here.
        omap_l = omit(omap_l, self._interface)

        omap_r, lmap_r = self.right(inputs_r, latches=latches, lift=lift)
        return fn.merge(omap_l, omap_r), fn.merge(lmap_l, lmap_r)
Example #4
class LabelsStream(BaseStream):

    API_PATH = '/api/v2/admin/labels'
    TABLE = 'labels'
    SCHEMA = with_properties(
        merge(
            DEFAULT_DATE_FIELDS, {
                "id": {
                    "type": ["integer", "null"]
                },
                "name": {
                    "type": ["string", "null"]
                },
                "full_name": {
                    "type": ["string", "null"]
                },
                "level": {
                    "type": ["integer", "null"]
                },
                "open_suggestions_count": {
                    "type": ["integer", "null"]
                },
                "links": {
                    "type": "object",
                    "properties": {
                        "parent": {
                            "type": ["integer", "null"]
                        }
                    }
                }
            }))

    def get_stream_data(self, result):
        return result.get('labels')
Example #5
def get_processes(services) -> Iterable[Coroutine]:
    for name, service in services.items():
        env: dict = dict(os.environ)
        if "env_file" in service:
            env_file = service["env_file"]
            if not isinstance(env_file, list):
                env_file = [env_file]
            env.update(merge(*[DotEnv(path) for path in env_file]))
        if "environment" in service:
            environment = service["environment"]
            if isinstance(environment, list):
                env.update({b.key: b.value for b in lmap(parse_binding, environment)})
            else:
                env.update(environment)
        cmd = (service.get("entrypoint", "") + " " + service.get("command", "")).strip()
        if not cmd:
            raise Exception("cannot run without commands on the config")
        build = service.get("build", ".")
        if isinstance(build, str):
            cwd = build
        else:
            cwd = build.get("context", ".")

        async def f(name, cmd, env, cwd):
            print("Attaching to " + name)
            color = random.choice(colors)
            log = lambda x: sys.stdout.write(getattr(Fore, color) + f"{name} | " + Fore.RESET + x)
            p = await exec(cmd, env=env, cwd=cwd, stdout=log, stderr=log)
            if p:
                log(f"{name} exited with code {p.returncode}" + "\n")

        yield f(name, cmd, env, cwd)
Example #6
 def start(self, children):
     ref = self.ref or get_first_key(children[0])
     return {
         "$schema": "http://json-schema.org/draft-07/schema#",
         "$ref": "#/definitions/" + ref,
         "definitions": merge(*children),
     }
Example #7
class CommentsStream(BaseStream):

    API_PATH = '/api/v2/admin/comments'
    TABLE = 'comments'
    SCHEMA = with_properties(merge(
        DEFAULT_DATE_FIELDS,
        {
            "id": {"type": "integer"},
            "body": {"type": "string"},
            "body_mime_type": {"type": "string"},
            "state": {"type": "string"},
            "inappropriate_flags_count": {"type": "integer"},
            "is_admin_comment": {"type": "boolean"},
            "channel": {"type": "string"},
            "links": {
                "type": "object",
                "properties": {
                    "suggestion": {"type": "integer"},
                    "created_by": {"type": "integer"}
                }
            }
        }))

    def get_stream_data(self, result):
        return result.get('comments')
Example #8
def prepare(topic_model, corpus, dictionary, **kargs):
    """Transforms the Gensim TopicModel and related corpus and dictionary into
    the data structures needed for the visualization.

    Parameters
    ----------
    topic_model : gensim.models.ldamodel.LdaModel
        An already trained Gensim LdaModel. The other gensim model types are
    not supported (PRs welcome).
    corpus : array-like list of bag of word docs in tuple form
        The corpus in bag of word form, the same docs used to train the model.
    For example: [(50, 3), (63, 5), ....]
    dictionary: gensim.corpora.Dictionary
        The dictionary object used to create the corpus. Needed to extract the
    actual terms (not ids).
    **kwargs :
        additional keyword arguments are passed through to :func:`pyldavis.prepare`.

    Returns
    -------
    prepared_data : PreparedData
        the data structures used in the visualization

    Example
    --------
    For example usage please see this notebook:
    http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/Gensim%20Newsgroup.ipynb
    """
    opts = fp.merge(_extract_data(topic_model, corpus, dictionary), kargs)
    return vis_prepare(**opts)
Example #9
class ExternalUsersStream(BaseStream):

    API_PATH = '/api/v2/admin/external_users'
    TABLE = 'external_users'
    SCHEMA = with_properties(merge(
        DEFAULT_DATE_FIELDS,
        make_date_field("external_created_at"),
        make_date_field("last_seen_at"),
        {
            "email": {"type": ["string", "null"]},
            "external_id": {"type": ["string", "null"]},
            "id": {"type": ["integer", "null"]},
            "ip": {"type": ["string", "null"]},
            "links": {
                "type": "object",
                "properties": {
                    "external_accounts": {"type": ["integer", "null"]},
                    "external_users": {"type": ["integer", "null"]},
                },
            },
            "name": {"type": ["string", "null"]},
            "seen_days": {"type": ["integer", "null"]},
            "type": {"type": ["string", "null"]},
        }),
        additional=True)

    def get_stream_data(self, result):
        return result.get('external_users')
Example #10
class CategoriesStream(BaseStream):

    API_PATH = '/api/v2/admin/categories'
    TABLE = 'categories'
    SCHEMA = with_properties(
        merge(
            DEFAULT_DATE_FIELDS, {
                "id": {
                    "type": "integer"
                },
                "name": {
                    "type": "string"
                },
                "suggestions_count": {
                    "type": "integer",
                },
                "open_suggestions_count": {
                    "type": "integer",
                },
                "links": {
                    "type": "object",
                    "properties": {
                        "forum": {
                            "type": "integer"
                        }
                    }
                }
            }))

    def get_stream_data(self, result):
        return result.get('categories')
Example #11
 def __init__(self, **kwargs):
     options = merge(settings.MONGO_DEFAULT, kwargs)
     self.client = MongoClient(host=options['host'],
                               port=options['port'],
                               connect=False)
     self.db = self.client[options['db']]
     self.collection = self.db[options['collection']]
Example #12
    def environment(self, vars_):
        """Set up environment variables to trigger analysis dumps from clang.

        We'll store all the harvested metadata in the plugin's temporary folder.

        """
        tree = self.tree
        plugin_folder = os.path.dirname(__file__)
        flags = [
            '-load',
            os.path.join(plugin_folder,
                         'libclang-index-plugin.so'), '-add-plugin',
            'dxr-index', '-plugin-arg-dxr-index', tree.source_folder
        ]
        flags_str = " ".join(imap('-Xclang {}'.format, flags))

        env = {
            'CC': "clang %s" % flags_str,
            'CXX': "clang++ %s" % flags_str,
            'DXR_CLANG_FLAGS': flags_str,
            'DXR_CXX_CLANG_OBJECT_FOLDER': tree.object_folder,
            'DXR_CXX_CLANG_TEMP_FOLDER': self._temp_folder,
        }
        env['DXR_CC'] = env['CC']
        env['DXR_CXX'] = env['CXX']
        return merge(vars_, env)
Example #13
 def extract_map(name_map, names):
     lookup_root = fn.merge(*({v: k
                               for v in vals} for k, vals in name_map))
     mapping = fn.group_by(lambda x: lookup_root[x.split('##time_')[0]],
                           names)
     mapping = fn.walk_values(tuple, mapping)  # Make hashable.
     return frozenset(mapping.items())
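A small sketch with hypothetical inputs (assuming `fn` is `funcy`), showing how the inverted `name_map` groups the timed names back under their root name:

# name_map = [('x', ('x0', 'x1'))]
# names = ['x0##time_0', 'x1##time_0']
# extract_map(name_map, names)
# -> frozenset({('x', ('x0##time_0', 'x1##time_0'))})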
Example #14
def prepare(docs, vect, lda, **kwargs):
    """Create Prepared Data from sklearn's vectorizer and Latent Dirichlet
    Application.

    Parameters
    ----------
    docs : Pandas Series.
        Documents to be passed as an input.
    vect : Scikit-Learn Vectorizer (CountVectorizer,TfIdfVectorizer).
        vectorizer used to convert documents into a sparse matrix
    lda  : sklearn.decomposition.LatentDirichletAllocation.
        Latent Dirichlet Allocation

    **kwargs: Keyword argument to be passed to pyLDAvis.prepare()


    Returns
    -------
    prepared_data : PreparedData
          the data structures used in the visualization


    Example
    --------
    For example usage please see this notebook:
    http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/sklearn.ipynb

    See
    ------
    See `pyLDAvis.prepare` for **kwargs.
    """
  
    opts = fp.merge(_extract_data(docs, vect, lda)[2], kwargs)

    return vis_prepare(**opts)
Example #15
def prepare_profiles():
    """
    Prepares a dict 'app.model' -> profile, for use in model_profile()
    """
    # NOTE: this is a compatibility for old style config,
    # TODO: remove in cacheops 3.0
    if hasattr(settings, 'CACHEOPS_PROFILES'):
        profiles.update(settings.CACHEOPS_PROFILES)

    if hasattr(settings, 'CACHEOPS_DEFAULTS'):
        profile_defaults.update(settings.CACHEOPS_DEFAULTS)

    model_profiles = {}
    ops = getattr(settings, 'CACHEOPS', {})
    for app_model, profile in ops.items():
        # NOTE: this is a compatibility for old style config,
        # TODO: remove in cacheops 3.0
        if is_tuple(profile):
            profile_name, timeout = profile[:2]

            try:
                model_profiles[app_model] = mp = deepcopy(profiles[profile_name])
            except KeyError:
                raise ImproperlyConfigured('Unknown cacheops profile "%s"' % profile_name)

            if len(profile) > 2:
                mp.update(profile[2])
            mp['timeout'] = timeout
            mp['ops'] = set(mp['ops'])
        else:
            model_profiles[app_model] = merge(profile_defaults, profile)

    return model_profiles
Example #16
def prepare(lda_model, dtm, labels, **kwargs):
    """Create Prepared Data from sklearn's LatentDirichletAllocation and CountVectorizer.

    Parameters
    ----------
    lda_model : sklearn.decomposition.LatentDirichletAllocation.
        Latent Dirichlet Allocation model from sklearn fitted with `dtm`

    dtm : array-like or sparse matrix, shape=(n_samples, n_features)
        Document-term matrix used to fit on LatentDirichletAllocation model (`lda_model`)

    vectorizer : sklearn.feature_extraction.text.(CountVectorizer, TfIdfVectorizer).
        vectorizer used to convert raw documents to document-term matrix (`dtm`)

    **kwargs: Keyword argument to be passed to pyLDAvis.prepare()


    Returns
    -------
    prepared_data : PreparedData
          the data structures used in the visualization


    Example
    --------
    For example usage please see this notebook:
    http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/sklearn.ipynb

    See
    ------
    See `pyLDAvis.prepare` for **kwargs.
    """
    opts = fp.merge(_extract_data(lda_model, dtm, labels), kwargs)
    return plb.prepare(**opts)
Example #17
def prepare_profiles():
    """
    Prepares a dict 'app.model' -> profile, for use in model_profile()
    """
    profile_defaults = {
        'ops': (),
        'local_get': False,
        'db_agnostic': True,
        'write_only': False,
        'lock': False,
    }
    profile_defaults.update(settings.CACHEOPS_DEFAULTS)

    model_profiles = {}
    for app_model, profile in settings.CACHEOPS.items():
        if profile is None:
            model_profiles[app_model.lower()] = None
            continue

        model_profiles[app_model.lower()] = mp = merge(profile_defaults,
                                                       profile)
        if mp['ops'] == 'all':
            mp['ops'] = ALL_OPS
        # People will do that anyway :)
        if isinstance(mp['ops'], six.string_types):
            mp['ops'] = {mp['ops']}
        mp['ops'] = set(mp['ops'])

        if 'timeout' not in mp:
            raise ImproperlyConfigured(
                'You must specify "timeout" option in "%s" CACHEOPS profile' %
                app_model)

    return model_profiles
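Since `merge` lets the right-hand dict win, each per-model profile overrides `profile_defaults`; a hedged illustration with a hypothetical setting:

# CACHEOPS = {'auth.user': {'ops': 'get', 'timeout': 60}}
# merge(profile_defaults, {'ops': 'get', 'timeout': 60})
# -> {'ops': 'get', 'local_get': False, 'db_agnostic': True,
#     'write_only': False, 'lock': False, 'timeout': 60}
# after the post-processing above, mp['ops'] becomes {'get'}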
Example #18
class ProductAreasStream(BaseStream):

    API_PATH = '/api/v2/admin/product_areas'
    TABLE = 'product_areas'
    SCHEMA = with_properties(
        merge(
            DEFAULT_DATE_FIELDS, {
                "id": {
                    "type": ["integer", "null"]
                },
                "name": {
                    "type": ["string", "null"]
                },
                "links": {
                    "type": "object",
                    "properties": {
                        "updated_by": {
                            "type": ["integer", "null"]
                        },
                        "created_by": {
                            "type": ["integer", "null"]
                        }
                    }
                }
            }))

    def get_stream_data(self, result):
        return result.get('product_areas')
Example #19
def prepare(model_data_path, ignore_topics=[], ignore_terms=[], **kwargs):
    """Create Prepared Data from sklearn's LatentDirichletAllocation and CountVectorizer.

    Parameters
    ----------
    model_data_path : Path where TwitterLDA stored its data output

    Returns
    -------
    prepared_data : PreparedData
          the data structures used in the visualization


    Example
    --------
    For example usage please see this notebook:
    http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/sklearn.ipynb

    See
    ------
    See `pyLDAvis.prepare` for **kwargs.
    """
    opts = fp.merge(
        _extract_data(model_data_path, ignore_topics, ignore_terms), kwargs)
    opts['sort_topics'] = False
    return pyLDAvis.prepare(**opts)
Example #20
def prepare_profiles():
    """
    Prepares a dict 'app.model' -> profile, for use in model_profile()
    """
    # NOTE: this is a compatibility for old style config,
    # TODO: remove in cacheops 3.0
    if hasattr(settings, 'CACHEOPS_PROFILES'):
        profiles.update(settings.CACHEOPS_PROFILES)

    if hasattr(settings, 'CACHEOPS_DEFAULTS'):
        profile_defaults.update(settings.CACHEOPS_DEFAULTS)

    model_profiles = {}
    ops = getattr(settings, 'CACHEOPS', {})
    for app_model, profile in ops.items():
        # NOTE: this is a compatibility for old style config,
        # TODO: remove in cacheops 3.0
        if is_tuple(profile):
            profile_name, timeout = profile[:2]

            try:
                model_profiles[app_model] = mp = deepcopy(profiles[profile_name])
            except KeyError:
                raise ImproperlyConfigured('Unknown cacheops profile "%s"' % profile_name)

            if len(profile) > 2:
                mp.update(profile[2])
            mp['timeout'] = timeout
            mp['ops'] = set(mp['ops'])
        else:
            model_profiles[app_model] = merge(profile_defaults, profile)

    return model_profiles
Example #21
class StatusesStream(BaseStream):

    API_PATH = '/api/v2/admin/statuses'
    TABLE = 'statuses'
    SCHEMA = with_properties(
        merge(
            DEFAULT_DATE_FIELDS, {
                "id": {
                    "type": "integer"
                },
                "name": {
                    "type": ["null", "string"]
                },
                "is_open": {
                    "type": ["boolean", "null"]
                },
                "hex_color": {
                    "type": ["string", "null"]
                },
                "position": {
                    "type": ["integer", "null"]
                },
                "allow_comments": {
                    "type": ["boolean", "null"]
                },
            }))

    def get_stream_data(self, result):
        return result.get('statuses')
Example #22
def prepare(topic_model, docs, **kargs):
    """Transforms the GraphLab TopicModel and related corpus data into
    the data structures needed for the visualization.

    Parameters
    ----------
    topic_model : graphlab.toolkits.topic_model.topic_model.TopicModel
        An already trained GraphLab topic model.
    docs : SArray of dicts
        The corpus in bag of word form, the same docs used to train the model.
    **kwargs :
        additional keyword arguments are passed through to :func:`pyldavis.prepare`.

    Returns
    -------
    prepared_data : PreparedData
        the data structures used in the visualization

    Example
    --------
    For example usage please see this notebook:
    http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/GraphLab.ipynb
    """
    opts = fp.merge(_extract_data(topic_model, docs), kargs)
    return vis_prepare(**opts)
Example #23
def prepare(topic_model, corpus, dictionary, **kargs):
    """Transforms the Gensim TopicModel and related corpus and dictionary into
    the data structures needed for the visualization.

    Parameters
    ----------
    topic_model : gensim.models.ldamodel.LdaModel
        An already trained Gensim LdaModel. The other gensim model types are
    not supported (PRs welcome).
    corpus : array-like list of bag of word docs in tuple form
        The corpus in bag of word form, the same docs used to train the model.
    For example: [(50, 3), (63, 5), ....]
    dictionary: gensim.corpora.Dictionary
        The dictionary object used to create the corpus. Needed to extract the
    actual terms (not ids).
    **kwargs :
        additional keyword arguments are passed through to :func:`pyldavis.prepare`.

    Returns
    -------
    prepared_data : PreparedData
        the data structures used in the visualization

    Example
    --------
    For example usage please see this notebook:
    http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/Gensim%20Newsgroup.ipynb
    """
    opts = fp.merge(_extract_data(topic_model, corpus, dictionary), kargs)
    return vis_prepare(**opts)
Example #24
def prepare_profiles():
    """
    Prepares a dict 'app.model' -> profile, for use in model_profile()
    """
    profile_defaults = {
        'ops': (),
        'local_get': False,
        'db_agnostic': True,
        'lock': False,
    }
    profile_defaults.update(settings.CACHEOPS_DEFAULTS)

    model_profiles = {}
    for app_model, profile in settings.CACHEOPS.items():
        if profile is None:
            model_profiles[app_model.lower()] = None
            continue

        model_profiles[app_model.lower()] = mp = merge(profile_defaults, profile)
        if mp['ops'] == 'all':
            mp['ops'] = ALL_OPS
        # People will do that anyway :)
        if isinstance(mp['ops'], six.string_types):
            mp['ops'] = {mp['ops']}
        mp['ops'] = set(mp['ops'])

        if 'timeout' not in mp:
            raise ImproperlyConfigured(
                'You must specify "timeout" option in "%s" CACHEOPS profile' % app_model)

    return model_profiles
Example #25
    def find_env_input(self, start, action, end):
        """
        Returns the probability of transitioning from start to end
        using action.
        """
        coin = self._transition_coin(start, action, end)
        query = coin.expr & coin.valid
        default = {
            i: query.aigbv.imap[i].size * (False, )
            for i in query.inputs
        }

        try:
            from aiger_sat.sat_bv import solve
        except ImportError:
            msg = "Need to install py-aiger-sat to use this method."
            raise ImportError(msg)

        model = solve(query)

        if model is None:
            return None

        model = fn.merge(default, model)
        return {remove_suffix(k, '##time_0'): model[k] for k in query.inputs}
Example #26
def prepare(lda_model, dtm, vectorizer, **kwargs):
    """Create Prepared Data from sklearn's LatentDirichletAllocation and CountVectorizer.

    Parameters
    ----------
    lda_model : sklearn.decomposition.LatentDirichletAllocation.
        Latent Dirichlet Allocation model from sklearn fitted with `dtm`

    dtm : array-like or sparse matrix, shape=(n_samples, n_features)
        Document-term matrix used to fit on LatentDirichletAllocation model (`lda_model`)

    vectorizer : sklearn.feature_extraction.text.(CountVectorizer, TfIdfVectorizer).
        vectorizer used to convert raw documents to document-term matrix (`dtm`)

    **kwargs: Keyword argument to be passed to pyLDAvis.prepare()


    Returns
    -------
    prepared_data : PreparedData
          the data structures used in the visualization


    Example
    --------
    For example usage please see this notebook:
    http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/sklearn.ipynb

    See
    ------
    See `pyLDAvis.prepare` for **kwargs.
    """
    opts = fp.merge(_extract_data(lda_model, dtm, vectorizer), kwargs)
    return pyLDAvis.prepare(**opts)
Example #27
def settings(**values):
    old_values = SETTINGS.get()
    try:
        token = SETTINGS.set(merge(old_values, values))
        yield
    finally:
        SETTINGS.reset(token)
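A minimal usage sketch, assuming `settings` is wrapped with `contextlib.contextmanager` and `SETTINGS` is a `contextvars.ContextVar` holding a dict (both are assumptions about the surrounding module); values passed in override the existing ones only inside the block:

# with settings(timeout=30, retries=2):
#     ...  # SETTINGS.get() now returns merge(old_values, {'timeout': 30, 'retries': 2})
# # on exit, SETTINGS.reset(token) restores the previous value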
Example #28
    def environment(self, vars_):
        """Set up environment variables to trigger analysis dumps from clang.

        We'll store all the harvested metadata in the plugin's temporary folder.

        """
        tree = self.tree
        plugin_folder = os.path.dirname(__file__)
        flags = [
            '-load', os.path.join(plugin_folder, 'libclang-index-plugin.so'),
            '-add-plugin', 'dxr-index',
            '-plugin-arg-dxr-index', tree.source_folder
        ]
        flags_str = " ".join(imap('-Xclang {}'.format, flags))

        env = {
            'CC': "clang %s" % flags_str,
            'CXX': "clang++ %s" % flags_str,
            'DXR_CLANG_FLAGS': flags_str,
            'DXR_CXX_CLANG_OBJECT_FOLDER': tree.object_folder,
            'DXR_CXX_CLANG_TEMP_FOLDER': self._temp_folder,
        }
        env['DXR_CC'] = env['CC']
        env['DXR_CXX'] = env['CXX']
        return merge(vars_, env)
Example #29
def _blast(bvname2vals, name_map):
    """Helper function to expand (blast) str -> int map into str ->
    bool map. This is used to send word level inputs to aiger."""
    if len(name_map) == 0:
        return dict()
    return fn.merge(*(dict(zip(names, bvname2vals[bvname]))
                      for bvname, names in name_map))
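A sketch with hypothetical names (assuming `fn` is `funcy`), expanding a two-bit word `x` into bit-level inputs:

# bvname2vals = {'x': (True, False)}
# name_map = [('x', ('x[0]', 'x[1]'))]
# _blast(bvname2vals, name_map)
# -> {'x[0]': True, 'x[1]': False}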
Example #30
def prepare(docs, vect, lda, **kwargs):
    """Create Prepared Data from sklearn's vectorizer and Latent Dirichlet
    Application.

    Parameters
    ----------
    docs : Pandas Series.
        Documents to be passed as an input.
    vect : Scikit-Learn Vectorizer (CountVectorizer,TfIdfVectorizer).
        vectorizer used to convert documents into a sparse matrix
    lda  : sklearn.decomposition.LatentDirichletAllocation.
        Latent Dirichlet Allocation

    **kwargs: Keyword argument to be passed to pyLDAvis.prepare()


    Returns
    -------
    prepared_data : PreparedData
          the data structures used in the visualization


    Example
    --------
    For example usage please see this notebook:
    http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/sklearn.ipynb

    See
    ------
    See `pyLDAvis.prepare` for **kwargs.
    """

    opts = fp.merge(_extract_data(docs, vect, lda)[2], kwargs)

    return vis_prepare(**opts)
Example #31
 def __init__(
     self,
     iterable=None,
     disable=None,
     level=logging.ERROR,
     desc=None,
     leave=False,
     bar_format=None,
     bytes=False,  # pylint: disable=W0622
     file=None,
     **kwargs
 ):
     """
     bytes   : shortcut for
         `unit='B', unit_scale=True, unit_divisor=1024, miniters=1`
     desc  : persists after `close()`
     level  : effective logging level for determining `disable`;
         used only if `disable` is unspecified
      disable  : if None (default), will be determined by the logging level.
          May be overridden to `True` due to non-TTY status.
          Skip this override by setting the env var `DVC_IGNORE_ISATTY`.
     kwargs  : anything accepted by `tqdm.tqdm()`
     """
     kwargs = kwargs.copy()
     kwargs.setdefault("unit_scale", True)
     if bytes:
         bytes_defaults = dict(
             unit="B", unit_scale=True, unit_divisor=1024, miniters=1
         )
         kwargs = merge(bytes_defaults, kwargs)
     if file is None:
         file = sys.stderr
     self.desc_persist = desc
     # auto-disable based on `logger.level`
     if disable is None:
         disable = logger.getEffectiveLevel() > level
     # auto-disable based on TTY
     if (
         not disable
         and not env2bool("DVC_IGNORE_ISATTY")
         and hasattr(file, "isatty")
     ):
         disable = not file.isatty()
     super(Tqdm, self).__init__(
         iterable=iterable,
         disable=disable,
         leave=leave,
         desc=desc,
         bar_format="!",
         **kwargs
     )
     if bar_format is None:
         if self.__len__():
             self.bar_format = self.BAR_FMT_DEFAULT
         else:
             self.bar_format = self.BAR_FMT_NOTOTAL
     else:
         self.bar_format = bar_format
     self.refresh()
Example #32
def parse_state(string):
    spaces_with_no_backslashes = r'((?<!\\)\s)+'
    tokens = filter(lambda x: x!=' ', re.split(spaces_with_no_backslashes, string))
    options = map(parse_option, (ifilter(lambda x: x.startswith('-'), tokens)))
    sources = map(parse_source, (ifilter(lambda x: x and not x.startswith('-'), tokens)))
    map(validate_option, options)
    map(validate_source, sources)
    return fn.merge(dict(options), dict(sources=sources))
Example #33
    def pipeline(input=None, **kwargs2):
        a = input
        kwargs = fy.merge(kwargs1, kwargs2)

        for executable_step in executable_steps:
            a = executable_step(a, **kwargs)

        return a
Example #34
    def __init__(self, bad_input_checks, errors):
        '''Raises a random error if any input check returns True'''
        super().__init__()

        self._check = funcy.any_fn(*bad_input_checks)
        self._errors = errors

        self._random = random.Random()
        self._random.seed(hash(funcy.merge(bad_input_checks, errors)))
Example #35
    def __init__(self, bad_input_checks, errors):
        """Raises a random error if any input check returns True"""
        super().__init__()

        self._check = any_fn(*bad_input_checks)
        self._errors = errors

        self._random = random.Random()
        self._random.seed(hash(merge(bad_input_checks, errors)))
Example #36
def prepare(topic_model,
            corpus,
            dictionary,
            doc_topic_dist=None,
            verbose=0,
            **kwargs):
    """Transforms the Gensim TopicModel and related corpus and dictionary into
    the data structures needed for the visualization.

    Parameters
    ----------
    topic_model : gensim.models.ldamodel.LdaModel
        An already trained Gensim LdaModel. The other gensim model types are
        not supported (PRs welcome).

    corpus : array-like list of bag of word docs in tuple form or scipy CSC matrix
        The corpus in bag of word form, the same docs used to train the model.
        The corpus is transformed into a csc matrix internally, if you intend to
        call prepare multiple times it is a good idea to first call
        `gensim.matutils.corpus2csc(corpus)` and pass in the csc matrix instead.

    For example: [(50, 3), (63, 5), ....]

    dictionary: gensim.corpora.Dictionary
        The dictionary object used to create the corpus. Needed to extract the
        actual terms (not ids).

    doc_topic_dist (optional): Document topic distribution from LDA (default=None)
        The document topic distribution that is eventually visualised, if you will
        be calling `prepare` multiple times it's a good idea to explicitly pass in
        `doc_topic_dist` as inferring this for large corpora can be quite
        expensive.

    **kwargs :
        additional keyword arguments are passed through to :func:`pyldavis.prepare`.

    Returns
    -------
    prepared_data : PreparedData
        the data structures used in the visualization

    Example
    --------
    For example usage please see this notebook:
    http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/Gensim%20Newsgroup.ipynb

    See
    ------
    See `pyLDAvis.prepare` for **kwargs.
    """
    opts = fp.merge(
        _extract_data(topic_model,
                      corpus,
                      dictionary,
                      doc_topic_dist,
                      verbose=verbose), kwargs)
    return vis_prepare(**opts)
Example #37
 def run_cmd(self, args: Sequence[str], **kwargs):
     defaults = {
         "check": True,
         "stdout": subprocess.PIPE,
         "text": True,
         "cwd": str(self.repo_dir),
     }
     kw = merge(defaults, kwargs)
     return subprocess.run(args, **kw)
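Because `merge` gives precedence to the later dict, caller-supplied kwargs override the defaults; a hypothetical call:

# self.run_cmd(["git", "status"], check=False)
# -> subprocess.run(["git", "status"], check=False, stdout=subprocess.PIPE,
#                   text=True, cwd=str(self.repo_dir))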
Example #38
def load_all_words():
    word_list = []
    for i in range(1, 9):
        filename = "wordlists/scrabble_words_%d.txt" % i
        with open(filename, 'r') as infile:
            content = infile.read()
        content = content.strip().lower()
        word_list = f.merge(word_list, content.split("\n"))
    return word_list
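Note that `funcy.merge` applied to lists concatenates them, which is what accumulates the words here; a quick runnable sketch:

import funcy as f
print(f.merge(['aa', 'ab'], ['ba']))  # ['aa', 'ab', 'ba']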
Example #39
def generate_sql_list_with_params_like(list_params, name_value):
    """ Генерируем итеративный кусок кода для sql с параметром LIKE.
    :param list_params: список со значениями
    :param name_value: название сравниваемой переменно
    :return: кусок sql-кода (name_value=1 OR name_value=2 OR name_value=3 ...)
    """
    if type(list_params) == list:
        if len(list_params) > 1:
            cur_part = ""
            for param in list_params[:-1]:
                cur_part = funcy.merge(cur_part, " %s LIKE '%s' OR" % (name_value, ('%'+param+'%')))
            cur_part = funcy.merge(cur_part, " %s LIKE '%s'" % (name_value, ('%'+list_params[-1]+'%')))
            return "(" + cur_part + ")"
        elif len(list_params) == 1:
            return " %s LIKE '%s' " % (name_value, ('%'+list_params[0]+'%'))
        else:
            return None
    else:
        return None
Example #40
    def to_json(self, short=False):
        data = {
            'id': self.id,
            'name': self._name,
        }

        return data if short else merge(data, {
            'isAuthenticated': self.is_authenticated,
            'game': self.game and self.game.to_json(),
            'side': self.side,
        })
Example #41
def check_perms(view, request, app, model_name):
    for guess in ('%s.%s' % (app, model_name), '%s.*' % app, '*.*'):
        if guess in APPS:
            perms = merge(MINIREST_DEFAULT_PERMS, MINIREST[guess]['perms'])
            if isinstance(perms[view], six.types.FunctionType):
                return perms[view](request)
            elif isinstance(perms[view], six.string_types):
                return import_string(perms[view])(request)
            return False
    else:
        return False
Example #42
 def setup_slug(sender, instance, **kwargs):
     data = walk_values(compose(slugify, unicode),
                        instance.__dict__)
     related = {item: data['_{0}_cache'.format(item)]
                for item in map(lambda u: u[0:-3],
                                filter(lambda u: u.endswith('_id'),
                                       data.keys()))}
     data = merge(data, related)
     slug = self.populate_from.format(
         **data)[:self.max_length]
     if slug != getattr(instance, name):
         setattr(instance, name, slug)
         instance.save()
Example #43
def prepare(topic_model, corpus, dictionary, doc_topic_dist=None, **kwargs):
    """Transforms the Gensim TopicModel and related corpus and dictionary into
    the data structures needed for the visualization.

    Parameters
    ----------
    topic_model : gensim.models.hdpmodel.HdpModel
        An already trained Gensim HdpModel. 

    corpus : array-like list of bag of word docs in tuple form or scipy CSC matrix
        The corpus in bag of word form, the same docs used to train the model.
        The corpus is transformed into a csc matrix internally, if you intend to
        call prepare multiple times it is a good idea to first call
        `gensim.matutils.corpus2csc(corpus)` and pass in the csc matrix instead.

    For example: [(50, 3), (63, 5), ....]

    dictionary: gensim.corpora.Dictionary
        The dictionary object used to create the corpus. Needed to extract the
        actual terms (not ids).

    doc_topic_dist (optional): Document topic distribution from LDA (default=None)
        The document topic distribution that is eventually visualised, if you will
        be calling `prepare` multiple times it's a good idea to explicitly pass in
        `doc_topic_dist` as inferring this for large corpora can be quite
        expensive.

    **kwargs :
        additional keyword arguments are passed through to :func:`pyldavis.prepare`.

    Returns
    -------
    prepared_data : PreparedData
        the data structures used in the visualization

    Example
    --------
    For example usage please see this notebook:
    http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/Gensim%20Newsgroup.ipynb

    See
    ------
    See `pyLDAvis.prepare` for **kwargs.
    """
    # we use sklearn's multi-dimensional scaling as the default measure to approximate distance between topics
    # should be a slightly more stable implementation compared to skbio's PCoA 
    if 'mds' not in kwargs:
        kwargs['mds'] = js_MDS

    opts = fp.merge(_extract_data(topic_model, corpus, dictionary, doc_topic_dist), kwargs)
    return vis_prepare(**opts)
Example #44
def generate_sql_list_with_params(list_params, name_value):
    """ Генерируем итеративный кусок кода для sql.
    :param list_params: список со значениями
    :param name_value: название сравниваемой переменно
    :return: кусок sql-кода (name_value=1 OR name_value=2 OR name_value=3 ...)
    """

    # if the value is a string, wrap it in extra quotes
    ifstr = lambda p: "'%s'" % p if isinstance(p, str) else "%s" % p

    if type(list_params) == list:
        if len(list_params) > 1:
            cur_part = ""
            for param in list_params[:-1]:
                cur_part = funcy.merge(cur_part, " %s=%s OR" % (name_value, ifstr(param)))
            cur_part = funcy.merge(cur_part, " %s=%s" % (name_value, ifstr(list_params[-1])))
            return "(" + cur_part + ")"
        elif len(list_params) == 1:
            return " %s=%s " % (name_value, ifstr(list_params[0]))
        else:
            return None
    else:
        return None
Example #45
 def get(self):
   auth_db = models.AuthProviders.get_master_db()
   auth_providers_config = copy.deepcopy(auth.PROVIDERS_CONFIG.values())
   auth_providers_config = sorted(auth_providers_config, key=lambda x: x.get('name'))
   provider_fields = {}
   for provider in auth_providers_config:
     for field in provider.get('fields', {}).iterkeys():
       try:
         getattr(auth_db, field)
         provider_fields[field] = fields.String
       except AttributeError:
         pass
   provider_fields = funcy.merge(base.base_fields, provider_fields)
   return helpers.make_response(auth_db, provider_fields)
Example #46
def _extract_date_tags(page: BeautifulSoup, tags_file: str, verbose: bool = False):
    result = []
    _extract_func_date = partial(_extract_date, verbose=verbose)
    _tags = loadyaml(tags_file)
    for tag, tags_params in _tags.items():
        # if verbose:
        #     print("Processing tag - '%s'" % tag)
        result = merge(
            result,
            list(mapcat(
                partial(_extract_func_date, tag),
                mapcat(page.findAll,
                       [tag] * len(tags_params), tags_params))))
    return list(map(prepare_date, result))
Example #47
def _introspect(func, seen):
    seen.add(func)

    if inspect.isbuiltin(func) or not hasattr(func, '__module__') or not hasattr(func, '__name__'):
        return {}

    if isinstance(func, type):
        methods = inspect.getmembers(func, predicate=inspect.ismethod)
        return join(_introspect(meth, seen) for _, meth in methods if meth not in seen) or {}

    if not hasattr(func, '__defaults__') or not hasattr(func, '__code__'):
        return {}

    func_name = _full_name(func)
    consts = merge(get_defaults(func), get_assignments(func))
    consts_spec = walk_keys(lambda k: '%s.%s' % (func_name, k), consts)
    consts_spec.update({'%s.%s' % (func.__module__, name): value
                        for name, value in get_closure(func).items()})

    # Recurse
    callables = filter(callable, consts_spec.values())
    recurse_specs = (_introspect(f, seen) for f in callables if f not in seen)
    return merge(join(recurse_specs) or {}, consts_spec)
Example #48
def prepare(corpus, dictionary, doc_topic_file, topic_term_file, **kwargs):
    """Transforms the Mahout LDA and related corpus and dictionary into
    the data structures needed for the visualization.

    Parameters
    ----------

    corpus : array-like list of bag of word docs in tuple form or scipy CSC matrix
        The corpus in bag of word form, the same docs used to train the model.
        The corpus is transformed into a csc matrix internally, if you intend to
        call prepare multiple times it is a good idea to first call
        `gensim.matutils.corpus2csc(corpus)` and pass in the csc matrix instead.

    For example: [(50, 3), (63, 5), ....]

    dictionary: gensim.corpora.Dictionary
        The dictionary object used to create the corpus. Needed to extract the
        actual terms (not ids).
    
    doc_topic_file : file handle to Document topic distribution from Mahout LDA 
        The document topic distribution that is eventually visualised

    topic_term_file : file handle to topic term distribution from Mahout LDA 
        The topic term distribution that is eventually visualised

    **kwargs :
        additional keyword arguments are passed through to :func:`pyldavis.prepare`.

    Returns
    -------
    prepared_data : PreparedData
        the data structures used in the visualization

    See
    ------
    See `pyLDAvis.prepare` for **kwargs.
    """
    # we use sklearn's multi-dimensional scaling as the default measure to approximate distance between topics
    # should be a slightly more stable implementation compared to skbio's PCoA 
    if 'mds' not in kwargs:
        kwargs['mds'] = js_MDS

    doc_topic_dist = get_doc_topic(doc_topic_file)
    topic_term_dists = get_topic_term(topic_term_file, dictionary)
    
    opts = fp.merge(_extract_data(corpus, dictionary, doc_topic_dist, topic_term_dists), kwargs)
    return vis_prepare(**opts)
Example #49
def prepare_profiles():
    """
    Prepares a dict 'app.model' -> profile, for use in model_profile()
    """
    # NOTE: this is a compatibility for old style config,
    # TODO: remove in cacheops 3.0
    if hasattr(settings, 'CACHEOPS_PROFILES'):
        profiles.update(settings.CACHEOPS_PROFILES)

    if hasattr(settings, 'CACHEOPS_DEFAULTS'):
        profile_defaults.update(settings.CACHEOPS_DEFAULTS)

    model_profiles = {}
    ops = getattr(settings, 'CACHEOPS', {})
    for app_model, profile in ops.items():
        if profile is None:
            model_profiles[app_model] = None
            continue

        # NOTE: this is a compatibility for old style config,
        # TODO: remove in cacheops 3.0
        if is_tuple(profile):
            profile_name, timeout = profile[:2]

            try:
                model_profiles[app_model] = mp = deepcopy(profiles[profile_name])
            except KeyError:
                raise ImproperlyConfigured('Unknown cacheops profile "%s"' % profile_name)

            if len(profile) > 2:
                mp.update(profile[2])
            mp['timeout'] = timeout
            mp['ops'] = set(mp['ops'])
        else:
            model_profiles[app_model] = mp = merge(profile_defaults, profile)
            if mp['ops'] == 'all':
                mp['ops'] = ALL_OPS
            # People will do that anyway :)
            if isinstance(mp['ops'], six.string_types):
                mp['ops'] = [mp['ops']]
            mp['ops'] = set(mp['ops'])

        if 'timeout' not in mp:
            raise ImproperlyConfigured(
                'You must specify "timeout" option in "%s" CACHEOPS profile' % app_model)

    return model_profiles
Example #50
  @classmethod
  def is_username_available(cls, username, self_key=None):
    if self_key is None:
      return cls.get_by('username', username) is None
    user_keys, _, _ = util.get_keys(cls.query(), username=username, limit=2)
    return not user_keys or self_key in user_keys and not user_keys[1:]

  @classmethod
  def is_email_available(cls, email, self_key=None):
    if not config.CONFIG_DB.check_unique_email:
      return True
    user_keys, _, _ = util.get_keys(
        cls.query(), email=email, verified=True, limit=2)
    return not user_keys or self_key in user_keys and not user_keys[1:]


user_fields = funcy.merge(
    base.base_fields, {
        'active': fields.Boolean,
        'admin': fields.Boolean,
        'auth_ids': fields.List(fields.String),
        'avatar_url': fields.String,
        'email': fields.String,
        'name': fields.String,
        'username': fields.String,
        'permissions': fields.List(fields.String),
        'token': fields.String,
        'verified': fields.Boolean,
  })
Example #51
def start(cmdline):
    ''' Entry point '''
    with exception_to_vim_errormsg():
        start_unite(fn.merge(variables.state, parse_state(cmdline)))
Example #52
def process((group, nodes)):
    """Based on the group, transform a list a nodes int a list of metadata."""
    process_val = lambda node: merge(
        add_span(node), PROCESS.get(group, identity)(node))
    return group, map(process_val, nodes)
Example #53
    def __init__(self, name, unvalidated_tree, sections, config):
        """Fix up settings that depend on the [DXR] section or have
        inter-setting dependencies. (schema can't do multi-setting validation
        yet, and configobj can't do cross-section interpolation.)

        Add a ``config`` attr to trees as a shortcut back to the [DXR] section
        and a ``name`` attr to save cumbersome tuple unpacks in callers.

        """
        self.config = config
        self.name = name

        schema = Schema({
            Optional('build_command', default='make -j {workers}'): basestring,
            Optional('clean_command', default='make clean'): basestring,
            Optional('description', default=''): basestring,
            Optional('disabled_plugins', default=plugin_list('')): Plugins,
            Optional('enabled_plugins', default=plugin_list('*')): Plugins,
            Optional('es_index', default=config.es_index): basestring,
            Optional('es_shards', default=5):
                Use(int, error='"es_shards" must be an integer.'),
            Optional('ignore_patterns',
                     default=['.hg', '.git', 'CVS', '.svn', '.bzr',
                              '.deps', '.libs', '.DS_Store', '.nfs*', '*~',
                              '._*']): WhitespaceList,
            Optional('object_folder', default=None): AbsPath,
            'source_folder': AbsPath,
            Optional('source_encoding', default='utf-8'): basestring,
            Optional('temp_folder', default=None): AbsPath,
            Optional('p4web_url', default='http://p4web/'): basestring,
            Optional('workers', default=None): WORKERS_VALIDATOR,
            Optional(basestring): dict})
        tree = schema.validate(unvalidated_tree)

        if tree['temp_folder'] is None:
            tree['temp_folder'] = config.temp_folder
        if tree['object_folder'] is None:
            tree['object_folder'] = tree['source_folder']
        if tree['workers'] is None:
            tree['workers'] = config.workers

        # Convert enabled_plugins to a list of plugins:
        if tree['disabled_plugins'].is_all:
            # * doesn't really mean "all" in a tree. It means "everything the
            # [DXR] section enabled".
            tree['disabled_plugins'] = config.enabled_plugins
        else:
            # Add anything globally disabled to our local disabled list:
            tree['disabled_plugins'].extend(p for p in config.disabled_plugins
                                            if p not in
                                            tree['disabled_plugins'])

        if tree['enabled_plugins'].is_all:
            tree['enabled_plugins'] = [p for p in config.enabled_plugins
                                       if p not in tree['disabled_plugins']]
        tree['enabled_plugins'].insert(0, core_plugin())

        # Split ignores into paths and filenames:
        tree['ignore_paths'] = [i for i in tree['ignore_patterns']
                                if i.startswith('/')]
        tree['ignore_filenames'] = [i for i in tree['ignore_patterns']
                                    if not i.startswith('/')]

        # Delete misleading, useless, or raw values people shouldn't use:
        del tree['ignore_patterns']
        del tree['disabled_plugins']

        # Validate plugin config:
        enableds_with_all_optional_config = set(
            p for p in tree['enabled_plugins']
            if all(isinstance(k, Optional) for k in p.config_schema.iterkeys()))
        plugin_schema = Schema(merge(
            dict((Optional(name) if plugin in enableds_with_all_optional_config
                                    or plugin not in tree['enabled_plugins']
                                 else name,
                  plugin.config_schema)
                 for name, plugin in all_plugins_but_core().iteritems()),
            # And whatever isn't a plugin section, that we don't care about:
            {object: object}))
        # Insert empty missing sections for enabled plugins with entirely
        # optional config so their defaults get filled in. (Don't insert them
        # if the plugin has any required options; then we wouldn't produce the
        # proper error message about the section being absent.)
        for plugin in enableds_with_all_optional_config:
            tree.setdefault(plugin.name, {})
        tree = plugin_schema.validate(tree)

        super(TreeConfig, self).__init__(tree)
Example #54
File: app.py Project: gartung/dxr
def _browse_file(tree, path, line_docs, file_doc, config, date=None, contents=None):
    """Return a rendered page displaying a source file.

    :arg string tree: name of tree on which file is found
    :arg string path: relative path from tree root of file
    :arg list line_docs: LINE documents as defined in the mapping of core.py,
        where the `content` field is dereferenced
    :arg file_doc: the FILE document as defined in core.py
    :arg config: TreeConfig object of this tree
    :arg date: a formatted string representing the generated date, default to now
    :arg string contents: the contents of the source file, defaults to joining
        the `content` field of all line_docs
    """
    def sidebar_links(sections):
        """Return data structure to build nav sidebar from. ::

            [('Section Name', [{'icon': ..., 'title': ..., 'href': ...}])]

        """
        # Sort by order, resolving ties by section name:
        return sorted(sections, key=lambda section: (section['order'],
                                                     section['heading']))

    if not date:
        # Then assume that the file is generated now. Remark: we can't use this
        # as the default param because that is only evaluated once, so the same
        # time would always be used.
        date = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000")

    common = _build_common_file_template(tree, path, date, config)
    links = file_doc.get('links', [])
    if is_image(path):
        return render_template(
            'image_file.html',
            **common)
    else:  # We don't allow browsing binary files, so this must be a text file.
        # We concretize the lines into a list because we iterate over it multiple times
        lines = [doc['content'] for doc in line_docs]
        if not contents:
            # If contents are not provided, we can reconstruct them by
            # stitching the lines together.
            contents = ''.join(lines)
        offsets = cumulative_sum(imap(len, lines))
        tree_config = config.trees[tree]
        # Construct skimmer objects for all enabled plugins that define a
        # file_to_skim class.
        skimmers = [plugin.file_to_skim(path,
                                        contents,
                                        plugin.name,
                                        tree_config,
                                        file_doc,
                                        line_docs)
                    for plugin in tree_config.enabled_plugins
                    if plugin.file_to_skim]
        skim_links, refses, regionses, annotationses = skim_file(skimmers, len(line_docs))
        index_refs = (Ref.es_to_triple(ref, tree_config) for ref in
                      chain.from_iterable(doc.get('refs', [])
                                          for doc in line_docs))
        index_regions = (Region.es_to_triple(region) for region in
                         chain.from_iterable(doc.get('regions', [])
                                             for doc in line_docs))
        tags = finished_tags(lines,
                             chain(chain.from_iterable(refses), index_refs),
                             chain(chain.from_iterable(regionses), index_regions))
        return render_template(
            'text_file.html',
            **merge(common, {
                # Someday, it would be great to stream this and not concretize
                # the whole thing in RAM. The template will have to quit
                # looping through the whole thing 3 times.
                'lines': [(html_line(doc['content'], tags_in_line, offset),
                           doc.get('annotations', []) + skim_annotations)
                          for doc, tags_in_line, offset, skim_annotations
                              in izip(line_docs, tags_per_line(tags), offsets, annotationses)],
                'is_text': True,
                'sections': sidebar_links(links + skim_links)}))
Example #55
def _extract_data(topic_model, docs):
    doc_data = _extract_doc_data(docs)
    model_data = _extract_model_data(topic_model, docs, doc_data['vocab'])
    return fp.merge(doc_data, model_data)
Example #56
def parse_source(string):
    colons_with_no_backslashes = r'(?<!\\):'
    splits = re.split(colons_with_no_backslashes, string)
    return fn.merge(variables.source, dict(name=splits[0], args=splits[1:]))
Example #57
from core.api import fields as cfields
from core import base
from core import util


class Feedback(base.Base):
  name = ndb.StringProperty()
  subject = ndb.StringProperty()
  message = ndb.TextProperty()
  email = ndb.StringProperty()
  comment = ndb.TextProperty()
  is_read = ndb.BooleanProperty(default=False)
  user = ndb.KeyProperty()

  @classmethod
  def get_dbs(cls, is_read=None, **kwargs):
    return super(Feedback, cls).get_dbs(
        is_read=is_read or util.param('is_read', bool),
        **kwargs
      )

feedback_fields = funcy.merge(base.base_fields, {
    'name': fields.String,
    'subject': fields.String,
    'message': fields.String,
    'email': fields.String,
    'comment': fields.String,
    'is_read': fields.Boolean,
    'user': cfields.Key,
  })
Example #58
File: app.py Project: klibby/dxr
def _browse_file(tree, path, line_docs, file_doc, config, is_binary,
                 date=None, contents=None, image_rev=None):
    """Return a rendered page displaying a source file.

    :arg string tree: name of tree on which file is found
    :arg string path: relative path from tree root of file
    :arg list line_docs: LINE documents as defined in the mapping of core.py,
        where the `content` field is dereferenced
    :arg file_doc: the FILE document as defined in core.py
    :arg config: TreeConfig object of this tree
    :arg is_binary: Whether file is binary or not
    :arg date: a formatted string representing the generated date, default to now
    :arg string contents: the contents of the source file, defaults to joining
        the `content` field of all line_docs
    :arg image_rev: revision number of a textual or binary image, for images
        displayed at a certain rev
    """
    def process_link_templates(sections):
        """Look for {{line}} in the links of given sections, and duplicate them onto
        a 'template' field.
        """
        for section in sections:
            for link in section['items']:
                if '{{line}}' in link['href']:
                    link['template'] = link['href']
                    link['href'] = link['href'].replace('{{line}}', '')

    def sidebar_links(sections):
        """Return data structure to build nav sidebar from. ::

            [('Section Name', [{'icon': ..., 'title': ..., 'href': ...}])]

        """
        process_link_templates(sections)
        # Sort by order, resolving ties by section name:
        return sorted(sections, key=lambda section: (section['order'],
                                                     section['heading']))

    if not date:
        # Then assume that the file is generated now. Remark: we can't use this
        # as the default param because that is only evaluated once, so the same
        # time would always be used.
        date = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000")

    common = _build_common_file_template(tree, path, is_binary, date, config)
    links = file_doc.get('links', [])
    if is_binary_image(path):
        return render_template(
            'image_file.html',
            **merge(common, {
                'sections': sidebar_links(links),
                'revision': image_rev}))
    elif is_binary:
        return render_template(
            'text_file.html',
            **merge(common, {
                'lines': [],
                'is_binary': True,
                'sections': sidebar_links(links)}))
    else:
        # We concretize the lines into a list because we iterate over it multiple times
        lines = [doc['content'] for doc in line_docs]
        if not contents:
            # If contents are not provided, we can reconstruct them by
            # stitching the lines together.
            contents = ''.join(lines)
        offsets = build_offset_map(lines)
        tree_config = config.trees[tree]
        if is_textual_image(path) and image_rev:
            # Add a link to view textual images on revs:
            links.extend(dictify_links([
                (4,
                 'Image',
                 [('svgview', 'View', url_for('.raw_rev',
                                              tree=tree_config.name,
                                              path=path,
                                              revision=image_rev))])]))
        # Construct skimmer objects for all enabled plugins that define a
        # file_to_skim class.
        skimmers = [plugin.file_to_skim(path,
                                        contents,
                                        plugin.name,
                                        tree_config,
                                        file_doc,
                                        line_docs)
                    for plugin in tree_config.enabled_plugins
                    if plugin.file_to_skim]
        skim_links, refses, regionses, annotationses = skim_file(skimmers, len(line_docs))
        index_refs = (Ref.es_to_triple(ref, tree_config) for ref in
                      chain.from_iterable(doc.get('refs', [])
                                          for doc in line_docs))
        index_regions = (Region.es_to_triple(region) for region in
                         chain.from_iterable(doc.get('regions', [])
                                             for doc in line_docs))
        tags = finished_tags(lines,
                             chain(chain.from_iterable(refses), index_refs),
                             chain(chain.from_iterable(regionses), index_regions))
        return render_template(
            'text_file.html',
            **merge(common, {
                # Someday, it would be great to stream this and not concretize
                # the whole thing in RAM. The template will have to quit
                # looping through the whole thing 3 times.
                'lines': [(html_line(doc['content'], tags_in_line, offset),
                           doc.get('annotations', []) + skim_annotations)
                          for doc, tags_in_line, offset, skim_annotations
                              in izip(line_docs, tags_per_line(tags), offsets, annotationses)],
                'sections': sidebar_links(links + skim_links),
                'query': request.args.get('q', ''),
                'bubble': request.args.get('redirect_type')}))
Example #59
def tab_open(string):
    pass

def window_open(string):
    pass

def split_open(string):
    pass

def vsplit_open(string):
    pass

openable_actions = fn.merge(common_actions, dict(
    tab_open = tab_open,
    window_open = window_open,
    split_open = split_open,
    vsplit_open = vsplit_open,
))

def read(string):
    pass

def rename(string):
    pass

def remove(string):
    pass

def shell_cmd(string):
    # Run shell command on file
    pass