def load(self, max_num_line=0): self.srcdata = [[] for _ in range(len(self.bucketsize))] self.trgdata = [[] for _ in range(len(self.bucketsize))] self.mtadata = [[] for _ in range(len(self.bucketsize))] srcstream, trgstream, mtastream = open(self.srcfile, 'r'), open( self.trgfile, 'r'), open(self.mtafile, 'r') num_line = 0 for (s, t, m) in zip(srcstream, trgstream, mtastream): s = [int(x) for x in s.split()] t = [constants.BOS] + [int(x) for x in t.split()] + [constants.EOS] m = [float(x) for x in m.split()] num_line += self.insert(s, t, m) if num_line >= max_num_line and max_num_line > 0: break if num_line % 500000 == 0: print("Read line {}".format(num_line)) ## if validate is True we merge all buckets into one if self.validate == True: self.srcdata = np.array(merge(*self.srcdata)) self.trgdata = np.array(merge(*self.trgdata)) self.mtadata = np.array(merge(*self.mtadata)) self.start = 0 self.size = len(self.srcdata) else: self.srcdata = list(map(np.array, self.srcdata)) self.trgdata = list(map(np.array, self.trgdata)) self.mtadata = list(map(np.array, self.mtadata)) self.allocation = list(map(len, self.srcdata)) self.p = np.array(self.allocation) / sum(self.allocation) srcstream.close(), trgstream.close(), mtastream.close()
def generate_sql_list_with_params(list_params, name_value): """ Generate an iterative piece of SQL code. :param list_params: list of values :param name_value: name of the variable being compared :return: piece of SQL code (name_value=1 OR name_value=2 OR name_value=3 ...) """ # if the value is a string, wrap it in extra quotes; other values stay unquoted ifstr = lambda p: "'%s'" % p if type(p) == str else "%s" % p if type(list_params) == list: if len(list_params) > 1: cur_part = "" for param in list_params[:-1]: cur_part = funcy.merge( cur_part, " %s=%s OR" % (name_value, ifstr(param))) cur_part = funcy.merge( cur_part, " %s=%s" % (name_value, ifstr(list_params[-1]))) return "(" + cur_part + ")" elif len(list_params) == 1: return " %s=%s " % (name_value, ifstr(list_params[0])) else: return None else: return None
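# A minimal usage sketch of the helper above (illustrative values; assumes funcy is installed and
# generate_sql_list_with_params is importable; non-string values are left unquoted, matching the
# docstring's example output).
print(generate_sql_list_with_params([1, 2, 3], "id"))
# ( id=1 OR id=2 OR id=3)
print(generate_sql_list_with_params(["draft"], "status"))
#  status='draft'   (note the surrounding spaces)
print(generate_sql_list_with_params("not-a-list", "id"))
# None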
def __call__(self, inputs, latches=None, *, lift=None): inputs_l = project(inputs, self.left.inputs) omap_l, lmap_l = self.left(inputs_l, latches=latches, lift=lift) inputs_r = project(inputs, self.right.inputs) inputs_r.update(omap_l) # <--- Cascade setup happens here. omap_l = omit(omap_l, self._interface) omap_r, lmap_r = self.right(inputs_r, latches=latches, lift=lift) return fn.merge(omap_l, omap_r), fn.merge(lmap_l, lmap_r)
class LabelsStream(BaseStream): API_PATH = '/api/v2/admin/labels' TABLE = 'labels' SCHEMA = with_properties( merge( DEFAULT_DATE_FIELDS, { "id": { "type": ["integer", "null"] }, "name": { "type": ["string", "null"] }, "full_name": { "type": ["string", "null"] }, "level": { "type": ["integer", "null"] }, "open_suggestions_count": { "type": ["integer", "null"] }, "links": { "type": "object", "properties": { "parent": { "type": ["integer", "null"] } } } })) def get_stream_data(self, result): return result.get('labels')
def get_processes(services) -> Iterable[Coroutine]: for name, service in services.items(): env: dict = dict(os.environ) if "env_file" in service: env_file = service["env_file"] if not isinstance(env_file, list): env_file = [env_file] env.update(merge(*[DotEnv(path) for path in env_file])) if "environment" in service: environment = service["environment"] if isinstance(environment, list): env.update({b.key: b.value for b in lmap(parse_binding, environment)}) else: env.update(environment) cmd = (service.get("entrypoint", "") + " " + service.get("command", "")).strip() if not cmd: raise Exception("cannot run a service without a command in the config") build = service["build"] if isinstance(build, str): cwd = build else: cwd = build.get("context", ".") async def f(name, cmd, env, cwd): print("Attaching to " + name) color = random.choice(colors) log = lambda x: sys.stdout.write(getattr(Fore, color) + f"{name} | " + Fore.RESET + x) p = await exec(cmd, env=env, cwd=cwd, stdout=log, stderr=log) if p: log(f"{name} exited with code {p.returncode}" + "\n") yield f(name, cmd, env, cwd)
def start(self, children): ref = self.ref or get_first_key(children[0]) return { "$schema": "http://json-schema.org/draft-07/schema#", "$ref": "#/definitions/" + ref, "definitions": merge(*children), }
class CommentsStream(BaseStream): API_PATH = '/api/v2/admin/comments' TABLE = 'comments' SCHEMA = with_properties(merge( DEFAULT_DATE_FIELDS, { "id": {"type": "integer"}, "body": {"type": "string"}, "body_mime_type": {"type": "string"}, "state": {"type": "string"}, "inappropriate_flags_count": {"type": "integer"}, "is_admin_comment": {"type": "boolean"}, "channel": {"type": "string"}, "links": { "type": "object", "properties": { "suggestion": {"type": "integer"}, "created_by": {"type": "integer"} } } })) def get_stream_data(self, result): return result.get('comments')
def prepare(topic_model, corpus, dictionary, **kargs): """Transforms the Gensim TopicModel and related corpus and dictionary into the data structures needed for the visualization. Parameters ---------- topic_model : gensim.models.ldamodel.LdaModel An already trained Gensim LdaModel. The other gensim model types are not supported (PRs welcome). corpus : array-like list of bag of word docs in tuple form The corpus in bag of word form, the same docs used to train the model. For example: [(50, 3), (63, 5), ....] dictionary: gensim.corpora.Dictionary The dictionary object used to create the corpus. Needed to extract the actual terms (not ids). **kwargs : additional keyword arguments are passed through to :func:`pyldavis.prepare`. Returns ------- prepared_data : PreparedData the data structures used in the visualization Example -------- For example usage please see this notebook: http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/Gensim%20Newsgroup.ipynb """ opts = fp.merge(_extract_data(topic_model, corpus, dictionary), kargs) return vis_prepare(**opts)
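# A hedged usage sketch of the wrapper above. The module path depends on the pyLDAvis release
# (pyLDAvis.gensim in 2.x, pyLDAvis.gensim_models in 3.x); lda_model, corpus and dictionary are
# assumed to be an already trained gensim LdaModel, its bag-of-words corpus and its Dictionary.
import pyLDAvis
import pyLDAvis.gensim as gensimvis  # pyLDAvis.gensim_models on pyLDAvis >= 3.x

vis_data = gensimvis.prepare(lda_model, corpus, dictionary)
pyLDAvis.save_html(vis_data, 'lda.html')  # or pyLDAvis.display(vis_data) in a notebook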
class ExternalUsersStream(BaseStream): API_PATH = '/api/v2/admin/external_users' TABLE = 'external_users' SCHEMA = with_properties(merge( DEFAULT_DATE_FIELDS, make_date_field("external_created_at"), make_date_field("last_seen_at"), { "email": {"type": ["string", "null"]}, "external_id": {"type": ["string", "null"]}, "id": {"type": ["integer", "null"]}, "ip": {"type": ["string", "null"]}, "links": { "type": "object", "properties": { "external_accounts": {"type": ["integer", "null"]}, "external_users": {"type": ["integer", "null"]}, }, }, "name": {"type": ["string", "null"]}, "seen_days": {"type": ["integer", "null"]}, "type": {"type": ["string", "null"]}, }), additional=True) def get_stream_data(self, result): return result.get('external_users')
class CategoriesStream(BaseStream): API_PATH = '/api/v2/admin/categories' TABLE = 'categories' SCHEMA = with_properties( merge( DEFAULT_DATE_FIELDS, { "id": { "type": "integer" }, "name": { "type": "string" }, "suggestions_count": { "type": "integer", }, "open_suggestions_count": { "type": "integer", }, "links": { "type": "object", "properties": { "forum": { "type": "integer" } } } })) def get_stream_data(self, result): return result.get('categories')
def __init__(self, **kwargs): options = merge(settings.MONGO_DEFAULT, kwargs) self.client = MongoClient(host=options['host'], port=options['port'], connect=False) self.db = self.client[options['db']] self.collection = self.db[options['collection']]
def environment(self, vars_): """Set up environment variables to trigger analysis dumps from clang. We'll store all the harvested metadata in the plugin's temporary folder. """ tree = self.tree plugin_folder = os.path.dirname(__file__) flags = [ '-load', os.path.join(plugin_folder, 'libclang-index-plugin.so'), '-add-plugin', 'dxr-index', '-plugin-arg-dxr-index', tree.source_folder ] flags_str = " ".join(imap('-Xclang {}'.format, flags)) env = { 'CC': "clang %s" % flags_str, 'CXX': "clang++ %s" % flags_str, 'DXR_CLANG_FLAGS': flags_str, 'DXR_CXX_CLANG_OBJECT_FOLDER': tree.object_folder, 'DXR_CXX_CLANG_TEMP_FOLDER': self._temp_folder, } env['DXR_CC'] = env['CC'] env['DXR_CXX'] = env['CXX'] return merge(vars_, env)
def extract_map(name_map, names): lookup_root = fn.merge(*({v: k for v in vals} for k, vals in name_map)) mapping = fn.group_by(lambda x: lookup_root[x.split('##time_')[0]], names) mapping = fn.walk_values(tuple, mapping) # Make hashable. return frozenset(mapping.items())
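# A toy run of extract_map with hypothetical bit-blasted names (assumes funcy is imported as fn,
# as in the snippet above).
name_map = (('x', ('x[0]', 'x[1]')),)
names = ('x[0]##time_0', 'x[1]##time_0', 'x[0]##time_1')
print(extract_map(name_map, names))
# frozenset({('x', ('x[0]##time_0', 'x[1]##time_0', 'x[0]##time_1'))})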
def prepare(docs, vect, lda, **kwargs): """Create Prepared Data from sklearn's vectorizer and Latent Dirichlet Allocation. Parameters ---------- docs : Pandas Series. Documents to be passed as an input. vect : Scikit-Learn Vectorizer (CountVectorizer,TfIdfVectorizer). vectorizer used to convert documents into a sparse document-term matrix lda : sklearn.decomposition.LatentDirichletAllocation. Latent Dirichlet Allocation **kwargs: Keyword argument to be passed to pyLDAvis.prepare() Returns ------- prepared_data : PreparedData the data structures used in the visualization Example -------- For example usage please see this notebook: http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/sklearn.ipynb See ------ See `pyLDAvis.prepare` for **kwargs. """ opts = fp.merge(_extract_data(docs, vect, lda)[2], kwargs) return vis_prepare(**opts)
def prepare_profiles(): """ Prepares a dict 'app.model' -> profile, for use in model_profile() """ # NOTE: this is a compatibility for old style config, # TODO: remove in cacheops 3.0 if hasattr(settings, 'CACHEOPS_PROFILES'): profiles.update(settings.CACHEOPS_PROFILES) if hasattr(settings, 'CACHEOPS_DEFAULTS'): profile_defaults.update(settings.CACHEOPS_DEFAULTS) model_profiles = {} ops = getattr(settings, 'CACHEOPS', {}) for app_model, profile in ops.items(): # NOTE: this is a compatibility for old style config, # TODO: remove in cacheops 3.0 if is_tuple(profile): profile_name, timeout = profile[:2] try: model_profiles[app_model] = mp = deepcopy(profiles[profile_name]) except KeyError: raise ImproperlyConfigured('Unknown cacheops profile "%s"' % profile_name) if len(profile) > 2: mp.update(profile[2]) mp['timeout'] = timeout mp['ops'] = set(mp['ops']) else: model_profiles[app_model] = merge(profile_defaults, profile) return model_profiles
def prepare(lda_model, dtm, labels, **kwargs): """Create Prepared Data from sklearn's LatentDirichletAllocation and CountVectorizer. Parameters ---------- lda_model : sklearn.decomposition.LatentDirichletAllocation. Latent Dirichlet Allocation model from sklearn fitted with `dtm` dtm : array-like or sparse matrix, shape=(n_samples, n_features) Document-term matrix used to fit on LatentDirichletAllocation model (`lda_model`) labels : list of str Terms corresponding to the columns (features) of `dtm`, e.g. the fitted vectorizer's feature names **kwargs: Keyword argument to be passed to pyLDAvis.prepare() Returns ------- prepared_data : PreparedData the data structures used in the visualization Example -------- For example usage please see this notebook: http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/sklearn.ipynb See ------ See `pyLDAvis.prepare` for **kwargs. """ opts = fp.merge(_extract_data(lda_model, dtm, labels), kwargs) return plb.prepare(**opts)
def prepare_profiles(): """ Prepares a dict 'app.model' -> profile, for use in model_profile() """ profile_defaults = { 'ops': (), 'local_get': False, 'db_agnostic': True, 'write_only': False, 'lock': False, } profile_defaults.update(settings.CACHEOPS_DEFAULTS) model_profiles = {} for app_model, profile in settings.CACHEOPS.items(): if profile is None: model_profiles[app_model.lower()] = None continue model_profiles[app_model.lower()] = mp = merge(profile_defaults, profile) if mp['ops'] == 'all': mp['ops'] = ALL_OPS # People will do that anyway :) if isinstance(mp['ops'], six.string_types): mp['ops'] = {mp['ops']} mp['ops'] = set(mp['ops']) if 'timeout' not in mp: raise ImproperlyConfigured( 'You must specify "timeout" option in "%s" CACHEOPS profile' % app_model) return model_profiles
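# The merge call above gives the per-model profile precedence over the defaults; a small
# illustration with made-up values.
from funcy import merge

defaults = {'ops': (), 'local_get': False, 'db_agnostic': True, 'lock': False}
profile = {'ops': 'all', 'timeout': 60}
print(merge(defaults, profile))
# {'ops': 'all', 'local_get': False, 'db_agnostic': True, 'lock': False, 'timeout': 60}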
class ProductAreasStream(BaseStream): API_PATH = '/api/v2/admin/product_areas' TABLE = 'product_areas' SCHEMA = with_properties( merge( DEFAULT_DATE_FIELDS, { "id": { "type": ["integer", "null"] }, "name": { "type": ["string", "null"] }, "links": { "type": "object", "properties": { "updated_by": { "type": ["integer", "null"] }, "created_by": { "type": ["integer", "null"] } } } })) def get_stream_data(self, result): return result.get('product_areas')
def prepare(model_data_path, ignore_topics=[], ignore_terms=[], **kwargs): """Create Prepared Data from TwitterLDA's stored data output. Parameters ---------- model_data_path : Path where TwitterLDA stored its data output Returns ------- prepared_data : PreparedData the data structures used in the visualization Example -------- For example usage please see this notebook: http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/sklearn.ipynb See ------ See `pyLDAvis.prepare` for **kwargs. """ opts = fp.merge( _extract_data(model_data_path, ignore_topics, ignore_terms), kwargs) opts['sort_topics'] = False return pyLDAvis.prepare(**opts)
class StatusesStream(BaseStream): API_PATH = '/api/v2/admin/statuses' TABLE = 'statuses' SCHEMA = with_properties( merge( DEFAULT_DATE_FIELDS, { "id": { "type": "integer" }, "name": { "type": ["null", "string"] }, "is_open": { "type": ["boolean", "null"] }, "hex_color": { "type": ["string", "null"] }, "position": { "type": ["integer", "null"] }, "allow_comments": { "type": ["boolean", "null"] }, })) def get_stream_data(self, result): return result.get('statuses')
def prepare(topic_model, docs, **kargs): """Transforms the GraphLab TopicModel and related corpus data into the data structures needed for the visualization. Parameters ---------- topic_model : graphlab.toolkits.topic_model.topic_model.TopicModel An already trained GraphLab topic model. docs : SArray of dicts The corpus in bag of word form, the same docs used to train the model. **kwargs : additional keyword arguments are passed through to :func:`pyldavis.prepare`. Returns ------- prepared_data : PreparedData the data structures used in the visualization Example -------- For example usage please see this notebook: http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/GraphLab.ipynb """ opts = fp.merge(_extract_data(topic_model, docs), kargs) return vis_prepare(**opts)
def prepare_profiles(): """ Prepares a dict 'app.model' -> profile, for use in model_profile() """ profile_defaults = { 'ops': (), 'local_get': False, 'db_agnostic': True, 'lock': False, } profile_defaults.update(settings.CACHEOPS_DEFAULTS) model_profiles = {} for app_model, profile in settings.CACHEOPS.items(): if profile is None: model_profiles[app_model.lower()] = None continue model_profiles[app_model.lower()] = mp = merge(profile_defaults, profile) if mp['ops'] == 'all': mp['ops'] = ALL_OPS # People will do that anyway :) if isinstance(mp['ops'], six.string_types): mp['ops'] = {mp['ops']} mp['ops'] = set(mp['ops']) if 'timeout' not in mp: raise ImproperlyConfigured( 'You must specify "timeout" option in "%s" CACHEOPS profile' % app_model) return model_profiles
def find_env_input(self, start, action, end): """ Returns an environment input under which `start` transitions to `end` via `action`, or None if no such input exists. """ coin = self._transition_coin(start, action, end) query = coin.expr & coin.valid default = { i: query.aigbv.imap[i].size * (False, ) for i in query.inputs } try: from aiger_sat.sat_bv import solve except ImportError: msg = "Need to install py-aiger-sat to use this method." raise ImportError(msg) model = solve(query) if model is None: return None model = fn.merge(default, model) return {remove_suffix(k, '##time_0'): model[k] for k in query.inputs}
def prepare(lda_model, dtm, vectorizer, **kwargs): """Create Prepared Data from sklearn's LatentDirichletAllocation and CountVectorizer. Parameters ---------- lda_model : sklearn.decomposition.LatentDirichletAllocation. Latent Dirichlet Allocation model from sklearn fitted with `dtm` dtm : array-like or sparse matrix, shape=(n_samples, n_features) Document-term matrix used to fit on LatentDirichletAllocation model (`lda_model`) vectorizer : sklearn.feature_extraction.text.(CountVectorizer, TfIdfVectorizer). vectorizer used to convert raw documents to document-term matrix (`dtm`) **kwargs: Keyword argument to be passed to pyLDAvis.prepare() Returns ------- prepared_data : PreparedData the data structures used in the visualization Example -------- For example usage please see this notebook: http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/sklearn.ipynb See ------ See `pyLDAvis.prepare` for **kwargs. """ opts = fp.merge(_extract_data(lda_model, dtm, vectorizer), kwargs) return pyLDAvis.prepare(**opts)
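# A hedged end-to-end sketch of calling the wrapper above (the module was pyLDAvis.sklearn up to
# 3.3.x and was renamed pyLDAvis.lda_model in 3.4; raw_documents is an assumed, illustrative
# collection of texts).
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import pyLDAvis
import pyLDAvis.sklearn as sklearnvis  # pyLDAvis.lda_model on pyLDAvis >= 3.4

vect = CountVectorizer(stop_words='english')
dtm = vect.fit_transform(raw_documents)  # raw_documents: assumed list/Series of documents
lda_model = LatentDirichletAllocation(n_components=10, random_state=0).fit(dtm)
vis_data = sklearnvis.prepare(lda_model, dtm, vect)
pyLDAvis.save_html(vis_data, 'lda.html')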
def settings(**values): old_values = SETTINGS.get() try: token = SETTINGS.set(merge(old_values, values)) yield finally: SETTINGS.reset(token)
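# A self-contained sketch of the layering this enables, assuming SETTINGS is a ContextVar holding
# a dict and the function above is wrapped in contextlib.contextmanager (the concrete keys are
# illustrative).
from contextlib import contextmanager
from contextvars import ContextVar
from funcy import merge

SETTINGS = ContextVar("SETTINGS", default={})

@contextmanager
def settings(**values):
    old_values = SETTINGS.get()
    try:
        token = SETTINGS.set(merge(old_values, values))
        yield
    finally:
        SETTINGS.reset(token)

with settings(retries=3):
    with settings(timeout=10):
        assert SETTINGS.get() == {"retries": 3, "timeout": 10}  # inner overrides stack on outer ones
    assert SETTINGS.get() == {"retries": 3}
assert SETTINGS.get() == {}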
def _blast(bvname2vals, name_map): """Helper function to expand (blast) str -> int map into str -> bool map. This is used to send word level inputs to aiger.""" if len(name_map) == 0: return dict() return fn.merge(*(dict(zip(names, bvname2vals[bvname])) for bvname, names in name_map))
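# A toy run of the helper above with hypothetical word-level names.
name_map = [('x', ('x[0]', 'x[1]')), ('y', ('y[0]',))]
bvname2vals = {'x': (True, False), 'y': (True,)}
print(_blast(bvname2vals, name_map))
# {'x[0]': True, 'x[1]': False, 'y[0]': True}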
def __init__( self, iterable=None, disable=None, level=logging.ERROR, desc=None, leave=False, bar_format=None, bytes=False, # pylint: disable=W0622 file=None, **kwargs ): """ bytes : shortcut for `unit='B', unit_scale=True, unit_divisor=1024, miniters=1` desc : persists after `close()` level : effective logging level for determining `disable`; used only if `disable` is unspecified disable : If (default: None), will be determined by logging level. May be overridden to `True` due to non-TTY status. Skip override by specifying env var `DVC_IGNORE_ISATTY`. kwargs : anything accepted by `tqdm.tqdm()` """ kwargs = kwargs.copy() kwargs.setdefault("unit_scale", True) if bytes: bytes_defaults = dict( unit="B", unit_scale=True, unit_divisor=1024, miniters=1 ) kwargs = merge(bytes_defaults, kwargs) if file is None: file = sys.stderr self.desc_persist = desc # auto-disable based on `logger.level` if disable is None: disable = logger.getEffectiveLevel() > level # auto-disable based on TTY if ( not disable and not env2bool("DVC_IGNORE_ISATTY") and hasattr(file, "isatty") ): disable = not file.isatty() super(Tqdm, self).__init__( iterable=iterable, disable=disable, leave=leave, desc=desc, bar_format="!", **kwargs ) if bar_format is None: if self.__len__(): self.bar_format = self.BAR_FMT_DEFAULT else: self.bar_format = self.BAR_FMT_NOTOTAL else: self.bar_format = bar_format self.refresh()
def parse_state(string): spaces_with_no_backslashes = r'((?<!\\)\s)+' tokens = filter(lambda x: x!=' ', re.split(spaces_with_no_backslashes, string)) options = map(parse_option, (ifilter(lambda x: x.startswith('-'), tokens))) sources = map(parse_source, (ifilter(lambda x: x and not x.startswith('-'), tokens))) map(validate_option, options) map(validate_source, sources) return fn.merge(dict(options), dict(sources=sources))
def pipeline(input=None, **kwargs2): a = input kwargs = fy.merge(kwargs1, kwargs2) for executable_step in executable_steps: a = executable_step(a, **kwargs) return a
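# One plausible shape for the enclosing factory, shown only to make the merge precedence concrete
# (a hedged sketch, not the original surrounding code): keyword arguments given at call time
# override the ones captured when the pipeline was built.
import funcy as fy

def make_pipeline(executable_steps, **kwargs1):
    def pipeline(input=None, **kwargs2):
        a = input
        kwargs = fy.merge(kwargs1, kwargs2)  # kwargs2 wins on key collisions
        for executable_step in executable_steps:
            a = executable_step(a, **kwargs)
        return a
    return pipeline

double = lambda x, factor=2, **_: x * factor
add_one = lambda x, **_: x + 1

run = make_pipeline([double, add_one], factor=3)
assert run(2) == 7               # 2 * 3 + 1
assert run(2, factor=10) == 21   # call-time factor overrides the captured one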
def __init__(self, bad_input_checks, errors): '''Raises a random error if any input check returns True''' super().__init__() self._check = funcy.any_fn(*bad_input_checks) self._errors = errors self._random = random.Random() self._random.seed(hash(funcy.merge(bad_input_checks, errors)))
def __init__(self, bad_input_checks, errors): """Raises a random error if any input check returns True""" super().__init__() self._check = any_fn(*bad_input_checks) self._errors = errors self._random = random.Random() self._random.seed(hash(merge(bad_input_checks, errors)))
def prepare(topic_model, corpus, dictionary, doc_topic_dist=None, verbose=0, **kwargs): """Transforms the Gensim TopicModel and related corpus and dictionary into the data structures needed for the visualization. Parameters ---------- topic_model : gensim.models.ldamodel.LdaModel An already trained Gensim LdaModel. The other gensim model types are not supported (PRs welcome). corpus : array-like list of bag of word docs in tuple form or scipy CSC matrix The corpus in bag of word form, the same docs used to train the model. The corpus is transformed into a csc matrix internally, if you intend to call prepare multiple times it is a good idea to first call `gensim.matutils.corpus2csc(corpus)` and pass in the csc matrix instead. For example: [(50, 3), (63, 5), ....] dictionary: gensim.corpora.Dictionary The dictionary object used to create the corpus. Needed to extract the actual terms (not ids). doc_topic_dist (optional): Document topic distribution from LDA (default=None) The document topic distribution that is eventually visualised, if you will be calling `prepare` multiple times it's a good idea to explicitly pass in `doc_topic_dist` as inferring this for large corpora can be quite expensive. **kwargs : additional keyword arguments are passed through to :func:`pyldavis.prepare`. Returns ------- prepared_data : PreparedData the data structures used in the visualization Example -------- For example usage please see this notebook: http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/Gensim%20Newsgroup.ipynb See ------ See `pyLDAvis.prepare` for **kwargs. """ opts = fp.merge( _extract_data(topic_model, corpus, dictionary, doc_topic_dist, verbose=verbose), kwargs) return vis_prepare(**opts)
def run_cmd(self, args: Sequence[str], **kwargs): defaults = { "check": True, "stdout": subprocess.PIPE, "text": True, "cwd": str(self.repo_dir), } kw = merge(defaults, kwargs) return subprocess.run(args, **kw)
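# Because kwargs is merged after the defaults, callers can override any of them; a hypothetical
# usage, assuming repo is an instance of the class above.
result = repo.run_cmd(["git", "status", "--porcelain"], check=False)  # check=False overrides the default check=True
print(result.returncode, result.stdout)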
def load_all_words(): word_list = [] for i in range(1, 9): filename = "wordlists/scrabble_words_%d.txt" % i infile = open(filename, 'r') content = infile.read() infile.close() content = content.strip().lower() word_list = f.merge(word_list, content.split("\n")) return word_list
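# Here f.merge is doing plain list concatenation; funcy's merge joins collections of the same type.
import funcy as f

assert f.merge([1, 2], [3]) == [1, 2, 3]
assert f.merge("word", "list") == "wordlist"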
def generate_sql_list_with_params_like(list_params, name_value): """ Generate an iterative piece of SQL code using the LIKE operator. :param list_params: list of values :param name_value: name of the variable being compared :return: piece of SQL code (name_value LIKE '%1%' OR name_value LIKE '%2%' ...) """ if type(list_params) == list: if len(list_params) > 1: cur_part = "" for param in list_params[:-1]: cur_part = funcy.merge(cur_part, " %s LIKE '%s' OR" % (name_value, ('%'+param+'%'))) cur_part = funcy.merge(cur_part, " %s LIKE '%s'" % (name_value, ('%'+list_params[-1]+'%'))) return "(" + cur_part + ")" elif len(list_params) == 1: return " %s LIKE '%s' " % (name_value, ('%'+list_params[0]+'%')) else: return None else: return None
def to_json(self, short=False): data = { 'id': self.id, 'name': self._name, } return data if short else merge(data, { 'isAuthenticated': self.is_authenticated, 'game': self.game and self.game.to_json(), 'side': self.side, })
def check_perms(view, request, app, model_name): for guess in ('%s.%s' % (app, model_name), '%s.*' % app, '*.*'): if guess in APPS: perms = merge(MINIREST_DEFAULT_PERMS, MINIREST[guess]['perms']) if isinstance(perms[view], six.types.FunctionType): return perms[view](request) elif isinstance(perms[view], six.string_types): return import_string(perms[view])(request) return False else: return False
def setup_slug(sender, instance, **kwargs): data = walk_values(compose(slugify, unicode), instance.__dict__) related = {item: data['_{0}_cache'.format(item)] for item in map(lambda u: u[0:-3], filter(lambda u: u.endswith('_id'), data.keys()))} data = merge(data, related) slug = self.populate_from.format( **data)[:self.max_length] if slug != getattr(instance, name): setattr(instance, name, slug) instance.save()
def prepare(topic_model, corpus, dictionary, doc_topic_dist=None, **kwargs): """Transforms the Gensim TopicModel and related corpus and dictionary into the data structures needed for the visualization. Parameters ---------- topic_model : gensim.models.hdpmodel.HdpModel An already trained Gensim HdpModel. corpus : array-like list of bag of word docs in tuple form or scipy CSC matrix The corpus in bag of word form, the same docs used to train the model. The corpus is transformed into a csc matrix internally, if you intend to call prepare multiple times it is a good idea to first call `gensim.matutils.corpus2csc(corpus)` and pass in the csc matrix instead. For example: [(50, 3), (63, 5), ....] dictionary: gensim.corpora.Dictionary The dictionary object used to create the corpus. Needed to extract the actual terms (not ids). doc_topic_dist (optional): Document topic distribution from LDA (default=None) The document topic distribution that is eventually visualised, if you will be calling `prepare` multiple times it's a good idea to explicitly pass in `doc_topic_dist` as inferring this for large corpora can be quite expensive. **kwargs : additional keyword arguments are passed through to :func:`pyldavis.prepare`. Returns ------- prepared_data : PreparedData the data structures used in the visualization Example -------- For example usage please see this notebook: http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/Gensim%20Newsgroup.ipynb See ------ See `pyLDAvis.prepare` for **kwargs. """ # we use sklearn's multi-dimensional scaling as the default measure to approximate distance between topics # should be a slightly more stable implementation compared to skbio's PCoA if 'mds' not in kwargs: kwargs['mds'] = js_MDS opts = fp.merge(_extract_data(topic_model, corpus, dictionary, doc_topic_dist), kwargs) return vis_prepare(**opts)
def generate_sql_list_with_params(list_params, name_value): """ Generate an iterative piece of SQL code. :param list_params: list of values :param name_value: name of the variable being compared :return: piece of SQL code (name_value=1 OR name_value=2 OR name_value=3 ...) """ # if the value is a string, wrap it in extra quotes; other values stay unquoted ifstr = lambda p: "'%s'" % p if type(p) == str else "%s" % p if type(list_params) == list: if len(list_params) > 1: cur_part = "" for param in list_params[:-1]: cur_part = funcy.merge(cur_part, " %s=%s OR" % (name_value, ifstr(param))) cur_part = funcy.merge(cur_part, " %s=%s" % (name_value, ifstr(list_params[-1]))) return "(" + cur_part + ")" elif len(list_params) == 1: return " %s=%s " % (name_value, ifstr(list_params[0])) else: return None else: return None
def get(self): auth_db = models.AuthProviders.get_master_db() auth_providers_config = copy.deepcopy(auth.PROVIDERS_CONFIG.values()) auth_providers_config = sorted(auth_providers_config, key=lambda x: x.get('name')) provider_fields = {} for provider in auth_providers_config: for field in provider.get('fields', {}).iterkeys(): try: getattr(auth_db, field) provider_fields[field] = fields.String except AttributeError: pass provider_fields = funcy.merge(base.base_fields, provider_fields) return helpers.make_response(auth_db, provider_fields)
def _extract_date_tags(page: BeautifulSoup, tags_file: str, verbose: bool = False): result = [] _extract_func_date = partial(_extract_date, verbose=verbose) _tags = loadyaml(tags_file) for tag, tags_params in _tags.items(): # if verbose: # print("Processing tag - '%s'" % tag) result = merge( result, list(mapcat( partial(_extract_func_date, tag), mapcat(page.findAll, [tag] * len(tags_params), tags_params)))) return list(map(prepare_date, result))
def _introspect(func, seen): seen.add(func) if inspect.isbuiltin(func) or not hasattr(func, '__module__') or not hasattr(func, '__name__'): return {} if isinstance(func, type): methods = inspect.getmembers(func, predicate=inspect.ismethod) return join(_introspect(meth, seen) for _, meth in methods if meth not in seen) or {} if not hasattr(func, '__defaults__') or not hasattr(func, '__code__'): return {} func_name = _full_name(func) consts = merge(get_defaults(func), get_assignments(func)) consts_spec = walk_keys(lambda k: '%s.%s' % (func_name, k), consts) consts_spec.update({'%s.%s' % (func.__module__, name): value for name, value in get_closure(func).items()}) # Recurse callables = filter(callable, consts_spec.values()) recurse_specs = (_introspect(f, seen) for f in callables if f not in seen) return merge(join(recurse_specs) or {}, consts_spec)
def prepare(corpus, dictionary, doc_topic_file, topic_term_file, **kwargs): """Transforms the Mahout LDA and related corpus and dictionary into the data structures needed for the visualization. Parameters ---------- corpus : array-like list of bag of word docs in tuple form or scipy CSC matrix The corpus in bag of word form, the same docs used to train the model. The corpus is transformed into a csc matrix internally, if you intend to call prepare multiple times it is a good idea to first call `gensim.matutils.corpus2csc(corpus)` and pass in the csc matrix instead. For example: [(50, 3), (63, 5), ....] dictionary: gensim.corpora.Dictionary The dictionary object used to create the corpus. Needed to extract the actual terms (not ids). doc_topic_file : file handle to Document topic distribution from Mahout LDA The document topic distribution that is eventually visualised topic_term_file : file handle to topic term distribution from Mahout LDA The topic term distribution that is eventually visualised **kwargs : additional keyword arguments are passed through to :func:`pyldavis.prepare`. Returns ------- prepared_data : PreparedData the data structures used in the visualization See ------ See `pyLDAvis.prepare` for **kwargs. """ # we use sklearn's multi-dimensional scaling as the default measure to approximate distance between topics # should be a slightly more stable implementation compared to skbio's PCoA if 'mds' not in kwargs: kwargs['mds'] = js_MDS doc_topic_dist = get_doc_topic(doc_topic_file) topic_term_dists = get_topic_term(topic_term_file, dictionary) opts = fp.merge(_extract_data(corpus, dictionary, doc_topic_dist, topic_term_dists), kwargs) return vis_prepare(**opts)
def prepare_profiles(): """ Prepares a dict 'app.model' -> profile, for use in model_profile() """ # NOTE: this is a compatibility for old style config, # TODO: remove in cacheops 3.0 if hasattr(settings, 'CACHEOPS_PROFILES'): profiles.update(settings.CACHEOPS_PROFILES) if hasattr(settings, 'CACHEOPS_DEFAULTS'): profile_defaults.update(settings.CACHEOPS_DEFAULTS) model_profiles = {} ops = getattr(settings, 'CACHEOPS', {}) for app_model, profile in ops.items(): if profile is None: model_profiles[app_model] = None continue # NOTE: this is a compatibility for old style config, # TODO: remove in cacheops 3.0 if is_tuple(profile): profile_name, timeout = profile[:2] try: model_profiles[app_model] = mp = deepcopy(profiles[profile_name]) except KeyError: raise ImproperlyConfigured('Unknown cacheops profile "%s"' % profile_name) if len(profile) > 2: mp.update(profile[2]) mp['timeout'] = timeout mp['ops'] = set(mp['ops']) else: model_profiles[app_model] = mp = merge(profile_defaults, profile) if mp['ops'] == 'all': mp['ops'] = ALL_OPS # People will do that anyway :) if isinstance(mp['ops'], six.string_types): mp['ops'] = [mp['ops']] mp['ops'] = set(mp['ops']) if 'timeout' not in mp: raise ImproperlyConfigured( 'You must specify "timeout" option in "%s" CACHEOPS profile' % app_model) return model_profiles
@classmethod def is_username_available(cls, username, self_key=None): if self_key is None: return cls.get_by('username', username) is None user_keys, _, _ = util.get_keys(cls.query(), username=username, limit=2) return not user_keys or self_key in user_keys and not user_keys[1:] @classmethod def is_email_available(cls, email, self_key=None): if not config.CONFIG_DB.check_unique_email: return True user_keys, _, _ = util.get_keys( cls.query(), email=email, verified=True, limit=2) return not user_keys or self_key in user_keys and not user_keys[1:] user_fields = funcy.merge( base.base_fields, { 'active': fields.Boolean, 'admin': fields.Boolean, 'auth_ids': fields.List(fields.String), 'avatar_url': fields.String, 'email': fields.String, 'name': fields.String, 'username': fields.String, 'permissions': fields.List(fields.String), 'token': fields.String, 'verified': fields.Boolean, })
def start(cmdline): ''' Entry point ''' with exception_to_vim_errormsg(): start_unite(fn.merge(variables.state, parse_state(cmdline)))
def process((group, nodes)): """Based on the group, transform a list of nodes into a list of metadata.""" process_val = lambda node: merge( add_span(node), PROCESS.get(group, identity)(node)) return group, map(process_val, nodes)
def __init__(self, name, unvalidated_tree, sections, config): """Fix up settings that depend on the [DXR] section or have inter-setting dependencies. (schema can't do multi-setting validation yet, and configobj can't do cross-section interpolation.) Add a ``config`` attr to trees as a shortcut back to the [DXR] section and a ``name`` attr to save cumbersome tuple unpacks in callers. """ self.config = config self.name = name schema = Schema({ Optional('build_command', default='make -j {workers}'): basestring, Optional('clean_command', default='make clean'): basestring, Optional('description', default=''): basestring, Optional('disabled_plugins', default=plugin_list('')): Plugins, Optional('enabled_plugins', default=plugin_list('*')): Plugins, Optional('es_index', default=config.es_index): basestring, Optional('es_shards', default=5): Use(int, error='"es_shards" must be an integer.'), Optional('ignore_patterns', default=['.hg', '.git', 'CVS', '.svn', '.bzr', '.deps', '.libs', '.DS_Store', '.nfs*', '*~', '._*']): WhitespaceList, Optional('object_folder', default=None): AbsPath, 'source_folder': AbsPath, Optional('source_encoding', default='utf-8'): basestring, Optional('temp_folder', default=None): AbsPath, Optional('p4web_url', default='http://p4web/'): basestring, Optional('workers', default=None): WORKERS_VALIDATOR, Optional(basestring): dict}) tree = schema.validate(unvalidated_tree) if tree['temp_folder'] is None: tree['temp_folder'] = config.temp_folder if tree['object_folder'] is None: tree['object_folder'] = tree['source_folder'] if tree['workers'] is None: tree['workers'] = config.workers # Convert enabled_plugins to a list of plugins: if tree['disabled_plugins'].is_all: # * doesn't really mean "all" in a tree. It means "everything the # [DXR] section enabled". tree['disabled_plugins'] = config.enabled_plugins else: # Add anything globally disabled to our local disabled list: tree['disabled_plugins'].extend(p for p in config.disabled_plugins if p not in tree['disabled_plugins']) if tree['enabled_plugins'].is_all: tree['enabled_plugins'] = [p for p in config.enabled_plugins if p not in tree['disabled_plugins']] tree['enabled_plugins'].insert(0, core_plugin()) # Split ignores into paths and filenames: tree['ignore_paths'] = [i for i in tree['ignore_patterns'] if i.startswith('/')] tree['ignore_filenames'] = [i for i in tree['ignore_patterns'] if not i.startswith('/')] # Delete misleading, useless, or raw values people shouldn't use: del tree['ignore_patterns'] del tree['disabled_plugins'] # Validate plugin config: enableds_with_all_optional_config = set( p for p in tree['enabled_plugins'] if all(isinstance(k, Optional) for k in p.config_schema.iterkeys())) plugin_schema = Schema(merge( dict((Optional(name) if plugin in enableds_with_all_optional_config or plugin not in tree['enabled_plugins'] else name, plugin.config_schema) for name, plugin in all_plugins_but_core().iteritems()), # And whatever isn't a plugin section, that we don't care about: {object: object})) # Insert empty missing sections for enabled plugins with entirely # optional config so their defaults get filled in. (Don't insert them # if the plugin has any required options; then we wouldn't produce the # proper error message about the section being absent.) for plugin in enableds_with_all_optional_config: tree.setdefault(plugin.name, {}) tree = plugin_schema.validate(tree) super(TreeConfig, self).__init__(tree)
def _browse_file(tree, path, line_docs, file_doc, config, date=None, contents=None): """Return a rendered page displaying a source file. :arg string tree: name of tree on which file is found :arg string path: relative path from tree root of file :arg list line_docs: LINE documents as defined in the mapping of core.py, where the `content` field is dereferenced :arg file_doc: the FILE document as defined in core.py :arg config: TreeConfig object of this tree :arg date: a formatted string representing the generated date, default to now :arg string contents: the contents of the source file, defaults to joining the `content` field of all line_docs """ def sidebar_links(sections): """Return data structure to build nav sidebar from. :: [('Section Name', [{'icon': ..., 'title': ..., 'href': ...}])] """ # Sort by order, resolving ties by section name: return sorted(sections, key=lambda section: (section['order'], section['heading'])) if not date: # Then assume that the file is generated now. Remark: we can't use this # as the default param because that is only evaluated once, so the same # time would always be used. date = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000") common = _build_common_file_template(tree, path, date, config) links = file_doc.get('links', []) if is_image(path): return render_template( 'image_file.html', **common) else: # We don't allow browsing binary files, so this must be a text file. # We concretize the lines into a list because we iterate over it multiple times lines = [doc['content'] for doc in line_docs] if not contents: # If contents are not provided, we can reconstruct them by # stitching the lines together. contents = ''.join(lines) offsets = cumulative_sum(imap(len, lines)) tree_config = config.trees[tree] # Construct skimmer objects for all enabled plugins that define a # file_to_skim class. skimmers = [plugin.file_to_skim(path, contents, plugin.name, tree_config, file_doc, line_docs) for plugin in tree_config.enabled_plugins if plugin.file_to_skim] skim_links, refses, regionses, annotationses = skim_file(skimmers, len(line_docs)) index_refs = (Ref.es_to_triple(ref, tree_config) for ref in chain.from_iterable(doc.get('refs', []) for doc in line_docs)) index_regions = (Region.es_to_triple(region) for region in chain.from_iterable(doc.get('regions', []) for doc in line_docs)) tags = finished_tags(lines, chain(chain.from_iterable(refses), index_refs), chain(chain.from_iterable(regionses), index_regions)) return render_template( 'text_file.html', **merge(common, { # Someday, it would be great to stream this and not concretize # the whole thing in RAM. The template will have to quit # looping through the whole thing 3 times. 'lines': [(html_line(doc['content'], tags_in_line, offset), doc.get('annotations', []) + skim_annotations) for doc, tags_in_line, offset, skim_annotations in izip(line_docs, tags_per_line(tags), offsets, annotationses)], 'is_text': True, 'sections': sidebar_links(links + skim_links)}))
def _extract_data(topic_model, docs): doc_data = _extract_doc_data(docs) model_data = _extract_model_data(topic_model, docs, doc_data['vocab']) return fp.merge(doc_data, model_data)
def parse_source(string): colons_with_no_backslashes = r'(?<!\\):' splits = re.split(colons_with_no_backslashes, string) return fn.merge(variables.source, dict(name=splits[0], args=splits[1:]))
from core.api import fields as cfields from core import base from core import util class Feedback(base.Base): name = ndb.StringProperty() subject = ndb.StringProperty() message = ndb.TextProperty() email = ndb.StringProperty() comment = ndb.TextProperty() is_read = ndb.BooleanProperty(default=False) user = ndb.KeyProperty() @classmethod def get_dbs(cls, is_read=None, **kwargs): return super(Feedback, cls).get_dbs( is_read=is_read or util.param('is_read', bool), **kwargs ) feedback_fields = funcy.merge(base.base_fields, { 'name': fields.String, 'subject': fields.String, 'message': fields.String, 'email': fields.String, 'comment': fields.String, 'is_read': fields.Boolean, 'user': cfields.Key, })
def _browse_file(tree, path, line_docs, file_doc, config, is_binary, date=None, contents=None, image_rev=None): """Return a rendered page displaying a source file. :arg string tree: name of tree on which file is found :arg string path: relative path from tree root of file :arg list line_docs: LINE documents as defined in the mapping of core.py, where the `content` field is dereferenced :arg file_doc: the FILE document as defined in core.py :arg config: TreeConfig object of this tree :arg is_binary: Whether file is binary or not :arg date: a formatted string representing the generated date, default to now :arg string contents: the contents of the source file, defaults to joining the `content` field of all line_docs :arg image_rev: revision number of a textual or binary image, for images displayed at a certain rev """ def process_link_templates(sections): """Look for {{line}} in the links of given sections, and duplicate them onto a 'template' field. """ for section in sections: for link in section['items']: if '{{line}}' in link['href']: link['template'] = link['href'] link['href'] = link['href'].replace('{{line}}', '') def sidebar_links(sections): """Return data structure to build nav sidebar from. :: [('Section Name', [{'icon': ..., 'title': ..., 'href': ...}])] """ process_link_templates(sections) # Sort by order, resolving ties by section name: return sorted(sections, key=lambda section: (section['order'], section['heading'])) if not date: # Then assume that the file is generated now. Remark: we can't use this # as the default param because that is only evaluated once, so the same # time would always be used. date = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000") common = _build_common_file_template(tree, path, is_binary, date, config) links = file_doc.get('links', []) if is_binary_image(path): return render_template( 'image_file.html', **merge(common, { 'sections': sidebar_links(links), 'revision': image_rev})) elif is_binary: return render_template( 'text_file.html', **merge(common, { 'lines': [], 'is_binary': True, 'sections': sidebar_links(links)})) else: # We concretize the lines into a list because we iterate over it multiple times lines = [doc['content'] for doc in line_docs] if not contents: # If contents are not provided, we can reconstruct them by # stitching the lines together. contents = ''.join(lines) offsets = build_offset_map(lines) tree_config = config.trees[tree] if is_textual_image(path) and image_rev: # Add a link to view textual images on revs: links.extend(dictify_links([ (4, 'Image', [('svgview', 'View', url_for('.raw_rev', tree=tree_config.name, path=path, revision=image_rev))])])) # Construct skimmer objects for all enabled plugins that define a # file_to_skim class. skimmers = [plugin.file_to_skim(path, contents, plugin.name, tree_config, file_doc, line_docs) for plugin in tree_config.enabled_plugins if plugin.file_to_skim] skim_links, refses, regionses, annotationses = skim_file(skimmers, len(line_docs)) index_refs = (Ref.es_to_triple(ref, tree_config) for ref in chain.from_iterable(doc.get('refs', []) for doc in line_docs)) index_regions = (Region.es_to_triple(region) for region in chain.from_iterable(doc.get('regions', []) for doc in line_docs)) tags = finished_tags(lines, chain(chain.from_iterable(refses), index_refs), chain(chain.from_iterable(regionses), index_regions)) return render_template( 'text_file.html', **merge(common, { # Someday, it would be great to stream this and not concretize # the whole thing in RAM. The template will have to quit # looping through the whole thing 3 times. 'lines': [(html_line(doc['content'], tags_in_line, offset), doc.get('annotations', []) + skim_annotations) for doc, tags_in_line, offset, skim_annotations in izip(line_docs, tags_per_line(tags), offsets, annotationses)], 'sections': sidebar_links(links + skim_links), 'query': request.args.get('q', ''), 'bubble': request.args.get('redirect_type')}))
def tab_open(string): pass def window_open(string): pass def split_open(string): pass def vsplit_open(string): pass openable_actions = fn.merge(common_actions, dict( tab_open = tab_open, window_open = window_open, split_open = split_open, vsplit_open = vsplit_open, )) def read(string): pass def rename(string): pass def remove(string): pass def shell_cmd(string): # Run shell command on file pass
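# fn.merge on dicts lets the right-hand table extend, and on key collisions shadow, the shared one;
# a small illustration with stand-in actions.
import funcy as fn

common_actions = dict(open=lambda s: "open", remove=lambda s: "remove")
extra_actions = dict(open=lambda s: "custom open", tab_open=lambda s: "tab open")

merged = fn.merge(common_actions, extra_actions)
assert set(merged) == {"open", "remove", "tab_open"}
assert merged["open"]("x") == "custom open"  # the later dict wins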