コード例 #1
0
ファイル: nb2html.py プロジェクト: wcbeard/blog-bak
def main(files):
    """Print each entry of ``files`` and then run ``convert_and_update`` on it.

    Entries are handled one at a time, in iteration order; nothing is
    returned.
    """
    for path in files:
        print(path)
        convert_and_update(path)
コード例 #2
0
ファイル: utils.py プロジェクト: d10genes/word2vec
    key in `ks`"""
    lowtxt = txt.lower()
    find_icase = partial(findall_ignore_case, low_txt=lowtxt, txt=txt)
    casedata = (DataFrame([(k, len(find_all(txt, k)), len(find_icase(k)))
                for k in ks], columns=['Word', 'Case', 'Nocase'])
        .query('Case != Nocase').assign(Ratio=lambda x: x.eval('Case / Nocase'))
        .sort_values('Ratio', ascending=True).reset_index(drop=1)
    )
    return {diffcase: phrase2wd(k) for k in casedata.query('Ratio < @thresh').Word
        for diffcase in set(find_icase(k))}


# Gensim
def to_gensim_params(cnf, **kw):
    """Translate the config object ``cnf`` into a keyword-argument dict.

    The values are read from ``cnf.N``, ``cnf.eta``, ``cnf.min_eta``,
    ``cnf.C`` and ``cnf.K``; ``sample`` and ``sg`` are fixed.  Anything
    passed through ``kw`` is applied last, so it overrides the defaults
    and may add extra keys.
    """
    gparams = {
        'size': cnf.N,
        'alpha': cnf.eta,
        'min_alpha': cnf.min_eta,
        # Half of cnf.C (true division, exactly as written by the author).
        'window': cnf.C / 2,
        'sample': 0,
        # Author's note of other values: [5, 7, 10, 12, 15, 17], 0
        'negative': cnf.K,
        'sg': 1,
    }
    gparams.update(kw)
    return gparams


def phrase2wd(x):
    """Split ``x`` on spaces and hyphens and rejoin the pieces with '_'."""
    pieces = re.split(r'[ -]', x)
    return '_'.join(pieces)


def take(*args, **kwargs):
    """Call ``islice`` with the given arguments and return the result as a list."""
    return list(islice(*args, **kwargs))


def ilen(xs):
    """Return how many items the iterable ``xs`` yields (consumes it)."""
    count = 0
    for _ in xs:
        count += 1
    return count
コード例 #3
0
ファイル: nb2html.py プロジェクト: wcbeard/blog-bak
# import itertools as it
from os.path import basename, splitext, join, abspath
import re
import subprocess
import toolz.curried as z
import argparse
import yaml

# Accessors for the item at index 0 / index 1 of a sequence.
fst = z.operator.itemgetter(0)
snd = z.operator.itemgetter(1)

# Directory names; NOTEBOOK_HTML_DIR is the prefix used by `htmlname` below.
# NOTE(review): NOTEBOOK_DIR is presumably where the source notebooks live;
# its use is not visible in this section.
NOTEBOOK_DIR = '_notebooks'
# NOTEBOOK_HTML_DIR = 'ipy_html'
NOTEBOOK_HTML_DIR = '_includes'

# base: path -> file name with directory and extension stripped
# (basename, then splitext, then the first element of the resulting pair).
base = z.comp(fst, splitext, basename)
# htmlname: path -> join(NOTEBOOK_HTML_DIR, '<base>.html').
htmlname = z.comp(z.curry(join, NOTEBOOK_HTML_DIR), '{}.html'.format, base)

# Captures the text between an opening '---' line and a closing '---'.
# With VERBOSE, the literal line breaks inside the pattern are ignored; only
# the escaped `\n` sequences match newlines.  DOTALL lets `.` span lines.
# MULTILINE is not set, so `^` anchors at the very start of the text only.
# NOTE(review): presumably this extracts a post's YAML front matter - confirm.
pat = re.compile(r'''^---\n
(.+?)\n
---
''', re.VERBOSE + re.DOTALL)  # # ^---
# print(pat.findall(txt)[0])


def post_todict(posttxt, ret_yaml=False):
    m = pat.findall(posttxt)
    if not m:
        if ret_yaml:
            return (None, None)
        return
コード例 #4
0
ファイル: filters.py プロジェクト: peteut/migen
import operator
import toolz.curried as toolz
import pyramda as R

# Name templates.  Each is the bound `str.format` of a one-placeholder
# template, so calling it with a name returns the decorated name, e.g.
# reg_typename("x") -> "x_reg_t".
reg_typename = "{}_reg_t".format

rin_name = "{}_rin".format

r_name = "{}_r".format

v_name = "{}_v".format

architecture_id = "two_process_{}".format

# indent: curried two-argument function built point-free.
# Reading the pipeline from the innermost call outwards:
#   1. R.unapply packs the two call arguments into one sequence.
#   2. toolz.juxt derives a pair from that sequence:
#        - a function made from "    " multiplied by the first argument,
#          partially applied to R.add;
#        - the second argument split on "\n".
#   3. R.apply(map) maps that function over the split lines.
#   4. "\n".join glues the mapped lines back together.
# NOTE(review): assumes pyramda's add/multiply/apply/unapply behave like
# their Ramda namesakes, which would make indent(n, text) prefix every line
# of `text` with n * 4 spaces - confirm against pyramda.
indent = R.curry_n(
    2,
    toolz.comp(
        "\n".join,
        R.apply(map),
        R.unapply(toolz.juxt([
            toolz.comp(R.add, R.multiply("    "), toolz.first),
            toolz.comp(operator.methodcaller("split", "\n"), toolz.second)]))))
コード例 #5
0
import re

from bs4 import BeautifulSoup
from ratelimit import limits, sleep_and_retry
from requests import get
from tqdm import tqdm
from yaml import dump
from toolz.curried import comp

# NOTE(review): presumably the YAML file the scraped data is dumped to; the
# write itself is not visible in this section.
data_path = 'data.yaml'
# Pages scraped by download_countries() and download_borders() respectively.
country_list_url = 'https://simple.wikipedia.org/wiki/List_of_European_countries'
border_list_url = 'https://en.wikipedia.org/wiki/List_of_countries_and_territories_by_land_borders'
# Prefix that download_coords() prepends to a page name.
base_url = 'https://en.wikipedia.org/wiki/'
# Decorator: `limits(1, 1)` wrapped by `sleep_and_retry`.
# NOTE(review): per the ratelimit package this should allow one call per
# one-second period and sleep instead of raising when exceeded - confirm.
wikilimiter = comp(sleep_and_retry, limits(1, 1))

@wikilimiter
def download_countries():
    """Scrape the country list page into a ``{link text: page name}`` dict.

    For every table row after the first, the ``/wiki/`` links are
    collected and the first of them is dropped.  The text of the first
    remaining link becomes the key, and the ``href`` of the last link with
    the ``/wiki/`` prefix removed becomes the value.
    NOTE(review): presumably key = country name and value = capital page
    name; confirm against the page layout.

    Returns:
        dict mapping ``str`` to ``str``.

    Raises:
        requests.exceptions.RequestException: on timeout, connection
            failure, or an HTTP error status.
    """
    # Without a timeout a stalled connection would block forever; failing
    # fast on a bad status avoids parsing an error page as if it were data.
    response = get(country_list_url, timeout=30)
    response.raise_for_status()
    country_list_page = BeautifulSoup(response.text, features='html.parser')

    wiki_link = re.compile(r'^/wiki/')
    rows = country_list_page.find('tbody').find_all('tr')[1:]
    country_table = [row.find_all('a', href=wiki_link)[1:] for row in rows]
    return {str(a[0].string): str(a[-1]['href']).replace('/wiki/', '')
            for a in country_table}

@wikilimiter
def download_borders():
    """Scrape the land-borders page into a ``{first name: other names}`` dict.

    Table rows after the first two are processed.  A row is skipped when
    its markup contains ``'overseas'`` but not ``'excluding'``.  For every
    kept row, the texts of its ``/wiki/`` links that start with an
    uppercase character are collected; the first one is the key and the
    remaining ones form the value list.
    NOTE(review): presumably key = country and value = its neighbours;
    confirm against the page layout.

    Returns:
        dict mapping ``str`` to ``list`` of ``str``.

    Raises:
        requests.exceptions.RequestException: on timeout, connection
            failure, or an HTTP error status.
    """
    # Without a timeout a stalled connection would block forever; failing
    # fast on a bad status avoids parsing an error page as if it were data.
    response = get(border_list_url, timeout=30)
    response.raise_for_status()
    border_list_page = BeautifulSoup(response.text, features='html.parser')

    wiki_link = re.compile(r'^/wiki/')
    borders = {}
    for row in border_list_page.find('tbody').find_all('tr')[2:]:
        row_html = str(row)
        if 'overseas' in row_html and 'excluding' not in row_html:
            continue
        link_texts = (str(a.string) for a in row.find_all('a', href=wiki_link))
        names = [text for text in link_texts if text[0].isupper()]
        borders[names[0]] = names[1:]
    return borders

@wikilimiter
def download_coords(capital):
    capital_page = BeautifulSoup(get(base_url + capital).text, features='html.parser')
コード例 #6
0
from itertools import repeat, islice, count
import numpy as np
import numpy.random as nr
from numpy.linalg import norm
from operator import itemgetter as itg
from pandas import Series, DataFrame, Index
from numba import jit

import toolz.curried as z
from voluptuous import Any, Invalid, Schema, ALLOW_EXTRA
import numba_utils as nbu
import utils as ut

# Shorthand decorator: numba's `jit` preconfigured with nopython=True.
nopython = jit(nopython=True)

# Shadows the builtin `map` with an eager variant that returns a list.
# NOTE(review): `builtins` is not imported in the import lines visible
# here - confirm it is imported elsewhere in the file.
map = z.comp(list, builtins.map)
# NOTE(review): presumably the placeholder token for out-of-vocabulary
# words - confirm against its users.
UNKNOWN = '<UNK>'


# Matrix weight representation
class Cat(object):
    """Join and split W matrices for passing as single arg."""

    @staticmethod
    def join(w1, w2):
        """Return ``w1`` and the transpose of ``w2`` stacked column-wise.

        ``w1`` and ``w2.T`` must have the same number of rows for
        ``np.hstack`` to succeed.
        """
        return np.hstack([w1, w2.T])

    @staticmethod
    def split(Wall):
        """Undo :meth:`join`: cut ``Wall`` in half along its columns.

        Returns a pair ``(W1, W2)`` where ``W1`` is the left half and
        ``W2`` is the transposed right half.  With an odd column count
        the right half receives the extra column.
        """
        # Floor division keeps `n` an int; `/` produces a float on
        # Python 3 and NumPy rejects float slice bounds with a TypeError.
        n = Wall.shape[1] // 2
        W1, W2_ = Wall[:, :n], Wall[:, n:]
        return W1, W2_.T
コード例 #7
0
import operator
import toolz.curried as toolz
import pyramda as R

# Name templates.  Each is the bound `str.format` of a one-placeholder
# template, so calling it with a name returns the decorated name, e.g.
# reg_typename("x") -> "x_reg_t".
reg_typename = "{}_reg_t".format

rin_name = "{}_rin".format

r_name = "{}_r".format

v_name = "{}_v".format

architecture_id = "two_process_{}".format

# indent: curried two-argument function built point-free.
# Reading the pipeline from the innermost call outwards:
#   1. R.unapply packs the two call arguments into one sequence.
#   2. toolz.juxt derives a pair from that sequence:
#        - a function made from "    " multiplied by the first argument,
#          partially applied to R.add;
#        - the second argument split on "\n".
#   3. R.apply(map) maps that function over the split lines.
#   4. "\n".join glues the mapped lines back together.
# NOTE(review): assumes pyramda's add/multiply/apply/unapply behave like
# their Ramda namesakes, which would make indent(n, text) prefix every line
# of `text` with n * 4 spaces - confirm against pyramda.
indent = R.curry_n(
    2,
    toolz.comp(
        "\n".join, R.apply(map),
        R.unapply(
            toolz.juxt([
                toolz.comp(R.add, R.multiply("    "), toolz.first),
                toolz.comp(operator.methodcaller("split", "\n"), toolz.second)
            ]))))
コード例 #8
0
    casedata = (DataFrame(
        [(k, len(find_all(txt, k)), len(find_icase(k))) for k in ks],
        columns=['Word', 'Case', 'Nocase']).query('Case != Nocase').assign(
            Ratio=lambda x: x.eval('Case / Nocase')).sort_values(
                'Ratio', ascending=True).reset_index(drop=1))
    return {
        diffcase: phrase2wd(k)
        for k in casedata.query('Ratio < @thresh').Word
        for diffcase in set(find_icase(k))
    }


# Gensim
def to_gensim_params(cnf, **kw):
    """Build a parameter dict from ``cnf``, letting ``kw`` override it.

    Reads ``cnf.N``, ``cnf.eta``, ``cnf.min_eta``, ``cnf.C`` and
    ``cnf.K``; ``sample`` is fixed at 0 and ``sg`` at 1.  Keyword
    arguments win over these defaults and may introduce new keys.
    """
    defaults = {
        'size': cnf.N,
        'alpha': cnf.eta,
        'min_alpha': cnf.min_eta,
        # Half of cnf.C, using true division as in the author's version.
        'window': cnf.C / 2,
        'sample': 0,
        # Author's note of other values: [5, 7, 10, 12, 15, 17], 0
        'negative': cnf.K,
        'sg': 1,
    }
    return dict(defaults, **kw)


def phrase2wd(x):
    """Replace the spaces and hyphens separating words in ``x`` with '_'."""
    return '_'.join(re.split(r'[ -]', x))


def take(*args, **kwargs):
    """Materialise ``islice(*args, **kwargs)`` into a list."""
    sliced = islice(*args, **kwargs)
    return list(sliced)


def ilen(xs):
    """Count the items produced by iterable ``xs``, exhausting it."""
    total = 0
    for _ in xs:
        total += 1
    return total
コード例 #9
0
ファイル: nb2html.py プロジェクト: d10genes/blog
def main(files):
    """Process every entry of ``files`` in order.

    Each entry is printed first and then handed to
    ``convert_and_update``.  Returns nothing.
    """
    for nb_file in files:
        print(nb_file)
        convert_and_update(nb_file)
コード例 #10
0
ファイル: nb2html.py プロジェクト: d10genes/blog
# import itertools as it
from os.path import basename, splitext, join, abspath
import re
import subprocess
import toolz.curried as z
import argparse
import yaml

# Accessors for the item at index 0 / index 1 of a sequence.
fst = z.operator.itemgetter(0)
snd = z.operator.itemgetter(1)

# Directory names; NOTEBOOK_HTML_DIR is the prefix used by `htmlname` below.
# NOTE(review): NOTEBOOK_DIR is presumably where the source notebooks live;
# its use is not visible in this section.
NOTEBOOK_DIR = '_notebooks'
# NOTEBOOK_HTML_DIR = 'ipy_html'
NOTEBOOK_HTML_DIR = '_includes'

# base: path -> file name with directory and extension stripped
# (basename, then splitext, then the first element of the resulting pair).
base = z.comp(fst, splitext, basename)
# htmlname: path -> join(NOTEBOOK_HTML_DIR, '<base>.html').
htmlname = z.comp(z.curry(join, NOTEBOOK_HTML_DIR), '{}.html'.format, base)

# Captures the text between an opening '---' line and a closing '---'.
# With VERBOSE, the literal line breaks inside the pattern are ignored; only
# the escaped `\n` sequences match newlines.  DOTALL lets `.` span lines.
# MULTILINE is not set, so `^` anchors at the very start of the text only.
# NOTE(review): presumably this extracts a post's YAML front matter - confirm.
pat = re.compile(r'''^---\n
(.+?)\n
---
''', re.VERBOSE + re.DOTALL)  # # ^---
# print(pat.findall(txt)[0])


def post_todict(posttxt, ret_yaml=False):
    m = pat.findall(posttxt)
    if not m:
        if ret_yaml:
            return (None, None)
        return
コード例 #11
0
ファイル: helper.py プロジェクト: peteut/migen
import toolz.curried as toolz
import pyramda as R

# get_sigtype(x): pick a type string based on len(x).
#   len(x) == 1 -> "std_ulogic"
#   otherwise   -> "std_logic_vector({len(x) - 1} downto 0)"
# NOTE(review): assumes pyramda's if_else(condition, on_true, on_false),
# equals, always and dec behave like their Ramda namesakes - confirm.
get_sigtype = R.if_else(
    toolz.comp(R.equals(1), len), R.always("std_ulogic"),
    toolz.comp("std_logic_vector({} downto 0)".format, R.dec, len))
コード例 #12
0
ファイル: helper.py プロジェクト: peteut/migen
import toolz.curried as toolz
import pyramda as R

# get_sigtype(x): pick a type string based on len(x).
#   len(x) == 1 -> "std_ulogic"
#   otherwise   -> "std_logic_vector({len(x) - 1} downto 0)"
# NOTE(review): assumes pyramda's if_else(condition, on_true, on_false),
# equals, always and dec behave like their Ramda namesakes - confirm.
get_sigtype = R.if_else(
    toolz.comp(R.equals(1), len),
    R.always("std_ulogic"),
    toolz.comp("std_logic_vector({} downto 0)".format, R.dec, len))