Example #1
def examples(encoder, gen):

    exes = tz.thread_last(gen, (tz.sliding_window, 3),
                          (map, lambda x: [encoder.encode_raw(i) for i in x]),
                          (map, tz.compose(list, tz.concat)), (map, np.array))

    return exes
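A minimal, self-contained sketch of what this window/encode/flatten pipeline yields, assuming a toy stand-in encoder (the real encoder.encode_raw is not shown in the snippet above):

import numpy as np
import toolz as tz

class ToyEncoder:
    # hypothetical stand-in: encodes one item as a fixed-length list
    def encode_raw(self, item):
        return [item, item * 10]

enc = ToyEncoder()
windows = tz.thread_last(
    range(5),
    (tz.sliding_window, 3),                           # overlapping windows of 3 items
    (map, lambda w: [enc.encode_raw(i) for i in w]),  # encode each item in the window
    (map, tz.compose(list, tz.concat)),               # flatten the window's encodings
    (map, np.array))                                  # one numpy array per window
print(next(iter(windows)))  # -> [ 0  0  1 10  2 20]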
Example #2
def despam_results(results):
    """ Remove spammy looking posts. """
    def _filters(result):
        # do not remove this filter, the app requires it
        if not result.get('json_metadata') or type(
                result['json_metadata']) != dict:
            return False

        # filter out spam and unloved posts
        if len(result['json_metadata'].get('links', [])) > 15:
            return False
        if len(result['json_metadata'].get('users', [])) > 10:
            return False
        if result.get('net_votes', 0) < 5:
            return False
        if int(result.get('author_reputation', -1)) < 0:
            return False
        if int(result.get('net_rshares')) < 0:
            return False

        # todo add more filters
        return True

    def _clean_body(result):
        # todo sanitize non https links
        if result.get('body'):
            result['body'] = result['body'].replace('steemitboard.com',
                                                    'localhost')
        return result

    return thread_last(results, (filter, _filters), (map, _clean_body), list)
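A toy run of the filter above on hand-made posts (not real chain data), assuming despam_results and its thread_last import are in scope, to show which records survive and how the body is rewritten:

posts = [
    {'json_metadata': {'links': [], 'users': []}, 'net_votes': 9,
     'author_reputation': 55, 'net_rshares': 100,
     'body': 'vote tally at steemitboard.com'},
    {'json_metadata': 'not-a-dict', 'net_votes': 50},  # dropped: metadata is not a dict
]
print(despam_results(posts))
# -> only the first post remains, with 'steemitboard.com' replaced by 'localhost'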
Example #3
def remove_whitespace(string):
    """Removes spaces, tabs and newlines from the string
    """
    return thread_last(string,
                       (re.sub, r'\t*', ''),
                       (re.sub, r' *', ''),
                       (re.sub, r'\n*', ''))
Example #4
def get_groups(ip):
    def _get_infs(record):
        name = re_find(r'(Smartgroup:\d+)', record)
        if name:
            name = name.lower().replace(':', '')
        infs = re_all(r'(x?gei_\d+/\d+/\d+)\s?selected', record)
        return dict(name=name, infs=infs)

    def _get_desc_mode(child, group):
        rslt = do_some(child, 'show run int {name}'.format(name=group['name']))
        desc = re_find(r'description\s+(\S+)', rslt)
        group['desc'] = desc
        rslt = do_some(child, 'show run int {inf}'.format(
            inf=group['infs'][0]))
        mode = re_find(r'smartgroup\s\d+\smode\s(\S+)', rslt)
        group['mode'] = mode
        return group

    try:
        child = telnet(ip)
        rslt = re.split(r'\r\n\s*\r\n', do_some(child, 'show lacp internal'))
        groups = thread_last(rslt,
                             (lmap, _get_infs),
                             (select, lambda x: x['name'] and x['infs']))
        lmap(partial(_get_desc_mode, child), groups)
        close(child)
    except (pexpect.EOF, pexpect.TIMEOUT) as e:
        return ('fail', None, ip)
    return ('success', groups, ip)
Example #5
def get_groups(ip):
    def _get_infs(record):
        name = re_find(r'(Smartgroup:\d+)', record)
        if name:
            name = name.lower().replace(':', '')
        infs = re_all(r'(x?gei_\d+/\d+/\d+)\s?selected', record)
        return dict(name=name, infs=infs)

    def _get_desc_mode(child, group):
        rslt = do_some(child, 'show run int {name}'.format(name=group['name']))
        desc = re_find(r'description\s+(\S+)', rslt)
        group['desc'] = desc
        rslt = do_some(child,
                       'show run int {inf}'.format(inf=group['infs'][0]))
        mode = re_find(r'smartgroup\s\d+\smode\s(\S+)', rslt)
        group['mode'] = mode
        return group

    try:
        child = telnet(ip)
        rslt = re.split(r'\r\n\s*\r\n', do_some(child, 'show lacp internal'))
        groups = thread_last(rslt, (lmap, _get_infs),
                             (select, lambda x: x['name'] and x['infs']))
        lmap(partial(_get_desc_mode, child), groups)
        close(child)
    except (pexpect.EOF, pexpect.TIMEOUT) as e:
        return ('fail', None, ip)
    return ('success', groups, ip)
Example #6
def collect_tweets(
    es_client,
    track,
    twitter_consumer_key,
    twitter_consumer_secret,
    twitter_access_token_key,
    twitter_access_token_secret,
    elasticsearch_index="profanity-power-index",
    drop_index=False,
    batch_size=10,
):

    if es_client.indices.exists(elasticsearch_index):
        logger.warning(f"Index {elasticsearch_index} exists.")
        if drop_index:
            logger.warning(f"Dropping {elasticsearch_index}.")
            es_client.indices.delete(elasticsearch_index)
            logger.info(f"Creating {elasticsearch_index}.")
            es_client.indices.create(index=elasticsearch_index,
                                     body=TWEET_MAPPING)
    else:
        logger.info(f"Creating {elasticsearch_index}.")
        es_client.indices.create(index=elasticsearch_index, body=TWEET_MAPPING)
        logger.info(f"{elasticsearch_index} successfully created.")

    api = twitter.Api(
        consumer_key=twitter_consumer_key,
        consumer_secret=twitter_consumer_secret,
        access_token_key=twitter_access_token_key,
        access_token_secret=twitter_access_token_secret,
    )

    logger.info(f"Connecting to twitter stream. Tracking {', '.join(track)}.")
    tweet_stream = api.GetStreamFilter(track=track)

    tweet_to_bulk = curry(_tweet_to_bulk)(elasticsearch_index)
    tweet_doc_stream = thread_last(
        tweet_stream,
        # Filter out tweets that don't contain profanity.
        (filter, _contains_profanity),
        # Convert the tweets to a bulk-indexable document.
        (map, tweet_to_bulk),
        # Partition for bulk writes.
        (partition_all, batch_size),
    )

    logger.info(f"Sending tweets to {elasticsearch_index}.")
    failed = 0
    succeeded = 0
    logger.info(f"{failed + succeeded} tweets processed: "
                f"{succeeded} succeeded, {failed} failed.")
    # Since the doc stream is partitioned we get the tweets in batches.
    for tweet_batch in tweet_doc_stream:
        ok, fail = es_bulk(es_client, tweet_batch, stats_only=True)
        succeeded += ok
        failed += fail
        if (failed + succeeded) % 100 == 0:
            logger.info(f"{failed + succeeded} tweets processed: "
                        f"{succeeded} succeeded, {failed} failed.")
Example #7
def find_tme_files(dir):
    return tz.thread_last(
        dir,
        os.listdir,
        (filter, lambda x: re.search(r".*\.html$", x)),
        (map, lambda x: f"{dir}/{x}"),
        sorted,
    )
Example #8
def groupby_and_summarize(dataframe, col, funcs=[], fnames=[]):
    return thread_last(
        dataframe,
        lambda x: x.groupby(col),
        (map, snd),
        (map, summarize(funcs=funcs,
                        fnames=fnames)),
        pd.concat,
        reset_index)
Example #9
def search_boards(key, token, board_name):
    method = "get"
    path = "members/me/boards"
    all_boards = execute_request(key, token, method, path, filter="open", fields="name")
    try:
        return toolz.thread_last(
            all_boards, (filter, lambda x: x["name"] == board_name), toolz.first, (toolz.get, "id")
        )
    except StopIteration:
        raise ValueError("No board found with that name")
Example #10
def invariant_output(string):
    """Removes all changing elements from the string

    * Replaces all numbers in a string with the 'X' character
    * Removes all appearances of activate_autocompletion
    * Removes all newlines and whitespace from the string
    """
    numbers_regexp = r'\d'
    activate_autocomp_regexp = (r'((\n?[ ]*)|,)'
                                'activate_autocompletion'
                                '(:.*\n)?')
    return thread_last(string,
                       (re.sub, numbers_regexp, 'X'),
                       (re.sub, activate_autocomp_regexp, ''))
Example #11
def tokenize(doc, with_stem=False):
    """Given a document string, return a list of tokens.
    """
    pipeline = [
        (filter, is_alpha),
        (filter, not_proper),
        (map, lower),
        (filter, not_stopword)]
    if with_stem:
        pipeline += [(map, stem)]
    pipeline += [(map, remove_pos)]
    return list(tz.thread_last(
        nltk.tag.pos_tag(nltk.tokenize.word_tokenize(doc)),
        *pipeline))
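The pattern here, building the pipeline as a list of (function, argument) tuples and splatting it into thread_last, also works with plain stdlib callables. A minimal sketch:

import toolz as tz

steps = [(filter, str.isalpha), (map, str.lower)]
steps += [(filter, lambda w: len(w) > 4)]         # extra steps can be appended conditionally
print(list(tz.thread_last(["Hello", "42", "ox", "World"], *steps)))
# -> ['hello', 'world']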
Example #12
def parse_file(content: str) -> Tuple[str, Set[str], str, Optional[str]]:
    lines = content.splitlines()
    if not (lines[0] == '---' and lines[1:].count('---') == 1):
        error('''
            The md file must contain exactly 2 lines consisting of
            ---
            The first of which must be the first line.
            Please correct the file by calling `mdn edit _e`''')
    front_matter = AttrDict(
        yaml.safe_load('\n'.join(lines[1:lines[1:].index('---') + 1])))
    assert_front_matter_correct(front_matter)
    tags = t.thread_last(tag_pattern.findall(content), (map, str.lower), (set))
    return front_matter.title, tags, front_matter.group,\
            front_matter.get("doi", None)
Example #13
def normalize_data(sentences: np.ndarray):
    def stringify(x):
        return str(x)

    def unicode2ascii(s):
        return ''.join(c for c in unicodedata.normalize('NFD', s)
                       if unicodedata.category(c) != 'Mn')

    def normalize_string(s):
        s = unicode2ascii(s.lower().strip())
        s = re.sub(r"([.!?])", r" \1", s)
        s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
        return s

    return tlz.thread_last(sentences, (map, stringify),
                           (map, normalize_string))
Example #14
def parse_label_group(string):
    """ Takes string containing all data for one field, and creates a
        tidy dataframe with two columns: 'Well Name', and field. """
    letters = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    raw_dataframe = pd.read_csv(StringIO(string))
    label_name = raw_dataframe.columns[0]
    return thread_last(
        raw_dataframe.values[:, 1:],
        lambda values: df(values,
                          columns=map(lambda num: stringify(num, 2),
                                      range(1, values.shape[1] + 1))),
        lambda dataframe: add_col(dataframe, 'Row',
                                  pd.Series(letters[:len(dataframe)])),
        lambda dataframe: pd.melt(dataframe, id_vars=['Row']),
        lambda dataframe: add_col(dataframe, 'Well Name',
                                  dataframe['Row'] + dataframe['variable']),
        lambda dataframe: dataframe.drop(['Row', 'variable'], axis=1),
        lambda dataframe: dataframe.rename(columns={'value': label_name}),
        lambda dataframe: dataframe[['Well Name', label_name]])
Example #15
def get_layout_data(path):
    """ Given a path to a file with proper format (see below), return a dataframe
        with 'Well Name' column and additional columns for each provided parameter.

        Format: Parameter Name, 1, 2 ...
                A, Value, Value ...
                B, Value, Value ...
        Notes: '\r' is present in csv output on windows (or google docs) and can confuse pandas `read_csv` function.
               Algorithm partitions by whether row is empty (each section of data should be separated by a blank line),
                 then filters out groups where row is empty (text of row contains only commas).
                ...   """
    return thread_last(
        path, from_file, split_on_newlines,
        (map, lambda line: line.rstrip(',')), (partitionby, string_is_empty),
        (filter, lambda group: not string_is_empty(group[0])),
        (map, lambda strings: str.join('\n', strings)),
        (map, parse_label_group),
        (reduce, lambda left, right: pd.merge(left, right, on='Well Name')))
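A standalone sketch of the partition-by-blank-line step, using inline lines instead of a file (from_file, parse_label_group and the other helpers are project-specific and not reproduced here):

import toolz as tz

lines = ["Dose, 1, 2", "A, 10, 20", "B, 30, 40",
         "",
         "pH, 1, 2", "A, 7, 7", "B, 6, 8"]
sections = tz.thread_last(
    lines,
    (tz.partitionby, lambda line: line == ""),   # group runs of blank / non-blank lines
    (filter, lambda group: group[0] != ""),      # drop the blank-line groups
    (map, "\n".join),
    list)
print(len(sections))  # -> 2, one CSV block per parameter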
Example #16
def parse_label_group(string):
    """ Takes string containing all data for one field, and creates a
        tidy dataframe with two columns: 'Well Name', and field. """
    letters = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    raw_dataframe = pd.read_csv(StringIO(string))
    label_name = raw_dataframe.columns[0]
    return thread_last(
        raw_dataframe.values[:, 1:],
        lambda values: df(values,
                          columns=map(lambda num: stringify(num, 2),
                                      range(1, values.shape[1] + 1))),
        lambda dataframe: add_col(dataframe, 'Row',
                                  pd.Series(letters[:len(dataframe)])),
        lambda dataframe: pd.melt(dataframe, id_vars=['Row']),
        lambda dataframe: add_col(dataframe, 'Well Name', dataframe['Row'] +
                                  dataframe['variable']),
        lambda dataframe: dataframe.drop(['Row', 'variable'], axis=1),
        lambda dataframe: dataframe.rename(columns={'value': label_name}),
        lambda dataframe: dataframe[['Well Name', label_name]])
Example #17
def resolve(query: Querylike[T], api: Api[T_auth], loaders: load.Registry,
            auth: T_auth, sender: http.Sender) -> T:
    """resolve a querylike object.

    Parameters
    ----------
    query
        the querylike object to evaluate
    api
        the API to handle the request
    loaders
        The registry of object loaders
    auth
        The authentication object
    sender
        The request sender
    """
    return thread_last(query, attrgetter('__req__'), api.prepare,
                       (flip(api.add_auth), auth), sender, api.parse,
                       loaders(query.__rtype__))
Example #18
def get_layout_data(path):
    """ Given a path to a file with proper format (see below), return a dataframe 
        with 'Well Name' column and additional columns for each provided parameter.
        
        Format: Parameter Name, 1, 2 ...
                A, Value, Value ...
                B, Value, Value ...
        Notes: '\r' is present in csv output on windows (or google docs) and can confuse pandas `read_csv` function.
               Algorithm partitions by whether row is empty (each section of data should be separated by a blank line), 
                 then filters out groups where row is empty (text of row contains only commas).
                ...   """
    return thread_last(
        path,
        from_file,
        split_on_newlines,
        (map, lambda line: line.rstrip(',')),
        (partitionby, string_is_empty),
        (filter, lambda group: not string_is_empty(group[0])),
        (map, lambda strings: str.join('\n', strings)),
        (map, parse_label_group),
        (reduce, lambda left, right: pd.merge(left, right, on='Well Name')))
Example #19
def get_ports(ip):
    def _get_info(record):
        name = re_find(r'^((?:xg|g|f)ei\S+) is \w+ ?\w+,', record)
        state = re_find(r'^(?:xg|g|f)ei\S+ is (\w+ ?\w+),', record)
        desc = re_find(r'Description is (\S+ *\S+)', record)
        inTraffic = int(
            re_find(r'120 seconds input.*:\s+(\d+)\sBps', record) or 0) * 8 / 1000000
        outTraffic = int(
            re_find(r'120 seconds output.*:\s+(\d+)\sBps', record) or 0) * 8 / 1000000
        return dict(name=name, desc=desc, state=state, inTraffic=inTraffic, outTraffic=outTraffic)

    try:
        child = telnet(ip)
        rslt = do_some(child, 'show interface')
        close(child)
    except (pexpect.EOF, pexpect.TIMEOUT) as e:
        return ('fail', None, ip)
    rslt = thread_last(rslt,
                       (re.split, r'\r\r\n *\r\r\n *'),
                       (select, r'^(?:xg|g|f)ei_'),
                       (lmap, _get_info))
    return ('success', rslt, ip)
Example #20
def process_msg(q):
    return toolz.thread_last(qget(q), (map, json.loads),
                             (map, lambda v: v['event']),
                             (filter, lambda v: str_filter(v['event_name'])))
Example #21
def create_well_df(cell_dict):
    return thread_last(cell_dict,
                       (mapdict, lambda k, v: {"Cell Type": k, "Well Name": v}),
                       (map, df),
                       pd.concat)
Example #22
import functools

from toolz import thread_last, curry, pipe

print(list(map(lambda x: x + 10, [1, 2, 3])))  # => [11, 12, 13]
print(list(filter(lambda x: x > 5, [3, 4, 5, 6, 7])))  # => [6, 7]
print(functools.reduce(lambda x, y: x + y, [1, 2, 3, 4, 5]))  # => 15

x = functools.reduce(lambda x, y: x + y,
                     map(lambda x: x + 10,
                         filter(lambda x: x > 5, [3, 4, 5, 6, 7])))  # => 33
print('combine all operations together:', x)

x = thread_last([3, 4, 5, 6, 7], (filter, lambda x: x > 5),
                (map, lambda x: x + 10),
                (functools.reduce, lambda x, y: x + y))
print('use toolz:', x)

from operator import add, lt
from toolz.curried import filter, map, reduce
add = curry(add)
lt = curry(lt)

x = pipe([3, 4, 5, 6, 7], filter(lt(5)), map(add(10)), reduce(add))
print('another toolz style:', x)
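The rule thread_last applies at every step, stated once: a tuple (f, a, b) is called as f(a, b, value), i.e. the threaded value goes into the last argument slot, while thread_first puts it into the first slot. A two-line sketch:

from toolz import thread_first, thread_last

assert thread_last(2, (pow, 10)) == pow(10, 2)   # value threaded last  -> 100
assert thread_first(2, (pow, 10)) == pow(2, 10)  # value threaded first -> 1024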
Example #23
import re
from typing import (
    Callable,
    Iterable,
    Iterator,
    Optional,
    Pattern,
    Tuple,
    TypeVar,
)

from toolz import excepts, identity, memoize, thread_last, update_in

A = TypeVar("A")
K = TypeVar("K")
V = TypeVar("V")
A_List = Tuple[Tuple[K, V], ...]

_ingredient_patterns: Iterable[Pattern] = thread_last(
    [
        r"(?P<quantity>\d*\.?\d+)\s(?P<unit>\w+)\.?\s(of\s)?(?P<ingredient>.+)",
        r"(?P<quantity>\d*\.?\d+)\s(?P<ingredient>.+)",
        r"(?P<ingredient>.+)",
    ],
    (map, re.compile),
    tuple,
)

_ingredient_cleanup_funcs: A_List[str, Optional[Callable]] = (
    ("quantity", None),
    ("unit", lambda s: s[0].lower() if s else None),
    ("ingredient", None),
)

next_or_none: Callable[[Iterator[A]],
                       Optional[A]] = excepts(StopIteration,
                                              lambda a: next(filter(None, a)),
                                              lambda __: None)
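For context, how the first of these patterns behaves on a typical ingredient line, using re directly (pattern_match, post_process and the rest of the module are not shown in this excerpt):

import re

m = re.match(
    r"(?P<quantity>\d*\.?\d+)\s(?P<unit>\w+)\.?\s(of\s)?(?P<ingredient>.+)",
    "2 cups of flour")
print(m.groupdict())
# -> {'quantity': '2', 'unit': 'cups', 'ingredient': 'flour'}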
Example #24
def execute_request(key, token, method, path, *args, **kwargs):
    url = "https://api.trello.com/1/{0}".format(path).format(**kwargs)
    payload = toolz.thread_last(kwargs, (remove_used_fields, path), (bundle_auth, key, token))
    req = requests.request(method, url, data=payload)
    req.raise_for_status()
    return req.json()
Example #25
def groupby_and_summarize(dataframe, col, funcs=[], fnames=[]):
    return thread_last(dataframe, lambda x: x.groupby(col), (map, snd),
                       (map, summarize(funcs=funcs, fnames=fnames)), pd.concat,
                       reset_index)
Example #26
def parse(patterns: Iterable[Pattern],
          post_func_map: A_List[str, Optional[Callable]], s: str) -> Mapping:
    return thread_last(s, (pattern_match, patterns), groupdict,
                       (post_process, post_func_map))
Example #27
def load_opensudoku(difficulty):
    resp = requests.get(
        f'https://opensudoku.moire.org/sudoku/{difficulty}.opensudoku')
    parsed = xmltodict.parse(resp.text)
    return thread_last(parsed, (get_in, ['opensudoku', 'game']),
                       (pluck, '@data'), tuple)
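The get_in / pluck steps on a hand-made parsed dict (no HTTP request, puzzle strings shortened and invented), to show the shape of the result:

from toolz import get_in, pluck, thread_last

parsed = {'opensudoku': {'game': [{'@data': '530070000'},
                                  {'@data': '600195000'}]}}
print(thread_last(parsed,
                  (get_in, ['opensudoku', 'game']),  # drill into the nested dict
                  (pluck, '@data'),                  # pull the puzzle string from each game
                  tuple))
# -> ('530070000', '600195000')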
Example #28
    def from_string(cls, raw):
        return thread_last(raw, parse_raw, cls)
Example #29
def create_well_df(cell_dict):
    return thread_last(cell_dict, (mapdict, lambda k, v: {
        "Cell Type": k,
        "Well Name": v
    }), (map, df), pd.concat)
Example #30
from toolz import thread_last

from sudoku.fp.load import load_opensudoku
from sudoku.fp.solve import combined_step
from sudoku.mp.base import Sudoku
from sudoku.mp.solve import Solver

thread_last(
    load_opensudoku('easy')[3], Sudoku.from_string,
    Solver(combined_step, max_tries=100), print)