Example #1
 def list(self) -> QueryResult[Planet]:
     responses = self.store.get("planets")
     response, responses = toolz.peek(responses)
     return QueryResultBuilder() \
         .count(response.count) \
         .iterator(toolz.mapcat(lambda r: (Planet(p) for p in r.results), responses)) \
         .build()
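The toolz.peek call above reads the first response without consuming the stream, so the total count can be taken from it while the full sequence of pages is still handed to mapcat. A minimal sketch of that behaviour (the fake pages below are made up for illustration):

import toolz

pages = iter([{"count": 3}, {"count": 3}])   # hypothetical lazy result pages
first, pages = toolz.peek(pages)             # look at the first page...
print(first["count"])                        # 3
print(len(list(pages)))                      # ...while the stream still yields both pages: 2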
Example #2
def searchdirectories(filepattern: str,
                      directories: Iterable[str]) -> List[str]:
    """Search directories for files matching filepattern regex."""
    return list(
        sorted(
            mapcat(_crawldirectoryforfilesmatching(filepattern),
                   findfiles(directories))))
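toolz.mapcat maps a function over a sequence of inputs and chains the resulting iterables into one flat stream, which is why the result above can be sorted directly. A standalone illustration (the helper and file names below are made up, standing in for _crawldirectoryforfilesmatching(filepattern)):

from toolz import mapcat

def fake_crawl(directory):
    # hypothetical stand-in that returns the matches found in one directory
    return [directory + "/a.py", directory + "/b.py"]

print(sorted(mapcat(fake_crawl, ["src", "tests"])))
# ['src/a.py', 'src/b.py', 'tests/a.py', 'tests/b.py']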
Example #3
 def list(self) -> QueryResult[Vehicle]:
     responses = self.store.get("vehicle")
     response, responses = toolz.peek(responses)
     return QueryResultBuilder() \
         .count(response.count) \
         .iterator(toolz.mapcat(lambda r: (Vehicle(p) for p in r.results), responses)) \
         .build()
Example #4
    def tokenize(self, message: str, stopwords=None) -> List[str]:
        """
        Tokenize the string passed in.
        If message is not a string or is the empty string, return [].
        """
        if not isinstance(message, str) or message == "":
            print("received bad input...")
            try:
                print(f"message length = {len(message)}")
            except TypeError as e:
                print(e)
            print(f"message is {str(message)}")
            return []
        else:
            tokens: List[str] = []
            # explicitly run all other methods on input
            sents = self.run_pipeline(message)
            # we now have a list of sentences; drop those matching the empty-sentence pattern
            sents = t.remove(self.empty_sent_re.match, sents)
            # tokenize each sentence and concatenate the resulting token lists
            tokens = list(t.mapcat(nltk.word_tokenize, sents))

            if stopwords:
                tokens = [x for x in tokens if x.lower() not in stopwords]
            return tokens
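The last two pipeline steps can be reproduced in isolation: toolz.remove drops elements for which the predicate is true, and mapcat flattens the per-sentence token lists. A small sketch (the sample sentences are made up, and nltk.word_tokenize needs the punkt tokenizer data to be installed):

import toolz as t
import nltk

sents = ["Hello there.", "", "Second sentence."]
sents = t.remove(lambda s: s == "", sents)           # drop empty sentences
tokens = list(t.mapcat(nltk.word_tokenize, sents))   # flatten per-sentence token lists
print(tokens)  # ['Hello', 'there', '.', 'Second', 'sentence', '.']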
Example #5
def functional():
  return count_by(itemgetter('hour'),
                  map(json.loads,
                      filter(None,
                             mapcat(lambda output: output.strip().split('\n'),
                                    map(lambda date: logs[date.strftime('%Y/%m/%d')],
                                        map(lambda days_ago: today - timedelta(days=days_ago),
                                            range(1, days_of_logs + 1)))))))
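Reading from the inside out: the innermost map builds the last few days of dates, the next stages pull and split the corresponding log output, and count_by tallies parsed entries per hour. The names logs, today, and days_of_logs come from the surrounding script, and count_by/mapcat are assumed to come from a functional helper library; toolz.countby shows the same counting behaviour on a made-up list of parsed entries:

from operator import itemgetter
from toolz import countby

entries = [{"hour": 9}, {"hour": 9}, {"hour": 17}]   # hypothetical parsed log lines
print(countby(itemgetter("hour"), entries))          # {9: 2, 17: 1}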
Example #6
def merge_related_elements(interval_group):
    """Merge block and superblock ids for a group of identical intervals.

    Args:
        interval_group (list): list of identical intervals

    Returns:
        BaseInterval: unified interval with merged related element ids
    """
    # extract block and superblock ids from each of the intervals
    block_ids = mapcat(attrgetter('block_ids'), interval_group)
    superblock_ids = mapcat(attrgetter('superblock_ids'), interval_group)

    return BaseInterval(
        *interval_group[0][:6],  # use first interval as base
        block_ids=list(block_ids),  # resolve and add
        superblock_ids=list(superblock_ids)  # do
    )
Example #7
def piped():
    return (_ | range(1, days_of_logs + 1)
            | map(lambda days_ago: today - timedelta(days=days_ago))
            | map(lambda date: logs[date.strftime('%Y/%m/%d')])
            | mapcat(lambda output: output.strip().split('\n'))
            | filter(None)
            | map(json.loads)
            | count_by(itemgetter('hour'))
            | _)
Example #10
def eqn_nums(nums: Numbers) -> Equations:
    # all combs/perms of ops
    ops = combinations_with_replacement("+-/*", len(nums) - 1)
    op_perms = mapcat(permutations, ops)
    # all number permutations, compute these now (not a gen)
    # and take the set, this will save on redundant eqns
    num_perms = set(permutations(nums))

    eqns = product(num_perms, op_perms)
    return (list(p) + list(o) for p, o in eqns)
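A hedged usage sketch, assuming Numbers and Equations are aliases for a sequence of ints and an iterable of mixed number/operator lists: each yielded item is a permutation of the numbers followed by a permutation of operators.

# with eqn_nums and its itertools/toolz imports in scope (hypothetical input):
for eqn in eqn_nums((1, 2)):
    print(eqn)   # e.g. [1, 2, '+'], [2, 1, '+'], [1, 2, '-'], ...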
Example #11
    def p(new_df: pd.DataFrame) -> pd.DataFrame:
        make_dummies = lambda col: dict(map(lambda categ: ("fklearn_feat__" + col + "==" + str(categ),
                                                           (new_df[col] == categ).astype(int)),
                                            vec[col][int(drop_first_column):]))

        oh_cols = dict(mapcat(lambda col: merge(make_dummies(col),
                                                {"fklearn_feat__" + col + "==" + "nan":
                                                    (~new_df[col].isin(vec[col])).astype(int)} if hardcode_nans
                                                else {}).items(),
                              columns_to_categorize))

        return new_df.assign(**oh_cols).drop(columns_to_categorize, axis=1)
Example #13
def main():
    args = parse_cli()
    not_media = Counter()
    getType = lambda x: x.rsplit('.', 1)[-1]
    notMedia = lambda path: not_media.update([getType(path)])
    isMedia = lambda x: getType(x).lower() in media_types
    buildFullPath = toolz.curry(lambda path, file: '/'.join([path, file]) if isMedia(file) else notMedia(file))
    # os.walk yields (path, dirs, files); index explicitly, since Python 3 lambdas cannot unpack tuples
    iterFiles = lambda walk_entry: map(buildFullPath(walk_entry[0]), walk_entry[2])
    file_list = list(filter(None, toolz.mapcat(iterFiles, os.walk(args.base_path))))
    print('{0} total files'.format(len(file_list)))

    image_scan = mediaStruct()
    for media in file_list:
        image_scan.add_media(media)
    image_scan.save(args.output_file)
    print('file types not imported', not_media)
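toolz.curry is what lets buildFullPath(path) above be used as a one-argument function inside map; a minimal standalone sketch (the directory and file name are made up):

import toolz

join_path = toolz.curry(lambda path, file: '/'.join([path, file]))
add_photo_dir = join_path('/photos')   # partially applied: still waiting for `file`
print(add_photo_dir('cat.jpg'))        # /photos/cat.jpg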
Example #14
def iter_unlooped_nodes(node, loop_variables_nodes, unloop_keys=None):
    loop_variables_names, sequences = zip(*map(
        lambda loop_variable_node: (
            loop_variable_node['name'],
            mapcat(
                enumeration_node_to_sequence,
                loop_variable_node['enumerations'],
                ),
            ),
        loop_variables_nodes,
        ))
    loop_variables_values_list = itertools.product(*sequences)
    for loop_variables_values in loop_variables_values_list:
        value_by_loop_variable_name = dict(zip(loop_variables_names, loop_variables_values))
        yield unlooped(
            node=node,
            unloop_keys=unloop_keys,
            value_by_loop_variable_name=value_by_loop_variable_name,
            )
Example #15
def findfiles(patterns: Iterable[str]) -> List[str]:
    return list(sorted(mapcat(_matchfilepattern, patterns)))
Example #16
def unzip(path, tree):
  if isinstance(tree, str) or not hasattr(tree, '__iter__'):
    yield path + [tree]
  else:
    for p in t.mapcat(t.partial(unzip, path + tree[0:1]), tree[1:]):
      yield p
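A usage sketch under the assumption that the tree is a nested list whose first element is the node label and whose remaining elements are subtrees (lists, not tuples, since path + tree[0:1] concatenates lists); unzip then yields every root-to-leaf path:

tree = ['root', ['a', 'leaf1', 'leaf2'], ['b', 'leaf3']]
print(list(unzip([], tree)))
# [['root', 'a', 'leaf1'], ['root', 'a', 'leaf2'], ['root', 'b', 'leaf3']]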
Example #17
def visit_regle(node):
    return mapcat(
        lambda node1: visit_node(node1) if node1['type'] == 'pour_formula' else [visit_node(node1)],
        node['formulas'],
        )
Example #18
import sys
import itertools
import toolz

from gensim.models import word2vec

data_file = sys.argv[1]

sentences = [
    s for s in word2vec.LineSentence(data_file)
    if toolz.count(toolz.unique(s)) >= 2
]

cmb = toolz.frequencies(
    toolz.mapcat(lambda s: itertools.combinations(sorted(toolz.unique(s)), 2),
                 sentences))

for (k1, k2), v in sorted(cmb.items(), key=lambda x: -x[1]):
    print(f"item1 = {k1}, item2 = {k2}, freq = {v}")