Example #1
def wf_summary(metadata, opts):
    overall_wf_attributes = ('id', 'status', 'workflowName', 'workflowRoot',
                             'submission', 'start')

    (wf_id, wf_status, wf_name, wf_root, wf_submission, wf_start) = \
            [metadata[x] for x in overall_wf_attributes]

    wf_end = get('end', metadata, default="-")

    puts('')
    puts("ID         : {}".format(wf_id))
    puts("Status     : {}".format(wf_status))
    puts("Submit Time: {} (UTC)".format(wf_submission))
    puts("Start  Time: {} (UTC)".format(wf_start))
    puts("End    Time: {} (UTC)".format(wf_end))
    puts("Root       : {}".format(wf_root))
    puts('')

    (calls, states, stats) = _get_wf_call_statuses(metadata)

    table = []
    for c in calls:
        counts = [stats[c][s] for s in states]
        row = [c]
        row.extend(counts)
        table.append(row)

    headers = ['call']
    headers.extend([s for s in states])
    print(tabulate(table, headers=headers))
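The function above formats Cromwell-style workflow metadata; a minimal sketch of the dict shape it expects (all field values here are invented) and of the get() default it relies on when a running workflow has no 'end' timestamp yet:

from cytoolz import get

# Hypothetical, stripped-down metadata; real Cromwell responses carry many more fields.
metadata = {
    'id': 'wf-1234',
    'status': 'Running',
    'workflowName': 'hello',
    'workflowRoot': '/cromwell-executions/hello',
    'submission': '2020-01-01T00:00:00Z',
    'start': '2020-01-01T00:00:05Z',
    'calls': {},
}
print(get('end', metadata, default='-'))   # prints '-' because 'end' is absent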
Example #2
    def assemble(pair):
        a, b = pair
        if a is not None:
            joined = get(on_left, a)
        else:
            joined = get(on_right, b)

        if a is not None:
            left_entries = get(left_self_columns, a)
        else:
            left_entries = (None,) * (len(t.lhs.fields) - len(on_left))

        if b is not None:
            right_entries = get(right_self_columns, b)
        else:
            right_entries = (None,) * (len(t.rhs.fields) - len(on_right))

        return joined + left_entries + right_entries
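assemble here is a closure over names from the surrounding join code (on_left, on_right, left_self_columns, right_self_columns, t); the one behaviour it leans on is get called with a list of indices, shown standalone below with made-up columns:

from cytoolz.curried import get

on_left = [0]               # assumption: the join key is column 0 of the left row
left_self_columns = [1, 2]  # assumption: the remaining left-hand columns
row = ('alice', 100, 'F')

print(get(on_left, row))             # ('alice',)
print(get(left_self_columns, row))   # (100, 'F')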
Example #4
def _get_wf_call_failures(metadata, opts):
    calls = []
    if 'calls' in opts:
        calls = opts['calls'].split(',')
    else:
        calls = metadata['calls'].keys()

    jobids = None
    if 'jobids' in opts:
        jobids = set(opts['jobids'].split(','))

    fails = {}

    for c in calls:
        tasks = metadata['calls'][c]
        failures = pipe(
            tasks,
            filter(lambda x: get('executionStatus', x) == 'Failed'),
            filter(lambda x: _valid_job_id(jobids, get('jobId', x))),
            map(
                lambda x: {
                    'jobId': get('jobId', x),
                    # 'inputs': get('inputs', x),
                    'stderr': get('stderr', x),
                    'shard': get('shardIndex', x),
                    'err_msg': get_in(['failures', 0, 'message'], x, 'NA'),
                    # 'jes': get('jes', x),
                    # 'runtime': get('runtimeAttributes', x),
                    'rc': get('returnCode', x, 'NA'),
                }),
            list)
        fails[c] = failures

    return fails
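The 'err_msg' entry above uses get_in to walk a nested path and fall back to 'NA' when a task recorded no failures; a small self-contained illustration with made-up task dicts:

from cytoolz import get_in

task = {'failures': [{'message': 'Out of memory'}], 'returnCode': 137}
print(get_in(['failures', 0, 'message'], task, 'NA'))   # 'Out of memory'
print(get_in(['failures', 0, 'message'], {}, 'NA'))     # 'NA', the path is missing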
Example #5
def array2sbow(array, zero_tol=1e-07):
    """ Convert from Gensim "sparse bag-of-words" format to array
    
    This isn't actually needed for wrapping Gensim
    """
    if sps.issparse(array):
        array = array.tocoo()
        coo_dta = zip(array.row, array.col, array.data)
        for _, grp in it.groupby(coo_dta, key=tzc.get(0)):
            yield tuple((j, value) for _, j, value in grp)
    else:
        for row in array:
            yield tuple((j, value) for j, value in enumerate(row)
                        if abs(value) > zero_tol)
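An illustrative call on a tiny dense array; the imports are the ones the function body assumes (scipy.sparse as sps, itertools as it, cytoolz.curried as tzc) and the values are made up:

import itertools as it
import numpy as np
import scipy.sparse as sps
import cytoolz.curried as tzc

dense = np.array([[0.0, 2.0, 0.0],
                  [1.0, 0.0, 3.0]])
print(list(array2sbow(dense)))
# two documents: ((1, 2.0),) and ((0, 1.0), (2, 3.0)); only entries above zero_tol survive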
Example #6
def rowfunc(t):
    """ Rowfunc provides a function that can be mapped onto a sequence.

    >>> accounts = TableSymbol('accounts', '{name: string, amount: int}')
    >>> f = rowfunc(accounts['amount'])

    >>> row = ('Alice', 100)
    >>> f(row)
    100

    See Also:
        compute<Rowwise, Sequence>
    """
    from cytoolz.curried import get
    indices = [t._child.fields.index(col) for col in t.fields]
    return get(indices)
Example #7
def rowfunc(t):
    """ Rowfunc provides a function that can be mapped onto a sequence.

    >>> accounts = symbol('accounts', 'var * {name: string, amount: int}')
    >>> f = rowfunc(accounts['amount'])

    >>> row = ('Alice', 100)
    >>> f(row)
    100

    See Also:
        compute<Rowwise, Sequence>
    """
    from cytoolz.curried import get
    indices = [t._child.fields.index(col) for col in t.fields]
    return get(indices)
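Both rowfunc excerpts end by returning a curried get over a list of field positions; what that callable does, shown standalone with an assumed ('name', 'amount', 'gender') row layout:

from cytoolz.curried import get

f = get([0, 1])                 # project positions 0 and 1
print(f(('Alice', 100, 'F')))   # ('Alice', 100)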
Example #8
def _get_wf_call_statuses(metadata):
    calls = metadata['calls'].keys()
    states = set()
    call_stats = {}

    for c in calls:
        tasks = metadata['calls'][c]
        counts = pipe(tasks, map(get('executionStatus')), frequencies)
        states.update(counts.keys())
        call_stats[c] = counts

    base_states = {s: 0 for s in states}

    final_stats = valmap(lambda d: merge(base_states, d), call_stats)
    return (calls, sorted(states), final_stats)
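The counting above pairs frequencies with a merge against a zeroed base dict so every call reports a count for every observed state; a minimal sketch with invented statuses:

from cytoolz import frequencies, merge

statuses = ['Done', 'Done', 'Failed']
counts = frequencies(statuses)                     # {'Done': 2, 'Failed': 1}
base_states = {'Done': 0, 'Failed': 0, 'Running': 0}
print(merge(base_states, counts))                  # {'Done': 2, 'Failed': 1, 'Running': 0}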
Example #9
# In[29]:

clf.fit(docs, labels)
clf.predict(docs)

# In[30]:


def get_step_by_name(pipe, name):
    return [trans for name_, trans in pipe.steps if name_.startswith(name)][0]


# In[31]:

cnt_vects_pipe = get_step_by_name(tfidf_pipe, "cnt_vects")

cnt_vects = [
    get_step_by_name(pipe, "cnt_vect_")
    for _name, pipe in cnt_vects_pipe.transformer_list
]

vocabulary_map = pipe(
    enumerate(concat(cnt_vect.vocabulary_ for cnt_vect in cnt_vects)),
    groupby(get(1)),
    valmap(lambda vals: list(pluck(0, vals))),
)
vocabulary_map

# In[ ]:
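The vocabulary_map pipeline above groups every (position, term) pair by term and keeps the positions; the same pattern on toy vocabularies (contents invented) looks like this:

from cytoolz.curried import pipe, concat, groupby, valmap, pluck, get

vocabs = [{'cat': 0, 'dog': 1}, {'dog': 0, 'fish': 1}]   # stand-ins for cnt_vect.vocabulary_
print(pipe(
    enumerate(concat(vocabs)),                    # (0, 'cat'), (1, 'dog'), (2, 'dog'), (3, 'fish')
    groupby(get(1)),                              # group the pairs by the term itself
    valmap(lambda vals: list(pluck(0, vals))),    # keep only the positions
))
# {'cat': [0], 'dog': [1, 2], 'fish': [3]}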
Example #10
from cytoolz.curried import (pipe, map, filter, get, groupby, reduceby,
                             valmap)

accounts = [
    (1, 'Alice', 100, 'F'),  # id, name, balance, gender
    (2, 'Bob', 200, 'M'),
    (3, 'Charlie', 150, 'M'),
    (4, 'Dennis', 50, 'M'),
    (5, 'Edith', 300, 'F')
]

# I. SELECTING WITH `MAP()` AND `FILTER()`
# SELECT name, balance FROM accounts WHERE balance > 150

# Functional version with pipeline and curry
acc1 = pipe(accounts, filter(lambda account: account[2] > 150),
            map(get([1, 2])), list)
print(acc1)

# List comprehensions version (more Pythonic):
acc2 = [(name, balance) for (id, name, balance, gender) in accounts
        if balance > 150]
print(acc2)

# II. SPLIT-APPLY-COMBINE WITH `GROUPBY` AND `REDUCEBY`:
# 1. Split the dataset into groups by some property
# 2. Reduce each of the groups with some synopsis function

# In Memory Split-Apply-Combine
# SELECT gender, SUM(balance) FROM accounts GROUP BY gender;
print(groupby(get(3), accounts))
# {'M': [(2, 'Bob', 200, 'M'), (3, 'Charlie', 150, 'M'), (4, 'Dennis', 50, 'M')], 'F': [(1, 'Alice', 100, 'F'), (5, 'Edith', 300, 'F')]}
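The section comment above announces reduceby for the aggregation step, but the excerpt stops after groupby; a minimal sketch of that step over the same accounts list:

from cytoolz.curried import reduceby, get

# SELECT gender, SUM(balance) FROM accounts GROUP BY gender;
print(reduceby(get(3), lambda total, acct: total + acct[2], accounts, 0))
# {'F': 400, 'M': 400}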
Example #11
    def get_colnames(self):
        """ Get the column names that would be assigned to a DataFrame of the result """
        return list(
            map(tzc.get(1), sorted(self.model_.items(), key=tzc.get(0))))[:5]
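get_colnames sorts the (key, name) pairs in self.model_ by key and keeps the names; the same sort-then-project pattern standalone, with invented model_ contents:

import cytoolz.curried as tzc

model_ = {2: 'topic_2', 0: 'topic_0', 1: 'topic_1'}
print(list(map(tzc.get(1), sorted(model_.items(), key=tzc.get(0)))))
# ['topic_0', 'topic_1', 'topic_2']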