Example 1
def _get_validation_losses(sess, data, losses, batch_generator, feed_builder,
                           verbose):
    """Calculate losses for a given dataset.

    Args:
        sess: tensorflow.Session
        data: Dataset instance
        losses: list of Loss tuples
        batch_generator: callable
        feed_builder: callable
        verbose: int

    Returns:
        dict of str -> float
    """
    loss_tensor_names = funcy.ldistinct([loss.tensor for loss in losses])
    tensor_dict = {name: name for name in loss_tensor_names}
    result_dict = _run_predictions(sess, tensor_dict, data, batch_generator,
                                   feed_builder)

    loss_dict = {
        loss.name: loss.function(data.outputs[loss.field],
                                 result_dict[loss.tensor])
        for loss in losses
    }

    if verbose > 0:
        print('Validation Scores:')
        for loss_name, loss in loss_dict.items():
            print('{}: {:0.5}'.format(loss_name, loss))
    return loss_dict
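
Here ldistinct deduplicates the loss tensor names while preserving first-seen order, which a plain set would not guarantee. A minimal sketch of that behavior (names are made up):

import funcy

names = ['loss/mse', 'loss/mae', 'loss/mse']
print(funcy.ldistinct(names))  # ['loss/mse', 'loss/mae'] -- order kept
print(set(names))              # order not guaranteed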
Example 2
File: param.py Project: pmrowla/dvc
    def workspace_status(self):
        if not self.exists:
            return {str(self): "deleted"}
        if self.hash_info.value is None:
            return {str(self): "new"}

        from funcy import ldistinct

        status = defaultdict(dict)
        info = self.hash_info.value if self.hash_info else {}
        actual = self.read_params()

        # NOTE: we want to preserve the order of params as specified in the
        # status. In case of tracking the whole file, the order is top-level
        # keys in the file and then the keys in the `info` from `dvc.lock`
        # (which are alphabetically sorted).
        params = self.params or ldistinct([*actual.keys(), *info.keys()])
        for param in params:
            if param not in actual:
                st = "deleted"
            elif param not in info:
                st = "new"
            elif actual[param] != info[param]:
                st = "modified"
            else:
                assert actual[param] == info[param]
                continue

            status[str(self)][param] = st

        return status
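
A minimal sketch of the ordering described in the NOTE above, using made-up params (not DVC data): keys from the params file come first, followed by the lock-only keys.

from funcy import ldistinct

actual = {'lr': 0.1, 'epochs': 5}   # top-level keys as in the params file
info = {'batch': 32, 'epochs': 5}   # keys from dvc.lock, sorted
print(ldistinct([*actual.keys(), *info.keys()]))
# ['lr', 'epochs', 'batch']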
Example 3
def chain_nodes(nodes, chain):
    """Returns nodes matched by chain."""
    if not chain:
        return nodes

    link, *rest = chain
    if link.func is Ops.const:
        return []
    elif link.func is Ops.multi:
        coll, = link.args
        if isinstance(coll, list):
            coll = dict(enumerate(coll))
        return {
            k: chain_nodes(nodes, subchain + rest)
            for k, subchain in coll.items()
        }
    elif link.func is notnone_fn:
        return ldistinct(
            lcat(
                chain_nodes(nodes, subchain + rest) for subchain in link.args))
    else:
        # Doing this manually in case the link encapsulates a chain we can unpack
        func = link.func if link.args is None else link.func(*link.args)
        if isinstance(func, Chain):
            return chain_nodes(nodes, func + rest)
        else:
            next_value = func(nodes)
            return chain_nodes(next_value,
                               rest) if is_elements(next_value) else nodes
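
In the notnone_fn branch above, results from the alternative subchains are concatenated with lcat and de-duplicated with ldistinct, keeping first-seen order. A standalone sketch with made-up branch results:

from funcy import lcat, ldistinct

branch_results = [[1, 2], [2, 3], [3, 4]]
print(ldistinct(lcat(branch_results)))  # [1, 2, 3, 4]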
Example 4
def parse_name(text):
    # Names to try:
    #   - full with no trailing nor dup spaces
    #   - one with single junk cleared from both ends
    #   - any part longer than 1 char
    name_parts = text.strip().split()
    full_name = ' '.join(name_parts)  # No trailing and dup spaces
    clean_name = re.sub(r'^\w\s|\s\w$', '', full_name)
    names = ldistinct([full_name, clean_name] +
                      [n for n in name_parts if len(n) > 1])

    # Use "ocr normalized" version if raw one fails
    names = ldistinct(interleave(names, map(ocr_normalize, names)))

    versions = lmap(guess_name, names)
    return first((guess, warning) for guess, warning in versions if not warning) \
        or first((guess, warning) for guess, warning in versions if guess) \
        or (None, None)
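
The interleaving step alternates raw and normalized candidates, so the raw spelling of each name is tried first. A sketch with a hypothetical stand-in for ocr_normalize (the real one is project-specific):

from funcy import interleave, ldistinct

def fake_normalize(s):  # hypothetical: fold common OCR confusions
    return s.replace('0', 'O').replace('1', 'I')

names = ['J0HN D0E', 'J0HN']
print(ldistinct(interleave(names, map(fake_normalize, names))))
# ['J0HN D0E', 'JOHN DOE', 'J0HN', 'JOHN']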
Example 5
def test_delete_elements_but_property_always_hold(xs):
    dyn_arr = dynamic_array(xs)
    for x in xs:
        prev_len = len(F.lflatten(dyn_arr))
        delete(dyn_arr, x)
        assert prev_len - 1 == len(F.lflatten(dyn_arr))
        # dynamic array properties
        for arr in dyn_arr:
            assert is_sorted(arr)
            assert is_power_of_two(len(arr))
        assert len(dyn_arr) == len(F.ldistinct(dyn_arr, key=len))
Example 6
def test_delete_elem_not_in_arr_then_nothing_happen(xs_z):
    xs, z = xs_z
    dyn_arr = dynamic_array(xs)

    prev_len = len(F.lflatten(dyn_arr))
    delete(dyn_arr, z)
    assert prev_len == len(F.lflatten(dyn_arr))
    # dynamic array properties
    for arr in dyn_arr:
        assert is_sorted(arr)
        assert is_power_of_two(len(arr))
    assert len(dyn_arr) == len(F.ldistinct(dyn_arr, key=len))
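
The final assert in both tests checks that all chunk lengths are distinct: ldistinct with key=len keeps one array per length, so the two lengths match only when no two chunks share a size. A standalone sketch with made-up chunks:

import funcy as F

chunks = [[1], [2, 3], [4, 5, 6, 7]]   # lengths 1, 2, 4: all distinct
assert len(chunks) == len(F.ldistinct(chunks, key=len))

chunks = [[1], [2], [3, 4]]            # two chunks of length 1
assert len(chunks) != len(F.ldistinct(chunks, key=len))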
Example 7
    def save(self, **kwargs):
        # Only set specie when it's non-controversial
        taxid = ldistinct(
            keep(self.attrs.get, ['platform_taxid', 'sample_taxid']))
        if len(taxid) == 1:
            self.specie = SPECIES.get(taxid[0])
        else:
            self.specie = ''

        self.platforms = re_all(r'GPL\d+', self.attrs['platform_id'])
        self.samples_count = len(self.attrs['sample_id'].split())

        super(Series, self).save(**kwargs)
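
The keep/ldistinct pair implements the "non-controversial" check: keep drops missing attributes and ldistinct collapses agreeing values, so a single-element result means consensus. A sketch with made-up attrs:

from funcy import keep, ldistinct

attrs = {'platform_taxid': '9606', 'sample_taxid': '9606'}
taxid = ldistinct(keep(attrs.get, ['platform_taxid', 'sample_taxid']))
print(taxid)  # ['9606'] -- both sources agree, safe to use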
Example 8
def mygene_fetch(platform, probes, scopes):
    """Queries mygene.info for current entrezid and sym, given an identifier."""
    if scopes == "dna":
        probes = get_dna_probes(platform, probes)
        scopes = "accession"

    def extract_queries(lines):
        lines = remove(r'^(IMAGE:\d+|--[\w>-]+)$', lines)
        queries = cat(re_iter(r'[\w+.-]+', l) for l in lines)
        queries = remove(r'_at$|^\d+-\d+$', queries)  # No such thing
        return queries
        # Disabled alternative: clean unicode for mygene
        # http://stackoverflow.com/questions/15321138/removing-unicode-u2026-like-characters
        # return [
        #     q.decode('unicode_escape').encode('ascii', 'ignore')
        #     for q in queries
        # ]

    _by_probe = group_values(probes.items())
    queries_by_probe = walk_values(extract_queries, _by_probe)

    # Collect all possible queries to make a single request to mygene
    queries = set(cat(queries_by_probe.values()))

    if not queries:
        return []
    mygenes = _mygene_fetch(queries, scopes, platform.specie)

    # Form results into rows
    results = []
    dups = 0
    for probe, queries in queries_by_probe.items():
        matches = ldistinct(keep(mygenes.get, queries))
        # Skip dups
        if len(matches) > 1:
            dups += 1
        elif matches:
            entrez, sym = matches[0]
            results.append({
                'probe': probe,
                'mygene_sym': sym,
                'mygene_entrez': entrez
            })
    if dups:
        cprint('-> Produced %d dups' % dups, 'red')
    return results
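
The per-probe match logic follows the same pattern: keep maps queries through the lookup and drops misses, then ldistinct collapses repeated hits, so more than one survivor signals an ambiguous probe. A sketch with a made-up lookup table:

from funcy import keep, ldistinct

mygenes = {'q1': (1234, 'TP53'), 'q2': (1234, 'TP53')}
matches = ldistinct(keep(mygenes.get, ['q1', 'q2', 'missing']))
print(matches)  # [(1234, 'TP53')] -- one unambiguous match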
Example 9
def _fleiss_kappa(sample_sets):
    # If there is only one set then it can't be measured
    if len(sample_sets) == 1:
        return float('nan')

    all_samples_annos = lcat(sample_sets)
    categories = ldistinct(sv.annotation for sv in all_samples_annos)
    # If there is only one label then it can't be measured
    if len(categories) == 1:
        return float('nan')
    category_index = {c: i for i, c in enumerate(categories)}

    stats = defaultdict(lambda: [0] * len(categories))
    for sv in all_samples_annos:
        stats[sv.sample_id][category_index[sv.annotation]] += 1

    return fleiss_kappa(list(stats.values()))
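
ldistinct accepts any iterable, so the generator expression works directly; the first-seen order of annotations fixes the category indexing. A sketch with made-up labels:

from funcy import ldistinct

annos = ['pos', 'neg', 'pos', 'neu']
categories = ldistinct(a for a in annos)
print({c: i for i, c in enumerate(categories)})
# {'pos': 0, 'neg': 1, 'neu': 2}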
Example 10
def _cohens_kappa(annos1, annos2):
    assert set(s.sample_id for s in annos1) == set(s.sample_id for s in annos2)

    categories = ldistinct(sv.annotation for sv in chain(annos1, annos2))
    # If there is only one label then it can't be measured
    if len(categories) == 1:
        return float('nan')
    category_index = {c: i for i, c in enumerate(categories)}

    table = np.zeros((len(categories), len(categories)))
    annos1 = sorted(annos1, key=attrgetter('sample_id'))
    annos2 = sorted(annos2, key=attrgetter('sample_id'))
    for sv1, sv2 in zip(annos1, annos2):
        table[category_index[sv1.annotation],
              category_index[sv2.annotation]] += 1

    return cohens_kappa(table, return_results=False)
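
Sorting both annotation lists by sample_id lets zip align the two raters' labels per sample before filling the contingency table. A sketch with made-up aligned pairs:

import numpy as np

pairs = [('pos', 'pos'), ('pos', 'neg'), ('neg', 'neg')]
idx = {'pos': 0, 'neg': 1}
table = np.zeros((2, 2))
for a, b in pairs:                 # rows: rater 1, columns: rater 2
    table[idx[a], idx[b]] += 1
print(table)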
Example 11
def test_length_of_arrays_of_dyn_arr_are_all_different(xs):
    dyn_arr = dynamic_array()
    for x in xs:
        insert(dyn_arr, x)
        assert len(dyn_arr) == len(F.ldistinct(dyn_arr, key=len))
Example 12
 def add_metrics(self, values: List[Metric]):
     self._params['metric'] = funcy.ldistinct(
         self._params.get('metric', []) + [v.value for v in values])
     return self
Example 13
 def add_metric(self, value: Metric):
     self._params['metric'] = funcy.ldistinct(
         self._params.get('metric', []) + [value.value])
     return self
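
Both builders extend the metric list via concatenation rather than list.append, which mutates in place and returns None; that way ldistinct always receives a real sequence, duplicates collapse, and the call stays chainable. A sketch with a hypothetical stand-in Metric enum:

import enum
import funcy

class Metric(enum.Enum):  # hypothetical stand-in for the real Metric type
    AUC = 'auc'
    RMSE = 'rmse'

params = {'metric': ['auc']}
params['metric'] = funcy.ldistinct(params['metric'] + [Metric.AUC.value])
print(params['metric'])  # ['auc'] -- duplicate collapsed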
Example 14
def search(request):
    # Save last specie in session
    specie = request.GET.get('specie')
    if specie != request.session.get('specie'):
        request.session['specie'] = specie

    q = request.GET.get('q')
    if not q:
        return {'series': None}

    exclude_tags = lkeep(silent(int), request.GET.getlist('exclude_tags'))
    series_tags, tag_series, tag_ids = series_tags_data()

    # Parse query
    q_string, q_tags = _parse_query(q)
    q_tags, wrong_tags = lsplit(lambda t: t.lower() in tag_ids, q_tags)
    if wrong_tags:
        message = 'Unknown tag%s %s.' % ('s' if len(wrong_tags) > 1 else '',
                                         ', '.join(wrong_tags))
        messages.warning(request, message)
    if not q_string and not q_tags:
        return {'series': None}

    # Build qs
    qs = search_series_qs(q_string)
    if specie:
        qs = qs.filter(specie=specie)

    if q_tags:
        q_tag_ids = lkeep(tag_ids.get(t.lower()) for t in q_tags)
        include_series = reduce(set.intersection,
                                (tag_series[t] for t in q_tag_ids))
        if include_series:
            qs = qs.filter(id__in=include_series)
        else:
            message = 'No series annotated with %s.' \
                % (q_tags[0] if len(q_tags) == 1 else 'all these tags simultaneously')
            messages.warning(request, message)
            return {'series': []}

    series_ids = qs.values_list('id', flat=True).order_by()
    tags = ldistinct(mapcat(series_tags, series_ids), key=itemgetter('id'))

    if exclude_tags:
        exclude_series = join(tag_series[t] for t in exclude_tags)
        qs = qs.exclude(id__in=exclude_series)

    series = paginate(request, qs, 10)

    # Get annotations statuses
    annos_qs = SeriesAnnotation.objects.filter(series__in=series) \
                               .values_list('series_id', 'tag_id', 'best_cohens_kappa')
    tags_validated = {(s, t): k == 1 for s, t, k in annos_qs}

    return dict(
        {
            'series': series,
            'page': series,
            'tags_validated': tags_validated,
            'tags': tags,
            'series_tags': series_tags,
        }, **_search_stats(qs))
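
At the tags line above, annotations gathered across many series collapse to one entry per tag id via ldistinct with key=itemgetter('id'), keeping first-seen order. A standalone sketch with made-up tag dicts:

from operator import itemgetter
from funcy import ldistinct

tags = [{'id': 1, 'name': 'cancer'}, {'id': 2, 'name': 'liver'},
        {'id': 1, 'name': 'cancer'}]
print(ldistinct(tags, key=itemgetter('id')))
# [{'id': 1, 'name': 'cancer'}, {'id': 2, 'name': 'liver'}]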