def push_keywords(docs):
    """Add keywords from datatables to the corresponding publication record.

    :param docs: list of document dicts; a datatable is identified by the
        presence of a 'related_publication' key, everything else is treated
        as a publication.
    :return: the publications (each augmented with a 'data_keywords'
        mapping of keyword name -> de-duplicated values) followed by the
        datatables.
    :raises ValueError: if docs contains neither publications nor datatables.
    """
    from collections import defaultdict

    from hepdata.utils.miscellanous import splitter

    datatables, publications = splitter(
        docs, lambda d: 'related_publication' in d)

    if not publications and not datatables:
        raise ValueError("Documents provided are not appropriate "
                         "for pushing keywords")

    # Check the related publication field and aggregate keywords per paper.
    for pub in publications:
        # Flatten the keywords of every table pointing at this publication.
        # (The original reduce(acc + d['keywords']) was O(n^2) and relied on
        # the Python 2 builtin `reduce`, which is a NameError on Python 3.)
        keywords = [
            kw
            for table in datatables
            if table['related_publication'] == pub['recid']
            for kw in table['keywords']
        ]

        agg_keywords = defaultdict(list)
        for kw in keywords:
            agg_keywords[kw['name']].append(kw['value'])

        # Remove duplicate values for each keyword name.
        for name, values in agg_keywords.items():
            agg_keywords[name] = list(set(values))

        pub['data_keywords'] = agg_keywords

    return publications + datatables
def test_utils():
    """splitter() partitions docs into datatables and publications."""
    docs = [{'id': 1, 'type': 'publication'}]
    docs += [{'related_publication': 1, 'id': n, 'type': 'data_table'}
             for n in range(2, 9)]

    datatables, publications = splitter(
        docs, lambda doc: 'related_publication' in doc)

    assert publications[0]['id'] == 1
    assert publications[0]['type'] == 'publication'
    assert datatables[0]['type'] == 'data_table'
def map_result(es_result):
    """Map a raw Elasticsearch result into the API response structure.

    :param es_result: dict with "hits", "total" and "aggregations" keys.
    :return: dict with mapped "results", parsed "facets" and the "total"
        hit count.
    """
    hits = es_result["hits"]
    total_hits = es_result["total"]
    aggregations = es_result["aggregations"]

    # Separate datatable hits from publication hits.
    tables, papers = splitter(hits, is_datatable)
    fetch_remaining_papers(tables, papers)
    aggregated = match_tables_to_papers(tables, papers)

    results = []
    for paper, datatables in aggregated:
        mapped_hit = get_basic_record_information(paper)
        # Materialise into a list: on Python 3 map() returns a lazy
        # iterator, so len(data) below would raise TypeError and the
        # serialised "data" field would be exhausted/empty.
        data = [get_basic_record_information(table) for table in datatables]
        mapped_hit.update({"data": data, "total_tables": len(data)})
        results.append(mapped_hit)

    facets = parse_aggregations(aggregations)
    return {"results": results, "facets": facets, "total": total_hits}
def test_utils():
    """Check that splitter separates data tables from publications."""
    publication_doc = {'id': 1, 'type': 'publication'}
    table_docs = []
    for n in range(2, 9):
        table_docs.append(
            {'related_publication': 1, 'id': n, 'type': 'data_table'})
    docs = [publication_doc] + table_docs

    datatables, publications = splitter(
        docs, lambda d: 'related_publication' in d)

    assert publications[0]['id'] == 1
    assert publications[0]['type'] == 'publication'
    assert datatables[0]['type'] == 'data_table'
def map_result(es_result):
    """Map a raw Elasticsearch result into the API response structure.

    :param es_result: dict with 'hits', 'total' and 'aggregations' keys.
    :return: dict with mapped 'results', parsed 'facets' and the 'total'
        hit count.
    """
    hits = es_result['hits']
    total_hits = es_result['total']
    aggregations = es_result['aggregations']

    # Separate datatable hits from publication hits.
    tables, papers = splitter(hits, is_datatable)
    fetch_remaining_papers(tables, papers)
    aggregated = match_tables_to_papers(tables, papers)

    results = []
    for paper, datatables in aggregated:
        mapped_hit = get_basic_record_information(paper)
        # Materialise into a list: on Python 3 map() returns a lazy
        # iterator, so len(data) below would raise TypeError and the
        # serialised 'data' field would be exhausted/empty.
        data = [get_basic_record_information(table) for table in datatables]
        mapped_hit.update({
            'data': data,
            'total_tables': len(data),
        })
        results.append(mapped_hit)

    facets = parse_aggregations(aggregations)
    return {'results': results, 'facets': facets, 'total': total_hits}