Example No. 1
0
def push_keywords(docs):
    """
    Add keywords from datatables to the corresponding publication record.

    :param docs: iterable of document dicts; datatables are identified by
        the presence of a ``related_publication`` key, every other document
        is treated as a publication.
    :return: list of publications (each now carrying ``data_keywords``)
        followed by the datatables.
    :raises ValueError: if ``docs`` contains neither publications nor tables.
    """
    # ``reduce`` is not a builtin in Python 3 and ``defaultdict`` is not
    # imported at file level in this snippet; import both locally so the
    # function is self-contained.
    from collections import defaultdict
    from functools import reduce

    from hepdata.utils.miscellanous import splitter

    # Split docs: tables carry 'related_publication', publications do not.
    datatables, publications = splitter(docs,
                                        lambda d: 'related_publication' in d)
    if not publications and not datatables:
        raise ValueError("Documents provided are not appropriate " +
                         "for pushing keywords")

    for pub in publications:
        # Tables that point back at this publication record.
        data = filter(lambda table:
                      table['related_publication'] == pub['recid'],
                      datatables)

        # Flatten every matching table's keyword list into one list.
        keywords = reduce(lambda acc, d: acc + d['keywords'], data, [])

        # Group keyword values by keyword name.
        agg_keywords = defaultdict(list)
        for kw in keywords:
            agg_keywords[kw['name']].append(kw['value'])

        # Remove duplicate values per keyword (set() does not keep order).
        for k, v in agg_keywords.items():
            agg_keywords[k] = list(set(v))

        pub['data_keywords'] = agg_keywords

    return publications + datatables
Example No. 2
0
def push_keywords(docs):
    """
    Attach aggregated table keywords to their parent publication records.

    Datatables are the documents containing a 'related_publication' key;
    every other document is treated as a publication. Each publication
    gains a 'data_keywords' mapping of keyword name to a de-duplicated
    list of values gathered from its tables.
    """
    from hepdata.utils.miscellanous import splitter

    datatables, publications = splitter(docs,
                                        lambda d: 'related_publication' in d)
    if len(publications) == 0 and len(datatables) == 0:
        raise ValueError("Documents provided are not appropriate " +
                         "for pushing keywords")

    for pub in publications:
        # Gather keyword values from every table pointing at this record.
        agg_keywords = defaultdict(list)
        for table in datatables:
            if table['related_publication'] == pub['recid']:
                for kw in table['keywords']:
                    agg_keywords[kw['name']].append(kw['value'])

        # De-duplicate the collected values for each keyword name.
        for name in agg_keywords:
            agg_keywords[name] = list(set(agg_keywords[name]))

        pub['data_keywords'] = agg_keywords

    return publications + datatables
Example No. 3
0
def test_utils():
    """Verify that splitter separates publications from datatables."""
    publication = {'id': 1, 'type': 'publication'}
    tables = [{'related_publication': 1, 'id': n, 'type': 'data_table'}
              for n in range(2, 9)]
    docs = [publication] + tables

    datatables, publications = splitter(docs,
                                        lambda d: 'related_publication' in d)

    assert (publications[0]['id'] == 1)
    assert (publications[0]['type'] == 'publication')
    assert (datatables[0]['type'] == 'data_table')
Example No. 4
0
def map_result(es_result):
    """
    Transform a raw Elasticsearch result into the API response shape.

    :param es_result: dict with "hits", "total" and "aggregations" keys.
    :return: dict with "results" (papers, each carrying its datatables
        under "data" and their count under "total_tables"), "facets"
        and "total".
    """
    hits = es_result["hits"]
    total_hits = es_result["total"]
    aggregations = es_result["aggregations"]

    # Separate datatable hits from paper hits.
    tables, papers = splitter(hits, is_datatable)
    fetch_remaining_papers(tables, papers)
    aggregated = match_tables_to_papers(tables, papers)

    results = []
    for paper, datatables in aggregated:
        mapped_hit = get_basic_record_information(paper)
        # Materialize the mapping: in Python 3 ``map`` returns a lazy
        # iterator, so ``len(data)`` below would raise TypeError and the
        # serialized "data" field would be empty/unusable.
        data = [get_basic_record_information(table) for table in datatables]
        mapped_hit.update({"data": data, "total_tables": len(data)})
        results.append(mapped_hit)

    facets = parse_aggregations(aggregations)

    return {"results": results, "facets": facets, "total": total_hits}
Example No. 5
0
def test_utils():
    """Verify that splitter partitions docs on 'related_publication'."""
    docs = [{'id': 1, 'type': 'publication'}]
    docs += [{'related_publication': 1, 'id': doc_id, 'type': 'data_table'}
             for doc_id in range(2, 9)]

    datatables, publications = splitter(docs,
                                        lambda d: 'related_publication' in d)

    assert (publications[0]['id'] == 1)
    assert (publications[0]['type'] == 'publication')
    assert (datatables[0]['type'] == 'data_table')
Example No. 6
0
def map_result(es_result):
    """
    Transform a raw Elasticsearch result into the API response shape.

    :param es_result: dict with 'hits', 'total' and 'aggregations' keys.
    :return: dict with 'results' (papers, each carrying its datatables
        under 'data' and their count under 'total_tables'), 'facets'
        and 'total'.
    """
    hits = es_result['hits']
    total_hits = es_result['total']
    aggregations = es_result['aggregations']

    # Separate datatable hits from paper hits.
    tables, papers = splitter(hits, is_datatable)
    fetch_remaining_papers(tables, papers)
    aggregated = match_tables_to_papers(tables, papers)

    results = []
    for paper, datatables in aggregated:
        mapped_hit = get_basic_record_information(paper)
        # Materialize the mapping: in Python 3 ``map`` returns a lazy
        # iterator, so ``len(data)`` below would raise TypeError and the
        # serialized 'data' field would be empty/unusable.
        data = [get_basic_record_information(table) for table in datatables]
        mapped_hit.update({
            'data': data,
            'total_tables': len(data),
        })
        results.append(mapped_hit)

    facets = parse_aggregations(aggregations)

    return {'results': results, 'facets': facets, 'total': total_hits}