Esempio n. 1
0
def arxiv_derive_inspire_categories(obj, eng):
    """Fill ``inspire_categories`` from the record's arXiv categories.

    Each arXiv category found in ``obj.data`` is passed to
    ``classify_field``; every truthy result is recorded as a term whose
    source is ``arxiv``, unless an identical entry is already stored.
    The function works purely through side effects on ``obj.data``.

    Args:
        obj (WorkflowObject): a workflow object.
        eng (WorkflowEngine): a workflow engine (not used here).

    Returns:
        None

    """
    # Guarantee the key exists even when no category can be derived.
    obj.data.setdefault('inspire_categories', [])

    for category in get_arxiv_categories(obj.data):
        derived_term = classify_field(category)
        if not derived_term:
            # ``classify_field`` returned a falsy value: nothing to store.
            continue

        candidate = {
            'source': 'arxiv',
            'term': derived_term,
        }

        known = obj.data['inspire_categories']
        if candidate in known:
            # Skip entries that are already present.
            continue
        known.append(candidate)
Esempio n. 2
0
def arxiv_derive_inspire_categories(obj, eng):
    """Convert the arXiv categories of a record to ``inspire_categories``.

    The ``inspire_categories`` key of ``obj.data`` is created when missing
    and then extended, in place, with one ``{'source': 'arxiv', 'term': ...}``
    entry per truthy value returned by ``classify_field``. Entries already
    present in the list are not added a second time.

    Args:
        obj (WorkflowObject): a workflow object.
        eng (WorkflowEngine): a workflow engine (not used here).

    Returns:
        None

    """
    obj.data.setdefault('inspire_categories', [])

    # Lazily classified terms: each category is classified right before
    # its entry is considered, as in a plain loop.
    classified = (
        classify_field(category)
        for category in get_arxiv_categories(obj.data)
    )

    for term in classified:
        if not term:
            continue

        entry = {
            'source': 'arxiv',
            'term': term,
        }
        stored = obj.data['inspire_categories']
        if entry not in stored:
            stored.append(entry)
Esempio n. 3
0
def is_being_harvested_on_legacy(record):
    """Tell whether the record is already being harvested on Legacy.

    A record counts as harvested on Legacy when at least one of its arXiv
    categories is listed in the
    ``ARXIV_CATEGORIES_ALREADY_HARVESTED_ON_LEGACY`` application setting
    (an empty list is assumed when the setting is absent).
    """
    from_record = get_arxiv_categories(record)
    from_config = current_app.config.get(
        'ARXIV_CATEGORIES_ALREADY_HARVESTED_ON_LEGACY', [])

    shared = set(from_record) & set(from_config)
    return bool(shared)
Esempio n. 4
0
def is_being_harvested_on_legacy(record):
    """Return True if Legacy already harvests one of the record's categories.

    The categories harvested on Legacy come from the
    ``ARXIV_CATEGORIES_ALREADY_HARVESTED_ON_LEGACY`` application setting,
    which defaults to an empty list when it is not configured.
    """
    record_side = get_arxiv_categories(record)
    legacy_side = current_app.config.get(
        'ARXIV_CATEGORIES_ALREADY_HARVESTED_ON_LEGACY', [])

    record_set = set(record_side)
    legacy_set = set(legacy_side)
    # Two sets overlap exactly when they are not disjoint.
    return not record_set.isdisjoint(legacy_set)
Esempio n. 5
0
def has_fully_harvested_category(record):
    """Check if the record belongs to a fully harvested arXiv category.

    The fully harvested categories are the union of the ``core`` and
    ``non-core`` entries of the ``ARXIV_CATEGORIES`` application setting.
    A missing setting, or a missing/empty entry, is treated as "no
    categories" instead of failing on ``None + None``.

    Arguments:
        record(dict): the ingested article.

    Return:
        bool: True when the record belongs to an arXiv category that is fully
        harvested, otherwise False.
    """
    record_categories = set(get_arxiv_categories(record))
    harvested_categories = current_app.config.get('ARXIV_CATEGORIES') or {}

    # ``or ()`` covers both an absent key and a key explicitly set to None.
    fully_harvested = set(harvested_categories.get('core') or ())
    fully_harvested.update(harvested_categories.get('non-core') or ())

    return bool(record_categories & fully_harvested)
Esempio n. 6
0
def get_coreness(record):
    """Return the coreness of the record harvested.

    The record's arXiv categories are compared with the ``core`` and
    ``non-core`` entries of the ``ARXIV_CATEGORIES`` application setting;
    ``core`` takes precedence when both match. A missing setting or entry
    is treated as an empty collection, so the lookup never fails on an
    unconfigured application.

    Args:
        record(dict): the record harvested.

    Return:
        Coreness: ``Coreness.core`` when a core category matches,
        ``Coreness.non_core`` when only a non-core category matches,
        ``Coreness.non_relevant`` otherwise.
    """
    arxiv_categories = set(get_arxiv_categories(record))
    # The setting is a mapping; fall back to an empty one (not a list,
    # which cannot be indexed by key) when it is absent.
    relevant_categories = current_app.config.get('ARXIV_CATEGORIES') or {}

    if arxiv_categories & set(relevant_categories.get('core') or ()):
        return Coreness.core
    if arxiv_categories & set(relevant_categories.get('non-core') or ()):
        return Coreness.non_core
    return Coreness.non_relevant
Esempio n. 7
0
def has_fully_harvested_category(record):
    """Check if the record has at least one fully harvested arXiv category.

    Fully harvested categories are read from the ``core`` and ``non-core``
    entries of the ``ARXIV_CATEGORIES`` application setting. When the
    setting or one of its entries is missing (or None), it contributes no
    categories rather than raising a ``TypeError``.

    Arguments:
        record(dict): the ingested article.

    Return:
        bool: True when the record belongs to an arXiv category that is fully
        harvested, otherwise False.
    """
    record_categories = set(get_arxiv_categories(record))
    harvested_categories = current_app.config.get('ARXIV_CATEGORIES') or {}

    # Collect both groups without assuming either key is present.
    fully_harvested = set()
    for group in ('core', 'non-core'):
        fully_harvested.update(harvested_categories.get(group) or ())

    return bool(record_categories & fully_harvested)
Esempio n. 8
0
def test_get_arxiv_categories_returns_all_arxiv_categories():
    """``get_arxiv_categories`` returns the categories of a single eprint."""
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    eprint = {
        'categories': [
            'nucl-th',
        ],
        'value': '1605.03898'
    }
    record = {'arxiv_eprints': [eprint]}  # literature/1458300

    # The fixture must be valid against the ``arxiv_eprints`` subschema.
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    assert ['nucl-th'] == get_arxiv_categories(record)
Esempio n. 9
0
def is_experimental_paper(obj, eng):
    """Tell whether the workflow holds an experimental paper.

    A paper is considered experimental when one of its arXiv categories is
    in ``EXPERIMENTAL_ARXIV_CATEGORIES`` or one of its INSPIRE categories is
    in ``EXPERIMENTAL_INSPIRE_CATEGORIES``.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        bool: whether the workflow contains an experimental paper.

    """
    from_arxiv = get_arxiv_categories(obj.data)
    from_inspire = get_inspire_categories(obj.data)

    arxiv_hits = set(from_arxiv).intersection(
        EXPERIMENTAL_ARXIV_CATEGORIES)
    inspire_hits = set(from_inspire).intersection(
        EXPERIMENTAL_INSPIRE_CATEGORIES)

    return bool(arxiv_hits) or bool(inspire_hits)
Esempio n. 10
0
def test_get_arxiv_categories_returns_all_arxiv_categories():
    """A record with one eprint yields exactly that eprint's categories."""
    hep_schema = load_schema('hep')
    eprints_subschema = hep_schema['properties']['arxiv_eprints']

    arxiv_eprints = [
        {
            'categories': [
                'nucl-th',
            ],
            'value': '1605.03898'
        },
    ]
    record = dict(arxiv_eprints=arxiv_eprints)  # literature/1458300

    # Validate the fixture before exercising the function under test.
    assert validate(record['arxiv_eprints'], eprints_subschema) is None

    categories = get_arxiv_categories(record)
    assert ['nucl-th'] == categories
Esempio n. 11
0
def test_get_arxiv_categories():
    """``get_arxiv_categories`` keeps every category, in record order."""
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    eprint = {
        'categories': [
            'hep-th',
            'hep-ph',
        ],
        'value': '1612.08928',
    }
    record = {'arxiv_eprints': [eprint]}

    # The fixture must be valid against the ``arxiv_eprints`` subschema.
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    assert ['hep-th', 'hep-ph'] == get_arxiv_categories(record)
Esempio n. 12
0
 def _is_core(record):
     """Return the record's arXiv categories that are configured as core.

     The result is the set intersection between the record's arXiv
     categories and the ``core`` entry of the ``ARXIV_CATEGORIES``
     application setting, so it is truthy exactly when the record has a
     core category. A missing setting or ``core`` entry is treated as
     empty instead of raising ``TypeError`` from ``set(None)``.
     """
     arxiv_categories = set(get_arxiv_categories(record))
     configured = current_app.config.get('ARXIV_CATEGORIES') or {}
     return arxiv_categories & set(configured.get('core') or ())
Esempio n. 13
0
def physics_data_an_is_primary_category(record):
    """Tell whether the first arXiv category is ``physics.data-an``.

    Returns False when the record has no arXiv categories.
    """
    categories = get_arxiv_categories(record)
    if not categories:
        return False

    first_category = categories[0]
    return first_category == 'physics.data-an'
Esempio n. 14
0
def jlab_ticket_needed(obj, eng):
    """Check if a JLab curation ticket is needed.

    A ticket is needed when at least one arXiv category of ``obj.data`` is
    listed in the ``JLAB_ARXIV_CATEGORIES`` application setting.
    """
    wanted = set(current_app.config['JLAB_ARXIV_CATEGORIES'])
    present = set(get_arxiv_categories(obj.data))
    # The sets overlap exactly when they are not disjoint.
    return not wanted.isdisjoint(present)
Esempio n. 15
0
 def _is_core(record):
     """Return the core arXiv categories the record belongs to.

     Intersects the record's arXiv categories with the ``core`` entry of
     the ``ARXIV_CATEGORIES`` application setting. The returned set is
     truthy only when the record has at least one core category. An
     absent setting or ``core`` entry counts as empty, which avoids the
     ``TypeError`` that ``set(None)`` would raise.
     """
     record_categories = set(get_arxiv_categories(record))
     settings = current_app.config.get('ARXIV_CATEGORIES') or {}
     core_categories = set(settings.get('core') or ())
     return record_categories & core_categories
Esempio n. 16
0
def physics_data_an_is_primary_category(record):
    """Return True when ``physics.data-an`` is the first arXiv category.

    A record without arXiv categories yields False.
    """
    arxiv_categories = get_arxiv_categories(record)
    if arxiv_categories:
        primary = arxiv_categories[0]
        return primary == 'physics.data-an'
    else:
        return False