Ejemplo n.º 1
0
def arxiv_derive_inspire_categories(obj, eng):
    """Fill ``inspire_categories`` using the record's arXiv categories.

    Works purely through side effects on ``obj.data``: the
    ``inspire_categories`` key is created when missing, and one
    ``{'source': 'arxiv', 'term': ...}`` entry is appended for every arXiv
    category that ``classify_field`` maps to a truthy term. Entries already
    present in the list are not added a second time.

    Args:
        obj (WorkflowObject): a workflow object.
        eng (WorkflowEngine): a workflow engine (not used here).

    Returns:
        None

    """
    obj.data.setdefault('inspire_categories', [])

    for category in get_arxiv_categories(obj.data):
        normalized = classify_field(category)
        if not normalized:
            # Nothing usable came back for this category; skip it.
            continue

        candidate = {'source': 'arxiv', 'term': normalized}
        known = obj.data['inspire_categories']
        if candidate in known:
            continue

        known.append(candidate)
Ejemplo n.º 2
0
def arxiv_derive_inspire_categories(obj, eng):
    """Fill ``inspire_categories`` using the record's arXiv categories.

    Works purely through side effects on ``obj.data``: the
    ``inspire_categories`` key is created when missing, and one
    ``{'source': 'arxiv', 'term': ...}`` entry is appended for every arXiv
    category that ``classify_field`` maps to a truthy term. Entries already
    present in the list are not added a second time.

    Args:
        obj (WorkflowObject): a workflow object.
        eng (WorkflowEngine): a workflow engine (not used here).

    Returns:
        None

    """
    obj.data.setdefault('inspire_categories', [])

    for category in get_arxiv_categories(obj.data):
        normalized = classify_field(category)
        if not normalized:
            # Nothing usable came back for this category; skip it.
            continue

        candidate = {'source': 'arxiv', 'term': normalized}
        known = obj.data['inspire_categories']
        if candidate in known:
            continue

        known.append(candidate)
Ejemplo n.º 3
0
def inspire_categories(self, key, value):
    """Build the ``inspire_categories`` list from one input field.

    Starts from the list already stored under ``inspire_categories`` on
    ``self`` (or a new empty list) and appends one ``{'term', 'source'}``
    entry for every ``'a'`` value that ``classify_field`` maps to a truthy
    term.

    Args:
        key: the key of the field being processed (not used here).
        value: mapping exposing ``get``; the ``'2'`` (scheme), ``'9'``
            (source) and ``'a'`` (terms) entries are read.
            NOTE(review): presumably MARC subfield codes - confirm.

    Returns:
        list: the accumulated categories. Returned unchanged when the
        scheme is ``'arXiv'``.
    """
    # Sources that are not valid in the schema but have a known equivalent.
    legacy_sources = {
        'automatically added based on dcc, ppf, dk': 'curator',
        'submitter': 'user',
    }

    field_schema = load_schema('elements/inspire_field')
    allowed_sources = field_schema['properties']['source']['enum']

    categories = self.get('inspire_categories', [])

    # XXX: we skip arXiv categories here because we're going to add them
    # later in a filter.
    if force_single_element(value.get('2')) == 'arXiv':
        return categories

    source = force_single_element(value.get('9', '')).lower()
    if source not in allowed_sources:
        # Unknown sources fall back to None.
        source = legacy_sources.get(source)

    for raw_term in force_list(value.get('a')):
        term = classify_field(raw_term)
        if not term:
            continue
        categories.append({
            'term': term,
            'source': source,
        })

    return categories
Ejemplo n.º 4
0
    def parse(self):
        """Extract an arXiv record into an Inspire HEP record.

        Passes each parsed attribute of this object to ``self.builder`` in
        a fixed order, then converts the arXiv categories with
        ``classify_field``, de-duplicates them and registers them as
        INSPIRE categories with the ``'arxiv'`` source.

        Returns:
            dict: the same record in the Inspire Literature schema.
        """
        builder = self.builder

        builder.add_abstract(abstract=self.abstract, source=self.source)
        builder.add_title(title=self.title, source=self.source)

        for license_kwargs in self.licenses:
            builder.add_license(**license_kwargs)

        for author in self.authors:
            builder.add_author(author)

        builder.add_number_of_pages(self.number_of_pages)
        builder.add_publication_info(**self.publication_info)

        for collaboration in self.collaborations:
            builder.add_collaboration(collaboration)

        for doi_kwargs in self.dois:
            builder.add_doi(**doi_kwargs)

        builder.add_preprint_date(self.preprint_date)

        # The public note is the only field skipped when it is falsy.
        if self.public_note:
            builder.add_public_note(self.public_note, self.source)

        for report_number in self.report_numbers:
            builder.add_report_number(report_number, self.source)

        builder.add_arxiv_eprint(self.arxiv_eprint, self.arxiv_categories)
        builder.add_private_note(self.private_note)
        builder.add_document_type(self.document_type)

        inspire_terms = dedupe_list(
            [classify_field(category) for category in self.arxiv_categories]
        )
        builder.add_inspire_categories(inspire_terms, 'arxiv')

        return builder.record
Ejemplo n.º 5
0
def test_classify_field_ignores_case():
    """An upper-cased ``ASTRO-PH.CO`` is classified as ``Astrophysics``."""
    assert utils.classify_field('ASTRO-PH.CO') == 'Astrophysics'
Ejemplo n.º 6
0
def test_classify_field_returns_other_if_category_not_found():
    """The value ``quant-bio`` is classified as ``Other``."""
    assert utils.classify_field('quant-bio') == 'Other'
Ejemplo n.º 7
0
def test_classify_field_normalizes_arxiv_category():
    """The spelling ``math-dg`` is classified as ``Math and Math Physics``."""
    assert utils.classify_field('math-dg') == 'Math and Math Physics'
Ejemplo n.º 8
0
def test_classify_field_returns_category_for_inspire_category():
    """Passing ``Astrophysics`` yields ``Astrophysics`` back."""
    assert utils.classify_field('Astrophysics') == 'Astrophysics'
Ejemplo n.º 9
0
def test_classify_field_returns_category_for_arxiv_category():
    """The arXiv category ``math.AG`` maps to ``Math and Math Physics``."""
    assert utils.classify_field('math.AG') == 'Math and Math Physics'
Ejemplo n.º 10
0
def test_classify_field_returns_none_on_non_string_value():
    """The integer ``0`` is classified as ``None``."""
    result = utils.classify_field(0)

    assert result is None
Ejemplo n.º 11
0
def test_classify_field_returns_none_on_falsy_value():
    """The empty string is classified as ``None``."""
    result = utils.classify_field('')

    assert result is None