예제 #1
0
 def _transform_classification(cls, clsn: Classification) -> Optional[dict]:
     category = clsn.get('category')
     if category is None:
         return None
     return {'group': clsn.get('group'),
             'archive': clsn.get('archive'),
             'category': category}
예제 #2
0
def _to_classification(value: str) -> Tuple[Classification, ...]:
    """Resolve *value* to one or more :class:`Classification` instances.

    The value is matched against groups, archives, and categories (in that
    order).  Besides the classification for the value itself, unaliased and
    canonical forms are appended when they differ.

    Raises
    ------
    ValueError
        If *value* is not a known group, archive, or category.
    """
    if value in taxonomy.definitions.GROUPS:
        klass, field = taxonomy.Group, "group"
    elif value in taxonomy.definitions.ARCHIVES:
        klass, field = taxonomy.Archive, "archive"
    elif value in taxonomy.definitions.CATEGORIES:
        klass, field = taxonomy.Category, "category"
    else:
        raise ValueError("not a valid classification")

    cast_value = klass(value)
    clsns = [Classification(**{field: {"id": value}})]  # type: ignore
    if cast_value.unalias() != cast_value:
        clsns.append(
            Classification(**{field: {"id": cast_value.unalias()}})  # type: ignore # noqa: E501
        )
    if (cast_value.canonical != cast_value
            and cast_value.canonical != cast_value.unalias()):
        clsns.append(
            Classification(**{field: {"id": cast_value.canonical}})  # type: ignore # noqa: E501
        )
    return tuple(clsns)
예제 #3
0
def _update_query_with_classification(q: AdvancedQuery,
                                      data: MultiDict) -> AdvancedQuery:
    """Replace ``q.classification`` based on the submitted form *data*."""
    q.classification = ClassificationList()
    archive_fields = (
        ("computer_science", "cs"),
        ("economics", "econ"),
        ("eess", "eess"),
        ("mathematics", "math"),
        ("q_biology", "q-bio"),
        ("q_finance", "q-fin"),
        ("statistics", "stat"),
    )
    for form_field, archive_id in archive_fields:
        if not data.get(form_field):
            continue
        # Fix for these typing issues is coming soon!
        #  See: https://github.com/python/mypy/pull/4397
        q.classification.append(
            Classification(archive={"id": archive_id})  # type: ignore
        )
    if data.get("physics") and "physics_archives" in data:
        if "all" in data["physics_archives"]:
            q.classification.append(
                Classification(group={"id": "grp_physics"})  # type: ignore
            )
        else:
            q.classification.append(
                Classification(  # type: ignore
                    group={"id": "grp_physics"},
                    archive={"id": data["physics_archives"]},
                )
            )
    return q
예제 #4
0
def _update_query_with_classification(q: AdvancedQuery, data: MultiDict) \
        -> AdvancedQuery:
    """Rebuild the classification list on *q* from form *data*."""
    q.classification = ClassificationList()
    field_to_archive = {'computer_science': 'cs', 'economics': 'econ',
                        'eess': 'eess', 'mathematics': 'math',
                        'q_biology': 'q-bio', 'q_finance': 'q-fin',
                        'statistics': 'stat'}
    for field, archive in field_to_archive.items():
        if not data.get(field):
            continue
        # Fix for these typing issues is coming soon!
        #  See: https://github.com/python/mypy/pull/4397
        q.classification.append(
            Classification(archive={'id': archive})  # type: ignore
        )
    if data.get('physics') and 'physics_archives' in data:
        group = {'id': 'grp_physics'}
        if 'all' in data['physics_archives']:
            q.classification.append(
                Classification(group=group)  # type: ignore
            )
        else:
            q.classification.append(
                Classification(  # type: ignore
                    group=group,
                    archive={'id': data['physics_archives']}))
    return q
예제 #5
0
 def _transform_classification(
         cls, clsn: Classification) -> Optional[Dict[Any, Any]]:
     category = clsn.get("category")
     if category is None:
         return None
     return {
         "group": clsn.get("group"),
         "archive": clsn.get("archive"),
         "category": category,
     }
예제 #6
0
 def _transform_classification(
     clsn: Classification,
 ) -> Optional[Dict[str, Optional[str]]]:
     category = clsn.get("category")
     if category is None:
         return None
     return {  # type:ignore
         "group": clsn.get("group"),
         "archive": clsn.get("archive"),
         "category": category,
     }
예제 #7
0
    def test_archive_subsumed_classification(self, mock_index):
        """A subsumed archive supplied as the primary classification."""
        archive = 'chao-dyn'
        data, code, headers = api.search(
            MultiDict({'primary_classification': archive}))

        self.assertEqual(code, status.HTTP_200_OK, "Returns 200 OK")
        query = mock_index.search.call_args[0][0]
        self.assertEqual(len(query.primary_classification), 2)
        expected_first = Classification(archive={'id': archive})
        expected_second = Classification(archive={'id': 'nlin.CD'})
        self.assertEqual(query.primary_classification[0], expected_first)
        self.assertEqual(query.primary_classification[1], expected_second,
                         "The canonical archive is used instead")
예제 #8
0
def display_classification(classification: Classification) -> str:
    """Generate a display-friendly label for a classification.

    Joins the display names of whichever of group, archive, and category
    are present, in that order, with ``"::"``.

    Parameters
    ----------
    classification : Classification
        Mapping with optional "group", "archive", and "category" members,
        each a dict with an "id" and, optionally, a pre-computed "name".

    Returns
    -------
    str
        E.g. ``"Physics::physics::hep-th"``; empty string when no parts
        are set.
    """
    parts = []
    # dict.get evaluates its default argument eagerly, so the original
    # always called the taxonomy lookup even when a "name" was already
    # present (wasted work, and a failure if the id is unknown).  Only
    # fall back to the taxonomy when the name is actually missing.
    group = classification.get("group")
    if group is not None:
        parts.append(group["name"] if "name" in group
                     else taxonomy.get_group_display(group["id"]))
    archive = classification.get("archive")
    if archive is not None:
        parts.append(archive["name"] if "name" in archive
                     else taxonomy.get_archive_display(archive["id"]))
    category = classification.get("category")
    if category is not None:
        parts.append(category["name"] if "name" in category
                     else taxonomy.get_category_display(category["id"]))
    return "::".join(parts)
예제 #9
0
    def test_archive_subsumed_classification(self, mock_index):
        """A subsumed (aliased) archive as the primary classification."""
        params = MultiDict({"primary_classification": "chao-dyn"})
        data, code, headers = api.search(params)

        self.assertEqual(code, HTTPStatus.OK, "Returns 200 OK")
        query = mock_index.search.call_args[0][0]
        self.assertEqual(len(query.primary_classification), 2)
        self.assertEqual(
            query.primary_classification[0],
            Classification(archive={"id": "chao-dyn"}),
        )
        self.assertEqual(
            query.primary_classification[1],
            Classification(archive={"id": "nlin.CD"}),
            "The canonical archive is used instead",
        )
예제 #10
0
    def test_archive_primary_classification(self, mock_index):
        """An archive supplied as the primary classification."""
        params = MultiDict({'primary_classification': 'physics'})
        data, code, headers = api.search(params)

        self.assertEqual(code, status.HTTP_200_OK, "Returns 200 OK")
        query = mock_index.search.call_args[0][0]
        expected = Classification(archive={'id': 'physics'})
        self.assertEqual(len(query.primary_classification), 1)
        self.assertEqual(query.primary_classification[0], expected)
예제 #11
0
    def test_category_primary_classification(self, mock_index):
        """A category supplied as the primary classification."""
        params = MultiDict({'primary_classification': 'cs.DL'})
        data, code, headers = api.search(params)

        self.assertEqual(code, status.HTTP_200_OK, "Returns 200 OK")
        query = mock_index.search.call_args[0][0]
        expected = Classification(category={'id': 'cs.DL'})
        self.assertEqual(len(query.primary_classification), 1)
        self.assertEqual(query.primary_classification[0], expected)
예제 #12
0
    def test_group_primary_classification(self, mock_index):
        """A group supplied as the primary classification."""
        params = MultiDict({"primary_classification": "grp_physics"})
        data, code, headers = api.search(params)

        self.assertEqual(code, HTTPStatus.OK, "Returns 200 OK")
        query = mock_index.search.call_args[0][0]
        self.assertEqual(len(query.primary_classification), 1)
        expected = Classification(group={"id": "grp_physics"})
        self.assertEqual(query.primary_classification[0], expected)
예제 #13
0
def _update_with_archives(q: SimpleQuery, archives: List[str]) -> SimpleQuery:
    """
    Search within one or more archives.

    Parameters
    ----------
    q : :class:`SimpleQuery`
    archives : List[str]
        Identifiers of the archives to which the search is limited.
        (The original docstring documented a nonexistent
        ``groups_or_archives : str`` parameter.)

    Returns
    -------
    :class:`SimpleQuery`
        The same query, with its classification list replaced.
    """
    logger.debug('Search within %s', archives)
    q.classification = ClassificationList([
        Classification(archive={'id': archive})  # type: ignore
        for archive in archives
    ])
    return q
예제 #14
0
def to_document(raw: Union[Hit, dict], highlight: bool = True) -> Document:
    """Transform an ES search result back into a :class:`.Document`.

    Copies every field named by :meth:`Document.fields` from *raw*,
    converting ES-specific container types and re-hydrating domain values
    (classifications, authors, dates) along the way.

    Parameters
    ----------
    raw : :class:`Hit` or dict
        A single search result, either as an elasticsearch-dsl ``Hit`` or
        as a plain dict.
    highlight : bool
        When True, attach an abstract preview and ES highlighting data.

    Returns
    -------
    :class:`Document`
    """
    # NOTE(review): the comment originally here read "# typing: ignore", a
    # misspelling of "# type: ignore" that mypy does not recognize; the
    # real suppressions are the per-line "# type: ignore" comments below.
    result: Dict[str, Any] = {}

    result['match'] = {}  # Hit on field, but no highlighting.
    result['truncated'] = {}  # Preview is truncated.

    # Exact-type checks (not isinstance) are used throughout this function;
    # any other raw type is silently skipped.
    for key in Document.fields():
        if type(raw) is Hit:
            if not hasattr(raw, key):
                continue
            value = getattr(raw, key)

        elif type(raw) is dict:
            if key not in raw:
                continue
            value = raw.get(key)
        else:
            continue

        # We want to prevent ES-specific data types from escaping the module
        # API.
        if isinstance(value, AttrList):
            value = value._l_  # The plain list that AttrList wraps.
        elif isinstance(value, AttrDict):
            value = value.to_dict()

        if key == 'primary_classification':
            value = Classification(**value)  # type: ignore
        elif key == 'secondary_classification':
            value = [Classification(**v) for v in value]  # type: ignore
        elif key in ['authors', 'owners']:
            value = [_to_author(au) for au in value]
        elif key == 'submitter':
            value = _to_author(value)

        elif key == 'announced_date_first' and \
                value and isinstance(value, str):
            # Announcement month, e.g. "2018-04", stored as a date.
            value = datetime.strptime(value, '%Y-%m').date()
        elif key in [
                'submitted_date', 'submitted_date_first',
                'submitted_date_latest'
        ]:
            try:
                value = datetime.strptime(value, '%Y-%m-%dT%H:%M:%S%z')
            except (ValueError, TypeError):
                # Best-effort: an unparseable timestamp is kept as-is.
                logger.warning(f'Could not parse {key}: {value} as datetime')
                pass
        elif key in ['acm_class', 'msc_class'] and value:
            # Stored as a list in ES; presented as a "; "-separated string.
            value = '; '.join(value)

        result[key] = value

    # NOTE(review): the signature admits Hit or dict, but the score is only
    # set for a full Response -- confirm what callers actually pass here.
    if type(raw) is Response:
        result['score'] = raw.meta.score  # type: ignore

    if type(result.get('abstract')) is str and highlight:
        if 'preview' not in result:
            result['preview'] = {}
        result['preview']['abstract'] = preview(result['abstract'])
        # preview() marks truncation with a trailing ellipsis character.
        if result['preview']['abstract'].endswith('…'):
            result['truncated']['abstract'] = True

    if highlight and type(raw) in [Response, Hit]:
        result['highlight'] = {}
        logger.debug('%s: add highlighting to result',
                     raw.paper_id)  # type: ignore
        result = add_highlighting(result, raw)

    return Document(**result)  # type: ignore
예제 #15
0
    def test_advanced_query(self, mock_Elasticsearch, mock_Search):
        """:class:`.index.search` supports :class:`AdvancedQuery`."""
        # Build a fake ES response: a single hit with score 1, out of a
        # reported total of 53 matches.
        mock_results = mock.MagicMock()
        mock_results.__getitem__.return_value = {'total': 53}
        rdata = dict(authors=[{'full_name': 'N. Ame'}],
                     owners=[{'full_name': 'N. Ame'}],
                     submitter={'full_name': 'N. Ame'},
                     paper_id='1234.56789')
        # ``_d_`` mirrors the raw-dict attribute elasticsearch-dsl exposes
        # on Hit objects.
        mock_result = mock.MagicMock(_d_=rdata, **rdata)
        mock_result.meta.score = 1
        mock_results.__iter__.return_value = [mock_result]
        mock_Search.execute.return_value = mock_results

        # Support the chaining API for py-ES.
        mock_Search.return_value = mock_Search
        mock_Search.filter.return_value = mock_Search
        mock_Search.highlight.return_value = mock_Search
        mock_Search.highlight_options.return_value = mock_Search
        mock_Search.query.return_value = mock_Search
        mock_Search.sort.return_value = mock_Search
        mock_Search.__getitem__.return_value = mock_Search

        # One term per supported field, exercising AND, OR, and NOT.
        query = AdvancedQuery(
            order='relevance',
            size=10,
            date_range=DateRange(
                start_date=datetime.now() - timedelta(days=5),
                end_date=datetime.now()
            ),
            classification=ClassificationList([
                Classification(
                    group={'id': 'physics'},
                    archive={'id': 'physics'},
                    category={'id': 'hep-th'}
                )
            ]),
            terms=FieldedSearchList([
                FieldedSearchTerm(operator='AND', field='title', term='foo'),
                FieldedSearchTerm(operator='AND', field='author', term='joe'),
                FieldedSearchTerm(operator='OR', field='abstract', term='hmm'),
                FieldedSearchTerm(operator='NOT', field='comments', term='eh'),
                FieldedSearchTerm(operator='AND', field='journal_ref',
                                  term='jref (1999) 1:2-3'),
                FieldedSearchTerm(operator='AND', field='acm_class',
                                  term='abc123'),
                FieldedSearchTerm(operator='AND', field='msc_class',
                                  term='abc123'),
                FieldedSearchTerm(operator='OR', field='report_num',
                                  term='abc123'),
                FieldedSearchTerm(operator='OR', field='doi',
                                  term='10.01234/56789'),
                FieldedSearchTerm(operator='OR', field='orcid',
                                  term='0000-0000-0000-0000'),
                FieldedSearchTerm(operator='OR', field='author_id',
                                  term='Bloggs_J'),
            ])
        )
        document_set = index.SearchSession.search(query)
        # self.assertIsInstance(document_set, DocumentSet)
        # 53 total results at page size 10 -> 6 pages.
        self.assertEqual(document_set['metadata']['start'], 0)
        self.assertEqual(document_set['metadata']['total'], 53)
        self.assertEqual(document_set['metadata']['current_page'], 1)
        self.assertEqual(document_set['metadata']['total_pages'], 6)
        self.assertEqual(document_set['metadata']['size'], 10)
        self.assertEqual(len(document_set['results']), 1)
예제 #16
0
def category_name(classification: Classification) -> str:
    """Get the category display name for a classification.

    Parameters
    ----------
    classification : Classification
        Mapping that may contain a "category" member with an "id" and,
        optionally, a pre-computed "name".

    Returns
    -------
    str
        The category's display name.

    Raises
    ------
    ValueError
        If the classification has no (truthy) category.
    """
    category = classification.get("category")
    if not category:
        raise ValueError("No category")
    # dict.get evaluates its default argument eagerly, so the original
    # always called the taxonomy lookup even when a name was already
    # present; only fall back to the taxonomy when the name is missing.
    if "name" in category:
        return category["name"]
    return taxonomy.get_category_display(category["id"])
예제 #17
0
    def test_advanced_query(self, mock_Elasticsearch, mock_Search):
        """:class:`.index.search` supports :class:`AdvancedQuery`."""
        # Build a fake ES response: a single hit with score 1, out of a
        # reported total of 53 matches.
        mock_results = mock.MagicMock()
        mock_results.__getitem__.return_value = {"total": 53}
        rdata = mock_rdata()
        # ``_d_`` mirrors the raw-dict attribute elasticsearch-dsl exposes
        # on Hit objects.
        mock_result = mock.MagicMock(_d_=rdata, **rdata)
        mock_result.meta.score = 1
        mock_results.__iter__.return_value = [mock_result]
        mock_Search.execute.return_value = mock_results

        # Support the chaining API for py-ES.
        mock_Search.return_value = mock_Search
        mock_Search.filter.return_value = mock_Search
        mock_Search.highlight.return_value = mock_Search
        mock_Search.highlight_options.return_value = mock_Search
        mock_Search.query.return_value = mock_Search
        mock_Search.sort.return_value = mock_Search
        mock_Search.__getitem__.return_value = mock_Search

        # One term per supported field, exercising AND, OR, and NOT.
        query = AdvancedQuery(
            order="relevance",
            size=10,
            date_range=DateRange(
                start_date=datetime.now() - timedelta(days=5),
                end_date=datetime.now(),
            ),
            classification=ClassificationList([
                Classification(
                    group={"id": "physics"},
                    archive={"id": "physics"},
                    category={"id": "hep-th"},
                )
            ]),
            terms=FieldedSearchList([
                FieldedSearchTerm(operator="AND", field="title", term="foo"),
                FieldedSearchTerm(operator="AND", field="author", term="joe"),
                FieldedSearchTerm(operator="OR", field="abstract", term="hmm"),
                FieldedSearchTerm(operator="NOT", field="comments", term="eh"),
                FieldedSearchTerm(
                    operator="AND",
                    field="journal_ref",
                    term="jref (1999) 1:2-3",
                ),
                FieldedSearchTerm(operator="AND",
                                  field="acm_class",
                                  term="abc123"),
                FieldedSearchTerm(operator="AND",
                                  field="msc_class",
                                  term="abc123"),
                FieldedSearchTerm(operator="OR",
                                  field="report_num",
                                  term="abc123"),
                FieldedSearchTerm(operator="OR",
                                  field="doi",
                                  term="10.01234/56789"),
                FieldedSearchTerm(
                    operator="OR",
                    field="orcid",
                    term="0000-0000-0000-0000",
                ),
                FieldedSearchTerm(operator="OR",
                                  field="author_id",
                                  term="Bloggs_J"),
            ]),
        )
        document_set = index.SearchSession.search(query)
        # self.assertIsInstance(document_set, DocumentSet)
        # 53 total results at page size 10 -> 6 pages.
        self.assertEqual(document_set["metadata"]["start"], 0)
        self.assertEqual(document_set["metadata"]["total_results"], 53)
        self.assertEqual(document_set["metadata"]["current_page"], 1)
        self.assertEqual(document_set["metadata"]["total_pages"], 6)
        self.assertEqual(document_set["metadata"]["size"], 10)
        self.assertEqual(len(document_set["results"]), 1)