def _transform_classification(cls, clsn: Classification) -> Optional[dict]:
    """Flatten a :class:`Classification` into a plain dict.

    Returns ``None`` when the classification has no ``category``;
    otherwise a dict with ``group``, ``archive``, and ``category`` keys
    (missing group/archive entries come through as ``None``).
    """
    if clsn.get('category') is None:
        return None
    # Category is present, so project the three classification parts.
    return {part: clsn.get(part) for part in ('group', 'archive', 'category')}
def _to_classification(value: str) -> Tuple[Classification, ...]:
    """Resolve *value* to one or more :class:`Classification` instances.

    The first entry always carries *value* itself; an unaliased form and a
    canonical form are appended when they differ from the original (and
    from each other).

    Raises
    ------
    ValueError
        If *value* is not a known group, archive, or category.
    """
    if value in taxonomy.definitions.GROUPS:
        klass, field = taxonomy.Group, "group"
    elif value in taxonomy.definitions.ARCHIVES:
        klass, field = taxonomy.Archive, "archive"
    elif value in taxonomy.definitions.CATEGORIES:
        klass, field = taxonomy.Category, "category"
    else:
        raise ValueError("not a valid classification")

    def _make(ident: Any) -> Classification:
        # mypy cannot type these dynamic kwargs.
        return Classification(**{field: {"id": ident}})  # type: ignore

    cast_value = klass(value)
    unaliased = cast_value.unalias()
    canonical = cast_value.canonical

    clsns = [_make(value)]
    if unaliased != cast_value:
        clsns.append(_make(unaliased))
    if canonical != cast_value and canonical != unaliased:
        clsns.append(_make(canonical))
    return tuple(clsns)
def _update_query_with_classification(q: AdvancedQuery, data: MultiDict) -> AdvancedQuery:
    """Rebuild ``q.classification`` from the submitted form *data*.

    Each checked archive checkbox adds an archive classification; the
    physics group additionally honors the ``physics_archives`` selector.
    Returns the same query object, mutated.
    """
    q.classification = ClassificationList()
    # Form field name -> archive identifier.
    field_to_archive = {
        "computer_science": "cs",
        "economics": "econ",
        "eess": "eess",
        "mathematics": "math",
        "q_biology": "q-bio",
        "q_finance": "q-fin",
        "statistics": "stat",
    }
    for field, archive in field_to_archive.items():
        if not data.get(field):
            continue
        # Fix for these typing issues is coming soon!
        # See: https://github.com/python/mypy/pull/4397
        q.classification.append(
            Classification(archive={"id": archive})  # type: ignore
        )
    if data.get("physics") and "physics_archives" in data:
        if "all" in data["physics_archives"]:
            # "all" selects the whole physics group, no archive filter.
            q.classification.append(
                Classification(group={"id": "grp_physics"})  # type: ignore
            )
        else:
            q.classification.append(
                Classification(  # type: ignore
                    group={"id": "grp_physics"},
                    archive={"id": data["physics_archives"]},
                )
            )
    return q
def _update_query_with_classification(q: AdvancedQuery, data: MultiDict) \
        -> AdvancedQuery:
    """Populate ``q.classification`` from form *data* and return *q*."""
    q.classification = ClassificationList()
    # (checkbox field name, archive id) pairs, in display order.
    for form_field, archive_id in (('computer_science', 'cs'),
                                   ('economics', 'econ'),
                                   ('eess', 'eess'),
                                   ('mathematics', 'math'),
                                   ('q_biology', 'q-bio'),
                                   ('q_finance', 'q-fin'),
                                   ('statistics', 'stat')):
        if not data.get(form_field):
            continue
        # Fix for these typing issues is coming soon!
        # See: https://github.com/python/mypy/pull/4397
        q.classification.append(
            Classification(archive={'id': archive_id})  # type: ignore
        )
    if data.get('physics') and 'physics_archives' in data:
        if 'all' in data['physics_archives']:
            # Whole physics group selected; no archive restriction.
            q.classification.append(
                Classification(group={'id': 'grp_physics'})  # type: ignore
            )
        else:
            q.classification.append(
                Classification(  # type: ignore
                    group={'id': 'grp_physics'},
                    archive={'id': data['physics_archives']},
                )
            )
    return q
def _transform_classification(
    cls, clsn: Classification
) -> Optional[Dict[Any, Any]]:
    """Project a :class:`Classification` onto a plain dict.

    A classification without a ``category`` is not displayable, so
    ``None`` is returned in that case.
    """
    if clsn.get("category") is None:
        return None
    return {
        field: clsn.get(field)
        for field in ("group", "archive", "category")
    }
def _transform_classification(
    clsn: Classification,
) -> Optional[Dict[str, Optional[str]]]:
    """Flatten *clsn* into a ``group``/``archive``/``category`` dict.

    Returns ``None`` when no category is set.
    """
    category = clsn.get("category")
    if category is None:
        return None
    flattened: Dict[str, Optional[str]] = {  # type: ignore
        "group": clsn.get("group"),
        "archive": clsn.get("archive"),
        "category": category,
    }
    return flattened
def test_archive_subsumed_classification(self, mock_index):
    """Request with a subsumed archive as primary classification."""
    subsumed = 'chao-dyn'
    data, code, headers = api.search(
        MultiDict({'primary_classification': subsumed})
    )
    self.assertEqual(code, status.HTTP_200_OK, "Returns 200 OK")
    query = mock_index.search.call_args[0][0]
    # Both the requested archive and its canonical form are queried.
    self.assertEqual(len(query.primary_classification), 2)
    self.assertEqual(query.primary_classification[0],
                     Classification(archive={'id': subsumed}))
    self.assertEqual(query.primary_classification[1],
                     Classification(archive={'id': 'nlin.CD'}),
                     "The canonical archive is used instead")
def display_classification(classification: Classification) -> str:
    """Generate a display-friendly label for a classification.

    Joins the display names of whichever of ``group``, ``archive``, and
    ``category`` are present, separated by ``::``. Each part uses its own
    ``name`` when present, falling back to the taxonomy display name for
    its ``id``.
    """
    def _part_display(part, fallback) -> str:
        # Evaluate the taxonomy fallback lazily. The previous
        # ``part.get("name", fallback(part["id"]))`` always computed the
        # fallback — and raised KeyError on a part with ``name`` but no
        # ``id`` — even when ``name`` was present.
        if "name" in part:
            return part["name"]
        return fallback(part["id"])

    parts = []
    group = classification.get("group")
    if group is not None:
        parts.append(_part_display(group, taxonomy.get_group_display))
    archive = classification.get("archive")
    if archive is not None:
        parts.append(_part_display(archive, taxonomy.get_archive_display))
    category = classification.get("category")
    if category is not None:
        parts.append(_part_display(category, taxonomy.get_category_display))
    return "::".join(parts)
def test_archive_subsumed_classification(self, mock_index):
    """Request with a subsumed archive as primary classification."""
    subsumed = "chao-dyn"
    canonical = "nlin.CD"
    data, code, headers = api.search(
        MultiDict({"primary_classification": subsumed})
    )
    self.assertEqual(code, HTTPStatus.OK, "Returns 200 OK")
    query = mock_index.search.call_args[0][0]
    # Both the requested archive and its canonical form are queried.
    self.assertEqual(len(query.primary_classification), 2)
    self.assertEqual(
        query.primary_classification[0],
        Classification(archive={"id": subsumed}),
    )
    self.assertEqual(
        query.primary_classification[1],
        Classification(archive={"id": canonical}),
        "The canonical archive is used instead",
    )
def test_archive_primary_classification(self, mock_index):
    """Request with an archive as primary classification."""
    data, code, headers = api.search(
        MultiDict({'primary_classification': 'physics'})
    )
    self.assertEqual(code, status.HTTP_200_OK, "Returns 200 OK")
    query = mock_index.search.call_args[0][0]
    # Exactly one classification, naming the requested archive.
    self.assertEqual(len(query.primary_classification), 1)
    self.assertEqual(
        query.primary_classification[0],
        Classification(archive={'id': 'physics'}),
    )
def test_category_primary_classification(self, mock_index):
    """Request with a category as primary classification."""
    data, code, headers = api.search(
        MultiDict({'primary_classification': 'cs.DL'})
    )
    self.assertEqual(code, status.HTTP_200_OK, "Returns 200 OK")
    query = mock_index.search.call_args[0][0]
    # Exactly one classification, naming the requested category.
    self.assertEqual(len(query.primary_classification), 1)
    self.assertEqual(
        query.primary_classification[0],
        Classification(category={'id': 'cs.DL'}),
    )
def test_group_primary_classification(self, mock_index):
    """Request with a group as primary classification."""
    group = "grp_physics"
    data, code, headers = api.search(
        MultiDict({"primary_classification": group})
    )
    self.assertEqual(code, HTTPStatus.OK, "Returns 200 OK")
    query = mock_index.search.call_args[0][0]
    # Exactly one classification, naming the requested group.
    self.assertEqual(len(query.primary_classification), 1)
    self.assertEqual(query.primary_classification[0],
                     Classification(group={"id": group}))
def _update_with_archives(q: SimpleQuery, archives: List[str]) -> SimpleQuery:
    """
    Search within one or more archives.

    Parameters
    ----------
    q : :class:`SimpleQuery`
    archives : List[str]
        Archive identifiers to restrict the search to.

    Returns
    -------
    :class:`SimpleQuery`
        The same query, with its ``classification`` replaced.

    """
    # Docstring previously documented a nonexistent ``groups_or_archives:
    # str`` parameter; corrected to match the signature.
    logger.debug('Search within %s', archives)
    q.classification = ClassificationList([
        Classification(archive={'id': archive})  # type: ignore
        for archive in archives
    ])
    return q
def to_document(raw: Union[Hit, dict], highlight: bool = True) -> Document: """Transform an ES search result back into a :class:`.Document`.""" # typing: ignore result: Dict[str, Any] = {} result['match'] = {} # Hit on field, but no highlighting. result['truncated'] = {} # Preview is truncated. for key in Document.fields(): if type(raw) is Hit: if not hasattr(raw, key): continue value = getattr(raw, key) elif type(raw) is dict: if key not in raw: continue value = raw.get(key) else: continue # We want to prevent ES-specific data types from escaping the module # API. if isinstance(value, AttrList): value = value._l_ elif isinstance(value, AttrDict): value = value.to_dict() if key == 'primary_classification': value = Classification(**value) # type: ignore elif key == 'secondary_classification': value = [Classification(**v) for v in value] # type: ignore elif key in ['authors', 'owners']: value = [_to_author(au) for au in value] elif key == 'submitter': value = _to_author(value) elif key == 'announced_date_first' and \ value and isinstance(value, str): value = datetime.strptime(value, '%Y-%m').date() elif key in [ 'submitted_date', 'submitted_date_first', 'submitted_date_latest' ]: try: value = datetime.strptime(value, '%Y-%m-%dT%H:%M:%S%z') except (ValueError, TypeError): logger.warning(f'Could not parse {key}: {value} as datetime') pass elif key in ['acm_class', 'msc_class'] and value: value = '; '.join(value) result[key] = value if type(raw) is Response: result['score'] = raw.meta.score # type: ignore if type(result.get('abstract')) is str and highlight: if 'preview' not in result: result['preview'] = {} result['preview']['abstract'] = preview(result['abstract']) if result['preview']['abstract'].endswith('…'): result['truncated']['abstract'] = True if highlight and type(raw) in [Response, Hit]: result['highlight'] = {} logger.debug('%s: add highlighting to result', raw.paper_id) # type: ignore result = add_highlighting(result, raw) return Document(**result) # type: 
ignore
def test_advanced_query(self, mock_Elasticsearch, mock_Search):
    """:class:`.index.search` supports :class:`AdvancedQuery`."""
    rdata = dict(
        authors=[{'full_name': 'N. Ame'}],
        owners=[{'full_name': 'N. Ame'}],
        submitter={'full_name': 'N. Ame'},
        paper_id='1234.56789',
    )
    mock_result = mock.MagicMock(_d_=rdata, **rdata)
    mock_result.meta.score = 1
    mock_results = mock.MagicMock()
    mock_results.__getitem__.return_value = {'total': 53}
    mock_results.__iter__.return_value = [mock_result]
    mock_Search.execute.return_value = mock_results

    # Support the chaining API for py-ES.
    mock_Search.return_value = mock_Search
    mock_Search.__getitem__.return_value = mock_Search
    for chained in ('filter', 'highlight', 'highlight_options',
                    'query', 'sort'):
        getattr(mock_Search, chained).return_value = mock_Search

    query = AdvancedQuery(
        order='relevance',
        size=10,
        date_range=DateRange(
            start_date=datetime.now() - timedelta(days=5),
            end_date=datetime.now(),
        ),
        classification=ClassificationList([
            Classification(
                group={'id': 'physics'},
                archive={'id': 'physics'},
                category={'id': 'hep-th'},
            )
        ]),
        terms=FieldedSearchList([
            FieldedSearchTerm(operator='AND', field='title', term='foo'),
            FieldedSearchTerm(operator='AND', field='author', term='joe'),
            FieldedSearchTerm(operator='OR', field='abstract', term='hmm'),
            FieldedSearchTerm(operator='NOT', field='comments', term='eh'),
            FieldedSearchTerm(operator='AND', field='journal_ref',
                              term='jref (1999) 1:2-3'),
            FieldedSearchTerm(operator='AND', field='acm_class',
                              term='abc123'),
            FieldedSearchTerm(operator='AND', field='msc_class',
                              term='abc123'),
            FieldedSearchTerm(operator='OR', field='report_num',
                              term='abc123'),
            FieldedSearchTerm(operator='OR', field='doi',
                              term='10.01234/56789'),
            FieldedSearchTerm(operator='OR', field='orcid',
                              term='0000-0000-0000-0000'),
            FieldedSearchTerm(operator='OR', field='author_id',
                              term='Bloggs_J'),
        ]),
    )
    document_set = index.SearchSession.search(query)
    # self.assertIsInstance(document_set, DocumentSet)
    for meta_key, expected in (('start', 0), ('total', 53),
                               ('current_page', 1), ('total_pages', 6),
                               ('size', 10)):
        self.assertEqual(document_set['metadata'][meta_key], expected)
    self.assertEqual(len(document_set['results']), 1)
def category_name(classification: Classification) -> str:
    """Get the category display name for a classification.

    Uses the category's own ``name`` when present, otherwise falls back
    to the taxonomy display name for its ``id``.

    Raises
    ------
    ValueError
        If the classification carries no category.
    """
    category = classification.get("category")
    if not category:
        raise ValueError("No category")
    # Evaluate the taxonomy fallback lazily: the previous
    # ``category.get("name", taxonomy.get_category_display(category["id"]))``
    # always computed the fallback — and raised KeyError on a category with
    # ``name`` but no ``id`` — even when ``name`` was present.
    if "name" in category:
        return category["name"]
    return taxonomy.get_category_display(category["id"])
def test_advanced_query(self, mock_Elasticsearch, mock_Search):
    """:class:`.index.search` supports :class:`AdvancedQuery`."""
    rdata = mock_rdata()
    mock_result = mock.MagicMock(_d_=rdata, **rdata)
    mock_result.meta.score = 1
    mock_results = mock.MagicMock()
    mock_results.__getitem__.return_value = {"total": 53}
    mock_results.__iter__.return_value = [mock_result]
    mock_Search.execute.return_value = mock_results

    # Support the chaining API for py-ES.
    mock_Search.return_value = mock_Search
    mock_Search.__getitem__.return_value = mock_Search
    for chained in ("filter", "highlight", "highlight_options",
                    "query", "sort"):
        getattr(mock_Search, chained).return_value = mock_Search

    terms = FieldedSearchList([
        FieldedSearchTerm(operator="AND", field="title", term="foo"),
        FieldedSearchTerm(operator="AND", field="author", term="joe"),
        FieldedSearchTerm(operator="OR", field="abstract", term="hmm"),
        FieldedSearchTerm(operator="NOT", field="comments", term="eh"),
        FieldedSearchTerm(
            operator="AND",
            field="journal_ref",
            term="jref (1999) 1:2-3",
        ),
        FieldedSearchTerm(operator="AND", field="acm_class", term="abc123"),
        FieldedSearchTerm(operator="AND", field="msc_class", term="abc123"),
        FieldedSearchTerm(operator="OR", field="report_num", term="abc123"),
        FieldedSearchTerm(operator="OR", field="doi", term="10.01234/56789"),
        FieldedSearchTerm(
            operator="OR",
            field="orcid",
            term="0000-0000-0000-0000",
        ),
        FieldedSearchTerm(operator="OR", field="author_id", term="Bloggs_J"),
    ])
    query = AdvancedQuery(
        order="relevance",
        size=10,
        date_range=DateRange(
            start_date=datetime.now() - timedelta(days=5),
            end_date=datetime.now(),
        ),
        classification=ClassificationList([
            Classification(
                group={"id": "physics"},
                archive={"id": "physics"},
                category={"id": "hep-th"},
            )
        ]),
        terms=terms,
    )
    document_set = index.SearchSession.search(query)
    # self.assertIsInstance(document_set, DocumentSet)
    for meta_key, expected in (("start", 0), ("total_results", 53),
                               ("current_page", 1), ("total_pages", 6),
                               ("size", 10)):
        self.assertEqual(document_set["metadata"][meta_key], expected)
    self.assertEqual(len(document_set["results"]), 1)