def test_gather_revisions_default(root_doc, client):
    """With no revision count, gather returns a single revision resource.

    The history page is requested without a ``limit`` parameter, both of the
    root document's revisions are saved to storage, and only one of them is
    returned as a resource.
    """
    doc_path = root_doc.get_absolute_url()
    history_path = doc_path + '$history'
    revision_pattern = doc_path + '$revision/%d'
    source = DocumentHistorySource(doc_path)

    # The mocked requester replies with the history page content fetched
    # through the ``client`` fixture.
    history_html = client.get(history_path).content
    requester = mock_requester(content=history_html, status_code=200)
    storage = mock_storage(
        spec=['get_document_history', 'save_document_history'])

    resources = source.gather(requester, storage)

    requester.request.assert_called_once_with(
        history_path, raise_for_status=False)
    # Unpacking also checks that the document has exactly two revisions.
    first_rev, second_rev = root_doc.revisions.all()
    first_resource = ('revision', revision_pattern % first_rev.id, {})
    second_resource = ('revision', revision_pattern % second_rev.id, {})
    assert resources == [second_resource]
    assert source.state == source.STATE_DONE
    # Both revisions are stored even though only one is returned.
    storage.save_document_history.assert_called_once_with(
        'en-US', 'Root',
        {'is_all': False, 'revisions': [second_resource, first_resource]})
def test_gather_error():
    """A 404 from the $history endpoint leaves the source in STATE_ERROR."""
    storage = mock_storage(
        spec=['get_document_history', 'save_document_history'])
    requester = mock_requester(content="missing", status_code=404)
    source = DocumentHistorySource('/en-US/docs/Error')

    gathered = source.gather(requester, storage)

    assert gathered == []
    assert source.state == source.STATE_ERROR
    # Nothing may be written to storage when the request fails.
    assert not storage.save_document_history.called
def test_gather_rev_existing():
    """History stored as a plain list is used instead of a request.

    NOTE(review): a later definition of ``test_gather_rev_existing`` in this
    module rebinds the name, so this version appears to be shadowed and never
    collected by pytest -- confirm whether it should be removed or renamed.
    """
    stored = []
    for num in range(10, 1, -1):
        stored.append(('revision', '/en-US/docs/Root$revision/%d' % num, {}))
    storage = mock_storage(spec=['get_document_history'])
    storage.get_document_history.return_value = stored
    source = DocumentHistorySource('/en-US/docs/Root')

    # The requester is None, so the result can only come from storage.
    found = source.gather(None, storage)

    assert found == [('revision', '/en-US/docs/Root$revision/10', {})]
    assert source.state == source.STATE_DONE
def test_gather_rev_existing():
    """History already held in storage is used to populate the resources.

    NOTE(review): an earlier function in this module has the same name; this
    definition replaces it at import time.
    """
    pattern = '/en-US/docs/Root$revision/%d'
    stored_history = {
        'is_all': False,
        'revisions': [
            ('revision', pattern % num, {}) for num in range(10, 1, -1)],
    }
    storage = mock_storage(spec=['get_document_history'])
    storage.get_document_history.return_value = stored_history
    source = DocumentHistorySource('/en-US/docs/Root')

    # The requester is None, so the result can only come from storage.
    found = source.gather(None, storage)

    assert found == [('revision', '/en-US/docs/Root$revision/10', {})]
    assert source.state == source.STATE_DONE
def test_gather_translated(translated_doc, client):
    """A translated document's revision carries its ``based_on`` path.

    NOTE(review): a later definition of ``test_gather_translated`` in this
    module rebinds the name, so this version appears to be shadowed and never
    collected by pytest -- confirm whether it should be removed or renamed.
    """
    doc_path = translated_doc.get_absolute_url()
    history_path = doc_path + '$history'
    source = DocumentHistorySource(doc_path)

    # The mocked requester replies with the history page content fetched
    # through the ``client`` fixture.
    history_html = client.get(history_path).content
    requester = mock_requester(content=history_html, status_code=200)
    storage = mock_storage(
        spec=['get_document_history', 'save_document_history'])

    resources = source.gather(requester, storage)

    requester.request.assert_called_once_with(
        history_path, raise_for_status=False)
    current = translated_doc.current_revision
    options = {'based_on': current.based_on.get_absolute_url()}
    expected_resources = [('revision', current.get_absolute_url(), options)]
    assert resources == expected_resources
    assert source.state == source.STATE_DONE
    storage.save_document_history.assert_called_once_with(
        'fr', 'Racine', expected_resources)
def test_gather_revisions_multiple(root_doc, client):
    """If a revision count is specified, that many are requested.

    With ``revisions=2`` the history request carries ``?limit=2`` and both of
    the root document's revisions are returned as resources. The saved
    history is expected in the ``{'is_all': ..., 'revisions': [...]}`` form
    asserted by the other gather tests in this module.
    """
    path = root_doc.get_absolute_url()
    source = DocumentHistorySource(path, revisions=2)
    # The mocked requester replies with the history page content fetched
    # through the ``client`` fixture.
    html = client.get(path + '$history').content
    requester = mock_requester(content=html, status_code=200)
    storage = mock_storage(
        spec=['get_document_history', 'save_document_history'])
    resources = source.gather(requester, storage)
    history_path = path + '$history?limit=2'
    requester.request.assert_called_once_with(history_path,
                                              raise_for_status=False)
    # Unpacking also checks that the document has exactly two revisions.
    rev1, rev2 = root_doc.revisions.all()
    revision_pattern = path + '$revision/%d'
    expected_resources = [('revision', revision_pattern % rev.id, {})
                          for rev in [rev1, rev2]]
    assert resources == expected_resources
    assert source.state == source.STATE_DONE
    # Saved history lists the revisions in the reverse of the returned order.
    # NOTE(review): 'is_all' is assumed False when the number of revisions
    # found equals the number requested, by analogy with the translated
    # document test in this module -- confirm against DocumentHistorySource.
    expected_data = {
        'is_all': False,
        'revisions': expected_resources[::-1]}
    storage.save_document_history.assert_called_once_with(
        'en-US', 'Root', expected_data)
def test_gather_revisions_more_than_available(root_doc, client):
    """Asking for more revisions than exist flags the history as complete.

    Three revisions are requested but the root document has two, so both are
    returned and the saved history has ``is_all`` set to True.
    """
    doc_path = root_doc.get_absolute_url()
    source = DocumentHistorySource(doc_path, revisions=3)

    # The mocked requester replies with the history page content fetched
    # through the ``client`` fixture.
    history_html = client.get(doc_path + '$history').content
    requester = mock_requester(content=history_html, status_code=200)
    storage = mock_storage(
        spec=['get_document_history', 'save_document_history'])

    resources = source.gather(requester, storage)

    requester.request.assert_called_once_with(
        doc_path + '$history?limit=3', raise_for_status=False)
    # Unpacking also checks that the document has exactly two revisions.
    first_rev, second_rev = root_doc.revisions.all()
    revision_pattern = doc_path + '$revision/%d'
    expected_resources = [
        ('revision', revision_pattern % first_rev.id, {}),
        ('revision', revision_pattern % second_rev.id, {}),
    ]
    assert resources == expected_resources
    assert source.state == source.STATE_DONE
    # Saved history lists the revisions in the reverse of the returned order.
    storage.save_document_history.assert_called_once_with(
        'en-US', 'Root',
        {'is_all': True, 'revisions': list(reversed(expected_resources))})
def test_gather_translated(translated_doc, client):
    """A translated document's revision carries its ``based_on`` path.

    NOTE(review): an earlier function in this module has the same name; this
    definition replaces it at import time.
    """
    doc_path = translated_doc.get_absolute_url()
    history_path = doc_path + '$history'
    source = DocumentHistorySource(doc_path)

    # The mocked requester replies with the history page content fetched
    # through the ``client`` fixture.
    history_html = client.get(history_path).content
    requester = mock_requester(content=history_html, status_code=200)
    storage = mock_storage(
        spec=['get_document_history', 'save_document_history'])

    resources = source.gather(requester, storage)

    requester.request.assert_called_once_with(
        history_path, raise_for_status=False)
    current = translated_doc.current_revision
    options = {'based_on': current.based_on.get_absolute_url()}
    expected_resources = [('revision', current.get_absolute_url(), options)]
    assert resources == expected_resources
    assert source.state == source.STATE_DONE
    storage.save_document_history.assert_called_once_with(
        'fr', 'Racine',
        {'is_all': False, 'revisions': expected_resources})