Example #1
0
def test_gather_older_revision(root_doc, client):
    """
    Older revisions can be imported without some tags.

    A revision that is not the document's current revision is saved
    without the review and localization tag lists.
    """
    doc_path = root_doc.get_absolute_url()
    old_rev = root_doc.revisions.order_by('id')[0]
    # The test is only meaningful for a non-current revision
    assert old_rev != root_doc.current_revision
    rev_path = doc_path + '$revision/%d' % old_rev.id
    html = client.get(rev_path).content
    source = RevisionSource(rev_path)
    source.state = source.STATE_PREREQ
    # NOTE(review): an empty requester spec presumably forbids any HTTP
    # request, so all data has to come from storage - confirm in the helper
    requester = mock_requester(requester_spec=[])
    storage = mock_storage(spec=[
        'get_document', 'get_revision', 'get_document_metadata',
        'get_revision_html', 'get_user', 'save_revision'])
    storage.get_document.return_value = root_doc
    storage.get_document_metadata.return_value = {'is_meta': True}
    storage.get_revision_html.return_value = html
    # The stored user has to equal the 'creator' entry expected below
    storage.get_user.return_value = {'username': 'creator'}
    resources = source.gather(requester, storage)
    assert resources == []
    assert source.state == source.STATE_DONE
    assert source.freshness == source.FRESH_YES
    # Missing review and localization tags if it is not the current revision
    expected_data = {
        'id': old_rev.id,
        'comment': '',
        'content': '<p>Getting started...</p>',
        'created': datetime(2016, 1, 1),
        'creator': {u'username': u'creator'},
        'document': root_doc,
        'is_current': False,
        'slug': 'Root',
        'tags': [],
        'title': 'Root Document',
    }
    storage.save_revision.assert_called_once_with(expected_data)
Example #2
0
def test_gather_with_prereqs(tagged_doc, client):
    """
    When the prerequisites are in storage, the revision is saved.

    The document, document metadata, revision HTML and creator are all
    returned by storage, so gathering finishes and stores the revision.
    """
    doc_path = tagged_doc.get_absolute_url()
    rev_path = doc_path + '$revision/%d' % tagged_doc.current_revision_id
    html = client.get(rev_path).content
    source = RevisionSource(rev_path)
    source.state = source.STATE_PREREQ
    # NOTE(review): an empty requester spec presumably forbids any HTTP
    # request, so all data has to come from storage - confirm in the helper
    requester = mock_requester(requester_spec=[])
    storage = mock_storage(spec=[
        'get_document', 'get_revision', 'get_document_metadata',
        'get_revision_html', 'get_user', 'save_revision'])
    storage.get_document.return_value = tagged_doc
    storage.get_document_metadata.return_value = {'is_meta': True}
    storage.get_revision_html.return_value = html
    # The stored user has to equal the 'creator' entry expected below
    storage.get_user.return_value = {'username': 'creator'}
    resources = source.gather(requester, storage)
    assert resources == []
    assert source.state == source.STATE_DONE
    assert source.freshness == source.FRESH_YES
    # The current revision includes the review and localization tag lists
    expected_data = {
        'id': tagged_doc.current_revision_id,
        'comment': 'Frist Post!',
        'content': '<p>The HTML element <code>&lt;input&gt;</code>...',
        'created': datetime(2016, 12, 15, 17, 23),
        'creator': {u'username': u'creator'},
        'document': tagged_doc,
        'is_current': True,
        'localization_tags': [],
        'review_tags': [],
        'slug': 'Test',
        'tags': ['One', 'Three', 'Two'],
        'title': 'Test Document',
    }
    storage.save_revision.assert_called_once_with(expected_data)
Example #3
0
def test_gather_missing_revision_is_error(tagged_doc):
    """A 404 for the revision HTML leaves the source in the error state."""
    revision_url = '%s$revision/%s' % (
        tagged_doc.get_absolute_url(), tagged_doc.current_revision_id)
    source = RevisionSource(revision_url)
    storage_methods = [
        'get_document',
        'get_revision',
        'get_document_metadata',
        'get_revision_html',
    ]
    storage = mock_storage(spec=storage_methods)
    # Every request made through this requester is answered with a 404
    failing_requester = mock_requester(status_code=404)

    needed = source.gather(failing_requester, storage)

    assert needed == []
    assert source.state == source.STATE_ERROR
    assert source.freshness == source.FRESH_UNKNOWN
Example #4
0
def test_gather_document_slug_wins(tagged_doc, client):
    """
    If the document slug is different from the revision, doc wins.

    This appears to be common around page moves.
    """
    # Fetch the revision HTML while the document still has its original slug
    orig_doc_path = tagged_doc.get_absolute_url()
    orig_rev_path = orig_doc_path + ('$revision/%d' %
                                     tagged_doc.current_revision_id)
    html = client.get(orig_rev_path).content
    # Change the slug on the in-memory document only, then rebuild the paths
    tagged_doc.slug = 'Other'
    doc_path = tagged_doc.get_absolute_url()
    rev_path = doc_path + '$revision/%d' % tagged_doc.current_revision_id
    source = RevisionSource(rev_path)
    source.state = source.STATE_PREREQ
    # NOTE(review): an empty requester spec presumably forbids any HTTP
    # request, so all data has to come from storage - confirm in the helper
    requester = mock_requester(requester_spec=[])
    storage = mock_storage(spec=[
        'get_document', 'get_revision', 'get_document_metadata',
        'get_revision_html', 'get_user', 'save_revision'
    ])
    storage.get_document.return_value = tagged_doc
    storage.get_document_metadata.return_value = {'is_meta': True}
    storage.get_revision_html.return_value = html
    # The stored user has to equal the 'creator' entry expected below
    storage.get_user.return_value = {'username': 'creator'}
    resources = source.gather(requester, storage)
    assert resources == []
    assert source.state == source.STATE_DONE
    assert source.freshness == source.FRESH_YES
    # The saved slug is the document's new slug
    expected_data = {
        'id': tagged_doc.current_revision_id,
        'comment': 'Frist Post!',
        'content': '<p>The HTML element <code>&lt;input&gt;</code>...',
        'created': datetime(2016, 12, 15, 17, 23),
        'creator': {
            u'username': u'creator'
        },
        'document': tagged_doc,
        'is_current': True,
        'localization_tags': [],
        'review_tags': [],
        'slug': 'Other',
        'tags': ['One', 'Three', 'Two'],
        'title': 'Test Document',
    }
    storage.save_revision.assert_called_once_with(expected_data)
Example #5
0
def test_gather_existing_rev_and_doc():
    """A stored revision whose document matches the stored one is done."""
    source = RevisionSource('/en-US/docs/Test$revision/100')

    # Storage returns the same value for the document and for the
    # revision's document
    stored_doc = "existing"
    stored_rev = mock.Mock(spec_set=['document'])
    stored_rev.document = stored_doc
    storage = mock_storage(spec=['get_document', 'get_revision'])
    storage.get_document.return_value = stored_doc
    storage.get_revision.return_value = stored_rev
    requester = mock_requester(requester_spec=[])

    needed = source.gather(requester, storage)

    assert needed == []
    assert source.state == source.STATE_DONE
    assert source.freshness == source.FRESH_NO
    storage.get_document.assert_called_once_with('en-US', 'Test')
    storage.get_revision.assert_called_once_with(100)
Example #6
0
def test_gather_existing_doc(tagged_doc, client):
    """
    A stored document without a stored revision leads to a full gather.

    The requested HTML is saved to storage, and the document metadata
    and the creator are returned as the resources still needed.
    """
    doc_url = tagged_doc.get_absolute_url()
    rev_id = tagged_doc.current_revision_id
    revision_url = '%s$revision/%d' % (doc_url, rev_id)
    page = client.get(revision_url).content

    storage = mock_storage(spec=[
        'get_document',
        'get_revision',
        'get_document_metadata',
        'get_revision_html',
        'save_revision_html',
        'get_user',
    ])
    storage.get_document.return_value = tagged_doc
    requester = mock_requester(content=page)
    source = RevisionSource(revision_url)

    needed = source.gather(requester, storage)

    still_needed = [('document_meta', doc_url, {}), ('user', 'creator', {})]
    assert needed == still_needed
    assert source.state == source.STATE_PREREQ
    assert source.freshness == source.FRESH_UNKNOWN
    storage.save_revision_html.assert_called_once_with(revision_url, page)
Example #7
0
def test_gather_no_prereqs(tagged_doc, client):
    """
    With nothing in storage, the first gather asks for every prerequisite.

    The requested HTML is saved, and the document, its metadata and the
    creator are returned as needed resources.
    """
    doc_url = tagged_doc.get_absolute_url()
    rev_id = tagged_doc.current_revision_id
    revision_url = '%s$revision/%d' % (doc_url, rev_id)
    page = client.get(revision_url).content

    source = RevisionSource(revision_url)
    requester = mock_requester(content=page)
    storage_methods = [
        'get_document', 'get_revision', 'get_document_metadata',
        'get_revision_html', 'save_revision_html', 'get_user',
    ]
    storage = mock_storage(spec=storage_methods)

    needed = source.gather(requester, storage)

    still_needed = [
        ('document', doc_url, {}),
        ('document_meta', doc_url, {}),
        ('user', 'creator', {}),
    ]
    assert needed == still_needed
    assert source.state == source.STATE_PREREQ
    assert source.freshness == source.FRESH_UNKNOWN
    storage.save_revision_html.assert_called_once_with(revision_url, page)
def test_gather_existing_rev_and_doc():
    """When the revision and its document are stored, gathering is done."""
    requester = mock_requester(requester_spec=[])
    storage = mock_storage(spec=['get_document', 'get_revision'])

    # The stored revision points at the same value storage returns for
    # the document lookup
    known_revision = mock.Mock(spec_set=['document'])
    known_revision.document = "existing"
    storage.get_revision.return_value = known_revision
    storage.get_document.return_value = "existing"

    source = RevisionSource('/en-US/docs/Test' + '$revision/100')
    found = source.gather(requester, storage)

    assert found == []
    assert source.state == source.STATE_DONE
    assert source.freshness == source.FRESH_NO
    storage.get_document.assert_called_once_with('en-US', 'Test')
    storage.get_revision.assert_called_once_with(100)
def test_gather_existing_doc(tagged_doc, client):
    """
    Only the document is stored, so the revision is gathered in full.

    The HTML from the requester is saved, and the document metadata and
    the creator are reported as still needed.
    """
    base_path = tagged_doc.get_absolute_url()
    revision_path = '%s$revision/%d' % (
        base_path, tagged_doc.current_revision_id)
    raw_html = client.get(revision_path).content

    source = RevisionSource(revision_path)
    storage_methods = [
        'get_document', 'get_revision', 'get_document_metadata',
        'get_revision_html', 'save_revision_html', 'get_user',
    ]
    storage = mock_storage(spec=storage_methods)
    storage.get_document.return_value = tagged_doc
    requester = mock_requester(content=raw_html)

    found = source.gather(requester, storage)

    meta_need = ('document_meta', base_path, {})
    user_need = ('user', 'creator', {})
    assert found == [meta_need, user_need]
    assert source.state == source.STATE_PREREQ
    assert source.freshness == source.FRESH_UNKNOWN
    storage.save_revision_html.assert_called_once_with(
        revision_path, raw_html)
Example #10
0
def test_gather_no_prereqs(tagged_doc, client):
    """
    An empty storage makes the first gather request all prerequisites.

    The HTML from the requester is saved, and the document, its metadata
    and the creator are reported as still needed.
    """
    base_path = tagged_doc.get_absolute_url()
    revision_path = '%s$revision/%d' % (
        base_path, tagged_doc.current_revision_id)
    raw_html = client.get(revision_path).content

    storage = mock_storage(spec=[
        'get_document',
        'get_revision',
        'get_document_metadata',
        'get_revision_html',
        'save_revision_html',
        'get_user',
    ])
    requester = mock_requester(content=raw_html)
    source = RevisionSource(revision_path)

    found = source.gather(requester, storage)

    doc_need = ('document', base_path, {})
    meta_need = ('document_meta', base_path, {})
    user_need = ('user', 'creator', {})
    assert found == [doc_need, meta_need, user_need]
    assert source.state == source.STATE_PREREQ
    assert source.freshness == source.FRESH_UNKNOWN
    storage.save_revision_html.assert_called_once_with(
        revision_path, raw_html)
Example #11
0
def test_gather_based_on_is_needed(translated_doc, client):
    """
    A based_on revision that is not in storage is requested.

    Everything else comes from storage, so the based_on revision is the
    only resource returned and the source stays in the prereq state.
    """
    current = translated_doc.current_revision
    revision_url = current.get_absolute_url()
    parent_url = current.based_on.get_absolute_url()
    source = RevisionSource(revision_url, based_on=parent_url)
    page = client.get(revision_url).content

    storage_methods = [
        'get_document', 'get_revision', 'get_document_metadata',
        'get_revision_html', 'get_user', 'save_revision',
    ]
    storage = mock_storage(spec=storage_methods)
    storage.get_user.return_value = {'username': '******'}
    storage.get_revision_html.return_value = page
    storage.get_document_metadata.return_value = {'is_meta': True}
    storage.get_document.return_value = translated_doc
    requester = mock_requester(requester_spec=[])

    needed = source.gather(requester, storage)

    assert needed == [('revision', parent_url, {})]
    assert source.state == source.STATE_PREREQ
    assert source.freshness == source.FRESH_UNKNOWN
Example #12
0
def test_gather_document_slug_wins(tagged_doc, client):
    """
    If the document slug is different from the revision, doc wins.

    This appears to be common around page moves.
    """
    # Fetch the revision HTML while the document still has its original slug
    orig_doc_path = tagged_doc.get_absolute_url()
    orig_rev_path = orig_doc_path + ('$revision/%d' %
                                     tagged_doc.current_revision_id)
    html = client.get(orig_rev_path).content
    # Change the slug on the in-memory document only, then rebuild the paths
    tagged_doc.slug = 'Other'
    doc_path = tagged_doc.get_absolute_url()
    rev_path = doc_path + '$revision/%d' % tagged_doc.current_revision_id
    source = RevisionSource(rev_path)
    source.state = source.STATE_PREREQ
    # NOTE(review): an empty requester spec presumably forbids any HTTP
    # request, so all data has to come from storage - confirm in the helper
    requester = mock_requester(requester_spec=[])
    storage = mock_storage(spec=[
        'get_document', 'get_revision', 'get_document_metadata',
        'get_revision_html', 'get_user', 'save_revision'])
    storage.get_document.return_value = tagged_doc
    storage.get_document_metadata.return_value = {'is_meta': True}
    storage.get_revision_html.return_value = html
    # The stored user has to equal the 'creator' entry expected below
    storage.get_user.return_value = {'username': 'creator'}
    resources = source.gather(requester, storage)
    assert resources == []
    assert source.state == source.STATE_DONE
    assert source.freshness == source.FRESH_YES
    # The saved slug is the document's new slug
    expected_data = {
        'id': tagged_doc.current_revision_id,
        'comment': 'Frist Post!',
        'content': '<p>The HTML element <code>&lt;input&gt;</code>...',
        'created': datetime(2016, 12, 15, 17, 23),
        'creator': {u'username': u'creator'},
        'document': tagged_doc,
        'is_current': True,
        'localization_tags': [],
        'review_tags': [],
        'slug': 'Other',
        'tags': ['One', 'Three', 'Two'],
        'title': 'Test Document',
    }
    storage.save_revision.assert_called_once_with(expected_data)
Example #13
0
def test_gather_based_on_is_available(translated_doc, client):
    """
    If a based_on revision is specified and available, it is used.

    The saved revision data carries the ID of the based_on revision.
    """
    rev = translated_doc.current_revision
    rev_path = rev.get_absolute_url()
    based_on_path = rev.based_on.get_absolute_url()
    source = RevisionSource(rev_path, based_on=based_on_path)
    html = client.get(rev_path).content
    # NOTE(review): an empty requester spec presumably forbids any HTTP
    # request, so all data has to come from storage - confirm in the helper
    requester = mock_requester(requester_spec=[])
    storage = mock_storage(spec=[
        'get_document', 'get_revision', 'get_document_metadata',
        'get_revision_html', 'get_user', 'save_revision'
    ])
    storage.get_document.return_value = translated_doc
    storage.get_document_metadata.return_value = {'is_meta': True}
    storage.get_revision_html.return_value = html
    # The stored user has to equal the 'creator' entry expected below
    storage.get_user.return_value = {'username': 'creator'}
    # First call is for this revision, return None to scrape
    # Second call is for the based_on revision, return it
    storage.get_revision.side_effect = [None, rev.based_on]
    resources = source.gather(requester, storage)
    assert resources == []
    assert source.state == source.STATE_DONE
    assert source.freshness == source.FRESH_YES
    expected_data = {
        'id': rev.id,
        'based_on_id': rev.based_on.id,
        'comment': '',
        'content': '<p>Commencer...</p>',
        'created': datetime(2017, 6, 1, 15, 28),
        'creator': {
            u'username': u'creator'
        },
        'document': translated_doc,
        'is_current': True,
        'localization_tags': [],
        'review_tags': [],
        'slug': 'Racine',
        'tags': [],
        'title': 'Document Racine',
    }
    storage.save_revision.assert_called_once_with(expected_data)
Example #14
0
def test_gather_doc_mismatch_is_error():
    """A stored revision attached to a different stored doc is an error."""
    source = RevisionSource('/en-US/docs/Test$revision/100')

    # The document returned by the lookup reports one URL...
    looked_up_doc = mock.Mock(spec_set=['get_absolute_url'])
    looked_up_doc.get_absolute_url.return_value = '/en-US/docs/Foo'
    # ...while the document attached to the stored revision reports another
    revision_doc = mock.Mock(spec_set=['get_absolute_url'])
    revision_doc.get_absolute_url.return_value = '/en-US/docs/Test'
    stored_rev = mock.Mock(spec_set=['document'])
    stored_rev.document = revision_doc

    storage = mock_storage(spec=['get_document', 'get_revision'])
    storage.get_document.return_value = looked_up_doc
    storage.get_revision.return_value = stored_rev
    requester = mock_requester(requester_spec=[])

    needed = source.gather(requester, storage)

    assert needed == []
    assert source.state == source.STATE_ERROR
    assert source.freshness == source.FRESH_NO
    storage.get_document.assert_called_once_with('en-US', 'Test')
    storage.get_revision.assert_called_once_with(100)
Example #15
0
def test_gather_doc_mismatch_is_error():
    """Disagreement between stored doc and revision doc is an error."""

    def fake_doc(url):
        """Build a document mock that only knows its absolute URL."""
        doc = mock.Mock(spec_set=['get_absolute_url'])
        doc.get_absolute_url.return_value = url
        return doc

    requester = mock_requester(requester_spec=[])
    storage = mock_storage(spec=['get_document', 'get_revision'])
    # The document lookup and the stored revision give different documents
    storage.get_document.return_value = fake_doc('/en-US/docs/Foo')
    known_revision = mock.Mock(spec_set=['document'])
    known_revision.document = fake_doc('/en-US/docs/Test')
    storage.get_revision.return_value = known_revision

    source = RevisionSource('/en-US/docs/Test' + '$revision/100')
    found = source.gather(requester, storage)

    assert found == []
    assert source.state == source.STATE_ERROR
    assert source.freshness == source.FRESH_NO
    storage.get_document.assert_called_once_with('en-US', 'Test')
    storage.get_revision.assert_called_once_with(100)
Example #16
0
def test_gather_based_on_is_available(translated_doc, client):
    """
    If a based_on revision is specified and available, it is used.

    The saved revision data carries the ID of the based_on revision.
    """
    rev = translated_doc.current_revision
    rev_path = rev.get_absolute_url()
    based_on_path = rev.based_on.get_absolute_url()
    source = RevisionSource(rev_path, based_on=based_on_path)
    html = client.get(rev_path).content
    # NOTE(review): an empty requester spec presumably forbids any HTTP
    # request, so all data has to come from storage - confirm in the helper
    requester = mock_requester(requester_spec=[])
    storage = mock_storage(spec=[
        'get_document', 'get_revision', 'get_document_metadata',
        'get_revision_html', 'get_user', 'save_revision'])
    storage.get_document.return_value = translated_doc
    storage.get_document_metadata.return_value = {'is_meta': True}
    storage.get_revision_html.return_value = html
    # The stored user has to equal the 'creator' entry expected below
    storage.get_user.return_value = {'username': 'creator'}
    # First call is for this revision, return None to scrape
    # Second call is for the based_on revision, return it
    storage.get_revision.side_effect = [None, rev.based_on]
    resources = source.gather(requester, storage)
    assert resources == []
    assert source.state == source.STATE_DONE
    assert source.freshness == source.FRESH_YES
    expected_data = {
        'id': rev.id,
        'based_on_id': rev.based_on.id,
        'comment': 'Root Racine',
        'content': '<p>Commencer...</p>',
        'created': datetime(2017, 6, 1, 15, 28),
        'creator': {u'username': u'creator'},
        'document': translated_doc,
        'is_current': True,
        'localization_tags': [],
        'review_tags': [],
        'slug': 'Racine',
        'tags': [],
        'title': 'Document Racine',
    }
    storage.save_revision.assert_called_once_with(expected_data)
Example #17
0
def test_invalid_path():
    """A source built from a bad revision path starts in the error state."""
    bad_source = RevisionSource('/bad/path')
    expected_state = bad_source.STATE_ERROR
    assert bad_source.state == expected_state