Example #1
0
 def setUp(self):
     """Create the PageMigration fixture shared by the tests.

     The migration is built for the original website
     'http://www.scielo.br' with the REVISTAS_PATH, IMG_REVISTAS_PATH and
     HTDOCS locations, in that positional order.
     """
     source_dirs = (REVISTAS_PATH, IMG_REVISTAS_PATH, HTDOCS)
     self.migration = PageMigration('http://www.scielo.br', *source_dirs)
Example #2
0
 def setUp(self):
     """Create the PageMigration and MigratedPage fixtures.

     The source locations are kept on the instance so tests can refer to
     them; no static files path is configured (it is None). The page
     starts with empty content for the journal acronym 'aa' in 'es'.
     """
     self.revistas_path, self.img_revistas_path = (
         TESTS_REVISTAS_PATH, TESTS_IMG_REVISTAS_PATH)
     self.static_files_path = None
     self.migration = PageMigration(
         'http://www.scielo.br',
         self.revistas_path,
         self.img_revistas_path,
         self.static_files_path)
     self.page = MigratedPage(self.migration, '', acron='aa', lang='es')
Example #3
0
def migrate_page_content(content, language, acron=None, page_name=None):
    """
    Migrate the content of any HTML page and return the new content.

    :param content: HTML content of the page.
    :param language: language of the page text.
    :param acron: journal acronym, when the page belongs to a journal.
    :param page_name: page title, when the page is not a journal page.
    :return: the migrated content, or None when ``content`` is empty/falsy.
    :raises IOError: when ``content`` is given but neither ``acron`` nor
        ``page_name`` is provided.
    """
    if not content:
        # Nothing to migrate.
        return None

    if not (acron or page_name):
        raise IOError('migrate_page_content() requer acron ou page_name')

    # Source locations and original website come from the app configuration.
    config = current_app.config
    source_dirs = [
        config[key]
        for key in ('JOURNAL_PAGES_SOURCE_PATH', 'JOURNAL_IMAGES_SOURCE_PATH')
    ]
    website = config['JOURNAL_PAGES_ORIGINAL_WEBSITE']

    migrated = MigratedPage(
        PageMigration(website, *source_dirs),
        content,
        acron=acron,
        page_name=page_name,
        lang=language)
    # The page migrates its URLs using the file and image creation callbacks.
    migrated.migrate_urls(
        migrate_page_create_file, migrate_page_create_image)
    return migrated.content
Example #4
0
class UtilsMigratedABMVZJournalPageTestCase(BaseTestCase):
    """Tests for MigratedPage file handling on the 'abmvz' journal page."""

    def create_item(self, source, dest, check_if_exists=False):
        """Stub creation callback: ignores its arguments and returns ''."""
        return ''

    def setUp(self):
        """Create the PageMigration and an empty 'abmvz' page in 'es'.

        No static files path is configured (it is None).
        """
        original_website = 'http://www.scielo.br'
        self.revistas_path = TESTS_REVISTAS_PATH
        self.img_revistas_path = TESTS_IMG_REVISTAS_PATH
        self.static_files_path = None
        self.migration = PageMigration(original_website, self.revistas_path,
                                       self.img_revistas_path,
                                       self.static_files_path)
        self.page = MigratedPage(self.migration, '', acron='abmvz', lang='es')

    @patch('requests.get')
    @patch.object(wutils, 'migrate_page_create_file')
    def test_create_files_from_downloaded_files(
        self,
        mocked_create_file_function,
        mocked_requests_get,
    ):
        """Check link rewriting and file creation with requests.get mocked.

        ``requests.get`` is patched to return a response with status 200 and
        body ``b'content'``; ``wutils.migrate_page_create_file`` is patched
        and yields the new '/media/files/...' href.
        """
        mocked_response = Mock()
        mocked_response.status_code = 200
        mocked_response.content = b'content'
        mocked_requests_get.return_value = mocked_response

        pdf_file_path = (
            'PASSO A PASSO – SISTEMA DE SUBMISSÃO DE ARTIGOS POR INTERMÉDIO DO SCHOLARONE.pdf'
        )
        new_href = (
            '/media/files/abmvz_passo-a-passo-sistema-de-submissao-de-artigos-por-intermedio-do-scholarone.pdf'
        )

        # Assigning the content is expected to prefix the bare file name with
        # the journal location.
        self.page.content = '<a href="{}"/>'.format(pdf_file_path)
        self.assertIn('/revistas/abmvz/{}'.format(pdf_file_path),
                      self.page.content)

        files = list(self.page.files)
        result = self.migration.get_possible_locations(files[0]['href'])
        self.assertIn('{}/abmvz/{}'.format(TESTS_REVISTAS_PATH, pdf_file_path),
                      result)

        mocked_create_file_function.side_effect = [new_href]
        _file_info = self.page.get_file_info(files[0]['href'])
        # Only items 1 and 2 are compared; item 0 of this fixture is a
        # '/tmp/...' path (presumably a per-run temporary location) and is
        # deliberately left unchecked.
        file_info = (
            '/tmp/tmpcjnmoyos/PASSO A PASSO – SISTEMA DE SUBMISSÃO DE ARTIGOS POR INTERMÉDIO DO SCHOLARONE.pdf',
            'abmvz_passo-a-passo-sistema-de-submissao-de-artigos-por-intermedio-do-scholarone.pdf',
            True)
        self.assertEqual(file_info[1], _file_info[1])
        self.assertEqual(file_info[2], _file_info[2])

        self.page.create_files(mocked_create_file_function)
        results = [item['href'] for item in self.page.files]
        expected_items = [new_href]
        # NOTE(review): zip() stops at the shorter sequence, so nothing is
        # asserted if ``results`` is empty. Consider comparing the two lists
        # directly once it is confirmed that page.files yields exactly one
        # item at this point.
        for result, expected in zip(results, expected_items):
            self.assertEqual(result, expected)
Example #5
0
 def setUp(self):
     """Create the PageMigration and MigratedPage fixtures.

     The migration uses the original website 'http://www.scielo.br' with
     REVISTAS_PATH, IMG_REVISTAS_PATH and HTDOCS; the page starts with
     empty content for the acronym 'acron' in 'es'.
     """
     source_dirs = (REVISTAS_PATH, IMG_REVISTAS_PATH, HTDOCS)
     self.migration = PageMigration('http://www.scielo.br', *source_dirs)
     self.page = MigratedPage(self.migration, '', acron='acron', lang='es')
Example #6
0
class UtilsMigratedJournalPageTestCase(BaseTestCase):
    """Tests for MigratedPage file handling on the 'aa' journal page."""

    def setUp(self):
        """Create the PageMigration and an empty 'aa' page in 'es'.

        No static files path is configured (it is None).
        """
        original_website = 'http://www.scielo.br'
        self.revistas_path = TESTS_REVISTAS_PATH
        self.img_revistas_path = TESTS_IMG_REVISTAS_PATH
        self.static_files_path = None
        self.migration = PageMigration(original_website, self.revistas_path,
                                       self.img_revistas_path,
                                       self.static_files_path)
        self.page = MigratedPage(self.migration, '', acron='aa', lang='es')

    @patch.object(wutils, 'migrate_page_create_file')
    def test_create_files(self, mocked_create_file_function):
        """Check link rewriting, file info and file creation for a PDF link.

        ``wutils.migrate_page_create_file`` is patched and yields the new
        '/media/files/...' href.
        """
        pdf_file_path = (
            'PASSO A PASSO – SISTEMA DE SUBMISSÃO DE ARTIGOS POR INTERMÉDIO DO SCHOLARONE.pdf'
        )
        new_href = (
            '/media/files/aa_passo-a-passo-sistema-de-submissao-de-artigos-por-intermedio-do-scholarone.pdf'
        )

        # Assigning the content is expected to prefix the bare file name with
        # the journal location.
        self.page.content = '<a href="{}"/>'.format(pdf_file_path)
        self.assertIn('/revistas/aa/{}'.format(pdf_file_path),
                      self.page.content)

        for a in self.page.files:
            result = self.migration.get_possible_locations(a['href'])
            self.assertIn(
                '{}/aa/{}'.format(TESTS_REVISTAS_PATH, pdf_file_path), result)

        mocked_create_file_function.side_effect = [new_href]
        _file_info = self.page.get_file_info(list(self.page.files)[0]['href'])

        # The whole tuple is compared: fixture path, generated name and flag.
        file_info = (
            'opac/tests/fixtures/pages/revistas/aa/PASSO A PASSO – SISTEMA DE SUBMISSÃO DE ARTIGOS POR INTERMÉDIO DO SCHOLARONE.pdf',
            'aa_passo-a-passo-sistema-de-submissao-de-artigos-por-intermedio-do-scholarone.pdf',
            False)
        self.assertEqual(file_info, _file_info)

        self.page.create_files(mocked_create_file_function)
        results = [item['href'] for item in self.page.files]
        expected_items = [new_href]
        # NOTE(review): zip() stops at the shorter sequence, so nothing is
        # asserted if ``results`` is empty. Consider comparing the two lists
        # directly once it is confirmed that page.files yields exactly one
        # item at this point.
        for result, expected in zip(results, expected_items):
            self.assertEqual(result, expected)
Example #7
0
class UtilsPageMigrationTestCase(BaseTestCase):
    """Tests for PageMigration: URL rewriting, link text and file lookup."""

    def setUp(self):
        """Create a PageMigration for 'http://www.scielo.br'.

        Uses REVISTAS_PATH, IMG_REVISTAS_PATH and HTDOCS as source locations.
        """
        source_dirs = (REVISTAS_PATH, IMG_REVISTAS_PATH, HTDOCS)
        self.migration = PageMigration('http://www.scielo.br', *source_dirs)

    # -- shared assertions --------------------------------------------------

    def _check_relative_url(self, url, relative):
        """Assert that replace_by_relative_url(url) equals ``relative``."""
        self.assertEqual(relative, self.migration.replace_by_relative_url(url))

    def _check_display_text(self, expected, *link_args):
        """Assert that link_display_text(*link_args) equals ``expected``."""
        shown = self.migration.link_display_text(*link_args)
        self.assertEqual(shown, expected)

    def _check_locations(self, url, member, all_items):
        """Assert the outcome of get_possible_locations(url).

        ``member`` must be among the returned items, and the returned items
        must match ``all_items`` when both are compared as sets.
        """
        found = self.migration.get_possible_locations(url)
        self.assertIn(member, found)
        self.assertEqual(set(all_items), set(found))

    # -- original_website ---------------------------------------------------

    def test_original_web_site(self):
        """The stored website is expected without the 'http://' scheme."""
        self.assertEqual(self.migration.original_website, 'www.scielo.br')

    # -- replace_by_relative_url --------------------------------------------

    def test_replace_by_relative_url_pdf(self):
        """A scheme-less /revistas PDF URL becomes a relative path."""
        self._check_relative_url(
            'www.scielo.br/revistas/icse/levels.pdf',
            '/revistas/icse/levels.pdf')

    def test_replace_by_relative_url_pdf_img_revistas(self):
        """A scheme-less /img/revistas PDF URL becomes a relative path."""
        self._check_relative_url(
            'www.scielo.br/img/revistas/icse/levels.pdf',
            '/img/revistas/icse/levels.pdf')

    def test_replace_by_relative_url(self):
        """The bare website URL becomes '/'."""
        self._check_relative_url('http://www.scielo.br', '/')

    def test_replace_by_relative_url_any_image(self):
        """An image URL outside the journal folders becomes relative."""
        self._check_relative_url(
            'http://www.scielo.br/abc/img2.jpg', '/abc/img2.jpg')

    def test_replace_by_relative_url_scielo_php(self):
        """A scielo.php URL keeps its query string when made relative."""
        self._check_relative_url(
            'http://www.scielo.br/scielo.php?script=sci_serial&pid=0102-4450&lng=en&nrm=iso',
            '/scielo.php?script=sci_serial&pid=0102-4450&lng=en&nrm=iso')

    # -- link_display_text --------------------------------------------------

    def test_link_display_text_1(self):
        """Display text for a /revistas PDF link."""
        self._check_display_text(
            'www.scielo.br/revistas/icse/levels.pdf',
            '/revistas/icse/levels.pdf',
            'www.scielo.br/revistas/icse/levels.pdf',
            'www.scielo.br/revistas/icse/levels.pdf')

    def test_link_display_text_2(self):
        """Display text for an /img/revistas PDF link."""
        self._check_display_text(
            'www.scielo.br/img/revistas/icse/levels.pdf',
            '/img/revistas/icse/levels.pdf',
            'www.scielo.br/img/revistas/icse/levels.pdf',
            'www.scielo.br/img/revistas/icse/levels.pdf')

    def test_link_display_text_3(self):
        """Display text is expected to follow the new journal 'about' URL."""
        self._check_display_text(
            'www.scielo.br/journal/icse/about/#instructions',
            '/journal/icse/about/#instructions',
            'www.scielo.br/revistas/icse/iinstruc.htm',
            'www.scielo.br/revistas/icse/iinstruc.htm')

    def test_link_display_text_4(self):
        """Display text for the site root; the last argument has a trailing space."""
        self._check_display_text(
            'www.scielo.br', '/', 'www.scielo.br', 'www.scielo.br ')

    # -- get_possible_locations ---------------------------------------------

    def test_get_possible_locations_img_revistas(self):
        """An /img/revistas URL has a single expected location."""
        self._check_locations(
            'www.scielo.br/img/revistas/abc.jpg',
            'htdocs/img/revistas/abc.jpg',
            ['htdocs/img/revistas/abc.jpg'])

    def test_get_possible_locations_revistas(self):
        """A /revistas URL has a single expected location."""
        self._check_locations(
            'www.scielo.br/revistas/abc.jpg',
            'htdocs/revistas/abc.jpg',
            ['htdocs/revistas/abc.jpg'])

    def test_get_possible_locations_page(self):
        """A URL in another folder has a single expected location."""
        self._check_locations(
            'www.scielo.br/abc/abc.jpg',
            'htdocs/abc/abc.jpg',
            ['htdocs/abc/abc.jpg'])

    def test_get_possible_locations_page_relative(self):
        """A rooted relative path has a single expected location."""
        self._check_locations(
            '/abc/abc.jpg',
            'htdocs/abc/abc.jpg',
            ['htdocs/abc/abc.jpg'])

    def test_get_possible_locations_page_relative_2(self):
        """A bare file name has three expected candidate locations."""
        self._check_locations(
            'abc.jpg',
            'htdocs/abc.jpg',
            [
                'htdocs/img/revistas/abc.jpg',
                'htdocs/revistas/abc.jpg',
                'htdocs/abc.jpg',
            ])