Beispiel #1
0
    def helper_get_details(self, test_publish):
        """
        Collects summary information about the stub data file used by a test
        and stores it as attributes on the instance.

        Every non-blank line of the stub file is treated as one record of
        tab-separated fields. Blank lines (for example a trailing empty line)
        are ignored. The first record must hold exactly three fields.

        Attributes set:
          nor: number of non-blank records in the stub file
          bibcode, ft_source, provider: the three fields of the first record
          bibcode_list: first field of every record
          test_expected: result of checker.create_meta_path for the first
            bibcode
          meta_path: test_expected with 'meta.json' removed
          meta_list: checker.create_meta_path result for every bibcode, with
            'meta.json' removed
          number_of_PDFs: number of records whose second-to-last field ends
            in '.pdf' (case-insensitive)
          number_of_standard_files: nor minus number_of_PDFs

        :param test_publish: path of the stub file, relative to
        self.proj_home
        :return: no return
        :raises IndexError: if the file holds no records, or a record has
        fewer than two fields
        :raises ValueError: if the first record does not have exactly three
        fields
        """

        stub_path = os.path.join(self.proj_home, test_publish)
        with open(stub_path, "r") as f:
            # Parse each line once. Blank lines carry no record and would
            # otherwise break the field indexing further down.
            records = [
                line.strip().split('\t') for line in f if line.strip()
            ]

        self.nor = len(records)

        self.bibcode, self.ft_source, self.provider = records[0]
        self.bibcode_list = [fields[0] for fields in records]

        extract_path = self.app.conf['FULLTEXT_EXTRACT_PATH']

        self.test_expected = checker.create_meta_path(
            {'bibcode': self.bibcode}, extract_path)

        self.meta_list = [
            checker.create_meta_path(
                {"bibcode": bibcode}, extract_path
            ).replace('meta.json', '')
            for bibcode in self.bibcode_list
        ]

        self.meta_path = self.test_expected.replace('meta.json', '')

        # The second-to-last field is the one inspected for a PDF extension.
        self.number_of_PDFs = sum(
            1 for fields in records if fields[-2].lower().endswith('.pdf')
        )

        self.number_of_standard_files = self.nor - self.number_of_PDFs
Beispiel #2
0
    def helper_get_details(self, test_publish):
        """
        Reads the test stub file and records details about its contents as
        instance attributes, so that tests can refer to them afterwards.

        The stub file is read as tab-separated records, one per non-blank
        line; empty lines are skipped. The first record has to consist of
        exactly three fields, which are stored as bibcode, ft_source and
        provider.

        Besides those three, the following attributes are populated:
          nor - count of non-blank records
          bibcode_list - the first field of each record
          test_expected - checker.create_meta_path output for the first
            bibcode
          meta_path - test_expected with 'meta.json' stripped out
          meta_list - checker.create_meta_path output for each bibcode with
            'meta.json' stripped out
          number_of_PDFs - count of records whose second-to-last field ends
            with '.pdf', ignoring case
          number_of_standard_files - nor - number_of_PDFs

        :param test_publish: stub file path, joined onto self.proj_home
        :return: no return
        :raises IndexError: when there are no records or a record has fewer
        than two fields
        :raises ValueError: when the first record is not made of exactly
        three fields
        """

        parsed = []
        with open(os.path.join(self.proj_home, test_publish), "r") as f:
            for raw in f:
                stripped = raw.strip()
                # An empty line is not a record; indexing its fields would
                # fail, so it is dropped here.
                if stripped:
                    parsed.append(stripped.split('\t'))

        self.nor = len(parsed)

        first = parsed[0]
        self.bibcode, self.ft_source, self.provider = first
        self.bibcode_list = [row[0] for row in parsed]

        base = self.app.conf['FULLTEXT_EXTRACT_PATH']

        self.test_expected = checker.create_meta_path(
            {'bibcode': self.bibcode},
            base
        )

        meta_files = [
            checker.create_meta_path({"bibcode": code}, base)
            for code in self.bibcode_list
        ]
        self.meta_list = [m.replace('meta.json', '') for m in meta_files]

        self.meta_path = self.test_expected.replace('meta.json', '')

        # Count the records whose second-to-last field looks like a PDF.
        self.number_of_PDFs = sum(
            1 for row in parsed if row[-2].lower().endswith('.pdf')
        )

        self.number_of_standard_files = self.nor - self.number_of_PDFs
Beispiel #3
0
    def calculate_expected_folders(self, full_text_links):
        """
        Works out which folders should be present once the full text of the
        stub data has been extracted.

        :param full_text_links: stub file of full text links, given relative
        to self.proj_home; the first tab-separated field of each line is used
        as the bibcode
        :return: list with one entry per line of the stub file, each being
        the checker.create_meta_path result with 'meta.json' removed
        """

        links_file = os.path.join(self.proj_home, full_text_links)
        with open(links_file, "r") as inf:
            bibcodes = [row.strip().split('\t')[0] for row in inf.readlines()]

        expected_paths = []
        for bibcode in bibcodes:
            meta_file = checker.create_meta_path(
                {'bibcode': bibcode},
                self.app.conf['FULLTEXT_EXTRACT_PATH']
            )
            # Drop the file name so only the containing folder remains.
            expected_paths.append(meta_file.replace('meta.json', ''))

        return expected_paths
Beispiel #4
0
    def calculate_expected_folders(self, full_text_links):
        """
        Builds the list of folder paths expected to exist after the test
        data has been run through full text extraction.

        :param full_text_links: name of the stub file holding the full text
        links, resolved against self.proj_home
        :return: list of paths, one for every line in the stub file, obtained
        from checker.create_meta_path with the 'meta.json' part removed
        """

        def folder_for(raw_line):
            # The bibcode is the first tab-separated field of the line.
            bibcode = raw_line.strip().split('\t')[0]
            meta_file = checker.create_meta_path(
                {'bibcode': bibcode},
                self.app.conf['FULLTEXT_EXTRACT_PATH']
            )
            return meta_file.replace('meta.json', '')

        source = os.path.join(self.proj_home, full_text_links)
        with open(source, "r") as inf:
            raw_lines = inf.readlines()

        return [folder_for(raw_line) for raw_line in raw_lines]