Ejemplo n.º 1
0
    def helper_get_details(self, test_publish):
        """
        Read a stub data file and record facts about it as attributes on
        this instance.

        Every line of the file is treated as a tab-separated record; the
        first line must split into exactly three fields (bibcode, full text
        source, provider).

        Attributes set: ``nor``, ``bibcode``, ``ft_source``, ``provider``,
        ``bibcode_list``, ``test_expected``, ``meta_list``, ``meta_path``,
        ``number_of_PDFs`` and ``number_of_standard_files``.

        :param test_publish: path of the stub file, joined onto PROJ_HOME
        :return: no return
        """

        stub_path = os.path.join(PROJ_HOME, test_publish)
        with open(stub_path, "r") as stub_file:
            lines = stub_file.readlines()

        # Number of records: one per line in the stub file.
        self.nor = len(lines)

        # Split every line once and reuse the parsed fields below.
        rows = [raw.strip().split('\t') for raw in lines]

        self.bibcode, self.ft_source, self.provider = rows[0]
        self.bibcode_list = [row[0] for row in rows]

        unittest_key = 'FULLTEXT_EXTRACT_PATH_UNITTEST'

        # Meta file path for the first record.
        self.test_expected = check_if_extract.create_meta_path(
            {'bibcode': self.bibcode},
            extract_key=unittest_key)

        # Same lookup for every record, with the 'meta.json' part removed.
        meta_dirs = []
        for bibcode in self.bibcode_list:
            meta_file = check_if_extract.create_meta_path(
                {"bibcode": bibcode},
                extract_key=unittest_key)
            meta_dirs.append(meta_file.replace('meta.json', ''))
        self.meta_list = meta_dirs

        self.meta_path = self.test_expected.replace('meta.json', '')

        # A record counts as a PDF when its second-to-last field ends with
        # '.pdf', ignoring case; everything else is a "standard" file.
        self.number_of_PDFs = sum(
            1 for row in rows if row[-2].lower().endswith('.pdf'))

        self.number_of_standard_files = self.nor - self.number_of_PDFs
Ejemplo n.º 2
0
    def helper_get_details(self, test_publish):
        """
        Load the given stub file and store descriptive details about its
        contents on the instance.

        The file is read line by line as tab-separated records. The first
        record has to contain exactly three fields, which become
        ``bibcode``, ``ft_source`` and ``provider``.

        :param test_publish: stub file location, joined onto PROJ_HOME
        :return: no return
        """

        def split_fields(raw_line):
            # One record: surrounding whitespace removed, split on tabs.
            return raw_line.strip().split('\t')

        def meta_file_for(bibcode):
            # Meta file path for one bibcode under the unit test extract key.
            return check_if_extract.create_meta_path(
                {'bibcode': bibcode},
                extract_key='FULLTEXT_EXTRACT_PATH_UNITTEST'
            )

        with open(os.path.join(PROJ_HOME, test_publish), "r") as handle:
            lines = handle.readlines()

        # Total number of records in the stub file.
        self.nor = len(lines)

        self.bibcode, self.ft_source, self.provider = split_fields(lines[0])
        self.bibcode_list = [split_fields(entry)[0] for entry in lines]

        self.test_expected = meta_file_for(self.bibcode)

        # Per-record paths with the 'meta.json' part removed.
        self.meta_list = [
            meta_file_for(code).replace('meta.json', '')
            for code in self.bibcode_list
        ]

        self.meta_path = self.test_expected.replace('meta.json', '')

        # Count records whose second-to-last field names a PDF
        # (case-insensitive); the remainder are the standard files.
        pdf_count = 0
        for entry in lines:
            if split_fields(entry)[-2].lower().endswith('.pdf'):
                pdf_count += 1
        self.number_of_PDFs = pdf_count

        self.number_of_standard_files = self.nor - self.number_of_PDFs
Ejemplo n.º 3
0
    def calculate_expected_folders(self, full_text_links):
        """
        Build the list of paths expected to exist once the stub data has
        been extracted.

        Each line of the links file is a tab-separated record whose first
        field is the bibcode. For every record the meta file path is looked
        up and its 'meta.json' part is removed.

        :param full_text_links: full text links stub file, joined onto
        PROJ_HOME
        :return: list of expected paths, one per line of the stub file
        """

        links_path = os.path.join(PROJ_HOME, full_text_links)
        with open(links_path, "r") as links_file:
            records = links_file.readlines()

        expected_paths = []
        for record in records:
            # First tab-separated field of the record is the bibcode.
            bibcode = record.strip().split('\t')[0]
            meta_file = check_if_extract.create_meta_path(
                {CONSTANTS['BIBCODE']: bibcode},
                extract_key='FULLTEXT_EXTRACT_PATH_UNITTEST'
            )
            expected_paths.append(meta_file.replace('meta.json', ''))

        return expected_paths
Ejemplo n.º 4
0
    def calculate_expected_folders(self, full_text_links):
        """
        Work out which paths should be present after the test data has been
        extracted.

        The stub file is read as tab-separated lines; the first field of
        each line is used as the bibcode for the meta path lookup.

        :param full_text_links: file with the full text links stub data,
        joined onto PROJ_HOME
        :return: list with one expected path per line, each being the meta
        file path with 'meta.json' removed
        """

        def expected_path(raw_line):
            # Meta file path for the line's bibcode, minus the file name.
            meta_file = check_if_extract.create_meta_path(
                {CONSTANTS['BIBCODE']: raw_line.strip().split('\t')[0]},
                extract_key='FULLTEXT_EXTRACT_PATH_UNITTEST'
            )
            return meta_file.replace('meta.json', '')

        with open(os.path.join(PROJ_HOME, full_text_links), "r") as stub:
            stub_lines = stub.readlines()

        return [expected_path(entry) for entry in stub_lines]