def helper_get_details(self, test_publish):
    """
    Read the stub data file and record facts about it as attributes on
    ``self`` for later use by the tests.

    Every line of the stub file is split on tabs; the first field of each
    line is used as its bibcode. The first line must split into exactly
    three fields, which are stored as ``bibcode``, ``ft_source`` and
    ``provider``.

    Attributes set: ``nor`` (number of lines), ``bibcode``, ``ft_source``,
    ``provider``, ``bibcode_list``, ``test_expected`` (meta path of the
    first bibcode), ``meta_list`` and ``meta_path`` (meta paths with
    'meta.json' removed), ``number_of_PDFs`` and
    ``number_of_standard_files``.

    :param test_publish: path of the stub file, joined onto self.proj_home
    :return: no return
    """
    stub_path = os.path.join(self.proj_home, test_publish)
    with open(stub_path, "r") as stub_file:
        lines = stub_file.readlines()

    # Split every line once; the pieces are reused several times below.
    rows = [raw.strip().split('\t') for raw in lines]

    self.nor = len(lines)
    self.bibcode, self.ft_source, self.provider = rows[0]
    self.bibcode_list = [row[0] for row in rows]

    self.test_expected = checker.create_meta_path(
        {'bibcode': self.bibcode},
        self.app.conf['FULLTEXT_EXTRACT_PATH']
    )

    # One folder per bibcode: the meta path with the file name removed.
    folders = []
    for bibcode in self.bibcode_list:
        meta_file = checker.create_meta_path(
            {"bibcode": bibcode},
            self.app.conf['FULLTEXT_EXTRACT_PATH']
        )
        folders.append(meta_file.replace('meta.json', ''))
    self.meta_list = folders

    self.meta_path = self.test_expected.replace('meta.json', '')

    # A line counts as a PDF when its second-to-last tab field ends in
    # '.pdf', ignoring case.
    sources = [row[-2] for row in rows]
    self.number_of_PDFs = sum(
        1 for source in sources if source.lower().endswith('.pdf')
    )
    self.number_of_standard_files = self.nor - self.number_of_PDFs
def helper_get_details(self, test_publish):
    """
    Read the stub data file and record facts about it as attributes on
    ``self`` for later use by the tests.

    Every line of the stub file is split on tabs; the first field of each
    line is used as its bibcode. The first line must split into exactly
    three fields, which are stored as ``bibcode``, ``ft_source`` and
    ``provider``.

    Attributes set: ``nor`` (number of lines), ``bibcode``, ``ft_source``,
    ``provider``, ``bibcode_list``, ``test_expected`` (meta path of the
    first bibcode), ``meta_list`` and ``meta_path`` (meta paths with
    'meta.json' removed), ``number_of_PDFs`` and
    ``number_of_standard_files``.

    :param test_publish: path of the stub file, joined onto self.proj_home
    :return: no return
    """
    stub_path = os.path.join(self.proj_home, test_publish)
    with open(stub_path, "r") as stub_file:
        lines = stub_file.readlines()

    # Split every line once; the pieces are reused several times below.
    rows = [raw.strip().split('\t') for raw in lines]

    self.nor = len(lines)
    self.bibcode, self.ft_source, self.provider = rows[0]
    self.bibcode_list = [row[0] for row in rows]

    self.test_expected = checker.create_meta_path(
        {'bibcode': self.bibcode},
        self.app.conf['FULLTEXT_EXTRACT_PATH']
    )

    # One folder per bibcode: the meta path with the file name removed.
    folders = []
    for bibcode in self.bibcode_list:
        meta_file = checker.create_meta_path(
            {"bibcode": bibcode},
            self.app.conf['FULLTEXT_EXTRACT_PATH']
        )
        folders.append(meta_file.replace('meta.json', ''))
    self.meta_list = folders

    self.meta_path = self.test_expected.replace('meta.json', '')

    # A line counts as a PDF when its second-to-last tab field ends in
    # '.pdf', ignoring case.
    sources = [row[-2] for row in rows]
    self.number_of_PDFs = sum(
        1 for source in sources if source.lower().endswith('.pdf')
    )
    self.number_of_standard_files = self.nor - self.number_of_PDFs
def calculate_expected_folders(self, full_text_links):
    """
    Build the list of folder paths expected for the stub data.

    For each line of the stub file, the first tab-separated field is taken
    as the bibcode, its meta path is obtained from
    ``checker.create_meta_path`` and 'meta.json' is removed from that path.

    :param full_text_links: path of the full text links stub file, joined
        onto self.proj_home
    :return: list of expected paths, one per line of the stub file
    """
    links_path = os.path.join(self.proj_home, full_text_links)
    with open(links_path, "r") as links_file:
        rows = links_file.readlines()

    expected_paths = []
    for row in rows:
        bibcode = row.strip().split('\t')[0]
        meta_file = checker.create_meta_path(
            {'bibcode': bibcode},
            self.app.conf['FULLTEXT_EXTRACT_PATH']
        )
        expected_paths.append(meta_file.replace('meta.json', ''))

    return expected_paths
def calculate_expected_folders(self, full_text_links):
    """
    Build the list of folder paths expected for the stub data.

    For each line of the stub file, the first tab-separated field is taken
    as the bibcode, its meta path is obtained from
    ``checker.create_meta_path`` and 'meta.json' is removed from that path.

    :param full_text_links: path of the full text links stub file, joined
        onto self.proj_home
    :return: list of expected paths, one per line of the stub file
    """
    links_path = os.path.join(self.proj_home, full_text_links)
    with open(links_path, "r") as links_file:
        rows = links_file.readlines()

    expected_paths = []
    for row in rows:
        bibcode = row.strip().split('\t')[0]
        meta_file = checker.create_meta_path(
            {'bibcode': bibcode},
            self.app.conf['FULLTEXT_EXTRACT_PATH']
        )
        expected_paths.append(meta_file.replace('meta.json', ''))

    return expected_paths