def runTest(self):
    '''
    Checks to see if the filename for this test parses correctly.

    The test data for this case is read from self.__testdata, which is
    indexed as: [0] the filename to parse, [1] the expected series name,
    [2] the expected issue number, and [3] the expected year.  The filename
    is passed to extract(), and each of the three values it returns is
    compared against its expected counterpart.

    Any exception raised by extract() is reported as a test failure rather
    than being allowed to propagate as an error.
    '''
    filename = self.__testdata[0]
    expected_series = self.__testdata[1]
    expected_issue_num = self.__testdata[2]
    expected_year = self.__testdata[3]

    try:
        actual_series, actual_issue_num, actual_year = extract(filename)
    except Exception as e:
        # fail() raises immediately, so the code below never runs with the
        # 'actual_*' names unbound.
        self.fail("Unexpected error parsing: %s\n%s"
                  % (filename, utils.sstr(e)))

    # Use %s formatting (not '+') so that a non-string value coming back from
    # the parser shows up in the failure message instead of raising a
    # TypeError while the message is being built.
    error = ('error parsing filename "%s"\n -->'
             'got series "%s", issue "%s" and year "%s"'
             % (filename, actual_series, actual_issue_num, actual_year))

    self.assertEqual(expected_series, actual_series, error)
    self.assertEqual(expected_issue_num, actual_issue_num, error)
    self.assertEqual(expected_year, actual_year, error)
def __parse_extra_details_from_path(self):
    '''
    Fills in a missing series name, issue number, and/or publication year
    by parsing them out of the comic's file name.

    These three values are critical for scraping purposes, yet fresh,
    unscraped files often do not have them.  Each of the three fields on
    self.__bookdata is compared against its BookData.blank(...) value; only
    fields that are still blank are overwritten, and nothing happens at all
    if no field is blank or if the book has no path.

    The user's alternate search regex (from the scraper's config) is tried
    first when one is set; if it is unset or yields nothing, the default
    fnameparser.extract() is used instead.
    '''
    book = self.__bookdata
    series_missing = BookData.blank("series_s") == book.series_s
    issue_missing = BookData.blank("issue_num_s") == book.issue_num_s
    year_missing = BookData.blank("pub_year_n") == book.pub_year_n

    # nothing to do when every detail is already present...
    if not (series_missing or issue_missing or year_missing):
        return

    # ...or when there is no path name to work with.
    if not book.path_s:
        return

    # 1. at least one detail is missing and we have a path, so try to
    #    extract some details from the file name.
    filename = Path.GetFileName(book.path_s)
    settings = self.__scraper.config
    user_pattern = settings.alt_search_regex_s

    # 2. prefer the user specified regex, if there is one; otherwise (or if
    #    it produced nothing) fall back to the default extractor, which the
    #    original author notes never fails.
    parsed = None
    if user_pattern:
        parsed = fnameparser.regex(filename, user_pattern)
    if not parsed:
        parsed = fnameparser.extract(filename)

    # 3. use the extracted data to fill in only the gaps in our details.
    if series_missing:
        book.series_s = parsed[0]
    if issue_missing:
        book.issue_num_s = parsed[1]
    if year_missing:
        # the year is stored as a number; keep the blank value when the
        # extracted text is not numeric.
        if is_number(parsed[2]):
            book.pub_year_n = int(parsed[2])
        else:
            book.pub_year_n = BookData.blank("pub_year_n")