Beispiel #1
0
 def runTest(self):
    '''
    Checks to see if the filename for this test parses correctly.

    Reads the filename (index 0) and the expected series, issue number and
    year (indices 1 to 3) from self.__testdata, runs extract() on the
    filename, and asserts that each parsed value equals its expected
    counterpart.  Any exception raised by extract() is reported as a test
    failure rather than propagating as an error.
    '''

    filename = self.__testdata[0]
    expected_series = self.__testdata[1]
    expected_issue_num = self.__testdata[2]
    expected_year = self.__testdata[3]

    try:
       actual_series, actual_issue_num, actual_year = extract(filename)
    except Exception as e:
       # fail() states the intent directly; assertFalse(True, ...) was only
       # ever a roundabout way of forcing an unconditional failure.
       self.fail("Unexpected error parsing: "
          + filename + "\n" + utils.sstr(e))

    # Build the diagnostic with %s formatting instead of '+': the text is
    # the same for string values, but a non-string value coming back from
    # the parser no longer raises TypeError here, so the assertions below
    # get the chance to report the actual mismatch.
    error = ('error parsing filename "%s"\n   -->'
       'got series "%s", issue "%s" and year "%s"'
       % (filename, actual_series, actual_issue_num, actual_year))

    self.assertEqual(expected_series, actual_series, error)
    self.assertEqual(expected_issue_num, actual_issue_num, error)
    self.assertEqual(expected_year, actual_year, error)
 def __parse_extra_details_from_path(self):
    '''
    Fills in a missing series name, issue number and/or publication year on
    this object's book data by parsing them out of the comic's file name.

    Nothing happens when all three values are already present, or when the
    book data has no path to parse.  Values that are already set are never
    overwritten.
    '''

    book = self.__bookdata
    missing_series = BookData.blank("series_s") == book.series_s
    missing_issue = BookData.blank("issue_num_s") == book.issue_num_s
    missing_year = BookData.blank("pub_year_n") == book.pub_year_n

    # 1. bail out early unless at least one detail is missing AND there is
    #    a path name available to parse.
    if not (missing_series or missing_issue or missing_year):
       return
    if not book.path_s:
       return

    fname = Path.GetFileName(book.path_s)
    user_regex = self.__scraper.config.alt_search_regex_s

    # 2. the user specified regex (if there is one) gets the first attempt;
    #    the built-in filename parser is the fallback when that yields nothing.
    parts = fnameparser.regex(fname, user_regex) if user_regex else None
    if not parts:
       parts = fnameparser.extract(fname) # never fails

    # 3. copy the parsed values into whichever details were blank.
    if missing_series:
       book.series_s = parts[0]
    if missing_issue:
       book.issue_num_s = parts[1]
    if missing_year:
       year_text = parts[2]
       if is_number(year_text):
          book.pub_year_n = int(year_text)
       else:
          book.pub_year_n = BookData.blank("pub_year_n")
             
             
    def __parse_extra_details_from_path(self):
        '''
        Fills in a missing series name, issue number and/or publication year
        on this object's book data by parsing them out of the comic's file
        name.

        Nothing happens when all three values are already present, or when
        the book data has no path to parse.  Values that are already set are
        never overwritten.
        '''

        book = self.__bookdata
        missing_series = BookData.blank("series_s") == book.series_s
        missing_issue = BookData.blank("issue_num_s") == book.issue_num_s
        missing_year = BookData.blank("pub_year_n") == book.pub_year_n

        # 1. bail out early unless at least one detail is missing AND there
        #    is a path name available to parse.
        if not (missing_series or missing_issue or missing_year):
            return
        if not book.path_s:
            return

        fname = Path.GetFileName(book.path_s)
        user_regex = self.__scraper.config.alt_search_regex_s

        # 2. the user specified regex (if there is one) gets the first
        #    attempt; the built-in filename parser is the fallback when that
        #    yields nothing.
        parts = fnameparser.regex(fname, user_regex) if user_regex else None
        if not parts:
            parts = fnameparser.extract(fname)  # never fails

        # 3. copy the parsed values into whichever details were blank.
        if missing_series:
            book.series_s = parts[0]
        if missing_issue:
            book.issue_num_s = parts[1]
        if missing_year:
            year_text = parts[2]
            if is_number(year_text):
                book.pub_year_n = int(year_text)
            else:
                book.pub_year_n = BookData.blank("pub_year_n")