Python _query_series_details_dom Exemples, cvconnection._query_series_details_dom Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : cvdb.py Projet : NordomWhistleklik/comic-vine-scraper

def _query_series_refs(search_terms_s, callback_function):
    """ ComicVine implementation of the identically named method in the db.py

    Tries up to three strategies, stopping at the first one that produces
    results: a search using lightly cleaned-up terms, a search using more
    aggressively cleaned-up terms, and finally interpreting the raw terms as
    a ComicVine series ID or as the URL of a ComicVine volume page.  Nothing
    is attempted at all when the lightly cleaned-up terms come back empty.

    search_terms_s    -- the search terms, as entered by the user
    callback_function -- passed through, untouched, to __query_series_refs

    Returns the collection of series refs that were found (possibly empty).
    """

    series_refs = set()

    # 1. clean up the search terms (to make them more palatable to comicvine
    # databases) before our first attempt at searching with them
    search_s = __cleanup_search_terms(search_terms_s, False)
    if not search_s:
        return series_refs
    series_refs = __query_series_refs(search_s, callback_function)

    # 2. if first search failed, cleanup terms more aggressively, try again
    #    (but only if the aggressive cleanup actually changed something)
    if not series_refs:
        altsearch_s = __cleanup_search_terms(search_s, True)
        if search_terms_s and altsearch_s != search_s:
            series_refs = __query_series_refs(altsearch_s, callback_function)

    # 3. if second search failed, try interpreting the search terms as
    #    a comicvine ID or the URL for a comicvine volume's webpage
    if not series_refs:
        # NOTE: the named groups use the standard (?P<name>...) syntax and
        # distinct names.  The .NET-style (?<name>...) form, and reusing one
        # name in both alternatives, is rejected by the standard re module.
        pattern = r"(^(49-|4050-)?(?P<id_num>\d+)$)|" + \
            r"(^https?://.*comicvine\.com/.*/(49-|4050-)(?P<url_num>\d+)(/.*)?$)"

        match = re.match(pattern, search_terms_s.strip(), re.I)
        if match:
            # exactly one alternative matched, so exactly one group is set
            series_key_s = match.group("id_num") or match.group("url_num")
            try:
                dom = cvconnection._query_series_details_dom(
                    __api_key, series_key_s)
                if int(dom.number_of_total_results) == 1:
                    series_refs.add(__volume_to_seriesref(dom.results))
            except Exception:
                # deliberately best-effort: this happens when the user
                # enters a non-existent key
                pass

    return series_refs

Exemple #2

0

Afficher le fichier

def __issue_parse_series_details(issue, dom):
    ''' Parses the current comic's series details out of the DOM

    Determines the start year and publisher of the series identified by
    dom.results.volume.id (from the per-session cache when available,
    otherwise by querying ComicVine and caching the answer) and writes them
    onto the given issue as 'volume_year_n' and 'publisher_s'.  If the
    publisher turns out to be an imprint of another publisher, the parent
    publisher is stored in 'publisher_s' and the imprint in 'imprint_s'.

    Raises an Exception if the module's series details cache has not been
    initialized, or if ComicVine returns no details for the series.
    '''

    series_id = dom.results.volume.id

    # if the start year and publisher_s have been cached (because we already
    # accessed them once this session) use the cached values.  else
    # grab those values from comicvine, and cache em so we don't have to
    # hit comic vine for them again (at least not in this session)
    cache = __series_details_cache  # only read here, so no 'global' needed
    if cache is None:
        raise Exception(__name__ + " module isn't initialized!")

    if series_id in cache:
        cached = cache[series_id]
        volume_year_n = cached[0]
        publisher_s = cached[1]
    else:
        # contact comicvine to extract details for this comic book
        series_dom = cvconnection._query_series_details_dom(
            __api_key, series_id)
        if series_dom is None:
            # str() so that a non-string id can't mask the real error
            raise Exception(
                "can't get details about series " + str(series_id))

        # start year (-1 means "unknown")
        volume_year_n = -1
        if "start_year" in series_dom.results.__dict__ and \
              is_string(series_dom.results.start_year):
            try:
                volume_year_n = int(series_dom.results.start_year)
            except (TypeError, ValueError):
                pass  # bad start year format...just keep going

        # publisher (empty string means "unknown")
        publisher_s = ''
        if "publisher" in series_dom.results.__dict__ and \
           "name" in series_dom.results.publisher.__dict__ and \
           is_string(series_dom.results.publisher.name):
            publisher_s = series_dom.results.publisher.name

        cache[series_id] = (volume_year_n, publisher_s)

    # check whether the current publisher really is the true publisher, or
    # whether it's really an imprint of another publisher.
    issue.publisher_s = cvimprints.find_parent_publisher(publisher_s)
    if issue.publisher_s != publisher_s:
        issue.imprint_s = publisher_s
    issue.volume_year_n = volume_year_n

Exemple #3

0

Afficher le fichier

Fichier : cvdb.py Projet : NordomWhistleklik/comic-vine-scraper

def __issue_parse_series_details(issue, dom):
    """ Parses the current comic's series details out of the DOM

    The series is identified by dom.results.volume.id.  Its start year and
    publisher are taken from the per-session cache if present; otherwise
    they are fetched from ComicVine and added to the cache.  The results
    are stored on the given issue: "volume_year_n", "publisher_s" (the
    parent publisher) and, only when the series' publisher is an imprint of
    another publisher, "imprint_s".

    Raises an Exception when the module's cache is uninitialized or when
    ComicVine provides no details for the series.
    """

    series_id = dom.results.volume.id

    # the cache is only read (never rebound) in this function, so it can be
    # referenced directly without a 'global' declaration
    cache = __series_details_cache
    if cache is None:
        raise Exception(__name__ + " module isn't initialized!")

    if series_id in cache:
        # already looked up once this session; reuse the cached values
        entry = cache[series_id]
        volume_year_n = entry[0]
        publisher_s = entry[1]
    else:
        # contact comicvine to extract details for this comic book
        series_dom = cvconnection._query_series_details_dom(__api_key, series_id)
        if series_dom is None:
            # str() keeps a non-string id from raising a TypeError here
            raise Exception("can't get details about series " + str(series_id))

        # start year; stays at -1 when missing or unparseable
        volume_year_n = -1
        if "start_year" in series_dom.results.__dict__ and is_string(series_dom.results.start_year):
            try:
                volume_year_n = int(series_dom.results.start_year)
            except (TypeError, ValueError):
                pass  # bad start year format...just keep going

        # publisher; stays empty when missing
        publisher_s = ""
        if (
            "publisher" in series_dom.results.__dict__
            and "name" in series_dom.results.publisher.__dict__
            and is_string(series_dom.results.publisher.name)
        ):
            publisher_s = series_dom.results.publisher.name

        # cache em so we don't have to hit comic vine again this session
        cache[series_id] = (volume_year_n, publisher_s)

    # check whether the current publisher really is the true publisher, or
    # whether it's really an imprint of another publisher.
    issue.publisher_s = cvimprints.find_parent_publisher(publisher_s)
    if issue.publisher_s != publisher_s:
        issue.imprint_s = publisher_s
    issue.volume_year_n = volume_year_n

Exemple #4

0

Afficher le fichier

def _check_magic_file(path_s):
    ''' ComicVine implementation of the identically named method in the db.py

    Looks for a file named "cvinfo.txt" or "cvinfo" in path_s (when path_s
    is an existing directory) or otherwise in the directory that contains
    path_s.  If such a file is found, a ComicVine series key is parsed out
    of its contents and ComicVine is queried for that series.

    Returns the SeriesRef built from the query result, or None if path_s is
    empty, no usable cvinfo file was found, or the query did not yield
    exactly one result.  Errors are logged, never raised.
    '''
    series_key_s = None
    file_s = None
    try:
        # 1. get the directory to search for a cvinfo file in, or None
        dir_s = None
        if path_s:
            dir_s = path_s if Directory.Exists(path_s) \
                else Path.GetDirectoryName(path_s)
        if not (dir_s and Directory.Exists(dir_s)):
            dir_s = None

        if dir_s:
            # 2. search in that directory for a properly named cvinfo file
            #    note that Windows filenames are not case sensitive.
            #    (if both candidates exist, the last one checked wins)
            for name_s in ["cvinfo.txt", "cvinfo"]:
                candidate_s = dir_s + "\\" + name_s
                if File.Exists(candidate_s):
                    file_s = candidate_s

            # 3. if we found a file, read its contents in, and parse the
            #    comicvine series id out of it, if possible.
            if file_s:
                with StreamReader(file_s, Encoding.UTF8, False) as sr:
                    line = sr.ReadToEnd()
                # the reader is no longer needed once the text is in memory
                line = line.strip() if line else line
                match = re.match(r"^.*?\b(49|4050)-(\d{2,})\b.*$", line)
                if match:
                    line = match.group(2)
                # a bare number (no 49-/4050- prefix) is accepted as well
                if utils.is_number(line):
                    series_key_s = utils.sstr(int(line))
    except Exception:
        log.debug_exc("bad cvinfo file: " + sstr(file_s))

    # 4. did we find a series key?  if so, query comicvine to build a proper
    #    SeriesRef object for that series key.
    series_ref = None
    if series_key_s:
        try:
            dom = cvconnection._query_series_details_dom(
                __api_key, utils.sstr(series_key_s))
            if int(dom.number_of_total_results) == 1:
                series_ref = __volume_to_seriesref(dom.results)
        except Exception:
            log.debug_exc("error getting SeriesRef for: " + sstr(series_key_s))

    if file_s and not series_ref:
        log.debug("ignoring bad cvinfo file: ", sstr(file_s))
    return series_ref  # may be None!

Exemple #5

0

Afficher le fichier

Fichier : cvdb.py Projet : Blackbird88/comic-vine-scraper

def _check_magic_file(path_s):
   ''' ComicVine implementation of the identically named method in the db.py

   Searches for a "cvinfo.txt" or "cvinfo" file, either directly in path_s
   (if path_s is an existing directory) or in the directory containing
   path_s.  When one is found, its contents are parsed for a ComicVine
   series key, and that key is used to query ComicVine.

   Returns a SeriesRef for the series named by the cvinfo file, or None when
   path_s is empty, there is no usable cvinfo file, or ComicVine does not
   report exactly one result.  Failures are logged rather than raised.
   '''
   series_key_s = None
   file_s = None
   try:
      # 1. get the directory to search for a cvinfo file in, or None
      dir_s = None
      if path_s:
         if Directory.Exists(path_s):
            dir_s = path_s
         else:
            dir_s = Path.GetDirectoryName(path_s)
      if not (dir_s and Directory.Exists(dir_s)):
         dir_s = None

      if dir_s:
         # 2. search in that directory for a properly named cvinfo file
         #    note that Windows filenames are not case sensitive.
         #    (there is no early exit: the last existing candidate is used)
         for f in [dir_s + "\\" + x for x in ["cvinfo.txt", "cvinfo"]]:
            if File.Exists(f):
               file_s = f

         # 3. if we found a file, read its contents in, and parse the
         #    comicvine series id out of it, if possible.
         if file_s:
            with StreamReader(file_s, Encoding.UTF8, False) as sr:
               line = sr.ReadToEnd()
            # parsing happens after the reader has been closed
            line = line.strip() if line else line
            match = re.match(r"^.*?\b(49|4050)-(\d{2,})\b.*$", line)
            if match:
               line = match.group(2)
            # contents that are just a plain number are accepted too
            if utils.is_number(line):
               series_key_s = utils.sstr(int(line))
   except Exception:
      log.debug_exc("bad cvinfo file: " + sstr(file_s))

   # 4. did we find a series key?  if so, query comicvine to build a proper
   #    SeriesRef object for that series key.
   series_ref = None
   if series_key_s:
      try:
         dom = cvconnection._query_series_details_dom(
            __api_key, utils.sstr(series_key_s))
         if int(dom.number_of_total_results) == 1:
            series_ref = __volume_to_seriesref(dom.results)
      except Exception:
         log.debug_exc("error getting SeriesRef for: " + sstr(series_key_s))

   if file_s and not series_ref:
      log.debug("ignoring bad cvinfo file: ", sstr(file_s))
   return series_ref # may be None!

Exemple #6

0

Afficher le fichier

Fichier : cvdb.py Projet : Tomservov2/comic-vine-scraper

def __url_to_seriesref(url_s):
    '''
    Converts a ComicVine URL into a SeriesRef.  The URL has to contain
    a magic number of the form 4050-XXXXXXXX (a series) or 4000-XXXXXXXX
    (an issue.)   If the given URL has a usable magic number, use it to query
    the db and construct a SeriesRef for the series associated with that
    number.  Returns None if the url couldn't be converted, for any reason.

    The legacy series prefix 49-XXXXXXXX is accepted as well.
    '''
    # NOTE: the named groups below use the standard (?P<name>...) syntax;
    # the .NET-style (?<name>...) form is rejected by the standard re module.
    url_s = url_s.strip()

    # 1. try interpreting the url as a comicvine issue (i.e. 4000-XXXXXXXX)
    match = re.match(r"^.*?\b(4000)-(?P<num>\d{2,})\b.*$", url_s, re.I)
    if match:
        issueid_s = match.group("num")
        try:
            dom = cvconnection._query_issue_details_dom(
                __api_key, issueid_s)
            if int(dom.number_of_total_results) == 1:
                # convert url into the series id for this issue, so that
                # step 2 can treat it like any other series url
                url_s = "4050-" + dom.results.volume.id
        except Exception:
            pass  # happens when the user enters a non-existent key

    # 2. now try interpreting the url as a comicvine series (4050-XXXXXX)
    series_ref = None
    match = re.match(
        r"^.*?\b(49|4050)-(?P<num>\d{2,})\b.*$", url_s.strip(), re.I)
    if match:
        seriesid_s = match.group("num")
        try:
            dom = cvconnection._query_series_details_dom(
                __api_key, seriesid_s)
            if int(dom.number_of_total_results) == 1:
                series_ref = __volume_to_seriesref(dom.results)
        except Exception:
            pass  # happens when the user enters a non-existent key

    return series_ref

Exemple #7

0

Afficher le fichier

Fichier : cvdb.py Projet : cbanack/comic-vine-scraper

def __url_to_seriesref(url_s):
   '''
   Converts a ComicVine URL into a SeriesRef.  The URL has to contain
   a magic number of the form 4050-XXXXXXXX (a series) or 4000-XXXXXXXX
   (an issue.)   If the given URL has a usable magic number, use it to query
   the db and construct a SeriesRef for the series associated with that
   number.  Returns None if the url couldn't be converted, for any reason.

   Series numbers written with the older 49- prefix are recognized too.
   '''
   # NOTE: named groups are written in the standard (?P<name>...) form; the
   # .NET-only (?<name>...) spelling is not valid for the standard re module.
   issue_pattern = r"^.*?\b(4000)-(?P<num>\d{2,})\b.*$"
   series_pattern = r"^.*?\b(49|4050)-(?P<num>\d{2,})\b.*$"

   # 1. try interpreting the url as a comicvine issue (i.e. 4000-XXXXXXXX)
   url_s = url_s.strip()
   match = re.match(issue_pattern, url_s, re.I)
   if match:
      try:
         dom = cvconnection._query_issue_details_dom(
            __api_key, match.group("num"))
         if int(dom.number_of_total_results) == 1:
            # convert url into the series id for this issue; step 2 below
            # then resolves it exactly like a series url
            url_s = "4050-"+dom.results.volume.id
      except Exception:
         pass # happens when the user enters a non-existent key

   # 2. now try interpreting the url as a comicvine series (4050-XXXXXX)
   series_ref = None
   url_s = url_s.strip()
   match = re.match(series_pattern, url_s, re.I)
   if match:
      try:
         dom = cvconnection._query_series_details_dom(
            __api_key, match.group("num"))
         if int(dom.number_of_total_results) == 1:
            series_ref = __volume_to_seriesref(dom.results)
      except Exception:
         pass # happens when the user enters a non-existent key

   return series_ref

Exemple #8

0

Afficher le fichier

def _query_series_refs(search_terms_s, callback_function):
    ''' ComicVine implementation of the identically named method in the db.py

    Searches ComicVine for series matching the given terms.  The terms are
    first cleaned up lightly and searched; if that finds nothing, they are
    cleaned up more aggressively and searched again; if that also finds
    nothing, the original terms are interpreted as a ComicVine series ID or
    as the URL of a ComicVine volume page.  If the light cleanup leaves no
    terms at all, no search is performed.

    search_terms_s    -- the search terms, as entered by the user
    callback_function -- handed unchanged to __query_series_refs

    Returns the collection of series refs found (possibly empty).
    '''

    series_refs = set()

    # 1. clean up the search terms (to make them more palatable to comicvine
    # databases) before our first attempt at searching with them
    search_s = __cleanup_search_terms(search_terms_s, False)
    if search_s:
        series_refs = __query_series_refs(search_s, callback_function)

        # 2. if first search failed, cleanup terms more aggressively, try again
        if not series_refs:
            altsearch_s = __cleanup_search_terms(search_s, True)
            if search_terms_s and altsearch_s != search_s:
                series_refs = __query_series_refs(altsearch_s,
                                                  callback_function)

        # 3. if second search failed, try interpreting the search terms as
        #    a comicvine ID or the URL for a comicvine volume's webpage
        if not series_refs:
            search_terms_s = search_terms_s.strip()

            # NOTE: standard re requires (?P<name>...) for named groups and
            # forbids reusing a group name, so each alternative gets its own
            # name (the .NET-style (?<num>...) in both halves is invalid).
            bare_id_pattern = r"(^(49-|4050-)?(?P<bare_num>\d+)$)"
            url_pattern = \
               r"(^https?://.*comicvine\.com/.*/(49-|4050-)(?P<url_num>\d+)(/.*)?$)"
            pattern = bare_id_pattern + "|" + url_pattern

            match = re.match(pattern, search_terms_s, re.I)
            if match:
                # only the alternative that matched has its group set
                series_key_s = match.group("bare_num")
                if not series_key_s:
                    series_key_s = match.group("url_num")
                try:
                    dom = cvconnection._query_series_details_dom(
                        __api_key, series_key_s)
                    num_results_n = int(dom.number_of_total_results)
                    if num_results_n == 1:
                        series_refs.add(__volume_to_seriesref(dom.results))
                except Exception:
                    # deliberately best-effort: happens when the user
                    # enters a non-existent key
                    pass

    return series_refs