def storyarcinfo(xmlid):
    """Fetch ComicVine story-arc details for the arc id `xmlid` (requests variant).

    Builds the 4045- (story_arc) resource URL, throttles to honour the
    ComicVine one-request-per-second API restriction, then fetches the XML
    via requests.

    NOTE(review): this block appears truncated - `comicLibrary` and `arcinfo`
    are initialised but never used in the visible span, and the successful
    fetch path never processes `r` before the function ends.  Confirm against
    the full file.
    """
    comicLibrary = listLibrary()
    arcinfo = {}

    # Fall back to the shared default API key when the user has not set one.
    if mylar.COMICVINE_API == 'None' or mylar.COMICVINE_API is None or mylar.COMICVINE_API == mylar.DEFAULT_CVAPI:
        logger.warn('You have not specified your own ComicVine API key - alot of things will be limited. Get your own @ http://api.comicvine.com.')
        comicapi = mylar.DEFAULT_CVAPI
    else:
        comicapi = mylar.COMICVINE_API

    #respawn to the exact id for the story arc and count the # of issues present.
    ARCPULL_URL = mylar.CVURL + 'story_arc/4045-' + str(xmlid) + '/?api_key=' + str(comicapi) + '&field_list=issues,name,first_appeared_in_issue,deck,image&format=xml&offset=0'
    logger.fdebug('arcpull_url:' + str(ARCPULL_URL))

    #new CV API restriction - one api request / second.
    # Sleep at least 2s; honour a larger user-configured rate when present.
    if mylar.CVAPI_RATE is None or mylar.CVAPI_RATE < 2:
        time.sleep(2)
    else:
        time.sleep(mylar.CVAPI_RATE)

    #download the file:
    payload = None
    verify = False  # SSL verification deliberately disabled for the CV request

    try:
        r = requests.get(ARCPULL_URL, params=payload, verify=verify, headers=mylar.CV_HEADERS)
    except Exception, e:
        logger.warn('Error fetching data from ComicVine: %s' % (e))
        return
def storyarcinfo(xmlid):
    """Fetch ComicVine story-arc details for the arc id `xmlid` (urllib2 variant).

    Builds the 4045- (story_arc) resource URL, checks the rolling API-usage
    counter before issuing the request, and fetches the XML via urllib2.
    Returns 'apireached' when the configured API-call ceiling has been hit;
    returns None on HTTP failure.

    NOTE(review): this block appears truncated - `comicLibrary` and `arcinfo`
    are initialised but never used in the visible span, and `file` is never
    processed after a successful fetch.  Confirm against the full file.
    """
    comicLibrary = listLibrary()
    arcinfo = {}

    # Fall back to the shared default API key when the user has not set one.
    if mylar.COMICVINE_API == 'None' or mylar.COMICVINE_API is None or mylar.COMICVINE_API == mylar.DEFAULT_CVAPI:
        logger.warn('You have not specified your own ComicVine API key - alot of things will be limited. Get your own @ http://api.comicvine.com.')
        comicapi = mylar.DEFAULT_CVAPI
    else:
        comicapi = mylar.COMICVINE_API

    #respawn to the exact id for the story arc and count the # of issues present.
    ARCPULL_URL = mylar.CVURL + 'story_arc/4045-' + str(xmlid) + '/?api_key=' + str(comicapi) + '&field_list=issues,name,first_appeared_in_issue,deck,image&format=xml&offset=0'
    logger.fdebug('arcpull_url:' + str(ARCPULL_URL))

    # Enforce the API-usage ceiling before spending another request.
    if mylar.CVAPI_COUNT == 0 or mylar.CVAPI_COUNT >= mylar.CVAPI_MAX:
        chkit = cvapi_check()
        if chkit == False:
            return 'apireached'

    try:
        file = urllib2.urlopen(ARCPULL_URL)
    except urllib2.HTTPError, err:
        logger.error('err : ' + str(err))
        logger.error('There was a major problem retrieving data from ComicVine - on their end.')
        return
def findComic(name, mode, issue, limityear=None, explicit=None, type=None):
    """Search ComicVine for volumes (or story arcs) matching `name`.

    Parameters:
        name      -- search term; quoted when it contains CV operator chars
                     or the word 'annual' so CV treats it as a phrase.
        mode      -- accepted for caller compatibility; unused in this body.
        issue     -- highest issue number we already know of; series whose
                     numerical range falls short of it are skipped.
        limityear -- collection of acceptable start years, or None (no limit).
        explicit  -- 'all' / 'loose' / 'explicit' search-word matching mode.
        type      -- CV resource type: 'volume' (default) or 'story_arc'.

    Returns (comiclist, explicit) on success, or False when the initial
    search fails or yields no results.
    """
    #with mb_lock:
    comicResults = None
    comicLibrary = listLibrary()
    comiclist = []
    arcinfolist = []

    # Quote the query when it contains CV operator characters (or 'annual').
    if type == 'story_arc':
        chars = set('!?*&')
    else:
        chars = set('!?*&-')

    if any((c in chars) for c in name) or 'annual' in name:
        name = '"' + name + '"'

    #print ("limityear: " + str(limityear))
    if limityear is None:
        limityear = 'None'

    comicquery = name
    #comicquery=name.replace(" ", "%20")

    if explicit is None:
        #logger.fdebug('explicit is None. Setting to Default mode of ALL search words.')
        #comicquery=name.replace(" ", " AND ")
        explicit = 'all'

    #OR
    if ' and ' in comicquery.lower():
        logger.fdebug('Enforcing exact naming match due to operator in title (and)')
        explicit = 'all'

    if explicit == 'loose':
        logger.fdebug('Changing to loose mode - this will match ANY of the search words')
        comicquery = name.replace(" ", " OR ")
    elif explicit == 'explicit':
        logger.fdebug('Changing to explicit mode - this will match explicitly on the EXACT words')
        comicquery = name.replace(" ", " AND ")
    else:
        logger.fdebug('Default search mode - this will match on ALL search words')
        #comicquery = name.replace(" ", " AND ")
        explicit = 'all'

    # Fall back to the shared default API key when the user has not set one.
    if mylar.COMICVINE_API == 'None' or mylar.COMICVINE_API is None or mylar.COMICVINE_API == mylar.DEFAULT_CVAPI:
        logger.warn('You have not specified your own ComicVine API key - alot of things will be limited. Get your own @ http://api.comicvine.com.')
        comicapi = mylar.DEFAULT_CVAPI
    else:
        comicapi = mylar.COMICVINE_API

    if type is None:
        type = 'volume'

    #let's find out how many results we get from the query...
    searched = pullsearch(comicapi, comicquery, 0, explicit, type)
    if searched is None:
        return False
    totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
    logger.fdebug("there are " + str(totalResults) + " search results...")
    if not totalResults:
        return False
    if int(totalResults) > 1000:
        logger.warn('Search returned more than 1000 hits [' + str(totalResults) + ']. Only displaying first 1000 results - use more specifics or the exact ComicID if required.')
        totalResults = 1000
    countResults = 0
    while countResults < int(totalResults):
        #logger.fdebug("querying " + str(countResults))
        if countResults > 0:
            #2012/22/02 - CV API flipped back to offset usage instead of page
            if explicit == 'all' or explicit == 'loose':
                #all / loose uses page for offset
                offsetcount = (countResults / 100) + 1
            else:
                #explicit uses offset
                offsetcount = countResults
            searched = pullsearch(comicapi, comicquery, offsetcount, explicit, type)
        comicResults = searched.getElementsByTagName(type) #('volume')
        n = 0
        if not comicResults:
            break
        for result in comicResults:
            #retrieve the first xml tag (<tag>data</tag>)
            #that the parser finds with name tagName:
            arclist = []
            if type == 'story_arc':
                #call cv.py here to find out issue count in story arc
                try:
                    logger.fdebug('story_arc ascension')
                    names = len(result.getElementsByTagName('name'))
                    n = 0
                    logger.fdebug('length: ' + str(names))
                    xmlpub = None #set this incase the publisher field isn't populated in the xml
                    while n < names:
                        logger.fdebug(result.getElementsByTagName('name')[n].parentNode.nodeName)
                        if result.getElementsByTagName('name')[n].parentNode.nodeName == 'story_arc':
                            logger.fdebug('yes')
                            try:
                                xmlTag = result.getElementsByTagName('name')[n].firstChild.wholeText
                                xmlTag = xmlTag.rstrip()
                                logger.fdebug('name: ' + xmlTag)
                            except:
                                logger.error('There was a problem retrieving the given data from ComicVine. Ensure that www.comicvine.com is accessible.')
                                return
                        elif result.getElementsByTagName('name')[n].parentNode.nodeName == 'publisher':
                            logger.fdebug('publisher check.')
                            xmlpub = result.getElementsByTagName('name')[n].firstChild.wholeText
                        n += 1
                except:
                    logger.warn('error retrieving story arc search results.')
                    return

                # Pull the arc's own site_detail_url (skip nested issue URLs).
                siteurl = len(result.getElementsByTagName('site_detail_url'))
                s = 0
                logger.fdebug('length: ' + str(names))
                xmlurl = None
                while s < siteurl:
                    logger.fdebug(result.getElementsByTagName('site_detail_url')[s].parentNode.nodeName)
                    if result.getElementsByTagName('site_detail_url')[s].parentNode.nodeName == 'story_arc':
                        try:
                            xmlurl = result.getElementsByTagName('site_detail_url')[s].firstChild.wholeText
                        except:
                            logger.error('There was a problem retrieving the given data from ComicVine. Ensure that www.comicvine.com is accessible.')
                            return
                    s += 1

                xmlid = result.getElementsByTagName('id')[0].firstChild.wholeText

                if xmlid is not None:
                    arcinfolist = storyarcinfo(xmlid)
                    logger.info('[IMAGE] : ' + arcinfolist['comicimage'])
                    comiclist.append({
                        'name': xmlTag,
                        'comicyear': arcinfolist['comicyear'],
                        'comicid': xmlid,
                        'cvarcid': xmlid,
                        'url': xmlurl,
                        'issues': arcinfolist['issues'],
                        'comicimage': arcinfolist['comicimage'],
                        'publisher': xmlpub,
                        'description': arcinfolist['description'],
                        'deck': arcinfolist['deck'],
                        'arclist': arcinfolist['arclist'],
                        'haveit': arcinfolist['haveit']
                        })
                else:
                    # NOTE(review): arcyear / issuecount / xmlimage / xmldesc /
                    # xmldeck / haveit are not defined on this path in the
                    # visible code - this branch would raise NameError if hit.
                    # Left untouched to preserve existing behaviour.
                    comiclist.append({
                        'name': xmlTag,
                        'comicyear': arcyear,
                        'comicid': xmlid,
                        'url': xmlurl,
                        'issues': issuecount,
                        'comicimage': xmlimage,
                        'publisher': xmlpub,
                        'description': xmldesc,
                        'deck': xmldeck,
                        'arclist': arclist,
                        'haveit': haveit
                        })
                logger.fdebug('IssueID\'s that are a part of ' + xmlTag + ' : ' + str(arclist))
            else:
                xmlcnt = result.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
                #here we can determine what called us, and either start gathering all issues or just limited ones.
                if issue is not None and str(issue).isdigit():
                    #this gets buggered up with NEW/ONGOING series because the db hasn't been updated
                    #to reflect the proper count. Drop it by 1 to make sure.
                    limiter = int(issue) - 1
                else:
                    limiter = 0
                #get the first issue # (for auto-magick calcs)
                try:
                    xmlfirst = result.getElementsByTagName('issue_number')[0].firstChild.wholeText
                    if '\xbd' in xmlfirst:
                        xmlfirst = "1" #if the first issue is 1/2, just assume 1 for logistics
                except:
                    xmlfirst = '1'

                cnt_numerical = int(xmlcnt) + int(xmlfirst) # (of issues + start of first issue = numerical range)
                if cnt_numerical >= limiter:
                    cnl = len(result.getElementsByTagName('name'))
                    cl = 0
                    xmlTag = 'None'
                    xmlimage = "cache/blankcover.jpg"
                    xml_lastissueid = 'None'
                    while cl < cnl:
                        if result.getElementsByTagName('name')[cl].parentNode.nodeName == 'volume':
                            xmlTag = result.getElementsByTagName('name')[cl].firstChild.wholeText
                            #break
                        if result.getElementsByTagName('name')[cl].parentNode.nodeName == 'image':
                            xmlimage = result.getElementsByTagName('super_url')[0].firstChild.wholeText
                        if result.getElementsByTagName('name')[cl].parentNode.nodeName == 'last_issue':
                            xml_lastissueid = result.getElementsByTagName('id')[cl].firstChild.wholeText
                        cl += 1

                    if (result.getElementsByTagName('start_year')[0].firstChild) is not None:
                        xmlYr = result.getElementsByTagName('start_year')[0].firstChild.wholeText
                    else:
                        xmlYr = "0000"

                    # Build the span of years the series plausibly ran over
                    # (start year + ~1 year per 12 issues, padded by one).
                    yearRange = []
                    tmpYr = re.sub('\?', '', xmlYr)
                    if tmpYr.isdigit():
                        yearRange.append(tmpYr)
                        tmpyearRange = int(xmlcnt) / 12
                        if float(tmpyearRange):
                            # BUGFIX: original read `tmpyearRange +1`, a no-op
                            # expression statement, so the intended one-year
                            # padding of the range was never applied.
                            tmpyearRange += 1
                        possible_years = int(tmpYr) + tmpyearRange
                        for i in range(int(tmpYr), int(possible_years), 1):
                            if not any(int(x) == int(i) for x in yearRange):
                                yearRange.append(str(i))

                    logger.fdebug('[RESULT][' + str(limityear) + '] ComicName:' + xmlTag + ' -- ' + str(xmlYr) + ' [Series years: ' + str(yearRange) + ']')
                    if tmpYr != xmlYr:
                        xmlYr = tmpYr

                    if any(map(lambda v: v in limityear, yearRange)) or limityear == 'None':
                        xmlurl = result.getElementsByTagName('site_detail_url')[0].firstChild.wholeText
                        idl = len(result.getElementsByTagName('id'))
                        idt = 0
                        xmlid = None
                        while idt < idl:
                            if result.getElementsByTagName('id')[idt].parentNode.nodeName == 'volume':
                                xmlid = result.getElementsByTagName('id')[idt].firstChild.wholeText
                                break
                            idt += 1

                        if xmlid is None:
                            logger.error('Unable to figure out the comicid - skipping this : ' + str(xmlurl))
                            continue

                        publishers = result.getElementsByTagName('publisher')
                        if len(publishers) > 0:
                            pubnames = publishers[0].getElementsByTagName('name')
                            if len(pubnames) > 0:
                                xmlpub = pubnames[0].firstChild.wholeText
                            else:
                                xmlpub = "Unknown"
                        else:
                            xmlpub = "Unknown"

                        #ignore specific publishers on a global scale here.
                        if mylar.BLACKLISTED_PUBLISHERS is not None and any([x for x in mylar.BLACKLISTED_PUBLISHERS if x.lower() == xmlpub.lower()]):
                            logger.fdebug('Blacklisted publisher [' + xmlpub + ']. Ignoring this result.')
                            continue

                        try:
                            xmldesc = result.getElementsByTagName('description')[0].firstChild.wholeText
                        except:
                            xmldesc = "None"

                        #this is needed to display brief synopsis for each series on search results page.
                        try:
                            xmldeck = result.getElementsByTagName('deck')[0].firstChild.wholeText
                        except:
                            xmldeck = "None"

                        # Classify the edition (Print / Digital) from deck/description hints.
                        xmltype = None
                        if xmldeck != 'None':
                            if any(['print' in xmldeck.lower(), 'digital' in xmldeck.lower()]):
                                if 'print' in xmldeck.lower():
                                    xmltype = 'Print'
                                elif 'digital' in xmldeck.lower():
                                    xmltype = 'Digital'
                        if xmldesc != 'None' and xmltype is None:
                            if 'print' in xmldesc[:60].lower() and 'print edition can be found' not in xmldesc.lower():
                                xmltype = 'Print'
                            elif 'digital' in xmldesc[:60].lower() and 'digital edition can be found' not in xmldesc.lower():
                                xmltype = 'Digital'
                            else:
                                xmltype = 'Print'

                        if xmlid in comicLibrary:
                            haveit = comicLibrary[xmlid]
                        else:
                            haveit = "No"
                        comiclist.append({
                            'name': xmlTag,
                            'comicyear': xmlYr,
                            'comicid': xmlid,
                            'url': xmlurl,
                            'issues': xmlcnt,
                            'comicimage': xmlimage,
                            'publisher': xmlpub,
                            'description': xmldesc,
                            'deck': xmldeck,
                            'type': xmltype,
                            'haveit': haveit,
                            'lastissueid': xml_lastissueid,
                            'seriesrange': yearRange # returning additional information about series run polled from CV
                            })
                    else:
                        pass
                        #logger.fdebug('year: ' + str(xmlYr) + ' - contraint not met. Has to be within ' + str(limityear))
            n += 1
        #search results are limited to 100 and by pagination now...let's account for this.
        countResults = countResults + 100
    return comiclist, explicit
def findComic(name, mode, issue, limityear=None, explicit=None, type=None):
    """Search ComicVine for volumes (or story arcs) matching `name`.

    Older revision of the search routine: propagates the 'apireached'
    rate-limit sentinel from pullsearch(), and matches the series start
    year exactly against `limityear` (no computed series-year range).

    Parameters:
        name      -- search term; quoted when it contains CV operator chars.
        mode      -- accepted for caller compatibility; unused in this body.
        issue     -- highest issue number we already know of; series whose
                     numerical range falls short of it are skipped.
        limityear -- collection of acceptable start years, or None (no limit).
        explicit  -- 'all' / 'loose' / 'explicit' search-word matching mode.
        type      -- CV resource type: 'volume' (default) or 'story_arc'.

    Returns (comiclist, explicit) on success; False on failure / no hits;
    'apireached' when the CV API ceiling has been hit.
    """
    #with mb_lock:
    comicResults = None
    comicLibrary = listLibrary()
    comiclist = []
    arcinfolist = []

    # Quote the query if it contains CV operator characters.
    chars = set('!?*')
    if any((c in chars) for c in name):
        name = '"' +name +'"'

    #print ("limityear: " + str(limityear))
    if limityear is None: limityear = 'None'

    comicquery = name
    #comicquery=name.replace(" ", "%20")

    if explicit is None:
        #logger.fdebug('explicit is None. Setting to Default mode of ALL search words.')
        #comicquery=name.replace(" ", " AND ")
        explicit = 'all'

    #OR
    if explicit == 'loose':
        logger.fdebug('Changing to loose mode - this will match ANY of the search words')
        comicquery = name.replace(" ", " OR ")
    elif explicit == 'explicit':
        logger.fdebug('Changing to explicit mode - this will match explicitly on the EXACT words')
        comicquery=name.replace(" ", " AND ")
    else:
        logger.fdebug('Default search mode - this will match on ALL search words')
        comicquery = name.replace(" ", " AND ")
        explicit = 'all'

    # Fall back to the shared default API key when the user has not set one.
    if mylar.COMICVINE_API == 'None' or mylar.COMICVINE_API is None or mylar.COMICVINE_API == mylar.DEFAULT_CVAPI:
        logger.warn('You have not specified your own ComicVine API key - alot of things will be limited. Get your own @ http://api.comicvine.com.')
        comicapi = mylar.DEFAULT_CVAPI
    else:
        comicapi = mylar.COMICVINE_API

    if type is None:
        type = 'volume'

    #let's find out how many results we get from the query...
    searched = pullsearch(comicapi, comicquery, 0, explicit, type)
    if searched is None:
        return False
    elif searched == 'apireached':
        return 'apireached'
    totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
    logger.fdebug("there are " + str(totalResults) + " search results...")
    if not totalResults:
        return False
    countResults = 0
    # Page through the results 100 at a time.
    while (countResults < int(totalResults)):
        #logger.fdebug("querying " + str(countResults))
        if countResults > 0:
            #2012/22/02 - CV API flipped back to offset usage instead of page
            if explicit == 'all' or explicit == 'loose':
                #all / loose uses page for offset
                offsetcount = (countResults /100) + 1
            else:
                #explicit uses offset
                offsetcount = countResults
            searched = pullsearch(comicapi, comicquery, offsetcount, explicit, type)
            if searched == 'apireached':
                return 'apireached'
        comicResults = searched.getElementsByTagName(type) #('volume')
        body = ''
        n = 0
        if not comicResults:
            break
        for result in comicResults:
            #retrieve the first xml tag (<tag>data</tag>)
            #that the parser finds with name tagName:
            arclist = []
            if type == 'story_arc':
                #call cv.py here to find out issue count in story arc
                try:
                    logger.fdebug('story_arc ascension')
                    names = len(result.getElementsByTagName('name'))
                    n = 0
                    logger.fdebug('length: ' + str(names))
                    xmlpub = None #set this incase the publisher field isn't populated in the xml
                    while (n < names):
                        logger.fdebug(result.getElementsByTagName('name')[n].parentNode.nodeName)
                        if result.getElementsByTagName('name')[n].parentNode.nodeName == 'story_arc':
                            logger.fdebug('yes')
                            try:
                                xmlTag = result.getElementsByTagName('name')[n].firstChild.wholeText
                                xmlTag = xmlTag.rstrip()
                                logger.fdebug('name: ' + str(xmlTag))
                            except:
                                logger.error('There was a problem retrieving the given data from ComicVine. Ensure that www.comicvine.com is accessible.')
                                return
                        elif result.getElementsByTagName('name')[n].parentNode.nodeName == 'publisher':
                            logger.fdebug('publisher check.')
                            xmlpub = result.getElementsByTagName('name')[n].firstChild.wholeText
                        n+=1
                except:
                    logger.warn('error retrieving story arc search results.')
                    return

                # Pull the arc's own site_detail_url (skip nested issue URLs).
                siteurl = len(result.getElementsByTagName('site_detail_url'))
                s = 0
                logger.fdebug('length: ' + str(names))
                xmlurl = None
                while (s < siteurl):
                    logger.fdebug(result.getElementsByTagName('site_detail_url')[s].parentNode.nodeName)
                    if result.getElementsByTagName('site_detail_url')[s].parentNode.nodeName == 'story_arc':
                        try:
                            xmlurl = result.getElementsByTagName('site_detail_url')[s].firstChild.wholeText
                        except:
                            logger.error('There was a problem retrieving the given data from ComicVine. Ensure that www.comicvine.com is accessible.')
                            return
                    s+=1

                xmlid = result.getElementsByTagName('id')[0].firstChild.wholeText

                if xmlid is not None:
                    arcinfolist = storyarcinfo(xmlid)
                    comiclist.append({
                            'name': xmlTag,
                            'comicyear': arcinfolist['comicyear'],
                            'comicid': xmlid,
                            'cvarcid': xmlid,
                            'url': xmlurl,
                            'issues': arcinfolist['issues'],
                            'comicimage': arcinfolist['comicimage'],
                            'publisher': xmlpub,
                            'description': arcinfolist['description'],
                            'deck': arcinfolist['deck'],
                            'arclist': arcinfolist['arclist'],
                            'haveit': arcinfolist['haveit']
                            })
                else:
                    # NOTE(review): arcyear / issuecount / xmlimage / xmldesc /
                    # xmldeck / haveit are not defined on this path in the
                    # visible code - this branch would raise NameError if hit.
                    comiclist.append({
                            'name': xmlTag,
                            'comicyear': arcyear,
                            'comicid': xmlid,
                            'url': xmlurl,
                            'issues': issuecount,
                            'comicimage': xmlimage,
                            'publisher': xmlpub,
                            'description': xmldesc,
                            'deck': xmldeck,
                            'arclist': arclist,
                            'haveit': haveit
                            })
                logger.fdebug('IssueID\'s that are a part of ' + xmlTag + ' : ' + str(arclist))
            else:
                xmlcnt = result.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
                #here we can determine what called us, and either start gathering all issues or just limited ones.
                if issue is not None and str(issue).isdigit():
                    #this gets buggered up with NEW/ONGOING series because the db hasn't been updated
                    #to reflect the proper count. Drop it by 1 to make sure.
                    limiter = int(issue) - 1
                else:
                    limiter = 0
                #get the first issue # (for auto-magick calcs)
                try:
                    xmlfirst = result.getElementsByTagName('issue_number')[0].firstChild.wholeText
                    if '\xbd' in xmlfirst:
                        xmlfirst = "1" #if the first issue is 1/2, just assume 1 for logistics
                except:
                    xmlfirst = '1'

                #logger.info('There are : ' + str(xmlcnt) + ' issues in this series.')
                #logger.info('The first issue started at # ' + str(xmlfirst))

                cnt_numerical = int(xmlcnt) + int(xmlfirst) # (of issues + start of first issue = numerical range)
                #logger.info('The maximum issue number should be roughly # ' + str(cnt_numerical))
                #logger.info('The limiter (issue max that we know of) is # ' + str(limiter))
                if cnt_numerical >= limiter:
                    cnl = len (result.getElementsByTagName('name'))
                    cl = 0
                    xmlTag = 'None'
                    xmlimage = "cache/blankcover.jpg"
                    while (cl < cnl):
                        if result.getElementsByTagName('name')[cl].parentNode.nodeName == 'volume':
                            xmlTag = result.getElementsByTagName('name')[cl].firstChild.wholeText
                            #break
                        if result.getElementsByTagName('name')[cl].parentNode.nodeName == 'image':
                            xmlimage = result.getElementsByTagName('super_url')[0].firstChild.wholeText
                        cl+=1
                    if (result.getElementsByTagName('start_year')[0].firstChild) is not None:
                        xmlYr = result.getElementsByTagName('start_year')[0].firstChild.wholeText
                    else:
                        xmlYr = "0000"
                    #logger.info('name:' + str(xmlTag) + ' -- ' + str(xmlYr))
                    # Exact start-year match (this revision has no series-year range).
                    if xmlYr in limityear or limityear == 'None':
                        xmlurl = result.getElementsByTagName('site_detail_url')[0].firstChild.wholeText
                        idl = len (result.getElementsByTagName('id'))
                        idt = 0
                        xmlid = None
                        while (idt < idl):
                            if result.getElementsByTagName('id')[idt].parentNode.nodeName == 'volume':
                                xmlid = result.getElementsByTagName('id')[idt].firstChild.wholeText
                                break
                            idt+=1
                        if xmlid is None:
                            logger.error('Unable to figure out the comicid - skipping this : ' + str(xmlurl))
                            continue
                        #logger.info('xmlid: ' + str(xmlid))
                        publishers = result.getElementsByTagName('publisher')
                        if len(publishers) > 0:
                            pubnames = publishers[0].getElementsByTagName('name')
                            if len(pubnames) >0:
                                xmlpub = pubnames[0].firstChild.wholeText
                            else:
                                xmlpub = "Unknown"
                        else:
                            xmlpub = "Unknown"
                        try:
                            xmldesc = result.getElementsByTagName('description')[0].firstChild.wholeText
                        except:
                            xmldesc = "None"
                        #this is needed to display brief synopsis for each series on search results page.
                        try:
                            xmldeck = result.getElementsByTagName('deck')[0].firstChild.wholeText
                        except:
                            xmldeck = "None"
                        if xmlid in comicLibrary:
                            haveit = comicLibrary[xmlid]
                        else:
                            haveit = "No"
                        comiclist.append({
                                'name': xmlTag,
                                'comicyear': xmlYr,
                                'comicid': xmlid,
                                'url': xmlurl,
                                'issues': xmlcnt,
                                'comicimage': xmlimage,
                                'publisher': xmlpub,
                                'description': xmldesc,
                                'deck': xmldeck,
                                'haveit': haveit
                                })
                        #logger.fdebug('year: ' + str(xmlYr) + ' - constraint met: ' + str(xmlTag) + '[' + str(xmlYr) + '] --- 4050-' + str(xmlid))
                    else:
                        pass
                        #logger.fdebug('year: ' + str(xmlYr) + ' - contraint not met. Has to be within ' + str(limityear))
            n+=1
        #search results are limited to 100 and by pagination now...let's account for this.
        countResults = countResults + 100
    return comiclist, explicit
def findComic(name, mode, issue, limityear=None, type=None): #with mb_lock: comicResults = None comicLibrary = listLibrary() comiclist = [] arcinfolist = [] commons = ['and', 'the', '&', '-'] for x in commons: cnt = 0 for m in re.finditer(x, name.lower()): cnt +=1 tehstart = m.start() tehend = m.end() if any([x == 'the', x == 'and']): if len(name) == tehend: tehend =-1 if not all([tehstart == 0, name[tehend] == ' ']) or not all([tehstart != 0, name[tehstart-1] == ' ', name[tehend] == ' ']): continue else: name = name.replace(x, ' ', cnt) originalname = name if '+' in name: name = re.sub('\+', 'PLUS', name) pattern = re.compile(ur'\w+', re.UNICODE) name = pattern.findall(name) if '+' in originalname: y = [] for x in name: y.append(re.sub("PLUS", "%2B", x)) name = y if limityear is None: limityear = 'None' comicquery = name if mylar.CONFIG.COMICVINE_API == 'None' or mylar.CONFIG.COMICVINE_API is None: logger.warn('You have not specified your own ComicVine API key - this is a requirement. Get your own @ http://api.comicvine.com.') return else: comicapi = mylar.CONFIG.COMICVINE_API if type is None: type = 'volume' #let's find out how many results we get from the query... searched = pullsearch(comicapi, comicquery, 0, type) if searched is None: return False totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText logger.fdebug("there are " + str(totalResults) + " search results...") if not totalResults: return False if int(totalResults) > 1000: logger.warn('Search returned more than 1000 hits [' + str(totalResults) + ']. 
Only displaying first 1000 results - use more specifics or the exact ComicID if required.') totalResults = 1000 countResults = 0 while (countResults < int(totalResults)): #logger.fdebug("querying " + str(countResults)) if countResults > 0: offsetcount = countResults searched = pullsearch(comicapi, comicquery, offsetcount, type) comicResults = searched.getElementsByTagName(type) body = '' n = 0 if not comicResults: break for result in comicResults: #retrieve the first xml tag (<tag>data</tag>) #that the parser finds with name tagName: arclist = [] if type == 'story_arc': #call cv.py here to find out issue count in story arc try: logger.fdebug('story_arc ascension') names = len(result.getElementsByTagName('name')) n = 0 logger.fdebug('length: ' + str(names)) xmlpub = None #set this incase the publisher field isn't populated in the xml while (n < names): logger.fdebug(result.getElementsByTagName('name')[n].parentNode.nodeName) if result.getElementsByTagName('name')[n].parentNode.nodeName == 'story_arc': logger.fdebug('yes') try: xmlTag = result.getElementsByTagName('name')[n].firstChild.wholeText xmlTag = xmlTag.rstrip() logger.fdebug('name: ' + xmlTag) except: logger.error('There was a problem retrieving the given data from ComicVine. 
Ensure that www.comicvine.com is accessible.') return elif result.getElementsByTagName('name')[n].parentNode.nodeName == 'publisher': logger.fdebug('publisher check.') xmlpub = result.getElementsByTagName('name')[n].firstChild.wholeText n+=1 except: logger.warn('error retrieving story arc search results.') return siteurl = len(result.getElementsByTagName('site_detail_url')) s = 0 logger.fdebug('length: ' + str(names)) xmlurl = None while (s < siteurl): logger.fdebug(result.getElementsByTagName('site_detail_url')[s].parentNode.nodeName) if result.getElementsByTagName('site_detail_url')[s].parentNode.nodeName == 'story_arc': try: xmlurl = result.getElementsByTagName('site_detail_url')[s].firstChild.wholeText except: logger.error('There was a problem retrieving the given data from ComicVine. Ensure that www.comicvine.com is accessible.') return s+=1 xmlid = result.getElementsByTagName('id')[0].firstChild.wholeText if xmlid is not None: arcinfolist = storyarcinfo(xmlid) logger.info('[IMAGE] : ' + arcinfolist['comicimage']) comiclist.append({ 'name': xmlTag, 'comicyear': arcinfolist['comicyear'], 'comicid': xmlid, 'cvarcid': xmlid, 'url': xmlurl, 'issues': arcinfolist['issues'], 'comicimage': arcinfolist['comicimage'], 'publisher': xmlpub, 'description': arcinfolist['description'], 'deck': arcinfolist['deck'], 'arclist': arcinfolist['arclist'], 'haveit': arcinfolist['haveit'] }) else: comiclist.append({ 'name': xmlTag, 'comicyear': arcyear, 'comicid': xmlid, 'url': xmlurl, 'issues': issuecount, 'comicimage': xmlimage, 'publisher': xmlpub, 'description': xmldesc, 'deck': xmldeck, 'arclist': arclist, 'haveit': haveit }) logger.fdebug('IssueID\'s that are a part of ' + xmlTag + ' : ' + str(arclist)) else: xmlcnt = result.getElementsByTagName('count_of_issues')[0].firstChild.wholeText #here we can determine what called us, and either start gathering all issues or just limited ones. 
if issue is not None and str(issue).isdigit(): #this gets buggered up with NEW/ONGOING series because the db hasn't been updated #to reflect the proper count. Drop it by 1 to make sure. limiter = int(issue) - 1 else: limiter = 0 #get the first issue # (for auto-magick calcs) iss_len = len(result.getElementsByTagName('name')) i=0 xmlfirst = '1' xmllast = None try: while (i < iss_len): if result.getElementsByTagName('name')[i].parentNode.nodeName == 'first_issue': xmlfirst = result.getElementsByTagName('issue_number')[i].firstChild.wholeText if '\xbd' in xmlfirst: xmlfirst = '1' #if the first issue is 1/2, just assume 1 for logistics elif result.getElementsByTagName('name')[i].parentNode.nodeName == 'last_issue': xmllast = result.getElementsByTagName('issue_number')[i].firstChild.wholeText if all([xmllast is not None, xmlfirst is not None]): break i+=1 except: xmlfirst = '1' if all([xmlfirst == xmllast, xmlfirst.isdigit(), xmlcnt == '0']): xmlcnt = '1' #logger.info('There are : ' + str(xmlcnt) + ' issues in this series.') #logger.info('The first issue started at # ' + str(xmlfirst)) cnt_numerical = int(xmlcnt) + int(xmlfirst) # (of issues + start of first issue = numerical range) #logger.info('The maximum issue number should be roughly # ' + str(cnt_numerical)) #logger.info('The limiter (issue max that we know of) is # ' + str(limiter)) if cnt_numerical >= limiter: cnl = len (result.getElementsByTagName('name')) cl = 0 xmlTag = 'None' xmlimage = "cache/blankcover.jpg" xml_lastissueid = 'None' while (cl < cnl): if result.getElementsByTagName('name')[cl].parentNode.nodeName == 'volume': xmlTag = result.getElementsByTagName('name')[cl].firstChild.wholeText #break if result.getElementsByTagName('name')[cl].parentNode.nodeName == 'image': xmlimage = result.getElementsByTagName('super_url')[0].firstChild.wholeText if result.getElementsByTagName('name')[cl].parentNode.nodeName == 'last_issue': xml_lastissueid = result.getElementsByTagName('id')[cl].firstChild.wholeText 
cl+=1 if (result.getElementsByTagName('start_year')[0].firstChild) is not None: xmlYr = result.getElementsByTagName('start_year')[0].firstChild.wholeText else: xmlYr = "0000" yearRange = [] tmpYr = re.sub('\?', '', xmlYr) if tmpYr.isdigit(): yearRange.append(tmpYr) tmpyearRange = int(xmlcnt) / 12 if float(tmpyearRange): tmpyearRange +1 possible_years = int(tmpYr) + tmpyearRange for i in range(int(tmpYr), int(possible_years),1): if not any(int(x) == int(i) for x in yearRange): yearRange.append(str(i)) logger.fdebug('[RESULT][' + str(limityear) + '] ComicName:' + xmlTag + ' -- ' + str(xmlYr) + ' [Series years: ' + str(yearRange) + ']') if tmpYr != xmlYr: xmlYr = tmpYr if any(map(lambda v: v in limityear, yearRange)) or limityear == 'None': xmlurl = result.getElementsByTagName('site_detail_url')[0].firstChild.wholeText idl = len (result.getElementsByTagName('id')) idt = 0 xmlid = None while (idt < idl): if result.getElementsByTagName('id')[idt].parentNode.nodeName == 'volume': xmlid = result.getElementsByTagName('id')[idt].firstChild.wholeText break idt+=1 if xmlid is None: logger.error('Unable to figure out the comicid - skipping this : ' + str(xmlurl)) continue publishers = result.getElementsByTagName('publisher') if len(publishers) > 0: pubnames = publishers[0].getElementsByTagName('name') if len(pubnames) >0: xmlpub = pubnames[0].firstChild.wholeText else: xmlpub = "Unknown" else: xmlpub = "Unknown" #ignore specific publishers on a global scale here. if mylar.CONFIG.BLACKLISTED_PUBLISHERS is not None and any([x for x in mylar.CONFIG.BLACKLISTED_PUBLISHERS if x.lower() == xmlpub.lower()]): logger.fdebug('Blacklisted publisher [' + xmlpub + ']. Ignoring this result.') continue try: xmldesc = result.getElementsByTagName('description')[0].firstChild.wholeText except: xmldesc = "None" #this is needed to display brief synopsis for each series on search results page. 
try: xmldeck = result.getElementsByTagName('deck')[0].firstChild.wholeText except: xmldeck = "None" xmltype = None if xmldeck != 'None': if any(['print' in xmldeck.lower(), 'digital' in xmldeck.lower(), 'paperback' in xmldeck.lower(), 'hardcover' in xmldeck.lower()]): if 'print' in xmldeck.lower(): xmltype = 'Print' elif 'digital' in xmldeck.lower(): xmltype = 'Digital' elif 'paperback' in xmldeck.lower(): xmltype = 'TPB' elif 'hardcover' in xmldeck.lower(): xmltype = 'HC' if xmldesc != 'None' and xmltype is None: if 'print' in xmldesc[:60].lower() and 'print edition can be found' not in xmldesc.lower(): xmltype = 'Print' elif 'digital' in xmldesc[:60].lower() and 'digital edition can be found' not in xmldesc.lower(): xmltype = 'Digital' elif all(['paperback' in xmldesc[:60].lower(), 'paperback can be found' not in xmldesc.lower()]) or 'collects' in xmldesc.lower(): xmltype = 'TPB' elif 'hardcover' in xmldesc[:60].lower() and 'hardcover can be found' not in xmldesc.lower(): xmltype = 'HC' else: xmltype = 'Print' if xmlid in comicLibrary: haveit = comicLibrary[xmlid] else: haveit = "No" comiclist.append({ 'name': xmlTag, 'comicyear': xmlYr, 'comicid': xmlid, 'url': xmlurl, 'issues': xmlcnt, 'comicimage': xmlimage, 'publisher': xmlpub, 'description': xmldesc, 'deck': xmldeck, 'type': xmltype, 'haveit': haveit, 'lastissueid': xml_lastissueid, 'seriesrange': yearRange # returning additional information about series run polled from CV }) #logger.fdebug('year: %s - constraint met: %s [%s] --- 4050-%s' % (xmlYr,xmlTag,xmlYr,xmlid)) else: #logger.fdebug('year: ' + str(xmlYr) + ' - contraint not met. Has to be within ' + str(limityear)) pass n+=1 #search results are limited to 100 and by pagination now...let's account for this. countResults = countResults + 100 return comiclist
def findComic(name, mode, issue, limityear=None, type=None): #with mb_lock: comicResults = None comicLibrary = listLibrary() comiclist = [] arcinfolist = [] commons = ['and', 'the', '&', '-'] for x in commons: cnt = 0 for m in re.finditer(x, name.lower()): cnt += 1 tehstart = m.start() tehend = m.end() if any([x == 'the', x == 'and']): if len(name) == tehend: tehend = -1 if not all([tehstart == 0, name[tehend] == ' ']) or not all([ tehstart != 0, name[tehstart - 1] == ' ', name[tehend] == ' ' ]): continue else: name = name.replace(x, ' ', cnt) originalname = name if '+' in name: name = re.sub('\+', 'PLUS', name) pattern = re.compile(ur'\w+', re.UNICODE) name = pattern.findall(name) if '+' in originalname: y = [] for x in name: y.append(re.sub("PLUS", "%2B", x)) name = y if limityear is None: limityear = 'None' comicquery = name if mylar.CONFIG.COMICVINE_API == 'None' or mylar.CONFIG.COMICVINE_API is None: logger.warn( 'You have not specified your own ComicVine API key - this is a requirement. Get your own @ http://api.comicvine.com.' ) return else: comicapi = mylar.CONFIG.COMICVINE_API if type is None: type = 'volume' #let's find out how many results we get from the query... searched = pullsearch(comicapi, comicquery, 0, type) if searched is None: return False totalResults = searched.getElementsByTagName( 'number_of_total_results')[0].firstChild.wholeText logger.fdebug("there are " + str(totalResults) + " search results...") if not totalResults: return False if int(totalResults) > 1000: logger.warn( 'Search returned more than 1000 hits [' + str(totalResults) + ']. Only displaying first 1000 results - use more specifics or the exact ComicID if required.' 
) totalResults = 1000 countResults = 0 while (countResults < int(totalResults)): #logger.fdebug("querying " + str(countResults)) if countResults > 0: offsetcount = countResults searched = pullsearch(comicapi, comicquery, offsetcount, type) comicResults = searched.getElementsByTagName(type) body = '' n = 0 if not comicResults: break for result in comicResults: #retrieve the first xml tag (<tag>data</tag>) #that the parser finds with name tagName: arclist = [] if type == 'story_arc': #call cv.py here to find out issue count in story arc try: logger.fdebug('story_arc ascension') names = len(result.getElementsByTagName('name')) n = 0 logger.fdebug('length: ' + str(names)) xmlpub = None #set this incase the publisher field isn't populated in the xml while (n < names): logger.fdebug( result.getElementsByTagName('name') [n].parentNode.nodeName) if result.getElementsByTagName( 'name')[n].parentNode.nodeName == 'story_arc': logger.fdebug('yes') try: xmlTag = result.getElementsByTagName( 'name')[n].firstChild.wholeText xmlTag = xmlTag.rstrip() logger.fdebug('name: ' + xmlTag) except: logger.error( 'There was a problem retrieving the given data from ComicVine. Ensure that www.comicvine.com is accessible.' ) return elif result.getElementsByTagName( 'name')[n].parentNode.nodeName == 'publisher': logger.fdebug('publisher check.') xmlpub = result.getElementsByTagName( 'name')[n].firstChild.wholeText n += 1 except: logger.warn('error retrieving story arc search results.') return siteurl = len(result.getElementsByTagName('site_detail_url')) s = 0 logger.fdebug('length: ' + str(names)) xmlurl = None while (s < siteurl): logger.fdebug( result.getElementsByTagName('site_detail_url') [s].parentNode.nodeName) if result.getElementsByTagName('site_detail_url')[ s].parentNode.nodeName == 'story_arc': try: xmlurl = result.getElementsByTagName( 'site_detail_url')[s].firstChild.wholeText except: logger.error( 'There was a problem retrieving the given data from ComicVine. 
Ensure that www.comicvine.com is accessible.' ) return s += 1 xmlid = result.getElementsByTagName( 'id')[0].firstChild.wholeText if xmlid is not None: arcinfolist = storyarcinfo(xmlid) logger.info('[IMAGE] : ' + arcinfolist['comicimage']) comiclist.append({ 'name': xmlTag, 'comicyear': arcinfolist['comicyear'], 'comicid': xmlid, 'cvarcid': xmlid, 'url': xmlurl, 'issues': arcinfolist['issues'], 'comicimage': arcinfolist['comicimage'], 'publisher': xmlpub, 'description': arcinfolist['description'], 'deck': arcinfolist['deck'], 'arclist': arcinfolist['arclist'], 'haveit': arcinfolist['haveit'] }) else: comiclist.append({ 'name': xmlTag, 'comicyear': arcyear, 'comicid': xmlid, 'url': xmlurl, 'issues': issuecount, 'comicimage': xmlimage, 'publisher': xmlpub, 'description': xmldesc, 'deck': xmldeck, 'arclist': arclist, 'haveit': haveit }) logger.fdebug('IssueID\'s that are a part of ' + xmlTag + ' : ' + str(arclist)) else: xmlcnt = result.getElementsByTagName( 'count_of_issues')[0].firstChild.wholeText #here we can determine what called us, and either start gathering all issues or just limited ones. if issue is not None and str(issue).isdigit(): #this gets buggered up with NEW/ONGOING series because the db hasn't been updated #to reflect the proper count. Drop it by 1 to make sure. 
limiter = int(issue) - 1 else: limiter = 0 #get the first issue # (for auto-magick calcs) iss_len = len(result.getElementsByTagName('name')) i = 0 xmlfirst = '1' xmllast = None try: while (i < iss_len): if result.getElementsByTagName('name')[ i].parentNode.nodeName == 'first_issue': xmlfirst = result.getElementsByTagName( 'issue_number')[i].firstChild.wholeText if '\xbd' in xmlfirst: xmlfirst = '1' #if the first issue is 1/2, just assume 1 for logistics elif result.getElementsByTagName( 'name')[i].parentNode.nodeName == 'last_issue': xmllast = result.getElementsByTagName( 'issue_number')[i].firstChild.wholeText if all([xmllast is not None, xmlfirst is not None]): break i += 1 except: xmlfirst = '1' if all( [xmlfirst == xmllast, xmlfirst.isdigit(), xmlcnt == '0']): xmlcnt = '1' #logger.info('There are : ' + str(xmlcnt) + ' issues in this series.') #logger.info('The first issue started at # ' + str(xmlfirst)) cnt_numerical = int(xmlcnt) + int( xmlfirst ) # (of issues + start of first issue = numerical range) #logger.info('The maximum issue number should be roughly # ' + str(cnt_numerical)) #logger.info('The limiter (issue max that we know of) is # ' + str(limiter)) if cnt_numerical >= limiter: cnl = len(result.getElementsByTagName('name')) cl = 0 xmlTag = 'None' xmlimage = "cache/blankcover.jpg" xml_lastissueid = 'None' while (cl < cnl): if result.getElementsByTagName( 'name')[cl].parentNode.nodeName == 'volume': xmlTag = result.getElementsByTagName( 'name')[cl].firstChild.wholeText #break if result.getElementsByTagName( 'name')[cl].parentNode.nodeName == 'image': xmlimage = result.getElementsByTagName( 'super_url')[0].firstChild.wholeText if result.getElementsByTagName('name')[ cl].parentNode.nodeName == 'last_issue': xml_lastissueid = result.getElementsByTagName( 'id')[cl].firstChild.wholeText cl += 1 if (result.getElementsByTagName('start_year')[0].firstChild ) is not None: xmlYr = result.getElementsByTagName( 'start_year')[0].firstChild.wholeText else: xmlYr = 
"0000" yearRange = [] tmpYr = re.sub('\?', '', xmlYr) if tmpYr.isdigit(): yearRange.append(tmpYr) tmpyearRange = int(xmlcnt) / 12 if float(tmpyearRange): tmpyearRange + 1 possible_years = int(tmpYr) + tmpyearRange for i in range(int(tmpYr), int(possible_years), 1): if not any(int(x) == int(i) for x in yearRange): yearRange.append(str(i)) logger.fdebug('[RESULT][' + str(limityear) + '] ComicName:' + xmlTag + ' -- ' + str(xmlYr) + ' [Series years: ' + str(yearRange) + ']') if tmpYr != xmlYr: xmlYr = tmpYr if any(map(lambda v: v in limityear, yearRange)) or limityear == 'None': xmlurl = result.getElementsByTagName( 'site_detail_url')[0].firstChild.wholeText idl = len(result.getElementsByTagName('id')) idt = 0 xmlid = None while (idt < idl): if result.getElementsByTagName( 'id')[idt].parentNode.nodeName == 'volume': xmlid = result.getElementsByTagName( 'id')[idt].firstChild.wholeText break idt += 1 if xmlid is None: logger.error( 'Unable to figure out the comicid - skipping this : ' + str(xmlurl)) continue publishers = result.getElementsByTagName('publisher') if len(publishers) > 0: pubnames = publishers[0].getElementsByTagName( 'name') if len(pubnames) > 0: xmlpub = pubnames[0].firstChild.wholeText else: xmlpub = "Unknown" else: xmlpub = "Unknown" #ignore specific publishers on a global scale here. if mylar.CONFIG.BLACKLISTED_PUBLISHERS is not None and any( [ x for x in mylar.CONFIG.BLACKLISTED_PUBLISHERS if x.lower() == xmlpub.lower() ]): logger.fdebug('Blacklisted publisher [' + xmlpub + ']. Ignoring this result.') continue try: xmldesc = result.getElementsByTagName( 'description')[0].firstChild.wholeText except: xmldesc = "None" #this is needed to display brief synopsis for each series on search results page. 
try: xmldeck = result.getElementsByTagName( 'deck')[0].firstChild.wholeText except: xmldeck = "None" xmltype = None if xmldeck != 'None': if any([ 'print' in xmldeck.lower(), 'digital' in xmldeck.lower(), 'paperback' in xmldeck.lower(), 'hardcover' in xmldeck.lower() ]): if 'print' in xmldeck.lower(): xmltype = 'Print' elif 'digital' in xmldeck.lower(): xmltype = 'Digital' elif 'paperback' in xmldeck.lower(): xmltype = 'TPB' elif 'hardcover' in xmldeck.lower(): xmltype = 'HC' if xmldesc != 'None' and xmltype is None: if 'print' in xmldesc[:60].lower( ) and 'print edition can be found' not in xmldesc.lower( ): xmltype = 'Print' elif 'digital' in xmldesc[:60].lower( ) and 'digital edition can be found' not in xmldesc.lower( ): xmltype = 'Digital' elif 'paperback' in xmldesc[:60].lower( ) and 'paperback can be found' not in xmldesc.lower( ): xmltype = 'TPB' elif 'hardcover' in xmldesc[:60].lower( ) and 'hardcover can be found' not in xmldesc.lower( ): xmltype = 'HC' else: xmltype = 'Print' if xmlid in comicLibrary: haveit = comicLibrary[xmlid] else: haveit = "No" comiclist.append({ 'name': xmlTag, 'comicyear': xmlYr, 'comicid': xmlid, 'url': xmlurl, 'issues': xmlcnt, 'comicimage': xmlimage, 'publisher': xmlpub, 'description': xmldesc, 'deck': xmldeck, 'type': xmltype, 'haveit': haveit, 'lastissueid': xml_lastissueid, 'seriesrange': yearRange # returning additional information about series run polled from CV }) #logger.fdebug('year: %s - constraint met: %s [%s] --- 4050-%s' % (xmlYr,xmlTag,xmlYr,xmlid)) else: #logger.fdebug('year: ' + str(xmlYr) + ' - contraint not met. Has to be within ' + str(limityear)) pass n += 1 #search results are limited to 100 and by pagination now...let's account for this. countResults = countResults + 100 return comiclist