def latestdate_fix():
    import db, logger
    datefix = []
    myDB = db.DBConnection()
    comiclist = myDB.select('SELECT * FROM comics')
    if comiclist is None:
        logger.fdebug('No Series in watchlist to correct latest date')
        return
    for cl in comiclist:
        latestdate = cl['LatestDate']
        #logger.fdebug("latestdate: " + str(latestdate))
        if latestdate[8:] == '':
            #logger.fdebug("invalid date " + str(latestdate) + " appending 01 for day to avoid errors")
            if len(latestdate) <= 7:
                finddash = latestdate.find('-')
                #logger.info('dash found at position ' + str(finddash))
                if finddash != 4:  #format of mm-yyyy
                    lat_month = latestdate[:finddash]
                    lat_year = latestdate[finddash+1:]
                else:  #format of yyyy-mm
                    lat_month = latestdate[finddash+1:]
                    lat_year = latestdate[:finddash]
                latestdate = (lat_year) + '-' + str(lat_month) + '-01'
                datefix.append({"comicid": cl['ComicID'], "latestdate": latestdate})
                #logger.info('latest date: ' + str(latestdate))
    #now we fix.
    if len(datefix) > 0:
        for df in datefix:
            newCtrl = {"ComicID": df['comicid']}
            newVal = {"LatestDate": df['latestdate']}
            myDB.upsert("comics", newVal, newCtrl)
    return
def LoadAlternateSearchNames(seriesname_alt, comicid):
    import re
    import logger
    #seriesname_alt = db.comics['AlternateSearch']
    AS_Alt = []
    Alternate_Names = {}
    alt_count = 0
    #logger.fdebug('seriesname_alt:' + str(seriesname_alt))
    if seriesname_alt is None or seriesname_alt == 'None':
        logger.fdebug('no Alternate name given. Aborting search.')
        return "no results"
    else:
        chkthealt = seriesname_alt.split('##')
        if len(chkthealt) == 0:
            AS_Alternate = seriesname_alt
            AS_Alt.append(seriesname_alt)
        for calt in chkthealt:
            AS_Alter = re.sub('##', '', calt)
            u_altsearchcomic = AS_Alter.encode('ascii', 'ignore').strip()
            AS_formatrem_seriesname = re.sub('\s+', ' ', u_altsearchcomic)
            if AS_formatrem_seriesname[:1] == ' ':
                AS_formatrem_seriesname = AS_formatrem_seriesname[1:]
            AS_Alt.append({"AlternateName": AS_formatrem_seriesname})
            alt_count += 1
        Alternate_Names['AlternateName'] = AS_Alt
        Alternate_Names['ComicID'] = comicid
        Alternate_Names['Count'] = alt_count
        #logger.info('AlternateNames returned:' + str(Alternate_Names))
        return Alternate_Names
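# A minimal, self-contained sketch of the '##'-delimited format that
# LoadAlternateSearchNames() expects. It only mirrors the split/cleanup step
# (helper name and sample input are illustrative, not part of Mylar):
def _split_alternates(seriesname_alt):
    import re
    if seriesname_alt in (None, 'None'):
        return []
    return [re.sub('\s+', ' ', name).strip() for name in seriesname_alt.split('##')]

# e.g. _split_alternates('Batman##The  Dark Knight') -> ['Batman', 'The Dark Knight']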
def renamefile_readingorder(readorder):
    import logger
    logger.fdebug('readingorder#: ' + str(readorder))
    if int(readorder) < 10:
        readord = "00" + str(readorder)
    elif int(readorder) < 100:
        #covers 10-99 inclusive (the previous >10 / <99 check skipped 10 and 99)
        readord = "0" + str(readorder)
    else:
        readord = str(readorder)
    return readord
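# For reference, the padding above is equivalent to a three-wide zero-fill.
# A minimal sketch (helper name is illustrative only, not used by Mylar):
def _pad_readingorder(readorder):
    # zero-pad to three digits, e.g. 7 -> '007', 42 -> '042', 123 -> '123'
    return str(int(readorder)).zfill(3)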
def getComic(comicid,type,issueid=None,arc=None,arcid=None,arclist=None,comicidlist=None):
    if type == 'issue':
        offset = 1
        issue = {}
        ndic = []
        issuechoice = []
        comicResults = []
        firstdate = '2099-00-00'
        #let's find out how many results we get from the query...
        if comicid is None:
            #if comicid is None, it's coming from the story arc search results.
            id = arcid
            islist = arclist
        else:
            id = comicid
            islist = None
        searched = pulldetails(id,'issue',None,0,islist)
        if searched is None:
            return False
        totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
        logger.fdebug("there are " + str(totalResults) + " search results...")
        if not totalResults:
            return False
        countResults = 0
        while (countResults < int(totalResults)):
            logger.fdebug("querying " + str(countResults))
            if countResults > 0:
                #new api - have to change to page # instead of offset count
                offsetcount = countResults
                searched = pulldetails(id,'issue',None,offsetcount,islist)
            issuechoice, tmpdate = GetIssuesInfo(id,searched,arcid)
            if tmpdate < firstdate:
                firstdate = tmpdate
            ndic = ndic + issuechoice
            #search results are limited to 100 and by pagination now...let's account for this.
            countResults = countResults + 100
        issue['issuechoice'] = ndic
        issue['firstdate'] = firstdate
        return issue

    elif type == 'comic':
        dom = pulldetails(comicid,'comic',None,1)
        return GetComicInfo(comicid,dom)

    elif type == 'firstissue':
        dom = pulldetails(comicid,'firstissue',issueid,1)
        return GetFirstIssue(issueid,dom)

    elif type == 'storyarc':
        dom = pulldetails(arc,'storyarc',None,1)
        return GetComicInfo(issueid,dom)

    elif type == 'comicyears':
        #used by the story arc searcher when adding a given arc to poll each ComicID in order to populate the Series Year.
        #this grabs each issue based on issueid, and then subsets the comicid for each to be used later.
        #set the offset to 0, since we're doing a filter.
        dom = pulldetails(arcid,'comicyears',offset=0,comicidlist=comicidlist)
        return GetSeriesYears(dom)
def listFiles(dir,watchcomic,AlternateSearch=None): # use AlternateSearch to check for filenames that follow that naming pattern # ie. Star Trek TNG Doctor Who Assimilation won't get hits as the # checker looks for Star Trek TNG Doctor Who Assimilation2 (according to CV) logger.fdebug("comic: " + watchcomic) basedir = dir logger.fdebug("Looking in: " + dir) watchmatch = {} comiclist = [] comiccnt = 0 for item in os.listdir(basedir): #print item #subname = os.path.join(basedir, item) subname = item #print subname subname = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]',' ', str(subname)) modwatchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', str(watchcomic)) modwatchcomic = re.sub('\s+', ' ', str(modwatchcomic)).strip() subname = re.sub('\s+', ' ', str(subname)).strip() if AlternateSearch is not None: altsearchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', str(AlternateSearch)) altsearchcomic = re.sub('\s+', ' ', str(altsearchcomic)).strip() else: #create random characters so it will never match. altsearchcomic = "127372873872871091383 abdkhjhskjhkjdhakajhf" #if '_' in subname: # subname = subname.replace('_', ' ') logger.fdebug("watchcomic:" + str(modwatchcomic) + " ..comparing to found file: " + str(subname)) if modwatchcomic.lower() in subname.lower() or altsearchcomic.lower() in subname.lower(): if 'annual' in subname.lower(): #print ("it's an annual - unsure how to proceed") continue comicpath = os.path.join(basedir, item) logger.fdebug( modwatchcomic + " - watchlist match on : " + comicpath) comicsize = os.path.getsize(comicpath) #print ("Comicsize:" + str(comicsize)) comiccnt+=1 comiclist.append({ 'ComicFilename': item, 'ComicLocation': comicpath, 'ComicSize': comicsize }) watchmatch['comiclist'] = comiclist else: pass #print ("directory found - ignoring") logger.fdebug("you have a total of " + str(comiccnt) + " " + str(watchcomic) + " comics") watchmatch['comiccount'] = comiccnt return watchmatch
def GetImportList(results):
    importlist = results.getElementsByTagName('issue')
    serieslist = []
    importids = {}
    tempseries = {}
    for implist in importlist:
        try:
            totids = len(implist.getElementsByTagName('id'))
            idt = 0
            while (idt < totids):
                if implist.getElementsByTagName('id')[idt].parentNode.nodeName == 'volume':
                    tempseries['ComicID'] = implist.getElementsByTagName('id')[idt].firstChild.wholeText
                elif implist.getElementsByTagName('id')[idt].parentNode.nodeName == 'issue':
                    tempseries['IssueID'] = implist.getElementsByTagName('id')[idt].firstChild.wholeText
                idt += 1
        except:
            tempseries['ComicID'] = None

        try:
            totnames = len(implist.getElementsByTagName('name'))
            tot = 0
            while (tot < totnames):
                if implist.getElementsByTagName('name')[tot].parentNode.nodeName == 'volume':
                    tempseries['ComicName'] = implist.getElementsByTagName('name')[tot].firstChild.wholeText
                elif implist.getElementsByTagName('name')[tot].parentNode.nodeName == 'issue':
                    try:
                        tempseries['Issue_Name'] = implist.getElementsByTagName('name')[tot].firstChild.wholeText
                    except:
                        tempseries['Issue_Name'] = None
                tot += 1
        except:
            tempseries['ComicName'] = 'None'

        try:
            tempseries['Issue_Number'] = implist.getElementsByTagName('issue_number')[0].firstChild.wholeText
        except:
            logger.fdebug('No Issue Number available - Trade Paperbacks, Graphic Novels and Compendiums are not supported as of yet.')

        logger.info('tempseries:' + str(tempseries))
        serieslist.append({"ComicID": tempseries['ComicID'],
                           "IssueID": tempseries['IssueID'],
                           "ComicName": tempseries['ComicName'],
                           "Issue_Name": tempseries['Issue_Name'],
                           "Issue_Number": tempseries['Issue_Number']})
    return serieslist
def traverse_directories(dir):
    filelist = []
    for (dirname, subs, files) in os.walk(dir):
        for fname in files:
            if dirname == dir:
                direc = ''
            else:
                direc = dirname
            filelist.append({"directory": direc, "filename": fname})
    logger.fdebug('there are ' + str(len(filelist)) + ' files.')
    #logger.fdebug(filelist)
    return filelist
def getComic(comicid,type,issueid=None,arc=None):
    if type == 'issue':
        offset = 1
        issue = {}
        ndic = []
        issuechoice = []
        comicResults = []
        firstdate = '2099-00-00'
        #let's find out how many results we get from the query...
        searched = pulldetails(comicid,'issue',None,0)
        if searched is None:
            return False
        totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
        logger.fdebug("there are " + str(totalResults) + " search results...")
        if not totalResults:
            return False
        countResults = 0
        while (countResults < int(totalResults)):
            logger.fdebug("querying " + str(countResults))
            if countResults > 0:
                #new api - have to change to page # instead of offset count
                offsetcount = countResults
                searched = pulldetails(comicid,'issue',None,offsetcount)
            issuechoice, tmpdate = GetIssuesInfo(comicid,searched)
            if tmpdate < firstdate:
                firstdate = tmpdate
            ndic = ndic + issuechoice
            #search results are limited to 100 and by pagination now...let's account for this.
            countResults = countResults + 100
        issue['issuechoice'] = ndic
        issue['firstdate'] = firstdate
        return issue

    elif type == 'comic':
        dom = pulldetails(comicid,'comic',None,1)
        return GetComicInfo(comicid,dom)

    elif type == 'firstissue':
        dom = pulldetails(comicid,'firstissue',issueid,1)
        return GetFirstIssue(issueid,dom)

    elif type == 'storyarc':
        dom = pulldetails(arc,'storyarc',None,1)
        return GetComicInfo(issueid,dom)
def sender(self, filename, test=False):
    if mylar.CONFIG.NZBGET_PRIORITY:
        if any([mylar.CONFIG.NZBGET_PRIORITY == 'Default',
                mylar.CONFIG.NZBGET_PRIORITY == 'Normal']):
            nzbgetpriority = 0
        elif mylar.CONFIG.NZBGET_PRIORITY == 'Low':
            nzbgetpriority = -50
        elif mylar.CONFIG.NZBGET_PRIORITY == 'High':
            nzbgetpriority = 50
        #there's no priority for "paused", so set "Very Low" and deal with that later...
        elif mylar.CONFIG.NZBGET_PRIORITY == 'Paused':
            nzbgetpriority = -100
    else:
        #if nzbget priority isn't selected, default to Normal (0)
        nzbgetpriority = 0

    in_file = open(filename, 'r')
    nzbcontent = in_file.read()
    in_file.close()
    nzbcontent64 = standard_b64encode(nzbcontent)
    try:
        logger.fdebug('sending now to %s' % self.nzb_url)
        sendresponse = self.server.append(filename, nzbcontent64,
                                          mylar.CONFIG.NZBGET_CATEGORY,
                                          nzbgetpriority, False, False, '', 0,
                                          'SCORE')
    except Exception as e:
        logger.warn('uh-oh: %s' % e)
        return {'status': False}
    else:
        if sendresponse <= 0:
            logger.warn('Invalid response received after sending to NZBGet: %s' % sendresponse)
            return {'status': False}
        else:
            #sendresponse is the NZBID that we use to track the progress....
            return {'status': True, 'NZBID': sendresponse}
def cvapi_check(web=None):
    import logger
    if web is None:
        logger.fdebug('[ComicVine API] ComicVine API Check Running...')
    if mylar.CVAPI_TIME is None or mylar.CVAPI_TIME == '':
        c_date = now()
        c_obj_date = datetime.datetime.strptime(c_date, "%Y-%m-%d %H:%M:%S")
        mylar.CVAPI_TIME = c_obj_date
    else:
        if isinstance(mylar.CVAPI_TIME, unicode):
            c_obj_date = datetime.datetime.strptime(mylar.CVAPI_TIME, "%Y-%m-%d %H:%M:%S")
        else:
            c_obj_date = mylar.CVAPI_TIME
    if web is None:
        logger.fdebug('[ComicVine API] API Start Monitoring Time (~15mins): ' + str(mylar.CVAPI_TIME))
    now_date = now()
    n_date = datetime.datetime.strptime(now_date, "%Y-%m-%d %H:%M:%S")
    if web is None:
        logger.fdebug('[ComicVine API] Time now: ' + str(n_date))
    absdiff = abs(n_date - c_obj_date)
    mins = round(((absdiff.days * 24 * 60 * 60 + absdiff.seconds) / 60.0), 2)
    if mins < 15:
        if web is None:
            logger.info('[ComicVine API] Comicvine API count now at : ' + str(mylar.CVAPI_COUNT) + ' / ' + str(mylar.CVAPI_MAX) + ' in ' + str(mins) + ' minutes.')
        if mylar.CVAPI_COUNT > mylar.CVAPI_MAX:
            cvleft = 15 - mins
            if web is None:
                logger.warn('[ComicVine API] You have already hit your API limit (' + str(mylar.CVAPI_MAX) + ') with ' + str(cvleft) + ' minutes remaining. Best be slowing down, cowboy.')
    elif mins > 15:
        mylar.CVAPI_COUNT = 0
        c_date = now()
        mylar.CVAPI_TIME = datetime.datetime.strptime(c_date, "%Y-%m-%d %H:%M:%S")
        if web is None:
            logger.info('[ComicVine API] 15 minute API interval resetting [' + str(mylar.CVAPI_TIME) + ']. Resetting API count to : ' + str(mylar.CVAPI_COUNT))
    if web is None:
        return
    else:
        line = str(mylar.CVAPI_COUNT) + ' hits / ' + str(mins) + ' minutes'
        return line
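# A minimal, self-contained sketch of the same 15-minute rolling-window idea,
# without the mylar globals (class and method names here are illustrative,
# not part of Mylar's API):
import datetime

class _ApiWindow(object):
    def __init__(self, limit, window_mins=15):
        self.limit = limit
        self.window = datetime.timedelta(minutes=window_mins)
        self.started = datetime.datetime.now()
        self.count = 0

    def hit(self):
        now = datetime.datetime.now()
        if now - self.started > self.window:
            # window expired - reset the counter and restart the clock
            self.started = now
            self.count = 0
        self.count += 1
        return self.count <= self.limit   # False once the limit is exceeded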
def getComic(comicid,type,issueid=None):
    if type == 'issue':
        offset = 1
        issue = {}
        ndic = []
        issuechoice = []
        comicResults = []
        firstdate = '2099-00-00'
        #let's find out how many results we get from the query...
        searched = pulldetails(comicid,'issue',None,0)
        if searched is None:
            return False
        totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
        logger.fdebug("there are " + str(totalResults) + " search results...")
        if not totalResults:
            return False
        countResults = 0
        while (countResults < int(totalResults)):
            logger.fdebug("querying " + str(countResults))
            if countResults > 0:
                #new api - have to change to page # instead of offset count
                offsetcount = countResults
                searched = pulldetails(comicid,'issue',None,offsetcount)
            issuechoice, tmpdate = GetIssuesInfo(comicid,searched)
            if tmpdate < firstdate:
                firstdate = tmpdate
            ndic = ndic + issuechoice
            #search results are limited to 100 and by pagination now...let's account for this.
            countResults = countResults + 100
        issue['issuechoice'] = ndic
        issue['firstdate'] = firstdate
        return issue

    elif type == 'comic':
        dom = pulldetails(comicid,'comic',None,1)
        return GetComicInfo(comicid,dom)

    elif type == 'firstissue':
        dom = pulldetails(comicid,'firstissue',issueid,1)
        return GetFirstIssue(issueid,dom)
def sender(self, filename, test=False):
    if mylar.CONFIG.NZBGET_PRIORITY:
        if any([mylar.CONFIG.NZBGET_PRIORITY == 'Default',
                mylar.CONFIG.NZBGET_PRIORITY == 'Normal']):
            nzbgetpriority = 0
        elif mylar.CONFIG.NZBGET_PRIORITY == 'Low':
            nzbgetpriority = -50
        elif mylar.CONFIG.NZBGET_PRIORITY == 'High':
            nzbgetpriority = 50
        #there's no priority for "paused", so set "Very Low" and deal with that later...
        elif mylar.CONFIG.NZBGET_PRIORITY == 'Paused':
            nzbgetpriority = -100
    else:
        #if nzbget priority isn't selected, default to Normal (0)
        nzbgetpriority = 0

    in_file = open(filename, 'r')
    nzbcontent = in_file.read()
    in_file.close()
    nzbcontent64 = standard_b64encode(nzbcontent)
    try:
        logger.fdebug('sending now to %s' % self.nzb_url)
        if mylar.CONFIG.NZBGET_CATEGORY is None:
            nzb_category = ''
        else:
            nzb_category = mylar.CONFIG.NZBGET_CATEGORY
        sendresponse = self.server.append(filename, nzbcontent64, nzb_category,
                                          nzbgetpriority, False, False, '', 0,
                                          'SCORE')
    except Exception as e:
        logger.warn('uh-oh: %s' % e)
        return {'status': False}
    else:
        if sendresponse <= 0:
            logger.warn('Invalid response received after sending to NZBGet: %s' % sendresponse)
            return {'status': False}
        else:
            #sendresponse is the NZBID that we use to track the progress....
            return {'status': True, 'NZBID': sendresponse}
def latestdate_fix():
    import db, logger
    datefix = []
    myDB = db.DBConnection()
    comiclist = myDB.action('SELECT * FROM comics')
    if comiclist is None:
        logger.fdebug('No Series in watchlist to correct latest date')
        return
    for cl in comiclist:
        latestdate = cl['LatestDate']
        #logger.fdebug("latestdate: " + str(latestdate))
        if latestdate[8:] == '':
            #logger.fdebug("invalid date " + str(latestdate) + " appending 01 for day to avoid errors")
            if len(latestdate) <= 7:
                finddash = latestdate.find('-')
                #logger.info('dash found at position ' + str(finddash))
                if finddash != 4:  #format of mm-yyyy
                    lat_month = latestdate[:finddash]
                    lat_year = latestdate[finddash + 1:]
                else:  #format of yyyy-mm
                    lat_month = latestdate[finddash + 1:]
                    lat_year = latestdate[:finddash]
                latestdate = (lat_year) + '-' + str(lat_month) + '-01'
                datefix.append({
                    "comicid": cl['ComicID'],
                    "latestdate": latestdate
                })
                #logger.info('latest date: ' + str(latestdate))
    #now we fix.
    if len(datefix) > 0:
        for df in datefix:
            newCtrl = {"ComicID": df['comicid']}
            newVal = {"LatestDate": df['latestdate']}
            myDB.upsert("comics", newVal, newCtrl)
    return
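# A minimal, standalone sketch of the date normalisation performed above
# (illustrative only; the real fix runs against the comics table and the
# helper name below is not part of Mylar):
def _normalize_latestdate(latestdate):
    # 'mm-yyyy' or 'yyyy-mm' -> 'yyyy-mm-01'; full dates pass through untouched
    if len(latestdate) > 7:
        return latestdate
    finddash = latestdate.find('-')
    if finddash != 4:   # mm-yyyy
        lat_month, lat_year = latestdate[:finddash], latestdate[finddash + 1:]
    else:               # yyyy-mm
        lat_year, lat_month = latestdate[:finddash], latestdate[finddash + 1:]
    return lat_year + '-' + lat_month + '-01'

#   _normalize_latestdate('2013-05') -> '2013-05-01'
#   _normalize_latestdate('05-2013') -> '2013-05-01'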
def GetIssuesInfo(comicid,dom): subtracks = dom.getElementsByTagName('issue') if not mylar.CV_ONLY: cntiss = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText logger.fdebug("issues I've counted: " + str(len(subtracks))) logger.fdebug("issues CV says it has: " + str(int(cntiss))) if int(len(subtracks)) != int(cntiss): logger.fdebug("CV's count is wrong, I counted different...going with my count for physicals" + str(len(subtracks))) cntiss = len(subtracks) # assume count of issues is wrong, go with ACTUAL physical api count cntiss = int(cntiss) n = cntiss-1 else: n = int(len(subtracks)) tempissue = {} issuech = [] firstdate = '2099-00-00' for subtrack in subtracks: if not mylar.CV_ONLY: if (dom.getElementsByTagName('name')[n].firstChild) is not None: issue['Issue_Name'] = dom.getElementsByTagName('name')[n].firstChild.wholeText else: issue['Issue_Name'] = 'None' issue['Issue_ID'] = dom.getElementsByTagName('id')[n].firstChild.wholeText issue['Issue_Number'] = dom.getElementsByTagName('issue_number')[n].firstChild.wholeText issuech.append({ 'Issue_ID': issue['Issue_ID'], 'Issue_Number': issue['Issue_Number'], 'Issue_Name': issue['Issue_Name'] }) else: try: tempissue['Issue_Name'] = subtrack.getElementsByTagName('name')[0].firstChild.wholeText except: tempissue['Issue_Name'] = 'None' tempissue['Issue_ID'] = subtrack.getElementsByTagName('id')[0].firstChild.wholeText try: tempissue['CoverDate'] = subtrack.getElementsByTagName('cover_date')[0].firstChild.wholeText except: tempissue['CoverDate'] = '0000-00-00' tempissue['Issue_Number'] = subtrack.getElementsByTagName('issue_number')[0].firstChild.wholeText issuech.append({ 'Issue_ID': tempissue['Issue_ID'], 'Issue_Number': tempissue['Issue_Number'], 'Issue_Date': tempissue['CoverDate'], 'Issue_Name': tempissue['Issue_Name'] }) if tempissue['CoverDate'] < firstdate and tempissue['CoverDate'] != '0000-00-00': firstdate = tempissue['CoverDate'] n-=1 #issue['firstdate'] = firstdate return issuech, firstdate
def GetComicInfo(comicid,dom): #comicvine isn't as up-to-date with issue counts.. #so this can get really buggered, really fast. tracks = dom.getElementsByTagName('issue') try: cntit = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText except: cntit = len(tracks) trackcnt = len(tracks) logger.fdebug("number of issues I counted: " + str(trackcnt)) logger.fdebug("number of issues CV says it has: " + str(cntit)) # if the two don't match, use trackcnt as count_of_issues might be not upto-date for some reason if int(trackcnt) != int(cntit): cntit = trackcnt vari = "yes" else: vari = "no" logger.fdebug("vari is set to: " + str(vari)) #if str(trackcnt) != str(int(cntit)+2): # cntit = int(cntit) + 1 comic = {} comicchoice = [] cntit = int(cntit) #retrieve the first xml tag (<tag>data</tag>) #that the parser finds with name tagName: comic['ComicName'] = dom.getElementsByTagName('name')[trackcnt].firstChild.wholeText comic['ComicName'] = comic['ComicName'].rstrip() comic['ComicYear'] = dom.getElementsByTagName('start_year')[0].firstChild.wholeText comic['ComicURL'] = dom.getElementsByTagName('site_detail_url')[0].firstChild.wholeText if vari == "yes": comic['ComicIssues'] = str(cntit) else: comic['ComicIssues'] = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText comic['ComicImage'] = dom.getElementsByTagName('super_url')[0].firstChild.wholeText comic['ComicPublisher'] = dom.getElementsByTagName('name')[trackcnt+1].firstChild.wholeText comicchoice.append({ 'ComicName': comic['ComicName'], 'ComicYear': comic['ComicYear'], 'Comicid': comicid, 'ComicURL': comic['ComicURL'], 'ComicIssues': comic['ComicIssues'], 'ComicImage': comic['ComicImage'], 'ComicPublisher': comic['ComicPublisher'] }) comic['comicchoice'] = comicchoice return comic
def checkthepub(ComicID):
    import db, logger
    myDB = db.DBConnection()
    publishers = ['marvel', 'dc', 'darkhorse']
    pubchk = myDB.selectone("SELECT * FROM comics WHERE ComicID=?", [ComicID]).fetchone()
    if pubchk is None:
        logger.fdebug('No publisher information found to aid in determining series..defaulting to base check of 55 days.')
        return mylar.BIGGIE_PUB
    else:
        for publish in publishers:
            if publish in str(pubchk['ComicPublisher']).lower():
                logger.fdebug('Biggie publisher detected - ' + str(pubchk['ComicPublisher']))
                return mylar.BIGGIE_PUB
        logger.fdebug('Indie publisher detected - ' + str(pubchk['ComicPublisher']))
        return mylar.INDIE_PUB
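# A minimal sketch of the publisher test above without the DB lookup
# (helper name and sample values are illustrative; the actual wait thresholds
# come from mylar.BIGGIE_PUB / mylar.INDIE_PUB):
def _is_biggie_publisher(publisher_name):
    biggies = ['marvel', 'dc', 'darkhorse']
    return any(b in (publisher_name or '').lower() for b in biggies)

#   _is_biggie_publisher('DC Comics')    -> True
#   _is_biggie_publisher('Image Comics') -> False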
def worker(self):
    myDB = DBConnection()
    #this should be in its own thread somewhere, constantly polling the queue and sending them to the writer.
    logger.fdebug('worker started.')
    while True:
        thisthread = threading.currentThread().name
        if not mylarQueue.empty():
            # Rename the main thread
            logger.fdebug('[' + str(thisthread) + '] queue is not empty yet...')
            (QtableName, QvalueDict, QkeyDict) = mylarQueue.get(block=True, timeout=None)
            logger.fdebug('[REQUEUE] Table: ' + str(QtableName) + ' values: ' + str(QvalueDict) + ' keys: ' + str(QkeyDict))
            sqlResult = myDB.upsert(QtableName, QvalueDict, QkeyDict)
            if sqlResult:
                mylarQueue.task_done()
                return sqlResult
        else:
            time.sleep(1)
def historycheck(self, nzbid):
    history = self.server.history()
    found = False
    hq = [hs for hs in history if hs['NZBID'] == nzbid and 'SUCCESS' in hs['Status']]
    if len(hq) > 0:
        logger.fdebug('found matching completed item in history. Job has a status of %s' % hq[0]['Status'])
        if hq[0]['DownloadedSizeMB'] == hq[0]['FileSizeMB']:
            logger.fdebug('%s has final file size of %sMB' % (hq[0]['Name'], hq[0]['DownloadedSizeMB']))
            if os.path.isdir(hq[0]['DestDir']):
                logger.fdebug('location found @ %s' % hq[0]['DestDir'])
                return {'status': True,
                        'name': re.sub('.nzb', '', hq[0]['NZBName']).strip(),
                        'location': hq[0]['DestDir'],
                        'failed': False}
            else:
                logger.warn('no file found where it should be @ %s - is there another script that moves things after completion ?' % hq[0]['DestDir'])
                return {'status': False}
    else:
        logger.warn('Could not find completed item in history')
        return {'status': False}
def GetIssuesInfo(comicid,dom):
    subtracks = dom.getElementsByTagName('issue')
    cntiss = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
    logger.fdebug("issues I've counted: " + str(len(subtracks)))
    logger.fdebug("issues CV says it has: " + str(int(cntiss)))
    if int(len(subtracks)) != int(cntiss):
        logger.fdebug("CV's count is wrong, I counted different...going with my count for physicals" + str(len(subtracks)))
        cntiss = len(subtracks)  # assume count of issues is wrong, go with ACTUAL physical api count
    cntiss = int(cntiss)
    n = cntiss - 1
    issue = {}
    issuechoice = []
    for subtrack in subtracks:
        if (dom.getElementsByTagName('name')[n].firstChild) is not None:
            issue['Issue_Name'] = dom.getElementsByTagName('name')[n].firstChild.wholeText
        else:
            issue['Issue_Name'] = 'None'
        issue['Issue_ID'] = dom.getElementsByTagName('id')[n].firstChild.wholeText
        try:
            issue['Issue_Number'] = dom.getElementsByTagName('issue_number')[n].firstChild.wholeText
            issuechoice.append({
                'Issue_ID': issue['Issue_ID'],
                'Issue_Number': issue['Issue_Number'],
                'Issue_Name': issue['Issue_Name']
            })
            issue['issuechoice'] = issuechoice
        except:
            #logger.fdebug("publisher...ignoring this.")
            #logger.fdebug("n value: " + str(n) + " ...subtracks: " + str(len(subtracks)))
            # in order to get ALL the issues, we need to increment the count back by 1 so it grabs the
            # last issue
            pass
        n -= 1
    return issue
def historycheck(self, nzbinfo): nzbid = nzbinfo['NZBID'] history = self.server.history(True) found = False destdir = None double_pp = False hq = [ hs for hs in history if hs['NZBID'] == nzbid and ( 'SUCCESS' in hs['Status'] or ('COPY' in hs['Status'])) ] if len(hq) > 0: logger.fdebug( 'found matching completed item in history. Job has a status of %s' % hq[0]['Status']) if len(hq[0]['ScriptStatuses']) > 0: for x in hq[0]['ScriptStatuses']: if 'comicrn' in x['Name'].lower(): double_pp = True break if all([len(hq[0]['Parameters']) > 0, double_pp is False]): for x in hq[0]['Parameters']: if all( ['comicrn' in x['Name'].lower(), x['Value'] == 'yes']): double_pp = True break if double_pp is True: logger.warn( 'ComicRN has been detected as being active for this category & download. Completed Download Handling will NOT be performed due to this.' ) logger.warn( 'Either disable Completed Download Handling for NZBGet within Mylar, or remove ComicRN from your category script in NZBGet.' ) return {'status': 'double-pp', 'failed': False} if all([ 'SUCCESS' in hq[0]['Status'], (hq[0]['FileSizeMB'] * .95) <= hq[0]['DownloadedSizeMB'] <= (hq[0]['FileSizeMB'] * 1.05) ]): logger.fdebug('%s has final file size of %sMB' % (hq[0]['Name'], hq[0]['DownloadedSizeMB'])) if os.path.isdir(hq[0]['DestDir']): destdir = hq[0]['DestDir'] logger.fdebug('location found @ %s' % destdir) elif all([ 'COPY' in hq[0]['Status'], int(hq[0]['FileSizeMB']) > 0, hq[0]['DeleteStatus'] == 'COPY' ]): config = self.server.config() cDestDir = None for x in config: if x['Name'] == 'TempDir': cTempDir = x['Value'] elif x['Name'] == 'DestDir': cDestDir = x['Value'] if cDestDir is not None: break if cTempDir in hq[0]['DestDir']: destdir2 = re.sub(cTempDir, cDestDir, hq[0]['DestDir']).strip() if not destdir2.endswith(os.sep): destdir2 = destdir2 + os.sep destdir = os.path.join(destdir2, hq[0]['Name']) logger.fdebug('NZBGET Destination dir set to: %s' % destdir) else: logger.warn( 'no file found where it should be @ %s - is there another script that moves things after completion ?' % hq[0]['DestDir']) return {'status': 'file not found', 'failed': False} if mylar.CONFIG.NZBGET_DIRECTORY is not None: destdir2 = mylar.CONFIG.NZBGET_DIRECTORY if not destdir2.endswith(os.sep): destdir = destdir2 + os.sep destdir = os.path.join(destdir2, hq[0]['Name']) logger.fdebug( 'NZBGet Destination folder set via config to: %s' % destdir) if destdir is not None: return { 'status': True, 'name': re.sub('.nzb', '', hq[0]['Name']).strip(), 'location': destdir, 'failed': False, 'issueid': nzbinfo['issueid'], 'comicid': nzbinfo['comicid'], 'apicall': True, 'ddl': False } else: logger.warn('Could not find completed NZBID %s in history' % nzbid) return {'status': False}
def GetComicInfo(comicid, dom, safechk=None): if safechk is None: #safetycheck when checking comicvine. If it times out, increment the chk on retry attempts up until 5 tries then abort. safechk = 1 elif safechk > 4: logger.error( 'Unable to add / refresh the series due to inablity to retrieve data from ComicVine. You might want to try abit later and/or make sure ComicVine is up.' ) return #comicvine isn't as up-to-date with issue counts.. #so this can get really buggered, really fast. tracks = dom.getElementsByTagName('issue') try: cntit = dom.getElementsByTagName( 'count_of_issues')[0].firstChild.wholeText except: cntit = len(tracks) trackcnt = len(tracks) logger.fdebug("number of issues I counted: " + str(trackcnt)) logger.fdebug("number of issues CV says it has: " + str(cntit)) # if the two don't match, use trackcnt as count_of_issues might be not upto-date for some reason if int(trackcnt) != int(cntit): cntit = trackcnt vari = "yes" else: vari = "no" logger.fdebug("vari is set to: " + str(vari)) #if str(trackcnt) != str(int(cntit)+2): # cntit = int(cntit) + 1 comic = {} comicchoice = [] cntit = int(cntit) #retrieve the first xml tag (<tag>data</tag>) #that the parser finds with name tagName: # to return the parent name of the <name> node : dom.getElementsByTagName('name')[0].parentNode.nodeName # where [0] denotes the number of the name field(s) # where nodeName denotes the parentNode : ComicName = results, publisher = publisher, issues = issue try: names = len(dom.getElementsByTagName('name')) n = 0 comic[ 'ComicPublisher'] = 'Unknown' #set this to a default value here so that it will carry through properly while (n < names): if dom.getElementsByTagName( 'name')[n].parentNode.nodeName == 'results': try: comic['ComicName'] = dom.getElementsByTagName( 'name')[n].firstChild.wholeText comic['ComicName'] = comic['ComicName'].rstrip() except: logger.error( 'There was a problem retrieving the given data from ComicVine. Ensure that www.comicvine.com is accessible AND that you have provided your OWN ComicVine API key.' ) return elif dom.getElementsByTagName( 'name')[n].parentNode.nodeName == 'publisher': try: comic['ComicPublisher'] = dom.getElementsByTagName( 'name')[n].firstChild.wholeText except: comic['ComicPublisher'] = "Unknown" n += 1 except: logger.warn( 'Something went wrong retrieving from ComicVine. Ensure your API is up-to-date and that comicvine is accessible' ) return try: comic['ComicYear'] = dom.getElementsByTagName( 'start_year')[0].firstChild.wholeText except: comic['ComicYear'] = '0000' #safety check, cause you known, dufus'... if comic['ComicYear'][-1:] == '-': comic['ComicYear'] = comic['ComicYear'][:-1] try: comic['ComicURL'] = dom.getElementsByTagName( 'site_detail_url')[trackcnt].firstChild.wholeText except: #this should never be an exception. If it is, it's probably due to CV timing out - so let's sleep for abit then retry. logger.warn( 'Unable to retrieve URL for volume. This is usually due to a timeout to CV, or going over the API. Retrying again in 10s.' 
) time.sleep(10) safechk += 1 GetComicInfo(comicid, dom, safechk) desdeck = 0 #the description field actually holds the Volume# - so let's grab it try: descchunk = dom.getElementsByTagName( 'description')[0].firstChild.wholeText comic_desc = drophtml(descchunk) desdeck += 1 except: comic_desc = 'None' #sometimes the deck has volume labels try: deckchunk = dom.getElementsByTagName('deck')[0].firstChild.wholeText comic_deck = deckchunk desdeck += 1 except: comic_deck = 'None' #comic['ComicDescription'] = comic_desc try: comic['Aliases'] = dom.getElementsByTagName( 'aliases')[0].firstChild.wholeText comic['Aliases'] = re.sub('\n', '##', comic['Aliases']).strip() if comic['Aliases'][-2:] == '##': comic['Aliases'] = comic['Aliases'][:-2] #logger.fdebug('Aliases: ' + str(aliases)) except: comic['Aliases'] = 'None' comic['ComicVersion'] = 'None' #noversion' #logger.info('comic_desc:' + comic_desc) #logger.info('comic_deck:' + comic_deck) #logger.info('desdeck: ' + str(desdeck)) #figure out if it's a print / digital edition. comic['Type'] = 'None' if comic_deck != 'None': if any( ['print' in comic_deck.lower(), 'digital' in comic_deck.lower()]): if 'print' in comic_deck.lower(): comic['Type'] = 'Print' elif 'digital' in comic_deck.lower(): comic['Type'] = 'Digital' if comic_desc != 'None' and comic['Type'] == 'None': if 'print' in comic_desc[:60].lower( ) and 'print edition can be found' not in comic_desc.lower(): comic['Type'] = 'Print' elif 'digital' in comic_desc[:60].lower( ) and 'digital edition can be found' not in comic_desc.lower(): comic['Type'] = 'Digital' else: comic['Type'] = 'Print' while (desdeck > 0): if desdeck == 1: if comic_desc == 'None': comicDes = comic_deck[:30] else: #extract the first 60 characters comicDes = comic_desc[:60].replace('New 52', '') elif desdeck == 2: #extract the characters from the deck comicDes = comic_deck[:30].replace('New 52', '') else: break i = 0 while (i < 2): if 'volume' in comicDes.lower(): #found volume - let's grab it. v_find = comicDes.lower().find('volume') #arbitrarily grab the next 10 chars (6 for volume + 1 for space + 3 for the actual vol #) #increased to 10 to allow for text numbering (+5 max) #sometimes it's volume 5 and ocassionally it's fifth volume. 
if i == 0: vfind = comicDes[v_find:v_find + 15] #if it's volume 5 format basenums = { 'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'ten': '10', 'i': '1', 'ii': '2', 'iii': '3', 'iv': '4', 'v': '5' } logger.fdebug('volume X format - ' + str(i) + ': ' + vfind) else: vfind = comicDes[:v_find] # if it's fifth volume format basenums = { 'zero': '0', 'first': '1', 'second': '2', 'third': '3', 'fourth': '4', 'fifth': '5', 'sixth': '6', 'seventh': '7', 'eighth': '8', 'nineth': '9', 'tenth': '10', 'i': '1', 'ii': '2', 'iii': '3', 'iv': '4', 'v': '5' } logger.fdebug('X volume format - ' + str(i) + ': ' + vfind) volconv = '' for nums in basenums: if nums in vfind.lower(): sconv = basenums[nums] vfind = re.sub(nums, sconv, vfind.lower()) break #logger.info('volconv: ' + str(volconv)) #now we attempt to find the character position after the word 'volume' if i == 0: volthis = vfind.lower().find('volume') volthis = volthis + 6 # add on the actual word to the position so that we can grab the subsequent digit vfind = vfind[volthis:volthis + 4] # grab the next 4 characters ;) elif i == 1: volthis = vfind.lower().find('volume') vfind = vfind[volthis - 4:volthis] # grab the next 4 characters ;) if '(' in vfind: #bracket detected in versioning' vfindit = re.findall('[^()]+', vfind) vfind = vfindit[0] vf = re.findall('[^<>]+', vfind) try: ledigit = re.sub("[^0-9]", "", vf[0]) if ledigit != '': comic['ComicVersion'] = ledigit logger.fdebug( "Volume information found! Adding to series record : volume " + comic['ComicVersion']) break except: pass i += 1 else: i += 1 if comic['ComicVersion'] == 'None': logger.fdebug('comic[ComicVersion]:' + str(comic['ComicVersion'])) desdeck -= 1 else: break if vari == "yes": comic['ComicIssues'] = str(cntit) else: comic['ComicIssues'] = dom.getElementsByTagName( 'count_of_issues')[0].firstChild.wholeText comic['ComicImage'] = dom.getElementsByTagName( 'super_url')[0].firstChild.wholeText comic['ComicImageALT'] = dom.getElementsByTagName( 'small_url')[0].firstChild.wholeText comic['FirstIssueID'] = dom.getElementsByTagName( 'id')[0].firstChild.wholeText # print ("fistIss:" + str(comic['FirstIssueID'])) # comicchoice.append({ # 'ComicName': comic['ComicName'], # 'ComicYear': comic['ComicYear'], # 'Comicid': comicid, # 'ComicURL': comic['ComicURL'], # 'ComicIssues': comic['ComicIssues'], # 'ComicImage': comic['ComicImage'], # 'ComicVolume': ParseVol, # 'ComicPublisher': comic['ComicPublisher'] # }) # comic['comicchoice'] = comicchoice return comic
def GCDAdd(gcdcomicid): serieschoice = [] series = {} logger.fdebug("I'm trying to find these GCD comicid's:" + str(gcdcomicid)) for gcdid in gcdcomicid: logger.fdebug("looking at gcdid:" + str(gcdid)) input2 = 'http://www.comics.org/series/' + str(gcdid) logger.fdebug("---url: " + str(input2)) resp = urllib2.urlopen (input2) soup = BeautifulSoup (resp) logger.fdebug("SeriesName section...") parsen = soup.find("span", {"id": "series_name"}) #logger.fdebug("series name (UNPARSED): " + str(parsen)) subpar = parsen('a')[0] resultName = subpar.findNext(text=True) logger.fdebug("ComicName: " + str(resultName)) #covers-start logger.fdebug("Covers section...") coverst = soup.find("div", {"id": "series_cover"}) if coverst < 0: gcdcover = "None" logger.fdebug("unable to find any covers - setting to None") else: subcoverst = coverst('img', src=True)[0] #logger.fdebug("cover (UNPARSED) : " + str(subcoverst)) gcdcover = subcoverst['src'] logger.fdebug("Cover: " + str(gcdcover)) #covers end #publisher start logger.fdebug("Publisher section...") try: pubst = soup.find("div", {"class": "item_data"}) catchit = pubst('a')[0] except (IndexError, TypeError): pubst = soup.findAll("div", {"class": "left"})[1] catchit = pubst.find("a") publisher = catchit.findNext(text=True) logger.fdebug("Publisher: " + str(publisher)) #publisher end parsed = soup.find("div", {"id": "series_data"}) #logger.fdebug("series_data: " + str(parsed)) #print ("parse:" + str(parsed)) subtxt3 = parsed.find("dd", {"id": "publication_dates"}) #logger.fdebug("publication_dates: " + str(subtxt3)) pubdate = subtxt3.findNext(text=True).rstrip() logger.fdebug("pubdate:" + str(pubdate)) subtxt4 = parsed.find("dd", {"id": "issues_published"}) noiss = subtxt4.findNext(text=True) lenwho = len(noiss) lent = noiss.find(' ', 2) lenf = noiss.find('(') stringit = noiss[lenf:lenwho] stringout = noiss[:lent] noissues = stringout.rstrip(' \t\r\n\0') numbering = stringit.rstrip(' \t\r\n\0') logger.fdebug("noissues:" + str(noissues)) logger.fdebug("numbering:" + str(numbering)) serieschoice.append({ "ComicID": gcdid, "ComicName": resultName, "ComicYear": pubdate, "ComicIssues": noissues, "ComicPublisher": publisher, "ComicCover": gcdcover }) series['serieschoice'] = serieschoice return series
def havetotals(refreshit=None): import db, logger comics = [] myDB = db.DBConnection() if refreshit is None: comiclist = myDB.select('SELECT * from comics order by ComicSortName COLLATE NOCASE') else: comiclist = [] comicref = myDB.selectone("SELECT * from comics WHERE ComicID=?", [refreshit]).fetchone() #refreshit is the ComicID passed from the Refresh Series to force/check numerical have totals comiclist.append({"ComicID": comicref[0], "Have": comicref[7], "Total": comicref[8]}) for comic in comiclist: issue = myDB.selectone("SELECT COUNT(*) as count FROM issues WHERE ComicID=?", [comic['ComicID']]).fetchone() if issue is None: if refreshit is not None: logger.fdebug(str(comic['ComicID']) + ' has no issuedata available. Forcing complete Refresh/Rescan') return True else: continue if mylar.ANNUALS_ON: annuals_on = True annual = myDB.selectone("SELECT COUNT(*) as count FROM annuals WHERE ComicID=?", [comic['ComicID']]).fetchone() annualcount = annual[0] if not annualcount: annualcount = 0 else: annuals_on = False annual = None annualcount = 0 try: totalissues = comic['Total'] + annualcount haveissues = comic['Have'] except TypeError: logger.warning('[Warning] ComicID: ' + str(comic['ComicID']) + ' is incomplete - Removing from DB. You should try to re-add the series.') myDB.action("DELETE from COMICS WHERE ComicID=? AND ComicName LIKE 'Comic ID%'", [comic['ComicID']]) myDB.action("DELETE from ISSUES WHERE ComicID=? AND ComicName LIKE 'Comic ID%'", [comic['ComicID']]) continue if not haveissues: havetracks = 0 if refreshit is not None: if haveissues > totalissues: return True # if it's 5/4, send back to updater and don't restore previous status' else: return False # if it's 5/5 or 4/5, send back to updater and restore previous status' try: percent = (haveissues*100.0)/totalissues if percent > 100: percent = 100 except (ZeroDivisionError, TypeError): percent = 0 totalissuess = '?' if comic['ComicPublished'] is None or comic['ComicPublished'] == '': recentstatus = 'Unknown' elif comic['ForceContinuing'] == 1: recentstatus = 'Continuing' elif 'present' in comic['ComicPublished'].lower() or ( today()[:4] in comic['LatestDate']): latestdate = comic['LatestDate'] c_date = datetime.date(int(latestdate[:4]),int(latestdate[5:7]),1) n_date = datetime.date.today() recentchk = (n_date - c_date).days if recentchk < 55: recentstatus = 'Continuing' else: recentstatus = 'Ended' else: recentstatus = 'Ended' comics.append({"ComicID": comic['ComicID'], "ComicName": comic['ComicName'], "ComicSortName": comic['ComicSortName'], "ComicPublisher": comic['ComicPublisher'], "ComicYear": comic['ComicYear'], "ComicImage": comic['ComicImage'], "LatestIssue": comic['LatestIssue'], "LatestDate": comic['LatestDate'], "ComicPublished": comic['ComicPublished'], "Status": comic['Status'], "recentstatus": recentstatus, "percent": percent, "totalissues": totalissues, "haveissues": haveissues, "DateAdded": comic['LastUpdated']}) return comics
def updateComicLocation(): import db, logger myDB = db.DBConnection() if mylar.NEWCOM_DIR is not None: logger.info('Performing a one-time mass update to Comic Location') #create the root dir if it doesn't exist if os.path.isdir(mylar.NEWCOM_DIR): logger.info('Directory (' + mylar.NEWCOM_DIR + ') already exists! Continuing...') else: logger.info('Directory does not exist!') try: os.makedirs(mylar.NEWCOM_DIR) logger.info('Directory successfully created at: ' + mylar.NEWCOM_DIR) except OSError: logger.error('Could not create comicdir : ' + mylar.NEWCOM_DIR) return dirlist = myDB.select("SELECT * FROM comics") if dirlist is not None: for dl in dirlist: comversion = dl['ComicVersion'] if comversion is None: comversion = 'None' #if comversion is None, remove it so it doesn't populate with 'None' if comversion == 'None': chunk_f_f = re.sub('\$VolumeN','',mylar.FOLDER_FORMAT) chunk_f = re.compile(r'\s+') folderformat = chunk_f.sub(' ', chunk_f_f) else: folderformat = mylar.FOLDER_FORMAT #remove all 'bad' characters from the Series Name in order to create directories. u_comicnm = dl['ComicName'] u_comicname = u_comicnm.encode('ascii', 'ignore').strip() if ':' in u_comicname or '/' in u_comicname or ',' in u_comicname or '?' in u_comicname: comicdir = u_comicname if ':' in comicdir: comicdir = comicdir.replace(':','') if '/' in comicdir: comicdir = comicdir.replace('/','-') if ',' in comicdir: comicdir = comicdir.replace(',','') if '?' in comicdir: comicdir = comicdir.replace('?','') else: comicdir = u_comicname values = {'$Series': comicdir, '$Publisher': re.sub('!','',dl['ComicPublisher']), '$Year': dl['ComicYear'], '$series': dl['ComicName'].lower(), '$publisher': re.sub('!','',dl['ComicPublisher']).lower(), '$VolumeY': 'V' + str(dl['ComicYear']), '$VolumeN': comversion } if mylar.FFTONEWCOM_DIR: #if this is enabled (1) it will apply the Folder_Format to all the new dirs if mylar.FOLDER_FORMAT == '': comlocation = re.sub(mylar.DESTINATION_DIR, mylar.NEWCOM_DIR, comicdir) else: first = replace_all(folderformat, values) if mylar.REPLACE_SPACES: #mylar.REPLACE_CHAR ...determines what to replace spaces with underscore or dot first = first.replace(' ', mylar.REPLACE_CHAR) comlocation = os.path.join(mylar.NEWCOM_DIR,first) else: comlocation = re.sub(mylar.DESTINATION_DIR, mylar.NEWCOM_DIR, comicdir) ctrlVal = {"ComicID": dl['ComicID']} newVal = {"ComicLocation": comlocation} myDB.upsert("Comics", newVal, ctrlVal) logger.fdebug('updated ' + dl['ComicName'] + ' to : ' + comlocation) #set the value to 0 here so we don't keep on doing this... mylar.LOCMOVE = 0 mylar.config_write() else: logger.info('No new ComicLocation path specified - not updating.') #raise cherrypy.HTTPRedirect("config") return
def GetIssuesInfo(comicid, dom, arcid=None): subtracks = dom.getElementsByTagName('issue') if not mylar.CONFIG.CV_ONLY: cntiss = dom.getElementsByTagName( 'count_of_issues')[0].firstChild.wholeText logger.fdebug("issues I've counted: " + str(len(subtracks))) logger.fdebug("issues CV says it has: " + str(int(cntiss))) if int(len(subtracks)) != int(cntiss): logger.fdebug( "CV's count is wrong, I counted different...going with my count for physicals" + str(len(subtracks))) cntiss = len( subtracks ) # assume count of issues is wrong, go with ACTUAL physical api count cntiss = int(cntiss) n = cntiss - 1 else: n = int(len(subtracks)) tempissue = {} issuech = [] firstdate = '2099-00-00' for subtrack in subtracks: if not mylar.CONFIG.CV_ONLY: if (dom.getElementsByTagName('name')[n].firstChild) is not None: issue['Issue_Name'] = dom.getElementsByTagName( 'name')[n].firstChild.wholeText else: issue['Issue_Name'] = 'None' issue['Issue_ID'] = dom.getElementsByTagName( 'id')[n].firstChild.wholeText issue['Issue_Number'] = dom.getElementsByTagName( 'issue_number')[n].firstChild.wholeText issuech.append({ 'Issue_ID': issue['Issue_ID'], 'Issue_Number': issue['Issue_Number'], 'Issue_Name': issue['Issue_Name'] }) else: try: totnames = len(subtrack.getElementsByTagName('name')) tot = 0 while (tot < totnames): if subtrack.getElementsByTagName( 'name')[tot].parentNode.nodeName == 'volume': tempissue['ComicName'] = subtrack.getElementsByTagName( 'name')[tot].firstChild.wholeText elif subtrack.getElementsByTagName( 'name')[tot].parentNode.nodeName == 'issue': try: tempissue[ 'Issue_Name'] = subtrack.getElementsByTagName( 'name')[tot].firstChild.wholeText except: tempissue['Issue_Name'] = None tot += 1 except: tempissue['ComicName'] = 'None' try: totids = len(subtrack.getElementsByTagName('id')) idt = 0 while (idt < totids): if subtrack.getElementsByTagName( 'id')[idt].parentNode.nodeName == 'volume': tempissue['Comic_ID'] = subtrack.getElementsByTagName( 'id')[idt].firstChild.wholeText elif subtrack.getElementsByTagName( 'id')[idt].parentNode.nodeName == 'issue': tempissue['Issue_ID'] = subtrack.getElementsByTagName( 'id')[idt].firstChild.wholeText idt += 1 except: tempissue['Issue_Name'] = 'None' try: tempissue['CoverDate'] = subtrack.getElementsByTagName( 'cover_date')[0].firstChild.wholeText except: tempissue['CoverDate'] = '0000-00-00' try: tempissue['StoreDate'] = subtrack.getElementsByTagName( 'store_date')[0].firstChild.wholeText except: tempissue['StoreDate'] = '0000-00-00' try: tempissue['Issue_Number'] = subtrack.getElementsByTagName( 'issue_number')[0].firstChild.wholeText except: logger.fdebug( 'No Issue Number available - Trade Paperbacks, Graphic Novels and Compendiums are not supported as of yet.' 
) try: tempissue['ComicImage'] = subtrack.getElementsByTagName( 'small_url')[0].firstChild.wholeText except: tempissue['ComicImage'] = 'None' try: tempissue['ComicImageALT'] = subtrack.getElementsByTagName( 'medium_url')[0].firstChild.wholeText except: tempissue['ComicImageALT'] = 'None' if arcid is None: issuech.append({ 'Comic_ID': comicid, 'Issue_ID': tempissue['Issue_ID'], 'Issue_Number': tempissue['Issue_Number'], 'Issue_Date': tempissue['CoverDate'], 'Store_Date': tempissue['StoreDate'], 'Issue_Name': tempissue['Issue_Name'], 'Image': tempissue['ComicImage'], 'ImageALT': tempissue['ComicImageALT'] }) else: issuech.append({ 'ArcID': arcid, 'ComicName': tempissue['ComicName'], 'ComicID': tempissue['Comic_ID'], 'IssueID': tempissue['Issue_ID'], 'Issue_Number': tempissue['Issue_Number'], 'Issue_Date': tempissue['CoverDate'], 'Store_Date': tempissue['StoreDate'], 'Issue_Name': tempissue['Issue_Name'] }) if tempissue['CoverDate'] < firstdate and tempissue[ 'CoverDate'] != '0000-00-00': firstdate = tempissue['CoverDate'] n -= 1 #issue['firstdate'] = firstdate return issuech, firstdate
def GetIssuesInfo(comicid,dom,arcid=None): subtracks = dom.getElementsByTagName('issue') if not mylar.CV_ONLY: cntiss = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText logger.fdebug("issues I've counted: " + str(len(subtracks))) logger.fdebug("issues CV says it has: " + str(int(cntiss))) if int(len(subtracks)) != int(cntiss): logger.fdebug("CV's count is wrong, I counted different...going with my count for physicals" + str(len(subtracks))) cntiss = len(subtracks) # assume count of issues is wrong, go with ACTUAL physical api count cntiss = int(cntiss) n = cntiss-1 else: n = int(len(subtracks)) tempissue = {} issuech = [] firstdate = '2099-00-00' for subtrack in subtracks: if not mylar.CV_ONLY: if (dom.getElementsByTagName('name')[n].firstChild) is not None: issue['Issue_Name'] = dom.getElementsByTagName('name')[n].firstChild.wholeText else: issue['Issue_Name'] = 'None' issue['Issue_ID'] = dom.getElementsByTagName('id')[n].firstChild.wholeText issue['Issue_Number'] = dom.getElementsByTagName('issue_number')[n].firstChild.wholeText issuech.append({ 'Issue_ID': issue['Issue_ID'], 'Issue_Number': issue['Issue_Number'], 'Issue_Name': issue['Issue_Name'] }) else: try: totnames = len( subtrack.getElementsByTagName('name') ) tot = 0 while (tot < totnames): if subtrack.getElementsByTagName('name')[tot].parentNode.nodeName == 'volume': tempissue['ComicName'] = subtrack.getElementsByTagName('name')[tot].firstChild.wholeText elif subtrack.getElementsByTagName('name')[tot].parentNode.nodeName == 'issue': try: tempissue['Issue_Name'] = subtrack.getElementsByTagName('name')[tot].firstChild.wholeText except: tempissue['Issue_Name'] = None tot+=1 except: tempissue['ComicName'] = 'None' try: totids = len( subtrack.getElementsByTagName('id') ) idt = 0 while (idt < totids): if subtrack.getElementsByTagName('id')[idt].parentNode.nodeName == 'volume': tempissue['Comic_ID'] = subtrack.getElementsByTagName('id')[idt].firstChild.wholeText elif subtrack.getElementsByTagName('id')[idt].parentNode.nodeName == 'issue': tempissue['Issue_ID'] = subtrack.getElementsByTagName('id')[idt].firstChild.wholeText idt+=1 except: tempissue['Issue_Name'] = 'None' try: tempissue['CoverDate'] = subtrack.getElementsByTagName('cover_date')[0].firstChild.wholeText except: tempissue['CoverDate'] = '0000-00-00' try: tempissue['StoreDate'] = subtrack.getElementsByTagName('store_date')[0].firstChild.wholeText except: tempissue['StoreDate'] = '0000-00-00' try: tempissue['Issue_Number'] = subtrack.getElementsByTagName('issue_number')[0].firstChild.wholeText except: logger.fdebug('No Issue Number available - Trade Paperbacks, Graphic Novels and Compendiums are not supported as of yet.') if arcid is None: issuech.append({ 'Comic_ID': comicid, 'Issue_ID': tempissue['Issue_ID'], 'Issue_Number': tempissue['Issue_Number'], 'Issue_Date': tempissue['CoverDate'], 'Store_Date': tempissue['StoreDate'], 'Issue_Name': tempissue['Issue_Name'] }) else: issuech.append({ 'ArcID': arcid, 'ComicName': tempissue['ComicName'], 'ComicID': tempissue['Comic_ID'], 'IssueID': tempissue['Issue_ID'], 'Issue_Number': tempissue['Issue_Number'], 'Issue_Date': tempissue['CoverDate'], 'Store_Date': tempissue['StoreDate'], 'Issue_Name': tempissue['Issue_Name'] }) if tempissue['CoverDate'] < firstdate and tempissue['CoverDate'] != '0000-00-00': firstdate = tempissue['CoverDate'] n-=1 #issue['firstdate'] = firstdate return issuech, firstdate
'JusttheDigits': justthedigits }) print('appended.') else: comiclist.append({ 'ComicFilename': item, 'ComicLocation': comicpath, 'ComicSize': comicsize, 'JusttheDigits': justthedigits }) watchmatch['comiclist'] = comiclist else: pass #print ("directory found - ignoring") logger.fdebug('[FILECHECKER] you have a total of ' + str(comiccnt) + ' ' + watchcomic + ' comics') watchmatch['comiccount'] = comiccnt return watchmatch def validateAndCreateDirectory(dir, create=False): if os.path.exists(dir): logger.info('Found comic directory: ' + dir) return True else: logger.warn('Could not find comic directory: ' + dir) if create: if dir.strip(): logger.info('Creating comic directory (' + str(mylar.CHMOD_DIR) + ') : ' + dir) try:
def parse_downloadresults(self, id, mainlink): myDB = db.DBConnection() series = None year = None size = None title = os.path.join(mylar.CONFIG.CACHE_DIR, 'getcomics-' + id) soup = BeautifulSoup(open(title+'.html'), 'html.parser') orig_find = soup.find("p", {"style": "text-align: center;"}) i = 0 option_find = orig_find possible_more = None while True: #i <= 10: prev_option = option_find option_find = option_find.findNext(text=True) if i == 0 and series is None: series = option_find elif 'Year' in option_find: year = option_find.findNext(text=True) year = re.sub('\|', '', year).strip() else: if 'Size' in prev_option: size = option_find #.findNext(text=True) possible_more = orig_find.next_sibling break i+=1 logger.fdebug('Now downloading: %s [%s] / %s ... this can take a while (go get some take-out)...' % (series, year, size)) link = None for f in soup.findAll("div", {"class": "aio-pulse"}): lk = f.find('a') if lk['title'] == 'Download Now': link = {"series": series, "site": lk['title'], "year": year, "issues": None, "size": size, "link": lk['href']} break #get the first link just to test links = [] if link is None and possible_more.name == 'ul': try: bb = possible_more.findAll('li') except: pass else: for x in bb: linkline = x.find('a') if linkline: if 'go.php' in linkline['href']: volume = x.findNext(text=True) if u'\u2013' in volume: volume = re.sub(u'\u2013', '-', volume) #volume label contains series, issue(s), year(s), and size series_st = volume.find('(') issues_st = volume.find('#') series = volume[:series_st] if any([issues_st == -1, series_st == -1]): issues = None else: series = volume[:issues_st].strip() issues = volume[issues_st+1:series_st].strip() year_end = volume.find(')', series_st+1) year = re.sub('[\(\)]', '', volume[series_st+1: year_end]).strip() size_end = volume.find(')', year_end+1) size = re.sub('[\(\)]', '', volume[year_end+1: size_end]).strip() linked = linkline['href'] site = linkline.findNext(text=True) if site == 'Main Server': links.append({"series": series, "site": site, "year": year, "issues": issues, "size": size, "link": linked}) else: check_extras = soup.findAll("h3") for sb in check_extras: header = sb.findNext(text=True) if header == 'TPBs': nxt = sb.next_sibling if nxt.name == 'ul': bb = nxt.findAll('li') for x in bb: volume = x.findNext(text=True) if u'\u2013' in volume: volume = re.sub(u'\u2013', '-', volume) linkline = x.find('a') linked = linkline['href'] site = linkline.findNext(text=True) links.append({"volume": volume, "site": site, "link": linked}) if all([link is None, len(links) == 0]): logger.warn('Unable to retrieve any valid immediate download links. They might not exist.') return {'success': False} if all([link is not None, len(links) == 0]): logger.info('only one item discovered, changing queue length to accomodate: %s [%s]' % (link, type(link))) links = [link] elif len(links) > 0: if len(links) > 1: logger.info('[DDL-QUEUER] This pack has been broken up into %s separate packs - queueing each in sequence for your enjoyment.' 
% len(links)) cnt = 1 for x in links: if len(links) == 1: mod_id = id else: mod_id = id+'-'+str(cnt) #logger.fdebug('[%s] %s (%s) %s [%s][%s]' % (x['site'], x['series'], x['year'], x['issues'], x['size'], x['link'])) ctrlval = {'id': mod_id} vals = {'series': x['series'], 'year': x['year'], 'size': x['size'], 'issues': x['issues'], 'issueid': self.issueid, 'comicid': self.comicid, 'link': x['link'], 'mainlink': mainlink, 'updated_date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M'), 'status': 'Queued'} myDB.upsert('ddl_info', vals, ctrlval) mylar.DDL_QUEUE.put({'link': x['link'], 'mainlink': mainlink, 'series': x['series'], 'year': x['year'], 'size': x['size'], 'comicid': self.comicid, 'issueid': self.issueid, 'id': mod_id, 'resume': None}) cnt+=1 return {'success': True}
def search_results(self): results = {} resultlist = [] soup = BeautifulSoup(open(self.local_filename), 'html.parser') resultline = soup.find("span", {"class": "cover-article-count"}).get_text(strip=True) logger.info('There are %s results' % re.sub('Articles', '', resultline).strip()) for f in soup.findAll("article"): id = f['id'] lk = f.find('a') link = lk['href'] titlefind = f.find("h1", {"class": "post-title"}) title = titlefind.get_text(strip=True) title = re.sub(u'\u2013', '-', title).strip() filename = title issues = None pack = False #see if it's a pack type issfind_st = title.find('#') issfind_en = title.find('-', issfind_st) if issfind_en != -1: if all([title[issfind_en+1] == ' ', title[issfind_en+2].isdigit()]): iss_en = title.find(' ', issfind_en+2) if iss_en != -1: issues = title[issfind_st+1:iss_en] pack = True if title[issfind_en+1].isdigit(): iss_en = title.find(' ', issfind_en+1) if iss_en != -1: issues = title[issfind_st+1:iss_en] pack = True # if it's a pack - remove the issue-range and the possible issue years (cause it most likely will span) and pass thru as separate items if pack is True: title = re.sub(issues, '', title).strip() if title.endswith('#'): title = title[:-1].strip() else: if any(['Marvel Week+' in title, 'INDIE Week+' in title, 'Image Week' in title, 'DC Week+' in title]): continue option_find = f.find("p", {"style": "text-align: center;"}) i = 0 while i <= 2: option_find = option_find.findNext(text=True) if 'Year' in option_find: year = option_find.findNext(text=True) year = re.sub('\|', '', year).strip() if pack is True and '-' in year: title = re.sub('\('+year+'\)', '', title).strip() else: size = option_find.findNext(text=True) if all([re.sub(':', '', size).strip() != 'Size', len(re.sub('[^0-9]', '', size).strip()) > 0]): if 'MB' in size: size = re.sub('MB', 'M', size).strip() elif 'GB' in size: size = re.sub('GB', 'G', size).strip() if '//' in size: nwsize = size.find('//') size = re.sub('\[', '', size[:nwsize]).strip() else: size = '0M' i+=1 dateline = f.find('time') datefull = dateline['datetime'] datestamp = time.mktime(time.strptime(datefull, "%Y-%m-%d")) resultlist.append({"title": title, "pubdate": datetime.datetime.fromtimestamp(float(datestamp)).strftime('%a, %d %b %Y %H:%M:%S'), "filename": filename, "size": re.sub(' ', '', size).strip(), "pack": pack, "issues": issues, "link": link, "year": year, "id": re.sub('post-', '', id).strip(), "site": 'DDL'}) logger.fdebug('%s [%s]' % (title, size)) results['entries'] = resultlist return results
def processor(self, nzbinfo):
    nzbid = nzbinfo['NZBID']
    try:
        logger.fdebug('Now checking the active queue of nzbget for the download')
        queueinfo = self.server.listgroups()
    except Exception as e:
        logger.warn('Error attempting to retrieve active queue listing: %s' % e)
        return {'status': False}
    else:
        logger.fdebug('valid queue result returned. Analyzing...')
        queuedl = [qu for qu in queueinfo if qu['NZBID'] == nzbid]
        if len(queuedl) == 0:
            logger.warn('Unable to locate item in active queue. Could it be finished already ?')
            return {'status': False}
        stat = False
        while stat is False:
            time.sleep(10)
            queueinfo = self.server.listgroups()
            queuedl = [qu for qu in queueinfo if qu['NZBID'] == nzbid]
            if len(queuedl) == 0:
                logger.fdebug('Item is no longer in active queue. It should be finished by my calculations')
                stat = True
            else:
                logger.fdebug('status: %s' % queuedl[0]['Status'])
                logger.fdebug('name: %s' % queuedl[0]['NZBName'])
                logger.fdebug('FileSize: %sMB' % queuedl[0]['FileSizeMB'])
                logger.fdebug('Download Left: %sMB' % queuedl[0]['RemainingSizeMB'])
                logger.fdebug('health: %s' % (queuedl[0]['Health'] / 10))
                logger.fdebug('destination: %s' % queuedl[0]['DestDir'])
        logger.fdebug('File has now downloaded!')
        time.sleep(5)  #wait some seconds so shit can get written to history properly
        history = self.server.history()
        found = False
        hq = [hs for hs in history if hs['NZBID'] == nzbid and 'SUCCESS' in hs['Status']]
        if len(hq) > 0:
            logger.fdebug('found matching completed item in history. Job has a status of %s' % hq[0]['Status'])
            if hq[0]['DownloadedSizeMB'] == hq[0]['FileSizeMB']:
                logger.fdebug('%s has final file size of %sMB' % (hq[0]['Name'], hq[0]['DownloadedSizeMB']))
                if os.path.isdir(hq[0]['DestDir']):
                    logger.fdebug('location found @ %s' % hq[0]['DestDir'])
                    return {'status': True,
                            'name': re.sub('.nzb', '', hq[0]['NZBName']).strip(),
                            'location': hq[0]['DestDir'],
                            'failed': False}
                else:
                    logger.warn('no file found where it should be @ %s - is there another script that moves things after completion ?' % hq[0]['DestDir'])
                    return {'status': False}
        else:
            logger.warn('Could not find completed item in history')
            return {'status': False}
def GetSeriesYears(dom):
    #used by the 'add a story arc' option to individually populate the Series Year for each series within the given arc.
    #series year is required for a lot of functionality.
    series = dom.getElementsByTagName('volume')
    tempseries = {}
    serieslist = []
    for dm in series:
        try:
            totids = len(dm.getElementsByTagName('id'))
            idc = 0
            while (idc < totids):
                if dm.getElementsByTagName('id')[idc].parentNode.nodeName == 'volume':
                    tempseries['ComicID'] = dm.getElementsByTagName('id')[idc].firstChild.wholeText
                idc += 1
        except:
            logger.warn('There was a problem retrieving a comicid for a series within the arc. This will most likely have to be manually corrected.')
            tempseries['ComicID'] = 'None'

        tempseries['Series'] = 'None'
        tempseries['Publisher'] = 'None'
        try:
            totnames = len(dm.getElementsByTagName('name'))
            namesc = 0
            while (namesc < totnames):
                if dm.getElementsByTagName('name')[namesc].parentNode.nodeName == 'volume':
                    tempseries['Series'] = dm.getElementsByTagName('name')[namesc].firstChild.wholeText
                elif dm.getElementsByTagName('name')[namesc].parentNode.nodeName == 'publisher':
                    tempseries['Publisher'] = dm.getElementsByTagName('name')[namesc].firstChild.wholeText
                namesc += 1
        except:
            logger.warn('There was a problem retrieving a Series Name or Publisher for a series within the arc. This will have to be manually corrected.')

        try:
            tempseries['SeriesYear'] = dm.getElementsByTagName('start_year')[0].firstChild.wholeText
        except:
            logger.warn('There was a problem retrieving the start year for a particular series within the story arc.')
            tempseries['SeriesYear'] = '0000'

        #cause you know, dufus'...
        if tempseries['SeriesYear'][-1:] == '-':
            tempseries['SeriesYear'] = tempseries['SeriesYear'][:-1]

        desdeck = 0
        tempseries['Volume'] = 'None'

        #the description field actually holds the Volume# - so let's grab it
        try:
            descchunk = dm.getElementsByTagName('description')[0].firstChild.wholeText
            comic_desc = drophtml(descchunk)
            desdeck += 1
        except:
            comic_desc = 'None'

        #sometimes the deck has volume labels
        try:
            deckchunk = dm.getElementsByTagName('deck')[0].firstChild.wholeText
            comic_deck = deckchunk
            desdeck += 1
        except:
            comic_deck = 'None'

        while (desdeck > 0):
            if desdeck == 1:
                if comic_desc == 'None':
                    comicDes = comic_deck[:30]
                else:
                    #extract the first 60 characters
                    comicDes = comic_desc[:60].replace('New 52', '')
            elif desdeck == 2:
                #extract the characters from the deck
                comicDes = comic_deck[:30].replace('New 52', '')
            else:
                break

            i = 0
            while (i < 2):
                if 'volume' in comicDes.lower():
                    #found volume - let's grab it.
                    v_find = comicDes.lower().find('volume')
                    #arbitrarily grab the next 15 chars (6 for 'volume' + 1 for space + the rest for the actual vol #)
                    #increased to allow for text numbering (+5 max)
                    #sometimes it's 'volume 5' and occasionally it's 'fifth volume'.
                    if i == 0:
                        vfind = comicDes[v_find:v_find + 15]  #if it's volume 5 format
                        basenums = {'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'ten': '10', 'i': '1', 'ii': '2', 'iii': '3', 'iv': '4', 'v': '5'}
                        logger.fdebug('volume X format - ' + str(i) + ': ' + vfind)
                    else:
                        vfind = comicDes[:v_find]  #if it's fifth volume format
                        basenums = {'zero': '0', 'first': '1', 'second': '2', 'third': '3', 'fourth': '4', 'fifth': '5', 'sixth': '6', 'seventh': '7', 'eighth': '8', 'ninth': '9', 'tenth': '10', 'i': '1', 'ii': '2', 'iii': '3', 'iv': '4', 'v': '5'}
                        logger.fdebug('X volume format - ' + str(i) + ': ' + vfind)
                    volconv = ''
                    for nums in basenums:
                        if nums in vfind.lower():
                            sconv = basenums[nums]
                            vfind = re.sub(nums, sconv, vfind.lower())
                            break
                    #logger.info('volconv: ' + str(volconv))
                    if i == 0:
                        volthis = vfind.lower().find('volume')
                        volthis = volthis + 6  #add on the length of the word itself so that we can grab the subsequent digit
                        vfind = vfind[volthis:volthis + 4]  #grab the next 4 characters ;)
                    elif i == 1:
                        volthis = vfind.lower().find('volume')
                        vfind = vfind[volthis - 4:volthis]  #grab the 4 characters preceding 'volume'
                    if '(' in vfind:
                        #bracket detected in versioning
                        vfindit = re.findall('[^()]+', vfind)
                        vfind = vfindit[0]
                    vf = re.findall('[^<>]+', vfind)
                    try:
                        ledigit = re.sub("[^0-9]", "", vf[0])
                        if ledigit != '':
                            tempseries['Volume'] = ledigit
                            logger.fdebug("Volume information found! Adding to series record : volume " + tempseries['Volume'])
                            break
                    except:
                        pass
                    i += 1
                else:
                    i += 1

            if tempseries['Volume'] == 'None':
                logger.fdebug('tempseries[Volume]:' + str(tempseries['Volume']))
                desdeck -= 1
            else:
                break

        serieslist.append({"ComicID": tempseries['ComicID'],
                           "ComicName": tempseries['Series'],
                           "SeriesYear": tempseries['SeriesYear'],
                           "Publisher": tempseries['Publisher'],
                           "Volume": tempseries['Volume']})

    return serieslist
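# The volume parser above reduces to: locate 'volume' in the description,
# translate spelled-out ordinals/numerals to digits, then keep whatever digits
# sit beside the word. A condensed sketch of that idea (illustrative only):
import re

def extract_volume(desc):
    basenums = {'first': '1', 'second': '2', 'third': '3', 'fourth': '4', 'fifth': '5',
                'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5'}
    low = desc.lower()
    v = low.find('volume')
    if v == -1:
        return None
    #check the text after 'volume' ('volume 5'), then before it ('fifth volume')
    for chunk in (low[v + 6:v + 11], low[max(0, v - 8):v]):
        for word, digit in basenums.items():
            chunk = chunk.replace(word, digit)
        digits = re.sub('[^0-9]', '', chunk)
        if digits:
            return digits
    return None
# extract_volume('volume 3 collects...') -> '3'
# extract_volume('the fifth volume of the series') -> '5'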
def __init__(self): t = threading.Thread(target=self.worker, name="DB-WRITER") t.daemon = True t.start() logger.fdebug('Thread WriteOnly initialized.')
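# The worker target started above implies a single-writer loop draining a
# queue; a minimal sketch of that pattern (the queue name and payload shape
# are assumptions for illustration, not the actual implementation):
import threading, Queue

class SketchWriter(object):
    def __init__(self):
        self.queue = Queue.Queue()
        t = threading.Thread(target=self.worker, name="DB-WRITER")
        t.daemon = True
        t.start()

    def worker(self):
        while True:
            sql, args = self.queue.get()  #blocks until a write is queued
            #a real implementation would execute the statement against the one
            #shared sqlite connection here, serializing all writes on this thread
            self.queue.task_done()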
def UpdateDates(dom):
    issues = dom.getElementsByTagName('issue')
    tempissue = {}
    issuelist = []
    for dm in issues:
        tempissue['ComicID'] = 'None'
        tempissue['IssueID'] = 'None'
        try:
            totids = len(dm.getElementsByTagName('id'))
            idc = 0
            while (idc < totids):
                if dm.getElementsByTagName('id')[idc].parentNode.nodeName == 'volume':
                    tempissue['ComicID'] = dm.getElementsByTagName('id')[idc].firstChild.wholeText
                if dm.getElementsByTagName('id')[idc].parentNode.nodeName == 'issue':
                    tempissue['IssueID'] = dm.getElementsByTagName('id')[idc].firstChild.wholeText
                idc += 1
        except:
            logger.warn('There was a problem retrieving a comicid/issueid for the given issue. This will most likely have to be manually corrected.')

        tempissue['SeriesTitle'] = 'None'
        tempissue['IssueTitle'] = 'None'
        try:
            totnames = len(dm.getElementsByTagName('name'))
            namesc = 0
            while (namesc < totnames):
                if dm.getElementsByTagName('name')[namesc].parentNode.nodeName == 'issue':
                    tempissue['IssueTitle'] = dm.getElementsByTagName('name')[namesc].firstChild.wholeText
                elif dm.getElementsByTagName('name')[namesc].parentNode.nodeName == 'volume':
                    tempissue['SeriesTitle'] = dm.getElementsByTagName('name')[namesc].firstChild.wholeText
                namesc += 1
        except:
            logger.warn('There was a problem retrieving the Series Title / Issue Title for a series within the arc. This will have to be manually corrected.')

        try:
            tempissue['CoverDate'] = dm.getElementsByTagName('cover_date')[0].firstChild.wholeText
        except:
            tempissue['CoverDate'] = '0000-00-00'

        try:
            tempissue['StoreDate'] = dm.getElementsByTagName('store_date')[0].firstChild.wholeText
        except:
            tempissue['StoreDate'] = '0000-00-00'

        try:
            tempissue['IssueNumber'] = dm.getElementsByTagName('issue_number')[0].firstChild.wholeText
        except:
            logger.fdebug('No Issue Number available - Trade Paperbacks, Graphic Novels and Compendiums are not supported as of yet.')
            tempissue['IssueNumber'] = 'None'

        try:
            tempissue['date_last_updated'] = dm.getElementsByTagName('date_last_updated')[0].firstChild.wholeText
        except:
            tempissue['date_last_updated'] = '0000-00-00'

        issuelist.append({'ComicID': tempissue['ComicID'],
                          'IssueID': tempissue['IssueID'],
                          'SeriesTitle': tempissue['SeriesTitle'],
                          'IssueTitle': tempissue['IssueTitle'],
                          'CoverDate': tempissue['CoverDate'],
                          'StoreDate': tempissue['StoreDate'],
                          'IssueNumber': tempissue['IssueNumber'],
                          'Date_Last_Updated': tempissue['date_last_updated']})

    return issuelist
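# The repeated parentNode checks above are how minidom tells apart the several
# <id>/<name> tags inside a ComicVine <issue>; the same test factored out:
def text_for_parent(dm, tag, parent):
    #return the text of the first <tag> element whose parent node is <parent>
    for node in dm.getElementsByTagName(tag):
        if node.parentNode.nodeName == parent and node.firstChild is not None:
            return node.firstChild.wholeText
    return None
# e.g. text_for_parent(dm, 'id', 'issue') -> the IssueID,
#      text_for_parent(dm, 'name', 'volume') -> the series title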
def listFiles(dir, watchcomic, AlternateSearch=None, manual=None): # use AlternateSearch to check for filenames that follow that naming pattern # ie. Star Trek TNG Doctor Who Assimilation won't get hits as the # checker looks for Star Trek TNG Doctor Who Assimilation2 (according to CV) # we need to convert to ascii, as watchcomic is utf-8 and special chars f'it up u_watchcomic = watchcomic.encode('ascii', 'ignore').strip() logger.fdebug('comic: ' + watchcomic) basedir = dir logger.fdebug('Looking in: ' + dir) watchmatch = {} comiclist = [] comiccnt = 0 not_these = [ '#', ',', '\/', ':', '\;', '.', '-', '\!', '\$', '\%', '\+', '\'', '\?', '\@' ] issue_exceptions = ['AU', 'AI', 'A', 'B', 'C'] for item in os.listdir(basedir): if item == 'cover.jpg' or item == 'cvinfo': continue #print item #subname = os.path.join(basedir, item) subname = item #versioning - remove it subsplit = subname.replace('_', ' ').split() volrem = None for subit in subsplit: if subit[0].lower() == 'v': vfull = 0 if subit[1:].isdigit(): #if in format v1, v2009 etc... if len(subit) > 3: # if it's greater than 3 in length, then the format is Vyyyy vfull = 1 # add on 1 character length to account for extra space subname = re.sub(subit, '', subname) volrem = subit elif subit.lower()[:3] == 'vol': #if in format vol.2013 etc #because the '.' in Vol. gets removed, let's loop thru again after the Vol hit to remove it entirely logger.fdebug('volume indicator detected as version #:' + str(subit)) subname = re.sub(subit, '', subname) volrem = subit #remove the brackets.. subnm = re.findall('[^()]+', subname) if len(subnm): logger.fdebug( "detected invalid filename - attempting to detect year to continue" ) subname = re.sub('(.*)\s+(19\d{2}|20\d{2})(.*)', '\\1 (\\2) \\3', subname) subnm = re.findall('[^()]+', subname) subname = subnm[0] logger.fdebug('subname no brackets: ' + str(subname)) subname = re.sub('\_', ' ', subname) nonocount = 0 charpos = 0 detneg = "no" for nono in not_these: if nono in subname: subcnt = subname.count(nono) charpos = indices( subname, nono) # will return a list of char positions in subname #print "charpos: " + str(charpos) if nono == '-': i = 0 while (i < len(charpos)): for i, j in enumerate(charpos): #print i,j if subname[j + 1:j + 2].isdigit(): logger.fdebug( 'possible negative issue detected.') nonocount = nonocount + subcnt - 1 detneg = "yes" if '-' in watchcomic and i < len(watchcomic): logger.fdebug('- appears in series title.') i += 1 if detneg == "no": subname = re.sub(str(nono), ' ', subname) nonocount = nonocount + subcnt #logger.fdebug(str(nono) + " detected " + str(subcnt) + " times.") # segment '.' having a . by itself will denote the entire string which we don't want elif nono == '.': x = 0 fndit = 0 dcspace = 0 while x < subcnt: fndit = subname.find(nono, fndit) if subname[fndit - 1:fndit].isdigit() and subname[fndit + 1:fndit + 2].isdigit(): logger.fdebug('decimal issue detected.') dcspace += 1 x += 1 if dcspace == 1: nonocount = nonocount + subcnt + dcspace else: subname = re.sub('\.', ' ', subname) nonocount = nonocount + subcnt - 1 #(remove the extension from the length) else: #this is new - if it's a symbol seperated by a space on each side it drags in an extra char. 
x = 0 fndit = 0 blspc = 0 while x < subcnt: fndit = subname.find(nono, fndit) #print ("space before check: " + str(subname[fndit-1:fndit])) #print ("space after check: " + str(subname[fndit+1:fndit+2])) if subname[fndit - 1:fndit] == ' ' and subname[fndit + 1:fndit + 2] == ' ': logger.fdebug( 'blankspace detected before and after ' + str(nono)) blspc += 1 x += 1 subname = re.sub(str(nono), ' ', subname) nonocount = nonocount + subcnt + blspc #subname = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]',' ', subname) modwatchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\'\?\@]', ' ', u_watchcomic) detectand = False detectthe = False modwatchcomic = re.sub('\&', ' and ', modwatchcomic) if ' the ' in modwatchcomic.lower(): modwatchcomic = re.sub("\\bthe\\b", "", modwatchcomic.lower()) logger.fdebug('new modwatchcomic: ' + str(modwatchcomic)) detectthe = True modwatchcomic = re.sub('\s+', ' ', str(modwatchcomic)).strip() if '&' in subname: subname = re.sub('\&', ' and ', subname) detectand = True if ' the ' in subname.lower(): subname = re.sub("\\bthe\\b", "", subname.lower()) detectthe = True subname = re.sub('\s+', ' ', str(subname)).strip() AS_Alt = [] if AlternateSearch is not None: chkthealt = AlternateSearch.split('##') if chkthealt == 0: AS_Alternate = AlternateSearch for calt in chkthealt: AS_Alternate = re.sub('##', '', calt) #same = encode. u_altsearchcomic = AS_Alternate.encode('ascii', 'ignore').strip() altsearchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]', '', u_altsearchcomic) altsearchcomic = re.sub('\&', ' and ', altsearchcomic) altsearchcomic = re.sub('\s+', ' ', str(altsearchcomic)).strip() AS_Alt.append(altsearchcomic) else: #create random characters so it will never match. altsearchcomic = "127372873872871091383 abdkhjhskjhkjdhakajhf" AS_Alt.append(altsearchcomic) #if '_' in subname: # subname = subname.replace('_', ' ') logger.fdebug('watchcomic:' + str(modwatchcomic) + ' ..comparing to found file: ' + str(subname)) if modwatchcomic.lower() in subname.lower() or any( x.lower() in subname.lower() for x in AS_Alt): #altsearchcomic.lower() in subname.lower(): comicpath = os.path.join(basedir, item) logger.fdebug(modwatchcomic + ' - watchlist match on : ' + comicpath) comicsize = os.path.getsize(comicpath) #print ("Comicsize:" + str(comicsize)) comiccnt += 1 stann = 0 if 'annual' in subname.lower(): logger.fdebug('Annual detected - proceeding') jtd_len = subname.lower().find('annual') cchk = modwatchcomic else: if modwatchcomic.lower() in subname.lower(): cchk = modwatchcomic else: cchk_ls = [ x for x in AS_Alt if x.lower() in subname.lower() ] cchk = cchk_ls[0] #print "something: " + str(cchk) logger.fdebug('we should remove ' + str(nonocount) + ' characters') findtitlepos = subname.find('-') if charpos != 0: logger.fdebug('detected ' + str(len(charpos)) + ' special characters') i = 0 while (i < len(charpos)): for i, j in enumerate(charpos): #print i,j #print subname #print "digitchk: " + str(subname[j:]) if j >= len(subname): logger.fdebug( 'end reached. ignoring remainder.') break elif subname[j:] == '-': if i <= len(subname) and subname[i + 1].isdigit(): logger.fdebug('negative issue detected.') #detneg = "yes" elif j > findtitlepos: if subname[j:] == '#': if subname[i + 1].isdigit(): logger.fdebug( '# detected denoting issue#, ignoring.' 
) else: nonocount -= 1 elif '-' in watchcomic and i < len(watchcomic): logger.fdebug( '- appears in series title, ignoring.') else: logger.fdebug( 'special character appears outside of title - ignoring @ position: ' + str(charpos[i])) nonocount -= 1 i += 1 #remove versioning here if volrem != None: jtd_len = len( cchk ) # + len(volrem)# + nonocount + 1 #1 is to account for space btwn comic and vol # else: jtd_len = len(cchk) # + nonocount logger.fdebug('nonocount [' + str(nonocount) + '] cchk [' + cchk + '] length [' + str(len(cchk)) + ']') #if detectand: # jtd_len = jtd_len - 2 # char substitution diff between & and 'and' = 2 chars #if detectthe: # jtd_len = jtd_len - 3 # char subsitiution diff between 'the' and '' = 3 chars #justthedigits = item[jtd_len:] logger.fdebug('final jtd_len to prune [' + str(jtd_len) + ']') logger.fdebug('before title removed from FILENAME [' + str(item) + ']') logger.fdebug('after title removed from FILENAME [' + str(item[jtd_len:]) + ']') logger.fdebug( 'creating just the digits using SUBNAME, pruning first [' + str(jtd_len) + '] chars from [' + subname + ']') justthedigits = subname[jtd_len:].strip() logger.fdebug('after title removed from SUBNAME [' + justthedigits + ']') #remove the title if it appears #findtitle = justthedigits.find('-') #if findtitle > 0 and detneg == "no": # justthedigits = justthedigits[:findtitle] # logger.fdebug("removed title from name - is now : " + str(justthedigits)) tmpthedigits = justthedigits justthedigits = justthedigits.split(' ', 1)[0] #if the issue has an alphanumeric (issue_exceptions, join it and push it through) logger.fdebug('JUSTTHEDIGITS [' + justthedigits + ']') if justthedigits.isdigit(): digitsvalid = "true" else: digitsvalid = "false" if justthedigits.lower() == 'annual': logger.fdebug('ANNUAL [' + tmpthedigits.split(' ', 1)[1] + ']') justthedigits += ' ' + tmpthedigits.split(' ', 1)[1] digitsvalid = "true" else: try: if tmpthedigits.split(' ', 1)[1] is not None: poss_alpha = tmpthedigits.split(' ', 1)[1] for issexcept in issue_exceptions: if issexcept.lower() in poss_alpha.lower( ) and len(poss_alpha) <= len(issexcept): justthedigits += poss_alpha logger.fdebug( 'ALPHANUMERIC EXCEPTION. COMBINING : [' + justthedigits + ']') digitsvalid = "true" break except: pass logger.fdebug('final justthedigits [' + justthedigits + ']') if digitsvalid == "false": logger.fdebug('Issue number not properly detected...ignoring.') continue if manual is not None: #this is needed for Manual Run to determine matches #without this Batman will match on Batman Incorporated, and Batman and Robin, etc.. logger.fdebug('modwatchcomic = ' + modwatchcomic.lower()) logger.fdebug('subname = ' + subname.lower()) comyear = manual['SeriesYear'] issuetotal = manual['Total'] logger.fdebug('SeriesYear: ' + str(comyear)) logger.fdebug('IssueTotal: ' + str(issuetotal)) #set the issue/year threshold here. # 2013 - (24issues/12) = 2011. minyear = int(comyear) - (int(issuetotal) / 12) #subnm defined at being of module. len_sm = len(subnm) #print ("there are " + str(lenm) + " words.") cnt = 0 yearmatch = "false" while (cnt < len_sm): if subnm[cnt] is None: break if subnm[cnt] == ' ': pass else: logger.fdebug( str(cnt) + ". 
Bracket Word: " + str(subnm[cnt])) if subnm[cnt][:-2] == '19' or subnm[cnt][:-2] == '20': logger.fdebug("year detected: " + str(subnm[cnt])) result_comyear = subnm[cnt] if int(result_comyear) >= int(minyear): logger.fdebug( str(result_comyear) + ' is within the series range of ' + str(minyear) + '-' + str(comyear)) yearmatch = "true" break else: logger.fdebug( str(result_comyear) + ' - not right - year not within series range of ' + str(minyear) + '-' + str(comyear)) yearmatch = "false" break cnt += 1 if yearmatch == "false": continue #tmpitem = item[:jtd_len] # if it's an alphanumeric with a space, rejoin, so we can remove it cleanly just below this. substring_removal = None poss_alpha = subname.split(' ')[-1:] logger.fdebug('poss_alpha: ' + str(poss_alpha)) logger.fdebug('lenalpha: ' + str(len(''.join(poss_alpha)))) for issexcept in issue_exceptions: if issexcept.lower() in str(poss_alpha).lower() and len( ''.join(poss_alpha)) <= len(issexcept): #get the last 2 words so that we can remove them cleanly substring_removal = ' '.join(subname.split(' ')[-2:]) substring_join = ''.join(subname.split(' ')[-2:]) logger.fdebug('substring_removal: ' + str(substring_removal)) logger.fdebug('substring_join: ' + str(substring_join)) break if substring_removal is not None: sub_removed = subname.replace('_', ' ').replace( substring_removal, substring_join) else: sub_removed = subname.replace('_', ' ') logger.fdebug('sub_removed: ' + str(sub_removed)) split_sub = sub_removed.rsplit(' ', 1)[0].split( ' ') #removes last word (assuming it's the issue#) split_mod = modwatchcomic.replace('_', ' ').split() #batman logger.fdebug('split_sub: ' + str(split_sub)) logger.fdebug('split_mod: ' + str(split_mod)) x = len(split_sub) - 1 scnt = 0 if x > len(split_mod) - 1: logger.fdebug('number of words do not match...aborting.') else: while (x > -1): print str(split_mod[x]) + ' comparing to ' + str( split_mod[x]) if str(split_sub[x]).lower() == str( split_mod[x]).lower(): scnt += 1 logger.fdebug('word match exact. ' + str(scnt) + '/' + str(len(split_mod))) x -= 1 wordcnt = int(scnt) logger.fdebug('scnt:' + str(scnt)) totalcnt = int(len(split_mod)) logger.fdebug('split_mod length:' + str(totalcnt)) try: spercent = (wordcnt / totalcnt) * 100 except ZeroDivisionError: spercent = 0 logger.fdebug('we got ' + str(spercent) + ' percent.') if int(spercent) >= 80: logger.fdebug("this should be considered an exact match.") else: logger.fdebug('failure - not an exact match.') continue comiclist.append({ 'ComicFilename': item, 'ComicLocation': comicpath, 'ComicSize': comicsize, 'JusttheDigits': justthedigits }) watchmatch['comiclist'] = comiclist else: pass #print ("directory found - ignoring") logger.fdebug('you have a total of ' + str(comiccnt) + ' ' + watchcomic + ' comics') watchmatch['comiccount'] = comiccnt #print watchmatch return watchmatch
def downloadit(self, id, link, mainlink, resume=None):
    if mylar.DDL_LOCK is True:
        logger.fdebug('[DDL] Another item is currently downloading via DDL. Only one item can be downloaded at a time using DDL. Patience.')
        return
    else:
        mylar.DDL_LOCK = True

    myDB = db.DBConnection()
    filename = None
    try:
        with cfscrape.create_scraper() as s:
            if resume is not None:
                logger.info('[DDL-RESUME] Attempting to resume from: %s bytes' % resume)
                self.headers['Range'] = 'bytes=%d-' % resume
            cf_cookievalue, cf_user_agent = s.get_tokens(mainlink, headers=self.headers)
            t = s.get(link, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True)

            filename = os.path.basename(urllib.unquote(t.url).decode('utf-8'))
            if 'GetComics.INFO' in filename:
                filename = re.sub('(?i)GetComics.INFO', '', filename).strip()

            try:
                remote_filesize = int(t.headers['Content-length'])
                logger.fdebug('remote filesize: %s' % remote_filesize)
            except Exception as e:
                logger.warn('[WARNING] Unable to retrieve remote file size - this is usually due to the page being behind a different click-bait/ad page. Error returned as : %s' % e)
                logger.warn('[WARNING] Considering this particular download as invalid and will ignore this result.')
                remote_filesize = 0
                mylar.DDL_LOCK = False
                return ({"success": False, "filename": filename, "path": None})

            #write the filename to the db for tracking purposes...
            myDB.upsert('ddl_info', {'filename': filename, 'remote_filesize': remote_filesize}, {'id': id})

            path = os.path.join(mylar.CONFIG.DDL_LOCATION, filename)

            if t.headers.get('content-encoding') == 'gzip':
                buf = StringIO(t.content)
                f = gzip.GzipFile(fileobj=buf)

            if resume is not None:
                with open(path, 'ab') as f:
                    for chunk in t.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
                            f.flush()
            else:
                with open(path, 'wb') as f:
                    for chunk in t.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
                            f.flush()

    except Exception as e:
        logger.error('[ERROR] %s' % e)
        mylar.DDL_LOCK = False
        return ({"success": False, "filename": filename, "path": None})

    else:
        mylar.DDL_LOCK = False
        if os.path.isfile(path):
            if path.endswith('.zip'):
                new_path = os.path.join(mylar.CONFIG.DDL_LOCATION, re.sub('.zip', '', filename).strip())
                logger.info('Zip file detected. Unzipping into new modified path location: %s' % new_path)
                try:
                    zip_f = zipfile.ZipFile(path, 'r')
                    zip_f.extractall(new_path)
                    zip_f.close()
                except Exception as e:
                    logger.warn('[ERROR: %s] Unable to extract zip file: %s' % (e, new_path))
                    return ({"success": False, "filename": filename, "path": None})
                else:
                    try:
                        os.remove(path)
                    except Exception as e:
                        logger.warn('[ERROR: %s] Unable to remove zip file from %s after extraction.' % (e, path))
                    filename = None
            else:
                new_path = path
            return ({"success": True, "filename": filename, "path": new_path})
        else:
            logger.warn('Unable to locate downloaded file at %s - cannot mark this download as successful.' % path)
            return ({"success": False, "filename": filename, "path": None})
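# downloadit() resumes a partial file via an HTTP Range header; the core of
# that pattern with a plain requests session (cfscrape wraps requests) is:
import os, requests

def resume_download(url, path, session=None):
    s = session or requests.Session()
    headers = {}
    pos = os.path.getsize(path) if os.path.isfile(path) else 0
    if pos:
        headers['Range'] = 'bytes=%d-' % pos  #ask the server to start where we left off
    r = s.get(url, headers=headers, stream=True)
    mode = 'ab' if r.status_code == 206 else 'wb'  #206 = server honoured the partial request
    with open(path, mode) as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
    return path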
def Startit(searchName, searchIssue, searchYear, ComicVersion):
    #searchName = "Uncanny Avengers"
    #searchIssue = "01"
    #searchYear = "2012"
    #clean up searchName due to webparse.
    searchName = searchName.replace("%20", " ")
    if "," in searchName:
        searchName = searchName.replace(",", "")
    logger.fdebug("name:" + str(searchName))
    logger.fdebug("issue:" + str(searchIssue))
    logger.fdebug("year:" + str(searchYear))
    splitSearch = searchName.split(" ")
    joinSearch = "+".join(splitSearch) + "+" + searchIssue
    searchIsOne = "0" + searchIssue
    searchIsTwo = "00" + searchIssue

    if "-" in searchName:
        searchName = searchName.replace("-", '((\\s)?[-:])?(\\s)?')
    regexName = searchName.replace(" ", '((\\s)?[-:])?(\\s)?')

    if mylar.USE_MINSIZE:
        size_constraints = "minsize=" + str(mylar.MINSIZE)
    else:
        size_constraints = "minsize=10"
    if mylar.USE_MAXSIZE:
        size_constraints = size_constraints + "&maxsize=" + str(mylar.MAXSIZE)

    if mylar.USENET_RETENTION is not None:
        max_age = "&age=" + str(mylar.USENET_RETENTION)
    else:
        max_age = ""

    feed = feedparser.parse("http://nzbindex.nl/rss/alt.binaries.comics.dcp/?sort=agedesc&" + str(size_constraints) + str(max_age) + "&dq=%s&max=50&more=1" % joinSearch)

    totNum = len(feed.entries)
    keyPair = {}
    regList = []
    entries = []
    mres = {}
    countUp = 0
    logger.fdebug(str(totNum) + " results")

    while countUp < totNum:
        urlParse = feed.entries[countUp].enclosures[0]
        #keyPair[feed.entries[countUp].title] = feed.entries[countUp].link
        keyPair[feed.entries[countUp].title] = urlParse["href"]
        countUp = countUp + 1

    # thanks to SpammyHagar for spending the time in compiling these regEx's!
    regExTest = ""
    regEx = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue, searchYear)
    regExOne = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (regexName, searchIssue, searchYear)
    #Sometimes comics aren't actually published the same year ComicVine says - trying to adjust for these cases
    regExTwo = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue, int(searchYear) + 1)
    regExThree = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue, int(searchYear) - 1)
    regExFour = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (regexName, searchIssue, int(searchYear) + 1)
    regExFive = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (regexName, searchIssue, int(searchYear) - 1)

    regexList = [regEx, regExOne, regExTwo, regExThree, regExFour, regExFive]

    for title, link in keyPair.items():
        #print("titlesplit: " + str(title.split("\"")))
        splitTitle = title.split("\"")
        for subs in splitTitle:
            regExCount = 0
            if len(subs) > 10:
                #Looping through the list to run each regEx - length + regex is determined by regexList up top.
                while regExCount < len(regexList):
                    regExTest = re.findall(regexList[regExCount], subs, flags=re.IGNORECASE)
                    regExCount = regExCount + 1
                    if regExTest:
                        logger.fdebug(title)
                        entries.append({'title': subs, 'link': str(link)})

    if len(entries) >= 1:
        mres['entries'] = entries
        return mres
    else:
        logger.fdebug("No Results Found")
        return "no results"
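# The six regexes above are really one pattern with optional zero-padding on
# the issue and a +/- 1 year tolerance; generating them in a loop makes the
# intent clearer (same six patterns, order aside - illustrative only):
def build_year_regexes(regexName, searchIssue, searchYear):
    pats = []
    for y in (int(searchYear), int(searchYear) + 1, int(searchYear) - 1):
        pats.append("(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue, y))
        pats.append("(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (regexName, searchIssue, y))
    return pats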
def GetComicInfo(comicid, dom, safechk=None):
    if safechk is None:
        #safety check when polling comicvine. If it times out, increment the chk on retry attempts up until 5 tries, then abort.
        safechk = 1
    elif safechk > 4:
        logger.error('Unable to add / refresh the series due to inability to retrieve data from ComicVine. You might want to try a bit later and/or make sure ComicVine is up.')
        return

    #comicvine isn't as up-to-date with issue counts..
    #so this can get really buggered, really fast.
    tracks = dom.getElementsByTagName('issue')
    try:
        cntit = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
    except:
        cntit = len(tracks)
    trackcnt = len(tracks)
    logger.fdebug("number of issues I counted: " + str(trackcnt))
    logger.fdebug("number of issues CV says it has: " + str(cntit))

    #if the two don't match, use trackcnt, as count_of_issues might not be up-to-date for some reason
    if int(trackcnt) != int(cntit):
        cntit = trackcnt
        vari = "yes"
    else:
        vari = "no"
    logger.fdebug("vari is set to: " + str(vari))
    #if str(trackcnt) != str(int(cntit)+2):
    #    cntit = int(cntit) + 1

    comic = {}
    comicchoice = []
    cntit = int(cntit)

    #retrieve the first xml tag (<tag>data</tag>) that the parser finds with name tagName:
    #to return the parent name of the <name> node : dom.getElementsByTagName('name')[0].parentNode.nodeName
    #where [0] denotes the number of the name field(s)
    #where nodeName denotes the parentNode : ComicName = results, publisher = publisher, issues = issue
    try:
        names = len(dom.getElementsByTagName('name'))
        n = 0
        while (n < names):
            if dom.getElementsByTagName('name')[n].parentNode.nodeName == 'results':
                try:
                    comic['ComicName'] = dom.getElementsByTagName('name')[n].firstChild.wholeText
                    comic['ComicName'] = comic['ComicName'].rstrip()
                except:
                    logger.error('There was a problem retrieving the given data from ComicVine. Ensure that www.comicvine.com is accessible AND that you have provided your OWN ComicVine API key.')
                    return
            elif dom.getElementsByTagName('name')[n].parentNode.nodeName == 'publisher':
                try:
                    comic['ComicPublisher'] = dom.getElementsByTagName('name')[n].firstChild.wholeText
                except:
                    comic['ComicPublisher'] = "Unknown"
            n += 1
    except:
        logger.warn('Something went wrong retrieving from ComicVine. Ensure your API is up-to-date and that comicvine is accessible')
        return

    try:
        comic['ComicYear'] = dom.getElementsByTagName('start_year')[0].firstChild.wholeText
    except:
        comic['ComicYear'] = '0000'

    try:
        comic['ComicURL'] = dom.getElementsByTagName('site_detail_url')[trackcnt].firstChild.wholeText
    except:
        #this should never be an exception. If it is, it's probably due to CV timing out - so let's sleep for a bit, then retry.
        logger.warn('Unable to retrieve URL for volume. This is usually due to a timeout to CV, or going over the API. Retrying again in 10s.')
        time.sleep(10)
        safechk += 1
        return GetComicInfo(comicid, dom, safechk)

    desdeck = 0
    #the description field actually holds the Volume# - so let's grab it
    try:
        descchunk = dom.getElementsByTagName('description')[0].firstChild.wholeText
        comic_desc = drophtml(descchunk)
        desdeck += 1
    except:
        comic_desc = 'None'

    #sometimes the deck has volume labels
    try:
        deckchunk = dom.getElementsByTagName('deck')[0].firstChild.wholeText
        comic_deck = deckchunk
        desdeck += 1
    except:
        comic_deck = 'None'

    #comic['ComicDescription'] = comic_desc

    try:
        comic['Aliases'] = dom.getElementsByTagName('aliases')[0].firstChild.wholeText
    except:
        comic['Aliases'] = 'None'

    comic['ComicVersion'] = 'noversion'

    while (desdeck > 0):
        if desdeck == 1:
            if comic_desc == 'None':
                comicDes = comic_deck[:30]
            else:
                #extract the first 60 characters
                comicDes = comic_desc[:60].replace('New 52', '')
        elif desdeck == 2:
            #extract the characters from the deck
            comicDes = comic_deck[:30].replace('New 52', '')
        else:
            break

        i = 0
        while (i < 2):
            if 'volume' in comicDes.lower():
                #found volume - let's grab it.
                v_find = comicDes.lower().find('volume')
                #arbitrarily grab the next 15 chars (6 for 'volume' + 1 for space + the rest for the actual vol #)
                #increased to allow for text numbering (+5 max)
                #sometimes it's 'volume 5' and occasionally it's 'fifth volume'.
                if i == 0:
                    vfind = comicDes[v_find:v_find + 15]  #if it's volume 5 format
                    basenums = {'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'ten': '10', 'i': '1', 'ii': '2', 'iii': '3', 'iv': '4', 'v': '5'}
                    logger.fdebug('volume X format - ' + str(i) + ': ' + vfind)
                else:
                    vfind = comicDes[:v_find]  #if it's fifth volume format
                    basenums = {'zero': '0', 'first': '1', 'second': '2', 'third': '3', 'fourth': '4', 'fifth': '5', 'sixth': '6', 'seventh': '7', 'eighth': '8', 'ninth': '9', 'tenth': '10', 'i': '1', 'ii': '2', 'iii': '3', 'iv': '4', 'v': '5'}
                    logger.fdebug('X volume format - ' + str(i) + ': ' + vfind)
                volconv = ''
                for nums in basenums:
                    if nums in vfind.lower():
                        sconv = basenums[nums]
                        vfind = re.sub(nums, sconv, vfind.lower())
                        break
                #now we attempt to find the character position after the word 'volume'
                if i == 0:
                    volthis = vfind.lower().find('volume')
                    volthis = volthis + 6  #add on the length of the word itself so that we can grab the subsequent digit
                    vfind = vfind[volthis:volthis + 4]  #grab the next 4 characters ;)
                elif i == 1:
                    volthis = vfind.lower().find('volume')
                    vfind = vfind[volthis - 4:volthis]  #grab the 4 characters preceding 'volume'
                if '(' in vfind:
                    #bracket detected in versioning
                    vfindit = re.findall('[^()]+', vfind)
                    vfind = vfindit[0]
                vf = re.findall('[^<>]+', vfind)
                ledigit = re.sub("[^0-9]", "", vf[0])
                if ledigit != '':
                    comic['ComicVersion'] = ledigit
                    logger.fdebug("Volume information found! Adding to series record : volume " + comic['ComicVersion'])
                    break
                i += 1
            else:
                i += 1

        if comic['ComicVersion'] == 'noversion':
            logger.fdebug('comic[ComicVersion]:' + str(comic['ComicVersion']))
            desdeck -= 1
        else:
            break

    if vari == "yes":
        comic['ComicIssues'] = str(cntit)
    else:
        comic['ComicIssues'] = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText

    comic['ComicImage'] = dom.getElementsByTagName('super_url')[0].firstChild.wholeText
    comic['ComicImageALT'] = dom.getElementsByTagName('small_url')[0].firstChild.wholeText
    comic['FirstIssueID'] = dom.getElementsByTagName('id')[0].firstChild.wholeText

    return comic
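# The safechk counter above caps the ComicVine retry at 5 attempts with a 10s
# pause; the same idea as a generic bounded-retry pattern (illustrative, not
# the code above):
import time

def with_retries(fn, tries=5, delay=10):
    for attempt in range(tries):
        result = fn()
        if result is not None:
            return result
        time.sleep(delay)  #give ComicVine a breather before the next attempt
    return None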
def Startit(searchName, searchIssue, searchYear, ComicVersion, IssDateFix): cName = searchName #clean up searchName due to webparse/redudant naming that would return too specific of results. commons = ['and', 'the', '&', '-'] for x in commons: cnt = 0 for m in re.finditer(x, searchName.lower()): cnt += 1 tehstart = m.start() tehend = m.end() if any([x == 'the', x == 'and']): if len(searchName) == tehend: tehend = -1 if all([tehstart == 0, searchName[tehend] == ' ']) or all([ tehstart != 0, searchName[tehstart - 1] == ' ', searchName[tehend] == ' ' ]): searchName = searchName.replace(x, ' ', cnt) else: continue else: searchName = searchName.replace(x, ' ', cnt) searchName = re.sub('\s+', ' ', searchName) searchName = re.sub("[\,\:]", "", searchName).strip() #logger.fdebug("searchname: %s" % searchName) #logger.fdebug("issue: %s" % searchIssue) #logger.fdebug("year: %s" % searchYear) encodeSearch = urllib.quote_plus(searchName) splitSearch = encodeSearch.split(" ") if len(searchIssue) == 1: loop = 3 elif len(searchIssue) == 2: loop = 2 else: loop = 1 if "-" in searchName: searchName = searchName.replace("-", '((\\s)?[-:])?(\\s)?') regexName = searchName.replace(" ", '((\\s)?[-:])?(\\s)?') if mylar.CONFIG.USE_MINSIZE is True: minsize = str(mylar.CONFIG.MINSIZE) else: minsize = '10' size_constraints = "&minsize=" + minsize if mylar.CONFIG.USE_MAXSIZE is True: maxsize = str(mylar.CONFIG.MAXSIZE) else: maxsize = '0' size_constraints += "&maxsize=" + maxsize if mylar.CONFIG.USENET_RETENTION is not None: max_age = "&maxage=" + str(mylar.CONFIG.USENET_RETENTION) else: max_age = "&maxage=0" feeds = [] i = 1 while (i <= loop): if i == 1: searchmethod = searchIssue elif i == 2: searchmethod = '0' + searchIssue elif i == 3: searchmethod = '00' + searchIssue else: break joinSearch = "+".join(splitSearch) + "+" + searchmethod logger.fdebug( 'Now searching experimental for issue number: %s to try and ensure all the bases are covered' % searchmethod) if mylar.CONFIG.PREFERRED_QUALITY == 1: joinSearch = joinSearch + " .cbr" elif mylar.CONFIG.PREFERRED_QUALITY == 2: joinSearch = joinSearch + " .cbz" feeds.append( feedparser.parse( "http://beta.nzbindex.com/search/rss?q=%s&max=50&minage=0%s&hidespam=1&hidepassword=1&sort=agedesc%s&complete=0&hidecross=0&hasNFO=0&poster=&g[]=85" % (joinSearch, max_age, size_constraints))) time.sleep(5) if mylar.CONFIG.ALTEXPERIMENTAL: feeds.append( feedparser.parse( "http://beta.nzbindex.com/search/rss?q=%s&max=50&minage=0%s&hidespam=1&hidepassword=1&sort=agedesc%s&complete=0&hidecross=0&hasNFO=0&poster=&g[]=86" % (joinSearch, max_age, size_constraints))) time.sleep(5) i += 1 entries = [] mres = {} tallycount = 0 for feed in feeds: totNum = len(feed.entries) tallycount += len(feed.entries) #keyPair = {} keyPair = [] regList = [] countUp = 0 while countUp < totNum: urlParse = feed.entries[countUp].enclosures[0] #keyPair[feed.entries[countUp].title] = feed.entries[countUp].link #keyPair[feed.entries[countUp].title] = urlParse["href"] keyPair.append({ "title": feed.entries[countUp].title, "link": urlParse["href"], "length": urlParse["length"], "pubdate": feed.entries[countUp].updated }) countUp = countUp + 1 # thanks to SpammyHagar for spending the time in compiling these regEx's! 
regExTest = "" regEx = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue, searchYear) regExOne = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % ( regexName, searchIssue, searchYear) #Sometimes comics aren't actually published the same year comicVine says - trying to adjust for these cases regExTwo = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue, int(searchYear) + 1) regExThree = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % ( regexName, searchIssue, int(searchYear) - 1) regExFour = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % ( regexName, searchIssue, int(searchYear) + 1) regExFive = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % ( regexName, searchIssue, int(searchYear) - 1) regexList = [ regEx, regExOne, regExTwo, regExThree, regExFour, regExFive ] except_list = [ 'releases', 'gold line', 'distribution', '0-day', '0 day', '0day', 'o-day' ] for entry in keyPair: title = entry['title'] #logger.fdebug("titlesplit: " + str(title.split("\""))) splitTitle = title.split("\"") noYear = 'False' _digits = re.compile('\d') for subs in splitTitle: #logger.fdebug('sub:' + subs) regExCount = 0 if len(subs) >= len(cName) and not any( d in subs.lower() for d in except_list) and bool( _digits.search(subs)) is True: #Looping through dictionary to run each regEx - length + regex is determined by regexList up top. # while regExCount < len(regexList): # regExTest = re.findall(regexList[regExCount], subs, flags=re.IGNORECASE) # regExCount = regExCount +1 # if regExTest: # logger.fdebug(title) # entries.append({ # 'title': subs, # 'link': str(link) # }) # this will still match on crap like 'For SomeSomayes' especially if the series length < 'For SomeSomayes' if subs.lower().startswith('for'): if cName.lower().startswith('for'): pass else: #this is the crap we ignore. Continue (commented else, as it spams the logs) #logger.fdebug('this starts with FOR : ' + str(subs) + '. This is not present in the series - ignoring.') continue #logger.fdebug('match.') if IssDateFix != "no": if IssDateFix == "01" or IssDateFix == "02": ComicYearFix = str(int(searchYear) - 1) else: ComicYearFix = str(int(searchYear) + 1) else: ComicYearFix = searchYear if searchYear not in subs and ComicYearFix not in subs: noYear = 'True' noYearline = subs if (searchYear in subs or ComicYearFix in subs) and noYear == 'True': #this would occur on the next check in the line, if year exists and #the noYear check in the first check came back valid append it subs = noYearline + ' (' + searchYear + ')' noYear = 'False' if noYear == 'False': entries.append({ 'title': subs, 'link': entry['link'], 'pubdate': entry['pubdate'], 'length': entry['length'] }) break # break out so we don't write more shit. # if len(entries) >= 1: if tallycount >= 1: mres['entries'] = entries return mres else: logger.fdebug("No Results Found") return "no results"
def listFiles(dir, watchcomic, Publisher, AlternateSearch=None, manual=None, sarc=None): # use AlternateSearch to check for filenames that follow that naming pattern # ie. Star Trek TNG Doctor Who Assimilation won't get hits as the # checker looks for Star Trek TNG Doctor Who Assimilation2 (according to CV) # we need to convert to ascii, as watchcomic is utf-8 and special chars f'it up u_watchcomic = watchcomic.encode('ascii', 'ignore').strip() logger.fdebug('[FILECHECKER] comic: ' + watchcomic) basedir = dir logger.fdebug('[FILECHECKER] Looking in: ' + dir) watchmatch = {} comiclist = [] comiccnt = 0 not_these = [ '#', ',', '\/', ':', '\;', '.', '-', '!', '\$', '\%', '\+', '\'', '\?', '\@' ] issue_exceptions = ['AU', '.INH', '.NOW', 'AI', 'A', 'B', 'C'] extensions = ('.cbr', '.cbz') for item in os.listdir(basedir): if item == 'cover.jpg' or item == 'cvinfo': continue if not item.endswith(extensions): logger.fdebug( '[FILECHECKER] filename not a valid cbr/cbz - ignoring: ' + item) continue #print item #subname = os.path.join(basedir, item) subname = item #versioning - remove it subsplit = subname.replace('_', ' ').split() volrem = None for subit in subsplit: if subit[0].lower() == 'v': vfull = 0 if subit[1:].isdigit(): #if in format v1, v2009 etc... if len(subit) > 3: # if it's greater than 3 in length, then the format is Vyyyy vfull = 1 # add on 1 character length to account for extra space subname = re.sub(subit, '', subname) volrem = subit elif subit.lower()[:3] == 'vol': #if in format vol.2013 etc #because the '.' in Vol. gets removed, let's loop thru again after the Vol hit to remove it entirely logger.fdebug( '[FILECHECKER] volume indicator detected as version #:' + str(subit)) subname = re.sub(subit, '', subname) volrem = subit #check if a year is present in series title (ie. spider-man 2099) numberinseries = 'False' for i in watchcomic.split(): if ('20' in i or '19' in i): if i.isdigit(): numberinseries = 'True' else: find20 = i.find('20') if find20: stf = i[find20:4].strip() find19 = i.find('19') if find19: stf = i[find19:4].strip() logger.fdebug('[FILECHECKER] stf is : ' + str(stf)) if stf.isdigit(): numberinseries = 'True' logger.fdebug('[FILECHECKER] numberinseries: ' + numberinseries) #remove the brackets.. subnm = re.findall('[^()]+', subname) logger.fdebug('[FILECHECKER] subnm len : ' + str(len(subnm))) if len(subnm) == 1: logger.fdebug( '[FILECHECKER] ' + str(len(subnm)) + ': detected invalid filename - attempting to detect year to continue' ) #if the series has digits this f's it up. if numberinseries == 'True': #we need to remove the series from the subname and then search the remainder. watchname = re.sub( '[-\:\;\!\'\/\?\+\=\_\%\.]', '', watchcomic) #remove spec chars for watchcomic match. 
logger.fdebug('[FILECHECKER] watch-cleaned: ' + str(watchname)) subthis = re.sub('.cbr', '', subname) subthis = re.sub('.cbz', '', subthis) subthis = re.sub('[-\:\;\!\'\/\?\+\=\_\%\.]', '', subthis) logger.fdebug('[FILECHECKER] sub-cleaned: ' + str(subthis)) subthis = subthis[len(watchname):] #remove watchcomic #we need to now check the remainder of the string for digits assuming it's a possible year logger.fdebug('[FILECHECKER] new subname: ' + str(subthis)) subname = re.sub('(.*)\s+(19\d{2}|20\d{2})(.*)', '\\1 (\\2) \\3', subthis) subname = watchcomic + subname subnm = re.findall('[^()]+', subname) else: subit = re.sub('(.*)\s+(19\d{2}|20\d{2})(.*)', '\\1 (\\2) \\3', subname) subthis2 = re.sub('.cbr', '', subit) subthis1 = re.sub('.cbz', '', subthis2) subname = re.sub('[-\:\;\!\'\/\?\+\=\_\%\.]', '', subthis1) subnm = re.findall('[^()]+', subname) if Publisher.lower() in subname.lower(): #if the Publisher is given within the title or filename even (for some reason, some people #have this to distinguish different titles), let's remove it entirely. lenm = len(subnm) cnt = 0 pub_removed = None while (cnt < lenm): if subnm[cnt] is None: break if subnm[cnt] == ' ': pass else: logger.fdebug( str(cnt) + ". Bracket Word: " + str(subnm[cnt])) if Publisher.lower() in subnm[cnt].lower() and cnt >= 1: logger.fdebug('Publisher detected within title : ' + str(subnm[cnt])) logger.fdebug('cnt is : ' + str(cnt) + ' --- Publisher is: ' + Publisher) pub_removed = subnm[cnt] #-strip publisher if exists here- logger.fdebug('removing publisher from title') subname_pubremoved = re.sub(pub_removed, '', subname) logger.fdebug('pubremoved : ' + str(subname_pubremoved)) subname_pubremoved = re.sub( '\(\)', '', subname_pubremoved) #remove empty brackets subname_pubremoved = re.sub( '\s+', ' ', subname_pubremoved) #remove spaces > 1 logger.fdebug('blank brackets removed: ' + str(subname_pubremoved)) subnm = re.findall('[^()]+', subname_pubremoved) break cnt += 1 subname = subnm[0] if len(subnm): # if it still has no year (brackets), check setting and either assume no year needed. subname = subname logger.fdebug('[FILECHECKER] subname no brackets: ' + str(subname)) subname = re.sub('\_', ' ', subname) nonocount = 0 charpos = 0 detneg = "no" leavehyphen = False should_restart = True while should_restart: should_restart = False for nono in not_these: if nono in subname: subcnt = subname.count(nono) charpos = indices( subname, nono ) # will return a list of char positions in subname #print "charpos: " + str(charpos) if nono == '-': i = 0 while (i < len(charpos)): for i, j in enumerate(charpos): if j + 2 > len(subname): sublimit = subname[j + 1:] else: sublimit = subname[j + 1:j + 2] if sublimit.isdigit(): logger.fdebug( '[FILECHECKER] possible negative issue detected.' ) nonocount = nonocount + subcnt - 1 detneg = "yes" elif '-' in watchcomic and i < len(watchcomic): logger.fdebug( '[FILECHECKER] - appears in series title.' ) logger.fdebug('[FILECHECKER] up to - :' + subname[:j + 1].replace('-', ' ')) logger.fdebug('[FILECHECKER] after - :' + subname[j + 1:]) subname = subname[:j + 1].replace( '-', ' ') + subname[j + 1:] logger.fdebug( '[FILECHECKER] new subname is : ' + str(subname)) should_restart = True leavehyphen = True i += 1 if detneg == "no" or leavehyphen == False: subname = re.sub(str(nono), ' ', subname) nonocount = nonocount + subcnt #logger.fdebug('[FILECHECKER] (str(nono) + " detected " + str(subcnt) + " times.") # segment '.' having a . 
by itself will denote the entire string which we don't want elif nono == '.': x = 0 fndit = 0 dcspace = 0 while x < subcnt: fndit = subname.find(nono, fndit) if subname[fndit - 1:fndit].isdigit( ) and subname[fndit + 1:fndit + 2].isdigit(): logger.fdebug( '[FILECHECKER] decimal issue detected.') dcspace += 1 x += 1 if dcspace == 1: nonocount = nonocount + subcnt + dcspace else: subname = re.sub('\.', ' ', subname) nonocount = nonocount + subcnt - 1 #(remove the extension from the length) else: #this is new - if it's a symbol seperated by a space on each side it drags in an extra char. x = 0 fndit = 0 blspc = 0 while x < subcnt: fndit = subname.find(nono, fndit) #print ("space before check: " + str(subname[fndit-1:fndit])) #print ("space after check: " + str(subname[fndit+1:fndit+2])) if subname[fndit - 1:fndit] == ' ' and subname[fndit + 1:fndit + 2] == ' ': logger.fdebug( '[FILECHECKER] blankspace detected before and after ' + str(nono)) blspc += 1 x += 1 subname = re.sub(str(nono), ' ', subname) nonocount = nonocount + subcnt + blspc #subname = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]',' ', subname) modwatchcomic = re.sub('[\_\#\,\/\:\;\.\!\$\%\'\?\@\-]', ' ', u_watchcomic) #if leavehyphen == False: # logger.fdebug('[FILECHECKER] ('removing hyphen for comparisons') # modwatchcomic = re.sub('-', ' ', modwatchcomic) # subname = re.sub('-', ' ', subname) detectand = False detectthe = False modwatchcomic = re.sub('\&', ' and ', modwatchcomic) if ' the ' in modwatchcomic.lower(): modwatchcomic = re.sub("\\bthe\\b", "", modwatchcomic.lower()) logger.fdebug('[FILECHECKER] new modwatchcomic: ' + str(modwatchcomic)) detectthe = True modwatchcomic = re.sub('\s+', ' ', str(modwatchcomic)).strip() if '&' in subname: subname = re.sub('\&', ' and ', subname) detectand = True if ' the ' in subname.lower(): subname = re.sub("\\bthe\\b", "", subname.lower()) detectthe = True subname = re.sub('\s+', ' ', str(subname)).strip() AS_Alt = [] if AlternateSearch is not None: chkthealt = AlternateSearch.split('##') if chkthealt == 0: AS_Alternate = AlternateSearch for calt in chkthealt: AS_Alternate = re.sub('##', '', calt) #same = encode. u_altsearchcomic = AS_Alternate.encode('ascii', 'ignore').strip() altsearchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]', ' ', u_altsearchcomic) altsearchcomic = re.sub('\&', ' and ', altsearchcomic) altsearchcomic = re.sub('\s+', ' ', str(altsearchcomic)).strip() AS_Alt.append(altsearchcomic) else: #create random characters so it will never match. 
altsearchcomic = "127372873872871091383 abdkhjhskjhkjdhakajhf" AS_Alt.append(altsearchcomic) #if '_' in subname: # subname = subname.replace('_', ' ') logger.fdebug('[FILECHECKER] watchcomic:' + str(modwatchcomic) + ' ..comparing to found file: ' + str(subname)) if modwatchcomic.lower() in subname.lower() or any( x.lower() in subname.lower() for x in AS_Alt): #altsearchcomic.lower() in subname.lower(): comicpath = os.path.join(basedir, item) logger.fdebug('[FILECHECKER] ' + modwatchcomic + ' - watchlist match on : ' + comicpath) comicsize = os.path.getsize(comicpath) #print ("Comicsize:" + str(comicsize)) comiccnt += 1 stann = 0 if 'annual' in subname.lower(): logger.fdebug('[FILECHECKER] Annual detected - proceeding') jtd_len = subname.lower().find('annual') cchk = modwatchcomic else: if modwatchcomic.lower() in subname.lower(): cchk = modwatchcomic else: cchk_ls = [ x for x in AS_Alt if x.lower() in subname.lower() ] cchk = cchk_ls[0] #print "something: " + str(cchk) logger.fdebug('[FILECHECKER] we should remove ' + str(nonocount) + ' characters') findtitlepos = subname.find('-') if charpos != 0: logger.fdebug('[FILECHECKER] detected ' + str(len(charpos)) + ' special characters') i = 0 while (i < len(charpos)): for i, j in enumerate(charpos): #print i,j #print subname #print "digitchk: " + str(subname[j:]) if j >= len(subname): logger.fdebug( '[FILECHECKER] end reached. ignoring remainder.' ) break elif subname[j:] == '-': if i <= len(subname) and subname[i + 1].isdigit(): logger.fdebug( '[FILECHECKER] negative issue detected.' ) #detneg = "yes" elif j > findtitlepos: if subname[j:] == '#': if subname[i + 1].isdigit(): logger.fdebug( '[FILECHECKER] # detected denoting issue#, ignoring.' ) else: nonocount -= 1 elif '-' in watchcomic and i < len(watchcomic): logger.fdebug( '[FILECHECKER] - appears in series title, ignoring.' ) else: logger.fdebug( '[FILECHECKER] special character appears outside of title - ignoring @ position: ' + str(charpos[i])) nonocount -= 1 i += 1 #remove versioning here if volrem != None: jtd_len = len( cchk ) # + len(volrem)# + nonocount + 1 #1 is to account for space btwn comic and vol # else: jtd_len = len(cchk) # + nonocount if sarc and mylar.READ2FILENAME: removest = subname.find( ' ' ) # the - gets removed above so we test for the first blank space... 
if subname[:removest].isdigit(): jtd_len += removest + 1 # +1 to account for space in place of - logger.fdebug('[FILECHECKER] adjusted jtd_len to : ' + str(removest) + ' because of story-arc reading order tags') logger.fdebug('[FILECHECKER] nonocount [' + str(nonocount) + '] cchk [' + cchk + '] length [' + str(len(cchk)) + ']') #if detectand: # jtd_len = jtd_len - 2 # char substitution diff between & and 'and' = 2 chars #if detectthe: # jtd_len = jtd_len - 3 # char subsitiution diff between 'the' and '' = 3 chars #justthedigits = item[jtd_len:] logger.fdebug('[FILECHECKER] final jtd_len to prune [' + str(jtd_len) + ']') logger.fdebug( '[FILECHECKER] before title removed from FILENAME [' + str(item) + ']') logger.fdebug('[FILECHECKER] after title removed from FILENAME [' + str(item[jtd_len:]) + ']') logger.fdebug( '[FILECHECKER] creating just the digits using SUBNAME, pruning first [' + str(jtd_len) + '] chars from [' + subname + ']') justthedigits_1 = subname[jtd_len:].strip() logger.fdebug('[FILECHECKER] after title removed from SUBNAME [' + justthedigits_1 + ']') #remove the title if it appears #findtitle = justthedigits.find('-') #if findtitle > 0 and detneg == "no": # justthedigits = justthedigits[:findtitle] # logger.fdebug('[FILECHECKER] ("removed title from name - is now : " + str(justthedigits)) justthedigits = justthedigits_1.split(' ', 1)[0] digitsvalid = "false" for jdc in list(justthedigits): #logger.fdebug('[FILECHECKER] ('jdc:' + str(jdc)) if not jdc.isdigit(): #logger.fdebug('[FILECHECKER] ('alpha') jdc_start = justthedigits.find(jdc) alpha_isschk = justthedigits[jdc_start:] #logger.fdebug('[FILECHECKER] ('alpha_isschk:' + str(alpha_isschk)) for issexcept in issue_exceptions: if issexcept.lower() in alpha_isschk.lower( ) and len(alpha_isschk) <= len(issexcept): logger.fdebug( '[FILECHECKER] ALPHANUMERIC EXCEPTION : [' + justthedigits + ']') digitsvalid = "true" break if digitsvalid == "true": break try: tmpthedigits = justthedigits_1.split(' ', 1)[1] logger.fdebug( '[FILECHECKER] If the series has a decimal, this should be a number [' + tmpthedigits + ']') if 'cbr' in tmpthedigits.lower( ) or 'cbz' in tmpthedigits.lower(): tmpthedigits = tmpthedigits[:-3].strip() logger.fdebug( '[FILECHECKER] Removed extension - now we should just have a number [' + tmpthedigits + ']') poss_alpha = tmpthedigits if poss_alpha.isdigit(): digitsvalid = "true" if justthedigits.lower() == 'annual': logger.fdebug('[FILECHECKER] ANNUAL DETECTED [' + poss_alpha + ']') justthedigits += ' ' + poss_alpha else: justthedigits += '.' + poss_alpha logger.fdebug( '[FILECHECKER] DECIMAL ISSUE DETECTED [' + justthedigits + ']') else: for issexcept in issue_exceptions: decimalexcept = False if '.' in issexcept: decimalexcept = True issexcept = issexcept[ 1:] #remove the '.' from comparison... if issexcept.lower() in poss_alpha.lower( ) and len(poss_alpha) <= len(issexcept): if decimalexcept: issexcept = '.' + issexcept justthedigits += issexcept #poss_alpha logger.fdebug( '[FILECHECKER] ALPHANUMERIC EXCEPTION. COMBINING : [' + justthedigits + ']') digitsvalid = "true" break except: tmpthedigits = None # justthedigits = justthedigits.split(' ', 1)[0] #if the issue has an alphanumeric (issue_exceptions, join it and push it through) logger.fdebug('[FILECHECKER] JUSTTHEDIGITS [' + justthedigits + ']') if digitsvalid == "true": pass else: if justthedigits.isdigit(): digitsvalid = "true" else: if '.' 
in justthedigits: tmpdec = justthedigits.find('.') b4dec = justthedigits[:tmpdec] a4dec = justthedigits[tmpdec + 1:] if a4dec.isdigit() and b4dec.isdigit(): logger.fdebug( '[FILECHECKER] DECIMAL ISSUE DETECTED') digitsvalid = "true" else: try: x = float(justthedigits) #validity check if x < 0: logger.info( "I've encountered a negative issue #: " + str(justthedigits) + ". Trying to accomodate.") digitsvalid = "true" else: raise ValueError except ValueError, e: logger.info( 'Cannot determine issue number from given issue #: ' + str(justthedigits)) # else: # logger.fdebug('[FILECHECKER] NO DECIMALS DETECTED') # digitsvalid = "false" # if justthedigits.lower() == 'annual': # logger.fdebug('[FILECHECKER] ANNUAL [' + tmpthedigits.split(' ', 1)[1] + ']') # justthedigits += ' ' + tmpthedigits.split(' ', 1)[1] # digitsvalid = "true" # else: # try: # if tmpthedigits.isdigit(): #.split(' ', 1)[1] is not None: # poss_alpha = tmpthedigits#.split(' ', 1)[1] # if poss_alpha.isdigit(): # digitsvalid = "true" # justthedigits += '.' + poss_alpha # logger.fdebug('[FILECHECKER] DECIMAL ISSUE DETECTED [' + justthedigits + ']') # for issexcept in issue_exceptions: # if issexcept.lower() in poss_alpha.lower() and len(poss_alpha) <= len(issexcept): # justthedigits += poss_alpha # logger.fdebug('[FILECHECKER] ALPHANUMERIC EXCEPTION. COMBINING : [' + justthedigits + ']') # digitsvalid = "true" # break # except: # pass logger.fdebug('[FILECHECKER] final justthedigits [' + justthedigits + ']') if digitsvalid == "false": logger.fdebug( '[FILECHECKER] Issue number not properly detected...ignoring.' ) comiccnt -= 1 # remove the entry from the list count as it was incorrrectly tallied. continue if manual is not None: #this is needed for Manual Run to determine matches #without this Batman will match on Batman Incorporated, and Batman and Robin, etc.. # in case it matches on an Alternate Search pattern, set modwatchcomic to the cchk value modwatchcomic = cchk logger.fdebug('[FILECHECKER] cchk = ' + cchk.lower()) logger.fdebug('[FILECHECKER] modwatchcomic = ' + modwatchcomic.lower()) logger.fdebug('[FILECHECKER] subname = ' + subname.lower()) comyear = manual['SeriesYear'] issuetotal = manual['Total'] comicvolume = manual['ComicVersion'] logger.fdebug('[FILECHECKER] SeriesYear: ' + str(comyear)) logger.fdebug('[FILECHECKER] IssueTotal: ' + str(issuetotal)) logger.fdebug('[FILECHECKER] Comic Volume: ' + str(comicvolume)) logger.fdebug('[FILECHECKER] volume detected: ' + str(volrem)) if comicvolume: ComVersChk = re.sub("[^0-9]", "", comicvolume) if ComVersChk == '' or ComVersChk == '1': ComVersChk = 0 else: ComVersChk = 0 # even if it's a V1, we need to pull the date for the given issue ID and get the publication year # for the issue. Because even if it's a V1, if there are additional Volumes then it's possible that # it will take the incorrect series. (ie. Detective Comics (1937) & Detective Comics (2011). # If issue #28 (2013) is found, it exists in both series, and because DC 1937 is a V1, it will bypass # the year check which will result in the incorrect series being picked (1937) #set the issue/year threshold here. # 2013 - (24issues/12) = 2011. #minyear = int(comyear) - (int(issuetotal) / 12) maxyear = manual['LatestDate'][:4] # yyyy-mm-dd #subnm defined at being of module. 
len_sm = len(subnm) #print ("there are " + str(lenm) + " words.") cnt = 0 yearmatch = "none" vers4year = "no" vers4vol = "no" for ct in subsplit: if ct.lower().startswith('v') and ct[1:].isdigit(): logger.fdebug( '[FILECHECKER] possible versioning..checking') #we hit a versioning # - account for it if ct[1:].isdigit(): if len(ct[1:]) == 4: #v2013 logger.fdebug( '[FILECHECKER] Version detected as ' + str(ct)) vers4year = "yes" #re.sub("[^0-9]", " ", str(ct)) #remove the v break else: if len(ct) < 4: logger.fdebug( '[FILECHECKER] Version detected as ' + str(ct)) vers4vol = str(ct) break logger.fdebug( '[FILECHECKER] false version detection..ignoring.') versionmatch = "false" if vers4year is not "no" or vers4vol is not "no": if comicvolume: #is not "None" and comicvolume is not None: D_ComicVersion = re.sub("[^0-9]", "", comicvolume) if D_ComicVersion == '': D_ComicVersion = 0 else: D_ComicVersion = 0 F_ComicVersion = re.sub("[^0-9]", "", volrem) S_ComicVersion = str(comyear) logger.fdebug('[FILECHECKER] FCVersion: ' + str(F_ComicVersion)) logger.fdebug('[FILECHECKER] DCVersion: ' + str(D_ComicVersion)) logger.fdebug('[FILECHECKER] SCVersion: ' + str(S_ComicVersion)) #if annualize == "true" and int(ComicYear) == int(F_ComicVersion): # logger.fdebug('[FILECHECKER] ("We matched on versions for annuals " + str(volrem)) if int(F_ComicVersion) == int(D_ComicVersion) or int( F_ComicVersion) == int(S_ComicVersion): logger.fdebug( '[FILECHECKER] We matched on versions...' + str(volrem)) versionmatch = "true" else: logger.fdebug( '[FILECHECKER] Versions wrong. Ignoring possible match.' ) #else: while (cnt < len_sm): if subnm[cnt] is None: break if subnm[cnt] == ' ': pass else: logger.fdebug('[FILECHECKER] ' + str(cnt) + ' Bracket Word: ' + str(subnm[cnt])) #if ComVersChk == 0: # logger.fdebug('[FILECHECKER] Series version detected as V1 (only series in existance with that title). Bypassing year check') # yearmatch = "true" # break if subnm[cnt][:-2] == '19' or subnm[cnt][:-2] == '20': logger.fdebug('[FILECHECKER] year detected: ' + str(subnm[cnt])) result_comyear = subnm[cnt] if int(result_comyear) <= int(maxyear): logger.fdebug('[FILECHECKER] ' + str(result_comyear) + ' is within the series range of ' + str(comyear) + '-' + str(maxyear)) #still possible for incorrect match if multiple reboots of series end/start in same year yearmatch = "true" break else: logger.fdebug( '[FILECHECKER] ' + str(result_comyear) + ' - not right - year not within series range of ' + str(comyear) + '-' + str(maxyear)) yearmatch = "false" break cnt += 1 if versionmatch == "false": if yearmatch == "false": logger.fdebug( '[FILECHECKER] Failed to match on both version and issue year.' ) continue else: logger.fdebug( '[FILECHECKER] Matched on versions, not on year - continuing.' ) else: if yearmatch == "false": logger.fdebug( '[FILECHECKER] Matched on version, but not on year - continuing.' ) else: logger.fdebug( '[FILECHECKER] Matched on both version, and issue year - continuing.' ) if yearmatch == "none": if ComVersChk == 0: logger.fdebug( '[FILECHECKER] Series version detected as V1 (only series in existance with that title). Bypassing year check.' ) yearmatch = "true" else: continue if 'annual' in subname.lower(): subname = re.sub('annual', '', subname.lower()) subname = re.sub('\s+', ' ', subname) #tmpitem = item[:jtd_len] # if it's an alphanumeric with a space, rejoin, so we can remove it cleanly just below this. 
substring_removal = None poss_alpha = subname.split(' ')[-1:] logger.fdebug('[FILECHECKER] poss_alpha: ' + str(poss_alpha)) logger.fdebug('[FILECHECKER] lenalpha: ' + str(len(''.join(poss_alpha)))) for issexcept in issue_exceptions: if issexcept.lower() in str(poss_alpha).lower() and len( ''.join(poss_alpha)) <= len(issexcept): #get the last 2 words so that we can remove them cleanly substring_removal = ' '.join(subname.split(' ')[-2:]) substring_join = ''.join(subname.split(' ')[-2:]) logger.fdebug('[FILECHECKER] substring_removal: ' + str(substring_removal)) logger.fdebug('[FILECHECKER] substring_join: ' + str(substring_join)) break if substring_removal is not None: sub_removed = subname.replace('_', ' ').replace( substring_removal, substring_join) else: sub_removed = subname.replace('_', ' ') logger.fdebug('[FILECHECKER] sub_removed: ' + str(sub_removed)) split_sub = sub_removed.rsplit(' ', 1)[0].split( ' ') #removes last word (assuming it's the issue#) split_mod = modwatchcomic.replace('_', ' ').split() #batman logger.fdebug('[FILECHECKER] split_sub: ' + str(split_sub)) logger.fdebug('[FILECHECKER] split_mod: ' + str(split_mod)) x = len(split_sub) - 1 scnt = 0 if x > len(split_mod) - 1: logger.fdebug( '[FILECHECKER] number of words do not match...aborting.' ) else: while (x > -1): print str(split_sub[x]) + ' comparing to ' + str( split_mod[x]) if str(split_sub[x]).lower() == str( split_mod[x]).lower(): scnt += 1 logger.fdebug('[FILECHECKER] word match exact. ' + str(scnt) + '/' + str(len(split_mod))) x -= 1 wordcnt = int(scnt) logger.fdebug('[FILECHECKER] scnt:' + str(scnt)) totalcnt = int(len(split_mod)) logger.fdebug('[FILECHECKER] split_mod length:' + str(totalcnt)) try: spercent = (wordcnt / totalcnt) * 100 except ZeroDivisionError: spercent = 0 logger.fdebug('[FILECHECKER] we got ' + str(spercent) + ' percent.') if int(spercent) >= 80: logger.fdebug( '[FILECHECKER] this should be considered an exact match.Justthedigits:' + justthedigits) else: logger.fdebug( '[FILECHECKER] failure - not an exact match.') continue if manual: print item print comicpath print comicsize print result_comyear print justthedigits comiclist.append({ 'ComicFilename': item, 'ComicLocation': comicpath, 'ComicSize': comicsize, 'ComicYear': result_comyear, 'JusttheDigits': justthedigits }) print('appended.') else: comiclist.append({ 'ComicFilename': item, 'ComicLocation': comicpath, 'ComicSize': comicsize, 'JusttheDigits': justthedigits }) watchmatch['comiclist'] = comiclist else: pass
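# Illustrative sketch (not part of the original module): the manual-run match
# above compares the cleaned filename, word by word, against the watched
# series title and accepts the file once at least 80% of the words match
# exactly.  A minimal standalone version of that idea, with a hypothetical
# helper name (multiplying before dividing avoids the integer division in the
# original, which under Python 2 only ever yields 0 or 100):
def _word_match_percent(sub_removed, modwatchcomic):
    split_sub = sub_removed.rsplit(' ', 1)[0].split(' ')   # drop the trailing issue #
    split_mod = modwatchcomic.replace('_', ' ').split()
    if len(split_sub) > len(split_mod):
        # number of words do not match - treat as no match
        return 0
    matched = 0
    for sub_word, mod_word in zip(split_sub, split_mod):
        if sub_word.lower() == mod_word.lower():
            matched += 1
    try:
        return (matched * 100) // len(split_mod)
    except ZeroDivisionError:
        return 0

# e.g. _word_match_percent('Batman 005', 'Batman')              -> 100
#      _word_match_percent('Batman Incorporated 005', 'Batman') -> 0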
def getComic(comicid, type, issueid=None, arc=None, arcid=None, arclist=None, comicidlist=None): if type == 'issue': offset = 1 issue = {} ndic = [] issuechoice = [] comicResults = [] firstdate = '2099-00-00' #let's find out how many results we get from the query... if comicid is None: #if comicid is None, it's coming from the story arc search results. id = arcid #since the arclist holds the issueids, and the pertinent reading order - we need to strip out the reading order so this works. aclist = '' for ac in arclist.split('|'): aclist += ac[:ac.find(',')] + '|' if aclist.endswith('|'): aclist = aclist[:-1] islist = aclist else: id = comicid islist = None searched = pulldetails(id, 'issue', None, 0, islist) if searched is None: return False totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText logger.fdebug("there are " + str(totalResults) + " search results...") if not totalResults: return False countResults = 0 while (countResults < int(totalResults)): logger.fdebug("querying range from " + str(countResults) + " to " + str(countResults + 100)) if countResults > 0: #new api - have to change to page # instead of offset count offsetcount = countResults searched = pulldetails(id, 'issue', None, offsetcount, islist) issuechoice, tmpdate = GetIssuesInfo(id, searched, arcid) if tmpdate < firstdate: firstdate = tmpdate ndic = ndic + issuechoice #search results are limited to 100 and by pagination now...let's account for this. countResults = countResults + 100 issue['issuechoice'] = ndic issue['firstdate'] = firstdate return issue elif type == 'comic': dom = pulldetails(comicid, 'comic', None, 1) return GetComicInfo(comicid, dom) elif type == 'firstissue': dom = pulldetails(comicid, 'firstissue', issueid, 1) return GetFirstIssue(issueid, dom) elif type == 'storyarc': dom = pulldetails(arc, 'storyarc', None, 1) return GetComicInfo(issueid, dom) elif type == 'comicyears': #used by the story arc searcher when adding a given arc to poll each ComicID in order to populate the Series Year. #this grabs each issue based on issueid, and then subsets the comicid for each to be used later. #set the offset to 0, since we're doing a filter. dom = pulldetails(arcid, 'comicyears', offset=0, comicidlist=comicidlist) return GetSeriesYears(dom) elif type == 'import': #used by the importer when doing a scan with metatagging enabled. If metatagging comes back true, then there's an IssueID present #within the tagging (with CT). This compiles all of the IssueID's during a scan (in 100's), and returns the corresponding CV data #related to the given IssueID's - namely ComicID, Name, Volume (more at some point, but those are the important ones). offset = 1 if len(comicidlist) <= 100: endcnt = len(comicidlist) else: endcnt = 100 id_count = 0 import_list = [] logger.fdebug('comicidlist:' + str(comicidlist)) while id_count < len(comicidlist): #break it up by 100 per api hit #do the first 100 regardless in_cnt = 0 for i in range(id_count, endcnt): if in_cnt == 0: tmpidlist = str(comicidlist[i]) else: tmpidlist += '|' + str(comicidlist[i]) in_cnt +=1 logger.info('tmpidlist: ' + str(tmpidlist)) searched = pulldetails(None, 'import', offset=0, comicidlist=tmpidlist) if searched is None: break else: tGIL = GetImportList(searched) import_list += tGIL endcnt +=100 id_count +=100 return import_list
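# Illustrative sketch (not part of the original module): the 'import' branch
# above looks up an arbitrary number of IssueIDs by joining them into
# pipe-delimited filter strings of at most 100 ids per API hit.  The chunking
# on its own (hypothetical name; the real code feeds each chunk to
# pulldetails() and accumulates the GetImportList() results):
def _chunk_idlist(comicidlist, per_call=100):
    chunks = []
    for i in range(0, len(comicidlist), per_call):
        chunks.append('|'.join(str(c) for c in comicidlist[i:i + per_call]))
    return chunks

# e.g. _chunk_idlist(['1234', '5678', '9012'], per_call=2)
#      -> ['1234|5678', '9012']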
def rename_param(comicid, comicname, issue, ofilename, comicyear=None, issueid=None, annualize=None): import db, logger myDB = db.DBConnection() logger.fdebug('comicid: ' + str(comicid)) logger.fdebug('issue#: ' + str(issue)) # the issue here is a non-decimalized version, we need to see if it's got a decimal and if not, add '.00' # iss_find = issue.find('.') # if iss_find < 0: # # no decimal in issue number # iss = str(int(issue)) + ".00" # else: # iss_b4dec = issue[:iss_find] # iss_decval = issue[iss_find+1:] # if len(str(int(iss_decval))) == 1: # iss = str(int(iss_b4dec)) + "." + str(int(iss_decval)*10) # else: # if issue.endswith(".00"): # iss = issue # else: # iss = str(int(iss_b4dec)) + "." + iss_decval # issue = iss # print ("converted issue#: " + str(issue)) logger.fdebug('issueid:' + str(issueid)) if issueid is None: logger.fdebug('annualize is ' + str(annualize)) if annualize is None: chkissue = myDB.selectone("SELECT * from issues WHERE ComicID=? AND Issue_Number=?", [comicid, issue]).fetchone() else: chkissue = myDB.selectone("SELECT * from annuals WHERE ComicID=? AND Issue_Number=?", [comicid, issue]).fetchone() if chkissue is None: #rechk chkissue against int value of issue # chkissue = myDB.selectone("SELECT * from issues WHERE ComicID=? AND Int_IssueNumber=?", [comicid, issuedigits(issue)]).fetchone() if chkissue is None: if chkissue is None: logger.error('Invalid Issue_Number - please validate.') return else: logger.info('Int Issue_number compare found. continuing...') issueid = chkissue['IssueID'] else: issueid = chkissue['IssueID'] #use issueid to get publisher, series, year, issue number logger.fdebug('issueid is now : ' + str(issueid)) issuenzb = myDB.selectone("SELECT * from issues WHERE ComicID=? AND IssueID=?", [comicid,issueid]).fetchone() if issuenzb is None: logger.fdebug('not an issue, checking against annuals') issuenzb = myDB.selectone("SELECT * from annuals WHERE ComicID=? AND IssueID=?", [comicid,issueid]).fetchone() if issuenzb is None: logger.fdebug('Unable to rename - cannot locate issue id within db') return else: annualize = True #comicid = issuenzb['ComicID'] issuenum = issuenzb['Issue_Number'] #issueno = str(issuenum).split('.')[0] issue_except = 'None' if 'au' in issuenum.lower(): issuenum = re.sub("[^0-9]", "", issuenum) issue_except = ' AU' if '.' in issuenum: iss_find = issuenum.find('.') iss_b4dec = issuenum[:iss_find] iss_decval = issuenum[iss_find+1:] if int(iss_decval) == 0: iss = iss_b4dec issdec = int(iss_decval) issueno = str(iss) logger.fdebug('Issue Number: ' + str(issueno)) else: if len(iss_decval) == 1: iss = iss_b4dec + "." + iss_decval issdec = int(iss_decval) * 10 else: iss = iss_b4dec + "." + iss_decval.rstrip('0') issdec = int(iss_decval.rstrip('0')) * 10 issueno = iss_b4dec logger.fdebug('Issue Number: ' + str(iss)) else: iss = issuenum issueno = str(iss) logger.fdebug('iss:' + str(iss)) logger.fdebug('issueno:' + str(issueno)) # issue zero-suppression here if mylar.ZERO_LEVEL == "0": zeroadd = "" else: if mylar.ZERO_LEVEL_N == "none": zeroadd = "" elif mylar.ZERO_LEVEL_N == "0x": zeroadd = "0" elif mylar.ZERO_LEVEL_N == "00x": zeroadd = "00" logger.fdebug('Zero Suppression set to : ' + str(mylar.ZERO_LEVEL_N)) if str(len(issueno)) > 1: if int(issueno) < 0: self._log("issue detected is a negative") prettycomiss = '-' + str(zeroadd) + str(abs(issueno)) elif int(issueno) < 10: logger.fdebug('issue detected less than 10') if '.' 
in iss: if int(iss_decval) > 0: issueno = str(iss) prettycomiss = str(zeroadd) + str(iss) else: prettycomiss = str(zeroadd) + str(int(issueno)) else: prettycomiss = str(zeroadd) + str(iss) if issue_except != 'None': prettycomiss = str(prettycomiss) + issue_except logger.fdebug('Zero level supplement set to ' + str(mylar.ZERO_LEVEL_N) + '. Issue will be set as : ' + str(prettycomiss)) elif int(issueno) >= 10 and int(issueno) < 100: logger.fdebug('issue detected greater than 10, but less than 100') if mylar.ZERO_LEVEL_N == "none": zeroadd = "" else: zeroadd = "0" if '.' in iss: if int(iss_decval) > 0: issueno = str(iss) prettycomiss = str(zeroadd) + str(iss) else: prettycomiss = str(zeroadd) + str(int(issueno)) else: prettycomiss = str(zeroadd) + str(iss) if issue_except != 'None': prettycomiss = str(prettycomiss) + issue_except logger.fdebug('Zero level supplement set to ' + str(mylar.ZERO_LEVEL_N) + '.Issue will be set as : ' + str(prettycomiss)) else: logger.fdebug('issue detected greater than 100') if '.' in iss: if int(iss_decval) > 0: issueno = str(iss) prettycomiss = str(issueno) if issue_except != 'None': prettycomiss = str(prettycomiss) + issue_except logger.fdebug('Zero level supplement set to ' + str(mylar.ZERO_LEVEL_N) + '. Issue will be set as : ' + str(prettycomiss)) else: prettycomiss = str(issueno) logger.fdebug('issue length error - cannot determine length. Defaulting to None: ' + str(prettycomiss)) logger.fdebug('Pretty Comic Issue is : ' + str(prettycomiss)) issueyear = issuenzb['IssueDate'][:4] month = issuenzb['IssueDate'][5:7].replace('-','').strip() month_name = fullmonth(month) logger.fdebug('Issue Year : ' + str(issueyear)) comicnzb= myDB.selectone("SELECT * from comics WHERE comicid=?", [comicid]).fetchone() publisher = comicnzb['ComicPublisher'] logger.fdebug('Publisher: ' + str(publisher)) series = comicnzb['ComicName'] logger.fdebug('Series: ' + str(series)) seriesyear = comicnzb['ComicYear'] logger.fdebug('Year: ' + str(seriesyear)) comlocation = comicnzb['ComicLocation'] logger.fdebug('Comic Location: ' + str(comlocation)) comversion = comicnzb['ComicVersion'] if comversion is None: comversion = 'None' #if comversion is None, remove it so it doesn't populate with 'None' if comversion == 'None': chunk_f_f = re.sub('\$VolumeN','',mylar.FILE_FORMAT) chunk_f = re.compile(r'\s+') chunk_file_format = chunk_f.sub(' ', chunk_f_f) logger.fdebug('No version # found for series, removing from filename') logger.fdebug("new format: " + str(chunk_file_format)) else: chunk_file_format = mylar.FILE_FORMAT if annualize is None: chunk_f_f = re.sub('\$Annual','',chunk_file_format) chunk_f = re.compile(r'\s+') chunk_file_format = chunk_f.sub(' ', chunk_f_f) logger.fdebug('not an annual - removing from filename paramaters') logger.fdebug('new format: ' + str(chunk_file_format)) else: logger.fdebug('chunk_file_format is: ' + str(chunk_file_format)) if mylar.ANNUALS_ON: if 'annual' in series.lower(): if '$Annual' not in chunk_file_format: # and 'annual' not in ofilename.lower(): #if it's an annual, but $annual isn't specified in file_format, we need to #force it in there, by default in the format of $Annual $Issue #prettycomiss = "Annual " + str(prettycomiss) logger.fdebug('[' + series + '][ANNUALS-ON][ANNUAL IN SERIES][NOT $ANNUAL] prettycomiss: ' + str(prettycomiss)) else: #because it exists within title, strip it then use formatting tag for placement of wording. 
chunk_f_f = re.sub('\$Annual','',chunk_file_format) chunk_f = re.compile(r'\s+') chunk_file_format = chunk_f.sub(' ', chunk_f_f) logger.fdebug('[' + series + '][ANNUALS-ON][ANNUAL IN SERIES][$ANNUAL] prettycomiss: ' + str(prettycomiss)) else: if '$Annual' not in chunk_file_format: # and 'annual' not in ofilename.lower(): #if it's an annual, but $annual isn't specified in file_format, we need to #force it in there, by default in the format of $Annual $Issue prettycomiss = "Annual " + str(prettycomiss) logger.fdebug('[' + series + '][ANNUALS-ON][ANNUAL NOT IN SERIES][NOT $ANNUAL] prettycomiss: ' + str(prettycomiss)) else: logger.fdebug('[' + series + '][ANNUALS-ON][ANNUAL NOT IN SERIES][$ANNUAL] prettycomiss: ' + str(prettycomiss)) else: #if annuals aren't enabled, then annuals are being tracked as independent series. #annualize will be true since it's an annual in the seriesname. if 'annual' in series.lower(): if '$Annual' not in chunk_file_format: # and 'annual' not in ofilename.lower(): #if it's an annual, but $annual isn't specified in file_format, we need to #force it in there, by default in the format of $Annual $Issue #prettycomiss = "Annual " + str(prettycomiss) logger.fdebug('[' + series + '][ANNUALS-OFF][ANNUAL IN SERIES][NOT $ANNUAL] prettycomiss: ' + str(prettycomiss)) else: #because it exists within title, strip it then use formatting tag for placement of wording. chunk_f_f = re.sub('\$Annual','',chunk_file_format) chunk_f = re.compile(r'\s+') chunk_file_format = chunk_f.sub(' ', chunk_f_f) logger.fdebug('[' + series + '][ANNUALS-OFF][ANNUAL IN SERIES][$ANNUAL] prettycomiss: ' + str(prettycomiss)) else: if '$Annual' not in chunk_file_format: # and 'annual' not in ofilename.lower(): #if it's an annual, but $annual isn't specified in file_format, we need to #force it in there, by default in the format of $Annual $Issue prettycomiss = "Annual " + str(prettycomiss) logger.fdebug('[' + series + '][ANNUALS-OFF][ANNUAL NOT IN SERIES][NOT $ANNUAL] prettycomiss: ' + str(prettycomiss)) else: logger.fdebug('[' + series + '][ANNUALS-OFF][ANNUAL NOT IN SERIES][$ANNUAL] prettycomiss: ' + str(prettycomiss)) logger.fdebug('Annual detected within series title of ' + series + '. Not auto-correcting issue #') series = series.encode('ascii', 'ignore').strip() filebad = [':',',','/','?','!','\''] #in u_comicname or '/' in u_comicname or ',' in u_comicname or '?' 
in u_comicname: for dbd in filebad: if dbd in series: if dbd == '/': repthechar = '-' else: repthechar = '' series = series.replace(dbd,repthechar) logger.fdebug('Altering series name due to filenaming restrictions: ' + series) publisher = re.sub('!','', publisher) file_values = {'$Series': series, '$Issue': prettycomiss, '$Year': issueyear, '$series': series.lower(), '$Publisher': publisher, '$publisher': publisher.lower(), '$VolumeY': 'V' + str(seriesyear), '$VolumeN': comversion, '$monthname': month_name, '$month': month, '$Annual': 'Annual' } extensions = ('.cbr', '.cbz') if ofilename.lower().endswith(extensions): path, ext = os.path.splitext(ofilename) if mylar.FILE_FORMAT == '': logger.fdebug('Rename Files is not enabled - keeping original filename.') #check if extension is in nzb_name - will screw up otherwise if ofilename.lower().endswith(extensions): nfilename = ofilename[:-4] else: nfilename = ofilename else: nfilename = replace_all(chunk_file_format, file_values) if mylar.REPLACE_SPACES: #mylar.REPLACE_CHAR ...determines what to replace spaces with underscore or dot nfilename = nfilename.replace(' ', mylar.REPLACE_CHAR) nfilename = re.sub('[\,\:]', '', nfilename) + ext.lower() logger.fdebug('New Filename: ' + str(nfilename)) if mylar.LOWERCASE_FILENAMES: dst = os.path.join(comlocation, nfilename.lower()) else: dst = os.path.join(comlocation, nfilename) logger.fdebug('Source: ' + str(ofilename)) logger.fdebug('Destination: ' + str(dst)) rename_this = { "destination_dir" : dst, "nfilename" : nfilename, "issueid" : issueid, "comicid" : comicid } return rename_this
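# Illustrative sketch (not part of the original module): rename_param() above
# derives the zero-padded "pretty" issue number from the ZERO_LEVEL_N setting
# ('none', '0x' or '00x').  A trimmed-down standalone version for plain
# whole-number issues (the real routine also covers decimals, negatives and
# 'AU'-style suffixes):
def _pretty_issue(issueno, zero_level_n='00x'):
    zeroadd = {'none': '', '0x': '0', '00x': '00'}.get(zero_level_n, '')
    num = int(issueno)
    if num < 10:
        return zeroadd + str(num)
    elif num < 100:
        # one pad character is dropped once the issue hits two digits
        return ('' if zero_level_n == 'none' else '0') + str(num)
    return str(num)

# e.g. _pretty_issue(5)   -> '005'
#      _pretty_issue(23)  -> '023'
#      _pretty_issue(117) -> '117'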
def GetComicInfo(comicid, dom): #comicvine isn't as up-to-date with issue counts.. #so this can get really buggered, really fast. tracks = dom.getElementsByTagName('issue') try: cntit = dom.getElementsByTagName( 'count_of_issues')[0].firstChild.wholeText except: cntit = len(tracks) trackcnt = len(tracks) logger.fdebug("number of issues I counted: " + str(trackcnt)) logger.fdebug("number of issues CV says it has: " + str(cntit)) # if the two don't match, use trackcnt as count_of_issues might be not upto-date for some reason if int(trackcnt) != int(cntit): cntit = trackcnt vari = "yes" else: vari = "no" logger.fdebug("vari is set to: " + str(vari)) #if str(trackcnt) != str(int(cntit)+2): # cntit = int(cntit) + 1 comic = {} comicchoice = [] cntit = int(cntit) #retrieve the first xml tag (<tag>data</tag>) #that the parser finds with name tagName: # to return the parent name of the <name> node : dom.getElementsByTagName('name')[0].parentNode.nodeName # where [0] denotes the number of the name field(s) # where nodeName denotes the parentNode : ComicName = results, publisher = publisher, issues = issue try: names = len(dom.getElementsByTagName('name')) n = 0 while (n < names): if dom.getElementsByTagName( 'name')[n].parentNode.nodeName == 'results': try: comic['ComicName'] = dom.getElementsByTagName( 'name')[n].firstChild.wholeText comic['ComicName'] = comic['ComicName'].rstrip() except: logger.error( 'There was a problem retrieving the given data from ComicVine. Ensure that www.comicvine.com is accessible AND that you have provided your OWN ComicVine API key.' ) return elif dom.getElementsByTagName( 'name')[n].parentNode.nodeName == 'publisher': try: comic['ComicPublisher'] = dom.getElementsByTagName( 'name')[n].firstChild.wholeText except: comic['ComicPublisher'] = "Unknown" n += 1 except: logger.warn( 'Something went wrong retrieving from ComicVine. Ensure your API is up-to-date and that comicvine is accessible' ) return try: comic['ComicYear'] = dom.getElementsByTagName( 'start_year')[0].firstChild.wholeText except: comic['ComicYear'] = '0000' comic['ComicURL'] = dom.getElementsByTagName( 'site_detail_url')[trackcnt].firstChild.wholeText desdeck = 0 #the description field actually holds the Volume# - so let's grab it try: descchunk = dom.getElementsByTagName( 'description')[0].firstChild.wholeText comic_desc = drophtml(descchunk) desdeck += 1 except: comic_desc = 'None' #sometimes the deck has volume labels try: deckchunk = dom.getElementsByTagName('deck')[0].firstChild.wholeText comic_deck = deckchunk desdeck += 1 except: comic_deck = 'None' try: comic['Aliases'] = dom.getElementsByTagName( 'aliases')[0].firstChild.wholeText #logger.fdebug('Aliases: ' + str(aliases)) except: comic['Aliases'] = 'None' comic['ComicVersion'] = 'noversion' #logger.info('comic_desc:' + comic_desc) #logger.info('comic_deck:' + comic_deck) #logger.info('desdeck: ' + str(desdeck)) while (desdeck > 0): if desdeck == 1: if comic_desc == 'None': comicDes = comic_deck[:30] else: #extract the first 60 characters comicDes = comic_desc[:60].replace('New 52', '') elif desdeck == 2: #extract the characters from the deck comicDes = comic_deck[:30].replace('New 52', '') else: break i = 0 while (i < 2): if 'volume' in comicDes.lower(): #found volume - let's grab it. v_find = comicDes.lower().find('volume') #arbitrarily grab the next 10 chars (6 for volume + 1 for space + 3 for the actual vol #) #increased to 10 to allow for text numbering (+5 max) #sometimes it's volume 5 and ocassionally it's fifth volume. 
if i == 0: vfind = comicDes[v_find:v_find + 15] #if it's volume 5 format basenums = { 'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'ten': '10', 'i': '1', 'ii': '2', 'iii': '3', 'iv': '4', 'v': '5' } logger.fdebug('volume X format - ' + str(i) + ': ' + vfind) else: vfind = comicDes[:v_find] # if it's fifth volume format basenums = { 'zero': '0', 'first': '1', 'second': '2', 'third': '3', 'fourth': '4', 'fifth': '5', 'sixth': '6', 'seventh': '7', 'eighth': '8', 'nineth': '9', 'tenth': '10', 'i': '1', 'ii': '2', 'iii': '3', 'iv': '4', 'v': '5' } logger.fdebug('X volume format - ' + str(i) + ': ' + vfind) volconv = '' for nums in basenums: if nums in vfind.lower(): sconv = basenums[nums] vfind = re.sub(nums, sconv, vfind.lower()) break #logger.info('volconv: ' + str(volconv)) #now we attempt to find the character position after the word 'volume' if i == 0: volthis = vfind.lower().find('volume') volthis = volthis + 6 # add on the actual word to the position so that we can grab the subsequent digit vfind = vfind[volthis:volthis + 4] #grab the next 4 characters ;) elif i == 1: volthis = vfind.lower().find('volume') vfind = vfind[volthis - 4:volthis] #grab the next 4 characters ;) if '(' in vfind: #bracket detected in versioning' vfindit = re.findall('[^()]+', vfind) vfind = vfindit[0] vf = re.findall('[^<>]+', vfind) ledigit = re.sub("[^0-9]", "", vf[0]) if ledigit != '': comic['ComicVersion'] = ledigit logger.fdebug( "Volume information found! Adding to series record : volume " + comic['ComicVersion']) break i += 1 else: i += 1 if comic['ComicVersion'] == 'noversion': logger.fdebug('comic[ComicVersion]:' + str(comic['ComicVersion'])) desdeck -= 1 else: break if vari == "yes": comic['ComicIssues'] = str(cntit) else: comic['ComicIssues'] = dom.getElementsByTagName( 'count_of_issues')[0].firstChild.wholeText comic['ComicImage'] = dom.getElementsByTagName( 'super_url')[0].firstChild.wholeText comic['ComicImageALT'] = dom.getElementsByTagName( 'small_url')[0].firstChild.wholeText comic['FirstIssueID'] = dom.getElementsByTagName( 'id')[0].firstChild.wholeText # print ("fistIss:" + str(comic['FirstIssueID'])) # comicchoice.append({ # 'ComicName': comic['ComicName'], # 'ComicYear': comic['ComicYear'], # 'Comicid': comicid, # 'ComicURL': comic['ComicURL'], # 'ComicIssues': comic['ComicIssues'], # 'ComicImage': comic['ComicImage'], # 'ComicVolume': ParseVol, # 'ComicPublisher': comic['ComicPublisher'] # }) # comic['comicchoice'] = comicchoice return comic
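# Illustrative sketch (not part of the original module): GetComicInfo() above
# digs the volume number out of the ComicVine description/deck text, accepting
# both "Volume 5" and "Fifth volume" phrasing by mapping number-words to
# digits before stripping the non-numerics.  A compact standalone version of
# that idea (hypothetical helper name, reduced number-word table):
import re

def _volume_from_description(desc):
    word_nums = {'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5',
                 'first': '1', 'second': '2', 'third': '3', 'fourth': '4', 'fifth': '5'}
    lowdesc = desc.lower()
    v_find = lowdesc.find('volume')
    if v_find == -1:
        return None
    # look just after the word "volume" first, then just before it
    for chunk in (lowdesc[v_find + 6:v_find + 10], lowdesc[max(v_find - 10, 0):v_find]):
        for word, digit in word_nums.items():
            if word in chunk:
                chunk = chunk.replace(word, digit)
        digits = re.sub('[^0-9]', '', chunk)
        if digits:
            return digits
    return None

# e.g. _volume_from_description('The fifth volume of the relaunched title') -> '5'
#      _volume_from_description('Volume 2 picks up after the crossover')    -> '2'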
def issuedigits(issnum): import db, logger #print "issnum : " + str(issnum) if str(issnum).isdigit(): int_issnum = int( issnum ) * 1000 else: #count = 0 #for char in issnum: # if char.isalpha(): # count += 1 #if count > 5: # logger.error('This is not an issue number - not enough numerics to parse') # int_issnum = 999999999999999 # return int_issnum if 'au' in issnum.lower() and issnum[:1].isdigit(): int_issnum = (int(issnum[:-2]) * 1000) + ord('a') + ord('u') elif 'ai' in issnum.lower() and issnum[:1].isdigit(): int_issnum = (int(issnum[:-2]) * 1000) + ord('a') + ord('i') elif 'inh' in issnum.lower(): remdec = issnum.find('.') #find the decimal position. if remdec == -1: #if no decimal, it's all one string #remove the last 3 characters from the issue # (INH) int_issnum = (int(issnum[:-3]) * 1000) + ord('i') + ord('n') + ord('h') else: int_issnum = (int(issnum[:-4]) * 1000) + ord('i') + ord('n') + ord('h') elif 'now' in issnum.lower(): if '!' in issnum: issnum = re.sub('\!', '', issnum) remdec = issnum.find('.') #find the decimal position. if remdec == -1: #if no decimal, it's all one string #remove the last 3 characters from the issue # (NOW) int_issnum = (int(issnum[:-3]) * 1000) + ord('n') + ord('o') + ord('w') else: int_issnum = (int(issnum[:-4]) * 1000) + ord('n') + ord('o') + ord('w') elif u'\xbd' in issnum: issnum = .5 int_issnum = int(issnum) * 1000 elif u'\xbc' in issnum: issnum = .25 int_issnum = int(issnum) * 1000 elif u'\xbe' in issnum: issnum = .75 int_issnum = int(issnum) * 1000 elif u'\u221e' in issnum: #issnum = utf-8 will encode the infinity symbol without any help int_issnum = 9999999999 * 1000 # set 9999999999 for integer value of issue elif '.' in issnum or ',' in issnum: #logger.fdebug('decimal detected.') if ',' in issnum: issnum = re.sub(',','.', issnum) issst = str(issnum).find('.') if issst == 0: issb4dec = 0 else: issb4dec = str(issnum)[:issst] decis = str(issnum)[issst+1:] if len(decis) == 1: decisval = int(decis) * 10 issaftdec = str(decisval) if len(decis) >= 2: decisval = int(decis) issaftdec = str(decisval) try: int_issnum = (int(issb4dec) * 1000) + (int(issaftdec) * 10) except ValueError: #logger.fdebug('This has no issue # for me to get - Either a Graphic Novel or one-shot.') int_issnum = 999999999999999 else: try: x = float(issnum) #validity check if x < 0: #logger.info("I've encountered a negative issue #: " + str(issnum) + ". Trying to accomodate.") int_issnum = (int(x)*1000) - 1 else: raise ValueError except ValueError, e: #this will account for any alpha in a issue#, so long as it doesn't have decimals. x = 0 tstord = None issno = None invchk = "false" while (x < len(issnum)): if issnum[x].isalpha(): #take first occurance of alpha in string and carry it through tstord = issnum[x:].rstrip() issno = issnum[:x].rstrip() try: isschk = float(issno) except ValueError, e: logger.fdebug('invalid numeric for issue - cannot be found. 
Ignoring.') issno = None tstord = None invchk = "true" break x+=1 if tstord is not None and issno is not None: logger.fdebug('tstord: ' + str(tstord)) a = 0 ordtot = 0 while (a < len(tstord)): try: ordtot += ord(tstord[a].lower()) #lower-case the letters for simplicty except ValueError: break a+=1 logger.fdebug('issno: ' + str(issno)) int_issnum = (int(issno) * 1000) + ordtot logger.fdebug('intissnum : ' + str(int_issnum)) elif invchk == "true": logger.fdebug('this does not have an issue # that I can parse properly.') int_issnum = 999999999999999 else: logger.error(str(issnum) + 'this has an alpha-numeric in the issue # which I cannot account for.') int_issnum = 999999999999999
    return int_issnum
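# Illustrative sketch (not part of the original module): issuedigits() above
# flattens an issue "number" into a sortable integer - whole issues are
# multiplied by 1000, a single decimal digit becomes tens (5.1 -> 5100), and
# trailing letters are folded in via ord() so that e.g. '15AU' still sorts
# right beside issue 15.  A cut-down standalone version of the core cases
# (hypothetical name; the real function also handles 1/2, infinity, 'INH',
# 'NOW' and comma decimals):
def _issue_sort_key(issnum):
    issnum = str(issnum)
    if issnum.isdigit():
        return int(issnum) * 1000
    if '.' in issnum:
        before, after = issnum.split('.', 1)
        dec = int(after) * 10 if len(after) == 1 else int(after)
        return int(before or 0) * 1000 + dec * 10
    # alpha suffix (e.g. '15AU'): numeric part * 1000 plus the ord() sum of the letters
    digits = ''.join(c for c in issnum if c.isdigit())
    letters = ''.join(c for c in issnum if c.isalpha())
    if digits:
        return int(digits) * 1000 + sum(ord(c.lower()) for c in letters)
    return 999999999999999   # nothing numeric to work with

# e.g. _issue_sort_key('16')   -> 16000
#      _issue_sort_key('5.1')  -> 5100
#      _issue_sort_key('15AU') -> 15000 + ord('a') + ord('u') = 15214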
def GCDdetails(comseries, resultURL, vari_loop, ComicID, TotalIssues, issvariation, resultPublished): gcdinfo = {} gcdchoice = [] gcount = 0 i = 0 if vari_loop > 1: resultPublished = "Unknown" if vari_loop == 99: vari_loop = 1 while (i <= vari_loop): if vari_loop > 0: try: boong = comseries['comseries'][i] except IndexError: break resultURL = boong['comseriesID'] ComicID = boong['comicid'] TotalIssues+= int(boong['comseriesIssues']) else: resultURL = resultURL # if we're here - it means it's a mismatched name. # let's pull down the publication date as it'll be blank otherwise inputMIS = 'http://www.comics.org' + str(resultURL) resp = urllib2.urlopen ( inputMIS ) # soup = BeautifulSoup ( resp ) try: soup = BeautifulSoup(urllib2.urlopen(inputMIS)) except UnicodeDecodeError: logger.info("I've detected your system is using: " + sys.stdout.encoding) logger.info("unable to parse properly due to utf-8 problem, ignoring wrong symbols") try: soup = BeautifulSoup(urllib2.urlopen(inputMIS)).decode('utf-8', 'ignore') except UnicodeDecodeError: logger.info("not working...aborting. Tell Evilhero.") return parsed = soup.find("div", {"id" : "series_data"}) subtxt3 = parsed.find("dd", {"id" : "publication_dates"}) resultPublished = subtxt3.findNext(text=True).rstrip() #print ("pubdate:" + str(resultPublished)) coverst = soup.find("div", {"id" : "series_cover"}) if coverst < 0: gcdcover = "None" else: subcoverst = coverst('img',src=True)[0] gcdcover = subcoverst['src'] #print ("resultURL:" + str(resultURL)) #print ("comicID:" + str(ComicID)) input2 = 'http://www.comics.org' + str(resultURL) + 'details/' resp = urllib2.urlopen(input2) soup = BeautifulSoup(resp) #for newer comics, on-sale date has complete date... #for older comics, pub.date is to be used type = soup.find(text=' On-sale date ') if type: #print ("on-sale date detected....adjusting") datetype = "on-sale" else: #print ("pub date defaulting") datetype = "pub" cnt1 = len(soup.findAll("tr", {"class" : "row_even_False"})) cnt2 = len(soup.findAll("tr", {"class" : "row_even_True"})) cnt = int(cnt1 + cnt2) #print (str(cnt) + " Issues in Total (this may be wrong due to alternate prints, etc") n_odd = -1 n_even = -1 n = 0 PI = "1.00" altcount = 0 while ( n < cnt ): if n%2==0: n_odd+=1 parsed = soup.findAll("tr", {"class" : "row_even_False"})[n_odd] ntype = "odd" else: n_even+=1 ntype = "even" parsed = soup.findAll("tr", {"class" : "row_even_True"})[n_even] subtxt3 = parsed.find("a") ParseIssue = subtxt3.findNext(text=True) fid = parsed('a',href=True)[0] resultGID = fid['href'] resultID = resultGID[7:-1] #print ( "ID: " + str(resultID) ) if ',' in ParseIssue: ParseIssue = re.sub("\,", "", ParseIssue) #print ("ParseIssue before : " + str(ParseIssue)) if 'Vol' in ParseIssue or '[' in ParseIssue: ParseIssue = re.sub("[^0-9]", "", ParseIssue) isslen = ParseIssue.find(' ') #if 'isslen' exists, it means that it's an alternative cover. #however, if ONLY alternate covers exist of an issue it won't work. #let's use the FIRST record, and ignore all other covers for the given issue. isschk = ParseIssue[:isslen] #print ("Parse is now: " + str(isschk)) #check if decimal or '1/2' exists or not, and store decimal results halfchk = "no" if '.' 
in isschk: isschk_find = isschk.find('.') isschk_b4dec = isschk[:isschk_find] isschk_decval = isschk[isschk_find+1:] elif '/' in isschk: ParseIssue = "0.50" isslen = 0 halfchk = "yes" else: isschk_decval = ".00" if isslen > 0: isschk = ParseIssue[:isslen] isschk2 = str(isschk) + isschk_decval #logger.fdebug("isschk: " + str(isschk) + " ...isschk2: " + str(isschk2)) if 'a' in isschk or 'b' in isschk or 'c' in isschk: isschk2 = ParseIssue[:isslen-1] + isschk_decval #altcount == 2 ParseIssue = str(isschk2) #logger.fdebug("Alt.cover found = " + str(isschk2)) if str(PI) == str(isschk2): #logger.fdebug("matched on PI: " + str(PI) + " .. and isschk2: " + str(isschk2)) if altcount == 0: #logger.fdebug("first occurance - marking and continuing..." + str(isschk2)) #this handles the first occurance.. ParseIssue = str(isschk2) PI = str(isschk2) altcount = 1 else: #logger.fdebug("Using only first record for issue - ignoring further alternate matches") ParseIssue = "this is wrong" altcount+=1 else: #logger.fdebug("issues didn't match.") altcount = 1 ParseIssue = str(isschk) + isschk_decval else: if halfchk == "yes": pass else: ParseIssue = ParseIssue + isschk_decval #print ("no alt.cover detected for - " + str(ParseIssue)) altcount = 1 if (altcount == 1): #logger.fdebug("adding issue to db : " + str(ParseIssue)) # in order to get the compare right, let's decimialize the string to '.00'. gcdinfo['ComicIssue'] = ParseIssue #print "Issue: " + str(ParseIssue) #^^ will retrieve issue #if datetype == "on-sale": subtxt1 = parsed('td')[2] ParseDate = subtxt1.findNext(text=True) pdlen = len(ParseDate) #print "sale-date..ParseDate:" + str(ParseDate) #print ("Parsed Date length: " + str(pdlen)) if len(ParseDate) < 7: subtxt3 = parsed('td')[0] ParseDate = subtxt3.findNext(text=True) #print "pub-date..ParseDate:" + str(ParseDate) if ParseDate == ' ': #default to empty so doesn't error out. ParseDate = "0000-00-00" #ParseDate = ParseDate.replace('?','') ParseDate = ParseDate.replace(' ','') #print "Parse date: " + str(ParseDate) gcdinfo['ComicDate'] = ParseDate #^^ will retrieve date # if not any(d.get('GCDIssue', None) == str(gcdinfo['ComicIssue']) for d in gcdchoice): #logger.fdebug("adding: " + str(gcdinfo['ComicIssue'])) if ComicID[:1] == "G": gcdchoice.append({ 'GCDid': ComicID, 'IssueID': resultID, 'GCDIssue': gcdinfo['ComicIssue'], 'GCDDate': gcdinfo['ComicDate'] }) gcount+=1 else: gcdchoice.append({ 'GCDid': ComicID, 'GCDIssue': gcdinfo['ComicIssue'], 'GCDDate': gcdinfo['ComicDate'] }) gcdinfo['gcdchoice'] = gcdchoice else: #--if 2 identical issue numbers legitimately exist, but have different #--publication dates, try to distinguish logger.fdebug("2 identical issue #'s have been found...determining if it's intentional.") #get current issue & publication date. 
logger.fdebug("Issue #:" + str(gcdinfo['ComicIssue'])) logger.fdebug("IssueDate: " + str(gcdinfo['ComicDate'])) #get conflicting issue from tuple for d in gcdchoice: if str(d['GCDIssue']) == str(gcdinfo['ComicIssue']): logger.fdebug("Issue # already in tuple - checking IssueDate:" + str(d['GCDDate']) ) if str(d['GCDDate']) == str(gcdinfo['ComicDate']): logger.fdebug("Issue #'s and dates match...skipping.") else: logger.fdebug("Issue#'s match but different publication dates, not skipping.") #pass #logger.fdebug("Duplicate issue detected in DB - ignoring subsequent issue # " + str(gcdinfo['ComicIssue'])) PI = ParseIssue #else: # -- this needs a rework -- # if issue only has alternative covers on comics.org, it won't match # and will cause the script to return a cannot retrieve.. #compare previous issue to current issue (to help with alt.cover count) # PI = ParseIssue # altcount+=1 # print ("alternate issue - ignoring") #altcount = 0 n+=1 i+=1 gcdinfo['gcdvariation'] = issvariation if ComicID[:1] == "G": gcdinfo['totalissues'] = gcount else: gcdinfo['totalissues'] = TotalIssues gcdinfo['ComicImage'] = gcdcover gcdinfo['resultPublished'] = resultPublished #print ("gcdvariation: " + str(gcdinfo['gcdvariation'])) return gcdinfo
def ComChk(ComicName, ComicYear, ComicPublisher, Total, ComicID): comchkchoice = [] comchoice = {} NOWyr = datetime.date.today().year if datetime.date.today().month == 12: NOWyr = NOWyr + 1 logger.fdebug("We're in December, incremented search Year to increase search results: " + str(NOWyr)) comicnm = ComicName.encode('utf-8').strip() comicyr = ComicYear comicis = Total comicid = ComicID comicpub = ComicPublisher.encode('utf-8').strip() #print ("...comchk parser initialization...") #print ( "comicname: " + str(comicnm) ) #print ( "comicyear: " + str(comicyr) ) #print ( "comichave: " + str(comicis) ) #print ( "comicpub: " + str(comicpub) ) #print ( "comicid: " + str(comicid) ) # do 3 runs at the comics.org search to get the best results comicrun = [] # &pub_name=DC # have to remove the spaces from Publisher or else will not work (ie. DC Comics vs DC will not match) # take the 1st word ;) #comicpub = comicpub.split()[0] # if it's not one of the BIG publisher's it might fail - so let's increase the odds. pubbiggies = ['DC', 'Marvel', 'Image', 'IDW'] uhuh = "no" for pb in pubbiggies: if pb in comicpub: #keep publisher in url if a biggie. uhuh = "yes" #print (" publisher match : " + str(comicpub)) conv_pub = comicpub.split()[0] #print (" converted publisher to : " + str(conv_pub)) #1st run setup - leave it all as it is. comicrun.append(comicnm) cruncnt = 0 #2nd run setup - remove the last character and do a broad search (keep year or else will blow up) if len(str(comicnm).split()) > 2: comicrun.append(' '.join(comicnm.split(' ')[:-1])) cruncnt+=1 # to increase the likely hood of matches and to get a broader scope... # lets remove extra characters if re.sub('[\.\,\:]', '', comicnm) != comicnm: comicrun.append(re.sub('[\.\,\:]', '', comicnm)) cruncnt+=1 # one more addition - if the title contains a 'the', remove it ;) if comicnm.lower().startswith('the'): comicrun.append(comicnm[4:].strip()) cruncnt+=1 totalcount = 0 cr = 0 #print ("cruncnt is " + str(cruncnt)) while (cr <= cruncnt): #print ("cr is " + str(cr)) comicnm = comicrun[cr] #leaving spaces in will screw up the search...let's take care of it comicnm = re.sub(' ', '+', comicnm) #print ("comicnm: " + str(comicnm)) if uhuh == "yes": publink = "&pub_name=" + str(conv_pub) if uhuh == "no": publink = "&pub_name=" input = 'http://www.comics.org/search/advanced/process/?target=series&method=icontains&logic=False&keywords=&order1=series&order2=date&order3=&start_date=' + str(comicyr) + '-01-01&end_date=' + str(NOWyr) + '-12-31' + '&title=&feature=&job_number=&pages=&script=&pencils=&inks=&colors=&letters=&story_editing=&genre=&characters=&synopsis=&reprint_notes=&story_reprinted=None¬es=' + str(publink) + '&pub_notes=&brand=&brand_notes=&indicia_publisher=&is_surrogate=None&ind_pub_notes=&series=' + str(comicnm) + '&series_year_began=&series_notes=&tracking_notes=&issue_count=&is_comics=None&format=&color=&dimensions=&paper_stock=&binding=&publishing_format=&issues=&volume=&issue_title=&variant_name=&issue_date=&indicia_frequency=&price=&issue_pages=&issue_editing=&isbn=&barcode=&issue_notes=&issue_reprinted=None&is_indexed=None' response = urllib2.urlopen (input) soup = BeautifulSoup (response) cnt1 = len(soup.findAll("tr", {"class": "listing_even"})) cnt2 = len(soup.findAll("tr", {"class": "listing_odd"})) cnt = int(cnt1 + cnt2) # print ("cnt1: " + str(cnt1)) # print ("cnt2: " + str(cnt2)) # print (str(cnt) + " results") resultName = [] resultID = [] resultYear = [] resultIssues = [] resultPublisher = [] resultURL = None n_odd = -1 n_even = -1 n 
= 0 while (n < cnt): if n%2==0: n_even+=1 resultp = soup.findAll("tr", {"class": "listing_even"})[n_even] else: n_odd+=1 resultp = soup.findAll("tr", {"class": "listing_odd"})[n_odd] rtp = resultp('a')[1] rtpit = rtp.findNext(text=True) rtpthis = rtpit.encode('utf-8').strip() resultName.append(helpers.cleanName(rtpthis)) # print ( "Comic Name: " + str(resultName[n]) ) pub = resultp('a')[0] pubit = pub.findNext(text=True) # pubthis = u' '.join(pubit).encode('utf-8').strip() pubthis = pubit.encode('utf-8').strip() resultPublisher.append(pubthis) # print ( "Publisher: " + str(resultPublisher[n]) ) fip = resultp('a', href=True)[1] resultID.append(fip['href']) # print ( "ID: " + str(resultID[n]) ) subtxt3 = resultp('td')[3] resultYear.append(subtxt3.findNext(text=True)) resultYear[n] = resultYear[n].replace(' ', '') subtxt4 = resultp('td')[4] resultIssues.append(helpers.cleanName(subtxt4.findNext(text=True))) resiss = resultIssues[n].find('issue') resiss = int(resiss) resultIssues[n] = resultIssues[n].replace('', '')[:resiss] resultIssues[n] = resultIssues[n].replace(' ', '') # print ( "Year: " + str(resultYear[n]) ) # print ( "Issues: " + str(resultIssues[n]) ) # print ("comchkchoice: " + str(comchkchoice)) if not any(d.get('GCDID', None) == str(resultID[n]) for d in comchkchoice): #print ( str(resultID[n]) + " not in DB...adding.") comchkchoice.append({ "ComicID": str(comicid), "ComicName": resultName[n], "GCDID": str(resultID[n]).split('/')[2], "ComicYear": str(resultYear[n]), "ComicPublisher": resultPublisher[n], "ComicURL": "http://www.comics.org" + str(resultID[n]), "ComicIssues": str(resultIssues[n]) }) #else: #print ( str(resultID[n]) + " already in DB...skipping" ) n+=1 cr+=1 totalcount= totalcount + cnt comchoice['comchkchoice'] = comchkchoice return comchoice, totalcount
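# Illustrative sketch (not part of the original module): ComChk() above runs
# the comics.org advanced search several times with progressively looser
# spellings of the series name.  The variant list it builds boils down to
# this (standalone sketch, hypothetical name):
import re

def _search_variants(comicnm):
    variants = [comicnm]                                    # 1st run: as-is
    if len(comicnm.split()) > 2:
        variants.append(' '.join(comicnm.split(' ')[:-1]))  # 2nd: drop the last word
    stripped = re.sub(r'[\.\,\:]', '', comicnm)
    if stripped != comicnm:
        variants.append(stripped)                           # 3rd: drop . , :
    if comicnm.lower().startswith('the '):
        variants.append(comicnm[4:].strip())                # 4th: drop a leading 'The'
    # spaces have to become '+' before going into the search URL
    return [re.sub(' ', '+', v) for v in variants]

# e.g. _search_variants('The Amazing Spider-Man: Renew Your Vows')
#      -> ['The+Amazing+Spider-Man:+Renew+Your+Vows',
#          'The+Amazing+Spider-Man:+Renew+Your',
#          'The+Amazing+Spider-Man+Renew+Your+Vows',
#          'Amazing+Spider-Man:+Renew+Your+Vows']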
def GCDScraper(ComicName, ComicYear, Total, ComicID, quickmatch=None): NOWyr = datetime.date.today().year if datetime.date.today().month == 12: NOWyr = NOWyr + 1 logger.fdebug("We're in December, incremented search Year to increase search results: " + str(NOWyr)) comicnm = ComicName comicyr = ComicYear comicis = Total comicid = ComicID #print ( "comicname: " + str(comicnm) ) #print ( "comicyear: " + str(comicyr) ) #print ( "comichave: " + str(comicis) ) #print ( "comicid: " + str(comicid) ) comicnm = re.sub(' ', '+', comicnm) input = 'http://www.comics.org/search/advanced/process/?target=series&method=icontains&logic=False&order2=date&order3=&start_date=' + str(comicyr) + '-01-01&end_date=' + str(NOWyr) + '-12-31&series=' + str(comicnm) + '&is_indexed=None' response = urllib2.urlopen ( input ) soup = BeautifulSoup ( response) cnt1 = len(soup.findAll("tr", {"class" : "listing_even"})) cnt2 = len(soup.findAll("tr", {"class" : "listing_odd"})) cnt = int(cnt1 + cnt2) #print (str(cnt) + " results") resultName = [] resultID = [] resultYear = [] resultIssues = [] resultURL = None n_odd = -1 n_even = -1 n = 0 while ( n < cnt ): if n%2==0: n_even+=1 resultp = soup.findAll("tr", {"class" : "listing_even"})[n_even] else: n_odd+=1 resultp = soup.findAll("tr", {"class" : "listing_odd"})[n_odd] rtp = resultp('a')[1] resultName.append(helpers.cleanName(rtp.findNext(text=True))) #print ( "Comic Name: " + str(resultName[n]) ) fip = resultp('a',href=True)[1] resultID.append(fip['href']) #print ( "ID: " + str(resultID[n]) ) subtxt3 = resultp('td')[3] resultYear.append(subtxt3.findNext(text=True)) resultYear[n] = resultYear[n].replace(' ','') subtxt4 = resultp('td')[4] resultIssues.append(helpers.cleanName(subtxt4.findNext(text=True))) resiss = resultIssues[n].find('issue') resiss = int(resiss) resultIssues[n] = resultIssues[n].replace('','')[:resiss] resultIssues[n] = resultIssues[n].replace(' ','') #print ( "Year: " + str(resultYear[n]) ) #print ( "Issues: " + str(resultIssues[n]) ) CleanComicName = re.sub('[\,\.\:\;\'\[\]\(\)\!\@\#\$\%\^\&\*\-\_\+\=\?\/]', '', comicnm) CleanComicName = re.sub(' ', '', CleanComicName).lower() CleanResultName = re.sub('[\,\.\:\;\'\[\]\(\)\!\@\#\$\%\^\&\*\-\_\+\=\?\/]', '', resultName[n]) CleanResultName = re.sub(' ', '', CleanResultName).lower() #print ("CleanComicName: " + str(CleanComicName)) #print ("CleanResultName: " + str(CleanResultName)) if CleanResultName == CleanComicName or CleanResultName[3:] == CleanComicName: #if resultName[n].lower() == helpers.cleanName(str(ComicName)).lower(): #print ("n:" + str(n) + "...matched by name to Mylar!") #this has been seen in a few instances already, so trying to adjust. #when the series year is 2011, in gcd it might be 2012 due to publication #dates overlapping between Dec/11 and Jan/12. Let's accept a match with a #1 year grace space, and then pull in the first issue to see the actual pub # date and if coincides with the other date..match it. if resultYear[n] == ComicYear or resultYear[n] == str(int(ComicYear)+1): #print ("n:" + str(n) + "...matched by year to Mylar!") #print ( "Year: " + str(resultYear[n]) ) #Occasionally there are discrepancies in comic count between #GCD and CV. 99% it's CV not updating to the newest issue as fast #as GCD does. Therefore, let's increase the CV count by 1 to get it #to match, any more variation could cause incorrect matching. #ie. witchblade on GCD says 159 issues, CV states 161. 
if int(resultIssues[n]) == int(Total) or int(resultIssues[n]) == int(Total)+1 or (int(resultIssues[n])+1) == int(Total): #print ("initial issue match..continuing.") if int(resultIssues[n]) == int(Total)+1: issvariation = "cv" elif int(resultIssues[n])+1 == int(Total): issvariation = "gcd" else: issvariation = "no" #print ("n:" + str(n) + "...matched by issues to Mylar!") #print ("complete match!...proceeding") TotalIssues = resultIssues[n] resultURL = str(resultID[n]) rptxt = resultp('td')[6] resultPublished = rptxt.findNext(text=True) #print ("Series Published: " + str(resultPublished)) break n+=1 # it's possible that comicvine would return a comic name incorrectly, or gcd # has the wrong title and won't match 100%... # (ie. The Flash-2011 on comicvine is Flash-2011 on gcd) # this section is to account for variations in spelling, punctuation, etc/ basnumbs = {'one':1,'two':2,'three':3,'four':4,'five':5,'six':6,'seven':7,'eight':8,'nine':9,'ten':10,'eleven':11,'twelve':12} if resultURL is None: #search for number as text, and change to numeric for numbs in basnumbs: #print ("numbs:" + str(numbs)) if numbs in ComicName.lower(): numconv = basnumbs[numbs] #print ("numconv: " + str(numconv)) ComicNm = re.sub(str(numbs), str(numconv), ComicName.lower()) #print ("comicname-reVISED:" + str(ComicNm)) return GCDScraper(ComicNm, ComicYear, Total, ComicID) break if ComicName.lower().startswith('the '): ComicName = ComicName[4:] return GCDScraper(ComicName, ComicYear, Total, ComicID) if ':' in ComicName: ComicName = re.sub(':', '', ComicName) return GCDScraper(ComicName, ComicYear, Total, ComicID) if '-' in ComicName: ComicName = re.sub('-', ' ', ComicName) return GCDScraper(ComicName, ComicYear, Total, ComicID) if 'and' in ComicName.lower(): ComicName = ComicName.replace('and', '&') return GCDScraper(ComicName, ComicYear, Total, ComicID) if not quickmatch: return 'No Match' #vari_loop = 0 if quickmatch == "yes": if resultURL is None: return 'No Match' else: return 'Match' return GCDdetails(comseries=None, resultURL=resultURL, vari_loop=0, ComicID=ComicID, TotalIssues=TotalIssues, issvariation=issvariation, resultPublished=resultPublished)
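# Illustrative sketch (not part of the original module): GCDScraper() above
# accepts a comics.org hit when the punctuation/space-stripped names agree,
# the series year is within one year of the ComicVine year (to cover Dec/Jan
# publication overlap), and the issue counts differ by at most one (GCD and
# CV regularly lag each other by an issue).  That acceptance test on its own,
# with a hypothetical name:
import re

def _gcd_result_matches(result_name, result_year, result_issues,
                        cv_name, cv_year, cv_issues):
    def clean(name):
        return re.sub(r'[^a-z0-9]', '', name.lower())
    if clean(result_name) != clean(cv_name):
        return False
    if int(result_year) not in (int(cv_year), int(cv_year) + 1):
        return False
    return abs(int(result_issues) - int(cv_issues)) <= 1

# e.g. _gcd_result_matches('Flash', '2012', '160', 'Flash', '2011', '161') -> True
# When nothing matches, the original retries with a leading 'The' stripped,
# number-words converted to digits, ':' removed and '-' turned into spaces.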
def GCDAdd(gcdcomicid): serieschoice = [] series = {} logger.fdebug("I'm trying to find these GCD comicid's:" + str(gcdcomicid)) for gcdid in gcdcomicid: logger.fdebug("looking at gcdid:" + str(gcdid)) input2 = 'http://www.comics.org/series/' + str(gcdid) logger.fdebug("---url: " + str(input2)) resp = urllib2.urlopen ( input2 ) soup = BeautifulSoup ( resp ) logger.fdebug("SeriesName section...") parsen = soup.find("span", {"id" : "series_name"}) #logger.fdebug("series name (UNPARSED): " + str(parsen)) subpar = parsen('a')[0] resultName = subpar.findNext(text=True) logger.fdebug("ComicName: " + str(resultName)) #covers-start logger.fdebug("Covers section...") coverst = soup.find("div", {"id" : "series_cover"}) if coverst < 0: gcdcover = "None" logger.fdebug("unable to find any covers - setting to None") else: subcoverst = coverst('img',src=True)[0] #logger.fdebug("cover (UNPARSED) : " + str(subcoverst)) gcdcover = subcoverst['src'] logger.fdebug("Cover: " + str(gcdcover)) #covers end #publisher start logger.fdebug("Publisher section...") try: pubst = soup.find("div", {"class" : "item_data"}) catchit = pubst('a')[0] except (IndexError, TypeError): pubst = soup.findAll("div", {"class" : "left"})[1] catchit = pubst.find("a") publisher = catchit.findNext(text=True) logger.fdebug("Publisher: " + str(publisher)) #publisher end parsed = soup.find("div", {"id" : "series_data"}) #logger.fdebug("series_data: " + str(parsed)) #print ("parse:" + str(parsed)) subtxt3 = parsed.find("dd", {"id" : "publication_dates"}) #logger.fdebug("publication_dates: " + str(subtxt3)) pubdate = subtxt3.findNext(text=True).rstrip() logger.fdebug("pubdate:" + str(pubdate)) subtxt4 = parsed.find("dd", {"id" : "issues_published"}) noiss = subtxt4.findNext(text=True) lenwho = len(noiss) lent = noiss.find(' ',2) lenf = noiss.find('(') stringit = noiss[lenf:lenwho] stringout = noiss[:lent] noissues = stringout.rstrip(' \t\r\n\0') numbering = stringit.rstrip(' \t\r\n\0') logger.fdebug("noissues:" + str(noissues)) logger.fdebug("numbering:" + str(numbering)) serieschoice.append({ "ComicID": gcdid, "ComicName": resultName, "ComicYear" : pubdate, "ComicIssues" : noissues, "ComicPublisher" : publisher, "ComicCover" : gcdcover }) series['serieschoice'] = serieschoice return series
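# Illustrative sketch (not part of the original module): GCDAdd() above splits
# the comics.org "issues_published" text (something along the lines of
# "23 issues (#1 - #23)") into the raw issue count and the numbering note by
# locating the first space and the opening bracket.  The same split, standalone:
def _split_issues_published(noiss):
    count = noiss[:noiss.find(' ', 2)].rstrip(' \t\r\n\0')
    numbering = noiss[noiss.find('('):].rstrip(' \t\r\n\0')
    return count, numbering

# e.g. _split_issues_published('23 issues (#1 - #23)')
#      -> ('23', '(#1 - #23)')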
def ComChk(ComicName, ComicYear, ComicPublisher, Total, ComicID): comchkchoice = [] comchoice = {} NOWyr = datetime.date.today().year if datetime.date.today().month == 12: NOWyr = NOWyr + 1 logger.fdebug("We're in December, incremented search Year to increase search results: " + str(NOWyr)) comicnm = ComicName comicyr = ComicYear comicis = Total comicid = ComicID comicpub = ComicPublisher #print ( "comicname: " + str(comicnm) ) #print ( "comicyear: " + str(comicyr) ) #print ( "comichave: " + str(comicis) ) #print ( "comicpub: " + str(comicpub) ) #print ( "comicid: " + str(comicid) ) # do 3 runs at the comics.org search to get the best results comicrun = [] # &pub_name=DC # have to remove the spaces from Publisher or else will not work (ie. DC Comics vs DC will not match) # take the 1st word ;) #comicpub = comicpub.split()[0] # if it's not one of the BIG publisher's it might fail - so let's increase the odds. pubbiggies = [ 'DC', 'Marvel', 'Image', 'IDW' ] uhuh = "no" for pb in pubbiggies: if pb in comicpub: #keep publisher in url if a biggie. uhuh = "yes" #print (" publisher match : " + str(comicpub)) conv_pub = comicpub.split()[0] #print (" converted publisher to : " + str(conv_pub)) #1st run setup - leave it all as it is. comicrun.append(comicnm) cruncnt = 0 #2nd run setup - remove the last character and do a broad search (keep year or else will blow up) if len(str(comicnm).split()) > 2: comicrun.append(' '.join(comicnm.split(' ')[:-1])) cruncnt+=1 # to increase the likely hood of matches and to get a broader scope... # lets remove extra characters if re.sub('[\.\,\:]', '', comicnm) != comicnm: comicrun.append(re.sub('[\.\,\:]', '', comicnm)) cruncnt+=1 totalcount = 0 cr = 0 #print ("cruncnt is " + str(cruncnt)) while (cr <= cruncnt): #print ("cr is " + str(cr)) comicnm = comicrun[cr] #leaving spaces in will screw up the search...let's take care of it comicnm = re.sub(' ', '+', comicnm) #print ("comicnm: " + str(comicnm)) if uhuh == "yes": publink = "&pub_name=" + str(conv_pub) if uhuh == "no": publink = "&pub_name=" input = 'http://www.comics.org/search/advanced/process/?target=series&method=icontains&logic=False&keywords=&order1=series&order2=date&order3=&start_date=' + str(comicyr) + '-01-01&end_date=' + str(NOWyr) + '-12-31' + '&title=&feature=&job_number=&pages=&script=&pencils=&inks=&colors=&letters=&story_editing=&genre=&characters=&synopsis=&reprint_notes=&story_reprinted=None¬es=' + str(publink) + '&pub_notes=&brand=&brand_notes=&indicia_publisher=&is_surrogate=None&ind_pub_notes=&series=' + str(comicnm) + '&series_year_began=&series_notes=&tracking_notes=&issue_count=&is_comics=None&format=&color=&dimensions=&paper_stock=&binding=&publishing_format=&issues=&volume=&issue_title=&variant_name=&issue_date=&indicia_frequency=&price=&issue_pages=&issue_editing=&isbn=&barcode=&issue_notes=&issue_reprinted=None&is_indexed=None' response = urllib2.urlopen ( input ) soup = BeautifulSoup ( response) cnt1 = len(soup.findAll("tr", {"class" : "listing_even"})) cnt2 = len(soup.findAll("tr", {"class" : "listing_odd"})) cnt = int(cnt1 + cnt2) # print ("cnt1: " + str(cnt1)) # print ("cnt2: " + str(cnt2)) # print (str(cnt) + " results") resultName = [] resultID = [] resultYear = [] resultIssues = [] resultPublisher = [] resultURL = None n_odd = -1 n_even = -1 n = 0 while ( n < cnt ): if n%2==0: n_even+=1 resultp = soup.findAll("tr", {"class" : "listing_even"})[n_even] else: n_odd+=1 resultp = soup.findAll("tr", {"class" : "listing_odd"})[n_odd] rtp = resultp('a')[1] 
resultName.append(helpers.cleanName(rtp.findNext(text=True))) # print ( "Comic Name: " + str(resultName[n]) ) pub = resultp('a')[0] resultPublisher.append(pub.findNext(text=True)) # print ( "Publisher: " + str(resultPublisher[n]) ) fip = resultp('a',href=True)[1] resultID.append(fip['href']) # print ( "ID: " + str(resultID[n]) ) subtxt3 = resultp('td')[3] resultYear.append(subtxt3.findNext(text=True)) resultYear[n] = resultYear[n].replace(' ','') subtxt4 = resultp('td')[4] resultIssues.append(helpers.cleanName(subtxt4.findNext(text=True))) resiss = resultIssues[n].find('issue') resiss = int(resiss) resultIssues[n] = resultIssues[n].replace('','')[:resiss] resultIssues[n] = resultIssues[n].replace(' ','') # print ( "Year: " + str(resultYear[n]) ) # print ( "Issues: " + str(resultIssues[n]) ) # print ("comchkchoice: " + str(comchkchoice)) if not any(d.get('GCDID', None) == str(resultID[n]) for d in comchkchoice): #print ( str(resultID[n]) + " not in DB...adding.") comchkchoice.append({ "ComicID": str(comicid), "ComicName": str(resultName[n]), "GCDID": str(resultID[n]).split('/')[2], "ComicYear" : str(resultYear[n]), "ComicPublisher" : str(resultPublisher[n]), "ComicURL" : "http://www.comics.org" + str(resultID[n]), "ComicIssues" : str(resultIssues[n]) }) #else: #print ( str(resultID[n]) + " already in DB...skipping" ) n+=1 cr+=1 totalcount= totalcount + cnt comchoice['comchkchoice'] = comchkchoice return comchoice, totalcount
def processor(self, nzbinfo): nzbid = nzbinfo['NZBID'] try: logger.fdebug( 'Now checking the active queue of nzbget for the download') queueinfo = self.server.listgroups() except Exception as e: logger.warn( 'Error attempting to retrieve active queue listing: %s' % e) return {'status': False} else: logger.fdebug('valid queue result returned. Analyzing...') queuedl = [qu for qu in queueinfo if qu['NZBID'] == nzbid] if len(queuedl) == 0: logger.warn( 'Unable to locate NZBID %s in active queue. Could it be finished already ?' % nzbid) return self.historycheck(nzbinfo) stat = False double_pp = False double_type = None while stat is False: time.sleep(10) queueinfo = self.server.listgroups() queuedl = [qu for qu in queueinfo if qu['NZBID'] == nzbid] if len(queuedl) == 0: logger.fdebug( 'Item is no longer in active queue. It should be finished by my calculations' ) stat = True else: if 'comicrn' in queuedl[0]['PostInfoText'].lower(): double_pp = True double_type = 'ComicRN' elif 'nzbtomylar' in queuedl[0]['PostInfoText'].lower(): double_pp = True double_type = 'nzbToMylar' if all([ len(queuedl[0]['ScriptStatuses']) > 0, double_pp is False ]): for x in queuedl[0]['ScriptStatuses']: if 'comicrn' in x['Name'].lower(): double_pp = True double_type = 'ComicRN' break elif 'nzbtomylar' in x['Name'].lower(): double_pp = True double_type = 'nzbToMylar' break if all([ len(queuedl[0]['Parameters']) > 0, double_pp is False ]): for x in queuedl[0]['Parameters']: if all([ 'comicrn' in x['Name'].lower(), x['Value'] == 'yes' ]): double_pp = True double_type = 'ComicRN' break elif all([ 'nzbtomylar' in x['Name'].lower(), x['Value'] == 'yes' ]): double_pp = True double_type = 'nzbToMylar' break if double_pp is True: logger.warn( '%s has been detected as being active for this category & download. Completed Download Handling will NOT be performed due to this.' % double_type) logger.warn( 'Either disable Completed Download Handling for NZBGet within Mylar, or remove %s from your category script in NZBGet.' % double_type) return {'status': 'double-pp', 'failed': False} logger.fdebug('status: %s' % queuedl[0]['Status']) logger.fdebug('name: %s' % queuedl[0]['NZBName']) logger.fdebug('FileSize: %sMB' % queuedl[0]['FileSizeMB']) logger.fdebug('Download Left: %sMB' % queuedl[0]['RemainingSizeMB']) logger.fdebug('health: %s' % (queuedl[0]['Health'] / 10)) logger.fdebug('destination: %s' % queuedl[0]['DestDir']) logger.fdebug('File has now downloaded!') time.sleep( 5 ) #wait some seconds so shit can get written to history properly return self.historycheck(nzbinfo)
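# Illustrative sketch (not part of the original module): processor() above
# polls NZBGet's active queue every 10 seconds until the NZBID is no longer
# listed and then hands off to the history check.  The polling skeleton
# reduced to its essentials (list_queue and on_finished are hypothetical
# stand-ins for self.server.listgroups() and self.historycheck(nzbinfo)):
import time

def _wait_until_dequeued(nzbid, list_queue, on_finished, poll_seconds=10):
    while True:
        still_queued = [qu for qu in list_queue() if qu['NZBID'] == nzbid]
        if not still_queued:
            # no longer in the active queue - by our reckoning it is finished
            return on_finished()
        time.sleep(poll_seconds)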
def Startit(searchName, searchIssue, searchYear, ComicVersion, IssDateFix):
    #searchName = "Uncanny Avengers"
    #searchIssue = "01"
    #searchYear = "2012"
    #clean up searchName due to webparse.
    searchName = searchName.replace("%20", " ")
    if "," in searchName:
        searchName = searchName.replace(",", "")
    logger.fdebug("name:" + str(searchName))
    logger.fdebug("issue:" + str(searchIssue))
    logger.fdebug("year:" + str(searchYear))

    splitSearch = searchName.split(" ")
    joinSearch = "+".join(splitSearch) + "+" + searchIssue
    searchIsOne = "0" + searchIssue
    searchIsTwo = "00" + searchIssue

    if "-" in searchName:
        searchName = searchName.replace("-", '((\\s)?[-:])?(\\s)?')
    regexName = searchName.replace(" ", '((\\s)?[-:])?(\\s)?')
    #logger.fdebug('searchName:' + searchName)
    #logger.fdebug('regexName:' + regexName)

    if mylar.USE_MINSIZE:
        size_constraints = "minsize=" + str(mylar.MINSIZE)
    else:
        size_constraints = "minsize=10"
    if mylar.USE_MAXSIZE:
        size_constraints = size_constraints + "&maxsize=" + str(mylar.MAXSIZE)

    #max_age must always be defined - previously it was only assigned when
    #USENET_RETENTION was set, which raised a NameError in the feed URL below.
    max_age = ''
    if mylar.USENET_RETENTION is not None:
        max_age = "&age=" + str(mylar.USENET_RETENTION)

    feeds = []
    feeds.append(feedparser.parse("http://nzbindex.nl/rss/alt.binaries.comics.dcp/?sort=agedesc&" + str(size_constraints) + str(max_age) + "&dq=%s&max=50&more=1" % joinSearch))
    if mylar.ALTEXPERIMENTAL:
        feeds.append(feedparser.parse("http://nzbindex.nl/rss/?dq=%s&g[]=41&g[]=510&sort=agedesc&hidespam=0&max=&more=1" % joinSearch))

    entries = []
    mres = {}
    tallycount = 0

    for feed in feeds:
        totNum = len(feed.entries)
        tallycount += len(feed.entries)

        keyPair = {}
        regList = []
        countUp = 0
        logger.fdebug(str(totNum) + " results")

        while countUp < totNum:
            urlParse = feed.entries[countUp].enclosures[0]
            #keyPair[feed.entries[countUp].title] = feed.entries[countUp].link
            keyPair[feed.entries[countUp].title] = urlParse["href"]
            countUp = countUp + 1

        # thanks to SpammyHagar for spending the time in compiling these regEx's!
        regExTest = ""
        regEx = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue, searchYear)
        regExOne = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (regexName, searchIssue, searchYear)
        #Sometimes comics aren't actually published the same year comicVine says - trying to adjust for these cases
        regExTwo = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue, int(searchYear) + 1)
        regExThree = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue, int(searchYear) - 1)
        regExFour = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (regexName, searchIssue, int(searchYear) + 1)
        regExFive = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (regexName, searchIssue, int(searchYear) - 1)

        regexList = [regEx, regExOne, regExTwo, regExThree, regExFour, regExFive]

        except_list = ['releases', 'gold line', 'distribution', '0-day', '0 day']

        for title, link in keyPair.items():
            #logger.fdebug("titlesplit: " + str(title.split("\"")))
            splitTitle = title.split("\"")
            noYear = 'False'

            for subs in splitTitle:
                logger.fdebug(subs)
                regExCount = 0
                if len(subs) > 10 and not any(d in subs.lower() for d in except_list):
                    #Looping through dictionary to run each regEx - length + regex is determined by regexList up top.
#                    while regExCount < len(regexList):
#                        regExTest = re.findall(regexList[regExCount], subs, flags=re.IGNORECASE)
#                        regExCount = regExCount + 1
#                        if regExTest:
#                            logger.fdebug(title)
#                            entries.append({
#                                'title': subs,
#                                'link': str(link)
#                                })
                    if IssDateFix != "no":
                        if IssDateFix == "01" or IssDateFix == "02":
                            ComicYearFix = str(int(searchYear) - 1)
                        else:
                            ComicYearFix = str(int(searchYear) + 1)
                    else:
                        ComicYearFix = searchYear

                    if searchYear not in subs and ComicYearFix not in subs:
                        noYear = 'True'
                        noYearline = subs

                    if (searchYear in subs or ComicYearFix in subs) and noYear == 'True':
                        #this would occur on the next check in the line, if year exists and
                        #the noYear check in the first check came back valid append it
                        subs = noYearline + ' (' + searchYear + ')'
                        noYear = 'False'

                    if noYear == 'False':
                        entries.append({
                            'title': subs,
                            'link': str(link)
                            })
                        break  #break out so we don't write duplicate entries.

#    if len(entries) >= 1:
    if tallycount >= 1:
        mres['entries'] = entries
        return mres
#        print("Title: " + regList[0])
#        print("Link: " + keyPair[regList[0]])
    else:
        logger.fdebug("No Results Found")
        return "no results"
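#Illustrative sketch (not part of the original module): the string concatenation
#above obscures the final shape of the nzbindex RSS query, so this shows the URL
#that results once joinSearch and the size/age constraints are filled in. The
#values here are made-up examples, not configuration defaults.
def example_nzbindex_url():
    joinSearch = "Uncanny+Avengers+01"  #"+".join(name.split(" ")) + "+" + issue
    size_constraints = "minsize=10"     #default when USE_MINSIZE is off
    max_age = "&age=1500"               #only non-empty when USENET_RETENTION is set
    return ("http://nzbindex.nl/rss/alt.binaries.comics.dcp/?sort=agedesc&"
            + size_constraints + max_age + "&dq=%s&max=50&more=1" % joinSearch)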
def Startit(searchName, searchIssue, searchYear, ComicVersion, IssDateFix):
    #searchName = "Uncanny Avengers"
    #searchIssue = "01"
    #searchYear = "2012"
    #clean up searchName due to webparse.
    searchName = searchName.replace("%20", " ")
    if "," in searchName:
        searchName = searchName.replace(",", "")
    logger.fdebug("name:" + str(searchName))
    logger.fdebug("issue:" + str(searchIssue))
    logger.fdebug("year:" + str(searchYear))

    splitSearch = searchName.split(" ")
    joinSearch = "+".join(splitSearch) + "+" + searchIssue
    searchIsOne = "0" + searchIssue
    searchIsTwo = "00" + searchIssue

    if "-" in searchName:
        searchName = searchName.replace("-", '((\\s)?[-:])?(\\s)?')
    regexName = searchName.replace(" ", '((\\s)?[-:])?(\\s)?')
    #logger.fdebug('searchName:' + searchName)
    #logger.fdebug('regexName:' + regexName)

    if mylar.USE_MINSIZE:
        size_constraints = "minsize=" + str(mylar.MINSIZE)
    else:
        size_constraints = "minsize=10"
    if mylar.USE_MAXSIZE:
        size_constraints = size_constraints + "&maxsize=" + str(mylar.MAXSIZE)

    #max_age must always be defined - previously it was only assigned when
    #USENET_RETENTION was set, which raised a NameError in the feed URL below.
    max_age = ''
    if mylar.USENET_RETENTION is not None:
        max_age = "&age=" + str(mylar.USENET_RETENTION)

    feeds = []
    feeds.append(feedparser.parse("http://nzbindex.nl/rss/alt.binaries.comics.dcp/?sort=agedesc&" + str(size_constraints) + str(max_age) + "&dq=%s&max=50&more=1" % joinSearch))
    if mylar.ALTEXPERIMENTAL:
        feeds.append(feedparser.parse("http://nzbindex.nl/rss/?dq=%s&g[]=41&g[]=510&sort=agedesc&hidespam=0&max=&more=1" % joinSearch))

    entries = []
    mres = {}
    tallycount = 0

    for feed in feeds:
        totNum = len(feed.entries)
        tallycount += len(feed.entries)

        #keyPair = {}
        keyPair = []
        regList = []
        countUp = 0
        logger.fdebug(str(totNum) + " results")

        while countUp < totNum:
            urlParse = feed.entries[countUp].enclosures[0]
            #keyPair[feed.entries[countUp].title] = feed.entries[countUp].link
            #keyPair[feed.entries[countUp].title] = urlParse["href"]
            keyPair.append({"title": feed.entries[countUp].title,
                            "link": urlParse["href"],
                            "length": urlParse["length"],
                            "pubdate": feed.entries[countUp].updated})
            countUp = countUp + 1

        # thanks to SpammyHagar for spending the time in compiling these regEx's!
        regExTest = ""
        regEx = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue, searchYear)
        regExOne = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (regexName, searchIssue, searchYear)
        #Sometimes comics aren't actually published the same year comicVine says - trying to adjust for these cases
        regExTwo = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue, int(searchYear) + 1)
        regExThree = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue, int(searchYear) - 1)
        regExFour = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (regexName, searchIssue, int(searchYear) + 1)
        regExFive = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (regexName, searchIssue, int(searchYear) - 1)

        regexList = [regEx, regExOne, regExTwo, regExThree, regExFour, regExFive]

        except_list = ['releases', 'gold line', 'distribution', '0-day', '0 day']

        for entry in keyPair:
            title = entry['title']
            #logger.fdebug("titlesplit: " + str(title.split("\"")))
            splitTitle = title.split("\"")
            noYear = 'False'

            for subs in splitTitle:
                #logger.fdebug('sub:' + subs)
                regExCount = 0
                if len(subs) > 10 and not any(d in subs.lower() for d in except_list):
                    #Looping through dictionary to run each regEx - length + regex is determined by regexList up top.
#                    while regExCount < len(regexList):
#                        regExTest = re.findall(regexList[regExCount], subs, flags=re.IGNORECASE)
#                        regExCount = regExCount + 1
#                        if regExTest:
#                            logger.fdebug(title)
#                            entries.append({
#                                'title': subs,
#                                'link': str(link)
#                                })
                    if IssDateFix != "no":
                        if IssDateFix == "01" or IssDateFix == "02":
                            ComicYearFix = str(int(searchYear) - 1)
                        else:
                            ComicYearFix = str(int(searchYear) + 1)
                    else:
                        ComicYearFix = searchYear

                    if searchYear not in subs and ComicYearFix not in subs:
                        noYear = 'True'
                        noYearline = subs

                    if (searchYear in subs or ComicYearFix in subs) and noYear == 'True':
                        #this would occur on the next check in the line, if year exists and
                        #the noYear check in the first check came back valid append it
                        subs = noYearline + ' (' + searchYear + ')'
                        noYear = 'False'

                    if noYear == 'False':
                        entries.append({
                            'title': subs,
                            'link': entry['link'],
                            'pubdate': entry['pubdate'],
                            'length': entry['length']
                            })
                        break  #break out so we don't write duplicate entries.

#    if len(entries) >= 1:
    if tallycount >= 1:
        mres['entries'] = entries
        return mres
#        print("Title: " + regList[0])
#        print("Link: " + keyPair[regList[0]])
    else:
        logger.fdebug("No Results Found")
        return "no results"
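#Illustrative sketch (not part of the original module): how the base regEx built
#above is intended to behave against a release title. regexName tolerates an
#optional '-' or ':' between words, and the (0)?(0)? prefix tolerates zero-padded
#issue numbers. The title and search values here are made-up examples.
def example_title_match():
    import re
    regexName = 'Uncanny((\\s)?[-:])?(\\s)?Avengers'
    searchIssue = '1'
    searchYear = '2012'
    regEx = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue, searchYear)
    title = 'Uncanny Avengers 001 (2012) (digital)'
    #returns a non-empty list of captured groups when the title matches
    return re.findall(regEx, title, flags=re.IGNORECASE)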
def getComic(comicid, type, issueid=None, arc=None, arcid=None, arclist=None, comicidlist=None):
    if type == 'issue':
        offset = 1
        issue = {}
        ndic = []
        issuechoice = []
        comicResults = []
        firstdate = '2099-00-00'
        #let's find out how many results we get from the query...
        if comicid is None:
            #if comicid is None, it's coming from the story arc search results.
            id = arcid
            #since the arclist holds the issueids, and the pertinent reading order - we need to strip out the reading order so this works.
            aclist = ''
            if arclist.startswith('M'):
                islist = arclist[1:]
            else:
                for ac in arclist.split('|'):
                    aclist += ac[:ac.find(',')] + '|'
                if aclist.endswith('|'):
                    aclist = aclist[:-1]
                islist = aclist
        else:
            id = comicid
            islist = None

        searched = pulldetails(id, 'issue', None, 0, islist)
        if searched is None:
            return False
        totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
        logger.fdebug("there are " + str(totalResults) + " search results...")
        if not totalResults:
            return False

        countResults = 0
        while countResults < int(totalResults):
            logger.fdebug("querying range from " + str(countResults) + " to " + str(countResults + 100))
            if countResults > 0:
                #new api - have to change to page # instead of offset count
                offsetcount = countResults
                searched = pulldetails(id, 'issue', None, offsetcount, islist)
            issuechoice, tmpdate = GetIssuesInfo(id, searched, arcid)
            if tmpdate < firstdate:
                firstdate = tmpdate
            ndic = ndic + issuechoice
            #search results are limited to 100 and by pagination now...let's account for this.
            countResults = countResults + 100

        issue['issuechoice'] = ndic
        issue['firstdate'] = firstdate
        return issue

    elif type == 'comic':
        dom = pulldetails(comicid, 'comic', None, 1)
        return GetComicInfo(comicid, dom)

    elif type == 'firstissue':
        dom = pulldetails(comicid, 'firstissue', issueid, 1)
        return GetFirstIssue(issueid, dom)

    elif type == 'storyarc':
        dom = pulldetails(arc, 'storyarc', None, 1)
        return GetComicInfo(issueid, dom)

    elif type == 'comicyears':
        #used by the story arc searcher when adding a given arc to poll each ComicID in order to populate the Series Year & volume (hopefully).
        #this grabs each issue based on issueid, and then subsets the comicid for each to be used later.
        #set the offset to 0, since we're doing a filter.
        dom = pulldetails(arcid, 'comicyears', offset=0, comicidlist=comicidlist)
        return GetSeriesYears(dom)

    elif type == 'import':
        #used by the importer when doing a scan with metatagging enabled. If metatagging comes back true, then there's an IssueID present
        #within the tagging (with CT). This compiles all of the IssueID's during a scan (in 100's), and returns the corresponding CV data
        #related to the given IssueID's - namely ComicID, Name, Volume (more at some point, but those are the important ones).
        offset = 1
        id_count = 0
        import_list = []
        logger.fdebug('comicidlist:' + str(comicidlist))

        while id_count < len(comicidlist):
            #break it up by 100 per api hit
            #do the first 100 regardless
            in_cnt = 0
            if id_count + 100 <= len(comicidlist):
                endcnt = id_count + 100
            else:
                endcnt = len(comicidlist)

            for i in range(id_count, endcnt):
                if in_cnt == 0:
                    tmpidlist = str(comicidlist[i])
                else:
                    tmpidlist += '|' + str(comicidlist[i])
                in_cnt += 1

            logger.info('tmpidlist: ' + str(tmpidlist))
            searched = pulldetails(None, 'import', offset=0, comicidlist=tmpidlist)
            if searched is None:
                break
            else:
                tGIL = GetImportList(searched)
                import_list += tGIL
            id_count += 100

        return import_list

    elif type == 'update_dates':
        dom = pulldetails(None, 'update_dates', offset=1, comicidlist=comicidlist)
        return UpdateDates(dom)
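#Illustrative sketch (not part of the original module): the 'import' branch above
#walks comicidlist in slices of 100 and pipe-joins each slice, since that is the
#batch size one API hit accepts. This is a standalone equivalent of that chunking;
#the function name and the sample input below are hypothetical.
def example_batch_idlist(comicidlist):
    batches = []
    id_count = 0
    while id_count < len(comicidlist):
        endcnt = min(id_count + 100, len(comicidlist))
        #pipe-join the slice, mirroring the tmpidlist built in getComic()
        batches.append('|'.join(str(c) for c in comicidlist[id_count:endcnt]))
        id_count += 100
    return batches

#example_batch_idlist(range(250)) -> three pipe-joined strings of 100, 100 and 50 IDs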