def RSS_Search(ComicName, IssueNumber):
    """Scan a previously fetched RSS feed for watchlist hits and queue them in SABnzbd.

    Returns the number of comics snatched (ssabcount).

    NOTE(review): this function relies on module-level state that is not
    defined in this view -- ``w``, ``d`` (a feedparser result), ``tot``,
    ``kc``, ``ki``, ``watchfnd``, ``watchfndextra``, ``nzbprov`` and
    ``SABAPI`` -- and will raise NameError as written.  The author's own
    first comment says it needs to be redone.  The original file's
    whitespace was mangled, so the nesting below (notably the position of
    ``incloop-=1``) is reconstructed; verify against upstream history.
    """
    #this all needs to be REDONE...#
    loopd = int(w-1)
    ssab = []          # names of watchlist entries we snatched
    ssabcount = 0      # running count of snatches, returned to caller
    print ("--------RSS MATCHING-----------------")
    for entry in d['entries']:
        # test for comic name here
        print loopd, entry['title']
        # Experimental process:
        # since we're comparing the watchlist titles to the rss feed (for more
        # robust matching) the results will be 2nd/3rd variants, MR's, and
        # comics on the watchlist but not necessarily 'NEW' releases.
        # let's first compare watchlist to release list
        incloop = int (tot -1)
        while (incloop > -1):
            cleantitle = helpers.cleanName(entry['title'])
            if str(watchfnd[incloop]).lower() in str(cleantitle).lower():
                # NOTE(review): str(...).lower() can never be None, so this
                # guard is always true; the real filter is the inner test.
                if str(watchfndextra[incloop]).lower() is not None:
                    if str(watchfndextra[incloop]).lower() not in str(cleantitle).lower():
                        # extra qualifier not present in the feed title - not a match
                        break
                # now we have a match on watchlist and on release list, let's check
                # if the issue is the same on the feed and the releaselist.
                # we have to remove the # sign from the ki[array] field first
                ki[incloop] = re.sub("\D", "", str(ki[incloop]))
                if str(ki[incloop]) in str(cleantitle):
                    print ("MATCH FOR DOWNLOAD!!\n WATCHLIST: " + str(watchfnd[incloop]) + "\n RLSLIST: " + str(kc[incloop]) + " ISSUE# " + str(ki[incloop]) + "\n RSS: " + str(cleantitle))
                    #let's do the DOWNLOAD and send to SABnzbd
                    #this is for nzb.su - API LIMIT :(
                    linkstart = os.path.splitext(entry['link'])[0]
                    #following is JUST for nzb.su
                    if nzbprov == 'nzb.su':
                        # nzb.su keeps the extension; re-escape '&' for the SAB url
                        linkit = os.path.splitext(entry['link'])[1]
                        linkit = linkit.replace("&", "%26")
                        thislink = str(linkstart) + str(linkit)
                    else:
                        # this should work for every other provider
                        linkstart = linkstart.replace("&", "%26")
                        thislink = str(linkstart)
                    # NOTE(review): SAB host/port, category and variable SABAPI are
                    # hard-coded here instead of using the mylar.SAB_* settings.
                    tmp = "http://192.168.2.2:8085/api?mode=addurl&name=" + str(thislink) + "&pp=3&cat=comics&apikey=" + str(SABAPI)
                    print tmp
                    ssab.append(str(watchfnd[incloop]))
                    ssabcount+=1
                    urllib.urlopen(tmp);
            incloop-=1
            # - End of Experimental Process
    print ("snatched " + str(ssabcount) + " out of " + str(tot) + " comics via rss...")
    return ssabcount
def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, nzbprov, nzbpr):
    """Search one NZB provider for a single comic issue and hand any hit to SABnzbd.

    Parameters (all coerced to str internally):
      ComicName   -- series title to search for
      IssueNumber -- issue wanted; ".00" suffix and non-digits are stripped
      ComicYear   -- cover year used to validate result titles
      SeriesYear  -- series start year, used only when renaming the SAB queue slot
      nzbprov     -- "nzb.su", "dognzb" or "experimental"
      nzbpr       -- remaining-provider count; 0 means this was the last attempt

    Returns "yes" if an nzb was grabbed and queued, otherwise "no".

    NOTE(review): the original file's whitespace was mangled; block nesting
    below is reconstructed and should be diffed against upstream history.
    Also note a second ``NZB_SEARCH`` is defined later in this file and
    shadows this one at import time -- one of the duplicates should go.
    """
    logger.info(
        u"Shhh be very quiet...I'm looking for "
        + ComicName
        + " issue: "
        + str(IssueNumber)
        + " using "
        + str(nzbprov)
    )
    # pick the API key matching the chosen provider
    if nzbprov == "nzb.su":
        apikey = mylar.NZBSU_APIKEY
    elif nzbprov == "dognzb":
        apikey = mylar.DOGNZB_APIKEY
    elif nzbprov == "experimental":
        apikey = "none"
    # the preferred file type becomes part of the search string
    if mylar.PREFERRED_QUALITY == 0:
        filetype = ""
    elif mylar.PREFERRED_QUALITY == 1:
        filetype = ".cbr"
    elif mylar.PREFERRED_QUALITY == 2:
        filetype = ".cbz"
    # figure out what was missed via rss feeds and do a manual search via api
    findcomic = []
    findcomiciss = []
    findcount = 0
    ci = ""
    comsearch = []
    isssearch = []
    comyear = str(ComicYear)
    findcomic.append(str(ComicName))
    # strip a trailing ".00", then every remaining non-digit, from the issue
    IssueNumber = str(re.sub("\.00", "", str(IssueNumber)))
    findcomiciss.append(str(re.sub("\D", "", str(IssueNumber))))
    # replace whitespace in comic name with %20 for api search
    cm = re.sub(" ", "%20", str(findcomic[findcount]))
    # issue-padding problem: '011' vs '11' return different provider results,
    # so cmloopit counts how many zero-padded variants get tried below
    if len(str(findcomiciss[findcount])) == 1:
        cmloopit = 3
    elif len(str(findcomiciss[findcount])) == 2:
        cmloopit = 2
    else:
        cmloopit = 1
    isssearch.append(str(findcomiciss[findcount]))
    comsearch.append(cm)
    findcount += 1
    findloop = 0
    foundcomic = []
    while findloop < (findcount):
        comsrc = comsearch[findloop]
        while cmloopit >= 1:
            # here we account for issue pattern variations (00x / 0x / x)
            if cmloopit == 3:
                comsearch[findloop] = comsrc + "%2000" + isssearch[findloop] + "%20" + str(filetype)
            elif cmloopit == 2:
                comsearch[findloop] = comsrc + "%200" + isssearch[findloop] + "%20" + str(filetype)
            elif cmloopit == 1:
                comsearch[findloop] = comsrc + "%20" + isssearch[findloop] + "%20" + str(filetype)
            if nzbprov != "experimental":
                if nzbprov == "dognzb":
                    findurl = (
                        "http://dognzb.cr/api?t=search&apikey="
                        + str(apikey)
                        + "&q="
                        + str(comsearch[findloop])
                        + "&o=xml&cat=7030"
                    )
                elif nzbprov == "nzb.su":
                    findurl = (
                        "http://nzb.su/api?t=search&q="
                        + str(comsearch[findloop])
                        + "&apikey="
                        + str(apikey)
                        + "&o=xml&cat=7030"
                    )
                bb = feedparser.parse(findurl)
            elif nzbprov == "experimental":
                # raw binsearch-style scrape instead of a newznab API
                bb = parseit.MysterBinScrape(comsearch[findloop], comyear)
            done = False
            foundc = "no"
            if bb == "no results":
                pass
            elif len(bb["entries"]) == 0:
                foundc = "no"
            else:
                for entry in bb["entries"]:
                    cleantitle = helpers.cleanName(str(entry["title"]))
                    if done:
                        break
                    # narrow search down: bracketed chunks carry year/scanner info
                    ripperlist = ["digital-", "empire", "dcp"]
                    # split on brackets: m[0] is "title + issue", the rest are (...) chunks
                    m = re.findall("[^()]+", cleantitle)
                    lenm = len(m)
                    cnt = 0
                    while cnt < lenm:
                        if m[cnt] is None:
                            break
                        if cnt == 0:
                            comic_andiss = m[cnt]
                            print ("Comic:" + str(comic_andiss))
                        # NOTE(review): [:-2] drops the LAST two chars; a "19"/"20"
                        # century-prefix test would be m[cnt][:2] -- verify intent.
                        if m[cnt][:-2] == "19" or m[cnt][:-2] == "20":
                            print ("year detected!")
                            result_comyear = m[cnt]
                            if str(comyear) in result_comyear:
                                print (str(comyear) + " - right - years match baby!")
                                yearmatch = "true"
                            else:
                                print (str(comyear) + " - not right - years don't match ")
                                yearmatch = "false"
                        if "digital" in m[cnt] and len(m[cnt]) == 7:
                            pass  # plain "digital" edition tag, ignore
                        if " of " in m[cnt]:
                            # mini-series marker, e.g. "2 of 5"
                            result_of = m[cnt]
                        if "cover" in m[cnt]:
                            result_comcovers = m[cnt]
                        for ripper in ripperlist:
                            if ripper in m[cnt]:
                                result_comscanner = m[cnt]
                        cnt += 1
                    # NOTE(review): yearmatch is unbound (NameError) when no
                    # year-looking chunk was found in the title.
                    if yearmatch == "false":
                        break
                    splitit = []
                    watchcomic_split = []
                    comic_iss = re.sub("[\-\:\,]", "", str(comic_andiss))
                    splitit = comic_iss.split(None)
                    watchcomic_split = findcomic[findloop].split(None)
                    # a v2/v3 marker in the title means one extra token to discount
                    bmm = re.findall("v\d", comic_iss)
                    if len(bmm) > 0:
                        splitst = len(splitit) - 2
                    else:
                        splitst = len(splitit) - 1
                    if (splitst) != len(watchcomic_split):
                        print ("incorrect comic lengths...not a match")
                        if str(splitit[0]).lower() == "the":
                            print ("THE word detected...attempting to adjust pattern matching")
                            # NOTE(review): assigns a list slice INTO splitit[0];
                            # likely meant splitit = splitit[1:] -- verify.
                            splitit[0] = splitit[4:]
                    else:
                        print ("length match..proceeding")
                        n = 0
                        scount = 0
                        while n <= len(splitit) - 1:
                            if n < len(splitit) - 1:
                                # word-by-word compare of watch title vs result title
                                if str(watchcomic_split[n].lower()) in str(splitit[n].lower()):
                                    scount += 1
                                elif len(splitit[n]) < 3 or (splitit[n][1:]) == "v":
                                    # we hit a versioning # - account for it
                                    if splitit[n][2:].isdigit():
                                        comicversion = str(splitit[n])
                            else:
                                # the last token should be the issue number
                                if splitit[n].isdigit():
                                    print ("issue detected")
                                    comiss = splitit[n]
                                    comicNAMER = n - 1
                                    comNAME = splitit[0]
                                    cmnam = 1
                                    while cmnam < comicNAMER:
                                        comNAME = str(comNAME) + " " + str(splitit[cmnam])
                                        cmnam += 1
                                else:
                                    pass
                            n += 1
                        # NOTE(review): py2 integer division -- spercent can only be 0 or 100.
                        spercent = (scount / int(len(splitit))) * 100
                        print ("this should be a match!")
                        # NOTE(review): comiss is unbound if the last token was not a digit.
                        if int(findcomiciss[findloop]) == int(comiss):
                            print ("issues match!")
                            ## -- inherit issue. Comic year is non-standard. nzb year is the year
                            ## -- comic was printed, not the start year of the comic series and
                            ## -- thus the deciding component if matches are correct or not
                            linkstart = os.path.splitext(entry["link"])[0]
                            # following is JUST for nzb.su
                            if nzbprov == "nzb.su":
                                linkit = os.path.splitext(entry["link"])[1]
                                linkit = linkit.replace("&", "%26")
                                linkapi = str(linkstart) + str(linkit)
                            else:
                                # this should work for every other provider
                                linkstart = linkstart.replace("&", "%26")
                                linkapi = str(linkstart)
                            # blackhole: just drop the .nzb in the watched dir and stop
                            if mylar.BLACKHOLE:
                                if os.path.exists(mylar.BLACKHOLE_DIR):
                                    filenamenzb = (
                                        str(ComicName)
                                        + " "
                                        + str(IssueNumber)
                                        + " ("
                                        + str(comyear)
                                        + ").nzb"
                                    )
                                    urllib.urlretrieve(linkapi, str(mylar.BLACKHOLE_DIR) + str(filenamenzb))
                                    logger.info(
                                        u"Successfully sent .nzb to your Blackhole directory : "
                                        + str(mylar.BLACKHOLE_DIR)
                                        + str(filenamenzb)
                                    )
                            # end blackhole
                            else:
                                # download the nzb into the local cache first
                                tmppath = mylar.CACHE_DIR
                                print ("cache directory set to: " + str(tmppath))
                                if os.path.exists(tmppath):
                                    filenamenzb = os.path.split(linkapi)[1]
                                    # provider-specific slicing to a bare nzb name
                                    if nzbprov == "nzb.su":
                                        filenzb = linkstart[21:]
                                    if nzbprov == "experimental":
                                        filenzb = filenamenzb[6:]
                                    if nzbprov == "dognzb":
                                        # NOTE(review): '==' compares and discards the result;
                                        # should be '=' or filenzb stays unbound for dognzb.
                                        filenzb == str(filenamenzb)
                                    savefile = str(tmppath) + "/" + str(filenzb) + ".nzb"
                                else:
                                    # let's make the dir.
                                    try:
                                        os.makedirs(str(mylar.CACHE_DIR))
                                        logger.info(u"Cache Directory successfully created at: " + str(mylar.CACHE_DIR))
                                        savefile = str(mylar.CACHE_DIR) + "/" + str(filenzb) + ".nzb"
                                    # NOTE(review): 'except OSError.e' is an attribute lookup,
                                    # not 'except OSError as e'; it raises AttributeError
                                    # instead of catching, and filenzb is unbound on this path.
                                    except OSError.e:
                                        if e.errno != errno.EEXIST:
                                            raise
                                print ("savefile set to: " + str(savefile))
                                urllib.urlretrieve(linkapi, str(savefile))
                                print ("sucessfully retrieve nzb to : " + str(savefile))
                                # check sab for current pause status
                                print ("sab host set to :" + str(mylar.SAB_HOST))
                                sabqstatusapi = (
                                    str(mylar.SAB_HOST)
                                    + "/api?mode=qstatus&output=xml&apikey="
                                    + str(mylar.SAB_APIKEY)
                                )
                                from xml.dom.minidom import parseString
                                import urllib2
                                # NOTE(review): 'file' shadows the py2 builtin
                                file = urllib2.urlopen(sabqstatusapi)
                                data = file.read()
                                file.close()
                                dom = parseString(data)
                                for node in dom.getElementsByTagName("paused"):
                                    pausestatus = node.firstChild.wholeText
                                if pausestatus != "True":
                                    # pause sab first because it downloads too quick (cbr's are small!)
                                    pauseapi = str(mylar.SAB_HOST) + "/api?mode=pause&apikey=" + str(mylar.SAB_APIKEY)
                                    urllib.urlopen(pauseapi)
                                    print "Queue paused"
                                else:
                                    print "Queue already paused"
                                if mylar.RENAME_FILES == 1:
                                    # feed SAB the cached local file so we control the name
                                    tmpapi = (
                                        str(mylar.SAB_HOST)
                                        + "/api?mode=addlocalfile&name="
                                        + str(savefile)
                                        + "&pp=3&cat="
                                        + str(mylar.SAB_CATEGORY)
                                        + "&script=ComicRN.py&apikey="
                                        + str(mylar.SAB_APIKEY)
                                    )
                                else:
                                    tmpapi = (
                                        str(mylar.SAB_HOST)
                                        + "/api?mode=addurl&name="
                                        + str(linkapi)
                                        + "&pp=3&cat="
                                        + str(mylar.SAB_CATEGORY)
                                        + "&script=ComicRN.py&apikey="
                                        + str(mylar.SAB_APIKEY)
                                    )
                                print ("sab api string:" + str(tmpapi))
                                time.sleep(5)
                                urllib.urlopen(tmpapi)
                                if mylar.RENAME_FILES == 1:
                                    # let's give it 5 extra seconds to retrieve the nzb data...
                                    time.sleep(5)
                                    outqueue = (
                                        str(mylar.SAB_HOST)
                                        + "/api?mode=queue&start=START&limit=LIMIT&output=xml&apikey="
                                        + str(mylar.SAB_APIKEY)
                                    )
                                    print ("outqueue line generated")
                                    urllib.urlopen(outqueue)
                                    time.sleep(5)
                                    print ("passed api request to SAB")
                                    # <slots><slot><filename> holds each queued nzb's name;
                                    # rename via api?mode=queue&name=rename&value=<nzo_id>&value2=NEWNAME
                                    from xml.dom.minidom import parseString
                                    import urllib2
                                    file = urllib2.urlopen(outqueue)
                                    data = file.read()
                                    file.close()
                                    dom = parseString(data)
                                    queue_slots = dom.getElementsByTagName("filename")
                                    queue_cnt = len(queue_slots)
                                    print ("there are " + str(queue_cnt) + " things in SABnzbd's queue")
                                    que = 0
                                    slotmatch = "no"
                                    for queue in queue_slots:
                                        # walk the queue until our cached filename shows up
                                        queue_file = dom.getElementsByTagName("filename")[que].firstChild.wholeText
                                        while "Trying to fetch NZB" in queue_file:
                                            # let's keep waiting until nzbname is resolved by SABnzbd
                                            time.sleep(5)
                                            file = urllib2.urlopen(outqueue)
                                            data = file.read()
                                            file.close()
                                            dom = parseString(data)
                                            queue_file = dom.getElementsByTagName("filename")[que].firstChild.wholeText
                                        print (str(queue_file))
                                        print (str(filenzb))
                                        queue_file = queue_file.replace("_", " ")
                                        if str(queue_file) in str(filenzb):
                                            print ("matched")
                                            slotmatch = "yes"
                                            slot_nzoid = dom.getElementsByTagName("nzo_id")[que].firstChild.wholeText
                                            print ("slot_nzoid: " + str(slot_nzoid))
                                            break
                                        que += 1
                                    if slotmatch == "yes":
                                        if mylar.REPLACE_SPACES:
                                            repchar = mylar.REPLACE_CHAR
                                        else:
                                            repchar = " "
                                        # let's make sure there's no crap in the ComicName since it's O.G.
                                        ComicNM = re.sub("[\:\,]", "", str(ComicName))
                                        renameit = (
                                            str(ComicNM)
                                            + " "
                                            + str(IssueNumber)
                                            + " ("
                                            + str(SeriesYear)
                                            + ")"
                                            + " "
                                            + "("
                                            + str(comyear)
                                            + ")"
                                        )
                                        renameit = renameit.replace(" ", repchar)
                                        nzo_ren = (
                                            str(mylar.SAB_HOST)
                                            + "/api?mode=queue&name=rename&apikey="
                                            + str(mylar.SAB_APIKEY)
                                            + "&value="
                                            + str(slot_nzoid)
                                            + "&value2="
                                            + str(renameit)
                                        )
                                        print ("attempting to rename queue to " + str(nzo_ren))
                                        urllib2.urlopen(nzo_ren)
                                        print ("renamed!")
                                        # we need to track nzo_id to make sure finished downloaded with SABnzbd.
                                    else:
                                        logger.info(
                                            u"Couldn't locate file in SAB - are you sure it's being downloaded?"
                                        )
                                # resume sab if it was running before we started
                                if pausestatus != "True":
                                    # let's unpause queue now that we did our jobs.
                                    resumeapi = str(mylar.SAB_HOST) + "/api?mode=resume&apikey=" + str(mylar.SAB_APIKEY)
                                    urllib.urlopen(resumeapi)
                            # flag success and fall out of the entry loop
                            foundc = "yes"
                            done = True
                            break
                        else:
                            foundc = "no"
            if done == True:
                break
            cmloopit -= 1
        findloop += 1
        if foundc == "yes":
            foundcomic.append("yes")
            logger.info(
                u"Found :"
                + str(ComicName)
                + " ("
                + str(comyear)
                + ") issue: "
                + str(IssueNumber)
                + " using "
                + str(nzbprov)
            )
            break
        elif foundc == "no" and nzbpr <> 0:
            logger.info(u"More than one search provider given - trying next one.")
        elif foundc == "no" and nzbpr == 0:
            foundcomic.append("no")
            logger.info(
                u"Couldn't find Issue "
                + str(IssueNumber)
                + " of "
                + str(ComicName)
                + "("
                + str(comyear)
                + "). Status kept as wanted."
            )
            break
    return foundc
def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, nzbprov, nzbpr):
    """Duplicate NZB_SEARCH (older, single-quoted style).

    Being defined second, THIS definition is the one Python keeps at import
    time, shadowing the earlier NZB_SEARCH above.  Same contract: search the
    given provider for ComicName/IssueNumber, cache the hit and push it into
    SABnzbd; returns "yes" on a grab, otherwise "no".

    NOTE(review): the original file's whitespace was mangled; nesting below
    is reconstructed and should be diffed against upstream history.  One of
    the two duplicate definitions should be deleted.
    """
    logger.info(u"Shhh be very quiet...I'm looking for " + ComicName + " issue: " + str(IssueNumber) + " using " + str(nzbprov))
    # provider api key
    if nzbprov == 'nzb.su':
        apikey = mylar.NZBSU_APIKEY
    elif nzbprov == 'dognzb':
        apikey = mylar.DOGNZB_APIKEY
    elif nzbprov == 'experimental':
        apikey = 'none'
    # preferred filetype gets appended to the query string
    if mylar.PREFERRED_QUALITY == 0: filetype = ""
    elif mylar.PREFERRED_QUALITY == 1: filetype = ".cbr"
    elif mylar.PREFERRED_QUALITY == 2: filetype = ".cbz"
    # figure out what was missed via rss feeds and do a manual search via api
    findcomic = []
    findcomiciss = []
    findcount = 0
    ci = ""
    comsearch = []
    isssearch = []
    comyear = str(ComicYear)
    findcomic.append(str(ComicName))
    # strip ".00" then non-digits from the issue number
    IssueNumber = str(re.sub("\.00", "", str(IssueNumber)))
    findcomiciss.append(str(re.sub("\D", "", str(IssueNumber))))
    # replace whitespace in comic name with %20 for api search
    cm = re.sub(" ", "%20", str(findcomic[findcount]))
    # issue padding problem: '011' vs '11' give different provider results,
    # so cmloopit drives the zero-padding variants tried below
    if len(str(findcomiciss[findcount])) == 1:
        cmloopit = 3
    elif len(str(findcomiciss[findcount])) == 2:
        cmloopit = 2
    else:
        cmloopit = 1
    isssearch.append(str(findcomiciss[findcount]))
    comsearch.append(cm)
    findcount+=1
    findloop = 0
    foundcomic = []
    while (findloop < (findcount) ):
        comsrc = comsearch[findloop]
        while (cmloopit >= 1 ):
            # here we account for issue pattern variations (00x / 0x / x)
            if cmloopit == 3:
                comsearch[findloop] = comsrc + "%2000" + isssearch[findloop] + "%20" + str(filetype)
            elif cmloopit == 2:
                comsearch[findloop] = comsrc + "%200" + isssearch[findloop] + "%20" + str(filetype)
            elif cmloopit == 1:
                comsearch[findloop] = comsrc + "%20" + isssearch[findloop] + "%20" + str(filetype)
            if nzbprov != 'experimental':
                if nzbprov == 'dognzb':
                    findurl = "http://dognzb.cr/api?t=search&apikey=" + str(apikey) + "&q=" + str(comsearch[findloop]) + "&o=xml&cat=7030"
                elif nzbprov == 'nzb.su':
                    findurl = "http://nzb.su/api?t=search&q=" + str(comsearch[findloop]) + "&apikey=" + str(apikey) + "&o=xml&cat=7030"
                bb = feedparser.parse(findurl)
            elif nzbprov == 'experimental':
                # raw binsearch-style scrape instead of a newznab API
                bb = parseit.MysterBinScrape(comsearch[findloop], comyear)
            done = False
            foundc = "no"
            if bb == "no results":
                pass
            elif (len(bb['entries']) == 0):
                foundc = "no"
            else:
                for entry in bb['entries']:
                    cleantitle = helpers.cleanName(str(entry['title']))
                    if done:
                        break
                    # the title text ends at the first ' (' -- year/scanner follow in brackets
                    comlen = str(cleantitle).find(' (')
                    comsub = str(cleantitle)[:comlen]
                    # last space before the brackets should precede the issue number
                    comspos = comsub.rfind(" ")
                    comiss = comsub[comspos:comlen]
                    # NOTE(review): breaks when there is no space before the bracket,
                    # e.g. "star trek tng 1(c2c)(2012)" (original author's caveat).
                    splitit = []
                    splitcomp = []
                    comyx = comsub[:comspos]
                    splitchk = comyx.replace(" - ", " ")
                    splitit = splitchk.split(None)
                    splitcomp = findcomic[findloop].split(None)
                    if len(splitit) != len(splitcomp):
                        # incorrect comic lengths...not a match
                        if str(comyx[:3]).lower() == "the":
                            # drop a leading "The " and retry the split
                            splitMOD = splitchk[4:]
                            splitit = splitMOD.split(None)
                    else:
                        # word-by-word comparison of result title vs watch title
                        n = 0
                        scount = 0
                        while ( n <= (len(splitit)-1) ):
                            if str(splitcomp[n].lower()) in str(splitit[n].lower()):
                                scount+=1
                            elif ':' in splitit[n] or '-' in splitit[n]:
                                # non-character keyword...skipped
                                splitrep = splitit[n].replace('-', '')
                                pass
                            else:
                                pass
                            n+=1
                        # NOTE(review): py2 integer division -- spercent is only 0 or 100
                        spercent = ( scount/int(len(splitit)) ) * 100
                        # issue comparison now as well
                        if int(findcomiciss[findloop]) == int(comiss):
                            # check for 'extra's - ie. Year
                            comex = str(cleantitle)[comlen:]
                            comspl = comex.split()
                            LENcomspl = len(comspl)
                            n = 0
                            while (LENcomspl > n):
                                if str(comyear) not in comspl[n]:
                                    yearmatch = "false"
                                    break
                                else:
                                    yearmatch = "true"
                                    break
                                # NOTE(review): unreachable -- both branches break,
                                # so only comspl[0] is ever examined
                                n+=1
                            if yearmatch == "false":
                                break
                            ## -- inherit issue. Comic year is non-standard. nzb year is the year
                            ## -- comic was printed, not the start year of the comic series and
                            ## -- thus the deciding component if matches are correct or not
                            linkstart = os.path.splitext(entry['link'])[0]
                            print ("linkstart:" + str(linkstart))
                            #following is JUST for nzb.su
                            if nzbprov == 'nzb.su':
                                linkit = os.path.splitext(entry['link'])[1]
                                linkit = linkit.replace("&", "%26")
                                linkapi = str(linkstart) + str(linkit)
                            else:
                                # this should work for every other provider
                                linkstart = linkstart.replace("&", "%26")
                                linkapi = str(linkstart)
                            # blackhole: drop the .nzb into the watched dir and stop
                            if mylar.BLACKHOLE:
                                if os.path.exists(mylar.BLACKHOLE_DIR):
                                    filenamenzb = str(ComicName) + " " + str(IssueNumber) + " (" + str(comyear) + ").nzb"
                                    urllib.urlretrieve(linkapi, str(mylar.BLACKHOLE_DIR) + str(filenamenzb))
                                    logger.info(u"Successfully sent .nzb to your Blackhole directory : " + str(mylar.BLACKHOLE_DIR) + str(filenamenzb) )
                            #end blackhole
                            else:
                                # cache the nzb locally; provider-specific name slicing
                                tmppath = "cache/"
                                if nzbprov == 'nzb.su':
                                    filenzb = linkstart[21:]
                                elif nzbprov == 'experimental':
                                    filenzb = os.path.splitext(linkapi)[0][31:]
                                elif nzbprov == 'dognzb':
                                    filenamenzb = os.path.splitext(linkapi)[0][23:]
                                    lenfilenzb = filenamenzb.find('/', 23)
                                    filenzb = str(filenamenzb)[:lenfilenzb]
                                if os.path.exists(tmppath):
                                    savefile = str(mylar.PROG_DIR) + "/" + str(tmppath) + str(filenzb) + ".nzb"
                                else:
                                    savefile = str(mylar.PROG_DIR) + "/" + str(filenzb) + ".nzb"
                                print ("savefile:" + str(savefile))
                                urllib.urlretrieve(linkapi, str(savefile))
                                print ("retrieved file ")
                                #check sab for current pause status
                                sabqstatusapi = str(mylar.SAB_HOST) + "/api?mode=qstatus&output=xml&apikey=" + str(mylar.SAB_APIKEY)
                                from xml.dom.minidom import parseString
                                import urllib2
                                # NOTE(review): 'file' shadows the py2 builtin
                                file = urllib2.urlopen(sabqstatusapi);
                                data = file.read()
                                file.close()
                                dom = parseString(data)
                                for node in dom.getElementsByTagName('paused'):
                                    pausestatus = node.firstChild.wholeText
                                print pausestatus
                                if pausestatus != 'True':
                                    #pause sab first because it downloads too quick (cbr's are small!)
                                    pauseapi = str(mylar.SAB_HOST) + "/api?mode=pause&apikey=" + str(mylar.SAB_APIKEY)
                                    urllib.urlopen(pauseapi);
                                if mylar.RENAME_FILES == 1:
                                    # feed SAB the cached local file so we control the name
                                    tmpapi = str(mylar.SAB_HOST) + "/api?mode=addlocalfile&name=" + str(savefile) + "&pp=3&cat=" + str(mylar.SAB_CATEGORY) + "&script=ComicRN.py&apikey=" + str(mylar.SAB_APIKEY)
                                else:
                                    tmpapi = str(mylar.SAB_HOST) + "/api?mode=addurl&name=" + str(linkapi) + "&pp=3&cat=" + str(mylar.SAB_CATEGORY) + "&script=ComicRN.py&apikey=" + str(mylar.SAB_APIKEY)
                                time.sleep(5)
                                urllib.urlopen(tmpapi);
                                print ("sent file to sab:" + str(tmpapi))
                                if mylar.RENAME_FILES == 1:
                                    #let's give it 5 extra seconds to retrieve the nzb data...
                                    time.sleep(5)
                                    outqueue = str(mylar.SAB_HOST) + "/api?mode=queue&start=START&limit=LIMIT&output=xml&apikey=" + str(mylar.SAB_APIKEY)
                                    urllib.urlopen(outqueue);
                                    time.sleep(5)
                                    #<slots><slot><filename> is each queued nzb; rename with
                                    #api?mode=queue&name=rename&value=<nzo_id>&value2=NEWNAME
                                    file = urllib2.urlopen(outqueue);
                                    data = file.read()
                                    file.close()
                                    dom = parseString(data)
                                    queue_slots = dom.getElementsByTagName('filename')
                                    queue_cnt = len(queue_slots)
                                    print ("there are " + str(queue_cnt) + " things in SABnzbd's queue")
                                    que = 0
                                    slotmatch = "no"
                                    for queue in queue_slots:
                                        # walk the queue until our cached filename shows up
                                        queue_file = dom.getElementsByTagName('filename')[que].firstChild.wholeText
                                        while ('Trying to fetch NZB' in queue_file):
                                            #let's keep waiting until nzbname is resolved by SABnzbd
                                            time.sleep(5)
                                            file = urllib2.urlopen(outqueue);
                                            data = file.read()
                                            file.close()
                                            dom = parseString(data)
                                            queue_file = dom.getElementsByTagName('filename')[que].firstChild.wholeText
                                        print ("queue File:" + str(queue_file))
                                        print ("nzb File: " + str(filenzb))
                                        queue_file = queue_file.replace("_", " ")
                                        if str(queue_file) in str(filenzb):
                                            print ("matched")
                                            slotmatch = "yes"
                                            slot_nzoid = dom.getElementsByTagName('nzo_id')[que].firstChild.wholeText
                                            print ("slot_nzoid: " + str(slot_nzoid))
                                            break
                                        que+=1
                                    if slotmatch == "yes":
                                        renameit = str(ComicName.replace(' ', '_')) + "_" + str(IssueNumber) + "_(" + str(SeriesYear) + ")" + "_" + "(" + str(comyear) + ")"
                                        nzo_ren = str(mylar.SAB_HOST) + "/api?mode=queue&name=rename&apikey=" + str(mylar.SAB_APIKEY) + "&value=" + str(slot_nzoid) + "&value2=" + str(renameit)
                                        print ("attempting to rename queue to " + str(nzo_ren))
                                        urllib2.urlopen(nzo_ren);
                                        print ("renamed!")
                                        #delete the cached .nzb now.
                                        # NOTE(review): 'is not' is identity comparison against a
                                        # string literal -- should be != "/"
                                        if mylar.PROG_DIR is not "/":
                                            os.remove(savefile)
                                            print ("removed :" + str(savefile))
                                        #we need to track nzo_id to make sure finished downloaded with SABnzbd.
                                    else:
                                        logger.info(u"Couldn't locate file in SAB - are you sure it's being downloaded?")
                                #resume sab if it was running before we started
                                if pausestatus != 'True':
                                    #let's unpause queue now that we did our jobs.
                                    resumeapi = str(mylar.SAB_HOST) + "/api?mode=resume&apikey=" + str(mylar.SAB_APIKEY)
                                    urllib.urlopen(resumeapi);
                            #raise an exception to break out of loop
                            foundc = "yes"
                            done = True
                            break
                        else:
                            #issues don't match
                            foundc = "no"
            if done == True:
                break
            cmloopit-=1
        findloop+=1
        if foundc == "yes":
            foundcomic.append("yes")
            logger.info(u"Found :" + str(ComicName) + " (" + str(comyear) + ") issue: " + str(IssueNumber) + " using " + str(nzbprov))
            break
        elif foundc == "no" and nzbpr <> 0:
            logger.info(u"More than one search provider given - trying next one.")
        elif foundc == "no" and nzbpr == 0:
            foundcomic.append("no")
            logger.info(u"Couldn't find Issue " + str(IssueNumber) + " of " + str(ComicName) + "(" + str(comyear) + "). Status kept as wanted." )
            break
    return foundc
def addComictoDB(comicid):
    """Add (or refresh) a comic series and all of its issues in the database.

    Pulls full series details from ComicVine for ``comicid``, scrapes GCD for
    publication dates, creates the series directory on disk, matches each
    issue against files already present, upserts every issue row, and finally
    kicks off a search for any issue that ended up marked "Wanted".

    Returns None; progress and failures are reported through ``logger``.
    """
    # Putting this here to get around the circular import. Will try to use
    # this to update images at a later date.
    from mylar import cache

    myDB = db.DBConnection()

    # We need the current minimal info in the database instantly so we don't
    # throw a 500 error when we redirect to the artistPage.
    controlValueDict = {"ComicID": comicid}
    dbcomic = myDB.action('SELECT * FROM comics WHERE ComicID=?', [comicid]).fetchone()
    if dbcomic is None:
        newValueDict = {"ComicName": "Comic ID: %s" % (comicid),
                        "Status": "Loading"}
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("comics", newValueDict, controlValueDict)

    # we need to lookup the info for the requested ComicID in full now
    comic = cv.getComic(comicid, 'comic')
    if not comic:
        logger.warn("Error fetching comic. ID for : " + comicid)
        if dbcomic is None:
            newValueDict = {"ComicName": "Fetch failed, try refreshing. (%s)" % (comicid),
                            "Status": "Active"}
        else:
            newValueDict = {"Status": "Active"}
        myDB.upsert("comics", newValueDict, controlValueDict)
        return

    # strip a leading "The " so the sort name orders sensibly
    if comic['ComicName'].startswith('The '):
        sortname = comic['ComicName'][4:]
    else:
        sortname = comic['ComicName']

    logger.info(u"Now adding/updating: " + comic['ComicName'])
    # gcd will return issue details (most importantly publishing date)
    gcdinfo = parseit.GCDScraper(comic['ComicName'], comic['ComicYear'], comic['ComicIssues'], comicid)
    if gcdinfo == "No Match":
        logger.warn("No matching result found for " + comic['ComicName'] + " (" + comic['ComicYear'] + ")")
        return
    logger.info(u"Sucessfully retrieved details for " + comic['ComicName'])

    # comic book location on machine - setup default location here
    comlocation = mylar.DESTINATION_DIR + "/" + comic['ComicName'] + " (" + comic['ComicYear'] + ")"
    # hard-coded space replacement for the series directory name
    mylarREPLACE_CHAR = '_'
    comlocation = comlocation.replace(' ', mylarREPLACE_CHAR)

    # if it doesn't exist - create it (otherwise will bugger up later on)
    if os.path.isdir(str(comlocation)):
        logger.info(u"Directory (" + str(comlocation) + ") already exists! Continuing...")
    else:
        try:
            os.makedirs(str(comlocation))
            logger.info(u"Directory successfully created at: " + str(comlocation))
        except OSError as e:
            # FIX: was "except OSError.e:", which never bound e and would
            # itself raise AttributeError. Only swallow "already exists"
            # races; re-raise anything else.
            if e.errno != errno.EEXIST:
                raise

    # try to account for CV not updating new issues as fast as GCD
    if gcdinfo['gcdvariation'] == "yes":
        comicIssues = str(int(comic['ComicIssues']) + 1)
    else:
        comicIssues = comic['ComicIssues']

    controlValueDict = {"ComicID": comicid}
    newValueDict = {"ComicName": comic['ComicName'],
                    "ComicSortName": sortname,
                    "ComicYear": comic['ComicYear'],
                    "ComicImage": comic['ComicImage'],
                    "Total": comicIssues,
                    "Description": comic['ComicDesc'],
                    "ComicLocation": comlocation,
                    "ComicPublisher": comic['ComicPublisher'],
                    "ComicPublished": parseit.resultPublished,
                    "DateAdded": helpers.today(),
                    "Status": "Loading"}
    myDB.upsert("comics", newValueDict, controlValueDict)

    issued = cv.getComic(comicid, 'issue')
    logger.info(u"Sucessfully retrieved issue details for " + comic['ComicName'])

    n = 0
    iscnt = int(comicIssues)
    issid = []
    issnum = []
    issname = []
    issdate = []
    int_issnum = []
    # let's start issue #'s at 0 -- thanks to DC for the new 52 reboot! :)
    latestiss = "0"
    latestdate = "0000-00-00"
    # pair each CV issue with its GCD entry to pick up the publication date
    while (n < iscnt):
        firstval = issued['issuechoice'][n]
        cleanname = helpers.cleanName(firstval['Issue_Name'])
        issid.append(str(firstval['Issue_ID']))
        issnum.append(str(firstval['Issue_Number']))
        issname.append(cleanname)
        bb = 0
        while (bb < iscnt):
            gcdval = gcdinfo['gcdchoice'][bb]
            if str(gcdval['GCDIssue']) == str(issnum[n]):
                issdate.append(str(gcdval['GCDDate']))
                issnumchg = issnum[n].replace(".00", "")
                int_issnum.append(int(issnumchg))
                # get the latest issue / date using the date.
                if gcdval['GCDDate'] > latestdate:
                    latestiss = str(issnumchg)
                    latestdate = str(gcdval['GCDDate'])
                bb = iscnt  # match found - force the inner loop to end
            bb += 1
        n += 1
    latestiss = latestiss + ".00"

    # once again - thanks to the new 52 reboot...start n at 0.
    n = 0
    # FIX: added missing space before the series name in the log message.
    logger.info(u"Now adding/updating issues for " + comic['ComicName'])
    # file check to see if issue exists
    logger.info(u"Checking directory for existing issues.")
    fc = filechecker.listFiles(dir=comlocation, watchcomic=comic['ComicName'])
    havefiles = 0
    fccnt = int(fc['comiccount'])
    logger.info(u"Found " + str(fccnt) + " issues of " + comic['ComicName'])
    fcnew = []
    while (n < iscnt):
        fn = 0
        haveissue = "no"
        # check if the issue already exists
        iss_exists = myDB.select('SELECT * from issues WHERE IssueID=?', [issid[n]])
        # stupid way to do this, but check each issue against file-list in fc.
        while (fn < fccnt):
            tmpfc = fc['comiclist'][fn]
            temploc = tmpfc['ComicFilename'].replace('_', ' ')
            fcnew = shlex.split(str(temploc))
            fcn = len(fcnew)
            som = 0
            # this loop searches each word in the filename for a match.
            while (som < fcn):
                # counts get buggered up when the issue is the last field in
                # the filename - ie. '50.cbr'
                if ".cbr" in fcnew[som]:
                    fcnew[som] = fcnew[som].replace(".cbr", "")
                elif ".cbz" in fcnew[som]:
                    fcnew[som] = fcnew[som].replace(".cbz", "")
                if fcnew[som].isdigit():
                    # good ol' 52 again....
                    if int(fcnew[som]) > 0:
                        fcdigit = fcnew[som].lstrip('0')
                    else:
                        fcdigit = "0"
                    if int(fcdigit) == int_issnum[n]:
                        havefiles += 1
                        haveissue = "yes"
                        isslocation = str(tmpfc['ComicFilename'])
                        break
                som += 1
            fn += 1
        if haveissue == "no":
            isslocation = "None"
        controlValueDict = {"IssueID": issid[n]}
        newValueDict = {"ComicID": comicid,
                        "ComicName": comic['ComicName'],
                        "IssueName": issname[n],
                        "Issue_Number": issnum[n],
                        "IssueDate": issdate[n],
                        "Location": isslocation,
                        "Int_IssueNumber": int_issnum[n]}
        # Only change the status & add DateAdded if the issue is not already
        # in the database
        if not len(iss_exists):
            controlValueDict = {"IssueID": issid[n]}
            newValueDict['DateAdded'] = helpers.today()
        if haveissue == "no":
            if mylar.AUTOWANT_ALL:
                newValueDict['Status'] = "Wanted"
            else:
                newValueDict['Status'] = "Skipped"
        elif haveissue == "yes":
            newValueDict['Status'] = "Downloaded"
        myDB.upsert("issues", newValueDict, controlValueDict)
        n += 1

    controlValueStat = {"ComicID": comicid}
    newValueStat = {"Status": "Active",
                    "Have": havefiles,
                    "LatestIssue": latestiss,
                    "LatestDate": latestdate}
    myDB.upsert("comics", newValueStat, controlValueStat)
    logger.info(u"Updating complete for: " + comic['ComicName'])

    # here we grab issues that have been marked as wanted above...
    results = myDB.select("SELECT * FROM issues where ComicID=? AND Status='Wanted'", [comicid])
    if results:
        logger.info(u"Attempting to grab wanted issues for : " + comic['ComicName'])
        for result in results:
            foundNZB = "none"
            if (mylar.NZBSU or mylar.DOGNZB or mylar.EXPERIMENTAL) and (mylar.SAB_HOST):
                foundNZB = search.searchforissue(result['IssueID'])
                if foundNZB == "yes":
                    updater.foundsearch(result['ComicID'], result['IssueID'])
    else:
        logger.info(u"No issues marked as wanted for " + comic['ComicName'])
    logger.info(u"Finished grabbing what I could.")
def libraryScan(dir=None, append=False, ComicID=None, ComicName=None, cron=None): if cron and not mylar.LIBRARYSCAN: return if not dir: dir = mylar.COMIC_DIR # If we're appending a dir, it's coming from the post processor which is # already bytestring if not append: dir = dir.encode(mylar.SYS_ENCODING) if not os.path.isdir(dir): logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(mylar.SYS_ENCODING, 'replace')) return logger.info('Scanning comic directory: %s' % dir.decode(mylar.SYS_ENCODING, 'replace')) basedir = dir comic_list = [] comiccnt = 0 extensions = ('cbr', 'cbz') for r, d, f in os.walk(dir): #for directory in d[:]: # if directory.startswith("."): # d.remove(directory) for files in f: if any(files.lower().endswith('.' + x.lower()) for x in extensions): comic = files comicpath = os.path.join(r, files) comicsize = os.path.getsize(comicpath) print "Comic: " + comic print "Comic Path: " + comicpath print "Comic Size: " + str(comicsize) # We need the unicode path to use for logging, inserting into database unicode_comic_path = comicpath.decode(mylar.SYS_ENCODING, 'replace') comiccnt += 1 comic_dict = { 'ComicFilename': comic, 'ComicLocation': comicpath, 'ComicSize': comicsize, 'Unicode_ComicLocation': unicode_comic_path } comic_list.append(comic_dict) logger.info("I've found a total of " + str(comiccnt) + " comics....analyzing now") logger.info("comiclist: " + str(comic_list)) myDB = db.DBConnection() #let's load in the watchlist to see if we have any matches. logger.info( "loading in the watchlist to see if a series is being watched already..." ) watchlist = myDB.action("SELECT * from comics") ComicName = [] ComicYear = [] ComicPublisher = [] ComicTotal = [] ComicID = [] ComicLocation = [] AltName = [] watchcnt = 0 watch_kchoice = [] watchchoice = {} import_by_comicids = [] import_comicids = {} for watch in watchlist: # let's clean up the name, just in case for comparison purposes... 
watchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', watch['ComicName']).encode('utf-8').strip() #watchcomic = re.sub('\s+', ' ', str(watchcomic)).strip() if ' the ' in watchcomic.lower(): #drop the 'the' from the watchcomic title for proper comparisons. watchcomic = watchcomic[-4:] alt_chk = "no" # alt-checker flag (default to no) # account for alternate names as well if watch['AlternateSearch'] is not None and watch[ 'AlternateSearch'] is not 'None': altcomic = re.sub( '[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', watch['AlternateSearch']).encode('utf-8').strip() #altcomic = re.sub('\s+', ' ', str(altcomic)).strip() AltName.append(altcomic) alt_chk = "yes" # alt-checker flag ComicName.append(watchcomic) ComicYear.append(watch['ComicYear']) ComicPublisher.append(watch['ComicPublisher']) ComicTotal.append(watch['Total']) ComicID.append(watch['ComicID']) ComicLocation.append(watch['ComicLocation']) watchcnt += 1 logger.info("Successfully loaded " + str(watchcnt) + " series from your watchlist.") ripperlist = ['digital-', 'empire', 'dcp'] watchfound = 0 datelist = [ 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec' ] # datemonth = {'one':1,'two':2,'three':3,'four':4,'five':5,'six':6,'seven':7,'eight':8,'nine':9,'ten':10,'eleven':$ # #search for number as text, and change to numeric # for numbs in basnumbs: # #print ("numbs:" + str(numbs)) # if numbs in ComicName.lower(): # numconv = basnumbs[numbs] # #print ("numconv: " + str(numconv)) for i in comic_list: print i['ComicFilename'] comfilename = i['ComicFilename'] comlocation = i['ComicLocation'] #let's clean up the filename for matching purposes cfilename = re.sub('[\_\#\,\/\:\;\-\!\$\%\&\+\'\?\@]', ' ', comfilename) #cfilename = re.sub('\s', '_', str(cfilename)) #versioning - remove it subsplit = cfilename.replace('_', ' ').split() volno = None volyr = None for subit in subsplit: if subit[0].lower() == 'v': vfull = 0 if subit[1:].isdigit(): #if in format v1, v2009 etc... 
if len(subit) > 3: # if it's greater than 3 in length, then the format is Vyyyy vfull = 1 # add on 1 character length to account for extra space cfilename = re.sub(subit, '', cfilename) volno = re.sub("[^0-9]", " ", subit) elif subit.lower()[:3] == 'vol': #if in format vol.2013 etc #because the '.' in Vol. gets removed, let's loop thru again after the Vol hit to remove it entirely logger.fdebug('volume indicator detected as version #:' + str(subit)) cfilename = re.sub(subit, '', cfilename) cfilename = " ".join(cfilename.split()) volyr = re.sub("[^0-9]", " ", subit).strip() logger.fdebug('volume year set as : ' + str(volyr)) cm_cn = 0 #we need to track the counter to make sure we are comparing the right array parts #this takes care of the brackets :) m = re.findall('[^()]+', cfilename) lenm = len(m) logger.fdebug("there are " + str(lenm) + " words.") cnt = 0 yearmatch = "false" foundonwatch = "False" issue = 999999 while (cnt < lenm): if m[cnt] is None: break if m[cnt] == ' ': pass else: logger.fdebug(str(cnt) + ". Bracket Word: " + m[cnt]) if cnt == 0: comic_andiss = m[cnt] logger.fdebug("Comic: " + comic_andiss) # if it's not in the standard format this will bork. # let's try to accomodate (somehow). # first remove the extension (if any) extensions = ('cbr', 'cbz') if comic_andiss.lower().endswith(extensions): comic_andiss = comic_andiss[:-4] logger.fdebug("removed extension from filename.") #now we have to break up the string regardless of formatting. #let's force the spaces. comic_andiss = re.sub('_', ' ', comic_andiss) cs = comic_andiss.split() cs_len = len(cs) cn = '' ydetected = 'no' idetected = 'no' decimaldetect = 'no' for i in reversed(xrange(len(cs))): #start at the end. 
logger.fdebug("word: " + str(cs[i])) #assume once we find issue - everything prior is the actual title #idetected = no will ignore everything so it will assume all title if cs[i][:-2] == '19' or cs[ i][:-2] == '20' and idetected == 'no': logger.fdebug("year detected: " + str(cs[i])) ydetected = 'yes' result_comyear = cs[i] elif cs[i].isdigit( ) and idetected == 'no' or '.' in cs[i]: issue = cs[i] logger.fdebug("issue detected : " + str(issue)) idetected = 'yes' if '.' in cs[i]: #make sure it's a number on either side of decimal and assume decimal issue. decst = cs[i].find('.') dec_st = cs[i][:decst] dec_en = cs[i][decst + 1:] logger.fdebug("st: " + str(dec_st)) logger.fdebug("en: " + str(dec_en)) if dec_st.isdigit() and dec_en.isdigit(): logger.fdebug( "decimal issue detected...adjusting.") issue = dec_st + "." + dec_en logger.fdebug("issue detected: " + str(issue)) idetected = 'yes' else: logger.fdebug( "false decimal represent. Chunking to extra word." ) cn = cn + cs[i] + " " break elif '\#' in cs[i] or decimaldetect == 'yes': logger.fdebug("issue detected: " + str(cs[i])) idetected = 'yes' else: cn = cn + cs[i] + " " if ydetected == 'no': #assume no year given in filename... result_comyear = "0000" logger.fdebug("cm?: " + str(cn)) if issue is not '999999': comiss = issue else: logger.ERROR( "Invalid Issue number (none present) for " + comfilename) break cnsplit = cn.split() cname = '' findcn = 0 while (findcn < len(cnsplit)): cname = cname + cs[findcn] + " " findcn += 1 cname = cname[:len(cname) - 1] # drop the end space... 
print("assuming name is : " + cname) com_NAME = cname print("com_NAME : " + com_NAME) yearmatch = "True" else: logger.fdebug('checking ' + m[cnt]) # we're assuming that the year is in brackets (and it should be damnit) if m[cnt][:-2] == '19' or m[cnt][:-2] == '20': print("year detected: " + str(m[cnt])) ydetected = 'yes' result_comyear = m[cnt] elif m[cnt][:3].lower() in datelist: logger.fdebug( 'possible issue date format given - verifying') #if the date of the issue is given as (Jan 2010) or (January 2010) let's adjust. #keeping in mind that ',' and '.' are already stripped from the string if m[cnt][-4:].isdigit(): ydetected = 'yes' result_comyear = m[cnt][-4:] logger.fdebug('Valid Issue year of ' + str(result_comyear) + 'detected in format of ' + str(m[cnt])) cnt += 1 splitit = [] watchcomic_split = [] logger.fdebug("filename comic and issue: " + comic_andiss) #changed this from '' to ' ' comic_iss_b4 = re.sub('[\-\:\,]', ' ', comic_andiss) comic_iss = comic_iss_b4.replace('.', ' ') comic_iss = re.sub('[\s+]', ' ', comic_iss).strip() logger.fdebug("adjusted comic and issue: " + str(comic_iss)) #remove 'the' from here for proper comparisons. if ' the ' in comic_iss.lower(): comic_iss = comic_iss[-4:] splitit = comic_iss.split(None) logger.fdebug("adjusting from: " + str(comic_iss_b4) + " to: " + str(comic_iss)) #bmm = re.findall('v\d', comic_iss) #if len(bmm) > 0: splitst = len(splitit) - 2 #else: splitst = len(splitit) - 1 #----- #here we cycle through the Watchlist looking for a match. 
while (cm_cn < watchcnt): #setup the watchlist comname = ComicName[cm_cn] print("watch_comic:" + comname) comyear = ComicYear[cm_cn] compub = ComicPublisher[cm_cn] comtotal = ComicTotal[cm_cn] comicid = ComicID[cm_cn] watch_location = ComicLocation[cm_cn] # if splitit[(len(splitit)-1)].isdigit(): # #compares - if the last digit and second last digit are #'s seperated by spaces assume decimal # comic_iss = splitit[(len(splitit)-1)] # splitst = len(splitit) - 1 # if splitit[(len(splitit)-2)].isdigit(): # # for series that have a digit at the end, it screws up the logistics. # i = 1 # chg_comic = splitit[0] # while (i < (len(splitit)-1)): # chg_comic = chg_comic + " " + splitit[i] # i+=1 # logger.fdebug("chg_comic:" + str(chg_comic)) # if chg_comic.upper() == comname.upper(): # logger.fdebug("series contains numerics...adjusting..") # else: # changeup = "." + splitit[(len(splitit)-1)] # logger.fdebug("changeup to decimal: " + str(changeup)) # comic_iss = splitit[(len(splitit)-2)] + "." + comic_iss # splitst = len(splitit) - 2 # else: # if the nzb name doesn't follow the series-issue-year format even closely..ignore nzb # logger.fdebug("invalid naming format of filename detected - cannot properly determine issue") # continue # make sure that things like - in watchcomic are accounted for when comparing to nzb. # there shouldn't be an issue in the comic now, so let's just assume it's all gravy. 
splitst = len(splitit) watchcomic_split = helpers.cleanName(comname) watchcomic_split = re.sub('[\-\:\,\.]', ' ', watchcomic_split).split(None) logger.fdebug( str(splitit) + " file series word count: " + str(splitst)) logger.fdebug( str(watchcomic_split) + " watchlist word count: " + str(len(watchcomic_split))) if (splitst) != len(watchcomic_split): logger.fdebug("incorrect comic lengths...not a match") # if str(splitit[0]).lower() == "the": # logger.fdebug("THE word detected...attempting to adjust pattern matching") # splitit[0] = splitit[4:] else: logger.fdebug("length match..proceeding") n = 0 scount = 0 logger.fdebug("search-length: " + str(splitst)) logger.fdebug("Watchlist-length: " + str(len(watchcomic_split))) while (n <= (splitst) - 1): logger.fdebug("splitit: " + str(splitit[n])) if n < (splitst) and n < len(watchcomic_split): logger.fdebug( str(n) + " Comparing: " + str(watchcomic_split[n]) + " .to. " + str(splitit[n])) if '+' in watchcomic_split[n]: watchcomic_split[n] = re.sub( '+', '', str(watchcomic_split[n])) if str(watchcomic_split[n].lower()) in str( splitit[n].lower()) and len( watchcomic_split[n]) >= len(splitit[n]): logger.fdebug("word matched on : " + str(splitit[n])) scount += 1 #elif ':' in splitit[n] or '-' in splitit[n]: # splitrep = splitit[n].replace('-', '') # print ("non-character keyword...skipped on " + splitit[n]) elif str(splitit[n]).lower().startswith('v'): logger.fdebug("possible versioning..checking") #we hit a versioning # - account for it if splitit[n][1:].isdigit(): comicversion = str(splitit[n]) logger.fdebug("version found: " + str(comicversion)) else: logger.fdebug("Comic / Issue section") if splitit[n].isdigit(): logger.fdebug("issue detected") #comiss = splitit[n] # comicNAMER = n - 1 # com_NAME = splitit[0] # cmnam = 1 # while (cmnam <= comicNAMER): # com_NAME = str(com_NAME) + " " + str(splitit[cmnam]) # cmnam+=1 # logger.fdebug("comic: " + str(com_NAME)) else: logger.fdebug("non-match for: " + str(splitit[n])) pass n 
+= 1 #set the match threshold to 80% (for now) # if it's less than 80% consider it a non-match and discard. #splitit has to splitit-1 because last position is issue. wordcnt = int(scount) logger.fdebug("scount:" + str(wordcnt)) totalcnt = int(splitst) logger.fdebug("splitit-len:" + str(totalcnt)) spercent = (wordcnt / totalcnt) * 100 logger.fdebug("we got " + str(spercent) + " percent.") if int(spercent) >= 80: logger.fdebug("it's a go captain... - we matched " + str(spercent) + "%!") logger.fdebug("this should be a match!") # if '.' in comic_iss: # comisschk_find = comic_iss.find('.') # comisschk_b4dec = comic_iss[:comisschk_find] # comisschk_decval = comic_iss[comisschk_find+1:] # logger.fdebug("Found IssueNumber: " + str(comic_iss)) # logger.fdebug("..before decimal: " + str(comisschk_b4dec)) # logger.fdebug("...after decimal: " + str(comisschk_decval)) # #--let's make sure we don't wipe out decimal issues ;) # if int(comisschk_decval) == 0: # ciss = comisschk_b4dec # cintdec = int(comisschk_decval) # else: # if len(comisschk_decval) == 1: # ciss = comisschk_b4dec + "." + comisschk_decval # cintdec = int(comisschk_decval) * 10 # else: # ciss = comisschk_b4dec + "." 
+ comisschk_decval.rstrip('0') # cintdec = int(comisschk_decval.rstrip('0')) * 10 # comintIss = (int(comisschk_b4dec) * 1000) + cintdec # else: # comintIss = int(comic_iss) * 1000 logger.fdebug("issue we found for is : " + str(comiss)) #set the year to the series we just found ;) result_comyear = comyear #issue comparison now as well logger.info(u"Found " + comname + " (" + str(comyear) + ") issue: " + str(comiss)) # watchfound+=1 watchmatch = str(comicid) # watch_kchoice.append({ # "ComicID": str(comicid), # "ComicName": str(comname), # "ComicYear": str(comyear), # "ComicIssue": str(int(comic_iss)), # "ComicLocation": str(watch_location), # "OriginalLocation" : str(comlocation), # "OriginalFilename" : str(comfilename) # }) foundonwatch = "True" break elif int(spercent) < 80: logger.fdebug("failure - we only got " + str(spercent) + "% right!") cm_cn += 1 if foundonwatch == "False": watchmatch = None #---if it's not a match - send it to the importer. n = 0 # print ("comic_andiss : " + str(comic_andiss)) # csplit = comic_andiss.split(None) # while ( n <= (len(csplit)-1) ): # print ("csplit:" + str(csplit[n])) # if csplit[n].isdigit(): # logger.fdebug("issue detected") # comiss = splitit[n] # logger.fdebug("issue # : " + str(comiss)) # comicNAMER = n - 1 # com_NAME = csplit[0] # cmnam = 1 # while (cmnam <= comicNAMER): # com_NAME = str(com_NAME) + " " + str(csplit[cmnam]) # cmnam+=1 # logger.fdebug("comic: " + str(com_NAME)) # n+=1 if volyr is None: if result_comyear is None: result_comyear = '0000' #no year in filename basically. 
else: if result_comyear is None: result_comyear = volyr if volno is None: if volyr is None: vol_label = None else: vol_label = volyr else: vol_label = volno print("adding " + com_NAME + " to the import-queue!") impid = com_NAME + "-" + str(result_comyear) + "-" + str(comiss) print("impid: " + str(impid)) import_by_comicids.append({ "impid": impid, "watchmatch": watchmatch, "comicname": com_NAME, "comicyear": result_comyear, "volume": vol_label, "comfilename": comfilename, "comlocation": comlocation.decode(mylar.SYS_ENCODING) }) if len(watch_kchoice) > 0: watchchoice['watchlist'] = watch_kchoice print("watchchoice: " + str(watchchoice)) logger.info("I have found " + str(watchfound) + " out of " + str(comiccnt) + " comics for series that are being watched.") wat = 0 comicids = [] if watchfound > 0: if mylar.IMP_MOVE: logger.info( "You checked off Move Files...so that's what I'm going to do" ) #check to see if Move Files is enabled. #if not being moved, set the archive bit. print("Moving files into appropriate directory") while (wat < watchfound): watch_the_list = watchchoice['watchlist'][wat] watch_comlocation = watch_the_list['ComicLocation'] watch_comicid = watch_the_list['ComicID'] watch_comicname = watch_the_list['ComicName'] watch_comicyear = watch_the_list['ComicYear'] watch_comiciss = watch_the_list['ComicIssue'] print("ComicLocation: " + str(watch_comlocation)) orig_comlocation = watch_the_list['OriginalLocation'] orig_filename = watch_the_list['OriginalFilename'] print("Orig. Location: " + str(orig_comlocation)) print("Orig. Filename: " + str(orig_filename)) #before moving check to see if Rename to Mylar structure is enabled. 
if mylar.IMP_RENAME: print( "Renaming files according to configuration details : " + str(mylar.FILE_FORMAT)) renameit = helpers.rename_param( watch_comicid, watch_comicname, watch_comicyear, watch_comiciss) nfilename = renameit['nfilename'] dst_path = os.path.join(watch_comlocation, nfilename) if str(watch_comicid) not in comicids: comicids.append(watch_comicid) else: print( "Renaming files not enabled, keeping original filename(s)" ) dst_path = os.path.join(watch_comlocation, orig_filename) #os.rename(os.path.join(self.nzb_folder, str(ofilename)), os.path.join(self.nzb_folder,str(nfilename + ext))) #src = os.path.join(, str(nfilename + ext)) print("I'm going to move " + str(orig_comlocation) + " to .." + str(dst_path)) try: shutil.move(orig_comlocation, dst_path) except (OSError, IOError): logger.info( "Failed to move directory - check directories and manually re-run." ) wat += 1 else: # if move files isn't enabled, let's set all found comics to Archive status :) while (wat < watchfound): watch_the_list = watchchoice['watchlist'][wat] watch_comicid = watch_the_list['ComicID'] watch_issue = watch_the_list['ComicIssue'] print("ComicID: " + str(watch_comicid)) print("Issue#: " + str(watch_issue)) issuechk = myDB.action( "SELECT * from issues where ComicID=? AND INT_IssueNumber=?", [watch_comicid, watch_issue]).fetchone() if issuechk is None: print("no matching issues for this comic#") else: print("...Existing status: " + str(issuechk['Status'])) control = {"IssueID": issuechk['IssueID']} values = {"Status": "Archived"} print("...changing status of " + str(issuechk['Issue_Number']) + " to Archived ") myDB.upsert("issues", values, control) if str(watch_comicid) not in comicids: comicids.append(watch_comicid) wat += 1 if comicids is None: pass else: c_upd = len(comicids) c = 0 while (c < c_upd): print("Rescanning.. 
" + str(c)) updater.forceRescan(c) if not len(import_by_comicids): return "Completed" if len(import_by_comicids) > 0: import_comicids['comic_info'] = import_by_comicids print("import comicids: " + str(import_by_comicids)) return import_comicids, len(import_by_comicids)
def addComictoDB(comicid, mismatch=None, pullupd=None, imported=None, ogcname=None): # Putting this here to get around the circular import. Will try to use this to update images at later date. # from mylar import cache myDB = db.DBConnection() # We need the current minimal info in the database instantly # so we don't throw a 500 error when we redirect to the artistPage controlValueDict = {"ComicID": comicid} dbcomic = myDB.action('SELECT * FROM comics WHERE ComicID=?', [comicid]).fetchone() if dbcomic is None: newValueDict = { "ComicName": "Comic ID: %s" % (comicid), "Status": "Loading" } comlocation = None oldcomversion = None else: newValueDict = {"Status": "Loading"} comlocation = dbcomic['ComicLocation'] filechecker.validateAndCreateDirectory(comlocation, True) oldcomversion = dbcomic[ 'ComicVersion'] #store the comicversion and chk if it exists before hammering. myDB.upsert("comics", newValueDict, controlValueDict) #run the re-sortorder here in order to properly display the page if pullupd is None: helpers.ComicSort(comicorder=mylar.COMICSORT, imported=comicid) # we need to lookup the info for the requested ComicID in full now comic = cv.getComic(comicid, 'comic') #comic = myDB.action('SELECT * FROM comics WHERE ComicID=?', [comicid]).fetchone() if not comic: logger.warn("Error fetching comic. ID for : " + comicid) if dbcomic is None: newValueDict = { "ComicName": "Fetch failed, try refreshing. 
(%s)" % (comicid), "Status": "Active" } else: newValueDict = {"Status": "Active"} myDB.upsert("comics", newValueDict, controlValueDict) return if comic['ComicName'].startswith('The '): sortname = comic['ComicName'][4:] else: sortname = comic['ComicName'] logger.info(u"Now adding/updating: " + comic['ComicName']) #--Now that we know ComicName, let's try some scraping #--Start # gcd will return issue details (most importantly publishing date) if not mylar.CV_ONLY: if mismatch == "no" or mismatch is None: gcdinfo = parseit.GCDScraper(comic['ComicName'], comic['ComicYear'], comic['ComicIssues'], comicid) #print ("gcdinfo: " + str(gcdinfo)) mismatch_com = "no" if gcdinfo == "No Match": updater.no_searchresults(comicid) nomatch = "true" logger.info(u"There was an error when trying to add " + comic['ComicName'] + " (" + comic['ComicYear'] + ")") return nomatch else: mismatch_com = "yes" #print ("gcdinfo:" + str(gcdinfo)) elif mismatch == "yes": CV_EXcomicid = myDB.action( "SELECT * from exceptions WHERE ComicID=?", [comicid]).fetchone() if CV_EXcomicid['variloop'] is None: pass else: vari_loop = CV_EXcomicid['variloop'] NewComicID = CV_EXcomicid['NewComicID'] gcomicid = CV_EXcomicid['GComicID'] resultURL = "/series/" + str(NewComicID) + "/" #print ("variloop" + str(CV_EXcomicid['variloop'])) #if vari_loop == '99': gcdinfo = parseit.GCDdetails(comseries=None, resultURL=resultURL, vari_loop=0, ComicID=comicid, TotalIssues=0, issvariation="no", resultPublished=None) logger.info(u"Sucessfully retrieved details for " + comic['ComicName']) # print ("Series Published" + parseit.resultPublished) CV_NoYearGiven = "no" #if the SeriesYear returned by CV is blank or none (0000), let's use the gcd one. if comic['ComicYear'] is None or comic['ComicYear'] == '0000': if mylar.CV_ONLY: #we'll defer this until later when we grab all the issues and then figure it out logger.info( "Uh-oh. I can't find a Series Year for this series. I'm going to try analyzing deeper." 
) SeriesYear = cv.getComic(comicid, 'firstissue', comic['FirstIssueID']) if SeriesYear == '0000': logger.info( "Ok - I couldn't find a Series Year at all. Loading in the issue data now and will figure out the Series Year." ) CV_NoYearGiven = "yes" issued = cv.getComic(comicid, 'issue') SeriesYear = issued['firstdate'][:4] else: SeriesYear = gcdinfo['SeriesYear'] else: SeriesYear = comic['ComicYear'] #let's do the Annual check here. if mylar.ANNUALS_ON: annualcomicname = re.sub('[\,\:]', '', comic['ComicName']) annuals = comicbookdb.cbdb(annualcomicname, SeriesYear) print("Number of Annuals returned: " + str(annuals['totalissues'])) nb = 0 while (nb <= int(annuals['totalissues'])): try: annualval = annuals['annualslist'][nb] except IndexError: break newCtrl = { "IssueID": str(annualval['AnnualIssue'] + annualval['AnnualDate']) } newVals = { "Issue_Number": annualval['AnnualIssue'], "IssueDate": annualval['AnnualDate'], "IssueName": annualval['AnnualTitle'], "ComicID": comicid, "Status": "Skipped" } myDB.upsert("annuals", newVals, newCtrl) nb += 1 #parseit.annualCheck(gcomicid=gcdinfo['GCDComicID'], comicid=comicid, comicname=comic['ComicName'], comicyear=SeriesYear) #comic book location on machine # setup default location here if comlocation is None: # let's remove the non-standard characters here. u_comicnm = comic['ComicName'] u_comicname = u_comicnm.encode('ascii', 'ignore').strip() if ':' in u_comicname or '/' in u_comicname or ',' in u_comicname or '?' in u_comicname: comicdir = u_comicname if ':' in comicdir: comicdir = comicdir.replace(':', '') if '/' in comicdir: comicdir = comicdir.replace('/', '-') if ',' in comicdir: comicdir = comicdir.replace(',', '') if '?' in comicdir: comicdir = comicdir.replace('?', '') else: comicdir = u_comicname series = comicdir publisher = re.sub('!', '', comic['ComicPublisher']) # thanks Boom! 
year = SeriesYear comversion = comic['ComicVersion'] if comversion is None: comversion = 'None' #if comversion is None, remove it so it doesn't populate with 'None' if comversion == 'None': chunk_f_f = re.sub('\$VolumeN', '', mylar.FILE_FORMAT) chunk_f = re.compile(r'\s+') mylar.FILE_FORMAT = chunk_f.sub(' ', chunk_f_f) #do work to generate folder path values = { '$Series': series, '$Publisher': publisher, '$Year': year, '$series': series.lower(), '$publisher': publisher.lower(), '$VolumeY': 'V' + str(year), '$VolumeN': comversion } #print mylar.FOLDER_FORMAT #print 'working dir:' #print helpers.replace_all(mylar.FOLDER_FORMAT, values) if mylar.FOLDER_FORMAT == '': comlocation = mylar.DESTINATION_DIR + "/" + comicdir + " (" + SeriesYear + ")" else: comlocation = mylar.DESTINATION_DIR + "/" + helpers.replace_all( mylar.FOLDER_FORMAT, values) #comlocation = mylar.DESTINATION_DIR + "/" + comicdir + " (" + comic['ComicYear'] + ")" if mylar.DESTINATION_DIR == "": logger.error( u"There is no general directory specified - please specify in Config/Post-Processing." ) return if mylar.REPLACE_SPACES: #mylar.REPLACE_CHAR ...determines what to replace spaces with underscore or dot comlocation = comlocation.replace(' ', mylar.REPLACE_CHAR) #moved this out of the above loop so it will chk for existance of comlocation in case moved #if it doesn't exist - create it (otherwise will bugger up later on) if os.path.isdir(str(comlocation)): logger.info(u"Directory (" + str(comlocation) + ") already exists! 
Continuing...") else: #print ("Directory doesn't exist!") #try: # os.makedirs(str(comlocation)) # logger.info(u"Directory successfully created at: " + str(comlocation)) #except OSError: # logger.error(u"Could not create comicdir : " + str(comlocation)) filechecker.validateAndCreateDirectory(comlocation, True) #try to account for CV not updating new issues as fast as GCD #seems CV doesn't update total counts #comicIssues = gcdinfo['totalissues'] comicIssues = comic['ComicIssues'] if not mylar.CV_ONLY: if gcdinfo['gcdvariation'] == "cv": comicIssues = str(int(comic['ComicIssues']) + 1) #let's download the image... if os.path.exists(mylar.CACHE_DIR): pass else: #let's make the dir. try: os.makedirs(str(mylar.CACHE_DIR)) logger.info(u"Cache Directory successfully created at: " + str(mylar.CACHE_DIR)) except OSError: logger.error( 'Could not create cache dir. Check permissions of cache dir: ' + str(mylar.CACHE_DIR)) coverfile = os.path.join(mylar.CACHE_DIR, str(comicid) + ".jpg") #try: urllib.urlretrieve(str(comic['ComicImage']), str(coverfile)) try: with open(str(coverfile)) as f: ComicImage = os.path.join('cache', str(comicid) + ".jpg") #this is for Firefox when outside the LAN...it works, but I don't know how to implement it #without breaking the normal flow for inside the LAN (above) #ComicImage = "http://" + str(mylar.HTTP_HOST) + ":" + str(mylar.HTTP_PORT) + "/cache/" + str(comicid) + ".jpg" logger.info(u"Sucessfully retrieved cover for " + comic['ComicName']) #if the comic cover local is checked, save a cover.jpg to the series folder. if mylar.COMIC_COVER_LOCAL: comiclocal = os.path.join(str(comlocation) + "/cover.jpg") shutil.copy(ComicImage, comiclocal) except IOError as e: logger.error(u"Unable to save cover locally at this time.") if oldcomversion is None: if comic['ComicVersion'].isdigit(): comicVol = "v" + comic['ComicVersion'] else: comicVol = None else: comicVol = oldcomversion #for description ... 
#Cdesc = helpers.cleanhtml(comic['ComicDescription']) #cdes_find = Cdesc.find("Collected") #cdes_removed = Cdesc[:cdes_find] #print cdes_removed controlValueDict = {"ComicID": comicid} newValueDict = { "ComicName": comic['ComicName'], "ComicSortName": sortname, "ComicYear": SeriesYear, "ComicImage": ComicImage, "Total": comicIssues, "ComicVersion": comicVol, "ComicLocation": comlocation, "ComicPublisher": comic['ComicPublisher'], #"Description": Cdesc.decode('utf-8', 'replace'), "DetailURL": comic['ComicURL'], # "ComicPublished": gcdinfo['resultPublished'], "ComicPublished": 'Unknown', "DateAdded": helpers.today(), "Status": "Loading" } myDB.upsert("comics", newValueDict, controlValueDict) #comicsort here... #run the re-sortorder here in order to properly display the page if pullupd is None: helpers.ComicSort(sequence='update') if CV_NoYearGiven == 'no': #if set to 'no' then we haven't pulled down the issues, otherwise we did it already issued = cv.getComic(comicid, 'issue') logger.info(u"Sucessfully retrieved issue details for " + comic['ComicName']) n = 0 iscnt = int(comicIssues) issid = [] issnum = [] issname = [] issdate = [] int_issnum = [] #let's start issue #'s at 0 -- thanks to DC for the new 52 reboot! 
:) latestiss = "0" latestdate = "0000-00-00" firstiss = "10000000" firstdate = "2099-00-00" #print ("total issues:" + str(iscnt)) #---removed NEW code here--- logger.info(u"Now adding/updating issues for " + comic['ComicName']) if not mylar.CV_ONLY: #fccnt = int(fc['comiccount']) #logger.info(u"Found " + str(fccnt) + "/" + str(iscnt) + " issues of " + comic['ComicName'] + "...verifying") #fcnew = [] if iscnt > 0: #if a series is brand new, it wont have any issues/details yet so skip this part while (n <= iscnt): #---NEW.code try: firstval = issued['issuechoice'][n] except IndexError: break cleanname = helpers.cleanName(firstval['Issue_Name']) issid = str(firstval['Issue_ID']) issnum = str(firstval['Issue_Number']) #print ("issnum: " + str(issnum)) issname = cleanname if '.' in str(issnum): issn_st = str(issnum).find('.') issn_b4dec = str(issnum)[:issn_st] #if the length of decimal is only 1 digit, assume it's a tenth dec_is = str(issnum)[issn_st + 1:] if len(dec_is) == 1: dec_nisval = int(dec_is) * 10 iss_naftdec = str(dec_nisval) if len(dec_is) == 2: dec_nisval = int(dec_is) iss_naftdec = str(dec_nisval) iss_issue = issn_b4dec + "." + iss_naftdec issis = (int(issn_b4dec) * 1000) + dec_nisval elif 'au' in issnum.lower(): print("au detected") stau = issnum.lower().find('au') issnum_au = issnum[:stau] print("issnum_au: " + str(issnum_au)) #account for Age of Ultron mucked up numbering issis = str(int(issnum_au) * 1000) + 'AU' else: issis = int(issnum) * 1000 bb = 0 while (bb <= iscnt): try: gcdval = gcdinfo['gcdchoice'][bb] #print ("gcdval: " + str(gcdval)) except IndexError: #account for gcd variation here if gcdinfo['gcdvariation'] == 'gcd': #logger.fdebug("gcd-variation accounted for.") issdate = '0000-00-00' int_issnum = int(issis / 1000) break if 'nn' in str(gcdval['GCDIssue']): #no number detected - GN, TP or the like logger.warn( u"Non Series detected (Graphic Novel, etc) - cannot proceed at this time." 
) updater.no_searchresults(comicid) return elif 'au' in gcdval['GCDIssue'].lower(): #account for Age of Ultron mucked up numbering - this is in format of 5AU.00 gstau = gcdval['GCDIssue'].lower().find('au') gcdis_au = gcdval['GCDIssue'][:gstau] gcdis = str(int(gcdis_au) * 1000) + 'AU' elif '.' in str(gcdval['GCDIssue']): #logger.fdebug("g-issue:" + str(gcdval['GCDIssue'])) issst = str(gcdval['GCDIssue']).find('.') #logger.fdebug("issst:" + str(issst)) issb4dec = str(gcdval['GCDIssue'])[:issst] #logger.fdebug("issb4dec:" + str(issb4dec)) #if the length of decimal is only 1 digit, assume it's a tenth decis = str(gcdval['GCDIssue'])[issst + 1:] #logger.fdebug("decis:" + str(decis)) if len(decis) == 1: decisval = int(decis) * 10 issaftdec = str(decisval) if len(decis) == 2: decisval = int(decis) issaftdec = str(decisval) gcd_issue = issb4dec + "." + issaftdec #logger.fdebug("gcd_issue:" + str(gcd_issue)) try: gcdis = (int(issb4dec) * 1000) + decisval except ValueError: logger.error( "This has no issue #'s for me to get - Either a Graphic Novel or one-shot. This feature to allow these will be added in the near future." ) updater.no_searchresults(comicid) return else: gcdis = int(str(gcdval['GCDIssue'])) * 1000 if gcdis == issis: issdate = str(gcdval['GCDDate']) if str(issis).isdigit(): int_issnum = int(gcdis / 1000) else: if 'au' in issis.lower(): int_issnum = str(int(gcdis[:-2]) / 1000) + 'AU' else: logger.error( "this has an alpha-numeric in the issue # which I cannot account for. Get on github and log the issue for evilhero." ) return #get the latest issue / date using the date. if gcdval['GCDDate'] > latestdate: latestiss = str(issnum) latestdate = str(gcdval['GCDDate']) break #bb = iscnt bb += 1 #print("(" + str(n) + ") IssueID: " + str(issid) + " IssueNo: " + str(issnum) + " Date" + str(issdate)) #---END.NEW. 
# check if the issue already exists iss_exists = myDB.action( 'SELECT * from issues WHERE IssueID=?', [issid]).fetchone() # Only change the status & add DateAdded if the issue is already in the database if iss_exists is None: newValueDict['DateAdded'] = helpers.today() controlValueDict = {"IssueID": issid} newValueDict = { "ComicID": comicid, "ComicName": comic['ComicName'], "IssueName": issname, "Issue_Number": issnum, "IssueDate": issdate, "Int_IssueNumber": int_issnum } if mylar.AUTOWANT_ALL: newValueDict['Status'] = "Wanted" elif issdate > helpers.today() and mylar.AUTOWANT_UPCOMING: newValueDict['Status'] = "Wanted" else: newValueDict['Status'] = "Skipped" if iss_exists: #print ("Existing status : " + str(iss_exists['Status'])) newValueDict['Status'] = iss_exists['Status'] try: myDB.upsert("issues", newValueDict, controlValueDict) except sqlite3.InterfaceError, e: #raise sqlite3.InterfaceError(e) logger.error( "MAJOR error trying to get issue data, this is most likey a MULTI-VOLUME series and you need to use the custom_exceptions.csv file." ) myDB.action("DELETE FROM comics WHERE ComicID=?", [comicid]) return n += 1
def addComictoDB(comicid,mismatch=None,pullupd=None,imported=None,ogcname=None):
    """Add a ComicVine series to the local DB, or refresh an existing one.

    Workflow: write a minimal 'Loading' placeholder row, fetch full series
    details from ComicVine (cv), optionally cross-reference GCD via parseit
    when not in CV-only mode, build/create the destination folder, cache the
    cover image, upsert the series row, then fetch and upsert every issue.

    Parameters:
        comicid  -- ComicVine series ID (string/int key into the comics table).
        mismatch -- "yes"/"no"/None; "yes" routes GCD lookup through the
                    exceptions table (custom_exceptions.csv overrides).
        pullupd  -- when None, the comic-sort ordering is refreshed; any other
                    value suppresses the re-sort (used during pull-list updates).
        imported, ogcname -- accepted but not used in this visible body;
                    presumably consumed by callers/later revisions (TODO confirm).

    Returns None normally; returns the string "true" (nomatch) when the GCD
    scrape fails, and returns early (None) on several error paths.
    """
    # Putting this here to get around the circular import. Will try to use this to update images at later date.
    # from mylar import cache
    myDB = db.DBConnection()

    # We need the current minimal info in the database instantly
    # so we don't throw a 500 error when we redirect to the artistPage
    controlValueDict = {"ComicID": comicid}
    dbcomic = myDB.action('SELECT * FROM comics WHERE ComicID=?', [comicid]).fetchone()
    if dbcomic is None:
        # brand-new series: placeholder name until the CV fetch completes
        newValueDict = {"ComicName": "Comic ID: %s" % (comicid),
                        "Status": "Loading"}
        comlocation = None
        oldcomversion = None
    else:
        # refresh of an existing series: keep its location/version
        newValueDict = {"Status": "Loading"}
        comlocation = dbcomic['ComicLocation']
        filechecker.validateAndCreateDirectory(comlocation, True)
        oldcomversion = dbcomic['ComicVersion'] #store the comicversion and chk if it exists before hammering.
    myDB.upsert("comics", newValueDict, controlValueDict)

    #run the re-sortorder here in order to properly display the page
    if pullupd is None:
        helpers.ComicSort(comicorder=mylar.COMICSORT, imported=comicid)

    # we need to lookup the info for the requested ComicID in full now
    comic = cv.getComic(comicid,'comic')
    #comic = myDB.action('SELECT * FROM comics WHERE ComicID=?', [comicid]).fetchone()
    if not comic:
        # CV fetch failed: mark the row Active so the UI shows a retryable state
        logger.warn("Error fetching comic. ID for : " + comicid)
        if dbcomic is None:
            newValueDict = {"ComicName": "Fetch failed, try refreshing. (%s)" % (comicid),
                            "Status": "Active"}
        else:
            newValueDict = {"Status": "Active"}
        myDB.upsert("comics", newValueDict, controlValueDict)
        return

    # strip a leading "The " for sort purposes only; display name is untouched
    if comic['ComicName'].startswith('The '):
        sortname = comic['ComicName'][4:]
    else:
        sortname = comic['ComicName']

    logger.info(u"Now adding/updating: " + comic['ComicName'])
    #--Now that we know ComicName, let's try some scraping
    #--Start
    # gcd will return issue details (most importantly publishing date)
    if not mylar.CV_ONLY:
        if mismatch == "no" or mismatch is None:
            gcdinfo=parseit.GCDScraper(comic['ComicName'], comic['ComicYear'], comic['ComicIssues'], comicid)
            #print ("gcdinfo: " + str(gcdinfo))
            mismatch_com = "no"
            if gcdinfo == "No Match":
                updater.no_searchresults(comicid)
                nomatch = "true"
                logger.info(u"There was an error when trying to add " + comic['ComicName'] + " (" + comic['ComicYear'] + ")" )
                return nomatch
            else:
                mismatch_com = "yes"
                #print ("gcdinfo:" + str(gcdinfo))
        elif mismatch == "yes":
            # series was flagged as a CV/GCD mismatch: resolve through the
            # exceptions table (populated from custom_exceptions.csv)
            CV_EXcomicid = myDB.action("SELECT * from exceptions WHERE ComicID=?", [comicid]).fetchone()
            if CV_EXcomicid['variloop'] is None:
                # NOTE(review): if this branch is taken, gcdinfo is never
                # assigned, yet it is read further below — confirm this path
                # cannot actually occur, otherwise it is a NameError.
                pass
            else:
                vari_loop = CV_EXcomicid['variloop']
                NewComicID = CV_EXcomicid['NewComicID']
                gcomicid = CV_EXcomicid['GComicID']
                resultURL = "/series/" + str(NewComicID) + "/"
                #print ("variloop" + str(CV_EXcomicid['variloop']))
                #if vari_loop == '99':
                gcdinfo = parseit.GCDdetails(comseries=None, resultURL=resultURL, vari_loop=0, ComicID=comicid, TotalIssues=0, issvariation="no", resultPublished=None)

        logger.info(u"Sucessfully retrieved details for " + comic['ComicName'] )
        # print ("Series Published" + parseit.resultPublished)

    CV_NoYearGiven = "no"
    #if the SeriesYear returned by CV is blank or none (0000), let's use the gcd one.
    if comic['ComicYear'] is None or comic['ComicYear'] == '0000':
        if mylar.CV_ONLY:
            #we'll defer this until later when we grab all the issues and then figure it out
            logger.info("Uh-oh. I can't find a Series Year for this series. I'm going to try analyzing deeper.")
            SeriesYear = cv.getComic(comicid,'firstissue',comic['FirstIssueID'])
            if SeriesYear == '0000':
                # still unknown: derive the year from the first issue's date
                logger.info("Ok - I couldn't find a Series Year at all. Loading in the issue data now and will figure out the Series Year.")
                CV_NoYearGiven = "yes"
                issued = cv.getComic(comicid,'issue')
                SeriesYear = issued['firstdate'][:4]
        else:
            SeriesYear = gcdinfo['SeriesYear']
    else:
        SeriesYear = comic['ComicYear']

    #let's do the Annual check here.
    if mylar.ANNUALS_ON:
        # commas/colons confuse the comicbookdb lookup, so strip them first
        annualcomicname = re.sub('[\,\:]', '', comic['ComicName'])
        annuals = comicbookdb.cbdb(annualcomicname, SeriesYear)
        print ("Number of Annuals returned: " + str(annuals['totalissues']))
        nb = 0
        while (nb <= int(annuals['totalissues'])):
            try:
                annualval = annuals['annualslist'][nb]
            except IndexError:
                break
            # synthetic IssueID: annual issue number concatenated with its date
            newCtrl = {"IssueID": str(annualval['AnnualIssue'] + annualval['AnnualDate'])}
            newVals = {"Issue_Number": annualval['AnnualIssue'],
                       "IssueDate": annualval['AnnualDate'],
                       "IssueName": annualval['AnnualTitle'],
                       "ComicID": comicid,
                       "Status": "Skipped"}
            myDB.upsert("annuals", newVals, newCtrl)
            nb+=1

    #parseit.annualCheck(gcomicid=gcdinfo['GCDComicID'], comicid=comicid, comicname=comic['ComicName'], comicyear=SeriesYear)
    #comic book location on machine
    # setup default location here
    if comlocation is None:
        # let's remove the non-standard characters here.
        u_comicnm = comic['ComicName']
        u_comicname = u_comicnm.encode('ascii', 'ignore').strip()
        # scrub characters that are illegal/awkward in directory names
        if ':' in u_comicname or '/' in u_comicname or ',' in u_comicname or '?' in u_comicname:
            comicdir = u_comicname
            if ':' in comicdir:
                comicdir = comicdir.replace(':','')
            if '/' in comicdir:
                comicdir = comicdir.replace('/','-')
            if ',' in comicdir:
                comicdir = comicdir.replace(',','')
            if '?' in comicdir:
                comicdir = comicdir.replace('?','')
        else:
            comicdir = u_comicname

        series = comicdir
        publisher = re.sub('!','',comic['ComicPublisher']) # thanks Boom!
        year = SeriesYear
        comversion = comic['ComicVersion']
        if comversion is None:
            comversion = 'None'
        #if comversion is None, remove it so it doesn't populate with 'None'
        if comversion == 'None':
            # NOTE(review): this mutates the module-global mylar.FILE_FORMAT
            # for every later caller, not just this series — confirm intended.
            chunk_f_f = re.sub('\$VolumeN','',mylar.FILE_FORMAT)
            chunk_f = re.compile(r'\s+')
            mylar.FILE_FORMAT = chunk_f.sub(' ', chunk_f_f)

        #do work to generate folder path
        # substitution map for the user-configurable FOLDER_FORMAT template
        values = {'$Series': series,
                  '$Publisher': publisher,
                  '$Year': year,
                  '$series': series.lower(),
                  '$publisher': publisher.lower(),
                  '$VolumeY': 'V' + str(year),
                  '$VolumeN': comversion
                  }

        #print mylar.FOLDER_FORMAT
        #print 'working dir:'
        #print helpers.replace_all(mylar.FOLDER_FORMAT, values)

        if mylar.FOLDER_FORMAT == '':
            comlocation = mylar.DESTINATION_DIR + "/" + comicdir + " (" + SeriesYear + ")"
        else:
            comlocation = mylar.DESTINATION_DIR + "/" + helpers.replace_all(mylar.FOLDER_FORMAT, values)

        #comlocation = mylar.DESTINATION_DIR + "/" + comicdir + " (" + comic['ComicYear'] + ")"
        if mylar.DESTINATION_DIR == "":
            logger.error(u"There is no general directory specified - please specify in Config/Post-Processing.")
            return
        if mylar.REPLACE_SPACES:
            #mylar.REPLACE_CHAR ...determines what to replace spaces with underscore or dot
            comlocation = comlocation.replace(' ', mylar.REPLACE_CHAR)

    #moved this out of the above loop so it will chk for existance of comlocation in case moved
    #if it doesn't exist - create it (otherwise will bugger up later on)
    if os.path.isdir(str(comlocation)):
        logger.info(u"Directory (" + str(comlocation) + ") already exists! Continuing...")
    else:
        #print ("Directory doesn't exist!")
        #try:
        #    os.makedirs(str(comlocation))
        #    logger.info(u"Directory successfully created at: " + str(comlocation))
        #except OSError:
        #    logger.error(u"Could not create comicdir : " + str(comlocation))
        filechecker.validateAndCreateDirectory(comlocation, True)

    #try to account for CV not updating new issues as fast as GCD
    #seems CV doesn't update total counts
    #comicIssues = gcdinfo['totalissues']
    comicIssues = comic['ComicIssues']
    if not mylar.CV_ONLY:
        if gcdinfo['gcdvariation'] == "cv":
            # GCD knows about one more issue than CV does — pad the count
            comicIssues = str(int(comic['ComicIssues']) + 1)

    #let's download the image...
    if os.path.exists(mylar.CACHE_DIR):pass
    else:
        #let's make the dir.
        try:
            os.makedirs(str(mylar.CACHE_DIR))
            logger.info(u"Cache Directory successfully created at: " + str(mylar.CACHE_DIR))
        except OSError:
            logger.error('Could not create cache dir. Check permissions of cache dir: ' + str(mylar.CACHE_DIR))

    coverfile = os.path.join(mylar.CACHE_DIR, str(comicid) + ".jpg")

    #try:
    urllib.urlretrieve(str(comic['ComicImage']), str(coverfile))
    try:
        # opening the file proves the download produced something readable
        with open(str(coverfile)) as f:
            ComicImage = os.path.join('cache',str(comicid) + ".jpg")

            #this is for Firefox when outside the LAN...it works, but I don't know how to implement it
            #without breaking the normal flow for inside the LAN (above)
            #ComicImage = "http://" + str(mylar.HTTP_HOST) + ":" + str(mylar.HTTP_PORT) + "/cache/" + str(comicid) + ".jpg"

            logger.info(u"Sucessfully retrieved cover for " + comic['ComicName'])
            #if the comic cover local is checked, save a cover.jpg to the series folder.
            if mylar.COMIC_COVER_LOCAL:
                comiclocal = os.path.join(str(comlocation) + "/cover.jpg")
                shutil.copy(ComicImage,comiclocal)
    except IOError as e:
        # NOTE(review): on this path ComicImage is never assigned, yet it is
        # referenced in the upsert below — would raise NameError; confirm.
        logger.error(u"Unable to save cover locally at this time.")

    # prefer the version already stored in the DB; only derive from CV when absent
    if oldcomversion is None:
        if comic['ComicVersion'].isdigit():
            comicVol = "v" + comic['ComicVersion']
        else:
            comicVol = None
    else:
        comicVol = oldcomversion

    #for description ...
    #Cdesc = helpers.cleanhtml(comic['ComicDescription'])
    #cdes_find = Cdesc.find("Collected")
    #cdes_removed = Cdesc[:cdes_find]
    #print cdes_removed

    # full series row now that all details are known (still 'Loading' until
    # the issue pass completes)
    controlValueDict = {"ComicID": comicid}
    newValueDict = {"ComicName": comic['ComicName'],
                    "ComicSortName": sortname,
                    "ComicYear": SeriesYear,
                    "ComicImage": ComicImage,
                    "Total": comicIssues,
                    "ComicVersion": comicVol,
                    "ComicLocation": comlocation,
                    "ComicPublisher": comic['ComicPublisher'],
                    #"Description": Cdesc.decode('utf-8', 'replace'),
                    "DetailURL": comic['ComicURL'],
                    # "ComicPublished": gcdinfo['resultPublished'],
                    "ComicPublished": 'Unknown',
                    "DateAdded": helpers.today(),
                    "Status": "Loading"}

    myDB.upsert("comics", newValueDict, controlValueDict)

    #comicsort here...
    #run the re-sortorder here in order to properly display the page
    if pullupd is None:
        helpers.ComicSort(sequence='update')

    if CV_NoYearGiven == 'no':
        #if set to 'no' then we haven't pulled down the issues, otherwise we did it already
        issued = cv.getComic(comicid,'issue')
    logger.info(u"Sucessfully retrieved issue details for " + comic['ComicName'] )
    n = 0
    iscnt = int(comicIssues)
    issid = []
    issnum = []
    issname = []
    issdate = []
    int_issnum = []
    #let's start issue #'s at 0 -- thanks to DC for the new 52 reboot! :)
    latestiss = "0"
    latestdate = "0000-00-00"
    # NOTE(review): firstiss/firstdate are initialized but never updated in
    # this visible body — presumably used by code beyond this function.
    firstiss = "10000000"
    firstdate = "2099-00-00"
    #print ("total issues:" + str(iscnt))
    #---removed NEW code here---
    logger.info(u"Now adding/updating issues for " + comic['ComicName'])

    if not mylar.CV_ONLY:
        #fccnt = int(fc['comiccount'])
        #logger.info(u"Found " + str(fccnt) + "/" + str(iscnt) + " issues of " + comic['ComicName'] + "...verifying")
        #fcnew = []
        if iscnt > 0: #if a series is brand new, it wont have any issues/details yet so skip this part
            while (n <= iscnt):
                #---NEW.code
                try:
                    firstval = issued['issuechoice'][n]
                except IndexError:
                    break
                cleanname = helpers.cleanName(firstval['Issue_Name'])
                issid = str(firstval['Issue_ID'])
                issnum = str(firstval['Issue_Number'])
                #print ("issnum: " + str(issnum))
                issname = cleanname
                # Encode the issue number as an int: whole issues * 1000,
                # decimals mapped into the 2-digit "hundredths" (so 1.5 -> 1050).
                if '.' in str(issnum):
                    issn_st = str(issnum).find('.')
                    issn_b4dec = str(issnum)[:issn_st]
                    #if the length of decimal is only 1 digit, assume it's a tenth
                    dec_is = str(issnum)[issn_st + 1:]
                    if len(dec_is) == 1:
                        dec_nisval = int(dec_is) * 10
                        iss_naftdec = str(dec_nisval)
                    if len(dec_is) == 2:
                        dec_nisval = int(dec_is)
                        iss_naftdec = str(dec_nisval)
                    # NOTE(review): a decimal part of 3+ digits leaves
                    # iss_naftdec/dec_nisval unassigned -> NameError; confirm
                    # such issue numbers cannot reach here.
                    iss_issue = issn_b4dec + "." + iss_naftdec
                    issis = (int(issn_b4dec) * 1000) + dec_nisval
                elif 'au' in issnum.lower():
                    print ("au detected")
                    stau = issnum.lower().find('au')
                    issnum_au = issnum[:stau]
                    print ("issnum_au: " + str(issnum_au))
                    #account for Age of Ultron mucked up numbering
                    # NOTE: issis becomes a *string* here (e.g. "5000AU"),
                    # unlike the int produced by the other branches.
                    issis = str(int(issnum_au) * 1000) + 'AU'
                else:
                    issis = int(issnum) * 1000

                # linear scan of the GCD issue list for a matching encoded number
                bb = 0
                while (bb <= iscnt):
                    try:
                        gcdval = gcdinfo['gcdchoice'][bb]
                        #print ("gcdval: " + str(gcdval))
                    except IndexError:
                        #account for gcd variation here
                        if gcdinfo['gcdvariation'] == 'gcd':
                            #logger.fdebug("gcd-variation accounted for.")
                            issdate = '0000-00-00'
                            int_issnum = int ( issis / 1000 )
                        break
                    if 'nn' in str(gcdval['GCDIssue']):
                        #no number detected - GN, TP or the like
                        logger.warn(u"Non Series detected (Graphic Novel, etc) - cannot proceed at this time.")
                        updater.no_searchresults(comicid)
                        return
                    elif 'au' in gcdval['GCDIssue'].lower():
                        #account for Age of Ultron mucked up numbering - this is in format of 5AU.00
                        gstau = gcdval['GCDIssue'].lower().find('au')
                        gcdis_au = gcdval['GCDIssue'][:gstau]
                        gcdis = str(int(gcdis_au) * 1000) + 'AU'
                    elif '.' in str(gcdval['GCDIssue']):
                        #logger.fdebug("g-issue:" + str(gcdval['GCDIssue']))
                        issst = str(gcdval['GCDIssue']).find('.')
                        #logger.fdebug("issst:" + str(issst))
                        issb4dec = str(gcdval['GCDIssue'])[:issst]
                        #logger.fdebug("issb4dec:" + str(issb4dec))
                        #if the length of decimal is only 1 digit, assume it's a tenth
                        decis = str(gcdval['GCDIssue'])[issst+1:]
                        #logger.fdebug("decis:" + str(decis))
                        if len(decis) == 1:
                            decisval = int(decis) * 10
                            issaftdec = str(decisval)
                        if len(decis) == 2:
                            decisval = int(decis)
                            issaftdec = str(decisval)
                        gcd_issue = issb4dec + "." + issaftdec
                        #logger.fdebug("gcd_issue:" + str(gcd_issue))
                        try:
                            gcdis = (int(issb4dec) * 1000) + decisval
                        except ValueError:
                            logger.error("This has no issue #'s for me to get - Either a Graphic Novel or one-shot. This feature to allow these will be added in the near future.")
                            updater.no_searchresults(comicid)
                            return
                    else:
                        gcdis = int(str(gcdval['GCDIssue'])) * 1000
                    # NOTE(review): in the AU branches gcdis/issis are strings,
                    # elsewhere ints — the == below only matches when both
                    # sides took the same branch type; confirm intended.
                    if gcdis == issis:
                        issdate = str(gcdval['GCDDate'])
                        if str(issis).isdigit():
                            int_issnum = int( gcdis / 1000 )
                        else:
                            if 'au' in issis.lower():
                                int_issnum = str(int(gcdis[:-2]) / 1000) + 'AU'
                            else:
                                logger.error("this has an alpha-numeric in the issue # which I cannot account for. Get on github and log the issue for evilhero.")
                                return
                        #get the latest issue / date using the date.
                        if gcdval['GCDDate'] > latestdate:
                            latestiss = str(issnum)
                            latestdate = str(gcdval['GCDDate'])
                        break
                    #bb = iscnt
                    bb+=1
                #print("(" + str(n) + ") IssueID: " + str(issid) + " IssueNo: " + str(issnum) + " Date" + str(issdate))
                #---END.NEW.

                # check if the issue already exists
                iss_exists = myDB.action('SELECT * from issues WHERE IssueID=?', [issid]).fetchone()

                # Only change the status & add DateAdded if the issue is already in the database
                if iss_exists is None:
                    newValueDict['DateAdded'] = helpers.today()

                controlValueDict = {"IssueID": issid}
                newValueDict = {"ComicID": comicid,
                                "ComicName": comic['ComicName'],
                                "IssueName": issname,
                                "Issue_Number": issnum,
                                "IssueDate": issdate,
                                "Int_IssueNumber": int_issnum
                                }
                if mylar.AUTOWANT_ALL:
                    newValueDict['Status'] = "Wanted"
                elif issdate > helpers.today() and mylar.AUTOWANT_UPCOMING:
                    # future-dated issue: mark Wanted if the user auto-wants upcoming
                    newValueDict['Status'] = "Wanted"
                else:
                    newValueDict['Status'] = "Skipped"

                if iss_exists:
                    #print ("Existing status : " + str(iss_exists['Status']))
                    # never clobber the user's existing status on refresh
                    newValueDict['Status'] = iss_exists['Status']

                try:
                    myDB.upsert("issues", newValueDict, controlValueDict)
                except sqlite3.InterfaceError, e:
                    #raise sqlite3.InterfaceError(e)
                    logger.error("MAJOR error trying to get issue data, this is most likey a MULTI-VOLUME series and you need to use the custom_exceptions.csv file.")
                    # roll back the half-added series so it can be retried cleanly
                    myDB.action("DELETE FROM comics WHERE ComicID=?", [comicid])
                    return
                n+=1
def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, nzbprov, nzbpr, IssDateFix, IssueID, UseFuzzy, newznab_host=None): if nzbprov == 'nzb.su': apikey = mylar.NZBSU_APIKEY elif nzbprov == 'dognzb': apikey = mylar.DOGNZB_APIKEY elif nzbprov == 'nzbx': apikey = 'none' elif nzbprov == 'experimental': apikey = 'none' elif nzbprov == 'newznab': host_newznab = newznab_host[0] apikey = newznab_host[1] logger.fdebug("using Newznab host of : " + str(host_newznab)) logger.info(u"Shhh be very quiet...I'm looking for " + ComicName + " issue: " + str(IssueNumber) + "(" + str(ComicYear) + ") using " + str(nzbprov)) if mylar.PREFERRED_QUALITY == 0: filetype = "" elif mylar.PREFERRED_QUALITY == 1: filetype = ".cbr" elif mylar.PREFERRED_QUALITY == 2: filetype = ".cbz" if mylar.SAB_PRIORITY: if mylar.SAB_PRIORITY == "Default": sabpriority = "-100" elif mylar.SAB_PRIORITY == "Low": sabpriority = "-1" elif mylar.SAB_PRIORITY == "Normal": sabpriority = "0" elif mylar.SAB_PRIORITY == "High": sabpriority = "1" elif mylar.SAB_PRIORITY == "Paused": sabpriority = "-2" else: #if sab priority isn't selected, default to Normal (0) sabpriority = "0" #UseFuzzy == 0: Normal #UseFuzzy == 1: Remove Year #UseFuzzy == 2: Fuzzy Year # figure out what was missed via rss feeds and do a manual search via api #tsc = int(tot-1) findcomic = [] findcomiciss = [] findcount = 0 ci = "" comsearch = [] isssearch = [] comyear = str(ComicYear) #print ("-------SEARCH FOR MISSING------------------") findcomic.append(str(ComicName)) # this should be called elsewhere..redudant code. if '.' 
in IssueNumber: isschk_find = IssueNumber.find('.') isschk_b4dec = IssueNumber[:isschk_find] isschk_decval = IssueNumber[isschk_find+1:] logger.fdebug("IssueNumber: " + str(IssueNumber)) logger.fdebug("..before decimal: " + str(isschk_b4dec)) logger.fdebug("...after decimal: " + str(isschk_decval)) #--let's make sure we don't wipe out decimal issues ;) if int(isschk_decval) == 0: iss = isschk_b4dec intdec = int(isschk_decval) else: if len(isschk_decval) == 1: iss = isschk_b4dec + "." + isschk_decval intdec = int(isschk_decval) * 10 else: iss = isschk_b4dec + "." + isschk_decval.rstrip('0') intdec = int(isschk_decval.rstrip('0')) * 10 logger.fdebug("let's search with this issue value: " + str(iss)) #Issue_Number = carry-over with decimals #iss = clean issue number (no decimals) intIss = (int(isschk_b4dec) * 1000) + intdec logger.fdebug("int.issue :" + str(intIss)) logger.fdebug("int.issue_b4: " + str(isschk_b4dec)) logger.fdebug("int.issue_dec: " + str(intdec)) IssueNumber = iss #issue_decimal = re.compile(r'[^\d.]+') #issue = issue_decimal.sub('', str(IssueNumber)) findcomiciss.append(iss) #print ("we need : " + str(findcomic[findcount]) + " issue: #" + str(findcomiciss[findcount])) # replace whitespace in comic name with %20 for api search cm1 = re.sub(" ", "%20", str(findcomic[findcount])) cm = re.sub("\&", "%26", str(cm1)) #print (cmi) if '.' in findcomiciss[findcount]: if len(str(isschk_b4dec)) == 3: cmloopit = 1 elif len(str(isschk_b4dec)) == 2: cmloopit = 2 elif len(str(isschk_b4dec)) == 1: cmloopit = 3 else: if len(str(findcomiciss[findcount])) == 1: cmloopit = 3 elif len(str(findcomiciss[findcount])) == 2: cmloopit = 2 else: cmloopit = 1 isssearch.append(str(findcomiciss[findcount])) comsearch.append(cm) findcount+=1 # ---- #print ("------RESULTS OF SEARCH-------------------") findloop = 0 foundcomic = [] done = False #---issue problem # if issue is '011' instead of '11' in nzb search results, will not have same # results. 
'011' will return different than '11', as will '009' and '09'. while (findloop < (findcount) ): comsrc = comsearch[findloop] while (cmloopit >= 1 ): if done is True: logger.fdebug("we should break out now - sucessful search previous") findloop == 99 break # here we account for issue pattern variations if cmloopit == 3: comsearch[findloop] = comsrc + "%2000" + isssearch[findloop] + "%20" + str(filetype) elif cmloopit == 2: comsearch[findloop] = comsrc + "%200" + isssearch[findloop] + "%20" + str(filetype) elif cmloopit == 1: comsearch[findloop] = comsrc + "%20" + isssearch[findloop] + "%20" + str(filetype) logger.fdebug("comsearch: " + str(comsearch)) logger.fdebug("cmloopit: " + str(cmloopit)) logger.fdebug("done: " + str(done)) if nzbprov != 'experimental': if nzbprov == 'dognzb': findurl = "http://dognzb.cr/api?t=search&apikey=" + str(apikey) + "&q=" + str(comsearch[findloop]) + "&o=xml&cat=7030" elif nzbprov == 'nzb.su': findurl = "http://www.nzb.su/api?t=search&q=" + str(comsearch[findloop]) + "&apikey=" + str(apikey) + "&o=xml&cat=7030" elif nzbprov == 'newznab': #let's make sure the host has a '/' at the end, if not add it. 
if host_newznab[-1] != "/": host_newznab = str(host_newznab) + "/" findurl = str(host_newznab) + "api?t=search&q=" + str(comsearch[findloop]) + "&apikey=" + str(apikey) + "&o=xml&cat=7030" logger.fdebug("search-url: " + str(findurl)) elif nzbprov == 'nzbx': bb = prov_nzbx.searchit(comsearch[findloop]) logger.fdebug("nzbx.co!") if nzbprov != 'nzbx': bb = feedparser.parse(findurl) elif nzbprov == 'experimental': #bb = parseit.MysterBinScrape(comsearch[findloop], comyear) bb = findcomicfeed.Startit(cm, isssearch[findloop], comyear) # since the regexs in findcomicfeed do the 3 loops, lets force the exit after cmloopit == 1 done = False foundc = "no" log2file = "" if bb == "no results": pass foundc = "no" else: for entry in bb['entries']: logger.fdebug("checking search result: " + str(entry['title'])) thisentry = str(entry['title']) logger.fdebug("Entry: " + str(thisentry)) cleantitle = re.sub('[_/.]', ' ', str(entry['title'])) cleantitle = helpers.cleanName(str(cleantitle)) nzbname = cleantitle logger.fdebug("Cleantitle: " + str(cleantitle)) if len(re.findall('[^()]+', cleantitle)) == 1: cleantitle = "abcdefghijk 0 (1901).cbz" if done: break #let's narrow search down - take out year (2010), (2011), etc #let's check for first occurance of '(' as generally indicates #that the 'title' has ended ripperlist=['digital-', 'empire', 'dcp'] #this takes care of the brackets :) m = re.findall('[^()]+', cleantitle) lenm = len(m) #print ("there are " + str(lenm) + " words.") cnt = 0 yearmatch = "false" while (cnt < lenm): if m[cnt] is None: break if m[cnt] == ' ': pass else: logger.fdebug(str(cnt) + ". 
Bracket Word: " + str(m[cnt])) if cnt == 0: comic_andiss = m[cnt] logger.fdebug("Comic: " + str(comic_andiss)) logger.fdebug("UseFuzzy is : " + str(UseFuzzy)) if UseFuzzy == "0" or UseFuzzy == "2" or UseFuzzy is None or IssDateFix == "yes": if m[cnt][:-2] == '19' or m[cnt][:-2] == '20': logger.fdebug("year detected: " + str(m[cnt])) result_comyear = m[cnt] if str(comyear) in result_comyear: logger.fdebug(str(comyear) + " - right years match baby!") yearmatch = "true" else: logger.fdebug(str(comyear) + " - not right - years do not match") yearmatch = "false" if UseFuzzy == "2": #Fuzzy the year +1 and -1 ComUp = int(ComicYear) + 1 ComDwn = int(ComicYear) - 1 if str(ComUp) in result_comyear or str(ComDwn) in result_comyear: logger.fdebug("Fuzzy Logic'd the Year and got a match with a year of " + str(result_comyear)) yearmatch = "true" else: logger.fdebug(str(comyear) + "Fuzzy logic'd the Year and year still didn't match.") #let's do this hear and save a few extra loops ;) #fix for issue dates between Nov-Dec/Jan if IssDateFix == "yes" and UseFuzzy is not "2": ComicYearFix = int(ComicYear) + 1 if str(ComicYearFix) in result_comyear: logger.fdebug("further analysis reveals this was published inbetween Nov-Jan, incrementing year to " + str(ComicYearFix) + " has resulted in a match!") yearmatch = "true" else: logger.fdebug(str(comyear) + " - not the right year.") elif UseFuzzy == "1": yearmatch = "true" if 'digital' in m[cnt] and len(m[cnt]) == 7: logger.fdebug("digital edition detected") pass if ' of ' in m[cnt]: logger.fdebug("mini-series detected : " + str(m[cnt])) result_of = m[cnt] if 'cover' in m[cnt]: logger.fdebug("covers detected: " + str(m[cnt])) result_comcovers = m[cnt] for ripper in ripperlist: if ripper in m[cnt]: logger.fdebug("Scanner detected: " + str(m[cnt])) result_comscanner = m[cnt] cnt+=1 if yearmatch == "false": continue splitit = [] watchcomic_split = [] logger.fdebug("original nzb comic and issue: " + str(comic_andiss)) #changed this from '' to ' 
' comic_iss_b4 = re.sub('[\-\:\,]', ' ', str(comic_andiss)) comic_iss = comic_iss_b4.replace('.',' ') logger.fdebug("adjusted nzb comic and issue: " + str(comic_iss)) splitit = comic_iss.split(None) #something happened to dognzb searches or results...added a '.' in place of spaces #screwed up most search results with dognzb. Let's try to adjust. #watchcomic_split = findcomic[findloop].split(None) if splitit[(len(splitit)-1)].isdigit(): #compares - if the last digit and second last digit are #'s seperated by spaces assume decimal comic_iss = splitit[(len(splitit)-1)] splitst = len(splitit) - 1 if splitit[(len(splitit)-2)].isdigit(): # for series that have a digit at the end, it screws up the logistics. i = 1 chg_comic = splitit[0] while (i < (len(splitit)-1)): chg_comic = chg_comic + " " + splitit[i] i+=1 logger.fdebug("chg_comic:" + str(chg_comic)) if chg_comic.upper() == findcomic[findloop].upper(): logger.fdebug("series contains numerics...adjusting..") else: changeup = "." + splitit[(len(splitit)-1)] logger.fdebug("changeup to decimal: " + str(changeup)) comic_iss = splitit[(len(splitit)-2)] + "." + comic_iss splitst = len(splitit) - 2 else: # if the nzb name doesn't follow the series-issue-year format even closely..ignore nzb logger.fdebug("invalid naming format of nzb detected - cannot properly determine issue") continue logger.fdebug("adjusting from: " + str(comic_iss_b4) + " to: " + str(comic_iss)) #bmm = re.findall('v\d', comic_iss) #if len(bmm) > 0: splitst = len(splitit) - 2 #else: splitst = len(splitit) - 1 # make sure that things like - in watchcomic are accounted for when comparing to nzb. 
watchcomic_split = re.sub('[\-\:\,\.]', ' ', findcomic[findloop]).split(None) logger.fdebug(str(splitit) + " nzb series word count: " + str(splitst)) logger.fdebug(str(watchcomic_split) + " watchlist word count: " + str(len(watchcomic_split))) if (splitst) != len(watchcomic_split): logger.fdebug("incorrect comic lengths...not a match") if str(splitit[0]).lower() == "the": logger.fdebug("THE word detected...attempting to adjust pattern matching") splitit[0] = splitit[4:] else: logger.fdebug("length match..proceeding") n = 0 scount = 0 logger.fdebug("search-length: " + str(splitst)) logger.fdebug("Watchlist-length: " + str(len(watchcomic_split))) while ( n <= (splitst)-1 ): logger.fdebug("splitit: " + str(splitit[n])) if n < (splitst) and n < len(watchcomic_split): logger.fdebug(str(n) + " Comparing: " + str(watchcomic_split[n]) + " .to. " + str(splitit[n])) if '+' in watchcomic_split[n]: watchcomic_split[n] = re.sub('+', '', str(watchcomic_split[n])) if str(watchcomic_split[n].lower()) in str(splitit[n].lower()): logger.fdebug("word matched on : " + str(splitit[n])) scount+=1 #elif ':' in splitit[n] or '-' in splitit[n]: # splitrep = splitit[n].replace('-', '') # print ("non-character keyword...skipped on " + splitit[n]) elif str(splitit[n].lower()).startswith('v'): logger.fdebug("possible versioning..checking") #we hit a versioning # - account for it if splitit[n][1:].isdigit(): comicversion = str(splitit[n]) logger.fdebug("version found: " + str(comicversion)) else: logger.fdebug("Comic / Issue section") if splitit[n].isdigit(): logger.fdebug("issue detected") #comiss = splitit[n] comicNAMER = n - 1 comNAME = splitit[0] cmnam = 1 while (cmnam <= comicNAMER): comNAME = str(comNAME) + " " + str(splitit[cmnam]) cmnam+=1 logger.fdebug("comic: " + str(comNAME)) else: logger.fdebug("non-match for: "+ str(splitit[n])) pass n+=1 #set the match threshold to 80% (for now) # if it's less than 80% consider it a non-match and discard. 
#splitit has to splitit-1 because last position is issue. wordcnt = int(scount) logger.fdebug("scount:" + str(wordcnt)) totalcnt = int(splitst) logger.fdebug("splitit-len:" + str(totalcnt)) spercent = (wordcnt/totalcnt) * 100 logger.fdebug("we got " + str(spercent) + " percent.") if int(spercent) >= 80: logger.fdebug("it's a go captain... - we matched " + str(spercent) + "%!") if int(spercent) < 80: logger.fdebug("failure - we only got " + str(spercent) + "% right!") continue logger.fdebug("this should be a match!") logger.fdebug("issue we are looking for is : " + str(findcomiciss[findloop])) logger.fdebug("integer value of issue we are looking for : " + str(intIss)) #redudant code - should be called elsewhere... if '.' in comic_iss: comisschk_find = comic_iss.find('.') comisschk_b4dec = comic_iss[:comisschk_find] comisschk_decval = comic_iss[comisschk_find+1:] logger.fdebug("Found IssueNumber: " + str(comic_iss)) logger.fdebug("..before decimal: " + str(comisschk_b4dec)) logger.fdebug("...after decimal: " + str(comisschk_decval)) #--let's make sure we don't wipe out decimal issues ;) if int(comisschk_decval) == 0: ciss = comisschk_b4dec cintdec = int(comisschk_decval) else: if len(comisschk_decval) == 1: ciss = comisschk_b4dec + "." + comisschk_decval cintdec = int(comisschk_decval) * 10 else: ciss = comisschk_b4dec + "." + comisschk_decval.rstrip('0') cintdec = int(comisschk_decval.rstrip('0')) * 10 comintIss = (int(comisschk_b4dec) * 1000) + cintdec else: comintIss = int(comic_iss) * 1000 logger.fdebug("issue we found for is : " + str(comic_iss)) logger.fdebug("integer value of issue we are found : " + str(comintIss)) #issue comparison now as well if int(intIss) == int(comintIss): logger.fdebug('issues match!') logger.info(u"Found " + str(ComicName) + " (" + str(comyear) + ") issue: " + str(IssueNumber) + " using " + str(nzbprov) ) ## -- inherit issue. Comic year is non-standard. 
nzb year is the year ## -- comic was printed, not the start year of the comic series and ## -- thus the deciding component if matches are correct or not linkstart = os.path.splitext(entry['link'])[0] #following is JUST for nzb.su if nzbprov == 'nzb.su' or nzbprov == 'newznab': linkit = os.path.splitext(entry['link'])[1] linkit = linkit.replace("&", "%26") linkapi = str(linkstart) + str(linkit) else: # this should work for every other provider linkstart = linkstart.replace("&", "%26") linkapi = str(linkstart) logger.fdebug("link given by: " + str(nzbprov)) logger.fdebug("link: " + str(linkstart)) logger.fdebug("linkforapi: " + str(linkapi)) #here we distinguish between rename and not. #blackhole functinality--- #let's download the file to a temporary cache. if mylar.BLACKHOLE: logger.fdebug("using blackhole directory at : " + str(mylar.BLACKHOLE_DIR)) if os.path.exists(mylar.BLACKHOLE_DIR): #pretty this biatch up. Bl_ComicName = re.sub('[/:/,\/]', '', str(ComicName)) filenamenzb = str(re.sub(" ", ".", str(Bl_ComicName))) + "." + str(IssueNumber) + ".(" + str(comyear) + ").nzb" urllib.urlretrieve(linkapi, str(mylar.BLACKHOLE_DIR) + str(filenamenzb)) logger.fdebug("filename saved to your blackhole as : " + str(filenamenzb)) logger.info(u"Successfully sent .nzb to your Blackhole directory : " + str(mylar.BLACKHOLE_DIR) + str(filenamenzb) ) #end blackhole else: tmppath = mylar.CACHE_DIR if os.path.exists(tmppath): logger.fdebug("cache directory successfully found at : " + str(tmppath)) pass else: #let's make the dir. logger.fdebug("couldn't locate cache directory, attempting to create at : " + str(mylar.CACHE_DIR)) try: os.makedirs(str(mylar.CACHE_DIR)) logger.info(u"Cache Directory successfully created at: " + str(mylar.CACHE_DIR)) except OSError.e: if e.errno != errno.EEXIST: raise logger.fdebug("link to retrieve via api:" + str(linkapi)) #we need to change the nzbx string now to allow for the nzbname rename. 
if nzbprov == 'nzbx': nzbxlink_st = linkapi.find("*|*") linkapi = linkapi[:(nzbxlink_st + 3)] + str(nzbname) logger.fdebug("new linkapi (this should =nzbname) :" + str(linkapi)) # let's build the send-to-SAB string now: tmpapi = str(mylar.SAB_HOST) logger.fdebug("send-to-SAB host string: " + str(tmpapi)) # changed to just work with direct links now... SABtype = "/api?mode=addurl&name=" fileURL = str(linkapi) tmpapi = tmpapi + str(SABtype) logger.fdebug("...selecting API type: " + str(tmpapi)) tmpapi = tmpapi + str(fileURL) logger.fdebug("...attaching nzb provider link: " + str(tmpapi)) # determine SAB priority if mylar.SAB_PRIORITY: tmpapi = tmpapi + "&priority=" + str(sabpriority) logger.fdebug("...setting priority: " + str(tmpapi)) # if category is blank, let's adjust if mylar.SAB_CATEGORY: tmpapi = tmpapi + "&cat=" + str(mylar.SAB_CATEGORY) logger.fdebug("...attaching category: " + str(tmpapi)) if mylar.RENAME_FILES == 1: tmpapi = tmpapi + "&script=ComicRN.py" logger.fdebug("...attaching rename script: " + str(tmpapi)) #final build of send-to-SAB tmpapi = tmpapi + "&apikey=" + str(mylar.SAB_APIKEY) logger.fdebug("Completed send-to-SAB link: " + str(tmpapi)) try: urllib2.urlopen(tmpapi) except urllib2.URLError: logger.error(u"Unable to send nzb file to SABnzbd") return logger.info(u"Successfully sent nzb file to SABnzbd") #delete the .nzb now. #if mylar.PROG_DIR is not "/" and nzbprov != 'nzb.su': # logger.fdebug("preparing to remove temporary nzb file at: " + str(savefile)) # os.remove(savefile) # logger.info(u"Removed temporary save file") #raise an exception to break out of loop #let's change all space to decimals for simplicity if mylar.BLACKHOLE: bhole_cname = re.sub('[/:/,\/]', '', str(ComicName)) nzbname = str(re.sub(" ", ".", str(bhole_cname))) + "." 
+ str(IssueNumber) + ".(" + str(comyear) + ")" else: nzbname = re.sub(" ", ".", str(entry['title'])) nzbname = re.sub('[\,\:]', '', str(nzbname)) extensions = ('.cbr', '.cbz') if nzbname.lower().endswith(extensions): fd, ext = os.path.splitext(nzbname) logger.fdebug("Removed extension from nzb: " + ext) nzbname = re.sub(str(ext), '', str(nzbname)) logger.fdebug("nzbname used for post-processing:" + str(nzbname)) foundc = "yes" done = True break else: log2file = log2file + "issues don't match.." + "\n" foundc = "no" if done == True: cmloopit == 1 #let's make sure it STOPS searching after a sucessful match. break cmloopit-=1 findloop+=1 if foundc == "yes": foundcomic.append("yes") logger.fdebug("Found matching comic...preparing to send to Updater with IssueID: " + str(IssueID) + " and nzbname: " + str(nzbname)) updater.nzblog(IssueID, nzbname) nzbpr == 0 #break return foundc elif foundc == "no" and nzbpr == 0: foundcomic.append("no") logger.fdebug("couldn't find a matching comic") if IssDateFix == "no": logger.info(u"Couldn't find Issue " + str(IssueNumber) + " of " + str(ComicName) + "(" + str(comyear) + "). Status kept as wanted." ) break return foundc
def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, nzbprov, nzbpr):
    """Query an nzb provider for a specific comic issue and, on a
    title/year/issue match, download the .nzb and hand it to SABnzbd
    (or drop it into the blackhole directory).

    Returns "yes" when a match was found and submitted, "no" otherwise.

    NOTE(review): this definition is shadowed by the later NZB_SEARCH in this
    module (same name, wider signature), so it is effectively dead code at
    import time; kept for reference and repaired in place.
    """
    logger.info(u"Shhh be very quiet...I'm looking for " + ComicName + " issue: " + str(IssueNumber) + " using " + str(nzbprov))

    # provider api key selection
    if nzbprov == 'nzb.su':
        apikey = mylar.NZBSU_APIKEY
    elif nzbprov == 'dognzb':
        apikey = mylar.DOGNZB_APIKEY
    elif nzbprov == 'experimental':
        apikey = 'none'

    # preferred quality becomes a filetype hint appended to the search query
    if mylar.PREFERRED_QUALITY == 0:
        filetype = ""
    elif mylar.PREFERRED_QUALITY == 1:
        filetype = ".cbr"
    elif mylar.PREFERRED_QUALITY == 2:
        filetype = ".cbz"

    # figure out what was missed via rss feeds and do a manual search via api
    findcomic = []
    findcomiciss = []
    findcount = 0
    comsearch = []
    isssearch = []
    comyear = str(ComicYear)

    findcomic.append(str(ComicName))
    IssueNumber = str(re.sub("\.00", "", str(IssueNumber)))
    findcomiciss.append(str(re.sub("\D", "", str(IssueNumber))))

    # replace whitespace in comic name with %20 for the api search
    cm = re.sub(" ", "%20", str(findcomic[findcount]))

    # issue '1' is retried as '001', '01' and '1'; '11' as '011' and '11',
    # because providers index '011' and '11' as different search results
    if len(str(findcomiciss[findcount])) == 1:
        cmloopit = 3
    elif len(str(findcomiciss[findcount])) == 2:
        cmloopit = 2
    else:
        cmloopit = 1
    isssearch.append(str(findcomiciss[findcount]))
    comsearch.append(cm)
    findcount += 1

    findloop = 0
    foundcomic = []

    while (findloop < findcount):
        comsrc = comsearch[findloop]
        while (cmloopit >= 1):
            # here we account for issue pattern (zero-padding) variations
            if cmloopit == 3:
                comsearch[findloop] = comsrc + "%2000" + isssearch[findloop] + "%20" + str(filetype)
            elif cmloopit == 2:
                comsearch[findloop] = comsrc + "%200" + isssearch[findloop] + "%20" + str(filetype)
            elif cmloopit == 1:
                comsearch[findloop] = comsrc + "%20" + isssearch[findloop] + "%20" + str(filetype)

            if nzbprov != 'experimental':
                if nzbprov == 'dognzb':
                    findurl = "http://dognzb.cr/api?t=search&apikey=" + str(apikey) + "&q=" + str(comsearch[findloop]) + "&o=xml&cat=7030"
                elif nzbprov == 'nzb.su':
                    findurl = "http://nzb.su/api?t=search&q=" + str(comsearch[findloop]) + "&apikey=" + str(apikey) + "&o=xml&cat=7030"
                bb = feedparser.parse(findurl)
            elif nzbprov == 'experimental':
                bb = parseit.MysterBinScrape(comsearch[findloop], comyear)

            done = False
            foundc = "no"
            if bb == "no results":
                # the experimental scraper signals no hits with a bare string
                pass
            elif (len(bb['entries']) == 0):
                foundc = "no"
            else:
                for entry in bb['entries']:
                    cleantitle = helpers.cleanName(str(entry['title']))
                    if done:
                        break
                    ripperlist = ['digital-', 'empire', 'dcp']
                    # split the title on bracketed groups - group 0 is
                    # "<series> <issue>", later groups hold year/scanner/etc.
                    m = re.findall('[^()]+', cleantitle)
                    lenm = len(m)
                    cnt = 0
                    yearmatch = "false"  # FIX: was unbound when title had no (year) group
                    comiss = None        # FIX: was unbound when no issue number was parsed
                    while (cnt < lenm):
                        if m[cnt] is None:
                            break
                        if cnt == 0:
                            comic_andiss = m[cnt]
                            print("Comic:" + str(comic_andiss))
                        if m[cnt][:-2] == '19' or m[cnt][:-2] == '20':
                            print("year detected!")
                            result_comyear = m[cnt]
                            if str(comyear) in result_comyear:
                                print(str(comyear) + " - right - years match baby!")
                                yearmatch = "true"
                            else:
                                print(str(comyear) + " - not right - years don't match ")
                                yearmatch = "false"
                        if 'digital' in m[cnt] and len(m[cnt]) == 7:
                            # digital edition marker - nothing to do
                            pass
                        if ' of ' in m[cnt]:
                            # mini-series marker, e.g. "2 of 5"
                            result_of = m[cnt]
                        if 'cover' in m[cnt]:
                            result_comcovers = m[cnt]
                        for ripper in ripperlist:
                            if ripper in m[cnt]:
                                result_comscanner = m[cnt]
                        cnt += 1
                    if yearmatch == "false":
                        break

                    comic_iss = re.sub('[\-\:\,]', '', str(comic_andiss))
                    splitit = comic_iss.split(None)
                    watchcomic_split = findcomic[findloop].split(None)
                    # a trailing v2/v3 volume marker means the issue number is
                    # one token earlier in the split title
                    bmm = re.findall('v\d', comic_iss)
                    if len(bmm) > 0:
                        splitst = len(splitit) - 2
                    else:
                        splitst = len(splitit) - 1
                    if (splitst) != len(watchcomic_split):
                        print("incorrect comic lengths...not a match")
                        if str(splitit[0]).lower() == "the":
                            print("THE word detected...attempting to adjust pattern matching")
                            # NOTE(review): assigning a slice into element 0
                            # looks wrong (probably meant to drop the leading
                            # token); kept as-is to preserve behaviour.
                            splitit[0] = splitit[4:]
                    else:
                        print("length match..proceeding")
                        n = 0
                        scount = 0
                        while (n <= len(splitit) - 1):
                            # FIX: added the watchcomic_split bound so the v#
                            # case can no longer IndexError
                            if n < len(splitit) - 1 and n < len(watchcomic_split):
                                if str(watchcomic_split[n].lower()) in str(splitit[n].lower()):
                                    scount += 1
                                elif len(splitit[n]) < 3 or (splitit[n][1:]) == "v":
                                    # possible versioning token - account for it
                                    if splitit[n][2:].isdigit():
                                        comicversion = str(splitit[n])
                            else:
                                # last token should be the issue number
                                if splitit[n].isdigit():
                                    print("issue detected")
                                    comiss = splitit[n]
                                    comicNAMER = n - 1
                                    comNAME = splitit[0]
                                    cmnam = 1
                                    while (cmnam < comicNAMER):
                                        comNAME = str(comNAME) + " " + str(splitit[cmnam])
                                        cmnam += 1
                                else:
                                    pass
                            n += 1
                        # NOTE(review): integer division under python 2 -
                        # value is computed but never used downstream
                        spercent = (scount / int(len(splitit))) * 100
                        print("this should be a match!")
                        # FIX: guard on comiss - previously a NameError when
                        # the title carried no trailing issue number
                        if comiss is not None and int(findcomiciss[findloop]) == int(comiss):
                            print("issues match!")
                            ## -- inherit issue. Comic year is non-standard. nzb year is the year
                            ## -- comic was printed, not the start year of the comic series and
                            ## -- thus the deciding component if matches are correct or not
                            linkstart = os.path.splitext(entry['link'])[0]
                            # following is JUST for nzb.su
                            if nzbprov == 'nzb.su':
                                linkit = os.path.splitext(entry['link'])[1]
                                linkit = linkit.replace("&", "%26")
                                linkapi = str(linkstart) + str(linkit)
                            else:
                                # this should work for every other provider
                                linkstart = linkstart.replace("&", "%26")
                                linkapi = str(linkstart)

                            # blackhole functionality: drop the .nzb into a dir
                            if mylar.BLACKHOLE:
                                if os.path.exists(mylar.BLACKHOLE_DIR):
                                    filenamenzb = str(ComicName) + " " + str(IssueNumber) + " (" + str(comyear) + ").nzb"
                                    urllib.urlretrieve(linkapi, str(mylar.BLACKHOLE_DIR) + str(filenamenzb))
                                    logger.info(u"Successfully sent .nzb to your Blackhole directory : " + str(mylar.BLACKHOLE_DIR) + str(filenamenzb))
                            # end blackhole
                            else:
                                tmppath = mylar.CACHE_DIR
                                print("cache directory set to: " + str(tmppath))
                                if not os.path.exists(tmppath):
                                    # FIX: create the cache dir *before*
                                    # deriving the save path (filenzb/savefile
                                    # were unbound in this branch before)
                                    try:
                                        os.makedirs(str(mylar.CACHE_DIR))
                                        logger.info(u"Cache Directory successfully created at: " + str(mylar.CACHE_DIR))
                                    except OSError as e:  # FIX: was 'except OSError.e'
                                        if e.errno != errno.EEXIST:
                                            raise
                                filenamenzb = os.path.split(linkapi)[1]
                                if nzbprov == 'nzb.su':
                                    filenzb = linkstart[21:]
                                if nzbprov == 'experimental':
                                    filenzb = filenamenzb[6:]
                                if nzbprov == 'dognzb':
                                    filenzb = str(filenamenzb)  # FIX: was '==' (no-op comparison)
                                savefile = str(tmppath) + "/" + str(filenzb) + ".nzb"
                                print("savefile set to: " + str(savefile))
                                urllib.urlretrieve(linkapi, str(savefile))
                                print("sucessfully retrieve nzb to : " + str(savefile))

                                # check sab for current pause status
                                print("sab host set to :" + str(mylar.SAB_HOST))
                                sabqstatusapi = str(mylar.SAB_HOST) + "/api?mode=qstatus&output=xml&apikey=" + str(mylar.SAB_APIKEY)
                                from xml.dom.minidom import parseString
                                import urllib2
                                resp = urllib2.urlopen(sabqstatusapi)
                                data = resp.read()
                                resp.close()
                                dom = parseString(data)
                                # FIX: default when SAB returns no <paused>
                                # node (pausestatus was unbound before)
                                pausestatus = 'True'
                                for node in dom.getElementsByTagName('paused'):
                                    pausestatus = node.firstChild.wholeText
                                if pausestatus != 'True':
                                    # pause sab first because it downloads too quick (cbr's are small!)
                                    pauseapi = str(mylar.SAB_HOST) + "/api?mode=pause&apikey=" + str(mylar.SAB_APIKEY)
                                    urllib.urlopen(pauseapi)
                                    print("Queue paused")
                                else:
                                    print("Queue already paused")

                                if mylar.RENAME_FILES == 1:
                                    tmpapi = str(mylar.SAB_HOST) + "/api?mode=addlocalfile&name=" + str(savefile) + "&pp=3&cat=" + str(mylar.SAB_CATEGORY) + "&script=ComicRN.py&apikey=" + str(mylar.SAB_APIKEY)
                                else:
                                    tmpapi = str(mylar.SAB_HOST) + "/api?mode=addurl&name=" + str(linkapi) + "&pp=3&cat=" + str(mylar.SAB_CATEGORY) + "&script=ComicRN.py&apikey=" + str(mylar.SAB_APIKEY)
                                print("sab api string:" + str(tmpapi))
                                time.sleep(5)
                                urllib.urlopen(tmpapi)

                                if mylar.RENAME_FILES == 1:
                                    # give SAB 5 extra seconds to retrieve the nzb data
                                    time.sleep(5)
                                    outqueue = str(mylar.SAB_HOST) + "/api?mode=queue&start=START&limit=LIMIT&output=xml&apikey=" + str(mylar.SAB_APIKEY)
                                    print("outqueue line generated")
                                    urllib.urlopen(outqueue)
                                    time.sleep(5)
                                    print("passed api request to SAB")
                                    # <slots><slot><filename> holds the nzb filename;
                                    # api?mode=queue&name=rename&value=<nzo_id>&value2=NEWNAME
                                    resp = urllib2.urlopen(outqueue)
                                    data = resp.read()
                                    resp.close()
                                    dom = parseString(data)
                                    queue_slots = dom.getElementsByTagName('filename')
                                    queue_cnt = len(queue_slots)
                                    print("there are " + str(queue_cnt) + " things in SABnzbd's queue")
                                    que = 0
                                    slotmatch = "no"
                                    for queue in queue_slots:
                                        queue_file = dom.getElementsByTagName('filename')[que].firstChild.wholeText
                                        while ('Trying to fetch NZB' in queue_file):
                                            # keep waiting until the nzbname is resolved by SABnzbd
                                            time.sleep(5)
                                            resp = urllib2.urlopen(outqueue)
                                            data = resp.read()
                                            resp.close()
                                            dom = parseString(data)
                                            queue_file = dom.getElementsByTagName('filename')[que].firstChild.wholeText
                                        print(str(queue_file))
                                        print(str(filenzb))
                                        queue_file = queue_file.replace("_", " ")
                                        if str(queue_file) in str(filenzb):
                                            print("matched")
                                            slotmatch = "yes"
                                            slot_nzoid = dom.getElementsByTagName('nzo_id')[que].firstChild.wholeText
                                            print("slot_nzoid: " + str(slot_nzoid))
                                            break
                                        que += 1
                                    if slotmatch == "yes":
                                        if mylar.REPLACE_SPACES:
                                            repchar = mylar.REPLACE_CHAR
                                        else:
                                            repchar = ' '
                                        # make sure there's no crap in the ComicName since it's O.G.
                                        ComicNM = re.sub('[\:\,]', '', str(ComicName))
                                        renameit = str(ComicNM) + " " + str(IssueNumber) + " (" + str(SeriesYear) + ")" + " " + "(" + str(comyear) + ")"
                                        renameit = renameit.replace(' ', repchar)
                                        nzo_ren = str(mylar.SAB_HOST) + "/api?mode=queue&name=rename&apikey=" + str(mylar.SAB_APIKEY) + "&value=" + str(slot_nzoid) + "&value2=" + str(renameit)
                                        print("attempting to rename queue to " + str(nzo_ren))
                                        urllib2.urlopen(nzo_ren)
                                        print("renamed!")
                                    else:
                                        logger.info(u"Couldn't locate file in SAB - are you sure it's being downloaded?")
                                # resume sab if it was running before we started
                                if pausestatus != 'True':
                                    resumeapi = str(mylar.SAB_HOST) + "/api?mode=resume&apikey=" + str(mylar.SAB_APIKEY)
                                    urllib.urlopen(resumeapi)

                            foundc = "yes"
                            done = True
                            break
                        else:
                            foundc = "no"
            if done == True:
                break
            cmloopit -= 1
        findloop += 1
        if foundc == "yes":
            foundcomic.append("yes")
            logger.info(u"Found :" + str(ComicName) + " (" + str(comyear) + ") issue: " + str(IssueNumber) + " using " + str(nzbprov))
            break
        elif foundc == "no" and nzbpr != 0:  # FIX: '<>' is python-2-only syntax
            logger.info(u"More than one search provider given - trying next one.")
        elif foundc == "no" and nzbpr == 0:
            foundcomic.append("no")
            logger.info(u"Couldn't find Issue " + str(IssueNumber) + " of " + str(ComicName) + "(" + str(comyear) + "). Status kept as wanted.")
            break
    return foundc
def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, nzbprov, nzbpr, IssDateFix, IssueID):
    """Query an nzb provider for a specific comic issue and, on a
    title/year/issue match, download the .nzb to the cache (or blackhole
    directory) and submit it to SABnzbd, logging the grab via updater.nzblog.

    Returns "yes" when a match was found and submitted, "no" otherwise
    (or None on a download/submission error).
    """
    logger.info(u"Shhh be very quiet...I'm looking for " + ComicName + " issue: " + str(IssueNumber) + " using " + str(nzbprov))

    # provider api key selection
    if nzbprov == 'nzb.su':
        apikey = mylar.NZBSU_APIKEY
    elif nzbprov == 'dognzb':
        apikey = mylar.DOGNZB_APIKEY
    elif nzbprov == 'experimental':
        apikey = 'none'

    # preferred quality becomes a filetype hint appended to the search query
    if mylar.PREFERRED_QUALITY == 0:
        filetype = ""
    elif mylar.PREFERRED_QUALITY == 1:
        filetype = ".cbr"
    elif mylar.PREFERRED_QUALITY == 2:
        filetype = ".cbz"

    # map the configured priority to SABnzbd's numeric priority values.
    # NOTE(review): sabpriority is currently unused because the SAB priority
    # api call is disabled; kept so re-enabling it is trivial.
    if mylar.SAB_PRIORITY:
        if mylar.SAB_PRIORITY == 1:
            sabpriority = "-100"
        elif mylar.SAB_PRIORITY == 2:
            sabpriority = "-1"
        elif mylar.SAB_PRIORITY == 3:
            sabpriority = "0"
        elif mylar.SAB_PRIORITY == 4:
            sabpriority = "1"
        elif mylar.SAB_PRIORITY == 5:
            sabpriority = "-2"
    else:
        # if sab priority isn't selected, default to Normal (0)
        sabpriority = "0"

    # figure out what was missed via rss feeds and do a manual search via api
    findcomic = []
    findcomiciss = []
    findcount = 0
    comsearch = []
    isssearch = []
    comyear = str(ComicYear)

    findcomic.append(str(ComicName))
    IssueNumber = str(re.sub("\.00", "", str(IssueNumber)))
    findcomiciss.append(str(re.sub("\D", "", str(IssueNumber))))

    # replace whitespace with %20 and ampersands with %26 for the api search
    cm1 = re.sub(" ", "%20", str(findcomic[findcount]))
    cm = re.sub("\&", "%26", str(cm1))

    # issue '1' is retried as '001', '01' and '1'; '11' as '011' and '11',
    # because providers index '011' and '11' as different search results
    if len(str(findcomiciss[findcount])) == 1:
        cmloopit = 3
    elif len(str(findcomiciss[findcount])) == 2:
        cmloopit = 2
    else:
        cmloopit = 1
    isssearch.append(str(findcomiciss[findcount]))
    comsearch.append(cm)
    findcount += 1

    findloop = 0
    foundcomic = []

    while (findloop < findcount):
        comsrc = comsearch[findloop]
        while (cmloopit >= 1):
            # here we account for issue pattern (zero-padding) variations
            if cmloopit == 3:
                comsearch[findloop] = comsrc + "%2000" + isssearch[findloop] + "%20" + str(filetype)
            elif cmloopit == 2:
                comsearch[findloop] = comsrc + "%200" + isssearch[findloop] + "%20" + str(filetype)
            elif cmloopit == 1:
                comsearch[findloop] = comsrc + "%20" + isssearch[findloop] + "%20" + str(filetype)

            if nzbprov != 'experimental':
                if nzbprov == 'dognzb':
                    findurl = "http://dognzb.cr/api?t=search&apikey=" + str(apikey) + "&q=" + str(comsearch[findloop]) + "&o=xml&cat=7030"
                elif nzbprov == 'nzb.su':
                    findurl = "http://nzb.su/api?t=search&q=" + str(comsearch[findloop]) + "&apikey=" + str(apikey) + "&o=xml&cat=7030"
                bb = feedparser.parse(findurl)
            elif nzbprov == 'experimental':
                bb = parseit.MysterBinScrape(comsearch[findloop], comyear)

            done = False
            foundc = "no"
            if bb == "no results":
                # the experimental scraper signals no hits with a bare string
                foundc = "no"
            else:
                for entry in bb['entries']:
                    cleantitle = re.sub('_', ' ', str(entry['title']))
                    cleantitle = helpers.cleanName(str(cleantitle))
                    nzbname = cleantitle
                    if len(re.findall('[^()]+', cleantitle)) == 1:
                        # no bracketed (year) group at all - substitute a
                        # sentinel title that is guaranteed to fail matching
                        cleantitle = "abcdefghijk 0 (1901).cbz"
                    if done:
                        break
                    ripperlist = ['digital-', 'empire', 'dcp']
                    # split the title on bracketed groups - group 0 is
                    # "<series> <issue>", later groups hold year/scanner/etc.
                    m = re.findall('[^()]+', cleantitle)
                    lenm = len(m)
                    cnt = 0
                    yearmatch = "false"
                    comiss = None  # FIX: was unbound when no issue number was parsed
                    while (cnt < lenm):
                        if m[cnt] is None:
                            break
                        if cnt == 0:
                            comic_andiss = m[cnt]
                        if m[cnt][:-2] == '19' or m[cnt][:-2] == '20':
                            # year detected
                            result_comyear = m[cnt]
                            if str(comyear) in result_comyear:
                                yearmatch = "true"
                            else:
                                yearmatch = "false"
                        if 'digital' in m[cnt] and len(m[cnt]) == 7:
                            # digital edition marker - nothing to do
                            pass
                        if ' of ' in m[cnt]:
                            # mini-series marker, e.g. "2 of 5"
                            result_of = m[cnt]
                        if 'cover' in m[cnt]:
                            result_comcovers = m[cnt]
                        for ripper in ripperlist:
                            if ripper in m[cnt]:
                                result_comscanner = m[cnt]
                        cnt += 1
                    if yearmatch == "false":
                        continue

                    comic_iss = re.sub('[\-\:\,]', '', str(comic_andiss))
                    splitit = comic_iss.split(None)
                    watchcomic_split = findcomic[findloop].split(None)
                    # a trailing v2/v3 volume marker means the issue number is
                    # one token earlier in the split title
                    bmm = re.findall('v\d', comic_iss)
                    if len(bmm) > 0:
                        splitst = len(splitit) - 2
                    else:
                        splitst = len(splitit) - 1
                    if (splitst) != len(watchcomic_split):
                        # incorrect comic lengths...not a match
                        if str(splitit[0]).lower() == "the":
                            # NOTE(review): assigning a slice into element 0
                            # looks wrong (probably meant to drop the leading
                            # token); kept as-is to preserve behaviour.
                            splitit[0] = splitit[4:]
                    else:
                        n = 0
                        scount = 0
                        while (n <= len(splitit) - 1):
                            # NOTE(review): the -1 bound on watchcomic_split
                            # means its last word is never compared; kept
                            # as-is to preserve matching behaviour
                            if n < len(splitit) - 1 and n < len(watchcomic_split) - 1:
                                if str(watchcomic_split[n].lower()) in str(splitit[n].lower()):
                                    scount += 1
                                elif str(splitit[n].lower()).startswith('v'):
                                    # possible versioning token - account for it
                                    if splitit[n][1:].isdigit():
                                        comicversion = str(splitit[n])
                            else:
                                # last token should be the issue number
                                if splitit[n].isdigit():
                                    comiss = splitit[n]
                                    comicNAMER = n - 1
                                    comNAME = splitit[0]
                                    cmnam = 1
                                    while (cmnam < comicNAMER):
                                        comNAME = str(comNAME) + " " + str(splitit[cmnam])
                                        cmnam += 1
                                else:
                                    pass
                            n += 1
                        # NOTE(review): integer division under python 2 -
                        # value is computed but never used downstream
                        spercent = (scount / int(len(splitit))) * 100
                        # issue comparison now as well
                        # FIX: guard on comiss - previously a NameError when
                        # the title carried no trailing issue number
                        if comiss is not None and int(findcomiciss[findloop]) == int(comiss):
                            logger.info(u"Found " + str(ComicName) + " (" + str(comyear) + ") issue: " + str(IssueNumber) + " using " + str(nzbprov))
                            ## -- inherit issue. Comic year is non-standard. nzb year is the year
                            ## -- comic was printed, not the start year of the comic series and
                            ## -- thus the deciding component if matches are correct or not
                            linkstart = os.path.splitext(entry['link'])[0]
                            # following is JUST for nzb.su
                            if nzbprov == 'nzb.su':
                                linkit = os.path.splitext(entry['link'])[1]
                                linkit = linkit.replace("&", "%26")
                                linkapi = str(linkstart) + str(linkit)
                            else:
                                # this should work for every other provider
                                linkstart = linkstart.replace("&", "%26")
                                linkapi = str(linkstart)

                            # blackhole functionality: drop the .nzb into a dir
                            if mylar.BLACKHOLE:
                                if os.path.exists(mylar.BLACKHOLE_DIR):
                                    filenamenzb = str(ComicName) + " " + str(IssueNumber) + " (" + str(comyear) + ").nzb"
                                    urllib.urlretrieve(linkapi, str(mylar.BLACKHOLE_DIR) + str(filenamenzb))
                                    logger.info(u"Successfully sent .nzb to your Blackhole directory : " + str(mylar.BLACKHOLE_DIR) + str(filenamenzb))
                            # end blackhole
                            else:
                                tmppath = mylar.CACHE_DIR
                                if os.path.exists(tmppath):
                                    pass
                                else:
                                    # let's make the dir.
                                    try:
                                        os.makedirs(str(mylar.CACHE_DIR))
                                        logger.info(u"Cache Directory successfully created at: " + str(mylar.CACHE_DIR))
                                    except OSError as e:  # FIX: was 'except OSError.e'
                                        if e.errno != errno.EEXIST:
                                            raise
                                filenamenzb = os.path.split(linkapi)[1]
                                if nzbprov == 'nzb.su':
                                    filenzb = linkstart[21:]
                                elif nzbprov == 'experimental':
                                    filenzb = filenamenzb[6:]
                                elif nzbprov == 'dognzb':
                                    filenzb = str(filenamenzb)
                                if mylar.RENAME_FILES == 1:
                                    filenzb = str(ComicName.replace(' ', '_')) + "_" + str(IssueNumber) + "_(" + str(comyear) + ")"
                                if mylar.REPLACE_SPACES:
                                    repchar = mylar.REPLACE_CHAR
                                    repurlchar = mylar.REPLACE_CHAR
                                else:
                                    repchar = ' '
                                    repurlchar = "%20"
                                # make sure there's no crap in the ComicName since it's O.G.
                                ComicNM = re.sub('[\:\,]', '', str(ComicName))
                                renameit = str(ComicNM) + " " + str(IssueNumber) + " (" + str(SeriesYear) + ")" + " " + "(" + str(comyear) + ")"
                                renamethis = renameit.replace(' ', repchar)
                                renamer1 = renameit.replace(' ', repurlchar)
                                renamer = re.sub("\&", "%26", str(renamer1))
                                savefile = str(tmppath) + "/" + str(filenzb) + ".nzb"
                                print("savefile:" + str(savefile))
                                try:
                                    urllib.urlretrieve(linkapi, str(savefile))
                                except IOError:
                                    # FIX: was 'except urllib.URLError' - the
                                    # py2 urllib module has no URLError and
                                    # urlretrieve raises IOError on failure
                                    logger.error(u"Unable to retrieve nzb file.")
                                    return
                                if os.path.getsize(str(savefile)) == 0:
                                    logger.error(u"nzb size detected as zero bytes.")
                                    continue
                                logger.info(u"Sucessfully retrieved nzb file using " + str(nzbprov))
                                nzbname = str(filenzb)
                                print("nzbname:" + str(nzbname))

                                # send to SABnzbd: a local file when renaming
                                # is on (so SAB picks up our name), else the url
                                if mylar.RENAME_FILES == 1:
                                    tmpapi = str(mylar.SAB_HOST) + "/api?mode=addlocalfile&name=" + str(savefile) + "&pp=3&cat=" + str(mylar.SAB_CATEGORY) + "&script=ComicRN.py&apikey=" + str(mylar.SAB_APIKEY)
                                else:
                                    tmpapi = str(mylar.SAB_HOST) + "/api?mode=addurl&name=" + str(linkapi) + "&pp=3&cat=" + str(mylar.SAB_CATEGORY) + "&script=ComicRN.py&apikey=" + str(mylar.SAB_APIKEY)
                                print("send-to-SAB:" + str(tmpapi))
                                try:
                                    urllib2.urlopen(tmpapi)
                                except urllib2.URLError:
                                    logger.error(u"Unable to send nzb file to SABnzbd")
                                    return
                                logger.info(u"Successfully sent nzb file to SABnzbd")

                                # delete the temporary .nzb now.
                                if mylar.PROG_DIR != "/":  # FIX: was 'is not' identity compare
                                    os.remove(savefile)
                                    logger.info(u"Removed temporary save file")

                            foundc = "yes"
                            done = True
                            break
                        else:
                            foundc = "no"
            if done == True:
                break
            cmloopit -= 1
        findloop += 1
        if foundc == "yes":
            print("found-yes")
            foundcomic.append("yes")
            updater.nzblog(IssueID, nzbname)
            break
        elif foundc == "no" and nzbpr != 0:  # FIX: '<>' is python-2-only syntax
            logger.info(u"More than one search provider given - trying next one.")
        elif foundc == "no" and nzbpr == 0:
            foundcomic.append("no")
            if IssDateFix == "no":
                logger.info(u"Couldn't find Issue " + str(IssueNumber) + " of " + str(ComicName) + "(" + str(comyear) + "). Status kept as wanted.")
            break
    return foundc
def libraryScan(dir=None, append=False, ComicID=None, ComicName=None, cron=None):
    """Scan a directory tree for comic files (.cbr/.cbz) and build an import list.

    Walks ``dir`` (defaults to ``mylar.COMIC_DIR``), collects every cbr/cbz file,
    then attempts to identify each file via, in order:
      1. a ``cvinfo`` file in the same directory (yields a ComicVine ComicID),
      2. embedded metadata tags when ``mylar.IMP_METADATA`` is enabled (cbz only),
      3. filename parsing + fuzzy comparison against the watchlist (``comics`` table).
    Watchlist hits collected in ``watch_kchoice`` are then either moved/renamed
    (``mylar.IMP_MOVE`` / ``mylar.IMP_RENAME``) or marked Archived in the DB.

    Returns:
        "Completed" when nothing was queued for import, otherwise a tuple of
        (import_comicids dict with 'comic_info'/'issueid_info' keys, entry count).

    NOTE(review): the ``ComicID`` and ``ComicName`` parameters are immediately
    shadowed by local list variables below and are never used as passed in.
    NOTE(review): this is Python 2 code (print statements, ``xrange``, bytestring
    path handling via ``encode``/``decode``).
    """
    # Respect the scheduler: a cron-triggered run is a no-op when the
    # library-scan feature is disabled in config.
    if cron and not mylar.LIBRARYSCAN:
        return
    if not dir:
        dir = mylar.COMIC_DIR
    # If we're appending a dir, it's coming from the post processor which is
    # already bytestring
    if not append:
        dir = dir.encode(mylar.SYS_ENCODING)
    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(mylar.SYS_ENCODING, 'replace'))
        return

    logger.info('Scanning comic directory: %s' % dir.decode(mylar.SYS_ENCODING, 'replace'))

    basedir = dir
    comic_list = []
    comiccnt = 0
    extensions = ('cbr', 'cbz')
    cv_location = []  # directories that contain a 'cvinfo' file
    for r, d, f in os.walk(dir):
        for files in f:
            # remember every directory holding a cvinfo marker file so the
            # per-file loop below can attach its ComicID to siblings.
            if 'cvinfo' in files:
                cv_location.append(r)
                logger.fdebug('CVINFO found: ' + os.path.join(r))
            if any(files.lower().endswith('.' + x.lower()) for x in extensions):
                comic = files
                comicpath = os.path.join(r, files)
                comicsize = os.path.getsize(comicpath)
                logger.fdebug('Comic: ' + comic + ' [' + comicpath + '] - ' + str(comicsize) + ' bytes')
                comiccnt += 1
                # We need the unicode path to use for logging, inserting into database
                unicode_comic_path = comicpath.decode(mylar.SYS_ENCODING, 'replace')
                comic_dict = {
                    'ComicFilename': comic,
                    'ComicLocation': comicpath,
                    'ComicSize': comicsize,
                    'Unicode_ComicLocation': unicode_comic_path
                }
                comic_list.append(comic_dict)

    logger.info("I've found a total of " + str(comiccnt) + " comics....analyzing now")
    #logger.info("comiclist: " + str(comic_list))
    myDB = db.DBConnection()

    #let's load in the watchlist to see if we have any matches.
    logger.info(
        "loading in the watchlist to see if a series is being watched already..."
    )
    watchlist = myDB.select("SELECT * from comics")
    # Parallel arrays indexed by watchlist position (cm_cn below).
    ComicName = []
    DisplayName = []
    ComicYear = []
    ComicPublisher = []
    ComicTotal = []
    ComicID = []
    ComicLocation = []
    AltName = []
    watchcnt = 0
    watch_kchoice = []
    watchchoice = {}
    import_by_comicids = []
    import_comicids = {}
    for watch in watchlist:
        #use the comicname_filesafe to start
        watchdisplaycomic = watch['ComicName'].encode('utf-8').strip(
        )  #re.sub('[\_\#\,\/\:\;\!\$\%\&\+\'\?\@]', ' ', watch['ComicName']).encode('utf-8').strip()
        # let's clean up the name, just in case for comparison purposes...
        watchcomic = re.sub(
            '[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', '',
            watch['ComicName_Filesafe']).encode('utf-8').strip()
        #watchcomic = re.sub('\s+', ' ', str(watchcomic)).strip()
        if ' the ' in watchcomic.lower():
            #drop the 'the' from the watchcomic title for proper comparisons.
            # NOTE(review): this keeps only the LAST 4 characters of the title,
            # which does not match the stated intent of removing 'the' — verify.
            watchcomic = watchcomic[-4:]
        alt_chk = "no"  # alt-checker flag (default to no)
        # account for alternate names as well
        # NOTE(review): ``is not 'None'`` is an identity (not equality)
        # comparison against a string literal; behavior is implementation-
        # dependent — presumably ``!= 'None'`` was intended.
        if watch['AlternateSearch'] is not None and watch[
                'AlternateSearch'] is not 'None':
            altcomic = re.sub(
                '[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', '',
                watch['AlternateSearch']).encode('utf-8').strip()
            #altcomic = re.sub('\s+', ' ', str(altcomic)).strip()
            AltName.append(altcomic)
            alt_chk = "yes"  # alt-checker flag
        ComicName.append(watchcomic)
        DisplayName.append(watchdisplaycomic)
        ComicYear.append(watch['ComicYear'])
        ComicPublisher.append(watch['ComicPublisher'])
        ComicTotal.append(watch['Total'])
        ComicID.append(watch['ComicID'])
        ComicLocation.append(watch['ComicLocation'])
        watchcnt += 1

    logger.info("Successfully loaded " + str(watchcnt) + " series from your watchlist.")

    ripperlist = ['digital-', 'empire', 'dcp']  # NOTE(review): unused in this function

    watchfound = 0

    # month-name prefixes used to recognize '(Jan 2010)'-style bracket tokens.
    datelist = [
        'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct',
        'nov', 'dec'
    ]
    # datemonth = {'one':1,'two':2,'three':3,'four':4,'five':5,'six':6,'seven':7,'eight':8,'nine':9,'ten':10,'eleven':$
    # #search for number as text, and change to numeric
    # for numbs in basnumbs:
    #     #logger.fdebug("numbs:" + str(numbs))
    #     if numbs in ComicName.lower():
    #         numconv = basnumbs[numbs]
    #         #logger.fdebug("numconv: " + str(numconv))

    issueid_list = []
    cvscanned_loc = None  # directory whose cvinfo has already been read
    cvinfo_CID = None     # ComicID parsed from the most recent cvinfo file
    for i in comic_list:
        logger.fdebug('Analyzing : ' + i['ComicFilename'])
        comfilename = i['ComicFilename']
        comlocation = i['ComicLocation']
        issueinfo = None
        #Make sure cvinfo is checked for FIRST (so that CID can be attached to all files properly thereafter as they're scanned in)
        if os.path.dirname(comlocation) in cv_location and os.path.dirname(
                comlocation) != cvscanned_loc:
            #if comfilename == 'cvinfo':
            logger.info('comfilename: ' + comfilename)
            logger.info('cvscanned_loc: ' + str(cv_location))
            logger.info('comlocation: ' + os.path.dirname(comlocation))
            #if cvscanned_loc != comlocation:
            try:
                with open(os.path.join(os.path.dirname(comlocation), 'cvinfo')) as f:
                    urllink = f.readline()
                print 'urllink: ' + str(urllink)
                if urllink:
                    # cvinfo holds a ComicVine URL; the '4050-' prefixed path
                    # segment carries the ComicID.
                    cid = urllink.split('/')
                    if '4050-' in cid[-2]:
                        cvinfo_CID = re.sub('4050-', '', cid[-2]).strip()
                        logger.info(
                            'CVINFO file located within directory. Attaching everything in directory that is valid to ComicID: '
                            + str(cvinfo_CID))
                        #store the location of the cvinfo so it's applied to the correct directory (since we're scanning multile direcorties usually)
                        cvscanned_loc = os.path.dirname(comlocation)
                else:
                    logger.error(
                        "Could not read cvinfo file properly (or it does not contain any data)"
                    )
            except (OSError, IOError):
                logger.error(
                    "Could not read cvinfo file properly (or it does not contain any data)"
                )
        #else:
        # don't scan in it again if it's already been done initially
        #   continue

        if mylar.IMP_METADATA:
            logger.info('metatagging checking enabled.')
            #if read tags is enabled during import, check here.
            if i['ComicLocation'].endswith('.cbz'):
                logger.info('Attempting to read tags present in filename: ' + i['ComicLocation'])
                issueinfo = helpers.IssueDetails(i['ComicLocation'])
                if issueinfo is None:
                    pass
                else:
                    issuenotes_id = None
                    logger.info(
                        'Successfully retrieved some tags. Lets see what I can figure out.'
                    )
                    comicname = issueinfo[0]['series']
                    logger.fdebug('Series Name: ' + comicname)
                    issue_number = issueinfo[0]['issue_number']
                    logger.fdebug('Issue Number: ' + str(issue_number))
                    issuetitle = issueinfo[0]['title']
                    logger.fdebug('Issue Title: ' + issuetitle)
                    issueyear = issueinfo[0]['year']
                    logger.fdebug('Issue Year: ' + str(issueyear))
                    try:
                        issuevolume = issueinfo[0]['volume']
                    except:
                        # NOTE(review): bare except — presumably guards a
                        # missing 'volume' key; KeyError would be narrower.
                        issuevolume = None

                    # if used by ComicTagger, Notes field will have the IssueID.
                    issuenotes = issueinfo[0]['notes']
                    logger.fdebug('Notes: ' + issuenotes)
                    if issuenotes is not None:
                        if 'Issue ID' in issuenotes:
                            st_find = issuenotes.find('Issue ID')
                            tmp_issuenotes_id = re.sub(
                                "[^0-9]", " ", issuenotes[st_find:]).strip()
                            if tmp_issuenotes_id.isdigit():
                                issuenotes_id = tmp_issuenotes_id
                                logger.fdebug(
                                    'Successfully retrieved CV IssueID for ' +
                                    comicname + ' #' + str(issue_number) +
                                    ' [' + str(issuenotes_id) + ']')
                        elif 'CVDB' in issuenotes:
                            st_find = issuenotes.find('CVDB')
                            tmp_issuenotes_id = re.sub(
                                "[^0-9]", " ", issuenotes[st_find:]).strip()
                            if tmp_issuenotes_id.isdigit():
                                issuenotes_id = tmp_issuenotes_id
                                logger.fdebug(
                                    'Successfully retrieved CV IssueID for ' +
                                    comicname + ' #' + str(issue_number) +
                                    ' [' + str(issuenotes_id) + ']')
                        else:
                            logger.fdebug(
                                'Unable to retrieve IssueID from meta-tagging. If there is other metadata present I will use that.'
                            )

                    logger.fdebug("adding " + comicname + " to the import-queue!")
                    impid = comicname + '-' + str(issueyear) + '-' + str(
                        issue_number
                    )  #com_NAME + "-" + str(result_comyear) + "-" + str(comiss)
                    logger.fdebug("impid: " + str(impid))
                    #make sure we only add in those issueid's which don't already have a comicid attached via the cvinfo scan above (this is for reverse-lookup of issueids)
                    if cvinfo_CID is None:
                        issueid_list.append(issuenotes_id)

                    if cvscanned_loc == os.path.dirname(comlocation):
                        cv_cid = cvinfo_CID
                        logger.info('CVINFO_COMICID attached : ' + str(cv_cid))
                    else:
                        cv_cid = None

                    import_by_comicids.append({
                        "impid": impid,
                        "comicid": cv_cid,
                        "watchmatch": None,
                        "displayname": helpers.cleanName(comicname),
                        "comicname": comicname,  #com_NAME,
                        "comicyear": issueyear,
                        "volume": issuevolume,
                        "issueid": issuenotes_id,
                        "comfilename": comfilename,
                        "comlocation": comlocation.decode(mylar.SYS_ENCODING)
                    })
            else:
                logger.info(
                    i['ComicLocation'] +
                    ' is not in a metatagged format (cbz). Bypassing reading of the metatags'
                )

        # Fall back to filename parsing when no usable metadata was found.
        if issueinfo is None:
            #let's clean up the filename for matching purposes
            cfilename = re.sub('[\_\#\,\/\:\;\-\!\$\%\&\+\'\?\@]', ' ', comfilename)
            #cfilename = re.sub('\s', '_', str(cfilename))
            # d_filename keeps special characters as '#' placeholders so the
            # display name can restore them later from comfilename.
            d_filename = re.sub('[\_\#\,\/\;\!\$\%\&\?\@]', ' ', comfilename)
            d_filename = re.sub('[\:\-\+\']', '#', d_filename)

            #strip extraspaces
            d_filename = re.sub('\s+', ' ', d_filename)
            cfilename = re.sub('\s+', ' ', cfilename)

            #versioning - remove it
            subsplit = cfilename.replace('_', ' ').split()
            volno = None
            volyr = None
            for subit in subsplit:
                if subit[0].lower() == 'v':
                    vfull = 0
                    if subit[1:].isdigit():
                        #if in format v1, v2009 etc...
                        if len(subit) > 3:
                            # if it's greater than 3 in length, then the format is Vyyyy
                            vfull = 1  # add on 1 character length to account for extra space
                        cfilename = re.sub(subit, '', cfilename)
                        d_filename = re.sub(subit, '', d_filename)
                        volno = re.sub("[^0-9]", " ", subit)
                    elif subit.lower()[:3] == 'vol':
                        #if in format vol.2013 etc
                        #because the '.' in Vol. gets removed, let's loop thru again after the Vol hit to remove it entirely
                        logger.fdebug(
                            'volume indicator detected as version #:' + str(subit))
                        cfilename = re.sub(subit, '', cfilename)
                        cfilename = " ".join(cfilename.split())
                        d_filename = re.sub(subit, '', d_filename)
                        d_filename = " ".join(d_filename.split())
                        volyr = re.sub("[^0-9]", " ", subit).strip()
                        logger.fdebug('volume year set as : ' + str(volyr))
            cm_cn = 0

            #we need to track the counter to make sure we are comparing the right array parts
            #this takes care of the brackets :)
            m = re.findall('[^()]+', d_filename)  #cfilename)
            lenm = len(m)
            logger.fdebug("there are " + str(lenm) + " words.")
            cnt = 0
            yearmatch = "false"
            foundonwatch = "False"
            issue = 999999  # sentinel: 'no issue number detected yet'
            while (cnt < lenm):
                if m[cnt] is None:
                    break
                if m[cnt] == ' ':
                    pass
                else:
                    logger.fdebug(str(cnt) + ". Bracket Word: " + m[cnt])
                    if cnt == 0:
                        # First bracket-free chunk = 'series name + issue'.
                        comic_andiss = m[cnt]
                        logger.fdebug("Comic: " + comic_andiss)
                        # if it's not in the standard format this will bork.
                        # let's try to accomodate (somehow).
                        # first remove the extension (if any)
                        extensions = ('cbr', 'cbz')
                        if comic_andiss.lower().endswith(extensions):
                            comic_andiss = comic_andiss[:-4]
                            logger.fdebug("removed extension from filename.")
                        #now we have to break up the string regardless of formatting.
                        #let's force the spaces.
                        comic_andiss = re.sub('_', ' ', comic_andiss)
                        cs = comic_andiss.split()
                        cs_len = len(cs)
                        cn = ''
                        ydetected = 'no'
                        idetected = 'no'
                        decimaldetect = 'no'
                        # NOTE(review): this loop variable shadows the outer
                        # ``for i in comic_list`` variable; ``i`` is not used
                        # as the comic dict afterwards in this branch, but the
                        # shadowing is fragile — verify before refactoring.
                        for i in reversed(xrange(len(cs))):
                            #start at the end.
                            logger.fdebug("word: " + str(cs[i]))
                            #assume once we find issue - everything prior is the actual title
                            #idetected = no will ignore everything so it will assume all title
                            # NOTE(review): ``cs[i][:-2]`` takes all but the
                            # last two characters (so '1999'[:-2] == '19');
                            # also note Python operator precedence: the 'or'
                            # binds looser than 'and' in both tests below.
                            if cs[i][:-2] == '19' or cs[
                                    i][:-2] == '20' and idetected == 'no':
                                logger.fdebug("year detected: " + str(cs[i]))
                                ydetected = 'yes'
                                result_comyear = cs[i]
                            elif cs[i].isdigit(
                            ) and idetected == 'no' or '.' in cs[i]:
                                if '.' in cs[i]:
                                    #make sure it's a number on either side of decimal and assume decimal issue.
                                    decst = cs[i].find('.')
                                    dec_st = cs[i][:decst]
                                    dec_en = cs[i][decst + 1:]
                                    logger.fdebug("st: " + str(dec_st))
                                    logger.fdebug("en: " + str(dec_en))
                                    if dec_st.isdigit() and dec_en.isdigit():
                                        logger.fdebug(
                                            "decimal issue detected...adjusting."
                                        )
                                        issue = dec_st + "." + dec_en
                                        logger.fdebug("issue detected: " + str(issue))
                                        idetected = 'yes'
                                    else:
                                        logger.fdebug(
                                            "false decimal represent. Chunking to extra word."
                                        )
                                        cn = cn + cs[i] + " "
                                        #break
                                else:
                                    issue = cs[i]
                                    logger.fdebug("issue detected : " + str(issue))
                                    idetected = 'yes'
                            elif '\#' in cs[i] or decimaldetect == 'yes':
                                logger.fdebug("issue detected: " + str(cs[i]))
                                idetected = 'yes'
                            else:
                                cn = cn + cs[i] + " "
                        if ydetected == 'no':
                            #assume no year given in filename...
                            result_comyear = "0000"
                        logger.fdebug("cm?: " + str(cn))
                        # NOTE(review): ``is not '999999'`` is an identity
                        # comparison (and ``issue`` starts as the int 999999),
                        # so this condition is effectively always true —
                        # presumably ``!= 999999`` was intended; verify.
                        if issue is not '999999':
                            comiss = issue
                        else:
                            # NOTE(review): logger.ERROR (uppercase) — confirm
                            # the project logger exposes this name.
                            logger.ERROR(
                                "Invalid Issue number (none present) for " +
                                comfilename)
                            break
                        cnsplit = cn.split()
                        cname = ''
                        findcn = 0
                        # NOTE(review): iterates by len(cnsplit) but indexes
                        # ``cs`` — rebuilds the name from the ORIGINAL word
                        # order rather than the reversed-accumulated ``cn``.
                        while (findcn < len(cnsplit)):
                            cname = cname + cs[findcn] + " "
                            findcn += 1
                        cname = cname[:len(cname) - 1]  # drop the end space...
                        logger.fdebug('assuming name is : ' + cname)
                        com_NAME = cname
                        logger.fdebug('com_NAME : ' + com_NAME)
                        yearmatch = "True"
                    else:
                        logger.fdebug('checking ' + m[cnt])
                        # we're assuming that the year is in brackets (and it should be damnit)
                        if m[cnt][:-2] == '19' or m[cnt][:-2] == '20':
                            logger.fdebug('year detected: ' + str(m[cnt]))
                            ydetected = 'yes'
                            result_comyear = m[cnt]
                        elif m[cnt][:3].lower() in datelist:
                            logger.fdebug(
                                'possible issue date format given - verifying')
                            #if the date of the issue is given as (Jan 2010) or (January 2010) let's adjust.
                            #keeping in mind that ',' and '.' are already stripped from the string
                            if m[cnt][-4:].isdigit():
                                ydetected = 'yes'
                                result_comyear = m[cnt][-4:]
                                logger.fdebug('Valid Issue year of ' +
                                              str(result_comyear) +
                                              'detected in format of ' +
                                              str(m[cnt]))
                cnt += 1

            displength = len(cname)
            logger.fdebug('cname length : ' + str(displength) + ' --- ' + str(cname))
            logger.fdebug('d_filename is : ' + d_filename)
            charcount = d_filename.count('#')
            logger.fdebug('charcount is : ' + str(charcount))
            if charcount > 0:
                logger.fdebug('entering loop')
                # NOTE(review): this rebinds both ``i`` and ``m`` (previously
                # the bracket-word list) — intentional-looking but fragile.
                for i, m in enumerate(re.finditer('\#', d_filename)):
                    if m.end() <= displength:
                        logger.fdebug(comfilename[m.start():m.end()])
                        # find occurance in c_filename, then replace into d_filname so special characters are brought across
                        newchar = comfilename[m.start():m.end()]
                        logger.fdebug('newchar:' + str(newchar))
                        d_filename = d_filename[:m.start()] + str(
                            newchar) + d_filename[m.end():]
                        logger.fdebug('d_filename:' + str(d_filename))

            dispname = d_filename[:displength]
            logger.fdebug('dispname : ' + dispname)

            splitit = []
            watchcomic_split = []
            logger.fdebug("filename comic and issue: " + comic_andiss)

            #changed this from '' to ' '
            comic_iss_b4 = re.sub('[\-\:\,]', ' ', comic_andiss)
            comic_iss = comic_iss_b4.replace('.', ' ')
            comic_iss = re.sub('[\s+]', ' ', comic_iss).strip()
            logger.fdebug("adjusted comic and issue: " + str(comic_iss))

            #remove 'the' from here for proper comparisons.
            if ' the ' in comic_iss.lower():
                comic_iss = re.sub('\\bthe\\b', '', comic_iss).strip()

            splitit = comic_iss.split(None)
            logger.fdebug("adjusting from: " + str(comic_iss_b4) + " to: " + str(comic_iss))

            #here we cycle through the Watchlist looking for a match.
            while (cm_cn < watchcnt):
                #setup the watchlist
                comname = ComicName[cm_cn]
                comyear = ComicYear[cm_cn]
                compub = ComicPublisher[cm_cn]
                comtotal = ComicTotal[cm_cn]
                comicid = ComicID[cm_cn]
                watch_location = ComicLocation[cm_cn]

                # there shouldn't be an issue in the comic now, so let's just assume it's all gravy.
                splitst = len(splitit)
                watchcomic_split = helpers.cleanName(comname)
                watchcomic_split = re.sub('[\-\:\,\.]', ' ',
                                          watchcomic_split).split(None)
                logger.fdebug(
                    str(splitit) + " file series word count: " + str(splitst))
                logger.fdebug(
                    str(watchcomic_split) + " watchlist word count: " +
                    str(len(watchcomic_split)))
                if (splitst) != len(watchcomic_split):
                    logger.fdebug("incorrect comic lengths...not a match")
                    # if str(splitit[0]).lower() == "the":
                    #     logger.fdebug("THE word detected...attempting to adjust pattern matching")
                    #     splitit[0] = splitit[4:]
                else:
                    logger.fdebug("length match..proceeding")
                    n = 0
                    scount = 0
                    logger.fdebug("search-length: " + str(splitst))
                    logger.fdebug("Watchlist-length: " + str(len(watchcomic_split)))
                    while (n <= (splitst) - 1):
                        logger.fdebug("splitit: " + str(splitit[n]))
                        if n < (splitst) and n < len(watchcomic_split):
                            logger.fdebug(
                                str(n) + " Comparing: " +
                                str(watchcomic_split[n]) + " .to. " +
                                str(splitit[n]))
                            if '+' in watchcomic_split[n]:
                                # NOTE(review): re.sub('+', ...) is an invalid
                                # regex ('nothing to repeat') and will raise
                                # re.error when a '+' is present — presumably
                                # '\+' or str.replace was intended; verify.
                                watchcomic_split[n] = re.sub(
                                    '+', '', str(watchcomic_split[n]))
                            if str(watchcomic_split[n].lower()) in str(
                                    splitit[n].lower()
                            ) and len(watchcomic_split[n]) >= len(splitit[n]):
                                logger.fdebug("word matched on : " + str(splitit[n]))
                                scount += 1
                            #elif ':' in splitit[n] or '-' in splitit[n]:
                            #    splitrep = splitit[n].replace('-', '')
                            #    logger.fdebug("non-character keyword...skipped on " + splitit[n])
                            elif str(splitit[n]).lower().startswith('v'):
                                logger.fdebug("possible versioning..checking")
                                #we hit a versioning # - account for it
                                if splitit[n][1:].isdigit():
                                    comicversion = str(splitit[n])
                                    logger.fdebug("version found: " + str(comicversion))
                        else:
                            logger.fdebug("Comic / Issue section")
                            if splitit[n].isdigit():
                                logger.fdebug("issue detected")
                            else:
                                logger.fdebug("non-match for: " + str(splitit[n]))
                                pass
                        n += 1

                    #set the match threshold to 80% (for now)
                    # if it's less than 80% consider it a non-match and discard.
                    #splitit has to splitit-1 because last position is issue.
                    wordcnt = int(scount)
                    logger.fdebug("scount:" + str(wordcnt))
                    totalcnt = int(splitst)
                    logger.fdebug("splitit-len:" + str(totalcnt))
                    # NOTE(review): under Python 2 this is integer division,
                    # so spercent is effectively 0 or 100 — partial matches
                    # can never reach the 80% threshold; verify intent.
                    spercent = (wordcnt / totalcnt) * 100
                    logger.fdebug("we got " + str(spercent) + " percent.")

                    if int(spercent) >= 80:
                        logger.fdebug("it's a go captain... - we matched " +
                                      str(spercent) + "%!")
                        logger.fdebug("this should be a match!")
                        logger.fdebug("issue we found for is : " + str(comiss))
                        #set the year to the series we just found ;)
                        result_comyear = comyear
                        #issue comparison now as well
                        logger.info(u"Found " + comname + " (" + str(comyear) +
                                    ") issue: " + str(comiss))
                        watchmatch = str(comicid)
                        dispname = DisplayName[cm_cn]
                        foundonwatch = "True"
                        break
                    elif int(spercent) < 80:
                        logger.fdebug("failure - we only got " + str(spercent) + "% right!")
                cm_cn += 1

            if foundonwatch == "False":
                watchmatch = None
            #---if it's not a match - send it to the importer.
            n = 0

            if volyr is None:
                if result_comyear is None:
                    result_comyear = '0000'  #no year in filename basically.
            else:
                if result_comyear is None:
                    result_comyear = volyr
            if volno is None:
                if volyr is None:
                    vol_label = None
                else:
                    vol_label = volyr
            else:
                vol_label = volno

            logger.fdebug("adding " + com_NAME + " to the import-queue!")
            impid = dispname + '-' + str(result_comyear) + '-' + str(
                comiss
            )  #com_NAME + "-" + str(result_comyear) + "-" + str(comiss)
            logger.fdebug("impid: " + str(impid))

            if cvscanned_loc == os.path.dirname(comlocation):
                cv_cid = cvinfo_CID
                logger.info('CVINFO_COMICID attached : ' + str(cv_cid))
            else:
                cv_cid = None
            import_by_comicids.append({
                "impid": impid,
                "comicid": cv_cid,
                "issueid": None,
                "watchmatch": watchmatch,
                "displayname": dispname,
                "comicname": dispname,  #com_NAME,
                "comicyear": result_comyear,
                "volume": vol_label,
                "comfilename": comfilename,
                "comlocation": comlocation.decode(mylar.SYS_ENCODING)
            })

    #logger.fdebug('import_by_ids: ' + str(import_by_comicids))

    #reverse lookup all of the gathered IssueID's in order to get the related ComicID
    vals = mylar.cv.getComic(None, 'import', comicidlist=issueid_list)
    logger.fdebug('vals returned:' + str(vals))

    if len(watch_kchoice) > 0:
        # NOTE(review): nothing in this function appends to watch_kchoice, so
        # this move/archive branch appears unreachable here — confirm whether
        # population was removed intentionally.
        watchchoice['watchlist'] = watch_kchoice
        #logger.fdebug("watchchoice: " + str(watchchoice))

        logger.info("I have found " + str(watchfound) + " out of " +
                    str(comiccnt) +
                    " comics for series that are being watched.")
        wat = 0
        comicids = []

        if watchfound > 0:
            if mylar.IMP_MOVE:
                logger.info(
                    'You checked off Move Files...so that\'s what I am going to do'
                )
                #check to see if Move Files is enabled.
                #if not being moved, set the archive bit.
                logger.fdebug('Moving files into appropriate directory')
                while (wat < watchfound):
                    watch_the_list = watchchoice['watchlist'][wat]
                    watch_comlocation = watch_the_list['ComicLocation']
                    watch_comicid = watch_the_list['ComicID']
                    watch_comicname = watch_the_list['ComicName']
                    watch_comicyear = watch_the_list['ComicYear']
                    watch_comiciss = watch_the_list['ComicIssue']
                    logger.fdebug('ComicLocation: ' + watch_comlocation)
                    orig_comlocation = watch_the_list['OriginalLocation']
                    orig_filename = watch_the_list['OriginalFilename']
                    logger.fdebug('Orig. Location: ' + orig_comlocation)
                    logger.fdebug('Orig. Filename: ' + orig_filename)
                    #before moving check to see if Rename to Mylar structure is enabled.
                    if mylar.IMP_RENAME:
                        logger.fdebug(
                            'Renaming files according to configuration details : '
                            + str(mylar.FILE_FORMAT))
                        renameit = helpers.rename_param(
                            watch_comicid, watch_comicname, watch_comicyear,
                            watch_comiciss)
                        nfilename = renameit['nfilename']
                        dst_path = os.path.join(watch_comlocation, nfilename)
                        if str(watch_comicid) not in comicids:
                            comicids.append(watch_comicid)
                    else:
                        logger.fdebug(
                            'Renaming files not enabled, keeping original filename(s)'
                        )
                        dst_path = os.path.join(watch_comlocation, orig_filename)

                    #os.rename(os.path.join(self.nzb_folder, str(ofilename)), os.path.join(self.nzb_folder,str(nfilename + ext)))
                    #src = os.path.join(, str(nfilename + ext))
                    logger.fdebug('I am going to move ' + orig_comlocation +
                                  ' to ' + dst_path)
                    try:
                        shutil.move(orig_comlocation, dst_path)
                    except (OSError, IOError):
                        logger.info(
                            "Failed to move directory - check directories and manually re-run."
                        )
                    wat += 1
            else:
                # if move files isn't enabled, let's set all found comics to Archive status :)
                while (wat < watchfound):
                    watch_the_list = watchchoice['watchlist'][wat]
                    watch_comicid = watch_the_list['ComicID']
                    watch_issue = watch_the_list['ComicIssue']
                    logger.fdebug('ComicID: ' + str(watch_comicid))
                    logger.fdebug('Issue#: ' + str(watch_issue))
                    issuechk = myDB.selectone(
                        "SELECT * from issues where ComicID=? AND INT_IssueNumber=?",
                        [watch_comicid, watch_issue]).fetchone()
                    if issuechk is None:
                        logger.fdebug('No matching issues for this comic#')
                    else:
                        logger.fdebug('...Existing status: ' + str(issuechk['Status']))
                        control = {"IssueID": issuechk['IssueID']}
                        values = {"Status": "Archived"}
                        logger.fdebug('...changing status of ' +
                                      str(issuechk['Issue_Number']) +
                                      ' to Archived ')
                        myDB.upsert("issues", values, control)
                        if str(watch_comicid) not in comicids:
                            comicids.append(watch_comicid)
                    wat += 1
        if comicids is None:
            pass
        else:
            c_upd = len(comicids)
            c = 0
            # NOTE(review): ``c`` is never incremented (infinite loop when
            # comicids is non-empty), and the loop index — not comicids[c] —
            # is passed to forceRescan; verify intended behavior.
            while (c < c_upd):
                logger.fdebug('Rescanning.. ' + str(c))
                updater.forceRescan(c)
    if not len(import_by_comicids):
        return "Completed"

    if len(import_by_comicids) > 0:
        import_comicids['comic_info'] = import_by_comicids
        if vals:
            import_comicids['issueid_info'] = vals
        else:
            import_comicids['issueid_info'] = None
        logger.fdebug('import comicids: ' + str(import_by_comicids))
        return import_comicids, len(import_by_comicids)
def libraryScan(dir=None, append=False, ComicID=None, ComicName=None, cron=None): if cron and not mylar.LIBRARYSCAN: return if not dir: dir = mylar.COMIC_DIR # If we're appending a dir, it's coming from the post processor which is # already bytestring if not append: dir = dir.encode(mylar.SYS_ENCODING) if not os.path.isdir(dir): logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(mylar.SYS_ENCODING, 'replace')) return logger.info('Scanning comic directory: %s' % dir.decode(mylar.SYS_ENCODING, 'replace')) basedir = dir comic_list = [] comiccnt = 0 extensions = ('cbr','cbz') for r,d,f in os.walk(dir): #for directory in d[:]: # if directory.startswith("."): # d.remove(directory) for files in f: if any(files.lower().endswith('.' + x.lower()) for x in extensions): comic = files comicpath = os.path.join(r, files) comicsize = os.path.getsize(comicpath) print "Comic: " + comic print "Comic Path: " + comicpath print "Comic Size: " + str(comicsize) # We need the unicode path to use for logging, inserting into database unicode_comic_path = comicpath.decode(mylar.SYS_ENCODING, 'replace') comiccnt+=1 comic_dict = { 'ComicFilename': comic, 'ComicLocation': comicpath, 'ComicSize': comicsize, 'Unicode_ComicLocation': unicode_comic_path } comic_list.append(comic_dict) logger.info("I've found a total of " + str(comiccnt) + " comics....analyzing now") logger.info("comiclist: " + str(comic_list)) myDB = db.DBConnection() #let's load in the watchlist to see if we have any matches. logger.info("loading in the watchlist to see if a series is being watched already...") watchlist = myDB.action("SELECT * from comics") ComicName = [] ComicYear = [] ComicPublisher = [] ComicTotal = [] ComicID = [] ComicLocation = [] AltName = [] watchcnt = 0 watch_kchoice = [] watchchoice = {} import_by_comicids = [] import_comicids = {} for watch in watchlist: # let's clean up the name, just in case for comparison purposes... 
watchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', watch['ComicName']).encode('utf-8').strip() #watchcomic = re.sub('\s+', ' ', str(watchcomic)).strip() if ' the ' in watchcomic.lower(): #drop the 'the' from the watchcomic title for proper comparisons. watchcomic = watchcomic[-4:] alt_chk = "no" # alt-checker flag (default to no) # account for alternate names as well if watch['AlternateSearch'] is not None and watch['AlternateSearch'] is not 'None': altcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', watch['AlternateSearch']).encode('utf-8').strip() #altcomic = re.sub('\s+', ' ', str(altcomic)).strip() AltName.append(altcomic) alt_chk = "yes" # alt-checker flag ComicName.append(watchcomic) ComicYear.append(watch['ComicYear']) ComicPublisher.append(watch['ComicPublisher']) ComicTotal.append(watch['Total']) ComicID.append(watch['ComicID']) ComicLocation.append(watch['ComicLocation']) watchcnt+=1 logger.info("Successfully loaded " + str(watchcnt) + " series from your watchlist.") ripperlist=['digital-', 'empire', 'dcp'] watchfound = 0 for i in comic_list: print i['ComicFilename'] comfilename = i['ComicFilename'] comlocation = i['ComicLocation'] #let's clean up the filename for matching purposes cfilename = re.sub('[\_\#\,\/\:\;\-\!\$\%\&\+\'\?\@]', ' ', comfilename) #cfilename = re.sub('\s', '_', str(cfilename)) cm_cn = 0 #we need to track the counter to make sure we are comparing the right array parts #this takes care of the brackets :) m = re.findall('[^()]+', cfilename) lenm = len(m) print ("there are " + str(lenm) + " words.") cnt = 0 yearmatch = "false" foundonwatch = "False" issue = 999999 while (cnt < lenm): if m[cnt] is None: break if m[cnt] == ' ': pass else: logger.fdebug(str(cnt) + ". Bracket Word: " + m[cnt]) if cnt == 0: comic_andiss = m[cnt] logger.fdebug("Comic: " + comic_andiss) # if it's not in the standard format this will bork. # let's try to accomodate (somehow). 
# first remove the extension (if any) extensions = ('cbr', 'cbz') if comic_andiss.lower().endswith(extensions): comic_andiss = comic_andiss[:-4] print ("removed extension from filename.") #now we have to break up the string regardless of formatting. #let's force the spaces. comic_andiss = re.sub('_', ' ', comic_andiss) cs = comic_andiss.split() cs_len = len(cs) cn = '' ydetected = 'no' idetected = 'no' decimaldetect = 'no' for i in reversed(xrange(len(cs))): #start at the end. print ("word: " + str(cs[i])) #assume once we find issue - everything prior is the actual title #idetected = no will ignore everything so it will assume all title if cs[i][:-2] == '19' or cs[i][:-2] == '20' and idetected == 'no': print ("year detected: " + str(cs[i])) ydetected = 'yes' result_comyear = cs[i] elif cs[i].isdigit() and idetected == 'no' or '.' in cs[i]: issue = cs[i] print ("issue detected : " + str(issue)) idetected = 'yes' if '.' in cs[i]: #make sure it's a number on either side of decimal and assume decimal issue. decst = cs[i].find('.') dec_st = cs[i][:decst] dec_en = cs[i][decst+1:] print ("st: " + str(dec_st)) print ("en: " + str(dec_en)) if dec_st.isdigit() and dec_en.isdigit(): print ("decimal issue detected...adjusting.") issue = dec_st + "." + dec_en print ("issue detected: " + str(issue)) idetected = 'yes' else: print ("false decimal represent. Chunking to extra word.") cn = cn + cs[i] + " " break elif '\#' in cs[i] or decimaldetect == 'yes': print ("issue detected: " + str(cs[i])) idetected = 'yes' else: cn = cn + cs[i] + " " if ydetected == 'no': #assume no year given in filename... result_comyear = "0000" print ("cm?: " + str(cn)) if issue is not '999999': comiss = issue else: logger.ERROR("Invalid Issue number (none present) for " + comfilename) break cnsplit = cn.split() cname = '' findcn = 0 while (findcn < len(cnsplit)): cname = cname + cs[findcn] + " " findcn+=1 cname = cname[:len(cname)-1] # drop the end space... 
print ("assuming name is : " + cname) com_NAME = cname print ("com_NAME : " + com_NAME) yearmatch = "True" else: # we're assuming that the year is in brackets (and it should be damnit) if m[cnt][:-2] == '19' or m[cnt][:-2] == '20': print ("year detected: " + str(m[cnt])) ydetected = 'yes' result_comyear = m[cnt] cnt+=1 splitit = [] watchcomic_split = [] logger.fdebug("filename comic and issue: " + cfilename) #changed this from '' to ' ' comic_iss_b4 = re.sub('[\-\:\,]', ' ', com_NAME) comic_iss = comic_iss_b4.replace('.',' ') logger.fdebug("adjusted comic and issue: " + str(comic_iss)) #remove 'the' from here for proper comparisons. if ' the ' in comic_iss.lower(): comic_iss = comic_iss[-4:] splitit = comic_iss.split(None) logger.fdebug("adjusting from: " + str(comic_iss_b4) + " to: " + str(comic_iss)) #bmm = re.findall('v\d', comic_iss) #if len(bmm) > 0: splitst = len(splitit) - 2 #else: splitst = len(splitit) - 1 #----- #here we cycle through the Watchlist looking for a match. while (cm_cn < watchcnt): #setup the watchlist comname = ComicName[cm_cn] print ("watch_comic:" + comname) comyear = ComicYear[cm_cn] compub = ComicPublisher[cm_cn] comtotal = ComicTotal[cm_cn] comicid = ComicID[cm_cn] watch_location = ComicLocation[cm_cn] # if splitit[(len(splitit)-1)].isdigit(): # #compares - if the last digit and second last digit are #'s seperated by spaces assume decimal # comic_iss = splitit[(len(splitit)-1)] # splitst = len(splitit) - 1 # if splitit[(len(splitit)-2)].isdigit(): # # for series that have a digit at the end, it screws up the logistics. # i = 1 # chg_comic = splitit[0] # while (i < (len(splitit)-1)): # chg_comic = chg_comic + " " + splitit[i] # i+=1 # logger.fdebug("chg_comic:" + str(chg_comic)) # if chg_comic.upper() == comname.upper(): # logger.fdebug("series contains numerics...adjusting..") # else: # changeup = "." + splitit[(len(splitit)-1)] # logger.fdebug("changeup to decimal: " + str(changeup)) # comic_iss = splitit[(len(splitit)-2)] + "." 
+ comic_iss # splitst = len(splitit) - 2 # else: # if the nzb name doesn't follow the series-issue-year format even closely..ignore nzb # logger.fdebug("invalid naming format of filename detected - cannot properly determine issue") # continue # make sure that things like - in watchcomic are accounted for when comparing to nzb. # there shouldn't be an issue in the comic now, so let's just assume it's all gravy. splitst = len(splitit) watchcomic_split = helpers.cleanName(comname) watchcomic_split = re.sub('[\-\:\,\.]', ' ', watchcomic_split).split(None) logger.fdebug(str(splitit) + " file series word count: " + str(splitst)) logger.fdebug(str(watchcomic_split) + " watchlist word count: " + str(len(watchcomic_split))) if (splitst) != len(watchcomic_split): logger.fdebug("incorrect comic lengths...not a match") # if str(splitit[0]).lower() == "the": # logger.fdebug("THE word detected...attempting to adjust pattern matching") # splitit[0] = splitit[4:] else: logger.fdebug("length match..proceeding") n = 0 scount = 0 logger.fdebug("search-length: " + str(splitst)) logger.fdebug("Watchlist-length: " + str(len(watchcomic_split))) while ( n <= (splitst)-1 ): logger.fdebug("splitit: " + str(splitit[n])) if n < (splitst) and n < len(watchcomic_split): logger.fdebug(str(n) + " Comparing: " + str(watchcomic_split[n]) + " .to. 
" + str(splitit[n])) if '+' in watchcomic_split[n]: watchcomic_split[n] = re.sub('+', '', str(watchcomic_split[n])) if str(watchcomic_split[n].lower()) in str(splitit[n].lower()) and len(watchcomic_split[n]) >= len(splitit[n]): logger.fdebug("word matched on : " + str(splitit[n])) scount+=1 #elif ':' in splitit[n] or '-' in splitit[n]: # splitrep = splitit[n].replace('-', '') # print ("non-character keyword...skipped on " + splitit[n]) elif str(splitit[n]).lower().startswith('v'): logger.fdebug("possible versioning..checking") #we hit a versioning # - account for it if splitit[n][1:].isdigit(): comicversion = str(splitit[n]) logger.fdebug("version found: " + str(comicversion)) else: logger.fdebug("Comic / Issue section") if splitit[n].isdigit(): logger.fdebug("issue detected") #comiss = splitit[n] # comicNAMER = n - 1 # com_NAME = splitit[0] # cmnam = 1 # while (cmnam <= comicNAMER): # com_NAME = str(com_NAME) + " " + str(splitit[cmnam]) # cmnam+=1 # logger.fdebug("comic: " + str(com_NAME)) else: logger.fdebug("non-match for: "+ str(splitit[n])) pass n+=1 #set the match threshold to 80% (for now) # if it's less than 80% consider it a non-match and discard. #splitit has to splitit-1 because last position is issue. wordcnt = int(scount) logger.fdebug("scount:" + str(wordcnt)) totalcnt = int(splitst) logger.fdebug("splitit-len:" + str(totalcnt)) spercent = (wordcnt/totalcnt) * 100 logger.fdebug("we got " + str(spercent) + " percent.") if int(spercent) >= 80: logger.fdebug("it's a go captain... - we matched " + str(spercent) + "%!") logger.fdebug("this should be a match!") # if '.' 
in comic_iss: # comisschk_find = comic_iss.find('.') # comisschk_b4dec = comic_iss[:comisschk_find] # comisschk_decval = comic_iss[comisschk_find+1:] # logger.fdebug("Found IssueNumber: " + str(comic_iss)) # logger.fdebug("..before decimal: " + str(comisschk_b4dec)) # logger.fdebug("...after decimal: " + str(comisschk_decval)) # #--let's make sure we don't wipe out decimal issues ;) # if int(comisschk_decval) == 0: # ciss = comisschk_b4dec # cintdec = int(comisschk_decval) # else: # if len(comisschk_decval) == 1: # ciss = comisschk_b4dec + "." + comisschk_decval # cintdec = int(comisschk_decval) * 10 # else: # ciss = comisschk_b4dec + "." + comisschk_decval.rstrip('0') # cintdec = int(comisschk_decval.rstrip('0')) * 10 # comintIss = (int(comisschk_b4dec) * 1000) + cintdec # else: # comintIss = int(comic_iss) * 1000 logger.fdebug("issue we found for is : " + str(comiss)) #set the year to the series we just found ;) result_comyear = comyear #issue comparison now as well logger.info(u"Found " + comname + " (" + str(comyear) + ") issue: " + str(comiss)) # watchfound+=1 watchmatch = str(comicid) # watch_kchoice.append({ # "ComicID": str(comicid), # "ComicName": str(comname), # "ComicYear": str(comyear), # "ComicIssue": str(int(comic_iss)), # "ComicLocation": str(watch_location), # "OriginalLocation" : str(comlocation), # "OriginalFilename" : str(comfilename) # }) foundonwatch = "True" break elif int(spercent) < 80: logger.fdebug("failure - we only got " + str(spercent) + "% right!") cm_cn+=1 if foundonwatch == "False": watchmatch = None #---if it's not a match - send it to the importer. 
n = 0 # print ("comic_andiss : " + str(comic_andiss)) # csplit = comic_andiss.split(None) # while ( n <= (len(csplit)-1) ): # print ("csplit:" + str(csplit[n])) # if csplit[n].isdigit(): # logger.fdebug("issue detected") # comiss = splitit[n] # logger.fdebug("issue # : " + str(comiss)) # comicNAMER = n - 1 # com_NAME = csplit[0] # cmnam = 1 # while (cmnam <= comicNAMER): # com_NAME = str(com_NAME) + " " + str(csplit[cmnam]) # cmnam+=1 # logger.fdebug("comic: " + str(com_NAME)) # n+=1 if result_comyear is None: result_comyear = '0000' #no year in filename basically. print ("adding " + com_NAME + " to the import-queue!") impid = com_NAME + "-" + str(result_comyear) + "-" + str(comiss) print ("impid: " + str(impid)) import_by_comicids.append({ "impid": impid, "watchmatch": watchmatch, "comicname" : com_NAME, "comicyear" : result_comyear, "comfilename" : comfilename, "comlocation" : comlocation.decode(mylar.SYS_ENCODING) }) if len(watch_kchoice) > 0: watchchoice['watchlist'] = watch_kchoice print ("watchchoice: " + str(watchchoice)) logger.info("I have found " + str(watchfound) + " out of " + str(comiccnt) + " comics for series that are being watched.") wat = 0 comicids = [] if watchfound > 0: if mylar.IMP_MOVE: logger.info("You checked off Move Files...so that's what I'm going to do") #check to see if Move Files is enabled. #if not being moved, set the archive bit. print("Moving files into appropriate directory") while (wat < watchfound): watch_the_list = watchchoice['watchlist'][wat] watch_comlocation = watch_the_list['ComicLocation'] watch_comicid = watch_the_list['ComicID'] watch_comicname = watch_the_list['ComicName'] watch_comicyear = watch_the_list['ComicYear'] watch_comiciss = watch_the_list['ComicIssue'] print ("ComicLocation: " + str(watch_comlocation)) orig_comlocation = watch_the_list['OriginalLocation'] orig_filename = watch_the_list['OriginalFilename'] print ("Orig. Location: " + str(orig_comlocation)) print ("Orig. 
Filename: " + str(orig_filename)) #before moving check to see if Rename to Mylar structure is enabled. if mylar.IMP_RENAME: print("Renaming files according to configuration details : " + str(mylar.FILE_FORMAT)) renameit = helpers.rename_param(watch_comicid, watch_comicname, watch_comicyear, watch_comiciss) nfilename = renameit['nfilename'] dst_path = os.path.join(watch_comlocation,nfilename) if str(watch_comicid) not in comicids: comicids.append(watch_comicid) else: print("Renaming files not enabled, keeping original filename(s)") dst_path = os.path.join(watch_comlocation,orig_filename) #os.rename(os.path.join(self.nzb_folder, str(ofilename)), os.path.join(self.nzb_folder,str(nfilename + ext))) #src = os.path.join(, str(nfilename + ext)) print ("I'm going to move " + str(orig_comlocation) + " to .." + str(dst_path)) try: shutil.move(orig_comlocation, dst_path) except (OSError, IOError): logger.info("Failed to move directory - check directories and manually re-run.") wat+=1 else: # if move files isn't enabled, let's set all found comics to Archive status :) while (wat < watchfound): watch_the_list = watchchoice['watchlist'][wat] watch_comicid = watch_the_list['ComicID'] watch_issue = watch_the_list['ComicIssue'] print ("ComicID: " + str(watch_comicid)) print ("Issue#: " + str(watch_issue)) issuechk = myDB.action("SELECT * from issues where ComicID=? AND INT_IssueNumber=?", [watch_comicid, watch_issue]).fetchone() if issuechk is None: print ("no matching issues for this comic#") else: print("...Existing status: " + str(issuechk['Status'])) control = {"IssueID": issuechk['IssueID']} values = { "Status": "Archived"} print ("...changing status of " + str(issuechk['Issue_Number']) + " to Archived ") myDB.upsert("issues", values, control) if str(watch_comicid) not in comicids: comicids.append(watch_comicid) wat+=1 if comicids is None: pass else: c_upd = len(comicids) c = 0 while (c < c_upd ): print ("Rescanning.. 
" + str(c)) updater.forceRescan(c) if not len(import_by_comicids): return "Completed" if len(import_by_comicids) > 0: import_comicids['comic_info'] = import_by_comicids print ("import comicids: " + str(import_by_comicids)) return import_comicids, len(import_by_comicids)
def addComictoDB(comicid):
    """Add or refresh a comic series (and all of its issues) in the database.

    Pulls series details from ComicVine (cv.getComic), scrapes publishing
    data from GCD (parseit.GCDScraper), creates the series directory on
    disk, upserts the series row plus one row per issue, rescans the
    directory, and finally tries to grab any issues that ended up marked
    "Wanted".

    comicid -- ComicVine ComicID of the series to add/update.

    Returns None; progress and failures are reported through logger and
    reflected in the comic's "Status" column.

    NOTE(review): this module defines addComictoDB more than once; this
    earlier definition is shadowed by the later one(s) below.
    """
    # Putting this here to get around the circular import. Will try to use this to update images at later date.
    from mylar import cache
    myDB = db.DBConnection()
#    myDB.action('DELETE from blacklist WHERE ComicID=?', [comicid])
    # We need the current minimal info in the database instantly
    # so we don't throw a 500 error when we redirect to the artistPage
    controlValueDict = {"ComicID": comicid}
    dbcomic = myDB.action('SELECT * FROM comics WHERE ComicID=?', [comicid]).fetchone()
    if dbcomic is None:
        newValueDict = {"ComicName": "Comic ID: %s" % (comicid),
                        "Status": "Loading"}
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("comics", newValueDict, controlValueDict)

    # we need to lookup the info for the requested ComicID in full now
    comic = cv.getComic(comicid, 'comic')
    if not comic:
        logger.warn("Error fetching comic. ID for : " + comicid)
        if dbcomic is None:
            newValueDict = {"ComicName": "Fetch failed, try refreshing. (%s)" % (comicid),
                            "Status": "Active"}
        else:
            newValueDict = {"Status": "Active"}
        myDB.upsert("comics", newValueDict, controlValueDict)
        return

    # strip a leading "The " for the sort name only
    if comic['ComicName'].startswith('The '):
        sortname = comic['ComicName'][4:]
    else:
        sortname = comic['ComicName']

    logger.info(u"Now adding/updating: " + comic['ComicName'])
    #--Now that we know ComicName, let's try some scraping
    #--Start
    # gcd will return issue details (most importantly publishing date)
    gcdinfo = parseit.GCDScraper(comic['ComicName'], comic['ComicYear'], comic['ComicIssues'], comicid)
    if gcdinfo == "No Match":
        logger.warn("No matching result found for " + comic['ComicName'] + " (" + comic['ComicYear'] + ")" )
        updater.no_searchresults(comicid)
        return
    logger.info(u"Sucessfully retrieved details for " + comic['ComicName'] )
    # print ("Series Published" + parseit.resultPublished)
    #--End

    #comic book location on machine
    # setup default location here
    if ':' in comic['ComicName']:
        comicdir = comic['ComicName'].replace(':', '')
    else:
        comicdir = comic['ComicName']
    comlocation = mylar.DESTINATION_DIR + "/" + comicdir + " (" + comic['ComicYear'] + ")"
    if mylar.DESTINATION_DIR == "":
        logger.error(u"There is no general directory specified - please specify in Config/Post-Processing.")
        return
    if mylar.REPLACE_SPACES:
        #mylar.REPLACE_CHAR ...determines what to replace spaces with underscore or dot
        comlocation = comlocation.replace(' ', mylar.REPLACE_CHAR)

    #if it doesn't exist - create it (otherwise will bugger up later on)
    if os.path.isdir(str(comlocation)):
        logger.info(u"Directory (" + str(comlocation) + ") already exists! Continuing...")
    else:
        try:
            os.makedirs(str(comlocation))
            logger.info(u"Directory successfully created at: " + str(comlocation))
        # BUGFIX: was "except OSError.e:", which evaluates OSError.e and
        # raises AttributeError instead of catching the OSError. Tolerate a
        # directory created in the meantime (EEXIST); re-raise anything else.
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

    #try to account for CV not updating new issues as fast as GCD
    #seems CV doesn't update total counts
    #comicIssues = gcdinfo['totalissues']
    if gcdinfo['gcdvariation'] == "cv":
        comicIssues = str(int(comic['ComicIssues']) + 1)
    else:
        comicIssues = comic['ComicIssues']

    controlValueDict = {"ComicID": comicid}
    newValueDict = {"ComicName": comic['ComicName'],
                    "ComicSortName": sortname,
                    "ComicYear": comic['ComicYear'],
                    "ComicImage": comic['ComicImage'],
                    "Total": comicIssues,
                    "ComicLocation": comlocation,
                    "ComicPublisher": comic['ComicPublisher'],
                    "ComicPublished": parseit.resultPublished,
                    "DateAdded": helpers.today(),
                    "Status": "Loading"}
    myDB.upsert("comics", newValueDict, controlValueDict)

    issued = cv.getComic(comicid, 'issue')
    logger.info(u"Sucessfully retrieved issue details for " + comic['ComicName'] )
    n = 0
    iscnt = int(comicIssues)
    issid = []
    issnum = []
    issname = []
    issdate = []
    int_issnum = []
    #let's start issue #'s at 0 -- thanks to DC for the new 52 reboot! :)
    latestiss = "0"
    latestdate = "0000-00-00"
    #print ("total issues:" + str(iscnt))
    logger.info(u"Now adding/updating issues for" + comic['ComicName'])
    # file check to see if issue exists
    logger.info(u"Checking directory for existing issues.")
    while (n <= iscnt):
        try:
            firstval = issued['issuechoice'][n]
        except IndexError:
            # ran out of CV issues before hitting the (padded) count
            break
        cleanname = helpers.cleanName(firstval['Issue_Name'])
        issid = str(firstval['Issue_ID'])
        issnum = str(firstval['Issue_Number'])
        issname = cleanname
        # Encode the issue number as int(issue)*1000 + scaled decimal so
        # that e.g. "3.5" (3500) sorts between 3 (3000) and 4 (4000).
        if '.' in str(issnum):
            issn_st = str(issnum).find('.')
            issn_b4dec = str(issnum)[:issn_st]
            #if the length of decimal is only 1 digit, assume it's a tenth
            dec_is = str(issnum)[issn_st + 1:]
            if len(dec_is) == 1:
                dec_nisval = int(dec_is) * 10
                iss_naftdec = str(dec_nisval)
            if len(dec_is) == 2:
                dec_nisval = int(dec_is)
                iss_naftdec = str(dec_nisval)
            # NOTE(review): a decimal part longer than 2 digits leaves
            # dec_nisval/iss_naftdec unbound (NameError) - behavior unchanged.
            iss_issue = issn_b4dec + "." + iss_naftdec
            issis = (int(issn_b4dec) * 1000) + dec_nisval
        else:
            issis = int(issnum) * 1000

        # match this CV issue against the GCD list to pick up its pub date
        bb = 0
        while (bb <= iscnt):
            try:
                gcdval = gcdinfo['gcdchoice'][bb]
            except IndexError:
                #account for gcd variation here
                if gcdinfo['gcdvariation'] == 'gcd':
                    print ("gcd-variation accounted for.")
                    issdate = '0000-00-00'
                    int_issnum = int ( issis / 1000 )
                break
            if 'nn' in str(gcdval['GCDIssue']):
                #no number detected - GN, TP or the like
                logger.warn(u"Non Series detected (Graphic Novel, etc) - cannot proceed at this time.")
                updater.no_searchresults(comicid)
                return
            elif '.' in str(gcdval['GCDIssue']):
                # same *1000+decimal encoding as above, for the GCD side
                issst = str(gcdval['GCDIssue']).find('.')
                issb4dec = str(gcdval['GCDIssue'])[:issst]
                #if the length of decimal is only 1 digit, assume it's a tenth
                decis = str(gcdval['GCDIssue'])[issst+1:]
                if len(decis) == 1:
                    decisval = int(decis) * 10
                    issaftdec = str(decisval)
                if len(decis) == 2:
                    decisval = int(decis)
                    issaftdec = str(decisval)
                gcd_issue = issb4dec + "." + issaftdec
                gcdis = (int(issb4dec) * 1000) + decisval
            else:
                gcdis = int(str(gcdval['GCDIssue'])) * 1000
            if gcdis == issis:
                issdate = str(gcdval['GCDDate'])
                int_issnum = int( gcdis / 1000 )
                #get the latest issue / date using the date.
                if gcdval['GCDDate'] > latestdate:
                    latestiss = str(issnum)
                    latestdate = str(gcdval['GCDDate'])
                break
            #bb = iscnt
            bb+=1

        # check if the issue already exists
        iss_exists = myDB.select('SELECT * from issues WHERE IssueID=?', [issid])
        controlValueDict = {"IssueID": issid}
        newValueDict = {"ComicID": comicid,
                        "ComicName": comic['ComicName'],
                        "IssueName": issname,
                        "Issue_Number": issnum,
                        "IssueDate": issdate,
                        "Int_IssueNumber": int_issnum}
        # Only add DateAdded if the issue is not already in the database.
        # BUGFIX: this used to be assigned *before* newValueDict was rebuilt
        # just above, so the DateAdded entry was silently discarded.
        if not len(iss_exists):
            newValueDict['DateAdded'] = helpers.today()
        if mylar.AUTOWANT_ALL:
            newValueDict['Status'] = "Wanted"
        #elif release_dict['releasedate'] > helpers.today() and mylar.AUTOWANT_UPCOMING:
        #    newValueDict['Status'] = "Wanted"
        else:
            newValueDict['Status'] = "Skipped"
        myDB.upsert("issues", newValueDict, controlValueDict)
        n+=1

    #check for existing files...
    updater.forceRescan(comicid)

    controlValueStat = {"ComicID": comicid}
    newValueStat = {"Status": "Active",
                    "LatestIssue": latestiss,
                    "LatestDate": latestdate}
    myDB.upsert("comics", newValueStat, controlValueStat)
    logger.info(u"Updating complete for: " + comic['ComicName'])

    #here we grab issues that have been marked as wanted above...
    results = myDB.select("SELECT * FROM issues where ComicID=? AND Status='Wanted'", [comicid])
    if results:
        logger.info(u"Attempting to grab wanted issues for : " + comic['ComicName'])
        for result in results:
            foundNZB = "none"
            if (mylar.NZBSU or mylar.DOGNZB or mylar.EXPERIMENTAL) and (mylar.SAB_HOST):
                foundNZB = search.searchforissue(result['IssueID'])
                if foundNZB == "yes":
                    updater.foundsearch(result['ComicID'], result['IssueID'])
    else:
        logger.info(u"No issues marked as wanted for " + comic['ComicName'])
    logger.info(u"Finished grabbing what I could.")
def addComictoDB(comicid,mismatch=None,pullupd=None,imported=None,ogcname=None):
    """Add or refresh a comic series (and its issues) in the database.

    Later revision of addComictoDB with GCD mismatch handling, a
    configurable folder format, cover-image caching, and preservation of
    existing issue statuses on re-add.

    comicid  -- ComicVine ComicID of the series.
    mismatch -- "yes" to resolve the series via the exceptions table and
                parseit.GCDdetails instead of a direct GCD scrape;
                "no"/None for the normal scrape path.
    pullupd, imported, ogcname -- accepted but not referenced anywhere in
                this (visibly truncated) body; TODO confirm against a
                complete upstream copy.

    NOTE(review): the tail of this function (the second "else:" branch
    below) appears to be spliced in from a different revision and is
    truncated mid-logic; the function is also shadowed by a later
    definition of the same name further down this module. Verify against
    upstream before relying on it.
    """
    # Putting this here to get around the circular import. Will try to use this to update images at later date.
#    from mylar import cache
    myDB = db.DBConnection()
    # We need the current minimal info in the database instantly
    # so we don't throw a 500 error when we redirect to the artistPage
    controlValueDict = {"ComicID": comicid}
    dbcomic = myDB.action('SELECT * FROM comics WHERE ComicID=?', [comicid]).fetchone()
    if dbcomic is None:
        newValueDict = {"ComicName": "Comic ID: %s" % (comicid), "Status": "Loading"}
        # no existing row, so no stored location to reuse
        comlocation = None
    else:
        newValueDict = {"Status": "Loading"}
        # keep the previously-stored directory for an existing series
        comlocation = dbcomic['ComicLocation']
    myDB.upsert("comics", newValueDict, controlValueDict)
    # we need to lookup the info for the requested ComicID in full now
    comic = cv.getComic(comicid,'comic')
    #comic = myDB.action('SELECT * FROM comics WHERE ComicID=?', [comicid]).fetchone()
    if not comic:
        logger.warn("Error fetching comic. ID for : " + comicid)
        if dbcomic is None:
            newValueDict = {"ComicName": "Fetch failed, try refreshing. (%s)" % (comicid), "Status": "Active"}
        else:
            newValueDict = {"Status": "Active"}
        myDB.upsert("comics", newValueDict, controlValueDict)
        return
    # strip a leading "The " for the sort name only
    if comic['ComicName'].startswith('The '):
        sortname = comic['ComicName'][4:]
    else:
        sortname = comic['ComicName']
    logger.info(u"Now adding/updating: " + comic['ComicName'])
    #--Now that we know ComicName, let's try some scraping
    #--Start
    # gcd will return issue details (most importantly publishing date)
    if mismatch == "no" or mismatch is None:
        gcdinfo=parseit.GCDScraper(comic['ComicName'], comic['ComicYear'], comic['ComicIssues'], comicid)
        #print ("gcdinfo: " + str(gcdinfo))
        mismatch_com = "no"
        if gcdinfo == "No Match":
            updater.no_searchresults(comicid)
            nomatch = "true"
            logger.info(u"There was an error when trying to add " + comic['ComicName'] + " (" + comic['ComicYear'] + ")" )
            return nomatch
        else:
            mismatch_com = "yes"
            #print ("gcdinfo:" + str(gcdinfo))
    elif mismatch == "yes":
        # resolve via the custom-exceptions table instead of scraping
        CV_EXcomicid = myDB.action("SELECT * from exceptions WHERE ComicID=?", [comicid]).fetchone()
        if CV_EXcomicid['variloop'] is None:
            # NOTE(review): no exception entry means gcdinfo is never set on
            # this path - the gcdinfo reference further down would raise.
            pass
        else:
            vari_loop = CV_EXcomicid['variloop']
            NewComicID = CV_EXcomicid['NewComicID']
            gcomicid = CV_EXcomicid['GComicID']
            resultURL = "/series/" + str(NewComicID) + "/"
            #print ("variloop" + str(CV_EXcomicid['variloop']))
            #if vari_loop == '99':
            gcdinfo = parseit.GCDdetails(comseries=None, resultURL=resultURL, vari_loop=0, ComicID=comicid, TotalIssues=0, issvariation="no", resultPublished=None)
    logger.info(u"Sucessfully retrieved details for " + comic['ComicName'] )
    # print ("Series Published" + parseit.resultPublished)
    #comic book location on machine
    # setup default location here
    if comlocation is None:
        # sanitize characters that are illegal/awkward in directory names
        if ':' in comic['ComicName'] or '/' in comic['ComicName'] or ',' in comic['ComicName']:
            comicdir = comic['ComicName']
            if ':' in comicdir:
                comicdir = comicdir.replace(':','')
            if '/' in comicdir:
                comicdir = comicdir.replace('/','-')
            if ',' in comicdir:
                comicdir = comicdir.replace(',','')
        else:
            comicdir = comic['ComicName']
        series = comicdir
        publisher = comic['ComicPublisher']
        year = comic['ComicYear']
        #do work to generate folder path
        values = {'$Series': series,
                  '$Publisher': publisher,
                  '$Year': year,
                  '$series': series.lower(),
                  '$publisher': publisher.lower(),
                  '$Volume': year
                  }
        #print mylar.FOLDER_FORMAT
        #print 'working dir:'
        #print helpers.replace_all(mylar.FOLDER_FORMAT, values)
        if mylar.FOLDER_FORMAT == '':
            comlocation = mylar.DESTINATION_DIR + "/" + comicdir + " (" + comic['ComicYear'] + ")"
        else:
            comlocation = mylar.DESTINATION_DIR + "/" + helpers.replace_all(mylar.FOLDER_FORMAT, values)
        #comlocation = mylar.DESTINATION_DIR + "/" + comicdir + " (" + comic['ComicYear'] + ")"
        if mylar.DESTINATION_DIR == "":
            logger.error(u"There is no general directory specified - please specify in Config/Post-Processing.")
            return
        if mylar.REPLACE_SPACES:
            #mylar.REPLACE_CHAR ...determines what to replace spaces with underscore or dot
            comlocation = comlocation.replace(' ', mylar.REPLACE_CHAR)
    #if it doesn't exist - create it (otherwise will bugger up later on)
    if os.path.isdir(str(comlocation)):
        logger.info(u"Directory (" + str(comlocation) + ") already exists! Continuing...")
    else:
        #print ("Directory doesn't exist!")
        try:
            os.makedirs(str(comlocation))
            logger.info(u"Directory successfully created at: " + str(comlocation))
        except OSError:
            # best-effort: log and carry on rather than abort the add
            logger.error(u"Could not create comicdir : " + str(comlocation))
    #try to account for CV not updating new issues as fast as GCD
    #seems CV doesn't update total counts
    #comicIssues = gcdinfo['totalissues']
    if gcdinfo['gcdvariation'] == "cv":
        comicIssues = str(int(comic['ComicIssues']) + 1)
    else:
        comicIssues = comic['ComicIssues']
    #let's download the image...
    if os.path.exists(mylar.CACHE_DIR):
        pass
    else:
        #let's make the dir.
        try:
            os.makedirs(str(mylar.CACHE_DIR))
            logger.info(u"Cache Directory successfully created at: " + str(mylar.CACHE_DIR))
        except OSError:
            logger.error('Could not create cache dir. Check permissions of cache dir: ' + str(mylar.CACHE_DIR))
    coverfile = mylar.CACHE_DIR + "/" + str(comicid) + ".jpg"
    #try:
    urllib.urlretrieve(str(comic['ComicImage']), str(coverfile))
    try:
        # confirm the download actually produced a readable file
        with open(str(coverfile)) as f:
            ComicImage = os.path.join('cache',str(comicid) + ".jpg")
            logger.info(u"Sucessfully retrieved cover for " + str(comic['ComicName']))
            #if the comic cover local is checked, save a cover.jpg to the series folder.
            if mylar.COMIC_COVER_LOCAL:
                comiclocal = os.path.join(str(comlocation) + "/cover.jpg")
                shutil.copy(ComicImage,comiclocal)
    except IOError as e:
        logger.error(u"Unable to save cover locally at this time.")
    controlValueDict = {"ComicID": comicid}
    newValueDict = {"ComicName": comic['ComicName'],
                    "ComicSortName": sortname,
                    "ComicYear": comic['ComicYear'],
                    "ComicImage": ComicImage,
                    "Total": comicIssues,
                    "ComicLocation": comlocation,
                    "ComicPublisher": comic['ComicPublisher'],
                    "ComicPublished": gcdinfo['resultPublished'],
                    "DateAdded": helpers.today(),
                    "Status": "Loading"}
    myDB.upsert("comics", newValueDict, controlValueDict)
    issued = cv.getComic(comicid,'issue')
    logger.info(u"Sucessfully retrieved issue details for " + comic['ComicName'] )
    n = 0
    iscnt = int(comicIssues)
    issid = []
    issnum = []
    issname = []
    issdate = []
    int_issnum = []
    #let's start issue #'s at 0 -- thanks to DC for the new 52 reboot! :)
    latestiss = "0"
    latestdate = "0000-00-00"
    #print ("total issues:" + str(iscnt))
    #---removed NEW code here---
    logger.info(u"Now adding/updating issues for " + comic['ComicName'])
    # file check to see if issue exists
    logger.info(u"Checking directory for existing issues.")
    #fc = filechecker.listFiles(dir=comlocation, watchcomic=comic['ComicName'])
    #havefiles = 0
    #fccnt = int(fc['comiccount'])
    #logger.info(u"Found " + str(fccnt) + "/" + str(iscnt) + " issues of " + comic['ComicName'] + "...verifying")
    #fcnew = []
    if iscnt > 0: #if a series is brand new, it wont have any issues/details yet so skip this part
        while (n <= iscnt):
            #---NEW.code
            try:
                firstval = issued['issuechoice'][n]
            except IndexError:
                # ran out of CV issues before hitting the (padded) count
                break
            cleanname = helpers.cleanName(firstval['Issue_Name'])
            issid = str(firstval['Issue_ID'])
            issnum = str(firstval['Issue_Number'])
            issname = cleanname
            # encode the issue number as int(issue)*1000 + scaled decimal so
            # e.g. "3.5" (3500) sorts between 3 (3000) and 4 (4000)
            if '.' in str(issnum):
                issn_st = str(issnum).find('.')
                issn_b4dec = str(issnum)[:issn_st]
                #if the length of decimal is only 1 digit, assume it's a tenth
                dec_is = str(issnum)[issn_st + 1:]
                if len(dec_is) == 1:
                    dec_nisval = int(dec_is) * 10
                    iss_naftdec = str(dec_nisval)
                if len(dec_is) == 2:
                    dec_nisval = int(dec_is)
                    iss_naftdec = str(dec_nisval)
                # NOTE(review): a decimal part longer than 2 digits leaves
                # dec_nisval/iss_naftdec unbound (NameError) here
                iss_issue = issn_b4dec + "." + iss_naftdec
                issis = (int(issn_b4dec) * 1000) + dec_nisval
            else:
                issis = int(issnum) * 1000
            # match this CV issue against the GCD list for its pub date
            bb = 0
            while (bb <= iscnt):
                try:
                    gcdval = gcdinfo['gcdchoice'][bb]
                except IndexError:
                    #account for gcd variation here
                    if gcdinfo['gcdvariation'] == 'gcd':
                        #logger.fdebug("gcd-variation accounted for.")
                        issdate = '0000-00-00'
                        int_issnum = int ( issis / 1000 )
                    break
                if 'nn' in str(gcdval['GCDIssue']):
                    #no number detected - GN, TP or the like
                    logger.warn(u"Non Series detected (Graphic Novel, etc) - cannot proceed at this time.")
                    updater.no_searchresults(comicid)
                    return
                elif '.' in str(gcdval['GCDIssue']):
                    #logger.fdebug("g-issue:" + str(gcdval['GCDIssue']))
                    issst = str(gcdval['GCDIssue']).find('.')
                    #logger.fdebug("issst:" + str(issst))
                    issb4dec = str(gcdval['GCDIssue'])[:issst]
                    #logger.fdebug("issb4dec:" + str(issb4dec))
                    #if the length of decimal is only 1 digit, assume it's a tenth
                    decis = str(gcdval['GCDIssue'])[issst+1:]
                    #logger.fdebug("decis:" + str(decis))
                    if len(decis) == 1:
                        decisval = int(decis) * 10
                        issaftdec = str(decisval)
                    if len(decis) == 2:
                        decisval = int(decis)
                        issaftdec = str(decisval)
                    gcd_issue = issb4dec + "." + issaftdec
                    #logger.fdebug("gcd_issue:" + str(gcd_issue))
                    gcdis = (int(issb4dec) * 1000) + decisval
                else:
                    gcdis = int(str(gcdval['GCDIssue'])) * 1000
                if gcdis == issis:
                    issdate = str(gcdval['GCDDate'])
                    int_issnum = int( gcdis / 1000 )
                    #get the latest issue / date using the date.
                    if gcdval['GCDDate'] > latestdate:
                        latestiss = str(issnum)
                        latestdate = str(gcdval['GCDDate'])
                    break
                #bb = iscnt
                bb+=1
            #print("(" + str(n) + ") IssueID: " + str(issid) + " IssueNo: " + str(issnum) + " Date" + str(issdate))
            #---END.NEW.
            # check if the issue already exists
            iss_exists = myDB.action('SELECT * from issues WHERE IssueID=?', [issid]).fetchone()
            # Only add DateAdded if the issue is not already in the database
            # NOTE(review): DateAdded set here is clobbered when newValueDict
            # is rebuilt on the next statement - likely a bug, left unchanged.
            if iss_exists is None:
                newValueDict['DateAdded'] = helpers.today()
            controlValueDict = {"IssueID": issid}
            newValueDict = {"ComicID": comicid,
                            "ComicName": comic['ComicName'],
                            "IssueName": issname,
                            "Issue_Number": issnum,
                            "IssueDate": issdate,
                            "Int_IssueNumber": int_issnum
                            }
            if mylar.AUTOWANT_ALL:
                newValueDict['Status'] = "Wanted"
            elif issdate > helpers.today() and mylar.AUTOWANT_UPCOMING:
                newValueDict['Status'] = "Wanted"
            else:
                newValueDict['Status'] = "Skipped"
            if iss_exists:
                # re-adds keep whatever status the issue already had
                #print ("Existing status : " + str(iss_exists['Status']))
                newValueDict['Status'] = iss_exists['Status']
            try:
                myDB.upsert("issues", newValueDict, controlValueDict)
            except sqlite3.InterfaceError, e:
                #raise sqlite3.InterfaceError(e)
                logger.error("MAJOR error trying to get issue data, this is most likey a MULTI-VOLUME series and you need to use the custom_exceptions.csv file.")
                myDB.action("DELETE FROM comics WHERE ComicID=?", [comicid])
                return
            n+=1
        # NOTE(review): duplicate increment after the loop - appears to be a
        # merge artifact; verify against upstream.
        n+=1
#        logger.debug(u"Updating comic cache for " + comic['ComicName'])
#        cache.getThumb(ComicID=issue['issueid'])
#        logger.debug(u"Updating cache for: " + comic['ComicName'])
#        cache.getThumb(ComicIDcomicid)
    else:
        # NOTE(review): this whole branch looks like a remnant of a different
        # revision spliced in - the inner "if iscnt > 0" can never be true
        # inside the else of the identical test above, and the body is
        # truncated mid-logic at the end. Verify against upstream.
        if iscnt > 0: #if a series is brand new, it wont have any issues/details yet so skip this part
            while (n <= iscnt):
                #---NEW.code
                try:
                    firstval = issued['issuechoice'][n]
                except IndexError:
                    break
                cleanname = helpers.cleanName(firstval['Issue_Name'])
                issid = str(firstval['Issue_ID'])
                issnum = firstval['Issue_Number']
                #print ("issnum: " + str(issnum))
                issname = cleanname
                issdate = str(firstval['Issue_Date'])
                if issnum.isdigit():
                    int_issnum = int( issnum )
                else:
                    # non-numeric issue numbers: "12AU" variants, 1/2 issues,
                    # and decimal/comma forms
                    if 'au' in issnum.lower():
                        int_issnum = str(int(issnum[:-2])) + 'AU'
                    elif u'\xbd' in issnum:
                        # unicode one-half glyph
                        issnum = .5
                        int_issnum = .5
                    elif '.' in issnum or ',' in issnum:
                        if ',' in issnum:
                            issnum = re.sub(',','.', issnum)
def addComictoDB(comicid, mismatch=None): # Putting this here to get around the circular import. Will try to use this to update images at later date. from mylar import cache myDB = db.DBConnection() # We need the current minimal info in the database instantly # so we don't throw a 500 error when we redirect to the artistPage controlValueDict = {"ComicID": comicid} dbcomic = myDB.action("SELECT * FROM comics WHERE ComicID=?", [comicid]).fetchone() if dbcomic is None: newValueDict = {"ComicName": "Comic ID: %s" % (comicid), "Status": "Loading"} comlocation = None else: newValueDict = {"Status": "Loading"} comlocation = dbcomic["ComicLocation"] myDB.upsert("comics", newValueDict, controlValueDict) # we need to lookup the info for the requested ComicID in full now comic = cv.getComic(comicid, "comic") # comic = myDB.action('SELECT * FROM comics WHERE ComicID=?', [comicid]).fetchone() if not comic: logger.warn("Error fetching comic. ID for : " + comicid) if dbcomic is None: newValueDict = {"ComicName": "Fetch failed, try refreshing. 
(%s)" % (comicid), "Status": "Active"} else: newValueDict = {"Status": "Active"} myDB.upsert("comics", newValueDict, controlValueDict) return if comic["ComicName"].startswith("The "): sortname = comic["ComicName"][4:] else: sortname = comic["ComicName"] logger.info(u"Now adding/updating: " + comic["ComicName"]) # --Now that we know ComicName, let's try some scraping # --Start # gcd will return issue details (most importantly publishing date) if mismatch == "no" or mismatch is None: gcdinfo = parseit.GCDScraper(comic["ComicName"], comic["ComicYear"], comic["ComicIssues"], comicid) mismatch_com = "no" if gcdinfo == "No Match": updater.no_searchresults(comicid) nomatch = "true" logger.info( u"There was an error when trying to add " + comic["ComicName"] + " (" + comic["ComicYear"] + ")" ) return nomatch else: mismatch_com = "yes" # print ("gcdinfo:" + str(gcdinfo)) elif mismatch == "yes": CV_EXcomicid = myDB.action("SELECT * from exceptions WHERE ComicID=?", [comicid]).fetchone() if CV_EXcomicid["variloop"] is None: pass else: vari_loop = CV_EXcomicid["variloop"] NewComicID = CV_EXcomicid["NewComicID"] gcomicid = CV_EXcomicid["GComicID"] resultURL = "/series/" + str(NewComicID) + "/" # print ("variloop" + str(CV_EXcomicid['variloop'])) # if vari_loop == '99': gcdinfo = parseit.GCDdetails( comseries=None, resultURL=resultURL, vari_loop=0, ComicID=comicid, TotalIssues=0, issvariation="no", resultPublished=None, ) logger.info(u"Sucessfully retrieved details for " + comic["ComicName"]) # print ("Series Published" + parseit.resultPublished) # comic book location on machine # setup default location here if comlocation is None: if ":" in comic["ComicName"] or "/" in comic["ComicName"] or "," in comic["ComicName"]: comicdir = comic["ComicName"] if ":" in comicdir: comicdir = comicdir.replace(":", "") if "/" in comicdir: comicdir = comicdir.replace("/", "-") if "," in comicdir: comicdir = comicdir.replace(",", "") else: comicdir = comic["ComicName"] series = comicdir publisher 
= comic["ComicPublisher"] year = comic["ComicYear"] # do work to generate folder path values = {"$Series": series, "$Publisher": publisher, "$Year": year} # print mylar.FOLDER_FORMAT # print 'working dir:' # print helpers.replace_all(mylar.FOLDER_FORMAT, values) if mylar.FOLDER_FORMAT == "": comlocation = mylar.DESTINATION_DIR + "/" + comicdir + " (" + comic["ComicYear"] + ")" else: comlocation = mylar.DESTINATION_DIR + "/" + helpers.replace_all(mylar.FOLDER_FORMAT, values) # comlocation = mylar.DESTINATION_DIR + "/" + comicdir + " (" + comic['ComicYear'] + ")" if mylar.DESTINATION_DIR == "": logger.error(u"There is no general directory specified - please specify in Config/Post-Processing.") return if mylar.REPLACE_SPACES: # mylar.REPLACE_CHAR ...determines what to replace spaces with underscore or dot comlocation = comlocation.replace(" ", mylar.REPLACE_CHAR) # if it doesn't exist - create it (otherwise will bugger up later on) if os.path.isdir(str(comlocation)): logger.info(u"Directory (" + str(comlocation) + ") already exists! Continuing...") else: # print ("Directory doesn't exist!") try: os.makedirs(str(comlocation)) logger.info(u"Directory successfully created at: " + str(comlocation)) except OSError: logger.error(u"Could not create comicdir : " + str(comlocation)) # try to account for CV not updating new issues as fast as GCD # seems CV doesn't update total counts # comicIssues = gcdinfo['totalissues'] if gcdinfo["gcdvariation"] == "cv": comicIssues = str(int(comic["ComicIssues"]) + 1) else: comicIssues = comic["ComicIssues"] # let's download the image... if os.path.exists(mylar.CACHE_DIR): pass else: # let's make the dir. try: os.makedirs(str(mylar.CACHE_DIR)) logger.info(u"Cache Directory successfully created at: " + str(mylar.CACHE_DIR)) except OSError: logger.error("Could not create cache dir. 
Check permissions of cache dir: " + str(mylar.CACHE_DIR)) coverfile = mylar.CACHE_DIR + "/" + str(comicid) + ".jpg" # try: urllib.urlretrieve(str(comic["ComicImage"]), str(coverfile)) try: with open(str(coverfile)) as f: ComicImage = "cache/" + str(comicid) + ".jpg" logger.info(u"Sucessfully retrieved cover for " + str(comic["ComicName"])) except IOError as e: logger.error(u"Unable to save cover locally at this time.") controlValueDict = {"ComicID": comicid} newValueDict = { "ComicName": comic["ComicName"], "ComicSortName": sortname, "ComicYear": comic["ComicYear"], "ComicImage": ComicImage, "Total": comicIssues, "ComicLocation": comlocation, "ComicPublisher": comic["ComicPublisher"], "ComicPublished": gcdinfo["resultPublished"], "DateAdded": helpers.today(), "Status": "Loading", } myDB.upsert("comics", newValueDict, controlValueDict) issued = cv.getComic(comicid, "issue") logger.info(u"Sucessfully retrieved issue details for " + comic["ComicName"]) n = 0 iscnt = int(comicIssues) issid = [] issnum = [] issname = [] issdate = [] int_issnum = [] # let's start issue #'s at 0 -- thanks to DC for the new 52 reboot! :) latestiss = "0" latestdate = "0000-00-00" # print ("total issues:" + str(iscnt)) # ---removed NEW code here--- logger.info(u"Now adding/updating issues for " + comic["ComicName"]) # file check to see if issue exists logger.info(u"Checking directory for existing issues.") # fc = filechecker.listFiles(dir=comlocation, watchcomic=comic['ComicName']) # havefiles = 0 # fccnt = int(fc['comiccount']) # logger.info(u"Found " + str(fccnt) + "/" + str(iscnt) + " issues of " + comic['ComicName'] + "...verifying") # fcnew = [] while n <= iscnt: # ---NEW.code try: firstval = issued["issuechoice"][n] except IndexError: break cleanname = helpers.cleanName(firstval["Issue_Name"]) issid = str(firstval["Issue_ID"]) issnum = str(firstval["Issue_Number"]) issname = cleanname if "." 
in str(issnum): issn_st = str(issnum).find(".") issn_b4dec = str(issnum)[:issn_st] # if the length of decimal is only 1 digit, assume it's a tenth dec_is = str(issnum)[issn_st + 1 :] if len(dec_is) == 1: dec_nisval = int(dec_is) * 10 iss_naftdec = str(dec_nisval) if len(dec_is) == 2: dec_nisval = int(dec_is) iss_naftdec = str(dec_nisval) iss_issue = issn_b4dec + "." + iss_naftdec issis = (int(issn_b4dec) * 1000) + dec_nisval else: issis = int(issnum) * 1000 bb = 0 while bb <= iscnt: try: gcdval = gcdinfo["gcdchoice"][bb] except IndexError: # account for gcd variation here if gcdinfo["gcdvariation"] == "gcd": # print ("gcd-variation accounted for.") issdate = "0000-00-00" int_issnum = int(issis / 1000) break if "nn" in str(gcdval["GCDIssue"]): # no number detected - GN, TP or the like logger.warn(u"Non Series detected (Graphic Novel, etc) - cannot proceed at this time.") updater.no_searchresults(comicid) return elif "." in str(gcdval["GCDIssue"]): # print ("g-issue:" + str(gcdval['GCDIssue'])) issst = str(gcdval["GCDIssue"]).find(".") # print ("issst:" + str(issst)) issb4dec = str(gcdval["GCDIssue"])[:issst] # print ("issb4dec:" + str(issb4dec)) # if the length of decimal is only 1 digit, assume it's a tenth decis = str(gcdval["GCDIssue"])[issst + 1 :] # print ("decis:" + str(decis)) if len(decis) == 1: decisval = int(decis) * 10 issaftdec = str(decisval) if len(decis) == 2: decisval = int(decis) issaftdec = str(decisval) gcd_issue = issb4dec + "." + issaftdec # print ("gcd_issue:" + str(gcd_issue)) gcdis = (int(issb4dec) * 1000) + decisval else: gcdis = int(str(gcdval["GCDIssue"])) * 1000 if gcdis == issis: issdate = str(gcdval["GCDDate"]) int_issnum = int(gcdis / 1000) # get the latest issue / date using the date. if gcdval["GCDDate"] > latestdate: latestiss = str(issnum) latestdate = str(gcdval["GCDDate"]) break # bb = iscnt bb += 1 # print("(" + str(n) + ") IssueID: " + str(issid) + " IssueNo: " + str(issnum) + " Date" + str(issdate)) # ---END.NEW. 
# check if the issue already exists iss_exists = myDB.action("SELECT * from issues WHERE IssueID=?", [issid]).fetchone() # Only change the status & add DateAdded if the issue is already in the database if iss_exists is None: newValueDict["DateAdded"] = helpers.today() controlValueDict = {"IssueID": issid} newValueDict = { "ComicID": comicid, "ComicName": comic["ComicName"], "IssueName": issname, "Issue_Number": issnum, "IssueDate": issdate, "Int_IssueNumber": int_issnum, } if mylar.AUTOWANT_ALL: newValueDict["Status"] = "Wanted" # elif release_dict['releasedate'] > helpers.today() and mylar.AUTOWANT_UPCOMING: # newValueDict['Status'] = "Wanted" else: newValueDict["Status"] = "Skipped" if iss_exists: # print ("Existing status : " + str(iss_exists['Status'])) newValueDict["Status"] = iss_exists["Status"] myDB.upsert("issues", newValueDict, controlValueDict) n += 1 # logger.debug(u"Updating comic cache for " + comic['ComicName']) # cache.getThumb(ComicID=issue['issueid']) # logger.debug(u"Updating cache for: " + comic['ComicName']) # cache.getThumb(ComicIDcomicid) # check for existing files... updater.forceRescan(comicid) controlValueStat = {"ComicID": comicid} newValueStat = { "Status": "Active", "LatestIssue": latestiss, "LatestDate": latestdate, "LastUpdated": helpers.now(), } myDB.upsert("comics", newValueStat, controlValueStat) logger.info(u"Updating complete for: " + comic["ComicName"]) # lets' check the pullist for anyting at this time as well since we're here. if mylar.AUTOWANT_UPCOMING: logger.info(u"Checking this week's pullist for new issues of " + str(comic["ComicName"])) updater.newpullcheck() # here we grab issues that have been marked as wanted above... results = myDB.select("SELECT * FROM issues where ComicID=? 
AND Status='Wanted'", [comicid]) if results: logger.info(u"Attempting to grab wanted issues for : " + comic["ComicName"]) for result in results: foundNZB = "none" if (mylar.NZBSU or mylar.DOGNZB or mylar.EXPERIMENTAL) and (mylar.SAB_HOST): foundNZB = search.searchforissue(result["IssueID"]) if foundNZB == "yes": updater.foundsearch(result["ComicID"], result["IssueID"]) else: logger.info(u"No issues marked as wanted for " + comic["ComicName"]) logger.info(u"Finished grabbing what I could.")
def libraryScan(dir=None, append=False, ComicID=None, ComicName=None, cron=None):
    """Scan a directory tree for comic archives (.cbr/.cbz) and match them
    against the watchlist.

    Walks ``dir`` (defaults to mylar.COMIC_DIR), parses each filename into a
    series name / issue number / year, compares the parsed name word-by-word
    against every series on the watchlist (80% word-match threshold), and
    queues every file for the importer.  Matched watchlist entries are either
    moved/renamed (IMP_MOVE/IMP_RENAME) or flagged Archived.

    Parameters (all optional):
        dir       -- root directory to scan; falls back to mylar.COMIC_DIR.
        append    -- True when called from the post-processor, in which case
                     ``dir`` is already a bytestring and is not re-encoded.
        ComicID   -- unused here; shadowed by a local list below.
        ComicName -- unused here; shadowed by a local list below.
        cron      -- when truthy, bail out unless LIBRARYSCAN is enabled.

    Returns:
        "Completed" when nothing was queued for import, otherwise the tuple
        (import_comicids dict, number of queued entries).
    """
    # Cron invocations respect the LIBRARYSCAN config toggle.
    if cron and not mylar.LIBRARYSCAN:
        return
    if not dir:
        dir = mylar.COMIC_DIR
    # If we're appending a dir, it's coming from the post processor which is
    # already bytestring
    if not append:
        dir = dir.encode(mylar.SYS_ENCODING)
    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(mylar.SYS_ENCODING, 'replace'))
        return
    logger.info('Scanning comic directory: %s' % dir.decode(mylar.SYS_ENCODING, 'replace'))
    basedir = dir
    comic_list = []
    comiccnt = 0
    extensions = ('cbr','cbz')
    # Pass 1: collect every comic archive under the tree.
    for r,d,f in os.walk(dir):
        #for directory in d[:]:
        #    if directory.startswith("."):
        #        d.remove(directory)
        for files in f:
            if any(files.lower().endswith('.' + x.lower()) for x in extensions):
                comic = files
                comicpath = os.path.join(r, files)
                comicsize = os.path.getsize(comicpath)
                print "Comic: " + comic
                print "Comic Path: " + comicpath
                print "Comic Size: " + str(comicsize)
                # We need the unicode path to use for logging, inserting into database
                unicode_comic_path = comicpath.decode(mylar.SYS_ENCODING, 'replace')
                comiccnt+=1
                comic_dict = { 'ComicFilename':           comic,
                               'ComicLocation':           comicpath,
                               'ComicSize':               comicsize,
                               'Unicode_ComicLocation':   unicode_comic_path }
                comic_list.append(comic_dict)
    logger.info("I've found a total of " + str(comiccnt) + " comics....analyzing now")
    logger.info("comiclist: " + str(comic_list))
    myDB = db.DBConnection()
    #let's load in the watchlist to see if we have any matches.
    logger.info("loading in the watchlist to see if a series is being watched already...")
    watchlist = myDB.select("SELECT * from comics")
    # NOTE(review): these lists shadow the ComicID/ComicName parameters above.
    ComicName = []
    DisplayName = []
    ComicYear = []
    ComicPublisher = []
    ComicTotal = []
    ComicID = []
    ComicLocation = []
    AltName = []
    watchcnt = 0
    watch_kchoice = []
    watchchoice = {}
    import_by_comicids = []
    import_comicids = {}
    # Build parallel arrays of cleaned-up watchlist data, indexed by cm_cn later.
    for watch in watchlist:
        watchdisplaycomic = re.sub('[\_\#\,\/\:\;\!\$\%\&\+\'\?\@]', ' ', watch['ComicName']).encode('utf-8').strip()
        # let's clean up the name, just in case for comparison purposes...
        watchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', watch['ComicName']).encode('utf-8').strip()
        #watchcomic = re.sub('\s+', ' ', str(watchcomic)).strip()
        if ' the ' in watchcomic.lower():
            #drop the 'the' from the watchcomic title for proper comparisons.
            # NOTE(review): this keeps only the LAST 4 characters of the title,
            # it does not remove 'the' — almost certainly a bug.
            watchcomic = watchcomic[-4:]
        alt_chk = "no"  # alt-checker flag (default to no)
        # account for alternate names as well
        # NOTE(review): `is not 'None'` is an identity comparison on a string
        # literal — should be `!= 'None'`; result is implementation-dependent.
        if watch['AlternateSearch'] is not None and watch['AlternateSearch'] is not 'None':
            altcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', watch['AlternateSearch']).encode('utf-8').strip()
            #altcomic = re.sub('\s+', ' ', str(altcomic)).strip()
            AltName.append(altcomic)
            alt_chk = "yes"  # alt-checker flag
        ComicName.append(watchcomic)
        DisplayName.append(watchdisplaycomic)
        ComicYear.append(watch['ComicYear'])
        ComicPublisher.append(watch['ComicPublisher'])
        ComicTotal.append(watch['Total'])
        ComicID.append(watch['ComicID'])
        ComicLocation.append(watch['ComicLocation'])
        watchcnt+=1
    logger.info("Successfully loaded " + str(watchcnt) + " series from your watchlist.")
    ripperlist=['digital-',
                'empire',
                'dcp']
    watchfound = 0
    datelist = ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']
    # datemonth = {'one':1,'two':2,'three':3,'four':4,'five':5,'six':6,'seven':7,'eight':8,'nine':9,'ten':10,'eleven':$
    # #search for number as text, and change to numeric
    # for numbs in basnumbs:
    #     #print ("numbs:" + str(numbs))
    #     if numbs in ComicName.lower():
    #         numconv = basnumbs[numbs]
    #         #print ("numconv: " + str(numconv))
    # Pass 2: parse each collected filename and compare against the watchlist.
    for i in comic_list:
        print i['ComicFilename']
        comfilename = i['ComicFilename']
        comlocation = i['ComicLocation']
        #let's clean up the filename for matching purposes
        cfilename = re.sub('[\_\#\,\/\:\;\-\!\$\%\&\+\'\?\@]', ' ', comfilename)
        #cfilename = re.sub('\s', '_', str(cfilename))
        # d_filename keeps a display variant where some punctuation becomes '#'
        # so the original characters can be restored further below.
        d_filename = re.sub('[\_\#\,\/\;\!\$\%\&\?\@]', ' ', comfilename)
        d_filename = re.sub('[\:\-\+\']', '#', d_filename)
        #versioning - remove it
        subsplit = cfilename.replace('_', ' ').split()
        volno = None
        volyr = None
        for subit in subsplit:
            if subit[0].lower() == 'v':
                vfull = 0
                if subit[1:].isdigit():
                    #if in format v1, v2009 etc...
                    if len(subit) > 3:
                        # if it's greater than 3 in length, then the format is Vyyyy
                        vfull = 1  # add on 1 character length to account for extra space
                    cfilename = re.sub(subit, '', cfilename)
                    d_filename = re.sub(subit, '', d_filename)
                    volno = re.sub("[^0-9]", " ", subit)
                elif subit.lower()[:3] == 'vol':
                    #if in format vol.2013 etc
                    #because the '.' in Vol. gets removed, let's loop thru again after the Vol hit to remove it entirely
                    logger.fdebug('volume indicator detected as version #:' + str(subit))
                    cfilename = re.sub(subit, '', cfilename)
                    cfilename = " ".join(cfilename.split())
                    d_filename = re.sub(subit, '', d_filename)
                    d_filename = " ".join(d_filename.split())
                    volyr = re.sub("[^0-9]", " ", subit).strip()
                    logger.fdebug('volume year set as : ' + str(volyr))
        cm_cn = 0
        #we need to track the counter to make sure we are comparing the right array parts
        #this takes care of the brackets :)
        m = re.findall('[^()]+', cfilename)
        lenm = len(m)
        logger.fdebug("there are " + str(lenm) + " words.")
        cnt = 0
        yearmatch = "false"
        foundonwatch = "False"
        issue = 999999
        # Walk the bracket-delimited chunks: chunk 0 is "name + issue",
        # later chunks are expected to hold the year.
        while (cnt < lenm):
            if m[cnt] is None: break
            if m[cnt] == ' ':
                pass
            else:
                logger.fdebug(str(cnt) + ". Bracket Word: " + m[cnt])
                if cnt == 0:
                    comic_andiss = m[cnt]
                    logger.fdebug("Comic: " + comic_andiss)
                    # if it's not in the standard format this will bork.
                    # let's try to accomodate (somehow).
                    # first remove the extension (if any)
                    extensions = ('cbr', 'cbz')
                    # endswith() with a tuple matches 'cbr'/'cbz' without the
                    # dot; [:-4] then strips '.cbr'/'.cbz'.
                    if comic_andiss.lower().endswith(extensions):
                        comic_andiss = comic_andiss[:-4]
                        logger.fdebug("removed extension from filename.")
                    #now we have to break up the string regardless of formatting.
                    #let's force the spaces.
                    comic_andiss = re.sub('_', ' ', comic_andiss)
                    cs = comic_andiss.split()
                    cs_len = len(cs)
                    cn = ''
                    ydetected = 'no'
                    idetected = 'no'
                    decimaldetect = 'no'
                    for i in reversed(xrange(len(cs))):
                        #start at the end.
                        logger.fdebug("word: " + str(cs[i]))
                        #assume once we find issue - everything prior is the actual title
                        #idetected = no will ignore everything so it will assume all title
                        # NOTE(review): operator precedence — this parses as
                        # `[:-2]=='19' or ([:-2]=='20' and idetected=='no')`;
                        # probably parentheses were intended around the `or`.
                        if cs[i][:-2] == '19' or cs[i][:-2] == '20' and idetected == 'no':
                            logger.fdebug("year detected: " + str(cs[i]))
                            ydetected = 'yes'
                            result_comyear = cs[i]
                        elif cs[i].isdigit() and idetected == 'no' or '.' in cs[i]:
                            issue = cs[i]
                            logger.fdebug("issue detected : " + str(issue))
                            idetected = 'yes'
                            if '.' in cs[i]:
                                #make sure it's a number on either side of decimal and assume decimal issue.
                                decst = cs[i].find('.')
                                dec_st = cs[i][:decst]
                                dec_en = cs[i][decst+1:]
                                logger.fdebug("st: " + str(dec_st))
                                logger.fdebug("en: " + str(dec_en))
                                if dec_st.isdigit() and dec_en.isdigit():
                                    logger.fdebug("decimal issue detected...adjusting.")
                                    issue = dec_st + "." + dec_en
                                    logger.fdebug("issue detected: " + str(issue))
                                    idetected = 'yes'
                                else:
                                    logger.fdebug("false decimal represent. Chunking to extra word.")
                                    cn = cn + cs[i] + " "
                                    break
                        elif '\#' in cs[i] or decimaldetect == 'yes':
                            logger.fdebug("issue detected: " + str(cs[i]))
                            idetected = 'yes'
                        else:
                            # not a year/issue token — treat as part of the title
                            cn = cn + cs[i] + " "
                    if ydetected == 'no':
                        #assume no year given in filename...
                        result_comyear = "0000"
                    logger.fdebug("cm?: " + str(cn))
                    # NOTE(review): `issue` starts as the int 999999, so this
                    # identity test against the STRING '999999' is always True
                    # — a missing issue number is never caught here.
                    if issue is not '999999':
                        comiss = issue
                    else:
                        # NOTE(review): logger.ERROR is not a standard logger
                        # method (lowercase .error expected) — would raise.
                        logger.ERROR("Invalid Issue number (none present) for " + comfilename)
                        break
                    cnsplit = cn.split()
                    cname = ''
                    findcn = 0
                    # NOTE(review): iterates bounds of cnsplit but reads from
                    # cs — likely should be cnsplit[findcn].
                    while (findcn < len(cnsplit)):
                        cname = cname + cs[findcn] + " "
                        findcn+=1
                    cname = cname[:len(cname)-1] # drop the end space...
                    print ("assuming name is : " + cname)
                    com_NAME = cname
                    print ("com_NAME : " + com_NAME)
                    yearmatch = "True"
                else:
                    logger.fdebug('checking ' + m[cnt])
                    # we're assuming that the year is in brackets (and it should be damnit)
                    if m[cnt][:-2] == '19' or m[cnt][:-2] == '20':
                        print ("year detected: " + str(m[cnt]))
                        ydetected = 'yes'
                        result_comyear = m[cnt]
                    elif m[cnt][:3].lower() in datelist:
                        logger.fdebug('possible issue date format given - verifying')
                        #if the date of the issue is given as (Jan 2010) or (January 2010) let's adjust.
                        #keeping in mind that ',' and '.' are already stripped from the string
                        if m[cnt][-4:].isdigit():
                            ydetected = 'yes'
                            result_comyear = m[cnt][-4:]
                            logger.fdebug('Valid Issue year of ' + str(result_comyear) + 'detected in format of ' + str(m[cnt]))
            cnt+=1
        displength = len(cname)
        print 'd_filename is : ' + d_filename
        charcount = d_filename.count('#')
        print ('charcount is : ' + str(charcount))
        if charcount > 0:
            print ('entering loop')
            # NOTE(review): this rebinds both `i` (outer comic_list item) and
            # `m` (bracket-word list) — harmless only because neither is read
            # again afterwards in this iteration.
            for i,m in enumerate(re.finditer('\#', d_filename)):
                if m.end() <= displength:
                    print comfilename[m.start():m.end()]
                    # find occurance in c_filename, then replace into d_filname so special characters are brought across
                    newchar = comfilename[m.start():m.end()]
                    print 'newchar:' + str(newchar)
                    d_filename = d_filename[:m.start()] + str(newchar) + d_filename[m.end():]
                    print 'd_filename:' + str(d_filename)
        dispname = d_filename[:displength]
        print ('dispname : ' + dispname)
        splitit = []
        watchcomic_split = []
        logger.fdebug("filename comic and issue: " + comic_andiss)
        #changed this from '' to ' '
        comic_iss_b4 = re.sub('[\-\:\,]', ' ', comic_andiss)
        comic_iss = comic_iss_b4.replace('.',' ')
        comic_iss = re.sub('[\s+]', ' ', comic_iss).strip()
        logger.fdebug("adjusted comic and issue: " + str(comic_iss))
        #remove 'the' from here for proper comparisons.
        if ' the ' in comic_iss.lower():
            # NOTE(review): same truncation bug as above — keeps only the last
            # 4 characters instead of removing 'the'.
            comic_iss = comic_iss[-4:]
        splitit = comic_iss.split(None)
        logger.fdebug("adjusting from: " + str(comic_iss_b4) + " to: " + str(comic_iss))
        #here we cycle through the Watchlist looking for a match.
        while (cm_cn < watchcnt):
            #setup the watchlist
            comname = ComicName[cm_cn]
            print ("watch_comic:" + comname)
            comyear = ComicYear[cm_cn]
            compub = ComicPublisher[cm_cn]
            comtotal = ComicTotal[cm_cn]
            comicid = ComicID[cm_cn]
            watch_location = ComicLocation[cm_cn]
            # there shouldn't be an issue in the comic now, so let's just assume it's all gravy.
            splitst = len(splitit)
            watchcomic_split = helpers.cleanName(comname)
            watchcomic_split = re.sub('[\-\:\,\.]', ' ', watchcomic_split).split(None)
            logger.fdebug(str(splitit) + " file series word count: " + str(splitst))
            logger.fdebug(str(watchcomic_split) + " watchlist word count: " + str(len(watchcomic_split)))
            if (splitst) != len(watchcomic_split):
                logger.fdebug("incorrect comic lengths...not a match")
                # if str(splitit[0]).lower() == "the":
                #     logger.fdebug("THE word detected...attempting to adjust pattern matching")
                #     splitit[0] = splitit[4:]
            else:
                logger.fdebug("length match..proceeding")
                n = 0
                scount = 0
                logger.fdebug("search-length: " + str(splitst))
                logger.fdebug("Watchlist-length: " + str(len(watchcomic_split)))
                # word-by-word comparison; scount tallies the matches.
                while ( n <= (splitst)-1 ):
                    logger.fdebug("splitit: " + str(splitit[n]))
                    if n < (splitst) and n < len(watchcomic_split):
                        logger.fdebug(str(n) + " Comparing: " + str(watchcomic_split[n]) + " .to. " + str(splitit[n]))
                        if '+' in watchcomic_split[n]:
                            # NOTE(review): '+' is a regex metacharacter —
                            # re.sub('+', ...) raises sre_constants.error;
                            # should be re.sub('\+', ...).
                            watchcomic_split[n] = re.sub('+', '', str(watchcomic_split[n]))
                        if str(watchcomic_split[n].lower()) in str(splitit[n].lower()) and len(watchcomic_split[n]) >= len(splitit[n]):
                            logger.fdebug("word matched on : " + str(splitit[n]))
                            scount+=1
                        #elif ':' in splitit[n] or '-' in splitit[n]:
                        #    splitrep = splitit[n].replace('-', '')
                        #    print ("non-character keyword...skipped on " + splitit[n])
                    # NOTE(review): since splitst == len(watchcomic_split) in
                    # this branch and n < splitst always holds, the elif/else
                    # below appear unreachable.
                    elif str(splitit[n]).lower().startswith('v'):
                        logger.fdebug("possible versioning..checking")
                        #we hit a versioning # - account for it
                        if splitit[n][1:].isdigit():
                            comicversion = str(splitit[n])
                            logger.fdebug("version found: " + str(comicversion))
                    else:
                        logger.fdebug("Comic / Issue section")
                        if splitit[n].isdigit():
                            logger.fdebug("issue detected")
                        else:
                            logger.fdebug("non-match for: "+ str(splitit[n]))
                            pass
                    n+=1
                #set the match threshold to 80% (for now)
                # if it's less than 80% consider it a non-match and discard.
                #splitit has to splitit-1 because last position is issue.
                wordcnt = int(scount)
                logger.fdebug("scount:" + str(wordcnt))
                totalcnt = int(splitst)
                logger.fdebug("splitit-len:" + str(totalcnt))
                # NOTE(review): Python 2 integer division — (wordcnt/totalcnt)
                # floors to 0 unless ALL words matched, so spercent is only
                # ever 0 or 100; the 80% threshold is effectively 100%.
                spercent = (wordcnt/totalcnt) * 100
                logger.fdebug("we got " + str(spercent) + " percent.")
                if int(spercent) >= 80:
                    logger.fdebug("it's a go captain... - we matched " + str(spercent) + "%!")
                    logger.fdebug("this should be a match!")
                    logger.fdebug("issue we found for is : " + str(comiss))
                    #set the year to the series we just found ;)
                    result_comyear = comyear
                    #issue comparison now as well
                    logger.info(u"Found " + comname + " (" + str(comyear) + ") issue: " + str(comiss))
                    watchmatch = str(comicid)
                    dispname = DisplayName[cm_cn]
                    foundonwatch = "True"
                    break
                elif int(spercent) < 80:
                    logger.fdebug("failure - we only got " + str(spercent) + "% right!")
            cm_cn+=1
        if foundonwatch == "False":
            watchmatch = None
        #---if it's not a match - send it to the importer.
        n = 0
        # Decide the year / volume label to hand to the importer.
        if volyr is None:
            if result_comyear is None:
                result_comyear = '0000' #no year in filename basically.
        else:
            if result_comyear is None:
                result_comyear = volyr
        if volno is None:
            if volyr is None:
                vol_label = None
            else:
                vol_label = volyr
        else:
            vol_label = volno
        print ("adding " + com_NAME + " to the import-queue!")
        impid = com_NAME + "-" + str(result_comyear) + "-" + str(comiss)
        print ("impid: " + str(impid))
        import_by_comicids.append({
            "impid" : impid,
            "watchmatch" : watchmatch,
            "displayname" : dispname,
            "comicname" : com_NAME,
            "comicyear" : result_comyear,
            "volume" : vol_label,
            "comfilename" : comfilename,
            "comlocation" : comlocation.decode(mylar.SYS_ENCODING)
        })
    # NOTE(review): watch_kchoice is never appended to anywhere in this
    # function, so the entire move/archive branch below appears to be dead
    # code in its current state.
    if len(watch_kchoice) > 0:
        watchchoice['watchlist'] = watch_kchoice
        print ("watchchoice: " + str(watchchoice))
        logger.info("I have found " + str(watchfound) + " out of " + str(comiccnt) + " comics for series that are being watched.")
        wat = 0
        comicids = []
        if watchfound > 0:
            if mylar.IMP_MOVE:
                logger.info("You checked off Move Files...so that's what I'm going to do")
                #check to see if Move Files is enabled.
                #if not being moved, set the archive bit.
                print("Moving files into appropriate directory")
                while (wat < watchfound):
                    watch_the_list = watchchoice['watchlist'][wat]
                    watch_comlocation = watch_the_list['ComicLocation']
                    watch_comicid = watch_the_list['ComicID']
                    watch_comicname = watch_the_list['ComicName']
                    watch_comicyear = watch_the_list['ComicYear']
                    watch_comiciss = watch_the_list['ComicIssue']
                    print ("ComicLocation: " + str(watch_comlocation))
                    orig_comlocation = watch_the_list['OriginalLocation']
                    orig_filename = watch_the_list['OriginalFilename']
                    print ("Orig. Location: " + str(orig_comlocation))
                    print ("Orig. Filename: " + str(orig_filename))
                    #before moving check to see if Rename to Mylar structure is enabled.
                    if mylar.IMP_RENAME:
                        print("Renaming files according to configuration details : " + str(mylar.FILE_FORMAT))
                        renameit = helpers.rename_param(watch_comicid, watch_comicname, watch_comicyear, watch_comiciss)
                        nfilename = renameit['nfilename']
                        dst_path = os.path.join(watch_comlocation,nfilename)
                        if str(watch_comicid) not in comicids:
                            comicids.append(watch_comicid)
                    else:
                        print("Renaming files not enabled, keeping original filename(s)")
                        dst_path = os.path.join(watch_comlocation,orig_filename)
                    #os.rename(os.path.join(self.nzb_folder, str(ofilename)), os.path.join(self.nzb_folder,str(nfilename + ext)))
                    #src = os.path.join(, str(nfilename + ext))
                    print ("I'm going to move " + str(orig_comlocation) + " to .." + str(dst_path))
                    try:
                        shutil.move(orig_comlocation, dst_path)
                    except (OSError, IOError):
                        logger.info("Failed to move directory - check directories and manually re-run.")
                    wat+=1
            else:
                # if move files isn't enabled, let's set all found comics to Archive status :)
                while (wat < watchfound):
                    watch_the_list = watchchoice['watchlist'][wat]
                    watch_comicid = watch_the_list['ComicID']
                    watch_issue = watch_the_list['ComicIssue']
                    print ("ComicID: " + str(watch_comicid))
                    print ("Issue#: " + str(watch_issue))
                    issuechk = myDB.selectone("SELECT * from issues where ComicID=? AND INT_IssueNumber=?", [watch_comicid, watch_issue]).fetchone()
                    if issuechk is None:
                        print ("no matching issues for this comic#")
                    else:
                        print("...Existing status: " + str(issuechk['Status']))
                        control = {"IssueID": issuechk['IssueID']}
                        values = { "Status": "Archived"}
                        print ("...changing status of " + str(issuechk['Issue_Number']) + " to Archived ")
                        myDB.upsert("issues", values, control)
                        if str(watch_comicid) not in comicids:
                            comicids.append(watch_comicid)
                    wat+=1
        if comicids is None:
            pass
        else:
            c_upd = len(comicids)
            c = 0
            # NOTE(review): no `c+=1` is visible in this loop (infinite loop
            # if comicids is non-empty), and forceRescan is handed the loop
            # INDEX rather than comicids[c] — both look like real bugs; TODO
            # confirm against upstream history.
            while (c < c_upd ):
                print ("Rescanning.. " + str(c))
                updater.forceRescan(c)
    # Nothing queued for import → signal completion to the caller.
    if not len(import_by_comicids):
        return "Completed"
    if len(import_by_comicids) > 0:
        import_comicids['comic_info'] = import_by_comicids
        print ("import comicids: " + str(import_by_comicids))
        return import_comicids, len(import_by_comicids)
# logger.debug(u"Updating comic cache for " + comic['ComicName']) # cache.getThumb(ComicID=issue['issueid']) # logger.debug(u"Updating cache for: " + comic['ComicName']) # cache.getThumb(ComicIDcomicid) else: if iscnt > 0: #if a series is brand new, it wont have any issues/details yet so skip this part while (n <= iscnt): #---NEW.code try: firstval = issued['issuechoice'][n] except IndexError: break cleanname = helpers.cleanName(firstval['Issue_Name']) issid = str(firstval['Issue_ID']) issnum = firstval['Issue_Number'] #print ("issnum: " + str(issnum)) issname = cleanname issdate = str(firstval['Issue_Date']) if issnum.isdigit(): int_issnum = int(issnum) else: if 'au' in issnum.lower(): int_issnum = str(int(issnum[:-2])) + 'AU' elif u'\xbd' in issnum: issnum = .5 int_issnum = .5 elif '.' in issnum or ',' in issnum: if ',' in issnum: issnum = re.sub(',', '.', issnum)