def searchit(self):
    chk_id = None
    #logger.info('searchterm: %s' % self.searchterm)
    #self.searchterm is a tuple containing series name, issue number, volume and publisher.
    series_search = self.searchterm['series']
    issue_search = self.searchterm['issue']
    volume_search = self.searchterm['volume']

    if series_search.startswith('0-Day Comics Pack'):
        #issue = '21' = WED, #volume='2' = 2nd month
        torrentid = 22247  #2018
        publisher_search = None  #'2' #2nd month
        comic_id = None
    elif all([self.searchterm['torrentid_32p'] is not None, self.searchterm['torrentid_32p'] != 'None']):
        torrentid = self.searchterm['torrentid_32p']
        comic_id = self.searchterm['id']
        publisher_search = self.searchterm['publisher']
    else:
        torrentid = None
        comic_id = self.searchterm['id']
        annualize = False
        if 'annual' in series_search.lower():
            series_search = re.sub(' annual', '', series_search.lower()).strip()
            annualize = True
        publisher_search = self.searchterm['publisher']
        spl = [x for x in self.publisher_list if x in publisher_search]
        for x in spl:
            publisher_search = re.sub(x, '', publisher_search).strip()
        #logger.info('publisher search set to : %s' % publisher_search)

    # lookup the ComicID in the 32p sqlite3 table to pull the series_id to use.
    if comic_id:
        chk_id = helpers.checkthe_id(comic_id)

    if any([chk_id is None, mylar.CONFIG.DEEP_SEARCH_32P is True]):
        #generate the dynamic name of the series here so we can match it up
        as_d = filechecker.FileChecker()
        as_dinfo = as_d.dynamic_replace(series_search)
        mod_series = re.sub('\|', '', as_dinfo['mod_seriesname']).strip()
        as_puinfo = as_d.dynamic_replace(publisher_search)
        pub_series = as_puinfo['mod_seriesname']

        logger.fdebug('series_search: %s' % series_search)

        if '/' in series_search:
            series_search = series_search[:series_search.find('/')]
        if ':' in series_search:
            series_search = series_search[:series_search.find(':')]
        if ',' in series_search:
            series_search = series_search[:series_search.find(',')]

        logger.fdebug('config.search_32p: %s' % mylar.CONFIG.SEARCH_32P)
        if mylar.CONFIG.SEARCH_32P is False:
            url = 'https://walksoftly.itsaninja.party/serieslist.php'
            params = {'series': re.sub('\|', '', mod_series.lower()).strip()}  #series_search}
            logger.fdebug('search query: %s' % re.sub('\|', '', mod_series.lower()).strip())
            try:
                t = requests.get(url, params=params, verify=True,
                                 headers={'USER-AGENT': mylar.USER_AGENT[:mylar.USER_AGENT.find('/') + 7] + mylar.USER_AGENT[mylar.USER_AGENT.find('(') + 1]})
            except requests.exceptions.RequestException as e:
                logger.warn(e)
                return "no results"

            #the site's custom 619/999 status codes arrive as ints - comparing against strings would never match.
            if t.status_code == 619:
                logger.warn('[%s] Unable to retrieve data from site.' % t.status_code)
                return "no results"
            elif t.status_code == 999:
                logger.warn('[%s] No series title was provided to the search query.' % t.status_code)
                return "no results"

            try:
                results = t.json()
            except:
                results = t.text

            if len(results) == 0:
                logger.warn('No results found for search on 32P.')
                return "no results"

    # with cfscrape.create_scraper(delay=15) as s:
    #     s.headers = self.headers
    #     cj = LWPCookieJar(os.path.join(mylar.CONFIG.SECURE_DIR, ".32p_cookies.dat"))
    #     cj.load()
    #     s.cookies = cj
    data = []
    pdata = []
    pubmatch = False

    if any([series_search.startswith('0-Day Comics Pack'), torrentid is not None]):
        data.append({"id": torrentid, "series": series_search})
    else:
        if any([not chk_id, mylar.CONFIG.DEEP_SEARCH_32P is True]):
            if mylar.CONFIG.SEARCH_32P is True:
                url = 'https://32pag.es/torrents.php'  #?action=serieslist&filter=' + series_search #&filter=F
                params = {'action': 'serieslist', 'filter': series_search}
                time.sleep(1)  #just to make sure we don't hammer, 1s pause.
                t = self.session.get(url, params=params, verify=True, allow_redirects=True)
                soup = BeautifulSoup(t.content, "html.parser")
                results = soup.find_all("a", {"class": "object-qtip"}, {"data-type": "torrentgroup"})

            for r in results:
                if mylar.CONFIG.SEARCH_32P is True:
                    torrentid = r['data-id']
                    torrentname = r.findNext(text=True)
                    torrentname = torrentname.strip()
                else:
                    torrentid = r['id']
                    torrentname = r['series']

                as_d = filechecker.FileChecker()
                as_dinfo = as_d.dynamic_replace(torrentname)
                seriesresult = re.sub('\|', '', as_dinfo['mod_seriesname']).strip()
                logger.fdebug('searchresult: %s --- %s [%s]' % (seriesresult, mod_series, publisher_search))
                if seriesresult.lower() == mod_series.lower():
                    logger.fdebug('[MATCH] %s [%s]' % (torrentname, torrentid))
                    data.append({"id": torrentid, "series": torrentname})
                elif publisher_search.lower() in seriesresult.lower():
                    logger.fdebug('[MATCH] Publisher match.')
                    tmp_torrentname = re.sub(publisher_search.lower(), '', seriesresult.lower()).strip()
                    as_t = filechecker.FileChecker()
                    as_tinfo = as_t.dynamic_replace(tmp_torrentname)
                    if re.sub('\|', '', as_tinfo['mod_seriesname']).strip() == mod_series.lower():
                        logger.fdebug('[MATCH] %s [%s]' % (torrentname, torrentid))
                        pdata.append({"id": torrentid, "series": torrentname})
                        pubmatch = True

            logger.fdebug('%s series listed for searching that match.' % len(data))
        else:
            logger.fdebug('Exact series ID already discovered previously. Setting to : %s [%s]' % (chk_id['series'], chk_id['id']))
            pdata.append({"id": chk_id['id'], "series": chk_id['series']})
            pubmatch = True

    if all([len(data) == 0, len(pdata) == 0]):
        return "no results"
    else:
        dataset = []
        if len(data) > 0:
            dataset += data
        if len(pdata) > 0:
            dataset += pdata
        logger.fdebug(str(len(dataset)) + ' series match the title being searched for on 32P...')

    if all([chk_id is None,
            not series_search.startswith('0-Day Comics Pack'),
            self.searchterm['torrentid_32p'] is not None,
            self.searchterm['torrentid_32p'] != 'None']) and any([len(data) == 1, len(pdata) == 1]):
        #update the 32p_reference so we avoid doing a url lookup next time
        helpers.checkthe_id(comic_id, dataset)
    else:
        if all([not series_search.startswith('0-Day Comics Pack'),
                self.searchterm['torrentid_32p'] is not None,
                self.searchterm['torrentid_32p'] != 'None']):
            pass
        else:
            logger.debug('Unable to properly verify reference on 32P - will update the 32P reference point once the issue has been successfully matched against.')

    results32p = []
    resultlist = {}

    for x in dataset:
        #for 0-day packs, issue=week#, volume=month, id=0-day year pack (ie. issue=21&volume=2 for feb.21st)
        payload = {"action": "groupsearch",
                   "id": x['id'],  #searchid,
                   "issue": issue_search}
        #in order to match up against 0-day stuff, volume has to be none at this point
        #when doing other searches tho, this should be allowed to go through
        #if all([volume_search != 'None', volume_search is not None]):
        #    payload.update({'volume': re.sub('v', '', volume_search).strip()})
        if series_search.startswith('0-Day Comics Pack'):
            payload.update({"volume": volume_search})

        payload = json.dumps(payload)
        payload = json.loads(payload)

        logger.fdebug('payload: %s' % payload)
        url = 'https://32pag.es/ajax.php'
        time.sleep(1)  #just to make sure we don't hammer, 1s pause.
        try:
            d = self.session.get(url, params=payload, verify=True, allow_redirects=True)
        except Exception as e:
            logger.error('%s [%s] Could not retrieve URL %s' % (self.module, e, url))
            continue  #skip this entry rather than referencing an undefined response below.

        try:
            searchResults = d.json()
        except Exception as e:
            searchResults = d.text
            logger.debug('[%s] %s Search Result did not return valid JSON, falling back on text: %s' % (e, self.module, searchResults))
            return False

        if searchResults['status'] == 'success' and searchResults['count'] > 0:
            logger.fdebug('successfully retrieved %s search results' % searchResults['count'])
            for a in searchResults['details']:
                if series_search.startswith('0-Day Comics Pack'):
                    title = series_search
                else:
                    title = self.searchterm['series'] + ' v' + a['volume'] + ' #' + a['issues']
                results32p.append({'link': a['id'],
                                   'title': title,
                                   'filesize': a['size'],
                                   'issues': a['issues'],
                                   'pack': a['pack'],
                                   'format': a['format'],
                                   'language': a['language'],
                                   'seeders': a['seeders'],
                                   'leechers': a['leechers'],
                                   'scanner': a['scanner'],
                                   'chkit': {'id': x['id'], 'series': x['series']},
                                   'pubdate': datetime.datetime.fromtimestamp(float(a['upload_time'])).strftime('%a, %d %b %Y %H:%M:%S'),
                                   'int_pubdate': float(a['upload_time'])})
        else:
            logger.fdebug('32P did not return any valid search results.')

    if len(results32p) > 0:
        resultlist['entries'] = sorted(results32p, key=itemgetter('pack', 'title'), reverse=False)
        logger.debug('%s Resultslist: %s' % (self.module, resultlist))
    else:
        resultlist = 'no results'

    return resultlist
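
# --- Illustrative sketch (not part of the original code) ---
# Shape of the searchterm payload that searchit() above consumes. The keys are
# exactly the ones the method looks up; every value below is hypothetical.
_example_searchterm = {
    'series': 'Some Series',         # series name (hypothetical)
    'issue': '12',                   # issue number (hypothetical)
    'volume': '1',                   # volume (hypothetical)
    'publisher': 'Some Publisher',   # publisher (hypothetical)
    'id': '12345',                   # ComicID used for the 32P reference lookup (hypothetical)
    'torrentid_32p': None,           # cached 32P torrent id from a prior match, when one exists
}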
def libraryScan(dir=None, append=False, ComicID=None, ComicName=None, cron=None, queue=None):

    if cron and not mylar.LIBRARYSCAN:
        return

    if not dir:
        dir = mylar.CONFIG.COMIC_DIR

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' % dir)
        return "Fail"

    logger.info('Scanning comic directory: %s' % dir)

    basedir = dir

    comic_list = []
    failure_list = []
    utter_failure_list = []
    comiccnt = 0
    extensions = ('cbr', 'cbz')
    cv_location = []
    cbz_retry = 0

    mylar.IMPORT_STATUS = 'Now attempting to parse files for additional information'
    myDB = db.DBConnection()
    #mylar.IMPORT_PARSED_COUNT #used to count what #/totalfiles the filename parser is currently on
    for r, d, f in os.walk(dir):
        for files in f:
            mylar.IMPORT_FILES += 1
            if any(files.lower().endswith('.' + x.lower()) for x in extensions):
                comicpath = os.path.join(r, files)
                if mylar.CONFIG.IMP_PATHS is True:
                    if myDB.select('SELECT * FROM comics JOIN issues WHERE issues.Status="Downloaded" AND ComicLocation=? AND issues.Location=?', [r, files]):
                        logger.info('Skipped known issue path: %s' % comicpath)
                        continue

                comic = files
                if not os.path.exists(comicpath):
                    logger.fdebug(f'''Comic: {comic} doesn't actually exist - assuming it is a symlink to a nonexistent path.''')
                    continue

                comicsize = os.path.getsize(comicpath)
                logger.fdebug('Comic: ' + comic + ' [' + comicpath + '] - ' + str(comicsize) + ' bytes')

                try:
                    t = filechecker.FileChecker(dir=r, file=comic)
                    results = t.listFiles()
                    #logger.info(results)
                    #'type': re.sub('\.','', filetype).strip(),
                    #'sub': path_list,
                    #'volume': volume,
                    #'match_type': match_type,
                    #'comicfilename': filename,
                    #'comiclocation': clocation,
                    #'series_name': series_name,
                    #'series_volume': issue_volume,
                    #'series_year': issue_year,
                    #'justthedigits': issue_number,
                    #'annualcomicid': annual_comicid,
                    #'scangroup': scangroup}

                    if results:
                        resultline = '[PARSE-' + results['parse_status'].upper() + ']'
                        resultline += '[SERIES: ' + results['series_name'] + ']'
                        if results['series_volume'] is not None:
                            resultline += '[VOLUME: ' + results['series_volume'] + ']'
                        if results['issue_year'] is not None:
                            resultline += '[ISSUE YEAR: ' + str(results['issue_year']) + ']'
                        if results['issue_number'] is not None:
                            resultline += '[ISSUE #: ' + results['issue_number'] + ']'
                        logger.fdebug(resultline)
                    else:
                        logger.fdebug('[PARSED] FAILURE.')
                        continue

                    # We need the unicode path to use for logging, inserting into database
                    unicode_comic_path = comicpath

                    if results['parse_status'] == 'success':
                        comic_list.append({'ComicFilename': comic,
                                           'ComicLocation': comicpath,
                                           'ComicSize': comicsize,
                                           'Unicode_ComicLocation': unicode_comic_path,
                                           'parsedinfo': {'series_name': results['series_name'],
                                                          'series_volume': results['series_volume'],
                                                          'issue_year': results['issue_year'],
                                                          'issue_number': results['issue_number']}
                                           })
                        comiccnt += 1
                        mylar.IMPORT_PARSED_COUNT += 1
                    else:
                        failure_list.append({'ComicFilename': comic,
                                             'ComicLocation': comicpath,
                                             'ComicSize': comicsize,
                                             'Unicode_ComicLocation': unicode_comic_path,
                                             'parsedinfo': {'series_name': results['series_name'],
                                                            'series_volume': results['series_volume'],
                                                            'issue_year': results['issue_year'],
                                                            'issue_number': results['issue_number']}
                                             })
                        mylar.IMPORT_FAILURE_COUNT += 1
                        if comic.endswith('.cbz'):
                            cbz_retry += 1

                except Exception as e:
                    logger.info('bang')
                    utter_failure_list.append({'ComicFilename': comic,
                                               'ComicLocation': comicpath,
                                               'ComicSize': comicsize,
                                               'Unicode_ComicLocation': unicode_comic_path,
                                               'parsedinfo': None,
                                               'error': e
                                               })
                    logger.info('[' + str(e) + '] FAILURE encountered. Logging the error for ' + comic + ' and continuing...')
                    mylar.IMPORT_FAILURE_COUNT += 1
                    if comic.endswith('.cbz'):
                        cbz_retry += 1
                    continue

            if 'cvinfo' in files:
                cv_location.append(r)
                logger.fdebug('CVINFO found: ' + os.path.join(r))

    mylar.IMPORT_TOTALFILES = comiccnt
    logger.info('I have successfully discovered & parsed a total of ' + str(comiccnt) + ' files....analyzing now')
    logger.info('I have not been able to determine what ' + str(len(failure_list)) + ' files are')
    logger.info('However, ' + str(cbz_retry) + ' out of the ' + str(len(failure_list)) + ' files are in a cbz format, which may contain metadata.')
    logger.info('[ERRORS] I have encountered ' + str(len(utter_failure_list)) + ' file-scanning errors during the scan, but have recorded the necessary information.')
    mylar.IMPORT_STATUS = 'Successfully parsed ' + str(comiccnt) + ' files'
    #return queue.put(valreturn)
    if len(utter_failure_list) > 0:
        logger.fdebug('Failure list: %s' % utter_failure_list)

    #let's load in the watchlist to see if we have any matches.
    logger.info("loading in the watchlist to see if a series is being watched already...")
    watchlist = myDB.select("SELECT * from comics")
    ComicName = []
    DisplayName = []
    ComicYear = []
    ComicPublisher = []
    ComicTotal = []
    ComicID = []
    ComicLocation = []
    AltName = []
    watchcnt = 0
    watch_kchoice = []
    watchchoice = {}
    import_by_comicids = []
    import_comicids = {}

    for watch in watchlist:
        #use the comicname_filesafe to start
        watchdisplaycomic = watch['ComicName']
        # let's clean up the name, just in case for comparison purposes...
        watchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', '', watch['ComicName_Filesafe'])
        #watchcomic = re.sub('\s+', ' ', str(watchcomic)).strip()

        if ' the ' in watchcomic.lower():
            #drop the 'the' from the watchcomic title for proper comparisons.
            watchcomic = re.sub(' the ', ' ', watchcomic, flags=re.I).strip()

        alt_chk = "no"  # alt-checker flag (default to no)

        # account for alternate names as well
        if watch['AlternateSearch'] is not None and watch['AlternateSearch'] != 'None':
            altcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', '', watch['AlternateSearch'])
            #altcomic = re.sub('\s+', ' ', str(altcomic)).strip()
            AltName.append(altcomic)
            alt_chk = "yes"  # alt-checker flag

        ComicName.append(watchcomic)
        DisplayName.append(watchdisplaycomic)
        ComicYear.append(watch['ComicYear'])
        ComicPublisher.append(watch['ComicPublisher'])
        ComicTotal.append(watch['Total'])
        ComicID.append(watch['ComicID'])
        ComicLocation.append(watch['ComicLocation'])
        watchcnt += 1

    logger.info("Successfully loaded " + str(watchcnt) + " series from your watchlist.")

    ripperlist = ['digital-', 'empire', 'dcp']
    watchfound = 0

    datelist = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    # datemonth = {'one':1,'two':2,'three':3,'four':4,'five':5,'six':6,'seven':7,'eight':8,'nine':9,'ten':10,'eleven':$
    # #search for number as text, and change to numeric
    # for numbs in basnumbs:
    #     #logger.fdebug("numbs:" + str(numbs))
    #     if numbs in ComicName.lower():
    #         numconv = basnumbs[numbs]
    #         #logger.fdebug("numconv: " + str(numconv))

    issueid_list = []
    cvscanned_loc = None
    cvinfo_CID = None
    cnt = 0
    mylar.IMPORT_STATUS = '[0%] Now parsing individual filenames for metadata if available'

    for i in comic_list:
        mylar.IMPORT_STATUS = '[' + str(cnt) + '/' + str(comiccnt) + '] Now parsing individual filenames for metadata if available'
        logger.fdebug('Analyzing : ' + i['ComicFilename'])
        comfilename = i['ComicFilename']
        comlocation = i['ComicLocation']
        issueinfo = None
        #probably need to zero these issue-related metadata to None so we can pick the best option
        issuevolume = None

        #Make sure cvinfo is checked for FIRST (so that CID can be attached to all files properly thereafter as they're scanned in)
        if os.path.dirname(comlocation) in cv_location and os.path.dirname(comlocation) != cvscanned_loc:
            #if comfilename == 'cvinfo':
            logger.info('comfilename: ' + comfilename)
            logger.info('cv_location: ' + str(cv_location))
            logger.info('comlocation: ' + os.path.dirname(comlocation))
            #if cvscanned_loc != comlocation:
            try:
                with open(os.path.join(os.path.dirname(comlocation), 'cvinfo')) as f:
                    urllink = f.readline()

                if urllink:
                    cid = urllink.strip()
                    pattern = re.compile(r"^.*?\b(49|4050)-(?P<num>\d{2,})\b.*$", re.I)
                    match = pattern.match(cid)
                    if match:
                        cvinfo_CID = match.group("num")
                        logger.info('CVINFO file located within directory. Attaching everything in directory that is valid to ComicID: ' + str(cvinfo_CID))
                        #store the location of the cvinfo so it's applied to the correct directory (since we're usually scanning multiple directories)
                        cvscanned_loc = os.path.dirname(comlocation)
                else:
                    logger.error("Could not read cvinfo file properly (or it does not contain any data)")
            except (OSError, IOError):
                logger.error("Could not read cvinfo file properly (or it does not contain any data)")
        #else:
        #    # don't scan it in again if it's already been done initially
        #    continue

        if mylar.CONFIG.IMP_METADATA:
            #if read tags is enabled during import, check here.
            if i['ComicLocation'].endswith('.cbz'):
                logger.fdebug('[IMPORT-CBZ] Metatagging checking enabled.')
                logger.info('[IMPORT-CBZ] Attempting to read tags present in filename: ' + i['ComicLocation'])
                try:
                    issueinfo = helpers.IssueDetails(i['ComicLocation'], justinfo=True)
                except:
                    logger.fdebug('[IMPORT-CBZ] Unable to retrieve metadata - possibly doesn\'t exist. Ignoring meta-retrieval')
                    pass
                else:
                    logger.info('issueinfo: ' + str(issueinfo))

                    if issueinfo is None or issueinfo['metadata'] is None:
                        logger.fdebug('[IMPORT-CBZ] No valid metadata contained within filename. Dropping down to parsing the filename itself.')
                        pass
                    else:
                        issuenotes_id = None
                        logger.info('[IMPORT-CBZ] Successfully retrieved some tags. Let\'s see what I can figure out.')
                        comicname = issueinfo['metadata']['series']
                        if comicname is not None:
                            logger.fdebug('[IMPORT-CBZ] Series Name: ' + comicname)
                            as_d = filechecker.FileChecker()
                            as_dyninfo = as_d.dynamic_replace(comicname)
                            logger.fdebug('Dynamic-ComicName: ' + as_dyninfo['mod_seriesname'])
                        else:
                            logger.fdebug('[IMPORT-CBZ] No series name found within metadata. This is bunk - dropping down to file parsing for usable information.')
                            issueinfo = None
                            issue_number = None

                        if issueinfo is not None:
                            try:
                                issueyear = issueinfo['metadata']['year']
                            except:
                                issueyear = None

                            #if the issue number is a non-numeric unicode string, this will screw up along with impID
                            issue_number = issueinfo['metadata']['issue_number']
                            if issue_number is not None:
                                logger.fdebug('[IMPORT-CBZ] Issue Number: ' + issue_number)
                            else:
                                issue_number = i['parsedinfo']['issue_number']

                            if 'annual' in comicname.lower() or 'annual' in comfilename.lower():
                                if issue_number is None or issue_number == 'None':
                                    logger.info('Annual detected with no issue number present within metadata. Assuming year as issue.')
                                    try:
                                        issue_number = 'Annual ' + str(issueyear)
                                    except:
                                        issue_number = 'Annual ' + str(i['parsedinfo']['issue_year'])
                                else:
                                    logger.info('Annual detected with issue number present within metadata.')
                                    if 'annual' not in issue_number.lower():
                                        issue_number = 'Annual ' + issue_number
                                mod_series = re.sub('annual', '', comicname, flags=re.I).strip()
                            else:
                                mod_series = comicname

                            logger.fdebug('issue number SHOULD Be: ' + issue_number)

                            try:
                                issuetitle = issueinfo['metadata']['title']
                            except:
                                issuetitle = None
                            try:
                                issueyear = issueinfo['metadata']['year']
                            except:
                                issueyear = None
                            try:
                                issuevolume = str(issueinfo['metadata']['volume'])
                                if all([issuevolume is not None, issuevolume != 'None', not issuevolume.lower().startswith('v')]):
                                    issuevolume = 'v' + str(issuevolume)
                                if any([issuevolume is None, issuevolume == 'None']):
                                    logger.info('[EXCEPT] issue volume is NONE')
                                    issuevolume = None
                                else:
                                    logger.fdebug('[TRY] issue volume is: ' + str(issuevolume))
                            except:
                                logger.fdebug('[EXCEPT] issue volume is: ' + str(issuevolume))
                                issuevolume = None

                            if any([comicname is None, comicname == 'None', issue_number is None, issue_number == 'None']):
                                logger.fdebug('[IMPORT-CBZ] Improperly tagged file as the metatagging is invalid. Ignoring meta and just parsing the filename.')
                                issueinfo = None
                                pass
                            else:
                                # if used by ComicTagger, Notes field will have the IssueID.
                                issuenotes = issueinfo['metadata']['notes']
                                logger.fdebug('[IMPORT-CBZ] Notes: ' + str(issuenotes))
                                if issuenotes is not None and issuenotes != 'None':
                                    if 'Issue ID' in issuenotes:
                                        st_find = issuenotes.find('Issue ID')
                                        tmp_issuenotes_id = re.sub("[^0-9]", " ", issuenotes[st_find:]).strip()
                                        if tmp_issuenotes_id.isdigit():
                                            issuenotes_id = tmp_issuenotes_id
                                            logger.fdebug('[IMPORT-CBZ] Successfully retrieved CV IssueID for ' + comicname + ' #' + issue_number + ' [' + str(issuenotes_id) + ']')
                                    elif 'CVDB' in issuenotes:
                                        st_find = issuenotes.find('CVDB')
                                        tmp_issuenotes_id = re.sub("[^0-9]", " ", issuenotes[st_find:]).strip()
                                        if tmp_issuenotes_id.isdigit():
                                            issuenotes_id = tmp_issuenotes_id
                                            logger.fdebug('[IMPORT-CBZ] Successfully retrieved CV IssueID for ' + comicname + ' #' + issue_number + ' [' + str(issuenotes_id) + ']')
                                    else:
                                        logger.fdebug('[IMPORT-CBZ] Unable to retrieve IssueID from meta-tagging. If there is other metadata present I will use that.')

                                logger.fdebug('[IMPORT-CBZ] Adding ' + comicname + ' to the import-queue!')
                                #impid = comicname + '-' + str(issueyear) + '-' + str(issue_number) #com_NAME + "-" + str(result_comyear) + "-" + str(comiss)
                                impid = str(random.randint(1000000, 99999999))
                                logger.fdebug('[IMPORT-CBZ] impid: ' + str(impid))
                                #make sure we only add in those issueid's which don't already have a comicid attached via the cvinfo scan above (this is for reverse-lookup of issueids)
                                issuepopulated = False
                                if cvinfo_CID is None:
                                    if issuenotes_id is None:
                                        logger.info('[IMPORT-CBZ] No ComicID detected where it should be. Bypassing this metadata entry and going the parsing route [' + comfilename + ']')
                                    else:
                                        #we need to store the impid here as well so we can look it up.
                                        issueid_list.append({'issueid': issuenotes_id,
                                                             'importinfo': {'impid': impid,
                                                                            'comicid': None,
                                                                            'comicname': comicname,
                                                                            'dynamicname': as_dyninfo['mod_seriesname'],
                                                                            'comicyear': issueyear,
                                                                            'issuenumber': issue_number,
                                                                            'volume': issuevolume,
                                                                            'comfilename': comfilename,
                                                                            'comlocation': comlocation}
                                                             })
                                        mylar.IMPORT_CID_COUNT += 1
                                        issuepopulated = True

                                if issuepopulated == False:
                                    if cvscanned_loc == os.path.dirname(comlocation):
                                        cv_cid = cvinfo_CID
                                        logger.fdebug('[IMPORT-CBZ] CVINFO_COMICID attached : ' + str(cv_cid))
                                    else:
                                        cv_cid = None
                                    import_by_comicids.append({
                                        "impid": impid,
                                        "comicid": cv_cid,
                                        "watchmatch": None,
                                        "displayname": mod_series,
                                        "comicname": comicname,
                                        "dynamicname": as_dyninfo['mod_seriesname'],
                                        "comicyear": issueyear,
                                        "issuenumber": issue_number,
                                        "volume": issuevolume,
                                        "issueid": issuenotes_id,
                                        "comfilename": comfilename,
                                        "comlocation": comlocation
                                    })
                                    mylar.IMPORT_CID_COUNT += 1
            else:
                pass
                #logger.fdebug(i['ComicFilename'] + ' is not in a metatagged format (cbz). Bypassing reading of the metatags')

        if issueinfo is None:
            if i['parsedinfo']['issue_number'] is None:
                if 'annual' in i['parsedinfo']['series_name'].lower():
                    logger.fdebug('Annual detected with no issue number present. Assuming year as issue.')  ##1 issue')
                    if i['parsedinfo']['issue_year'] is not None:
                        issuenumber = 'Annual ' + str(i['parsedinfo']['issue_year'])
                    else:
                        issuenumber = 'Annual 1'
            else:
                issuenumber = i['parsedinfo']['issue_number']

            if 'annual' in i['parsedinfo']['series_name'].lower():
                mod_series = re.sub('annual', '', i['parsedinfo']['series_name'], flags=re.I).strip()
                logger.fdebug('Annual detected with no issue number present. Assuming year as issue.')  ##1 issue')
                if i['parsedinfo']['issue_number'] is not None:
                    issuenumber = 'Annual ' + str(i['parsedinfo']['issue_number'])
                else:
                    if i['parsedinfo']['issue_year'] is not None:
                        issuenumber = 'Annual ' + str(i['parsedinfo']['issue_year'])
                    else:
                        issuenumber = 'Annual 1'
            else:
                mod_series = i['parsedinfo']['series_name']
                issuenumber = i['parsedinfo']['issue_number']

            logger.fdebug('[' + mod_series + '] Adding to the import-queue!')
            isd = filechecker.FileChecker()
            is_dyninfo = isd.dynamic_replace(mod_series)  #helpers.conversion(mod_series))
            logger.fdebug('Dynamic-ComicName: ' + is_dyninfo['mod_seriesname'])

            #impid = dispname + '-' + str(result_comyear) + '-' + str(comiss) #com_NAME + "-" + str(result_comyear) + "-" + str(comiss)
            impid = str(random.randint(1000000, 99999999))
            logger.fdebug("impid: " + str(impid))
            if cvscanned_loc == os.path.dirname(comlocation):
                cv_cid = cvinfo_CID
                logger.fdebug('CVINFO_COMICID attached : ' + str(cv_cid))
            else:
                cv_cid = None

            if issuevolume is None:
                logger.fdebug('issue volume is : ' + str(issuevolume))
                if i['parsedinfo']['series_volume'] is None:
                    issuevolume = None
                else:
                    if str(i['parsedinfo']['series_volume'].lower()).startswith('v'):
                        issuevolume = i['parsedinfo']['series_volume']
                    else:
                        issuevolume = 'v' + str(i['parsedinfo']['series_volume'])
            else:
                logger.fdebug('issue volume not none : ' + str(issuevolume))
                if not issuevolume.lower().startswith('v'):
                    issuevolume = 'v' + str(issuevolume)

            logger.fdebug('IssueVolume is : ' + str(issuevolume))

            import_by_comicids.append({
                "impid": impid,
                "comicid": cv_cid,
                "issueid": None,
                "watchmatch": None,  #watchmatch (should be true/false if it already exists on watchlist)
                "displayname": mod_series,
                "comicname": i['parsedinfo']['series_name'],
                "dynamicname": is_dyninfo['mod_seriesname'].lower(),
                "comicyear": i['parsedinfo']['issue_year'],
                "issuenumber": issuenumber,  #issuenumber,
                "volume": issuevolume,
                "comfilename": comfilename,
                "comlocation": comlocation  #helpers.conversion(comlocation)
            })
        cnt += 1
    #logger.fdebug('import_by_ids: ' + str(import_by_comicids))

    #reverse lookup all of the gathered IssueID's in order to get the related ComicID
    reverse_issueids = []
    for x in issueid_list:
        reverse_issueids.append(x['issueid'])

    vals = []
    if len(reverse_issueids) > 0:
        mylar.IMPORT_STATUS = 'Now Reverse looking up ' + str(len(reverse_issueids)) + ' IssueIDs to get the ComicIDs'
        vals = mylar.cv.getComic(None, 'import', comicidlist=reverse_issueids)
        #logger.fdebug('vals returned:' + str(vals))

    if len(watch_kchoice) > 0:
        watchchoice['watchlist'] = watch_kchoice
        #logger.fdebug("watchchoice: " + str(watchchoice))

        logger.info("I have found " + str(watchfound) + " out of " + str(comiccnt) + " comics for series that are being watched.")
        wat = 0
        comicids = []

        if watchfound > 0:
            if mylar.CONFIG.IMP_MOVE:
                #check to see if Move Files is enabled.
                #if not being moved, set the archive bit.
                logger.info('You checked off Move Files...so that\'s what I am going to do')
                logger.fdebug('Moving files into appropriate directory')
                while (wat < watchfound):
                    watch_the_list = watchchoice['watchlist'][wat]
                    watch_comlocation = watch_the_list['ComicLocation']
                    watch_comicid = watch_the_list['ComicID']
                    watch_comicname = watch_the_list['ComicName']
                    watch_comicyear = watch_the_list['ComicYear']
                    watch_comiciss = watch_the_list['ComicIssue']
                    logger.fdebug('ComicLocation: ' + watch_comlocation)
                    orig_comlocation = watch_the_list['OriginalLocation']
                    orig_filename = watch_the_list['OriginalFilename']
                    logger.fdebug('Orig. Location: ' + orig_comlocation)
                    logger.fdebug('Orig. Filename: ' + orig_filename)
                    #before moving, check to see if Rename to Mylar structure is enabled.
                    if mylar.CONFIG.IMP_RENAME:
                        logger.fdebug('Renaming files according to configuration details : ' + str(mylar.CONFIG.FILE_FORMAT))
                        renameit = helpers.rename_param(watch_comicid, watch_comicname, watch_comicyear, watch_comiciss)
                        nfilename = renameit['nfilename']
                        dst_path = os.path.join(watch_comlocation, nfilename)
                        if str(watch_comicid) not in comicids:
                            comicids.append(watch_comicid)
                    else:
                        logger.fdebug('Renaming files not enabled, keeping original filename(s)')
                        dst_path = os.path.join(watch_comlocation, orig_filename)

                    #os.rename(os.path.join(self.nzb_folder, str(ofilename)), os.path.join(self.nzb_folder, str(nfilename + ext)))
                    #src = os.path.join(, str(nfilename + ext))
                    logger.fdebug('I am going to move ' + orig_comlocation + ' to ' + dst_path)
                    try:
                        shutil.move(orig_comlocation, dst_path)
                    except (OSError, IOError):
                        logger.info("Failed to move directory - check directories and manually re-run.")
                    wat += 1
            else:
                # if move files isn't enabled, let's set all found comics to Archive status :)
                while (wat < watchfound):
                    watch_the_list = watchchoice['watchlist'][wat]
                    watch_comicid = watch_the_list['ComicID']
                    watch_issue = watch_the_list['ComicIssue']
                    logger.fdebug('ComicID: ' + str(watch_comicid))
                    logger.fdebug('Issue#: ' + str(watch_issue))
                    issuechk = myDB.selectone("SELECT * from issues where ComicID=? AND INT_IssueNumber=?", [watch_comicid, watch_issue]).fetchone()
                    if issuechk is None:
                        logger.fdebug('No matching issues for this comic#')
                    else:
                        logger.fdebug('...Existing status: ' + str(issuechk['Status']))
                        control = {"IssueID": issuechk['IssueID']}
                        values = {"Status": "Archived"}
                        logger.fdebug('...changing status of ' + str(issuechk['Issue_Number']) + ' to Archived ')
                        myDB.upsert("issues", values, control)
                        if str(watch_comicid) not in comicids:
                            comicids.append(watch_comicid)
                    wat += 1

        if comicids is not None:
            for c in comicids:
                logger.fdebug('Rescanning.. ' + str(c))
                updater.forceRescan(c)

    if not len(import_by_comicids):
        return "Completed"

    if len(import_by_comicids) > 0 or len(vals) > 0:
        #import_comicids['comic_info'] = import_by_comicids
        #if vals:
        #    import_comicids['issueid_info'] = vals
        #else:
        #    import_comicids['issueid_info'] = None
        if vals:
            cvimport_comicids = vals
            import_cv_ids = len(vals)
        else:
            cvimport_comicids = None
            import_cv_ids = 0
    else:
        import_cv_ids = 0
        cvimport_comicids = None

    return {'import_by_comicids': import_by_comicids,
            'import_count': len(import_by_comicids),
            'CV_import_comicids': cvimport_comicids,
            'import_cv_ids': import_cv_ids,
            'issueid_list': issueid_list,
            'failure_list': failure_list,
            'utter_failure_list': utter_failure_list}
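
# --- Illustrative sketch (not part of the original code) ---
# Shape of the dict libraryScan() above returns when files were parsed; key
# names mirror the return statement, the empty values are placeholders.
_example_scan_result = {
    'import_by_comicids': [],     # parsed files queued for import
    'import_count': 0,            # len(import_by_comicids)
    'CV_import_comicids': None,   # reverse IssueID->ComicID lookups, when any were done
    'import_cv_ids': 0,           # count of those lookups
    'issueid_list': [],           # files whose IssueID came from metadata
    'failure_list': [],           # files that parsed but could not be matched
    'utter_failure_list': [],     # files that raised during parsing
}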
def update_db(self):
    # mylar.MAINTENANCE_UPDATE will indicate what's being updated in the db
    if mylar.MAINTENANCE_UPDATE:
        self.db_version_check(display=False)

        # backup mylar.db here
        self.backup_files(dbs=True)

        for dmode in mylar.MAINTENANCE_UPDATE:
            if dmode['mode'] == 'rss update':
                logger.info('[MAINTENANCE-MODE][DB-CONVERSION] Updating dB due to RSS table conversion')
                if dmode['resume'] > 0:
                    logger.info('[MAINTENANCE-MODE][DB-CONVERSION][DB-RECOVERY] Attempting to resume conversion from previous run (starting at record: %s)' % dmode['resume'])

            #force set logging to warning level only so the progress indicator can be displayed in console
            prev_log_level = mylar.LOG_LEVEL
            self.toggle_logging(level=0)

            if dmode['mode'] == 'rss update':
                self.sql_attachmylar()
                row_cnt = self.dbmylar.execute("SELECT COUNT(rowid) as count FROM rssdb")
                rowcnt = row_cnt.fetchone()[0]
                mylar.MAINTENANCE_DB_TOTAL = rowcnt
                if dmode['resume'] > 0:
                    xt = self.dbmylar.execute("SELECT rowid, Title FROM rssdb WHERE rowid >= ? ORDER BY rowid ASC", [dmode['resume']])
                else:
                    xt = self.dbmylar.execute("SELECT rowid, Title FROM rssdb ORDER BY rowid ASC")
                xlist = xt.fetchall()
                mylar.MAINTENANCE_DB_COUNT = 0
                if xlist is None:
                    print('Nothing in the rssdb to update. Ignoring.')
                    return True

                try:
                    if dmode['resume'] > 0 and xlist is not None:
                        logger.info('resume set at : %s' % (xlist[dmode['resume']],))
                        #xlist[dmode['resume']:]
                        mylar.MAINTENANCE_DB_COUNT = dmode['resume']
                except Exception as e:
                    print('[ERROR:%s] - table resume location is not accurate. Starting from the beginning, but this should go quickly..' % e)
                    xt = self.dbmylar.execute("SELECT rowid, Title FROM rssdb ORDER BY rowid ASC")
                    xlist = xt.fetchall()
                    dmode['resume'] = 0

                if xlist:
                    resultlist = []
                    delete_rows = []
                    for x in self.progressBar(xlist, prefix='Progress', suffix='Complete', length=50, resume=dmode['resume']):
                        #signal capture here since we can't do it as per normal
                        if any([mylar.SIGNAL == 'shutdown', mylar.SIGNAL == 'restart']):
                            try:
                                self.dbmylar.executemany("UPDATE rssdb SET Issue_Number=?, ComicName=? WHERE rowid=?", (resultlist))
                                self.sql_closemylar()
                            except Exception as e:
                                print('error: %s' % e)
                            else:
                                send_it = {'mode': dmode['mode'],
                                           'version': self.db_version,
                                           'status': 'incomplete',
                                           'total': mylar.MAINTENANCE_DB_TOTAL,
                                           'current': mylar.MAINTENANCE_DB_COUNT,
                                           'last_run': helpers.utctimestamp()}
                                self.db_update_status(send_it)

                            #toggle back the logging level to what it was originally.
                            self.toggle_logging(level=prev_log_level)

                            if mylar.SIGNAL == 'shutdown':
                                logger.info('[MAINTENANCE-MODE][DB-CONVERSION][SHUTDOWN] Shutting Down...')
                                return False
                            else:
                                logger.info('[MAINTENANCE-MODE][DB-CONVERSION][RESTART] Restarting...')
                                return True

                        mylar.MAINTENANCE_DB_COUNT += 1

                        if not x[1]:
                            logger.fdebug('[MAINTENANCE-MODE][DB-CONVERSION][JUNK-NAME] %s' % x[1])
                            delete_rows.append((x[0],))
                            continue

                        try:
                            if any(ext in x[1] for ext in ['yenc', '.pdf', '.rar', '.mp4', '.avi']):
                                logger.fdebug('[MAINTENANCE-MODE][DB-CONVERSION][JUNK-NAME] %s' % x[1])
                                delete_rows.append((x[0],))
                                continue
                            else:
                                flc = filechecker.FileChecker(file=x[1])
                                filelist = flc.listFiles()
                        except Exception as e:
                            logger.fdebug('[MAINTENANCE-MODE][DB-CONVERSION][JUNK-NAME] %s' % x[1])
                            delete_rows.append((x[0],))
                            continue
                        else:
                            if all([filelist['series_name'] != '', filelist['series_name'] is not None]) and filelist['issue_number'] != '-':
                                issuenumber = filelist['issue_number']
                                seriesname = re.sub(r'[\u2014|\u2013|\u2e3a|\u2e3b]', '-', filelist['series_name']).strip()
                                if seriesname.endswith('-') and '#' in seriesname[-6:]:
                                    ck1 = seriesname.rfind('#')
                                    ck2 = seriesname.rfind('-')
                                    if seriesname[ck1 + 1:ck2 - 1].strip().isdigit():
                                        issuenumber = '%s %s' % (seriesname[ck1:].strip(), issuenumber)
                                        seriesname = seriesname[:ck1 - 1].strip()
                                issuenumber = issuenumber.strip()
                                resultlist.append((issuenumber, seriesname.strip(), x[0]))

                        if len(resultlist) > 500:
                            # write it out in batches (every 500 records).
                            try:
                                logger.fdebug('resultlist: %s' % (resultlist,))
                                self.dbmylar.executemany("UPDATE rssdb SET Issue_Number=?, ComicName=? WHERE rowid=?", (resultlist))
                                self.sql_closemylar()

                                # update the update_db status so that a resume doesn't restart from the beginning (or the wrong point, i.e. before the last batched write).
                                send_it = {'mode': dmode['mode'],
                                           'version': self.db_version,
                                           'status': 'incomplete',
                                           'total': mylar.MAINTENANCE_DB_TOTAL,
                                           'current': mylar.MAINTENANCE_DB_COUNT,
                                           'last_run': helpers.utctimestamp()}
                                self.db_update_status(send_it)
                            except Exception as e:
                                print('error: %s' % e)
                                return False
                            else:
                                logger.fdebug('reattaching')
                                self.sql_attachmylar()
                                resultlist = []

                    try:
                        if len(resultlist) > 0:
                            self.dbmylar.executemany("UPDATE rssdb SET Issue_Number=?, ComicName=? WHERE rowid=?", (resultlist))
                            self.sql_closemylar()
                    except Exception as e:
                        print('error: %s' % e)
                        return False
                    else:
                        try:
                            send_it = {'mode': dmode['mode'],
                                       'version': 1,
                                       'status': 'complete',
                                       'total': mylar.MAINTENANCE_DB_TOTAL,
                                       'current': mylar.MAINTENANCE_DB_COUNT,
                                       'last_run': helpers.utctimestamp()}
                        except Exception as e:
                            print('error_sendit: %s' % e)
                        else:
                            self.db_update_status(send_it)

                    if delete_rows:
                        # only do this on completion, or else the rowids will be different and it will mess up a rerun
                        try:
                            self.sql_attachmylar()
                            print('[MAINTENANCE-MODE][DB-CONVERSION][CLEANUP] Removing %s invalid RSS entries from table...' % len(delete_rows))
                            self.dbmylar.executemany("DELETE FROM rssdb WHERE rowid=?", (delete_rows))
                            self.sql_closemylar()
                        except Exception as e:
                            print('error: %s' % e)
                        else:
                            self.sql_attachmylar()
                            print('[MAINTENANCE-MODE][DB-CONVERSION][CLEANUP] Cleaning up...')
                            self.dbmylar.execute("VACUUM")
                    else:
                        print('[MAINTENANCE-MODE][DB-CONVERSION][CLEANUP] Cleaning up...')
                        self.sql_attachmylar()
                        self.dbmylar.execute("VACUUM")

                    self.sql_closemylar()

                    #toggle back the logging level to what it was originally.
                    self.toggle_logging(level=prev_log_level)

                    logger.info('[MAINTENANCE-MODE][DB-CONVERSION] Updating dB complete! (%s / %s)' % (mylar.MAINTENANCE_DB_COUNT, mylar.MAINTENANCE_DB_TOTAL))

                    mylar.MAINTENANCE_UPDATE[:] = [x for x in mylar.MAINTENANCE_UPDATE if not ('rss update' == x.get('mode'))]
    else:
        mylar.MAINTENANCE_DB_COUNT = 0
        logger.info('[MAINTENANCE-MODE] Update DB set to start - but nothing was provided as to what. Returning to non-maintenance mode')
        return True
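
# --- Illustrative sketch (not part of the original code) ---
# The checkpoint payload update_db() above hands to db_update_status() after
# each batched write; field names come from those calls, all values here are
# hypothetical.
_example_checkpoint = {
    'mode': 'rss update',
    'version': 1,
    'status': 'incomplete',  # flips to 'complete' once the table walk finishes
    'total': 120000,         # mylar.MAINTENANCE_DB_TOTAL (hypothetical row count)
    'current': 5500,         # mylar.MAINTENANCE_DB_COUNT at the checkpoint
    'last_run': 1514764800,  # helpers.utctimestamp() (hypothetical epoch value)
}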
def walk_the_walk(self):
    folder_location = mylar.CONFIG.FOLDER_CACHE_LOCATION
    if folder_location is None:
        return {'status': False}

    logger.info('checking locally...')

    filelist = None

    logger.info('check_folder_cache: %s' % (mylar.CHECK_FOLDER_CACHE))
    if mylar.CHECK_FOLDER_CACHE is not None:
        rd = mylar.CHECK_FOLDER_CACHE  #datetime.datetime.utcfromtimestamp(mylar.CHECK_FOLDER_CACHE)
        rd_mins = rd + datetime.timedelta(seconds=600)  #10 minute cache retention
        rd_now = datetime.datetime.utcfromtimestamp(time.time())
        if calendar.timegm(rd_mins.utctimetuple()) > calendar.timegm(rd_now.utctimetuple()):
            # if < 10 minutes since last check, use cached listing
            logger.info('using cached folder listing since < 10 minutes since last file check.')
            filelist = mylar.FOLDER_CACHE

    if filelist is None:
        logger.info('generating new directory listing for folder_cache')
        flc = filechecker.FileChecker(folder_location, justparse=True, pp_mode=True)
        mylar.FOLDER_CACHE = flc.listFiles()
        mylar.CHECK_FOLDER_CACHE = datetime.datetime.utcfromtimestamp(helpers.utctimestamp())

    local_status = False
    filepath = None
    filename = None
    for fl in mylar.FOLDER_CACHE['comiclist']:
        logger.info('fl: %s' % (fl,))
        if self.arc is not None:
            comicname = self.arc['ComicName']
            corrected_type = None
            alternatesearch = None
            booktype = self.arc['Type']
            publisher = self.arc['Publisher']
            issuenumber = self.arc['IssueNumber']
            issuedate = self.arc['IssueDate']
            issuename = self.arc['IssueName']
            issuestatus = self.arc['Status']
        elif self.comic is not None:
            comicname = self.comic['ComicName']
            booktype = self.comic['Type']
            corrected_type = self.comic['Corrected_Type']
            alternatesearch = self.comic['AlternateSearch']
            publisher = self.comic['ComicPublisher']
            issuenumber = self.issue['Issue_Number']
            issuedate = self.issue['IssueDate']
            issuename = self.issue['IssueName']
            issuestatus = self.issue['Status']
        else:
            # weekly - (one/off)
            comicname = self.weekly['COMIC']
            booktype = self.weekly['format']
            corrected_type = None
            alternatesearch = None
            publisher = self.weekly['PUBLISHER']
            issuenumber = self.weekly['ISSUE']
            issuedate = self.weekly['SHIPDATE']
            issuename = None
            issuestatus = self.weekly['STATUS']

        if booktype is not None:
            if (all([booktype != 'Print', booktype != 'Digital', booktype != 'None', booktype is not None]) and corrected_type != 'Print') or any([corrected_type == 'TPB', corrected_type == 'GN', corrected_type == 'HC']):
                if booktype == 'One-Shot' and corrected_type is None:
                    booktype = 'One-Shot'
                else:
                    if booktype == 'GN' and corrected_type is None:
                        booktype = 'GN'
                    elif booktype == 'HC' and corrected_type is None:
                        booktype = 'HC'
                    else:
                        booktype = 'TPB'

        wm = filechecker.FileChecker(watchcomic=comicname, Publisher=publisher, AlternateSearch=alternatesearch)
        watchmatch = wm.matchIT(fl)

        logger.info('watchmatch: %s' % (watchmatch,))

        # this is all for a really general type of match - if passed, the post-processing checks will do the real brunt work
        if watchmatch['process_status'] == 'fail':
            continue

        if watchmatch['justthedigits'] is not None:
            temploc = watchmatch['justthedigits'].replace('_', ' ')
            if "Director's Cut" not in temploc:
                temploc = re.sub('[\#\']', '', temploc)
        else:
            if any([booktype == 'TPB', booktype == 'GN', booktype == 'HC', booktype == 'One-Shot']):
                temploc = '1'
            else:
                temploc = None
                continue

        int_iss = helpers.issuedigits(issuenumber)
        issyear = issuedate[:4]
        old_status = issuestatus
        issname = issuename

        if temploc is not None:
            fcdigit = helpers.issuedigits(temploc)
        elif any([booktype == 'TPB', booktype == 'GN', booktype == 'HC', booktype == 'One-Shot']) and temploc is None:
            fcdigit = helpers.issuedigits('1')

        if int(fcdigit) == int_iss:
            logger.fdebug('[%s] Issue match - #%s' % (self.issueid, self.issue['Issue_Number']))
            local_status = True
            if watchmatch['sub'] is None:
                filepath = watchmatch['comiclocation']
                filename = watchmatch['comicfilename']
            else:
                filepath = os.path.join(watchmatch['comiclocation'], watchmatch['sub'])
                filename = watchmatch['comicfilename']
            break

    #if local_status is True:
    #    try:
    #        copied_folder = os.path.join(mylar.CONFIG.CACHE_DIR, 'tmp_filer')
    #        if os.path.exists(copied_folder):
    #            shutil.rmtree(copied_folder)
    #        os.mkdir(copied_folder)
    #        logger.info('created temp directory: %s' % copied_folder)
    #        shutil.copy(os.path.join(filepath, filename), copied_folder)
    #    except Exception as e:
    #        logger.error('[%s] error: %s' % (e, filepath))
    #        filepath = None
    #        local_status = False
    #    else:
    #        filepath = os.path.join(copied_folder, filename)
    #        logger.info('Successfully copied file : %s' % filepath)

    return {'status': local_status, 'filename': filename, 'filepath': filepath}
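
# --- Illustrative sketch (not part of the original code) ---
# Standalone version of the folder-cache freshness test used in
# walk_the_walk() above, assuming the same 600-second retention window.
import calendar
import datetime
import time

def _cache_is_fresh(last_check, retention_secs=600):
    """Return True if last_check (a naive UTC datetime) is still inside the window."""
    expires = last_check + datetime.timedelta(seconds=retention_secs)
    now = datetime.datetime.utcfromtimestamp(time.time())
    return calendar.timegm(expires.utctimetuple()) > calendar.timegm(now.utctimetuple())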
def libraryScan(dir=None, append=False, ComicID=None, ComicName=None, cron=None, queue=None):

    if cron and not mylar.LIBRARYSCAN:
        return

    if not dir:
        dir = mylar.CONFIG.COMIC_DIR

    # If we're appending a dir, it's coming from the post processor which is
    # already bytestring
    if not append:
        dir = dir.encode(mylar.SYS_ENCODING)

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(mylar.SYS_ENCODING, 'replace'))
        return "Fail"

    logger.info('Scanning comic directory: %s' % dir.decode(mylar.SYS_ENCODING, 'replace'))

    basedir = dir

    comic_list = []
    failure_list = []
    utter_failure_list = []
    comiccnt = 0
    extensions = ('cbr', 'cbz')
    cv_location = []
    cbz_retry = 0

    mylar.IMPORT_STATUS = 'Now attempting to parse files for additional information'
    myDB = db.DBConnection()
    #mylar.IMPORT_PARSED_COUNT #used to count what #/totalfiles the filename parser is currently on
    for r, d, f in os.walk(dir):
        for files in f:
            mylar.IMPORT_FILES += 1
            if any(files.lower().endswith('.' + x.lower()) for x in extensions):
                comicpath = os.path.join(r, files)
                if myDB.select('SELECT * FROM comics JOIN issues WHERE issues.Status="Downloaded" AND ComicLocation=? AND issues.Location=?',
                               [r.decode(mylar.SYS_ENCODING), files.decode(mylar.SYS_ENCODING)]):
                    logger.info('Skipped known issue path: %r', comicpath)
                    continue

                comic = files
                comicsize = os.path.getsize(comicpath)
                logger.fdebug('Comic: ' + comic + ' [' + comicpath + '] - ' + str(comicsize) + ' bytes')

                try:
                    t = filechecker.FileChecker(dir=r, file=comic)
                    results = t.listFiles()
                    #logger.info(results)
                    #'type': re.sub('\.','', filetype).strip(),
                    #'sub': path_list,
                    #'volume': volume,
                    #'match_type': match_type,
                    #'comicfilename': filename,
                    #'comiclocation': clocation,
                    #'series_name': series_name,
                    #'series_volume': issue_volume,
                    #'series_year': issue_year,
                    #'justthedigits': issue_number,
                    #'annualcomicid': annual_comicid,
                    #'scangroup': scangroup}

                    if results:
                        resultline = '[PARSE-' + results['parse_status'].upper() + ']'
                        resultline += '[SERIES: ' + results['series_name'] + ']'
                        if results['series_volume'] is not None:
                            resultline += '[VOLUME: ' + results['series_volume'] + ']'
                        if results['issue_year'] is not None:
                            resultline += '[ISSUE YEAR: ' + str(results['issue_year']) + ']'
                        if results['issue_number'] is not None:
                            resultline += '[ISSUE #: ' + results['issue_number'] + ']'
                        logger.fdebug(resultline)
                    else:
                        logger.fdebug('[PARSED] FAILURE.')
                        continue

                    # We need the unicode path to use for logging, inserting into database
                    unicode_comic_path = comicpath.decode(mylar.SYS_ENCODING, 'replace')

                    if results['parse_status'] == 'success':
                        comic_list.append({'ComicFilename': comic,
                                           'ComicLocation': comicpath,
                                           'ComicSize': comicsize,
                                           'Unicode_ComicLocation': unicode_comic_path,
                                           'parsedinfo': {'series_name': results['series_name'],
                                                          'series_volume': results['series_volume'],
                                                          'issue_year': results['issue_year'],
                                                          'issue_number': results['issue_number']}
                                           })
                        comiccnt += 1
                        mylar.IMPORT_PARSED_COUNT += 1
                    else:
                        failure_list.append({'ComicFilename': comic,
                                             'ComicLocation': comicpath,
                                             'ComicSize': comicsize,
                                             'Unicode_ComicLocation': unicode_comic_path,
                                             'parsedinfo': {'series_name': results['series_name'],
                                                            'series_volume': results['series_volume'],
                                                            'issue_year': results['issue_year'],
                                                            'issue_number': results['issue_number']}
                                             })
                        mylar.IMPORT_FAILURE_COUNT += 1
                        if comic.endswith('.cbz'):
                            cbz_retry += 1

                except Exception as e:
                    logger.info('bang')
                    utter_failure_list.append({'ComicFilename': comic,
                                               'ComicLocation': comicpath,
                                               'ComicSize': comicsize,
                                               'Unicode_ComicLocation': unicode_comic_path,
                                               'parsedinfo': None,
                                               'error': e
                                               })
                    logger.info('[' + str(e) + '] FAILURE encountered. Logging the error for ' + comic + ' and continuing...')
                    mylar.IMPORT_FAILURE_COUNT += 1
                    if comic.endswith('.cbz'):
                        cbz_retry += 1
                    continue

            if 'cvinfo' in files:
                cv_location.append(r)
                logger.fdebug('CVINFO found: ' + os.path.join(r))
if issueinfo is None:
    logger.fdebug('[IMPORT-CBZ] No valid metadata contained within filename. Dropping down to parsing the filename itself.')
    pass
else:
    issuenotes_id = None
    logger.info('[IMPORT-CBZ] Successfully retrieved some tags. Let\'s see what I can figure out.')
    comicname = issueinfo[0]['series']
    if comicname is not None:
        logger.fdebug('[IMPORT-CBZ] Series Name: ' + comicname)
        as_d = filechecker.FileChecker()
        as_dyninfo = as_d.dynamic_replace(comicname)
        logger.fdebug('Dynamic-ComicName: ' + as_dyninfo['mod_seriesname'])
    else:
        logger.fdebug('[IMPORT-CBZ] No series name found within metadata. This is bunk - dropping down to file parsing for usable information.')
        issueinfo = None
        issue_number = None

if issueinfo is not None:
    try:
        issueyear = issueinfo[0]['year']
    except:
        issueyear = None
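
# --- Illustrative sketch (not part of the original code) ---
# The older tag reader above indexes issueinfo[0] and tolerates missing fields
# by falling back to None; a hypothetical helper capturing that pattern:
def _tag(issueinfo, field):
    try:
        return issueinfo[0][field]
    except (TypeError, IndexError, KeyError):
        return None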
def searchit(self):
    #self.searchterm is a tuple containing series name, issue number, volume and publisher.
    series_search = self.searchterm['series']
    comic_id = self.searchterm['id']

    annualize = False
    if 'Annual' in series_search:
        series_search = re.sub(' Annual', '', series_search).strip()
        annualize = True
    issue_search = self.searchterm['issue']
    volume_search = self.searchterm['volume']
    publisher_search = self.searchterm['publisher']
    spl = [x for x in self.publisher_list if x in publisher_search]
    for x in spl:
        publisher_search = re.sub(x, '', publisher_search).strip()
    logger.info('publisher search set to : ' + publisher_search)

    chk_id = None
    # lookup the ComicID in the 32p sqlite3 table to pull the series_id to use.
    if comic_id:
        chk_id = helpers.checkthe_id(comic_id)

    if not chk_id:
        #generate the dynamic name of the series here so we can match it up
        as_d = filechecker.FileChecker()
        as_dinfo = as_d.dynamic_replace(series_search)
        mod_series = re.sub('\|', '', as_dinfo['mod_seriesname']).strip()
        as_puinfo = as_d.dynamic_replace(publisher_search)
        pub_series = as_puinfo['mod_seriesname']

        logger.info('series_search: ' + series_search)

        if '/' in series_search:
            series_search = series_search[:series_search.find('/')]
        if ':' in series_search:
            series_search = series_search[:series_search.find(':')]
        if ',' in series_search:
            series_search = series_search[:series_search.find(',')]

        if not mylar.SEARCH_32P:
            url = 'https://walksoftly.itsaninja.party/serieslist.php'
            params = {'series': re.sub('\|', '', mod_series.lower()).strip()}  #series_search}
            try:
                t = requests.get(url, params=params, verify=True,
                                 headers={'USER-AGENT': mylar.USER_AGENT[:mylar.USER_AGENT.find('/') + 7] + mylar.USER_AGENT[mylar.USER_AGENT.find('(') + 1]})
            except requests.exceptions.RequestException as e:
                logger.warn(e)
                return "no results"

            #the site's custom 619/999 status codes arrive as ints - comparing against strings would never match.
            if t.status_code == 619:
                logger.warn('[' + str(t.status_code) + '] Unable to retrieve data from site.')
                return "no results"
            elif t.status_code == 999:
                logger.warn('[' + str(t.status_code) + '] No series title was provided to the search query.')
                return "no results"

            try:
                results = t.json()
            except:
                results = t.text

            if len(results) == 0:
                logger.warn('No results found for search on 32P.')
                return "no results"

    with cfscrape.create_scraper() as s:
        s.headers = self.headers
        cj = LWPCookieJar(os.path.join(mylar.CACHE_DIR, ".32p_cookies.dat"))
        cj.load()
        s.cookies = cj
        data = []
        pdata = []
        pubmatch = False

        if not chk_id:
            if mylar.SEARCH_32P:
                url = 'https://32pag.es/torrents.php'  #?action=serieslist&filter=' + series_search #&filter=F
                params = {'action': 'serieslist', 'filter': series_search}
                time.sleep(1)  #just to make sure we don't hammer, 1s pause.
                t = s.get(url, params=params, verify=True, allow_redirects=True)
                soup = BeautifulSoup(t.content, "html.parser")
                results = soup.find_all("a", {"class": "object-qtip"}, {"data-type": "torrentgroup"})

            for r in results:
                if mylar.SEARCH_32P:
                    torrentid = r['data-id']
                    torrentname = r.findNext(text=True)
                    torrentname = torrentname.strip()
                else:
                    torrentid = r['id']
                    torrentname = r['series']

                as_d = filechecker.FileChecker()
                as_dinfo = as_d.dynamic_replace(torrentname)
                seriesresult = re.sub('\|', '', as_dinfo['mod_seriesname']).strip()
                #seriesresult = as_dinfo['mod_seriesname']
                logger.info('searchresult: ' + seriesresult + ' --- ' + mod_series + ' [' + publisher_search + ']')
                if seriesresult == mod_series:
                    logger.info('[MATCH] ' + torrentname + ' [' + str(torrentid) + ']')
                    data.append({"id": torrentid, "series": torrentname})
                elif publisher_search in seriesresult:
                    logger.info('publisher match.')
                    tmp_torrentname = re.sub(publisher_search, '', seriesresult).strip()
                    as_t = filechecker.FileChecker()
                    as_tinfo = as_t.dynamic_replace(tmp_torrentname)
                    logger.info('tmp_torrentname: ' + tmp_torrentname)
                    logger.info('as_tinfo: ' + as_tinfo['mod_seriesname'])
                    if re.sub('\|', '', as_tinfo['mod_seriesname']).strip() == mod_series:
                        logger.info('[MATCH] ' + torrentname + ' [' + str(torrentid) + ']')
                        pdata.append({"id": torrentid, "series": torrentname})
                        pubmatch = True

            logger.info(str(len(data)) + ' series listed for searching that match.')
        else:
            logger.info('Exact series ID already discovered previously. Setting to : ' + chk_id['series'] + ' [' + str(chk_id['id']) + ']')
            pdata.append({"id": chk_id['id'], "series": chk_id['series']})
            pubmatch = True

        if all([len(data) == 0, len(pdata) == 0]):
            return "no results"

        if len(pdata) == 1:
            logger.info(str(len(pdata)) + ' series match the title being searched for')
            dataset = pdata
            searchid = pdata[0]['id']
        elif len(data) == 1:
            logger.info(str(len(data)) + ' series match the title being searched for')
            dataset = data
            searchid = data[0]['id']
        else:
            dataset = []
            if len(data) > 0:
                dataset += data
            if len(pdata) > 0:
                dataset += pdata

        if chk_id is None and any([len(data) == 1, len(pdata) == 1]):
            #update the 32p_reference so we avoid doing a url lookup next time
            helpers.checkthe_id(comic_id, dataset)
        else:
            logger.warn('More than one result - will update the 32p reference point once the issue has been successfully matched against.')

        results32p = []
        resultlist = {}
        for x in dataset:
            payload = {'action': 'groupsearch',
                       'id': x['id'],  #searchid,
                       'issue': issue_search}
            #in order to match up against 0-day stuff, volume has to be none at this point
            #when doing other searches tho, this should be allowed to go through
            #if all([volume_search != 'None', volume_search is not None]):
            #    payload.update({'volume': re.sub('v', '', volume_search).strip()})
            logger.info('payload: ' + str(payload))
            url = 'https://32pag.es/ajax.php'
            time.sleep(1)  #just to make sure we don't hammer, 1s pause.
            try:
                d = s.post(url, params=payload, verify=True, allow_redirects=True)
                #logger.debug(self.module + ' Reply from AJAX: \n %s', d.text)
            except Exception as e:
                logger.info(self.module + ' Could not POST URL %s', url)

            try:
                searchResults = d.json()
            except:
                searchResults = d.text
                logger.debug(self.module + ' Search Result did not return valid JSON, falling back on text: %s', searchResults)
                return False

            #logger.debug(self.module + " Search Result: %s", searchResults)
            if searchResults['status'] == 'success' and searchResults['count'] > 0:
                logger.info('successfully retrieved ' + str(searchResults['count']) + ' search results.')
                for a in searchResults['details']:
                    results32p.append({'link': a['id'],
                                       'title': self.searchterm['series'] + ' v' + a['volume'] + ' #' + a['issues'],
                                       'filesize': a['size'],
                                       'issues': a['issues'],
                                       'pack': a['pack'],
                                       'format': a['format'],
                                       'language': a['language'],
                                       'seeders': a['seeders'],
                                       'leechers': a['leechers'],
                                       'scanner': a['scanner'],
                                       'chkit': {'id': x['id'], 'series': x['series']},
                                       'pubdate': datetime.datetime.fromtimestamp(float(a['upload_time'])).strftime('%c')})

        if len(results32p) > 0:
            resultlist['entries'] = sorted(results32p, key=itemgetter('pack', 'title'), reverse=False)
        else:
            resultlist = 'no results'

    return resultlist
def searchit(self):
    with requests.Session() as s:
        #self.searchterm is a tuple containing series name, issue number and volume.
        series_search = self.searchterm['series']
        issue_search = self.searchterm['issue']
        volume_search = self.searchterm['volume']

        #generate the dynamic name of the series here so we can match it up
        as_d = filechecker.FileChecker()
        as_dinfo = as_d.dynamic_replace(series_search)
        mod_series = as_dinfo['mod_seriesname']

        if '/' in series_search:
            series_search = series_search[:series_search.find('/')]
        if ':' in series_search:
            series_search = series_search[:series_search.find(':')]

        url = 'https://32pag.es/torrents.php'  #?action=serieslist&filter=' + series_search #&filter=F
        params = {'action': 'serieslist', 'filter': series_search}

        s.headers = self.headers
        cj = LWPCookieJar(os.path.join(mylar.CACHE_DIR, ".32p_cookies.dat"))
        cj.load()
        s.cookies = cj

        time.sleep(1)  #just to make sure we don't hammer, 1s pause.
        t = s.get(url, params=params, verify=True)
        soup = BeautifulSoup(t.content, "html.parser")
        results = soup.find_all("a", {"class": "object-qtip"}, {"data-type": "torrentgroup"})

        data = []
        for r in results:
            torrentid = r['data-id']
            torrentname = r.findNext(text=True)
            torrentname = torrentname.strip()
            as_d = filechecker.FileChecker()
            as_dinfo = as_d.dynamic_replace(torrentname)
            seriesresult = as_dinfo['mod_seriesname']
            logger.info('searchresult: ' + seriesresult + ' --- ' + mod_series)
            if seriesresult == mod_series:
                logger.info('[MATCH] ' + torrentname + ' [' + str(torrentid) + ']')
                data.append({"id": torrentid, "series": torrentname})

        logger.info(str(len(data)) + ' series listed for searching that match.')

        if len(data) == 1:
            logger.info(str(len(data)) + ' series match the title being searched for')
            payload = {'action': 'groupsearch',
                       'id': data[0]['id'],
                       'issue': issue_search}
            #in order to match up against 0-day stuff, volume has to be none at this point
            #when doing other searches tho, this should be allowed to go through
            #if all([volume_search != 'None', volume_search is not None]):
            #    payload.update({'volume': re.sub('v', '', volume_search).strip()})
            logger.info('payload: ' + str(payload))
            url = 'https://32pag.es/ajax.php'
            time.sleep(1)  #just to make sure we don't hammer, 1s pause.
            d = s.get(url, params=payload, verify=True)

            results32p = []
            results = {}
            try:
                searchResults = d.json()
            except:
                searchResults = d.text

            if searchResults['status'] == 'success' and searchResults['count'] > 0:
                logger.info('successfully retrieved ' + str(searchResults['count']) + ' search results.')
                for a in searchResults['details']:
                    results32p.append({'link': a['id'],
                                       'title': self.searchterm['series'] + ' v' + a['volume'] + ' #' + a['issues'],
                                       'filesize': a['size'],
                                       'pack': a['pack'],
                                       'format': a['format'],
                                       'language': a['language'],
                                       'seeders': a['seeders'],
                                       'leechers': a['leechers'],
                                       'scanner': a['scanner'],
                                       'pubdate': datetime.datetime.fromtimestamp(float(a['upload_time'])).strftime('%c')})
                results['entries'] = results32p
            else:
                results = 'no results'
        else:
            results = 'no results'

    return results