コード例 #1
0
ファイル: helpers.py プロジェクト: phairplay/mylar
def latestdate_fix():
    """Normalise LatestDate values that lack a day component.

    Scans every series in the comics table; dates stored as mm-yyyy or
    yyyy-mm are rewritten as yyyy-mm-01 so later date handling is safe.
    """
    import db, logger
    myDB = db.DBConnection()
    comiclist = myDB.select('SELECT * FROM comics')
    if comiclist is None:
        logger.fdebug('No Series in watchlist to correct latest date')
        return
    fixes = []
    for comic in comiclist:
        latestdate = comic['LatestDate']
        # a full date is yyyy-mm-dd; nothing past index 8 means no day part.
        if latestdate[8:] == '' and len(latestdate) <= 7:
            dashpos = latestdate.find('-')
            if dashpos != 4:
                # mm-yyyy layout
                lat_month = latestdate[:dashpos]
                lat_year = latestdate[dashpos + 1:]
            else:
                # yyyy-mm layout
                lat_month = latestdate[dashpos + 1:]
                lat_year = latestdate[:dashpos]
            fixes.append({"comicid":    comic['ComicID'],
                          "latestdate": lat_year + '-' + str(lat_month) + '-01'})
    # write the corrected dates back.
    for fix in fixes:
        myDB.upsert("comics",
                    {"LatestDate": fix['latestdate']},
                    {"ComicID": fix['comicid']})
    return
コード例 #2
0
ファイル: helpers.py プロジェクト: phairplay/mylar
def LoadAlternateSearchNames(seriesname_alt, comicid):
    """Split a '##'-delimited AlternateSearch value into a lookup dict.

    Parameters:
        seriesname_alt: raw AlternateSearch column value ('##'-separated
            alternate series names), or None / 'None' when absent.
        comicid: the ComicID the alternate names belong to.

    Returns the string "no results" when no alternate names exist,
    otherwise a dict with 'AlternateName' (list of name dicts),
    'ComicID' and 'Count'.
    """
    import logger
    AS_Alt = []
    Alternate_Names = {}
    alt_count = 0

    if seriesname_alt is None or seriesname_alt == 'None':
        logger.fdebug('no Alternate name given. Aborting search.')
        return "no results"
    else:
        chkthealt = seriesname_alt.split('##')
        # bugfix: the original compared the list itself against 0
        # (`chkthealt == 0`, always False); the intent was clearly to
        # test for an empty split result.
        if len(chkthealt) == 0:
            AS_Alt.append(seriesname_alt)
        for calt in chkthealt:
            AS_Alter = re.sub('##', '', calt)
            # drop non-ascii characters and collapse runs of whitespace.
            u_altsearchcomic = AS_Alter.encode('ascii', 'ignore').strip()
            AS_formatrem_seriesname = re.sub('\s+', ' ', u_altsearchcomic)
            if AS_formatrem_seriesname[:1] == ' ':
                AS_formatrem_seriesname = AS_formatrem_seriesname[1:]

            AS_Alt.append({"AlternateName": AS_formatrem_seriesname})
            alt_count += 1

        Alternate_Names['AlternateName'] = AS_Alt
        Alternate_Names['ComicID'] = comicid
        Alternate_Names['Count'] = alt_count

        return Alternate_Names
コード例 #3
0
ファイル: helpers.py プロジェクト: phairplay/mylar
def renamefile_readingorder(readorder):
    """Return *readorder* zero-padded to (at least) three digits.

    e.g. 7 -> '007', 10 -> '010', 100 -> '100'.

    Fixes the original boundary bug where 10 and 99 fell through the
    elif chain unpadded ('10'/'99' instead of '010'/'099'), and where an
    already-padded string input could be double-padded.
    """
    import logger
    logger.fdebug('readingorder#: ' + str(readorder))
    # zfill pads uniformly and never over-pads values of 100 or more.
    return str(readorder).zfill(3)
コード例 #4
0
ファイル: cv.py プロジェクト: adrianmoisey/mylar
def getComic(comicid,type,issueid=None,arc=None,arcid=None,arclist=None,comicidlist=None):
    """Dispatch a ComicVine query of the given *type* and return parsed data."""
    if type == 'issue':
        # a comicid of None means the request came from a story-arc search.
        if comicid is None:
            id = arcid
            islist = arclist
        else:
            id = comicid
            islist = None
        searched = pulldetails(id, 'issue', None, 0, islist)
        if searched is None:
            return False
        totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
        logger.fdebug("there are " + str(totalResults) + " search results...")
        if not totalResults:
            return False
        ndic = []
        firstdate = '2099-00-00'
        countResults = 0
        while countResults < int(totalResults):
            logger.fdebug("querying " + str(countResults))
            if countResults > 0:
                # new api - pagination uses the running count as the offset.
                searched = pulldetails(id, 'issue', None, countResults, islist)
            issuechoice, tmpdate = GetIssuesInfo(id, searched, arcid)
            if tmpdate < firstdate:
                firstdate = tmpdate
            ndic = ndic + issuechoice
            # results are capped at 100 per page.
            countResults += 100
        return {'issuechoice': ndic, 'firstdate': firstdate}

    elif type == 'comic':
        return GetComicInfo(comicid, pulldetails(comicid, 'comic', None, 1))
    elif type == 'firstissue':
        return GetFirstIssue(issueid, pulldetails(comicid, 'firstissue', issueid, 1))
    elif type == 'storyarc':
        return GetComicInfo(issueid, pulldetails(arc, 'storyarc', None, 1))
    elif type == 'comicyears':
        # used by the story arc searcher to populate the Series Year for each
        # ComicID; offset 0 because a filter is being applied.
        return GetSeriesYears(pulldetails(arcid, 'comicyears', offset=0, comicidlist=comicidlist))
コード例 #5
0
ファイル: filechecker.py プロジェクト: brunnels/mylar
def listFiles(dir,watchcomic,AlternateSearch=None):
    """Scan *dir* for files matching *watchcomic* (or its AlternateSearch name).

    Returns a dict with 'comiccount' and, when matches exist, a 'comiclist'
    of filename/location/size dicts. Annuals are skipped.
    """
    logger.fdebug("comic: " + watchcomic)
    basedir = dir
    logger.fdebug("Looking in: " + dir)
    watchmatch = {}
    comiclist = []
    comiccnt = 0
    # punctuation that gets flattened to spaces before comparing names.
    punct = '[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]'
    modwatchcomic = re.sub('\s+', ' ', re.sub(punct, ' ', str(watchcomic))).strip()
    if AlternateSearch is not None:
        altsearchcomic = re.sub('\s+', ' ', re.sub(punct, ' ', str(AlternateSearch))).strip()
    else:
        # random characters so the alternate comparison can never match.
        altsearchcomic = "127372873872871091383 abdkhjhskjhkjdhakajhf"
    for item in os.listdir(basedir):
        subname = re.sub('\s+', ' ', re.sub(punct, ' ', str(item))).strip()
        logger.fdebug("watchcomic:" + str(modwatchcomic) + " ..comparing to found file: " + str(subname))
        if modwatchcomic.lower() in subname.lower() or altsearchcomic.lower() in subname.lower():
            if 'annual' in subname.lower():
                # annuals aren't handled here.
                continue
            comicpath = os.path.join(basedir, item)
            logger.fdebug(modwatchcomic + " - watchlist match on : " + comicpath)
            comiccnt += 1
            comiclist.append({
                 'ComicFilename':           item,
                 'ComicLocation':           comicpath,
                 'ComicSize':               os.path.getsize(comicpath)
                 })
            watchmatch['comiclist'] = comiclist

    logger.fdebug("you have a total of " + str(comiccnt) + " " + str(watchcomic) + " comics")
    watchmatch['comiccount'] = comiccnt
    return watchmatch
コード例 #6
0
ファイル: cv.py プロジェクト: claym/mylar
def GetImportList(results):
    """Parse ComicVine <issue> nodes into a list of series/issue dicts.

    Each element of the returned list carries ComicID, IssueID, ComicName,
    Issue_Name and Issue_Number (None / 'None' when unavailable).
    """
    importlist = results.getElementsByTagName('issue')
    serieslist = []
    for implist in importlist:
        # start from a fully-populated default dict on every iteration so a
        # parse failure can never leak values from the previous issue (the
        # original reused one dict across the loop) and the final append
        # can never raise a KeyError on a missing key.
        tempseries = {'ComicID':      None,
                      'IssueID':      None,
                      'ComicName':    'None',
                      'Issue_Name':   None,
                      'Issue_Number': None}
        try:
            # the <id> under <volume> is the series id; under <issue> the issue id.
            for idnode in implist.getElementsByTagName('id'):
                if idnode.parentNode.nodeName == 'volume':
                    tempseries['ComicID'] = idnode.firstChild.wholeText
                elif idnode.parentNode.nodeName == 'issue':
                    tempseries['IssueID'] = idnode.firstChild.wholeText
        except:
            tempseries['ComicID'] = None

        try:
            # same parent-node discrimination for the <name> tags.
            for namenode in implist.getElementsByTagName('name'):
                if namenode.parentNode.nodeName == 'volume':
                    tempseries['ComicName'] = namenode.firstChild.wholeText
                elif namenode.parentNode.nodeName == 'issue':
                    try:
                        tempseries['Issue_Name'] = namenode.firstChild.wholeText
                    except:
                        tempseries['Issue_Name'] = None
        except:
            tempseries['ComicName'] = 'None'

        try:
            tempseries['Issue_Number'] = implist.getElementsByTagName('issue_number')[0].firstChild.wholeText
        except:
            logger.fdebug('No Issue Number available - Trade Paperbacks, Graphic Novels and Compendiums are not supported as of yet.')

        logger.info('tempseries:' + str(tempseries))
        serieslist.append({"ComicID":      tempseries['ComicID'],
                           "IssueID":      tempseries['IssueID'],
                           "ComicName":    tempseries['ComicName'],
                           "Issue_Name":   tempseries['Issue_Name'],
                           "Issue_Number": tempseries['Issue_Number']})

    return serieslist
コード例 #7
0
ファイル: filechecker.py プロジェクト: StevieCoops/mylar
def traverse_directories(dir):
    """Walk *dir* recursively and return a list of directory/filename dicts.

    Files directly under the root are recorded with an empty 'directory'.
    """
    filelist = []
    for dirname, subs, files in os.walk(dir):
        direc = '' if dirname == dir else dirname
        filelist.extend({"directory": direc, "filename": fname}
                        for fname in files)
    logger.fdebug('there are ' + str(len(filelist)) + ' files.')
    return filelist
コード例 #8
0
ファイル: cv.py プロジェクト: cdj/mylar
def getComic(comicid,type,issueid=None,arc=None):
    """Dispatch a ComicVine query of the given *type* and return parsed data."""
    if type == 'issue':
        # probe once to learn the total result count before paginating.
        searched = pulldetails(comicid, 'issue', None, 0)
        if searched is None:
            return False
        totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
        logger.fdebug("there are " + str(totalResults) + " search results...")
        if not totalResults:
            return False
        ndic = []
        firstdate = '2099-00-00'
        countResults = 0
        while countResults < int(totalResults):
            logger.fdebug("querying " + str(countResults))
            if countResults > 0:
                # new api - pagination uses the running count as the offset.
                searched = pulldetails(comicid, 'issue', None, countResults)
            issuechoice, tmpdate = GetIssuesInfo(comicid, searched)
            if tmpdate < firstdate:
                firstdate = tmpdate
            ndic = ndic + issuechoice
            # results are capped at 100 per page.
            countResults += 100
        return {'issuechoice': ndic, 'firstdate': firstdate}

    elif type == 'comic':
        return GetComicInfo(comicid, pulldetails(comicid, 'comic', None, 1))
    elif type == 'firstissue':
        return GetFirstIssue(issueid, pulldetails(comicid, 'firstissue', issueid, 1))
    elif type == 'storyarc':
        return GetComicInfo(issueid, pulldetails(arc, 'storyarc', None, 1))
コード例 #9
0
ファイル: nzbget.py プロジェクト: sankarara/mylar
    def sender(self, filename, test=False):
        """Upload the NZB at *filename* to NZBGet.

        Returns {'status': False} on any failure, otherwise
        {'status': True, 'NZBID': <id>} for progress tracking.
        """
        prio = mylar.CONFIG.NZBGET_PRIORITY
        if prio:
            if prio in ('Default', 'Normal'):
                nzbgetpriority = 0
            elif prio == 'Low':
                nzbgetpriority = -50
            elif prio == 'High':
                nzbgetpriority = 50
            elif prio == 'Paused':
                #there's no priority for "paused", so set "Very Low" and deal with that later...
                nzbgetpriority = -100
        else:
            #if nzbget priority isn't selected, default to Normal (0)
            nzbgetpriority = 0

        with open(filename, 'r') as in_file:
            nzbcontent = in_file.read()
        nzbcontent64 = standard_b64encode(nzbcontent)
        try:
            logger.fdebug('sending now to %s' % self.nzb_url)
            sendresponse = self.server.append(filename, nzbcontent64,
                                              mylar.CONFIG.NZBGET_CATEGORY,
                                              nzbgetpriority, False, False, '',
                                              0, 'SCORE')
        except Exception as e:
            logger.warn('uh-oh: %s' % e)
            return {'status': False}
        if sendresponse <= 0:
            logger.warn(
                'Invalid response received after sending to NZBGet: %s' %
                sendresponse)
            return {'status': False}
        #sendresponse is the NZBID that we use to track the progress....
        return {'status': True, 'NZBID': sendresponse}
コード例 #10
0
ファイル: helpers.py プロジェクト: phairplay/mylar
def cvapi_check(web=None):
    """Track ComicVine API usage against a rolling ~15-minute window.

    Reads/writes the module-global counters mylar.CVAPI_TIME and
    mylar.CVAPI_COUNT. When *web* is None the result is only logged;
    otherwise a short 'hits / minutes' summary string is returned.
    """
    import logger
    if web is None: logger.fdebug('[ComicVine API] ComicVine API Check Running...')
    # no window started yet: anchor it at the current time.
    if mylar.CVAPI_TIME is None or mylar.CVAPI_TIME == '':
        c_date = now()
        c_obj_date = datetime.datetime.strptime(c_date,"%Y-%m-%d %H:%M:%S")
        mylar.CVAPI_TIME = c_obj_date
    else:
        # CVAPI_TIME may be held as a string; normalise to a datetime.
        if isinstance(mylar.CVAPI_TIME, unicode):
            c_obj_date = datetime.datetime.strptime(mylar.CVAPI_TIME,"%Y-%m-%d %H:%M:%S")
        else:
            c_obj_date = mylar.CVAPI_TIME
    if web is None: logger.fdebug('[ComicVine API] API Start Monitoring Time (~15mins): ' + str(mylar.CVAPI_TIME))
    now_date = now()
    n_date = datetime.datetime.strptime(now_date,"%Y-%m-%d %H:%M:%S")
    if web is None: logger.fdebug('[ComicVine API] Time now: ' + str(n_date))
    # minutes elapsed since the window started, to two decimals.
    absdiff = abs(n_date - c_obj_date)
    mins = round(((absdiff.days * 24 * 60 * 60 + absdiff.seconds) / 60.0),2)
    # NOTE(review): mins == 15 exactly matches neither branch below — confirm intended.
    if mins < 15:
        if web is None: logger.info('[ComicVine API] Comicvine API count now at : ' + str(mylar.CVAPI_COUNT) + ' / ' + str(mylar.CVAPI_MAX) + ' in ' + str(mins) + ' minutes.')
        if mylar.CVAPI_COUNT > mylar.CVAPI_MAX:
            cvleft = 15 - mins
            if web is None: logger.warn('[ComicVine API] You have already hit your API limit (' + str(mylar.CVAPI_MAX) + ' with ' + str(cvleft) + ' minutes. Best be slowing down, cowboy.')
    elif mins > 15:
        # window expired: reset the counter and restart the window.
        mylar.CVAPI_COUNT = 0
        c_date = now()
        mylar.CVAPI_TIME = datetime.datetime.strptime(c_date,"%Y-%m-%d %H:%M:%S")
        if web is None: logger.info('[ComicVine API] 15 minute API interval resetting [' + str(mylar.CVAPI_TIME) + ']. Resetting API count to : ' + str(mylar.CVAPI_COUNT))

    if web is None:
        return
    else:
        line = str(mylar.CVAPI_COUNT) + ' hits / ' + str(mins) + ' minutes'
        return line
コード例 #11
0
ファイル: cv.py プロジェクト: mriutta/mylar
def getComic(comicid,type,issueid=None):
    """Dispatch a ComicVine query of the given *type* and return parsed data."""
    if type == 'issue':
        # probe once to learn the total result count before paginating.
        searched = pulldetails(comicid, 'issue', None, 0)
        if searched is None:
            return False
        totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
        logger.fdebug("there are " + str(totalResults) + " search results...")
        if not totalResults:
            return False
        ndic = []
        firstdate = '2099-00-00'
        countResults = 0
        while countResults < int(totalResults):
            logger.fdebug("querying " + str(countResults))
            if countResults > 0:
                # new api - pagination uses the running count as the offset.
                searched = pulldetails(comicid, 'issue', None, countResults)
            issuechoice, tmpdate = GetIssuesInfo(comicid, searched)
            if tmpdate < firstdate:
                firstdate = tmpdate
            ndic = ndic + issuechoice
            # results are capped at 100 per page.
            countResults += 100
        return {'issuechoice': ndic, 'firstdate': firstdate}

    elif type == 'comic':
        return GetComicInfo(comicid, pulldetails(comicid, 'comic', None, 1))
    elif type == 'firstissue':
        return GetFirstIssue(issueid, pulldetails(comicid, 'firstissue', issueid, 1))
コード例 #12
0
ファイル: nzbget.py プロジェクト: DarkSir23/mylar
    def sender(self, filename, test=False):
        """Upload the NZB at *filename* to NZBGet.

        Returns {'status': False} on any failure, otherwise
        {'status': True, 'NZBID': <id>} for progress tracking.
        """
        prio = mylar.CONFIG.NZBGET_PRIORITY
        if prio:
            if prio in ('Default', 'Normal'):
                nzbgetpriority = 0
            elif prio == 'Low':
                nzbgetpriority = -50
            elif prio == 'High':
                nzbgetpriority = 50
            elif prio == 'Paused':
                #there's no priority for "paused", so set "Very Low" and deal with that later...
                nzbgetpriority = -100
        else:
            #if nzbget priority isn't selected, default to Normal (0)
            nzbgetpriority = 0

        with open(filename, 'r') as in_file:
            nzbcontent = in_file.read()
        nzbcontent64 = standard_b64encode(nzbcontent)
        try:
            logger.fdebug('sending now to %s' % self.nzb_url)
            # a None category is passed to NZBGet as an empty string.
            nzb_category = '' if mylar.CONFIG.NZBGET_CATEGORY is None else mylar.CONFIG.NZBGET_CATEGORY
            sendresponse = self.server.append(filename, nzbcontent64, nzb_category, nzbgetpriority, False, False, '', 0, 'SCORE')
        except Exception as e:
            logger.warn('uh-oh: %s' % e)
            return {'status': False}
        if sendresponse <= 0:
            logger.warn('Invalid response received after sending to NZBGet: %s' % sendresponse)
            return {'status': False}
        #sendresponse is the NZBID that we use to track the progress....
        return {'status': True,
                'NZBID':  sendresponse}
コード例 #13
0
ファイル: helpers.py プロジェクト: citrusy/mylar
def latestdate_fix():
    """Rewrite partial LatestDate values (mm-yyyy / yyyy-mm) as yyyy-mm-01."""
    import db, logger
    myDB = db.DBConnection()
    comiclist = myDB.action('SELECT * FROM comics')
    if comiclist is None:
        logger.fdebug('No Series in watchlist to correct latest date')
        return
    datefix = []
    for row in comiclist:
        latestdate = row['LatestDate']
        if latestdate[8:] != '':
            # already carries a day component - nothing to do.
            continue
        if len(latestdate) > 7:
            continue
        finddash = latestdate.find('-')
        if finddash != 4:
            # mm-yyyy layout
            lat_month = latestdate[:finddash]
            lat_year = latestdate[finddash + 1:]
        else:
            # yyyy-mm layout
            lat_month = latestdate[finddash + 1:]
            lat_year = latestdate[:finddash]
        datefix.append({"comicid": row['ComicID'],
                        "latestdate": lat_year + '-' + str(lat_month) + '-01'})

    # now we fix.
    for df in datefix:
        myDB.upsert("comics",
                    {"LatestDate": df['latestdate']},
                    {"ComicID": df['comicid']})
    return
コード例 #14
0
ファイル: cv.py プロジェクト: GingerCowboy/mylar
def GetIssuesInfo(comicid,dom):
    """Extract per-issue information for *comicid* from a ComicVine DOM.

    Returns (issuech, firstdate): the list of issue dicts and the earliest
    cover date seen ('2099-00-00' when none was found).
    """
    subtracks = dom.getElementsByTagName('issue')
    if not mylar.CV_ONLY:
        cntiss = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
        logger.fdebug("issues I've counted: " + str(len(subtracks)))
        logger.fdebug("issues CV says it has: " + str(int(cntiss)))

        if int(len(subtracks)) != int(cntiss):
            logger.fdebug("CV's count is wrong, I counted different...going with my count for physicals" + str(len(subtracks)))
            cntiss = len(subtracks) # assume count of issues is wrong, go with ACTUAL physical api count
        cntiss = int(cntiss)
        n = cntiss-1
    else:
        n = int(len(subtracks))
    # bugfix: 'issue' was referenced in the non-CV_ONLY branch below without
    # ever being defined, raising a NameError whenever CV_ONLY is disabled.
    issue = {}
    tempissue = {}
    issuech = []
    firstdate = '2099-00-00'
    for subtrack in subtracks:
        if not mylar.CV_ONLY:
            # legacy path: index the flat tag lists from the end (n counts down).
            if (dom.getElementsByTagName('name')[n].firstChild) is not None:
                issue['Issue_Name'] = dom.getElementsByTagName('name')[n].firstChild.wholeText
            else:
                issue['Issue_Name'] = 'None'

            issue['Issue_ID'] = dom.getElementsByTagName('id')[n].firstChild.wholeText
            issue['Issue_Number'] = dom.getElementsByTagName('issue_number')[n].firstChild.wholeText

            issuech.append({
                'Issue_ID':                issue['Issue_ID'],
                'Issue_Number':            issue['Issue_Number'],
                'Issue_Name':              issue['Issue_Name']
                })
        else:
            # CV_ONLY path: read tags relative to each <issue> node.
            try:
                tempissue['Issue_Name'] = subtrack.getElementsByTagName('name')[0].firstChild.wholeText
            except:
                tempissue['Issue_Name'] = 'None'
            tempissue['Issue_ID'] = subtrack.getElementsByTagName('id')[0].firstChild.wholeText
            try:
                tempissue['CoverDate'] = subtrack.getElementsByTagName('cover_date')[0].firstChild.wholeText
            except:
                tempissue['CoverDate'] = '0000-00-00'
            tempissue['Issue_Number'] = subtrack.getElementsByTagName('issue_number')[0].firstChild.wholeText
            issuech.append({
                'Issue_ID':                tempissue['Issue_ID'],
                'Issue_Number':            tempissue['Issue_Number'],
                'Issue_Date':              tempissue['CoverDate'],
                'Issue_Name':              tempissue['Issue_Name']
                })

            # track the earliest real cover date for the series.
            if tempissue['CoverDate'] < firstdate and tempissue['CoverDate'] != '0000-00-00':
                firstdate = tempissue['CoverDate']
        n-=1

    return issuech, firstdate
コード例 #15
0
ファイル: cv.py プロジェクト: mriutta/mylar
def GetIssuesInfo(comicid,dom):
    """Extract per-issue information for *comicid* from a ComicVine DOM.

    Returns (issuech, firstdate): the list of issue dicts and the earliest
    cover date seen ('2099-00-00' when none was found).
    """
    subtracks = dom.getElementsByTagName('issue')
    if not mylar.CV_ONLY:
        cntiss = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
        logger.fdebug("issues I've counted: " + str(len(subtracks)))
        logger.fdebug("issues CV says it has: " + str(int(cntiss)))

        if int(len(subtracks)) != int(cntiss):
            logger.fdebug("CV's count is wrong, I counted different...going with my count for physicals" + str(len(subtracks)))
            cntiss = len(subtracks) # assume count of issues is wrong, go with ACTUAL physical api count
        cntiss = int(cntiss)
        n = cntiss-1
    else:
        n = int(len(subtracks))
    # bugfix: 'issue' was referenced in the non-CV_ONLY branch below without
    # ever being defined, raising a NameError whenever CV_ONLY is disabled.
    issue = {}
    tempissue = {}
    issuech = []
    firstdate = '2099-00-00'
    for subtrack in subtracks:
        if not mylar.CV_ONLY:
            # legacy path: index the flat tag lists from the end (n counts down).
            if (dom.getElementsByTagName('name')[n].firstChild) is not None:
                issue['Issue_Name'] = dom.getElementsByTagName('name')[n].firstChild.wholeText
            else:
                issue['Issue_Name'] = 'None'

            issue['Issue_ID'] = dom.getElementsByTagName('id')[n].firstChild.wholeText
            issue['Issue_Number'] = dom.getElementsByTagName('issue_number')[n].firstChild.wholeText

            issuech.append({
                'Issue_ID':                issue['Issue_ID'],
                'Issue_Number':            issue['Issue_Number'],
                'Issue_Name':              issue['Issue_Name']
                })
        else:
            # CV_ONLY path: read tags relative to each <issue> node.
            try:
                tempissue['Issue_Name'] = subtrack.getElementsByTagName('name')[0].firstChild.wholeText
            except:
                tempissue['Issue_Name'] = 'None'
            tempissue['Issue_ID'] = subtrack.getElementsByTagName('id')[0].firstChild.wholeText
            try:
                tempissue['CoverDate'] = subtrack.getElementsByTagName('cover_date')[0].firstChild.wholeText
            except:
                tempissue['CoverDate'] = '0000-00-00'
            tempissue['Issue_Number'] = subtrack.getElementsByTagName('issue_number')[0].firstChild.wholeText
            issuech.append({
                'Issue_ID':                tempissue['Issue_ID'],
                'Issue_Number':            tempissue['Issue_Number'],
                'Issue_Date':              tempissue['CoverDate'],
                'Issue_Name':              tempissue['Issue_Name']
                })

            # track the earliest real cover date for the series.
            if tempissue['CoverDate'] < firstdate and tempissue['CoverDate'] != '0000-00-00':
                firstdate = tempissue['CoverDate']
        n-=1

    return issuech, firstdate
コード例 #16
0
ファイル: cv.py プロジェクト: Tension113/mylar
def GetComicInfo(comicid,dom):
    """Build the comic-summary dict for *comicid* from a ComicVine DOM.

    Returns a dict with ComicName/Year/URL/Issues/Image/Publisher plus a
    'comicchoice' list carrying the same data.
    """
    #comicvine isn't as up-to-date with issue counts..
    #so this can get really buggered, really fast.
    tracks = dom.getElementsByTagName('issue')
    try:
        cntit = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
    except:
        cntit = len(tracks)
    trackcnt = len(tracks)
    logger.fdebug("number of issues I counted: " + str(trackcnt))
    logger.fdebug("number of issues CV says it has: " + str(cntit))
    # if the two don't match, use trackcnt as count_of_issues might be not upto-date for some reason
    if int(trackcnt) != int(cntit):
        cntit = trackcnt
        vari = "yes"
    else: vari = "no"
    logger.fdebug("vari is set to: " + str(vari))
    #if str(trackcnt) != str(int(cntit)+2):
    #    cntit = int(cntit) + 1
    comic = {}
    comicchoice = []
    cntit = int(cntit)
    #retrieve the first xml tag (<tag>data</tag>)
    #that the parser finds with name tagName:
    # NOTE(review): the series <name> is assumed to sit at flat-list index
    # trackcnt (right after the per-issue <name> nodes) and the publisher
    # <name> at trackcnt+1 — this depends on ComicVine's response ordering;
    # confirm before touching the indexing.
    comic['ComicName'] = dom.getElementsByTagName('name')[trackcnt].firstChild.wholeText
    comic['ComicName'] = comic['ComicName'].rstrip()
    comic['ComicYear'] = dom.getElementsByTagName('start_year')[0].firstChild.wholeText
    comic['ComicURL'] = dom.getElementsByTagName('site_detail_url')[0].firstChild.wholeText
    # when the counts disagreed, trust our own physical count.
    if vari == "yes":
        comic['ComicIssues'] = str(cntit)
    else:
        comic['ComicIssues'] = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
    comic['ComicImage'] = dom.getElementsByTagName('super_url')[0].firstChild.wholeText
    comic['ComicPublisher'] = dom.getElementsByTagName('name')[trackcnt+1].firstChild.wholeText

    comicchoice.append({
        'ComicName':              comic['ComicName'],
        'ComicYear':              comic['ComicYear'],
        'Comicid':                comicid,
        'ComicURL':               comic['ComicURL'],
        'ComicIssues':            comic['ComicIssues'],
        'ComicImage':             comic['ComicImage'],
        'ComicPublisher':         comic['ComicPublisher']
        })

    comic['comicchoice'] = comicchoice
    return comic
コード例 #17
0
ファイル: helpers.py プロジェクト: phairplay/mylar
def checkthepub(ComicID):
    """Return the check interval for *ComicID* based on its publisher.

    Big publishers (and unknown series) get mylar.BIGGIE_PUB; everything
    else gets mylar.INDIE_PUB.
    """
    import db, logger
    myDB = db.DBConnection()
    biggies = ('marvel', 'dc', 'darkhorse')
    pubchk = myDB.selectone("SELECT * FROM comics WHERE ComicID=?", [ComicID]).fetchone()
    if pubchk is None:
        logger.fdebug('No publisher information found to aid in determining series..defaulting to base check of 55 days.')
        return mylar.BIGGIE_PUB

    pubname = str(pubchk['ComicPublisher']).lower()
    if any(big in pubname for big in biggies):
        logger.fdebug('Biggie publisher detected - ' + str(pubchk['ComicPublisher']))
        return mylar.BIGGIE_PUB

    logger.fdebug('Indie publisher detected - ' + str(pubchk['ComicPublisher']))
    return mylar.INDIE_PUB
コード例 #18
0
ファイル: db.py プロジェクト: DarkSir23/mylar
 def worker(self):
     """Poll the shared mylarQueue and write queued upserts to the DB.

     NOTE(review): returns after the first successful upsert, which ends
     the polling loop — confirm that is intended for a long-lived worker.
     """
     myDB = DBConnection()
     #this should be in it's own thread somewhere, constantly polling the queue and sending them to the writer.
     logger.fdebug('worker started.')
     while True:
         # current thread name, used only as a log tag.
         thisthread = threading.currentThread().name
         if not mylarQueue.empty():
             logger.fdebug('[' + str(thisthread) + '] queue is not empty yet...')
             # blocking get of a (table, values, keys) tuple queued by a producer.
             (QtableName, QvalueDict, QkeyDict) = mylarQueue.get(block=True, timeout=None)
             logger.fdebug('[REQUEUE] Table: ' + str(QtableName) + ' values: ' + str(QvalueDict) + ' keys: ' + str(QkeyDict))
             sqlResult = myDB.upsert(QtableName, QvalueDict, QkeyDict)
             if sqlResult:
                 mylarQueue.task_done()
                 return sqlResult
         else:
             # queue empty: back off briefly before polling again.
             time.sleep(1)
コード例 #19
0
 def worker(self):
     """Poll the shared mylarQueue and write queued upserts to the DB.

     NOTE(review): returns after the first successful upsert, which ends
     the polling loop — confirm that is intended for a long-lived worker.
     """
     myDB = DBConnection()
     #this should be in it's own thread somewhere, constantly polling the queue and sending them to the writer.
     logger.fdebug('worker started.')
     while True:
         # current thread name, used only as a log tag.
         thisthread = threading.currentThread().name
         if not mylarQueue.empty():
             logger.fdebug('[' + str(thisthread) +
                           '] queue is not empty yet...')
             # blocking get of a (table, values, keys) tuple queued by a producer.
             (QtableName, QvalueDict,
              QkeyDict) = mylarQueue.get(block=True, timeout=None)
             logger.fdebug('[REQUEUE] Table: ' + str(QtableName) +
                           ' values: ' + str(QvalueDict) + ' keys: ' +
                           str(QkeyDict))
             sqlResult = myDB.upsert(QtableName, QvalueDict, QkeyDict)
             if sqlResult:
                 mylarQueue.task_done()
                 return sqlResult
         else:
             # queue empty: back off briefly before polling again.
             time.sleep(1)
コード例 #20
0
ファイル: nzbget.py プロジェクト: yonkyunior/mylar
    def historycheck(self, nzbid):
        """Look up *nzbid* in NZBGet's history and report completion status.

        Returns {'status': True, 'name': ..., 'location': ..., 'failed': False}
        when the download completed and its destination directory exists;
        {'status': False} when it is missing or its destination is gone.

        NOTE(review): when a matching entry exists but DownloadedSizeMB !=
        FileSizeMB, the method falls through and implicitly returns None —
        confirm callers handle a None result.
        """
        history = self.server.history()
        found = False
        # history entries for our NZBID whose status contains SUCCESS.
        hq = [hs for hs in history if hs['NZBID'] == nzbid and 'SUCCESS' in hs['Status']]
        if len(hq) > 0:
            logger.fdebug('found matching completed item in history. Job has a status of %s' % hq[0]['Status'])
            if hq[0]['DownloadedSizeMB'] == hq[0]['FileSizeMB']:
                logger.fdebug('%s has final file size of %sMB' % (hq[0]['Name'], hq[0]['DownloadedSizeMB']))
                if os.path.isdir(hq[0]['DestDir']):
                    logger.fdebug('location found @ %s' % hq[0]['DestDir'])
                    return {'status':   True,
                            'name':     re.sub('.nzb', '', hq[0]['NZBName']).strip(),
                            'location': hq[0]['DestDir'],
                            'failed':   False}

                else:
                    logger.warn('no file found where it should be @ %s - is there another script that moves things after completion ?' % hq[0]['DestDir'])
                    return {'status': False}
        else:
            logger.warn('Could not find completed item in history')
            return {'status': False}
コード例 #21
0
ファイル: cv.py プロジェクト: Tension113/mylar
def GetIssuesInfo(comicid,dom):
    """Walk the ComicVine volume DOM and collect per-issue data.

    Iterates the <issue> nodes from the highest index downwards, pulling
    the matching name / id / issue_number entries, and returns a dict whose
    'issuechoice' key holds the accumulated list of issue records.
    """
    subtracks = dom.getElementsByTagName('issue')
    cntiss = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
    logger.fdebug("issues I've counted: " + str(len(subtracks)))
    logger.fdebug("issues CV says it has: " + str(int(cntiss)))

    # trust the physical node count over CV's (sometimes stale) counter
    if int(len(subtracks)) != int(cntiss):
        logger.fdebug("CV's count is wrong, I counted different...going with my count for physicals" + str(len(subtracks)))
        cntiss = len(subtracks)
    cntiss = int(cntiss)

    issue = {}
    issuechoice = []
    idx = cntiss - 1
    for _ in subtracks:
        name_node = dom.getElementsByTagName('name')[idx].firstChild
        issue['Issue_Name'] = name_node.wholeText if name_node is not None else 'None'
        issue['Issue_ID'] = dom.getElementsByTagName('id')[idx].firstChild.wholeText
        try:
            issue['Issue_Number'] = dom.getElementsByTagName('issue_number')[idx].firstChild.wholeText
            issuechoice.append({
                 'Issue_ID':                issue['Issue_ID'],
                 'Issue_Number':            issue['Issue_Number'],
                 'Issue_Name':              issue['Issue_Name']
                 })
            issue['issuechoice'] = issuechoice
        except:
            # entries without an issue_number (e.g. publisher nodes) are skipped
            pass
        idx -= 1

    return issue
コード例 #22
0
ファイル: nzbget.py プロジェクト: xeddmc/mylar
    def historycheck(self, nzbinfo):
        """Check NZBGet's history for the completed job described by *nzbinfo*.

        Looks for a SUCCESS or COPY entry matching nzbinfo['NZBID'], refuses to
        proceed when the ComicRN post-processing script is active (to avoid
        double post-processing), and resolves the final download directory.

        Returns a status dict: {'status': True, ...} with location/issueid/
        comicid on success, {'status': 'double-pp' | 'file not found' | False}
        otherwise.  NOTE(review): the SUCCESS path can also fall off the end
        and implicitly return None when no destination dir is resolved.
        """
        nzbid = nzbinfo['NZBID']
        history = self.server.history(True)
        found = False
        destdir = None
        double_pp = False
        # entries for this NZBID that finished as SUCCESS or were COPY'd
        hq = [
            hs for hs in history if hs['NZBID'] == nzbid and (
                'SUCCESS' in hs['Status'] or ('COPY' in hs['Status']))
        ]
        if len(hq) > 0:
            logger.fdebug(
                'found matching completed item in history. Job has a status of %s'
                % hq[0]['Status'])
            # ComicRN may be registered either as a post-processing script...
            if len(hq[0]['ScriptStatuses']) > 0:
                for x in hq[0]['ScriptStatuses']:
                    if 'comicrn' in x['Name'].lower():
                        double_pp = True
                        break

            # ...or as a job parameter set to 'yes'
            if all([len(hq[0]['Parameters']) > 0, double_pp is False]):
                for x in hq[0]['Parameters']:
                    if all(
                        ['comicrn' in x['Name'].lower(), x['Value'] == 'yes']):
                        double_pp = True
                        break

            if double_pp is True:
                logger.warn(
                    'ComicRN has been detected as being active for this category & download. Completed Download Handling will NOT be performed due to this.'
                )
                logger.warn(
                    'Either disable Completed Download Handling for NZBGet within Mylar, or remove ComicRN from your category script in NZBGet.'
                )
                return {'status': 'double-pp', 'failed': False}

            # SUCCESS path: accept when downloaded size is within +/-5% of expected
            if all([
                    'SUCCESS' in hq[0]['Status'],
                (hq[0]['FileSizeMB'] * .95) <= hq[0]['DownloadedSizeMB'] <=
                (hq[0]['FileSizeMB'] * 1.05)
            ]):
                logger.fdebug('%s has final file size of %sMB' %
                              (hq[0]['Name'], hq[0]['DownloadedSizeMB']))
                if os.path.isdir(hq[0]['DestDir']):
                    destdir = hq[0]['DestDir']
                    logger.fdebug('location found @ %s' % destdir)
            # COPY path: job was duplicated/copied; map NZBGet's TempDir onto
            # its DestDir to locate the files
            elif all([
                    'COPY' in hq[0]['Status'],
                    int(hq[0]['FileSizeMB']) > 0,
                    hq[0]['DeleteStatus'] == 'COPY'
            ]):
                config = self.server.config()
                cDestDir = None
                # NOTE(review): cTempDir is only assigned when a 'TempDir'
                # option is seen; if 'DestDir' appears first the loop breaks
                # and the cTempDir reference below raises UnboundLocalError —
                # confirm against NZBGet's config ordering.
                for x in config:
                    if x['Name'] == 'TempDir':
                        cTempDir = x['Value']
                    elif x['Name'] == 'DestDir':
                        cDestDir = x['Value']
                    if cDestDir is not None:
                        break

                if cTempDir in hq[0]['DestDir']:
                    destdir2 = re.sub(cTempDir, cDestDir,
                                      hq[0]['DestDir']).strip()
                    if not destdir2.endswith(os.sep):
                        destdir2 = destdir2 + os.sep
                    destdir = os.path.join(destdir2, hq[0]['Name'])
                    logger.fdebug('NZBGET Destination dir set to: %s' %
                                  destdir)
            else:
                logger.warn(
                    'no file found where it should be @ %s - is there another script that moves things after completion ?'
                    % hq[0]['DestDir'])
                return {'status': 'file not found', 'failed': False}

            # an explicit Mylar-side directory overrides whatever was resolved
            if mylar.CONFIG.NZBGET_DIRECTORY is not None:
                destdir2 = mylar.CONFIG.NZBGET_DIRECTORY
                # NOTE(review): this assignment is immediately overwritten by
                # the os.path.join below — presumably 'destdir2 = destdir2 +
                # os.sep' was intended; verify before changing.
                if not destdir2.endswith(os.sep):
                    destdir = destdir2 + os.sep
                destdir = os.path.join(destdir2, hq[0]['Name'])
                logger.fdebug(
                    'NZBGet Destination folder set via config to: %s' %
                    destdir)

            if destdir is not None:
                return {
                    'status': True,
                    'name': re.sub('.nzb', '', hq[0]['Name']).strip(),
                    'location': destdir,
                    'failed': False,
                    'issueid': nzbinfo['issueid'],
                    'comicid': nzbinfo['comicid'],
                    'apicall': True,
                    'ddl': False
                }
        else:
            logger.warn('Could not find completed NZBID %s in history' % nzbid)
            return {'status': False}
コード例 #23
0
def GetComicInfo(comicid, dom, safechk=None):
    """Parse a ComicVine volume XML DOM into a comic-info dict.

    Parameters:
        comicid -- ComicVine volume id (only used when re-entering on retry).
        dom     -- parsed XML DOM of the ComicVine volume response.
        safechk -- retry counter for CV timeouts; aborts after 5 attempts.

    Returns a dict with keys ComicName, ComicPublisher, ComicYear, ComicURL,
    ComicVersion, Type, Aliases, ComicIssues, ComicImage, ComicImageALT and
    FirstIssueID, or None when ComicVine data could not be retrieved.
    """
    if safechk is None:
        #safetycheck when checking comicvine. If it times out, increment the chk on retry attempts up until 5 tries then abort.
        safechk = 1
    elif safechk > 4:
        logger.error(
            'Unable to add / refresh the series due to inablity to retrieve data from ComicVine. You might want to try abit later and/or make sure ComicVine is up.'
        )
        return
    #comicvine isn't as up-to-date with issue counts..
    #so this can get really buggered, really fast.
    tracks = dom.getElementsByTagName('issue')
    try:
        cntit = dom.getElementsByTagName(
            'count_of_issues')[0].firstChild.wholeText
    except:
        cntit = len(tracks)
    trackcnt = len(tracks)
    logger.fdebug("number of issues I counted: " + str(trackcnt))
    logger.fdebug("number of issues CV says it has: " + str(cntit))
    # if the two don't match, use trackcnt as count_of_issues might be not upto-date for some reason
    if int(trackcnt) != int(cntit):
        cntit = trackcnt
        vari = "yes"
    else:
        vari = "no"
    logger.fdebug("vari is set to: " + str(vari))
    comic = {}
    cntit = int(cntit)
    # Walk every <name> node; its parentNode tells us what it names:
    #   results -> the series name, publisher -> the publisher name.
    try:
        names = len(dom.getElementsByTagName('name'))
        n = 0
        # default so the key always exists even if no publisher node is found
        comic['ComicPublisher'] = 'Unknown'
        while (n < names):
            if dom.getElementsByTagName(
                    'name')[n].parentNode.nodeName == 'results':
                try:
                    comic['ComicName'] = dom.getElementsByTagName(
                        'name')[n].firstChild.wholeText
                    comic['ComicName'] = comic['ComicName'].rstrip()
                except:
                    logger.error(
                        'There was a problem retrieving the given data from ComicVine. Ensure that www.comicvine.com is accessible AND that you have provided your OWN ComicVine API key.'
                    )
                    return

            elif dom.getElementsByTagName(
                    'name')[n].parentNode.nodeName == 'publisher':
                try:
                    comic['ComicPublisher'] = dom.getElementsByTagName(
                        'name')[n].firstChild.wholeText
                except:
                    comic['ComicPublisher'] = "Unknown"

            n += 1
    except:
        logger.warn(
            'Something went wrong retrieving from ComicVine. Ensure your API is up-to-date and that comicvine is accessible'
        )
        return

    try:
        comic['ComicYear'] = dom.getElementsByTagName(
            'start_year')[0].firstChild.wholeText
    except:
        comic['ComicYear'] = '0000'

    #safety check, cause you known, dufus'...
    if comic['ComicYear'][-1:] == '-':
        comic['ComicYear'] = comic['ComicYear'][:-1]

    try:
        comic['ComicURL'] = dom.getElementsByTagName(
            'site_detail_url')[trackcnt].firstChild.wholeText
    except:
        #this should never be an exception. If it is, it's probably due to CV timing out - so let's sleep for abit then retry.
        logger.warn(
            'Unable to retrieve URL for volume. This is usually due to a timeout to CV, or going over the API. Retrying again in 10s.'
        )
        time.sleep(10)
        safechk += 1
        # bugfix: return the retried result -- previously the recursive call's
        # value was discarded, so after a timeout the caller received a
        # partially-built dict missing ComicURL.
        return GetComicInfo(comicid, dom, safechk)

    desdeck = 0
    #the description field actually holds the Volume# - so let's grab it
    try:
        descchunk = dom.getElementsByTagName(
            'description')[0].firstChild.wholeText
        comic_desc = drophtml(descchunk)
        desdeck += 1
    except:
        comic_desc = 'None'

    #sometimes the deck has volume labels
    try:
        deckchunk = dom.getElementsByTagName('deck')[0].firstChild.wholeText
        comic_deck = deckchunk
        desdeck += 1
    except:
        comic_deck = 'None'

    try:
        comic['Aliases'] = dom.getElementsByTagName(
            'aliases')[0].firstChild.wholeText
        # aliases arrive newline-separated; store them '##'-delimited
        comic['Aliases'] = re.sub('\n', '##', comic['Aliases']).strip()
        if comic['Aliases'][-2:] == '##':
            comic['Aliases'] = comic['Aliases'][:-2]
    except:
        comic['Aliases'] = 'None'

    comic['ComicVersion'] = 'None'  #noversion'

    #figure out if it's a print / digital edition.
    comic['Type'] = 'None'
    if comic_deck != 'None':
        if any(
            ['print' in comic_deck.lower(), 'digital' in comic_deck.lower()]):
            if 'print' in comic_deck.lower():
                comic['Type'] = 'Print'
            elif 'digital' in comic_deck.lower():
                comic['Type'] = 'Digital'
    if comic_desc != 'None' and comic['Type'] == 'None':
        if 'print' in comic_desc[:60].lower(
        ) and 'print edition can be found' not in comic_desc.lower():
            comic['Type'] = 'Print'
        elif 'digital' in comic_desc[:60].lower(
        ) and 'digital edition can be found' not in comic_desc.lower():
            comic['Type'] = 'Digital'
        else:
            comic['Type'] = 'Print'

    # Try to extract the volume number, first from the description, then from
    # the deck (desdeck counts how many of the two fields are available).
    while (desdeck > 0):
        if desdeck == 1:
            if comic_desc == 'None':
                comicDes = comic_deck[:30]
            else:
                #extract the first 60 characters
                comicDes = comic_desc[:60].replace('New 52', '')
        elif desdeck == 2:
            #extract the characters from the deck
            comicDes = comic_deck[:30].replace('New 52', '')
        else:
            break

        i = 0
        while (i < 2):
            if 'volume' in comicDes.lower():
                #found volume - let's grab it.
                v_find = comicDes.lower().find('volume')
                #arbitrarily grab the next 10 chars (6 for volume + 1 for space + 3 for the actual vol #)
                #increased to 10 to allow for text numbering (+5 max)
                #sometimes it's volume 5 and ocassionally it's fifth volume.
                if i == 0:
                    vfind = comicDes[v_find:v_find +
                                     15]  #if it's volume 5 format
                    basenums = {
                        'zero': '0',
                        'one': '1',
                        'two': '2',
                        'three': '3',
                        'four': '4',
                        'five': '5',
                        'six': '6',
                        'seven': '7',
                        'eight': '8',
                        'nine': '9',
                        'ten': '10',
                        'i': '1',
                        'ii': '2',
                        'iii': '3',
                        'iv': '4',
                        'v': '5'
                    }
                    logger.fdebug('volume X format - ' + str(i) + ': ' + vfind)
                else:
                    vfind = comicDes[:v_find]  # if it's fifth volume format
                    basenums = {
                        'zero': '0',
                        'first': '1',
                        'second': '2',
                        'third': '3',
                        'fourth': '4',
                        'fifth': '5',
                        'sixth': '6',
                        'seventh': '7',
                        'eighth': '8',
                        'nineth': '9',
                        'tenth': '10',
                        'i': '1',
                        'ii': '2',
                        'iii': '3',
                        'iv': '4',
                        'v': '5'
                    }
                    logger.fdebug('X volume format - ' + str(i) + ': ' + vfind)
                # convert spelled-out numbers to digits
                for nums in basenums:
                    if nums in vfind.lower():
                        sconv = basenums[nums]
                        vfind = re.sub(nums, sconv, vfind.lower())
                        break

                #now we attempt to find the character position after the word 'volume'
                if i == 0:
                    volthis = vfind.lower().find('volume')
                    volthis = volthis + 6  # add on the actual word to the position so that we can grab the subsequent digit
                    vfind = vfind[volthis:volthis +
                                  4]  # grab the next 4 characters ;)
                elif i == 1:
                    volthis = vfind.lower().find('volume')
                    vfind = vfind[volthis -
                                  4:volthis]  # grab the next 4 characters ;)

                if '(' in vfind:
                    #bracket detected in versioning'
                    vfindit = re.findall('[^()]+', vfind)
                    vfind = vfindit[0]
                vf = re.findall('[^<>]+', vfind)
                try:
                    ledigit = re.sub("[^0-9]", "", vf[0])
                    if ledigit != '':
                        comic['ComicVersion'] = ledigit
                        logger.fdebug(
                            "Volume information found! Adding to series record : volume "
                            + comic['ComicVersion'])
                        break
                except:
                    pass

                i += 1
            else:
                i += 1

        if comic['ComicVersion'] == 'None':
            logger.fdebug('comic[ComicVersion]:' + str(comic['ComicVersion']))
            desdeck -= 1
        else:
            break

    if vari == "yes":
        comic['ComicIssues'] = str(cntit)
    else:
        comic['ComicIssues'] = dom.getElementsByTagName(
            'count_of_issues')[0].firstChild.wholeText

    comic['ComicImage'] = dom.getElementsByTagName(
        'super_url')[0].firstChild.wholeText
    comic['ComicImageALT'] = dom.getElementsByTagName(
        'small_url')[0].firstChild.wholeText

    comic['FirstIssueID'] = dom.getElementsByTagName(
        'id')[0].firstChild.wholeText

    return comic
コード例 #24
0
ファイル: parseit.py プロジェクト: yonkyunior/mylar
def GCDAdd(gcdcomicid):
    """Scrape series details from comics.org for each GCD comic id.

    Parameters:
        gcdcomicid -- iterable of GCD series ids to look up.

    Returns a dict with key 'serieschoice' mapping to a list of per-series
    dicts (ComicID, ComicName, ComicYear, ComicIssues, ComicPublisher,
    ComicCover).
    """
    serieschoice = []
    series = {}
    logger.fdebug("I'm trying to find these GCD comicid's:" + str(gcdcomicid))
    for gcdid in gcdcomicid:
        logger.fdebug("looking at gcdid:" + str(gcdid))
        input2 = 'http://www.comics.org/series/' + str(gcdid)
        logger.fdebug("---url: " + str(input2))
        resp = urllib2.urlopen(input2)
        soup = BeautifulSoup(resp)
        logger.fdebug("SeriesName section...")
        parsen = soup.find("span", {"id": "series_name"})
        subpar = parsen('a')[0]
        resultName = subpar.findNext(text=True)
        logger.fdebug("ComicName: " + str(resultName))
        #covers-start
        logger.fdebug("Covers section...")
        coverst = soup.find("div", {"id": "series_cover"})
        # bugfix: soup.find returns None (or a Tag) -- the old 'coverst < 0'
        # int comparison never meaningfully detected a missing cover element.
        if coverst is None:
            gcdcover = "None"
            logger.fdebug("unable to find any covers - setting to None")
        else:
            subcoverst = coverst('img', src=True)[0]
            gcdcover = subcoverst['src']
        logger.fdebug("Cover: " + str(gcdcover))
        #covers end
        #publisher start
        logger.fdebug("Publisher section...")
        try:
            pubst = soup.find("div", {"class": "item_data"})
            catchit = pubst('a')[0]

        except (IndexError, TypeError):
            # fall back to the alternate page layout
            pubst = soup.findAll("div", {"class": "left"})[1]
            catchit = pubst.find("a")

        publisher = catchit.findNext(text=True)
        logger.fdebug("Publisher: " + str(publisher))
        #publisher end
        parsed = soup.find("div", {"id": "series_data"})
        subtxt3 = parsed.find("dd", {"id": "publication_dates"})
        pubdate = subtxt3.findNext(text=True).rstrip()
        logger.fdebug("pubdate:" + str(pubdate))
        subtxt4 = parsed.find("dd", {"id": "issues_published"})
        noiss = subtxt4.findNext(text=True)
        # text looks like 'NN issues (#numbering)' -- split out both parts
        lenwho = len(noiss)
        lent = noiss.find(' ', 2)
        lenf = noiss.find('(')
        stringit = noiss[lenf:lenwho]
        stringout = noiss[:lent]
        noissues = stringout.rstrip('  \t\r\n\0')
        numbering = stringit.rstrip('  \t\r\n\0')
        logger.fdebug("noissues:" + str(noissues))
        logger.fdebug("numbering:" + str(numbering))
        serieschoice.append({
               "ComicID":         gcdid,
               "ComicName":       resultName,
               "ComicYear":        pubdate,
               "ComicIssues":    noissues,
               "ComicPublisher": publisher,
               "ComicCover":     gcdcover
              })
    series['serieschoice'] = serieschoice
    return series
コード例 #25
0
ファイル: helpers.py プロジェクト: phairplay/mylar
def havetotals(refreshit=None):
    """Build the have/total issue summary for watched series.

    Parameters:
        refreshit -- optional ComicID; when given, only that series is
                     checked and a bool is returned (True = force a full
                     Refresh/Rescan, False = restore previous statuses).

    Returns a list of per-series summary dicts for the watchlist view, or a
    bool in refresh mode (see above).
    """
    import db, logger

    comics = []

    myDB = db.DBConnection()

    if refreshit is None:
        comiclist = myDB.select('SELECT * from comics order by ComicSortName COLLATE NOCASE')
    else:
        comiclist = []
        comicref = myDB.selectone("SELECT * from comics WHERE ComicID=?", [refreshit]).fetchone()
        #refreshit is the ComicID passed from the Refresh Series to force/check numerical have totals
        comiclist.append({"ComicID":  comicref[0],
                          "Have":     comicref[7],
                          "Total":   comicref[8]})
    for comic in comiclist:
        issue = myDB.selectone("SELECT COUNT(*) as count FROM issues WHERE ComicID=?", [comic['ComicID']]).fetchone()
        if issue is None:
            if refreshit is not None:
                logger.fdebug(str(comic['ComicID']) + ' has no issuedata available. Forcing complete Refresh/Rescan')
                return True
            else:
                continue
        if mylar.ANNUALS_ON:
            annuals_on = True
            annual = myDB.selectone("SELECT COUNT(*) as count FROM annuals WHERE ComicID=?", [comic['ComicID']]).fetchone()
            annualcount = annual[0]
            if not annualcount:
                annualcount = 0
        else:
            annuals_on = False
            annual = None
            annualcount = 0
        try:
            totalissues = comic['Total'] + annualcount
            haveissues = comic['Have']
        except TypeError:
            logger.warning('[Warning] ComicID: ' + str(comic['ComicID']) + ' is incomplete - Removing from DB. You should try to re-add the series.')
            myDB.action("DELETE from COMICS WHERE ComicID=? AND ComicName LIKE 'Comic ID%'", [comic['ComicID']])
            myDB.action("DELETE from ISSUES WHERE ComicID=? AND ComicName LIKE 'Comic ID%'", [comic['ComicID']])
            continue

        if not haveissues:
            # bugfix: previously assigned to an unused 'havetracks' variable,
            # leaving haveissues falsy/None for the percentage math below.
            haveissues = 0

        if refreshit is not None:
            if haveissues > totalissues:
                return True   # if it's 5/4, send back to updater and don't restore previous status'
            else:
                return False  # if it's 5/5 or 4/5, send back to updater and restore previous status'

        try:
            percent = (haveissues*100.0)/totalissues
            if percent > 100:
                percent = 100
        except (ZeroDivisionError, TypeError):
            percent = 0
            # bugfix: was misspelled 'totalissuess', so the '?' placeholder
            # never reached the summary dict.
            totalissues = '?'

        # derive the Continuing/Ended display status from publication data
        if comic['ComicPublished'] is None or comic['ComicPublished'] == '':
            recentstatus = 'Unknown'
        elif comic['ForceContinuing'] == 1:
            recentstatus = 'Continuing'
        elif 'present' in comic['ComicPublished'].lower() or (today()[:4] in comic['LatestDate']):
            latestdate = comic['LatestDate']
            c_date = datetime.date(int(latestdate[:4]), int(latestdate[5:7]), 1)
            n_date = datetime.date.today()
            recentchk = (n_date - c_date).days
            # anything older than ~2 months is considered finished
            if recentchk < 55:
                recentstatus = 'Continuing'
            else:
                recentstatus = 'Ended'
        else:
            recentstatus = 'Ended'

        comics.append({"ComicID":         comic['ComicID'],
                       "ComicName":       comic['ComicName'],
                       "ComicSortName":   comic['ComicSortName'],
                       "ComicPublisher":  comic['ComicPublisher'],
                       "ComicYear":       comic['ComicYear'],
                       "ComicImage":      comic['ComicImage'],
                       "LatestIssue":     comic['LatestIssue'],
                       "LatestDate":      comic['LatestDate'],
                       "ComicPublished":  comic['ComicPublished'],
                       "Status":          comic['Status'],
                       "recentstatus":    recentstatus,
                       "percent":         percent,
                       "totalissues":     totalissues,
                       "haveissues":      haveissues,
                       "DateAdded":       comic['LastUpdated']})

    return comics
コード例 #26
0
ファイル: helpers.py プロジェクト: phairplay/mylar
def updateComicLocation():
    """One-time mass update of every series' ComicLocation to NEWCOM_DIR.

    Creates mylar.NEWCOM_DIR if needed, then rewrites each comic's location —
    either by applying FOLDER_FORMAT under the new root (FFTONEWCOM_DIR) or by
    substituting the old root with the new one.  Resets mylar.LOCMOVE when
    done so the migration only runs once.
    """
    import db, logger
    myDB = db.DBConnection()
    if mylar.NEWCOM_DIR is not None:
        logger.info('Performing a one-time mass update to Comic Location')
        #create the root dir if it doesn't exist
        if os.path.isdir(mylar.NEWCOM_DIR):
            logger.info('Directory (' + mylar.NEWCOM_DIR + ') already exists! Continuing...')
        else:
            logger.info('Directory does not exist!')
            try:
                os.makedirs(mylar.NEWCOM_DIR)
                logger.info('Directory successfully created at: ' + mylar.NEWCOM_DIR)
            except OSError:
                logger.error('Could not create comicdir : ' + mylar.NEWCOM_DIR)
                return

        dirlist = myDB.select("SELECT * FROM comics")

        if dirlist is not None:
            for dl in dirlist:

                comversion = dl['ComicVersion']
                if comversion is None:
                    comversion = 'None'
                #if comversion is None, remove it so it doesn't populate with 'None'
                if comversion == 'None':
                    chunk_f_f = re.sub('\$VolumeN', '', mylar.FOLDER_FORMAT)
                    chunk_f = re.compile(r'\s+')
                    folderformat = chunk_f.sub(' ', chunk_f_f)
                else:
                    folderformat = mylar.FOLDER_FORMAT

                #remove all 'bad' characters from the Series Name in order to create directories.
                u_comicnm = dl['ComicName']
                u_comicname = u_comicnm.encode('ascii', 'ignore').strip()
                # bugfix: previously comicdir was only assigned when the name
                # contained a bad character, so a clean series name raised
                # UnboundLocalError (or silently reused the prior loop value).
                comicdir = u_comicname
                comicdir = comicdir.replace(':', '')
                comicdir = comicdir.replace('/', '-')
                comicdir = comicdir.replace(',', '')
                comicdir = comicdir.replace('?', '')

                values = {'$Series':        comicdir,
                          '$Publisher':     re.sub('!', '', dl['ComicPublisher']),
                          '$Year':          dl['ComicYear'],
                          '$series':        dl['ComicName'].lower(),
                          '$publisher':     re.sub('!', '', dl['ComicPublisher']).lower(),
                          '$VolumeY':       'V' + str(dl['ComicYear']),
                          '$VolumeN':       comversion
                          }

                if mylar.FFTONEWCOM_DIR:
                    #if this is enabled (1) it will apply the Folder_Format to all the new dirs
                    if mylar.FOLDER_FORMAT == '':
                        comlocation = re.sub(mylar.DESTINATION_DIR, mylar.NEWCOM_DIR, comicdir)
                    else:
                        first = replace_all(folderformat, values)
                        if mylar.REPLACE_SPACES:
                            #mylar.REPLACE_CHAR ...determines what to replace spaces with underscore or dot
                            first = first.replace(' ', mylar.REPLACE_CHAR)
                        comlocation = os.path.join(mylar.NEWCOM_DIR, first)

                else:
                    comlocation = re.sub(mylar.DESTINATION_DIR, mylar.NEWCOM_DIR, comicdir)

                ctrlVal = {"ComicID":    dl['ComicID']}
                newVal = {"ComicLocation": comlocation}
                myDB.upsert("Comics", newVal, ctrlVal)
                logger.fdebug('updated ' + dl['ComicName'] + ' to : ' + comlocation)
        #set the value to 0 here so we don't keep on doing this...
        mylar.LOCMOVE = 0
        mylar.config_write()
    else:
        logger.info('No new ComicLocation path specified - not updating.')
    return
コード例 #27
0
def GetIssuesInfo(comicid, dom, arcid=None):
    """Build a list of issue records from a ComicVine XML response.

    Parameters:
        comicid: series ComicID the issues belong to.
        dom:     parsed XML (xml.dom) document containing <issue> nodes.
        arcid:   optional story-arc id; when supplied each record is shaped
                 for story-arc storage (ArcID/ComicName/ComicID/IssueID)
                 instead of the plain per-series shape.

    Returns:
        tuple(list, str): the issue dicts and the earliest cover date seen
        that is not '0000-00-00' ('2099-00-00' if none was found).
    """
    subtracks = dom.getElementsByTagName('issue')
    if not mylar.CONFIG.CV_ONLY:
        cntiss = dom.getElementsByTagName(
            'count_of_issues')[0].firstChild.wholeText
        logger.fdebug("issues I've counted: " + str(len(subtracks)))
        logger.fdebug("issues CV says it has: " + str(int(cntiss)))

        if int(len(subtracks)) != int(cntiss):
            logger.fdebug(
                "CV's count is wrong, I counted different...going with my count for physicals"
                + str(len(subtracks)))
            cntiss = len(
                subtracks
            )  # assume count of issues is wrong, go with ACTUAL physical api count
        cntiss = int(cntiss)
        n = cntiss - 1
    else:
        n = int(len(subtracks))
    tempissue = {}
    # BUGFIX: 'issue' was referenced in the non-CV_ONLY branch below without
    # ever being defined, raising NameError on the first iteration.
    issue = {}
    issuech = []
    firstdate = '2099-00-00'
    # NOTE(review): tempissue is shared across iterations, so a field that
    # fails to parse for one issue can carry over the previous issue's value
    # (this matches the original behavior) - confirm it is intended.
    for subtrack in subtracks:
        if not mylar.CONFIG.CV_ONLY:
            if (dom.getElementsByTagName('name')[n].firstChild) is not None:
                issue['Issue_Name'] = dom.getElementsByTagName(
                    'name')[n].firstChild.wholeText
            else:
                issue['Issue_Name'] = 'None'

            issue['Issue_ID'] = dom.getElementsByTagName(
                'id')[n].firstChild.wholeText
            issue['Issue_Number'] = dom.getElementsByTagName(
                'issue_number')[n].firstChild.wholeText

            issuech.append({
                'Issue_ID': issue['Issue_ID'],
                'Issue_Number': issue['Issue_Number'],
                'Issue_Name': issue['Issue_Name']
            })
        else:
            # <name> appears under both <volume> (series) and <issue> (title),
            # so pick each one by inspecting the parent node.
            try:
                totnames = len(subtrack.getElementsByTagName('name'))
                tot = 0
                while (tot < totnames):
                    if subtrack.getElementsByTagName(
                            'name')[tot].parentNode.nodeName == 'volume':
                        tempissue['ComicName'] = subtrack.getElementsByTagName(
                            'name')[tot].firstChild.wholeText
                    elif subtrack.getElementsByTagName(
                            'name')[tot].parentNode.nodeName == 'issue':
                        try:
                            tempissue[
                                'Issue_Name'] = subtrack.getElementsByTagName(
                                    'name')[tot].firstChild.wholeText
                        except:
                            tempissue['Issue_Name'] = None
                    tot += 1
            except:
                tempissue['ComicName'] = 'None'

            # Same parent-node trick for the volume id vs. the issue id.
            try:
                totids = len(subtrack.getElementsByTagName('id'))
                idt = 0
                while (idt < totids):
                    if subtrack.getElementsByTagName(
                            'id')[idt].parentNode.nodeName == 'volume':
                        tempissue['Comic_ID'] = subtrack.getElementsByTagName(
                            'id')[idt].firstChild.wholeText
                    elif subtrack.getElementsByTagName(
                            'id')[idt].parentNode.nodeName == 'issue':
                        tempissue['Issue_ID'] = subtrack.getElementsByTagName(
                            'id')[idt].firstChild.wholeText
                    idt += 1
            except:
                # BUGFIX: this handler previously set 'Issue_Name' even though
                # the try block parses ids, leaving 'Issue_ID' unset and
                # causing a KeyError at the append below.
                tempissue['Issue_ID'] = 'None'

            try:
                tempissue['CoverDate'] = subtrack.getElementsByTagName(
                    'cover_date')[0].firstChild.wholeText
            except:
                tempissue['CoverDate'] = '0000-00-00'
            try:
                tempissue['StoreDate'] = subtrack.getElementsByTagName(
                    'store_date')[0].firstChild.wholeText
            except:
                tempissue['StoreDate'] = '0000-00-00'
            try:
                tempissue['Issue_Number'] = subtrack.getElementsByTagName(
                    'issue_number')[0].firstChild.wholeText
            except:
                logger.fdebug(
                    'No Issue Number available - Trade Paperbacks, Graphic Novels and Compendiums are not supported as of yet.'
                )

            try:
                tempissue['ComicImage'] = subtrack.getElementsByTagName(
                    'small_url')[0].firstChild.wholeText
            except:
                tempissue['ComicImage'] = 'None'

            try:
                tempissue['ComicImageALT'] = subtrack.getElementsByTagName(
                    'medium_url')[0].firstChild.wholeText
            except:
                tempissue['ComicImageALT'] = 'None'

            if arcid is None:
                issuech.append({
                    'Comic_ID': comicid,
                    'Issue_ID': tempissue['Issue_ID'],
                    'Issue_Number': tempissue['Issue_Number'],
                    'Issue_Date': tempissue['CoverDate'],
                    'Store_Date': tempissue['StoreDate'],
                    'Issue_Name': tempissue['Issue_Name'],
                    'Image': tempissue['ComicImage'],
                    'ImageALT': tempissue['ComicImageALT']
                })

            else:
                issuech.append({
                    'ArcID': arcid,
                    'ComicName': tempissue['ComicName'],
                    'ComicID': tempissue['Comic_ID'],
                    'IssueID': tempissue['Issue_ID'],
                    'Issue_Number': tempissue['Issue_Number'],
                    'Issue_Date': tempissue['CoverDate'],
                    'Store_Date': tempissue['StoreDate'],
                    'Issue_Name': tempissue['Issue_Name']
                })

            # Track the earliest real cover date across all issues.
            if tempissue['CoverDate'] < firstdate and tempissue[
                    'CoverDate'] != '0000-00-00':
                firstdate = tempissue['CoverDate']
        n -= 1

    #issue['firstdate'] = firstdate
    return issuech, firstdate
コード例 #28
0
ファイル: cv.py プロジェクト: adrianmoisey/mylar
def GetIssuesInfo(comicid,dom,arcid=None):
    """Build a list of issue records from a ComicVine XML response.

    Older variant (no cover-image fields). See parameters/returns below.

    Parameters:
        comicid: series ComicID the issues belong to.
        dom:     parsed XML (xml.dom) document containing <issue> nodes.
        arcid:   optional story-arc id; when supplied each record is shaped
                 for story-arc storage instead of the plain per-series shape.

    Returns:
        tuple(list, str): the issue dicts and the earliest cover date seen
        that is not '0000-00-00' ('2099-00-00' if none was found).
    """
    subtracks = dom.getElementsByTagName('issue')
    if not mylar.CV_ONLY:
        cntiss = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
        logger.fdebug("issues I've counted: " + str(len(subtracks)))
        logger.fdebug("issues CV says it has: " + str(int(cntiss)))

        if int(len(subtracks)) != int(cntiss):
            logger.fdebug("CV's count is wrong, I counted different...going with my count for physicals" + str(len(subtracks)))
            cntiss = len(subtracks) # assume count of issues is wrong, go with ACTUAL physical api count
        cntiss = int(cntiss)
        n = cntiss-1
    else:
        n = int(len(subtracks))
    tempissue = {}
    # BUGFIX: 'issue' was referenced in the non-CV_ONLY branch below without
    # ever being defined, raising NameError on the first iteration.
    issue = {}
    issuech = []
    firstdate = '2099-00-00'
    # NOTE(review): tempissue is shared across iterations, so a field that
    # fails to parse for one issue can carry over the previous issue's value
    # (this matches the original behavior) - confirm it is intended.
    for subtrack in subtracks:
        if not mylar.CV_ONLY:
            if (dom.getElementsByTagName('name')[n].firstChild) is not None:
                issue['Issue_Name'] = dom.getElementsByTagName('name')[n].firstChild.wholeText
            else:
                issue['Issue_Name'] = 'None'

            issue['Issue_ID'] = dom.getElementsByTagName('id')[n].firstChild.wholeText
            issue['Issue_Number'] = dom.getElementsByTagName('issue_number')[n].firstChild.wholeText

            issuech.append({
                'Issue_ID':                issue['Issue_ID'],
                'Issue_Number':            issue['Issue_Number'],
                'Issue_Name':              issue['Issue_Name']
                })
        else:
            # <name> appears under both <volume> (series) and <issue> (title),
            # so pick each one by inspecting the parent node.
            try:
                totnames = len( subtrack.getElementsByTagName('name') )
                tot = 0
                while (tot < totnames):
                    if subtrack.getElementsByTagName('name')[tot].parentNode.nodeName == 'volume':
                        tempissue['ComicName'] = subtrack.getElementsByTagName('name')[tot].firstChild.wholeText
                    elif subtrack.getElementsByTagName('name')[tot].parentNode.nodeName == 'issue':
                        try:
                            tempissue['Issue_Name'] = subtrack.getElementsByTagName('name')[tot].firstChild.wholeText
                        except:
                            tempissue['Issue_Name'] = None
                    tot+=1
            except:
                tempissue['ComicName'] = 'None'

            # Same parent-node trick for the volume id vs. the issue id.
            try:
                totids = len( subtrack.getElementsByTagName('id') )
                idt = 0
                while (idt < totids):
                    if subtrack.getElementsByTagName('id')[idt].parentNode.nodeName == 'volume':
                        tempissue['Comic_ID'] = subtrack.getElementsByTagName('id')[idt].firstChild.wholeText
                    elif subtrack.getElementsByTagName('id')[idt].parentNode.nodeName == 'issue':
                        tempissue['Issue_ID'] = subtrack.getElementsByTagName('id')[idt].firstChild.wholeText
                    idt+=1
            except:
                # BUGFIX: this handler previously set 'Issue_Name' even though
                # the try block parses ids, leaving 'Issue_ID' unset and
                # causing a KeyError at the append below.
                tempissue['Issue_ID'] = 'None'

            try:
                tempissue['CoverDate'] = subtrack.getElementsByTagName('cover_date')[0].firstChild.wholeText
            except:
                tempissue['CoverDate'] = '0000-00-00'
            try:
                tempissue['StoreDate'] = subtrack.getElementsByTagName('store_date')[0].firstChild.wholeText
            except:
                tempissue['StoreDate'] = '0000-00-00'
            try:
                tempissue['Issue_Number'] = subtrack.getElementsByTagName('issue_number')[0].firstChild.wholeText
            except:
                logger.fdebug('No Issue Number available - Trade Paperbacks, Graphic Novels and Compendiums are not supported as of yet.')

            if arcid is None:
                issuech.append({
                    'Comic_ID':                comicid,
                    'Issue_ID':                tempissue['Issue_ID'],
                    'Issue_Number':            tempissue['Issue_Number'],
                    'Issue_Date':              tempissue['CoverDate'],
                    'Store_Date':              tempissue['StoreDate'],
                    'Issue_Name':              tempissue['Issue_Name']
                    })

            else:
                issuech.append({
                    'ArcID':                   arcid,
                    'ComicName':               tempissue['ComicName'],
                    'ComicID':                 tempissue['Comic_ID'],
                    'IssueID':                 tempissue['Issue_ID'],
                    'Issue_Number':            tempissue['Issue_Number'],
                    'Issue_Date':              tempissue['CoverDate'],
                    'Store_Date':              tempissue['StoreDate'],
                    'Issue_Name':              tempissue['Issue_Name']
                    })

            # Track the earliest real cover date across all issues.
            if tempissue['CoverDate'] < firstdate and tempissue['CoverDate'] != '0000-00-00':
                firstdate = tempissue['CoverDate']
        n-=1

    #issue['firstdate'] = firstdate
    return issuech, firstdate
コード例 #29
0
                    'JusttheDigits': justthedigits
                })
                print('appended.')
            else:
                comiclist.append({
                    'ComicFilename': item,
                    'ComicLocation': comicpath,
                    'ComicSize': comicsize,
                    'JusttheDigits': justthedigits
                })
            watchmatch['comiclist'] = comiclist
        else:
            pass
            #print ("directory found - ignoring")

    logger.fdebug('[FILECHECKER] you have a total of ' + str(comiccnt) + ' ' +
                  watchcomic + ' comics')
    watchmatch['comiccount'] = comiccnt
    return watchmatch


def validateAndCreateDirectory(dir, create=False):
    if os.path.exists(dir):
        logger.info('Found comic directory: ' + dir)
        return True
    else:
        logger.warn('Could not find comic directory: ' + dir)
        if create:
            if dir.strip():
                logger.info('Creating comic directory (' +
                            str(mylar.CHMOD_DIR) + ') : ' + dir)
                try:
コード例 #30
0
ファイル: getcomics.py プロジェクト: DarkSir23/mylar
    def parse_downloadresults(self, id, mainlink):
        """Parse a cached GetComics article page and queue its download links.

        Reads the previously cached HTML file ``getcomics-<id>.html`` from
        the cache directory, extracts series/year/size metadata and every
        usable download link, records each link in the ``ddl_info`` table,
        and pushes each onto the global DDL queue.

        Parameters:
            id:       identifier of the cached article; also used as the
                      db/queue key (suffixed '-N' for multi-part packs).
            mainlink: URL of the originating article page.

        Returns:
            dict: ``{'success': True}`` when at least one link was queued,
            ``{'success': False}`` when no valid links were found.
        """
        myDB = db.DBConnection()
        series = None
        year = None
        size = None
        title = os.path.join(mylar.CONFIG.CACHE_DIR, 'getcomics-' + id)
        soup = BeautifulSoup(open(title+'.html'), 'html.parser')
        # The centered paragraph holds the metadata text nodes
        # (series name, 'Year', 'Size' lines).
        orig_find = soup.find("p", {"style": "text-align: center;"})
        i = 0
        option_find = orig_find
        possible_more = None
        # Walk successive text nodes until the value after the 'Size' label
        # has been captured.
        while True: #i <= 10:
            prev_option = option_find
            option_find = option_find.findNext(text=True)
            if i == 0 and series is None:
                series = option_find
            elif 'Year' in option_find:
                year = option_find.findNext(text=True)
                year = re.sub('\|', '', year).strip()
            else:
                if 'Size' in prev_option:
                    size = option_find #.findNext(text=True)
                    # Remember the node following the metadata block; it may
                    # be a <ul> of per-part links (checked further below).
                    possible_more = orig_find.next_sibling
                    break
            i+=1

        logger.fdebug('Now downloading: %s [%s] / %s ... this can take a while (go get some take-out)...' % (series, year, size))

        link = None
        # Preferred path: a single 'Download Now' button in an aio-pulse div.
        for f in soup.findAll("div", {"class": "aio-pulse"}):
            lk = f.find('a')
            if lk['title'] == 'Download Now':
                link = {"series":  series,
                         "site":   lk['title'],
                         "year":   year,
                         "issues": None,
                         "size":   size,
                         "link":   lk['href']}

                break #get the first link just to test

        links = []

        # Fallback: no single button, but a <ul> of per-volume links exists.
        if link is None and possible_more.name == 'ul':
            try:
                bb = possible_more.findAll('li')
            except:
                pass
            else:
                for x in bb:
                    linkline = x.find('a')
                    if linkline:
                        if 'go.php' in linkline['href']:
                            volume = x.findNext(text=True)
                            # Normalize en-dash to a plain hyphen.
                            if u'\u2013' in volume:
                                volume = re.sub(u'\u2013', '-', volume)
                            #volume label contains series, issue(s), year(s), and size
                            series_st = volume.find('(')
                            issues_st = volume.find('#')
                            series = volume[:series_st]
                            if any([issues_st == -1, series_st == -1]):
                                issues = None
                            else:
                                series = volume[:issues_st].strip()
                                issues = volume[issues_st+1:series_st].strip()
                            year_end = volume.find(')', series_st+1)
                            year = re.sub('[\(\)]', '', volume[series_st+1: year_end]).strip()
                            size_end = volume.find(')', year_end+1)
                            size = re.sub('[\(\)]', '', volume[year_end+1: size_end]).strip()
                            linked = linkline['href']
                            site = linkline.findNext(text=True)
                            # Only queue links served by the main mirror.
                            if site == 'Main Server':
                                links.append({"series": series,
                                              "site":   site,
                                              "year":   year,
                                              "issues": issues,
                                              "size":   size,
                                              "link":   linked})
        else:
            # Otherwise look for a TPB section under an <h3> heading.
            # NOTE(review): entries appended here lack the 'series'/'year'/
            # 'size'/'issues' keys that the queueing loop below indexes -
            # verify this branch against a real TPB page.
            check_extras = soup.findAll("h3")
            for sb in check_extras:
                header = sb.findNext(text=True)
                if header == 'TPBs':
                    nxt = sb.next_sibling
                    if nxt.name == 'ul':
                        bb = nxt.findAll('li')
                        for x in bb:
                            volume = x.findNext(text=True)
                            if u'\u2013' in volume:
                                volume = re.sub(u'\u2013', '-', volume)
                            linkline = x.find('a')
                            linked = linkline['href']
                            site = linkline.findNext(text=True)
                            links.append({"volume": volume,
                                          "site": site,
                                          "link": linked})

        if all([link is None, len(links) == 0]):
            logger.warn('Unable to retrieve any valid immediate download links. They might not exist.')
            return {'success':  False}
        if all([link is not None, len(links) == 0]):
            logger.info('only one item discovered, changing queue length to accomodate: %s [%s]' % (link, type(link)))
            links = [link]
        elif len(links) > 0:
            if len(links) > 1:
                logger.info('[DDL-QUEUER] This pack has been broken up into %s separate packs - queueing each in sequence for your enjoyment.' % len(links))
        cnt = 1
        # Persist each link and hand it to the download queue; multi-part
        # packs get a '-N' suffix so each part has a unique key.
        for x in links:
            if len(links) == 1:
                mod_id = id
            else:
                mod_id = id+'-'+str(cnt)
            #logger.fdebug('[%s] %s (%s) %s [%s][%s]' % (x['site'], x['series'], x['year'], x['issues'], x['size'],  x['link']))

            ctrlval = {'id':        mod_id}
            vals = {'series':       x['series'],
                    'year':         x['year'],
                    'size':         x['size'],
                    'issues':       x['issues'],
                    'issueid':      self.issueid,
                    'comicid':      self.comicid,
                    'link':         x['link'],
                    'mainlink':     mainlink,
                    'updated_date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M'),
                    'status':       'Queued'}
            myDB.upsert('ddl_info', vals, ctrlval)

            mylar.DDL_QUEUE.put({'link':     x['link'],
                                 'mainlink': mainlink,
                                 'series':   x['series'],
                                 'year':     x['year'],
                                 'size':     x['size'],
                                 'comicid':  self.comicid,
                                 'issueid':  self.issueid,
                                 'id':       mod_id,
                                 'resume':   None})
            cnt+=1

        return {'success': True}
コード例 #31
0
ファイル: getcomics.py プロジェクト: DarkSir23/mylar
    def search_results(self):
        """Parse the cached GetComics search-results HTML into result entries.

        Reads ``self.local_filename``, extracts each <article> (one per
        search hit), detects issue-range "packs", and normalizes title,
        size, year and post date.

        Returns:
            dict: {'entries': [ {title, pubdate, filename, size, pack,
            issues, link, year, id, site}, ... ]}
        """
        results = {}
        resultlist = []
        soup = BeautifulSoup(open(self.local_filename), 'html.parser')

        resultline = soup.find("span", {"class": "cover-article-count"}).get_text(strip=True)
        logger.info('There are %s results' % re.sub('Articles', '', resultline).strip())

        for f in soup.findAll("article"):
            id = f['id']
            lk = f.find('a')
            link = lk['href']
            titlefind = f.find("h1", {"class": "post-title"})
            title = titlefind.get_text(strip=True)
            # Normalize en-dash to a plain hyphen for consistent parsing.
            title = re.sub(u'\u2013', '-', title).strip()
            filename = title
            issues = None
            pack = False
            #see if it's a pack type
            # A pack title carries an issue range such as '#1 - 10' or '#1 -10'.
            issfind_st = title.find('#')
            issfind_en = title.find('-', issfind_st)
            if issfind_en != -1:
                if all([title[issfind_en+1] == ' ', title[issfind_en+2].isdigit()]):
                    iss_en = title.find(' ', issfind_en+2)
                    if iss_en != -1:
                        issues = title[issfind_st+1:iss_en]
                        pack = True
                if title[issfind_en+1].isdigit():
                    iss_en = title.find(' ', issfind_en+1)
                    if iss_en != -1:
                        issues = title[issfind_st+1:iss_en]
                        pack = True

            # if it's a pack - remove the issue-range and the possible issue years (cause it most likely will span) and pass thru as separate items
            if pack is True:
                title = re.sub(issues, '', title).strip()
                if title.endswith('#'):
                    title = title[:-1].strip()
            else:
                # Weekly bundle posts are not individual issues - skip them.
                if any(['Marvel Week+' in title, 'INDIE Week+' in title, 'Image Week' in title, 'DC Week+' in title]):
                    continue

            # The centered paragraph holds the 'Year' and size text nodes.
            option_find = f.find("p", {"style": "text-align: center;"})
            i = 0
            # NOTE(review): if an article has neither a 'Year' line nor a
            # parseable size, 'year'/'size' may be unbound (or stale from the
            # previous article) when used below - confirm against real pages.
            while i <= 2:
                option_find = option_find.findNext(text=True)
                if 'Year' in option_find:
                    year = option_find.findNext(text=True)
                    year = re.sub('\|', '', year).strip()
                    if pack is True and '-' in year:
                        title = re.sub('\('+year+'\)', '', title).strip()
                else:
                    size = option_find.findNext(text=True)
                    if all([re.sub(':', '', size).strip() != 'Size', len(re.sub('[^0-9]', '', size).strip()) > 0]):
                        if 'MB' in size:
                            size = re.sub('MB', 'M', size).strip()
                        elif 'GB' in size:
                            size = re.sub('GB', 'G', size).strip()
                        if '//' in size:
                            nwsize = size.find('//')
                            size = re.sub('\[', '', size[:nwsize]).strip()
                    else:
                        size = '0M'
                i+=1
            dateline = f.find('time')
            datefull = dateline['datetime']
            datestamp = time.mktime(time.strptime(datefull, "%Y-%m-%d"))
            resultlist.append({"title":    title,
                               "pubdate":  datetime.datetime.fromtimestamp(float(datestamp)).strftime('%a, %d %b %Y %H:%M:%S'),
                               "filename": filename,
                               "size":     re.sub(' ', '', size).strip(),
                               "pack":     pack,
                               "issues":   issues,
                               "link":     link,
                               "year":     year,
                               "id":       re.sub('post-', '', id).strip(),
                               "site":     'DDL'})

            logger.fdebug('%s [%s]' % (title, size))

        results['entries'] = resultlist
        return results
コード例 #32
0
ファイル: nzbget.py プロジェクト: sankarara/mylar
    def processor(self, nzbinfo):
        """Track an nzbget job until completion and locate the downloaded files.

        Polls the nzbget RPC server every 10 seconds until the job identified
        by ``nzbinfo['NZBID']`` leaves the active queue, then searches the
        download history for a SUCCESS entry and verifies its destination
        directory exists on disk.

        Parameters:
            nzbinfo: dict from nzbget containing at least 'NZBID'.

        Returns:
            dict: ``{'status': True, 'name': ..., 'location': ...,
            'failed': False}`` on success; ``{'status': False}`` on failure.
            NOTE(review): when the history entry's downloaded size does not
            match its file size, the function falls through and implicitly
            returns None - confirm callers treat None as failure.
        """
        nzbid = nzbinfo['NZBID']
        try:
            logger.fdebug(
                'Now checking the active queue of nzbget for the download')
            queueinfo = self.server.listgroups()
        except Exception as e:
            # BUGFIX: was 'except Expection' (typo) - an RPC failure would
            # have raised NameError instead of being handled here.
            logger.warn(
                'Error attempting to retrieve active queue listing: %s' % e)
            return {'status': False}
        else:
            logger.fdebug('valid queue result returned. Analyzing...')
            queuedl = [qu for qu in queueinfo if qu['NZBID'] == nzbid]
            if len(queuedl) == 0:
                logger.warn(
                    'Unable to locate item in active queue. Could it be finished already ?'
                )
                return {'status': False}

            stat = False
            # Poll until the job is no longer present in the active queue.
            while stat is False:
                time.sleep(10)
                queueinfo = self.server.listgroups()
                queuedl = [qu for qu in queueinfo if qu['NZBID'] == nzbid]
                if len(queuedl) == 0:
                    logger.fdebug(
                        'Item is no longer in active queue. It should be finished by my calculations'
                    )
                    stat = True
                else:
                    logger.fdebug('status: %s' % queuedl[0]['Status'])
                    logger.fdebug('name: %s' % queuedl[0]['NZBName'])
                    logger.fdebug('FileSize: %sMB' % queuedl[0]['FileSizeMB'])
                    logger.fdebug('Download Left: %sMB' %
                                  queuedl[0]['RemainingSizeMB'])
                    logger.fdebug('health: %s' % (queuedl[0]['Health'] / 10))
                    logger.fdebug('destination: %s' % queuedl[0]['DestDir'])
            logger.fdebug('File has now downloaded!')
            time.sleep(
                5
            )  #wait some seconds so shit can get written to history properly
            history = self.server.history()
            # Look for a successfully-completed history entry for this job.
            hq = [
                hs for hs in history
                if hs['NZBID'] == nzbid and 'SUCCESS' in hs['Status']
            ]
            if len(hq) > 0:
                logger.fdebug(
                    'found matching completed item in history. Job has a status of %s'
                    % hq[0]['Status'])
                if hq[0]['DownloadedSizeMB'] == hq[0]['FileSizeMB']:
                    logger.fdebug('%s has final file size of %sMB' %
                                  (hq[0]['Name'], hq[0]['DownloadedSizeMB']))
                    if os.path.isdir(hq[0]['DestDir']):
                        logger.fdebug('location found @ %s' % hq[0]['DestDir'])
                        return {
                            'status': True,
                            'name': re.sub('.nzb', '',
                                           hq[0]['NZBName']).strip(),
                            'location': hq[0]['DestDir'],
                            'failed': False
                        }

                    else:
                        logger.warn(
                            'no file found where it should be @ %s - is there another script that moves things after completion ?'
                            % hq[0]['DestDir'])
                        return {'status': False}
            else:
                logger.warn('Could not find completed item in history')
                return {'status': False}
コード例 #33
0
def GetSeriesYears(dom):
    """Collect per-series metadata from a ComicVine <volume> DOM.

    For every series in a story arc, extracts the ComicVine id, series name,
    publisher, start year and (heuristically) the volume number, which is
    buried in the free-text description/deck fields.

    Parameters:
        dom: parsed XML document containing one <volume> node per series.

    Returns:
        list of dicts with keys ComicID, ComicName, SeriesYear, Publisher
        and Volume ('None'/'0000' placeholders when data is unavailable).
    """
    #used by the 'add a story arc' option to individually populate the Series Year for each series within the given arc.
    #series year is required for alot of functionality.
    series = dom.getElementsByTagName('volume')
    tempseries = {}
    serieslist = []
    # NOTE(review): tempseries is reused across iterations, so a field that
    # fails to parse for one series may carry over the previous series'
    # value (ComicID in particular) - confirm this is intended.
    for dm in series:
        try:
            totids = len(dm.getElementsByTagName('id'))
            idc = 0
            # Only take the <id> whose parent is <volume> (skip other ids).
            while (idc < totids):
                if dm.getElementsByTagName(
                        'id')[idc].parentNode.nodeName == 'volume':
                    tempseries['ComicID'] = dm.getElementsByTagName(
                        'id')[idc].firstChild.wholeText
                idc += 1
        except:
            logger.warn(
                'There was a problem retrieving a comicid for a series within the arc. This will have to manually corrected most likely.'
            )
            tempseries['ComicID'] = 'None'

        tempseries['Series'] = 'None'
        tempseries['Publisher'] = 'None'
        try:
            totnames = len(dm.getElementsByTagName('name'))
            namesc = 0
            # <name> appears under both <volume> (series title) and
            # <publisher> - select each by its parent node.
            while (namesc < totnames):
                if dm.getElementsByTagName(
                        'name')[namesc].parentNode.nodeName == 'volume':
                    tempseries['Series'] = dm.getElementsByTagName(
                        'name')[namesc].firstChild.wholeText
                elif dm.getElementsByTagName(
                        'name')[namesc].parentNode.nodeName == 'publisher':
                    tempseries['Publisher'] = dm.getElementsByTagName(
                        'name')[namesc].firstChild.wholeText
                namesc += 1
        except:
            logger.warn(
                'There was a problem retrieving a Series Name or Publisher for a series within the arc. This will have to manually corrected.'
            )

        try:
            tempseries['SeriesYear'] = dm.getElementsByTagName(
                'start_year')[0].firstChild.wholeText
        except:
            logger.warn(
                'There was a problem retrieving the start year for a particular series within the story arc.'
            )
            tempseries['SeriesYear'] = '0000'

        #cause you know, dufus'...
        # Strip a trailing '-' that some records append to the start year.
        if tempseries['SeriesYear'][-1:] == '-':
            tempseries['SeriesYear'] = tempseries['SeriesYear'][:-1]

        # desdeck counts how many of description/deck are available to scan.
        desdeck = 0
        tempseries['Volume'] = 'None'

        #the description field actually holds the Volume# - so let's grab it
        try:
            descchunk = dm.getElementsByTagName(
                'description')[0].firstChild.wholeText
            comic_desc = drophtml(descchunk)
            desdeck += 1
        except:
            comic_desc = 'None'

        #sometimes the deck has volume labels
        try:
            deckchunk = dm.getElementsByTagName('deck')[0].firstChild.wholeText
            comic_deck = deckchunk
            desdeck += 1
        except:
            comic_deck = 'None'

        # Scan the available text fields (description first, then deck)
        # for a 'volume N' / 'Nth volume' marker.
        while (desdeck > 0):
            if desdeck == 1:
                if comic_desc == 'None':
                    comicDes = comic_deck[:30]
                else:
                    #extract the first 60 characters
                    comicDes = comic_desc[:60].replace('New 52', '')
            elif desdeck == 2:
                #extract the characters from the deck
                comicDes = comic_deck[:30].replace('New 52', '')
            else:
                break

            i = 0
            # Pass i==0 handles 'volume five'; pass i==1 handles
            # 'fifth volume'.
            while (i < 2):
                if 'volume' in comicDes.lower():
                    #found volume - let's grab it.
                    v_find = comicDes.lower().find('volume')
                    #arbitrarily grab the next 10 chars (6 for volume + 1 for space + 3 for the actual vol #)
                    #increased to 10 to allow for text numbering (+5 max)
                    #sometimes it's volume 5 and ocassionally it's fifth volume.
                    if i == 0:
                        vfind = comicDes[v_find:v_find +
                                         15]  #if it's volume 5 format
                        # word-number -> digit map for the 'volume five' form
                        basenums = {
                            'zero': '0',
                            'one': '1',
                            'two': '2',
                            'three': '3',
                            'four': '4',
                            'five': '5',
                            'six': '6',
                            'seven': '7',
                            'eight': '8',
                            'nine': '9',
                            'ten': '10',
                            'i': '1',
                            'ii': '2',
                            'iii': '3',
                            'iv': '4',
                            'v': '5'
                        }
                        logger.fdebug('volume X format - ' + str(i) + ': ' +
                                      vfind)
                    else:
                        vfind = comicDes[:
                                         v_find]  # if it's fifth volume format
                        # ordinal-word -> digit map for the 'fifth volume' form
                        basenums = {
                            'zero': '0',
                            'first': '1',
                            'second': '2',
                            'third': '3',
                            'fourth': '4',
                            'fifth': '5',
                            'sixth': '6',
                            'seventh': '7',
                            'eighth': '8',
                            'nineth': '9',
                            'tenth': '10',
                            'i': '1',
                            'ii': '2',
                            'iii': '3',
                            'iv': '4',
                            'v': '5'
                        }
                        logger.fdebug('X volume format - ' + str(i) + ': ' +
                                      vfind)
                    # (volconv is assigned but never used - kept as-is)
                    volconv = ''
                    for nums in basenums:
                        if nums in vfind.lower():
                            sconv = basenums[nums]
                            vfind = re.sub(nums, sconv, vfind.lower())
                            break
                    #logger.info('volconv: ' + str(volconv))

                    if i == 0:
                        volthis = vfind.lower().find('volume')
                        volthis = volthis + 6  # add on the actual word to the position so that we can grab the subsequent digit
                        vfind = vfind[volthis:volthis +
                                      4]  # grab the next 4 characters ;)
                    elif i == 1:
                        volthis = vfind.lower().find('volume')
                        vfind = vfind[volthis - 4:
                                      volthis]  # grab the next 4 characters ;)

                    if '(' in vfind:
                        #bracket detected in versioning'
                        vfindit = re.findall('[^()]+', vfind)
                        vfind = vfindit[0]
                    vf = re.findall('[^<>]+', vfind)
                    try:
                        # Keep only digits; accept the first non-empty result.
                        ledigit = re.sub("[^0-9]", "", vf[0])
                        if ledigit != '':
                            tempseries['Volume'] = ledigit
                            logger.fdebug(
                                "Volume information found! Adding to series record : volume "
                                + tempseries['Volume'])
                            break
                    except:
                        pass

                    i += 1
                else:
                    i += 1

            if tempseries['Volume'] == 'None':
                logger.fdebug('tempseries[Volume]:' +
                              str(tempseries['Volume']))
                desdeck -= 1
            else:
                break

        serieslist.append({
            "ComicID": tempseries['ComicID'],
            "ComicName": tempseries['Series'],
            "SeriesYear": tempseries['SeriesYear'],
            "Publisher": tempseries['Publisher'],
            "Volume": tempseries['Volume']
        })

    return serieslist
コード例 #34
0
 def __init__(self):
     """Spawn the daemon thread that performs all DB writes for this handle."""
     writer = threading.Thread(target=self.worker, name="DB-WRITER")
     writer.daemon = True  # don't keep the interpreter alive on shutdown
     writer.start()
     logger.fdebug('Thread WriteOnly initialized.')
コード例 #35
0
def UpdateDates(dom):
    """Parse a ComicVine XML response into a list of issue records.

    Parameters:
        dom: a parsed XML document (e.g. from xml.dom.minidom) containing
            one or more <issue> elements. Each issue may carry <id> and
            <name> nodes either directly (parent node 'issue') or inside a
            nested <volume> element (parent node 'volume'), plus
            <cover_date>, <store_date>, <issue_number> and
            <date_last_updated> fields.

    Returns:
        list of dicts with keys ComicID, IssueID, SeriesTitle, IssueTitle,
        CoverDate, StoreDate, IssueNumber, Date_Last_Updated. Missing ids,
        titles and issue numbers fall back to the string 'None'; missing
        dates fall back to '0000-00-00'.
    """
    issuelist = []
    for dm in dom.getElementsByTagName('issue'):
        # fresh record per issue so values never leak between iterations
        tempissue = {'ComicID': 'None', 'IssueID': 'None'}
        try:
            # <id> appears under both <issue> and its nested <volume>;
            # the parent node name distinguishes series id from issue id.
            for idnode in dm.getElementsByTagName('id'):
                if idnode.parentNode.nodeName == 'volume':
                    tempissue['ComicID'] = idnode.firstChild.wholeText
                elif idnode.parentNode.nodeName == 'issue':
                    tempissue['IssueID'] = idnode.firstChild.wholeText
        except Exception:
            logger.warn(
                'There was a problem retrieving a comicid/issueid for the given issue. This will have to manually corrected most likely.'
            )

        tempissue['SeriesTitle'] = 'None'
        tempissue['IssueTitle'] = 'None'
        try:
            # same parent-node trick for <name>: issue title vs series title
            for namenode in dm.getElementsByTagName('name'):
                if namenode.parentNode.nodeName == 'issue':
                    tempissue['IssueTitle'] = namenode.firstChild.wholeText
                elif namenode.parentNode.nodeName == 'volume':
                    tempissue['SeriesTitle'] = namenode.firstChild.wholeText
        except Exception:
            logger.warn(
                'There was a problem retrieving the Series Title / Issue Title for a series within the arc. This will have to manually corrected.'
            )

        try:
            tempissue['CoverDate'] = dm.getElementsByTagName(
                'cover_date')[0].firstChild.wholeText
        except Exception:
            tempissue['CoverDate'] = '0000-00-00'
        try:
            tempissue['StoreDate'] = dm.getElementsByTagName(
                'store_date')[0].firstChild.wholeText
        except Exception:
            tempissue['StoreDate'] = '0000-00-00'
        try:
            tempissue['IssueNumber'] = dm.getElementsByTagName(
                'issue_number')[0].firstChild.wholeText
        except Exception:
            logger.fdebug(
                'No Issue Number available - Trade Paperbacks, Graphic Novels and Compendiums are not supported as of yet.'
            )
            tempissue['IssueNumber'] = 'None'
        try:
            tempissue['date_last_updated'] = dm.getElementsByTagName(
                'date_last_updated')[0].firstChild.wholeText
        except Exception:
            tempissue['date_last_updated'] = '0000-00-00'

        issuelist.append({
            'ComicID': tempissue['ComicID'],
            'IssueID': tempissue['IssueID'],
            'SeriesTitle': tempissue['SeriesTitle'],
            'IssueTitle': tempissue['IssueTitle'],
            'CoverDate': tempissue['CoverDate'],
            'StoreDate': tempissue['StoreDate'],
            'IssueNumber': tempissue['IssueNumber'],
            'Date_Last_Updated': tempissue['date_last_updated']
        })

    return issuelist
コード例 #36
0
def listFiles(dir, watchcomic, AlternateSearch=None, manual=None):
    """Scan directory *dir* (non-recursively) for files matching *watchcomic*.

    Parameters:
        dir: directory whose entries are checked (os.listdir; 'cover.jpg'
            and 'cvinfo' entries are skipped).
        watchcomic: series name (utf-8) to match against filenames.
        AlternateSearch: optional '##'-delimited string of alternate series
            names that also count as matches.
        manual: optional dict with 'SeriesYear' and 'Total' keys; when
            given, an extra year-range and word-by-word match is applied so
            that e.g. 'Batman' does not match 'Batman Incorporated' files.

    Returns:
        dict with 'comiccount' (number of matches) and, when at least one
        file matched, 'comiclist': a list of dicts with ComicFilename,
        ComicLocation, ComicSize and JusttheDigits (the issue-number
        portion pruned from the filename) keys.
    """

    # use AlternateSearch to check for filenames that follow that naming pattern
    # ie. Star Trek TNG Doctor Who Assimilation won't get hits as the
    # checker looks for Star Trek TNG Doctor Who Assimilation2 (according to CV)

    # we need to convert to ascii, as watchcomic is utf-8 and special chars f'it up
    u_watchcomic = watchcomic.encode('ascii', 'ignore').strip()
    logger.fdebug('comic: ' + watchcomic)
    basedir = dir
    logger.fdebug('Looking in: ' + dir)
    watchmatch = {}
    comiclist = []
    comiccnt = 0
    # special characters that are stripped / specially handled below
    not_these = [
        '#', ',', '\/', ':', '\;', '.', '-', '\!', '\$', '\%', '\+', '\'',
        '\?', '\@'
    ]

    # alphanumeric suffixes (e.g. '1AU') that still count as issue numbers
    issue_exceptions = ['AU', 'AI', 'A', 'B', 'C']

    for item in os.listdir(basedir):
        if item == 'cover.jpg' or item == 'cvinfo': continue
        #print item
        #subname = os.path.join(basedir, item)
        subname = item
        #versioning - remove it
        subsplit = subname.replace('_', ' ').split()
        volrem = None
        for subit in subsplit:
            if subit[0].lower() == 'v':
                vfull = 0
                if subit[1:].isdigit():
                    #if in format v1, v2009 etc...
                    if len(subit) > 3:
                        # if it's greater than 3 in length, then the format is Vyyyy
                        vfull = 1  # add on 1 character length to account for extra space
                    subname = re.sub(subit, '', subname)
                    volrem = subit
                elif subit.lower()[:3] == 'vol':
                    #if in format vol.2013 etc
                    #because the '.' in Vol. gets removed, let's loop thru again after the Vol hit to remove it entirely
                    logger.fdebug('volume indicator detected as version #:' +
                                  str(subit))
                    subname = re.sub(subit, '', subname)
                    volrem = subit

        #remove the brackets..
        subnm = re.findall('[^()]+', subname)
        if len(subnm):
            logger.fdebug(
                "detected invalid filename - attempting to detect year to continue"
            )
            # wrap a bare 4-digit year (19xx/20xx) in parentheses so the
            # bracket split below can isolate it
            subname = re.sub('(.*)\s+(19\d{2}|20\d{2})(.*)', '\\1 (\\2) \\3',
                             subname)
            subnm = re.findall('[^()]+', subname)

        subname = subnm[0]
        logger.fdebug('subname no brackets: ' + str(subname))
        subname = re.sub('\_', ' ', subname)
        # nonocount tallies characters affected by the special-character
        # handling below; adjusted as each case is handled
        nonocount = 0
        charpos = 0
        detneg = "no"
        for nono in not_these:
            if nono in subname:
                subcnt = subname.count(nono)
                charpos = indices(
                    subname,
                    nono)  # will return a list of char positions in subname
                #print "charpos: " + str(charpos)
                if nono == '-':
                    i = 0
                    while (i < len(charpos)):
                        for i, j in enumerate(charpos):
                            #print i,j
                            if subname[j + 1:j + 2].isdigit():
                                logger.fdebug(
                                    'possible negative issue detected.')
                                nonocount = nonocount + subcnt - 1
                                detneg = "yes"
                            if '-' in watchcomic and i < len(watchcomic):
                                logger.fdebug('- appears in series title.')
                        i += 1
                    if detneg == "no":
                        subname = re.sub(str(nono), ' ', subname)
                        nonocount = nonocount + subcnt
                #logger.fdebug(str(nono) + " detected " + str(subcnt) + " times.")
                # segment '.' having a . by itself will denote the entire string which we don't want
                elif nono == '.':
                    x = 0
                    fndit = 0
                    dcspace = 0
                    while x < subcnt:
                        fndit = subname.find(nono, fndit)
                        # digit on both sides of the '.' -> decimal issue
                        # number (e.g. 1.5); keep the dot in that case
                        if subname[fndit -
                                   1:fndit].isdigit() and subname[fndit +
                                                                  1:fndit +
                                                                  2].isdigit():
                            logger.fdebug('decimal issue detected.')
                            dcspace += 1
                        x += 1
                    if dcspace == 1:
                        nonocount = nonocount + subcnt + dcspace
                    else:
                        subname = re.sub('\.', ' ', subname)
                        nonocount = nonocount + subcnt - 1  #(remove the extension from the length)
                else:
                    #this is new - if it's a symbol seperated by a space on each side it drags in an extra char.
                    x = 0
                    fndit = 0
                    blspc = 0
                    while x < subcnt:
                        fndit = subname.find(nono, fndit)
                        #print ("space before check: " + str(subname[fndit-1:fndit]))
                        #print ("space after check: " + str(subname[fndit+1:fndit+2]))
                        if subname[fndit -
                                   1:fndit] == ' ' and subname[fndit +
                                                               1:fndit +
                                                               2] == ' ':
                            logger.fdebug(
                                'blankspace detected before and after ' +
                                str(nono))
                            blspc += 1
                        x += 1
                    subname = re.sub(str(nono), ' ', subname)
                    nonocount = nonocount + subcnt + blspc
        #subname = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]',' ', subname)
        # normalize the watch title the same way the filename was normalized
        # so the substring comparison below is apples-to-apples
        modwatchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\'\?\@]', ' ',
                               u_watchcomic)
        detectand = False
        detectthe = False
        modwatchcomic = re.sub('\&', ' and ', modwatchcomic)
        if ' the ' in modwatchcomic.lower():
            modwatchcomic = re.sub("\\bthe\\b", "", modwatchcomic.lower())
            logger.fdebug('new modwatchcomic: ' + str(modwatchcomic))
            detectthe = True
        modwatchcomic = re.sub('\s+', ' ', str(modwatchcomic)).strip()
        if '&' in subname:
            subname = re.sub('\&', ' and ', subname)
            detectand = True
        if ' the ' in subname.lower():
            subname = re.sub("\\bthe\\b", "", subname.lower())
            detectthe = True
        subname = re.sub('\s+', ' ', str(subname)).strip()

        # build the list of alternate series names to compare against
        AS_Alt = []
        if AlternateSearch is not None:
            chkthealt = AlternateSearch.split('##')
            if chkthealt == 0:
                AS_Alternate = AlternateSearch
            for calt in chkthealt:
                AS_Alternate = re.sub('##', '', calt)
                #same = encode.
                u_altsearchcomic = AS_Alternate.encode('ascii',
                                                       'ignore').strip()
                altsearchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]', '',
                                        u_altsearchcomic)
                altsearchcomic = re.sub('\&', ' and ', altsearchcomic)
                altsearchcomic = re.sub('\s+', ' ',
                                        str(altsearchcomic)).strip()
                AS_Alt.append(altsearchcomic)
        else:
            #create random characters so it will never match.
            altsearchcomic = "127372873872871091383 abdkhjhskjhkjdhakajhf"
            AS_Alt.append(altsearchcomic)
        #if '_' in subname:
        #    subname = subname.replace('_', ' ')
        logger.fdebug('watchcomic:' + str(modwatchcomic) +
                      ' ..comparing to found file: ' + str(subname))
        if modwatchcomic.lower() in subname.lower() or any(
                x.lower() in subname.lower()
                for x in AS_Alt):  #altsearchcomic.lower() in subname.lower():
            comicpath = os.path.join(basedir, item)
            logger.fdebug(modwatchcomic + ' - watchlist match on : ' +
                          comicpath)
            comicsize = os.path.getsize(comicpath)
            #print ("Comicsize:" + str(comicsize))
            comiccnt += 1

            stann = 0
            if 'annual' in subname.lower():
                logger.fdebug('Annual detected - proceeding')
                jtd_len = subname.lower().find('annual')
                cchk = modwatchcomic
            else:
                # cchk becomes whichever title (main or alternate) matched
                if modwatchcomic.lower() in subname.lower():
                    cchk = modwatchcomic
                else:
                    cchk_ls = [
                        x for x in AS_Alt if x.lower() in subname.lower()
                    ]
                    cchk = cchk_ls[0]
                    #print "something: " + str(cchk)

                logger.fdebug('we should remove ' + str(nonocount) +
                              ' characters')

                findtitlepos = subname.find('-')
                if charpos != 0:
                    logger.fdebug('detected ' + str(len(charpos)) +
                                  ' special characters')
                    i = 0
                    while (i < len(charpos)):
                        for i, j in enumerate(charpos):
                            #print i,j
                            #print subname
                            #print "digitchk: " + str(subname[j:])
                            if j >= len(subname):
                                logger.fdebug(
                                    'end reached. ignoring remainder.')
                                break
                            elif subname[j:] == '-':
                                if i <= len(subname) and subname[i +
                                                                 1].isdigit():
                                    logger.fdebug('negative issue detected.')
                                    #detneg = "yes"
                            elif j > findtitlepos:
                                if subname[j:] == '#':
                                    if subname[i + 1].isdigit():
                                        logger.fdebug(
                                            '# detected denoting issue#, ignoring.'
                                        )
                                    else:
                                        nonocount -= 1
                                elif '-' in watchcomic and i < len(watchcomic):
                                    logger.fdebug(
                                        '- appears in series title, ignoring.')
                                else:
                                    logger.fdebug(
                                        'special character appears outside of title - ignoring @ position: '
                                        + str(charpos[i]))
                                    nonocount -= 1
                        i += 1

            #remove versioning here
            if volrem != None:
                jtd_len = len(
                    cchk
                )  # + len(volrem)# + nonocount + 1 #1 is to account for space btwn comic and vol #
            else:
                jtd_len = len(cchk)  # + nonocount

            logger.fdebug('nonocount [' + str(nonocount) + '] cchk [' + cchk +
                          '] length [' + str(len(cchk)) + ']')

            #if detectand:
            #    jtd_len = jtd_len - 2 # char substitution diff between & and 'and' = 2 chars
            #if detectthe:
            #    jtd_len = jtd_len - 3  # char subsitiution diff between 'the' and '' = 3 chars

            #justthedigits = item[jtd_len:]

            logger.fdebug('final jtd_len to prune [' + str(jtd_len) + ']')
            logger.fdebug('before title removed from FILENAME [' + str(item) +
                          ']')
            logger.fdebug('after title removed from FILENAME [' +
                          str(item[jtd_len:]) + ']')
            logger.fdebug(
                'creating just the digits using SUBNAME, pruning first [' +
                str(jtd_len) + '] chars from [' + subname + ']')

            justthedigits = subname[jtd_len:].strip()

            logger.fdebug('after title removed from SUBNAME [' +
                          justthedigits + ']')

            #remove the title if it appears
            #findtitle = justthedigits.find('-')
            #if findtitle > 0 and detneg == "no":
            #    justthedigits = justthedigits[:findtitle]
            #    logger.fdebug("removed title from name - is now : " + str(justthedigits))

            tmpthedigits = justthedigits
            justthedigits = justthedigits.split(' ', 1)[0]

            #if the issue has an alphanumeric (issue_exceptions, join it and push it through)
            logger.fdebug('JUSTTHEDIGITS [' + justthedigits + ']')
            if justthedigits.isdigit():
                digitsvalid = "true"
            else:
                digitsvalid = "false"

            if justthedigits.lower() == 'annual':
                logger.fdebug('ANNUAL [' + tmpthedigits.split(' ', 1)[1] + ']')
                justthedigits += ' ' + tmpthedigits.split(' ', 1)[1]
                digitsvalid = "true"
            else:

                try:
                    if tmpthedigits.split(' ', 1)[1] is not None:
                        poss_alpha = tmpthedigits.split(' ', 1)[1]
                        for issexcept in issue_exceptions:
                            if issexcept.lower() in poss_alpha.lower(
                            ) and len(poss_alpha) <= len(issexcept):
                                justthedigits += poss_alpha
                                logger.fdebug(
                                    'ALPHANUMERIC EXCEPTION. COMBINING : [' +
                                    justthedigits + ']')
                                digitsvalid = "true"
                                break
                except:
                    pass

            logger.fdebug('final justthedigits [' + justthedigits + ']')
            if digitsvalid == "false":
                logger.fdebug('Issue number not properly detected...ignoring.')
                continue

            if manual is not None:
                #this is needed for Manual Run to determine matches
                #without this Batman will match on Batman Incorporated, and Batman and Robin, etc..
                logger.fdebug('modwatchcomic = ' + modwatchcomic.lower())
                logger.fdebug('subname = ' + subname.lower())
                comyear = manual['SeriesYear']
                issuetotal = manual['Total']
                logger.fdebug('SeriesYear: ' + str(comyear))
                logger.fdebug('IssueTotal: ' + str(issuetotal))

                #set the issue/year threshold here.
                #  2013 - (24issues/12) = 2011.
                minyear = int(comyear) - (int(issuetotal) / 12)

                #subnm defined at being of module.
                len_sm = len(subnm)

                #print ("there are " + str(lenm) + " words.")
                cnt = 0
                yearmatch = "false"

                while (cnt < len_sm):
                    if subnm[cnt] is None: break
                    if subnm[cnt] == ' ':
                        pass
                    else:
                        logger.fdebug(
                            str(cnt) + ". Bracket Word: " + str(subnm[cnt]))

                    if subnm[cnt][:-2] == '19' or subnm[cnt][:-2] == '20':
                        logger.fdebug("year detected: " + str(subnm[cnt]))
                        result_comyear = subnm[cnt]
                        if int(result_comyear) >= int(minyear):
                            logger.fdebug(
                                str(result_comyear) +
                                ' is within the series range of ' +
                                str(minyear) + '-' + str(comyear))
                            yearmatch = "true"
                            break
                        else:
                            logger.fdebug(
                                str(result_comyear) +
                                ' - not right - year not within series range of '
                                + str(minyear) + '-' + str(comyear))
                            yearmatch = "false"
                            break
                    cnt += 1

                if yearmatch == "false": continue

                #tmpitem = item[:jtd_len]
                # if it's an alphanumeric with a space, rejoin, so we can remove it cleanly just below this.
                substring_removal = None
                poss_alpha = subname.split(' ')[-1:]
                logger.fdebug('poss_alpha: ' + str(poss_alpha))
                logger.fdebug('lenalpha: ' + str(len(''.join(poss_alpha))))
                for issexcept in issue_exceptions:
                    if issexcept.lower() in str(poss_alpha).lower() and len(
                            ''.join(poss_alpha)) <= len(issexcept):
                        #get the last 2 words so that we can remove them cleanly
                        substring_removal = ' '.join(subname.split(' ')[-2:])
                        substring_join = ''.join(subname.split(' ')[-2:])
                        logger.fdebug('substring_removal: ' +
                                      str(substring_removal))
                        logger.fdebug('substring_join: ' + str(substring_join))
                        break

                if substring_removal is not None:
                    sub_removed = subname.replace('_', ' ').replace(
                        substring_removal, substring_join)
                else:
                    sub_removed = subname.replace('_', ' ')
                logger.fdebug('sub_removed: ' + str(sub_removed))
                split_sub = sub_removed.rsplit(' ', 1)[0].split(
                    ' ')  #removes last word (assuming it's the issue#)
                split_mod = modwatchcomic.replace('_', ' ').split()  #batman
                logger.fdebug('split_sub: ' + str(split_sub))
                logger.fdebug('split_mod: ' + str(split_mod))

                # compare word-by-word from the end; scnt counts exact hits
                x = len(split_sub) - 1
                scnt = 0
                if x > len(split_mod) - 1:
                    logger.fdebug('number of words do not match...aborting.')
                else:
                    while (x > -1):
                        print str(split_mod[x]) + ' comparing to ' + str(
                            split_mod[x])
                        if str(split_sub[x]).lower() == str(
                                split_mod[x]).lower():
                            scnt += 1
                            logger.fdebug('word match exact. ' + str(scnt) +
                                          '/' + str(len(split_mod)))
                        x -= 1

                wordcnt = int(scnt)
                logger.fdebug('scnt:' + str(scnt))
                totalcnt = int(len(split_mod))
                logger.fdebug('split_mod length:' + str(totalcnt))
                try:
                    spercent = (wordcnt / totalcnt) * 100
                except ZeroDivisionError:
                    spercent = 0
                logger.fdebug('we got ' + str(spercent) + ' percent.')
                if int(spercent) >= 80:
                    logger.fdebug("this should be considered an exact match.")
                else:
                    logger.fdebug('failure - not an exact match.')
                    continue

            comiclist.append({
                'ComicFilename': item,
                'ComicLocation': comicpath,
                'ComicSize': comicsize,
                'JusttheDigits': justthedigits
            })
            watchmatch['comiclist'] = comiclist
        else:
            pass
            #print ("directory found - ignoring")
    logger.fdebug('you have a total of ' + str(comiccnt) + ' ' + watchcomic +
                  ' comics')
    watchmatch['comiccount'] = comiccnt
    #print watchmatch
    return watchmatch
コード例 #37
0
ファイル: getcomics.py プロジェクト: DarkSir23/mylar
    def downloadit(self, id, link, mainlink, resume=None):
        """Download a DDL (direct-download) item into the configured folder.

        Args:
            id: ddl_info row id used to track this download in the db.
            link: direct link to the file being downloaded.
            mainlink: originating page url (used to acquire CF tokens).
            resume: optional byte offset; when set, a Range request is
                issued and the existing file is appended to.

        Returns:
            None when another DDL download already holds the lock (or the
            download finished but no file exists at the target path),
            otherwise a dict with 'success', 'filename' and 'path' keys.
        """
        if mylar.DDL_LOCK is True:
            logger.fdebug('[DDL] Another item is currently downloading via DDL. Only one item can be downloaded at a time using DDL. Patience.')
            return
        else:
            mylar.DDL_LOCK = True

        myDB = db.DBConnection()
        filename = None
        try:
            with cfscrape.create_scraper() as s:
                if resume is not None:
                    logger.info('[DDL-RESUME] Attempting to resume from: %s bytes' % resume)
                    self.headers['Range'] = 'bytes=%d-' % resume
                cf_cookievalue, cf_user_agent = s.get_tokens(mainlink, headers=self.headers)
                t = s.get(link, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True)

                filename = os.path.basename(urllib.unquote(t.url).decode('utf-8'))
                if 'GetComics.INFO' in filename:
                    # BUGFIX: re.I was previously passed as re.sub()'s
                    # positional 'count' argument (re.I == 2), so the match
                    # was never case-insensitive; pass it via flags= instead.
                    filename = re.sub('GetComics.INFO', '', filename, flags=re.I).strip()

                try:
                    remote_filesize = int(t.headers['Content-length'])
                    logger.fdebug('remote filesize: %s' % remote_filesize)
                except Exception as e:
                    logger.warn('[WARNING] Unable to retrieve remote file size - this is usually due to the page being behind a different click-bait/ad page. Error returned as : %s' % e)
                    logger.warn('[WARNING] Considering this particular download as invalid and will ignore this result.')
                    remote_filesize = 0
                    mylar.DDL_LOCK = False
                    return ({"success":  False,
                            "filename": filename,
                            "path":     None})

                #write the filename to the db for tracking purposes...
                myDB.upsert('ddl_info', {'filename': filename, 'remote_filesize': remote_filesize}, {'id': id})

                path = os.path.join(mylar.CONFIG.DDL_LOCATION, filename)

                # NOTE(review): this gzip branch looks vestigial - 'f' is
                # immediately rebound by the open() calls below, so the
                # GzipFile is never read. Left as-is; confirm before removing.
                if t.headers.get('content-encoding') == 'gzip': #.get('Content-Encoding') == 'gzip':
                    buf = StringIO(t.content)
                    f = gzip.GzipFile(fileobj=buf)

                if resume is not None:
                    # append from the resume offset (Range header set above)
                    with open(path, 'ab') as f:
                        for chunk in t.iter_content(chunk_size=1024):
                            if chunk:
                                f.write(chunk)
                                f.flush()

                else:
                    with open(path, 'wb') as f:
                        for chunk in t.iter_content(chunk_size=1024):
                            if chunk:
                                f.write(chunk)
                                f.flush()

        except Exception as e:
            logger.error('[ERROR] %s' % e)
            mylar.DDL_LOCK = False
            return ({"success":  False,
                     "filename": filename,
                     "path":     None})

        else:
            mylar.DDL_LOCK = False
            if os.path.isfile(path):
                if path.endswith('.zip'):
                    # NOTE(review): '.zip' is an unescaped regex here, so
                    # re.sub can also strip e.g. 'azip' mid-name - confirm
                    # before tightening.
                    new_path = os.path.join(mylar.CONFIG.DDL_LOCATION, re.sub('.zip', '', filename).strip())
                    logger.info('Zip file detected. Unzipping into new modified path location: %s' % new_path)
                    try:
                        zip_f = zipfile.ZipFile(path, 'r')
                        zip_f.extractall(new_path)
                        zip_f.close()
                    except Exception as e:
                        logger.warn('[ERROR: %s] Unable to extract zip file: %s' % (e, new_path))
                        return ({"success":  False,
                                 "filename": filename,
                                 "path":     None})
                    else:
                        try:
                            os.remove(path)
                        except Exception as e:
                            logger.warn('[ERROR: %s] Unable to remove zip file from %s after extraction.' % (e, path))
                        filename = None
                else:
                    new_path = path
                return ({"success":  True,
                         "filename": filename,
                         "path":     new_path})
コード例 #38
0
ファイル: findcomicfeed.py プロジェクト: mriutta/mylar
def Startit(searchName, searchIssue, searchYear, ComicVersion):
    """Search the nzbindex.nl RSS feed for a specific comic issue.

    Args:
        searchName: series title (may contain '%20' url-encoding).
        searchIssue: issue number as a string (e.g. '01').
        searchYear: publication year as a string.
        ComicVersion: series volume (unused here; kept for call-site
            compatibility).

    Returns:
        dict with an 'entries' list of {'title', 'link'} matches, or the
        string "no results" when nothing matched.
    """
    #searchName = "Uncanny Avengers"
    #searchIssue = "01"
    #searchYear = "2012"
    #clean up searchName due to webparse.
    searchName = searchName.replace("%20", " ")
    if "," in searchName:
        searchName = searchName.replace(",", "")
    logger.fdebug("name:" + str(searchName))
    logger.fdebug("issue:" + str(searchIssue))
    logger.fdebug("year:" + str(searchYear))
    splitSearch = searchName.split(" ")
    joinSearch = "+".join(splitSearch) + "+" + searchIssue

    # make '-', ':' and spaces interchangeable separators in the regex name
    if "-" in searchName:
        searchName = searchName.replace("-", '((\\s)?[-:])?(\\s)?')

    regexName = searchName.replace(" ", '((\\s)?[-:])?(\\s)?')

    if mylar.USE_MINSIZE:
        size_constraints = "minsize=" + str(mylar.MINSIZE)
    else:
        size_constraints = "minsize=10"

    if mylar.USE_MAXSIZE:
        size_constraints = size_constraints + "&maxsize=" + str(mylar.MAXSIZE)

    # BUGFIX: max_age was previously only assigned when USENET_RETENTION was
    # set, causing a NameError when building the feed URL below otherwise.
    max_age = ""
    if mylar.USENET_RETENTION is not None:
        max_age = "&age=" + str(mylar.USENET_RETENTION)

    feed = feedparser.parse(
        "http://nzbindex.nl/rss/alt.binaries.comics.dcp/?sort=agedesc&" +
        str(size_constraints) + str(max_age) +
        "&dq=%s&max=50&more=1" % joinSearch)

    totNum = len(feed.entries)

    # map each feed entry title to its nzb enclosure href
    keyPair = {}
    entries = []
    mres = {}
    countUp = 0

    logger.fdebug(str(totNum) + " results")

    while countUp < totNum:
        urlParse = feed.entries[countUp].enclosures[0]
        #keyPair[feed.entries[countUp].title] = feed.entries[countUp].link
        keyPair[feed.entries[countUp].title] = urlParse["href"]

        countUp = countUp + 1

    # thanks to SpammyHagar for spending the time in compiling these regEx's!

    regEx = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue,
                                                searchYear)
    regExOne = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (
        regexName, searchIssue, searchYear)

    #Sometimes comics aren't actually published the same year comicVine says - trying to adjust for these cases
    regExTwo = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue,
                                                   int(searchYear) + 1)
    regExThree = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue,
                                                     int(searchYear) - 1)
    regExFour = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (
        regexName, searchIssue, int(searchYear) + 1)
    regExFive = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (
        regexName, searchIssue, int(searchYear) - 1)

    regexList = [regEx, regExOne, regExTwo, regExThree, regExFour, regExFive]

    for title, link in keyPair.items():
        #print("titlesplit: " + str(title.split("\"")))
        splitTitle = title.split("\"")

        for subs in splitTitle:
            regExCount = 0
            if len(subs) > 10:
                #Looping through dictionary to run each regEx - length + regex is determined by regexList up top.
                while regExCount < len(regexList):
                    regExTest = re.findall(regexList[regExCount],
                                           subs,
                                           flags=re.IGNORECASE)
                    regExCount = regExCount + 1
                    if regExTest:
                        logger.fdebug(title)
                        entries.append({'title': subs, 'link': str(link)})

    if len(entries) >= 1:
        mres['entries'] = entries
        return mres
    else:
        logger.fdebug("No Results Found")
        return "no results"
コード例 #39
0
ファイル: cv.py プロジェクト: adrianmoisey/mylar
def GetComicInfo(comicid, dom, safechk=None):
    """Parse a ComicVine volume XML response into a series-info dict.

    comicid : ComicVine series id (passed through unchanged on retry).
    dom     : parsed XML DOM of the ComicVine volume API response.
    safechk : internal retry counter. Starts at 1; each ComicVine timeout
              bumps it, and after more than 4 attempts the refresh aborts.

    Returns a dict containing ComicName, ComicPublisher, ComicYear,
    ComicURL, Aliases, ComicVersion, ComicIssues, ComicImage,
    ComicImageALT and FirstIssueID - or None when ComicVine could not be
    reached / parsed.
    """
    if safechk is None:
        #safetycheck when checking comicvine. If it times out, increment the chk on retry attempts up until 5 tries then abort.
        safechk = 1
    elif safechk > 4:
        logger.error('Unable to add / refresh the series due to inablity to retrieve data from ComicVine. You might want to try abit later and/or make sure ComicVine is up.')
        return

    #comicvine isn't as up-to-date with issue counts..
    #so this can get really buggered, really fast.
    tracks = dom.getElementsByTagName('issue')
    try:
        cntit = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
    except:
        cntit = len(tracks)
    trackcnt = len(tracks)
    logger.fdebug("number of issues I counted: " + str(trackcnt))
    logger.fdebug("number of issues CV says it has: " + str(cntit))
    # if the two don't match, use trackcnt as count_of_issues might be not upto-date for some reason
    if int(trackcnt) != int(cntit):
        cntit = trackcnt
        vari = "yes"
    else:
        vari = "no"
    logger.fdebug("vari is set to: " + str(vari))

    comic = {}
    cntit = int(cntit)

    # <name> nodes occur under several parents; the parentNode identifies
    # which one we are looking at (results = series name, publisher = publisher).
    try:
        names = len(dom.getElementsByTagName('name'))
        n = 0
        while (n < names):
            if dom.getElementsByTagName('name')[n].parentNode.nodeName == 'results':
                try:
                    comic['ComicName'] = dom.getElementsByTagName('name')[n].firstChild.wholeText
                    comic['ComicName'] = comic['ComicName'].rstrip()
                except:
                    logger.error('There was a problem retrieving the given data from ComicVine. Ensure that www.comicvine.com is accessible AND that you have provided your OWN ComicVine API key.')
                    return

            elif dom.getElementsByTagName('name')[n].parentNode.nodeName == 'publisher':
                try:
                    comic['ComicPublisher'] = dom.getElementsByTagName('name')[n].firstChild.wholeText
                except:
                    comic['ComicPublisher'] = "Unknown"

            n += 1
    except:
        logger.warn('Something went wrong retrieving from ComicVine. Ensure your API is up-to-date and that comicvine is accessible')
        return

    try:
        comic['ComicYear'] = dom.getElementsByTagName('start_year')[0].firstChild.wholeText
    except:
        comic['ComicYear'] = '0000'

    try:
        comic['ComicURL'] = dom.getElementsByTagName('site_detail_url')[trackcnt].firstChild.wholeText
    except:
        #this should never be an exception. If it is, it's probably due to CV timing out - so let's sleep for abit then retry.
        logger.warn('Unable to retrieve URL for volume. This is usually due to a timeout to CV, or going over the API. Retrying again in 10s.')
        time.sleep(10)
        safechk += 1
        # BUGFIX: the retried call's result was previously discarded and
        # execution fell through with a half-populated record - propagate it.
        return GetComicInfo(comicid, dom, safechk)

    desdeck = 0
    #the description field actually holds the Volume# - so let's grab it
    try:
        descchunk = dom.getElementsByTagName('description')[0].firstChild.wholeText
        comic_desc = drophtml(descchunk)
        desdeck += 1
    except:
        comic_desc = 'None'

    #sometimes the deck has volume labels
    try:
        comic_deck = dom.getElementsByTagName('deck')[0].firstChild.wholeText
        desdeck += 1
    except:
        comic_deck = 'None'

    try:
        comic['Aliases'] = dom.getElementsByTagName('aliases')[0].firstChild.wholeText
    except:
        comic['Aliases'] = 'None'

    comic['ComicVersion'] = 'noversion'
    # Try to dig a volume number out of the description first (desdeck pass 1),
    # then out of the deck (desdeck pass 2).
    while (desdeck > 0):
        if desdeck == 1:
            if comic_desc == 'None':
                comicDes = comic_deck[:30]
            else:
                #extract the first 60 characters
                comicDes = comic_desc[:60].replace('New 52', '')
        elif desdeck == 2:
            #extract the characters from the deck
            comicDes = comic_deck[:30].replace('New 52', '')
        else:
            break

        i = 0
        while (i < 2):
            if 'volume' in comicDes.lower():
                #found volume - let's grab it.
                v_find = comicDes.lower().find('volume')
                #arbitrarily grab the next 10 chars (6 for volume + 1 for space + 3 for the actual vol #)
                #increased to 10 to allow for text numbering (+5 max)
                #sometimes it's volume 5 and ocassionally it's fifth volume.
                if i == 0:
                    vfind = comicDes[v_find:v_find + 15]   #if it's 'volume 5' format
                    basenums = {'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'ten': '10', 'i': '1', 'ii': '2', 'iii': '3', 'iv': '4', 'v': '5'}
                    logger.fdebug('volume X format - ' + str(i) + ': ' + vfind)
                else:
                    vfind = comicDes[:v_find]   # if it's 'fifth volume' format
                    basenums = {'zero': '0', 'first': '1', 'second': '2', 'third': '3', 'fourth': '4', 'fifth': '5', 'sixth': '6', 'seventh': '7', 'eighth': '8', 'nineth': '9', 'tenth': '10', 'i': '1', 'ii': '2', 'iii': '3', 'iv': '4', 'v': '5'}
                    logger.fdebug('X volume format - ' + str(i) + ': ' + vfind)
                # convert a spelled-out number to its digit form
                for nums in basenums:
                    if nums in vfind.lower():
                        sconv = basenums[nums]
                        vfind = re.sub(nums, sconv, vfind.lower())
                        break

                #now we attempt to find the character position after the word 'volume'
                if i == 0:
                    volthis = vfind.lower().find('volume')
                    volthis = volthis + 6  # add on the actual word to the position so that we can grab the subsequent digit
                    vfind = vfind[volthis:volthis + 4]  #grab the next 4 characters
                elif i == 1:
                    volthis = vfind.lower().find('volume')
                    vfind = vfind[volthis - 4:volthis]  #grab the 4 characters leading up to 'volume'

                if '(' in vfind:
                    #bracket detected in versioning
                    vfindit = re.findall('[^()]+', vfind)
                    vfind = vfindit[0]
                vf = re.findall('[^<>]+', vfind)
                ledigit = re.sub("[^0-9]", "", vf[0])
                if ledigit != '':
                    comic['ComicVersion'] = ledigit
                    logger.fdebug("Volume information found! Adding to series record : volume " + comic['ComicVersion'])
                    break
                i += 1
            else:
                i += 1

        if comic['ComicVersion'] == 'noversion':
            logger.fdebug('comic[ComicVersion]:' + str(comic['ComicVersion']))
            desdeck -= 1
        else:
            break

    if vari == "yes":
        comic['ComicIssues'] = str(cntit)
    else:
        comic['ComicIssues'] = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText

    comic['ComicImage'] = dom.getElementsByTagName('super_url')[0].firstChild.wholeText
    comic['ComicImageALT'] = dom.getElementsByTagName('small_url')[0].firstChild.wholeText

    comic['FirstIssueID'] = dom.getElementsByTagName('id')[0].firstChild.wholeText

    return comic
コード例 #40
0
ファイル: findcomicfeed.py プロジェクト: vibbix/mylar
def Startit(searchName, searchIssue, searchYear, ComicVersion, IssDateFix):
    """Run the 'experimental' search against the nzbindex RSS feeds.

    searchName   : series title to look for.
    searchIssue  : issue number (string) being sought.
    searchYear   : cover year ComicVine reports for the issue.
    ComicVersion : series volume - accepted but not referenced in this body.
    IssDateFix   : 'no' when the CV year is trusted; '01'/'02' widen the
                   accepted year by -1, any other non-'no' value by +1
                   (covers issues published a year off from CV's date).

    Returns {'entries': [...]} of candidate {title, link, pubdate, length}
    dicts when the feeds returned anything, otherwise "no results".
    """
    cName = searchName

    #clean up searchName due to webparse/redudant naming that would return too specific of results.
    commons = ['and', 'the', '&', '-']
    for x in commons:
        cnt = 0
        for m in re.finditer(x, searchName.lower()):
            cnt += 1
            tehstart = m.start()
            tehend = m.end()
            # 'the'/'and' are only stripped when they stand alone as words
            # (bounded by string start/end or spaces); '&' and '-' are
            # always replaced.
            if any([x == 'the', x == 'and']):
                if len(searchName) == tehend:
                    tehend = -1
                if all([tehstart == 0, searchName[tehend] == ' ']) or all([
                        tehstart != 0, searchName[tehstart - 1] == ' ',
                        searchName[tehend] == ' '
                ]):
                    searchName = searchName.replace(x, ' ', cnt)
                else:
                    continue
            else:
                searchName = searchName.replace(x, ' ', cnt)

    searchName = re.sub('\s+', ' ', searchName)
    searchName = re.sub("[\,\:]", "", searchName).strip()
    #logger.fdebug("searchname: %s" % searchName)
    #logger.fdebug("issue: %s" % searchIssue)
    #logger.fdebug("year: %s" % searchYear)
    # NOTE(review): quote_plus replaces spaces with '+', so the split(" ")
    # below never actually splits - splitSearch is a single-element list.
    encodeSearch = urllib.quote_plus(searchName)
    splitSearch = encodeSearch.split(" ")

    # number of zero-padded issue variants to try (e.g. 1 / 01 / 001),
    # based on the length of the issue number.
    if len(searchIssue) == 1:
        loop = 3
    elif len(searchIssue) == 2:
        loop = 2
    else:
        loop = 1

    # loosen the series name for regex matching: spaces and hyphens may
    # appear as optional ' - ' / ':' separators in posted titles.
    if "-" in searchName:
        searchName = searchName.replace("-", '((\\s)?[-:])?(\\s)?')
    regexName = searchName.replace(" ", '((\\s)?[-:])?(\\s)?')

    # build the nzbindex size/age query-string constraints from config.
    if mylar.CONFIG.USE_MINSIZE is True:
        minsize = str(mylar.CONFIG.MINSIZE)
    else:
        minsize = '10'
    size_constraints = "&minsize=" + minsize

    if mylar.CONFIG.USE_MAXSIZE is True:
        maxsize = str(mylar.CONFIG.MAXSIZE)
    else:
        maxsize = '0'
    size_constraints += "&maxsize=" + maxsize

    if mylar.CONFIG.USENET_RETENTION is not None:
        max_age = "&maxage=" + str(mylar.CONFIG.USENET_RETENTION)
    else:
        max_age = "&maxage=0"

    # pull the RSS feed(s) once per issue-number variant, pausing between
    # requests.
    feeds = []
    i = 1
    while (i <= loop):
        if i == 1:
            searchmethod = searchIssue
        elif i == 2:
            searchmethod = '0' + searchIssue
        elif i == 3:
            searchmethod = '00' + searchIssue
        else:
            break

        joinSearch = "+".join(splitSearch) + "+" + searchmethod

        logger.fdebug(
            'Now searching experimental for issue number: %s to try and ensure all the bases are covered'
            % searchmethod)

        if mylar.CONFIG.PREFERRED_QUALITY == 1:
            joinSearch = joinSearch + " .cbr"
        elif mylar.CONFIG.PREFERRED_QUALITY == 2:
            joinSearch = joinSearch + " .cbz"

        feeds.append(
            feedparser.parse(
                "http://beta.nzbindex.com/search/rss?q=%s&max=50&minage=0%s&hidespam=1&hidepassword=1&sort=agedesc%s&complete=0&hidecross=0&hasNFO=0&poster=&g[]=85"
                % (joinSearch, max_age, size_constraints)))
        time.sleep(5)
        if mylar.CONFIG.ALTEXPERIMENTAL:
            feeds.append(
                feedparser.parse(
                    "http://beta.nzbindex.com/search/rss?q=%s&max=50&minage=0%s&hidespam=1&hidepassword=1&sort=agedesc%s&complete=0&hidecross=0&hasNFO=0&poster=&g[]=86"
                    % (joinSearch, max_age, size_constraints)))
            time.sleep(5)
        i += 1

    entries = []
    mres = {}
    tallycount = 0

    for feed in feeds:
        totNum = len(feed.entries)
        tallycount += len(feed.entries)

        # flatten each feed entry into a title/link/length/pubdate dict.
        #keyPair = {}
        keyPair = []
        regList = []
        countUp = 0

        while countUp < totNum:
            urlParse = feed.entries[countUp].enclosures[0]
            #keyPair[feed.entries[countUp].title] = feed.entries[countUp].link
            #keyPair[feed.entries[countUp].title] = urlParse["href"]
            keyPair.append({
                "title": feed.entries[countUp].title,
                "link": urlParse["href"],
                "length": urlParse["length"],
                "pubdate": feed.entries[countUp].updated
            })
            countUp = countUp + 1

        # thanks to SpammyHagar for spending the time in compiling these regEx's!

        regExTest = ""

        regEx = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue,
                                                    searchYear)
        regExOne = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (
            regexName, searchIssue, searchYear)

        #Sometimes comics aren't actually published the same year comicVine says - trying to adjust for these cases
        regExTwo = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue,
                                                       int(searchYear) + 1)
        regExThree = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (
            regexName, searchIssue, int(searchYear) - 1)
        regExFour = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (
            regexName, searchIssue, int(searchYear) + 1)
        regExFive = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (
            regexName, searchIssue, int(searchYear) - 1)

        regexList = [
            regEx, regExOne, regExTwo, regExThree, regExFour, regExFive
        ]

        # common posting labels that indicate junk/spam results - any
        # candidate containing one of these is skipped.
        except_list = [
            'releases', 'gold line', 'distribution', '0-day', '0 day', '0day',
            'o-day'
        ]

        for entry in keyPair:
            title = entry['title']
            #logger.fdebug("titlesplit: " + str(title.split("\"")))
            splitTitle = title.split("\"")
            noYear = 'False'
            _digits = re.compile('\d')

            for subs in splitTitle:
                #logger.fdebug('sub:' + subs)
                regExCount = 0
                # a candidate segment must be at least as long as the series
                # name, contain no junk label, and contain at least a digit.
                if len(subs) >= len(cName) and not any(
                        d in subs.lower() for d in except_list) and bool(
                            _digits.search(subs)) is True:
                    #Looping through dictionary to run each regEx - length + regex is determined by regexList up top.
                    #                while regExCount < len(regexList):
                    #                    regExTest = re.findall(regexList[regExCount], subs, flags=re.IGNORECASE)
                    #                    regExCount = regExCount +1
                    #                    if regExTest:
                    #                        logger.fdebug(title)
                    #                        entries.append({
                    #                                  'title':   subs,
                    #                                  'link':    str(link)
                    #                                  })
                    # this will still match on crap like 'For SomeSomayes' especially if the series length < 'For SomeSomayes'
                    if subs.lower().startswith('for'):
                        if cName.lower().startswith('for'):
                            pass
                        else:
                            #this is the crap we ignore. Continue (commented else, as it spams the logs)
                            #logger.fdebug('this starts with FOR : ' + str(subs) + '. This is not present in the series - ignoring.')
                            continue
                    #logger.fdebug('match.')
                    if IssDateFix != "no":
                        if IssDateFix == "01" or IssDateFix == "02":
                            ComicYearFix = str(int(searchYear) - 1)
                        else:
                            ComicYearFix = str(int(searchYear) + 1)
                    else:
                        ComicYearFix = searchYear

                    # remember a year-less candidate; a later segment of the
                    # same title may carry the year, in which case the two
                    # are stitched back together below.
                    if searchYear not in subs and ComicYearFix not in subs:
                        noYear = 'True'
                        noYearline = subs

                    if (searchYear in subs
                            or ComicYearFix in subs) and noYear == 'True':
                        #this would occur on the next check in the line, if year exists and
                        #the noYear check in the first check came back valid append it
                        subs = noYearline + ' (' + searchYear + ')'
                        noYear = 'False'

                    if noYear == 'False':

                        entries.append({
                            'title': subs,
                            'link': entry['link'],
                            'pubdate': entry['pubdate'],
                            'length': entry['length']
                        })
                        break  # break out so we don't write more shit.

# NOTE(review): this returns mres even when no entry matched, as long as
# the feeds themselves yielded any items at all (tallycount >= 1).
#    if len(entries) >= 1:
    if tallycount >= 1:
        mres['entries'] = entries
        return mres
    else:
        logger.fdebug("No Results Found")
        return "no results"
コード例 #41
0
def listFiles(dir,
              watchcomic,
              Publisher,
              AlternateSearch=None,
              manual=None,
              sarc=None):

    # use AlternateSearch to check for filenames that follow that naming pattern
    # ie. Star Trek TNG Doctor Who Assimilation won't get hits as the
    # checker looks for Star Trek TNG Doctor Who Assimilation2 (according to CV)

    # we need to convert to ascii, as watchcomic is utf-8 and special chars f'it up
    u_watchcomic = watchcomic.encode('ascii', 'ignore').strip()
    logger.fdebug('[FILECHECKER] comic: ' + watchcomic)
    basedir = dir
    logger.fdebug('[FILECHECKER] Looking in: ' + dir)
    watchmatch = {}
    comiclist = []
    comiccnt = 0
    not_these = [
        '#', ',', '\/', ':', '\;', '.', '-', '!', '\$', '\%', '\+', '\'', '\?',
        '\@'
    ]

    issue_exceptions = ['AU', '.INH', '.NOW', 'AI', 'A', 'B', 'C']

    extensions = ('.cbr', '.cbz')

    for item in os.listdir(basedir):
        if item == 'cover.jpg' or item == 'cvinfo': continue
        if not item.endswith(extensions):
            logger.fdebug(
                '[FILECHECKER] filename not a valid cbr/cbz - ignoring: ' +
                item)
            continue

        #print item
        #subname = os.path.join(basedir, item)
        subname = item

        #versioning - remove it
        subsplit = subname.replace('_', ' ').split()
        volrem = None
        for subit in subsplit:
            if subit[0].lower() == 'v':
                vfull = 0
                if subit[1:].isdigit():
                    #if in format v1, v2009 etc...
                    if len(subit) > 3:
                        # if it's greater than 3 in length, then the format is Vyyyy
                        vfull = 1  # add on 1 character length to account for extra space
                    subname = re.sub(subit, '', subname)
                    volrem = subit
                elif subit.lower()[:3] == 'vol':
                    #if in format vol.2013 etc
                    #because the '.' in Vol. gets removed, let's loop thru again after the Vol hit to remove it entirely
                    logger.fdebug(
                        '[FILECHECKER] volume indicator detected as version #:'
                        + str(subit))
                    subname = re.sub(subit, '', subname)
                    volrem = subit

        #check if a year is present in series title (ie. spider-man 2099)
        numberinseries = 'False'

        for i in watchcomic.split():
            if ('20' in i or '19' in i):
                if i.isdigit():
                    numberinseries = 'True'
                else:
                    find20 = i.find('20')
                    if find20:
                        stf = i[find20:4].strip()
                    find19 = i.find('19')
                    if find19:
                        stf = i[find19:4].strip()
                    logger.fdebug('[FILECHECKER] stf is : ' + str(stf))
                    if stf.isdigit():
                        numberinseries = 'True'

        logger.fdebug('[FILECHECKER] numberinseries: ' + numberinseries)

        #remove the brackets..
        subnm = re.findall('[^()]+', subname)
        logger.fdebug('[FILECHECKER] subnm len : ' + str(len(subnm)))
        if len(subnm) == 1:
            logger.fdebug(
                '[FILECHECKER] ' + str(len(subnm)) +
                ': detected invalid filename - attempting to detect year to continue'
            )
            #if the series has digits this f's it up.
            if numberinseries == 'True':
                #we need to remove the series from the subname and then search the remainder.
                watchname = re.sub(
                    '[-\:\;\!\'\/\?\+\=\_\%\.]', '',
                    watchcomic)  #remove spec chars for watchcomic match.
                logger.fdebug('[FILECHECKER] watch-cleaned: ' + str(watchname))
                subthis = re.sub('.cbr', '', subname)
                subthis = re.sub('.cbz', '', subthis)
                subthis = re.sub('[-\:\;\!\'\/\?\+\=\_\%\.]', '', subthis)
                logger.fdebug('[FILECHECKER] sub-cleaned: ' + str(subthis))
                subthis = subthis[len(watchname):]  #remove watchcomic
                #we need to now check the remainder of the string for digits assuming it's a possible year
                logger.fdebug('[FILECHECKER] new subname: ' + str(subthis))
                subname = re.sub('(.*)\s+(19\d{2}|20\d{2})(.*)',
                                 '\\1 (\\2) \\3', subthis)
                subname = watchcomic + subname
                subnm = re.findall('[^()]+', subname)
            else:
                subit = re.sub('(.*)\s+(19\d{2}|20\d{2})(.*)', '\\1 (\\2) \\3',
                               subname)
                subthis2 = re.sub('.cbr', '', subit)
                subthis1 = re.sub('.cbz', '', subthis2)
                subname = re.sub('[-\:\;\!\'\/\?\+\=\_\%\.]', '', subthis1)
                subnm = re.findall('[^()]+', subname)
        if Publisher.lower() in subname.lower():
            #if the Publisher is given within the title or filename even (for some reason, some people
            #have this to distinguish different titles), let's remove it entirely.
            lenm = len(subnm)

            cnt = 0
            pub_removed = None

            while (cnt < lenm):
                if subnm[cnt] is None: break
                if subnm[cnt] == ' ':
                    pass
                else:
                    logger.fdebug(
                        str(cnt) + ". Bracket Word: " + str(subnm[cnt]))

                if Publisher.lower() in subnm[cnt].lower() and cnt >= 1:
                    logger.fdebug('Publisher detected within title : ' +
                                  str(subnm[cnt]))
                    logger.fdebug('cnt is : ' + str(cnt) +
                                  ' --- Publisher is: ' + Publisher)
                    pub_removed = subnm[cnt]
                    #-strip publisher if exists here-
                    logger.fdebug('removing publisher from title')
                    subname_pubremoved = re.sub(pub_removed, '', subname)
                    logger.fdebug('pubremoved : ' + str(subname_pubremoved))
                    subname_pubremoved = re.sub(
                        '\(\)', '', subname_pubremoved)  #remove empty brackets
                    subname_pubremoved = re.sub(
                        '\s+', ' ', subname_pubremoved)  #remove spaces > 1
                    logger.fdebug('blank brackets removed: ' +
                                  str(subname_pubremoved))
                    subnm = re.findall('[^()]+', subname_pubremoved)
                    break
                cnt += 1
        subname = subnm[0]

        if len(subnm):
            # if it still has no year (brackets), check setting and either assume no year needed.
            subname = subname
        logger.fdebug('[FILECHECKER] subname no brackets: ' + str(subname))
        subname = re.sub('\_', ' ', subname)
        nonocount = 0
        charpos = 0
        detneg = "no"
        leavehyphen = False
        should_restart = True
        while should_restart:
            should_restart = False
            for nono in not_these:
                if nono in subname:
                    subcnt = subname.count(nono)
                    charpos = indices(
                        subname, nono
                    )  # will return a list of char positions in subname
                    #print "charpos: " + str(charpos)
                    if nono == '-':
                        i = 0
                        while (i < len(charpos)):
                            for i, j in enumerate(charpos):
                                if j + 2 > len(subname):
                                    sublimit = subname[j + 1:]
                                else:
                                    sublimit = subname[j + 1:j + 2]
                                if sublimit.isdigit():
                                    logger.fdebug(
                                        '[FILECHECKER] possible negative issue detected.'
                                    )
                                    nonocount = nonocount + subcnt - 1
                                    detneg = "yes"
                                elif '-' in watchcomic and i < len(watchcomic):
                                    logger.fdebug(
                                        '[FILECHECKER] - appears in series title.'
                                    )
                                    logger.fdebug('[FILECHECKER] up to - :' +
                                                  subname[:j +
                                                          1].replace('-', ' '))
                                    logger.fdebug('[FILECHECKER] after -  :' +
                                                  subname[j + 1:])
                                    subname = subname[:j + 1].replace(
                                        '-', ' ') + subname[j + 1:]
                                    logger.fdebug(
                                        '[FILECHECKER] new subname is : ' +
                                        str(subname))
                                    should_restart = True
                                    leavehyphen = True
                            i += 1
                        if detneg == "no" or leavehyphen == False:
                            subname = re.sub(str(nono), ' ', subname)
                            nonocount = nonocount + subcnt
                #logger.fdebug('[FILECHECKER] (str(nono) + " detected " + str(subcnt) + " times.")
                # segment '.' having a . by itself will denote the entire string which we don't want
                    elif nono == '.':
                        x = 0
                        fndit = 0
                        dcspace = 0
                        while x < subcnt:
                            fndit = subname.find(nono, fndit)
                            if subname[fndit - 1:fndit].isdigit(
                            ) and subname[fndit + 1:fndit + 2].isdigit():
                                logger.fdebug(
                                    '[FILECHECKER] decimal issue detected.')
                                dcspace += 1
                            x += 1
                        if dcspace == 1:
                            nonocount = nonocount + subcnt + dcspace
                        else:
                            subname = re.sub('\.', ' ', subname)
                            nonocount = nonocount + subcnt - 1  #(remove the extension from the length)
                    else:
                        #this is new - if it's a symbol seperated by a space on each side it drags in an extra char.
                        x = 0
                        fndit = 0
                        blspc = 0
                        while x < subcnt:
                            fndit = subname.find(nono, fndit)
                            #print ("space before check: " + str(subname[fndit-1:fndit]))
                            #print ("space after check: " + str(subname[fndit+1:fndit+2]))
                            if subname[fndit -
                                       1:fndit] == ' ' and subname[fndit +
                                                                   1:fndit +
                                                                   2] == ' ':
                                logger.fdebug(
                                    '[FILECHECKER] blankspace detected before and after '
                                    + str(nono))
                                blspc += 1
                            x += 1
                        subname = re.sub(str(nono), ' ', subname)
                        nonocount = nonocount + subcnt + blspc
        #subname = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]',' ', subname)

        modwatchcomic = re.sub('[\_\#\,\/\:\;\.\!\$\%\'\?\@\-]', ' ',
                               u_watchcomic)
        #if leavehyphen == False:
        #    logger.fdebug('[FILECHECKER] ('removing hyphen for comparisons')
        #    modwatchcomic = re.sub('-', ' ', modwatchcomic)
        #    subname = re.sub('-', ' ', subname)
        detectand = False
        detectthe = False
        modwatchcomic = re.sub('\&', ' and ', modwatchcomic)
        if ' the ' in modwatchcomic.lower():
            modwatchcomic = re.sub("\\bthe\\b", "", modwatchcomic.lower())
            logger.fdebug('[FILECHECKER] new modwatchcomic: ' +
                          str(modwatchcomic))
            detectthe = True
        modwatchcomic = re.sub('\s+', ' ', str(modwatchcomic)).strip()
        if '&' in subname:
            subname = re.sub('\&', ' and ', subname)
            detectand = True
        if ' the ' in subname.lower():
            subname = re.sub("\\bthe\\b", "", subname.lower())
            detectthe = True
        subname = re.sub('\s+', ' ', str(subname)).strip()

        AS_Alt = []
        if AlternateSearch is not None:
            chkthealt = AlternateSearch.split('##')
            if chkthealt == 0:
                AS_Alternate = AlternateSearch
            for calt in chkthealt:
                AS_Alternate = re.sub('##', '', calt)
                #same = encode.
                u_altsearchcomic = AS_Alternate.encode('ascii',
                                                       'ignore').strip()
                altsearchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]',
                                        ' ', u_altsearchcomic)
                altsearchcomic = re.sub('\&', ' and ', altsearchcomic)
                altsearchcomic = re.sub('\s+', ' ',
                                        str(altsearchcomic)).strip()
                AS_Alt.append(altsearchcomic)
        else:
            #create random characters so it will never match.
            altsearchcomic = "127372873872871091383 abdkhjhskjhkjdhakajhf"
            AS_Alt.append(altsearchcomic)
        #if '_' in subname:
        #    subname = subname.replace('_', ' ')
        logger.fdebug('[FILECHECKER] watchcomic:' + str(modwatchcomic) +
                      ' ..comparing to found file: ' + str(subname))
        if modwatchcomic.lower() in subname.lower() or any(
                x.lower() in subname.lower()
                for x in AS_Alt):  #altsearchcomic.lower() in subname.lower():
            comicpath = os.path.join(basedir, item)
            logger.fdebug('[FILECHECKER] ' + modwatchcomic +
                          ' - watchlist match on : ' + comicpath)
            comicsize = os.path.getsize(comicpath)
            #print ("Comicsize:" + str(comicsize))
            comiccnt += 1

            stann = 0
            if 'annual' in subname.lower():
                logger.fdebug('[FILECHECKER] Annual detected - proceeding')
                jtd_len = subname.lower().find('annual')
                cchk = modwatchcomic
            else:
                if modwatchcomic.lower() in subname.lower():
                    cchk = modwatchcomic
                else:
                    cchk_ls = [
                        x for x in AS_Alt if x.lower() in subname.lower()
                    ]
                    cchk = cchk_ls[0]
                    #print "something: " + str(cchk)

                logger.fdebug('[FILECHECKER] we should remove ' +
                              str(nonocount) + ' characters')

                findtitlepos = subname.find('-')
                if charpos != 0:
                    logger.fdebug('[FILECHECKER] detected ' +
                                  str(len(charpos)) + ' special characters')
                    i = 0
                    while (i < len(charpos)):
                        for i, j in enumerate(charpos):
                            #print i,j
                            #print subname
                            #print "digitchk: " + str(subname[j:])
                            if j >= len(subname):
                                logger.fdebug(
                                    '[FILECHECKER] end reached. ignoring remainder.'
                                )
                                break
                            elif subname[j:] == '-':
                                if i <= len(subname) and subname[i +
                                                                 1].isdigit():
                                    logger.fdebug(
                                        '[FILECHECKER] negative issue detected.'
                                    )
                                    #detneg = "yes"
                            elif j > findtitlepos:
                                if subname[j:] == '#':
                                    if subname[i + 1].isdigit():
                                        logger.fdebug(
                                            '[FILECHECKER] # detected denoting issue#, ignoring.'
                                        )
                                    else:
                                        nonocount -= 1
                                elif '-' in watchcomic and i < len(watchcomic):
                                    logger.fdebug(
                                        '[FILECHECKER] - appears in series title, ignoring.'
                                    )
                                else:
                                    logger.fdebug(
                                        '[FILECHECKER] special character appears outside of title - ignoring @ position: '
                                        + str(charpos[i]))
                                    nonocount -= 1
                        i += 1

            #remove versioning here
            if volrem != None:
                jtd_len = len(
                    cchk
                )  # + len(volrem)# + nonocount + 1 #1 is to account for space btwn comic and vol #
            else:
                jtd_len = len(cchk)  # + nonocount

            if sarc and mylar.READ2FILENAME:
                removest = subname.find(
                    ' '
                )  # the - gets removed above so we test for the first blank space...
                if subname[:removest].isdigit():
                    jtd_len += removest + 1  # +1 to account for space in place of -
                    logger.fdebug('[FILECHECKER] adjusted jtd_len to : ' +
                                  str(removest) +
                                  ' because of story-arc reading order tags')

            logger.fdebug('[FILECHECKER] nonocount [' + str(nonocount) +
                          '] cchk [' + cchk + '] length [' + str(len(cchk)) +
                          ']')

            #if detectand:
            #    jtd_len = jtd_len - 2 # char substitution diff between & and 'and' = 2 chars
            #if detectthe:
            #    jtd_len = jtd_len - 3  # char subsitiution diff between 'the' and '' = 3 chars

            #justthedigits = item[jtd_len:]

            logger.fdebug('[FILECHECKER] final jtd_len to prune [' +
                          str(jtd_len) + ']')
            logger.fdebug(
                '[FILECHECKER] before title removed from FILENAME [' +
                str(item) + ']')
            logger.fdebug('[FILECHECKER] after title removed from FILENAME [' +
                          str(item[jtd_len:]) + ']')
            logger.fdebug(
                '[FILECHECKER] creating just the digits using SUBNAME, pruning first ['
                + str(jtd_len) + '] chars from [' + subname + ']')

            justthedigits_1 = subname[jtd_len:].strip()

            logger.fdebug('[FILECHECKER] after title removed from SUBNAME [' +
                          justthedigits_1 + ']')

            #remove the title if it appears
            #findtitle = justthedigits.find('-')
            #if findtitle > 0 and detneg == "no":
            #    justthedigits = justthedigits[:findtitle]
            #    logger.fdebug('[FILECHECKER] ("removed title from name - is now : " + str(justthedigits))

            justthedigits = justthedigits_1.split(' ', 1)[0]

            digitsvalid = "false"

            for jdc in list(justthedigits):
                #logger.fdebug('[FILECHECKER] ('jdc:' + str(jdc))
                if not jdc.isdigit():
                    #logger.fdebug('[FILECHECKER] ('alpha')
                    jdc_start = justthedigits.find(jdc)
                    alpha_isschk = justthedigits[jdc_start:]
                    #logger.fdebug('[FILECHECKER] ('alpha_isschk:' + str(alpha_isschk))
                    for issexcept in issue_exceptions:
                        if issexcept.lower() in alpha_isschk.lower(
                        ) and len(alpha_isschk) <= len(issexcept):
                            logger.fdebug(
                                '[FILECHECKER] ALPHANUMERIC EXCEPTION : [' +
                                justthedigits + ']')
                            digitsvalid = "true"
                            break
                if digitsvalid == "true": break

            try:
                tmpthedigits = justthedigits_1.split(' ', 1)[1]
                logger.fdebug(
                    '[FILECHECKER] If the series has a decimal, this should be a number ['
                    + tmpthedigits + ']')
                if 'cbr' in tmpthedigits.lower(
                ) or 'cbz' in tmpthedigits.lower():
                    tmpthedigits = tmpthedigits[:-3].strip()
                    logger.fdebug(
                        '[FILECHECKER] Removed extension - now we should just have a number ['
                        + tmpthedigits + ']')
                poss_alpha = tmpthedigits
                if poss_alpha.isdigit():
                    digitsvalid = "true"
                    if justthedigits.lower() == 'annual':
                        logger.fdebug('[FILECHECKER] ANNUAL DETECTED [' +
                                      poss_alpha + ']')
                        justthedigits += ' ' + poss_alpha
                    else:
                        justthedigits += '.' + poss_alpha
                        logger.fdebug(
                            '[FILECHECKER] DECIMAL ISSUE DETECTED [' +
                            justthedigits + ']')
                else:
                    for issexcept in issue_exceptions:
                        decimalexcept = False
                        if '.' in issexcept:
                            decimalexcept = True
                            issexcept = issexcept[
                                1:]  #remove the '.' from comparison...
                        if issexcept.lower() in poss_alpha.lower(
                        ) and len(poss_alpha) <= len(issexcept):
                            if decimalexcept:
                                issexcept = '.' + issexcept
                            justthedigits += issexcept  #poss_alpha
                            logger.fdebug(
                                '[FILECHECKER] ALPHANUMERIC EXCEPTION. COMBINING : ['
                                + justthedigits + ']')
                            digitsvalid = "true"
                            break
            except:
                tmpthedigits = None

#            justthedigits = justthedigits.split(' ', 1)[0]

#if the issue has an alphanumeric (issue_exceptions, join it and push it through)
            logger.fdebug('[FILECHECKER] JUSTTHEDIGITS [' + justthedigits +
                          ']')
            if digitsvalid == "true":
                pass
            else:
                if justthedigits.isdigit():
                    digitsvalid = "true"
                else:
                    if '.' in justthedigits:
                        tmpdec = justthedigits.find('.')
                        b4dec = justthedigits[:tmpdec]
                        a4dec = justthedigits[tmpdec + 1:]
                        if a4dec.isdigit() and b4dec.isdigit():
                            logger.fdebug(
                                '[FILECHECKER] DECIMAL ISSUE DETECTED')
                            digitsvalid = "true"
                    else:
                        try:
                            x = float(justthedigits)
                            #validity check
                            if x < 0:
                                logger.info(
                                    "I've encountered a negative issue #: " +
                                    str(justthedigits) +
                                    ". Trying to accomodate.")
                                digitsvalid = "true"
                            else:
                                raise ValueError
                        except ValueError, e:
                            logger.info(
                                'Cannot determine issue number from given issue #: '
                                + str(justthedigits))


#                else:
#                    logger.fdebug('[FILECHECKER] NO DECIMALS DETECTED')
#                    digitsvalid = "false"

#            if justthedigits.lower() == 'annual':
#                logger.fdebug('[FILECHECKER] ANNUAL ['  + tmpthedigits.split(' ', 1)[1] + ']')
#                justthedigits += ' ' + tmpthedigits.split(' ', 1)[1]
#                digitsvalid = "true"
#            else:
#                try:
#                    if tmpthedigits.isdigit(): #.split(' ', 1)[1] is not None:
#                        poss_alpha = tmpthedigits#.split(' ', 1)[1]
#                        if poss_alpha.isdigit():
#                            digitsvalid = "true"
#                            justthedigits += '.' + poss_alpha
#                            logger.fdebug('[FILECHECKER] DECIMAL ISSUE DETECTED [' + justthedigits + ']')
#                        for issexcept in issue_exceptions:
#                            if issexcept.lower() in poss_alpha.lower() and len(poss_alpha) <= len(issexcept):
#                                justthedigits += poss_alpha
#                                logger.fdebug('[FILECHECKER] ALPHANUMERIC EXCEPTION. COMBINING : [' + justthedigits + ']')
#                                digitsvalid = "true"
#                                break
#                except:
#                    pass

            logger.fdebug('[FILECHECKER] final justthedigits [' +
                          justthedigits + ']')
            if digitsvalid == "false":
                logger.fdebug(
                    '[FILECHECKER] Issue number not properly detected...ignoring.'
                )
                comiccnt -= 1  # remove the entry from the list count as it was incorrrectly tallied.
                continue

            if manual is not None:
                #this is needed for Manual Run to determine matches
                #without this Batman will match on Batman Incorporated, and Batman and Robin, etc..

                # in case it matches on an Alternate Search pattern, set modwatchcomic to the cchk value
                modwatchcomic = cchk
                logger.fdebug('[FILECHECKER] cchk = ' + cchk.lower())
                logger.fdebug('[FILECHECKER] modwatchcomic = ' +
                              modwatchcomic.lower())
                logger.fdebug('[FILECHECKER] subname = ' + subname.lower())
                comyear = manual['SeriesYear']
                issuetotal = manual['Total']
                comicvolume = manual['ComicVersion']
                logger.fdebug('[FILECHECKER] SeriesYear: ' + str(comyear))
                logger.fdebug('[FILECHECKER] IssueTotal: ' + str(issuetotal))
                logger.fdebug('[FILECHECKER] Comic Volume: ' +
                              str(comicvolume))
                logger.fdebug('[FILECHECKER] volume detected: ' + str(volrem))

                if comicvolume:
                    ComVersChk = re.sub("[^0-9]", "", comicvolume)
                    if ComVersChk == '' or ComVersChk == '1':
                        ComVersChk = 0
                else:
                    ComVersChk = 0

                # even if it's a V1, we need to pull the date for the given issue ID and get the publication year
                # for the issue. Because even if it's a V1, if there are additional Volumes then it's possible that
                # it will take the incorrect series. (ie. Detective Comics (1937) & Detective Comics (2011).
                # If issue #28 (2013) is found, it exists in both series, and because DC 1937 is a V1, it will bypass
                # the year check which will result in the incorrect series being picked (1937)

                #set the issue/year threshold here.
                #  2013 - (24issues/12) = 2011.
                #minyear = int(comyear) - (int(issuetotal) / 12)

                maxyear = manual['LatestDate'][:4]  # yyyy-mm-dd

                #subnm defined at being of module.
                len_sm = len(subnm)

                #print ("there are " + str(lenm) + " words.")
                cnt = 0
                yearmatch = "none"
                vers4year = "no"
                vers4vol = "no"

                for ct in subsplit:
                    if ct.lower().startswith('v') and ct[1:].isdigit():
                        logger.fdebug(
                            '[FILECHECKER] possible versioning..checking')
                        #we hit a versioning # - account for it
                        if ct[1:].isdigit():
                            if len(ct[1:]) == 4:  #v2013
                                logger.fdebug(
                                    '[FILECHECKER] Version detected as ' +
                                    str(ct))
                                vers4year = "yes"  #re.sub("[^0-9]", " ", str(ct)) #remove the v
                                break
                            else:
                                if len(ct) < 4:
                                    logger.fdebug(
                                        '[FILECHECKER] Version detected as ' +
                                        str(ct))
                                    vers4vol = str(ct)
                                    break
                        logger.fdebug(
                            '[FILECHECKER] false version detection..ignoring.')

                versionmatch = "false"
                if vers4year is not "no" or vers4vol is not "no":

                    if comicvolume:  #is not "None" and comicvolume is not None:
                        D_ComicVersion = re.sub("[^0-9]", "", comicvolume)
                        if D_ComicVersion == '':
                            D_ComicVersion = 0
                    else:
                        D_ComicVersion = 0

                    F_ComicVersion = re.sub("[^0-9]", "", volrem)
                    S_ComicVersion = str(comyear)
                    logger.fdebug('[FILECHECKER] FCVersion: ' +
                                  str(F_ComicVersion))
                    logger.fdebug('[FILECHECKER] DCVersion: ' +
                                  str(D_ComicVersion))
                    logger.fdebug('[FILECHECKER] SCVersion: ' +
                                  str(S_ComicVersion))

                    #if annualize == "true" and int(ComicYear) == int(F_ComicVersion):
                    #    logger.fdebug('[FILECHECKER] ("We matched on versions for annuals " + str(volrem))

                    if int(F_ComicVersion) == int(D_ComicVersion) or int(
                            F_ComicVersion) == int(S_ComicVersion):
                        logger.fdebug(
                            '[FILECHECKER] We matched on versions...' +
                            str(volrem))
                        versionmatch = "true"
                    else:
                        logger.fdebug(
                            '[FILECHECKER] Versions wrong. Ignoring possible match.'
                        )

                #else:
                while (cnt < len_sm):
                    if subnm[cnt] is None: break
                    if subnm[cnt] == ' ':
                        pass
                    else:
                        logger.fdebug('[FILECHECKER] ' + str(cnt) +
                                      ' Bracket Word: ' + str(subnm[cnt]))

                        #if ComVersChk == 0:
                        #    logger.fdebug('[FILECHECKER] Series version detected as V1 (only series in existance with that title). Bypassing year check')
                        #    yearmatch = "true"
                        #    break
                    if subnm[cnt][:-2] == '19' or subnm[cnt][:-2] == '20':
                        logger.fdebug('[FILECHECKER] year detected: ' +
                                      str(subnm[cnt]))
                        result_comyear = subnm[cnt]
                        if int(result_comyear) <= int(maxyear):
                            logger.fdebug('[FILECHECKER] ' +
                                          str(result_comyear) +
                                          ' is within the series range of ' +
                                          str(comyear) + '-' + str(maxyear))
                            #still possible for incorrect match if multiple reboots of series end/start in same year
                            yearmatch = "true"
                            break
                        else:
                            logger.fdebug(
                                '[FILECHECKER] ' + str(result_comyear) +
                                ' - not right - year not within series range of '
                                + str(comyear) + '-' + str(maxyear))
                            yearmatch = "false"
                            break
                    cnt += 1
                if versionmatch == "false":
                    if yearmatch == "false":
                        logger.fdebug(
                            '[FILECHECKER] Failed to match on both version and issue year.'
                        )
                        continue
                    else:
                        logger.fdebug(
                            '[FILECHECKER] Matched on versions, not on year - continuing.'
                        )
                else:
                    if yearmatch == "false":
                        logger.fdebug(
                            '[FILECHECKER] Matched on version, but not on year - continuing.'
                        )
                    else:
                        logger.fdebug(
                            '[FILECHECKER] Matched on both version, and issue year - continuing.'
                        )

                if yearmatch == "none":
                    if ComVersChk == 0:
                        logger.fdebug(
                            '[FILECHECKER] Series version detected as V1 (only series in existance with that title). Bypassing year check.'
                        )
                        yearmatch = "true"
                    else:
                        continue

                if 'annual' in subname.lower():
                    subname = re.sub('annual', '', subname.lower())
                    subname = re.sub('\s+', ' ', subname)

                #tmpitem = item[:jtd_len]
                # if it's an alphanumeric with a space, rejoin, so we can remove it cleanly just below this.
                substring_removal = None
                poss_alpha = subname.split(' ')[-1:]
                logger.fdebug('[FILECHECKER] poss_alpha: ' + str(poss_alpha))
                logger.fdebug('[FILECHECKER] lenalpha: ' +
                              str(len(''.join(poss_alpha))))
                for issexcept in issue_exceptions:
                    if issexcept.lower() in str(poss_alpha).lower() and len(
                            ''.join(poss_alpha)) <= len(issexcept):
                        #get the last 2 words so that we can remove them cleanly
                        substring_removal = ' '.join(subname.split(' ')[-2:])
                        substring_join = ''.join(subname.split(' ')[-2:])
                        logger.fdebug('[FILECHECKER] substring_removal: ' +
                                      str(substring_removal))
                        logger.fdebug('[FILECHECKER] substring_join: ' +
                                      str(substring_join))
                        break

                if substring_removal is not None:
                    sub_removed = subname.replace('_', ' ').replace(
                        substring_removal, substring_join)
                else:
                    sub_removed = subname.replace('_', ' ')
                logger.fdebug('[FILECHECKER] sub_removed: ' + str(sub_removed))
                split_sub = sub_removed.rsplit(' ', 1)[0].split(
                    ' ')  #removes last word (assuming it's the issue#)
                split_mod = modwatchcomic.replace('_', ' ').split()  #batman
                logger.fdebug('[FILECHECKER] split_sub: ' + str(split_sub))
                logger.fdebug('[FILECHECKER] split_mod: ' + str(split_mod))

                x = len(split_sub) - 1
                scnt = 0
                if x > len(split_mod) - 1:
                    logger.fdebug(
                        '[FILECHECKER] number of words do not match...aborting.'
                    )
                else:
                    while (x > -1):
                        print str(split_sub[x]) + ' comparing to ' + str(
                            split_mod[x])
                        if str(split_sub[x]).lower() == str(
                                split_mod[x]).lower():
                            scnt += 1
                            logger.fdebug('[FILECHECKER] word match exact. ' +
                                          str(scnt) + '/' +
                                          str(len(split_mod)))
                        x -= 1

                wordcnt = int(scnt)
                logger.fdebug('[FILECHECKER] scnt:' + str(scnt))
                totalcnt = int(len(split_mod))
                logger.fdebug('[FILECHECKER] split_mod length:' +
                              str(totalcnt))
                try:
                    spercent = (wordcnt / totalcnt) * 100
                except ZeroDivisionError:
                    spercent = 0
                logger.fdebug('[FILECHECKER] we got ' + str(spercent) +
                              ' percent.')
                if int(spercent) >= 80:
                    logger.fdebug(
                        '[FILECHECKER] this should be considered an exact match.Justthedigits:'
                        + justthedigits)
                else:
                    logger.fdebug(
                        '[FILECHECKER] failure - not an exact match.')
                    continue

            if manual:
                print item
                print comicpath
                print comicsize
                print result_comyear
                print justthedigits
                comiclist.append({
                    'ComicFilename': item,
                    'ComicLocation': comicpath,
                    'ComicSize': comicsize,
                    'ComicYear': result_comyear,
                    'JusttheDigits': justthedigits
                })
                print('appended.')
            else:
                comiclist.append({
                    'ComicFilename': item,
                    'ComicLocation': comicpath,
                    'ComicSize': comicsize,
                    'JusttheDigits': justthedigits
                })
            watchmatch['comiclist'] = comiclist
        else:
            pass
コード例 #42
0
ファイル: cv.py プロジェクト: ChapeLu/mylar
def getComic(comicid, type, issueid=None, arc=None, arcid=None, arclist=None, comicidlist=None):
    """Fetch ComicVine data (via pulldetails) for the requested query type.

    type selects the behaviour and which arguments are used:
      'issue'      - paginated list of issues for comicid (or, when comicid is
                     None, for the story-arc issue ids packed in arclist).
                     Returns a dict {'issuechoice': [...], 'firstdate': 'yyyy-mm'}
                     or False on failure.
      'comic'      - series details for comicid.
      'firstissue' - first-issue details for issueid.
      'storyarc'   - story arc details for arc.
      'comicyears' - series-year data for the ids in comicidlist.
      'import'     - batched (100 per API hit) lookups for the IssueIDs in
                     comicidlist; returns the accumulated import list.
    """
    if type == 'issue':
        offset = 1
        issue = {}
        ndic = []
        issuechoice = []
        comicResults = []
        # sentinel date - replaced by the earliest publication date encountered.
        firstdate = '2099-00-00'
        #let's find out how many results we get from the query...
        if comicid is None:
            #if comicid is None, it's coming from the story arc search results.
            id = arcid
            #since the arclist holds the issueids, and the pertinent reading order - we need to strip out the reading order so this works.
            # NOTE(review): assumes every arclist entry contains a ',' - an entry
            # without one would lose its last character via find() == -1. Confirm upstream format.
            aclist = ''
            for ac in arclist.split('|'):
                aclist += ac[:ac.find(',')] + '|'
            if aclist.endswith('|'):
                aclist = aclist[:-1]
            islist = aclist
        else:
            id = comicid
            islist = None
        searched = pulldetails(id, 'issue', None, 0, islist)
        if searched is None:
            return False
        totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
        logger.fdebug("there are " + str(totalResults) + " search results...")
        if not totalResults:
            return False
        countResults = 0
        while (countResults < int(totalResults)):
            logger.fdebug("querying range from " + str(countResults) + " to " + str(countResults + 100))
            if countResults > 0:
                #new api - have to change to page # instead of offset count
                offsetcount = countResults
                searched = pulldetails(id, 'issue', None, offsetcount, islist)
            issuechoice, tmpdate = GetIssuesInfo(id, searched, arcid)
            if tmpdate < firstdate:
                firstdate = tmpdate
            ndic = ndic + issuechoice
            #search results are limited to 100 and by pagination now...let's account for this.
            countResults = countResults + 100

        issue['issuechoice'] = ndic
        issue['firstdate'] = firstdate
        return issue

    elif type == 'comic':
        dom = pulldetails(comicid, 'comic', None, 1)
        return GetComicInfo(comicid, dom)
    elif type == 'firstissue':
        dom = pulldetails(comicid, 'firstissue', issueid, 1)
        return GetFirstIssue(issueid, dom)
    elif type == 'storyarc':
        dom = pulldetails(arc, 'storyarc', None, 1)
        return GetComicInfo(issueid, dom)
    elif type == 'comicyears':
        #used by the story arc searcher when adding a given arc to poll each ComicID in order to populate the Series Year.
        #this grabs each issue based on issueid, and then subsets the comicid for each to be used later.
        #set the offset to 0, since we're doing a filter.
        dom = pulldetails(arcid, 'comicyears', offset=0, comicidlist=comicidlist)
        return GetSeriesYears(dom)
    elif type == 'import':
        #used by the importer when doing a scan with metatagging enabled. If metatagging comes back true, then there's an IssueID present
        #within the tagging (with CT). This compiles all of the IssueID's during a scan (in 100's), and returns the corresponding CV data
        #related to the given IssueID's - namely ComicID, Name, Volume (more at some point, but those are the important ones).
        offset = 1
        if len(comicidlist) <= 100:
            endcnt = len(comicidlist)
        else:
            endcnt = 100

        id_count = 0
        import_list = []
        logger.fdebug('comicidlist:' + str(comicidlist))

        while id_count < len(comicidlist):
            #break it up by 100 per api hit
            #do the first 100 regardless
            in_cnt = 0
            tmpidlist = ''
            # BUGFIX: endcnt grows by a flat 100 each pass, so on the final
            # partial batch it can exceed len(comicidlist) and raise an
            # IndexError (e.g. 150 ids -> range(100, 200)). Clamp it here.
            for i in range(id_count, min(endcnt, len(comicidlist))):
                if in_cnt == 0:
                    tmpidlist = str(comicidlist[i])
                else:
                    tmpidlist += '|' + str(comicidlist[i])
                in_cnt += 1
            logger.info('tmpidlist: ' + str(tmpidlist))

            searched = pulldetails(None, 'import', offset=0, comicidlist=tmpidlist)

            if searched is None:
                break
            else:
                tGIL = GetImportList(searched)
                import_list += tGIL

            endcnt += 100
            id_count += 100

        return import_list
コード例 #43
0
ファイル: helpers.py プロジェクト: phairplay/mylar
def rename_param(comicid, comicname, issue, ofilename, comicyear=None, issueid=None, annualize=None):
    """Work out the renamed filename and destination path for a given issue.

    Resolves the IssueID (when not supplied) from the issue number, applies
    the zero-suppression / annual / version rules from the mylar
    configuration and builds the new filename from mylar.FILE_FORMAT.

    comicname / comicyear are accepted for interface compatibility but are
    not used directly - the series data is re-read from the db instead.

    Returns a dict with keys 'destination_dir', 'nfilename', 'issueid',
    'comicid', or None when the issue cannot be located in the db.
    """
    import db, logger
    myDB = db.DBConnection()
    logger.fdebug('comicid: ' + str(comicid))
    logger.fdebug('issue#: ' + str(issue))
    logger.fdebug('issueid:' + str(issueid))

    if issueid is None:
        # no issueid passed in - look it up by issue number (annuals live in
        # their own table).
        logger.fdebug('annualize is ' + str(annualize))
        if annualize is None:
            chkissue = myDB.selectone("SELECT * from issues WHERE ComicID=? AND Issue_Number=?", [comicid, issue]).fetchone()
        else:
            chkissue = myDB.selectone("SELECT * from annuals WHERE ComicID=? AND Issue_Number=?", [comicid, issue]).fetchone()

        if chkissue is None:
            # recheck using the integer representation of the issue #
            chkissue = myDB.selectone("SELECT * from issues WHERE ComicID=? AND Int_IssueNumber=?", [comicid, issuedigits(issue)]).fetchone()
            if chkissue is None:
                logger.error('Invalid Issue_Number - please validate.')
                return
            else:
                logger.info('Int Issue_number compare found. continuing...')
                issueid = chkissue['IssueID']
        else:
            issueid = chkissue['IssueID']

    #use issueid to get publisher, series, year, issue number
    logger.fdebug('issueid is now : ' + str(issueid))
    issuenzb = myDB.selectone("SELECT * from issues WHERE ComicID=? AND IssueID=?", [comicid, issueid]).fetchone()
    if issuenzb is None:
        logger.fdebug('not an issue, checking against annuals')
        issuenzb = myDB.selectone("SELECT * from annuals WHERE ComicID=? AND IssueID=?", [comicid, issueid]).fetchone()
        if issuenzb is None:
            logger.fdebug('Unable to rename - cannot locate issue id within db')
            return
        else:
            annualize = True

    issuenum = issuenzb['Issue_Number']
    issue_except = 'None'
    if 'au' in issuenum.lower():
        # 'AU' variants: strip down to the numeric part but remember the
        # suffix so it can be re-appended to the pretty issue number.
        issuenum = re.sub("[^0-9]", "", issuenum)
        issue_except = ' AU'
    if '.' in issuenum:
        # split the issue # into the whole part and the decimal part
        iss_find = issuenum.find('.')
        iss_b4dec = issuenum[:iss_find]
        iss_decval = issuenum[iss_find+1:]
        if int(iss_decval) == 0:
            # trailing .0 / .00 - treat as a whole issue number
            iss = iss_b4dec
            issdec = int(iss_decval)
            issueno = str(iss)
            logger.fdebug('Issue Number: ' + str(issueno))
        else:
            if len(iss_decval) == 1:
                iss = iss_b4dec + "." + iss_decval
                issdec = int(iss_decval) * 10
            else:
                iss = iss_b4dec + "." + iss_decval.rstrip('0')
                issdec = int(iss_decval.rstrip('0')) * 10
            issueno = iss_b4dec
            logger.fdebug('Issue Number: ' + str(iss))
    else:
        iss = issuenum
        issueno = str(iss)
    logger.fdebug('iss:' + str(iss))
    logger.fdebug('issueno:' + str(issueno))

    # issue zero-suppression here
    if mylar.ZERO_LEVEL == "0":
        zeroadd = ""
    else:
        if mylar.ZERO_LEVEL_N == "none": zeroadd = ""
        elif mylar.ZERO_LEVEL_N == "0x": zeroadd = "0"
        elif mylar.ZERO_LEVEL_N == "00x": zeroadd = "00"

    logger.fdebug('Zero Suppression set to : ' + str(mylar.ZERO_LEVEL_N))

    # FIX: this used to read "if str(len(issueno)) > 1:" which, under py2
    # str-vs-int comparison, was always True - and a non-numeric issueno then
    # crashed on int().  Parse defensively so oddball issue #'s fall through
    # to the default branch instead of raising.
    try:
        issueno_int = int(issueno)
    except (TypeError, ValueError):
        issueno_int = None

    if issueno_int is not None:
        if issueno_int < 0:
            # FIX: was self._log (NameError in a module-level function) and
            # abs() of a string - log via logger and abs() the parsed int.
            logger.fdebug('issue detected is a negative')
            prettycomiss = '-' + str(zeroadd) + str(abs(issueno_int))
        elif issueno_int < 10:
            logger.fdebug('issue detected less than 10')
            if '.' in iss:
                if int(iss_decval) > 0:
                    issueno = str(iss)
                    prettycomiss = str(zeroadd) + str(iss)
                else:
                    prettycomiss = str(zeroadd) + str(int(issueno))
            else:
                prettycomiss = str(zeroadd) + str(iss)
            if issue_except != 'None':
                prettycomiss = str(prettycomiss) + issue_except
            logger.fdebug('Zero level supplement set to ' + str(mylar.ZERO_LEVEL_N) + '. Issue will be set as : ' + str(prettycomiss))
        elif issueno_int >= 10 and issueno_int < 100:
            logger.fdebug('issue detected greater than 10, but less than 100')
            if mylar.ZERO_LEVEL_N == "none":
                zeroadd = ""
            else:
                zeroadd = "0"
            if '.' in iss:
                if int(iss_decval) > 0:
                    issueno = str(iss)
                    prettycomiss = str(zeroadd) + str(iss)
                else:
                    prettycomiss = str(zeroadd) + str(int(issueno))
            else:
                prettycomiss = str(zeroadd) + str(iss)
            if issue_except != 'None':
                prettycomiss = str(prettycomiss) + issue_except
            logger.fdebug('Zero level supplement set to ' + str(mylar.ZERO_LEVEL_N) + '.Issue will be set as : ' + str(prettycomiss))
        else:
            logger.fdebug('issue detected greater than 100')
            if '.' in iss:
                if int(iss_decval) > 0:
                    issueno = str(iss)
            prettycomiss = str(issueno)
            if issue_except != 'None':
                prettycomiss = str(prettycomiss) + issue_except
            logger.fdebug('Zero level supplement set to ' + str(mylar.ZERO_LEVEL_N) + '. Issue will be set as : ' + str(prettycomiss))
    else:
        prettycomiss = str(issueno)
        logger.fdebug('issue length error - cannot determine length. Defaulting to None:  ' + str(prettycomiss))

    logger.fdebug('Pretty Comic Issue is : ' + str(prettycomiss))
    issueyear = issuenzb['IssueDate'][:4]
    month = issuenzb['IssueDate'][5:7].replace('-','').strip()
    month_name = fullmonth(month)
    logger.fdebug('Issue Year : ' + str(issueyear))
    comicnzb = myDB.selectone("SELECT * from comics WHERE comicid=?", [comicid]).fetchone()
    publisher = comicnzb['ComicPublisher']
    logger.fdebug('Publisher: ' + str(publisher))
    series = comicnzb['ComicName']
    logger.fdebug('Series: ' + str(series))
    seriesyear = comicnzb['ComicYear']
    logger.fdebug('Year: '  + str(seriesyear))
    comlocation = comicnzb['ComicLocation']
    logger.fdebug('Comic Location: ' + str(comlocation))
    comversion = comicnzb['ComicVersion']
    if comversion is None:
        comversion = 'None'
    #if comversion is None, remove it so it doesn't populate with 'None'
    if comversion == 'None':
        chunk_f_f = re.sub('\$VolumeN', '', mylar.FILE_FORMAT)
        chunk_f = re.compile(r'\s+')
        chunk_file_format = chunk_f.sub(' ', chunk_f_f)
        logger.fdebug('No version # found for series, removing from filename')
        logger.fdebug("new format: " + str(chunk_file_format))
    else:
        chunk_file_format = mylar.FILE_FORMAT

    if annualize is None:
        # not an annual - drop the $Annual token from the format string
        chunk_f_f = re.sub('\$Annual', '', chunk_file_format)
        chunk_f = re.compile(r'\s+')
        chunk_file_format = chunk_f.sub(' ', chunk_f_f)
        logger.fdebug('not an annual - removing from filename paramaters')
        logger.fdebug('new format: ' + str(chunk_file_format))
    else:
        logger.fdebug('chunk_file_format is: ' + str(chunk_file_format))
        if mylar.ANNUALS_ON:
            if 'annual' in series.lower():
                if '$Annual' not in chunk_file_format:
                    # 'Annual' already in the series title and no $Annual token:
                    # leave the issue # alone, the title carries the wording.
                    logger.fdebug('[' + series + '][ANNUALS-ON][ANNUAL IN SERIES][NOT $ANNUAL] prettycomiss: ' + str(prettycomiss))
                else:
                    # wording exists within the title - strip the token and rely
                    # on the title itself for placement.
                    chunk_f_f = re.sub('\$Annual', '', chunk_file_format)
                    chunk_f = re.compile(r'\s+')
                    chunk_file_format = chunk_f.sub(' ', chunk_f_f)
                    logger.fdebug('[' + series + '][ANNUALS-ON][ANNUAL IN SERIES][$ANNUAL] prettycomiss: ' + str(prettycomiss))
            else:
                if '$Annual' not in chunk_file_format:
                    # no $Annual token in the format - force the wording into
                    # the issue portion, default form "Annual <issue>".
                    prettycomiss = "Annual " + str(prettycomiss)
                    logger.fdebug('[' + series + '][ANNUALS-ON][ANNUAL NOT IN SERIES][NOT $ANNUAL] prettycomiss: ' + str(prettycomiss))
                else:
                    logger.fdebug('[' + series + '][ANNUALS-ON][ANNUAL NOT IN SERIES][$ANNUAL] prettycomiss: ' + str(prettycomiss))
        else:
            # annuals aren't enabled, so annuals are tracked as independent
            # series; annualize is true because 'annual' was in the seriesname.
            if 'annual' in series.lower():
                if '$Annual' not in chunk_file_format:
                    logger.fdebug('[' + series + '][ANNUALS-OFF][ANNUAL IN SERIES][NOT $ANNUAL] prettycomiss: ' + str(prettycomiss))
                else:
                    chunk_f_f = re.sub('\$Annual', '', chunk_file_format)
                    chunk_f = re.compile(r'\s+')
                    chunk_file_format = chunk_f.sub(' ', chunk_f_f)
                    logger.fdebug('[' + series + '][ANNUALS-OFF][ANNUAL IN SERIES][$ANNUAL] prettycomiss: ' + str(prettycomiss))
            else:
                if '$Annual' not in chunk_file_format:
                    prettycomiss = "Annual " + str(prettycomiss)
                    logger.fdebug('[' + series + '][ANNUALS-OFF][ANNUAL NOT IN SERIES][NOT $ANNUAL] prettycomiss: ' + str(prettycomiss))
                else:
                    logger.fdebug('[' + series + '][ANNUALS-OFF][ANNUAL NOT IN SERIES][$ANNUAL] prettycomiss: ' + str(prettycomiss))

            logger.fdebug('Annual detected within series title of ' + series + '. Not auto-correcting issue #')

    series = series.encode('ascii', 'ignore').strip()
    # strip characters that are problematic in filenames ('/' becomes '-',
    # the rest are simply removed).
    filebad = [':', ',', '/', '?', '!', '\'']
    for dbd in filebad:
        if dbd in series:
            if dbd == '/': repthechar = '-'
            else: repthechar = ''
            series = series.replace(dbd, repthechar)
            logger.fdebug('Altering series name due to filenaming restrictions: ' + series)

    publisher = re.sub('!', '', publisher)

    file_values = {'$Series':    series,
                   '$Issue':     prettycomiss,
                   '$Year':      issueyear,
                   '$series':    series.lower(),
                   '$Publisher': publisher,
                   '$publisher': publisher.lower(),
                   '$VolumeY':   'V' + str(seriesyear),
                   '$VolumeN':   comversion,
                   '$monthname': month_name,
                   '$month':     month,
                   '$Annual':    'Annual'
                   }

    extensions = ('.cbr', '.cbz')

    # FIX: ext was previously unbound when the original filename didn't end
    # in a known extension, crashing further down at "+ ext.lower()".
    ext = ''
    if ofilename.lower().endswith(extensions):
        path, ext = os.path.splitext(ofilename)

    if mylar.FILE_FORMAT == '':
        logger.fdebug('Rename Files is not enabled - keeping original filename.')
        #check if extension is in nzb_name - will screw up otherwise
        if ofilename.lower().endswith(extensions):
            nfilename = ofilename[:-4]
        else:
            nfilename = ofilename
    else:
        nfilename = replace_all(chunk_file_format, file_values)
        if mylar.REPLACE_SPACES:
            #mylar.REPLACE_CHAR ...determines what to replace spaces with underscore or dot
            nfilename = nfilename.replace(' ', mylar.REPLACE_CHAR)

    nfilename = re.sub('[\,\:]', '', nfilename) + ext.lower()
    logger.fdebug('New Filename: ' + str(nfilename))

    if mylar.LOWERCASE_FILENAMES:
        dst = os.path.join(comlocation, nfilename.lower())
    else:
        dst = os.path.join(comlocation, nfilename)

    logger.fdebug('Source: ' + str(ofilename))
    logger.fdebug('Destination: ' + str(dst))

    rename_this = {"destination_dir": dst,
                   "nfilename": nfilename,
                   "issueid": issueid,
                   "comicid": comicid}

    return rename_this
Code example #44
0
def GetComicInfo(comicid, dom):
    """Parse a ComicVine volume XML response (xml.dom style) into a dict.

    Extracts the series name, publisher, start year, issue count, cover
    image urls and first-issue id, and heuristically teases a volume number
    out of the description/deck text.  Returns the populated dict, or None
    when the expected fields are missing from the response.
    """

    #comicvine isn't as up-to-date with issue counts..
    #so this can get really buggered, really fast.
    tracks = dom.getElementsByTagName('issue')
    try:
        cntit = dom.getElementsByTagName(
            'count_of_issues')[0].firstChild.wholeText
    except:
        # count_of_issues node missing/empty - fall back to counting issues
        cntit = len(tracks)
    trackcnt = len(tracks)
    logger.fdebug("number of issues I counted: " + str(trackcnt))
    logger.fdebug("number of issues CV says it has: " + str(cntit))
    # if the two don't match, use trackcnt as count_of_issues might be not upto-date for some reason
    if int(trackcnt) != int(cntit):
        cntit = trackcnt
        vari = "yes"
    else:
        vari = "no"
    logger.fdebug("vari is set to: " + str(vari))
    #if str(trackcnt) != str(int(cntit)+2):
    #    cntit = int(cntit) + 1
    comic = {}
    comicchoice = []
    cntit = int(cntit)
    #retrieve the first xml tag (<tag>data</tag>)
    #that the parser finds with name tagName:
    # to return the parent name of the <name> node : dom.getElementsByTagName('name')[0].parentNode.nodeName
    # where [0] denotes the number of the name field(s)
    # where nodeName denotes the parentNode : ComicName = results, publisher = publisher, issues = issue
    try:
        names = len(dom.getElementsByTagName('name'))
        n = 0
        # walk every <name> node; its parent tells us what it names
        while (n < names):
            if dom.getElementsByTagName(
                    'name')[n].parentNode.nodeName == 'results':
                try:
                    comic['ComicName'] = dom.getElementsByTagName(
                        'name')[n].firstChild.wholeText
                    comic['ComicName'] = comic['ComicName'].rstrip()
                except:
                    logger.error(
                        'There was a problem retrieving the given data from ComicVine. Ensure that www.comicvine.com is accessible AND that you have provided your OWN ComicVine API key.'
                    )
                    return

            elif dom.getElementsByTagName(
                    'name')[n].parentNode.nodeName == 'publisher':
                try:
                    comic['ComicPublisher'] = dom.getElementsByTagName(
                        'name')[n].firstChild.wholeText
                except:
                    comic['ComicPublisher'] = "Unknown"

            n += 1
    except:
        logger.warn(
            'Something went wrong retrieving from ComicVine. Ensure your API is up-to-date and that comicvine is accessible'
        )
        return

    try:
        comic['ComicYear'] = dom.getElementsByTagName(
            'start_year')[0].firstChild.wholeText
    except:
        comic['ComicYear'] = '0000'
    # NOTE(review): indexes site_detail_url by trackcnt - presumably the
    # volume-level url follows the per-issue urls in the response; confirm
    # against the ComicVine response layout.
    comic['ComicURL'] = dom.getElementsByTagName(
        'site_detail_url')[trackcnt].firstChild.wholeText

    # desdeck counts how many of description/deck were found, and drives how
    # many volume-extraction passes run below.
    desdeck = 0
    #the description field actually holds the Volume# - so let's grab it
    try:
        descchunk = dom.getElementsByTagName(
            'description')[0].firstChild.wholeText
        comic_desc = drophtml(descchunk)
        desdeck += 1
    except:
        comic_desc = 'None'

    #sometimes the deck has volume labels
    try:
        deckchunk = dom.getElementsByTagName('deck')[0].firstChild.wholeText
        comic_deck = deckchunk
        desdeck += 1
    except:
        comic_deck = 'None'

    try:
        comic['Aliases'] = dom.getElementsByTagName(
            'aliases')[0].firstChild.wholeText
        #logger.fdebug('Aliases: ' + str(aliases))
    except:
        comic['Aliases'] = 'None'

    comic['ComicVersion'] = 'noversion'
    #logger.info('comic_desc:' + comic_desc)
    #logger.info('comic_deck:' + comic_deck)
    #logger.info('desdeck: ' + str(desdeck))
    # try the description first, then the deck, until a volume # is found
    while (desdeck > 0):
        if desdeck == 1:
            if comic_desc == 'None':
                comicDes = comic_deck[:30]
            else:
                #extract the first 60 characters
                comicDes = comic_desc[:60].replace('New 52', '')
        elif desdeck == 2:
            #extract the characters from the deck
            comicDes = comic_deck[:30].replace('New 52', '')
        else:
            break

        # two passes: i == 0 expects "volume 5" style, i == 1 "fifth volume"
        i = 0
        while (i < 2):
            if 'volume' in comicDes.lower():
                #found volume - let's grab it.
                v_find = comicDes.lower().find('volume')
                #arbitrarily grab the next 10 chars (6 for volume + 1 for space + 3 for the actual vol #)
                #increased to 10 to allow for text numbering (+5 max)
                #sometimes it's volume 5 and ocassionally it's fifth volume.
                if i == 0:
                    vfind = comicDes[v_find:v_find +
                                     15]  #if it's volume 5 format
                    basenums = {
                        'zero': '0',
                        'one': '1',
                        'two': '2',
                        'three': '3',
                        'four': '4',
                        'five': '5',
                        'six': '6',
                        'seven': '7',
                        'eight': '8',
                        'nine': '9',
                        'ten': '10',
                        'i': '1',
                        'ii': '2',
                        'iii': '3',
                        'iv': '4',
                        'v': '5'
                    }
                    logger.fdebug('volume X format - ' + str(i) + ': ' + vfind)
                else:
                    vfind = comicDes[:v_find]  # if it's fifth volume format
                    basenums = {
                        'zero': '0',
                        'first': '1',
                        'second': '2',
                        'third': '3',
                        'fourth': '4',
                        'fifth': '5',
                        'sixth': '6',
                        'seventh': '7',
                        'eighth': '8',
                        'nineth': '9',
                        'tenth': '10',
                        'i': '1',
                        'ii': '2',
                        'iii': '3',
                        'iv': '4',
                        'v': '5'
                    }
                    logger.fdebug('X volume format - ' + str(i) + ': ' + vfind)
                volconv = ''
                # NOTE(review): dict iteration order is arbitrary here (py2),
                # and short keys like 'i'/'v' are substrings of the word keys -
                # which substitution wins can vary between runs; verify.
                for nums in basenums:
                    if nums in vfind.lower():
                        sconv = basenums[nums]
                        vfind = re.sub(nums, sconv, vfind.lower())
                        break
                #logger.info('volconv: ' + str(volconv))

                #now we attempt to find the character position after the word 'volume'
                if i == 0:
                    volthis = vfind.lower().find('volume')
                    volthis = volthis + 6  # add on the actual word to the position so that we can grab the subsequent digit
                    vfind = vfind[volthis:volthis +
                                  4]  #grab the next 4 characters ;)
                elif i == 1:
                    volthis = vfind.lower().find('volume')
                    vfind = vfind[volthis -
                                  4:volthis]  #grab the next 4 characters ;)

                if '(' in vfind:
                    #bracket detected in versioning'
                    vfindit = re.findall('[^()]+', vfind)
                    vfind = vfindit[0]
                # keep everything that isn't an html angle bracket, then keep
                # only the digits of the first fragment
                vf = re.findall('[^<>]+', vfind)
                ledigit = re.sub("[^0-9]", "", vf[0])
                if ledigit != '':
                    comic['ComicVersion'] = ledigit
                    logger.fdebug(
                        "Volume information found! Adding to series record : volume "
                        + comic['ComicVersion'])
                    break
                i += 1
            else:
                i += 1

        if comic['ComicVersion'] == 'noversion':
            # nothing found in this source - drop to the next one (deck)
            logger.fdebug('comic[ComicVersion]:' + str(comic['ComicVersion']))
            desdeck -= 1
        else:
            break

    # prefer our own issue count when CV's count_of_issues disagreed
    if vari == "yes":
        comic['ComicIssues'] = str(cntit)
    else:
        comic['ComicIssues'] = dom.getElementsByTagName(
            'count_of_issues')[0].firstChild.wholeText

    comic['ComicImage'] = dom.getElementsByTagName(
        'super_url')[0].firstChild.wholeText
    comic['ComicImageALT'] = dom.getElementsByTagName(
        'small_url')[0].firstChild.wholeText

    comic['FirstIssueID'] = dom.getElementsByTagName(
        'id')[0].firstChild.wholeText

    #    print ("fistIss:" + str(comic['FirstIssueID']))
    #    comicchoice.append({
    #        'ComicName':              comic['ComicName'],
    #        'ComicYear':              comic['ComicYear'],
    #        'Comicid':                comicid,
    #        'ComicURL':               comic['ComicURL'],
    #        'ComicIssues':            comic['ComicIssues'],
    #        'ComicImage':             comic['ComicImage'],
    #        'ComicVolume':            ParseVol,
    #        'ComicPublisher':         comic['ComicPublisher']
    #        })

    #    comic['comicchoice'] = comicchoice
    return comic
Code example #45
0
File: helpers.py Project: phairplay/mylar
def issuedigits(issnum):
    """Convert an issue number (possibly alphanumeric, fractional or
    unicode) into a sortable integer.

    Whole issues are multiplied by 1000 so that decimal issues (x.1, x.5)
    and lettered issues (12AU, 16.INH, 7NOW) sort between consecutive whole
    numbers; negative issues sort just below their integer.

    Returns 999999999999999 when no usable issue number can be parsed
    (one-shots, graphic novels, etc.).
    """
    if str(issnum).isdigit():
        int_issnum = int(issnum) * 1000
    else:
        if 'au' in issnum.lower() and issnum[:1].isdigit():
            # e.g. '12AU' -> 12000 + ord('a') + ord('u')
            int_issnum = (int(issnum[:-2]) * 1000) + ord('a') + ord('u')
        elif 'ai' in issnum.lower() and issnum[:1].isdigit():
            int_issnum = (int(issnum[:-2]) * 1000) + ord('a') + ord('i')
        elif 'inh' in issnum.lower():
            remdec = issnum.find('.')  #find the decimal position.
            if remdec == -1:
                #if no decimal, remove the last 3 characters (INH)
                int_issnum = (int(issnum[:-3]) * 1000) + ord('i') + ord('n') + ord('h')
            else:
                int_issnum = (int(issnum[:-4]) * 1000) + ord('i') + ord('n') + ord('h')
        elif 'now' in issnum.lower():
            if '!' in issnum: issnum = re.sub('\!', '', issnum)
            remdec = issnum.find('.')  #find the decimal position.
            if remdec == -1:
                #if no decimal, remove the last 3 characters (NOW)
                int_issnum = (int(issnum[:-3]) * 1000) + ord('n') + ord('o') + ord('w')
            else:
                int_issnum = (int(issnum[:-4]) * 1000) + ord('n') + ord('o') + ord('w')
        elif u'\xbd' in issnum:
            # FIX: previously computed int(.5) * 1000 == 0, colliding with
            # issue #0.  Half/quarter/three-quarter issues now map to
            # 500/250/750 so they sort between 0 and 1.
            int_issnum = 500
        elif u'\xbc' in issnum:
            int_issnum = 250
        elif u'\xbe' in issnum:
            int_issnum = 750
        elif u'\u221e' in issnum:
            #issnum = utf-8 will encode the infinity symbol without any help
            int_issnum = 9999999999 * 1000  # set 9999999999 for integer value of issue
        elif '.' in issnum or ',' in issnum:
            # decimal issue # - normalise ',' to '.' and split the parts
            if ',' in issnum: issnum = re.sub(',', '.', issnum)
            issst = str(issnum).find('.')
            if issst == 0:
                issb4dec = 0
            else:
                issb4dec = str(issnum)[:issst]
            decis = str(issnum)[issst+1:]
            # FIX: the decimal conversion now lives inside the try so an empty
            # decimal part (e.g. '5.') falls back to the sentinel instead of
            # raising NameError.
            try:
                if len(decis) == 1:
                    issaftdec = str(int(decis) * 10)
                else:
                    issaftdec = str(int(decis))
                int_issnum = (int(issb4dec) * 1000) + (int(issaftdec) * 10)
            except ValueError:
                #this has no issue # to get - Graphic Novel or one-shot.
                int_issnum = 999999999999999
        else:
            try:
                x = float(issnum)
                #validity check
                if x < 0:
                    # negative issue # - sort just below the integer value
                    int_issnum = (int(x) * 1000) - 1
                else:
                    raise ValueError
            except ValueError:
                #this will account for any alpha in a issue#, so long as it doesn't have decimals.
                import logger
                x = 0
                tstord = None
                issno = None
                invchk = "false"
                while (x < len(issnum)):
                    if issnum[x].isalpha():
                        #take first occurance of alpha in string and carry it through
                        tstord = issnum[x:].rstrip()
                        issno = issnum[:x].rstrip()
                        try:
                            isschk = float(issno)
                        except ValueError:
                            logger.fdebug('invalid numeric for issue - cannot be found. Ignoring.')
                            issno = None
                            tstord = None
                            invchk = "true"
                        break
                    x += 1
                if tstord is not None and issno is not None:
                    logger.fdebug('tstord: ' + str(tstord))
                    # sum the ordinals of the (lower-cased) letters so the
                    # suffix contributes deterministically to the sort key
                    ordtot = 0
                    for letter in tstord:
                        ordtot += ord(letter.lower())
                    logger.fdebug('issno: ' + str(issno))
                    int_issnum = (int(issno) * 1000) + ordtot
                    logger.fdebug('intissnum : ' + str(int_issnum))
                elif invchk == "true":
                    logger.fdebug('this does not have an issue # that I can parse properly.')
                    int_issnum = 999999999999999
                else:
                    logger.error(str(issnum) + 'this has an alpha-numeric in the issue # which I cannot account for.')
                    int_issnum = 999999999999999
    # FIX: the computed value was previously never returned (callers got None)
    return int_issnum
Code example #46
0
File: parseit.py Project: brunnels/mylar
def GCDdetails(comseries, resultURL, vari_loop, ComicID, TotalIssues, issvariation, resultPublished):
    """Scrape per-issue details (issue #'s and dates) for a series from
    comics.org (Grand Comics Database).

    comseries       -- dict holding the 'comseries' list of series variants
                       (only consulted when vari_loop > 0)
    resultURL       -- relative comics.org URL for the series (vari_loop == 0)
    vari_loop       -- number of series variants to walk; 99 is treated as 1,
                       0 means a single series matched despite a name mismatch
    ComicID         -- comic identifier; a leading 'G' marks a GCD-only series
    TotalIssues     -- running issue-count total, incremented per variant
    issvariation    -- issue-count variance flag, passed through to the result
    resultPublished -- series publication-date string (reset to 'Unknown'
                       when walking multiple variants)

    Returns the gcdinfo dict: per-issue list under 'gcdchoice', plus
    'totalissues', 'ComicImage', 'resultPublished' and 'gcdvariation'.
    """

    gcdinfo = {}
    gcdchoice = []
    gcount = 0
    i = 0
    if vari_loop > 1:
        resultPublished = "Unknown"

    if vari_loop == 99: vari_loop = 1

    while (i <= vari_loop):
        if vari_loop > 0:
            # multi-variant mode: pull URL / ID / issue count per variant
            try:
                boong = comseries['comseries'][i]
            except IndexError:
                break
            resultURL = boong['comseriesID']
            ComicID = boong['comicid']
            TotalIssues+= int(boong['comseriesIssues'])
        else:
            resultURL = resultURL
            # if we're here - it means it's a mismatched name.
            # let's pull down the publication date as it'll be blank otherwise
            inputMIS = 'http://www.comics.org' + str(resultURL)
            resp = urllib2.urlopen ( inputMIS )
#            soup = BeautifulSoup ( resp )
            try:
                soup = BeautifulSoup(urllib2.urlopen(inputMIS))
            except UnicodeDecodeError:
                logger.info("I've detected your system is using: " + sys.stdout.encoding)
                logger.info("unable to parse properly due to utf-8 problem, ignoring wrong symbols")
                try:
                    soup = BeautifulSoup(urllib2.urlopen(inputMIS)).decode('utf-8', 'ignore')
                except UnicodeDecodeError:
                    logger.info("not working...aborting. Tell Evilhero.")
                    return
            parsed = soup.find("div", {"id" : "series_data"})
            subtxt3 = parsed.find("dd", {"id" : "publication_dates"})
            resultPublished = subtxt3.findNext(text=True).rstrip()
            #print ("pubdate:" + str(resultPublished))
            coverst = soup.find("div", {"id" : "series_cover"})
            # NOTE(review): 'coverst < 0' only behaves as a None-test under
            # Python 2's mixed-type comparison rules - confirm before porting.
            if coverst < 0:
                gcdcover = "None"
            else:
                subcoverst = coverst('img',src=True)[0]
                gcdcover = subcoverst['src']

        #print ("resultURL:" + str(resultURL))
        #print ("comicID:" + str(ComicID))
        input2 = 'http://www.comics.org' + str(resultURL) + 'details/'
        resp = urllib2.urlopen(input2)
        soup = BeautifulSoup(resp)

        #for newer comics, on-sale date has complete date...
        #for older comics, pub.date is to be used

        # ('type' shadows the builtin of the same name here.)
        type = soup.find(text=' On-sale date ')
        if type:
            #print ("on-sale date detected....adjusting")
            datetype = "on-sale"
        else:
            #print ("pub date defaulting")
            datetype = "pub"
        # NOTE(review): 'datetype' is never read below (its only use is the
        # commented-out check near ParseDate) - confirm whether it's dead.

        # issue rows alternate between the two row classes on comics.org
        cnt1 = len(soup.findAll("tr", {"class" : "row_even_False"}))
        cnt2 = len(soup.findAll("tr", {"class" : "row_even_True"}))

        cnt = int(cnt1 + cnt2)

        #print (str(cnt) + " Issues in Total (this may be wrong due to alternate prints, etc")

        n_odd = -1
        n_even = -1
        n = 0
        # PI holds the previously-parsed issue string; it drives the
        # alternate-cover / duplicate-issue detection below.
        PI = "1.00"
        altcount = 0
        while ( n < cnt ):
            if n%2==0:
                n_odd+=1
                parsed = soup.findAll("tr", {"class" : "row_even_False"})[n_odd]
                ntype = "odd"
            else:
                n_even+=1
                ntype = "even"
                parsed = soup.findAll("tr", {"class" : "row_even_True"})[n_even]
            subtxt3 = parsed.find("a")
            ParseIssue = subtxt3.findNext(text=True)

            fid = parsed('a',href=True)[0]
            resultGID = fid['href']
            resultID = resultGID[7:-1]
            #print ( "ID: " + str(resultID) )

            if ',' in ParseIssue: ParseIssue = re.sub("\,", "", ParseIssue)
            #print ("ParseIssue before : " + str(ParseIssue))
            if 'Vol' in ParseIssue or '[' in ParseIssue:
                ParseIssue = re.sub("[^0-9]", "", ParseIssue)
            isslen = ParseIssue.find(' ')
            #if 'isslen' exists, it means that it's an alternative cover.
            #however, if ONLY alternate covers exist of an issue it won't work.
            #let's use the FIRST record, and ignore all other covers for the given issue.
            isschk = ParseIssue[:isslen]
            #print ("Parse is now: " + str(isschk))

            #check if decimal or '1/2' exists or not, and store decimal results
            halfchk = "no"
            if '.' in isschk:
                isschk_find = isschk.find('.')
                isschk_b4dec = isschk[:isschk_find]
                isschk_decval = isschk[isschk_find+1:]
            elif '/' in isschk:
                ParseIssue = "0.50"
                isslen = 0
                halfchk = "yes"
            else:
                isschk_decval = ".00"

            if isslen > 0:
                # trailing text after the issue number usually means an
                # alternate cover entry; normalize to 'N.dd' before comparing.
                isschk = ParseIssue[:isslen]
                isschk2 = str(isschk) + isschk_decval
                #logger.fdebug("isschk: " + str(isschk) + " ...isschk2: " + str(isschk2))
                if 'a' in isschk or 'b' in isschk or 'c' in isschk:
                    isschk2 = ParseIssue[:isslen-1] + isschk_decval
                    #altcount == 2
                ParseIssue = str(isschk2)
                #logger.fdebug("Alt.cover found = " + str(isschk2))
                if str(PI) == str(isschk2):
                    #logger.fdebug("matched on PI: " + str(PI) + " .. and isschk2: " + str(isschk2))
                    if altcount == 0:
                        #logger.fdebug("first occurance - marking and continuing..." + str(isschk2))
                        #this handles the first occurance..
                        ParseIssue = str(isschk2)
                        PI = str(isschk2)
                        altcount = 1
                    else:
                        #logger.fdebug("Using only first record for issue - ignoring further alternate matches")
                        ParseIssue = "this is wrong"
                        altcount+=1
                else:
                    #logger.fdebug("issues didn't match.")
                    altcount = 1
                    ParseIssue = str(isschk) + isschk_decval
            else:
                if halfchk == "yes": pass
                else:
                    ParseIssue = ParseIssue + isschk_decval
                #print ("no alt.cover detected for - " + str(ParseIssue))
                altcount = 1
            # altcount == 1 marks the first (kept) record for an issue;
            # subsequent alternate covers of the same issue are skipped.
            if (altcount == 1):
                #logger.fdebug("adding issue to db : " + str(ParseIssue))
                # in order to get the compare right, let's decimialize the string to '.00'.
                gcdinfo['ComicIssue'] = ParseIssue
                #print "Issue: " + str(ParseIssue)
                #^^ will retrieve issue
                #if datetype == "on-sale":
                subtxt1 = parsed('td')[2]
                ParseDate = subtxt1.findNext(text=True)
                pdlen = len(ParseDate)
                #print "sale-date..ParseDate:" + str(ParseDate)
                #print ("Parsed Date length: " + str(pdlen))
                # short on-sale date -> fall back to the publication-date cell
                if len(ParseDate) < 7:
                    subtxt3 = parsed('td')[0]
                    ParseDate = subtxt3.findNext(text=True)
                    #print "pub-date..ParseDate:" + str(ParseDate)
                    if ParseDate == ' ':
                        #default to empty so doesn't error out.
                        ParseDate = "0000-00-00"
                #ParseDate = ParseDate.replace('?','')
                ParseDate = ParseDate.replace(' ','')
                #print "Parse date: " + str(ParseDate)
                gcdinfo['ComicDate'] = ParseDate
                #^^ will retrieve date #
                if not any(d.get('GCDIssue', None) == str(gcdinfo['ComicIssue']) for d in gcdchoice):
                    #logger.fdebug("adding: " + str(gcdinfo['ComicIssue']))
                    # GCD-only series ('G' prefix) also track a per-issue ID
                    # and a running count.
                    if ComicID[:1] == "G":
                        gcdchoice.append({
                            'GCDid':                ComicID,
                            'IssueID':              resultID,
                            'GCDIssue':             gcdinfo['ComicIssue'],
                            'GCDDate':              gcdinfo['ComicDate']
                            })
                        gcount+=1
                    else:
                        gcdchoice.append({
                            'GCDid':                ComicID,
                            'GCDIssue':             gcdinfo['ComicIssue'],
                            'GCDDate':              gcdinfo['ComicDate']
                            })

                    gcdinfo['gcdchoice'] = gcdchoice

                else:
                    #--if 2 identical issue numbers legitimately exist, but have different
                    #--publication dates, try to distinguish
                    logger.fdebug("2 identical issue #'s have been found...determining if it's intentional.")
                    #get current issue & publication date.
                    logger.fdebug("Issue #:" + str(gcdinfo['ComicIssue']))
                    logger.fdebug("IssueDate: " + str(gcdinfo['ComicDate']))
                    #get conflicting issue from tuple
                    for d in gcdchoice:
                        if str(d['GCDIssue']) == str(gcdinfo['ComicIssue']):
                            logger.fdebug("Issue # already in tuple - checking IssueDate:" + str(d['GCDDate']) )
                            if str(d['GCDDate']) == str(gcdinfo['ComicDate']):
                                logger.fdebug("Issue #'s and dates match...skipping.")
                            else:
                                logger.fdebug("Issue#'s match but different publication dates, not skipping.")
                    #pass
                    #logger.fdebug("Duplicate issue detected in DB - ignoring subsequent issue # " + str(gcdinfo['ComicIssue']))

                PI = ParseIssue
        #else:
            # -- this needs a rework --
            # if issue only has alternative covers on comics.org, it won't match
            # and will cause the script to return a cannot retrieve..
            #compare previous issue to current issue (to help with alt.cover count)
         #   PI = ParseIssue
         #   altcount+=1
         #   print ("alternate issue - ignoring")
        #altcount = 0
            n+=1
        i+=1
    gcdinfo['gcdvariation'] = issvariation
    if ComicID[:1] == "G":
        gcdinfo['totalissues'] = gcount
    else:
        gcdinfo['totalissues'] = TotalIssues
    # NOTE(review): gcdcover is only assigned on the vari_loop == 0 path;
    # with vari_loop > 0 this line would raise NameError - confirm callers.
    gcdinfo['ComicImage'] = gcdcover
    gcdinfo['resultPublished'] = resultPublished
    #print ("gcdvariation: " + str(gcdinfo['gcdvariation']))
    return gcdinfo
コード例 #47
0
ファイル: parseit.py プロジェクト: yonkyunior/mylar
def ComChk(ComicName, ComicYear, ComicPublisher, Total, ComicID):
    """Run several comics.org advanced searches for a series and collect all
    candidate matches.

    ComicName      -- series title to search for
    ComicYear      -- first publication year (start of the search date range)
    ComicPublisher -- publisher name; kept in the query only for the 'big'
                      publishers (DC / Marvel / Image / IDW)
    Total          -- issue count (stored, not used to filter here)
    ComicID        -- comic identifier carried onto each candidate entry

    Returns (comchoice, totalcount): comchoice['comchkchoice'] is the list
    of candidate dicts, totalcount a row count (see note at the bottom).
    """
    comchkchoice = []
    comchoice = {}

    NOWyr = datetime.date.today().year
    if datetime.date.today().month == 12:
        # December releases are often dated the following year on GCD.
        NOWyr = NOWyr + 1
        logger.fdebug("We're in December, incremented search Year to increase search results: " + str(NOWyr))
    comicnm = ComicName.encode('utf-8').strip()
    comicyr = ComicYear
    comicis = Total
    comicid = ComicID
    comicpub = ComicPublisher.encode('utf-8').strip()
    #print ("...comchk parser initialization...")
    #print ( "comicname: " + str(comicnm) )
    #print ( "comicyear: " + str(comicyr) )
    #print ( "comichave: " + str(comicis) )
    #print ( "comicpub: " + str(comicpub) )
    #print ( "comicid: " + str(comicid) )
    # do 3 runs at the comics.org search to get the best results
    comicrun = []
    # &pub_name=DC
    # have to remove the spaces from Publisher or else will not work (ie. DC Comics vs DC will not match)
    # take the 1st word ;)
    #comicpub = comicpub.split()[0]
    # if it's not one of the BIG publisher's it might fail - so let's increase the odds.
    pubbiggies = ['DC',
                   'Marvel',
                   'Image',
                   'IDW']
    uhuh = "no"
    for pb in pubbiggies:
        if pb in comicpub:
            #keep publisher in url if a biggie.
            uhuh = "yes"
            #print (" publisher match : " + str(comicpub))
            conv_pub = comicpub.split()[0]
            #print (" converted publisher to : " + str(conv_pub))
    #1st run setup - leave it all as it is.
    comicrun.append(comicnm)
    cruncnt = 0
    #2nd run setup - remove the last character and do a broad search (keep year or else will blow up)
    if len(str(comicnm).split()) > 2:
        comicrun.append(' '.join(comicnm.split(' ')[:-1]))
        cruncnt+=1
    # to increase the likely hood of matches and to get a broader scope...
    # lets remove extra characters
    if re.sub('[\.\,\:]', '', comicnm) != comicnm:
        comicrun.append(re.sub('[\.\,\:]', '', comicnm))
        cruncnt+=1
    # one more addition - if the title contains a 'the', remove it ;)
    if comicnm.lower().startswith('the'):
        comicrun.append(comicnm[4:].strip())
        cruncnt+=1
    totalcount = 0
    cr = 0
    #print ("cruncnt is " + str(cruncnt))
    while (cr <= cruncnt):
        #print ("cr is " + str(cr))
        comicnm = comicrun[cr]
        #leaving spaces in will screw up the search...let's take care of it
        comicnm = re.sub(' ', '+', comicnm)
        #print ("comicnm: " + str(comicnm))
        if uhuh == "yes":
            publink = "&pub_name=" + str(conv_pub)
        if uhuh == "no":
            publink = "&pub_name="
        input = 'http://www.comics.org/search/advanced/process/?target=series&method=icontains&logic=False&keywords=&order1=series&order2=date&order3=&start_date=' + str(comicyr) + '-01-01&end_date=' + str(NOWyr) + '-12-31' + '&title=&feature=&job_number=&pages=&script=&pencils=&inks=&colors=&letters=&story_editing=&genre=&characters=&synopsis=&reprint_notes=&story_reprinted=None&notes=' + str(publink) + '&pub_notes=&brand=&brand_notes=&indicia_publisher=&is_surrogate=None&ind_pub_notes=&series=' + str(comicnm) + '&series_year_began=&series_notes=&tracking_notes=&issue_count=&is_comics=None&format=&color=&dimensions=&paper_stock=&binding=&publishing_format=&issues=&volume=&issue_title=&variant_name=&issue_date=&indicia_frequency=&price=&issue_pages=&issue_editing=&isbn=&barcode=&issue_notes=&issue_reprinted=None&is_indexed=None'
        response = urllib2.urlopen (input)
        soup = BeautifulSoup (response)
        # result rows alternate between the listing_even/listing_odd classes
        cnt1 = len(soup.findAll("tr", {"class": "listing_even"}))
        cnt2 = len(soup.findAll("tr", {"class": "listing_odd"}))

        cnt = int(cnt1 + cnt2)
#        print ("cnt1: " + str(cnt1))
#        print ("cnt2: " + str(cnt2))
#        print (str(cnt) + " results")

        resultName = []
        resultID = []
        resultYear = []
        resultIssues = []
        resultPublisher = []
        resultURL = None
        n_odd = -1
        n_even = -1
        n = 0
        while (n < cnt):
            if n%2==0:
                n_even+=1
                resultp = soup.findAll("tr", {"class": "listing_even"})[n_even]
            else:
                n_odd+=1
                resultp = soup.findAll("tr", {"class": "listing_odd"})[n_odd]
            rtp = resultp('a')[1]
            rtpit = rtp.findNext(text=True)
            rtpthis = rtpit.encode('utf-8').strip()
            resultName.append(helpers.cleanName(rtpthis))
#            print ( "Comic Name: " + str(resultName[n]) )

            pub = resultp('a')[0]
            pubit = pub.findNext(text=True)
#            pubthis = u' '.join(pubit).encode('utf-8').strip()
            pubthis = pubit.encode('utf-8').strip()
            resultPublisher.append(pubthis)
#            print ( "Publisher: " + str(resultPublisher[n]) )

            fip = resultp('a', href=True)[1]
            resultID.append(fip['href'])
#            print ( "ID: " + str(resultID[n]) )

            subtxt3 = resultp('td')[3]
            resultYear.append(subtxt3.findNext(text=True))
            resultYear[n] = resultYear[n].replace(' ', '')
            subtxt4 = resultp('td')[4]
            resultIssues.append(helpers.cleanName(subtxt4.findNext(text=True)))
            # keep only the text before the word 'issue' (the numeric count)
            resiss = resultIssues[n].find('issue')
            resiss = int(resiss)
            # NOTE(review): .replace('', '') is a no-op - likely a leftover;
            # only the [:resiss] slice and the space-strip below take effect.
            resultIssues[n] = resultIssues[n].replace('', '')[:resiss]
            resultIssues[n] = resultIssues[n].replace(' ', '')
#            print ( "Year: " + str(resultYear[n]) )
#            print ( "Issues: " + str(resultIssues[n]) )
#            print ("comchkchoice: " + str(comchkchoice))
            # de-dupe on the GCD series id across the multiple search runs
            if not any(d.get('GCDID', None) == str(resultID[n]) for d in comchkchoice):
                #print ( str(resultID[n]) + " not in DB...adding.")
                comchkchoice.append({
                       "ComicID":         str(comicid),
                       "ComicName":       resultName[n],
                       "GCDID":           str(resultID[n]).split('/')[2],
                       "ComicYear":      str(resultYear[n]),
                       "ComicPublisher": resultPublisher[n],
                       "ComicURL":       "http://www.comics.org" + str(resultID[n]),
                       "ComicIssues":    str(resultIssues[n])
                      })
            #else:
                #print ( str(resultID[n]) + " already in DB...skipping" )
            n+=1
        cr+=1
    # NOTE(review): this sits OUTSIDE the while loop, so only the final
    # run's 'cnt' is added - confirm whether it should accumulate per run.
    totalcount= totalcount + cnt
    comchoice['comchkchoice'] = comchkchoice
    return comchoice, totalcount
コード例 #48
0
ファイル: parseit.py プロジェクト: brunnels/mylar
def GCDScraper(ComicName, ComicYear, Total, ComicID, quickmatch=None):
    """Locate a series on comics.org (GCD) matching name, year and issue count.

    ComicName  -- series title; recursively normalized when no match is found
                  (number-words -> digits, leading 'the' dropped, ':' removed,
                  '-' -> space, whole-word 'and' -> '&')
    ComicYear  -- first publication year of the series
    Total      -- issue count according to ComicVine; a +/-1 variance vs GCD
                  is accepted and recorded in issvariation
    ComicID    -- comic identifier ('G' prefix denotes a GCD-only series)
    quickmatch -- if "yes", return only 'Match'/'No Match' without pulling
                  full issue details

    Returns 'No Match', 'Match' (quickmatch mode), or the gcdinfo dict
    produced by GCDdetails() for the matched series.
    """
    NOWyr = datetime.date.today().year
    if datetime.date.today().month == 12:
        # GCD often dates December releases with the following year.
        NOWyr = NOWyr + 1
        logger.fdebug("We're in December, incremented search Year to increase search results: " + str(NOWyr))
    # spaces would break the query string - substitute '+' for the search
    comicnm = re.sub(' ', '+', ComicName)
    searchurl = 'http://www.comics.org/search/advanced/process/?target=series&method=icontains&logic=False&order2=date&order3=&start_date=' + str(ComicYear) + '-01-01&end_date=' + str(NOWyr) + '-12-31&series=' + str(comicnm) + '&is_indexed=None'
    response = urllib2.urlopen(searchurl)
    soup = BeautifulSoup(response)
    # result rows alternate between the listing_even/listing_odd classes
    cnt1 = len(soup.findAll("tr", {"class": "listing_even"}))
    cnt2 = len(soup.findAll("tr", {"class": "listing_odd"}))
    cnt = int(cnt1 + cnt2)

    # normalize the search name once (loop-invariant): strip punctuation
    # (including the '+' placeholders) and lowercase for comparison
    CleanComicName = re.sub('[\,\.\:\;\'\[\]\(\)\!\@\#\$\%\^\&\*\-\_\+\=\?\/]', '', comicnm)
    CleanComicName = re.sub(' ', '', CleanComicName).lower()

    resultName = []
    resultID = []
    resultYear = []
    resultIssues = []
    resultURL = None
    n_odd = -1
    n_even = -1
    n = 0
    while n < cnt:
        if n % 2 == 0:
            n_even += 1
            resultp = soup.findAll("tr", {"class": "listing_even"})[n_even]
        else:
            n_odd += 1
            resultp = soup.findAll("tr", {"class": "listing_odd"})[n_odd]
        rtp = resultp('a')[1]
        resultName.append(helpers.cleanName(rtp.findNext(text=True)))
        fip = resultp('a', href=True)[1]
        resultID.append(fip['href'])

        subtxt3 = resultp('td')[3]
        resultYear.append(subtxt3.findNext(text=True))
        resultYear[n] = resultYear[n].replace(' ', '')
        subtxt4 = resultp('td')[4]
        resultIssues.append(helpers.cleanName(subtxt4.findNext(text=True)))
        # keep only the text before the word 'issue' (the numeric count)
        resiss = resultIssues[n].find('issue')
        resultIssues[n] = resultIssues[n][:resiss]
        resultIssues[n] = resultIssues[n].replace(' ', '')
        CleanResultName = re.sub('[\,\.\:\;\'\[\]\(\)\!\@\#\$\%\^\&\*\-\_\+\=\?\/]', '', resultName[n])
        CleanResultName = re.sub(' ', '', CleanResultName).lower()
        # the [3:] variant also accepts a result with a leading 'the'
        if CleanResultName == CleanComicName or CleanResultName[3:] == CleanComicName:
            # publication dates straddling Dec/Jan mean the GCD year can be
            # one ahead of ComicVine's - allow a 1-year grace.
            if resultYear[n] == ComicYear or resultYear[n] == str(int(ComicYear) + 1):
                # GCD/CV issue counts legitimately differ by 1 at times
                # (usually CV lagging) - accept a +/-1 variance and record
                # which side is ahead.
                if int(resultIssues[n]) == int(Total) or int(resultIssues[n]) == int(Total) + 1 or (int(resultIssues[n]) + 1) == int(Total):
                    if int(resultIssues[n]) == int(Total) + 1:
                        issvariation = "cv"
                    elif int(resultIssues[n]) + 1 == int(Total):
                        issvariation = "gcd"
                    else:
                        issvariation = "no"
                    TotalIssues = resultIssues[n]
                    resultURL = str(resultID[n])
                    rptxt = resultp('td')[6]
                    resultPublished = rptxt.findNext(text=True)
                    break

        n += 1
    # it's possible that comicvine would return a comic name incorrectly, or
    # gcd has the wrong title and won't match 100%
    # (ie. The Flash-2011 on comicvine is Flash-2011 on gcd) - retry with
    # progressively normalized names.
    basnumbs = {'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10, 'eleven': 11, 'twelve': 12}
    if resultURL is None:
        # search for numbers written as words, and change to numeric;
        # whole-word match only so e.g. 'one' doesn't fire inside 'money'
        for numbs in basnumbs:
            if re.search(r'\b' + numbs + r'\b', ComicName.lower()):
                numconv = basnumbs[numbs]
                ComicNm = re.sub(r'\b' + numbs + r'\b', str(numconv), ComicName.lower())
                return GCDScraper(ComicNm, ComicYear, Total, ComicID)
        if ComicName.lower().startswith('the '):
            ComicName = ComicName[4:]
            return GCDScraper(ComicName, ComicYear, Total, ComicID)
        if ':' in ComicName:
            ComicName = re.sub(':', '', ComicName)
            return GCDScraper(ComicName, ComicYear, Total, ComicID)
        if '-' in ComicName:
            ComicName = re.sub('-', ' ', ComicName)
            return GCDScraper(ComicName, ComicYear, Total, ComicID)
        # whole-word, case-insensitive 'and' -> '&'; a plain substring
        # replace would mangle titles (ie. 'Wanda') and could recurse
        # forever on an upper-case 'AND' that never gets replaced.
        if re.search(r'(?i)\band\b', ComicName):
            ComicName = re.sub(r'(?i)\band\b', '&', ComicName)
            return GCDScraper(ComicName, ComicYear, Total, ComicID)
        if not quickmatch:
            return 'No Match'
    if quickmatch == "yes":
        if resultURL is None:
            return 'No Match'
        else:
            return 'Match'
    return GCDdetails(comseries=None, resultURL=resultURL, vari_loop=0, ComicID=ComicID, TotalIssues=TotalIssues, issvariation=issvariation, resultPublished=resultPublished)
コード例 #49
0
ファイル: parseit.py プロジェクト: brunnels/mylar
def GCDAdd(gcdcomicid):
    """Scrape series-level info (name, cover, publisher, publication dates
    and issue count) from comics.org for each GCD series id given.

    gcdcomicid -- iterable of GCD series id's

    Returns a dict with key 'serieschoice' mapping to a list of per-series
    dicts (ComicID/ComicName/ComicYear/ComicIssues/ComicPublisher/ComicCover).
    """
    serieschoice = []
    series = {}
    logger.fdebug("I'm trying to find these GCD comicid's:" + str(gcdcomicid))
    for gcdid in gcdcomicid:
        logger.fdebug("looking at gcdid:" + str(gcdid))
        input2 = 'http://www.comics.org/series/' + str(gcdid)
        logger.fdebug("---url: " + str(input2))
        resp = urllib2.urlopen(input2)
        soup = BeautifulSoup(resp)
        logger.fdebug("SeriesName section...")
        parsen = soup.find("span", {"id": "series_name"})
        subpar = parsen('a')[0]
        resultName = subpar.findNext(text=True)
        logger.fdebug("ComicName: " + str(resultName))
        # covers - fixed: previously 'if coverst < 0:', which only behaved
        # as a None-test by accident of Python 2 mixed-type comparison.
        logger.fdebug("Covers section...")
        coverst = soup.find("div", {"id": "series_cover"})
        if coverst is None:
            gcdcover = "None"
            logger.fdebug("unable to find any covers - setting to None")
        else:
            subcoverst = coverst('img', src=True)[0]
            gcdcover = subcoverst['src']
        logger.fdebug("Cover: " + str(gcdcover))
        # publisher: the page layout varies, so fall back to the alternate
        # div when the first lookup raises.
        logger.fdebug("Publisher section...")
        try:
            pubst = soup.find("div", {"class": "item_data"})
            catchit = pubst('a')[0]
        except (IndexError, TypeError):
            pubst = soup.findAll("div", {"class": "left"})[1]
            catchit = pubst.find("a")
        publisher = catchit.findNext(text=True)
        logger.fdebug("Publisher: " + str(publisher))
        parsed = soup.find("div", {"id": "series_data"})
        subtxt3 = parsed.find("dd", {"id": "publication_dates"})
        pubdate = subtxt3.findNext(text=True).rstrip()
        logger.fdebug("pubdate:" + str(pubdate))
        # issue count: split the text into the leading count and the
        # bracketed numbering note (format appears to be 'N issues (...)').
        subtxt4 = parsed.find("dd", {"id": "issues_published"})
        noiss = subtxt4.findNext(text=True)
        lenwho = len(noiss)
        lent = noiss.find(' ', 2)
        lenf = noiss.find('(')
        stringit = noiss[lenf:lenwho]
        stringout = noiss[:lent]
        noissues = stringout.rstrip('  \t\r\n\0')
        numbering = stringit.rstrip('  \t\r\n\0')
        logger.fdebug("noissues:" + str(noissues))
        logger.fdebug("numbering:" + str(numbering))
        serieschoice.append({
               "ComicID":         gcdid,
               "ComicName":       resultName,
               "ComicYear":       pubdate,
               "ComicIssues":     noissues,
               "ComicPublisher":  publisher,
               "ComicCover":      gcdcover
              })
    series['serieschoice'] = serieschoice
    return series
コード例 #50
0
ファイル: parseit.py プロジェクト: brunnels/mylar
def ComChk(ComicName, ComicYear, ComicPublisher, Total, ComicID):
    """Screen-scrape the comics.org (GCD) advanced series search for a comic.

    Runs up to three query variants of ComicName (as-is, last word dropped,
    punctuation stripped) against the comics.org advanced search, restricted
    to publications between ComicYear and the current year, and collects the
    unique series hits.

    Parameters:
        ComicName:      series title to search for
        ComicYear:      start year for the publication-date filter (string)
        ComicPublisher: publisher name; kept in the query only for the big
                        four (DC/Marvel/Image/IDW)
        Total:          issue count of the series (stored but not used in the
                        query itself)
        ComicID:        internal ComicID carried through into each result

    Returns:
        (comchoice, totalcount) where comchoice is a dict with key
        'comchkchoice' holding a list of result dicts (ComicID, ComicName,
        GCDID, ComicYear, ComicPublisher, ComicURL, ComicIssues), and
        totalcount is a result count (see NOTE at the bottom).
    """
    comchkchoice = []
    comchoice = {}

    # search window end: this year, or next year when we're already in December
    # (upcoming titles are often dated into the new year).
    NOWyr = datetime.date.today().year
    if datetime.date.today().month == 12:
        NOWyr = NOWyr + 1
        logger.fdebug("We're in December, incremented search Year to increase search results: " + str(NOWyr))
    comicnm = ComicName
    comicyr = ComicYear
    comicis = Total
    comicid = ComicID
    comicpub = ComicPublisher
    #print ( "comicname: " + str(comicnm) )
    #print ( "comicyear: " + str(comicyr) )
    #print ( "comichave: " + str(comicis) )
    #print ( "comicpub: " + str(comicpub) )
    #print ( "comicid: " + str(comicid) )
    # do 3 runs at the comics.org search to get the best results
    comicrun = []
    # &pub_name=DC
    # have to remove the spaces from Publisher or else will not work (ie. DC Comics vs DC will not match)
    # take the 1st word ;)
    #comicpub = comicpub.split()[0]
    # if it's not one of the BIG publisher's it might fail - so let's increase the odds.
    pubbiggies = [ 'DC', 
                   'Marvel',
                   'Image',
                   'IDW' ]
    uhuh = "no"
    # conv_pub is only bound when uhuh flips to "yes"; its use below is
    # guarded by the same flag, so no NameError can occur.
    for pb in pubbiggies:
        if pb in comicpub:
            #keep publisher in url if a biggie.
            uhuh = "yes"
            #print (" publisher match : " + str(comicpub))
            conv_pub = comicpub.split()[0]
            #print (" converted publisher to : " + str(conv_pub))
    #1st run setup - leave it all as it is.
    comicrun.append(comicnm)
    cruncnt = 0
    #2nd run setup - remove the last character and do a broad search (keep year or else will blow up)
    if len(str(comicnm).split()) > 2:
        comicrun.append(' '.join(comicnm.split(' ')[:-1]))
        cruncnt+=1
    # to increase the likely hood of matches and to get a broader scope...
    # lets remove extra characters
    if re.sub('[\.\,\:]', '', comicnm) != comicnm:
        comicrun.append(re.sub('[\.\,\:]', '', comicnm))
        cruncnt+=1
    totalcount = 0
    cr = 0
    #print ("cruncnt is " + str(cruncnt))
    while (cr <= cruncnt):
        #print ("cr is " + str(cr))
        comicnm = comicrun[cr]
        #leaving spaces in will screw up the search...let's take care of it
        comicnm = re.sub(' ', '+', comicnm)
        #print ("comicnm: " + str(comicnm))
        if uhuh == "yes":
            publink = "&pub_name=" + str(conv_pub)
        if uhuh == "no":
            publink = "&pub_name="
        input = 'http://www.comics.org/search/advanced/process/?target=series&method=icontains&logic=False&keywords=&order1=series&order2=date&order3=&start_date=' + str(comicyr) + '-01-01&end_date=' + str(NOWyr) + '-12-31' + '&title=&feature=&job_number=&pages=&script=&pencils=&inks=&colors=&letters=&story_editing=&genre=&characters=&synopsis=&reprint_notes=&story_reprinted=None&notes=' + str(publink) + '&pub_notes=&brand=&brand_notes=&indicia_publisher=&is_surrogate=None&ind_pub_notes=&series=' + str(comicnm) + '&series_year_began=&series_notes=&tracking_notes=&issue_count=&is_comics=None&format=&color=&dimensions=&paper_stock=&binding=&publishing_format=&issues=&volume=&issue_title=&variant_name=&issue_date=&indicia_frequency=&price=&issue_pages=&issue_editing=&isbn=&barcode=&issue_notes=&issue_reprinted=None&is_indexed=None'
        response = urllib2.urlopen ( input )
        soup = BeautifulSoup ( response)
        # result rows alternate between "listing_even" and "listing_odd"
        # classes; walk both lists in lockstep to preserve page order.
        cnt1 = len(soup.findAll("tr", {"class" : "listing_even"}))
        cnt2 = len(soup.findAll("tr", {"class" : "listing_odd"}))

        cnt = int(cnt1 + cnt2)
#        print ("cnt1: " + str(cnt1))
#        print ("cnt2: " + str(cnt2))
#        print (str(cnt) + " results")

        resultName = []
        resultID = []
        resultYear = []
        resultIssues = []
        resultPublisher = []
        resultURL = None
        n_odd = -1
        n_even = -1
        n = 0
        while ( n < cnt ):
            if n%2==0:
                n_even+=1
                resultp = soup.findAll("tr", {"class" : "listing_even"})[n_even]
            else:
                n_odd+=1
                resultp = soup.findAll("tr", {"class" : "listing_odd"})[n_odd]
            # anchor 1 = series name, anchor 0 = publisher, anchor href = series URL
            rtp = resultp('a')[1]
            resultName.append(helpers.cleanName(rtp.findNext(text=True)))
#            print ( "Comic Name: " + str(resultName[n]) )

            pub = resultp('a')[0]
            resultPublisher.append(pub.findNext(text=True))
#            print ( "Publisher: " + str(resultPublisher[n]) )

            fip = resultp('a',href=True)[1]
            resultID.append(fip['href'])
#            print ( "ID: " + str(resultID[n]) )

            # cell 3 = year, cell 4 = issue count text (e.g. "12 issues (...)")
            subtxt3 = resultp('td')[3]
            resultYear.append(subtxt3.findNext(text=True))
            resultYear[n] = resultYear[n].replace(' ','')
            subtxt4 = resultp('td')[4]
            resultIssues.append(helpers.cleanName(subtxt4.findNext(text=True)))
            resiss = resultIssues[n].find('issue')
            resiss = int(resiss)
            # NOTE(review): .replace('','') is a no-op; only the [:resiss]
            # slice (text before the word 'issue') has any effect here.
            resultIssues[n] = resultIssues[n].replace('','')[:resiss]
            resultIssues[n] = resultIssues[n].replace(' ','')
#            print ( "Year: " + str(resultYear[n]) )
#            print ( "Issues: " + str(resultIssues[n]) )
#            print ("comchkchoice: " + str(comchkchoice))
            # NOTE(review): this dedup compares the stored GCDID (which is
            # resultID split on '/') against the full href string, so it
            # looks like it can never match — verify intent.
            if not any(d.get('GCDID', None) == str(resultID[n]) for d in comchkchoice):
                #print ( str(resultID[n]) + " not in DB...adding.")
                comchkchoice.append({
                       "ComicID":         str(comicid),
                       "ComicName":       str(resultName[n]),
                       "GCDID":           str(resultID[n]).split('/')[2],
                       "ComicYear" :      str(resultYear[n]),
                       "ComicPublisher" : str(resultPublisher[n]),
                       "ComicURL" :       "http://www.comics.org" + str(resultID[n]),
                       "ComicIssues" :    str(resultIssues[n])
                      })
            #else:
                #print ( str(resultID[n]) + " already in DB...skipping" ) 
            n+=1
        cr+=1
    # NOTE(review): this sits outside the while loop, so totalcount ends up
    # holding only the final run's cnt rather than the sum of all runs —
    # confirm whether it should be indented into the loop.
    totalcount= totalcount + cnt
    comchoice['comchkchoice'] = comchkchoice
    return comchoice, totalcount 
コード例 #51
0
ファイル: nzbget.py プロジェクト: xeddmc/mylar
    def processor(self, nzbinfo):
        """Watch an NZBGet download until it leaves the active queue.

        Looks up nzbinfo['NZBID'] in the NZBGet active queue (via
        self.server.listgroups()) and polls every 10 seconds until the item
        disappears from the queue. While polling, it inspects the item's
        PostInfoText, ScriptStatuses and Parameters for signs that a
        ComicRN / nzbToMylar post-processing script is also active, since
        running both that and Mylar's Completed Download Handling would
        double-process the download.

        Returns:
            {'status': False} when the queue listing cannot be retrieved;
            {'status': 'double-pp', 'failed': False} when a competing
            post-processing script is detected;
            otherwise the result of self.historycheck(nzbinfo) (called either
            immediately if the item is already gone from the queue, or after
            the download finishes).
        """
        nzbid = nzbinfo['NZBID']
        try:
            logger.fdebug(
                'Now checking the active queue of nzbget for the download')
            queueinfo = self.server.listgroups()
        except Exception as e:
            logger.warn(
                'Error attempting to retrieve active queue listing: %s' % e)
            return {'status': False}
        else:
            logger.fdebug('valid queue result returned. Analyzing...')
            queuedl = [qu for qu in queueinfo if qu['NZBID'] == nzbid]
            if len(queuedl) == 0:
                # not queued (anymore) - assume it already completed and go
                # straight to the history check.
                logger.warn(
                    'Unable to locate NZBID %s in active queue. Could it be finished already ?'
                    % nzbid)
                return self.historycheck(nzbinfo)

            stat = False
            double_pp = False
            double_type = None
            while stat is False:
                time.sleep(10)
                queueinfo = self.server.listgroups()
                queuedl = [qu for qu in queueinfo if qu['NZBID'] == nzbid]
                if len(queuedl) == 0:
                    logger.fdebug(
                        'Item is no longer in active queue. It should be finished by my calculations'
                    )
                    stat = True
                else:
                    # check 1: post-processing info text mentions a script
                    if 'comicrn' in queuedl[0]['PostInfoText'].lower():
                        double_pp = True
                        double_type = 'ComicRN'
                    elif 'nzbtomylar' in queuedl[0]['PostInfoText'].lower():
                        double_pp = True
                        double_type = 'nzbToMylar'

                    # check 2: per-item script statuses
                    if all([
                            len(queuedl[0]['ScriptStatuses']) > 0,
                            double_pp is False
                    ]):
                        for x in queuedl[0]['ScriptStatuses']:
                            if 'comicrn' in x['Name'].lower():
                                double_pp = True
                                double_type = 'ComicRN'
                                break
                            elif 'nzbtomylar' in x['Name'].lower():
                                double_pp = True
                                double_type = 'nzbToMylar'
                                break

                    # check 3: per-item parameters with the script enabled
                    if all([
                            len(queuedl[0]['Parameters']) > 0,
                            double_pp is False
                    ]):
                        for x in queuedl[0]['Parameters']:
                            if all([
                                    'comicrn' in x['Name'].lower(),
                                    x['Value'] == 'yes'
                            ]):
                                double_pp = True
                                double_type = 'ComicRN'
                                break
                            elif all([
                                    'nzbtomylar' in x['Name'].lower(),
                                    x['Value'] == 'yes'
                            ]):
                                double_pp = True
                                double_type = 'nzbToMylar'
                                break

                    if double_pp is True:
                        logger.warn(
                            '%s has been detected as being active for this category & download. Completed Download Handling will NOT be performed due to this.'
                            % double_type)
                        logger.warn(
                            'Either disable Completed Download Handling for NZBGet within Mylar, or remove %s from your category script in NZBGet.'
                            % double_type)
                        return {'status': 'double-pp', 'failed': False}

                    logger.fdebug('status: %s' % queuedl[0]['Status'])
                    logger.fdebug('name: %s' % queuedl[0]['NZBName'])
                    logger.fdebug('FileSize: %sMB' % queuedl[0]['FileSizeMB'])
                    logger.fdebug('Download Left: %sMB' %
                                  queuedl[0]['RemainingSizeMB'])
                    logger.fdebug('health: %s' % (queuedl[0]['Health'] / 10))
                    logger.fdebug('destination: %s' % queuedl[0]['DestDir'])

            logger.fdebug('File has now downloaded!')
            time.sleep(
                5
            )  #wait some seconds so shit can get written to history properly
            return self.historycheck(nzbinfo)
コード例 #52
0
ファイル: findcomicfeed.py プロジェクト: TimTim74/mylar
def Startit(searchName, searchIssue, searchYear, ComicVersion, IssDateFix):
    """Search the nzbindex.nl RSS feeds for a specific comic issue.

    Parameters:
        searchName:   series title (may still contain '%20' escapes from
                      webparse; commas are stripped)
        searchIssue:  issue number as a string (e.g. '01')
        searchYear:   expected publication year as a string
        ComicVersion: series volume (currently unused here)
        IssDateFix:   'no', or the store-date month when it falls in a
                      different year than the cover date: '01'/'02' accept
                      the previous year, any other month the next year

    Returns:
        dict with key 'entries' (list of {'title', 'link'} dicts) when the
        feeds returned any results, otherwise the string "no results".
    """
    #clean up searchName due to webparse.
    searchName = searchName.replace("%20", " ")
    if "," in searchName:
        searchName = searchName.replace(",", "")
    logger.fdebug("name:" + str(searchName))
    logger.fdebug("issue:" + str(searchIssue))
    logger.fdebug("year:" + str(searchYear))
    splitSearch = searchName.split(" ")
    joinSearch = "+".join(splitSearch) + "+" + searchIssue

    #allow optional ' - ' / ':' separators between words when matching titles.
    if "-" in searchName:
        searchName = searchName.replace("-", '((\\s)?[-:])?(\\s)?')

    regexName = searchName.replace(" ", '((\\s)?[-:])?(\\s)?')

    #logger.fdebug('searchName:' + searchName)
    #logger.fdebug('regexName:' + regexName)

    if mylar.USE_MINSIZE:
        size_constraints = "minsize=" + str(mylar.MINSIZE)
    else:
        size_constraints = "minsize=10"

    if mylar.USE_MAXSIZE:
        size_constraints = size_constraints + "&maxsize=" + str(mylar.MAXSIZE)

    #BUGFIX: max_age was previously only assigned when USENET_RETENTION was
    #set, which made the feed-URL construction below raise a NameError when
    #it was None. Default to no age constraint.
    max_age = ""
    if mylar.USENET_RETENTION != None:
        max_age = "&age=" + str(mylar.USENET_RETENTION)

    feeds = []
    feeds.append(feedparser.parse("http://nzbindex.nl/rss/alt.binaries.comics.dcp/?sort=agedesc&" + str(size_constraints) + str(max_age) + "&dq=%s&max=50&more=1" %joinSearch))
    if mylar.ALTEXPERIMENTAL:
        feeds.append(feedparser.parse("http://nzbindex.nl/rss/?dq=%s&g[]=41&g[]=510&sort=agedesc&hidespam=0&max=&more=1" %joinSearch))

    entries = []
    mres = {}
    tallycount = 0

    for feed in feeds:
        totNum = len(feed.entries)
        tallycount += len(feed.entries)

        #map each result's title to the nzb href from its first enclosure.
        keyPair = {}
        for result in feed.entries:
            urlParse = result.enclosures[0]
            #keyPair[result.title] = result.link
            keyPair[result.title] = urlParse["href"]

        logger.fdebug(str(totNum) + " results")

        # thanks to SpammyHagar for spending the time in compiling these regEx's!
        regEx = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" %(regexName, searchIssue, searchYear)
        regExOne = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" %(regexName, searchIssue, searchYear)

        #Sometimes comics aren't actually published the same year comicVine says - trying to adjust for these cases
        regExTwo = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" %(regexName, searchIssue, int(searchYear)+1)
        regExThree = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" %(regexName, searchIssue, int(searchYear)-1)
        regExFour = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" %(regexName, searchIssue, int(searchYear)+1)
        regExFive = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" %(regexName, searchIssue, int(searchYear)-1)

        #NOTE: kept although the regex-matching loop below is disabled;
        #building it also validates that searchYear is numeric.
        regexList = [regEx, regExOne, regExTwo, regExThree, regExFour, regExFive]

        except_list = ['releases', 'gold line', 'distribution', '0-day', '0 day']

        for title, link in keyPair.items():
            #logger.fdebug("titlesplit: " + str(title.split("\"")))
            splitTitle = title.split("\"")
            noYear = 'False'

            for subs in splitTitle:
                logger.fdebug(subs)
                if len(subs) > 10 and not any(d in subs.lower() for d in except_list):
                #Looping through dictionary to run each regEx - length + regex is determined by regexList up top.
#                while regExCount < len(regexList):
#                    regExTest = re.findall(regexList[regExCount], subs, flags=re.IGNORECASE)
#                    regExCount = regExCount +1
#                    if regExTest:   
#                        logger.fdebug(title)
#                        entries.append({
#                                  'title':   subs,
#                                  'link':    str(link)
#                                  })
                    #work out which alternate year is acceptable when the
                    #store date crosses a year boundary from the cover date.
                    if IssDateFix != "no":
                        if IssDateFix == "01" or IssDateFix == "02": ComicYearFix = str(int(searchYear) - 1)
                        else: ComicYearFix = str(int(searchYear) + 1)
                    else:
                        ComicYearFix = searchYear

                    if searchYear not in subs and ComicYearFix not in subs:
                        #no year in this segment - remember it in case the
                        #year appears in a later segment of the same title.
                        noYear = 'True'
                        noYearline = subs

                    if (searchYear in subs or ComicYearFix in subs) and noYear == 'True':
                        #this would occur on the next check in the line, if year exists and
                        #the noYear check in the first check came back valid append it
                        subs = noYearline + ' (' + searchYear + ')'
                        noYear = 'False'

                    if noYear == 'False':
                        entries.append({
                                  'title':   subs,
                                  'link':    str(link)
                                  })
                        break  # break out so we don't write more shit.

#    if len(entries) >= 1:
    if tallycount >= 1:
        mres['entries'] = entries
        return mres
    else:
        logger.fdebug("No Results Found")
        return "no results"
コード例 #53
0
def Startit(searchName, searchIssue, searchYear, ComicVersion, IssDateFix):
    """Search the nzbindex.nl RSS feeds for a specific comic issue.

    Parameters:
        searchName:   series title (may still contain '%20' escapes from
                      webparse; commas are stripped)
        searchIssue:  issue number as a string (e.g. '01')
        searchYear:   expected publication year as a string
        ComicVersion: series volume (currently unused here)
        IssDateFix:   'no', or the store-date month when it falls in a
                      different year than the cover date: '01'/'02' accept
                      the previous year, any other month the next year

    Returns:
        dict with key 'entries' (list of {'title', 'link', 'pubdate',
        'length'} dicts) when the feeds returned any results, otherwise the
        string "no results".
    """
    #clean up searchName due to webparse.
    searchName = searchName.replace("%20", " ")
    if "," in searchName:
        searchName = searchName.replace(",", "")
    logger.fdebug("name:" + str(searchName))
    logger.fdebug("issue:" + str(searchIssue))
    logger.fdebug("year:" + str(searchYear))
    splitSearch = searchName.split(" ")
    joinSearch = "+".join(splitSearch) + "+" + searchIssue

    #allow optional ' - ' / ':' separators between words when matching titles.
    if "-" in searchName:
        searchName = searchName.replace("-", '((\\s)?[-:])?(\\s)?')

    regexName = searchName.replace(" ", '((\\s)?[-:])?(\\s)?')

    #logger.fdebug('searchName:' + searchName)
    #logger.fdebug('regexName:' + regexName)

    if mylar.USE_MINSIZE:
        size_constraints = "minsize=" + str(mylar.MINSIZE)
    else:
        size_constraints = "minsize=10"

    if mylar.USE_MAXSIZE:
        size_constraints = size_constraints + "&maxsize=" + str(mylar.MAXSIZE)

    #BUGFIX: max_age was previously only assigned when USENET_RETENTION was
    #set, which made the feed-URL construction below raise a NameError when
    #it was None. Default to no age constraint.
    max_age = ""
    if mylar.USENET_RETENTION != None:
        max_age = "&age=" + str(mylar.USENET_RETENTION)

    feeds = []
    feeds.append(
        feedparser.parse(
            "http://nzbindex.nl/rss/alt.binaries.comics.dcp/?sort=agedesc&" +
            str(size_constraints) + str(max_age) +
            "&dq=%s&max=50&more=1" % joinSearch))
    if mylar.ALTEXPERIMENTAL:
        feeds.append(
            feedparser.parse(
                "http://nzbindex.nl/rss/?dq=%s&g[]=41&g[]=510&sort=agedesc&hidespam=0&max=&more=1"
                % joinSearch))

    entries = []
    mres = {}
    tallycount = 0

    for feed in feeds:
        totNum = len(feed.entries)
        tallycount += len(feed.entries)

        #collect title/link plus pubdate & length (needed downstream) for
        #each result; the nzb href comes from the first enclosure.
        keyPair = []
        for result in feed.entries:
            urlParse = result.enclosures[0]
            keyPair.append({
                "title": result.title,
                "link": urlParse["href"],
                "length": urlParse["length"],
                "pubdate": result.updated
            })

        logger.fdebug(str(totNum) + " results")

        # thanks to SpammyHagar for spending the time in compiling these regEx's!
        regEx = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue,
                                                    searchYear)
        regExOne = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (
            regexName, searchIssue, searchYear)

        #Sometimes comics aren't actually published the same year comicVine says - trying to adjust for these cases
        regExTwo = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue,
                                                       int(searchYear) + 1)
        regExThree = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (
            regexName, searchIssue, int(searchYear) - 1)
        regExFour = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (
            regexName, searchIssue, int(searchYear) + 1)
        regExFive = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (
            regexName, searchIssue, int(searchYear) - 1)

        #NOTE: kept although the regex-matching loop below is disabled;
        #building it also validates that searchYear is numeric.
        regexList = [
            regEx, regExOne, regExTwo, regExThree, regExFour, regExFive
        ]

        except_list = [
            'releases', 'gold line', 'distribution', '0-day', '0 day'
        ]

        for entry in keyPair:
            title = entry['title']
            #logger.fdebug("titlesplit: " + str(title.split("\"")))
            splitTitle = title.split("\"")
            noYear = 'False'

            for subs in splitTitle:
                #logger.fdebug('sub:' + subs)
                if len(subs) > 10 and not any(d in subs.lower()
                                              for d in except_list):
                    #Looping through dictionary to run each regEx - length + regex is determined by regexList up top.
                    #                while regExCount < len(regexList):
                    #                    regExTest = re.findall(regexList[regExCount], subs, flags=re.IGNORECASE)
                    #                    regExCount = regExCount +1
                    #                    if regExTest:
                    #                        logger.fdebug(title)
                    #                        entries.append({
                    #                                  'title':   subs,
                    #                                  'link':    str(link)
                    #                                  })
                    #work out which alternate year is acceptable when the
                    #store date crosses a year boundary from the cover date.
                    if IssDateFix != "no":
                        if IssDateFix == "01" or IssDateFix == "02":
                            ComicYearFix = str(int(searchYear) - 1)
                        else:
                            ComicYearFix = str(int(searchYear) + 1)
                    else:
                        ComicYearFix = searchYear

                    if searchYear not in subs and ComicYearFix not in subs:
                        #no year in this segment - remember it in case the
                        #year appears in a later segment of the same title.
                        noYear = 'True'
                        noYearline = subs

                    if (searchYear in subs
                            or ComicYearFix in subs) and noYear == 'True':
                        #this would occur on the next check in the line, if year exists and
                        #the noYear check in the first check came back valid append it
                        subs = noYearline + ' (' + searchYear + ')'
                        noYear = 'False'

                    if noYear == 'False':
                        entries.append({
                            'title': subs,
                            'link': entry['link'],
                            'pubdate': entry['pubdate'],
                            'length': entry['length']
                        })
                        break  # break out so we don't write more shit.

#    if len(entries) >= 1:
    if tallycount >= 1:
        mres['entries'] = entries
        return mres
    else:
        logger.fdebug("No Results Found")
        return "no results"
コード例 #54
0
def getComic(comicid,
             type,
             issueid=None,
             arc=None,
             arcid=None,
             arclist=None,
             comicidlist=None):
    """Dispatch a ComicVine query via pulldetails() based on *type*.

    type selects the query mode: 'issue' (all issues for a series or arc,
    paginated 100 at a time), 'comic', 'firstissue', 'storyarc',
    'comicyears', 'import' (batched IssueID lookups, 100 per API hit), or
    'update_dates'. Only the parameters relevant to the chosen type are
    used; the rest stay None.

    NOTE(review): *type* shadows the builtin; kept as-is since callers pass
    it positionally/by this name.
    """
    if type == 'issue':
        offset = 1
        issue = {}
        ndic = []
        issuechoice = []
        comicResults = []
        # sentinel: any real date sorts earlier than this placeholder.
        firstdate = '2099-00-00'
        #let's find out how many results we get from the query...
        if comicid is None:
            #if comicid is None, it's coming from the story arc search results.
            id = arcid
            #since the arclist holds the issueids, and the pertinent reading order - we need to strip out the reading order so this works.
            aclist = ''
            # arclist entries look like 'issueid,readingorder' joined by '|';
            # a leading 'M' marks an already-stripped list - TODO confirm.
            if arclist.startswith('M'):
                islist = arclist[1:]
            else:
                for ac in arclist.split('|'):
                    aclist += ac[:ac.find(',')] + '|'
                if aclist.endswith('|'):
                    aclist = aclist[:-1]
                islist = aclist
        else:
            id = comicid
            islist = None
        searched = pulldetails(id, 'issue', None, 0, islist)
        if searched is None:
            return False
        totalResults = searched.getElementsByTagName(
            'number_of_total_results')[0].firstChild.wholeText
        logger.fdebug("there are " + str(totalResults) + " search results...")
        if not totalResults:
            return False
        countResults = 0
        # page through the results 100 at a time, accumulating issue data
        # and tracking the earliest issue date seen.
        while (countResults < int(totalResults)):
            logger.fdebug("querying range from " + str(countResults) + " to " +
                          str(countResults + 100))
            if countResults > 0:
                #new api - have to change to page # instead of offset count
                offsetcount = countResults
                searched = pulldetails(id, 'issue', None, offsetcount, islist)
            issuechoice, tmpdate = GetIssuesInfo(id, searched, arcid)
            if tmpdate < firstdate:
                firstdate = tmpdate
            ndic = ndic + issuechoice
            #search results are limited to 100 and by pagination now...let's account for this.
            countResults = countResults + 100

        issue['issuechoice'] = ndic
        issue['firstdate'] = firstdate
        return issue

    elif type == 'comic':
        dom = pulldetails(comicid, 'comic', None, 1)
        return GetComicInfo(comicid, dom)
    elif type == 'firstissue':
        dom = pulldetails(comicid, 'firstissue', issueid, 1)
        return GetFirstIssue(issueid, dom)
    elif type == 'storyarc':
        dom = pulldetails(arc, 'storyarc', None, 1)
        return GetComicInfo(issueid, dom)
    elif type == 'comicyears':
        #used by the story arc searcher when adding a given arc to poll each ComicID in order to populate the Series Year & volume (hopefully).
        #this grabs each issue based on issueid, and then subsets the comicid for each to be used later.
        #set the offset to 0, since we're doing a filter.
        dom = pulldetails(arcid,
                          'comicyears',
                          offset=0,
                          comicidlist=comicidlist)
        return GetSeriesYears(dom)
    elif type == 'import':
        #used by the importer when doing a scan with metatagging enabled. If metatagging comes back true, then there's an IssueID present
        #within the tagging (with CT). This compiles all of the IssueID's during a scan (in 100's), and returns the corresponding CV data
        #related to the given IssueID's - namely ComicID, Name, Volume (more at some point, but those are the important ones).
        offset = 1
        id_count = 0
        import_list = []
        logger.fdebug('comicidlist:' + str(comicidlist))

        while id_count < len(comicidlist):
            #break it up by 100 per api hit
            #do the first 100 regardless
            in_cnt = 0
            if id_count + 100 <= len(comicidlist):
                endcnt = id_count + 100
            else:
                endcnt = len(comicidlist)

            # build a pipe-delimited id list for this batch of up to 100.
            for i in range(id_count, endcnt):
                if in_cnt == 0:
                    tmpidlist = str(comicidlist[i])
                else:
                    tmpidlist += '|' + str(comicidlist[i])
                in_cnt += 1
            logger.info('tmpidlist: ' + str(tmpidlist))

            searched = pulldetails(None,
                                   'import',
                                   offset=0,
                                   comicidlist=tmpidlist)

            if searched is None:
                break
            else:
                tGIL = GetImportList(searched)
                import_list += tGIL

            id_count += 100

        return import_list

    elif type == 'update_dates':
        dom = pulldetails(None,
                          'update_dates',
                          offset=1,
                          comicidlist=comicidlist)
        return UpdateDates(dom)