Code Example #1
def notify(added_items):
    notifications = RssConfig('Notifications')
    homeassistant_settings = notifications.get("homeassistant").split(',')
    pushbullet_token = notifications.get("pushbullet")
    pushover_settings = notifications.get("pushover").split(',')
    items = []
    for item in added_items:
        item = item.replace('[<a href="',
                            '').replace('" target="_blank">Link</a>]', '')
        items.append(item)
    if len(items) > 0:
        cut_items = list(api_request_cutter(items, 5))
        if len(notifications.get("homeassistant")) > 0:
            for cut_item in cut_items:
                homeassistant_url = homeassistant_settings[0]
                homeassistant_password = homeassistant_settings[1]
                Homeassistant(cut_item, homeassistant_url,
                              homeassistant_password)
        if len(notifications.get("pushbullet")) > 0:
            Pushbullet(items, pushbullet_token)
        if len(notifications.get('pushover')) > 0:
            for cut_item in cut_items:
                pushover_user = pushover_settings[0]
                pushover_token = pushover_settings[1]
                Pushover(cut_item, pushover_user, pushover_token)
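
api_request_cutter is not included in these snippets; judging from list(api_request_cutter(items, 5)) it presumably splits the item list into chunks of at most five entries so that each push-notification API call stays small. A minimal sketch under that assumption:

def api_request_cutter(items, n):
    # Hypothetical helper: yield successive chunks of at most n items.
    for i in range(0, len(items), n):
        yield items[i:i + n]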
Code Example #2
 def __init__(self):
     self.config = RssConfig(self._INTERNAL_NAME)
     self.log_info = logging.info
     self.log_error = logging.error
     self.log_debug = logging.debug
     list([
         _mkdir_p(os.path.dirname(self.config.get(f)))
         for f in ['db_file', 'file']
     ])
     _mkdir_p(self.config.get('crawljob_directory'))
     self.db = RssDb(self.config.get('db_file'))
     self._periodical_active = False
     self.periodical = RepeatableTimer(
         int(self.config.get('interval')) * 60, self.periodical_task)
Code Example #3
def getURL(url):
    proxy = RssConfig('RSScrawler').get('proxy')
    if proxy:
        # Map the proxy scheme to the proxy URL (requests-style mapping).
        if proxy.startswith('http://'):
            proxies = {'http': proxy}
        elif proxy.startswith('https://'):
            proxies = {'https': proxy}
        elif proxy.startswith('socks5://'):
            proxies = {'http': proxy}
        else:
            proxies = {}
        scraper = cfscrape.create_scraper(delay=10, proxies=proxies)
    else:
        scraper = cfscrape.create_scraper(delay=10)
    return scraper.get(url).content
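
Since cfscrape builds on requests, the proxies argument above is the usual requests-style mapping from scheme to proxy URL, e.g. {'http': 'http://user:pass@host:3128'}. A hedged usage sketch (the feed URL is only an example):

page = getURL('http://serienjunkies.org/xml/feeds/episoden.xml')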
Code Example #4
File: RSScrawler.py  Project: pilgrim2go/RSScrawler
 def __init__(self):
     self.config = RssConfig(self._INTERNAL_NAME)
     self.log_info = logging.info
     self.log_error = logging.error
     self.log_debug = logging.debug
     list([_mkdir_p(os.path.dirname(self.config.get(f))) for f in ['db_file', 'file']])
     _mkdir_p(self.config.get('crawljob_directory'))
     self.db = RssDb(self.config.get('db_file'))
     self._periodical_active = False
     self.periodical = RepeatableTimer(
         int(self.config.get('interval')) * 60,
         self.periodical_task
     )
Code Example #5
def write_crawljob_file(package_name, folder_name, link_text, crawljob_dir, subdir):
    crawljob_file = crawljob_dir + '/%s.crawljob' % unicode(re.sub('[^\w\s\.-]', '', package_name.replace(' ', '')).strip().lower())
    crawljobs = RssConfig('Crawljobs')
    autostart = crawljobs.get("autostart")
    usesubdir = crawljobs.get("subdir")
    if not usesubdir:
      subdir = ""
    if autostart:
        autostart = "TRUE"
    else:
        autostart = "FALSE"
    try:
        file = open(crawljob_file, 'w')
        file.write('enabled=TRUE\n')
        file.write('autoStart=' + autostart + '\n')
        file.write('extractPasswords=["' + "bW92aWUtYmxvZy5vcmc=".decode('base64') + '","' + "c2VyaWVuanVua2llcy5vcmc=".decode('base64') + '","' + "aGQtYXJlYS5vcmc=".decode('base64') + '","' + "aGQtd29ybGQub3Jn".decode('base64') + '","' + "d2FyZXotd29ybGQub3Jn".decode('base64') + '"]\n')
        file.write('downloadPassword='******'base64') + '\n')
        file.write('extractAfterDownload=TRUE\n')
        file.write('forcedStart=' + autostart + '\n')
        file.write('autoConfirm=' + autostart + '\n')
        if not subdir == "":
            file.write('downloadFolder=' + subdir + "/" + '%s\n' % folder_name)
            if subdir == "RSScrawler/Remux":
                file.write('priority=Lower\n')
        else:
            file.write('downloadFolder=' + '%s\n' % folder_name)
        file.write('packageName=%s\n' % package_name.replace(' ', ''))
        file.write('text=%s\n' % link_text)
        file.close()
        return True
    except UnicodeEncodeError as e:
        file.close()
        log_error("Beim Schreibversuch des Crawljobs: %s FEHLER: %s" %(crawljob_file, e.message))
        if os.path.isfile(crawljob_file):
            log_info("Entferne defekten Crawljob: %s" % crawljob_file)
            os.remove(crawljob_file)
        return False
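
A hedged usage sketch (the release name and paths are made up; the function returns True once the .crawljob file has been written):

ok = write_crawljob_file('Some.Movie.2016.German.1080p',
                         'Some.Movie.2016.German.1080p',
                         'http://example.com/some-download-link',
                         '/opt/jdownloader/folderwatch',
                         'RSScrawler')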
Code Example #6
def notify(added_items):
    notifications = RssConfig('Notifications')
    homeassistant_settings = notifications.get("homeassistant").split(',')
    pushbullet_token = notifications.get("pushbullet")
    pushover_settings = notifications.get("pushover").split(',')
    items = []
    for item in added_items:
        item = re.sub(r' - <a href.*<\/a>', '', item)
        items.append(item)
    if len(items) > 0:
        cut_items = list(api_request_cutter(items, 5))
        if len(notifications.get("homeassistant")) > 0:
            for cut_item in cut_items:
                homeassistant_url = homeassistant_settings[0]
                homeassistant_password = homeassistant_settings[1]
                Homeassistant(cut_item, homeassistant_url,
                              homeassistant_password)
        if len(notifications.get("pushbullet")) > 0:
            Pushbullet(items, pushbullet_token)
        if len(notifications.get('pushover')) > 0:
            for cut_item in cut_items:
                pushover_user = pushover_settings[0]
                pushover_token = pushover_settings[1]
                Pushover(cut_item, pushover_user, pushover_token)
Code Example #7
def get_all():
    if request.method == 'GET':
        general = RssConfig('RSScrawler')
        alerts = RssConfig('Notifications')
        crawljobs = RssConfig('Crawljobs')
        mb = RssConfig('MB')
        sj = RssConfig('SJ')
        yt = RssConfig('YT')
        ver = version.getVersion()
        if version.updateCheck()[0]:
            updateready = True
            updateversion = version.updateCheck()[1]
            print(
                'Update steht bereit (' + updateversion +
                ')! Weitere Informationen unter https://github.com/rix1337/RSScrawler/releases/latest'
            )
        else:
            updateready = False
        log = ''
        logfile = os.path.join(os.path.dirname(sys.argv[0]), 'RSScrawler.log')
        if os.path.isfile(logfile):
            output = StringIO.StringIO()
            with open(logfile) as log_lines:
                for line in reversed(log_lines.readlines()):
                    output.write("<p>" + line.replace("\n", "</p>"))
            log = output.getvalue()
        return jsonify({
            "version": {
                "ver": ver,
                "update_ready": updateready,
                "docker": docker,
            },
            "log": log,
            "lists": {
                "mb": {
                    "filme": getListe('MB_Filme'),
                    "filme3d": getListe('MB_3D'),
                    "regex": getListe('MB_Regex'),
                },
                "sj": {
                    "serien": getListe('SJ_Serien'),
                    "regex": getListe('SJ_Serien_Regex'),
                    "staffeln_regex": getListe('SJ_Staffeln_Regex'),
                },
                "mbsj": {
                    "staffeln": getListe('MB_Staffeln'),
                },
                "yt": {
                    "kanaele_playlisten": getListe('YT_Channels'),
                },
            },
            "settings": {
                "general": {
                    "pfad": general.get("jdownloader"),
                    "port": to_int(general.get("port")),
                    "prefix": general.get("prefix"),
                    "interval": to_int(general.get("interval")),
                    "english": bool(general.get("english")),
                    "hoster": general.get("hoster"),
                },
                "alerts": {
                    "homeassistant": alerts.get("homeassistant"),
                    "pushbullet": alerts.get("pushbullet"),
                    "pushover": alerts.get("pushover"),
                },
                "crawljobs": {
                    "autostart": bool(crawljobs.get("autostart")),
                    "subdir": bool(crawljobs.get("subdir")),
                },
                "mb": {
                    "quality": mb.get("quality"),
                    "ignore": mb.get("ignore"),
                    "regex": bool(mb.get("regex")),
                    "imdb_score": to_float(mb.get("imdb")),
                    "imdb_year": to_int(mb.get("imdbyear")),
                    "historical": bool(mb.get("historical")),
                    "force_dl": bool(mb.get("enforcedl")),
                    "cutoff": bool(mb.get("cutoff")),
                    "crawl_3d": bool(mb.get("crawl3d")),
                },
                "sj": {
                    "quality": sj.get("quality"),
                    "ignore": sj.get("rejectlist"),
                    "regex": bool(sj.get("regex")),
                },
                "mbsj": {
                    "enabled": bool(mb.get("crawlseasons")),
                    "quality": mb.get("seasonsquality"),
                    "packs": bool(mb.get("seasonpacks")),
                    "source": mb.get("seasonssource"),
                },
                "yt": {
                    "enabled": bool(yt.get("youtube")),
                    "max": to_int(yt.get("maxvideos")),
                    "ignore": yt.get("ignore"),
                }
            }
        })
    else:
        return "Failed", 405
Code Example #8
def get_post_settings():
    if request.method == 'GET':
        general = RssConfig('RSScrawler')
        alerts = RssConfig('Notifications')
        crawljobs = RssConfig('Crawljobs')
        mb = RssConfig('MB')
        sj = RssConfig('SJ')
        yt = RssConfig('YT')
        return jsonify({
            "settings": {
                "general": {
                    "pfad": general.get("jdownloader"),
                    "port": to_int(general.get("port")),
                    "prefix": general.get("prefix"),
                    "interval": to_int(general.get("interval")),
                    "english": bool(general.get("english")),
                    "hoster": general.get("hoster"),
                },
                "alerts": {
                    "homeassistant": alerts.get("homeassistant"),
                    "pushbullet": alerts.get("pushbullet"),
                    "pushover": alerts.get("pushover"),
                },
                "crawljobs": {
                    "autostart": bool(crawljobs.get("autostart")),
                    "subdir": bool(crawljobs.get("subdir")),
                },
                "mb": {
                    "quality": mb.get("quality"),
                    "ignore": mb.get("ignore"),
                    "regex": bool(mb.get("regex")),
                    "imdb_score": to_float(mb.get("imdb")),
                    "imdb_year": to_int(mb.get("imdbyear")),
                    "historical": bool(mb.get("historical")),
                    "force_dl": bool(mb.get("enforcedl")),
                    "cutoff": bool(mb.get("cutoff")),
                    "crawl_3d": bool(mb.get("crawl3d")),
                },
                "sj": {
                    "quality": sj.get("quality"),
                    "ignore": sj.get("rejectlist"),
                    "regex": bool(sj.get("regex")),
                },
                "mbsj": {
                    "enabled": bool(mb.get("crawlseasons")),
                    "quality": mb.get("seasonsquality"),
                    "packs": bool(mb.get("seasonpacks")),
                    "source": mb.get("seasonssource"),
                },
                "yt": {
                    "enabled": bool(yt.get("youtube")),
                    "max": to_int(yt.get("maxvideos")),
                    "ignore": yt.get("ignore"),
                }
            }
        })
    if request.method == 'POST':
        data = request.json
        with open(
                os.path.join(os.path.dirname(sys.argv[0]),
                             'Einstellungen/RSScrawler.ini'), 'wb') as f:
            f.write('# RSScrawler.ini (Stand: RSScrawler ' +
                    version.getVersion() + ')\n')
            f.write("\n[RSScrawler]\n")
            f.write("jdownloader = " +
                    to_str(data['general']['pfad']).encode('utf-8') + "\n")
            f.write("port = " +
                    to_str(data['general']['port']).encode('utf-8') + "\n")
            f.write("prefix = " +
                    to_str(data['general']['prefix']).encode('utf-8').lower() +
                    "\n")
            interval = to_str(data['general']['interval']).encode('utf-8')
            if to_int(interval) < 3:
                interval = '3'
            f.write("interval = " + interval + "\n")
            f.write("english = " +
                    to_str(data['general']['english']).encode('utf-8') + "\n")
            f.write("hoster = " +
                    to_str(data['general']['hoster']).encode('utf-8') + "\n")
            f.write("\n[MB]\n")
            f.write("quality = " +
                    to_str(data['mb']['quality']).encode('utf-8') + "\n")
            f.write("ignore = " +
                    to_str(data['mb']['ignore']).encode('utf-8').lower() +
                    "\n")
            f.write("historical = " +
                    to_str(data['mb']['historical']).encode('utf-8') + "\n")
            f.write("regex = " + to_str(data['mb']['regex']).encode('utf-8') +
                    "\n")
            f.write("cutoff = " +
                    to_str(data['mb']['cutoff']).encode('utf-8') + "\n")
            f.write("crawl3d = " +
                    to_str(data['mb']['crawl_3d']).encode('utf-8') + "\n")
            f.write("enforcedl = " +
                    to_str(data['mb']['force_dl']).encode('utf-8') + "\n")
            f.write("crawlseasons = " +
                    to_str(data['mbsj']['enabled']).encode('utf-8') + "\n")
            f.write("seasonsquality = " +
                    to_str(data['mbsj']['quality']).encode('utf-8') + "\n")
            f.write("seasonpacks = " +
                    to_str(data['mbsj']['packs']).encode('utf-8') + "\n")
            f.write("seasonssource = " +
                    to_str(data['mbsj']['source']).encode('utf-8').lower() +
                    "\n")
            f.write("imdbyear = " +
                    to_str(data['mb']['imdb_year']).encode('utf-8') + "\n")
            imdb = to_str(data['mb']['imdb_score']).encode('utf-8')
            if re.match('[^0-9]', imdb):
                imdb = 0.0
            elif imdb == '':
                imdb = 0.0
            else:
                imdb = round(
                    float(
                        to_str(
                            data['mb']['imdb_score']).encode('utf-8').replace(
                                ",", ".")), 1)
            if imdb > 10:
                imdb = 10.0
            f.write("imdb = " + to_str(imdb) + "\n")
            f.write("\n[SJ]\n")
            f.write("quality = " +
                    to_str(data['sj']['quality']).encode('utf-8') + "\n")
            f.write("rejectlist = " +
                    to_str(data['sj']['ignore']).encode('utf-8').lower() +
                    "\n")
            f.write("regex = " + to_str(data['sj']['regex']).encode('utf-8') +
                    "\n")
            f.write("\n[YT]\n")
            f.write("youtube = " +
                    to_str(data['yt']['enabled']).encode('utf-8') + "\n")
            maxvideos = to_str(data['yt']['max']).encode('utf-8')
            if maxvideos == "":
                maxvideos = "10"
            if to_int(maxvideos) < 1:
                f.write("maxvideos = 1\n")
            elif to_int(maxvideos) > 50:
                f.write("maxvideos = 50\n")
            else:
                f.write("maxvideos = " + to_str(maxvideos) + "\n")
            f.write("ignore = " +
                    to_str(data['yt']['ignore']).encode('utf-8') + "\n")
            f.write("\n[Notifications]\n")
            f.write("homeassistant = " +
                    to_str(data['alerts']['homeassistant']).encode('utf-8') +
                    "\n")
            f.write("pushbullet = " +
                    to_str(data['alerts']['pushbullet']).encode('utf-8') +
                    "\n")
            f.write("pushover = " +
                    to_str(data['alerts']['pushover']).encode('utf-8') + "\n")
            f.write("\n[Crawljobs]\n")
            f.write("autostart = " +
                    to_str(data['crawljobs']['autostart']).encode('utf-8') +
                    "\n")
            f.write("subdir = " +
                    to_str(data['crawljobs']['subdir']).encode('utf-8') + "\n")
        files.check()
        return "Success", 201
    else:
        return "Failed", 405
Code Example #9
import version

import StringIO
import os
import re
import sys

import logging

# Flask provides the app object plus the request/jsonify helpers used by the
# route handlers in the other examples; project helpers such as RssConfig and
# getListe are imported from elsewhere in RSScrawler and are not shown here.
from flask import Flask, jsonify, request

app = Flask(__name__, static_url_path='/web', template_folder='web')

if not os.path.exists(
        os.path.join(os.path.dirname(sys.argv[0]), 'Einstellungen')):
    prefix = ""
else:
    general = RssConfig('RSScrawler')
    if general.get("prefix"):
        prefix = '/' + general.get("prefix")
    else:
        prefix = ""


def to_int(i):
    i = i.strip().replace("None", "")
    return int(i) if i else ""


def to_float(i):
    i = i.strip().replace("None", "")
    return float(i) if i else ""
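
Both helpers strip whitespace and treat the literal string "None" (and empty input) as missing, returning an empty string instead of raising. For example:

print(to_int(' 8 '))    # -> 8
print(to_int('None'))   # -> '' (missing values become an empty string, not 0)
print(to_float('7.5'))  # -> 7.5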
Code Example #10
class SJ():
    MIN_CHECK_INTERVAL = 2 * 60  #2minutes
    _INTERNAL_NAME = 'SJ'

    def __init__(self):
        self.config = RssConfig(self._INTERNAL_NAME)
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        list([
            _mkdir_p(os.path.dirname(self.config.get(f)))
            for f in ['db_file', 'file']
        ])
        _mkdir_p(self.config.get('crawljob_directory'))
        self.db = RssDb(self.config.get('db_file'))
        self._periodical_active = False
        self.periodical = RepeatableTimer(
            int(self.config.get('interval')) * 60, self.periodical_task)

    def activate(self):
        self._periodical_active = True
        self.periodical.start()

    @_restart_timer
    def periodical_task(self):
        feed = feedparser.parse(
            'http://serienjunkies.org/xml/feeds/episoden.xml')
        self.pattern = "|".join(getSeriesList(self.config.get("file"))).lower()
        reject = self.config.get("rejectlist").replace(
            ";", "|").lower() if len(
                self.config.get("rejectlist")) > 0 else "^unmatchable$"
        self.quality = self.config.get("quality")
        self.hoster = self.config.get("hoster")
        if self.hoster == "alle":
            self.hoster = "."
        self.added_items = []

        for post in feed.entries:
            link = post.link
            title = post.title

            if self.config.get("regex"):
                m = re.search(self.pattern, title.lower())
                if not m and not "720p" in title and not "1080p" in title:
                    m = re.search(self.pattern.replace("480p", "."),
                                  title.lower())
                    self.quality = "480p"
                if m:
                    if "720p" in title.lower(): self.quality = "720p"
                    if "1080p" in title.lower(): self.quality = "1080p"
                    m = re.search(reject, title.lower())
                    if m:
                        self.log_debug("Rejected: " + title)
                        continue
                    title = re.sub('\[.*\] ', '', post.title)
                    self.range_checkr(link, title)

            else:
                if self.config.get("quality") != '480p':
                    m = re.search(self.pattern, title.lower())
                    if m:
                        if self.config.get("language") in title:
                            mm = re.search(self.quality, title.lower())
                            if mm:
                                mmm = re.search(reject, title.lower())
                                if mmm:
                                    self.log_debug("Rejected: " + title)
                                    continue
                                title = re.sub('\[.*\] ', '', post.title)
                                self.range_checkr(link, title)

                else:
                    m = re.search(self.pattern, title.lower())
                    if m:
                        if self.config.get("language") in title:
                            if "720p" in title.lower(
                            ) or "1080p" in title.lower():
                                continue
                            mm = re.search(reject, title.lower())
                            if mm:
                                self.log_debug("Rejected: " + title)
                                continue
                            title = re.sub('\[.*\] ', '', post.title)
                            self.range_checkr(link, title)

        if len(self.config.get('pushbulletapi')) > 2:
            notifyPushbulletSJ(
                self.config.get("pushbulletapi"),
                self.added_items) if len(self.added_items) > 0 else True

    def range_checkr(self, link, title):
        pattern = re.match(".*S\d{2}E\d{2}-\w?\d{2}.*", title)
        if pattern is not None:
            range0 = re.sub(r".*S\d{2}E(\d{2}-\w?\d{2}).*", r"\1",
                            title).replace("E", "")
            number1 = re.sub(r"(\d{2})-\d{2}", r"\1", range0)
            number2 = re.sub(r"\d{2}-(\d{2})", r"\1", range0)
            title_cut = re.findall(r"(.*S\d{2}E)(\d{2}-\w?\d{2})(.*)", title)
            try:
                for count in range(int(number1), (int(number2) + 1)):
                    NR = re.match("d\{2}", str(count))
                    if NR is not None:
                        title1 = title_cut[0][0] + str(
                            count) + ".*" + title_cut[0][-1]
                        self.range_parse(link, title1)
                    else:
                        title1 = title_cut[0][0] + "0" + str(
                            count) + ".*" + title_cut[0][-1]
                        self.range_parse(link, title1)
            except ValueError as e:
                logging.error("Raised ValueError exception: %s" % e.message)
        else:
            self.parse_download(link, title)

    def range_parse(self, series_url, search_title):
        req_page = getURL(series_url)
        soup = BeautifulSoup(req_page)

        try:
            titles = soup.findAll(text=re.compile(search_title))
            for title in titles:
                if self.quality != '480p' and self.quality in title:
                    self.parse_download(series_url, title)
                if self.quality == '480p' and not (('.720p.' in title) or
                                                   ('.1080p.' in title)):
                    self.parse_download(series_url, title)
        except re.error as e:
            self.log_error('sre_constants.error: %s' % e)

    def parse_download(self, series_url, search_title):
        req_page = getURL(series_url)
        soup = BeautifulSoup(req_page)

        title = soup.find(text=re.compile(search_title))
        if title:
            items = []
            links = title.parent.parent.findAll('a')
            for link in links:
                url = link['href']
                pattern = '.*%s_.*' % self.hoster
                if re.match(pattern, url):
                    items.append(url)
            self.send_package(title, items) if len(items) > 0 else True

    def send_package(self, title, link):
        try:
            storage = self.db.retrieve(title)
        except Exception as e:
            # Avoid a NameError below if the lookup failed.
            storage = None
            self.log_debug("db.retrieve got exception: %s, title: %s" %
                           (e, title))
        if storage == 'downloaded':
            self.log_debug(title + " already downloaded")
        else:
            self.log_info("NEW RELEASE: " + title)
            self.db.store(title, 'downloaded')
            write_crawljob_file(
                title, title, link, self.config.get('crawljob_directory')
            ) and self.added_items.append(title.encode("utf-8"))
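
A hedged sketch of how this class appears to be wired up: the constructor prepares the config, database and RepeatableTimer, and activate() starts the timer that keeps re-running periodical_task.

sj_crawler = SJ()
sj_crawler.activate()  # starts the interval timer driving periodical_task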
Code Example #11
class MovieblogFeed():
    FEED_URL = "http://www.movie-blog.org/feed/"
    SUBSTITUTE = "[&#\s/]"
    _INTERNAL_NAME = 'MB'

    def __init__(self):
        self.config = RssConfig(self._INTERNAL_NAME)
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        list([
            _mkdir_p(os.path.dirname(self.config.get(f)))
            for f in ['db_file', 'patternfile']
        ])
        _mkdir_p(self.config.get('crawljob_directory'))
        self.db = RssDb(self.config.get('db_file'))
        self._hosters_pattern = self.config.get('hoster').replace(';', '|')
        self._periodical_active = False
        self.periodical = RepeatableTimer(
            int(self.config.get('interval')) * 60, self.periodical_task)
        self.dictWithNamesAndLinks = {}

    def activate(self):
        self._periodical_active = True
        self.periodical.start()
        return self

    def readInput(self, file):
        if not os.path.isfile(file):
            open(file, "a").close()
            placeholder = open(file, 'w')
            placeholder.write(
                'ADD ALL MOVIES YOU WANT TO CRAWL FOR AS NEW LINES IN THIS FILE\n'
            )
            placeholder.close()
        try:
            f = codecs.open(file, "rb")
            return f.read().splitlines()
        except:
            self.log_error("Inputfile not found")

    def getPatterns(self, patterns, quality, rg, sf):
        return {line: (quality, rg, sf) for line in patterns}

    def searchLinks(self, feed):
        ignore = "|".join(["\.%s\." % p for p in self.config.get("ignore").lower().split(',')
                           if not self.config.get('crawl3d') or p != '3d']) \
            if not self.config.get("ignore") == "" else "^unmatchable$"
        for key in self.allInfos:
            s = re.sub(self.SUBSTITUTE, ".", key).lower()
            for post in feed.entries:
                """Search for title"""
                found = re.search(s, post.title.lower())
                if found:
                    """Check if we have to ignore it"""
                    found = re.search(ignore, post.title.lower())
                    if found:
                        self.log_debug("Ignoring [%s]" % post.title)
                        continue
                    """Search for quality"""
                    ss = self.allInfos[key][0].lower()

                    if '.3d.' in post.title.lower():
                        if self.config.get('crawl3d') and (
                                "1080p" in post.title.lower()
                                or "1080i" in post.title.lower()):
                            found = True
                        else:
                            continue
                    else:
                        if ss == "480p":
                            if "720p" in post.title.lower(
                            ) or "1080p" in post.title.lower(
                            ) or "1080i" in post.title.lower():
                                continue
                            found = True
                        else:
                            found = re.search(ss, post.title.lower())
                    if found:
                        """Search for releasegroup"""
                        sss = "[\.-]+" + self.allInfos[key][1].lower()
                        found = re.search(sss, post.title.lower())

                        if self.allInfos[key][2]:
                            # If all True, then found = True
                            found = all([
                                word in post.title.lower()
                                for word in self.allInfos[key][2]
                            ])

                        if found:
                            try:
                                episode = re.search(
                                    r'([\w\.\s]*s\d{1,2}e\d{1,2})[\w\.\s]*',
                                    post.title.lower()).group(1)
                                if "repack" in post.title.lower():
                                    episode = episode + "-repack"
                                self.log_debug(
                                    "TV-Series detected, will shorten its name to [%s]"
                                    % episode)
                                yield (episode, [post.link], key)
                            except:
                                yield (post.title, [post.link], key)

    def _get_download_links(self, url, hosters_pattern=None):
        tree = html.fromstring(requests.get(url).content)
        xpath = '//*[@id="content"]/span/div/div[2]/p//strong[contains(text(),"Download:") or contains(text(),"Mirror #")]/following-sibling::a[1]'
        return [
            common.get_first(link.xpath('./@href'))
            for link in tree.xpath(xpath) if hosters_pattern is None
            or re.search(hosters_pattern, link.text, flags=re.IGNORECASE)
        ]

    @_restart_timer
    def periodical_task(self):
        urls = []
        text = []

        dl = {
            key: ('.*', '.*', ('.dl.', ))
            for key in self.db.get_patterns('notdl')
        }

        self.allInfos = dict(
            set({
                key: dl[key] if key in dl else value
                for (key, value) in self.getPatterns(
                    self.readInput(self.config.get("patternfile")),
                    self.config.get('quality'), '.*', None).items()
            }.items()) | set(
                self.getPatterns(
                    self.readInput(self.config.get("seasonslist")
                                   ), self.config.get('seasonsquality'), '.*',
                    ('.complete.', '.' + self.config.get('seasonssource') +
                     '.')).items() if self.config.get('crawlseasons') else []))

        if self.config.get("historical"):
            for xline in self.allInfos.keys():
                if len(xline) > 0 and not xline.startswith("#"):
                    xn = xline.split(",")[0].replace(".",
                                                     " ").replace(" ", "+")
                    urls.append(
                        'http://www.movie-blog.org/search/%s/feed/rss2/' % xn)
        else:
            urls.append(self.FEED_URL)

        for url in urls:
            for (key, value,
                 pattern) in self.searchLinks(feedparser.parse(url)):
                if self.db.retrieve(key) == 'added' or self.db.retrieve(
                        key) == 'notdl':
                    self.log_debug("[%s] has already been added" % key)
                else:
                    self.db.store(
                        key, 'notdl' if self.config.get('enforcedl')
                        and '.dl.' not in key.lower() else 'added', pattern)
                    self.log_info("NEW RELEASE: " + key)
                    download_link = [
                        common.get_first(
                            self._get_download_links(value[0],
                                                     self._hosters_pattern))
                    ]
                    if any(download_link):
                        write_crawljob_file(
                            key, key, download_link,
                            self.config.get(
                                "crawljob_directory")) and text.append(key)
        if len(text) > 0:
            notifyPushbulletMB(self.config.get("pushbulletapi"), text)
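
common.get_first is a project helper that is not included in these snippets; judging by its use above it presumably returns the first element of an iterable, or None when the iterable is empty. A minimal sketch under that assumption:

def get_first(iterable):
    # Hypothetical reimplementation of common.get_first.
    for item in iterable:
        return item
    return None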
Code Example #12
File: RSScrawler.py  Project: pilgrim2go/RSScrawler
class SJ():
    MIN_CHECK_INTERVAL = 2 * 60 #2minutes
    _INTERNAL_NAME = 'SJ'

    def __init__(self):
        self.config = RssConfig(self._INTERNAL_NAME)
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        list([_mkdir_p(os.path.dirname(self.config.get(f))) for f in ['db_file', 'file']])
        _mkdir_p(self.config.get('crawljob_directory'))
        self.db = RssDb(self.config.get('db_file'))
        self._periodical_active = False
        self.periodical = RepeatableTimer(
            int(self.config.get('interval')) * 60,
            self.periodical_task
        )

    def activate(self):
        self._periodical_active = True
        self.periodical.start()

    @_restart_timer
    def periodical_task(self):
        feed = feedparser.parse('http://serienjunkies.org/xml/feeds/episoden.xml')
        self.pattern = "|".join(getSeriesList(self.config.get("file"))).lower()
        reject = self.config.get("rejectlist").replace(";","|").lower() if len(self.config.get("rejectlist")) > 0 else "^unmatchable$"
        self.quality = self.config.get("quality")
        self.hoster = self.config.get("hoster")
        if self.hoster == "alle":
            self.hoster = "."
        self.added_items = []

        for post in feed.entries:
            link = post.link
            title = post.title

            if str2bool(self.config.get("regex")):
                m = re.search(self.pattern,title.lower())
                if not m and not "720p" in title and not "1080p" in title:
                    m = re.search(self.pattern.replace("480p","."),title.lower())
                    self.quality = "480p"
                if m:
                    if "720p" in title.lower(): self.quality = "720p"
                    if "1080p" in title.lower(): self.quality = "1080p"
                    m = re.search(reject,title.lower())
                    if m:
                        self.log_debug("Rejected: " + title)
                        continue
                    title = re.sub('\[.*\] ', '', post.title)
                    self.range_checkr(link,title)

            else:
                if self.config.get("quality") != '480p':
                    m = re.search(self.pattern,title.lower())
                    if m:
                        if self.config.get("language") in title:
                            mm = re.search(self.quality,title.lower())
                            if mm:
                                mmm = re.search(reject,title.lower())
                                if mmm:
                                    self.log_debug("Rejected: " + title)
                                    continue
                                title = re.sub('\[.*\] ', '', post.title)
                                self.range_checkr(link,title)

                else:
                    m = re.search(self.pattern,title.lower())
                    if m:
                        if self.config.get("language") in title:
                            if "720p" in title.lower() or "1080p" in title.lower():
                                continue
                            mm = re.search(reject,title.lower())
                            if mm:
                                self.log_debug("Rejected: " + title)
                                continue
                            title = re.sub('\[.*\] ', '', post.title)
                            self.range_checkr(link,title)

        if len(self.config.get('pushbulletapi')) > 2:
            notifyPushbulletSJ(self.config.get("pushbulletapi"),self.added_items) if len(self.added_items) > 0 else True

    def range_checkr(self, link, title):
        pattern = re.match(".*S\d{2}E\d{2}-\w?\d{2}.*", title)
        if pattern is not None:
            range0 = re.sub(r".*S\d{2}E(\d{2}-\w?\d{2}).*",r"\1", title).replace("E","")
            number1 = re.sub(r"(\d{2})-\d{2}",r"\1", range0)
            number2 = re.sub(r"\d{2}-(\d{2})",r"\1", range0)
            title_cut = re.findall(r"(.*S\d{2}E)(\d{2}-\w?\d{2})(.*)",title)
            try:
                for count in range(int(number1),(int(number2)+1)):
                    NR = re.match("d\{2}", str(count))
                    if NR is not None:
                        title1 = title_cut[0][0] + str(count) + ".*" + title_cut[0][-1]
                        self.range_parse(link, title1)
                    else:
                        title1 = title_cut[0][0] + "0" + str(count) + ".*" + title_cut[0][-1]
                        self.range_parse(link, title1)
            except ValueError as e:
                logging.error("Raised ValueError exception: %s" %e.message)
        else:
            self.parse_download(link, title)

    def range_parse(self,series_url, search_title):
        req_page = getURL(series_url)
        soup = BeautifulSoup(req_page)

        try:
            titles = soup.findAll(text=re.compile(search_title))
            for title in titles:
               if self.quality !='480p' and self.quality in title:
                   self.parse_download(series_url, title)
               if self.quality =='480p' and not (('.720p.' in title) or ('.1080p.' in title)):
                   self.parse_download(series_url, title)
        except re.error as e:
            self.log_error('sre_constants.error: %s' % e)


    def parse_download(self,series_url, search_title):
        req_page = getURL(series_url)
        soup = BeautifulSoup(req_page)

        title = soup.find(text=re.compile(search_title))
        if title:
            items = []
            links = title.parent.parent.findAll('a')
            for link in links:
                url = link['href']
                pattern = '.*%s_.*' % self.hoster
                if re.match(pattern, url):
                    items.append(url)
            self.send_package(title,items) if len(items) > 0 else True

    def send_package(self, title, link):
        try:
            storage = self.db.retrieve(title)
        except Exception as e:
            # Avoid a NameError below if the lookup failed.
            storage = None
            self.log_debug("db.retrieve got exception: %s, title: %s" % (e, title))
        if storage == 'downloaded':
            self.log_debug(title + " already downloaded")
        else:
            self.log_info("NEW RELEASE: " + title)
            self.db.store(title, 'downloaded')
            write_crawljob_file(title, title, link,
                                self.config.get('crawljob_directory')) and self.added_items.append(title.encode("utf-8"))
Code Example #13
File: RSScrawler.py  Project: pilgrim2go/RSScrawler
class MovieblogFeed():
    FEED_URL = "http://www.movie-blog.org/feed/"
    SUBSTITUTE = "[&#\s/]"
    _INTERNAL_NAME='MB'

    def __init__(self):
        self.config = RssConfig(self._INTERNAL_NAME)
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        list([_mkdir_p(os.path.dirname(self.config.get(f))) for f in ['db_file', 'patternfile']])
        _mkdir_p(self.config.get('crawljob_directory'))
        self.db = RssDb(self.config.get('db_file'))
        self._periodical_active = False
        self.periodical = RepeatableTimer(
            int(self.config.get('interval')) * 60,
            self.periodical_task
        )

    def activate(self):
        self._periodical_active = True
        self.periodical.start()
        return self

    def readInput(self):
        if not os.path.isfile(self.config.get("patternfile")):
            open(self.config.get("patternfile"), "a").close()
        try:
            f = codecs.open(self.config.get("patternfile"), "rb")
            return f.read().splitlines()
        except:
            self.log_error("Inputfile not found")

    def getPatterns(self):
        out = {}
        for line in self.mypatterns:
            if len(line) == 0 or line.startswith("#"):
                continue
            try:
                n = line.split(",")[0]
                q = line.split(",")[1]
                r = line.split(",")[2]
            except:
                self.log_error("Syntax error in [%s] detected, please take corrective action" % self.config.get("patternfile"))
                # Skip malformed lines instead of reusing stale or undefined values below.
                continue

            try:
                d = line.split(",")[3]
            except:
                d = ""

            if q == "":
                q = r'.*'

            if r == "":
                r = r'.*'

            out[n] = [q,r,d]
        return out

    def searchLinks(self):
        ignore = self.config.get("ignore").lower().replace(",","|") if not self.config.get("ignore") == "" else "^unmatchable$"
        for key in self.allInfos:
            s = re.sub(self.SUBSTITUTE,".",key).lower()
            for post in self.feed.entries:
                """Search for title"""
                found = re.search(s,post.title.lower())
                if found:
                    """Check if we have to ignore it"""
                    found = re.search(ignore,post.title.lower())
                    if found:
                        self.log_debug("Ignoring [%s]" %post.title)
                        continue
                    """Search for quality"""
                    ss = self.allInfos[key][0].lower()
                    if ss == "480p":
                        if "720p" in post.title.lower() or "1080p" in post.title.lower() or "1080i" in post.title.lower():
                            continue
                        found = True
                    else:
                        found = re.search(ss,post.title.lower())
                    if found:
                        """Search for releasegroup"""
                        sss = "[\.-]+"+self.allInfos[key][1].lower()
                        found = re.search(sss,post.title.lower())
                        if found:
                            try:
                                episode = re.search(r'([\w\.\s]*s\d{1,2}e\d{1,2})[\w\.\s]*',post.title.lower()).group(1)
                                if "repack" in post.title.lower():
                                    episode = episode + "-repack"
                                self.log_debug("TV-Series detected, will shorten its name to [%s]" %episode)
                                self.dictWithNamesAndLinks[episode] = [post.link]
                            except:
                                self.dictWithNamesAndLinks[post.title] = [post.link]

    @_restart_timer
    def periodical_task(self):
        urls = []
        text = []
        self.mypatterns = self.readInput()

        self.dictWithNamesAndLinks = {}
        self.allInfos = self.getPatterns()

        if self.config.get("historical"):
            for xline in self.mypatterns:
                if len(xline) == 0 or xline.startswith("#"):
                    continue
                xn = xline.split(",")[0].replace(".", " ").replace(" ", "+")
                urls.append('http://www.movie-blog.org/search/%s/feed/rss2/' %xn)
        else:
            urls.append(self.FEED_URL)

        for url in urls:
            self.feed = feedparser.parse(url)
            self.searchLinks()

        for key in self.dictWithNamesAndLinks:
            if not self.db.retrieve(key) == 'added':
                self.db.store(key, 'added')
                self.log_info("NEW RELEASE: " + key)
                write_crawljob_file(key, key, [self.dictWithNamesAndLinks[key][0]],
                    self.config.get("crawljob_directory")) and text.append(key)
            else:
                self.log_debug("[%s] has already been added" %key)
        if len(text) > 0:
            notifyPushbulletMB(self.config.get("pushbulletapi"),text)
Code Example #14
File: common.py  Project: sweatcher/RSScrawler
def load(dockerglobal):
    main = RssConfig('RSScrawler')
    jdownloader = main.get("jdownloader")
    port = main.get("port")
    prefix = main.get("prefix")
    interval = main.get("interval")
    hoster = main.get("hoster")
    pushbulletapi = main.get("pushbulletapi")
    # MB section
    mb = RssConfig('MB')
    mbquality = mb.get("quality")
    ignore = mb.get("ignore")
    historical = str(mb.get("historical"))
    mbregex = str(mb.get("regex"))
    cutoff = str(mb.get("cutoff"))
    crawl3d = str(mb.get("crawl3d"))
    enforcedl = str(mb.get("enforcedl"))
    crawlseasons = str(mb.get("crawlseasons"))
    seasonsquality = mb.get("seasonsquality")
    seasonssource = mb.get("seasonssource")
    # SJ section
    sj = RssConfig('SJ')
    sjquality = sj.get("quality")
    rejectlist = sj.get("rejectlist")
    sjregex = str(sj.get("regex"))
    # Convert values for the HTML template
    if hoster == 'Share-Online':
      hosterso = ' selected'
      hosterul = ''
    else:
      hosterso = ''
      hosterul = ' selected'
    if mbquality == '1080p':
      mbq1080 = ' selected'
      mbq720 = ''
      mbq480 = ''
    if mbquality == '720p':
      mbq1080 = ''
      mbq720 = ' selected'
      mbq480 = ''
    if mbquality == '480p':
      mbq1080 = ''
      mbq720 = ''
      mbq480 = ' selected'
    if seasonsquality == '1080p':
      msq1080 = ' selected'
      msq720 = ''
      msq480 = ''
    if seasonsquality == '720p':
      msq1080 = ''
      msq720 = ' selected'
      msq480 = ''
    if seasonsquality == '480p':
      msq1080 = ''
      msq720 = ''
      msq480 = ' selected'
    if sjquality == '1080p':
      sjq1080 = ' selected'
      sjq720 = ''
      sjq480 = ''
    if sjquality == '720p':
      sjq1080 = ''
      sjq720 = ' selected'
      sjq480 = ''
    if sjquality == '480p':
      sjq1080 = ''
      sjq720 = ''
      sjq480 = ' selected'
    if historical == 'True':
      historicaltrue = ' selected'
      historicalfalse = ''
    else:
      historicaltrue = ''
      historicalfalse = ' selected'
    if mbregex == 'True':
      mbregextrue = ' selected'
      mbregexfalse = ''
      mrdiv = "block"
    else:
      mbregextrue = ''
      mbregexfalse = ' selected'
      mrdiv = "none"
    if cutoff == 'True':
      cutofftrue = ' selected'
      cutofffalse = ''
    else:
      cutofftrue = ''
      cutofffalse = ' selected'
    if crawl3d == 'True':
      crawl3dtrue = ' selected'
      crawl3dfalse = ''
      tddiv = "block"
    else:
      crawl3dtrue = ''
      crawl3dfalse = ' selected'
      tddiv = "none"
    if enforcedl == 'True':
      enforcedltrue = ' selected'
      enforcedlfalse = ''
    else:
      enforcedltrue = ''
      enforcedlfalse = ' selected'
    if crawlseasons == 'True':
      crawlseasonstrue = ' selected'
      crawlseasonsfalse = ''
      ssdiv = "block"
    else:
      crawlseasonstrue = ''
      crawlseasonsfalse = ' selected'
      ssdiv = "none"
    if sjregex == 'True':
      sjregextrue = ' selected'
      sjregexfalse = ''
      srdiv = "block"
    else:
      sjregextrue = ''
      sjregexfalse = ' selected'
      srdiv = "none"
    # Detect the URL prefix
    if prefix:
      prefix = '/' + prefix
    # Detect a Docker environment
    if dockerglobal == '1':
      dockerblocker = ' readonly="readonly"'
      dockerhint = 'Docker-Modus: Kann nur per Docker-Run angepasst werden! '
    else:
      dockerblocker = ''
      dockerhint = ''
    return (jdownloader, port, prefix, interval, hoster, pushbulletapi, mbquality, ignore, historical, mbregex, cutoff, crawl3d, enforcedl, crawlseasons, seasonsquality, seasonssource, sjquality, rejectlist, sjregex, hosterso, hosterul, mbq1080, mbq720, mbq480, msq1080, msq720, msq480, sjq1080, sjq720, sjq480, historicaltrue, historicalfalse, mbregextrue, mbregexfalse, mrdiv, cutofftrue, cutofffalse, crawl3dtrue, crawl3dfalse, tddiv, enforcedltrue, enforcedlfalse, crawlseasonstrue, crawlseasonsfalse, ssdiv, sjregextrue, sjregexfalse, srdiv, dockerblocker, dockerhint)
Code Example #15
def write_crawljob_file(package_name, folder_name, link_text, crawljob_dir,
                        subdir):
    # Crawljob files end in .crawljob
    crawljob_file = crawljob_dir + '/%s.crawljob' % unicode(
        # Replace special characters/whitespace that are not Windows-compatible
        re.sub('[^\w\s\.-]', '', package_name.replace(' ',
                                                      '')).strip().lower())
    # Try to write the .crawljob
    crawljobs = RssConfig('Crawljobs')
    autostart = crawljobs.get("autostart")
    usesubdir = crawljobs.get("subdir")
    if usesubdir == "False":
        subdir = ""
    if autostart == "True":
        autostart = "TRUE"
    else:
        autostart = "FALSE"
    try:
        # Open the crawljob with write access
        file = open(crawljob_file, 'w')
        # Package options for JDownloader:
        # Package is enabled
        file.write('enabled=TRUE\n')
        # Download starts automatically
        file.write('autoStart=' + autostart + '\n')
        # Add extraction passwords
        file.write('extractPasswords=["' +
                   "bW92aWUtYmxvZy5vcmc=".decode('base64') + '","' +
                   "c2VyaWVuanVua2llcy5vcmc=".decode('base64') + '"]\n')
        # Extract archives automatically
        file.write('extractAfterDownload=TRUE\n')
        # Force automatic start
        file.write('forcedStart=' + autostart + '\n')
        # Automatically confirm JDownloader prompts
        file.write('autoConfirm=' + autostart + '\n')
        # The download subfolder is folder_name; subdir is prepended when it is not empty. The subdir helps with automation (e.g. via Filebot).
        if not subdir == "":
            file.write('downloadFolder=' + subdir + "/" + '%s\n' % folder_name)
            # Lower priority for forced dual-language downloads
            if subdir == "RSScrawler/Remux":
                file.write('priority=Lower\n')
        else:
            file.write('downloadFolder=' + '%s\n' % folder_name)
        # The package name in JDownloader is package_name (without spaces!)
        file.write('packageName=%s\n' % package_name.replace(' ', ''))
        # Use the first (according to the code: only!) entry of the link_text array as the download link
        file.write('text=%s\n' % link_text)
        # Finish writing
        file.close()
        # Confirm the successful write
        return True
    # On errors:
    except UnicodeEncodeError as e:
        # Close the file
        file.close()
        # Log the error, including the crawljob's path and the error message
        logging.error("Beim Schreibversuch des Crawljobs: %s FEHLER: %s" %
                      (crawljob_file, e.message))
        # If a broken crawljob is left behind
        if os.path.isfile(crawljob_file):
            # Log the cleanup step
            logging.info("Entferne defekten Crawljob: %s" % crawljob_file)
            # Remove the crawljob
            os.remove(crawljob_file)
        # Record the failed write
        return False
Code Example #16
File: RSScrawler.py  Project: Mengele88/RSScrawler
class MovieblogFeed():
    FEED_URL = "http://www.movie-blog.org/feed/"
    SUBSTITUTE = "[&#\s/]"
    _INTERNAL_NAME='MB'

    def __init__(self):
        self.config = RssConfig(self._INTERNAL_NAME)
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        list([_mkdir_p(os.path.dirname(self.config.get(f))) for f in ['db_file', 'patternfile']])
        _mkdir_p(self.config.get('crawljob_directory'))
        self.db = RssDb(self.config.get('db_file'))
        self._hosters_pattern = self.config.get('hoster').replace(';','|')
        self._periodical_active = False
        self.periodical = RepeatableTimer(
            int(self.config.get('interval')) * 60,
            self.periodical_task
        )
        self.dictWithNamesAndLinks = {}

    def activate(self):
        self._periodical_active = True
        self.periodical.start()
        return self

    def readInput(self, file):
        if not os.path.isfile(file):
            open(file, "a").close()
            placeholder = open(file, 'w')
            placeholder.write('ADD ALL MOVIES YOU WANT TO CRAWL FOR AS NEW LINES IN THIS FILE\n')
            placeholder.close()
        try:
            f = codecs.open(file, "rb")
            return f.read().splitlines()
        except:
            self.log_error("Inputfile not found")

    def getPatterns(self, patterns, quality, rg, sf):
        return {line: (quality, rg, sf) for line in patterns}

    def searchLinks(self, feed):
        ignore = "|".join(["\.%s\." % p for p in self.config.get("ignore").lower().split(',')
                           if not self.config.get('crawl3d') or p != '3d']) \
            if not self.config.get("ignore") == "" else "^unmatchable$"
        for key in self.allInfos:
            s = re.sub(self.SUBSTITUTE,".",key).lower()
            for post in feed.entries:
                """Search for title"""
                found = re.search(s,post.title.lower())
                if found:
                    """Check if we have to ignore it"""
                    found = re.search(ignore,post.title.lower())
                    if found:
                        self.log_debug("Ignoring [%s]" %post.title)
                        continue
                    """Search for quality"""
                    ss = self.allInfos[key][0].lower()

                    if '.3d.' in post.title.lower():
                        if self.config.get('crawl3d') and ("1080p" in post.title.lower() or "1080i" in post.title.lower()):
                            found = True
                        else:
                            continue
                    else:
                        if ss == "480p":
                            if "720p" in post.title.lower() or "1080p" in post.title.lower() or "1080i" in post.title.lower():
                                continue
                            found = True
                        else:
                            found = re.search(ss,post.title.lower())
                    if found:
                        """Search for releasegroup"""
                        sss = "[\.-]+"+self.allInfos[key][1].lower()
                        found = re.search(sss,post.title.lower())

                        if self.allInfos[key][2]:
                            # If all True, then found = True
                            found = all([word in post.title.lower() for word in self.allInfos[key][2]])

                        if found:
                            try:
                                episode = re.search(r'([\w\.\s]*s\d{1,2}e\d{1,2})[\w\.\s]*',post.title.lower()).group(1)
                                if "repack" in post.title.lower():
                                    episode = episode + "-repack"
                                self.log_debug("TV-Series detected, will shorten its name to [%s]" %episode)
                                yield (episode, [post.link], key)
                            except:
                                yield (post.title, [post.link], key)


    def _get_download_links(self, url, hosters_pattern=None):
        tree = html.fromstring(requests.get(url).content)
        xpath = '//*[@id="content"]/span/div/div[2]/p//strong[contains(text(),"Download:") or contains(text(),"Mirror #")]/following-sibling::a[1]'
        return [common.get_first(link.xpath('./@href')) for link in tree.xpath(xpath) if hosters_pattern is None or re.search(hosters_pattern, link.text, flags=re.IGNORECASE)]

    @_restart_timer
    def periodical_task(self):
        urls = []
        text = []

        dl = {key:('.*', '.*', ('.dl.',)) for key in self.db.get_patterns('notdl')}

        self.allInfos = dict(
            set({key: dl[key] if key in dl else value for (key, value) in self.getPatterns(
                    self.readInput(self.config.get("patternfile")),
                    self.config.get('quality'),
                    '.*',
                    None
                ).items()}.items()
            ) |
            set(self.getPatterns(
                    self.readInput(self.config.get("seasonslist")),
                    self.config.get('seasonsquality'),
                    '.*',
                    ('.complete.','.' + self.config.get('seasonssource') + '.')
            ).items() if self.config.get('crawlseasons') else [])
        )

        if self.config.get("historical"):
            for xline in self.allInfos.keys():
                if len(xline) > 0 and not xline.startswith("#"):
                    xn = xline.split(",")[0].replace(".", " ").replace(" ", "+")
                    urls.append('http://www.movie-blog.org/search/%s/feed/rss2/' %xn)
        else:
            urls.append(self.FEED_URL)

        for url in urls:
            for (key, value, pattern) in self.searchLinks(feedparser.parse(url)):
                if self.db.retrieve(key) == 'added' or self.db.retrieve(key) == 'notdl':
                    self.log_debug("[%s] has already been added" % key)
                else:
                    self.db.store(
                        key,
                        'notdl' if self.config.get('enforcedl') and '.dl.' not in key.lower() else 'added',
                        pattern
                    )
                    self.log_info("NEW RELEASE: " + key)
                    download_link = [common.get_first(self._get_download_links(value[0], self._hosters_pattern))]
                    if any(download_link):
                        write_crawljob_file(
                            key,
                            key,
                            download_link,
                            self.config.get("crawljob_directory")
                        ) and text.append(key)
        if len(text) > 0:
            notifyPushbulletMB(self.config.get("pushbulletapi"),text)