Example #1
0
    def __init__(self, link, posts, handbook):
        """Create the file and log helpers and remember the parser inputs.

        link, posts and handbook are stored unchanged on the instance.
        """
        self.file = File()
        # Every instance logs under the fixed 'sites' name.
        self.log = Log('sites')

        self.link, self.posts, self.handbook = link, posts, handbook
Example #2
0
  def save(self, dirTarget,withThumb=False,streamMode=StreamMode.Exodus):
    """
    Write this movie into the library as a .strm playlist plus an .nfo file.

    A folder named after the lower-cased English title (cut to 40 characters)
    and the year is created under dirTarget. The .strm file is rewritten on
    every call; the .nfo file is only created when it does not exist yet.

    dirTarget  -- directory that receives the movie folder
    withThumb  -- when True, load the poster into folder.jpg if it is missing
    streamMode -- StreamMode member selecting which link builder is used

    Returns True when the .nfo file was created by this call, False when it
    was already present.
    """
    # Unicode template: a byte-string template fails on non-ASCII titles in
    # Python 2, and the playlist lines below are unicode anyway.
    name = u'{0} ({1})'.format(self.nameEn.lower()[:40], self.year)
    base = File.cleanName(name)
    dirTarget = os.path.join(dirTarget, base)
    info = os.path.join(dirTarget, u'{0}.{1}'.format(base, 'nfo'))
    File.ensureFolder(dirTarget)

    links = []
    if streamMode == StreamMode.Exodus:
      links.append([name, self.asExodus()])
    if streamMode == StreamMode.Pulsar:
      links.append([name, self.asPulsar()])
    if streamMode == StreamMode.Salts:
      links.append([name, self.asSalts()])
    if streamMode == StreamMode.Quasar:
      links.append([name, self.asQuasar()])

    strm = os.path.join(dirTarget, u'{0}.{1}'.format(base, 'strm'))
    # Explicit encoding: without it codecs.open() falls back to the plain
    # built-in open(), which cannot reliably take non-ASCII unicode text.
    with codecs.open(strm, 'w', encoding='utf-8') as ff:
      ff.write(u"#EXTM3U\n")
      for title, url in links:
        ff.write(u'#EXTINF:{0},{1}\n'.format(0, title))
        ff.write(u'{0}\n'.format(url))

    if withThumb:
      thumb = os.path.join(dirTarget, 'folder.jpg')
      if not os.path.exists(thumb):
        # NOTE(review): the '@' prefix on the target is passed through as the
        # original did; its meaning is defined by FileLoader.load - confirm.
        FileLoader.load(self.imdbInfo.get('Poster'), '@' + thumb)

    # An existing .nfo means the movie was saved before; leave it untouched.
    if os.path.exists(info):
      return False
    with codecs.open(info, 'w', encoding='utf-8') as ff:
      ff.write(u'http://www.imdb.com/title/{0}'.format(self.idImdb))
    return True
Example #3
0
    def __init__(self):
        """Set up the database, file and log helpers and an empty news list."""
        kind = 'sites'
        self.type = kind

        self.db = Db()
        self.file = File()
        # The log is named after the instance type.
        self.log = Log(kind)

        self.news = list()
Example #4
0
 def initPaths(self):
   """Derive the working directories from self.path / self.data and create them.

   Also points Log.log at this object's log callable and FileLoader.dirCache
   at the cache directory.
   """
   join = os.path.join
   self.dirRoot = self.path
   self.dirSource = join(self.dirRoot, 'Feeder')
   self.cache = join(self.data, "Cache")
   self.dirLogs = join(self.data, "Logs")
   # Both data folders must exist before anything is written into them.
   for folder in (self.cache, self.dirLogs):
     File.ensureFolder(folder)
   Log.log = self.log
   FileLoader.dirCache = join(self.data, "Cache")
Example #5
0
  def initPaths(self):
    """Derive the working directories from self.path / self.data and create them.

    Also configures the class-level settings FileLoader.dirCache,
    SeriesInfo.tvdbMoldDir and Log.log.
    """
    join = os.path.join
    self.dirRoot = self.path
    self.cache = join(self.data, "Cache")
    self.dirLogs = join(self.data, "Logs")

    # Both data folders must exist before anything is written into them.
    for folder in (self.cache, self.dirLogs):
      File.ensureFolder(folder)

    FileLoader.dirCache = join(self.data, "Cache")

    SeriesInfo.tvdbMoldDir = join(self.dirRoot, "html")
    Log.log = self.log
Example #6
0
    def save(self, dirTarget, langid=7, withThumb=False, streamMode=StreamMode.Exodus):
        """Write the series into the library as one .strm playlist per episode.

        Layout: <dirTarget>/<name (year)>/<clean name>.S<season>/<episode>.strm,
        with a tvshow.nfo written into the series folder and every season
        folder. Season 0 and episodes named "TBA" are skipped. A failure on
        one episode is logged and does not stop the remaining episodes.

        dirTarget  -- library directory that receives the series folder
        langid     -- language id forwarded to saveTvshow
        withThumb  -- accepted for interface compatibility; not used by the
                      code visible here
        streamMode -- StreamMode member selecting which link builder is used

        NOTE(review): `added` and `episodes` are counted but not returned by
        the code visible here - confirm whether callers expect them.
        """
        added = 0
        episodes = 0
        base = File.cleanName(u"{0} ({1})".format(self.name.lower(), self.year))
        dirSeries = os.path.join(dirTarget, base)
        self.saveTvdbInfo()
        # A missing series folder means the show is new to the library.
        if not os.path.exists(dirSeries):
            added += 1
        self.saveTvshow(dirSeries, langid=langid)
        for episode in self.episodes:
            try:
                if episode.season == 0 or episode.name == "TBA":
                    continue
                dirSeason = os.path.join(
                    dirSeries, File.cleanName(u"{0}.S{1:02}".format(self.findNameClean(), episode.season))
                )
                self.saveTvshow(dirSeason, langid=langid)

                name = u"{3}.S{0:02}E{1:02}.{2}".format(episode.season, episode.episode, episode.name, self.name)

                part0 = dirSeason
                part1 = self.name
                part2 = u".S{0:02}E{1:02}.".format(episode.season, episode.episode)
                # Budget left for the episode title inside a 256-character
                # path; 10 characters are held back as in the original.
                # The original had `256 - -len(part0)`, which added the
                # directory length instead of subtracting it. Clamp at 0 so a
                # very long directory cannot turn the slice bound negative.
                room = max(0, 256 - len(part0) - len(part1) - len(part2) - 10)
                part3 = episode.name[:room]

                base = File.cleanName(u"{0}{1}{2}".format(part1, part2, part3))

                links = []
                if streamMode == StreamMode.Exodus:
                    links.append([name, episode.asExodus()])
                if streamMode == StreamMode.Pulsar:
                    links.append([name, episode.asPulsar()])
                if streamMode == StreamMode.Salts:
                    links.append([name, episode.asSalts()])
                if streamMode == StreamMode.Quasar:
                    links.append([name, episode.asQuasar()])

                strm = os.path.join(dirSeason, u"{0}.{1}".format(base, "strm"))
                # Count only playlists that did not exist before this run.
                if not os.path.exists(strm):
                    episodes += 1
                # Explicit encoding so non-ASCII episode titles can be written.
                with codecs.open(strm, "w", encoding="utf-8") as ff:
                    ff.write(u"#EXTM3U\n")
                    for title, url in links:
                        ff.write(u"#EXTINF:{0},{1}\n".format(0, title))
                        ff.write(u"{0}\n".format(url))
            except Exception as ee:
                # Deliberate best effort: log the episode failure and go on.
                Log.log(Text.formatException(ee))
Example #7
0
File: log.py Project: Nikkoz/yii2
class Log():
    """Append messages to a named log file and echo them to the console."""

    def __init__(self, filename):
        """Remember the log name and create the File helper used for writing."""
        self.filename = filename

        self.file = File()

    def _now(self):
        """Return the timestamp text used in the start/finish banners."""
        # NOTE(review): the fixed +2h shift is kept from the original;
        # presumably a timezone adjustment - confirm.
        return str(datetime.today() + timedelta(hours=2))

    def begin(self):
        """Record the start time and write the start banner."""
        self.start_time = time.time()

        self.write('--- start at {0} ---'.format(self._now()))

    def finish(self):
        """Write the finish banner with the time elapsed since begin()."""
        self.write("--- finish at {0}, duration: {1} ---".format(
            self._now(), self.duration()))

    def write(self, msg):
        """Append msg plus a newline to the log file and print it."""
        f = self.file.write_a(self.filename, 'log')
        try:
            f.write(msg + '\n')
        finally:
            # Close the handle even when the write itself fails.
            f.close()

        # output to the console
        print(msg)

    def duration(self):
        """Return the time since begin() as '<value> min' or '<value> sec'.

        Durations above 60 seconds are reported in minutes; the value is
        rounded to 4 decimal places. begin() must have been called first,
        otherwise start_time does not exist and AttributeError is raised.
        """
        elapsed = time.time() - self.start_time

        if elapsed > 60:
            return "{0} {1}".format(round(elapsed / 60, 4), 'min')
        return "{0} {1}".format(round(elapsed, 4), 'sec')
Example #8
0
  def library(self):
    """Return the library directory, creating it and its .tvdb/.html subfolders.

    The path comes from the 'library' config entry; when that entry is the
    empty string, <self.data>/Library is used. SeriesInfo.tvdbDataDir and
    SeriesInfo.tvdbHtmlDir are pointed at the two subfolders.
    """
    root = self.config.getStr('library')
    if root == '':
      root = os.path.join(self.data, "Library")
    SeriesInfo.tvdbDataDir = os.path.join(root, ".tvdb")
    SeriesInfo.tvdbHtmlDir = os.path.join(root, ".html")

    # Create the library folder first, then its subfolders.
    for folder in (root, SeriesInfo.tvdbDataDir, SeriesInfo.tvdbHtmlDir):
      File.ensureFolder(folder)
    return root
Example #9
0
class Scraping(Union):
    """Run the parser of every enabled site and collect the resulting news.

    Site rows come from the database. As used in this class, row[1] is
    handed to the parser and used in log messages, row[2] is the key that
    selects the parser class, and row[3] > 0 marks the site as enabled.
    """

    def __init__(self):
        """Set up the database, file and log helpers and an empty news list."""
        self.type = 'sites'

        self.db = Db()
        self.file = File()
        self.log = Log(self.type)

        self.news = []

    def site_list(self):
        """Return the site rows stored in the database."""
        return self.db.get_sites()

    def posts_list(self):
        """Return the first column of every 'post' row newer than day_ago()."""
        return [post[0] for post in self.db.get_posts('post', self.day_ago())]

    def handbook_list(self):
        """Return the handbook rows as {row[0]: {'title': row[1], 'check': row[2]}}."""
        return {
            handbook[0]: {
                'title': handbook[1],
                'check': handbook[2]
            }
            for handbook in self.db.get_handbook()
        }

    def start(self):
        """Scrape every enabled site.

        Returns False when start_import() reports the import is not ready;
        otherwise returns None after finish_import() has been called.
        """
        # check import is ready
        if not self.start_import():
            return False

        self.posts = self.posts_list()
        self.handbook = self.handbook_list()

        # Without handbook entries there is nothing to match posts against.
        if self.handbook:
            for site in self.site_list():
                if site[3] > 0:
                    self.scrap(site)
                else:
                    self.log.write("resourse {0} is desabled".format(site[1]))

            # NOTE(review): the collected news is only counted here; saving
            # to the database is not part of the visible code.
            print(len(self.news))

        self.finish_import()

    def scrap(self, site):
        """Run the parser for one site and merge its posts into self.news.

        A site whose key has no registered parser is logged and skipped
        instead of failing with AttributeError on None.
        """
        self.file.set_file(site[1])

        resourse = self.switch(site)
        if resourse is None:
            self.log.write("parser {0} for {1} is not found".format(site[2], site[1]))
            return

        resourse.start()

        self.news = self.merge(self.news, resourse.get_posts())
        # Hand the parser's title list on to the next parser.
        self.posts = resourse.get_titles()

    def switch(self, site):
        """Return a parser instance for site[2], or None for an unknown key.

        The start of the site is logged before the parser is created. The
        explicit chain is kept so only the selected parser name is resolved.
        """
        x = site[2]

        self.log.write("---\n{0} start at {1}".format(site[1], self.get_day()))

        if x == 'thebitcoinnews':
            return CThebitcoinnews(site[1], self.posts, self.handbook)
        elif x == 'coinjournal':
            return CCoinjournal(site[1], self.posts, self.handbook)
        elif x == 'coindesk':
            return CCoindesk(site[1], self.posts, self.handbook)
        elif x == 'bitcoin':
            return CBitcoin(site[1], self.posts, self.handbook)
        elif x == 'cointelegraph':
            return CCointelegraph(site[1], self.posts, self.handbook)
        elif x == 'bitcoinmagazine':
            return CBitcoinmagazine(site[1], self.posts, self.handbook)
        elif x == 'newsbtc':
            return CNewsbtc(site[1], self.posts, self.handbook)
        elif x == 'forklog':
            return CForklog(site[1], self.posts, self.handbook)
        elif x == 'coinspeaker':
            return CCoinspeaker(site[1], self.posts, self.handbook)
        elif x == 'bitcoinist':
            return CBitcoinist(site[1], self.posts, self.handbook)
        elif x == 'bitcoinertoday':
            return CBitcoinertoday(site[1], self.posts, self.handbook)
        elif x == 'coindoo':
            return CCoindoo(site[1], self.posts, self.handbook)
        elif x == 'trustnodes':
            return CTrustnodes(site[1], self.posts, self.handbook)
        elif x == 'btcmanager':
            return CBtcmanager(site[1], self.posts, self.handbook)
        elif x == 'usethebitcoin':
            return CUsethebitcoin(site[1], self.posts, self.handbook)
        elif x == 'investinblockchain':
            return CInvestinblockchain(site[1], self.posts, self.handbook)
        elif x == 'ethereumworldnews':
            return CEthereumworldnews(site[1], self.posts, self.handbook)
        elif x == 'coinstaker':
            return CCoinstaker(site[1], self.posts, self.handbook)
        elif x == 'livebitcoinnews':
            return CLivebitcoinnews(site[1], self.posts, self.handbook)
        elif x == 'coinsnewbium':
            return CCoinsnewbium(site[1], self.posts, self.handbook)
        elif x == 'ccn':
            return CCcn(site[1], self.posts, self.handbook)
        elif x == 'themerkle':
            return CThemerkle(site[1], self.posts, self.handbook)
        elif x == 'ethnews':
            return CEthnews(site[1], self.posts, self.handbook)
        elif x == 'zycrypto':
            return CZycrypto(site[1], self.posts, self.handbook)
        elif x == 'profitconfidential':
            return CProfitconfidential(site[1], self.posts, self.handbook)
        elif x == 'cryptoanswers':
            return CCryptoanswers(site[1], self.posts, self.handbook)
        elif x == 'bloomberg':
            return CBloomberg(site[1], self.posts, self.handbook)

        return None
Example #10
0
class Main():
    """Shared helpers for the site parsers.

    Covers reading the stored page, parsing it with BeautifulSoup, date
    checks, text clean-up and matching posts against the handbook.
    self.menu and self.result are read here but never assigned in this
    class; they are expected to be provided elsewhere (presumably by
    subclasses).
    """

    def __init__(self, link, posts, handbook):
        """Create the file and log helpers and remember the parser inputs."""
        self.file = File()
        self.log = Log('sites')

        self.link = link
        self.posts = posts
        self.handbook = handbook

    def read_file(self):
        """Return the text of the stored 'page' html file."""
        with self.file.read('page', 'html') as input_file:
            return input_file.read()

    def get_menu(self, type, value, inner='', span=False):
        """Return the menu links of the page as [{'title': ..., 'url': ...}].

        type  -- attribute used to find the menu ('id' or 'class')
        value -- value of that attribute
        inner -- when not empty, the tag name of a wrapper element carrying
                 the attribute; the menu is the first <ul> inside it.
                 Otherwise the <ul> itself carries the attribute.
        span  -- forwarded to clear_title(); when False, <span> children are
                 removed from a link before its text is read

        Only titles listed in self.menu are kept, each one once.
        Raises RuntimeError when the menu cannot be found.
        """
        soup = self.soup()

        if inner != '':
            wrapper = soup.find(inner, {type: value})
            # A missing wrapper is the same "layout changed" condition as a
            # missing list; report it the same way instead of failing with
            # AttributeError on None.
            links = wrapper.find('ul') if wrapper is not None else None
        else:
            links = soup.find('ul', {type: value})

        if not links:
            raise RuntimeError("structure of the site menu has changed")

        pages = []
        titles = []

        for item in links.find_all('a'):
            title = self.clear_title(item, span)

            if title in self.menu and title not in titles:
                titles.append(title)

                pages.append({
                    'title': title,
                    'url': self.check_url(item.get('href'))
                })

        return pages

    def set_file(self, url):
        """Point the File helper at url within the 'sites' area."""
        self.file.set_file(url, 'sites')

    def soup(self):
        """Parse the stored page with the 'html.parser' backend."""
        return BeautifulSoup(self.read_file(), 'html.parser')

    def check_date(self, date, is_timestump=False, format=None):
        """Return the post date if it is at most one day old, else None.

        date         -- date string, or a timestamp when is_timestump is True
        is_timestump -- treat date as a timestamp instead of parsing it
        format       -- strptime format; by default the first 19 characters
                        of date are parsed as "%Y-%m-%dT%H:%M:%S"

        Parsed dates are returned as a local-time timestamp (float);
        timestamps are returned as passed in.
        Raises RuntimeError when the date cannot be parsed.
        """
        if format is None:
            format = "%Y-%m-%dT%H:%M:%S"
            # Only a textual date needs trimming to the format's length;
            # slicing a numeric timestamp would raise TypeError.
            if not is_timestump:
                date = date[:19]

        try:
            if not is_timestump:
                date = time.mktime(datetime.strptime(date, format).timetuple())

            day_ago = datetime.today() - timedelta(days=1)

            if float(date) < day_ago.timestamp():
                return None
        except ValueError:
            raise RuntimeError("structure date has changed")

        return date

    def get_posts(self):
        """Return self.result."""
        return self.result

    def get_titles(self):
        """Return self.posts."""
        return self.posts

    def clear(self, text):
        """Return text without emoji/pictograph characters.

        Non-breaking spaces are replaced by ordinary spaces and the result
        is stripped of surrounding whitespace.
        """
        try:
            myre = re.compile(
                u"[\U0001F300-\U0001F64F\U0001F680-\U0001F6FF\u2600-\u26FF\u2700-\u27BF]+",
                re.UNICODE)
        except re.error:
            # Fallback that spells the same ranges as surrogate pairs, used
            # when the wide-character class above does not compile.
            myre = re.compile(
                u"(\ud83c[\udf00-\udfff]|\ud83d[\udc00-\ude4f\ude80-\udeff]|[\u2600-\u26FF\u2700-\u27BF])+",
                re.UNICODE)

        return myre.sub(r'', text.replace("\xa0", " ")).strip()

    def change_date(self, date, format="%b %d, %Y"):
        """Turn a relative date such as '3 hours ago' into a formatted date.

        date   -- exactly three space-separated parts: number, unit, suffix
        format -- strftime format of the result

        Hour and minute units are recognised case-insensitively (hour,
        hours, min, mins, minute, minutes).
        Raises RuntimeError for any other unit; the original fell through
        and failed with AttributeError on str.strftime.
        """
        number, unit, _ = date.split(' ')
        unit = unit.lower()

        if unit in ('hour', 'hours'):
            moment = datetime.now() - timedelta(hours=int(number))
        elif unit in ('min', 'mins', 'minute', 'minutes'):
            moment = datetime.now() - timedelta(minutes=int(number))
        else:
            raise RuntimeError("structure date has changed")

        return moment.strftime(format)

    def clear_title(self, point, clear):
        """Return the stripped text of point.

        When clear is False, <span> children are removed from point first,
        so their text is not part of the result.
        """
        spans = point.find_all('span')

        if spans and not clear:
            for span in spans:
                span.extract()

        return point.text.strip()

    def check_url(self, url):
        """Return url as an absolute address under self.link.

        An existing self.link prefix and leading slashes are removed before
        self.link is prepended, so absolute and relative inputs agree.
        """
        return self.link + url.replace(self.link, '').lstrip('/')

    def check_handbook_post(self, title, text):
        """Return the handbook keys whose title occurs in title or text.

        A handbook title must be delimited by non-word characters or the
        string boundaries. Entries with check == 0 match case-insensitively,
        all others case-sensitively.
        """
        check = []

        for key, entry in self.handbook.items():
            flags = re.IGNORECASE if entry['check'] == 0 else 0
            # Raw strings keep \W a regex escape rather than an invalid
            # string escape.
            # NOTE(review): the title is inserted unescaped, as in the
            # original; titles with regex metacharacters are treated as
            # patterns - confirm whether re.escape is wanted.
            pattern = re.compile(r'(^|\W)' + entry['title'] + r'(\W|$)', flags)

            if pattern.search(text) is not None or pattern.search(title) is not None:
                check.append(key)

        return check

    def multiple_replacer(self, *key_values):
        """Return a function replacing every key with its value in one pass.

        key_values -- (old, new) pairs. With no pairs the returned function
                      gives its input back unchanged; the original compiled
                      an empty pattern and failed with KeyError.
        """
        if not key_values:
            return lambda string: string

        replace_dict = dict(key_values)
        pattern = re.compile("|".join([re.escape(k) for k, _ in key_values]),
                             re.M)
        return lambda string: pattern.sub(
            lambda match: replace_dict[match.group(0)], string)

    def multiple_replace(self, string, *key_values):
        """Apply the (old, new) pairs in key_values to string in one pass."""
        return self.multiple_replacer(*key_values)(string)
Example #11
0
 def cleanLogs(self):
   """Delete the files in the log directory, then notify with the cleanLogs message."""
   File.delFiles(self.dirLogs)
   message = sm.str(sm.cleanLogs)
   self.notify(message)
Example #12
0
 def cleanCache(self):
   """Delete the files in the cache directory, then notify with the cleanCache message."""
   File.delFiles(self.cache)
   message = sm.str(sm.cleanCache)
   self.notify(message)
Example #13
0
 def cleanLibrary(self):
   """Delete the library files, clean the source, then notify with the cleanLibrary message."""
   # NOTE(review): self.library is passed without being called; confirm it
   # is an attribute or property here rather than a method.
   File.delFiles(self.library)
   self.cleanSource()
   message = sm.str(sm.cleanLibrary)
   self.notify(message)
Example #14
0
 def saveTvshow(self, dirTarget, langid=7):
     """Write tvshow.nfo into dirTarget, holding the series' thetvdb.com URL.

     dirTarget -- folder that receives the file; created when missing
     langid    -- value written as the URL's `lid` parameter
     """
     File.ensureFolder(dirTarget)
     url = "http://thetvdb.com/?{0}={1}&{2}={3}&{4}={5}".format(
         "tab", "series", "id", self.tvdbId, "lid", langid
     )
     # open() replaces the Python-2-only file() builtin; it behaves the same
     # here and also exists on Python 3.
     with open(os.path.join(dirTarget, "tvshow.nfo"), "w") as ff:
         ff.write(url)
Example #15
0
 def findNameClean(self):
     """Return the cleaned series name, cut to 50 characters and lower-cased.

     "unknown" stands in when self.name is None. The original computed that
     fallback but then cleaned self.name anyway.
     """
     name = self.name if self.name is not None else "unknown"
     return File.cleanName(name)[:50].lower()
Example #16
0
File: log.py Project: Nikkoz/yii2
    def __init__(self, filename):
        """Create the File helper and remember the log file name."""
        self.file = File()

        self.filename = filename
Example #17
0
 def library(self):
   """Return the library directory, creating it when missing.

   The path comes from the 'library' config entry; when that entry is the
   empty string, <self.data>/Library is used.
   """
   root = self.config.getStr('library')
   if root == '':
     root = os.path.join(self.data, "Library")
   File.ensureFolder(root)
   return root