Example #1
    def genre(self, page, subpage, genres, type):

        genre_ids = [58,69,57,59,84,86,60,79,77,93,89,82,71,66,95,88,75,85,83,
                     90,63,94,72,73,67,87,78,61,70,91,92,64,96,68,62,65,76,80,74,81,98,97]

        genre_titles = ['Action', 'Adventure', 'Cars', 'Comedy', 'Dementia', 'Demons', 'Drama', 'Ecchi', 'Fantasy',
                        'Game', 'Harem', 'Historical', 'Horror', 'Josei', 'Kids', 'Magic', 'Martial Arts', 'Mecha',
                        'Military', 'Music', 'Mystery', 'Parody', 'Police', 'Psychological', 'Romance', 'Samurai',
                        'School', 'Sci-Fi', 'Seinen', 'Shoujo', 'Shoujo Ai', 'Shounen', 'Shounen Ai', 'Slice of Life',
                        'Space', 'Sports', 'Super Power', 'Supernatural', 'Thriller', 'Vampire', 'Yaoi', 'Yuri']

        if genres is None:
            genres = xbmcgui.Dialog().multiselect("Genre", genre_titles)
            if genres is None:  # dialog was cancelled
                return
        else:
            genres = json.loads(genres)

        # map the selected indices to genre ids (avoids shadowing the built-in list)
        selected_ids = [genre_ids[int(i)] for i in genres]

        items = cache.get(scraper().genreScrape, 24, selected_ids, page, subpage, type)

        subpage, page = self.subpagination(subpage, page)

        self.list_builder(items)

        self.addDirectoryItem('Next', 'genreSearch', page=page, genres=genres, subpage=subpage, type=type)

        self.createDirectory(sort=False)
Example #2
def remove_empty_lines(html):
    key = '%s:remove_empty_lines' % hash(html)
    out = cache.get(key, namespace="filters")
    if out:
        return out

    if '</' in html:
        html = html.strip().replace('\n', '')
        soup = BeautifulSoup(html)
        lines = []
        for element in soup.contents:
            if isinstance(element, Tag):
                if element.text:
                    lines.append(str(element).strip())
                elif 'br' in str(element):
                    lines.append('\n')
            elif isinstance(element, NavigableString):
                lines.append(str(element).strip())
        out = ''.join(lines).strip()
        while '\n\n' in out:
            out = out.replace('\n\n', '\n')
    else:
        out = '\n'.join([line for line in html.split('\n') if line.strip()])
    cache.set(key, out, namespace="filters")
    return out
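
A quick usage sketch for reference (the input string is made up; behavior follows the code above, where empty tags are dropped and br elements become newlines):

html = '<p>first</p><br/><p></p><p>second</p>'
print(remove_empty_lines(html))  # -> '<p>first</p>\n<p>second</p>'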
Example #3
def get_trend_echos(url_echos, report):
    """
    Get Echos trend
    """
    if not url_echos:
        return report
    url = url_echos.replace('/action-', '/recommandations-action-')
    content = cache.get(url)
    if content:
        soup = BeautifulSoup(content, 'html.parser')
        for i in soup.find_all('div', 'tendance hausse'):
            if 'court terme' in i.text:
                report['echos']['short term'] = 'Hausse'
            if 'moyen terme' in i.text:
                report['echos']['mid term'] = 'Hausse'
        for i in soup.find_all('div', 'tendance egal'):
            if 'court terme' in i.text:
                report['echos']['short term'] = 'Neutre'
            if 'moyen terme' in i.text:
                report['echos']['mid term'] = 'Neutre'
        for i in soup.find_all('div', 'tendance baisse'):
            if 'court terme' in i.text:
                report['echos']['short term'] = 'Baisse'
            if 'moyen terme' in i.text:
                report['echos']['mid term'] = 'Baisse'
    return report
Example #4
def remove_empty_lines(html):
  key = '%s:remove_empty_lines' % hash(html)
  out = cache.get(key, namespace="filters")
  if out:
    return out
  
  if '</' in html:
    html = html.strip().replace('\n', '')
    soup = BeautifulSoup(html)
    lines = []
    for element in soup.contents:
      if isinstance(element, Tag):
        if element.text:
          lines.append(str(element).strip())
        elif 'br' in str(element):
          lines.append('\n')
      elif isinstance(element, NavigableString):
        lines.append(str(element).strip())
    out = ''.join(lines).strip()
    while '\n\n' in out:
      out = out.replace('\n\n', '\n')
  else:
    out = '\n'.join([line for line in html.split('\n') if line.strip()])
  cache.set(key, out, namespace="filters")
  return out
Example #5
    def searchNew(self):

        control.busy()

        t = control.lang(32010).encode('utf-8')
        k = control.keyboard('', t)
        k.doModal()
        q = k.getText()

        if not q: return  # keyboard cancelled or empty query

        try:
            from sqlite3 import dbapi2 as database
        except:
            from pysqlite2 import dbapi2 as database

        dbcon = database.connect(control.searchFile)
        dbcur = dbcon.cursor()
        dbcur.execute("INSERT INTO search VALUES (?,?)", (None, q))
        dbcon.commit()
        dbcur.close()

        items = cache.get(scraper().search, 24, q)

        self.list_builder(items)

        control.idle()

        self.createDirectory(sort=False)
Example #6
def fetch_location_dict(area_id):
    key = GROCERY_LOCATION_KEY + u'{}'.format(area_id)
    location_dict = cache.get(key)
    if not location_dict:
        location_url = config.LOCATIONURL + str(area_id) + '/'
        headers = {'Authorization': config.TOKEN}
        response = make_api_call(location_url, headers=headers)

        try:
            data_list = json.loads(response.text)
        except Exception as e:
            logger.exception(e)
            return False, None, u'Unable to fetch area details'

        if not data_list:
            return False, None, u'Area Does not exist'

        data = data_list[0]
        location_dict = dict()
        location_dict['area'] = data.get('areaid')
        location_dict['country'] = [data.get('countryid')]
        location_dict['state'] = [data.get('stateid')]
        location_dict['city'] = [data.get('cityid')]
        location_dict['zone'] = [data.get('zoneid')]

        cache.set(key, location_dict, ex=GROCERY_CACHE_TTL)

    return True, location_dict, None
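
fetch_location_dict reports failures through its return value rather than raising, so callers unpack a (success, location_dict, error) triple. A hedged usage sketch (the area id 1234 is made up for illustration):

ok, location, error = fetch_location_dict(1234)
if not ok:
    logger.error('area lookup failed: %s', error)
else:
    print(location['city'], location['zone'])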
Example #7
def get_trend_frtn(url_frtn, report):
    """
    Get FRTN trend
    """
    if not url_frtn:
        return report, None
    market = int(url_frtn.split('-')[-1])
    isin = url_frtn.split('-')[-2]
    trend_url = common.decode_rot('uggcf://obhefr.sbegharb.se/ncv/inyhr/geraqf/NPGVBAF/SGA') + \
        '{market:06d}{isin}'.format(market=market, isin=isin)
    content = cache.get(trend_url)
    if content and content != 'null':
        try:
            json_content = json.loads(content)
            mapping = {
                'POSITIVE': 'Hausse',
                'NEUTRE': 'Neutre',
                'NEGATIVE': 'Baisse',
            }
            if json_content.get('opinionCT') in mapping:
                report['frtn']['short term'] = mapping[json_content['opinionCT']]
            if json_content.get('opinionMT') in mapping:
                report['frtn']['mid term'] = mapping[json_content['opinionMT']]
        except json.decoder.JSONDecodeError:
            pass
    return report, isin
Example #8
    def check_resolver(self):

        try:
            r = cache.get(
                client.request, 1,
                base64.b64decode(
                    'aHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL3hpYmFsYmExMC9zY3JpcHQubW9kdWxlLmFkdWx0cmVzb2x2ZXIvbWFzdGVyL2xpYi9hZHVsdHJlc29sdmVyL3Jlc29sdmVyLnB5'
                ))
            if len(r) > 1:
                with open(self.resolverFile, 'r') as f:
                    compfile = f.read()
                if 'import' in r:
                    if compfile == r:
                        log_utils.log('Resolver checked and up to date!',
                                      log_utils.LOGNOTICE)
                    else:
                        with open(self.resolverFile, 'w') as f:
                            f.write(r)
                        log_utils.log('Resolver updated!', log_utils.LOGNOTICE)
                        kodi.notify(msg='Resolver Updated.',
                                    duration=1250,
                                    sound=True)
        except Exception as e:
            log_utils.log(
                'Error checking for resolver update :: Error: %s' % str(e),
                log_utils.LOGERROR)
Example #9
def _render(info, post_type, owner, viewport, mode=None, **kwargs):  
  owner_id = 'public' if (not owner or not owner.id) else owner.id
  
  if post_type in ['note', 'feed', 'file']:
    if mode:
      key = '%s:%s' % (viewport, mode)
    else:
      key = viewport
      
    if (owner and 
        owner.id and 
        owner.id != info.last_action.owner.id and 
        owner.id not in info.read_receipt_ids and 
        viewport != "discover"):
      status = 'unread'
    elif viewport == 'news_feed' and owner.id and owner.id in info.pinned_by:
      status = 'pinned'
    elif viewport == 'news_feed' and owner.id and owner.id in info.archived_by:
      status = 'archived'
    else:
      status = None
      
    if status:
      key = key + ':' + status
      
    key += ':%s:%s' % (post_type, owner_id)
    namespace = info.id
    
  else:
    key = post_type
    namespace = owner_id
    
  html = cache.get(key, namespace)
  hit = False
  if not html:
    if post_type == 'note':
      html = NOTE_TEMPLATE.render(note=info, 
                                  owner=owner, 
                                  view=viewport, 
                                  mode=mode, **kwargs)
    elif post_type == 'file':
      html = FILE_TEMPLATE.render(file=info, 
                                  owner=owner, 
                                  view=viewport, 
                                  mode=mode, **kwargs)
    
    else:
      html = FEED_TEMPLATE.render(feed=info, 
                                  owner=owner, 
                                  view=viewport, 
                                  mode=mode, **kwargs)
    cache.set(key, html, 86400, namespace)
  else:
    hit = True

  html = html.replace('<li id="post', '<li data-key="%s" data-namespace="%s" data-cache-status="%s" id="post' % (key, namespace, "HIT" if hit else "MISS"))
    
  return html
Example #10
    def searchOld(self, q):

        items = cache.get(scraper().search, 24, q)

        self.list_builder(items)

        self.createDirectory(sort=False)

        return
Example #11
    def episodeList(self, url, slug):

        items = cache.get(scraper().episodeList, 24, url)

        for item in items:
            self.addDirectoryItem(item['meta']['title'], "playItem", url=item['url'],
                                  type=item['type'], slug=slug, is_folder=False, playable=True, meta=item['meta'],
                                  art=item['art'])
        self.createDirectory(sort=False)
Example #12
def _get_saved_list():
    try:
        return store.get(_saved_list_key)
    except KeyError:
        pass
    try:  # backward compatible (try cache)
        return cache.get(_saved_list_key)
    except KeyError:
        return []
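
_get_saved_list relies on both store.get and cache.get raising KeyError on a miss instead of returning a sentinel. A minimal in-memory stand-in with that contract (purely illustrative, not the real backends) would be:

class KeyErrorStore:
    """Illustrative store: raises KeyError on a miss, matching the
    contract _get_saved_list assumes for store.get and cache.get."""
    def __init__(self):
        self._data = {}

    def get(self, key):
        return self._data[key]  # raises KeyError when the key is absent

    def set(self, key, value):
        self._data[key] = value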
Example #14
def get_potential(url_brsrm, url_frtn, cours):
    """
    Returns the potential for 3 month
    """
    report = dict()
    report['brsrm'] = dict()
    report['brsrm']['value'] = None
    report['brsrm']['percentage'] = 0
    report['frtn'] = dict()
    report['frtn']['value'] = None
    report['frtn']['percentage'] = 0
    if url_brsrm:
        content = cache.get(url_brsrm)
        if content:
            soup = BeautifulSoup(content, 'html.parser')
            for i in soup.find_all('p'):
                if 'Objectif de cours' in i.text:
                    value = i.find('span', 'u-text-bold')
                    if not value:
                        return report
                    report['brsrm']['value'] = common.clean_data(
                        value.text, json_load=False).split()[0]
                    if cours:
                        val = float(cours['cotation']['valorisation'].replace(
                            ',', '.').split()[0])
                        report['brsrm']['percentage'] = round(
                            (float(report['brsrm']['value']) / val - 1) * 100,
                            1)
    if url_frtn:
        market = int(url_frtn.split('-')[-1])
        isin = url_frtn.split('-')[-2]
        avis_url = common.decode_rot('uggcf://obhefr.sbegharb.se/ncv/inyhr/nivf/SGA') + \
            '{market:06d}{isin}'.format(market=market, isin=isin)
        content = cache.get(avis_url)
        if content:
            try:
                json_content = json.loads(content)
                report['frtn']['value'] = json_content['consensus']['objectif']
                report['frtn']['percentage'] = round(
                    float(json_content['consensus']['potentiel']) * 100, 1)
            except json.decoder.JSONDecodeError:
                pass
    return report
Example #15
    def playItem(self, slug, url):
        control.busy()
        resolve_dialog = xbmcgui.DialogProgress()
        link_list = cache.get(scraper().scrapeLinks, 24, slug, url)
        control.idle()

        if len(link_list) == 0:

            dialog = xbmcgui.Dialog()
            dialog.notification('Anime Incursion', 'No Links Available',
                                xbmcgui.NOTIFICATION_INFO, 5000)

        else:

            resolve_dialog.create('Anime Incursion', '')
            resolve_dialog.update(0)
            link_list = sorted(link_list,
                               key=lambda x: (x['quality']),
                               reverse=True)
            link_total = len(link_list)
            progress = 0
            path = ''

            for i in link_list:
                # if resolve_dialog.iscanceled() == True:
                #   return

                progress += 1
                resolve_dialog.update(
                    int((100 / float(link_total)) * progress),
                    str(progress) + ' | [B]Host: ' + i['name'].upper() +
                    "[/B] | [B]Resolution: " + str(i['quality']) + "p[/B]")
                try:
                    if not i['direct']:
                        import resolveurl
                        path = resolveurl.resolve(i['url']).encode('utf-8')
                        break
                    else:
                        path = i['url']
                        break
                except Exception:
                    continue

            if path != '':
                play_item = xbmcgui.ListItem(path=path)
                print('INFO - ' + str(sys.argv[1]))
                xbmcplugin.setResolvedUrl(int(sys.argv[1]),
                                          True,
                                          listitem=play_item)
            else:
                dialog = xbmcgui.Dialog()
                dialog.notification('Anime Incursion',
                                    'Unable to Resolve Links',
                                    xbmcgui.NOTIFICATION_INFO, 5000)
Example #16
    def showList(self, page, type, subpage, order='score_desc'):

        items = cache.get(scraper().filterScrape, 24, page, type, order, subpage)

        subpage, page = self.subpagination(subpage, page)

        self.list_builder(items)

        self.addDirectoryItem('Next', 'showList', page=page, type=type, order=order, subpage=subpage)

        self.createDirectory(sort=False)
Example #17
def to_text(html):
    try:
        html = unicode(html)
    except UnicodeDecodeError:
        pass
    key = '%s:to_text' % hash(html)
    out = cache.get(key, namespace="filters")
    if not out:
        out = api.remove_html_tags(html)
        cache.set(key, out, namespace="filters")
    return out
Example #18
def to_text(html):
  try:
    html = unicode(html)
  except UnicodeDecodeError:
    pass
  key = '%s:to_text' % hash(html)
  out = cache.get(key, namespace="filters")
  if not out:
    out = api.remove_html_tags(html)
    cache.set(key, out, namespace="filters")
  return out
Example #19
def get_cours(isin, mic, disable_cache=False):
    """
    Returns core info from isin
    """
    url = common.decode_rot('uggcf://yrfrpubf-obhefr-sb-pqa.jyo.nj.ngbf.arg') + \
          common.decode_rot('/fgernzvat/pbhef/trgPbhef?') + \
          'code={}&place={}&codif=ISIN'.format(isin, mic)
    content = cache.get(url, verify=False, disable_cache=disable_cache)
    cours = None
    if content:
        cours = common.clean_data(content)
    return cours
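
common.decode_rot is not shown in these snippets, but the encoded literals ('uggcf://...' decoding to 'https://...') indicate plain ROT13. A minimal sketch of such a helper, assuming nothing beyond the standard library:

import codecs

def decode_rot(value):
    # ROT13-decode a string: 'uggcf' becomes 'https'
    return codecs.decode(value, 'rot13')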
Example #20
def lines_truncate(text, lines_count=5):
  
  key = '%s:lines_truncate' % hash(text)
  out = cache.get(key, namespace="filters")
#  if out:
#    return out
  
  raw = text
  text = _normalize_newlines(text)
  
  # remove blank lines
  lines = [line for line in text.split('\n') if line.strip()]
#  text = '\n'.join(lines)
  
  images = re.compile('<img.*?>', re.IGNORECASE).findall(text)
  for i in images:
    text = text.replace(i, md5(i).hexdigest())
  
  links = re.compile('<a.*?</a>', re.IGNORECASE).findall(text)
  for i in links:
    text = text.replace(i, md5(i).hexdigest())
  
  text = text.replace('<br/>', '<br>')
  text = text.replace('<br>', '8b0f0ea73162b7552dda3c149b6c045d') # md5('<br>').hexdigest() = '8b0f0ea73162b7552dda3c149b6c045d'
  text = text.strip().replace('\n', '<br>')
  
  words_per_line = 15
  longest_line = max(lines[:lines_count], key=len) if len(lines) != 0 else None
  if longest_line and len(longest_line.split()) > words_per_line: 
    lines = textwrap.wrap(text)
  else:
    lines = [line for line in text.split('<br>') if line.strip()]
    
  # skip blank lines (and blank lines quote)
  if len([line for line in lines if line.strip() and line.strip() != '>']) >= lines_count:
    blank_lines = len([line for line in lines if line.strip() in ['', '>']])
    out = ' '.join(lines[:lines_count+blank_lines])
  else:
    out = text
    
  if len(out) < len(text):
    text = ' '.join(text[:len(out)].split()[0:-1]).rstrip('.') + '...'
    if len(text) / float(len(raw)) > 0.7:  # if most of the text survives, show it all instead of truncating
      text = raw
  
  out = text.replace('<br>', '\n')
  out = out.replace('8b0f0ea73162b7552dda3c149b6c045d', '<br>')
  for i in images:
    out = out.replace(md5(i).hexdigest(), i)
  for i in links:
    out = out.replace(md5(i).hexdigest(), i)
  cache.set(key, out, namespace="filters")
  return out  
Example #21
def _convert_to_text(html):
    try:
        html = unicode(html)
    except UnicodeDecodeError:
        pass
    key = '%s:convert_to_text' % hash(html)
    out = cache.get(key, namespace="filters")
    if not out:
        html = fix_unclosed_tags(html)
        out = api.remove_html_tags(html)
        cache.set(key, out, namespace="filters")
    return out
Example #22
def _convert_to_text(html):
  try:
    html = unicode(html)
  except UnicodeDecodeError:
    pass
  key = '%s:convert_to_text' % hash(html)
  out = cache.get(key, namespace="filters")
  if not out:
    html = fix_unclosed_tags(html)
    out = api.remove_html_tags(html)
    cache.set(key, out, namespace="filters")
  return out
Example #23
def unmunge(html):
  """Clean up Word HTML"""
  if 'mso' in html: # remove outlook html style
    key = '%s:unmunge' % hash(html)
    out = cache.get(key, namespace="filters")
    if not out:
      html = re.sub(re.compile('p"mso.*?"'), 'p', html)
      html = re.sub(re.compile('( style=".*?")'), '', html)
      out = unmungeHtml(html.decode('utf-8'))
      cache.set(key, out, namespace="filters")
    return out
  return html
Example #24
def unmunge(html):
    """Clean up Word HTML"""
    if 'mso' in html:  # remove outlook html style
        key = '%s:unmunge' % hash(html)
        out = cache.get(key, namespace="filters")
        if not out:
            html = re.sub(re.compile('p"mso.*?"'), 'p', html)
            html = re.sub(re.compile('( style=".*?")'), '', html)
            out = unmungeHtml(html.decode('utf-8'))
            cache.set(key, out, namespace="filters")
        return out
    return html
Example #25
def test_data_cache(data_cache):
    cache = data_cache
    # make sure new cache has been initialized and is empty
    # at this point it should only contain: {"data": {}, "last_update": None}
    assert len(cache.cache.keys()) == 2
    cache.save()
    # add stuff to cache
    cache.add("test_item", "something")
    # and test retrieval of stuff
    assert cache.get("test_item") == "something"
    assert cache.is_known("test_item")
    assert type(cache.get_timestamp()) == datetime.datetime
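
The data_cache fixture itself is not part of this snippet. A hypothetical pytest fixture that satisfies exactly what the test exercises (a cache dict initialized to {"data": {}, "last_update": None}, plus add/get/is_known/save/get_timestamp) could look like:

import datetime

import pytest


class DataCache:
    """Illustrative cache exposing only the interface the test uses."""
    def __init__(self):
        self.cache = {"data": {}, "last_update": None}

    def add(self, key, value):
        self.cache["data"][key] = value
        self.cache["last_update"] = datetime.datetime.now()

    def get(self, key):
        return self.cache["data"][key]

    def is_known(self, key):
        return key in self.cache["data"]

    def get_timestamp(self):
        return self.cache["last_update"] or datetime.datetime.now()

    def save(self):
        pass  # persistence is elided in this sketch


@pytest.fixture
def data_cache():
    return DataCache()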
Example #26
def get_url_echos(isin, mic):
    """
    Return Echos URL
    """
    url = common.decode_rot('uggcf://yrfrpubf-obhefr-sb-pqa.jyo.nj.ngbf.arg') + \
          common.decode_rot('/fgernzvat/pbhef/oybpf/trgUrnqreSvpur?') + \
          'code={}&place={}&codif=ISIN'.format(isin, mic)
    content = cache.get(url, verify=False)
    if not content:
        return None
    header_fiche = common.clean_data(content)
    url_echos = common.clean_url(
        header_fiche['headerFiche']['tweetHeaderFiche'])
    return url_echos
Example #27
def get(isin, years=3):
    """
    Get 3 years history of this ISIN
    """
    url = common.decode_rot(
        'uggcf://yrfrpubf-obhefr-sb-pqa.jyo.nj.ngbf.arg/SQF/uvfgbel.kzy?' +
        'ragvgl=rpubf&ivrj=NYY&pbqvsvpngvba=VFVA&rkpunatr=KCNE&' +
        'nqqQnlYnfgCevpr=snyfr&nqwhfgrq=gehr&onfr100=snyfr&' +
        'frffJvguAbDhbg=snyfr&crevbq={}L&tenahynevgl=&aoFrff=&'.format(years) +
        'vafgeGbPzc=haqrsvarq&vaqvpngbeYvfg=&pbzchgrIne=gehr&' +
        'bhgchg=pfiUvfgb&') + 'code={}'.format(isin)
    content = cache.get(url, verify=False)
    if content:
        return content.split('\n')
    return ''
Example #28
def get_url_brsrm(isin):
    """
    Return Brsrm URL
    """
    base_url = common.decode_rot('uggcf://jjj.obhefbenzn.pbz')
    search_path = common.decode_rot('/erpurepur/nwnk?dhrel=')
    content = cache.get(base_url + search_path + isin)
    if not content:
        return None
    soup = BeautifulSoup(content, 'html.parser')
    link = soup.find('a', 'search__list-link')
    # Tag.__contains__ checks children, not attributes, so use has_attr here
    if link is None or not link.has_attr('href'):
        return None
    return base_url + link['href']
Example #29
def get_url_frtn(isin):
    """
    Return Frtn URL
    """
    base_url = 'https://bourse.fortuneo.fr/api/search?term={}'.format(isin)
    content = cache.get(base_url)
    if not content:
        return None
    try:
        json_content = json.loads(content)
    except json.decoder.JSONDecodeError:
        return None
    try:
        url = json_content['searchResults']['market']['arkea']['items'][0]['url']
    except (KeyError, IndexError):
        return None
    return url
Example #30
    def decorated_function(*args, **kwargs):
        session_id = session.get("session_id")
        user_id = api.get_user_id(session_id)
        if user_id and request.method in ["GET", "OPTIONS"]:
            if request.query_string:
                key = "%s: %s %s?%s" % (user_id, request.method, request.path, request.query_string)
            else:
                key = "%s: %s %s" % (user_id, request.method, request.path)

            rv = cache.get(key)
            if not rv:
                rv = f(*args, **kwargs)
                cache.set(key, rv)
            return rv
        elif user_id and request.method == "POST":
            key = "%s:*" % user_id
            cache.clear(key)
        return f(*args, **kwargs)
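
This inner function only makes sense inside a decorator factory that is not shown here. A hedged reconstruction of the enclosing wrapper (the name cached_per_user is hypothetical; Flask's session/request and functools.wraps are assumed, with cache and api as in the snippet):

from functools import wraps

def cached_per_user(f):
    @wraps(f)
    def decorated_function(*args, **kwargs):
        session_id = session.get("session_id")
        user_id = api.get_user_id(session_id)
        if user_id and request.method in ["GET", "OPTIONS"]:
            if request.query_string:
                key = "%s: %s %s?%s" % (user_id, request.method,
                                        request.path, request.query_string)
            else:
                key = "%s: %s %s" % (user_id, request.method, request.path)
            rv = cache.get(key)
            if not rv:
                rv = f(*args, **kwargs)
                cache.set(key, rv)
            return rv
        elif user_id and request.method == "POST":
            cache.clear("%s:*" % user_id)  # a write invalidates the user's cached pages
        return f(*args, **kwargs)
    return decorated_function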
Example #31
def get_dividend_brsrm(url_brsrm, report):
    """
    Get dividend from BRSRM
    """
    if not url_brsrm:
        return report
    content = cache.get(url_brsrm)
    if not content:
        return report
    soup = BeautifulSoup(content, 'html.parser')
    for div_relative in soup.find_all('div', 'u-relative'):
        if 'Rendement' not in div_relative.text:
            continue
        cells = div_relative.find_all('td')
        if len(cells) < 7:  # index 6 is read below
            continue
        report['brsrm']['percent'] = float(common.clean_data(
            cells[6].text, json_load=False).split()[0].split('%')[0])
    return report
Example #32
    def decorated_function(*args, **kwargs):
        session_id = session.get('session_id')
        user_id = api.get_user_id(session_id)
        if user_id and request.method in ['GET', 'OPTIONS']:
            if request.query_string:
                key = '%s: %s %s?%s' % (user_id, request.method, request.path,
                                        request.query_string)
            else:
                key = '%s: %s %s' % (user_id, request.method, request.path)

            rv = cache.get(key)
            if not rv:
                rv = f(*args, **kwargs)
                cache.set(key, rv)
            return rv
        elif user_id and request.method == 'POST':
            key = '%s:*' % user_id
            cache.clear(key)
        return f(*args, **kwargs)
Example #33
def compute_benefices(report):
    """
    Get necessary informations and returns an approximation of the profit development
    """
    indice = '1eCCK5'
    count = 1
    continue_req = True
    while continue_req:
        url = common.decode_rot('uggcf://jjj.obhefbenzn.pbz/obhefr/npgvbaf/' +
                                'cnyznerf/qvivqraqrf/cntr-{}?'.format(count) +
                                'znexrg={}&inevngvba=6'.format(indice))
        content = cache.get(url)
        continue_req = content != ''
        if continue_req:
            profit = parse_profit(BeautifulSoup(content, 'html.parser'),
                                  report)
            if profit != 0:
                return profit
        count += 1
    return None
Example #34
def description(html):
  try:
    html = unicode(html)
  except UnicodeDecodeError:
    pass
  key = '%s:description' % hash(html)
  out = cache.get(key, namespace="filters")
  if out:
    return out
  
  if '</' in html:
    plain_text = _convert_to_text(html)
  else:
    plain_text = html
  lines = []
  for line in plain_text.split('\n'):
    if '(' in line or ')' in line:
      continue
    elif '[' in line or ']' in line:
      continue
    elif '/' in line:
      continue
    elif ';' in line:
      continue
    elif ' ' in line \
      and len(line) > 15 \
      and line.count('.') < 2 \
      and 'dear' not in line.lower() \
      and 'hi' not in line.lower() \
      and 'unsubscribe' not in line.lower():
      lines.append(clean(line))
    else:
      continue
  
  lines.sort(key=len)
  if lines:
    out = lines[-1].rstrip('.') + '...'
  else:
    out = '...'
  cache.set(key, out, namespace="filters")
  return out
Example #35
def description(html):
    try:
        html = unicode(html)
    except UnicodeDecodeError:
        pass
    key = '%s:description' % hash(html)
    out = cache.get(key, namespace="filters")
    if out:
        return out

    if '</' in html:
        plain_text = _convert_to_text(html)
    else:
        plain_text = html
    lines = []
    for line in plain_text.split('\n'):
        if '(' in line or ')' in line:
            continue
        elif '[' in line or ']' in line:
            continue
        elif '/' in line:
            continue
        elif ';' in line:
            continue
        elif ' ' in line \
          and len(line) > 15 \
          and line.count('.') < 2 \
          and 'dear' not in line.lower() \
          and 'hi' not in line.lower() \
          and 'unsubscribe' not in line.lower():
            lines.append(clean(line))
        else:
            continue

    lines.sort(key=len)
    if lines:
        out = lines[-1].rstrip('.') + '...'
    else:
        out = '...'
    cache.set(key, out, namespace="filters")
    return out
Example #36
def sanitize_html(value):
    '''
    https://stackoverflow.com/questions/16861/sanitising-user-input-using-python
    '''
    if '</' not in value:  # not HTML
        return value

    key = '%s:sanitize_html' % hash(value)
    out = cache.get(key, namespace="filters")
    if out:
        return out

    base_url = None
    rjs = r'[\s]*(&#x.{1,7})?'.join(list('javascript:'))
    rvb = r'[\s]*(&#x.{1,7})?'.join(list('vbscript:'))
    re_scripts = re.compile('(%s)|(%s)' % (rjs, rvb), re.IGNORECASE)
    #  validTags = 'p i strong b u a h1 h2 h3 h4 pre br img ul ol li blockquote em code hr'.split()
    validTags = ('a abbr b blockquote code del ins dd dl dt em h2 h3 h4 i img kbd li ol '
                 'p pre s small sup sub strong strike table tbody th tr td ul br hr div span').split()
    validAttrs = 'src width height alt title class href'.split()
    urlAttrs = 'href title'.split()  # Attributes which should have a URL

    soup = BeautifulSoup(value.decode('utf-8'))
    for comment in soup.findAll(text=lambda text: isinstance(text, Comment)):
        # Get rid of comments
        comment.extract()
    for tag in soup.findAll(True):
        if tag.name not in validTags:
            tag.hidden = True
        attrs = tag.attrs
        tag.attrs = []
        for attr, val in attrs:
            if attr in validAttrs:
                val = re_scripts.sub('', val)  # Remove scripts (vbs & js)
                if attr in urlAttrs:
                    val = urljoin(base_url, val)  # Calculate the absolute url
                tag.attrs.append((attr, val))

    out = soup.renderContents().decode('utf8')
    cache.set(key, out, namespace="filters")
    return out
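
A quick illustration of the intended effect (the input is made up, and exact output depends on the BeautifulSoup version in use):

dirty = '<p onclick="alert(1)">hello</p><script>document.cookie</script>'
print(sanitize_html(dirty))
# onclick is dropped (not in validAttrs) and the script tag is hidden
# (not in validTags), leaving roughly: <p>hello</p>document.cookie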
Example #37
def fix_unclosed_tags(html):
  if not html:
    return html
  
  try:
    html = unicode(html)
  except UnicodeDecodeError:
    pass
  try:
    key = '%s:fix_unclosed_tags' % hash(html)
    out = cache.get(key, namespace="filters")
    if out:
      return out
  
    h = lxml.html.fromstring(html)
    out = lxml.html.tostring(h)
    
    cache.set(key, out, namespace="filters")
    return out
  except Exception:
    return ''
Example #38
def fix_unclosed_tags(html):
    if not html:
        return html

    try:
        html = unicode(html)
    except UnicodeDecodeError:
        pass
    try:
        key = '%s:fix_unclosed_tags' % hash(html)
        out = cache.get(key, namespace="filters")
        if out:
            return out

        h = lxml.html.fromstring(html)
        out = lxml.html.tostring(h)

        cache.set(key, out, namespace="filters")
        return out
    except Exception:
        return ''
Example #39
def sanitize_html(value):
  '''
  https://stackoverflow.com/questions/16861/sanitising-user-input-using-python
  '''
  if '</' not in value: # not HTML
    return value
  
  key = '%s:sanitize_html' % hash(value)
  out = cache.get(key, namespace="filters")
  if out:
    return out
  
  base_url=None
  rjs = r'[\s]*(&#x.{1,7})?'.join(list('javascript:'))
  rvb = r'[\s]*(&#x.{1,7})?'.join(list('vbscript:'))
  re_scripts = re.compile('(%s)|(%s)' % (rjs, rvb), re.IGNORECASE)
#  validTags = 'p i strong b u a h1 h2 h3 h4 pre br img ul ol li blockquote em code hr'.split()
  validTags = 'a abbr b blockquote code del ins dd dl dt em h2 h3 h4 i img kbd li ol p pre s small sup sub strong strike table tbody th tr td ul br hr div span'.split()
  validAttrs = 'src width height alt title class href'.split()
  urlAttrs = 'href title'.split() # Attributes which should have a URL
  
  soup = BeautifulSoup(value.decode('utf-8'))
  for comment in soup.findAll(text=lambda text: isinstance(text, Comment)):
    # Get rid of comments
    comment.extract()
  for tag in soup.findAll(True):
    if tag.name not in validTags:
      tag.hidden = True
    attrs = tag.attrs
    tag.attrs = []
    for attr, val in attrs:
      if attr in validAttrs:
        val = re_scripts.sub('', val) # Remove scripts (vbs & js)
        if attr in urlAttrs:
          val = urljoin(base_url, val) # Calculate the absolute url
        tag.attrs.append((attr, val))

  out = soup.renderContents().decode('utf8')
  cache.set(key, out, namespace="filters")
  return out  
Example #40
def get_dividend_frtn(url_frtn, report):
    """
    Get dividend from FRTN
    """
    if not url_frtn:
        return report
    market = int(url_frtn.split('-')[-1])
    isin = url_frtn.split('-')[-2]
    avis_url = common.decode_rot('uggcf://obhefr.sbegharb.se/ncv/inyhr/nivf/SGA') + \
        '{market:06d}{isin}'.format(market=market, isin=isin)
    content = cache.get(avis_url)
    if not content:
        return report
    try:
        json_content = json.loads(content)
        if len(json_content['consensus']['listeAnnee']) > 1:
            report['frtn']['percent'] = round(
                float(json_content['consensus']['listeAnnee'][1]['rendement'])
                * 100, 2)
    except json.decoder.JSONDecodeError:
        pass
    return report
Example #41
 def decorated_function(*args, **kwargs):
   session_id = session.get('session_id')
   user_id = api.get_user_id(session_id)
   if user_id and request.method in ['GET', 'OPTIONS']:
     if request.query_string:
       key = '%s: %s %s?%s' % (user_id,
                               request.method,
                               request.path,
                               request.query_string)
     else:
       key = '%s: %s %s' % (user_id,
                            request.method,
                            request.path)
     
     rv = cache.get(key)
     if not rv:
       rv = f(*args, **kwargs)
       cache.set(key, rv)
     return rv
   elif user_id and request.method == 'POST':
     key = '%s:*' % user_id
     cache.clear(key)
   return f(*args, **kwargs)
Example #42
def autolink(text):  
  if not text:
    return text
  
  key = '%s:autolink' % hash(text)
  out = cache.get(key, namespace="filters")
  if out:
    return out
  
  if re.match(EMAIL_RE, text):
    email = text 
    user_id = api.get_user_id_from_email_address(email)
    user = api.get_user_info(user_id)
    return '<a href="/user/%s" class="async">%s</a>' % (user.id, user.name)
    
  s = text or ''
  s += ' '
  s = str(s) # convert unicode to string
  s = s.replace('\r\n', '\n')

  
  urls = api.extract_urls(s)
  urls = list(set(urls))
  urls.sort(key=len, reverse=True)
  
  for url in urls:
    hash_string = md5(url).hexdigest()
    info = api.get_url_info(url)
    if not url.startswith('http'):
      s = s.replace(url, '<a href="http://%s/" target="_blank" title="%s">%s</a>' % (hash_string, info.title if info.title else hash_string, hash_string))
    
    elif len(url) > 60:
      u = url[:60]
        
      for template in ['%s ', ' %s', '\n%s', '%s\n', '%s.', '%s,']:
        if template % url in s:
          s = s.replace(template % url, 
                        template % ('<a href="%s" target="_blank" title="%s">%s</a>' % (hash_string, info.title if info.title else hash_string, md5(u + '...').hexdigest())))
          break
    else:
      for template in ['%s ', ' %s', '\n%s', '%s\n', '%s.', '%s,']:
        if template % url in s:
          s = s.replace(template % url, 
                        template % ('<a href="%s" target="_blank" title="%s">%s</a>' % (hash_string, info.title if info.title else hash_string, hash_string)))
          break
        
  for url in urls:
    s = s.replace(md5(url).hexdigest(), url)
    if len(url) > 60 and url.startswith('http'):
      s = s.replace(md5(url[:60] + '...').hexdigest(), url[:60] + '...')
      
  
  mentions = MENTIONS_RE.findall(s)
  if mentions:
    for mention in mentions:
      if '](topic:' in mention:
        topic = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
        topic['id'] = topic['id'].split(':', 1)[-1]
        
        #TODO: update topic name?
        s = s.replace(mention, 
             '<a href="/chat/topic/%s" class="chat">%s</a>' % (topic.get('id'), topic.get('name')))
      elif '](user:' in mention:
        user = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
        user['id'] = user['id'].split(':', 1)[-1]
        s = s.replace(mention, 
             '<a href="/chat/user/%s" class="chat"><span class="tag">%s</span></a>' % (user.get('id'), user.get('name')))
      else:
        group = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
        group['id'] = group['id'].split(':', 1)[-1]
        s = s.replace(mention, 
             '<a href="/group/%s" class="async"><span class="tag">%s</span></a>' % (group.get('id'), group.get('name')))
        
#  hashtags = re.compile('(#\[.*?\))').findall(s)
#  if hashtags:
#    for hashtag in hashtags:
#      tag = re.compile('#\[(?P<name>.+)\]\((?P<id>.*)\)').match(hashtag).groupdict()
#      tag['id'] = tag['id'].split(':', 1)[-1]
#      s = s.replace(hashtag, 
#           '<a href="?hashtag=%s" class="overlay"><span class="tag">%s</span></a>' % (tag.get('id'), tag.get('name')))
  
  cache.set(key, s, namespace="filters")
  return s
Example #43
def flavored_markdown(text): 
  key = '%s:flavored_markdown' % hash(text)
  html = cache.get(key, namespace="filters")
  if html:
    return html
   
  text = ' ' + text + ' '
  text = unescape(text)
  
  # extract Reference-style links
  reference_urls = REFERENCE_URL_REGEX.findall(text)
  reference_urls = [i[0] for i in reference_urls]
  for i in reference_urls:
    text = text.replace(i, md5(i).hexdigest())  
  
  # extract urls
  urls = URL_REGEX.findall(text)
  urls = [i[0] for i in urls if i]
  urls.sort(key=len, reverse=True)
  for url in urls:
    for pattern in ['%s)', ' %s', '\n%s', '\r\n%s', '%s\n', '%s\r\n']:
      if pattern % url in text:
        text = text.replace(pattern % url, pattern % md5(url).hexdigest())
        break
      
  # extract emoticons and symbols
  symbols = EMOTICONS.keys()
  symbols.extend(SYMBOLS.keys())
  symbols.sort(key=len, reverse=True)
  for symbol in symbols:
    for pattern in [' %s', ' %s. ', ' %s.\n', ' %s.\r\n', '\n%s', '\r\n%s', '%s\n', '%s\r\n']:
      if pattern % symbol in text:
        text = text.replace(pattern % symbol, pattern % md5(symbol).hexdigest())
        break
  
  # extract mentions
  mentions = re.findall('(@\[.*?\))', text)
  if mentions:
    for mention in mentions:
      text = text.replace(mention, md5(mention).hexdigest())
  
  # extract hashtags
  hashtags = re.findall('(#\[.*?\))', text)
  if hashtags:
    for hashtag in hashtags:
      text = text.replace(hashtag, md5(hashtag).hexdigest())
            
  # extract underscores words - prevent foo_bar_baz from ending up with an italic word in the middle
  words_with_underscores = [w for w in \
                            re.findall('((?! {4}|\t)\w+_\w+_\w[\w_]*)', text) \
                            if not w.startswith('_')]
  
  for word in words_with_underscores:
    text = text.replace(word, md5(word).hexdigest())
  
  # treats newlines in paragraph-like content as real line breaks
  text = text.strip().replace('<br>', '8b0f0ea73162b7552dda3c149b6c045d')
  text = text.strip().replace('\r\n', '<br>').replace('\n', '<br>') # normalize \r\n and \n to <br>
  text = text.strip().replace('<br>', '  \n') # treats newlines
  text = text.strip().replace('||  \n', '||\n') # undo if wiki-tables
  text = text.strip().replace('8b0f0ea73162b7552dda3c149b6c045d', '<br>')
  
  # restore reference_urls
  for i in reference_urls:
    text = text.replace(md5(i).hexdigest(), i) 
  
  # convert text to html
  html = markdown(text, extras=["wiki-tables",
                                "cuddled-lists",
                                "fenced-code-blocks",
                                "header-ids",
                                "code-friendly",
                                "pyshell",
                                "footnotes"])
  
#  print html
  
  # extract code-blocks
  html = html.replace('\n', '<br/>') # convert multi-lines to single-lines for regex matching
  code_blocks = re.findall('(<code>.*?</code>)', html)
  for block in code_blocks:
    html = html.replace(block, md5(block).hexdigest())
    
    
  # Show emoticons and symbols
  for symbol in symbols:
    if SYMBOLS.has_key(symbol):
      html = html.replace(md5(symbol).hexdigest(),
                          SYMBOLS[symbol])
    else:
      html = html.replace(md5(symbol).hexdigest(),
                          EMOTICONS[symbol].replace("<img src", 
                                                    "<img class='emoticon' src"))
  
  # Autolinks urls, mentions, hashtags, turn youtube links to embed code
  for url in urls: 
    title = api.get_url_info(url).title
    hash_string = md5(url).hexdigest()
    if len(url) > 40:
      html = html.replace(hash_string, 
                          '<a href="%s" target="_blank" title="%s">%s</a>' % (url, title, url[:40] + '...'))
    else:
      html = html.replace(hash_string, 
                          '<a href="%s" target="_blank" title="%s">%s</a>' % (url, title, url))
  
  for mention in mentions:
    hash_string = md5(mention).hexdigest()
    user = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
    user['id'] = user['id'].split(':', 1)[-1]
    html = html.replace(hash_string, 
                        '<a href="#!/user/%s" class="overlay"><span class="tag">%s</span></a>' % (user.get('id'), user.get('name')))
  
  for hashtag in hashtags:
    hash_string = md5(hashtag).hexdigest()
    tag = re.compile('#\[(?P<name>.+)\]\((?P<id>.*)\)').match(hashtag).groupdict()
    tag['id'] = tag['id'].split(':', 1)[-1]
    html = html.replace(hash_string, 
                        '<a href="?hashtag=%s" class="overlay"><span class="tag">%s</span></a>' % (tag.get('id'), tag.get('name')))  
    
  # Restore code blocks
  for block in code_blocks:
    html = html.replace(md5(block).hexdigest(), block)
  
  # restore urls, mentions, emoticons and hashtag in code blocks
  for url in urls:
    html = html.replace(md5(url).hexdigest(), url)
  for mention in mentions:
    html = html.replace(md5(mention).hexdigest(), mention)
  for hashtag in hashtags:
    html = html.replace(md5(hashtag).hexdigest(), hashtag)  
  for symbol in symbols:
    html = html.replace(md5(symbol).hexdigest(), symbol)  
  
  # restore words with underscores
  for word in words_with_underscores:
    html = html.replace(md5(word).hexdigest(), word)
  
  # restore \n
  html = html.replace('<br/>', '\n') 

  # xss protection
  html = sanitize_html(html)

  if not html or html.isspace():
    return ''
  
  
  # add target="_blank" to all a tags
  html = PyQuery(html)
  html('a:not(.overlay)').attr('target', '_blank')
  html = str(html)
  html = html.replace('<br/>', '<br>')
  
  cache.set(key, html, namespace="filters")
  return html