Example #1
0
def GetThreadsList(WebPage = -1 ):
    Threads = []
    if WebPage == -1:
        printDBG("GetThreadsList Pobieram WebPage\n")
        WebPage = GetDVHKforumContent()
    if WebPage == -1:
        print "GetThreadsList Brak WebPage, koniec"
        Threads.append({'threadID': 0,'threadICON': '','threadTITLE': str('Błąd logowania do DVHK!!!'), 'threadDESCR': str('Błąd logowania do DVHK!!!')})
        return Threads
    WebPage = WebPage[:WebPage.find('<td class="tfoot"')] # wywalenie stopki
    WebPage = WebPage[WebPage.find('<td class="alt1" id="td_threadstatusicon_'):] #wywalenie tego co zostalo z naglowka <td class="alt1" id="td_threadstatusicon_
    WebPage = WebPage.replace('\r','').replace('\n','').replace('<td class="alt1" id="td_threadstatusicon_','\nthreadID=') #kazdy post w jednej linii
    WebPage = WebPage.replace('  ',' ').replace('\t\t\t','\t').replace('\t\t','\t').replace('&quot;','').replace('&#10070;','') #wywalenie smieci
    WebPage = WebPage.replace('<img src="disturbed/statusicon/','threadICON=')
    WebPage = re.sub('<td class="alt2" title=".+?','',WebPage.encode('utf-8'))
    WebPage = re.sub('<div>.*<a href="showthread.php.*thread_title_[0-9]*" style="font-weight:bold">','\tthreadTITLE=',WebPage)#search,nowe posty
    WebPage = re.sub('<div>.*<a href="showthread.php.*thread_title_[0-9]*">','\tthreadTITLE=',WebPage)#lista na forum
    WebPage = re.sub('</a>[ \t]*</div>[ \t]*<div class=.*','=ENDthreadTITLE',WebPage)#koniec tytulu podfora
    WebPage = re.sub('</a>	<span class="smallfont" style="white-space:nowrap">','=ENDthreadTITLE',WebPage)#koniec tytulu podfora
    WebPage = re.sub('.gif.+?title=','\tthreadDESCR=',WebPage)
    WebPage = re.sub('<img class="inlineimg".+?','',WebPage)
    #printDBG(WebPage +'\n')
    #for thread in re.findall('threadID=([0-9]+?)">.+?threadICON=(.+?) title="(.+?)">[ \t]+?<div>.+?bold">(.+?)</a>', WebPage.encode('utf-8')):
    ThreadsList = re.findall('threadID=([0-9]+?)">.+?threadICON=(.+?)\tthreadDESCR="(.+?)">[ \t]+?threadTITLE=(.+?)=ENDthreadTITLE', WebPage)
    for thread in ThreadsList:
        threadID = int(thread[0])
        threadICON = thread[1]
        threadDESCR = thread[2]
        threadTITLE =  thread[3]
        #printDBG('threadID:'+ str(threadID) + '\nthreadICON:' + threadICON + '\nthreadTITLE:'+ threadTITLE + '\nthreadDESCR:' + threadDESCR + '\n')
        Threads.append({'threadID': threadID,'threadICON': threadICON,'threadTITLE': threadTITLE, 'threadDESCR': threadDESCR})
    print Threads
    return Threads
Example #2
0
def GetForumsList(WebPage = -1):
    if WebPage == -1:
        WebPage = GetDVHKforumContent()
    if WebPage == -1:
        printDBG("Brak WebPage, koniec\n")
        return -1
    Forums = []
    if WebPage == -1:
        Forums.append({'ID': 0,'LEVEL': 0,'NAME': 'BŁĄD LOGOWANIA!!!', 'ParenID': 0})
        return Forums
    UsedIDs = []
    BlockedIDs = [97,124,112,35,158]
    level_0_ID = 0
    level_1_ID = 0
    level_2_ID = 0
    parentID = 0
    for forum in re.findall('<option value="([0-9]+?)".+?class="fjdpth([0123])".+?>(.+?)</option>', WebPage.encode('utf-8'), re.DOTALL):
        forumName = forum[2].strip()
        forumID = int(forum[0])
        forumlevel = int(forum[1])
        if forumlevel == 0:
            level_0_ID = forumID
            parentID = 0
        elif forumlevel == 1:
            level_1_ID = forumID
            parentID = level_0_ID
        elif forumlevel == 2:
            level_2_ID = forumID
            parentID = level_1_ID
        if forumName != '' and forumID not in UsedIDs and forumID not in BlockedIDs and parentID not in BlockedIDs:
            Forums.append({'ID': forumID,'LEVEL': forumlevel,'NAME': forumName, 'ParenID': parentID})
            UsedIDs.append(forumID)
    return Forums
Example #3
0
def GetDVHKforumContent(WebPage = -1):
    if WebPage == -1:
        printDBG("GetDVHKforumContent Pobieram WebPage\n")
        WebPage = GetWebPage()
    if WebPage == -1:
        printDBG("Brak WebPage, koniec\n")
        return -1
    #krok 1 - obcinamy gore strony
    Txt2Search = '<!-- / nav buttons bar -->'
    if WebPage.find(Txt2Search) > 0:
        WebPage = WebPage[WebPage.find(Txt2Search) + len(Txt2Search):]
    Txt2Search = '<!-- main -->'
    if WebPage.find(Txt2Search) > 0:
        WebPage = WebPage[WebPage.find(Txt2Search) + len(Txt2Search):]
    #krok 2 - obcinamy dol strony
    Txt2Search = '<!-- / close content container -->'
    if WebPage.find(Txt2Search) > 0:
        WebPage = WebPage[:WebPage.find(Txt2Search)]
    return WebPage
Example #4
0
def GetForumContent(WebPage = -1):
    if WebPage == -1:
        printDBG("GetForumContent Pobieram WebPage\n")
        WebPage = GetWebPage()
    if WebPage == -1:
        printDBG("Brak WebPage, koniec\n")
        return -1
    #krok 1.1 - obcinamy gore strony dla newposts
    Txt2Search = '<div id="recent" class="main_content">'
    if WebPage.find(Txt2Search) > 0:
        WebPage = WebPage[WebPage.find(Txt2Search) + len(Txt2Search):]
    #krok 1.2 - obcinamy gore strony dla index.php
    Txt2Search = '<div id="main_content_section">'
    if WebPage.find(Txt2Search) > 0:
        WebPage = WebPage[WebPage.find(Txt2Search) + len(Txt2Search):]
    #krok 2 - obcinamy dol strony
    Txt2Search = '<div class="pagesection" id="readbuttons">'
    if WebPage.find(Txt2Search) > 0:
        WebPage = WebPage[:WebPage.find(Txt2Search)]
    return WebPage
Example #5
0
def GetThreadsList(WebPage = -1 ):
    global WebPageCharSet
    Threads = []
    if WebPage == -1:
        printDBG("GetThreadsList Pobieram WebPage\n")
        WebPage = GetWebPage( url = 'http://forum.xunil.pl',  vdir = '/index.php?board=56.0' )
    if WebPage == -1:
        print "GetThreadsList Brak WebPage, koniec"
        Threads.append({'threadID': 0,'threadICON': '','threadTITLE': str('Błąd logowania do xunil!!!'), 'threadDESCR': str('Błąd logowania do xunil!!!')})
        return Threads
    #printDBG('\n########################\n' + WebPage.encode('utf-8') +'\n\n')
    WebPage = WebPage[WebPage.find('<div id="main_content_section">'):] #wywalenie naglowka
    #WebPage = WebPage[:WebPage.find('<div class="pagesection">')] # wywalenie stopki
    WebPage = WebPage.replace('\r','').replace('\n','').replace('<span id="msg_','\nthreadID=') #kazdy post w jednej linii
    WebPage = WebPage.replace('href="http://forum.xunil.pl/index.php?topic=','ToPiC=')
    WebPage = WebPage.replace('href="http://forum.xunil.pl/index.php?topic=','ToPiC=')
    WebPage = WebPage.replace('<img src="http://forum.xunil.pl/Themes/default/images/','threadICON=')
    WebPage = WebPage.replace('polish-utf8/','').replace('icons/','')
    
    #WebPage = re.sub('<td class="alt2" title=".+?','',WebPage.encode('utf-8'))
    #WebPage = re.sub('<div>.*<a href="showthread.php.*thread_title_[0-9]*" style="font-weight:bold">','\tthreadTITLE=',WebPage)#search,nowe posty
    #WebPage = re.sub('<div>.*<a href="showthread.php.*thread_title_[0-9]*">','\tthreadTITLE=',WebPage)#lista na forum
    #WebPage = re.sub('</a>[ \t]*</div>[ \t]*<div class=.*','=ENDthreadTITLE',WebPage)#koniec tytulu podfora
    #WebPage = re.sub('</a>	<span class="smallfont" style="white-space:nowrap">','=ENDthreadTITLE',WebPage)#koniec tytulu podfora
    #WebPage = re.sub('.gif.+?title=','\tthreadDESCR=',WebPage)
    #WebPage = re.sub('<img class="inlineimg".+?','',WebPage)
    #printDBG('\n########################\n' + WebPage.encode('utf-8') +'\n\n')
    #for thread in re.findall('threadID=([0-9]+?)">.+?threadICON=(.+?) title="(.+?)">[ \t]+?<div>.+?bold">(.+?)</a>', WebPage.encode('utf-8')):
    ThreadsList = re.findall('ToPiC=([0-9]+?).0">(.+?)</a>.+?threadICON=(.+?).gif"', WebPage)
    for thread in ThreadsList:
        #print thread
        threadID = int(thread[0])
        threadTITLE = thread[1].encode(WebPageCharSet)
        threadICON = thread[2].encode(WebPageCharSet)
        threadDESCR = ''
        #printDBG('threadID:'+ str(threadID) + '\nthreadICON:' + threadICON + '\nthreadTITLE:'+ threadTITLE + '\nthreadDESCR:' + threadDESCR + '\n')
        Threads.append({'threadID': threadID,'threadICON': threadICON,'threadTITLE': threadTITLE, 'threadDESCR': threadDESCR})
    print Threads
    return Threads
Example #6
0
def GetForumsList(WebPage = -1):
    if WebPage == -1:
        WebPage = GetForumContent()
        #printDBG('\n##### WebPage ##### \n' + WebPage.encode('utf-8')) 
    if WebPage == -1:
        printDBG("Brak WebPage, koniec\n")
        return -1
    Forums = []
    if WebPage == -1:
        Forums.append({'ID': 0,'LEVEL': 0,'NAME': 'BŁĄD LOGOWANIA!!!', 'ParenID': 0})
        return Forums
    UsedIDs = []
    BlockedIDs = [998,999]
    level_0_ID = 0
    level_1_ID = 0
    level_2_ID = 0
    parentID = 0
    for forum in re.findall('name="b([0-9]+?)">(.+?)</a>', WebPage.encode('utf-8'), re.DOTALL):
        #print forum
        forumName = forum[1].strip()
        forumID = int(forum[0])
        forumlevel = 0
        if forumlevel == 0:
            level_0_ID = forumID
            parentID = 0
        elif forumlevel == 1:
            level_1_ID = forumID
            parentID = level_0_ID
        elif forumlevel == 2:
            level_2_ID = forumID
            parentID = level_1_ID
        if forumName != '' and forumID not in UsedIDs and forumID not in BlockedIDs and parentID not in BlockedIDs:
            Forums.append({'ID': forumID,'LEVEL': forumlevel,'NAME': forumName, 'ParenID': parentID})
            UsedIDs.append(forumID)
    for forum in Forums:
        Txt2Search = '<tr id="board_' + str(forum['ID']) + "_children"
        if WebPage.find(Txt2Search) > 0:
            printDBG("Forum " + str(forum['ID']) + " ma subfora")
            SubPage = WebPage[WebPage.find(Txt2Search) + len(Txt2Search):]
            SubPage = SubPage[:SubPage.find('</td>')]
            printDBG('\n##### SubPage ##### \n' + SubPage.encode('utf-8'))
            for subforum in re.findall('board=([0-9]+?).0".+?">(.+?)</a>', SubPage.encode('utf-8'), re.DOTALL):
                #print subforum
                forumID = int(subforum[0])
                forumlevel = forum['LEVEL'] + 1
                parentID = forum['ID']
                forumName = subforum[1].strip()
                if forumName != '' and forumID not in UsedIDs and forumID not in BlockedIDs and parentID not in BlockedIDs:
                    Forums.append({'ID': forumID,'LEVEL': forumlevel,'NAME': forumName, 'ParenID': parentID})
                    UsedIDs.append(forumID)
                
    return Forums
Example #7
0
def GetFullThread(WebPage = -1):
    if WebPage == -1:
        WebPage = GetWebPage(url = 'forum.dvhk.pl', vdir = '/showthread.php?t=649501&page=9999')
    if WebPage == -1:
        printDBG("Brak WebPage, koniec\n")
        return -1
    PostsInThread = 'BŁAD'
    WebPage = WebPage.encode('utf8')
    if WebPage.find('<title>') > 0 and WebPage.find('</title>') > 0:
        PostsInThread = WebPage[WebPage.find('<title>')+len('<title>'):WebPage.find('</title>')] + '\n\n'
        
    printDBG("Tytul:" + PostsInThread)
    WebPage = WebPage[WebPage.find('<td class="thead" style="font-weight:normal" >'):]
    WebPage = WebPage.replace('\r','').replace('\n','').replace('<td class="thead" style="font-weight:normal" >','\nPostBegin=') #kazdy post w jednej linii
    WebPage = WebPage.replace('<div class="smallfont">','')
    WebPage = WebPage.replace('<img src="images/ranks/star-blue.png"','')
    WebPage = WebPage.replace('<img src="images/ranks/star-gold.png"','')
    WebPage = WebPage.replace('alt="" border="" />','')
    #zamiana emotikon na standardowe
    WebPage = WebPage.replace('<img src="images/smilies/newtongue.gif" border="0" alt="" title="Stick Out Tongue" class="inlineimg" />',':P')
    WebPage = WebPage.replace('<img src="images/smilies/newwink.gif" border="0" alt="" title="Wink" class="inlineimg" />',';)')
    WebPage = WebPage.replace('<img src="images/smilies/newbiggrin.gif" border="0" alt="" title="Big Grin" class="inlineimg" />',':D')
    WebPage = WebPage.replace('<img src="images/smilies/newblink.gif" border="0" alt="" title="EEK!" class="inlineimg" />',':o')
    WebPage = WebPage.replace('<img src="images/smilies/newsmile.gif" border="0" alt="" title="Smile" class="inlineimg" />',':)')
    WebPage = WebPage.replace('<img src="images/smilies/newsad.gif" border="0" alt="" title="Frown" class="inlineimg" />',':(')
    #wywalenie smieci
    WebPage = WebPage.replace('  ',' ').replace('\t\t\t','\t').replace('\t\t','\t')
    WebPage = re.sub('<font size="[0-9]">','',WebPage)
    WebPage = re.sub('<font color="[#a-zA-Z0-9]+?">','',WebPage)
    #WebPage = re.sub('<font color="#FF6600">','',WebPage)
    WebPage = re.sub('<[/]*[ibu]>','',WebPage)
    WebPage = re.sub('<[/]*strong>','',WebPage)
    WebPage = re.sub('\[[/]*SPOILER\]','',WebPage)
    WebPage = WebPage.replace('</font>','')
    WebPage = WebPage.replace('border="0" alt="" onload="NcodeImageResizer.createOn(this);"','')
    FullThread = re.findall('<!-- status icon and date -->(.+?)<!-- / status icon and date -->.+?<a class="bigusername" href="member.php(.+?)</span></a>\t<script type="text/javascript">.+?<!-- icon and title -->(.+?)<!-- / icon and title -->\t<!-- message -->(.+?)<!-- / message -->', WebPage)
    
    for Post in FullThread:
        DataPostu  = str(Post[0][Post[0].find('</a>')+4:].replace('\t','').replace('<a name="newpost"></a>',''))
        AutorPostu = str(Post[1][Post[1].find('font-weight:'):].split('>')[1])
        #PodTytulPostu = str(Post[2].encode('utf8'))
        TekstPostu = str(Post[3].replace('<br />','\n').strip())
        if TekstPostu[-6:] == '</div>':
            TekstPostu = TekstPostu[:-6]
        #obsluga cytatow
        LicznikCytatow = 0
        while (TekstPostu.count('<div class="smallfont" style="margin-bottom:2px">Cytat:</div>') > 0):
            LicznikCytatow = LicznikCytatow + 1
            TekstPostu = TekstPostu.replace('<div class="smallfont" style="margin-bottom:2px">Cytat:</div>','Cytat%i:"' % LicznikCytatow,1)
            TekstPostu = TekstPostu.replace('<table cellpadding="6" cellspacing="0" border="0" width="100%">\t<tr>\t<td class="alt2" style="border:1px inset">\t\t<div>','',1)
            TekstPostu = TekstPostu.replace('</table></div>','"\n',1)
        ######CYTATY!!!!!!!
        #>link do innego postu z cytatu
        TekstPostu = TekstPostu.replace('<img class="inlineimg" src="disturbed/buttons/viewpost.gif" border="0" alt="Zobacz post" />','')
        TekstPostu = TekstPostu.replace('<table cellpadding="6" cellspacing="0" border="0" width="100%">','<table>')
        TekstPostu = TekstPostu.replace('<div style="font-style:italic">','')
        TekstPostu = TekstPostu.replace('<td class="alt2" style="border:1px inset">','')
        TekstPostu = re.sub('<a href="showthread.php\?p=.+?" rel="nofollow"','',TekstPostu)
        #czyscimy formatowanie
        TekstPostu = re.sub('"[\t ]*[\t ]','" ',TekstPostu)
        TekstPostu = re.sub('[\t ]*[\t ]></a>\t</div>','>>\n',TekstPostu)
        TekstPostu = re.sub('</div>[\t ]*[\t ]</td>[\t ]*[\t ]</tr>[\t ]*[\t ]"',' "',TekstPostu)
        TekstPostu = re.sub('[\t]*<div id="post_message_[0-9]+?">','',TekstPostu)
        TekstPostu = re.sub('[\t]*<div style="margin:[0-9]+?px; margin-top:[0-9]+?px; ">','',TekstPostu)
        TekstPostu = TekstPostu.replace('<table>\t<tr>\t\t\t',' ')
        TekstPostu = re.sub('<a href="http://.+?[\t ]target="_blank">','',TekstPostu)
        ######KODy
        TekstPostu = TekstPostu.replace('<div style="margin:20px; margin-top:5px">\t<div class="smallfont" style="margin-bottom:2px">','@@@')
        TekstPostu = TekstPostu.replace('</pre></div>','@@@\n')
        TekstPostu = TekstPostu.replace('</div>\t<pre class="alt2" dir="ltr" style=" margin: 0px;\tpadding: 6px;\tborder: 1px inset;\twidth: 630px;\theight: 66px;\ttext-align: left;\toverflow: auto">','')
        TekstPostu = re.sub('</div>\t<pre class="alt2" dir="ltr" style=" margin: 0px;\tpadding: [0-9]+?px;\tborder: [0-9]+?px inset;\twidth: [0-9]+?px;\theight: [0-9]+?px;\ttext-align: left;\toverflow: auto">','',TekstPostu)
        #TekstPostu = TekstPostu.replace('','').replace('','').replace('','')
        PostsInThread = str(PostsInThread) + "\n########## " + DataPostu + ' , ' + AutorPostu + str(' napisał:\n')
        PostsInThread = PostsInThread + TekstPostu.strip() + '\n'
    printDBG(PostsInThread)
    return PostsInThread
Example #8
0
def GetWebPage(url = 'forum.dvhk.pl', vdir = '/search.php?do=getnew', uname = '', passwd = '' ):
    if uname == '' and passwd == '':
        printDBG("GetWebPage brak uname i passwd\n")
        try:
            file = open("/etc/enigma2/settings")
            for line in file:
                if line.startswith('config.plugins.BoardReader.dvhk_login='******'Znaleziono uname:' + uname + '\n')
                if line.startswith('config.plugins.BoardReader.dvhk_password='******'Znaleziono passwd: XXXXXXX\n')
                if uname != '' and passwd != '':
                    break
        except:
            pass
        
    global WebPageCharSet
    if not url.startswith('http://'):
        url = 'http://' + url
    loginurl = url + '/login.php?do=login'
    if not vdir.startswith('/') and not url.endswith('/'):
        vdir = '/' + vdir
    forumurl = url + vdir
    md5 = hashlib.md5(passwd);md5 = md5.hexdigest()
    # Options for request
    opts = {
    'do': 'login',
    'vb_login_md5password': md5,
    'vb_login_md5password_utf': md5,
    's': '',
    'vb_login_username': uname, 
    'security_token': 'guest', 
    }
    data = urllib.urlencode(opts)
    
    # Request header
    global headers
    headers = {
    'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-GB; rv:1.8.1.12) Gecko/20100101 Firefox/7.0.1',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-gb,en;q=0.5',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Connection': 'keep-alive',
    'Referer': loginurl
    }
    
    # Cookie Handling
    jar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
    
    # Send Request
    opener.addheader = headers
    opener.open(loginurl, data)
    # Check
    response = opener.open(forumurl)
    WebPageCharSet = response.headers.getparam('charset')
    #print kodowanie
    WebPage = response.read().decode(WebPageCharSet)
    WebPage = WebPage.replace('&quot;','"').replace('&amp;','&').replace('&lt;','<').replace('&gt;','>').replace('&nbsp;',' ')
    #poprawka smieci po kodowaniu html-a
    if 'login.php?do=logout' in WebPage:
        printDBG('GetWebPage:Zalogowany do dvhk\n')
        return WebPage
    else:
        printDBG('GetWebPage:Blad logowania do dvhk\n')
        return -1
Example #9
0
def GetFullThread(WebPage = -1):
    if WebPage == -1:
        WebPage = GetWebPage(url = 'http://forum.xunil.pl', vdir = '/index.php?topic=1128.9999')
    if WebPage == -1:
        printDBG("Brak WebPage, koniec\n")
        return -1
    PostsInThread = 'BŁAD'
    WebPage = WebPage.encode('utf8')
    if WebPage.find('<title>') > 0 and WebPage.find('</title>') > 0:
        PostsInThread = WebPage[WebPage.find('<title>')+len('<title>'):WebPage.find('</title>')] + '\n\n'
        
    printDBG("Tytul:" + PostsInThread)
    WebPage = WebPage[WebPage.find('<div id="main_content_section">'):]
    WebPage = WebPage[WebPage.find('<div id="forumposts">'):]
    WebPage = WebPage[:WebPage.find('<div class="pagesection">')]
    WebPage = WebPage.replace('\r','').replace('\n','') #wszystko w jednej linii
    WebPage = WebPage.replace('<div class','\nDivClass=') #kazdy div w jednej linii
    WebPage = WebPage.replace('<ul class=','\nUlClass=') #kazdy div w jednej linii
    WebPage = WebPage.replace('<span class=','\nSpanClass=') #kazdy div w jednej linii
    WebPage = WebPage.replace('<hr class=','\nHrClass=') #kazdy div w jednej linii
    #wywalenie smieci
    WebPage = WebPage.replace('  ',' ').replace('\t\t\t\t','\t').replace('\t\t\t','\t').replace('\t\t','\t')
    WebPage = re.sub('DivClass=="pagesection">.*','',WebPage) #wywalenie konca strony
    WebPage = re.sub('DivClass=="keyinfo">.*','',WebPage)
    WebPage = re.sub('HrClass="post_separator".*','',WebPage)
    WebPage = re.sub('SpanClass=".*','',WebPage)
    WebPage = re.sub('UlClass=".*','',WebPage)
    WebPage = re.sub('DivClass=="post_wrapper">.*','',WebPage)
    WebPage = re.sub('DivClass=="postarea">.*','',WebPage)
    WebPage = re.sub('ivClass=="flow_hidden">.*','',WebPage)
    WebPage = re.sub('DivClass=="moderatorbar">.*','',WebPage)
    WebPage = re.sub('DivClass=="smalltext modified".*','',WebPage)
    WebPage = re.sub('DivClass=="messageicon">.*','',WebPage)
    WebPage = re.sub('DivClass=="windowbg.*','',WebPage)
    WebPage = re.sub('DivClass=="signature".*','',WebPage)
    WebPage = re.sub('DivClass=="smalltext reportlinks">.*','',WebPage)
    WebPage = re.sub('DivClass=="post">.*','',WebPage)
    WebPage = re.sub('DivClass=="quoteheader">.*','',WebPage)
    WebPage = re.sub('DivClass=="quotefooter">.*','',WebPage)
    WebPage = re.sub('DivClass=="poster">.*title=.*">','DivClass=Poster=',WebPage)
    WebPage = WebPage.replace('\r','').replace('\n','').replace('DivClass=Poster=','ENDofLINE\nNaszPost=Poster=') #wszystko z jednego posta w jednej linii
    #zamiana emotikon na standardowe
    WebPage = WebPage.replace('<img src="images/smilies/newtongue.gif" border="0" alt="" title="Stick Out Tongue" class="inlineimg" />',':P')
    WebPage = WebPage.replace('<img src="images/smilies/newblink.gif" border="0" alt="" title="EEK!" class="inlineimg" />',':o')
    WebPage = re.sub('<img src=".*wink.gif.*title="Wink" class="smiley" />',';)',WebPage)
    WebPage = re.sub('<img src=".*cheesy.gif.*title="Cheesy" class="smiley" />','{:-D',WebPage)
    WebPage = re.sub('<img src=".*smiley.gif.*title="Smiley" class="smiley" />',':)',WebPage)
    WebPage = re.sub('<img src=".*sad.gif.*title="Sad" class="smiley" />',':(',WebPage)

    for Post in re.findall('NaszPost=Poster=(.+?)</a>.+?<strong>.+?strong>(.+?)&#187;</div>.+?DivClass=="inner".+?id="msg_[0-9]+?">(.+?)ENDofLINE', WebPage):
        AutorPostu = Post[0]
        DataPostu  = Post[1]
        TekstPostu = str(Post[2].replace('<br />','\n').strip())
        #obsluga cytatow
        LicznikCytatow = 0
        while (TekstPostu.count('DivClass=="topslice_quote">') > 0):
            LicznikCytatow = LicznikCytatow + 1
            TekstPostu = TekstPostu.replace('DivClass=="topslice_quote">','Cytat%i:"' % LicznikCytatow,1)
            TekstPostu = TekstPostu.replace('DivClass=="botslice_quote">','"\n',1)
        ######CYTATY!!!!!!!
        #>link do innego postu z cytatu
        TekstPostu = TekstPostu.replace('<img class="inlineimg" src="disturbed/buttons/viewpost.gif" border="0" alt="Zobacz post" />','')
        TekstPostu = TekstPostu.replace('<table cellpadding="6" cellspacing="0" border="0" width="100%">','<table>')
        TekstPostu = TekstPostu.replace('<div style="font-style:italic">','')
        TekstPostu = TekstPostu.replace('<td class="alt2" style="border:1px inset">','')
        TekstPostu = re.sub('<a href="showthread.php\?p=.+?" rel="nofollow"','',TekstPostu)
        #TekstPostu = TekstPostu.replace('','').replace('','').replace('','')
        #czyscimy formatowanie i smieci
        TekstPostu = re.sub('<span style=.*color">','',TekstPostu)
        TekstPostu = re.sub('DivClass=="codeheader">.*class="bbc_code">','Kod:\n#',TekstPostu)
        TekstPostu = re.sub('</code>','#\n',TekstPostu)
        TekstPostu = re.sub('<a href=".*clip.gif.*/>','Załączono: ',TekstPostu)
        TekstPostu = re.sub('"<a href=".*topic=.*class="bbc_standard_quote">','"',TekstPostu)
        TekstPostu = re.sub('<img src=".*modify_inline.gif.*" />','',TekstPostu)
        TekstPostu = TekstPostu.replace('</blockquote>','')
        TekstPostu = re.sub('</div>[\t ]*</div>','',TekstPostu)
        TekstPostu = re.sub(':"Cytuj<blockquote class="bbc_standard_quote">',':"',TekstPostu)

        TekstPostu = TekstPostu.replace('<strong>','').replace('</strong>','').replace('</a>','').replace('<tt class="bbc_tt">','').replace('</tt>','')
        TekstPostu = TekstPostu.replace('</span>','').replace('\n\n','\n')
        TekstPostu = TekstPostu.replace('\t\t\t</div>','').replace('\t\t</div>','').replace('\t</div>','')
        TekstPostu = TekstPostu.replace('\t\n','\n').replace('\n\n','\n')
        #sklejamy wszystko do kupy
        PostsInThread = str(PostsInThread) + "\n########## " + DataPostu + ' , ' + AutorPostu + str(' napisał:\n')
        PostsInThread = PostsInThread + TekstPostu.strip() + '\n'
    printDBG(PostsInThread)
    return PostsInThread
Example #10
0
def GetWebPage(url = 'forum.xunil.pl', vdir = '/index.php', uname = '', passwd = '' ):
    global WebPageLastTime
    CurTime = time.time()
    try:
        if WebPageLastTime > CurTime - 2:
            time.sleep(WebPageLastTime - CurTime + 2)
    except:
        print "NoWebpage"
    if uname == '' and passwd == '':
        printDBG("GetWebPage szukam uname i passwd\n")
        try:
            file = open("/etc/enigma2/settings")
            for line in file:
                if line.startswith('config.plugins.BoardReader.xunil_login='******'Znaleziono uname:' + uname + '\n')
                if line.startswith('config.plugins.BoardReader.xunil_password='******'Znaleziono passwd: XXXXXXX\n')
                if uname != '' and passwd != '':
                    break
        except:
            pass
        try:
            file = open("/usr/local/e2/etc/enigma2/settings")
            for line in file:
                if line.startswith('config.plugins.BoardReader.xunil_login='******'Znaleziono uname:' + uname + '\n')
                if line.startswith('config.plugins.BoardReader.xunil_password='******'Znaleziono passwd:' + passwd + '\n')
                if uname != '' and passwd != '':
                    break
        except:
            pass
    if uname == '' or passwd == '':
        printDBG("GetWebPage uname lub passwd\n")
        
    global WebPageCharSet
    if not url.startswith('http://'):
        url = 'http://' + url
    loginurl = url + '/index.php?action=login2'
    if not vdir.startswith('/') and not url.endswith('/'):
        vdir = '/' + vdir
    forumurl = url + vdir
    printDBG('loginurl:' + loginurl + '\n')
    printDBG('forumurl:' + forumurl + '\n')
    md5 = hashlib.md5(passwd);md5 = md5.hexdigest()
    # Options for request
    opts = {
    'user': uname, 
    'passwrd': passwd,
    'cookielength': '-1',
    'openid_identifier': '',
    'hash_passwrd': md5
    }
    data = urllib.urlencode(opts)
    
    # Request header
    global headers
    headers = {
    'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-GB; rv:1.8.1.12) Gecko/20100101 Firefox/7.0.1',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-gb,en;q=0.5',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Connection': 'keep-alive',
    'Referer': loginurl
    }
    
    # Cookie Handling
    jar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
    
    # Send Request
    opener.addheader = headers
    opener.open(loginurl, data)
    # Check
    response = opener.open(forumurl)
    WebPageCharSet = response.headers.getparam('charset')
    printDBG('\nWebPageCharSet:' + WebPageCharSet + '\n')
    #print kodowanie
    WebPage = response.read().decode(WebPageCharSet)
    WebPage = WebPage.replace('&quot;','"').replace('&amp;','&').replace('&lt;','<').replace('&gt;','>').replace('&nbsp;',' ') #poprawka smieci po kodowaniu html-a
    WebPageLastTime = time.time()
    if 'id="button_logout"' in WebPage:
        printDBG('\nGetWebPage:Zalogowany do xunil :)\n')
        return WebPage
    else:
        printDBG('\nGetWebPage:Blad logowania do xunil :(\n')
        return -1