# -*- coding: utf-8 -*-
# The encoding declaration is required on Python 2 because this code contains
# non-ASCII string literals; it must sit within the first two lines of a file.


def _fetch_sub(number):
    """Load one subtitle page and, if it matches, record and download it.

    Returns nothing. Every "skip this page" condition is an early return so
    that the caller's per-bucket bookkeeping always runs afterwards.

    Relies on the module-level ``browser``, ``cur`` and ``db`` objects
    (presumably a Selenium WebDriver and a DB-API cursor/connection --
    confirm against the rest of the file).
    """
    # Floor division keeps the bucket an integer on both Python 2 and 3.
    url = "http://www.shooter.cn/xml/sub/%u/%u.xml" % (number // 1000, number)

    # Initial load plus up to two retries while the title lacks the site name.
    browser.get(url)
    retries = 0
    while "Shooter.cn" not in browser.title and retries < 2:
        retries += 1
        browser.get(url)
    if "Shooter.cn" not in browser.title:
        print(str(number) + "'s page can't be loaded.")
        return

    # A page without the table or the title element is treated as broken.
    # ``Exception`` (not a bare ``except``) so Ctrl-C and SystemExit still
    # stop the crawl; the driver's specific exception type is not in scope.
    try:
        table = browser.find_element_by_tag_name("table")
        title = browser.find_element_by_id("movietitle1")
    except Exception:
        print(str(number) + " seems to be broken.")
        return

    rows = table.find_elements_by_tag_name("tr")
    try:
        # Third row: cell 1 is read as the format, cell 3 as the language.
        info_cells = rows[2].find_elements_by_tag_name("td")
        fmt = info_cells[1].text
        lang = info_cells[3].text
        # Last row: cell 1 is read as the file name.
        filename = rows[-1].find_elements_by_tag_name("td")[1].text
    except IndexError:
        # Table is shorter/narrower than expected; skip instead of crashing.
        print(str(number) + " seems to be broken.")
        return

    # Only "Subrip" is accepted (SSA/ASS/MacroDVD were left disabled by the
    # original author).
    is_subrip = 'Subrip' in fmt
    is_bilingual = (u'简' in lang or u'中' in lang) and (u'英' in lang)
    if not (is_subrip and is_bilingual):
        return

    title_text = title.text
    print(title_text)
    cur.execute('INSERT into movies VALUES(?,?,?,?)',
                (number, title_text, filename, table.text))
    # Click the download button to start fetching the subtitle file.
    browser.find_element_by_id("downsubbtn").click()
    db.commit()


def getsub(a, b):
    """Crawl the subtitle pages numbered ``a`` up to (but excluding) ``b``.

    Each number is processed by ``_fetch_sub``. After every number whose
    last three digits are 999, ``movefiles.movefiles`` is called with
    ``number // 1000`` -- this now happens even when that page itself could
    not be loaded or was broken, which the previous ``continue``-based flow
    skipped.

    Relies on the module-level ``movefiles`` module in addition to the
    globals used by ``_fetch_sub``.
    """
    for number in range(a, b):
        _fetch_sub(number)
        if number % 1000 == 999:
            movefiles.movefiles(number // 1000)
# -*- coding: utf-8 -*-
#!/usr/bin/python
import sys
import os
import time
import shutil

import movefiles

# The interpreter exits right here, so the movefiles call below is never
# reached as written. NOTE(review): looks like a deliberate kill switch --
# confirm whether the exit or the call is the intended behaviour.
sys.exit()
movefiles.movefiles(5)