Example #1
def get_value_from(url):
    # Fetch the page and pull the last quoted attribute value out of the last
    # <input> element (the list key, or a status such as "subscription").
    soup = mksoup(requests.get(BASE + url).text)
    return str(soup.find_all("input")[-1]).split("\"")[-2]
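The examples lean on a few names defined elsewhere in the project. A minimal sketch of the preamble they appear to assume, where BASE and the html.parser choice are guesses rather than taken from the source:

# Assumed module preamble, not part of the examples themselves.
import gzip
import os
import requests
from bs4 import BeautifulSoup

BASE = "https://www.iblocklist.com/"  # assumption: site root prepended to relative list URLs

def mksoup(html):
    # Parse an HTML string into a BeautifulSoup tree.
    return BeautifulSoup(html, "html.parser")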
Example #3
        return
    # Save the gzipped download to a temporary file in 1 KiB chunks.
    with open('ultBlockList.tmp.gz', 'wb') as out:
        while True:
            data = handle.read(1024)
            if not data:
                break
            out.write(data)
    # Decompress it and append each line to the combined blocklist file.
    with gzip.open('ultBlockList.tmp.gz', 'rt') as contents:
        with open("blocklist.txt", "a+") as f:
            for line in contents:
                f.write(line)
    os.remove('ultBlockList.tmp.gz')


if __name__ == "__main__":
    print("Getting list page")
    soup = mksoup(requests.get("https://www.iblocklist.com/lists.php").text)
    links = {}  # dict of name of list -> its url
    for row in soup.find_all("tr")[1:]:  # for each table row
        # The first cell holds the list's link; pull the href and the link
        # text out of the raw HTML by splitting on quotes.
        section = str(list(row.children)[0])
        pieces = section.split("\"")
        links[pieces[2].split("<")[0][1:]] = pieces[1]

    for link in links:  # download and combine files
        print("Downloading " + link + " blocklist.")
        value = get_value_from(links[link])
        if value == "subscription":
            print("Blocklist is not available for free download D:")
        elif value == "unavailable":
            print("URL is unavailable")
        else:  # download and add this sucker
            process(value)
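The top of process() is missing from Example #3: handle is already an open download of the gzipped list by the time the snippet picks up, and the stray return is the failure exit of that missing part. A hypothetical sketch of what the head might look like, with the download endpoint left as a placeholder since the real URL is not shown:

import urllib.request
import urllib.error

# Hypothetical placeholder only; the project's real download endpoint is not shown.
DOWNLOAD_URL = "https://example.invalid/download?list={}&archiveformat=gz"

def process(value):
    # Open the gzipped list identified by `value`, giving up if the request fails.
    try:
        handle = urllib.request.urlopen(DOWNLOAD_URL.format(value))
    except urllib.error.URLError:
        print("Could not download list " + value)
        return
    # ...followed by the body shown in Example #3: stream handle into
    # ultBlockList.tmp.gz, decompress it, and append the lines to blocklist.txt.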