Example #1
from urllib.parse import unquote
from urllib.request import urlretrieve as ur  # assumed alias, based on the two-argument call below


def urlretrieve(url, filename):
    # decode any percent-encoding in the target name, then download the URL to that file
    filename = unquote(filename)
    try:
        ur(url, filename)
    except Exception as e:
        print(e)
    print("Done : " + filename)
Example #2
def downloadimg(articles):
    for article in articles:  # walk each page's titles, links, and image URLs, and download the images locally
        print(article.text, article['href'])
        if not os.path.isdir(os.path.join('download', article.text)):
            os.mkdir(os.path.join('download', article.text))
        res = requests.get('https://www.ptt.cc' + article['href'])
        images = reg_imgur_file.findall(res.text)
        print(images)
        for image in set(images):  # save the images
            ID = re.search(r'http[s]?://[i.]*imgur.com/(\w+\.(?:jpg|png|gif))',
                           image).group(1)
            print(ID)  # file name
            ur(image, os.path.join('download', article.text, ID))
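The module-level pieces this snippet relies on are not shown; a plausible stand-in, offered only as an assumption about what the imports and `reg_imgur_file` might look like:

import os
import re
import requests
from urllib.request import urlretrieve as ur  # assumed alias, as in the other examples

# hypothetical pattern for the imgur links the snippet extracts from each PTT article
reg_imgur_file = re.compile(r'https?://(?:i\.)?imgur\.com/\w+\.(?:jpg|png|gif)')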
Example #3
def get_top_machts():
    # fetch the front page and drill down to the fixtures table in the centered column
    html = ur('http://football.kulichki.net/')
    bs = b(html.read())
    div = bs.find('div', {"class": 'col2 inl vtop'}).center.table
    tr_list = div.find_all('tr')
    result = ''
    for item in tr_list[1:]:
        if item.find('span') is not None:

            flag = plus_flag(item.find('span').text)
            plus_flag(flag)
            result = result + flag + '\n'
        else:
            a = item.find('p', {"align": "left"}).text
            a = a.replace('\n', '')
            a = a.replace('  ', ' ')
            # shift the listed hour forward by two, wrapping past midnight, and zero-pad it
            matchtime = a[1:a.index('.')]
            timeplus = (int(matchtime[:2]) + 2) % 24
            timeplus = str(timeplus)
            if len(timeplus) == 1:
                timeplus = '0' + timeplus

            matchname = a[a.index('.') + 2:a.rindex('-')]
            result = result + '*' + timeplus + matchtime[
                2:] + '* _' + matchname + '_\n'
    return result
Example #4
async def nine_nine(ctx, code: str):
    if code == "Lucia Crimson Abyss":
        translator = Translator()
        my_url = "https://wiki.biligame.com/zspms/%E9%9C%B2%E8%A5%BF%E4%BA%9A%C2%B7%E6%B7%B1%E7%BA%A2%E4%B9%8B%E6%B8%8A"
        uClient = ur(my_url)
        page_html = uClient.read()
        uClient.close()
        page_soup = soup(page_html, "html.parser")
        containers_1 = page_soup.findAll("div", {"class": "tj-left"})
        x = containers_1[0].findAll("table", {"class": "wikitable"})
        y = x[0].findAll("td")
        count = 0
        st = ""
        for i in y:
            a = translator.translate(str(i.text))
            st += a.text + "\n"
            count += 1
            if count % 3 == 0:
                await ctx.send("```{}```".format(st))
                st = ""
'rating' DECIMAL(2,1)
)''')

cur.execute('''CREATE TABLE IF NOT EXISTS Specs(
'id' TEXT,"In The Box" TEXT, "Model Number" TEXT, "Model Name" TEXT, "Color" TEXT, "Browse Type" TEXT, "SIM Type" TEXT, "Hybrid Sim Slot" TEXT, "Touchscreen" TEXT, "OTG Compatible" TEXT, "Sound Enhancements" TEXT, "Display Size" TEXT, "Resolution" TEXT, "Resolution Type" TEXT, "GPU" TEXT, "Display Colors" TEXT, "Other Display Features" TEXT, "Operating System" TEXT, "Processor Type" TEXT, "Processor Core" TEXT, "Primary Clock Speed" TEXT, "Operating Frequency" TEXT, "Internal Storage" TEXT, "RAM" TEXT, "Expandable Storage" TEXT, "Supported Memory Card Type" TEXT, "Memory Card Slot Type" TEXT, "Primary Camera Available" TEXT, "Primary Camera" TEXT, "Primary Camera Features" TEXT, "Secondary Camera Available" TEXT, "Secondary Camera" TEXT, "Secondary Camera Features" TEXT, "Flash" TEXT, "HD Recording" TEXT, "Full HD Recording" TEXT, "Video Recording" TEXT, "Video Recording Resolution" TEXT, "Frame Rate" TEXT, "Dual Camera Lens" TEXT, "Phone Book" TEXT, "Network Type" TEXT, "Supported Networks" TEXT, "Internet Connectivity" TEXT, "3G" TEXT, "Pre-installed Browser" TEXT, "Micro USB Port" TEXT, "Bluetooth Support" TEXT, "Bluetooth Version" TEXT, "Wi-Fi" TEXT, "Wi-Fi Hotspot" TEXT, "USB Connectivity" TEXT, "Audio Jack" TEXT, "Map Support" TEXT, "GPS Support" TEXT, "Smartphone" TEXT, "SIM Size" TEXT, "User Interface" TEXT, "Removable Battery" TEXT, "SMS" TEXT, "Graphics PPI" TEXT, "Sensors" TEXT, "Other Features" TEXT, "Important Apps" TEXT, "FM Radio" TEXT, "Audio Formats" TEXT, "Video Formats" TEXT, "Battery Capacity" TEXT, "Width" TEXT, "Height" TEXT, "Depth" TEXT, "Weight" TEXT, "Warranty Summary" TEXT, "Quick Charging" TEXT, "Display Type" TEXT, "Wi-Fi Version" TEXT, "Infrared" TEXT, "Secondary Clock Speed" TEXT, "USB Tethering" TEXT, "MMS" TEXT, "Video Call Support" TEXT, "Micro USB Version" TEXT, "Battery Type" TEXT, "3G Speed" TEXT, "GPRS" TEXT, "EDGE" TEXT, "GPRS Features" TEXT, "EDGE Features" TEXT, "Voice Input" TEXT, "SIM Access" TEXT, "Call Log Memory" TEXT, "Speaker Phone" TEXT, "Speed Dialing" TEXT, "WAP" TEXT, "WAP Version" TEXT, "Touchscreen Type" TEXT, "Predictive Text Input" TEXT, "User Memory" TEXT, "Supported Languages" TEXT, "Browser" TEXT, "FM Radio Recording" TEXT, "NFC" TEXT, "Keypad Type" TEXT, "Dual Battery" TEXT, "Business Phone" TEXT, "Image Editor" TEXT, "Call Wait/Hold" TEXT, "Conference Call" TEXT, "Hands Free" TEXT, "Call Divert" TEXT, "Call Timer" TEXT, "Social Networking Phone" TEXT, "Instant Message" TEXT, "Series" TEXT, "Phone Book Memory" TEXT, "SMS Memory" TEXT, "JAVA Support" TEXT, "Games" TEXT, "Optical Zoom" TEXT, "Call Records" TEXT, "Logs" TEXT, "Keypad" TEXT, "Music Player" TEXT, "Warranty Service Type" TEXT, "Digital Zoom" TEXT, "Mini USB Port" TEXT, "Mini HDMI Port" TEXT, "HD Game Support" TEXT, "Total Memory" TEXT, "Hot SWAP Support" TEXT, "Mini USB Version" TEXT, "TV Out" TEXT, "Mobile Tracker" TEXT, "Java Application" TEXT, "Ringtones Format" TEXT, "Domestic Warranty" TEXT, "International Warranty" TEXT, "Covered in Warranty" TEXT, "Talk Time" TEXT, "Upgradable Operating System" TEXT, "DLNA Support" TEXT, "Not Covered in Warranty" TEXT, "Additional Content" TEXT)''')

cur.execute('''SELECT * FROM Mobile ORDER BY id DESC LIMIT 1''')
idnew = int(cur.fetchone()[0]) + 1
conn.commit()
cur.close()
url1 = 'https://www.flipkart.com/search?sid=tyy%2C4io&otracker=CLP_Filters&page='
url2 = 'https://www.flipkart.com'
for i in range(1, 20):
    conn = sqlite3.connect('mobiles.sqlite')
    cur = conn.cursor()
    html1 = ur(url1+str(i), context=ctx).read()
    soup1 = bs(html1,'html.parser')
    lst1 = soup1.findAll('a',{'class':'_31qSD5'})
    for a in lst1:
        html1 = ur(url2+a['href'], context=ctx).read()
        soup1 = bs(html1, 'html.parser')
        lst2 = soup1.findAll('tr',{'class':'_3_6Uyw row'})
        specs1 = dict()
        for li in lst2:
            specs1[li.find(class_='_3-wDH3 col col-3-12').get_text()]=li.find(class_='_3YhLQA').get_text()
        if specs1.get('Model Number', 0):
            cur.execute('SELECT * FROM Specs WHERE "Model Number"=?', (specs1['Model Number'],))
        if cur.fetchone():
            continue
        try:
            lst = re.findall(r'^\S(\S+),(\S+)', a.find(class_='_1vC4OE _2rQ-NK').get_text())[0]
            lst = lst[0]+lst[1]
Example #6
async def nine_nine(ctx):
    channel = bot.get_channel(615955484347990019)
    names = """
1. Shadow Strike-Reverse
2. Zero Degree Calibration
3. Chimera Retrograde
4. Zero Form
5. Sirius
6. Red Sakura
7. Ray Mill
8. God Giver
9. Dragon Wind
10. Wei Zi
11. Soul Slayer
12. Frenzied Fusion Canon
13. Leitning
14. Great God Power
15. Calender
16. Red Lotus Fanatic
17. Machine Rhyme
18. Wolf Eater
19. Nuclear-based red dragon
20. Sain
21. Pitch Black"""
    choice_embed = discord.Embed(
        title=
        "Following are the weapons in Punishing Gray Raven which come under 6-star rarity:",
        description=names)
    await channel.send(embed=choice_embed)

    def check(m):
        return m

    msg = await bot.wait_for('message', check=check)
    title = msg.content
    mydb = mysql.connector.connect(host="localhost",
                                   user="******",
                                   passwd=password,
                                   database="Punishing Gray Raven",
                                   auth_plugin='mysql_native_password')
    mycursor = mydb.cursor()
    mycursor.execute("SELECT * FROM Arms where Name = '{}';".format(title))
    myresult = mycursor.fetchall()
    name = myresult[0][0]
    link = myresult[0][1]
    uClient = ur(link)
    page_html = uClient.read()
    uClient.close()
    page_soup = soup(page_html, "html.parser")
    con = page_soup.findAll("table", {"class": "wikitable"})
    ans = con[0].findAll("img", {"class": "img-kk"})
    url = ans[0]["src"]
    wtype = myresult[0][2]
    rare = myresult[0][3]
    rr = ""
    for i in range(rare):
        rr += "✰"
    skill = myresult[0][4]
    embed = discord.Embed(title=name, description=" Details", color=0x00ff00)
    embed.set_thumbnail(url=url)
    embed.add_field(name="Type", value=wtype, inline=False)
    embed.add_field(name="Rarity", value=rr, inline=False)
    embed.add_field(name="Skills", value=skill, inline=False)
    await channel.send(embed=embed)
Example #7
async def nine_nine(ctx):
    channel = bot.get_channel(615955484347990019)
    names = """
1. Guinnea
2. Hannah
3. Einstein
4. Frederick
5. Adolf
6. Condelina
7. Leonardo
8. Shakespeare
9. Patton
10. Catherine
11. Heisen
12. Basilone
13. Fellete
14. Darwin"""
    choice_embed = discord.Embed(
        title=
        "Following are the memories in Punishing Gray Raven which come under 6-star rarity:",
        description=names)
    await channel.send(embed=choice_embed)

    def check(m):
        return m

    msg = await bot.wait_for('message', check=check)
    title = msg.content
    mydb = mysql.connector.connect(host="localhost",
                                   user="******",
                                   passwd=password,
                                   database="Punishing Gray Raven",
                                   auth_plugin='mysql_native_password')
    mycursor = mydb.cursor()
    mycursor.execute("SELECT * FROM Memory where Name = '{}';".format(title))
    myresult = mycursor.fetchall()
    name = myresult[0][0]
    link = myresult[0][1]
    uClient = ur(link)
    page_html = uClient.read()
    uClient.close()
    page_soup = soup(page_html, "html.parser")
    con = page_soup.findAll("table", {"class": "wikitable"})
    ans = con[0].findAll("img", {"class": "img-kk"})
    url = ans[0]["src"]
    rarity = myresult[0][2]
    two_piece_eff = myresult[0][3]
    four_piece_eff = myresult[0][4]
    hp = myresult[0][5]
    crit = myresult[0][6]
    atk = myresult[0][7]
    Def = myresult[0][8]
    rr = ""
    for i in range(rarity):
        rr += "✰"
    embed = discord.Embed(title=name, description=" Details", color=0x00ff00)
    embed.set_thumbnail(url=url)
    embed.add_field(name="Rarity", value=rr, inline=False)
    embed.add_field(name="2-piece effect", value=two_piece_eff, inline=False)
    embed.add_field(name="4-piece effect", value=four_piece_eff, inline=False)
    embed.add_field(name="HP", value=hp, inline=True)
    embed.add_field(name="CRIT", value=crit, inline=True)
    embed.add_field(name="ATK", value=atk, inline=True)
    embed.add_field(name="DEF", value=Def, inline=True)
    await channel.send(embed=embed)
Example #8
        t2 = soup.findAll("div", {"class": "location"})
        t3 = soup.findAll("span", {"class": "company"})

        for link, loc, comp in zip(t1, t2, t3):
            absolute_link = urljoin(url, link.get("href"))
            company = comp.text.strip()
            title = link.get("title")
            location = loc.text
            if (company in companies and title
                    in titles):  # skip duplicate/spam job postings
                continue
            titles.append(title)
            companies.append(company)
            signal.alarm(5)
            try:
                uClient = ur(absolute_link)  #opens site, and gets page
                pageText = uClient.read()  # html
                uClient.close()  #closes sites
                linkCount += 1
                pageSoup = bs(pageText, "html.parser")  #html parser
                print(linkCount)
                intro = pageSoup.findAll("p")  # job description
                middle = pageSoup.findAll("li")  # tech
                for i in middle:
                    if (str(i)[3] == ' '):
                        pass
                    else:
                        line = (str(i)[4:-5]).lower()
                        line = re.sub(r"[^a-zA-Z0-9#]+", ' ', line)
                        for t1, t2 in now_next(line.split()):
                            if (t1 in tech and techFound[t1] == False):
Example #9
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen as ur

web_url = 'https://www.flipkart.com/search?q=boat+headphones&sid=0pm%2Cfcn&as=on&as-show=on&otracker=AS_QueryStore_OrganicAutoSuggest_1_15_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_1_15_na_na_na&as-pos=1&as-type=RECENT&suggestionId=boat+headphones%7CHeadphones+%26+Earphones&requestId=493a648d-a9ab-4451-8dcd-0ad615e7ea65&as-searchtext=%20boat%20headphones'

with ur(web_url) as url:
    page_html = url.read()

soup = bs(page_html, 'html.parser')

containers = soup.findAll('div', {'class': '_3liAhj'})
print('Total count is ', len(containers))

#print(bs.prettify(containers[0]))

product_number = 1

for container in containers:

    name = container.div.img['alt']

    product_price = container.findAll('div', {'class': '_1vC4OE'})
    spliting_price = product_price[0].text.replace(',', '').split('₹')
    price = 'Rs.' + spliting_price[1]

    product_rating = container.findAll('div', {'class': 'hGSR34'})
    rating = product_rating[0].text

    print('Product No.:', product_number)
    print('Name       : ' + name)
    print('Price      : ' + price)
    print('Rating     : ' + rating)
    product_number += 1
Example #10
from urllib.parse import urljoin

jobList = list()
job = input("Enter job title:")
jobSplit = job.split(" ")

if (len(jobSplit) == 2):
    url = "https://www.indeed.com/jobs?q={}+{}&start=".format(
        jobSplit[0], jobSplit[1])
elif (len(jobSplit) == 3):
    url = "https://www.indeed.com/jobs?q={}+{}+{}&start=".format(
        jobSplit[0], jobSplit[1], jobSplit[2])
else:
    raise Exception('Invalid job title')

uClient = ur(url)
pageText = uClient.read()
uClient.close()
pageSoup = bs(pageText, "html.parser")

avg = pageSoup.find(attrs={"id": "univsrch-salary-currentsalary"})
minimum = pageSoup.find(
    attrs={"class": "univsrch-sal-min univsrch-sal-caption float-left"})
maximum = pageSoup.find(
    attrs={"class": "univsrch-sal-max univsrch-sal-caption float-right"})

avgAmount = ""
minAmount = ""
maxAmount = ""

for i in avg.text:
Example #11
# -*- coding: utf-8 -*-
"""
Created on Mon Jul  9 19:35:29 2018

@author: Shanmukha
"""

from urllib.request import urlopen as ur
from bs4 import BeautifulSoup as BS
my_url='https://www.newegg.com/global/in/Product/ProductList.aspx?Submit=ENE&DEPA=0&Order=BESTMATCH&Description=graphics+card&ignorear=0&N=-1&isNodeId=1'
# opening up the connection and grabbing the information
uclient=ur(my_url)
page_html=uclient.read()
uclient.close()
# html parsing
page_soup=BS(page_html,"html.parser")
# grab each product's information
containers=page_soup.findAll("div",{"class":"item-container"})
filename="scrap1_newegg.csv"
f=open(filename,"w")
headers="brand,product_name,shipping\n"
f.write(headers)
for container in containers:
    brand=container.div.div.a.img['title']
    title_container=container.findAll("a",{"class":"item-title"})
    product_name=title_container[0].text
    shipping_container=container.findAll("li",{"class":"price-ship"})
    shipping=shipping_container[0].text.strip()
    print("brand: "+ brand)
    print("product_name: "+ product_name)
    print("shipping: "+ shipping)
    # write the row to the CSV, replacing commas in the name so the columns stay aligned
    f.write(brand + "," + product_name.replace(",", "|") + "," + shipping + "\n")
f.close()
Example #12
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 27 14:02:56 2016

@author: SRINIVAS
"""




import simplejson
from urllib.request import Request as ur
from urllib.request import urlopen as ure
from urllib.parse import urlencode
url = "https://www.virustotal.com/vtapi/v2/file/rescan"
parameters = {"resource": "99017f6eebbac24f351415dd410d522d, 7896b9b34bdbedbe7bdc6d446ecb09d5",
              "apikey": "069ee921004cfe52f111a8c8d382ab20cd678b902ee303b8e0b9b7aa7b9053f7"}
data = urlencode(parameters).encode('utf-8')  # POST bodies must be bytes in Python 3
req = ur(url, data)
response = ure(req)
json = response.read()
print(json)
Example #13
print(req)
print(req.text)
bs = BeautifulSoup(req.text, 'html5lib')
div = bs.find('div', id='js-item-start')
print(div)
ur(div.attrs['data-image'], fmat(name))
'''

for i in range(500):
    try:
        f = '/'.join(fmat(name).split('/')[:-1])
        if not os.path.isdir(f):
            os.makedirs(f)
        #ur(BeautifulSoup(r.get(fmat(url)).text, 'html5lib').find('div', id='js-item-start').attrs['data-image'], fmat(name))
        ur(
            BeautifulSoup(r.get(fmat(url)).text, 'html5lib').find(
                'picture', class_='item-comic-image').find('img').attrs['src'],
            fmat(name))
        print('saved to', fmat(name))
    except Exception as e:
        # raise e
        print(f'got {e}, trying a mod')
    finally:
        d = str(int(d) + 1)
        if int(d) > 31:
            d = '1'
            m = str(int(m) + 1)
            if int(m) > 12:
                m = '1'
                y = str(int(y) + 1)
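The final example steps a date forward by mutating day/month/year strings by hand; a minimal alternative sketch using the standard library, assuming the goal is simply to advance one calendar day per iteration (the 500-iteration count comes from the loop above, everything else here is hypothetical):

from datetime import date, timedelta

start = date(2016, 1, 1)  # hypothetical starting date; the original tracks d, m, y as strings set elsewhere
for offset in range(500):
    current = start + timedelta(days=offset)
    # current.year, current.month, and current.day can then feed the fmat()-style URL and filename templates
    print(current.year, current.month, current.day)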