Example #1
#!/usr/bin/env python3

import pandas as pd

# read_html returns a list of DataFrames, one per <table> found on the page
tables = pd.read_html("http://www.chico.com/gasprices")

print(tables[0])
Example #2
from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd
import time


def scrape():
    # Launch Chrome via splinter
    executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
    browser = Browser('chrome', **executable_path, headless=False)

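    # Latest NASA Mars news: headline and teaser paragraph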
    url = "https://mars.nasa.gov/news/"
    browser.visit(url)

    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')

    latestnews = soup.find("div", class_="content_title").text
    paragraphtext = soup.find("div", class_="article_teaser_body").text

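    # Featured image from the JPL space images site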
    url_2 = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
    browser.visit(url_2)

    browser.find_by_id("full_image").click()
    time.sleep(5)

    html2 = browser.html
    soup2 = BeautifulSoup(html2, "html.parser")

    img_url = soup2.find("img", class_="fancybox-image")["src"]

    featured_image_url = "https://www.jpl.nasa.gov" + img_url

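    # Most recent Mars weather tweet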
    url3 = "https://twitter.com/marswxreport?lang=en"
    browser.visit(url3)
    html3 = browser.html
    soup3 = BeautifulSoup(html3, 'html.parser')

    mars_weather = soup3.find(
        "p",
        class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text"
    ).text

    # Mars facts table: pandas can parse it straight from the URL
    url_4 = "http://space-facts.com/mars/"
    marsdata = pd.read_html(url_4)

    df_marsdata = marsdata[0]
    df_marsdata.columns = ["Mars_Profile", "Mars_ProfileValue"]
    df_marsdata.set_index("Mars_Profile", inplace=True)
    marsdata_html = df_marsdata.to_html(justify="left")

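    # Titles and full-size image URLs for the four Mars hemispheres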
    url_5 = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    browser.visit(url_5)

    mars_hemisphere = []

    for i in range(4):
        time.sleep(5)
        images = browser.find_by_tag("h3")
        images[i].click()
        html5 = browser.html
        soup5 = BeautifulSoup(html5, 'html.parser')
        partial = soup5.find("img", class_="wide-image")["src"]
        image_title = soup5.find("h2", class_="title").text
        img_url = "https://astrogeology.usgs.gov" + partial
        dictionary = {"title": image_title, "img_url": img_url}
        mars_hemisphere.append(dictionary)
        browser.back()

    browser.quit()

    # Collect everything scraped above into a single result dictionary
    return {
        "latest_news": latestnews,
        "paragraph_text": paragraphtext,
        "featured_image_url": featured_image_url,
        "mars_weather": mars_weather,
        "mars_facts_html": marsdata_html,
        "mars_hemispheres": mars_hemisphere,
    }
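A minimal usage sketch for the function above, assuming chromedriver is installed at the configured path and the page markup still matches:

if __name__ == "__main__":
    data = scrape()
    print(data["latest_news"])
    print(data["featured_image_url"])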
Example #3
np.sin(arr)   # element-wise sine
np.log(arr)   # element-wise natural log

NumPy arrays also support the regular element-wise operators: * / + - < > <= >= != (see the sketch below).
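A minimal sketch of those element-wise operators (arr here is a hypothetical example array):

import numpy as np

arr = np.array([1, 2, 3, 4])
print(arr * 2)     # [2 4 6 8]
print(arr - arr)   # [0 0 0 0]
print(arr > 2)     # [False False  True  True]
print(arr != 3)    # [ True  True False  True]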

Pandas is used to convert data of different types into tables (DataFrames), with advanced functions to filter and remap the data.

import pandas as pd
import numpy as np

#Read and write CSV files
df = pd.read_csv('file_name')
df.to_csv('example', index=False)  # to_csv returns None when given a path, so don't reassign df
#Excel input and output; beware of images in the Excel file, they may cause the read to crash
df = pd.read_excel('Excel_Sample.xlsx', sheet_name='Sheet1')
df.to_excel('excelname.xlsx', sheet_name='Sheet1')  # to_excel is a DataFrame method
#HTML input
df_list = pd.read_html('http://.....html')  # read_html returns a list of DataFrames
#Read and write a SQL database
from sqlalchemy import create_engine
engine = create_engine('sqlite:///:memory:')
df.to_sql('data', engine)
sql_df = pd.read_sql('data', con=engine)

#Convert to DataFrames
df = pd.DataFrame(np.random.randn(5, 4), index='A B C D E'.split(), columns='W X Y Z'.split())
df['W']         # select one column
df[['W', 'Z']]  # select several columns (note the double brackets)
#Creating new columns
df['new'] = df['W'] + df['Y']  # e.g. build the new column from existing ones
#Removing columns
df.drop('nameofthecolumn', axis=1)  # pass inplace=True to make the change in place
#Selecting rows
df.loc['A']  # locate a row by its label
Example #4
from bs4 import BeautifulSoup
from selenium import webdriver
import json
import pandas as pd
import re

driver = webdriver.Chrome()  # assumes chromedriver is available on PATH
letter = 'abcdefghijklmnopqrstuvwxyz'

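# The seven rhombohedral space-group numbers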
for num in (146, 148, 155, 160, 161, 166, 167):

    Wyckoff_positions_list = []
    Wyckoff_positions = {}

    driver.get('https://it.iucr.org/Ac/ch2o3v0001/sgtable2o3o{}/'.format(num))
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')

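    # General-position coordinates: grab the last 'genpos' table on the page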
    table = soup.find_all('table', {'class': 'genpos'})[-1]
    table = table.find('table', {'class': 'genposcoords'})
    genpos = table.find('td', {'class': 'genposcoords'})
    genpos = re.findall(r"<i><i>(.*?)</i></i>", str(genpos))
    Wyckoff_positions_list.append(genpos)

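    # Special-position coordinates, one 'specposcoords' table per Wyckoff site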
    table = soup.find_all('table', {'class': 'specpos'})[-1]
    table = table.find_all('table', {'class': 'specposcoords'})
    for line in table:
        pos = pd.read_html(str(line))[0].iloc[0, 0].replace(u'\xa0',
                                                            u'').split(',')
        Wyckoff_positions_list.append(pos)
    # Map positions to Wyckoff letters in reverse order: the general
    # position (scraped first) gets the highest letter
    n = len(Wyckoff_positions_list)
    for i in range(n):
        Wyckoff_positions[letter[n - 1 - i]] = Wyckoff_positions_list[i]

    with open('space_group_{}_Wyckoff_site_data.json'.format(num), 'w') as f:
        json.dump(Wyckoff_positions, f)
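A quick way to sanity-check one of the JSON files written above (assuming the loop has run for space group 146):

import json

with open('space_group_146_Wyckoff_site_data.json') as f:
    print(json.load(f))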
Example #5
import pandas as pd  # an HTML parser such as lxml must be installed for pd.read_html

for x in range(1, 4009):
    url1 = "http://stats.espncricinfo.com/ci/engine/stats/index.html?class=11;page=" + str(x) + ";template=results;type=batting;view=innings"

    data1 = pd.read_html(url1)

    df1 = data1[2]
    df1.to_csv(path_or_buf='/Users/mohiulalamprince/work/python/cricstat/player-info-{:05d}.csv'.format(x), sep=',')
    print(str(x) + "  =>  [DONE]")