import pandas as pd
from gazpacho import Soup


def scrape(id):
    url = f"https://www.larvalabs.com/cryptopunks/details/{id}"
    soup = Soup.get(url)
    # every attribute on the detail page is a link to the punk search endpoint
    attributes = soup.find("a", {"href": "/cryptopunks/search?query"}, mode="list")
    attributes = [a.text for a in attributes]
    attributes = " + ".join(attributes)
    return pd.DataFrame({"id": id, "attributes": attributes}, index=[0])
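# Usage sketch (ids 0-9 are arbitrary): each call returns a one-row frame,
# so a range of punks can be stacked into a single DataFrame.
punks = pd.concat([scrape(i) for i in range(10)], ignore_index=True)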
# a second pass at scrape(): keep the attributes raw and collect the
# transaction history instead of building a DataFrame
def scrape(id):
    url = f"https://www.larvalabs.com/cryptopunks/details/{id}"
    soup = Soup.get(url)
    attributes = soup.find("a", {"href": "/cryptopunks/search?query"}, mode="list")
    attributes = [a.text for a in attributes]
    trs = soup.find("tr")[1:]  # skip the header row of the transaction table
    transactions = [tr_to_dict(tr) for tr in trs]
    return dict(id=id, attributes=attributes, transactions=transactions)
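# scrape() leans on a tr_to_dict helper that isn't shown above. A minimal
# sketch, assuming each transaction row is just a run of <td> cells; the
# positional keys are placeholders for the table's real column names.
def tr_to_dict(tr):
    tds = tr.find("td", mode="list")
    return {i: td.text for i, td in enumerate(tds)}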
import random
import time

from gazpacho import Soup
from tqdm import tqdm


def future(symbol):
    url = f"https://www.hsx.com/security/view/{symbol}"
    soup = Soup.get(url)
    lis = soup.find("ul", {"class": "credit"}).find("li")
    symbols = [extract_symbol(li) for li in lis][:5]  # first five credits
    prices = []
    for s in tqdm(symbols):  # renamed so the loop doesn't shadow `symbol`
        prices.append(price(s))
        time.sleep(random.uniform(1, 10) / 10)  # pause 0.1-1.0s between requests
    return round(sum(prices) / len(prices), 2)
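# future() depends on an extract_symbol helper that isn't shown above. A
# minimal sketch, assuming each credit <li> wraps a link whose href ends
# with the security's ticker (e.g. href="/security/view/ABC123"):
def extract_symbol(li):
    href = li.find("a", mode="first").attrs["href"]
    return href.split("/")[-1]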
import pandas as pd
from gazpacho import Soup


def scrape_data_for(*, date=None, week=None):
    if (date and week) or (not date and not week):
        raise ValueError("Choose one of date= or week=")
    if date:
        week = nfl_week(date)
    url = "https://www.fantasysharks.com/apps/bert/stats/points.php"
    segment = week + 691  # FantasySharks Segment ids are offset from week numbers
    params = {"League": -1, "Position": 99, "scoring": 13, "Segment": segment}
    soup = Soup.get(url, params)
    trs = soup.find("table", {"id": "toolData"}).find("tr")
    data = parse_all_trs(trs)
    df = pd.DataFrame(data)
    df["week"] = week
    df["fetched_at"] = pd.Timestamp("now")
    return df
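# scrape_data_for() can translate a calendar date via nfl_week, which isn't
# shown above (nor is parse_all_trs, another project helper). A minimal
# sketch, assuming weeks simply roll over every 7 days from a hard-coded
# week 1 kickoff; the start date below is an assumption.
from datetime import date as date_cls

SEASON_START = date_cls(2021, 9, 9)


def nfl_week(date):
    return (pd.Timestamp(date).date() - SEASON_START).days // 7 + 1


# Pick exactly one keyword:
# scrape_data_for(week=5)
# scrape_data_for(date="2021-10-10")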
def _make_soup(query: str, page: int = 1) -> Soup:
    params = {"q": query, "commit": "Search", "page": page}
    soup = Soup.get(URL, params)
    return soup
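# _make_soup reads a module-level URL constant that isn't shown above (and
# assumes the usual `from gazpacho import Soup` at the top of the module).
# A usage sketch, assuming the target is a search endpoint that accepts
# q, commit, and page query parameters; the URL below is a placeholder.
URL = "https://example.com/search"
soup = _make_soup("gazpacho", page=2)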
import random
import time
import json
from itertools import cycle
from collections import Counter
from pathlib import Path

import requests
from gazpacho import Soup
from tqdm import tqdm

# get proxies
url = "https://free-proxy-list.net/"
soup = Soup.get(url)
table = soup.find("table", {"id": "proxylisttable"})
trs = table.find("tr")[1:-1]  # drop the header and footer rows

# the first two <td>s in a row are the IP and the port
tr = trs[0]
proxy = ":".join([td.text for td in tr.find("td")[:2]])


def parse(tr):
    proxy = ":".join([td.text for td in tr.find("td")[:2]])
    return proxy


proxies = [parse(tr) for tr in trs]

# scrape functions

def save_html(id, proxy):
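    # Hypothetical body: the original snippet is cut off at the def line.
    # Given the requests/Path imports above, it plausibly fetches a punk's
    # detail page through the given proxy and writes the raw HTML to disk.
    url = f"https://www.larvalabs.com/cryptopunks/details/{id}"
    html = requests.get(
        url,
        proxies={"http": f"http://{proxy}", "https": f"http://{proxy}"},
        timeout=10,
    ).text
    Path(f"{id}.html").write_text(html)
    return html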
def price(symbol):
    url = f"https://www.hsx.com/security/view/{symbol}"
    soup = Soup.get(url)
    # the listed value is prefixed with "H$", so slice off the first two characters
    value = float(soup.find("p", {"class": "value"}).text[2:])
    return value
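# Usage sketch ("EXMPL" is a made-up ticker; substitute any real HSX symbol):
# price("EXMPL")  # -> e.g. 25.43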