def get_nft_of_the_day() -> pd.DataFrame:
    """Scrapes data about nft of the day. [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        metric, value
    """
    url = "https://www.coingecko.com/en/nft"
    try:
        scraped_data = scrape_gecko_data(url)
    except RetryError as e:
        print(e)
        return pd.DataFrame()
    row = scraped_data.find("div", class_="tw-px-4 tw-py-5 sm:tw-p-6")
    # BUGFIX: guard against a page-layout change. Without this, clean_row(None)
    # below raises AttributeError, which the (ValueError, IndexError) handler
    # does not catch.
    if row is None:
        return pd.DataFrame()
    try:
        *author, description, _ = clean_row(row)
        # Long author cells spill extra tokens into the description slot.
        if len(author) > 3:
            author, description = author[:3], author[3]
    except (ValueError, IndexError):
        return pd.DataFrame()
    df = (
        pd.Series(
            {
                "author": " ".join(author),
                "desc": description,
                "url": GECKO_BASE_URL + row.find("a")["href"],
                "img": row.find("img")["src"],
            }
        )
        .to_frame()
        .reset_index()
    )
    df.columns = ["Metric", "Value"]
    df["Metric"] = df["Metric"].apply(
        lambda x: replace_underscores_in_column_names(x) if isinstance(x, str) else x
    )
    df = wrap_text_in_df(df, w=100)
    return df
def get_top_volume_coins() -> pd.DataFrame:
    """Scrapes top coins by trading volume from
    "https://www.coingecko.com/en/coins/high_volume" [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        Rank, Name, Symbol, Price, Change_1h, Change_24h, Change_7d, Volume_24h, Market_Cap
    """
    columns = [
        "Rank",
        "Name",
        "Symbol",
        "Price",
        "Change_1h",
        "Change_24h",
        "Change_7d",
        "Volume_24h",
        "Market_Cap",
    ]
    url = "https://www.coingecko.com/en/coins/high_volume"
    # CONSISTENCY FIX: sibling scrapers catch RetryError and return an empty
    # frame; previously a network failure propagated to the caller.
    try:
        scraped_data = scrape_gecko_data(url)
    except RetryError as e:
        print(e)
        return pd.DataFrame()
    rows = scraped_data.find("tbody").find_all("tr")
    results = []
    for row in rows:
        row_cleaned = clean_row(row)
        if len(row_cleaned) == 9:
            # pad the rank cell when the page omits it
            row_cleaned.insert(0, "?")
        row_cleaned.pop(3)  # drop the unused 4th cell
        results.append(row_cleaned)
    df = replace_qm(pd.DataFrame(results, columns=columns))
    df.drop("Rank", axis=1, inplace=True)
    create_df_index(df, "Rank")
    df["Price"] = df["Price"].apply(lambda x: float(x.strip("$").replace(",", "")))
    return df
def get_nft_market_status() -> pd.DataFrame:
    """Scrapes overview data of nft markets from "https://www.coingecko.com/en/nft" [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        Metric, Value
    """
    url = "https://www.coingecko.com/en/nft"
    try:
        soup = scrape_gecko_data(url)
    except RetryError as error:
        print(error)
        return pd.DataFrame()
    overview_boxes = soup.find_all(
        "span", class_="overview-box d-inline-block p-3 mr-2"
    )
    # Each box is "<value> <metric words...>"; collect into a name -> value map.
    metrics = {}
    for box in overview_boxes:
        value, *metric_words = clean_row(box)
        metrics[" ".join(metric_words)] = value
    frame = pd.Series(metrics).to_frame().reset_index()
    frame.columns = ["Metric", "Value"]
    return frame
def get_holdings_overview(endpoint: str = "bitcoin") -> pd.DataFrame:
    """Scrapes overview of public companies that hold ethereum or bitcoin from
    "https://www.coingecko.com/en/public-companies-{bitcoin/ethereum}" [Source: CoinGecko]

    Parameters
    ----------
    endpoint : str
        "bitcoin" or "ethereum"

    Returns
    -------
    pandas.DataFrame
        Metric, Value
    """
    url = f"https://www.coingecko.com/en/public-companies-{endpoint}"
    # CONSISTENCY FIX: sibling scrapers catch RetryError and return an empty
    # frame; previously a network failure propagated to the caller.
    try:
        scraped_data = scrape_gecko_data(url)
    except RetryError as e:
        print(e)
        return pd.DataFrame()
    rows = scraped_data.find_all(
        "span", class_="overview-box d-inline-block p-3 mr-2"
    )
    kpis = {}
    for row in rows:
        row_cleaned = clean_row(row)
        if row_cleaned:  # skip empty boxes
            value, *kpi = row_cleaned
            name = " ".join(kpi)
            kpis[name] = value
    df = pd.Series(kpis).to_frame().reset_index()
    df.columns = ["Metric", "Value"]
    df["Metric"] = df["Metric"].apply(
        lambda x: replace_underscores_in_column_names(x) if isinstance(x, str) else x
    )
    return df
def get_gainers_or_losers(period: str = "1h", typ: str = "gainers") -> pd.DataFrame:
    """Scrape data about top gainers - coins which gain the most in given period and
    top losers - coins that lost the most in given period of time. [Source: CoinGecko]

    Parameters
    ----------
    period: str
        One from [1h, 24h, 7d, 14d, 30d, 60d, 1y]
    typ: str
        Either "gainers" or "losers"

    Returns
    -------
    pandas.DataFrame
        Top Gainers / Top Losers - coins which gain/lost most in price in given period of time.
        Columns: Rank, Symbol, Name, Volume, Price, %Change_{period}, Url

    Raises
    ------
    ValueError
        If ``period`` or ``typ`` is not a supported value.
    """
    category = {
        "gainers": 0,
        "losers": 1,
    }
    if period not in PERIODS:
        raise ValueError(
            f"Wrong time period\nPlease chose one from list: {PERIODS.keys()}"
        )
    # BUGFIX: an unknown typ previously slipped through and crashed below with
    # "TypeError: list indices must be integers" via category.get(typ) -> None.
    if typ not in category:
        raise ValueError(
            f"Wrong type\nPlease chose one from list: {list(category.keys())}"
        )
    url = f"https://www.coingecko.com/en/coins/trending{PERIODS.get(period)}"
    try:
        scraped_data = scrape_gecko_data(url)
    except RetryError as e:
        print(e)
        return pd.DataFrame()
    # tbody[0] holds gainers, tbody[1] holds losers
    rows = scraped_data.find_all("tbody")[category.get(typ)].find_all("tr")
    results = []
    for row in rows:
        url = GECKO_BASE_URL + row.find("a")["href"]
        symbol, name, *_, volume, price, change = clean_row(row)
        try:
            change = percent_to_float(change)
        except (ValueError, TypeError) as e:
            print(e)
        results.append([symbol, name, volume, price, change, url])
    df = pd.DataFrame(
        results,
        columns=[
            "Symbol",
            "Name",
            "Volume",
            "Price",
            f"%Change_{period}",
            "Url",
        ],
    )
    df.index = df.index + 1
    df.reset_index(inplace=True)
    df = df.rename(columns={"index": "Rank"})
    df["Price"] = df["Price"].apply(lambda x: float(x.strip("$").replace(",", "")))
    return df
def get_yield_farms() -> pd.DataFrame:
    """Scrapes yield farms data from "https://www.coingecko.com/en/yield-farming"

    Returns
    -------
    pandas.DataFrame
        Rank, Name, Pool, Audits, Collateral, Value_Locked, Return_Year
    """
    columns = [
        "Rank",
        "Name",
        "Pool",
        "Audits",
        "Collateral",
        "Value_Locked",
        "Return_Year",
    ]
    url = "https://www.coingecko.com/en/yield-farming"
    # CONSISTENCY FIX: sibling scrapers catch RetryError and return an empty
    # frame; previously a network failure propagated to the caller.
    try:
        scraped_data = scrape_gecko_data(url)
    except RetryError as e:
        print(e)
        return pd.DataFrame()
    rows = scraped_data.find("tbody").find_all("tr")
    results = []
    for row in rows:
        row_cleaned = clean_row(row)[:-2]  # last two cells are unused
        if " New" in row_cleaned:  # find better way to fix it in future
            row_cleaned.remove(" New")
        if len(row_cleaned) == 7:
            # pad the pool cell when the page omits it
            row_cleaned.insert(2, None)
        (
            rank,
            name,
            pool,
            *others,
            _,
            value_locked,
            apy1,
            _,  # hourly return dropped; for most cases it's 0.00 and adds no value
        ) = row_cleaned
        auditors, collateral = collateral_auditors_parse(others)
        auditors = ", ".join(aud.strip() for aud in auditors)
        collateral = ", ".join(coll.strip() for coll in collateral)
        results.append(
            [rank, name, pool, auditors, collateral, value_locked, apy1]
        )
    df = pd.DataFrame(results, columns=columns).replace({"": None})
    df["Return_Year"] = df["Return_Year"].apply(
        lambda x: x.replace(" Yearly", "") if isinstance(x, str) else x
    )
    df["Rank"] = df["Rank"].astype(int)
    df = wrap_text_in_df(df, w=30)
    return df
def get_stable_coins() -> pd.DataFrame:
    """Scrapes stable coins data from "https://www.coingecko.com/en/stablecoins" [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        Rank, Name, Symbol, Price, Change_24h, Exchanges, Market_Cap, Change_30d, Url
    """
    columns = [
        "Rank",
        "Name",
        "Symbol",
        "Price",
        "Change_24h",
        "Exchanges",
        "Market_Cap",
        "Change_30d",
        "Url",
    ]
    url = "https://www.coingecko.com/en/stablecoins"
    # CONSISTENCY FIX: sibling scrapers catch RetryError and return an empty
    # frame; previously a network failure propagated to the caller.
    try:
        scraped_data = scrape_gecko_data(url)
    except RetryError as e:
        print(e)
        return pd.DataFrame()
    rows = scraped_data.find("tbody").find_all("tr")
    results = []
    for row in rows:
        link = GECKO_BASE_URL + row.find("a")["href"]
        row_cleaned = clean_row(row)
        if len(row_cleaned) == 8:
            # pad the trailing cell when the page omits it
            row_cleaned.append(None)
        (
            rank,
            name,
            *symbols,
            price,
            volume_24h,
            exchanges,
            market_cap,
            change_30d,
        ) = row_cleaned
        # BUGFIX: previously the empty list itself was stored in the cell when
        # no symbol was scraped; use None instead.
        symbol = symbols[0] if symbols else None
        results.append(
            [
                rank,
                name,
                symbol,
                price,
                volume_24h,
                exchanges,
                market_cap,
                change_30d,
                link,
            ]
        )
    df = replace_qm(pd.DataFrame(results, columns=columns))
    df.drop("Rank", axis=1, inplace=True)
    create_df_index(df, "Rank")
    df["Price"] = df["Price"].apply(lambda x: float(x.strip("$").replace(",", "")))
    return df
def get_top_crypto_categories() -> pd.DataFrame:
    """Scrapes top crypto categories from "https://www.coingecko.com/en/categories" [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        Rank, Name, Change_1h, Change_24h, Change_7d, Market_Cap, Volume_24h, Coins, Url
    """
    columns = [
        "Rank",
        "Name",
        "Change_1h",
        "Change_24h",
        "Change_7d",
        "Market_Cap",
        "Volume_24h",
        "Coins",
        "Url",
    ]
    url = "https://www.coingecko.com/en/categories"
    try:
        scraped_data = scrape_gecko_data(url)
    except RetryError as e:
        print(e)
        return pd.DataFrame()
    rows = scraped_data.find("tbody").find_all("tr")
    results = []
    for row in rows:
        url = GECKO_BASE_URL + row.find("a")["href"]
        # Multi-word category names absorb the extra whitespace-split tokens.
        (
            rank,
            *names,
            change_1h,
            change_24h,
            change_7d,
            market_cap,
            volume,
            n_of_coins,
        ) = row.text.strip().split()
        results.append(
            [
                rank,
                " ".join(names),
                change_1h,
                change_24h,
                change_7d,
                market_cap,
                volume,
                n_of_coins,
                url,
            ]
        )
    df = pd.DataFrame(results, columns=columns)
    df["Rank"] = df["Rank"].astype(int)
    return df
def get_top_defi_coins() -> pd.DataFrame:
    """Scrapes top decentralized finance coins "https://www.coingecko.com/en/defi" [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        Top Decentralized Finance Coins
        Columns: Rank, Name, Symbol, Price, Change_1h, Change_24h, Change_7d,
        Volume_24h, Market_Cap, Url
    """
    url = "https://www.coingecko.com/en/defi"
    try:
        soup = scrape_gecko_data(url)
    except RetryError as error:
        print(error)
        return pd.DataFrame()

    records = []
    for table_row in soup.find("tbody").find_all("tr"):
        record = clean_row(table_row)
        del record[2]  # drop the unused 3rd cell
        record.append(GECKO_BASE_URL + table_row.find("a")["href"])
        if len(record) == 11:
            # pad the Change_1h cell when the page omits it
            record.insert(4, "?")
        records.append(record)

    scraped_columns = [
        "Rank",
        "Name",
        "Symbol",
        "Price",
        "Change_1h",
        "Change_24h",
        "Change_7d",
        "Volume_24h",
        "Market_Cap",
        "Fully Diluted Market Cap",
        "Market Cap to TVL Ratio",
        "Url",
    ]
    frame = pd.DataFrame(records, columns=scraped_columns)
    frame.drop(
        columns=["Fully Diluted Market Cap", "Market Cap to TVL Ratio"],
        inplace=True,
    )
    frame["Rank"] = frame["Rank"].astype(int)
    frame["Price"] = frame["Price"].apply(
        lambda p: float(p.strip("$").replace(",", ""))
    )
    return frame
def get_news(n: int = 100) -> pd.DataFrame:
    """Scrapes news from "https://www.coingecko.com/en/news?page={}" [Source: CoinGecko]

    Parameters
    ----------
    n: int
        Number of news items; one page has 25, so ceil(n / 25) pages are scraped.

    Returns
    -------
    pandas.DataFrame:
        Index, Title, Author, Posted, Url
    """
    n_of_pages = (math.ceil(n / 25) + 1) if n else 2
    dfs = []
    for page in range(1, n_of_pages):
        url = f"https://www.coingecko.com/en/news?page={page}"
        try:
            scraped_data = scrape_gecko_data(url)
        except RetryError as e:
            print(e)
            return pd.DataFrame()
        results = []
        for row in scraped_data.find_all("article"):
            header = row.find("header")
            link = header.find("a")["href"]
            text = [t for t in header.text.strip().split("\n") if t not in ["", " "]]
            article = row.find("div", class_="post-body").text.strip()
            title, *by_who = text
            # ROBUSTNESS: partition never raises, unlike split("(") which
            # crashed on headers containing zero or multiple "(" characters.
            author, _, posted = " ".join(by_who).partition("(")
            posted = posted.strip().replace(")", "")
            results.append([title, author.strip(), posted, article, link])
        dfs.append(
            pd.DataFrame(
                results,
                columns=[
                    "Title",
                    "Author",
                    "Posted",
                    "Article",
                    "Url",
                ],
            )
        )
    df = pd.concat(dfs, ignore_index=True).head(n)
    # Article bodies were only scraped for potential future use; drop them.
    df.drop("Article", axis=1, inplace=True)
    df.index = df.index + 1
    df.reset_index(inplace=True)
    df.rename(columns={"index": "Index"}, inplace=True)
    return df
def get_recently_added_coins() -> pd.DataFrame:
    """Scrape recently added coins on CoinGecko from
    "https://www.coingecko.com/en/coins/recently_added" [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        Recently Added Coins on CoinGecko
        Columns: Name, Symbol, Price, Change_1h, Change_24h, Added
    """
    column_names = [
        "Name",
        "Symbol",
        "Price",
        "Change_1h",
        "Change_24h",
        "Added",
        "Url",
    ]
    url = "https://www.coingecko.com/en/coins/recently_added"
    try:
        soup = scrape_gecko_data(url)
    except RetryError as error:
        print(error)
        return pd.DataFrame()
    records = []
    for table_row in soup.find("tbody").find_all("tr"):
        coin_url = GECKO_BASE_URL + table_row.find("a")["href"]
        (
            name,
            symbol,
            _,
            price,
            *changes,
            _,
            _volume,
            added,
        ) = clean_row(table_row)
        change_1h, change_24h, _ = changes_parser(changes)
        records.append([name, symbol, price, change_1h, change_24h, added, coin_url])
    frame = replace_qm(pd.DataFrame(records, columns=column_names))
    frame.index = frame.index + 1
    frame.reset_index(inplace=True)
    frame.rename(columns={"index": "Rank"}, inplace=True)
    frame["Price"] = frame["Price"].apply(
        lambda p: float(p.strip("$").replace(",", ""))
    )
    return frame
def get_discovered_coins(category: str = "trending") -> pd.DataFrame:
    """Scrapes data from "https://www.coingecko.com/en/discover" [Source: CoinGecko]
        - Most voted coins
        - Most popular coins
        - Recently added coins
        - Most positive sentiment coins

    Parameters
    ----------
    category: str
        One from list: [trending, most_voted, positive_sentiment, recently_added, most_visited]

    Returns
    -------
    pandas.DataFrame:
        Most voted, most trending, recently added, most positive sentiment coins.
        Columns: Name, Price_BTC, Price_USD, Url

    Raises
    ------
    ValueError
        If ``category`` is not a key of CATEGORIES.
    """
    if category not in CATEGORIES:
        raise ValueError(
            f"Wrong category name\nPlease chose one from list: {CATEGORIES.keys()}"
        )
    url = "https://www.coingecko.com/en/discover"
    try:
        scraped_data = scrape_gecko_data(url)
    except RetryError as e:
        print(e)
        return pd.DataFrame()
    popular = scraped_data.find_all(
        "div", class_="col-12 col-sm-6 col-md-6 col-lg-4"
    )[CATEGORIES[category]]
    rows = popular.find_all("a")
    results = []
    btc_price = get_btc_price()  # used once to convert scraped BTC prices to USD
    for row in rows:
        name, *_, price = clean_row(row)
        url = GECKO_BASE_URL + row["href"]
        # BUGFIX: price_usd was previously bound only inside the if-branch, so a
        # row whose price lacked the "BTC" prefix reused the previous row's
        # value (or raised NameError on the first row).
        price_usd = None
        if price.startswith("BTC"):
            price = price.replace("BTC", "").replace(",", ".")
            price_usd = (int(btc_price) * float(price)) if btc_price else None
        results.append([name, price, price_usd, url])
    return pd.DataFrame(
        results,
        columns=[
            "Name",
            "Price_BTC",
            "Price_USD",
            "Url",
        ],
    )
def get_top_dexes() -> pd.DataFrame:
    """Scrapes top decentralized exchanges from "https://www.coingecko.com/en/dex" [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        Top Decentralized Crypto Exchanges
        Columns: Name, Rank, Volume_24h, Coins, Pairs, Visits, Most_Traded, Market_Share
    """
    columns = [
        "Name",
        "Rank",
        "Volume_24h",
        "Coins",
        "Pairs",
        "Visits",
        "Most_Traded",
        "Market_Share",
    ]
    url = "https://www.coingecko.com/en/dex"
    try:
        scraped_data = scrape_gecko_data(url)
    except RetryError as e:
        print(e)
        return pd.DataFrame()
    rows = scraped_data.find("tbody").find_all("tr")
    results = []
    for row in rows:
        row_cleaned = clean_row(row)
        if " Trading Incentives" in row_cleaned:
            row_cleaned.remove(" Trading Incentives")
        if len(row_cleaned) == 8:
            # pad a missing cell so all rows have equal width
            row_cleaned.insert(-3, "N/A")
        results.append(row_cleaned)
    df = pd.DataFrame(results)
    # Re-join the name that the page splits across two cells.
    df["Name"] = df.iloc[:, 1] + " " + df.iloc[:, 2].replace("N/A", "")
    df.drop(df.columns[1:3], axis=1, inplace=True)
    df = swap_columns(df)
    df.columns = columns
    # Keep only the token part before "$" and strip separators.
    # BUGFIX: the dot must be replaced literally (regex=False); with
    # regex=True the pattern "." matched every character and emptied the cell.
    df["Most_Traded"] = (
        df["Most_Traded"]
        .apply(lambda x: x.split("$")[0])
        .str.replace(",", "", regex=False)
        .str.replace(".", "", regex=False)
    )
    # Purely numeric leftovers carry no token name — blank them out.
    df["Most_Traded"] = df["Most_Traded"].apply(lambda x: None if x.isdigit() else x)
    df["Rank"] = df["Rank"].astype(int)
    df.set_index("Rank", inplace=True)
    return df.reset_index()
def get_top_nfts() -> pd.DataFrame:
    """Scrapes top nfts from "https://www.coingecko.com/en/nft" [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        Top NFTs (Non-Fungible Tokens)
        Columns: Rank, Name, Symbol, Price, Change_1h, Change_24h, Change_7d,
        Volume_24h, Market_Cap, Url
    """
    url = "https://www.coingecko.com/en/nft"
    try:
        scraped_data = scrape_gecko_data(url)
    except RetryError as e:
        print(e)
        return pd.DataFrame()
    rows = scraped_data.find("tbody").find_all("tr")
    results = []
    for row in rows:
        link = GECKO_BASE_URL + row.find("a")["href"]
        row_cleaned = clean_row(row)
        if len(row_cleaned) == 9:
            # pad the missing Change_24h cell
            row_cleaned.insert(5, "N/A")
        row_cleaned.append(link)
        row_cleaned.pop(3)  # drop the unused 4th cell
        results.append(row_cleaned)
    df = pd.DataFrame(
        results,
        columns=[
            "Rank",
            "Name",
            "Symbol",
            "Price",
            "Change_1h",
            "Change_24h",
            "Change_7d",
            "Volume_24h",
            "Market_Cap",
            "Url",
        ],
    )
    df["Rank"] = df["Rank"].astype(int)
    # NOTE(review): Price is deliberately left as a string here (only "$" and
    # "," stripped), unlike sibling scrapers that cast to float — confirm
    # whether callers rely on the string dtype before changing it.
    df["Price"] = df["Price"].apply(lambda x: x.strip("$").replace(",", ""))
    return df
def get_companies_assets(endpoint: str = "bitcoin") -> pd.DataFrame:
    """Scrapes list of companies that holds ethereum or bitcoin from
    "https://www.coingecko.com/en/public-companies-{bitcoin/ethereum}" [Source: CoinGecko]

    Parameters
    ----------
    endpoint : str
        "bitcoin" or "ethereum"

    Returns
    -------
    pandas.DataFrame
        Rank, Company, Ticker, Country, Total_Btc, Entry_Value, Today_Value, Pct_Supply, Url
    """
    column_names = [
        "Rank",
        "Company",
        "Ticker",
        "Country",
        "Total_Btc",
        "Entry_Value",
        "Today_Value",
        "Pct_Supply",
        "Url",
    ]
    url = f"https://www.coingecko.com/en/public-companies-{endpoint}"
    try:
        soup = scrape_gecko_data(url)
    except RetryError as error:
        print(error)
        return pd.DataFrame()
    records = []
    for table_row in soup.find("tbody").find_all("tr"):
        # NOTE(review): unlike sibling scrapers the href is NOT prefixed with
        # GECKO_BASE_URL here — presumably it is already absolute; confirm.
        record = clean_row(table_row)
        record.append(table_row.find("a")["href"])
        records.append(record)
    return pd.DataFrame(records, columns=column_names)