def get_nft_of_the_day() -> pd.DataFrame:
    """Scrapes data about nft of the day. [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        metric, value
    """

    url = "https://www.coingecko.com/en/nft"
    try:
        soup = scrape_gecko_data(url)
    except RetryError as error:
        # Degrade gracefully when retries are exhausted.
        print(error)
        return pd.DataFrame()
    box = soup.find("div", class_="tw-px-4 tw-py-5 sm:tw-p-6")
    try:
        *author, description, _ = clean_row(box)
        # Long rows carry extra tokens: keep the first three as the author and
        # treat the fourth as the description.
        if len(author) > 3:
            author, description = author[:3], author[3]
    except (ValueError, IndexError):
        # Unexpected page layout -- nothing usable to return.
        return pd.DataFrame()
    record = {
        "author": " ".join(author),
        "desc": description,
        "url": GECKO_BASE_URL + box.find("a")["href"],
        "img": box.find("img")["src"],
    }
    df = pd.Series(record).to_frame().reset_index()
    df.columns = ["Metric", "Value"]
    df["Metric"] = df["Metric"].apply(
        lambda name: replace_underscores_in_column_names(name)
        if isinstance(name, str)
        else name
    )
    return wrap_text_in_df(df, w=100)
# Example #2
# 0
def get_top_volume_coins() -> pd.DataFrame:
    """Scrapes top coins by trading volume "https://www.coingecko.com/en/coins/high_volume"
    [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        Rank, Name, Symbol, Price, Change_1h, Change_24h, Change_7d, Volume_24h, Market_Cap
    """
    columns = [
        "Rank",
        "Name",
        "Symbol",
        "Price",
        "Change_1h",
        "Change_24h",
        "Change_7d",
        "Volume_24h",
        "Market_Cap",
    ]
    url = "https://www.coingecko.com/en/coins/high_volume"
    # Consistent with the sibling scrapers in this module: return an empty
    # frame instead of propagating RetryError when retries are exhausted.
    try:
        scraped_data = scrape_gecko_data(url)
    except RetryError as e:
        print(e)
        return pd.DataFrame()
    rows = scraped_data.find("tbody").find_all("tr")
    results = []
    for row in rows:
        row_cleaned = clean_row(row)
        # Pad short rows with "?" (presumably a missing leading cell); "?"
        # placeholders are normalised by replace_qm below.
        if len(row_cleaned) == 9:
            row_cleaned.insert(0, "?")
        row_cleaned.pop(3)  # drop a cell that is not exposed in `columns`
        results.append(row_cleaned)
    df = replace_qm(pd.DataFrame(results, columns=columns))
    # Replace the scraped rank with a clean 1-based index column.
    df.drop("Rank", axis=1, inplace=True)
    create_df_index(df, "Rank")
    df["Price"] = df["Price"].apply(lambda x: float(x.strip("$").replace(",", "")))
    return df
def get_nft_market_status() -> pd.DataFrame:
    """Scrapes overview data of nft markets from "https://www.coingecko.com/en/nft" [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        Metric, Value
    """

    url = "https://www.coingecko.com/en/nft"
    try:
        soup = scrape_gecko_data(url)
    except RetryError as error:
        # Degrade gracefully when retries are exhausted.
        print(error)
        return pd.DataFrame()
    boxes = soup.find_all("span",
                          class_="overview-box d-inline-block p-3 mr-2")
    metrics = {}
    for box in boxes:
        # Each overview box starts with the value, followed by label tokens.
        value, *label_parts = box and clean_row(box)
        metrics[" ".join(label_parts)] = value
    df = pd.Series(metrics).to_frame().reset_index()
    df.columns = ["Metric", "Value"]
    return df
def get_holdings_overview(endpoint: str = "bitcoin") -> pd.DataFrame:
    """Scrapes overview of public companies that holds ethereum or bitcoin
    from "https://www.coingecko.com/en/public-companies-{bitcoin/ethereum}" [Source: CoinGecko]

    Parameters
    ----------
    endpoint : str
        "bitcoin" or "ethereum"

    Returns
    -------
    pandas.DataFrame
        Metric, Value
    """

    url = f"https://www.coingecko.com/en/public-companies-{endpoint}"
    # Consistent with the sibling scrapers in this module: return an empty
    # frame instead of propagating RetryError when retries are exhausted.
    try:
        scraped_data = scrape_gecko_data(url)
    except RetryError as e:
        print(e)
        return pd.DataFrame()
    rows = scraped_data.find_all(
        "span", class_="overview-box d-inline-block p-3 mr-2")
    kpis = {}
    for row in rows:
        row_cleaned = clean_row(row)
        if row_cleaned:
            # Each overview box starts with the value, followed by the label.
            value, *kpi = row_cleaned
            name = " ".join(kpi)
            kpis[name] = value

    df = pd.Series(kpis).to_frame().reset_index()
    df.columns = ["Metric", "Value"]
    # Normalise metric names via the shared helper.
    df["Metric"] = df["Metric"].apply(
        lambda x: replace_underscores_in_column_names(x)
        if isinstance(x, str) else x)
    return df
def get_gainers_or_losers(period: str = "1h",
                          typ: str = "gainers") -> pd.DataFrame:
    """Scrape data about top gainers - coins which gain the most in given period and
    top losers - coins that lost the most in given period of time. [Source: CoinGecko]

    Parameters
    ----------
    period: str
        One from [1h, 24h, 7d, 14d, 30d, 60d, 1y]
    typ: str
        Either "gainers" or "losers"

    Returns
    -------
    pandas.DataFrame
        Top Gainers / Top Losers - coins which gain/lost most in price in given period of time.
        Columns: Rank, Symbol, Name, Volume, Price, %Change_{period}, Url

    Raises
    ------
    ValueError
        If `period` or `typ` is not a supported value.
    """

    # Maps the category to the index of the matching <tbody> on the page.
    category = {
        "gainers": 0,
        "losers": 1,
    }

    if period not in PERIODS:
        raise ValueError(
            f"Wrong time period\nPlease chose one from list: {PERIODS.keys()}")

    # Validate `typ` symmetrically with `period`: previously an unknown typ
    # made category.get(typ) return None and find_all(...)[None] raise an
    # opaque TypeError.
    if typ not in category:
        raise ValueError(
            f"Wrong type\nPlease chose one from list: {list(category.keys())}")

    url = f"https://www.coingecko.com/en/coins/trending{PERIODS.get(period)}"
    try:
        scraped_data = scrape_gecko_data(url)
    except RetryError as e:
        print(e)
        return pd.DataFrame()
    rows = scraped_data.find_all("tbody")[category[typ]].find_all("tr")
    results = []
    for row in rows:
        url = GECKO_BASE_URL + row.find("a")["href"]
        symbol, name, *_, volume, price, change = clean_row(row)
        try:
            change = percent_to_float(change)
        except (ValueError, TypeError) as e:
            # Keep the raw string when the percentage cannot be parsed.
            print(e)
        results.append([symbol, name, volume, price, change, url])
    df = pd.DataFrame(
        results,
        columns=[
            "Symbol",
            "Name",
            "Volume",
            "Price",
            f"%Change_{period}",
            "Url",
        ],
    )
    # Expose a 1-based ranking as a regular column.
    df.index = df.index + 1
    df.reset_index(inplace=True)
    df = df.rename(columns={"index": "Rank"})
    df["Price"] = df["Price"].apply(
        lambda x: float(x.strip("$").replace(",", "")))
    return df
# Example #6
# 0
def get_yield_farms() -> pd.DataFrame:
    """Scrapes yield farms data from "https://www.coingecko.com/en/yield-farming"

    Returns
    -------
    pandas.DataFrame
        Rank, Name, Pool, Audits, Collateral, Value_Locked, Return_Year
        (the hourly-return cell is scraped but intentionally dropped below)
    """
    columns = [
        "Rank",
        "Name",
        "Pool",
        "Audits",
        "Collateral",
        "Value_Locked",
        "Return_Year",
    ]
    url = "https://www.coingecko.com/en/yield-farming"
    # NOTE(review): unlike sibling scrapers, RetryError from scrape_gecko_data
    # is not caught here and will propagate to the caller.
    rows = scrape_gecko_data(url).find("tbody").find_all("tr")
    results = []
    for row in rows:
        # Drop the two trailing cells of every row before unpacking.
        row_cleaned = clean_row(row)[:-2]
        if " New" in row_cleaned:  # find better way to fix it in future
            row_cleaned.remove(" New")

        # Short rows are one cell short; pad position 2 with None (presumably
        # the missing Pool cell -- TODO confirm against the live page) so the
        # starred unpacking below stays aligned.
        if len(row_cleaned) == 7:
            row_cleaned.insert(2, None)
        (
            rank,
            name,
            pool,
            *others,  # variable-length middle section: auditors + collateral
            _,
            value_locked,
            apy1,
            _,  # hourly removed for most cases it's 0.00 so it doesn't bring any value for user
        ) = row_cleaned
        auditors, collateral = collateral_auditors_parse(others)
        auditors = ", ".join(aud.strip() for aud in auditors)
        collateral = ", ".join(coll.strip() for coll in collateral)
        results.append(
            [
                rank,
                name,
                pool,
                auditors,
                collateral,
                value_locked,
                apy1,
            ]
        )
    # Empty strings become proper missing values.
    df = pd.DataFrame(results, columns=columns).replace({"": None})
    for col in ["Return_Year"]:
        df[col] = df[col].apply(
            lambda x: x.replace(" Yearly", "") if isinstance(x, str) else x
        )
    df["Rank"] = df["Rank"].astype(int)
    # Wrap long text cells to 30 characters for display.
    df = wrap_text_in_df(df, w=30)
    return df
def get_stable_coins() -> pd.DataFrame:
    """Scrapes stable coins data from "https://www.coingecko.com/en/stablecoins" [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        Rank, Name, Symbol, Price, Change_24h, Exchanges, Market_Cap, Change_30d, Url
    """

    columns = [
        "Rank",
        "Name",
        "Symbol",
        "Price",
        "Change_24h",
        "Exchanges",
        "Market_Cap",
        "Change_30d",
        "Url",
    ]
    url = "https://www.coingecko.com/en/stablecoins"
    # Consistent with the sibling scrapers in this module: return an empty
    # frame instead of propagating RetryError when retries are exhausted.
    try:
        scraped_data = scrape_gecko_data(url)
    except RetryError as e:
        print(e)
        return pd.DataFrame()
    rows = scraped_data.find("tbody").find_all("tr")
    results = []
    for row in rows:
        link = GECKO_BASE_URL + row.find("a")["href"]
        row_cleaned = clean_row(row)
        # Pad rows missing a trailing cell so unpacking stays aligned.
        if len(row_cleaned) == 8:
            row_cleaned.append(None)

        (
            rank,
            name,
            *symbols,  # symbol cell may be absent
            price,
            volume_24h,  # NOTE(review): exposed as "Change_24h" in the output -- verify
            exchanges,
            market_cap,
            change_30d,
        ) = row_cleaned
        symbol = symbols[0] if symbols else symbols
        results.append([
            rank,
            name,
            symbol,
            price,
            volume_24h,
            exchanges,
            market_cap,
            change_30d,
            link,
        ])
    df = replace_qm(pd.DataFrame(results, columns=columns))
    # Replace the scraped rank with a clean 1-based index column.
    df.drop("Rank", axis=1, inplace=True)
    create_df_index(df, "Rank")
    df["Price"] = df["Price"].apply(
        lambda x: float(x.strip("$").replace(",", "")))
    return df
def get_top_crypto_categories() -> pd.DataFrame:
    """Scrapes top crypto categories from "https://www.coingecko.com/en/categories" [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        Rank, Name, Change_1h, Change_24h, Change_7d, Market_Cap, Volume_24h, Coins, Url
    """

    columns = [
        "Rank",
        "Name",
        "Change_1h",
        "Change_24h",
        "Change_7d",
        "Market_Cap",
        "Volume_24h",
        "Coins",
        "Url",
    ]
    url = "https://www.coingecko.com/en/categories"
    try:
        soup = scrape_gecko_data(url)
    except RetryError as error:
        # Degrade gracefully when retries are exhausted.
        print(error)
        return pd.DataFrame()
    records = []
    for tr in soup.find("tbody").find_all("tr"):
        link = GECKO_BASE_URL + tr.find("a")["href"]
        # Whitespace-split the row text; the category name may span several
        # tokens, captured by the starred target.
        (
            rank,
            *name_parts,
            change_1h,
            change_24h,
            change_7d,
            market_cap,
            volume,
            n_of_coins,
        ) = tr.text.strip().split()
        records.append([
            rank,
            " ".join(name_parts),
            change_1h,
            change_24h,
            change_7d,
            market_cap,
            volume,
            n_of_coins,
            link,
        ])

    df = pd.DataFrame(records, columns=columns)
    df["Rank"] = df["Rank"].astype(int)
    return df
def get_top_defi_coins() -> pd.DataFrame:
    """Scrapes top decentralized finance coins "https://www.coingecko.com/en/defi" [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        Top Decentralized Finance Coins
        Columns: Rank, Name, Symbol, Price, Change_1h, Change_24h, Change_7d, Volume_24h, Market_Cap, Url
    """

    url = "https://www.coingecko.com/en/defi"
    try:
        soup = scrape_gecko_data(url)
    except RetryError as error:
        # Degrade gracefully when retries are exhausted.
        print(error)
        return pd.DataFrame()

    records = []
    for tr in soup.find("tbody").find_all("tr"):
        cells = clean_row(tr)
        cells.pop(2)  # drop a cell that is not exposed in the output
        cells.append(GECKO_BASE_URL + tr.find("a")["href"])
        # Pad short rows with "?" at position 4 so columns stay aligned.
        if len(cells) == 11:
            cells.insert(4, "?")
        records.append(cells)

    df = pd.DataFrame(
        records,
        columns=[
            "Rank",
            "Name",
            "Symbol",
            "Price",
            "Change_1h",
            "Change_24h",
            "Change_7d",
            "Volume_24h",
            "Market_Cap",
            "Fully Diluted Market Cap",
            "Market Cap to TVL Ratio",
            "Url",
        ],
    )
    # The last two scraped metrics are not part of this function's contract.
    df.drop(
        ["Fully Diluted Market Cap", "Market Cap to TVL Ratio"],
        axis=1,
        inplace=True,
    )
    df["Rank"] = df["Rank"].astype(int)
    df["Price"] = df["Price"].apply(lambda p: float(p.strip("$").replace(",", "")))
    return df
def get_news(n: int = 100) -> pd.DataFrame:
    """Scrapes news from "https://www.coingecko.com/en/news?page={}" [Source: CoinGecko]

    Parameters
    ----------
    n: int
        Number of news, by default n=100, one page has 25 news, so 4 pages are scraped.

    Returns
    -------
    pandas.DataFrame:
        Index, Title, Author, Posted, Url
    """

    # 25 articles per page; +1 because the range() end below is exclusive.
    n_of_pages = (math.ceil(n / 25) + 1) if n else 2
    dfs = []
    for page in range(1, n_of_pages):
        url = f"https://www.coingecko.com/en/news?page={page}"
        try:
            scraped_data = scrape_gecko_data(url)
        except RetryError as e:
            print(e)
            return pd.DataFrame()
        rows = scraped_data.find_all("article")
        results = []
        for row in rows:
            header = row.find("header")
            link = header.find("a")["href"]
            text = [
                t for t in header.text.strip().split("\n")
                if t not in ["", " "]
            ]
            article = row.find("div", class_="post-body").text.strip()
            title, *by_who = text
            # The byline looks like "Author Name (<posted>)".  Partition on the
            # LAST "(" so parentheses inside the author segment don't break the
            # parse -- the previous split("(") two-way unpack raised ValueError
            # on any extra "(".
            author, _, posted = " ".join(by_who).rpartition("(")
            posted = posted.strip().replace(")", "")
            results.append([title, author.strip(), posted, article, link])
        dfs.append(
            pd.DataFrame(
                results,
                columns=[
                    "Title",
                    "Author",
                    "Posted",
                    "Article",
                    "Url",
                ],
            ))
    df = pd.concat(dfs, ignore_index=True).head(n)
    # Full article bodies are scraped but not part of the return contract.
    df.drop("Article", axis=1, inplace=True)
    df.index = df.index + 1
    df.reset_index(inplace=True)
    df.rename(columns={"index": "Index"}, inplace=True)
    return df
def get_recently_added_coins() -> pd.DataFrame:
    """Scrape recently added coins on CoinGecko from "https://www.coingecko.com/en/coins/recently_added"
    [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        Recently Added Coins on CoinGecko
        Columns: Name, Symbol, Price, Change_1h, Change_24h, Added
    """

    columns = [
        "Name",
        "Symbol",
        "Price",
        "Change_1h",
        "Change_24h",
        "Added",
        "Url",
    ]

    url = "https://www.coingecko.com/en/coins/recently_added"
    try:
        soup = scrape_gecko_data(url)
    except RetryError as error:
        # Degrade gracefully when retries are exhausted.
        print(error)
        return pd.DataFrame()

    records = []
    for tr in soup.find("tbody").find_all("tr"):
        link = GECKO_BASE_URL + tr.find("a")["href"]
        # Variable-length "changes" section captured by the starred target.
        (
            name,
            symbol,
            _,
            price,
            *changes,
            _,
            _volume,
            last_added,
        ) = clean_row(tr)
        change_1h, change_24h, _ = changes_parser(changes)
        records.append([name, symbol, price, change_1h, change_24h, last_added, link])

    df = replace_qm(pd.DataFrame(records, columns=columns))
    # Expose a 1-based ranking as a regular column.
    df.index = df.index + 1
    df.reset_index(inplace=True)
    df.rename(columns={"index": "Rank"}, inplace=True)
    df["Price"] = df["Price"].apply(lambda p: float(p.strip("$").replace(",", "")))
    return df
def get_discovered_coins(category: str = "trending") -> pd.DataFrame:
    """Scrapes data from "https://www.coingecko.com/en/discover" [Source: CoinGecko]
        - Most voted coins
        - Most popular coins
        - Recently added coins
        - Most positive sentiment coins

    Parameters
    ----------
    category: str
        - one from list: [trending, most_voted, positive_sentiment,recently_added, most_visited]

    Returns
    -------
    pandas.DataFrame:
        Most voted, most trending, recently added, most positive sentiment coins.
        Columns: Name, Price_BTC, Price_USD, Url
    """

    if category not in CATEGORIES:
        raise ValueError(
            f"Wrong category name\nPlease chose one from list: {CATEGORIES.keys()}"
        )
    url = "https://www.coingecko.com/en/discover"
    try:
        soup = scrape_gecko_data(url)
    except RetryError as error:
        # Degrade gracefully when retries are exhausted.
        print(error)
        return pd.DataFrame()
    # Pick the section matching the requested category.
    section = soup.find_all(
        "div",
        class_="col-12 col-sm-6 col-md-6 col-lg-4")[CATEGORIES[category]]
    btc_price = get_btc_price()

    records = []
    for anchor in section.find_all("a"):
        name, *_, price = clean_row(anchor)
        link = GECKO_BASE_URL + anchor["href"]
        if price.startswith("BTC"):
            price = price.replace("BTC", "").replace(",", ".")

        # NOTE(review): int() truncates the BTC price before multiplying --
        # looks like a precision loss; confirm whether float() was intended.
        usd = (int(btc_price) * float(price)) if btc_price else None
        records.append([name, price, usd, link])
    return pd.DataFrame(
        records,
        columns=[
            "Name",
            "Price_BTC",
            "Price_USD",
            "Url",
        ],
    )
def get_top_dexes() -> pd.DataFrame:
    """Scrapes top decentralized exchanges from "https://www.coingecko.com/en/dex" [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        Top Decentralized Crypto Exchanges
        Columns: Name, Rank, Volume_24h, Coins, Pairs, Visits, Most_Traded, Market_Share
    """

    columns = [
        "Name",
        "Rank",
        "Volume_24h",
        "Coins",
        "Pairs",
        "Visits",
        "Most_Traded",
        "Market_Share",
    ]
    url = "https://www.coingecko.com/en/dex"
    try:
        scraped_data = scrape_gecko_data(url)
    except RetryError as e:
        print(e)
        return pd.DataFrame()
    rows = scraped_data.find("tbody").find_all("tr")
    results = []
    for row in rows:
        row_cleaned = clean_row(row)
        if " Trading Incentives" in row_cleaned:
            row_cleaned.remove(" Trading Incentives")
        # Pad short rows so every record has the same number of cells.
        if len(row_cleaned) == 8:
            row_cleaned.insert(-3, "N/A")
        results.append(row_cleaned)
    df = pd.DataFrame(results)
    # The exchange name is split across two cells on the page; merge them.
    df["Name"] = df.iloc[:, 1] + " " + df.iloc[:, 2].replace("N/A", "")
    df.drop(df.columns[1:3], axis=1, inplace=True)
    df = swap_columns(df)
    df.columns = columns
    # Strip the "$..." suffix and literal separators from Most_Traded.
    # regex=False is essential: with regex=True the "." pattern matched EVERY
    # character and blanked the whole column.
    df["Most_Traded"] = (
        df["Most_Traded"].apply(lambda x: x.split("$")[0]).str.replace(
            ",", "", regex=False).str.replace(".", "", regex=False))
    # A purely numeric leftover means there was no token name to keep.
    df["Most_Traded"] = df["Most_Traded"].apply(lambda x: None
                                                if x.isdigit() else x)
    df["Rank"] = df["Rank"].astype(int)
    df.set_index("Rank", inplace=True)
    return df.reset_index()
def get_top_nfts() -> pd.DataFrame:
    """Scrapes top nfts from "https://www.coingecko.com/en/nft" [Source: CoinGecko]

    Returns
    -------
    pandas.DataFrame
        Top NFTs (Non-Fungible Tokens)
        Columns: Rank, Name, Symbol, Price, Change_1h, Change_24h, Change_7d, Volume_24h, Market_Cap, Url
    """

    url = "https://www.coingecko.com/en/nft"
    try:
        soup = scrape_gecko_data(url)
    except RetryError as error:
        # Degrade gracefully when retries are exhausted.
        print(error)
        return pd.DataFrame()

    records = []
    for tr in soup.find("tbody").find_all("tr"):
        link = GECKO_BASE_URL + tr.find("a")["href"]
        cells = clean_row(tr)
        # Pad short rows with "N/A" so every record has the same width.
        if len(cells) == 9:
            cells.insert(5, "N/A")
        cells.append(link)
        cells.pop(3)  # drop a cell that is not exposed in the output
        records.append(cells)

    df = pd.DataFrame(
        records,
        columns=[
            "Rank",
            "Name",
            "Symbol",
            "Price",
            "Change_1h",
            "Change_24h",
            "Change_7d",
            "Volume_24h",
            "Market_Cap",
            "Url",
        ],
    )
    df["Rank"] = df["Rank"].astype(int)
    # NOTE: Price remains a string here (no float() cast), unlike the sibling
    # scrapers in this module.
    df["Price"] = df["Price"].apply(lambda p: p.strip("$").replace(",", ""))
    return df
def get_companies_assets(endpoint: str = "bitcoin") -> pd.DataFrame:
    """Scrapes list of companies that holds ethereum or bitcoin
    from "https://www.coingecko.com/en/public-companies-{bitcoin/ethereum}" [Source: CoinGecko]

    Parameters
    ----------
    endpoint : str
        "bitcoin" or "ethereum"

    Returns
    -------
    pandas.DataFrame
        Rank, Company, Ticker, Country, Total_Btc, Entry_Value, Today_Value, Pct_Supply, Url
    """

    url = f"https://www.coingecko.com/en/public-companies-{endpoint}"
    try:
        soup = scrape_gecko_data(url)
    except RetryError as error:
        # Degrade gracefully when retries are exhausted.
        print(error)
        return pd.DataFrame()

    records = []
    for tr in soup.find("tbody").find_all("tr"):
        # NOTE(review): the href is stored as-is, without the GECKO_BASE_URL
        # prefix used by sibling scrapers -- confirm whether it is absolute.
        cells = clean_row(tr)
        cells.append(tr.find("a")["href"])
        records.append(cells)

    return pd.DataFrame(
        records,
        columns=[
            "Rank",
            "Company",
            "Ticker",
            "Country",
            "Total_Btc",
            "Entry_Value",
            "Today_Value",
            "Pct_Supply",
            "Url",
        ],
    )