def init(branch=dolthub_branch):
    """Open the local DoltHub repo, cloning it first when it is not present.

    If a directory named ``dolthub_repo`` exists under the current working
    directory, it is opened, switched to ``branch`` when a different branch
    is active, and pulled. Otherwise ``dolthub_fullrepo`` is cloned with
    ``branch`` passed to the clone call.

    Args:
        branch: Name of the branch to work on. Defaults to ``dolthub_branch``.

    Returns:
        The ``Dolt`` object for the opened or freshly cloned repository.
    """
    # check if repo already exists in our cwd
    path = os.path.join(os.getcwd(), dolthub_repo)

    if os.path.isdir(path):
        print(' [*] DoltHub Repo found in ./{}, re-initializing'.format(dolthub_repo))
        dolt = Dolt(path)

        # make sure the data isn't stale and pull new data
        print(' [*] Performing `dolt pull` to ensure repo is up to date')

        # check what branch we have
        current = dolt.branch()[0].name
        print(' [*] Current Branch: {}'.format(current))

        # if we are not on the branch passed, then switch
        if current != branch:
            try:
                print(' [*] Checking out branch: {}'.format(branch))
                dolt.checkout(branch=branch)
                # recheck the branch
                current = dolt.branch()[0].name
                print(' [*] Current Branch: {}'.format(current))
            except Exception as exc:
                # Best effort: stay on the current branch and still pull, but
                # report the failure instead of hiding it. Catching Exception
                # (not a bare except) lets Ctrl-C / SystemExit propagate.
                print(' [!] Could not check out branch {}: {}'.format(branch, exc))

        dolt.pull()
        status = dolt.status()
        print(' [*] Current Status: {}'.format(status))
    else:
        # clone the database from DoltHub, save it into a var to be
        # referenced for read/write purposes
        print(' [*] Cloning DoltHub Repo: {} into ./{}'.format(dolthub_fullrepo, dolthub_repo))
        dolt = Dolt.clone(dolthub_fullrepo, branch=branch)
        current = dolt.branch()[0].name
        print(' [*] Current Branch: {}'.format(current))

    return dolt
from doltpy.cli import Dolt from doltpy.cli.read import read_pandas import sqlalchemy import pandas as pd import sys from sqlalchemy import create_engine from tqdm import tqdm engine = create_engine("mysql://*****:*****@ip:3306/data?charset=utf8") repo = Dolt('hospital-price-transparency') query = "SELECT code, short_description, long_description from cpt_hcpcs WHERE (short_description like '%%blood%%' and " \ "short_description like '%%transfusion%%') or (long_description like '%%blood%%' and " \ "long_description like '%%transfusion%%'); " def chunker(seq, size): return (seq[pos:pos + size] for pos in range(0, len(seq), size)) def insert_with_progress(df, table_name): chunksize = int(len(df) / 10) with tqdm(total=len(df)) as pbar: for i in range(10): pos = chunksize * i cdf = df.iloc[pos:pos + chunksize, :] cdf.to_sql(name=table_name, con=engine, if_exists="append",
from nba_api.stats.static import players
from nba_api.stats.endpoints import playercareerstats

# Lookup from a stats result-set name to a table name.
# NOTE(review): presumably the keys are nba_api playercareerstats result-set
# names and the values are Dolt table names; the code that consumes this map
# is outside this view -- confirm.
table_map = {
    'CareerTotalsAllStarSeason': 'career_totals_allstar',
    'CareerTotalsPostSeason': 'career_totals_post_season',
    'CareerTotalsRegularSeason': 'career_totals_regular_season',
    'SeasonRankingsPostSeason': 'rankings_post_season',
    'SeasonRankingsRegularSeason': 'rankings_regular_season',
    'SeasonTotalsAllStarSeason': 'season_totals_allstar',
    'SeasonTotalsPostSeason': 'season_totals_post_season',
    'SeasonTotalsRegularSeason': 'season_totals_regular_season'
}

# Dolt repository handle for the current directory.
repo = Dolt('.')

# Import players
players_df = pandas.DataFrame(players.get_players())
print(players_df)
# Replace the 'players' table with the frame, keyed on 'id', without committing.
write_pandas(
    repo,
    'players',
    players_df,
    import_mode='replace',
    primary_key=['id'],
    commit=False,
)

# Import previously downloaded stats
count = 1