# ---- Example #1 (score: 0) ----
def init(branch=dolthub_branch):
    """Initialize the local DoltHub repo and return a Dolt handle.

    If ./<dolthub_repo> already exists it is re-used: the repo is switched
    to *branch* (best-effort) and `dolt pull` is run to refresh the data.
    Otherwise the repository is cloned from DoltHub on *branch*.

    Args:
        branch: branch name to check out (defaults to the module-level
            ``dolthub_branch``, captured at definition time).

    Returns:
        doltpy.cli.Dolt: handle for the local repository.
    """
    # check if repo already exists in our cwd
    cwd = os.getcwd()
    path = os.path.join(cwd, dolthub_repo)
    if os.path.isdir(path):
        print(' [*] DoltHub Repo found in ./{}, re-initializing'.format(dolthub_repo))
        dolt = Dolt(path)
        # make sure the data isn't stale and pull new data
        print(' [*] Performing `dolt pull` to ensure repo is up to date')
        # check what branch we have; branch() returns (active, branches)
        b = dolt.branch()[0].name
        print('   [*] Current Branch: {}'.format(b))
        # if we are not on the branch passed, then switch
        if b != branch:
            # Best-effort switch: report failures instead of silently
            # swallowing every exception with a bare `except: pass`.
            try:
                print('   [*] Checking out branch: {}'.format(branch))
                dolt.checkout(branch=branch)
                # recheck the branch
                b = dolt.branch()[0].name
                print('   [*] Current Branch: {}'.format(b))
            except Exception as exc:
                print('   [!] Checkout failed ({}); staying on {}'.format(exc, b))

        dolt.pull()
        s = dolt.status()

        print('   [*] Current Status: {}'.format(s))
    else:
        # clone the database from DoltHub, save it into a var to be referenced for read/write purposes
        print(' [*] Cloning DoltHub Repo: {} into ./{}'.format(dolthub_fullrepo, dolthub_repo))
        dolt = Dolt.clone(dolthub_fullrepo, branch=branch)
        b = dolt.branch()[0].name
        print('   [*] Current Branch: {}'.format(b))
    return dolt
# ---- Example #2 (score: 0) ----
from doltpy.cli import Dolt

from doltpy.cli.read import read_pandas
import sqlalchemy
import pandas as pd
import sys
from sqlalchemy import create_engine
from tqdm import tqdm

# SQLAlchemy engine for the MySQL-compatible SQL server (credentials redacted).
engine = create_engine("mysql://*****:*****@ip:3306/data?charset=utf8")

# Handle to the locally cloned hospital-price-transparency Dolt database.
repo = Dolt('hospital-price-transparency')

# Select CPT/HCPCS codes whose short OR long description mentions both
# "blood" and "transfusion". The doubled %% escapes a literal % wildcard
# for the DB-API paramstyle used by SQLAlchemy.
query = "SELECT code, short_description, long_description from cpt_hcpcs WHERE (short_description like '%%blood%%' and " \
        "short_description like '%%transfusion%%')         or (long_description like '%%blood%%' and             " \
        "long_description like '%%transfusion%%'); "


def chunker(seq, size):
    """Lazily split *seq* into consecutive slices of at most *size* items.

    Returns a generator of slices; the final slice may be shorter than
    *size* when ``len(seq)`` is not a multiple of it.
    """
    starts = range(0, len(seq), size)
    return (seq[start:start + size] for start in starts)


def insert_with_progress(df, table_name):
    # Append *df* to SQL table *table_name* in 10 chunks, showing a tqdm bar.
    # NOTE(review): int(len(df) / 10) silently drops the remainder rows when
    # len(df) is not divisible by 10, and is 0 when len(df) < 10 (inserting
    # nothing) — confirm this is intended.
    chunksize = int(len(df) / 10)
    with tqdm(total=len(df)) as pbar:
        for i in range(10):
            pos = chunksize * i
            cdf = df.iloc[pos:pos + chunksize, :]
            # Write the chunk through the module-level `engine`.
            # NOTE(review): this call (and the function body) is truncated in
            # this view — pbar is never updated in the visible lines; the
            # remainder of the definition appears to be cut off.
            cdf.to_sql(name=table_name,
                       con=engine,
                       if_exists="append",
from nba_api.stats.static import players

from nba_api.stats.endpoints import playercareerstats

# Maps nba_api playercareerstats result-set names to the snake_case
# database table each one is written into.
table_map = {
    'CareerTotalsAllStarSeason': 'career_totals_allstar',
    'CareerTotalsPostSeason': 'career_totals_post_season',
    'CareerTotalsRegularSeason': 'career_totals_regular_season',
    'SeasonRankingsPostSeason': 'rankings_post_season',
    'SeasonRankingsRegularSeason': 'rankings_regular_season',
    'SeasonTotalsAllStarSeason': 'season_totals_allstar',
    'SeasonTotalsPostSeason': 'season_totals_post_season',
    'SeasonTotalsRegularSeason': 'season_totals_regular_season'
}

# Open the Dolt repository in the current working directory.
repo = Dolt('.')

# Import players
# NOTE(review): `pandas` and `write_pandas` are not imported in the visible
# lines — presumably brought in earlier in the file; verify.
players_df = pandas.DataFrame(players.get_players())

print(players_df)

# Replace the `players` table with the full static player list, keyed on
# `id`. commit=False — presumably no Dolt commit is created by this call;
# confirm against doltpy's write_pandas docs.
write_pandas(repo,
             'players',
             players_df,
             import_mode='replace',
             primary_key=['id'],
             commit=False)

# Import previously downloaded stats
count = 1