Beispiel #1
0
def init(branch=dolthub_branch):
    # check if repo already exists in our cwd
    cwd = os.getcwd()
    path = os.path.join(cwd, dolthub_repo)
    if os.path.isdir(path):
        print(' [*] DoltHub Repo found in ./{}, re-initializing'.format(dolthub_repo))
        dolt = Dolt(path)
        # make sure the data isn't stale and pull new data
        print(' [*] Performing `dolt pull` to ensure repo is up to date')
        # check what branch we have
        b = Dolt.branch(dolt)[0].name
        print('   [*] Current Branch: {}'.format(b))
        # if we are not on the branch passed, then switch
        if b != branch:
            try:
                print('   [*] Checking out branch: {}'.format(branch))
                Dolt.checkout(dolt, branch=branch)
                # recheck the branch
                b = Dolt.branch(dolt)[0].name
                print('   [*] Current Branch: {}'.format(b))
            except:
                pass

        p = Dolt.pull(dolt)
        s = Dolt.status(dolt)
        
        print('   [*] Current Status: {}'.format(s))
    else:
        # clone the database from DoltHub, save it into a var to be referenced for read/write purposes
        print(' [*] Cloning DoltHub Repo: {} into ./{}'.format(dolthub_fullrepo, dolthub_repo))
        dolt = Dolt.clone(dolthub_fullrepo, branch=branch)
        b = Dolt.branch(dolt)[0].name
        print('   [*] Current Branch: {}'.format(b))
    return dolt
Beispiel #2
0
def new_branch(dolt, intake):
    # generates a modifier at the end
    # city-protect-import-py-5d365f9b
    new_branch = 'city-protect-import-py-{}'.format(
        str(uuid.uuid4()).split('-')[0])
    try:
        print('   [*] Creating new Branch: {}'.format(new_branch))
        b = Dolt.branch(dolt,
                        branch_name=dolthub_branch,
                        new_branch=new_branch,
                        copy=True)
        b2 = Dolt.branch(intake,
                         branch_name=intake_branch,
                         new_branch=new_branch,
                         copy=True)
        #print(b)
        Dolt.checkout(dolt, branch=new_branch)
        Dolt.checkout(intake, branch=new_branch)
        print('   [*] Checking out new branch...')
        # recheck the branch
        b = Dolt.branch(dolt)[0].name
        b2 = Dolt.branch(intake)[0].name
        print('   [*] Current Branch: {} / {}'.format(b, b2))
    except:
        print('   [!] FATAL ERROR: CANNOT CHECKOUT NEW BRANCH. ABORTING')
        sys.exit()
Beispiel #3
0
def commit(dolt):
    branch = Dolt.branch(dolt)[0].name
    print('  [*] Commiting changes to {}'.format(branch))
    # make sure we are in our custom branch and not a main one
    if 'city-protect' in branch:
        dolt.remote(name='origin', url=dolthub_fullrepo)
        dolt.add(['data_incident_reports', 'datasets'])
        dolt.commit('Data Added from cityprotect_load.py ETL Script')
        dolt.push('origin', branch, set_upstream='origin')
        print('  [*] Done!')
    else:
        print(  '[!] ERROR: Cannot push to main branch. Aborting. Use dolt cli to migrate and finalize commit')
Beispiel #4
0
def _import_helper(
    dolt: Dolt,
    table: str,
    write_import_file: Callable[[str], None],
    import_mode: Optional[str] = None,
    primary_key: Optional[List[str]] = None,
    commit: Optional[bool] = False,
    commit_message: Optional[str] = None,
    commit_date: Optional[datetime.datetime] = None,
) -> None:
    import_mode = _get_import_mode_and_flags(dolt, table, import_mode)
    logger.info(f"Importing to table {table} in dolt directory located in {dolt.repo_dir()}, import mode {import_mode}")

    fname = tempfile.mktemp(suffix=".csv")
    import_flags = IMPORT_MODES_TO_FLAGS[import_mode]
    try:
        write_import_file(fname)
        args = ["table", "import", table] + import_flags
        if primary_key:
            args += ["--pk={}".format(",".join(primary_key))]

        dolt.execute(args + [fname])

        if commit:
            msg = commit_message or f"Committing write to table {table} in {import_mode} mode"
            dolt.add(table)
            dolt.commit(msg, date=commit_date)
    finally:
        if os.path.exists(fname):
            os.remove(fname)
Beispiel #5
0
def _get_import_mode_and_flags(dolt: Dolt, table: str, import_mode: Optional[str] = None) -> str:
    import_modes = IMPORT_MODES_TO_FLAGS.keys()
    if import_mode and import_mode not in import_modes:
        raise ValueError(f"update_mode must be one of: {import_modes}")
    else:
        if table in [table.name for table in dolt.ls()]:
            logger.info(f'No import mode specified, table exists, using "{UPDATE}"')
            import_mode = UPDATE
        else:
            logger.info(f'No import mode specified, table exists, using "{CREATE}"')
            import_mode = CREATE

    return import_mode
Beispiel #6
0
def _add_test_table(dolt: Dolt, table_name: str):
    dolt.sql(query=f'''
        CREATE TABLE `{table_name}` (
            `name` VARCHAR(32),
            `adjective` VARCHAR(32),
            `id` INT NOT NULL,
            `date_of_death` DATETIME,
            PRIMARY KEY (`id`)
        );
    ''')
    dolt.add(table_name)
    dolt.commit('Created test table')
Beispiel #7
0
def sql_server():
    p = None
    d = tempfile.TemporaryDirectory()
    try:
        db_path = os.path.join(d.name, "tracks")
        db = Dolt.init(db_path)
        db.sql("create table tracks (TrackId bigint, Name text)")
        db.sql(
            "insert into tracks values (0, 'Sue'), (1, 'L'), (2, 'M'), (3, 'Ji'), (4, 'Po')"
        )
        db.sql("select dolt_commit('-am', 'Init tracks')")
        p = Popen(args=["dolt", "sql-server", "-l", "trace", "--port", "3307"],
                  cwd=db_path)
        time.sleep(.5)
        yield db
    finally:
        if p is not None:
            p.kill()
        if os.path.exists(d.name):
            shutil.rmtree(d.name)
Beispiel #8
0
from doltpy.cli import Dolt
from doltpy.sql import DoltSQLServerContext, ServerConfig
import sqlalchemy as sa
import os

cur_dir = os.path.dirname(os.path.realpath(__file__))
f = open(cur_dir + "/../../private/.mysql_password", "r")
mysql_password = f.read().strip("\n")

# Setup objects to represents source and target databases, start Dolt SQL Server
dolt = Dolt.clone('durst/csknow')
dssc = DoltSQLServerContext(dolt, ServerConfig())
dssc.start_server()
mysql_engine = sa.create_engine(
    '{dialect}://{user}:{password}@{host}:{port}/{database}'.format(
        dialect='mysql+mysqlconnector',
        user="******",
        password=mysql_password,
        host="localhost",
        port="3124",
        database="csknow"
    )
)

from doltpy.sql.sync import sync_schema_to_dolt, MYSQL_TO_DOLT_TYPE_MAPPING

sync_schema_to_dolt(mysql_engine,
                    dssc.engine,
                    {"players":"players", "rounds":"rounds", "ticks":"ticks", "player_at_tick":"player_at_tick", "spotted":"spotted", "weapon_fire":"weapon_fire", "kills":"kills", "hurt":"hurt", "grenades":"grenades", "flashed":"flashed", "grenade_trajectories":"grenade_trajectories", "plants":"plants", "defusals":"defusals", "explosions":"explosions"},
                    MYSQL_TO_DOLT_TYPE_MAPPINGS)
Beispiel #9
0
from doltpy.cli import Dolt

from doltpy.cli.read import read_pandas
import sqlalchemy
import pandas as pd
import sys
from sqlalchemy import create_engine
from tqdm import tqdm

engine = create_engine("mysql://*****:*****@ip:3306/data?charset=utf8")

repo = Dolt('hospital-price-transparency')

query = "SELECT code, short_description, long_description from cpt_hcpcs WHERE (short_description like '%%blood%%' and " \
        "short_description like '%%transfusion%%')         or (long_description like '%%blood%%' and             " \
        "long_description like '%%transfusion%%'); "


def chunker(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))


def insert_with_progress(df, table_name):
    chunksize = int(len(df) / 10)
    with tqdm(total=len(df)) as pbar:
        for i in range(10):
            pos = chunksize * i
            cdf = df.iloc[pos:pos + chunksize, :]
            cdf.to_sql(name=table_name,
                       con=engine,
                       if_exists="append",
Beispiel #10
0
def _read_table_sql(dolt: Dolt, sql: str) -> List[dict]:
    return dolt.sql(sql, result_format="csv")
Beispiel #11
0
def _write_helper(dolt: Dolt, data: List[dict], update_type: str):
    write_rows(dolt, TEST_TABLE, data, update_type, ['id'], commit=True)
    commit_hash, _ = dolt.log().popitem(last=False)
    return dolt, commit_hash
from nba_api.stats.static import players

from nba_api.stats.endpoints import playercareerstats

table_map = {
    'CareerTotalsAllStarSeason': 'career_totals_allstar',
    'CareerTotalsPostSeason': 'career_totals_post_season',
    'CareerTotalsRegularSeason': 'career_totals_regular_season',
    'SeasonRankingsPostSeason': 'rankings_post_season',
    'SeasonRankingsRegularSeason': 'rankings_regular_season',
    'SeasonTotalsAllStarSeason': 'season_totals_allstar',
    'SeasonTotalsPostSeason': 'season_totals_post_season',
    'SeasonTotalsRegularSeason': 'season_totals_regular_season'
}

repo = Dolt('.')

# Import players
players_df = pandas.DataFrame(players.get_players())

print(players_df)

write_pandas(repo,
             'players',
             players_df,
             import_mode='replace',
             primary_key=['id'],
             commit=False)

# Import previously downloaded stats
count = 1
Beispiel #13
0
import logging

logger = logging.getLogger()
logger.setLevel(logging.WARNING)

from doltpy.cli import Dolt
from doltpy.cli.write import write_pandas
import pandas as pd

if __name__ == "__main__":
    dolt = Dolt.init("foo")

    df_v1 = pd.DataFrame({"A": [1, 1, 1], "B": [1, 1, 1]})
    df_v2 = pd.DataFrame({"A": [1, 1, 1, 2, 2, 2], "B": [1, 1, 1, 2, 2, 2]})

    write_pandas(dolt=dolt,
                 table="bar",
                 df=df_v1.reset_index(),
                 primary_key=["index"],
                 import_mode="create")
    dolt.add("bar")
    dolt.commit("Initialize bar")

    v1 = list(dolt.log(number="1").keys())[0]

    write_pandas(dolt=dolt,
                 table="bar",
                 df=df_v2.reset_index(),
                 primary_key=["index"],
                 import_mode="update")
    dolt.add("bar")