Example #1
import logging
import os
import tempfile

from doltpy.core import Dolt
from doltpy.etl import DoltLoaderBuilder

logger = logging.getLogger(__name__)


def loader(loader_builder: DoltLoaderBuilder, dolt_dir: str, clone: bool,
           push: bool, remote_name: str, dry_run: bool, remote_url: str):
    if clone:
        assert remote_url, 'If clone is True then remote_url must be passed'
        temp_dir = tempfile.mkdtemp()
        logger.info('Clone is set to True, so ignoring dolt_dir')
        logger.info('Cloning remote {}'.format(remote_url))
        repo = Dolt.clone(remote_url, temp_dir)
    else:
        assert os.path.exists(os.path.join(
            dolt_dir, '.dolt')), 'Repo must exist locally if not cloned'
        repo = Dolt(dolt_dir)

    logger.info('''Commencing to load to DoltHub with the following options:
                        - dolt_dir  {dolt_dir}
                        - clone     {clone}
                        - remote    {remote}
                        - push      {push}
        '''.format(dolt_dir=repo.repo_dir,
                   push=push,
                   clone=clone,
                   remote=remote_name))

    if not dry_run:
        loaders = loader_builder()
        for dolt_loader in loaders:
            branch = dolt_loader(repo)
            if push:
                logger.info('Pushing changes to remote {} on branch {}'.format(
                    remote_name, branch))
                repo.push(remote_name, branch)
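A minimal usage sketch for the function above, assuming doltpy's `get_df_table_writer` builds the individual loaders; the DataFrame, table name, primary key, and remote below are made-up illustrations, not values from the original:

import pandas as pd
from doltpy.etl import get_df_table_writer

def build_loaders():
    # Hypothetical data; get_df_table_writer returns a DoltLoader that
    # writes the DataFrame to the named table, keyed on 'id'
    df = pd.DataFrame({'id': [1, 2], 'name': ['a', 'b']})
    return [get_df_table_writer('my_table', lambda: df, ['id'])]

loader(loader_builder=build_loaders,
       dolt_dir='',  # ignored because clone=True
       clone=True,
       push=True,
       remote_name='origin',
       dry_run=False,
       remote_url='my-org/my-repo')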
Example #2
import logging
import os
import tempfile
from typing import List, Optional, Union

from doltpy.core import Dolt
from doltpy.etl import DoltLoader

logger = logging.getLogger(__name__)


def load_to_dolthub(loader_or_loaders: Union[DoltLoader, List[DoltLoader]],
                    clone: bool,
                    push: bool,
                    remote_name: str,
                    remote_url: str,
                    dolt_dir: Optional[str] = None,
                    dry_run: bool = False):
    """
    This function takes a `DoltLoaderBuilder`, repo and remote settings, and attempts to execute the loaders returned
    by the builder.
    :param loader_or_loaders:
    :param dolt_dir:
    :param clone:
    :param push:
    :param remote_name:
    :param dry_run:
    :param remote_url:
    :return:
    """
    if isinstance(loader_or_loaders, list):
        loaders = loader_or_loaders
    else:
        loaders = [loader_or_loaders]

    if clone:
        assert remote_url, 'If clone is True then remote_url must be passed'
        temp_dir = tempfile.mkdtemp()
        logger.info('Clone is set to True, so ignoring dolt_dir')
        logger.info('Cloning remote {}'.format(remote_url))
        repo = Dolt.clone(remote_url, temp_dir)
    else:
        assert os.path.exists(os.path.join(
            dolt_dir, '.dolt')), 'Repo must exist locally if not cloned'
        repo = Dolt(dolt_dir)

    logger.info('''Commencing to load to DoltHub with the following options:
                        - dolt_dir  {dolt_dir}
                        - clone     {clone}
                        - remote    {remote}
                        - push      {push}
        '''.format(dolt_dir=repo.repo_dir,
                   push=push,
                   clone=clone,
                   remote=remote_name))

    if not dry_run:
        for dolt_loader in loaders:
            branch = dolt_loader(repo)
            if push:
                logger.info('Pushing changes to remote {} on branch {}'.format(
                    remote_name, branch))
                repo.push(remote_name, branch)
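Because this variant takes the loaders directly, a single loader can be passed without wrapping it in a builder. As above, `get_df_table_writer` plus the table, DataFrame, and remote names are illustrative assumptions:

import pandas as pd
from doltpy.etl import get_df_table_writer

df = pd.DataFrame({'id': [1, 2], 'name': ['a', 'b']})
load_to_dolthub(get_df_table_writer('my_table', lambda: df, ['id']),
                clone=True,
                push=True,
                remote_name='origin',
                remote_url='my-org/my-repo')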
Example #3
        # Pull a random tweet out of the Dolt repo and render it on screen
        tweet: List[dict] = repo.sql(query=random_tweet_query, result_format='csv')
        tweet_text = OnscreenText(text=tweet[0]["text"], pos=(-0.5, 0.02), scale=0.07)

    # Define a task that spins the camera: 6 degrees per elapsed second,
    # i.e. one full revolution per minute.
    def spin_camera_task(self, task):
        angle_degrees = task.time * 6.0
        angle_radians = angle_degrees * (pi / 180.0)
        self.camera.setPos(20 * sin(angle_radians), -20 * cos(angle_radians), 3)
        self.camera.setHpr(angle_degrees, 0, 0)

        return Task.cont


if __name__ == "__main__":
    working_directory: str = "working"
    tweets_directory: str = os.path.join(working_directory, "tweets")

    if not os.path.exists(working_directory):
        print("Creating Working Directory...")
        os.mkdir(working_directory)

    # TODO: Thread Me
    if not os.path.exists(tweets_directory):
        print("Cloning Tweets Repo...")
        repo: Dolt = Dolt.clone(remote_url="alexis-evelyn/presidential-tweets", new_dir=tweets_directory)
    else:
        repo: Dolt = Dolt(repo_dir=tweets_directory)

    app = MyApp(repo=repo)
    app.run()
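`random_tweet_query` is referenced above but never shown. A plausible definition, assuming the repo stores tweets in a `tweets` table with a `text` column (both names are assumptions; Dolt's SQL dialect is MySQL-compatible, so `ORDER BY RAND()` works, though it is only practical on smaller tables):

random_tweet_query: str = '''
    SELECT *
    FROM tweets
    ORDER BY RAND()
    LIMIT 1
'''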
Example #4
    'CINTMED': 'med_cumulative_response_rate_internet',
    'CMED': 'med_cumulative_response_rate',
}

pks = ['geo_id']

data_url = 'https://www2.census.gov/programs-surveys/decennial/2020/data/2020map/2020/decennialrr2020.csv'
geo_url = 'https://www2.census.gov/programs-surveys/decennial/2020/data/2020map/2020/decennialrr2020_crosswalkfile.csv'

org = 'Liquidata'
repo_name = 'us-census-response-rates'

target = f'{org}/{repo_name}'

print(f'Cloning {target}')
repo = Dolt.clone(target, '.')

# Import GEO mapping table
outcsvfile = 'geo.csv'
print(f'Reading {geo_url}')
with urllib.request.urlopen(geo_url) as response, open(outcsvfile, "w") as outcsvhandle:
    csvreader = csv.reader(io.StringIO(response.read().decode('latin1')))
    csvwriter = csv.writer(outcsvhandle)

    header = next(csvreader)

    header_out = []
    for col in header:
        if geo_column_map.get(col):
            header_out.append(geo_column_map.get(col))
        else:
            header_out.append(col)

Example #5
from subprocess import PIPE, Popen

from doltpy.core import Dolt
from doltpy.core.read import read_table
from doltpy.core.write import import_df


def scrape_document(url):
    # Render the page in headless Chrome and return the dumped DOM.
    # CHROME (defined elsewhere in the original script) points at a
    # Chrome/Chromium binary.
    headless_chrome = [
        CHROME, '--headless', '--disable-gpu', '--dump-dom',
        '--crash-dumps-dir=/tmp', url
    ]

    process = Popen(headless_chrome, stdout=PIPE)
    (output, err) = process.communicate()
    exit_code = process.wait()

    return output


repo_name = 'Liquidata/online-services'
root = '.'
repo = Dolt.clone(repo_name, root)

documents_df = read_table(repo, 'documents')
documents_df['terms_raw'] = documents_df['terms_raw'].astype(str)
documents_df['privacy_raw'] = documents_df['privacy_raw'].astype(str)

for index, row in documents_df.iterrows():
    print(f'Processing {index}')
    documents_df.at[index, 'terms_raw'] = scrape_document(row['terms_url'])
    documents_df.at[index, 'privacy_raw'] = scrape_document(row['privacy_url'])

import_df(repo, 'documents', documents_df, ['product_id'])

if repo.status().is_clean:
    print('No changes to repo. Exiting')
else:
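    # The example cuts off here; a plausible completion, assuming doltpy's
    # add/commit/push methods (table, message, and branch are illustrative):
    repo.add('documents')
    repo.commit('Update scraped terms and privacy documents')
    repo.push('origin', 'master')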