def loader(loader_builder: DoltLoaderBuilder, dolt_dir: str, clone: bool, push: bool, remote_name: str, dry_run: bool, remote_url: str):
    """
    Run the DoltLoaders produced by `loader_builder` against a Dolt repo,
    optionally cloning the repo first and pushing each result branch.

    :param loader_builder: zero-arg callable returning an iterable of DoltLoader callables
    :param dolt_dir: path to an existing local Dolt repo; ignored when clone is True
    :param clone: when True, clone remote_url into a fresh temp dir instead of using dolt_dir
    :param push: when True, push each loader's branch to remote_name after it runs
    :param remote_name: name of the remote to push to
    :param dry_run: when True, only log the configuration; run no loaders
    :param remote_url: URL to clone from; required when clone is True
    """
    if clone:
        assert remote_url, 'If clone is True then remote must be passed'
        logger.info('Clone is set to true, so ignoring dolt_dir')
        logger.info('Clone set to True, cloning remote {}'.format(remote_url))
        # Clone via the Dolt.clone classmethod into a fresh temp dir.
        # The original constructed Dolt(temp_dir) on an *empty* directory and
        # then called clone on the instance; every other call site in this
        # project (see load_to_dolthub) uses the classmethod form, which
        # returns the repo object directly.
        temp_dir = tempfile.mkdtemp()
        repo = Dolt.clone(remote_url, temp_dir)
    else:
        assert os.path.exists(os.path.join(
            dolt_dir, '.dolt')), 'Repo must exist locally if not cloned'
        repo = Dolt(dolt_dir)
    logger.info('''Commencing to load to DoltHub with the following options:
                - dolt_dir {dolt_dir}
                - clone {clone}
                - remote {remote}
                - push {push}
            '''.format(dolt_dir=repo.repo_dir, push=push, clone=clone, remote=remote_name))
    if not dry_run:
        loaders = loader_builder()
        for dolt_loader in loaders:
            branch = dolt_loader(repo)
            if push:
                logger.info('Pushing changes to remote {} on branch {}'.format(
                    remote_name, branch))
                repo.push(remote_name, branch)
def load_to_dolthub(loader_or_loaders: Union[DoltLoader, List[DoltLoader]], clone: bool, push: bool, remote_name: str, remote_url: str, dolt_dir: str = None, dry_run: bool = False):
    """
    This function takes a `DoltLoaderBuilder`, repo and remote settings, and attempts to
    execute the loaders returned by the builder.

    :param loader_or_loaders: a single DoltLoader or a list of them
    :param dolt_dir: path to an existing local Dolt repo; ignored when clone is True
    :param clone: when True, clone remote_url into a fresh temp dir instead of using dolt_dir
    :param push: when True, push each loader's branch to remote_name after it runs
    :param remote_name: name of the remote to push to
    :param dry_run: when True, only log the configuration; run no loaders
    :param remote_url: URL to clone from; required when clone is True
    :return:
    """
    # isinstance instead of `type(x) == list` — accepts list subclasses too
    if isinstance(loader_or_loaders, list):
        loaders = loader_or_loaders
    else:
        loaders = [loader_or_loaders]

    # The original tested `if clone:` twice back-to-back; a single branch
    # covers both the temp-dir setup and the clone itself.
    if clone:
        assert remote_url, 'If clone is True then remote must be passed'
        temp_dir = tempfile.mkdtemp()
        logger.info('Clone is set to true, so ignoring dolt_dir')
        logger.info('Clone set to True, cloning remote {}'.format(remote_url))
        repo = Dolt.clone(remote_url, temp_dir)
    else:
        assert os.path.exists(os.path.join(
            dolt_dir, '.dolt')), 'Repo must exist locally if not cloned'
        repo = Dolt(dolt_dir)
    logger.info('''Commencing to load to DoltHub with the following options:
                - dolt_dir {dolt_dir}
                - clone {clone}
                - remote {remote}
                - push {push}
            '''.format(dolt_dir=repo.repo_dir, push=push, clone=clone, remote=remote_name))
    if not dry_run:
        for dolt_loader in loaders:
            branch = dolt_loader(repo)
            if push:
                logger.info('Pushing changes to remote {} on branch {}'.format(
                    remote_name, branch))
                repo.push(remote_name, branch)
# NOTE(review): this chunk is fragments of a Panda3D app whose enclosing
# context (the MyApp class) is outside the visible range. The two
# assignments below reference `repo` and presumably live inside MyApp's
# setup — confirm nesting against the full file before relying on it.
tweet: List[dict] = repo.sql(query=random_tweet_query, result_format='csv')
tweet_text = OnscreenText(text=tweet[0]["text"], pos=(-0.5, 0.02), scale=0.07)

# Define a procedure to move the camera.
def spin_camera_task(self, task):
    # 6 degrees of rotation per second of elapsed task time
    angle_degrees = task.time * 6.0
    angle_radians = angle_degrees * (pi / 180.0)
    # orbit on a radius-20 circle at height 3, heading kept toward the center
    self.camera.setPos(20 * sin(angle_radians), -20 * cos(angle_radians), 3)
    self.camera.setHpr(angle_degrees, 0, 0)
    # Task.cont re-schedules this task for the next frame
    return Task.cont

if __name__ == "__main__":
    working_directory: str = "working"
    tweets_directory: str = os.path.join(working_directory, "tweets")

    if not os.path.exists(working_directory):
        print("Creating Working Directory...")
        os.mkdir(working_directory)

    # TODO: Thread Me
    # Clone the tweets repo on first run; reuse the local copy afterwards.
    if not os.path.exists(tweets_directory):
        print("Cloning Tweets Repo...")
        repo: Dolt = Dolt.clone(remote_url="alexis-evelyn/presidential-tweets", new_dir=tweets_directory)
    else:
        repo: Dolt = Dolt(repo_dir=tweets_directory)

    app = MyApp(repo=repo)
    app.run()
# NOTE(review): this chunk begins mid-way through a dict literal (the
# opening brace and earlier entries are outside the visible range) and is
# truncated at the end inside the `with` block's for/else — the remainder
# is not visible here.
'CINTMED': 'med_cumulative_response_rate_internet',
'CMED': 'med_cumulative_response_rate',
}

# primary key column(s) used for the Dolt table import
pks = ['geo_id']

# 2020 decennial census response-rate CSV and its geo crosswalk file
data_url = 'https://www2.census.gov/programs-surveys/decennial/2020/data/2020map/2020/decennialrr2020.csv'
geo_url = 'https://www2.census.gov/programs-surveys/decennial/2020/data/2020map/2020/decennialrr2020_crosswalkfile.csv'

org = 'Liquidata'
repo_name = 'us-census-response-rates'
target = f'{org}/{repo_name}'
print(f'Cloning {target}')
# clone the target DoltHub repo into the current directory
repo = Dolt.clone(target, '.')

# Import GEO mapping table: download the crosswalk CSV and rewrite its
# header, renaming columns through geo_column_map where a mapping exists.
outcsvfile = 'geo.csv'
print(f'Reading {geo_url}')
with urllib.request.urlopen(geo_url) as response, open(outcsvfile, "w") as outcsvhandle:
    # whole response is read into memory and decoded as latin-1 before parsing
    csvreader = csv.reader(io.StringIO(response.read().decode('latin1')))
    csvwriter = csv.writer(outcsvhandle)
    header = next(csvreader)
    header_out = []
    for col in header:
        if geo_column_map.get(col):
            header_out.append(geo_column_map.get(col))
        else:
            # (chunk truncated here in this view)
def scrape_document(url): headless_chrome = [ CHROME, '--headless', '--disable-gpu', '--dump-dom', '--crash-dumps-dir=/tmp', url ] process = Popen(headless_chrome, stdout=PIPE) (output, err) = process.communicate() exit_code = process.wait() return output repo_name = 'Liquidata/online-services' root = '.' repo = Dolt.clone(repo_name, root) documents_df = read_table(repo, 'documents') documents_df['terms_raw'] = documents_df['terms_raw'].astype(str) documents_df['privacy_raw'] = documents_df['privacy_raw'].astype(str) for index, row in documents_df.iterrows(): print(f'Processing {index}') documents_df.at[index, 'terms_raw'] = scrape_document(row['terms_url']) documents_df.at[index, 'privacy_raw'] = scrape_document(row['privacy_url']) import_df(repo, 'documents', documents_df, ['product_id']) if repo.status().is_clean: print('No changes to repo. Exiting') else: