async def _run(self, setting: Setting, spinner: Halo) -> Result:
    leader = setting.leader
    server = setting.server
    auditor = setting.auditor

    spinner.text = "[experiment] starting servers"
    hosts = ",".join([f"{m.hostname}:{PORT}" for m in (leader, server, auditor)])
    server_cmd = (
        f"ulimit -n 65536 && "
        f"{RIPOSTE_BASE}/server/server -idx {{idx}} "
        f" -servers {hosts} "
        f" -threads {self.server_threads} "
        f"2>&1 "
        f"| tee /tmp/riposte.log"
    )
    auditor_proc = auditor.ssh.create_process(server_cmd.format(idx=2))
    server_proc = server.ssh.create_process(server_cmd.format(idx=1))
    leader_proc = leader.ssh.create_process(server_cmd.format(idx=0))
    # order of below is important
    async with auditor_proc as auditor_proc:
        async with server_proc as server_proc:
            await asyncio.sleep(1)  # leader needs other servers to be up
            async with leader_proc as leader_proc:
                await asyncio.sleep(2)  # leader waits 2s at the beginning

                spinner.text = "[experiment] starting clients"
                client_cmd = (
                    f"{RIPOSTE_BASE}/client/client "
                    f" -leader {leader.hostname}:{PORT} "
                    f" -hammer "
                    f" -threads {self.client_threads} "
                    f"2>&1 "
                    f"| tee /tmp/riposte-client.log"
                )
                client_procs = await asyncio.gather(
                    *[c.ssh.create_process(client_cmd) for c in setting.clients]
                )

                spinner.text = f"[experiment] run experiment for {WAIT_TIME}s"
                await asyncio.sleep(WAIT_TIME)

                spinner.text = "[experiment] cleaning up"
                for client_proc in client_procs:
                    client_proc.kill()
                for client_proc in client_procs:
                    await client_proc.wait()
                leader_proc.kill()
                server_proc.kill()
                auditor_proc.kill()

                spinner.text = "[experiment] parsing output"
                lines = (await leader_proc.stdout.read()).split("\n")
                return self._parse(lines)
def _create(
    cloud_formation_yaml_file,
    aws_requirements_file,
    docker_path=get_docker_path(),
):
    spinner = Halo(text='creating cloudformation template', spinner='dots')
    spinner.start()
    os.path.basename(__file__)
    try:
        client_cf.create_stack(
            StackName='sprite',
            TemplateBody=open(cloud_formation_yaml_file, 'r').read(),
            Capabilities=['CAPABILITY_IAM'],
        )
    except ClientError as e:
        if e.response['Error']['Code'] != 'AlreadyExistsException':
            raise
    spinner.succeed('created lambda skeleton')

    spinner.start()
    spinner.text = 'building docker'
    # docker build
    #  - creates new zip
    #  - updates remote aws lambda code
    try:
        docker_client = docker.from_env()
        requirements = (open('./requirements-aws.txt', 'r').read().replace('\n', ' '))  # noqa
        image, events = docker_client.images.build(
            path=docker_path,
            forcerm=True,
            tag='sprite:latest',
            buildargs={'REQUIREMENTS': requirements})
        aws_session = boto3.session.Session()
        spinner.succeed('built docker')

        spinner.start()
        spinner.text = 'deploying function code'
        aws_creds = aws_session.get_credentials()
        aws_response = docker_client.containers.run(
            image='sprite:latest',
            command='sprite',
            environment={
                'AWS_ACCESS_KEY_ID': aws_creds.access_key,
                'AWS_SECRET_ACCESS_KEY': aws_creds.secret_key,
                'AWS_DEFAULT_REGION': aws_session.region_name,
            })
        aws_response = aws_response.decode('utf8')
        aws_response = json.loads(aws_response)
        spinner.succeed('code deployed')
    except BuildError as e:
        spinner.fail(str(e))
async def run(self, setting: Setting, spinner: Halo) -> Result:
    server0 = setting.server0
    server1 = setting.server1
    clients = setting.clients

    await self._cleanup(setting)
    await self._install_keys(list(setting))
    keys = await self._sort_keys(server0)
    ips = await self._sort_ips(server0, server1, clients)

    # set up configs
    counts = await self._install_configs(setting, ips, keys)

    # run the dissent processes
    shutdown = asyncio.Event()
    spinner.text = "[experiment] starting processes"
    all_procs = self._run_dissent(setting, counts, shutdown)
    try:
        # TODO: wait until broadcaster log has "WaitingForServer"? or "Registering"
        await asyncio.sleep(5)
        curl_cmd = (
            "curl -X POST --data-binary @message localhost:{port}/session/send"
        )
        curl_procs = []
        for (client, (broadcasters, _)) in zip(clients, counts):
            for idx in range(broadcasters):
                curl_procs.append(
                    client.ssh.run(curl_cmd.format(port=8850 + idx), check=True))
        await asyncio.gather(*curl_procs)

        wait_time = WAIT_TIME_LONG if self.blame else WAIT_TIME_SHORT
        spinner.text = f"[experiment] run processes for {wait_time}s"
        await server0.ssh.run(
            'tail -f -n +0 server.log | grep -m1 "finished bulk"',
            check=True,
            timeout=wait_time,
        )
        # TODO: wait time should probably be estimated from parameters
        # e.g. it's way too short for many clients + blame
    finally:
        shutdown.set()

    log = (await server0.ssh.run(
        "cat server.log | "
        "grep -E '(Opening slot|finished bulk|SERVER_PUSH_CLEARTEXT|Phase: 1)'",
        check=True,
    )).stdout
    latency = self._parse_log(log)
    return Result(experiment=self, time=latency, queries=self.clients)
def dfload(df_filepath, *args, show_progress=False, parquet_convert_ndarray_to_list=False, **kwargs):
    '''Loads a dataframe file based on the file's extension.

    Parameters
    ----------
    df_filepath : str
        local path to an existing dataframe. The file extension is used to determine the file type.
    show_progress : bool
        show a progress spinner in the terminal
    parquet_convert_ndarray_to_list : bool
        whether or not to convert 1D ndarrays in the loaded parquet table into Python lists
    args : list
        list of positional arguments to pass to the corresponding reader
    kwargs : dict
        dictionary of keyword arguments to pass to the corresponding reader

    Returns
    -------
    pandas.DataFrame
        loaded dataframe

    Notes
    -----
    For '.csv' or '.csv.zip' files, we use :func:`mt.pandas.csv.read_csv`.
    For '.parquet' files, we use :func:`pandas.read_parquet`.

    Raises
    ------
    TypeError
        if the file type is unknown
    '''
    path = df_filepath.lower()

    if path.endswith('.parquet'):
        spinner = Halo("dfloading '{}'".format(path), spinner='dots') if show_progress else dummy_scope
        with spinner:
            try:
                df = _pd.read_parquet(df_filepath, *args, **kwargs)
                if parquet_convert_ndarray_to_list:
                    for x in df.columns:
                        if show_progress:
                            spinner.text = 'converting column: {}'.format(x)
                        if df.dtypes[x] == _np.dtype('O'):  # object
                            # Parquet saves lists as nested numpy arrays, which is not what we expect yet.
                            df[x] = df[x].apply(array2list)
                if show_progress:
                    spinner.succeed("dfloaded '{}'".format(path))
            except:
                if show_progress:
                    spinner.fail("failed to dfload '{}'".format(path))
                raise
        return df

    if path.endswith('.csv') or path.endswith('.csv.zip'):
        return read_csv(df_filepath, *args, show_progress=show_progress, **kwargs)

    raise TypeError("Unknown file type: '{}'".format(df_filepath))
async def run(self, setting: Setting, spinner: Halo) -> Result:
    # See Riposte sec. 3.2 for how to calculate number of writers that we
    # can handle. This gives a 95% success rate.
    # The Riposte implementation uses XOR, not field addition so we need the
    # 19.5 multiplier not 2.7.
    rows = math.ceil(self.channels * 19.5)

    # See Riposte sec. 4.3 for how to calculate communication-optimal width/height
    # these variable names correspond to that section
    alpha = 128
    beta = self.message_size * 8  # bits per byte
    c = math.sqrt(beta / (1 + alpha))  # pylint: disable=invalid-name
    height_optimal = math.ceil(math.sqrt(rows) * c)
    width_optimal = math.ceil(math.sqrt(rows) / c)

    # But Riposte fig. 4 suggests width = height is optimal
    width_even = height_even = math.ceil(math.sqrt(rows))

    results = []
    for width, height in (
        (width_optimal, height_optimal),
        (width_even, height_even),
    ):
        # Riposte has no configuration files, so we need to recompile
        spinner.text = "[experiment] compiling with correct settings"
        await self._compile(
            setting, width, height
        )  # TODO(zjn): catch if this doesn't work
        results.append(await self._run(setting, spinner))

    # return the best result
    return max(*results, key=lambda r: r.qps)
def update_player_data(self, task, data_set, no_prompts):
    subprocess.run(["clear"])
    print_heading(f"Update {data_set}", fg="bright_yellow")
    spinner = Halo(spinner=get_random_dots_spinner(), color=get_random_cli_color())
    spinner.text = "Updating player data..."
    spinner.start()
    result = task.execute()
    if result.failure:
        spinner.stop()
        return result
    spinner.succeed(f"{data_set} was successfully updated!")
    if no_prompts:
        return Result.Ok()
    updated_players = result.value or []
    if not updated_players:
        pause(message="Press any key to continue...")
        return Result.Ok(updated_players)
    heading = f"Updated {data_set}: Results"
    message = f"{len(updated_players)} changes total:"
    table_viewer = DictListTableViewer(
        dict_list=updated_players,
        prompt="Press Enter to continue",
        confirm_only=True,
        table_color="bright_yellow",
        heading=heading,
        heading_color="bright_yellow",
        message=message,
        message_color="blue",
    )
    table_viewer.launch()
    return Result.Ok(updated_players)
def test_spinner_getters_setters(self):
    """Test spinner getters and setters.
    """
    spinner = Halo()
    self.assertEqual(spinner.text, '')
    self.assertEqual(spinner.color, 'cyan')
    self.assertIsNone(spinner.spinner_id)

    spinner.spinner = 'dots12'
    spinner.text = 'bar'
    spinner.color = 'red'

    self.assertEqual(spinner.text, 'bar')
    self.assertEqual(spinner.color, 'red')

    if is_supported():
        self.assertEqual(spinner.spinner, Spinners['dots12'].value)
    else:
        self.assertEqual(spinner.spinner, default_spinner)

    spinner.spinner = 'dots11'
    if is_supported():
        self.assertEqual(spinner.spinner, Spinners['dots11'].value)
    else:
        self.assertEqual(spinner.spinner, default_spinner)

    spinner.spinner = 'foo_bar'
    self.assertEqual(spinner.spinner, default_spinner)

    # Color is None
    spinner.color = None
    spinner.start()
    spinner.stop()
    self.assertIsNone(spinner.color)
def _refresh_token(self, token_data: Union[dict, List[dict]] = []) -> bool:
    auth = self.auth
    token_data = utils.listify(token_data)
    token = None
    spin = Halo("Attempting to Refresh Token")
    spin.start()
    for idx, t in enumerate(token_data):
        try:
            if idx == 1:
                spin.fail()
                spin.text = spin.text + " retry"
                spin.start()
            token = auth.refreshToken(t)
            if token:
                auth.storeToken(token)
                auth.central_info["token"] = token
                break
        except Exception as e:
            log.exception(
                f"Attempt to refresh token returned {e.__class__.__name__} {e}"
            )

    if token:
        self.headers[
            "authorization"] = f"Bearer {self.auth.central_info['token']['access_token']}"
        # spin.succeed()
        spin.stop()
    else:
        spin.fail()

    return token is not None
def find_operator_info(args: argparse.Namespace, operator_name: str) -> None:
    """With the specified arguments, calls all the functions needed to find
    information and print all information out to the screen.

    This function will determine whether to use Gamepress or JSON for information,
    then call either one's appropriate information-getting functions and build an
    Operator object using the provided information. The Operator object will be
    used for printing. Nothing is returned.
    """
    spinner = Halo(text="Fetching...", spinner="dots", color="magenta")
    # Initialize the arguments for cmd purposes
    spinner.start()
    operator_dict, operator_key = get_operator_dict(operator_name)

    spinner.text = "Parsing..."
    spinner.color = "yellow"
    operator = parse_operator_data(args, operator_dict, operator_key, operator_name)
    # ----------------------------------------
    if operator is not None:
        spinner.succeed("Success!")
        if operator_dict == {} or args.gamepress:
            sys.stdout.write("\nSkipping JSON; Using gamepress.\n")

        # Print out the results
        sys.stdout.write("\n\n" + operator.name + " ")
        sys.stdout.write("*" * operator.rarity + " ")  # Star rarity
        sys.stdout.write(operator.profession + "\n")
        sys.stdout.write(operator.get_formatted_tags() + "\n\n")

        for desc_text in operator.description:
            sys.stdout.write(desc_text)

        all_properties = [
            operator.get_property(prop) for prop in operator.get_all_properties()
        ]
        # Fetch the stats
        all_messages = ([parse_stats(operator.stats)] + all_properties
                        if (operator.has_stats()) else all_properties)

        for prop in all_messages:
            for text in prop:
                sys.stdout.write(text + "\n")
    else:
        spinner.fail("Failed.")
        sys.stdout.write("\n\n" + operator_name.replace("-", " ").title() + "\n")
        sys.stdout.write("\n" + "Could not find operator! "
                         + "Either the server is down, or your spelling is! \n")
    sys.stdout.write("\n\n")
def entry(ctx, **kwargs):
    """Unofficial command-line client for the Unsplash API.

    Give your desktop some personality with gorgeous photos from artists on
    Unsplash! Manage your account, collections, liked photos, and more.

    When no command is specified, the default behaviour is to download a photo
    given the supplied options and set it as the user's desktop wallpaper.
    """
    if ctx.invoked_subcommand is None:
        spinner = Halo(text="Selecting an image...", spinner="dots").start()
        if kwargs["id"]:
            image = api.photo(kwargs["id"])
        else:
            if kwargs["orientation"] == "any":
                kwargs.pop("orientation", None)
            if kwargs["collections"]:
                kwargs["collections"] = alias.resolve(kwargs["collections"])
            image = api.random(kwargs)

        spinner.text = "Downloading image..."
        image_path = download(image["id"], image["urls"]["full"])
        utils.set_wallpaper(image_path)
        spinner.succeed("Photo by %s (@%s)" % (image["user"]["name"], image["user"]["username"]))
        pretty_print_info(image)
def get_all_s3_objects(self):
    subprocess.run(["clear"])
    spinner = Halo(spinner=get_random_dots_spinner(), color=get_random_cli_color())
    spinner.text = "Retrieving details of all objects stored in S3..."
    spinner.start()
    self.s3_sync.get_all_s3_objects()
    spinner.stop()
def download_files(folder_title: str, LINKS: List[str], args: argparse.Namespace):
    """Download files when the given URL is parsed"""
    ROOT_PATH = Path.cwd()
    links_len = len(LINKS)

    if args.name:
        dir_path = ROOT_PATH / args.name
    else:
        dir_path = ROOT_PATH / folder_title

    # Create folder
    spinner = Halo(text="Creating folder", spinner="dots")
    spinner.start()
    try:
        dir_path.mkdir()
    except FileExistsError:
        pass
    spinner.stop_and_persist(symbol="✅".encode("utf-8"), text="Folder Created")

    print(f"Total files: {links_len}")
    print(f"Download Path: {dir_path}")

    # Start download
    for index, url in enumerate(LINKS):
        r = requests.get(url, stream=True, headers={"Accept-Encoding": None})
        total_size = int(r.headers.get("Content-Length"))

        spinner.text = f"Downloading {index + 1}/{links_len} file"
        spinner.spinner = "arrow3"
        spinner.start()
        time.sleep(1)
        spinner.stop()

        file = url.split("/")[-1]
        file = (file[:50] + "...") if len(file) > 50 else file
        with open(dir_path / file, "wb") as f:
            with tqdm(
                total=total_size,
                desc=f"{file:<53}",
                unit="B",
                unit_scale=True,
                bar_format="{l_bar}{bar:20}{r_bar}{bar:-10b}",
            ) as pbar:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
                        pbar.update(len(chunk))

    spinner.spinner = "monkey"
    spinner.start()
    time.sleep(2)
    spinner.stop_and_persist(symbol="🔥".encode("utf-8"), text="All files downloaded.")
def load_boxscore_data(self):
    spinner = Halo(spinner=get_random_dots_spinner(), color=get_random_cli_color())
    spinner.text = f"Loading data for {self.bbref_game_id}..."
    spinner.start()
    self.game_data = GameData(self.app, self.bbref_game_id)
    self.game_data.bat_boxscore[self.away_team_id]
    self.game_data.pitch_boxscore[self.away_team_id]
    self.game_data.bat_boxscore[self.home_team_id]
    self.game_data.pitch_boxscore[self.home_team_id]
    spinner.stop()
def go(self, _url=None, _params={}):
    # Serialise the final search query
    request = _url + '?' + urllib.urlencode(_params)

    # GitHub conventions...
    request = request.replace('%3A', ':')
    request = request.replace('%2B', '+')

    if self.verbose:
        print('Requesting %s...' % request)

    # Make the request!
    response = requests.get(request, headers={
        'Accept': 'application/vnd.github.cloak-preview',
        'Authorization': 'token ' + self.args['github_oauth_token']
    })
    raw_data = response.json()

    # We've probably hit a rate limit
    if 'items' not in raw_data.keys():
        if 'X-RateLimit-Reset' in response.headers:
            reset_at = datetime.fromtimestamp(
                float(response.headers['X-RateLimit-Reset']))

            # Wait until reset
            diff = (reset_at - datetime.now()).total_seconds()
            if diff > 0:
                # + 5 for good measure
                diff = int(diff + 5)
                countdown_text = 'Waiting %d seconds due to rate limiting...'
                ratelimit = Halo(text=countdown_text % diff, spinner='dots')
                ratelimit.start()
                for i in range(diff):
                    ratelimit.text = countdown_text % (diff - i)
                    try:
                        time.sleep(1)
                    except IOError:
                        pass
                ratelimit.stop()

        # Try again?
        response = self.go(_url, _params)

    return response
def get_file_sizes(source):
    spinner = Halo(text="Getting source file data...")
    spinner.start()
    source_size = 0
    file_count = 0
    for f in source.glob("**/*"):
        spinner.text = str(f)
        if f.is_file():
            source_size += f.stat().st_size
            file_count += 1
    spinner.succeed(f"Found {file_count} source files with a total size of "
                    f"{humanfriendly.format_size(source_size, binary=True)}.")
    return source_size
def load_csvs(table=None):
    if table is not None:
        load_data(table)
    else:
        load_data('alias')
        load_data('state_codes')
        spinner = Halo({'text': 'Loading', 'spinner': 'dots'})
        spinner.start()
        for k, v in MAPTABLES.items():
            spinner.text = "Loading {}".format(k)
            load_data(k, state_name_field=v['state_key'])
        spinner.succeed("Load complete")
def check_app_status(self):
    if not self.db_setup_complete:
        return
    color = get_random_cli_color()
    if not self.initialized:
        f = Figlet(font=get_random_figlet_font(), width=120)
        print_message(f.renderText("vigorish"), wrap=False, fg=f"bright_{color}")
    spinner = Halo(spinner=get_random_dots_spinner(), color=color)
    spinner.text = "Updating metrics..." if self.initialized else "Loading..."
    spinner.start()
    if self.initialized:
        del self.app.audit_report
    self.audit_report = self.app.audit_report
    spinner.stop()
def combine_scraped_data_for_game(self):
    subprocess.run(["clear"])
    spinner = Halo(spinner=get_random_dots_spinner(), color=get_random_cli_color())
    spinner.text = f"Combining scraped data for {self.game_id}..."
    spinner.start()
    result = self.combine_data.execute(self.game_id)
    if (not result["gather_scraped_data_success"]
            or not result["combined_data_success"]
            or not result["update_pitch_apps_success"]):
        spinner.fail(f"Failed to combine data for {self.game_id}!")
        pause(message="Press any key to continue...")
        return Result.Fail(result["error"])
    pfx_errors = result["results"]["pfx_errors"]
    fail_results = [
        pfx_errors.pop("pitchfx_error", {}),
        pfx_errors.pop("invalid_pitchfx", {}),
    ]
    if all(len(f) <= 0 for f in fail_results):
        spinner.succeed(
            f"All scraped data for {self.game_id} was successfully combined!"
        )
        pause(message="Press any key to continue...")
        return Result.Ok()
    spinner.stop()
    subprocess.run(["clear"])
    total_pitch_apps = sum(len(f.keys()) for f in fail_results if f)
    pitch_apps_plural = "pitch appearances" if total_pitch_apps > 1 else "pitch appearance"
    total_at_bats = sum(
        len(at_bat_ids) for f in fail_results for at_bat_ids in f.values() if f)
    at_bats_plural = "at bats" if total_at_bats > 1 else "at bat"
    error_header = f"PitchFX data could not be reconciled for game: {self.game_id}\n"
    error_message = (
        f"{total_pitch_apps} {pitch_apps_plural} with data errors ({total_at_bats} total {at_bats_plural})\n"
    )
    print_message(error_header, wrap=False, fg="bright_red", bold=True, underline=True)
    print_message(error_message, fg="bright_red")
    if not self.prompt_user_investigate_failures():
        pause(message="Press any key to continue...")
        return Result.Ok()
    subprocess.run(["clear"])
    return self.patch_invalid_pfx_single_game()
def test_with_subset(metric: str, model: Vicl, dataset: Dataset, task: int, batch_size: int):
    device = model.device()
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=6)
    num_batches = len(dataloader)

    label_corrects = {}
    label_totals = {}

    model.eval()

    prefix = f'{metric} Testing accuracy'
    halo = Halo(text=prefix, spinner='dots').start()
    for batch_idx, batch in enumerate(dataloader):
        halo.text = f'{prefix} ({batch_idx + 1}/{num_batches})'

        data, labels = batch
        data = data.to(device)

        prediction = model.predict(data)
        for i in range(0, labels.size(0)):
            label = labels[i].item()

            if label not in label_corrects:
                label_corrects[label] = 0
            if label not in label_totals:
                label_totals[label] = 0

            label_totals[label] += 1
            label_corrects[label] += 1 if label == prediction[i] else 0

    assert len(label_corrects) == len(label_totals)

    total_accuracy = 0.0
    for label, total in label_totals.items():
        accuracy = label_corrects[label] / total
        total_accuracy += accuracy

    accuracy = total_accuracy / len(label_totals)
    wandb.log({f'{metric} Mean Acc.': accuracy})

    halo.succeed()
    return accuracy
def fetch(output_dir):
    spinner = Halo('Fetching tracks', spinner='dots')
    spinner.start()

    _track_list = TrackList.load_from_dir(output_dir)
    track_ids = get_missing(output_dir, AUDIO_ANALYSIS)

    if not exists(get_data_dir(output_dir, AUDIO_ANALYSIS)):
        makedirs(get_data_dir(output_dir, AUDIO_ANALYSIS))

    track_analyses = n_track_analyses_generator(track_ids)
    count = 0 + count_data_points(output_dir, AUDIO_ANALYSIS)
    for track_analysis in track_analyses:
        if 'track_not_found' in track_analysis:
            _track_list.remove_track_id(track_analysis['track_not_found'])
            print(f"removed {track_analysis['track_not_found']} from dataset")
            continue
        count += 1
        extracted = extract_track_analysis(track_analysis)
        spinner.text = f'Fetching tracks ({(count / _track_list.get_desired_tracks_amount()) * 100:.2f}%)'
        store_extracted_analysis(output_dir, extracted)

    spinner.stop()
    _track_list.dump(output_dir)
def _delete(stack_name):
    spinner = Halo(text='deleting', spinner='dots')
    spinner.start()
    client_cf = boto3.client('cloudformation')
    try:
        while True:
            client_cf.delete_stack(StackName='sprite')
            aws_response = client_cf.describe_stacks(StackName='sprite')
            spinner.text = 'checking delete status'
            status = aws_response['Stacks'][0]['StackStatus']
            if status != 'DELETE_IN_PROGRESS':
                raise BadStatusException(status)
    except BadStatusException as e:
        print(f"received bad status: {status}")
    except ClientError as e:
        if 'does not exist' in str(e):
            spinner.succeed('complete')
        else:
            raise e
    finally:
        spinner.stop()
def train(self, epochs: int) -> float:
    """
    Trains the model and returns the accuracy on the test_data

    Parameters:
    ----------
    epochs : int
        Number of epochs for the training

    Returns:
    -------
    float
        Accuracy of the model on the test_data
    """
    trainloader = DataLoader(self.train_data, batch_size=100, shuffle=True, num_workers=2)
    testloader = DataLoader(self.test_data, batch_size=100, shuffle=True, num_workers=2)

    halo = Halo(text='Loading', spinner='dots')
    halo.start()
    for epoch in range(epochs):
        for i, data in enumerate(trainloader, 0):
            features, targets = data
            features = Variable(features, requires_grad=False)
            targets = Variable(targets, requires_grad=False)

            self.optimizer.zero_grad()
            outputs = self.model(features)
            loss = self.criterion(outputs, targets)
            loss.backward()
            self.optimizer.step()
            halo.text = f"Epoch:{epoch}, Step:{(i+1)/40*100}, Loss:{loss.data[0]}"
    halo.stop()

    features = self.test_data.data_tensor
    targets = self.test_data.target_tensor
    features = Variable(features, requires_grad=False)
    _, output = self.model(features).max(dim=1)
    accuracy = accuracy_score(targets.numpy(), output.data.numpy())
    print(confusion_matrix(targets.numpy(), output.data.numpy()))
    print("accuracy", accuracy)
    return accuracy
async def run(self, setting: Setting, spinner: Halo) -> Result:
    try:
        return await self._inner_run(setting, spinner)
    finally:
        spinner.text = "[experiment] shutting everything down"
        shutdowns = []
        all_workers = setting.workers_west + setting.workers_east
        for worker in all_workers:
            shutdowns.append(
                worker.ssh.run("sudo systemctl stop 'spectrum-worker@*'", check=False))
            shutdowns.append(
                worker.ssh.run("sudo systemctl stop spectrum-leader", check=False))
        for client in setting.clients:
            shutdowns.append(
                client.ssh.run("sudo systemctl stop 'viewer@*'", check=False))
        shutdowns.append(
            setting.publisher.ssh.run(
                "sudo systemctl stop spectrum-publisher", check=False))
        await asyncio.gather(*shutdowns)
def combine_scraped_data_for_game(self, combine_game_id):
    subprocess.run(["clear"])
    spinner = Halo(color=get_random_cli_color(), spinner=get_random_dots_spinner())
    spinner.text = f"Combining scraped data for {combine_game_id}..."
    spinner.start()
    result = self.combine_data.execute(combine_game_id)
    if not (result["gather_scraped_data_success"]
            and result["combined_data_success"]
            and result["update_pitch_apps_success"]):
        spinner.fail(f"Failed to combine data for {combine_game_id}!")
        print_message(result["error"], wrap=False, fg="bright_red", bold=True)
        return Result.Fail(result["error"])
    spinner.stop()
    pfx_errors = result["results"]["pfx_errors"]
    if pfx_errors.get("pitchfx_error", []):
        self.pfx_errors[combine_game_id] = pfx_errors["pitchfx_error"]
    if pfx_errors.get("invalid_pitchfx", []):
        self.invalid_pfx[combine_game_id] = pfx_errors["invalid_pitchfx"]
    if self.total_pitch_apps_any_pitchfx_error > 0:
        pitch_apps_plural = ("pitch appearances"
                             if self.total_pitch_apps_any_pitchfx_error > 1
                             else "pitch appearance")
        at_bats_plural = "at bats" if self.total_at_bats_any_pitchfx_error > 1 else "at bat"
        message = (
            f"PitchFX data could not be reconciled for game: {combine_game_id}\n"
            f"{self.total_pitch_apps_any_pitchfx_error} {pitch_apps_plural} with data errors "
            f"({self.total_at_bats_any_pitchfx_error} total {at_bats_plural})\n"
        )
        print_message(message, fg="bright_yellow", bold=True)
    else:
        message = f"All scraped data for {combine_game_id} was successfully combined!"
        print_message(message, fg="bright_cyan", bold=True)
    pause(message="Press any key to continue...")
    return Result.Ok()
def export_state_data():
    try:
        shutil.rmtree(BUILD_DIR)
    except FileNotFoundError:
        pass
    os.makedirs(BUILD_DIR)
    os.makedirs("{}/states_level".format(BUILD_DIR))
    os.makedirs("{}/metrics_level".format(BUILD_DIR))

    states = DB['state_codes'].all()
    states = list(states)

    spinner = Halo({'text': 'Exporting state data', 'spinner': 'dots'})
    spinner.start()

    with open('{}/states_level/states.json'.format(BUILD_DIR), 'w') as f:
        msg = "Exporting {}".format("states")
        json.dump(states, f)

    for state in states:
        msg = "Exporting {}".format(state['code'])
        spinner.text = msg
        fname = "{}.json".format(state['code'].lower())
        d = copy.deepcopy(state)
        for metrics in MAPTABLES.keys():
            _metrics = DB[metrics].find_one(state_code=state['code'])
            if _metrics:
                for k in ['id', 'state_code', MAPTABLES[metrics]['state_key']]:
                    if k in _metrics:
                        del _metrics[k]
                d[metrics] = _metrics
        with open('{}/states_level/{}'.format(BUILD_DIR, fname), 'w') as f:
            json.dump(d, f)

    spinner.succeed("Export state data complete")
attributes = [x["id"] for x in res.json()] for attr in attributes: res = requests.get( f'{HOST}/api/terms/{term}/sectionattributevalues/{attr}/subjects' ) attributed_courses = [] subjects = [x["id"] for x in res.json()] # for every subject for sub in subjects: res = requests.get( f'{HOST}/api/terms/{term}/sectionattributevalues/{attr}/subjects/{sub}/courses' ) courses = [x for x in res.json()] for item in courses: item["sectionAttribute"] = attr spinner.text = f'{spinnertxt}: {n} courses observed, generating `{term} {attr}.jsonl`' n += 1 attributed_courses += courses with open(f'{term} {attr}.jsonl', 'w') as f: for item in attributed_courses: f.write(f'{json.dumps(item)}\n') spinner.succeed() print(f'Files were written') except Exception as err: spinner.fail() print(err) # write manifest.json with open(f'manifest.json', 'w') as f: f.write(f'{json.dumps(catalog_data, indent=4, sort_keys=True)}\n')
def patch_project(hw_prefix, patch_branch, source_repo, token, org, only_repo, dry):
    """Patch to student homeworks"""
    ensure_git_cached()
    ensure_gh_token(token)
    # init
    spinner = Halo(stream=sys.stderr)

    if source_repo == "":
        source_repo = f"tmpl-{hw_prefix}-revise"

    # Check if repo already contains the patched branch. Skip if so.
    # api : https://developer.github.com/v3/git/refs/#get-a-reference
    res = requests.get(
        f"https://api.github.com/repos/{org}/{source_repo}/git/refs/heads/{patch_branch}",
        headers=github_headers(token),
    )
    if res.status_code != 200:  # this branch does not exist on the remote
        spinner.fail(
            f"branch : `{patch_branch}` doesn't exist on repo:{org}/{source_repo} "
        )
        return

    cur = Path(".")
    for d in cur.glob("patch-*"):
        shutil.rmtree(d)
    spinner.info("delete dated folder")

    spinner.start(
        normal.txt("Fetch issue template").kw(patch_branch).txt(" from ").kw(
            source_repo).to_str())
    # Fetch patch template on the source repo
    issues = get_github_endpoint_paged_list(
        endpoint=f"repos/{org}/{source_repo}/issues",
        github_token=token,
        verbose=False)

    def find_target_issue() -> Optional[Dict]:
        for issue in issues:
            if issue["title"].strip() == patch_branch.strip():
                return issue
        return None

    target_issue = find_target_issue()
    if not target_issue:
        raise Exception(
            f"cannot found issue tmpl `{patch_branch}` on `{source_repo}`")
    issue_tmpl_body = target_issue["body"]
    spinner.succeed()

    root_folder = Path(
        tempfile.mkdtemp(
            prefix="patch-{}-{}-".format(
                patch_branch, datetime.now().strftime("%b%d%H%M%S")),
            dir=".",
        ))
    spinner.succeed(normal.txt("Create tmp folder ").kw(root_folder).to_str())

    spinner.info(
        normal.txt("Fetch soure repo").kw(source_repo).txt(
            " from GitHub ").to_str())
    src_repo_path = root_folder / "source_repo"
    sp.run(
        [
            "git",
            "clone",
            f"https://github.com/{org}/{source_repo}.git",
            src_repo_path.name,
        ],
        cwd=root_folder,
    )
    src_repo = Repo(src_repo_path)
    sp.run(
        ["git", "checkout", "--track", f"origin/{patch_branch}"],
        cwd=src_repo_path,
        stdout=sp.DEVNULL,
        stderr=sp.DEVNULL,
    )
    spinner.succeed()

    # Pasting changed files into students repo
    src_repo_git = src_repo.git
    src_repo_git.checkout(patch_branch)
    changed_files, renamed_files = get_changed_files(
        master_commit=src_repo.heads["master"].commit,
        patch_commit=src_repo.heads[patch_branch].commit,
    )
    spinner.start("Fetch information for homework repo")
    spinner.succeed()

    if only_repo is not None:
        repos = [
            re for re in query_matching_repos(org,
                                              github_repo_prefix=only_repo,
                                              github_token=token,
                                              verbose=False)
            if re["name"] == only_repo
        ]
        repo = next(iter(repos), None)
        if repo:
            spinner.info(
                normal.txt("Only patch to repo : ").kw(repo["name"]).to_str())
            repos = [repo]
    else:
        repos = query_matching_repos(org,
                                     github_repo_prefix=hw_prefix,
                                     github_token=token,
                                     verbose=False)
    spinner.succeed()

    # Patch to student repos
    student_path = root_folder / "student_repos"
    student_path.mkdir()
    for repo_idx, r in enumerate(repos, start=1):
        pre_prompt_str = (normal.txt(f"({repo_idx}/{len(repos)})").kw(
            f" {r['name']} ").to_str())
        spinner.start()

        # Check if repo already contains the patched branch. Skip if so.
        # api : https://developer.github.com/v3/git/refs/#get-a-reference
        res = requests.get(
            f"https://api.github.com/repos/{org}/{r['name']}/git/refs/heads/{patch_branch}",
            headers=github_headers(token),
        )
        if res.status_code == 200:  # this branch exists in the remote
            spinner.text = (
                pre_prompt_str + normal.kw(" Skip ").txt("already patched ").to_str())
            spinner.succeed()
            continue

        spinner.text = pre_prompt_str + normal.txt(" cloning repo...").to_str()
        sp.run(
            ["git", "clone", "--depth=1", r["html_url"]],
            cwd=student_path,
            stdout=sp.DEVNULL,
            stderr=sp.DEVNULL,
        )
        hw_repo_name = r["html_url"].rsplit("/")[-1]

        # open a new branch & checkout to that branch
        sp.run(
            ["git", "checkout", "-b", patch_branch],
            cwd=student_path / hw_repo_name,
            stdout=sp.DEVNULL,
            stderr=sp.DEVNULL,
        )

        # copy file to student repo
        for f in changed_files.keys():
            (student_path / hw_repo_name / f).parent.mkdir(parents=True, exist_ok=True)
            shutil.copyfile(src=src_repo_path / f, dst=student_path / hw_repo_name / f)
        for f in renamed_files.keys():
            os.remove(student_path / hw_repo_name / f)
        # changed_files = get_changed_files(
        #     master_commit = src_repo.heads['master'].commit,
        #     patch_commit = src_repo.heads[patch_branch].commit
        # )

        # push (publish) that branch to student repo
        sp.run(
            ["git", "add", "."],
            cwd=student_path / hw_repo_name,
            stdout=sp.DEVNULL,
            stderr=sp.DEVNULL,
        )

        # Pass if no change
        student_repo = Repo(student_path / hw_repo_name)
        if len(student_repo.index.diff("HEAD")) == 0:
            spinner.text = (
                pre_prompt_str + normal.kw2(" Passed ").txt("Repo no change").to_str())
            spinner.succeed()
            continue

        sp.run(
            ["git", "commit", "-m", f":construction_worker: Patch: {patch_branch}"],
            cwd=student_path / hw_repo_name,
            stdout=sp.DEVNULL,
            stderr=sp.DEVNULL,
        )
        spinner.text = pre_prompt_str + normal.kw(" publish patch to remote...").to_str()
        if dry:
            spinner.succeed(pre_prompt_str + normal.txt(" Patched ").to_str())
            continue
        res = sp.run(
            ["git", "push", "-u", "origin", patch_branch],
            cwd=student_path / hw_repo_name,
            stdout=sp.DEVNULL,
            stderr=sp.DEVNULL,
        )
        if res.returncode != 0:
            spinner.text = (pre_prompt_str + warn.kw(" Failed ") +
                            warn.txt(" Cannot push branch ").kw2(
                                patch_branch).txt(" to origin").to_str())
            spinner.fail()
            continue

        # open a pull-request on the student's repo
        # student_repo/patch-branch -> student_repo/master
        body = {
            "title": f"[PATCH] {patch_branch}",
            "body": issue_tmpl_body,
            "head": patch_branch,
            "base": "master",
        }
        res = requests.post(
            f"https://api.github.com/repos/{org}/{r['name']}/pulls",
            headers=github_headers(token),
            json=body,
        )
        if res.status_code == 201:
            spinner.text = pre_prompt_str + normal.txt(" Patched ").to_str()
            spinner.succeed()
        else:
            spinner.text = (pre_prompt_str + warn.kw(" Failed ") +
                            warn.txt("Cannot create PR").kw2(patch_branch).txt(
                                "to origin/master").to_str())
            spinner.fail()
            try:
                info = warn.txt(" ").txt(
                    res.json()["errors"][0]["message"]).to_str()
                print(info)
            except Exception:
                pass
            continue
def console_output(self, job_number):
    stream_spinner = Halo(stream=sys.stderr)
    stream_spinner.start('\n')
    print('\n\n')
    print(bcolors.OKCYAN + '#' * 74 + bcolors.ENDC)
    print(bcolors.OKCYAN + '##{:^70}##'.format(
        " Started Build [ {} ] - Build # {} ".format(self.job, self.job_number)) + bcolors.ENDC)
    print(bcolors.OKCYAN + '#' * 74 + bcolors.ENDC)

    headers = {
        "Jenkins-Crumb": self.crumb,
        "Content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Content-Length": "10"
    }

    # Get job console till job stops
    job_url = self.url + "/job/" + self.job + "/" + str(job_number) + "/logText/progressiveText"
    print(" Getting Console output @ ", job_url)
    start_at = 0
    stream_open = True
    check_job_status = 0

    console_requests_session = requests.session()
    console_requests_session.auth = (self.jenkins_user, self.jenkins_password)

    while stream_open:
        stream_spinner.text = "\n"
        console_response = console_requests_session.post(
            job_url, data={'start': start_at}, verify=False, headers=headers)
        content_length = int(console_response.headers.get("Content-Length", -1))
        content_length = int("10")

        if console_response.status_code != 200:
            stream_spinner.text = "\n"
            print(" Oppps we have an issue ... ")
            print(console_response.content)
            print(console_response.headers)
            exit(1)

        if content_length == 0:
            sleep(self.sleep)
            check_job_status += 1
        else:
            check_job_status = 0
            # Print to screen console
            if len(console_response.content) > 0:
                console_string = str(console_response.content.decode("utf-8"))
                if self.finish_failure_msg in str(console_response.content):
                    stream_spinner.text = "\n"
                    sleep(5)
                    print(self.format_console_output(console_string + "😑😑😑😑"))
                    self.job_status = "failed"
                    stream_open = False
                    # sys.exit()
                elif self.finish_success_msg in console_string:
                    stream_spinner.text = "\n"
                    sleep(5)
                    print(self.format_console_output(console_string + "🥳 🥳 🎉🎉🔥🔥💥💥⚡️⚡️"))
                    self.job_status = "success"
                    stream_open = False
                    # sys.exit()
                else:
                    stream_spinner.text = "\n"
                    self.format_console_output(console_string)
            try:
                sleep(self.sleep)
            except Exception:
                pass
            start_at = int(console_response.headers.get("X-Text-Size"))

        # No content for a while, let's check if the job is still running
        if check_job_status > 1:
            job_status_url = self.url + "/job/" + self.job + "/" + str(job_number) + "/api/json"
            job_requests = console_requests_session.get(job_status_url, verify=False)
            job_bulding = job_requests.json().get("building")
            if not job_bulding:
                # We are done
                print("stream ended")
                stream_open = False
            else:
                # Job is still running
                check_job_status = 0

    stream_spinner.text = "\n"
    stream_spinner.stop()
def filterScrape(need, category, page):
    spinner = Halo(text='Scraping content', spinner='dots', animation='bounce')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
    }
    output_dic = OrderedDict()
    found = 0
    try:
        while (found < need):
            spinner.start()
            url = "https://ctftime.org/writeups?page={}&hidden-tags={}".format(
                page, category)
            spinner.text = "Scraping Page: {}".format(page)
            response = requests.get(url, headers=headers)
            soup = BeautifulSoup(response.content, 'html.parser')
            count_per_page = 0
            for tr in soup.find_all('tr')[1:]:
                tds = tr.find_all('td')
                w_no = tds[4].a["href"]
                task_name = tds[1].text
                writeup_url = "https://ctftime.org/" + w_no
                r = requests.get(writeup_url, headers=headers)
                spinner.text = "Parsing {} ({})".format(
                    w_no, task_name.encode('ascii', 'ignore').decode('ascii'))
                spinner.color = "red"
                if (len(task_name) > 30):
                    task_name = task_name[:27] + '...'
                flag = 0
                original_url = ""
                new_soup = BeautifulSoup(r.content, 'lxml')
                a = new_soup.find_all('a')
                for link in a:
                    if link.text == "Original writeup":
                        original_url = link['href']
                        if (len(original_url) <= 125):
                            flag = 1
                        break
                if flag == 1:
                    if (task_name in output_dic):
                        output_dic[task_name] += '\n' + original_url
                    else:
                        output_dic[task_name] = original_url
                    count_per_page += 1
                    found += 1
                else:
                    if task_name not in output_dic:
                        count_per_page += 1
                        found += 1
                        output_dic[task_name] = writeup_url
                if (found == need):
                    break
                else:
                    continue
            if (count_per_page == 0):
                spinner.fail("Page {} doesn't exist.".format(page))
                spinner.info("Try decreasing the Page Seed or limit")
                spinner.info("Try changing the category")
                print(
                    "Such as : Change 'rev' -> 'reverse engineering' to get more results"
                )
                break
            else:
                spinner.succeed(
                    "Gathered writeups for {} tasks from page {}".format(
                        count_per_page, page))
                spinner.color = "cyan"
                page += 1
        return output_dic
    except (KeyboardInterrupt, SystemExit):
        spinner.warn('Program exited unexpectedly')
        exit()
def main(override_args=None):
    """Method to start the script"""
    logger.debug(f'START: get_threats_from_query_hash.py')

    # Load initial args
    parser = BaseScripts.start('Retrieve a list of responses from a given query hash.')
    parser.add_argument(
        '--query_fields',
        help='fields to be retrieved from the threat (default: only the hashkey)\n'
        'If an atom detail isn\'t present in a particular atom, an empty string is returned.',
        nargs='+',
        default=['threat_hashkey'],
    )
    parser.add_argument(
        '--list',
        help='Turn the output into a list (requires query_fields to be a single element)',
        action='store_true',
    )
    required_named = parser.add_argument_group('required arguments')
    required_named.add_argument(
        'query_hash',
        help='the query hash from which to retrieve the response hashkeys '
        'or a path to the query body json file',
    )

    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    configure_logging(args.loglevel)

    if len(args.query_fields) > 1 and args.list:
        parser.error(
            "List output format is only available if a single element is queried (via query_fields)"
        )

    query_body = {}
    query_hash = args.query_hash
    if len(query_hash) != 32 or os.path.exists(query_hash):
        try:
            with open(query_hash, 'r') as query_body_file:
                query_body = json.load(query_body_file)
        except FileNotFoundError:
            logger.error(
                f"Couldn't understand the given value as a query hash or path to query body: {query_hash}"
            )
            exit(1)

    # Load api_endpoints and tokens
    dtl = Datalake(env=args.env, log_level=args.loglevel)
    logger.debug(f'Start to search for threat from the query hash:{query_hash}')

    spinner = None
    if logger.isEnabledFor(logging.INFO):
        spinner = Halo(text=f'Creating bulk task', spinner='dots')
        spinner.start()

    task = dtl.BulkSearch.create_task(query_body=query_body,
                                      query_hash=query_hash,
                                      query_fields=args.query_fields)
    if spinner:
        spinner.text = f'Waiting for bulk task {task.uuid} response'
    response = task.download_sync()
    original_count = response.get('count', 0)
    if spinner:
        spinner.succeed()
        spinner.info(
            f'Number of threats that have been retrieved: {original_count}')

    formatted_output = format_output(response, args.list)
    if args.output:
        with open(args.output, 'w') as output:
            output.write(formatted_output)
    else:
        logger.info(formatted_output)

    if args.output:
        logger.info(f'Threats saved in {args.output}')
    else:
        logger.info('Done')