def fix_experiments(self, discovered_experiments: List[str]):
    """
    Run both repair passes over the given experiments.

    The metadata pass is completed for every experiment before the blank
    pass starts, so the two passes never interleave.

    :param discovered_experiments: experiments handed, one at a time, first
        to ``self.fix_metadata`` and afterwards to ``self.fix_blanks``
    """
    experiment_count = len(discovered_experiments)
    logging.info("fixing %d experiments", experiment_count)

    # pass 1: enhance metadata where possible
    metadata_pass = tqdm(discovered_experiments, "correct metadata for experiment")
    for current in metadata_pass:
        self.fix_metadata(current)

    # pass 2: per the original note, blanks are discovered by naming pattern
    # and linked as reference to the acquisition object
    blank_pass = tqdm(discovered_experiments, "correct blanks, where required")
    for current in blank_pass:
        self.fix_blanks(current)
async def main():
    """
    Demonstrate driving tqdm progress bars from inside a coroutine.

    Three scenarios run in sequence: nested bars consumed with ``async for``,
    a bar wrapped around ``count()`` that is steered with ``send``, and
    ``tqdm.as_completed`` over ten one-second sleeps.
    """
    limit = int(1e6)

    # nested bars, iterated asynchronously
    async for value in tqdm(trange(limit, desc="inner"), desc="outer"):
        if value >= limit:
            break

    # presumably ``count`` is a generator that accepts ``send`` to reposition
    # itself -- TODO confirm against its definition
    with tqdm(count(), desc="coroutine", total=limit + 2) as bar:
        async for value in bar:
            if value == limit:
                bar.send(-10)
                continue
            if value < 0:
                assert value == -9
                break

    # should be under 10 seconds
    sleepers = [asyncio.sleep(1) for _ in range(10)]
    for finished in tqdm.as_completed(sleepers, desc="as_completed"):
        await finished
def fix_unknown_experiment(self, args):
    """
    Re-assign every sample stored under the "unknown" experiment.

    Each sample id is parsed with ``self.extract_properties``; the parsed
    experiment and ion mode are written back onto the sample, the sample is
    pushed through ``self.update_acquisition_data``, and finally all
    experiments seen along the way are passed to ``self.fix_experiments``.

    :param args: not read by this method
    :return: ``{"updated": <number of samples loaded>}``
    """
    logging.info("loading all samples for unknown experiment...")

    # 1. load the experiment and all its samples
    samples = self.stasisClient.load_samples_for_experiment("unknown")
    logging.info("found %d samples", len(samples))

    # kept as a list so experiments stay in first-seen order
    discovered_experiments = []

    # 2. extract the properties from the file names, like mode, experiment, instrument
    for sample in tqdm(samples, "discovering correct experiments"):
        # instrument and sample id are parsed but not needed here
        experiment, _instrument, ionmode, _sample_id = self.extract_properties(sample['id'])

        sample['experiment'] = experiment
        # anything that does not start with "pos" is recorded as negative
        sample['acquisition']['ionisation'] = (
            "positive" if ionmode.lower().startswith('pos') else "negative"
        )

        logging.info("updating acquisition data for %s", sample['id'])
        self.update_acquisition_data(sample)

        if experiment not in discovered_experiments:
            discovered_experiments.append(experiment)

    # 3. run the experiment-level fixes on the discovered experiment ids
    self.fix_experiments(discovered_experiments)

    return {"updated": len(samples)}
async def main(database, out_dir, preprocess, num_workers):
    """
    Event loop: feed every tweet of ``database`` to a pool of workers.

    Args:
        database: name of the MongoDB database to read ``tweets`` from.
        out_dir: forwarded unchanged to every ``worker``.
        preprocess: not read by this function.
        num_workers: number of concurrent ``worker`` tasks to start.
    """
    print(f"Connecting to {database}")
    client = motor.motor_asyncio.AsyncIOMotorClient()
    db = client[database]

    print("Contando...")
    tweets = db.tweets.find()
    pbar = tqdm(total=await db.tweets.estimated_document_count())

    print("Comenzando!")
    queue = asyncio.Queue()

    # Start ``num_workers`` worker tasks that consume the queue concurrently.
    tasks = [
        asyncio.create_task(worker(f'worker-{i}', queue, pbar, out_dir))
        for i in range(num_workers)
    ]

    try:
        # The queue is unbounded, so put_nowait never raises QueueFull.
        async for tweet in tweets:
            queue.put_nowait(tweet)

        await queue.join()
    finally:
        # Workers loop forever; cancel them even if producing failed, then
        # wait until all worker tasks are cancelled.
        for task in tasks:
            task.cancel()
        await asyncio.gather(*tasks, return_exceptions=True)
        pbar.close()
def test_ww_edges_time(self):
    """
    Time ``compute_word_word_edges`` on the Amazon training corpus.

    The CSV is tokenised and restricted to the fitted vocabulary, documents
    are encoded as vocabulary ids and right-padded with ``-1`` to a common
    length, and the elapsed time of loading and of graph building is printed.
    """
    print("loading and parsing data, this might take a few seconds...")
    started_at = datetime.now()

    frame = pd.read_csv("../../data/amazon/train.csv")
    raw_docs = frame['Text'].tolist()
    vectorizer = CountVectorizer(stop_words="english", min_df=5, max_df=0.9).fit(raw_docs)
    n_vocab = len(vectorizer.vocabulary_)
    n_documents = len(raw_docs)

    # lower-cased word tokens, keeping only those known to the vocabulary
    keep_known_tokens = jl.delayed(lambda doc: [
        tok.lower() for tok in RegexpTokenizer(r"\w+").tokenize(doc)
        if tok.lower() in vectorizer.vocabulary_
    ])
    tokenized = jl.Parallel(n_jobs=8)(
        keep_known_tokens(doc) for doc in tqdm(raw_docs))

    max_sent_len = max(map(len, tokenized))

    # vocabulary ids, padded with -1 up to the longest document
    encode_and_pad = jl.delayed(
        lambda doc: [vectorizer.vocabulary_[w] for w in doc]
        + [-1] * (max_sent_len - len(doc)))
    encoded_rows = jl.Parallel(n_jobs=8)(encode_and_pad(doc) for doc in tokenized)
    X = np.array(encoded_rows, dtype=np.int32)

    # sanity check of the fixture itself before the real test starts
    assert X.shape == (n_documents, max_sent_len)
    loaded_at = datetime.now()
    print(f"loading complete!. Took {loaded_at - started_at}")

    print("starting unit test...")
    result = compute_word_word_edges(X, n_vocab, n_documents, max_sent_len, n_jobs=8)
    print(f"edge shape is {result[0].shape}")
    print(result)
    built_at = datetime.now()
    print(f"graph building took {built_at - loaded_at}")
def __init__(
    self,
    queue: Queue,
    filename: str,
    max_interval: int = 2,
    max_chunk: int = 100,
    include_headers: bool = False,
    silent: bool = False,
    early_stop: int = None,
):
    """
    Prepare the output file and the writer's bookkeeping state.

    :param queue: stored as ``self.data_queue``
    :param filename: output path; missing parent directories are created
    :param max_interval: stored as ``self.max_interval``
    :param max_chunk: stored as ``self.max_chunk``
    :param include_headers: not read by this constructor
    :param silent: when true no progress bar is created
    :param early_stop: stored as ``self.early_stop``
    """
    self.filename = filename

    # NOTE(review): headers are written whenever the target file does not
    # exist yet -- the nesting was reconstructed, confirm against the original
    target_is_new = not os.path.exists(filename)
    if target_is_new:
        parent_dir = os.path.dirname(self.filename)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        self._write_headers()

    self.max_interval = max_interval
    self.max_chunk = max_chunk
    self.data_queue = queue
    self.stop_flag = False
    self.early_stop = early_stop
    self.total = 0
    self.progress = None if silent else tqdm(desc="written", unit="emails")
    self.state = None
async def _download_from_asyncgen(
    items: AsyncGenerator,
    params: DownloadParams,
    tcp_connections: int = 64,
    nb_workers: int = 64,
    batch_size: int = 16,
    retries: int = 1,
    logger: logging.Logger = None,
):
    """Asynchronous downloader that takes an iterable and downloads it

    Items are grouped into batches of ``batch_size`` and pushed onto a bounded
    queue that ``nb_workers`` ``_download_queue`` tasks consume.

    Args:
        items (AsyncGenerator): async generator that yields a standardized
            dict of urls
        params (DownloadParams): Download parameter dict
        tcp_connections (int, optional): Maximum number of concurrent TCP
            connections. Defaults to 64.
        nb_workers (int, optional): Maximum number of workers; also the queue
            capacity in batches. Defaults to 64.
        batch_size (int, optional): Maximum queue batch size. Defaults to 16.
        retries (int, optional): Maximum number of attempts. Defaults to 1.
        logger (logging.Logger, optional): Logger object. Defaults to None,
            in which case this module's logger is used.

    Returns:
        dict: the ``failed`` / ``skipped`` / ``success`` counters handed to
        the workers.
    """
    if logger is None:
        # the progress-bar switch below needs a real logger
        logger = logging.getLogger(__name__)

    queue = asyncio.Queue(nb_workers)
    # the bar is only shown when the logger is at INFO or more verbose
    progressbar = tqdm(smoothing=0, unit=" Downloads", disable=logger.getEffectiveLevel() > logging.INFO)
    stats = {"failed": 0, "skipped": 0, "success": 0}
    retry_options = ExponentialRetry(attempts=retries)

    try:
        async with RetryClient(
            connector=aiohttp.TCPConnector(limit=tcp_connections),
            raise_for_status=True,
            retry_options=retry_options,
            trust_env=True,
        ) as session:
            loop = asyncio.get_event_loop()
            workers = [
                loop.create_task(
                    _download_queue(queue, session, stats, params=params, progressbar=progressbar, logger=logger))
                for _ in range(nb_workers)
            ]
            try:
                # get chunks from async generator and add to async queue
                async with aiostream.stream.chunks(items, batch_size).stream() as chnk:
                    async for batch in chnk:
                        await queue.put(batch)

                await queue.join()
            finally:
                # stop the workers and let them finish unwinding while the
                # session is still open
                for w in workers:
                    w.cancel()
                await asyncio.gather(*workers, return_exceptions=True)
    finally:
        progressbar.close()

    return stats
async def download(client, path, model_id, url, date=None, id_=None):
    """
    Stream ``url`` to ``path / <file name>`` while showing a byte progress bar.

    The file name is the last path segment of ``url`` with any query string
    removed. Error responses are raised via ``raise_for_status``. Once the
    file exists, its timestamp is set from ``date`` (if given) and
    ``(id_, filename)`` is recorded through ``operations.write_from_data``
    (if ``id_`` is given).

    :param client: object exposing ``stream('GET', url)`` as an async context
        manager (presumably an ``httpx.AsyncClient`` -- TODO confirm)
    :param path: directory-like object supporting ``/`` and ``is_file()``
    :param model_id: forwarded to ``operations.write_from_data``
    :param url: source URL
    :param date: optional date passed to ``convert_date_to_timestamp``
    :param id_: optional media id to record
    """
    filename = url.split('?', 1)[0].rsplit('/', 1)[-1]
    path_to_file = path / filename

    async with client.stream('GET', url) as r:
        if not r.is_error:
            # Content-Length may be absent (e.g. chunked transfer); tqdm then
            # shows a bar without a total instead of this call failing.
            content_length = r.headers.get('Content-Length')
            total = int(content_length) if content_length is not None else None
            with tqdm(desc=filename, total=total, unit_scale=True,
                      unit_divisor=1024, unit='B', leave=False) as bar:
                num_bytes_downloaded = r.num_bytes_downloaded
                with open(path_to_file, 'wb') as f:
                    async for chunk in r.aiter_bytes(chunk_size=1024):
                        f.write(chunk)
                        # advance by the delta reported by the response
                        bar.update(r.num_bytes_downloaded - num_bytes_downloaded)
                        num_bytes_downloaded = r.num_bytes_downloaded
        else:
            r.raise_for_status()

    if path_to_file.is_file():
        if date:
            set_time(path_to_file, convert_date_to_timestamp(date))
        if id_:
            data = (id_, filename)
            operations.write_from_data(data, model_id)
def select(func, start_date="2016-10-01", end_date=None, callback=print, order_book_id_list=None) -> np.array:
    """
    Apply ``func`` to every instrument on every trading date in a range.

    Trading dates are visited newest first; for each date the current date is
    set and ``choose(order_book_id, func, callback)`` is collected for every
    instrument.

    :param func: selection function; its source is printed up front
    :param start_date: first date of the range, converted with ``get_int_date``
    :param end_date: last date of the range; defaults to today
    :param callback: forwarded to ``choose``
    :param order_book_id_list: instruments to scan; when omitted or empty the
        data backend's full list is used
    :return: the collected ``choose`` results converted by ``_list2Array``
    """
    result = []
    print(getsourcelines(func))

    start_date = get_int_date(start_date)
    if end_date is None:
        end_date = datetime.date.today()
    end_date = get_int_date(end_date)

    data_backend = ExecutionContext.get_data_backend()
    if order_book_id_list is None or len(order_book_id_list) == 0:
        order_book_id_list = data_backend.get_order_book_id_list()

    trading_dates = data_backend.get_trading_dates(start=start_date, end=end_date)
    set_start_date(trading_dates[0] - 10000)

    # the original re-normalised both bounds on every iteration; do it once
    first_date = get_int_date(start_date)
    last_date = get_int_date(end_date)

    for date in reversed(trading_dates):
        if end_date and date > last_date:
            continue
        if date < first_date:
            # dates before the start date mean the scan is complete
            break
        set_current_date(str(date))
        print(f"[{date}]")

        # a fresh bar per date; the id list itself is never rebound, so bars
        # do not end up wrapped inside one another
        progress = tqdm(order_book_id_list)
        for order_book_id in progress:
            result.append(choose(order_book_id, func, callback))
            progress.set_description(
                "Processing {}".format(order_book_id))
        print("")
    return _list2Array(result)
def meter(**kwargs):
    """
    A monitoring :term:`step`.

    Builds a `tqdm` progress bar whose position is driven by the data passing
    through the step: each incoming value (must be float) becomes the bar's
    current count.

    For the full list of arguments see the
    `tqdm documentation <https://github.com/tqdm/tqdm#documentation>`_

    Keyword Args:
        desc (str)
            Prefix for the progressbar description
        total (int or float, optional)
            Maximal value
        bar_format (str, optional)
            Overrides the default layout, which prints the raw value

    :Input:
        data (float): A value to be displayed
    :Output:
        data unchanged
    """
    if 'bar_format' not in kwargs:
        kwargs['bar_format'] = '{desc}[{n: 6.4f}]|{bar}|{r_bar}'
    bar = tqdm(**kwargs)

    def _show(value):
        # set the counter directly, then force a redraw
        bar.n = value
        bar.update(0)
        bar.refresh()
        return value

    return _show
async def main():
    """
    Run the ``basic``/``sleep`` node and mirror its progress in a tqdm bar.

    Stream items are either ``"<progress>:<message>"`` strings, which update
    the bar and its postfix, or a dict, which is taken as the final result and
    ends the stream. The final result is printed.
    """
    # NOTE(review): host and OAuth client credentials are hard-coded below;
    # consider loading them from configuration or the environment.
    async with Bergen(
        host="p-tnagerl-lab1",
        port=8000,
        client_id="DSNwVKbSmvKuIUln36FmpWNVE2KrbS2oRX0ke8PJ",
        client_secret="Gp3VldiWUmHgKkIxZjL2aEjVmNwnSyIGHWbQJo6bWMDoIUlBqvUyoGWUWAe6jI3KRXDOsD13gkYVCZR0po1BLFO9QT4lktKODHDs0GyyJEzmIjkpEOItfdCC4zIa3Qzu",
        name="frankomanko",  # to only use pods on this instance, this name would go into the selector
    ):
        sleep = await Node.asyncs.get(package="basic", interface="sleep")

        result = None
        with tqdm(total=100) as pbar:
            async with sleep.stream_progress({"interval": 1}) as stream:
                async for item in stream:
                    result = item
                    if isinstance(result, dict):
                        break

                    # split on the first colon only, so a message that itself
                    # contains ":" (or has none) cannot break the unpacking
                    progress, _, message = item.partition(":")
                    try:
                        pbar.n = int(progress)
                    except ValueError:
                        # non-numeric progress: keep the previous position
                        pass
                    else:
                        pbar.refresh()

                    pbar.set_postfix_str(textwrap.shorten(message, width=30, placeholder="..."))

            # the stream is finished: show a completed bar
            pbar.n = 100
            pbar.refresh()
            pbar.set_postfix_str("Done")

        print(result)
async def achoose(order_book_id_list, func, callback):
    """
    Run ``choose`` for every entry of ``order_book_id_list`` under a tqdm bar.

    Each outcome is appended to ``results``, a name resolved from the
    enclosing scope; nothing is returned.

    :param order_book_id_list: indexable sequence of order book ids
    :param func: forwarded to ``choose``
    :param callback: forwarded to ``choose``
    """
    index_range = range(len(order_book_id_list))
    with tqdm(index_range) as pbar:
        async for position in pbar:
            book_id = order_book_id_list[position]
            results.append(choose(book_id, func, callback))
            # NOTE(review): the description is refreshed for every index that
            # is NOT a multiple of 10 -- confirm this is the intended condition
            if position % 10 != 0:
                pbar.set_description(f"{position}, {book_id}")
async def main():
    """
    Demonstrate tqdm's asyncio support in three consecutive scenarios.

    1. nested bars consumed with ``async for``;
    2. a bar around ``count()`` that is repositioned through ``send``;
    3. ``tqdm.as_completed`` over 100 sleeps of decreasing length.
    """
    upper = int(1e6)

    async for item in tqdm(trange(upper, desc="inner"), desc="outer"):
        if item >= upper:
            break

    # presumably ``count`` is a send-aware generator -- TODO confirm
    with tqdm(count(), desc="coroutine", total=upper + 2) as progress:
        async for item in progress:
            if item < 0:
                assert item == -9
                break
            if item == upper:
                progress.send(-10)

    # should be ~1sec rather than ~50s due to async scheduling
    naps = [asyncio.sleep(0.01 * ticks) for ticks in reversed(range(1, 101))]
    for done in tqdm.as_completed(naps, desc="as_completed"):
        await done
def reset(self, desc: str = 'Загрузка данных', total_count: Optional[int] = None) -> type(None):
    """
    Replace the loading progress bar with a fresh one.

    :param desc: str (optional); the default is the Russian label for
        "Loading data"
        Name shown on the progress bar
    :param total_count: (optional)
        Total number of items that will be iterated
    """
    try:
        stale_bar = self.load
        stale_bar.close()
    except AttributeError:
        # no usable bar to close yet
        pass
    self.load = tqdm(total=total_count, desc=desc, ncols=100, unit='ШТ')
async def count_here(self, ctx):
    """
    Walk the full history of every text channel in the guild.

    Every message is passed to ``self.get_message_stats`` together with the
    guild id. A short-lived status message is sent per channel and a final
    message reports the number of messages processed.

    :param ctx: invocation context providing ``send`` and ``guild``
    """
    processed: int = 0
    await ctx.send("Starting counting...")

    for text_channel in ctx.guild.text_channels:
        channel_banner = "Now in " + text_channel.name
        print(channel_banner)

        status = channel_banner + ". Messages processed to this moment: " + str(processed)
        await ctx.send(status, delete_after=5.0)

        # limit=None asks for the whole history of the channel
        async for message in tqdm(text_channel.history(limit=None)):
            self.get_message_stats(ctx.guild.id, message)
            processed += 1

    await ctx.send("I'm done! Messages processed: " + str(processed))
async def main(database, out_dir, preprocess, num_workers):
    """
    Event loop: feed every processed user, with its tweets, to a worker pool.

    Args:
        database: name of the MongoDB database to read from.
        out_dir: forwarded unchanged to every ``worker``.
        preprocess: not read by this function.
        num_workers: number of concurrent ``worker`` tasks to start.
    """
    print(f"Connecting to {database}")
    client = motor.motor_asyncio.AsyncIOMotorClient()
    db = client[database]

    query = {"processed": True}

    print("Contando...")
    total_users = await db.users.count_documents(query)

    print("Buscando usuarios...")
    # join each matching user with its tweets, keeping only the fields below
    users_and_tweets = db.users.aggregate([
        {"$match": query},
        {
            "$lookup": {
                "from": "tweets",
                "localField": "id",
                "foreignField": "user_id",
                "as": "tweets",
            }
        },
        {"$project": {"id": 1, "screen_name": 1, "tweets.text": 1}},
    ])

    pbar = tqdm(total=total_users)
    queue = asyncio.Queue()

    # Start ``num_workers`` worker tasks that consume the queue concurrently.
    print(f"Creando {num_workers} workers")
    tasks = [
        asyncio.create_task(worker(f'worker-{i}', queue, pbar, out_dir))
        for i in range(num_workers)
    ]

    print("Comenzando!")
    try:
        # The queue is unbounded, so put_nowait never raises QueueFull.
        async for user in users_and_tweets:
            queue.put_nowait(user)

        await queue.join()
    finally:
        # Workers loop forever; cancel them even if producing failed, then
        # wait until all worker tasks are cancelled.
        for task in tasks:
            task.cancel()
        await asyncio.gather(*tasks, return_exceptions=True)
        pbar.close()
async def download_photos(photos_path: Path, photos: list):
    """
    Download every photo into ``photos_path`` over one shared HTTP session.

    File names follow ``<index>_<likes>_<owner_id>_<id>.jpg`` with a 1-based
    index; a missing ``likes`` entry becomes an empty string. Downloads are
    awaited in completion order under a progress bar.

    :param photos_path: destination directory
    :param photos: dicts with ``url``, ``owner_id``, ``id`` and optionally ``likes``
    """
    async with aiohttp.ClientSession() as session:
        pending = []
        for position, item in enumerate(photos, start=1):
            likes = item.get("likes", "")
            file_name = f"{position}_{likes}_{item['owner_id']}_{item['id']}.jpg"
            target = photos_path / file_name
            pending.append(download_photo(session, item["url"], target))

        completed = asyncio.as_completed(pending)
        for finished in tqdm(completed, total=len(pending)):
            await finished
async def scan_mem(rest: RestClient, settings: Dict[str, Any], *filters: TxFilter) -> List[Tx]:
    """
    Scan available mempool, for each transaction get the prevout information from the UTXO set

    A transaction is kept when no filters were given, or when the filters'
    ``match`` results satisfy ``all`` (``settings['filtering']['match_all']``
    true) or ``any`` (otherwise). Transactions whose fetch raises
    ``ClientResponseError`` are skipped. Errors are reported through
    ``print_error`` and whatever was collected so far is returned.

    :param rest: client used to read the mempool and fetch transactions
    :param settings: reads ``filtering.match_all`` and ``limits.concurrency_limit``
    :param filters: optional filters exposing ``match(tx)``
    :return: the list of matching transactions
    """
    result: List[Tx] = []
    result_append: Callable = result.append
    pending: set[Task] = set()
    match_policy: Callable = all if settings['filtering']['match_all'] else any
    f_matches: List[Callable] = [f.match for f in filters]
    no_filter: bool = not filters

    print('Requested mempool scan\n')

    try:
        mempool: Dict[str, Any] = await rest.get_mempool(True)
        # each step yields a finished awaitable plus the tasks still in flight
        async for tx_done, pending in tqdm(iterate_mem_txs(rest, mempool, settings['limits']['concurrency_limit']),
                                           miniters=200, mininterval=0.5, total=len(mempool)):
            try:
                tx: Tx = await tx_done
            except ClientResponseError:
                continue

            if no_filter or match_policy(f(tx) for f in f_matches):
                result_append(tx)

    except CancelledError as err:
        # logger.warning('Tasks canceled', exc_info=True)
        print_error('Task canceled', str(err))
    except ClientConnectionError as err:
        print_error('Connection error', 'Cannot establish connection with Bitcoin full node')
        print(str(err))
    except MemoryError as err:
        # logger.warning('MemoryError', exc_info=True)
        print_error('Memory error', str(err))
    except KeyboardInterrupt:
        print_error('Keyboard Interrupt', 'Stopping execution')
    except Exception as err:
        # logger.error('Something went wrong', exc_info=True)
        print_error('Something went wrong', str(err))
    finally:
        # Clean up: cancel whatever is still in flight and wait for it.
        for task in pending:
            task.cancel()
            try:
                await task
            except (CancelledError, Exception):
                # best effort; CancelledError is listed explicitly because it
                # is not an Exception subclass on Python 3.8+, while
                # KeyboardInterrupt/SystemExit are no longer swallowed here
                pass

    return result
async def scan_blocks(start: int, end: int, rest: RestClient, settings: Dict[str, Any], *filters: TxFilter) -> List[Tx]:
    """
    Scan from `start` block height to `end` block height, included, and collect each transaction
    from those blocks that match the given criteria.
    See `parse_start_and_end` function for valid `start` and `end` values.

    Errors are reported through ``print_error`` and whatever was collected so
    far is returned.

    :param start: first block height, normalised by ``parse_start_and_end``
    :param end: last block height (included), normalised the same way
    :param rest: client used to read chain info and blocks
    :param settings: reads ``scan.force``, ``limits.memory_limit``,
        ``limits.concurrency_limit`` and ``filtering.match_all``
    :param filters: filters forwarded to ``iter_filter_block_txs``
    :return: the list of matching transactions
    """
    result: List[Tx] = []
    pending: set[Task] = set()
    result_extend: Callable = result.extend

    try:
        chain_info = await rest.get_chain_info()
        start, end = parse_start_and_end(start, end, chain_info, settings['scan']['force'])

        print(f'Requested scan from block {start} to block {end}, included.\n')

        # each step yields a finished awaitable plus the tasks still in flight
        async for block_done, pending in tqdm(iterate_blocks(start, end, rest,
                                                             settings['limits']['memory_limit'],
                                                             settings['limits']['concurrency_limit']),
                                              miniters=1, mininterval=0.5, total=end + 1 - start):
            result_extend(iter_filter_block_txs(await block_done, settings['filtering']['match_all'], filters))

    except CancelledError as err:
        # logger.warning('Tasks canceled', exc_info=True)
        print_error('Task canceled', str(err))
    except ClientConnectionError:
        # logger.error('Connection error', exc_info=True)
        print_error('Connection error', 'Cannot establish connection with Bitcoin full node')
    except MemoryError as err:
        # logger.warning('MemoryError', exc_info=True)
        print_error('Memory error', str(err))
    except KeyboardInterrupt:
        print_error('Keyboard Interrupt', 'Stopping execution')
    except Exception as err:
        # logger.error('Something went wrong', exc_info=True)
        print_error('Something went wrong', str(err))
    finally:
        # Clean up: cancel whatever is still in flight and wait for it.
        for task in pending:
            task.cancel()
            try:
                await task
            except (CancelledError, Exception):
                # best effort; CancelledError is listed explicitly because it
                # is not an Exception subclass on Python 3.8+, while
                # KeyboardInterrupt/SystemExit are no longer swallowed here
                pass

    return result
async def _retrieve_and_write_to_disk(*,
                                      response: httpx.Response,
                                      outfile: Path,
                                      mode: Literal['ab', 'wb'],
                                      desc: str,
                                      local_file_size: int,
                                      remote_file_size: int,
                                      remote_file_hash: Optional[str],
                                      verify_hash: bool,
                                      verify_size: bool) -> None:
    """
    Stream ``response`` into ``outfile`` and optionally verify the result.

    :param response: streaming response whose body is written chunk by chunk
    :param outfile: destination file
    :param mode: ``'ab'`` or ``'wb'``, passed to ``aiofiles.open``
    :param desc: progress bar label
    :param local_file_size: bytes already on disk; initial bar position
    :param remote_file_size: expected final size; bar total
    :param remote_file_hash: expected MD5 hex digest, or ``None`` to skip
    :param verify_hash: compare the MD5 of the complete file with
        ``remote_file_hash``
    :param verify_size: compare the final on-disk size with ``remote_file_size``
    :raises AssertionError: when the hash check fails
    :raises RuntimeError: when the size check fails
    """
    hasher = hashlib.md5()

    # If we're resuming a download, ensure the already-downloaded
    # parts of the file are fed into the hash function before
    # we continue.
    if verify_hash and local_file_size > 0:
        async with aiofiles.open(outfile, 'rb') as f:
            while True:
                data = await f.read(65536)
                if not data:
                    break
                hasher.update(data)

    async with aiofiles.open(outfile, mode=mode) as f:
        with tqdm(desc=desc, initial=local_file_size,
                  total=remote_file_size, unit='B',
                  unit_scale=True, unit_divisor=1024,
                  leave=False) as progress:

            num_bytes_downloaded = response.num_bytes_downloaded
            # TODO Add timeout handling here, too.
            async for chunk in response.aiter_bytes():
                await f.write(chunk)
                # advance by the delta reported by the response
                progress.update(response.num_bytes_downloaded -
                                num_bytes_downloaded)
                num_bytes_downloaded = response.num_bytes_downloaded
                if verify_hash:
                    hasher.update(chunk)

            if verify_hash and remote_file_hash is not None:
                actual_hash = hasher.hexdigest()
                # raised explicitly (same exception type as the former
                # ``assert``) so the check also runs under ``python -O``
                if actual_hash != remote_file_hash:
                    raise AssertionError(
                        f'Hash mismatch for {outfile}: expected '
                        f'{remote_file_hash}, got {actual_hash}.')

            # Check the file was completely downloaded.
            if verify_size:
                await f.flush()
                local_file_size = outfile.stat().st_size
                if local_file_size != remote_file_size:
                    raise RuntimeError(
                        f'Server claimed file size would be {remote_file_size} '
                        f'bytes, but downloaded {local_file_size} bytes.')
async def map_doc(index_name="documents"):
    """
    Yield one ``index`` action per passage of the collection file.

    The collection at ``COLLECTION_PATH`` is read in chunks of 50000 rows with
    the columns ``pid`` and ``passage``; a progress bar counts the chunks.

    :param index_name: value placed in each action's ``_index`` field
    """
    rows_per_chunk = 50000
    loader = DataLoader(COLLECTION_PATH, chunksize=rows_per_chunk, names=["pid", "passage"])

    chunks = tqdm(enumerate(loader.reader), desc="collection")
    for _chunk_no, frame in chunks:
        for _row_label, record in frame.iterrows():
            action = {'_op_type': 'index', '_index': index_name}
            action['pid'] = record.pid
            action['passage'] = record.passage
            yield action
async def upload_controller(src_folder: Path, dst_url: str):
    """
    Upload every image found under ``src_folder`` (recursively) to ``dst_url``.

    One ``upload_task`` is scheduled per image; all tasks share a semaphore
    sized by ``MAX_CONCURRENT_FILES_PROCESSED`` and are awaited in completion
    order under a progress bar.

    :param src_folder: directory searched recursively for image files
    :param dst_url: destination forwarded to ``upload_task``
    :raises FileNotFoundError: if ``src_folder`` does not exist
    """
    if not src_folder.exists():
        raise FileNotFoundError('Folder you specified does not exist')

    # lazy: evaluated only when the task list below is built
    image_files = (
        candidate
        for candidate in src_folder.rglob('*')
        if candidate.is_file() and filetype.is_image(str(candidate))
    )

    loop = asyncio.get_event_loop()
    semaphore = asyncio.Semaphore(value=MAX_CONCURRENT_FILES_PROCESSED)

    async with aiohttp.ClientSession() as session:
        tasks = []
        for image_path in image_files:
            upload = upload_task(session, image_path, dst_url, semaphore)
            tasks.append(loop.create_task(upload))

        for finished in tqdm(asyncio.as_completed(tasks), total=len(tasks)):
            await finished
def update_tests(mock_get):
    """
    Regenerate the expected-output fixtures from ``test_words``.

    Every ``(word, old_id, language)`` entry is fetched through ``parser``;
    the raw result and its packed form are grouped by language and written to
    ``test_fetch_output.json`` and ``test_pack_output.json``.

    :param mock_get: not read by this function
    """
    fetch_languages = {}
    pack_languages = {}

    with tqdm(total=len(test_words)) as progress_bar:
        for word, old_id, language in test_words:
            parser.set_language(language)
            result = parser.fetch(word, old_id=old_id)

            fetch_languages.setdefault(language, {})[word] = result

            packed_bucket = pack_languages.setdefault(language, {})
            packed_bucket[word] = parser.pack_definitions_and_examples(result)

            progress_bar.update()

    with open('test_fetch_output.json', 'w') as f:
        json.dump(fetch_languages, f, ensure_ascii=False, indent=4)
    with open('test_pack_output.json', 'w') as f:
        json.dump(pack_languages, f, ensure_ascii=False, indent=4)
async def process_urls(headers, username, model_id, urls):
    """
    Download every url concurrently and keep a running tally on the bar.

    Downloads are saved under ``<cwd>/<username>``. Each finished download
    reports ``(media_type, num_bytes_downloaded)``; photo and video counts
    and the total size are shown in the bar's description. A failed download
    is printed and counted as processed, but contributes nothing to the tally.

    :param headers: HTTP headers for the shared client
    :param username: name of the destination directory under the cwd
    :param model_id: database key forwarded to ``operations`` and ``download``
    :param urls: urls to process; nothing happens when empty
    """
    if not urls:
        return

    operations.create_database(model_id)
    media_ids = operations.get_media_ids(model_id)
    separated_urls = separate_by_id(urls, media_ids)

    path = pathlib.Path.cwd() / username
    path.mkdir(exist_ok=True)

    # Added pool limit:
    limits = httpx.Limits(max_connections=8, max_keepalive_connections=5)
    async with httpx.AsyncClient(headers=headers, limits=limits, timeout=None) as c:
        add_cookies(c)

        aws = [asyncio.create_task(
            download(c, path, model_id, *url)) for url in separated_urls]

        photo_count = 0
        video_count = 0
        total_bytes_downloaded = 0
        data = 0

        desc = 'Progress: ({p_count} photos, {v_count} videos || {data})'

        with tqdm(desc=desc.format(p_count=photo_count, v_count=video_count, data=data),
                  total=len(aws), colour='cyan', leave=True) as main_bar:
            for coro in asyncio.as_completed(aws):
                try:
                    media_type, num_bytes_downloaded = await coro
                except Exception as e:
                    print(e)
                    # no result to tally: advance the bar and move on rather
                    # than reusing unbound or stale values from earlier
                    main_bar.update()
                    continue

                total_bytes_downloaded += num_bytes_downloaded
                data = convert_num_bytes(total_bytes_downloaded)

                if media_type == 'photo':
                    photo_count += 1
                elif media_type == 'video':
                    video_count += 1

                # other media types leave the description untouched
                if media_type in ('photo', 'video'):
                    main_bar.set_description(
                        desc.format(
                            p_count=photo_count, v_count=video_count, data=data),
                        refresh=False)

                main_bar.update()
async def _get_org_names_for_list(ip_list, desc="extracting org names"):
    """
    Look up the org name of every IP concurrently, with a progress bar.

    :param ip_list: IPs handed one by one to ``_get_org_name``
    :param desc: label of the progress bar
    :return: list of lookup results in completion order, which need not match
        the order of ``ip_list``
    """
    lookups = [_get_org_name(ip) for ip in ip_list]

    results = []
    completed = asyncio.as_completed(lookups)
    for finished in tqdm(completed, desc=desc, total=len(lookups)):
        results.append(await finished)
    return results
async def export_tx_csv(outfile):
    """
    Dump transactions to a tab-separated file with a header row.

    Only transactions whose ``rune_volume`` is not ``None`` are written;
    incomplete ones are reported on stdout and skipped.

    :param outfile: path of the file to create or overwrite
    """
    header = [
        'type', 'date', 'block_height', 'pool',
        'input_address', 'input_asset', 'input_amount', 'input_usd_price',
        'output_address', 'output_asset', 'output_amount', 'output_usd_price',
        'rune_volume', 'usd_volume', 'fee', 'slip',
    ]

    # newline='' lets the csv module control line endings itself; without it
    # platforms that translate "\n" produce blank lines between rows
    with open(outfile, mode='w', newline='') as f:
        writer = csv.writer(f, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(header)

        # NOTE(review): the total comes from ``all_by_date()`` while the loop
        # iterates ``all()`` -- confirm both cover the same rows
        n = await ThorTx.all_by_date().count()
        print(f'export_tx_csv n = {n}')

        with tqdm(total=n) as pbar:
            async for tx in ThorTx.all():
                if tx.rune_volume is not None:
                    writer.writerow([
                        tx.type, tx.date, tx.block_height, tx.pool1,
                        tx.input_address, tx.input_asset, tx.input_amount, tx.input_usd_price,
                        tx.output_address, tx.output_asset, tx.output_amount, tx.output_usd_price,
                        tx.rune_volume, tx.usd_volume, tx.fee, tx.slip
                    ])
                else:
                    # incomplete rows are reported, not exported
                    print(f'stop! tx is incomplete {tx}')
                pbar.update(1)
def __init__(
    self,
    query: str,
    token: str,
    continue_from: State = None,
    early_stop: int = None,
    output_path: str = "data/github_users.csv",
    silent: bool = False,
    state: State = None,
    org_flag: bool = False,
):
    """
    Wire up the queues, progress bar, worker, CSV writer and search.

    :param query: search query, stored and forwarded to ``Search``
    :param token: forwarded to ``Search``
    :param continue_from: stored as ``self.state`` and forwarded to ``Search``
    :param early_stop: stored and forwarded to the ``CSVWriter``
    :param output_path: file name handed to the ``CSVWriter``
    :param silent: when true no progress bar is created here; also forwarded
        to the writer and the search
    :param state: forwarded to ``Search``
    :param org_flag: stored and forwarded to ``Search``
    """
    self.data_queue, self.search_queue, self.output_queue = Queue(), Queue(), Queue()
    self.org_flag = org_flag

    self.progress = None if silent else tqdm(desc="progress", position=0, unit="pages")

    self.worker = StalkerWorker(self.data_queue)

    writer_options = dict(filename=output_path, early_stop=early_stop, silent=silent)
    self.writer = CSVWriter(self.output_queue, **writer_options)

    self.query = query
    self.state = continue_from
    self.start_time = datetime.datetime.utcnow()
    self.early_stop = early_stop

    search_options = dict(
        query=query,
        token=token,
        continue_from=continue_from,
        state=state,
        silent=silent,
        org_flag=self.org_flag,
    )
    self.search = Search(**search_options)
def __init__(self, config, targets):
    """
    Set up progress reporting, logging, the event loop and the connector.

    :param config: settings object; this constructor reads ``url``,
        ``pathlist``, ``url_list``, ``proxy``, ``max_connections`` and
        ``max_connections_per_host``
    :param targets: stored as ``self.targets``
    """
    self.config = config
    self.targets = targets

    # one request per path for a single url, otherwise per (path, url) pair
    path_count = len(self.config.pathlist)
    if self.config.url:
        total = path_count
    else:
        total = path_count * len(self.config.url_list)

    self.pbar = tqdm(total=total, ascii=True, position=0,
                     leave=False, dynamic_ncols=True)

    # both loggers get a handler built around the progress bar
    self.logger = get_logger('SCAN', 'INFO',
                             handler=TqdmLoggingHandler(self.pbar))
    self.scan_logger = get_logger('URL', 'INFO',
                                  log_format="[%(asctime)s] %(message)s",
                                  handler=TqdmLoggingHandler(self.pbar))

    self.sessions = []
    self.loop = asyncio.get_event_loop()
    self.loop.set_exception_handler(self.handle_exception)
    self.setup_sighandler()

    if not self.config.proxy:
        self.conn = TCPConnector(
            limit=self.config.max_connections,
            limit_per_host=self.config.max_connections_per_host,
            ttl_dns_cache=300)
    else:
        # Always build a proxy connector. The "socks5h" scheme is rewritten
        # to "socks5" and the connector's _rdns flag is set for it; any other
        # proxy URL is passed through unchanged instead of leaving
        # ``self.conn`` unset.
        proxy_addr = self.config.proxy
        resolve_remotely = proxy_addr.startswith('socks5h')
        if resolve_remotely:
            proxy_addr = proxy_addr.replace("socks5h", "socks5")
        self.conn = ProxyConnector.from_url(proxy_addr)
        if resolve_remotely:
            self.conn._rdns = True

    self.tasks = defaultdict(list)
    self.sem = asyncio.Semaphore(self.config.max_connections)
    self.setup_sessions()

    self.running = asyncio.Event()
    self.running.set()
async def process_urls(headers, username, model_id, urls):
    """
    Download every url concurrently into ``<cwd>/<username>``.

    One ``download`` task is created per entry returned by
    ``separate_by_id``; a progress bar advances as each task completes.

    :param headers: HTTP headers for the shared client
    :param username: name of the destination directory under the cwd
    :param model_id: database key forwarded to ``operations`` and ``download``
    :param urls: urls to process; nothing happens when empty
    """
    if not urls:
        return

    operations.create_database(model_id)
    known_media_ids = operations.get_media_ids(model_id)
    separated_urls = separate_by_id(urls, known_media_ids)

    destination = pathlib.Path.cwd() / username
    destination.mkdir(exist_ok=True)

    # Added pool limit:
    pool_limits = httpx.Limits(max_connections=10, max_keepalive_connections=5)
    async with httpx.AsyncClient(headers=headers, limits=pool_limits) as c:
        aws = []
        for url_parts in separated_urls:
            job = download(c, destination, model_id, *url_parts)
            aws.append(asyncio.create_task(job))

        with tqdm(desc='Files downloaded', total=len(aws), colour='cyan', leave=True) as bar:
            for finished in asyncio.as_completed(aws):
                await finished
                bar.update()
def start(
    query,
    page_size,
    continue_from,
    early_stop,
    sort,
    order,
    output,
    workers,
    token,
    username,
    silent,
    no_auth,
    org,
):
    # Interactive entry point: collects a token and run options (optionally
    # restoring them from a ".state" pickle), then builds a Stalker and runs it.
    #
    # ``page_size``, ``sort``, ``order``, ``workers`` and ``no_auth`` are not
    # referenced in this function body.
    #
    # NOTE(review): the source arrived without indentation; the nesting below
    # is the most plausible reconstruction and should be confirmed, especially
    # around the ``if not state`` prompts and the shutdown branch at the end.
    click.clear()
    if not token:
        click.echo(
            "(You can set the GITHUB_TOKEN environment variable to skip this)")
        token = click.prompt("GitHub Personal Access Token")
    if not token:
        click.echo("Token is invalid")
        raise click.exceptions.Exit(1)
    click.clear()

    state = None
    if os.path.exists(".state"):
        if click.confirm("Continue from last saved state? (Y/n)"):
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # only safe while ".state" is written exclusively by this tool.
            with open(".state", "rb") as fp:
                state = pickle.load(fp)
            # a restored state overrides the query and start page given on the CLI
            continue_from = state.continue_from
            query = state.query

    if not silent:
        if not state:
            # query / start page are only prompted for when no state was restored
            click.echo(f"current query is {query}")
            if click.confirm("enter new query? (y/N)"):
                query = click.prompt("query")
            click.echo(f"continue from {continue_from}")
            if click.confirm("change? (y/N)", ):
                continue_from = int(click.prompt("page number"))
        click.echo(
            f"stopping after adding {early_stop}? (0 runs until completion)")
        if click.confirm("change? (y/N)", ):
            early_stop = int(click.prompt("number of entries"))
        click.echo(f"output directory is: {output}")
        if click.confirm("change? (y/N)", ):
            output = str(click.prompt("filepath"))

    if org:
        # prefix the query with the organisation qualifier
        query = "type:org " + query

    if not silent:
        # summary of the effective settings before the run starts
        click.clear()
        click.echo(f"started at: {datetime.now().isoformat()}")
        click.echo(f"user: {username}")
        click.echo(f"query: {query}\n")
        click.echo(f"starting from: {continue_from}\n")
        click.echo(
            f"ending early: {f'minimum {early_stop} entries' if early_stop else 'no'}\n"
        )

    stalker = Stalker(
        query=query,
        token=token,
        continue_from=continue_from,
        output_path=output,
        silent=silent,
        state=state,
        early_stop=early_stop,
        org_flag=org,
    )
    try:
        stalker.start()
    except click.exceptions.Abort:
        click.echo("exiting...")
        stalker.stop()
        if not silent:
            # NOTE(review): a new, never-closed tqdm instance is created only
            # to call write(); tqdm.write is callable on the class directly.
            tq = tqdm()
            tq.write(f"saved state to .state")
        else:
            print(f"saved state to .state")