def run(self):
    if os.path.exists(self.results_file):
        # There's something in the cache, read it.
        print('Using cached result in working directory')
        regions = self.read_reduce_result(self.results_file)
        max_id = int(open(self.max_id_file).read())
        return regions, max_id
    print('No cached result found in working directory')
    self.reset()
    cpus = mp.cpu_count()
    pool = mp.Pool(processes=cpus)
    print('Starting mapreduce using %d cores' % cpus)
    qfiles = os.listdir(self.qdir)
    bar = ShadyBar('Mapping', max=len(qfiles),
                   suffix='%(index)d/%(max)d - %(elapsed)ds')
    for _ in pool.imap_unordered(self.mapper, qfiles):
        bar.next()
    bar.finish()
    pool.terminate()
    pool = mp.Pool(processes=cpus)
    bar = ShadyBar('Reducing', max=self.num_map_outputs,
                   suffix='%(index)d/%(max)d - %(elapsed)ds')
    for _ in pool.imap_unordered(self.reducer, range(self.num_map_outputs)):
        bar.next()
    bar.finish()
    pool.terminate()
    return self.aggregate(), self.max_id
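# Minimal, self-contained sketch (not part of the class above) of the pattern
# run() relies on: feed work to a multiprocessing pool via imap_unordered()
# and advance a ShadyBar once per completed task. The `square` worker is a
# hypothetical stand-in for self.mapper / self.reducer.
import multiprocessing as mp
from progress.bar import ShadyBar

def square(x):
    # stand-in worker; returns as tasks complete, in arbitrary order
    return x * x

if __name__ == '__main__':
    items = list(range(50))
    bar = ShadyBar('Mapping', max=len(items),
                   suffix='%(index)d/%(max)d - %(elapsed)ds')
    with mp.Pool(processes=mp.cpu_count()) as pool:
        for _ in pool.imap_unordered(square, items):
            bar.next()
    bar.finish()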
def remove_rows(color_index):
    mass = []
    zz = []
    # xlsFileName
    clear()
    print('Processing file: %s' % xlsFileName)
    barr = ShadyBar('Detecting colors ', max=getNumRows + 1)
    for i in range(7, getNumRows + 1):
        barr.next()
        this_is = sheet.Cells(i, 2).Font.ColorIndex
        if this_is != color_index:
            zz.append(i)
    if len(zz) > 0:
        c = [zz[0]]
        mass.append(c)
        for i in zz[1:]:
            if i == c[-1] + 1:
                c.append(i)
            else:
                c = [i]
                mass.append(c)
    clear()
    print('Processing file: %s' % xlsFileName)
    bar = ShadyBar('Deleting rows ', max=len(mass))
    for i in reversed(mass):
        # print('Deleting rows %s through %s' % (min(i), max(i)))
        bar.next()
        delstr = 'A%s:A%s' % (min(i), max(i))
        sheet.Range(delstr).EntireRow.Delete(Shift=color_index)
def init_progress_bars(self):
    """Initialize the progress bars."""
    self.progressbars = {}
    ocs = list(self.output_connectors.values())
    if isnotebook():
        from ipywidgets import IntProgress, VBox
        from IPython.display import display
        if len(ocs) > 0:
            for oc in ocs:
                self.progressbars[oc] = IntProgress(
                    min=0,
                    max=oc.output_streams[0].descriptor.num_points(),
                    bar_style='success',
                    description=f'Digitizer Data {oc.name}:',
                    style={'description_width': 'initial'})
        for axis in self.sweeper.axes:
            self.progressbars[axis] = IntProgress(
                min=0,
                max=axis.num_points(),
                description=f'{axis.name}:',
                style={'description_width': 'initial'})
        display(VBox(list(self.progressbars.values())))
    else:
        from progress.bar import ShadyBar
        if len(ocs) > 0:
            for oc in ocs:
                self.progressbars[oc] = ShadyBar(
                    f'Digitizer Data {oc.name}:',
                    max=oc.output_streams[0].descriptor.num_points())
        for axis in self.sweeper.axes:
            self.progressbars[axis] = ShadyBar(
                f"Sweep {axis.name}",
                max=axis.num_points())
def synchronize_by_projects(projects=None, fetcher=None, progress=True, download=None):
    if fetcher is None:
        fetcher = PyPIFetcher()

    # Grab the current datetime
    current = fetcher.current()

    # Synchronize all the classifiers with PyPI
    synchronize_classifiers(fetcher)

    if not projects:
        # Grab a list of projects from PyPI
        projects = fetcher.projects()

        # We are not synchronizing a subset of projects, so we can check for
        # any deletions (if required) and yank them.
        diff.projects(projects)

        # Commit our yanked projects
        db.session.commit()

    if progress:
        bar = ShadyBar("Processing Projects", max=len(projects))
    else:
        bar = DummyBar()

    for project in bar.iter(projects):
        synchronize_project(project, fetcher, download=download)

    logger.info("Finished processing projects at %s", current)

    return current
def create_chain(self):
    tweets = self.get_clean_tweets()
    markov_chain = {}
    bar = ShadyBar('Populating chain', max=self.get_total_tweets_words(tweets))
    for tweet in tweets:
        for index, word in enumerate(tweet):
            if word not in markov_chain:
                markov_chain[word] = {NODES_KEY: {}, IS_INIT_KEY: False}
            if markov_chain[word][IS_INIT_KEY] is False:
                markov_chain[word][IS_INIT_KEY] = index == 0
            if index != len(tweet) - 1:
                following_word = tweet[index + 1]
                if following_word not in markov_chain[word][NODES_KEY]:
                    markov_chain[word][NODES_KEY][following_word] = {
                        N_OCCURR_KEY: 0,
                        W_TYPE_KEY: {
                            N_END_KEY: 0,
                            N_MID_KEY: 0
                        }
                    }
                markov_chain[word][NODES_KEY][following_word][N_OCCURR_KEY] += 1
                w_type_n = N_END_KEY
                if index + 1 != len(tweet) - 1:
                    w_type_n = N_MID_KEY
                markov_chain[word][NODES_KEY][following_word][W_TYPE_KEY][w_type_n] += 1
            bar.next()
    bar.finish()
    return self.calculate_rates(markov_chain)
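# Illustration only: the chain built by create_chain() maps each word to its
# possible successors with occurrence counts and position statistics. For a
# single "tweet" ['good', 'morning', 'world'] the structure would look roughly
# like the dict below. The plain string keys are assumed stand-ins for the
# module constants NODES_KEY, IS_INIT_KEY, N_OCCURR_KEY, W_TYPE_KEY,
# N_END_KEY and N_MID_KEY, whose actual values are not shown in this file.
example_chain = {
    'good':    {'is_init': True,  'nodes': {'morning': {'n_occurr': 1,
                                                        'w_type': {'n_end': 0, 'n_mid': 1}}}},
    'morning': {'is_init': False, 'nodes': {'world':   {'n_occurr': 1,
                                                        'w_type': {'n_end': 1, 'n_mid': 0}}}},
    'world':   {'is_init': False, 'nodes': {}},
}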
def download_model(name, url):
    """
    Downloads a large model file.

    Returns the hash of the newly downloaded model and the location of the
    model in the temp folder.

    :param name: string name of the model
    :param url: string url location of the model
    """
    # https://stackoverflow.com/questions/16694907/download-large-file-in-python-with-requests
    local_filename = url.split('/')[-1]
    local_filename = TEMP_LOCATION + local_filename
    full_hash = sha3_256()
    with requests.get(url, stream=True) as r:
        size = r.headers.get('content-length')
        if size:
            p = ShadyBar(local_filename, max=int(size))
        else:
            p = Spinner(local_filename)
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # filter out keep-alive new chunks
                    p.next(len(chunk))
                    f.write(chunk)
                    full_hash.update(chunk)
                    # f.flush()
    unique_filename = MODEL_LOCATION + name + ".h5"
    os.rename(local_filename, unique_filename)
    return full_hash.hexdigest(), unique_filename
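# Hedged usage sketch: verify a file on disk against the SHA3-256 hex digest
# that download_model() returns. `verify_model` is a hypothetical helper, not
# part of the original module; it only assumes the same hashlib.sha3_256
# streaming pattern used above.
from hashlib import sha3_256

def verify_model(path, expected_hex):
    # hash the file in 8 KiB chunks, mirroring the download loop above
    h = sha3_256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(8192), b''):
            h.update(chunk)
    return h.hexdigest() == expected_hex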
def get_studio_dict():
    studio_dict = {
        # 'studio': 'latest id number'
    }
    full_page = ''
    page_bar = ShadyBar('Get entire site info', max=total_page)
    for page in range(1, total_page + 1):
        url = page_url.format(page=str(page))
        req = sess.get(url, headers=header, timeout=timeout)
        full_page += req.text
        # sleep(1)
        page_bar.next()
    page_bar.finish()
    soup = BeautifulSoup(full_page, 'html.parser')
    movie_boxes = soup.find_all(class_='movie-box')
    for movie in movie_boxes:
        tmp = movie['href'].split('/')[-1].split('-')
        if tmp[0] not in studio_dict:
            studio_dict[tmp[0]] = tmp[1]
        else:
            number_in_movie = int(re.search(r'(\d+)', tmp[1]).group(1))
            number_in_dict = int(re.search(r'(\d+)', studio_dict[tmp[0]]).group(1))
            if number_in_dict < number_in_movie:
                studio_dict[tmp[0]] = calculate_id(number_in_movie, studio_dict[tmp[0]])
    return studio_dict
def _update_with_es(self):
    kw = {
        'index': self.es_index,
        'doc_type': self.query_type,
        'scroll': '1m',
        'search_type': 'scan',
        'size': self.bulk_size
    }
    scroll = self.es.search(**kw)
    sid = scroll['_scroll_id']
    total_size = scroll['hits']['total']
    hits_size = total_size
    dealt_size = 0
    print("docs: " + str(total_size))
    suffix = '%(percent)d%% - %(index)d [%(elapsed_td)s / %(eta_td)s]'
    bar = ShadyBar("clone", suffix=suffix, max=total_size)
    while hits_size > 0:
        scroll = self.es.scroll(scroll_id=sid, scroll='1m')
        sid = scroll['_scroll_id']
        hits = scroll['hits']['hits']
        hits_size = len(hits)
        # todo
        if hits_size > 0:
            res = self._bulk_es_mongo(hits)
        # dealt size
        dealt_size += hits_size
        bar.goto(dealt_size)
    # done
    print('\nDone !')
def main():
    logging.basicConfig(level=logging.INFO, filename="pynmj.log")
    # _LOGGER.setLevel(logging.INFO)
    options, arguments = parse_options()
    try:
        try:
            get_lock(arguments[0])
            updater = NMJUpdater(arguments[0], "local_directory")
            if options.clean_name:
                updater.clean_names()
            medias = updater.scan_dir()
            _LOGGER.info("Found %s medias", len(medias))
            bar = ShadyBar('Updating database', max=len(medias),
                           suffix='[ETA %(eta_td)s] (%(percent)d%%)')
            for rank, media in enumerate(medias):
                _LOGGER.info("Media %s/%s", rank + 1, len(medias))
                updater.search_media_and_add(media)
                bar.next()
            _LOGGER.info("Cleaning DB...")
            updater.clean()
            _LOGGER.info("Done")
            bar.finish()
        except:
            import traceback
            traceback.print_exc()
    finally:
        release_lock(arguments[0])
def _extract_sentums(self, article, **kwargs):
    ref = [article.get_summary_string()]
    doc_strings = []
    bad_idxs = []
    for i in range(len(article)):
        doc_strings.append(article.get_doc_sent_string(i))
        if len(article.doc_sents[i]) < 4:  # 4 word sents minimum
            bad_idxs.append(i)

    best_sents = []
    best_score = -1
    combos = list(combinations(range(len(article)), NUM_SENTS_EXTRACT))
    bar = ShadyBar('Exhaustive', max=len(combos))
    for sent_idxs in combos:
        # if our sent_idxs to test contains a bad idx, skip it
        if list(filter(lambda idx: idx in bad_idxs, sent_idxs)):
            continue

        # test the extraction
        extr = [doc_strings[idx] for idx in sent_idxs]
        score = get_score([' '.join(extr)], ref, option=self.opt_option)
        if score > best_score:
            best_sents = sent_idxs
            best_score = score
        bar.next()
    bar.finish()
    return list(sorted(best_sents))
def compute_my_variability(event_log: Log) -> float:
    """
    Computes the prefix entropy of the input log.

    Args:
        event_log (Log): the input log

    Returns:
        The prefix-block entropy
    """
    prefixes: List[List[Event]] = []
    bar: Bar = IncrementalBar("Prefix generation", max=len(event_log.trace_list))
    for trace in event_log.trace_list:
        trace_prefixes: List[List[Event]] = trace.get_all_prefixes()
        for prefix in trace_prefixes:
            if prefix not in prefixes:
                prefixes.append(prefix)
        bar.next()
    bar.finish()

    entropy: float = 0
    bar = ShadyBar("Prefix likelihood estimation", max=len(prefixes))
    for prefix in prefixes:
        p: float = _prefix_likelihood_estimator(event_log, prefix)
        entropy += p * logarithm(p, 10)
        bar.next()
    bar.finish()
    entropy *= -1
    return entropy
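# Sketch of the entropy formula applied above, assuming `logarithm(p, 10)` is
# a base-10 logarithm equivalent to math.log(p, 10):
#     H = -sum(p_i * log10(p_i))  over all distinct prefixes.
# The probabilities below are hypothetical prefix likelihoods, used only to
# show the arithmetic.
import math

prefix_probs = [0.5, 0.25, 0.25]
entropy_example = -sum(p * math.log(p, 10) for p in prefix_probs)
print(round(entropy_example, 4))  # 0.4515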
def compute_edit_distance_variability(event_log: Log) -> float:
    """
    Computes the edit distance variability of the log using the Levenshtein
    distance formula.

    Args:
        event_log: an instance of the Log class

    Returns:
        The computed edit distance of the input log
    """
    traces = event_log.trace_list
    distance: int = 0
    number_of_comparisons: int = 0
    bar = ShadyBar("Edit distance computation", max=len(traces) - 1)
    trace_1: Trace = traces[0]
    for trace_2 in traces:
        if trace_1 != trace_2:
            bar.next()
            distance += _levenshtein_distance(trace_1, trace_2) * trace_1.frequency * trace_2.frequency
            number_of_comparisons += 1
    bar.finish()
    return distance / number_of_comparisons
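# Minimal Levenshtein distance sketch for reference. The real helper
# `_levenshtein_distance` (not shown here) works on Trace objects and is
# weighted by trace frequencies above; this standalone version works on plain
# sequences and only illustrates the underlying dynamic programming.
def levenshtein(a, b):
    prev = list(range(len(b) + 1))
    for i, x in enumerate(a, 1):
        curr = [i]
        for j, y in enumerate(b, 1):
            curr.append(min(prev[j] + 1,              # deletion
                            curr[j - 1] + 1,          # insertion
                            prev[j - 1] + (x != y)))  # substitution
        prev = curr
    return prev[-1]

print(levenshtein("kitten", "sitting"))  # 3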
def load_tweets_from_csv(fname='../data/annotated_tweets.csv', preprocess=True, serialize=True):
    # Load the data into memory
    print('Loading MTSA csv...')
    ids_to_content = defaultdict(lambda: [])
    with open(fname) as f:
        csv_reader = DictReader(f)
        for i, row in enumerate(csv_reader):
            ids_to_content[row[ID_KEY]].append(row)

    # construct the tweets and labels
    bar = ShadyBar('Labelling MTSA', max=len(ids_to_content))
    tweets = []
    for sample in ids_to_content.values():
        bar.next()
        csv_twt = sample[0]

        # skip the test questions used for crowdflower!
        if csv_twt[IS_GOLD_KEY] == 'true':
            continue

        # build up the tweet statistics of labels
        tweet_stats = {s: 0 for s in LABELS}
        for labelling in sample:
            if labelling[HAS_SENTIMENT_KEY] == 'no':
                tweet_stats['obj'] += 1
            for key in CSV_LABELS:
                if labelling[POS_NEG_COM_KEY] == key:
                    tweet_stats[key[0:3]] += 1

        # skip tweets that had < 5 annotations
        if sum(tweet_stats.values()) < 5:
            continue

        # extract the necessary data
        tweet = Tweet(csv_twt[TWEET_ID], csv_twt['text'], csv_twt['topic'])
        tweet.labelling = tweet_stats
        tweets.append(tweet)
    bar.finish()

    """ The preprocessing pipeline is: (see preprocessing.py)
    (tokenize), (filter_tokens), (remove_mentions), (split_hashtags),
    (autocorrect), (lemmatize)
    """
    print('Removed {} tweets that had < 5 annotations.'.format(len(ids_to_content) - len(tweets)))
    print('We now have a total of {} tweets in the MTSA.'.format(len(tweets)))
    if preprocess:
        preprocess_tweets(tweets)

    # save data if desired
    if serialize:
        np.save('../data/processed_annotated_tweets.npy', np.array(tweets))
    return tweets
def main():
    bar = ShadyBar('Preparing update', max=100)
    for i in range(100):
        time.sleep(.01)
        bar.next()
    bar.finish()
    subprocess.check_call([sys.executable, "-m", "pip", "install", UPDATE_URL])
def progress_bar():
    bar = ShadyBar('Processing Crypto-analysis', max=1000,
                   suffix='%(percent)d%% | %(eta)d seconds remaining |')
    for i in range(1000):
        bar.next()
        time.sleep(.015)
    bar.finish()
def plot_bar():
    # Method 0: Using \r to print
    def view_bar(num, sum, bar_title="Processing", bar_word="▓"):
        rate = num / sum
        rate_num = round(rate * 100)
        rest_num = 100 - rate_num
        print(("\r\033[1;32m" + bar_title + " \033[0m\033[1;35m|" +
               bar_word * rate_num + " " * rest_num +
               "| \033[0m\033[1;33m%3d%%\033[0m") % (rate_num), end="")
        if rate_num == 100:
            print("\n", end="")

    with open("plot_statistic.py", 'r') as file:
        lines = file.readlines()
        for _ in range(len(lines)):
            time.sleep(0.02)
            view_bar(_, len(lines) - 1)

    # Method 1: Using alive_progress <<<
    with alive_bar(100) as bar:
        for _ in range(100):
            bar()
            time.sleep(0.02)

    # Method 2: Using tqdm <<<
    with open("plot_statistic.py", 'r') as file:
        lines = file.readlines()
        for _ in tqdm(lines):
            time.sleep(0.02)

    # Method 3: Using progress <<<
    with open("plot_statistic.py", "r") as file:
        lines = file.readlines()
        # bar = IncrementalBar('BarName', max=len(lines))
        # bar = ChargingBar('BarName', max=len(lines))
        bar = FillingCirclesBar('BarName', max=len(lines))
        # bar = ShadyBar('BarName', max=len(lines))
        for _ in lines:
            bar.next()
            time.sleep(0.02)
        bar.finish()

    with open("plot_statistic.py", "r") as file:
        lines = file.readlines()
        bar = ChargingBar('BarName', max=len(lines))
        for _ in lines:
            bar.next()
            time.sleep(0.02)
        bar.finish()

    with open("plot_statistic.py", "r") as file:
        lines = file.readlines()
        bar = ShadyBar('BarName', max=len(lines))
        for _ in lines:
            bar.next()
            time.sleep(0.02)
        bar.finish()
def rescale_images(directory, height, width):
    bar = ShadyBar('Image Resize Processing', max=len(os.listdir(directory)))
    for img in os.listdir(directory):
        size = (height, width)
        im = Image.open(directory + '/' + img)
        im_resized = im.resize(size, Image.ANTIALIAS)
        im_resized.save(directory + '/' + img)
        time.sleep(0.005)
        bar.next()
    bar.finish()
def RenameFiles(dir, file_name):
    os.getcwd()
    bar = ShadyBar('Rename Processing', max=len(os.listdir(dir)))
    for i, filename in enumerate(os.listdir(dir)):
        path = pathlib.Path(dir + "/" + filename)
        if os.path.isfile(path):
            os.rename(dir + "/" + filename, dir + "/" + file_name + str(i) + ".jpg")
        time.sleep(0.005)
        bar.next()
    bar.finish()
def Pb9():
    from progress.bar import ShadyBar
    import time
    bar = ShadyBar('Progress bar 9', max=100)  # max value of 100 is adjustable
    for i in range(100):  # adjust this range to match max
        bar.next()
        time.sleep(0.1)  # delay, adjustable; 0.1-1 works best
    bar.finish()
def load_data(self) -> List[object]:
    """
    Load all data from the file and return it.

    If the file is empty or missing, return an empty list.
    """
    file_manager.check_db_file()
    bar = ShadyBar('Load data', suffix='%(percent)d%%', max=1)
    if self._file_path.stat().st_size == 0:
        return []
    with self._file_path.open() as f:
        bar.next()
        return ObjectDeserializer().decode_data(constants.json_format, f)
def loadAllData(client):
    print('Initiating data load...')
    current_year = int(datetime.datetime.now().year) + 1

    # Wipe the table
    client['scores']['scores'].remove({})
    client['scores']['table'].find_one_and_update({'_id': 0}, {'$set': {'score_table': {}}})

    print('Uploading data from NFL seasons', start_year, '-', (current_year - 1))
    with ShadyBar('', max=(current_year - start_year), suffix='%(percent).1f%% - %(eta)d s ') as bar:
        for year in range(start_year, current_year):
            # Loop through every NFL season
            loadOneYear(client, year)
            bar.next()
def liste_ver(self, combo_dosya, cikti) -> None:
    with open(combo_dosya, 'r+') as combo:
        combo_liste = combo.readlines()
    bar = ShadyBar('Scanning..', max=len(combo_liste))
    for user in combo_liste:
        kullanici, sifre = user.rstrip('\n').split(':')
        self.giris_yap(kullanici, sifre, cikti)
        bar.next()
    bar.finish()
def _copy_data(self):
    ss_kw = {}
    # sort
    if self.source_sort:
        ss_kw['sort'] = self.source_sort
    scroll = self.source_es.search(index=self.source_index, scroll='1m',
                                   search_type='scan', size=self.bulk_size,
                                   version=True, timeout='60s', **ss_kw)
    sid = scroll['_scroll_id']
    total_size = scroll['hits']['total']
    hits_size = total_size
    dealt_size = 0
    print("docs: " + str(total_size))
    self.logger.info("docs: " + str(total_size))
    suffix = '%(percent)d%% - %(index)d [%(elapsed_td)s / %(eta_td)s]'
    bar = ShadyBar("clone", suffix=suffix, max=total_size)
    while hits_size > 0:
        scroll = self.source_es.scroll(scroll_id=sid, scroll='1m')
        sid = scroll['_scroll_id']
        hits = scroll['hits']['hits']
        hits_size = len(hits)
        actions = self._bulk_hits(hits)
        if len(actions) > 0:
            kw = {}
            kw['timeout'] = '60s'
            res = []
            try:
                res = streaming_bulk(client=self.target_es, actions=actions, **kw)
            except BulkIndexError as err:
                print(err)
            okNum = 0
            for ok, re in res:
                if not ok:
                    print(re)
                else:
                    okNum += 1
            # refresh index
            if okNum > 0:
                self.target_es.indices.refresh(index=self.target_index)
        # dealt size
        dealt_size += hits_size
        bar.goto(dealt_size)
        self.logger.info("dealt: " + str(dealt_size) + " / " + str(total_size))
    print('\nDone !')
    self.logger.info("Done ! \n\n")
def export_corrected(file_num, is_range=False):
    if is_range:
        bar = ShadyBar(
            message="Loading dataset",
            suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta_td)s',
            max=file_num * 2)
        threads = list()
        for file in range(1, file_num + 1):
            # export_single(file, bar)
            t = SplitThread(file, 'SplitThread-{}'.format(file), file)
            t.start()
            threads.append(t)
        for _t in threads:
            _t.join()
        print('Job complete. {} Threads executed'.format(file_num))
    else:
        bar = ShadyBar(
            message="Loading dataset",
            suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta_td)s',
            max=2)
        export_single(file_num, bar)
def compute_cost(query_dir, block_size):
    cost = 0
    files = [os.path.join(query_dir, f) for f in os.listdir(query_dir)]
    args = zip(files, [block_size] * len(files))
    cpus = mp.cpu_count()
    bar = ShadyBar('Scanning Queries (%d cores)' % cpus, max=len(files))
    pool = mp.Pool(processes=cpus)
    for c in pool.imap_unordered(blocks_for_query, args):
        cost += c
        bar.next()
    bar.finish()
    print('Total blocks accessed: %d' % cost)
def dl_file(url, **kwargs):
    """
    Download the file from the given url.

    Open the file object in binary mode since get.content returns a byte
    object. The written file will have CRLF line endings.
    """
    # using carriage return to escape incompatible ANSI chars
    bar = ShadyBar('\rDownloading', max=10, suffix='%(percent)d%%')
    try:
        response = get(url)  # byte-like object
        # Added progress intervals for visual consistency.
        for i in range(10):
            sleep(0.05)
            bar.next()
            sys.stdout.flush()
        bar.finish()
        sys.stdout.flush()
    # errors related to internet connectivity
    except (NewConnectionError, MaxRetryError, ConnectionError):
        error_msg = "Failed to send request to URL." + \
                    " Check your internet connection and try again."
        print(colored('\rFatal:', 'red'), error_msg, file=sys.stderr)
        raise SystemExit(1)

    # error related to HTTP responses
    if response.status_code != 200:
        print(colored("\rFatal:", 'red'),
              "HTTP Error ({}).".format(response.status_code), file=sys.stderr)
        print("Try setting up your repo manually or contact repo admin.",
              file=sys.stderr)
        raise SystemExit(1)

    # Write response to file
    if kwargs['file'] == 'pre-commit':
        file_name = os.path.join(get_hook_dir(), "pre-commit")
        # in order to write 'Done' inline
        sys.stdout.write('\nWriting response to file ... ')
        sys.stdout.flush()
        with open(file_name, "wb") as file:
            file.write(response.content)
    elif kwargs['file'] == '.editorconfig':
        file_name = os.path.join(os.getcwd(), ".editorconfig")
        # in order to write 'Done' inline
        sys.stdout.write('\nWriting response to file ... ')
        sys.stdout.flush()
        with open(file_name, "wb") as file:
            file.write(response.content)

    # delays for visual consistency
    sleep(0.3)
    sys.stdout.write(colored("Done", 'green'))
    sys.stdout.flush()
    print('\n')
def __iter__(self):
    progress_bar = ShadyBar(
        "Solving Phase II",
        max=self.timeslots / 10,
        width=60,
        suffix="%(percent).1f%% - ETA: %(eta_td)s",
    )
    count = 0
    for k in OmniscientSTWPredictor.__iter__(self):
        yield k
        count = (count + 1) % 10
        if count == 0:
            progress_bar.next()
    progress_bar.finish()
def scanne_data_dir(src_folder, save_folder, N, R):
    file_list = os.listdir(src_folder)
    file_num = len(file_list)
    bar = ShadyBar('Processing', max=file_num)
    for root, dirs, files in os.walk(src_folder):
        for file in files:
            bar.next()
            name_str = file.split('.')
            if (name_str[1] == 'JPG' or name_str[1] == 'jpg'
                    or name_str[1] == 'png' or name_str[1] == 'PNG'
                    or name_str[1] == 'bmp'):
                path = os.path.join(root, file)
                img = getImg(path)
                code, result_img = CCT_extract1(img, N, R)
                cv2.imwrite(save_folder + file, img)
    bar.finish()
def prim(self, coords, distance_matrix, start_node=0):
    """Prim's algorithm to calculate a minimum spanning tree.

    Takes coords as a list of tuples in the form of '(x, y)'.
    Returns a list of edges of the MST in the form of tuples "(node1, node2)".
    """
    # initialize the priority queue as a list of entries, one per coordinate,
    # holding the vertex index, a dist value which saves the minimum distance
    # connecting the vertex to the MST, the parent vertex which is the closest
    # vertex already in the MST, and the x- and y-coordinates
    # Format: [[index, distance, parent, (x-coord, y-coord)], ...]
    priority_queue = [[v, sys.maxsize, math.inf, coords[v]] for v in range(len(coords))]

    # initialize an MST list which holds the vertices in the order they are visited
    # initialize an edges list which holds the edges of the MST
    mst = []
    edges = []

    bar = ShadyBar('Constructing MST', max=len(coords))

    # initialize distance of first vertex as 0
    priority_queue[0][1] = 0
    # remove first vertex of priority queue and add to MST
    mst.append(priority_queue.pop(0))

    while priority_queue:
        for v in priority_queue:
            time_v = -time.process_time()
            for u in mst:
                # if the current closest distance of v is bigger than the distance between u and v
                if v[1] > distance_matrix[v[0]][u[0]]:
                    # update distance of v to the distance of u and v
                    v[1] = distance_matrix[v[0]][u[0]]
                    # and add u as the parent of v for the closest distance
                    v[2] = u[0]
            time_v += time.process_time()

        # get the vertex that has the least dist in the priority queue
        new_vertex = min(priority_queue, key=lambda x: x[1])
        # remove new vertex from priority queue and add to MST
        mst.append(priority_queue.pop(priority_queue.index(new_vertex)))
        # append the edge connecting the new vertex and its parent vertex to the edges list
        edges.append([(new_vertex[0], new_vertex[2]),
                      distance_matrix[new_vertex[0]][new_vertex[2]]])
        bar.next()
    bar.finish()
    return mst, edges
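# Hypothetical call sketch for prim() above (the surrounding class instance,
# called `solver` here, is assumed): four points on a unit square with
# pairwise Euclidean distances should yield an MST of 3 unit-length edges.
import math

coords = [(0, 0), (1, 0), (1, 1), (0, 1)]
dist = [[math.dist(a, b) for b in coords] for a in coords]
# mst, edges = solver.prim(coords, dist)
# len(edges) == len(coords) - 1  -> 3 edges, total weight 3.0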
def __init__(self, original_dir, blurred_dir, confidence_threshold, input_video):
    self.original_dir = original_dir
    self.blurred_dir = blurred_dir
    self.confidence_threshold = confidence_threshold
    self.input_video = input_video
    self.prepare_frames()
    self.n_frames = self.extract_stills()
    self.bar = ShadyBar("Blurring", max=self.n_frames)
    self.codec = cv2.VideoWriter_fourcc(*'MP4V')
    self.blur_movie(confidence_threshold)
    self.bar.finish()
    print("New video is in {} with {} frames".format(
        self.output_video, self.n_frames))