def move(self) -> None: """Move the files between directories. The default method to move the files from `primary_directory` to the `secondary_directory`. Uses process pool and `shutil.move` method to initialize a process for each move operation. """ sys.stdout.write( "\033[1mMoving the files from `{primary_folder}` to " "`{secondary_folder}` using processing\033[0m\n".format( primary_folder=self.primary_directory, secondary_folder=self.secondary_directory)) files = os.listdir(self.primary_directory) pool = Pool(processes=cpu_count()) for i, f in enumerate(files): pool.apply_async(self._move, (f, )) progress_bar(i, len(files), prefix="Moving files:") pool.close() pool.join() sys.stdout.write("\n")
def create_dataset(featext):
    """ Reads in a set of texts and splits it into train and test sets,
    tagged by author. """
    files = sorted(os.listdir(PATH))
    authors = [file_name.split('___')[0] for file_name in files]
    authors = index_authors(authors)
    feature_dicts = []
    for count, file_name in enumerate(files):
        # print progress bar
        sys.stdout.write('\r%s' % progress_bar(count, len(files)))
        sys.stdout.flush()
        feature_dict = featext.extract_features(get_content(file_name))
        feature_dicts.append(feature_dict)
    print('\r%s' % progress_bar(len(files), len(files)))
    # features = [feature_vector(featext.extract_features(get_content(file_name)))
    #             for file_name in files]
    return np.array(feature_dicts), np.array(authors)
def make_plot(self, directory="", steps=0):
    path = directory + '/'
    n = self.length
    r = len(str(steps))
    plot_range = self.get_range(self.config.plot_offset)
    figure = plot.figure(figsize=(plot_range[0][1] - plot_range[0][0],
                                  plot_range[1][1] - plot_range[1][0]))
    figure.patch.set_facecolor(self.config.plot_background_color)
    plot.xlim(plot_range[0])
    plot.ylim(plot_range[1])
    plot.axis('off')
    step = 0
    for i in range(n - 1):
        color = colorsys.hsv_to_rgb(i / n, 1.0, 1.0)
        if steps > 0 and i / n >= step / steps:
            step += 1
            plot.savefig(path + str(step).rjust(r, '0'))
            progress_bar(step, steps)
        plot.plot(self.data[0][i:i + 2], self.data[1][i:i + 2],
                  color=color, linewidth=self.config.plot_line_width)
    if directory:
        plot.savefig(path + (str(step).rjust(r, '0') if steps > 0 else "walk"))
    else:
        plot.show()
    if steps > 0:
        print('\n')
def render_target(self, rgb_to_img_dict, reduced_img_arr_dict, tile_size):
    img = cv.imread(self.target_path)
    if img is None:
        print("Error in target image: Invalid path or file")
        return 0
    img_height = len(img)
    img_width = len(img[0])
    # Reminder that the images are in (height, width, rgb) order.
    for y_col in range(tile_size, img_width, tile_size):
        for x_row in range(tile_size, img_height, tile_size):
            height_pixel_range = y_col - tile_size
            width_pixel_range = x_row - tile_size
            pixel_arr = img[width_pixel_range:x_row,
                            height_pixel_range:y_col]
            pixel_avg = pixel_arr.mean()
            matching_img = match_rgb_img(pixel_avg, rgb_to_img_dict,
                                         reduced_img_arr_dict)
            # print(matching_img)
            # print(matching_img.shape)
            img[width_pixel_range:x_row,
                height_pixel_range:y_col] = matching_img
        progress_bar(y_col * img_height, img_width * img_height,
                     "Rendering Target Image: ")
    return img
def retrieve_tiles(self, img_collection_path):
    collection_path = img_collection_path
    rgb_collection = {}
    scaled_images = {}
    for root, folders, files in os.walk(collection_path):
        for img_number, img in enumerate(files):
            file_path = os.path.join(root, img)
            img_array = cv.imread(file_path)
            # Non supported image types will result in the image being read to be None
            if img_array is None:
                continue
            # # Used to view the image if desired
            # cv.namedWindow(img, cv.WINDOW_NORMAL)
            # cv.imshow(img, img_array)
            # cv.waitKey(0)
            # cv.destroyAllWindows()
            progress_bar(img_number, len(files), "Loading Tiles")
            avg_rgb, reduced_img_array = self.process(
                img_array, self.tile_dimension)
            if avg_rgb in rgb_collection:
                rgb_collection[avg_rgb].append(img)
                scaled_images[img] = reduced_img_array
            else:
                rgb_collection[avg_rgb] = [img]
                scaled_images[img] = reduced_img_array
    return (rgb_collection, scaled_images)
def make_sound(self, directory, steps=0):
    if steps == 0:
        return
    path = directory + '/'
    n = self.length
    walk_range = self.get_range()

    def x(value):
        return (value - walk_range[0][0]) / (walk_range[0][1] - walk_range[0][0])

    def y(value):
        return (value - walk_range[1][0]) / (walk_range[1][1] - walk_range[1][0])

    def frequency(value, frequency_min=self.config.frequency_min,
                  frequency_max=self.config.frequency_max):
        return frequency_min * ((frequency_max / frequency_min) ** y(value))

    duration = 1 / self.config.frame_rate
    sound = Sound(sampling_frequency=self.config.sampling_frequency,
                  note_duration=duration)
    for i in range(steps + 1):
        index = math.ceil(i * (n / steps)) if i < steps else -1
        point = self.walk[index]
        note = [frequency(point[1]), x(point[0])]
        sound.make_sound(note[0], note[1])
        progress_bar(i, steps)
    sound.export_sound(path + "walk.wav")
    print('\n')
def main() -> None:
    if not os.path.exists('files'):
        os.mkdir('files')
        total_no_files = 1024 * 256
        sys.stdout.write(
            "\033[1mFolder with arbitrary files is not found.\033[0m\n"
            "Creating it.\n")
        for i in range(total_no_files):
            progress_bar(i, total_no_files, prefix='Creating Files:')
            with open(os.path.join('files', 'file.{}'.format(i)), 'wb') as f:
                f.seek(1024 * 1024)
                f.write(b"\0")
        sys.stdout.write('\n\n')
    if not os.path.exists('files_new'):
        os.mkdir('files_new')

    mover = Mover()
    mover.move()

    mover_process = MoverProcess()
    mover_process.move()

    mover_thread = MoverThread()
    mover_thread.move()
def create_routes(cls):
    res = requests.get(cls.API["companies"] + "?all")
    companies = res.json()
    res = requests.get(cls.API["stations"] + "?all")
    stations = res.json()
    print("=== {} companies and {} stations exist ===".format(
        len(companies), len(stations)))
    print("=============================================")
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    # Going all the way up to len(stations) exceeds the monthly API quota.
    for j in range(len(stations)):
        # progress bar
        progress_bar(j, len(stations), 20)
        station = stations[j]
        data = []
        res = requests.get(station["routes"])
        routes = res.json()["results"]
        count = len(routes)
        # already finished
        if count >= len(companies):
            continue
        # not finished
        for i in range(count, len(companies)):
            company = companies[i]
            time = Maps.directions(station["place_id"],
                                   companies[i]["place_id"])
            if time:
                route = {
                    "company": company["id"],
                    "station": station["id"],
                    "time": time
                }
                data.append(route)
        cls.create_route(data, headers)
        # re-check the station's routes and retry with every company
        # if the list is still incomplete
        res = requests.get(station["routes"])
        routes = res.json()["results"]
        count = len(routes)
        # already finished
        if count >= len(companies):
            continue
        # not finished
        for i in range(len(companies)):
            company = companies[i]
            time = Maps.directions(station["place_id"],
                                   companies[i]["place_id"])
            if time:
                route = {
                    "company": company["id"],
                    "station": station["id"],
                    "time": time
                }
                data.append(route)
        cls.create_route(data, headers)
def test(epoch):
    global Test_acc
    global best_Test_acc
    global best_Test_acc_epoch
    global Test_loss
    global Test_gp_loss

    net.eval()
    Test_loss = 0
    Test_gp_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(testloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets = Variable(inputs, requires_grad=True), Variable(targets)
        outputs = net(inputs)
        CE_loss, CE_loss_list = ensemble_CE(outputs, targets, num_models)
        GP_loss = GradPenalty()(inputs, CE_loss_list)
        loss = opt.alpha * CE_loss + opt.beta * GP_loss
        Test_loss += loss.data
        Test_gp_loss += GP_loss.data
        output_split = torch.split(outputs, num_classes, dim=-1)
        for out in output_split:
            _, p = torch.max(out.data, 1)
            correct += p.eq(targets.data).cpu().sum() / num_models
        total += targets.size(0)
        progress.progress_bar(
            batch_idx, len(testloader),
            'Total_Loss: %.3f GP_Loss: %.3f | Acc: %.3f%% (%d/%d)'
            % (Test_loss / (batch_idx + 1), Test_gp_loss / (batch_idx + 1),
               100. * float(correct) / total, correct, total))

    # Save checkpoint.
    Test_acc = 100. * float(correct) / total
    if Test_acc > best_Test_acc:
        print('Saving..')
        print("best_Test_acc: %0.3f" % Test_acc)
        state = {
            'net': net.state_dict() if use_cuda else net,
            'acc': Test_acc,
            'epoch': epoch,
        }
        if not os.path.isdir(path):
            os.mkdir(path)
        torch.save(state, os.path.join(path, 'best_model.pth'))
        best_Test_acc = Test_acc
        best_Test_acc_epoch = epoch
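# Hypothetical sketch of the `ensemble_CE` helper assumed by the two
# gradient-penalty snippets: the torch.split(outputs, num_classes, dim=-1)
# calls above suggest the network output is the concatenation of `num_models`
# logit blocks of `num_classes` each. The sketch below returns the summed
# cross-entropy plus the list of per-model losses; the exact reduction used by
# the original project is an assumption.
import torch
import torch.nn.functional as F


def ensemble_CE(outputs, targets, num_models):
    num_classes = outputs.size(-1) // num_models
    losses = [F.cross_entropy(out, targets)
              for out in torch.split(outputs, num_classes, dim=-1)]
    return sum(losses), losses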
def move(self) -> None:
    sys.stdout.write("\033[1mMoving the files from `{primary_folder}` to "
                     "`{secondary_folder}`\033[0m\n".format(
                         primary_folder=self.primary_directory,
                         secondary_folder=self.secondary_directory))
    files = os.listdir(self.primary_directory)
    for i, f in enumerate(files):
        self._move(f)
        progress_bar(i, len(files), prefix="Moving Files:")
    sys.stdout.write("\n")
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    global train_loss_ls
    correct = 0
    total = 0
    ip_loss = []
    global test_stat
    grad_loss = []
    global grad_norm
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        _, diag_args = optimizer.step()
        ip_loss.append(diag_args['ip_loss'])
        grad_loss.append(diag_args['grad_loss'])
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        if not args.debug:
            progress_bar(
                batch_idx, len(trainloader),
                'Loss: %.3f | Acc: %.3f%% (%d/%d) | IP_sum: %.3f' %
                (train_loss / (batch_idx + 1), 100. * correct / total,
                 correct, total, np.sum(ip_loss)))

    # convergence tests based on inner product of loss list from epoch
    diag_stats = {
        'ip_loss_sum': np.sum(ip_loss),
        'ip_loss_mean': np.mean(ip_loss),
        'ip_loss_std': np.std(ip_loss),
        'grad_norm_mean': np.mean(grad_loss)
    }
    grad_norm.append(diag_stats['grad_norm_mean'])
    train_loss_ls.append(train_loss)
    if (args.momentum_switch and momentum_ind == -1) or (
            not args.momentum_switch and epoch > args.burnin):
        test_stat += np.sum(ip_loss)
    return (train_loss, 100. * correct / total, diag_stats)
def move(self):
    sys.stdout.write(
        "\033[1mMoving the files from `{primary_folder}` to "
        "`{secondary_folder}` using threading\033[0m\n".format(
            primary_folder=self.primary_directory,
            secondary_folder=self.secondary_directory))
    files = os.listdir(self.primary_directory)
    with ThreadPoolExecutor(max_workers=4) as executor:
        for i, f in enumerate(files):
            progress_bar(i, len(files), prefix="Moving Files:")
            executor.submit(self._move, f)
    sys.stdout.write("\n")
def test_clean(testloader, model):
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(testloader):
        inputs, targets = inputs.cuda(), targets.cuda()
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        correct += predicted.eq(targets.data).cpu().sum()
        total += targets.size(0)
        progress.progress_bar(
            batch_idx, len(testloader),
            'clean_acc: %.3f%% (%d/%d)'
            % (100. * float(correct) / total, correct, total))
def export_csv(self):
    progress_str = 'writing csv'
    progress_bar(0, len(self._csv_items), progress_str)
    with open(self.config.csvfilename, 'w', newline='') as csvfile:
        fieldnames = list(self._csv_cols)
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        c = 0
        for item in self._csv_items:
            writer.writerow(item)
            c += 1
            progress_bar(c, len(self._csv_items), progress_str)
    print('\n')
    print('limiting: ' + str(self._rate_limiter.count()))
def train(epoch):
    print('\nEpoch: %d' % epoch)
    global Train_acc
    global train_loss
    global train_gp_loss

    net.train()
    train_loss = 0
    train_gp_loss = 0
    correct = 0
    total = 0
    scheduler.step()
    print('learning_rate: %s' % str(scheduler.get_lr()))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets = Variable(inputs, requires_grad=True), Variable(targets)
        outputs = net(inputs)
        CE_loss, CE_loss_list = ensemble_CE(outputs, targets, num_models)
        GP_loss = GradPenalty()(inputs, CE_loss_list)
        loss = opt.alpha * CE_loss + opt.beta * GP_loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.data
        train_gp_loss += GP_loss.data
        output_split = torch.split(outputs, num_classes, dim=-1)
        for out in output_split:
            _, p = torch.max(out.data, 1)
            correct += p.eq(targets.data).cpu().sum() / num_models
        total += targets.size(0)
        progress.progress_bar(
            batch_idx, len(trainloader),
            'Total_Loss: %.3f GP_Loss: %.3f | Acc: %.3f%% (%d/%d)'
            % (train_loss / (batch_idx + 1), train_gp_loss / (batch_idx + 1),
               100. * float(correct) / total, correct, total))

    Train_acc = 100. * float(correct) / total
def move(self) -> None:
    sys.stdout.write(
        "\033[1mMoving the files from `{primary_folder}` to "
        "`{secondary_folder}` using processing\033[0m\n".format(
            primary_folder=self.primary_directory,
            secondary_folder=self.secondary_directory))
    files = os.listdir(self.primary_directory)
    pool = Pool(processes=cpu_count())
    for i, f in enumerate(files):
        pool.apply_async(self._move, (f,))
        progress_bar(i, len(files), prefix="Moving files:")
    pool.close()
    pool.join()
    sys.stdout.write("\n")
def move(self): """Move the files between directories. The default method to move the files from `primary_directory` to the `secondary_directory`. Uses Thread Pool and `shutil.move` method to perform the move operations. """ sys.stdout.write( "\033[1mMoving the files from `{primary_folder}` to " "`{secondary_folder}` using threading\033[0m\n".format( primary_folder=self.primary_directory, secondary_folder=self.secondary_directory)) files = os.listdir(self.primary_directory) with ThreadPoolExecutor(max_workers=4) as executor: for i, f in enumerate(files): progress_bar(i, len(files), prefix="Moving Files:") executor.submit(self._move, f) sys.stdout.write("\n")
def move(self) -> None: """Move the files between directories. The default method to move the files from `primary_directory` to the `secondary_directory`. Uses `shutil.move` method to move files between the directories. """ sys.stdout.write( "\033[1mMoving the files from `{primary_folder}` to " "`{secondary_folder}`\033[0m\n".format( primary_folder=self.primary_directory, secondary_folder=self.secondary_directory ) ) files = os.listdir(self.primary_directory) for i, f in enumerate(files): self._move(f) progress_bar(i, len(files), prefix="Moving Files:") sys.stdout.write("\n")
def test(epoch):
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            if not args.debug:
                progress_bar(
                    batch_idx, len(testloader),
                    'Loss: %.3f | Acc: %.3f%% (%d/%d) | test stat: %.3f' %
                    (test_loss / (batch_idx + 1), 100. * correct / total,
                     correct, total, test_stat))
    return (test_loss, 100. * correct / total)
def scrape_to_get_ids(self):
    '''scrapes the game listing pages at
    http://www.boardgamegeek.com/boardgames to collect game ids into
    self._game_ids array.
    '''
    # localise a few objects for convenient access
    config = self.config
    game_ids = self._game_ids
    csv_cols = self._csv_cols
    csv_items = self._csv_items
    start_page = config.start_page
    # this is a count  #revisit: change name
    pages_to_fetch = config.pages_of_ids_to_fetch

    # update the visible progress bar so the user can see action.
    progress_str = "Scraping boardgame pages"
    progress_bar(0, pages_to_fetch, progress_str)

    for page_num in range(start_page, start_page + pages_to_fetch):
        progress_bar(page_num - start_page, pages_to_fetch, progress_str)
        url = config.base_url + config.html_path.format(page=page_num)
        # invoke the rate limiter to slow access and avoid BGG server errors
        self._rate_limiter.limit()
        page = urllib.request.urlopen(url)
        soup = bs(page, 'lxml')
        # collect game ids from the main BGG pages (not xml)
        table = soup.find('table', {'class': 'collection_table'})
        game_cells = table.find_all('td', {'class': 'collection_objectname'})
        links = []
        for cell in game_cells:
            links.append(cell.a['href'])
        # regular expression to extract the ids from the links
        id_regex = re.compile(r"/(?P<id>\d+)/")
        for link in links:
            m = id_regex.search(link)
            if not m:
                __warning("id not found in href for " + link + " in " + url)
            else:
                game_ids.append(m.group('id'))

    progress_bar(pages_to_fetch, pages_to_fetch, progress_str)
def fetch_ids_xml_and_process(self):
    '''
    Fetches the xml for batches of ids and processes them to extract
    the data in to the arrays/dicts _csv_items and _csv_cols ready for
    exporting to a csv.
    '''
    # localise variables for convenience
    config = self.config
    game_ids = self._game_ids
    csv_cols = self._csv_cols
    csv_items = self._csv_items

    # update the visible progress bar
    progress_str = "api queries: xml data for " + str(len(game_ids)) + ' games.'
    progress_counter = 0
    progress_bar(0, len(game_ids), progress_str)

    url = config.base_url + config.xml_path

    # just trying out generators. The result is a bit clunky.
    # Also I'm certain it's a python crime to yield a terminating value.
    # revisit
    def id_generator(ids):
        n = 0
        l = len(ids)
        while n < l:
            yield ids[n]
            n += 1
        # terminating value because I'm abusing generators to try them out.
        # #bad #shame #disappointedinmyself
        yield -1

    # revisit: is this being used by the testcode?
    if config.debug:
        config.games_per_xml_fetch = 1
        game_ids = ['1', '2']

    # more ids to process
    is_more_ids = True
    id_gen = id_generator(game_ids)
    while is_more_ids:
        working_url = url
        # get the next lot of ids using the generator
        id_batch = []
        for i in range(0, config.games_per_xml_fetch):
            id = next(id_gen)
            if id != -1:
                id_batch.append(id)
            else:
                # Klunk, gah!, this is why we need to revisit and
                # straighten out the generator
                is_more_ids = False
                break
        # no more ids, bug out.
        if len(id_batch) == 0:
            break

        # make the comma separated list of ids for the url
        id_str = ''
        for id in id_batch:
            id_str += id + ','
        # remove terminating comma
        id_str = id_str.strip(',')
        # make the url
        working_url = working_url.format(ids=id_str)

        # invoke the rate limiter to slow access and avoid BGG server errors
        self._rate_limiter.limit()
        xml = self.fetch_xml(working_url)
        xml_items = xml.find_all('item')
        for xml_item in xml_items:
            # the first tag 'item' needs special treatment
            # so is not processed by the recursive function
            # as otherwise we end up with multiple superfluous
            # columns derived from the 'type'
            csv_item = {}
            col_name = "/item"
            cn = col_name + ':type'
            # set the column name in the dict, accessible by itself for quick lookups.
            # (this repeats overwriting itself which is pointless,
            # but for ease of flow-control and readability it's not optimised out)
            csv_cols[cn] = cn
            # start adding data-points/cells
            csv_item[cn] = xml_item.attrs['type']
            # the items actual BGG id
            cn = col_name + ':id'
            csv_cols[cn] = cn
            csv_item[cn] = xml_item.attrs['id']
            # having done those two data points, the rest of the item's
            # sub-xml can be done quasi-generically
            for child in xml_item.children:
                # call the (quasi)generic processor which processes a single
                # tag and then calls itself for any subtags
                self.process_item_element_recursively(
                    csv_cols, csv_item, col_name, child)
            # check and neutralise dodgy data values that might be
            # spreadsheet formulas
            # revisit: needs test code
            if config.option_strip_formula_equal_sign_for_csv:
                self._security_neutralise_spreadsheet_formulas(csv_item)
            # now we have all the data, add the row
            csv_items.append(csv_item)
        # update the visible progress_bar
        progress_counter += len(xml_items)
        progress_bar(progress_counter, len(game_ids), progress_str)
    help='show the network visualization in a browser (uses D3)')
args = parser.parse_args()

root_dir = os.path.dirname(os.path.realpath(__file__))
cache_dir = os.path.join(root_dir, '.cache')

# Create the .cache directory if it doesn't already exist
if not os.path.exists(cache_dir):
    os.makedirs(cache_dir)

# Get the bills (either from cache or a fresh download)
house = 'lower' if args.house in ['lower', 'representatives'] else 'senate'
cache_filepath = os.path.join(cache_dir, '%s_%s.json' % (args.session, house))
if args.ignore_cache or not os.path.exists(cache_filepath):
    bills = []
    progress.progress_bar(0)
    for bill, i in get_bills(args.congress, args.house, limit=args.limit):
        bills.append(bill)
        progress.progress_bar(i)
    with open(cache_filepath, 'w') as fout:
        json.dump(bills, fout)
else:
    with open(cache_filepath, 'r') as fin:
        bills = json.load(fin)

g = create_graph(bills)
if args.trim is not None:
    g = trim_edges(g, weight=args.trim)

# Calculate the betweenness centralities of the nodes. Removing the weakest
# edges before calculating the betweenness centralities mainly just for
    except KeyError:
        print(res["message"])
        return {}, 0


print("============== Jobs Start ==============")
api_key = os.environ.get("SARAMIN_KEY")
count = 0
page = 0
res, length = load_data(api_key, page)
total = int(res["jobs"]["total"])
print("total : {} jobs exist".format(total))
while length:
    for i in range(length):
        # progress bar
        progress_bar(i + count, total, 20)
        # parser data initialization
        parser = Parser(res["jobs"]["job"][i])
        # scrap company and job
        parser.scrap_company()
        parser.scrap_job()
    page += 1
    count += length
    if length == 100:
        res, length = load_data(api_key, page)
        total = int(res["jobs"]["total"])
    else:
        break
print("\n============== Jobs Finish ==============")
print("============== Routes Start ==============")
def test_robust(opt, model, classifier, attack_method, c, norm=None):
    if opt.attack == 'FGSM':
        adv_crafter = FastGradientMethod(classifier, norm=norm, eps=c,
                                         targeted=False, num_random_init=0,
                                         batch_size=opt.bs)
    if opt.attack == 'PGD':
        adv_crafter = ProjectedGradientDescent(classifier, norm=norm, eps=c,
                                               eps_step=c / 10., max_iter=10,
                                               targeted=False,
                                               num_random_init=1,
                                               batch_size=opt.bs)
    if opt.attack == 'BIM':
        adv_crafter = ProjectedGradientDescent(classifier, norm=norm, eps=c,
                                               eps_step=c / 10., max_iter=10,
                                               targeted=False,
                                               num_random_init=0,
                                               batch_size=opt.bs)
    if opt.attack == 'JSMA':
        adv_crafter = SaliencyMapMethod(classifier, theta=0.1, gamma=c,
                                        batch_size=opt.bs)
    if opt.attack == 'CW':
        adv_crafter = cw.L2Adversary(targeted=False, confidence=0.01,
                                     c_range=(c, 1e10), max_steps=1000,
                                     abort_early=False, search_steps=5,
                                     box=(0., 1.0), optimizer_lr=0.01)

    correct = 0
    total = 0
    total_sum = 0
    common_id = []
    for batch_idx, (inputs, targets) in enumerate(testloader):
        inputs, targets = inputs.cuda(), targets.cuda()
        output = classifier.predict(inputs.cpu().numpy(), batch_size=opt.bs)
        output = torch.tensor(output)
        output = output.cuda()
        init_pred = output.max(1, keepdim=False)[1]
        # only score samples that were classified correctly before the attack
        common_id = np.where(
            init_pred.cpu().numpy() == targets.cpu().numpy())[0]
        if opt.attack == 'CW':
            x_test_adv = adv_crafter(model, inputs, targets, to_numpy=True)
        else:
            x_test_adv = adv_crafter.generate(x=inputs.cpu().numpy())
        perturbed_output = classifier.predict(x_test_adv)
        perturbed_output = torch.tensor(perturbed_output)
        perturbed_output = perturbed_output.cuda()
        final_pred = perturbed_output.max(1, keepdim=False)[1]
        total_sum += targets.size(0)
        total += len(common_id)
        correct += final_pred[common_id].eq(
            targets[common_id].data).cpu().sum()
        attack_acc = 100. * float(correct) / total
        progress.progress_bar(
            batch_idx, len(testloader),
            'Attack Strength:%.3f, robust accuracy: %.3f%% (%d/%d)'
            % (c, attack_acc, correct, total))