def train(num_epochs, model=adv_model, criterion=criterion, optimizer=optimizer,
          dl=dl_adversarial, device="cuda"):
    model.train()
    for epoch in tqdm(range(num_epochs), desc="Epochs", leave=False):
        running_loss = 0
        for img, label in tqdm(dl, desc=f"Epoch {epoch}", leave=False, position=0):
            img, label = img.to(device), label.to(device)
            optimizer.zero_grad()
            pred_logits = model(img).squeeze()
            loss = criterion(pred_logits, label.type(torch.float32))
            running_loss += loss.item() * img.size(0) / len(dl.dataset)
            loss.backward()
            optimizer.step()
        tqdm.write(f"Epoch {epoch}: loss={running_loss:.6f}")
def load_state_dict(self, state_dict):
    ## load the logger state
    self.logs = state_dict['logs']
    # write logs
    tqdm.write(self.logs[-1])
    for log in self.logs:
        tqdm.write(log, file=self.file)
def _train_one_epoch(self, epoch, scheduler, hide_progress):
    self.model.train()
    for img_batch, mask_batch in tqdm(self.dl_train, desc=f"Epoch {epoch}",
                                      leave=False, disable=hide_progress, position=0):
        img_batch, mask_batch = img_batch.to(self.device), mask_batch.to(self.device)
        self.optimizer.zero_grad()
        model_output = self.model(img_batch)
        loss = self.criterion(model_output, mask_batch)
        with torch.no_grad():
            self.recorder.update_record_on_batch_end(
                epoch, loss.item(), mask_batch, model_output.squeeze(),
                img_batch.size(0), self.num_train_samples)
        loss.backward()
        self.optimizer.step()
        scheduler.step()
    with torch.no_grad():
        self.recorder.finalize_record_on_epoch_end()
    tqdm.write(self.recorder.get_latest_epoch_message(training=True))
def _validate(self, epoch, hide_progress):
    assert self.dl_valid is not None
    self.model.eval()
    with torch.no_grad():
        for img_batch, mask_batch in tqdm(self.dl_valid, desc="Validating",
                                          disable=hide_progress, leave=False, position=0):
            img_batch, mask_batch = img_batch.to(self.device), mask_batch.to(self.device)
            model_output = self.model(img_batch)
            loss = self.criterion(model_output, mask_batch)
            self.recorder.update_record_on_batch_end(
                epoch, loss.item(), mask_batch, model_output.squeeze(),
                img_batch.size(0), self.num_validation_samples, training=False)
        self.recorder.finalize_record_on_epoch_end(training=False)
        tqdm.write(self.recorder.get_latest_epoch_message(training=False))
def emit(self, record):
    try:
        msg = self.format(record)
        tqdm.write(msg)
        self.flush()
    except (KeyboardInterrupt, SystemExit):
        raise
    except:
        self.handleError(record)
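# Illustrative sketch (not part of the example above): an emit() like this usually
# lives in a logging.Handler subclass that is attached to a logger, so that log
# records are routed through tqdm.write() and do not break an active progress bar.
# The class and logger names below are assumptions made for this sketch.
import logging
from tqdm import tqdm

class TqdmLoggingHandler(logging.Handler):
    def emit(self, record):
        try:
            msg = self.format(record)
            tqdm.write(msg)
            self.flush()
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception:
            self.handleError(record)

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(TqdmLoggingHandler())

for _ in tqdm(range(100)):
    logger.info("a log line that appears above the bar")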
def train(args, train_loader, triplet_net, criterion, optimizer, epoch):
    triplet_net.train()
    losses = AverageMeter()
    accs = AverageMeter()
    emb_norms = AverageMeter()
    mask_norms = AverageMeter()
    loss_acc_log = {'loss': list(), 'acc': list()}
    for batch_idx, (data1, data2, data3, c) in tqdm(enumerate(train_loader),
                                                    desc='training loop'):
        if args.cuda:
            data1 = data1.cuda()
            data2 = data2.cuda()
            data3 = data3.cuda()
            c = c.cuda()
        data1 = Variable(data1)
        data2 = Variable(data2)
        data3 = Variable(data3)
        c = Variable(c)
        dist_a, dist_b, mask_norm, embed_norm, mask_embed_norm = triplet_net(
            data1, data2, data3, c)
        target = torch.FloatTensor(dist_a.size()).fill_(1)
        if args.cuda:
            target = target.cuda()
        target = Variable(target)
        loss_triplet = criterion(dist_a, dist_b, target)
        loss_embedd = embed_norm / np.sqrt(data1.size(0))
        loss_mask = mask_norm / data1.size(0)
        loss = loss_triplet + args.embed_loss * loss_embedd + args.mask_loss * loss_mask
        losses.update(loss_triplet.data[0], data1.size(0))
        acc = accuracy(dist_a, dist_b)
        accs.update(acc, data1.size(0))
        emb_norms.update(loss_embedd.data[0])
        mask_norms.update(loss_mask.data[0])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_acc_log['loss'].append(losses.val)
        loss_acc_log['acc'].append(accs.val)
        if batch_idx % args.log_interval == 0:
            tqdm.write('Epoch: {} [{}/{}]\t'
                       'Loss: {:.4f} ({:.4f})\t'
                       'Acc: {:.2f}% ({:.3f}%)\t'
                       'emb_norm: {:.2f} ({:.2f})'.format(
                           epoch, batch_idx * len(data1), len(train_loader.dataset),
                           losses.val, losses.avg, 100. * accs.val, 100. * accs.avg,
                           emb_norms.val, emb_norms.avg))
    return loss_acc_log
def play_(bandit_name, environment, **kwargs):
    horizon = kwargs['horizon']
    obs = environment.generate(**kwargs)
    bandit = name_to_class(bandit_name)
    bandit = bandit(**kwargs)
    episods = np.array(range(horizon))
    start = time.time()
    armsPlayed = play(bandit, obs, **kwargs)
    elapsed = (time.time() - start)
    tqdm.write('elapsed : {}'.format(elapsed))
    data_episod = regret(armsPlayed, obs, horizon)
    return data_episod
def output_worker(self, output_queue):
    while True:
        message = output_queue.get()
        if message is None:
            break
        if isinstance(message, str):
            message = {'message': message}
        if 'time' not in message:
            now = datetime.now()
            message['time'] = now.strftime(time_format)

        message_type = message.get('message_type')
        if message_type == 'http':
            output_format = http_output_format
        elif message_type == 'dns':
            output_format = dns_output_format
        elif message_type == 'port_service':
            output_format = port_service_output_format
        elif message_type == 'smb':
            output_format = smb_output_format
        elif message_type == 'mssql':
            output_format = mssql_output_format
        elif message_type == 'mysql':
            output_format = mysql_output_format
        elif message_type == 'postgresql':
            output_format = postgresql_output_format
        elif 'target' in message:
            output_format = target_output_format
        else:
            output_format = simple_output_format

        message = output_format.format(**message)
        # Remove control characters which break the terminal
        message = ''.join([
            c if ord(c) not in [0x9d, 0x9e, 0x9f] else '\\x%x' % ord(c)
            for c in message
        ])

        tqdm.write(message)
        sys.stdout.flush()
def train(save_dir, load_pt=None, ver=0):
    model = AwesomeNet()
    if load_pt is not None:
        model.load_weights("./weights/{:s}/version_{:05d}".format(load_pt, ver))
    optimizer = tf.keras.optimizers.Adadelta(learning_rate=config.LEARNING_RATE)
    best_loss = float('inf')
    train_img, valid_img, num_train, num_valid = train_val_split()

    for e in range(config.EPOCHS + 1):
        batch_gen = get_batch(train_img)
        t_losses = []
        tqdm.write("Epoch: {:04d}, Training...".format(e))
        for step in tqdm(range(num_train)):
            batch_files = next(batch_gen)
            img_batch, label_batch = get_batch_data(batch_files)
            loss_train, grads = model_step(model, img_batch, label_batch)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            t_losses.append(loss_train.numpy())

        batch_gen = get_batch(valid_img)
        v_losses = []
        tqdm.write("Epoch: {:04d}, Validating...".format(e))
        for step in tqdm(range(num_valid)):
            batch_files = next(batch_gen)
            img_batch, label_batch = get_batch_data(batch_files)
            loss_valid, _ = model_step(model, img_batch, label_batch, False)
            v_losses.append(loss_valid.numpy())

        valid_loss = np.mean(v_losses)
        train_loss = np.mean(t_losses)
        tqdm.write("Epoch: {:04d}, Training Loss: {:.6f}, Validation Loss: {:.6f}".format(
            e, train_loss, valid_loss))
        if valid_loss < best_loss:
            best_loss = valid_loss
            model.save_weights("./weights/{:s}/version_{:05d}".format(save_dir, ver))
            tqdm.write("Weights saved!")
            ver += 1

    print("Final Training Loss: {:.6f}, Final Validation Loss: {:.6f}".format(
        train_loss, valid_loss))
def weightNDict():
    print "Weighting nBkgDict..."
    # for sampleID in nBkgDict:
    for sampleID in tqdm(nBkgDict):
        treeWeight = 1
        if sampleID != 0:
            mcSumW = sumWdict.get(sampleID, -1)  # Get sum of weights
            xSECxEff = -1.
            xSECxEff = xsecDB.xsectTimesEff(sampleID)  # Get xSec * filterEff
            treeWeight = xSECxEff * luminosity / mcSumW  # Weight this tree
            if treeWeight <= 0:
                # print "Encounter <=0 weight sample %d , skipped" % sampleID
                tqdm.write("Encounter <=0 weight sample %d , skipped" % sampleID)
                continue
        # else: treeWeight = 33257.2 / 10064.3  # Scale up data; comment out for correctly scaled data
        for chan in channels:
            nBkgDict[sampleID][chan] *= treeWeight
            nBkgTotDict[chan] = nBkgTotDict.get(chan, 0) + nBkgDict[sampleID][chan]
    print "nBkgDict weighted, nBkgTotDict loaded"

    print "Weighting nSigDict..."
    # for sampleID in nSigDict:
    for sampleID in tqdm(nSigDict):
        if sampleID == 0:
            continue
        mcSumW = sumWdict.get(sampleID, -1)  # Get sum of weights
        xSECxEff = -1.
        xSECxEff = xsecDB.xsectTimesEff(sampleID, 125)  # Get xSec * filterEff
        treeWeight = xSECxEff * luminosity / mcSumW  # Weight this tree
        if treeWeight <= 0:
            # print "Encounter <=0 weight sample %d , skipped" % sampleID
            tqdm.write("Encounter <=0 weight sample %d , skipped" % sampleID)
            continue
        for chan in channels:
            nSigDict[sampleID][chan] = nSigDict[sampleID][chan] * treeWeight
    print "nSigDict weighted\n"
def import_user(ctx: CLIContext, user_profile: str, username: str):
    """Import an existing user profile from legacy CDCR tool and associate with given username"""
    # check that the user is valid
    user = ctx.usersvc.get_by_username(username)
    if not user:
        print(f"No user found with username={username}")
        return
    print(f"Load user work from {user_profile}")
    with open(user_profile, "r") as f:
        total = sum([1 for line in f])
        f.seek(0)
        for line in tqdm(f, total=total):
            work = json.loads(line)
            task = ctx.tasksvc.get_by_hash(hash=work['hash'])
            if not task:
                tqdm.write(f"Could not find task with hash={work['hash']}")
                continue
            if work['label'] == "invalid":
                print(f"Mark task with hash={work['hash']} as bad")
                task.is_bad = True
                ctx.tasksvc.update(task)
            else:
                ctx.usersvc.user_add_task(user, task, work['label'])
            if work.get('iaa', False):
                print(f"Mark IAA task where hash={work['hash']}")
                task.is_iaa = True
                ctx.tasksvc.update(task)
def facebook_comments_action(namespace):
    # Handling output
    output_file = open_output_file(namespace.output)

    # Handling input
    if is_url(namespace.column):
        edit_namespace_with_csv_io(namespace, 'post_url')

    try:
        scraper = FacebookMobileScraper(namespace.cookie, throttle=namespace.throttle)
    except FacebookInvalidCookieError:
        if namespace.cookie in COOKIE_BROWSERS:
            die([
                'Could not extract relevant cookie from "%s".' % namespace.cookie
            ])
        die([
            'Relevant cookie not found.',
            'A Facebook authentication cookie is necessary to be able to access Facebook post comments.',
            'Use the --cookie flag to choose a browser from which to extract the cookie or give your cookie directly.'
        ])

    # Enricher
    enricher = casanova.enricher(namespace.file, output_file,
                                 keep=namespace.select,
                                 add=FACEBOOK_COMMENT_CSV_HEADERS)

    # Loading bar
    loading_bar = tqdm(desc='Scraping comments', dynamic_ncols=True, unit=' comments')

    for i, (row, url) in enumerate(enricher.cells(namespace.column, with_rows=True)):
        if not has_facebook_comments(url):
            tqdm.write(
                'Given url (line %i) probably cannot have Facebook comments: %s' % (i + 1, url),
                file=sys.stderr)
            continue
        batches = scraper.comments(url, per_call=True, detailed=True)
        for details, batch in batches:
            for comment in batch:
                enricher.writerow(row, comment.as_csv_row())
            loading_bar.update(len(batch))
            loading_bar.set_postfix(calls=details['calls'],
                                    replies=details['replies'],
                                    q=details['queue_size'],
                                    posts=i + 1)

    loading_bar.close()
tr_loss += loss.item()
# Run the optimizer with the gradients
optimizer.step()
scheduler.step()
model.zero_grad()

if step % steps_to_print == 0:
    # Logits is the actual output from the network:
    # the probability of being relevant or not.
    # You can check its shape (should be a vector of size 2) with logits.shape
    logits = outputs[1]
    # Send the logits to the CPU in numpy form. Easier to check what is going on.
    preds = logits.detach().cpu().numpy()
    # Bring the labels to CPU too.
    out_label_ids = inputs['labels'].detach().cpu().numpy().flatten()
    tqdm.write(f"Train ROC: {roc_auc_score(out_label_ids, preds[:, 1])}")
    # Get the actual relevance label, not only the probability.
    preds = np.argmax(preds, axis=1)
    tqdm.write(f"Train accuracy: {accuracy_score(out_label_ids, preds)}")
    tqdm.write(f"Training loss: {loss.item()}")
    tqdm.write(f"Learning rate: {scheduler.get_last_lr()[0]}")
global_step += 1

# Run an evaluation step over the eval dataset. Let's see how we are doing.
if global_step % steps_to_eval == 0:
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    for batch in tqdm(dev_data_loader, desc="Dev batch"):
def getN(fileDir):
    # import ssUtil
    from multiLepSearch.ssUtil import getCut

    # Initialize nDict
    nDict = {}
    can = ROOT.TCanvas()  # for Draw(). Not necessary, but gets rid of some out of place INFO message

    fileList = [
        d for d in listdir(fileDir)
        if isfile("%s/%s" % (fileDir, d)) and d.endswith(".root")
    ]
    nFiles = len(fileList)
    n = 0

    sw = ROOT.TStopwatch()
    sw.Start()  # Stopwatch
    print "Loading from", fileDir

    for line in tqdm(fileList):
        n += 1
        if testRun and n > 1:
            break
        # print "Folder %d of %d: %s" % (n, nFiles, line)
        tqdm.write("File %d of %d: %s" % (n, nFiles, line))

        # Skip specified directories
        if any(substr in line for substr in skipDirs):
            continue

        # Get sampleID for MC, set sampleID of data to 0
        if "data" not in line:
            match = re.search(".[0-9]{6}.", line)  # Find dataset ID
            if not match:
                # print "Cannot infer datasetID from filename %s , skipped" % line
                tqdm.write("Cannot infer datasetID from filename %s , skipped" % line)
                continue
            sampleID = int(match.group()[1:-1])
            weight = "(ElSF * MuSF * BtagSF * weight * pwt)"
        else:
            if "data15" in line:
                continue  # Reject 2015 data
            sampleID = 0  # data
            weight = "(fLwt+qFwt)"

        tc = TChain("evt2l")
        tc.Add("%s/%s" % (fileDir, line))
        # Add = tc.Add
        # for f in listdir("%s/%s" % (fileDir, line)):
        #     if re.search("root\.*[0-9]*$", f) is not None:
        #         Add("%s/%s/%s" % (fileDir, line, f))

        # The next two lines are some attempt to speed up the for loop
        Draw = tc.Draw
        GetHist = gDirectory.Get

        nSample = {}
        for chan in channels:
            # If tqdm is available, the previous line can be replaced with:
            # for chan in tqdm(channels):
            Draw("%s>>hist" % weight, "(%s)*(%s)" % (getCut(chan), weight))
            h = GetHist("hist")
            if h is None or not isinstance(h, ROOT.TH1):
                nSample[chan] = 0
            else:
                nSample[chan] = h.GetSumOfWeights()
                h.Delete()

        if nDict.get(sampleID, None) is None:
            # Save the dictionary if it doesn't exist for the given sampleID
            nDict[sampleID] = nSample
        else:
            for chan in channels:
                # Sum the entries in the dictionary if it already exists
                nDict[sampleID][chan] += nSample[chan]

    sw.Stop()
    sw.Print()  # Print stopwatch
    return nDict
def train(train_loader, val_loader, model_path, lr=0.001, num_epochs=10):
    net = models.ConvNet()
    weights = torch.randn(2)
    criterion = nn.CrossEntropyLoss(weight=weights)
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, nesterov=True)

    # begin train
    for epoch in tqdm(range(num_epochs)):
        train_loss, train_error = 0., 0.
        val_loss, val_error = 0., 0.
        correct, count, total = 0., 0., 0.

        # begin training
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            ## zero the parameter gradients
            optimizer.zero_grad()
            ## forward + backprop + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            count += 1
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        # end training

        train_loss /= count
        train_error = 1 - (correct / total)
        correct, count, total = 0., 0., 0.

        # begin validation
        for i, data in enumerate(val_loader, 0):
            inputs, labels = data
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            count += 1
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        # end validation

        val_loss /= count
        val_error = 1 - (correct / total)

        # stats
        output_string = 'general:\n\tepoch --> {} | lr --> {:04.3f}\n'.format(epoch, lr)
        output_string += 'train:\n\tloss --> {:04.3f} | error --> {:04.3f}\n'.format(
            train_loss, train_error)
        output_string += 'validation:\n\tloss --> {:04.3f} | error --> {:04.3f}\n'.format(
            val_loss, val_error)
        # print output_string
        tqdm.write(output_string)
    # end train

    # save model
    torch.save(net, model_path)
    return
def insert_struct_from_file(db, filename):
    """
    Insert a DicomStruct from a filename
    """
    struct = Box()
    try:
        ds = pydicom.read_file(filename)
    except:
        tqdm.write('Ignoring {}: not a Structure'.format(Path(filename).name))
        return {}
    if ds.Modality != 'RTSTRUCT':
        return {}
    try:
        sop_uid = ds.data_element("SOPInstanceUID").value
    except:
        tqdm.write('Ignoring {}: cannot read UIDs'.format(filename))
        return {}

    # check if the file already exists in the db
    dicom_file = syd.find_one(db['DicomFile'], sop_uid=sop_uid)
    if dicom_file is not None:
        tqdm.write('Ignoring {}: Dicom SOP Instance already in the db'.format(Path(filename)))
        return {}

    try:
        frame_of_ref = ds.ReferencedFrameOfReferenceSequence[0]
        study = frame_of_ref.RTReferencedStudySequence[0]
        series_uid = study.RTReferencedSeriesSequence[0].SeriesInstanceUID
    except:
        tqdm.write('Ignoring {}: Cannot read SeriesInstanceUID'.format(Path(filename).name))
        return {}

    try:
        creation_date = ds.StudyDate
        creation_time = ds.StudyTime
    except:
        try:
            creation_date = ds.StructureSetDate
            creation_time = ds.StructureSetTime
        except:
            print(f'Could not find date for the file : {filename}')
    creation_date = dcm_str_to_date(creation_date + ' ' + creation_time)

    try:
        dicom_serie = syd.find_one(db['DicomSeries'], series_uid=series_uid)
    except:
        tqdm.write('Ignoring {} : Cannot read DicomSeries'.format(Path(filename).name))
        return {}

    if dicom_serie is not None:
        struct_names = [str(ssroi.ROIName) for ssroi in ds.StructureSetROISequence]
        separator = ';'
        struct_names = separator.join(struct_names)
        struct = {
            'dicom_series_id': dicom_serie['id'],
            'names': struct_names,
            'series_uid': series_uid,
            'frame_of_reference_uid': dicom_serie['frame_of_reference_uid'],
            'creation_date': creation_date,
            'sop_uid': sop_uid
        }
        struct = syd.insert_one(db['DicomStruct'], struct)
        dicom_file = insert_file(db, ds, filename, struct)
        return struct
    else:
        tqdm.write('Ignoring {} : Cannot find matching DicomSeries'.format(Path(filename).name))
        return {}
def insert_roi_from_struct(db, struct, crop):
    """
    Insert an ROI from a DicomStruct file
    """
    roi = Box()
    res = []
    series_id = struct['dicom_series_id']
    dicom_series = syd.find_one(db['DicomSeries'], id=series_id)
    acquisition = syd.find_one(db['Acquisition'], id=dicom_series['acquisition_id'])
    injection_id = acquisition['injection_id']
    acquisition_id = dicom_series['acquisition_id']
    injection = syd.find_one(db['Injection'], id=injection_id)
    patient = syd.find_one(db['Patient'], id=injection['patient_id'])

    ### Getting the CT image path ###
    image_ct = syd.find_one(db['Image'], dicom_series_id=series_id)
    try:
        file_img = syd.find_one(db['File'], id=image_ct['file_mhd_id'])
    except:
        print('Could not find the CT image in the database')
    filename_img_ct = db.absolute_data_folder + '/' + file_img['folder'] + '/' + file_img['filename']

    ### Getting the DicomStruct dicom path ###
    dicom_file = syd.find_one(db['DicomFile'], dicom_struct_id=struct['id'])
    file_struct = syd.find_one(db['File'], id=dicom_file['file_id'])
    filename_struct = db.absolute_data_folder + '/' + file_struct['folder'] + '/' + file_struct['filename']

    ### Verifying if the ROI already exists in the table ###
    e = syd.find(db['Roi'], dicom_struct_id=struct['id'])
    if e != []:
        return {}

    ### Using GateTools to extract the image from the Dicom File ###
    structset = pydicom.read_file(filename_struct)
    img_ct = itk.imread(filename_img_ct, itk.F)
    base_filename, extension = os.path.splitext(filename_img_ct)
    roi_names = gt.list_roinames(structset)
    roi_objs = list()
    npbar = 0
    pbar = None
    for r in roi_names:
        try:
            aroi = gt.region_of_interest(structset, r)
            if not aroi.have_mask():
                tqdm.write(f'Mask for {r} not possible')
            roi_objs.append(aroi)
        except:
            tqdm.write(f'Something is wrong with ROI {r}')
            roi.remove(r)

    if npbar > 0:
        pbar = tqdm(total=npbar, leave=False)

    for roiname, aroi in zip(roi_names, roi_objs):
        try:
            mask = aroi.get_mask(img_ct, corrected=False, pbar=pbar)
            if crop:
                mask = gt.image_auto_crop(mask, bg=0)
            output_filename = base_filename + '_' + ''.join(
                e for e in roiname if e.isalnum()) + '.mhd'
            im = {
                'patient_id': patient['id'],
                'injection_id': injection_id,
                'acquisition_id': acquisition_id,
                'pixel_unit': 'binary',
                'pixel_type': 'float',
                'frame_of_reference_uid': dicom_series['frame_of_reference_uid'],
                'modality': 'RTSTRUCT',
                'labels': roiname
            }
            im = syd.insert_write_new_image(db, im, mask)
            roi = {
                'dicom_struct_id': struct['id'],
                'image_id': im['id'],
                'frame_of_reference_uid': struct['frame_of_reference_uid'],
                'name': roiname,
                'labels': None
            }
            roi = syd.insert_one(db['Roi'], roi)
            im['roi_id'] = roi['id']
            res.append(roi)
            syd.update_one(db['Image'], im)
            syd.update_roi_characteristics(db, roi)
        except:
            tqdm.write(f'Error in {roiname, aroi}')

    if npbar > 0:
        pbar.close()
    return res
from time import sleep

import tqdm

fruits = [
    "Acai", "Apple", "Apricots", "Avocado", "Banana", "Blackberry",
    "Blueberries", "Cherries", "Coconut", "Cranberry", "Cucumber", "Durian",
    "Fig", "Grapefruit", "Grapes", "Kiwi", "Lemon", "Lime", "Mango", "Melon",
    "Orange", "Papaya", "Peach", "Pear", "Pineapple", "Pomegranate",
    "Raspberries", "Strawberries", "Watermelon"
]

contains_berry = 0
for fruit in tqdm.tqdm(fruits):
    if "berr" in fruit.lower():
        contains_berry += 1
    sleep(.1)
print(contains_berry)

contains_berry = 0
pbar = tqdm.tqdm(fruits, desc="Reviewing names", unit="fruits")
for fruit in pbar:
    if "berr" in fruit.lower():
        contains_berry += 1
    pbar.set_postfix(hits=contains_berry)
    sleep(.1)
print(contains_berry)

for i in tqdm.trange(100000, unit="telnet", desc='Trange:'):
    tqdm.tqdm.write(str(i))
import builtins

def print(*args, **kwargs):
    try:
        tqdm.write(*args, **kwargs)
    except:
        # Fall back to the built-in print explicitly; a plain print() call here
        # would recurse into this override.
        builtins.print(*args, **kwargs)
for batch in progress_bar:
    model.zero_grad()
    batch = tuple(b.to(device) for b in batch)
    inputs = {
        'input_ids': batch[0],
        'attention_mask': batch[1],
        'labels': batch[2]
    }
    outputs = model(**inputs)
    loss = outputs[0]
    loss_train_total += loss.item()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
    optimizer.step()
    scheduler.step()
    progress_bar.set_postfix(
        {'training_loss': '{:.3f}'.format(loss.item() / len(batch))})

torch.save(model.state_dict(), f'Models/BERT_ft_epoch{epoch}.model')
tqdm.write(f'\nEpoch {epoch}')
# loss_train_avg = loss_train_total/len(dataloader)
def occasional_jobs(self, sess, global_step):
    ckpt_filename = os.path.join(self.args.train_dir, "myckpt")
    if global_step % self.args.save_every == 0:
        save_path = self.saver.save(sess, ckpt_filename, global_step=global_step)
        tqdm.write("saved at " + save_path)
def tprint(*args, **kwargs):
    return tqdm.write(*args, file=sys.stderr, **kwargs)
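# Illustrative usage (not part of the original snippet): tprint keeps messages from
# clobbering an active bar, because tqdm.write() temporarily clears the bar before
# printing. The loop below is an assumption added for this sketch.
import sys
from tqdm import tqdm

for i in tqdm(range(100), file=sys.stderr):
    if i % 25 == 0:
        tprint("reached step {}".format(i))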
outSig.write("nSig(0.2),nBkg(0.2),sigma(0.2),") outSig.write("nSig(0.3),nBkg(0.3),sigma(0.3)\n") for directory in tqdm(sys.argv[1:]): directory = directory.rstrip('/') if not (isdir(directory)): continue files = listdir(directory) plotDir = "%s/plots" % directory if not isdir(plotDir): mkdir(plotDir) for file in tqdm(files): if not (file.endswith(".root")): continue # print ("## Now processing %s/%s##" % (directory, file)) tqdm.write("## Now processing %s/%s##" % (directory, file)) dm = int( re.search("[0-9]+", re.search("dm[0-9]+", file).group()).group()) NodeSize = int( re.search("[0-9]+", re.search("NodeSize[0-9]+", directory).group()).group()) NTrees = int( re.search("[0-9]+", re.search("_[0-9]+_", directory).group()).group()) channel = int( re.search("[0-9]+", re.search("Channel[0-9]+", file).group()).group()) Depth = int( re.search("[0-9]+",
def __call__(self, epoch):
    if (epoch + 1) % self.recreation_epoch_frequency == 0:
        self.cropper.crop_all(self.image_ids, self.src_image_dir,
                              self.crop_image_dir, self.crop_mask_dir)
        tqdm.write("Recreated random crops!")
def main():
    parser = argparse.ArgumentParser(description='This is a WIP program')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--epochs', type=int, default=200, metavar='N',
                        help='number of epochs to train (default: 200)')
    parser.add_argument('--start_epoch', type=int, default=1, metavar='N',
                        help='number of start epoch (default: 1)')
    parser.add_argument('--lr', type=float, default=5e-5, metavar='LR',
                        help='learning rate (default: 5e-5)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='enables CUDA training')
    parser.add_argument('--log-interval', type=int, default=20, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--margin', type=float, default=0.2, metavar='M',
                        help='margin for triplet loss (default: 0.2)')
    parser.add_argument('--resume', default='', type=str,
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--name', default='Conditional_Similarity_Network', type=str,
                        help='name of experiment')
    parser.add_argument('--embed_loss', type=float, default=5e-3, metavar='M',
                        help='parameter for loss for embedding norm')
    parser.add_argument('--mask_loss', type=float, default=5e-4, metavar='M',
                        help='parameter for loss for mask norm')
    parser.add_argument('--num_traintriplets', type=int, default=100000, metavar='N',
                        help='how many unique training triplets (default: 100000)')
    parser.add_argument('--dim_embed', type=int, default=64, metavar='N',
                        help='how many dimensions in embedding (default: 64)')
    parser.add_argument('--test', dest='test', action='store_true',
                        help='To only run inference on test set')
    parser.add_argument('--learned', dest='learned', action='store_true',
                        help='To learn masks from random initialization')
    parser.add_argument('--prein', dest='prein', action='store_true',
                        help='To initialize masks to be disjoint')
    parser.add_argument('--conditions', nargs='*', type=int,
                        help='Set of similarity notions')
    parser.add_argument('--out', type=str, default='result',
                        help='dir to save models and log')
    parser.set_defaults(test=False)
    parser.set_defaults(learned=False)
    parser.set_defaults(prein=False)
    args = parser.parse_args()

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    if args.conditions is not None:
        conditions = args.conditions
    else:
        conditions = list(range(4))

    kwargs = {'num_workers': 4, 'pin_memory': True} if args.cuda else dict()
    train_loader = torch.utils.data.DataLoader(
        TripletImageLoader('data', 'ut-zap50k-images', 'filenames.json',
                           conditions, 'train',
                           n_triplets=args.num_traintriplets,
                           transform=T.Compose([
                               T.Scale(112),
                               T.CenterCrop(112),
                               T.RandomHorizontalFlip(),
                               T.ToTensor(),
                               normalize,
                           ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        TripletImageLoader('data', 'ut-zap50k-images', 'filenames.json',
                           conditions, 'test',
                           n_triplets=160000,
                           transform=T.Compose([
                               T.Scale(112),
                               T.CenterCrop(112),
                               T.ToTensor(),
                               normalize,
                           ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    val_loader = torch.utils.data.DataLoader(
        TripletImageLoader('data', 'ut-zap50k-images', 'filenames.json',
                           conditions, 'val',
                           n_triplets=80000,
                           transform=T.Compose([
                               T.Scale(112),
                               T.CenterCrop(112),
                               T.ToTensor(),
                               normalize,
                           ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)

    model = resnet_18.resnet18(pretrained=True, embedding_size=args.dim_embed)
    csn_model = ConditionalSimNet(model, n_conditions=len(conditions),
                                  embedding_size=args.dim_embed,
                                  learnedmask=args.learned, prein=args.prein)
    mask_var = csn_model.masks.weight
    triplet_net = CS_Tripletnet(csn_model)
    if args.cuda:
        triplet_net.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            triplet_net.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    cudnn.benchmark = True

    criterion = torch.nn.MarginRankingLoss(margin=args.margin)
    parameters = filter(lambda p: p.requires_grad, triplet_net.parameters())
    optimizer = optim.Adam(parameters, lr=args.lr)

    n_param = sum([p.data.nelement() for p in triplet_net.parameters()])
    print('# of parameters: {}'.format(n_param))
    print('\n\n')

    if args.test:
        import sys
        test_loss, test_acc = test(args, test_loader, triplet_net, criterion, args.epochs + 1)
        print('accuracy: {}, loss: {}'.format(test_acc.avg, test_loss.avg))
        sys.exit()

    root = os.path.join(args.out, dt.now().strftime('%m%d_%H%M'))
    if not os.path.exists(root):
        os.makedirs(root)

    best_acc = .0
    log = dict()
    start_time = dt.now()
    for epoch in tqdm(range(args.start_epoch, args.epochs + 1), desc='total'):
        adjust_lr(args, optimizer, epoch)
        loss_acc_log = train(args, train_loader, triplet_net, criterion, optimizer, epoch)
        log['epoch_{}_train'.format(epoch)] = loss_acc_log
        losses, accs = test(args, val_loader, triplet_net, criterion, epoch)
        log['epoch_{}_val'.format(epoch)] = {'loss': losses.avg, 'acc': 100. * accs.avg}
        tqdm.write('[validation]\nloss: {:.4f}\tacc: {:.2f}%\n'.format(
            losses.avg, 100. * accs.avg))
        is_best = accs.avg > best_acc
        best_acc = max(accs.avg, best_acc)
        save_ckpt(root, triplet_net.state_dict(), is_best)
    end_time = dt.now()
    print('\ntraining finished.')
    print('started at {}, ended at {}, duration is {} hours'.format(
        start_time.strftime('%m%d, %H:%M'), end_time.strftime('%m%d, %H:%M'),
        (end_time - start_time).total_seconds() / 3600.))

    save_ckpt(root, triplet_net.state_dict(), filename='model.pth')
    log_filepath = os.path.join(root, 'log.pkl')
    with open(log_filepath, 'wb') as f:
        pickle.dump(log, f)
    print('log files saved at {}'.format(log_filepath))
def decode_recognition(rec):
    CTLABELS = [' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/','0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?','@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','{','|','}','~']
    s = ''
    for c in rec:
        c = int(c)
        if c < 95:
            s += CTLABELS[c]
        elif c == 95:
            s += u'口'
    return s

p = []
for path in tqdm(inmages):
    # use PIL, to be consistent with evaluation
    img = read_image(path, format="BGR")
    start_time = time.time()
    predictions, visualized_output = demo.run_on_image(img)
    tqdm.write(
        "{}: detected {} instances in {:.2f}s".format(
            path, len(predictions["instances"]), time.time() - start_time
        )
    )
    p.append([decode_recognition(r) for r in predictions["instances"].recs])

anott = pd.DataFrame({'path': inmages, 'annot': p})
## First line for train data, second line for test data
anott.to_csv("../../train_ocr.csv", index=False)
# anott.to_csv("../../test_ocr.csv", index=False)
def write(self, x):
    # Avoid print() second call (useless \n)
    if len(x.rstrip()) > 0:
        tqdm.write(x, file=self.file)
    fps = vcap.get(cv2.CAP_PROP_FPS)
    frame_count = vcap.get(cv2.CAP_PROP_FRAME_COUNT)
else:
    frame_width = vcap.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH)
    frame_height = vcap.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)
    fps = vcap.get(cv2.cv.CV_CAP_PROP_FPS)
    frame_count = vcap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT)

cur_frame = 0
while cur_frame < frame_count:
    suc, frame = vcap.read()
    if not suc:
        cur_frame += 1
        tqdm.write("warning, %s frame of %s failed" % (cur_frame, video_name))
        continue
    frame = frame.astype("float32")
    if args.resize:
        img_w, img_h = get_new_hw(frame.shape[0], frame.shape[1], args.size, args.maxsize)
        frame = cv2.resize(frame, (img_w, img_h), interpolation=cv2.INTER_LINEAR)
    cv2.imwrite(os.path.join(target_path, "%s_F_%08d.jpg" % (video_name, cur_frame)), frame)
if __name__ == '__main__':
    parse = argparse.ArgumentParser()
    parse.add_argument('--images-dir', '-i', required=True, help='the images path')
    parse.add_argument('--label-file', '-l', required=True, help='the .txt label file of images')
    parse.add_argument('--out-dir', '-o', default='./out/', help='the output dir')
    parse.add_argument('--resize', '-r', type=int)
    args = parse.parse_args()

    # Create the output directory
    os.makedirs(os.path.dirname(args.out_dir), exist_ok=True)

    # Scan all images
    all_images = [name for name in os.listdir(args.images_dir)]
    tqdm.write('Found total {} images in "{}".'.format(len(all_images), args.images_dir))

    # read label file
    names = ['image', 'label']
    cols = [0, 1]
    types = dict(zip(names, ['str', 'int']))
    txt_df = pd.read_csv(args.label_file, sep='\t', names=names, usecols=cols, dtype=types)
    tqdm.write('Found {} labels in {}'.format(txt_df.shape[0], args.label_file))
    txt_df = txt_df[txt_df['image'].isin(all_images)]
    txt_df.reset_index(drop=True, inplace=True)

    # read images and save to .npy
    image_size = args.resize or None
    labels = []
    images = []
    for i, row in tqdm(txt_df.iterrows(), total=txt_df.shape[0], desc='reading'):
def write(self, line):
    ''' Potentially write to the stream '''
    if line.rstrip():  # avoid printing empty lines (only whitespace)
        tqdm.write(line, file=self.stream)
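# Illustrative sketch (the DummyTqdmFile name and the redirect scaffolding are
# assumptions, not from the original code). A file-like object with a write()
# like the one above can stand in for sys.stdout, so that plain print() calls made
# inside a tqdm loop are routed through tqdm.write() and do not mangle the bar.
import contextlib
import sys
from tqdm import tqdm

class DummyTqdmFile:
    def __init__(self, stream):
        self.stream = stream

    def write(self, line):
        if line.rstrip():  # skip the bare newline that print() emits separately
            tqdm.write(line, file=self.stream)

    def flush(self):
        return getattr(self.stream, "flush", lambda: None)()

orig_stdout = sys.stdout
with contextlib.redirect_stdout(DummyTqdmFile(orig_stdout)):
    for i in tqdm(range(3), file=orig_stdout, dynamic_ncols=True):
        print("step {}".format(i))  # goes through tqdm.write, bar stays intact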