def read_translation_model(file_name, feature_weights, top_translations,
                           max_phrase_length):
    """Read the translation model"""
    translation_model = defaultdict(list)
    document = open(file_name, 'r')
    num_lines = sum(1 for line in open(file_name, 'r'))
    point = num_lines / 100 if num_lines > 100 else 1
    for i, line in enumerate(document):
        if i % point == 0:
            show_progress(i, num_lines, 40, 'LOADING TRANSLATIONMODEL')
        segments = line.strip().split(' ||| ')
        source = tuple(segments[0].split())
        if len(source) > max_phrase_length:
            continue
        target = tuple(segments[1].split())
        probs = tuple(float(prob) for prob in segments[2].split())
        # weighted sum of conditional probabilities and lexical weights
        # (index renamed to j so it cannot shadow the line counter i)
        measure = sum(prob * feature_weights[j]
                      for j, prob in enumerate(probs))
        # keep only the top_translations best-scoring targets per source
        if len(translation_model[source]) < top_translations:
            heapq.heappush(translation_model[source],
                           (measure, target, probs))
        else:
            heapq.heappushpop(translation_model[source],
                              (measure, target, probs))
    show_progress(1, 1, 40, 'LOADING TRANSLATIONMODEL')
    sys.stdout.write('\n')
    document.close()
    return {s: [(t, p) for (m, t, p) in mtp]
            for (s, mtp) in translation_model.iteritems()}
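# All of the loaders and training loops collected here report progress through
# a shared `show_progress` helper whose implementation is not included. As a
# rough orientation only, here is a minimal sketch of the four-argument
# variant used by the model-loading functions (current item, total, bar width,
# label); other call sites pass epoch/batch counters or metric kwargs, so the
# real helper is clearly more flexible than this assumed version.
import sys

def show_progress(current, total, bar_length=40, label=''):
    """Assumed sketch: render an in-place console progress bar."""
    fraction = float(current) / total if total else 1.0
    filled = int(bar_length * fraction)
    bar = '#' * filled + '-' * (bar_length - filled)
    sys.stdout.write('\r%s [%s] %3d%%' % (label, bar, int(100 * fraction)))
    sys.stdout.flush()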
def train(self):
    train_start = time.time()
    for e in range(self.args.num_epochs):
        for i in range(self.num_batches):
            time_s = time.time()
            _, loss, epe, reg_loss = self.sess.run(
                [self.optimizer, self.loss, self.epe, self.weights_l2])
            if i % 20 == 0:
                batch_time = time.time() - time_s
                kwargs = {'loss': loss,
                          'reg_loss': reg_loss,
                          'epe': epe,
                          'batch time': batch_time}
                show_progress(e + 1, i + 1, self.num_batches, **kwargs)

        # validation
        loss_vals, epe_vals, reg_vals = [], [], []
        self.sess.run([self.initializer_v])
        for i in range(self.num_batches_v):
            image0_v, image1_v, flows_val, loss_val, epe_val, reg_val \
                = self.sess.run([self.image0_v, self.image1_v, self.flow_v,
                                 self.loss_v, self.epe_v, self.weights_l2])
            loss_vals.append(loss_val)
            epe_vals.append(epe_val)
            reg_vals.append(reg_val)

        g_step = self.sess.run(self.global_step)
        print(f'\r{e+1} epoch validation, loss: {np.mean(loss_vals)},'
              f' reg_loss: {np.mean(reg_vals)}, epe: {np.mean(epe_vals)},'
              f' global step: {g_step},'
              f' elapsed time: {time.time()-train_start} sec.')

        if not os.path.exists('./model'):
            os.mkdir('./model')
        self.saver.save(self.sess, f'./model/model_{e+1}.ckpt')
def read_freqs(path, label='FREQS'):
    """Read freqs from an _extracted_lexwords.txt or an
    _extracted_phrases.txt file"""
    phrase_pair_freqs = defaultdict(int)
    source_freqs = defaultdict(int)
    target_freqs = defaultdict(int)
    num_lines = sum(1 for line in open(path, 'r'))
    point = num_lines / 100 if num_lines > 100 else 1
    with open(path, 'r') as in_file:
        for i, line in enumerate(in_file):
            if i % point == 0:
                show_progress(i, num_lines, 40, label)
            source, target, freqs = line.strip().split(' ||| ')
            source_freq, target_freq, pair_freq = \
                [int(x) for x in freqs.split()]
            phrase_pair_freqs[(source, target)] = pair_freq
            source_freqs[source] = source_freq
            target_freqs[target] = target_freq
    show_progress(1, 1, 40, label)
    sys.stdout.write('\n')
    return phrase_pair_freqs, source_freqs, target_freqs
def read_full_translation_model(file_name, max_phrase_length):
    """Read the full translation model, taking into account the maximal
    phrase length"""
    translation_model = defaultdict(list)
    document = open(file_name, 'r')
    num_lines = sum(1 for line in open(file_name, 'r'))
    point = num_lines / 100 if num_lines > 100 else 1
    for i, line in enumerate(document):
        if i % point == 0:
            show_progress(i, num_lines, 40, 'LOADING FULLTRANSLATIONMODEL')
        segments = line.strip().split(' ||| ')
        source = tuple(segments[0].split())
        if len(source) > max_phrase_length:
            continue
        target = tuple(segments[1].split())
        probs = tuple(float(prob) for prob in segments[2].split())
        translation_model[source].append((target, probs))
    show_progress(1, 1, 40, 'LOADING FULLTRANSLATIONMODEL')
    sys.stdout.write('\n')
    document.close()
    return translation_model
def conditional_probabilities(phrase_pair_freqs, source_phrase_freqs,
                              target_phrase_freqs, label, logprob):
    """Calculate conditional probability of phrase pairs in both directions.

    Input:
    phrase_pair_freqs -- counts of phrase pairs
    source_phrase_freqs -- counts of phrases in language 1
    target_phrase_freqs -- counts of phrases in language 2
    label -- used to indicate current process
    logprob -- boolean, if true, probabilities are used in log-form
    Returns 2 dictionaries mapping phrase pair to P(source|target) and
    P(target|source)
    """
    source_given_target = {}
    target_given_source = {}
    num_phrases = len(phrase_pair_freqs)
    point = num_phrases / 100 if num_phrases > 100 else 1
    # The parentheses matter here: without them the conditional expression is
    # parsed as part of the first lambda's body, and prob() would return a
    # lambda instead of a number whenever logprob is False.
    prob = (lambda f1, f2: math.log(float(f1) / f2)) if logprob else \
           (lambda f1, f2: float(f1) / f2)
    for i, (phrase_pair, freq) in enumerate(phrase_pair_freqs.iteritems()):
        if i % point == 0:
            show_progress(i, num_phrases, 40, label)
        try:
            source_given_target[phrase_pair] = \
                prob(freq, source_phrase_freqs[phrase_pair[0]])
            target_given_source[phrase_pair] = \
                prob(freq, target_phrase_freqs[phrase_pair[1]])
        except:
            _log('phrase pair : {}\ni : {}'.format(phrase_pair, i))
            raise
    show_progress(num_phrases, num_phrases, 40, label)
    sys.stdout.write('\n')
    return source_given_target, target_given_source
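# A toy invocation of conditional_probabilities (hypothetical counts, Python 2
# to match the snippet above). Note that, as written, the function divides the
# pair count by the *source* frequency for source_given_target and by the
# *target* frequency for target_given_source.
pair_freqs = {('la maison', 'the house'): 8, ('maison', 'house'): 10}
source_freqs = {'la maison': 10, 'maison': 12}
target_freqs = {'the house': 9, 'house': 11}
s_given_t, t_given_s = conditional_probabilities(
    pair_freqs, source_freqs, target_freqs, 'COND PROBS', logprob=False)
# s_given_t[('la maison', 'the house')] == 8 / 10.  (pair freq / source freq)
# t_given_s[('la maison', 'the house')] == 8 / 9.   (pair freq / target freq)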
def resize_npImages(self, np_imgs, resize):
    ret_imgs = []
    for i, img in enumerate(np_imgs):
        utils.show_progress(i, len(np_imgs))
        img = Image.fromarray(img).convert('RGB').resize(resize)
        img = np.asarray(img)
        ret_imgs.append(img)
    return np.asarray(ret_imgs)
def extract_lexical_reordering_counts(alignments_file, source_file,
                                      target_file, max_length,
                                      max_lines=None):
    """
    For the left-to-right and right-to-left models calculate:
    c(m,(f,e)), c(s,(f,e)), c(d_l,(f,e)), c(d_r,(f,e))
    where m=monotone, s=swap, d_l=left-discontinuous, d_r=right-discontinuous
    """
    # open files
    num_lines = sum(1 for line in open(alignments_file))
    max_lines = int(max_lines) if max_lines else num_lines
    alignments = open(alignments_file, 'r')
    source = open(source_file, 'r')
    target = open(target_file, 'r')

    reordering_counts = {}  # maps phrase pair to its reordering counts
    point = max_lines / 100 if max_lines > 100 else 1
    for i, str_align in enumerate(alignments):
        if i % point == 0:
            show_progress(i, max_lines, 40, 'LEXICAL REORDERING')
        if i == max_lines:
            break
        source_words = source.next().strip().split()
        target_words = target.next().strip().split()
        source_length = len(source_words)
        target_length = len(target_words)
        align = str_to_alignments(str_align)
        # a word pair (i, j) repeated gives the degenerate range (i, j, i, j)
        word_phrase_pairs = set([word_pair * 2 for word_pair in align])
        # phrase_to_internal is a dict mapping phrase ranges (source_min,
        # target_min, source_max, target_max) to internal word alignments
        phrase_to_internal = extract_alignments(set(align), source_length,
                                                target_length, max_length)
        try:
            phrase_pairs = set(phrase_to_internal.keys())
            for left_phrase_range in phrase_pairs:
                # phrase based counting events
                for right_phrase_range in phrase_pairs - word_phrase_pairs:
                    update_count(left_phrase_range, right_phrase_range,
                                 reordering_counts, source_words,
                                 target_words)
                # word based counting events
                for right_phrase_range in word_phrase_pairs:
                    update_count(left_phrase_range, right_phrase_range,
                                 reordering_counts, source_words,
                                 target_words)
        except:
            print 'source: \n%s' % ' '.join(source_words)
            print 'target: \n%s' % ' '.join(target_words)
            print 'alignment: \n%s' % str_align
            raise

    show_progress(max_lines, max_lines, 40, 'LEXICAL REORDERING')
    return reordering_counts
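# For orientation: in this style of lexicalized reordering model, an event is
# counted as monotone (m) when the phrase following (f, e) on the source side
# also directly follows it on the target side, as swap (s) when it directly
# precedes it, and as left- or right-discontinuous (d_l, d_r) otherwise. The
# raw counts gathered here are later normalised into orientation
# probabilities per phrase pair.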
def train(self):
    train_start = time.time()
    for e in range(self.args.num_epochs):
        for i, (images, flows_gt) in enumerate(self.train_loader):
            images = images.numpy() / 255.0
            flows_gt = flows_gt.numpy()

            time_s = time.time()
            _, _, loss, epe = \
                self.sess.run([self.optimizer, self.global_step_update,
                               self.loss, self.epe],
                              feed_dict={self.images: images,
                                         self.flows_gt: flows_gt})
            if i % 20 == 0:
                batch_time = time.time() - time_s
                kwargs = {'loss': loss, 'epe': epe, 'batch time': batch_time}
                show_progress(e + 1, i + 1, self.num_batches, **kwargs)

        # validation
        loss_vals, epe_vals = [], []
        for images_val, flows_gt_val in self.val_loader:
            images_val = images_val.numpy() / 255.0
            flows_gt_val = flows_gt_val.numpy()
            flows, loss_val, epe_val \
                = self.sess.run([self.flows, self.loss, self.epe],
                                feed_dict={self.images: images_val,
                                           self.flows_gt: flows_gt_val})
            loss_vals.append(loss_val)
            epe_vals.append(epe_val)

        g_step = self.sess.run(self.global_step)
        print(f'\r{e+1} epoch validation, loss: {np.mean(loss_vals)},'
              f' epe: {np.mean(epe_vals)}, global step: {g_step},'
              f' elapsed time: {time.time()-train_start} sec.')

        # visualize estimated optical flow
        if self.args.visualize:
            if not os.path.exists('./figure'):
                os.mkdir('./figure')
            # Estimated flow values are downscaled; rescale them to be
            # comparable with the ground truth
            flow_set = []
            for l, flow in enumerate(flows):
                upscale = 20 / 2**(self.args.num_levels - l)
                flow_set.append(flow[0] * upscale)
            flow_gt = flows_gt_val[0]
            images_v = images_val[0]
            vis_flow_pyramid(flow_set, flow_gt, images_v,
                             f'./figure/flow_{str(e+1).zfill(4)}.pdf')

        if not os.path.exists('./model'):
            os.mkdir('./model')
        self.saver.save(self.sess, f'./model/model_{e+1}.ckpt')
def rect2square_imgs(self, imgs):
    print np.shape(imgs)
    ret_imgs = []
    for count, img in enumerate(imgs):
        show_progress(count, len(imgs))
        padded_img = self.rect_2_square(img)
        ret_imgs.append(padded_img)
    return ret_imgs
def train(self, num_epochs, batch_size):
    num_batches = int(len(self.x_train) / batch_size)
    print('epochs : {}, number of batches : {}'
          .format(num_epochs, num_batches))

    lap_times = []
    for e in range(num_epochs):
        permute_idx = np.random.permutation(np.arange(50000))
        lap_time = []
        for b in range(num_batches):
            x_batch = self.x_train[permute_idx[b * batch_size:
                                               (b + 1) * batch_size]]
            y_batch = self.y_train[permute_idx[b * batch_size:
                                               (b + 1) * batch_size]]

            s_time = time.time()
            loss = self.net.train_on_batch(x_batch, y_batch)
            e_time = time.time()
            lap_time.append(e_time - s_time)

            if b % 10 == 0:
                preds = self.net.predict(x_batch)
                acc = np.mean(np.sum(preds * y_batch, axis=1))
                show_progress(e + 1, b + 1, num_batches, loss, acc)
        lap_times.append(np.sum(lap_time))

        # validation
        accs_val = []
        for b in range(int(len(self.x_test) / batch_size)):
            x_val = self.x_test[b * batch_size:(b + 1) * batch_size]
            y_val = self.y_test[b * batch_size:(b + 1) * batch_size]
            preds_val = self.net.predict(x_val)
            acc_val = np.mean(np.sum(preds_val * y_val, axis=1))
            accs_val.append(acc_val)
        print('\n{} epoch validation accuracy {}'
              .format(e + 1, np.mean(accs_val)))

        # save trained model
        self.net.save_weights('./model_keras/model_{}.h5'.format(e))

    with open('./lap_record.csv', 'a') as f:
        f.write('keras')
        for lap in lap_times:
            f.write(',' + str(lap))
        f.write('\n')
def train(self):
    train_start = time.time()
    for e in range(self.args.n_epoch):
        for i, (images, flows_gt) in enumerate(self.train_loader):
            images = images.numpy() / 255.0
            flows_gt = flows_gt.numpy()

            time_s = time.time()
            _, _, loss_reg, epe_final = \
                self.sess.run([self.optimizer, self.global_step_update,
                               self.loss_reg, self.epe_final],
                              feed_dict={self.images: images,
                                         self.flows_gt: flows_gt})
            if i % 20 == 0:
                batch_time = time.time() - time_s
                kwargs = {'loss': loss_reg, 'epe': epe_final,
                          'batch time': batch_time}
                show_progress(e + 1, i + 1, self.num_batches, **kwargs)

        # evaluation
        loss_evals, epe_evals = [], []
        for images_eval, flows_gt_eval in self.eval_loader:
            images_eval = images_eval.numpy() / 255.0
            flows_gt_eval = flows_gt_eval.numpy()
            flows_pyramid, loss_eval, epe_eval \
                = self.sess.run([self.flows_pyramid, self.loss_reg,
                                 self.epe_final],
                                feed_dict={self.images: images_eval,
                                           self.flows_gt: flows_gt_eval})
            loss_evals.append(loss_eval)
            epe_evals.append(epe_eval)

        g_step = self.sess.run(self.global_step)
        print(f'\r{e+1} epoch evaluation, loss: {np.mean(loss_evals)},'
              f' epe: {np.mean(epe_evals)}, global step: {g_step},'
              f' elapsed time: {time.time()-train_start} sec.')

        # visualize estimated optical flow
        if self.args.visualize:
            if not os.path.exists('./figure'):
                os.mkdir('./figure')
            flow_pyramid = [f_py[0] for f_py in flows_pyramid]
            flow_gt = flows_gt_eval[0]
            images_e = images_eval[0]
            vis_flow_pyramid(flow_pyramid, flow_gt, images_e,
                             f'./figure/flow_{str(e+1).zfill(4)}.pdf')

        if not os.path.exists('./model'):
            os.mkdir('./model')
        self.saver.save(self.sess, f'./model/model_{e+1}.ckpt')
def train(self, continue_: bool = False):
    training_dataset = self.train_set
    model = self.Model(dropout_chance=self.dropout_chance).cuda()
    if continue_:
        model.load_state_dict(torch.load(self.model_file))

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=self.lr)

    loss_data, validation_accuracy_data, validation_loss_data, \
        precision_data, recall_data = [], [], [], [], []
    for epoch in range(1, self.epochs + 1):
        epoch_loss = []
        for images, targets in tqdm(training_dataset, desc="epoch",
                                    ncols=150):
            optimizer.zero_grad()
            images = images.float().cuda()
            targets = targets.float().cuda()

            predictions = model.train()(images)
            loss = criterion(predictions, targets)
            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.item())

        current_loss = np.mean(epoch_loss)
        current_val_accuracy, current_val_loss, precisions, recalls = \
            self._validate(model, self.validation_set,
                           threshold=self.evaluation_treshold)
        show_progress(self.epochs, epoch, current_loss,
                      current_val_accuracy, current_val_loss)

        loss_data.append(current_loss)
        validation_accuracy_data.append(current_val_accuracy)
        validation_loss_data.append(current_val_loss)
        precision_data.append(precisions)
        recall_data.append(recalls)

        torch.save(model.state_dict(), self.model_file)

    print("\n finished training")
    precision_data = list(zip(*precision_data))
    recall_data = list(zip(*recall_data))
    plot(loss_data, validation_accuracy_data, validation_loss_data,
         precision_data, recall_data,
         classes=list(self.classes.keys()),
         save_to=("plots/training_"
                  + self.model_file.split("_")[1].split(".")[0] + ".png"))
def main():
    ## make output dirs
    os.makedirs(args.output_dir, exist_ok=True)
    if args.save_adv_image:
        os.makedirs(os.path.join(args.output_dir, 'adv_images'),
                    exist_ok=True)

    ## input image paths and labels
    dataset = np.loadtxt('../data/val.txt', dtype=str)
    ind = np.random.randint(0, len(dataset), args.test_size)
    dataset = dataset[ind]

    ## adversarial model
    FGSM = FastGradientSignTargeted(alpha=0.01,
                                    n_iter=args.n_iter_adv,
                                    aug=False,
                                    save=args.save_adv_image,
                                    save_path=os.path.join(args.output_dir,
                                                           'adv_images'))
    ## classifier
    C = Classifier()

    outs = []
    correct = 0
    count = 0
    for i, (image_path, org_class) in enumerate(dataset):
        image = cv2.imread(os.path.join(args.input_dir, image_path), 1)
        org_class = int(org_class)
        target_class = np.random.randint(0, 1000)

        out_normal = C.ensemble_classify(image, args.n_iter_aug)
        flg, image, adv_class_confidence = FGSM.generate(
            image, org_class, target_class)
        if flg:
            out_adv = C.ensemble_classify(image, args.n_iter_aug)
            correct += int(out_adv == org_class)
            outs.append([out_normal, out_adv, org_class, target_class,
                         adv_class_confidence])
            count += 1
        # guard against division by zero while no adversarial example has
        # been generated yet
        accuracy = correct / count if count else 0.
        show_progress(i + 1, args.test_size, count, accuracy)

    np.savetxt(os.path.join(args.output_dir, 'log.txt'), np.array(outs))
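# FastGradientSignTargeted is project-specific and not shown in this
# collection. For reference only, one targeted FGSM step conventionally moves
# the input *against* the gradient of the loss toward the chosen target
# class. A minimal PyTorch sketch of that textbook update (an assumption, not
# this project's implementation):
import torch
import torch.nn.functional as F

def fgsm_targeted_step(model, x, target_class, alpha=0.01):
    """One targeted FGSM update: x' = x - alpha * sign(grad_x L(x, target))."""
    x = x.clone().detach().requires_grad_(True)
    loss = F.cross_entropy(model(x), torch.tensor([target_class]))
    loss.backward()
    # step against the gradient so the target-class loss decreases
    return (x - alpha * x.grad.sign()).detach()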
def training(self, aug_list):
    max_iter = self.train_step + self.train_iter
    for step in range(self.train_step, max_iter):
        show_progress(step, max_iter)
        learning_rate = self._lr_scheduler(step)  # learning rate schedule

        #### Training ####
        train_fetches = [self.train_op, self.accuracy_op, self.cost_op,
                         self.lr_op]
        self.batch_xs, self.batch_ys = self.sess.run(
            [self.dataprovider.batch_xs, self.dataprovider.batch_ys])

        if 'aug_lv1' in aug_list:
            self.batch_xs = np.asarray(self.batch_xs).astype('uint8')
            self.batch_xs = aug_lv1(self.batch_xs)
        if 'aug_random_clahe' in aug_list:
            self.batch_xs = np.asarray(self.batch_xs).astype('uint8')
            self.batch_xs = random_clahe_equalized(self.batch_xs)
        if 'aug_rotate' in aug_list:
            self.batch_xs = np.asarray(self.batch_xs).astype('uint8')
            self.batch_xs = random_rotate_90_180_270(self.batch_xs)
        if 'aug_clahe' in aug_list:
            self.batch_xs = np.asarray(self.batch_xs).astype('uint8')
            self.batch_xs = apply_clahe(self.batch_xs)
        if 'fundus_projection' in aug_list:
            self.batch_xs = np.asarray(self.batch_xs).astype('uint8')
            self.batch_xs = apply_projection(self.batch_xs)
        if np.max(self.batch_xs) > 1:
            self.batch_xs = self.batch_xs / 255.

        train_feedDict = {self.x_: self.batch_xs,
                          self.y_: self.batch_ys,
                          self.cam_ind: 0,
                          self.lr_: learning_rate,
                          self.is_training: True,
                          self.global_step: step}
        _, self.train_acc, self.train_loss, self.learning_rate = \
            self.sess.run(fetches=train_fetches, feed_dict=train_feedDict)
        # print 'train acc : {} loss : {}'.format(train_acc, train_loss)
        self.recorder.write_acc_loss('Train', self.train_loss,
                                     self.train_acc, step)
        self.recorder.write_lr(self.learning_rate, step)
        self.train_step = step
def train(self):
    training_dataset = self._create_dataloader(self.train_set)

    model = self.Model(dropout_chance=self.dropout_chance).cuda()
    model = freeze_layers(model)

    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=self.lr, momentum=0.9)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 10)

    loss_data, validation_accuracy_data, validation_loss_data = [], [], []
    for epoch in tqdm(range(self.epochs), ncols=90, desc="progress"):
        epoch_loss = []
        for images, targets in training_dataset:
            optimizer.zero_grad()
            images = images.float().cuda()
            targets = targets.float().cuda()

            predictions = model.train()(images)
            loss = criterion(predictions, targets)
            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.item())

        scheduler.step()
        # print("learning rate:", optimizer.param_groups[0]["lr"])

        current_loss = np.mean(epoch_loss)
        current_val_accuracy, current_val_loss = self._validate(model)
        show_progress(self.epochs, epoch, current_loss,
                      current_val_accuracy, current_val_loss)

        loss_data.append(current_loss)
        validation_accuracy_data.append(current_val_accuracy)
        validation_loss_data.append(current_val_loss)

        # checkpoint every fifth epoch
        if epoch % 5 == 0:
            torch.save(model.state_dict(), "models/model_1.pt")

    print("\n finished training")
def train(model, training_data, validation_data, optimizer, device, opt):
    '''Start training'''
    log_train_file = None
    log_valid_file = None

    if opt.log:
        log_train_file = opt.log + '.train.log'
        log_valid_file = opt.log + '.valid.log'
        print('[Info] Training performance will be written to file: '
              '{} and {}'.format(log_train_file, log_valid_file))
        with open(log_train_file, 'w') as log_tf, \
                open(log_valid_file, 'w') as log_vf:
            log_tf.write('epoch,loss,ppl,accuracy\n')
            log_vf.write('epoch,loss,ppl,accuracy\n')

    history = []
    valid_accus = []
    for e in range(opt.epoch):
        train_loss, train_accu = train_epoch(
            model, training_data, optimizer, device,
            smoothing=opt.label_smoothing)
        valid_loss, valid_accu = eval_epoch(model, validation_data, device)
        history.append([train_loss, valid_loss, valid_accu])

        valid_accus += [valid_accu]
        if valid_accu >= max(valid_accus):
            save_model(model, opt.result_dir)
            print('[Info] The checkpoint file has been updated.')

        if log_train_file and log_valid_file:
            with open(log_train_file, 'a') as log_tf, \
                    open(log_valid_file, 'a') as log_vf:
                log_tf.write(
                    '{epoch},{loss: 8.5f},{ppl: 8.5f},{accu:3.3f}\n'.format(
                        epoch=e, loss=train_loss,
                        ppl=math.exp(min(train_loss, 100)),
                        accu=100 * train_accu))
                log_vf.write(
                    '{epoch},{loss: 8.5f},{ppl: 8.5f},{accu:3.3f}\n'.format(
                        epoch=e, loss=valid_loss,
                        ppl=math.exp(min(valid_loss, 100)),
                        accu=100 * valid_accu))

        show_progress(e + 1, opt.epoch, train_loss, valid_loss, valid_accu)

    save_history(history, opt.result_dir)
def load_phrases_from_file(name):
    _log('Trying to load data from file ' + name + '.txt')
    num_lines = sum(1 for line in open(name, 'r'))
    with open(name, 'r') as content_file:
        phrase_source_given_target = {}
        phrase_target_given_source = {}
        lex_weight_source_given_target = {}
        lex_weight_target_given_source = {}
        source_phrase_freqs = {}
        target_phrase_freqs = {}
        phrase_pair_freqs = {}
        point = num_lines / 100 if num_lines > 100 else 1
        for i, line in enumerate(content_file):
            words = line.strip().split('|||')
            f = words[0]
            e = words[1]
            first_values = words[2].split()  # after first |||
            pfe = float(first_values[0])
            pef = float(first_values[1])
            lfe = float(first_values[2])
            lef = float(first_values[3])
            second_values = words[3].split()  # after second |||
            freqf = int(second_values[0])
            freqe = int(second_values[1])
            freqfe = int(second_values[2])

            phrase_source_given_target[f, e] = pfe
            phrase_target_given_source[f, e] = pef
            lex_weight_source_given_target[f, e] = lfe
            lex_weight_target_given_source[f, e] = lef
            source_phrase_freqs[f, e] = freqf
            target_phrase_freqs[f, e] = freqe
            phrase_pair_freqs[f, e] = freqfe
            if i % point == 0:
                show_progress(i, num_lines, 40, 'LOADING PHRASES')

    return (phrase_source_given_target, phrase_target_given_source,
            lex_weight_source_given_target, lex_weight_target_given_source,
            source_phrase_freqs, target_phrase_freqs, phrase_pair_freqs)
def train(self):
    train_start = time.time()
    for e in range(self.args.num_epochs):
        # Training
        for i, (images, t) in enumerate(self.tloader):
            images = images.numpy() / 255.0
            t = t.numpy()

            time_s = time.time()
            _, loss = self.sess.run([self.optimizer, self.loss],
                                    feed_dict={self.images: images,
                                               self.t: t})
            if i % 20 == 0:
                batch_time = time.time() - time_s
                kwargs = {'loss': loss, 'batch time': batch_time}
                show_progress(e + 1, i + 1, self.num_batches, **kwargs)

        # Validation
        loss_vals = []
        for images_val, t_val in self.vloader:
            images_val = images_val.numpy() / 255.0
            t_val = t_val.numpy()
            images_t_syn, flow_val, loss_val \
                = self.sess.run([self.images_t_syn, self.flow, self.loss],
                                feed_dict={self.images: images_val,
                                           self.t: t_val})
            loss_vals.append(loss_val)
        print(f'\r{e+1} epoch validation, loss: {np.mean(loss_vals)},'
              f' elapsed time: {time.time()-train_start} sec.')

        # Visualize estimated results
        if self.args.visualize:
            if not os.path.exists('./figure'):
                os.mkdir('./figure')
            vis_result(images_val[0], images_t_syn[0], flow_val[0],
                       f'./figure/result_{e+1}epoch.png')

        # Save trained parameters
        if not os.path.exists('./model'):
            os.mkdir('./model')
        self.saver.save(self.sess, f'./model/model_{e+1}.ckpt')
def preprocess(in_path, out_folder, keep_factors, filters,
               line_map_path=None):
    """Preprocess a parallel corpus"""
    in_folder, in_basename = os.path.split(in_path)
    assert os.path.isdir(in_folder), 'invalid in folder: %s' % in_folder
    assert in_basename.strip() != '', 'empty basename'
    assert os.path.isdir(out_folder), 'invalid out folder: %s' % out_folder
    sc_basename = in_basename + '.sc'
    doc_basename = in_basename + '.doc'
    sc_in_path = os.path.join(in_folder, sc_basename)
    doc_in_path = os.path.join(in_folder, doc_basename)
    assert os.path.isfile(sc_in_path), 'invalid file: %s' % sc_in_path
    assert os.path.isfile(doc_in_path), 'invalid file: %s' % doc_in_path
    sc_out_path = os.path.join(out_folder, sc_basename)
    doc_out_path = os.path.join(out_folder, doc_basename)

    num_lines = sum(1 for line in open(sc_in_path, 'r'))
    point = num_lines / 100 if num_lines > 100 else 1
    if line_map_path is not None:
        line_map_out = open(line_map_path, 'w')
    else:
        line_map_out = None
    with open(sc_in_path, 'r') as sc_in, open(doc_in_path, 'r') as doc_in, \
            open(sc_out_path, 'w') as sc_out, \
            open(doc_out_path, 'w') as doc_out:
        for i, sc_line in enumerate(sc_in):
            if i % point == 0:
                utils.show_progress(i, num_lines, 40, 'PREPROCESSING')
            sc_words = sc_line.strip().split()
            docstring = doc_in.next().strip()
            sc_words = process_source_code(sc_words, keep_factors)
            docstring = process_docstring(docstring, filters)
            if docstring == '' or len(docstring.split()) > 100 \
                    or len(sc_words) > 100:
                continue
            sc_out.write('%s\n' % ' '.join(sc_words))
            doc_out.write('%s\n' % docstring)
            if line_map_out is not None:
                line_map_out.write('%d\n' % i)
    if line_map_out is not None:
        line_map_out.close()
    utils.show_progress(1, 1, 40, 'PREPROCESSING')
    sys.stdout.write('\n')
def extract_from_video(video_path, output_dir, start_time=5, end_time=52,
                       frame_increment=1):
    start_frame = FPS * start_time
    end_frame = FPS * end_time
    frame = 0
    frames_processed = 0
    total_frames_to_process = int((end_frame - start_frame) / frame_increment)
    filename_char_length = len('%d.png' % total_frames_to_process)

    cap = cv2.VideoCapture(video_path)
    obj_extractor = masker.ObjectExtractor(extract_type='simple')

    ret = True
    while ret and (frame <= end_frame):
        ret, img = cap.read()
        if frame <= start_frame:
            # Learn background pixels using background subtraction
            obj_extractor.learnBackground(img)
        else:
            # Extract object and crop
            obj = obj_extractor.extractObject(img, thresh=100)
            obj_framed = masker.cropBox(obj)

            # Save image
            filename = os.path.join(
                output_dir,
                ('%d.png' % frames_processed).zfill(filename_char_length))
            cv2.imwrite(filename, obj_framed)

            # Print progress
            frames_processed += 1
            show_progress(frames_processed, total_frames_to_process)
        frame += frame_increment
    cap.release()
    cv2.destroyAllWindows()
    print('')
def predict(self, N, intv, show_avg=True, show_pgr=True):
    """Get prediction metrics to evaluate how pruning influences the model
    performance

    :param N: number of inputs
    :param intv: display progression at given interval
    :param show_avg: display average metrics
    :param show_pgr: display step metrics"""
    avg_loss, avg_rec, avg_prec, avg_spec = 0., 0., 0., 0.
    avg_f1 = np.zeros((self.annos_.shape[-1],))
    for i in range(N):
        feed_dict_tr = {self.x: np.expand_dims(self.imgs_[i], axis=0),
                        self.y_true: np.expand_dims(self.annos_[i], axis=0),
                        self.rate: 0.,
                        self.is_training: False}
        loss_ = self.sess.run(self.cost_reg, feed_dict=feed_dict_tr)
        f1_ = self.sess.run(self.f1_vec, feed_dict=feed_dict_tr)
        rec_ = self.sess.run(self.recall, feed_dict=feed_dict_tr)
        prec_ = self.sess.run(self.precision, feed_dict=feed_dict_tr)
        spec_ = self.sess.run(self.specificity, feed_dict=feed_dict_tr)

        avg_loss += loss_ / N
        avg_f1 += f1_ / N
        avg_rec += rec_ / N
        avg_prec += prec_ / N
        avg_spec += spec_ / N

        if i % intv == 0:
            if show_pgr is True:
                utils.show_progress('i ' + str(i), loss_,
                                    utils.array_to_text(f1_, 3),
                                    rec_, prec_, spec_, True)

    # convert f1 vector to text
    avg_f1_txt = utils.array_to_text(avg_f1, 3)
    if show_avg is True:
        utils.show_progress('Avg. results', avg_loss, avg_f1_txt,
                            avg_rec, avg_prec, avg_spec, True)
    return avg_loss, avg_f1
def train(self, continue_: bool = False):
    model = Model().cuda()
    if continue_:
        model.load_state_dict(torch.load(self.model_path))

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=self.lr)

    train_loss, train_ssim, val_loss, val_ssim = [], [], [], []
    for epoch in range(1, (self.epochs + 1)):
        epoch_loss = []
        epoch_train_targets, epoch_train_predictions = [], []
        for images in tqdm(self.train_set, desc="epoch", ncols=150):
            optimizer.zero_grad()
            images = images.float().cuda()

            predictions = model.train()(images, train_=True)
            loss = criterion(predictions, images)
            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.item())
            # collect targets and reconstructions for the epoch's SSIM
            for i in range(predictions.size()[0]):
                epoch_train_targets.append(images[i].cpu().detach().numpy())
                epoch_train_predictions.append(
                    predictions[i].cpu().detach().numpy())

        current_val_loss, current_val_ssim = self._validate(
            model, self.validation_set)
        current_train_loss = np.mean(epoch_loss)
        current_train_ssim = calculate_ssim(epoch_train_targets,
                                            epoch_train_predictions)

        show_progress(self.epochs, epoch, current_train_loss,
                      current_train_ssim, current_val_loss, current_val_ssim)
        torch.save(model.state_dict(), self.model_path)
def read_lexical_weights(path):
    """Read the lexical weights from an _all_info.txt file"""
    lex_weight_source_given_target = {}
    lex_weight_target_given_source = {}
    num_lines = sum(1 for line in open(path, 'r'))
    point = num_lines / 100 if num_lines > 100 else 1
    with open(path, 'r') as in_file:
        for i, line in enumerate(in_file):
            if i % point == 0:
                show_progress(i, num_lines, 40, 'LOADING LEXICAL WEIGHTS')
            source, target, probs, _freqs = line.strip().split(' ||| ')
            _pfe, _pef, lfe, lef = [float(x) for x in probs.split()]
            lex_weight_source_given_target[(source, target)] = lfe
            lex_weight_target_given_source[(source, target)] = lef
    show_progress(1, 1, 40, 'LOADING LEXICAL WEIGHTS')
    sys.stdout.write('\n')
    return lex_weight_source_given_target, lex_weight_target_given_source
def read_language_model(file_name, max_phrase_length,
                        label='LOADING LANGUAGEMODEL'):
    """Read the language model"""
    language_model = {}
    document = open(file_name, 'r')
    num_lines = sum(1 for line in open(file_name, 'r'))
    point = num_lines / 100 if num_lines > 100 else 1
    for i, line in enumerate(document):
        if i % point == 0:
            show_progress(i, num_lines, 40, label)
        segments = line.strip().split(' ||| ')
        phrase = segments[0]
        if len(phrase.split()) > max_phrase_length:
            continue
        prob = float(segments[1])
        language_model[tuple(phrase.split())] = prob
    show_progress(1, 1, 40, label)
    sys.stdout.write('\n')
    document.close()
    return language_model
def trim_translation_model(full_translation_model, weights, top_translations):
    """Use the full_translation_model to create a smaller translation_model
    according to the restrictions of the weights and top_translations."""
    translation_model = defaultdict(list)
    num_lines = len(full_translation_model)
    point = num_lines / 100 if num_lines > 100 else 1
    for i, (source, target_probs) in \
            enumerate(full_translation_model.iteritems()):
        if i % point == 0:
            show_progress(i, num_lines, 40, 'TRIM FULLTRANSLATIONMODEL')
        # heap holding the top_translations best (measure, target, probs);
        # the index is named j so it cannot shadow the line counter i
        measure_target_probs = []
        for target, probs in target_probs:
            measure = sum(prob * weights[j] for j, prob in enumerate(probs))
            if len(measure_target_probs) < top_translations:
                heapq.heappush(measure_target_probs,
                               (measure, target, probs))
            else:
                heapq.heappushpop(measure_target_probs,
                                  (measure, target, probs))
        translation_model[source] = [(target, probs) for
                                     (_measure, target, probs) in
                                     measure_target_probs]
    show_progress(1, 1, 40, 'TRIM FULLTRANSLATIONMODEL')
    sys.stdout.write('\n')
    return translation_model
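# Both read_translation_model and trim_translation_model use the same
# bounded-heap idiom: heappush until the heap holds top_translations entries,
# then heappushpop so the entry with the smallest measure is evicted. The same
# selection can be written more directly with heapq.nlargest; a small
# equivalent sketch for comparison:
import heapq

def top_k_translations(target_probs, weights, k):
    """Select the k best (target, probs) pairs by weighted measure."""
    scored = [(sum(p * w for p, w in zip(probs, weights)), target, probs)
              for target, probs in target_probs]
    return [(target, probs)
            for _measure, target, probs in heapq.nlargest(k, scored)]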
def read_translation_freqs(file_name, num_lines=None):
    """Read the number of source translations"""
    translation_freqs = defaultdict(int)
    document = open(file_name, 'r')
    if num_lines is None:
        num_lines = sum(1 for line in open(file_name, 'r'))
    point = num_lines / 100 if num_lines > 100 else 1
    for i, line in enumerate(document):
        if i % point == 0:
            utils.show_progress(i, num_lines, 40, 'LOADING TRANSLATIONMODEL')
        segments = line.strip().split(' ||| ')
        source = segments[0]
        translation_freqs[source] += 1
    utils.show_progress(1, 1, 40, 'LOADING TRANSLATIONMODEL')
    sys.stdout.write('\n')
    document.close()
    return translation_freqs
def fill_ntuple():
    print('*** starting fill_ntuple() ')
    AtlasStyle.SetAtlasStyle()

    # # get key list
    # tfile = TFile(BasicConfig.workdir + 'systTree.root')
    # key_list_all = [key.GetName() for key in gDirectory.GetListOfKeys()]
    # regex = re.compile('PRW|JET|MET.*')
    # key_list = [key for key in key_list_all if re.match(regex, key)]
    # tfile.Close()

    # start making ttree
    #output_tfile = TFile('rhadron_v06-00-05.root', 'recreate')
    output_tfile = TFile(args.outputFile, 'recreate')

    # initialize TTree
    tree = TTree('rhadron', 'tree of rhadron properties for limit setting')

    # leaf variables
    from array import array
    mass_gluino = array('f', [0.])
    delta_mass = array('f', [0.])
    ctau = array('f', [0.])
    eff = array('f', [0.])
    eff_stat_error = array('f', [0.])
    eff_syst_error = array('f', [0.])
    eff_syst_error_ISR = array('f', [0.])
    eff_syst_error_PRW = array('f', [0.])
    eff_syst_error_JET = array('f', [0.])
    eff_syst_error_MET = array('f', [0.])

    # set branches
    tree.Branch("mGluino", mass_gluino, 'mGluino/F')
    tree.Branch("deltaM", delta_mass, 'deltaM/F')
    tree.Branch("ctau", ctau, 'ctau/F')
    tree.Branch("eff", eff, 'eff/F')
    tree.Branch("effRelStatErr", eff_stat_error, 'effRelStatErr/F')
    tree.Branch("effRelSystErr", eff_syst_error, 'effRelSystErr/F')
    tree.Branch("effRelSystErrISR", eff_syst_error_ISR, 'effRelSystErrISR/F')
    tree.Branch("effRelSystErrPRW", eff_syst_error_PRW, 'effRelSystErrPRW/F')
    tree.Branch("effRelSystErrJET", eff_syst_error_JET, 'effRelSystErrJET/F')
    tree.Branch("effRelSystErrMET", eff_syst_error_MET, 'effRelSystErrMET/F')

    #directory = '/afs/cern.ch/work/k/kmotohas/DisplacedVertex/DV_xAODAnalysis/submitDir_LSF/mc/hist_DVPlusMETSys/'
    #directory = BasicConfig.workdir + 'hist_DVPlusMETSys/'
    #directory = '/home/motohash/data/mc15_13TeV/DVPlusMETSys/v06-00-05/'
    #tfile = TFile(args.referenceFile)
    tfile = TFile(args.inputFile)
    key_list_all = [key.GetName() for key in gDirectory.GetListOfKeys()]
    print(len(key_list_all), key_list_all)
    regex = re.compile('Nominal|PRW|JET|MET.*')
    key_list = [key for key in key_list_all if re.match(regex, key)]
    print(len(key_list), key_list)
    tfile.Close()

    #c = 299792458.  # [m/s]
    #tchains = [[dsid, TChain('Nominal', str(dsid))] for dsid in range(402700, 402740)]
    #tchains = [[dsid, TChain('Nominal', str(dsid))] for dsid in mc.parameters.keys()]
    #tchains = [[dsid, [TChain(key, key+str(dsid)) for key in key_list]] for dsid in mc.parameters.keys()]
    dsids = [args.DSID]
    tchains = [[dsid, [TChain(key, key + str(dsid)) for key in key_list]]
               for dsid in dsids]

    cut_flow = ['Initial', 'Trigger', 'Filter', 'Cleaning', 'GRL', 'PV',
                'NCB veto', 'MET', 'DV Selection']

    #systematic_tables = TFile('systematic_summary_SimpleMETFilter.root', 'open')
    #table = TH1F()

    m_MET_min = 250.

    # loop over dsid
    try:
        for dsid, each_tchain in tchains:
            print('')
            print(dsid)
            #index = 0
            #for input in glob(directory + 'systTree_' + str(dsid) + '_*.root'):
            for tchain in each_tchain:
                #for input_file in glob(directory+'systTree_mc15_13TeV.' + str(dsid) + '*.root'):
                #    print(input_file)
                #    tchain.Add(input_file)
                tchain.Add(args.inputFile)
            mass_gluino[0] = mc.parameters[dsid]['g']
            delta_mass[0] = mass_gluino[0] - mc.parameters[dsid]['chi0']

            n_reweight_steps = 40
            xmin = 1.
            xmax = 10000.
            ratio = xmax / xmin
            bins = []
            for ii in range(n_reweight_steps):
                bins.append(xmax * 10**(ii * TMath.Log10(xmax / xmin)
                                        / n_reweight_steps
                                        - TMath.Log10(xmax / xmin)))
            #n_passed_w1 = [0. for _ in range(n_reweight_steps)]
            #n_passed = [0. for _ in range(n_reweight_steps)]
            from array import array
            limitsLifetime = array('d', bins)
            #
            tefficiency = [[TEfficiency('tefficiency_{0}_{1}_{2}'.format(key, step, dsid),
                                        ';c#tau [mm]; Event-level efficiency',
                                        len(limitsLifetime) - 1,
                                        limitsLifetime)
                            for step in range(n_reweight_steps)]
                           for key in key_list]
            #h_syst_diff = [[TH1F('syst_diff_{0}_{1}_{2}'.format(key, step, dsid), ';;(N_{shifted} - N_{nominal}) / N_{nominal}', len(key_list)+1, 0, len(key_list)+1)
            #                for step in range(n_reweight_steps)] for key in key_list]
            h_syst_diff = [TH1F('syst_diff_{0}_{1}_{2}'.format(key, step, dsid),
                                ';;(N_{shifted} - N_{nominal}) / N_{nominal}',
                                len(key_list) + 1, 0, len(key_list) + 1)
                           for step in range(n_reweight_steps)]
            for step in range(n_reweight_steps):
                for jj, key in enumerate(key_list):
                    h_syst_diff[step].GetXaxis().SetBinLabel(jj + 1, key)
                h_syst_diff[step].GetXaxis().SetBinLabel(
                    len(key_list) + 1, 'ISR_Py2MG_SF_removed')
            n_events_weighted = [[0. for _ in range(n_reweight_steps)]
                                 for key in key_list]
            n_events_weighted_noISR = [[0. for _ in range(n_reweight_steps)]
                                       for key in key_list]

            # loop over tchain of each systematic
            for ii, tchain in enumerate(each_tchain):
                entries = tchain.GetEntries()
                print('*** processed systs: {0} / {1}'.format(
                    ii, len(each_tchain)))
                #n_reweight_steps = 50
                #for step in range(n_reweight_steps):
                #    tefficiency.append(TEfficiency('tefficiency_'+str(step), ';c#tau [mm]; Event-level efficiency',
                #                                   len(limitsLifetime)-1, limitsLifetime))
                #    h_syst_diff.append(TH1F('syst_diff_'+str(step), ';;(N_{shifted} - N_{nominal}) / N_{nominal}', len(key_list)+1, 0, len(key_list)+1))
                for step in range(n_reweight_steps):
                    tefficiency[ii][step].SetUseWeightedEvents()
                    #for jj, key in enumerate(key_list):
                    #    h_syst_diff[ii][step].GetXaxis().SetBinLabel(jj+1, key)
                    #h_syst_diff[ii][step].GetXaxis().SetBinLabel(len(key_list)+1, 'ISR_Py2MG_SF_removed')
                    # h_syst_diff[step].SetMinimum(-0.3)
                    # h_syst_diff[step].SetMaximum(0.3)
                if entries == 0:
                    continue
                for entry in range(entries):
                    #if entry % 1000 == 0:
                    #    print('* processed events: {0} / {1}'.format(entry, entries))
                    utils.show_progress(entry, entries)
                    #if entry == 605:
                    #    break
                    # get the next tree in the chain and verify
                    ientry = tchain.LoadTree(entry)
                    if ientry < 0:
                        break
                    # copy next entry into memory and verify
                    nb = tchain.GetEntry(entry)
                    if nb <= 0:
                        continue
                    event_weight = tchain.McEventWeight * tchain.PileupWeight \
                        * tchain.ISRWeight
                    ctau_MC = TMath.C() * mc.parameters[dsid]['t'] * 1e-9  # [ns]->[m]
                    for step in range(n_reweight_steps):
                        #print(tchain.GetListOfBranches())
                        pass_all = pass_event_cut(tchain, len(cut_flow) - 1)
                        if pass_all:
                            matched = False
                            for idv in range(len(tchain.DV_x)):
                                matched = matched or match(tchain, idv,
                                                           cut=1.0)
                            #print('pass_all is ', pass_all, ', matched is ', matched)
                            pass_all = pass_all and matched
                        target_ctau = xmax * 10**(step * TMath.Log10(xmax / xmin)
                                                  / n_reweight_steps
                                                  - TMath.Log10(xmax / xmin)) * 1e-3  # [mm]->[m]
                        #print(target_ctau)
                        lifetime_weight = get_lifetime_weight(tchain,
                                                              target_ctau,
                                                              ctau_MC)
                        n_events_weighted[ii][step] += \
                            event_weight * lifetime_weight
                        n_events_weighted_noISR[ii][step] += \
                            tchain.McEventWeight * tchain.PileupWeight \
                            * lifetime_weight
                        #print(event_weight)
                        #print(event_weight*lifetime_weight)
                        #print(pass_all)
                        tefficiency[ii][step].FillWeighted(
                            pass_all, event_weight * lifetime_weight,
                            target_ctau * 1e3)
                # end of loop over entries of each TChain
            # end loop over tchain of each systematic

            for step in range(n_reweight_steps):
                n_events_nominal = [0. for _ in range(n_reweight_steps)]
                for ii in range(len(each_tchain)):
                    # if Nominal TTree, set syst diff of ISR as well
                    if ii == 0:
                        n_events_nominal[step] = n_events_weighted[ii][step]
                        if n_events_nominal[step] < 1e-4:
                            #h_syst_diff[ii][step].SetBinContent(len(key_list)+1, 0)
                            h_syst_diff[step].SetBinContent(
                                len(key_list) + 1, 0)
                        else:
                            #h_syst_diff[ii][step].SetBinContent(len(key_list)+1,
                            h_syst_diff[step].SetBinContent(
                                len(key_list) + 1,
                                float((n_events_weighted_noISR[ii][step]
                                       - n_events_nominal[step])
                                      / n_events_nominal[step]))
                            #float((n_events_weighted[ii][step]-n_events_nominal[step])/n_events_nominal[step]))
                    diff = n_events_weighted[ii][step] - n_events_nominal[step]
                    #print(n_events_nominal, n_events_weighted, diff)
                    if n_events_nominal[step] < 1e-4:
                        #h_syst_diff[ii][step].SetBinContent(ii+1, 0)
                        h_syst_diff[step].SetBinContent(ii + 1, 0)
                    else:
                        #h_syst_diff[ii][step].SetBinContent(ii+1, float(diff/n_events_nominal[step]))
                        h_syst_diff[step].SetBinContent(
                            ii + 1, float(diff / n_events_nominal[step]))

                #systematic_tables.GetObject('systematic_table_'+str(dsid), table)
                #syst_up, syst_down = root_sum_squares(table, 'x')
                #systs = root_sum_squares(h_syst_diff[ii][step], 'x')
                systs = root_sum_squares(h_syst_diff[step], 'x')
                #eff_syst_error[0] = max(syst_up, syst_down)  # TODO
                #eff_syst_error[0] = (syst_up**2 + syst_down**2)**0.5
                eff_syst_error[0] = (systs[0]**2 + systs[1]**2)**0.5
                eff_syst_error_ISR[0] = systs[2]
                eff_syst_error_PRW[0] = systs[3]
                eff_syst_error_JET[0] = systs[4]
                eff_syst_error_MET[0] = systs[5]
                if eff_syst_error[0] > 1:
                    print('eff_syst_error[0] = ' + str(eff_syst_error[0]))
                    #eff_syst_error[0] = 1.

                #for ct in bins:
                #    print(len(bins), bins)
                #print(n_total_w1[step], n_total[step])
                #sf = n_total_w1[step] / n_total[step]
                #n_passed[step] *= sf
                #n_total[step] *= sf
                #eff_no_weight, stat_error_no_weight = utils.division_error_propagation(n_passed_w1[step], n_total_w1[step])
                #ctau[0] = TMath.Power(300, step/float(n_reweight_steps-1)) * 1e-3  # [mm]->[m]
                ct = bins[step]
                #print(ct)
                ctau[0] = ct * 1e-3  # [mm]->[m]
                #print(ctau[0])
                bin_ctau = tefficiency[0][step].GetPassedHistogram().FindBin(ct)
                print(tefficiency[0][step].GetPassedHistogram()
                      .GetBinContent(bin_ctau))
                print(tefficiency[0][step].GetTotalHistogram()
                      .GetBinContent(bin_ctau))
                #print(bin_ctau)
                #print('ct', ct, 'bin_ctau', bin_ctau)
                eff[0] = tefficiency[0][step].GetEfficiency(bin_ctau)
                print(eff[0])
                abs_stat_error = \
                    (tefficiency[0][step].GetEfficiencyErrorLow(bin_ctau)**2
                     + tefficiency[0][step].GetEfficiencyErrorUp(bin_ctau)**2)**0.5
                #eff[0], abs_stat_error = utils.binomial_ratio_and_error(n_passed[step], n_total[step])
                #if eff[0] < 1e-4:
                if eff[0] == 0:
                    eff_stat_error[0] = 1.  # avoid zero division error and divergence
                    continue  # do not fill the tree if the efficiency is too small
                else:
                    eff_stat_error[0] = abs_stat_error / eff[0]
                #if eff_stat_error[0] > 1:
                #    print(n_passed[step], n_total[step], abs_stat_error, eff[0], eff_stat_error[0])
                #    eff_stat_error[0] = 1.
                tree.Fill()
            # end loop over n_reweight_steps
    except KeyboardInterrupt:
        pass

    output_tfile.Write()
    output_tfile.Close()
def create_cut_flow():
    AtlasStyle.SetAtlasStyle()

    #input_tfile = utils.open_tfile(BasicConfig.workdir + 'DVTree_NTuple_data15_13TeV.root')
    #tree = input_tfile.Get('DVTree_NTuple')
    input_tfile = utils.open_tfile(args.inputFile)
    tree = input_tfile.Get('Nominal')

    cut_flow = ['Initial', 'Trigger', 'Filter', 'Cleaning', 'GRL', 'PV',
                'NCB veto', 'MET', 'DV Selection']
    h_cut_flow = TH1F('cut_flow', ';;Number of Events',
                      len(cut_flow), 0, len(cut_flow))
    #h_cut_flow2 = TH1F('cut_flow2', ';;Number of Events', len(cut_flow), 0, len(cut_flow))
    for bin, cut in enumerate(cut_flow):
        h_cut_flow.GetXaxis().SetBinLabel(bin + 1, cut)

    entries = tree.GetEntries()
    for entry in range(entries):
        #if entry % 10000 == 0:
        #    print('*** processed {0} out of {1}'.format(entry, entries))
        utils.show_progress(entry, entries)
        #if entry == 100000:
        #    break
        # get the next tree in the chain and verify
        ientry = tree.LoadTree(entry)
        if ientry < 0:
            break
        # copy next entry into memory and verify
        nb = tree.GetEntry(entry)
        if nb <= 0:
            continue

        event_weight = tree.McEventWeight * tree.PileupWeight * tree.ISRWeight
        for step, cut in enumerate(cut_flow):
            if step == 0:
                h_cut_flow.Fill(cut, event_weight)
                #h_cut_flow2.Fill(cut, event_weight)
            #elif step == 2:
            #    if tree.RandomRunNumber < 309311 and pass_event_cut(tree, 2):
            #        h_cut_flow.Fill(cut, event_weight)
            #    if tree.RandomRunNumber > 309311 and pass_event_cut(tree, 2):
            #        h_cut_flow2.Fill(cut, event_weight)
            #elif step == 6:
            #    if tree.RandomRunNumber < 309311 and pass_event_cut(tree, 6):
            #        h_cut_flow.Fill(cut, event_weight)
            #    if tree.RandomRunNumber > 309311 and pass_event_cut(tree, 6):
            #        h_cut_flow2.Fill(cut, event_weight)
            #elif step == 7:
            #    #have_signal_like_dv = False
            #    #for dv_index in range(len(tree.DV_passVtxCuts)):
            #    #    have_signal_like_dv = have_signal_like_dv or tree.DV_passVtxCuts[dv_index]
            #    #if pass_event_cut(tree, 7) and tree.MET > 220 and have_signal_like_dv:
            #    if tree.RandomRunNumber < 309311 and pass_event_cut(tree, 7):
            #        h_cut_flow.Fill(cut, event_weight)
            #    if tree.RandomRunNumber > 309311 and pass_event_cut(tree, 7):
            #        h_cut_flow2.Fill(cut, event_weight)
            elif pass_event_cut(tree, step):
                h_cut_flow.Fill(cut, event_weight)
                #h_cut_flow2.Fill(cut, event_weight)

    output = TFile('cut_flow.root', 'recreate')
    h_cut_flow.Write()
    output.Close()
def check_n_vertices_vs_met_threshold():
    AtlasStyle.SetAtlasStyle()

    #input_tfile = utils.open_tfile(BasicConfig.workdir + 'DVTree_NTuple_data15_13TeV.root')
    input_tfile = utils.open_tfile(args.inputFile)
    #tree = input_tfile.Get('DVTree_NTuple')
    tree = input_tfile.Get('Nominal')

    #bin_name = ['Base', 'Trigger', 'Filter', 'MET200', 'MET220', 'MET250']
    bin_name = ['Base', 'Trigger', 'Filter', 'MET250']
    h_nevents_cut = TH1F('nevents_cut', ';;Double Ratio',
                         len(bin_name), 0, len(bin_name))
    h_nevents_all = TH1F('nevents_all', ';;Double Ratio',
                         len(bin_name), 0, len(bin_name))
    h_ndvs_cut = {ntracks: TH1F('ndvs_cut_' + str(ntracks), ';;Double Ratio',
                                len(bin_name), 0, len(bin_name))
                  for ntracks in range(2, 6)}
    h_ndvs_all = {ntracks: TH1F('ndvs_all_' + str(ntracks), ';;Double Ratio',
                                len(bin_name), 0, len(bin_name))
                  for ntracks in range(2, 6)}
    for bin, name in enumerate(bin_name):
        h_nevents_cut.GetXaxis().SetBinLabel(bin + 1, name)
        h_nevents_all.GetXaxis().SetBinLabel(bin + 1, name)
        for ntracks in range(2, 6):
            h_ndvs_cut[ntracks].GetXaxis().SetBinLabel(bin + 1, name)
            h_ndvs_all[ntracks].GetXaxis().SetBinLabel(bin + 1, name)

    entries = tree.GetEntries()
    for entry in range(entries):
        utils.show_progress(entry, entries)
        #if entry == 1000000:
        #    break
        # get the next tree in the chain and verify
        ientry = tree.LoadTree(entry)
        if ientry < 0:
            break
        # copy next entry into memory and verify
        nb = tree.GetEntry(entry)
        if nb <= 0:
            continue
        if not utils.basic_event_selection(tree):
            continue

        # fill all
        for name in bin_name:
            h_nevents_all.Fill(name, 1.)
            for dv_index, DV_nTracks in enumerate(tree.DV_nTracks):
                if utils.basic_dv_selection(tree, dv_index):
                    if DV_nTracks < 6:
                        h_ndvs_all[DV_nTracks].Fill(name, 1.)
                    else:
                        h_ndvs_all[5].Fill(name, 1.)

        #
        h_nevents_cut.Fill('Base', 1.)
        for dv_index, DV_nTracks in enumerate(tree.DV_nTracks):
            if utils.basic_dv_selection(tree, dv_index):
                if DV_nTracks < 6:
                    h_ndvs_cut[DV_nTracks].Fill('Base', 1.)
                else:
                    h_ndvs_cut[5].Fill('Base', 1.)

        # Trigger
        if not tree.PassCut1:
            continue
        h_nevents_cut.Fill('Trigger', 1.)
        for dv_index, DV_nTracks in enumerate(tree.DV_nTracks):
            if utils.basic_dv_selection(tree, dv_index):
                if DV_nTracks < 6:
                    h_ndvs_cut[DV_nTracks].Fill('Trigger', 1.)
                else:
                    h_ndvs_cut[5].Fill('Trigger', 1.)

        # Filter
        if not tree.PassCut2:
            continue
        h_nevents_cut.Fill('Filter', 1.)
        for dv_index, DV_nTracks in enumerate(tree.DV_nTracks):
            if utils.basic_dv_selection(tree, dv_index):
                if DV_nTracks < 6:
                    h_ndvs_cut[DV_nTracks].Fill('Filter', 1.)
                else:
                    h_ndvs_cut[5].Fill('Filter', 1.)

        ##
        #if not tree.MET > 200:
        #    continue
        #h_nevents_cut.Fill('MET200', 1.)
        #for dv_index, DV_nTracks in enumerate(tree.DV_nTracks):
        #    if pass_base_dv_selection(tree, dv_index):
        #        if DV_nTracks < 6:
        #            h_ndvs_cut[DV_nTracks].Fill(name, 1.)
        #        else:
        #            h_ndvs_cut[5].Fill(name, 1.)
        ##
        #if not tree.MET > 220:
        #    continue
        #h_nevents_cut.Fill('MET220', 1.)
        #for dv_index, DV_nTracks in enumerate(tree.DV_nTracks):
        #    if pass_base_dv_selection(tree, dv_index):
        #        if DV_nTracks < 6:
        #            h_ndvs_cut[DV_nTracks].Fill(name, 1.)
        #        else:
        #            h_ndvs_cut[5].Fill(name, 1.)

        # MET
        if not tree.MET > 250:
            continue
        h_nevents_cut.Fill('MET250', 1.)
        for dv_index, DV_nTracks in enumerate(tree.DV_nTracks):
            if utils.basic_dv_selection(tree, dv_index):
                if DV_nTracks < 6:
                    h_ndvs_cut[DV_nTracks].Fill('MET250', 1.)
                else:
                    h_ndvs_cut[5].Fill('MET250', 1.)

    #
    output_tfile = TFile(args.outputFile, 'recreate')
    #
    #canvas = TCanvas('canvas', 'canvas', 1200, 800)
    #h_ndvs_all_clone = h_ndvs_all[2].Clone('unit')
    #h_ndvs_all_clone.Divide(h_ndvs_all[2])
    #h_ndvs_all_clone.SetMaximum(3)
    #h_ndvs_all_clone.SetMinimum(0)
    #h_ndvs_all_clone.Draw()
    #legend = TLegend(0.5, 0.6, 0.85, 0.85)
    h_nevents_cut.Write()
    h_nevents_all.Write()
    for DV_nTracks in range(2, 6):
        h_ndvs_cut[DV_nTracks].Write()
        h_ndvs_all[DV_nTracks].Write()
        #
        #h_ndvs_cut[DV_nTracks].Sumw2()
        #h_ndvs_cut[DV_nTracks].Divide(h_ndvs_all[DV_nTracks])
        #h_ndvs_cut[DV_nTracks].Divide(h_nevents_cut)
        #h_ndvs_cut[DV_nTracks].Multiply(h_nevents_all)
        #utils.decorate_histogram(h_ndvs_cut[DV_nTracks], BasicConfig.colors[DV_nTracks])
        #h_ndvs_cut[DV_nTracks].Draw('same,hist')
        #legend.AddEntry(h_ndvs_cut[DV_nTracks],
        #                '('+str(DV_nTracks)+'trk-DVs(cut)/2trk-DVs(all))/(Events(cut)/Events(all))', 'l')
    #utils.decorate_legend(legend)
    #legend.Draw()
    #utils.save_as(canvas, BasicConfig.plotdir + 'nVerts_met_dependency')
    #output = TFile('nVerts_met_dependency.root', 'recreate')
    #canvas.Write()
    output_tfile.Close()
def show_progress(self, path):
    if not self.arguments.json_output:
        utils.show_progress(self.i)
    elif self.arguments.progress_output and \
            self.i % utils.FRAMES_TO_INFORM == 0:
        utils.inform_json_progress(self.i, path)
def main():
    global irandom
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--inputFiles', type=str,
                        help='comma separated input files')
    parser.add_argument('-o', '--outputFile', type=str,
                        help='output file name')
    args = parser.parse_args()

    #input_files = args.inputFiles
    #print(args.inputFiles)
    input_files = args.inputFiles.split(',')
    print('*** input files: ')
    print(input_files)
    print('*** output file: ')
    print(args.outputFile)

    output_root = TFile(args.outputFile, 'recreate')
    book_histograms()

    chain = TChain('Nominal', 'Nominal Tree')
    for input_file in input_files:
        chain.Add(input_file)

    #f = TFile('~/data/data16_13TeV/DVPlusMETSys/DVtrkTemplate_v3.root', 'open')
    #f = TFile('~/data/data16_13TeV/DVPlusMETSys/DVtrkTemplate_no2trk_NonVetoOnly_v3.root', 'open')
    # 3 GeV mass cut
    #f = TFile('~/data/data16_13TeV/DVPlusMETSys/DVtrkTemplate_no2trk_massCut_v06-00-00.root', 'open')
    #f = TFile('~/data/data16_13TeV/DVPlusMETSys/DVtrkTemplate_no2trk_v06-00-00.root', 'open')
    #f = TFile('~/data/data16_13TeV/DVPlusMETSys/DVtrkTemplate_no2trk_v06-00-01.root', 'open')
    #f = TFile('~/data/data16_13TeV/DVPlusMETSys/DVtrkTemplate_no2trk_massCut_v06-00-01.root', 'open')
    f = TFile('~/data/data16_13TeV/DVPlusMETSys/DVtrkTemplate_no2trk_massCut_v06-00-03.root', 'open')
    trkTemplate = f.Get('DVtrkTemplate')
    nTrkTemplates = trkTemplate.GetEntries()

    entries = chain.GetEntries()
    print('* Number of entries = {}'.format(entries))
    irandom = int(gRandom.Uniform() * nTrkTemplates)
    try:
        for entry in range(entries):
            #if not entry % 10000:
            #    print('*** processed {0} out of {1} ({2}%)'.format(entry, entries, round(float(entry)/entries*100., 1)))
            utils.show_progress(entry, entries)
            #irandom = int(gRandom.Uniform()*nTrkTemplates)
            #print(irandom)
            #if entry == 1000:
            #    break
            # get the next tree in the chain and verify
            ientry = chain.LoadTree(entry)
            if ientry < 0:
                break
            # copy next entry into memory and verify
            nb = chain.GetEntry(entry)
            if nb <= 0:
                continue
            if chain.EventNumber == 752668466:
                continue
            event_weight = chain.McEventWeight * chain.PileupWeight \
                * chain.ISRWeight
            m_nEvents.Fill(0.5, event_weight)
            #if chain.MET > 200:
            #    continue
            m_nEvents_MET.Fill(0.5, event_weight)
            if utils.basic_event_selection(chain):
                m_posPV.SetXYZ(chain.PV_x, chain.PV_y, chain.PV_z)
                for idv in range(len(chain.DV_x)):
                    #if basic_dv_selection(chain, idv) and chain.DV_nTracks[idv] < 7 and chain.DV_Region[idv] >= 0:
                    if utils.basic_dv_selection(chain, idv) \
                            and chain.DV_Region[idv] >= 0:
                        dvInfo(chain, idv)
                        #print('orig'+str(m_tlvDV.M()))
                        m_DVPV.SetVect(m_posDV - m_posPV)
                        dvBkgEst(trkTemplate, nTrkTemplates, chain.MET)
                        irandom += 1
    except KeyboardInterrupt:
        pass

    output_root.cd()
    m_nEvents.Write()
    m_nEvents_MET.Write()
    for itrk in range(2, 7):
        for region in range(12):
            m_BkgEst_data_iTrk_Region[itrk][region].Write()
            if itrk == 6:
                m_BkgEst_data_iTrk_Region[7][region].Write()
            m_BkgEst_data_loMET_iTrk_Region[itrk][region].Write()
            m_BkgEst_data_hiMET_iTrk_Region[itrk][region].Write()
            m_BkgEst_data_NoCross_iTrk_Region[itrk][region].Write()
            m_BkgEst_data_NoCross_maxAngle_iTrk_Region[itrk][region].Write()
            m_BkgEst_data_NoCross_maxDeltaR_iTrk_Region[itrk][region].Write()
            m_BkgEst_data_NoCross_maxDeltaEta_iTrk_Region[itrk][region].Write()
            #
            m_AvgAngleDVmass_iTrk_Region[itrk][region].Write()
            m_maxAngleDVmass_iTrk_Region[itrk][region].Write()
            m_dRDVmass_iTrk_Region[itrk][region].Write()
            m_dEtaDVmass_iTrk_Region[itrk][region].Write()
            if itrk == 2:
                continue
            m_BkgEst_Cross_iTrk_Region[itrk][region].Write()
            m_BkgEst_Cross_Angle_iTrk_Region[itrk][region].Write()
            m_BkgEst_Cross_DeltaR_iTrk_Region[itrk][region].Write()
            m_BkgEst_Cross_NoLargeAngle_iTrk_Region[itrk][region].Write()
            m_BkgEst_Cross_LargeAngle_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_NoLargeAngle_iTrk_Region[itrk][region].Write()
            #m_BkgEst_CrossDeltaPhi_LargeAngle_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDelta_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDelta_DeltaR_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDelta_Angle_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_DeltaR_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_DeltaR_loMET_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_DeltaR_hiMET_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_DeltaR_th08_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_DeltaR_th10_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_DeltaR_th15_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_DeltaR_pt20_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_DeltaR_pt15_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_DeltaR_pt10_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_DeltaR_pt5_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_DeltaR_dR10_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_DeltaR_dR15_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_DeltaR_dR20_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_DeltaR_dR25_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_DeltaEta_iTrk_Region[itrk][region].Write()
            m_BkgEst_CrossDeltaPhi_Angle_iTrk_Region[itrk][region].Write()

    #for ii, flavor in enumerate(flavors):
    for jj, prop in enumerate(props):
        for itrk in range(3, 7):
            for region in range(12):
                #if jj == 0:
                m_TrkProp_Pt_iTrk_Region[jj][itrk][region].Write()
                m_TrkProp_Angle_iTrk_Region[jj][itrk][region].Write()

    #h_cut_flow_dv.Write()
    #h_DVmass_Ntrk.Write()
    #h_DVmass_Ntrk_MatVeto.Write()
    #h_DVmass_Ntrk_MatVeto_MET220.Write()
    #h_DVmass_Ntrk_MatVeto_MET250.Write()
    #for ntrk in range(2, 7):
    #    for reg in range(12):
    #        h_DVmass_Ntrk_Region[ntrk][reg].Write()
    output_root.Close()
def train():
    # load dataset
    # ==========================
    trainloader, testloader = load_CIFAR10()
    N = len(trainloader)
    print('# of trainset: ', N)

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    cnn = CNN()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(cnn.parameters())
    cnn.to(device)
    criterion.to(device)

    # train
    # ==========================
    loss_history = []
    acc_history = []
    time_history = []
    for epoch in range(opt.epochs):
        loss_cum = 0.0
        acc_cum = 0.0
        time_cum = 0.0
        for i, (imgs, labels) in enumerate(trainloader):
            start = time.time()
            imgs, labels = imgs.to(device), labels.to(device)
            cnn.zero_grad()
            outputs = cnn(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            time_cum += time.time() - start

            loss_cum += loss.item()
            acc = accuracy(outputs, labels)
            acc_cum += acc
            show_progress(epoch + 1, i + 1, N, loss.item(), acc)
        print('\t mean acc: %f' % (acc_cum / N))
        loss_history.append(loss_cum / N)
        acc_history.append(acc_cum / N)
        time_history.append(time_cum)

    # test accuracy
    cnn.eval()
    correct, total = 0, 0
    for imgs, labels in testloader:
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = cnn(imgs)
        _, pred = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (pred == labels).sum().item()
    print('======================')
    print('epoch: %d batch size: %d' % (opt.epochs, opt.batch_size))
    print('mean accuracy on %d test images: %f' % (total, correct / total))

    # save histories
    # with open('./loss_pytorch.csv', 'w') as f:
    #     f.write('pytorch')
    #     for l in loss_history:
    #         f.write(',' + str(l))
    #     f.write('\n')
    # print('saved loss history')
    # with open('./acc_pytorch.csv', 'w') as f:
    #     f.write('pytorch')
    #     for l in acc_history:
    #         f.write(',' + str(l))
    #     f.write('\n')
    # print('saved acc history')
    with open('./lap_record.csv', 'a') as f:
        f.write('pytorch')
        for t in time_history:
            f.write(',' + str(t))
        f.write('\n')

    # save models
    torch.save(cnn.state_dict(), 'model_torch.pth')
test_labs = cls2onehot(test_labs, depth=10)
train_imgs = train_imgs
test_imgs = test_imgs

# Setting models
models = Models(n_classes=10, img_shape=(32, 32, 3))

# Get batch xs, ys
# Augmentation
# batch_xs = aug_lv3(batch_xs)
# batch_xs = batch_xs / 255.
# plot_images(batch_xs, batch_ys)

# Training
eval = Eval()
for step in range(cfg.max_iter):
    show_progress(step, cfg.max_iter)
    batch_xs, batch_ys = next_batch(train_imgs, train_labs, 60)
    train_cost = models.training(batch_xs, batch_ys, cfg.lr)
    if step % cfg.ckpt == 0:
        print 'Validation ... '
        pred, pred_cls, eval_cost, accuracy = models.eval(test_imgs,
                                                          test_labs)
        #pred_op, pred_cls, eval_cost, accuracy = models.eval(dp.val_imgs, dp.val_labs)
        #acc = eval.get_acc(sess_op=models.sess, preds_op=models.pred[:,0], batch_size=60, x_op=models.x_,
        #                   phase_train=models.phase_train)
        print accuracy
        models.save_models('models/{}.ckpt'.format(step))
        print 'train cost : {}'.format(train_cost)
        print 'test cost : {}'.format(eval_cost)
def lexical_weights(phrase_to_internals, lex_source_given_target,
                    lex_target_given_source, target_lex_freqs):
    """
    p_w(f|e, a) = prod_{i=1}^{n} 1/|{j : (i, j) in a}|
                  * sum_{j : (i, j) in a} w(f_i|e_j)

    The final weight of a phrase pair is the maximum of p_w over all stored
    internal alignments a.
    """
    source_given_target = {}
    target_given_source = {}

    def weight_l1_given_l2(l1_phrase, l2_phrase, alignment, l1_given_l2,
                           reverse, logprob=True):
        """Calculate lexical weight for source|target or target|source."""
        # log-domain products start at log(1) = 0, probability-domain at 1
        weight = 0. if logprob else 1.
        alignment = [(a, b) for (b, a) in alignment] if reverse else alignment
        _sum = sum_logs if logprob else sum
        for i, l1_word in enumerate(l1_phrase):
            # determine all words in the l2 phrase aligned to position i
            aligned_to_i = [b for a, b in alignment if a == i]
            if not aligned_to_i:
                # handle non-aligned words
                pair = ('NULL', l1_word) if reverse else (l1_word, 'NULL')
                p_l1_given_l2 = l1_given_l2[pair]
            elif len(aligned_to_i) == 1:
                # single-link case, handled separately for speed
                l2_word = l2_phrase[aligned_to_i[0]]
                pair = (l2_word, l1_word) if reverse else (l1_word, l2_word)
                p_l1_given_l2 = l1_given_l2[pair]
            else:
                # average over all l2 words aligned to position i
                list_of_probs = []
                for j in aligned_to_i:
                    pair = (l2_phrase[j], l1_word) if reverse \
                           else (l1_word, l2_phrase[j])
                    list_of_probs.append(l1_given_l2[pair])
                p_l1_given_l2 = _sum(list_of_probs)
                if logprob:
                    p_l1_given_l2 += math.log(1.0 / len(aligned_to_i))
                else:
                    p_l1_given_l2 /= len(aligned_to_i)
            # sanity check: a (log-)probability must not exceed (log) 1
            if (p_l1_given_l2 > 0 and logprob) or \
                    (p_l1_given_l2 > 1 and not logprob):
                print p_l1_given_l2, l1_word, \
                    [l2_phrase[j] for j in aligned_to_i]
                raise
            #p_l1_given_l2 = \
            #    _sum([l1_given_l2[((l2_phrase[j], l1_word) if reverse \
            #                       else (l1_word, l2_phrase[j]))]
            #          for j in aligned_to_i]) + \
            #    (1 / math.log(len(aligned_to_i)))

            # weight is the product of the probability for each word
            if logprob:
                weight += p_l1_given_l2
            else:
                weight *= p_l1_given_l2
        if weight > 1:
            print weight
            print l1_phrase, l2_phrase, alignment, reverse
            raise
        return weight

    num_phrases = len(phrase_to_internals)
    point = num_phrases / 100 if num_phrases > 100 else 1
    for i, (phrase_pair, possible_internals) in enumerate(
            phrase_to_internals.iteritems()):
        if i % point == 0:
            show_progress(i, num_phrases, 40, 'LEXICAL WEIGHTS')
        weight_source_given_target = float('-inf')
        weight_target_given_source = float('-inf')
        source_phrase = phrase_pair[0].split()
        target_phrase = phrase_pair[1].split()
        for internal in possible_internals:
            # calc weight for the current alignment
            temp_weight_source_given_target = \
                weight_l1_given_l2(source_phrase, target_phrase, internal,
                                   lex_source_given_target, reverse=False)
            # reverse alignment for target_given_source
            temp_weight_target_given_source = \
                weight_l1_given_l2(target_phrase, source_phrase, internal,
                                   lex_target_given_source, reverse=True)
            # take the maximum over all possible internal alignments
            if temp_weight_source_given_target > weight_source_given_target:
                weight_source_given_target = temp_weight_source_given_target
            if temp_weight_target_given_source > weight_target_given_source:
                weight_target_given_source = temp_weight_target_given_source
        source_given_target[phrase_pair] = weight_source_given_target
        target_given_source[phrase_pair] = weight_target_given_source
    show_progress(num_phrases, num_phrases, 40, 'LEXICAL WEIGHTS')
    sys.stdout.write('\n')
    return source_given_target, target_given_source
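# A quick worked example of the formula above, with toy numbers: if source
# word f_1 is aligned to target words e_2 and e_3 with w(f_1|e_2) = 0.4 and
# w(f_1|e_3) = 0.2, its contribution is (0.4 + 0.2) / 2 = 0.3; the phrase
# weight is the product of one such averaged term per source word, and words
# aligned to nothing contribute w(f_i|NULL). With logprob=True the code
# accumulates the same quantity as a sum of log probabilities.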
restore_model = './models/vgg_11/18/model-564'
tester = Tester.Tester(None)
tester._reconstruct_model(restore_model)
tester.n_classes = 2
"""
best second
# [1,33](73) [1,32](72) 1.png
# [13,43](875) [13,42](874) 3.png
# [3,20](146) [3,21](147) 2.png
"""
imgs_list = []
for p in range(1, 13):
    utils.show_progress(p, 12)
    for i in range(7):
        try:
            test_imgs = np.load(
                '../Find_Wally/wally_raspCam_np/second/{}_{}.npy'.format(p, i))
        except IOError:
            # Skip crop files that do not exist (assumed intent of the
            # original try block, whose handler is missing in this excerpt)
            continue
        test_imgs = aug.apply_clahe(test_imgs)
        test_imgs = random_rotate_90_180_270(test_imgs, 3)
        test_labs = [0] * len(test_imgs)
        test_labs = cls2onehot(test_labs, 2)
        test_imgs = test_imgs / 255.
        tester.validate(test_imgs, test_labs, 60, 0, False)
        # indices of crops classified as Wally with confidence > 0.8
        indices = np.where(np.asarray(tester.pred_all)[:, 0] > 0.8)[0]
        print indices
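# `cls2onehot` is used here and in the training script above but never defined
# in this document. A minimal sketch of what its call sites
# (cls2onehot(labels, depth)) imply; the real helper may differ:

import numpy as np

def cls2onehot(labels, depth):
    """Map integer class labels to one-hot rows, e.g. [0, 2] -> eye rows 0, 2.

    Hypothetical reconstruction based on the call sites in this document.
    """
    labels = np.asarray(labels, dtype=np.int64)
    return np.eye(depth, dtype=np.float32)[labels]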
def extract_phrase_pair_freqs(alignments_file, source_file, target_file,
                              max_length, max_lines):
    """Extract and count the frequency of all phrase pairs given an
    alignment between sentences.

    Keyword arguments:
    alignments_file -- file that contains the alignments
    source_file -- file containing sentences from language 1
    target_file -- file containing sentences from language 2
    max_length -- maximum length of phrase pairs
    max_lines -- maximum number of lines to use for phrase pair extraction

    Returns (3-tuple):
    phrase pair frequencies (3-tuple): pair frequencies, source frequencies,
        target frequencies
    lexical (word pair) frequencies (3-tuple): pair frequencies, source
        frequencies, target frequencies
    internal alignments for phrase pairs
    """
    # phrase frequencies
    phrase_pair_freqs = defaultdict(int)
    source_phrase_freqs = defaultdict(int)
    target_phrase_freqs = defaultdict(int)
    # lexical frequencies
    lex_pair_freqs = defaultdict(int)
    source_lex_freqs = defaultdict(int)
    target_lex_freqs = defaultdict(int)
    # map phrase pair to possible internal word alignments
    phrase_to_internals = defaultdict(set)

    # open files
    num_lines = sum(1 for line in open(alignments_file))
    if max_lines == float('inf'):
        max_lines = num_lines
    else:
        max_lines = int(max_lines)
    alignments = open(alignments_file, 'r')
    source = open(source_file, 'r')
    target = open(target_file, 'r')

    point = max_lines / 100 if max_lines > 100 else 1
    for i, str_align in enumerate(alignments):
        if i % point == 0:
            show_progress(i, max_lines, 40, 'PHRASE EXTRACTION')
        if i == max_lines:
            break
        # read files
        source_words = source.next().strip().split()
        target_words = target.next().strip().split()
        source_length = len(source_words)
        target_length = len(target_words)
        align = str_to_alignments(str_align)

        # word pair frequencies
        for source_index, target_index in align:
            word_pair = (source_words[source_index],
                         target_words[target_index])
            lex_pair_freqs[word_pair] += 1
            source_lex_freqs[word_pair[0]] += 1
            target_lex_freqs[word_pair[1]] += 1

        phrase_to_internal = extract_alignments(set(align), source_length,
                                                target_length, max_length)
        for phrase_pair, internal_alignment in extract_phrase_pairs_gen(
                phrase_to_internal, source_words, target_words):
            # phrase pair frequencies
            phrase_pair_freqs[phrase_pair] += 1
            source_phrase_freqs[phrase_pair[0]] += 1
            target_phrase_freqs[phrase_pair[1]] += 1
            # phrase pair to possible internal word alignments
            phrase_to_internals[phrase_pair].add(frozenset(internal_alignment))

        # unaligned words on both sides also contribute lexical counts
        unaligned_source, unaligned_target = unaligned_words(
            align, source_length, target_length)
        unaligned_source.extend(unaligned_target)
        for phrase_pair in unaligned_phrase_pairs_gen(unaligned_source,
                                                      source_words,
                                                      target_words):
            lex_pair_freqs[phrase_pair] += 1
            source_lex_freqs[phrase_pair[0]] += 1
            target_lex_freqs[phrase_pair[1]] += 1

    show_progress(max_lines, max_lines, 40, 'PHRASE EXTRACTION')
    sys.stdout.write('\n')
    alignments.close()
    source.close()
    target.close()

    return ((phrase_pair_freqs, source_phrase_freqs, target_phrase_freqs),
            (lex_pair_freqs, source_lex_freqs, target_lex_freqs),
            phrase_to_internals)
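# `str_to_alignments` is called above but not defined in this document. It is
# assumed to parse GIZA++/Moses-style alignment lines such as '0-0 1-2 2-1';
# a sketch under that assumption:

def str_to_alignments(str_align):
    """Parse an 'i-j i-j ...' word-alignment line into (int, int) pairs.

    Sketch assuming Moses-style alignment output, where each 'i-j' token
    links source word i to target word j; the original parser is not shown.
    """
    alignments = []
    for pair in str_align.strip().split():
        i, j = pair.split('-')
        alignments.append((int(i), int(j)))
    return alignments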
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--inputFiles', type=str,
                        help='comma separated input files')
    parser.add_argument('-o', '--outputFile', type=str,
                        help='output file name')
    args = parser.parse_args()

    f = TFile(args.outputFile, "recreate")
    t = TTree("DVtrkTemplate", "track template for DVMultiTrkBkg")

    # create 1-dimensional arrays as fill variables
    # (python's float datatype corresponds to C++ doubles)
    pt = array.array('f', [0.])
    eta = array.array('f', [0.])
    phi = array.array('f', [0.])
    d_eta = array.array('f', [0.])
    d_phi = array.array('f', [0.])
    dv_r = array.array('f', [0.])
    dv_z = array.array('f', [0.])
    dv_phi = array.array('f', [0.])
    dv_eta = array.array('f', [0.])
    dv_nTracks = array.array('i', [0])
    dv_m = array.array('f', [0.])
    region = array.array('i', [0])
    met = array.array('f', [0.])

    # create the branches and assign the fill variables to them
    t.Branch('pt', pt, 'pt/F')
    t.Branch('eta', eta, 'eta/F')
    t.Branch('phi', phi, 'phi/F')
    t.Branch('d_eta', d_eta, 'd_eta/F')
    t.Branch('d_phi', d_phi, 'd_phi/F')
    t.Branch('dv_r', dv_r, 'dv_r/F')
    t.Branch('dv_z', dv_z, 'dv_z/F')
    t.Branch('dv_phi', dv_phi, 'dv_phi/F')
    t.Branch('dv_eta', dv_eta, 'dv_eta/F')
    t.Branch('dv_nTracks', dv_nTracks, 'dv_nTracks/I')
    t.Branch('dv_m', dv_m, 'dv_m/F')
    t.Branch('region', region, 'region/I')
    t.Branch('met', met, 'met/F')

    chain = TChain('Nominal', 'Nominal Tree')
    chain.Add(args.inputFiles)

    # loop over the chain, fill the variables and call Fill() once per track
    entries = chain.GetEntries()
    try:
        for entry in range(entries):
            utils.show_progress(entry, entries)
            # get the next tree in the chain and verify
            ientry = chain.LoadTree(entry)
            if ientry < 0:
                break
            # copy the next entry into memory and verify
            nb = chain.GetEntry(entry)
            if nb <= 0:
                continue
            if chain.EventNumber == 752668466:
                continue
            if not utils.basic_event_selection(chain):
                continue
            pos_PV = TVector3(chain.PV_x, chain.PV_y, chain.PV_z)
            for idv in range(len(chain.DV_x)):
                if not utils.basic_dv_selection(chain, idv):
                    continue
                region[0] = chain.DV_Region[idv]
                pos_DV = TVector3(chain.DV_x[idv], chain.DV_y[idv],
                                  chain.DV_z[idv])
                dv_m[0] = chain.DV_m[idv]
                dv_r[0] = pos_DV.Perp()
                dv_z[0] = pos_DV.Z()
                dv_phi[0] = pos_DV.Phi()
                dv_eta[0] = pos_DV.Eta()
                # keep only vertices in a valid region with mass >= 2 GeV
                if chain.DV_Region[idv] < 0 or chain.DV_m[idv] < 2:
                    continue
                dv_nTracks[0] = chain.DV_nTracks[idv]
                tlv_DVPV = TLorentzVector()
                tlv_DVPV.SetVect(pos_DV - pos_PV)
                for itrk in range(chain.DV_nTracks[idv]):
                    pt[0] = chain.DV_track_pt_wrtSV[idv][itrk]
                    eta[0] = chain.DV_track_eta_wrtSV[idv][itrk]
                    phi[0] = chain.DV_track_phi_wrtSV[idv][itrk]
                    d_eta[0] = eta[0] - tlv_DVPV.Eta()
                    d_phi[0] = phi[0] - tlv_DVPV.Phi()
                    t.Fill()
    except KeyboardInterrupt:
        pass

    # write the tree into the output file and close the file
    f.Write()
    f.Close()
""" import glob , os from image_processing import ImageProcessing import numpy as np from PIL import Image import utils root_root_dir = '/mnt/Find_Wally/wally_dataset' second_dir=os.path.join( root_root_dir ,'second_dataset') thrid_dir = os.path.join(root_root_dir , 'third_dataset') root_save_dir = 'wally_raspCam_np' img_prc = ImageProcessing() sec_paths = glob.glob(os.path.join(second_dir , '*.jpg')) trd_paths = glob.glob(os.path.join(thrid_dir , '*.jpg')) assert len(sec_paths) != 0 and len(trd_paths) != 0 tmp_dict = {'second' : sec_paths , 'thrid' : trd_paths} for key in tmp_dict: paths = tmp_dict[key] save_dir = os.path.join(root_save_dir, key) utils.makedir(save_dir) for ind,path in enumerate(paths) : utils.show_progress(ind , len(paths)) name = utils.get_name(path) img = np.asarray(Image.open(path).convert('RGB')) # Cropping imgs , coords = img_prc.stride_cropping(img , 200 , 200 , 400 ,400) save_path = os.path.join(save_dir,name.replace('jpg', 'npy')) np.save(save_path , imgs )
def train(self, num_epochs, batch_size, gpu_id):
    if gpu_id is not None:
        self.net.to_gpu(gpu_id)
        self.x_test = to_gpu(self.x_test, gpu_id)
        self.y_test = to_gpu(self.y_test, gpu_id)

    num_batches = int(len(self.x_train) / batch_size)
    print('epochs : {}, number of batches : {}'
          .format(num_epochs, num_batches))

    lap_times = []
    for e in range(num_epochs):
        # shuffle over the whole training set (was hard-coded to 50000)
        permute_idx = np.random.permutation(np.arange(len(self.x_train)))
        lap_time = []
        for b in range(num_batches):
            x_batch = self.x_train[permute_idx[b * batch_size:(b + 1) * batch_size]]
            y_batch = self.y_train[permute_idx[b * batch_size:(b + 1) * batch_size]]

            s_time = time.time()
            if gpu_id is not None:
                x_batch = to_gpu(x_batch, gpu_id)
                y_batch = to_gpu(y_batch, gpu_id)
            logits = self.net(x_batch)
            loss = F.softmax_cross_entropy(logits, y_batch)
            self.net.cleargrads()
            loss.backward()
            self.opt.update()
            e_time = time.time()
            lap_time.append(e_time - s_time)

            if b % 10 == 0:
                loss = to_cpu(loss.data)
                acc = F.accuracy(logits, y_batch)
                acc = to_cpu(acc.data)
                # report progress over batches within the epoch
                # (the original passed batch_size here, which looks like a bug)
                show_progress(e + 1, b + 1, num_batches, loss, acc)
        lap_times.append(np.sum(lap_time))

        # validation
        accs_val = []
        for b in range(int(len(self.x_test) / batch_size)):
            x_val = self.x_test[b * batch_size:(b + 1) * batch_size]
            y_val = self.y_test[b * batch_size:(b + 1) * batch_size]
            preds_val = self.net(x_val)
            acc_val = F.accuracy(preds_val, y_val)
            accs_val.append(to_cpu(acc_val.data))
        print('\n{} epoch validation accuracy {}'.format(
            e + 1, np.mean(accs_val)))

        # save trained model
        serializers.save_npz('./model_chainer/chainer{}.model'.format(e),
                             self.net)

    with open('./lap_record.csv', 'a') as f:
        f.write('chainer')
        for lap in lap_times:
            f.write(',' + str(lap))
        f.write('\n')
from array import array

limitsLifetime = array('d', ibins)
tefficiency = TEfficiency('tefficiency',
                          ';c#tau [mm]; Event-level efficiency',
                          len(limitsLifetime) - 1, limitsLifetime)

entries = chain.GetEntries()
print('* Number of entries = {}'.format(entries))
try:
    for entry in range(entries):
        utils.show_progress(entry, entries)
        # get the next tree in the chain and verify
        ientry = chain.LoadTree(entry)
        if ientry < 0:
            break
        # copy the next entry into memory and verify
        nb = chain.GetEntry(entry)
        if nb <= 0:
            continue
        if chain.EventNumber == 752668466:
            continue
        event_weight = chain.McEventWeight * chain.PileupWeight * chain.ISRWeight
        h_mu.Fill(chain.CorrectedMu, event_weight)
        h_mu_pileupWeight.Fill(chain.CorrectedMu, chain.PileupWeight)
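# The TEfficiency declared above is presumably filled per event further down
# in the (truncated) loop. In ROOT's API that is TEfficiency::Fill(bPassed, x)
# or TEfficiency::FillWeighted(bPassed, w, x) when an event weight applies.
# A standalone, hedged illustration with made-up bins and pass/fail decisions
# (not the analysis code itself):

from array import array
from ROOT import TEfficiency

bins = array('d', [1., 10., 100., 1000.])
eff_demo = TEfficiency('eff_demo', ';c#tau [mm];efficiency',
                       len(bins) - 1, bins)
for ctau, passed in [(3., True), (30., False), (300., True)]:
    eff_demo.Fill(passed, ctau)          # TEfficiency::Fill(bPassed, x)
# weighted variant: eff_demo.FillWeighted(passed, weight, ctau)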
# check summary shape and value
val_acc, val_loss, pred = sess.run([accuracy, cost, pred_op],
                                   feed_dict=test_feedDict)
val_acc_mean.append(val_acc)
val_loss_mean.append(val_loss)
pred_all.append(pred)

val_acc_mean = np.mean(np.asarray(val_acc_mean))
val_loss_mean = np.mean(np.asarray(val_loss_mean))
summary = tf.Summary(value=[
    tf.Summary.Value(tag='Test batch_size 1 loss',
                     simple_value=float(val_loss_mean)),
    tf.Summary.Value(tag='Test batch_size 1 acc',
                     simple_value=float(val_acc_mean)),
    tf.Summary.Value(tag='Train batch_size 1 loss',
                     simple_value=float(train_loss)),
    tf.Summary.Value(tag='Train batch_size 1 acc',
                     simple_value=float(train_acc))
])
writer.add_summary(summary, step)
print 'Validation Batch Size : 1, Val accuracy : {}, loss : {}'.format(
    val_acc_mean, val_loss_mean)

utils.show_progress(step, max_iter)
batch_xs, batch_ys = data.next_batch(train_imgs, train_labs, batch_size)
train_acc, train_loss, _ = sess.run([accuracy, cost, train_op],
                                    feed_dict={
                                        x_: batch_xs,
                                        y_: batch_ys,
                                        phase_train: True
                                    })