Example #1
def read_translation_model(file_name, feature_weights, top_translations,
        max_phrase_length):
    """Read the translation model"""
    translation_model = defaultdict(list)
    document = open(file_name, 'r')

    num_lines = sum(1 for line in open(file_name, 'r'))
    point = num_lines / 100 if num_lines > 100 else 1

    for i, line in enumerate(document):
        if i % point == 0:
            show_progress(i, num_lines, 40, 'LOADING TRANSLATIONMODEL')

        segments = line.strip().split(' ||| ')
        source = tuple(segments[0].split())
        if len(source) > max_phrase_length:
            continue
        target = tuple(segments[1].split())
        probs = tuple([float(prob) for prob in segments[2].split()])
        # weighted sum of conditional probabilities and lexical weights
        measure = sum([prob * feature_weights[i] for i, prob in \
                       enumerate(probs)])
        if len(translation_model[source]) < top_translations:
            heapq.heappush(translation_model[source], (measure, target,
                                                       probs))
        else:
            heapq.heappushpop(translation_model[source], (measure,
                                                          target, probs))
    show_progress(1, 1, 40, 'LOADING TRANSLATIONMODEL')
    sys.stdout.write('\n')
    document.close()

    return {s: [(t, p) for (m, t, p) in mtp] for (s, mtp) in
            translation_model.iteritems()}
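
Every example on this page calls a show_progress helper that is not shown here, and the call shapes differ (positional (i, total, width, label) in the phrase-table readers, (epoch, batch, num_batches, **metrics) in the trainers). A minimal sketch of the four-argument variant assumed by Example #1, for illustration only and not the original implementation:

import sys

def show_progress(current, total, bar_length=40, label=''):
    # Illustrative only: draws an in-place console bar for calls shaped like
    # show_progress(i, num_lines, 40, 'LOADING TRANSLATIONMODEL').
    fraction = min(float(current) / total, 1.0) if total else 1.0
    filled = int(bar_length * fraction)
    bar = '=' * filled + ' ' * (bar_length - filled)
    sys.stdout.write('\r%s [%s] %3d%%' % (label, bar, int(fraction * 100)))
    sys.stdout.flush()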
Example #2
    def train(self):
        train_start = time.time()
        for e in range(self.args.num_epochs):
            for i in range(self.num_batches):
                time_s = time.time()
                _, loss, epe, reg_loss = self.sess.run(
                    [self.optimizer, self.loss, self.epe, self.weights_l2])

                if i % 20 == 0:
                    batch_time = time.time() - time_s
                    kwargs = {
                        'loss': loss,
                        'reg_loss': reg_loss,
                        'epe': epe,
                        'batch time': batch_time
                    }
                    show_progress(e + 1, i + 1, self.num_batches, **kwargs)

            loss_vals, epe_vals, reg_vals = [], [], []
            self.sess.run([self.initializer_v])
            for i in range(self.num_batches_v):
                image0_v, image1_v, flows_val, loss_val, epe_val, reg_val \
                  = self.sess.run([self.image0_v, self.image1_v, self.flow_v,
                                   self.loss_v, self.epe_v, self.weights_l2])
                loss_vals.append(loss_val)
                epe_vals.append(epe_val)
                reg_vals.append(reg_val)

            g_step = self.sess.run(self.global_step)
            print(f'\r{e+1} epoch validation, loss: {np.mean(loss_vals)}, reg_loss:{np.mean(reg_vals)}, epe: {np.mean(epe_vals)}'\
                  + f', global step: {g_step}, elapsed time: {time.time()-train_start} sec.')

            if not os.path.exists('./model'):
                os.mkdir('./model')
            self.saver.save(self.sess, f'./model/model_{e+1}.ckpt')
Example #3
def read_freqs(path, label = 'FREQS'):
    """Read freqs from an _extracted_lexwords.txt or an _extracted_phrases.txt
    file"""
    phrase_pair_freqs = defaultdict(int)
    source_freqs = defaultdict(int)
    target_freqs = defaultdict(int)

    num_lines = sum(1 for line in open(path, 'r'))
    point = num_lines / 100 if num_lines > 100 else 1

    with open(path, 'r') as in_file:
        for i, line in enumerate(in_file):
            if i % point == 0:
                show_progress(i, num_lines, 40, label)

            source, target, freqs = line.strip().split(' ||| ')
            source_freq, target_freq, pair_freq = [int(x) for x in freqs.split()]
            phrase_pair_freqs[(source, target)] = pair_freq
            source_freqs[source] = source_freq
            target_freqs[target] = target_freq

    show_progress(1, 1, 40, label)
    sys.stdout.write('\n')

    return phrase_pair_freqs, source_freqs, target_freqs
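
The readers above split each record on the literal separator ' ||| ' and then split the frequency field on whitespace. A toy line in the assumed '_extracted_phrases.txt' layout (the sample data is made up):

line = 'el perro ||| the dog ||| 7 5 3\n'
source, target, freqs = line.strip().split(' ||| ')
source_freq, target_freq, pair_freq = [int(x) for x in freqs.split()]
# source == 'el perro', target == 'the dog', pair_freq == 3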
Example #4
def read_full_translation_model(file_name, max_phrase_length):
    """Read the full translation model taking into account the maximal phrase
    length"""
    translation_model = defaultdict(list)
    document = open(file_name, 'r')

    num_lines = sum(1 for line in open(file_name, 'r'))
    point = num_lines / 100 if num_lines > 100 else 1

    for i, line in enumerate(document):
        if i % point == 0:
            show_progress(i, num_lines, 40, 'LOADING FULLTRANSLATIONMODEL')

        segments = line.strip().split(' ||| ')
        source = tuple(segments[0].split())
        if len(source) > max_phrase_length:
            continue
        target = tuple(segments[1].split())
        probs = tuple([float(prob) for prob in segments[2].split()])

        translation_model[source].append((target, probs))

    show_progress(1, 1, 40, 'LOADING FULLTRANSLATIONMODEL')
    sys.stdout.write('\n')
    document.close()

    return translation_model
Example #5
def conditional_probabilities(phrase_pair_freqs, source_phrase_freqs,
                              target_phrase_freqs, label, logprob):
    """Calculate conditional probability of phrase pairs in both directions.

    Input:
    phrase_pair_freqs -- counts of phrase pairs
    source_phrase_freqs -- counts of phrases in language 1
    target_phrase_freqs -- counts of phrases in language 2
    label -- used to indicate current process
    logprob -- boolean, if true, probabilities are used in log-form

    Returns 2 dictionaries mapping phrase pair to P(source|target) and
    P(target|source)
    """
    source_given_target = {}
    target_given_source = {}
    num_phrases = len(phrase_pair_freqs)
    point = num_phrases / 100 if num_phrases > 100 else 1

    # choose a plain or log probability function depending on logprob
    prob = (lambda f1, f2: math.log(float(f1) / f2)) if logprob else \
           (lambda f1, f2: float(f1) / f2)

    for i, (phrase_pair, freq) in enumerate(phrase_pair_freqs.iteritems()):
        if i % point == 0:
            show_progress(i, num_phrases, 40, label)
        try:
            source_given_target[phrase_pair] = prob(freq, source_phrase_freqs[phrase_pair[0]])
            target_given_source[phrase_pair] = prob(freq, target_phrase_freqs[phrase_pair[1]])
        except:
            _log('phrase pair : {}\ni : {}'.format(phrase_pair, i))
            raise

    show_progress(num_phrases, num_phrases, 40, label)
    sys.stdout.write('\n')
    return source_given_target, target_given_source
Example #6
    def resize_npImages(self, np_imgs, resize):

        ret_imgs = []
        for i, img in enumerate(np_imgs):
            utils.show_progress(i, len(np_imgs))
            img = Image.fromarray(img).convert('RGB').resize(resize)
            img = np.asarray(img)
            ret_imgs.append(img)
        return np.asarray(ret_imgs)
Example #7
def extract_lexical_reordering_counts(alignments_file, source_file,
                                target_file, max_length, max_lines=None):
    """
    for the left-to-right and right-to-left models calculate:
    c(m,(f,e)), c(s,(f,e)), c(d_l,(f,e)), c(d_r,(f,e))
    where m=monotone, s=swap, d_l=left-discontinuous, d_r=right-discontinuous
    """
    # open files
    num_lines = sum(1 for line in open(alignments_file))
    max_lines = int(max_lines) if max_lines else num_lines

    alignments = open(alignments_file, 'r')
    source = open(source_file, 'r')
    target = open(target_file, 'r')

    reordering_counts = {} # maps phrase pair to its reordering counts

    point = max_lines / 100 if max_lines > 100 else 1
    for i, str_align in enumerate(alignments):
        if i % point == 0:
            show_progress(i, max_lines, 40, 'LEXICAL REORDERING')
        if i == max_lines:
            break

        source_words = source.next().strip().split()
        target_words = target.next().strip().split()
        source_length = len(source_words)
        target_length = len(target_words)

        align = str_to_alignments(str_align)
        word_phrase_pairs = set([word_pair*2 for word_pair in align])
        # phrase to internal is a dict mapping phrase ranges (source_min,
        # target_min, source_max, target_max) to internal word alignments []
        phrase_to_internal = extract_alignments(set(align), source_length,
                                               target_length, max_length)

        try:
            phrase_pairs = set(phrase_to_internal.keys())
            for left_phrase_range in phrase_pairs:
                # phrase based counting events
                for right_phrase_range in phrase_pairs - word_phrase_pairs:
                    update_count(left_phrase_range, right_phrase_range,
                        reordering_counts, source_words, target_words)

                # word based counting events
                for right_phrase_range in word_phrase_pairs:
                    update_count(left_phrase_range, right_phrase_range,
                        reordering_counts, source_words, target_words)

        except:
            print 'source: \n%s' % ' '.join(source_words)
            print 'target: \n%s' % ' '.join(target_words)
            print 'alignment: \n%s' % str_align
            raise

    show_progress(max_lines, max_lines, 40, 'LEXICAL REORDERING')
    return reordering_counts
Example #8
    def train(self):
        train_start = time.time()
        for e in range(self.args.num_epochs):
            for i, (images, flows_gt) in enumerate(self.train_loader):
                images = images.numpy() / 255.0
                flows_gt = flows_gt.numpy()

                time_s = time.time()
                _, _, loss, epe = \
                  self.sess.run([self.optimizer, self.global_step_update,
                                 self.loss, self.epe],
                                feed_dict = {self.images: images, self.flows_gt: flows_gt})

                if i % 20 == 0:
                    batch_time = time.time() - time_s
                    kwargs = {
                        'loss': loss,
                        'epe': epe,
                        'batch time': batch_time
                    }
                    show_progress(e + 1, i + 1, self.num_batches, **kwargs)

            loss_vals, epe_vals = [], []
            for images_val, flows_gt_val in self.val_loader:
                images_val = images_val.numpy() / 255.0
                flows_gt_val = flows_gt_val.numpy()

                flows, loss_val, epe_val \
                    = self.sess.run([self.flows, self.loss, self.epe],
                                    feed_dict = {self.images: images_val,
                                                 self.flows_gt: flows_gt_val})
                loss_vals.append(loss_val)
                epe_vals.append(epe_val)

            g_step = self.sess.run(self.global_step)
            print(f'\r{e+1} epoch validation, loss: {np.mean(loss_vals)}, epe: {np.mean(epe_vals)}'\
                  +f', global step: {g_step}, elapsed time: {time.time()-train_start} sec.')

            # visualize estimated optical flow
            if self.args.visualize:
                if not os.path.exists('./figure'):
                    os.mkdir('./figure')
                # Estimated flow values are downscaled, rescale them compatible to the ground truth
                flow_set = []
                for l, flow in enumerate(flows):
                    upscale = 20 / 2**(self.args.num_levels - l)
                    flow_set.append(flow[0] * upscale)
                flow_gt = flows_gt_val[0]
                images_v = images_val[0]
                vis_flow_pyramid(flow_set, flow_gt, images_v,
                                 f'./figure/flow_{str(e+1).zfill(4)}.pdf')

            if not os.path.exists('./model'):
                os.mkdir('./model')
            self.saver.save(self.sess, f'./model/model_{e+1}.ckpt')
Example #9
    def rect2square_imgs(self, imgs):

        print np.shape(imgs)
        ret_imgs = []
        count = 0
        for img in imgs:
            show_progress(count, len(imgs))
            padded_img = self.rect_2_square(img)
            ret_imgs.append(padded_img)
            count += 1
        return ret_imgs
Example #10
    def train(self,
              num_epochs,
              batch_size):

        num_batches = int(len(self.x_train) / batch_size)
        print('epochs : {}, number of batches : {}' \
              .format(num_epochs, num_batches))

        lap_times = []
        for e in range(num_epochs):
            permute_idx = np.random.permutation(np.arange(50000))
            lap_time = []
            for b in range(num_batches):

                x_batch = self.x_train[permute_idx[b * batch_size:(b + 1) * batch_size]]
                y_batch = self.y_train[permute_idx[b * batch_size:(b + 1) * batch_size]]

                s_time = time.time()
                loss = self.net.train_on_batch(x_batch, y_batch)
                e_time = time.time()
                lap_time.append(e_time - s_time)

                if b % 10 == 0:
                    preds = self.net.predict(x_batch)
                    acc = np.mean(np.sum(preds * y_batch, axis=1))
                    show_progress(e + 1, b + 1, num_batches, loss, acc)

            lap_times.append(np.sum(lap_time))

            # validation
            accs_val = []
            for b in range(int(len(self.x_test) / batch_size)):
                x_val = self.x_test[b * batch_size:(b + 1) * batch_size]
                y_val = self.y_test[b * batch_size:(b + 1) * batch_size]
                preds_val = self.net.predict(x_val)
                acc_val = np.mean(np.sum(preds_val * y_val, axis=1))
                accs_val.append(acc_val)
            print('\n{} epoch validation accuracy {}'.format(e + 1, np.mean(accs_val)))

            # save trained model
            self.net.save_weights('./model_keras/model_{}.h5'.format(e))

        with open('./lap_record.csv', 'a') as f:
            f.write('keras')
            for lap in lap_times:
                f.write(',' + str(lap))
            f.write('\n')
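
In the trainer above, y_batch is assumed to be one-hot, so np.sum(preds * y_batch, axis=1) picks out the probability the network assigns to the true class and acc is a mean-confidence proxy rather than a hard accuracy. A small numpy illustration with toy values:

import numpy as np

preds = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.3, 0.6]])
y_batch = np.array([[1, 0, 0],
                    [0, 1, 0]])
acc = np.mean(np.sum(preds * y_batch, axis=1))
print(acc)  # (0.7 + 0.3) / 2 = 0.5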
Example #11
    def train(self):
        train_start = time.time()
        for e in range(self.args.n_epoch):
            for i, (images, flows_gt) in enumerate(self.train_loader):
                images = images.numpy()/255.0
                flows_gt = flows_gt.numpy()
                
                time_s = time.time()
                _, _, loss_reg, epe_final = \
                  self.sess.run([self.optimizer, self.global_step_update,
                                 self.loss_reg, self.epe_final],
                                feed_dict = {self.images: images, self.flows_gt: flows_gt})

                if i%20 == 0:
                    batch_time = time.time() - time_s
                    kwargs = {'loss':loss_reg, 'epe':epe_final, 'batch time':batch_time}
                    show_progress(e+1, i+1, self.num_batches, **kwargs)

            loss_evals, epe_evals = [], []
            for images_eval, flows_gt_eval in self.eval_loader:
                images_eval = images_eval.numpy()/255.0
                flows_gt_eval = flows_gt_eval.numpy()

                flows_pyramid, loss_eval, epe_eval \
                    = self.sess.run([self.flows_pyramid, self.loss_reg, self.epe_final],
                                    feed_dict = {self.images: images_eval,
                                                 self.flows_gt: flows_gt_eval})
                loss_evals.append(loss_eval)
                epe_evals.append(epe_eval)
                
            g_step = self.sess.run(self.global_step)
            print(f'\r{e+1} epoch evaluation, loss: {np.mean(loss_evals)}, epe: {np.mean(epe_evals)}'\
                  +f', global step: {g_step}, elapsed time: {time.time()-train_start} sec.')
            
            # visualize estimated optical flow
            if self.args.visualize:
                if not os.path.exists('./figure'):
                    os.mkdir('./figure')
                flow_pyramid = [f_py[0] for f_py in flows_pyramid]
                flow_gt = flows_gt_eval[0]
                images_e = images_eval[0]
                vis_flow_pyramid(flow_pyramid, flow_gt, images_e,
                                 f'./figure/flow_{str(e+1).zfill(4)}.pdf')

            if not os.path.exists('./model'):
                os.mkdir('./model')
            self.saver.save(self.sess, f'./model/model_{e+1}.ckpt')
Example #12
    def train(self, continue_: bool=False):
        training_dataset = self.train_set

        model = self.Model(dropout_chance=self.dropout_chance).cuda()

        if continue_:
            model.load_state_dict(torch.load(self.model_file))

        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=self.lr)

        loss_data, validation_accuracy_data, validation_loss_data, precision_data, recall_data = [], [], [], [], []
        for epoch in range(1, self.epochs+1):

            epoch_loss = []
            for images, targets in tqdm(training_dataset, desc="epoch", ncols=150):
                optimizer.zero_grad()

                images = images.float().cuda()
                targets = targets.float().cuda()

                predictions = model.train()(images)

                loss = criterion(predictions, targets)
                loss.backward()
                optimizer.step()

                epoch_loss.append(loss.item())

            current_loss = np.mean(epoch_loss)
            current_val_accuracy, current_val_loss, precisions, recalls = self._validate(model, self.validation_set, threshold=self.evaluation_treshold)

            show_progress(self.epochs, epoch, current_loss, current_val_accuracy, current_val_loss)

            loss_data.append(current_loss)
            validation_accuracy_data.append(current_val_accuracy)
            validation_loss_data.append(current_val_loss)
            precision_data.append(precisions)
            recall_data.append(recalls)

            torch.save(model.state_dict(), self.model_file)

        print("\n finished training")
        precision_data = list(zip(*precision_data))
        recall_data = list(zip(*recall_data))
        plot(loss_data, validation_accuracy_data, validation_loss_data, precision_data, recall_data,  \
                    classes=list(self.classes.keys()), save_to=("plots/training_" + self.model_file.split("_")[1].split(".")[0] + ".png"))                                        
Example #13
def main():
    ## make output dirs
    os.makedirs(args.output_dir, exist_ok=True)
    if args.save_adv_image:
        os.makedirs(os.path.join(args.output_dir, 'adv_images'), exist_ok=True)

    ## input image paths and labels
    dataset = np.loadtxt('../data/val.txt', dtype=str)
    ind = np.random.randint(0, len(dataset), args.test_size)
    dataset = dataset[ind]

    ## Adv model
    FGSM = FastGradientSignTargeted(alpha=0.01,
                                    n_iter=args.n_iter_adv,
                                    aug=False,
                                    save=args.save_adv_image,
                                    save_path=os.path.join(
                                        args.output_dir, 'adv_images'))

    ## classifier
    C = Classifier()
    outs = []
    correct = 0
    count = 0
    for i, (image_path, org_class) in enumerate(dataset):
        image = cv2.imread(os.path.join(args.input_dir, image_path), 1)
        org_class = int(org_class)
        target_class = np.random.randint(0, 1000)

        out_normal = C.ensemble_classify(image, args.n_iter_aug)

        flg, image, adv_class_confidence = FGSM.generate(
            image, org_class, target_class)

        if flg:
            out_adv = C.ensemble_classify(image, args.n_iter_aug)
            correct += int(out_adv == org_class)
            outs.append([
                out_normal, out_adv, org_class, target_class,
                adv_class_confidence
            ])
            count += 1
        else:
            pass
        show_progress(i + 1, args.test_size, count, (correct / count))

    np.savetxt(os.path.join(args.output_dir, 'log.txt'), np.array(outs))
Example #14
    def training(self, aug_list):

        max_iter = self.train_step + self.train_iter
        for step in range(self.train_step, max_iter):
            show_progress(step, max_iter)
            learning_rate = self._lr_scheduler(step)
            #### learning rate schedule
            """ #### Training ### """
            train_fetches = [
                self.train_op, self.accuracy_op, self.cost_op, self.lr_op
            ]
            self.batch_xs, self.batch_ys = self.sess.run(
                [self.dataprovider.batch_xs, self.dataprovider.batch_ys])

            if 'aug_lv1' in aug_list:
                self.batch_xs = np.asarray(self.batch_xs).astype('uint8')
                self.batch_xs = aug_lv1(self.batch_xs)
            if 'aug_random_clahe' in aug_list:
                self.batch_xs = np.asarray(self.batch_xs).astype('uint8')
                self.batch_xs = random_clahe_equalized(self.batch_xs)
            if 'aug_rotate' in aug_list:
                self.batch_xs = np.asarray(self.batch_xs).astype('uint8')
                self.batch_xs = random_rotate_90_180_270(self.batch_xs)
            if 'aug_clahe' in aug_list:
                self.batch_xs = np.asarray(self.batch_xs).astype('uint8')
                self.batch_xs = apply_clahe(self.batch_xs)
            if 'fundus_projection' in aug_list:
                self.batch_xs = np.asarray(self.batch_xs).astype('uint8')
                self.batch_xs = apply_projection(self.batch_xs)

            if np.max(self.batch_xs) > 1:
                self.batch_xs = self.batch_xs / 255.
            train_feedDict = {
                self.x_: self.batch_xs,
                self.y_: self.batch_ys,
                self.cam_ind: 0,
                self.lr_: learning_rate,
                self.is_training: True,
                self.global_step: step
            }
            _, self.train_acc, self.train_loss, self.learning_rate = self.sess.run(
                fetches=train_fetches, feed_dict=train_feedDict)
            # print 'train acc : {} loss : {}'.format(train_acc, train_loss)
            self.recorder.write_acc_loss('Train', self.train_loss,
                                         self.train_acc, step)
            self.recorder.write_lr(self.learning_rate, step)
            self.train_step = step
Example #15
    def train(self):
        training_dataset = self._create_dataloader(self.train_set)
        model = self.Model(dropout_chance=self.dropout_chance).cuda()
        model = freeze_layers(model)

        criterion = nn.MSELoss()
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=self.lr,
                                    momentum=0.9)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 10)

        loss_data, validation_accuracy_data, validation_loss_data = [], [], []

        for epoch in tqdm(range(self.epochs), ncols=90, desc="progress"):
            epoch_loss = []

            for images, targets in training_dataset:
                optimizer.zero_grad()

                images = images.float().cuda()
                targets = targets.float().cuda()

                predictions = model.train()(images)

                loss = criterion(predictions, targets)
                loss.backward()
                optimizer.step()

                epoch_loss.append(loss.item())

            scheduler.step()
            # print("learning rate:", optimizer.param_groups[0]["lr"])

            current_loss = np.mean(epoch_loss)
            current_val_accuracy, current_val_loss = self._validate(model)

            show_progress(self.epochs, epoch, current_loss,
                          current_val_accuracy, current_val_loss)

            loss_data.append(current_loss)
            validation_accuracy_data.append(current_val_accuracy)
            validation_loss_data.append(current_val_loss)

            if epoch % 5:
                torch.save(model.state_dict(), "models/model_1.pt")

        print("\n finished training")
Example #16
def train(model, training_data, validation_data, optimizer, device, opt):
    ''' Start training '''

    log_train_file = None
    log_valid_file = None

    if opt.log:
        log_train_file = opt.log + '.train.log'
        log_valid_file = opt.log + '.valid.log'

        print('[Info] Training performance will be written to file: {} and {}'.format(
            log_train_file, log_valid_file))

        with open(log_train_file, 'w') as log_tf, open(log_valid_file, 'w') as log_vf:
            log_tf.write('epoch,loss,ppl,accuracy\n')
            log_vf.write('epoch,loss,ppl,accuracy\n')

    history = []
    valid_accus = []
    for e in range(opt.epoch):

        train_loss, train_accu = train_epoch(
            model, training_data, optimizer, device, smoothing=opt.label_smoothing)

        valid_loss, valid_accu = eval_epoch(model, validation_data, device)

        history.append([train_loss, valid_loss, valid_accu])
        valid_accus += [valid_accu]

        if valid_accu >= max(valid_accus):
            save_model(model, opt.result_dir)
            print('[Info] The checkpoint file has been updated.')

        if log_train_file and log_valid_file:
            with open(log_train_file, 'a') as log_tf, open(log_valid_file, 'a') as log_vf:
                log_tf.write('{epoch},{loss: 8.5f},{ppl: 8.5f},{accu:3.3f}\n'.format(
                    epoch=e, loss=train_loss,
                    ppl=math.exp(min(train_loss, 100)), accu=100*train_accu))
                log_vf.write('{epoch},{loss: 8.5f},{ppl: 8.5f},{accu:3.3f}\n'.format(
                    epoch=e, loss=valid_loss,
                    ppl=math.exp(min(valid_loss, 100)), accu=100*valid_accu))

        show_progress(e+1, opt.epoch, train_loss, valid_loss, valid_accu)

    save_history(history, opt.result_dir)
Example #17
def load_phrases_from_file(name):
    _log('Trying to load data from file ' + name + '.txt')
    num_lines = sum(1 for line in open(name, 'r'))
    with open(name, 'r') as content_file:

        phrase_source_given_target = {}
        phrase_target_given_source = {}
        lex_weight_source_given_target = {}
        lex_weight_target_given_source = {}
        source_phrase_freqs = {}
        target_phrase_freqs = {}
        phrase_pair_freqs = {}

        point = num_lines / 100 if num_lines > 100 else 1

        for i, line in enumerate(content_file):
            words = line.strip().split('|||')
            f = words[0]
            e = words[1]

            first_values = words[2].split() #after first |||
            pfe = float(first_values[0])
            pef = float(first_values[1])
            lfe = float(first_values[2])
            lef = float(first_values[3])

            second_values = words[3].split() #after second |||
            freqf = int(second_values[0])
            freqe = int(second_values[1])
            freqfe = int(second_values[2])

            phrase_source_given_target[f, e] = pfe
            phrase_target_given_source[f, e] = pef
            lex_weight_source_given_target[f, e] = lfe
            lex_weight_target_given_source[f, e] = lef
            source_phrase_freqs[f, e] = freqf
            target_phrase_freqs[f, e] = freqe
            phrase_pair_freqs[f, e] = freqfe

            if i % point == 0:
                show_progress(i, num_lines, 40, 'LOADING PHRASES')

        return (phrase_source_given_target, phrase_target_given_source,
            lex_weight_source_given_target, lex_weight_target_given_source,
            source_phrase_freqs, target_phrase_freqs, phrase_pair_freqs)
Example #18
    def train(self):
        train_start = time.time()
        for e in range(self.args.num_epochs):
            # Training
            for i, (images, t) in enumerate(self.tloader):
                images = images.numpy() / 255.0
                t = t.numpy()

                time_s = time.time()
                _, loss = self.sess.run([self.optimizer, self.loss],
                                        feed_dict={
                                            self.images: images,
                                            self.t: t
                                        })

                if i % 20 == 0:
                    batch_time = time.time() - time_s
                    kwargs = {'loss': loss, 'batch time': batch_time}
                    show_progress(e + 1, i + 1, self.num_batches, **kwargs)

            # Validation
            loss_vals = []
            for images_val, t_val in self.vloader:
                images_val = images_val.numpy() / 255.0
                t_val = t_val.numpy()

                images_t_syn, flow_val, loss_val \
                    = self.sess.run([self.images_t_syn, self.flow, self.loss],
                                    feed_dict = {self.images: images_val, self.t: t_val})
                loss_vals.append(loss_val)

            print(f'\r{e+1} epoch validation, loss: {np.mean(loss_vals)}'\
                  +f', elapsed time: {time.time()-train_start} sec.')

            # Visualize estimated results
            if self.args.visualize:
                if not os.path.exists('./figure'):
                    os.mkdir('./figure')
                vis_result(images_val[0], images_t_syn[0], flow_val[0],
                           f'./figure/result_{e+1}epoch.png')

            # Save trained parameters
            if not os.path.exists('./model'):
                os.mkdir('./model')
            self.saver.save(self.sess, f'./model/model_{e+1}.ckpt')
Example #19
def preprocess(in_path, out_folder, keep_factors, filters, line_map_path = None):
    """Preprocess a parallel corpus"""
    in_folder, in_basename = os.path.split(in_path)
    assert os.path.isdir(in_folder), 'invalid in folder: %s' % in_folder
    assert in_basename.strip() != '', 'empty basename'
    assert os.path.isdir(out_folder), 'invalid out folder: %s' % out_folder
    sc_basename = in_basename + '.sc'
    doc_basename = in_basename + '.doc'
    sc_in_path = os.path.join(in_folder, sc_basename)
    doc_in_path = os.path.join(in_folder, doc_basename)
    assert os.path.isfile(sc_in_path), 'invalid file: %s' % sc_in_path
    assert os.path.isfile(doc_in_path), 'invalid file: %s' % doc_in_path
    sc_out_path = os.path.join(out_folder, sc_basename)
    doc_out_path = os.path.join(out_folder, doc_basename)

    num_lines = sum(1 for line in open(sc_in_path, 'r'))
    point = num_lines / 100 if num_lines > 100 else 1

    if line_map_path != None:
        line_map_out = open(line_map_path, 'w')
    else:
        line_map_out = None
    with open(sc_in_path, 'r') as sc_in, open(doc_in_path, 'r') as doc_in, \
            open(sc_out_path, 'w') as sc_out, open(doc_out_path, 'w') as doc_out:
        for i, sc_line in enumerate(sc_in):
            if i % point == 0:
                utils.show_progress(i, num_lines, 40, 'PREPROCESSING')

            sc_words = sc_line.strip().split()
            docstring = doc_in.next().strip()

            sc_words = process_source_code(sc_words, keep_factors)
            docstring = process_docstring(docstring, filters)
            if docstring == '' or len(docstring.split()) > 100 or len(sc_words) > 100:
                continue

            sc_out.write('%s\n' % ' '.join(sc_words))
            doc_out.write('%s\n' % docstring)
            if line_map_out != None:
                line_map_out.write('%d\n' % i)
    if line_map_out != None:
        line_map_out.close()

    utils.show_progress(1, 1, 40, 'PREPROCESSING')
    sys.stdout.write('\n')
Example #20
def extract_from_video(video_path,
                       output_dir,
                       start_time=5,
                       end_time=52,
                       frame_increment=1):
    start_frame = FPS * start_time
    end_frame = FPS * end_time
    frame = 0
    frames_processed = 0
    total_frames_to_process = int((end_frame - start_frame) / frame_increment)
    filename_char_length = len('%d.png' % total_frames_to_process)

    cap = cv2.VideoCapture(video_path)
    obj_extractor = masker.ObjectExtractor(extract_type='simple')

    ret = True
    while ret and (frame <= end_frame):
        ret, img = cap.read()

        if frame <= start_frame:
            # Learn background pixels using background subtraction
            obj_extractor.learnBackground(img)
        else:
            # Extract object and crop
            obj = obj_extractor.extractObject(img, thresh=100)
            obj_framed = masker.cropBox(obj)

            # Save image
            filename = os.path.join(
                output_dir,
                ('%d.png' % frames_processed).zfill(filename_char_length))
            cv2.imwrite(filename, obj_framed)

            # Print progress
            frames_processed += 1
            show_progress(frames_processed, total_frames_to_process)

        frame += frame_increment

    cap.release()
    cv2.destroyAllWindows()
    print('')
Example #21
    def predict(self, N, intv, show_avg=True, show_pgr=True):
        """Get prediction metrics to evaluate how pruning influences the model performance
        :param N: number of inputs
        :param intv: display progression at given interval
        :param show_avg: display average metrics
        :param show_pgr: display step metrics"""
        avg_loss, avg_rec, avg_prec, avg_spec = 0., 0., 0., 0.
        avg_f1 = np.zeros((self.annos_.shape[-1], ))
        for i in range(N):
            feed_dict_tr = {
                self.x: np.expand_dims(self.imgs_[i], axis=0),
                self.y_true: np.expand_dims(self.annos_[i], axis=0),
                self.rate: 0.,
                self.is_training: False
            }

            loss_ = self.sess.run(self.cost_reg, feed_dict=feed_dict_tr)
            f1_ = self.sess.run(self.f1_vec, feed_dict=feed_dict_tr)
            rec_ = self.sess.run(self.recall, feed_dict=feed_dict_tr)
            prec_ = self.sess.run(self.precision, feed_dict=feed_dict_tr)
            spec_ = self.sess.run(self.specificity, feed_dict=feed_dict_tr)

            avg_loss += loss_ / N
            avg_f1 += f1_ / N
            avg_rec += rec_ / N
            avg_prec += prec_ / N
            avg_spec += spec_ / N

            if i % intv == 0:
                if show_pgr is True:
                    utils.show_progress('i ' + str(i), loss_,
                                        utils.array_to_text(f1_, 3), rec_,
                                        prec_, spec_, True)

        # convert f1 vector to text
        avg_f1_txt = utils.array_to_text(avg_f1, 3)

        if show_avg is True:
            utils.show_progress('Avg. results', avg_loss, avg_f1_txt, avg_rec,
                                avg_prec, avg_spec, True)

        return avg_loss, avg_f1
Example #22
    def train(self, continue_: bool = False):
        model = Model().cuda()
        if continue_:
            model.load_state_dict(torch.load(self.model_path))

        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=self.lr)

        train_loss, train_ssim, val_loss, val_ssim = [], [], [], []
        for epoch in range(1, (self.epochs + 1)):

            epoch_loss = []
            epoch_train_targets, epoch_train_predictions = [], []
            for images in tqdm(self.train_set, desc="epoch", ncols=150):
                optimizer.zero_grad()

                images = images.float().cuda()
                predictions = model.train()(images, train_=True)

                loss = criterion(predictions, images)
                loss.backward()
                optimizer.step()

                epoch_loss.append(loss.item())

                for i in range(predictions.size()[0]):
                    epoch_train_targets.append(images[i].cpu().detach().numpy())
                    epoch_train_predictions.append(predictions[i].cpu().detach().numpy())

            current_val_loss, current_val_ssim = self._validate(
                model, self.validation_set)
            current_train_loss = np.mean(epoch_loss)
            current_train_ssim = calculate_ssim(epoch_train_targets,
                                                epoch_train_predictions)

            show_progress(self.epochs, epoch, current_train_loss,
                          current_train_ssim, current_val_loss,
                          current_val_ssim)

            torch.save(model.state_dict(), self.model_path)
Example #23
def read_lexical_weights(path):
    """Read the lexical weights from an _all_info.txt file"""
    lex_weight_source_given_target = {}
    lex_weight_target_given_source = {}

    num_lines = sum(1 for line in open(path, 'r'))
    point = num_lines / 100 if num_lines > 100 else 1

    with open(path, 'r') as in_file:
        for i, line in enumerate(in_file):
            if i % point == 0:
                show_progress(i, num_lines, 40, 'LOADING LEXICAL WEIGHTS')

            source, target, probs, _freqs = line.strip().split(' ||| ')
            _pfe, _pef, lfe, lef = [float(x) for x in probs.split()]
            lex_weight_source_given_target[(source, target)] = lfe
            lex_weight_target_given_source[(source, target)] = lef

    show_progress(1, 1, 40, 'LOADING LEXICAL WEIGHTS')
    sys.stdout.write('\n')

    return lex_weight_source_given_target, lex_weight_target_given_source
Example #24
def read_language_model(file_name, max_phrase_length,
        label='LOADING LANGUAGEMODEL'):
    """Read the language model"""
    language_model = {}
    document = open(file_name, 'r')
    num_lines = sum(1 for line in open(file_name, 'r'))
    point = num_lines / 100 if num_lines > 100 else 1

    for i, line in enumerate(document):
        if i % point == 0:
            show_progress(i, num_lines, 40, label)
        segments = line.strip().split(' ||| ')
        phrase = segments[0]
        if len(phrase.split()) > max_phrase_length:
            continue
        prob = float(segments[1])
        language_model[tuple(phrase.split())] = prob
    show_progress(1, 1, 40, label)
    sys.stdout.write('\n')

    document.close()
    return language_model
Example #25
def trim_translation_model(full_translation_model, weights,
                           top_translations):
    """Use the full_translation_model to create a smaller translation_model
    according to the restrictions of the weights and top_translations."""
    translation_model = defaultdict(list)
    num_lines = len(full_translation_model)
    point = num_lines / 100 if num_lines > 100 else 1
    for i, (source, target_probs) in enumerate(full_translation_model.iteritems()):
        if i % point == 0:
            show_progress(i, num_lines, 40, 'TRIM FULLTRANSLATIONMODEL')
        measure_target_probs = []
        for target, probs in target_probs:
            measure = sum([prob * weights[i] for i, prob in \
                           enumerate(probs)])
            if len(measure_target_probs) < top_translations:
                heapq.heappush(measure_target_probs, (measure, target, probs))
            else:
                heapq.heappushpop(measure_target_probs, (measure, target, probs))
        translation_model[source] = [(target, probs) for (_measure, target, probs) in measure_target_probs]

    show_progress(1, 1, 40, 'TRIM FULLTRANSLATIONMODEL')
    sys.stdout.write('\n')
    return translation_model
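
Examples #1 and #25 keep only the highest-scoring translations with a small min-heap: push until the heap holds top_translations entries, then heappushpop evicts the current minimum whenever a new candidate scores higher. A self-contained sketch of that pattern (the function name and data are hypothetical):

import heapq

def top_k(scored_items, k):
    # keep the k items with the largest score; the heap root is always the
    # smallest retained score, so heappushpop drops the weakest candidate
    heap = []
    for score, item in scored_items:
        if len(heap) < k:
            heapq.heappush(heap, (score, item))
        else:
            heapq.heappushpop(heap, (score, item))
    return sorted(heap, reverse=True)

print(top_k([(0.1, 'a'), (0.7, 'b'), (0.4, 'c'), (0.9, 'd')], 2))
# [(0.9, 'd'), (0.7, 'b')]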
Example #26
def read_translation_freqs(file_name, num_lines=None):
    """Read the number of source translations"""
    translation_freqs = defaultdict(int)
    document = open(file_name, 'r')

    if num_lines == None:
        num_lines = sum(1 for line in open(file_name, 'r'))
    point = num_lines / 100 if num_lines > 100 else 1

    for i, line in enumerate(document):
        if i % point == 0:
            utils.show_progress(i, num_lines, 40, 'LOADING TRANSLATIONMODEL')

        segments = line.strip().split(' ||| ')
        source = segments[0]

        translation_freqs[source] += 1

    utils.show_progress(1, 1, 40, 'LOADING TRANSLATIONMODEL')
    sys.stdout.write('\n')
    document.close()

    return translation_freqs
Example #27
def fill_ntuple():
    print('*** starting fill_ntuple() ')
    AtlasStyle.SetAtlasStyle()

    #    # get key list
    #    tfile = TFile(BasicConfig.workdir + 'systTree.root')
    #    key_list_all = [key.GetName() for key in gDirectory.GetListOfKeys()]
    #    regex = re.compile('PRW|JET|MET.*')
    #    key_list = [key for key in key_list_all if re.match(regex, key)]
    #    tfile.Close()

    # start making ttree
    #output_tfile = TFile('rhadron_v06-00-05.root', 'recreate')
    output_tfile = TFile(args.outputFile, 'recreate')

    # initialize TTree
    tree = TTree('rhadron', 'tree of rhadron properties for limit setting')
    # leaf variables
    from array import array
    mass_gluino = array('f', [0.])
    delta_mass = array('f', [0.])
    ctau = array('f', [0.])
    eff = array('f', [0.])
    eff_stat_error = array('f', [0.])
    eff_syst_error = array('f', [0.])
    eff_syst_error_ISR = array('f', [0.])
    eff_syst_error_PRW = array('f', [0.])
    eff_syst_error_JET = array('f', [0.])
    eff_syst_error_MET = array('f', [0.])
    # set branch
    tree.Branch("mGluino", mass_gluino, 'mGluino/F')
    tree.Branch("deltaM", delta_mass, 'deltaM/F')
    tree.Branch("ctau", ctau, 'ctau/F')
    tree.Branch("eff", eff, 'eff/F')
    tree.Branch("effRelStatErr", eff_stat_error, 'effRelStatErr/F')
    tree.Branch("effRelSystErr", eff_syst_error, 'effRelSystErr/F')
    tree.Branch("effRelSystErrISR", eff_syst_error_ISR, 'effRelSystErrISR/F')
    tree.Branch("effRelSystErrPRW", eff_syst_error_PRW, 'effRelSystErrPRW/F')
    tree.Branch("effRelSystErrJET", eff_syst_error_JET, 'effRelSystErrJET/F')
    tree.Branch("effRelSystErrMET", eff_syst_error_MET, 'effRelSystErrMET/F')

    #directory = '/afs/cern.ch/work/k/kmotohas/DisplacedVertex/DV_xAODAnalysis/submitDir_LSF/mc/hist_DVPlusMETSys/'
    #directory = BasicConfig.workdir + 'hist_DVPlusMETSys/'
    #directory = '/home/motohash/data/mc15_13TeV/DVPlusMETSys/v06-00-05/'

    #tfile = TFile(args.referenceFile)
    tfile = TFile(args.inputFile)
    key_list_all = [key.GetName() for key in gDirectory.GetListOfKeys()]
    print(len(key_list_all), key_list_all)
    regex = re.compile('Nominal|PRW|JET|MET.*')
    key_list = [key for key in key_list_all if re.match(regex, key)]
    print(len(key_list), key_list)
    tfile.Close()
    #c = 299792458.  # [m/s]
    #tchains = [[dsid, TChain('Nominal', str(dsid))] for dsid in range(402700, 402740)]
    #tchains = [[dsid, TChain('Nominal', str(dsid))] for dsid in mc.parameters.keys()]
    #tchains = [[dsid, [TChain(key, key+str(dsid)) for key in key_list]] for dsid in mc.parameters.keys()]
    dsids = [args.DSID]
    tchains = [[dsid, [TChain(key, key + str(dsid)) for key in key_list]]
               for dsid in dsids]

    cut_flow = [
        'Initial', 'Trigger', 'Filter', 'Cleaning', 'GRL', 'PV', 'NCB veto',
        'MET', 'DV Selection'
    ]
    #systematic_tables = TFile('systematic_summary_SimpleMETFilter.root', 'open')
    #table = TH1F()

    m_MET_min = 250.

    # loop over dsid
    try:
        for dsid, each_tchain in tchains:
            print('')
            print(dsid)
            #index = 0
            #for input in glob(directory + 'systTree_' + str(dsid) + '_*.root'):
            for tchain in each_tchain:
                #for input_file in glob(directory+'systTree_mc15_13TeV.' + str(dsid) + '*.root'):
                #    print(input_file)
                #    tchain.Add(input_file)
                tchain.Add(args.inputFile)

            mass_gluino[0] = mc.parameters[dsid]['g']
            delta_mass[0] = mass_gluino[0] - mc.parameters[dsid]['chi0']
            n_reweight_steps = 40
            xmin = 1.
            xmax = 10000.
            ratio = xmax / xmin
            bins = []
            for ii in range(n_reweight_steps):
                bins.append(
                    xmax *
                    10**(ii * TMath.Log10(xmax / xmin) / n_reweight_steps -
                         TMath.Log10(xmax / xmin)))
            #n_passed_w1 = [0. for _ in range(n_reweight_steps)]
            #n_passed = [0. for _ in range(n_reweight_steps)]
            from array import array
            limitsLifetime = array('d', bins)
            #
            tefficiency = [[
                TEfficiency('tefficiency_{0}_{1}_{2}'.format(key, step, dsid),
                            ';c#tau [mm]; Event-level efficiency',
                            len(limitsLifetime) - 1, limitsLifetime)
                for step in range(n_reweight_steps)
            ] for key in key_list]
            #h_syst_diff = [[TH1F('syst_diff_{0}_{1}_{2}'.format(key, step, dsid), ';;(N_{shifted} - N_{nominal}) / N_{nominal}', len(key_list)+1, 0, len(key_list)+1)
            #                for step in range(n_reweight_steps)] for key in key_list]
            h_syst_diff = [
                TH1F('syst_diff_{0}_{1}_{2}'.format(key, step, dsid),
                     ';;(N_{shifted} - N_{nominal}) / N_{nominal}',
                     len(key_list) + 1, 0,
                     len(key_list) + 1) for step in range(n_reweight_steps)
            ]

            for step in range(n_reweight_steps):
                for jj, key in enumerate(key_list):
                    h_syst_diff[step].GetXaxis().SetBinLabel(jj + 1, key)
                h_syst_diff[step].GetXaxis().SetBinLabel(
                    len(key_list) + 1, 'ISR_Py2MG_SF_removed')
            n_events_weighted = [[0. for _ in range(n_reweight_steps)]
                                 for key in key_list]
            n_events_weighted_noISR = [[0. for _ in range(n_reweight_steps)]
                                       for key in key_list]

            # loop over tchain of each systematic
            for ii, tchain in enumerate(each_tchain):
                entries = tchain.GetEntries()
                print('*** processed systs: {0} / {1}'.format(
                    ii, len(each_tchain)))
                #n_reweight_steps = 50
                #for step in range(n_reweight_steps):
                #    tefficiency.append(TEfficiency('tefficiency_'+str(step), ';c#tau [mm]; Event-level efficiency',
                #                                   len(limitsLifetime)-1, limitsLifetime))
                #    h_syst_diff.append(TH1F('syst_diff_'+str(step), ';;(N_{shifted} - N_{nominal}) / N_{nominal}', len(key_list)+1, 0, len(key_list)+1))
                for step in range(n_reweight_steps):
                    tefficiency[ii][step].SetUseWeightedEvents()
                    #for jj, key in enumerate(key_list):
                    #     h_syst_diff[ii][step].GetXaxis().SetBinLabel(jj+1, key)
                    #h_syst_diff[ii][step].GetXaxis().SetBinLabel(len(key_list)+1, 'ISR_Py2MG_SF_removed')
                #    h_syst_diff[step].SetMinimum(-0.3)
                #    h_syst_diff[step].SetMaximum(0.3)
                if entries == 0:
                    continue
                for entry in range(entries):
                    #if entry % 1000 == 0:
                    #    print('* processed events: {0} / {1}'.format(entry, entries))
                    utils.show_progress(entry, entries)
                    #if entry == 605:
                    #    break
                    # get the next tree in the chain and verify
                    ientry = tchain.LoadTree(entry)
                    if ientry < 0:
                        break
                    # copy next entry into memory and verify
                    nb = tchain.GetEntry(entry)
                    if nb <= 0:
                        continue
                    event_weight = tchain.McEventWeight * tchain.PileupWeight * tchain.ISRWeight
                    ctau_MC = TMath.C(
                    ) * mc.parameters[dsid]['t'] * 1e-9  # [nm]->[m]
                    for step in range(n_reweight_steps):
                        #print(tchain.GetListOfBranches())
                        pass_all = pass_event_cut(tchain, len(cut_flow) - 1)
                        if pass_all:
                            matched = False
                            for idv in range(len(tchain.DV_x)):
                                matched = matched or match(
                                    tchain, idv, cut=1.0)
                            #print('pass_all is ', pass_all, ', matched is ', matched)
                            pass_all = pass_all and matched
                        target_ctau = xmax * 10**(
                            step * TMath.Log10(xmax / xmin) / n_reweight_steps
                            - TMath.Log10(xmax / xmin)) * 1e-3  # [mm]->[m]
                        #print(target_ctau)
                        lifetime_weight = get_lifetime_weight(
                            tchain, target_ctau, ctau_MC)
                        n_events_weighted[ii][
                            step] += event_weight * lifetime_weight
                        n_events_weighted_noISR[ii][
                            step] += tchain.McEventWeight * tchain.PileupWeight * lifetime_weight
                        #print(event_weight)
                        #print(event_weight*lifetime_weight)
                        #print(pass_all)
                        tefficiency[ii][step].FillWeighted(
                            pass_all, event_weight * lifetime_weight,
                            target_ctau * 1e3)
                # end of loop over entries of each TChain
            # end loop over tchain of each systematic
            for step in range(n_reweight_steps):
                n_events_nominal = [0. for _ in range(n_reweight_steps)]
                for ii in range(len(each_tchain)):
                    # if Nominal TTree, set syst diff of ISR as well
                    if ii == 0:
                        n_events_nominal[step] = n_events_weighted[ii][step]
                        if n_events_nominal[step] < 1e-4:
                            #h_syst_diff[ii][step].SetBinContent(len(key_list)+1, 0)
                            h_syst_diff[step].SetBinContent(
                                len(key_list) + 1, 0)
                        else:
                            #h_syst_diff[ii][step].SetBinContent(len(key_list)+1,
                            h_syst_diff[step].SetBinContent(
                                len(key_list) + 1,
                                float((n_events_weighted_noISR[ii][step] -
                                       n_events_nominal[step]) /
                                      n_events_nominal[step]))
                            #float((n_events_weighted[ii][step]-n_events_nominal[step])/n_events_nominal[step]))
                    diff = n_events_weighted[ii][step] - n_events_nominal[step]
                    #print(n_events_nominal, n_events_weighted, diff)
                    if n_events_nominal[step] < 1e-4:
                        #h_syst_diff[ii][step].SetBinContent(ii+1, 0)
                        h_syst_diff[step].SetBinContent(ii + 1, 0)
                    else:
                        #h_syst_diff[ii][step].SetBinContent(ii+1, float(diff/n_events_nominal[step]))
                        h_syst_diff[step].SetBinContent(
                            ii + 1, float(diff / n_events_nominal[step]))
                    #systematic_tables.GetObject('systematic_table_'+str(dsid), table)
                    #syst_up, syst_down = root_sum_squares(table, 'x')
                #systs = root_sum_squares(h_syst_diff[ii][step], 'x')
                systs = root_sum_squares(h_syst_diff[step], 'x')
                #eff_syst_error[0] = max(syst_up, syst_down)  # TODO
                #eff_syst_error[0] = (syst_up**2 + syst_down**2)**0.5

                ####    ############################
                eff_syst_error[0] = (systs[0]**2 + systs[1]**2)**0.5
                eff_syst_error_ISR[0] = systs[2]
                eff_syst_error_PRW[0] = systs[3]
                eff_syst_error_JET[0] = systs[4]
                eff_syst_error_MET[0] = systs[5]
                if eff_syst_error[0] > 1:
                    print('eff_syst_error[0] = ' + str(eff_syst_error[0]))
                    #eff_syst_error[0] = 1.
                #for step in range(n_reweight_steps):
                #for ct in bins:
                #    print(len(bins), bins)
                #print(n_total_w1[step], n_total[step])
                #sf =  n_total_w1[step] / n_total[step]
                #n_passed[step] *= sf
                #n_total[step] *= sf
                #eff_no_weight, stat_error_no_weight = utils.division_error_propagation(n_passed_w1[step], n_total_w1[step])
                #ctau[0] = TMath.Power(300, step/float(n_reweight_steps-1)) * 1e-3  # [mm]->[m]
                ct = bins[step]
                #print(ct)
                ctau[0] = ct * 1e-3  # [mm]->[m]
                #print(ctau[0])
                bin_ctau = tefficiency[0][step].GetPassedHistogram().FindBin(
                    ct)
                print(tefficiency[0][step].GetPassedHistogram().GetBinContent(
                    bin_ctau))
                print(tefficiency[0][step].GetTotalHistogram().GetBinContent(
                    bin_ctau))
                #print(bin_ctau)
                #print('ct', ct, 'bin_ctau', bin_ctau)
                eff[0] = tefficiency[0][step].GetEfficiency(bin_ctau)
                print(eff[0])
                abs_stat_error = (
                    tefficiency[0][step].GetEfficiencyErrorLow(bin_ctau)**2 +
                    tefficiency[0][step].GetEfficiencyErrorUp(bin_ctau)**
                    2)**0.5
                #eff[0], abs_stat_error = utils.binomial_ratio_and_error(n_passed[step], n_total[step])
                #if eff[0] < 1e-4:
                if eff[0] == 0:
                    eff_stat_error[
                        0] = 1.  # avoid zero division error and divergence
                    continue  # not fill values in tree if efficiency is too small
                else:
                    eff_stat_error[0] = abs_stat_error / eff[0]
                #if eff_stat_error[0] > 1:
                #    print(n_passed[step], n_total[step], abs_stat_error, eff[0], eff_stat_error[0])
                #    eff_stat_error[0] = 1.
                tree.Fill()
            # end loop over n_reweight_steps
    except KeyboardInterrupt:
        pass
    output_tfile.Write()
    output_tfile.Close()
Example #28
def create_cut_flow():
    AtlasStyle.SetAtlasStyle()

    #input_tfile = utils.open_tfile(BasicConfig.workdir + 'DVTree_NTuple_data15_13TeV.root')
    #tree = input_tfile.Get('DVTree_NTuple')
    input_tfile = utils.open_tfile(args.inputFile)
    tree = input_tfile.Get('Nominal')

    cut_flow = [
        'Initial', 'Trigger', 'Filter', 'Cleaning', 'GRL', 'PV', 'NCB veto',
        'MET', 'DV Selection'
    ]
    h_cut_flow = TH1F('cut_flow', ';;Number of Events', len(cut_flow), 0,
                      len(cut_flow))
    #h_cut_flow2 = TH1F('cut_flow2', ';;Number of Events', len(cut_flow), 0, len(cut_flow))
    for bin, cut in enumerate(cut_flow):
        h_cut_flow.GetXaxis().SetBinLabel(bin + 1, cut)
    #
    entries = tree.GetEntries()
    for entry in range(entries):
        #if entry % 10000 == 0:
        #    print('*** processed {0} out of {1}'.format(entry, entries))
        utils.show_progress(entry, entries)
        #if entry == 100000:
        #    break
        # get the next tree in the chain and verify
        ientry = tree.LoadTree(entry)
        if ientry < 0:
            break
        # copy next entry into memory and verify
        nb = tree.GetEntry(entry)
        if nb <= 0:
            continue
        event_weight = tree.McEventWeight * tree.PileupWeight * tree.ISRWeight
        for step, cut in enumerate(cut_flow):
            if step == 0:
                h_cut_flow.Fill(cut, event_weight)
                #h_cut_flow2.Fill(cut, event_weight)
            #elif step == 2:
            #    if tree.RandomRunNumber < 309311 and pass_event_cut(tree, 2):
            #        h_cut_flow.Fill(cut, event_weight)
            #    if tree.RandomRunNumber > 309311 and pass_event_cut(tree, 2):
            #        h_cut_flow2.Fill(cut, event_weight)
            #elif step == 6:
            #    if tree.RandomRunNumber < 309311 and pass_event_cut(tree, 6):
            #        h_cut_flow.Fill(cut, event_weight)
            #    if tree.RandomRunNumber > 309311 and pass_event_cut(tree, 6):
            #        h_cut_flow2.Fill(cut, event_weight)
            #elif step == 7:
            #    #have_signal_like_dv = False
            #    #for dv_index in range(len(tree.DV_passVtxCuts)):
            #    #    have_signal_like_dv = have_signal_like_dv or tree.DV_passVtxCuts[dv_index]
            #    #if pass_event_cut(tree, 7) and tree.MET > 220 and have_signal_like_dv:
            #    if tree.RandomRunNumber < 309311 and pass_event_cut(tree, 7):
            #        h_cut_flow.Fill(cut, event_weight)
            #    if tree.RandomRunNumber > 309311 and pass_event_cut(tree, 7):
            #        h_cut_flow2.Fill(cut, event_weight)
            elif pass_event_cut(tree, step):
                h_cut_flow.Fill(cut, event_weight)
                #h_cut_flow2.Fill(cut, event_weight)
    output = TFile('cut_flow.root', 'recreate')
    h_cut_flow.Write()
    output.Close()
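The cut flow above uses ROOT's fill-by-label mechanism; a minimal sketch of that pattern (the labels are set first so the bin order stays fixed, and TH1F.Fill(label, weight) then increments the matching bin):

from ROOT import TH1F

cuts = ['Initial', 'Trigger', 'MET']
h = TH1F('cut_flow_sketch', ';;Number of Events', len(cuts), 0, len(cuts))
for b, cut in enumerate(cuts):
    h.GetXaxis().SetBinLabel(b + 1, cut)

h.Fill('Initial', 1.0)   # fills the bin labelled 'Initial'
h.Fill('Trigger', 0.9)   # weighted fill, e.g. with an event weight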
Beispiel #29
0
def check_n_vertices_vs_met_threshold():
    AtlasStyle.SetAtlasStyle()

    #input_tfile = utils.open_tfile(BasicConfig.workdir + 'DVTree_NTuple_data15_13TeV.root')
    input_tfile = utils.open_tfile(args.inputFile)
    #tree = input_tfile.Get('DVTree_NTuple')
    tree = input_tfile.Get('Nominal')

    #bin_name = ['Base', 'Trigger', 'Filter', 'MET200', 'MET220', 'MET250']
    bin_name = ['Base', 'Trigger', 'Filter', 'MET250']
    h_nevents_cut = TH1F('nevents_cut', ';;Double Ratio', len(bin_name), 0,
                         len(bin_name))
    h_nevents_all = TH1F('nevents_all', ';;Double Ratio', len(bin_name), 0,
                         len(bin_name))
    h_ndvs_cut = {
        ntracks: TH1F('ndvs_cut_' + str(ntracks), ';;Double Ratio',
                      len(bin_name), 0, len(bin_name))
        for ntracks in range(2, 6)
    }
    h_ndvs_all = {
        ntracks: TH1F('ndvs_all_' + str(ntracks), ';;Double Ratio',
                      len(bin_name), 0, len(bin_name))
        for ntracks in range(2, 6)
    }
    for bin, name in enumerate(bin_name):
        h_nevents_cut.GetXaxis().SetBinLabel(bin + 1, name)
        h_nevents_all.GetXaxis().SetBinLabel(bin + 1, name)
        for ntracks in range(2, 6):
            h_ndvs_cut[ntracks].GetXaxis().SetBinLabel(bin + 1, name)
            h_ndvs_all[ntracks].GetXaxis().SetBinLabel(bin + 1, name)
    entries = tree.GetEntries()
    for entry in range(entries):
        utils.show_progress(entry, entries)
        #if entry == 1000000:
        #    break
        # get the next tree in the chain and verify
        ientry = tree.LoadTree(entry)
        if ientry < 0:
            break
        # copy next entry into memory and verify
        nb = tree.GetEntry(entry)
        if nb <= 0:
            continue
        if not utils.basic_event_selection(tree):
            continue
        # fill all
        for name in bin_name:
            h_nevents_all.Fill(name, 1.)
            for dv_index, DV_nTracks in enumerate(tree.DV_nTracks):
                if utils.basic_dv_selection(tree, dv_index):
                    if DV_nTracks < 6:
                        h_ndvs_all[DV_nTracks].Fill(name, 1.)
                    else:
                        h_ndvs_all[5].Fill(name, 1.)
        #
        h_nevents_cut.Fill('Base', 1.)
        for dv_index, DV_nTracks in enumerate(tree.DV_nTracks):
            if utils.basic_dv_selection(tree, dv_index):
                if DV_nTracks < 6:
                    h_ndvs_cut[DV_nTracks].Fill('Base', 1.)
                else:
                    h_ndvs_cut[5].Fill('Base', 1.)
        # Trigger
        if not tree.PassCut1:
            continue
        h_nevents_cut.Fill('Trigger', 1.)
        for dv_index, DV_nTracks in enumerate(tree.DV_nTracks):
            if utils.basic_dv_selection(tree, dv_index):
                if DV_nTracks < 6:
                    h_ndvs_cut[DV_nTracks].Fill('Trigger', 1.)
                else:
                    h_ndvs_cut[5].Fill('Trigger', 1.)
        # Filter
        if not tree.PassCut2:
            continue
        h_nevents_cut.Fill('Filter', 1.)
        for dv_index, DV_nTracks in enumerate(tree.DV_nTracks):
            if utils.basic_dv_selection(tree, dv_index):
                if DV_nTracks < 6:
                    h_ndvs_cut[DV_nTracks].Fill('Filter', 1.)
                else:
                    h_ndvs_cut[5].Fill('Filter', 1.)
        ##
        #if not tree.MET > 200:
        #    continue
        #h_nevents_cut.Fill('MET200', 1.)
        #for dv_index, DV_nTracks in enumerate(tree.DV_nTracks):
        #    if pass_base_dv_selection(tree, dv_index):
        #        if DV_nTracks < 6:
        #            h_ndvs_cut[DV_nTracks].Fill(name, 1.)
        #        else:
        #            h_ndvs_cut[5].Fill(name, 1.)
        ##
        #if not tree.MET > 220:
        #    continue
        #h_nevents_cut.Fill('MET220', 1.)
        #for dv_index, DV_nTracks in enumerate(tree.DV_nTracks):
        #    if pass_base_dv_selection(tree, dv_index):
        #        if DV_nTracks < 6:
        #            h_ndvs_cut[DV_nTracks].Fill(name, 1.)
        #        else:
        #            h_ndvs_cut[5].Fill(name, 1.)
        #
        if not tree.MET > 250:
            continue
        h_nevents_cut.Fill('MET250', 1.)
        for dv_index, DV_nTracks in enumerate(tree.DV_nTracks):
            if utils.basic_dv_selection(tree, dv_index):
                if DV_nTracks < 6:
                    h_ndvs_cut[DV_nTracks].Fill('MET250', 1.)
                else:
                    h_ndvs_cut[5].Fill('MET250', 1.)
    #
    output_tfile = TFile(args.outputFile, 'recreate')
    #
    #canvas = TCanvas('canvas', 'canvas', 1200, 800)
    #h_ndvs_all_clone = h_ndvs_all[2].Clone('unit')
    #h_ndvs_all_clone.Divide(h_ndvs_all[2])
    #h_ndvs_all_clone.SetMaximum(3)
    #h_ndvs_all_clone.SetMinimum(0)
    #h_ndvs_all_clone.Draw()
    #legend = TLegend(0.5, 0.6, 0.85, 0.85)
    h_nevents_cut.Write()
    h_nevents_all.Write()
    for DV_nTracks in range(2, 6):
        h_ndvs_cut[DV_nTracks].Write()
        h_ndvs_all[DV_nTracks].Write()
    #
    #    h_ndvs_cut[DV_nTracks].Sumw2()
    #    h_ndvs_cut[DV_nTracks].Divide(h_ndvs_all[DV_nTracks])
    #    h_ndvs_cut[DV_nTracks].Divide(h_nevents_cut)
    #    h_ndvs_cut[DV_nTracks].Multiply(h_nevents_all)
    #    utils.decorate_histogram(h_ndvs_cut[DV_nTracks], BasicConfig.colors[DV_nTracks])
    #    h_ndvs_cut[DV_nTracks].Draw('same,hist')
    #    legend.AddEntry(h_ndvs_cut[DV_nTracks],
    #                    '('+str(DV_nTracks)+'trk-DVs(cut)/2trk-DVs(all))/(Events(cut)/Events(all))', 'l')
    #utils.decorate_legend(legend)
    #legend.Draw()
    #utils.save_as(canvas, BasicConfig.plotdir + 'nVerts_met_dependency')
    #output = TFile('nVerts_met_dependency.root', 'recreate')
    #canvas.Write()
    output_tfile.Close()
Beispiel #30
0
    def show_progress(self, path):
        if not self.arguments.json_output:
            utils.show_progress(self.i)

        elif self.arguments.progress_output and self.i % utils.FRAMES_TO_INFORM == 0:
            utils.inform_json_progress(self.i, path)
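The signature of show_progress differs between the projects collected here; purely as an illustration (an assumption, not any project's actual helper), a minimal two-argument version compatible with the utils.show_progress(entry, entries) calls in the ROOT examples could look like this:

import sys

def show_progress(current, total, width=40):
    """Print a simple in-place progress bar (illustrative sketch only)."""
    frac = float(current) / total if total else 1.0
    filled = int(width * frac)
    bar = '#' * filled + '-' * (width - filled)
    sys.stdout.write('\r[{}] {:5.1f}%'.format(bar, 100.0 * frac))
    sys.stdout.flush()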
Beispiel #31
0
def main():
    global irandom
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--inputFiles', type=str, help='comma separated input files')
    parser.add_argument('-o', '--outputFile', type=str, help='output file name')
    args = parser.parse_args()
    #input_files = args.inputFiles
    #print(args.inputFiles)
    input_files = args.inputFiles.split(',')
    print('*** input files: ')
    print(input_files)
    
    print('*** output file: ')
    print(args.outputFile)
    output_root = TFile(args.outputFile, 'recreate')
    
    book_histograms()

    chain = TChain('Nominal', 'Nominal Tree')
    for input_file in input_files:
        chain.Add(input_file)

    #f = TFile('~/data/data16_13TeV/DVPlusMETSys/DVtrkTemplate_v3.root', 'open')
    #f = TFile('~/data/data16_13TeV/DVPlusMETSys/DVtrkTemplate_no2trk_NonVetoOnly_v3.root', 'open')
    # 3 GeV mass cut
    #f = TFile('~/data/data16_13TeV/DVPlusMETSys/DVtrkTemplate_no2trk_massCut_v06-00-00.root', 'open')
    #f = TFile('~/data/data16_13TeV/DVPlusMETSys/DVtrkTemplate_no2trk_v06-00-00.root', 'open')
    #f = TFile('~/data/data16_13TeV/DVPlusMETSys/DVtrkTemplate_no2trk_v06-00-01.root', 'open')
    #f = TFile('~/data/data16_13TeV/DVPlusMETSys/DVtrkTemplate_no2trk_massCut_v06-00-01.root', 'open')
    f = TFile('~/data/data16_13TeV/DVPlusMETSys/DVtrkTemplate_no2trk_massCut_v06-00-03.root', 'open')
    trkTemplate = f.Get('DVtrkTemplate')
    nTrkTemplates = trkTemplate.GetEntries()
    
    entries = chain.GetEntries()
    print('* Number of entries = {}'.format(entries))

    irandom = int(gRandom.Uniform()*nTrkTemplates)

    try:
        for entry in range(entries):
            #if not entry % 10000:
            #    print('*** processed {0} out of {1} ({2}%)'.format(entry, entries, round(float(entry)/entries*100., 1)))
            utils.show_progress(entry, entries)
            #irandom = int(gRandom.Uniform()*nTrkTemplates)
            #print(irandom)
            #if entry == 1000:
            #    break
            # get the next tree in the chain and verify
            ientry = chain.LoadTree(entry)
            if ientry < 0:
                break
            # copy next entry into memory and verify
            nb = chain.GetEntry(entry)
            if nb <= 0:
                continue
            if chain.EventNumber == 752668466:
                continue
            event_weight = chain.McEventWeight * chain.PileupWeight * chain.ISRWeight
            m_nEvents.Fill(0.5, event_weight)
            #if chain.MET > 200:
            #    continue
            m_nEvents_MET.Fill(0.5, event_weight)
            if utils.basic_event_selection(chain):
                m_posPV.SetXYZ(chain.PV_x, chain.PV_y, chain.PV_z)
                for idv in range(len(chain.DV_x)):
                    #if basic_dv_selection(chain, idv) and chain.DV_nTracks[idv] < 7 and chain.DV_Region[idv] >= 0:
                    if utils.basic_dv_selection(chain, idv) and chain.DV_Region[idv] >= 0:
                        dvInfo(chain, idv)
                        #print('orig'+str(m_tlvDV.M()))
                        m_DVPV.SetVect(m_posDV - m_posPV)
                        
                        dvBkgEst(trkTemplate, nTrkTemplates, chain.MET)
                        irandom += 1
    except KeyboardInterrupt:
        pass
        
    output_root.cd()
    m_nEvents.Write()
    m_nEvents_MET.Write()
    for itrk in range(2, 7):
        for region in range(12):
            m_BkgEst_data_iTrk_Region[itrk][region].Write()
            if itrk == 6:
                m_BkgEst_data_iTrk_Region[7][region].Write()
            m_BkgEst_data_loMET_iTrk_Region[itrk][region].Write()
            m_BkgEst_data_hiMET_iTrk_Region[itrk][region].Write()
            m_BkgEst_data_NoCross_iTrk_Region[itrk][region].Write()
            m_BkgEst_data_NoCross_maxAngle_iTrk_Region[itrk][region].Write()
            m_BkgEst_data_NoCross_maxDeltaR_iTrk_Region[itrk][region].Write()
            m_BkgEst_data_NoCross_maxDeltaEta_iTrk_Region[itrk][region].Write()
            #
            m_AvgAngleDVmass_iTrk_Region[itrk][region].Write() 
            m_maxAngleDVmass_iTrk_Region[itrk][region].Write() 
            m_dRDVmass_iTrk_Region[itrk][region].Write() 
            m_dEtaDVmass_iTrk_Region[itrk][region].Write() 
            if itrk == 2:
                continue
            m_BkgEst_Cross_iTrk_Region[itrk][region].Write()
            m_BkgEst_Cross_Angle_iTrk_Region[itrk][region].Write()
            m_BkgEst_Cross_DeltaR_iTrk_Region[itrk][region].Write()
            m_BkgEst_Cross_NoLargeAngle_iTrk_Region[itrk][region].Write()
            m_BkgEst_Cross_LargeAngle_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_NoLargeAngle_iTrk_Region[itrk][region].Write()
            #m_BkgEst_CrossDeltaPhi_LargeAngle_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDelta_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDelta_DeltaR_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDelta_Angle_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_DeltaR_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_DeltaR_loMET_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_DeltaR_hiMET_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_DeltaR_th08_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_DeltaR_th10_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_DeltaR_th15_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_DeltaR_pt20_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_DeltaR_pt15_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_DeltaR_pt10_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_DeltaR_pt5_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_DeltaR_dR10_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_DeltaR_dR15_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_DeltaR_dR20_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_DeltaR_dR25_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_DeltaEta_iTrk_Region[itrk][region].Write() 
            m_BkgEst_CrossDeltaPhi_Angle_iTrk_Region[itrk][region].Write() 
    #for ii,flavor in enumerate(flavors):
    for jj, prop in enumerate(props):
        for itrk in range(3, 7):
            for region in range(12):
                #if jj == 0:
                m_TrkProp_Pt_iTrk_Region[jj][itrk][region].Write()
                m_TrkProp_Angle_iTrk_Region[jj][itrk][region].Write()
    #h_cut_flow_dv.Write()
    #h_DVmass_Ntrk.Write()
    #h_DVmass_Ntrk_MatVeto.Write()
    #h_DVmass_Ntrk_MatVeto_MET220.Write()
    #h_DVmass_Ntrk_MatVeto_MET250.Write()
    #for ntrk in range(2, 7):
    #    for reg in range(12):
    #        h_DVmass_Ntrk_Region[ntrk][reg].Write()
    output_root.Close()
Beispiel #32
0
def train():
    # load dataset
    # ==========================
    trainloader, testloader = load_CIFAR10()
    N = len(trainloader)
    print('# of trainset: ', N)

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    cnn = CNN()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(cnn.parameters())
    cnn.to(device)
    criterion.to(device)

    # train
    # ==========================
    loss_history = []
    acc_history = []
    time_history = []
    for epoch in range(opt.epochs):
        loss_cum = 0.0
        acc_cum = 0.0
        time_cum = 0.0
        for i, (imgs, labels) in enumerate(trainloader):
            start = time.time()
            imgs, labels = imgs.to(device), labels.to(device)
            cnn.zero_grad()
            outputs = cnn(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            time_cum += time.time() - start

            loss_cum += loss.item()
            acc = accuracy(outputs, labels)
            acc_cum += acc
            show_progress(epoch+1, i+1, N, loss.item(), acc)

        print('\t mean acc: %f' % (acc_cum/N))
        loss_history.append(loss_cum/N)
        acc_history.append(acc_cum/N)
        time_history.append(time_cum)

    # test accuracy
    cnn.eval()
    correct, total = 0, 0
    for imgs, labels in testloader:
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = cnn(imgs)
        _, pred = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (pred == labels).sum().item()

    print('======================')
    print('epoch: %d  batch size: %d' % (opt.epochs, opt.batch_size))
    print('mean accuracy on %d test images: %f' % (total, correct/total))

    # save histories
    # with open('./loss_pytorch.csv', 'w') as f:
    #     f.write('pytorch')
    #     for l in loss_history:
    #         f.write(',' + str(l))
    #     f.write('\n')
    # print('saved loss history')
    # with open('./acc_pytorch.csv', 'w') as f:
    #     f.write('pytorch')
    #     for l in acc_history:
    #         f.write(',' + str(l))
    #     f.write('\n')
    # print('saved acc history')
    with open('./lap_record.csv', 'a') as f:
        f.write('pytorch')
        for t in time_history:
            f.write(',' + str(t))
        f.write('\n')
    # save models
    torch.save(cnn.state_dict(), 'model_torch.pth')
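The accuracy helper called in the training loop above is not shown; a plausible minimal version (an assumption) that returns the fraction of correct predictions in a batch:

import torch

def accuracy(outputs, labels):
    """Fraction of samples whose arg-max prediction matches the label."""
    _, pred = torch.max(outputs, 1)
    return (pred == labels).float().mean().item()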
Beispiel #33
0
    test_labs = cls2onehot(test_labs, depth=10)
    train_imgs = train_imgs
    test_imgs = test_imgs
    # Setting models
    models = Models(n_classes=10, img_shape=(32, 32, 3))
    # Get batch xs , ys

    # Augmentation
    # batch_xs = aug_lv3(batch_xs)
    # batch_xs = batch_xs / 255.
    # plot_images(batch_xs , batch_ys)

    # Training
    eval = Eval()
    for step in range(cfg.max_iter):
        show_progress(step, cfg.max_iter)
        batch_xs, batch_ys = next_batch(train_imgs, train_labs, 60)

        train_cost = models.training(batch_xs, batch_ys, cfg.lr)
        if step % cfg.ckpt == 0:
            print('Validation ... ')

            pred, pred_cls, eval_cost, accuracy = models.eval(
                test_imgs, test_labs)
            #pred_op, pred_cls, eval_cost, accuracy = models.eval(dp.val_imgs , dp.val_labs,)
            #acc = eval.get_acc(sess_op=models.sess, preds_op=models.pred[:,0], batch_size=60, x_op=models.x_,
            #                   phase_train=models.phase_train)
            print(accuracy)
            models.save_models('models/{}.ckpt'.format(step))
            print('train cost : {}'.format(train_cost))
            print('test cost : {}'.format(eval_cost))
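cls2onehot is assumed here to map integer class labels to one-hot vectors; a minimal NumPy sketch of such a helper (hypothetical, not the project's own implementation):

import numpy as np

def cls2onehot(labels, depth):
    """Convert integer class labels into an (N, depth) one-hot array."""
    labels = np.asarray(labels, dtype=int)
    onehot = np.zeros((len(labels), depth), dtype=np.float32)
    onehot[np.arange(len(labels)), labels] = 1.0
    return onehot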
Beispiel #34
0
def lexical_weights(phrase_to_internals,
                    lex_source_given_target,
                    lex_target_given_source,
                    target_lex_freqs):
    """
    p_w(f|e) = max_{a} Prod_{i=1}^n  1 / (j | (i, j) from a) sum w(f_i|e_j)
    """
    source_given_target = {}
    target_given_source = {}

    def weight_l1_given_l2(l1_phrase, l2_phrase, alignment, l1_given_l2,
                           reverse, logprob=True):
        """calculate lexical weight for source|target or target|source"""
        # log-probabilities are summed (identity 0); plain probabilities are
        # multiplied (identity 1)
        weight = 0.0 if logprob else 1.0
        alignment = [(a, b) for (b, a) in alignment] if reverse else alignment

        _sum = sum_logs if logprob else sum

        for i, l1_word in enumerate(l1_phrase):
            # Determine all words in target phrase aligned to i
            aligned_to_i = [b for a, b in alignment if a == i]
            if not aligned_to_i:                 # Handle non-aligned words
                pair = ('NULL', l1_word) if reverse else (l1_word, 'NULL')
                p_l1_given_l2 = l1_given_l2[pair]
            elif len(aligned_to_i) == 1:      # Case added for speed
                l2_word = l2_phrase[aligned_to_i[0]]
                pair = (l2_word, l1_word) if reverse else (l1_word, l2_word)
                p_l1_given_l2 = l1_given_l2[pair]
            else:
                # sum w(l1_word|l2_word) over all aligned target words, then
                # normalise by the number of alignment links for position i
                list_of_probs = []
                for j in aligned_to_i:
                    pair = (l2_phrase[j], l1_word) if reverse else (l1_word, l2_phrase[j])
                    list_of_probs.append(l1_given_l2[pair])
                p_l1_given_l2 = _sum(list_of_probs)

                if logprob:
                    p_l1_given_l2 += math.log(1.0 / len(aligned_to_i))
                else:
                    p_l1_given_l2 /= len(aligned_to_i)

                if (logprob and p_l1_given_l2 > 0) or (not logprob and p_l1_given_l2 > 1):
                    raise ValueError('invalid probability %s for %s aligned to %s'
                                     % (p_l1_given_l2, l1_word,
                                        [l2_phrase[j] for j in aligned_to_i]))

                #p_l1_given_l2 = \
                #    _sum([l1_given_l2[((l2_phrase[j], l1_word) if reverse \
                #                       else (l1_word, l2_phrase[j]))]
                #          for j in aligned_to_i]) + \
                #     (1 / math.log(len(aligned_to_i)))

            # Weight is the product of prob for each word
            if logprob:
                weight += p_l1_given_l2
            else:
                weight *= p_l1_given_l2

        if weight > 1:
            raise ValueError('invalid lexical weight %s for %s / %s (alignment %s, reverse=%s)'
                             % (weight, l1_phrase, l2_phrase, alignment, reverse))
        return weight

    num_phrases = len(phrase_to_internals)
    point = num_phrases / 100 if num_phrases > 100 else 1

    for i, (phrase_pair, possible_internals) in enumerate(
            phrase_to_internals.iteritems()):
        if i % point == 0:
            show_progress(i, num_phrases, 40, 'LEXICAL WEIGHTS')

        weight_source_given_target = float('-inf')
        weight_target_given_source = float('-inf')
        source_phrase = phrase_pair[0].split()
        target_phrase = phrase_pair[1].split()
        for internal in possible_internals:
            # Calc weight for the current alignment
            temp_weight_source_given_target = \
                weight_l1_given_l2(source_phrase, target_phrase, internal,
                                   lex_source_given_target, reverse=False)
            # Reverse alignment for target_given_source
            temp_weight_target_given_source = \
                weight_l1_given_l2(target_phrase, source_phrase, internal,
                                   lex_target_given_source, reverse=True)
            if temp_weight_source_given_target > weight_source_given_target:
                weight_source_given_target = temp_weight_source_given_target
            if temp_weight_target_given_source > weight_target_given_source:
                weight_target_given_source = temp_weight_target_given_source

        source_given_target[phrase_pair] = weight_source_given_target
        target_given_source[phrase_pair] = weight_target_given_source

    show_progress(num_phrases, num_phrases, 40, 'LEXICAL WEIGHTS')
    sys.stdout.write('\n')

    return source_given_target, target_given_source
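As a toy illustration of the docstring formula (with made-up word-translation probabilities, logprob=False, and a single alignment), the lexical weight of a phrase pair is the product over source words of the averaged probabilities of their aligned target words:

# hypothetical probabilities w(f_i | e_j); not taken from any real model
w = {('das', 'the'): 0.8, ('haus', 'house'): 0.6, ('haus', 'building'): 0.3}
source, target = ['das', 'haus'], ['the', 'house']
alignment = {(0, 0), (1, 1)}

weight = 1.0
for i, f_word in enumerate(source):
    aligned = [j for a, j in alignment if a == i]
    p = sum(w[(f_word, target[j])] for j in aligned) / len(aligned)
    weight *= p

print(weight)  # 0.8 * 0.6 = 0.48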
Beispiel #35
0
restore_model = './models/vgg_11/18/model-564'
tester = Tester.Tester(None)
tester._reconstruct_model(restore_model)

tester.n_classes = 2
"""
best           second 
# [1,33](73)   [1,32](72)    1.png
# [13,43](875) [13,42](874) 3.png
# [3,20](146)  [3,21](147)   2.png

"""
imgs_list = []

for p in range(1, 13):
    utils.show_progress(p, 12)
    for i in range(7):
        try:
            test_imgs = np.load(
                '../Find_Wally/wally_raspCam_np/second/{}_{}.npy'.format(p, i))
            test_imgs = aug.apply_clahe(test_imgs)
            test_imgs = random_rotate_90_180_270(test_imgs, 3)

            #test_imgs = np.load('../Find_Wally/wally_raspCam/wally_1_1.npy')
            test_labs = [0] * len(test_imgs)
            test_labs = cls2onehot(test_labs, 2)

            test_imgs = test_imgs / 255.
            tester.validate(test_imgs, test_labs, 60, 0, False)
            indices = np.where([np.asarray(tester.pred_all)[:, 0] > 0.8])[1]
            print indices
Beispiel #36
0
def extract_phrase_pair_freqs(alignments_file, source_file,
                              target_file, max_length,
                              max_lines):
    """Extract and count the frequency of all phrase pairs given an
    alignment between sentences.

    Keyword arguments:
    alignments_file -- file that contains the alignments
    source_file -- file containing sentences from language 1
    target_file -- file containing sentences from language 2
    max_length -- maximum length of phrase pairs
    max_lines -- maximum number of lines to use for phrase pair extraction

    Returns (3-tuple):
        phrase pair frequencies (3-tuple):
            (phrase_pair_freqs, source_phrase_freqs, target_phrase_freqs)
        lexical (word pair) frequencies (3-tuple):
            (lex_pair_freqs, source_lex_freqs, target_lex_freqs)
        internal word alignments for each phrase pair:
            phrase_to_internals
    """

    # phrase frequencies
    phrase_pair_freqs = defaultdict(int)
    source_phrase_freqs = defaultdict(int)
    target_phrase_freqs = defaultdict(int)

    # lexical frequencies
    lex_pair_freqs = defaultdict(int)
    source_lex_freqs = defaultdict(int)
    target_lex_freqs = defaultdict(int)

    # map phrase pair to possible internal word alignments
    phrase_to_internals = defaultdict(set)

    # open files
    num_lines = sum(1 for line in open(alignments_file))
    if max_lines == float('inf'):
        max_lines = num_lines
    else:
        max_lines = int(max_lines)

    alignments = open(alignments_file, 'r')
    source = open(source_file, 'r')
    target = open(target_file, 'r')

    point = max_lines / 100 if max_lines > 100 else 1
    for i, str_align in enumerate(alignments):
        if i % point == 0:
            show_progress(i, max_lines, 40, 'PHRASE EXTRACTION')
        if i == max_lines:
            break

        # read files
        source_words = source.next().strip().split()
        target_words = target.next().strip().split()
        source_length = len(source_words)
        target_length = len(target_words)

        align = str_to_alignments(str_align)
        # word pair frequencies
        for source_index, target_index in align:
            word_pair = (source_words[source_index], target_words[target_index])
            lex_pair_freqs[word_pair] += 1
            source_lex_freqs[word_pair[0]] += 1
            target_lex_freqs[word_pair[1]] += 1


        phrase_to_internal = extract_alignments(set(align), source_length,
                                                target_length, max_length)

        for phrase_pair, internal_alignment in extract_phrase_pairs_gen(
                                                    phrase_to_internal,
                                                    source_words,
                                                    target_words):
            # phrase pair frequencies
            phrase_pair_freqs[phrase_pair] += 1
            source_phrase_freqs[phrase_pair[0]] += 1
            target_phrase_freqs[phrase_pair[1]] += 1

            # phrase pair to possible internal word alignments
            phrase_to_internals[phrase_pair].add(frozenset(internal_alignment))

        unaligned, unaligned2 = unaligned_words(align, source_length, target_length)
        unaligned.extend(unaligned2)
        for phrase_pair in unaligned_phrase_pairs_gen(unaligned, source_words,
                                                      target_words):
            lex_pair_freqs[phrase_pair] += 1
            source_lex_freqs[phrase_pair[0]] += 1
            target_lex_freqs[phrase_pair[1]] += 1

    show_progress(max_lines, max_lines, 40, 'PHRASE EXTRACTION')
    sys.stdout.write('\n')

    alignments.close()
    source.close()
    target.close()

    return ((phrase_pair_freqs, source_phrase_freqs, target_phrase_freqs),
            (lex_pair_freqs, source_lex_freqs, target_lex_freqs),
            phrase_to_internals)
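str_to_alignments is used above to parse one line of the alignment file; a plausible minimal implementation for the common '0-0 1-2 ...' word-alignment format (the format itself is an assumption):

def str_to_alignments(str_align):
    """Parse '0-0 1-2 ...' into a list of (source_index, target_index) tuples."""
    alignments = []
    for pair in str_align.strip().split():
        s, t = pair.split('-')
        alignments.append((int(s), int(t)))
    return alignments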
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-f',
                        '--inputFiles',
                        type=str,
                        help='comma separated input files')
    parser.add_argument('-o',
                        '--outputFile',
                        type=str,
                        help='output file name')
    args = parser.parse_args()
    #
    print "Writing a tree"
    #
    #f = TFile("DVtrkTemplate_tree.root", "recreate")
    f = TFile(args.outputFile, "recreate")
    t = TTree("DVtrkTemplate", "track template for DVMultiTrkBkg")
    #
    # create 1 dimensional float arrays (python's float datatype corresponds to c++ doubles)
    # as fill variables
    pt = array.array('f', [0.])
    eta = array.array('f', [0.])
    phi = array.array('f', [0.])
    d_eta = array.array('f', [0.])
    d_phi = array.array('f', [0.])
    dv_r = array.array('f', [0.])
    dv_z = array.array('f', [0.])
    dv_phi = array.array('f', [0.])
    dv_eta = array.array('f', [0.])
    dv_nTracks = array.array('i', [0])
    dv_m = array.array('f', [0.])
    region = array.array('i', [0])
    met = array.array('f', [0.])
    #
    # create the branches and assign the fill-variables to them
    t.Branch('pt', pt, 'pt/F')
    t.Branch('eta', eta, 'eta/F')
    t.Branch('phi', phi, 'phi/F')
    t.Branch('d_eta', d_eta, 'd_eta/F')
    t.Branch('d_phi', d_phi, 'd_phi/F')
    t.Branch('dv_r', dv_r, 'dv_r/F')
    t.Branch('dv_z', dv_z, 'dv_z/F')
    t.Branch('dv_phi', dv_phi, 'dv_phi/F')
    t.Branch('dv_eta', dv_eta, 'dv_eta/F')
    t.Branch('dv_nTracks', dv_nTracks, 'dv_nTracks/I')
    t.Branch('dv_m', dv_m, 'dv_m/F')
    t.Branch('region', region, 'region/I')
    t.Branch('met', met, 'met/F')
    #
    chain = TChain('Nominal', 'Nominal Tree')
    # the help text promises comma-separated input files, so add them one by one
    for input_file in args.inputFiles.split(','):
        chain.Add(input_file)
    # loop over the chain, fill the per-track variables and call Fill()
    entries = chain.GetEntries()
    #print('* Number of entries = {}'.format(entries))
    try:
        for entry in range(entries):
            #if not entry % 100000:
            #    print('*** processed {0} out of {1} ({2}%)'.format(entry, entries, round(float(entry)/entries*100., 1)))
            utils.show_progress(entry, entries)
            #if entry == 100000:
            #    break
            # get the next tree in the chain and verify
            ientry = chain.LoadTree(entry)
            if ientry < 0:
                break
            # copy next entry into memory and verify
            nb = chain.GetEntry(entry)
            if nb <= 0:
                continue
            if chain.EventNumber == 752668466:
                continue
            if not utils.basic_event_selection(chain):
                continue
            pos_PV = TVector3(chain.PV_x, chain.PV_y, chain.PV_z)
            for idv in range(len(chain.DV_x)):
                if not utils.basic_dv_selection(chain, idv):
                    continue
                region[0] = chain.DV_Region[idv]
                pos_DV = TVector3(chain.DV_x[idv], chain.DV_y[idv],
                                  chain.DV_z[idv])
                dv_m[0] = chain.DV_m[idv]
                dv_r[0] = pos_DV.Perp()
                dv_z[0] = pos_DV.Z()
                dv_phi[0] = pos_DV.Phi()
                dv_eta[0] = pos_DV.Eta()
                #if chain.DV_nTracks[idv] < 3 or (chain.DV_Region[idv] in [-1, 1, 3, 5, 7, 9]):
                #if chain.DV_nTracks[idv] < 3 or chain.DV_Region[idv] < 0 or chain.DV_m[idv] < 3:
                if chain.DV_Region[idv] < 0 or chain.DV_m[idv] < 2:
                    #if chain.DV_nTracks[idv] < 3 or chain.DV_Region[idv] < 0:
                    #if chain.DV_Region[idv] in [-1, 1, 3, 5, 7, 9]:
                    continue
                dv_nTracks[0] = chain.DV_nTracks[idv]
                tlv_DVPV = TLorentzVector()
                tlv_DVPV.SetVect(pos_DV - pos_PV)
                for itrk in range(chain.DV_nTracks[idv]):
                    #tlv = TLorentzVector()
                    pt[0] = chain.DV_track_pt_wrtSV[idv][itrk]
                    eta[0] = chain.DV_track_eta_wrtSV[idv][itrk]
                    phi[0] = chain.DV_track_phi_wrtSV[idv][itrk]
                    #tlv.SetPtEtaPhiM(pt[0], eta[0], phi[0], 139.57/1e3)
                    d_eta[0] = eta[0] - tlv_DVPV.Eta()
                    d_phi[0] = phi[0] - tlv_DVPV.Phi()
                    t.Fill()
    except KeyboardInterrupt:
        pass

    # write the tree into the output file and close the file
    f.Write()
    f.Close()
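One caveat with the d_phi branch above: the raw difference phi[0] - tlv_DVPV.Phi() can fall outside [-pi, pi]. If the wrap-around matters downstream, ROOT's TVector2.Phi_mpi_pi can normalize such a difference, e.g.:

from ROOT import TVector2

raw = 3.0 - (-3.0)                  # an azimuthal difference that exceeds pi
wrapped = TVector2.Phi_mpi_pi(raw)  # mapped back into [-pi, pi)
print(raw, wrapped)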
Beispiel #38
0
"""
import glob , os
from image_processing import ImageProcessing
import numpy as np
from PIL import Image
import utils

root_root_dir = '/mnt/Find_Wally/wally_dataset'
second_dir = os.path.join(root_root_dir, 'second_dataset')
third_dir = os.path.join(root_root_dir, 'third_dataset')
root_save_dir = 'wally_raspCam_np'

img_prc = ImageProcessing()
sec_paths = glob.glob(os.path.join(second_dir, '*.jpg'))
trd_paths = glob.glob(os.path.join(third_dir, '*.jpg'))

assert len(sec_paths) != 0 and len(trd_paths) != 0

tmp_dict = {'second': sec_paths, 'third': trd_paths}
for key in tmp_dict:
    paths = tmp_dict[key]
    save_dir = os.path.join(root_save_dir, key)
    utils.makedir(save_dir)
    for ind, path in enumerate(paths):
        utils.show_progress(ind, len(paths))
        name = utils.get_name(path)
        img = np.asarray(Image.open(path).convert('RGB'))
        # Cropping
        imgs, coords = img_prc.stride_cropping(img, 200, 200, 400, 400)
        save_path = os.path.join(save_dir, name.replace('jpg', 'npy'))
        np.save(save_path, imgs)
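ImageProcessing.stride_cropping is not shown here; judging from the call stride_cropping(img, 200, 200, 400, 400), a plausible sketch of a stride-based cropper (the parameter order, strides then crop size, is an assumption) would be:

import numpy as np

def stride_cropping(img, stride_h, stride_w, crop_h, crop_w):
    """Cut overlapping crop_h x crop_w patches out of img with the given strides."""
    crops, coords = [], []
    h, w = img.shape[:2]
    for y in range(0, h - crop_h + 1, stride_h):
        for x in range(0, w - crop_w + 1, stride_w):
            crops.append(img[y:y + crop_h, x:x + crop_w])
            coords.append((y, x))
    return np.asarray(crops), coords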
Beispiel #39
0
    def train(self, num_epochs, batch_size, gpu_id):

        if gpu_id is not None:
            self.net.to_gpu(gpu_id)
            self.x_test = to_gpu(self.x_test, gpu_id)
            self.y_test = to_gpu(self.y_test, gpu_id)

        num_batches = int(len(self.x_train) / batch_size)
        print('epochs : {}, number of batches : {}' \
              .format(num_epochs, num_batches))

        lap_times = []
        for e in range(num_epochs):
            permute_idx = np.random.permutation(np.arange(50000))
            lap_time = []
            for b in range(num_batches):

                x_batch = self.x_train[permute_idx[b * batch_size:(b + 1) *
                                                   batch_size]]
                y_batch = self.y_train[permute_idx[b * batch_size:(b + 1) *
                                                   batch_size]]

                s_time = time.time()
                if gpu_id is not None:
                    x_batch = to_gpu(x_batch, gpu_id)
                    y_batch = to_gpu(y_batch, gpu_id)
                logits = self.net(x_batch)
                loss = F.softmax_cross_entropy(logits, y_batch)
                self.net.cleargrads()
                loss.backward()
                self.opt.update()
                e_time = time.time()
                lap_time.append(e_time - s_time)

                if b % 10 == 0:
                    loss = to_cpu(loss.data)
                    acc = F.accuracy(logits, y_batch)
                    acc = to_cpu(acc.data)
                    show_progress(e + 1, b + 1, num_batches, loss, acc)

            lap_times.append(np.sum(lap_time))

            # validation
            accs_val = []
            for b in range(int(len(self.x_test) / batch_size)):
                x_val = self.x_test[b * batch_size:(b + 1) * batch_size]
                y_val = self.y_test[b * batch_size:(b + 1) * batch_size]
                preds_val = self.net(x_val)
                acc_val = F.accuracy(preds_val, y_val)
                accs_val.append(to_cpu(acc_val.data))
            print('\n{} epoch validation accuracy {}'.format(
                e + 1, np.mean(accs_val)))

            # save trained model
            serializers.save_npz('./model_chainer/chainer{}.model'.format(e),
                                 self.net)

        with open('./lap_record.csv', 'a') as f:
            f.write('chainer')
            for lap in lap_times:
                f.write(',' + str(lap))
            f.write('\n')
Beispiel #40
0
    #n_passed = [0. for _ in range(n_reweight_steps)]
    from array import array
    limitsLifetime = array('d', ibins)
    #n_passed = TH1F('n_passed', ';c#tau [mm]; Event-level efficiency', len(limitsLifetime)-1, limitsLifetime)
    #n_total_w1 = [0. for _ in range(n_reweight_steps)]
    #n_total = [0. for _ in range(n_reweight_steps)]
    #n_total = TH1F('n_total', ';c#tau [mm]; Event-level efficiency', len(limitsLifetime)-1, limitsLifetime)
    tefficiency = TEfficiency('tefficiency', ';c#tau [mm]; Event-level efficiency',
                              len(limitsLifetime) - 1, limitsLifetime)

    entries = chain.GetEntries()
    print('* Number of entries = {}'.format(entries))
    try:
        for entry in range(entries):
            #if not entry % 100000:
            #    print('*** processed {0} out of {1} ({2}%)'.format(entry, entries, round(float(entry)/entries*100., 1)))
            utils.show_progress(entry, entries)
            #if entry == 100000:
            #    break
            # get the next tree in the chain and verify
            ientry = chain.LoadTree(entry)
            if ientry < 0:
                break
            # copy next entry into memory and verify
            nb = chain.GetEntry(entry)
            if nb <= 0:
                continue
            if chain.EventNumber == 752668466:
                continue
            event_weight = chain.McEventWeight * chain.PileupWeight * chain.ISRWeight
            h_mu.Fill(chain.CorrectedMu, event_weight)
            h_mu_pileupWeight.Fill(chain.CorrectedMu, chain.PileupWeight)
Beispiel #41
0
            # check summary shape and value
            val_acc, val_loss, pred = sess.run([accuracy, cost, pred_op],
                                               feed_dict=test_feedDict)
            val_acc_mean.append(val_acc)
            val_loss_mean.append(val_loss)
            pred_all.append(pred)
        val_acc_mean = np.mean(np.asarray(val_acc_mean))
        val_loss_mean = np.mean(np.asarray(val_loss_mean))
        summary = tf.Summary(value=[
            tf.Summary.Value(tag='Test batch_size 1 loss',
                             simple_value=float(val_loss_mean)),
            tf.Summary.Value(tag='Test batch_size 1  acc',
                             simple_value=float(val_acc_mean)),
            tf.Summary.Value(tag='Train batch_size 1  loss',
                             simple_value=float(train_loss)),
            tf.Summary.Value(tag='Train batch_size 1  acc',
                             simple_value=float(train_acc))
        ])
        writer.add_summary(summary, step)
        print('Validation Batch Size : 1 Val accuracy : {} loss : {} '.format(
            val_acc_mean, val_loss_mean))

    utils.show_progress(step, max_iter)
    batch_xs, batch_ys = data.next_batch(train_imgs, train_labs, batch_size)
    train_acc, train_loss, _ = sess.run([accuracy, cost, train_op],
                                        feed_dict={
                                            x_: batch_xs,
                                            y_: batch_ys,
                                            phase_train: True
                                        })