Code example #1
import subprocess

def run_system_cmd(cmd, desc):
    # Run a shell command; on a nonzero exit status, run config.COMMAND_START,
    # send an error mail, write the message to the log, and abort.
    if subprocess.call(cmd, shell=True) != 0:
        subprocess.call(config.COMMAND_START, shell=True)
        msg = f'An error occurred while {desc}'
        send_mail(msg, 'error')
        write_log(msg)
        raise SystemExit(1)
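
The snippet assumes a project-specific config module plus send_mail and write_log helpers that are not shown. A minimal sketch of plausible stand-ins, purely to make the example runnable (all names and values below are assumptions, not the project's actual code):

class config:
    # hypothetical recovery command run when a backup step fails
    COMMAND_START = 'systemctl start backup-target.service'

def send_mail(body, subject):
    # placeholder: a real implementation would go through smtplib
    print(f'[mail/{subject}] {body}')

def write_log(msg):
    with open('backup.log', 'a') as f:
        f.write(msg + '\n')

With these in place, run_system_cmd('false', 'testing the error path') exercises the failure branch end to end, since the shell builtin false always exits nonzero.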
Code example #2
def send_backup(backup_path, backup_name):
    server_confidential_data = f'-p {config.BACKUP_SERVER_PORT} {config.BACKUP_SERVER_USER}@{config.BACKUP_SERVER_HOST}'
    server_path = config.BACKUP_MOUNT_SERVER_PATH
    mount_path = config.BACKUP_MOUNT_CLIENT_PATH
    mount_cmd = f'sshfs {server_confidential_data}:/{server_path} {mount_path}'
    copy_cmd = f'cp {backup_path} {mount_path}/{backup_name}'

    original_md5 = utils.get_md5_from_file(backup_path)
    run_system_cmd(f'su - {config.CLIENT_USER} -c "{mount_cmd}"',
                   'mounting the SSHFS')
    run_system_cmd(f'su - {config.CLIENT_USER} -c "{copy_cmd}"',
                   'copying the backup to the server')
    returned_md5 = utils.get_md5_from_file(f'{mount_path}/{backup_name}')
    run_system_cmd(f'umount {mount_path}', 'unmounting the SSHFS')

    if original_md5 == returned_md5:
        if config.SEND_FINAL_EMAIL:
            send_mail(config.MAIL_BODY, 'success')
    else:
        error_text = 'The checksums of the backups do not match'
        utils.write_log(error_text)
        send_mail(error_text, 'error')
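
utils.get_md5_from_file is not shown above; a minimal sketch of what such a helper plausibly looks like, reading in chunks so that large backup files never have to fit in memory (the implementation is an assumption; only the name comes from the snippet):

import hashlib

def get_md5_from_file(path, chunk_size=1 << 20):
    # Stream the file through an MD5 hash, one chunk at a time.
    md5 = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            md5.update(chunk)
    return md5.hexdigest()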
Code example #3
File: model.py Project: liuguoyou/PixelPick
    def __call__(self):
        # fully-supervised model
        if self.n_pixels_by_us == 0:
            dir_checkpoints = f"{self.dir_checkpoints}/fully_sup"
            os.makedirs(dir_checkpoints, exist_ok=True)

            self.log_train = f"{dir_checkpoints}/log_train.txt"
            self.log_val = f"{dir_checkpoints}/log_val.txt"
            write_log(self.log_train,
                      header=["epoch", "mIoU", "pixel_acc", "loss"])
            write_log(self.log_val, header=["epoch", "mIoU", "pixel_acc"])

            self._train()

        # active learning model
        else:
            n_stages = self.max_budget // self.n_pixels_by_us
            n_stages += 1 if self.init_n_pixels > 0 else 0
            print("n_stages:", n_stages)
            for nth_query in range(n_stages):
                dir_checkpoints = f"{self.dir_checkpoints}/{nth_query}_query"
                os.makedirs(dir_checkpoints, exist_ok=True)

                self.log_train = f"{dir_checkpoints}/log_train.txt"
                self.log_val = f"{dir_checkpoints}/log_val.txt"
                write_log(self.log_train,
                          header=["epoch", "mIoU", "pixel_acc", "loss"])
                write_log(self.log_val,
                          header=["epoch", "mIoU", "pixel_acc"])

                self.nth_query = nth_query

                model = self._train()

                # select queries using the current model and label them.
                queries = self.query_selector(nth_query, model)
                self.dataloader.dataset.label_queries(queries, nth_query + 1)

                if nth_query == n_stages - 1:
                    break

                # if nth_query == 0:
                #     torch.save({"model": model.state_dict()}, self.model_0_query)
        return
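
write_log is used here in two ways: with header=... to start a fresh log file, and (in code examples #5 and #6 below) with list_entities=... to append one row of metrics. A minimal sketch consistent with both call sites, assuming a simple comma-separated file format:

def write_log(fp, header=None, list_entities=None):
    # A header starts a new file; a list of entities appends one row.
    with open(fp, 'w' if header is not None else 'a') as f:
        if header is not None:
            f.write(','.join(header) + '\n')
        if list_entities is not None:
            f.write(','.join(str(e) for e in list_entities) + '\n')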
Code example #4
    def run(self):
        print("Performing action identifier experiment ...")
        open(self.config['log_file'], 'w').close()  # truncate the log file
        count = 0
        sentences_total = 0
        start_time = time.time()

        utils.write_log(self.config,
                        "RUNNING CONFIGURATION: {}".format(self.config))

        # Create dataset object
        wikihow = Wikihow.Wikihow(self.config)

        statistic_list = []
        statistic_similarity = []
        ground_truth_count = 0
        dataset_length = int(
            wikihow.get_length() *
            self.config['action_identifier']['dataset_evaluation_percent'])

        if dataset_length < 1:
            print("No examples to process in dataset. Aborting ...")
            return

        verbs = []

        # Load the spaCy model once, outside the per-entry loop.
        spacy_en = spacy.load('en_core_web_sm')

        for idx in trange(dataset_length):
            instance = wikihow.get_entry(idx)
            text = wikihow.process_example(instance[1])
            utils.write_log(
                self.config,
                "\n---------------------------------------------------------------------------\n"
            )
            utils.write_log(self.config, "FILE: {}\n".format(instance[0]))

            for sentence in text:
                sentences_total += 1

                # Tokenize
                if self.config['action_identifier'][
                        'ground_truth_generator'] == 'nltk':
                    sentence_tokens = nltk.word_tokenize(sentence)
                    sentence_tags = nltk.pos_tag(sentence_tokens)
                    ground_truth_verbs = [
                        v[0] for v in sentence_tags
                        if len(verbnet.classids(v[0])) > 0
                    ]
                elif self.config['action_identifier'][
                        'ground_truth_generator'] == 'spacy':
                    doc = spacy_en(sentence)
                    sentence_tokens = [t for t in doc]
                    sentence_tags = [(str(t), t.pos_) for t in doc]
                    ground_truth_verbs = [v for v in doc if v.pos_ == 'VERB']
                else:
                    print("No ground-truth mechanism defined! Aborting ...")
                    return

                utils.write_log(self.config,
                                "\n>SENTENCE: {}".format(sentence))
                utils.write_log(self.config,
                                "\n  >SENTENCE TAGS: {}".format(sentence_tags))

                if len(ground_truth_verbs) == 0:
                    ground_truth_count += 1

                utils.write_log(
                    self.config,
                    "\n  >GROUND-TRUTH VERBS: {}".format(ground_truth_verbs))

                embedding_verbs = []

                for token, tag in zip(sentence_tokens, sentence_tags):
                    keyword_similarity = []
                    for keyword in self.config['action_identifier'][
                            'keywords']:
                        try:
                            similarity = 1.0 - self.word_embedding.get_distance(
                                str(token), str(keyword))[2]
                        except KeyError:
                            similarity = 0.0

                        keyword_similarity.append(similarity)

                    mean = np.mean(keyword_similarity)

                    if mean >= float(self.config['action_identifier']
                                     ['similarity_threshold']):
                        embedding_verbs.append((str(token), mean))
                        statistic_similarity.append(mean)
                        verbs.append(token)

                ground_truth_set = {str(v) for v in ground_truth_verbs}
                print("Ground truth set: ", ground_truth_set)

                embedding_verbs_set = {str(v[0]) for v in embedding_verbs}
                print("Embedding set: ", embedding_verbs_set)

                true_positive = embedding_verbs_set.intersection(
                    ground_truth_set)
                print("True positive: ", true_positive)

                false_positive = embedding_verbs_set.difference(
                    ground_truth_set)
                print("False positive: ", false_positive)

                false_negative = ground_truth_set.difference(
                    embedding_verbs_set.intersection(ground_truth_set))
                print("False negative: ", false_negative)

                true_positive = len(true_positive)
                false_positive = len(false_positive)
                false_negative = len(false_negative)

                # Built from the last token of the loop above; not used further.
                sentence_entry = (token, tag,
                                  self.word_embedding.get_word_vector(token),
                                  keyword_similarity, mean)

                utils.write_log(
                    self.config,
                    "\n  >EMBEDDING VERBS: {}".format(embedding_verbs))

                # Text statistics [true positive, false negative, precision, recall, f-score]
                try:
                    precision = true_positive / (true_positive +
                                                 false_positive)
                except ZeroDivisionError:
                    precision = 0.0

                try:
                    recall = true_positive / (true_positive + false_negative)
                except ZeroDivisionError:
                    recall = 0.0

                try:
                    f_score = 2 * (recall * precision) / (recall + precision)
                except ZeroDivisionError:
                    f_score = 0.0

                utils.write_log(
                    self.config,
                    "\n  >TP: {} FP: {} FN: {} Precision: {} Recall: {} F-Score: {}"
                    .format(true_positive, false_positive, false_negative,
                            precision, recall, f_score))
                statistic_list.append([
                    true_positive, false_positive, false_negative, precision,
                    recall, f_score
                ])
            count += 1

        print("Calculating statistics ...")
        statistic_mean = np.mean(statistic_list, axis=0)
        statistic_std = np.std(statistic_list, axis=0)

        utils.write_log(
            self.config,
            "\n=======================================================================\n"
        )
        utils.write_log(
            self.config,
            "RESULTS (Elapsed time: {:.4f} seconds)".format(time.time() -
                                                            start_time))
        utils.write_log(self.config, "\n  Total of examples: {}".format(count))
        utils.write_log(self.config, "\n  Total of sentences: {} - Mean per example: {:.4f} - Ground-truth sentences with zero verbs: {} ({:.4f} %)".format(sentences_total, \
                            sentences_total / count, ground_truth_count, ground_truth_count / sentences_total))
        utils.write_log(
            self.config, "\n  Mean True Positive: {:.4f} - Std: {:.4f}".format(
                statistic_mean[0], statistic_std[0]))
        utils.write_log(
            self.config,
            "\n  Mean False Positive: {:.4f} - Std: {:.4f}".format(
                statistic_mean[1], statistic_std[1]))
        utils.write_log(
            self.config,
            "\n  Mean False Negative: {:.4f} - Std: {:.4f}".format(
                statistic_mean[2], statistic_std[2]))
        utils.write_log(
            self.config, "\n  Mean Similarity: {:.4f} - Std: {:.4f}".format(
                np.mean(statistic_similarity), np.std(statistic_similarity)))
        utils.write_log(
            self.config,
            "\n  Mean Precision: {:.4f} - Recall: {:.4f} - F-Score: {:.4f}".
            format(statistic_mean[3], statistic_mean[4], statistic_mean[5]))

        verbs = [str(v) for v in verbs]

        # Write per-verb frequency counts next to the log file.
        # (Series.to_csv returns None when given a path, so no assignment.)
        import pandas as pd
        pd.DataFrame(verbs)[0].value_counts().to_csv(
            self.config['log_file'] + "-dataframe")
Code example #5
File: model.py Project: liuguoyou/PixelPick
    def _val(self, epoch, model):
        dataloader_iter, tbar = iter(self.dataloader_val), tqdm(
            range(len(self.dataloader_val)))
        model.eval()
        for _ in tbar:
            dict_data = next(dataloader_iter)
            x, y = dict_data['x'].to(self.device), dict_data['y'].to(
                self.device)

            if self.dataset_name == "voc":
                h, w = y.shape[1:]
                pad_h = ceil(
                    h / self.stride_total) * self.stride_total - x.shape[2]
                pad_w = ceil(
                    w / self.stride_total) * self.stride_total - x.shape[3]
                x = F.pad(x, pad=(0, pad_w, 0, pad_h), mode='reflect')
                dict_outputs = model(x)
                dict_outputs['pred'] = dict_outputs['pred'][:, :, :h, :w]

            else:
                dict_outputs = model(x)

            logits = dict_outputs['pred']
            prob, pred = F.softmax(logits.detach(),
                                   dim=1), logits.argmax(dim=1)

            self.running_score.update(y.cpu().numpy(), pred.cpu().numpy())
            scores = self.running_score.get_scores()[0]
            miou, pixel_acc = scores['Mean IoU'], scores['Pixel Acc']
            tbar.set_description(
                f"mIoU: {miou:.3f} | pixel acc.: {pixel_acc:.3f}")

            if self.debug:
                break

        if miou > self.best_miou:
            state_dict = {"model": model.state_dict()}

            if self.n_pixels_by_us != 0:
                torch.save(
                    state_dict,
                    f"{self.dir_checkpoints}/{self.nth_query}_query/best_miou_model.pt"
                )
            else:
                torch.save(
                    state_dict,
                    f"{self.dir_checkpoints}/fully_sup/best_miou_model.pt")
            print(
                f"best model has been saved "
                f"(epoch: {epoch} | prev. miou: {self.best_miou:.4f} => new miou: {miou:.4f})."
            )
            self.best_miou = miou

        write_log(self.log_val, list_entities=[epoch, miou, pixel_acc])

        print(
            f"\n{'=' * 100}"
            f"\nExperim name: {self.experim_name}"
            f"\nEpoch {epoch} | miou: {miou:.3f} | pixel_acc.: {pixel_acc:.3f}"
            f"\n{'=' * 100}\n")

        self._reset_meters()

        ent, lc, ms = [
            self._query(prob, uc)[0].cpu()
            for uc in ["entropy", "least_confidence", "margin_sampling"]
        ]
        dict_tensors = {
            'input': dict_data['x'][0].cpu(),
            'target': dict_data['y'][0].cpu(),
            'pred': pred[0].detach().cpu(),
            'confidence': lc,
            'margin':
            -ms,  # minus sign is to draw smaller margin part brighter
            'entropy': ent
        }

        if self.n_pixels_by_us != 0:
            self.vis(
                dict_tensors,
                fp=
                f"{self.dir_checkpoints}/{self.nth_query}_query/{epoch}_val.png"
            )
        else:
            self.vis(dict_tensors,
                     fp=f"{self.dir_checkpoints}/fully_sup/{epoch}_val.png")
        return
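
self._query is not part of the snippet; the three scores it is asked for (entropy, least confidence, margin sampling) are the standard uncertainty measures, so a per-pixel sketch over a softmax map of shape (batch, classes, H, W) might read as follows. The exact formulas PixelPick uses are an assumption here:

import torch

def uncertainty(prob, measure):
    # prob: softmax probabilities, shape (batch, classes, H, W)
    if measure == "entropy":
        return -(prob * prob.clamp_min(1e-12).log()).sum(dim=1)
    if measure == "least_confidence":
        return 1.0 - prob.max(dim=1).values
    if measure == "margin_sampling":
        top2 = prob.topk(2, dim=1).values
        # a small margin between the top two classes means high uncertainty,
        # which matches the minus sign applied to 'margin' above
        return top2[:, 0] - top2[:, 1]
    raise ValueError(measure)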
Code example #6
File: model.py Project: liuguoyou/PixelPick
    def _train_epoch(self, epoch, model, optimizer, lr_scheduler):
        if self.n_pixels_by_us != 0:
            print(
                f"training epoch {epoch} of the {self.nth_query}th query "
                f"({self.dataloader.dataset.n_pixels_total} labelled pixels)"
            )
            fp = f"{self.dir_checkpoints}/{self.nth_query}_query/{epoch}_train.png"
        else:
            fp = f"{self.dir_checkpoints}/fully_sup/{epoch}_train.png"
        log = f"{self.log_train}"

        dataloader_iter, tbar = iter(self.dataloader), tqdm(
            range(len(self.dataloader)))
        model.train()
        for _ in tbar:
            dict_data = next(dataloader_iter)
            x, y = dict_data['x'].to(self.device), dict_data['y'].to(
                self.device)

            # if queries
            if self.n_pixels_by_us != 0:
                mask = dict_data['queries'].to(self.device, torch.bool)
                y.flatten()[~mask.flatten()] = self.ignore_index

            # forward pass
            dict_outputs = model(x)

            logits = dict_outputs["pred"]
            dict_losses = {
                "ce": F.cross_entropy(logits,
                                      y,
                                      ignore_index=self.ignore_index)
            }

            # backward pass
            loss = sum(dict_losses.values())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            prob, pred = F.softmax(logits.detach(),
                                   dim=1), logits.argmax(dim=1)
            self.running_score.update(y.cpu().numpy(), pred.cpu().numpy())
            self.running_loss.update(loss.detach().item())

            scores = self.running_score.get_scores()[0]
            miou, pixel_acc = scores['Mean IoU'], scores['Pixel Acc']

            # description
            description = f"({self.experim_name}) Epoch {epoch} | mIoU.: {miou:.3f} | pixel acc.: {pixel_acc:.3f} | " \
                          f"avg loss: {self.running_loss.avg:.3f}"
            for loss_k, loss_v in dict_losses.items():
                description += f" | {loss_k}: {loss_v.detach().cpu().item():.3f}"
            tbar.set_description(description)

            if self.lr_scheduler_type == "Poly":
                lr_scheduler.step(epoch=epoch - 1)

            if self.debug:
                break

        if self.lr_scheduler_type == "MultiStepLR":
            lr_scheduler.step(epoch=epoch - 1)

        write_log(
            log, list_entities=[epoch, miou, pixel_acc, self.running_loss.avg])
        self._reset_meters()

        ent, lc, ms = [
            self._query(prob, uc)[0].cpu()
            for uc in ["entropy", "least_confidence", "margin_sampling"]
        ]
        dict_tensors = {
            'input': dict_data['x'][0].cpu(),
            'target': dict_data['y'][0].cpu(),
            'pred': pred[0].detach().cpu(),
            'confidence': lc,
            'margin':
            -ms,  # minus sign is to draw smaller margin part brighter
            'entropy': ent
        }

        self.vis(dict_tensors, fp=fp)
        return model, optimizer, lr_scheduler
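
The essential trick in this loop is that only queried pixels supervise the model: every unqueried target is overwritten with ignore_index, which F.cross_entropy then skips. A toy illustration of that masking step (shapes and values invented for the example):

import torch
import torch.nn.functional as F

logits = torch.randn(1, 3, 2, 2)                       # (batch, classes, H, W)
y = torch.randint(0, 3, (1, 2, 2))                     # dense labels
mask = torch.tensor([[[True, False], [False, True]]])  # queried pixels
ignore_index = 255
y.flatten()[~mask.flatten()] = ignore_index
# only the two queried pixels contribute to the loss
loss = F.cross_entropy(logits, y, ignore_index=ignore_index)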
Code example #7
File: eval.py Project: trungd/speech-recognition
def eval(hparams, args, Model, BatchedInput):
    tf.reset_default_graph()
    graph = tf.Graph()
    mode = tf.estimator.ModeKeys.EVAL
    hparams.batch_size = hparams.eval_batch_size

    with graph.as_default():
        trainer = Trainer(hparams, Model, BatchedInput, mode)
        trainer.build_model()

        sess = tf.Session(graph=graph)
        load_model(sess, Model, hparams)
        trainer.init(sess)

        dlgids = []
        lers = []

        pbar = tqdm(total=trainer.data_size, ncols=100)
        pbar.set_description("Eval")
        fo = open(os.path.join(hparams.summaries_dir, "eval_ret.txt"), "w")
        utils.prepare_output_path(hparams)
        errs = {}
        ref_lens = {}
        while True:
            try:
                ids, ground_truth_labels, predicted_labels, ground_truth_len, predicted_len = trainer.eval(
                    sess)
                utils.write_log(hparams, [str(ground_truth_labels)])

                decode_fns = trainer.test_model.get_decode_fns()
                # dlgids += list([str(id).split('/')[-2] for id in ids])
                metrics = (args.metrics or hparams.metrics).split(',')
                for acc_id, (gt_labels, p_labels, gt_len, p_len) in \
                        enumerate(zip(ground_truth_labels, predicted_labels,
                                      ground_truth_len, predicted_len)):
                    if acc_id not in lers:
                        lers[acc_id] = []
                    # errs and ref_lens are appended to below and need the
                    # same per-accuracy-id initialisation
                    errs.setdefault(acc_id, [])
                    ref_lens.setdefault(acc_id, [])

                    for i in range(len(gt_labels)):
                        if acc_id == 1 and (hparams.model
                                            == "da_attention_seg"):
                            # note: this branch sets `ler` only; `err` and
                            # `ref_len` below keep values from a previous pass
                            ler, str_original, str_decoded = ops_utils.joint_evaluate(
                                hparams,
                                ground_truth_labels[0][i],
                                predicted_labels[0][i],
                                ground_truth_labels[1][i],
                                predicted_labels[1][i],
                                decode_fns[acc_id],
                            )
                        else:
                            err, ref_len, str_original, str_decoded = ops_utils.evaluate(
                                gt_labels[i],
                                # gt_labels[i][:gt_len[i]],
                                p_labels[i],
                                # p_labels[i][:p_len[i]],
                                decode_fns[acc_id],
                                metrics[acc_id],
                                acc_id)

                        if err is not None:
                            errs[acc_id].append(err)
                            ref_lens[acc_id].append(ref_len)

                            if hparams.input_unit == "word":
                                str_original = ' '.join(str_original)
                                str_decoded = ' '.join(str_decoded)
                            elif hparams.input_unit == "char":
                                str_original = ''.join(str_original).replace(
                                    '_', ' ')
                                str_decoded = ''.join(str_decoded).replace(
                                    '_', ' ')

                            tqdm.write(
                                "\nGT: %s\nPR: %s\nLER: %.3f\n" %
                                (str_original, str_decoded, err / ref_len))
                            #tqdm.write(str(p_labels[i]))
                            #tqdm.write("%d %d" % (gt_len[i], p_len[i]))

                            # created but never attached to a summary below
                            meta = tf.SummaryMetadata()
                            meta.plugin_data.plugin_name = "text"

                # update pbar progress and postfix
                pbar.update(trainer.batch_size)
                bar_pf = {}
                for acc_id in range(len(ground_truth_labels)):
                    bar_pf["er" + str(acc_id)] = "%2.2f" % (
                        sum(errs[acc_id]) / sum(ref_lens[acc_id]) * 100)
                pbar.set_postfix(bar_pf)
            except tf.errors.OutOfRangeError:
                break

    # acc_by_ids = {}
    # for i, id in enumerate(dlgids):
    #    if id not in acc_by_ids: acc_by_ids[id] = []
    #    acc_by_ids[id].append(lers[0][i])

    # print("\n\n----- Statistics -----")
    # for id, ls in acc_by_ids.items():
    #     print("%s\t%2.2f" % (id, sum(ls) / len(ls)))

    # fo.write("LER: %2.2f" % (sum(lers) / len(lers) * 100))
    # print(len(lers[0]))
    fo.close()
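
ops_utils.evaluate is defined elsewhere; judging from the err / ref_len pair it returns and the LER printed above, it computes a token-level edit distance normalised by the reference length. A plain-Python sketch of that metric, stated as an assumption about its internals:

def edit_distance(ref, hyp):
    # single-row dynamic-programming Levenshtein distance
    dp = list(range(len(hyp) + 1))
    for i in range(1, len(ref) + 1):
        prev_diag, dp[0] = dp[0], i
        for j in range(1, len(hyp) + 1):
            cur = dp[j]
            dp[j] = min(dp[j] + 1,                               # deletion
                        dp[j - 1] + 1,                           # insertion
                        prev_diag + (ref[i - 1] != hyp[j - 1]))  # substitution
            prev_diag = cur
    return dp[-1]

# label error rate of a hypothesis against a reference:
# err = edit_distance(ref_tokens, hyp_tokens); ler = err / len(ref_tokens)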