Example no. 1
0
    def run_evaluate(self, sess, test, vocab_tags, vocab_words):

        accs = []
        correct_preds, total_correct, total_preds = 0., 0., 0.
        for sentences, labels in minibatches(test, vocab_tags, vocab_words, self.config.batch_size):
            labels_pred, sequence_lengths = self.predict_batch(sess, sentences)

            for lab, lab_pred, length in zip(labels, labels_pred, sequence_lengths):
                lab = lab[:length]
                lab_pred = lab_pred[:length]
                accs += [a == b for (a, b) in zip(lab, lab_pred)]

                lab_chunks = set(get_chunks(lab, vocab_tags))
                lab_pred_chunks = set(get_chunks(lab_pred, vocab_tags))

                correct_preds += len(lab_chunks & lab_pred_chunks)
                total_preds += len(lab_pred_chunks)
                total_correct += len(lab_chunks)

        
        p = correct_preds / total_preds if correct_preds > 0 else 0
        r = correct_preds / total_correct if correct_preds > 0 else 0
        f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
        acc = np.mean(accs)

        return {"acc": 100*acc, "f1": 100*f1, "precision": 100*p, "recall": 100*r}


        
Example no. 2
0
    def evaluate(self, test):
        accuracy = []
        correct_prediction = 0.
        total_correct = 0.
        total_prediction = 0.
        for word, label in minibatches(test, self.config.batch_size):
            label_predict, seq_len = self.predict_batch(word)

            for lb, lb_pred, length in zip(label, label_predict, seq_len):
                lb = lb[:length]
                lb_pred = lb_pred[:length]
                # accumulate token-level accuracy and chunk-level counts per sentence
                accuracy += [a == b for (a, b) in zip(lb, lb_pred)]
                lb_chunks = set(get_chunks(lb, self.config.vocab_tag))
                lb_pred_chunks = set(get_chunks(lb_pred, self.config.vocab_tag))
                correct_prediction += len(lb_chunks & lb_pred_chunks)
                total_prediction += len(lb_pred_chunks)
                total_correct += len(lb_chunks)

        precision = correct_prediction / total_prediction if correct_prediction > 0 else 0
        recall = correct_prediction / total_correct if correct_prediction > 0 else 0
        f1 = 2 * precision * recall / (precision +
                                       recall) if correct_prediction > 0 else 0
        acc = np.mean(accuracy)

        return {"accuracy": 100 * acc, "f1-score": 100 * f1}
Example no. 3
0
    def run_evaluate(self, sess, test, tags):
        """
        Evaluates performance on test set
        Args:
            sess: tensorflow session
            test: dataset that yields tuple of sentences, tags
            tags: {tag: index} dictionary
        Returns:
            accuracy
            f1 score
        """
        accs = []
        correct_preds, total_correct, total_preds = 0., 0., 0.
        for words, labels in minibatches(test, self.config.batch_size):
            labels_pred, sequence_lengths = self.predict_batch(sess, words)

            for lab, lab_pred, length in zip(labels, labels_pred, sequence_lengths):
                lab = lab[:length]
                lab_pred = lab_pred[:length]
                accs += [a==b for (a, b) in zip(lab, lab_pred)]
                lab_chunks = set(get_chunks(lab, tags))
                lab_pred_chunks = set(get_chunks(lab_pred, tags))
                correct_preds += len(lab_chunks & lab_pred_chunks)
                total_preds += len(lab_pred_chunks)
                total_correct += len(lab_chunks)

        p = correct_preds / total_preds if correct_preds > 0 else 0
        r = correct_preds / total_correct if correct_preds > 0 else 0
        f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
        acc = np.mean(accs)
        return acc, f1
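A typical call site for this variant, sketched under the assumption that the session, a dev dataset, and the tags dictionary come from the surrounding training loop (all names here are hypothetical):

# Hypothetical usage inside an epoch loop; `model`, `sess`, `dev`, and `tags`
# are assumed to be defined by the surrounding training code.
acc, f1 = model.run_evaluate(sess, dev, tags)
print("dev acc {:.2f} - f1 {:.2f}".format(100 * acc, 100 * f1))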
Example no. 4
0
    def run_evaluate(self, sess, test, tags, target='src'):
        accs = []
        correct_preds, total_correct, total_preds = 0., 0., 0.
        nbatches = (len(test) + self.args.batch_size - 1) // self.args.batch_size
        prog = Progbar(target=nbatches)
        for i, (words, labels, target_words) in enumerate(
                minibatches(test, self.args.batch_size)):
            if target == 'src':
                labels_pred, sequence_lengths = self.predict_batch(
                    sess, words, mode=target, is_training=False)
            else:
                labels_pred, sequence_lengths = self.predict_batch(
                    sess, None, words, mode=target, is_training=False)

            for lab, label_pred, length in zip(labels, labels_pred,
                                               sequence_lengths):
                lab = lab[:length]
                lab_pred = label_pred[:length]
                accs += [a == b for (a, b) in zip(lab, lab_pred)]
                lab_chunks = set(get_chunks(lab, tags))
                lab_pred_chunks = set(get_chunks(lab_pred, tags))
                correct_preds += len(lab_chunks & lab_pred_chunks)
                total_preds += len(lab_pred_chunks)
                total_correct += len(lab_chunks)

            prog.update(i + 1)

        p = correct_preds / total_preds if correct_preds > 0 else 0
        r = correct_preds / total_correct if correct_preds > 0 else 0
        f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
        acc = np.mean(accs)
        return acc, p, r, f1
Example no. 5
0
    def run_evaluate(self, sess, test, tags):
        """
        Evaluates performance on test set
        Args:
            sess: tensorflow session
            test: dataset that yields tuple of sentences, tags
            tags: {tag: index} dictionary
        Returns:
            accuracy
            f1 score
        """
        accs = []
        correct_preds, total_correct, total_preds = 0., 0., 0.
        for words, labels in minibatches(test, self.config.batch_size):
            labels_pred, sequence_lengths = self.predict_batch(sess, words)

            for lab, lab_pred, length in zip(labels, labels_pred,
                                             sequence_lengths):
                lab = lab[:length]
                lab_pred = lab_pred[:length]
                accs += [a == b for (a, b) in zip(lab, lab_pred)]
                lab_chunks = set(get_chunks(lab, tags))
                lab_pred_chunks = set(get_chunks(lab_pred, tags))
                correct_preds += len(lab_chunks & lab_pred_chunks)
                total_preds += len(lab_pred_chunks)
                total_correct += len(lab_chunks)

        p = correct_preds / total_preds if correct_preds > 0 else 0
        r = correct_preds / total_correct if correct_preds > 0 else 0
        f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
        acc = np.mean(accs)
        return acc, f1
Example no. 6
0
    def run_evaluate(self, test, log_step=None, mode='train'):
        """Evaluates performance on test set

        Args:
            test: dataset that yields tuple of (sentences, tags)
            log_step: global step used when writing dev summaries
            mode: 'train' (write dev summaries) or 'evaluate' (save results)

        Returns:
            metrics: (dict) metrics["acc"] = 98.4, ...

        """
        accs = []
        correct_preds, total_correct, total_preds = 0., 0., 0.
        
        get_loss = self.config.early_stopping_metric == 'loss'
        if get_loss:
            loss = 0.0
            weight = 0.0
        
        for words, labels, pred_flags in minibatches(test, self.config.batch_size):
            if get_loss:
                labels_pred, sequence_lengths, batch_loss = self.predict_batch(words, labels=labels, pred_flags=pred_flags, get_loss=get_loss)
                _weight = len(sequence_lengths)/float(self.config.batch_size)
                weight += _weight
                loss += _weight * batch_loss
            else:
                labels_pred, sequence_lengths = self.predict_batch(words, pred_flags=pred_flags, get_loss=get_loss)

            for lab, lab_pred, length in zip(labels, labels_pred,
                                             sequence_lengths):
                lab      = lab[:length]
                lab_pred = lab_pred[:length]
                accs    += [a==b for (a, b) in zip(lab, lab_pred)]

                lab_chunks      = set(get_chunks(lab, self.config.vocab_tags))
                lab_pred_chunks = set(get_chunks(lab_pred,
                                                 self.config.vocab_tags))

                correct_preds += len(lab_chunks & lab_pred_chunks)
                total_preds   += len(lab_pred_chunks)
                total_correct += len(lab_chunks)

        p   = correct_preds / total_preds if correct_preds > 0 else 0
        r   = correct_preds / total_correct if correct_preds > 0 else 0
        f1  = 2 * p * r / (p + r) if correct_preds > 0 else 0
        acc = np.mean(accs)

        if get_loss and mode == 'train':
            loss = loss / weight
            dev_summary = self.sess.run(self.dev_merged, feed_dict={self.eval_loss: loss, self.eval_f1: f1})
            self.dev_file_writer.add_summary(dev_summary, log_step)
            return {"acc": 100*acc, "f1": 100*f1, "loss": loss}
        
        if mode == 'evaluate':
            dataset_name = basename(normpath(test.filename))
            self.save_evaluation_results(dataset_name,f1)
            
        return {"acc": 100*acc, "f1": 100*f1}
Example no. 7
0
  def run_evaluate(self, sess, test, tags):
      """
      Evaluates performance on test set
      Args:
          sess: tensorflow session
          test: dataset that yields tuple of sentences, tags
          tags: {tag: index} dictionary
      Returns:
          accuracy
          f1 score
      """
      accs = []
      correct_preds, total_correct, total_preds = 0., 0., 0.
      for words, labels, iob_gold, mention_type_gold, mentions_gold, word_features in minibatches(test, self.config.batch_size):
          iob_labels_pred, sequence_lengths= self.predict_iob_batch(sess, words, word_features)
          mentions = []
          mention_sizes = []
          count = 0
          for i in range(self.config.batch_size):
              length = sequence_lengths[i]
              mention = find_mentions(iob_labels_pred[i][:length])
              mentions.append(mention)
              mention_sizes.append(len(mention))
              if len(mention) == 0:
                  count += 1
          if count != self.config.batch_size:
              mentions_pred, _ = self.predict_type_batch(sess, words, word_features, mentions)
          else:
              mentions_pred = [[]]*self.config.batch_size
 
          for lab, iob_pred, length, mention, mention_pred, mention_size in zip(labels, iob_labels_pred, sequence_lengths, mentions, mentions_pred, mention_sizes):
              lab = lab[:length]
              iob_pred = iob_pred[:length]
              mention_pred = mention_pred[:mention_size]
              
              lab_pred = find_labels(iob_pred, mention_pred, tags)
              accs += [a==b for (a, b) in zip(lab, lab_pred)]
              lab_chunks = set(get_chunks(lab, tags))
              lab_pred_chunks = set(get_chunks(lab_pred, tags))
              correct_preds += len(lab_chunks & lab_pred_chunks)
              total_preds += len(lab_pred_chunks)
              total_correct += len(lab_chunks)
              
      p = correct_preds / total_preds if correct_preds > 0 else 0
      r = correct_preds / total_correct if correct_preds > 0 else 0
      f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
      acc = np.mean(accs)
      return acc, f1
Example no. 8
0
    def run_evaluate(self, sess, test, tags):
        """
        Evaluates performance on test set
        Args:
            sess: tensorflow session
            test: dataset that yields tuple of sentences, tags
            tags: {tag: index} dictionary
        Returns:
            accuracy
            f1 score
        """
        accs = []
        global Globepoch
        Globepoch += 1
        if Globepoch >= 8:
            OutFile = open("Res1/AWS_GPU_BEST_" + str(Globepoch), 'w')

        correct_preds, total_correct, total_preds = 0., 0., 0.
        for words, labels in minibatches(
                test, self.config.batch_size
        ):  # raw words and tags from main.py are converted into word ids and tag ids here
            labels_pred, sequence_lengths = self.predict_batch(sess, words)

            for lab, lab_pred, length in zip(labels, labels_pred,
                                             sequence_lengths):
                lab = lab[:length]
                lab_pred = lab_pred[:length]
                accs += [a == b for (a, b) in zip(lab, lab_pred)]
                lab_chunks = set(get_chunks(lab, tags))
                lab_pred_chunks = set(get_chunks(lab_pred, tags))
                test2lab = label2ind_ret()
                # print (test2lab)
                if Globepoch >= 8:
                    for lab1 in lab_pred:
                        OutFile.write(test2lab[lab1] + "\n")
                    OutFile.write("\n")

                correct_preds += len(lab_chunks & lab_pred_chunks)
                total_preds += len(lab_pred_chunks)
                total_correct += len(lab_chunks)

        p = correct_preds / total_preds if correct_preds > 0 else 0
        r = correct_preds / total_correct if correct_preds > 0 else 0
        f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
        acc = np.mean(accs)
        if Globepoch >= 8:
            OutFile.close()
        return acc, f1
Example no. 9
0
 def run_evaluate(self, test, print_or_not=False):
     accs = []
     intent_correct = 0
     intent_total = 0
     correct_preds, total_correct, total_preds = 0., 0., 0.
     for words, labels, intents, all_tags in minibatches(test, self.config.batch_size):
         labels_pred, sequence_lengths, pred_intents, score = self.predict_batch(words, all_tags)
         for word_ins, lab, lab_pred, length, intent, pred_intent in\
                 zip(words, labels, labels_pred,
                 sequence_lengths, intents, pred_intents):
             if print_or_not:
                 #words_list = [str(a) for a in words_ins]
                 #lab_list = [str(a) for a in lab]
                 #lab_pred_list = [str(a) for a in lab_pred ]
                 words_list = [self.config.idx2vocab[a] for a in word_ins]
                 lab_list = [self.config.idx2tag[a] for a in lab]
                 lab_pred_list = [self.config.idx2tag[a] for a in lab_pred ]
                 print "||".join(words_list) + "\t" + "||".join(lab_list) \
                         + "\t" + "||".join(lab_pred_list) + "\t" \
                         + str(self.config.idx2intent[intent]) + "\t"\
                         + str(self.config.idx2intent[pred_intent])
             lab = lab[:length]
             lab_pred = lab_pred[:length]
             accs += [a==b for (a,b) in zip(lab, lab_pred)]
             lab_chunks      = set(get_chunks(lab, self.config.vocab_tags))
             lab_pred_chunks = set(get_chunks(lab_pred,self.config.vocab_tags))
             correct_preds += len(lab_chunks & lab_pred_chunks)
             total_preds   += len(lab_pred_chunks)
             total_correct += len(lab_chunks)
             intent_total += 1
             if pred_intent == intent:
                 intent_correct += 1
     p   = correct_preds / total_preds if correct_preds > 0 else 0
     r   = correct_preds / total_correct if correct_preds > 0 else 0
     f1  = 2 * p * r / (p + r) if correct_preds > 0 else 0
     acc = np.mean(accs)
     if intent_total != 0:
         intent_acc = intent_correct / float(intent_total)
     else:
         intent_acc = 0
     return {"acc": 100*acc, "f1": 100*f1, "intent_acc": 100* intent_acc, \
             "intent_correct": intent_correct, "intent_total": intent_total}
Example no. 10
0
    def run_evaluate(self, sess, test, tags):
        """
        Evaluates performance on test set
        Args:
            sess: tensorflow session
            test: dataset that yields tuple of sentences, tags
            tags: {tag: index} dictionary
        Returns:
            accuracy
            f1 score
        """
        accs = []
        correct_preds, total_correct, total_preds = 0., 0., 0.
        output_file = codecs.open("output", 'w', 'UTF-8')
        idx_to_tag = {idx: tag for tag, idx in tags.items()}
        for words, labels in minibatches(test, self.config.batch_size):
            labels_pred, sequence_lengths = self.predict_batch(sess, words)
            for lab, lab_pred, length in zip(labels, labels_pred,
                                             sequence_lengths):
                lab = lab[:length]
                lab_pred = lab_pred[:length]
                accs += [a == b for (a, b) in zip(lab, lab_pred)]
                lab_chunks = set(get_chunks(lab, tags))
                lab_pred_chunks = set(get_chunks(lab_pred, tags))
                correct_preds += len(lab_chunks & lab_pred_chunks)
                total_preds += len(lab_pred_chunks)
                total_correct += len(lab_chunks)
                output_string = ""
                for b, c in zip(lab, lab_pred):
                    split_line = [idx_to_tag[b], idx_to_tag[c]]
                    output_string += ' '.join(split_line) + '\n'
                output_file.write(output_string + '\n')

        p = correct_preds / total_preds if correct_preds > 0 else 0
        r = correct_preds / total_correct if correct_preds > 0 else 0
        f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
        acc = np.mean(accs)
        output_file.close()
        return acc, f1
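The output file written above holds one "gold predicted" tag pair per token, with a blank line between sentences; a hypothetical excerpt (tag names are illustrative, not from the original data):

B-PER B-PER
I-PER O
O O

B-LOC B-LOC
O O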
Example no. 11
0
    def run_evaluate(self, sess, test, tags, test_flag):
        """
        Evaluates performance on test set
        Args:
            sess: tensorflow session
            test: dataset that yields tuple of sentences, tags
            tags: {tag: index} dictionary
        Returns:
            accuracy
            f1 score
        """

        #trie setting
        self.lis1 = []
        self.lis2 = []
        self.lis3 = []
        self.lis4 = []
        self.lis5 = []

        trie.gazette(self.lis1, "data/dic/gazette.txt")
        trie.gazette(self.lis2, "data/dic/thres3.txt")
        trie.gazette_DTTI(self.lis3, "data/dic/DT_analysis.txt")
        trie.gazette_DTTI(self.lis4, "data/dic/TI_analysis.txt")
        trie.gazette(self.lis5, "data/dic/wiki_PS.txt")
        fresult = open("results/result.txt", "w")

        accs = []
        correct_preds, total_correct, total_preds = 0., 0., 0.
        # for i, (words, fw_words, bw_words, labels, postags) in enumerate(minibatches(train, self.config.batch_size)):
        #     fd, _ = self.get_feed_dict(words, fw_words, bw_words, labels, self.config.lr, self.config.dropout)

        total_chunks = []

        for words, fw_words, bw_words, labels, postags, sentences, print_line in minibatches(
                test, self.config.batch_size):

            dict_labels = self.dict_trie(sentences)

            labels_pred, sequence_lengths = self.predict_batch(
                sess, words, fw_words, bw_words, dict_labels, labels,
                print_line, test_flag)

            line_num = 0
            for lab, lab_pred, length in zip(labels, labels_pred,
                                             sequence_lengths):
                lab = lab[:length]
                lab_pred = lab_pred[:length]
                accs += [a == b for (a, b) in zip(lab, lab_pred)]
                lab_chunks = set(get_chunks(lab, tags))
                lab_pred_chunks = set(get_chunks(lab_pred, tags))
                #-------------------------------------------------------
                #print(lab_pred_chunks)
                if test_flag == 1:
                    #print(print_line[line_num][1])
                    fresult.write(print_line[line_num][0] + '\n')
                    #fresult.write(print_line[line_num][1]+'\n')
                    print_chunks = list(lab_pred_chunks)
                    print_chunks.sort(key=lambda chunks: chunks[1])
                    #print(print_chunks)
                    for tag, start, end in print_chunks:
                        print_tag = ''
                        if tag.decode() == 'B_PS':
                            print_tag = 'PS'
                        elif tag.decode() == 'B_LC':
                            print_tag = 'LC'
                        elif tag.decode() == 'B_DT':
                            print_tag = 'DT'
                        elif tag.decode() == 'B_TI':
                            print_tag = 'TI'
                        elif tag.decode() == 'B_OG':
                            print_tag = 'OG'
                        else:
                            print_tag = tag.decode()
                        #print(print_tag+'\t'+str(start)+'\t'+str(end)+'\t'+print_line[line_num][start+2].split()[1])
                        fresult.write(print_line[line_num][start +
                                                           1].split()[1] +
                                      '\t' + print_tag + '\n')
                    #print("")
                    fresult.write('\n')
                    line_num = line_num + 1
                correct_preds += len(lab_chunks & lab_pred_chunks)
                total_preds += len(lab_pred_chunks)
                total_correct += len(lab_chunks)

        p = correct_preds / total_preds if correct_preds > 0 else 0
        r = correct_preds / total_correct if correct_preds > 0 else 0
        f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
        acc = np.mean(accs)

        #self.print_results(total_chunks)

        return acc, f1, p, r
Example no. 12
0
def evaluate(args,
             model,
             tokenizer,
             labels,
             pad_token_label_id,
             best,
             mode,
             prefix="",
             verbose=True):

    eval_dataset = load_and_cache_examples(args,
                                           tokenizer,
                                           labels,
                                           pad_token_label_id,
                                           mode=mode)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    eval_sampler = SequentialSampler(
        eval_dataset) if args.local_rank == -1 else DistributedSampler(
            eval_dataset)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # multi-gpu evaluate
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    logger.info("***** Running evaluation %s *****", prefix)
    if verbose:
        logger.info("  Num examples = %d", len(eval_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    model.eval()
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(args.device) for t in batch)

        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "labels": batch[3]
            }
            if args.model_type != "distilbert":
                inputs["token_type_ids"] = (
                    batch[2] if args.model_type in ["bert", "xlnet"] else None
                )  # XLM and RoBERTa don"t use segment_ids
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            if args.n_gpu > 1:
                tmp_eval_loss = tmp_eval_loss.mean()

            eval_loss += tmp_eval_loss.item()
        nb_eval_steps += 1
        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs["labels"].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids,
                                      inputs["labels"].detach().cpu().numpy(),
                                      axis=0)

    eval_loss = eval_loss / nb_eval_steps
    preds = np.argmax(preds, axis=2)

    label_map = {i: label for i, label in enumerate(labels)}
    preds_list = [[] for _ in range(out_label_ids.shape[0])]
    out_id_list = [[] for _ in range(out_label_ids.shape[0])]
    preds_id_list = [[] for _ in range(out_label_ids.shape[0])]

    for i in range(out_label_ids.shape[0]):
        for j in range(out_label_ids.shape[1]):
            if out_label_ids[i, j] != pad_token_label_id:
                preds_list[i].append(label_map[preds[i][j]])
                out_id_list[i].append(out_label_ids[i][j])
                preds_id_list[i].append(preds[i][j])

    correct_preds, total_correct, total_preds = 0., 0., 0.  # running counters for chunk-level P/R/F1
    for ground_truth_id, predicted_id in zip(out_id_list, preds_id_list):
        # We use the get chunks function defined above to get the true chunks
        # and the predicted chunks from true labels and predicted labels respectively
        lab_chunks = set(get_chunks(ground_truth_id, tag_to_id(args.data_dir)))
        lab_pred_chunks = set(
            get_chunks(predicted_id, tag_to_id(args.data_dir)))

        # Updating the counters
        correct_preds += len(lab_chunks & lab_pred_chunks)
        total_preds += len(lab_pred_chunks)
        total_correct += len(lab_chunks)

    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    new_F = 2 * p * r / (p + r) if correct_preds > 0 else 0

    is_updated = False
    if new_F > best[-1]:
        best = [p, r, new_F]
        is_updated = True

    results = {
        "loss": eval_loss,
        "precision": p,
        "recall": r,
        "f1": new_F,
        "best_precision": best[0],
        "best_recall": best[1],
        "best_f1": best[-1]
    }

    logger.info("***** Eval results %s *****", prefix)
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))

    return results, preds_list, best, is_updated
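A sketch of how the `best` / `is_updated` pair returned above might be used for checkpointing in the surrounding training loop; the `args.output_dir` destination and the save calls are assumptions, not taken from the original code:

# Hypothetical call site; `best` holds the best [precision, recall, f1] seen so far.
best = [0.0, 0.0, 0.0]
results, preds_list, best, is_updated = evaluate(
    args, model, tokenizer, labels, pad_token_label_id, best, mode="dev")
if is_updated:
    # keep the checkpoint that achieved the best dev F1 (assumed save location)
    model_to_save = model.module if hasattr(model, "module") else model
    model_to_save.save_pretrained(args.output_dir)
    tokenizer.save_pretrained(args.output_dir)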
Example no. 13
0
    def run_evaluate(self, sess, test, tags):
        """
        Evaluates performance on test set
        Args:
            sess: tensorflow session
            test: dataset that yields tuple of sentences, tags
            tags: {tag: index} dictionary
        Returns:
            accuracy
            f1 score
        """

        accs = []
        f = open(
            '/home/chiyoon/python/seq/sequence_tagging/predict/prediction.txt',
            'w')
        f2 = open(
            '/home/chiyoon/python/seq/sequence_tagging/predict/output.txt',
            'w')

        correct_preds, total_correct, total_preds = 0., 0., 0.
        for words, labels, pos, inputs, position in minibatches(
                test, self.config.batch_size):
            labels_pred, sequence_lengths = self.predict_batch(
                sess, words, pos)

            for lab, lab_pred, length, wo, idx in zip(labels, labels_pred,
                                                      sequence_lengths, inputs,
                                                      position):
                lab = lab[:length]
                lab_pred = lab_pred[:length]

                ###
                _label = []
                _predict = []
                _w = []

                ##########
                skip_lab = []
                skip_pred = []
                skip_w = []
                skip_idx = []
                for (w, id, label, pred) in zip(wo, idx, lab, lab_pred):
                    _w = w.split("/")[1]
                    if _w != "BL":
                        #print(w)
                        skip_w.append(w)
                        skip_lab.append(label)
                        skip_pred.append(pred)
                        skip_idx.append(id)

                lab = skip_lab
                lab_pred = skip_pred

                ##########

                idx_to_tag = {idx: tag for tag, idx in tags.items()}
                error = False
                for (label, pred) in zip(skip_lab, skip_pred):
                    _label.append(idx_to_tag[label])
                    _predict.append(idx_to_tag[pred])
                    if label != pred:
                        error = True

                for (w, po, pre, label) in zip(skip_w, skip_idx, _predict,
                                               _label):
                    if pre == label:
                        f2.write("{}\t{}\t{}\n".format(w, pre, label))
                    else:
                        f2.write(
                            "{}\t{}\t{} <<<<<<<<<<<<<<<<<<<<<<<<<<\n".format(
                                w, pre, label))
                f2.write("\n")

                ###########   result
                for (w, po, pre, label) in zip(skip_w, skip_idx, _predict,
                                               _label):
                    word = ""
                    for i in w.split("/")[0:-1]:
                        word += i

                    f.write("{}\t{}\t{}\n".format(word, pre, po))
                    #print("{}\t{}\t{}\n".format(word, pre, po))

                f.write("\n")
                #print("\n")

                #################
                ###
                accs += [a == b for (a, b) in zip(skip_lab, skip_pred)]

                lab_chunks = set(get_chunks(skip_lab, tags))
                lab_pred_chunks = set(get_chunks(skip_pred, tags))
                correct_preds += len(lab_chunks & lab_pred_chunks)
                total_preds += len(lab_pred_chunks)
                total_correct += len(lab_chunks)

        p = correct_preds / total_preds if correct_preds > 0 else 0
        r = correct_preds / total_correct if correct_preds > 0 else 0
        f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
        acc = np.mean(accs)
        tf.summary.scalar("accuracy", acc)
        tf.summary.scalar("f1", f1)
        #f.write("f1 scroe : {}".format(f1))
        f2.write("f1 scroe : {}".format(f1))
        print("precision : {}".format(p))
        print("recall : {}".format(r))
        print("f1 scroe : {}".format(f1))

        return acc, f1
Example no. 14
0
def fitness(molecules_here, properties_calc_ls, discriminator, disc_enc_type,
            generation_index, max_molecules_len, device, num_processors,
            writer, beta, data_dir, max_fitness_collector,
            impose_time_adapted_pen):
    ''' 
    Calculate fitness of a population
    All properties are standardized based on the mean & std of ZINC
    
    Parameters:
    molecules_here    (list)         : List of molecule SMILES strings
    properties_calc_ls (list)        : Properties to be calculated (e.g. 'logP', 'SAS', 'RingP', 'QED')
    discriminator     (torch.Model)  : Pytorch classifier
    disc_enc_type     (string)       : Type of encoding shown to the discriminator
    generation_index  (int)          : Index of generation
    max_molecules_len (int)          : Largest mol length
    device            (string)       : Device for neural network  
    num_processors    (int)          : Number of CPUs
    writer            (tensorboardX writer obj) : Tensorboard
    beta              (int)          : Discriminator weight
    data_dir          (str)          : Data Directory
    max_fitness_collector (list)     : List for max fitness values
    impose_time_adapted_pen (bool)   : Impose distribution shift with discriminator
        
    Returns:
    fitness                   (np.array) : Combination of properties and discriminator score
    discriminator_predictions (np.array) : The predictions made by the discriminator
    
    '''
    dataset_x = du.get_dis_encoding(molecules_here, disc_enc_type,
                                    max_molecules_len, num_processors,
                                    generation_index)
    if generation_index == 1:
        discriminator_predictions = np.zeros((len(dataset_x), 1))
    else:
        discriminator_predictions = D.predict(discriminator, dataset_x, device)

    if properties_calc_ls is None:
        fitness = discriminator_predictions

    else:

        molecules_here_unique = list(set(molecules_here))

        ratio = len(molecules_here_unique) / num_processors
        chunks = du.get_chunks(molecules_here_unique, num_processors, ratio)
        chunks = [item for item in chunks if len(item) >= 1]

        logP_results, SAS_results, ringP_results, QED_results = {}, {}, {}, {}
        if 'logP' in properties_calc_ls:
            logP_results = create_parr_process(chunks, 'logP')

        if 'SAS' in properties_calc_ls:
            SAS_results = create_parr_process(chunks, 'SAS')

        if 'RingP' in properties_calc_ls:
            ringP_results = create_parr_process(chunks, 'RingP')

        if 'QED' in properties_calc_ls:
            QED_results = {}
            for smi in molecules_here:
                QED_results[smi] = Chem.QED.qed(Chem.MolFromSmiles(smi))

        logP_calculated, SAS_calculated, RingP_calculated, logP_norm, SAS_norm, RingP_norm, QED_results = standardize_properties(
            molecules_here, logP_results, SAS_results, ringP_results,
            QED_results, properties_calc_ls)
        fitness = (logP_norm) - (SAS_norm) - (RingP_norm)

        writer.add_scalar('max fitness without discr', max(fitness),
                          generation_index)
        writer.add_scalar('avg fitness without discr', fitness.mean(),
                          generation_index)

        max_fitness_collector.append(max(fitness)[0])

        #Use discriminator to shift distribution of population
        if impose_time_adapted_pen:
            if generation_index > 100:
                if len(set(max_fitness_collector[-5:])) == 1:
                    beta = 1000
                    print('Beta cutoff imposed  index: ', generation_index)
                    f = open('{}/beta_change_log.txt'.format(data_dir), 'a+')
                    f.write(str(generation_index) + '\n')
                    f.close()

        # Max fitness without discriminator
        f = open('{}/max_fitness_no_discr.txt'.format(data_dir), 'a+')
        f.write(str(max(fitness)[0]) + '\n')
        f.close()

        # Avg fitness without discriminator
        f = open('{}/avg_fitness_no_discr.txt'.format(data_dir), 'a+')
        f.write(str(fitness.mean()) + '\n')
        f.close()

        print('beta value: ', beta)
        fitness = (beta * discriminator_predictions) + fitness

        # Plot fitness with discriminator
        writer.add_scalar('max fitness with discrm', max(fitness),
                          generation_index)
        writer.add_scalar('avg fitness with discrm', fitness.mean(),
                          generation_index)

        # Max fitness with discriminator
        f = open('{}/max_fitness_discr.txt'.format(data_dir), 'a+')
        f.write(str(max(fitness)[0]) + '\n')
        f.close()

        # Avg fitness with discriminator
        f = open('{}/avg_fitness_discr.txt'.format(data_dir), 'a+')
        f.write(str(fitness.mean()) + '\n')
        f.close()

        # Plot properties
        writer.add_scalar('non standr max logp', max(logP_calculated),
                          generation_index)  # logP plots
        writer.add_scalar('non standr mean logp', logP_calculated.mean(),
                          generation_index)
        writer.add_scalar('non standr min sas', min(SAS_calculated),
                          generation_index)  # SAS plots
        writer.add_scalar('non standr mean sas', SAS_calculated.mean(),
                          generation_index)
        writer.add_scalar('non standr min ringp', min(RingP_calculated),
                          generation_index)  # RingP plots
        writer.add_scalar('non standr mean ringp', RingP_calculated.mean(),
                          generation_index)

        # max logP
        f = open('{}/max_logp.txt'.format(data_dir), 'a+')
        f.write(str(max(logP_calculated)) + '\n')
        f.close()

        # mean logP
        f = open('{}/avg_logp.txt'.format(data_dir), 'a+')
        f.write(str(logP_calculated.mean()) + '\n')
        f.close()

        # min SAS
        f = open('{}/min_SAS.txt'.format(data_dir), 'a+')
        f.write(str(min(SAS_calculated)) + '\n')
        f.close()

        # mean SAS
        f = open('{}/avg_SAS.txt'.format(data_dir), 'a+')
        f.write(str(SAS_calculated.mean()) + '\n')
        f.close()

        # min RingP
        f = open('{}/min_RingP.txt'.format(data_dir), 'a+')
        f.write(str(min(RingP_calculated)) + '\n')
        f.close()

        # mean RingP
        f = open('{}/avg_RingP.txt'.format(data_dir), 'a+')
        f.write(str(RingP_calculated.mean()) + '\n')
        f.close()

    return fitness, logP_calculated, SAS_calculated, RingP_calculated, discriminator_predictions
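The repeated open/write/close blocks above could be collapsed into a small helper; a purely illustrative sketch, not part of the original code:

# Illustrative helper capturing the repeated metric-logging pattern above.
def append_metric(data_dir, filename, value):
    with open('{}/{}'.format(data_dir, filename), 'a+') as f:
        f.write(str(value) + '\n')

# e.g. append_metric(data_dir, 'max_logp.txt', max(logP_calculated))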
Example no. 15
0
def test_chunk():
    tags_dict = load_vocab("../data/tags.txt")
    seq = [10, 3, 6, 12, 12, 6]
    chunks = get_chunks(seq, tags_dict)
    return chunks
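test_chunk() relies on a load_vocab helper that is not shown; a minimal sketch under the assumption that the tag file lists one tag per line, mapped to its line index:

# Minimal sketch of the assumed load_vocab helper: one tag per line,
# mapped to its 0-based line index. The file format is an assumption.
def load_vocab(filename):
    vocab = {}
    with open(filename, encoding="utf-8") as f:
        for idx, line in enumerate(f):
            vocab[line.strip()] = idx
    return vocab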
Example no. 16
0
    def run_evaluate(self,
                     sess,
                     test,
                     test_deps,
                     vocab_words,
                     vocab_tags,
                     print_test_results=False):
        """
        Evaluates performance on test set
        """
        idx_to_words = {}
        if print_test_results:
            idx_to_words = {idx: word for word, idx in vocab_words.iteritems()}

        test_accs = []
        self.config.istrain = False  # set to test first, #batch normalization#
        correct_preds, total_correct, total_preds = 0., 0., 0.
        for words, poss, chunks, labels, \
            btup_idx_list, btup_words_list, btup_depwords_list, btup_deprels_list, btup_depwords_length_list, \
            upbt_idx_list, upbt_words_list, upbt_depwords_list, upbt_deprels_list, upbt_depwords_length_list, \
            btup_formidx_list, upbt_formidx_list in minibatches(test, test_deps, self.config.batch_size):

            labels_pred, sequence_lengths = self.predict_batch(
                sess, words, poss, chunks, btup_idx_list, btup_words_list,
                btup_depwords_list, btup_deprels_list,
                btup_depwords_length_list, upbt_idx_list, upbt_words_list,
                upbt_depwords_list, upbt_deprels_list,
                upbt_depwords_length_list, btup_formidx_list,
                upbt_formidx_list)
            if print_test_results:
                char_ids, word_ids = zip(*words)

            index = 0
            for lab, lab_pred, length in zip(labels, labels_pred,
                                             sequence_lengths):
                lab = lab[:length]
                lab_pred = lab_pred[:length]
                test_accs += map(lambda a_b: a_b[0] == a_b[1],
                                 zip(lab, lab_pred))

                lab_chunks = set(get_chunks(lab, vocab_tags))
                lab_pred_chunks = set(get_chunks(lab_pred, vocab_tags))
                correct_preds += len(lab_chunks & lab_pred_chunks)
                total_preds += len(lab_pred_chunks)
                total_correct += len(lab_chunks)

                if print_test_results:
                    self.logger.info(" ".join(
                        [idx_to_words[w] for w in word_ids[index][:length]]))
                    self.logger.info(" ".join(
                        self.get_aspect_polarity_pairs(lab_chunks)))
                    self.logger.info(" ".join(
                        self.get_aspect_polarity_pairs(lab_pred_chunks)))

                index += 1

        p = correct_preds / total_preds if correct_preds > 0 else 0
        r = correct_preds / total_correct if correct_preds > 0 else 0
        f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
        test_acc = np.mean(test_accs)
        return p, r, f1, test_acc
Example no. 17
0
    def run_evaluate(self,
                     sess,
                     test,
                     vocab_aspect_tags,
                     vocab_polarity_tags,
                     vocab_joint_tags,
                     vocab_words,
                     is_dev=True):
        """
        Evaluates performance on test set
        """
        self.config.istrain = False  # set to test first, #batch normalization#
        idx_to_words = {}
        if self.config.show_test_results:
            idx_to_words = {idx: word for word, idx in vocab_words.iteritems()}
        losses = []
        aspect_test_accs, polarity_test_accs = [], []
        aspect_correct_preds, aspect_total_correct, aspect_total_preds = 0., 0., 0.
        polarity_correct_preds, polarity_total_correct, polarity_total_preds = 0., 0., 0.

        for words, poss, chunks, labels_aspect, labels_polarity, labels_joint in minibatches_for_sequence(
                test, self.config.test_batch_size):

            if self.config.show_test_results:
                if type(words) == tuple:
                    char_ids, word_ids = zip(*words)
                else:
                    char_ids, word_ids = [], words

            aspect_lab_chunks = []
            aspect_lab_pred_chunks = []
            # Just used to evaluate Aspect
            labels_pred, sequence_lengths = self.predict_batch(
                sess, words, poss, chunks, vocab_words, self.aspect_logits,
                self.aspect_transition_params, self.aspect_pred)
            for lab, lab_pred, length in zip(labels_aspect, labels_pred,
                                             sequence_lengths):
                lab = lab[:length]
                lab_pred = lab_pred[:length]
                aspect_test_accs += map(lambda a_b: a_b[0] == a_b[1],
                                        zip(lab, lab_pred))

                lab_chunks = get_chunks(lab, vocab_aspect_tags)
                aspect_lab_chunks.append(lab_chunks)
                lab_chunks = set(lab_chunks)

                lab_pred_chunks = get_chunks(lab_pred, vocab_aspect_tags)
                aspect_lab_pred_chunks.append(lab_pred_chunks)
                lab_pred_chunks = set(lab_pred_chunks)

                aspect_correct_preds += len(lab_chunks & lab_pred_chunks)
                aspect_total_preds += len(lab_pred_chunks)
                aspect_total_correct += len(lab_chunks)

            # Just used to evaluate Polarity
            labels_pred, sequence_lengths = self.predict_batch(
                sess, words, poss, chunks, vocab_words, self.polarity_logits,
                self.polarity_transition_params, self.polarity_pred)
            index = 0
            for lab, lab_pred, length in zip(labels_polarity, labels_pred,
                                             sequence_lengths):
                lab = lab[:length]
                lab_pred = lab_pred[:length]
                polarity_test_accs += map(lambda a_b: a_b[0] == a_b[1],
                                          zip(lab, lab_pred))

                lab_chunks = set(
                    get_polaity_chunks(lab, vocab_polarity_tags,
                                       aspect_lab_chunks[index]))
                lab_pred_chunks = set(
                    get_polaity_chunks(lab_pred, vocab_polarity_tags,
                                       aspect_lab_pred_chunks[index]))
                polarity_correct_preds += len(lab_chunks & lab_pred_chunks)
                polarity_total_preds += len(lab_pred_chunks)
                polarity_total_correct += len(lab_chunks)

                if self.config.show_test_results:
                    self.logger.info(" ".join(
                        [idx_to_words[w] for w in word_ids[index][:length]]))
                    self.logger.info("T: " + " ".join(
                        self.get_aspect_polarity_pairs(
                            aspect_lab_chunks[index], lab_chunks)))
                    self.logger.info("P: " + " ".join(
                        self.get_aspect_polarity_pairs(
                            aspect_lab_pred_chunks[index], lab_pred_chunks)))

                index += 1

            # get loss
            fd, sequence_lengths = self.get_feed_dict(
                words,
                poss,
                chunks,
                labels_aspect=labels_aspect,
                labels_polarity=labels_polarity,
                labels_joint=labels_joint,
                dropout=1.0,
                vocab_aspect_tags=vocab_aspect_tags)
            dev_loss = sess.run(self.loss, feed_dict=fd)
            losses.append(dev_loss)

        aspect_p, aspect_r, aspect_f1 = self.cacul_f1(aspect_correct_preds,
                                                      aspect_total_preds,
                                                      aspect_total_correct)
        aspect_test_acc = np.mean(aspect_test_accs)

        polarity_p, polarity_r, polarity_f1 = self.cacul_f1(
            polarity_correct_preds, polarity_total_preds,
            polarity_total_correct)
        polarity_test_acc = np.mean(polarity_test_accs)

        return aspect_p, aspect_r, aspect_f1, aspect_test_acc, polarity_p, polarity_r, polarity_f1, polarity_test_acc, sum(
            losses) / len(losses)