def calc_accuracy(self, path_test_file):
     """
     Calculate sentence accuracy and word accuracy of ``self.infer`` on a labeled file.

     The file is read twice through ``dep_sample_generator``: once only to count
     the samples (the progress bar is constructed with the total) and once to
     evaluate them. A sentence counts as correct only when every non-ROOT token
     of the inferred sample has the same ``head`` as in the labeled sample.

     :param path_test_file: path to file containing labeled samples (str)
     :return: sentence_accuracy: correct sentences / total sentences, as a
              fraction (not multiplied by 100) (float)
     :return: word_accuracy: correct heads / total words, as a fraction (float)
              A ratio whose denominator is zero (e.g. an empty file) is reported
              as 0.0 instead of raising ``ZeroDivisionError``.
     """
     # First pass: only count samples so the progress bar knows its total.
     num_samples = sum(1 for _ in dep_sample_generator(path_test_file))
     progress = ProgressBar(num_samples, fmt=ProgressBar.FULL)

     total_words = 0
     total_sentences = 0
     correct_words = 0
     correct_sentences = 0
     for sample in dep_sample_generator(path_test_file):
         total_sentences += 1
         # NOTE(review): the idx of the last token is used as the sentence's
         # word count - assumes token indices are contiguous with ROOT at 0;
         # confirm against dep_sample_generator.
         total_words += sample[-1].idx
         infered_sample = self.infer(sample)
         # Position 0 is ROOT and is not scored, so start from 1.
         matching_heads = sum(
             1 for i in range(1, len(sample))
             if sample[i].head == infered_sample[i].head)
         correct_words += matching_heads
         # The parse is fully correct when every scored token matched.
         if matching_heads == len(sample) - 1:
             correct_sentences += 1
         progress.current += 1
         progress()
     progress.done()
     print('\n')

     # Guard the ratios: an empty test set would otherwise divide by zero.
     if total_sentences:
         sentence_accuracy = 1.0 * correct_sentences / total_sentences
     else:
         sentence_accuracy = 0.0
     if total_words:
         word_accuracy = 1.0 * correct_words / total_words
     else:
         word_accuracy = 0.0
     return sentence_accuracy, word_accuracy
    def perceptron_train(self, num_iterations: int, accuracy_step: int = 10) -> None:
        """
        Train ``self.w`` with the perceptron algorithm over the training file.

        ``self.w`` is reset to zeros and the training file is traversed
        ``num_iterations`` times. For every sample the maximum spanning tree
        under the current weights is computed; when it differs from the
        ground-truth tree, the ground-truth feature values are added to
        ``self.w`` and the feature values of the predicted tree are subtracted.

        Side effects besides updating ``self.w``:

        * every ``accuracy_step`` epochs (only when ``self.path_to_valid_file``
          is not None) validation accuracy is printed, the weights are dumped
          to ``self.weights_file_name`` and a pickled checkpoint is written to
          ``<training_file_path>_epoch_<i>.checkpoint`` (``i`` is 0-based);
        * at the end the weights are dumped again and a pickled summary is
          written to ``<training_file_path>_<epochs>_epochs.results``.

        :param num_iterations: number of iterations to perform (int)
        :param accuracy_step: interval between accuracy calculation (int)
        :return: None
        """
        print("training started")
        self.w = np.zeros(self.num_of_features)
        # One pass only to count samples, so the progress bar knows its total.
        num_samples = sum(
            1 for _ in dep_sample_generator(self.training_file_path))
        st_time = time.time()
        train_word_accuracies = []
        train_sentence_accuracies = []
        for i in range(num_iterations):
            print("iteration: ", i)
            progress = ProgressBar(num_samples, fmt=ProgressBar.FULL)
            total_sentences = 0
            correct_sentences = 0
            total_words = 0
            correct_words = 0
            it_st_time = time.time()
            for idx, sample in enumerate(
                    dep_sample_generator(self.training_file_path)):
                total_sentences += 1
                sample_len = sample[-1].idx

                # Candidate-edge graph looked up by sentence length
                # (presumably precomputed fully-connected graphs - confirm
                # where self.fc_graphs is built).
                successors = self.fc_graphs[sample_len]
                # Point the shared scorer at this sample and the current weights.
                self.dep_weights.update_sample(sample)
                self.dep_weights.update_weights(self.w)
                graph = Digraph(successors, self.dep_weights.get_score)
                argmax_tree = graph.mst().successors
                # Drop nodes without successors so the dict can be compared
                # with the stored ground-truth tree below.
                argmax_tree = {k: v for k, v in argmax_tree.items() if v}
                # Ground-truth tree looked up by sample position in the file.
                ground_truth_successors = self.gt_trees[idx]

                # Work on a copy so the labeled sample keeps its gold heads.
                infered_sample = successors_to_sample(deepcopy(sample),
                                                      argmax_tree)
                # Position 0 is ROOT and is not scored, so start from 1.
                for j in range(1, len(sample)):
                    total_words += 1
                    if sample[j].head == infered_sample[j].head:
                        correct_words += 1

                # Dict equality holds only when both have the same keys with
                # the same values, so this is a full tree comparison.
                if argmax_tree != ground_truth_successors:
                    # Ground-truth features are looked up by sample position;
                    # the predicted tree's features are extracted now.
                    features_ground_truth = self.gt_global_features[idx]
                    features_argmax = self.feature_extractor(
                        infered_sample,
                        self.dicts,
                        self.minimal,
                        use_mcdonald=self.use_mcdonald)
                    # Perceptron update: the dict keys index into self.w and
                    # the dict values are added / subtracted at those indices.
                    self.w[list(features_ground_truth.keys())] += np.array(
                        list(features_ground_truth.values()))
                    self.w[list(features_argmax.keys())] -= np.array(
                        list(features_argmax.values()))
                else:
                    correct_sentences += 1
                progress.current += 1
                progress()

            # Guard the ratios: an empty training file would divide by zero.
            if total_sentences:
                sen_acc = 1.0 * correct_sentences / total_sentences
            else:
                sen_acc = 0.0
            if total_words:
                word_acc = 1.0 * correct_words / total_words
            else:
                word_acc = 0.0
            train_sentence_accuracies.append(sen_acc)
            train_word_accuracies.append(word_acc)
            progress.done()
            print('\n')
            print(
                'iteration/epoch ', i, "- iteration time: %.2f min" %
                ((time.time() - it_st_time) / 60),
                ", train accuracy:: sentence: %.3f " % sen_acc,
                " words: %.3f " % word_acc,
                ", total time: %.2f min" % ((time.time() - st_time) / 60))

            if (i + 1) % accuracy_step == 0 \
                    and self.path_to_valid_file is not None:
                print("validation accuracy calculation step:")
                valid_sent_acc, valid_word_acc = self.calc_accuracy(
                    self.path_to_valid_file)
                print("valid accuracy:: sentence: %.3f" % valid_sent_acc,
                      " words: %.3f" % valid_word_acc)
                self.w.dump(self.weights_file_name)
                print("saved weights @ ", self.weights_file_name)
                # save checkpoint (file name carries the 0-based epoch index)
                path = self.training_file_path + "_epoch_" + str(
                    i) + ".checkpoint"
                ckpt = {
                    'weights': self.w.tolist(),
                    'train_acc': (sen_acc, word_acc),
                    'valid_acc': (valid_sent_acc, valid_word_acc),
                }
                with open(path, 'wb') as fp:
                    pickle.dump(ckpt, fp)
                print("saved checkpoint @ ", path)

        self.w.dump(self.weights_file_name)
        # Number of completed epochs. Equal to the last loop index + 1 when
        # the loop ran, and still defined (0) when it did not run at all,
        # where reading the loop variable would raise NameError.
        epochs_done = len(train_word_accuracies)
        path = self.training_file_path + "_" + str(
            epochs_done) + "_epochs" + ".results"
        ckpt = {
            'weights': self.w.tolist(),
            'train_word_acc': train_word_accuracies,
            'train_sen_acc': train_sentence_accuracies,
        }
        with open(path, 'wb') as fp:
            pickle.dump(ckpt, fp)
        print("saved final results @ ", path)