Example #1
0
    def __init__(self,
                 corpus_file,
                 minfreq=0,
                 howbig=1000,
                 lemmas=True,
                 spec_rels=None,
                 dirname=None,
                 eval_spec_rels=False,
                 lr=False):
        """
        Set up the word vocabulary (``x_dict``) and the relation dictionary
        (``r_dict``) for a corpus, building the cached vocabulary files on
        first use.

        :param corpus_file: path of the corpus; also the prefix of the cached
            vocabulary files ``<corpus_file>.vocab<howbig>`` and
            ``<corpus_file>.rels.vocab<howbig>``
        :param minfreq: forwarded to ``read_vocab`` (presumably a minimum
            frequency threshold -- confirm against ``read_vocab``)
        :param howbig: number of sentences to take into account
        :param lemmas: stored on the instance as ``self.lemmas``
        :param spec_rels: optional collection of relation labels to keep
            distinct; every other label found in the relation file is given
            the id of an extra "OTHER" label
        :param dirname: directory holding ``r_dict.pickle``; it is read when
            ``eval_spec_rels`` is set and written when ``lr`` or ``spec_rels``
            is used
        :param eval_spec_rels: if true, load a previously pickled relation
            dictionary from ``dirname`` and exit the process if it cannot be
            read
        :param lr: if true, use only the two relations "left" and "right"
        """
        self.corpus_file = corpus_file
        self.vocab_file = "{}.vocab{}".format(self.corpus_file, howbig)
        # dependency labels
        self.rel_file = "{}.rels.vocab{}".format(self.corpus_file, howbig)

        self.minfreq = minfreq
        self.howbig = howbig
        self.lemmas = lemmas
        self.lr = lr

        # Read the cached vocabulary; build it first if it is not there yet.
        try:
            self.x_dict = LabelDictionary(
                read_vocab(self.vocab_file, self.minfreq))
        except IOError:
            self.prepare_vocab_dict()
            self.x_dict = LabelDictionary(
                read_vocab(self.vocab_file, self.minfreq))
        print("LabelDictionary created.")

        # NOTE(review): with dirname=None this becomes "None/r_dict.pickle";
        # callers that reach a branch using it are expected to pass dirname.
        r_dict_path = "{}/r_dict.pickle".format(dirname)

        def load_rel_labels():
            # One stripped label per line; the handle is closed on exit.
            with open(self.rel_file) as rel_in:
                return LabelDictionary([line.strip() for line in rel_in])

        if eval_spec_rels:  # in evaluation
            import pickle

            try:
                # NOTE: pickle.load can execute arbitrary code; only point
                # dirname at directories whose contents are trusted.
                with open(r_dict_path, "rb") as pickled:
                    self.r_dict = pickle.load(pickled)
            except IOError:
                sys.exit("r_dict does not exist.")
        elif self.lr:
            self.r_dict = RelationDictionary(["left", "right"])
            self.r_dict.write(r_dict_path)
        else:
            # Read the cached relation labels; build the file if missing.
            try:
                r_dict = load_rel_labels()
            except IOError:
                self.prepare_rel_vocab_dict()
                r_dict = load_rel_labels()

            if spec_rels:
                # Keep the requested relations distinct and collapse every
                # remaining label onto the id of "OTHER".
                self.r_dict = RelationDictionary(spec_rels)
                self.r_dict.add("OTHER")
                self.r_dict.add_fixed_id(
                    (set(r_dict.names) - set(spec_rels)),
                    self.r_dict.get_label_id("OTHER"))
                self.r_dict.write(r_dict_path)
            else:
                self.r_dict = r_dict
        print("Relation/LabelDictionary created.")