def __init__(self, mode='train'):
    """Initialise the question-word pair feature.

    Runs the ``ClassicalFeature`` initialiser, registers ``_extract`` as the
    only entry of ``extract_stage``, and derives one column name for every
    unordered pair (repeats allowed) of the words in ``self.words``.

    :param mode: forwarded unchanged to ``ClassicalFeature.__init__``.
    """
    ClassicalFeature.__init__(self, mode)
    self.extract_stage = [self._extract]

    # Only the six tokens below are active.  A wider vocabulary is kept here
    # for reference but is disabled:
    #   uni:  'if', 'can', 'should'
    #   do:   'doe', 'do', 'did'
    #   be:   'is', 'are'
    #   will: 'will', 'would'
    # NOTE(review): 'whi' looks like a stemmed form of "why" - confirm.
    self.words = ['what', 'whi', 'which', 'how', 'where', 'when']

    # Each pair is a 2-tuple of words, e.g. ('what', 'how') -> 'what_how'.
    word_pairs = combinations_with_replacement(self.words, 2)
    self.columns = ['_'.join(pair) for pair in word_pairs]
def __init__(self, gtype, mode='train'):
    """Initialise a graph-backed feature and load its graph.

    :param gtype: ``'concurrence'`` loads from ``UnDirectGraph``; any other
        value loads from ``UnDirectWeightGraph``.
    :param mode: forwarded unchanged to ``ClassicalFeature.__init__``.
    """
    ClassicalFeature.__init__(self, mode)

    # The conditional expression is lazy, so only the selected class name is
    # ever looked up - same as the original if/else.
    graph_cls = UnDirectGraph if gtype == 'concurrence' else UnDirectWeightGraph

    # ``load()`` yields a pair that is unpacked into the two attributes.
    self.seq2id, self.graph = graph_cls.load()
def __init__(self, mode='train'):
    """Initialise a TF-IDF based feature and cache its vocabulary.

    Stores the object returned by ``TFIDF.load()`` on ``self.idf``,
    registers ``_extract`` as the only entry of ``extract_stage``, and keeps
    the result of ``get_feature_names()`` on ``self.idf_vocab``.

    :param mode: forwarded unchanged to ``ClassicalFeature.__init__``.
    """
    ClassicalFeature.__init__(self, mode)

    tfidf = TFIDF.load()
    self.idf = tfidf
    self.extract_stage = [self._extract]

    # NOTE(review): if ``TFIDF.load()`` returns a scikit-learn vectorizer,
    # ``get_feature_names()`` is gone in scikit-learn >= 1.2 (replaced by
    # ``get_feature_names_out()``) - confirm against the pinned version.
    self.idf_vocab = tfidf.get_feature_names()
def __init__(self, mode='train'):
    """Initialise a TF-IDF based feature.

    Stores the object returned by ``TFIDF.load()`` on ``self.idf`` and
    registers ``_extract`` as the only entry of ``extract_stage``.

    :param mode: forwarded unchanged to ``ClassicalFeature.__init__``.
    """
    ClassicalFeature.__init__(self, mode)

    tfidf = TFIDF.load()
    self.idf = tfidf

    stages = [self._extract]
    self.extract_stage = stages
def _init_predict_corpus(self):
    """Load the test-mode features into ``self.x_submission``.

    The data comes from ``ClassicalFeature.load`` called with
    ``mode='test'``.
    """
    submission_features = ClassicalFeature.load(mode='test')
    self.x_submission = submission_features
def _init_train_corpus(self):
    """Load the training features and their labels.

    Sets ``self.x`` to the result of ``ClassicalFeature.load()`` and
    ``self.y`` to the ``is_duplicate`` column of the CSV located at
    ``config.origin_train_file``.

    :raises ValueError: if the CSV has no ``is_duplicate`` column (raised by
        ``pandas.read_csv`` because of ``usecols``).
    """
    self.x = ClassicalFeature.load()

    # Only the label column is needed, so ask pandas to skip materialising
    # every other column of the training file.
    labels = pd.read_csv(config.origin_train_file, usecols=['is_duplicate'])
    self.y = labels['is_duplicate']
def __init__(self, mode='train'):
    """Initialise a feature backed by the object from ``W2V.load()``.

    Registers ``_extract`` as the only entry of ``extract_stage`` and stores
    the loaded object on ``self.model``.

    :param mode: forwarded unchanged to ``ClassicalFeature.__init__``.
    """
    ClassicalFeature.__init__(self, mode)

    stages = [self._extract]
    self.extract_stage = stages

    w2v_model = W2V.load()
    self.model = w2v_model
def __init__(self, mode='train'):
    """Initialise a feature with two extraction stages and a filter list.

    ``extract_stage`` holds ``_init_prob`` followed by ``_extract``.
    ``filter_words`` is a list containing the items of
    ``Punctuations.PUNCTUATIONS`` followed by the items of
    ``StopWords.StopWordsEN``.

    :param mode: forwarded unchanged to ``ClassicalFeature.__init__``.
    """
    ClassicalFeature.__init__(self, mode)
    self.extract_stage = [self._init_prob, self._extract]

    # Build the combined list in place: punctuation first, stop words after.
    ignored = list(Punctuations.PUNCTUATIONS)
    ignored.extend(StopWords.StopWordsEN)
    self.filter_words = ignored
def __init__(self, mode='train'):
    """Initialise a feature whose stages are ``init_idf`` then ``_extract``.

    :param mode: forwarded unchanged to ``ClassicalFeature.__init__``.
    """
    ClassicalFeature.__init__(self, mode)

    # Order matters in the list: ``init_idf`` is placed before ``_extract``.
    stages = [self.init_idf]
    stages.append(self._extract)
    self.extract_stage = stages