コード例 #1
0
    def __init__(self, sentences, filename=None):
        """Configure the phrase model, then load it from disk or build it.

        Args:
            sentences: Stored unchanged on ``self.sentences``; not otherwise
                touched here (presumably consumed by ``build()`` -- confirm).
            filename: Name of the model file inside the models folder. When
                ``None``, a name is derived from the threshold, decay,
                dataset and tokenizer settings.

        Side effects:
            Sets ``self.filepath``. If a file already exists at that path,
            ``self.phrases`` is set from ``Phrases.load``; otherwise
            ``self.build()`` is called.
        """
        # Fixed model settings.
        self.sentences = sentences
        self.dataset = "CASEREPORT"
        self.tokenizer = "RAW"
        self.prune_stopwords = stopwords("pubmed")
        self.phrases = None
        self.threshold = 250
        self.decay = 2
        self.bigram_iter = 3

        # Models are kept in <parent of this file's directory>/data/models.
        this_dir = os.path.dirname(os.path.abspath(__file__))
        models_dir = os.path.join(os.path.dirname(this_dir), 'data', 'models')

        # NOTE(review): bigram_iter is not part of the default name, so a
        # change to it alone maps to the same file -- confirm this is intended.
        if filename is None:
            name_fields = (self.threshold, self.decay, self.dataset, self.tokenizer)
            filename = "PHRASE_%s_%s_%s_%s" % name_fields
        self.filepath = os.path.join(models_dir, filename)

        # Build only when no previously saved model is found at the path.
        if not os.path.isfile(self.filepath):
            logging.info("CREATE - creating phrase data..")
            self.build()
        else:
            logging.info("LOADING - loading phrase data..")
            self.phrases = Phrases.load(self.filepath)
コード例 #2
0
ファイル: Analyzers.py プロジェクト: waternk/medical-text
 def __init__(self):
     self.stopwords = stopwords("pubmed_v3")