Beispiel #1
0
 def save(self):
     """
     Persist the handler state to disk.

     Writes self.allCorpus, self.allInfo and self.descs, in that order, to
     their own files under 'wikiroid/' (paths resolved through ut.rp) using
     joblib.dump with compression level 3.
     """
     targets = (
         ('allCorpus', 'wikiroid/allCorpus.dat'),
         ('allInfo', 'wikiroid/allInfo.dat'),
         ('descs', 'wikiroid/descs.dat'),
     )
     for attrName, relPath in targets:
         joblib.dump(getattr(self, attrName), ut.rp(relPath), compress=3)
Beispiel #2
0
 def load(self):
     """
     Restore the handler state from disk.

     Reads allCorpus, allInfo and descs back from their files under
     'wikiroid/' with joblib.load, then creates the context classifier and
     the parameter extractor with useLoad=True.
     """
     sources = (
         ('allCorpus', 'wikiroid/allCorpus.dat'),
         ('allInfo', 'wikiroid/allInfo.dat'),
         ('descs', 'wikiroid/descs.dat'),
     )
     for attrName, relPath in sources:
         setattr(self, attrName, joblib.load(ut.rp(relPath)))

     # The components are created only after the three data files loaded.
     self.contextClf = contextClf.ContextClf(useLoad=True)
     self.paramExtr = paramExtr.ParamExtr(useLoad=True)
 def save(self):
     """
     Save some variables used in this instance using joblib.dump

     indvW2VM, reprDict and distMethods are each written to their own file
     under 'paramExtr/' (paths resolved through ut.rp). An attribute that is
     None is skipped.
     """
     # Identity tests are used on purpose: "!= None" dispatches to the
     # object's own comparison method, which need not return a plain bool.
     if self.indvW2VM is not None:
         joblib.dump(self.indvW2VM, ut.rp("paramExtr/indvW2V.model"))
     if self.reprDict is not None:
         joblib.dump(self.reprDict, ut.rp("paramExtr/reprDict.dat"))
     if self.distMethods is not None:
         joblib.dump(self.distMethods, ut.rp("paramExtr/distMethods.dat"))
Beispiel #4
0
 def load(self):
     """
     Load variables from dumped files

     Reads vectorizer, categories and clfModel from their files under
     'contextClf/'. If any of the three loads fails, all three attributes
     are set to None, including any that had already loaded.
     """
     try:
         self.vectorizer = joblib.load(ut.rp('contextClf/vectorizer.dat'))
         self.categories = joblib.load(ut.rp('contextClf/categories.dat'))
         self.clfModel = joblib.load(ut.rp('contextClf/clf.model'))
     except Exception:
         # Best effort: missing or unreadable dumps leave the instance
         # without a model. "except Exception" (rather than a bare except)
         # lets KeyboardInterrupt and SystemExit propagate.
         self.vectorizer = None
         self.categories = None
         self.clfModel = None
Beispiel #5
0
    def initialize(self):
        """
        Build the UI and preload the first bundled test example.

        Creates the menu and widgets, points the source image manager at
        test1_src.png and loads it with test1_mask.png, points the
        destination image manager at test1_target.png and loads it, applies
        the first entry of testimages.config.offset to the destination
        manager, and finally draws the source manager.
        """
        self.create_menu()
        self.create_widgets()

        source = self.src_img_manager
        source.set_path(rp('./testimages/test1_src.png'))
        source.load(rp('./testimages/test1_mask.png'))

        target = self.dst_img_manager
        target.set_path(rp('./testimages/test1_target.png'))
        target.load()
        # Imported here, after the images are loaded, exactly where the
        # offset is first needed.
        from testimages.config import offset
        target.offset = offset[0]

        source.draw()
Beispiel #6
0
 def load(self):
     """
     Load variables from dumped files

     indvW2VM, reprDict and distMethods are required: a failure to load any
     of them propagates to the caller. allW2VM is optional and falls back to
     None when it cannot be loaded. Afterwards, every (category, feature)
     pair whose entry in distMethods equals 'e' is rebuilt through
     self._buildES using the representation list stored in reprDict.
     """
     self.indvW2VM = joblib.load(ut.rp("paramExtr/indvW2V.model"))
     self.reprDict = joblib.load(ut.rp("paramExtr/reprDict.dat"))
     self.distMethods = joblib.load(ut.rp("paramExtr/distMethods.dat"))
     try:
         self.allW2VM = joblib.load(ut.rp("paramExtr/allW2V.model"))
     except Exception:
         # Best effort: the shared model may not have been built yet.
         # "except Exception" (rather than a bare except) lets
         # KeyboardInterrupt and SystemExit propagate.
         self.allW2VM = None
     for cat in self.distMethods:
         for k, method in self.distMethods[cat].items():
             # NOTE(review): 'e' presumably marks the elasticsearch-backed
             # distance method, since it triggers _buildES -- confirm.
             if method == 'e':
                 self._buildES(cat, k, self.reprDict[cat][k])
Beispiel #7
0
        def _load_example():
            """
            Open the source/target image pair of the selected test example.

            Uses example_id and self from the enclosing scope. The mask is
            only passed along when testimages.config.offset has an entry at
            index example_id - 1; on IndexError the destination offset is
            left untouched and the source is opened without a mask.
            """
            source_file = rp('./testimages/test%d_src.png' % example_id)
            target_file = rp('./testimages/test%d_target.png' % example_id)
            mask_file = None

            try:
                from testimages.config import offset

                # The indexing may raise IndexError; the mask path is then
                # never assigned and stays None.
                self.dst_img_manager.offset = offset[example_id - 1]
                mask_file = rp('./testimages/test%d_mask.png' % example_id)
            except IndexError:
                pass

            self.src_img_manager.open(source_file, mask_path=mask_file)
            self.dst_img_manager.open(target_file)
Beispiel #8
0
    def _buildES(self, cat, feat, reprList):
        """
        Build the category's elasticsearch model using corpus

        Stores reprList under self.reprDict[cat][feat], runs the
        'elastic/init_entity_search.sh' script for the index named
        cat.lower() + feat, bulk-indexes every distinct entry of reprList as
        a {"name": entry} document into that index, prints the result of
        each bulk action, and finally calls self.save().

        cat      -- category name
        feat     -- feature name within the category
        reprList -- iterable of hashable representation values to index
        """
        # "in" replaces dict.has_key, which no longer exists in Python 3.
        if cat not in self.reprDict:
            self.reprDict[cat] = {}
        self.reprDict[cat][feat] = reprList
        lowerCat = cat.lower() + feat
        # NOTE(review): the command line is assembled by string
        # concatenation and run through the shell, so a cat/feat containing
        # spaces or shell metacharacters would alter the command. Confirm
        # that callers only pass trusted names.
        os.system(
            ut.rp('elastic/init_entity_search.sh ') + lowerCat + ' ' +
            lowerCat)

        # One bulk action per distinct representation; set() drops
        # duplicates. Index name and document type are both lowerCat.
        actionList = [
            {
                "_index": lowerCat,
                "_type": lowerCat,
                "_source": {
                    "name": each
                }
            }
            for each in set(reprList)
        ]

        for success, info in helpers.parallel_bulk(es_client,
                                                   actionList,
                                                   chunk_size=200,
                                                   thread_count=12):
            # Single formatted argument: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%s %s" % (success, info))

        self.save()
Beispiel #9
0
 def save(self):
     """
     Save some variables used in this instance using joblib.dump

     vectorizer, categories and clfModel are each written to their own file
     under 'contextClf/' (paths resolved through ut.rp) with compression
     level 3. An attribute that is None is skipped.
     """
     # Identity tests are used on purpose: "!= None" dispatches to the
     # object's own comparison method. For an array-like value (categories
     # may be one -- TODO confirm) that can yield a non-boolean result that
     # cannot be used as an "if" condition.
     if self.vectorizer is not None:
         joblib.dump(self.vectorizer,
                     ut.rp('contextClf/vectorizer.dat'),
                     compress=3)
     if self.categories is not None:
         joblib.dump(self.categories,
                     ut.rp('contextClf/categories.dat'),
                     compress=3)
     if self.clfModel is not None:
         joblib.dump(self.clfModel,
                     ut.rp('contextClf/clf.model'),
                     compress=3)
Beispiel #10
0
 def saveCode(self, cat, rawCode):
     """
     Save reply code in reply directory

     cat     -- category name; the code is written to 'reply/<cat>.py'
                (path resolved through ut.rp), replacing any existing file
     rawCode -- source text, either unicode or a UTF-8 encoded byte string

     The text is always written UTF-8 encoded. A byte string that is not
     valid UTF-8 raises UnicodeDecodeError.
     """
     # Normalise to unicode before touching the file, so invalid input
     # raises without truncating an existing reply module.
     if isinstance(rawCode, unicode):
         code = rawCode
     else:
         code = rawCode.decode('utf-8')
     # NOTE(review): cat is inserted into the path verbatim -- confirm that
     # callers never pass path separators or '..' here.
     # The with-block closes the file even if the write fails.
     with open(ut.rp('reply/' + cat + '.py'), 'w') as fp:
         fp.write(code.encode('utf-8'))
Beispiel #11
0
 def _buildAllW2VM(self, allCorpus):
     """
     Build word2vec model using all corpus
     NOTE : indvW2V builds one model per category, whereas allW2V is shared
     among categories, so it does not have to be rebuilt often.

     allCorpus -- mapping whose values are iterables of raw sentences

     Every sentence is passed through ut.parseSentence and ut.replNum and
     split on single spaces. The resulting model is stored in self.allW2VM
     and dumped to 'paramExtr/allW2V.model'.
     """
     # Flatten all categories into a single list of token lists.
     sentences = [
         ut.replNum(ut.parseSentence(x)).split(' ')
         for v in allCorpus.values()
         for x in v
     ]
     self.allW2VM = gensim.models.Word2Vec(sentences,
                                           min_count=1,
                                           size=100,
                                           workers=8)
     # Defensive guard kept from the original; identity test instead of
     # "!= None" so the model's own comparison methods are never involved.
     if self.allW2VM is not None:
         joblib.dump(self.allW2VM, ut.rp("paramExtr/allW2V.model"))
Beispiel #12
0
 def __init__(self, useLoad=True):
     """
     Parameter extractor constructor
     Initialize each variables used in this instance

     useLoad -- when true, restore the saved state through self.load().
                Otherwise start with empty indvW2VM, reprDict and
                distMethods, reuse a previously dumped allW2V model if one
                can be loaded, and write the fresh state out through
                self.save().
     """
     # useAllW2V selects which word2vec model is used: the shared allW2V
     # model (True) or the per-category indvW2V models (False).
     self.useAllW2V = True
     self.THR = 0.6
     self.W2VCalcMethod = 'max'  # 'avg'
     # Log at INFO level so that word2vec progress is shown
     logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                         level=logging.INFO)
     if useLoad:
         self.load()
     else:
         try:
             self.allW2VM = joblib.load(ut.rp("paramExtr/allW2V.model"))
         except Exception:
             # Best effort: the shared model may not exist yet.
             # "except Exception" (rather than a bare except) lets
             # KeyboardInterrupt and SystemExit propagate.
             self.allW2VM = None
         self.indvW2VM = {}
         self.reprDict = {}
         self.distMethods = {}
         self.save()
Beispiel #13
0
#-*- coding:utf-8 -*-
from sklearn.externals import joblib
import contextClf
import paramExtr
import util as ut
import sys
import time
# Add the path returned by ut.rp('reply/') to the import search path, so
# that modules stored there can be imported by name.
sys.path.append(ut.rp('reply/'))

"""
Main handler to run wikiroid
This class provides mapping between flask app and each component
"""
class Handler(object):
    def __init__(self, useLoad=True):
        """
        Set up the handler, either from saved state or from scratch.

        useLoad -- when true, restore the state through self.load();
                   otherwise start with empty containers and components
                   created with useLoad=False, then write that state out
                   through self.save().
        """
        self.isBuilding = False

        if useLoad:
            self.load()
            return

        # Fresh start: nothing is read from disk.
        self.allCorpus = {}
        self.allInfo = {}
        self.descs = []
        self.contextClf = contextClf.ContextClf(useLoad=False)
        self.paramExtr = paramExtr.ParamExtr(useLoad=False)
        self.save()

    def save(self):