def save(self):
    """
    Persist the handler state with joblib.dump

    allCorpus, allInfo and descs are written, in that order, each to its
    own file under wikiroid/ with compress=3.
    """
    targets = (
        ('allCorpus', 'wikiroid/allCorpus.dat'),
        ('allInfo', 'wikiroid/allInfo.dat'),
        ('descs', 'wikiroid/descs.dat'),
    )
    for attrName, relPath in targets:
        joblib.dump(getattr(self, attrName), ut.rp(relPath), compress=3)
def load(self):
    """
    Restore the handler state from the dumped files

    allCorpus, allInfo and descs are read back from their files under
    wikiroid/, then the ContextClf and ParamExtr components are created
    with useLoad=True.
    """
    sources = (
        ('allCorpus', 'wikiroid/allCorpus.dat'),
        ('allInfo', 'wikiroid/allInfo.dat'),
        ('descs', 'wikiroid/descs.dat'),
    )
    for attrName, relPath in sources:
        setattr(self, attrName, joblib.load(ut.rp(relPath)))

    # The classifier is created and stored before the extractor is built.
    classifier = contextClf.ContextClf(useLoad=True)
    self.contextClf = classifier
    extractor = paramExtr.ParamExtr(useLoad=True)
    self.paramExtr = extractor
def save(self):
    """
    Save some variables used in this instance using joblib.dump

    indvW2VM, reprDict and distMethods are each written to their own file
    under paramExtr/. An attribute that is None is skipped, so no dump is
    issued for it.
    """
    # Identity test against None: `!= None` would dispatch to the value's
    # own __ne__, which is not guaranteed to return a plain bool.
    if self.indvW2VM is not None:
        joblib.dump(self.indvW2VM, ut.rp("paramExtr/indvW2V.model"))
    if self.reprDict is not None:
        joblib.dump(self.reprDict, ut.rp("paramExtr/reprDict.dat"))
    if self.distMethods is not None:
        joblib.dump(self.distMethods, ut.rp("paramExtr/distMethods.dat"))
def load(self):
    """
    Load variables from dumped files

    vectorizer, categories and clfModel are read from their files under
    contextClf/. If any of the three loads fails, all three attributes are
    reset to None so the instance never holds a partially loaded state.
    """
    try:
        self.vectorizer = joblib.load(ut.rp('contextClf/vectorizer.dat'))
        self.categories = joblib.load(ut.rp('contextClf/categories.dat'))
        self.clfModel = joblib.load(ut.rp('contextClf/clf.model'))
    except Exception:
        # Best-effort load: any ordinary failure falls back to the
        # "nothing loaded" state. KeyboardInterrupt and SystemExit are
        # deliberately not caught here and propagate to the caller.
        self.vectorizer = None
        self.categories = None
        self.clfModel = None
def initialize(self):
    """
    Set up the menu and widgets, then show the first bundled example

    After create_menu() and create_widgets(), the source manager is pointed
    at test1_src.png and loaded with test1_mask.png, the destination
    manager is pointed at test1_target.png and loaded, the destination
    offset is taken from the first entry of testimages.config.offset, and
    the source manager is drawn.
    """
    self.create_menu()
    self.create_widgets()

    # Source side: image path first, then load with the mask path.
    src_manager = self.src_img_manager
    src_manager.set_path(rp('./testimages/test1_src.png'))
    src_manager.load(rp('./testimages/test1_mask.png'))

    # Destination side: image path, load, then its placement offset.
    dst_manager = self.dst_img_manager
    dst_manager.set_path(rp('./testimages/test1_target.png'))
    dst_manager.load()
    from testimages.config import offset
    dst_manager.offset = offset[0]

    src_manager.draw()
def load(self):
    """
    Load variables from dumped files

    indvW2VM, reprDict and distMethods are required: a failure while
    loading any of them propagates to the caller. allW2VM is optional and
    becomes None when it cannot be loaded. Afterwards _buildES is called
    for every (category, feature) pair whose entry in distMethods is 'e'.
    """
    self.indvW2VM = joblib.load(ut.rp("paramExtr/indvW2V.model"))
    self.reprDict = joblib.load(ut.rp("paramExtr/reprDict.dat"))
    self.distMethods = joblib.load(ut.rp("paramExtr/distMethods.dat"))

    try:
        self.allW2VM = joblib.load(ut.rp("paramExtr/allW2V.model"))
    except Exception:
        # The shared model is optional. KeyboardInterrupt and SystemExit
        # are deliberately not caught here.
        self.allW2VM = None

    for cat, methods in self.distMethods.items():
        for feat, method in methods.items():
            if method == 'e':
                self._buildES(cat, feat, self.reprDict[cat][feat])
def _load_example():
    """
    Open the bundled example selected by example_id

    The destination offset is taken from entry example_id - 1 of
    testimages.config.offset. When that lookup raises IndexError the
    source image is opened without a mask.
    """
    source_file = rp('./testimages/test%d_src.png' % example_id)
    target_file = rp('./testimages/test%d_target.png' % example_id)

    try:
        from testimages.config import offset
        self.dst_img_manager.offset = offset[example_id - 1]
        mask_file = rp('./testimages/test%d_mask.png' % example_id)
    except IndexError:
        mask_file = None

    self.src_img_manager.open(source_file, mask_path=mask_file)
    self.dst_img_manager.open(target_file)
def _buildES(self, cat, feat, reprList):
    """
    Build the category's elasticsearch model using corpus

    cat      : category name; its lower-cased form starts the index name
    feat     : feature name, appended unchanged to the lower-cased category
    reprList : iterable of hashable values; duplicates are dropped and each
               distinct value is indexed as a document {"name": value}

    reprList is stored in self.reprDict[cat][feat], the init script is run,
    the documents are bulk-indexed, and finally self.save() is called.
    """
    # Store the list, creating the per-category dict on first use.
    self.reprDict.setdefault(cat, {})[feat] = reprList

    # One identifier serves as both index name and document type.
    lowerCat = cat.lower() + feat

    # NOTE(review): the command line is built by string concatenation and
    # run through the shell, so cat/feat must not contain shell
    # metacharacters - confirm they never come from untrusted input.
    os.system(
        ut.rp('elastic/init_entity_search.sh ') + lowerCat + ' ' + lowerCat)

    actionList = [
        {
            "_index": lowerCat,
            "_type": lowerCat,
            "_source": {
                "name": each
            }
        }
        for each in set(reprList)
    ]

    for success, info in helpers.parallel_bulk(es_client, actionList,
                                               chunk_size=200,
                                               thread_count=12):
        # A single pre-formatted string prints the same text whether
        # print is a statement (Python 2) or a function (Python 3).
        print("%s %s" % (success, info))

    self.save()
def save(self):
    """
    Save some variables used in this instance using joblib.dump

    vectorizer, categories and clfModel are each written to their own file
    under contextClf/ with compress=3. An attribute that is None is
    skipped, so no dump is issued for it.
    """
    # Identity test against None: `!= None` would dispatch to the value's
    # own __ne__, which is not guaranteed to return a plain bool.
    if self.vectorizer is not None:
        joblib.dump(self.vectorizer, ut.rp('contextClf/vectorizer.dat'),
                    compress=3)
    if self.categories is not None:
        joblib.dump(self.categories, ut.rp('contextClf/categories.dat'),
                    compress=3)
    if self.clfModel is not None:
        joblib.dump(self.clfModel, ut.rp('contextClf/clf.model'),
                    compress=3)
def saveCode(self, cat, rawCode):
    """
    Save reply code in reply directory

    cat     : category name; the code is written to reply/<cat>.py
    rawCode : source text, either as text or as UTF-8 encoded bytes

    The file is always written as UTF-8. Raises UnicodeDecodeError when
    rawCode is bytes that are not valid UTF-8; in that case the target
    file is not touched.
    """
    import io

    # Decode before opening, so invalid input cannot leave a truncated file.
    if isinstance(rawCode, (bytes, bytearray)):
        code = rawCode.decode('utf-8')
    else:
        code = rawCode

    # NOTE(review): cat is used verbatim in the path - confirm callers
    # never pass a value containing path separators.
    # The with-block closes the handle even if the write fails.
    with io.open(ut.rp('reply/' + cat + '.py'), 'w', encoding='utf-8') as fp:
        fp.write(code)
def _buildAllW2VM(self, allCorpus):
    """
    Build word2vec model using all corpus

    allCorpus : mapping whose values are iterables of sentences; the
                sentences of every value are pooled into one training set

    NOTE : indvW2V makes each category's model but allW2V is shared among
    categories, so it doesn't have to be rebuilt often.

    The trained model is stored in self.allW2VM and dumped to
    paramExtr/allW2V.model.
    """
    # Each sentence goes through ut.parseSentence and ut.replNum and is
    # then split on single spaces into its token list.
    sentences = [
        ut.replNum(ut.parseSentence(x)).split(' ')
        for v in allCorpus.values()
        for x in v
    ]

    self.allW2VM = gensim.models.Word2Vec(sentences, min_count=1, size=100,
                                          workers=8)

    # Identity test against None rather than `!= None`.
    if self.allW2VM is not None:
        joblib.dump(self.allW2VM, ut.rp("paramExtr/allW2V.model"))
def __init__(self, useLoad=True):
    """
    Parameter extractor constructor
    Initialize each variables used in this instance

    useLoad : when True, the state is restored through self.load().
              When False, the instance starts with empty indvW2VM,
              reprDict and distMethods, tries to reuse a previously dumped
              allW2V model (None if that fails), and calls self.save().
    """
    # useAllW2V chooses between the indvW2V and allW2V word2vec models
    # (presumably True selects the shared allW2V model - confirm at the
    # point of use).
    self.useAllW2V = True
    self.THR = 0.6
    self.W2VCalcMethod = 'max'  # 'avg'

    # Show progress of word2vec
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                        level=logging.INFO)

    if useLoad:
        self.load()
    else:
        try:
            self.allW2VM = joblib.load(ut.rp("paramExtr/allW2V.model"))
        except Exception:
            # The shared model is optional. KeyboardInterrupt and
            # SystemExit are deliberately not caught here.
            self.allW2VM = None
        self.indvW2VM = {}
        self.reprDict = {}
        self.distMethods = {}
        self.save()
#-*- coding:utf-8 -*- from sklearn.externals import joblib import contextClf import paramExtr import util as ut import sys import time sys.path.append(ut.rp('reply/')) """ Main handler to run wikiroid This class provides mapping between flask app and each component """ class Handler(object): def __init__(self, useLoad=True): """ Handler constructor Initialize each variables used in this instance """ self.isBuilding = False if useLoad: self.load() else: self.allCorpus = {} self.allInfo = {} self.descs = [] self.contextClf = contextClf.ContextClf(useLoad=False) self.paramExtr = paramExtr.ParamExtr(useLoad=False) self.save() def save(self):