def load_neg_from_files(source_dir):
    sources = glob.glob(source_dir)
    total = len(sources)
    for idx, source in enumerate(sources):
        progress("Load negation tags from %s" % source, total, idx)
        content = json.load(open(source))
        dp.save_negations(content, tagged_as='manually')
def __loadData(self):
    if self.entry.get() != '':
        if self.var2.get() == 1:
            gridNumber = int(self.entry.get())
            self.grid = DataProvider.getNQueensGrid(gridNumber)
            self.__draw_grid(len(self.grid))
            self.__draw_puzzle(self.grid)
        else:
            gridNumber = int(self.entry.get())
            self.grid = DataProvider.generateSudoku9x9(gridNumber)
            self.__draw_puzzle(self.grid)
def ViewSave(result, source):
    os.system('clear')
    if not result:
        return
    op = raw_input("Done! Save result for %s? [y/n] > " % source)
    if op.lower() == 'n':
        op = raw_input("Are you sure? [y/n] > ")
        if op.lower() == 'y':
            return
    dp.save_negations(result, tagged_as='manually')
    if tofile:
        save(result, "negtag_%s" % source, tofile, overwrite=False)
def analyze(opinions, tofile=None):
    # #------- Execute Function -------#
    analyzed = []
    _ids = []
    total = len(opinions)
    fails = 0
    for idx, opinion in enumerate(opinions):
        progress("Analyzing %s (%05.2f%%)" % (opinion['source'], 100.0 * fails / total), total, idx)
        try:
            _id = md5.new(str(opinion['category']) + opinion['text'].encode('ascii', 'ignore')).hexdigest()
            if not dp.get_opinion(_id) and _id not in _ids:
                _ids.append(_id)
                tokens = an.analyze(opinion['text'])
                if not tokens:
                    raise Exception("Empty analysis")
                analysis = {}
                analysis['_id'] = _id
                analysis['category'] = opinion['category']
                analysis['idx'] = opinion['idx']
                analysis['source'] = opinion['source']
                analysis['text'] = [{
                    'word': token['form'],
                    'lemma': token['lemma'],
                    'tag': token['tag']
                } for token in tokens]
                analyzed.append(analysis)
            if idx % 500 == 0:  # partial dump
                dp.save_opinions(analyzed)
                analyzed = []
        except Exception as e:
            fails += 1
            log("Reason : %s for '%s' (at %s)" % (str(e), opinion['text'].encode('ascii', 'ignore'), opinion['source']))
        except KeyboardInterrupt:
            fails += 1
            log("Reason : Interrupted on '%s' (at %s)" % (opinion['text'].encode('ascii', 'ignore'), opinion['source']))
    dp.save_opinions(analyzed)
    if tofile:
        save(analyzed, "analyzed_%s" % opinions[0]['source'], tofile)
    return analyzed
def predict_untagged(self, limit=None, tofile=None):
    opinions = dp.get_untagged(limit, 666)
    results = {}
    total = opinions.count(with_limit_and_skip=True)
    for idx, opinion in enumerate(opinions):
        progress("Predicting on new data", total, idx)
        results[opinion['_id']] = []
        for X in dp.get_text_embeddings(opinion['text'], self.wleft, self.wright)[0]:
            X = X.reshape((1, -1))
            Y = self.model.predict(X)
            Y = (round(Y) == 1)  # 0 <= Y <= 1 -- Round is ok?
            results[opinion['_id']].append(Y)
    if tofile:
        save(results, "prediction", tofile, overwrite=False)
    # dp.save_negation(result, tagged_as='automatically')
    return results
def start_crawl(db: DataBaseConnector):
    try:
        while 1:
            url = "https://rent.591.com.tw/?kind=0&region=1&order=posttime&orderType=desc"
            subjects = DataProvider.get_subjects_from_url(url)
            manager = NotificationManager()
            if subjects is not None:
                new_subjects = list(db.update_subject(subjects))
                # print(f"new_subjects: {new_subjects}")
                for subject in new_subjects:
                    # print(f"subject: {subject}")
                    user_ids = db.get_subscribe_user_from_subject(subject)
                    # print(f"{len(user_ids) > 0}")
                    if len(user_ids) > 0:
                        print(f"user_ids: {user_ids}")
                        user_tokens = db.get_user_tokens(user_ids)
                        if len(user_tokens) > 0:
                            print(f"user_tokens: {user_tokens}")
                            manager.send_push_notification(user_tokens, subject)
            else:
                print("subject is none.")
            time.sleep(20)
    except:
        raise AssertionError("Oops!", str(sys.exc_info()[1]), "occurred.")
def main(keys, rebuild_db):
    # collect all data
    collections = []
    # loop until keys is empty
    while len(keys) > 0:
        # remove the first index from keys
        key = keys.pop(0)
        if not key:
            continue
        # init read progress
        Progress.updateReadProgress(0, 0, search_key=key)
        # get the data
        result = DataProvider.get(key, Progress.updateReadProgress)
        if len(result["collections"]) == int(result["count"]):
            collections.extend(result["collections"])
        else:
            # it failed, put the key back
            keys.append(key)
    # saving
    DBProvider.saveAndUpdate(db_host, db_usr, db_pwd, db_schema, db_table,
                             collections, Progress.updateSaveProgress, rebuild_db)
def DisplayMenu():
    os.system('clear')
    title("MENU")
    print "0 . exit"
    for i in range(sources_size):
        qty = len(dp.get_tagged('manually', sources[i]))
        print i + 1, ".", "%-20s" % sources[i], "(%i)" % qty
    return
def fit_tagged(self, neg_as, testing_fraction=0.2, verbose=0, early_monitor='val_binary_accuracy', limit=None):
    opinions = dp.get_tagged('manually')
    if limit:  # Only for testing
        opinions = opinions.limit(limit)
    total = opinions.count(with_limit_and_skip=True)
    if total == 0:
        raise Exception('Nothing to train')
    callbacks = []
    if early_monitor:
        callbacks.append(EarlyStopping(
            monitor=early_monitor,
            min_delta=0,
            patience=2,
            mode='auto',
            verbose=0
        ))
    X, Y = [], []
    for idx, opinion in enumerate(opinions):
        progress("Getting embeddings for training (%i words)" % len(opinion['text']), total, idx)
        x_curr, y_curr = dp.get_text_embeddings(opinion['text'], self.wleft, self.wright, neg_as=neg_as)
        X += x_curr
        Y += y_curr
    X = np.array(X)
    Y = np.array(Y)
    self.model.fit(X, Y,
                   callbacks=callbacks,
                   batch_size=32,
                   epochs=100,
                   validation_split=testing_fraction,
                   verbose=verbose)
    scores = self.model.evaluate(X, Y, batch_size=32, verbose=verbose)
    scores = [round(score * 100, 1) for score in scores]
    scores = zip(self.model.metrics_names, scores)
    log('MODEL EVALUATION\n' + str(scores), level='info')
    print
    for metric, score in scores:
        print "%-20s: %.1f%%" % (metric, score)
    print "_________________________________________________________________"
    return scores
def predict_opinion(self, opinion):
    x = [np.array(dp.get_word_embedding(token)) for token in opinion]
    rest = self.window - len(x) % self.window
    if rest > 0:
        x.extend([self.end_vecotr for i in range(rest)])
    X = np.array([
        x[i * self.window:(i + 1) * self.window]
        for i in range(len(x) / self.window)
    ])
    Y = self.model.predict(X).flatten()
    return [round(y) == 1 for y in Y.tolist()[:len(opinion)]]
def __init__(self, wleft, wright):
    # Parameters calculation
    vec_size = dp.get_size_embedding()
    input_dim = vec_size * (wright + wleft + 1)
    # Attributes settings
    self.wright = wright
    self.wleft = wleft
    self.dimension = input_dim
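# Illustrative sizing example (the 300-dimensional embedding size is an
# assumption, not taken from dp.get_size_embedding()): with wleft=2 and
# wright=2 the context window spans 5 words, so
#     input_dim = 300 * (2 + 2 + 1) = 1500
# i.e. the target word plus two neighbours on each side, concatenated.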
def runTestSudoku(N):
    timeResults = []
    for n in range(1, N):
        if n % 8 == 0:
            print "iter ", n
        start = time.time()
        solved = SudokuSolver.solveBacktracking(DataProvider.generateSudoku16x16(n))
        end = time.time()
        timeResults.append(end - start)
    f = open("--.csv", 'wb')
    try:
        writer = csv.writer(f)
        writer.writerow(('number of elements', 'time'))
        for n in range(32):
            writer.writerow((n + 1, timeResults[n]))
    finally:
        f.close()
    print "finished"
def runTestsNQueens(N):
    timeResults = []
    for n in range(8, N):
        start = time.time()
        solved = NQueensSolver.solveNQueensForwardChecking(DataProvider.getNQueensGrid(n), 0)
        end = time.time()
        timeResults.append(end - start)
        print "iteration number", n
    f = open("--.csv", 'wb')
    try:
        # plt.plot(timeResults, range(8, N))
        # plt.axis([0, 30, 8, 25])
        # plt.show()
        writer = csv.writer(f)
        writer.writerow(('size', 'time'))
        for n in range(1, N - 7):
            writer.writerow((n + 7, timeResults[n - 1]))
    finally:
        f.close()
    print "finished"
def fit_tagged(self, testing_fraction=0.2, verbose=2, neg_as=False):
    opinions = dp.get_tagged('manually')
    if not opinions:
        raise Exception('Nothing to train')
    X, Y = [], []
    total = opinions.count(with_limit_and_skip=True)
    for idx, opinion in enumerate(opinions):
        progress("Loading training data", total, idx)
        x_curr, y_curr = self.format_opinion(opinion['text'], neg_as)
        X += x_curr
        Y += y_curr
    self.fit(X, Y, testing_fraction, verbose)
    scores = self.get_scores(X, Y, verbose)
    if verbose != 0:
        log('MODEL EVALUATION\n' + str(scores), level='info')
        print
        for metric, score in scores:
            print "%-20s: %.1f%%" % (metric, score)
        print "_________________________________________________________________"
    return scores
def format_opinion(self, opinion, neg_as=False):
    x_curr = [np.array(dp.get_word_embedding(token['word'])) for token in opinion]
    y_curr = [
        token.get('negated') if token.get('negated') is not None else neg_as
        for token in opinion
    ]
    rest = self.window - len(x_curr) % self.window
    if rest > 0:
        x_curr.extend([self.end_vecotr for i in range(rest)])
        y_curr.extend([False for i in range(rest)])
    X = [
        x_curr[i * self.window:(i + 1) * self.window]
        for i in range(len(x_curr) / self.window)
    ]
    Y = [
        y_curr[i * self.window:(i + 1) * self.window]
        for i in range(len(y_curr) / self.window)
    ]
    return X, Y
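# Worked example of the chunking above (window size 5 is an assumed value, not
# from the original source): a 7-token opinion gives rest = 5 - 7 % 5 = 3, so
# three padding vectors (self.end_vecotr) and three False labels are appended,
# and the resulting 10 items are split into two sequences of length 5.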
def test(self):
    ftest, ft = self.test_functions(0)
    testDataProvider = DataProvider(self.lang, self.testFile, self.hotFile, self.mutiNameFile,
                                    self.testAttFile, self.batch_size, self.cache_size, 0, 0,
                                    self.word_vocab, self.kb_vocab, self.kbp_type_vocab, self.fb_type_vocab)
    testDataProvider.reset()
    count = 0
    right = 0
    start = time.asctime()
    while testDataProvider.isEnd() == 0:
        aBatch, fea, lab = testDataProvider.readNextBatch()
        (rlt, num) = self.testOneBatch(aBatch, fea, lab, ftest, ft)
        right = right + rlt
        count = count + num
    print 'total: %d groups, right %d groups (%.2f %%)' % (count, right, right * 100.0 / count)
    end = time.asctime()
    print 'time from %s to %s' % (start, end)
def input_guess(self):
    from analyzer import Analyzer
    from CorpusReader import review_correction
    an = Analyzer()
    while True:
        # try:
        os.system('clear')
        print '\n\033[4mYOUR SENTENCE\033[0m'
        sentence = raw_input("> ")
        if not sentence:  # exit
            os.system('clear')
            break
        sentence = review_correction(sentence)
        analized_sentence = an.analyze(sentence)
        analized_sentence = [{'word': item['form']} for item in analized_sentence]
        result = []
        for X in dp.get_text_embeddings(analized_sentence, self.wleft, self.wright)[0]:
            X = X.reshape((1, -1))
            Y = self.model.predict(X)
            Y = (round(Y) == 1)
            result.append(Y)
        os.system('clear')
        print '\n\033[4mPREDICTION RESULT\033[0m'
        print '>', ' '.join([
            "%s" % ("\033[91m" + wd + "\033[0m" if tg else wd)
            for wd, tg in zip([text['word'] for text in analized_sentence], result)
        ])
        # except Exception as e:
        #     print 'An error has occurred during processing (', str(e), ")"
        raw_input("\nPress enter to continue...")
def predict_untagged(self, tofile=None, limit=None):
    opinions = list(dp.get_untagged(limit=limit))
    results = {}
    total = len(opinions)  # opinions.count(with_limit_and_skip=True)
    for idx, opinion in enumerate(opinions):
        progress("Predicting on new data (%i words)" % len(opinion['text']), total, idx)
        x_curr = [
            np.array(dp.get_word_embedding(token['word']))
            for token in opinion['text']
        ]
        rest = self.window - len(x_curr) % self.window
        if rest > 0:
            x_curr.extend([self.end_vecotr for i in range(rest)])
        X = np.array([
            x_curr[i * self.window:(i + 1) * self.window]
            for i in range(len(x_curr) / self.window)
        ])
        try:
            Y = self.model.predict(X).flatten()
        except:
            print 'ERROR'
        results[opinion['_id']] = [
            round(y) == 1 for y in Y.tolist()[:len(opinion['text'])]
        ]
        if idx % 500 == 0:  # partial dump
            dp.save_negations(results, tagged_as='automatically')
            results = {}
    if tofile:
        save(results, "predict_untagged_LSTM_window_%i" % self.window, tofile)
    dp.save_negations(results, tagged_as='automatically')
    return results
def train(self):
    iter = 0
    lr = float(self.init_lr)
    (fcost, fupdata) = self.train_functions(1)
    trainDataProvider = DataProvider(self.lang, self.trainFile, self.hotFile, self.mutiNameFile,
                                     self.trainAttFile, self.batch_size, self.cache_size, 0, 1,
                                     self.word_vocab, self.kb_vocab, self.kbp_type_vocab, self.fb_type_vocab)
    start = time.clock()
    while iter < self.epoc:
        cost = 0
        count = 0
        trainDataProvider.reset()
        beginTime = time.time()
        while trainDataProvider.isEnd() == 0:
            aBatch, fea, lab = trainDataProvider.readNextBatch()
            # for aGroup in aBatch:
            #     for s in aGroup.samples:
            #         print s.toString()
            # print fea
            # print lab
            tmp = self.processOneBatch(aBatch, fea, lab, lr, fcost, fupdata)  # return cost
            if tmp >= 0:
                cost = cost + tmp
                count = count + self.batch_size
            if count / self.batch_size % 10 == 0 and count > 0:
                fTime = time.time() - beginTime
                print "iter: %d, lr: %.6f, processed count: %d , avg cost: %.6f, speed: %.2f ins/s" \
                    % (iter, lr, count, cost / count, count / fTime)
        self.saveModel(iter)
        iter = iter + 1
        if iter >= self.declr_start:
            lr = lr / 2
    end = time.clock()
    print 'time is %f' % (end - start)
def load_corpus_negation(sources='../../corpus/corpus_variado_sfu_neg/*/*.xml', tofile=None):
    sources = glob.glob(sources)
    total = len(sources)
    isneg, tmpisneg = False, None
    opinions = []
    for idx, source in enumerate(sources):
        progress("Reading negation corpus_variado_sfu (%s)" % source.split('/')[-2], total, idx)
        lines = open(source).readlines()
        tokens = []
        text = ""
        for line in lines:
            content = line.strip()
            if not isinstance(content, unicode):
                content = unicode(content, 'utf8')
            if content.startswith("<?xml"):
                regex = re.compile("polarity=\"(.*?)\"", re.DOTALL)
                category = regex.findall(content)[0]
                category = 20 if category == 'negative' else 80
                continue
            elif content.startswith("<scope"):
                isneg = True
                continue
            elif content.startswith("</scope"):
                isneg = False
                continue
            elif content.startswith("<negexp"):
                tmpisneg = isneg
                isneg = None
                continue
            elif content.startswith("</negexp"):
                isneg = tmpisneg
                tmpisneg = None
                continue
            elif content.startswith("<v ") or \
                 content.startswith("<s ") or \
                 content.startswith("<f ") or \
                 content.startswith("<p ") or \
                 content.startswith("<r ") or \
                 content.startswith("<a ") or \
                 content.startswith("<d ") or \
                 content.startswith("<c ") or \
                 content.startswith("<n ") or \
                 content.startswith("<w ") or \
                 content.startswith("<z ") or \
                 content.startswith("<i "):
                forms = re.compile("wd=\"(.*?)\"", re.DOTALL).findall(content)
                lemma = re.compile("lem=\"(.*?)\"", re.DOTALL).findall(content)
                tag = re.compile("pos=\"(.*?)\"", re.DOTALL).findall(content)
                if forms and lemma:
                    forms = forms[0]
                    lemma = lemma[0]
                elif lemma:
                    forms = lemma[0]
                    lemma = lemma[0]
                else:
                    # math sign, it only has a POS tag
                    continue
                if not tag:
                    tag = "cs"
                else:
                    tag = tag[0]
                forms = forms.split('_') if not tag.startswith("NP") else forms
                for form in forms:
                    tokens.append({
                        'form': form,
                        'lemma': lemma,
                        'tag': tag.upper(),
                        'negated': isneg,
                    })
                    text += " " + form
            else:
                # rare cases such as >
                pass
        _id = md5.new(str(category) + text.encode('ascii', 'ignore')).hexdigest()
        if not dp.get_opinion(_id):
            opinion = {}
            opinion['_id'] = _id
            opinion['category'] = category
            opinion['idx'] = idx + 1
            opinion['source'] = 'corpus_variado_sfu'
            opinion['tagged'] = 'manually'
            opinion['text'] = [{
                'word': token['form'].lower(),
                'lemma': token['lemma'].lower(),
                'tag': token['tag'],
                'negated': token['negated']
            } for token in tokens]
            opinions.append(opinion)
    dp.save_opinions(opinions)
    if tofile:
        save(opinions, "from_corpus_sfu_negation", tofile)
    return len(opinions)
def main(sids, logger):
    SuiteList = DataProvider.getCaseData(logger, sids)
    logger.debug("run SuiteIDs:", SuiteList.keys())
    report = {}  # report {sid: {status: pass/fail, cost: time, detail: {case: pass/fail/norun}}} **update 2016-2-16
    conf = EnvInit.config()
    # print conf.host
    # sys.exit()
    for sid in SuiteList.keys():
        logger.debug("++++++" + sid + "++++++" + "begin")
        begintime = time.time()
        report[sid] = {}
        for case in SuiteList[sid]:
            for pk in case.param.keys():
                if case.param[pk].startswith('$$'):
                    logger.debug('debug main ', case.param[pk])
                    tmpList = case.param[pk][2:].split('.')
                    tmpSid = tmpList[0]
                    tmpCid = tmpList[1]
                    tmpAttrList, tmpFun = getAttrList(case.param[pk], re.compile('\[(.+?)\]'))
                    for tc in SuiteList[sid]:
                        if tc.cid == tmpCid and tc.sid == tmpSid:
                            case.param[pk] = tc.getResValue(tmpAttrList)
                            logger.debug("main ...................... update ", case.__hash__())
                tc = Models.contain(case.param[pk], Models.RESERVEDWORD.keys())
                if tc is not None:
                    case.param[pk] = Models.RESERVEDWORD[tc](case.param[pk])
            logger.debug("main param.....", case.cid, case.sid, case.param)
            r, c = request(case, conf, logger)
            logger.debug("main response..", c)
            if r['status'] != '200':
                report[sid] = {'status': False,
                               'cost': time.time() - begintime,
                               'detail': {(case.cid, case.sid): False}}
                break
            case.res = c
            assertobj = AssertMain(c, case.asex, case.param, logger)
            logger.debug("main assertobj...", assertobj)
            if assertobj['status']:
                report[sid]['status'] = True
                if report[sid].keys().count('detail') > 0:
                    report[sid]['detail'][(case.cid, case.sid)] = assertobj
                else:
                    report[sid]['detail'] = {(case.cid, case.sid): assertobj}
            else:
                report[sid]['status'] = False
                report[sid]['cost'] = time.time() - begintime
                report[sid]['detail'][(case.cid, case.sid)] = assertobj
                break
            if case.otherAction != '':
                logger.debug(case.otherAction)
                eval(case.otherAction)
        if report[sid].keys().count('status') == 0:
            report[sid]['status'] = True
        report[sid]['cost'] = time.time() - begintime
        logger.debug("++++++", sid, "++++++", "end")
    logger.debug(report)
    logger.debug("dump report file begin")
    GenerateReport.Report(report)
    logger.debug("dump report file end")
start = time.time()
solved = SudokuSolver.solveBacktrackingHeuritic(DataProvider.generateSudoku16x16(10))
end = time.time()
print "time = ", end - start
# load the data set
tra_set_x, tra_set_y, val_set_x, val_set_y = load_dataset()

# get the window length of the training examples
windowlength = 599

sess = tf.InteractiveSession()

offset = int(0.5 * (windowlength - 1.0))

tra_kwag = {'inputs': tra_set_x, 'targets': tra_set_y, 'flatten': False}
val_kwag = {'inputs': val_set_x, 'targets': val_set_y, 'flatten': False}

tra_provider = DataProvider.MultiApp_Slider(batchsize=batchsize, shuffle=True, offset=offset)
val_provider = DataProvider.MultiApp_Slider(batchsize=5000, shuffle=False, offset=offset)

x = tf.placeholder(tf.float32, shape=[None, windowlength], name='x')
y_ = tf.placeholder(tf.float32, shape=[None, 6], name='y_')

network = tl.layers.InputLayer(x, name='input_layer')
network = tl.layers.ReshapeLayer(network, shape=(-1, windowlength, 1, 1))
network = tl.layers.Conv2dLayer(network,
                                act=tf.nn.relu,
                                shape=[10, 1, 1, 30],
                                strides=[1, 1, 1, 1],
                                padding='SAME',
def nprint(file):
    tags = json.load(open(file))
    for _id in tags:
        opinion = dp.get_opinion(_id)
        print ' '.join([
            "%s" % ("\033[91m" + wd + "\033[0m" if tg else wd)
            for wd, tg in zip([text['word'] for text in opinion['text']], tags[_id])
        ])
        raw_input("Next...")
tra_set_x, tra_set_y, val_set_x, val_set_y = load_dataset()

# get the window length of the training examples
windowlength = params_appliance[args.appliance_name]['windowlength']

sess = tf.InteractiveSession()

offset = int(0.5 * (params_appliance[args.appliance_name]['windowlength'] - 1.0))

tra_kwag = {'inputs': tra_set_x, 'targets': tra_set_y, 'flatten': False}
val_kwag = {'inputs': val_set_x, 'targets': val_set_y, 'flatten': False}

tra_provider = DataProvider.DoubleSourceSlider(batchsize=args.batchsize, shuffle=True, offset=offset)
val_provider = DataProvider.DoubleSourceSlider(batchsize=5000, shuffle=False, offset=offset)

x = tf.placeholder(tf.float32, shape=[None, windowlength], name='x')
y_ = tf.placeholder(tf.float32, shape=[None, 1], name='y_')

network = tl.layers.InputLayer(x, name='input_layer')
network = tl.layers.ReshapeLayer(network, shape=(-1, windowlength, 1, 1))
network = tl.layers.Conv2dLayer(network,
                                act=tf.nn.relu,
                                shape=[10, 1, 1, 30],
                                strides=[1, 1, 1, 1],
                                padding='SAME',
elapsed = time.strftime('%H:%M:%S', time.gmtime(time.time() - start_time))
log("NEGATIONS CORPUS - Elapsed: %s" % elapsed, level="INFO")
print "NEGATION CORPUS - Elapsed time: %s" % elapsed

###################################################################

if count:
    raw_input("Total %i. Continue..." % count)

###################################################################

op = raw_input("Update embeddings? [y/n] > ")
op = op.lower()
if op == 'y':
    start_time = time.time()
    dp.update_embeddings(verbose=True)
    elapsed = time.strftime('%H:%M:%S', time.gmtime(time.time() - start_time))
    log("EMBEDDINGS - Elapsed: %s" % elapsed, level="INFO")
    print "EMBEDDINGS - Elapsed time: %s" % elapsed

###################################################################

op = raw_input("Start manual tagging? [y/n] > ")
op = op.lower()
if op == 'y':
    nt.start_tagging(tofile="./outputs/negation")

###################################################################

config_set = config_set_neural_negation_tagger()
    print('testset path:{}'.format(app + '_test_x.npy'))
    print('testset path:{}'.format(app + '_test_y.npy'))
    return test_set_x, test_set_y, ground_truth


test_set_x, test_set_y, ground_truth = load_dataset()

shuffle = False
windowlength = params_appliance[args.appliance_name]['windowlength']

sess = tf.InteractiveSession()

test_kwag = {'inputs': test_set_x, 'targets': test_set_y}

test_provider = DataProvider.DoubleSourceProvider(batchsize=-1, shuffle=False)

x = tf.placeholder(tf.float32, shape=[None, 1, windowlength], name='x')
y_ = tf.placeholder(tf.int64, shape=[None, 1], name='y_')

network = tl.layers.InputLayer(x, name='input_layer')
network = tl.layers.ReshapeLayer(network, shape=(-1, windowlength, 1, 1))
network = tl.layers.Conv2dLayer(network,
                                act=tf.nn.relu,
                                shape=[10, 1, 1, 30],
                                strides=[1, 1, 1, 1],
                                padding='SAME',
                                name='cnn1')
network = tl.layers.Conv2dLayer(network,
                                act=tf.nn.relu,
                                shape=[8, 1, 30, 30],
import DataProvider as dp
import datetime as dttm

provider = dp.dataProvider()
provider.set_security_id_code('000905', 'XSHG')
# security_data = provider.get_security_data_min(dttm.datetime(2007, 1, 1, 9, 0, 0), dttm.datetime(2007, 2, 5, 15, 0, 0))
stock_code_list = provider.get_index_stocks()
std = params_appliance[application]['std']

sess = tf.InteractiveSession()

windowlength = params_appliance[args.appliance_name]['windowlength']
offset = int(0.5 * (params_appliance[application]['windowlength'] - 1.0))

test_kwag = {'inputs': test_set_x, 'targets': ground_truth, 'flatten': False}
# val_kwag = {
#     'inputs': val_set_x,
#     'targets': val_set_y,
#     'flatten': False}

test_provider = DataProvider.MultiApp_Slider(batchsize=batchsize, shuffle=False, offset=offset)
# val_provider = DataProvider.DoubleSourceSlider(batchsize=5000,
#                                                shuffle=False, offset=offset)

x = tf.placeholder(tf.float32, shape=[None, windowlength], name='x')
y_ = tf.placeholder(tf.float32, shape=[None, 1], name='y_')

##### cnn2
network = tl.layers.InputLayer(x, name='input_layer')
network = tl.layers.ReshapeLayer(network, shape=(-1, windowlength, 1, 1))
network = tl.layers.Conv2dLayer(network,
                                act=tf.nn.relu,
                                shape=[10, 1, 1, 30],
                                strides=[1, 1, 1, 1],
                                padding='SAME',
def Ouroborosfit(sess, network, cost, dataset, train_op, batchsize, input_size,
                 x, y_, pad, n_epoch=50, val_provider=None, save_model=-1,
                 val_kwag=None, save_path=None, epoch_identifier=None, mean=0,
                 std=1, shuffle=True, print_frame_loss=True):
    """
    :param sess: TensorFlow session, e.g. sess = tf.InteractiveSession()
    :param network: a TensorLayer layer; the network to be trained
    :param cost: cost function
    :param dataset: raw dataset
    :param train_op: training optimiser
    :param batchsize: batch size
    :param input_size: network input size
    :param x: placeholder input
    :param y_: placeholder output
    :param pad: pad of input
    :param n_epoch: number of epochs
    :param val_provider: DataProvider for validation
    :param save_model: save model mode
    :param val_kwag: parameter dict fed to the val_provider
    :param save_path: model save path
    :param epoch_identifier: append the epoch to the path or not
    :param mean: normalisation constant (mean)
    :param std: normalisation constant (std)
    :param shuffle: shuffle data or not
    :param print_frame_loss: print per-frame loss or not
    :return: None
    """
    for epoch in range(n_epoch):
        start_time = time.time()
        for frame in xrange(dataset.shape[-1] - input_size[-1]):
            output_provider = DataProvider.Provider(stride=(1, 1),
                                                    input_size=input_size,
                                                    output_size=(1, 1, 1),
                                                    prediction_gap=0,
                                                    batchsize=-1,
                                                    pad=pad,
                                                    pad_value=0,
                                                    shuffle=False)
            if frame == 0:
                input_source = dataset[:, :, :input_size[-1]]
            else:
                out_kwag = {
                    'inputs': input_source,
                    'framebatch': 1,
                    'mean': mean,
                    'std': std,
                    'norm_tar': True
                }
                frame_prediction = custompredict(sess=sess,
                                                 network=network,
                                                 output_provider=output_provider,
                                                 x=x,
                                                 fragment_size=1000,
                                                 output_length=1,
                                                 y_op=None,
                                                 out_kwag=out_kwag)
                frame_prediction = frame_prediction[0].reshape(
                    dataset.shape[0], dataset.shape[1], 1) * std + mean
                input_source = np.concatenate(
                    [input_source[:, :, 1:], frame_prediction], axis=2)
            net_input, = output_provider.feed(inputs=input_source,
                                              framebatch=1,
                                              mean=mean,
                                              std=std,
                                              norm_tar=True)
            tra_provider = DataProvider.DoubleSourceProvider(batchsize=batchsize,
                                                             shuffle=shuffle)
            ground_truth = dataset[:, :, input_size[-1] + frame].reshape(-1, 1)
            tra_kwag = {
                'inputs': net_input[0],
                'targets': (ground_truth - mean) / std
            }
            print 'prediction:', np.mean(np.mean(input_source, axis=0), axis=0)[-1], \
                'GT:', dataset[:, :, input_size[-1] + frame - 1].mean()
            if print_frame_loss:
                print("Epoch %d, frame %d of %d" %
                      (epoch + 1, frame, dataset.shape[-1] - input_size[-1])),
            easyfit(sess=sess, network=network, cost=cost, train_op=train_op,
                    tra_provider=tra_provider, x=x, y_=y_, n_epoch=1,
                    tra_kwag=tra_kwag, print_loss=print_frame_loss)
        if val_provider is not None:
            customtest(sess=sess, network=network, acc=None,
                       test_provider=val_provider, x=x, y_=y_, cost=cost,
                       test_kwag=val_kwag)
        if save_model > 0 and epoch % save_model == 0:
            if epoch_identifier:
                modelsaver(network=network, path=save_path, epoch_identifier=epoch)
            else:
                modelsaver(network=network, path=save_path, epoch_identifier=None)
        print 'Epoch took:', time.time() - start_time, 's'
    if save_model == -1:
        modelsaver(network=network, path=save_path, epoch_identifier=None)
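# Illustrative walk-through of the frame loop above (the numbers are assumed,
# not from the original source): with input_size[-1] == 8 and a dataset whose
# last axis holds 12 frames, the loop runs 4 times per epoch. At frame 0 the
# input is the raw first 8 frames; every later iteration predicts the next
# frame with custompredict, slides it into input_source (dropping the oldest
# frame), and then fits one epoch against the true frame at index
# input_size[-1] + frame.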
}
val_kwag = {
    'input_dir': INPUT_DIR_VAL,
    'input_dir2': INPUT_DIR_VAL2,
    'target_dir': TARGET_DIR,
    'frame_num': FRAME_NUM,
    'crop': CROP,
    'resize': RESIZE,
    'resample': RESAMPLE,
    'resize2': RESIZE2,
    'crop2': CROP2
}

tra_provider = DataProvider.MultiVideoProvider(batchsize=BATCH_SIZE, buffer_size=BUFFER_SIZE, shuffle=True)
val_provider = DataProvider.MultiVideoProvider(batchsize=BATCH_SIZE, buffer_size=1, shuffle=False)

filter_shape = 3
filter_shape_s = 2
filter_shape_l = 4
feature = 36
dense_unit = 200
merge_x = 47
merge_y = 37


def stream1(x, act=tf.nn.selu, reuse=False, name='stream1', is_train=True):
def start_tagging(tofile=None):

    def DisplayMenu():
        os.system('clear')
        title("MENU")
        print "0 . exit"
        for i in range(sources_size):
            qty = len(dp.get_tagged('manually', sources[i]))
            print i + 1, ".", "%-20s" % sources[i], "(%i)" % qty
        return

    def DisplayReview(_id, current, total, words, tags):
        os.system('clear')
        print "Review [%s]" % _id
        print "<<",
        for i in range(total):
            if i < current and tags[i] == 'n':
                print words[i] + "\033[93m/" + tags[i] + "\033[0m",
            elif i < current:
                print words[i] + "\033[91m/" + tags[i] + "\033[0m",
            elif i > current:
                print words[i] + " ",
            else:
                print "\033[92m\033[4m" + words[i] + "\033[0m\033[0m ",
        print ">>"

    def chunkstring(string, length):
        return (string[0 + i:length + i] for i in range(0, len(string), length))

    def ViewSave(result, source):
        os.system('clear')
        if not result:
            return
        op = raw_input("Done! Save result for %s? [y/n] > " % source)
        if op.lower() == 'n':
            op = raw_input("Are you sure? [y/n] > ")
            if op.lower() == 'y':
                return
        dp.save_negations(result, tagged_as='manually')
        if tofile:
            save(result, "negtag_%s" % source, tofile, overwrite=False)

    # #------- Execute Function -------#
    while True:
        # Display menu options
        DisplayMenu()
        op = raw_input("\nOption > ")
        if not op.isdigit():
            raw_input("Invalid option")
            continue
        op = int(op)
        if op == 0:
            break  # Exit
        if op > sources_size:
            raw_input("Invalid option")
            continue
        else:
            result = {}
            id = 0
            source = sources[op - 1]
            try:
                # Ask for retrieving options
                op = raw_input("\nInsert indexes separated by ',' or <intro> for pick up randomly > ")
                if op:  # From indexes
                    indexes = list(set(int(i) for i in op.split(',')))
                    quantity = len(indexes)
                    indexes = indexes[:quantity]
                else:  # Randomly
                    while not op.isdigit():
                        op = raw_input("How many? > ")
                    quantity = int(op)
                    indexes = []
                # Get a sample of reviews from options
                samples = dp.get_sample(quantity, source, indexes)
                # Tag every review
                left = quantity
                quit = False
                while left != 0:
                    # Retrieve relevant data from the sample
                    sample = samples[left - 1]
                    _id = sample['_id']
                    review = sample['text']
                    # Initialization (keep current words and empty categories)
                    words = [item['word'].encode('ascii', 'ignore') for item in review]
                    total = len(words)
                    tags = [' ' for _ in range(total)]
                    # For each word, annotate with (N) or (I) and give the possibility of going back by pressing (B)
                    cat = ""
                    idx = 0
                    while True:
                        # Display review
                        DisplayReview(sample['idx'], idx, total, words, tags)
                        # Check end condition
                        if idx == total:
                            op = raw_input("\nDone. Proceed with the next review (left %i)? [y/n] > " % (left - 1))
                            if op == 'y':
                                break
                            idx = idx - 1 if idx != 0 else 0
                            tags[idx] = ' '
                            continue
                        # Ask for input
                        tooltip = "\nTag with N(ormal) or I(nverted). "
                        tooltip += "Enter A(bort), B(ack) S(kip), Q(uit) or <intro> for "
                        tooltip += "repeating last action (%s) > " % (cat.upper() if cat else "None")
                        tag = raw_input(tooltip)
                        if not tag and not cat:  # Prevent parsing an empty category
                            print "Input a category first"
                            raw_input()
                            continue
                        elif tag:
                            cat = tag
                        # Action from decision
                        cat = cat.lower()
                        if not cat or cat not in 'nibasq':
                            print "Option", cat, "is not correct."
                            raw_input()
                            continue
                        if cat == 'q':
                            break
                        if cat == 's':
                            break
                        elif cat == 'b':  # Back
                            idx = idx - 1 if idx != 0 else 0
                            tags[idx] = ' '
                        elif cat == 'a':
                            op = raw_input("Are you sure you want to abort (left %i)? [y/n] > " % left)
                            if op.lower() == 'y':
                                raise Exception("Abort")
                        else:  # Associate the category
                            tags[idx] = cat
                            idx = idx + 1
                    if cat == 'q':
                        break
                    if cat == 's':
                        left -= 1
                        continue
                    # Once the text is tagged, add it to the result
                    tags = map(lambda cat: cat == 'i', tags)
                    result.update({_id: tags})
                    # Update
                    left -= 1
                # View and save results
                if op == 0:
                    continue
                ViewSave(result, source)
            except Exception as e:
                content = json.dumps(result, indent=4, ensure_ascii=False)
                log("Reason : %s (at %s) [%i] '%s'" % (str(e), source, sample['idx'], content))
                raw_input("Reason: %s\nEnter to continue..." % str(e))