Code Example #1
File: NegationTagger.py Project: damiansalvia/cldas
def load_neg_from_files(source_dir):
    sources = glob.glob(source_dir)
    total = len(sources)
    for idx,source in enumerate(sources):
        progress("Load negation tags from %s" % source,total,idx)
        content = json.load( open(source) )    
        dp.save_negations(content,tagged_as='manually')        
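For context, a minimal standalone sketch of the same glob-and-load pattern; the directory pattern is made up, and file handles are closed explicitly (the inline open() above leaves that to the garbage collector):

import glob
import json

def load_json_dir(pattern):
    # Collect one parsed JSON document per matching file.
    docs = []
    for path in glob.glob(pattern):
        with open(path) as fh:  # closed explicitly, unlike the inline open() above
            docs.append(json.load(fh))
    return docs

tagged = load_json_dir('./negtags/*.json')  # hypothetical directory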
Code Example #2
File: GUI.py Project: BartekH/CSP
 def __loadData(self):
     if self.entry.get() != '':
         if self.var2.get() == 1:
             gridNumber = int(self.entry.get())
             self.grid = DataProvider.getNQueensGrid(gridNumber)
             self.__draw_grid(len(self.grid))
             self.__draw_puzzle(self.grid)
         else:
             gridNumber = int(self.entry.get())
             self.grid = DataProvider.generateSudoku9x9(gridNumber)
             self.__draw_puzzle(self.grid)
Code Example #3
File: NegationTagger.py Project: damiansalvia/cldas
    def ViewSave(result,source):
        os.system('clear')
        if not result:
            return

        op = raw_input("Done! Save result for %s? [y/n] > " % source)
        if op.lower() == 'n':
            op = raw_input("Are you sure? [y/n] > ")
            if op.lower() == 'y':
                return
            
        dp.save_negations(result,tagged_as='manually')
        if tofile:save(result,"negtag_%s" % source,tofile,overwrite=False)
Code Example #4
File: TextAnalyzer.py Project: damiansalvia/cldas
def analyze(opinions,tofile=None):

    # #------- Execute Function -------#
    analyzed = []
    _ids = []
    total = len(opinions)
    fails = 0 
    for idx, opinion in enumerate(opinions):
        progress("Analyzing %s (%05.2f%%)" %  ( opinion['source'], 100.0*fails/total ), total, idx )
        try:
                
            _id = md5.new(str(opinion['category']) + opinion['text'].encode('ascii', 'ignore')).hexdigest()
            
            if not dp.get_opinion(_id) and _id not in _ids:
                
                _ids.append(_id)
                tokens = an.analyze(opinion['text'])
                
                if not tokens:
                    raise Exception("Empty analysis")
                
                analysis = {}         
                analysis['_id']      = _id
                analysis['category'] = opinion['category']
                analysis['idx']      = opinion['idx']
                analysis['source']   = opinion['source']
                analysis['text']     = [{
                    'word'  : token['form'],
                    'lemma' : token['lemma'],
                    'tag'   : token['tag']
                } for token in tokens ]
                
                analyzed.append(analysis)
                
                if idx % 500 == 0: # partial dump
                    dp.save_opinions(analyzed)
                    analyzed = []
                    
        except Exception as e:
            fails += 1
            log("Reason : %s for '%s' (at %s)" % (str(e),opinion['text'].encode('ascii','ignore'),opinion['source']) )
        except KeyboardInterrupt:
            fails += 1
            log("Reason : Interrupted on '%s' (at %s)" % (opinion['text'].encode('ascii','ignore'),opinion['source']) )
        
    dp.save_opinions(analyzed)
    if tofile: save(analyzed,"analyzed_%s" % opinions[0]['source'],tofile)
    
    return analyzed
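The _id above is an md5 digest of the category plus the ASCII-folded text, used to skip opinions that are already stored. A minimal sketch of the same idea with hashlib, which works on both Python 2 and 3 (the md5 module used above is Python 2 only); the sample values are made up:

import hashlib

def opinion_id(category, text):
    # Stable id from category + ASCII-folded text, mirroring md5.new(...) above.
    payload = str(category) + text.encode('ascii', 'ignore').decode('ascii')
    return hashlib.md5(payload.encode('ascii')).hexdigest()

print(opinion_id(80, u'great product'))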
Code Example #5
File: NegationTagger.py Project: damiansalvia/cldas
 def predict_untagged(self,limit=None,tofile=None):
     opinions = dp.get_untagged(limit,666)
     results = {}
     total = opinions.count(with_limit_and_skip=True)
     for idx,opinion in enumerate(opinions): 
         progress("Predicting on new data",total,idx)
         results[ opinion['_id'] ] = []
         for X in dp.get_text_embeddings( opinion['text'], self.wleft, self.wright )[0]:
             X = X.reshape((1, -1))
             Y = self.model.predict( X )
             Y = ( round(Y) == 1 ) # 0 <= Y <= 1 -- Round is ok?
             results[ opinion['_id'] ].append( Y ) 
     if tofile: save(results,"prediction",tofile,overwrite=False)
     #dp.save_negation(result,tagged_as='automatically')
     return results   
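The round(Y) == 1 test turns a sigmoid score into a boolean. An explicit threshold expresses the same decision (up to tie-breaking at exactly 0.5) without relying on round() over an array-shaped prediction; a minimal sketch, assuming one score per example:

import numpy as np

def is_negated(score, threshold=0.5):
    # Explicit cutoff instead of round(): same decision, visible and tunable.
    return bool(score >= threshold)

print(is_negated(np.float32(0.73)))  # True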
Code Example #6
File: Crawler.py Project: andrew54068/rentalRadar
def start_crawl(db: DataBaseConnector):
    try:
        while 1:
            url = "https://rent.591.com.tw/?kind=0&region=1&order=posttime&orderType=desc"
            subjects = DataProvider.get_subjects_from_url(url)
            manager = NotificationManager()
            if subjects is not None:
                new_subjects = list(db.update_subject(subjects))
                # print(f"new_subjects: {new_subjects}")
                for subject in list(new_subjects):
                    # print(f"subject: {subject}")

                    user_ids = db.get_subscribe_user_from_subject(subject)
                    # print(f"{len(user_ids) > 0}")
                    if len(user_ids) > 0:
                        print(f"user_ids: {user_ids}")
                        user_tokens = db.get_user_tokens(user_ids)
                        if len(user_tokens) > 0:
                            print(f"user_tokens: {user_tokens}")
                            manager.send_push_notification(
                                user_tokens, subject)

            else:
                print("subject is none.")
            time.sleep(20)
    except Exception:
        raise AssertionError("Oops!", str(sys.exc_info()[1]), "occurred.")
Code Example #7
def main(keys, rebuild_db):
    # collect all data
    collections = []

    # start loop until keys empty
    while len(keys) > 0:

        # remove first index from keys
        key = keys.pop(0)
        if not key:
            continue

        # init read progress
        Progress.updateReadProgress(0, 0, search_key=key)

        # get the data
        result = DataProvider.get(key, Progress.updateReadProgress)

        if len(result["collections"]) == int(result["count"]):
            collections.extend(result["collections"])
        else:
            #its failed, put back keys
            keys.append(key)

    #saving
    DBProvider.saveAndUpdate(db_host, db_usr, db_pwd, db_schema, db_table,
                             collections, Progress.updateSaveProgress,
                             rebuild_db)
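Note that a key whose fetch keeps coming back incomplete is re-appended indefinitely, so this loop can spin forever. A minimal sketch of the same pop-and-requeue pattern with a bounded retry count; drain, fetch, and max_retries are hypothetical names, not part of the original main():

def drain(keys, fetch, max_retries=3):
    # Pop keys, requeue incomplete fetches, give up after max_retries attempts.
    collections, retries = [], {}
    while keys:
        key = keys.pop(0)
        result = fetch(key)
        if len(result["collections"]) == int(result["count"]):
            collections.extend(result["collections"])
        elif retries.get(key, 0) < max_retries:
            retries[key] = retries.get(key, 0) + 1
            keys.append(key)
    return collections

print(drain(['a'], lambda k: {"collections": [1, 2], "count": "2"}))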
Code Example #8
File: NegationTagger.py Project: damiansalvia/cldas
 def DisplayMenu():
     os.system('clear')
     title("MENU")
     print "0 . exit"
     for i in range(sources_size):
         qty = len(dp.get_tagged('manually',sources[i])) 
         print i+1,".","%-20s" % sources[i], "(%i)" % qty
     return 
Code Example #9
File: NegationTagger.py Project: damiansalvia/cldas
 def fit_tagged(self,neg_as,testing_fraction=0.2,verbose=0,early_monitor='val_binary_accuracy',limit=None):
     opinions = dp.get_tagged('manually')
     if limit: # Only for testing
         opinions = opinions.limit(limit)
     total = opinions.count(with_limit_and_skip=True)       
     if total == 0: raise Exception('Nothing to train')
             
     callbacks = []
     if early_monitor:
         callbacks.append(
             EarlyStopping(
                 monitor   = early_monitor,
                 min_delta = 0,
                 patience  = 2, 
                 mode      = 'auto',
                 verbose   = 0
             )
         )
     
     X , Y = [] , []
     for idx,opinion in enumerate(opinions):
         progress("Getting embeddings for trainning (%i words)"  % len(opinion['text']),total,idx)
         x_curr,y_curr = dp.get_text_embeddings( opinion['text'], self.wleft, self.wright ,neg_as=neg_as )
         X += x_curr
         Y += y_curr         
     X = np.array(X)
     Y = np.array(Y)   
     
     self.model.fit( X, Y, 
         callbacks=callbacks , 
         batch_size=32 , epochs=100 , 
         validation_split=testing_fraction , 
         verbose=verbose 
     )
     
     scores = self.model.evaluate(X,Y,batch_size=32,verbose=verbose)
     scores = [ round(score*100,1) for score in scores ]
     scores = zip( self.model.metrics_names , scores )
     log('MODEL EVALUATION\n'+str(scores),level='info')
     print        
     for metric,score in scores: print "%-20s: %.1f%%" % ( metric, score )
     print "_________________________________________________________________"
     return scores
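EarlyStopping only triggers when the monitored name matches a metric Keras actually reports, which is why the val_binary_accuracy spelling matters: a name that never matches only produces a warning and the callback stays idle. A minimal sketch, assuming the classic Keras API used above and a model compiled with metrics=['binary_accuracy'] plus a validation split (so the reported name gets the val_ prefix):

from keras.callbacks import EarlyStopping

# The monitored name must match a reported metric exactly.
early = EarlyStopping(monitor='val_binary_accuracy', min_delta=0,
                      patience=2, mode='auto', verbose=0)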
Code Example #10
 def predict_opinion(self, opinion):
     x = [np.array(dp.get_word_embedding(token)) for token in opinion]
     rest = self.window - len(x) % self.window
     if rest > 0:
         x.extend([self.end_vecotr for i in range(rest)])
     X = np.array([
         x[i * self.window:(i + 1) * self.window]
         for i in range(len(x) / self.window)
     ])
     Y = self.model.predict(X).flatten()
     return [round(y) == 1 for y in Y.tolist()[:len(opinion)]]
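Several examples here share this pad-then-chunk step. A standalone sketch of it, with a zero vector standing in for the end-of-sequence embedding; computing the padding as -len % window also avoids appending a whole extra window when the length is already a multiple (the rest > 0 test above pads a full window in that case):

import numpy as np

def to_windows(vectors, window, pad_vector):
    # Pad to a multiple of `window`, then cut into fixed-length chunks.
    vectors = list(vectors)
    vectors.extend([pad_vector] * (-len(vectors) % window))
    return np.array([vectors[i:i + window]
                     for i in range(0, len(vectors), window)])

windows = to_windows([np.zeros(3)] * 5, window=4, pad_vector=np.zeros(3))
print(windows.shape)  # (2, 4, 3)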
Code Example #11
File: NegationTagger.py Project: damiansalvia/cldas
 def __init__(self,
         wleft,
         wright,
     ):
                 
     # Parameters calculation
     vec_size  = dp.get_size_embedding()        
     input_dim = vec_size * (wright + wleft + 1)
     
     # Attributes settings
     self.wright = wright
     self.wleft  = wleft
     self.dimension = input_dim
Code Example #12
File: CSPRun.py Project: BartekH/CSP
def runTestSudoku(N):
    timeResults = []
    for n in range(1,N):
        if n%8 == 0:
            print "iter ", n
            start = time.time()
            solved = SudokuSolver.solveBacktracking(DataProvider.generateSudoku16x16(n))
            end = time.time()
            timeResults.append(end-start)
    f = open("--.csv", 'wb')
    try:
        writer = csv.writer(f)
        writer.writerow( ('number of elements', 'time') )
        for n in range(len(timeResults)):
            writer.writerow( (n+1, timeResults[n]) )
    finally:
        f.close()
    print "finished"
Code Example #13
File: CSPRun.py Project: parmeshashwath/CSP
def runTestSudoku(N):
    timeResults = []
    for n in range(1, N):
        if n % 8 == 0:
            print "iter ", n
            start = time.time()
            solved = SudokuSolver.solveBacktracking(
                DataProvider.generateSudoku16x16(n))
            end = time.time()
            timeResults.append(end - start)
    f = open("--.csv", 'wb')
    try:
        writer = csv.writer(f)
        writer.writerow(('number of elements', 'time'))
        for n in range(len(timeResults)):
            writer.writerow((n + 1, timeResults[n]))
    finally:
        f.close()
    print "finished"
Code Example #14
File: CSPRun.py Project: BartekH/CSP
def runTestsNQueens(N):
    timeResults = []
    for n in range(8,N):
        start = time.time()
        solved = NQueensSolver.solveNQueensForwardChecking(DataProvider.getNQueensGrid(n), 0)
        end = time.time()
        timeResults.append(end-start)
        print "iteration number", n
    f = open("--.csv", 'wb')
    try:
        # plt.plot(timeResults, range(8,N))
        # plt.axis([0,30,8,25])
        # plt.show()
        writer = csv.writer(f)
        writer.writerow( ('size', 'time') )
        for n in range(1,N-7):
            writer.writerow( (n+7, timeResults[n-1]) )
    finally:
        f.close()
    print "finished"
Code Example #15
    def fit_tagged(self, testing_fraction=0.2, verbose=2, neg_as=False):

        opinions = dp.get_tagged('manually')
        if not opinions: raise Exception('Nothing to train')

        X, Y = [], []
        total = opinions.count(with_limit_and_skip=True)
        for idx, opinion in enumerate(opinions):
            progress("Loading training data", total, idx)
            x_curr, y_curr = self.format_opinion(opinion['text'], neg_as)
            X += x_curr
            Y += y_curr
        self.fit(X, Y, testing_fraction, verbose)
        scores = self.get_scores(X, Y, verbose)
        if verbose != 0:
            log('MODEL EVALUATION\n' + str(scores), level='info')
            print
            for metric, score in scores:
                print "%-20s: %.1f%%" % (metric, score)
            print "_________________________________________________________________"
        return scores
Code Example #16
File: CSPRun.py Project: parmeshashwath/CSP
def runTestsNQueens(N):
    timeResults = []
    for n in range(8, N):
        start = time.time()
        solved = NQueensSolver.solveNQueensForwardChecking(
            DataProvider.getNQueensGrid(n), 0)
        end = time.time()
        timeResults.append(end - start)
        print "iteration number", n
    f = open("--.csv", 'wb')
    try:
        # plt.plot(timeResults, range(8,N))
        # plt.axis([0,30,8,25])
        # plt.show()
        writer = csv.writer(f)
        writer.writerow(('size', 'time'))
        for n in range(1, N - 7):
            writer.writerow((n + 7, timeResults[n - 1]))
    finally:
        f.close()
    print "finished"
Code Example #17
 def format_opinion(self, opinion, neg_as=False):
     x_curr = [
         np.array(dp.get_word_embedding(token['word'])) for token in opinion
     ]
     y_curr = [
         token.get('negated')
         if token.get('negated') is not None else neg_as
         for token in opinion
     ]
     rest = self.window - len(x_curr) % self.window
     if rest > 0:
         x_curr.extend([self.end_vecotr for i in range(rest)])
         y_curr.extend([False for i in range(rest)])
     X = [
         x_curr[i * self.window:(i + 1) * self.window]
         for i in range(len(x_curr) / self.window)
     ]
     Y = [
         y_curr[i * self.window:(i + 1) * self.window]
         for i in range(len(y_curr) / self.window)
     ]
     return X, Y
Code Example #18
 def test(self):
     ftest, ft = self.test_functions(0)
     testDataProvider = DataProvider(self.lang, self.testFile, self.hotFile, self.mutiNameFile, self.testAttFile,  self.batch_size, self.cache_size, 0 , 0, \
                         self.word_vocab, self.kb_vocab, self.kbp_type_vocab, self.fb_type_vocab)
     testDataProvider.reset()
     count = 0
     right = 0
     start = time.asctime()
     while (testDataProvider.isEnd() == 0):
         aBatch, fea, lab = testDataProvider.readNextBatch()
         (rlt, num) = self.testOneBatch(aBatch, fea, lab, ftest, ft)
         right = right + rlt
         count = count + num
     print 'total: %d groups, right %d groups (%.2f %%)' % (
         count, right, right * 100.0 / count)
     end = time.asctime()
     print 'time from %s to %s' % (start, end)
Code Example #19
File: NegationTagger.py Project: damiansalvia/cldas
    def input_guess(self):
        
        from analyzer import Analyzer
        from CorpusReader import review_correction
        an = Analyzer()
        
        while True:
#             try:
                
            os.system('clear')
            print '\n\033[4mYOUR SENTENCE\033[0m'
            sentence = raw_input("> ")
            if not sentence: # exit
                os.system('clear') ; break
            sentence = review_correction(sentence)
            analized_sentence = an.analyze(sentence)
            analized_sentence = [ {'word': item['form']} for item in analized_sentence ]
            
            result = []
            for X in dp.get_text_embeddings( analized_sentence , self.wleft , self.wright )[0]:
                X = X.reshape((1, -1))
                Y = self.model.predict( X )
                Y = ( round(Y) == 1 )
                result.append( Y )
            
            os.system('clear') 
            print '\n\033[4mPREDICTION RESULT\033[0m'        
            print '>',' '.join([
                "%s" % ("\033[91m"+wd+"\033[0m" if tg else wd) 
                for wd,tg in zip( [text['word'] for text in analized_sentence] , result ) 
            ])
            
#             except Exception as e:
#                 print 'An error has occurred during processing (',str(e),")"

            raw_input("\nPress enter to continue...")
Code Example #20
    def predict_untagged(self, tofile=None, limit=None):
        opinions = list(dp.get_untagged(limit=limit))
        results = {}
        total = len(opinions)  #opinions.count(with_limit_and_skip=True)
        for idx, opinion in enumerate(opinions):

            progress(
                "Predicting on new data (%i words)" % len(opinion['text']),
                total, idx)
            x_curr = [
                np.array(dp.get_word_embedding(token['word']))
                for token in opinion['text']
            ]
            rest = self.window - len(x_curr) % self.window
            if rest > 0:
                x_curr.extend([self.end_vecotr for i in range(rest)])

            X = np.array([
                x_curr[i * self.window:(i + 1) * self.window]
                for i in range(len(x_curr) / self.window)
            ])
            try:
                Y = self.model.predict(X).flatten()
            except Exception as e:
                print 'ERROR:', str(e)
                continue
            results[opinion['_id']] = [
                round(y) == 1 for y in Y.tolist()[:len(opinion['text'])]
            ]

            if idx % 500 == 0:  # partial dump
                dp.save_negations(results, tagged_as='automatically')
                results = {}

        if tofile:
            save(results, "predict_untagged_LMST_window_%i" % (self.window),
                 tofile)
        dp.save_negations(results, tagged_as='automatically')
        return results
Code Example #21
    def train(self):
        iter = 0
        lr = float(self.init_lr)
        (fcost, fupdata) = self.train_functions(1)

        trainDataProvider = DataProvider(self.lang, self.trainFile, self.hotFile, self.mutiNameFile, self.trainAttFile, self.batch_size, self.cache_size, 0, 1,  \
                            self.word_vocab, self.kb_vocab, self.kbp_type_vocab, self.fb_type_vocab)
        start = time.clock()
        while (iter < self.epoc):
            cost = 0
            count = 0
            trainDataProvider.reset()
            beginTime = time.time()
            while (trainDataProvider.isEnd() == 0):
                aBatch, fea, lab = trainDataProvider.readNextBatch()
                # for aGroup in aBatch:
                #     for s in aGroup.samples:
                #         print s.toString()
                # print fea
                # print lab

                tmp = self.processOneBatch(aBatch, fea, lab, lr, fcost,
                                           fupdata)  #return cost
                if (tmp >= 0):
                    cost = cost + tmp
                    count = count + self.batch_size
                    if (count / self.batch_size % 10 == 0 and count > 0):
                        fTime = time.time() - beginTime
                        print "iter: %d, lr: %.6f, processed count: %d , avg cost: %.6f, speed: %.2f ins/s" \
                            %(iter, lr, count, cost / count,count/ fTime )
            self.saveModel(iter)
            iter = iter + 1
            if (iter >= self.declr_start):
                lr = lr / 2
        end = time.clock()
        print 'time is %f' % (end - start)
Code Example #22
File: NegationTagger.py Project: damiansalvia/cldas
def load_corpus_negation(sources='../../corpus/corpus_variado_sfu_neg/*/*.xml',tofile=None):
    sources = glob.glob(sources)
    total = len(sources)
    isneg, tmpisneg = False, None
    opinions = []
    
    for idx,source in enumerate(sources):
        
        progress("Reading negation corpus_variado_sfu (%s)" % source.split('/')[-2],total,idx) 
        lines = open(source).readlines()
        tokens = []
        text = ""
        for line in lines:
            content = line.strip()
            if not isinstance(content,unicode):
                content = unicode(content,'utf8')            
            
            if content.startswith("<?xml"):
                regex = re.compile("polarity=\"(.*?)\"",re.DOTALL)
                category = regex.findall(content)[0]
                category = 20 if category=='negative' else 80
                continue
            
            elif content.startswith("<scope"):
                isneg = True
                continue               
                
            elif content.startswith("</scope"):
                isneg = False
                continue
            
            elif content.startswith("<negexp"):
                tmpisneg = isneg
                isneg = None
                continue               
                
            elif content.startswith("</negexp"):
                isneg = tmpisneg
                tmpisneg = None
                continue
            
            elif content.startswith("<v ") or\
                content.startswith("<s ") or\
                content.startswith("<f ") or\
                content.startswith("<p ") or\
                content.startswith("<r ") or\
                content.startswith("<a ") or\
                content.startswith("<d ") or\
                content.startswith("<c ") or\
                content.startswith("<n ") or\
                content.startswith("<w ") or\
                content.startswith("<z ") or\
                content.startswith("<i "):
                   
                forms = re.compile("wd=\"(.*?)\"",re.DOTALL).findall(content)
                lemma = re.compile("lem=\"(.*?)\"",re.DOTALL).findall(content)
                tag   = re.compile("pos=\"(.*?)\"",re.DOTALL).findall(content)
                
                if forms and lemma:
                    forms = forms[0]
                    lemma = lemma[0]
                elif lemma: 
                    forms = lemma[0]
                    lemma = lemma[0]
                else: # math sign, only has a POS tag
                    continue                       
                
                if not tag:
                    tag   = "cs"
                else:
                    tag = tag[0]
                
                forms = forms.split('_') if not tag.startswith("NP") else [forms]
                
                for form in forms:  
                    tokens.append({
                        'form':form,
                        'lemma':lemma,
                        'tag':tag.upper(),
                        'negated': isneg,
                    })    
                    
                    text += " "+form
            else: # Rare cases such as &gt;
                pass
                
        _id = md5.new(str(category) + text.encode('ascii', 'ignore')).hexdigest()
        
        if not dp.get_opinion(_id):
            opinion = {}         
            opinion['_id']      = _id
            opinion['category'] = category
            opinion['idx']      = idx+1
            opinion['source']   = 'corpus_variado_sfu'
            opinion['tagged']   = 'manually'
            opinion['text']     = [{
                'word'   : token['form'].lower(),
                'lemma'  : token['lemma'].lower(),
                'tag'    : token['tag'],
                'negated': token['negated']
            } for token in tokens ]
            opinions.append(opinion)
    
    dp.save_opinions(opinions) 
    if tofile: save(opinions,"from_corpus_sfu_negation",tofile)
    return len(opinions)
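For reference, this is how the wd/lem/pos attributes come out of a single markup line; the sample tag is made up but follows the corpus format parsed above:

import re

line = '<v wd="no_tiene" lem="tener" pos="VMIP3S0">'
forms = re.findall(r'wd="(.*?)"', line)
lemma = re.findall(r'lem="(.*?)"', line)
tag = re.findall(r'pos="(.*?)"', line)
print("%s %s %s" % (forms, lemma, tag))  # ['no_tiene'] ['tener'] ['VMIP3S0']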
Code Example #23
File: main.py Project: oldsuper/AutoFrame
def main(sids,logger):
    SuiteList = DataProvider.getCaseData(logger,sids)
    logger.debug("run SuiteIDs:",SuiteList.keys())
    report = {}
    # report {sid:{status:pass/fail,cost:time,detail:{case:pass/fail/norun}}} **update 2016-2-16
    conf=EnvInit.config()
    # print conf.host
    # sys.exit()
    for sid in SuiteList.keys():
        logger.debug("++++++"+sid+"++++++"+"begin")
        begintime=time.time()
        report[sid]={}
        for case in SuiteList[sid]:
            for pk in case.param.keys():
                if case.param[pk].startswith('$$'):
                    logger.debug('debug main ',case.param[pk])
                    tmpList = case.param[pk][2:].split('.')
                    tmpSid = tmpList[0]
                    tmpCid = tmpList[1]
                    tmpAttrList,tmpFun = getAttrList(case.param[pk],re.compile('\[(.+?)\]'))
                    for tc in SuiteList[sid]:
                        if tc.cid ==tmpCid and tc.sid == tmpSid:
                            case.param[pk]=tc.getResValue(tmpAttrList)
                            logger.debug( "main ...................... update ",case.__hash__())

                tc=Models.contain(case.param[pk],Models.RESERVEDWORD.keys())
                if tc!=None:
                    case.param[pk]=Models.RESERVEDWORD[tc](case.param[pk])
            logger.debug("main param.....",case.cid,case.sid,case.param)
            r,c = request(case,conf,logger)
            logger.debug("main response..",c)
            if r['status']!='200':
                report[sid]={'status':False,
                             'cost':time.time()-begintime,
                             'detail':{(case.cid,case.sid):False}}
                break
            case.res = c
            assertobj = AssertMain(c,case.asex,case.param,logger)
            logger.debug("main assertobj...",assertobj)
            if assertobj['status']:
                report[sid]['status']=True
                if report[sid].keys().count('detail')>0:
                    report[sid]['detail'][(case.cid,case.sid)]=assertobj
                else:
                    report[sid]['detail']={(case.cid,case.sid):assertobj}
            else:
                report[sid]['status']=False
                report[sid]['cost']=time.time()-begintime
                report[sid]['detail'][(case.cid,case.sid)]=assertobj
                break
            if case.otherAction!='':
                logger.debug( case.otherAction)
                eval(case.otherAction)
        if report[sid].keys().count('status')==0:
            report[sid]['status']=True
            report[sid]['cost']=time.time()-begintime
        logger.debug("++++++",sid,"++++++","end")
    logger.debug( report)
    logger.debug("dump report file begin")
    GenerateReport.Report(report)
    logger.debug("dump report file end")
Code Example #24
File: CSPRun.py Project: BartekH/CSP
        for n in range(1,N-7):
            writer.writerow( (n+7, timeResults[n-1]) )
    finally:
        f.close()
    print "finished"

def runTestSudoku(N):
    timeResults = []
    for n in range(1,N):
        if n%8 == 0:
            print "iter ", n
            start = time.time()
            solved = SudokuSolver.solveBacktracking(DataProvider.generateSudoku16x16(n))
            end = time.time()
            timeResults.append(end-start)
    f = open("--.csv", 'wb')
    try:
        writer = csv.writer(f)
        writer.writerow( ('number of elements', 'time') )
        for n in range(len(timeResults)):
            writer.writerow( (n+1, timeResults[n]) )
    finally:
        f.close()
    print "finished"


start=time.time()
solved = SudokuSolver.solveBacktrackingHeuritic(DataProvider.generateSudoku16x16(10))
end=time.time()
print "time = ", end-start
Code Example #25
File: ratio_training.py Project: aleonnet/AAAI-NILM
# load the data set
tra_set_x, tra_set_y, val_set_x, val_set_y = load_dataset()

# get the window length of the training examples
windowlength = 599

sess = tf.InteractiveSession()

offset = int(0.5 * (windowlength - 1.0))

tra_kwag = {'inputs': tra_set_x, 'targets': tra_set_y, 'flatten': False}

val_kwag = {'inputs': val_set_x, 'targets': val_set_y, 'flatten': False}

tra_provider = DataProvider.MultiApp_Slider(batchsize=batchsize,
                                            shuffle=True,
                                            offset=offset)
val_provider = DataProvider.MultiApp_Slider(batchsize=5000,
                                            shuffle=False,
                                            offset=offset)

x = tf.placeholder(tf.float32, shape=[None, windowlength], name='x')
y_ = tf.placeholder(tf.float32, shape=[None, 6], name='y_')

network = tl.layers.InputLayer(x, name='input_layer')
network = tl.layers.ReshapeLayer(network, shape=(-1, windowlength, 1, 1))
network = tl.layers.Conv2dLayer(network,
                                act=tf.nn.relu,
                                shape=[10, 1, 1, 30],
                                strides=[1, 1, 1, 1],
                                padding='SAME',
Code Example #26
File: NegationTagger.py Project: damiansalvia/cldas
def nprint(file):
    tags = json.load(open(file))
    for _id in tags:
        opinion = dp.get_opinion(_id)
        print ' '.join([
            "%s" % ("\033[91m"+wd+"\033[0m" if tg else wd)
            for wd,tg in zip([text['word'] for text in opinion['text']], tags[_id])
        ])
        raw_input("Next...")
Code Example #27
tra_set_x, tra_set_y, val_set_x, val_set_y = load_dataset()

# get the window length of the training examples
windowlength = params_appliance[args.appliance_name]['windowlength']

sess = tf.InteractiveSession()

offset = int(0.5 *
             (params_appliance[args.appliance_name]['windowlength'] - 1.0))

tra_kwag = {'inputs': tra_set_x, 'targets': tra_set_y, 'flatten': False}

val_kwag = {'inputs': val_set_x, 'targets': val_set_y, 'flatten': False}

tra_provider = DataProvider.DoubleSourceSlider(batchsize=args.batchsize,
                                               shuffle=True,
                                               offset=offset)
val_provider = DataProvider.DoubleSourceSlider(batchsize=5000,
                                               shuffle=False,
                                               offset=offset)

x = tf.placeholder(tf.float32, shape=[None, windowlength], name='x')
y_ = tf.placeholder(tf.float32, shape=[None, 1], name='y_')

network = tl.layers.InputLayer(x, name='input_layer')
network = tl.layers.ReshapeLayer(network, shape=(-1, windowlength, 1, 1))
network = tl.layers.Conv2dLayer(network,
                                act=tf.nn.relu,
                                shape=[10, 1, 1, 30],
                                strides=[1, 1, 1, 1],
                                padding='SAME',
Code Example #28
    elapsed = time.strftime('%H:%M:%S', time.gmtime(time.time()-start_time))
    log( "NEGATIONS CORPUS  - Elapsed: %s" % elapsed , level="INFO")
    print "NEGATION CORPUS - Elapsed time: %s" % elapsed
           
###################################################################

if count: raw_input("Total %i. Continue..." % count)

###################################################################

op = raw_input("Update embeddings? [y/n] > ")
op = op.lower()
if op == 'y':
    start_time = time.time() 
    
    dp.update_embeddings(verbose=True)

    elapsed = time.strftime('%H:%M:%S', time.gmtime(time.time()-start_time))
    log( "EMBEDDINGS  - Elapsed: %s" % elapsed , level="INFO")
    print "EMBEDDINGS - Elapsed time: %s" % elapsed

###################################################################

op = raw_input("Start manual tagging? [y/n] > ")
op = op.lower() 
if op == 'y': nt.start_tagging(tofile="./outputs/negation") 


###################################################################
  
config_set = config_set_neural_negation_tagger()
Code Example #29
    print('testset path:{}'.format(app + '_test_x.npy'))
    print('testset path:{}'.format(app + '_test_y.npy'))

    return test_set_x, test_set_y, ground_truth


test_set_x, test_set_y, ground_truth = load_dataset()

shuffle = False
windowlength = params_appliance[args.appliance_name]['windowlength']

sess = tf.InteractiveSession()

test_kwag = {'inputs': test_set_x, 'targets': test_set_y}

test_provider = DataProvider.DoubleSourceProvider(batchsize=-1, shuffle=False)

x = tf.placeholder(tf.float32, shape=[None, 1, windowlength], name='x')
y_ = tf.placeholder(tf.int64, shape=[None, 1], name='y_')

network = tl.layers.InputLayer(x, name='input_layer')
network = tl.layers.ReshapeLayer(network, shape=(-1, windowlength, 1, 1))
network = tl.layers.Conv2dLayer(network,
                                act=tf.nn.relu,
                                shape=[10, 1, 1, 30],
                                strides=[1, 1, 1, 1],
                                padding='SAME',
                                name='cnn1')
network = tl.layers.Conv2dLayer(network,
                                act=tf.nn.relu,
                                shape=[8, 1, 30, 30],
Code Example #30
File: playground.py Project: zhengyu14/QFC_C_Essence
import DataProvider as dp
import datetime as dttm

provider = dp.dataProvider()
provider.set_security_id_code('000905', 'XSHG')
#security_data = provider.get_security_data_min(dttm.datetime(2007,1,1,9,0,0),dttm.datetime(2007,2,5,15,0,0))
stock_code_list = provider.get_index_stocks()
Code Example #31
std = params_appliance[application]['std']
sess = tf.InteractiveSession()

windowlength = params_appliance[args.appliance_name]['windowlength']

offset = int(0.5 * (params_appliance[application]['windowlength'] - 1.0))

test_kwag = {'inputs': test_set_x, 'targets': ground_truth, 'flatten': False}

# val_kwag = {
#     'inputs': val_set_x,
#     'targets': val_set_y,
#     'flatten':False}

test_provider = DataProvider.MultiApp_Slider(batchsize=batchsize,
                                             shuffle=False,
                                             offset=offset)
# val_provider = DataProvider.DoubleSourceSlider(batchsize = 5000,
#                                                  shuffle = False, offset=offset)

x = tf.placeholder(tf.float32, shape=[None, windowlength], name='x')
y_ = tf.placeholder(tf.float32, shape=[None, 1], name='y_')

##### cnn2
network = tl.layers.InputLayer(x, name='input_layer')
network = tl.layers.ReshapeLayer(network, shape=(-1, windowlength, 1, 1))
network = tl.layers.Conv2dLayer(network,
                                act=tf.nn.relu,
                                shape=[10, 1, 1, 30],
                                strides=[1, 1, 1, 1],
                                padding='SAME',
Code Example #32
def Ouroborosfit(sess,
                 network,
                 cost,
                 dataset,
                 train_op,
                 batchsize,
                 input_size,
                 x,
                 y_,
                 pad,
                 n_epoch=50,
                 val_provider=None,
                 save_model=-1,
                 val_kwag=None,
                 save_path=None,
                 epoch_identifier=None,
                 mean=0,
                 std=1,
                 shuffle=True,
                 print_frame_loss=True):
    """

    :param sess: TensorFlow session
            sess = tf.InteractiveSession()
    :param network: a TensorLayer layer
            the network will be trained
    :param cost: cost function
    :param dataset: raw dataset
    :param train_op: training optimiser
    :param batchsize: batch size
    :param input_size: network input size
    :param x: placeholder input
    :param y_: placeholder output
    :param pad: pad of input
    :param n_epoch: number of epoch
    :param val_provider: DataProvider for validation
    :param save_model: save every `save_model` epochs (-1: save only at the end)
    :param val_kwag: parameter dict fed to the val_provider
    :param save_path: model save path
    :param epoch_identifier: append the epoch number to the save path or not
    :param mean: normalised constant mean
    :param std: normalised constant std
    :param shuffle: shuffle data or not
    :param print_frame_loss: print per frame loss or not
    :return: None
    """

    for epoch in range(n_epoch):
        start_time = time.time()
        for frame in xrange(dataset.shape[-1] - input_size[-1]):
            output_provider = DataProvider.Provider(stride=(1, 1),
                                                    input_size=input_size,
                                                    output_size=(1, 1, 1),
                                                    prediction_gap=0,
                                                    batchsize=-1,
                                                    pad=pad,
                                                    pad_value=0,
                                                    shuffle=False)
            if frame == 0:
                input_source = dataset[:, :, :input_size[-1]]
            else:
                out_kwag = {
                    'inputs': input_source,
                    'framebatch': 1,
                    'mean': mean,
                    'std': std,
                    'norm_tar': True
                }

                frame_prediction = custompredict(
                    sess=sess,
                    network=network,
                    output_provider=output_provider,
                    x=x,
                    fragment_size=1000,
                    output_length=1,
                    y_op=None,
                    out_kwag=out_kwag)
                frame_prediction = frame_prediction[0].reshape(
                    dataset.shape[0], dataset.shape[1], 1) * std + mean

                input_source = np.concatenate(
                    [input_source[:, :, 1:], frame_prediction], axis=2)

            net_input, = output_provider.feed(inputs=input_source,
                                              framebatch=1,
                                              mean=mean,
                                              std=std,
                                              norm_tar=True)
            tra_provider = DataProvider.DoubleSourceProvider(
                batchsize=batchsize, shuffle=shuffle)
            ground_truth = dataset[:, :, input_size[-1] + frame].reshape(-1, 1)

            tra_kwag = {
                'inputs': net_input[0],
                'targets': (ground_truth - mean) / std
            }

            print 'prediction:', np.mean(
                np.mean(input_source, axis=0),
                axis=0)[-1], 'GT:', dataset[:, :,
                                            input_size[-1] + frame - 1].mean()

            if print_frame_loss:
                print("Epoch %d, frame %d of %d" %
                      (epoch + 1, frame, dataset.shape[-1] - input_size[-1])),
            easyfit(sess=sess,
                    network=network,
                    cost=cost,
                    train_op=train_op,
                    tra_provider=tra_provider,
                    x=x,
                    y_=y_,
                    n_epoch=1,
                    tra_kwag=tra_kwag,
                    print_loss=print_frame_loss)

        if val_provider is not None:
            customtest(sess=sess,
                       network=network,
                       acc=None,
                       test_provider=val_provider,
                       x=x,
                       y_=y_,
                       cost=cost,
                       test_kwag=val_kwag)

        if save_model > 0 and epoch % save_model == 0:
            if epoch_identifier:
                modelsaver(network=network,
                           path=save_path,
                           epoch_identifier=epoch)
            else:
                modelsaver(network=network,
                           path=save_path,
                           epoch_identifier=None)

        print 'Epoch took:', time.time() - start_time, 's'

    if save_model == -1:
        modelsaver(network=network, path=save_path, epoch_identifier=None)
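The heart of this loop is autoregressive: each frame prediction is appended as the newest input slice while the oldest is dropped, so the window length stays fixed. A NumPy sketch of that rolling update, with made-up shapes:

import numpy as np

H, W, T = 4, 4, 8                      # hypothetical spatial size and window length
input_source = np.zeros((H, W, T))
frame_prediction = np.ones((H, W, 1))  # stand-in for the network output

input_source = np.concatenate([input_source[:, :, 1:], frame_prediction], axis=2)
print(input_source.shape)  # (4, 4, 8): the window length is unchanged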
Code Example #33
}

val_kwag = {
    'input_dir': INPUT_DIR_VAL,
    'input_dir2': INPUT_DIR_VAL2,
    'target_dir': TARGET_DIR,
    'frame_num': FRAME_NUM,
    'crop': CROP,
    'resize': RESIZE,
    'resample': RESAMPLE,
    'resize2': RESIZE2,
    'crop2': CROP2
}

tra_provider = DataProvider.MultiVideoProvider(batchsize=BATCH_SIZE,
                                               buffer_size=BUFFER_SIZE,
                                               shuffle=True)
val_provider = DataProvider.MultiVideoProvider(batchsize=BATCH_SIZE,
                                               buffer_size=1,
                                               shuffle=False)

filter_shape = 3
filter_shape_s = 2
filter_shape_l = 4
feature = 36
dense_unit = 200
merge_x = 47
merge_y = 37


def stream1(x, act=tf.nn.selu, reuse=False, name='stream1', is_train=True):
Code Example #34
File: NegationTagger.py Project: damiansalvia/cldas
def start_tagging(tofile=None):
    
    def DisplayMenu():
        os.system('clear')
        title("MENU")
        print "0 . exit"
        for i in range(sources_size):
            qty = len(dp.get_tagged('manually',sources[i])) 
            print i+1,".","%-20s" % sources[i], "(%i)" % qty
        return 
     
    def DisplayReview(_id,current,total,words,tags):
        os.system('clear')
        print "Review [%s]" % _id
        print "<<",
        for i in range(total):
            if i < current and tags[i] == 'n':
                print words[i]+"\033[93m/"+tags[i]+"\033[0m",
            elif i < current:
                print words[i]+"\033[91m/"+tags[i]+"\033[0m",
            elif i > current:
                print words[i]+"  ",
            else:
                print "\033[92m\033[4m"+words[i]+"\033[0m\033[0m  ",
        print ">>"
        
    def chunkstring(string, length):
        return (string[0+i:length+i] for i in range(0, len(string), length))
    
    def ViewSave(result,source):
        os.system('clear')
        if not result:
            return

        op = raw_input("Done! Save result for %s? [y/n] > " % source)
        if op.lower() == 'n':
            op = raw_input("Are you sure? [y/n] > ")
            if op.lower() == 'y':
                return
            
        dp.save_negations(result,tagged_as='manually')
        if tofile:save(result,"negtag_%s" % source,tofile,overwrite=False)
        
    # #------- Execute Function -------#
    
    while True:
        # Display menu options
        DisplayMenu()
        
        op = raw_input("\nOption > ")
        if not op.isdigit():
            raw_input("Opcion invalida")
            continue
        op = int(op)
        if op == 0:
            break # Exit
        if op > sources_size:
            raw_input("Opcion invalida")
            continue
        else:    
            result = {}
            id     = 0        
            source = sources[op-1]
            try:     
                # Ask for retrieving options 
                op = raw_input("\nInsert indexes separated by ',' or <intro> for pick up randomly > ")
                if op: # From indexes
                    indexes = list(set(int(i) for i in op.split(',')))
                    quantity = len(indexes)
                else: # Randomly
                    while not op.isdigit():
                        op = raw_input("How many? > ")
                    quantity = int(op)
                    indexes = []
                
                # Get a sample of reviews from options
                samples = dp.get_sample(quantity,source,indexes)
                
                # Tag every review
                left = quantity
                quit = False
                while left != 0:   
                                     
                    # Retrieve relevant data from the sample
                    sample  = samples[left-1]
                    _id     = sample['_id']
                    review  = sample['text']
                    
                    # Initialization (keep current words and empty categories)
                    words = [item['word'].encode('ascii','ignore') for item in review]
                    total = len(words)
                    tags  = ['  ' for _ in range(total)]
                    
                    # For each word, annotate with (N) or (I); (B) goes back one word
                    cat = ""
                    idx = 0
                    while True:
                        # Display review
                        DisplayReview(sample['idx'],idx,total,words,tags)
                        
                        # Check end condition
                        if idx == total:
                            op = raw_input("\nDone. Proceed with the next review (left %i)? [y/n] > " % (left-1))
                            if op == 'y':
                                break
                            idx = idx - 1 if idx != 0 else 0
                            tags[idx] = '  '
                            continue
                        
                        # Ask for input
                        tooltip  = "\nTag with N(ormal) or I(nverted). "
                        tooltip += "Enter A(bort), B(ack) S(kip), Q(uit) or <intro> for "
                        tooltip += "repeating last action (%s) > " % (cat.upper() if cat else "None")
                        tag = raw_input(tooltip)
                        
                        if not tag and not cat: # Prevents parse empty cat
                            print "Input a category first";raw_input()
                            continue
                        elif tag:
                            cat = tag
                        
                        # Action from decision
                        cat = cat.lower()
                        if not cat or cat not in 'nibasq':
                            print "Option",cat,"is not correct." ;raw_input()
                            continue
                        if cat == 'q':
                            break
                        if cat == 's':
                            break
                        elif cat == 'b': # Back
                            idx = idx - 1 if idx != 0 else 0
                            tags[idx] = '  '
                        elif cat == 'a':
                            op = raw_input("Are you sure you want to abort (left %i)? [y/n] > " % left)
                            if op.lower() == 'y': raise Exception("Abort")
                        else:
                            # Associate the category
                            tags[idx] = cat
                            idx = idx + 1
                    
                    if cat == 'q':
                        break
                    if cat == 's':
                        left -= 1
                        continue
                            
                    # Once the text is tagged, add it to the result
                    tags = map(lambda cat : cat =='i', tags)
                    result.update({
                        _id : tags
                    })
                    
                    # Update
                    left -= 1
                       
                # View and save results
                if op == 0: continue
                ViewSave(result,source)
                
            except Exception as e:
                content = json.dumps(result,indent=4,ensure_ascii=False)
                log("Reason : %s (at %s) [%i] '%s'" % ( str(e) , source , sample['idx'] , content ))
                raw_input("Reason: %s\nEnter to continue..." % str(e))