def bulkcorrectDB(project, treeids=None, commit=True):
    """
    Run correctLowerProperNouns over the trees of a project and store every
    tree that the correction changed.

    Bulk correction of a whole project is very slow. Simple changes are better
    done directly in SQL, for example to change all functions:
        update links set function='dep' where function='det';

    project: project name handed to SQL().
    treeids: optional list of tree rowids used as the "rowid" filter of
        sql.getall; when empty or None no filter is passed.
    commit: when true, commit every 100 trees and once more at the end;
        when false, this function issues no commit at all.
    """
    sql = SQL(project)
    db, cursor = sql.open()
    try:
        if treeids:
            attributes, values = ["rowid"], treeids
        else:
            attributes, values = [], []
        allt = list(sql.getall(cursor, "trees", attributes, values))
        total = len(allt)
        # Single-argument print(...) produces the same output as a Python 2
        # print statement and also parses as the Python 3 function.
        print("nb trees: %s" % total)
        ti = time()
        for nr, (treeid, sid, uid, annotype, status, comment, timestamp) in enumerate(allt):
            dic = sql.gettree(None, None, treeid, indb=db, incursor=cursor)
            if dic:
                newtree, changed = correctLowerProperNouns(dic["tree"])
                if changed:
                    print("________________________________\n")
                    _, sentence, _ = sql.enterTree(cursor, newtree, sid, uid, tokensChanged=True)
                    print(sentence)
                    print("changed %s" % (changed,))
            if nr % 100 == 0:
                if commit:
                    # Only announce a commit that is really going to happen.
                    print("committing...")
                    db.commit()
                done = nr + 1
                # Clamp the elapsed time: a coarse clock can report 0.0 and
                # would otherwise make the rate computation divide by zero.
                elapsed = max(time() - ti, 1e-6)
                rate = done / elapsed
                # Trees still to process are total - done (not total - nr + 1).
                seconds_left = (total - done) / rate
                print("_____ treeid %s nr %s / %s --- %s trees per second %s seconds ( %s minutes) to go" % (
                    treeid, done, total, int(rate), int(seconds_left), round(seconds_left / 60, 1)))
        if commit:
            db.commit()
    finally:
        # Release the connection even when a tree raises half-way through.
        db.close()
def bulkcorrectDB(project, treeids=None):
    """
    Run correctfeatures with a fixed two-node pattern over the trees of a
    project and store (and commit) every tree that the correction changed.

    Bulk correction of a whole project is very slow. Simple changes are better
    done directly in SQL, for example to change all functions:
        update links set function='dep' where function='det';

    project: project name handed to SQL().
    treeids: optional list of tree rowids used as the "rowid" filter of
        sql.getall; when empty or None no filter is passed.
    """
    sql = SQL(project)
    db, cursor = sql.open()
    try:
        if treeids:
            attributes, values = ["rowid"], treeids
        else:
            attributes, values = [], []
        # Materialize the result so len() is valid even if getall returns a
        # lazy iterable (assumption; harmless if it is already a list).
        allt = list(sql.getall(cursor, "trees", attributes, values))
        total = len(allt)
        ti = time()
        for nr, (treeid, sid, uid, annotype, status, comment, timestamp) in enumerate(allt):
            done = nr + 1
            # The first report runs right after ti was taken, so the elapsed
            # time can be 0.0; clamp it to avoid a ZeroDivisionError.
            elapsed = max(time() - ti, 1e-6)
            rate = done / elapsed
            # Trees still to process are total - done (not total - nr + 1).
            seconds_left = (total - done) / rate
            # Single-argument print(...) produces the same output as a Python 2
            # print statement and also parses as the Python 3 function.
            print("_____ treeid %s nr %s / %s --- %s trees per second %s seconds to go %s minutes to go" % (
                treeid, done, total, rate, seconds_left, seconds_left / 60))
            dic = sql.gettree(None, None, treeid, indb=db, incursor=cursor)
            if not dic:
                continue
            sentencetree = dic["tree"]
            # Other correction calls that have been plugged in at this point:
            #   complexbulkcorrectdic(sentencetree)
            #   simplebulkcorrectdic(sentencetree)
            #   correctfeatures(sentencetree)
            #   correctfeatures(sentencetree, 4, {})
            # The pattern dict is rebuilt for every tree, so each call gets
            # its own fresh copy.
            newdic, changed = correctfeatures(sentencetree, 4, {
                'i1': {
                    u'person': u'3',
                    u'number': u'sg',
                    u'cat': u'V',
                    u'lemma': u'\xeatre',
                    u'token': u'est',
                    u'tense': u'present',
                    u'mode': u'indicative',
                    'gov': {'i0': 'root'},
                    u't': u'A',
                },
                'i2': {
                    u'cat': u'Cl',
                    u'lemma': u'c',
                    u'token': u'-ce',
                    u't': u'B',
                    'gov': {'i1': u'sub'},
                    'child': None,
                },
            })
            if changed:
                print("________________________________\n")
                _, sentence, _ = sql.enterTree(cursor, newdic, sid, uid, tokensChanged=True)
                print(sentence)
                print("changed")
                # Commit right after each rewritten tree.
                db.commit()
    finally:
        # Release the connection even when a tree raises half-way through.
        db.close()
def bulkcorrectDB(project, treeids=None):
    """
    Apply correctfeatures (level argument 4, fixed 'i1'/'i2' node pattern) to
    the trees of a project, writing back and committing each changed tree.

    Bulk correction of a whole project is very slow. Simple changes are better
    done directly in SQL, for example to change all functions:
        update links set function='dep' where function='det';

    project: project name handed to SQL().
    treeids: optional list of tree rowids used as the "rowid" filter of
        sql.getall; when empty or None no filter is passed.
    """
    sql = SQL(project)
    db, cursor = sql.open()
    try:
        if treeids:
            columns, wanted = ["rowid"], treeids
        else:
            columns, wanted = [], []
        # list(...) guarantees len() works below even if getall hands back a
        # lazy iterable (assumption; a no-op copy if it is already a list).
        allt = list(sql.getall(cursor, "trees", columns, wanted))
        ntrees = len(allt)
        ti = time()
        for nr, (treeid, sid, uid, annotype, status, comment, timestamp) in enumerate(allt):
            processed = nr + 1
            # Guard against a 0.0 elapsed time on the first iteration, which
            # would make the speed computation divide by zero.
            elapsed = max(time() - ti, 1e-6)
            speed = processed / elapsed
            # Remaining work is ntrees - processed (not ntrees - nr + 1).
            eta = (ntrees - processed) / speed
            # One-argument print(...) gives identical output under the
            # Python 2 print statement and the Python 3 print function.
            print("_____ treeid %s nr %s / %s --- %s trees per second %s seconds to go %s minutes to go" % (
                treeid, processed, ntrees, speed, eta, eta / 60))
            dic = sql.gettree(None, None, treeid, indb=db, incursor=cursor)
            if dic:
                sentencetree = dic["tree"]
                # Alternative correction calls previously used here:
                #   complexbulkcorrectdic(sentencetree)
                #   simplebulkcorrectdic(sentencetree)
                #   correctfeatures(sentencetree)
                #   correctfeatures(sentencetree, 4, {})
                newdic, changed = correctfeatures(
                    sentencetree, 4, {
                        'i1': {
                            u'person': u'3',
                            u'number': u'sg',
                            u'cat': u'V',
                            u'lemma': u'\xeatre',
                            u'token': u'est',
                            u'tense': u'present',
                            u'mode': u'indicative',
                            'gov': {
                                'i0': 'root'
                            },
                            u't': u'A'
                        },
                        'i2': {
                            u'cat': u'Cl',
                            u'lemma': u'c',
                            u'token': u'-ce',
                            u't': u'B',
                            'gov': {
                                'i1': u'sub'
                            },
                            'child': None
                        }
                    })
                if changed:
                    print("________________________________\n")
                    tokensChanged = True
                    _, sentence, _ = sql.enterTree(cursor, newdic, sid, uid,
                                                   tokensChanged=tokensChanged)
                    print(sentence)
                    print("changed")
                    # Commit after every rewritten tree.
                    db.commit()
    finally:
        # Always close the connection, including when a tree raises.
        db.close()