コード例 #1
0
def exportUniqueSentences(project, mode="lasttree", pattern=False):
    """
	exports one tree per sentences: the first time the sentence is found, the newest tree
	"""
    sql = SQL(project)
    db, cursor = sql.open()
    sentences = {}  # toks -> tree
    outdir = os.path.join("..", "projects", project, "export")
    try:
        os.mkdir(outdir)
    except OSError:
        pass
    outfile = os.path.join(outdir, "allSentences.conll")
    if pattern:
        command = """select trees.rowid,userid,max(timestamp) from trees, sentences, texts where texts.rowid=sentences.textid and sentences.rowid=trees.sentenceid
		and textname like "{pattern}"
		group by sentenceid order by trees.rowid;""".format(pattern=pattern)
    else:
        command = """select trees.rowid,userid,max(timestamp) from trees, sentences, texts where texts.rowid=sentences.textid and sentences.rowid=trees.sentenceid
		group by sentenceid order by trees.rowid;"""
    for i, (
            treeid,
            userid,
            timestamp,
    ) in enumerate(cursor.execute(command).fetchall()):
        tree = sql.gettree(treeid=treeid, indb=db, incursor=cursor)["tree"]
        toks = tuple(tree[i]["t"] for i in tree)
        print "___", i, "\r",
        if toks not in sentences:
            sentences[toks] = tree
    print "writing file with", len(sentences), "sentences..."
    conll.trees2conllFile([sentences[toks] for toks in sorted(sentences)],
                          outfile=outfile,
                          columns=10)
    return outfile
コード例 #2
0
def extractConllFiles(project,outfolder):
	"""
	creates the empty files with a word per line as a first step to mate parsing
	"""
	if outfolder[-1]!="/": outfolder=outfolder+"/"
	texts={}
	sql = SQL(project)
	db, cursor = sql.open()
	command = """select distinct texts.textname, sentences.nr, features.nr, features.value 
		from features, trees, texts, sentences, users
		where attr = "t" and trees.rowid=features.treeid --and sentences.textid=13 
		and trees.sentenceid=sentences.rowid and sentences.textid=texts.rowid and users.user="******";"""
	cursor.execute(command)
	a = cursor.fetchall()
	#print a
	for nr, (textname, snr, num, token) in enumerate(a):
		#sql.exportAnnotations(textid, textname, "lastconll")
		texts[textname]= texts.get(textname,[])+[(snr, num,token)]
	newfiles=[]
	for textname in texts:
		print "processing",textname
		f=codecs.open(outfolder+textname, "w", "utf-8")
		for c, (snr, num, tok) in enumerate(sorted(texts[textname])):
			if num == 1 and c > 0:
				f.write('\n')
			f.write("\t".join([str(num), tok]+["_"]*12)+'\n')
		print c+1, "tokens"
		f.close()
		newfiles+=[outfolder+textname]
	return newfiles
コード例 #3
0
def directDatabaseChangeForForgottenCorrection():
    from database import SQL
    sql = SQL("Platinum")
    db, cursor = sql.open()
    cursor.execute('update links set function="comp" where function="aff";')
    db.commit()
    db.close()
    print "changed"
コード例 #4
0
def lastTreeForAllSamples(project, onlyHuman=True, combine=False):
    outdir = os.path.join("..", "projects", project, "export")
    try:
        os.mkdir(outdir)
    except OSError:
        pass
    sql = SQL(project)
    db, cursor = sql.open()
    if onlyHuman:
        parserid = 0
        for pid, in cursor.execute(
                "select rowid from users where user='******';"):
            parserid = pid
    else:
        parserid = -1
    sents = sorted(
        cursor.execute(
            "select texts.textname, sentences.rowid, sentences.nr from sentences, texts where texts.rowid=sentences.textid;"
        ).fetchall())
    print "todo:", len(sents), "sentences"
    pbar = tqdm.tqdm(total=len(sents))
    annotators = {}

    if combine:
        trees = []
        getTreesForSents(sents,
                         trees,
                         annotators,
                         parserid,
                         cursor,
                         db,
                         sql,
                         pbar,
                         project=project)
        outfile = os.path.join(outdir,
                               project + ".lastHumanTreeForAllSamples.conllu")
        conll.trees2conllFile(trees, outfile=outfile)
        print "wrote", outfile

    else:
        for tid, textname, nrtokens in list(
                cursor.execute("select rowid, * from texts;")):
            print tid, textname, nrtokens
            sents = list(
                cursor.execute(
                    "select rowid, * from sentences where textid=?;",
                    (tid, )).fetchall())
            trees = []
            getTreesForSents(sents, trees, annotators, parserid, cursor, db,
                             sql, pbar)
            if textname.endswith(".conll_parse"):
                textname = textname[:len(".conll_parse")]
            outfile = os.path.join(outdir, textname + ".lastHumanTrees.conllu")
            conll.trees2conllFile(trees, outfile=outfile)
            print "wrote", outfile
    for a in annotators:
        print a, annotators[a]
コード例 #5
0
ファイル: trees2train.py プロジェクト: nschneid/arborator
def exportConllByAnnotators(project, annotators=["prof", "Sy", "parser"]):
    """
	exports complete project
	for every sentence, trees of annotators in given order.
	if no tree: throw error 
	
	"""
    outfiles = []
    sql = SQL(project)
    db, cursor = sql.open()
    goodTexts = {}
    outdir = os.path.join("..", "projects", project, "export")
    try:
        os.mkdir(outdir)
    except OSError:
        pass

    annotatorIds = tuple(a for (a, ) in [
        list(
            cursor.execute("select rowid from users where user =?;", (
                annotator, )))[0] for annotator in annotators
    ])
    #print annotators, annotatorIds

    for textid, textname, nrtokens in list(
            cursor.execute("select rowid, * from texts;")):  # for each text
        print "doing", textname, "with", nrtokens, "tokens"
        nrutids = {}
        for nr, userid, treeid in list(
                cursor.execute(
                    "select nr,userid,trees.rowid as treeid from trees, sentences where sentenceid=sentences.rowid and userid in {annotatorIds} and  textid = ? order by nr;"
                    .format(annotatorIds=annotatorIds), (textid, ))):
            nrutids[nr] = nrutids.get(nr, {})
            nrutids[nr][userid] = treeid
        trees = []
        for nr in sorted(nrutids):  # for each sentence
            tree = None
            for aid in annotatorIds:  # for each interesting annotator id
                if aid in nrutids[nr]:
                    tree = sql.gettree(treeid=nrutids[nr][aid],
                                       indb=db,
                                       incursor=cursor)["tree"]
                    trees += [tree]
                    #print "atree:",tree
                    break
            if not tree:
                print "problem: no tree for nr", nr, "type", type(nr)
                print "annotatorIds", annotatorIds
                raise Exception('no tree', nr)

        if textname.endswith(".conll"): textname = textname[:-len(".conll")]
        outfile = os.path.join(outdir, textname)
        conll.trees2conllFile(trees, outfile=outfile, columns=10)
        print len(trees), "trees"
        outfiles += [outfile]
    return outfiles
コード例 #6
0
def exportConllByAnnotators(project,
                            annotators=["prof", "Sy", "parser"],
                            fileExtension=".conllu"):
    """
	exports complete project
	for every sentence, trees of annotators in given order.
	if no tree: throw error

	"""
    outfiles = []
    sql = SQL(project)
    db, cursor = sql.open()
    goodTexts = {}
    outdir = os.path.join("..", "projects", project, "export")
    try:
        os.mkdir(outdir)
    except OSError:
        pass
    try:
        annotatorIds = tuple(a for (a, ) in [
            list(
                cursor.execute("select rowid from users where user =?;", (
                    annotator, )))[0] for annotator in annotators
        ])
    except:
        print "some required annotator IDs are not in the database"
        return
    print annotators, annotatorIds

    for textid, textname, nrtokens in list(
            cursor.execute("select rowid, * from texts;")):  # for each text
        print "doing", textname, "with", nrtokens, "tokens"
        nrutids = {}
        for nr, userid, treeid in list(
                cursor.execute(
                    "select nr,userid,trees.rowid as treeid from trees, sentences where sentenceid=sentences.rowid and userid in {annotatorIds} and  textid = ? order by nr;"
                    .format(annotatorIds=annotatorIds if len(annotatorIds) > 1
                            else '(' + str(annotatorIds[0]) + ')'),
                    (textid, ))):
            nrutids[nr] = nrutids.get(nr, {})
            nrutids[nr][userid] = treeid
        trees = getSpecificTrees(sql, db, cursor, nrutids, annotatorIds)
        if trees:
            if textname.endswith(".conll"):
                textname = textname[:-len(".conll")]
            if textname.endswith(".conllu"):
                textname = textname[:-len(".conllu")]
            outfile = os.path.join(outdir, textname + fileExtension)
            conll.trees2conllFile(trees, outfile=outfile, columns=10)
            print len(trees), "trees"
            outfiles += [outfile]
        else:
            print "skipped", textname
    return outfiles
コード例 #7
0
ファイル: trees2train.py プロジェクト: nschneid/arborator
def printTree(project, treeid):
    sql = SQL(project)
    db, cursor = sql.open()
    dic = sql.gettree(
        None, None, treeid, indb=db,
        incursor=cursor)  # dic -> get tree == on récupère l'arbre
    #print dic
    if dic and dic["tree"]:
        sentencetree = dic["tree"]
        #sentencetree=corrigerNumerotation(sentencetree)
        for i in sorted(sentencetree):
            print i, sentencetree[i]
コード例 #8
0
 def _re_init(self, confirm_name):
     if confirm_name != self.name:
         return False
     if not self.valied:
         return False
     sql = SQL().Delete('label').Where(data_set_id=self.id).sql
     mysql.run(sql)
     sql = SQL().Delete('image').Where(data_set_id=self.id).sql
     mysql.run(sql)
     sql = SQL().Delete('model').Where(data_set_id=self.id).sql
     mysql.run(sql)
     return True
コード例 #9
0
def bulkcorrectDB(project, treeids=[]):
	"""
	bulk correction of a whole project! very slow!
	
	better to do directly in sql, for example:
	#change all functions:
		#update links set function='dep' where function='det';

	"""
	
	sql = SQL(project)
	db,cursor=sql.open()
	
	if treeids:	a,v=["rowid"],treeids
	else:		a,v=[],[]
	
	allt=sql.getall(cursor, "trees",a,v)
	
	ti = time()
	
	for nr, (treeid,sid,uid,annotype,status,comment,timestamp) in enumerate(allt):
		
		print "_____ treeid",treeid,"nr",nr+1,"/",len(allt),"---",float(nr+1)/(time()-ti),"trees per second",float(len(allt)-nr+1)/(float(nr+1)/(time()-ti)),"seconds to go",float(len(allt)-nr+1)/(float(nr+1)/(time()-ti))/60,"minutes to go"
		
		dic=sql.gettree(None,None,treeid, indb=db,incursor=cursor)
		if dic:
			sentencetree=dic["tree"]
			
			#newdic, changed = complexbulkcorrectdic(sentencetree)
			#newdic, changed = simplebulkcorrectdic(sentencetree)
			#newdic, changed = correctfeatures(sentencetree)
			#newdic, changed = correctfeatures(sentencetree,4,{})
			newdic, changed = correctfeatures(sentencetree,4,{	
				'i1':{ u'person': u'3', u'number': u'sg', u'cat': u'V', u'lemma': u'\xeatre', u'token': u'est', u'tense': u'present', u'mode': u'indicative', 'gov': {'i0':'root'}, u't': u'A'},
				'i2':{u'cat': u'Cl', u'lemma': u'c', u'token': u'-ce', u't': u'B', 'gov': {'i1': u'sub'}, 'child':None}
			})
			
	
			
			#break
			if changed:
				print "________________________________\n"
				#for i,node in newdic.iteritems():
					#print i, node["t"], node
				#1/0
				tokensChanged=True
				ws,sentence,_ = sql.enterTree(cursor, newdic, sid, uid,tokensChanged=tokensChanged)
				print sentence
				print "changed"
				db.commit()
	#db.commit()
	db.close()
コード例 #10
0
 def load(self, limit=None):
     """Load the image rows of this data set that have a label.

     The rows are stored in ``self.data`` and their number in
     ``self.data_count``.

     Args:
         limit: optional value handed to the query builder's ``Limit``.

     Returns:
         The number of loaded rows, or -1 when ``self.valied`` is falsy or
         the query returns ``None``.
     """
     if not self.valied:
         return -1
     query = SQL().Select('image').Where(data_set_id=self.id)
     query.sql += "and label_id is not null "
     if limit is not None:
         query = query.Limit(limit)
     rows = mysql.query(query.sql)
     if rows is None:
         return -1
     self.data = list(rows)
     self.data_count = len(self.data)
     return self.data_count
コード例 #11
0
 def _add_img(self, data_set_name, path, idx=None):
     """Insert an image row for the named data set.

     The row is only inserted when the data set exists and has a label row
     with the given ``idx``; ``idx`` is stored in the ``label_id`` column.

     Returns:
         The result of ``mysql.run`` for the insert, or False when the data
         set or the label is not found.
     """
     found = mysql.find('data_set', name=data_set_name)
     if not found:
         return False
     ds_id = found[0]['id']
     if not mysql.find('label', data_set_id=ds_id, idx=idx):
         return False
     insert = SQL().Insert('image').Values(
         path=path, data_set_id=ds_id, label_id=idx)
     return mysql.run(insert.sql)
コード例 #12
0
 def save_model(self, name, path, gpu_counts=None, is_half=None):
     """Insert a row describing a saved model of this data set.

     Args:
         name: stored in the ``name`` column.
         path: stored in the ``path`` column.
         gpu_counts: stored in the ``gpu_count`` column.
         is_half: stored in the ``is_half`` column.

     Returns:
         The result of ``mysql.run`` for the insert, or False when
         ``self.valied`` is falsy.
     """
     if not self.valied:
         return False
     insert = SQL().Insert('model').Values(
         name=name,
         path=path,
         data_set_id=self.id,
         gpu_count=gpu_counts,
         is_half=is_half,
     )
     return mysql.run(insert.sql)
コード例 #13
0
 def _get_classes(self, data_set_id=None):
     """Build the label lookup tables of a data set.

     Args:
         data_set_id: data set to query; ``self.id`` is used when it is None.

     Returns:
         ``(class_to_idx, idx_to_class)``: two dictionaries mapping label
         value to idx and idx to label value. Both are empty when the query
         returns ``None``.
     """
     ds_id = self.id if data_set_id is None else data_set_id
     query = SQL().Select('label', ['idx', 'value']).Where(data_set_id=ds_id)
     rows = mysql.query(query.sql)
     if rows is None:
         return {}, {}
     idx_to_class = {row['idx']: row['value'] for row in rows}
     # inverted from idx_to_class, not from the rows directly
     class_to_idx = {cla: idx for (idx, cla) in idx_to_class.items()}
     return class_to_idx, idx_to_class
コード例 #14
0
 def __getitem__(self, index):
     """Return ``(img, target)`` for the row at ``index`` of ``self.data``.

     The image is loaded from ``self.root_path`` + the row's ``path``. The
     target is the ``idx`` of the row's label; it is read from the database
     on first use and cached in ``self.id_to_idx``. ``self.transform`` and
     ``self.target_transform`` are applied when they are not None.
     """
     record = self.data[index]
     img = self.__pil_loader(self.root_path + record['path'])
     label_id = record['label_id']
     if self.id_to_idx.get(label_id) is None:
         # not cached yet: look the label up once and remember its idx
         query = SQL().Select('label').Where(id=label_id)
         rows = mysql.query(query.sql)
         self.id_to_idx[label_id] = rows[0]['idx']
     target = self.id_to_idx[label_id]
     if self.transform is not None:
         img = self.transform(img)
     if self.target_transform is not None:
         target = self.target_transform(target)
     return img, target
コード例 #15
0
 def _add_data_set_from_folder(self, name, root_path=None):
     """Register a data set and import the labels and images of its folder.

     When no data set named ``name`` is found (``mysql.find`` returns False)
     a row is inserted first and the method calls itself again. Otherwise the
     stored root path is updated, the classes found under the root are added
     as labels and every image file in each class folder is inserted with
     the path ``/<label>/<file>``.

     ``mysql.is_print_log`` is switched off during the import and restored
     afterwards, also when the import raises; progress is printed only if it
     was switched on before.

     Args:
         name: name of the data set.
         root_path: root folder; when None the stored ``root_path`` of an
             existing data set is used.

     Returns:
         True after the import, False when the data set row could not be
         inserted or a new data set was given no ``root_path``.
     """
     result = mysql.find('data_set', name=name)
     if result is False:
         sql = SQL().Insert('data_set').Values(name=name, root_path=root_path).sql
         if not mysql.run(sql):
             return False
         if root_path is None:
             return False
         return self._add_data_set_from_folder(name, root_path)
     data_set_id = result[0]['id']
     root = root_path if root_path is not None else result[0]['root_path']
     sql = SQL().Update('data_set').Set(
         root_path=root).Where(id=data_set_id).sql
     mysql.run(sql)
     classes, _ = self.__find_classes(root)
     tmp = mysql.is_print_log
     mysql.is_print_log = False
     try:
         count, classes_count = self._add_labels(classes, name)
         class_to_idx, idx_to_class = self._get_classes(
             data_set_id=data_set_id)
         if tmp:
             print('>>add {} labels done, total({})'.format(
                 count, classes_count))
         count = 0
         for label in classes:
             pre_path = os.path.join(root, label)
             files = self.__find_files(pre_path)
             for f in files:
                 path = '/%s/%s' % (label, f)
                 if self.__is_img_file(path):
                     self._add_img(name, path, class_to_idx[label])
                     count += 1
                     if tmp and count % 1000 == 0:
                         print('>>insert {} imgs'.format(count))
             if tmp:
                 print('>>insert {} imgs done'.format(count))
     finally:
         # restore the logging flag even when an insert fails
         mysql.is_print_log = tmp
     return True
コード例 #16
0
 def get_model(self, name=None, id=None):
     """Fetch a model row of this data set.

     With ``id`` the row is looked up by id, data set and name; without it
     the matching row with the highest id is taken.

     Returns:
         The model row, None when nothing matches, or False when
         ``self.valied`` is falsy.
     """
     if not self.valied:
         return False
     if id is None:
         query = SQL().Select('model').Where(data_set_id=self.id, name=name)
         query = query.OrderBy(id='DESC').Limit(1)
         rows = mysql.query(query.sql)
         if rows is None or len(rows) == 0:
             return None
         return rows[0]
     found = mysql.find('model', id=id, data_set_id=self.id, name=name)
     return found[0] if found else None
コード例 #17
0
def bulkcorrectDB(project, treeids=[], commit=True):
    """
	bulk correction of a whole project! very slow!
	
	better to do directly in sql, for example:
	#change all functions:
		#update links set function='dep' where function='det';

	"""

    sql = SQL(project)
    db, cursor = sql.open()

    if treeids: a, v = ["rowid"], treeids
    else: a, v = [], []

    allt = list(sql.getall(cursor, "trees", a, v))
    print "nb trees:", len(allt)
    ti = time()

    for nr, (treeid, sid, uid, annotype, status, comment,
             timestamp) in enumerate(allt):

        dic = sql.gettree(None, None, treeid, indb=db, incursor=cursor)
        if dic:
            tree = dic["tree"]
            newtree, changed = correctLowerProperNouns(tree)

            if changed:
                print "________________________________\n"
                ws, sentence, _ = sql.enterTree(cursor,
                                                newtree,
                                                sid,
                                                uid,
                                                tokensChanged=True)
                print sentence
                print "changed", changed
                if not nr % 100:
                    print "committing..."
                    if commit: db.commit()
            if not nr % 100:
                print "_____ treeid", treeid, "nr", nr + 1, "/", len(
                    allt), "---", int(float(nr + 1) /
                                      (time() - ti)), "trees per second", int(
                                          float(len(allt) - nr + 1) /
                                          (float(nr + 1) /
                                           (time() - ti))), "seconds (", round(
                                               float(len(allt) - nr + 1) /
                                               (float(nr + 1) /
                                                (time() - ti)) / 60,
                                               1), "minutes) to go"
    if commit: db.commit()
    db.close()
コード例 #18
0
 def _add_labels(self, labels, data_set_name=None):
     """Insert the labels that the data set does not have yet.

     New labels get consecutive ``idx`` values starting at the number of
     label rows the data set already has.

     Args:
         labels: label values to add.
         data_set_name: data set to add them to; ``self.name`` when None.

     Returns:
         ``(count, total)``: the number of labels inserted and the number of
         existing label rows plus ``count``. ``(0, 0)`` when the data set is
         not found.
     """
     if data_set_name is None:
         data_set_name = self.name
     count = 0
     # defined up front so the return below also works when the data set is
     # missing (it used to be bound only inside the branch)
     start_idx = 0
     result_ds = mysql.find('data_set', name=data_set_name)
     if result_ds:
         data_set_id = result_ds[0]['id']
         result_lb_list = mysql.find(
             'label', data_set_id=data_set_id)
         if result_lb_list:
             known_values = set(x['value'] for x in result_lb_list)
             start_idx = len(result_lb_list)
         else:
             known_values = set()
         for label in labels:
             if label not in known_values:
                 sql = SQL().Insert('label').Values(
                     data_set_id=data_set_id, value=label,
                     idx=(start_idx + count)).sql
                 if mysql.run(sql):
                     count += 1
     return count, start_idx + count
コード例 #19
0
#!/usr/bin/python
# -*- coding: utf-8 -*-

import sys,cgi,cgitb,codecs
from database import SQL

#cgitb.enable()
#project = form.getvalue('project',"").decode("utf-8")
project="depexo"
#form = cgi.FieldStorage()
#uid = form.getvalue('uid',None)
uid = 1 # admin
#print 'Content-type: text/html\n\n'

sql=SQL(project)
	
#print project.encode('utf-8'),"uuuuuuuuuuuuuuuuuuuuu",uid

tid=4

uids=[uid for uid, in sql.uidForText(tid)]
print uids
with codecs.open(project+str(tid)+".results.html","w","utf-8") as out:
	for i,uid in enumerate(uids):
		print "__________________",uid,i,float(i)/len(uids)*100,"%"
		out.write(sql.evaluateUser(uid, tid, consolidateLine=True)+"\n")
コード例 #20
0
ファイル: trees2train.py プロジェクト: nschneid/arborator
def getValidatedTrees(project, folder, whoseTrees="validator"):
    sql = SQL(project)
    db, cursor = sql.open()
    sentenceValidationInValidatedText(cursor, sql, db)
    #on récupère les nouveaux arbres
    b = databaseQuery(cursor, table=whoseTrees)
    print len(b), u"trees to extract"
    sids2all = {}
    trees = []
    error_trees = []
    textnames = {}
    for nr, (treeid, textname, user, snr, sid, uid, annotype, status, comment,
             timestamp) in enumerate(b):
        # TODO: remove:
        #if textname.startswith("mandarinParsed"):continue
        sids2all[sid] = sids2all.get(
            sid, []) + [(timestamp, textname, user, snr, treeid)]
        textnames[textname] = None
    #print len(sids2all)
    print u"trees extracted from the samples", ", ".join(sorted(textnames))
    lastpourc = -1
    for c, sid in enumerate(sids2all):
        pourc = int(float(c) / len(sids2all) * 100)
        if pourc != lastpourc:
            sys.stdout.write("{pourc}%\r".format(pourc=pourc))
        sys.stdout.flush()

        snr, treeid2get = sorted(sids2all[sid])[0][-2:]
        #print treeid2get, type(treeid2get)
        #lknlk
        dic = sql.gettree(None, None, treeid2get, indb=db,
                          incursor=cursor)  # dic -> get tree
        #if treeid2get==9669:
        #print 9669,dic

        if dic:
            sentencetree = dic["tree"]
            sentencetree = corrigerNumerotation(sentencetree)
            trees.append(sentencetree)
            #print " ".join(node["t"] for i,node in sentencetree.iteritems())
            if checkTree(sentencetree)[0] == False:
                if checkTree(sentencetree)[1] == "self":
                    error_trees += [
                        "\t".join([
                            textname,
                            str(snr), user,
                            "node " + str(checkTree(sentencetree)[2]) +
                            " points to itself"
                        ])
                    ]
                else:
                    error_trees += [
                        "\t".join([
                            textname,
                            str(snr), user, "no gov at node " +
                            str(checkTree(sentencetree)[2])
                        ])
                    ]
                trees.remove(sentencetree)
                #print "nr arbres",len(trees)
        lastpourc = pourc
    print len(error_trees), "arbre(s) avec erreurs."
    if len(error_trees) > 0:
        print "\t".join(["Texte", "num phrase", "correcteur", "cause"])
        for x in sorted(list(set(error_trees))):
            print x
        f = codecs.open(
            folder + "logs/log_erreurs." +
            datetime.datetime.now().strftime('%Y-%m-%d') + ".tsv", "w",
            "utf-8")
        f.write("\t".join(["Texte", "num phrase", "correcteur", "cause"]) +
                '\n')
        for e in error_trees:
            f.write(e + '\n')
        f.close()
        print "Erreurs dans", f.name
    print len(trees), "arbres restants pour entrainement"
    #Creation d'un fichier log
    db.commit()
    db.close()
    return trees
コード例 #21
0
ファイル: trees2train.py プロジェクト: nschneid/arborator
def fusionForgottenTrees(project="Platinum",
                         fusdir="../projects/OrfeoGold2016/platinum/*",
                         annotators=["admin"]):
    """
	takes trees from project ordered by annotators. if they exist fuse them into the fusdir
	result has the extension "cool.conll"
	,"Sy","Marion"
	"""

    #print lemmacorrection
    sys.path.insert(0, '../tools')
    import difflib
    outfiles = []
    sql = SQL(project)
    db, cursor = sql.open()
    goodTexts = {}
    outdir = os.path.join("..", "projects", project, "exportcool")
    try:
        os.mkdir(outdir)
    except OSError:
        pass
    for annotator in annotators:
        print[
            list(
                cursor.execute("select rowid from users where user =?;",
                               (annotator, )))
        ]
    annotatorIds = tuple(a for (a, ) in [
        list(
            cursor.execute("select rowid from users where user =?;", (
                annotator, )))[0] for annotator in annotators
    ])
    print annotators, annotatorIds

    for textid, textname, nrtokens in list(
            cursor.execute("select rowid, * from texts;")):  # for each text
        print "\n__________________________doing", textname, "with", nrtokens, "tokens"
        nrutids = {}
        for nr, userid, treeid in list(
                cursor.execute(
                    "select nr,userid,trees.rowid as treeid from trees, sentences where sentenceid=sentences.rowid and userid in {annotatorIds} and  textid = ? order by nr;"
                    .format(annotatorIds=annotatorIds), (textid, ))):
            nrutids[nr] = nrutids.get(nr, {})
            nrutids[nr][userid] = treeid
        trees = {}
        for nr in sorted(nrutids):  # for each sentence
            tree = None
            for aid in annotatorIds:  # for each interesting annotator id
                if aid in nrutids[nr]:
                    tree = sql.gettree(treeid=nrutids[nr][aid],
                                       indb=db,
                                       incursor=cursor)["tree"]
                    trees[nr] = tree
                    #print "atree:",tree
                    break
            #if not tree:
            #print "problem: no tree for nr",nr,"type",type(nr)
            #print "annotatorIds",annotatorIds
            #raise Exception('no tree', nr)
        #print trees
        print len(trees), "trees from", project
        print textname, textname.split(".")[0]
        btextname = os.path.basename(textname).split(".")[0]
        if btextname.endswith("-one-word-per-line"):
            btextname = btextname[:-len("-one-word-per-line")]
        #print glob.glob(fusdir),[os.path.basename(fi).split(".")[0] for fi in glob.glob(fusdir)]
        cooltrees = []
        ptrees, ftrees = 0, 0
        for fi in glob.glob(fusdir):
            if btextname == os.path.basename(fi).split(".")[0]:
                print "yes", btextname
                fustrees = conll.conllFile2trees(fi)
                print len(fustrees), "ftrees", fi
                for nr, ftree in enumerate(fustrees):
                    if nr + 1 in trees:
                        #print "added tree",nr+1,"from database"
                        #ptree=platinum(trees[nr+1])
                        ptree = trees[nr + 1]
                        for iii in ptree:
                            ptree[iii]["tag2"] = "_"
                            if ptree[iii]["lemma"] in lemmacorrection:
                                ptree[iii]["lemma"] = lemmacorrection[
                                    ptree[iii]["lemma"]]
                        cooltrees += [ptree]
                        #print nr+1,"tree from",project#,tree
                        ptrees += 1
                        if ftree.sentence() != u" ".join(
                            [ptree[i].get("t", "") for i in sorted(ptree)]):
                            print "\n_________", nr + 1
                            print ftree.sentence()
                            print u" ".join(
                                [ptree[i].get("t", "") for i in sorted(ptree)])
                            #for l in difflib.context_diff(ftree.sentence() ,u" ".join([ptree[i].get("t","") for i in sorted(ptree)])):print l

                        #print "dbtree",platinum(trees[nr+1])
                    else:
                        for iii in ftree:
                            ftree[iii]["tag2"] = "_"
                            if ftree[iii]["lemma"] in lemmacorrection:
                                ftree[iii]["lemma"] = lemmacorrection[
                                    ftree[iii]["lemma"]]
                        #print nr+1,"tree from",fusdir#,tree
                        ftrees += 1
                        cooltrees += [ftree]
                        #print "added tree",nr+1,"from fustrees",fi
                outfile = os.path.join(outdir, textname + ".cool.conll")
                conll.trees2conllFile(cooltrees, outfile=outfile, columns=10)
                print "wrote", outfile
                print ptrees, "ptrees, ", ftrees, "ftrees"
                break
        if len(cooltrees) == 0: print "nothing for", btextname
        outfiles += [outfile]
        #qsdf
    return outfiles
コード例 #22
0
ファイル: trees2train.py プロジェクト: nschneid/arborator
def exportGoodTexts(project,
                    lastHuman=False,
                    onlyValidated=True,
                    pattern=False):
    """
	TODO :
	- ajouter parametre p/selectionner Texte
	ex : "UD_ZH_[number]"
	"""
    outfiles = []
    sql = SQL(project)
    db, cursor = sql.open()
    goodTexts = {}
    if onlyValidated: onlyValidated = "and todos.status=1"
    else: onlyValidated = ""
    # take all texts where a validator has validated
    if pattern:
        command = "select distinct * from texts, todos, users where texts.rowid=todos.textid and users.rowid=todos.userid and texts.textname {pattern};".format(
            pattern=pattern)  # like 'UD_ZH%'
    else:
        command = "select distinct * from texts, todos, users where texts.rowid=todos.textid and todos.type=1 {onlyValidated} and users.rowid=todos.userid;".format(
            onlyValidated=onlyValidated)
    for row in cursor.execute(command):
        textname, nrtokens, userid, textid, validator, status, comment, user, realname = row
        goodTexts[textid] = (textname, userid, user)
        print "i'll take", textname, "validated by", user, "with", nrtokens, "tokens"
    sentenceValidationInValidatedText(cursor, sql, db)
    outdir = os.path.join("..", "projects", project, "export")
    try:
        os.mkdir(outdir)
    except OSError:
        pass

    for textid, (textname, userid, user) in goodTexts.iteritems():
        textname = textname.replace("-one-word-per-line.conll14_Parse", "")

        if lastHuman:
            outfile = os.path.join(outdir, textname + ".lastHuman.conll")
        else:
            outfile = os.path.join(
                outdir, "validated." + textname + "." + user + ".conll")
        print "doing", textname, textid
        trees = []

        if lastHuman:
            snr2all = {}
            for row in cursor.execute(
                    """
			select sentences.nr as snr, trees.rowid as treeid, users.user, trees.timestamp 
			from sentences, trees, users 
			where sentences.textid=? 
			and sentences.rowid=trees.sentenceid 
			and users.rowid = trees.userid; """, (textid, )):
                snr, treeid, user, timestamp = row
                snr2all[snr] = snr2all.get(snr,
                                           []) + [(timestamp, user, treeid)]
            lastpourc = -1
            for c, snr in enumerate(sorted(snr2all)):
                pourc = int(float(c) / len(snr2all) * 100)
                if pourc != lastpourc:
                    print "___{pourc}%___\r".format(pourc=pourc),

                lastusersnotparser = sorted([
                    (timestamp, user, treeid)
                    for (timestamp, user, treeid) in snr2all[snr]
                    if user not in ["parser", "mate"]
                ])
                if len(lastusersnotparser) > 0:
                    time, u, tid = lastusersnotparser[-1]  # last tree by human
                else:
                    time, u, tid = sorted(
                        snr2all[snr])[-1]  # last tree by whoever
                #print "je prends l'arbre de",u
                trees += [
                    sql.gettree(treeid=treeid, indb=db,
                                incursor=cursor)["tree"]
                ]

        else:

            for (
                    treeid,
                    sentencenr,
            ) in cursor.execute(
                    "select trees.rowid, sentences.nr from texts, trees, sentences where texts.rowid=? and trees.userid=? and trees.sentenceid = sentences.rowid and sentences.textid=texts.rowid order by sentences.nr;",
                (
                    textid,
                    userid,
                )).fetchall():
                #print "ooo",sentencenr,"\r",
                print "nr", sentencenr, "_____\r",
                trees += [
                    sql.gettree(treeid=treeid, indb=db,
                                incursor=cursor)["tree"]
                ]

        print "exporting", len(trees), "trees into", outfile
        outfiles += [outfile]
        conll.trees2conllFile(trees, outfile, columns=10)
    return outfiles
コード例 #23
0
ファイル: admin.py プロジェクト: ajaiswal-ht/arborator-server
def edituser(theform, userdir, thisscript, userconfig, action, newcookie, msg=None, success=None):
    """Display the screen to edit or delete users.."""
    config = ConfigObj(userdir + 'config.ini')
    templatedir = config['templatedir']
    realadminlev = int(userconfig['admin'])
    
    adminpage = readfile(templatedir+adminpage_file)
    adminpage = adminpage.replace('**this script**', thisscript + '?action=' + action)
    adminpage = adminpage.replace('**admin menu**', thisscript+'?login=admin'+'&action='+action)
  
    userlist = [entry[:-4] for entry in os.listdir(userdir) if os.path.isfile(userdir+entry) and entry[:-4] not in RESERVEDNAMES ]
    mainadmin = config['adminuser']
    username = userconfig['username']
    if mainadmin in userlist:
        userlist.remove(mainadmin)
    if username in userlist:
        userlist.remove(username)
    userlist = cSort(userlist)
    
    start = int(theform.getfirst('start', '1'))
    length = len(userlist)
    if start*numonpage > length:
        start = length//numonpage + 1
    url = '<a href="' + thisscript + '?start=%s&login=admin&admin=edituser&action=' + action + '">%s</a>'
    indexline = '<div style="text-align:center;">%s</div>' % makeindexline(url, start, length, numonpage)

# need to be able to edit -
# username, realname, new password, confirm password, adminlev, email, max-age, editable
    index = (start-1)*numonpage + 1 
    last = min(length+1, index+numonpage)
    usermenu = indexline + '<br>' + edit_table_s
    while  index < last:                                # go through all the users
        thisuser = userlist[index-1]
        index += 1
        thisuserc = ConfigObj(userdir+thisuser+'.ini')
	
        #print "Content-Type: text/html\n" # blank line: end of headers
        #print userdir,thisuser,thisuserc,"admin" in thisuserc, "admin" in thisuserc.keys()
        if thisuserc=={}: continue
        adminlev = thisuserc.get('admin',0)
        if realadminlev <= int(adminlev):
            continue
        loginname = thisuser
        realname = thisuserc['realname']
        email = thisuserc['email']
        maxage = thisuserc['max-age']
        editable = ''
        if istrue(thisuserc['editable']):
            editable = 'checked'
        if theform.getfirst('username')==loginname and msg and not success:
            realname = theform.getfirst('realname', '')
            realname = theform.getfirst('realname', '')
            email = theform.getfirst('email', '')
            adminlev = theform.getfirst('adminlev', '')
            maxage = theform.getfirst('maxage', '')
            editable = theform.getfirst('editable', '')
            if editable:
                editable = 'checked'
            
        thevals = (thisscript, action, 'doedituser', start, loginname,
                   loginname, realname, email, adminlev, maxage, editable,
                   thisscript, action, 'deluser', start, loginname)
        
        usermenu += elem_h+str(index-1) + (account_table % thevals) + elem_f
        
        
        
        # kim's stuff!!!!!
        realname=realname.decode("utf-8")
        project=unicode(action.split("_")[-1].decode("utf-8"))
        sql=SQL(project)
        uid=sql.userid(thisuser, realname)
        #print "Content-Type: text/html\n" # blank line: end of headers
        #print "kim<br>", project,thisuser,uid
        # kim's stuff ends....
        
        
    usermenu += table_e + '<br>' + indexline

    eduserpage = readfile(templatedir+adminuser_file)
    eduserpage = eduserpage.replace('**account table**', usermenu)
    if msg:
        adminpage =  adminpage.replace('<br><!- message --> ', '<h2>%s</h2>' % msg) 

    adminpage = adminpage.replace('**admin**', eduserpage)
    print newcookie
    print serverline
    print
    print adminpage
    sys.exit()
コード例 #24
0
 def _add_data_set(self, name, root_path=None):
     """Insert a ``data_set`` row for *name* unless the lookup finds one.

     Returns the result of ``mysql.run`` when ``mysql.find`` returned
     exactly ``False``; returns ``False`` otherwise.
     """
     if mysql.find('data_set', name=name) is not False:
         return False
     insert_statement = SQL().Insert('data_set').Values(name=name, root_path=root_path).sql
     return mysql.run(insert_statement)
コード例 #25
0
    new_trees = list()
    for nr, tree in sortable:

        # adding metadatas	应该是重命名sent_id,从0开始
        tree.sentencefeatures["text"] = tree.sentence()
        tree.sentencefeatures["sent_id"] = prefix + "_" + str(nr - 1)

        # removing useless metadata
        del tree.sentencefeatures["nr"]
        new_trees.append(tree)
    conll.trees2conllFile(new_trees, outfile)


if __name__ == "__main__":

    ## Open project database

    sql = SQL("NaijaSUD")  # enter the project name here
    db, cursor = sql.open()

    ## Use 2 functions :
    # - exportLastBestAnnotations in lib/database.py -> writes a file with trees and their rank
    # - reorder in lib/yuchen.py -> reorder trees based on their rank, write a file with the output

    users, c = sql.exportLastBestAnnotations(
        115, "P_ABJ_GWA_06_Ugo-lifestory_PRO"
    )  # enter the textid and the text name; the textid can be read from an editor link such as https://arborator.ilpga.fr/editor.cgi?project=NaijaSUD&textid=74&opensentence=1
    print(users, c)
    fpath = "E:/TAL/Stage/arborator/projects/NaijaSUD/export/P_ABJ_GWA_06_Ugo.lifestory_PRO.most.recent.trees.with.feats.conllu"  # enter the path of the exported file
    trees = conll.conllFile2trees(fpath)  # load the exported conll trees; they are reordered and their sent_id renamed below
    reorder(trees, fpath + "_reordered")
コード例 #26
0
from config import bot
from database import SQL
import random

# Module-level database handle used by check_win below.
# NOTE(review): these positional arguments look like host / user / password /
# database name hard-coded in source - confirm, and consider loading them
# from configuration or the environment instead of committing them.
db = SQL('localhost', 'domaildo_usrbbd6', 'e5svFRqYtG8SdB^P=)',
         'domaildo_boottrg06')


def check_win(room):
    """Return one randomly chosen entry for *room*.

    The candidates are the first field of every row returned by
    ``db.list_players(room)``, converted to ``int``.  ``random.choice``
    raises ``IndexError`` when there are no rows.
    """
    candidates = [int(row[0]) for row in db.list_players(room)]
    return random.choice(candidates)
コード例 #27
0
def bulkcorrectDB(project, treeids=[]):
    """
	bulk correction of a whole project! very slow!
	
	better to do directly in sql, for example:
	#change all functions:
		#update links set function='dep' where function='det';

	"""

    sql = SQL(project)
    db, cursor = sql.open()

    if treeids: a, v = ["rowid"], treeids
    else: a, v = [], []

    allt = sql.getall(cursor, "trees", a, v)

    ti = time()

    for nr, (treeid, sid, uid, annotype, status, comment,
             timestamp) in enumerate(allt):

        print "_____ treeid", treeid, "nr", nr + 1, "/", len(
            allt
        ), "---", float(nr + 1) / (
            time() - ti), "trees per second", float(len(allt) - nr + 1) / (
                float(nr + 1) /
                (time() - ti)), "seconds to go", float(len(allt) - nr + 1) / (
                    float(nr + 1) / (time() - ti)) / 60, "minutes to go"

        dic = sql.gettree(None, None, treeid, indb=db, incursor=cursor)
        if dic:
            sentencetree = dic["tree"]

            #newdic, changed = complexbulkcorrectdic(sentencetree)
            #newdic, changed = simplebulkcorrectdic(sentencetree)
            #newdic, changed = correctfeatures(sentencetree)
            #newdic, changed = correctfeatures(sentencetree,4,{})
            newdic, changed = correctfeatures(
                sentencetree, 4, {
                    'i1': {
                        u'person': u'3',
                        u'number': u'sg',
                        u'cat': u'V',
                        u'lemma': u'\xeatre',
                        u'token': u'est',
                        u'tense': u'present',
                        u'mode': u'indicative',
                        'gov': {
                            'i0': 'root'
                        },
                        u't': u'A'
                    },
                    'i2': {
                        u'cat': u'Cl',
                        u'lemma': u'c',
                        u'token': u'-ce',
                        u't': u'B',
                        'gov': {
                            'i1': u'sub'
                        },
                        'child': None
                    }
                })

            #break
            if changed:
                print "________________________________\n"
                #for i,node in newdic.iteritems():
                #print i, node["t"], node
                #1/0
                tokensChanged = True
                ws, sentence, _ = sql.enterTree(cursor,
                                                newdic,
                                                sid,
                                                uid,
                                                tokensChanged=tokensChanged)
                print sentence
                print "changed"
                db.commit()
    #db.commit()
    db.close()
コード例 #28
0
ファイル: photos.py プロジェクト: alimzhan2000/roombashare
from oauth2client import file, client, tools
from oauth2client.file import Storage
from googleapiclient.discovery import build

import config
import telebot
from telebot.types import InputMediaPhoto, InputMediaVideo
from telebot import types
from database import SQL
from users import Seeker

# Module-level setup: logging, the database handle and the Drive client.
# NOTE(review): `logging`, `discovery` and `Http` are used below but are not
# imported in the visible import lines (only `build` is imported from
# googleapiclient.discovery) - confirm they are imported elsewhere.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO)
logger = logging.getLogger(__name__)
db = SQL()

SCOPES = 'https://www.googleapis.com/auth/drive'
# stored credentials are read from storage.json
store = file.Storage('storage.json')
creds = store.get()
# missing or invalid credentials: run the client-secrets flow, which is
# handed the same store
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets('client_secrets.json', SCOPES)
    creds = tools.run_flow(flow, store)
# Drive v3 client over an HTTP object authorized with the credentials
drive_service = discovery.build('drive',
                                'v3',
                                http=creds.authorize(Http()),
                                cache_discovery=False)


def download_photo(file_id):
    """Build a Drive ``files().get_media`` request for *file_id*.

    NOTE(review): only the request construction is visible here; the lines
    shown neither execute the request nor return anything.
    """
    request = drive_service.files().get_media(fileId=file_id)
コード例 #29
0
#!c:/Python27/python.exe
# -*- coding: utf-8 -*-

import sys, cgi, cgitb, codecs
sys.path.insert(0, '../lib')
from database import SQL

#cgitb.enable()
#project = form.getvalue('project',"").decode("utf-8")
# Hard-coded project; the form-based lookup is disabled (commented out).
project = "linguistiqueCorpus-Exo"
#form = cgi.FieldStorage()
#uid = form.getvalue('uid',None)
uid = 1  # admin
#print 'Content-type: text/html\n\n'

sql = SQL(project)

#print project.encode('utf-8'),"uuuuuuuuuuuuuuuuuuuuu",uid

# id of the text to evaluate
tid = 13

# each row from sql.uidForText is unpacked as a 1-tuple; note that the
# name `uid` is reused here and as the loop variable below
uids = [uid for uid, in sql.uidForText(tid)]
print uids
# one evaluateUser result per line, written to "<project><tid>.results.html"
with codecs.open(project + str(tid) + ".results.html", "w", "utf-8") as out:
    for i, uid in enumerate(uids):
        # progress: user id, index and percentage of users done so far
        print "__________________", uid, i, float(i) / len(uids) * 100, "%"
        out.write(sql.evaluateUser(uid, tid, consolidateLine=True) + "\n")
コード例 #30
0
ファイル: dbtables.py プロジェクト: tassygan/roomba
import psycopg2
from database import SQL

# Reset script: drop the tables, create them again, then close the handle.
# NOTE(review): presumably this discards existing data - confirm before
# running against a database that matters.
db = SQL()

db.drop_tables()
db.create_tables()

db.close()
コード例 #31
0
ファイル: admin.py プロジェクト: amir-zeldes/arborator
def edituser(theform, userdir, thisscript, userconfig, action, newcookie, msg=None, success=None):
    """Display the screen to edit or delete users.."""
    config = ConfigObj(userdir + 'config.ini')
    templatedir = config['templatedir']
    realadminlev = int(userconfig['admin'])
    
    adminpage = readfile(templatedir+adminpage_file)
    adminpage = adminpage.replace('**this script**', thisscript + '?action=' + action)
    adminpage = adminpage.replace('**admin menu**', thisscript+'?login=admin'+'&action='+action)
  
    userlist = [entry[:-4] for entry in os.listdir(userdir) if os.path.isfile(userdir+entry) and entry[:-4] not in RESERVEDNAMES ]
    mainadmin = config['adminuser']
    username = userconfig['username']
    if mainadmin in userlist:
        userlist.remove(mainadmin)
    if username in userlist:
        userlist.remove(username)
    userlist = cSort(userlist)
    
    start = int(theform.getfirst('start', '1'))
    length = len(userlist)
    if start*numonpage > length:
        start = length//numonpage + 1
    url = '<a href="' + thisscript + '?start=%s&login=admin&admin=edituser&action=' + action + '">%s</a>'
    indexline = '<div style="text-align:center;">%s</div>' % makeindexline(url, start, length, numonpage)

# need to be able to edit -
# username, realname, new password, confirm password, adminlev, email, max-age, editable
    index = (start-1)*numonpage + 1 
    last = min(length+1, index+numonpage)
    usermenu = indexline + '<br>' + edit_table_s
    while  index < last:                                # go through all the users
        thisuser = userlist[index-1]
        index += 1
        thisuserc = ConfigObj(userdir+thisuser+'.ini')

        #print "Content-Type: text/html\n" # blank line: end of headers
        #print userdir,thisuser,thisuserc
        adminlev = thisuserc['admin']
        if realadminlev <= int(adminlev):
            continue
        loginname = thisuser
        realname = thisuserc['realname']
        email = thisuserc['email']
        maxage = thisuserc['max-age']
        editable = ''
        if istrue(thisuserc['editable']):
            editable = 'checked'
        if theform.getfirst('username')==loginname and msg and not success:
            realname = theform.getfirst('realname', '')
            realname = theform.getfirst('realname', '')
            email = theform.getfirst('email', '')
            adminlev = theform.getfirst('adminlev', '')
            maxage = theform.getfirst('maxage', '')
            editable = theform.getfirst('editable', '')
            if editable:
                editable = 'checked'
            
        thevals = (thisscript, action, 'doedituser', start, loginname,
                   loginname, realname, email, adminlev, maxage, editable,
                   thisscript, action, 'deluser', start, loginname)
        
        usermenu += elem_h + (account_table % thevals) + elem_f
        
        
        
        # kim's stuff!!!!!
        realname=realname.decode("utf-8")
        project=unicode(action.split("_")[-1].decode("utf-8"))
        sql=SQL(project)
        uid=sql.userid(thisuser, realname)
        #print "Content-Type: text/html\n" # blank line: end of headers
        #print "kim<br>", project,thisuser,uid
        # kim's stuff ends....
        
        
    usermenu += table_e + '<br>' + indexline

    eduserpage = readfile(templatedir+adminuser_file)
    eduserpage = eduserpage.replace('**account table**', usermenu)
    if msg:
        adminpage =  adminpage.replace('<br><!- message --> ', '<h2>%s</h2>' % msg) 

    adminpage = adminpage.replace('**admin**', eduserpage)
    print newcookie
    print serverline
    print
    print adminpage
    sys.exit()
コード例 #32
0
ファイル: daily.py プロジェクト: Mempler/Sora-cron
from database import SQL, REDIS
import json

# Check the Redis connection up front; a failure is reported but the script
# still continues, as before.
try:
    REDIS.ping()
except Exception:  # narrower than a bare except: Ctrl-C and SystemExit still propagate
    print("Failed to connect to redis! a connection is required...")

# Get current position for the current day. this cron runs every day ONCE!
users = SQL.query("SELECT Id FROM Users")
for user in users:
    # NOTE(review): (user['Id']) is a plain value, not a 1-tuple - confirm
    # that SQL.query accepts a scalar parameter.
    user_lb = SQL.query("SELECT * FROM Leaderboard WHERE OwnerId = %s LIMIT 1",
                        (user['Id']))
    # users without a leaderboard row are skipped
    if not user_lb:
        continue

    user_lb = user_lb[0]

    for mode in [0, 1, 2, 3]:
        key = f"sora:performance:{user['Id']}_{mode}"
        print(key)
        # stored history for this user/mode: a JSON list, or nothing yet
        stored = REDIS.get(key)
        last = [] if stored is None else json.loads(stored)

        # drop the first entry once 60 or more are stored
        if len(last) >= 60:
            last.pop(0)

        position = 0
コード例 #33
0
 def test_instance(self):
     """A bare ``SQL()`` call must produce an ``SQL`` instance."""
     self.assertIsInstance(SQL(), SQL)