def PurgeDeprecatedTaxon():
    # Purge stats referring to deleted instances
    ExecSQL("delete from ecotaxainststat where id_instance not in (select id from ecotaxainst)")
    # Purge taxa flagged 'D' before all instances completed their UP sync (normally preceded by a DOWN)
    # and that have no children (hence the 20 passes, so an entire branch gets purged leaf first)
    for i in range(20):
        rowcount = ExecSQL("""delete from taxonomy t where taxostatus='D'
              and lastupdate_datetime<(select min(laststatupdate_datetime) from ecotaxainst)
              and not exists (select 1 from ecotaxainststat s where s.id_taxon=t.id)
              and not exists (select 1 from taxonomy c where c.parent_id=t.id) """)
        if rowcount == 0:
            break
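# A minimal, self-contained sketch of the deletion pattern above: a branch of a
# tree is removed leaf-first by repeated passes, stopping when a pass deletes
# nothing. Pure-Python stand-in for the SQL loop; the 'nodes' data is hypothetical.
def purge_branch_sketch():
    nodes = {1: None, 2: 1, 3: 2}          # id -> parent_id, chain 1 <- 2 <- 3
    flagged = {1, 2, 3}                    # the whole branch is deprecated
    for _ in range(20):                    # bounded passes, like the SQL version
        deletable = [n for n in flagged
                     if n in nodes and not any(p == n for p in nodes.values())]
        if not deletable:
            break                          # fixpoint reached, nothing left to do
        for n in deletable:
            del nodes[n]                   # leaves first, parents on later passes
    return nodes                           # {} : the whole chain was purged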
def RefreshAllProjectsStat():
    # All validated objects without a classification are reset to not validated
    ExecSQL("update obj_head oh set classif_qual=NULL where classif_qual='V' and classif_id is null ")
    ExecSQL("UPDATE projects SET objcount=Null,pctclassified=null,pctvalidated=NULL")
    ExecSQL("""UPDATE projects SET objcount=q.nbr,pctclassified=100.0*nbrclassified/q.nbr,pctvalidated=100.0*nbrvalidated/q.nbr
        from (SELECT projid,sum(nbr) nbr,sum(case when id>0 then nbr end) nbrclassified,sum(nbr_v) nbrvalidated
              from projects_taxo_stat
              group by projid) q
        where projects.projid=q.projid""")
    ExecSQL("""delete from samples s
        where not exists (select 1 from objects o where o.sampleid=s.sampleid)
          and not exists (select 1 from part_samples o where o.sampleid=s.sampleid)""")
    ComputeOldestSampleDateOnProject()
def dbadmin_merge2taxon():
    if gvg("src", "") == "" or gvg("dest", "") == "":
        txt = "Select source Taxon (will be deleted after merge) :"
        txt += "<br>Select Target Taxon :"
        return render_template('search/merge2taxo.html')
    TaxoSrc = database.Taxonomy.query.filter_by(id=int(gvg("src", ""))).first()
    TaxoDest = database.Taxonomy.query.filter_by(id=int(gvg("dest", ""))).first()
    N1 = ExecSQL("update obj_head set classif_id=%(dest)s where classif_id=%(src)s", {"src": TaxoSrc.id, "dest": TaxoDest.id})
    N2 = ExecSQL("update obj_head set classif_auto_id=%(dest)s where classif_auto_id=%(src)s", {"src": TaxoSrc.id, "dest": TaxoDest.id})
    N3 = ExecSQL("update objectsclassifhisto set classif_id=%(dest)s where classif_id=%(src)s", {"src": TaxoSrc.id, "dest": TaxoDest.id})
    N4 = ExecSQL("update taxonomy set parent_id=%(dest)s where parent_id=%(src)s", {"src": TaxoSrc.id, "dest": TaxoDest.id})
    N5 = ExecSQL("delete from taxonomy where id=%(src)s", {"src": TaxoSrc.id, "dest": TaxoDest.id})
    return PrintInCharte("""Merge of '%s' into '%s' done
    <br>%d Objects manual classification updated
    <br>%d Objects automatic classification updated
    <br>%d Object classification history rows updated
    <br>%d Taxonomy children updated
    <br>%d Taxonomy node deleted """ % (TaxoSrc.name, TaxoDest.name, N1, N2, N3, N4, N5))
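# Hedged sketch of the merge logic above, in pure Python: every reference to the
# source id is re-pointed to the destination id before the source node itself is
# removed, mirroring the four UPDATEs and the final DELETE. All names are illustrative.
def merge_refs_sketch(classif, parents, src, dest):
    n_reclassified = 0
    for objid, cid in classif.items():      # like the obj_head UPDATEs
        if cid == src:
            classif[objid] = dest
            n_reclassified += 1
    for tid, pid in parents.items():        # like the taxonomy parent_id UPDATE
        if pid == src:
            parents[tid] = dest
    parents.pop(src, None)                  # like the final DELETE
    return n_reclassified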
def RefreshTaxoStat():
    n = ExecSQL("UPDATE taxonomy SET nbrobj=Null,nbrobjcum=null where nbrobj is NOT NULL or nbrobjcum is not null")
    print("RefreshTaxoStat cleaned %d taxa" % n)
    print("Refresh projects_taxo_stat")
    for r in GetAll('select projid from projects'):
        RecalcProjectTaxoStat(r['projid'])
    n = ExecSQL("""UPDATE taxonomy SET nbrobj=q.nbr
        from (select id classif_id, sum(nbr_v) nbr
              from projects_taxo_stat pts
              join projects p on pts.projid=p.projid and p.visible=true
              where nbr_v>0
              group by id) q
        where taxonomy.id=q.classif_id""")
    print("RefreshTaxoStat updated %d 1st level taxa" % (n))
    n = ExecSQL("""UPDATE taxonomy SET nbrobjcum=q.nbr
        from (select parent_id,sum(nbrobj) nbr from taxonomy
              where nbrobj is NOT NULL group by parent_id) q
        where taxonomy.id=q.parent_id""")
    print("RefreshTaxoStat updated %d 2nd level taxa" % (n))
    for i in range(50):
        n = ExecSQL("""UPDATE taxonomy SET nbrobjcum=q.nbr
            from (select parent_id,sum(nbrobjcum+coalesce(nbrobj,0)) nbr from taxonomy
                  where nbrobjcum is NOT NULL group by parent_id) q
            where taxonomy.id=q.parent_id and coalesce(taxonomy.nbrobjcum,0)<>q.nbr""")
        print("RefreshTaxoStat updated %d level %d taxa" % (n, i))
        if n == 0:
            break
    appli.part.prj.GlobalTaxoCompute()
def RefreshTaxoStat():
    n = ExecSQL("UPDATE taxonomy SET nbrobj=Null,nbrobjcum=null where nbrobj is NOT NULL or nbrobjcum is not null")
    app.logger.info("RefreshTaxoStat cleaned %d taxa" % n)
    app.logger.info("Refresh projects_taxo_stat")
    # for r in GetAll('select projid from projects'):
    #     RecalcProjectTaxoStat(r['projid'])
    n = ExecSQL("""UPDATE taxonomy SET nbrobj=q.nbr
        from (select id_taxon classif_id, sum(nbr) nbr
              from ecotaxainststat pts
              group by id_taxon) q
        where taxonomy.id=q.classif_id""")
    app.logger.info("RefreshTaxoStat updated %d 1st level taxa" % (n))
    n = ExecSQL("""UPDATE taxonomy SET nbrobjcum=q.nbr
        from (select parent_id,sum(nbrobj) nbr from taxonomy
              where nbrobj is NOT NULL group by parent_id) q
        where taxonomy.id=q.parent_id""")
    app.logger.info("RefreshTaxoStat updated %d 2nd level taxa" % (n))
    for i in range(50):
        n = ExecSQL("""UPDATE taxonomy SET nbrobjcum=q.nbr
            from (select parent_id,sum(nbrobjcum+coalesce(nbrobj,0)) nbr from taxonomy
                  where nbrobjcum is NOT NULL group by parent_id) q
            where taxonomy.id=q.parent_id and coalesce(taxonomy.nbrobjcum,0)<>q.nbr""")
        app.logger.info("RefreshTaxoStat updated %d level %d taxa" % (n, i))
        if n == 0:
            break
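# Sketch of the bottom-up rollup performed by the loop above: a parent's
# cumulative count is the sum over its children of (child's cumulative + child's
# own direct count), repeated until no row changes. Pure Python, hypothetical data.
def rollup_sketch(parent, nbrobj):
    nbrobjcum = {}
    # Seed: parents of directly counted nodes get the plain sum (the "2nd level" UPDATE)
    for node, n in nbrobj.items():
        p = parent.get(node)
        if p is not None:
            nbrobjcum[p] = nbrobjcum.get(p, 0) + n
    for _ in range(50):                     # bounded like the SQL loop
        changed = False
        new = {}
        for node, cum in nbrobjcum.items():
            p = parent.get(node)
            if p is not None:
                new[p] = new.get(p, 0) + cum + nbrobj.get(node, 0)
        for p, n in new.items():
            if nbrobjcum.get(p) != n:
                nbrobjcum[p] = n
                changed = True
        if not changed:
            break                           # fixpoint: cumulative counts are stable
    return nbrobjcum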
def DoTaskClean(TaskID):
    task = LoadTask(TaskID)
    ProjectID = getattr(task.param, 'ProjectId', None)
    WorkingDir = task.GetWorkingDir()
    Msg = "Erasing Task %d <br>" % TaskID
    try:
        ExecSQL("DROP SCHEMA IF EXISTS task%06d CASCADE" % TaskID)
        if os.path.exists(WorkingDir):
            shutil.rmtree(WorkingDir)
            Msg += "Temp Folder Erased (%s)<br>" % WorkingDir
        db.session.delete(task.task)
        db.session.commit()
        Msg += "DB Record Erased<br>"
    except:
        flash("Error while erasing " + str(sys.exc_info()), 'error')
    if ProjectID:
        Msg += "<a href='/prj/%s'>Back to project</a><br>" % ProjectID
    CustomReturnURL = getattr(task.param, 'CustomReturnURL', None)
    CustomReturnLabel = getattr(task.param, 'CustomReturnLabel', None)
    if CustomReturnLabel and CustomReturnURL:
        Msg += "<a href='{0}'>{1}</a><br>".format(CustomReturnURL, CustomReturnLabel)
    return Msg
def ComputeOldestSampleDateOnProject():
    ExecSQL("update part_projects pp set oldestsampledate=(select min(sampledate) from part_samples ps where ps.pprojid=pp.pprojid)")
def PrjEditAnnot(PrjId):
    Prj = database.Projects.query.filter_by(projid=PrjId).first()
    if Prj is None:
        flash("Project doesn't exist", 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    g.headcenter = "<h4><a href='/prj/{0}'>{1}</a></h4>".format(Prj.projid, Prj.title)
    if not Prj.CheckRight(2):  # Level 0 = Read, 1 = Annotate, 2 = Admin
        flash('You cannot edit settings for this project', 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    txt = "<h3>Project Edit / Erase annotation massively </h3>"
    # ---------------- First screen
    if not gvg('NewAuthor') or not gvg('OldAuthor'):
        LstUser = GetAssoc2Col("select id,name from users order by lower(name)")
        LstUserOld = OrderedDict({'anyuser': "******"})
        for k, v in LstUser.items():
            LstUserOld[k] = v
        LstUserNew = OrderedDict({'lastannot': "Previous Annotation available, or prediction, or Nothing"})
        for k, v in LstUser.items():
            LstUserNew[k] = v
        LBOld = MakeHTMLSelect("OldAuthor", LstUserOld, AddEmptyLineFirst=True)
        LBNew = MakeHTMLSelect("NewAuthor", LstUserNew, AddEmptyLineFirst=True)
        txt += """<form method=get>
        <table>
        <tr><td>Replace the identification done by : </td><td>{0}</td></tr>
        <tr><td>By the identification done by : </td><td>{1}</td></tr>
        <tr><td>Since the (optional) :</td><td>
        <input type="text" style="width: 80px" id="filt_date" name="filt_date" autocomplete="off">
        at <input type="text" style="width: 25px" id="filt_hour" name="filt_hour" autocomplete="off"> h
        <input type="text" style="width: 25px" id="filt_min" name="filt_min" autocomplete="off"> m
        </td></tr>
        </table>
        <br>
        <input type=submit class='btn btn-primary' value="Compute an estimation of the impact"><br>
        On the next screen you will be able to apply the change only to some categories
        </form>
        <br><br>
        <div class='panel panel-default' style="width:1000px;margin-left:10px;">
        This correction tool erases the validation work for selected categories, annotators and period of time,
        and replaces it by the work of a selected annotator<br>
        EXAMPLES of possibilities :<br>
        <ul>
        <li>Replace validation done by Mr X for all Copepoda by the validation done by Mrs Y, who is a well-known specialist of this group
        <li>Replace validation done by Mr W before 2015 November, 15th (the date of his taxonomy training course) by prediction or validation by anyone else
        </ul>
        </div>
        <script>
        $(document).ready(function() {{
            $( "#filt_fromdate,#filt_date" ).datepicker({{
                showButtonPanel: true,changeMonth: true,changeYear: true,dateFormat:"yy-mm-dd",
            }});
        }});
        </script>
        """.format(LBOld, LBNew)
        return PrintInCharte(txt)
    sqlclause = {"projid": Prj.projid, 'retrictsq': "", 'retrictq': "", 'jointype': ""}
    if gvg('OldAuthor') == "anyuser":
        OldAuthor = None
        txt += "Replace all classification<br>"
    else:
        OldAuthor = database.users.query.filter_by(id=int(gvg('OldAuthor'))).first()
        if OldAuthor is None:
            flash("Invalid old author", 'error')
            return PrintInCharte("URL Hacking ?")
        sqlclause['retrictsq'] += " and och.classif_who!=%s " % gvg('OldAuthor')
        sqlclause['retrictq'] += " and o.classif_who=%s " % gvg('OldAuthor')
        txt += "Replace classification done by <b>%s</b><br>" % OldAuthor.name
    if gvg('NewAuthor') == "lastannot":
        NewAuthor = None
        txt += "By the last classification of any other author<br>"
        sqlclause['jointype'] = 'left'
    else:
        NewAuthor = database.users.query.filter_by(id=int(gvg('NewAuthor'))).first()
        if NewAuthor is None:
            flash("Invalid new author", 'error')
            return PrintInCharte("URL Hacking ?")
        sqlclause['retrictsq'] += " and och.classif_who=%s " % gvg('NewAuthor')
        sqlclause['retrictq'] += " and o.classif_who!=%s " % gvg('NewAuthor')
        txt += "By classification done by <b>%s</b><br>" % NewAuthor.name
    if gvg('filt_date'):
        sqlclause['retrictq'] += " and o.classif_when>=to_date('" + gvg('filt_date')
        if gvg('filt_hour'):
            sqlclause['retrictq'] += " " + gvg('filt_hour')
            if gvg('filt_min'):
                sqlclause['retrictq'] += ":" + gvg('filt_min')
        sqlclause['retrictq'] += " ','YYYY-MM-DD HH24:MI')"
    # Protection disabled in V1.1
    # if OldAuthor is None and NewAuthor is None:
    #     flash("This request doesn't make sense: you would replace the last classification of everyone by the last classification", 'error')
    #     return PrintInCharte("Invalid Request")
    # ---------------- Second screen: category list & impact estimates
    if not gvg('Process'):
        txt += """<form method=post action=?OldAuthor={0}&NewAuthor={1}&filt_date={2}&filt_hour={3}&filt_min={4}&Process=Y>
        """.format(gvg('OldAuthor'), gvg('NewAuthor'), gvg('filt_date'), gvg('filt_hour'), gvg('filt_min'))
        sql = """
          select t.id,concat(t.name,' (',t2.name,')') as name, count(*) Nbr
          from obj_head o
          {jointype} join (select rank() over(PARTITION BY och.objid order by och.classif_date desc) ochrank,och.*
                           from objectsclassifhisto och
                           join obj_head ooch on ooch.objid=och.objid and ooch.projid={projid}
                           where och.classif_type='M' {retrictsq}) newclassif
                 on newclassif.objid=o.objid and newclassif.ochrank=1
          join taxonomy t on o.classif_id=t.id
          left join taxonomy t2 on t.parent_id=t2.id
          where o.projid={projid} {retrictq}
          GROUP BY t.id,concat(t.name,' (',t2.name,')')
          order BY t.name """.format(**sqlclause)
        data = GetAll(sql, debug=False)
        txt += """
        <input type=submit class='btn btn-warning' value="Process the replacement. WARNING : It's irreversible !!!!"><br>
        Below is the estimated impact on each category; select the source categories you want to replace<br>
        Select <a name="tbltop" href="#tbltop" onclick="$('#TblTaxo input').prop( 'checked', true )">All</a> /
        <a href="#tbltop" onclick="$('#TblTaxo input').prop( 'checked', false );">None</a>
        <table class="table table-bordered table-condensed" style="width: auto" id="TblTaxo">
        <tr><th >Select</th><th width="200">Name</th><th >Nbr</th></tr>
        """
        for r in data:
            txt += "<tr><td><input type='checkbox' value='Y' name='taxo{0}' ></td><td>{1}</td><td class='rightfixedfont'>{2}</td></tr>".format(*r)
        txt += "</table>"
        return PrintInCharte(txt)
    # ---------------- Third screen: execute the queries
    if gvg('Process') == 'Y':
        sqlclause['taxoin'] = ",".join((x[4:] for x in request.form if x[0:4] == "taxo"))
        if sqlclause['taxoin'] == "":
            flash("You must select at least one category to do the replacement", 'error')
            return PrintInCharte("<a href='#' onclick='history.back();'>Back</a>")
        sql = """
        update obj_head as ou
        set classif_who=newclassif.classif_who,
            classif_when=coalesce(newclassif.classif_date,ou.classif_auto_when),
            classif_id=coalesce(newclassif.classif_id,ou.classif_auto_id),
            classif_qual=case when newclassif.classif_qual is not null then newclassif.classif_qual
                              when ou.classif_auto_id is not null then 'P' else null end
        from obj_head o
        {jointype} join (select rank() over(PARTITION BY och.objid order by och.classif_date desc) ochrank,och.*
                         from objectsclassifhisto och
                         join obj_head ooch on ooch.objid=och.objid and ooch.projid={projid}
                         where och.classif_type='M' {retrictsq}) newclassif
               on newclassif.objid=o.objid and newclassif.ochrank=1
        join taxonomy t on o.classif_id=t.id
        where o.projid={projid} {retrictq} and o.classif_id in ({taxoin}) and o.objid=ou.objid
        """.format(**sqlclause)
        RowCount = ExecSQL(sql, debug=True)
        RecalcProjectTaxoStat(Prj.projid)
        txt += "<div class='alert alert-success' role='alert'>Annotation replacement done successfully. Updated %d rows</div>" % RowCount
        txt += "<br><a class='btn btn-lg btn-primary' href='/prj/%s'>Back to target project</a>" % Prj.projid
        return PrintInCharte(txt)
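# Sketch of what the rank() OVER (PARTITION BY objid ORDER BY classif_date DESC)
# subquery above achieves: keep only the most recent manual history row per object.
# Pure Python over a hypothetical list of history dicts.
def latest_per_object_sketch(histo):
    latest = {}
    for row in histo:                       # rows: {'objid': ..., 'classif_date': ..., ...}
        cur = latest.get(row['objid'])
        if cur is None or row['classif_date'] > cur['classif_date']:
            latest[row['objid']] = row      # rank 1 = newest date wins
    return latest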
def PrjResetToPredicted(PrjId):
    request.form  # Force reading of the POST data, otherwise a 504 error occurs
    Prj = database.Projects.query.filter_by(projid=PrjId).first()
    if Prj is None:
        flash("Project doesn't exist", 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    if not Prj.CheckRight(2):  # Level 0 = Read, 1 = Annotate, 2 = Admin
        flash('You cannot edit settings for this project', 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    g.headcenter = "<h4><a href='/prj/{0}'>{1}</a></h4>".format(Prj.projid, XSSEscape(Prj.title))
    txt = "<h3>Reset status to predicted</h3>"
    sqlparam = {}
    filtres = {}
    for k in sharedfilter.FilterList:
        if gvg(k):
            filtres[k] = gvg(k, "")
    process = gvp('process')
    if process == 'Y':
        sqlhisto = """insert into objectsclassifhisto(objid,classif_date,classif_type,classif_id,classif_qual,classif_who)
                      select objid,classif_when,'M', classif_id,classif_qual,classif_who
                      from objects o
                      where projid=""" + str(Prj.projid) + """ and classif_when is not null and classif_qual in ('V','D')
                        and not exists(select 1 from objectsclassifhisto och where och.objid=o.objid and och.classif_date=o.classif_when) """
        sqlhisto += sharedfilter.GetSQLFilter(filtres, sqlparam, str(current_user.id))
        ExecSQL(sqlhisto, sqlparam)
        sqlhisto = """update obj_head set classif_qual='P'
                      where projid={0} and objid in (select objid from objects o where projid={0} and classif_qual in ('V','D') {1})""".format(
            Prj.projid, sharedfilter.GetSQLFilter(filtres, sqlparam, str(current_user.id)))
        ExecSQL(sqlhisto, sqlparam)
        # flash('Data updated', 'success')
        txt += "<a href='/prj/%s' class='btn btn-primary'>Back to project</a> " % (Prj.projid)
        appli.project.main.RecalcProjectTaxoStat(Prj.projid)
        appli.project.main.UpdateProjectStat(Prj.projid)
        return PrintInCharte(txt)
    sql = "select objid FROM objects o where projid=" + str(Prj.projid)
    if len(filtres):
        sql += sharedfilter.GetSQLFilter(filtres, sqlparam, str(current_user.id))
        ObjList = GetAll(sql, sqlparam)
        ObjListTxt = "\n".join((str(r['objid']) for r in ObjList))
        txt += "<span style='color:red;font-weight:bold;font-size:large;'>USING Active Project Filters, {0} objects</span>".format(len(ObjList))
    else:
        txt += "<span style='color:red;font-weight:bold;font-size:large;'>Apply to ALL OBJECTS OF THE PROJECT (NO Active Filters)</span>"
    Lst = GetFieldList(Prj)
    # txt += "%s" % (Lst,)
    return PrintInCharte(render_template("project/prjresettopredicted.html", Lst=Lst, header=txt))
def DoFullSync():
    txt = ""
    try:
        UpdatableCols = ['parent_id', 'name', 'taxotype', 'taxostatus', 'id_source', 'id_instance', 'rename_to',
                         'display_name', 'source_desc', 'source_url', 'creation_datetime', 'creator_email']
        MaxUpdate = database.GetAll("select coalesce(max(lastupdate_datetime),to_timestamp('2000-01-01','YYYY-MM-DD')) lastupdate from taxonomy")
        MaxUpdateDate = MaxUpdate[0]['lastupdate']
        j = request_withinstanceinfo("/gettaxon/", {'filtertype': 'since', 'startdate': MaxUpdateDate})
        if 'msg' in j:
            return appli.ErrorFormat("Sync Error :" + j['msg'])
        NbrRow = len(j)
        NbrUpdate = NbrInsert = 0
        txt += "Received {} rows<br>".format(NbrRow)
        if NbrRow > 0:
            txt += "Taxo 0 = {}<br>".format(j[0])
        for jtaxon in j:
            taxon = database.Taxonomy.query.filter_by(id=int(jtaxon['id'])).first()
            lastupdate_datetime = datetime.datetime.strptime(jtaxon['lastupdate_datetime'], '%Y-%m-%d %H:%M:%S')
            if taxon:
                if taxon.lastupdate_datetime == lastupdate_datetime:
                    continue  # already up to date
                NbrUpdate += 1
            else:
                if ntcv(jtaxon['rename_to']) != '':
                    continue  # don't insert taxa that should be renamed
                if ntcv(jtaxon['taxostatus']) == 'D':
                    continue  # don't insert taxa that are deprecated and planned for deletion
                NbrInsert += 1
                taxon = database.Taxonomy()
                taxon.id = int(jtaxon['id'])
                db.session.add(taxon)
            for c in UpdatableCols:
                setattr(taxon, c, jtaxon[c])
            taxon.lastupdate_datetime = lastupdate_datetime
        db.session.commit()
        # Manage rename_to
        sqlbase = "with taxorename as (select id,rename_to from taxonomy where rename_to is not null) "
        sql = sqlbase + """select distinct projid from obj_head o join taxorename tr on o.classif_id=tr.id """
        ProjetsToRecalc = database.GetAll(sql)
        sql = sqlbase + """update obj_head o set classif_id=tr.rename_to from taxorename tr where o.classif_id=tr.id """
        NbrRenamedObjects = ExecSQL(sql)
        sql = sqlbase + """update obj_head o set classif_auto_id=tr.rename_to from taxorename tr where o.classif_auto_id=tr.id """
        ExecSQL(sql)
        sql = sqlbase + """update objectsclassifhisto o set classif_id=tr.rename_to from taxorename tr where o.classif_id=tr.id """
        ExecSQL(sql)
        # Delete the taxa that had to be renamed, as they normally have been by now
        sql = """delete from taxonomy where rename_to is not null """
        ExecSQL(sql)
        sql = """delete from taxonomy t where taxostatus='D' and not exists(select 1 from projects_taxo_stat where id=t.id) """
        ExecSQL(sql)
        # projects_taxo_stat and part_histocat/part_histocat_lst must be recomputed for projects
        # that referenced a renamed (hence vanished) taxon
        if NbrRenamedObjects > 0:
            # cron.RefreshTaxoStat() is a very long operation (around 5 minutes in production); be more selective.
            # Recompute projects_taxo_stat:
            for Projet in ProjetsToRecalc:
                appli.project.main.RecalcProjectTaxoStat(Projet['projid'])
            # Recompute part_histocat, part_histocat_lst
            appli.part.prj.GlobalTaxoCompute()
        flash("Received {} rows, Inserted: {}, Updated: {}".format(NbrRow, NbrInsert, NbrUpdate), "success")
        if gvp('updatestat') == 'Y':
            msg = DoSyncStatUpdate()
            flash("Taxon statistics update : " + msg, "success" if msg == 'ok' else 'error')
        # txt = "<script>location.reload(true);</script>"  # no: it would re-trigger the POST of the initial arrival
        txt = "<script>window.location=window.location;</script>"
    except:
        msg = "Error while syncing {}".format(sys.exc_info())
        app.logger.error(msg)
        txt += appli.ErrorFormat(msg)
    return txt
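# Minimal sketch of the incremental-sync rule used above: rows newer than the
# local max(lastupdate_datetime) are either updated in place or inserted, and
# rows marked for rename or deletion are never inserted. All names are illustrative.
def apply_sync_sketch(local, received):
    inserted = updated = 0
    for row in received:                    # rows: {'id','lastupdate','rename_to','status',...}
        cur = local.get(row['id'])
        if cur is not None:
            if cur['lastupdate'] == row['lastupdate']:
                continue                    # already up to date
            updated += 1
        else:
            if row.get('rename_to') or row.get('status') == 'D':
                continue                    # never insert doomed rows
            inserted += 1
        local[row['id']] = dict(row)        # upsert
    return inserted, updated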
def SPStep1(self):
    logging.info("Input Param = %s" % (self.param.__dict__,))
    logging.info("Start Step 1")
    zfile = ZipFile(self.param.InData, 'r', allowZip64=True)
    # self.param.IntraStep=1
    newschema = self.GetWorkingSchema()
    if getattr(self.param, 'IntraStep', 0) == 0:
        logging.info("Extract schema")
        zfile.extract('schema.sql')
        with open("schema.sql", "r") as schemainfile:
            with open("schemapatched.sql", "w") as schemaoutfile:
                for l in schemainfile:
                    schemaoutfile.write(l.replace('public', newschema))
        # Purge the schema if a previous DB import failed and left one behind
        logging.info("Create schema " + newschema)
        ExecSQL("DROP SCHEMA IF EXISTS " + newschema + " CASCADE")
        ExecSQL("CREATE SCHEMA " + newschema)
        toolsdir = GetDBToolsDir()
        os.environ["PGPASSWORD"] = app.config['DB_PASSWORD']
        cmd = os.path.join(toolsdir, "psql")
        cmd += " -h " + app.config['DB_HOST'] + " -U " + app.config['DB_USER'] \
               + " -p " + app.config.get('DB_PORT', '5432') \
               + " --file=schemapatched.sql " + app.config['DB_DATABASE'] + " >createschemaout.txt"
        logging.info("Import Schema : %s", cmd)
        os.system(cmd)
        Constraints = GetAll("""select tbl.relname,c.conname
                                from pg_namespace ns
                                join pg_constraint c on connamespace=ns.oid
                                join pg_class tbl on c.conrelid=tbl.oid
                                where ns.nspname='%s' and contype='f' """ % (newschema,))
        logging.info("Drop foreign keys")
        for r in Constraints:
            ExecSQL('alter table {0}.{1} drop CONSTRAINT {2}'.format(newschema, *r), debug=False)
        logging.info("Drop indexes")  # faster inserts, and not needed for a DB merge
        for t in ("is_objectssample", "is_objectslatlong", "is_objectsdepth", "is_objectsdate", "is_objectstime",
                  "is_objectfieldsorigid", "is_objectsprojrandom", "IS_TaxonomySource"):
            ExecSQL('drop index IF EXISTS {0}."{1}" '.format(newschema, t), debug=False)
        logging.info("Restore data")
        for t in table_list:
            ColList = GetColsForTable(newschema, t)
            logging.info("Restore table %s " % (t,))
            try:
                zfile.extract(t + ".copy")
                with open(t + ".copy", "r", encoding='latin_1') as f:
                    self.pgcur.copy_from(f, newschema + "." + t, columns=ColList)
                self.pgcur.connection.commit()
            except:
                logging.error("Error while data restoration %s", str(sys.exc_info()))
        logging.info("Add newid columns")
        for t in ("process", "acquisitions", "samples", "obj_head", "images", "users", "taxonomy"):
            ExecSQL("alter table {0}.{1} add column newid bigint".format(newschema, t), debug=False)
    self.task.taskstate = "Question"
    self.UpdateProgress(5, "Import of temporary Database Done")
def SPStep2(self):
    logging.info("Start Step 2 : Effective data import")
    logging.info("Taxo Mapping = %s", self.param.TaxoFound)
    logging.info("Users Mapping = %s", self.param.UserFound)
    newschema = self.GetWorkingSchema()
    if getattr(self.param, 'IntraStep', 1) == 1:
        logging.info("SubStep 1 : Create Sample, process & Acquisition")
        self.UpdateProgress(6, "Create Sample, process & Acquisition")
        Ids = {"acq": {"tbl": "acquisitions", "pk": "acquisid"},
               "sample": {"tbl": "samples", "pk": "sampleid"},
               "process": {"tbl": "process", "pk": "processid"}}
        # Create the rows of those tables
        for r in Ids.values():
            # Generate the IDs in the newid column
            ExecSQL("UPDATE {0}.{1} set newid=nextval('seq_{1}') where projid={2}".format(newschema, r["tbl"], self.param.ProjectSrcId))
            TblSrc = GetColsForTable(newschema, r["tbl"])
            TblDst = GetColsForTable('public', r["tbl"])
            InsClause = [r["pk"], "projid"]
            SelClause = ['newid', str(self.param.ProjectId)]
            for c in TblSrc:
                if c != r['pk'] and c != 'projid' and c in TblDst:
                    InsClause.append(c)
                    SelClause.append(c)
            sql = "insert into public.%s (%s)" % (r["tbl"], ",".join(InsClause))
            sql += " select %s from %s.%s where projid=%s" % (",".join(SelClause), newschema, r['tbl'], self.param.ProjectSrcId)
            N = ExecSQL(sql)
            logging.info("Create %s %s", N, r["tbl"])
        self.param.IntraStep = 2
    if self.param.IntraStep == 2:
        logging.info("SubStep 2 : Assign NewId to users")
        for k, v in self.param.UserFound.items():
            logging.info("Assign NewId to user %s" % (k,))
            ExecSQL("Update {0}.users set newid={1} where lower(name)=(%s)".format(newschema, v['id']), (k,), debug=False)
        logging.info("SubStep 2 : Assign NewId to Taxo")
        for k, v in self.param.TaxoFound.items():
            logging.info("Assign Taxo %s/%s" % (k, v['name']))
            ExecSQL("Update {0}.taxonomy set newid={1} where id=(%s)".format(newschema, v['newid']), (k,), debug=False)
        logging.info("SubStep 2 : Import privileges on project")
        ExecSQL("""Insert into projectspriv (id,projid,member,privilege)
                   SELECT nextval('seq_projectspriv'),{1},u.newid,min(privilege)
                   FROM {0}.projectspriv pp
                   join {0}.users u on pp.member=u.id
                   WHERE pp.projid={2} and u.newid is not null
                     and u.newid not in (select member from projectspriv where projid={1})
                   group by u.newid """.format(newschema, self.param.ProjectId, self.param.ProjectSrcId), None, debug=False)
        logging.info("SubStep 2 : Create Objects")
        self.UpdateProgress(10, "Create Objects")
        ExecSQL("UPDATE {0}.obj_head set newid=nextval('seq_objects') where projid={1}".format(newschema, self.param.ProjectSrcId))
        TblSrc = GetColsForTable(newschema, 'obj_head')
        TblDst = GetColsForTable('public', 'obj_head')
        CustomMapping = {"objid": "o.newid", "projid": str(self.param.ProjectId), "sampleid": "samples.newid",
                         "processid": "process.newid", "acquisid": "acquisitions.newid", "classif_who": "users.newid",
                         "classif_id": "t1.newid", "classif_auto_id": "t2.newid"}
        InsClause = []
        SelClause = []
        for c in TblSrc:
            if c in TblDst:
                InsClause.append(c)
                SelClause.append(CustomMapping.get(c, "o." + c))
        sql = "insert into public.obj_head (%s)" % (",".join(InsClause),)
        sql += """ select {0} from {1}.obj_head o
                   left join {1}.samples on o.sampleid=samples.sampleid
                   left join {1}.process on o.processid=process.processid
                   left join {1}.acquisitions on o.acquisid=acquisitions.acquisid
                   left join {1}.users on o.classif_who=users.id
                   left join {1}.taxonomy t1 on o.classif_id=t1.id
                   left join {1}.taxonomy t2 on o.classif_auto_id=t2.id
                   where o.projid={2}""".format(",".join(SelClause), newschema, self.param.ProjectSrcId)
        N = ExecSQL(sql)
        logging.info("Created %s Objects Phase 1", N)
        # Process the obj_field table
        TblSrc = GetColsForTable(newschema, 'obj_field')
        TblDst = GetColsForTable('public', 'obj_field')
        CustomMapping = {"objfid": "obj_head.newid"}
        InsClause = []
        SelClause = []
        for c in TblSrc:
            if c in TblDst:
                InsClause.append(c)
                SelClause.append(CustomMapping.get(c, "o." + c))
        sql = "insert into public.obj_field (%s)" % (",".join(InsClause),)
        sql += """ select {0} from {1}.obj_field o
                   left join {1}.obj_head on o.objfid=obj_head.objid
                   where obj_head.projid={2}""".format(",".join(SelClause), newschema, self.param.ProjectSrcId)
        N = ExecSQL(sql)
        logging.info("Created %s Objects Phase 2", N)
        self.param.IntraStep = 3
    if self.param.IntraStep == 3:
        logging.info("SubStep 3 : Create Images")
        self.UpdateProgress(40, "Import Images")
        PrevPct = 40
        NbrProcessed = 0
        NbrToProcess = ExecSQL("""UPDATE {0}.images set newid=nextval('seq_images')
                                  from {0}.obj_head o
                                  where images.objid=o.objid and o.projid={1}""".format(newschema, self.param.ProjectSrcId))
        TblSrc = GetColsForTable(newschema, 'images')
        TblDst = GetColsForTable('public', 'images')
        zfile = ZipFile(self.param.InData, 'r', allowZip64=True)
        vaultroot = Path("../../vault")
        cur = db.engine.raw_connection().cursor(cursor_factory=RealDictCursor)
        cur.execute("select images.*,o.newid newobjid from {0}.images join {0}.obj_head o on images.objid=o.objid where o.projid={1} ".format(
            newschema, self.param.ProjectSrcId))
        for r in cur:
            Img = database.Images()
            Img.imgid = r['newid']
            Img.objid = r['newobjid']
            if r['file_name']:
                zipimagefile = "images/%s.img" % r['imgid']
                zfile.extract(zipimagefile)
                SrcImg = r['file_name']
                SrcImgMini = r['thumb_file_name']
                VaultFolder = "%04d" % (Img.imgid // 10000)
                # Create the folder holding the images if needed
                CreateDirConcurrentlyIfNeeded(vaultroot.joinpath(VaultFolder))
                Img.file_name = "%s/%04d%s" % (VaultFolder, Img.imgid % 10000, Path(SrcImg).suffix)
                shutil.move(zipimagefile, vaultroot.joinpath(Img.file_name).as_posix())
                if r['thumb_file_name']:
                    zipimagefile = "images/%s.thumb" % r['imgid']
                    zfile.extract(zipimagefile)
                    Img.thumb_file_name = "%s/%04d_mini%s" % (VaultFolder, Img.imgid % 10000, Path(SrcImgMini).suffix)
                    shutil.move(zipimagefile, vaultroot.joinpath(Img.thumb_file_name).as_posix())
            for c in TblSrc:
                if c not in ('imgid', 'objid', 'file_name', 'thumb_file_name') and c in TblDst:
                    setattr(Img, c, r[c])
            db.session.add(Img)
            db.session.commit()
            NbrProcessed += 1
            NewPct = int(40 + 59 * (NbrProcessed / NbrToProcess))
            if NewPct != PrevPct:
                self.UpdateProgress(NewPct, "Import Images %d/%d" % (NbrProcessed, NbrToProcess))
                PrevPct = NewPct
        # Recompute the Img0 values
        self.UpdateProgress(99, "Remap Images")
        self.pgcur.execute("""update obj_head o set
                  imgcount=(select count(*) from images where objid=o.objid)
                 ,img0id=(select imgid from images where objid=o.objid order by imgrank asc limit 1)
                 where projid=""" + str(self.param.ProjectId))
        self.pgcur.connection.commit()
        cur.close()
    # self.task.taskstate = "Done"
    # self.UpdateProgress(100, "Processing done")
    self.task.taskstate = "Question"
    self.UpdateProgress(99, "Processing done, Answer question to import another project from the same database")
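# Sketch of the 'newid' remapping idea driving SPStep2: each imported row gets a
# fresh id, and children are inserted through a join that swaps old foreign keys
# for the new ones. Here the join is a plain dict lookup; all names are made up.
import itertools

def remap_import_sketch(src_sample_ids, src_objects):
    seq = itertools.count(1000)             # stand-in for nextval('seq_...')
    newid = {old: next(seq) for old in src_sample_ids}
    imported = [{'objid': next(seq),                    # like o.newid
                 'sampleid': newid[o['sampleid']]}      # like the left join on samples
                for o in src_objects]
    return imported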
def SPStep1(self):
    logging.info("Input Param = %s" % (self.param.__dict__))
    logging.info("Start Step 1")
    # self.param.IntraStep=0
    if getattr(self.param, 'IntraStep', 0) == 0:
        fichier = os.path.join(self.GetWorkingDir(), "uploaded.txt")
        logging.info("Analyzing file %s" % (fichier))
        with open(fichier, encoding='latin_1') as csvfile:
            rdr = csv.reader(csvfile, delimiter='\t', quotechar='"')
            # Read the title line
            LType = rdr.__next__()
            # Read the file content
            RowCount = 0
            ExecSQL("truncate table temp_taxo")
            sqlinsert = "INSERT INTO temp_taxo(idparent,idtaxo,name,status,typetaxo) values(%s,%s,%s,%s,%s)"
            for lig in rdr:
                if lig[0].strip() == '':  # Empty line
                    continue
                database.ExecSQL(sqlinsert, (lig[0].strip(), lig[1].strip(),
                                             lig[2].replace('+', ' ').replace('_', ' ').strip(),
                                             lig[3].strip(), lig[4].strip()))
                if RowCount > 0 and RowCount % 1000 == 0:
                    logging.info("Inserted %s lines" % RowCount)
                RowCount += 1
            logging.info("count=%d" % RowCount)
        self.param.IntraStep = 1
    if self.param.IntraStep == 1:
        # Set idfinal in the temp table for everything that already exists
        n = ExecSQL("""UPDATE temp_taxo tt set idfinal=tf.id from taxonomy tf
                       where tf.id_source=tt.idtaxo or (lower(tf.name)=lower(tt.name) and tf.id_source is null)""")
        logging.info("%d Nodes already exist" % n)
        # Insert the new root nodes
        n = ExecSQL("""INSERT INTO taxonomy (id, parent_id, name, id_source)
                       select nextval('seq_taxonomy'),NULL,t.name,t.idtaxo from temp_taxo t
                       where idparent='-1' and idfinal is null and status='1'""")
        logging.info("Inserted %d Root Nodes" % n)
        # Update the existing import table
        n = ExecSQL("""UPDATE temp_taxo tt set idfinal=tf.id from taxonomy tf
                       where tf.id_source=tt.idtaxo and tt.idfinal is null and idparent='-1'""")
        logging.info("Updated %d inserted Root Nodes" % n)
        while True:
            # Insert the new child nodes whose parents are already inserted (see the sketch after this function)
            # n=ExecSQL("""INSERT INTO taxonomy (id, parent_id, name, id_source)
            #        select nextval('seq_taxonomy'),ttp.idfinal,tt.name,tt.idtaxo from temp_taxo tt join temp_taxo ttp on tt.idparent=ttp.idtaxo
            #        where tt.idfinal is null and ttp.idfinal is not null and status='1'""")
            n = ExecSQL("""INSERT INTO taxonomy (id, parent_id, name, id_source)
                           select nextval('seq_taxonomy'),ttp.id,tt.name,tt.idtaxo
                           from temp_taxo tt join taxonomy ttp on tt.idparent=ttp.id_source
                           where tt.idfinal is null and status='1'""")
            if n == 0:
                logging.info("No more data to import")
                break
            else:
                logging.info("Inserted %d Child Nodes" % n)
            # Update the existing import table
            n = ExecSQL("""UPDATE temp_taxo tt set idfinal=tf.id from taxonomy tf
                           where tf.id_source=tt.idtaxo and tt.idfinal is null """)
            logging.info("Updated %d inserted Child Nodes" % n)
        n = ExecSQL("""UPDATE taxonomy tf set name=tt.name from temp_taxo tt
                       where tf.id_source=tt.idtaxo and tt.status='1' and tf.name!=tt.name""")
        logging.info("Updated %d Node names" % n)
        n = ExecSQL("""UPDATE taxonomy tfu set parent_id=sq.idfinal
                       from (select tf.id, ttp.idfinal from taxonomy tf, temp_taxo tt
                             LEFT JOIN temp_taxo ttp on tt.idparent=ttp.idtaxo
                             where tf.id_source=tt.idtaxo and tt.status='1'
                               and coalesce(tf.parent_id,-1)!=coalesce(ttp.idfinal,-1)
                               and (ttp.idfinal is not null or tt.idparent='-1')) sq
                       where tfu.id=sq.id""")
        logging.info("Updated %d Node Parents" % n)
        while True:
            n = ExecSQL("""delete from taxonomy t using temp_taxo tt
                           where t.id=tt.idfinal and tt.status='0'
                             and not exists (select 1 from taxonomy where parent_id=t.id)
                             and not exists (select 1 from objects where classif_id=t.id or classif_auto_id=t.id)""")
            if n == 0:
                logging.info("No more data to delete")
                break
            else:
                logging.info("Deleted %d Nodes" % n)
        Lst = GetAll("""select t.name from taxonomy t, temp_taxo tt
                        where t.id=tt.idfinal and tt.status='0'
                          and (exists (select 1 from taxonomy where parent_id=t.id)
                               or exists (select 1 from objects where classif_id=t.id or classif_auto_id=t.id))""")
        for r in Lst:
            logging.info("Can't delete '%s' because it's used" % r[0])
    self.task.taskstate = "Done"
    self.UpdateProgress(100, "Processing done")
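# Sketch of the import loop above: nodes whose parent is already present get
# inserted, pass after pass, so each pass materializes one more tree level; the
# loop stops when a pass inserts nothing. Pure Python over hypothetical rows.
def level_insert_sketch(rows):
    # rows: (idtaxo, idparent); '-1' marks a root
    known = {r[0] for r in rows if r[1] == '-1'}     # roots go in first
    pending = [r for r in rows if r[1] != '-1']
    while True:
        batch = [r for r in pending if r[1] in known]
        if not batch:
            break                            # no more insertable nodes
        known.update(r[0] for r in batch)
        pending = [r for r in pending if r[0] not in known]
    return known, pending                    # pending: orphans whose parents are missing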
def PrjManualClassif(PrjId):
    request.form  # Force reading of the POST data, otherwise a 504 error occurs
    Changement = []
    Prj = database.Projects.query.filter_by(projid=PrjId).first()
    if not Prj.CheckRight(1):  # Level 0 = Read, 1 = Annotate, 2 = Admin
        return '<span class="label label-danger">You cannot Annotate this project</span>'
    changes = {k[8:-1]: v for k, v in request.form.items() if k[0:7] == "changes"}
    if len(changes) == 0:
        return '<span class="label label-warning">No pending change to update</span>'
    sql = """select o.objid,o.classif_auto_id,o.classif_auto_when,o.classif_auto_score,o.classif_id,o.classif_qual,o.classif_when,o.classif_who
             from obj_head o where o.objid in (""" + ",".join(changes.keys()) + ")"
    prev = {r['objid']: r for r in GetAll(sql, debug=False)}
    sql = "update obj_head set classif_id=%(classif_id)s,classif_qual=%(classif_qual)s,classif_who=%(classif_who)s,classif_when=now() where objid=%(objid)s "
    # sqli="""INSERT INTO objectsclassifhisto (objid, classif_date, classif_type, classif_id, classif_qual, classif_who)
    #         VALUES (%(objid)s,%(classif_when)s,'M',%(classif_id)s,%(classif_qual)s,%(classif_who)s )"""
    # History is written in one global statement to reduce the number of SQL commands, also checking for duplicates
    sqli = """INSERT INTO objectsclassifhisto(objid, classif_date, classif_type, classif_id, classif_qual, classif_who, classif_score)
              SELECT objid, classif_when, 'M' classif_type, classif_id, classif_qual, classif_who, null classif_score
              from obj_head oh
              where objid=any(%s) and classif_when is not null
                and not exists(select 1 from objectsclassifhisto och where oh.objid=och.objid and oh.classif_when=och.classif_date)"""
    try:
        params = [[int(x) for x in changes.keys()]]
        ExecSQL(sqli, params, True)
    except:
        app.logger.warning("Unable to add historical information, non-blocking %s" % (sys.exc_info(),))
    BatchParam = []
    for k, v in changes.items():
        ki = int(k)
        if v == "-1" or v == "":  # used by Validate All
            v = prev[ki]['classif_id']
        if prev[ki]['classif_qual'] != gvp('qual') or prev[ki]['classif_who'] != current_user.id or prev[ki]['classif_id'] != int(v):
            # There was at least one change
            params = {'objid': k, 'classif_id': v, 'classif_who': current_user.id, 'classif_qual': gvp('qual')}
            BatchParam.append(params)
            # ExecSQL(sql,params,False)
            Changement.append({'prev_id': prev[ki]['classif_id'], 'prev_qual': prev[ki]['classif_qual'],
                               'id': int(v), 'qual': gvp('qual')})
            # params={'objid':k,'classif_id':prev[ki]['classif_id'],'classif_who':prev[ki]['classif_who']
            #        ,'classif_qual':prev[ki]['classif_qual'],'classif_when':prev[ki]['classif_when']}
            # try:
            #     if ntcv(params['classif_when']) != "":  # without a date, PK violation
            #         ExecSQL(sqli, params, True)
            # except:
            #     app.logger.warning("Unable to add historical information, non-blocking %s" % (prev,))
            if prev[ki]['classif_id'] != int(v):
                # The classification changed: maintain the MRU classification list
                with app.MRUClassif_lock:
                    tbl = app.MRUClassif.get(current_user.id, [])
                    for i, t in enumerate(tbl):
                        if t["id"] == int(v):
                            if i > 0:  # move this item to the front (MRU behaviour)
                                tbl = [t] + tbl[0:i] + tbl[i + 1:]
                            break
                    else:
                        # Not found in the MRU list: add it at the front if its name exists in the taxonomy
                        Taxon = GetAll("""select tf.display_name as name
                                          from taxonomy tf
                                          left join taxonomy p1 on tf.parent_id=p1.id
                                          where tf.id=%(id)s """, {"id": v})
                        if len(Taxon) == 1:
                            Taxon = Taxon[0].get('name', "")
                            tbl.insert(0, {"id": int(v), "pr": 0, "text": Taxon})
                            if len(tbl) > 10:
                                tbl = tbl[0:10]
                    app.MRUClassif[current_user.id] = tbl
    if len(BatchParam) > 0:
        upcur = db.engine.raw_connection().cursor()
        try:
            upcur.executemany(sql, BatchParam)
            upcur.connection.commit()
        except:
            upcur.close()
            app.logger.warning("Unable to save changes %s" % (sys.exc_info(),))
            return '<span class="label label-danger">Unable to save changes</span>'
        upcur.close()
    app.logger.info("Changement = %s", Changement)
    # Apply the changes to projects_taxo_stat
    Empty = {'n': 0, 'V': 0, 'P': 0, 'D': 0}
    Changes = {}
    for c in Changement:
        if c['prev_id'] is None:
            c['prev_id'] = -1
        if c['prev_id'] not in Changes:
            Changes[c['prev_id']] = Empty.copy()
        if c['id'] is None:
            c['id'] = -1
        if c['id'] not in Changes:
            Changes[c['id']] = Empty.copy()
        Changes[c['prev_id']]['n'] -= 1
        Changes[c['id']]['n'] += 1
        if c['prev_qual'] in ('V', 'P', 'D'):
            Changes[c['prev_id']][c['prev_qual']] -= 1
        if c['qual'] in ('V', 'P', 'D'):
            Changes[c['id']][c['qual']] += 1
    LstIdInDB = [x[0] for x in database.GetAll("select id from projects_taxo_stat where projid=%s", [PrjId])]
    for k, c in Changes.items():
        if k not in LstIdInDB:
            database.ExecSQL("insert into projects_taxo_stat(projid, id, nbr, nbr_v, nbr_d, nbr_p) values (%s,%s,0,0,0,0)", [PrjId, k])
        sqlparam = {'projid': PrjId, 'id': k, 'n': c['n'], 'v': c['V'], 'd': c['D'], 'p': c['P']}
        database.ExecSQL("""update projects_taxo_stat set nbr=nbr+%(n)s, nbr_v=nbr_v+%(v)s, nbr_d=nbr_d+%(d)s, nbr_p=nbr_p+%(p)s
                            where projid=%(projid)s and id=%(id)s""", sqlparam)
    return '<span class="label label-success">Database update successful</span>'
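# Sketch of the per-category delta bookkeeping above: each change decrements the
# counters of the previous category/quality and increments those of the new one,
# so projects_taxo_stat can be patched instead of fully recomputed. Illustrative names.
def stat_delta_sketch(changements):
    deltas = {}

    def bucket(cat_id):
        cat_id = -1 if cat_id is None else cat_id   # None maps to the -1 pseudo-category
        return deltas.setdefault(cat_id, {'n': 0, 'V': 0, 'P': 0, 'D': 0})

    for c in changements:                   # {'prev_id','prev_qual','id','qual'}
        bucket(c['prev_id'])['n'] -= 1
        bucket(c['id'])['n'] += 1
        if c['prev_qual'] in ('V', 'P', 'D'):
            bucket(c['prev_id'])[c['prev_qual']] -= 1
        if c['qual'] in ('V', 'P', 'D'):
            bucket(c['id'])[c['qual']] += 1
    return deltas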
def doimporttext():
    # Test with D:\temp\Downloads\taxoexport_20181228_101007.tsv
    txt = ""
    uploadfile = request.files.get("uploadfile")
    if uploadfile is None:
        return PrintInCharte(FormatError("You must send a file"))
    Desctxt = "{0} on {1:%Y-%m-%d %H:%M:%S} ".format(uploadfile.filename, datetime.datetime.now())
    app.logger.info('Load file {} by {}'.format(uploadfile.filename, current_user))
    # Create a temporary file that deletes itself automatically
    tmpfile = tempfile.TemporaryFile(mode='w+b')
    # app.logger.info('TMP file is {}'.format(tmpfile.name))
    uploadfile.save(tmpfile)  # copy the content into it
    tmpfile.seek(0)  # rewind to the beginning of the file
    fichier = TextIOWrapper(tmpfile, encoding='latin_1', errors='replace')  # convert from binary to text
    app.logger.info("Analyzing file %s" % (fichier))
    rdr = csv.reader(fichier, delimiter='\t', quotechar='"')
    # Read the title line
    LType = rdr.__next__()
    # Read the file content
    RowCount = 0
    ExecSQL("truncate table temp_taxo")
    sqlinsert = "INSERT INTO temp_taxo(idparent,idtaxo,name,status,typetaxo) values(%s,%s,%s,%s,%s)"
    for lig in rdr:
        if lig[0].strip() == '':  # Empty line
            continue
        database.ExecSQL(sqlinsert, (lig[0].strip(), lig[1].strip(),
                                     lig[2].replace('+', ' ').replace('_', ' ').strip(),
                                     lig[3].strip(), lig[4].strip()))
        if RowCount > 0 and RowCount % 1000 == 0:
            app.logger.info("Inserted %s lines" % RowCount)
        RowCount += 1
    app.logger.info("count=%d" % RowCount)
    # if len(UpdatedTaxon)>0:
    #     ComputeDisplayName(UpdatedTaxon)
    txt += "<p style='color: green'> %s taxa loaded </p> " % (RowCount)
    # Set idfinal in the temp table for everything that already exists
    n = ExecSQL("""UPDATE temp_taxo tt set idfinal=tf.id from taxonomy tf
                   where tf.id_source=tt.idtaxo or (lower(tf.name)=lower(tt.name) and tf.id_source is null)""")
    app.logger.info("%d Nodes already exist" % n)
    TSVal = "to_timestamp('{}','YYYY-MM-DD HH24:MI:SS')".format(datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'))
    TSUpdate = "lastupdate_datetime=" + TSVal
    # Insert the new root nodes
    n = ExecSQL("""INSERT INTO taxonomy (id, parent_id, name, id_source,lastupdate_datetime,source_desc)
                   select nextval('seq_taxonomy'),NULL,t.name,t.idtaxo,{} ,%s from temp_taxo t
                   where idparent='-1' and idfinal is null and status='1'""".format(TSVal), ["Created by " + Desctxt])
    app.logger.info("Inserted %d Root Nodes" % n)
    # Update the existing import table
    n = ExecSQL("""UPDATE temp_taxo tt set idfinal=tf.id from taxonomy tf
                   where tf.id_source=tt.idtaxo and tt.idfinal is null and idparent='-1'""")
    app.logger.info("Updated %d inserted Root Nodes" % n)
    while True:
        # Insert the new child nodes whose parents are already inserted
        n = ExecSQL("""INSERT INTO taxonomy (id, parent_id, name, id_source,taxotype,lastupdate_datetime,source_desc)
                       select nextval('seq_taxonomy'),ttp.id,tt.name,tt.idtaxo,
                              case when lower(tt.typetaxo)='taxa' then 'P' else 'M' end,{},%s
                       from temp_taxo tt join taxonomy ttp on tt.idparent=ttp.id_source
                       where tt.idfinal is null and status='1'""".format(TSVal), ["Created by " + Desctxt])
        if n == 0:
            app.logger.info("No more data to import")
            break
        else:
            app.logger.info("Inserted %d Child Nodes" % n)
        # Update the existing import table
        n = ExecSQL("""UPDATE temp_taxo tt set idfinal=tf.id from taxonomy tf
                       where tf.id_source=tt.idtaxo and tt.idfinal is null """)
        app.logger.info("Updated %d inserted Child Nodes" % n)
    n = ExecSQL("""UPDATE taxonomy tf set name=tt.name,{},taxotype=case when lower(tt.typetaxo)='taxa' then 'P' else 'M' end
                       ,source_desc=%s
                   from temp_taxo tt
                   where tf.id_source=tt.idtaxo and tt.status='1'
                     and (tf.name!=tt.name or tf.taxotype!=case when lower(tt.typetaxo)='taxa' then 'P' else 'M' end)""".format(TSUpdate),
                ["Updated by " + Desctxt])
    app.logger.info("Updated %d Node names" % n)
    n = ExecSQL("""UPDATE taxonomy tfu set parent_id=sq.idfinal,{},source_desc=%s
                   from (select tf.id, ttp.idfinal from taxonomy tf, temp_taxo tt
                         LEFT JOIN temp_taxo ttp on tt.idparent=ttp.idtaxo
                         where tf.id_source=tt.idtaxo and tt.status='1'
                           and coalesce(tf.parent_id,-1)!=coalesce(ttp.idfinal,-1)
                           and (ttp.idfinal is not null or tt.idparent='-1')) sq
                   where tfu.id=sq.id""".format(TSUpdate), ["Updated by " + Desctxt])
    app.logger.info("Updated %d Node Parents" % n)
    # Deletion of status='0' nodes is disabled here:
    # while True:
    #     n = ExecSQL("""delete from taxonomy t
    #                    using temp_taxo tt
    #                    where t.id=tt.idfinal and tt.status='0'
    #                      and not exists (select 1 from taxonomy where parent_id=t.id)
    #                      and not exists (select 1 from objects where classif_id=t.id or classif_auto_id=t.id)""")
    #     if n == 0:
    #         app.logger.info("No more data to delete")
    #         break
    #     else:
    #         app.logger.info("Deleted %d Nodes" % n)
    # Lst = GetAll("""select t.name from taxonomy t,temp_taxo tt
    #                 where t.id=tt.idfinal and tt.status='0'
    #                   and (exists (select 1 from taxonomy where parent_id=t.id)
    #                        or exists (select 1 from objects where classif_id=t.id or classif_auto_id=t.id))""")
    # for r in Lst:
    #     app.logger.info("Can't delete '%s' because it's used" % r[0])
    txt += "<br><a href='/browsetaxo/' class='btn btn-primary'><i class='fas fa-arrow-left'></i> Back to taxonomy</a>"
    LstId = [x['idfinal'] for x in GetAll("select idfinal from temp_taxo where idfinal is not null")]
    ComputeDisplayName(LstId)
    app.logger.info("Updated Display name")
    # if len(Errors):
    #     txt += "<p style='color: red'> %s errors <ul> " % (len(Errors))
    #     txt += "\n".join("<li>%s</li>" % x for x in Errors)
    #     txt += "</ul></p> "
    g.bodydivmargin = "10px"
    return PrintInCharte(txt)
def PrjEditDataMass(PrjId):
    request.form  # Force reading of the POST data, otherwise a 504 error occurs
    Prj = database.Projects.query.filter_by(projid=PrjId).first()
    if Prj is None:
        flash("Project doesn't exist", 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    if not Prj.CheckRight(2):  # Level 0 = Read, 1 = Annotate, 2 = Admin
        flash('You cannot edit settings for this project', 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    g.headcenter = "<h4><a href='/prj/{0}'>{1}</a></h4>".format(Prj.projid, XSSEscape(Prj.title))
    txt = "<h3>Project Mass data edition </h3>"
    sqlparam = {}
    filtres = {}
    for k in sharedfilter.FilterList:
        if gvg(k):
            filtres[k] = gvg(k, "")
    field = gvp('field')
    if field and gvp('newvalue'):
        tables = {'f': 'obj_field', 'h': 'obj_head', 's': 'samples', 'a': 'acquisitions', 'p': 'process'}
        tablecode = field[0]
        table = tables[tablecode]  # the table is derived from the first letter of field
        field = field[1:]          # strip the first letter, which held the table code
        sql = "update " + table + " set " + field + "=%(newvalue)s "
        if field == 'classif_id':
            sql += " ,classif_when=current_timestamp,classif_who=" + str(current_user.id)
        sql += " where "
        if tablecode == "h":
            sql += " objid in ( select objid from objects o "
        elif tablecode == "f":
            sql += " objfid in ( select objid from objects o "
        elif tablecode == "s":
            sql += " sampleid in ( select distinct sampleid from objects o "
        elif tablecode == "a":
            sql += " acquisid in ( select distinct acquisid from objects o "
        elif tablecode == "p":
            sql += " processid in ( select distinct processid from objects o "
        sql += " where projid=" + str(Prj.projid)
        sqlparam['newvalue'] = gvp('newvalue')
        if len(filtres):
            sql += " " + sharedfilter.GetSQLFilter(filtres, sqlparam, str(current_user.id))
        sql += ")"
        if field == 'classif_id':
            sqlhisto = """insert into objectsclassifhisto(objid,classif_date,classif_type,classif_id,classif_qual,classif_who)
                          select objid,classif_when,'M', classif_id,classif_qual,classif_who
                          from objects o
                          where projid=""" + str(Prj.projid) + " and classif_when is not null "
            sqlhisto += sharedfilter.GetSQLFilter(filtres, sqlparam, str(current_user.id))
            ExecSQL(sqlhisto, sqlparam)
        ExecSQL(sql, sqlparam)
        flash('Data updated', 'success')
    if field == 'latitude' or field == 'longitude' or gvp('recompute') == 'Y':
        ExecSQL("""update samples s set latitude=sll.latitude,longitude=sll.longitude
                   from (select o.sampleid,min(o.latitude) latitude,min(o.longitude) longitude
                         from obj_head o
                         where projid=%(projid)s and o.latitude is not null and o.longitude is not null
                         group by o.sampleid) sll
                   where s.sampleid=sll.sampleid and projid=%(projid)s """, {'projid': Prj.projid})
        flash('Sample latitude and longitude updated', 'success')
    sql = "select objid FROM objects o where projid=" + str(Prj.projid)
    if len(filtres):
        sql += sharedfilter.GetSQLFilter(filtres, sqlparam, str(current_user.id))
        ObjList = GetAll(sql, sqlparam)
        ObjListTxt = "\n".join((str(r['objid']) for r in ObjList))
        txt += "<span style='color:red;font-weight:bold;font-size:large;'>USING Active Project Filters, {0} objects</span>".format(len(ObjList))
    else:
        txt += "<span style='color:red;font-weight:bold;font-size:large;'>Apply to ALL OBJECTS OF THE PROJECT (NO Active Filters)</span>"
    Lst = GetFieldList(Prj)
    # txt += "%s" % (Lst,)
    return PrintInCharte(render_template("project/prjeditdatamass.html", Lst=Lst, header=txt))
def PrjMerge(PrjId):
    Prj = database.Projects.query.filter_by(projid=PrjId).first()
    if Prj is None:
        flash("Project doesn't exist", 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    if not Prj.CheckRight(2):  # Level 0 = Read, 1 = Annotate, 2 = Admin
        flash('You cannot edit settings for this project', 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    g.headcenter = "<h4><a href='/prj/{0}'>{1}</a></h4>".format(Prj.projid, XSSEscape(Prj.title))
    txt = "<h3>Project Merge / Fusion </h3>"
    if not gvg('src'):
        txt += """<ul><li>You are allowed to merge projects that you are allowed to manage
        <li>User privileges from both projects will be added
        <li>This tool merges two projects into a single one (called the Current project). The added project is then automatically deleted. If object data are not consistent between both projects :
        <ul><li>New data fields are added to the Current project
        <li>The resulting project will thus contain partially documented data fields.
        </ul><li>Note : the next screen will list compatibility issues (if any) and let you confirm the merge operation.
        </ul>"""
        sql = "select p.projid,title,status,coalesce(objcount,0) objcount,coalesce(pctvalidated,0) pctvalidated,coalesce(pctclassified,0) pctclassified from projects p"
        if not current_user.has_role(database.AdministratorLabel):
            sql += " Join projectspriv pp on p.projid = pp.projid and pp.member=%d" % (current_user.id,)
        sql += " where p.projid!=%d order by title" % Prj.projid
        res = GetAll(sql, doXSSEscape=True)  # ,debug=True
        txt += """<table class='table table-bordered table-hover table-verycondensed'>
        <tr><th width=120>ID</th><th>Title</th><th width=100>Status</th><th width=100>Nbr Obj</th>
        <th width=100>% Validated</th><th width=100>% Classified</th></tr>"""
        for r in res:
            txt += """<tr><td><a class="btn btn-primary" href='/prj/merge/{activeproject}?src={projid}'>Select</a> {projid}</td>
            <td>{title}</td>
            <td>{status}</td>
            <td>{objcount:0.0f}</td>
            <td>{pctvalidated:0.2f}</td>
            <td>{pctclassified:0.2f}</td>
            </tr>""".format(activeproject=Prj.projid, **r)
        txt += "</table>"
        return PrintInCharte(txt)
    PrjSrc = database.Projects.query.filter_by(projid=int(gvg('src'))).first()
    if PrjSrc is None:
        flash("Source project doesn't exist", 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    if not PrjSrc.CheckRight(2):  # Level 0 = Read, 1 = Annotate, 2 = Admin
        flash('You cannot merge this project', 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    txt += """<h4>Source Project : {0} - {1} (This project will be destroyed)</h4>""".format(PrjSrc.projid, XSSEscape(PrjSrc.title))
    if not gvg('merge'):
        # At this point the source has been chosen and verified
        if PrjSrc.mappingobj != Prj.mappingobj:
            flash("Object mapping differs with the source project", "warning")
        if PrjSrc.mappingsample != Prj.mappingsample:
            flash("Sample mapping differs with the source project", "warning")
        if PrjSrc.mappingacq != Prj.mappingacq:
            flash("Acquisition mapping differs with the source project", "warning")
        if PrjSrc.mappingprocess != Prj.mappingprocess:
            flash("Process mapping differs with the source project", "warning")
        txt += FormatError("""<span class='glyphicon glyphicon-warning-sign'></span> Warning project {1} - {2}<br>
                Will be destroyed, its content will be transferred to the target project.<br>
                This operation is irreversible</p>
                <br><a class='btn btn-lg btn-warning' href='/prj/merge/{0}?src={1}&merge=Y'>Start Project Fusion</a>""",
                           Prj.projid, PrjSrc.projid, XSSEscape(PrjSrc.title), DoNotEscape=True)
        return PrintInCharte(txt)
    if gvg('merge') == 'Y':
        ExecSQL("update acquisitions set projid={0} where projid={1}".format(Prj.projid, PrjSrc.projid))
        ExecSQL("update process set projid={0} where projid={1}".format(Prj.projid, PrjSrc.projid))
        ExecSQL("update samples set projid={0} where projid={1}".format(Prj.projid, PrjSrc.projid))
        ExecSQL("update obj_head set projid={0} where projid={1}".format(Prj.projid, PrjSrc.projid))
        ExecSQL("update part_projects set projid={0} where projid={1}".format(Prj.projid, PrjSrc.projid))
        # Keep the highest privilege of the two projects
        ExecSQL("""UPDATE projectspriv ppdst set privilege=case when 'Manage' in (ppsrc.privilege,ppdst.privilege) then 'Manage'
                          when 'Annotate' in (ppsrc.privilege,ppdst.privilege) then 'Annotate'
                          else 'View' end
                   from projectspriv ppsrc
                   where ppsrc.projid={1} and ppdst.projid={0} and ppsrc.member=ppdst.member""".format(Prj.projid, PrjSrc.projid), debug=True)
        # Transfer the privileges from the source project
        ExecSQL("""update projectspriv set projid={0} where projid={1}
                   and member not in (select member from projectspriv where projid={0})""".format(Prj.projid, PrjSrc.projid))
        # Delete those that existed on both sides
        ExecSQL("delete from projectspriv where projid={0}".format(PrjSrc.projid))
        ExecSQL("delete from projects where projid={0}".format(PrjSrc.projid))
        appli.project.main.RecalcProjectTaxoStat(Prj.projid)
        appli.project.main.UpdateProjectStat(Prj.projid)
        txt += "<div class='alert alert-success' role='alert'>Fusion done successfully</div>"
        txt += "<br><a class='btn btn-lg btn-primary' href='/prj/%s'>Back to target project</a>" % Prj.projid
        return PrintInCharte(txt)
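# Sketch of the privilege-merging rule above: when a member exists in both
# projects, the higher privilege wins ('Manage' > 'Annotate' > 'View'); members
# present only in the source project are simply transferred. Names are illustrative.
def merge_privileges_sketch(dst, src):
    order = {'View': 0, 'Annotate': 1, 'Manage': 2}
    for member, priv in src.items():
        if member in dst:
            if order[priv] > order[dst[member]]:
                dst[member] = priv          # keep the highest of the two
        else:
            dst[member] = priv              # transfer from the source project
    return dst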
def SPStep1(self):
    logging.info("Input Param = %s" % (self.param.__dict__,))
    logging.info("Start Step 1")
    self.UpdateProgress(1, "Retrieve Data from Learning Set")
    TInit = time.time()
    Prj = database.Projects.query.filter_by(projid=self.param.ProjectId).first()
    PrjBase = database.Projects.query.filter_by(projid=self.param.BaseProject).first()
    MapPrj = self.GetReverseObjMap(Prj)
    MapPrjBase = self.GetReverseObjMap(PrjBase)
    logging.info("MapPrj %s", MapPrj)
    logging.info("MapPrjBase %s", MapPrjBase)
    CritVar = self.param.CritVar.split(",")
    ColsPrj = []      # columns common to both projects, in the same order
    ColsPrjBase = []
    MapColTargetToBase = {}
    PostTaxoMapping = {}
    if 'posttaxomapping' in self.param.CustSettings:
        PostTaxoMapping = {int(el[0].strip()): int(el[1].strip())
                           for el in [el.split(':')
                                      for el in self.param.CustSettings['posttaxomapping'].split(',')]}
        logging.info("PostTaxoMapping = %s ", PostTaxoMapping)
    if self.param.learninglimit:
        self.param.learninglimit = int(self.param.learninglimit)  # convert
    for c in CritVar:
        if c not in MapPrj:
            logging.info("Variable %s not available in the classified project", c)
        elif c not in MapPrjBase:
            logging.info("Variable %s not available in the base project", c)
        else:
            ColsPrjBase.append(MapPrjBase[c])
            ColsPrj.append(MapPrj[c])
            MapColTargetToBase[MapPrj[c]] = MapPrjBase[c]
    sql = "select 1"
    for c in ColsPrjBase:
        sql += ",coalesce(percentile_cont(0.5) WITHIN GROUP (ORDER BY {0}),-9999) as {0}".format(c)
    sql += " from objects where projid={0} and classif_id is not null and classif_qual='V'".format(PrjBase.projid)
    if self.param.learninglimit:
        sql += """ and objid in (select objid from (
                       select objid,row_number() over(PARTITION BY classif_id order by random_value) rang
                       from obj_head
                       where projid={0} and classif_id is not null and classif_qual='V') q
                   where rang<={1}) """.format(PrjBase.projid, self.param.learninglimit)
    DefVal = GetAll(sql)[0]
    sql = "select objid,classif_id"
    for c in ColsPrjBase:
        sql += ",coalesce({0},{1}) ".format(c, DefVal[c])
    sql += """ from objects
               where classif_id is not null and classif_qual='V'
                 and projid={0} and classif_id in ({1}) """.format(PrjBase.projid, self.param.Taxo)
    if self.param.learninglimit:
        sql += """ and objid in (select objid from (
                       select objid,row_number() over(PARTITION BY classif_id order by random_value) rang
                       from obj_head
                       where projid={0} and classif_id is not null and classif_qual='V') q
                   where rang<={1}) """.format(PrjBase.projid, self.param.learninglimit)
    sql += " order by objid "
    DBRes = np.array(GetAll(sql))
    # Ids = DBRes[:,0]          # objid only
    learn_cat = DBRes[:, 1]     # classification only
    learn_var = DBRes[:, 2:]    # excludes objid & classification
    DBRes = None                # free the memory
    logging.info('DB Conversion to NP : %0.3f s', time.time() - TInit)
    logging.info("Variable shape %d Row, %d Col", *learn_var.shape)
    # Note: spreading the work over more jobs is not necessarily faster, at least on a small set.
    if self.param.Methode == 'randomforest':
        Classifier = RandomForestClassifier(n_estimators=300, min_samples_leaf=2, n_jobs=1, class_weight="balanced")
    elif self.param.Methode == 'svm':
        Classifier = svm.SVC()
    else:
        raise Exception("Classifier '%s' not implemented " % self.param.Methode)
    if self.param.Perimeter != 'all':
        PerimeterWhere = " and ( classif_qual='P' or classif_qual is null) "
    else:
        PerimeterWhere = ""
    if self.param.TargetTaxo != "":
        PerimeterWhere += " and classif_id in (%s) " % (self.param.TargetTaxo,)
    if self.param.resetpredicted == "ResetToPredicted" and self.param.TargetTaxo != "":
        sqlhisto = """insert into objectsclassifhisto(objid,classif_date,classif_type,classif_id,classif_qual,classif_who)
                      select objid,classif_when,'M', classif_id,classif_qual,classif_who
                      from obj_head o
                      where projid={0} and classif_when is not null and classif_qual='V'
                        and classif_id in ({1})""".format(Prj.projid, self.param.TargetTaxo)
        ExecSQL(sqlhisto)
        sqlhisto = """update obj_head set classif_qual=null
                      where projid={0} and classif_when is not null and classif_qual='V'
                        and classif_id in ({1}) """.format(Prj.projid, self.param.TargetTaxo)
        ExecSQL(sqlhisto)
    # TStep = time.time()
    # This solution is not suitable: applied per block of 100, sometimes a whole column has no value
    # at all, and the column gets dropped because its mean cannot be computed.
    # learn_var = Imputer().fit_transform(learn_var)
    # learn_var[learn_var==np.nan] = -99999  # NaNs are NULLs in the DB, handled by the coalesce
    # logging.info('Clean input variables : %0.3f s', time.time() - TStep)
    TStep = time.time()
    Classifier.fit(learn_var, learn_cat)
    logging.info('Model fit duration : %0.3f s', time.time() - TStep)
    NbrItem = GetAll("select count(*) from obj_head where projid={0} {1} ".format(Prj.projid, PerimeterWhere))[0][0]
    if NbrItem == 0:
        raise Exception("No object to classify; perhaps all objects are already classified, "
                        "or you should adjust the perimeter settings (probably set to 'Not Validated')")
    sql = "select objid"
    for c in ColsPrj:
        sql += ",coalesce({0},{1}) ".format(c, DefVal[MapColTargetToBase[c]])
    sql += """ from objects where projid={0} {1} order by objid""".format(Prj.projid, PerimeterWhere)
    self.pgcur.execute(sql)
    upcur = db.engine.raw_connection().cursor()
    ProcessedRows = 0
    while True:
        self.UpdateProgress(15 + 85 * (ProcessedRows / NbrItem), "Processed %d/%d" % (ProcessedRows, NbrItem))
        TStep = time.time()
        # Fetch the variables of the objects to classify
        DBRes = np.array(self.pgcur.fetchmany(100))
        if len(DBRes) == 0:
            break
        ProcessedRows += len(DBRes)
        Tget_Ids = DBRes[:, 0]   # objid only
        Tget_var = DBRes[:, 1:]  # excludes objid
        TStep2 = time.time()
        # Tget_var = Imputer().fit_transform(Tget_var)  # see the comment on learn_var
        # Tget_var[Tget_var==np.nan] = -99999
        Result = Classifier.predict_proba(Tget_var)
        ResultMaxCol = np.argmax(Result, axis=1)
        # Explicit typing matters for PostgreSQL performance
        SqlParam = [{'cat': int(Classifier.classes_[mc]), 'p': r[mc], 'id': int(i)}
                    for i, mc, r in zip(Tget_Ids, ResultMaxCol, Result)]
        for i, v in enumerate(SqlParam):
            if v['cat'] in PostTaxoMapping:
                SqlParam[i]['cat'] = PostTaxoMapping[v['cat']]
        TStep3 = time.time()
        # DB update: objects without a classification become predicted; empty or predicted ones get the new classification
        if self.param.keeplog:
            upcur.executemany("""insert into objectsclassifhisto(objid,classif_date,classif_type,classif_id,classif_qual,classif_score)
                                 select objid,classif_auto_when,'A', classif_auto_id,classif_qual,classif_auto_score
                                 from obj_head
                                 where objid=%(id)s and classif_auto_id!=%(cat)s
                                   and classif_auto_id is not null and classif_auto_when is not null """, SqlParam)
        upcur.executemany("""update obj_head set classif_auto_id=%(cat)s,classif_auto_score=%(p)s,classif_auto_when=now()
                             ,classif_qual=case when classif_qual in ('D','V') then classif_qual else 'P' END
                             ,classif_id=case when classif_qual in ('D','V') then classif_id else %(cat)s end
                             where objid=%(id)s""", SqlParam)
        upcur.connection.commit()
        logging.info('Chunk Db Extract %d/%d, Classification and Db Save : %0.3f s %0.3f+%0.3f+%0.3f',
                     ProcessedRows, NbrItem, time.time() - TStep, TStep2 - TStep, TStep3 - TStep2, time.time() - TStep3)
    self.task.taskstate = "Done"
    self.UpdateProgress(100, "Classified %d objects" % ProcessedRows)
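# Self-contained sketch of the chunked scoring loop above, assuming scikit-learn
# and numpy are available: fit a small RandomForest on synthetic data, then score
# rows chunk by chunk, keeping for each row the winning class and its probability,
# as the SqlParam list does. All data and sizes here are made up for illustration.
import numpy as np
from sklearn.ensemble import RandomForestClassifier

def chunked_predict_sketch(chunk_size=100):
    rng = np.random.RandomState(0)
    learn_var = rng.rand(200, 4)                 # 200 training rows, 4 features
    learn_cat = rng.randint(0, 3, 200)           # 3 fake category ids
    clf = RandomForestClassifier(n_estimators=10, random_state=0)
    clf.fit(learn_var, learn_cat)
    targets = rng.rand(250, 4)                   # rows to classify
    results = []
    for start in range(0, len(targets), chunk_size):
        proba = clf.predict_proba(targets[start:start + chunk_size])
        best = np.argmax(proba, axis=1)          # column of the winning class per row
        results.extend({'cat': int(clf.classes_[mc]), 'p': float(row[mc])}
                       for mc, row in zip(best, proba))
    return results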