def CreateSUM(self):
    """Export a per-taxon summary of the project as a TSV file.

    Depending on ``param.sumsubtotal`` the counts are sub-totalled by
    acquisition ('A'), by sample ('S', which also adds an aggregated date
    column), or not at all. The output file name is stored in
    ``self.param.OutFile`` and the file is written in the task working dir.
    """
    self.UpdateProgress(1, "Start Summary export")
    prj = database.Projects.query.filter_by(projid=self.param.ProjectId).first()

    # GROUP BY / ORDER BY column list, depending on the sub-total level.
    if self.param.sumsubtotal == "S":
        grp = "s.orig_id,s.latitude,s.longitude,to1.display_name"
    elif self.param.sumsubtotal == "A":
        grp = "a.orig_id,to1.display_name"
    else:
        grp = "to1.display_name"

    if self.param.sumsubtotal == "S":
        # The aggregated date column must sit in the middle of the grouped
        # columns, hence a fully rewritten SELECT clause for the sample case.
        select_clause = "SELECT s.orig_id,s.latitude,s.longitude,max(objdate) as date,to1.display_name"
    else:
        select_clause = "SELECT " + grp
    select_clause += ",count(*) Nbr"

    from_clause = """ FROM obj_head o
        LEFT JOIN taxonomy to1 on o.classif_id=to1.id
        LEFT JOIN samples s on o.sampleid=s.sampleid
        LEFT JOIN acquisitions a on o.acquisid=a.acquisid
        """

    where_clause = " where o.projid=%(projid)s "
    params = {'projid': int(self.param.ProjectId)}
    if self.param.samplelist != "":
        where_clause += " and s.orig_id= any(%(samplelist)s) "
        params['samplelist'] = self.param.samplelist.split(",")
    # Append the user's active filters, then the grouping/ordering.
    where_clause += sharedfilter.GetSQLFilter(self.param.filtres, params, self.task.owner_id)
    where_clause += " group by " + grp
    where_clause += " order by " + grp

    sql = " ".join((select_clause, from_clause, where_clause))
    logging.info("Execute SQL : %s", sql)
    logging.info("Params : %s", params)
    self.pgcur.execute(sql, params)

    self.param.OutFile = "export_summary_{0:d}_{1:s}.tsv".format(
        prj.projid, datetime.datetime.now().strftime("%Y%m%d_%H%M"))
    out_path = os.path.join(self.GetWorkingDir(), self.param.OutFile)
    logging.info("Creating file %s", out_path)
    with open(out_path, 'w', encoding='latin_1') as csvfile:
        wtr = csv.writer(csvfile, delimiter='\t', quotechar='"', lineterminator='\n')
        # Header row straight from the cursor description, then the data rows.
        wtr.writerow([desc[0] for desc in self.pgcur.description])
        wtr.writerows(self.pgcur)
    logging.info("Extracted %d rows", self.pgcur.rowcount)
def CreateIMG(self, SplitImageBy):
    """Append the project's images to the (already existing) export zip file.

    SplitImageBy: 'taxo' stores each image under a DOI-style per-taxon name,
    anything else stores it under '<sample>/<original file name>'.
    """
    # tsvfile=self.param.OutFile
    self.UpdateProgress(1, "Start Image export")
    Prj = database.Projects.query.filter_by(
        projid=self.param.ProjectId).first()
    # self.param.OutFile= "exportimg_{0:d}_{1:s}.zip".format(Prj.projid,
    # datetime.datetime.now().strftime("%Y%m%d_%H%M"))
    # fichier=os.path.join(self.GetWorkingDir(),self.param.OutFile)
    logging.info("Opening for appending file %s" % (self.param.OutFile, ))
    # zfile=zipfile.ZipFile(fichier, 'w',allowZip64 = True,compression= zipfile.ZIP_DEFLATED)
    # zfile.write(tsvfile)
    # Mode 'a': the TSV part of the export was already written into this zip.
    zfile = zipfile.ZipFile(self.param.OutFile, 'a', allowZip64=True,
                            compression=zipfile.ZIP_DEFLATED)
    # '<' is not a legal file-name character, hence the replace() on display_name.
    sql = """SELECT i.objid,i.file_name,i.orig_file_name,t.name,replace(t.display_name,'<','_') taxo_parent_child,imgrank
        ,s.orig_id sample_orig_id
        From objects o
        left join samples s on o.sampleid=s.sampleid
        join images i on o.objid=i.objid
        left join taxonomy t on o.classif_id=t.id
        LEFT JOIN taxonomy to1p on t.parent_id=to1p.id
        where o.projid=%(projid)s """
    params = {'projid': int(self.param.ProjectId)}
    if self.param.samplelist != "":
        sql += " and s.orig_id= any(%(samplelist)s) "
        params['samplelist'] = self.param.samplelist.split(",")
    sql += sharedfilter.GetSQLFilter(self.param.filtres, params,
                                     self.task.owner_id)
    logging.info("Execute SQL : %s" % (sql, ))
    logging.info("Params : %s" % (params, ))
    self.pgcur.execute(sql, params)
    vaultroot = Path("../../vault")
    for r in self.pgcur:
        # NOTE(review): rows are accessed both by index (r[1] = file_name, the
        # vault-relative path) and by column name — presumably the cursor yields
        # dict-like rows (e.g. psycopg2 DictRow); confirm cursor factory.
        if SplitImageBy == 'taxo':
            zfile.write(vaultroot.joinpath(r[1]).as_posix(),
                        arcname=GetDOIImgFileName(r['objid'], r['imgrank'],
                                                  r['taxo_parent_child'],
                                                  r['file_name']))
        else:
            zfile.write(vaultroot.joinpath(r[1]).as_posix(),
                        arcname="{0}/{1}".format(r['sample_orig_id'],
                                                 r['orig_file_name']))
def SPStep1(self):
    """Train (or load) a Random-Forest classifier and apply it to the project.

    Flow:
      1. Either load a saved model from RF_models/<folder> (meta.json gives the
         column list and median defaults), or build a learning set from the
         source projects and fit a RandomForestClassifier.
      2. Optionally save the freshly trained model.
      3. Predict in chunks of 100 objects and write results back to obj_head
         (plus objectsclassifhisto when keeplog is set).
    """
    logging.info("Input Param = %s"%(self.param.__dict__,))
    logging.info("Start Step 1")
    TInit = time.time()
    Prj = database.Projects.query.filter_by(projid=self.param.ProjectId).first()
    # Reverse mapping of the target project: variable name => DB column, e.g. Area:n42
    MapPrj = self.GetReverseObjMap(Prj)
    CommonKeys = set(MapPrj.keys())
    # PostTaxoMapping is decoded from "source:target,source:target,..." pairs.
    PostTaxoMapping = {int(el[0].strip()): int(el[1].strip())
                       for el in [el.split(':') for el in self.param.PostTaxoMapping.split(',') if el != '']}
    logging.info("PostTaxoMapping = %s ", PostTaxoMapping)
    if self.param.usemodel_foldername!='':  # An existing saved model will be used
        ModelFolderName = self.param.usemodel_foldername
        ModelFolder = Path("../../RF_models") / ModelFolderName
        if not ModelFolder.is_dir():
            raise Exception("Invalid model directory RF_models/{} ".format(ModelFolderName))
        Meta=json.load((ModelFolder / "meta.json").open("r"))
        # If the saved model was trained with CNN features, force their use.
        if Meta.get('scn_model','')!="":
            self.param.usescn = 'Y'
    CNNCols = ""
    if self.param.usescn=='Y':
        self.ComputeSCNFeatures(Prj)
        # 50 extra feature columns cnn01..cnn50.
        CNNCols="".join([",cnn%02d"%(i+1) for i in range(50)])
    if self.param.usemodel_foldername!='':  # Load the existing model
        self.UpdateProgress(1, "Load model from file")
        Classifier=joblib.load( ModelFolder / 'random_forest.jbl')
        # NOTE(review): the fallback key 'zooprocess_fields:' (trailing colon)
        # presumably works around a historical typo in old meta.json files — confirm.
        zooprocess_fields=Meta.get('zooprocess_fields', Meta.get('zooprocess_fields:'))
        # A list (not a set) is used on purpose: the model's column order must
        # be strictly preserved when building the prediction query.
        CommonKeys = zooprocess_fields
        zooprocess_medians=Meta.get('zooprocess_medians')
        # Default values for missing data, keyed by target-project column, e.g. n61:1.234
        DefVal={}
        for c,m in zip(zooprocess_fields,zooprocess_medians):
            if c not in MapPrj:
                raise Exception("Column {} present in model but missing in project".format(c))
            DefVal[MapPrj[c]]=m
        # CommonKeys = CommonKeys.intersection(set(zooprocess_fields))
    else:  # Build the model from the source (learning-set) projects
        self.UpdateProgress(1, "Retrieve Data from Learning Set")
        PrjListInClause=database.CSVIntStringToInClause(self.param.BaseProject)
        LstPrjSrc=GetAll("select projid,mappingobj from projects where projid in({0})".format(PrjListInClause))
        MapPrjBase={}
        for PrjBase in LstPrjSrc:  # build the reverse mapping of each source project
            MapPrjBase[PrjBase['projid']] = self.GetReverseObjMap(PrjBase)
            # and intersect to keep only the attributes common to all of them
            CommonKeys = CommonKeys.intersection(set(MapPrjBase[PrjBase['projid']].keys()))
        if self.param.learninglimit:
            self.param.learninglimit=int(self.param.learninglimit)  # convert
        logging.info("MapPrj %s",MapPrj)
        logging.info("MapPrjBase %s",MapPrjBase)
        CritVar = self.param.CritVar.split(",")
        # Keep only the common columns that the user also selected.
        CommonKeys = CommonKeys.intersection(set(CritVar))
        # Compute the per-column medians over the learning set (one union branch
        # per source project); -9999 is the fallback when a median is NULL.
        sql =""
        for BasePrj in LstPrjSrc:
            bprojid=BasePrj['projid']
            if sql !="":
                sql+=" union all "
            sql+="select 1"
            for c in CommonKeys:
                sql+=",coalesce(percentile_cont(0.5) WITHIN GROUP (ORDER BY {0}),-9999) as {1}".format(MapPrjBase[bprojid][c],MapPrj[c])
            sql+=" from objects "
            sql += " where projid={0} and classif_id is not null and classif_qual='V'".format(bprojid)
        if self.param.learninglimit:
            # Cap the learning set to N random objects per category (random_value
            # gives a stable random order).
            LimitHead = """ with objlist as ( select objid from (
                select objid,row_number() over(PARTITION BY classif_id order by random_value) rang
                from obj_head
                where projid in ({0}) and classif_id is not null and classif_qual='V' ) q
                where rang <={1} ) """.format(PrjListInClause, self.param.learninglimit)
            LimitFoot = """ and objid in ( select objid from objlist ) """
            # NOTE(review): LimitFoot is appended once, after the whole union —
            # it therefore only constrains the LAST union branch. Confirm intent.
            sql= LimitHead + sql + LimitFoot
        else :
            LimitHead=LimitFoot=""
        DefVal=GetAll(sql)[0]
        # Extract the learning-set data itself.
        sql = ""
        for BasePrj in LstPrjSrc:
            bprojid = BasePrj['projid']
            if sql != "":
                sql += " \nunion all "
            # NOTE(review): plain assignment here discards the "union all" just
            # appended, so only the last source project's column mapping is kept
            # (the WHERE below already covers all projects via PrjListInClause).
            # Looks like it should be '+=' if per-project mappings differ — confirm.
            sql="select classif_id"
            for c in CommonKeys:
                # NULL and non-finite values are replaced by the column's median.
                sql+=",coalesce(case when {0} not in ('Infinity','-Infinity','NaN') then {0} end,{1}) as {2}".format(MapPrjBase[bprojid][c],DefVal[MapPrj[c]],MapPrj[c])
            sql+=CNNCols+" from objects "
            if self.param.usescn == 'Y':
                sql += " join obj_cnn_features on obj_cnn_features.objcnnid=objects.objid "
            sql+=""" where classif_id is not null and classif_qual='V'
                and projid in ({0}) and classif_id in ({1}) """.format(PrjListInClause,self.param.Taxo)
            if self.param.learninglimit:
                sql = LimitHead + sql + LimitFoot
        # Convert the learning set into a NumPy array.
        DBRes=np.array(GetAll(sql))
        LSSize=DBRes.shape[0]
        learn_cat = DBRes[:,0]   # the category only
        learn_var = DBRes[:,1:]  # all columns except the category
        DBRes=None  # free the memory
        logging.info('DB Conversion to NP : %0.3f s', time.time() - TInit)
        logging.info("Variable shape %d Row, %d Col",*learn_var.shape)
        # Note: adding jobs is not necessarily faster, at least on a small set.
        Classifier = RandomForestClassifier(n_estimators=300, min_samples_leaf=2, n_jobs=1, class_weight="balanced")
        # TStep = time.time()
        # Imputer() was rejected: when applied per 100-row block a fully-NULL
        # column gets dropped because its mean cannot be computed.
        # learn_var = Imputer().fit_transform(learn_var)
        # learn_var[learn_var==np.nan] = -99999  NaN are DB NULLs, handled by the coalesce above
        # logging.info('Clean input variables : %0.3f s', time.time() - TStep)
        TStep = time.time()
        Classifier.fit(learn_var, learn_cat)
        logging.info('Model fit duration : %0.3f s', time.time() - TStep)
        if self.param.savemodel_foldername!="":  # The model must be saved
            # Sanitize the folder name to a safe file-system identifier.
            ModelFolderName=re.sub('[^\w-]', '_', self.param.savemodel_foldername.strip())
            ModelFolder=Path("../../RF_models") / ModelFolderName
            if not ModelFolder.is_dir():
                ModelFolder.mkdir()
            joblib.dump(Classifier,ModelFolder/'random_forest.jbl')
            Meta={"zooprocess_fields":[],"zooprocess_medians":[],"name":self.param.savemodel_title
                , "comments": self.param.savemodel_comments,'date':datetime.datetime.now().isoformat()
                ,"scn_model":"","type": "RandomForest-ZooProcess","n_objects": LSSize}
            # Persist the exact column order and the median defaults used.
            for c in CommonKeys:
                Meta["zooprocess_fields"].append(c)
                Meta["zooprocess_medians"].append(DefVal[MapPrj[c]])
            Meta['categories'] = {r[0]: r[1] for r in database.GetTaxoNameFromIdList([int(x) for x in Classifier.classes_])}
            Meta['scn_model'] =""
            if self.param.usescn == 'Y':
                Meta['scn_model']=Prj.cnn_network_id
            json.dump(Meta,(ModelFolder/"meta.json").open("w"),indent="\t")
    # ------ End of the learning / model-loading part
    # Perimeter: unless 'all', only not-yet-validated objects are (re)classified.
    if self.param.Perimeter!='all':
        PerimeterWhere=" and ( classif_qual='P' or classif_qual is null) "
    else:
        PerimeterWhere=""
    sqlparam={}
    PerimeterWhere +=sharedfilter.GetSQLFilter(self.param.filtres, sqlparam,-99999)
    NbrItem=GetAll("select count(*) from objects o where projid={0} {1} ".format(Prj.projid,PerimeterWhere),sqlparam)[0][0]
    if NbrItem==0:
        raise Exception ("No object to classify, perhaps all object already classified or you should adjust the perimeter settings as it was probably set to 'Not Validated' ")
    # Build the prediction query: objid + the model's columns (same order),
    # with the same median-based fallback as the learning set.
    sql="select objid"
    for c in CommonKeys:
        sql += ",coalesce(case when {0} not in ('Infinity','-Infinity','NaN') then {0} end,{1}) as {0}".format(MapPrj[c], DefVal[MapPrj[c]])
    sql += CNNCols + " from objects o "
    if self.param.usescn == 'Y':
        sql += " join obj_cnn_features on obj_cnn_features.objcnnid=o.objid "
    sql+=""" where projid={0} {1} order by objid""".format(Prj.projid,PerimeterWhere)
    self.pgcur.execute(sql,sqlparam)
    # logging.info("SQL=%s",sql)
    upcur=db.engine.raw_connection().cursor()
    ProcessedRows=0
    while True:
        self.UpdateProgress(15+85*(ProcessedRows/NbrItem),"Processed %d/%d"%(ProcessedRows,NbrItem))
        TStep = time.time()
        # Fetch the next chunk of objects to classify.
        DBRes=np.array(self.pgcur.fetchmany(100))
        if len(DBRes)==0:
            break
        ProcessedRows+=len(DBRes)
        Tget_Ids = DBRes[:,0]   # the objid only
        Tget_var = DBRes[:,1:]  # everything but the objid
        TStep2 = time.time()
        # Tget_var= Imputer().fit_transform(Tget_var)  # see the learn_var comment
        # Tget_var[Tget_var==np.nan] = -99999
        Result=Classifier.predict_proba(Tget_var)
        ResultMaxCol=np.argmax(Result,axis=1)
        # Explicit int typing matters for PostgreSQL performance.
        SqlParam=[{'cat':int(Classifier.classes_[mc]),'p':r[mc],'id':int(i)} for i,mc,r in zip(Tget_Ids,ResultMaxCol,Result)]
        # Remap predicted categories per the user-provided source:target pairs.
        for i, v in enumerate(SqlParam):
            if v['cat'] in PostTaxoMapping:
                SqlParam[i]['cat']=PostTaxoMapping[v['cat']]
        TStep3 = time.time()
        # DB update: objects become Predicted unless already Dubious/Validated,
        # in which case only the automatic columns are refreshed.
        if self.param.keeplog:
            upcur.executemany("""insert into objectsclassifhisto(objid,classif_date,classif_type,classif_id,classif_qual,classif_score)
                select objid,classif_auto_when,'A', classif_auto_id,classif_qual,classif_auto_score
                from obj_head
                where objid=%(id)s and classif_auto_id!=%(cat)s and classif_auto_id is not null
                and classif_auto_when is not null """,SqlParam)
        upcur.executemany("""update obj_head set classif_auto_id=%(cat)s,classif_auto_score=%(p)s,classif_auto_when=now()
            ,classif_qual=case when classif_qual in ('D','V') then classif_qual else 'P' END
            ,classif_id=case when classif_qual in ('D','V') then classif_id else %(cat)s end
            where objid=%(id)s""",SqlParam)
        upcur.connection.commit()
        logging.info('Chunk Db Extract %d/%d, Classification and Db Save : %0.3f s %0.3f+%0.3f+%0.3f'
                     , ProcessedRows ,NbrItem
                     , time.time() - TStep,TStep2 - TStep,TStep3 - TStep2,time.time() - TStep3)
    RecalcProjectTaxoStat(Prj.projid)
    UpdateProjectStat(Prj.projid)
    self.task.taskstate="Done"
    self.UpdateProgress(100,"Classified %d objects"%ProcessedRows)
def PrjEditDataMass(PrjId):
    """Flask view: mass-edit one field of a project's objects/samples/acquisitions/process.

    On POST with 'field' (table-code prefix + column name) and 'newvalue', updates
    every row matched by the active project filters; classif_id edits are first
    archived into objectsclassifhisto. Renders the edition form with a count of
    the objects the filters currently match.
    """
    request.form  # Force reading the POST data, otherwise a 504 error occurs
    Prj = database.Projects.query.filter_by(projid=PrjId).first()
    if Prj is None:
        flash("Project doesn't exists", 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    if not Prj.CheckRight(2):  # Level 0 = Read, 1 = Annotate, 2 = Admin
        flash('You cannot edit settings for this project', 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    g.headcenter = "<h4><a href='/prj/{0}'>{1}</a></h4>".format(
        Prj.projid, XSSEscape(Prj.title))
    txt = "<h3>Project Mass data edition </h3>"
    sqlparam = {}
    filtres = {}
    # Collect the active filters from the query string.
    for k in sharedfilter.FilterList:
        if gvg(k):
            filtres[k] = gvg(k, "")
    field = gvp('field')
    if field and gvp('newvalue'):
        tables = {
            'f': 'obj_field',
            'h': 'obj_head',
            's': 'samples',
            'a': 'acquisitions',
            'p': 'process'
        }
        tablecode = field[0]
        table = tables[
            tablecode]  # the target table is taken from field's first letter
        field = field[
            1:]  # drop the first letter, which encoded the table name
        # NOTE(review): 'field' comes straight from the POST data and is
        # concatenated into the SQL as an identifier (identifiers cannot be
        # bound parameters). This is an SQL-injection vector for any project
        # admin — consider validating it against GetFieldList/an identifier regex.
        sql = "update " + table + " set " + field + "=%(newvalue)s "
        if field == 'classif_id':
            # Manual classification also stamps who/when.
            sql += " ,classif_when=current_timestamp,classif_who=" + str(
                current_user.id)
        sql += " where "
        # Each table is reached through its key, restricted to the project's objects.
        if tablecode == "h":
            sql += " objid in ( select objid from objects o "
        elif tablecode == "f":
            sql += " objfid in ( select objid from objects o "
        elif tablecode == "s":
            sql += " sampleid in ( select distinct sampleid from objects o "
        elif tablecode == "a":
            sql += " acquisid in ( select distinct acquisid from objects o "
        elif tablecode == "p":
            sql += " processid in ( select distinct processid from objects o "
        sql += " where projid=" + str(Prj.projid)
        sqlparam['newvalue'] = gvp('newvalue')
        if len(filtres):
            sql += " " + sharedfilter.GetSQLFilter(filtres, sqlparam,
                                                   str(current_user.id))
        sql += ")"
        if field == 'classif_id':
            # Archive the current classification before overwriting it.
            sqlhisto = """insert into objectsclassifhisto(objid,classif_date,classif_type,classif_id,classif_qual,classif_who)
                          select objid,classif_when,'M', classif_id,classif_qual,classif_who
                          from objects o
                          where projid=""" + str(
                Prj.projid) + " and classif_when is not null "
            sqlhisto += sharedfilter.GetSQLFilter(filtres, sqlparam,
                                                  str(current_user.id))
            ExecSQL(sqlhisto, sqlparam)
        ExecSQL(sql, sqlparam)
        flash('Data updated', 'success')
        if field == 'latitude' or field == 'longitude' or gvp('recompute') == 'Y':
            # Refresh each sample's position from its objects' positions.
            ExecSQL(
                """update samples s set latitude=sll.latitude,longitude=sll.longitude
                  from (select o.sampleid,min(o.latitude) latitude,min(o.longitude) longitude
                        from obj_head o
                        where projid=%(projid)s and o.latitude is not null and o.longitude is not null
                        group by o.sampleid) sll
                  where s.sampleid=sll.sampleid and projid=%(projid)s """,
                {'projid': Prj.projid})
            flash('sample latitude and longitude updated', 'success')
    # Show the user how many objects the active filters match.
    sql = "select objid FROM objects o where projid=" + str(Prj.projid)
    if len(filtres):
        sql += sharedfilter.GetSQLFilter(filtres, sqlparam,
                                         str(current_user.id))
        ObjList = GetAll(sql, sqlparam)
        # NOTE(review): ObjListTxt is built but never used.
        ObjListTxt = "\n".join((str(r['objid']) for r in ObjList))
        txt += "<span style='color:red;font-weight:bold;font-size:large;'>USING Active Project Filters, {0} objects</span>".format(
            len(ObjList))
    else:
        txt += "<span style='color:red;font-weight:bold;font-size:large;'>Apply to ALL OBJETS OF THE PROJECT (NO Active Filters)</span>"
    Lst = GetFieldList(Prj)
    # txt+="%s"%(Lst,)
    return PrintInCharte(
        render_template("project/prjeditdatamass.html", Lst=Lst, header=txt))
def PrjResetToPredicted(PrjId):
    """Flask view: reset Validated/Dubious objects (matching the active filters)
    back to the 'Predicted' state.

    On POST with process=Y, the current classification is first archived into
    objectsclassifhisto (skipping entries already archived), then classif_qual
    is forced to 'P' and project statistics are recomputed. Otherwise the
    confirmation form is rendered with the matched-object count.
    """
    request.form  # Force reading the POST data, otherwise a 504 error occurs
    Prj = database.Projects.query.filter_by(projid=PrjId).first()
    if Prj is None:
        flash("Project doesn't exists", 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    if not Prj.CheckRight(2):  # Level 0 = Read, 1 = Annotate, 2 = Admin
        flash('You cannot edit settings for this project', 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    g.headcenter = "<h4><a href='/prj/{0}'>{1}</a></h4>".format(
        Prj.projid, XSSEscape(Prj.title))
    txt = "<h3>Reset status to predicted</h3>"
    sqlparam = {}
    filtres = {}
    # Collect the active filters from the query string.
    for k in sharedfilter.FilterList:
        if gvg(k):
            filtres[k] = gvg(k, "")
    process = gvp('process')
    if process == 'Y':
        # Archive the classification of V/D objects not already in the history
        # (matched on objid + classif_when to avoid duplicates).
        sqlhisto = """insert into objectsclassifhisto(objid,classif_date,classif_type,classif_id,classif_qual,classif_who)
                      select objid,classif_when,'M', classif_id,classif_qual,classif_who
                      from objects o
                      where projid=""" + str(
            Prj.projid
        ) + """ and classif_when is not null and classif_qual in ('V','D')
              and not exists(select 1 from objectsclassifhisto och where och.objid=o.objid and och.classif_date=o.classif_when) """
        sqlhisto += sharedfilter.GetSQLFilter(filtres, sqlparam,
                                              str(current_user.id))
        ExecSQL(sqlhisto, sqlparam)
        sqlhisto = """update obj_head set classif_qual='P'
                      where projid={0} and objid in (select objid from objects o where projid={0}
                      and classif_qual in ('V','D') {1}) """.format(
            Prj.projid,
            sharedfilter.GetSQLFilter(filtres, sqlparam,
                                      str(current_user.id)))
        ExecSQL(sqlhisto, sqlparam)
        # flash('Data updated', 'success')
        txt += "<a href='/prj/%s' class='btn btn-primary'>Back to project</a> " % (
            Prj.projid)
        appli.project.main.RecalcProjectTaxoStat(Prj.projid)
        appli.project.main.UpdateProjectStat(Prj.projid)
        return PrintInCharte(txt)
    # No processing requested: render the confirmation form with the object count.
    sql = "select objid FROM objects o where projid=" + str(Prj.projid)
    if len(filtres):
        sql += sharedfilter.GetSQLFilter(filtres, sqlparam,
                                         str(current_user.id))
        ObjList = GetAll(sql, sqlparam)
        # NOTE(review): ObjListTxt is built but never used (same as PrjEditDataMass).
        ObjListTxt = "\n".join((str(r['objid']) for r in ObjList))
        txt += "<span style='color:red;font-weight:bold;font-size:large;'>USING Active Project Filters, {0} objects</span>".format(
            len(ObjList))
    else:
        txt += "<span style='color:red;font-weight:bold;font-size:large;'>Apply to ALL OBJETS OF THE PROJECT (NO Active Filters)</span>"
    Lst = GetFieldList(Prj)
    # txt+="%s"%(Lst,)
    return PrintInCharte(
        render_template("project/prjresettopredicted.html", Lst=Lst,
                        header=txt))
def CreateTSV(self,ExportMode):
    """Export the project's objects as TSV file(s) inside a zip archive.

    ExportMode:
      'BAK' — backup export: fixed data sections, one TSV per sample plus a
              typed header line ([f]/[t]).
      'DOI' — DOI export: fixed data sections, single TSV, DOI image naming.
      other — user-driven export honouring the task's section/split parameters.

    The SQL is assembled in three parts (sql1 select, sql2 from/joins,
    sql3 where/order) and the result is streamed row by row into the zip.
    Bug fixed here: the history subquery (histodata section) hard-coded
    o.projid=1 and therefore exported project #1's history for every project;
    it now uses the bound %(projid)s parameter.
    """
    self.UpdateProgress(1,"Start TSV export")
    Prj=database.Projects.query.filter_by(projid=self.param.ProjectId).first()
    ImageExport=''
    if ExportMode in ('BAK', 'DOI'):
        # Backup & DOI exports use a fixed set of sections, overriding user choices.
        self.param.sampledata=self.param.objectdata=self.param.processdata=self.param.acqdata='1'
        self.param.commentsdata=self.param.histodata=self.param.internalids=''
        if ExportMode=='BAK':
            ImageExport =self.param.exportimagesbak
        if ExportMode=='DOI':
            ImageExport =self.param.exportimagesdoi
    sql1="""SELECT o.orig_id as object_id,o.latitude as object_lat,o.longitude as object_lon
              ,to_char(objdate,'YYYYMMDD') as object_date
              ,to_char(objtime,'HH24MISS') as object_time
              ,object_link,depth_min as object_depth_min,depth_max as object_depth_max
              ,case o.classif_qual when 'V' then 'validated' when 'P' then 'predicted' when 'D' then 'dubious' ELSE o.classif_qual end object_annotation_status
              ,uo1.name object_annotation_person_name,uo1.email object_annotation_person_email
              ,to_char(o.classif_when,'YYYYMMDD') object_annotation_date
              ,to_char(o.classif_when,'HH24MISS') object_annotation_time
              ,to1.display_name as object_annotation_category """
    if ExportMode=='BAK':
        sql1 += ",to1.id as object_annotation_category_id"
    if ExportMode!='BAK':
        # Full taxonomic lineage, obtained by walking up the taxonomy tree.
        sql1 += """ ,(WITH RECURSIVE rq(id,name,parent_id) as (
                        select id,name,parent_id,1 rang FROM taxonomy where id =o.classif_id
                        union
                        SELECT t.id,t.name,t.parent_id, rang+1 rang FROM rq JOIN taxonomy t ON t.id = rq.parent_id)
                      select string_agg(name,'>') from (select name from rq order by rang desc)q) object_annotation_hierarchy """
    sql2=""" FROM objects o
             LEFT JOIN taxonomy to1 on o.classif_id=to1.id
             LEFT JOIN taxonomy to1p on to1.parent_id=to1p.id
             LEFT JOIN users uo1 on o.classif_who=uo1.id
             LEFT JOIN taxonomy to2 on o.classif_auto_id=to2.id
             LEFT JOIN samples s on o.sampleid=s.sampleid """
    sql3=" where o.projid=%(projid)s "
    params={'projid':int(self.param.ProjectId)}
    # SQL alias => original column header; handles names like %area that are
    # not valid SQL identifiers.
    OriginalColName={}
    if ImageExport=='1':  # first image only
        sql1 += "\n,img.orig_file_name as img_file_name,img.imgrank img_rank"
        sql2 += "\nLEFT JOIN images img on o.img0id = img.imgid "
    if ImageExport=='A':  # all images
        sql1 += "\n,img.orig_file_name as img_file_name,img.imgrank img_rank"
        sql2 += "\nLEFT JOIN images img on o.objid = img.objid "
    if self.param.samplelist!="" :
        sql3+=" and s.orig_id= any(%(samplelist)s) "
        params['samplelist']=self.param.samplelist.split(",")
    if self.param.commentsdata=='1':
        sql1+="\n,complement_info"
    if self.param.objectdata=='1':
        sql1+="\n"
        Mapping=DecodeEqualList(Prj.mappingobj)
        for k,v in Mapping.items() :
            # Sanitize the free-form name for SQL aliasing, but remember the
            # original so the TSV header can show it unchanged.
            AliasSQL='object_%s'%re.sub(R"[^a-zA-Z0-9\.\-µ]","_",v)
            OriginalColName[AliasSQL]='object_%s'%v
            sql1+=',o.%s as "%s" '%(k,AliasSQL)
    if self.param.sampledata=='1':
        sql1+="\n,s.orig_id sample_id,s.dataportal_descriptor as sample_dataportal_descriptor "
        Mapping=DecodeEqualList(Prj.mappingsample)
        for k,v in Mapping.items() :
            sql1+=',s.%s as "sample_%s" '%(k,re.sub(R"[^a-zA-Z0-9\.\-µ]","_",v))
    if self.param.processdata=='1':
        sql1+="\n,p.orig_id process_id"
        Mapping=DecodeEqualList(Prj.mappingprocess)
        for k,v in Mapping.items() :
            sql1+=',p.%s as "process_%s" '%(k,re.sub(R"[^a-zA-Z0-9\.\-µ]","_",v))
        sql2+=" left join process p on o.processid=p.processid "
    if self.param.acqdata=='1':
        sql1+="\n,a.orig_id acq_id,a.instrument as acq_instrument"
        Mapping=DecodeEqualList(Prj.mappingacq)
        for k,v in Mapping.items() :
            sql1+=',a.%s as "acq_%s" '%(k,re.sub(R"[^a-zA-Z0-9\.\-µ]","_",v))
        sql2+=" left join acquisitions a on o.acquisid=a.acquisid "
    if ExportMode =='DOI':
        sql1 += "\n,o.objid"
    if self.param.internalids == '1':
        sql1 += """\n,o.objid,o.acquisid as acq_id_internal,o.processid as processid_internal,o.sampleid as sample_id_internal,o.classif_id,o.classif_who
                  ,o.classif_auto_id,to2.name classif_auto_name,classif_auto_score,classif_auto_when
                  ,o.random_value object_random_value,o.sunpos object_sunpos """
    if self.param.sampledata == '1':
        sql1 += "\n,s.latitude sample_lat,s.longitude sample_long "
    if self.param.histodata=='1':
        if self.param.samplelist!="":
            # Inject the sample filter into both branches of the history subquery.
            samplefilter=" join samples s on o.sampleid=s.sampleid and s.orig_id= any(%(samplelist)s) "
        else:
            samplefilter=""
        sql1+=" ,oh.classif_date histoclassif_date,classif_type histoclassif_type,to3.name histoclassif_name,oh.classif_qual histoclassif_qual,uo3.name histoclassif_who,classif_score histoclassif_score"
        # BUGFIX: both occurrences below previously read "o.projid=1"; the
        # %(projid)s parameter (already present in params) is used instead.
        sql2+=""" left join (SELECT o.objid,classif_date,classif_type,och.classif_id,och.classif_qual,och.classif_who,classif_score
                    from objectsclassifhisto och
                    join objects o on o.objid=och.objid and o.projid=%(projid)s {0}
                    union all
                    SELECT o.objid,o.classif_when classif_date,'C' classif_type,classif_id,classif_qual,classif_who,NULL
                    from objects o {0} where o.projid=%(projid)s
                  )oh on o.objid=oh.objid
                  LEFT JOIN taxonomy to3 on oh.classif_id=to3.id
                  LEFT JOIN users uo3 on oh.classif_who=uo3.id """.format(samplefilter)
    sql3+=sharedfilter.GetSQLFilter(self.param.filtres,params,self.task.owner_id)
    # Default split field; this value avoids errors later in r[splitfield].
    splitfield="object_id"
    if ExportMode=='BAK':
        self.param.splitcsvby = "sample"
    elif ExportMode=='DOI':
        self.param.splitcsvby = ""
    if self.param.splitcsvby=="sample":
        sql3+=" order by s.orig_id, o.objid "
        splitfield = "sample_id"
    elif self.param.splitcsvby=="taxo":
        sql1 += "\n,concat(to1p.name,'_',to1.name) taxo_parent_child "
        sql3+=" order by taxo_parent_child, o.objid "
        splitfield = "taxo_parent_child"
    else:
        sql3 += " order by s.orig_id, o.objid "  # default ordering
    if ImageExport!='':
        sql3+=",img_rank"
    sql=sql1+" "+sql2+" "+sql3
    logging.info("Execute SQL : %s"%(sql,))
    logging.info("Params : %s"%(params,))
    self.pgcur.execute(sql,params)
    splitcsv = (self.param.splitcsvby != "")
    self.param.OutFile= "export_{0:d}_{1:s}.{2}".format(Prj.projid
                        ,datetime.datetime.now().strftime("%Y%m%d_%H%M")
                        ,"zip" )
    zfile = zipfile.ZipFile(os.path.join(self.GetWorkingDir(),self.param.OutFile)
                            , 'w', allowZip64=True, compression=zipfile.ZIP_DEFLATED)
    if splitcsv:
        # One temp TSV reused per split value; prevvalue tracks the current split.
        csvfilename='temp.tsv'
        prevvalue = "NotAssigned"
    else:
        csvfilename =self.param.OutFile.replace('.zip','.tsv')
        prevvalue = self.param.OutFile.replace('.zip', '')
    fichier=os.path.join(self.GetWorkingDir(),csvfilename)
    csvfile=None
    for r in self.pgcur:
        # Open a new TSV at the first row, or whenever the split value changes.
        if (csvfile is None and not splitcsv) or (splitcsv and prevvalue != r[splitfield]):
            if csvfile :
                csvfile.close()
                if zfile :
                    if ExportMode == 'BAK':
                        # BAK: one folder per sample, TSV stored alongside.
                        zfile.write(fichier, os.path.join(str(prevvalue),"ecotaxa_" + str(prevvalue) + ".tsv"))
                    else:
                        zfile.write(fichier,"ecotaxa_"+str(prevvalue)+".tsv")
            if splitcsv:
                prevvalue = r[splitfield]
            logging.info("Creating file %s" % (fichier,))
            csvfile=open(fichier,'w',encoding='latin_1')
            wtr = csv.writer(csvfile, delimiter='\t', quotechar='"',lineterminator='\n',quoting=csv.QUOTE_NONNUMERIC )
            colnames = [desc[0] for desc in self.pgcur.description]
            coltypes=[desc[1] for desc in self.pgcur.description]
            # Read column 2's type (latitude) to learn the 'double' type code.
            FloatType=coltypes[2]
            wtr.writerow([OriginalColName.get(c,c) for c in colnames])
            if ExportMode == 'BAK':
                # Second header line with per-column type markers.
                wtr.writerow(['[f]' if x==FloatType else '[t]' for x in coltypes])
        if r.get('img_file_name','') and ExportMode == 'DOI':
            # DOI images live in per-taxon folders; rewrite the file name accordingly.
            r['img_file_name']=GetDOIImgFileName(r['objid'],r['img_rank'],r['object_annotation_category'],r['img_file_name'])
        if self.param.commentsdata == '1' and r['complement_info']:
            # Strip CR/LF from comments so they fit on one TSV line.
            r['complement_info'] = ' '.join(r['complement_info'].splitlines())
        if self.param.usecomasepa == '1':
            # Replace the decimal point by a comma on float columns.
            for i, t in enumerate(coltypes):
                if t == FloatType and r[i] is not None:
                    r[i] = str(r[i]).replace('.', ',')
        wtr.writerow(r)
    # Flush the last (or only) TSV into the zip.
    if csvfile:
        csvfile.close()
        if zfile:
            if ExportMode == 'BAK':
                # Split per sample, TSV stored inside the sample's folder.
                zfile.write(fichier, os.path.join(str(prevvalue) , "ecotaxa_" + str(prevvalue) + ".tsv"))
            else:
                zfile.write(fichier, "ecotaxa_"+str(prevvalue) + ".tsv")
    zfile.close()
    logging.info("Extracted %d rows", self.pgcur.rowcount)
def SPStep1(self):
    """Create a subset project: clone the project row, then copy a random
    per-category selection of objects (with fields, CNN features and
    optionally images) into it.

    IntraStep 0: clone the Projects row (made transient and re-inserted) and
    grant 'Manage' to the task owner. IntraStep 1: select the objects via a
    window-function ranking and duplicate them one by one.
    """
    logging.info("Input Param = %s" % (self.param.__dict__, ))
    # self.param.ProjectId="2"
    Prj = database.Projects.query.filter_by(
        projid=self.param.ProjectId).first()
    # self.param.IntraStep=0
    if getattr(self.param, 'IntraStep', 0) == 0:
        self.param.IntraStep = 1
        db.session.expunge(Prj)
        NewPrj = Prj
        Prj = copy.copy(
            NewPrj)  # With a plain copy() the insert no longer works.
        # make_transient + projid=None turns the loaded row into a new insert.
        make_transient(NewPrj)
        NewPrj.title = self.param.subsetprojecttitle
        NewPrj.projid = None
        NewPrj.visible = False
        db.session.add(NewPrj)
        db.session.commit()
        # The task owner becomes manager of the new project.
        pp = database.ProjectsPriv()
        pp.member = self.task.owner_id
        pp.privilege = "Manage"
        NewPrj.projmembers.append(pp)
        db.session.commit()
        self.param.subsetproject = NewPrj.projid
        self.UpdateProgress(
            5, "Subset Project %d Created : %s" % (NewPrj.projid, NewPrj.title))
    if self.param.IntraStep == 1:
        vaultroot = Path("../../vault")
        sqlparam = {'projid': self.param.ProjectId}
        sqlwhere = ""
        if self.param.extraprojects:
            # NOTE(review): assumes ProjectId is a string here (string concat) —
            # the resulting "1,2,3" is then substituted inside "in ( ... )".
            sqlparam['projid'] += "," + self.param.extraprojects
        sqlparam['ranklimit'] = self.param.valeur
        # 'V' keeps the first N objects per category, 'P' keeps N percent.
        if self.param.valtype == 'V':
            rankfunction = 'rank'
        elif self.param.valtype == 'P':
            rankfunction = '100*percent_rank'
        else:
            rankfunction = 'FunctionError'
        # if self.param.samplelist:
        #     sqlwhere+=" and s.orig_id in (%s) "%(",".join(["'%s'"%x for x in self.param.samplelist.split(",")]))
        # sqlwhere+=" and (o.classif_qual in (%s) "%(",".join(["'%s'"%x for x in self.param.what.split(",")]))
        # if self.param.what.find('N')>=0:
        #     sqlwhere+=" or o.classif_qual is null "
        # sqlwhere+=")"
        sqlwhere += sharedfilter.GetSQLFilter(self.param.filtres, sqlparam,
                                              str(self.task.owner_id))
        logging.info("SQLParam=%s", sqlparam)
        # Random ranking within each category, capped by ranklimit.
        sql = """select objid from (
            SELECT """ + rankfunction + """() OVER (partition by classif_id order by random() )rang,o.objid
            from objects o
            left join samples s on o.sampleid=s.sampleid
            where o.projid in ( %(projid)s ) """ + sqlwhere + """
            ) sr where rang<=%(ranklimit)s """
        logging.info("SQL=%s %s", sql, sqlparam)
        # for obj in db.session.query(database.Objects).from_statement( text(sql) ).all():
        LstObjects = GetAll(sql, sqlparam)
        logging.info("matched %s objects", len(LstObjects))
        if len(LstObjects) == 0:
            # NOTE(review): no early return here — with an empty list the loop
            # is skipped and taskstate is overwritten with "Done" below; confirm.
            self.task.taskstate = "Error"
            self.UpdateProgress(
                10, "No object to include in the subset project")
        NbrObjects = 0
        for objid in LstObjects:
            # Load the object, its fields row and its CNN-features row.
            obj = db.session.query(
                database.Objects).filter_by(objid=objid[0]).first()
            objf = db.session.query(
                database.ObjectsFields).filter_by(objfid=objid[0]).first()
            objcnn = db.session.query(
                database.Objects_cnn_features).filter_by(
                    objcnnid=objid[0]).first()
            NbrObjects += 1
            oldobjid = obj.objid
            if self.param.withimg == 'Y':
                # Duplicate each image row and its file(s) in the vault.
                for img in obj.images:
                    db.session.expunge(img)
                    make_transient(img)
                    self.pgcur.execute("select nextval('seq_images')")
                    img.imgid = self.pgcur.fetchone()[0]
                    # print("New Image id=",img.imgid)
                    SrcImg = img.file_name
                    SrcImgMini = img.thumb_file_name
                    # Vault layout: one folder per 10000 image ids.
                    VaultFolder = "%04d" % (img.imgid // 10000)
                    # Create the folder holding the images if needed.
                    CreateDirConcurrentlyIfNeeded(
                        vaultroot.joinpath(VaultFolder))
                    img.file_name = "%s/%04d%s" % (VaultFolder,
                                                   img.imgid % 10000,
                                                   Path(SrcImg).suffix)
                    shutil.copyfile(
                        vaultroot.joinpath(SrcImg).as_posix(),
                        vaultroot.joinpath(img.file_name).as_posix())
                    if SrcImgMini is not None:
                        img.thumb_file_name = "%s/%04d_mini%s" % (
                            VaultFolder, img.imgid % 10000,
                            Path(SrcImgMini).suffix)
                        shutil.copyfile(
                            vaultroot.joinpath(SrcImgMini).as_posix(),
                            vaultroot.joinpath(
                                img.thumb_file_name).as_posix())
            # Re-insert the object in the subset project, remapping its parents.
            db.session.expunge(obj)
            make_transient(obj)
            obj.objid = None
            obj.img0id = None
            obj.projid = self.param.subsetproject
            obj.sampleid = self.GetSampleID(obj.sampleid)
            obj.processid = self.GetProcessID(obj.processid)
            obj.acquisid = self.GetAcquisID(obj.acquisid)
            db.session.add(obj)
            db.session.commit()
            # Touch an attribute to force objf's state, otherwise its data is
            # lost on the following statements.
            dummy = objf.n01
            db.session.expunge(objf)
            make_transient(objf)
            objf.objfid = obj.objid
            db.session.add(objf)
            if objcnn:
                # Same forced-state trick for the CNN-features row.
                dummy = objcnn.cnn01
                db.session.expunge(objcnn)
                make_transient(objcnn)
                objcnn.objcnnid = obj.objid
                db.session.add(objcnn)
            db.session.commit()
            if NbrObjects % 20 == 0:
                self.UpdateProgress(5 + 95 * NbrObjects / len(LstObjects),
                                    "Subset creation in progress")
            # print (oldobjid,obj.objid)
        # Recompute the Img0 shortcut values of the new project.
        self.pgcur.execute("""update obj_head o set imgcount=(select count(*) from images where objid=o.objid)
            ,img0id=(select imgid from images where objid=o.objid order by imgrank asc limit 1 )
            where projid=""" + str(self.param.subsetproject))
        self.pgcur.connection.commit()
        import appli.project.main
        appli.project.main.RecalcProjectTaxoStat(self.param.subsetproject)
        appli.project.main.UpdateProjectStat(self.param.subsetproject)
        self.task.taskstate = "Done"
        self.UpdateProgress(100, "Subset created successfully")