Example #1
    def CreateSUM(self):
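        # Summary export: counts the filtered objects grouped by taxon (optionally
        # sub-totalled by acquisition or sample) and writes the result to a TSV file.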
        self.UpdateProgress(1, "Start Summary export")
        Prj = database.Projects.query.filter_by(
            projid=self.param.ProjectId).first()
        grp = "to1.display_name"
        if self.param.sumsubtotal == "A":
            grp = "a.orig_id," + grp
        if self.param.sumsubtotal == "S":
            grp = "s.orig_id,s.latitude,s.longitude," + grp
        sql1 = "SELECT " + grp
        if self.param.sumsubtotal == "S":  # Il est demandé d'avoir la colonne agrégé date au milieu du groupe, donc réécriture de la requete.
            sql1 = "SELECT  s.orig_id,s.latitude,s.longitude,max(objdate) as date,to1.display_name"
        sql1 += ",count(*) Nbr"
        sql2 = """ FROM obj_head o
                LEFT JOIN taxonomy to1 on o.classif_id=to1.id
                LEFT JOIN samples s on o.sampleid=s.sampleid
                LEFT JOIN acquisitions a on o.acquisid=a.acquisid """
        sql3 = " where o.projid=%(projid)s "
        params = {'projid': int(self.param.ProjectId)}
        if self.param.samplelist != "":
            sql3 += " and s.orig_id= any(%(samplelist)s) "
            params['samplelist'] = self.param.samplelist.split(",")
        sql3 += sharedfilter.GetSQLFilter(self.param.filtres, params,
                                          self.task.owner_id)
        sql3 += " group by " + grp
        sql3 += " order by " + grp
        sql = sql1 + " " + sql2 + " " + sql3
        logging.info("Execute SQL : %s" % (sql, ))
        logging.info("Params : %s" % (params, ))
        self.pgcur.execute(sql, params)

        self.param.OutFile = "export_summary_{0:d}_{1:s}.tsv".format(
            Prj.projid,
            datetime.datetime.now().strftime("%Y%m%d_%H%M"))
        fichier = os.path.join(self.GetWorkingDir(), self.param.OutFile)
        logging.info("Creating file %s" % (fichier, ))
        with open(fichier, 'w', encoding='latin_1') as csvfile:
            # write the result set as TSV: header row first, then the data rows
            wtr = csv.writer(csvfile,
                             delimiter='\t',
                             quotechar='"',
                             lineterminator='\n')
            colnames = [desc[0] for desc in self.pgcur.description]
            wtr.writerow(colnames)
            for r in self.pgcur:
                wtr.writerow(r)
        logging.info("Extracted %d rows", self.pgcur.rowcount)
Example #2
    def CreateIMG(self, SplitImageBy):
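        # Image export: appends every image of the filtered project to the already
        # created zip, one folder per taxon or per sample depending on SplitImageBy.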
        # tsvfile=self.param.OutFile
        self.UpdateProgress(1, "Start Image export")
        Prj = database.Projects.query.filter_by(
            projid=self.param.ProjectId).first()
        # self.param.OutFile= "exportimg_{0:d}_{1:s}.zip".format(Prj.projid,
        #                                                      datetime.datetime.now().strftime("%Y%m%d_%H%M"))
        # fichier=os.path.join(self.GetWorkingDir(),self.param.OutFile)
        logging.info("Opening for appending file %s" % (self.param.OutFile, ))
        # zfile=zipfile.ZipFile(fichier, 'w',allowZip64 = True,compression= zipfile.ZIP_DEFLATED)
        # zfile.write(tsvfile)
        zfile = zipfile.ZipFile(self.param.OutFile,
                                'a',
                                allowZip64=True,
                                compression=zipfile.ZIP_DEFLATED)

        sql = """SELECT i.objid,i.file_name,i.orig_file_name,t.name,replace(t.display_name,'<','_') taxo_parent_child,imgrank
                  ,s.orig_id sample_orig_id
                 From objects o 
                 left join samples s on o.sampleid=s.sampleid
                 join images i on o.objid=i.objid
                 left join taxonomy t on o.classif_id=t.id
                 LEFT JOIN taxonomy to1p on t.parent_id=to1p.id
                   where o.projid=%(projid)s """
        params = {'projid': int(self.param.ProjectId)}
        if self.param.samplelist != "":
            sql += " and s.orig_id= any(%(samplelist)s) "
            params['samplelist'] = self.param.samplelist.split(",")

        sql += sharedfilter.GetSQLFilter(self.param.filtres, params,
                                         self.task.owner_id)

        logging.info("Execute SQL : %s" % (sql, ))
        logging.info("Params : %s" % (params, ))
        self.pgcur.execute(sql, params)
        vaultroot = Path("../../vault")
        for r in self.pgcur:  # r: 0=objid, 1=file_name, 2=orig_file_name, 3=name, 4=taxo_parent_child, 5=imgrank, 6=sample_orig_id
            if SplitImageBy == 'taxo':
                zfile.write(vaultroot.joinpath(r[1]).as_posix(),
                            arcname=GetDOIImgFileName(r['objid'], r['imgrank'],
                                                      r['taxo_parent_child'],
                                                      r['file_name']))
            else:
                zfile.write(vaultroot.joinpath(r[1]).as_posix(),
                            arcname="{0}/{1}".format(r['sample_orig_id'],
                                                     r['orig_file_name']))
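Mode 'a' is what lets this method add the image files to an archive that already holds the TSV; a minimal sketch of the append pattern (paths are hypothetical):

    import zipfile

    with zipfile.ZipFile("export.zip", "a", allowZip64=True,
                         compression=zipfile.ZIP_DEFLATED) as zf:
        # arcname controls the path inside the archive, e.g. one folder per sample
        zf.write("vault/0001/1234.jpg", arcname="sample1/original_name.jpg")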
Example #3
    def SPStep1(self):
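        # Automatic classification: loads a saved Random Forest model or trains one
        # from the learning-set projects, then predicts the selected objects in
        # chunks of 100 and stores both prediction and score in the database.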
        logging.info("Input Param = %s"%(self.param.__dict__,))
        logging.info("Start Step 1")
        TInit = time.time()
        Prj = database.Projects.query.filter_by(projid=self.param.ProjectId).first()
        MapPrj = self.GetReverseObjMap(Prj) # dict variable name => column number, e.g. Area:n42
        CommonKeys = set(MapPrj.keys())
        # PostTaxoMapping decoded as source:target pairs
        PostTaxoMapping = {int(el[0].strip()): int(el[1].strip()) for el in
                           [el.split(':') for el in self.param.PostTaxoMapping.split(',') if el != '']}
        logging.info("PostTaxoMapping = %s ", PostTaxoMapping)

        if self.param.usemodel_foldername!='': # use an existing model
            ModelFolderName = self.param.usemodel_foldername
            ModelFolder = Path("../../RF_models") / ModelFolderName
            if not ModelFolder.is_dir():
                raise Exception("Invalid model directory RF_models/{} ".format(ModelFolderName))
            Meta=json.load((ModelFolder / "meta.json").open("r"))
            if Meta.get('scn_model','')!="":
                self.param.usescn = 'Y'

        CNNCols = ""
        if self.param.usescn=='Y':
            self.ComputeSCNFeatures(Prj)
            CNNCols="".join([",cnn%02d"%(i+1) for i in range(50)])

        if self.param.usemodel_foldername!='': # use an existing model
            self.UpdateProgress(1, "Load model from file")
            Classifier=joblib.load( ModelFolder / 'random_forest.jbl')
            zooprocess_fields=Meta.get('zooprocess_fields', Meta.get('zooprocess_fields:'))
            CommonKeys = zooprocess_fields # a list, not a set: the model's column order must be preserved
            zooprocess_medians=Meta.get('zooprocess_medians')
            DefVal={} # default values for missing data, keyed by source-project column, e.g. n61:1.234
            for c,m in zip(zooprocess_fields,zooprocess_medians):
                if c not in MapPrj:
                    raise Exception("Column {} present in model but missing in project".format(c))
                DefVal[MapPrj[c]]=m
            # CommonKeys = CommonKeys.intersection(set(zooprocess_fields))
        else: # build the model from the source projects
            self.UpdateProgress(1, "Retrieve Data from Learning Set")
            PrjListInClause=database.CSVIntStringToInClause(self.param.BaseProject)
            LstPrjSrc=GetAll("select projid,mappingobj from projects where projid in({0})".format(PrjListInClause))
            MapPrjBase={}
            for PrjBase in LstPrjSrc:
                # generate the reverse mapping
                MapPrjBase[PrjBase['projid']] = self.GetReverseObjMap(PrjBase)
                # and narrow down to the intersection of common attributes
                CommonKeys = CommonKeys.intersection(set(MapPrjBase[PrjBase['projid']].keys()))
            if self.param.learninglimit:
                self.param.learninglimit=int(self.param.learninglimit) # convert
            logging.info("MapPrj %s",MapPrj)
            logging.info("MapPrjBase %s",MapPrjBase)
            CritVar = self.param.CritVar.split(",")
            # keep only the common columns that were also selected.
            CommonKeys = CommonKeys.intersection(set(CritVar))
            # compute the medians
            sql =""
            for BasePrj in LstPrjSrc:
                bprojid=BasePrj['projid']
                if sql !="": sql+=" union all "
                sql+="select 1"
                for c in CommonKeys:
                    sql+=",coalesce(percentile_cont(0.5) WITHIN GROUP (ORDER BY {0}),-9999) as {1}".format(MapPrjBase[bprojid][c],MapPrj[c])
                sql+=" from objects "
                sql += " where projid={0} and classif_id is not null and classif_qual='V'".format(bprojid)
            if self.param.learninglimit:
                LimitHead = """ with objlist as ( select objid from (
                            select objid,row_number() over(PARTITION BY classif_id order by random_value) rang
                            from obj_head
                            where projid in ({0}) and classif_id is not null
                            and classif_qual='V' ) q where rang <={1} ) """.format(PrjListInClause,
                                                                                   self.param.learninglimit)
                LimitFoot = """ and objid in ( select objid from objlist ) """
                sql= LimitHead + sql + LimitFoot
            else : LimitHead=LimitFoot=""
            DefVal=GetAll(sql)[0]
            # extract the learning-set data
            sql = ""
            for BasePrj in LstPrjSrc:
                bprojid = BasePrj['projid']
                if sql != "": sql += " \nunion all "
                sql="select classif_id"
                for c in CommonKeys:
                    sql+=",coalesce(case when {0} not in ('Infinity','-Infinity','NaN') then {0} end,{1}) as {2}".format(MapPrjBase[bprojid][c],DefVal[MapPrj[c]],MapPrj[c])
                sql+=CNNCols+" from objects "
                if self.param.usescn == 'Y':
                    sql += " join obj_cnn_features on obj_cnn_features.objcnnid=objects.objid "
                sql+=""" where classif_id is not null and classif_qual='V'
                            and projid in ({0})
                            and classif_id in ({1}) """.format(PrjListInClause,self.param.Taxo)
            if self.param.learninglimit:
                sql = LimitHead + sql + LimitFoot
            # convert the learning set to a NumPy array
            DBRes=np.array(GetAll(sql))
            LSSize=DBRes.shape[0]
            learn_cat = DBRes[:,0] # the classification only
            learn_var = DBRes[:,1:] # all feature columns (column 0 is the classification)
            DBRes=None # free the memory
            logging.info('DB Conversion to NP : %0.3f s', time.time() - TInit)
            logging.info("Variable shape %d Row, %d Col",*learn_var.shape)
            # Note: using more jobs is not necessarily faster, at least on a small set.
            Classifier = RandomForestClassifier(n_estimators=300, min_samples_leaf=2, n_jobs=1, class_weight="balanced")

            # TStep = time.time()
            # This approach does not work: applied per block of 100 rows, a column may
            # contain no value at all and then gets dropped because its mean cannot be computed.
            # learn_var = Imputer().fit_transform(learn_var)
            # learn_var[learn_var==np.nan] = -99999  NaNs are NULLs in the database, already handled by the coalesce
            # logging.info('Clean input variables :  %0.3f s', time.time() - TStep)
            TStep = time.time()
            Classifier.fit(learn_var, learn_cat)
            logging.info('Model fit duration :  %0.3f s', time.time() - TStep)
            if self.param.savemodel_foldername!="": # Il faut sauver le modèle
                ModelFolderName=re.sub(r'[^\w-]', '_', self.param.savemodel_foldername.strip())
                ModelFolder=Path("../../RF_models") / ModelFolderName
                if not ModelFolder.is_dir():
                    ModelFolder.mkdir()
                joblib.dump(Classifier,ModelFolder/'random_forest.jbl')
                Meta={"zooprocess_fields":[],"zooprocess_medians":[],"name":self.param.savemodel_title
                    , "comments": self.param.savemodel_comments,'date':datetime.datetime.now().isoformat()
                    ,"scn_model":"","type": "RandomForest-ZooProcess","n_objects": LSSize}
                for c in CommonKeys:
                    Meta["zooprocess_fields"].append(c)
                    Meta["zooprocess_medians"].append(DefVal[MapPrj[c]])
                Meta['categories'] = {r[0]: r[1] for r in database.GetTaxoNameFromIdList([int(x) for x in Classifier.classes_])}
                Meta['scn_model'] =""
                if self.param.usescn == 'Y':
                    Meta['scn_model']=Prj.cnn_network_id
                json.dump(Meta,(ModelFolder/"meta.json").open("w"),indent="\t")
        # ------ End of the learning / model-loading part

        if self.param.Perimeter!='all':
            PerimeterWhere=" and ( classif_qual='P' or classif_qual is null)  "
        else: PerimeterWhere=""
        sqlparam={}
        PerimeterWhere +=sharedfilter.GetSQLFilter(self.param.filtres, sqlparam,-99999)
        NbrItem=GetAll("select count(*) from objects o where projid={0} {1} ".format(Prj.projid,PerimeterWhere),sqlparam)[0][0]
        if NbrItem==0:
            raise Exception ("No object to classify, perhaps all object already classified or you should adjust the perimeter settings as it was probably set to 'Not Validated' ")

        sql="select objid"
        for c in CommonKeys:
            sql += ",coalesce(case when {0} not in ('Infinity','-Infinity','NaN') then {0} end,{1}) as {0}".format(MapPrj[c], DefVal[MapPrj[c]])
        sql += CNNCols + " from objects o "
        if self.param.usescn == 'Y':
            sql += " join obj_cnn_features on obj_cnn_features.objcnnid=o.objid "
        sql+=""" where projid={0} {1}
                    order by objid""".format(Prj.projid,PerimeterWhere)
        self.pgcur.execute(sql,sqlparam)
        # logging.info("SQL=%s",sql)
        upcur=db.engine.raw_connection().cursor()
        ProcessedRows=0
        while True:
            self.UpdateProgress(15+85*(ProcessedRows/NbrItem),"Processed %d/%d"%(ProcessedRows,NbrItem))
            TStep = time.time()
            # fetch the variables of the objects to classify
            DBRes=np.array(self.pgcur.fetchmany(100))
            if len(DBRes)==0:
                break
            ProcessedRows+=len(DBRes)
            Tget_Ids = DBRes[:,0] # the objid only
            Tget_var = DBRes[:,1:] # everything except the objid
            TStep2 = time.time()
            # Tget_var = Imputer().fit_transform(Tget_var)  # see the comment on learn_var
            # Tget_var[Tget_var==np.nan] = -99999
            Result=Classifier.predict_proba(Tget_var)
            ResultMaxCol=np.argmax(Result,axis=1)
            # casting matters for PostgreSQL performance
            SqlParam=[{'cat':int(Classifier.classes_[mc]),'p':r[mc],'id':int(i)} for i,mc,r in zip(Tget_Ids,ResultMaxCol,Result)]
            for i, v in enumerate(SqlParam):
                if v['cat'] in PostTaxoMapping:
                    SqlParam[i]['cat']=PostTaxoMapping[v['cat']]
            TStep3 = time.time()
            # Update the database: objects without a classification become predicted; if empty or predicted, the classification is updated
            if self.param.keeplog:
                upcur.executemany("""insert into objectsclassifhisto(objid,classif_date,classif_type,classif_id,classif_qual,classif_score)
                                      select objid,classif_auto_when,'A', classif_auto_id,classif_qual,classif_auto_score
                                        from obj_head
                                        where objid=%(id)s and classif_auto_id!=%(cat)s and classif_auto_id is not null
                                        and classif_auto_when is not null """,SqlParam)
            upcur.executemany("""update obj_head set classif_auto_id=%(cat)s,classif_auto_score=%(p)s,classif_auto_when=now()
                                    ,classif_qual=case when classif_qual in ('D','V') then  classif_qual else 'P'  END
                                    ,classif_id=case when classif_qual in ('D','V') then classif_id  else %(cat)s end
                                    where objid=%(id)s""",SqlParam)
            upcur.connection.commit()
            logging.info('Chunk Db Extract %d/%d, Classification and Db Save :  %0.3f s %0.3f+%0.3f+%0.3f'
                         , ProcessedRows ,NbrItem
                         , time.time() - TStep,TStep2 - TStep,TStep3 - TStep2,time.time() - TStep3)

        RecalcProjectTaxoStat(Prj.projid)
        UpdateProjectStat(Prj.projid)
        self.task.taskstate="Done"
        self.UpdateProgress(100,"Classified %d objects"%ProcessedRows)
Example #4
def PrjEditDataMass(PrjId):
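    # Mass data edition: sets one field of the object/sample/acquisition/process
    # tables for every object matching the active project filters.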
    request.form  # force reading the POST data, otherwise a 504 error occurs
    Prj = database.Projects.query.filter_by(projid=PrjId).first()
    if Prj is None:
        flash("Project doesn't exists", 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    if not Prj.CheckRight(2):  # Level 0 = Read, 1 = Annotate, 2 = Admin
        flash('You cannot edit settings for this project', 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    g.headcenter = "<h4><a href='/prj/{0}'>{1}</a></h4>".format(
        Prj.projid, XSSEscape(Prj.title))
    txt = "<h3>Project Mass data edition </h3>"
    sqlparam = {}
    filtres = {}
    for k in sharedfilter.FilterList:
        if gvg(k):
            filtres[k] = gvg(k, "")
    field = gvp('field')
    if field and gvp('newvalue'):
        tables = {
            'f': 'obj_field',
            'h': 'obj_head',
            's': 'samples',
            'a': 'acquisitions',
            'p': 'process'
        }
        tablecode = field[0]
        table = tables[tablecode]  # the target table is derived from the first letter of field
        field = field[1:]  # drop that first letter, which encoded the table name
        sql = "update " + table + " set " + field + "=%(newvalue)s  "
        if field == 'classif_id':
            sql += " ,classif_when=current_timestamp,classif_who=" + str(
                current_user.id)
        sql += " where "
        if tablecode == "h": sql += " objid in ( select objid from objects o "
        elif tablecode == "f":
            sql += " objfid in ( select objid from objects o "
        elif tablecode == "s":
            sql += " sampleid in ( select distinct sampleid from objects o "
        elif tablecode == "a":
            sql += " acquisid in ( select distinct acquisid from objects o "
        elif tablecode == "p":
            sql += " processid in ( select distinct processid from objects o "
        sql += "  where projid=" + str(Prj.projid)
        sqlparam['newvalue'] = gvp('newvalue')
        if len(filtres):
            sql += " " + sharedfilter.GetSQLFilter(filtres, sqlparam,
                                                   str(current_user.id))
        sql += ")"
        if field == 'classif_id':
            sqlhisto = """insert into objectsclassifhisto(objid,classif_date,classif_type,classif_id,classif_qual,classif_who)
                          select objid,classif_when,'M', classif_id,classif_qual,classif_who
                            from objects o
                            where projid=""" + str(
                Prj.projid) + " and classif_when is not null "
            sqlhisto += sharedfilter.GetSQLFilter(filtres, sqlparam,
                                                  str(current_user.id))
            ExecSQL(sqlhisto, sqlparam)
        ExecSQL(sql, sqlparam)
        flash('Data updated', 'success')
    if field == 'latitude' or field == 'longitude' or gvp('recompute') == 'Y':
        ExecSQL(
            """update samples s set latitude=sll.latitude,longitude=sll.longitude
              from (select o.sampleid,min(o.latitude) latitude,min(o.longitude) longitude
              from obj_head o
              where projid=%(projid)s and o.latitude is not null and o.longitude is not null
              group by o.sampleid) sll where s.sampleid=sll.sampleid and projid=%(projid)s """,
            {'projid': Prj.projid})
        flash('sample latitude and longitude updated', 'success')
    sql = "select objid FROM objects o where projid=" + str(Prj.projid)
    if len(filtres):
        sql += sharedfilter.GetSQLFilter(filtres, sqlparam,
                                         str(current_user.id))
        ObjList = GetAll(sql, sqlparam)
        ObjListTxt = "\n".join((str(r['objid']) for r in ObjList))
        txt += "<span style='color:red;font-weight:bold;font-size:large;'>USING Active Project Filters, {0} objects</span>".format(
            len(ObjList))
    else:
        txt += "<span style='color:red;font-weight:bold;font-size:large;'>Apply to ALL OBJETS OF THE PROJECT (NO Active Filters)</span>"
    Lst = GetFieldList(Prj)
    # txt+="%s"%(Lst,)

    return PrintInCharte(
        render_template("project/prjeditdatamass.html", Lst=Lst, header=txt))
Example #5
def PrjResetToPredicted(PrjId):
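    # Resets validated/dubious objects back to 'Predicted', archiving their
    # current classification in objectsclassifhisto beforehand.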
    request.form  # force reading the POST data, otherwise a 504 error occurs
    Prj = database.Projects.query.filter_by(projid=PrjId).first()
    if Prj is None:
        flash("Project doesn't exists", 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    if not Prj.CheckRight(2):  # Level 0 = Read, 1 = Annotate, 2 = Admin
        flash('You cannot edit settings for this project', 'error')
        return PrintInCharte("<a href=/prj/>Select another project</a>")
    g.headcenter = "<h4><a href='/prj/{0}'>{1}</a></h4>".format(
        Prj.projid, XSSEscape(Prj.title))
    txt = "<h3>Reset status to predicted</h3>"
    sqlparam = {}
    filtres = {}
    for k in sharedfilter.FilterList:
        if gvg(k):
            filtres[k] = gvg(k, "")
    process = gvp('process')
    if process == 'Y':
        sqlhisto = """insert into objectsclassifhisto(objid,classif_date,classif_type,classif_id,classif_qual,classif_who)
                      select objid,classif_when,'M', classif_id,classif_qual,classif_who
                        from objects o
                        where projid=""" + str(
            Prj.projid
        ) + """ and classif_when is not null and classif_qual in ('V','D')
                         and not exists(select 1 from objectsclassifhisto och where och.objid=o.objid and och.classif_date=o.classif_when)
                        """
        sqlhisto += sharedfilter.GetSQLFilter(filtres, sqlparam,
                                              str(current_user.id))
        ExecSQL(sqlhisto, sqlparam)

        sqlhisto = """update obj_head set classif_qual='P'
                        where projid={0} and objid in (select objid from objects o   
                                              where projid={0} and classif_qual in ('V','D') {1})
                                              """.format(
            Prj.projid,
            sharedfilter.GetSQLFilter(filtres, sqlparam, str(current_user.id)))
        ExecSQL(sqlhisto, sqlparam)

        # flash('Data updated', 'success')
        txt += "<a href='/prj/%s' class='btn btn-primary'>Back to project</a> " % (
            Prj.projid)
        appli.project.main.RecalcProjectTaxoStat(Prj.projid)
        appli.project.main.UpdateProjectStat(Prj.projid)
        return PrintInCharte(txt)
    sql = "select objid FROM objects o where projid=" + str(Prj.projid)
    if len(filtres):
        sql += sharedfilter.GetSQLFilter(filtres, sqlparam,
                                         str(current_user.id))
        ObjList = GetAll(sql, sqlparam)
        ObjListTxt = "\n".join((str(r['objid']) for r in ObjList))
        txt += "<span style='color:red;font-weight:bold;font-size:large;'>USING Active Project Filters, {0} objects</span>".format(
            len(ObjList))
    else:
        txt += "<span style='color:red;font-weight:bold;font-size:large;'>Apply to ALL OBJETS OF THE PROJECT (NO Active Filters)</span>"
    Lst = GetFieldList(Prj)
    # txt+="%s"%(Lst,)

    return PrintInCharte(
        render_template("project/prjresettopredicted.html",
                        Lst=Lst,
                        header=txt))
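Both this view and PrjEditDataMass follow the same archive-then-update shape; a condensed sketch of the two statements, reusing the ExecSQL helper from above (the project id is hypothetical):

    params = {'projid': 123}  # hypothetical project id
    ExecSQL("""insert into objectsclassifhisto(objid,classif_date,classif_type,classif_id,classif_qual,classif_who)
               select objid,classif_when,'M',classif_id,classif_qual,classif_who
                 from objects o where projid=%(projid)s and classif_qual in ('V','D')""", params)
    ExecSQL("""update obj_head set classif_qual='P'
                where projid=%(projid)s and classif_qual in ('V','D')""", params)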
Example #6
    def CreateTSV(self,ExportMode):
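        # General TSV export: builds one large SELECT according to the requested
        # options (BAK, DOI or custom) and writes one or several TSV files into a zip.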
        self.UpdateProgress(1,"Start TSV export")
        Prj=database.Projects.query.filter_by(projid=self.param.ProjectId).first()
        ImageExport=''
        if ExportMode in ('BAK', 'DOI'):
            self.param.sampledata=self.param.objectdata=self.param.processdata=self.param.acqdata='1'
            self.param.commentsdata=self.param.histodata=self.param.internalids=''
        if ExportMode=='BAK':
            ImageExport =self.param.exportimagesbak
        if ExportMode=='DOI':
            ImageExport =self.param.exportimagesdoi

        sql1="""SELECT o.orig_id as object_id,o.latitude as object_lat,o.longitude as object_lon
                ,to_char(objdate,'YYYYMMDD') as object_date
                ,to_char(objtime,'HH24MISS') as object_time
                ,object_link,depth_min as object_depth_min,depth_max as object_depth_max
                ,case o.classif_qual when 'V' then 'validated' when 'P' then 'predicted' when 'D' then 'dubious' ELSE o.classif_qual end object_annotation_status                
                ,uo1.name object_annotation_person_name,uo1.email object_annotation_person_email
                ,to_char(o.classif_when,'YYYYMMDD') object_annotation_date
                ,to_char(o.classif_when,'HH24MISS') object_annotation_time                
                ,to1.display_name as object_annotation_category 
                    """
        if ExportMode=='BAK':
            sql1 += ",to1.id as object_annotation_category_id"

        if ExportMode!='BAK':
            sql1 += """
                ,(WITH RECURSIVE rq(id,name,parent_id) as ( select id,name,parent_id,1 rang FROM taxonomy where id =o.classif_id
                        union
                        SELECT t.id,t.name,t.parent_id, rang+1 rang FROM rq JOIN taxonomy t ON t.id = rq.parent_id)
                        select string_agg(name,'>') from (select name from rq order by rang desc)q) object_annotation_hierarchy """
        sql2=""" FROM objects o
                LEFT JOIN taxonomy to1 on o.classif_id=to1.id
                LEFT JOIN taxonomy to1p on to1.parent_id=to1p.id
                LEFT JOIN users uo1 on o.classif_who=uo1.id
                LEFT JOIN taxonomy to2 on o.classif_auto_id=to2.id
                LEFT JOIN samples s on o.sampleid=s.sampleid """
        sql3=" where o.projid=%(projid)s "
        params={'projid':int(self.param.ProjectId)}
        OriginalColName={} # SQL column name => original column name; handles special names such as %area
        if ImageExport=='1':
            sql1 += "\n,img.orig_file_name as img_file_name,img.imgrank img_rank"
            sql2 += "\nLEFT JOIN images img on o.img0id = img.imgid "
        if ImageExport=='A': # all the images
            sql1 += "\n,img.orig_file_name as img_file_name,img.imgrank img_rank"
            sql2 += "\nLEFT JOIN images img on o.objid = img.objid "

        if self.param.samplelist!="" :
            sql3+=" and s.orig_id= any(%(samplelist)s) "
            params['samplelist']=self.param.samplelist.split(",")


        if self.param.commentsdata=='1':
            sql1+="\n,complement_info"
        if self.param.objectdata=='1':
            sql1+="\n"
            Mapping=DecodeEqualList(Prj.mappingobj)
            for k,v in Mapping.items() :
                AliasSQL='object_%s'%re.sub(R"[^a-zA-Z0-9\.\-µ]","_",v)
                OriginalColName[AliasSQL]='object_%s'%v
                sql1+=',o.%s as "%s" '%(k,AliasSQL)
        if self.param.sampledata=='1':
            sql1+="\n,s.orig_id sample_id,s.dataportal_descriptor as sample_dataportal_descriptor "
            Mapping=DecodeEqualList(Prj.mappingsample)
            for k,v in Mapping.items() :
                sql1+=',s.%s as "sample_%s" '%(k,re.sub(R"[^a-zA-Z0-9\.\-µ]","_",v))
        if self.param.processdata=='1':
            sql1+="\n,p.orig_id process_id"
            Mapping=DecodeEqualList(Prj.mappingprocess)
            for k,v in Mapping.items() :
                sql1+=',p.%s as "process_%s" '%(k,re.sub(R"[^a-zA-Z0-9\.\-µ]","_",v))
            sql2+=" left join process p on o.processid=p.processid "
        if self.param.acqdata=='1':
            sql1+="\n,a.orig_id acq_id,a.instrument as acq_instrument"
            Mapping=DecodeEqualList(Prj.mappingacq)
            for k,v in Mapping.items() :
                sql1+=',a.%s as "acq_%s" '%(k,re.sub(R"[^a-zA-Z0-9\.\-µ]","_",v))
            sql2+=" left join acquisitions a on o.acquisid=a.acquisid "
        if ExportMode =='DOI':
            sql1 += "\n,o.objid"
        if self.param.internalids == '1':
            sql1 += """\n,o.objid,o.acquisid as acq_id_internal,o.processid as processid_internal,o.sampleid as sample_id_internal,o.classif_id,o.classif_who
                    ,o.classif_auto_id,to2.name classif_auto_name,classif_auto_score,classif_auto_when
                    ,o.random_value object_random_value,o.sunpos object_sunpos """
            if self.param.sampledata == '1':
                sql1 += "\n,s.latitude sample_lat,s.longitude sample_long "

        if self.param.histodata=='1':
            if self.param.samplelist!="": # injection du filtre sur les echantillons dans les historique
                samplefilter=" join samples s on o.sampleid=s.sampleid and s.orig_id= any(%(samplelist)s) "
            else: samplefilter=""
            sql1+=" ,oh.classif_date histoclassif_date,classif_type histoclassif_type,to3.name histoclassif_name,oh.classif_qual histoclassif_qual,uo3.name histoclassif_who,classif_score histoclassif_score"
            sql2+=""" left join (SELECT o.objid,classif_date,classif_type,och.classif_id,och.classif_qual,och.classif_who,classif_score
                    from objectsclassifhisto och
                    join objects o on o.objid=och.objid and o.projid=%(projid)s {0}
                    union all
                    SELECT o.objid,o.classif_when classif_date,'C' classif_type,classif_id,classif_qual,classif_who,NULL
                    from objects o {0} where o.projid=%(projid)s
                    )oh on o.objid=oh.objid
                    LEFT JOIN taxonomy to3 on oh.classif_id=to3.id
                    LEFT JOIN users uo3 on oh.classif_who=uo3.id
                    """.format(samplefilter)

        sql3+=sharedfilter.GetSQLFilter(self.param.filtres,params,self.task.owner_id)
        splitfield="object_id" # cette valeur permet d'éviter des erreurs plus loins dans r[splitfield]
        if ExportMode=='BAK':
            self.param.splitcsvby = "sample"
        elif ExportMode=='DOI':
            self.param.splitcsvby = ""
        if self.param.splitcsvby=="sample":
            sql3+=" order by s.orig_id, o.objid "
            splitfield = "sample_id"
        elif self.param.splitcsvby=="taxo":
            sql1 += "\n,concat(to1p.name,'_',to1.name) taxo_parent_child "
            sql3+=" order by taxo_parent_child, o.objid "
            splitfield = "taxo_parent_child"
        else:
            sql3 += " order by s.orig_id, o.objid " #tri par defaut
        if ImageExport!='':
            sql3+=",img_rank"
        sql=sql1+" "+sql2+" "+sql3
        logging.info("Execute SQL : %s"%(sql,))
        logging.info("Params : %s"%(params,))
        self.pgcur.execute(sql,params)
        splitcsv = (self.param.splitcsvby != "")
        self.param.OutFile= "export_{0:d}_{1:s}.{2}".format(Prj.projid
                                                            ,datetime.datetime.now().strftime("%Y%m%d_%H%M")
                                                            ,"zip"  )

        zfile = zipfile.ZipFile(os.path.join(self.GetWorkingDir(),self.param.OutFile)
                                , 'w', allowZip64=True, compression=zipfile.ZIP_DEFLATED)
        if splitcsv:
            csvfilename='temp.tsv'
            prevvalue = "NotAssigned"
        else:
            csvfilename =self.param.OutFile.replace('.zip','.tsv')
            prevvalue = self.param.OutFile.replace('.zip', '')
        fichier=os.path.join(self.GetWorkingDir(),csvfilename)
        csvfile=None
        for r in self.pgcur:
            if (csvfile is None and not splitcsv) or (splitcsv and prevvalue != r[splitfield]):
                if csvfile :
                    csvfile.close()
                    if zfile :
                        if ExportMode == 'BAK':
                            zfile.write(fichier, os.path.join(str(prevvalue),"ecotaxa_" + str(prevvalue) + ".tsv"))
                        else:
                            zfile.write(fichier,"ecotaxa_"+str(prevvalue)+".tsv")
                if splitcsv:
                    prevvalue = r[splitfield]
                logging.info("Creating file %s" % (fichier,))
                csvfile=open(fichier,'w',encoding='latin_1')
                wtr = csv.writer(csvfile, delimiter='\t', quotechar='"',lineterminator='\n',quoting=csv.QUOTE_NONNUMERIC  )
                colnames = [desc[0] for desc in self.pgcur.description]
                coltypes=[desc[1] for desc in self.pgcur.description]
            FloatType=coltypes[2] # read the type of column 2 (a lat/lon double) to get the driver's float type code
                wtr.writerow([OriginalColName.get(c,c) for c in colnames])
                if ExportMode == 'BAK':
                    wtr.writerow(['[f]' if x==FloatType else '[t]' for x in coltypes])
            if r.get('img_file_name','') and ExportMode == 'DOI': # DOI exports store images in per-taxon folders
                r['img_file_name']=GetDOIImgFileName(r['objid'],r['img_rank'],r['object_annotation_category'],r['img_file_name'])
            # strip the line breaks from the comments
            if self.param.commentsdata == '1' and r['complement_info']:
                r['complement_info'] = ' '.join(r['complement_info'].splitlines())
            if self.param.usecomasepa == '1':  # for decimals, replace '.' with ','
                for i, t in enumerate(coltypes):
                    if t == FloatType and r[i] is not None:
                        r[i] = str(r[i]).replace('.', ',')
            wtr.writerow(r)
        if csvfile:
            csvfile.close()
            if zfile:
                if ExportMode == 'BAK': # split by sample: the TSV goes inside the sample folder
                    zfile.write(fichier, os.path.join(str(prevvalue) , "ecotaxa_" + str(prevvalue) + ".tsv"))
                else:
                    zfile.write(fichier, "ecotaxa_"+str(prevvalue) + ".tsv")
                zfile.close()
        logging.info("Extracted %d rows", self.pgcur.rowcount)
Example #7
    def SPStep1(self):
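        # Subset extraction: clones the project row, then copies a random sample of
        # objects (fields, CNN features and optionally images) into the new project.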
        logging.info("Input Param = %s" % (self.param.__dict__, ))
        # self.param.ProjectId="2"
        Prj = database.Projects.query.filter_by(
            projid=self.param.ProjectId).first()
        # self.param.IntraStep=0
        if getattr(self.param, 'IntraStep', 0) == 0:
            self.param.IntraStep = 1
            db.session.expunge(Prj)
            NewPrj = Prj
            Prj = copy.copy(NewPrj)  # if we made a copy and inserted that, the insert would fail
            make_transient(NewPrj)
            NewPrj.title = self.param.subsetprojecttitle
            NewPrj.projid = None
            NewPrj.visible = False
            db.session.add(NewPrj)
            db.session.commit()
            pp = database.ProjectsPriv()
            pp.member = self.task.owner_id
            pp.privilege = "Manage"
            NewPrj.projmembers.append(pp)
            db.session.commit()
            self.param.subsetproject = NewPrj.projid
            self.UpdateProgress(
                5, "Subset Project %d Created : %s" %
                (NewPrj.projid, NewPrj.title))

        if self.param.IntraStep == 1:
            vaultroot = Path("../../vault")
            sqlparam = {'projid': self.param.ProjectId}
            sqlwhere = ""
            if self.param.extraprojects:
                sqlparam['projid'] += "," + self.param.extraprojects
            sqlparam['ranklimit'] = self.param.valeur
            if self.param.valtype == 'V': rankfunction = 'rank'
            elif self.param.valtype == 'P': rankfunction = '100*percent_rank'
            else: rankfunction = 'FunctionError'
            # if self.param.samplelist:
            #     sqlwhere+=" and s.orig_id in (%s) "%(",".join(["'%s'"%x for x in self.param.samplelist.split(",")]))
            # sqlwhere+=" and (o.classif_qual in (%s) "%(",".join(["'%s'"%x for x in self.param.what.split(",")]))
            # if self.param.what.find('N')>=0:
            #     sqlwhere+=" or o.classif_qual is null "
            # sqlwhere+=")"
            sqlwhere += sharedfilter.GetSQLFilter(self.param.filtres, sqlparam,
                                                  str(self.task.owner_id))
            logging.info("SQLParam=%s", sqlparam)
            sql = """select objid from (
                SELECT """ + rankfunction + """() OVER (partition by classif_id order by random() )rang,o.objid
                      from objects o left join samples s on o.sampleid=s.sampleid
                      where o.projid in ( %(projid)s ) """ + sqlwhere + """ ) sr
                where rang<=%(ranklimit)s """
            logging.info("SQL=%s %s", sql, sqlparam)
            # for obj in db.session.query(database.Objects).from_statement( text(sql) ).all():
            LstObjects = GetAll(sql, sqlparam)
            logging.info("matched %s objects", len(LstObjects))
            if len(LstObjects) == 0:
                self.task.taskstate = "Error"
                self.UpdateProgress(
                    10, "No object to include in the subset project")
            NbrObjects = 0
            for objid in LstObjects:
                obj = db.session.query(
                    database.Objects).filter_by(objid=objid[0]).first()
                objf = db.session.query(
                    database.ObjectsFields).filter_by(objfid=objid[0]).first()
                objcnn = db.session.query(
                    database.Objects_cnn_features).filter_by(
                        objcnnid=objid[0]).first()
                NbrObjects += 1
                oldobjid = obj.objid
                if self.param.withimg == 'Y':
                    for img in obj.images:
                        db.session.expunge(img)
                        make_transient(img)
                        self.pgcur.execute("select nextval('seq_images')")
                        img.imgid = self.pgcur.fetchone()[0]
                        # print("New Image id=",img.imgid)
                        SrcImg = img.file_name
                        SrcImgMini = img.thumb_file_name
                        VaultFolder = "%04d" % (img.imgid // 10000)
                        # create the vault subdirectory for the images if needed
                        CreateDirConcurrentlyIfNeeded(
                            vaultroot.joinpath(VaultFolder))
                        img.file_name = "%s/%04d%s" % (VaultFolder,
                                                       img.imgid % 10000,
                                                       Path(SrcImg).suffix)
                        shutil.copyfile(
                            vaultroot.joinpath(SrcImg).as_posix(),
                            vaultroot.joinpath(img.file_name).as_posix())
                        if SrcImgMini is not None:
                            img.thumb_file_name = "%s/%04d_mini%s" % (
                                VaultFolder, img.imgid % 10000,
                                Path(SrcImgMini).suffix)
                            shutil.copyfile(
                                vaultroot.joinpath(SrcImgMini).as_posix(),
                                vaultroot.joinpath(
                                    img.thumb_file_name).as_posix())

                db.session.expunge(obj)
                make_transient(obj)
                obj.objid = None
                obj.img0id = None
                obj.projid = self.param.subsetproject
                obj.sampleid = self.GetSampleID(obj.sampleid)
                obj.processid = self.GetProcessID(obj.processid)
                obj.acquisid = self.GetAcquisID(obj.acquisid)
                db.session.add(obj)
                db.session.commit()
                dummy = objf.n01  # forces objf to load its state; otherwise its data is lost in the following statements
                db.session.expunge(objf)
                make_transient(objf)
                objf.objfid = obj.objid
                db.session.add(objf)
                if objcnn:
                    dummy = objcnn.cnn01  # forces objcnn to load its state; otherwise its data is lost in the following statements
                    db.session.expunge(objcnn)
                    make_transient(objcnn)
                    objcnn.objcnnid = obj.objid
                    db.session.add(objcnn)
                db.session.commit()
                if NbrObjects % 20 == 0:
                    self.UpdateProgress(5 + 95 * NbrObjects / len(LstObjects),
                                        "Subset creation in progress")
                # print (oldobjid,obj.objid)
            # recompute the Img0 values
            self.pgcur.execute("""update obj_head o
                                set imgcount=(select count(*) from images where objid=o.objid)
                                ,img0id=(select imgid from images where objid=o.objid order by imgrank asc limit 1 )
                                where projid=""" +
                               str(self.param.subsetproject))
            self.pgcur.connection.commit()
        import appli.project.main
        appli.project.main.RecalcProjectTaxoStat(self.param.subsetproject)
        appli.project.main.UpdateProjectStat(self.param.subsetproject)
        self.task.taskstate = "Done"
        self.UpdateProgress(100, "Subset created successfully")