コード例 #1
0
ファイル: valrule.py プロジェクト: sergioespana/openAggre
def write():
    st.title("Validation rules")
    st.write("Add validation rule")
    
    options = get_indicators()
    indicator_selected = st.selectbox("Select Indicator", options=options)
    realid = indicator_selected[0]

    currentNaNmethoddf = db.selectfromwhere("realid,nanmethod","parameter", "realid = ?",(realid,))
    currentNaNMethod = currentNaNmethoddf['nanmethod'].iloc[0]

    st.write("Current way to deal with empty values: " + nanMethods[currentNaNMethod])
    #if st.button("Update NaN-method"):
    NaNoptions = ["None selected","Convert to 0", "Convert to no", "Set empty values to inactive"]
    nanMethod = st.selectbox("Update NaN-method",options = NaNoptions)
    if st.button("Update method"):
        if nanMethod == "None selected":
            db.updatevalues("parameter", "nanmethod = ?", "realid = ?",(0,realid))
        elif nanMethod == "Convert to 0":
            db.updatevalues("parameter", "nanmethod = ?", "realid = ?",(1,realid))
        elif nanMethod == "Convert to no":
            db.updatevalues("parameter", "nanmethod = ?", "realid = ?",(2,realid))
        else:
            db.updatevalues("parameter", "nanmethod = ?", "realid = ?",(3,realid))

    indicators = db.selectallfrom("indicator")
    parameters = db.selectallfrom("parameter")
    parameters = parameters[parameters["fk_methodid"] == currentMethod]
    questions = db.selectallfrom("question")
    valrules = db.selectallfrom("validationrule")
    valrules = valrules[valrules["fk_methodid"] == currentMethod]
    

    parid = parameters[parameters["realid"] == realid]['parameterid'].iloc[0]
    #ind = questions[questions['realid'] == realid]
    #parid = questions[questions['fk_parameterid'] == par].iloc[0]
    #print(parid)

    temprules = db.selectfromwhere("fk_parameterid,fk_validationruleid","validationruleparameter_map", "fk_parameterid = ?",(int(parid),))
    valruleids = temprules['fk_validationruleid'].tolist()
    validationrules = valrules[valrules['validationruleid'].isin(valruleids)]
    st.write("Other validation rules: ")
    rows = selectable_data_table(validationrules[['rule','description']])
    if st.button("Delete selected"):
        for i in rows:
            db.deletevalue("validationruleparameter_map","fk_parameterid = ? and fk_validationruleid = ?", (int(parid),int(validationrules['validationruleid'].iloc[i])))
            

    newRule = st.text_input("New rule")
    description = st.text_input("Description")
    if st.button("Save rule"):
        db.insertvaluessingle("validationrule(rule,description,fk_methodid)","(?,?,?)",(newRule,description,int(currentMethod)))
        justAddedrule = db.selectfromwhere("rule,validationruleid","validationrule","rule = ?",(newRule,))
        ruleid = justAddedrule['validationruleid'].iloc[0]
        db.insertvaluessingle("validationruleparameter_map(fk_parameterid,fk_validationruleid)","(?,?)",(int(parid),int(ruleid)))
コード例 #2
0
ファイル: valparse.py プロジェクト: sergioespana/openAggre
def applynanMethod(nanmethod, df):
    if nanmethod == 0:
        return
    elif nanmethod == 1:
        for i in range(len(df)):
            dataid = df['dataid'].iloc[i]
            db.updatevalues("data", "isActive = ?", "dataid  = ?",
                            (int(0), int(dataid)))
            versiondf = db.selectfromwhere("dataid,version", "data",
                                           "dataid = ?", (int(dataid), ))
            currentversion = versiondf['version'].iloc[0]
            tempsplit = currentversion.split(".")
            fineversion = int(tempsplit[1]) + 1
            newVersion = tempsplit[0] + "." + str(fineversion)
            newValue = "0"
            year = df['year'].iloc[i]
            parid = df['fk_parameterid'].iloc[i]
            projectid = df['fk_projectid'].iloc[i]
            orgid = df['fk_organisationid'].iloc[i]
            db.insertvaluessingle(
                "data(value,year,version,isActive,fk_parameterid,fk_projectid,fk_organisationid)",
                "(?,?,?,?,?,?,?)", (newValue, year, newVersion, int(1),
                                    int(parid), int(projectid), int(orgid)))
    elif nanmethod == 2:
        for i in range(len(df)):
            dataid = df['dataid'].iloc[i]
            db.updatevalues("data", "isActive = ?", "dataid  = ?",
                            (int(0), int(dataid)))
            versiondf = db.selectfromwhere("dataid,version", "data",
                                           "dataid = ?", (int(dataid), ))
            currentversion = versiondf['version'].iloc[0]
            tempsplit = currentversion.split(".")
            fineversion = int(tempsplit[1]) + 1
            newVersion = tempsplit[0] + "." + str(fineversion)
            newValue = "no"
            year = df['year'].iloc[i]
            parid = df['fk_parameterid'].iloc[i]
            projectid = df['fk_projectid'].iloc[i]
            orgid = df['fk_organisationid'].iloc[i]
            db.insertvaluessingle(
                "data(value,year,version,isActive,fk_parameterid,fk_projectid,fk_organisationid)",
                "(?,?,?,?,?,?,?)", (newValue, year, newVersion, int(1),
                                    int(parid), int(projectid), int(orgid)))
    elif nanmethod == 3:
        for i in range(len(df)):
            dataid = df['dataid'].iloc[i]
            db.updatevalues("data", "isActive = ?", "dataid = ?",
                            (int(0), int(dataid)))
    else:
        return
コード例 #3
0
def validateparameter(parameterid,alterations,columns):
    data = db.selectallfrom("data")
    datadf = data[data['isActive'] == 1]
    pardf = datadf[datadf['fk_parameterid'] == parameterid]
    
    
    valrules = db.selectallfrom("validationrule")
    temprules = db.selectfromwhere("fk_parameterid,fk_validationruleid","validationruleparameter_map", "fk_parameterid = ?",(int(parameterid),))
    valruleids = temprules['fk_validationruleid'].tolist()
    validationrules = valrules[valrules['validationruleid'].isin(valruleids)]
    

    for i in range(len(validationrules)):
        temprule = validationrules['rule'].iloc[i]
        for j in range(len(pardf)):
            tempRow = pardf.iloc[j]
            value = tempRow['value']
            dataid = tempRow['dataid']
            orgid = tempRow['fk_organisationid']
            tempdf = datadf[datadf['fk_organisationid'] == orgid]
            result = valparse.check(temprule,value,tempdf)
            if result == False:
                row = pd.DataFrame([[int(dataid),temprule,value,parameterid]],columns = columns)
                alterations = alterations.append(row)
    return alterations
コード例 #4
0
def write():
    st.title("Validation")
    columns = ["dataid","rule","value","parameterid"]
    # Get all the alterations that are going to be done
    alterations = pd.DataFrame(columns = columns)
    
    parameters = get_indicators()
    if st.button("Validate!"):
        for i in range(len(parameters)):
            realid = parameters[i][0]
            parameteriddf = db.selectfromwhere("realid,fk_methodid,parameterid","parameter","realid = ? and fk_methodid = ?",(realid,int(currentMethod),))
            parameterid = parameteriddf['parameterid'].iloc[0]
            data = db.selectallfrom("data")
            datadf = data[data['fk_parameterid'] == parameterid]
            datadf = datadf[datadf['isActive'] == 1]
            
            nanmethoddf = db.selectfromwhere("parameterid,nanmethod","parameter","parameterid = ?",(int(parameterid),))
            nanmethod = nanmethoddf['nanmethod'].iloc[0]
            nandf = datadf[datadf['value'] == "nan"]
            print("Nandf: " + str(len(nandf)))
            if len(nandf) > 0:
                if nanmethod == 0:
                    st.write("There are several NaN values in the data for this indicator, but there has not been a method selected yet how to handle these. Please do so under the validation rules section and come back to this page!")
                valparse.applynanMethod(nanmethod,nandf) 

            alterations = validateparameter(parameterid,alterations,columns)

        if len(alterations) > 0:
            for j in range(len(alterations)):
                val = alterations['value'].iloc[j]
                dataid = alterations['dataid'].iloc[j]
                parid = alterations['parameterid'].iloc[j]
                rule = alterations['rule'].iloc[j]
                description = "Rule: " + rule
                db.insertvaluessingle("alteration(old_val,new_val,description,fk_dataid,fk_projectid)","(?,?,?,?,?)", (val,"?",description,int(dataid),int(currentProject)))

        else:
            st.write("All good! According to the validation rules, there are no wrong values.")
    alts = db.selectallfrom("alteration")
    alts = alts[alts['fk_projectid'] == currentProject]
    if len(alts) > 0:
        interactivetable(alts)
    else:
        st.write("Currently no flagged alterations!")
コード例 #5
0
ファイル: valparse.py プロジェクト: sergioespana/openAggre
def getQvalue(realid, df):
    pardf = db.selectfromwhere("realid,fk_methodid,parameterid", "parameter",
                               "realid = ? and fk_methodid = ?", (
                                   realid,
                                   int(currentMethod),
                               ))
    parid = pardf['parameterid'].iloc[0]
    data = df[df['fk_parameterid'] == parid]
    result = data['value'].iloc[0]
    return result
コード例 #6
0
def getParameterid(realid):
    # Use db class to get parameterid
    parameterdf = db.selectfromwhere("realid,fk_methodid,parameterid",
                                     "parameter",
                                     "realid = ? and fk_methodid = ?", (
                                         realid,
                                         int(currentMethod),
                                     ))
    result = parameterdf['parameterid'].iloc[0]
    return result
コード例 #7
0
 def parsePeriod(self, variable):
     currentyeardf = db.selectfromwhere("projectid,currentYear", "project",
                                        "projectid = ?",
                                        (int(currentProject), ))
     currentyear = currentyeardf['currentYear'].iloc[0]
     result = []
     if '{' in variable:
         variable = variable[1:-1]
         tempSplit = variable.split(",")
         for i in range(len(tempSplit)):
             tempString = tempSplit[i]
             if "currentyear" in tempString:
                 tempString = tempString.replace("currentyear", currentyear)
                 result.append(eval(tempString))
     else:
         if "currentyear" in variable:
             tempString = variable.replace("currentyear", currentyear)
             result.append(eval(tempString))
         else:
             result.append(variable)
     return result
コード例 #8
0
    def constructDf(self, type, formula, label, population, period):
        rawData = db.selectallfrom("data")
        rawData = rawData[rawData["fk_projectid"] == currentProject]
        rawData = rawData[rawData['isActive'] == 1]
        if type == "barchart":
            dims = self.calculateDimensions(formula, population, period)
            years = self.parsePeriod(period)
            labels = self.parseVariable(label)
            pops = self.parseVariable(population)
            vars = self.parseVariable(formula)
            labelVardf = pd.DataFrame(data={'label': labels, 'var': vars})

            colPeriod = []
            colVariable = []
            colLabel = []
            colPopulation = []
            colValue = []
            for year in years:
                for pop in pops:
                    print(pop)
                    for var in vars:
                        colPeriod.append(year)
                        colPopulation.append(pop)
                        colVariable.append(var)
                        colLabel.append(labelVardf[labelVardf['var'] == var]
                                        ['label'].iloc[0])

                        popiddf = db.selectfromwhere(
                            "name,fk_projectid,populationid", "population",
                            "name = ? and fk_projectid = ?", (
                                pop,
                                int(currentProject),
                            ))
                        popid = popiddf['populationid'].iloc[0]

                        poporgmapdf = db.selectfromwhere(
                            "fk_populationid,fk_organisationid",
                            "populationorganisation_map",
                            "fk_populationid = ?", (int(popid), ))
                        popOrgList = []
                        for i in range(len(poporgmapdf)):
                            tempOrg = poporgmapdf["fk_organisationid"].iloc[i]
                            popOrgList.append(tempOrg)
                        tempDF = rawData[rawData['year'] == str(year)]
                        tempDF = tempDF[tempDF['fk_organisationid'].isin(
                            popOrgList)]
                        #st.write(tempDF)

                        #### Calculate the value based on year, pop and var(formula)

                        result = aggparse.interpret(var, tempDF)
                        colValue.append(result)

            data = {
                'Year': colPeriod,
                'Variable': colVariable,
                'Label': colLabel,
                'Population': colPopulation,
                'Value': colValue
            }

            resultdf = pd.DataFrame(data=data)
            st.write(resultdf)
        elif type == "categories":
            dims = self.calculateDimensions(formula, population, period)
            years = self.parsePeriod(period)
            labels = self.parseVariable(label)
            pops = self.parseVariable(population)
            vars = self.parseVariable(formula)
            labelVardf = pd.DataFrame(data={'label': labels, 'var': vars})

            colPeriod = []
            colVariable = []
            colLabel = []
            colPopulation = []
            colValue = []
            for year in years:
                for pop in pops:
                    for var in vars:
                        colPeriod.append(year)
                        colPopulation.append(pop)
                        colVariable.append(var)
                        colLabel.append(labelVardf[labelVardf['var'] == var]
                                        ['label'].iloc[0])

                        popiddf = db.selectfromwhere(
                            "name,fk_projectid,populationid", "population",
                            "name = ? and fk_projectid = ?", (
                                pop,
                                int(currentProject),
                            ))
                        popid = popiddf['populationid'].iloc[0]

                        poporgmapdf = db.selectfromwhere(
                            "fk_populationid,fk_organisationid",
                            "populationorganisation_map",
                            "fk_populationid = ?", (int(popid), ))
                        popOrgList = []
                        for i in range(len(poporgmapdf)):
                            tempOrg = poporgmapdf["fk_organisationid"].iloc[i]
                            popOrgList.append(tempOrg)
                        tempDF = rawData[rawData['year'] == str(year)]
                        tempDF = tempDF[tempDF['fk_organisationid'].isin(
                            popOrgList)]
                        #st.write(tempDF)

                        #### Calculate the value based on year, pop and var(formula)
                        result = aggparse.interpret(var, tempDF)
                        colValue.append(result)

            data = {
                'Year': colPeriod,
                'Variable': colVariable,
                'Label': colLabel,
                'Population': colPopulation,
                'Value': colValue
            }

            resultdf = pd.DataFrame(data=data)
            st.write(resultdf)

        elif type == "table":
            dims = self.calculateDimensions(formula, population, period)
            years = self.parsePeriod(period)
            labels = self.parseVariable(label)
            pops = self.parseVariable(population)
            vars = self.parseVariable(formula)
            labelVardf = pd.DataFrame(data={'label': labels, 'var': vars})

            colPeriod = []
            colVariable = []
            colLabel = []
            colPopulation = []
            colValue = []
            for year in years:
                for pop in pops:
                    for var in vars:
                        colPeriod.append(year)
                        colPopulation.append(pop)
                        colVariable.append(var)
                        colLabel.append(labelVardf[labelVardf['var'] == var]
                                        ['label'].iloc[0])

                        popiddf = db.selectfromwhere(
                            "name,fk_projectid,populationid", "population",
                            "name = ? and fk_projectid = ?", (
                                pop,
                                int(currentProject),
                            ))
                        popid = popiddf['populationid'].iloc[0]

                        poporgmapdf = db.selectfromwhere(
                            "fk_populationid,fk_organisationid",
                            "populationorganisation_map",
                            "fk_populationid = ?", (int(popid), ))
                        popOrgList = []
                        for i in range(len(poporgmapdf)):
                            tempOrg = poporgmapdf["fk_organisationid"].iloc[i]
                            popOrgList.append(tempOrg)
                        tempDF = rawData[rawData['year'] == str(year)]
                        tempDF = tempDF[tempDF['fk_organisationid'].isin(
                            popOrgList)]
                        #st.write(tempDF)

                        #### Calculate the value based on year, pop and var(formula)
                        result = aggparse.interpret(var, tempDF)
                        colValue.append(result)

            data = {
                'Year': colPeriod,
                'Variable': colVariable,
                'Label': colLabel,
                'Population': colPopulation,
                'Value': colValue
            }

            resultdf = pd.DataFrame(data=data)
            st.write(resultdf)

        else:
            return
        return resultdf