Beispiel #1
0
def export_survey(survey_id):
    """Send a survey, its statistical tests and its graphs as an .xlsx download.

    The workbook contains three sheets: "Survey data" (the survey's
    dataframe), "Statistical Tests" and "Graphs".

    Args:
        survey_id: String form of the survey's ``_id``. The same string is
            used to look up related documents by their ``surveyId`` field.

    Returns:
        The ``send_file`` response, an attachment named
        ``<survey title>.xlsx``.

    The lookup uses ``find_one_or_404``; a survey owned by a different user
    results in ``abort(403)``.
    """
    file_obj = mongo.db.surveys.find_one_or_404({"_id": ObjectId(survey_id)})
    if file_obj["user"] != current_user._id:
        flash("You do not have access to that survey", "danger")
        abort(403)
    # Get all graphs and tests relating to this file
    tests = mongo.db.tests.find({"surveyId": survey_id})
    graphs = mongo.db.graphs.find({"surveyId": survey_id})
    # Use a temp file so that it is removed once the response has been built
    with tempfile.NamedTemporaryFile() as f:
        # The writer is used as a context manager so the workbook is saved
        # and closed exactly once, even if building a sheet raises. Closing
        # the writer already closes ``writer.book``; closing the book a
        # second time by hand makes xlsxwriter warn about an already closed
        # file.
        with pd.ExcelWriter(f.name, engine='xlsxwriter') as writer:
            # Sheet 1: the raw survey data
            df = read_file(file_obj["fileName"])
            df.to_excel(writer, sheet_name='Survey data')
            wb = writer.book
            # Sheet 2: the statistical tests
            ws = wb.add_worksheet("Statistical Tests")
            tests_to_excel(ws, tests)
            # Sheet 3: the graphs
            ws2 = wb.add_worksheet("Graphs")
            graphs_to_excel(ws2, graphs)
        return send_file(f.name,
                         attachment_filename=file_obj["title"] + ".xlsx",
                         as_attachment=True)
Beispiel #2
0
def input():
    """Render the survey input page and save table data posted from it.

    The optional ``survey_id`` query argument selects an existing survey.

    * Valid form submission: the posted ``table`` field is written to the
      survey's file under ``<app root>/uploads`` and the survey document is
      upserted. The response body is the survey id as a string, so the
      client can include it in later saves instead of creating duplicates.
    * GET with ``survey_id``: the stored file is read and its values and
      column headers are passed to the template for editing.
    * Otherwise: an empty table is rendered.

    A survey owned by another user is never read or overwritten: the save
    path answers with HTTP 403 and the GET path redirects to the index.
    """
    # Initialise variables for handsontable (2d array for values, 1d array for column headers)
    value_list = [[]]
    header_list = []
    survey_id = request.args.get("survey_id")
    form = EditForm()
    # Handsontable data cannot be posted using WTForms POST methods - Post needs to be from combined WTForm and javascript AJAX
    if form.validate_on_submit():
        # Get the survey document (if one exists yet). When survey_id is
        # None, ObjectId(None) produces a fresh id, so nothing is found and
        # the upsert below inserts a new document.
        file_obj = mongo.db.surveys.find_one({"_id": ObjectId(survey_id)})
        if file_obj:
            # Saving must not let one user overwrite (and take ownership of)
            # another user's survey; the GET branch enforces the same rule.
            if file_obj["user"] != current_user._id:
                return "You do not have access to that survey", 403
            # The file already exists, so reuse its name
            file_name = file_obj["fileName"]
        else:
            # Generate a random hex to be the new filename
            file_name = generate_filepath()
        file_path = os.path.join(current_app.root_path, "uploads", file_name)
        # write/overwrite the table values to the file
        with open(file_path, "w") as file_to_write:
            file_to_write.write(request.form["table"])
        # Update/insert into the database
        survey = mongo.db.surveys.update_one(
            {"_id": ObjectId(survey_id)},
            {"$set": {"fileName": file_name,
                      "user": current_user._id,
                      "title": form.title.data}},
            upsert=True)
        if not survey_id:
            survey_id = survey.upserted_id
        # Respond to the jquery POST with the survey_id. This is so that if the survey was new, it
        # can now be incorporated into subsequent POST requests to avoid multiple surveys being saved
        return str(survey_id)
    # If GET request and the survey already exists (i.e. editing an existing survey)
    elif request.method == "GET" and survey_id:
        file_obj = mongo.db.surveys.find_one_or_404(
            {"_id": ObjectId(survey_id)})
        if file_obj["user"] != current_user._id:
            flash("You do not have access to that page", "danger")
            return redirect(url_for("main.index"))
        # Read the file and extract the cell values and column headers
        df = read_file(file_obj["fileName"])
        value_list = df.values.tolist()
        header_list = df.columns.values.tolist()
        form.title.data = file_obj["title"]
    data = {"values": value_list, "headers": header_list}
    return render_template("surveys/input.html",
                           title="Input",
                           data=data,
                           survey_id=survey_id,
                           form=form)
Beispiel #3
0
def quick_stats(survey_id):
    """Render the quick-statistics page for one survey.

    The survey is looked up with ``find_one_or_404``; if it belongs to a
    different user a message is flashed and the request is aborted with 403.
    The template receives the row count, the column count, the output of
    ``parse_data`` for the survey's dataframe, the survey id and its title.
    """
    survey = mongo.db.surveys.find_one_or_404({"_id": ObjectId(survey_id)})
    if survey["user"] != current_user._id:
        flash("You do not have access to that page", "danger")
        abort(403)

    frame = read_file(survey["fileName"])
    # Everything the template needs, gathered in one place
    context = {
        "rows": len(frame.index),
        "cols": len(frame.columns),
        "column_info": parse_data(frame),
        "survey_id": survey_id,
        "survey_title": survey["title"],
    }
    return render_template("analysis/quickstats.html", **context)
Beispiel #4
0
def run_tests(survey_id, user_id):
    """Run the applicable statistical tests on a survey and store the significant ones.

    Every column that ``parse_data`` reports as "categorical" or
    "true/false" is tested on its own (chi-square goodness of fit) and
    against every other column:

    * another categorical / true-false column -> chi-square test
    * a numerical column -> Mann-Whitney U when the first column has exactly
      2 unique values, Kruskal-Wallis when it has more than 2 (in both cases
      the numerical column must have more than 1 unique value)

    Results with a p-value below 0.05 are inserted into the ``temp_results``
    collection together with ``user_id`` and ``survey_id``.

    Args:
        survey_id: String form of the survey's ``_id``.
        user_id: Stored in the ``user`` field of each inserted result.
    """
    file_obj = mongo.db.surveys.find_one({"_id": ObjectId(survey_id)})
    df = read_file(file_obj["fileName"])
    column_info = parse_data(df)
    categorical_types = ("categorical", "true/false")
    test_results = []
    for column_1 in column_info:
        if column_1["data_type"] not in categorical_types:
            continue
        # Chi square goodness of fit only takes one, non-parametric variable
        p_value, result = chi_goodness(df, column_1["title"])
        if p_value < 0.05:
            test_results.append(result)
        # Now loop through again from the start, checking second variable against the first
        for column_2 in column_info:
            # If the columns are the same then we can continue with next iteration
            if column_2["title"] == column_1["title"]:
                continue
            elif column_2["data_type"] in categorical_types:
                # Chi square needs 2 categorical variables
                p_value, result = chi_square(df, column_1["title"],
                                             column_2["title"])
                # As Chi square can be done twice (with variable swapping places)
                # we need to check that it has not yet been done
                if p_value < 0.05 and not test_done(test_results, result):
                    test_results.append(result)
            elif column_2["data_type"] == "numerical":
                if column_1["num_unique"] == 2 and column_2["num_unique"] > 1:
                    # We perform mann-whitney U test
                    p_value, result = mann_whitney(df, column_1["title"],
                                                   column_2["title"])
                elif column_1["num_unique"] > 2 and column_2["num_unique"] > 1:
                    # We perform kruskal wallis test
                    p_value, result = kruskal_wallis(
                        df, column_1["title"], column_2["title"])
                else:
                    # No test applies to this pair. Skip it, otherwise the
                    # p_value/result left over from an earlier test would
                    # be judged (and possibly appended) a second time.
                    continue
                if p_value < 0.05:
                    test_results.append(result)
    # Now we can loop through the statistical tests, adding significant ones to
    # a temporary database. This will be presented to the user through a notification
    # on the home page.
    for result in test_results:
        mongo.db.temp_results.insert_one({
            "user": user_id,
            "survey_id": survey_id,
            "result": result
        })
Beispiel #5
0
def graph(survey_id):
    """Dispatch to the view that builds the requested chart for a survey.

    The chart type comes from the stored graph when the ``graph_id`` query
    argument matches one, otherwise from the ``chart_type`` query argument.
    Users who do not own the survey are redirected to the index page; an
    unrecognised chart type flashes a message and aborts with 404.
    """
    survey = mongo.db.surveys.find_one_or_404({"_id": ObjectId(survey_id)})
    if survey["user"] != current_user._id:
        flash("You do not have access to that page", "danger")
        return redirect(url_for("main.index"))

    # An existing graph already carries its type; a new one gets it from the URL
    graph_id = request.args.get("graph_id")
    stored_graph = mongo.db.graphs.find_one({"_id": ObjectId(graph_id)})
    chart_type = (stored_graph["type"] if stored_graph
                  else request.args.get("chart_type"))

    # Load the data, classify its columns, and turn the rows into a list of
    # records for D3.js on the client side
    frame = read_file(survey["fileName"])
    column_info = parse_data(frame)
    chart_data = frame.to_dict(orient='records')
    survey_title = survey["title"]

    # Bar and pie charts share one view, which also needs to know the type
    if chart_type in ("Bar chart", "Pie chart"):
        return pie_bar_chart(survey_id, column_info, chart_data, graph_id,
                             survey_title, chart_type)

    # Every remaining chart type has its own view with a common signature
    dedicated_views = (
        ("Scatter chart", scatter_chart),
        ("Histogram", histogram),
        ("Map", map_chart),
        ("Box and whisker", box_chart),
    )
    for label, view in dedicated_views:
        if chart_type == label:
            return view(survey_id, column_info, chart_data, graph_id,
                        survey_title)

    flash("something went wrong", "danger")
    abort(404)
Beispiel #6
0
def analyse(survey_id):
    """Show the statistical-test form and hand a valid submission to its test view.

    Both variable selectors are filled with the survey's column names.
    "Chi-Square goodness of fit" redirects to its own page; the other tests
    need a dependent variable and are delegated to ``kruskall_wallis``,
    ``mann_whitney`` or ``chi_square``. Anything else re-renders the form.
    A survey owned by a different user aborts with 403.
    """
    form = StatisticalTestForm()

    def show_form():
        # Single place that renders the analysis form
        return render_template("analysis/analysedata.html", form=form)

    survey = mongo.db.surveys.find_one_or_404({"_id": ObjectId(survey_id)})
    if survey["user"] != current_user._id:
        flash("You do not have access to that page", "danger")
        abort(403)

    # Offer every column of the dataset in both select fields
    df = read_file(survey["fileName"])
    for column_name in list(df.columns.values):
        option = (column_name, column_name)
        form.independent_variable.choices.append(option)
        form.dependent_variable.choices.append(option)

    if not form.validate_on_submit():
        return show_form()

    independent_variable = form.independent_variable.data
    dependent_variable = form.dependent_variable.data
    # The two variables must differ
    if independent_variable == dependent_variable:
        flash("You can't select the same variable for both.", "danger")
        return show_form()

    test = form.test.data
    # Goodness of fit has an extra page where the user enters the expected
    # distribution, so it is handled before the dependent-variable check
    if test == "Chi-Square goodness of fit":
        goodness_url = url_for('analysis.chi_goodness',
                               variable=independent_variable,
                               survey_id=survey_id)
        return redirect(goodness_url)

    # Every other test compares two variables
    if dependent_variable == "":
        flash("You must select a dependent variable for this test.",
              "danger")
        return show_form()

    if test == "Kruskall Wallis Test":
        return kruskall_wallis(survey_id, df, independent_variable,
                               dependent_variable, form)
    # Currently two-tailed; one-tailed options may be added later
    if test == "Mann-Whitney U Test":
        return mann_whitney(survey_id, df, independent_variable,
                            dependent_variable, form)
    if test == "Chi-Square Test":
        return chi_square(survey_id, df, independent_variable,
                          dependent_variable)
    return show_form()
Beispiel #7
0
def chi_goodness(survey_id, variable):
    """Collect an expected distribution and run a chi-square goodness-of-fit test.

    The form gets one entry per distinct group of ``variable``, each starting
    with an expected count of 0. On a valid submission the observed count of
    every group is compared with the submitted expected counts; if those sum
    to 0, ``chisquare`` is called with the observed counts only. The user is
    then redirected to the result page with the p-value.

    NOTE(review): no ownership check is visible in this function, unlike the
    other survey views - confirm whether one is applied elsewhere.
    """
    survey = mongo.db.surveys.find_one_or_404({"_id": ObjectId(survey_id)})
    df = read_file(survey["fileName"])

    # The distinct groups of the chosen variable
    keys = list(df.groupby(variable).groups.keys())
    # Total number of rows, shown so the user can match the expected counts
    total_count = len(df.index)

    initial_entries = [{"expected": 0, "key": key} for key in keys]
    form = ChiGoodnessForm(field=initial_entries)

    if not form.validate_on_submit():
        return render_template("analysis/chisquare.html",
                               form=form,
                               keys=keys,
                               total=total_count)

    observed = []
    expected = []
    for key in keys:
        # Number of rows that fall into this group
        observed.append(df[df[variable] == key].shape[0])
        # Expected count(s) the user submitted for this group
        expected.extend(entry['expected'] for entry in form.field.data
                        if key == entry['key'])

    if sum(expected) == 0:
        _, p_value = chisquare(observed)
    else:
        _, p_value = chisquare(observed, expected)

    result_url = url_for(
        'analysis.result',
        survey=survey_id,
        test="Chi-Square goodness of fit",
        p_value=p_value,
        independent_variable=variable,
    )
    return redirect(result_url)
Beispiel #8
0
def analyse(survey_id):
    """Show the statistical-test form, run the chosen test and redirect to its result.

    Both variable selectors are filled with the survey's column names. On a
    valid submission:

    * "Chi-Square goodness of fit" redirects to its own page.
    * "Kruskall Wallis Test" and "Mann-Whitney U Test" require a
      non-string dependent variable; Mann-Whitney additionally requires the
      independent variable to have exactly 2 groups.
    * "Chi-Square Test" is computed on the cross-tabulation of the two
      variables, without continuity correction.

    Validation problems flash a message and re-render the form. A successful
    test redirects to ``analysis.result`` with the test name, the p-value and
    both variables. A survey owned by a different user aborts with 403.
    """
    form = StatisticalTestForm()
    survey = mongo.db.surveys.find_one_or_404({"_id": ObjectId(survey_id)})
    if survey["user"] != current_user._id:
        flash("You do not have access to that page", "danger")
        abort(403)
    df = read_file(survey["fileName"])
    # Populate the select options in the form with all the variables
    for variable in list(df.columns.values):
        form.independent_variable.choices.append((variable, variable))
        form.dependent_variable.choices.append((variable, variable))
    if form.validate_on_submit():
        independent_variable = form.independent_variable.data
        dependent_variable = form.dependent_variable.data
        # Ensure the user hasn't selected the same variable for both
        if independent_variable == dependent_variable:
            flash("You can't select the same variable for both.", "danger")
            return render_template("analysis/analysedata.html", form=form)
        test = form.test.data
        # If the user selects Chi-Square goodness fit then they are redirected to a separate URL
        if test == "Chi-Square goodness of fit":
            return redirect(
                url_for('analysis.chi_goodness',
                        variable=independent_variable,
                        survey_id=survey_id))
        # The other tests all require a dependent variable
        if dependent_variable == "":
            flash("You must select a dependent variable for this test.",
                  "danger")
            return render_template("analysis/analysedata.html", form=form)
        # Both rank-based tests need a numeric dependent variable
        if (test in ("Kruskall Wallis Test", "Mann-Whitney U Test")
                and is_string_dtype(df[dependent_variable])):
            flash(
                "Dependent Variable '" + dependent_variable +
                "' is not numeric.", "danger")
            return render_template("analysis/analysedata.html", form=form)
        if test == "Kruskall Wallis Test":
            kruskal_result = kruskal(data=df,
                                     dv=dependent_variable,
                                     between=independent_variable)
            # Take the p-value (p-unc) by position via .values, as the
            # Mann-Whitney branch does, instead of relying on integer-key
            # fallback of Series indexing; keep 4 decimal places only
            p_value = "%.4f" % kruskal_result["p-unc"].values[0]
        # AT THE MOMENT, THIS TEST IS 2 TAILED. MAY WANT TO ADD OPTIONS FOR 1 TAILED TESTS
        elif test == "Mann-Whitney U Test":
            group_by = df.groupby(independent_variable)
            group_array = [group_by.get_group(x) for x in group_by.groups]
            if len(group_array) > 2:
                flash(
                    "Independent variable '" + independent_variable +
                    "' has too many groups, only 2 allowed for Mann-Whitney U Test.",
                    "danger")
                return render_template("analysis/analysedata.html", form=form)
            if len(group_array) < 2:
                # Fewer than two groups is also unusable, but "too many"
                # would be the wrong thing to tell the user
                flash(
                    "Independent variable '" + independent_variable +
                    "' has too few groups, exactly 2 required for Mann-Whitney U Test.",
                    "danger")
                return render_template("analysis/analysedata.html", form=form)
            x = group_array[0][dependent_variable].values
            y = group_array[1][dependent_variable].values
            mwu_result = mwu(x, y)
            p_value = "%.4f" % mwu_result['p-val'].values[0]
        elif test == "Chi-Square Test":
            contingency_table = pd.crosstab(df[independent_variable],
                                            df[dependent_variable])
            _, p_value, _, _ = chi2_contingency(contingency_table,
                                                correction=False)
        else:
            # Without this branch an unrecognised test name would reach the
            # redirect below with p_value never assigned
            flash("Unknown statistical test selected.", "danger")
            return render_template("analysis/analysedata.html", form=form)

        return redirect(
            url_for('analysis.result',
                    survey=survey_id,
                    test=test,
                    p_value=p_value,
                    independent_variable=independent_variable,
                    dependent_variable=dependent_variable))
    return render_template("analysis/analysedata.html", form=form)