def get_variable_dataf(variable_ids, ro_id, survey_id, org_id, analysis_id): cnx, now = connect(), currtime() data = [] for variable_id in variable_ids: cursor = cnx.cursor(dictionary=True, buffered=True) try: cursor.callproc('GetVariableDataF', [variable_id, org_id, survey_id, ro_id, 0, '']) except: cursor_err = cnx.cursor() cursor_err.execute(sperror("Variable data can't be retrieved", analysis_id)) cnx.commit() cursor_err.close() cnx.close() exit(1) for i in cursor.stored_results(): fetched = i.fetchall() fetched = [('1',) if element[0] is None else element for element in fetched] data.append(fetched) cursor.close() cnx.close() cleaned = [] for variable in data: ints = [] for data_point in variable: if data_point is not None: ints.append(int(data_point[0])) else: ints.append(1) cleaned.append(ints) return cleaned
def save_results_as_corr(p, percentages, survey_id, variable_ids, options, analysis_id): cnx, now = connect(), currtime() for i in range(len(options[0])): for j in range(len(options[1])): cursor1 = cnx.cursor() cursor2 = cnx.cursor() query1 = """INSERT INTO correlation_analysis_results SET analysis_id = {current_id}, var1 = {var1}, var2 = {var2}, var1_option = {var1_option}, var2_option = {var2_option}, p_value = {p_value}, percentage = {percentage}, is_chisquare = 1""".format( current_id=analysis_id, var1=variable_ids[0], var2=variable_ids[1], var1_option=i+1, var2_option=j+1, p_value=p, percentage=round(percentages[i][j], 2)) query2 = """INSERT INTO correlation_analysis_results SET analysis_id = {current_id}, var1 = {var2}, var2 = {var1}, var1_option = {var2_option}, var2_option = {var1_option}, p_value = {p_value}, percentage = {percentage}, is_chisquare = 1""".format( current_id=analysis_id, var1=variable_ids[0], var2=variable_ids[1], var1_option=i + 1, var2_option=j + 1, p_value=p, percentage=round(percentages[i][j], 2)) try: cursor1.execute(query1) cursor2.execute(query2) cnx.commit() except: exit(1) cursor1.close() cursor2.close() cnx.close() return
def ro(analysis_id, variable_ids): cnx, now = connect(), currtime() result, ro_id, survey_id, org_id = select_from_analysis(analysis_id) update_analysis_running(analysis_id) delete_results(analysis_id) variables = get_varinfo_from_varids(variable_ids, analysis_id) variable_ids = variable_ids.split(',') options = get_options(variable_ids) data = get_variable_dataf(variable_ids, ro_id, survey_id, org_id, analysis_id) check_empty_var(data, variables, cnx) orders = list(range(len(variable_ids))) for subset in combinations(orders, 2): (first, second) = subset print(subset) subids = [variable_ids[i] for i in [first, second]] subdata = [data[i] for i in [first, second]] subopts = [options[i] for i in [first, second]] counts = find_counts(subdata, subopts) percentages = find_percentages(counts) try: chi2, p, dof, ex = chi2_contingency(counts, correction=False) except: cursor_err = cnx.cursor() message = "Algorithm has failed" cursor_err.execute(sperror(message, analysis_id)) cnx.commit() cursor_err.close() exit(1) p = round(p, 4) save_results_as_corr(p, percentages, survey_id, subids, subopts, analysis_id) update_analysis_done(analysis_id) return
def update_analysis_done(analysis_id): cnx, now = connect(), currtime() cursor = cnx.cursor(dictionary=True) try: cursor.execute("""UPDATE analysis SET analysis_status = 'done', error = NULL, modified = '{current_timestamp}' WHERE id = {current_id}""" .format(current_timestamp=now, current_id=analysis_id)) except: cursor_err = cnx.cursor() cursor_err.execute(sperror("Analysis couldn't be updated", analysis_id)) cnx.commit() cursor_err.close() exit(1) cnx.close() return
def delete_results(analysis_id): cnx, now = connect(), currtime() cursor = cnx.cursor(dictionary=True) try: cursor.execute("""DELETE FROM correlation_analysis_results WHERE analysis_id = {current_id} AND is_chisquare = 1""" .format(current_id=analysis_id)) except: cursor_err = cnx.cursor() cursor_err.execute(sperror("Old results couldn't be deleted", analysis_id)) cnx.commit() cursor_err.close() exit(1) cnx.close() return
def get_varinfo_from_varids(variable_ids, analysis_id): cnx, now = connect(), currtime() cursor = cnx.cursor(dictionary=True, buffered=True) try: cursor.execute("""SELECT var_id AS id, var_type AS type, var_subtype AS subtype, var_name AS name FROM var_list WHERE var_id IN ({current_var_ids})""" .format(current_var_ids=variable_ids)) except: cursor_err = cnx.cursor() cursor_err.execute(sperror("A variable can't be retrieved", analysis_id)) cnx.commit() cursor_err.close() cnx.close() exit(1) variables = cursor.fetchall() cursor.close() cnx.close() return variables
def select_from_analysis(analysis_id): cnx, now = connect(), currtime() cursor = cnx.cursor(dictionary=True) try: cursor.execute("SELECT * FROM analysis WHERE id = {current_id}" .format(current_id=analysis_id)) except: cursor_err = cnx.cursor() cursor_err.execute(sperror("Could not find analysis in database", analysis_id)) cnx.commit() cursor_err.close() exit(1) result = cursor.fetchall() ro_id = result[0]["ro_id"] survey_id = result[0]["survey_id"] org_id = result[0]["org_id"] cursor.close() cnx.close() return result, ro_id, survey_id, org_id
def get_options(variable_ids): cnx, now = connect(), currtime() options = [] for variable_id in variable_ids: cursor = cnx.cursor(dictionary=True, buffered=True) if True: try: cursor.callproc('GetVariableOptions', [variable_id]) except: cursor_err = cnx.cursor() cnx.commit() cursor_err.close() cnx.close() print("---") exit(1) for i in cursor.stored_results(): options.append(i.fetchall()) cursor.close() cnx.close() return options
def get_params(analysis_id): result, ro_id, survey_id, org_id = select_from_analysis(analysis_id) cnx, now = connect(), currtime() cursor = cnx.cursor(dictionary=True, buffered=True) ro_id = result[0]["ro_id"] try: cursor.execute("""SELECT var_id AS id, var_type AS type, var_subtype AS subtype FROM ro_vars INNER JOIN var_list ON var_list.var_id = ro_vars.variable_id WHERE ro_vars.ro_id = {current_ro_id}""".format(current_ro_id=ro_id)) except: cursor_err = cnx.cursor() cursor_err.execute(sperror("A variable can't be retrieved", analysis_id)) cnx.commit() cursor_err.close() cnx.close() exit(1) variables = cursor.fetchall() cursor.close() variable_ids = str(variables[0]["id"]) + "," + str(variables[1]["id"]) cnx.close() return org_id, survey_id, variable_ids
from checkwarns import checkwarns import argparse import json checkwarns() writelog = False if not writelog: log = logging.getLogger() log.setLevel(logging.CRITICAL) test = False cnx = connect() now = currtime() # binumerical correlation def twocorr(x, y): try: corr, p = pearsonr(x, y) except: cursor_err = cnx.cursor() # cursor_err.execute(sperror("Correlation can't be done", analysis_id)) cursor_err.execute(sperror("Correlation can't be done", 1)) cnx.commit() cursor_err.close() cnx.close() exit(1)
def main(org_id, survey_id, variable_ids): if org_id == 0 or survey_id == 0 or variable_ids == "": exit(1) variable_ids = variable_ids.split(",") cnx = connect() now = currtime() data_raw = [] for variable_id in variable_ids: cursor = cnx.cursor(dictionary=True, buffered=True) try: cursor.callproc('GetVariableDataF', [variable_id, org_id, survey_id, 'FALSE', 'NULL']) except: cursor_err = cnx.cursor() # cursor_err.execute(sperror("Variables can't be retrieved", analysis_id)) cnx.commit() cursor_err.close() cnx.close() print("-") exit(1) for i in cursor.stored_results(): data_raw.append(i.fetchall()) cursor.close() data = [] for variable in data_raw: data_points = [] for point in variable: try: to_be_appended = float(point[0]) except: to_be_appended = 0.0 data_points.append(to_be_appended) data.append(data_points) result = [] for i in range(0, len(data)): for j in range(i, len(data)): if i != j: try: corr, p = pearsonr(data[i], data[j]) if np.isnan(corr): corr = 0.00 except: checkprint("ERR_IND") exit(1) result.append({ "v1": variable_ids[i], "v2": variable_ids[j], "c": round(corr, 2) }) result = json.dumps(result) print(result) cnx.close() return result
def regression(regression_id): cnx = connect() now = currtime() # try starting analysis cursor = cnx.cursor(dictionary=True) try: cursor.execute("""UPDATE analysis SET analysis_status = 'running', error = NULL, modified = '{current_time}' WHERE id = {current_id}""".format(current_time=now, current_id=analysis_id)) except: cursor_err = cnx.cursor() cursor_err.execute(sperror("Analysis couldn't start", analysis_id)) cnx.commit() cursor_err.close() cnx.close() exit(1) cnx.commit() cursor.close() # remove old results cursor = cnx.cursor() try: cursor.execute("""DELETE FROM regression_analysis_results WHERE analysis_id = {current_id}""".format(current_id=analysis_id)) except: cursor_err = cnx.cursor() cursor_err.execute(sperror("Can't access or modify analysis results", analysis_id)) cnx.commit() cursor_err.close() cnx.close() exit(1) cnx.commit() cursor.close() # select given analysis id cursor = cnx.cursor(dictionary=True) try: cursor.execute("SELECT * FROM analysis WHERE id = {current_id} AND analysis_type = 'REGRESSION'" .format(current_id = analysis_id)) except: cursor_err = cnx.cursor() cursor_err.execute(sperror("Could not find analysis in database", analysis_id)) cnx.commit() cursor_err.close() exit(1) result = cursor.fetchall() ro_id = result[0]["ro_id"] survey_id = result[0]["survey_id"] org_id = result[0]["org_id"] cursor.close() # fetch dependent variable cursor = cnx.cursor(dictionary=True, buffered=True) try: cursor.execute("""SELECT var_id AS id, var_type AS type, var_subtype AS subtype FROM ro_vars INNER JOIN var_list ON var_list.var_id = ro_vars.variable_id WHERE ro_vars.ro_id = {current_ro_id} AND ro_vars.analysis_var_type = 'DV'""" .format(current_ro_id=ro_id)) except: cursor_err = cnx.cursor() cursor_err.execute(sperror("DV can't be retrieved", analysis_id)) cnx.commit() cursor_err.close() cnx.close() exit(1) dv = cursor.fetchall() cursor.close() checkprint(dv) # fetch independent variables ivids = [] cursor = cnx.cursor(dictionary=True, buffered=True) ro_id = result[0]["ro_id"] try: cursor.execute("""SELECT var_id AS id, var_type AS type, var_subtype AS subtype FROM ro_vars INNER JOIN var_list ON var_list.var_id = ro_vars.variable_id WHERE ro_vars.ro_id = {current_ro_id} AND ro_vars.analysis_var_type = 'IV'""" .format(current_ro_id=ro_id)) except: cursor_err = cnx.cursor() cursor_err.execute(sperror("IV can't be retrieved", analysis_id)) cnx.commit() cursor_err.close() cnx.close() exit(1) ivs = cursor.fetchall() cursor.close() for iv in ivs: ivids.append(iv['id']) # get options for categorical independent variables options_iv = [] for variable in ivs: cursor = cnx.cursor(dictionary=True, buffered=True) if True: try: cursor.callproc('GetVariableOptions', [variable["id"]]) except: cursor_err = cnx.cursor() cursor_err.execute(sperror("Variable options can't be retrieved", analysis_id)) cnx.commit() cursor_err.close() cnx.close() exit(1) for i in cursor.stored_results(): options_iv.append(i.fetchall()) cursor.close() checkprint(options_iv) # get options for categorical dependent variables options_dv = [] cursor = cnx.cursor(dictionary=True, buffered=True) if True: try: cursor.callproc('GetVariableOptions', [dv[0]["id"]]) except: cursor_err = cnx.cursor() cursor_err.execute(sperror("Variable options can't be retrieved", analysis_id)) cnx.commit() cursor_err.close() cnx.close() exit(1) for i in cursor.stored_results(): options_dv.append(i.fetchall()) cursor.close() # fetch data points for independent variables data_raw_iv = [] for i in range(0, len(ivs)): variable = ivs[i] cursor = cnx.cursor(dictionary=True, buffered=True) try: # cursor.callproc('GetVariableData', [variable["id"], org_id, survey_id]) cursor.callproc('GetVariableDataF', [variable["id"], org_id, survey_id, ro_id, 0, '']) except: cursor_err = cnx.cursor() cursor_err.execute(sperror("IV content can't be retrieved", analysis_id)) cnx.commit() cursor_err.close() cnx.close() exit(1) for j in cursor.stored_results(): fetched = j.fetchall() # for numerical variables if ivs[i]['subtype'] in ['decimal', 'numeric', 'scalar', 'labeled_scalar']: fetched = [float(x[0]) if x[0] is not None else float(random.gauss(0, 1)) for x in fetched] # for categorical variables elif ivs[i]['subtype'] in ['single_choice', 'multiple_choice', 'text_scalar']: binaries = [] for option_id, option_label in options_iv[i]: binary = [] for point in fetched: try: # binarize each option for each categorical variable if point[0] == option_id: binary.append(1.0) else: binary.append(0.0) except: binary.append(0.0) binaries.append(binary) fetched = binaries else: exit(1) data_raw_iv.append(fetched) cursor.close() # get type of the dependent variable dv_type = dv[0]['subtype'] mulchoopts = [] # fetch data points for dependent variable data_raw_dv = [] for variable in dv: cursor = cnx.cursor(dictionary=True, buffered=True) try: # cursor.callproc('GetVariableData', [variable["id"], org_id, survey_id]) cursor.callproc('GetVariableDataF', [variable["id"], org_id, survey_id, ro_id, 0, '']) except: cursor_err = cnx.cursor() cursor_err.execute(sperror("DV content can't be retrieved", analysis_id)) cnx.commit() cursor_err.close() cnx.close() exit(1) for i in cursor.stored_results(): fetched = i.fetchall() if dv_type in ['decimal', 'numeric', 'scalar', 'labeled_scalar']: fetched = [float(x[0]) if x[0] is not None else float(random.gauss(0, 1)) for x in fetched] data_raw_dv.append(fetched) elif dv_type in ['single_choice', 'text_scalar', 'multiple_choice']: for option_id, option_label in options_dv[0]: mulchoopts.append(option_id) checkprint(mulchoopts) for mulchoopt in mulchoopts: binary = [] for point in fetched: try: # binarize options for categorical variables if point[0] == mulchoopt: binary.append(1.0) else: binary.append(0.0) except: binary.append(0.0) data_raw_dv.append([binary]) else: exit(1) cursor.close() # start the appropriate analysis according to the type of the dependent variable if dv_type in ['decimal', 'numeric', 'scalar', 'labeled_scalar']: if len(ivids) >= len(data_raw_dv[0]): try: cursor_err = cnx.cursor() cursor_err.execute(sperror("ERR_TOO_FEW_DATA", analysis_id)) cnx.commit() cursor_err.close() checkprint("error can be saved") except: checkprint("error can't be saved") cnx.close() exit(1) try: # start linear regression for numerical dv linear_regression(cnx, now, regression_id, ivs, ivids, dv, data_raw_iv, data_raw_dv, options_iv) except: exit(1) elif dv_type in ['single_choice', 'text_scalar', 'multiple_choice']: if len(ivids) >= len(data_raw_dv[0][0]): try: cursor_err = cnx.cursor() cursor_err.execute(sperror("ERR_TOO_FEW_DATA", analysis_id)) cnx.commit() cursor_err.close() checkprint("error can be saved") except: checkprint("error can't be saved") cnx.close() exit(1) for i in range(0, len(mulchoopts)): # start multiple regression for each option of categorical dv multi_regression(cnx, now, regression_id, ivs, ivids, dv, data_raw_iv, data_raw_dv[i], options_iv, options_dv, mulchoopts[i]) else: cursor_err = cnx.cursor() cursor_err.execute(sperror("Unknown DV type", analysis_id)) cnx.commit() cursor_err.close() cnx.commit() exit(1) try: cnx.close() except: exit(1)
def getdata(org_id, survey_id, variable_ids): if org_id == 0 or survey_id == 0 or variable_ids == "": exit(1) variable_ids = variable_ids.split(",") # checkprint(variable_ids) cnx = connect() now = currtime() # checkprint("start") counter = 0 data_raw = [] for variable_id in variable_ids: if not int(variable_id) == 2846: continue print(counter) counter += 1 cursor = cnx.cursor(dictionary=True, buffered=True) try: cursor.callproc('GetVariableData', [variable_id, org_id, survey_id]) except: cursor_err = cnx.cursor() cnx.commit() cursor_err.close() cnx.close() exit(1) for i in cursor.stored_results(): fetched = i.fetchall() print(fetched) fetched = [('0', ) if element[0] is None else element for element in fetched] data_raw.append(fetched) cursor.close() exit(1) options = [] for variable_id in variable_ids: print(counter) counter += 1 cursor = cnx.cursor(dictionary=True, buffered=True) if True: try: cursor.callproc('GetVariableOptions', [variable_id]) except: cursor_err = cnx.cursor() cnx.commit() cursor_err.close() cnx.close() exit(1) for i in cursor.stored_results(): options.append(i.fetchall()) cursor.close() # checkprint(options) cnx.close() # for variable in data_raw: # checkprint(variable) # checkprint(options) return data_raw, options