Example #1
0
def theta_labeling(data):
    """Turn ``data`` into a list of 0/1 labels based on a per-entry angle.

    A 2-D view of ``data`` is built with ``analysis.make_twoD`` using the
    module-level ``start`` and ``stop``. For every entry of that view:

    * if ``analysis.dots`` of the entry is at least 85, the angle comes from
      ``analysis.ura`` when ``analysis.for_error`` lies in ``[0.85, 1]``
      (the index and error are printed in that case), otherwise from the
      second element of ``analysis.circle_fit``;
    * otherwise the angle is recorded as 0.

    The result of ``analysis.scatter(angles, 10)`` is plotted and shown
    before the labels are returned.

    Returns:
        A list with 1 where the angle is >= 220 and 0 elsewhere.
    """
    frames = analysis.make_twoD(data, start, stop, 128, 80)
    angles = []
    for idx in range(len(frames)):
        components = analysis.R_I(data, idx)
        real_part, imag_part = components[0], components[1]

        # Entries below the dot threshold get a placeholder angle of 0.
        if not (analysis.dots(frames[idx]) >= 85):
            angles.append(0)
            continue

        fit_error = analysis.for_error(real_part, imag_part)
        if 0.85 <= fit_error <= 1:
            print(idx, fit_error)
            angles.append(analysis.ura(real_part, imag_part))
        else:
            angles.append(analysis.circle_fit(real_part, imag_part)[1])

    curve = analysis.scatter(angles, 10)
    plt.plot(curve[0], curve[1])
    plt.show()

    return [1 if value >= 220 else 0 for value in angles]
Example #2
0
def showcorr(vartype=None):
    """Show scatter plots for strongly correlated variable pairs of one stage.

    The stage is ``vartype`` or, when the posted form carries a ``"vartype"``
    field, that field's value:

    * ``"regress"``: store the posted ``"variables"`` as ``session["rcont"]``
      (unless already set) and redirect to the ``regress`` endpoint.
    * ``"output"``: pair up ``session["ovars"]``.
    * ``"input"``: store the posted selection as ``session["rout"]`` (unless
      already set) and pair up ``session["ivars"]``.
    * anything else: store the posted selection as ``session["rinp"]``
      (unless already set) and pair up ``session["cvars"]``. When the stage
      is exactly ``"control"`` the input variables join the pairing, but
      input/input pairs are skipped.

    Each pair's Pearson coefficient (missing values filled with 0, rounded
    to two decimals) is computed; pairs at or above 0.70 are plotted through
    ``analysis.scatter`` and recorded in ``session["plots"]``.

    If a non-control stage has exactly one variable there is nothing to
    pair, so that stage's variables are stored as its selection and the
    request is redirected to the next stage.

    Returns:
        The rendered ``scatter.html`` page or a redirect response. If any
        exception is raised it is logged, a message is flashed, and ``None``
        is returned.
    """
    try:
        user_sess = UserSession.query.filter(UserSession.id == session["id"]).first()
        pid = user_sess.pid
        vartype = str(vartype)
        if request.form is not None and "vartype" in request.form:
            vartype = request.form["vartype"]

        ivars = []
        if vartype == "regress":
            if "rcont" not in session:
                session["rcont"] = list(set(request.form.getlist("variables")))
            return redirect(url_for("regress"))
        elif vartype == "output":
            corrvars = session["ovars"]
        elif vartype == "input":
            if "rout" not in session:
                session["rout"] = list(set(request.form.getlist("variables")))
            corrvars = session["ivars"]
        else:
            if "rinp" not in session:
                session["rinp"] = list(set(request.form.getlist("variables")))
            corrvars = session["cvars"]
            ivars = session["ivars"]

        csvf = data[pid]
        pltpath = app.config["PLOTPATH"] + "/" + vartype + "/scatter"

        # Decide which pairs to examine. ``None`` means "nothing to pair".
        # The control pairing must survive as built here: it is what lets
        # control variables be compared against input variables.
        if vartype == "control":
            combos = itertools.combinations(corrvars + ivars, 2)
        elif len(corrvars) == 1:
            combos = None
        else:
            combos = itertools.combinations(corrvars, 2)

        if combos is None:
            # Single variable: accept it as the selection and move on.
            if vartype == "output":
                session["rout"] = session["ovars"]
                next_stage = "input"
            elif vartype == "input":
                session["rinp"] = session["ivars"]
                next_stage = "control"
            elif vartype == "control":
                # Kept for completeness; the control stage always builds a
                # pairing above, so this branch is defensive.
                session["rcont"] = session["cvars"]
                next_stage = "regress"
            else:
                return redirect(url_for("regress"))
            return redirect(url_for("showcorr", vartype=next_stage))

        params = []
        cors = []
        nocor = []
        count = 0
        for first, second in combos:
            # Redundancy removal for input variables in control correlations
            if vartype == "control" and first in ivars and second in ivars:
                continue
            x = csvf[first].fillna(0)
            y = csvf[second].fillna(0)
            corr = round(np.corrcoef(x, y)[0][1], 2)
            if corr >= 0.70:
                pltfile = analysis.scatter(x, y, count, first, second, pltpath, vartype, corr)
                filepath = "../static/images/plots/" + vartype + "/" + pltfile
                # Different path for accessing images through python files versus html files
                session["plots"].append(filepath[2:])
                # In-place changes to a mutable session value are not
                # detected automatically, so flag the session explicitly.
                session.modified = True
                count += 1
                params.append((filepath, corr))
                cors.extend((first, second))
            elif vartype == "control":
                # Only control variables are reported as uncorrelated here.
                nocor.extend(name for name in (first, second) if name not in ivars)
            else:
                nocor.extend((first, second))

        # A variable counts as uncorrelated only if it never appeared in a
        # correlated pair.
        cors = list(set(cors))
        nocor = [item for item in set(nocor) if item not in cors]
        variables = [cors, nocor]

        msg = "none" if count == 0 else "corr"
        if len(params) > 3:
            height = str((len(params) // 3) * 500) + "px"
        else:
            height = "500px"
        return render_template(
            "scatter.html", params=params, vars=variables, vartype=vartype, msg=msg, height=height
        )
    except Exception:
        # NOTE(review): no response is returned on this path; confirm how the
        # caller/framework is expected to handle that.
        app.logger.exception(traceback.format_exc())
        flash("Sorry, an internal error occurred.")
Example #3
0
def showcorr(vartype=None):
	"""Show scatter plots for strongly correlated variable pairs of one stage.

	The stage is ``vartype`` or, when the posted form carries a ``"vartype"``
	field, that field's value:

	* ``"regress"``: store the posted ``'variables'`` as ``session["rcont"]``
	  (unless already set) and redirect to the ``regress`` endpoint.
	* ``"output"``: pair up ``session['output']``.
	* ``"input"``: store the posted selection as ``session["rout"]`` (unless
	  already set) and pair up ``session["input"]``.
	* anything else: store the posted selection as ``session["rinp"]``
	  (unless already set) and pair up ``session["control"]``. When the stage
	  is exactly ``"control"`` the input variables join the pairing, but
	  input/input pairs are skipped.

	Pairs whose Pearson coefficient (missing values filled with 0, rounded to
	two decimals) is at least 0.70 are plotted through ``analysis.scatter``
	and recorded in ``session["plots"]``. For the control stage, control
	variables that never appear in a correlated pair are passed to the
	template as ``vars``.

	If a non-control stage has exactly one variable there is nothing to
	pair, so that stage's variables are stored as its selection and the
	request is redirected to the next stage.

	Returns:
		The rendered ``scatter.html`` page or a redirect response. If any
		exception is raised it is logged, a message is flashed, and ``None``
		is returned.
	"""
	try:
		# Debug aid; routed through the logger instead of stdout.
		app.logger.debug("session Dictionary %s", session)
		vartype = str(vartype)
		if request.form is not None and "vartype" in request.form:
			vartype = request.form["vartype"]

		ivars = []
		if vartype == "regress":
			if "rcont" not in session:
				session["rcont"] = list(set(request.form.getlist('variables')))
			return redirect(url_for("regress"))
		elif vartype == "output":
			corrvars = session['output']
		elif vartype == "input":
			if "rout" not in session:
				session["rout"] = list(set(request.form.getlist('variables')))
			corrvars = session["input"]
		else:
			if "rinp" not in session:
				session["rinp"] = list(set(request.form.getlist('variables')))
			corrvars = session["control"]
			ivars = session["input"]

		pid = session["pid"]
		csvf = data[pid]
		pltpath = app.config['PLOTPATH'] + '/' + vartype + '/scatter'

		# Decide which pairs to examine. ``None`` means "nothing to pair".
		# The control pairing must survive as built here: it is what lets
		# control variables be compared against input variables.
		if vartype == "control":
			combos = itertools.combinations(corrvars + ivars, 2)
		elif len(corrvars) == 1:
			combos = None
		else:
			combos = itertools.combinations(corrvars, 2)

		if combos is None:
			# Single variable: accept it as the selection and move on.
			if vartype == "output":
				session["rout"] = session["output"]
				next_stage = "input"
			elif vartype == "input":
				session["rinp"] = session["input"]
				next_stage = "control"
			elif vartype == "control":
				# Kept for completeness; the control stage always builds a
				# pairing above, so this branch is defensive.
				session["rcont"] = session["control"]
				next_stage = "regress"
			else:
				return redirect(url_for("regress"))
			return redirect(url_for("showcorr", vartype=next_stage))

		params = []
		cors = []
		nocor = []
		count = 0
		for first, second in combos:
			# Redundancy removal for input variables in control correlations
			if vartype == "control" and first in ivars and second in ivars:
				continue
			x = csvf[first].fillna(0)
			y = csvf[second].fillna(0)
			corr = round(np.corrcoef(x, y)[0][1], 2)
			if corr >= 0.70:
				pltfile = analysis.scatter(x, y, count, first, second, pltpath, vartype, corr)
				filepath = '../static/images/plots/' + vartype + '/' + pltfile
				# Different path for accessing images through python files versus html files
				session["plots"].append(filepath[2:])
				# In-place changes to a mutable session value are not
				# detected automatically, so flag the session explicitly.
				session.modified = True
				count += 1
				params.append((filepath, corr, first, second))
				cors.append(first)
				cors.append(second)
			elif vartype == "control":
				# Collect control variables that did not correlate in this pair.
				if first not in ivars:
					nocor.append(first)
				if second not in ivars:
					nocor.append(second)

		# Filter once every pair has been seen, so a variable that correlates
		# in a later pair is not left in the uncorrelated list.
		cors = list(set(cors))
		nocor = [item for item in set(nocor) if item not in cors]

		if count == 0:
			msg = "none"
		else:
			msg = "corr"
		return render_template("scatter.html", params=params, vars=nocor, vartype=vartype, msg=msg)
	except Exception:
		# NOTE(review): no response is returned on this path; confirm how the
		# caller/framework is expected to handle that.
		app.logger.exception(traceback.format_exc())
		flash('Sorry, an internal error occurred.')
Example #4
0
def main(context):
    """Main function takes a Spark SQL context.

    First tries to obtain the full scored dataset directly through
    ``run_full`` (presumably a previously saved result -- confirm against
    ``run_full``). If that fails for any ordinary error, the whole pipeline
    is run instead: read comments, submissions and labels, sanitize,
    vectorize, binary-label, train one regression model per tag, and score
    the full comment set.

    The resulting frames are then passed to the ``analysis`` routines and
    ``map_wrap_to_pandas`` for every tag and every positive/negative tag
    pair.

    Args:
        context: Spark SQL context handed to every reader and helper.
    """
    tags = ["demP", "demN", "gopP", "gopN", "djtP", "djtN"]

    try:
        print("Attempting to load full dataset...")
        df_full = run_full(0, context, 0, 0, 0, True)

        df_sub = read_submission(context)
    except Exception as err:
        # Only ordinary failures trigger the rebuild; Ctrl-C and SystemExit
        # propagate instead of starting the expensive pipeline.
        print("Could not load full dataset ({0!r}); rebuilding it".format(err))

        # --- User defined functions ---
        sanitize = udf(sanitizeX, ArrayType(StringType()))

        # --- Read Files --- #
        print("Loading Files")
        df_comm = read_comments_minimal(context)
        df_sub = read_submission(context)
        df_lab = read_csv("data/labeled_data.csv", context)

        # --- Retrieving labeled comments --- #
        print("Retrieving labeled comments")
        df_c_lab = retrieve_labeled_comments(df_comm, df_lab, context)

        # --- Clean --- #
        print("Sanitizing labeled")
        df_clean = clean_df(df_c_lab, sanitize, context)

        # --- Vectorize --- #
        print("Vectorizing labeled")
        df_vector, CVmodel, count_v = createCV(df_clean, context)

        # --- Binary Labeling --- #
        print("Setting binary labels")
        df_labeled_training = binary_label(df_vector, context, name="training_labeled")

        # --- Regression Training --- #
        print("Training Regression model")
        models = {}
        for t in tags:
            models[t] = spark_regression.regression(df_labeled_training, t, t)

        # --- Run on full file --- #
        print("Running on full set")
        df_full = run_full(df_comm, context, sanitize, CVmodel, models)

    # Top stories and map data over time
    for tag in tags:
        analysis.top_stories(df_full, df_sub, context, tag)
        analysis.top_stories(df_full, df_sub, context, tag, 10)
        map_wrap_to_pandas(df_full, context, tag)

    # Scatter, sentiment, map data
    for t in [["demP", "demN"], ["gopP", "gopN"], ["djtP", "djtN"]]:
        map_wrap_to_pandas(df_full, context, t[0], t[1])
        analysis.sentiment_over_time(df_full, context, t[0], t[1])
        analysis.scatter(df_full, df_sub, context, t[0], t[1], 1)
        analysis.scatter(df_full, df_sub, context, t[0], t[1], 100)

    # Total Republican Scatter
    analysis.total_scatter(df_full, df_sub, context)