Beispiel #1
0
def generate_scores():
	"""
	Description: Rebuild the per-state score documents stored in ``db.scores``.

	Loads and cleans ``../data/train.csv``, empties the ``db.scores`` collection
	and then saves one document per state of the form
	``{'state': ..., 'fscore': ..., 'tscore': ...}``.

	Each score is a fixed pseudo-random factor (seed 9 for ``fscore``, seed 10
	for ``tscore``) multiplied by the fraction of rows in the cleaned data whose
	``state`` column equals that state.

	Returns None. Raises ZeroDivisionError if the cleaned data has no rows.
	"""
	states = ['IN', 'NY', 'PA', 'WV', 'MO', 'OH', 'OK', 'FL', 'OR', 'WA', 'KS',
	          'NV', 'ID', 'CO', 'CT', 'AL', 'AR', 'NM', 'MS', 'MD', 'RI', 'UT',
	          'ME', 'TN', 'WI', 'MT', 'KY', 'WY', 'NE', 'ND', 'DE', 'GA', 'NH',
	          'IA', 'DC', 'SD']
	score_data = "../data/train.csv"
	data = ProcessData()
	# NOTE(review): other callers use get_data/clean_data -- confirm which
	# spelling this version of ProcessData exposes.
	df_score = data.getData(score_data)
	df_score = data.cleanData(df_score)

	db.scores.remove()

	# Both draws are re-seeded with constants, so they are the same for every
	# state; draw them once instead of once per loop iteration. The global
	# RNG is left in the same state as before (seed 10 plus one draw).
	random.seed(9)
	fscore = random.random()
	random.seed(10)
	tscore = random.random()

	for state in states:
		# Filter once per state and reuse the count for both scores.
		state_rows = len(df_score[df_score.state == state])
		f_state_weight = fscore * state_rows / len(df_score)
		t_state_weight = tscore * state_rows / len(df_score)
		db.scores.save({'state': state, 'fscore': f_state_weight, 'tscore': t_state_weight})

	return
def customerAnalytics():
    """
    Description: Build the purchase-prediction classifier and pickle it.

    Loads and cleans ``../data/train.csv``, converts it into features ``X`` and
    target ``y`` via ``ProcessData.featurize_data`` (which is also given ``db``;
    per the original notes the features combine quote transactions with
    per-state scores derived from social media such as facebook and twitter,
    kept at state level because more detailed metrics could not be computed).
    A random forest is then passed through ``ModelValidation.get_score`` and
    written to the file ``predict-purchase`` for the web app to load.

    Returns None.
    """
    train_data = "../data/train.csv"

    data = ProcessData()
    df_train = data.get_data(train_data)
    df_train = data.clean_data(df_train)

    X, y = data.featurize_data(df_train, db)
    del df_train  # memory optimization

    # Build model and run validation
    clf = RandomForestClassifier(verbose=10,
                                 n_estimators=10,
                                 n_jobs=-1,
                                 max_features=5)
    model = ModelValidation()
    # NOTE(review): the pickled clf is only useful if get_score fits it in
    # place -- confirm against ModelValidation.
    model.get_score(clf, X, y)

    # Build a pickle for the web app to start the purchase prediction
    # (compute conversion scores). The with-block guarantees the file is
    # flushed and closed even if pickling fails.
    with open('predict-purchase', "wb") as model_file:
        cp.dump(clf, model_file)
def customerAnalytics():
	"""
	Description: Purchase prediction based on the quote transactions and scores
	generated from social media like facebook and twitter. Customer sentiment is
	derived from the number of followers from each state; the scores are kept at
	state level for now because more detailed metrics could not be computed.

	Loads and cleans ``../data/train.csv``, featurizes it (``featurize_data`` is
	also given ``db``), passes a random forest through
	``ModelValidation.get_score`` and pickles the classifier to the file
	``predict-purchase``.

	Returns None.
	"""
	train_data = "../data/train.csv"

	data = ProcessData()
	df_train = data.get_data(train_data)
	df_train = data.clean_data(df_train)

	X, y = data.featurize_data(df_train, db)
	del df_train  # memory optimization

	# Build model and run validation
	clf = RandomForestClassifier(verbose=10, n_estimators=10, n_jobs=-1, max_features=5)
	model = ModelValidation()
	# NOTE(review): assumes get_score fits clf in place before it is pickled
	# below -- confirm against ModelValidation.
	model.get_score(clf, X, y)

	# Build a pickle for the web app to start the purchase prediction
	# (compute conversion scores). Use a context manager so the file handle
	# is always closed.
	with open('predict-purchase', "wb") as model_file:
		cp.dump(clf, model_file)
    def __init__(self):
        """
        Train one recommender per item column ``A`` .. ``F``.

        Reads and cleans ``../data/train.csv``, keeps only the rows whose
        ``record_type`` is 1, wraps them in an ``SFrame`` and stores the six
        resulting models as ``self.modelA`` .. ``self.modelF``. Every model
        uses ``customer_ID`` as the user column.
        """
        cleaner = ProcessData()
        frame = cleaner.clean_data(cleaner.get_data("../data/train.csv"))
        frame = frame[frame.record_type == 1]
        sf = SFrame(data=frame)
        del frame  # memory optimization

        # Same construction for every item column; only the column differs.
        for column in "ABCDEF":
            fitted = recommender.create(sf,
                                        user_column="customer_ID",
                                        item_column=column)
            setattr(self, "model" + column, fitted)
    def __init__(self):
        """
        Fit the six per-column recommenders (``modelA`` .. ``modelF``).

        The training rows come from ``../data/train.csv`` after cleaning and
        are restricted to ``record_type == 1`` before being converted to an
        ``SFrame``. Each model is created with ``customer_ID`` as the user
        column and one of the columns ``A`` .. ``F`` as the item column.
        """
        source_csv = "../data/train.csv"
        loader = ProcessData()
        records = loader.get_data(source_csv)
        records = loader.clean_data(records)
        records = records[records.record_type == 1]
        sf = SFrame(data=records)
        del records  # memory optimization

        def fit(item_column):
            # All models share the frame and user column.
            return recommender.create(sf, user_column="customer_ID", item_column=item_column)

        self.modelA = fit("A")
        self.modelB = fit("B")
        self.modelC = fit("C")
        self.modelD = fit("D")
        self.modelE = fit("E")
        self.modelF = fit("F")
Beispiel #6
0
def main():
	"""Load the three input workbooks, combine them and run post-processing."""
	# One handler per input spreadsheet.
	# TODO: also add these files into the git repo.
	plagiarism = PlagiarismHandler('AlgorithmsP1-plagiarism-final.xlsx')
	enrolment = EnrolmentHandler('Algo P1 -final.xlsx')
	contest = ContestHandler('AlgorithmsP1-result-final.xlsx')

	# The processor combines the three handlers; the post-processing handler
	# then takes the processor and produces the reports.
	PostProcessHandler(ProcessData(plagiarism, enrolment, contest))
def main():
	# -----------------------------------------------------------
	# parse command line args
	# -----------------------------------------------------------
	parser = argparse.ArgumentParser()
	parser.add_argument("--portname", help="for realtime: name of portname (com* for windows or /dev/tty.usbmodem* for mac)",
	                    type=str)
	parser.add_argument("--datafile", help="for offline: name of sensor data file to plot3d from",
	                    type=str)

	parser.add_argument("--output", help="save data to OUTPUT",
                    type=str)
	args = parser.parse_args()

	out_file = ''
	if args.output:
		out_file = args.output

	#strPort = '/dev/tty.usbserial-A7006Yqh'
	strPort = args.portname;

	# initialize raw data processing 
	sensor = ProcessData()

	if (not args.portname) and (not args.datafile):
		print 'please specify either portname of data file (see python plotData.py -h for usage'
		exit(1)
	if (args.portname) and (args.datafile):
		print 'you cant do both my dear'
		exit(1)


	print 'plotting data...'

	# -----------------------------------------------------------
	# 2D plotting
	# -----------------------------------------------------------
	# open serial port
	if strPort:
		ser = serial.Serial(port = strPort, baudrate = 9600, timeout= 2) 
		line = ser.readline()   # this is NEEDED before writing tx 
		nbytes = ser.write("tx".encode('ascii'))

		polar = PolarPlot()
		lines = []
		while True:
			try:
				if ser.readable(): 
					line = ser.readline()
					# print line 
					data = [float(val) for val in line.split(',')]
					# print data
					if(len(data)==9):
						(pitch, roll, yaw) = sensor.process(data[3:len(data)])
						signalStrength = data[0]
						# if flag:
						# 	angles.add([pitch, roll, yaw])
						# 	anglesPlot.update(angles)
						# else:
						polar.update(signalStrength, yaw)
						if out_file:
							fileline = ' '.join([str(val) for val in data])
							lines.append(fileline)
			except ValueError:
				if not line: 
					userInput = raw_input('receive data again? or else exit (y/n)? ')
					if userInput.lower() == 'y':
						# polar.xdata = []
						# polar.ydata = []
						nbytes = ser.write("tx".encode('ascii'))
					else:
						print 'exiting loop'
						break
				else: 
					print 'bad data', line
			except KeyboardInterrupt:
				print 'exiting'
				break

	# -----DEBUG CODE------------
		# polar = PolarPlot()
		# lines = []
		# with open('data2.txt') as f:
		# 	for line in f: 
		# 		# print line 
		# 		try:
		# 			data = [float(val) for val in line.split(',')]
		# 			# print data
		# 			if(len(data)==9):
		# 				(pitch, roll, yaw) = sensor.process(data[3:len(data)])
		# 				print 3*'%-10.2f' %(pitch,roll,yaw)
		# 				signalStrength = data[0]
		# 				# if flag:
		# 				# 	angles.add([pitch, roll, yaw])
		# 				# 	anglesPlot.update(angles)
		# 				# else:
		# 				polar.update(signalStrength, yaw)
		# 				if out_file:
		# 					fileline = ' '.join([str(val) for val in data])
		# 					lines.append(fileline)
		# 		except ValueError:
		# 			pass
	# ---END DEBUG CODE----------



	# save data to output is user specifies output args 
	if out_file:
		lines = '\n'.join(lines)
		with open(out_file, 'w') as f:
			f.writelines(lines)

	# -----------------------------------------------------------
	# 3D plotting
	# -----------------------------------------------------------
	if args.datafile:
		polar = Polar3D()
		sensor = ProcessData()
		in_file = args.datafile
		with open(in_file) as in_f:
			for line in in_f: 
				try:
					data = [float(val) for val in line.split(',')]
					if(len(data)==9):
						(pitch, roll, yaw) = sensor.process(data[3:len(data)])
						signalStrength = data[0]
						polar.update(signalStrength, pitch, roll, yaw)
				except ValueError:
					print 'bogus data', line 
				except (KeyboardInterrupt,SystemExit):
					print 'exiting'
					# plt.close('all')
					break

	plt.show(block=True)



	# close serial
	if strPort:
		ser.flush()
		ser.close()