def generate_scores():
    """Build a per-state scoring record and persist it to the ``scores`` collection.

    Reads the training data, cleans it, and for every known state stores a
    document ``{'state', 'fscore', 'tscore'}`` where each score is a fixed
    pseudo-random base factor weighted by the fraction of customers from that
    state. The collection is wiped and rebuilt on every call.
    """
    states = ['IN', 'NY', 'PA', 'WV', 'MO', 'OH', 'OK', 'FL', 'OR', 'WA',
              'KS', 'NV', 'ID', 'CO', 'CT', 'AL', 'AR', 'NM', 'MS', 'MD',
              'RI', 'UT', 'ME', 'TN', 'WI', 'MT', 'KY', 'WY', 'NE', 'ND',
              'DE', 'GA', 'NH', 'IA', 'DC', 'SD']
    score_data = "../data/train.csv"
    data = ProcessData()
    # NOTE(review): sibling functions call get_data/clean_data (snake_case);
    # confirm which spelling ProcessData actually exposes.
    df_score = data.getData(score_data)
    df_score = data.cleanData(df_score)

    # Fixed seeds make the base factors deterministic. They were previously
    # reseeded inside the loop, which produced the exact same value on every
    # iteration — draw each factor once instead.
    random.seed(9)
    fscore = random.random()
    random.seed(10)
    tscore = random.random()

    # float() guards against Python-2 integer division, which silently
    # truncated every per-state fraction to 0.
    total = float(len(df_score))

    db.scores.remove()  # rebuild the collection from scratch
    for state in states:
        state_fraction = len(df_score[df_score.state == state]) / total
        db.scores.save({
            'state': state,
            'fscore': fscore * state_fraction,
            'tscore': tscore * state_fraction,
        })
def customerAnalytics():
    """Train the purchase-prediction model and pickle it for the web app.

    Loads and cleans the training quote transactions, featurizes them
    (joining in the social-media scores stored in ``db``), validates a
    random-forest baseline, and serializes the classifier to the
    ``predict-purchase`` file the web app loads to compute conversion scores.
    """
    train_data = "../data/train.csv"
    data = ProcessData()
    df_train = data.get_data(train_data)
    df_train = data.clean_data(df_train)
    X, y = data.featurize_data(df_train, db)
    del df_train  # memory optimization: raw frame no longer needed

    # Build model and run validation
    clf = RandomForestClassifier(verbose=10, n_estimators=10, n_jobs=-1,
                                 max_features=5)
    model = ModelValidation()
    model.get_score(clf, X, y)

    # Pickle the classifier for the web app. `with` guarantees the file
    # handle is closed even if dump() raises (the original leaked it).
    with open('predict-purchase', "wb") as out:
        cp.dump(clf, out)
def customerAnalytics():
    """Train the purchase-prediction model and pickle it for the web app.

    Loads and cleans the training quote transactions, featurizes them
    (joining in the social-media scores stored in ``db``), validates a
    random-forest baseline, and serializes the classifier to the
    ``predict-purchase`` file the web app loads to compute conversion scores.
    """
    train_data = "../data/train.csv"
    data = ProcessData()
    df_train = data.get_data(train_data)
    df_train = data.clean_data(df_train)
    X, y = data.featurize_data(df_train, db)
    del df_train  # memory optimization: raw frame no longer needed

    # Build model and run validation
    clf = RandomForestClassifier(verbose=10, n_estimators=10, n_jobs=-1,
                                 max_features=5)
    model = ModelValidation()
    model.get_score(clf, X, y)

    # Pickle the classifier for the web app. `with` guarantees the file
    # handle is closed even if dump() raises (the original leaked it).
    with open('predict-purchase', "wb") as out:
        cp.dump(clf, out)
def __init__(self):
    """Train one item recommender per product option (A-F) from purchase rows.

    Builds six recommenders, one for each option column, exposed as the
    attributes ``self.modelA`` through ``self.modelF``.
    """
    rec_data = "../data/train.csv"
    data = ProcessData()
    df_rec = data.get_data(rec_data)
    df_rec = data.clean_data(df_rec)
    # NOTE(review): record_type == 1 presumably selects completed purchases
    # rather than quote rows — confirm against the data dictionary.
    df_rec = df_rec[df_rec.record_type == 1]
    sf = SFrame(data=df_rec)
    del df_rec  # memory optimization: SFrame holds the data now
    # One model per option column. setattr keeps the original
    # self.modelA ... self.modelF attribute interface intact while
    # replacing the sixfold copy-pasted create() calls.
    for option in ('A', 'B', 'C', 'D', 'E', 'F'):
        setattr(self, 'model' + option,
                recommender.create(sf, user_column="customer_ID",
                                   item_column=option))
def main():
    """Run the report pipeline: ingest three workbooks, process, and report."""
    # Wrap each input workbook in its dedicated handler.
    plagiarism = PlagiarismHandler('AlgorithmsP1-plagiarism-final.xlsx')
    # Also, add these files into git repo
    enrolment = EnrolmentHandler('Algo P1 -final.xlsx')
    contest = ContestHandler('AlgorithmsP1-result-final.xlsx')

    # Merge the three sources into a single processed dataset.
    processed = ProcessData(plagiarism, enrolment, contest)

    # Generate the final reports from the processed data.
    PostProcessHandler(processed)
def main():
    """Plot sensor signal strength against orientation, live (2D, from a
    serial port) or offline (3D, from a recorded data file).

    Exactly one of --portname / --datafile must be given; --output optionally
    records the raw samples to a file.
    """
    # -----------------------------------------------------------
    # parse command line args
    # -----------------------------------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument("--portname",
                        help="for realtime: name of portname (com* for windows or /dev/tty.usbmodem* for mac)",
                        type=str)
    parser.add_argument("--datafile",
                        help="for offline: name of sensor data file to plot3d from",
                        type=str)
    parser.add_argument("--output", help="save data to OUTPUT", type=str)
    args = parser.parse_args()

    out_file = ''
    if args.output:
        out_file = args.output

    #strPort = '/dev/tty.usbserial-A7006Yqh'
    strPort = args.portname;

    # initialize raw data processing
    sensor = ProcessData()

    # Enforce exactly one input source: a serial port OR a recorded file.
    if (not args.portname) and (not args.datafile):
        print 'please specify either portname of data file (see python plotData.py -h for usage'
        exit(1)
    if (args.portname) and (args.datafile):
        print 'you cant do both my dear'
        exit(1)

    print 'plotting data...'

    # -----------------------------------------------------------
    # 2D plotting (live serial mode)
    # -----------------------------------------------------------
    # open serial port
    if strPort:
        ser = serial.Serial(port = strPort, baudrate = 9600, timeout= 2)
        line = ser.readline()  # this is NEEDED before writing tx
        # "tx" asks the device to start transmitting samples.
        nbytes = ser.write("tx".encode('ascii'))
        polar = PolarPlot()
        lines = []
        while True:
            try:
                if ser.readable():
                    line = ser.readline()
                    # print line
                    # Each sample is a comma-separated line of 9 floats;
                    # a malformed line raises ValueError (handled below).
                    data = [float(val) for val in line.split(',')]
                    # print data
                    if(len(data)==9):
                        # data[3:] feeds orientation; data[0] is signal strength.
                        (pitch, roll, yaw) = sensor.process(data[3:len(data)])
                        signalStrength = data[0]
                        # if flag:
                        #     angles.add([pitch, roll, yaw])
                        #     anglesPlot.update(angles)
                        # else:
                        polar.update(signalStrength, yaw)
                        if out_file:
                            fileline = ' '.join([str(val) for val in data])
                            lines.append(fileline)
            except ValueError:
                # Empty line means the device stopped sending; ask whether
                # to re-arm transmission or quit. Non-empty means garbage.
                if not line:
                    # NOTE(review): prompt reconstructed with '\n' — the
                    # original source was line-mangled here; confirm.
                    userInput = raw_input('receive data again? or else exit (y/n)? \n')
                    if userInput.lower() == 'y':
                        # polar.xdata = []
                        # polar.ydata = []
                        nbytes = ser.write("tx".encode('ascii'))
                    else:
                        print 'exiting loop'
                        break
                else:
                    print 'bad data', line
            except KeyboardInterrupt:
                print 'exiting'
                break

    # -----DEBUG CODE------------
    # polar = PolarPlot()
    # lines = []
    # with open('data2.txt') as f:
    #     for line in f:
    #         # print line
    #         try:
    #             data = [float(val) for val in line.split(',')]
    #             # print data
    #             if(len(data)==9):
    #                 (pitch, roll, yaw) = sensor.process(data[3:len(data)])
    #                 print 3*'%-10.2f' %(pitch,roll,yaw)
    #                 signalStrength = data[0]
    #                 # if flag:
    #                 #     angles.add([pitch, roll, yaw])
    #                 #     anglesPlot.update(angles)
    #                 # else:
    #                 polar.update(signalStrength, yaw)
    #                 if out_file:
    #                     fileline = ' '.join([str(val) for val in data])
    #                     lines.append(fileline)
    #         except ValueError:
    #             pass
    # ---END DEBUG CODE----------

    # save data to output is user specifies output args
    # NOTE(review): `lines` is only defined in the serial branch; original
    # indentation was mangled, so whether this sat inside `if strPort:` is
    # unverifiable — confirm before relying on the --datafile + --output combo.
    if out_file:
        lines = '\n'.join(lines)
        with open(out_file, 'w') as f:
            f.writelines(lines)

    # -----------------------------------------------------------
    # 3D plotting (offline file mode)
    # -----------------------------------------------------------
    if args.datafile:
        polar = Polar3D()
        sensor = ProcessData()
        in_file = args.datafile
        with open(in_file) as in_f:
            for line in in_f:
                try:
                    data = [float(val) for val in line.split(',')]
                    if(len(data)==9):
                        (pitch, roll, yaw) = sensor.process(data[3:len(data)])
                        signalStrength = data[0]
                        polar.update(signalStrength, pitch, roll, yaw)
                except ValueError:
                    print 'bogus data', line
                except (KeyboardInterrupt,SystemExit):
                    print 'exiting'
                    # plt.close('all')
                    break
        plt.show(block=True)

    # close serial
    if strPort:
        ser.flush()
        ser.close()