def main(): filename = "stars_data.csv" data = a.read_data(filename) data.pop(0) random.shuffle(data) frequency = a.frequency_word(data) data_neg = [x for x in data if int(x[6]) == 1] data_pos = [x for x in data if int(x[6]) == 5] matrix_pos = np.zeros((2000,2500)) matrix_neg = np.zeros((2000,2500)) matrix_pos = cluster.create_matrix(matrix_pos,data_pos,frequency) matrix_neg = cluster.create_matrix(matrix_neg,data_neg,frequency) kmeans_feature = cluster.kmeans_bin(data,matrix_pos,matrix_neg,frequency,50) smeans_feature = cluster.smeans_bin(data,matrix_pos,matrix_neg,frequency,50) origin_feature = a.create_binary_feature(data,frequency,6) sample_origin_feature = a.create_binary_feature(data,random.sample(frequency,100),6) combine_feature = combine(kmeans_feature,sample_origin_feature) print "Test1" test1(matrix_pos,matrix_neg) print "Test2" test2(kmeans_feature,smeans_feature) print "Test3" test3(origin_feature,kmeans_feature) print "Test4" test4(sample_origin_feature,kmeans_feature,combine_feature)
def main(args): #take argument trainfile = args[1] testfile = args[2] classlabel = int(args[3]) printWord = int(args[4]) #set train file an dtest file train = a.read_data(trainfile) test = a.read_data(testfile) #get top 2000 frequency fre = a.frequency(train) #if yes, print Words if (printWord == 1): a.printTopwords(fre) #create binary feature for boss data train = a.create_binary_feature(train,fre,classlabel) test = a.create_binary_feature(test,fre,classlabel) #get probability table based on train data prob_table,pYes,pNo = a.train_nbc(train) #use probability table for testing,and return result result = a.test_nbc(prob_table,test,pYes,pNo) #get test class label classlabel = [x[-1] for x in test] #use zero one difference figure out result diff = a.zero_onr_loss(result,classlabel) print "ZERO-ONE-LOSS {0}".format(diff)
def main(): #data preprocessing filename = "stars_data.csv" data = a.read_data(filename) data.pop(0) random.shuffle(data) words = a.frequency_word(data) features = a.create_binary_feature(data,words,6) words.append("isPositive") words.append("isNegative") minsupport = 0.03 minconf = 3.81 L,support_count = apriori.frequentItemsetGeneration(features,words,minsupport) print len(L[0]) + len(L[1]) + len(L[2]) rules = ruleG(L,support_count,minconf) print len(rules) rules = sorted(rules.items(),key=operator.itemgetter(1),reverse= True) rules = [rules[i] for i in range(30)] for rule in rules: print rule
def A_Cabins(filepath=r'Data_Input\cabins.csv'):
    """
    --------------------------------------------------------------------------
    Calculate the total passenger cabin area from the cabins data file
    --------------------------------------------------------------------------
    Input:
        filepath - str, path to the cabins CSV file,
                   default=r'Data_Input\cabins.csv'
    --------------------------------------------------------------------------
    Output:
        A_Pas_Cabins - total passenger cabin area [m2], the sum over cabin
                       types of N_Cabins * Area
    --------------------------------------------------------------------------
    """
    # Read cabin data
    dt = dpro.read_data(filepath=filepath, print_stats=False)
    # Derived columns: total berths and total area per cabin type.
    # NOTE(review): 'N_Berths' is computed but not used here — presumably
    # consumed by a caller inspecting dt elsewhere; confirm before removing.
    dt['N_Berths'] = dt['N_Cabins'] * dt['Berths']
    dt['A_Cabins'] = dt['N_Cabins'] * dt['Area']
    A_Pas_Cabins = dt['A_Cabins'].sum()
    return A_Pas_Cabins
def main(): #data preprocessing filename = "stars_data.csv" data = a.read_data(filename) data.pop(0) random.shuffle(data) words = a.frequency_word(data) features = a.create_binary_feature(data,words,6) words.append("isPositive") words.append("isNegative") minsupport = 0.03 minconf = 0.25 D = construct(features,words) D = map(set, D) t = [] t.append(frozenset(['friendly'])) t.append(frozenset(['isPositive'])) t.append(frozenset(['staff'])) t.append(frozenset(['favorite'])) q2(D,t) '''
# NOTE(review): this line is whitespace-mangled — everything after the first
# '#' is one long comment, so as written only `start_time = time.time()`
# actually executes. The logical content (global flags, optional database
# merge, read/fill/process of ShipData) ends mid-statement at
# `if setfilters:`, whose body lies beyond this chunk, so the code is left
# byte-identical here; reflowing it safely requires the full file.
start_time = time.time() #%% Set global parameters setfilters = True #Set argument for running the filter UpdateReport = False #Update the report once the code has executed MergeDatabase = False #Update the database file from the database #%% Read and analyze the data #Merge the database files if MergeDatabase: dp.merge_database(path='Ship_Database_Raw_Archives/', ShipType='-', ShipTypeTags='-') #Read dataframe - database ShipData = dp.read_data(print_summary=True) #Aplly missing data fill filters fill_filters = [['MCR', 85], ['L_LaneTrailer', 0], ['L_LaneTrailer(or)', 0], ['L_LaneCar', 0], ['L_LaneCar(or)', 0], ['L_TrainTrack', 0], ['N_Trailers', 0], ['N_Trailers(or)', 0], ['N_Cars', 0], ['N_Cars(or)', 0], ['L_Car', 4.5], ['L_Trailer', 12], ['L_TrainTrack', 0], ['LNG Tanks', 0]] ShipData = dp.batch_fill_data(ShipData, fill_filters) #Process the data #ADD: Analyse the data for pass and vehicle area vs no decks ShipData = dp.data_process(ShipData) #Filter the data if setfilters: