def remove_points(table, choice='1'):
    """Prune data that lies in long gaps between errors and store the result.

    Reads ``(timestamp, error)`` rows from ``table``, collects the distinct
    timestamps whose ``error`` equals 1 and walks consecutive pairs of them
    in ascending order.  For every pair that is at least seven hours apart,
    the dataset returned by ``get_data`` is passed through ``method1`` (or
    ``method2``).  The final dataset is converted with ``dict_to_list``,
    written with ``push_to_db`` and handed to ``start`` together with the
    distinct dates it contains.

    The timestamps are used as ``datetime`` objects: they are subtracted
    from each other and ``.date()`` is called on them.

    Args:
        table: Name of the source table.  It is interpolated directly into
            the SQL text (identifiers cannot be bound as parameters), so it
            must come from a trusted source.
        choice: ``'1'`` (default) prunes with ``method1``; ``'2'`` prunes
            with ``method2``.

    Raises:
        ValueError: If ``choice`` is neither ``'1'`` nor ``'2'``.
    """
    if choice not in ('1', '2'):
        raise ValueError("choice must be '1' or '2', got {!r}".format(choice))

    # The original opened this file for a stdout redirect that is disabled.
    # Keep the "create the file if missing" side effect, but close the handle
    # right away instead of leaking it.
    open('output.txt', 'a').close()

    sql = "SELECT timestamp, error from {}".format(table)
    print(sql)
    db = connect_db()
    cursor = db.cursor()
    cursor.execute(sql)
    rows = cursor.fetchall()

    # Distinct timestamps of rows flagged as errors, oldest first.
    ts_list = sorted({row[0] for row in rows if row[1] == 1})
    print(ts_list)

    dataset = get_data(db, table)
    min_gap_seconds = 7 * 3600
    for current_ts, next_ts in zip(ts_list, ts_list[1:]):
        gap = next_ts - current_ts
        # total_seconds() includes whole days.  ``gap.seconds`` alone wraps
        # every 24 hours, so a gap of 1 day 2 hours would look like 2 hours
        # and be skipped.
        if gap.total_seconds() < min_gap_seconds:
            continue

        print("Time before next error occurs : {} days : {} hours : {} minutes".format(
            gap.days, gap.seconds // 3600, (gap.seconds // 60) % 60))
        print("Next error occurs at : {} and today's date : {}".format(
            next_ts, current_ts.date()))
        print("Conserving 6hr of data before an error occurs, we can remove {} hours of data points".format(
            gap - datetime.timedelta(hours=6)))
        print("\n")

        if choice == '1':
            dataset = method1(current_ts, next_ts, dataset)
            table = 'balanced_manual_new_small_ds1_tse_temporal_lookback4_m1_29_04'
        else:
            dataset = method2(current_ts, dataset)
            table = 'balanced_manual_new_small_ds1_tse_temporal_lookback4_method2'
        # Single formatted argument: a multi-argument parenthesised print
        # shows up as a tuple under the Python 2 print statement.
        print("Final length of dataset is : {}".format(len(dataset)))

    # NOTE(review): when no gap reaches the threshold, ``table`` still names
    # the source table and the unmodified dataset is pushed back into it --
    # confirm that this is intended.
    print(type(dataset))
    ll_dataset = dict_to_list(dataset)
    dates = {row[0].date() for row in ll_dataset}
    print("Length of ts list : {}".format(len(dates)))
    push_to_db(ll_dataset, table)
    start(table, list(dates))
def main(): # option1 = raw_input("Enter your choice on how to balance dataset.:\nEnter 1 for Over-Sampling\nEnter 2 for Under-Sampling\n") option1 = 1 option1 = int(option1) if option1 == 1: category = 'over_sampling' print "The following Algorighm will be used for over-sampling :" print "1 : Random Over Sampler" print "2 : SMOTE" print "3 : SMOTE-Boderline 1" print "4 : SMOTE-Boderline 2" print "5 : SMOTE-SVM" print "6 : SMOTE-Tomek Links" print "7 - SMOTE-ENN" print "8 : EasyEnsemble" print "9 : BalanceCascade" choice = raw_input("Enter your choice : ") #choice = randint(1,4) choice = int(choice) if choice == 1: algorithm = 'random_over_sampling' elif choice == 2: algorithm = 'smote' elif choice == 3: algorithm = 'smote_boderline1' elif choice == 4: algorithm = 'smote_boderline2' elif choice == 5: algorithm = 'smote_svm' elif choice == 6: algorithm = 'smote_tomek_links' elif choice == 7: algorithm = 'smote_enn' elif choice == 8: algorithm = 'easy_ensemble' elif choice == 9: algorithm = 'balance_cascade' elif option1 == 2: category = 'under_sampling' print "The following Algorighm will be used for under-sampling :" print "1 : Random Under Sampler" print "2 : Tomek links" print "3 : Clustering centroids" print "4 : NearMiss-1" print "5 : NearMiss-2" print "6 : NearMiss-3" print "7 : Condensed Nearest Neighbour" print "8 : One-Sided Selection" print "9 : Neighboorhood Cleaning Rule" # choice = raw_input("Enter your choice : ") choice = randint(1,9) choice = int(choice) if choice == 1: algorithm = 'under_sampling' elif choice == 2: algorithm = 'tomek_links' elif choice == 3: algorithm = 'clustering_centroids' elif choice == 4: algorithm = 'near_miss1' elif choice == 5: algorithm = 'near_miss2' elif choice == 6: algorithm = 'near_miss3' elif choice == 7: algorithm = 'condensed_nn' elif choice == 8: algorithm = 'one_sided_selection' elif choice == 9: algorithm = 'neighbourhood_cleaning_rule' tx, ty, ts_epoch, response = balanceIt(category, algorithm) new_ds = [None] * 
len(tx) print "Length of transformed Predictor set : {} and that of response set : {}".format(len(tx), len(ty)) for i in xrange(len(tx)): new_ds[i] = [ts_epoch[int(tx[i][-1])][0]] + list(tx[i][0:-1]) + [int(ty[i])] + response[int(tx[i][-1])] push_to_db(new_ds) start() # f.close() '''Todo : Append tx & ty side by side thus forming new dataset. This new 2d list (Dataset) is passed to RF and replaced this line\