def main():
    """Fetch an array from ``utils.get_array()``, insertion-sort it in place, and print it."""
    arr = utils.get_array()
    for pos in range(1, len(arr)):
        current = arr[pos]
        slot = pos
        # Shift every larger element of the sorted prefix one place right
        # until the insertion point for ``current`` is found.
        while slot > 0 and arr[slot - 1] > current:
            arr[slot] = arr[slot - 1]
            slot -= 1
        arr[slot] = current
    print(arr)
def __init__(self, path, transform=None):
    """Store the constructor arguments and load the array for ``path``.

    ``path`` is forwarded unchanged to ``utils.get_array``; ``transform`` is
    only stored here (presumably applied elsewhere in the class -- not
    visible from this method).
    """
    self.path, self.transform = path, transform
    loaded = utils.get_array(path)
    self.array = loaded
import utils


def merge(left, right):
    """Merge two ascending sequences into one new ascending list.

    Ties are resolved in favour of ``left`` (``<=``), so the merge is stable.
    """
    merged = []
    i, j = 0, 0
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:
            merged.append(left[i])
            i += 1
        else:
            merged.append(right[j])
            j += 1
    # At most one of the two tails is non-empty at this point.
    merged += left[i:]
    merged += right[j:]
    return merged


def mergesort(A):
    """Return the elements of ``A`` in ascending order using merge sort.

    Inputs with fewer than two elements are returned as-is (the same object);
    longer inputs yield a new list and ``A`` is left unmodified.
    """
    if len(A) > 1:
        q = len(A) // 2
        left = mergesort(A[:q])
        right = mergesort(A[q:])
        return merge(left, right)
    return A


if __name__ == "__main__":
    A = utils.get_array()
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("unsorted(A): " + str(A))
    print("mergesort(A): " + str(mergesort(A)))
def get_physical_chan_bool_vtr_attribute(devName, attrId):
    """Return the values of a physical-channel attribute as a list of bools.

    The values are fetched via ``utils.get_array`` with
    ``DAQmxGetPhysicalChanAttribute`` and a ``c_uint32`` element type, then
    each one is converted with ``bool()``.
    """
    raw_values = utils.get_array(
        c_daqmx.DAQmxGetPhysicalChanAttribute,
        ctypes.c_uint32,
        ctypes.c_char_p(devName),
        ctypes.c_int32(attrId),
    )
    return list(map(bool, raw_values))
def get_physical_chan_double_vtr_attribute(devName, attrId):
    """Return the values of a physical-channel attribute fetched as ``c_double``.

    The result of ``utils.get_array`` is returned unchanged.
    """
    # NOTE(review): devName is passed through unwrapped here, whereas
    # get_physical_chan_bool_vtr_attribute wraps it in ctypes.c_char_p --
    # confirm the difference is intentional.
    values = utils.get_array(
        c_daqmx.DAQmxGetPhysicalChanAttribute,
        ctypes.c_double,
        devName,
        ctypes.c_int32(attrId),
    )
    return values
def get_device_bool_vtr_attribute(devName, attrId):
    """Return the values of a device attribute as a list of bools.

    The values are fetched via ``utils.get_array`` with
    ``DAQmxGetDeviceAttribute`` and a ``c_uint32`` element type, then each one
    is converted with ``bool()``.
    """
    raw_values = utils.get_array(
        c_daqmx.DAQmxGetDeviceAttribute,
        ctypes.c_uint32,
        ctypes.c_char_p(devName),
        ctypes.c_int32(attrId),
    )
    return list(map(bool, raw_values))
def get_device_double_vtr_attribute(devName, attrId):
    """Return the values of a device attribute fetched as ``c_double``.

    The result of ``utils.get_array`` is returned unchanged.
    """
    # NOTE(review): devName is passed through unwrapped here, whereas
    # get_device_bool_vtr_attribute wraps it in ctypes.c_char_p -- confirm
    # the difference is intentional.
    values = utils.get_array(
        c_daqmx.DAQmxGetDeviceAttribute,
        ctypes.c_double,
        devName,
        ctypes.c_int32(attrId),
    )
    return values
#! /usr/bin/python
# coding: utf-8
import utils


def insertion_sort(A):
    """Sort the mutable sequence ``A`` in place in ascending order.

    Uses insertion sort; nothing is returned. Equal elements keep their
    relative order because only strictly greater elements are shifted.
    """
    for i in range(1, len(A)):
        key = A[i]
        j = i - 1
        # Shift larger elements of the sorted prefix A[:i] one slot right.
        while j >= 0 and A[j] > key:
            A[j + 1] = A[j]
            j -= 1
        A[j + 1] = key


if __name__ == "__main__":
    A = utils.get_array()
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("unsorted(A): " + str(A))
    insertion_sort(A)
    print("sorted(A): " + str(A))
def _dump_model_artifact(obj, filename):
    """Pickle ``obj`` to ``MODELS_ROOT/filename`` and close the file afterwards."""
    with open(os.path.join(MODELS_ROOT, filename), "wb") as handle:
        pickle.dump(obj, handle)


def _replace_optional_artifact(obj, filename):
    """Remove a stale ``MODELS_ROOT/filename``, then save ``obj`` unless it is None."""
    artifact_path = os.path.join(MODELS_ROOT, filename)
    if os.path.exists(artifact_path):
        os.remove(artifact_path)
    if obj is not None:
        _dump_model_artifact(obj, filename)


def train(X, y, groups, algo_option, feature_option, balancing_option,
          scale_option, reduce_dimension_option):
    """Evaluate a classifier pipeline on shuffled splits, then fit and save it.

    Phase 1 runs 50 stratified shuffle splits (20% test) and prints the mean
    micro/macro/weighted F1 and the mean per-class recall and precision.
    Phase 2 refits the same pipeline on the full data and pickles the
    classifier, the optional preprocessing objects and the settings under
    ``MODELS_ROOT``.

    Args:
        X: Feature data; narrowed with ``get_subset_features(X, feature_option)``.
        y: Labels; deep-copied before use.
        groups: Accepted for interface compatibility; not used in this function.
        algo_option: Passed to ``init_model`` to build the classifier.
        feature_option: Feature-subset selector for ``get_subset_features``.
        balancing_option: If truthy, passed to ``init_balancing_model``.
        scale_option: If truthy, passed to ``init_scaler``.
        reduce_dimension_option: If truthy, passed to
            ``init_reduce_dimension_model``.

    Returns:
        None. Results are printed and models are written to disk.
    """
    X_subset = get_subset_features(X, feature_option)
    y_subset = deepcopy(y)

    splitter = StratifiedShuffleSplit(n_splits=50, test_size=0.2, random_state=0)
    fold_accuracy_scores = []
    fold_f1_macro_scores = []
    fold_f1_weighted_scores = []
    fold_recall_scores = []
    fold_precision_scores = []

    for train_index, test_index in splitter.split(X_subset, y_subset):
        # Split train and test for this fold.
        X_train, X_test = get_array(train_index, X_subset), get_array(test_index, X_subset)
        y_train, y_test = get_array(train_index, y_subset), get_array(test_index, y_subset)

        # Init a classifier.
        model = init_model(algo_option)

        # Optional scaler: fitted on the training part only.
        if scale_option:
            scaler = init_scaler(scale_option)
            scaler.fit(X_train)
            X_train = scaler.transform(X_train)
            X_test = scaler.transform(X_test)

        # Optional class balancing: applied to the training part only.
        # NOTE(review): if the balancer is an imbalanced-learn sampler, newer
        # releases name this method fit_resample -- confirm the pinned version.
        if balancing_option:
            balancer = init_balancing_model(balancing_option)
            X_train, y_train = balancer.fit_sample(X_train, y_train)

        # Optional dimensionality reduction: fitted on the training part only.
        if reduce_dimension_option:
            reducer = init_reduce_dimension_model(reduce_dimension_option)
            reducer.fit(X_train, y_train)
            X_train = reducer.transform(X_train)
            X_test = reducer.transform(X_test)

        # Fit the preprocessed data and predict the held-out part.
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Metrics (micro-averaged F1 is reported as "Accuracy").
        y_true = np.asarray(y_test)
        fold_accuracy_scores.append(f1_score(y_true, y_pred, average='micro'))
        fold_f1_macro_scores.append(f1_score(y_true, y_pred, average='macro'))
        fold_f1_weighted_scores.append(f1_score(y_true, y_pred, average='weighted'))
        fold_recall_scores.append(recall_score(y_true, y_pred, average=None))
        fold_precision_scores.append(precision_score(y_true, y_pred, average=None))

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("Accuracy:\t\t" + str(np.asarray(fold_accuracy_scores).mean()))
    print("F1-macro:\t\t" + str(np.asarray(fold_f1_macro_scores).mean()))
    print("F1-weighted:\t" + str(np.asarray(fold_f1_weighted_scores).mean()))
    print("Recall: \t\t" + str(np.asarray(fold_recall_scores).mean(axis=0)))
    print("Precision: \t\t" + str(np.asarray(fold_precision_scores).mean(axis=0)))

    # TRAIN AND SAVE A MODEL FOR TESTING ON SEMEVAL TEST SET
    X_train = X_subset
    y_train = y_subset

    # Init a classifier.
    model = init_model(algo_option)

    # Optional scaler.
    scaler = None
    if scale_option:
        scaler = init_scaler(scale_option)
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)

    # Optional balancing model.
    balancer = None
    if balancing_option:
        balancer = init_balancing_model(balancing_option)
        X_train, y_train = balancer.fit_sample(X_train, y_train)

    # Optional dimensionality-reduction model.
    reducer = None
    if reduce_dimension_option:
        reducer = init_reduce_dimension_model(reduce_dimension_option)
        reducer.fit(X_train, y_train)
        X_train = reducer.transform(X_train)

    # Fit the preprocessed data to the classifier.
    model.fit(X_train, y_train)

    # Save the classifier; optional artifacts from an earlier run are removed
    # first so a stale scaler/balancer/reducer cannot be picked up later.
    _dump_model_artifact(model, 'classifier.model')
    _replace_optional_artifact(scaler, 'scaler.model')
    _replace_optional_artifact(balancer, 'balancer.model')
    _replace_optional_artifact(reducer, 'reducer.model')

    training_settings = {
        'features_subset': feature_option,
        'balancing_class_algorithm': balancing_option,
        'scale_option': scale_option,
        'reduce_dimension_algorithm': reduce_dimension_option,
        'training_algorithm': algo_option
    }
    _dump_model_artifact(training_settings, 'settings.model')
from sklearn.metrics import mean_absolute_error
from utils import get_array, get_data, get_model_3

# Load the first 100000 ratings, split into train/test by get_data.
train, test, max_user, max_work, mapping_work = get_data(
    'ml-20m/ratings.csv', nrows=100000)

# Train data analysis
# print(train.groupby('userId').aggregate({'movieId': 'count', 'rating': 'median', 'timestamp': ['min', 'max']}))
print(train.describe())
print((max_work, max_user))

# NOTE(review): np is not imported in the visible part of this script --
# confirm numpy is imported as np elsewhere.
np.random.seed(1)

model = get_model_3(max_work=max_work, max_user=max_user, latent_factors=50)
print(model.summary())
print(max(get_array(train["userId"])))

# Fit on (movieId, userId) -> rating, holding out 20% for validation.
train_inputs = [get_array(train["movieId"]), get_array(train["userId"])]
train_targets = get_array(train["rating"])
history = model.fit(
    train_inputs,
    train_targets,
    epochs=10,
    # batch_size=13,
    validation_split=0.2,
    verbose=1,
)
model.save('model.h5')

# Score the held-out test set with mean absolute error.
test_inputs = [get_array(test["movieId"]), get_array(test["userId"])]
predictions = model.predict(test_inputs)
test_performance = mean_absolute_error(test["rating"], predictions)
print(" Test Mae model 3 : %s " % test_performance)
from utils import get_model_3, get_array, get_data

# Load the first 100000 ratings, split into train/test by get_data.
train, test, max_user, max_work, mapping_work = get_data(
    'ml-20m/ratings.csv', nrows=100000)

# Train data analysis
# print(train.groupby('userId').aggregate({'movieId': 'count', 'rating': 'median', 'timestamp': ['min', 'max']}))
print(train.describe())
print((max_work, max_user))

# NOTE(review): np and mean_absolute_error are not imported in the visible
# part of this script -- confirm they are imported elsewhere.
np.random.seed(1)

model = get_model_3(max_work=max_work, max_user=max_user, latent_factors=50)
print(model.summary())
print(max(get_array(train["userId"])))

# Fit on (movieId, userId) -> rating, holding out 20% for validation.
fit_inputs = [get_array(train["movieId"]), get_array(train["userId"])]
fit_targets = get_array(train["rating"])
history = model.fit(
    fit_inputs,
    fit_targets,
    epochs=10,
    # batch_size=13,
    validation_split=0.2,
    verbose=1,
)
model.save('model.h5')

# Score the held-out test set with mean absolute error.
eval_inputs = [get_array(test["movieId"]), get_array(test["userId"])]
predictions = model.predict(eval_inputs)
test_performance = mean_absolute_error(test["rating"], predictions)