def record_model(db_name, username, passwd, host, port, features, model, exp_id):
    """Insert one row per (feature, week) weight of a trained model.

    ``model`` is the flattened weight vector laid out as
    ``[f1_w1, f2_w1, f3_w1, f1_w2, f2_w2, f3_w2, ...]`` where each entry
    is a one-element sequence holding the weight value.  ``features[0]``
    is treated as a bias term and stored once at week 0; the remaining
    feature ids repeat in a cycle, one cycle per week.
    """
    conn = sql_functions.openSQLConnectionP(db_name, username, passwd, host, port)
    insert_sql = "INSERT INTO %s.models(longitudinal_feature_id," % db_name
    insert_sql += '''
        longitudinal_feature_week,
        longitudinal_feature_value,
        exp_id)
    VALUES (%s, %s, %s, %s)
    '''
    # Bias/intercept entry first, then one row per feature per week.
    rows = [(features[0], 0, model[0][0], exp_id)]
    cycle = len(features) - 1
    for i, weight in enumerate(model[1:]):
        # (i % cycle) + 1 walks features 1..N repeatedly; i // cycle is the
        # week the current pass of the cycle belongs to (equivalent to
        # bumping a counter each time the cycle restarts).
        rows.append((features[(i % cycle) + 1], i // cycle, weight[0], exp_id))
    cur = conn.cursor()
    cur.executemany(insert_sql, rows)
    cur.close()
    conn.commit()
    conn.close()
def record_model(db_name, username, passwd, host, port, features, model, exp_id):
    """Insert one row per (feature, week) weight of a trained model.

    ``model`` is the flattened weight vector laid out as
    ``[f1_w1, f2_w1, f3_w1, f1_w2, f2_w2, f3_w2, ...]``.  ``features[0]``
    is treated as a bias term and stored once at week 0; the remaining
    feature ids repeat in a cycle, one cycle per week.
    """
    conn = sql_functions.openSQLConnectionP(db_name, username, passwd, host, port)
    sql = "INSERT INTO %s.models(longitudinal_feature_id," % db_name
    sql = (
        sql
        + """
        longitudinal_feature_week,
        longitudinal_feature_value,
        exp_id)
    VALUES (%s, %s, %s, %s)
    """
    )
    # Bias/intercept entry first, then one row per feature per week.
    data = [(features[0], 0, model[0], exp_id)]
    week = 0
    num_features = len(features) - 1
    for i, value in enumerate(model[1:]):
        feature_idx = (i % num_features) + 1
        # BUG FIX: feature_idx cycles through 1..num_features and can never
        # be 0, so the previous `feature_idx == 0` test was dead code and
        # every weight was recorded as week 0.  The week advances each time
        # the feature cycle restarts (matching the other record_model
        # variant in this file).
        if feature_idx == 1 and i != 0:
            week += 1
        data.append((features[feature_idx], week, value, exp_id))
    cursor = conn.cursor()
    cursor.executemany(sql, data)
    cursor.close()
    conn.commit()
    conn.close()
def record_experiment(db_name, username, passwd, host, port, lead, lag, auc_train, testing_course, auc_test, p_lambda, p_epsilon, exp_time_stamp):
    """Store one experiment row and return its auto-generated exp_id.

    The id is recovered by re-selecting rows carrying this experiment's
    timestamp and returning the largest exp_id among them.
    """
    conn = sql_functions.openSQLConnectionP(db_name, username, passwd, host, port)
    # NOTE(review): values are interpolated straight into the SQL text;
    # acceptable only while all arguments are trusted internal values.
    insert_stmt = (
        "INSERT INTO `%s`.`experiments` "
        "(`lead`, `lag`, `auc_train`, `course_test_id`, `auc_test`, "
        "`parameter_lambda`, `parameter_epsilon`, `experiment_time_stamp`) "
        "VALUES (%s, %s, %s, '%s', %s, %s, %s, '%s')"
    ) % (db_name, lead, lag, auc_train, testing_course,
         auc_test, p_lambda, p_epsilon, exp_time_stamp)
    cur = conn.cursor()
    cur.execute(insert_stmt)
    cur.close()
    conn.commit()
    lookup_stmt = (
        "SELECT exp_id FROM `%s`.`experiments` "
        "WHERE experiment_time_stamp = '%s'"
    ) % (db_name, exp_time_stamp)
    cur = conn.cursor()
    cur.execute(lookup_stmt)
    rows = cur.fetchall()
    cur.close()
    conn.close()
    # Several rows can share a timestamp; the newest has the largest id.
    return max(int(row[0]) for row in rows)
def preprocess(dbName, userName, passwd, host, port, dirName, startDate, currentDate): # fileName, wordsToBeReplaced, wordsToReplace preprocessing_files = [ #[ #'create_longitudinal_features.sql', #['moocdb'], #[dbName] #], #[ #'populate_longitudinal_features.py', #[], #[] #], #[ #'create_models_table.sql', #['moocdb'], #[dbName] #], #[ #'create_experiments_table.sql', #['moocdb'], #[dbName] #], ['create_user_longitudinal_feature_values.sql', ['moocdb'], [dbName]], [ 'users_populate_dropout_week.sql', ['START_DATE_PLACEHOLDER', 'moocdb'], [startDate, dbName] ] ] conn = sql_functions.openSQLConnectionP(dbName, userName, passwd, host, port) for fileName, toBeReplaced, replaceBy in preprocessing_files: if fileName[-2:] == 'py': print "executing: ", fileName sql_functions.runPythonFile(conn, conn, dirName, fileName[:-3], dbName, startDate, currentDate) else: this_file = os.path.dirname(os.path.realpath(__file__)) fileLocation = dirName + '/' + fileName fileLocation = this_file + '/' + fileLocation newFile = sql_functions.replaceWordsInFile(fileLocation, toBeReplaced, replaceBy) print "executing: ", fileName sql_functions.executeSQL(conn, newFile) conn.commit() sql_functions.closeSQLConnection(conn)
def run_sql_curation_files(dbName, userName, passwd, dbHost, dbPort,preprocessing_files):
    """Execute a list of SQL curation files against the database.

    preprocessing_files: iterable of (file name, words to replace,
    replacement words) triples; file names are resolved relative to this
    module's directory and placeholder words are substituted before the
    SQL is executed.  Commits after each file.

    NOTE(review): this function is defined twice in this file with an
    identical body; the later definition silently shadows this one.
    """
    conn = sql_functions.openSQLConnectionP(dbName, userName, passwd, dbHost,dbPort)
    for fileName, toBeReplaced, replaceBy in preprocessing_files:
        # Resolve relative to this module, not the process working directory.
        fileLocation = os.path.dirname(os.path.realpath(__file__))+'/'+ fileName
        print fileLocation
        # Substitute the placeholder words into the SQL text.
        newFile = sql_functions.replaceWordsInFile(fileLocation, toBeReplaced, replaceBy)
        print "executing: ", fileName
        sql_functions.executeSQL(conn, newFile)
        # Commit per file so earlier steps persist even if a later one fails.
        conn.commit()
    print "done"
    sql_functions.closeSQLConnection(conn)
def run_sql_curation_files(dbName, userName, passwd, dbHost, dbPort,preprocessing_files): conn = sql_functions.openSQLConnectionP(dbName, userName, passwd, dbHost,dbPort) for fileName, toBeReplaced, replaceBy in preprocessing_files: fileLocation = os.path.dirname(os.path.realpath(__file__))+'/'+ fileName print fileLocation newFile = sql_functions.replaceWordsInFile(fileLocation, toBeReplaced, replaceBy) print "executing: ", fileName sql_functions.executeSQL(conn, newFile) conn.commit() print "done" sql_functions.closeSQLConnection(conn)
def record_experiment( db_name, username, passwd, host, port, lead, lag, auc_train, testing_course, auc_test, p_lambda, p_epsilon, exp_time_stamp, ): conn = sql_functions.openSQLConnectionP(db_name, username, passwd, host, port) ##returns exp_id sql = """INSERT INTO `%s`.`experiments` (`lead`, `lag`, `auc_train`, `course_test_id`, `auc_test`, `parameter_lambda`, `parameter_epsilon`, `experiment_time_stamp` ) VALUES (%s, %s, %s, '%s', %s, %s, %s, '%s')""" % ( db_name, lead, lag, auc_train, testing_course, auc_test, p_lambda, p_epsilon, exp_time_stamp, ) cursor = conn.cursor() cursor.execute(sql) cursor.close() conn.commit() sql = "SELECT exp_id FROM `%s`.`experiments` WHERE experiment_time_stamp = '%s'" % (db_name, exp_time_stamp) cursor = conn.cursor() cursor.execute(sql) data = cursor.fetchall() cursor.close() conn.close() return int(max(data, key=lambda x: int(x[0]))[0])
def run_dropout_prediction(userName, passwd, host, port, trainingCourse, testingCourse, earliest_date, latest_date, features, weeks, pred_week, feat_week, epsilon, lamb=1):
    """Train a dropout model on one course and evaluate it on another.

    Returns (auc_test, auc_train, learned weight vector).
    """
    conn = sql.openSQLConnectionP(trainingCourse, userName, passwd, host, port)

    # --- Load source (training) and target (testing) course data ----------
    dropout_threshold = 0.6  # same cutoff for both courses
    source_course = Course(trainingCourse, earliest_date, latest_date,
                           features, weeks, dropout_threshold, conn)
    target_course = Course(testingCourse, earliest_date, latest_date,
                           features, weeks, dropout_threshold, conn)

    # --- Sampling / transfer parameters ------------------------------------
    source_samples = 2000   # samples drawn from the source domain (course)
    target_known = 0.6      # fraction of target-domain samples treated as known
    target_unknown = 0.4    # fraction of target-domain samples held out
    use_fm = False
    rng_seed = 1            # randomization seed

    # --- Build and normalize the model --------------------------------------
    model = initialize_model(source_course, target_course, source_samples,
                             target_known, target_unknown, pred_week,
                             feat_week, rng_seed, use_fm)
    model.normalize_features_independently()

    # --- Evaluate -----------------------------------------------------------
    auc_test = AUC_naive(model, lamb, epsilon)
    auc_train = AUC_train(model, lamb, epsilon)
    conn.close()
    return (auc_test, auc_train, model.weight)
def run_dropout_prediction(userName, passwd, host, port, trainingCourse, testingCourse, earliest_date, latest_date, features, weeks, pred_week, feat_week, epsilon, lamb=1): conn = sql.openSQLConnectionP(trainingCourse, userName, passwd, host, port) ############## Download course data and split into train and test sets ###################### training_course_threshold = 0.6 a=Course(trainingCourse, earliest_date, latest_date, features, weeks, training_course_threshold, conn) testing_course_threshold = 0.6 b=Course(testingCourse, earliest_date, latest_date, features, weeks, testing_course_threshold, conn) ############## Set parameters ################################################################# n_A=2000 # Number of samples from source domain (course) n_B_known=0.6 # Percentage of sample used from target domain available n_B_unknown=0.4 # Percentage of sample used from target domain available is_FM=False seed=1 # set up the randomization seed ############# Initialize model ################################################################# model=initialize_model(a,b,n_A,n_B_known,n_B_unknown,pred_week,feat_week,seed,is_FM) model.normalize_features_independently() ############# Test model performance ########################################################### auc_test=AUC_naive(model,lamb,epsilon) auc_train = AUC_train(model,lamb, epsilon) conn.close() return (auc_test,auc_train,model.weight)
def preprocess(dbName, userName, passwd, host, port, dirName, startDate, currentDate):
    """Run the curation steps (SQL or Python files) needed before modeling.

    Each entry of preprocessing_files is (file name, words to be replaced,
    replacement words); placeholders are substituted into the SQL text
    before execution.  Files ending in 'py' are dispatched to
    sql_functions.runPythonFile instead of being executed as SQL.

    NOTE(review): duplicate of the other preprocess definition in this
    file; in Python the later definition wins.
    """
    # fileName, wordsToBeReplaced, wordsToReplace
    preprocessing_files = [
        # Disabled setup steps, kept for reference:
        #['create_longitudinal_features.sql', ['moocdb'], [dbName]],
        #['populate_longitudinal_features.py', [], []],
        #['create_models_table.sql', ['moocdb'], [dbName]],
        #['create_experiments_table.sql', ['moocdb'], [dbName]],
        [
            'create_user_longitudinal_feature_values.sql',
            ['moocdb'],
            [dbName]
        ],
        [
            'users_populate_dropout_week.sql',
            ['START_DATE_PLACEHOLDER','moocdb'],
            [startDate,dbName]
        ]
    ]
    conn = sql_functions.openSQLConnectionP(dbName, userName, passwd, host,port)
    for fileName, toBeReplaced, replaceBy in preprocessing_files:
        if fileName[-2:] == 'py':
            print "executing: ", fileName
            # Python steps receive the connection twice plus the date bounds.
            sql_functions.runPythonFile(conn,conn,dirName, fileName[:-3],dbName,startDate, currentDate)
        else:
            # Resolve the SQL file relative to this module's directory.
            this_file = os.path.dirname(os.path.realpath(__file__))
            fileLocation = dirName+'/'+fileName
            fileLocation = this_file+'/'+fileLocation
            newFile = sql_functions.replaceWordsInFile(fileLocation, toBeReplaced, replaceBy)
            print "executing: ", fileName
            sql_functions.executeSQL(conn, newFile)
            conn.commit()
    sql_functions.closeSQLConnection(conn)