def get_features_db_parallel(conn, rank, table_name, feature_name):
    """Run the SQL script of one feature and return its result column-wise.

    Steps performed:

    1. ``##DataTable##`` in ``feature_name`` is replaced by ``table_name``
       to obtain the name of the feature view/table.
    2. If ``data_io.table_view_existence_db`` reports that view/table as
       missing, it is created with ``data_io.create_table``.
    3. Every occurrence of ``table_name`` in that name is replaced by ``_``
       to obtain the script name; ``feature_parallel/<script name>.sql`` is
       read, ``##DataTable##`` in its text is replaced by ``table_name``,
       and the resulting query is executed on ``conn``.

    Args:
        conn: Database connection; must provide ``cursor()``.
        rank: Not used by this function; kept so existing callers still work.
        table_name: Name substituted for ``##DataTable##``.
        feature_name: Feature view/table name, optionally containing the
            ``##DataTable##`` placeholder.

    Returns:
        A list of lists in which element ``i`` holds column ``i`` of the
        query result (the fetched rows transposed through numpy).

    Raises:
        IOError: If the ``.sql`` script cannot be opened.
    """
    # str.replace is a no-op when the placeholder is absent, so no membership
    # test is needed before calling it.
    feature_name = feature_name.replace("##DataTable##", table_name)

    if not data_io.table_view_existence_db(feature_name, conn):
        # Single-argument parenthesised form prints the same text on
        # Python 2 and Python 3.
        print("view " + feature_name + " not exist, creating...")
        data_io.create_table(feature_name, conn)

    script_name = feature_name.replace(table_name, "_")
    print("getting " + script_name + " ...")

    # Context manager guarantees the script file is closed even if read()
    # raises.
    with open("feature_parallel/" + script_name + ".sql") as sql_file:
        query = sql_file.read().strip()
    query = query.replace("##DataTable##", table_name)

    cursor = conn.cursor()
    cursor.execute(query)

    # Build real lists (not lazy map objects) so numpy sees a 2-D structure
    # on every Python version, then transpose rows into columns.
    rows = [list(row) for row in cursor.fetchall()]
    return [list(column) for column in np.array(rows).T]
def get_features_db_parallel(conn, rank, table_name, feature_name):
    """Run the SQL script of one feature and return its result column-wise.

    Steps performed:

    1. ``##DataTable##`` in ``feature_name`` is replaced by ``table_name``
       to obtain the name of the feature view/table.
    2. If ``data_io.table_view_existence_db`` reports that view/table as
       missing, it is created with ``data_io.create_table``.
    3. Every occurrence of ``table_name`` in that name is replaced by ``_``
       to obtain the script name; ``feature_parallel/<script name>.sql`` is
       read, ``##DataTable##`` in its text is replaced by ``table_name``,
       and the resulting query is executed on ``conn``.

    Args:
        conn: Database connection; must provide ``cursor()``.
        rank: Not used by this function; kept so existing callers still work.
        table_name: Name substituted for ``##DataTable##``.
        feature_name: Feature view/table name, optionally containing the
            ``##DataTable##`` placeholder.

    Returns:
        A list of lists in which element ``i`` holds column ``i`` of the
        query result (the fetched rows transposed through numpy).

    Raises:
        IOError: If the ``.sql`` script cannot be opened.
    """
    # str.replace is a no-op when the placeholder is absent, so no membership
    # test is needed before calling it.
    feature_name = feature_name.replace("##DataTable##", table_name)

    if not data_io.table_view_existence_db(feature_name, conn):
        # Single-argument parenthesised form prints the same text on
        # Python 2 and Python 3.
        print("view " + feature_name + " not exist, creating...")
        data_io.create_table(feature_name, conn)

    script_name = feature_name.replace(table_name, "_")
    print("getting " + script_name + " ...")

    # Context manager guarantees the script file is closed even if read()
    # raises.
    with open("feature_parallel/" + script_name + ".sql") as sql_file:
        query = sql_file.read().strip()
    query = query.replace("##DataTable##", table_name)

    cursor = conn.cursor()
    cursor.execute(query)

    # Build real lists (not lazy map objects) so numpy sees a 2-D structure
    # on every Python version, then transpose rows into columns.
    rows = [list(row) for row in cursor.fetchall()]
    return [list(column) for column in np.array(rows).T]
def find_coauthor(conn, authorid, table_name):
    """Fetch the ``Author2`` entries paired with ``authorid`` in a co-author view.

    The lower-case names ``'trainconfirmed'`` and ``'traindeleted'`` are
    mapped to ``'TrainConfirmed'`` and ``'TrainDeleted'``; any other
    ``table_name`` is used unchanged.  The view ``<table_name>CoAuthors`` is
    created through ``data_io.create_view`` when
    ``data_io.table_view_existence_db`` reports it as missing.

    Args:
        conn: Database connection; must provide ``cursor()``.
        authorid: Value compared against ``Author1``; it is inserted into
            the query text via ``str(authorid)``.
        table_name: Base name of the ``...CoAuthors`` view.

    Returns:
        Whatever ``cursor.fetchall()`` yields for the query (a list of
        tuples according to the original author's note).
    """
    # finding coauthors confirmed/deleted by some other authors
    # to be modified: matching
    canonical_names = {
        'trainconfirmed': 'TrainConfirmed',
        'traindeleted': 'TrainDeleted',
    }
    table_name = canonical_names.get(table_name, table_name)

    view_name = table_name + 'CoAuthors'
    view_exists = data_io.table_view_existence_db(view_name, conn)
    if not view_exists:
        # feature view/table does not exist yet: create it
        data_io.create_view(view_name, conn)

    template = """ SELECT Author2 FROM ##table_name##CoAuthors WHERE Author1 = ##authorid## """
    sql = template.replace("##table_name##", table_name).replace(
        "##authorid##", str(authorid))

    cur = conn.cursor()
    cur.execute(sql)
    return cur.fetchall()
def find_coauthor(conn, authorid, table_name):
    """Fetch the ``Author2`` entries paired with ``authorid`` in a co-author view.

    The lower-case names ``'trainconfirmed'`` and ``'traindeleted'`` are
    mapped to ``'TrainConfirmed'`` and ``'TrainDeleted'``; any other
    ``table_name`` is used unchanged.  The view ``<table_name>CoAuthors`` is
    created through ``data_io.create_view`` when
    ``data_io.table_view_existence_db`` reports it as missing.

    Args:
        conn: Database connection; must provide ``cursor()``.
        authorid: Value compared against ``Author1``; it is inserted into
            the query text via ``str(authorid)``.
        table_name: Base name of the ``...CoAuthors`` view.

    Returns:
        Whatever ``cursor.fetchall()`` yields for the query (a list of
        tuples according to the original author's note).
    """
    # finding coauthors confirmed/deleted by some other authors
    # to be modified: matching
    canonical_names = {
        'trainconfirmed': 'TrainConfirmed',
        'traindeleted': 'TrainDeleted',
    }
    table_name = canonical_names.get(table_name, table_name)

    view_name = table_name + 'CoAuthors'
    view_exists = data_io.table_view_existence_db(view_name, conn)
    if not view_exists:
        # feature view/table does not exist yet: create it
        data_io.create_view(view_name, conn)

    template = """ SELECT Author2 FROM ##table_name##CoAuthors WHERE Author1 = ##authorid## """
    sql = template.replace("##table_name##", table_name).replace(
        "##authorid##", str(authorid))

    cur = conn.cursor()
    cur.execute(sql)
    return cur.fetchall()
def main(
        data_dir='/home/yingzhen/Projects/KDDCUP2013/benchmark/PythonBenchmark/'):
    """Create ``AP_features`` if needed, load the sample file, return 3 rows.

    The table is created only when ``data_io.table_view_existence_db``
    reports it as missing.  ``sampleTrain.txt`` from ``data_dir`` is then
    loaded with a ``COPY`` statement and the first three rows of the table
    are fetched.

    Args:
        data_dir: Directory prefix placed directly in front of
            ``sampleTrain.txt`` in the ``COPY`` statement, so it must end
            with a path separator.  Defaults to the location that used to be
            hard-coded, so calling ``main()`` behaves as before.

    Returns:
        The result of ``cursor.fetchall()`` for
        ``SELECT * FROM AP_features LIMIT 3``.
    """
    conn = data_io.get_db_conn()
    cursor = conn.cursor()

    if not data_io.table_view_existence_db('AP_features', conn):
        # Adjacent literals concatenate to the original one-line statement.
        create_query = (
            " CREATE TABLE AP_features ("
            " Result int, authorid bigint, paperid bigint,"
            " AP float, AP_PP float, AP_PJ_JP float, AP_PC_CP float,"
            " AP_PJ_JJ_JP float, AP_PC_CC_CP float) "
        )
        cursor.execute(create_query)
        conn.commit()

    # NOTE(review): the load below runs on every call, including when the
    # table already existed -- confirm this is intended, since re-running
    # presumably adds the sample rows a second time.
    copy_query = """ COPY AP_features FROM '##path##sampleTrain.txt' DELIMITER ' ' """
    copy_query = copy_query.replace('##path##', data_dir)
    cursor.execute(copy_query)
    conn.commit()

    cursor.execute(""" SELECT * FROM AP_features LIMIT 3 """)
    return cursor.fetchall()
def main(
        data_dir='/home/yingzhen/Projects/KDDCUP2013/benchmark/PythonBenchmark/'):
    """Create ``AP_features`` if needed, load the sample file, return 3 rows.

    The table is created only when ``data_io.table_view_existence_db``
    reports it as missing.  ``sampleTrain.txt`` from ``data_dir`` is then
    loaded with a ``COPY`` statement and the first three rows of the table
    are fetched.

    Args:
        data_dir: Directory prefix placed directly in front of
            ``sampleTrain.txt`` in the ``COPY`` statement, so it must end
            with a path separator.  Defaults to the location that used to be
            hard-coded, so calling ``main()`` behaves as before.

    Returns:
        The result of ``cursor.fetchall()`` for
        ``SELECT * FROM AP_features LIMIT 3``.
    """
    conn = data_io.get_db_conn()
    cursor = conn.cursor()

    if not data_io.table_view_existence_db('AP_features', conn):
        # Adjacent literals concatenate to the original one-line statement.
        create_query = (
            " CREATE TABLE AP_features ("
            " Result int, authorid bigint, paperid bigint,"
            " AP float, AP_PP float, AP_PJ_JP float, AP_PC_CP float,"
            " AP_PJ_JJ_JP float, AP_PC_CC_CP float) "
        )
        cursor.execute(create_query)
        conn.commit()

    # NOTE(review): the load below runs on every call, including when the
    # table already existed -- confirm this is intended, since re-running
    # presumably adds the sample rows a second time.
    copy_query = """ COPY AP_features FROM '##path##sampleTrain.txt' DELIMITER ' ' """
    copy_query = copy_query.replace('##path##', data_dir)
    cursor.execute(copy_query)
    conn.commit()

    cursor.execute(""" SELECT * FROM AP_features LIMIT 3 """)
    return cursor.fetchall()