def insert_org_tut_spinoff():
    """Store the first-level spinoffs of every unchecked original tutorial.

    For each row of ``0810_0_original_tutorial`` whose ``is_checked`` is "0":

    1. collect the tutorial's spinoff URLs (a different collector is used
       when the stored ``spinoff_count`` exceeds 2000),
    2. insert every spinoff that ``is_in_db`` does not report as already
       stored into ``0810_first_level_spinoff``,
    3. mark the tutorial as checked and record the number of spinoff URLs
       that were actually collected in ``a_spinoff_count``.

    The work for one tutorial is committed as a unit, and the connection is
    closed even when a lookup or query raises.
    """
    # set up database
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration outside the source tree.
    con = mdb.connect(host="mysql1", user="******", passwd="TBtsTL4Xn6e4Whwh",
                      db="khandb", charset='utf8')
    try:
        cur = con.cursor()

        cur.execute('SELECT org_tut_id, spinoff_count FROM 0810_0_original_tutorial WHERE is_checked = "0"')
        db_org_tuts = cur.fetchall()

        # Snapshot of the spinoffs stored before this run; it is not
        # refreshed as new rows are inserted below.
        cur.execute('SELECT project_id FROM 0810_first_level_spinoff')
        db_project_ids = cur.fetchall()

        for org_tut_id, listed_spinoff_count in db_org_tuts:
            # progress output: the tutorial currently being processed
            print(org_tut_id)

            if listed_spinoff_count <= 2000:
                spinoff_urls = gasu.get_all_spinoff_url(org_tut_id, listed_spinoff_count)
            else:
                spinoff_urls = gasub2.get_all_spinoff_url_bigger_2k(org_tut_id, listed_spinoff_count)
            accurate_spinoff_count = len(spinoff_urls)

            for spinoff_url in spinoff_urls:
                # The project id is whatever follows the last "/" of the URL.
                project_id = spinoff_url[spinoff_url.rfind("/") + 1:]
                if is_in_db(db_project_ids, project_id):
                    continue

                # Only look up the author for spinoffs that will be inserted.
                user_id = gui.get_user_id(spinoff_url)

                spinoff_info = gpi.get_project_info(project_id)
                title = spinoff_info[0]
                created_date = spinoff_info[4].replace("T", " ").replace("Z", "")
                # The spinoff's own metadata gets its own names so it cannot
                # clobber the tutorial id used by the UPDATE below.
                child_spinoff_count = spinoff_info[5]
                child_parent_project_id = spinoff_info[6]

                cur.execute("INSERT INTO 0810_first_level_spinoff (user_id, project_id, project_title, url, created, spinoff_count, parent_project_id, org_tut_id, inserted_time) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, now())",
                            (user_id[0], project_id, title, spinoff_url, created_date, child_spinoff_count, child_parent_project_id, org_tut_id))

            cur.execute('UPDATE 0810_0_original_tutorial SET is_checked = "1", checked_date = now(), a_spinoff_count = %s WHERE org_tut_id = %s',
                        (accurate_spinoff_count, org_tut_id))
            # Persist this tutorial's inserts together with its checked flag;
            # without an explicit commit the driver may discard them on close.
            con.commit()
    finally:
        con.close()
def insert_org_tutorial():
    """Insert original tutorials that are not stored yet, one row per tag.

    Fetches every original tutorial id via ``gati.get_ids()`` and, for each id
    not already present in ``0810_1_original_tutorial``, looks up the project
    info and inserts one row per tag. A tutorial without tags gets a single
    row whose ``tags`` value is the string "NULL". Unknown difficulty codes
    are likewise stored as the string "NULL".

    Each tutorial's rows are committed as a unit, and the connection is
    closed even when a lookup or query raises.
    """
    # Maps the difficulty code found in the project info to its stored label.
    difficulty_dict = {'10': 'Getting_Started', '20': 'Easy', '30': 'Intermediate', '40': 'Expert'}
    insert_sql = "INSERT INTO 0810_1_original_tutorial (org_tut_id, org_tut_name, category, tags, difficulty, created, spinoff_count, inserted_time) VALUES (%s, %s, %s, %s, %s, %s, %s, now())"

    # get all the ids of original tutorials
    all_tutorial_ids = gati.get_ids()

    # set up database
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration outside the source tree.
    # NOTE(review): unlike insert_org_tut_spinoff, this connection does not
    # pass charset='utf8' -- confirm whether that difference is intended.
    con = mdb.connect(host="mysql1", user="******", passwd="TBtsTL4Xn6e4Whwh", db="khandb")
    try:
        cur = con.cursor()

        # Tags are stored on multiple lines (one row per tag) in
        # 0810_1_original_tutorial. An earlier variant instead kept all tags
        # ";"-joined in one field of 0810_0_original_tutorial.
        cur.execute("SELECT org_tut_id FROM 0810_1_original_tutorial")
        # Stored ids are compared as strings; a set collapses the repeated
        # rows per tutorial and makes each membership test O(1).
        known_ids = set(str(row[0]) for row in cur.fetchall())

        for tutorial_id in all_tutorial_ids:
            if tutorial_id in known_ids:
                continue

            tutorial_info = gpi.get_project_info(tutorial_id)
            title = tutorial_info[0]
            category = tutorial_info[1]
            tags = tutorial_info[2]
            difficulty = difficulty_dict.get(str(tutorial_info[3]), 'NULL')
            created_time = tutorial_info[4].replace("T", " ").replace("Z", "")
            spinoff_count = tutorial_info[5]

            # An empty/missing tag list still produces one row, tagged "NULL".
            for tag in (tags or ["NULL"]):
                cur.execute(insert_sql,
                            (tutorial_id, title, category, tag, difficulty, created_time, spinoff_count))

            # Guard against the same id appearing twice in all_tutorial_ids.
            known_ids.add(tutorial_id)
            # Persist this tutorial's rows; without an explicit commit the
            # driver may discard them on close.
            con.commit()
    finally:
        con.close()