def insert_org_tut_spinoff():
    """Store the spin-offs of every unchecked original tutorial.

    Steps, as performed below:

    1. Read ``(org_tut_id, spinoff_count)`` for all rows of
       ``0810_0_original_tutorial`` whose ``is_checked`` is "0".
    2. For each tutorial, collect its spin-off URLs with
       ``gasu.get_all_spinoff_url`` when the stored count is at most 2000,
       otherwise with ``gasub2.get_all_spinoff_url_bigger_2k``.
    3. Insert each spin-off that ``is_in_db`` does not find among the project
       ids loaded from ``0810_first_level_spinoff`` at start-up.
    4. Mark the tutorial as checked and store the number of URLs actually
       retrieved in ``a_spinoff_count``.

    The transaction is committed after each tutorial so finished tutorials
    survive a failure later in the run, and the connection is always closed.
    """
    # NOTE(review): credentials are hard-coded here; consider loading them
    # from configuration instead of source.
    con = mdb.connect(host="mysql1", user="******", passwd="TBtsTL4Xn6e4Whwh", db="khandb", charset='utf8')
    try:
        cur = con.cursor()

        cur.execute('SELECT org_tut_id, spinoff_count FROM 0810_0_original_tutorial WHERE is_checked = "0"')
        db_org_tuts = cur.fetchall()

        # Snapshot of known spin-offs; it is not refreshed while this run
        # inserts new rows.
        cur.execute('SELECT project_id FROM 0810_first_level_spinoff')
        db_project_ids = cur.fetchall()

        for org_tut_id, org_tut_spinoff_count in db_org_tuts:
            print(org_tut_id)
            if org_tut_spinoff_count <= 2000:
                org_tut_spinoff_urls = gasu.get_all_spinoff_url(org_tut_id, org_tut_spinoff_count)
            else:
                org_tut_spinoff_urls = gasub2.get_all_spinoff_url_bigger_2k(org_tut_id, org_tut_spinoff_count)

            accurate_spinoff_count = len(org_tut_spinoff_urls)
            for org_tut_spinoff_url in org_tut_spinoff_urls:
                # The project id is the last path segment of the URL.
                project_id = org_tut_spinoff_url[org_tut_spinoff_url.rfind("/") + 1:]
                if is_in_db(db_project_ids, project_id):
                    continue

                # Only look up the author for spin-offs that will be inserted.
                user_id = gui.get_user_id(org_tut_spinoff_url)
                org_tut_spinoff_info = gpi.get_project_info(project_id)
                title = org_tut_spinoff_info[0]
                created_date = org_tut_spinoff_info[4].replace("T", " ").replace("Z", "")
                # Distinct names on purpose: the outer tutorial id must stay
                # intact for the UPDATE below.
                child_spinoff_count = org_tut_spinoff_info[5]
                child_parent_project_id = org_tut_spinoff_info[6]
                cur.execute("INSERT INTO 0810_first_level_spinoff (user_id, project_id, project_title, url, created, spinoff_count, parent_project_id, org_tut_id, inserted_time) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, now())",
                            (user_id[0], project_id, title, org_tut_spinoff_url, created_date, child_spinoff_count, child_parent_project_id, org_tut_id))

            cur.execute('UPDATE 0810_0_original_tutorial SET is_checked = "1", checked_date = now(), a_spinoff_count = %s WHERE org_tut_id = %s', (accurate_spinoff_count, org_tut_id))
            # DB-API connections do not autocommit by default; persist the
            # inserts together with the "checked" flag for this tutorial.
            con.commit()
    finally:
        con.close()
def insert_org_tutorial():
    """Insert original tutorials missing from ``0810_1_original_tutorial``.

    All tutorial ids come from ``gati.get_ids()``. For every id whose string
    form is not already present in the table's ``org_tut_id`` column, the
    project info is fetched with ``gpi.get_project_info`` and one row is
    inserted per tag; a tutorial without tags gets a single row whose tag is
    the literal string "NULL".

    This table keeps tags on multiple rows. An earlier variant, since
    disabled, wrote all tags ";"-joined into one field of
    ``0810_0_original_tutorial``.

    The transaction is committed after each tutorial and the connection is
    always closed.
    """
    # Numeric difficulty code -> label; unknown codes are stored as the
    # literal string 'NULL' (not SQL NULL), as before.
    difficulty_names = {'10': 'Getting_Started', '20': 'Easy', '30': 'Intermediate', '40': 'Expert'}
    insert_sql = "INSERT INTO 0810_1_original_tutorial (org_tut_id, org_tut_name, category, tags, difficulty, created, spinoff_count, inserted_time) VALUES (%s, %s, %s, %s, %s, %s, %s, now())"

    # Get the ids of all original tutorials.
    all_tutorial_ids = gati.get_ids()

    # NOTE(review): credentials are hard-coded here; consider loading them
    # from configuration instead of source.
    # charset='utf8' matches the connection used by insert_org_tut_spinoff.
    con = mdb.connect(host="mysql1", user="******", passwd="TBtsTL4Xn6e4Whwh", db="khandb", charset='utf8')
    try:
        cur = con.cursor()

        cur.execute("SELECT org_tut_id FROM 0810_1_original_tutorial")
        # Build the lookup once; a tutorial may appear on several rows (one
        # per tag), which the set collapses.
        known_tut_ids = set(str(row[0]) for row in cur.fetchall())

        for tutorial_id in all_tutorial_ids:
            if tutorial_id in known_tut_ids:
                continue

            tutorial_info = gpi.get_project_info(tutorial_id)
            title = tutorial_info[0]
            category = tutorial_info[1]
            tags = tutorial_info[2]
            difficulty = difficulty_names.get(str(tutorial_info[3]), 'NULL')
            created_time = tutorial_info[4].replace("T", " ").replace("Z", "")
            spinoff_count = tutorial_info[5]

            # One row per tag; fall back to a single "NULL" tag when empty.
            for tag in (tags or ["NULL"]):
                cur.execute(insert_sql,
                            (tutorial_id, title, category, tag, difficulty, created_time, spinoff_count))

            # DB-API connections do not autocommit by default.
            con.commit()
    finally:
        con.close()