def main():
    """Run ``add_chinese`` over the selected users and store the total.

    Users are selected from the ``user`` collection with the filter returned
    by ``reg_china_condition()``.  The running count is printed every 500
    users, and the final count is written to ``research_result`` under the
    type ``chinese_count`` (updated if such a document exists, inserted
    otherwise).
    """
    db = DMDatabase().getDB()
    if not db:
        print("Cannot connect to database")
        return

    condition = reg_china_condition()
    count = 0
    for count, user in enumerate(db["user"].find(condition), 1):
        add_chinese(db, user)
        if count % 500 == 0:
            print(count)

    # TODO make it a lib
    research = db["research_result"]
    if research.find_one({"type": "chinese_count"}):
        research.update({"type": "chinese_count"},
                        {"$set": {"total_count": count}})
    else:
        research.insert({"type": "chinese_count", "total_count": count})
Example #2
0
def main():
    """Obtain a database handle and pass it to ``report_lang``."""
    db = DMDatabase().getDB()
    if not db:
        print("Cannot connect to database")
        return
    report_lang(db)
def main_string():
    """Obtain a database handle and pass it to ``calculate_months_string``."""
    db = DMDatabase().getDB()
    if not db:
        print("Cannot connect to database")
        return
    calculate_months_string(db)
def main():
    """Obtain a database handle and pass it to ``calculate_months``."""
    db = DMDatabase().getDB()
    if not db:
        print("Cannot connect to database")
        return
    calculate_months(db)
def main():
    """Entry point: run ``calculate_months`` when a database is available."""
    database = DMDatabase().getDB()
    if not database:
        # No usable handle was returned; report it and stop.
        print("Cannot connect to database")
        return
    calculate_months(database)
Example #6
0
def main():
    """Call ``report_active`` for the two activity keys, in order."""
    db = DMDatabase().getDB()
    if not db:
        print("Cannot connect to database")
        return
    for key in ("active_count", "active_count_3_month"):
        report_active(db, key)
def main():
    """Call ``fork_org`` once for each value in a fixed list of numbers.

    The connection-failure message is printed only when no database handle
    is available.  The ``else`` was missing before, so the message appeared
    after every successful run and never on an actual failure.
    """
    db = DMDatabase().getDB()
    if db:
        for num in [100, 200, 1000, 10, 50]:
            fork_org(db, num)
    else:
        print("Cannot connect to database")
Example #8
0
def main():
    """Run ``report_active`` for ``active_count`` and ``active_count_3_month``."""
    database = DMDatabase().getDB()
    if not database:
        print("Cannot connect to database")
        return
    for report_key in ["active_count", "active_count_3_month"]:
        report_active(database, report_key)
Example #9
0
def main():
    """Call ``report_forks`` for every number from 0 up to 75999."""
    db = DMDatabase().getDB()
    if not db:
        print("Cannot connect to database")
        return
    for num in range(76000):
        report_forks(db, num)
def main():
    """Entry point: invoke ``report_forks(db, n)`` for n = 0 .. 75999."""
    database = DMDatabase().getDB()
    if not database:
        print("Cannot connect to database")
        return
    upper_bound = 76000
    for fork_number in range(0, upper_bound):
        report_forks(database, fork_number)
def main():
    """Call ``fork_org`` once for each value in a fixed list of numbers.

    The "Cannot connect to database" message now sits in an ``else`` branch.
    Before, it was printed unconditionally after the loop and was never
    shown when the handle was actually unavailable.
    """
    dm_db = DMDatabase()
    db = dm_db.getDB()
    if db:
        nums = [100, 200, 1000, 10, 50]
        for num in nums:
            fork_org(db, num)
    else:
        print("Cannot connect to database")
Example #12
0
def main_int():
    """Run ``merge_month`` for the value 201412 when a database is available."""
    db = DMDatabase().getDB()
    if not db:
        print("Cannot connect to database")
        return
    # Other calls that were left disabled here:
    #     print_user_info(db, "initlove")
    #     calculate_months_int(db)
    merge_month(db, 201412)
Example #13
0
def main():
    """Call ``generate_lang`` for every entry listed in ``./lang.txt``.

    Each line is stripped; empty lines and lines starting with ``#`` are
    skipped.  The file is read inside a ``with`` block so the handle is
    closed (it was never closed before), and it is closed before any
    ``generate_lang`` call runs.
    """
    db = DMDatabase().getDB()
    if not db:
        print("Cannot connect to database")
        return

    with open("./lang.txt", "r") as lang_file:
        entries = [raw_line.strip() for raw_line in lang_file]

    for line in entries:
        if not line or line.startswith('#'):
            continue
        generate_lang(db, line)
Example #14
0
def main():
    """Call ``init_beginer`` for one login, then ``get_followers`` per level.

    Levels 1 through 9 are processed in order; the loop stops early, after
    printing a notice, as soon as ``get_followers`` returns 0.
    """
    dm_db = DMDatabase()
    client = dm_db.getClient()
    db = dm_db.getDB()
    if not client:
        print("Cannot connect to database")
        return

    login = "******"
    init_beginer(client, login, 0)
    for level in range(1, 10):
        if get_followers(client, db, level) != 0:
            continue
        print("No more, exit")
        break
Example #15
0
def main():
    """Start from a single login and walk follower levels 1..9.

    ``init_beginer`` is called first with level 0; afterwards
    ``get_followers`` is called for each level and the walk ends when it
    returns 0.  Only the client handle is checked for availability.
    """
    database_manager = DMDatabase()
    client = database_manager.getClient()
    db = database_manager.getDB()
    if not client:
        print("Cannot connect to database")
        return

    init_beginer(client, "******", 0)
    level = 1
    while level < 10:
        fetched = get_followers(client, db, level)
        if fetched == 0:
            print("No more, exit")
            break
        level += 1
def main():
    """Feed each usable line of ``./lang.txt`` to ``generate_lang``.

    Lines are stripped of surrounding whitespace; blank lines and lines
    beginning with ``#`` are ignored.  The file used to be opened without
    ever being closed; a ``with`` block now closes it once its lines have
    been read.
    """
    dm_db = DMDatabase()
    db = dm_db.getDB()
    if db:
        with open("./lang.txt", "r") as fo:
            lines = fo.readlines()
        for line in lines:
            line = line.strip()
            if line and not line.startswith('#'):
                generate_lang(db, line)
    else:
        print("Cannot connect to database")
Example #17
0
def main():
    """Call ``report_small_by_repo`` for user "openstack", repo "horizon".

    The unused local ``full_name`` has been removed; it was assigned but
    never read.
    """
    client = DMDatabase().getClient()
    if not client:
        print("Cannot connect to database")
        return

    # Notes carried over from the original author:
    #   user name is a database
    #   repo_name is col to store repos that
    #   user is another col to store contributors' info
    user_name = "openstack"
    repo_name = "horizon"
    #        generate_small_by_repo(client, user_name, repo_name)
    report_small_by_repo(client, user_name, repo_name)
def main():
    """Report on the "horizon" repo of "openstack" via ``report_small_by_repo``.

    ``full_name`` was assigned here but never used, so it has been dropped.
    """
    dm_db = DMDatabase()
    client = dm_db.getClient()
    if client:
        # user name is a database
        # repo_name is col to store repos that
        # user is another col to store contributors' info
        user_name = "openstack"
        repo_name = "horizon"
        #        generate_small_by_repo(client, user_name, repo_name)
        report_small_by_repo(client, user_name, repo_name)
    else:
        print("Cannot connect to database")
Example #19
0
def main():
    """Print logins behind high-count events and count organizations.

    Looks at ``event`` documents whose ``count`` is at least 299, finds the
    ``user`` document with the same ``id``, prints its login, and tallies
    how many of those users have type ``Organization``.
    """
    db = DMDatabase().getDB()
    if not db:
        print("Cannot connect to database")
        return

    org_count = 0
    for event in db["event"].find({"count": {"$gte": 299}}):
        user = db["user"].find_one({"id": event["id"]})
        if not user:
            continue
        print(user["login"])
        if user["type"] == "Organization":
            org_count += 1
    print("Org is " + str(org_count))
Example #20
0
def get_unfinished_repos(db):
    """Queue a ``myThread2`` worker for each repo lacking a meta result.

    Steps:
      1. Read running ``get_commit_check`` tasks from ``task.github``.
      2. For every listed user without a ``commit_check_meta_result`` entry
         (matched by login), collect the ``repo_list`` stored in
         ``user_contributor_result2``.
      3. De-duplicate the repo full names, drop those under ``GITenberg/``
         and those that already have a meta result (matched by full_name).
      4. Append one ``myThread2`` per remaining repo to ``user_thread`` and
         call ``run_task()``.

    Args:
        db: database handle used for the result collections.
    """
    running_tasks = DMDatabase().getClient()["task"]["github"].find({
        "status": "running",
        "name": "get_commit_check"
    })

    pending = []
    for task in running_tasks:
        for login in task["users"]:
            if db["commit_check_meta_result"].find_one({"login": login}):
                continue
            contributor = db["user_contributor_result2"].find_one(
                {"login": login})
            if contributor:
                pending += contributor["repo_list"]

    full_names = [entry["full_name"] for entry in pending]
    repos = [
        name for name in list(set(full_names))
        if not name.startswith("GITenberg/")
        and not db["commit_check_meta_result"].find_one({"full_name": name})
    ]

    for repo in repos:
        user_thread.append(myThread2(db, repo))
    print(str(len(repos)) + " task received, start to run them!")
    run_task()
Example #21
0
def main(type):
    """Dispatch one commit-check action selected by ``type``.

    Sets a 300 second default socket timeout, prints the value returned by
    ``DMSharedUsers().getRemaining()``, then runs the action matching
    ``type``.  Nothing runs for an unrecognised value or when no database
    handle is available.

    Args:
        type: one of "user", "user_repos", "query", "init_task",
            "run_task", "un_user", "un_repo".
    """
    socket.setdefaulttimeout(300)

    remaining = DMSharedUsers().getRemaining()
    print("Account has " + str(remaining) + " API calls")

    db = DMDatabase().getDB()
    if not db:
        return

    if type == "user":
        get_commit_repos_by_user(db, "******")
    elif type == "user_repos":
        get_commit_repos_by_user_repos(db, "******")
    elif type == "query":
        get_commit_repos_by_query(db, {"contributor_repos": {"$gte": 200}})
    elif type == "init_task":
        init_users_thread_by_query(
            db, {"contributor_repos": {"$gte": 100, "$lt": 200}})
    elif type == "run_task":
        run_free_task(db, 60)
    elif type == "un_user":
        get_unfinished_users(db)
    elif type == "un_repo":
        get_unfinished_repos(db)
def main():
    """Count ``chinese`` documents matching ``active_user(db)`` and store it.

    The count is saved in ``research_result`` under the type
    ``chinese_active_count``: an existing document is updated, otherwise a
    new one is inserted.
    """
    db = DMDatabase().getDB()
    if not db:
        print("Cannot connect to database")
        return

    count = db["chinese"].find(active_user(db)).count()

    # TODO make it a lib
    research = db["research_result"]
    if research.find_one({"type": "chinese_active_count"}):
        research.update({"type": "chinese_active_count"},
                        {"$set": {"total_count": count}})
    else:
        research.insert({"type": "chinese_active_count",
                         "total_count": count})
Example #23
0
def fix_add_count_id_created_at_int():
    """Back-fill ``created_at_int``, ``id`` and ``count`` on ``followers``.

    Users are read in id ranges of ``gap``.  For each user with a matching
    ``followers`` document (by login) that lacks at least one of the three
    fields, ``created_at_int`` and ``id`` are copied from the user; ``count``
    is set from the user's ``followers`` value only when it is absent.
    The range index is printed after each range.
    """
    db = DMDatabase().getDB()
    #2730627
    gap = 1000
    start = 0
    # end id is now set to 10300000
    end = 10300

    required = ("created_at_int", "id", "count")
    for chunk in range(start, end):
        lower = chunk * gap
        users = db["user"].find({"id": {"$gte": lower, "$lt": lower + gap}})
        for user in users:
            existing = db["followers"].find_one({"login": user["login"]})
            if not existing:
                continue
            if all(key in existing for key in required):
                continue
            fields = {
                "created_at_int": user["created_at_int"],
                "id": user["id"]
            }
            if "count" not in existing:
                fields["count"] = user["followers"]
            db["followers"].update({"login": user["login"]},
                                   {"$set": fields})
        print(chunk)
Example #24
0
def init_users_thread_by_query(db, query):
    """Register ``get_commit_check`` tasks for the users matching ``query``.

    Logins from ``user_contributor_result2`` are grouped into batches of
    ``pieces`` users, and each batch is registered as one "github" loop task
    whose ``start``/``end`` are offsets from 800000.

    Fixes relative to the previous version:
      * the trailing (partial) batch used ``"end": 800000 + 1``, which is
        below its own ``start``; it now ends at ``800000 + i`` like the full
        batches do;
      * the full-batch ``start`` used a literal ``100`` instead of
        ``pieces``.

    Args:
        db: not used; a fresh handle is taken from ``DMDatabase`` below, as
            before.  Kept so existing callers keep working.
        query: filter passed to ``find`` on ``user_contributor_result2``.
    """
    pieces = 100
    base = 800000
    db = DMDatabase().getDB()

    def submit(users, first, last):
        # Register one batch of logins as a single loop task.
        task = DMTask()
        task.init("github", {
            "name": "get_commit_check",
            "action_type": "loop",
            "query": str(query),
            "users": users,
            "start": base + first,
            "end": base + last
        })

    i = 0
    user_list = []
    for item in db["user_contributor_result2"].find(query):
        i += 1
        user_list.append(item["login"])
        if i % pieces == 0:
            submit(user_list, i - pieces, i)
            user_list = []

    # Leftover users that did not fill a whole batch.
    if i % pieces != 0:
        submit(user_list, i - i % pieces, i)
Example #25
0
def fix_user_loop():
    """Add integer timestamps to users that lack ``created_at_int``.

    Walks the ``user`` collection in ``total`` id ranges of ``gap_num`` ids.
    For every user without ``created_at_int``, ``created_at`` and
    ``updated_at`` are converted with ``date_string_to_int`` and stored as
    ``created_at_int`` / ``updated_at_int``.  The 1-based range number is
    printed after each range, followed by "Finish" at the end.
    """
    db = DMDatabase().getDB()

    total = 1050
    gap_num = 10000
    for chunk in range(total):
        lower = chunk * gap_num
        upper = lower + gap_num
        users = db["user"].find({"id": {"$gte": lower, "$lt": upper}})
        for user in users:
            if "created_at_int" in user:
                continue
            created_text, updated_text = user["created_at"], user["updated_at"]
            created_at_int = date_string_to_int(created_text)
            updated_at_int = date_string_to_int(updated_text)
            db["user"].update(
                {"login": user["login"]},
                {"$set": {"created_at_int": created_at_int,
                          "updated_at_int": updated_at_int}})
        print(chunk + 1)
    print("Finish")
Example #26
0
def top_fork():
    """Store contributors for the 1000 repositories with the most forks.

    Repositories that already carry ``contributors_count`` are skipped.  For
    the others ``top_get_contributors`` is called; unless it reports
    ``error == 1``, the result is inserted into ``contributors`` and the
    count is written back to the repository document.
    """
    db = DMDatabase().getDB()
    limit = 1000
    top_repos = db["repositories"].find().sort(
        "forks_count", pymongo.DESCENDING).limit(limit)

    for repo in top_repos:
        if "contributors_count" in repo:
            continue
        full_name = repo["full_name"]
        repo_id = repo["id"]
        outcome = top_get_contributors(db, full_name, repo_id)
        if outcome["error"] == 1:
            continue

        count = len(outcome["val"])
        db["contributors"].insert({
            "full_name": full_name,
            "id": repo_id,
            "contributors": outcome["val"],
            "count": count,
            "update_date": datetime.datetime.utcnow()
        })
        db["repositories"].update(
            {"full_name": full_name, "id": repo_id},
            {"$set": {"contributors_count": count}})
        print("insert " + full_name + "with " + str(count))
Example #27
0
def fix_add_login_one_by_one():
    """Repair ``user_repos`` documents that have no ``id`` field.

    Repeatedly fetches one such document and looks up the ``user`` with the
    same login:
      * no user found: the ``user_repos`` entries for that login are removed;
      * same login as the previous round: treated as a duplicate and removed;
      * otherwise ``created_at_int``, ``id`` and ``count`` are filled in
        from the user document.
    Returns when no document without ``id`` remains.  The processed count is
    printed every 1000 documents.
    """
    db = DMDatabase().getDB()
    #2730627
    processed = 0
    previous_login = "<null>"
    while True:
        pending = db["user_repos"].find_one({"id": {"$exists": False}})
        if not pending:
            print('exit')
            return

        processed += 1
        login = pending["login"]
        user = db["user"].find_one({"login": login})
        if not user:
            print(login + "  is not found")
            # if we get followers and not sync with user, this problem happens
            db["user_repos"].remove({"login": login})
        elif previous_login == login:
            print(login + " dup, removed")
            # NOTE(review): the filter is the login, not this document's
            # _id -- confirm that removing every entry for the login
            # (possibly including the one updated last round) is intended.
            db["user_repos"].remove({"login": login})
            previous_login = login
        else:
            print(login + " updated ")
            if "created_at_int" in user:
                created_at_int = user["created_at_int"]
            else:
                created_at_int = date_string_to_int(user["created_at"])
            db["user_repos"].update({"login": login}, {"$set": {
                "created_at_int": created_at_int,
                "id": user["id"],
                "count": user["public_repos"]
            }})
            previous_login = login

        if processed % 1000 == 0:
            print(processed)
Example #28
0
def init_commit_task_by_user():
    """Call ``init_commit_task`` for each repo listed for one fixed login.

    The repo list comes from the ``repo_list`` field of the login's
    ``user_contributor_result2`` document; nothing happens if that document
    does not exist.
    """
    db = DMDatabase().getDB()
    record = db["user_contributor_result2"].find_one({"login": "******"})
    if not record:
        return
    for repo in record["repo_list"]:
        init_commit_task(db, repo["full_name"])
Example #29
0
def init_repo_single_task(login):
    """Register a single ``get_repos`` task for ``login``.

    The task's ``start`` is the login and its ``end`` is the ``id`` of the
    matching ``user`` document.  Prints a message when no such user exists.

    Args:
        login: login looked up in the ``user`` collection.
    """
    db = DMDatabase().getDB()
    task = DMTask()
    user = db["user"].find_one({"login": login})
    if not user:
        print("user not found")
        return
    task.init("github", {
        "name": "get_repos",
        "action_type": "single",
        "start": login,
        "end": user["id"]
    })
Example #30
0
def main(type):
    """Run the repo action selected by ``type`` ("top" or "dup").

    A 300 second default socket timeout is set first.  Any other ``type``
    value, or a missing database handle, results in no action.
    """
    socket.setdefaulttimeout(300)

    db = DMDatabase().getDB()
    if not db:
        return
    if type == "top":
        get_top_commit_repos(db)
    elif type == "dup":
        get_dup_repos(db)
Example #31
0
def init_event_task():
    """Register one ``get_events`` loop task per block of ``gap`` ids.

    Block ``i`` covers ``[i * gap, (i + 1) * gap)`` for ``i`` in
    ``range(start, end)``.
    """
    # TODO: 1000 is system defined, maybe add to DMTask? or config file?
    gap = 1000
    start = 0
    # end id is now set to 10300000
    end = 10300
    # The handle is not used below; the call is kept as in the original.
    db = DMDatabase().getDB()
    for index in range(start, end):
        lower = index * gap
        task = DMTask()
        task.init("github", {
            "name": "get_events",
            "action_type": "loop",
            "start": lower,
            "end": lower + gap
        })
def main():
    """Apply ``add_chinese`` to the selected users and save how many there were.

    The selection filter comes from ``reg_china_condition()``.  Progress is
    printed every 500 users; the total is stored in ``research_result`` with
    type ``chinese_count`` (update when present, insert otherwise).
    """
    db = DMDatabase().getDB()
    if not db:
        print("Cannot connect to database")
        return

    selected = db["user"].find(reg_china_condition())
    count = 0
    for user in selected:
        count += 1
        add_chinese(db, user)
        if not count % 500:
            print(count)

    # TODO make it a lib
    results = db["research_result"]
    already_stored = results.find_one({"type": "chinese_count"})
    if already_stored:
        results.update({"type": "chinese_count"},
                       {"$set": {"total_count": count}})
    else:
        results.insert({"type": "chinese_count", "total_count": count})
Example #33
0
def fix_add_created_at_int():
    """Copy each user's ``created_at_int`` onto the ``event`` document.

    Users are read in id ranges of ``gap`` for range indexes ``start`` to
    ``end - 1``; the ``event`` document with the same login is updated.  The
    range index is printed after each range.
    """
    db = DMDatabase().getDB()
    #2730627
    gap = 1000
    start = 7866
    # end id is now set to 10300000
    end = 9000

    for chunk in range(start, end):
        id_range = {"$gte": chunk * gap, "$lt": (chunk + 1) * gap}
        for user in db["user"].find({"id": id_range}):
            db["event"].update(
                {"login": user["login"]},
                {"$set": {"created_at_int": user["created_at_int"]}})
        print(chunk)
Example #34
0
def main():
    """Save the number of ``chinese`` documents matching ``active_user(db)``.

    The value is written to the ``research_result`` document of type
    ``chinese_active_count``, creating that document when it is missing.
    """
    db = DMDatabase().getDB()
    if not db:
        print("Cannot connect to database")
        return

    condition = active_user(db)
    count = db["chinese"].find(condition).count()

    #TODO make it a lib
    results = db["research_result"]
    existing = results.find_one({"type": "chinese_active_count"})
    if not existing:
        results.insert({"type": "chinese_active_count", "total_count": count})
    else:
        results.update({"type": "chinese_active_count"},
                       {"$set": {"total_count": count}})
Example #35
0
def resolve_contributors_30():
    """Re-fetch contributors for repos whose stored count is a multiple of 30.

    For each multiple ``30 * n`` with ``n`` from 1 to 299, the matching
    ``contributors`` documents are first collected into a list, then
    ``get_repo_contributors`` is called for each of them on a
    ``GithubContributors`` built from a test-initialised task.
    """
    db = DMDatabase().getDB()

    task1 = DMTask()
    task1.init_test("github", {
        "name": "fake-contributors",
        "action_type": "loop",
        "start": 0,
        "end": 1000
    })
    e1 = GithubContributors(task1)

    for multiplier in range(1, 300):
        matches = db["contributors"].find({"count": multiplier * 30})
        # Materialise the matches before making any fetch call.
        targets = [{"full_name": doc["full_name"], "id": doc["id"]}
                   for doc in matches]
        for target in targets:
            e1.get_repo_contributors(target["full_name"], target["id"])
Example #36
0
def test():
    """Print the result count of a fixed id-range query on ``event``.

    The function returns right after printing, so the fake-event task code
    that follows the ``return`` is never executed.
    """
    db = DMDatabase().getDB()
    id_filter = {"id": {"$gte": 1000, "$lt": 200}}
    cursor = db["event"].find(id_filter).limit(20)
    if cursor is not None:
        print(cursor.count())
    else:
        print('res is none')
    return

    # Unreachable below this point (kept from the original).
    fake_task = DMTask()
    fake_task.init_test("github", {
        "name": "fake-event",
        "action_type": "loop",
        "start": 6001000,
        "end": 6005000
    })
    event_runner = GithubEvent(fake_task)
    event_runner.runTask()
    fake_task.remove()
Example #37
0
def fix_user():
    """Add integer timestamps to users that have no ``updated_at_int``.

    ``created_at`` and ``updated_at`` are converted with
    ``date_string_to_int`` and stored as ``created_at_int`` and
    ``updated_at_int``.  Prints the number of users processed, then
    "Finish".
    """
    db = DMDatabase().getDB()
    users = db["user"].find({"updated_at_int": {"$exists": False}})

    fixed = 0
    for user in users:
        created_text, updated_text = user["created_at"], user["updated_at"]
        new_fields = {
            "created_at_int": date_string_to_int(created_text),
            "updated_at_int": date_string_to_int(updated_text)
        }
        db["user"].update({"login": user["login"]}, {"$set": new_fields})
        fixed += 1
    print(fixed)
    print("Finish")
Example #38
0
def fix_add_login_one_by_one():
    """Clean up ``followers`` documents lacking ``login`` or ``id``.

    Phase 1 removes, one at a time by ``_id``, every document that has no
    ``login`` field.  Phase 2 repeatedly fetches a document without ``id``
    and looks up the ``user`` with the same login:
      * no user found: the ``followers`` entries for that login are removed;
      * same login as the previous round: treated as a duplicate and removed;
      * otherwise ``created_at_int``, ``id`` and ``count`` are copied from
        the user document.
    Returns when no document without ``id`` remains.
    """
    db = DMDatabase().getDB()
    #2730627
    previous_login = "<null>"

    removed = 0
    while True:
        orphan = db["followers"].find_one({"login": {"$exists": False}})
        if not orphan:
            break
        removed += 1
        db["followers"].remove({"_id": orphan["_id"]})
        print(str(removed) + "  removed")
    print("login all exists")

    processed = 0
    while True:
        pending = db["followers"].find_one({"id": {"$exists": False}})
        if not pending:
            print('exit')
            return

        processed += 1
        login = pending["login"]
        user = db["user"].find_one({"login": login})
        if not user:
            print(login + "  is not found")
            # if we get followers and not sync with user, this problem happens
            db["followers"].remove({"login": login})
        elif previous_login == login:
            print(login + " dup, removed")
            db["followers"].remove({"login": login})
            previous_login = login
        else:
            print(login + " updated ")
            db["followers"].update({"login": login}, {"$set": {
                "created_at_int": user["created_at_int"],
                "id": user["id"],
                "count": user["followers"]
            }})
            previous_login = login

        if processed % 1000 == 0:
            print(processed)
Example #39
0
def get_unfinished_users(db):
    """Queue a ``myThread1`` worker for each user lacking a meta result.

    Users are taken from running ``get_commit_check`` tasks in
    ``task.github``.  Those that already have a ``commit_check_meta_result``
    entry (by login) only produce an "exist" line; the rest get a
    ``myThread1`` appended to ``user_thread`` before ``run_task()`` is
    called.

    Args:
        db: database handle used for the meta-result lookup and the workers.
    """
    running_tasks = DMDatabase().getClient()["task"]["github"].find({
        "status": "running",
        "name": "get_commit_check"
    })

    users = []
    for task in running_tasks:
        for login in task["users"]:
            if db["commit_check_meta_result"].find_one({"login": login}):
                print("exist")
                continue
            users.append(login)

    for user in users:
        user_thread.append(myThread1(db, user))
    print(str(len(users)) + " task received, start to run them!")
    run_task()
Example #40
0
def resolve_event_errors():
    """Run ``error_check`` for ``get_repositories`` tasks with many errors.

    Selects ``task.github`` documents named ``get_repositories`` whose
    ``error_count`` is at least 10.  For each one, a new task with the same
    ``start``/``end`` is initialised and ``GithubRepositories.error_check``
    is called; the returned values are summed and printed.
    """
    failing_tasks = DMDatabase().getClient()["task"]["github"].find({
        "name": "get_repositories",
        "error_count": {"$gte": 10}
    })

    solved = 0
    for entry in failing_tasks:
        task = DMTask()
        task.init("github", {
            "name": "get_repositories",
            "action_type": "loop",
            "start": entry["start"],
            "end": entry["end"]
        })
        solved += GithubRepositories(task).error_check()
    print(str(solved) + " errors solved")
Example #41
0
def test():
    """Call ``addOneUser`` for one fixed login when a database is available."""
    login = "******"
    db = DMDatabase().getDB()
    if db:
        addOneUser(db, login)