Exemple #1
0
def init_users_thread_by_query(db, query):
    """Initialise "get_commit_check" tasks for the users matching ``query``.

    Logins are read from the "user_contributor_result2" collection and
    grouped into batches of 100. One DMTask is initialised per full batch,
    plus one for a trailing partial batch when the number of users is not a
    multiple of 100. A task's "start" and "end" are 800000 plus the number of
    users seen before the batch and after the batch respectively.

    Args:
        db: Currently ignored; the handle is re-fetched from DMDatabase
            below, exactly as the previous code did.
            NOTE(review): confirm whether the caller's handle was meant to
            be used instead.
        query: Filter passed to ``find``; ``str(query)`` is stored in every
            task.
    """
    pieces = 100
    offset = 800000
    db = DMDatabase().getDB()

    def init_batch_task(users, first, last):
        # Initialise one task for a batch of logins.
        task = DMTask()
        task.init("github", {
            "name": "get_commit_check",
            "action_type": "loop",
            "query": str(query),
            "users": users,
            "start": offset + first,
            "end": offset + last,
        })

    seen = 0
    batch = []
    for item in db["user_contributor_result2"].find(query):
        seen += 1
        batch.append(item["login"])
        if seen % pieces == 0:
            init_batch_task(batch, seen - pieces, seen)
            batch = []

    # Trailing partial batch. Its end is the total number of users seen; the
    # previous code used the constant 1 here, which put the end below the
    # start.
    if batch:
        init_batch_task(batch, seen - len(batch), seen)
Exemple #2
0
def init_commit_task(db, repo):
    """Initialise a "get_commit" loop task for the repository named ``repo``.

    The repository is looked up by "full_name" in ``db["repositories"]``.
    When it is found, the task's "start" is ``repo`` and its "end" is the
    record's "id"; when it is not found, nothing is initialised.
    """
    task = DMTask()
    record = db["repositories"].find_one({"full_name": repo})
    if not record:
        return
    settings = {
        "name": "get_commit",
        "action_type": "loop",
        "start": repo,
        "end": record["id"],
    }
    task.init("github", settings)
Exemple #3
0
def event(start, end):
    """Run a "get_events" loop task for every block index in [start, end).

    Block index ``n`` covers ``n * 1000`` up to ``(n + 1) * 1000``. For each
    block a DMTask is initialised and ``runTask`` is called on a GithubEvent
    built from it.
    """
    block_size = 1000
    for index in range(start, end):
        lower = index * block_size
        task = DMTask()
        task.init("github", {
            "name": "get_events",
            "action_type": "loop",
            "start": lower,
            "end": lower + block_size,
        })
        GithubEvent(task).runTask()
Exemple #4
0
def gen_event(start, end):
    """Call ``generateToFile`` for a "get_events" task per block in [start, end).

    Block index ``n`` covers ``n * 1000`` up to ``(n + 1) * 1000``; the target
    path is "./TaskFiles/get_events_start_" followed by the block's start.
    """
    block_size = 1000
    for index in range(start, end):
        lower = index * block_size
        upper = lower + block_size
        task = DMTask()
        settings = dict(name="get_events", action_type="loop", start=lower, end=upper)
        task.init("github", settings)
        runner = GithubEvent(task)
        runner.generateToFile("./TaskFiles/get_events_start_" + str(lower))
Exemple #5
0
def import_event(start, end):
    """Call ``runTaskFromFile`` for a "get_events" task per block in [start, end).

    Block index ``n`` covers ``n * 1000`` up to ``(n + 1) * 1000``; the input
    path is "./TaskFinishedFiles/get_events_start_<block start>.output".
    """
    block_size = 1000
    for index in range(start, end):
        lower = index * block_size
        task = DMTask()
        task.init("github", {
            "name": "get_events",
            "action_type": "loop",
            "start": lower,
            "end": lower + block_size,
        })
        source_path = "./TaskFinishedFiles/get_events_start_" + str(lower) + ".output"
        GithubEvent(task).runTaskFromFile(source_path)
Exemple #6
0
def init_repo_single_task(login):
    """Initialise a "get_repos" single task for the user with this login.

    The user is looked up by "login" in the "user" collection. When found,
    the task's "start" is ``login`` and its "end" is the user's "id";
    otherwise "user not found" is printed and no task is initialised.
    """
    database = DMDatabase().getDB()
    task = DMTask()
    user = database["user"].find_one({"login": login})
    if not user:
        print("user not found")
        return
    task.init("github", {
        "name": "get_repos",
        "action_type": "single",
        "start": login,
        "end": user["id"],
    })
Exemple #7
0
def updated_repositories_task():
    """Initialise a "get_repositories" update task.

    The task's "start" is the value returned by ``get_last_saved_id`` and
    its "end" is 0.
    """
    resume_from = get_last_saved_id()
    task = DMTask()
    settings = dict(name="get_repositories", action_type="update", start=resume_from, end=0)
    task.init("github", settings)
Exemple #8
0
def init_event_task():
    """Initialise one "get_events" loop task per block of 1000 ids.

    Blocks 0 to 10299 are covered, i.e. start values 0 up to 10,299,000 with
    end values 1000 above each start.
    """
    # TODO: the block size of 1000 is system defined; maybe move it to DMTask
    # or to a config file?
    block_size = 1000
    first_block = 0
    # 10300 blocks of 1000 ids: the end id is currently 10300000.
    last_block = 10300
    # The returned handle is not used below; the call is kept as it was.
    DMDatabase().getDB()
    for block in range(first_block, last_block):
        lower = block * block_size
        task = DMTask()
        task.init("github", {
            "name": "get_events",
            "action_type": "loop",
            "start": lower,
            "end": lower + block_size,
        })
Exemple #9
0
def init_followers_task():
    """Initialise one "get_followers" loop task per block of 1000 ids.

    Blocks 0 to 10299 are covered; each task runs from ``block * 1000`` to
    ``(block + 1) * 1000``.
    """
    # TODO: the step of 1000 is system defined; maybe move it to DMTask or to
    # a config file?
    step = 1000
    # The end id is currently 10300000 (10300 blocks of 1000).
    block_count = 10300
    # Handle is unused below; the call itself is preserved.
    DMDatabase().getDB()
    for block in range(0, block_count):
        settings = dict(
            name="get_followers",
            action_type="loop",
            start=block * step,
            end=(block + 1) * step,
        )
        task = DMTask()
        task.init("github", settings)
Exemple #10
0
def init_commit_task(db, repo):
    """Initialise a "get_commit" loop task for one repository, if it exists.

    ``db["repositories"]`` is searched for a record whose "full_name" equals
    ``repo``. A task with "start" set to ``repo`` and "end" set to the
    record's "id" is initialised only when such a record is returned.
    """
    task = DMTask()
    found = db["repositories"].find_one({"full_name": repo})
    if found:
        repo_id = found["id"]
        task.init(
            "github",
            dict(name="get_commit", action_type="loop", start=repo, end=repo_id),
        )
def resolve_event_errors():
    """Run ``error_check`` for "get_repositories" tasks with 10 or more errors.

    Matching documents are read from ``client["task"]["github"]``. For each
    one a task with the same "start" and "end" is initialised and the value
    returned by ``GithubRepositories.error_check`` is added to a total, which
    is printed at the end.
    """
    client = DMDatabase().getClient()
    failing_tasks = client["task"]["github"].find(
        {"name": "get_repositories", "error_count": {"$gte": 10}}
    )
    solved_total = 0
    for entry in failing_tasks:
        task = DMTask()
        task.init("github", {
            "name": "get_repositories",
            "action_type": "loop",
            "start": entry["start"],
            "end": entry["end"],
        })
        solved_total += GithubRepositories(task).error_check()
    print(str(solved_total) + " errors solved")
Exemple #12
0
def event(start, end):
    """Initialise and run "get_events" loop tasks for blocks start..end-1.

    Every block spans 1000 ids: block ``n`` goes from ``n * 1000`` to
    ``(n + 1) * 1000``. Each task is wrapped in a GithubEvent whose
    ``runTask`` is then called.
    """
    span = 1000
    for block in range(start, end):
        task = DMTask()
        settings = dict(
            name="get_events",
            action_type="loop",
            start=block * span,
            end=(block + 1) * span,
        )
        task.init("github", settings)
        runner = GithubEvent(task)
        runner.runTask()
Exemple #13
0
def resolve_contributors_loop_errors():
    """Run ``error_check`` on every "get_contributors" loop task block.

    Blocks 0 to 28999 of 1000 ids each are visited. The values returned by
    ``GithubContributors.error_check`` are added up and the total is printed.
    """
    print("resolve contributors errors")
    block_size = 1000
    first_block, last_block = 0, 29000
    solved_total = 0
    for block in range(first_block, last_block):
        lower = block * block_size
        task = DMTask()
        task.init("github", {
            "name": "get_contributors",
            "action_type": "loop",
            "start": lower,
            "end": lower + block_size,
        })
        solved_total += GithubContributors(task).error_check()
    print(str(solved_total) + " errors solved")
Exemple #14
0
def resolve_event_errors():
    """Run ``error_check`` on every "get_events" loop task block.

    Blocks 0 to 10299 of 1000 ids each are visited. The values returned by
    ``GithubEvent.error_check`` are added up and the total is printed.
    """
    block_size = 1000
    # 10300 blocks of 1000 ids: the end id is currently 10300000.
    block_count = 10300
    solved_total = 0
    for block in range(0, block_count):
        settings = dict(
            name="get_events",
            action_type="loop",
            start=block * block_size,
            end=(block + 1) * block_size,
        )
        task = DMTask()
        task.init("github", settings)
        checker = GithubEvent(task)
        solved_total += checker.error_check()
    print(str(solved_total) + " errors solved")
Exemple #15
0
def gen_event(start, end):
    """Generate a task file for each "get_events" block in [start, end).

    Block ``n`` spans ``n * 1000`` to ``(n + 1) * 1000``. For each block a
    task is initialised and ``GithubEvent.generateToFile`` is called with
    the path "./TaskFiles/get_events_start_<block start>".
    """
    span = 1000
    for block in range(start, end):
        first_id = block * span
        task = DMTask()
        task.init("github", {
            "name": "get_events",
            "action_type": "loop",
            "start": first_id,
            "end": first_id + span,
        })
        target_path = "./TaskFiles/get_events_start_" + str(first_id)
        GithubEvent(task).generateToFile(target_path)
Exemple #16
0
class myThread(threading.Thread):
    """Thread that initialises a "github" DMTask and processes its users.

    ``val`` is the task description; ``run`` reads its "users" entry.
    """

    def __init__(self, db, val):
        threading.Thread.__init__(self)
        self.db = db
        self.val = val
        self.task = DMTask()
        self.task.init("github", val)

    def run(self):
        """Mark the task running, process the users, then mark it finished."""

        def report(status, percent):
            # Push a status update stamped with the current UTC time.
            self.task.update({
                "status": status,
                "percent": percent,
                "update_date": datetime.datetime.utcnow(),
            })

        print("Start the thread" + str(self.val))
        report("running", 0.0)
        get_commit_repos_by_users(self.db, self.val["users"])
        report("finish", 1.0)

        print("Exist the thread")
Exemple #17
0
def import_event(start, end):
    """Replay finished "get_events" task files for blocks start..end-1.

    Block ``n`` spans ``n * 1000`` to ``(n + 1) * 1000``. For each block a
    task is initialised and ``GithubEvent.runTaskFromFile`` is called with
    "./TaskFinishedFiles/get_events_start_<block start>.output".
    """
    span = 1000
    for block in range(start, end):
        first_id = block * span
        settings = dict(
            name="get_events",
            action_type="loop",
            start=first_id,
            end=(block + 1) * span,
        )
        task = DMTask()
        task.init("github", settings)
        runner = GithubEvent(task)
        finished_path = "./TaskFinishedFiles/get_events_start_" + str(first_id) + ".output"
        runner.runTaskFromFile(finished_path)
Exemple #18
0
def init_users_thread_by_query(db, query):
    """Split the users matching ``query`` into "get_commit_check" tasks.

    Logins come from the "user_contributor_result2" collection. A DMTask is
    initialised for every 100 logins, and one more for the leftover logins
    when the total is not a multiple of 100. Each task records
    ``str(query)``, its list of logins, and "start"/"end" values equal to
    800000 plus the user count before and after the batch.

    Args:
        db: Currently ignored; it is replaced by ``DMDatabase().getDB()``
            below, exactly as before.
            NOTE(review): confirm whether the argument should be honoured.
        query: Filter passed to ``find``.
    """
    pieces = 100
    base = 800000
    db = DMDatabase().getDB()
    i = 0
    user_list = []
    for item in db["user_contributor_result2"].find(query):
        i += 1
        user_list.append(item["login"])
        if i % pieces == 0:
            task = DMTask()
            task.init("github", {
                "name": "get_commit_check",
                "action_type": "loop",
                "query": str(query),
                "users": user_list,
                "start": base + i - pieces,
                "end": base + i,
            })
            user_list = []

    remainder = i % pieces
    if remainder != 0:
        task = DMTask()
        task.init("github", {
            "name": "get_commit_check",
            "action_type": "loop",
            "query": str(query),
            "users": user_list,
            "start": base + i - remainder,
            # Was ``base + 1``, which left the last task's end below its
            # start; the batch ends at the total user count.
            "end": base + i,
        })
Exemple #19
0
def resolve_event_errors():
    """Re-check "get_repositories" tasks whose "error_count" is at least 10.

    The task documents are fetched from ``client["task"]["github"]``. Each
    is re-initialised as a loop task with the same "start"/"end", and the
    return values of ``GithubRepositories.error_check`` are summed and
    printed.
    """
    selector = {"name": "get_repositories", "error_count": {"$gte": 10}}
    client = DMDatabase().getClient()
    cursor = client["task"]["github"].find(selector)
    solved_total = 0
    for record in cursor:
        task = DMTask()
        settings = dict(
            name="get_repositories",
            action_type="loop",
            start=record["start"],
            end=record["end"],
        )
        task.init("github", settings)
        checker = GithubRepositories(task)
        solved_total += checker.error_check()
    print(str(solved_total) + " errors solved")
Exemple #20
0
class myThread(threading.Thread):
    """Thread wrapper around one "github" DMTask described by ``val``.

    The task is initialised in the constructor; ``run`` calls
    ``get_commit_repos_by_users`` with ``val["users"]`` and updates the
    task's status before and after.
    """

    def __init__(self, db, val):
        threading.Thread.__init__(self)
        self.db = db
        self.val = val
        self.task = DMTask()
        self.task.init("github", val)

    def run(self):
        """Process the task's users, updating the task status around the work."""
        print("Start the thread" + str(self.val))

        started = dict(status="running", percent=0.0, update_date=datetime.datetime.utcnow())
        self.task.update(started)

        users = self.val["users"]
        get_commit_repos_by_users(self.db, users)

        finished = dict(status="finish", percent=1.0, update_date=datetime.datetime.utcnow())
        self.task.update(finished)

        print("Exist the thread")
def updated_contributors_task():
    """Initialise a "get_contributors" update task.

    The task's "start" is the value returned by ``get_last_saved_id`` and
    its "end" is 0.
    """
    resume_from = get_last_saved_id()
    task = DMTask()
    task.init("github", {
        "name": "get_contributors",
        "action_type": "update",
        "start": resume_from,
        "end": 0,
    })
Exemple #22
0
class myThread(threading.Thread):
    """Thread that runs one GitHub task and can then keep pulling new ones.

    The constructor prepares a DMTask and the Github* runner matching
    ``cmd``. ``run`` executes that runner; when ``endless`` is 1 it then
    keeps requesting tasks with status "init" from ``DMTask.getFreeTasks``
    and runs them in this same thread.
    """

    def __init__(self, action_type, cmd, start, end, endless):
        threading.Thread.__init__(self)
        self.endless = endless
        self.set(action_type, cmd, start, end)

    def set(self, action_type, cmd, start, end):
        """Prepare ``self.task``, ``self.val`` and ``self.r`` for ``cmd``.

        Args:
            action_type: Stored under "action_type" in the task description.
            cmd: Task name; selects which Github* runner is created.
            start: Stored under "start" in the task description.
            end: Stored under "end" in the task description.

        Returns:
            1 when ``cmd`` is recognised and the task was initialised;
            0 otherwise, in which case ``self.r`` stays None and the task is
            not initialised.
        """
        self.task = DMTask()
        self.r = None
        self.val = {"action_type": action_type, "start": start, "end": end}
        if cmd == "get_repos":
            runner_class = GithubRepo
        elif cmd == "get_followers":
            runner_class = GithubFollowers
        elif cmd == "get_events":
            runner_class = GithubEvent
        # TODO: not supported for now
        elif cmd == "get_users":
            runner_class = GithubUser
        elif cmd == "get_repositories":
            runner_class = GithubRepositories
        elif cmd == "get_contributors":
            runner_class = GithubContributors
        elif cmd == "get_commit":
            runner_class = GithubCommit
        elif cmd == "get_commit_p1":
            runner_class = GithubCommit
        else:
            print("Failed to init the task")
            return 0
        # Every recognised command stores its own name and uses the same
        # initialisation sequence.
        self.val["name"] = cmd
        self.task.init("github", self.val)
        self.r = runner_class(self.task)
        return 1

    def run(self):
        """Run the prepared task, then poll for further tasks if endless.

        Polling stops (the method returns) as soon as a fetched task has a
        name that ``set`` does not recognise.
        """
        print("Starting " + str(self.val))
        if self.r:
            # self.r.runFix2Task()
            self.r.runTask()
        print("Exiting " + str(self.val))

        if self.endless != 1:
            return

        # NOTE(review): when no task is returned this loop retries
        # immediately, without any pause.
        while 1:
            query = {"col": "github", "num": 1, "query": {"status": "init"}}
            # query = {"col": "github", "num": 1,
            #          "query": {"status": "fixed", "name": "get_contributors"}}
            threadLock.acquire()
            try:
                res = DMTask().getFreeTasks(query)
            finally:
                # Release even if the lookup raises, so that other threads
                # are not blocked forever.
                threadLock.release()
            if not res:
                continue
            for item in res:
                print(item)
                # ``set`` takes four arguments; the previous call omitted
                # action_type and raised TypeError. Presumably the task
                # document carries "action_type" (it is part of every task
                # description here); fall back to the current one if not.
                action_type = item.get("action_type", self.val["action_type"])
                if self.set(action_type, item["name"], item["start"], item["end"]) == 1:
                    # self.r.runFix2Task()
                    self.r.runTask()
                else:
                    return
                print("\n Start another task in the finished thread\n")
                # Only the first returned task is handled per poll.
                break
Exemple #23
0
class myThread(threading.Thread):
    """Worker thread for a single GitHub task, optionally self-refilling.

    A DMTask and the Github* runner for ``cmd`` are set up on construction.
    ``run`` executes the runner and, if ``endless`` equals 1, repeatedly
    asks ``DMTask.getFreeTasks`` for a task with status "init" and runs it.
    """

    def __init__(self, action_type, cmd, start, end, endless):
        threading.Thread.__init__(self)
        self.endless = endless
        self.set(action_type, cmd, start, end)

    def set(self, action_type, cmd, start, end):
        """Create the task and runner for ``cmd``.

        ``self.val`` always receives "action_type", "start" and "end"; the
        "name" entry and the task initialisation only happen for a
        recognised ``cmd``.

        Returns:
            1 on success, 0 when ``cmd`` is unknown (``self.r`` is None).
        """
        self.task = DMTask()
        self.r = None
        self.val = {"action_type": action_type, "start": start, "end": end}

        if cmd == "get_repos":
            runner_class = GithubRepo
        elif cmd == "get_followers":
            runner_class = GithubFollowers
        elif cmd == "get_events":
            runner_class = GithubEvent
        # TODO: not supported for now
        elif cmd == "get_users":
            runner_class = GithubUser
        elif cmd == "get_repositories":
            runner_class = GithubRepositories
        elif cmd == "get_contributors":
            runner_class = GithubContributors
        elif cmd in ("get_commit", "get_commit_p1"):
            # Both commit commands use the same runner class.
            runner_class = GithubCommit
        else:
            print("Failed to init the task")
            return 0

        self.val["name"] = cmd
        self.task.init("github", self.val)
        self.r = runner_class(self.task)
        return 1

    def run(self):
        """Execute the current task and, if endless, keep fetching more.

        Returns early when a fetched task cannot be set up by ``set``.
        """
        print("Starting " + str(self.val))
        if self.r:
            # self.r.runFix2Task()
            self.r.runTask()
        print("Exiting " + str(self.val))

        if self.endless == 1:
            # NOTE(review): an empty result makes this loop poll again
            # straight away, with no delay.
            while 1:
                query = {
                    "col": "github",
                    "num": 1,
                    "query": {
                        "status": "init"
                    }
                }
                # query = {"col": "github", "num": 1,
                #          "query": {"status": "fixed", "name": "get_contributors"}}
                threadLock.acquire()
                try:
                    res = DMTask().getFreeTasks(query)
                finally:
                    # Always release, otherwise an exception in the lookup
                    # would leave the lock held.
                    threadLock.release()
                if res:
                    for item in res:
                        print(item)
                        # The earlier call passed only name/start/end, but
                        # ``set`` needs action_type first, so it raised
                        # TypeError. Presumably the document has an
                        # "action_type" entry; otherwise reuse the current one.
                        next_action = item.get("action_type", self.val["action_type"])
                        if self.set(next_action, item["name"], item["start"],
                                    item["end"]) == 1:
                            # self.r.runFix2Task()
                            self.r.runTask()
                        else:
                            return
                        print("\n Start another task in the finished thread\n")
                        # Handle only the first returned task per poll.
                        break