Beispiel #1
0
def init_users_thread_by_query(db, query):
    """Split the users matched by ``query`` into ``get_commit_check`` tasks.

    Reads the ``login`` of every document that ``query`` matches in the
    ``user_contributor_result2`` collection and initialises one DMTask in
    "github" per batch of up to 100 logins.  Each task value carries the
    batch's logins, ``str(query)`` and a ``start``/``end`` pair equal to the
    batch's positional range in the result set, offset by 800000.

    Args:
        db: Database handle to read from.  When ``None``, the handle returned
            by ``DMDatabase().getDB()`` is used instead.
        query: Filter handed to ``find``.

    Returns:
        None.
    """
    batch_size = 100
    id_offset = 800000
    if db is None:
        db = DMDatabase().getDB()

    def _init_batch_task(logins, first, last):
        # One task per batch; first/last are positions in the result set.
        task = DMTask()
        task.init("github", {
            "name": "get_commit_check",
            "action_type": "loop",
            "query": str(query),
            "users": logins,
            "start": id_offset + first,
            "end": id_offset + last
        })

    seen = 0
    batch = []
    for item in db["user_contributor_result2"].find(query):
        seen += 1
        batch.append(item["login"])
        if seen % batch_size == 0:
            _init_batch_task(batch, seen - batch_size, seen)
            batch = []

    # Left-over users that did not fill a whole batch.  The range must end at
    # the number of users seen, not at the constant offset + 1.
    if batch:
        _init_batch_task(batch, seen - len(batch), seen)
    return
Beispiel #2
0
    def run(self):
        """Run the task bound to this thread, then optionally keep pulling work.

        The runner in ``self.r`` (if any) is executed once.  When
        ``self.endless`` equals 1 the thread then loops forever: it asks
        ``DMTask().getFreeTasks`` for one "github" task whose status is
        "init", re-binds itself to the first task returned through
        ``self.set`` and runs it.  The method returns as soon as ``self.set``
        does not return 1.
        """
        print("Starting " + str(self.val))
        if self.r:
            self.r.runTask()
        print("Exiting " + str(self.val))

        if self.endless != 1:
            return

        while 1:
            query = {
                "col": "github",
                "num": 1,
                "query": {
                    "status": "init"
                }
            }
            # The context manager releases the lock even if getFreeTasks
            # raises, so one failing thread cannot block all the others.
            with threadLock:
                res = DMTask().getFreeTasks(query)
            # NOTE(review): when nothing is returned the loop retries
            # immediately with no delay - confirm this busy-wait is intended.
            if res:
                for item in res:
                    print(item)
                    if self.set(item["name"], item["start"],
                                item["end"]) == 1:
                        self.r.runTask()
                    else:
                        return
                    print("\n Start another task in the finished thread\n")
                    # Only the first returned task is handled per iteration.
                    break
Beispiel #3
0
def run_free_task(num, endless):
    """Create one thread per free ``get_repositories`` task and start them.

    Asks ``DMTask().getFreeTasks`` for "github" tasks named
    "get_repositories" with status "init", wraps each returned task in a
    ``myThread`` appended to ``user_thread``, prints how many were received
    and finally calls ``run_task()``.

    Args:
        num: Forwarded as the ``num`` field of the task query (presumably the
            maximum number of tasks to fetch - confirm against
            ``DMTask.getFreeTasks``).
        endless: Passed through unchanged to every ``myThread``.
    """
    query = {
        "col": "github",
        "num": num,
        "query": {
            "name": "get_repositories",
            "status": "init"
        }
    }
    received = 0
    # A falsy result is treated as "no tasks", like the worker loop does.
    for item in DMTask().getFreeTasks(query) or []:
        user_thread.append(
            myThread(item["action_type"], item["name"], item["start"],
                     item["end"], endless))
        received += 1
    print(str(received) + " task received, start to run them!")
    run_task()
Beispiel #4
0
def test():
    """Run ``GithubRepo`` once against a temporary "fake-repo" test task.

    The task is set up with ``init_test`` for the range 6001000-6005000 and
    removed again afterwards.
    """
    task1 = DMTask()
    val = {"name": "fake-repo", "action_type": "loop", "start": 6001000, "end": 6005000}

    task1.init_test("github", val)
    try:
        GithubRepo(task1).runTask()
    finally:
        # Remove the test task even when the run raises, so a failed run does
        # not skip the cleanup step.
        task1.remove()
Beispiel #5
0
def init_repo_single_task(login):
    """Initialise a single ``get_repos`` task for the user named ``login``.

    Looks the user up in the ``user`` collection.  When found, a "github"
    task is initialised with ``start`` set to the login and ``end`` set to
    the user's ``id``; otherwise "user not found" is printed.
    """
    database = DMDatabase().getDB()
    repo_task = DMTask()
    user_doc = database["user"].find_one({"login": login})
    if not user_doc:
        print("user not found")
        return
    repo_task.init("github", {
        "name": "get_repos",
        "action_type": "single",
        "start": login,
        "end": user_doc["id"],
    })
Beispiel #6
0
def updated_repositories_task():
    """Initialise a ``get_repositories`` "update" task.

    ``start`` is the value returned by ``get_last_saved_id()`` and ``end``
    is 0.
    """
    newest_saved_id = get_last_saved_id()
    update_task = DMTask()
    update_task.init(
        "github",
        dict(
            name="get_repositories",
            action_type="update",
            start=newest_saved_id,
            end=0,
        ),
    )
Beispiel #7
0
def single_download_demo():
    """Demo: call ``upload_user_followers`` on a "fake-followers" test task.

    The meaning of the three positional arguments passed to
    ``upload_user_followers`` is not visible here - see ``GithubFollowers``.
    """
    demo_task = DMTask()
    demo_task.init_test("github", dict(
        name="fake-followers",
        action_type="loop",
        start=6001000,
        end=6005000,
    ))
    followers = GithubFollowers(demo_task)
    followers.upload_user_followers("002", 3269460, 2)
Beispiel #8
0
def init_event_task():
    """Initialise ``get_events`` loop tasks covering ids 0 to 10300000.

    One "github" task is initialised per window of 1000 ids, 10300 windows
    in total.
    """
    # TODO: 1000 is system defined, maybe add to DMTask? or config file?
    gap = 1000
    # 10300 windows of `gap` ids, i.e. the end id is 10300000
    window_count = 10300
    # NOTE(review): this handle is never read in this function; the call is
    # kept in case it has connection side effects - confirm and remove.
    db = DMDatabase().getDB()
    for lower in range(0, window_count * gap, gap):
        window_task = DMTask()
        window_task.init("github", {
            "name": "get_events",
            "action_type": "loop",
            "start": lower,
            "end": lower + gap,
        })
Beispiel #9
0
def init_commit_task(db, repo):
    """Initialise a ``get_commit`` loop task for the repository ``repo``.

    Args:
        db: Database handle whose ``repositories`` collection is searched.
        repo: Value matched against the ``full_name`` field.

    Nothing is initialised when no matching repository document exists.
    """
    commit_task = DMTask()
    repo_doc = db["repositories"].find_one({"full_name": repo})
    if not repo_doc:
        return
    commit_task.init("github", {
        "name": "get_commit",
        "action_type": "loop",
        "start": repo,
        "end": repo_doc["id"],
    })
Beispiel #10
0
def event(start, end):
    """Initialise and immediately run ``get_events`` tasks.

    One task is created per index in ``range(start, end)``; index ``i``
    covers ids ``i * 1000`` to ``(i + 1) * 1000``.
    """
    gap = 1000
    for index in range(start, end):
        lower = index * gap
        window_task = DMTask()
        window_task.init("github", dict(
            name="get_events",
            action_type="loop",
            start=lower,
            end=lower + gap,
        ))
        GithubEvent(window_task).runTask()
Beispiel #11
0
def gen_event(start, end):
    """Initialise ``get_events`` tasks and call ``generateToFile`` for each.

    One task is created per index in ``range(start, end)``; index ``i``
    covers ids ``i * 1000`` to ``(i + 1) * 1000``.  The path passed to
    ``generateToFile`` is ``./TaskFiles/get_events_start_<first id>``.
    """
    gap = 1000
    for index in range(start, end):
        lower = index * gap
        window_task = DMTask()
        window_task.init("github", dict(
            name="get_events",
            action_type="loop",
            start=lower,
            end=lower + gap,
        ))
        generator = GithubEvent(window_task)
        target_path = "./TaskFiles/get_events_start_" + str(lower)
        generator.generateToFile(target_path)
Beispiel #12
0
def resolve_contributors_loop_errors():
    """Run ``error_check`` over every ``get_contributors`` id window.

    Walks 29000 windows of 1000 ids starting at 0, initialises a task for
    each window and sums the values returned by
    ``GithubContributors.error_check`` (presumably the number of errors it
    resolved - confirm).  The total is printed at the end.
    """
    print("resolve contributors errors")
    gap = 1000
    window_count = 29000
    solved = 0
    for lower in range(0, window_count * gap, gap):
        window_task = DMTask()
        window_task.init("github", {
            "name": "get_contributors",
            "action_type": "loop",
            "start": lower,
            "end": lower + gap,
        })
        solved += GithubContributors(window_task).error_check()
    print(str(solved) + " errors solved")
Beispiel #13
0
def resolve_event_errors():
    """Run ``error_check`` over every ``get_events`` id window.

    Walks 10300 windows of 1000 ids starting at 0 (end id 10300000),
    initialises a task for each window and sums the values returned by
    ``GithubEvent.error_check`` (presumably the number of errors it
    resolved - confirm).  The total is printed at the end.
    """
    gap = 1000
    # 10300 windows of `gap` ids, i.e. the end id is 10300000
    window_count = 10300
    solved = 0
    for lower in range(0, window_count * gap, gap):
        window_task = DMTask()
        window_task.init("github", {
            "name": "get_events",
            "action_type": "loop",
            "start": lower,
            "end": lower + gap,
        })
        solved += GithubEvent(window_task).error_check()
    print(str(solved) + " errors solved")
Beispiel #14
0
def import_event(start, end):
    """Initialise ``get_events`` tasks and run each from its output file.

    One task is created per index in ``range(start, end)``; index ``i``
    covers ids ``i * 1000`` to ``(i + 1) * 1000``.  The path passed to
    ``runTaskFromFile`` is
    ``./TaskFinishedFiles/get_events_start_<first id>.output``.
    """
    gap = 1000
    for index in range(start, end):
        lower = index * gap
        window_task = DMTask()
        window_task.init("github", dict(
            name="get_events",
            action_type="loop",
            start=lower,
            end=lower + gap,
        ))
        importer = GithubEvent(window_task)
        source_path = ("./TaskFinishedFiles/get_events_start_" + str(lower)
                       + ".output")
        importer.runTaskFromFile(source_path)
Beispiel #15
0
def resolve_contributors_30():
    """Re-fetch contributors for repos whose stored count is a multiple of 30.

    For every multiple of 30 from 30 to 8970, the ``contributors``
    collection is searched for documents with exactly that ``count``, and
    ``get_repo_contributors`` is called for each match on a
    "fake-contributors" test task.
    NOTE(review): 30 looks like a page size - confirm.
    """
    db = DMDatabase().getDB()

    fake_task = DMTask()
    fake_task.init_test("github", dict(
        name="fake-contributors",
        action_type="loop",
        start=0,
        end=1000,
    ))
    fetcher = GithubContributors(fake_task)

    for multiple in range(1, 300):
        matches = db["contributors"].find({"count": multiple * 30})
        # Read the whole result set before any re-fetch call is made.
        pending = [(doc["full_name"], doc["id"]) for doc in matches]
        for full_name, repo_id in pending:
            fetcher.get_repo_contributors(full_name, repo_id)
Beispiel #16
0
def test():
    """Print how many ``event`` documents match a fixed id-range filter.

    NOTE(review): the filter asks for ``id >= 1000`` and ``id < 200`` at the
    same time, which no single number satisfies - the bounds look swapped;
    confirm the intended range.
    """
    db = DMDatabase().getDB()
    res = db["event"].find({"id": {"$gte": 1000, "$lt": 200}}).limit(20)
    if res is None:
        print('res is none')
        return
    print(res.count())
Beispiel #17
0
def run_free_task(db, num):
    """Create one thread per running ``get_commit_check`` task and start them.

    Asks ``DMTask().getFreeTasks`` for "github" tasks named
    "get_commit_check" with status "running", wraps each returned task in
    ``myThread(db, task)`` appended to ``user_thread``, prints how many were
    received and then calls ``run_task()``.

    Args:
        db: Passed through unchanged to every ``myThread``.
        num: Forwarded as the ``num`` field of the task query.
    """
    task_filter = {"status": "running", "name": "get_commit_check"}
    free_tasks = DMTask().getFreeTasks(
        {"col": "github", "num": num, "query": task_filter})
    received = 0
    for task_doc in free_tasks:
        user_thread.append(myThread(db, task_doc))
        received += 1
    print(str(received) + " task received, start to run them!\n\n\n\n")
    run_task()
Beispiel #18
0
    def set(self, action_type, cmd, start, end):
        """Bind this object to a fresh DMTask and the runner matching ``cmd``.

        ``self.task``, ``self.r`` and ``self.val`` are always reset.  When
        ``cmd`` is a known command, ``self.val["name"]`` is set to it, the
        task is initialised in "github" and ``self.r`` becomes the matching
        runner.

        Args:
            action_type: Stored under "action_type" in the task value.
            cmd: Command name selecting the runner class.
            start: Stored under "start" in the task value.
            end: Stored under "end" in the task value.

        Returns:
            1 when ``cmd`` is recognised; 0 after printing a failure message
            otherwise (``self.r`` is then ``None``).
        """
        # Ordered (command, factory) pairs.  The factories are lambdas so a
        # Github* class name is only looked up when its command is requested,
        # and only after the task has been initialised.
        runner_factories = (
            ("get_repos", lambda task: GithubRepo(task)),
            ("get_followers", lambda task: GithubFollowers(task)),
            ("get_events", lambda task: GithubEvent(task)),
            # TODO: do not support now
            # NOTE(review): unclear how many of the entries below that
            # remark covers.
            ("get_users", lambda task: GithubUser(task)),
            ("get_repositories", lambda task: GithubRepositories(task)),
            ("get_contributors", lambda task: GithubContributors(task)),
            ("get_commit", lambda task: GithubCommit(task)),
            ("get_commit_p1", lambda task: GithubCommit(task)),
        )

        self.task = DMTask()
        self.r = None
        self.val = {"action_type": action_type, "start": start, "end": end}

        for name, build_runner in runner_factories:
            if cmd == name:
                self.val["name"] = name
                self.task.init("github", self.val)
                self.r = build_runner(self.task)
                return 1

        print("Failed to init the task")
        return 0
Beispiel #19
0
def resolve_event_errors():
    """Run ``error_check`` for ``get_repositories`` tasks with many errors.

    Selects records in the ``task`` database's ``github`` collection named
    "get_repositories" whose ``error_count`` is at least 10, initialises a
    task over each record's ``start``/``end`` range and sums the values
    returned by ``GithubRepositories.error_check``.  The total is printed
    at the end.
    NOTE(review): despite the function name, only "get_repositories" tasks
    are handled here.
    """
    client = DMDatabase().getClient()
    failing_records = client["task"]["github"].find(
        {"name": "get_repositories", "error_count": {"$gte": 10}})
    solved = 0
    for record in failing_records:
        retry_task = DMTask()
        retry_task.init("github", dict(
            name="get_repositories",
            action_type="loop",
            start=record["start"],
            end=record["end"],
        ))
        solved += GithubRepositories(retry_task).error_check()
    print(str(solved) + " errors solved")
Beispiel #20
0
 def __init__(self, db, val):
     """Set up the thread and initialise its DMTask from ``val``.

     Args:
         db: Stored as ``self.db``.
         val: Task value; stored as ``self.val`` and passed to
             ``DMTask.init`` for the "github" collection.
     """
     threading.Thread.__init__(self)
     self.val = val
     self.db = db
     self.task = DMTask()
     self.task.init("github", self.val)