def init_users_thread_by_query(db, query):
    """Queue "get_commit_check" tasks for the logins matched by ``query``.

    Logins are read from the "user_contributor_result2" collection and
    grouped into batches of ``pieces`` entries. One github task is
    initialised per full batch, plus one for a trailing partial batch.

    Args:
        db: Ignored -- it is immediately replaced by
            ``DMDatabase().getDB()``, exactly as before.
        query: Filter handed to ``find``; its ``str()`` form is stored in
            every task under "query".
    """
    pieces = 100
    base = 800000  # constant offset added to the running count for start/end

    # NOTE(review): the ``db`` argument has always been overwritten here.
    # Kept unchanged -- confirm whether callers expect their handle to be used.
    db = DMDatabase().getDB()

    def queue_batch(users, first, last):
        # Initialise one task describing the logins counted in (first, last].
        task = DMTask()
        task.init("github", {
            "name": "get_commit_check",
            "action_type": "loop",
            "query": str(query),
            "users": users,
            "start": base + first,
            "end": base + last,
        })

    count = 0
    batch = []
    for item in db["user_contributor_result2"].find(query):
        count += 1
        batch.append(item["login"])
        if count % pieces == 0:
            queue_batch(batch, count - pieces, count)
            batch = []

    if count % pieces != 0:
        # Fix: the end of the partial batch used the literal ``1`` instead of
        # the running count, producing an "end" smaller than "start".
        queue_batch(batch, count - count % pieces, count)
    return
def init_commit_task(db, repo):
    """Initialise a "get_commit" loop task for the repository ``repo``.

    ``repo`` is looked up by "full_name" in ``db["repositories"]``. When no
    document is found, no task is initialised.

    Args:
        db: Database handle exposing a "repositories" collection.
        repo: Full repository name; also stored as the task's "start".
    """
    task = DMTask()
    record = db["repositories"].find_one({"full_name": repo})
    if not record:
        return
    task.init("github", {
        "name": "get_commit",
        "action_type": "loop",
        "start": repo,
        "end": record["id"],
    })
def event(start, end):
    """Run one "get_events" loop task for every index in ``range(start, end)``.

    For index ``i`` the task's "start" is ``i * 1000`` and its "end" is
    ``(i + 1) * 1000``.
    """
    gap = 1000
    for index in range(start, end):
        lower = index * gap
        task = DMTask()
        task.init("github", {
            "name": "get_events",
            "action_type": "loop",
            "start": lower,
            "end": lower + gap,
        })
        GithubEvent(task).runTask()
def gen_event(start, end):
    """Generate a task file for each "get_events" chunk in ``range(start, end)``.

    For index ``i`` the task's "start" is ``i * 1000`` and its "end" is
    ``(i + 1) * 1000``; the output path is
    "./TaskFiles/get_events_start_" followed by the chunk's start value.
    """
    gap = 1000
    for index in range(start, end):
        lower = index * gap
        task = DMTask()
        task.init("github", {
            "name": "get_events",
            "action_type": "loop",
            "start": lower,
            "end": lower + gap,
        })
        runner = GithubEvent(task)
        runner.generateToFile("./TaskFiles/get_events_start_" + str(lower))
def import_event(start, end):
    """Run each "get_events" chunk in ``range(start, end)`` from its output file.

    For index ``i`` the task's "start" is ``i * 1000`` and its "end" is
    ``(i + 1) * 1000``; the file read is
    "./TaskFinishedFiles/get_events_start_<start>.output".
    """
    gap = 1000
    for index in range(start, end):
        lower = index * gap
        task = DMTask()
        task.init("github", {
            "name": "get_events",
            "action_type": "loop",
            "start": lower,
            "end": lower + gap,
        })
        runner = GithubEvent(task)
        source_path = "./TaskFinishedFiles/get_events_start_" + str(lower) + ".output"
        runner.runTaskFromFile(source_path)
def init_repo_single_task(login):
    """Initialise a single "get_repos" task for the user ``login``.

    The user is looked up by "login" in the "user" collection. When no
    document is found, "user not found" is printed and no task is
    initialised.
    """
    db = DMDatabase().getDB()
    task = DMTask()
    user = db["user"].find_one({"login": login})
    if not user:
        print("user not found")
        return
    task.init("github", {
        "name": "get_repos",
        "action_type": "single",
        "start": login,
        "end": user["id"],
    })
def updated_repositories_task():
    """Initialise a "get_repositories" update task.

    The task's "start" is the value returned by ``get_last_saved_id()`` and
    its "end" is 0.
    """
    resume_from = get_last_saved_id()
    task = DMTask()
    payload = dict(
        name="get_repositories",
        action_type="update",
        start=resume_from,
        end=0,
    )
    task.init("github", payload)
def init_event_task():
    """Initialise the "get_events" loop tasks for chunks 0 .. 10299.

    Chunk ``i`` gets "start" ``i * 1000`` and "end" ``(i + 1) * 1000``.
    """
    # TODO: 1000 is system defined, maybe add to DMTask? or config file?
    gap = 1000
    first_chunk = 0
    # end id is now set to 10300000
    last_chunk = 10300
    # The returned handle is not used below; the call is kept as it was.
    DMDatabase().getDB()
    for chunk in range(first_chunk, last_chunk):
        lower = chunk * gap
        task = DMTask()
        task.init("github", {
            "name": "get_events",
            "action_type": "loop",
            "start": lower,
            "end": lower + gap,
        })
def init_followers_task():
    """Initialise the "get_followers" loop tasks for chunks 0 .. 10299.

    Chunk ``i`` gets "start" ``i * 1000`` and "end" ``(i + 1) * 1000``.
    """
    # TODO: 1000 is system defined, maybe add to DMTask? or config file?
    gap = 1000
    first_chunk = 0
    # end id is now set to 10300000
    last_chunk = 10300
    # The returned handle is not used below; the call is kept as it was.
    DMDatabase().getDB()
    for chunk in range(first_chunk, last_chunk):
        lower = chunk * gap
        task = DMTask()
        task.init("github", {
            "name": "get_followers",
            "action_type": "loop",
            "start": lower,
            "end": lower + gap,
        })
def init_commit_task(db, repo):
    """Initialise a "get_commit" loop task for the repository ``repo``.

    The repository document is fetched from ``db["repositories"]`` by its
    "full_name". If there is no such document the function does nothing
    further.

    Args:
        db: Database handle exposing a "repositories" collection.
        repo: Full repository name; stored as the task's "start" while the
            document's "id" becomes the task's "end".
    """
    task = DMTask()
    found = db["repositories"].find_one({"full_name": repo})
    if found:
        payload = dict(
            name="get_commit",
            action_type="loop",
            start=repo,
            end=found["id"],
        )
        task.init("github", payload)
def resolve_event_errors():
    """Run ``error_check`` on "get_repositories" tasks with error_count >= 10.

    Matching task documents are read from ``client["task"]["github"]``. Each
    one is re-initialised as a loop task with the same "start"/"end" and
    checked through ``GithubRepositories``. The sum of the ``error_check``
    results is printed as "<n> errors solved".
    """
    client = DMDatabase().getClient()
    failing = client["task"]["github"].find({
        "name": "get_repositories",
        "error_count": {"$gte": 10},
    })
    solved = 0
    for entry in failing:
        task = DMTask()
        task.init("github", {
            "name": "get_repositories",
            "action_type": "loop",
            "start": entry["start"],
            "end": entry["end"],
        })
        solved += GithubRepositories(task).error_check()
    print(str(solved) + " errors solved")
def event(start, end):
    """Execute the "get_events" loop task of each chunk in ``range(start, end)``.

    Chunk ``i`` is initialised with "start" ``i * 1000`` and
    "end" ``(i + 1) * 1000`` and then run through ``GithubEvent``.
    """
    gap = 1000
    for chunk in range(start, end):
        task = DMTask()
        payload = dict(
            name="get_events",
            action_type="loop",
            start=chunk * gap,
            end=(chunk + 1) * gap,
        )
        task.init("github", payload)
        worker = GithubEvent(task)
        worker.runTask()
def resolve_contributors_loop_errors():
    """Run ``error_check`` over the "get_contributors" chunks 0 .. 28999.

    Chunk ``i`` is initialised with "start" ``i * 1000`` and
    "end" ``(i + 1) * 1000``. The sum of the ``error_check`` results is
    printed as "<n> errors solved".
    """
    print("resolve contributors errors")
    gap = 1000
    first_chunk = 0
    last_chunk = 29000
    solved = 0
    for chunk in range(first_chunk, last_chunk):
        lower = chunk * gap
        task = DMTask()
        task.init("github", {
            "name": "get_contributors",
            "action_type": "loop",
            "start": lower,
            "end": lower + gap,
        })
        solved += GithubContributors(task).error_check()
    print(str(solved) + " errors solved")
def resolve_event_errors():
    """Run ``error_check`` over the "get_events" chunks 0 .. 10299.

    Chunk ``i`` is initialised with "start" ``i * 1000`` and
    "end" ``(i + 1) * 1000``. The sum of the ``error_check`` results is
    printed as "<n> errors solved".
    """
    gap = 1000
    first_chunk = 0
    # end id is now set to 10300000
    last_chunk = 10300
    solved = 0
    for chunk in range(first_chunk, last_chunk):
        lower = chunk * gap
        task = DMTask()
        task.init("github", {
            "name": "get_events",
            "action_type": "loop",
            "start": lower,
            "end": lower + gap,
        })
        solved += GithubEvent(task).error_check()
    print(str(solved) + " errors solved")
def gen_event(start, end):
    """Write a task file for each "get_events" chunk in ``range(start, end)``.

    Chunk ``i`` is initialised with "start" ``i * 1000`` and
    "end" ``(i + 1) * 1000``; ``generateToFile`` receives
    "./TaskFiles/get_events_start_" plus the chunk's start value.
    """
    gap = 1000
    for chunk in range(start, end):
        task = DMTask()
        payload = dict(
            name="get_events",
            action_type="loop",
            start=chunk * gap,
            end=(chunk + 1) * gap,
        )
        task.init("github", payload)
        worker = GithubEvent(task)
        target = "./TaskFiles/get_events_start_" + str(chunk * gap)
        worker.generateToFile(target)
class myThread(threading.Thread):
    """Thread that runs ``get_commit_repos_by_users`` for one task payload.

    The payload ``val`` is used to initialise a github ``DMTask`` and must
    contain a "users" entry, which is passed to the worker function.
    """

    def __init__(self, db, val):
        threading.Thread.__init__(self)
        self.db = db
        self.val = val
        self.task = DMTask()
        self.task.init("github", val)

    def _report(self, status, percent):
        # Push a status/progress update stamped with the current UTC time.
        self.task.update({
            "status": status,
            "percent": percent,
            "update_date": datetime.datetime.utcnow(),
        })

    def run(self):
        """Mark the task running, process its users, then mark it finished."""
        print("Start the thread" + str(self.val))
        self._report("running", 0.0)
        get_commit_repos_by_users(self.db, self.val["users"])
        self._report("finish", 1.0)
        print("Exist the thread")
def import_event(start, end):
    """Replay each "get_events" chunk in ``range(start, end)`` from a file.

    Chunk ``i`` is initialised with "start" ``i * 1000`` and
    "end" ``(i + 1) * 1000``; ``runTaskFromFile`` receives
    "./TaskFinishedFiles/get_events_start_<start>.output".
    """
    gap = 1000
    for chunk in range(start, end):
        task = DMTask()
        payload = dict(
            name="get_events",
            action_type="loop",
            start=chunk * gap,
            end=(chunk + 1) * gap,
        )
        task.init("github", payload)
        worker = GithubEvent(task)
        finished = "./TaskFinishedFiles/get_events_start_" + str(chunk * gap) + ".output"
        worker.runTaskFromFile(finished)
def init_users_thread_by_query(db, query):
    """Queue "get_commit_check" tasks for the logins matched by ``query``.

    Logins come from the "user_contributor_result2" collection and are
    split into batches of ``pieces`` entries. Every full batch, and a
    trailing partial batch if there is one, is registered as a github task.

    Args:
        db: Ignored -- it is immediately replaced by
            ``DMDatabase().getDB()``, exactly as before.
        query: Filter handed to ``find``; its ``str()`` form is stored in
            every task under "query".
    """
    pieces = 100
    base = 800000  # constant offset added to the running count for start/end

    # NOTE(review): the ``db`` argument has always been overwritten here.
    # Kept unchanged -- confirm whether callers expect their handle to be used.
    db = DMDatabase().getDB()

    def queue_batch(users, first, last):
        # Initialise one task describing the logins counted in (first, last].
        task = DMTask()
        task.init("github", {
            "name": "get_commit_check",
            "action_type": "loop",
            "query": str(query),
            "users": users,
            "start": base + first,
            "end": base + last,
        })

    count = 0
    batch = []
    for item in db["user_contributor_result2"].find(query):
        count += 1
        batch.append(item["login"])
        if count % pieces == 0:
            # ``count - pieces`` replaces the hard-coded ``i - 100``.
            queue_batch(batch, count - pieces, count)
            batch = []

    leftover = count % pieces
    if leftover != 0:
        # Fix: "end" was ``800000 + 1`` (digit one) instead of the running
        # count, so the partial batch ended before it started.
        queue_batch(batch, count - leftover, count)
    return
def resolve_event_errors():
    """Re-check "get_repositories" tasks whose "error_count" is at least 10.

    Task documents are read from ``client["task"]["github"]``. For each one
    a loop task with the same "start" and "end" is initialised and
    ``GithubRepositories.error_check`` is called on it. The accumulated
    results are printed as "<n> errors solved".
    """
    client = DMDatabase().getClient()
    criteria = {"name": "get_repositories", "error_count": {"$gte": 10}}
    total = 0
    for doc in client["task"]["github"].find(criteria):
        task = DMTask()
        payload = dict(
            name="get_repositories",
            action_type="loop",
            start=doc["start"],
            end=doc["end"],
        )
        task.init("github", payload)
        checker = GithubRepositories(task)
        total += checker.error_check()
    print(str(total) + " errors solved")
class myThread(threading.Thread):
    """Worker thread wrapping one ``get_commit_repos_by_users`` call.

    ``val`` is the payload used to initialise the github ``DMTask``; its
    "users" entry is what gets processed in ``run``.
    """

    def __init__(self, db, val):
        threading.Thread.__init__(self)
        self.db, self.val = db, val
        self.task = DMTask()
        self.task.init("github", val)

    def run(self):
        """Flag the task as running, process the users, flag it as finished."""
        print("Start the thread" + str(self.val))

        started = {"status": "running", "percent": 0.0}
        started["update_date"] = datetime.datetime.utcnow()
        self.task.update(started)

        get_commit_repos_by_users(self.db, self.val["users"])

        finished = {"status": "finish", "percent": 1.0}
        finished["update_date"] = datetime.datetime.utcnow()
        self.task.update(finished)

        print("Exist the thread")
def updated_contributors_task():
    """Initialise a "get_contributors" update task.

    The task's "start" is the value returned by ``get_last_saved_id()`` and
    its "end" is 0.
    """
    resume_from = get_last_saved_id()
    task = DMTask()
    payload = dict(
        name="get_contributors",
        action_type="update",
        start=resume_from,
        end=0,
    )
    task.init("github", payload)
class myThread(threading.Thread):
    """Worker thread that runs one github task and can pull further tasks.

    The thread is configured through ``set`` with an action type, a command
    name and a start/end pair. When ``endless`` equals 1, ``run`` keeps
    fetching tasks with status "init" after the first one has completed.
    """

    def __init__(self, action_type, cmd, start, end, endless):
        threading.Thread.__init__(self)
        self.endless = endless
        self.set(action_type, cmd, start, end)

    def set(self, action_type, cmd, start, end):
        """Prepare ``self.task`` and ``self.r`` for the command ``cmd``.

        Args:
            action_type: Stored in the task payload under "action_type".
            cmd: Task name; selects which runner class is instantiated.
            start: Stored in the task payload under "start".
            end: Stored in the task payload under "end".

        Returns:
            1 when ``cmd`` is recognised and the runner was created,
            0 otherwise (``self.r`` is then ``None`` and the task is not
            initialised).
        """
        self.task = DMTask()
        self.r = None
        self.val = {"action_type": action_type, "start": start, "end": end}

        # Only the runner class differs between commands; it is resolved in
        # the branch that needs it so unused classes are never looked up.
        if cmd == "get_repos":
            runner_cls = GithubRepo
        elif cmd == "get_followers":
            runner_cls = GithubFollowers
        elif cmd == "get_events":
            runner_cls = GithubEvent
        # TODO: do not support now
        elif cmd == "get_users":
            runner_cls = GithubUser
        elif cmd == "get_repositories":
            runner_cls = GithubRepositories
        elif cmd == "get_contributors":
            runner_cls = GithubContributors
        elif cmd in ("get_commit", "get_commit_p1"):
            runner_cls = GithubCommit
        else:
            print("Failed to init the task")
            return 0

        self.val["name"] = cmd
        self.task.init("github", self.val)
        self.r = runner_cls(self.task)
        return 1

    def run(self):
        """Run the configured task, then optionally keep taking free tasks."""
        print("Starting " + str(self.val))
        if self.r:
            self.r.runTask()
        print("Exiting " + str(self.val))

        if self.endless != 1:
            return

        # NOTE(review): the indentation of the original trailing ``break``
        # was lost. This version leaves the polling loop once no free task
        # is returned -- confirm this matches the intended behaviour.
        while True:
            query = {"col": "github", "num": 1, "query": {"status": "init"}}
            # ``with`` guarantees the lock is released even if the lookup
            # raises; the bare acquire()/release() pair did not.
            with threadLock:
                free_tasks = DMTask().getFreeTasks(query)
            if not free_tasks:
                break
            for item in free_tasks:
                print(item)
                # Fix: ``set`` takes (action_type, cmd, start, end); it used
                # to be called with only three arguments, raising TypeError.
                # Assumes task items are dict-like and usually carry their
                # own "action_type" -- TODO confirm; falls back to the
                # current one otherwise.
                action_type = item.get("action_type", self.val["action_type"])
                if self.set(action_type, item["name"],
                            item["start"], item["end"]) != 1:
                    return
                self.r.runTask()
                print("\n Start another task in the finished thread\n")
class myThread(threading.Thread):
    """Worker thread that runs one github task and can pull further tasks.

    ``set`` configures the thread from an action type, a command name and a
    start/end pair. When ``endless`` equals 1, ``run`` continues with tasks
    whose status is "init" once the first task has completed.
    """

    def __init__(self, action_type, cmd, start, end, endless):
        threading.Thread.__init__(self)
        self.endless = endless
        self.set(action_type, cmd, start, end)

    def set(self, action_type, cmd, start, end):
        """Prepare ``self.task`` and ``self.r`` for the command ``cmd``.

        Args:
            action_type: Stored in the task payload under "action_type".
            cmd: Task name; selects which runner class is instantiated.
            start: Stored in the task payload under "start".
            end: Stored in the task payload under "end".

        Returns:
            1 when ``cmd`` is recognised and the runner was created,
            0 otherwise (``self.r`` is then ``None`` and the task is not
            initialised).
        """
        self.task = DMTask()
        self.r = None
        self.val = {"action_type": action_type, "start": start, "end": end}

        # Only the runner class differs between commands; it is resolved in
        # the branch that needs it so unused classes are never looked up.
        if cmd == "get_repos":
            runner_cls = GithubRepo
        elif cmd == "get_followers":
            runner_cls = GithubFollowers
        elif cmd == "get_events":
            runner_cls = GithubEvent
        # TODO: do not support now
        elif cmd == "get_users":
            runner_cls = GithubUser
        elif cmd == "get_repositories":
            runner_cls = GithubRepositories
        elif cmd == "get_contributors":
            runner_cls = GithubContributors
        elif cmd in ("get_commit", "get_commit_p1"):
            runner_cls = GithubCommit
        else:
            print("Failed to init the task")
            return 0

        self.val["name"] = cmd
        self.task.init("github", self.val)
        self.r = runner_cls(self.task)
        return 1

    def run(self):
        """Run the configured task, then optionally keep taking free tasks."""
        print("Starting " + str(self.val))
        if self.r:
            self.r.runTask()
        print("Exiting " + str(self.val))

        if self.endless != 1:
            return

        # NOTE(review): the indentation of the original trailing ``break``
        # was lost. This version leaves the polling loop once no free task
        # is returned -- confirm this matches the intended behaviour.
        while True:
            query = {
                "col": "github",
                "num": 1,
                "query": {"status": "init"},
            }
            # ``with`` guarantees the lock is released even if the lookup
            # raises; the bare acquire()/release() pair did not.
            with threadLock:
                free_tasks = DMTask().getFreeTasks(query)
            if not free_tasks:
                break
            for item in free_tasks:
                print(item)
                # Fix: ``set`` takes (action_type, cmd, start, end); it used
                # to be called with only three arguments, raising TypeError.
                # Assumes task items are dict-like and usually carry their
                # own "action_type" -- TODO confirm; falls back to the
                # current one otherwise.
                action_type = item.get("action_type", self.val["action_type"])
                if self.set(action_type, item["name"],
                            item["start"], item["end"]) != 1:
                    return
                self.r.runTask()
                print("\n Start another task in the finished thread\n")