def retry_if_neccessary(gotten_tag, tagname, objectname, arg_objectname):
    """Re-download the HTML page and search for the tag again if it is missing.

    When ``gotten_tag`` is not None it is returned unchanged.  Otherwise the
    page is downloaded again up to three times, sleeping 60 s, 180 s and
    540 s before the successive attempts, and the tag is looked up after
    each download.

    Reads ``html_addr``, ``filename`` and ``html_content_file`` from an
    outer scope; they are not parameters of this function.

    :param gotten_tag: tag found by the caller, or None when it was missing
    :param tagname: tag name handed to ``soup.find``
    :param objectname: attribute name used as the ``soup.find`` filter key
    :param arg_objectname: attribute value used as the ``soup.find`` filter
    :returns: the tag, or None when every retry failed
    """
    how_long = 60
    if gotten_tag is None:
        # retry 3 times, tripling the pause before each attempt
        for _attempt in range(0, 3):
            time.sleep(how_long)
            how_long *= 3
            # keep trying the download itself until the socket cooperates
            while True:
                try:
                    local_filename_html, headers_html = urllib.urlretrieve(
                        html_addr, filename + '.html')
                    break
                except IOError:
                    io_socket_message = 'Socket error while retrieving HTML' +\
                                        ' file from GitHub! Internet or ' +\
                                        'GitHub down? Retry after 1 minute'
                    scream.ssay(io_socket_message)
                    scream.log_warning(io_socket_message)
                    time.sleep(60)
            # NOTE(review): html_content_file is not refreshed from the file
            # downloaded above, so this parses whatever the outer scope
            # loaded earlier -- confirm whether it should be re-read.
            soup = BeautifulSoup(html_content_file)
            gotten_tag = soup.find(tagname, {objectname: arg_objectname})
            if gotten_tag is not None:
                # Found it: leave the retry loop.  The previous
                # ``raise StopIteration`` escaped the function instead of
                # ending the loop, so the tag was never returned.
                break
        if gotten_tag is None:
            # nothing to do here, lets move on
            scream.ssay('orphaned' + filename + '.json')
            scream.log_error(
                filename + '.json' + 'is without proper html. ' +
                'GitHub not responding or giving 404/501 erorr ??')
            return None
    scream.say(
        'No action required. retry_if_neccessary() returning gotten_tag')
    return gotten_tag
def retry_if_neccessary(gotten_tag, tagname, objectname, arg_objectname):
    """Re-download the HTML page and search for the tag again if it is missing.

    When ``gotten_tag`` is not None it is returned unchanged.  Otherwise the
    page is downloaded again up to three times, sleeping 60 s, 180 s and
    540 s before the successive attempts, and the tag is looked up after
    each download.

    Reads ``html_addr``, ``filename`` and ``html_content_file`` from an
    outer scope; they are not parameters of this function.

    :param gotten_tag: tag found by the caller, or None when it was missing
    :param tagname: tag name handed to ``soup.find``
    :param objectname: attribute name used as the ``soup.find`` filter key
    :param arg_objectname: attribute value used as the ``soup.find`` filter
    :returns: the tag, or None when every retry failed
    """
    how_long = 60
    if gotten_tag is None:
        # retry 3 times, tripling the pause before each attempt
        for _attempt in range(0, 3):
            time.sleep(how_long)
            how_long *= 3
            # keep trying the download itself until the socket cooperates
            while True:
                try:
                    local_filename_html, headers_html = urllib.urlretrieve(
                        html_addr, filename + '.html')
                    break
                except IOError:
                    io_socket_message = 'Socket error while retrieving HTML' +\
                                        ' file from GitHub! Internet or ' +\
                                        'GitHub down? Retry after 1 minute'
                    scream.ssay(io_socket_message)
                    scream.log_warning(io_socket_message)
                    time.sleep(60)
            # NOTE(review): html_content_file is not refreshed from the file
            # downloaded above, so this parses whatever the outer scope
            # loaded earlier -- confirm whether it should be re-read.
            soup = BeautifulSoup(html_content_file)
            gotten_tag = soup.find(tagname, {objectname: arg_objectname})
            if gotten_tag is not None:
                # Found it: leave the retry loop.  The previous
                # ``raise StopIteration`` escaped the function instead of
                # ending the loop, so the tag was never returned.
                break
        if gotten_tag is None:
            # nothing to do here, lets move on
            scream.ssay('orphaned' + filename + '.json')
            scream.log_error(filename + '.json' + 'is without proper html. ' +
                             'GitHub not responding or giving 404/501 erorr ??')
            return None
    scream.say('No action required. retry_if_neccessary() returning gotten_tag')
    return gotten_tag
def build_list_of_programmers(self, result_set_programmers, repo_key, repository):
    """Collect contributors into a dict keyed by their login.

    Iterates ``result_set_programmers`` and stores every item under its
    ``login`` attribute.  If iteration raises, the error is logged, the
    partial result is discarded and iteration is attempted again; when the
    module-level ``force_raise`` flag is truthy the exception is re-raised
    instead.

    :param result_set_programmers: iterable of objects exposing ``login``
    :param repo_key: repository key; used in the TypeError log message and
        written to ``repos_reported_execution_error``
    :param repository: not used by this method
    :returns: dict mapping login to contributor object
    """
    result_set = dict()
    contributors__ = result_set_programmers
    while True:
        # every attempt starts from an empty dict so a failed pass leaves
        # no partial data behind
        result_set.clear()
        try:
            for contributor in contributors__:
                result_set[contributor.login] = contributor
            break
        except TypeError as e:
            # Was ``key``, a name that is not defined in this method; the
            # repository key arrives as the ``repo_key`` parameter.
            scream.log_error('Repo + Contributor TypeError, or paginated through' +
                             ' contributors gave error. ' + str(repo_key) +
                             ', error({0})'.format(str(e)), True)
            repos_reported_execution_error.write(str(repo_key) + os.linesep)
            if force_raise:
                raise
        except socket.timeout as e:
            scream.log_error('Timeout while revealing details.. ' +
                             ', error({0})'.format(str(e)), True)
            freeze('build_list_of_programmers will retry')
            if force_raise:
                raise
        except Exception as e:
            scream.log_error('Exception while revealing details.. ' +
                             ', error({0})'.format(str(e)), True)
            freeze('build_list_of_programmers will retry')
            if force_raise:
                raise
    return result_set
def build_list_of_programmers(self, result_set_programmers, repo_key, repository):
    """Collect contributors into a dict keyed by their login.

    Iterates ``result_set_programmers`` and stores every item under its
    ``login`` attribute.  If iteration raises, the error is logged, the
    partial result is discarded and iteration is attempted again; when the
    module-level ``force_raise`` flag is truthy the exception is re-raised
    instead.

    :param result_set_programmers: iterable of objects exposing ``login``
    :param repo_key: repository key; used in the TypeError log message and
        written to ``repos_reported_execution_error``
    :param repository: not used by this method
    :returns: dict mapping login to contributor object
    """
    result_set = dict()
    contributors__ = result_set_programmers
    while True:
        # every attempt starts from an empty dict so a failed pass leaves
        # no partial data behind
        result_set.clear()
        try:
            for contributor in contributors__:
                result_set[contributor.login] = contributor
            break
        except TypeError as e:
            # Was ``key``, a name that is not defined in this method; the
            # repository key arrives as the ``repo_key`` parameter.
            scream.log_error('Building list of programmers TypeError. ' +
                             str(repo_key) +
                             ', error({0})'.format(str(e)), True)
            repos_reported_execution_error.write(str(repo_key) + os.linesep)
            if force_raise:
                raise
        except socket.timeout as e:
            scream.log_error('Building list of programmers socket timeout.. ' +
                             ', error({0})'.format(str(e)), True)
            freeze('build_list_of_programmers will retry')
            if force_raise:
                raise
        except Exception as e:
            scream.log_error('Exception while building list of programmers .. ' +
                             ', error({0})'.format(str(e)), True)
            freeze('build_list_of_programmers will retry')
            if force_raise:
                raise
    return result_set
def get_data(self):
    """Collect the contributor logins of ``self.repository`` and reveal each one.

    Runs only when the module-level ``resume_stage`` is None or
    ``'contributors'``.  Fetches the contributors, turns them into a
    login -> object dict via ``build_list_of_programmers`` and calls
    ``developer_revealed`` for every entry, retrying an entry after any
    exception unless the module-level ``force_raise`` flag is truthy.
    Finally stores the set of logins and its size on ``self.repo``.

    NOTE(review): the original indentation of this method was lost; the
    trailing ``self.cleanup()`` is assumed to belong to the same block as
    the preceding statements -- confirm against the upstream file.
    """
    global resume_stage

    scream.say('Executing inside-thread method get_data() for: ' + str(self.threadId))
    if resume_stage in [None, 'contributors']:
        # Was the bare name ``key``, which is not defined in this method;
        # the repository key is available from self.repo.
        repo_key = self.repo.getKey()
        scream.ssay('Checking size of a ' + str(repo_key) + ' team')
        # 1. Team size of a repository
        self.contributors = self.repository.get_contributors()
        assert self.contributors is not None

        self.repo_contributors = set()
        self.contributors_static = self.build_list_of_programmers(
            self.contributors, repo_key, self.repository)
        for contributor in self.contributors_static.items():
            scream.log_debug('move with contributor to next from contributors_static.items()', True)
            # retry the same contributor until it is processed without error
            while True:
                scream.say('Inside while True: (line 674)')
                try:
                    # items() yields (login, contributor object) pairs
                    self.contributor_login = contributor[0]
                    self.contributor_object = contributor[1]
                    scream.say(str(self.contributor_login))
                    self.repo_contributors.add(self.contributor_login)
                    scream.say(str(self.repo_contributors))
                    developer_revealed(self.threadId, self.repository,
                                       self.repo, self.contributor_object)
                    scream.say('Finished revealing developer')
                    break
                except TypeError as e:
                    scream.log_error('Repo + Contributor TypeError, or paginated through' +
                                     ' contributors gave error. ' + str(repo_key) +
                                     ', error({0})'.format(str(e)), True)
                    repos_reported_execution_error.write(str(repo_key) + os.linesep)
                    if force_raise:
                        raise
                except socket.timeout as e:
                    scream.log_error('Timeout while revealing details.. ' +
                                     ', error({0})'.format(str(e)), True)
                    freeze('socket.timeout in paginate through x contributors')
                    if force_raise:
                        raise
                except Exception as e:
                    scream.log_error('Exception while revealing details.. ' +
                                     ', error({0})'.format(str(e)), True)
                    freeze(str(e) + ' in paginate through x contributors')
                    if force_raise:
                        raise

        assert self.repo_contributors is not None
        self.repo.setContributors(self.repo_contributors)
        self.repo.setContributorsCount(len(self.repo_contributors))
        scream.log('Added contributors of count: ' +
                   str(len(self.repo_contributors)) +
                   ' to a repo ' + str(repo_key))
        self.cleanup()
def get_data(self):
    """Collect the contributor logins of ``self.repository`` and reveal each one.

    Runs only when the module-level ``resume_stage`` is None or
    ``'contributors'``.  Fetches the contributors, turns them into a
    login -> object dict via ``build_list_of_programmers`` and calls
    ``developer_revealed`` for every entry, retrying an entry after any
    exception unless the module-level ``force_raise`` flag is truthy.
    Finally stores the set of logins and its size on ``self.repo``.

    NOTE(review): the original indentation of this method was lost; the
    trailing ``self.cleanup()`` is assumed to belong to the same block as
    the preceding statements -- confirm against the upstream file.
    """
    global resume_stage

    scream.say('Preparing to build list of programmers: ' + str(self.threadId))
    if resume_stage in [None, 'contributors']:
        # Was the bare name ``key``, which is not defined in this method;
        # the repository key is available from self.repo.
        repo_key = self.repo.getKey()
        scream.ssay('Checking size of a ' + str(repo_key) + ' team')
        # 1. Team size of a repository
        self.contributors = self.repository.get_contributors()
        assert self.contributors is not None

        self.repo_contributors = set()
        self.contributors_static = self.build_list_of_programmers(
            self.contributors, repo_key, self.repository)
        for contributor in self.contributors_static.items():
            scream.log_debug('move with contributor to next from contributors_static.items()', True)
            # retry the same contributor until it is processed without error
            while True:
                scream.say('Get details for a contributor..')
                try:
                    # items() yields (login, contributor object) pairs
                    self.contributor_login = contributor[0]
                    self.contributor_object = contributor[1]
                    scream.say(str(self.contributor_login))
                    self.repo_contributors.add(self.contributor_login)
                    scream.say(str(self.repo_contributors))
                    developer_revealed(self.threadId, self.repository,
                                       self.repo, self.contributor_object)
                    scream.say('Finished revealing developer')
                    break
                except TypeError as e:
                    scream.log_error('Repo + Contributor TypeError, or paginated through' +
                                     ' contributors gave error. ' + str(repo_key) +
                                     ', error({0})'.format(str(e)), True)
                    repos_reported_execution_error.write(str(repo_key) + os.linesep)
                    if force_raise:
                        raise
                except socket.timeout as e:
                    scream.log_error('Timeout while revealing details.. ' +
                                     ', error({0})'.format(str(e)), True)
                    freeze('socket.timeout in paginate through x contributors')
                    if force_raise:
                        raise
                except Exception as e:
                    scream.log_error('Exception while revealing details.. ' +
                                     ', error({0})'.format(str(e)), True)
                    freeze(str(e) + ' in paginate through x contributors')
                    if force_raise:
                        raise

        assert self.repo_contributors is not None
        self.repo.setContributors(self.repo_contributors)
        self.repo.setContributorsCount(len(self.repo_contributors))
        scream.log('Added contributors of count: ' +
                   str(len(self.repo_contributors)) +
                   ' to a repo ' + str(repo_key))
        self.cleanup()
def build_list_of_programmers(self, result_set_programmers, repo_key, repository):
    """Collect contributors into a dict keyed by their login.

    Iterates ``result_set_programmers`` and stores every item under its
    ``login`` attribute.  If iteration raises, the error is logged, the
    partial result is discarded and iteration is attempted again; when the
    module-level ``force_raise`` flag is truthy the exception is re-raised
    instead.

    :param result_set_programmers: iterable of objects exposing ``login``
    :param repo_key: repository key; used in the TypeError log message and
        written to ``repos_reported_execution_error``
    :param repository: not used by this method
    :returns: dict mapping login to contributor object
    """
    result_set = dict()
    contributors__ = result_set_programmers
    while True:
        # every attempt starts from an empty dict so a failed pass leaves
        # no partial data behind
        result_set.clear()
        try:
            for contributor in contributors__:
                result_set[contributor.login] = contributor
            break
        except TypeError as e:
            # Was ``key``, a name that is not defined in this method; the
            # repository key arrives as the ``repo_key`` parameter.
            scream.log_error(
                'Repo + Contributor TypeError, or paginated through' +
                ' contributors gave error. ' + str(repo_key) +
                ', error({0})'.format(str(e)), True)
            repos_reported_execution_error.write(str(repo_key) + os.linesep)
            if force_raise:
                raise
        except socket.timeout as e:
            scream.log_error(
                'Timeout while revealing details.. ' +
                ', error({0})'.format(str(e)), True)
            freeze('build_list_of_programmers will retry')
            if force_raise:
                raise
        except Exception as e:
            scream.log_error(
                'Exception while revealing details.. ' +
                ', error({0})'.format(str(e)), True)
            freeze('build_list_of_programmers will retry')
            if force_raise:
                raise
    return result_set
repos_reported_execution_error.write(key + os.linesep) repo.setContributors(repo_contributors) #repo.setContributorsCount(len(repo_contributors)) 'class fields are not garbage, ' 'its better to calculate count on demand' scream.log('Added contributors of count: ' + str(len(repo_contributors)) + ' to a repo ' + key) except GithubException as e: if 'repo_contributors' not in locals(): repo.setContributors([]) else: repo.setContributors(repo_contributors) scream.log_error('Repo didnt gave any contributors, ' + 'or paginated through' + ' contributors gave error. ' + key + ', error({0}): {1}'. format(e.status, e.data)) finally: resume_stage = None #if resume_stage in [None, 'languages']: # scream.ssay('Getting languages of a repo') # languages = repository.get_languages() # dict object (json? object) # repo.setLanguage(languages) # scream.log('Added languages ' + str(languages) + ' to a repo ' + key) # resume_stage = None # to juz mamy # if resume_stage in [None, 'labels']: # scream.ssay('Getting labels of a repo')
def descr_user(s):
    """Look up a GitHub login and submit the owner's first name to genderchecker.com.

    A login already present in ``persist_users`` is answered from that
    cache: the cached value, or the login itself when the cached value is
    None.  Otherwise the GitHub users API is queried for the profile; when
    no usable ``name`` is present the login is cached as None and returned.
    With a name, its first whitespace-separated token is typed into the
    genderchecker.com form and submitted, giving up with ``StopIteration``
    after four failed submits.

    NOTE(review): the visible body ends after the submit retry loop and the
    fetched ``html`` is not consumed here -- confirm whether the code that
    interprets the response was lost from this copy.

    :param s: GitHub login
    :returns: cached value or ``s`` on the early-exit paths; None otherwise
    :raises StopIteration: when the form submit failed four times
    """
    if s in persist_users:
        if persist_users[s] is None:
            return s
        else:
            return persist_users[s]
    # get user name and surname here
    while True:
        try:
            response = urllib.urlopen('https://api.github.com/users/' + s +
                                      '?client_id=' + client_id +
                                      '&client_secret=' + client_secret)
            break
        except IOError:
            # single-argument parenthesised print behaves the same on
            # Python 2 and keeps the block parseable on Python 3
            print('API GitHub not responding, urlopen failed')
            print('retrying after 1 minute')
            time.sleep(60)
    scream.ssay(response)
    try:
        data = simplejson.load(response)
    finally:
        # the HTTP response was previously left open
        response.close()
    scream.ssay(data)
    if 'name' in data:
        fullname = data['name']
    else:
        scream.say('Fullname not provided')
        persist_users[s] = None
        return s
    if fullname is None:
        scream.say('Fullname provided but an empty entry')
        persist_users[s] = None
        return s
    # split() returns [] for an empty or whitespace-only name; indexing [0]
    # directly raised IndexError for names made only of whitespace
    name_parts = fullname.split()
    if name_parts:
        first_name = unicode(name_parts[0])
        if (len(first_name) > 0):
            scream.say('#ask now internet for gender')
            while True:
                try:
                    response = my_browser.open('http://genderchecker.com/')
                    response.read()
                    break
                except urllib2.URLError:
                    scream.ssay('Site genderchecker.com seems to be down' +
                                '. awaiting for 60s before retry')
                    time.sleep(60)
            scream.say('Response read. Mechanize selecting form.')
            my_browser.select_form("aspnetForm")
            my_browser.form.set_all_readonly(False)
            # allow everything to be written
            control = my_browser.form.find_control("ctl00$TextBoxName")
            if only_roman_chars(first_name):
                control.value = StripNonAlpha(first_name.encode('utf-8'))
            else:
                control.value = StripNonAlpha(cyrillic2latin(first_name).encode('utf-8'))
            # check if value is enough
            scream.say('Control value is set to :' + str(control.value))
            submit_retry_counter = 4
            while True:
                try:
                    response = my_browser.submit()
                    html = response.read()
                    break
                except mechanize.HTTPError as e:
                    submit_retry_counter -= 1
                    if submit_retry_counter < 1:
                        raise StopIteration
                    error_message = 'Site genderchecker.com seems to have ' +\
                                    'internal problems. or my request is' +\
                                    ' wibbly-wobbly nonsense. HTTPError ' +\
                                    str(e.code) +\
                                    '. awaiting for 60s before retry'
                    scream.say(error_message)
                    scream.log_error(str(e.code) + ': ' + error_message)
                    time.sleep(60)
                except Exception:
                    # was a bare ``except:``, which also swallowed
                    # KeyboardInterrupt/SystemExit and slept before retrying
                    submit_retry_counter -= 1
                    if submit_retry_counter < 1:
                        raise StopIteration
                    error_message = 'Site genderchecker.com seems to have ' +\
                                    'internal problems. or my request is' +\
                                    ' wibbly-wobbly nonsense. ' +\
                                    'awaiting for 60s before retry'
                    scream.say(error_message)
                    scream.log_error(error_message)
                    time.sleep(60)
def descr_user(s):
    """Look up a GitHub login and submit the owner's first name to genderchecker.com.

    A login already present in ``persist_users`` is answered from that
    cache: the cached value, or the login itself when the cached value is
    None.  Otherwise the GitHub users API is queried for the profile; when
    no usable ``name`` is present the login is cached as None and returned.
    With a name, its first whitespace-separated token is typed into the
    genderchecker.com form and submitted, giving up with ``StopIteration``
    after four failed submits.

    NOTE(review): the visible body ends after the submit retry loop and the
    fetched ``html`` is not consumed here -- confirm whether the code that
    interprets the response was lost from this copy.

    :param s: GitHub login
    :returns: cached value or ``s`` on the early-exit paths; None otherwise
    :raises StopIteration: when the form submit failed four times
    """
    if s in persist_users:
        if persist_users[s] is None:
            return s
        else:
            return persist_users[s]
    # get user name and surname here
    while True:
        try:
            response = urllib.urlopen('https://api.github.com/users/' + s +
                                      '?client_id=' + client_id +
                                      '&client_secret=' + client_secret)
            break
        except IOError:
            # single-argument parenthesised print behaves the same on
            # Python 2 and keeps the block parseable on Python 3
            print('API GitHub not responding, urlopen failed')
            print('retrying after 1 minute')
            time.sleep(60)
    scream.ssay(response)
    try:
        data = simplejson.load(response)
    finally:
        # the HTTP response was previously left open
        response.close()
    scream.ssay(data)
    if 'name' in data:
        fullname = data['name']
    else:
        scream.say('Fullname not provided')
        persist_users[s] = None
        return s
    if fullname is None:
        scream.say('Fullname provided but an empty entry')
        persist_users[s] = None
        return s
    # split() returns [] for an empty or whitespace-only name; indexing [0]
    # directly raised IndexError for names made only of whitespace
    name_parts = fullname.split()
    if name_parts:
        first_name = unicode(name_parts[0])
        if (len(first_name) > 0):
            scream.say('#ask now internet for gender')
            while True:
                try:
                    response = my_browser.open('http://genderchecker.com/')
                    response.read()
                    break
                except urllib2.URLError:
                    scream.ssay('Site genderchecker.com seems to be down' +
                                '. awaiting for 60s before retry')
                    time.sleep(60)
            scream.say('Response read. Mechanize selecting form.')
            my_browser.select_form("aspnetForm")
            my_browser.form.set_all_readonly(False)
            # allow everything to be written
            control = my_browser.form.find_control("ctl00$TextBoxName")
            if only_roman_chars(first_name):
                control.value = StripNonAlpha(first_name.encode('utf-8'))
            else:
                control.value = StripNonAlpha(
                    cyrillic2latin(first_name).encode('utf-8'))
            # check if value is enough
            scream.say('Control value is set to :' + str(control.value))
            submit_retry_counter = 4
            while True:
                try:
                    response = my_browser.submit()
                    html = response.read()
                    break
                except mechanize.HTTPError as e:
                    submit_retry_counter -= 1
                    if submit_retry_counter < 1:
                        raise StopIteration
                    error_message = 'Site genderchecker.com seems to have ' +\
                                    'internal problems. or my request is' +\
                                    ' wibbly-wobbly nonsense. HTTPError ' +\
                                    str(e.code) +\
                                    '. awaiting for 60s before retry'
                    scream.say(error_message)
                    scream.log_error(str(e.code) + ': ' + error_message)
                    time.sleep(60)
                except Exception:
                    # was a bare ``except:``, which also swallowed
                    # KeyboardInterrupt/SystemExit and slept before retrying
                    submit_retry_counter -= 1
                    if submit_retry_counter < 1:
                        raise StopIteration
                    error_message = 'Site genderchecker.com seems to have ' +\
                                    'internal problems. or my request is' +\
                                    ' wibbly-wobbly nonsense. ' +\
                                    'awaiting for 60s before retry'
                    scream.say(error_message)
                    scream.log_error(error_message)
                    time.sleep(60)
for contributor in contributors: repo_contributors.append(contributor) check_quota_limit() repo.setContributors(repo_contributors) #repo.setContributorsCount(len(repo_contributors)) 'class fields are not garbage, ' 'its better to calculate count on demand' scream.log('Added contributors of count: ' + str(len(repo_contributors)) + ' to a repo ' + key) except GithubException as e: if 'repo_contributors' not in locals(): repo.setContributors([]) else: repo.setContributors(repo_contributors) scream.log_error('Repo didnt gave any contributors, ' + 'or paginated through' + ' contributors gave error. ' + key + ', error({0}): {1}'.format(e.status, e.data)) finally: resume_stage = None if resume_stage in [None, 'languages']: scream.ssay('Getting languages of a repo') languages = repository.get_languages( ) # dict object (json? object) repo.setLanguage(languages) scream.log('Added languages ' + str(languages) + ' to a repo ' + key) resume_stage = None if resume_stage in [None, 'labels']: scream.ssay('Getting labels of a repo')
def get_data(self, page, conn): global results_done global results_all global pagination global openhub_query_tags self.params_sort_rating = urllib.urlencode({'query': 'tag:' + openhub_query_tags[0], 'api_key': return_random_openhub_key(), 'sort': 'rating', 'page': page}) self.projects_api_url = "https://www.openhub.net/projects.xml?%s" % (self.params_sort_rating) self.result_flow = urllib.urlopen(self.projects_api_url) scream.say('') scream.say('-------------------------- PAGE ' + str(page) + ' parsed -----------------------------') scream.say('') # Parse the response into a structured XML object self.tree = ET.parse(self.result_flow) # Did Ohloh return an error? self.elem = self.tree.getroot() self.error = self.elem.find("error") if self.error is not None: print 'OpenHub returned ERROR:', ET.tostring(self.error), sys.exit() results_done += int(self.elem.find("items_returned").text) results_all = int(self.elem.find("items_available").text) self.i = 0 for self.node in self.elem.findall("result/project"): self.i += 1 scream.say('Checking element ' + str(self.i) + '/' + str(pagination)) self.project_id = self.node.find("id").text self.project_name = self.node.find("name").text self.project_url = self.node.find("url").text self.project_htmlurl = self.node.find("html_url").text self.project_created_at = self.node.find("created_at").text self.project_updated_at = self.node.find("updated_at").text self.project_homepage_url = self.node.find("homepage_url").text self.project_average_rating = self.node.find("average_rating").text self.project_rating_count = self.node.find("rating_count").text self.project_review_count = self.node.find("review_count").text self.project_activity_level = self.node.find("project_activity_index/value").text self.project_user_count = self.node.find("user_count").text # project may have multiple GitHub repositories # or even it may be not present on GitHub - check that self.is_github_project = False self.github_repo_id = None # in case of multiple 
github CODE repositories (quite often) # treat as a seperate repo - remember, we focus on github repositories, not aggregates self.enlistments_detailed_params = urllib.urlencode({'api_key': return_random_openhub_key()}) self.enlistments_detailed_url = "https://www.openhub.net/projects/%s/enlistments.xml?%s" % (self.project_id, self.enlistments_detailed_params) self.enlistments_result_flow = urllib.urlopen(self.enlistments_detailed_url) # Parse the response into a structured XML object self.enlistments_tree = ET.parse(self.enlistments_result_flow) # Did Ohloh return an error? self.enlistments_elem = self.enlistments_tree.getroot() self.enlistments_error = self.enlistments_elem.find("error") if self.enlistments_error is not None: print 'Ohloh returned:', ET.tostring(self.enlistments_error), sys.exit() self.repos_lists = list() for self.enlistment_node in self.enlistments_elem.findall("result/enlistment"): self.ee_type = self.enlistment_node.find("repository/type").text if (self.ee_type == "GitRepository"): self.ee_link = self.enlistment_node.find("repository/url").text if (self.ee_link.startswith("git://github.com/")): scream.say('Is a GitHub project!') self.is_github_project = True self.github_repo_id = self.ee_link.split("git://github.com/")[1].split(".git")[0] scream.say(self.github_repo_id) self.repos_lists.append(self.github_repo_id) if not self.is_github_project: continue # now lets get even more sophisticated details self.params_detailed_url = urllib.urlencode({'api_key': return_random_openhub_key()}) self.project_detailed_url = "https://www.openhub.net/projects/%s.xml?%s" % (self.project_id, self.params_detailed_url) # how come here was a typo ? self.detailed_result_flow = urllib.urlopen(self.project_detailed_url) # Parse the response into a structured XML object self.detailed_tree = ET.parse(self.detailed_result_flow) # Did Ohloh return an error? 
self.detailed_elem = self.detailed_tree.getroot() self.detailed_error = self.detailed_elem.find("error") if self.detailed_error is not None: print 'Ohloh returned:', ET.tostring(self.detailed_error), sys.exit() self.twelve_month_contributor_count = self.detailed_elem.find("result/project/analysis/twelve_month_contributor_count").text self.total_contributor_count = self.detailed_elem.find("result/project/analysis/total_contributor_count").text self.twelve_month_commit_count = self.detailed_elem.find("result/project/analysis/twelve_month_commit_count") self.twelve_month_commit_count = self.twelve_month_commit_count.text if self.twelve_month_commit_count is not None else NullChar self.total_commit_count = self.detailed_elem.find("result/project/analysis/total_commit_count") self.total_commit_count = self.total_commit_count.text if self.total_commit_count is not None else NullChar self.total_code_lines = self.detailed_elem.find("result/project/analysis/total_code_lines") self.total_code_lines = self.total_code_lines.text if self.total_code_lines is not None else NullChar self.main_language_name = self.detailed_elem.find("result/project/analysis/main_language_name") self.main_language_name = self.main_language_name.text if self.main_language_name is not None else NullChar self.current_ghc = github_clients[num_modulo(self.i-1)] self.current_ghc_desc = github_clients_ids[num_modulo(self.i-1)] print 'Now using github client id: ' + str(self.current_ghc_desc) for self.gh_entity in self.repos_lists: try: self.repository = self.current_ghc.get_repo(self.gh_entity) self.repo_name = self.repository.name self.repo_full_name = self.repository.full_name self.repo_html_url = self.repository.html_url self.repo_stargazers_count = self.repository.stargazers_count self.repo_forks_count = self.repository.forks_count self.repo_created_at = self.repository.created_at self.repo_is_fork = self.repository.fork self.repo_has_issues = self.repository.has_issues self.repo_open_issues_count = 
self.repository.open_issues_count self.repo_has_wiki = self.repository.has_wiki self.repo_network_count = self.repository.network_count self.repo_pushed_at = self.repository.pushed_at self.repo_size = self.repository.size self.repo_updated_at = self.repository.updated_at self.repo_watchers_count = self.repository.watchers_count # Now its time to get the list of developers! # yay! rec-09 mysql instance is visible from the yoshimune computer ! # ok, but I forgot github blacklisted our comptuing clusters # make sure your local win machine runs it.. # just pjatk things.. carry on scream.say('Retrieving the project id from mysql database.. should take max 1 second.') # Get here project id used in the database ! #conn.ping(True) self.cursor = conn.cursor() self.cursor.execute(r'select distinct id from (select * from projects where `name`="{0}") as p where url like "%{1}"'.format(self.repo_name, self.repo_full_name)) self.rows = self.cursor.fetchall() try: self.repo_db_id = self.rows[0] except: #print str(cursor.info()) # this is too new repo , because it is not found on mysql db, skip it ! continue #print 'Faulty query was: -------- ' #print r'select distinct id from (select * from projects where `name`="{0}") as p where url like "%{1}"'.format(self.repo_name, self.repo_full_name) scream.say('project id retrieved from database is: ' + str(self.repo_db_id)) self.cursor.close() #conn.ping(True) self.cursor = conn.cursor() # Now get list of GitHub logins which are project_members ! 
self.cursor.execute(r'SELECT login FROM project_members INNER JOIN users ON users.id = project_members.user_id WHERE repo_id = %s' % self.repo_db_id) self.project_developers = self.cursor.fetchall() self.project_developers = [i[0] for i in self.project_developers] # unzipping tuples in tuples self.contributors_count = len(self.project_developers) self.cursor.close() #conn.close() for self.project_developer in self.project_developers: # create a GitHub user named object for GitHub API self.current_user = self.current_ghc.get_user(self.project_developer) self.current_user_bio = self.current_user.bio self.current_user_blog = self.current_user.blog self.current_user_collaborators = self.current_user.collaborators self.current_user_company = self.current_user.company self.current_user_contributions = self.current_user.contributions self.current_user_created_at = self.current_user.created_at self.current_user_followers = self.current_user.followers self.current_user_following = self.current_user.following self.current_user_hireable = self.current_user.hireable self.current_user_login = self.current_user.login self.current_user_name = self.current_user.name self.developer_login = self.project_developer # Does he commit during business hours? 
scream.log_debug("Starting to analyze OSRC card for user: "******"Histogram for hours for user: "******"PushEvent"): self.developer_all_pushes += self.usage_element['total'] elif (self.usage_element['type'] == "WatchEvent"): self.developer_all_stars_given += self.usage_element['total'] elif (self.usage_element['type'] == "CreateEvent"): self.developer_all_creations += self.usage_element['total'] elif (self.usage_element['type'] == "IssuesEvent"): self.developer_all_issues_created += self.usage_element['total'] elif (self.usage_element['type'] == "PullRequestEvent"): self.developer_all_pull_requests += self.usage_element['total'] # ----------------------------------------------------------------------- scream.log_debug('Finished analyze OSRC card for user: '******'OSRC gave error, probably 404') scream.say('try ' + str(self.tries) + ' more times') self.tries -= 1 finally: if self.tries < 1: self.developer_works_during_bd = None self.developer_works_period = 0 break self.collection = [str(((page-1)*pagination) + self.i), self.gh_entity, self.repo_full_name, self.repo_html_url, str(self.repo_forks_count), str(self.repo_stargazers_count), str(self.contributors_count), str(self.repo_created_at), str(self.repo_is_fork), str(self.repo_has_issues), str(self.repo_open_issues_count), str(self.repo_has_wiki), str(self.repo_network_count), str(self.repo_pushed_at), str(self.repo_size), str(self.repo_updated_at), str(self.repo_watchers_count), self.project_id, self.project_name, self.project_url, self.project_htmlurl, str(self.project_created_at), str(self.project_updated_at), self.project_homepage_url, str(self.project_average_rating), str(self.project_rating_count), str(self.project_review_count), self.project_activity_level, str(self.project_user_count), str(self.twelve_month_contributor_count), str(self.total_contributor_count), str(self.twelve_month_commit_count), str(self.total_commit_count), str(self.total_code_lines), self.main_language_name, 
str(self.developer_works_during_bd), str(self.developer_works_period), str(self.developer_all_pushes), str(self.developer_all_stars_given), str(self.developer_all_creations), str(self.developer_all_issues_created), str(self.developer_all_pull_requests)] csv_writer.writerow(self.collection) #self.set_finished(True) print '.' except UnknownObjectException: print 'Repo ' + self.gh_entity + ' is not available anymore..' except GithubException: # TODO: write here something clever raise self.set_finished(True)
proxy = {'http': '94.154.26.132:8090'} session_osrc = requests.Session() requests_osrc = session_osrc.get(osrc_url, proxies=proxy) # print requests_osrc.text osrc_data = json.loads(requests_osrc.text) scream.say("JSON parsed..") if ("message" in osrc_data) and (osrc_data["message"].startswith("Not enough information for")): scream.say(osrc_data["message"]) limit = limit + 1 break for language in osrc_data["usage"]["languages"]: #print language["count"] #print language["language"] csv_writer.writerow([str(counter), str(developer_login), str(language["language"]), str(language["count"]), str(language["quantile"])]) scream.log_debug("Languages diagram for user " + str(developer_login) + ' created..', True) # ----------------------------------------------------------------------- scream.log_debug('Finished analyze OSRC card for user: '******'OSRC gave error, probably 404') scream.say('try ' + str(tries) + ' more times') tries -= 1 finally: if tries < 1: developer_works_during_bd = 0 developer_works_period = 0 break #with open('progress_bar.lock') as f: scream.say(f.read()) counter = counter + 1
check_quota_limit() repo.setContributors(repo_contributors) #repo.setContributorsCount(len(repo_contributors)) 'class fields are not garbage, ' 'its better to calculate count on demand' scream.log('Added contributors of count: ' + str(len(repo_contributors)) + ' to a repo ' + key) except GithubException as e: if 'repo_contributors' not in locals(): repo.setContributors([]) else: repo.setContributors(repo_contributors) scream.log_error('Repo didnt gave any contributors, ' + 'or paginated through' + ' contributors gave error. ' + key + ', error({0}): {1}'. format(e.status, e.data)) finally: resume_stage = None if resume_stage in [None, 'languages']: scream.ssay('Getting languages of a repo') languages = repository.get_languages() # dict object (json? object) repo.setLanguage(languages) scream.log('Added languages ' + str(languages) + ' to a repo ' + key) resume_stage = None if resume_stage in [None, 'labels']: scream.ssay('Getting labels of a repo') 'getting labels, label is a tag which you can put in an issue'
except: repos_reported_execution_error.write(key + os.linesep) repo.setContributors(repo_contributors) #repo.setContributorsCount(len(repo_contributors)) 'class fields are not garbage, ' 'its better to calculate count on demand' scream.log('Added contributors of count: ' + str(len(repo_contributors)) + ' to a repo ' + key) except GithubException as e: if 'repo_contributors' not in locals(): repo.setContributors([]) else: repo.setContributors(repo_contributors) scream.log_error( 'Repo didnt gave any contributors, ' + 'or paginated through' + ' contributors gave error. ' + key + ', error({0}): {1}'.format(e.status, e.data)) finally: resume_stage = None #if resume_stage in [None, 'languages']: # scream.ssay('Getting languages of a repo') # languages = repository.get_languages() # dict object (json? object) # repo.setLanguage(languages) # scream.log('Added languages ' + str(languages) + ' to a repo ' + key) # resume_stage = None # to juz mamy # if resume_stage in [None, 'labels']: # scream.ssay('Getting labels of a repo') # 'getting labels, label is a tag which you can put in an issue'
# developer_revealed: gather profile data and per-repository statistics for
# one contributor and append them to the CSV writers.
#
# NOTE(review): in this copy of the file the statements of this function are
# collapsed onto a few very long lines, and a stretch of the body (between the
# "Starting to analyze OSRC card for user" log call and the per-repository
# stats loop) is masked with asterisks. Nesting and several assignments are
# therefore not visible here. The code is left untouched; the notes below
# describe only what the visible text shows.
#
# Parameters (as used in the visible code):
#   thread_getter_instance -- passed through str() for the final debug log
#                             message.
#   repository             -- its .name is read for log messages.
#   repo                   -- its get*() accessors (getUrl, getName, getOwner,
#                             ...) supply the repository columns of the
#                             result row.
#   contributor            -- its attributes (login, name, followers,
#                             following, location, total_private_repos,
#                             public_repos, collaborators, contributions,
#                             company, created_at, hireable, disk_usage,
#                             public_gists, owned_private_repos) are read and
#                             get_repos() is called on it.
#
# Names taken from outside the function: result_writer and
# result_punch_card_writer (declared global; the latter is asserted to be not
# None), scream, freeze, get_status_code, force_raise, show_trace, use_utf8,
# traceback, GithubException.
#
# Visible steps:
#   * Read the contributor's profile fields and log the collaborator and
#     contribution counts.
#   * For a repository `his_repo`, call his_repo.get_stats_contributors() and
#     fetch the HTTP status of
#     https://api.github.com/repos/<full_name>/stats/contributors via
#     get_status_code(). When the status is not 204, the c/a/d values of every
#     week are summed per stats entry, the author login is added to
#     his_contributors, and a row is written to result_punch_card_writer;
#     otherwise a "subrepo is empty" debug message is logged.
#     (Whether the punch-card row is written for every entry or only for
#     newly seen authors cannot be told from this flattened text.)
#   * GithubException, TypeError and any other Exception are reported through
#     freeze(...); a GithubException whose data["message"] is
#     "Repository access blocked" is logged as a private repo and skipped.
#     When force_raise is truthy the exception is re-raised.
#   * Count self_contributing (developer login present in his_contributors),
#     total_his_collaborators and self_collaborating (developer login among
#     his_repo.get_collaborators()).
#   * Derive developer_foreign_collaborators and
#     developer_foreign_contributions by subtracting the "self" counters.
#   * Optionally print the traceback stack / exception when show_trace is set.
#   * Write one row to result_writer. When use_utf8 is falsy every field is
#     wrapped in str(); otherwise text fields are passed as-is, with None
#     replaced by ''. Finally log 'Wrote row to CSV.'.
#
# English rendering of the Polish notes embedded in the code:
#   "1. Ilosc osob, ktore dany deweloper followuje"
#       -> number of people the developer follows
#   "2. Ilosc osob, ktore followuja dewelopera"
#       -> number of people who follow the developer
#   "5a. Ilosc repo, w ktorych jest team member"
#       -> number of repos in which he is a team member
#   "6a. Ilosc repo, w ktorych jest contributorem"
#       -> number of repos in which he is a contributor
#   "his_repositories - Ilosc projektow przez niego utworzonych / ktorych
#    jest wlascicielem"
#       -> projects created by him / of which he is the owner
#   "17. Czy commituje w godzinach pracy (zaleznie od strefy czasowej)?"
#       -> does he commit during working hours (depending on time zone)?
#   "6. Ilosc repo, ktorych nie tworzyl, w ktorych jest deweloperem"
#       -> number of repos he did not create in which he is a developer
#   "5. Ilosc repo, ktorych nie tworzyl, w ktorych jest team member"
#       -> number of repos he did not create in which he is a team member
#   "tutaj od wartosci developer_collaborators / developer_contributions
#    wystarczy odjac wystapienia loginu w podrepo.get_collaborators() /
#    podrepo.get_contributions()"
#       -> here it is enough to subtract the occurrences of the login in the
#          sub-repo's collaborators / contributions from that value
#
# NOTE(review): notes 1 and 2 read as swapped relative to the attributes they
# precede (note 1 sits above contributor.followers, note 2 above
# contributor.following) -- confirm which wording is intended.
# NOTE(review): developer_collaborators is guarded against None before the
# subtraction, developer_contributions is not -- confirm it can never be None.
# NOTE(review): total_his_issues, total_his_pull_requests,
# developer_works_during_bd, developer_works_period, his_contributors and the
# self_* / total_* counters are presumably initialised in the masked span --
# verify against the unmasked file.
def developer_revealed(thread_getter_instance, repository, repo, contributor): global result_writer global result_punch_card_writer assert result_punch_card_writer is not None developer_login = contributor.login scream.log_debug('Assigning a contributor: ' + str(developer_login) + ' to a repo: ' + str(repository.name), True) developer_name = contributor.name # 1. Ilosc osob, ktore dany deweloper followuje [FollowEvent] developer_followers = contributor.followers # 2. Ilosc osob, ktore followuja dewelopera [FollowEvent] developer_following = contributor.following developer_location = contributor.location developer_total_private_repos = contributor.total_private_repos developer_total_public_repos = contributor.public_repos # 5a. Ilosc repo, w ktorych jest team member [TeamAddEvent] [MemberEvent] developer_collaborators = contributor.collaborators scream.say('Developer collaborators count is: ' + str(developer_collaborators)) # 6a. Ilosc repo, w ktorych jest contributorem [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent] developer_contributions = contributor.contributions scream.say('Developer contributions count is: ' + str(developer_contributions)) # his_repositories - Ilosc projektow przez niego utworzonych / ktorych jest wlascicielem his_repositories = contributor.get_repos() # 17. Czy commituje w godzinach pracy (zaleznie od strefy czasowej)? 
scream.log_debug("Starting to analyze OSRC card for user: "******"Histogram for hours for user: "******"pod repozytorium" while True: try: trying_to_get_stats += 1 stats = his_repo.get_stats_contributors() status_code__ = get_status_code('https://api.github.com/repos/' + his_repo.full_name + '/stats/contributors') if status_code__ != 204: for s in stats: ad___c = 0 ad___a = 0 ad___d = 0 for w in s.weeks: ad___c += w.c ad___a += w.a ad___d += w.d if s.author.login not in his_contributors: his_contributors.add(s.author.login) result_punch_card_writer.writerow([str(his_repo.owner.login), str(his_repo.name), str(developer_login), str(s.author.login), str(s.total), str(ad___c), str(ad___a), str(ad___d)]) else: scream.log_debug('The subrepo is empty, thus no statistics (punchcard) generated this time', True) break except GithubException as e: freeze(str(e) + ' his_repo.get_stats_contributors(). Unexpected error with getting stats.') if ("message" in e.data) and (e.data["message"].strip() == "Repository access blocked"): scream.log_debug("It is a private repo.. Skip!", True) break if force_raise: raise except TypeError as e: scream.log_warning('This was stats attempt no: ' + str(trying_to_get_stats), True) freeze(str(e) + ' his_repo.get_stats_contributors(). Punch-card not ready?') # probably punch card not ready if force_raise: raise except Exception as e: freeze(str(e) + ' his_repo.get_stats_contributors(). Punch-card not ready?') # probably punch card not ready if force_raise: raise # 6. Ilosc repo, ktorych nie tworzyl, w ktorych jest deweloperem if developer_login in his_contributors: self_contributing += 1 # 5. 
Ilosc repo, ktorych nie tworzyl, w ktorych jest team member subrepo_collaborators = his_repo.get_collaborators() for collaborator in subrepo_collaborators: total_his_collaborators += 1 if developer_login == collaborator.login: self_collaborating += 1 # All elements paginated through his_repositories, thus we can safely break loop break except GithubException as e: freeze('While getting subrepo details, ' + str(e) + ' in element his_repo in his_repositories') if ("message" in e.data) and (e.data["message"].strip() == "Repository access blocked"): scream.log_debug("It is a private repo.. Skip!") continue if force_raise: raise except TypeError as e: freeze('While getting subrepo details, ' + str(e) + ' in element his_repo in his_repositories. Quota depleted?') # probably punch card not ready if force_raise: raise except Exception as e: freeze('While getting subrepo details, ' + str(e) + ' in element his_repo in his_repositories. Quota depleted?') # probably punch card not ready if force_raise: raise total_his_contributors = len(his_contributors) # 5. Ilosc repo, ktorych nie tworzyl, w ktorych jest team member [TeamAddEvent] [MemberEvent] # tutaj od wartosci developer_collaborators wystarczy odjac wystapienia loginu w podrepo.get_collaborators() developer_foreign_collaborators = (developer_collaborators if developer_collaborators is not None else 0) - self_collaborating # 6. Ilosc repo, ktorych nie tworzyl, w ktorych jest contributorem [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent] # tutaj od wartosci developer_contributions wystarczy odjac wystapienia loginu w podrepo.get_contributions() developer_foreign_contributions = developer_contributions - self_contributing # All properties checked for a dev, thus we can safely break loop break except Exception as e: freeze('Error ' + str(e) + ' in for his_repo in his_repositories loop. 
Will start the subrepo analysis from the beginning.') his_repositories = contributor.get_repos() if force_raise: raise # Developer company (if any given) company = contributor.company created_at = contributor.created_at # Does the developer want to be hired? hireable = contributor.hireable disk_usage = contributor.disk_usage public_gists = contributor.public_gists owned_private_repos = contributor.owned_private_repos total_private_repos = contributor.total_private_repos scream.log_debug('Thread ' + str(thread_getter_instance) + ' Finished revealing contributor: ' + str(developer_login) + ' in a repo: ' + str(repository.name), True) if show_trace: scream.log_debug('Printing traceback stack', True) traceback.print_stack() scream.log_debug('Printing traceback exc pathway', True) traceback.print_exc() if not use_utf8: result_writer.writerow([str(repo.getUrl()), str(repo.getName()), str(repo.getOwner()), str(repo.getStargazersCount()), str(repo.getWatchersCount()), str(repo.getCreatedAt()), str(repo.getDefaultBranch()), str(repo.getDescription()), str(repo.getIsFork()), str(repo.getForks()), str(repo.getForksCount()), str(repo.getHasDownloads()), str(repo.getHasWiki()), str(repo.getHasIssues()), str(repo.getLanguage()), str(repo.getMasterBranch()), str(repo.getNetworkCount()), str(repo.getOpenedIssues()), str(repo.getOrganization()), str(repo.getPushedAt()), str(repo.getUpdatedAt()), str(repo.getPullsCount()), str(total_his_contributors), str(total_his_collaborators), str(developer_foreign_collaborators), str(developer_foreign_contributions), str(total_his_issues), str(total_his_pull_requests), str(developer_login), str(developer_name if developer_name is not None else ''), str(developer_followers), str(developer_following), str(developer_collaborators), str(company if company is not None else ''), str(developer_contributions), str(created_at), str(hireable if hireable is not None else ''), str(developer_location if developer_location is not None else ''), 
str(developer_total_private_repos), str(developer_total_public_repos), str(developer_works_during_bd), str(developer_works_period), str(disk_usage), str(public_gists), str(owned_private_repos), str(total_private_repos)]) else: result_writer.writerow([repo.getUrl(), repo.getName(), repo.getOwner(), str(repo.getStargazersCount()), str(repo.getWatchersCount()), str(repo.getCreatedAt()), repo.getDefaultBranch(), repo.getDescription() if repo.getDescription() is not None else '', str(repo.getIsFork()), str(repo.getForks()), str(repo.getForksCount()), str(repo.getHasDownloads()), str(repo.getHasWiki()), str(repo.getHasIssues()), repo.getLanguage() if repo.getLanguage() is not None else '', repo.getMasterBranch() if repo.getMasterBranch() is not None else '', str(repo.getNetworkCount()), str(repo.getOpenedIssues()), repo.getOrganization() if repo.getOrganization() is not None else '', str(repo.getPushedAt()), str(repo.getUpdatedAt()), str(repo.getPullsCount()), str(total_his_contributors), str(total_his_collaborators), str(developer_foreign_collaborators), str(developer_foreign_contributions), str(total_his_issues), str(total_his_pull_requests), developer_login, developer_name if developer_name is not None else '', str(developer_followers), str(developer_following), str(developer_collaborators), company if company is not None else '', str(developer_contributions), str(created_at), str(hireable) if hireable is not None else '', developer_location if developer_location is not None else '', str(developer_total_private_repos), str(developer_total_public_repos), str(developer_works_during_bd), str(developer_works_period), str(disk_usage), str(public_gists), str(owned_private_repos), str(total_private_repos)]) scream.log_debug('Wrote row to CSV.', True)