def retry_if_neccessary(gotten_tag, tagname, objectname, arg_objectname):
    """Return ``gotten_tag``, re-downloading the HTML page to find it if needed.

    When ``gotten_tag`` is None the page is fetched again up to three times,
    sleeping 60 s, 180 s and 540 s before the successive attempts, and each
    fresh copy is searched for ``tagname`` carrying the attribute
    ``objectname`` with value ``arg_objectname``.

    ``html_addr`` and ``filename`` are not parameters; they are read from the
    surrounding scope.

    Returns:
        The tag (as passed in, or as found by a retry), or None when it is
        still missing after all retries.
    """
    if gotten_tag is None:
        how_long = 60
        # retry 3 times, tripling the pause before each attempt
        for _ in range(3):
            time.sleep(how_long)
            how_long *= 3
            # Keep trying the download itself until the socket cooperates.
            while True:
                try:
                    local_filename_html, _ = urllib.urlretrieve(
                        html_addr, filename + '.html')
                    break
                except IOError:
                    io_socket_message = ('Socket error while retrieving HTML'
                                         ' file from GitHub! Internet or '
                                         'GitHub down? Retry after 1 minute')
                    scream.ssay(io_socket_message)
                    scream.log_warning(io_socket_message)
                    time.sleep(60)
            # Parse the copy that was just downloaded; the previous code
            # re-parsed a stale handle from the outer scope, so a retry could
            # never see new content.
            with open(local_filename_html, 'r') as html_content_file:
                soup = BeautifulSoup(html_content_file)
            gotten_tag = soup.find(tagname, {objectname: arg_objectname})
            if gotten_tag is not None:
                # Found it: stop retrying and fall through to the return.
                # (Raising StopIteration here turned success into an error.)
                break
        if gotten_tag is None:
            # nothing to do here, lets move on
            scream.ssay('orphaned ' + filename + '.json')
            scream.log_error(
                filename + '.json' + ' is without proper html. ' +
                'GitHub not responding or giving 404/501 erorr ??')
            return None
    scream.say(
        'No action required. retry_if_neccessary() returning gotten_tag')
    return gotten_tag
def retry_if_neccessary(gotten_tag, tagname, objectname, arg_objectname):
    """Return ``gotten_tag``, re-downloading the HTML page to find it if needed.

    When ``gotten_tag`` is None the page is fetched again up to three times,
    sleeping 60 s, 180 s and 540 s before the successive attempts, and each
    fresh copy is searched for ``tagname`` carrying the attribute
    ``objectname`` with value ``arg_objectname``.

    ``html_addr`` and ``filename`` are not parameters; they are read from the
    surrounding scope.

    Returns:
        The tag (as passed in, or as found by a retry), or None when it is
        still missing after all retries.
    """
    if gotten_tag is None:
        how_long = 60
        # retry 3 times, tripling the pause before each attempt
        for _ in range(3):
            time.sleep(how_long)
            how_long *= 3
            # Keep trying the download itself until the socket cooperates.
            while True:
                try:
                    local_filename_html, _ = urllib.urlretrieve(
                        html_addr, filename + '.html')
                    break
                except IOError:
                    io_socket_message = ('Socket error while retrieving HTML'
                                         ' file from GitHub! Internet or '
                                         'GitHub down? Retry after 1 minute')
                    scream.ssay(io_socket_message)
                    scream.log_warning(io_socket_message)
                    time.sleep(60)
            # Parse the copy that was just downloaded; the previous code
            # re-parsed a stale handle from the outer scope, so a retry could
            # never see new content.
            with open(local_filename_html, 'r') as html_content_file:
                soup = BeautifulSoup(html_content_file)
            gotten_tag = soup.find(tagname, {objectname: arg_objectname})
            if gotten_tag is not None:
                # Found it: stop retrying and fall through to the return.
                # (Raising StopIteration here turned success into an error.)
                break
        if gotten_tag is None:
            # nothing to do here, lets move on
            scream.ssay('orphaned ' + filename + '.json')
            scream.log_error(filename + '.json' + ' is without proper html. ' +
                             'GitHub not responding or giving 404/501 erorr ??')
            return None
    scream.say('No action required. retry_if_neccessary() returning gotten_tag')
    return gotten_tag
def get_data(self):
    """Collect the contributors of ``self.repository`` and store them on ``self.repo``.

    Runs only when the module-level ``resume_stage`` is None or
    'contributors'. Each contributor login is added to
    ``self.repo_contributors`` and passed, with its object, to
    ``developer_revealed``. The resulting set and its size are then written
    to ``self.repo``. ``self.cleanup()`` is called at the end.

    Exceptions raised while revealing a contributor are logged; they are
    re-raised only when the ``force_raise`` flag is set, otherwise the same
    contributor is attempted again.
    """
    global resume_stage
    scream.say('Preparing to build list of programmers: ' + str(self.threadId))
    if resume_stage in [None, 'contributors']:
        #try:
        scream.ssay('Checking size of a ' + str(self.repo.getKey()) + ' team')
        # Bare string literal used as an inline note; it has no runtime effect.
        '1. Team size of a repository'
        self.contributors = self.repository.get_contributors()
        assert self.contributors is not None
        self.repo_contributors = set()
        self.contributors_static = self.build_list_of_programmers(self.contributors, self.repo.getKey(), self.repository)
        # Each item is indexed below as (login, contributor object).
        for contributor in self.contributors_static.items():
            scream.log_debug('move with contributor to next from contributors_static.items()', True)
            # Loop until developer_revealed() completes; a handler that does
            # not re-raise falls through and repeats the same contributor.
            while True:
                scream.say('Get details for a contributor..')
                try:
                    self.contributor_login = contributor[0]
                    self.contributor_object = contributor[1]
                    scream.say(str(self.contributor_login))
                    self.repo_contributors.add(self.contributor_login)
                    scream.say(str(self.repo_contributors))
                    #developer_revealed(threading.current_thread(), self.repository, self.repo, self.contributor_object)
                    developer_revealed(self.threadId, self.repository, self.repo, self.contributor_object)
                    scream.say('Finished revealing developer')
                    break
                except TypeError as e:
                    # NOTE(review): `key` is not defined in this method -
                    # presumably a module-level name; confirm it is not meant
                    # to be self.repo.getKey().
                    scream.log_error('Repo + Contributor TypeError, or paginated through' + ' contributors gave error. ' + key + ', error({0})'.format(str(e)), True)
                    repos_reported_execution_error.write(key + os.linesep)
                    if force_raise:
                        raise
                    # No freeze() call and no break here, so the retry is immediate.
                    #break
                except socket.timeout as e:
                    scream.log_error('Timeout while revealing details.. ' + ', error({0})'.format(str(e)), True)
                    freeze('socket.timeout in paginate through x contributors')
                    if force_raise:
                        raise
                    #break
                except Exception as e:
                    scream.log_error('Exception while revealing details.. ' + ', error({0})'.format(str(e)), True)
                    freeze(str(e) + ' in paginate through x contributors')
                    if force_raise:
                        raise
                    #break
        assert self.repo_contributors is not None
        self.repo.setContributors(self.repo_contributors)
        self.repo.setContributorsCount(len(self.repo_contributors))
        scream.log('Added contributors of count: ' + str(len(self.repo_contributors)) + ' to a repo ' + key)
    self.cleanup()
def get_data(self):
    """Collect the contributors of ``self.repository`` and store them on ``self.repo``.

    Runs only when the module-level ``resume_stage`` is None or
    'contributors'. Each contributor login is added to
    ``self.repo_contributors`` and passed, with its object, to
    ``developer_revealed``. The resulting set and its size are then written
    to ``self.repo``. ``self.cleanup()`` is called at the end.

    Exceptions raised while revealing a contributor are logged; they are
    re-raised only when the ``force_raise`` flag is set, otherwise the same
    contributor is attempted again.
    """
    global resume_stage
    scream.say('Executing inside-thread method get_data() for: ' + str(self.threadId))
    if resume_stage in [None, 'contributors']:
        #try:
        scream.ssay('Checking size of a ' + str(self.repo.getKey()) + ' team')
        # Bare string literal used as an inline note; it has no runtime effect.
        '1. Team size of a repository'
        self.contributors = self.repository.get_contributors()
        assert self.contributors is not None
        self.repo_contributors = set()
        self.contributors_static = self.build_list_of_programmers(self.contributors, self.repo.getKey(), self.repository)
        # Each item is indexed below as (login, contributor object).
        for contributor in self.contributors_static.items():
            scream.log_debug('move with contributor to next from contributors_static.items()', True)
            # Loop until developer_revealed() completes; a handler that does
            # not re-raise falls through and repeats the same contributor.
            while True:
                scream.say('Inside while True: (line 674)')
                try:
                    self.contributor_login = contributor[0]
                    self.contributor_object = contributor[1]
                    scream.say(str(self.contributor_login))
                    self.repo_contributors.add(self.contributor_login)
                    scream.say(str(self.repo_contributors))
                    #developer_revealed(threading.current_thread(), self.repository, self.repo, self.contributor_object)
                    developer_revealed(self.threadId, self.repository, self.repo, self.contributor_object)
                    scream.say('Finished revealing developer')
                    break
                except TypeError as e:
                    # NOTE(review): `key` is not defined in this method -
                    # presumably a module-level name; confirm it is not meant
                    # to be self.repo.getKey().
                    scream.log_error('Repo + Contributor TypeError, or paginated through' + ' contributors gave error. ' + key + ', error({0})'.format(str(e)), True)
                    repos_reported_execution_error.write(key + os.linesep)
                    if force_raise:
                        raise
                    # No freeze() call and no break here, so the retry is immediate.
                    #break
                except socket.timeout as e:
                    scream.log_error('Timeout while revealing details.. ' + ', error({0})'.format(str(e)), True)
                    freeze('socket.timeout in paginate through x contributors')
                    if force_raise:
                        raise
                    #break
                except Exception as e:
                    scream.log_error('Exception while revealing details.. ' + ', error({0})'.format(str(e)), True)
                    freeze(str(e) + ' in paginate through x contributors')
                    if force_raise:
                        raise
                    #break
        assert self.repo_contributors is not None
        self.repo.setContributors(self.repo_contributors)
        self.repo.setContributorsCount(len(self.repo_contributors))
        scream.log('Added contributors of count: ' + str(len(self.repo_contributors)) + ' to a repo ' + key)
    self.cleanup()
def check_quota_limit():
    """Count this call and, on every tenth one, report the GitHub rate limit.

    The module-level counter ``quota_check`` is incremented on each call.
    Once it exceeds 9 it is reset to 0, the current limit, remaining calls
    and reset time are announced through ``scream.ssay``, and ``freeze()``
    is invoked when fewer than 10 calls remain.
    """
    global quota_check
    quota_check = quota_check + 1
    if quota_check <= 9:
        # Not the tenth call yet: nothing to query.
        return

    quota_check = 0
    rate = gh.get_rate_limit().rate
    scream.ssay('Rate limit: ' + str(rate.limit) +
                ' remaining: ' + str(rate.remaining))
    scream.ssay('Rate limit reset time: ' + str(gh.rate_limiting_resettime))
    if rate.remaining < 10:
        freeze()
# print help information and exit: print str(err) # will print something like "option -a not recognized" usage() sys.exit(2) if len(opts) < 2: print 'There were ' + str( len(opts) ) + ' arguments provided. Not to little? Check --help for more info.' else: print 'There were ' + str(len(opts)) + ' arguments provided.' for o, a in opts: if o in ("-v", "--verbose"): __builtin__.verbose = True scream.ssay('Enabling verbose mode.') elif o in ("-h", "--help"): usage() sys.exit() elif o in ("-t", "--tokens"): auth_with_tokens = (a in ['true', 'True']) elif o in ("-u", "--utf8"): use_utf8 = (a not in ['false', 'False']) elif o in ("-r", "--resume" ): # if running after a long pause, consider starting from new resume_on_repo = a # remember dataset is a static one point in time scream.ssay('Resume on repo? ' + str(resume_on_repo)) elif o in ('--resumeinclusive'): resume_on_repo_inclusive = True scream.ssay('Resume on repo with inclusion') elif o in ("-s", "--resumestage"):
print line try: opts, args = getopt.getopt(sys.argv[1:], "ht:u:r:s:e:v", ["help", "tokens=", "utf8=", "resume=", "resumestage=", "entity=", "verbose"]) except getopt.GetoptError as err: # print help information and exit: print str(err) # will print something like "option -a not recognized" usage() sys.exit(2) for o, a in opts: if o in ("-v", "--verbose"): __builtin__.verbose = True scream.ssay('Enabling verbose mode.') elif o in ("-h", "--help"): usage() sys.exit() elif o in ("-t", "--tokens"): auth_with_tokens = (a in ['true', 'True']) elif o in ("-u", "--utf8"): use_utf8 = (a not in ['false', 'False']) elif o in ("-r", "--resume"): resume_on_repo = a scream.ssay('Resume on repo? ' + str(resume_on_repo)) elif o in ("-s", "--resumestage"): resume_stage = a scream.ssay('Resume on repo with stage ' + str(resume_stage)) elif o in ("-e", "--entity"): resume_entity = a
def report_quota(quota_is, quota_left):
    """Run ``report_quota_async(quota_is, quota_left)`` on a new thread.

    The thread is started after announcing it through ``scream.ssay``; the
    call returns without waiting for the thread to finish.
    """
    worker_args = (quota_is, quota_left)
    worker = threading.Thread(target=report_quota_async, args=worker_args)
    scream.ssay('starting email thread...')
    worker.start()
try: opts, args = getopt.getopt(sys.argv[1:], "ht:u:r:s:e:vx:z:qim:j:d:y", ["help", "tokens=", "utf8=", "resume=", "resumestage=", "entity=", "verbose", "threads=", "timeout=", "reverse", "intelli", "safemargin=", "sleep=", "fraise=", "trace", "resumeinclusive"]) except getopt.GetoptError as err: # print help information and exit: print str(err) # will print something like "option -a not recognized" usage() sys.exit(2) for o, a in opts: if o in ("-v", "--verbose"): __builtin__.verbose = True scream.ssay('Enabling verbose mode.') elif o in ("-h", "--help"): usage() sys.exit() elif o in ("-t", "--tokens"): auth_with_tokens = (a in ['true', 'True']) elif o in ("-u", "--utf8"): use_utf8 = (a not in ['false', 'False']) elif o in ("-r", "--resume"): # if running after a long pause, consider starting from new resume_on_repo = a # remember dataset is a static one point in time scream.ssay('Resume on repo? ' + str(resume_on_repo)) elif o in ('--resumeinclusive'): resume_on_repo_inclusive = True scream.ssay('Resume on repo with inclusion') elif o in ("-s", "--resumestage"): resume_stage = a
if filename == resume_from_entity: scream.say('Found! Resuming work.') scream.say(pullnumber) resume_from_entity = None else: continue json_timeout = 60 while True: try: local_filename, headers = urllib.urlretrieve(key + '?client_id=' + client_id + '&client_secret=' + client_secret, filename + '.json') break except IOError: scream.ssay('Error retrieving data from GitHub API. ' + 'Socket error / timeout. Retry after ' + str(json_timeout) + ' s.') time.sleep(json_timeout) json_timeout *= 2 except: scream.ssay('Error retrieving data from GitHub API. ' + 'Unknown error. Retry after ' + str(json_timeout) + ' s.') time.sleep(json_timeout) json_timeout *= 2 with open(local_filename, 'r') as content_file: json = simplejson.load(content_file) #print json if (len(json) < 2): scream.say('JSON empty! lunching retry_json(3, 60) protocol') json = retry_json(3, 60) if ('message' in json) and (json['message'].startswith('API rate limit exceeded for')):
def descr_user(s):
    """Look up GitHub user ``s`` and submit their first name to genderchecker.com.

    Results already stored in ``persist_users`` are answered from there: the
    stored value, or ``s`` itself when the stored value is None.

    Otherwise the user's profile is fetched from the GitHub API (retrying
    every 60 s on IOError). When the profile has no usable name,
    ``persist_users[s]`` is set to None and ``s`` is returned. When it has
    one, the first word of the name is submitted through the
    genderchecker.com form, with up to four submission attempts.

    Returns:
        The cached value or ``s`` as described above. On the form-submission
        path this block ends after reading the response and returns None.

    Raises:
        StopIteration: when the form submission has failed four times.
    """
    if s in persist_users:
        cached = persist_users[s]
        return s if cached is None else cached

    # get user name and surname here
    while True:
        try:
            response = urllib.urlopen('https://api.github.com/users/' + s +
                                      '?client_id=' + client_id +
                                      '&client_secret=' + client_secret)
            break
        except IOError:
            print('API GitHub not responding, urlopen failed')
            print('retrying after 1 minute')
            time.sleep(60)
    scream.ssay(response)
    data = simplejson.load(response)
    scream.ssay(data)

    if 'name' not in data:
        scream.say('Fullname not provided')
        persist_users[s] = None
        return s
    fullname = data['name']
    if fullname is None:
        scream.say('Fullname provided but an empty entry')
        persist_users[s] = None
        return s

    # A whitespace-only name splits into nothing; treat it like an empty
    # name instead of failing with IndexError on [0].
    name_parts = fullname.split()
    first_name = unicode(name_parts[0]) if name_parts else u''
    if len(first_name) > 0:
        scream.say('#ask now internet for gender')
        while True:
            try:
                response = my_browser.open('http://genderchecker.com/')
                response.read()
                break
            except urllib2.URLError:
                scream.ssay('Site genderchecker.com seems to be down' +
                            '. awaiting for 60s before retry')
                time.sleep(60)
        scream.say('Response read. Mechanize selecting form.')
        my_browser.select_form("aspnetForm")
        my_browser.form.set_all_readonly(False)  # allow everything to be written
        control = my_browser.form.find_control("ctl00$TextBoxName")
        if only_roman_chars(first_name):
            control.value = StripNonAlpha(first_name.encode('utf-8'))
        else:
            control.value = StripNonAlpha(cyrillic2latin(first_name).encode('utf-8'))
        scream.say('Control value is set to :' + str(control.value))

        submit_retry_counter = 4
        while True:
            try:
                response = my_browser.submit()
                # Read inside the try so a failed read is retried as well.
                html = response.read()
                break
            except mechanize.HTTPError as e:
                submit_retry_counter -= 1
                if submit_retry_counter < 1:
                    raise StopIteration
                error_message = ('Site genderchecker.com seems to have '
                                 'internal problems. or my request is'
                                 ' wibbly-wobbly nonsense. HTTPError ' +
                                 str(e.code) +
                                 '. awaiting for 60s before retry')
                scream.say(error_message)
                scream.log_error(str(e.code) + ': ' + error_message)
                time.sleep(60)
            except Exception:
                # Exception rather than a bare except, so that
                # KeyboardInterrupt and SystemExit still stop the program.
                submit_retry_counter -= 1
                if submit_retry_counter < 1:
                    raise StopIteration
                error_message = ('Site genderchecker.com seems to have '
                                 'internal problems. or my request is'
                                 ' wibbly-wobbly nonsense. '
                                 'awaiting for 60s before retry')
                scream.say(error_message)
                scream.log_error(error_message)
                time.sleep(60)
def descr_user(s):
    """Look up GitHub user ``s`` and submit their first name to genderchecker.com.

    Results already stored in ``persist_users`` are answered from there: the
    stored value, or ``s`` itself when the stored value is None.

    Otherwise the user's profile is fetched from the GitHub API (retrying
    every 60 s on IOError). When the profile has no usable name,
    ``persist_users[s]`` is set to None and ``s`` is returned. When it has
    one, the first word of the name is submitted through the
    genderchecker.com form, with up to four submission attempts.

    Returns:
        The cached value or ``s`` as described above. On the form-submission
        path this block ends after reading the response and returns None.

    Raises:
        StopIteration: when the form submission has failed four times.
    """
    if s in persist_users:
        cached = persist_users[s]
        return s if cached is None else cached

    # get user name and surname here
    while True:
        try:
            response = urllib.urlopen('https://api.github.com/users/' + s +
                                      '?client_id=' + client_id +
                                      '&client_secret=' + client_secret)
            break
        except IOError:
            print('API GitHub not responding, urlopen failed')
            print('retrying after 1 minute')
            time.sleep(60)
    scream.ssay(response)
    data = simplejson.load(response)
    scream.ssay(data)

    if 'name' not in data:
        scream.say('Fullname not provided')
        persist_users[s] = None
        return s
    fullname = data['name']
    if fullname is None:
        scream.say('Fullname provided but an empty entry')
        persist_users[s] = None
        return s

    # A whitespace-only name splits into nothing; treat it like an empty
    # name instead of failing with IndexError on [0].
    name_parts = fullname.split()
    first_name = unicode(name_parts[0]) if name_parts else u''
    if len(first_name) > 0:
        scream.say('#ask now internet for gender')
        while True:
            try:
                response = my_browser.open('http://genderchecker.com/')
                response.read()
                break
            except urllib2.URLError:
                scream.ssay('Site genderchecker.com seems to be down' +
                            '. awaiting for 60s before retry')
                time.sleep(60)
        scream.say('Response read. Mechanize selecting form.')
        my_browser.select_form("aspnetForm")
        my_browser.form.set_all_readonly(False)  # allow everything to be written
        control = my_browser.form.find_control("ctl00$TextBoxName")
        if only_roman_chars(first_name):
            control.value = StripNonAlpha(first_name.encode('utf-8'))
        else:
            control.value = StripNonAlpha(
                cyrillic2latin(first_name).encode('utf-8'))
        scream.say('Control value is set to :' + str(control.value))

        submit_retry_counter = 4
        while True:
            try:
                response = my_browser.submit()
                # Read inside the try so a failed read is retried as well.
                html = response.read()
                break
            except mechanize.HTTPError as e:
                submit_retry_counter -= 1
                if submit_retry_counter < 1:
                    raise StopIteration
                error_message = ('Site genderchecker.com seems to have '
                                 'internal problems. or my request is'
                                 ' wibbly-wobbly nonsense. HTTPError ' +
                                 str(e.code) +
                                 '. awaiting for 60s before retry')
                scream.say(error_message)
                scream.log_error(str(e.code) + ': ' + error_message)
                time.sleep(60)
            except Exception:
                # Exception rather than a bare except, so that
                # KeyboardInterrupt and SystemExit still stop the program.
                submit_retry_counter -= 1
                if submit_retry_counter < 1:
                    raise StopIteration
                error_message = ('Site genderchecker.com seems to have '
                                 'internal problems. or my request is'
                                 ' wibbly-wobbly nonsense. '
                                 'awaiting for 60s before retry')
                scream.say(error_message)
                scream.log_error(error_message)
                time.sleep(60)
def _or_blank(value):
    """Return ``value`` unchanged, or '' when it is None."""
    return '' if value is None else value


def _str_or_blank(value):
    """Return ``str(value)``, or '' when ``value`` is None."""
    return '' if value is None else str(value)


def _login_or_blank(user):
    """Return ``user.login``, or '' when ``user`` is None."""
    return '' if user is None else user.login


def _one_line(text):
    """Join the lines of ``text`` with spaces; '' when ``text`` is None."""
    return '' if text is None else ' '.join(text.splitlines())


def _csv_writer(output_csvfile):
    """Build the row writer selected by the ``use_utf8`` setting."""
    if use_utf8:
        return UnicodeWriter(output_csvfile)
    return csv.writer(output_csvfile, dialect=MyDialect)


def output_data(repo):
    """Append everything known about ``repo`` to the per-entity CSV files.

    Rows are appended to repos.csv, contributors.csv, commits.csv,
    languages.csv, subscribers.csv, labels.csv, issues.csv and pulls.csv in
    the current directory. Every row starts with the repository name and
    owner. None values are written as empty strings. A warning is logged for
    each optional collection (languages, subscribers, labels, issues, pulls)
    that is None.

    Raises:
        AssertionError: when a counter is not of the expected type.
    """
    ident = (repo.getName(), repo.getOwner())

    with open('repos.csv', 'ab') as output_csvfile:
        scream.ssay('repos.csv opened for append..')
        repowriter = _csv_writer(output_csvfile)
        rn = repo.getRepoObject()
        rfc = repo.getForksCount()
        rwc = repo.getWatchersCount()
        rcc = repo.getContributorsCount()
        rsc = repo.getSubscribersCount()
        rstc = repo.getStargazersCount()
        rlc = repo.getLabelsCount()
        rcmc = repo.getCommitsCount()
        rpc = repo.getPullsCount()
        # Forks and watchers arrive as digit strings, the rest as plain ints.
        assert rfc.isdigit()
        assert rwc.isdigit()
        for counter in (rcc, rsc, rstc, rlc, rcmc, rpc):
            assert type(counter) == int
        repowriter.writerow(ident + (
            str(rfc),
            str(rwc),
            str(rcc),
            str(rsc),
            str(rstc),
            str(rlc),
            str(rcmc),
            str(rpc),
            _or_blank(rn.archive_url),
            _or_blank(rn.assignees_url),
            _or_blank(rn.blobs_url),
            _or_blank(rn.branches_url),
            _or_blank(rn.clone_url),
            _or_blank(rn.collaborators_url),
            _or_blank(rn.comments_url),
            _or_blank(rn.commits_url),
            _or_blank(rn.compare_url),
            _or_blank(rn.contents_url),
            _or_blank(rn.contributors_url),
            _str_or_blank(rn.created_at),
            _or_blank(rn.default_branch),
            _or_blank(rn.description),
            _or_blank(rn.events_url),
            _str_or_blank(rn.fork),
            _or_blank(rn.full_name),
            _or_blank(rn.git_commits_url),
            _or_blank(rn.git_refs_url),
            _or_blank(rn.git_tags_url),
            _str_or_blank(rn.has_downloads),
            _str_or_blank(rn.has_wiki),
            _or_blank(rn.master_branch),
        ))

    # NOTE(review): contributors are iterated here without a None check,
    # although the subscribers section below tests the same getter for None.
    with open('contributors.csv', 'ab') as output_csvfile:
        scream.ssay('contributors.csv opened for append..')
        contribwriter = _csv_writer(output_csvfile)
        for contributor in repo.getContributors():
            contribwriter.writerow(ident + (contributor.login,))

    with open('commits.csv', 'ab') as output_csvfile:
        scream.ssay('commits.csv opened for append..')
        commitswriter = _csv_writer(output_csvfile)
        for commit in repo.getCommits():
            commitswriter.writerow(ident + (
                commit.sha,
                _login_or_blank(commit.author),
                _login_or_blank(commit.committer),
                commit.url,
                commit.html_url,
                commit.comments_url,
            ))

    # Call the getter: testing the bound method itself was always true, so
    # a None result skipped the warning and crashed the loop instead.
    languages = repo.getLanguages()
    if languages is not None:
        with open('languages.csv', 'ab') as output_csvfile:
            scream.ssay('languages.csv opened for append..')
            langwriter = _csv_writer(output_csvfile)
            for language in languages:
                langwriter.writerow(ident + (language,))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no languages[]')

    # NOTE(review): subscribers.csv is filled from getContributors(), as in
    # the original; confirm a subscribers getter was not intended.
    if repo.getContributors() is not None:
        with open('subscribers.csv', 'ab') as output_csvfile:
            scream.ssay('subscribers.csv opened for append..')
            subscriberswriter = _csv_writer(output_csvfile)
            for subscriber in repo.getContributors():
                subscriberswriter.writerow(ident + (
                    _or_blank(subscriber.login),
                    _or_blank(subscriber.bio),
                    _or_blank(subscriber.blog),
                    str(subscriber.collaborators),
                    _or_blank(subscriber.company),
                    str(subscriber.contributions),
                    str(subscriber.followers),
                    str(subscriber.following),
                ))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no subscribers[]')

    if repo.getLabels() is not None:
        with open('labels.csv', 'ab') as output_csvfile:
            scream.ssay('labels.csv opened for append..')
            labelswriter = _csv_writer(output_csvfile)
            for label in repo.getLabels():
                labelswriter.writerow(ident + (label.name, label.color))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no labels[]')

    if repo.getIssues() is not None:
        with open('issues.csv', 'ab') as output_csvfile:
            scream.ssay('issues.csv opened for append..')
            issueswriter = _csv_writer(output_csvfile)
            for issue in repo.getIssues():
                assert (type(issue.id) == int or issue.id is None)
                assert (type(issue.number) == int or issue.number is None)
                issueswriter.writerow(ident + (
                    _login_or_blank(issue.assignee),
                    _one_line(issue.body),
                    # closed_at is passed through without str(), as before.
                    _or_blank(issue.closed_at),
                    _login_or_blank(issue.closed_by),
                    str(issue.id),
                    str(issue.number),
                    _or_blank(issue.title),
                ))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no issues[]')

    if repo.getPulls() is not None:
        with open('pulls.csv', 'ab') as output_csvfile:
            scream.ssay('pulls.csv opened for append..')
            pullswriter = _csv_writer(output_csvfile)
            for pull in repo.getPulls():
                pullswriter.writerow(ident + (
                    str(pull.additions),
                    _login_or_blank(pull.assignee),
                    _one_line(pull.body),
                    str(pull.changed_files),
                    _str_or_blank(pull.closed_at),
                    str(pull.comments),
                    _or_blank(pull.comments_url),
                    _str_or_blank(pull.created_at),
                    str(pull.deletions),
                    _or_blank(pull.diff_url),
                    _or_blank(pull.html_url),
                    str(pull.id),
                    _or_blank(pull.issue_url),
                    _or_blank(pull.merge_commit_sha),
                    str(pull.mergeable),
                    _or_blank(pull.mergeable_state),
                    str(pull.merged),
                    _str_or_blank(pull.merged_at),
                    str(pull.number),
                    _or_blank(pull.patch_url),
                    _or_blank(pull.review_comment_url),
                    str(pull.review_comments),
                    _or_blank(pull.review_comments_url),
                    _or_blank(pull.state),
                    _one_line(pull.title),
                    _str_or_blank(pull.updated_at),
                    _login_or_blank(pull.user),
                ))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no pulls[]')
method = a elif o in ("-s", "--sites"): sites = a elif o in ("-i", "--input"): geckoname = a makeHeaders() if 'goldpoll' in sites: if method == 'static': scream.log('Not supported yet! Use native or dont define @method at all') elif method == 'native': doc = html.parse(goldpoll_url) #print etree.tostring(doc) elements_c10 = doc.xpath('//table[@class="cl0"]') scream.ssay(len(elements_c10)) for element in elements_c10: scream.ssay('') scream.ssay('Parsing HYIP..') hyip = Hyip() local_soup = BeautifulSoup(etree.tostring(element)) hyip_name_tag = local_soup.find("a", {"class": "nhyip"}) hyip_name = hyip_name_tag.string hyip_url = 'http://www.goldpoll.com' + hyip_name_tag['href'] scream.say('Name: ' + hyip_name.strip()) scream.say('URL: ' + hyip_url) hyip.setName(hyip_name.strip()) session = requests.session()
scream.say('Found! Resuming work.') scream.say(pullnumber) resume_from_entity = None else: continue json_timeout = 60 while True: try: local_filename, headers = urllib.urlretrieve( key + '?client_id=' + client_id + '&client_secret=' + client_secret, filename + '.json') break except IOError: scream.ssay('Error retrieving data from GitHub API. ' + 'Socket error / timeout. Retry after ' + str(json_timeout) + ' s.') time.sleep(json_timeout) json_timeout *= 2 except: scream.ssay('Error retrieving data from GitHub API. ' + 'Unknown error. Retry after ' + str(json_timeout) + ' s.') time.sleep(json_timeout) json_timeout *= 2 with open(local_filename, 'r') as content_file: json = simplejson.load(content_file) #print json if (len(json) < 2): scream.say(
def _or_blank(value):
    """Return ``value`` unchanged, or '' when it is None."""
    return '' if value is None else value


def _str_or_blank(value):
    """Return ``str(value)``, or '' when ``value`` is None."""
    return '' if value is None else str(value)


def _login_or_blank(user):
    """Return ``user.login``, or '' when ``user`` is None."""
    return '' if user is None else user.login


def _one_line(text):
    """Join the lines of ``text`` with spaces; '' when ``text`` is None."""
    return '' if text is None else ' '.join(text.splitlines())


def _csv_writer(output_csvfile):
    """Build the row writer selected by the ``use_utf8`` setting."""
    if use_utf8:
        return UnicodeWriter(output_csvfile)
    return csv.writer(output_csvfile, dialect=MyDialect)


def output_data(repo):
    """Append everything known about ``repo`` to the per-entity CSV files.

    Rows are appended to repos.csv, contributors.csv, commits.csv,
    languages.csv, subscribers.csv, labels.csv, issues.csv and pulls.csv in
    the current directory. Every row starts with the repository name and
    owner. None values are written as empty strings. A warning is logged for
    each optional collection (languages, subscribers, labels, issues, pulls)
    that is None.

    Raises:
        AssertionError: when a counter is not of the expected type.
    """
    ident = (repo.getName(), repo.getOwner())

    with open('repos.csv', 'ab') as output_csvfile:
        scream.ssay('repos.csv opened for append..')
        repowriter = _csv_writer(output_csvfile)
        rn = repo.getRepoObject()
        rfc = repo.getForksCount()
        rwc = repo.getWatchersCount()
        rcc = repo.getContributorsCount()
        rsc = repo.getSubscribersCount()
        rstc = repo.getStargazersCount()
        rlc = repo.getLabelsCount()
        rcmc = repo.getCommitsCount()
        rpc = repo.getPullsCount()
        # Forks and watchers arrive as digit strings, the rest as plain ints.
        assert rfc.isdigit()
        assert rwc.isdigit()
        for counter in (rcc, rsc, rstc, rlc, rcmc, rpc):
            assert type(counter) == int
        repowriter.writerow(ident + (
            str(rfc),
            str(rwc),
            str(rcc),
            str(rsc),
            str(rstc),
            str(rlc),
            str(rcmc),
            str(rpc),
            _or_blank(rn.archive_url),
            _or_blank(rn.assignees_url),
            _or_blank(rn.blobs_url),
            _or_blank(rn.branches_url),
            _or_blank(rn.clone_url),
            _or_blank(rn.collaborators_url),
            _or_blank(rn.comments_url),
            _or_blank(rn.commits_url),
            _or_blank(rn.compare_url),
            _or_blank(rn.contents_url),
            _or_blank(rn.contributors_url),
            _str_or_blank(rn.created_at),
            _or_blank(rn.default_branch),
            _or_blank(rn.description),
            _or_blank(rn.events_url),
            _str_or_blank(rn.fork),
            _or_blank(rn.full_name),
            _or_blank(rn.git_commits_url),
            _or_blank(rn.git_refs_url),
            _or_blank(rn.git_tags_url),
            _str_or_blank(rn.has_downloads),
            _str_or_blank(rn.has_wiki),
            _or_blank(rn.master_branch),
        ))

    # NOTE(review): contributors are iterated here without a None check,
    # although the subscribers section below tests the same getter for None.
    with open('contributors.csv', 'ab') as output_csvfile:
        scream.ssay('contributors.csv opened for append..')
        contribwriter = _csv_writer(output_csvfile)
        for contributor in repo.getContributors():
            contribwriter.writerow(ident + (contributor.login,))

    with open('commits.csv', 'ab') as output_csvfile:
        scream.ssay('commits.csv opened for append..')
        commitswriter = _csv_writer(output_csvfile)
        for commit in repo.getCommits():
            commitswriter.writerow(ident + (
                commit.sha,
                _login_or_blank(commit.author),
                _login_or_blank(commit.committer),
                commit.url,
                commit.html_url,
                commit.comments_url,
            ))

    # Call the getter: testing the bound method itself was always true, so
    # a None result skipped the warning and crashed the loop instead.
    languages = repo.getLanguages()
    if languages is not None:
        with open('languages.csv', 'ab') as output_csvfile:
            scream.ssay('languages.csv opened for append..')
            langwriter = _csv_writer(output_csvfile)
            for language in languages:
                langwriter.writerow(ident + (language,))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no languages[]')

    # NOTE(review): subscribers.csv is filled from getContributors(), as in
    # the original; confirm a subscribers getter was not intended.
    if repo.getContributors() is not None:
        with open('subscribers.csv', 'ab') as output_csvfile:
            scream.ssay('subscribers.csv opened for append..')
            subscriberswriter = _csv_writer(output_csvfile)
            for subscriber in repo.getContributors():
                subscriberswriter.writerow(ident + (
                    _or_blank(subscriber.login),
                    _or_blank(subscriber.bio),
                    _or_blank(subscriber.blog),
                    str(subscriber.collaborators),
                    _or_blank(subscriber.company),
                    str(subscriber.contributions),
                    str(subscriber.followers),
                    str(subscriber.following),
                ))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no subscribers[]')

    if repo.getLabels() is not None:
        with open('labels.csv', 'ab') as output_csvfile:
            scream.ssay('labels.csv opened for append..')
            labelswriter = _csv_writer(output_csvfile)
            for label in repo.getLabels():
                labelswriter.writerow(ident + (label.name, label.color))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no labels[]')

    if repo.getIssues() is not None:
        with open('issues.csv', 'ab') as output_csvfile:
            scream.ssay('issues.csv opened for append..')
            issueswriter = _csv_writer(output_csvfile)
            for issue in repo.getIssues():
                assert (type(issue.id) == int or issue.id is None)
                assert (type(issue.number) == int or issue.number is None)
                issueswriter.writerow(ident + (
                    _login_or_blank(issue.assignee),
                    _one_line(issue.body),
                    # closed_at is passed through without str(), as before.
                    _or_blank(issue.closed_at),
                    _login_or_blank(issue.closed_by),
                    str(issue.id),
                    str(issue.number),
                    _or_blank(issue.title),
                ))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no issues[]')

    if repo.getPulls() is not None:
        with open('pulls.csv', 'ab') as output_csvfile:
            scream.ssay('pulls.csv opened for append..')
            pullswriter = _csv_writer(output_csvfile)
            for pull in repo.getPulls():
                pullswriter.writerow(ident + (
                    str(pull.additions),
                    _login_or_blank(pull.assignee),
                    _one_line(pull.body),
                    str(pull.changed_files),
                    _str_or_blank(pull.closed_at),
                    str(pull.comments),
                    _or_blank(pull.comments_url),
                    _str_or_blank(pull.created_at),
                    str(pull.deletions),
                    _or_blank(pull.diff_url),
                    _or_blank(pull.html_url),
                    str(pull.id),
                    _or_blank(pull.issue_url),
                    _or_blank(pull.merge_commit_sha),
                    str(pull.mergeable),
                    _or_blank(pull.mergeable_state),
                    str(pull.merged),
                    _str_or_blank(pull.merged_at),
                    str(pull.number),
                    _or_blank(pull.patch_url),
                    _or_blank(pull.review_comment_url),
                    str(pull.review_comments),
                    _or_blank(pull.review_comments_url),
                    _or_blank(pull.state),
                    _one_line(pull.title),
                    _str_or_blank(pull.updated_at),
                    _login_or_blank(pull.user),
                ))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no pulls[]')
# Command-line handling: parse the switches, then copy each recognised one
# into its setting.
try:
    opts, args = getopt.getopt(
        sys.argv[1:],
        "ht:u:r:s:e:v",
        ["help", "tokens=", "utf8=", "resume=", "resumestage=", "entity=",
         "verbose"],
    )
except getopt.GetoptError as err:
    # Unknown switch or missing argument: show the parser's message
    # (e.g. "option -a not recognized") and the usage text, then exit with 2.
    print(str(err))
    usage()
    sys.exit(2)

for o, a in opts:
    if o == "-h" or o == "--help":
        usage()
        sys.exit()
    elif o == "-v" or o == "--verbose":
        __builtin__.verbose = True
        scream.ssay('Enabling verbose mode.')
    elif o == "-t" or o == "--tokens":
        # Only the literal words true/True switch token authentication on.
        auth_with_tokens = (a == 'true' or a == 'True')
    elif o == "-u" or o == "--utf8":
        # Anything except the literal words false/False keeps UTF-8 on.
        use_utf8 = (a != 'false' and a != 'False')
    elif o == "-r" or o == "--resume":
        resume_on_repo = a
        scream.ssay('Resume on repo? ' + str(resume_on_repo))
    elif o == "-s" or o == "--resumestage":
        resume_stage = a
        scream.ssay('Resume on repo with stage ' + str(resume_stage))
    elif o == "-e" or o == "--entity":
        resume_entity = a
print line try: opts, args = getopt.getopt(sys.argv[1:], "ht:u:r:s:v", ["help", "tokens=", "utf8=", "resume=", "resumestage=", "verbose"]) except getopt.GetoptError as err: # print help information and exit: print str(err) # will print something like "option -a not recognized" usage() sys.exit(2) for o, a in opts: if o in ("-v", "--verbose"): __builtin__.verbose = True scream.ssay('Enabling verbose mode.') elif o in ("-h", "--help"): usage() sys.exit() elif o in ("-t", "--tokens"): auth_with_tokens = (a in ['true', 'True']) elif o in ("-u", "--utf8"): use_utf8 = (a not in ['false', 'False']) elif o in ("-r", "--resume"): resume_on_repo = a scream.ssay('Resume on repo? ' + str(resume_on_repo)) elif o in ("-s", "--resumestage"): resume_stage = a scream.ssay('Resume on repository with name ' + str(resume_stage)) repos = dict()
# developer_revealed(thread_getter_instance, repository, repo, contributor)
#
# Collects attributes of `contributor` and writes them, together with the
# fields read from `repo` through its get*() accessors, as one row to the
# global `result_writer`. In the visible code it:
#   * reads login, name, followers, following, location, total_private_repos,
#     public_repos, collaborators, contributions, company, created_at,
#     hireable, disk_usage, public_gists and owned_private_repos from
#     `contributor`, and calls contributor.get_repos();
#   * for every entry `s` of his_repo.get_stats_contributors() sums w.c, w.a
#     and w.d over s.weeks, adds s.author.login to `his_contributors`, and
#     writes a row to the global `result_punch_card_writer` (asserted to be
#     not None on entry); this is skipped when get_status_code() for the
#     repo's /stats/contributors URL returns 204;
#   * counts the entries of his_repo.get_collaborators() and how many of them
#     carry the developer's own login;
#   * derives developer_foreign_collaborators / developer_foreign_contributions
#     by subtracting the self_* counters from the profile totals;
#   * writes the final row: when `use_utf8` is false every field is wrapped in
#     str(); otherwise text fields are passed unwrapped and None is replaced
#     by '' for the optional ones.
# Every exception handler calls freeze(...) with a message and re-raises only
# when `force_raise` is true. `thread_getter_instance` and `repository` are
# used only in log messages in the visible code.
#
# NOTE(review): this definition has lost its line breaks and indentation, and
# the part that starts at the "Starting to analyze OSRC card" log call holds
# two `******` spans where source text has been replaced. The statements that
# assign developer_works_during_bd, developer_works_period, total_his_issues,
# total_his_pull_requests, his_contributors, self_contributing,
# self_collaborating, total_his_collaborators, trying_to_get_stats and the
# loop variable his_repo are not visible -- presumably they were in the
# replaced spans. Restore the function from version control before changing
# its logic.
#
# NOTE(review): the Polish comments numbered 1 and 2 appear swapped relative
# to the code under them: "1." reads "number of people this developer
# follows" but precedes contributor.followers, and "2." reads "number of
# people following the developer" but precedes contributor.following.
#
# English rendering of the remaining Polish comments:
#   5a. number of repos in which the developer is a team member
#   6a. number of repos in which the developer is a contributor
#   his_repositories - projects created / owned by the developer
#   17. does the developer commit during working hours (depends on time zone)?
#   5.  number of repos the developer did not create in which he is a team
#       member; it is enough to subtract from developer_collaborators the
#       occurrences of his login in the sub-repos' get_collaborators()
#   6.  number of repos the developer did not create in which he is a
#       contributor; it is enough to subtract from developer_contributions
#       the occurrences of his login in the sub-repos' contributions
def developer_revealed(thread_getter_instance, repository, repo, contributor): global result_writer global result_punch_card_writer assert result_punch_card_writer is not None developer_login = contributor.login scream.log_debug('Assigning a contributor: ' + str(developer_login) + ' to a repo: ' + str(repository.name), True) developer_name = contributor.name # 1. Ilosc osob, ktore dany deweloper followuje [FollowEvent] developer_followers = contributor.followers # 2. Ilosc osob, ktore followuja dewelopera [FollowEvent] developer_following = contributor.following developer_location = contributor.location developer_total_private_repos = contributor.total_private_repos developer_total_public_repos = contributor.public_repos # 5a. Ilosc repo, w ktorych jest team member [TeamAddEvent] [MemberEvent] developer_collaborators = contributor.collaborators scream.say('Developer collaborators count is: ' + str(developer_collaborators)) # 6a. Ilosc repo, w ktorych jest contributorem [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent] developer_contributions = contributor.contributions scream.say('Developer contributions count is: ' + str(developer_contributions)) # his_repositories - Ilosc projektow przez niego utworzonych / ktorych jest wlascicielem his_repositories = contributor.get_repos() # 17. Czy commituje w godzinach pracy (zaleznie od strefy czasowej)? 
scream.log_debug("Starting to analyze OSRC card for user: "******"Histogram for hours for user: "******"pod repozytorium" while True: try: trying_to_get_stats += 1 stats = his_repo.get_stats_contributors() status_code__ = get_status_code('https://api.github.com/repos/' + his_repo.full_name + '/stats/contributors') if status_code__ != 204: for s in stats: ad___c = 0 ad___a = 0 ad___d = 0 for w in s.weeks: ad___c += w.c ad___a += w.a ad___d += w.d if s.author.login not in his_contributors: his_contributors.add(s.author.login) result_punch_card_writer.writerow([str(his_repo.owner.login), str(his_repo.name), str(developer_login), str(s.author.login), str(s.total), str(ad___c), str(ad___a), str(ad___d)]) else: scream.log_debug('The subrepo is empty, thus no statistics (punchcard) generated this time', True) break except GithubException as e: freeze(str(e) + ' his_repo.get_stats_contributors(). Unexpected error with getting stats.') if ("message" in e.data) and (e.data["message"].strip() == "Repository access blocked"): scream.log_debug("It is a private repo.. Skip!", True) break if force_raise: raise except TypeError as e: scream.log_warning('This was stats attempt no: ' + str(trying_to_get_stats), True) freeze(str(e) + ' his_repo.get_stats_contributors(). Punch-card not ready?') # probably punch card not ready if force_raise: raise except Exception as e: freeze(str(e) + ' his_repo.get_stats_contributors(). Punch-card not ready?') # probably punch card not ready if force_raise: raise # 6. Ilosc repo, ktorych nie tworzyl, w ktorych jest deweloperem if developer_login in his_contributors: self_contributing += 1 # 5. 
Ilosc repo, ktorych nie tworzyl, w ktorych jest team member subrepo_collaborators = his_repo.get_collaborators() for collaborator in subrepo_collaborators: total_his_collaborators += 1 if developer_login == collaborator.login: self_collaborating += 1 # All elements paginated through his_repositories, thus we can safely break loop break except GithubException as e: freeze('While getting subrepo details, ' + str(e) + ' in element his_repo in his_repositories') if ("message" in e.data) and (e.data["message"].strip() == "Repository access blocked"): scream.log_debug("It is a private repo.. Skip!") continue if force_raise: raise except TypeError as e: freeze('While getting subrepo details, ' + str(e) + ' in element his_repo in his_repositories. Quota depleted?') # probably punch card not ready if force_raise: raise except Exception as e: freeze('While getting subrepo details, ' + str(e) + ' in element his_repo in his_repositories. Quota depleted?') # probably punch card not ready if force_raise: raise total_his_contributors = len(his_contributors) # 5. Ilosc repo, ktorych nie tworzyl, w ktorych jest team member [TeamAddEvent] [MemberEvent] # tutaj od wartosci developer_collaborators wystarczy odjac wystapienia loginu w podrepo.get_collaborators() developer_foreign_collaborators = (developer_collaborators if developer_collaborators is not None else 0) - self_collaborating # 6. Ilosc repo, ktorych nie tworzyl, w ktorych jest contributorem [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent] # tutaj od wartosci developer_contributions wystarczy odjac wystapienia loginu w podrepo.get_contributions() developer_foreign_contributions = developer_contributions - self_contributing # All properties checked for a dev, thus we can safely break loop break except Exception as e: freeze('Error ' + str(e) + ' in for his_repo in his_repositories loop. 
Will start the subrepo analysis from the beginning.') his_repositories = contributor.get_repos() if force_raise: raise # Developer company (if any given) company = contributor.company created_at = contributor.created_at # Does the developer want to be hired? hireable = contributor.hireable disk_usage = contributor.disk_usage public_gists = contributor.public_gists owned_private_repos = contributor.owned_private_repos total_private_repos = contributor.total_private_repos scream.log_debug('Thread ' + str(thread_getter_instance) + ' Finished revealing contributor: ' + str(developer_login) + ' in a repo: ' + str(repository.name), True) if show_trace: scream.log_debug('Printing traceback stack', True) traceback.print_stack() scream.log_debug('Printing traceback exc pathway', True) traceback.print_exc() if not use_utf8: result_writer.writerow([str(repo.getUrl()), str(repo.getName()), str(repo.getOwner()), str(repo.getStargazersCount()), str(repo.getWatchersCount()), str(repo.getCreatedAt()), str(repo.getDefaultBranch()), str(repo.getDescription()), str(repo.getIsFork()), str(repo.getForks()), str(repo.getForksCount()), str(repo.getHasDownloads()), str(repo.getHasWiki()), str(repo.getHasIssues()), str(repo.getLanguage()), str(repo.getMasterBranch()), str(repo.getNetworkCount()), str(repo.getOpenedIssues()), str(repo.getOrganization()), str(repo.getPushedAt()), str(repo.getUpdatedAt()), str(repo.getPullsCount()), str(total_his_contributors), str(total_his_collaborators), str(developer_foreign_collaborators), str(developer_foreign_contributions), str(total_his_issues), str(total_his_pull_requests), str(developer_login), str(developer_name if developer_name is not None else ''), str(developer_followers), str(developer_following), str(developer_collaborators), str(company if company is not None else ''), str(developer_contributions), str(created_at), str(hireable if hireable is not None else ''), str(developer_location if developer_location is not None else ''), 
str(developer_total_private_repos), str(developer_total_public_repos), str(developer_works_during_bd), str(developer_works_period), str(disk_usage), str(public_gists), str(owned_private_repos), str(total_private_repos)]) else: result_writer.writerow([repo.getUrl(), repo.getName(), repo.getOwner(), str(repo.getStargazersCount()), str(repo.getWatchersCount()), str(repo.getCreatedAt()), repo.getDefaultBranch(), repo.getDescription() if repo.getDescription() is not None else '', str(repo.getIsFork()), str(repo.getForks()), str(repo.getForksCount()), str(repo.getHasDownloads()), str(repo.getHasWiki()), str(repo.getHasIssues()), repo.getLanguage() if repo.getLanguage() is not None else '', repo.getMasterBranch() if repo.getMasterBranch() is not None else '', str(repo.getNetworkCount()), str(repo.getOpenedIssues()), repo.getOrganization() if repo.getOrganization() is not None else '', str(repo.getPushedAt()), str(repo.getUpdatedAt()), str(repo.getPullsCount()), str(total_his_contributors), str(total_his_collaborators), str(developer_foreign_collaborators), str(developer_foreign_contributions), str(total_his_issues), str(total_his_pull_requests), developer_login, developer_name if developer_name is not None else '', str(developer_followers), str(developer_following), str(developer_collaborators), company if company is not None else '', str(developer_contributions), str(created_at), str(hireable) if hireable is not None else '', developer_location if developer_location is not None else '', str(developer_total_private_repos), str(developer_total_public_repos), str(developer_works_during_bd), str(developer_works_period), str(disk_usage), str(public_gists), str(owned_private_repos), str(total_private_repos)]) scream.log_debug('Wrote row to CSV.', True)