Esempio n. 1
0
def retry_if_neccessary(gotten_tag, tagname, objectname, arg_objectname):
    """Re-download the HTML page and search for the tag again if it is missing.

    When ``gotten_tag`` is already set it is returned untouched. Otherwise
    the page is fetched again up to three times, sleeping 60, 180 and 540
    seconds before the respective attempts, and the tag is looked up after
    every download.

    The function relies on ``html_addr``, ``filename`` and
    ``html_content_file`` being available from the enclosing scope.

    Args:
        gotten_tag: result of an earlier lookup, or None when nothing was
            found.
        tagname: tag name passed to ``soup.find``.
        objectname: attribute name used as the key of the ``soup.find``
            attribute filter.
        arg_objectname: attribute value used in that filter.

    Returns:
        The tag that was found, or None when all three retries came back
        without it.
    """
    if gotten_tag is not None:
        scream.say(
            'No action required. retry_if_neccessary() returning gotten_tag')
        return gotten_tag

    how_long = 60
    # retry 3 times, tripling the wait before each new attempt
    for _ in range(3):
        time.sleep(how_long)
        how_long *= 3

        # The download itself is repeated until it succeeds; only IOError
        # is treated as a transient failure.
        while True:
            try:
                urllib.urlretrieve(html_addr, filename + '.html')
                break
            except IOError:
                io_socket_message = ('Socket error while retrieving HTML'
                                     ' file from GitHub! Internet or '
                                     'GitHub down? Retry after 1 minute')
                scream.ssay(io_socket_message)
                scream.log_warning(io_socket_message)
                time.sleep(60)

        # NOTE(review): html_content_file is not re-opened here after the
        # download above - confirm the caller refreshes it, otherwise the
        # same content is parsed on every attempt.
        soup = BeautifulSoup(html_content_file)
        gotten_tag = soup.find(tagname, {objectname: arg_objectname})
        if gotten_tag is not None:
            # Hand the tag back to the caller. Raising StopIteration here
            # (as the code used to) made a successful retry look like a
            # failure and the tag was never returned.
            return gotten_tag

    # nothing to do here, lets move on
    scream.ssay('orphaned' + filename + '.json')
    scream.log_error(
        filename + '.json' + 'is without proper html. ' +
        'GitHub not responding or giving 404/501 erorr ??')
    return None
Esempio n. 2
0
def retry_if_neccessary(gotten_tag, tagname, objectname, arg_objectname):
    """Re-download the HTML page and search for the tag again if it is missing.

    When ``gotten_tag`` is already set it is returned untouched. Otherwise
    the page is fetched again up to three times, sleeping 60, 180 and 540
    seconds before the respective attempts, and the tag is looked up after
    every download.

    The function relies on ``html_addr``, ``filename`` and
    ``html_content_file`` being available from the enclosing scope.

    Args:
        gotten_tag: result of an earlier lookup, or None when nothing was
            found.
        tagname: tag name passed to ``soup.find``.
        objectname: attribute name used as the key of the ``soup.find``
            attribute filter.
        arg_objectname: attribute value used in that filter.

    Returns:
        The tag that was found, or None when all three retries came back
        without it.
    """
    if gotten_tag is not None:
        scream.say('No action required. retry_if_neccessary() returning gotten_tag')
        return gotten_tag

    how_long = 60
    # retry 3 times, tripling the wait before each new attempt
    for _ in range(3):
        time.sleep(how_long)
        how_long *= 3

        # The download itself is repeated until it succeeds; only IOError
        # is treated as a transient failure.
        while True:
            try:
                urllib.urlretrieve(html_addr, filename + '.html')
                break
            except IOError:
                io_socket_message = ('Socket error while retrieving HTML'
                                     ' file from GitHub! Internet or '
                                     'GitHub down? Retry after 1 minute')
                scream.ssay(io_socket_message)
                scream.log_warning(io_socket_message)
                time.sleep(60)

        # NOTE(review): html_content_file is not re-opened here after the
        # download above - confirm the caller refreshes it, otherwise the
        # same content is parsed on every attempt.
        soup = BeautifulSoup(html_content_file)
        gotten_tag = soup.find(tagname, {objectname: arg_objectname})
        if gotten_tag is not None:
            # Hand the tag back to the caller. Raising StopIteration here
            # (as the code used to) made a successful retry look like a
            # failure and the tag was never returned.
            return gotten_tag

    # nothing to do here, lets move on
    scream.ssay('orphaned' + filename + '.json')
    scream.log_error(filename + '.json' + 'is without proper html. ' +
                     'GitHub not responding or giving 404/501 erorr ??')
    return None
    def build_list_of_programmers(self, result_set_programmers, repo_key, repository):
        """Collect contributors into a dict keyed by their login.

        The iteration over ``result_set_programmers`` is repeated until it
        completes without an exception, unless the module-level
        ``force_raise`` flag is set, in which case the exception is
        re-raised.

        Args:
            result_set_programmers: iterable of objects exposing ``login``.
            repo_key: key of the repository; used in error reports.
            repository: not used by this method.

        Returns:
            dict mapping ``contributor.login`` to the contributor object.
        """
        result_set = dict()

        while True:
            # Every attempt starts from an empty dict so a half-finished
            # iteration never leaves partial results behind.
            result_set.clear()
            try:
                for contributor in result_set_programmers:
                    result_set[contributor.login] = contributor
                break
            except TypeError as e:
                # Report against the repo_key argument; the bare name `key`
                # used here before is not defined in this method.
                scream.log_error('Repo + Contributor TypeError, or paginated through' +
                                 ' contributors gave error. ' + str(repo_key) +
                                 ', error({0})'.format(str(e)), True)
                repos_reported_execution_error.write(str(repo_key) + os.linesep)
                if force_raise:
                    raise
                # NOTE(review): this branch retries immediately without a
                # freeze() pause - confirm that is intended.
            except socket.timeout as e:
                scream.log_error('Timeout while revealing details.. ' +
                                 ', error({0})'.format(str(e)), True)
                freeze('build_list_of_programmers will retry')
                if force_raise:
                    raise
            except Exception as e:
                scream.log_error('Exception while revealing details.. ' +
                                 ', error({0})'.format(str(e)), True)
                freeze('build_list_of_programmers will retry')
                if force_raise:
                    raise
        return result_set
    def build_list_of_programmers(self, result_set_programmers, repo_key, repository):
        """Collect contributors into a dict keyed by their login.

        The iteration over ``result_set_programmers`` is repeated until it
        completes without an exception, unless the module-level
        ``force_raise`` flag is set, in which case the exception is
        re-raised.

        Args:
            result_set_programmers: iterable of objects exposing ``login``.
            repo_key: key of the repository; used in error reports.
            repository: not used by this method.

        Returns:
            dict mapping ``contributor.login`` to the contributor object.
        """
        result_set = dict()

        while True:
            # Every attempt starts from an empty dict so a half-finished
            # iteration never leaves partial results behind.
            result_set.clear()
            try:
                for contributor in result_set_programmers:
                    result_set[contributor.login] = contributor
                break
            except TypeError as e:
                # Report against the repo_key argument; the bare name `key`
                # used here before is not defined in this method.
                scream.log_error('Building list of programmers TypeError. ' + str(repo_key) +
                                 ', error({0})'.format(str(e)), True)
                repos_reported_execution_error.write(str(repo_key) + os.linesep)
                if force_raise:
                    raise
                # NOTE(review): this branch retries immediately without a
                # freeze() pause - confirm that is intended.
            except socket.timeout as e:
                scream.log_error('Building list of programmers socket timeout.. ' +
                                 ', error({0})'.format(str(e)), True)
                freeze('build_list_of_programmers will retry')
                if force_raise:
                    raise
            except Exception as e:
                scream.log_error('Exception while building list of programmers .. ' +
                                 ', error({0})'.format(str(e)), True)
                freeze('build_list_of_programmers will retry')
                if force_raise:
                    raise
        return result_set
    def get_data(self):
        """Reveal every contributor of ``self.repository`` and store them on ``self.repo``.

        Runs only when the module-level ``resume_stage`` is None or
        ``'contributors'``. Contributors are fetched with
        ``self.repository.get_contributors()``, indexed by login through
        ``build_list_of_programmers`` and passed one by one to
        ``developer_revealed``. Each contributor is retried until
        ``developer_revealed`` finishes without an exception, unless the
        module-level ``force_raise`` flag is set. ``self.cleanup()`` is
        called at the end regardless of ``resume_stage``.
        """
        global resume_stage

        scream.say('Executing inside-thread method get_data() for: ' + str(self.threadId))
        if resume_stage in [None, 'contributors']:
            # The key is taken from the repo this thread works on; the bare
            # name `key` used below before is not bound in this method.
            repo_key = self.repo.getKey()
            scream.ssay('Checking size of a ' + str(repo_key) + ' team')
            # 1. Team size of a repository
            self.contributors = self.repository.get_contributors()
            assert self.contributors is not None

            self.repo_contributors = set()
            self.contributors_static = self.build_list_of_programmers(self.contributors, repo_key, self.repository)
            for login, contributor_object in self.contributors_static.items():
                scream.log_debug('move with contributor to next from contributors_static.items()', True)
                while True:
                    scream.say('Inside while True: (line 674)')
                    try:
                        self.contributor_login = login
                        self.contributor_object = contributor_object
                        scream.say(str(self.contributor_login))
                        self.repo_contributors.add(self.contributor_login)
                        scream.say(str(self.repo_contributors))
                        developer_revealed(self.threadId, self.repository, self.repo, self.contributor_object)
                        scream.say('Finished revealing developer')
                        break
                    except TypeError as e:
                        scream.log_error('Repo + Contributor TypeError, or paginated through' +
                                         ' contributors gave error. ' + str(repo_key) +
                                         ', error({0})'.format(str(e)), True)
                        repos_reported_execution_error.write(str(repo_key) + os.linesep)
                        if force_raise:
                            raise
                        # NOTE(review): this branch retries immediately
                        # without a freeze() pause - confirm that is intended.
                    except socket.timeout as e:
                        scream.log_error('Timeout while revealing details.. ' +
                                         ', error({0})'.format(str(e)), True)
                        freeze('socket.timeout in paginate through x contributors')
                        if force_raise:
                            raise
                    except Exception as e:
                        scream.log_error('Exception while revealing details.. ' +
                                         ', error({0})'.format(str(e)), True)
                        freeze(str(e) + ' in paginate through x contributors')
                        if force_raise:
                            raise

            assert self.repo_contributors is not None
            self.repo.setContributors(self.repo_contributors)
            self.repo.setContributorsCount(len(self.repo_contributors))
            scream.log('Added contributors of count: ' + str(len(self.repo_contributors)) + ' to a repo ' + str(repo_key))
        self.cleanup()
    def get_data(self):
        """Reveal every contributor of ``self.repository`` and store them on ``self.repo``.

        Runs only when the module-level ``resume_stage`` is None or
        ``'contributors'``. Contributors are fetched with
        ``self.repository.get_contributors()``, indexed by login through
        ``build_list_of_programmers`` and passed one by one to
        ``developer_revealed``. Each contributor is retried until
        ``developer_revealed`` finishes without an exception, unless the
        module-level ``force_raise`` flag is set. ``self.cleanup()`` is
        called at the end regardless of ``resume_stage``.
        """
        global resume_stage

        scream.say('Preparing to build list of programmers: ' + str(self.threadId))
        if resume_stage in [None, 'contributors']:
            # The key is taken from the repo this thread works on; the bare
            # name `key` used below before is not bound in this method.
            repo_key = self.repo.getKey()
            scream.ssay('Checking size of a ' + str(repo_key) + ' team')
            # 1. Team size of a repository
            self.contributors = self.repository.get_contributors()
            assert self.contributors is not None

            self.repo_contributors = set()
            self.contributors_static = self.build_list_of_programmers(self.contributors, repo_key, self.repository)
            for login, contributor_object in self.contributors_static.items():
                scream.log_debug('move with contributor to next from contributors_static.items()', True)
                while True:
                    scream.say('Get details for a contributor..')
                    try:
                        self.contributor_login = login
                        self.contributor_object = contributor_object
                        scream.say(str(self.contributor_login))
                        self.repo_contributors.add(self.contributor_login)
                        scream.say(str(self.repo_contributors))
                        developer_revealed(self.threadId, self.repository, self.repo, self.contributor_object)
                        scream.say('Finished revealing developer')
                        break
                    except TypeError as e:
                        scream.log_error('Repo + Contributor TypeError, or paginated through' +
                                         ' contributors gave error. ' + str(repo_key) +
                                         ', error({0})'.format(str(e)), True)
                        repos_reported_execution_error.write(str(repo_key) + os.linesep)
                        if force_raise:
                            raise
                        # NOTE(review): this branch retries immediately
                        # without a freeze() pause - confirm that is intended.
                    except socket.timeout as e:
                        scream.log_error('Timeout while revealing details.. ' +
                                         ', error({0})'.format(str(e)), True)
                        freeze('socket.timeout in paginate through x contributors')
                        if force_raise:
                            raise
                    except Exception as e:
                        scream.log_error('Exception while revealing details.. ' +
                                         ', error({0})'.format(str(e)), True)
                        freeze(str(e) + ' in paginate through x contributors')
                        if force_raise:
                            raise

            assert self.repo_contributors is not None
            self.repo.setContributors(self.repo_contributors)
            self.repo.setContributorsCount(len(self.repo_contributors))
            scream.log('Added contributors of count: ' + str(len(self.repo_contributors)) + ' to a repo ' + str(repo_key))
        self.cleanup()
    def build_list_of_programmers(self, result_set_programmers, repo_key,
                                  repository):
        """Collect contributors into a dict keyed by their login.

        The iteration over ``result_set_programmers`` is repeated until it
        completes without an exception, unless the module-level
        ``force_raise`` flag is set, in which case the exception is
        re-raised.

        Args:
            result_set_programmers: iterable of objects exposing ``login``.
            repo_key: key of the repository; used in error reports.
            repository: not used by this method.

        Returns:
            dict mapping ``contributor.login`` to the contributor object.
        """
        result_set = dict()

        while True:
            # Every attempt starts from an empty dict so a half-finished
            # iteration never leaves partial results behind.
            result_set.clear()
            try:
                for contributor in result_set_programmers:
                    result_set[contributor.login] = contributor
                break
            except TypeError as e:
                # Report against the repo_key argument; the bare name `key`
                # used here before is not defined in this method.
                scream.log_error(
                    'Repo + Contributor TypeError, or paginated through' +
                    ' contributors gave error. ' + str(repo_key) +
                    ', error({0})'.format(str(e)), True)
                repos_reported_execution_error.write(
                    str(repo_key) + os.linesep)
                if force_raise:
                    raise
                # NOTE(review): this branch retries immediately without a
                # freeze() pause - confirm that is intended.
            except socket.timeout as e:
                scream.log_error(
                    'Timeout while revealing details.. ' +
                    ', error({0})'.format(str(e)), True)
                freeze('build_list_of_programmers will retry')
                if force_raise:
                    raise
            except Exception as e:
                scream.log_error(
                    'Exception while revealing details.. ' +
                    ', error({0})'.format(str(e)), True)
                freeze('build_list_of_programmers will retry')
                if force_raise:
                    raise
        return result_set
Esempio n. 8
0
                        repos_reported_execution_error.write(key + os.linesep)
                    repo.setContributors(repo_contributors)
                    #repo.setContributorsCount(len(repo_contributors))
                    'class fields are not garbage, '
                    'its better to calculate count on demand'
                    scream.log('Added contributors of count: ' +
                               str(len(repo_contributors)) +
                               ' to a repo ' + key)
                except GithubException as e:
                    if 'repo_contributors' not in locals():
                        repo.setContributors([])
                    else:
                        repo.setContributors(repo_contributors)
                    scream.log_error('Repo didnt gave any contributors, ' +
                                     'or paginated through' +
                                     ' contributors gave error. ' + key +
                                     ', error({0}): {1}'.
                                     format(e.status, e.data))
                finally:
                    resume_stage = None

            #if resume_stage in [None, 'languages']:
            #    scream.ssay('Getting languages of a repo')
            #    languages = repository.get_languages()  # dict object (json? object)
            #    repo.setLanguage(languages)
            #    scream.log('Added languages ' + str(languages) + ' to a repo ' + key)
            #    resume_stage = None

            # we already have this
            # if resume_stage in [None, 'labels']:
            #     scream.ssay('Getting labels of a repo')
Esempio n. 9
0
def descr_user(s):
    """Look up GitHub user ``s`` and submit their first name to genderchecker.com.

    Results already stored in ``persist_users`` are served from there: the
    cached value is returned, or ``s`` itself when the cached value is None.
    Otherwise the user record is fetched from the GitHub API. When the
    record has no ``name`` key, or the name is None, None is cached for
    ``s`` and ``s`` is returned.

    For a non-empty name, the first whitespace-separated token is entered
    into the genderchecker.com form through ``my_browser`` and the form is
    submitted, with at most four attempts. StopIteration is raised when
    the fourth attempt fails.

    NOTE: the code visible in this excerpt ends after the form submission;
    no value is returned on that path here.
    """
    # Serve from the cache when this login was already looked up.
    if s in persist_users:
        if persist_users[s] is None:
            return s
        else:
            return persist_users[s]
    # Get the user's name and surname from the GitHub API; the request is
    # repeated every 60 seconds until it no longer raises IOError.
    while True:
        try:
            response = urllib.urlopen('https://api.github.com/users/' + s
                                      + '?client_id='
                                      + client_id + '&client_secret='
                                      + client_secret)
            break
        except IOError:
            print 'API GitHub not responding, urlopen failed'
            print 'retrying after 1 minute'
            time.sleep(60)
    scream.ssay(response)
    data = simplejson.load(response)
    scream.ssay(data)
    #fullname = data['name']
    if 'name' in data:
        fullname = data['name']
    else:
        # No name in the record: remember that and fall back to the login.
        scream.say('Fullname not provided')
        persist_users[s] = None
        return s
    if fullname is None:
        scream.say('Fullname provided but an empty entry')
        persist_users[s] = None
        return s
    if (len(fullname) > 0):
        # Only the first whitespace-separated token is used as first name.
        first_name = unicode(fullname.split()[0])
        if (len(first_name) > 0):
            scream.say('#ask now internet for gender')
            # Open the site, retrying every 60 seconds on URLError.
            while True:
                try:
                    response = my_browser.open('http://genderchecker.com/')
                    response.read()
                    break
                except urllib2.URLError:
                    scream.ssay('Site genderchecker.com seems to be down' +
                                '. awaiting for 60s before retry')
                    time.sleep(60)
            scream.say('Response read. Mechanize selecting form.')
            my_browser.select_form("aspnetForm")
            my_browser.form.set_all_readonly(False)
            # allow everything to be written

            control = my_browser.form.find_control("ctl00$TextBoxName")
            # Names that are not purely roman are passed through
            # cyrillic2latin before being cleaned with StripNonAlpha.
            if only_roman_chars(first_name):
                control.value = StripNonAlpha(first_name.encode('utf-8'))
            else:
                control.value = StripNonAlpha(cyrillic2latin(first_name).encode('utf-8'))
            #check if value is enough
            #control.text = first_name
            scream.say('Control value is set to :' + str(control.value))
            # The form is submitted at most four times; the fourth failure
            # raises StopIteration.
            submit_retry_counter = 4
            while True:
                try:
                    response = my_browser.submit()
                    html = response.read()
                    break
                except mechanize.HTTPError, e:
                    submit_retry_counter -= 1
                    if submit_retry_counter < 1:
                        raise StopIteration
                    error_message = 'Site genderchecker.com seems to have ' +\
                                    'internal problems. or my request is' +\
                                    ' wibbly-wobbly nonsense. HTTPError ' +\
                                    str(e.code) +\
                                    '. awaiting for 60s before retry'
                    scream.say(error_message)
                    scream.log_error(str(e.code) + ': ' + error_message)
                    time.sleep(60)
                # Bare except: any other exception is handled the same way
                # as an HTTPError, just without a status code in the log.
                except:
                    submit_retry_counter -= 1
                    if submit_retry_counter < 1:
                        raise StopIteration
                    error_message = 'Site genderchecker.com seems to have ' +\
                                    'internal problems. or my request is' +\
                                    ' wibbly-wobbly nonsense. ' +\
                                    'awaiting for 60s before retry'
                    scream.say(error_message)
                    scream.log_error(error_message)
                    time.sleep(60)
Esempio n. 10
0
def descr_user(s):
    """Look up GitHub user ``s`` and submit their first name to genderchecker.com.

    Results already stored in ``persist_users`` are served from there: the
    cached value is returned, or ``s`` itself when the cached value is None.
    Otherwise the user record is fetched from the GitHub API. When the
    record has no ``name`` key, or the name is None, None is cached for
    ``s`` and ``s`` is returned.

    For a non-empty name, the first whitespace-separated token is entered
    into the genderchecker.com form through ``my_browser`` and the form is
    submitted, with at most four attempts. StopIteration is raised when
    the fourth attempt fails.

    NOTE: the code visible in this excerpt ends after the form submission;
    no value is returned on that path here.
    """
    # Serve from the cache when this login was already looked up.
    if s in persist_users:
        if persist_users[s] is None:
            return s
        else:
            return persist_users[s]
    # Get the user's name and surname from the GitHub API; the request is
    # repeated every 60 seconds until it no longer raises IOError.
    while True:
        try:
            response = urllib.urlopen('https://api.github.com/users/' + s +
                                      '?client_id=' + client_id +
                                      '&client_secret=' + client_secret)
            break
        except IOError:
            print 'API GitHub not responding, urlopen failed'
            print 'retrying after 1 minute'
            time.sleep(60)
    scream.ssay(response)
    data = simplejson.load(response)
    scream.ssay(data)
    #fullname = data['name']
    if 'name' in data:
        fullname = data['name']
    else:
        # No name in the record: remember that and fall back to the login.
        scream.say('Fullname not provided')
        persist_users[s] = None
        return s
    if fullname is None:
        scream.say('Fullname provided but an empty entry')
        persist_users[s] = None
        return s
    if (len(fullname) > 0):
        # Only the first whitespace-separated token is used as first name.
        first_name = unicode(fullname.split()[0])
        if (len(first_name) > 0):
            scream.say('#ask now internet for gender')
            # Open the site, retrying every 60 seconds on URLError.
            while True:
                try:
                    response = my_browser.open('http://genderchecker.com/')
                    response.read()
                    break
                except urllib2.URLError:
                    scream.ssay('Site genderchecker.com seems to be down' +
                                '. awaiting for 60s before retry')
                    time.sleep(60)
            scream.say('Response read. Mechanize selecting form.')
            my_browser.select_form("aspnetForm")
            my_browser.form.set_all_readonly(False)
            # allow everything to be written

            control = my_browser.form.find_control("ctl00$TextBoxName")
            # Names that are not purely roman are passed through
            # cyrillic2latin before being cleaned with StripNonAlpha.
            if only_roman_chars(first_name):
                control.value = StripNonAlpha(first_name.encode('utf-8'))
            else:
                control.value = StripNonAlpha(
                    cyrillic2latin(first_name).encode('utf-8'))
            #check if value is enough
            #control.text = first_name
            scream.say('Control value is set to :' + str(control.value))
            # The form is submitted at most four times; the fourth failure
            # raises StopIteration.
            submit_retry_counter = 4
            while True:
                try:
                    response = my_browser.submit()
                    html = response.read()
                    break
                except mechanize.HTTPError, e:
                    submit_retry_counter -= 1
                    if submit_retry_counter < 1:
                        raise StopIteration
                    error_message = 'Site genderchecker.com seems to have ' +\
                                    'internal problems. or my request is' +\
                                    ' wibbly-wobbly nonsense. HTTPError ' +\
                                    str(e.code) +\
                                    '. awaiting for 60s before retry'
                    scream.say(error_message)
                    scream.log_error(str(e.code) + ': ' + error_message)
                    time.sleep(60)
                # Bare except: any other exception is handled the same way
                # as an HTTPError, just without a status code in the log.
                except:
                    submit_retry_counter -= 1
                    if submit_retry_counter < 1:
                        raise StopIteration
                    error_message = 'Site genderchecker.com seems to have ' +\
                                    'internal problems. or my request is' +\
                                    ' wibbly-wobbly nonsense. ' +\
                                    'awaiting for 60s before retry'
                    scream.say(error_message)
                    scream.log_error(error_message)
                    time.sleep(60)
Esempio n. 11
0
                for contributor in contributors:
                    repo_contributors.append(contributor)
                    check_quota_limit()
                repo.setContributors(repo_contributors)
                #repo.setContributorsCount(len(repo_contributors))
                'class fields are not garbage, '
                'its better to calculate count on demand'
                scream.log('Added contributors of count: ' +
                           str(len(repo_contributors)) + ' to a repo ' + key)
            except GithubException as e:
                if 'repo_contributors' not in locals():
                    repo.setContributors([])
                else:
                    repo.setContributors(repo_contributors)
                scream.log_error('Repo didnt gave any contributors, ' +
                                 'or paginated through' +
                                 ' contributors gave error. ' + key +
                                 ', error({0}): {1}'.format(e.status, e.data))
            finally:
                resume_stage = None

        if resume_stage in [None, 'languages']:
            scream.ssay('Getting languages of a repo')
            languages = repository.get_languages(
            )  # dict object (json? object)
            repo.setLanguage(languages)
            scream.log('Added languages ' + str(languages) + ' to a repo ' +
                       key)
            resume_stage = None

        if resume_stage in [None, 'labels']:
            scream.ssay('Getting labels of a repo')
    def get_data(self, page, conn):
        global results_done
        global results_all
        global pagination
        global openhub_query_tags

        self.params_sort_rating = urllib.urlencode({'query': 'tag:' + openhub_query_tags[0], 'api_key': return_random_openhub_key(),
                                                    'sort': 'rating', 'page': page})
        self.projects_api_url = "https://www.openhub.net/projects.xml?%s" % (self.params_sort_rating)

        self.result_flow = urllib.urlopen(self.projects_api_url)

        scream.say('')
        scream.say('-------------------------- PAGE ' + str(page) + ' parsed -----------------------------')
        scream.say('')

        # Parse the response into a structured XML object
        self.tree = ET.parse(self.result_flow)

        # Did Ohloh return an error?
        self.elem = self.tree.getroot()
        self.error = self.elem.find("error")
        if self.error is not None:
            print 'OpenHub returned ERROR:', ET.tostring(self.error),
            sys.exit()

        results_done += int(self.elem.find("items_returned").text)
        results_all = int(self.elem.find("items_available").text)

        self.i = 0
        for self.node in self.elem.findall("result/project"):
            self.i += 1
            scream.say('Checking element ' + str(self.i) + '/' + str(pagination))

            self.project_id = self.node.find("id").text
            self.project_name = self.node.find("name").text
            self.project_url = self.node.find("url").text
            self.project_htmlurl = self.node.find("html_url").text
            self.project_created_at = self.node.find("created_at").text
            self.project_updated_at = self.node.find("updated_at").text
            self.project_homepage_url = self.node.find("homepage_url").text

            self.project_average_rating = self.node.find("average_rating").text
            self.project_rating_count = self.node.find("rating_count").text
            self.project_review_count = self.node.find("review_count").text

            self.project_activity_level = self.node.find("project_activity_index/value").text

            self.project_user_count = self.node.find("user_count").text

            # project may have multiple GitHub repositories
            # or even it may be not present on GitHub - check that

            self.is_github_project = False
            self.github_repo_id = None

            # in case of multiple github CODE repositories (quite often)
            # treat as a seperate repo - remember, we focus on github repositories, not aggregates

            self.enlistments_detailed_params = urllib.urlencode({'api_key': return_random_openhub_key()})
            self.enlistments_detailed_url = "https://www.openhub.net/projects/%s/enlistments.xml?%s" % (self.project_id, self.enlistments_detailed_params)

            self.enlistments_result_flow = urllib.urlopen(self.enlistments_detailed_url)

            # Parse the response into a structured XML object
            self.enlistments_tree = ET.parse(self.enlistments_result_flow)

            # Did Ohloh return an error?
            self.enlistments_elem = self.enlistments_tree.getroot()
            self.enlistments_error = self.enlistments_elem.find("error")
            if self.enlistments_error is not None:
                print 'Ohloh returned:', ET.tostring(self.enlistments_error),
                sys.exit()

            self.repos_lists = list()

            for self.enlistment_node in self.enlistments_elem.findall("result/enlistment"):
                self.ee_type = self.enlistment_node.find("repository/type").text
                if (self.ee_type == "GitRepository"):
                    self.ee_link = self.enlistment_node.find("repository/url").text
                    if (self.ee_link.startswith("git://github.com/")):
                        scream.say('Is a GitHub project!')
                        self.is_github_project = True
                        self.github_repo_id = self.ee_link.split("git://github.com/")[1].split(".git")[0]
                        scream.say(self.github_repo_id)
                        self.repos_lists.append(self.github_repo_id)

            if not self.is_github_project:
                continue

            # now lets get even more sophisticated details
            self.params_detailed_url = urllib.urlencode({'api_key': return_random_openhub_key()})
            self.project_detailed_url = "https://www.openhub.net/projects/%s.xml?%s" % (self.project_id, self.params_detailed_url)  # how come here was a typo ?

            self.detailed_result_flow = urllib.urlopen(self.project_detailed_url)

            # Parse the response into a structured XML object
            self.detailed_tree = ET.parse(self.detailed_result_flow)

            # Did Ohloh return an error?
            self.detailed_elem = self.detailed_tree.getroot()
            self.detailed_error = self.detailed_elem.find("error")
            if self.detailed_error is not None:
                print 'Ohloh returned:', ET.tostring(self.detailed_error),
                sys.exit()

            self.twelve_month_contributor_count = self.detailed_elem.find("result/project/analysis/twelve_month_contributor_count").text
            self.total_contributor_count = self.detailed_elem.find("result/project/analysis/total_contributor_count").text
            self.twelve_month_commit_count = self.detailed_elem.find("result/project/analysis/twelve_month_commit_count")
            self.twelve_month_commit_count = self.twelve_month_commit_count.text if self.twelve_month_commit_count is not None else NullChar
            self.total_commit_count = self.detailed_elem.find("result/project/analysis/total_commit_count")
            self.total_commit_count = self.total_commit_count.text if self.total_commit_count is not None else NullChar
            self.total_code_lines = self.detailed_elem.find("result/project/analysis/total_code_lines")
            self.total_code_lines = self.total_code_lines.text if self.total_code_lines is not None else NullChar
            self.main_language_name = self.detailed_elem.find("result/project/analysis/main_language_name")
            self.main_language_name = self.main_language_name.text if self.main_language_name is not None else NullChar

            self.current_ghc = github_clients[num_modulo(self.i-1)]
            self.current_ghc_desc = github_clients_ids[num_modulo(self.i-1)]

            print 'Now using github client id: ' + str(self.current_ghc_desc)

            for self.gh_entity in self.repos_lists:

                try:
                    self.repository = self.current_ghc.get_repo(self.gh_entity)
                    self.repo_name = self.repository.name
                    self.repo_full_name = self.repository.full_name
                    self.repo_html_url = self.repository.html_url
                    self.repo_stargazers_count = self.repository.stargazers_count
                    self.repo_forks_count = self.repository.forks_count
                    self.repo_created_at = self.repository.created_at
                    self.repo_is_fork = self.repository.fork
                    self.repo_has_issues = self.repository.has_issues
                    self.repo_open_issues_count = self.repository.open_issues_count
                    self.repo_has_wiki = self.repository.has_wiki
                    self.repo_network_count = self.repository.network_count
                    self.repo_pushed_at = self.repository.pushed_at
                    self.repo_size = self.repository.size
                    self.repo_updated_at = self.repository.updated_at
                    self.repo_watchers_count = self.repository.watchers_count

                    # Now its time to get the list of developers!

                    # yay! rec-09 mysql instance is visible from the yoshimune computer !
                    # ok, but I forgot github blacklisted our comptuing clusters
                    # make sure your local win machine runs it..
                    # just pjatk things.. carry on

                    scream.say('Retrieving the project id from mysql database.. should take max 1 second.')

                    # Get here project id used in the database !
                    #conn.ping(True)
                    self.cursor = conn.cursor()
                    self.cursor.execute(r'select distinct id from (select * from projects where `name`="{0}") as p where url like "%{1}"'.format(self.repo_name, self.repo_full_name))
                    self.rows = self.cursor.fetchall()

                    try:
                        self.repo_db_id = self.rows[0]
                    except:
                        #print str(cursor.info())
                        # this is too new repo , because it is not found on mysql db, skip it !
                        continue
                        #print 'Faulty query was: -------- '
                        #print r'select distinct id from (select * from projects where `name`="{0}") as p where url like "%{1}"'.format(self.repo_name, self.repo_full_name)

                    scream.say('project id retrieved from database is: ' + str(self.repo_db_id))

                    self.cursor.close()

                    #conn.ping(True)
                    self.cursor = conn.cursor()
                    # Now get list of GitHub logins which are project_members !
                    self.cursor.execute(r'SELECT login FROM project_members INNER JOIN users ON users.id = project_members.user_id WHERE repo_id = %s' % self.repo_db_id)
                    self.project_developers = self.cursor.fetchall()

                    self.project_developers = [i[0] for i in self.project_developers]  # unzipping tuples in tuples
                    self.contributors_count = len(self.project_developers)

                    self.cursor.close()
                    #conn.close()

                    for self.project_developer in self.project_developers:

                        # create a GitHub user named object for GitHub API
                        self.current_user = self.current_ghc.get_user(self.project_developer)

                        self.current_user_bio = self.current_user.bio
                        self.current_user_blog = self.current_user.blog
                        self.current_user_collaborators = self.current_user.collaborators
                        self.current_user_company = self.current_user.company
                        self.current_user_contributions = self.current_user.contributions
                        self.current_user_created_at = self.current_user.created_at
                        self.current_user_followers = self.current_user.followers
                        self.current_user_following = self.current_user.following

                        self.current_user_hireable = self.current_user.hireable
                        self.current_user_login = self.current_user.login
                        self.current_user_name = self.current_user.name

                        self.developer_login = self.project_developer

                        # Does he commit during business hours?
                        scream.log_debug("Starting to analyze OSRC card for user: "******"Histogram for hours for user: "******"PushEvent"):
                                        self.developer_all_pushes += self.usage_element['total']
                                    elif (self.usage_element['type'] == "WatchEvent"):
                                        self.developer_all_stars_given += self.usage_element['total']
                                    elif (self.usage_element['type'] == "CreateEvent"):
                                        self.developer_all_creations += self.usage_element['total']
                                    elif (self.usage_element['type'] == "IssuesEvent"):
                                        self.developer_all_issues_created += self.usage_element['total']
                                    elif (self.usage_element['type'] == "PullRequestEvent"):
                                        self.developer_all_pull_requests += self.usage_element['total']

                                # -----------------------------------------------------------------------
                                scream.log_debug('Finished analyze OSRC card for user: '******'OSRC gave error, probably 404')
                                scream.say('try ' + str(self.tries) + ' more times')
                                self.tries -= 1
                            finally:
                                if self.tries < 1:
                                    self.developer_works_during_bd = None
                                    self.developer_works_period = 0
                                    break

                        self.collection = [str(((page-1)*pagination) + self.i), self.gh_entity, self.repo_full_name, self.repo_html_url,
                                           str(self.repo_forks_count), str(self.repo_stargazers_count), str(self.contributors_count),
                                           str(self.repo_created_at), str(self.repo_is_fork), str(self.repo_has_issues), str(self.repo_open_issues_count),
                                           str(self.repo_has_wiki), str(self.repo_network_count), str(self.repo_pushed_at), str(self.repo_size),
                                           str(self.repo_updated_at), str(self.repo_watchers_count), self.project_id,
                                           self.project_name, self.project_url, self.project_htmlurl, str(self.project_created_at),
                                           str(self.project_updated_at), self.project_homepage_url, str(self.project_average_rating),
                                           str(self.project_rating_count), str(self.project_review_count), self.project_activity_level,
                                           str(self.project_user_count), str(self.twelve_month_contributor_count), str(self.total_contributor_count),
                                           str(self.twelve_month_commit_count), str(self.total_commit_count), str(self.total_code_lines),
                                           self.main_language_name, str(self.developer_works_during_bd), str(self.developer_works_period),
                                           str(self.developer_all_pushes), str(self.developer_all_stars_given), str(self.developer_all_creations),
                                           str(self.developer_all_issues_created), str(self.developer_all_pull_requests)]

                        csv_writer.writerow(self.collection)
                        #self.set_finished(True)
                        print '.'
                except UnknownObjectException:
                    print 'Repo ' + self.gh_entity + ' is not available anymore..'
                except GithubException:
                    # TODO: write here something clever
                    raise
        self.set_finished(True)
Esempio n. 13
0
                    proxy = {'http': '94.154.26.132:8090'}
                    session_osrc = requests.Session()
                    requests_osrc = session_osrc.get(osrc_url, proxies=proxy)
                    # print requests_osrc.text
                    osrc_data = json.loads(requests_osrc.text)
                    scream.say("JSON parsed..")
                    if ("message" in osrc_data) and (osrc_data["message"].startswith("Not enough information for")):
                        scream.say(osrc_data["message"])
                        limit = limit + 1
                        break
                    for language in osrc_data["usage"]["languages"]:
                        #print language["count"]
                        #print language["language"]
                        csv_writer.writerow([str(counter), str(developer_login), str(language["language"]), str(language["count"]), str(language["quantile"])])
                    scream.log_debug("Languages diagram for user " + str(developer_login) + ' created..', True)
                    # -----------------------------------------------------------------------
                    scream.log_debug('Finished analyze OSRC card for user: '******'OSRC gave error, probably 404')
                    scream.say('try ' + str(tries) + ' more times')
                    tries -= 1
                finally:
                    if tries < 1:
                        developer_works_during_bd = 0
                        developer_works_period = 0
                        break
            #with open('progress_bar.lock') as f: scream.say(f.read())
            counter = counter + 1
Esempio n. 14
0
                    check_quota_limit()
                repo.setContributors(repo_contributors)
                #repo.setContributorsCount(len(repo_contributors))
                'class fields are not garbage, '
                'its better to calculate count on demand'
                scream.log('Added contributors of count: ' +
                           str(len(repo_contributors)) +
                           ' to a repo ' + key)
            except GithubException as e:
                if 'repo_contributors' not in locals():
                    repo.setContributors([])
                else:
                    repo.setContributors(repo_contributors)
                scream.log_error('Repo didnt gave any contributors, ' +
                                 'or paginated through' +
                                 ' contributors gave error. ' + key +
                                 ', error({0}): {1}'.
                                 format(e.status, e.data))
            finally:
                resume_stage = None

        if resume_stage in [None, 'languages']:
            scream.ssay('Getting languages of a repo')
            languages = repository.get_languages()  # dict object (json? object)
            repo.setLanguage(languages)
            scream.log('Added languages ' + str(languages) + ' to a repo ' + key)
            resume_stage = None

        if resume_stage in [None, 'labels']:
            scream.ssay('Getting labels of a repo')
            'getting labels, label is a tag which you can put in an issue'
Esempio n. 15
0
                    except:
                        repos_reported_execution_error.write(key + os.linesep)
                    repo.setContributors(repo_contributors)
                    #repo.setContributorsCount(len(repo_contributors))
                    'class fields are not garbage, '
                    'its better to calculate count on demand'
                    scream.log('Added contributors of count: ' +
                               str(len(repo_contributors)) + ' to a repo ' +
                               key)
                except GithubException as e:
                    if 'repo_contributors' not in locals():
                        repo.setContributors([])
                    else:
                        repo.setContributors(repo_contributors)
                    scream.log_error(
                        'Repo didnt gave any contributors, ' +
                        'or paginated through' + ' contributors gave error. ' +
                        key + ', error({0}): {1}'.format(e.status, e.data))
                finally:
                    resume_stage = None

            #if resume_stage in [None, 'languages']:
            #    scream.ssay('Getting languages of a repo')
            #    languages = repository.get_languages()  # dict object (json? object)
            #    repo.setLanguage(languages)
            #    scream.log('Added languages ' + str(languages) + ' to a repo ' + key)
            #    resume_stage = None

            # to juz mamy
            # if resume_stage in [None, 'labels']:
            #     scream.ssay('Getting labels of a repo')
            #     'getting labels, label is a tag which you can put in an issue'
def developer_revealed(thread_getter_instance, repository, repo, contributor):
    # Gather profile and activity figures for one contributor and write them,
    # together with the repository's own fields, as one row to the module-level
    # `result_writer`. Per-author statistics of the contributor's own
    # repositories are written to `result_punch_card_writer` along the way.
    #
    # Arguments, as used by the visible code:
    #   thread_getter_instance - only stringified into the final debug message.
    #   repository             - only `repository.name` is read (log messages).
    #   repo                   - its get*() accessors supply the repo columns.
    #   contributor            - user object; its attributes are read and
    #                            get_repos() is called on it.
    #
    # NOTE(review): part of this function's text is corrupted in this copy of
    # the file (see the note above the `******` line below), so the function
    # does not parse as it stands.
    global result_writer
    global result_punch_card_writer

    # The punch-card writer must have been set up before this is called.
    assert result_punch_card_writer is not None

    developer_login = contributor.login
    scream.log_debug('Assigning a contributor: ' + str(developer_login) + ' to a repo: ' + str(repository.name), True)
    developer_name = contributor.name
    # 1. Number of people the given developer follows [FollowEvent]
    # NOTE(review): the comments for items 1 and 2 look swapped relative to
    # the attributes read - this line reads `followers`, the next `following`.
    developer_followers = contributor.followers
    # 2. Number of people who follow the developer [FollowEvent]
    developer_following = contributor.following

    developer_location = contributor.location
    developer_total_private_repos = contributor.total_private_repos
    developer_total_public_repos = contributor.public_repos

    # 5a.  Number of repos in which he is a team member [TeamAddEvent] [MemberEvent]
    developer_collaborators = contributor.collaborators
    scream.say('Developer collaborators count is: ' + str(developer_collaborators))
    # 6a.  Number of repos in which he is a contributor [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent]
    developer_contributions = contributor.contributions
    scream.say('Developer contributions count is: ' + str(developer_contributions))

    # his_repositories - the projects created by him / of which he is the owner
    his_repositories = contributor.get_repos()

    # 17. Does he commit during working hours (depending on the time zone)?
    # NOTE(review): the next line is corrupted: the `******` runs stand where
    # source text was lost, and the code jumps straight into a deeply nested
    # loop. Names used below but never assigned in the visible code
    # (his_contributors, self_contributing, self_collaborating,
    # total_his_collaborators, total_his_issues, total_his_pull_requests,
    # trying_to_get_stats, his_repo, developer_works_during_bd,
    # developer_works_period) are presumably initialised in the missing part,
    # along with the enclosing loops/try blocks - restore from the original.
    scream.log_debug("Starting to analyze OSRC card for user: "******"Histogram for hours for user: "******"pod repozytorium"
                        # Retry loop for the contributor statistics of one of
                        # his repositories: left via `break` on success or when
                        # access is blocked; the other handlers fall through
                        # and the loop runs again unless force_raise is set.
                        while True:
                            try:
                                trying_to_get_stats += 1
                                stats = his_repo.get_stats_contributors()
                                # A 204 status from get_status_code() is treated
                                # as "no statistics available" (else branch).
                                status_code__ = get_status_code('https://api.github.com/repos/' + his_repo.full_name + '/stats/contributors') 
                                if status_code__ != 204:
                                    for s in stats:
                                        # Sum the weekly c / a / d fields for
                                        # this author (presumably commits,
                                        # additions, deletions - TODO confirm).
                                        ad___c = 0
                                        ad___a = 0
                                        ad___d = 0
                                        for w in s.weeks:
                                            ad___c += w.c
                                            ad___a += w.a
                                            ad___d += w.d
                                        # Collect distinct author logins; the
                                        # total is taken with len() later on.
                                        if s.author.login not in his_contributors:
                                            his_contributors.add(s.author.login)
                                        # One punch-card row per (repo, author).
                                        result_punch_card_writer.writerow([str(his_repo.owner.login), str(his_repo.name),
                                                                          str(developer_login), str(s.author.login), str(s.total), str(ad___c), str(ad___a), str(ad___d)])
                                else:
                                    scream.log_debug('The subrepo is empty, thus no statistics (punchcard) generated this time', True)
                                break
                            except GithubException as e:
                                # freeze() is defined elsewhere in the file;
                                # presumably it logs and pauses - TODO confirm.
                                freeze(str(e) + ' his_repo.get_stats_contributors(). Unexpected error with getting stats.')
                                if ("message" in e.data) and (e.data["message"].strip() == "Repository access blocked"):
                                    scream.log_debug("It is a private repo.. Skip!", True)
                                    break
                                if force_raise:
                                    raise
                            except TypeError as e:
                                scream.log_warning('This was stats attempt no: ' + str(trying_to_get_stats), True)
                                freeze(str(e) + ' his_repo.get_stats_contributors(). Punch-card not ready?')
                                # probably punch card not ready
                                if force_raise:
                                    raise
                            except Exception as e:
                                freeze(str(e) + ' his_repo.get_stats_contributors(). Punch-card not ready?')
                                # probably punch card not ready
                                if force_raise:
                                    raise

                        # 6. Number of repos he did not create in which he is a developer
                        # (counts here the repos where his own login is among the authors)
                        if developer_login in his_contributors:
                            self_contributing += 1

                        # 5. Number of repos he did not create in which he is a team member
                        # (counts all collaborators seen, and separately the
                        # occurrences of his own login among them)
                        subrepo_collaborators = his_repo.get_collaborators()
                        for collaborator in subrepo_collaborators:
                            total_his_collaborators += 1
                            if developer_login == collaborator.login:
                                self_collaborating += 1

                        # All elements paginated through his_repositories, thus we can safely break loop
                        break
                    except GithubException as e:
                        freeze('While getting subrepo details, ' + str(e) + ' in element his_repo in his_repositories')
                        if ("message" in e.data) and (e.data["message"].strip() == "Repository access blocked"):
                            scream.log_debug("It is a private repo.. Skip!")
                            # NOTE(review): `continue` targets the innermost
                            # enclosing loop, whose header is in the missing
                            # text - verify that this really skips the
                            # repository rather than retrying it.
                            continue
                        if force_raise:
                            raise
                    except TypeError as e:
                        freeze('While getting subrepo details, ' + str(e) + ' in element his_repo in his_repositories. Quota depleted?')
                        # probably punch card not ready
                        if force_raise:
                            raise
                    except Exception as e:
                        freeze('While getting subrepo details, ' + str(e) + ' in element his_repo in his_repositories. Quota depleted?')
                        # probably punch card not ready
                        if force_raise:
                            raise

            total_his_contributors = len(his_contributors)

            # 5.  Number of repos he did not create in which he is a team member [TeamAddEvent] [MemberEvent]
            # here it is enough to subtract from developer_collaborators the occurrences of the login in subrepo.get_collaborators()
            developer_foreign_collaborators = (developer_collaborators if developer_collaborators is not None else 0) - self_collaborating
            # 6.  Number of repos he did not create in which he is a contributor [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent]
            # here it is enough to subtract from developer_contributions the occurrences of the login in subrepo.get_contributions()
            # NOTE(review): unlike the line above there is no None guard here;
            # a None developer_contributions raises TypeError, which lands in
            # the `except Exception` handler below.
            developer_foreign_contributions = developer_contributions - self_contributing

            # All properties checked for a dev, thus we can safely break loop
            break

        except Exception as e:
            # Any failure restarts the sub-repository analysis: the repository
            # listing is fetched again before the (not visible) enclosing loop
            # repeats, unless force_raise asks for the error to propagate.
            freeze('Error ' + str(e) + ' in for his_repo in his_repositories loop. Will start the subrepo analysis from the beginning.')
            his_repositories = contributor.get_repos()
            if force_raise:
                raise

    # Developer company (if any given)
    company = contributor.company
    created_at = contributor.created_at
    # Does the developer want to be hired?
    hireable = contributor.hireable
    disk_usage = contributor.disk_usage

    public_gists = contributor.public_gists
    owned_private_repos = contributor.owned_private_repos
    # Same attribute as developer_total_private_repos read earlier; both
    # values are written to the output row.
    total_private_repos = contributor.total_private_repos

    scream.log_debug('Thread ' + str(thread_getter_instance) +
                     ' Finished revealing contributor: ' + str(developer_login) + ' in a repo: ' + str(repository.name), True)

    # Optional debugging aid controlled by the module-level show_trace flag.
    if show_trace:
        scream.log_debug('Printing traceback stack', True)
        traceback.print_stack()
        scream.log_debug('Printing traceback exc pathway', True)
        traceback.print_exc()

    # Two variants of the same row: without use_utf8 every field is passed
    # through str(); with it, text fields are written unchanged (with '' in
    # place of None for several of them) and only the other fields use str().
    if not use_utf8:
        result_writer.writerow([str(repo.getUrl()), str(repo.getName()), str(repo.getOwner()),
                               str(repo.getStargazersCount()), str(repo.getWatchersCount()),

                               str(repo.getCreatedAt()), str(repo.getDefaultBranch()), str(repo.getDescription()),
                               str(repo.getIsFork()), str(repo.getForks()), str(repo.getForksCount()),
                               str(repo.getHasDownloads()), str(repo.getHasWiki()), str(repo.getHasIssues()),
                               str(repo.getLanguage()), str(repo.getMasterBranch()), str(repo.getNetworkCount()), str(repo.getOpenedIssues()),
                               str(repo.getOrganization()), str(repo.getPushedAt()), str(repo.getUpdatedAt()), str(repo.getPullsCount()),

                               str(total_his_contributors), str(total_his_collaborators), str(developer_foreign_collaborators),
                               str(developer_foreign_contributions), str(total_his_issues), str(total_his_pull_requests),

                               str(developer_login),
                               str(developer_name if developer_name is not None else ''), str(developer_followers), str(developer_following),
                               str(developer_collaborators), str(company if company is not None else ''), str(developer_contributions),
                               str(created_at), str(hireable if hireable is not None else ''),
                               str(developer_location if developer_location is not None else ''),
                               str(developer_total_private_repos), str(developer_total_public_repos),
                               str(developer_works_during_bd), str(developer_works_period), str(disk_usage),
                               str(public_gists), str(owned_private_repos), str(total_private_repos)])

    else:
        result_writer.writerow([repo.getUrl(), repo.getName(), repo.getOwner(), str(repo.getStargazersCount()), str(repo.getWatchersCount()),

                               str(repo.getCreatedAt()), repo.getDefaultBranch(), repo.getDescription() if repo.getDescription() is not None else '',
                               str(repo.getIsFork()), str(repo.getForks()), str(repo.getForksCount()),
                               str(repo.getHasDownloads()), str(repo.getHasWiki()), str(repo.getHasIssues()),
                               repo.getLanguage() if repo.getLanguage() is not None else '',
                               repo.getMasterBranch() if repo.getMasterBranch() is not None else '',
                               str(repo.getNetworkCount()), str(repo.getOpenedIssues()),
                               repo.getOrganization() if repo.getOrganization() is not None else '',
                               str(repo.getPushedAt()), str(repo.getUpdatedAt()), str(repo.getPullsCount()),

                               str(total_his_contributors), str(total_his_collaborators), str(developer_foreign_collaborators),
                               str(developer_foreign_contributions), str(total_his_issues), str(total_his_pull_requests),

                               developer_login,
                               developer_name if developer_name is not None else '', str(developer_followers), str(developer_following),
                               str(developer_collaborators), company if company is not None else '', str(developer_contributions),
                               str(created_at), str(hireable) if hireable is not None else '',
                               developer_location if developer_location is not None else '',
                               str(developer_total_private_repos), str(developer_total_public_repos),
                               str(developer_works_during_bd), str(developer_works_period), str(disk_usage),
                               str(public_gists), str(owned_private_repos), str(total_private_repos)])

    scream.log_debug('Wrote row to CSV.', True)